I have a parser in which I've found an issue with my new logic for retrieving the line number and the line offset that a given offset belongs to in a source text.
/// `CompilationUnit` identifies an AS3 compilation unit and contains
/// a source text.
///
/// Besides the text it holds three tiers of line-skip tables
/// (`line_skips`, `higher_line_skips`, `extra_higher_line_skips`) that let
/// line lookups start scanning from a recorded line start instead of from
/// the beginning of the text. Each table has a companion counter that
/// `push_line_skip` uses to decide when the next entry is recorded.
pub struct CompilationUnit {
/// Optional file path associated with this compilation unit.
pub(crate) file_path: Option<String>,
/// The source text.
pub(crate) text: String,
/// Collection of ascending line number *skips* used
/// for optimizing retrieval of line numbers or line offsets.
pub(crate) line_skips: RefCell<Vec<LineSkip>>,
/// Number of `push_line_skip` calls since an entry was last added to
/// `line_skips`; reset to zero once it reaches `LINE_SKIP_THRESOLD`.
pub(crate) line_skips_counter: Cell<usize>,
/// Collection used before `line_skips` in line lookups
/// to skip lines in a higher threshold.
pub(crate) higher_line_skips: RefCell<Vec<HigherLineSkip>>,
/// Number of `push_line_skip` calls since an entry was last added to
/// `higher_line_skips`; reset to zero once it reaches
/// `HIGHER_LINE_SKIP_THRESOLD`.
pub(crate) higher_line_skips_counter: Cell<usize>,
/// Collection used before `higher_line_skips` in line lookups
/// to skip lines in an extra higher threshold.
pub(crate) extra_higher_line_skips: RefCell<Vec<HigherLineSkip>>,
/// Number of `push_line_skip` calls since an entry was last added to
/// `extra_higher_line_skips`; reset to zero once it reaches
/// `EXTRA_HIGHER_LINE_SKIP_THRESOLD`.
pub(crate) extra_higher_line_skips_counter: Cell<usize>
// (remaining fields are elided in this excerpt)
...
}
/// A recorded line start: the offset at which a given line begins.
///
/// Entries of this type are stored in ascending order and let line lookups
/// begin scanning at a known line start rather than at offset zero.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct LineSkip {
    /// Line offset.
    pub offset: usize,
    /// Line number counting from one.
    pub line_number: usize,
}
/// A recorded line start in a coarser skip table.
///
/// In addition to the line's offset and number, it stores the index at which
/// the lookup should continue in the next, finer-grained table.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct HigherLineSkip {
    /// Index to a `LineSkip`, or another `HigherLineSkip` in the case
    /// of extra higher line skips.
    pub skip_index: usize,
    /// Line offset.
    pub offset: usize,
    /// Line number counting from one.
    pub line_number: usize,
}
impl CompilationUnit {
/// Constructs a source file in unparsed and non verified state.
pub fn new(file_path: Option<String>, text: String, compiler_options: &Rc<CompilerOptions>) -> Rc<Self> {
Rc::new(Self {
file_path,
text,
line_skips: RefCell::new(vec![LineSkip { offset: 0, line_number: 1 }]),
line_skips_counter: Cell::new(0),
higher_line_skips: RefCell::new(vec![HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 }]),
higher_line_skips_counter: Cell::new(0),
extra_higher_line_skips: RefCell::new(vec![HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 }]),
extra_higher_line_skips_counter: Cell::new(0),
...
})
}
pub(crate) fn push_line_skip(&self, line_number: usize, offset: usize) {
let counter = self.line_skips_counter.get();
if counter == LINE_SKIP_THRESOLD {
self.line_skips.borrow_mut().push(LineSkip { line_number, offset });
self.line_skips_counter.set(0);
} else {
self.line_skips_counter.set(counter + 1);
}
let counter = self.higher_line_skips_counter.get();
if counter == HIGHER_LINE_SKIP_THRESOLD {
self.higher_line_skips.borrow_mut().push(HigherLineSkip { skip_index: self.line_skips.borrow().len() - 1, line_number, offset });
self.higher_line_skips_counter.set(0);
} else {
self.higher_line_skips_counter.set(counter + 1);
}
let counter = self.extra_higher_line_skips_counter.get();
if counter == EXTRA_HIGHER_LINE_SKIP_THRESOLD {
self.extra_higher_line_skips.borrow_mut().push(HigherLineSkip { skip_index: self.higher_line_skips.borrow().len() - 1, line_number, offset });
self.extra_higher_line_skips_counter.set(0);
} else {
self.extra_higher_line_skips_counter.set(counter + 1);
}
}
/// Retrieves line number from an offset. The resulting line number
/// is counted from one.
pub fn get_line_number(&self, offset: usize) -> usize {
// Extra higher line skips
let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
let skips = self.extra_higher_line_skips.borrow();
let mut skips = skips.iter();
while let Some(skip_1) = skips.next() {
if offset < skip_1.offset {
break;
}
last_skip = *skip_1;
}
// Higher line skips
let skips = self.higher_line_skips.borrow();
let mut skips = skips[last_skip.skip_index..].iter();
let mut last_skip = skips.next().unwrap();
while let Some(skip_1) = skips.next() {
if offset < skip_1.offset {
break;
}
last_skip = skip_1;
}
// Line skips
let skips = self.line_skips.borrow();
let mut skips = skips[last_skip.skip_index..].iter();
let mut last_skip = skips.next().unwrap();
while let Some(skip_1) = skips.next() {
if offset < skip_1.offset {
break;
}
last_skip = skip_1;
}
let mut current_line = last_skip.line_number;
let mut characters = CharacterReader::from(&self.text[last_skip.offset..]);
while last_skip.offset + characters.index() < offset {
let ch_1 = characters.next();
if let Some(ch_1) = ch_1 {
if CharacterValidator::is_line_terminator(ch_1) {
if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
characters.next();
}
current_line += 1;
}
} else {
break;
}
}
current_line
}
/// Retrieves offset from line number (counted from one).
pub fn get_line_offset(&self, line: usize) -> Option<usize> {
// Extra higher line skips
let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
let skips = self.extra_higher_line_skips.borrow();
let mut skips = skips.iter();
while let Some(skip_1) = skips.next() {
if line < skip_1.line_number {
break;
}
last_skip = *skip_1;
}
// Higher line skips
let skips = self.higher_line_skips.borrow();
let mut skips = skips[last_skip.skip_index..].iter();
let mut last_skip = skips.next().unwrap();
while let Some(skip_1) = skips.next() {
if line < skip_1.line_number {
break;
}
last_skip = skip_1;
}
// Line skips
let skips = self.line_skips.borrow();
let mut skips = skips[last_skip.skip_index..].iter();
let mut last_skip = skips.next().unwrap();
while let Some(skip_1) = skips.next() {
if line < skip_1.line_number {
break;
}
last_skip = skip_1;
}
let mut current_line = last_skip.line_number;
let mut characters = CharacterReader::from(&self.text[last_skip.offset..]);
while current_line != line {
let ch_1 = characters.next();
if let Some(ch_1) = ch_1 {
if CharacterValidator::is_line_terminator(ch_1) {
if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
characters.next();
}
current_line += 1;
}
} else {
return None;
}
}
Some(last_skip.offset + characters.index())
}
/// Retrieves the offset from the corresponding line of an offset.
pub fn get_line_offset_from_offset(&self, offset: usize) -> usize {
// Extra higher line skips
let mut last_skip = HigherLineSkip { skip_index: 0, offset: 0, line_number: 1 };
let skips = self.extra_higher_line_skips.borrow();
let mut skips = skips.iter();
while let Some(skip_1) = skips.next() {
if offset < skip_1.offset {
break;
}
last_skip = *skip_1;
}
// Higher line skips
let skips = self.higher_line_skips.borrow();
let mut skips = skips[last_skip.skip_index..].iter();
let mut last_skip = skips.next().unwrap();
while let Some(skip_1) = skips.next() {
if offset < skip_1.offset {
break;
}
last_skip = skip_1;
}
// Line skips
let skips = self.line_skips.borrow();
let mut skips = skips[last_skip.skip_index..].iter();
let mut last_skip = skips.next().unwrap();
while let Some(skip_1) = skips.next() {
if offset < skip_1.offset {
break;
}
last_skip = skip_1;
}
let mut current_line_offset = last_skip.offset;
let mut characters = CharacterReader::from(&self.text[last_skip.offset..]);
while last_skip.offset + characters.index() < offset {
let ch_1 = characters.next();
if let Some(ch_1) = ch_1 {
if CharacterValidator::is_line_terminator(ch_1) {
if ch_1 == '\r' && characters.peek_or_zero() == '\n' {
characters.next();
}
current_line_offset = last_skip.offset + characters.index();
}
} else {
break;
}
}
current_line_offset
}
}
E.g., I now have this sample source code in another language:
default xml namespace =
It yields a syntax error that is reported at the wrong column:
C:\Users\mathe\UnsyncDocuments\Test.as:1:1: Syntax error #1030: Expected expression before end of program
If I make the source a little more complicated by repeating the statement, I get three syntax errors, all reported at the wrong locations.
default xml namespace =
default xml namespace =
default xml namespace =
Log:
C:\Users\mathe\UnsyncDocuments\Test.as:1:1: Syntax error #1030: Expected expression before end of program
C:\Users\mathe\UnsyncDocuments\Test.as:2:1: Syntax error #1030: Expected expression before 'default'
C:\Users\mathe\UnsyncDocuments\Test.as:3:1: Syntax error #1030: Expected expression before 'default'