mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 04:42:10 +00:00
refactor(parser): byte_search macro evaluate to matched byte (#2555)
Change behavior of `byte_search!` macro, to make it easier to understand and use: 1. `handle_match` removed. Macro instead evaluates to the first matching byte. 2. `handle_eof` does not return from enclosing function. 3. Alter syntax to make clear that `continue_if` and `handle_eof` are not closures, so can use `return` statements in them. These changes enabled by #2552.
This commit is contained in:
parent
c579620701
commit
dd31c6453a
6 changed files with 179 additions and 218 deletions
|
|
@ -27,7 +27,7 @@ impl<'a> Lexer<'a> {
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: LINE_BREAK_TABLE,
|
table: LINE_BREAK_TABLE,
|
||||||
continue_if: |next_byte, pos| {
|
continue_if: (next_byte, pos) {
|
||||||
// Match found. Decide whether to continue searching.
|
// Match found. Decide whether to continue searching.
|
||||||
// If this is end of comment, create trivia, and advance `pos` to after line break.
|
// If this is end of comment, create trivia, and advance `pos` to after line break.
|
||||||
// Do that here rather than in `handle_match`, to avoid branching twice on value of
|
// Do that here rather than in `handle_match`, to avoid branching twice on value of
|
||||||
|
|
@ -69,15 +69,14 @@ impl<'a> Lexer<'a> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
handle_match: |_next_byte| {
|
handle_eof: {
|
||||||
self.token.is_on_new_line = true;
|
|
||||||
Kind::Skip
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
self.trivia_builder.add_single_line_comment(self.token.start, self.offset());
|
self.trivia_builder.add_single_line_comment(self.token.start, self.offset());
|
||||||
Kind::Skip
|
return Kind::Skip;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
self.token.is_on_new_line = true;
|
||||||
|
Kind::Skip
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Section 12.4 Multi Line Comment
|
/// Section 12.4 Multi Line Comment
|
||||||
|
|
@ -90,7 +89,7 @@ impl<'a> Lexer<'a> {
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: MULTILINE_COMMENT_START_TABLE,
|
table: MULTILINE_COMMENT_START_TABLE,
|
||||||
continue_if: |next_byte, pos| {
|
continue_if: (next_byte, pos) {
|
||||||
// Match found. Decide whether to continue searching.
|
// Match found. Decide whether to continue searching.
|
||||||
if next_byte == b'*' {
|
if next_byte == b'*' {
|
||||||
// SAFETY: Next byte is `*` (ASCII) so after it is UTF-8 char boundary
|
// SAFETY: Next byte is `*` (ASCII) so after it is UTF-8 char boundary
|
||||||
|
|
@ -145,15 +144,14 @@ impl<'a> Lexer<'a> {
|
||||||
return self.skip_multi_line_comment_after_line_break(after_line_break);
|
return self.skip_multi_line_comment_after_line_break(after_line_break);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
handle_match: |_next_byte| {
|
handle_eof: {
|
||||||
self.trivia_builder.add_multi_line_comment(self.token.start, self.offset());
|
|
||||||
Kind::Skip
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
self.error(diagnostics::UnterminatedMultiLineComment(self.unterminated_range()));
|
self.error(diagnostics::UnterminatedMultiLineComment(self.unterminated_range()));
|
||||||
Kind::Eof
|
return Kind::Eof;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
self.trivia_builder.add_multi_line_comment(self.token.start, self.offset());
|
||||||
|
Kind::Skip
|
||||||
}
|
}
|
||||||
|
|
||||||
fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition) -> Kind {
|
fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition) -> Kind {
|
||||||
|
|
|
||||||
|
|
@ -54,45 +54,44 @@ impl<'a> Lexer<'a> {
|
||||||
let after_first = self.source.position().add(1);
|
let after_first = self.source.position().add(1);
|
||||||
|
|
||||||
// Consume bytes which are part of identifier
|
// Consume bytes which are part of identifier
|
||||||
byte_search! {
|
let next_byte = byte_search! {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: NOT_ASCII_ID_CONTINUE_TABLE,
|
table: NOT_ASCII_ID_CONTINUE_TABLE,
|
||||||
start: after_first,
|
start: after_first,
|
||||||
handle_match: |next_byte| {
|
handle_eof: {
|
||||||
// Found a matching byte.
|
|
||||||
// Either end of identifier found, or a Unicode char, or `\` escape.
|
|
||||||
// Handle uncommon cases in cold branches to keep the common ASCII path
|
|
||||||
// as fast as possible.
|
|
||||||
if !next_byte.is_ascii() {
|
|
||||||
return cold_branch(|| {
|
|
||||||
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
|
||||||
// makes `start_pos` `source`'s position as it was at start of this function
|
|
||||||
let start_pos = unsafe { after_first.sub(1) };
|
|
||||||
&self.identifier_tail_unicode(start_pos)[1..]
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if next_byte == b'\\' {
|
|
||||||
return cold_branch(|| {
|
|
||||||
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
|
||||||
// makes `start_pos` `source`'s position as it was at start of this function
|
|
||||||
let start_pos = unsafe { after_first.sub(1) };
|
|
||||||
&self.identifier_backslash(start_pos, false)[1..]
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Return identifier minus its first char.
|
|
||||||
// SAFETY: `after_first` was position of `lexer.source` at start of this search.
|
|
||||||
// Searching only proceeds in forwards direction, so `lexer.source.position()`
|
|
||||||
// cannot be before `after_first`.
|
|
||||||
unsafe { self.source.str_from_pos_to_current_unchecked(after_first) }
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
// Return identifier minus its first char.
|
// Return identifier minus its first char.
|
||||||
// SAFETY: `lexer.source` is positioned at EOF, so there is no valid value
|
// SAFETY: `lexer.source` is positioned at EOF, so there is no valid value
|
||||||
// of `after_first` which could be after current position.
|
// of `after_first` which could be after current position.
|
||||||
unsafe { self.source.str_from_pos_to_current_unchecked(after_first) }
|
return unsafe { self.source.str_from_pos_to_current_unchecked(after_first) };
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Found a matching byte.
|
||||||
|
// Either end of identifier found, or a Unicode char, or `\` escape.
|
||||||
|
// Handle uncommon cases in cold branches to keep the common ASCII path
|
||||||
|
// as fast as possible.
|
||||||
|
if !next_byte.is_ascii() {
|
||||||
|
return cold_branch(|| {
|
||||||
|
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
||||||
|
// makes `start_pos` `source`'s position as it was at start of this function
|
||||||
|
let start_pos = unsafe { after_first.sub(1) };
|
||||||
|
&self.identifier_tail_unicode(start_pos)[1..]
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if next_byte == b'\\' {
|
||||||
|
return cold_branch(|| {
|
||||||
|
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
||||||
|
// makes `start_pos` `source`'s position as it was at start of this function
|
||||||
|
let start_pos = unsafe { after_first.sub(1) };
|
||||||
|
&self.identifier_backslash(start_pos, false)[1..]
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return identifier minus its first char.
|
||||||
|
// SAFETY: `after_first` was position of `lexer.source` at start of this search.
|
||||||
|
// Searching only proceeds in forwards direction, so `lexer.source.position()`
|
||||||
|
// cannot be before `after_first`.
|
||||||
|
unsafe { self.source.str_from_pos_to_current_unchecked(after_first) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle rest of identifier after first byte of a multi-byte Unicode char found.
|
/// Handle rest of identifier after first byte of a multi-byte Unicode char found.
|
||||||
|
|
@ -233,40 +232,39 @@ impl<'a> Lexer<'a> {
|
||||||
let after_first = unsafe { start_pos.add(1) };
|
let after_first = unsafe { start_pos.add(1) };
|
||||||
|
|
||||||
// Consume bytes which are part of identifier
|
// Consume bytes which are part of identifier
|
||||||
byte_search! {
|
let next_byte = byte_search! {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: NOT_ASCII_ID_CONTINUE_TABLE,
|
table: NOT_ASCII_ID_CONTINUE_TABLE,
|
||||||
start: after_first,
|
start: after_first,
|
||||||
handle_match: |next_byte| {
|
handle_eof: {
|
||||||
// Found a matching byte.
|
return Kind::PrivateIdentifier;
|
||||||
// Either end of identifier found, or a Unicode char, or `\` escape.
|
|
||||||
// Handle uncommon cases in cold branches to keep the common ASCII path
|
|
||||||
// as fast as possible.
|
|
||||||
if !next_byte.is_ascii() {
|
|
||||||
return cold_branch(|| {
|
|
||||||
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
|
||||||
// makes `start_pos` `source`'s position as it was at start of this function
|
|
||||||
let start_pos = unsafe { after_first.sub(1) };
|
|
||||||
self.identifier_tail_unicode(start_pos);
|
|
||||||
Kind::PrivateIdentifier
|
|
||||||
});
|
|
||||||
}
|
|
||||||
if next_byte == b'\\' {
|
|
||||||
return cold_branch(|| {
|
|
||||||
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
|
||||||
// makes `start_pos` `source`'s position as it was at start of this function
|
|
||||||
let start_pos = unsafe { after_first.sub(1) };
|
|
||||||
self.identifier_backslash(start_pos, false);
|
|
||||||
Kind::PrivateIdentifier
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
Kind::PrivateIdentifier
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
Kind::PrivateIdentifier
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Found a matching byte.
|
||||||
|
// Either end of identifier found, or a Unicode char, or `\` escape.
|
||||||
|
// Handle uncommon cases in cold branches to keep the common ASCII path
|
||||||
|
// as fast as possible.
|
||||||
|
if !next_byte.is_ascii() {
|
||||||
|
return cold_branch(|| {
|
||||||
|
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
||||||
|
// makes `start_pos` `source`'s position as it was at start of this function
|
||||||
|
let start_pos = unsafe { after_first.sub(1) };
|
||||||
|
self.identifier_tail_unicode(start_pos);
|
||||||
|
Kind::PrivateIdentifier
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if next_byte == b'\\' {
|
||||||
|
return cold_branch(|| {
|
||||||
|
// SAFETY: `after_first` is position after consuming 1 byte, so subtracting 1
|
||||||
|
// makes `start_pos` `source`'s position as it was at start of this function
|
||||||
|
let start_pos = unsafe { after_first.sub(1) };
|
||||||
|
self.identifier_backslash(start_pos, false);
|
||||||
|
Kind::PrivateIdentifier
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
Kind::PrivateIdentifier
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Handle private identifier whose first byte is not an ASCII identifier start byte.
|
/// Handle private identifier whose first byte is not an ASCII identifier start byte.
|
||||||
|
|
|
||||||
|
|
@ -250,10 +250,9 @@ pub(crate) use safe_byte_match_table;
|
||||||
/// Search processes source in batches of `SEARCH_BATCH_SIZE` bytes for speed.
|
/// Search processes source in batches of `SEARCH_BATCH_SIZE` bytes for speed.
|
||||||
/// When not enough bytes remaining in source for a batch, search source byte by byte.
|
/// When not enough bytes remaining in source for a batch, search source byte by byte.
|
||||||
///
|
///
|
||||||
/// This is a macro rather than a function for 2 reasons:
|
/// This is a macro rather than a function because searching is a bit faster when all the code
|
||||||
/// 1. Searching is a bit faster when all the code is in a single function.
|
/// is in a single function, and some parts (e.g. `continue_if`) can be statically removed by
|
||||||
/// 2. The `handle_match` section has to be repeated twice.
|
/// the compiler if they're not used.
|
||||||
/// This macro does that, so code using the macro can be DRY-er.
|
|
||||||
///
|
///
|
||||||
/// Used as follows:
|
/// Used as follows:
|
||||||
///
|
///
|
||||||
|
|
@ -262,28 +261,24 @@ pub(crate) use safe_byte_match_table;
|
||||||
///
|
///
|
||||||
/// impl<'a> Lexer<'a> {
|
/// impl<'a> Lexer<'a> {
|
||||||
/// fn eat_stuff(&mut self) -> bool {
|
/// fn eat_stuff(&mut self) -> bool {
|
||||||
/// byte_search! {
|
/// let matched_byte = byte_search! {
|
||||||
/// lexer: self,
|
/// lexer: self,
|
||||||
/// table: NOT_STUFF_TABLE,
|
/// table: NOT_STUFF_TABLE,
|
||||||
/// handle_match: |matched_byte| {
|
/// handle_eof: {
|
||||||
/// // Matching byte has been found.
|
|
||||||
/// // `matched_byte` is `u8` value of first byte which matched the table.
|
|
||||||
/// // `lexer.source` is now positioned on first matching byte.
|
|
||||||
/// // Handle the next matching byte (deal with any special cases).
|
|
||||||
/// // Value this block evaluates to will be returned from enclosing function.
|
|
||||||
/// matched_byte == b'X'
|
|
||||||
/// },
|
|
||||||
/// handle_eof: || {
|
|
||||||
/// // No bytes from start position to end of source matched the table.
|
/// // No bytes from start position to end of source matched the table.
|
||||||
/// // `lexer.source` is now positioned at EOF.
|
/// // `lexer.source` is now positioned at EOF.
|
||||||
/// // Handle EOF in some way.
|
/// // Evaluate to a `u8` which macro call will evaluate to.
|
||||||
/// // Value this block evaluates to will be returned from enclosing function.
|
/// 0xFF
|
||||||
/// false
|
/// // Or can `return` from enclosing function e.g. `return false;`
|
||||||
/// },
|
/// },
|
||||||
/// };
|
/// };
|
||||||
///
|
///
|
||||||
/// // This is unreachable.
|
/// // Matching byte has been found.
|
||||||
/// // Macro always exits current function with a `return` statement.
|
/// // `matched_byte` is `u8` value of first byte which matched the table
|
||||||
|
/// // (or `0xFF` if EOF, because `handle_eof` evaluates to `0xFF`).
|
||||||
|
/// // `lexer.source` is now positioned on first matching byte.
|
||||||
|
/// // Handle the next matching byte (deal with any special cases).
|
||||||
|
/// matched_byte == b'X'
|
||||||
/// }
|
/// }
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
|
|
@ -294,79 +289,65 @@ pub(crate) use safe_byte_match_table;
|
||||||
/// impl<'a> Lexer<'a> {
|
/// impl<'a> Lexer<'a> {
|
||||||
/// fn eat_stuff(&mut self) -> bool {
|
/// fn eat_stuff(&mut self) -> bool {
|
||||||
/// let start = unsafe { self.source.position().add(1) };
|
/// let start = unsafe { self.source.position().add(1) };
|
||||||
/// byte_search! {
|
/// let matched_byte = byte_search! {
|
||||||
/// lexer: self,
|
/// lexer: self,
|
||||||
/// table: NOT_STUFF_TABLE,
|
/// table: NOT_STUFF_TABLE,
|
||||||
/// start: start,
|
/// start: start,
|
||||||
/// handle_match: |matched_byte| {
|
/// handle_eof: {
|
||||||
/// // Matching byte has been found.
|
|
||||||
/// // `matched_byte` is `u8` value of first byte which matched the table.
|
|
||||||
/// // `lexer.source` is now positioned on first matching byte.
|
|
||||||
/// // Handle the next matching byte (deal with any special cases).
|
|
||||||
/// // Value this block evaluates to will be returned from enclosing function.
|
|
||||||
/// true
|
|
||||||
/// },
|
|
||||||
/// handle_eof: || {
|
|
||||||
/// // No bytes from start position to end of source matched the table.
|
/// // No bytes from start position to end of source matched the table.
|
||||||
/// // `lexer.source` is now positioned at EOF.
|
/// // `lexer.source` is now positioned at EOF.
|
||||||
/// // Handle EOF in some way.
|
/// return false;
|
||||||
/// // Value this block evaluates to will be returned from enclosing function.
|
|
||||||
/// false
|
|
||||||
/// },
|
/// },
|
||||||
/// };
|
/// };
|
||||||
///
|
///
|
||||||
/// // This is unreachable.
|
/// // Matching byte has been found.
|
||||||
/// // Macro always exits current function with a `return` statement.
|
/// // `matched_byte` is `u8` value of first byte which matched the table.
|
||||||
|
/// // `lexer.source` is now positioned on first matching byte.
|
||||||
|
/// // Handle the next matching byte (deal with any special cases).
|
||||||
|
/// matched_byte == b'X'
|
||||||
/// }
|
/// }
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// Can also add a block to decide whether to continue searching for some matches:
|
/// Can also add a block to decide whether to continue searching for some matches:
|
||||||
///
|
///
|
||||||
/// ```text
|
/// ```
|
||||||
/// impl<'a> Lexer<'a> {
|
/// impl<'a> Lexer<'a> {
|
||||||
/// fn eat_stuff(&mut self) -> bool {
|
/// fn eat_stuff(&mut self) -> bool {
|
||||||
/// byte_search! {
|
/// let matched_byte = byte_search! {
|
||||||
/// lexer: self,
|
/// lexer: self,
|
||||||
/// table: NOT_STUFF_TABLE,
|
/// table: NOT_STUFF_TABLE,
|
||||||
/// continue_if: |matched_byte, pos| {
|
/// continue_if: (matched_byte, pos) {
|
||||||
/// // Matching byte found. Decide whether it's really a match.
|
/// // Matching byte found. Decide whether it's really a match.
|
||||||
|
/// // Return `true` to continue searching, or `false` to end search.
|
||||||
/// // NB: `lexer.source` has NOT been updated at this point.
|
/// // NB: `lexer.source` has NOT been updated at this point.
|
||||||
/// if matched_byte == 0xE2 {
|
/// if matched_byte == 0xE2 {
|
||||||
/// // Only match a specific Unicode char (in this case 0xE2, 0x80, 0xA8)
|
/// // Only match a specific Unicode char (in this case 0xE2, 0x80, 0xA8)
|
||||||
/// unsafe { pos.add(1).read() != 0x80 || pos.add(2).read() != 0xA8) }
|
/// // NB: We don't need to check if `pos` is at EOF here, as 0xE2 is always 1st byte
|
||||||
|
/// // of a 3-byte Unicode char, but if matching an ASCII char, would need to make sure
|
||||||
|
/// // don't read out of bounds.
|
||||||
|
/// unsafe { pos.add(1).read() != 0x80 || pos.add(2).read() != 0xA8 }
|
||||||
/// } else {
|
/// } else {
|
||||||
/// // All others do match. `handle_match` is executed.
|
/// // End search for all other possibilities
|
||||||
/// false
|
/// false
|
||||||
/// }
|
/// }
|
||||||
/// },
|
/// },
|
||||||
/// handle_match: |matched_byte| {
|
/// handle_eof: {
|
||||||
/// // Matching byte has been found and `continue_if` returned `false` for it.
|
|
||||||
/// // `matched_byte` is `u8` value of first byte which matched the table.
|
|
||||||
/// // `lexer.source` is now positioned on first matching byte.
|
|
||||||
/// // Handle the next matching byte (deal with any special cases).
|
|
||||||
/// // Value this block evaluates to will be returned from enclosing function.
|
|
||||||
/// true
|
|
||||||
/// },
|
|
||||||
/// handle_eof: || {
|
|
||||||
/// // No bytes from start position to end of source matched the table.
|
/// // No bytes from start position to end of source matched the table.
|
||||||
/// // `lexer.source` is now positioned at EOF.
|
/// // `lexer.source` is now positioned at EOF.
|
||||||
/// // Handle EOF in some way.
|
/// return false;
|
||||||
/// // Value this block evaluates to will be returned from enclosing function.
|
|
||||||
/// false
|
|
||||||
/// },
|
/// },
|
||||||
/// };
|
/// };
|
||||||
///
|
///
|
||||||
/// // This is unreachable.
|
/// // Matching byte has been found.
|
||||||
/// // Macro always exits current function with a `return` statement.
|
/// // `matched_byte` is `u8` value of first byte which matched the table.
|
||||||
|
/// // `lexer.source` is now positioned on first matching byte.
|
||||||
|
/// // Handle the next matching byte (deal with any special cases).
|
||||||
|
/// matched_byte == b'X'
|
||||||
/// }
|
/// }
|
||||||
/// }
|
/// }
|
||||||
/// ```
|
/// ```
|
||||||
///
|
///
|
||||||
/// NB: The macro always causes enclosing function to return.
|
|
||||||
/// It creates `return` statements with the value that `handle_match` / `handle_eof` blocks evaluate to.
|
|
||||||
/// After the `byte_search!` macro is unreachable.
|
|
||||||
///
|
|
||||||
/// # SAFETY
|
/// # SAFETY
|
||||||
///
|
///
|
||||||
/// This macro will consume bytes from `lexer.source` according to the `ByteMatchTable`
|
/// This macro will consume bytes from `lexer.source` according to the `ByteMatchTable`
|
||||||
|
|
@ -380,71 +361,64 @@ pub(crate) use safe_byte_match_table;
|
||||||
/// It is caller's responsibility to ensure that `lexer.source` is moved onto a UTF-8 character boundary.
|
/// It is caller's responsibility to ensure that `lexer.source` is moved onto a UTF-8 character boundary.
|
||||||
/// This is similar to the contract's of `Source`'s unsafe methods.
|
/// This is similar to the contract's of `Source`'s unsafe methods.
|
||||||
macro_rules! byte_search {
|
macro_rules! byte_search {
|
||||||
// Standard version.
|
// Simple version.
|
||||||
// `start` is calculated from current position of `lexer.source`.
|
// `start` is calculated from current position of `lexer.source`.
|
||||||
(
|
(
|
||||||
lexer: $lexer:ident,
|
lexer: $lexer:ident,
|
||||||
table: $table:ident,
|
table: $table:ident,
|
||||||
handle_match: |$match_byte:ident| $match_handler:expr,
|
handle_eof: $eof_handler:expr,
|
||||||
handle_eof: || $eof_handler:expr,
|
|
||||||
) => {{
|
) => {{
|
||||||
let start = $lexer.source.position();
|
let start = $lexer.source.position();
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: $lexer,
|
lexer: $lexer,
|
||||||
table: $table,
|
table: $table,
|
||||||
start: start,
|
start: start,
|
||||||
continue_if: |byte, pos| false,
|
continue_if: (byte, pos) false,
|
||||||
handle_match: |$match_byte| $match_handler,
|
handle_eof: $eof_handler,
|
||||||
handle_eof: || $eof_handler,
|
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
// Standard version with `continue_if`.
|
// With `continue_if`.
|
||||||
// `start` is calculated from current position of `lexer.source`.
|
// `start` is calculated from current position of `lexer.source`.
|
||||||
(
|
(
|
||||||
lexer: $lexer:ident,
|
lexer: $lexer:ident,
|
||||||
table: $table:ident,
|
table: $table:ident,
|
||||||
continue_if: |$continue_byte:ident, $pos:ident| $should_continue:expr,
|
continue_if: ($byte:ident, $pos:ident) $should_continue:expr,
|
||||||
handle_match: |$match_byte:ident| $match_handler:expr,
|
handle_eof: $eof_handler:expr,
|
||||||
handle_eof: || $eof_handler:expr,
|
|
||||||
) => {{
|
) => {{
|
||||||
let start = $lexer.source.position();
|
let start = $lexer.source.position();
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: $lexer,
|
lexer: $lexer,
|
||||||
table: $table,
|
table: $table,
|
||||||
start: start,
|
start: start,
|
||||||
continue_if: |$continue_byte, $pos| $should_continue,
|
continue_if: ($byte, $pos) $should_continue,
|
||||||
handle_match: |$match_byte| $match_handler,
|
handle_eof: $eof_handler,
|
||||||
handle_eof: || $eof_handler,
|
|
||||||
}
|
}
|
||||||
}};
|
}};
|
||||||
|
|
||||||
// Provide your own `start` position
|
// With provided `start` position
|
||||||
(
|
(
|
||||||
lexer: $lexer:ident,
|
lexer: $lexer:ident,
|
||||||
table: $table:ident,
|
table: $table:ident,
|
||||||
start: $start:ident,
|
start: $start:ident,
|
||||||
handle_match: |$match_byte:ident| $match_handler:expr,
|
handle_eof: $eof_handler:expr,
|
||||||
handle_eof: || $eof_handler:expr,
|
|
||||||
) => {
|
) => {
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: $lexer,
|
lexer: $lexer,
|
||||||
table: $table,
|
table: $table,
|
||||||
start: $start,
|
start: $start,
|
||||||
continue_if: |byte, pos| false,
|
continue_if: (byte, pos) false,
|
||||||
handle_match: |$match_byte| $match_handler,
|
handle_eof: $eof_handler,
|
||||||
handle_eof: || $eof_handler,
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
// Actual implementation
|
// Actual implementation - with both `start` and `continue_if`
|
||||||
(
|
(
|
||||||
lexer: $lexer:ident,
|
lexer: $lexer:ident,
|
||||||
table: $table:ident,
|
table: $table:ident,
|
||||||
start: $start:ident,
|
start: $start:ident,
|
||||||
continue_if: |$continue_byte:ident, $pos:ident| $should_continue:expr,
|
continue_if: ($byte:ident, $pos:ident) $should_continue:expr,
|
||||||
handle_match: |$match_byte:ident| $match_handler:expr,
|
handle_eof: $eof_handler:expr,
|
||||||
handle_eof: || $eof_handler:expr,
|
|
||||||
) => {{
|
) => {{
|
||||||
// SAFETY:
|
// SAFETY:
|
||||||
// If `$table` is a `SafeByteMatchTable`, it's guaranteed that `lexer.source`
|
// If `$table` is a `SafeByteMatchTable`, it's guaranteed that `lexer.source`
|
||||||
|
|
@ -458,8 +432,8 @@ macro_rules! byte_search {
|
||||||
|
|
||||||
let mut $pos = $start;
|
let mut $pos = $start;
|
||||||
#[allow(unused_unsafe)] // Silence warnings if macro called in unsafe code
|
#[allow(unused_unsafe)] // Silence warnings if macro called in unsafe code
|
||||||
let $match_byte = 'outer: loop {
|
'outer: loop {
|
||||||
let $continue_byte = if $pos.addr() <= $lexer.source.end_for_batch_search_addr() {
|
let $byte = if $pos.addr() <= $lexer.source.end_for_batch_search_addr() {
|
||||||
// Search a batch of `SEARCH_BATCH_SIZE` bytes.
|
// Search a batch of `SEARCH_BATCH_SIZE` bytes.
|
||||||
//
|
//
|
||||||
// `'inner: loop {}` is not a real loop - it always exits on first turn.
|
// `'inner: loop {}` is not a real loop - it always exits on first turn.
|
||||||
|
|
@ -510,7 +484,12 @@ macro_rules! byte_search {
|
||||||
// Advance `lexer.source`'s position to end of file.
|
// Advance `lexer.source`'s position to end of file.
|
||||||
$lexer.source.set_position($pos);
|
$lexer.source.set_position($pos);
|
||||||
|
|
||||||
return $eof_handler;
|
// Avoid lint errors if `$eof_handler` contains `return` statement
|
||||||
|
#[allow(unused_variables, unreachable_code, clippy::diverging_sub_expression)]
|
||||||
|
{
|
||||||
|
let eof_ret = $eof_handler;
|
||||||
|
break 'outer eof_ret;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -523,15 +502,13 @@ macro_rules! byte_search {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Match confirmed
|
// Match confirmed.
|
||||||
break $continue_byte;
|
// Advance `lexer.source`'s position up to `$pos`, consuming unmatched bytes.
|
||||||
};
|
// SAFETY: See above about UTF-8 character boundaries invariant.
|
||||||
|
$lexer.source.set_position($pos);
|
||||||
|
|
||||||
// Advance `lexer.source`'s position up to `$pos`, consuming unmatched bytes.
|
break $byte;
|
||||||
// SAFETY: See above about UTF-8 character boundaries invariant.
|
}
|
||||||
$lexer.source.set_position($pos);
|
|
||||||
|
|
||||||
return $match_handler;
|
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
pub(crate) use byte_search;
|
pub(crate) use byte_search;
|
||||||
|
|
|
||||||
|
|
@ -37,43 +37,38 @@ macro_rules! handle_string_literal {
|
||||||
let after_opening_quote = $lexer.source.position().add(1);
|
let after_opening_quote = $lexer.source.position().add(1);
|
||||||
|
|
||||||
// Consume bytes which are part of identifier
|
// Consume bytes which are part of identifier
|
||||||
byte_search! {
|
let next_byte = byte_search! {
|
||||||
lexer: $lexer,
|
lexer: $lexer,
|
||||||
table: $table,
|
table: $table,
|
||||||
start: after_opening_quote,
|
start: after_opening_quote,
|
||||||
handle_match: |next_byte| {
|
handle_eof: {
|
||||||
// Found a matching byte.
|
|
||||||
// Either end of string found, or a line break, or `\` escape.
|
|
||||||
match next_byte {
|
|
||||||
$delimiter => {
|
|
||||||
// SAFETY: `handle_match` is only called if there's a byte to consume,
|
|
||||||
// and `next_byte` is the next byte in `lexer.source`.
|
|
||||||
// Macro user guarantees delimiter is ASCII, so consuming it cannot move
|
|
||||||
// `lexer.source` off a UTF-8 character boundary.
|
|
||||||
$lexer.source.next_byte_unchecked();
|
|
||||||
Kind::Str
|
|
||||||
},
|
|
||||||
b'\\' => {
|
|
||||||
cold_branch(|| {
|
|
||||||
handle_string_literal_escape!($lexer, $delimiter, $table, after_opening_quote)
|
|
||||||
})
|
|
||||||
},
|
|
||||||
_ => {
|
|
||||||
// Line break. This is impossible in valid JS, so cold path.
|
|
||||||
cold_branch(|| {
|
|
||||||
debug_assert!(matches!(next_byte, b'\r' | b'\n'));
|
|
||||||
$lexer.consume_char();
|
|
||||||
$lexer.error(diagnostics::UnterminatedString($lexer.unterminated_range()));
|
|
||||||
Kind::Undetermined
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
$lexer.error(diagnostics::UnterminatedString($lexer.unterminated_range()));
|
$lexer.error(diagnostics::UnterminatedString($lexer.unterminated_range()));
|
||||||
Kind::Undetermined
|
return Kind::Undetermined;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Found a matching byte.
|
||||||
|
// Either end of string found, or a line break, or `\` escape.
|
||||||
|
match next_byte {
|
||||||
|
$delimiter => {
|
||||||
|
// SAFETY: Macro user guarantees delimiter is ASCII, so consuming it cannot move
|
||||||
|
// `lexer.source` off a UTF-8 character boundary.
|
||||||
|
$lexer.source.next_byte_unchecked();
|
||||||
|
Kind::Str
|
||||||
|
}
|
||||||
|
b'\\' => cold_branch(|| {
|
||||||
|
handle_string_literal_escape!($lexer, $delimiter, $table, after_opening_quote)
|
||||||
|
}),
|
||||||
|
_ => {
|
||||||
|
// Line break. This is impossible in valid JS, so cold path.
|
||||||
|
cold_branch(|| {
|
||||||
|
debug_assert!(matches!(next_byte, b'\r' | b'\n'));
|
||||||
|
$lexer.consume_char();
|
||||||
|
$lexer.error(diagnostics::UnterminatedString($lexer.unterminated_range()));
|
||||||
|
Kind::Undetermined
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
}};
|
}};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ impl<'a> Lexer<'a> {
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: TEMPLATE_LITERAL_TABLE,
|
table: TEMPLATE_LITERAL_TABLE,
|
||||||
continue_if: |next_byte, pos| {
|
continue_if: (next_byte, pos) {
|
||||||
match next_byte {
|
match next_byte {
|
||||||
b'$' => {
|
b'$' => {
|
||||||
// SAFETY: Next byte is `$` which is ASCII, so after it is a UTF-8 char boundary
|
// SAFETY: Next byte is `$` which is ASCII, so after it is a UTF-8 char boundary
|
||||||
|
|
@ -73,14 +73,13 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
handle_match: |_next_byte| {
|
handle_eof: {
|
||||||
ret
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
||||||
Kind::Undetermined
|
return Kind::Undetermined;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
ret
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Consume rest of template literal after a `\r` is found.
|
/// Consume rest of template literal after a `\r` is found.
|
||||||
|
|
@ -195,7 +194,7 @@ impl<'a> Lexer<'a> {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: TEMPLATE_LITERAL_TABLE,
|
table: TEMPLATE_LITERAL_TABLE,
|
||||||
start: pos,
|
start: pos,
|
||||||
continue_if: |next_byte, pos| {
|
continue_if: (next_byte, pos) {
|
||||||
if next_byte == b'$' {
|
if next_byte == b'$' {
|
||||||
// SAFETY: Next byte is `$` which is ASCII, so after it is a UTF-8 char boundary
|
// SAFETY: Next byte is `$` which is ASCII, so after it is a UTF-8 char boundary
|
||||||
let after_dollar = pos.add(1);
|
let after_dollar = pos.add(1);
|
||||||
|
|
@ -295,18 +294,15 @@ impl<'a> Lexer<'a> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
handle_match: |_next_byte| {
|
handle_eof: {
|
||||||
self.save_template_string(
|
|
||||||
is_valid_escape_sequence,
|
|
||||||
str.into_bump_str(),
|
|
||||||
);
|
|
||||||
ret
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
self.error(diagnostics::UnterminatedString(self.unterminated_range()));
|
||||||
Kind::Undetermined
|
return Kind::Undetermined;
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
self.save_template_string(is_valid_escape_sequence, str.into_bump_str());
|
||||||
|
|
||||||
|
ret
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
|
/// Re-tokenize the current `}` token for `TemplateSubstitutionTail`
|
||||||
|
|
|
||||||
|
|
@ -17,12 +17,9 @@ impl<'a> Lexer<'a> {
|
||||||
byte_search! {
|
byte_search! {
|
||||||
lexer: self,
|
lexer: self,
|
||||||
table: NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE,
|
table: NOT_REGULAR_WHITESPACE_OR_LINE_BREAK_TABLE,
|
||||||
handle_match: |_next_byte| {
|
handle_eof: 0, // Fall through to below
|
||||||
Kind::Skip
|
|
||||||
},
|
|
||||||
handle_eof: || {
|
|
||||||
Kind::Skip
|
|
||||||
},
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
Kind::Skip
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue