mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 04:08:41 +00:00
refactor(lexer): tighten safety of lexer by always including lifetime on SourcePosition (#8293)
Always use `SourcePosition<'a>` not `SourcePosition`, to ensure not reading from a `Source` with a `SourcePosition` which is from *another* `Source`. This doesn't definitively guard against UB - that is the job of `UniquePromise` - but it adds another level of defence.
This commit is contained in:
parent
16dcdaf6cd
commit
64bfdfe223
4 changed files with 23 additions and 14 deletions
|
|
@ -22,7 +22,7 @@ static LINE_BREAK_TABLE: SafeByteMatchTable =
|
|||
static MULTILINE_COMMENT_START_TABLE: SafeByteMatchTable =
|
||||
safe_byte_match_table!(|b| matches!(b, b'*' | b'\r' | b'\n' | LS_OR_PS_FIRST));
|
||||
|
||||
impl Lexer<'_> {
|
||||
impl<'a> Lexer<'a> {
|
||||
/// Section 12.4 Single Line Comment
|
||||
pub(super) fn skip_single_line_comment(&mut self) -> Kind {
|
||||
byte_search! {
|
||||
|
|
@ -151,7 +151,7 @@ impl Lexer<'_> {
|
|||
Kind::Skip
|
||||
}
|
||||
|
||||
fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition) -> Kind {
|
||||
fn skip_multi_line_comment_after_line_break(&mut self, pos: SourcePosition<'a>) -> Kind {
|
||||
// Can use `memchr` here as only searching for 1 pattern.
|
||||
// Cache `Finder` instance on `Lexer` as there's a significant cost to creating it.
|
||||
// `Finder::new` isn't a const function, so can't make it a `static`, and `lazy_static!`
|
||||
|
|
|
|||
|
|
@ -98,7 +98,7 @@ impl<'a> Lexer<'a> {
|
|||
/// Handle rest of identifier after first byte of a multi-byte Unicode char found.
|
||||
/// Any number of characters can have already been consumed from `self.source` prior to it.
|
||||
/// `self.source` should be positioned at start of Unicode character.
|
||||
fn identifier_tail_unicode(&mut self, start_pos: SourcePosition) -> &'a str {
|
||||
fn identifier_tail_unicode(&mut self, start_pos: SourcePosition<'a>) -> &'a str {
|
||||
let c = self.peek_char().unwrap();
|
||||
if is_identifier_part_unicode(c) {
|
||||
self.consume_char();
|
||||
|
|
@ -113,7 +113,10 @@ impl<'a> Lexer<'a> {
|
|||
///
|
||||
/// First char should have been consumed from `self.source` prior to calling this.
|
||||
/// `start_pos` should be position of the start of the identifier (before first char was consumed).
|
||||
pub(super) fn identifier_tail_after_unicode(&mut self, start_pos: SourcePosition) -> &'a str {
|
||||
pub(super) fn identifier_tail_after_unicode(
|
||||
&mut self,
|
||||
start_pos: SourcePosition<'a>,
|
||||
) -> &'a str {
|
||||
// Identifier contains a Unicode chars, so probably contains more.
|
||||
// So just iterate over chars now, instead of bytes.
|
||||
while let Some(c) = self.peek_char() {
|
||||
|
|
@ -146,7 +149,7 @@ impl<'a> Lexer<'a> {
|
|||
///
|
||||
/// The `\` must not have be consumed from `lexer.source`.
|
||||
/// `start_pos` must be position of start of identifier.
|
||||
fn identifier_backslash(&mut self, start_pos: SourcePosition, is_start: bool) -> &'a str {
|
||||
fn identifier_backslash(&mut self, start_pos: SourcePosition<'a>, is_start: bool) -> &'a str {
|
||||
// Create arena string to hold unescaped identifier.
|
||||
// We don't know how long identifier will end up being. Take a guess that total length
|
||||
// will be double what we've seen so far, or `MIN_ESCAPED_STR_LEN` minimum.
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ impl<'a> Source<'a> {
|
|||
|
||||
/// Move current position.
|
||||
#[inline]
|
||||
pub(super) fn set_position(&mut self, pos: SourcePosition) {
|
||||
pub(super) fn set_position(&mut self, pos: SourcePosition<'a>) {
|
||||
// `SourcePosition` always upholds the invariants of `Source`, as long as it's created
|
||||
// from this `Source`. `SourcePosition`s can only be created from a `Source`.
|
||||
// `Source::new` takes a `UniquePromise`, which guarantees that it's the only `Source`
|
||||
|
|
@ -216,7 +216,7 @@ impl<'a> Source<'a> {
|
|||
}
|
||||
|
||||
/// Get string slice from a `SourcePosition` up to the current position of `Source`.
|
||||
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition) -> &'a str {
|
||||
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition<'a>) -> &'a str {
|
||||
assert!(pos.ptr <= self.ptr);
|
||||
// SAFETY: The above assertion satisfies `str_from_pos_to_current_unchecked`'s requirements
|
||||
unsafe { self.str_from_pos_to_current_unchecked(pos) }
|
||||
|
|
@ -230,7 +230,10 @@ impl<'a> Source<'a> {
|
|||
/// 1. `Source::set_position` has not been called since `pos` was created.
|
||||
/// 2. `pos` has not been advanced with `SourcePosition::add`.
|
||||
#[inline]
|
||||
pub(super) unsafe fn str_from_pos_to_current_unchecked(&self, pos: SourcePosition) -> &'a str {
|
||||
pub(super) unsafe fn str_from_pos_to_current_unchecked(
|
||||
&self,
|
||||
pos: SourcePosition<'a>,
|
||||
) -> &'a str {
|
||||
// SAFETY: Caller guarantees `pos` is not after current position of `Source`.
|
||||
// `self.ptr` is always a valid `SourcePosition` due to invariants of `Source`.
|
||||
self.str_between_positions_unchecked(pos, SourcePosition::new(self.ptr))
|
||||
|
|
@ -244,7 +247,10 @@ impl<'a> Source<'a> {
|
|||
/// 1. `Source::set_position` has not been called since `pos` was created.
|
||||
/// 2. `pos` has not been moved backwards with `SourcePosition::sub`.
|
||||
#[inline]
|
||||
pub(super) unsafe fn str_from_current_to_pos_unchecked(&self, pos: SourcePosition) -> &'a str {
|
||||
pub(super) unsafe fn str_from_current_to_pos_unchecked(
|
||||
&self,
|
||||
pos: SourcePosition<'a>,
|
||||
) -> &'a str {
|
||||
// SAFETY: Caller guarantees `pos` is not before current position of `Source`.
|
||||
// `self.ptr` is always a valid `SourcePosition` due to invariants of `Source`.
|
||||
self.str_between_positions_unchecked(SourcePosition::new(self.ptr), pos)
|
||||
|
|
@ -252,7 +258,7 @@ impl<'a> Source<'a> {
|
|||
|
||||
/// Get string slice from a `SourcePosition` up to the end of `Source`.
|
||||
#[inline]
|
||||
pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition) -> &'a str {
|
||||
pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition<'a>) -> &'a str {
|
||||
// SAFETY: Invariants of `SourcePosition` is that it cannot be after end of `Source`,
|
||||
// and always on a UTF-8 character boundary.
|
||||
// `self.end` is always a valid `SourcePosition` due to invariants of `Source`.
|
||||
|
|
@ -266,8 +272,8 @@ impl<'a> Source<'a> {
|
|||
#[inline]
|
||||
pub(super) unsafe fn str_between_positions_unchecked(
|
||||
&self,
|
||||
start: SourcePosition,
|
||||
end: SourcePosition,
|
||||
start: SourcePosition<'a>,
|
||||
end: SourcePosition<'a>,
|
||||
) -> &'a str {
|
||||
// Check `start` is not after `end`
|
||||
debug_assert!(start.ptr <= end.ptr);
|
||||
|
|
@ -304,7 +310,7 @@ impl<'a> Source<'a> {
|
|||
/// Get offset of `pos`.
|
||||
#[expect(clippy::cast_possible_truncation)]
|
||||
#[inline]
|
||||
pub(super) fn offset_of(&self, pos: SourcePosition) -> u32 {
|
||||
pub(super) fn offset_of(&self, pos: SourcePosition<'a>) -> u32 {
|
||||
// Cannot overflow `u32` because of `MAX_LEN` check in `Source::new`
|
||||
(pos.addr() - self.start as usize) as u32
|
||||
}
|
||||
|
|
|
|||
|
|
@ -167,7 +167,7 @@ impl<'a> Lexer<'a> {
|
|||
/// # SAFETY
|
||||
/// `pos` must not be before `self.source.position()`
|
||||
#[expect(clippy::unnecessary_safety_comment)]
|
||||
unsafe fn template_literal_create_string(&self, pos: SourcePosition) -> String<'a> {
|
||||
unsafe fn template_literal_create_string(&self, pos: SourcePosition<'a>) -> String<'a> {
|
||||
// Create arena string to hold modified template literal.
|
||||
// We don't know how long template literal will end up being. Take a guess that total length
|
||||
// will be double what we've seen so far, or `MIN_ESCAPED_TEMPLATE_LIT_LEN` minimum.
|
||||
|
|
|
|||
Loading…
Reference in a new issue