// Mirror of https://github.com/danbulant/oxc
// Synced 2026-05-22 13:48:55 +00:00 (398 lines, 16 KiB, Rust)
use std::sync::Arc;
|
|
|
|
use oxc_span::Span;
|
|
use oxc_syntax::identifier::{LS, PS};
|
|
|
|
// Irregular line breaks: '\u{2028}' (LS) and '\u{2029}' (PS).
// Encoded as UTF-8, LS is `E2 80 A8` and PS is `E2 80 A9`: only the final byte differs.

/// First UTF-8 byte shared by LS and PS.
const LS_OR_PS_FIRST: u8 = 0xE2;
/// Second UTF-8 byte shared by LS and PS.
const LS_OR_PS_SECOND: u8 = 0x80;
/// Third (final) UTF-8 byte of LS.
const LS_THIRD: u8 = 0xA8;
/// Third (final) UTF-8 byte of PS.
const PS_THIRD: u8 = 0xA9;
|
|
|
|
/// Line offset table
///
/// Describes one line of the original source. Used for tracking lines and columns
/// from byte offsets.
///
/// Code is adapted from [esbuild](https://github.com/evanw/esbuild/blob/cc74e6042a9f573bf58e1e3f165ebda70af4ad3b/internal/js_printer/js_printer.go#L4806-L4808)
#[derive(Debug)]
pub struct LineOffsetTable {
    /// Column (counted in UTF-16 code units) for every byte of the line, starting at the
    /// line's first non-ASCII byte. `None` when the line contains no non-ASCII byte.
    columns: Option<Vec<usize>>,
    /// Byte offset, relative to the start of the line, of the first non-ASCII byte.
    /// `usize::MAX` when the line contains no non-ASCII byte.
    byte_offset_to_first: usize,
    /// Byte offset of the first byte of this line, relative to the start of the source.
    byte_offset_to_start_of_line: usize,
}
|
|
|
|
#[allow(clippy::struct_field_names)]
|
|
pub struct SourcemapBuilder {
|
|
source_id: u32,
|
|
original_source: Arc<str>,
|
|
last_generated_update: usize,
|
|
last_position: Option<u32>,
|
|
last_search_line: usize,
|
|
line_offset_tables: Vec<LineOffsetTable>,
|
|
sourcemap_builder: oxc_sourcemap::SourceMapBuilder,
|
|
generated_line: u32,
|
|
generated_column: u32,
|
|
}
|
|
|
|
impl Default for SourcemapBuilder {
|
|
fn default() -> Self {
|
|
Self {
|
|
source_id: 0,
|
|
original_source: "".into(),
|
|
last_generated_update: 0,
|
|
last_position: None,
|
|
last_search_line: 0,
|
|
line_offset_tables: vec![],
|
|
sourcemap_builder: oxc_sourcemap::SourceMapBuilder::default(),
|
|
generated_line: 0,
|
|
generated_column: 0,
|
|
}
|
|
}
|
|
}
|
|
|
|
impl SourcemapBuilder {
|
|
/// Registers `source` (under the file name `name`) as the original source that
/// subsequent mappings refer to.
///
/// Rebuilds the per-line offset tables from `source`, registers name and content with
/// the underlying builder (remembering the returned source id), and stores a copy of
/// the text for later name lookups.
///
/// The generated-output state (`generated_line`, `generated_column`,
/// `last_generated_update`) is left untouched.
pub fn with_name_and_source(&mut self, name: &str, source: &str) {
    self.line_offset_tables = Self::generate_line_offset_tables(source);
    self.source_id = self.sourcemap_builder.set_source_and_content(name, source);
    self.original_source = source.into();
    // Both of these describe the *previous* source. Keeping them would make line lookups
    // resume from a stale table index, and would drop mappings whose position is not
    // greater than the last position seen in the old source.
    self.last_search_line = 0;
    self.last_position = None;
}
|
|
|
|
pub fn into_sourcemap(self) -> oxc_sourcemap::SourceMap {
|
|
self.sourcemap_builder.into_sourcemap()
|
|
}
|
|
|
|
/// Adds a mapping for an identifier located at `span` in the original source.
///
/// `name` is the identifier as it appears in the generated output. The token records the
/// *original* name, but only when it differs from `name`; an unchanged name is stored as
/// `None` to reduce `SourceMap` size.
///
/// If `span` does not denote a valid range of the original source (out of bounds, or not
/// on UTF-8 character boundaries), no name is recorded and the mapping is added without one.
pub fn add_source_mapping_for_name(&mut self, output: &[u8], span: Span, name: &str) {
    // Checked lookup: `span` is caller-supplied, so an unchecked slice here would be
    // undefined behaviour for a span that does not fit the registered source.
    let original_name = self.original_source.get(span.start as usize..span.end as usize);
    let token_name = match original_name {
        Some(original) if original != name => Some(original.into()),
        _ => None,
    };
    self.add_source_mapping(output, span.start, token_name);
}
|
|
|
|
pub fn add_source_mapping(&mut self, output: &[u8], position: u32, name: Option<Arc<str>>) {
|
|
if matches!(self.last_position, Some(last_position) if last_position >= position) {
|
|
return;
|
|
}
|
|
let (original_line, original_column) = self.search_original_line_and_column(position);
|
|
self.update_generated_line_and_column(output);
|
|
let name_id = name.map(|s| self.sourcemap_builder.add_name(&s));
|
|
self.sourcemap_builder.add_token(
|
|
self.generated_line,
|
|
self.generated_column,
|
|
original_line,
|
|
original_column,
|
|
Some(self.source_id),
|
|
name_id,
|
|
);
|
|
self.last_position = Some(position);
|
|
}
|
|
|
|
#[allow(clippy::cast_possible_truncation)]
|
|
fn search_original_line_and_column(&mut self, position: u32) -> (u32, u32) {
|
|
let mut original_line = 0;
|
|
for i in self.last_search_line..self.line_offset_tables.len() {
|
|
if self.line_offset_tables[i].byte_offset_to_start_of_line > position as usize {
|
|
original_line = i - 1;
|
|
break;
|
|
}
|
|
if i == self.line_offset_tables.len() - 1 {
|
|
original_line = i;
|
|
}
|
|
}
|
|
self.last_search_line = original_line;
|
|
let line = &self.line_offset_tables[original_line];
|
|
let mut original_column = (position as usize) - line.byte_offset_to_start_of_line;
|
|
if original_column >= line.byte_offset_to_first {
|
|
if let Some(cols) = &line.columns {
|
|
original_column = cols[original_column - line.byte_offset_to_first];
|
|
}
|
|
}
|
|
(original_line as u32, original_column as u32)
|
|
}
|
|
|
|
#[allow(clippy::cast_possible_truncation)]
|
|
fn update_generated_line_and_column(&mut self, output: &[u8]) {
|
|
let remaining = &output[self.last_generated_update..];
|
|
|
|
// Find last line break
|
|
let mut line_start_ptr = remaining.as_ptr();
|
|
let mut last_line_is_ascii = true;
|
|
let mut iter = remaining.iter();
|
|
while let Some(&b) = iter.next() {
|
|
match b {
|
|
b'\n' => {}
|
|
b'\r' => {
|
|
// Handle Windows-specific "\r\n" newlines
|
|
if iter.clone().next() == Some(&b'\n') {
|
|
iter.next();
|
|
}
|
|
}
|
|
_ if b.is_ascii() => {
|
|
continue;
|
|
}
|
|
LS_OR_PS_FIRST => {
|
|
let next_byte = *iter.next().unwrap();
|
|
let next_next_byte = *iter.next().unwrap();
|
|
if next_byte != LS_OR_PS_SECOND
|
|
|| !matches!(next_next_byte, LS_THIRD | PS_THIRD)
|
|
{
|
|
last_line_is_ascii = false;
|
|
continue;
|
|
}
|
|
}
|
|
_ => {
|
|
// Unicode char
|
|
last_line_is_ascii = false;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Line break found.
|
|
// `iter` is now positioned after line break.
|
|
line_start_ptr = iter.as_slice().as_ptr();
|
|
self.generated_line += 1;
|
|
self.generated_column = 0;
|
|
last_line_is_ascii = true;
|
|
}
|
|
|
|
// Calculate column
|
|
self.generated_column += if last_line_is_ascii {
|
|
// `iter` is now exhausted, so `iter.as_slice().as_ptr()` is pointer to end of `output`
|
|
(iter.as_slice().as_ptr() as usize - line_start_ptr as usize) as u32
|
|
} else {
|
|
let line_byte_offset = line_start_ptr as usize - remaining.as_ptr() as usize;
|
|
// TODO: It'd be better if could use `from_utf8_unchecked` here, but we'd need to make this
|
|
// function unsafe and caller guarantees `output` contains a valid UTF-8 string
|
|
let last_line = std::str::from_utf8(&remaining[line_byte_offset..]).unwrap();
|
|
// Mozilla's "source-map" library counts columns using UTF-16 code units
|
|
last_line.encode_utf16().count() as u32
|
|
};
|
|
self.last_generated_update = output.len();
|
|
}
|
|
|
|
fn generate_line_offset_tables(content: &str) -> Vec<LineOffsetTable> {
|
|
let mut tables = vec![];
|
|
|
|
// Process content line-by-line.
|
|
// For each line, start by assuming line will be entirely ASCII, and read byte-by-byte.
|
|
// If line is all ASCII, UTF-8 columns and UTF-16 columns are the same,
|
|
// so no need to create a `columns` Vec. This is the fast path for common case.
|
|
// If a Unicode character found, read rest of line char-by-char, populating `columns` Vec.
|
|
// At end of line, go back to top of outer loop, and again assume ASCII for next line.
|
|
let mut line_byte_offset = 0;
|
|
'lines: loop {
|
|
tables.push(LineOffsetTable {
|
|
columns: None,
|
|
// `usize::MAX` so `original_column >= line.byte_offset_to_first` check in
|
|
// `search_original_line_and_column` fails if line is all ASCII
|
|
byte_offset_to_first: usize::MAX,
|
|
byte_offset_to_start_of_line: line_byte_offset,
|
|
});
|
|
|
|
let remaining = &content.as_bytes()[line_byte_offset..];
|
|
for (mut byte_offset_from_line_start, b) in remaining.iter().enumerate() {
|
|
match b {
|
|
b'\n' => {
|
|
byte_offset_from_line_start += 1;
|
|
}
|
|
b'\r' => {
|
|
byte_offset_from_line_start += 1;
|
|
// Handle Windows-specific "\r\n" newlines
|
|
if remaining.get(byte_offset_from_line_start) == Some(&b'\n') {
|
|
byte_offset_from_line_start += 1;
|
|
}
|
|
}
|
|
_ if b.is_ascii() => {
|
|
continue;
|
|
}
|
|
_ => {
|
|
// Unicode char found.
|
|
// Create `columns` Vec, and set `byte_offset_to_first`.
|
|
let table = tables.iter_mut().last().unwrap();
|
|
table.byte_offset_to_first = byte_offset_from_line_start;
|
|
table.columns = Some(vec![]);
|
|
let columns = table.columns.as_mut().unwrap();
|
|
|
|
// Loop through rest of line char-by-char.
|
|
// `chunk_byte_offset` in this loop is byte offset from start of this 1st
|
|
// Unicode char.
|
|
let mut column = byte_offset_from_line_start;
|
|
line_byte_offset += byte_offset_from_line_start;
|
|
let remaining = &content[line_byte_offset..];
|
|
for (mut chunk_byte_offset, ch) in remaining.char_indices() {
|
|
for _ in 0..ch.len_utf8() {
|
|
columns.push(column);
|
|
}
|
|
|
|
match ch {
|
|
'\r' => {
|
|
// Handle Windows-specific "\r\n" newlines
|
|
chunk_byte_offset += 1;
|
|
if remaining.as_bytes().get(chunk_byte_offset) == Some(&b'\n') {
|
|
chunk_byte_offset += 1;
|
|
columns.push(column + 1);
|
|
}
|
|
}
|
|
'\n' => {
|
|
chunk_byte_offset += 1;
|
|
}
|
|
LS | PS => {
|
|
chunk_byte_offset += 3;
|
|
}
|
|
_ => {
|
|
// Mozilla's "source-map" library counts columns using UTF-16 code units
|
|
column += ch.len_utf16();
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Line break found.
|
|
// `chunk_byte_offset` is now the offset of *end* of the line break.
|
|
line_byte_offset += chunk_byte_offset;
|
|
// Revert back to outer loop for next line
|
|
continue 'lines;
|
|
}
|
|
|
|
// EOF.
|
|
// One last column entry for EOF position.
|
|
columns.push(column);
|
|
break 'lines;
|
|
}
|
|
};
|
|
|
|
// Line break found.
|
|
// `byte_offset_from_line_start` is now the length of line *including* line break.
|
|
line_byte_offset += byte_offset_from_line_start;
|
|
continue 'lines;
|
|
}
|
|
|
|
// EOF
|
|
break;
|
|
}
|
|
|
|
tables
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod test {
|
|
use super::*;
|
|
|
|
#[test]
fn builder_ascii() {
    // Each case is a source text plus `(byte position, expected line, expected column)`
    // triples. Covers `\n`, lone `\r`, and `\r\n` line breaks in ASCII-only input.
    let cases: &[(&str, &[(u32, u32, u32)])] = &[
        ("", &[(0, 0, 0)]),
        ("a", &[(0, 0, 0), (1, 0, 1)]),
        ("\n", &[(0, 0, 0), (1, 1, 0)]),
        ("a\n", &[(0, 0, 0), (1, 0, 1), (2, 1, 0)]),
        ("\na", &[(0, 0, 0), (1, 1, 0), (2, 1, 1)]),
        (
            "ab\ncd\n\nef",
            &[
                (0, 0, 0),
                (1, 0, 1),
                (2, 0, 2),
                (3, 1, 0),
                (4, 1, 1),
                (5, 1, 2),
                (6, 2, 0),
                (7, 3, 0),
                (8, 3, 1),
                (9, 3, 2),
            ],
        ),
        ("\r", &[(0, 0, 0), (1, 1, 0)]),
        ("\r\r", &[(0, 0, 0), (1, 1, 0), (2, 2, 0)]),
        ("a\ra", &[(0, 0, 0), (1, 0, 1), (2, 1, 0), (3, 1, 1)]),
        ("\r\n", &[(0, 0, 0), (1, 0, 1), (2, 1, 0)]),
        ("\r\n\r\n", &[(0, 0, 0), (1, 0, 1), (2, 1, 0), (3, 1, 1), (4, 2, 0)]),
        ("a\r\na", &[(0, 0, 0), (1, 0, 1), (2, 0, 2), (3, 1, 0), (4, 1, 1)]),
    ];

    for &(source, mappings) in cases {
        assert_mapping(source, mappings);
    }
}
|
|
|
|
#[test]
fn builder_unicode() {
    // Each case is a source text plus `(byte position, expected line, expected column)`
    // triples. `Ö` is 2 bytes in UTF-8 but a single UTF-16 code unit.
    let cases: &[(&str, &[(u32, u32, u32)])] = &[
        ("Ö", &[(0, 0, 0), (2, 0, 1)]),
        ("ÖÖ", &[(0, 0, 0), (2, 0, 1), (4, 0, 2)]),
        ("Ö\n", &[(0, 0, 0), (2, 0, 1), (3, 1, 0)]),
        ("ÖÖ\n", &[(0, 0, 0), (2, 0, 1), (4, 0, 2), (5, 1, 0)]),
        ("\nÖ", &[(0, 0, 0), (1, 1, 0), (3, 1, 1)]),
        ("\nÖÖ", &[(0, 0, 0), (1, 1, 0), (3, 1, 1), (5, 1, 2)]),
        ("Ö\nÖ", &[(0, 0, 0), (2, 0, 1), (3, 1, 0), (5, 1, 1)]),
        ("\nÖÖ\n", &[(0, 0, 0), (1, 1, 0), (3, 1, 1), (5, 1, 2), (6, 2, 0)]),
        ("Ö\ra", &[(0, 0, 0), (2, 0, 1), (3, 1, 0), (4, 1, 1)]),
        ("Ö\r\na", &[(0, 0, 0), (2, 0, 1), (3, 0, 2), (4, 1, 0), (5, 1, 1)]),
    ];

    for &(source, mappings) in cases {
        assert_mapping(source, mappings);
    }
}
|
|
|
|
fn assert_mapping(source: &str, mappings: &[(u32, u32, u32)]) {
|
|
let mut builder = SourcemapBuilder::default();
|
|
builder.with_name_and_source("x.js", source);
|
|
for (position, expected_line, expected_col) in mappings.iter().copied() {
|
|
let (line, col) = builder.search_original_line_and_column(position);
|
|
assert_eq!(
|
|
builder.search_original_line_and_column(position),
|
|
(expected_line, expected_col),
|
|
"Incorrect mapping for '{source}' - position {position} = line {line}, column {col}"
|
|
);
|
|
}
|
|
}
|
|
|
|
#[test]
fn add_source_mapping() {
    /// Uses `source` as both original source and generated output, adds a mapping at
    /// every char boundary, and checks after each one that the generated position is
    /// `line`:`column` and that the whole output has been processed.
    fn create_mappings(source: &str, line: u32, column: u32) {
        let mut builder = SourcemapBuilder::default();
        builder.with_name_and_source("x.js", source);
        let output = source.as_bytes().to_vec();
        for i in source.char_indices().map(|(index, _ch)| index) {
            #[allow(clippy::cast_possible_truncation)]
            builder.add_source_mapping(&output, i as u32, None);
            assert!(
                builder.generated_line == line && builder.generated_column == column,
                "Incorrect generated mapping for '{source}' ({:?}) starting at {i} - line {}, column {}",
                source.as_bytes(),
                builder.generated_line,
                builder.generated_column
            );
            assert_eq!(builder.last_generated_update, source.len());
        }
    }

    // (source, expected generated line, expected generated column)
    let cases: &[(&str, u32, u32)] = &[
        ("", 0, 0),
        ("abc", 0, 3),
        ("\n", 1, 0),
        ("\n\n\n", 3, 0),
        ("\r", 1, 0),
        ("\r\r\r", 3, 0),
        ("\r\n", 1, 0),
        ("\r\n\r\n\r\n", 3, 0),
        ("\nabc", 1, 3),
        ("abc\n", 1, 0),
        ("\rabc", 1, 3),
        ("abc\r", 1, 0),
        ("\r\nabc", 1, 3),
        ("abc\r\n", 1, 0),
        ("ÖÖ\nÖ\nÖÖÖ", 2, 3),
    ];

    for &(source, line, column) in cases {
        create_mappings(source, line, column);
    }
}
|
|
|
|
#[test]
fn add_source_mapping_for_name() {
    // Original source is "ab", generated output is "ac": `a` is kept, `b` is renamed to `c`.
    let output: &[u8] = b"ac";
    let mut builder = SourcemapBuilder::default();
    builder.with_name_and_source("x.js", "ab");

    let kept = Span::new(0, 1);
    let renamed = Span::new(1, 2);
    builder.add_source_mapping_for_name(output, kept, "a");
    builder.add_source_mapping_for_name(output, renamed, "c");

    let sm = builder.into_sourcemap();
    // The name `a` not change.
    assert_eq!(
        sm.get_source_view_token(0_u32).as_ref().and_then(|token| token.get_name()),
        None
    );
    // The name `b` -> `c`, save `b` to token.
    assert_eq!(
        sm.get_source_view_token(1_u32).as_ref().and_then(|token| token.get_name()),
        Some("b")
    );
}
|
|
}
|