perf(codegen): u32 indexes in LineOffsetTable for source maps (#4641)

Oxc has a 4 GiB limit on the size of source files, so `u32` is sufficient to hold line and column offsets. Use `u32` for these values in `LineOffsetTable`, which reduces the size of the type by 8 bytes.
This commit is contained in:
overlookmotel 2024-08-05 02:28:50 +00:00
parent 6c612d141c
commit b8e67538f8

View file

@ -16,9 +16,9 @@ const PS_THIRD: u8 = 0xA9;
/// Code is adapted from [esbuild](https://github.com/evanw/esbuild/blob/cc74e6042a9f573bf58e1e3f165ebda70af4ad3b/internal/js_printer/js_printer.go#L4806-L4808) /// Code is adapted from [esbuild](https://github.com/evanw/esbuild/blob/cc74e6042a9f573bf58e1e3f165ebda70af4ad3b/internal/js_printer/js_printer.go#L4806-L4808)
#[derive(Debug)] #[derive(Debug)]
pub struct LineOffsetTable { pub struct LineOffsetTable {
columns: Option<Vec<usize>>, columns: Option<Vec<u32>>,
byte_offset_to_first: usize, byte_offset_to_first: u32,
byte_offset_to_start_of_line: usize, byte_offset_to_start_of_line: u32,
} }
#[allow(clippy::struct_field_names)] #[allow(clippy::struct_field_names)]
@ -97,16 +97,17 @@ impl SourcemapBuilder {
fn search_original_line_and_column(&mut self, position: u32) -> (u32, u32) { fn search_original_line_and_column(&mut self, position: u32) -> (u32, u32) {
let result = self let result = self
.line_offset_tables .line_offset_tables
.partition_point(|table| table.byte_offset_to_start_of_line <= position as usize); .partition_point(|table| table.byte_offset_to_start_of_line <= position)
as u32;
let original_line = if result > 0 { result - 1 } else { 0 }; let original_line = if result > 0 { result - 1 } else { 0 };
let line = &self.line_offset_tables[original_line]; let line = &self.line_offset_tables[original_line as usize];
let mut original_column = (position as usize) - line.byte_offset_to_start_of_line; let mut original_column = position - line.byte_offset_to_start_of_line;
if original_column >= line.byte_offset_to_first { if original_column >= line.byte_offset_to_first {
if let Some(cols) = &line.columns { if let Some(cols) = &line.columns {
original_column = cols[original_column - line.byte_offset_to_first]; original_column = cols[(original_column - line.byte_offset_to_first) as usize];
} }
} }
(original_line as u32, original_column as u32) (original_line, original_column)
} }
#[allow(clippy::cast_possible_truncation)] #[allow(clippy::cast_possible_truncation)]
@ -184,12 +185,14 @@ impl SourcemapBuilder {
columns: None, columns: None,
// `usize::MAX` so `original_column >= line.byte_offset_to_first` check in // `u32::MAX` so `original_column >= line.byte_offset_to_first` check in
// `search_original_line_and_column` fails if line is all ASCII // `search_original_line_and_column` fails if line is all ASCII
byte_offset_to_first: usize::MAX, byte_offset_to_first: u32::MAX,
byte_offset_to_start_of_line: line_byte_offset, byte_offset_to_start_of_line: line_byte_offset,
}); });
let remaining = &content.as_bytes()[line_byte_offset..]; let remaining = &content.as_bytes()[line_byte_offset as usize..];
for (mut byte_offset_from_line_start, b) in remaining.iter().enumerate() { for (byte_offset_from_line_start, b) in remaining.iter().enumerate() {
#[allow(clippy::cast_possible_truncation)]
let mut byte_offset_from_line_start = byte_offset_from_line_start as u32;
match b { match b {
b'\n' => { b'\n' => {
byte_offset_from_line_start += 1; byte_offset_from_line_start += 1;
@ -197,7 +200,7 @@ impl SourcemapBuilder {
b'\r' => { b'\r' => {
byte_offset_from_line_start += 1; byte_offset_from_line_start += 1;
// Handle Windows-specific "\r\n" newlines // Handle Windows-specific "\r\n" newlines
if remaining.get(byte_offset_from_line_start) == Some(&b'\n') { if remaining.get(byte_offset_from_line_start as usize) == Some(&b'\n') {
byte_offset_from_line_start += 1; byte_offset_from_line_start += 1;
} }
} }
@ -217,8 +220,10 @@ impl SourcemapBuilder {
// Unicode char. // Unicode char.
let mut column = byte_offset_from_line_start; let mut column = byte_offset_from_line_start;
line_byte_offset += byte_offset_from_line_start; line_byte_offset += byte_offset_from_line_start;
let remaining = &content[line_byte_offset..]; let remaining = &content[line_byte_offset as usize..];
for (mut chunk_byte_offset, ch) in remaining.char_indices() { for (chunk_byte_offset, ch) in remaining.char_indices() {
#[allow(clippy::cast_possible_truncation)]
let mut chunk_byte_offset = chunk_byte_offset as u32;
for _ in 0..ch.len_utf8() { for _ in 0..ch.len_utf8() {
columns.push(column); columns.push(column);
} }
@ -227,7 +232,9 @@ impl SourcemapBuilder {
'\r' => { '\r' => {
// Handle Windows-specific "\r\n" newlines // Handle Windows-specific "\r\n" newlines
chunk_byte_offset += 1; chunk_byte_offset += 1;
if remaining.as_bytes().get(chunk_byte_offset) == Some(&b'\n') { if remaining.as_bytes().get(chunk_byte_offset as usize)
== Some(&b'\n')
{
chunk_byte_offset += 1; chunk_byte_offset += 1;
columns.push(column + 1); columns.push(column + 1);
} }
@ -238,9 +245,10 @@ impl SourcemapBuilder {
LS | PS => { LS | PS => {
chunk_byte_offset += 3; chunk_byte_offset += 3;
} }
#[allow(clippy::cast_possible_truncation)]
_ => { _ => {
// Mozilla's "source-map" library counts columns using UTF-16 code units // Mozilla's "source-map" library counts columns using UTF-16 code units
column += ch.len_utf16(); column += ch.len_utf16() as u32;
continue; continue;
} }
} }