mirror of
https://github.com/danbulant/oxc
synced 2026-05-19 12:19:15 +00:00
fix(transformer): JSX source calculate correct column when Unicode chars (#3615)
Fix column number in JSX source transform, and add tests. It was correct in all cases, except when a Unicode character with a code point above `0xFFFF` appears earlier on the line. Such characters are 4 bytes in UTF-8, 2 code units in UTF-16, and 1 `char` in Rust. Babel (which we're trying to match) uses the count of UTF-16 code units for the column number, whereas we were using the count of Rust `char`s.
This commit is contained in:
parent
9e8f4d60b5
commit
8d237c49a9
2 changed files with 88 additions and 8 deletions
|
|
@ -85,6 +85,9 @@ impl<'a> ReactJsxSource<'a> {
|
|||
let key = JSXAttributeName::Identifier(
|
||||
self.ctx.ast.alloc(self.ctx.ast.jsx_identifier(SPAN, SOURCE.into())),
|
||||
);
|
||||
// TODO: We shouldn't calculate line + column from scratch each time as it's expensive.
|
||||
// Build a table of byte indexes of each line's start on first usage, and save it.
|
||||
// Then calculate line and column from that.
|
||||
let (line, column) = get_line_column(elem.span.start, self.ctx.source_text);
|
||||
let object = self.get_source_object(line, column, ctx);
|
||||
let expr = self.ctx.ast.jsx_expression_container(SPAN, JSXExpression::from(object));
|
||||
|
|
|
|||
|
|
@ -1,14 +1,91 @@
|
|||
use ropey::Rope;
|
||||
|
||||
/// Get line and column from offset and source text
|
||||
/// Get line and column from offset and source text.
|
||||
///
|
||||
/// Line number starts at 1.
|
||||
/// Column number is in UTF-16 characters, and starts at 1.
|
||||
///
|
||||
/// This matches Babel's output.
|
||||
pub fn get_line_column(offset: u32, source_text: &str) -> (usize, usize) {
|
||||
let offset = offset as usize;
|
||||
let rope = Rope::from_str(source_text);
|
||||
let line = rope.byte_to_line(offset);
|
||||
let first_char_of_line = rope.line_to_char(line);
|
||||
// Original offset is byte, but Rope uses char offset
|
||||
let offset = rope.byte_to_char(offset);
|
||||
let column = offset - first_char_of_line;
|
||||
// line and column is zero-indexed, but we want 1-indexed
|
||||
(line + 1, column + 1)
|
||||
// Get line number and byte offset of start of line
|
||||
let line_index = rope.byte_to_line(offset);
|
||||
let line_offset = rope.line_to_byte(line_index);
|
||||
// Get column number
|
||||
let column_index = source_text[line_offset..offset].encode_utf16().count();
|
||||
// line and column are zero-indexed, but we want 1-indexed
|
||||
(line_index + 1, column_index + 1)
|
||||
}
|
||||
|
||||
/// Offset 0 of an empty source is reported as line 1, column 1.
#[test]
fn empty_file() {
    let position = get_line_column(0, "");
    assert_eq!(position, (1, 1));
}
|
||||
|
||||
/// The very first byte of a multi-line source is line 1, column 1.
#[test]
fn first_line_start() {
    let source = "foo\nbar\n";
    let position = get_line_column(0, source);
    assert_eq!(position, (1, 1));
}
|
||||
|
||||
/// An offset part-way through the first line maps to column `offset + 1`.
#[test]
fn first_line_middle() {
    let source = "blahblahblah\noops\n";
    let position = get_line_column(5, source);
    assert_eq!(position, (1, 6));
}
|
||||
|
||||
/// The first byte after the second line break is line 3, column 1.
#[test]
fn later_line_start() {
    let source = "foo\nbar\nblahblahblah";
    let position = get_line_column(8, source);
    assert_eq!(position, (3, 1));
}
|
||||
|
||||
/// An offset 4 bytes into the third line is line 3, column 5.
#[test]
fn later_line_middle() {
    let source = "foo\nbar\nblahblahblah";
    let position = get_line_column(12, source);
    assert_eq!(position, (3, 5));
}
|
||||
|
||||
/// `£` is 2 bytes in UTF-8 but 1 UTF-16 code unit, so it advances the column by 1.
#[test]
fn after_2_byte_unicode() {
    // Sanity-check the premise about the character's encoded sizes
    let pound = "£";
    assert_eq!(pound.len(), 2);
    assert_eq!(utf16_len(pound), 1);

    let position = get_line_column(4, "£abc");
    assert_eq!(position, (1, 4));
}
|
||||
|
||||
/// `अ` is 3 bytes in UTF-8 but 1 UTF-16 code unit, so it advances the column by 1.
#[test]
fn after_3_byte_unicode() {
    // Sanity-check the premise about the character's encoded sizes
    let letter = "अ";
    assert_eq!(letter.len(), 3);
    assert_eq!(utf16_len(letter), 1);

    let position = get_line_column(5, "अabc");
    assert_eq!(position, (1, 4));
}
|
||||
|
||||
/// `🍄` is 4 bytes in UTF-8 and 2 UTF-16 code units, so it advances the column by 2.
#[test]
fn after_4_byte_unicode() {
    // Sanity-check the premise about the character's encoded sizes
    let mushroom = "🍄";
    assert_eq!(mushroom.len(), 4);
    assert_eq!(utf16_len(mushroom), 2);

    let position = get_line_column(6, "🍄abc");
    assert_eq!(position, (1, 5));
}
|
||||
|
||||
/// A 2-byte character on the previous line must not affect the column on this line.
#[test]
fn after_2_byte_unicode_on_previous_line() {
    // Sanity-check the premise about the character's encoded sizes
    let pound = "£";
    assert_eq!(pound.len(), 2);
    assert_eq!(utf16_len(pound), 1);

    let position = get_line_column(4, "£\nabc");
    assert_eq!(position, (2, 2));
}
|
||||
|
||||
/// A 3-byte character on the previous line must not affect the column on this line.
#[test]
fn after_3_byte_unicode_on_previous_line() {
    // Sanity-check the premise about the character's encoded sizes
    let letter = "अ";
    assert_eq!(letter.len(), 3);
    assert_eq!(utf16_len(letter), 1);

    let position = get_line_column(5, "अ\nabc");
    assert_eq!(position, (2, 2));
}
|
||||
|
||||
/// A 4-byte character on the previous line must not affect the column on this line.
#[test]
fn after_4_byte_unicode_on_previous_line() {
    // Sanity-check the premise about the character's encoded sizes
    let mushroom = "🍄";
    assert_eq!(mushroom.len(), 4);
    assert_eq!(utf16_len(mushroom), 2);

    let position = get_line_column(6, "🍄\nabc");
    assert_eq!(position, (2, 2));
}
|
||||
|
||||
/// Number of UTF-16 code units needed to encode `s` (test-only helper).
#[cfg(test)]
fn utf16_len(s: &str) -> usize {
    s.chars().map(char::len_utf16).sum()
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue