diff --git a/crates/oxc_transformer/src/react_jsx/mod.rs b/crates/oxc_transformer/src/react_jsx/mod.rs index 50d01473f..dfa3ee9e5 100644 --- a/crates/oxc_transformer/src/react_jsx/mod.rs +++ b/crates/oxc_transformer/src/react_jsx/mod.rs @@ -5,7 +5,10 @@ use std::rc::Rc; use oxc_allocator::Vec; use oxc_ast::{ast::*, AstBuilder}; use oxc_span::{Atom, SPAN}; -use oxc_syntax::xml_entities::XML_ENTITIES; +use oxc_syntax::{ + identifier::{is_irregular_whitespace, is_line_terminator}, + xml_entities::XML_ENTITIES, +}; pub use self::options::{ReactJsxOptions, ReactJsxRuntime}; @@ -408,7 +411,7 @@ impl<'a> ReactJsx<'a> { ) -> Expression<'a> { match value { Some(JSXAttributeValue::String(s)) => { - let jsx_text = Self::decode_jsx_text(&s.value); + let jsx_text = Self::decode_entities(&s.value); let literal = StringLiteral::new(s.span, jsx_text.into()); self.ast.literal_string_expression(literal) } @@ -461,24 +464,66 @@ impl<'a> ReactJsx<'a> { } fn transform_jsx_text(&self, text: &JSXString) -> Option> { - let text = text.value.trim(); - (!text.trim().is_empty()).then(|| { - let text = text - .split(char::is_whitespace) - .map(str::trim) - .filter(|c| !c.is_empty()) - .map(Self::decode_jsx_text) - .collect::>() - .join(" "); - let s = StringLiteral::new(SPAN, text.into()); + Self::fixup_whitespace_and_decode_entities(text.value.as_str()).map(|s| { + let s = StringLiteral::new(SPAN, s.into()); self.ast.literal_string_expression(s) }) } + /// JSX trims whitespace at the end and beginning of lines, except that the + /// start/end of a tag is considered a start/end of a line only if that line is + /// on the same line as the closing tag. See examples in + /// tests/cases/conformance/jsx/tsxReactEmitWhitespace.tsx + /// See also https://www.w3.org/TR/html4/struct/text.html#h-9.1 and https://www.w3.org/TR/CSS2/text.html#white-space-model + /// + /// An equivalent algorithm would be: + /// - If there is only one line, return it. + /// - If there is only whitespace (but multiple lines), return `undefined`. + /// - Split the text into lines. + /// - 'trimRight' the first line, 'trimLeft' the last line, 'trim' middle lines. + /// - Decode entities on each line (individually). + /// - Remove empty lines and join the rest with " ". + /// + /// + fn fixup_whitespace_and_decode_entities(text: &str) -> Option { + let mut acc: Option = None; + let mut first_non_whitespace: Option = Some(0); + let mut last_non_whitespace: Option = None; + let mut i: usize = 0; + for c in text.chars() { + if is_line_terminator(c) { + if let (Some(first), Some(last)) = (first_non_whitespace, last_non_whitespace) { + acc = Some(Self::add_line_of_jsx_text(acc, &text[first..=last])); + } + first_non_whitespace = None; + } else if c != ' ' && !is_irregular_whitespace(c) { + last_non_whitespace = Some(i); + if first_non_whitespace.is_none() { + first_non_whitespace.replace(i); + } + } + i += c.len_utf8(); + } + if let Some(first) = first_non_whitespace { + Some(Self::add_line_of_jsx_text(acc, &text[first..])) + } else { + acc + } + } + + fn add_line_of_jsx_text(acc: Option, trimmed_line: &str) -> String { + let decoded = Self::decode_entities(trimmed_line); + if let Some(acc) = acc { + format!("{acc} {decoded}") + } else { + decoded + } + } + /// * Replace entities like " ", "{", and "�" with the characters they encode. /// * See https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references /// Code adapted from - fn decode_jsx_text(s: &str) -> String { + fn decode_entities(s: &str) -> String { let mut buffer = vec![]; let mut chars = s.bytes().enumerate(); let mut prev = 0; diff --git a/tasks/transform_conformance/babel.snap.md b/tasks/transform_conformance/babel.snap.md index 394303f80..f30083455 100644 --- a/tasks/transform_conformance/babel.snap.md +++ b/tasks/transform_conformance/babel.snap.md @@ -1,4 +1,4 @@ -Passed: 234/1083 +Passed: 241/1083 # All Passed: * babel-plugin-transform-numeric-separator @@ -804,7 +804,7 @@ Passed: 234/1083 * regression/11061/input.mjs * variable-declaration/non-null-in-optional-chain/input.ts -# babel-plugin-transform-react-jsx (85/172) +# babel-plugin-transform-react-jsx (92/172) * autoImport/after-polyfills-compiled-to-cjs/input.mjs * autoImport/after-polyfills-script-not-supported/input.js * autoImport/auto-import-react-source-type-module/input.js @@ -847,10 +847,6 @@ Passed: 234/1083 * react/should-disallow-spread-children/input.js * react/should-disallow-valueless-key/input.js * react/should-disallow-xml-namespacing/input.js -* react/should-escape-xhtml-jsxtext/input.js -* react/should-escape-xhtml-jsxtext-babel-7/input.js -* react/should-handle-attributed-elements/input.js -* react/should-not-strip-nbsp-even-coupled-with-other-whitespace/input.js * react/should-support-xml-namespaces-if-flag/input.js * react/should-throw-error-namespaces-if-not-flag/input.js * react/should-warn-when-importSource-is-set/input.js @@ -870,10 +866,7 @@ Passed: 234/1083 * react-automatic/should-disallow-spread-children/input.js * react-automatic/should-disallow-valueless-key/input.js * react-automatic/should-disallow-xml-namespacing/input.js -* react-automatic/should-escape-xhtml-jsxtext/input.js -* react-automatic/should-escape-xhtml-jsxtext-babel-7/input.js * react-automatic/should-handle-attributed-elements/input.js -* react-automatic/should-not-strip-nbsp-even-coupled-with-other-whitespace/input.js * react-automatic/should-properly-handle-comments-between-props/input.js * react-automatic/should-throw-error-namespaces-if-not-flag/input.js * react-automatic/should-throw-when-filter-is-specified/input.js