From 37efbd7af3cb097411a4da89b63be8365c17b6d9 Mon Sep 17 00:00:00 2001 From: Boshen Date: Sat, 12 Aug 2023 23:51:37 +0800 Subject: [PATCH] feat(resolver): resolve `#` as path instead of a fragment (#727) --- crates/oxc_resolver/src/lib.rs | 84 +++++++++++----- crates/oxc_resolver/src/specifier.rs | 121 +++++++++++++---------- crates/oxc_resolver/src/tests/resolve.rs | 9 +- 3 files changed, 133 insertions(+), 81 deletions(-) diff --git a/crates/oxc_resolver/src/lib.rs b/crates/oxc_resolver/src/lib.rs index 192444ebf..4230a4ce1 100644 --- a/crates/oxc_resolver/src/lib.rs +++ b/crates/oxc_resolver/src/lib.rs @@ -41,7 +41,7 @@ use crate::{ file_system::FileSystemOs, package_json::{ExportsField, ExportsKey, MatchObject, PackageJson}, path::PathUtil, - specifier::{Specifier, SpecifierPath}, + specifier::{Specifier, SpecifierKind}, }; pub use crate::{ error::{JSONError, ResolveError}, @@ -144,10 +144,12 @@ impl ResolverGeneric { fully_specified: self.options.fully_specified, ..ResolveContextImpl::default() })); + let specifier = Specifier::parse(specifier).map_err(ResolveError::Specifier)?; + ctx.with_query_fragment(specifier.query, specifier.fragment); let cached_path = self.cache.value(path); - let cached_path = self.require(&cached_path, specifier, &ctx).or_else(|err| { + let cached_path = self.require(&cached_path, &specifier, &ctx).or_else(|err| { // enhanced-resolve: try fallback - self.load_alias(&cached_path, Some(specifier), &self.options.fallback, &ctx) + self.load_alias(&cached_path, Some(specifier.path()), &self.options.fallback, &ctx) .and_then(|value| value.ok_or(err)) })?; let path = self.load_realpath(&cached_path).unwrap_or_else(|| cached_path.to_path_buf()); @@ -163,42 +165,39 @@ impl ResolverGeneric { fn require( &self, cached_path: &CachedPath, - specifier: &str, + specifier: &Specifier, ctx: &ResolveContext, ) -> Result { ctx.test_for_infinite_recursion()?; - let specifier = Specifier::parse(specifier).map_err(ResolveError::Specifier)?; - ctx.with_query_fragment(specifier.query, specifier.fragment); + // enhanced-resolve: try fragment as path + if let Some(path) = self.try_fragment_as_path(cached_path, specifier, ctx) { + return Ok(path); + } // enhanced-resolve: try alias if let Some(path) = - self.load_alias(cached_path, Some(specifier.path.as_str()), &self.options.alias, ctx)? + self.load_alias(cached_path, Some(specifier.path()), &self.options.alias, ctx)? { return Ok(path); } - match specifier.path { + let specifier_str = specifier.path(); + match specifier.kind { // 1. If X is a core module, // a. return the core module // b. STOP // 2. If X begins with '/' // a. set Y to be the file system root - SpecifierPath::Absolute(absolute_path) => { - self.require_absolute(cached_path, absolute_path, ctx) - } + SpecifierKind::Absolute => self.require_absolute(cached_path, specifier_str, ctx), // 3. If X begins with './' or '/' or '../' - SpecifierPath::Relative(relative_path) => { - self.require_relative(cached_path, relative_path, ctx) - } + SpecifierKind::Relative => self.require_relative(cached_path, specifier_str, ctx), // 4. If X begins with '#' - SpecifierPath::Hash(specifier) => self.require_hash(cached_path, specifier, ctx), + SpecifierKind::Hash => self.require_hash(cached_path, specifier_str, ctx), // (ESM) 5. Otherwise, // Note: specifier is now a bare specifier. // Set resolved the result of PACKAGE_RESOLVE(specifier, parentURL). - SpecifierPath::Bare(bare_specifier) => { - self.require_bare(cached_path, bare_specifier, ctx) - } + SpecifierKind::Bare => self.require_bare(cached_path, specifier_str, ctx), } } @@ -282,6 +281,33 @@ impl ResolverGeneric { self.load_package_self_or_node_modules(cached_path, specifier, ctx) } + /// Try fragment as part of the path + /// + /// It's allowed to escape # as \0# to avoid parsing it as fragment. + /// enhanced-resolve will try to resolve requests containing `#` as path and as fragment, + /// so it will automatically figure out if `./some#thing` means `.../some.js#thing` or `.../some#thing.js`. + /// When a # is resolved as path it will be escaped in the result. Here: `.../some\0#thing.js`. + /// + /// + fn try_fragment_as_path( + &self, + cached_path: &CachedPath, + specifier: &Specifier, + ctx: &ResolveContext, + ) -> Option { + if ctx.borrow().fragment.is_some() && ctx.borrow().query.is_none() { + let fragment = ctx.borrow_mut().fragment.take().unwrap(); + let path = format!("{}{fragment}", specifier.path()); + let mut specifier = specifier.clone(); + specifier.set_path(&path); + if let Ok(path) = self.require(cached_path, &specifier, ctx) { + return Some(path); + } + ctx.borrow_mut().fragment.replace(fragment); + } + None + } + fn load_package_self_or_node_modules( &self, cached_path: &CachedPath, @@ -627,10 +653,12 @@ impl ResolverGeneric { if ctx.borrow().resolving_alias.as_ref().is_some_and(|s| s == specifier) { return Ok(None); } - ctx.with_resolving_alias(specifier.to_string()); - let cached_path = self.cache.value(package_json.directory()); + let specifier = Specifier::parse(specifier).map_err(ResolveError::Specifier)?; + ctx.with_query_fragment(specifier.query, specifier.fragment); + ctx.with_resolving_alias(specifier.path().to_string()); ctx.with_fully_specified(false); - self.require(&cached_path, specifier, ctx).map(Some) + let cached_path = self.cache.value(package_json.directory()); + self.require(&cached_path, &specifier, ctx).map(Some) } /// enhanced-resolve: AliasPlugin for [ResolveOptions::alias] and [ResolveOptions::fallback]. @@ -655,18 +683,20 @@ impl ResolverGeneric { match r { AliasValue::Path(alias) => { let specifier = Specifier::parse(alias).map_err(ResolveError::Specifier)?; - let alias = specifier.path.as_str(); + let alias = specifier.path(); if inner_request.as_ref() != alias && !inner_request .strip_prefix(alias) .is_some_and(|prefix| prefix.starts_with('/')) { - let new_request_str = + let new_specifier = format!("{alias}{}", &inner_request[alias_key.len()..]); + let new_specifier = Specifier::parse(&new_specifier) + .map_err(ResolveError::Specifier)?; ctx.with_fully_specified(false); - // Alias may contain `?query`, pass it along. + // Override query and fragment from the alias ctx.with_query_fragment(specifier.query, specifier.fragment); - match self.require(cached_path, &new_request_str, ctx) { + match self.require(cached_path, &new_specifier, ctx) { Err(ResolveError::NotFound(_)) => { /* noop */ } Ok(path) => return Ok(Some(path)), Err(err) => return Err(err), @@ -754,8 +784,10 @@ impl ResolverGeneric { } } let subpath = format!(".{subpath}"); + let specifier = Specifier::parse(&subpath).map_err(ResolveError::Specifier)?; ctx.with_fully_specified(false); - return self.require(&cached_path, &subpath, ctx).map(Some); + ctx.with_query_fragment(specifier.query, specifier.fragment); + return self.require(&cached_path, &specifier, ctx).map(Some); } parent_url.pop(); } diff --git a/crates/oxc_resolver/src/specifier.rs b/crates/oxc_resolver/src/specifier.rs index 467318651..80df5a6a3 100644 --- a/crates/oxc_resolver/src/specifier.rs +++ b/crates/oxc_resolver/src/specifier.rs @@ -1,78 +1,77 @@ use crate::error::SpecifierError; +use std::borrow::Cow; #[derive(Debug, Clone, Eq, PartialEq)] pub struct Specifier<'a> { - pub path: SpecifierPath<'a>, + path: Cow<'a, str>, + pub kind: SpecifierKind, pub query: Option<&'a str>, pub fragment: Option<&'a str>, } -#[derive(Debug, Clone, Eq, PartialEq)] -pub enum SpecifierPath<'a> { +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum SpecifierKind { /// `/path` - Absolute(&'a str), + Absolute, /// `./path`, `../path` - Relative(&'a str), + Relative, /// `#path` - Hash(&'a str), + Hash, /// Specifier without any leading syntax is called a bare specifier. - Bare(&'a str), -} - -impl<'a> SpecifierPath<'a> { - pub fn as_str(&self) -> &str { - match self { - Self::Absolute(s) | Self::Relative(s) | Self::Hash(s) | Self::Bare(s) => s, - } - } + Bare, } impl<'a> Specifier<'a> { + pub fn path(&'a self) -> &'a str { + self.path.as_ref() + } + + pub fn set_path(&mut self, path: &'a str) { + self.path = Cow::Borrowed(path); + } + pub fn parse(specifier: &'a str) -> Result, SpecifierError> { if specifier.is_empty() { return Err(SpecifierError::Empty); } - - let (path, query, fragment) = match specifier.as_bytes()[0] { - b'/' => { - let (path, query, fragment) = Self::parse_query_framgment(specifier, 1); - (SpecifierPath::Absolute(path), query, fragment) - } - b'.' => { - let (path, query, fragment) = Self::parse_query_framgment(specifier, 1); - (SpecifierPath::Relative(path), query, fragment) - } - b'#' => { - let (path, query, fragment) = Self::parse_query_framgment(specifier, 1); - (SpecifierPath::Hash(path), query, fragment) - } - _ => { - let (path, query, fragment) = Self::parse_query_framgment(specifier, 0); - (SpecifierPath::Bare(path), query, fragment) - } + let (kind, offset) = match specifier.as_bytes()[0] { + b'/' => (SpecifierKind::Absolute, 1), + b'.' => (SpecifierKind::Relative, 1), + b'#' => (SpecifierKind::Hash, 1), + _ => (SpecifierKind::Bare, 0), }; - - Ok(Self { path, query, fragment }) + let (path, query, fragment) = Self::parse_query_framgment(specifier, offset); + Ok(Self { path, kind, query, fragment }) } - fn parse_query_framgment(specifier: &str, skip: usize) -> (&str, Option<&str>, Option<&str>) { + fn parse_query_framgment( + specifier: &'a str, + skip: usize, + ) -> (Cow<'a, str>, Option<&str>, Option<&str>) { let mut query_start: Option = None; let mut fragment_start: Option = None; - for (i, c) in specifier.as_bytes().iter().enumerate().skip(skip) { - if *c == b'?' { + let mut prev = specifier.chars().next().unwrap(); + let mut escaped_indexes = vec![]; + for (i, c) in specifier.chars().enumerate().skip(skip) { + if c == '?' { query_start = Some(i); } - if *c == b'#' { - fragment_start = Some(i); - break; + if c == '#' { + if prev == '\0' { + escaped_indexes.push(i - 1); + } else { + fragment_start = Some(i); + break; + } } + prev = c; } - match (query_start, fragment_start) { + let (path, query, fragment) = match (query_start, fragment_start) { (Some(i), Some(j)) => { debug_assert!(i < j); (&specifier[..i], Some(&specifier[i..j]), Some(&specifier[j..])) @@ -80,18 +79,32 @@ impl<'a> Specifier<'a> { (Some(i), None) => (&specifier[..i], Some(&specifier[i..]), None), (None, Some(j)) => (&specifier[..j], None, Some(&specifier[j..])), _ => (specifier, None, None), - } + }; + + let path = if escaped_indexes.is_empty() { + Cow::Borrowed(path) + } else { + // Remove the `\0` characters for a legal path. + Cow::Owned( + path.chars() + .enumerate() + .filter_map(|(i, c)| (!escaped_indexes.contains(&i)).then_some(c)) + .collect::(), + ) + }; + + (path, query, fragment) } } #[cfg(test)] mod tests { - use super::{Specifier, SpecifierError, SpecifierPath}; + use super::{Specifier, SpecifierError, SpecifierKind}; #[test] #[cfg(target_pointer_width = "64")] fn size_asserts() { - static_assertions::assert_eq_size!(Specifier, [u8; 56]); + static_assertions::assert_eq_size!(Specifier, [u8; 64]); } #[test] @@ -104,7 +117,8 @@ mod tests { fn absolute() -> Result<(), SpecifierError> { let specifier = "/test?#"; let parsed = Specifier::parse(specifier)?; - assert_eq!(parsed.path, SpecifierPath::Absolute("/test")); + assert_eq!(parsed.path, "/test"); + assert_eq!(parsed.kind, SpecifierKind::Absolute); assert_eq!(parsed.query, Some("?")); assert_eq!(parsed.fragment, Some("#")); Ok(()) @@ -117,7 +131,8 @@ mod tests { let mut r = specifier.to_string(); r.push_str("?#"); let parsed = Specifier::parse(&r)?; - assert_eq!(parsed.path, SpecifierPath::Relative(specifier)); + assert_eq!(parsed.path, specifier); + assert_eq!(parsed.kind, SpecifierKind::Relative); assert_eq!(parsed.query, Some("?")); assert_eq!(parsed.fragment, Some("#")); } @@ -131,7 +146,8 @@ mod tests { let mut r = specifier.to_string(); r.push_str("?#"); let parsed = Specifier::parse(&r)?; - assert_eq!(parsed.path, SpecifierPath::Hash(specifier)); + assert_eq!(parsed.path, specifier); + assert_eq!(parsed.kind, SpecifierKind::Hash); assert_eq!(parsed.query, Some("?")); assert_eq!(parsed.fragment, Some("#")); } @@ -145,7 +161,8 @@ mod tests { let mut r = specifier.to_string(); r.push_str("?#"); let parsed = Specifier::parse(&r)?; - assert_eq!(parsed.path, SpecifierPath::Bare(specifier)); + assert_eq!(parsed.path, specifier); + assert_eq!(parsed.kind, SpecifierKind::Bare); assert_eq!(parsed.query, Some("?")); assert_eq!(parsed.fragment, Some("#")); } @@ -169,7 +186,7 @@ mod tests { for (specifier_str, query, fragment) in data { let specifier = Specifier::parse(specifier_str)?; - assert_eq!(specifier.path.as_str(), "a", "{specifier_str}"); + assert_eq!(specifier.path, "a", "{specifier_str}"); assert_eq!(specifier.query, query, "{specifier_str}"); assert_eq!(specifier.fragment, fragment, "{specifier_str}"); } @@ -193,7 +210,7 @@ mod tests { for (specifier_str, path, query, fragment) in data { let specifier = Specifier::parse(specifier_str)?; - assert_eq!(specifier.path.as_str(), path, "{specifier_str}"); + assert_eq!(specifier.path, path, "{specifier_str}"); assert_eq!(specifier.query.unwrap_or(""), query, "{specifier_str}"); assert_eq!(specifier.fragment.unwrap_or(""), fragment, "{specifier_str}"); } @@ -217,7 +234,7 @@ mod tests { for (specifier_str, path, query, fragment) in data { let specifier = Specifier::parse(specifier_str)?; - assert_eq!(specifier.path.as_str(), path, "{specifier_str}"); + assert_eq!(specifier.path, path, "{specifier_str}"); assert_eq!(specifier.query.unwrap_or(""), query, "{specifier_str}"); assert_eq!(specifier.fragment.unwrap_or(""), fragment, "{specifier_str}"); } diff --git a/crates/oxc_resolver/src/tests/resolve.rs b/crates/oxc_resolver/src/tests/resolve.rs index fa2b5ab2a..5ad563cb0 100644 --- a/crates/oxc_resolver/src/tests/resolve.rs +++ b/crates/oxc_resolver/src/tests/resolve.rs @@ -37,9 +37,12 @@ fn resolve() { ("find node_modules outside of node_modules", f.join("browser-module/node_modules"), "m1/a", f.join("node_modules/m1/a.js")), ("don't crash on main field pointing to self", f.clone(), "./main-field-self", f.join("./main-field-self/index.js")), ("don't crash on main field pointing to self (2)", f.clone(), "./main-field-self2", f.join("./main-field-self2/index.js")), - // ("handle fragment edge case (no fragment)", f.clone(), "./no#fragment/#/#", f.join("no\0#fragment/\0#/\0#.js")), - // ("handle fragment edge case (fragment)", f.clone(), "./no#fragment/#/", f.join("no.js#fragment/#/")), - // ("handle fragment escaping", f.clone(), "./no\0#fragment/\0#/\0##fragment", f.join("no\0#fragment/\0#\0#.js#fragment")), + // enhanced-resolve has `#` prepended with a `\0`, they are removed from the + // following 3 expected test results. + // See https://github.com/webpack/enhanced-resolve#escaping + ("handle fragment edge case (no fragment)", f.clone(), "./no#fragment/#/#", f.join("no#fragment/#/#.js")), + ("handle fragment edge case (fragment)", f.clone(), "./no#fragment/#/", f.join("no.js#fragment/#/")), + ("handle fragment escaping", f.clone(), "./no\0#fragment/\0#/\0##fragment", f.join("no#fragment/#/#.js#fragment")), ]; for (comment, path, request, expected) in pass {