mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 12:51:57 +00:00
This PR re-implements lexing identifiers with a fast path for the most common case - identifiers which are pure ASCII characters, using the new `Source` / `SourcePosition` APIs. Lexing identifiers is a hot path, and accounts for the majority of the time the Lexer spends. The performance bump from this change is (if I do say so myself!) quite decent. I've spent a lot of time tuning the implementation, which gained a further 10-15% on the Lexer benchmarks compared to my first, simpler attempt. Some of the design decisions, if they look odd, are likely motivated by gains in performance.

### Techniques

This implementation uses a few different strategies for performance:

* Search byte-by-byte, not char-by-char.
* Process batches of 32 bytes at a time to reduce bounds checks.
* Mark uncommon paths `#[cold]`.

### Structure

The implementation is built in 3 layers:

1. ASCII characters only.
2. ASCII and Unicode characters.
3. `\` escape sequences (and all the above).

`identifier_name_handler` starts at the top layer, and is optimized for consuming ASCII as fast as possible. Each "layer" is considered more uncommon than the previous, and dropping down a layer is a de-opt. I'm assuming that 95%+ of JavaScript code does not include either Unicode characters or escapes in identifiers, so the speed of the fast path is prioritised. That said, once a Unicode character is encountered, the next layer does expect to find further Unicode characters, rather than de-opting over and over again. If an identifier *starts* with a Unicode character, it enters the code straight on the 2nd layer, so is not penalised by going through a `#[cold]` boundary. Lexing Unicode is never going to be as fast as ASCII, but still I felt it was important not to penalise it unnecessarily, so as not to be Anglo-centric.

### ASCII search macro

The main ASCII search is implemented as a macro. I found that, for reasons I don't understand, it's significantly faster to have all the code in a single function, even compared to multiple functions marked `#[inline]` or `#[inline(always)]`. The fastest implementation also requires some code to be repeated twice, which is nicer to do with a macro. This macro, and the `ByteMatchTable` types that go with it, are designed to be re-usable. Next step will be to apply them for whitespace and strings, which should be fairly simple. Searching in batches of 32 bytes is also designed to be forward-compatible with SIMD.

### Bye bye `AutoCow`

`AutoCow` is removed. Instead, a string-builder is only created if it's needed, when a `\` escape is first encountered. The string builder is also more efficient than `AutoCow` was, as it copies bytes in chunks, rather than 1-by-1. This won't make much difference for identifiers, as escapes are so rare anyway, but this same technique can be used for strings, where they're more common.
158 lines
7.1 KiB
TOML
158 lines
7.1 KiB
TOML
[workspace]
# Member crates are picked up by glob; anything listed in `exclude` is left
# out of the workspace even though it matches one of the globs.
members = [
    "crates/*",
    "tasks/*",
    "napi/*",
    "wasm/*",
]
exclude = ["tasks/lint_rules"]
resolver = "2"
|
|
|
|
[workspace.package]
# Package metadata shared across the workspace. Which member crates inherit
# these keys is decided in their own manifests.
authors = ["Boshen <boshenc@gmail.com>", "Oxc contributors"]
categories = ["development-tools", "web-programming", "compilers"]
description = "A collection of JavaScript tools written in Rust."
edition = "2021"
homepage = "https://oxc-project.github.io"
keywords = ["JavaScript", "TypeScript", "parser", "linter", "minifier"]
license = "MIT"
repository = "https://github.com/oxc-project/oxc"
# This manifest relies on `[workspace.package]` inheritance (Cargo 1.64+) and
# on the `[workspace.lints]` table (Cargo 1.74+), so the declared minimum
# toolchain must be at least 1.74; the previous value "1.60" predates both.
rust-version = "1.74"
|
|
|
|
[workspace.lints.rust]

[workspace.lints.clippy]
# Lint groups are given `priority = -1` so that the individual lint entries in
# this table (default priority 0) always take precedence over the group level.
all = { level = "warn", priority = -1 }
# restriction
dbg_macro = "warn"
todo = "warn"
unimplemented = "warn"
# I like the explicitness of this rule as it removes confusion around `clone`.
# This increases readability, and avoids calling `clone` mindlessly and heap allocating by accident.
clone_on_ref_ptr = "warn"
# `self_named_module_files` and `mod_module_files` are mutually exclusive.
# I like `mod.rs` files for better fuzzy searches on module entries.
self_named_module_files = "warn" # "-Wclippy::mod_module_files"
empty_drop = "warn"
empty_structs_with_brackets = "warn"
exit = "warn"
filetype_is_file = "warn"
get_unwrap = "warn"
impl_trait_in_params = "warn"
rc_buffer = "warn"
rc_mutex = "warn"
rest_pat_in_fully_bound_structs = "warn"
unnecessary_safety_comment = "warn"
undocumented_unsafe_blocks = "warn"
# I want to write the best Rust code so pedantic is enabled.
# We should only disable rules globally if they are either false positives, chaotic, or do not make sense.
pedantic = { level = "warn", priority = -1 }
# Allowed rules
# pedantic
# This rule is too pedantic, I don't want to force this because naming things is hard.
module_name_repetitions = "allow"
# All triggers are mostly ignored in our codebase, so this is ignored globally.
struct_excessive_bools = "allow"
too_many_lines = "allow"
# #[must_use] is creating too much noise for this codebase, it does not add much value except nagging
# the programmer to add a #[must_use] after clippy has been run.
# Having #[must_use] everywhere also hinders readability.
must_use_candidate = "allow"
# used_underscore_binding= "allow"
doc_markdown = "allow"
# nursery
# `const` functions do not make sense for our project because this is not a `const` library.
# This rule also confuses newcomers and forces them to add `const` blindly without any reason.
missing_const_for_fn = "allow"
|
|
|
|
[workspace.dependencies]
# Entries are kept in alphabetical order within each group; key order carries
# no meaning for Cargo, so sorting only makes lookups and diffs easier.

# publish = true
oxc = { version = "0.6.0", path = "crates/oxc" }
oxc_allocator = { version = "0.6.0", path = "crates/oxc_allocator" }
oxc_ast = { version = "0.6.0", path = "crates/oxc_ast" }
oxc_codegen = { version = "0.6.0", path = "crates/oxc_codegen" }
oxc_diagnostics = { version = "0.6.0", path = "crates/oxc_diagnostics" }
oxc_index = { version = "0.6.0", path = "crates/oxc_index" }
oxc_minifier = { version = "0.6.0", path = "crates/oxc_minifier" }
oxc_parser = { version = "0.6.0", path = "crates/oxc_parser" }
oxc_semantic = { version = "0.6.0", path = "crates/oxc_semantic" }
oxc_span = { version = "0.6.0", path = "crates/oxc_span" }
oxc_syntax = { version = "0.6.0", path = "crates/oxc_syntax" }
oxc_transformer = { version = "0.6.0", path = "crates/oxc_transformer" }

# publish = false
oxc_linter = { path = "crates/oxc_linter" }
oxc_macros = { path = "crates/oxc_macros" }
oxc_prettier = { path = "crates/oxc_prettier" }
oxc_type_synthesis = { path = "crates/oxc_type_synthesis" }

oxc_language_server = { path = "crates/oxc_language_server" }
oxc_tasks_common = { path = "tasks/common" }

assert-unchecked = { version = "0.1.2" }
bitflags = { version = "2.4.2" }
bpaf = { version = "0.9.9" }
bumpalo = { version = "3.14.0" }
codspeed-criterion-compat = { version = "2.3.3", default-features = false }
convert_case = { version = "0.6.0" }
criterion = { version = "0.5.1", default-features = false }
crossbeam-channel = { version = "0.5.11" }
dashmap = { version = "5.5.3" }
flate2 = { version = "1.0.28" }
futures = { version = "0.3.30" }
glob = { version = "0.3.1" }
ignore = { version = "0.4.22" }
index_vec = { version = "0.1.3" }
indexmap = { version = "2.2.2" }
insta = { version = "1.34.0", features = ["glob"] }
itertools = { version = "0.12.1" }
jemallocator = { version = "0.5.4" }
language-tags = { version = "0.3.2" }
lazy_static = { version = "1.4.0" }
miette = { version = "5.10.0", features = ["fancy-no-backtrace"] }
mimalloc = { version = "0.1.39" }
mime_guess = { version = "2.0.4" }
num-bigint = { version = "0.4.4" }
num-traits = { version = "0.2.17" }
phf = { version = "0.11" }
pico-args = { version = "0.5.0" }
proc-macro2 = { version = "1.0.78" }
project-root = { version = "0.2.2" }
quote = { version = "1.0.35" }
rayon = { version = "1.8.1" }
regex = { version = "1.10.3" }
ropey = { version = "1.6.1" }
rustc-hash = { version = "1.1.0", default-features = false, features = ["std"] }
ryu-js = { version = "1.0.0" }
seq-macro = { version = "0.3.5" }
serde = { version = "1.0.196" }
serde-wasm-bindgen = { version = "0.6.3" }
serde_json = { version = "1.0.113" }
stacker = { version = "0.1.15" }
static_assertions = { version = "1.1.0" }
syn = { version = "=1.0.109" }
thiserror = { version = "1.0.56" }
tokio = { version = "1" }
tower-lsp = { version = "0.20.0", features = ["proposed"] }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3" }
tsify = { version = "0.4.5" }
unicode-id-start = { version = "1.1.2" }
ureq = { version = "2.9.1", default-features = false, features = ["tls", "json"] }
url = { version = "2.5.0" }
walkdir = { version = "2.4.0" }
wasm-bindgen = { version = "0.2" }
|
|
|
|
[profile.release]
# Configurations explicitly listed here for clarity.
# Using the best options for performance.
opt-level = 3
lto = "fat"
codegen-units = 1
strip = "symbols"
debug = false
panic = "abort" # Let it crash and force ourselves to write safe Rust.

# Per-package override, declared after its parent table: `oxc_wasm` is built
# with `opt-level = 'z'` instead of the profile-wide `opt-level = 3`.
[profile.release.package.oxc_wasm]
opt-level = 'z'
|
|
|
|
# Release profile variant that keeps symbols: it inherits every `release`
# setting, then turns stripping off and debug info on.
# Select it with the `--profile release-debug` flag,
# e.g. `cargo build --profile release-debug`.
[profile.release-debug]
inherits = "release"
debug = true
strip = false
|