feat(oxc): add Compiler and CompilerInterface (#4954)

This PR adds a full compiler pipeline to the `oxc` crate, to stop us
from implementing the same pipeline over and over again.

relates #4455
This commit is contained in:
Boshen 2024-08-19 10:20:05 +08:00 committed by GitHub
parent b58413ffa4
commit 6800e694e3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 386 additions and 111 deletions

View file

@ -20,6 +20,11 @@ workspace = true
test = false
doctest = false
[[example]]
name = "compiler"
path = "examples/compiler.rs"
required-features = ["full"]
[dependencies]
oxc_allocator = { workspace = true }
oxc_ast = { workspace = true }
@ -37,14 +42,18 @@ oxc_sourcemap = { workspace = true, optional = true }
oxc_isolated_declarations = { workspace = true, optional = true }
[features]
serialize = ["oxc_ast/serialize", "oxc_semantic?/serialize", "oxc_span/serialize", "oxc_syntax/serialize"]
semantic = ["oxc_semantic"]
transformer = ["oxc_transformer"]
minifier = ["oxc_mangler", "oxc_minifier"]
codegen = ["oxc_codegen"]
isolated_declarations = ["oxc_isolated_declarations"]
full = ["codegen", "minifier", "semantic", "transformer"]
semantic = ["oxc_semantic"]
transformer = ["oxc_transformer"]
minifier = ["oxc_mangler", "oxc_minifier"]
codegen = ["oxc_codegen"]
serialize = ["oxc_ast/serialize", "oxc_semantic?/serialize", "oxc_span/serialize", "oxc_syntax/serialize"]
sourcemap = ["oxc_sourcemap"]
sourcemap_concurrent = ["oxc_sourcemap/concurrent", "sourcemap"]
isolated_declarations = ["oxc_isolated_declarations"]
wasm = ["oxc_transformer/wasm"]

View file

@ -0,0 +1,32 @@
#![allow(clippy::print_stdout)]
use std::{env, io, path::Path};
use oxc::{span::SourceType, Compiler};
// Instructions:
// 1. create a `test.js` (or pass the path of another file as the first argument)
// 2. run either
// * `cargo run -p oxc --example compiler --features="full"`
// * `just watch 'run -p oxc --example compiler --features="full"'`
/// Compiles the file named by the first command-line argument (`test.js` when
/// no argument is given) and prints the outcome to stdout.
///
/// On success the generated code is printed; otherwise every diagnostic is
/// printed with the source text attached.
///
/// # Errors
///
/// Returns an [`io::Error`] when the file cannot be read, or when its
/// extension does not map to a known [`SourceType`].
fn main() -> io::Result<()> {
    let name = env::args().nth(1).unwrap_or_else(|| "test.js".to_string());
    let path = Path::new(&name);
    let source_text = std::fs::read_to_string(path)?;
    // The file name comes from the user, so an unrecognised extension is
    // reported as an error rather than a panic.
    let source_type = SourceType::from_path(path)
        .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, format!("{error:?}")))?;

    match Compiler::default().execute(&source_text, source_type, path) {
        Ok(printed) => println!("{printed}"),
        Err(errors) => {
            for error in errors {
                // Each report takes its own copy of the source text.
                let error = error.with_source_code(source_text.clone());
                println!("{error:?}");
            }
        }
    }
    Ok(())
}

225
crates/oxc/src/compiler.rs Normal file
View file

@ -0,0 +1,225 @@
use std::{mem, ops::ControlFlow, path::Path};
use oxc_allocator::Allocator;
use oxc_ast::{ast::Program, Trivias};
use oxc_codegen::{CodeGenerator, CodegenOptions, CommentOptions, WhitespaceRemover};
use oxc_diagnostics::OxcDiagnostic;
use oxc_parser::{ParseOptions, Parser, ParserReturn};
use oxc_span::SourceType;
use oxc_minifier::{CompressOptions, Compressor};
use oxc_semantic::{ScopeTree, SemanticBuilder, SemanticBuilderReturn, SymbolTable};
use oxc_transformer::{TransformOptions, Transformer, TransformerReturn};
/// Ready-made [`CompilerInterface`] implementation that keeps the trait's
/// default options and collects the generated code and the diagnostics.
#[derive(Default)]
pub struct Compiler {
/// Generated code received through `after_codegen`.
printed: String,
/// Diagnostics accumulated through `handle_errors`.
errors: Vec<OxcDiagnostic>,
}
impl CompilerInterface for Compiler {
    /// Appends the reported diagnostics to the ones collected so far.
    fn handle_errors(&mut self, mut errors: Vec<OxcDiagnostic>) {
        self.errors.append(&mut errors);
    }

    /// Stores the generated code, replacing whatever was stored before.
    fn after_codegen(&mut self, code: String) {
        self.printed = code;
    }
}
impl Compiler {
    /// Runs the whole pipeline on `source_text` and returns the generated code.
    ///
    /// # Errors
    ///
    /// * The list of [`OxcDiagnostic`]s collected while compiling, if there
    ///   were any.
    pub fn execute(
        &mut self,
        source_text: &str,
        source_type: SourceType,
        source_path: &Path,
    ) -> Result<String, Vec<OxcDiagnostic>> {
        self.compile(source_text, source_type, source_path);

        // Any collected diagnostic turns the run into a failure.
        if !self.errors.is_empty() {
            return Err(mem::take(&mut self.errors));
        }
        Ok(mem::take(&mut self.printed))
    }
}
/// Overridable compiler pipeline: parse, semantic analysis, transform,
/// compress and codegen.
///
/// [`CompilerInterface::compile`] drives the stages in that order.
/// Implementors customise it through the `*_options` getters, observe or
/// abort it through the `after_*` hooks, and receive diagnostics through
/// `handle_errors`.
pub trait CompilerInterface {
/// Receives the diagnostics emitted by the parser, the semantic builder or
/// the transformer. The default implementation discards them.
fn handle_errors(&mut self, _errors: Vec<OxcDiagnostic>) {}
/// Options handed to the parser. Defaults to `ParseOptions::default()`.
fn parser_options(&self) -> ParseOptions {
ParseOptions::default()
}
/// Options for the transform stage; return `None` to skip the stage.
fn transform_options(&self) -> Option<TransformOptions> {
Some(TransformOptions::default())
}
/// Options for the compress stage; return `None` to skip the stage.
fn compress_options(&self) -> Option<CompressOptions> {
Some(CompressOptions::all_true())
}
/// Options for the codegen stage; return `None` to skip the stage, in which
/// case `after_codegen` is never called.
fn codegen_options(&self) -> Option<CodegenOptions> {
Some(CodegenOptions::default())
}
/// When `true`, codegen prints with `WhitespaceRemover` instead of
/// `CodeGenerator`. Defaults to `false`.
fn remove_whitespace(&self) -> bool {
false
}
/// Hook called right after parsing, before the parser's errors are passed to
/// `handle_errors`. Returning `ControlFlow::Break` stops `compile`.
fn after_parse(&mut self, _parser_return: &mut ParserReturn) -> ControlFlow<()> {
ControlFlow::Continue(())
}
/// Hook called after semantic analysis, only when it reported no errors.
/// Returning `ControlFlow::Break` stops `compile`.
fn after_semantic(
&mut self,
_program: &mut Program<'_>,
_semantic_return: &mut SemanticBuilderReturn,
) -> ControlFlow<()> {
ControlFlow::Continue(())
}
/// Hook called after the transform stage, only when it reported no errors.
/// Returning `ControlFlow::Break` stops `compile`.
fn after_transform(
&mut self,
_program: &mut Program<'_>,
_transformer_return: &mut TransformerReturn,
) -> ControlFlow<()> {
ControlFlow::Continue(())
}
/// Receives the generated source text. The default implementation drops it.
fn after_codegen(&mut self, _printed: String) {}
/// Runs the pipeline on `source_text`.
///
/// Parser errors are reported and the pipeline carries on; errors from the
/// semantic builder or the transformer are reported and end the run.
fn compile(&mut self, source_text: &str, source_type: SourceType, source_path: &Path) {
let allocator = Allocator::default();
/* Parse */
let mut parser_return = self.parse(&allocator, source_text, source_type);
// The hook runs first: if it breaks, the parser errors below are never
// forwarded to `handle_errors`.
if self.after_parse(&mut parser_return).is_break() {
return;
}
// Parser errors are reported, but the pipeline still continues.
if !parser_return.errors.is_empty() {
self.handle_errors(parser_return.errors);
}
/* Semantic */
let mut program = parser_return.program;
let trivias = parser_return.trivias;
let mut semantic_return = self.semantic(&program, source_text, source_type, source_path);
// Semantic errors end the run before the hook is reached.
if !semantic_return.errors.is_empty() {
self.handle_errors(semantic_return.errors);
return;
}
if self.after_semantic(&mut program, &mut semantic_return).is_break() {
return;
}
// The symbol table and scope tree are moved into the transform stage; when
// that stage is skipped they are simply dropped.
let (symbols, scopes) = semantic_return.semantic.into_symbol_table_and_scope_tree();
/* Transform */
if let Some(options) = self.transform_options() {
let mut transformer_return = self.transform(
options,
&allocator,
&mut program,
source_path,
source_text,
source_type,
&trivias,
symbols,
scopes,
);
// Transformer errors end the run before the hook is reached.
if !transformer_return.errors.is_empty() {
self.handle_errors(transformer_return.errors);
return;
}
if self.after_transform(&mut program, &mut transformer_return).is_break() {
return;
}
}
/* Compress */
if let Some(options) = self.compress_options() {
self.compress(&allocator, &mut program, options);
}
/* Codegen */
if let Some(options) = self.codegen_options() {
let printed = self.codegen(&program, source_text, &trivias, options);
self.after_codegen(printed);
}
}
/// Parses `source_text` with the options from `parser_options`.
fn parse<'a>(
&self,
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
) -> ParserReturn<'a> {
Parser::new(allocator, source_text, source_type).with_options(self.parser_options()).parse()
}
/// Builds the semantic data for `program` with syntax-error checking
/// enabled, including a module record keyed by `source_path`.
fn semantic<'a>(
&self,
program: &Program<'a>,
source_text: &'a str,
source_type: SourceType,
source_path: &Path,
) -> SemanticBuilderReturn<'a> {
SemanticBuilder::new(source_text, source_type)
.with_check_syntax_error(true)
.build_module_record(source_path.to_path_buf(), program)
.build(program)
}
/// Runs the transformer over `program` in place, using the symbols and
/// scopes produced by semantic analysis.
#[allow(clippy::too_many_arguments)]
fn transform<'a>(
&self,
options: TransformOptions,
allocator: &'a Allocator,
program: &mut Program<'a>,
source_path: &Path,
source_text: &'a str,
source_type: SourceType,
trivias: &Trivias,
symbols: SymbolTable,
scopes: ScopeTree,
) -> TransformerReturn {
Transformer::new(allocator, source_path, source_type, source_text, trivias.clone(), options)
.build_with_symbols_and_scopes(symbols, scopes, program)
}
/// Runs the compressor over `program` in place.
fn compress<'a>(
&self,
allocator: &'a Allocator,
program: &mut Program<'a>,
options: CompressOptions,
) {
Compressor::new(allocator, options).build(program);
}
/// Prints `program` to source text.
///
/// Comment printing (with annotation comments preserved) is only enabled on
/// the `CodeGenerator` path; the `WhitespaceRemover` path does not call
/// `enable_comment`.
fn codegen<'a>(
&self,
program: &Program<'a>,
source_text: &'a str,
trivias: &Trivias,
options: CodegenOptions,
) -> String {
let comment_options = CommentOptions { preserve_annotate_comments: true };
if self.remove_whitespace() {
WhitespaceRemover::new().with_options(options).build(program).source_text
} else {
CodeGenerator::new()
.with_options(options)
.enable_comment(source_text, trivias.clone(), comment_options)
.build(program)
.source_text
}
}
}

View file

@ -2,6 +2,12 @@
//!
//! <https://github.com/oxc-project/oxc>
#[cfg(feature = "full")]
mod compiler;
#[cfg(feature = "full")]
pub use compiler::{Compiler, CompilerInterface};
pub mod allocator {
#[doc(inline)]
pub use oxc_allocator::*;

View file

@ -105,7 +105,7 @@ pub struct ParserReturn<'a> {
/// Parser options
#[derive(Clone, Copy)]
struct ParserOptions {
pub struct ParseOptions {
pub allow_return_outside_function: bool,
/// Emit `ParenthesizedExpression` in AST.
///
@ -117,7 +117,7 @@ struct ParserOptions {
pub preserve_parens: bool,
}
impl Default for ParserOptions {
impl Default for ParseOptions {
fn default() -> Self {
Self { allow_return_outside_function: false, preserve_parens: true }
}
@ -130,16 +130,22 @@ pub struct Parser<'a> {
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
options: ParserOptions,
options: ParseOptions,
}
impl<'a> Parser<'a> {
/// Create a new parser
pub fn new(allocator: &'a Allocator, source_text: &'a str, source_type: SourceType) -> Self {
let options = ParserOptions::default();
let options = ParseOptions::default();
Self { allocator, source_text, source_type, options }
}
#[must_use]
pub fn with_options(mut self, options: ParseOptions) -> Self {
self.options = options;
self
}
/// Allow return outside of function
///
/// By default, a return statement at the top level raises an error.
@ -279,7 +285,7 @@ impl<'a> ParserImpl<'a> {
allocator: &'a Allocator,
source_text: &'a str,
source_type: SourceType,
options: ParserOptions,
options: ParseOptions,
unique: UniquePromise,
) -> Self {
Self {
@ -347,7 +353,7 @@ impl<'a> ParserImpl<'a> {
Ok(self.ast.program(span, self.source_type, hashbang, directives, statements))
}
fn default_context(source_type: SourceType, options: ParserOptions) -> Context {
fn default_context(source_type: SourceType, options: ParseOptions) -> Context {
let mut ctx = Context::default().and_ambient(source_type.is_typescript_definition());
if source_type.module_kind() == ModuleKind::Module {
// for [top-level-await](https://tc39.es/proposal-top-level-await/)

View file

@ -22,16 +22,8 @@ test = false
doctest = false
[dependencies]
oxc = { workspace = true, features = [
"codegen",
"isolated_declarations",
"minifier",
"semantic",
"serialize",
"sourcemap",
"transformer",
] }
oxc_prettier = { workspace = true }
oxc = { workspace = true, features = ["full", "isolated_declarations", "serialize", "sourcemap"] }
oxc_prettier = { workspace = true }
oxc_tasks_common = { workspace = true }
serde = { workspace = true, features = ["derive"] }

View file

@ -1,16 +1,21 @@
use std::{cell::Cell, collections::HashSet, path::PathBuf};
use std::{cell::Cell, collections::HashSet, ops::ControlFlow, path::PathBuf, rc::Rc};
use oxc::CompilerInterface;
use oxc::allocator::{Allocator, CloneIn};
#[allow(clippy::wildcard_imports)]
use oxc::ast::{ast::*, visit::walk, Trivias, Visit};
use oxc::codegen::{CodeGenerator, CommentOptions, WhitespaceRemover};
use oxc::codegen::CodegenOptions;
use oxc::diagnostics::OxcDiagnostic;
use oxc::minifier::{CompressOptions, Compressor};
use oxc::parser::{Parser, ParserReturn};
use oxc::semantic::{ReferenceId, ScopeFlags, ScopeTree, SemanticBuilder, SymbolId, SymbolTable};
use oxc::minifier::CompressOptions;
use oxc::parser::{ParseOptions, ParserReturn};
use oxc::semantic::{
ReferenceId, ScopeFlags, ScopeTree, SemanticBuilder, SemanticBuilderReturn, SymbolId,
SymbolTable,
};
use oxc::span::{CompactStr, SourceType, Span};
use oxc::syntax::scope::ScopeId;
use oxc::transformer::{TransformOptions, Transformer, TransformerReturn};
use oxc::transformer::{TransformOptions, TransformerReturn};
use crate::suite::TestResult;
@ -29,6 +34,86 @@ pub struct Driver {
pub panicked: bool,
pub errors: Vec<OxcDiagnostic>,
pub printed: String,
// states
pub check1: Option<Rc<SemanticCollector>>,
}
impl CompilerInterface for Driver {
    /// Default parser options, with `allow_return_outside_function` taken
    /// from the driver.
    fn parser_options(&self) -> ParseOptions {
        let allow_return_outside_function = self.allow_return_outside_function;
        ParseOptions { allow_return_outside_function, ..ParseOptions::default() }
    }

    /// The driver's transform options, if any.
    fn transform_options(&self) -> Option<TransformOptions> {
        self.transform.clone()
    }

    /// All compress options enabled when `compress` is set, otherwise `None`.
    fn compress_options(&self) -> Option<CompressOptions> {
        if self.compress {
            Some(CompressOptions::all_true())
        } else {
            None
        }
    }

    /// Default codegen options when `codegen` is set, otherwise `None`.
    fn codegen_options(&self) -> Option<CodegenOptions> {
        if self.codegen {
            Some(CodegenOptions::default())
        } else {
            None
        }
    }

    fn remove_whitespace(&self) -> bool {
        self.remove_whitespace
    }

    /// Collects every reported diagnostic on the driver.
    fn handle_errors(&mut self, errors: Vec<OxcDiagnostic>) {
        self.errors.extend(errors);
    }

    /// Records whether the parser panicked, runs the comment check and
    /// exercises the serializer.
    fn after_parse(&mut self, parser_return: &mut ParserReturn) -> ControlFlow<()> {
        self.panicked = parser_return.panicked;

        if self.check_comments(&parser_return.trivias) {
            return ControlFlow::Break(());
        }

        // Make sure serialization doesn't crash; also for code coverage.
        let _serializer = parser_return.program.serializer();
        ControlFlow::Continue(())
    }

    /// When `check_semantic` is set, runs a `SemanticCollector` over the
    /// program: its errors stop the pipeline, otherwise the collector is
    /// kept in `check1` for `after_transform`.
    fn after_semantic(
        &mut self,
        program: &mut Program<'_>,
        _semantic_return: &mut SemanticBuilderReturn,
    ) -> ControlFlow<()> {
        if !self.check_semantic {
            return ControlFlow::Continue(());
        }

        let mut collector = SemanticCollector::default();
        if let Some(errors) = collector.check(program) {
            self.errors.extend(errors);
            return ControlFlow::Break(());
        }
        self.check1 = Some(Rc::new(collector));
        ControlFlow::Continue(())
    }

    /// Runs `check_semantic` against the collector stored in `check1`, if
    /// there is one, and stops the pipeline when that check returns `true`.
    fn after_transform(
        &mut self,
        program: &mut Program<'_>,
        transformer_return: &mut TransformerReturn,
    ) -> ControlFlow<()> {
        // Cloning the `Rc` releases the borrow of `self.check1` before the
        // `&mut self` call below.
        let Some(collector) = self.check1.clone() else {
            return ControlFlow::Continue(());
        };

        let should_stop = self.check_semantic(
            &collector,
            &transformer_return.symbols,
            &transformer_return.scopes,
            program,
        );
        if should_stop {
            ControlFlow::Break(())
        } else {
            ControlFlow::Continue(())
        }
    }

    /// Stores the generated code on the driver.
    fn after_codegen(&mut self, printed: String) {
        self.printed = printed;
    }
}
impl Driver {
@ -54,88 +139,8 @@ impl Driver {
}
pub fn run(&mut self, source_text: &str, source_type: SourceType) {
let allocator = Allocator::default();
let ParserReturn { mut program, errors, trivias, panicked } =
Parser::new(&allocator, source_text, source_type)
.allow_return_outside_function(self.allow_return_outside_function)
.parse();
self.panicked = panicked;
if self.check_comments(&trivias) {
return;
}
// Make sure serialization doesn't crash; also for code coverage.
let _serializer = program.serializer();
if !errors.is_empty() {
self.errors.extend(errors);
}
let semantic_ret = SemanticBuilder::new(source_text, source_type)
.with_trivias(trivias.clone())
.with_check_syntax_error(true)
.build_module_record(self.path.clone(), &program)
.build(&program);
if !semantic_ret.errors.is_empty() {
self.errors.extend(semantic_ret.errors);
return;
}
let check1 = if self.check_semantic {
let mut check1 = SemanticCollector::default();
if let Some(errors) = check1.check(&program) {
self.errors.extend(errors);
return;
}
Some(check1)
} else {
None
};
if let Some(options) = self.transform.clone() {
let (symbols, scopes) = semantic_ret.semantic.into_symbol_table_and_scope_tree();
let TransformerReturn { symbols, scopes, errors } = Transformer::new(
&allocator,
&self.path,
source_type,
source_text,
trivias.clone(),
options,
)
.build_with_symbols_and_scopes(symbols, scopes, &mut program);
if !errors.is_empty() {
self.errors.extend(errors);
return;
}
if let Some(check1) = check1 {
if self.check_semantic(&check1, &symbols, &scopes, &program) {
return;
}
}
}
if self.compress {
Compressor::new(&allocator, CompressOptions::all_true()).build(&mut program);
}
if self.codegen {
let comment_options = CommentOptions { preserve_annotate_comments: true };
let printed = if self.remove_whitespace {
WhitespaceRemover::new().build(&program).source_text
} else {
CodeGenerator::new()
.enable_comment(source_text, trivias, comment_options)
.build(&program)
.source_text
};
self.printed = printed;
}
let path = self.path.clone();
self.compile(source_text, source_type, &path);
}
fn check_comments(&mut self, trivias: &Trivias) -> bool {
@ -308,7 +313,7 @@ current reference {cur_reference_id:?}: {cur_symbol_name:?}
}
#[derive(Default)]
struct SemanticCollector {
pub struct SemanticCollector {
scope_ids: Vec<ScopeId>,
symbol_ids: Vec<SymbolId>,
reference_ids: Vec<ReferenceId>,