fix(semantic): Refactor jsdoc finding (#2437)

Partial fix for #168 

- [x] Fix general finding behavior for leading comments
- [x] Accept multiple jsdoc comments per node
- [x] Provide `get_one` and also `get_all` 
- [x] Add `iter_all()` for non-node related usage
- [x] Limit AST node kinds to parse
This commit is contained in:
Yuji Sugiura 2024-02-24 18:24:01 +09:00 committed by GitHub
parent 696818ad08
commit bc22ae569e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 226 additions and 75 deletions

View file

@ -2,14 +2,14 @@ use std::{cell::RefCell, path::Path, rc::Rc, sync::Arc};
use oxc_codegen::{Codegen, CodegenOptions};
use oxc_diagnostics::Error;
use oxc_semantic::{AstNodes, JSDocComment, ScopeTree, Semantic, SymbolTable};
use oxc_semantic::{AstNodes, JSDoc, ScopeTree, Semantic, SymbolTable};
use oxc_span::SourceType;
use crate::{
disable_directives::{DisableDirectives, DisableDirectivesBuilder},
fixer::{Fix, Message},
javascript_globals::GLOBALS,
AstNode, ESLintEnv, ESLintSettings,
ESLintEnv, ESLintSettings,
};
pub struct LintContext<'a> {
@ -155,7 +155,7 @@ impl<'a> LintContext<'a> {
}
/* JSDoc */
pub fn jsdoc(&self, node: &AstNode<'a>) -> Option<JSDocComment<'a>> {
self.semantic().jsdoc().get_by_node(node)
pub fn jsdoc(&self) -> &JSDoc<'a> {
self.semantic().jsdoc()
}
}

View file

@ -204,9 +204,10 @@ impl<'a> SemanticBuilder<'a> {
fn create_ast_node(&mut self, kind: AstKind<'a>) {
let mut flags = self.current_node_flags;
if self.jsdoc.retrieve_jsdoc_comment(kind) {
if self.jsdoc.retrieve_attached_jsdoc(&kind) {
flags |= NodeFlags::JSDoc;
}
let ast_node = AstNode::new(kind, self.current_scope_id, self.cfg.current_node_ix, flags);
let parent_node_id =
if matches!(kind, AstKind::Program(_)) { None } else { Some(self.current_node_id) };

View file

@ -1,67 +1,127 @@
use std::{collections::BTreeMap, rc::Rc};
use std::collections::BTreeMap;
use std::rc::Rc;
use oxc_ast::{AstKind, TriviasMap};
use oxc_ast::{AstKind, Comment, TriviasMap};
use oxc_span::{GetSpan, Span};
use rustc_hash::FxHashSet;
use super::{JSDoc, JSDocComment};
pub struct JSDocBuilder<'a> {
source_text: &'a str,
trivias: Rc<TriviasMap>,
docs: BTreeMap<Span, JSDocComment<'a>>,
attached_docs: BTreeMap<Span, Vec<JSDocComment<'a>>>,
leading_comments_seen: FxHashSet<u32>,
}
impl<'a> JSDocBuilder<'a> {
pub fn new(source_text: &'a str, trivias: &Rc<TriviasMap>) -> Self {
Self { source_text, trivias: Rc::clone(trivias), docs: BTreeMap::default() }
Self {
source_text,
trivias: Rc::clone(trivias),
attached_docs: BTreeMap::default(),
leading_comments_seen: FxHashSet::default(),
}
}
pub fn build(self) -> JSDoc<'a> {
JSDoc::new(self.docs)
let not_attached_docs = self
.trivias
.comments()
.iter()
.filter(|(start, _)| !self.leading_comments_seen.contains(start))
.filter_map(|(start, comment)| self.parse_if_jsdoc_comment(*start, *comment))
.collect::<Vec<_>>();
JSDoc::new(self.attached_docs, not_attached_docs)
}
/// Save the span if the given kind has a jsdoc comment attached
pub fn retrieve_jsdoc_comment(&mut self, kind: AstKind<'a>) -> bool {
if !kind.is_declaration() {
// This process is done in conjunction with the `semantic.build()`.
// This means that it's done "before" each use case (e.g. execute rule in oxlint) runs.
//
// If you need to control this attaching logic (e.g. by rule configurations), one of these would be necessary.
// - 1. Give up pre-attaching here and leave it for use cases
// - 2. Attach it more broadly(= loosely) here (may cause performance impact), so that it can be narrowed down later
//
// Since there is no reliable spec for JSDoc, there are some naive topics to consider:
//
// Q. Which node to attach JSDoc to?
// A. Each implementation decides according to its own use case.
//
// For example, TypeScript tries to target quite broadly nodes.
// > https://github.com/microsoft/TypeScript/blob/d04e3489b0d8e6bc9a8a9396a633632a5a467328/src/compiler/utilities.ts#L4195
//
// In case of `eslint-plugin-jsdoc`, its targets can be freely changed by rule configurations!
// (But, default is only about function related nodes.)
// > https://github.com/gajus/eslint-plugin-jsdoc/blob/e948bee821e964a92fbabc01574eca226e9e1252/src/iterateJsdoc.js#L2517-L2536
//
// Q. How do we attach JSDoc to that node?
// A. Also depends on the implementation.
//
// In the case of TypeScript (they have specific AST node for JSDoc and an `endOfFileToken`),
// some AST nodes have the `jsDoc` property and multiple `JSDocComment`s are attached.
//
// In the case of `eslint-plugin-jsdoc` (`@es-joy/jsdoccomment` is used)
// tries to get a only single nearest comment, with some exception handling.
//
// It is hard to say how we should behave as OXC Semantic, but the current implementation is,
// - Intuitive TypeScript-like attaching strategy
// - Provide `get_one` or `get_all` APIs for each use case
//
// Of course, this can be changed in the future.
pub fn retrieve_attached_jsdoc(&mut self, kind: &AstKind<'a>) -> bool {
// This may be diffed compare to TypeScript's `canHaveJSDoc()`, should adjust if needed
if !(kind.is_statement()
|| kind.is_declaration()
|| matches!(kind, AstKind::ParenthesizedExpression(_)))
{
return false;
}
// 1. Retrieve every kind of leading comments for this node
let span = kind.span();
let comment_text = self.find_jsdoc_comment(span);
if let Some(comment_text) = comment_text {
self.docs.insert(span, JSDocComment::new(comment_text));
let mut leading_comments = vec![];
for (start, comment) in self.trivias.comments().range(..span.start) {
if !self.leading_comments_seen.contains(start) {
leading_comments.push((start, comment));
}
self.leading_comments_seen.insert(*start);
}
comment_text.is_some()
// 2. Filter and parse JSDoc comments only
let leading_jsdoc_comments = leading_comments
.iter()
.filter_map(|(start, comment)| self.parse_if_jsdoc_comment(**start, **comment))
.collect::<Vec<_>>();
// 3. Save and return `true` to mark JSDoc flag
if !leading_jsdoc_comments.is_empty() {
self.attached_docs.insert(span, leading_jsdoc_comments);
return true;
}
false
}
/// Find the jsdoc doc in front of this span, a.k.a leading comment
fn find_jsdoc_comment(&self, span: Span) -> Option<&'a str> {
let (start, comment) = self.trivias.comments().range(..span.start).next()?;
if comment.kind().is_single_line() {
fn parse_if_jsdoc_comment(
&self,
span_start: u32,
comment: Comment,
) -> Option<JSDocComment<'a>> {
if !comment.is_multi_line() {
return None;
}
let comment_text = Span::new(*start, comment.end()).source_text(self.source_text);
// Comments beginning with /*, /***, or more than 3 stars will be ignored.
let mut chars = comment_text.chars();
if chars.next() != Some('*') {
return None;
}
if chars.next() == Some('*') {
let comment_span = Span::new(span_start, comment.end());
// Inside of marker: /*_CONTENT_*/
let comment_content = comment_span.source_text(self.source_text);
// Should start with "*": /**_CONTENT_*/
if !comment_content.starts_with('*') {
return None;
}
// The comment is the leading comment of this span if there is nothing in between.
// +2 to skip `*/` ending
let text_between = Span::new(comment.end() + 2, span.start).source_text(self.source_text);
if text_between.chars().any(|c| !c.is_whitespace()) {
return None;
}
Some(comment_text)
// Should remove the very first `*`?
Some(JSDocComment::new(comment_content))
}
}
@ -71,15 +131,13 @@ mod test {
use oxc_parser::Parser;
use oxc_span::{SourceType, Span};
use crate::{jsdoc::JSDocComment, SemanticBuilder};
use crate::{jsdoc::JSDocComment, Semantic, SemanticBuilder};
#[allow(clippy::cast_possible_truncation)]
fn get_jsdoc<'a>(
fn build_semantic<'a>(
allocator: &'a Allocator,
source_text: &'a str,
symbol: &'a str,
source_type: Option<SourceType>,
) -> Option<JSDocComment<'a>> {
) -> Semantic<'a> {
let source_type = source_type.unwrap_or_default();
let ret = Parser::new(allocator, source_text, source_type).parse();
let program = allocator.alloc(ret.program);
@ -87,13 +145,23 @@ mod test {
.with_trivias(ret.trivias)
.build(program)
.semantic;
let jsdoc = semantic.jsdoc();
let start = source_text.find(symbol).unwrap() as u32;
let span = Span::new(start, start + symbol.len() as u32);
jsdoc.get_by_span(span)
semantic
}
fn test_jsdoc(source_text: &str, symbol: &str, source_type: Option<SourceType>) {
#[allow(clippy::cast_possible_truncation)]
fn get_jsdoc<'a>(
allocator: &'a Allocator,
source_text: &'a str,
symbol: &'a str,
source_type: Option<SourceType>,
) -> Option<Vec<JSDocComment<'a>>> {
let semantic = build_semantic(allocator, source_text, source_type);
let start = source_text.find(symbol).unwrap_or(0) as u32;
let span = Span::new(start, start + symbol.len() as u32);
semantic.jsdoc().get_all_by_span(span)
}
fn test_jsdoc_found(source_text: &str, symbol: &str, source_type: Option<SourceType>) {
let allocator = Allocator::default();
assert!(
get_jsdoc(&allocator, source_text, symbol, source_type).is_some(),
@ -112,45 +180,109 @@ mod test {
#[test]
fn not_found() {
let source_texts = [
"function foo() {}",
"/* test */function foo() {}",
"/*** test */function foo() {}",
"/** test */ ; function foo() {}",
"/** test */ function foo1() {} function foo() {}",
("function foo() {}", "function foo() {}"),
("// test", "function foo() {}"),
("function foo() {}", "function foo() {}"),
("/* test */function foo() {}", "function foo() {}"),
("/** test */ ; function foo() {}", "function foo() {}"),
("/** test */ function foo1() {} function foo() {}", "function foo() {}"),
("function foo() {} /** test */", "function foo() {}"),
];
for source_text in source_texts {
test_jsdoc_not_found(source_text, "function foo() {}");
for (source_text, target) in source_texts {
test_jsdoc_not_found(source_text, target);
}
}
#[test]
fn found() {
let source_texts = [
"/** test */function foo() {}",
"
("/** test */function foo() {}", "function foo() {}"),
("/*** test */function foo() {}", "function foo() {}"),
(
"
/** test */
function foo() {}",
"/** test */
"function foo() {}",
),
(
"/** test */
function foo() {}",
"/**
"function foo() {}",
),
(
"/**
* test
* */
function foo() {}",
"/** test */
"function foo() {}",
),
(
"/** test */
function foo() {}",
"function foo() {}",
),
(
"/** test */
// noop
function foo() {}",
"function foo() {}",
),
(
"/** test */
/*noop*/
function foo() {}",
"function foo() {}",
),
("/** foo1 */ function foo1() {} /** test */ function foo() {}", "function foo() {}"),
];
for source_text in source_texts {
test_jsdoc(source_text, "function foo() {}", None);
for (source_text, target) in source_texts {
test_jsdoc_found(source_text, target, None);
}
}
#[test]
fn found_on_property_definition() {
let source = "class Foo {
fn found_ts() {
let source_texts = [(
"class Foo {
/** jsdoc */
bar: string;
}";
}",
"bar: string;",
)];
let source_type = SourceType::default().with_typescript(true);
test_jsdoc(source, "bar: string;", Some(source_type));
for (source_text, target) in source_texts {
test_jsdoc_found(source_text, target, Some(source_type));
}
}
#[test]
fn get_all_jsdoc() {
let allocator = Allocator::default();
let semantic = build_semantic(
&allocator,
r"
// noop
/** 1. ; */
// noop
;
/** 2. class X {} *//** 3. class X {} */
class X {
/** 4. foo */
foo = /** 5. () */ (() => {});
}
/** 6. let x; */
/* noop */
let x;
/** 7. Not attached but collected! */
",
Some(SourceType::default()),
);
assert_eq!(semantic.jsdoc().iter_all().count(), 7);
}
}

View file

@ -14,7 +14,8 @@ mod parser;
#[derive(Debug)]
pub struct JSDoc<'a> {
/// JSDocs by Span
docs: BTreeMap<Span, JSDocComment<'a>>,
attached: BTreeMap<Span, Vec<JSDocComment<'a>>>,
not_attached: Vec<JSDocComment<'a>>,
}
#[derive(Debug, Clone)]
@ -25,20 +26,37 @@ pub struct JSDocComment<'a> {
}
impl<'a> JSDoc<'a> {
pub fn new(docs: BTreeMap<Span, JSDocComment<'a>>) -> Self {
Self { docs }
pub fn new(
attached: BTreeMap<Span, Vec<JSDocComment<'a>>>,
not_attached: Vec<JSDocComment<'a>>,
) -> Self {
Self { attached, not_attached }
}
pub fn get_by_node<'b>(&'b self, node: &AstNode<'a>) -> Option<JSDocComment<'a>> {
pub fn get_one_by_node<'b>(&'b self, node: &AstNode<'a>) -> Option<JSDocComment<'a>> {
let Some(jsdocs) = self.get_all_by_node(node) else {
return None;
};
// If flagged, at least 1 JSDoc is attached
jsdocs.first().cloned()
}
pub fn get_all_by_node<'b>(&'b self, node: &AstNode<'a>) -> Option<Vec<JSDocComment<'a>>> {
if !node.flags().has_jsdoc() {
return None;
}
let span = node.kind().span();
self.get_by_span(span)
self.get_all_by_span(span)
}
pub fn get_by_span<'b>(&'b self, span: Span) -> Option<JSDocComment<'a>> {
self.docs.get(&span).cloned()
pub fn get_all_by_span<'b>(&'b self, span: Span) -> Option<Vec<JSDocComment<'a>>> {
self.attached.get(&span).cloned()
}
pub fn iter_all<'b>(&'b self) -> impl Iterator<Item = &JSDocComment<'a>> + 'b {
self.attached.values().flatten().chain(self.not_attached.iter())
}
}