From 2c325ef3d620144ffc74e122699a0932717fa69b Mon Sep 17 00:00:00 2001 From: Yuji Sugiura <6259812+leaysgur@users.noreply.github.com> Date: Thu, 18 Apr 2024 20:18:46 +0900 Subject: [PATCH] fix(semantic/jsdoc): Skip parsing `@` inside of backticks (#3017) This PR aims to support these cases. ````js /** * This is normal comment, `@xxx` should not be parsed as a tag. * * @example ```ts // @comment @decoratorInComment class Foo { } ``` */ ```` Only `@example` should be parsed as a tag. --- crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs | 26 +++++++++++++++++++ crates/oxc_semantic/src/jsdoc/parser/parse.rs | 20 ++++++++++++-- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs index 811719735..4daaae97c 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/jsdoc.rs @@ -304,4 +304,30 @@ line2 let (type_part, comment_part) = tag.type_comment(); assert_eq!((type_part, comment_part.parsed()), (None, "flattened data".to_string())); } + + #[test] + fn parses_with_backticks() { + let allocator = Allocator::default(); + let semantic = build_semantic( + &allocator, + " + /** + * This is normal comment, `@xxx` should not parsed as tag. + * + * @example ```ts + // @comment + @decoratorInComment + class Foo { } + ``` + */ + ", + ); + let jsdoc = semantic.jsdoc().iter_all().next().unwrap(); + + let mut tags = jsdoc.tags().iter(); + assert_eq!(tags.len(), 1); + + let tag = tags.next().unwrap(); + assert_eq!(tag.kind.parsed(), "example"); + } } diff --git a/crates/oxc_semantic/src/jsdoc/parser/parse.rs b/crates/oxc_semantic/src/jsdoc/parser/parse.rs index 37f6d1082..377e0752f 100644 --- a/crates/oxc_semantic/src/jsdoc/parser/parse.rs +++ b/crates/oxc_semantic/src/jsdoc/parser/parse.rs @@ -19,14 +19,30 @@ pub fn parse_jsdoc(source_text: &str, jsdoc_span_start: u32) -> (JSDocCommentPar // So, find `@` to split comment and each tag. 
// But `@` can be found inside of `{}` (e.g. `{@see link}`), it should be distinguished. let mut in_braces = false; + // Also, `@` is often found inside of backtick(` or ```), like markdown. + let mut in_backticks = false; let mut comment_found = false; // Parser local offsets, not for global span let (mut start, mut end) = (0, 0); - for ch in source_text.chars() { + + let mut chars = source_text.chars().peekable(); + while let Some(ch) = chars.next() { + let can_parse = !(in_braces || in_backticks); match ch { + // NOTE: For now, only odd backtick(s) are handled. + // - 1 backtick: inline code + // - 3, 5, ... backticks: code fence + // Not so common but technically, major markdown parser can handle 3 or more backticks as code fence. + // (for nested code blocks) + // But for now, 4, 6, ... backticks are not handled here to keep things simple... + '`' => { + if chars.peek().is_some_and(|&c| c != '`') { + in_backticks = !in_backticks; + } + } '{' => in_braces = true, '}' => in_braces = false, - '@' if !in_braces => { + '@' if can_parse => { let part = &source_text[start..end]; let span = Span::new( jsdoc_span_start + u32::try_from(start).unwrap_or_default(),