mirror of
https://github.com/danbulant/oxc
synced 2026-05-25 04:42:10 +00:00
434 lines
12 KiB
Rust
434 lines
12 KiB
Rust
#![allow(dead_code)]
|
|
|
|
//! Replace JSON comments with whitespace, in place.
|
|
//!
|
|
//! <https://github.com/tmccombs/json-comments-rs/pull/11>
|
|
|
|
//! `json_comments` is a library to strip out comments from JSON-like text. By processing text
|
|
//! through a [`StripComments`] adapter first, it is possible to use a standard JSON parser (such
|
|
//! as [serde_json](https://crates.io/crates/serde_json)) with quasi-json input that contains
|
|
//! comments.
|
|
//!
|
|
//! In fact, this code makes few assumptions about the input and could probably be used to strip
|
|
//! comments out of other types of code as well, provided that strings use double quotes and
|
|
//! backslashes are used for escapes in strings.
|
|
//!
|
|
//! The following types of comments are supported:
|
|
//! - C style block comments (`/* ... */`)
|
|
//! - C style line comments (`// ...`)
|
|
//! - Shell style line comments (`# ...`)
|
|
//!
|
|
//! ## Example using serde_json
|
|
//!
|
|
//! ```
|
|
//! use serde_json::{Result, Value};
|
|
//! use json_comments::StripComments;
|
|
//!
|
|
//! # fn main() -> Result<()> {
|
|
//! // Some JSON input data as a &str. Maybe this comes from the user.
|
|
//! let data = r#"
|
|
//! {
|
|
//! "name": /* full */ "John Doe",
|
|
//! "age": 43,
|
|
//! "phones": [
|
|
//! "+44 1234567", // work phone
|
|
//! "+44 2345678" // home phone
|
|
//! ]
|
|
//! }"#;
|
|
//!
|
|
//! // Strip the comments from the input (use `as_bytes()` to get a `Read`).
|
|
//! let stripped = StripComments::new(data.as_bytes());
|
|
//! // Parse the string of data into serde_json::Value.
|
|
//! let v: Value = serde_json::from_reader(stripped)?;
|
|
//!
|
|
//! println!("Please call {} at the number {}", v["name"], v["phones"][0]);
|
|
//!
|
|
//! # Ok(())
|
|
//! # }
|
|
//! ```
|
|
//!
|
|
use std::io::{ErrorKind, Read, Result};
|
|
|
|
/// Position of the byte scanner relative to strings and comments.
///
/// The scanner advances one byte at a time; each variant names what the
/// previously consumed bytes mean for the next one.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// Outside of any string or comment.
    Top,
    /// Inside a double-quoted string.
    InString,
    /// Directly after a backslash inside a string; the next byte is skipped
    /// without being inspected.
    StringEscape,
    /// Directly after a `/` seen at top level; the next byte selects the
    /// comment kind.
    InComment,
    /// Inside a `/* ... */` block comment.
    InBlockComment,
    /// Inside a block comment, directly after a `*` that may begin the
    /// closing `*/`.
    MaybeCommentEnd,
    /// Inside a line comment, which runs until the next `\n`.
    InLineComment,
}
|
|
|
|
use State::{
|
|
InBlockComment, InComment, InLineComment, InString, MaybeCommentEnd, StringEscape, Top,
|
|
};
|
|
|
|
/// A [`Read`] that transforms another [`Read`] so that it changes all comments to spaces so that a downstream json parser
|
|
/// (such as json-serde) doesn't choke on them.
|
|
///
|
|
/// The supported comments are:
|
|
/// - C style block comments (`/* ... */`)
|
|
/// - C style line comments (`// ...`)
|
|
/// - Shell style line comments (`# ...`)
|
|
///
|
|
/// ## Example
|
|
/// ```
|
|
/// use json_comments::StripComments;
|
|
/// use std::io::Read;
|
|
///
|
|
/// let input = r#"{
|
|
/// // c line comment
|
|
/// "a": "comment in string /* a */",
|
|
/// ## shell line comment
|
|
/// } /** end */"#;
|
|
///
|
|
/// let mut stripped = String::new();
|
|
/// StripComments::new(input.as_bytes()).read_to_string(&mut stripped).unwrap();
|
|
///
|
|
/// assert_eq!(stripped, "{
|
|
/// \n\"a\": \"comment in string /* a */\",
|
|
/// \n} ");
|
|
///
|
|
/// ```
|
|
///
|
|
pub struct StripComments<T: Read> {
|
|
inner: T,
|
|
state: State,
|
|
settings: CommentSettings,
|
|
}
|
|
|
|
impl<T> StripComments<T>
|
|
where
|
|
T: Read,
|
|
{
|
|
pub fn new(input: T) -> Self {
|
|
Self { inner: input, state: Top, settings: CommentSettings::default() }
|
|
}
|
|
|
|
/// Create a new `StripComments` with settings which may be different from the default.
|
|
///
|
|
/// This is useful if you wish to disable allowing certain kinds of comments.
|
|
#[inline]
|
|
pub fn with_settings(settings: CommentSettings, input: T) -> Self {
|
|
Self { inner: input, state: Top, settings }
|
|
}
|
|
}
|
|
|
|
/// Return early from the enclosing function with an `InvalidData` I/O error.
///
/// Expands to a bare `return` expression (no trailing semicolon) so it can be
/// used both as a statement and as a `match` arm.
macro_rules! invalid_data {
    () => {
        return Err(::std::io::Error::from(::std::io::ErrorKind::InvalidData))
    };
}
|
|
|
|
impl<T> Read for StripComments<T>
|
|
where
|
|
T: Read,
|
|
{
|
|
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
|
|
let count = self.inner.read(buf)?;
|
|
if count > 0 {
|
|
strip_buf(&mut self.state, &mut buf[..count], self.settings)?;
|
|
} else if self.state != Top && self.state != InLineComment {
|
|
invalid_data!();
|
|
}
|
|
Ok(count)
|
|
}
|
|
}
|
|
|
|
fn strip_buf(state: &mut State, buf: &mut [u8], settings: CommentSettings) -> Result<()> {
|
|
for c in &mut *buf {
|
|
*state = match state {
|
|
Top => top(c, settings),
|
|
InString => in_string(*c),
|
|
StringEscape => InString,
|
|
InComment => in_comment(c, settings)?,
|
|
InBlockComment => in_block_comment(c),
|
|
MaybeCommentEnd => maybe_comment_end(c),
|
|
InLineComment => in_line_comment(c),
|
|
}
|
|
}
|
|
|
|
Ok(())
|
|
}
|
|
|
|
/// Strips comments from a string in place, replacing it with whitespaces.
|
|
///
|
|
/// /// ## Example
|
|
/// ```
|
|
/// use json_comments::strip_comments_in_place;
|
|
///
|
|
/// let mut string = String::from(r#"{
|
|
/// // c line comment
|
|
/// "a": "comment in string /* a */",
|
|
/// ## shell line comment
|
|
/// } /** end */"#);
|
|
///
|
|
/// strip_comments_in_place(&mut string, Default::default()).unwrap();
|
|
///
|
|
/// assert_eq!(string, "{
|
|
/// \n\"a\": \"comment in string /* a */\",
|
|
/// \n} ");
|
|
///
|
|
/// ```
|
|
pub fn strip_comments_in_place(s: &mut str) -> Result<()> {
|
|
strip_buf(&mut Top, unsafe { s.as_bytes_mut() }, CommentSettings::all())
|
|
}
|
|
|
|
/// Selects which comment styles `StripComments` recognises.
///
/// By default every comment style is enabled.
#[derive(Clone, Copy, Debug)]
pub struct CommentSettings {
    /// Whether C-style block comments (`/* ... */`) are allowed.
    block_comments: bool,
    /// Whether C-style `//` line comments are allowed.
    slash_line_comments: bool,
    /// Whether shell-style `#` line comments are allowed.
    hash_line_comments: bool,
}
|
|
|
|
impl Default for CommentSettings {
|
|
fn default() -> Self {
|
|
Self::all()
|
|
}
|
|
}
|
|
|
|
impl CommentSettings {
|
|
/// Enable all comment Styles
|
|
pub const fn all() -> Self {
|
|
Self { block_comments: true, slash_line_comments: true, hash_line_comments: true }
|
|
}
|
|
/// Only allow line comments starting with `#`
|
|
pub const fn hash_only() -> Self {
|
|
Self { hash_line_comments: true, block_comments: false, slash_line_comments: false }
|
|
}
|
|
/// Only allow "c-style" comments.
|
|
///
|
|
/// Specifically, line comments beginning with `//` and
|
|
/// block comment like `/* ... */`.
|
|
pub const fn c_style() -> Self {
|
|
Self { block_comments: true, slash_line_comments: true, hash_line_comments: false }
|
|
}
|
|
|
|
/// Create a new `StripComments` for `input`, using these settings.
|
|
///
|
|
/// Transform `input` into a [`Read`] that strips out comments.
|
|
/// The types of comments to support are determined by the configuration of
|
|
/// `self`.
|
|
///
|
|
/// ## Examples
|
|
///
|
|
/// ```
|
|
/// use json_comments::CommentSettings;
|
|
/// use std::io::Read;
|
|
///
|
|
/// let input = r#"{
|
|
/// // c line comment
|
|
/// "a": "b"
|
|
/// /** multi line
|
|
/// comment
|
|
/// */ }"#;
|
|
///
|
|
/// let mut stripped = String::new();
|
|
/// CommentSettings::c_style().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap();
|
|
///
|
|
/// assert_eq!(stripped, "{
|
|
/// \n\"a\": \"b\"
|
|
/// }");
|
|
/// ```
|
|
///
|
|
/// ```
|
|
/// use json_comments::CommentSettings;
|
|
/// use std::io::Read;
|
|
///
|
|
/// let input = r#"{
|
|
/// ## shell line comment
|
|
/// "a": "b"
|
|
/// }"#;
|
|
///
|
|
/// let mut stripped = String::new();
|
|
/// CommentSettings::hash_only().strip_comments(input.as_bytes()).read_to_string(&mut stripped).unwrap();
|
|
///
|
|
/// assert_eq!(stripped, "{
|
|
/// \n\"a\": \"b\"\n}");
|
|
/// ```
|
|
#[inline]
|
|
pub fn strip_comments<I: Read>(self, input: I) -> StripComments<I> {
|
|
StripComments::with_settings(self, input)
|
|
}
|
|
}
|
|
|
|
fn top(c: &mut u8, settings: CommentSettings) -> State {
|
|
match *c {
|
|
b'"' => InString,
|
|
b'/' => {
|
|
*c = b' ';
|
|
InComment
|
|
}
|
|
b'#' if settings.hash_line_comments => {
|
|
*c = b' ';
|
|
InLineComment
|
|
}
|
|
_ => Top,
|
|
}
|
|
}
|
|
|
|
fn in_string(c: u8) -> State {
|
|
match c {
|
|
b'"' => Top,
|
|
b'\\' => StringEscape,
|
|
_ => InString,
|
|
}
|
|
}
|
|
|
|
fn in_comment(c: &mut u8, settings: CommentSettings) -> Result<State> {
|
|
let new_state = match c {
|
|
b'*' if settings.block_comments => InBlockComment,
|
|
b'/' if settings.slash_line_comments => InLineComment,
|
|
_ => invalid_data!(),
|
|
};
|
|
*c = b' ';
|
|
Ok(new_state)
|
|
}
|
|
|
|
fn in_block_comment(c: &mut u8) -> State {
|
|
let old = *c;
|
|
*c = b' ';
|
|
if old == b'*' {
|
|
MaybeCommentEnd
|
|
} else {
|
|
InBlockComment
|
|
}
|
|
}
|
|
|
|
fn maybe_comment_end(c: &mut u8) -> State {
|
|
let old = *c;
|
|
*c = b' ';
|
|
if old == b'/' {
|
|
*c = b' ';
|
|
Top
|
|
} else {
|
|
InBlockComment
|
|
}
|
|
}
|
|
|
|
fn in_line_comment(c: &mut u8) -> State {
|
|
if *c == b'\n' {
|
|
Top
|
|
} else {
|
|
*c = b' ';
|
|
InLineComment
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::{ErrorKind, Read};

    /// A run of `n` spaces: what an `n`-byte comment is replaced with.
    fn blank(n: usize) -> String {
        " ".repeat(n)
    }

    /// Strip `input` through the `Read` adapter with default settings and
    /// check that the output has exactly as many bytes as the input.
    fn strip_string(input: &str) -> String {
        let mut out = String::new();
        let count = StripComments::new(input.as_bytes()).read_to_string(&mut out).unwrap();
        assert_eq!(count, input.len());
        out
    }

    #[test]
    fn block_comments() {
        let json = r#"{/* Comment */"hi": /** abc */ "bye"}"#;
        let stripped = strip_string(json);
        // `/* Comment */` is 13 bytes, `/** abc */` is 10 bytes.
        let expected = format!("{{{}\"hi\": {} \"bye\"}}", blank(13), blank(10));
        assert_eq!(stripped, expected);
    }

    #[test]
    fn block_comments_with_possible_end() {
        let json = r#"{/* Comment*PossibleEnd */"hi": /** abc */ "bye"}"#;
        let stripped = strip_string(json);
        // `/* Comment*PossibleEnd */` is 25 bytes, `/** abc */` is 10 bytes.
        let expected = format!("{{{}\"hi\": {} \"bye\"}}", blank(25), blank(10));
        assert_eq!(stripped, expected);
    }

    #[test]
    fn line_comments() {
        let json = "{\n    // line comment\n    \"a\": 4,\n    # another\n}";

        // Indentation and newlines are kept; `// line comment` is 15 bytes
        // and `# another` is 9 bytes.
        let expected = format!("{{\n    {}\n    \"a\": 4,\n    {}\n}}", blank(15), blank(9));

        assert_eq!(strip_string(json), expected);
    }

    #[test]
    fn incomplete_string() {
        let json = r#""foo"#;
        let mut stripped = String::new();

        let err = StripComments::new(json.as_bytes()).read_to_string(&mut stripped).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment() {
        let json = r#"/* foo "#;
        let mut stripped = String::new();

        let err = StripComments::new(json.as_bytes()).read_to_string(&mut stripped).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
    }

    #[test]
    fn incomplete_comment2() {
        let json = r#"/* foo *"#;
        let mut stripped = String::new();

        let err = StripComments::new(json.as_bytes()).read_to_string(&mut stripped).unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
    }

    #[test]
    fn no_hash_comments() {
        let json = "# bad comment\n{\"a\": \"b\"}";
        let mut stripped = String::new();
        CommentSettings::c_style()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap();
        // With hash comments disabled the `#` line passes through untouched.
        assert_eq!(stripped, json);
    }

    #[test]
    fn no_slash_line_comments() {
        let json = "// bad comment\n{\"a\": \"b\"}";
        let mut stripped = String::new();
        let err = CommentSettings::hash_only()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
    }

    #[test]
    fn no_block_comments() {
        let json = r#"/* bad comment */ {"a": "b"}"#;
        let mut stripped = String::new();
        let err = CommentSettings::hash_only()
            .strip_comments(json.as_bytes())
            .read_to_string(&mut stripped)
            .unwrap_err();
        assert_eq!(err.kind(), ErrorKind::InvalidData);
    }

    #[test]
    fn strip_in_place() {
        let mut json = String::from(r#"{/* Comment */"hi": /** abc */ "bye"}"#);
        strip_comments_in_place(&mut json).unwrap();
        let expected = format!("{{{}\"hi\": {} \"bye\"}}", blank(13), blank(10));
        assert_eq!(json, expected);
    }
}
|