refactor(ast)!: store trivia comments in a sorted slice (#4045)

This gets rid of `TriviasMap`, introducing `SortedComments` in order to
store trivia comments in a sorted slice.

Closes: https://github.com/oxc-project/backlog/issues/38
This commit is contained in:
Luca Bruno 2024-07-03 19:57:36 +02:00 committed by GitHub
parent 09cc760475
commit e32b4bc57c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 117 additions and 16 deletions

View file

@ -42,7 +42,7 @@ pub use num_bigint::BigUint;
pub use crate::{
ast_builder::AstBuilder,
ast_kind::{AstKind, AstType},
trivia::{Comment, CommentKind, Trivias, TriviasMap},
trivia::{Comment, CommentKind, SortedComments, Trivias},
visit::{Visit, VisitMut},
};

View file

@ -1,8 +1,8 @@
//! Trivias such as comments and irregular whitespaces
use std::{
collections::btree_map::{BTreeMap, Range},
ops::{Deref, RangeBounds},
iter::FusedIterator,
ops::{Bound, Deref, RangeBounds},
sync::Arc,
};
@ -37,15 +37,16 @@ impl CommentKind {
}
}
pub type TriviasMap = BTreeMap<u32, Comment>;
/// Sorted set of unique trivia comments, in ascending order by starting position.
pub type SortedComments = Box<[(u32, Comment)]>;
#[derive(Debug, Clone, Default)]
pub struct Trivias(Arc<TriviasImpl>);
#[derive(Debug, Default)]
pub struct TriviasImpl {
/// Keyed by span.start
comments: TriviasMap,
/// Unique comments, ordered by increasing span-start.
comments: SortedComments,
irregular_whitespaces: Vec<Span>,
}
@ -59,7 +60,7 @@ impl Deref for Trivias {
}
impl Trivias {
pub fn new(comments: TriviasMap, irregular_whitespaces: Vec<Span>) -> Trivias {
pub fn new(comments: SortedComments, irregular_whitespaces: Vec<Span>) -> Trivias {
Self(Arc::new(TriviasImpl { comments, irregular_whitespaces }))
}
@ -67,18 +68,116 @@ impl Trivias {
self.comments.iter().map(|(start, comment)| (comment.kind, Span::new(*start, comment.end)))
}
pub fn comments_range<R>(&self, range: R) -> Range<'_, u32, Comment>
pub fn comments_range<R>(&self, range: R) -> CommentsRange<'_>
where
R: RangeBounds<u32>,
{
self.comments.range(range)
CommentsRange::new(&self.comments, range.start_bound().cloned(), range.end_bound().cloned())
}
pub fn has_comments_between(&self, span: Span) -> bool {
self.comments.range(span.start..span.end).count() > 0
self.comments_range(span.start..span.end).count() > 0
}
pub fn irregular_whitespaces(&self) -> &Vec<Span> {
&self.irregular_whitespaces
}
}
/// Double-ended iterator over a range of comments, by starting position.
pub struct CommentsRange<'a> {
comments: &'a [(u32, Comment)],
range: (Bound<u32>, Bound<u32>),
current_start: usize,
current_end: usize,
}
impl<'a> CommentsRange<'a> {
fn new(comments: &'a [(u32, Comment)], start: Bound<u32>, end: Bound<u32>) -> Self {
// Directly skip all comments that are already known to start
// outside the requested range.
let partition_start = {
let range_start = match start {
Bound::Unbounded => 0,
Bound::Included(x) => x,
Bound::Excluded(x) => x.saturating_add(1),
};
comments.partition_point(|(start, _)| *start < range_start)
};
let partition_end = {
let range_end = match end {
Bound::Unbounded => u32::MAX,
Bound::Included(x) => x,
Bound::Excluded(x) => x.saturating_sub(1),
};
comments.partition_point(|(start, _)| *start <= range_end)
};
Self {
comments,
range: (start, end),
current_start: partition_start,
current_end: partition_end,
}
}
}
impl<'c> Iterator for CommentsRange<'c> {
type Item = (&'c u32, &'c Comment);
fn next(&mut self) -> Option<Self::Item> {
if self.current_start < self.current_end {
for (start, comment) in &self.comments[self.current_start..self.current_end] {
self.current_start = self.current_start.saturating_add(1);
if self.range.contains(start) {
return Some((start, comment));
}
}
}
None
}
fn size_hint(&self) -> (usize, Option<usize>) {
let max_remaining = self.current_end.saturating_sub(self.current_start);
(0, Some(max_remaining))
}
}
impl<'c> DoubleEndedIterator for CommentsRange<'c> {
fn next_back(&mut self) -> Option<Self::Item> {
if self.current_start < self.current_end {
for (start, comment) in self.comments[self.current_start..self.current_end].iter().rev()
{
self.current_end = self.current_end.saturating_sub(1);
if self.range.contains(start) {
return Some((start, comment));
}
}
}
None
}
}
impl FusedIterator for CommentsRange<'_> {}
#[cfg(test)]
mod test {
use super::*;
#[test]
fn test_comments_range() {
let comments: SortedComments = vec![
(0, Comment { end: 4, kind: CommentKind::SingleLine }),
(5, Comment { end: 9, kind: CommentKind::SingleLine }),
(10, Comment { end: 13, kind: CommentKind::SingleLine }),
(14, Comment { end: 17, kind: CommentKind::SingleLine }),
(18, Comment { end: 23, kind: CommentKind::SingleLine }),
]
.into_boxed_slice();
let full_len = comments.len();
let trivias = Trivias::new(comments, vec![]);
assert_eq!(trivias.comments_range(..).count(), full_len);
assert_eq!(trivias.comments_range(1..).count(), full_len.saturating_sub(1));
assert_eq!(trivias.comments_range(..18).count(), full_len.saturating_sub(1));
assert_eq!(trivias.comments_range(..=18).count(), full_len);
}
}

View file

@ -1,16 +1,18 @@
use oxc_ast::{Comment, CommentKind, Trivias, TriviasMap};
use oxc_ast::{Comment, CommentKind, SortedComments, Trivias};
use oxc_span::Span;
#[derive(Debug, Default)]
pub struct TriviaBuilder {
// Duplicated comments can be added from rewind, use `BTreeMap` to ensure uniqueness
// NOTE(lucab): This is a set of unique comments. Duplicated
// comments could be generated in case of rewind; they are
// filtered out at insertion time.
comments: Vec<(u32, Comment)>,
irregular_whitespaces: Vec<Span>,
}
impl TriviaBuilder {
pub fn build(self) -> Trivias {
let comments = TriviasMap::from_iter(self.comments);
let comments = SortedComments::from_iter(self.comments);
Trivias::new(comments, self.irregular_whitespaces)
}
@ -26,9 +28,9 @@ impl TriviaBuilder {
fn add_comment(&mut self, start: u32, comment: Comment) {
// The comments array is an ordered vec, only add the comment if its not added before,
// to avoid situations where the parser needs to rewind and reinsert the comment.
if let Some(comment) = self.comments.last_mut() {
if start <= comment.0 {
// to avoid situations where the parser needs to rewind and tries to reinsert the comment.
if let Some((last_start, _)) = self.comments.last() {
if start <= *last_start {
return;
}
}