From 4d3e7efe25135e3e7bb83250cc17d645a1bac178 Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Mon, 9 Sep 2019 10:43:10 -0700 Subject: [PATCH 1/3] Close a bunch of holes in external command args Previously, there was a single parsing rule for "bare words" that applied to both internal and external commands. This meant that, because `cargo +nightly` needed to work, we needed to add `+` as a valid character in bare words. The number of characters continued to grow, and the situation was becoming untenable. The current strategy would eventually eat up all syntax and make it impossible to add syntax like `@foo` to internal commands. This patch significantly restricts bare words and introduces a new token type (`ExternalWord`). An `ExternalWord` expands to an error in the internal syntax, but expands to a bare word in the external syntax. `ExternalWords` are highlighted in grey in the shell. --- src/errors.rs | 15 +++ src/evaluate/evaluator.rs | 7 +- src/parser/hir.rs | 8 ++ src/parser/hir/baseline_parse.rs | 46 ++++--- src/parser/hir/baseline_parse_tokens.rs | 16 +-- src/parser/parse/call_node.rs | 16 +++ src/parser/parse/parser.rs | 161 +++++++++++++++++++----- src/parser/parse/pipeline.rs | 36 ++++++ src/parser/parse/token_tree.rs | 21 ++-- src/parser/parse/token_tree_builder.rs | 32 ++++- src/parser/parse/tokens.rs | 6 +- src/parser/parse_command.rs | 3 +- src/shell/helper.rs | 6 +- 13 files changed, 300 insertions(+), 73 deletions(-) diff --git a/src/errors.rs b/src/errors.rs index 3eb8e33e..d97435d2 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -38,6 +38,7 @@ pub enum ArgumentError { MissingMandatoryFlag(String), MissingMandatoryPositional(String), MissingValueForName(String), + InvalidExternalWord, } #[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd, Serialize, Deserialize)] @@ -136,6 +137,16 @@ impl ShellError { .start() } + pub(crate) fn invalid_external_word(span: Span) -> ShellError { + ProximateShellError::ArgumentError { + command: "Invalid argument 
to Nu command (did you mean to call an external command?)" + .into(), + error: ArgumentError::InvalidExternalWord, + span, + } + .start() + } + pub(crate) fn parse_error( error: nom::Err<(nom5_locate::LocatedSpan<&str>, nom::error::ErrorKind)>, ) -> ShellError { @@ -190,6 +201,10 @@ impl ShellError { error, span, } => match error { + ArgumentError::InvalidExternalWord => Diagnostic::new( + Severity::Error, + format!("Invalid bare word for Nu command (did you intend to invoke an external command?)")) + .with_label(Label::new_primary(span)), ArgumentError::MissingMandatoryFlag(name) => Diagnostic::new( Severity::Error, format!( diff --git a/src/evaluate/evaluator.rs b/src/evaluate/evaluator.rs index ee241583..6419ab73 100644 --- a/src/evaluate/evaluator.rs +++ b/src/evaluate/evaluator.rs @@ -1,5 +1,5 @@ use crate::data::base::Block; -use crate::errors::Description; +use crate::errors::{ArgumentError, Description}; use crate::parser::{ hir::{self, Expression, RawExpression}, CommandRegistry, Text, @@ -39,6 +39,11 @@ pub(crate) fn evaluate_baseline_expr( ) -> Result, ShellError> { match &expr.item { RawExpression::Literal(literal) => Ok(evaluate_literal(expr.copy_span(literal), source)), + RawExpression::ExternalWord => Err(ShellError::argument_error( + "Invalid external word", + ArgumentError::InvalidExternalWord, + expr.span(), + )), RawExpression::FilePath(path) => Ok(Value::path(path.clone()).tagged(expr.span())), RawExpression::Synthetic(hir::Synthetic::String(s)) => Ok(Value::string(s).tagged_unknown()), RawExpression::Variable(var) => evaluate_reference(var, scope, source), diff --git a/src/parser/hir.rs b/src/parser/hir.rs index 3e155cc0..aaf5bb77 100644 --- a/src/parser/hir.rs +++ b/src/parser/hir.rs @@ -83,6 +83,7 @@ impl ToDebug for Call { #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] pub enum RawExpression { Literal(Literal), + ExternalWord, Synthetic(Synthetic), Variable(Variable), Binary(Box), @@ -113,6 +114,7 @@ 
impl RawExpression { match self { RawExpression::Literal(literal) => literal.type_name(), RawExpression::Synthetic(synthetic) => synthetic.type_name(), + RawExpression::ExternalWord => "externalword", RawExpression::FilePath(..) => "filepath", RawExpression::Variable(..) => "variable", RawExpression::List(..) => "list", @@ -189,6 +191,7 @@ impl ToDebug for Expression { match self.item() { RawExpression::Literal(l) => l.tagged(self.span()).fmt_debug(f, source), RawExpression::FilePath(p) => write!(f, "{}", p.display()), + RawExpression::ExternalWord => write!(f, "{}", self.span().slice(source)), RawExpression::Synthetic(Synthetic::String(s)) => write!(f, "{:?}", s), RawExpression::Variable(Variable::It(_)) => write!(f, "$it"), RawExpression::Variable(Variable::Other(s)) => write!(f, "${}", s.slice(source)), @@ -225,6 +228,11 @@ impl From> for Expression { } } +/// Literals are expressions that are: +/// +/// 1. Copy +/// 2. Can be evaluated without additional context +/// 3. Evaluation cannot produce an error #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Serialize, Deserialize)] pub enum Literal { Number(Number), diff --git a/src/parser/hir/baseline_parse.rs b/src/parser/hir/baseline_parse.rs index d76a88d5..4437a6d3 100644 --- a/src/parser/hir/baseline_parse.rs +++ b/src/parser/hir/baseline_parse.rs @@ -1,10 +1,14 @@ use crate::context::Context; +use crate::errors::ShellError; use crate::parser::{hir, RawToken, Token}; use crate::Text; use std::path::PathBuf; -pub fn baseline_parse_single_token(token: &Token, source: &Text) -> hir::Expression { - match *token.item() { +pub fn baseline_parse_single_token( + token: &Token, + source: &Text, +) -> Result { + Ok(match *token.item() { RawToken::Number(number) => hir::Expression::number(number.to_number(source), token.span()), RawToken::Size(int, unit) => { hir::Expression::size(int.to_number(source), unit, token.span()) @@ -14,17 +18,22 @@ pub fn baseline_parse_single_token(token: &Token, source: &Text) -> 
hir::Express hir::Expression::it_variable(span, token.span()) } RawToken::Variable(span) => hir::Expression::variable(span, token.span()), - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Bare => hir::Expression::bare(token.span()), - } + }) } -pub fn baseline_parse_token_as_number(token: &Token, source: &Text) -> hir::Expression { - match *token.item() { +pub fn baseline_parse_token_as_number( + token: &Token, + source: &Text, +) -> Result { + Ok(match *token.item() { RawToken::Variable(span) if span.slice(source) == "it" => { hir::Expression::it_variable(span, token.span()) } - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::Number(number) => hir::Expression::number(number.to_number(source), token.span()), RawToken::Size(number, unit) => { @@ -32,33 +41,38 @@ pub fn baseline_parse_token_as_number(token: &Token, source: &Text) -> hir::Expr } RawToken::Bare => hir::Expression::bare(token.span()), RawToken::String(span) => hir::Expression::string(span, token.span()), - } + }) } -pub fn baseline_parse_token_as_string(token: &Token, source: &Text) -> hir::Expression { - match *token.item() { +pub fn baseline_parse_token_as_string( + token: &Token, + source: &Text, +) -> Result { + Ok(match *token.item() { RawToken::Variable(span) if span.slice(source) == "it" => { hir::Expression::it_variable(span, token.span()) } - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => 
hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::Number(_) => hir::Expression::bare(token.span()), RawToken::Size(_, _) => hir::Expression::bare(token.span()), RawToken::Bare => hir::Expression::bare(token.span()), RawToken::String(span) => hir::Expression::string(span, token.span()), - } + }) } pub fn baseline_parse_token_as_path( token: &Token, context: &Context, source: &Text, -) -> hir::Expression { - match *token.item() { +) -> Result { + Ok(match *token.item() { RawToken::Variable(span) if span.slice(source) == "it" => { hir::Expression::it_variable(span, token.span()) } - RawToken::External(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::Number(_) => hir::Expression::bare(token.span()), RawToken::Size(_, _) => hir::Expression::bare(token.span()), @@ -69,7 +83,7 @@ pub fn baseline_parse_token_as_path( RawToken::String(span) => { hir::Expression::file_path(expand_path(span.slice(source), context), token.span()) } - } + }) } pub fn expand_path(string: &str, context: &Context) -> PathBuf { diff --git a/src/parser/hir/baseline_parse_tokens.rs b/src/parser/hir/baseline_parse_tokens.rs index ca9b0bb3..13c7630f 100644 --- a/src/parser/hir/baseline_parse_tokens.rs +++ b/src/parser/hir/baseline_parse_tokens.rs @@ -33,7 +33,6 @@ pub fn baseline_parse_tokens( Ok(exprs) } - #[derive(Debug, Copy, Clone, Serialize, Deserialize)] pub enum SyntaxType { Any, @@ -62,7 +61,7 @@ impl std::fmt::Display for SyntaxType { SyntaxType::Path => write!(f, "Path"), SyntaxType::Binary => write!(f, "Binary"), SyntaxType::Block => 
write!(f, "Block"), - SyntaxType::Boolean => write!(f, "Boolean") + SyntaxType::Boolean => write!(f, "Boolean"), } } } @@ -81,7 +80,7 @@ pub fn baseline_parse_next_expr( match (syntax_type, next) { (SyntaxType::Path, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_path(token, context, source)) + return baseline_parse_token_as_path(token, context, source) } (SyntaxType::Path, token) => { @@ -92,7 +91,7 @@ pub fn baseline_parse_next_expr( } (SyntaxType::String, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_string(token, source)); + return baseline_parse_token_as_string(token, source); } (SyntaxType::String, token) => { @@ -103,7 +102,7 @@ pub fn baseline_parse_next_expr( } (SyntaxType::Number, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_number(token, source)); + return Ok(baseline_parse_token_as_number(token, source)?); } (SyntaxType::Number, token) => { @@ -115,7 +114,7 @@ pub fn baseline_parse_next_expr( // TODO: More legit member processing (SyntaxType::Member, TokenNode::Token(token)) => { - return Ok(baseline_parse_token_as_string(token, source)); + return baseline_parse_token_as_string(token, source); } (SyntaxType::Member, token) => { @@ -245,7 +244,7 @@ pub fn baseline_parse_semantic_token( source: &Text, ) -> Result { match token { - TokenNode::Token(token) => Ok(baseline_parse_single_token(token, source)), + TokenNode::Token(token) => baseline_parse_single_token(token, source), TokenNode::Call(_call) => unimplemented!(), TokenNode::Delimited(delimited) => baseline_parse_delimited(delimited, context, source), TokenNode::Pipeline(_pipeline) => unimplemented!(), @@ -315,7 +314,8 @@ pub fn baseline_parse_path( RawToken::Number(_) | RawToken::Size(..) 
| RawToken::Variable(_) - | RawToken::External(_) => { + | RawToken::ExternalCommand(_) + | RawToken::ExternalWord => { return Err(ShellError::type_error( "String", token.type_name().simple_spanned(part), diff --git a/src/parser/parse/call_node.rs b/src/parser/parse/call_node.rs index 2869abb4..eb715cd3 100644 --- a/src/parser/parse/call_node.rs +++ b/src/parser/parse/call_node.rs @@ -1,5 +1,7 @@ use crate::parser::TokenNode; +use crate::traits::ToDebug; use getset::Getters; +use std::fmt; #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters)] pub struct CallNode { @@ -24,3 +26,17 @@ impl CallNode { } } } + +impl ToDebug for CallNode { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + write!(f, "{}", self.head.debug(source))?; + + if let Some(children) = &self.children { + for child in children { + write!(f, "{}", child.debug(source))? + } + } + + Ok(()) + } +} diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index f230c36c..a691fb24 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -236,12 +236,34 @@ pub fn bare(input: NomSpan) -> IResult { let start = input.offset; let (input, _) = take_while1(is_start_bare_char)(input)?; let (input, _) = take_while(is_bare_char)(input)?; + + let next_char = &input.fragment.chars().nth(0); + + if let Some(next_char) = next_char { + if is_external_word_char(*next_char) { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::TakeWhile1, + ))); + } + } + let end = input.offset; Ok((input, TokenTreeBuilder::spanned_bare((start, end)))) }) } +pub fn external_word(input: NomSpan) -> IResult { + trace_step(input, "external_word", move |input| { + let start = input.offset; + let (input, _) = take_while1(is_external_word_char)(input)?; + let end = input.offset; + + Ok((input, TokenTreeBuilder::spanned_external_word((start, end)))) + }) +} + pub fn var(input: NomSpan) -> IResult { trace_step(input, "var", move |input| { let start = 
input.offset; @@ -364,8 +386,17 @@ pub fn size(input: NomSpan) -> IResult { pub fn leaf(input: NomSpan) -> IResult { trace_step(input, "leaf", move |input| { - let (input, node) = - alt((size, string, operator, flag, shorthand, var, external, bare))(input)?; + let (input, node) = alt(( + size, + string, + operator, + flag, + shorthand, + var, + external, + bare, + external_word, + ))(input)?; Ok((input, node)) }) @@ -582,26 +613,13 @@ pub fn pipeline(input: NomSpan) -> IResult { } fn make_call_list( - head: Option<( - Option, - Tagged, - Option - )>, - items: Vec<( - NomSpan, - Option, - Tagged, - Option, - )>, + head: Option<(Option, Tagged, Option)>, + items: Vec<(NomSpan, Option, Tagged, Option)>, ) -> Vec { let mut out = vec![]; if let Some(head) = head { - let el = PipelineElement::new( - None, - head.0.map(Span::from), - head.1, - head.2.map(Span::from)); + let el = PipelineElement::new(None, head.0.map(Span::from), head.1, head.2.map(Span::from)); out.push(el); } @@ -611,7 +629,8 @@ fn make_call_list( Some(pipe).map(Span::from), ws1.map(Span::from), call, - ws2.map(Span::from)); + ws2.map(Span::from), + ); out.push(el); } @@ -628,40 +647,39 @@ fn int(frag: &str, neg: Option) -> i64 { } } +fn is_external_word_char(c: char) -> bool { + match c { + ';' | '|' | '#' | '-' | '"' | '\'' | '$' | '(' | ')' | '[' | ']' | '{' | '}' | '`' => false, + other if other.is_whitespace() => false, + _ => true, + } +} + fn is_start_bare_char(c: char) -> bool { match c { + '+' => false, _ if c.is_alphabetic() => true, - _ if c.is_numeric() => true, '.' => true, '\\' => true, '/' => true, '_' => true, '-' => true, - '@' => true, - '*' => true, - '?' => true, '~' => true, - '+' => true, _ => false, } } fn is_bare_char(c: char) -> bool { match c { + '+' => false, _ if c.is_alphanumeric() => true, - ':' => true, '.' => true, '\\' => true, '/' => true, '_' => true, '-' => true, - '@' => true, - '*' => true, - '?' 
=> true, '=' => true, '~' => true, - '+' => true, - '%' => true, _ => false, } } @@ -724,6 +742,44 @@ mod tests { } } + macro_rules! equal_tokens { + ($source:tt -> $tokens:expr) => { + let result = apply(pipeline, "pipeline", $source); + let (expected_tree, expected_source) = TokenTreeBuilder::build($tokens); + + if result != expected_tree { + let debug_result = format!("{}", result.debug($source)); + let debug_expected = format!("{}", expected_tree.debug(&expected_source)); + + if debug_result == debug_expected { + assert_eq!( + result, expected_tree, + "NOTE: actual and expected had equivalent debug serializations, source={:?}, debug_expected={:?}", + $source, + debug_expected + ) + } else { + assert_eq!(debug_result, debug_expected) + } + } + + // apply(pipeline, "pipeline", r#"cargo +nightly run"#), + // build_token(b::pipeline(vec![( + // None, + // b::call( + // b::bare("cargo"), + // vec![ + // b::sp(), + // b::external_word("+nightly"), + // b::sp(), + // b::bare("run") + // ] + // ), + // None + // )])) + }; + } + #[test] fn test_integer() { assert_leaf! { @@ -854,7 +910,7 @@ mod tests { fn test_external() { assert_leaf! 
{ parsers [ external ] - "^ls" -> 0..3 { External(span(1, 3)) } + "^ls" -> 0..3 { ExternalCommand(span(1, 3)) } } } @@ -1058,6 +1114,46 @@ mod tests { ); } + #[test] + fn test_external_word() { + let _ = pretty_env_logger::try_init(); + + equal_tokens!( + "cargo +nightly run" -> + b::pipeline(vec![( + None, + b::call( + b::bare("cargo"), + vec![ + b::sp(), + b::external_word("+nightly"), + b::sp(), + b::bare("run") + ] + ), + None + )]) + ); + + equal_tokens!( + "rm foo%bar" -> + b::pipeline(vec![( + None, + b::call(b::bare("rm"), vec![b::sp(), b::external_word("foo%bar"),]), + None + )]) + ); + + equal_tokens!( + "rm foo%bar" -> + b::pipeline(vec![( + None, + b::call(b::bare("rm"), vec![b::sp(), b::external_word("foo%bar"),]), + None + )]) + ); + } + #[test] fn test_smoke_pipeline() { let _ = pretty_env_logger::try_init(); @@ -1178,7 +1274,6 @@ mod tests { } fn build_token(block: CurriedToken) -> TokenNode { - let mut builder = TokenTreeBuilder::new(); - block(&mut builder) + TokenTreeBuilder::build(block).0 } } diff --git a/src/parser/parse/pipeline.rs b/src/parser/parse/pipeline.rs index 75155d14..64a899c1 100644 --- a/src/parser/parse/pipeline.rs +++ b/src/parser/parse/pipeline.rs @@ -1,7 +1,9 @@ use crate::parser::CallNode; +use crate::traits::ToDebug; use crate::{Span, Tagged}; use derive_new::new; use getset::Getters; +use std::fmt; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, new)] pub struct Pipeline { @@ -9,6 +11,20 @@ pub struct Pipeline { pub(crate) post_ws: Option, } +impl ToDebug for Pipeline { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + for part in &self.parts { + write!(f, "{}", part.debug(source))?; + } + + if let Some(post_ws) = self.post_ws { + write!(f, "{}", post_ws.slice(source))? 
+ } + + Ok(()) + } +} + #[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd, Getters, new)] pub struct PipelineElement { pub pipe: Option, @@ -17,3 +33,23 @@ pub struct PipelineElement { call: Tagged, pub post_ws: Option, } + +impl ToDebug for PipelineElement { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + if let Some(pipe) = self.pipe { + write!(f, "{}", pipe.slice(source))?; + } + + if let Some(pre_ws) = self.pre_ws { + write!(f, "{}", pre_ws.slice(source))?; + } + + write!(f, "{}", self.call.debug(source))?; + + if let Some(post_ws) = self.post_ws { + write!(f, "{}", post_ws.slice(source))?; + } + + Ok(()) + } +} diff --git a/src/parser/parse/token_tree.rs b/src/parser/parse/token_tree.rs index df189a1a..f69c176e 100644 --- a/src/parser/parse/token_tree.rs +++ b/src/parser/parse/token_tree.rs @@ -1,5 +1,6 @@ use crate::errors::ShellError; use crate::parser::parse::{call_node::*, flag::*, operator::*, pipeline::*, tokens::*}; +use crate::traits::ToDebug; use crate::{Span, Tagged, Text}; use derive_new::new; use enum_utils::FromStr; @@ -22,6 +23,12 @@ pub enum TokenNode { Path(Tagged), } +impl ToDebug for TokenNode { + fn fmt_debug(&self, f: &mut fmt::Formatter, source: &str) -> fmt::Result { + write!(f, "{:?}", self.old_debug(&Text::from(source))) + } +} + pub struct DebugTokenNode<'a> { node: &'a TokenNode, source: &'a Text, @@ -34,11 +41,11 @@ impl fmt::Debug for DebugTokenNode<'_> { TokenNode::Call(s) => { write!(f, "(")?; - write!(f, "{:?}", s.head().debug(self.source))?; + write!(f, "{}", s.head().debug(self.source))?; if let Some(children) = s.children() { for child in children { - write!(f, "{:?}", child.debug(self.source))?; + write!(f, "{}", child.debug(self.source))?; } } @@ -57,7 +64,7 @@ impl fmt::Debug for DebugTokenNode<'_> { )?; for child in d.children() { - write!(f, "{:?}", child.debug(self.source))?; + write!(f, "{:?}", child.old_debug(self.source))?; } write!( @@ -70,7 +77,7 @@ impl fmt::Debug for 
DebugTokenNode<'_> { } ) } - TokenNode::Pipeline(_) => write!(f, ""), + TokenNode::Pipeline(pipeline) => write!(f, "{}", pipeline.debug(self.source)), TokenNode::Error(s) => write!(f, " for {:?}", s.span().slice(self.source)), rest => write!(f, "{}", rest.span().slice(self.source)), } @@ -115,7 +122,7 @@ impl TokenNode { .to_string() } - pub fn debug<'a>(&'a self, source: &'a Text) -> DebugTokenNode<'a> { + pub fn old_debug<'a>(&'a self, source: &'a Text) -> DebugTokenNode<'a> { DebugTokenNode { node: self, source } } @@ -140,7 +147,7 @@ impl TokenNode { pub fn is_external(&self) -> bool { match self { TokenNode::Token(Tagged { - item: RawToken::External(..), + item: RawToken::ExternalCommand(..), .. }) => true, _ => false, @@ -150,7 +157,7 @@ impl TokenNode { pub fn expect_external(&self) -> Span { match self { TokenNode::Token(Tagged { - item: RawToken::External(span), + item: RawToken::ExternalCommand(span), .. }) => *span, _ => panic!("Only call expect_external if you checked is_external first"), diff --git a/src/parser/parse/token_tree_builder.rs b/src/parser/parse/token_tree_builder.rs index 9dd1ebc1..8034e8b0 100644 --- a/src/parser/parse/token_tree_builder.rs +++ b/src/parser/parse/token_tree_builder.rs @@ -14,15 +14,19 @@ use derive_new::new; pub struct TokenTreeBuilder { #[new(default)] pos: usize, + + #[new(default)] + output: String, } pub type CurriedToken = Box TokenNode + 'static>; pub type CurriedCall = Box Tagged + 'static>; impl TokenTreeBuilder { - pub fn build(block: impl FnOnce(&mut Self) -> TokenNode) -> TokenNode { + pub fn build(block: impl FnOnce(&mut Self) -> TokenNode) -> (TokenNode, String) { let mut builder = TokenTreeBuilder::new(); - block(&mut builder) + let node = block(&mut builder); + (node, builder.output) } pub fn pipeline(input: Vec<(Option<&str>, CurriedCall, Option<&str>)>) -> CurriedToken { @@ -56,7 +60,8 @@ impl TokenTreeBuilder { pipe, pre_span.map(Span::from), call, - post_span.map(Span::from))); + 
post_span.map(Span::from), + )); loop { match input.next() { @@ -147,9 +152,27 @@ impl TokenTreeBuilder { )) } + pub fn external_word(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&input); + b.pos = end; + + TokenTreeBuilder::spanned_external_word((start, end)) + }) + } + + pub fn spanned_external_word(input: impl Into) -> TokenNode { + TokenNode::Token(Tagged::from_simple_spanned_item( + RawToken::ExternalWord, + input.into(), + )) + } + pub fn spanned_external(input: impl Into, span: impl Into) -> TokenNode { TokenNode::Token(Tagged::from_simple_spanned_item( - RawToken::External(input.into()), + RawToken::ExternalCommand(input.into()), span.into(), )) } @@ -422,6 +445,7 @@ impl TokenTreeBuilder { fn consume(&mut self, input: &str) -> (usize, usize) { let start = self.pos; self.pos += input.len(); + self.output.push_str(input); (start, self.pos) } } diff --git a/src/parser/parse/tokens.rs b/src/parser/parse/tokens.rs index ed9c1f72..0bb2e3f1 100644 --- a/src/parser/parse/tokens.rs +++ b/src/parser/parse/tokens.rs @@ -10,7 +10,8 @@ pub enum RawToken { Size(RawNumber, Unit), String(Span), Variable(Span), - External(Span), + ExternalCommand(Span), + ExternalWord, Bare, } @@ -50,7 +51,8 @@ impl RawToken { RawToken::Size(..) 
=> "Size", RawToken::String(_) => "String", RawToken::Variable(_) => "Variable", - RawToken::External(_) => "External", + RawToken::ExternalCommand(_) => "ExternalCommand", + RawToken::ExternalWord => "ExternalWord", RawToken::Bare => "String", } } diff --git a/src/parser/parse_command.rs b/src/parser/parse_command.rs index 33ad25e6..e0fc9d86 100644 --- a/src/parser/parse_command.rs +++ b/src/parser/parse_command.rs @@ -6,6 +6,7 @@ use crate::parser::{ hir::{self, NamedArguments}, Flag, RawToken, TokenNode, }; +use crate::traits::ToDebug; use crate::{Span, Tag, Tagged, Text}; use log::trace; @@ -248,7 +249,7 @@ pub fn trace_remaining(desc: &'static str, tail: hir::TokensIterator<'_>, source itertools::join( tail.debug_remaining() .iter() - .map(|i| format!("%{:?}%", i.debug(source))), + .map(|i| format!("%{}%", i.debug(&source))), " " ) ); diff --git a/src/shell/helper.rs b/src/shell/helper.rs index 8e21c50e..462f3752 100644 --- a/src/shell/helper.rs +++ b/src/shell/helper.rs @@ -136,9 +136,13 @@ fn paint_token_node(token_node: &TokenNode, line: &str) -> String { .. }) => Color::Green.normal().paint(token_node.span().slice(line)), TokenNode::Token(Tagged { - item: RawToken::External(..), + item: RawToken::ExternalCommand(..), .. }) => Color::Cyan.bold().paint(token_node.span().slice(line)), + TokenNode::Token(Tagged { + item: RawToken::ExternalWord, + .. + }) => Color::Black.bold().paint(token_node.span().slice(line)), }; styled.to_string() From b15bb2c667c122b161a01131c2143e96b39430e3 Mon Sep 17 00:00:00 2001 From: Yehuda Katz Date: Tue, 10 Sep 2019 08:31:21 -0700 Subject: [PATCH 2/3] Added glob patterns to the syntax shapes Bare words now represent literal file names, and globs are a different syntax shape called "Pattern". This allows commands like `cp` to ask for a pattern as a source and a literal file as a target. This also means that attempting to pass a glob to a command that expects a literal path will produce an error. 
--- src/commands/cp.rs | 2 +- src/commands/to_bson.rs | 1 + src/commands/to_json.rs | 1 + src/commands/to_sqlite.rs | 1 + src/commands/to_toml.rs | 1 + src/commands/to_yaml.rs | 1 + src/data/base.rs | 8 ++++ src/evaluate/evaluator.rs | 1 + src/parser/hir.rs | 10 ++++- src/parser/hir/baseline_parse.rs | 49 +++++++++++++++++++++++++ src/parser/hir/baseline_parse_tokens.rs | 16 +++++++- src/parser/parse/parser.rs | 35 +++++++++++++++++- src/parser/parse/token_tree_builder.rs | 18 +++++++++ src/parser/parse/tokens.rs | 2 + src/shell/helper.rs | 4 ++ 15 files changed, 146 insertions(+), 4 deletions(-) diff --git a/src/commands/cp.rs b/src/commands/cp.rs index 8160fc9d..491e18b1 100644 --- a/src/commands/cp.rs +++ b/src/commands/cp.rs @@ -21,7 +21,7 @@ impl PerItemCommand for Cpy { fn signature(&self) -> Signature { Signature::build("cp") - .required("src", SyntaxType::Path) + .required("src", SyntaxType::Pattern) .required("dst", SyntaxType::Path) .named("file", SyntaxType::Any) .switch("recursive") diff --git a/src/commands/to_bson.rs b/src/commands/to_bson.rs index bb0355a5..a77bebea 100644 --- a/src/commands/to_bson.rs +++ b/src/commands/to_bson.rs @@ -50,6 +50,7 @@ pub fn value_to_bson_value(v: &Tagged) -> Result { } Value::Primitive(Primitive::Nothing) => Bson::Null, Value::Primitive(Primitive::String(s)) => Bson::String(s.clone()), + Value::Primitive(Primitive::Pattern(p)) => Bson::String(p.clone()), Value::Primitive(Primitive::Path(s)) => Bson::String(s.display().to_string()), Value::Table(l) => Bson::Array( l.iter() diff --git a/src/commands/to_json.rs b/src/commands/to_json.rs index f53fbd8d..35c03af3 100644 --- a/src/commands/to_json.rs +++ b/src/commands/to_json.rs @@ -45,6 +45,7 @@ pub fn value_to_json_value(v: &Tagged) -> Result::coerce_into(i.tagged(v.tag), "converting to JSON number")?, )), Value::Primitive(Primitive::Nothing) => serde_json::Value::Null, + Value::Primitive(Primitive::Pattern(s)) => serde_json::Value::String(s.clone()), 
Value::Primitive(Primitive::String(s)) => serde_json::Value::String(s.clone()), Value::Primitive(Primitive::Path(s)) => serde_json::Value::String(s.display().to_string()), diff --git a/src/commands/to_sqlite.rs b/src/commands/to_sqlite.rs index 7580c3f4..0fd392f3 100644 --- a/src/commands/to_sqlite.rs +++ b/src/commands/to_sqlite.rs @@ -91,6 +91,7 @@ fn nu_value_to_sqlite_string(v: Value) -> String { Primitive::Int(i) => format!("{}", i), Primitive::Decimal(f) => format!("{}", f), Primitive::Bytes(u) => format!("{}", u), + Primitive::Pattern(s) => format!("'{}'", s.replace("'", "''")), Primitive::String(s) => format!("'{}'", s.replace("'", "''")), Primitive::Boolean(true) => "1".into(), Primitive::Boolean(_) => "0".into(), diff --git a/src/commands/to_toml.rs b/src/commands/to_toml.rs index 7bca9840..e18e1523 100644 --- a/src/commands/to_toml.rs +++ b/src/commands/to_toml.rs @@ -44,6 +44,7 @@ pub fn value_to_toml_value(v: &Tagged) -> Result toml::Value::Integer(i.tagged(v.tag).coerce_into("converting to TOML integer")?) 
} Value::Primitive(Primitive::Nothing) => toml::Value::String("".to_string()), + Value::Primitive(Primitive::Pattern(s)) => toml::Value::String(s.clone()), Value::Primitive(Primitive::String(s)) => toml::Value::String(s.clone()), Value::Primitive(Primitive::Path(s)) => toml::Value::String(s.display().to_string()), diff --git a/src/commands/to_yaml.rs b/src/commands/to_yaml.rs index 129deebd..91582725 100644 --- a/src/commands/to_yaml.rs +++ b/src/commands/to_yaml.rs @@ -42,6 +42,7 @@ pub fn value_to_yaml_value(v: &Tagged) -> Result::coerce_into(i.tagged(v.tag), "converting to YAML number")?, )), Value::Primitive(Primitive::Nothing) => serde_yaml::Value::Null, + Value::Primitive(Primitive::Pattern(s)) => serde_yaml::Value::String(s.clone()), Value::Primitive(Primitive::String(s)) => serde_yaml::Value::String(s.clone()), Value::Primitive(Primitive::Path(s)) => serde_yaml::Value::String(s.display().to_string()), diff --git a/src/data/base.rs b/src/data/base.rs index b48d6921..6707d640 100644 --- a/src/data/base.rs +++ b/src/data/base.rs @@ -20,6 +20,7 @@ pub enum Primitive { Decimal(BigDecimal), Bytes(u64), String(String), + Pattern(String), Boolean(bool), Date(DateTime), Path(PathBuf), @@ -53,6 +54,7 @@ impl Primitive { Int(_) => "int", Decimal(_) => "decimal", Bytes(_) => "bytes", + Pattern(_) => "pattern", String(_) => "string", Boolean(_) => "boolean", Date(_) => "date", @@ -71,6 +73,7 @@ impl Primitive { Path(path) => write!(f, "{}", path.display()), Decimal(decimal) => write!(f, "{}", decimal), Bytes(bytes) => write!(f, "{}", bytes), + Pattern(string) => write!(f, "{:?}", string), String(string) => write!(f, "{:?}", string), Boolean(boolean) => write!(f, "{}", boolean), Date(date) => write!(f, "{}", date), @@ -108,6 +111,7 @@ impl Primitive { } Primitive::Int(i) => format!("{}", i), Primitive::Decimal(decimal) => format!("{}", decimal), + Primitive::Pattern(s) => format!("{}", s), Primitive::String(s) => format!("{}", s), Primitive::Boolean(b) => match (b, 
field_name) { (true, None) => format!("Yes"), @@ -577,6 +581,10 @@ impl Value { Value::Primitive(Primitive::String(s.into())) } + pub fn pattern(s: impl Into) -> Value { + Value::Primitive(Primitive::String(s.into())) + } + pub fn path(s: impl Into) -> Value { Value::Primitive(Primitive::Path(s.into())) } diff --git a/src/evaluate/evaluator.rs b/src/evaluate/evaluator.rs index 6419ab73..52edf698 100644 --- a/src/evaluate/evaluator.rs +++ b/src/evaluate/evaluator.rs @@ -114,6 +114,7 @@ fn evaluate_literal(literal: Tagged<&hir::Literal>, source: &Text) -> Tagged int.into(), hir::Literal::Size(int, unit) => unit.compute(int), hir::Literal::String(span) => Value::string(span.slice(source)), + hir::Literal::GlobPattern => Value::pattern(literal.span().slice(source)), hir::Literal::Bare => Value::string(literal.span().slice(source)), }; diff --git a/src/parser/hir.rs b/src/parser/hir.rs index aaf5bb77..90bb3879 100644 --- a/src/parser/hir.rs +++ b/src/parser/hir.rs @@ -17,7 +17,7 @@ use crate::evaluate::Scope; pub(crate) use self::baseline_parse::{ baseline_parse_single_token, baseline_parse_token_as_number, baseline_parse_token_as_path, - baseline_parse_token_as_string, + baseline_parse_token_as_pattern, baseline_parse_token_as_string, }; pub(crate) use self::baseline_parse_tokens::{baseline_parse_next_expr, TokensIterator}; pub(crate) use self::binary::Binary; @@ -90,6 +90,7 @@ pub enum RawExpression { Block(Vec), List(Vec), Path(Box), + FilePath(PathBuf), ExternalCommand(ExternalCommand), @@ -164,6 +165,10 @@ impl Expression { Tagged::from_simple_spanned_item(RawExpression::Literal(Literal::Bare), span.into()) } + pub(crate) fn pattern(tag: impl Into) -> Expression { + RawExpression::Literal(Literal::GlobPattern).tagged(tag.into()) + } + pub(crate) fn variable(inner: impl Into, outer: impl Into) -> Expression { Tagged::from_simple_spanned_item( RawExpression::Variable(Variable::Other(inner.into())), @@ -238,6 +243,7 @@ pub enum Literal { Number(Number), Size(Number, 
Unit), String(Span), + GlobPattern, Bare, } @@ -247,6 +253,7 @@ impl ToDebug for Tagged<&Literal> { Literal::Number(number) => write!(f, "{:?}", *number), Literal::Size(number, unit) => write!(f, "{:?}{:?}", *number, unit), Literal::String(span) => write!(f, "{}", span.slice(source)), + Literal::GlobPattern => write!(f, "{}", self.span().slice(source)), Literal::Bare => write!(f, "{}", self.span().slice(source)), } } @@ -259,6 +266,7 @@ impl Literal { Literal::Size(..) => "size", Literal::String(..) => "string", Literal::Bare => "string", + Literal::GlobPattern => "pattern", } } } diff --git a/src/parser/hir/baseline_parse.rs b/src/parser/hir/baseline_parse.rs index 4437a6d3..5248bde5 100644 --- a/src/parser/hir/baseline_parse.rs +++ b/src/parser/hir/baseline_parse.rs @@ -1,6 +1,7 @@ use crate::context::Context; use crate::errors::ShellError; use crate::parser::{hir, RawToken, Token}; +use crate::TaggedItem; use crate::Text; use std::path::PathBuf; @@ -20,6 +21,7 @@ pub fn baseline_parse_single_token( RawToken::Variable(span) => hir::Expression::variable(span, token.span()), RawToken::ExternalCommand(span) => hir::Expression::external_command(span, token.span()), RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), + RawToken::GlobPattern => hir::Expression::pattern(token.span()), RawToken::Bare => hir::Expression::bare(token.span()), }) } @@ -40,6 +42,12 @@ pub fn baseline_parse_token_as_number( hir::Expression::size(number.to_number(source), unit, token.span()) } RawToken::Bare => hir::Expression::bare(token.span()), + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Number", + "glob pattern".to_string().tagged(token.tag()), + )) + } RawToken::String(span) => hir::Expression::string(span, token.span()), }) } @@ -58,6 +66,12 @@ pub fn baseline_parse_token_as_string( RawToken::Number(_) => hir::Expression::bare(token.span()), RawToken::Size(_, _) => hir::Expression::bare(token.span()), RawToken::Bare => 
hir::Expression::bare(token.span()), + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "String", + "glob pattern".tagged(token.tag()), + )) + } RawToken::String(span) => hir::Expression::string(span, token.span()), }) } @@ -80,6 +94,41 @@ pub fn baseline_parse_token_as_path( expand_path(token.span().slice(source), context), token.span(), ), + RawToken::GlobPattern => { + return Err(ShellError::type_error( + "Path", + "glob pattern".tagged(token.tag()), + )) + } + RawToken::String(span) => { + hir::Expression::file_path(expand_path(span.slice(source), context), token.span()) + } + }) +} + +pub fn baseline_parse_token_as_pattern( + token: &Token, + context: &Context, + source: &Text, +) -> Result { + Ok(match *token.item() { + RawToken::Variable(span) if span.slice(source) == "it" => { + hir::Expression::it_variable(span, token.span()) + } + RawToken::ExternalCommand(_) => { + return Err(ShellError::syntax_error( + "Invalid external command".to_string().tagged(token.tag()), + )) + } + RawToken::ExternalWord => return Err(ShellError::invalid_external_word(token.span())), + RawToken::Variable(span) => hir::Expression::variable(span, token.span()), + RawToken::Number(_) => hir::Expression::bare(token.span()), + RawToken::Size(_, _) => hir::Expression::bare(token.span()), + RawToken::GlobPattern => hir::Expression::pattern(token.span()), + RawToken::Bare => hir::Expression::file_path( + expand_path(token.span().slice(source), context), + token.span(), + ), RawToken::String(span) => { hir::Expression::file_path(expand_path(span.slice(source), context), token.span()) } diff --git a/src/parser/hir/baseline_parse_tokens.rs b/src/parser/hir/baseline_parse_tokens.rs index 13c7630f..ac2c703d 100644 --- a/src/parser/hir/baseline_parse_tokens.rs +++ b/src/parser/hir/baseline_parse_tokens.rs @@ -4,7 +4,7 @@ use crate::parser::{ hir, hir::{ baseline_parse_single_token, baseline_parse_token_as_number, baseline_parse_token_as_path, - baseline_parse_token_as_string, 
+ baseline_parse_token_as_pattern, baseline_parse_token_as_string, }, DelimitedNode, Delimiter, PathNode, RawToken, TokenNode, }; @@ -43,6 +43,7 @@ pub enum SyntaxType { Variable, Number, Path, + Pattern, Binary, Block, Boolean, @@ -59,6 +60,7 @@ impl std::fmt::Display for SyntaxType { SyntaxType::Variable => write!(f, "Variable"), SyntaxType::Number => write!(f, "Number"), SyntaxType::Path => write!(f, "Path"), + SyntaxType::Pattern => write!(f, "Pattern"), SyntaxType::Binary => write!(f, "Binary"), SyntaxType::Block => write!(f, "Block"), SyntaxType::Boolean => write!(f, "Boolean"), @@ -90,6 +92,17 @@ pub fn baseline_parse_next_expr( )) } + (SyntaxType::Pattern, TokenNode::Token(token)) => { + return baseline_parse_token_as_pattern(token, context, source) + } + + (SyntaxType::Pattern, token) => { + return Err(ShellError::type_error( + "Path", + token.type_name().simple_spanned(token.span()), + )) + } + (SyntaxType::String, TokenNode::Token(token)) => { return baseline_parse_token_as_string(token, source); } @@ -315,6 +328,7 @@ pub fn baseline_parse_path( | RawToken::Size(..) 
| RawToken::Variable(_) | RawToken::ExternalCommand(_) + | RawToken::GlobPattern | RawToken::ExternalWord => { return Err(ShellError::type_error( "String", diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index a691fb24..66656619 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -231,6 +231,29 @@ pub fn external(input: NomSpan) -> IResult { }) } +pub fn pattern(input: NomSpan) -> IResult { + trace_step(input, "bare", move |input| { + let start = input.offset; + let (input, _) = take_while1(is_start_glob_char)(input)?; + let (input, _) = take_while(is_glob_char)(input)?; + + let next_char = &input.fragment.chars().nth(0); + + if let Some(next_char) = next_char { + if is_external_word_char(*next_char) { + return Err(nom::Err::Error(nom::error::make_error( + input, + nom::error::ErrorKind::TakeWhile1, + ))); + } + } + + let end = input.offset; + + Ok((input, TokenTreeBuilder::spanned_pattern((start, end)))) + }) +} + pub fn bare(input: NomSpan) -> IResult { trace_step(input, "bare", move |input| { let start = input.offset; @@ -240,7 +263,7 @@ pub fn bare(input: NomSpan) -> IResult { let next_char = &input.fragment.chars().nth(0); if let Some(next_char) = next_char { - if is_external_word_char(*next_char) { + if is_external_word_char(*next_char) || *next_char == '*' { return Err(nom::Err::Error(nom::error::make_error( input, nom::error::ErrorKind::TakeWhile1, @@ -395,6 +418,7 @@ pub fn leaf(input: NomSpan) -> IResult { var, external, bare, + pattern, external_word, ))(input)?; @@ -655,6 +679,14 @@ fn is_external_word_char(c: char) -> bool { } } +fn is_start_glob_char(c: char) -> bool { + is_start_bare_char(c) || c == '*' +} + +fn is_glob_char(c: char) -> bool { + is_bare_char(c) || c == '*' +} + fn is_start_bare_char(c: char) -> bool { match c { '+' => false, @@ -680,6 +712,7 @@ fn is_bare_char(c: char) -> bool { '-' => true, '=' => true, '~' => true, + ':' => true, _ => false, } } diff --git 
a/src/parser/parse/token_tree_builder.rs b/src/parser/parse/token_tree_builder.rs index 8034e8b0..ae1b344c 100644 --- a/src/parser/parse/token_tree_builder.rs +++ b/src/parser/parse/token_tree_builder.rs @@ -152,6 +152,24 @@ impl TokenTreeBuilder { )) } + pub fn pattern(input: impl Into) -> CurriedToken { + let input = input.into(); + + Box::new(move |b| { + let (start, end) = b.consume(&input); + b.pos = end; + + TokenTreeBuilder::spanned_pattern((start, end)) + }) + } + + pub fn spanned_pattern(input: impl Into) -> TokenNode { + TokenNode::Token(Tagged::from_simple_spanned_item( + RawToken::Bare, + input.into(), + )) + } + pub fn external_word(input: impl Into) -> CurriedToken { let input = input.into(); diff --git a/src/parser/parse/tokens.rs b/src/parser/parse/tokens.rs index 0bb2e3f1..b5998524 100644 --- a/src/parser/parse/tokens.rs +++ b/src/parser/parse/tokens.rs @@ -12,6 +12,7 @@ pub enum RawToken { Variable(Span), ExternalCommand(Span), ExternalWord, + GlobPattern, Bare, } @@ -53,6 +54,7 @@ impl RawToken { RawToken::Variable(_) => "Variable", RawToken::ExternalCommand(_) => "ExternalCommand", RawToken::ExternalWord => "ExternalWord", + RawToken::GlobPattern => "GlobPattern", RawToken::Bare => "String", } } diff --git a/src/shell/helper.rs b/src/shell/helper.rs index 462f3752..16802657 100644 --- a/src/shell/helper.rs +++ b/src/shell/helper.rs @@ -123,6 +123,10 @@ fn paint_token_node(token_node: &TokenNode, line: &str) -> String { item: RawToken::Size(..), .. }) => Color::Purple.bold().paint(token_node.span().slice(line)), + TokenNode::Token(Tagged { + item: RawToken::GlobPattern, + .. + }) => Color::Cyan.normal().paint(token_node.span().slice(line)), TokenNode::Token(Tagged { item: RawToken::String(..), .. From 540e93aa3ada4d722cc6eac7df64322cb83e9097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9s=20N=2E=20Robalino?= Date: Tue, 10 Sep 2019 12:26:56 -0500 Subject: [PATCH 3/3] question mark character can also be in glob patterns. 
--- src/commands/ls.rs | 2 +- src/parser/parse/parser.rs | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/commands/ls.rs b/src/commands/ls.rs index 70961415..d2a0f6c0 100644 --- a/src/commands/ls.rs +++ b/src/commands/ls.rs @@ -10,7 +10,7 @@ impl WholeStreamCommand for LS { } fn signature(&self) -> Signature { - Signature::build("ls").optional("path", SyntaxType::Path) + Signature::build("ls").optional("path", SyntaxType::Pattern) } fn usage(&self) -> &str { diff --git a/src/parser/parse/parser.rs b/src/parser/parse/parser.rs index 66656619..0be05af0 100644 --- a/src/parser/parse/parser.rs +++ b/src/parser/parse/parser.rs @@ -713,6 +713,7 @@ fn is_bare_char(c: char) -> bool { '=' => true, '~' => true, ':' => true, + '?' => true, _ => false, } }