feat: working AST parsing

This commit is contained in:
Daniel Bulant 2021-12-08 21:29:59 +01:00
parent 78d4cea4fe
commit 847e49ec92
5 changed files with 333 additions and 95 deletions

View file

@ -1,17 +1,16 @@
mod parser;
use std::io::{self, BufRead, Read, Stdout, Write};
use std::io::{self, BufRead, Stdout, Write};
use std::cmp;
use std::collections::HashMap;
use std::convert::TryInto;
use std::ops::Add;
use std::process;
use termion::raw::{IntoRawMode, RawTerminal};
use termion::input::TermRead;
use termion::cursor::{self, DetectCursorPos};
use termion::cursor::{DetectCursorPos};
use termion::event::*;
use termion::input::{MouseTerminal};
// use termion::input::{MouseTerminal};
struct Term {
input: String,
@ -72,6 +71,8 @@ impl Shell {
fn main() {
loop {
print!("$: ");
io::stdout().flush();
let mut shell = collect();
shell.term.input += "\n";
parser::exec(&mut shell.term.input.as_bytes(), shell.ctx);

View file

@ -1,12 +1,23 @@
use crate::parser::tokens::Tokens;
use crate::parser::vars::Variable;
#[derive(Debug)]
pub struct LetExpression {
pub key: String,
pub key: Box<Value>,
pub value: Box<Value>
}
#[derive(Debug)]
pub struct AndExpression {
pub first: Box<Expression>,
pub second: Box<Expression>
}
#[derive(Debug)]
pub struct OrExpression {
pub first: Box<Expression>,
pub second: Box<Expression>
}
#[derive(Debug)]
pub struct IfExpression {
pub condition: Box<Expression>,
@ -47,6 +58,7 @@ pub enum Value {
ArrayFunction(DefinedFunction),
StringFunction(DefinedFunction),
Expressions(Vec<Expression>),
Expression(Expression),
Values(Vec<Value>)
}
@ -84,108 +96,197 @@ pub enum CommandValue {
pub enum Expression {
LetExpression(LetExpression),
Command(Vec<CommandValue>),
JobCommand(Box<Expression>),
Function(FunctionDefinitionExpression),
IfExpression(IfExpression),
WhileExpression(WhileExpression),
ForExpression(ForExpression),
RedirectTargetExpression(RedirectTargetExpression),
FileTargetExpression(FileTargetExpression),
FileSourceExpression(FileSourceExpression)
FileSourceExpression(FileSourceExpression),
Expressions(Vec<Expression>),
OrExpression(OrExpression),
AndExpression(AndExpression)
}
#[derive(Debug)]
struct Tree {
tokens: Vec<Tokens>,
i: usize
}
impl Tree {
fn parse_call(&mut self) -> Vec<CommandValue> {
fn parse_call(&mut self, end: usize) -> Expression {
let mut values: Vec<CommandValue> = Vec::new();
let mut token = self.get_current_token();
let mut buf: Vec<Value> = Vec::new();
dbg!("call parse");
let mut token = self.get_current_token();
loop {
if matches!(token, Tokens::Space) {
if buf.len() > 0 {
values.push(CommandValue::Value(Value::Values(buf)));
buf = Vec::new();
}
token = self.next();
if self.i >= end - 1 { break }
self.i += 1;
token = self.get_current_token();
dbg!("skip space");
continue;
}
let val = match token {
let val = match &token {
Tokens::Literal(str) => Value::Literal(str.clone()),
Tokens::SubStart => Value::Expressions(self.parse_sub()),
Tokens::SubStart => {
let val = self.get_value(end);
token = self.get_current_token();
val
},
Tokens::StringVariable(str, _) => Value::Variable(str.clone()),
Tokens::ArrayVariable(str, _) => Value::ArrayVariable(str.clone()),
Tokens::FileWrite => {
Tokens::FileWrite => break,
Tokens::FileRead => break,
Tokens::RedirectInto => break,
Tokens::ParenthesisEnd => {
if self.i >= end - 1 {
break;
}
Value::Literal(token.to_str())
}
_ => {
Value::Literal(token.to_str())
}
};
buf.push(val);
if self.i == self.tokens.len() { break }
token = self.next();
if self.i >= end - 1 { break }
self.i += 1;
token = self.tokens.get(self.i).unwrap();
if matches!(token, Tokens::CommandEnd(_)) { break }
dbg!("parse call loop");
}
dbg!(&token);
match &token {
Tokens::FileWrite | Tokens::FileRead | Tokens::RedirectInto => self.i -= 1,
_ => {}
}
dbg!(&self);
// self.next();
if buf.len() > 0 {
values.push(CommandValue::Value(Value::Values(buf)));
}
values
Expression::Command(values)
}
fn parse_let(&self) -> LetExpression {
panic!("Let not yet implemented");
fn parse_let(&mut self, end: usize) -> Expression {
if end < self.i + 2 { panic!("Let needs name and equal sign (=) at minimum") }
self.inc();
let mut len = 0;
for token in &self.tokens[self.i..] {
match token {
Tokens::ExportSet => { break },
_ => len += 1
}
}
let key = Box::new(self.get_value(self.i + len));
self.inc(); // ????
self.inc();
let value = Box::new(self.get_value(end));
Expression::LetExpression(LetExpression { key, value })
}
fn parse_read(&self) -> FileSourceExpression {
panic!("Read not yet implemented");
fn parse_read(&mut self, target: Option<Expression>, end: usize) -> Expression {
let target = match target {
Some(source) => Some(Box::new(source)),
None => None
};
self.i += 1;
let mut val_end = self.i;
let mut found_first = false;
for token in &self.tokens[self.i..] {
dbg!(&token);
val_end += 1;
match token {
Tokens::Space => if found_first { break },
Tokens::CommandEnd(_) => if !found_first { panic!("Unexpected command end") } else { break },
Tokens::FileRead => panic!("Unexpected file read (<)"),
Tokens::FileWrite => panic!("Unexpected file write (>)"),
_ => { found_first = true; }
}
}
dbg!(&self.tokens[self.i..val_end]);
val_end -= 1;
let source = Box::new(self.get_value(val_end));
self.inc();
Expression::FileSourceExpression(FileSourceExpression { source, target })
}
fn parse_write(&self, source: Option<Expression>) -> FileTargetExpression {
fn parse_write(&mut self, source: Option<Expression>, end: usize) -> Expression {
let source = match source {
Some(source) => Some(Box::new(source)),
None => None
};
self.i += 1;
let mut val_end = self.i;
let mut found_first = false;
for token in &self.tokens[self.i..] {
val_end += 1;
match token {
Tokens::Space => if found_first { break },
Tokens::CommandEnd(_) => if !found_first { panic!("Unexpected command end") } else { break },
Tokens::FileRead => panic!("Unexpected file read (<)"),
Tokens::FileWrite => panic!("Unexpected file write (>)"),
_ => { found_first = true; }
}
}
val_end -= 1;
let target = Box::new(self.get_value(val_end));
self.inc();
Expression::FileTargetExpression(FileTargetExpression { source, target })
}
fn parse_function(&self) -> FunctionDefinitionExpression {
fn parse_function(&self, end: usize) -> FunctionDefinitionExpression {
panic!("Functions not yet implemented")
}
fn parse_array_func(&self, str: String) -> DefinedFunction {
fn parse_array_func(&self, str: String, end: usize) -> DefinedFunction {
panic!("Array functions not yet implemented");
}
fn parse_for(&self) -> ForExpression {
fn parse_string_func(&self, str: String, end: usize) -> DefinedFunction {
panic!("Array functions not yet implemented");
}
fn parse_for(&self, end: usize) -> ForExpression {
panic!("For loop not yet implemented");
}
fn parse_if(&self) -> IfExpression {
fn parse_if(&self, end: usize) -> IfExpression {
panic!("If not yet implemented");
}
fn parse_while(&self) -> WhileExpression {
fn parse_while(&self, end: usize) -> WhileExpression {
panic!("While not yet implemented");
}
fn parse_sub(&self) -> Vec<Expression> {
panic!("Sub not yet implemented")
fn parse_sub(&mut self, end: usize) -> Vec<Expression> {
let mut expressions: Vec<Expression> = Vec::new();
loop {
if self.i >= end - 1 { break; }
expressions.push(self.get_expression(end));
dbg!(&expressions);
}
expressions
}
fn get_value(&mut self) -> Value {
fn get_value(&mut self, end: usize) -> Value {
let mut token = self.get_current_token();
let mut values: Vec<Value> = Vec::new();
let mut buf: Vec<Value> = Vec::new();
loop {
match token {
Tokens::Space => {
if buf.len() == 0 { continue; }
if buf.len() == 0 { token = self.inc().get_current_token(); continue; }
values.push(Value::Values(buf));
buf = Vec::new();
if self.i >= end - 1 { break }
},
Tokens::CommandEnd(_) => break,
Tokens::Literal(str) => buf.push(Value::Literal(str.clone())),
@ -194,57 +295,155 @@ impl Tree {
Tokens::Function => buf.push(Value::Literal(token.to_str())),
Tokens::FileWrite => buf.push(Value::Literal(token.to_str())),
Tokens::RedirectInto => panic!("Unexpected token REDIRECT (|)"),
Tokens::FunctionCallEnd => panic!("Unexpected token FUNCTION CALL END ())"),
Tokens::ParenthesisEnd => panic!("Unexpected token FUNCTION CALL END ())"),
Tokens::ArrayFunction(_) => panic!("Unexpected array function"),
Tokens::StringFunction(_) => panic!("Unexpected string function"),
Tokens::SubStart => /* SUB START PROCESS */,
Tokens::SubEnd => panic!("Unexpected token SUB END ())"),
Tokens::ParenthesisStart => panic!("Parenthesis not yet implemented"),
Tokens::SubStart => {
let mut len = 1;
let mut lvl = 1;
self.inc();
for token in &self.tokens[self.i..] {
match token {
Tokens::SubStart => lvl += 1,
Tokens::ParenthesisEnd => lvl -= 1,
_ => {}
}
if lvl == 0 {
break;
}
if len + self.i == end { break }
len += 1;
}
// self.inc();
dbg!(&self);
dbg!(len, lvl);
if lvl != 0 {
panic!("Sub not ended properly");
}
return Value::Expressions(self.parse_sub(self.i + len));
},
Tokens::Else => buf.push(Value::Literal(token.to_str())),
Tokens::End => buf.push(Value::Literal(token.to_str())),
Tokens::For => return Expression::ForExpression(self.parse_for()),
Tokens::If => return Expression::IfExpression(self.parse_if()),
Tokens::Let => return Expression::LetExpression(self.parse_let()),
Tokens::While => return Expression::WhileExpression(self.parse_while()),
Tokens::StringVariable(_, _) => return Expression::Command(self.parse_call()),
Tokens::ArrayVariable(_, _) => panic!("Unexpected array variable")
Tokens::For => buf.push(Value::Literal(token.to_str())),
Tokens::If => buf.push(Value::Literal(token.to_str())),
Tokens::Let => buf.push(Value::Literal(token.to_str())),
Tokens::While => buf.push(Value::Literal(token.to_str())),
Tokens::StringVariable(str, _) => {
if buf.len() != 0 {
values.push(Value::Values(buf));
buf = Vec::new();
}
values.push(Value::Variable(str.clone()));
},
Tokens::ArrayVariable(str, _) => {
if buf.len() != 0 {
values.push(Value::Values(buf));
buf = Vec::new();
}
values.push(Value::ArrayVariable(str.clone()));
},
Tokens::And => panic!("Unexpected AND (&&)"),
Tokens::Or => panic!("Unexpected OR (||)"),
Tokens::JobCommandEnd => panic!("Unexpected job command end (&)"),
}
token = self.next();
if self.i >= end - 1 { break }
token = self.inc().get_current_token();
}
if buf.len() > 0 {
values.push(Value::Values(buf));
}
Value::Values(values)
}
fn get_expression(&mut self, is_sub: bool) -> Expression {
fn get_expression(&mut self, end: usize) -> Expression {
let mut token = self.get_current_token();
dbg!("expr");
let mut expr: Option<Expression> = None;
loop {
dbg!(&token);
match token {
Tokens::Space => continue,
Tokens::CommandEnd(_) => continue,
Tokens::Literal(_) => return Expression::Command(self.parse_call()),
Tokens::Space => {self.inc();},
Tokens::CommandEnd(_) => {self.inc();},
Tokens::Literal(t) => if matches!(expr, Some(_)) {
dbg!(t);
dbg!(expr);
panic!("Unexpected literal. After file redirect, you need to use a semicolon or newline.");
} else {
expr = Some(self.parse_call(end));
dbg!(&self);
},
Tokens::ExportSet => panic!("Unexpected token EXPORT_SET (=)"),
Tokens::FileRead => return Expression::FileSourceExpression(self.parse_read()),
Tokens::Function => return Expression::Function(self.parse_function()),
Tokens::FileWrite => return Expression::FileTargetExpression(self.parse_write(None)),
Tokens::RedirectInto => panic!("Unexpected token REDIRECT (|)"),
Tokens::FunctionCallEnd => panic!("Unexpected token FUNCTION CALL END ())"),
Tokens::Function => return Expression::Function(self.parse_function(end)),
Tokens::FileRead => expr = Some(self.parse_read(expr, end)),
Tokens::FileWrite => expr = Some(self.parse_write(expr, end)),
Tokens::RedirectInto => match expr {
None => panic!("Unexpected token REDIRECT (|)"),
Some(_) => {
self.i += 1;
dbg!(&self);
expr = Some(Expression::RedirectTargetExpression(RedirectTargetExpression { source: Box::new(expr.unwrap()), target: Box::new(self.get_expression(end)) }));
dbg!("after redirect");
}
},
Tokens::ParenthesisStart => if matches!(expr, Some(_)) {
dbg!(expr);
panic!("Unexpected parenthesis. After file redirect, you need to use a semicolon or newline.");
} else {
let mut len = 1;
let mut lvl = 1;
self.inc();
for token in &self.tokens[self.i..] {
match token {
Tokens::ParenthesisStart => lvl += 1,
Tokens::ParenthesisEnd => lvl -= 1,
_ => {}
}
if lvl == 0 {
break;
}
if len + self.i == end { break }
len += 1;
}
if lvl != 0 {
panic!("Parenthesis not ended properly.");
}
expr = Some(self.get_expression(self.i + len));
dbg!(&self);
},
Tokens::ParenthesisEnd => panic!("Unexpected token PARENTHESIS END ())"),
Tokens::ArrayFunction(_) => panic!("Unexpected array function"),
Tokens::StringFunction(_) => panic!("Unexpected string function"),
Tokens::SubStart => return Expression::Command(self.parse_call()),
Tokens::SubEnd => if !is_sub { panic!("Unexpected token SUB END ())") },
Tokens::SubStart => return self.parse_call(end),
Tokens::Else => panic!("Unexpected token ELSE"),
Tokens::End => panic!("Unexpected token END"),
Tokens::For => return Expression::ForExpression(self.parse_for()),
Tokens::If => return Expression::IfExpression(self.parse_if()),
Tokens::Let => return Expression::LetExpression(self.parse_let()),
Tokens::While => return Expression::WhileExpression(self.parse_while()),
Tokens::StringVariable(_, _) => return Expression::Command(self.parse_call()),
Tokens::ArrayVariable(_, _) => panic!("Unexpected array variable")
Tokens::For => return Expression::ForExpression(self.parse_for(end)),
Tokens::If => return Expression::IfExpression(self.parse_if(end)),
Tokens::Let => return self.parse_let(end),
Tokens::While => return Expression::WhileExpression(self.parse_while(end)),
Tokens::StringVariable(_, _) => if matches!(expr, Some(_)) {
dbg!(expr);
panic!("Unexpected variable. After file redirect, you need to use a semicolon or newline.");
} else {
expr = Some(self.parse_call(end));
dbg!(&self);
},
Tokens::ArrayVariable(_, _) => panic!("Unexpected array variable"),
Tokens::And => panic!("And not yet implemented"),
Tokens::Or => panic!("Or not yet implemented"),
Tokens::JobCommandEnd => panic!("Jobs not yet implemented")
}
token = self.next();
if self.i >= end - 1 { break }
token = self.get_current_token();
}
expr.unwrap()
}
fn next(&mut self) -> &Tokens {
fn inc(&mut self) -> &Self {
self.i += 1;
self.get_current_token()
self
}
fn get_current_token(&self) -> &Tokens {
self.tokens.get(self.i).unwrap()
@ -255,12 +454,15 @@ impl Tree {
}
pub fn build_tree(tokens: Vec<Tokens>) -> Vec<Expression> {
println!("Building tree");
let mut expressions: Vec<Expression> = Vec::new();
let mut tree = Tree { tokens, i: 0 };
loop {
if tree.i == tree.tokens.len() - 1 { break; }
expressions.push(tree.get_expression());
let val = tree.get_expression(tree.tokens.len());
expressions.push(val);
}
dbg!(&expressions);
expressions
}
}

View file

@ -2,19 +2,14 @@ pub mod vars;
pub mod ast;
pub mod tokens;
use std::io;
use std::io::Read;
//use std::io::prelude::*;
use utf8_chars::BufReadCharsExt;
use crate::parser::ast::{build_tree, Expression};
use crate::parser::tokens::{tokenize, Tokens};
use crate::parser::ast::{build_tree};
use crate::parser::tokens::{tokenize};
pub fn exec(reader: &mut dyn std::io::BufRead, ctx: vars::Context) {
let tokens = tokenize(reader).unwrap();
dbg!(&tokens);
println!("Building tree");
let expressions = build_tree(tokens);
dbg!(&expressions);
@ -22,4 +17,4 @@ pub fn exec(reader: &mut dyn std::io::BufRead, ctx: vars::Context) {
pub fn escape(str: String) -> String {
str
}
}

View file

@ -1,6 +1,4 @@
use std::fmt::format;
use std::io;
use crate::parser::vars;
#[derive(Debug)]
pub enum Tokens {
@ -12,7 +10,8 @@ pub enum Tokens {
ArrayVariable(String, bool),
ArrayFunction(String),
StringFunction(String),
FunctionCallEnd,
ParenthesisStart,
ParenthesisEnd,
CommandEnd(char),
If,
Else,
@ -21,10 +20,12 @@ pub enum Tokens {
Function,
End,
SubStart,
SubEnd,
RedirectInto,
FileRead,
FileWrite,
And,
Or,
JobCommandEnd
}
impl Tokens {
@ -38,7 +39,8 @@ impl Tokens {
"else" => Tokens::Else,
"end" => Tokens::End,
"$(" => Tokens::SubStart,
")" => Tokens::SubEnd,
"(" => Tokens::ParenthesisStart,
")" => Tokens::ParenthesisEnd,
">" => Tokens::FileWrite,
"<" => Tokens::FileRead,
"|" => Tokens::RedirectInto,
@ -59,7 +61,6 @@ impl Tokens {
Tokens::StringFunction(str) => format!("${}", str.as_str()),
Tokens::CommandEnd(str) => str.to_string(),
Tokens::ExportSet => "=".to_string(),
Tokens::FunctionCallEnd => ")".to_string(),
Tokens::Function => "function".to_string(),
Tokens::If => "if".to_string(),
Tokens::Else => "else".to_string(),
@ -67,10 +68,14 @@ impl Tokens {
Tokens::For => "for".to_string(),
Tokens::End => "end".to_string(),
Tokens::SubStart => "$(".to_string(),
Tokens::SubEnd => ")".to_string(),
Tokens::ParenthesisStart => "(".to_string(),
Tokens::ParenthesisEnd => ")".to_string(),
Tokens::RedirectInto => "|".to_string(),
Tokens::FileRead => "<".to_string(),
Tokens::FileWrite => ">".to_string()
Tokens::FileWrite => ">".to_string(),
Tokens::And => "&&".to_string(),
Tokens::Or => "||".to_string(),
Tokens::JobCommandEnd => "&".to_string()
}
}
}
@ -84,7 +89,7 @@ fn read_var_ahead(i: usize, text: &String) -> (usize, Tokens) {
x += 1;
let letter: char = text.chars().nth(x).unwrap();
match letter {
'a'...'z' | 'A'...'Z' | '0'...'9' | ':' => {
'a'..='z' | 'A'..='Z' | '0'..='9' | ':' | '_' => {
buf.push(letter.clone());
}
'}' => {
@ -93,7 +98,7 @@ fn read_var_ahead(i: usize, text: &String) -> (usize, Tokens) {
}
break;
}
_ => { if !parens_mode { break } }
_ => { if !parens_mode { break } else { panic!("Invalid variable name") } }
}
}
let token = match text.chars().nth(i).unwrap() {
@ -174,15 +179,23 @@ pub fn tokenize(reader: &mut dyn std::io::BufRead) -> io::Result<Vec<Tokens>> {
skipper = x - i - 1;
buf_add = false;
},
')' => if !quote_active && !double_quote_active {
'(' => if !quote_active && !double_quote_active && !escape_active {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
buf = String::new();
}
tokens.push(Tokens::FunctionCallEnd);
tokens.push(Tokens::ParenthesisStart);
buf_add = false;
}
')' => if !quote_active && !double_quote_active && !escape_active {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
buf = String::new();
}
tokens.push(Tokens::ParenthesisEnd);
buf_add = false;
},
'i' => if !quote_active && !double_quote_active {
'i' => if !quote_active && !double_quote_active && !escape_active {
if check_keyword(&text, i, "if") {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
@ -193,7 +206,7 @@ pub fn tokenize(reader: &mut dyn std::io::BufRead) -> io::Result<Vec<Tokens>> {
buf_add = false;
}
},
'e' => if !quote_active && !double_quote_active {
'e' => if !quote_active && !double_quote_active && !escape_active {
if check_keyword(&text, i, "else") {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
@ -212,7 +225,18 @@ pub fn tokenize(reader: &mut dyn std::io::BufRead) -> io::Result<Vec<Tokens>> {
buf_add = false;
}
},
'w' => if !quote_active && !double_quote_active {
'l' => if !quote_active && !double_quote_active && !escape_active {
if check_keyword(&text, i, "let") {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
buf = String::new();
}
skipper = 3;
tokens.push(Tokens::Let);
buf_add = false;
}
},
'w' => if !quote_active && !double_quote_active && !escape_active {
if check_keyword(&text, i, "while") {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
@ -223,7 +247,7 @@ pub fn tokenize(reader: &mut dyn std::io::BufRead) -> io::Result<Vec<Tokens>> {
buf_add = false;
}
},
'f' => if !quote_active && !double_quote_active {
'f' => if !quote_active && !double_quote_active && !escape_active {
if check_keyword(&text, i, "for") {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
@ -239,7 +263,25 @@ pub fn tokenize(reader: &mut dyn std::io::BufRead) -> io::Result<Vec<Tokens>> {
tokens.push(Tokens::Literal(buf));
buf = String::new();
}
tokens.push(Tokens::RedirectInto);
if check_keyword(&text, i, "||") {
skipper = 1;
tokens.push(Tokens::Or);
} else {
tokens.push(Tokens::RedirectInto);
}
buf_add = false;
},
'&' => if !escape_active && !quote_active && !double_quote_active {
if buf.len() > 0 {
tokens.push(Tokens::Literal(buf));
buf = String::new();
}
if check_keyword(&text, i, "&&") {
skipper = 1;
tokens.push(Tokens::And);
} else {
tokens.push(Tokens::JobCommandEnd);
}
buf_add = false;
},
'>' => if !escape_active && !quote_active && !double_quote_active {
@ -292,4 +334,4 @@ pub fn tokenize(reader: &mut dyn std::io::BufRead) -> io::Result<Vec<Tokens>> {
}
Ok(tokens)
}
}

View file

@ -1,7 +1,5 @@
use std::collections::HashMap;
use std::ops::{Add, Deref};
use crate::parser::ast::FunctionDefinitionExpression;
use crate::parser::escape;
#[derive(Debug)]
pub enum Variable {
@ -30,7 +28,7 @@ impl Variable {
Variable::U128(num) => num.to_string(),
Variable::F32(num) => num.to_string(),
Variable::F64(num) => num.to_string(),
Variable::HMap(map) => {
Variable::HMap(_map) => {
String::from("[Object object]")
},
Variable::Array(vars) => {
@ -122,4 +120,4 @@ impl Context {
let mut func = &mut self.scopes.last_mut().unwrap().func;
func.insert(key, val);
}
}
}