hence/henceforth/src/parser.rs

169 lines
6.4 KiB
Rust

use anyhow::{bail, Result};
use parse_int;
use crate::lexer;
pub mod ast;
/// Splits one side of a stack-effect comment (e.g. `"a b"`) into its
/// whitespace-separated item names.
///
/// `None` and blank/whitespace-only input both yield an empty vector.
pub fn parse_stack_state(s: Option<&str>) -> Vec<String> {
    // `split_whitespace` already skips leading/trailing/repeated whitespace
    // and never yields an empty item, so the original emptiness guard and
    // per-item `.trim()` were redundant.
    s.map_or_else(Vec::new, |x| {
        x.split_whitespace().map(str::to_string).collect()
    })
}
/// Parses a Forth-style stack-effect comment body such as `"a b -- c"`
/// into the item lists before and after the `--` separator.
///
/// Only the first `--` splits; any later occurrence stays in `after`.
/// With no `--` at all, everything lands in `before` and `after` is empty.
pub fn parse_stack_result(s: &str) -> ast::StackResult {
    let mut halves = s.splitn(2, "--");
    let before = parse_stack_state(halves.next());
    let after = parse_stack_state(halves.next());
    ast::StackResult { before, after }
}
/// Parses a flat lexer token stream into an `ast::AST`.
///
/// Layout tokens (newlines/whitespace) are dropped, comments become
/// `Comment` nodes, and the compound forms — word definitions
/// (`: name ... ;`) and conditionals (`if ... [else ...] then`) — are
/// handled by slicing out their delimited token span and recursing.
///
/// # Errors
/// Bails on unbalanced `:`/`;` or `if`/`then` pairs, on a word
/// definition whose name is missing or not a plain word, and on a
/// number literal that `parse_int::parse` rejects.
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
    let mut iter = tokens.into_iter().peekable();
    let mut body: ast::Body = vec![];
    while let Some(token) = iter.next() {
        match token {
            // Insignificant layout tokens are discarded.
            lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
            // All three comment flavors collapse into one trimmed Comment node.
            lexer::Token::ParenComment(x)
            | lexer::Token::BackslashComment(x)
            | lexer::Token::DoubleDashComment(x) => {
                body.push(ast::Node::Comment(x.trim().to_string()));
            }
            lexer::Token::StringLiteral { mode, string } => {
                body.push(ast::Node::String { mode, string });
            }
            lexer::Token::Number(x) => body.push(ast::Node::Number(parse_int::parse(&x)?)),
            lexer::Token::Word(x) => match x.as_str() {
                ":" => {
                    // Collect the definition's tokens up to the matching `;`,
                    // tracking nesting depth so inner `: ... ;` pairs survive.
                    let mut depth: usize = 1;
                    let mut content = iter
                        .by_ref()
                        .take_while(|t| match t {
                            lexer::Token::Word(x) => match x.as_str() {
                                ":" => {
                                    depth += 1;
                                    true
                                }
                                ";" => {
                                    depth -= 1;
                                    // Rejecting the outermost `;` both ends the
                                    // span and drops that token (take_while
                                    // consumes the item it rejects).
                                    depth != 0
                                }
                                _ => true,
                            },
                            _ => true,
                        })
                        .collect::<Vec<_>>()
                        .into_iter()
                        .peekable();
                    // Stream ran out before the matching `;`.
                    if depth != 0 {
                        bail!("Unbalanced word definitions");
                    }
                    // First non-layout token is the new word's name. `find`
                    // consumes through it, so it is excluded from the body.
                    let name = match content.find(|t| {
                        !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
                    }) {
                        Some(t) => match t {
                            lexer::Token::Word(x) => x.clone(),
                            _ => bail!("Word definition name must be a word itself: {:?}", t),
                        },
                        None => bail!("Word definition can not be empty"),
                    };
                    // An optional comment immediately after the name (layout
                    // permitted in between) is the stack-effect declaration,
                    // e.g. `( a b -- c )`.
                    let stack = loop {
                        if let Some(t) = content.peek() {
                            match t {
                                // Skip layout between name and comment.
                                lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {
                                    content.next();
                                }
                                lexer::Token::ParenComment(x)
                                | lexer::Token::BackslashComment(x)
                                | lexer::Token::DoubleDashComment(x) => {
                                    // Copy the text out of the peeked token so
                                    // the iterator can advance past it before
                                    // the comment is parsed.
                                    let y = &x.to_string();
                                    content.next();
                                    break Some(parse_stack_result(y));
                                }
                                // Anything else: no stack-effect comment.
                                _ => break None,
                            }
                        } else {
                            break None;
                        }
                    };
                    body.push(ast::Node::WordDefinition {
                        name,
                        stack,
                        // Remaining tokens form the definition body.
                        body: parse(content.collect())?.body,
                    });
                }
                "if" => {
                    // Gather the true branch up to this `if`'s own `else` or
                    // `then`, counting nested `if`s so their terminators pass
                    // through into the collected span.
                    let mut depth: usize = 1;
                    let mut else_used = false;
                    let if_toks: Vec<_> = iter
                        .by_ref()
                        .take_while(|t| match t {
                            lexer::Token::Word(x) => match x.as_str() {
                                "if" => {
                                    depth += 1;
                                    true
                                }
                                "else" => {
                                    // Only an `else` at depth 1 belongs to THIS
                                    // `if`; nested ones are kept in the span.
                                    if depth == 1 {
                                        else_used = true;
                                        false
                                    } else {
                                        true
                                    }
                                }
                                "then" => {
                                    depth -= 1;
                                    depth != 0
                                }
                                _ => true,
                            },
                            _ => true,
                        })
                        .collect();
                    // If an `else` terminated the true branch, collect the
                    // false branch up to the matching `then`. `depth` carries
                    // over (still 1 here when else_used is set).
                    let else_toks: Vec<_> = if else_used {
                        iter.by_ref()
                            .take_while(|t| match t {
                                lexer::Token::Word(x) => match x.as_str() {
                                    "if" => {
                                        depth += 1;
                                        true
                                    }
                                    "then" => {
                                        depth -= 1;
                                        depth != 0
                                    }
                                    _ => true,
                                },
                                _ => true,
                            })
                            .collect()
                    } else {
                        vec![]
                    };
                    // Stream ended before the closing `then`.
                    if depth != 0 {
                        bail!("Unbalanced conditions");
                    }
                    // Both branches are parsed recursively; an absent `else`
                    // yields an empty else_body.
                    body.push(ast::Node::Condition {
                        if_body: parse(if_toks)?.body,
                        else_body: parse(else_toks)?.body,
                    });
                }
                // Any other word is a plain word reference.
                _ => {
                    body.push(ast::Node::Word(x));
                }
            },
        }
    }
    Ok(ast::AST { body })
}