hence/henceforth/src/lib/parser.rs

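//! Parser for henceforth source code: consumes the token stream produced by
//! [`crate::lexer`] and turns it into an [`ast::AST`], including nested
//! `: name ... ;` word definitions with optional stack-effect comments.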
use anyhow::{bail, Result};
use num_parse;

use crate::lexer;

pub mod ast;

/// Splits one side of a stack-effect comment into its whitespace-separated items.
fn process_raw_stack_result(s: Option<&str>) -> Vec<String> {
    match s {
        Some(x) if !x.trim().is_empty() => {
            x.split_whitespace().map(|x| x.trim().to_string()).collect()
        }
        _ => vec![],
    }
}
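
/// Parses a stack-effect comment of the form `before -- after` (e.g. `"a b -- c"`)
/// into an [`ast::StackResult`], splitting on the first `--`: `before` becomes
/// `["a", "b"]` and `after` becomes `["c"]`. A missing `--` leaves `after` empty.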
pub fn parse_stack_result(s: String) -> ast::StackResult {
    let mut splitter = s.splitn(2, "--");
    ast::StackResult {
        before: process_raw_stack_result(splitter.next()),
        after: process_raw_stack_result(splitter.next()),
    }
}
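
/// Builds an [`ast::AST`] from a lexer token stream. Comments, string literals,
/// numbers, and plain words map directly to AST nodes; a `:` word starts a
/// definition that is collected up to its matching `;` and parsed recursively,
/// together with its name and optional stack-effect comment. Invalid numbers
/// and unbalanced `:`/`;` pairs are reported as errors.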
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
    let mut iter = tokens.into_iter().peekable();
    let mut body: ast::Body = vec![];

    while let Some(token) = iter.next() {
        match token {
            // Whitespace and newlines carry no meaning for the AST.
            lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
            // Comments are preserved in the AST, trimmed of surrounding whitespace.
            lexer::Token::ParenComment(x)
            | lexer::Token::BackslashComment(x)
            | lexer::Token::DoubleDashComment(x) => {
                body.push(ast::Node::Comment(x.trim().to_string()));
            }
            lexer::Token::StringLiteral { mode, string } => {
                body.push(ast::Node::String { mode, string });
            }
            // Numbers are parsed as i32; anything unparsable is an error.
            lexer::Token::Number(x) => match num_parse::parse_int::<i32>(x.as_str()) {
                Some(n) => {
                    body.push(ast::Node::Number(n));
                }
                None => bail!("Invalid number: {}", x),
            },
            lexer::Token::Word(x) => match x.as_str() {
                // `:` starts a word definition that runs until its matching `;`.
                ":" => {
                    let mut depth: usize = 1;
                    let mut content = iter
                        .by_ref()
                        .take_while(|t| match t {
                            lexer::Token::Word(x) => match x.as_str() {
                                ":" => {
                                    depth += 1;
                                    true
                                }
                                ";" => {
                                    depth -= 1;
                                    depth != 0
                                }
                                _ => true,
                            },
                            _ => true,
                        })
                        .collect::<Vec<_>>()
                        .into_iter()
                        .peekable();
                    if depth != 0 {
                        bail!("Unbalanced word definitions");
                    }
                    // The first non-whitespace token is the definition's name.
                    let name = match content.find(|t| {
                        !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
                    }) {
                        Some(t) => match t {
                            lexer::Token::Word(x) => x.clone(),
                            _ => bail!("Word definition name must be a word itself: {:?}", t),
                        },
                        None => bail!("Word definition can not be empty"),
                    };
                    // An optional stack-effect comment may follow the name. Peek
                    // instead of consuming, so a definition without one does not
                    // lose its first body token.
                    while matches!(
                        content.peek(),
                        Some(lexer::Token::Newline(_)) | Some(lexer::Token::Whitespace(_))
                    ) {
                        content.next();
                    }
                    let stack = match content.peek() {
                        Some(lexer::Token::ParenComment(x))
                        | Some(lexer::Token::BackslashComment(x))
                        | Some(lexer::Token::DoubleDashComment(x)) => {
                            Some(parse_stack_result(x.clone()))
                        }
                        _ => None,
                    };
                    if stack.is_some() {
                        // Drop the consumed stack-effect comment from the body.
                        content.next();
                    }
                    body.push(ast::Node::WordDefinition {
                        name,
                        stack,
                        body: parse(content.collect())?.body,
                    });
                }
                // Any other word is an ordinary word reference.
                _ => {
                    body.push(ast::Node::Word(x));
                }
            },
        }
    }

    Ok(ast::AST { body })
}
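
// A minimal test sketch for the stack-effect helpers above; it exercises only
// `process_raw_stack_result` and `parse_stack_result`, and assumes the
// `before`/`after` fields of `ast::StackResult` are visible here with the same
// `Vec<String>` types this module already constructs them with.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn blank_or_missing_sides_yield_no_items() {
        assert!(process_raw_stack_result(None).is_empty());
        assert!(process_raw_stack_result(Some("   ")).is_empty());
    }

    #[test]
    fn stack_effect_comment_splits_on_the_first_double_dash() {
        let result = parse_stack_result("a b -- c".to_string());
        assert_eq!(result.before, vec!["a".to_string(), "b".to_string()]);
        assert_eq!(result.after, vec!["c".to_string()]);
    }

    #[test]
    fn comment_without_double_dash_has_empty_after() {
        let result = parse_stack_result("a b".to_string());
        assert_eq!(result.before, vec!["a".to_string(), "b".to_string()]);
        assert!(result.after.is_empty());
    }
}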