use anyhow::{bail, Result};
use parse_int;

use crate::lexer;

pub mod ast;
/// Splits one side of a stack-effect comment (e.g. "a b") into its
/// whitespace-separated entries. Missing or blank input yields an empty list.
pub fn parse_stack_state(s: Option<&str>) -> Vec<String> {
    match s {
        Some(x) if !x.trim().is_empty() => {
            x.split_whitespace().map(|x| x.trim().to_string()).collect()
        }
        _ => vec![],
    }
}
/// Parses a stack-effect comment such as "a b -- c" into its before/after
/// halves, splitting on the first "--".
pub fn parse_stack_result(s: &str) -> ast::StackResult {
    let mut splitter = s.splitn(2, "--");

    ast::StackResult {
        before: parse_stack_state(splitter.next()),
        after: parse_stack_state(splitter.next()),
    }
}
/// Turns a token stream into an AST, recursing for word definitions and
/// conditionals.
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
    let mut iter = tokens.into_iter().peekable();
    let mut body: ast::Body = vec![];

    while let Some(token) = iter.next() {
        match token {
            // Layout tokens carry no meaning here and are dropped.
            lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
            lexer::Token::ParenComment(x)
            | lexer::Token::BackslashComment(x)
            | lexer::Token::DoubleDashComment(x) => {
                body.push(ast::Node::Comment(x.trim().to_string()));
            }
            lexer::Token::StringLiteral { mode, string } => {
                body.push(ast::Node::String {
                    mode,
                    string: snailquote::unescape(&format!("\"{}\"", string))?,
                });
            }
            // Numbers keep their sign; the magnitude is parsed with parse_int
            // so prefixed literals (e.g. hex) are handled.
            lexer::Token::Number(x) => body.push(ast::Node::Number(if x.starts_with('-') {
                -parse_int::parse(&x[1..])?
            } else {
                parse_int::parse(&x)?
            })),
            lexer::Token::Word(x) => match x.as_str() {
                // Word definition: collect everything up to the matching ";",
                // tracking nesting depth so inner definitions stay intact.
                ":" => {
                    let mut depth: usize = 1;
                    let mut content = iter
                        .by_ref()
                        .take_while(|t| match t {
                            lexer::Token::Word(x) => match x.as_str() {
                                ":" => {
                                    depth += 1;
                                    true
                                }
                                ";" => {
                                    depth -= 1;
                                    depth != 0
                                }
                                _ => true,
                            },
                            _ => true,
                        })
                        .collect::<Vec<_>>()
                        .into_iter()
                        .peekable();
                    if depth != 0 {
                        bail!("Unbalanced word definitions");
                    }

                    // The first non-whitespace token names the word.
                    let name = match content.find(|t| {
                        !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
                    }) {
                        Some(t) => match t {
                            lexer::Token::Word(x) => x.clone(),
                            _ => bail!("Word definition name must be a word itself: {:?}", t),
                        },
                        None => bail!("Word definition can not be empty"),
                    };
                    // An optional comment right after the name is treated as
                    // the stack-effect declaration.
                    let stack = loop {
                        if let Some(t) = content.peek() {
                            match t {
                                lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {
                                    content.next();
                                }
                                lexer::Token::ParenComment(x)
                                | lexer::Token::BackslashComment(x)
                                | lexer::Token::DoubleDashComment(x) => {
                                    let y = x.to_string();
                                    content.next();
                                    break Some(parse_stack_result(&y));
                                }
                                _ => break None,
                            }
                        } else {
                            break None;
                        }
                    };

                    // The remaining tokens form the definition body.
                    body.push(ast::Node::WordDefinition {
                        name,
                        stack,
                        body: parse(content.collect())?.body,
                    });
                }
                // Conditional: split the tokens into the "if" branch and an
                // optional "else" branch, both terminated by "then".
                "if" => {
                    let mut depth: usize = 1;
                    let mut else_used = false;
                    let if_toks: Vec<_> = iter
                        .by_ref()
                        .take_while(|t| match t {
                            lexer::Token::Word(x) => match x.as_str() {
                                "if" => {
                                    depth += 1;
                                    true
                                }
                                "else" => {
                                    if depth == 1 {
                                        else_used = true;
                                        false
                                    } else {
                                        true
                                    }
                                }
                                "then" => {
                                    depth -= 1;
                                    depth != 0
                                }
                                _ => true,
                            },
                            _ => true,
                        })
                        .collect();
                    let else_toks: Vec<_> = if else_used {
                        iter.by_ref()
                            .take_while(|t| match t {
                                lexer::Token::Word(x) => match x.as_str() {
                                    "if" => {
                                        depth += 1;
                                        true
                                    }
                                    "then" => {
                                        depth -= 1;
                                        depth != 0
                                    }
                                    _ => true,
                                },
                                _ => true,
                            })
                            .collect()
                    } else {
                        vec![]
                    };
                    if depth != 0 {
                        bail!("Unbalanced conditions");
                    }

                    body.push(ast::Node::Condition {
                        if_body: parse(if_toks)?.body,
                        else_body: parse(else_toks)?.body,
                    });
                }
                // Anything else is a plain word call.
                _ => {
                    body.push(ast::Node::Word(x));
                }
            },
        }
    }

    Ok(ast::AST { body })
}
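
// A minimal test sketch of the stack-effect helpers above, assuming the
// standard Cargo test harness; it relies only on `parse_stack_state`,
// `parse_stack_result`, and the public `before`/`after` fields of
// `ast::StackResult` constructed earlier in this file.
#[cfg(test)]
mod stack_effect_tests {
    use super::*;

    #[test]
    fn splits_stack_effect_on_double_dash() {
        // "a b -- c" should yield before = ["a", "b"] and after = ["c"].
        let result = parse_stack_result("a b -- c");
        assert_eq!(result.before, vec!["a".to_string(), "b".to_string()]);
        assert_eq!(result.after, vec!["c".to_string()]);
    }

    #[test]
    fn blank_or_missing_state_is_empty() {
        assert!(parse_stack_state(None).is_empty());
        assert!(parse_stack_state(Some("   ")).is_empty());
    }
}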