use anyhow::{bail, Result}; use itertools::Itertools; use crate::assembler; #[derive(Debug, PartialEq, Eq)] pub enum Token { Comment(String), CharLiteral(String), StringLiteral(String), MacroLiteral(String), Literal(String), Number(String), Comma, Colon, LParen, RParen, Not, And, Nand, Or, Nor, Xor, Xnor, Lsh, Rsh, Add, Sub, Mul, Div, Pow, Cmp, Eq, Neq, Lt, Gt, Leq, Geq, Bol, Inv, Rnd, Newline(String), Whitespace(String), } impl assembler::ToCode for Token { fn to_code(&self) -> String { match self { Token::Comment(x) => format!(";{}", x), Token::CharLiteral(x) => format!("'{}'", x), Token::StringLiteral(x) => format!("\"{}\"", x), Token::MacroLiteral(x) => x.clone(), Token::Literal(x) => x.clone(), Token::Number(x) => x.clone(), Token::Comma => ",".to_string(), Token::Colon => ":".to_string(), Token::LParen => "(".to_string(), Token::RParen => ")".to_string(), Token::Not => "~".to_string(), Token::And => "&".to_string(), Token::Nand => "~&".to_string(), Token::Or => "|".to_string(), Token::Nor => "~|".to_string(), Token::Xor => "^".to_string(), Token::Xnor => "~^".to_string(), Token::Lsh => "<<".to_string(), Token::Rsh => ">>".to_string(), Token::Add => "+".to_string(), Token::Sub => "-".to_string(), Token::Mul => "*".to_string(), Token::Div => "/".to_string(), Token::Pow => "**".to_string(), Token::Cmp => "<=>".to_string(), Token::Eq => "==".to_string(), Token::Neq => "!=".to_string(), Token::Lt => "<".to_string(), Token::Gt => ">".to_string(), Token::Leq => "<=".to_string(), Token::Geq => ">=".to_string(), Token::Bol => "!!".to_string(), Token::Inv => "!".to_string(), Token::Rnd => "?".to_string(), Token::Newline(x) | Token::Whitespace(x) => x.clone(), } } } pub fn lex(source: String) -> Result> { let mut chars = source.chars().peekable(); let mut tokens: Vec = vec![]; while let Some(&ch) = chars.peek() { tokens.push(match ch { ';' => { chars.next(); chars.next_if(|c| *c == ';'); Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect()) } '@' => { chars.next(); Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect()) } '#' => { chars.next(); Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect()) } '\'' => { chars.next(); Token::CharLiteral(chars.by_ref().take_while(|c| *c != '\'').collect()) } '"' => { chars.next(); Token::StringLiteral(chars.by_ref().take_while(|c| *c != '"').collect()) } '.' => { chars.next(); Token::MacroLiteral(format!( ".{}", chars .peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_') .collect::() )) } ch if ch.is_alphabetic() => { let name: String = chars .peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_') .collect(); Token::Literal(name) } ch if ch.is_numeric() => Token::Number( chars .peeking_take_while(|c| c.is_alphanumeric() || *c == '_') .collect(), ), ',' => { chars.next(); Token::Comma } ':' => { chars.next(); Token::Colon } '(' => { chars.next(); Token::LParen } ')' => { chars.next(); Token::RParen } '~' => { chars.next(); if let Some(c) = chars.peek() { match c { '&' => { chars.next(); Token::Nand } '|' => { chars.next(); Token::Nor } '^' => { chars.next(); Token::Xnor } _ => Token::Not, } } else { Token::Not } } '&' => { chars.next(); Token::And } '|' => { chars.next(); Token::Or } '^' => { chars.next(); Token::Xor } '<' => { chars.next(); match chars.peek() { Some('<') => { chars.next(); Token::Lsh } Some('=') => { chars.next(); match chars.peek() { Some('>') => { chars.next(); Token::Cmp } _ => Token::Leq, } } _ => Token::Lt, } } '>' => { chars.next(); match chars.peek() { Some('>') => { chars.next(); Token::Rsh } Some('=') => { chars.next(); Token::Geq } _ => Token::Gt, } } '+' => { chars.next(); Token::Add } '-' => { chars.next(); Token::Sub } '*' => { chars.next(); if let Some('*') = chars.peek() { chars.next(); Token::Pow } else { Token::Mul } } '/' => { chars.next(); Token::Div } '=' => { chars.next(); if let Some('=') = chars.peek() { chars.next(); } Token::Eq } '!' => { chars.next(); match chars.peek() { Some('!') => { chars.next(); Token::Bol } Some('=') => { chars.next(); Token::Neq } _ => Token::Inv, } } '?' => { chars.next(); Token::Rnd } '\n' => Token::Newline(chars.peeking_take_while(|c| *c == '\n').collect()), ch if ch.is_whitespace() => Token::Whitespace( chars .peeking_take_while(|c| c.is_whitespace() && *c != '\n') .collect(), ), _ => bail!("Unexpected token: {}", ch), }); } Ok(tokens) } #[cfg(test)] mod tests { use super::*; use crate::assembler::ToCode; #[test] fn test_token_to_assembly() { assert_eq!( Token::Comment(" \"main function\" like definition macro".to_string()).to_code(), "; \"main function\" like definition macro".to_string() ); assert_eq!( Token::CharLiteral("\\n".to_string()).to_code(), "'\\n'".to_string() ); assert_eq!( Token::MacroLiteral("xyz".to_string()).to_code(), "xyz".to_string() ); assert_eq!( Token::Literal("xkcd".to_string()).to_code(), "xkcd".to_string() ); assert_eq!(Token::Newline("\n".to_string()).to_code(), "\n".to_string()); assert_eq!( Token::Whitespace(" ".to_string()).to_code(), " ".to_string() ); } #[test] fn test_lex() -> Result<()> { assert_eq!( lex(";; test".to_string())?, vec![Token::Comment(" test".to_string())] ); assert_eq!( lex("@ test".to_string())?, vec![Token::Comment(" test".to_string())] ); assert_eq!( lex("# test".to_string())?, vec![Token::Comment(" test".to_string())] ); assert_eq!( lex("'\\n'".to_string())?, vec![Token::CharLiteral("\\n".to_string())] ); assert_eq!( lex("\"test\"".to_string())?, vec![Token::StringLiteral("test".to_string())] ); assert_eq!( lex(".debug CORE_REG_PC".to_string())?, vec![ Token::MacroLiteral(".debug".to_string()), Token::Whitespace(" ".to_string()), Token::Literal("CORE_REG_PC".to_string()) ] ); Ok(()) } }