use anyhow::Result; use hence::assembler::ToCode; use itertools::Itertools; #[derive(Debug)] pub enum Token { Newline(usize), Whitespace(usize), ParenComment(String), BackslashComment(String), DoubleDashComment(String), StringLiteral { mode: String, string: String }, Number(String), Word(String), } impl ToCode for Token { fn to_code(&self) -> String { match self { Token::Newline(x) => ["\n"].into_iter().cycle().take(*x).join(""), Token::Whitespace(x) => [" "].into_iter().cycle().take(*x).join(""), Token::ParenComment(x) => format!("( {})", x), Token::BackslashComment(x) => format!("\\{}", x), Token::DoubleDashComment(x) => format!("-- {}", x), Token::StringLiteral { mode, string } => format!("{}\" {}\"", mode, string), Token::Number(x) | Token::Word(x) => x.clone(), } } } pub fn is_space(c: char) -> bool { c.is_whitespace() || c == '\n' } pub fn lex(source: &str) -> Result> { let mut chars = source.chars().peekable(); let mut tokens: Vec = vec![]; while let Some(c) = chars.peek() { tokens.push(match c { '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').count()), _ if c.is_whitespace() => { Token::Whitespace(chars.peeking_take_while(|&c| c.is_whitespace()).count()) } '\\' => Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect()), _ if c.is_numeric() => { Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect()) } _ => { let x: String = chars.peeking_take_while(|&c| !is_space(c)).collect(); let mut iter = x.chars(); match x.as_str() { "(" => Token::ParenComment( chars.by_ref().skip(1).take_while(|&c| c != ')').collect(), ), "--" => Token::DoubleDashComment( chars.by_ref().take_while(|&c| c != '\n').collect(), ), _ if x.ends_with('"') => Token::StringLiteral { mode: x.chars().take(x.len() - 1).collect(), string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(), }, _ if iter.next() == Some('-') => { if let Some(c) = iter.next() { if c.is_numeric() { Token::Number(x) } else { Token::Word(x) } } else { Token::Word(x) } } _ => Token::Word(x), } } }); } Ok(tokens) }