//! Lexer for the `hence` assembler (hence/src/lib/lexer.rs).

use crate::assembler;
use itertools::{Itertools, PeekingNext};
/// A single lexical token produced by [`lex`].
///
/// String-carrying variants keep the exact source text (minus any
/// delimiter noted below) so the original input can be reconstructed
/// through `ToAssembly`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Comment body with the leading `;`/`;;`/`@` marker stripped.
    Comment(String),
    /// Contents of a double-quoted string, quotes stripped.
    StringLiteral(String),
    /// A `.`-prefixed identifier, stored *including* the leading dot.
    MacroLiteral(String),
    /// A bare identifier: letters, digits, underscores.
    Literal(String),
    /// A numeric literal; may also contain letters/underscores
    /// (e.g. hex forms like `0xff`), exactly as lexed.
    Number(String),
    Comma,
    Colon,
    LParen,
    RParen,
    Assign,
    Add,
    Sub,
    Mul,
    Div,
    /// The `**` operator.
    Pow,
    /// One or more consecutive `\n` characters, stored verbatim.
    Newline(String),
    /// A run of non-newline whitespace, stored verbatim.
    Whitespace(String),
}
impl assembler::ToAssembly for Token {
    /// Renders this token back into its textual assembly form.
    ///
    /// NOTE(review): a `Comment` is always emitted with a single `;`,
    /// even though the lexer also accepts `;;` and `@` markers — a
    /// round trip may therefore not reproduce the original marker;
    /// confirm this is intended.
    fn to_assembly(&self) -> String {
        match self {
            Token::Comment(body) => format!(";{body}"),
            Token::StringLiteral(body) => format!("\"{body}\""),
            // These variants store their source text verbatim.
            Token::MacroLiteral(text)
            | Token::Literal(text)
            | Token::Number(text)
            | Token::Newline(text)
            | Token::Whitespace(text) => text.clone(),
            Token::Comma => String::from(","),
            Token::Colon => String::from(":"),
            Token::LParen => String::from("("),
            Token::RParen => String::from(")"),
            Token::Assign => String::from("="),
            Token::Add => String::from("+"),
            Token::Sub => String::from("-"),
            Token::Mul => String::from("*"),
            Token::Div => String::from("/"),
            Token::Pow => String::from("**"),
        }
    }
}
/// Splits `source` into a flat stream of [`Token`]s.
///
/// Whitespace, newlines, and comments are preserved as tokens, so the
/// original text can be reconstructed from the output.
///
/// # Errors
///
/// Returns `Err` with a message naming the first character that does
/// not begin any known token.
pub fn lex(source: String) -> Result<Vec<Token>, String> {
    let mut chars = source.chars().peekable();
    let mut tokens = Vec::<Token>::new();
    // Every arm consumes at least one character, so the loop always
    // makes progress and terminates.
    while let Some(&ch) = chars.peek() {
        match ch {
            // Line comment: `;` or `@`; a second marker char (`;;`,
            // `@@`, or a mix) is swallowed. The body runs up to — but
            // not including — the newline.
            ';' | '@' => {
                chars.next();
                chars.next_if(|c| *c == ';' || *c == '@');
                tokens.push(Token::Comment(
                    chars.peeking_take_while(|c| *c != '\n').collect::<String>(),
                ));
            }
            // Double-quoted string; the closing quote is consumed and
            // not stored.
            // NOTE(review): an unterminated string is silently accepted
            // and runs to end of input — confirm that is intended.
            '"' => {
                chars.next();
                tokens.push(Token::StringLiteral(
                    chars.by_ref().take_while(|c| *c != '"').collect::<String>(),
                ));
            }
            // `.name` macro literal; the dot is kept in the token text.
            '.' => {
                chars.next();
                tokens.push(Token::MacroLiteral(format!(
                    ".{}",
                    chars
                        .peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
                        .collect::<String>()
                )));
            }
            // Identifier: starts with a letter, then letters/digits/`_`.
            ch if ch.is_alphabetic() => {
                let name: String = chars
                    .peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
                    .collect();
                tokens.push(Token::Literal(name));
            }
            // Number: starts with a digit; letters are allowed inside
            // so forms like `0xff` lex as a single token.
            ch if ch.is_numeric() => {
                tokens.push(Token::Number(
                    chars
                        .peeking_take_while(|c| c.is_alphanumeric() || *c == '_')
                        .collect::<String>(),
                ));
            }
            ',' => {
                tokens.push(Token::Comma);
                chars.next();
            }
            ':' => {
                tokens.push(Token::Colon);
                chars.next();
            }
            '(' => {
                tokens.push(Token::LParen);
                chars.next();
            }
            ')' => {
                tokens.push(Token::RParen);
                chars.next();
            }
            '=' => {
                tokens.push(Token::Assign);
                chars.next();
            }
            '+' => {
                tokens.push(Token::Add);
                chars.next();
            }
            '-' => {
                tokens.push(Token::Sub);
                chars.next();
            }
            // `*` alone is Mul; `**` (second star consumed here) is Pow.
            '*' => {
                chars.next();
                tokens.push(if chars.peeking_next(|c| *c == '*').is_some() {
                    Token::Pow
                } else {
                    Token::Mul
                });
            }
            '/' => {
                tokens.push(Token::Div);
                chars.next();
            }
            // A run of consecutive newlines becomes one Newline token.
            '\n' => {
                tokens.push(Token::Newline(
                    chars.peeking_take_while(|c| *c == '\n').collect::<String>(),
                ));
            }
            // Any other whitespace run (spaces, tabs, `\r`, ...) —
            // newlines excluded so they land in the arm above.
            ch if ch.is_whitespace() => {
                tokens.push(Token::Whitespace(
                    chars
                        .peeking_take_while(|c| c.is_whitespace() && *c != '\n')
                        .collect::<String>(),
                ));
            }
            _ => {
                return Err(format!("Unexpected token: '{ch}'"));
            }
        }
    }
    Ok(tokens)
}