347 lines
9.8 KiB
Rust
347 lines
9.8 KiB
Rust
use anyhow::{bail, Result};
|
|
use itertools::Itertools;
|
|
|
|
use crate::assembler;
|
|
|
|
/// A single lexical unit of assembly source, as produced by [`lex`].
///
/// Variants carrying a `String` keep the raw source text of the token (without
/// surrounding delimiters such as quotes or the comment marker); the remaining
/// variants are punctuation and operators with a fixed spelling.
///
/// `Clone` is derived so later stages can duplicate tokens without rebuilding them.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Text of a line comment, excluding the comment marker itself.
    /// The lexer accepts `;`, `;;`, `@` and `#` as markers; rendering always uses `;`.
    Comment(String),

    /// Raw text found between a pair of single quotes (quotes excluded).
    CharLiteral(String),
    /// Raw text found between a pair of double quotes (quotes excluded).
    StringLiteral(String),
    /// A macro name. The lexer stores it including its leading `.`.
    MacroLiteral(String),
    /// A word that starts with an alphabetic character and continues with
    /// alphabetic characters, numeric characters or `_`.
    Literal(String),
    /// A word that starts with a numeric character and continues with
    /// alphanumeric characters or `_`; kept as unparsed text.
    Number(String),

    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `(`
    LParen,
    /// `)`
    RParen,

    /// `~`
    Not,
    /// `&`
    And,
    /// `~&`
    Nand,
    /// `|`
    Or,
    /// `~|`
    Nor,
    /// `^`
    Xor,
    /// `~^`
    Xnor,
    /// `<<`
    Lsh,
    /// `>>`
    Rsh,
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `**`
    Pow,
    /// `<=>`
    Cmp,
    /// Rendered as `==`; the lexer also accepts a single `=`.
    Eq,
    /// `!=`
    Neq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Leq,
    /// `>=`
    Geq,
    /// `!!`
    Bol,
    /// `!`
    Inv,
    /// `?`
    Rnd,

    /// A run of one or more consecutive `\n` characters.
    Newline(String),
    /// A run of whitespace characters that contains no `\n`.
    Whitespace(String),
}
|
|
|
|
impl assembler::ToCode for Token {
|
|
fn to_code(&self) -> String {
|
|
match self {
|
|
Token::Comment(x) => format!(";{}", x),
|
|
Token::CharLiteral(x) => format!("'{}'", x),
|
|
Token::StringLiteral(x) => format!("\"{}\"", x),
|
|
Token::MacroLiteral(x) => x.clone(),
|
|
Token::Literal(x) => x.clone(),
|
|
Token::Number(x) => x.clone(),
|
|
Token::Comma => ",".to_string(),
|
|
Token::Colon => ":".to_string(),
|
|
Token::LParen => "(".to_string(),
|
|
Token::RParen => ")".to_string(),
|
|
Token::Not => "~".to_string(),
|
|
Token::And => "&".to_string(),
|
|
Token::Nand => "~&".to_string(),
|
|
Token::Or => "|".to_string(),
|
|
Token::Nor => "~|".to_string(),
|
|
Token::Xor => "^".to_string(),
|
|
Token::Xnor => "~^".to_string(),
|
|
Token::Lsh => "<<".to_string(),
|
|
Token::Rsh => ">>".to_string(),
|
|
Token::Add => "+".to_string(),
|
|
Token::Sub => "-".to_string(),
|
|
Token::Mul => "*".to_string(),
|
|
Token::Div => "/".to_string(),
|
|
Token::Pow => "**".to_string(),
|
|
Token::Cmp => "<=>".to_string(),
|
|
Token::Eq => "==".to_string(),
|
|
Token::Neq => "!=".to_string(),
|
|
Token::Lt => "<".to_string(),
|
|
Token::Gt => ">".to_string(),
|
|
Token::Leq => "<=".to_string(),
|
|
Token::Geq => ">=".to_string(),
|
|
Token::Bol => "!!".to_string(),
|
|
Token::Inv => "!".to_string(),
|
|
Token::Rnd => "?".to_string(),
|
|
Token::Newline(x) | Token::Whitespace(x) => x.clone(),
|
|
}
|
|
}
|
|
}
|
|
|
|
pub fn lex(source: String) -> Result<Vec<Token>> {
|
|
let mut chars = source.chars().peekable();
|
|
let mut tokens: Vec<Token> = vec![];
|
|
|
|
while let Some(&ch) = chars.peek() {
|
|
tokens.push(match ch {
|
|
';' => {
|
|
chars.next();
|
|
chars.next_if(|c| *c == ';');
|
|
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
|
|
}
|
|
'@' => {
|
|
chars.next();
|
|
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
|
|
}
|
|
'#' => {
|
|
chars.next();
|
|
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
|
|
}
|
|
'\'' => {
|
|
chars.next();
|
|
Token::CharLiteral(chars.by_ref().take_while(|c| *c != '\'').collect())
|
|
}
|
|
'"' => {
|
|
chars.next();
|
|
Token::StringLiteral(chars.by_ref().take_while(|c| *c != '"').collect())
|
|
}
|
|
'.' => {
|
|
chars.next();
|
|
Token::MacroLiteral(format!(
|
|
".{}",
|
|
chars
|
|
.peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
|
|
.collect::<String>()
|
|
))
|
|
}
|
|
ch if ch.is_alphabetic() => {
|
|
let name: String = chars
|
|
.peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
|
|
.collect();
|
|
|
|
Token::Literal(name)
|
|
}
|
|
ch if ch.is_numeric() => Token::Number(
|
|
chars
|
|
.peeking_take_while(|c| c.is_alphanumeric() || *c == '_')
|
|
.collect(),
|
|
),
|
|
',' => {
|
|
chars.next();
|
|
Token::Comma
|
|
}
|
|
':' => {
|
|
chars.next();
|
|
Token::Colon
|
|
}
|
|
'(' => {
|
|
chars.next();
|
|
Token::LParen
|
|
}
|
|
')' => {
|
|
chars.next();
|
|
Token::RParen
|
|
}
|
|
'~' => {
|
|
chars.next();
|
|
if let Some(c) = chars.peek() {
|
|
match c {
|
|
'&' => {
|
|
chars.next();
|
|
Token::Nand
|
|
}
|
|
'|' => {
|
|
chars.next();
|
|
Token::Nor
|
|
}
|
|
'^' => {
|
|
chars.next();
|
|
Token::Xnor
|
|
}
|
|
_ => Token::Not,
|
|
}
|
|
} else {
|
|
Token::Not
|
|
}
|
|
}
|
|
'&' => {
|
|
chars.next();
|
|
Token::And
|
|
}
|
|
'|' => {
|
|
chars.next();
|
|
Token::Or
|
|
}
|
|
'^' => {
|
|
chars.next();
|
|
Token::Xor
|
|
}
|
|
'<' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('<') => {
|
|
chars.next();
|
|
Token::Lsh
|
|
}
|
|
Some('=') => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('>') => {
|
|
chars.next();
|
|
Token::Cmp
|
|
}
|
|
_ => Token::Leq,
|
|
}
|
|
}
|
|
_ => Token::Lt,
|
|
}
|
|
}
|
|
'>' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('>') => {
|
|
chars.next();
|
|
Token::Rsh
|
|
}
|
|
Some('=') => {
|
|
chars.next();
|
|
Token::Geq
|
|
}
|
|
_ => Token::Gt,
|
|
}
|
|
}
|
|
'+' => {
|
|
chars.next();
|
|
Token::Add
|
|
}
|
|
'-' => {
|
|
chars.next();
|
|
Token::Sub
|
|
}
|
|
'*' => {
|
|
chars.next();
|
|
if let Some('*') = chars.peek() {
|
|
chars.next();
|
|
Token::Pow
|
|
} else {
|
|
Token::Mul
|
|
}
|
|
}
|
|
'/' => {
|
|
chars.next();
|
|
Token::Div
|
|
}
|
|
'=' => {
|
|
chars.next();
|
|
if let Some('=') = chars.peek() {
|
|
chars.next();
|
|
}
|
|
Token::Eq
|
|
}
|
|
'!' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('!') => {
|
|
chars.next();
|
|
Token::Bol
|
|
}
|
|
Some('=') => {
|
|
chars.next();
|
|
Token::Neq
|
|
}
|
|
_ => Token::Inv,
|
|
}
|
|
}
|
|
'?' => {
|
|
chars.next();
|
|
Token::Rnd
|
|
}
|
|
'\n' => Token::Newline(chars.peeking_take_while(|c| *c == '\n').collect()),
|
|
ch if ch.is_whitespace() => Token::Whitespace(
|
|
chars
|
|
.peeking_take_while(|c| c.is_whitespace() && *c != '\n')
|
|
.collect(),
|
|
),
|
|
_ => bail!("Unexpected token: {}", ch),
|
|
});
|
|
}
|
|
|
|
Ok(tokens)
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::assembler::ToCode;

    /// Each token must render back to the expected source text.
    #[test]
    fn test_token_to_assembly() {
        let cases = [
            (
                Token::Comment(" \"main function\" like definition macro".to_string()),
                "; \"main function\" like definition macro",
            ),
            (Token::CharLiteral("\\n".to_string()), "'\\n'"),
            (Token::MacroLiteral("xyz".to_string()), "xyz"),
            (Token::Literal("xkcd".to_string()), "xkcd"),
            (Token::Newline("\n".to_string()), "\n"),
            (Token::Whitespace(" ".to_string()), " "),
        ];

        for (token, rendered) in &cases {
            assert_eq!(token.to_code(), rendered.to_string());
        }
    }

    /// Each input snippet must lex into exactly the listed tokens.
    #[test]
    fn test_lex() -> Result<()> {
        let cases: Vec<(&str, Vec<Token>)> = vec![
            (";; test", vec![Token::Comment(" test".to_string())]),
            ("@ test", vec![Token::Comment(" test".to_string())]),
            ("# test", vec![Token::Comment(" test".to_string())]),
            ("'\\n'", vec![Token::CharLiteral("\\n".to_string())]),
            ("\"test\"", vec![Token::StringLiteral("test".to_string())]),
            (
                ".debug CORE_REG_PC",
                vec![
                    Token::MacroLiteral(".debug".to_string()),
                    Token::Whitespace(" ".to_string()),
                    Token::Literal("CORE_REG_PC".to_string()),
                ],
            ),
        ];

        for (input, expected) in cases {
            assert_eq!(lex(input.to_string())?, expected);
        }

        Ok(())
    }
}
|