//! Lexer: defines the `Token` type and the `lex` function that splits source text into tokens.
use anyhow::{bail, Result};
|
|
use itertools::Itertools;
|
|
use std::fmt;
|
|
|
|
use crate::assembler;
|
|
|
|
/// A single lexical unit produced by `lex`.
///
/// Variants that carry a `String` keep the matched text; operator and
/// punctuation variants are identified by the symbol noted on each of them
/// (the same symbol the `Display` impl writes back out).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Comment text up to (not including) the end of the line, without the
    /// leading comment marker.
    Comment(String),

    /// Text found between a pair of single quotes, quotes excluded.
    CharLiteral(String),
    /// Text found between a pair of double quotes, quotes excluded.
    StringLiteral(String),
    /// A `.`-prefixed word; the stored text includes the leading `.`.
    MacroLiteral(String),
    /// A word that starts with an alphabetic character.
    Literal(String),
    /// A numeric literal kept as source text (may start with `-`).
    Number(String),

    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `(`
    LParen,
    /// `)`
    RParen,

    /// `~`
    Not,
    /// `&`
    And,
    /// `~&`
    Nand,
    /// `|`
    Or,
    /// `~|`
    Nor,
    /// `^`
    Xor,
    /// `~^`
    Xnor,
    /// `<<`
    Lsh,
    /// `>>`
    Rsh,
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `**`
    Pow,
    /// `<=>`
    Cmp,
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Leq,
    /// `>=`
    Geq,
    /// `!!`
    Bol,
    /// `!`
    Inv,
    /// `?`
    Rnd,

    /// A run of one or more `\n` characters.
    Newline(String),
    /// A run of whitespace characters other than `\n`.
    Whitespace(String),
}
|
|
|
|
impl fmt::Display for Token {
|
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
|
match self {
|
|
Token::Comment(x) => write!(f, ";{}", x),
|
|
Token::CharLiteral(x) => write!(f, "'{}'", x),
|
|
Token::StringLiteral(x) => write!(f, "\"{}\"", x),
|
|
Token::MacroLiteral(x) => write!(f, "{}", x),
|
|
Token::Literal(x) => write!(f, "{}", x),
|
|
Token::Number(x) => write!(f, "{}", x),
|
|
Token::Comma => write!(f, ","),
|
|
Token::Colon => write!(f, ":"),
|
|
Token::LParen => write!(f, "("),
|
|
Token::RParen => write!(f, ")"),
|
|
Token::Not => write!(f, "~"),
|
|
Token::And => write!(f, "&"),
|
|
Token::Nand => write!(f, "~&"),
|
|
Token::Or => write!(f, "|"),
|
|
Token::Nor => write!(f, "~|"),
|
|
Token::Xor => write!(f, "^"),
|
|
Token::Xnor => write!(f, "~^"),
|
|
Token::Lsh => write!(f, "<<"),
|
|
Token::Rsh => write!(f, ">>"),
|
|
Token::Add => write!(f, "+"),
|
|
Token::Sub => write!(f, "-"),
|
|
Token::Mul => write!(f, "*"),
|
|
Token::Div => write!(f, "/"),
|
|
Token::Pow => write!(f, "**"),
|
|
Token::Cmp => write!(f, "<=>"),
|
|
Token::Eq => write!(f, "=="),
|
|
Token::Neq => write!(f, "!="),
|
|
Token::Lt => write!(f, "<"),
|
|
Token::Gt => write!(f, ">"),
|
|
Token::Leq => write!(f, "<="),
|
|
Token::Geq => write!(f, ">="),
|
|
Token::Bol => write!(f, "!!"),
|
|
Token::Inv => write!(f, "!"),
|
|
Token::Rnd => write!(f, "?"),
|
|
Token::Newline(x) | Token::Whitespace(x) => write!(f, "{}", x),
|
|
}
|
|
}
|
|
}
|
|
|
|
// Opts `Token` into `assembler::ToCode`. The impl body is empty, so nothing is
// overridden here; any behavior comes from the trait itself.
// NOTE(review): presumably `ToCode` has default methods built on `Display` - confirm in `assembler`.
impl assembler::ToCode for Token {}
|
|
|
|
pub fn lex(source: &str) -> Result<Vec<Token>> {
|
|
let mut chars = source.chars().peekable();
|
|
let mut tokens: Vec<Token> = vec![];
|
|
|
|
while let Some(&ch) = chars.peek() {
|
|
tokens.push(match ch {
|
|
';' => {
|
|
chars.next();
|
|
chars.next_if(|c| *c == ';');
|
|
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
|
|
}
|
|
'@' => {
|
|
chars.next();
|
|
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
|
|
}
|
|
'#' => {
|
|
chars.next();
|
|
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
|
|
}
|
|
'\'' => {
|
|
chars.next();
|
|
Token::CharLiteral(chars.by_ref().take_while(|c| *c != '\'').collect())
|
|
}
|
|
'"' => {
|
|
chars.next();
|
|
Token::StringLiteral(chars.by_ref().take_while(|c| *c != '"').collect())
|
|
}
|
|
'.' => {
|
|
chars.next();
|
|
Token::MacroLiteral(format!(
|
|
".{}",
|
|
chars
|
|
.peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
|
|
.collect::<String>()
|
|
))
|
|
}
|
|
_ if ch.is_alphabetic() => {
|
|
let name: String = chars
|
|
.peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
|
|
.collect();
|
|
|
|
Token::Literal(name)
|
|
}
|
|
_ if ch.is_numeric() => Token::Number(
|
|
chars
|
|
.peeking_take_while(|c| c.is_alphanumeric() || *c == '_')
|
|
.collect(),
|
|
),
|
|
',' => {
|
|
chars.next();
|
|
Token::Comma
|
|
}
|
|
':' => {
|
|
chars.next();
|
|
Token::Colon
|
|
}
|
|
'(' => {
|
|
chars.next();
|
|
Token::LParen
|
|
}
|
|
')' => {
|
|
chars.next();
|
|
Token::RParen
|
|
}
|
|
'~' => {
|
|
chars.next();
|
|
if let Some(c) = chars.peek() {
|
|
match c {
|
|
'&' => {
|
|
chars.next();
|
|
Token::Nand
|
|
}
|
|
'|' => {
|
|
chars.next();
|
|
Token::Nor
|
|
}
|
|
'^' => {
|
|
chars.next();
|
|
Token::Xnor
|
|
}
|
|
_ => Token::Not,
|
|
}
|
|
} else {
|
|
Token::Not
|
|
}
|
|
}
|
|
'&' => {
|
|
chars.next();
|
|
Token::And
|
|
}
|
|
'|' => {
|
|
chars.next();
|
|
Token::Or
|
|
}
|
|
'^' => {
|
|
chars.next();
|
|
Token::Xor
|
|
}
|
|
'<' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('<') => {
|
|
chars.next();
|
|
Token::Lsh
|
|
}
|
|
Some('=') => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('>') => {
|
|
chars.next();
|
|
Token::Cmp
|
|
}
|
|
_ => Token::Leq,
|
|
}
|
|
}
|
|
_ => Token::Lt,
|
|
}
|
|
}
|
|
'>' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('>') => {
|
|
chars.next();
|
|
Token::Rsh
|
|
}
|
|
Some('=') => {
|
|
chars.next();
|
|
Token::Geq
|
|
}
|
|
_ => Token::Gt,
|
|
}
|
|
}
|
|
'+' => {
|
|
chars.next();
|
|
Token::Add
|
|
}
|
|
'-' => {
|
|
chars.next();
|
|
|
|
match chars.peek() {
|
|
Some(ch) if ch.is_numeric() => Token::Number(format!(
|
|
"-{}",
|
|
chars
|
|
.peeking_take_while(|c| c.is_alphanumeric() || *c == '_')
|
|
.collect::<String>(),
|
|
)),
|
|
_ => Token::Sub,
|
|
}
|
|
}
|
|
'*' => {
|
|
chars.next();
|
|
if let Some('*') = chars.peek() {
|
|
chars.next();
|
|
Token::Pow
|
|
} else {
|
|
Token::Mul
|
|
}
|
|
}
|
|
'/' => {
|
|
chars.next();
|
|
Token::Div
|
|
}
|
|
'=' => {
|
|
chars.next();
|
|
if let Some('=') = chars.peek() {
|
|
chars.next();
|
|
}
|
|
Token::Eq
|
|
}
|
|
'!' => {
|
|
chars.next();
|
|
match chars.peek() {
|
|
Some('!') => {
|
|
chars.next();
|
|
Token::Bol
|
|
}
|
|
Some('=') => {
|
|
chars.next();
|
|
Token::Neq
|
|
}
|
|
_ => Token::Inv,
|
|
}
|
|
}
|
|
'?' => {
|
|
chars.next();
|
|
Token::Rnd
|
|
}
|
|
'\n' => Token::Newline(chars.peeking_take_while(|c| *c == '\n').collect()),
|
|
_ if ch.is_whitespace() => Token::Whitespace(
|
|
chars
|
|
.peeking_take_while(|c| c.is_whitespace() && *c != '\n')
|
|
.collect(),
|
|
),
|
|
_ => bail!("Unexpected token: {}", ch),
|
|
});
|
|
}
|
|
|
|
Ok(tokens)
|
|
}
|