// File: hence/hence/src/lexer.rs
//
// Listing metadata: 294 lines, 8.4 KiB, Rust

use anyhow::{bail, Result};
use itertools::Itertools;
use std::fmt;
use crate::assembler;
/// A single lexical token produced by [`lex`].
///
/// Variants carrying a `String` keep the raw source text of the token; the
/// remaining variants are fixed punctuation or operators. The symbol noted on
/// each operator variant is the text its `Display` implementation prints.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Comment text without its leading marker. The lexer produces this for
    /// `;`, `;;`, `@` and `#` comments; `Display` always prints a single `;`.
    Comment(String),
    /// Body of a `'…'` literal, without the surrounding quotes.
    CharLiteral(String),
    /// Body of a `"…"` literal, without the surrounding quotes.
    StringLiteral(String),
    /// A `.`-prefixed word, stored including the leading dot.
    MacroLiteral(String),
    /// A word that starts with an alphabetic character.
    Literal(String),
    /// Raw text of a number: a digit-led run of alphanumerics/underscores,
    /// optionally preceded by `-`. The lexer does not validate or parse it.
    Number(String),
    /// `,`
    Comma,
    /// `:`
    Colon,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `~`
    Not,
    /// `&`
    And,
    /// `~&`
    Nand,
    /// `|`
    Or,
    /// `~|`
    Nor,
    /// `^`
    Xor,
    /// `~^`
    Xnor,
    /// `<<`
    Lsh,
    /// `>>`
    Rsh,
    /// `+`
    Add,
    /// `-`
    Sub,
    /// `*`
    Mul,
    /// `/`
    Div,
    /// `**`
    Pow,
    /// `<=>`
    Cmp,
    /// `==`
    Eq,
    /// `!=`
    Neq,
    /// `<`
    Lt,
    /// `>`
    Gt,
    /// `<=`
    Leq,
    /// `>=`
    Geq,
    /// `!!`
    Bol,
    /// `!`
    Inv,
    /// `?`
    Rnd,
    /// One or more consecutive `'\n'` characters.
    Newline(String),
    /// A run of whitespace characters that contains no `'\n'`.
    Whitespace(String),
}
/// Renders a token as source text.
///
/// Tokens that carry text print that text (comments get a leading `;`, char
/// and string literals get their quotes back); every other token prints its
/// fixed symbol.
impl fmt::Display for Token {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Payload-carrying tokens are written immediately; all remaining
        // tokens resolve to a static symbol that is written once at the end.
        let symbol = match self {
            Token::Comment(text) => return write!(f, ";{}", text),
            Token::CharLiteral(body) => return write!(f, "'{}'", body),
            Token::StringLiteral(body) => return write!(f, "\"{}\"", body),
            Token::MacroLiteral(text)
            | Token::Literal(text)
            | Token::Number(text)
            | Token::Newline(text)
            | Token::Whitespace(text) => return f.write_str(text),

            // Punctuation.
            Token::Comma => ",",
            Token::Colon => ":",
            Token::LParen => "(",
            Token::RParen => ")",

            // Bitwise operators.
            Token::Not => "~",
            Token::And => "&",
            Token::Nand => "~&",
            Token::Or => "|",
            Token::Nor => "~|",
            Token::Xor => "^",
            Token::Xnor => "~^",
            Token::Lsh => "<<",
            Token::Rsh => ">>",

            // Arithmetic operators.
            Token::Add => "+",
            Token::Sub => "-",
            Token::Mul => "*",
            Token::Div => "/",
            Token::Pow => "**",

            // Comparison operators.
            Token::Cmp => "<=>",
            Token::Eq => "==",
            Token::Neq => "!=",
            Token::Lt => "<",
            Token::Gt => ">",
            Token::Leq => "<=",
            Token::Geq => ">=",

            // Remaining unary symbols.
            Token::Bol => "!!",
            Token::Inv => "!",
            Token::Rnd => "?",
        };
        f.write_str(symbol)
    }
}
// Marks `Token` as implementing `assembler::ToCode`. The impl body is empty, so
// `Token` relies entirely on whatever default items that trait provides (the
// trait is declared in `crate::assembler`, not in this file).
impl assembler::ToCode for Token {}
pub fn lex(source: &str) -> Result<Vec<Token>> {
let mut chars = source.chars().peekable();
let mut tokens: Vec<Token> = vec![];
while let Some(&ch) = chars.peek() {
tokens.push(match ch {
';' => {
chars.next();
chars.next_if(|c| *c == ';');
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
}
'@' => {
chars.next();
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
}
'#' => {
chars.next();
Token::Comment(chars.peeking_take_while(|c| *c != '\n').collect())
}
'\'' => {
chars.next();
Token::CharLiteral(chars.by_ref().take_while(|c| *c != '\'').collect())
}
'"' => {
chars.next();
Token::StringLiteral(chars.by_ref().take_while(|c| *c != '"').collect())
}
'.' => {
chars.next();
Token::MacroLiteral(format!(
".{}",
chars
.peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
.collect::<String>()
))
}
_ if ch.is_alphabetic() => {
let name: String = chars
.peeking_take_while(|c| c.is_alphabetic() || c.is_numeric() || *c == '_')
.collect();
Token::Literal(name)
}
_ if ch.is_numeric() => Token::Number(
chars
.peeking_take_while(|c| c.is_alphanumeric() || *c == '_')
.collect(),
),
',' => {
chars.next();
Token::Comma
}
':' => {
chars.next();
Token::Colon
}
'(' => {
chars.next();
Token::LParen
}
')' => {
chars.next();
Token::RParen
}
'~' => {
chars.next();
if let Some(c) = chars.peek() {
match c {
'&' => {
chars.next();
Token::Nand
}
'|' => {
chars.next();
Token::Nor
}
'^' => {
chars.next();
Token::Xnor
}
_ => Token::Not,
}
} else {
Token::Not
}
}
'&' => {
chars.next();
Token::And
}
'|' => {
chars.next();
Token::Or
}
'^' => {
chars.next();
Token::Xor
}
'<' => {
chars.next();
match chars.peek() {
Some('<') => {
chars.next();
Token::Lsh
}
Some('=') => {
chars.next();
match chars.peek() {
Some('>') => {
chars.next();
Token::Cmp
}
_ => Token::Leq,
}
}
_ => Token::Lt,
}
}
'>' => {
chars.next();
match chars.peek() {
Some('>') => {
chars.next();
Token::Rsh
}
Some('=') => {
chars.next();
Token::Geq
}
_ => Token::Gt,
}
}
'+' => {
chars.next();
Token::Add
}
'-' => {
chars.next();
match chars.peek() {
Some(ch) if ch.is_numeric() => Token::Number(format!(
"-{}",
chars
.peeking_take_while(|c| c.is_alphanumeric() || *c == '_')
.collect::<String>(),
)),
_ => Token::Sub,
}
}
'*' => {
chars.next();
if let Some('*') = chars.peek() {
chars.next();
Token::Pow
} else {
Token::Mul
}
}
'/' => {
chars.next();
Token::Div
}
'=' => {
chars.next();
if let Some('=') = chars.peek() {
chars.next();
}
Token::Eq
}
'!' => {
chars.next();
match chars.peek() {
Some('!') => {
chars.next();
Token::Bol
}
Some('=') => {
chars.next();
Token::Neq
}
_ => Token::Inv,
}
}
'?' => {
chars.next();
Token::Rnd
}
'\n' => Token::Newline(chars.peeking_take_while(|c| *c == '\n').collect()),
_ if ch.is_whitespace() => Token::Whitespace(
chars
.peeking_take_while(|c| c.is_whitespace() && *c != '\n')
.collect(),
),
_ => bail!("Unexpected token: {}", ch),
});
}
Ok(tokens)
}