85 lines
2.9 KiB
Rust
85 lines
2.9 KiB
Rust
use anyhow::Result;
|
|
use hence::assembler::ToCode;
|
|
use itertools::Itertools;
|
|
|
|
/// A single lexical token of the Forth-like source language.
///
/// Tokens preserve enough of their source text that `ToCode::to_code`
/// can print them back out.
#[derive(Debug)]
pub enum Token {
    /// A run of consecutive newline characters; the payload is the count.
    Newline(usize),
    /// A run of whitespace characters; the payload is the count.
    /// NOTE(review): `to_code` renders this as spaces, so tabs and other
    /// whitespace are not round-tripped exactly — confirm this is intended.
    Whitespace(usize),

    /// A `( ... )` comment; the captured text excludes the delimiters.
    ParenComment(String),
    /// A `\ ...` end-of-line comment; as produced by `lex`, the captured
    /// text includes the leading backslash.
    BackslashComment(String),
    /// A `-- ...` end-of-line comment; the captured text excludes the `--`.
    DoubleDashComment(String),

    /// A string literal such as `s" text"`: `mode` is the word prefix
    /// before the opening quote, `string` the quoted contents.
    StringLiteral { mode: String, string: String },
    /// A numeric literal, stored as its source text (may include a sign).
    Number(String),
    /// Any other space-delimited word.
    Word(String),
}
|
|
|
|
impl ToCode for Token {
|
|
fn to_code(&self) -> String {
|
|
match self {
|
|
Token::Newline(x) => ["\n"].into_iter().cycle().take(*x).join(""),
|
|
Token::Whitespace(x) => [" "].into_iter().cycle().take(*x).join(""),
|
|
Token::ParenComment(x) => format!("( {})", x),
|
|
Token::BackslashComment(x) => format!("\\{}", x),
|
|
Token::DoubleDashComment(x) => format!("-- {}", x),
|
|
Token::StringLiteral { mode, string } => format!("{}\" {}\"", mode, string),
|
|
Token::Number(x) | Token::Word(x) => x.clone(),
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Returns true for any token-separating (whitespace) character.
///
/// `char::is_whitespace` already matches `'\n'` (it is Unicode
/// whitespace), so the original `|| c == '\n'` clause was redundant and
/// has been dropped; behavior is unchanged.
pub fn is_space(c: char) -> bool {
    c.is_whitespace()
}
|
|
|
|
/// Splits Forth-like `source` into a flat stream of [`Token`]s.
///
/// Tokenization peeks at the next character and lets each arm consume
/// exactly the characters belonging to its token, sharing one `Peekable`
/// iterator throughout.
///
/// As written this always returns `Ok`; the `Result` signature leaves
/// room for reporting lexing errors later.
pub fn lex(source: &str) -> Result<Vec<Token>> {
    let mut chars = source.chars().peekable();
    let mut tokens: Vec<Token> = vec![];

    while let Some(c) = chars.peek() {
        tokens.push(match c {
            // Run of consecutive newlines, collapsed into one counted token.
            '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').count()),
            // Run of whitespace not starting with a newline.
            // NOTE(review): `is_whitespace` also matches '\n', so a run like
            // "  \n" is swallowed here as Whitespace(3) and the newline never
            // becomes a Newline token — confirm this is intended.
            _ if c.is_whitespace() => {
                Token::Whitespace(chars.peeking_take_while(|&c| c.is_whitespace()).count())
            }
            // Backslash comment runs to end of line; the leading '\' itself
            // is kept in the captured text (the newline is left unconsumed).
            // NOTE(review): `to_code` prepends another '\', so a round-trip
            // appears to double the backslash — verify against `to_code`.
            '\\' => Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect()),
            // Leading digit: take the whole space-delimited word as a number.
            _ if c.is_numeric() => {
                Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect())
            }
            _ => {
                // Grab the whole space-delimited word, then classify it.
                let x: String = chars.peeking_take_while(|&c| !is_space(c)).collect();
                let mut iter = x.chars();

                match x.as_str() {
                    // "( ... )" comment: skip(1) drops the separator after
                    // '(', then everything up to (and consuming) the ')'.
                    "(" => Token::ParenComment(
                        chars.by_ref().skip(1).take_while(|&c| c != ')').collect(),
                    ),
                    // "-- ..." comment: the rest of the line, consuming the
                    // newline (so no Newline token is emitted for it).
                    "--" => Token::DoubleDashComment(
                        chars.by_ref().take_while(|&c| c != '\n').collect(),
                    ),
                    // A word ending in '"' opens a string literal, e.g.
                    // `s" foo"`: the prefix before the quote is the mode.
                    // NOTE(review): `take(x.len() - 1)` counts chars while
                    // `len()` is bytes — equivalent only for ASCII mode
                    // prefixes; confirm inputs are ASCII.
                    _ if x.ends_with('"') => Token::StringLiteral {
                        mode: x.chars().take(x.len() - 1).collect(),
                        string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(),
                    },
                    // Leading '-': a negative number if a digit follows
                    // ("-1"); otherwise an ordinary word ("-", "-foo").
                    _ if iter.next() == Some('-') => {
                        if let Some(c) = iter.next() {
                            if c.is_numeric() {
                                Token::Number(x)
                            } else {
                                Token::Word(x)
                            }
                        } else {
                            Token::Word(x)
                        }
                    }
                    // Anything else is a plain word.
                    _ => Token::Word(x),
                }
            }
        });
    }

    Ok(tokens)
}
|