Update lisp lexer

This commit is contained in:
Dominic Grimm 2022-07-17 22:07:59 +02:00
parent ffad349a41
commit dd23759f85
No known key found for this signature in database
GPG key ID: A6C051C716D2CE65
9 changed files with 110 additions and 24 deletions

View file

@ -15,3 +15,4 @@ path = "src/bin/main.rs"
[dependencies]
hence = { path = "../hence" }
clap = { version = "3.2.12", features = ["derive"] }
itertools = "0.10.3"

View file

@ -23,7 +23,8 @@ fn main() {
match args.commands {
Commands::Lex { src } => {
let source = fs::read_to_string(src).unwrap();
println!("{source}");
let tokens = lexer::lex(source).unwrap();
dbg!(tokens);
}
}
}

View file

@ -1 +1,86 @@
/// Placeholder lexer entry point: takes ownership of `source` and does nothing with it.
pub fn lex(source: String) {}
use hence;
use itertools::Itertools;
/// A lexical token of the Lisp dialect handled by this lexer.
///
/// Variants that carry a `String` store the token text without the delimiters
/// that introduce or close it (comment markers, string quotes).
///
/// `Clone`, `PartialEq` and `Eq` are derived so token streams can be copied
/// and compared directly, e.g. with `assert_eq!` in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token {
    /// Line comment; holds the text that follows the `;` / `;;` marker.
    Comment(String),
    /// Block comment; holds the text between `#|` and `|#`.
    // NOTE(review): no lexer branch visible in this file produces this variant yet.
    MultiLineComment(String),
    /// A run of one or more consecutive `\n` characters.
    Newline(String),
    /// A run of whitespace characters other than `\n`.
    Whitespace(String),
    /// Opening parenthesis `(`.
    LParen,
    /// Closing parenthesis `)`.
    RParen,
    /// String literal; holds the text between the double quotes.
    StringLiteral(String),
    /// Numeric literal, kept as its source text.
    // NOTE(review): no lexer branch visible in this file produces this variant yet.
    Number(String),
    /// Any other run of characters (symbols, identifiers, ...).
    Literal(String),
}
/// Renders a [`Token`] back into source text.
impl hence::assembler::ToCode for Token {
    /// Returns the textual form of this token, re-adding the delimiters that
    /// are not stored in the payload: `;;` before comments, `#|`…`|#` around
    /// block comments and double quotes around string literals.
    fn to_code(&self) -> String {
        match self {
            // Fixed, payload-free punctuation.
            Token::LParen => String::from("("),
            Token::RParen => String::from(")"),
            // Payloads that need their delimiters restored.
            Token::Comment(text) => format!(";;{text}"),
            Token::MultiLineComment(text) => format!("#|{text}|#"),
            Token::StringLiteral(text) => format!("\"{text}\""),
            // Payloads that already are the exact source text.
            Token::Newline(text)
            | Token::Whitespace(text)
            | Token::Number(text)
            | Token::Literal(text) => text.to_owned(),
        }
    }
}
/// Splits `source` into a flat sequence of [`Token`]s.
///
/// Recognised forms, checked in this order for each position:
/// - `;` starts a line comment; one optional extra `;` is skipped and the
///   rest of the line (without the newline) becomes [`Token::Comment`].
/// - a run of `\n` becomes one [`Token::Newline`].
/// - a run of other whitespace becomes one [`Token::Whitespace`].
/// - `(` and `)` become [`Token::LParen`] / [`Token::RParen`].
/// - `"` starts a string literal that runs to the next `"`; the quotes are
///   not stored. No escape sequences are interpreted.
/// - anything else is read up to the next whitespace or parenthesis and
///   becomes [`Token::Literal`].
///
/// # Errors
///
/// Returns `Err` with a message when a string literal is opened but the
/// input ends before the closing `"`.
pub fn lex(source: String) -> Result<Vec<Token>, String> {
    let mut chars = source.chars().peekable();
    let mut tokens: Vec<Token> = Vec::new();

    while let Some(&ch) = chars.peek() {
        match ch {
            ';' => {
                // Drop the comment marker: one `;`, optionally followed by a second.
                chars.next();
                chars.next_if(|c| *c == ';');
                tokens.push(Token::Comment(
                    chars.peeking_take_while(|c| *c != '\n').collect(),
                ));
            }
            // Must come before the generic whitespace arm, since `\n` is whitespace too.
            '\n' => {
                tokens.push(Token::Newline(
                    chars.peeking_take_while(|c| *c == '\n').collect(),
                ));
            }
            _ if ch.is_whitespace() => {
                tokens.push(Token::Whitespace(
                    chars
                        .peeking_take_while(|c| c.is_whitespace() && *c != '\n')
                        .collect(),
                ));
            }
            '(' => {
                tokens.push(Token::LParen);
                chars.next();
            }
            ')' => {
                tokens.push(Token::RParen);
                chars.next();
            }
            '"' => {
                // Skip the opening quote, then read up to (and consume) the closing one.
                chars.next();
                let mut literal = String::new();
                let mut terminated = false;
                for c in chars.by_ref() {
                    if c == '"' {
                        terminated = true;
                        break;
                    }
                    literal.push(c);
                }
                // Running out of input before the closing quote is an error,
                // not a valid string that happens to end the file.
                if !terminated {
                    return Err("unterminated string literal".to_string());
                }
                tokens.push(Token::StringLiteral(literal));
            }
            _ => {
                // `ch` itself satisfies the predicate, so at least one
                // character is consumed and the loop always makes progress.
                tokens.push(Token::Literal(
                    chars
                        .peeking_take_while(|c| !c.is_whitespace() && *c != '(' && *c != ')')
                        .collect(),
                ));
            }
        }
    }

    Ok(tokens)
}