From dd23759f85718e82042de82d83620fe959f4fb79 Mon Sep 17 00:00:00 2001 From: Dominic Grimm Date: Sun, 17 Jul 2022 22:07:59 +0200 Subject: [PATCH] Update lisp lexer --- Cargo.lock | 1 + examples/test.lisp | 1 + hence/src/lib/arg.rs | 14 +++--- hence/src/lib/assembler.rs | 6 +-- hence/src/lib/lexer.rs | 4 +- hence/src/lib/parser/ast.rs | 17 +++----- hencelisp/Cargo.toml | 1 + hencelisp/src/bin/main.rs | 3 +- hencelisp/src/lib/lexer.rs | 87 ++++++++++++++++++++++++++++++++++++- 9 files changed, 110 insertions(+), 24 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f4258b0..e65e703 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -117,6 +117,7 @@ version = "0.1.0" dependencies = [ "clap", "hence", + "itertools", ] [[package]] diff --git a/examples/test.lisp b/examples/test.lisp index 4bb6275..7dec60a 100644 --- a/examples/test.lisp +++ b/examples/test.lisp @@ -1,3 +1,4 @@ +;; test module (module test "Main module" (defun main () diff --git a/hence/src/lib/arg.rs b/hence/src/lib/arg.rs index 88531d3..61c9e8d 100644 --- a/hence/src/lib/arg.rs +++ b/hence/src/lib/arg.rs @@ -14,8 +14,8 @@ pub enum Arg { }, } -impl assembler::ToAssembly for Arg { - fn to_assembly(&self) -> String { +impl assembler::ToCode for Arg { + fn to_code(&self) -> String { match self { Arg::String(x) => format!("\"{x}\""), Arg::Number(x) => x.to_string(), @@ -23,9 +23,9 @@ impl assembler::ToAssembly for Arg { Arg::BinaryExpression { left, right, op } => { format!( "({left} {op} {right})", - left = left.to_assembly(), - op = op.to_assembly(), - right = right.to_assembly() + left = left.to_code(), + op = op.to_code(), + right = right.to_code() ) } } @@ -136,8 +136,8 @@ pub enum BinaryExpressionOperator { Pow, } -impl assembler::ToAssembly for BinaryExpressionOperator { - fn to_assembly(&self) -> String { +impl assembler::ToCode for BinaryExpressionOperator { + fn to_code(&self) -> String { match self { BinaryExpressionOperator::Add => "+".to_string(), BinaryExpressionOperator::Sub => "-".to_string(), diff --git a/hence/src/lib/assembler.rs b/hence/src/lib/assembler.rs index 66b8dee..7c8d507 100644 --- a/hence/src/lib/assembler.rs +++ b/hence/src/lib/assembler.rs @@ -5,8 +5,8 @@ use std::collections::HashMap; use crate::arg; use crate::parser; -pub trait ToAssembly { - fn to_assembly(&self) -> String; +pub trait ToCode { + fn to_code(&self) -> String; } pub trait ByteResolvable { @@ -91,7 +91,7 @@ pub fn assemble(ast: parser::ast::AST, data: &mut Data) -> Result<(), String> { for arg in args { let bytes = arg.resolve_bytes(data).unwrap(); - println!("{}", arg.to_assembly().replace('\n', "\\n")); + println!("{}", arg.to_code().replace('\n', "\\n")); println!(" => {}", arg.resolve_number(data).unwrap()); println!( " => [{}]", diff --git a/hence/src/lib/lexer.rs b/hence/src/lib/lexer.rs index 90b3ebe..494a509 100644 --- a/hence/src/lib/lexer.rs +++ b/hence/src/lib/lexer.rs @@ -27,8 +27,8 @@ pub enum Token { Whitespace(String), } -impl assembler::ToAssembly for Token { - fn to_assembly(&self) -> String { +impl assembler::ToCode for Token { + fn to_code(&self) -> String { match self { Token::Comment(x) => format!(";{x}"), Token::StringLiteral(x) => format!("\"{x}\""), diff --git a/hence/src/lib/parser/ast.rs b/hence/src/lib/parser/ast.rs index b7c4354..a92dfbb 100644 --- a/hence/src/lib/parser/ast.rs +++ b/hence/src/lib/parser/ast.rs @@ -11,14 +11,14 @@ pub enum Node { MacroCall { name: String, args: Vec }, } -impl assembler::ToAssembly for Node { - fn to_assembly(&self) -> String { +impl assembler::ToCode for Node { + fn to_code(&self) -> String { match self { Node::Comment(x) => format!("; {x}"), Node::Label(x) => format!("{x}:"), Node::Call { name, arg } => { if let Some(a) = arg { - format!("{name} {arg}", arg = a.to_assembly()) + format!("{name} {arg}", arg = a.to_code()) } else { name.clone() } @@ -27,10 +27,7 @@ impl assembler::ToAssembly for Node { if args.is_empty() { format!(".{name}") } else { - format!( - ".{name} {}", - args.iter().map(|a| a.to_assembly()).join(", ") - ) + format!(".{name} {}", args.iter().map(|a| a.to_code()).join(", ")) } } } @@ -42,8 +39,8 @@ pub struct AST { pub body: Vec, } -impl assembler::ToAssembly for AST { - fn to_assembly(&self) -> String { - self.body.iter().map(|n| n.to_assembly()).join("\n") +impl assembler::ToCode for AST { + fn to_code(&self) -> String { + self.body.iter().map(|n| n.to_code()).join("\n") } } diff --git a/hencelisp/Cargo.toml b/hencelisp/Cargo.toml index aa6995e..e8f1348 100644 --- a/hencelisp/Cargo.toml +++ b/hencelisp/Cargo.toml @@ -15,3 +15,4 @@ path = "src/bin/main.rs" [dependencies] hence = { path = "../hence" } clap = { version = "3.2.12", features = ["derive"] } +itertools = "0.10.3" diff --git a/hencelisp/src/bin/main.rs b/hencelisp/src/bin/main.rs index 87b115e..cb1dba3 100644 --- a/hencelisp/src/bin/main.rs +++ b/hencelisp/src/bin/main.rs @@ -23,7 +23,8 @@ fn main() { match args.commands { Commands::Lex { src } => { let source = fs::read_to_string(src).unwrap(); - println!("{source}"); + let tokens = lexer::lex(source).unwrap(); + dbg!(tokens); } } } diff --git a/hencelisp/src/lib/lexer.rs b/hencelisp/src/lib/lexer.rs index 67e8b91..f3ccc70 100644 --- a/hencelisp/src/lib/lexer.rs +++ b/hencelisp/src/lib/lexer.rs @@ -1 +1,86 @@ -pub fn lex(source: String) {} +use hence; +use itertools::Itertools; + +#[derive(Debug)] +pub enum Token { + Comment(String), + MultiLineComment(String), + + Newline(String), + Whitespace(String), + + LParen, + RParen, + + StringLiteral(String), + Number(String), + Literal(String), +} + +impl hence::assembler::ToCode for Token { + fn to_code(&self) -> String { + match self { + Token::Comment(x) => format!(";;{x}"), + Token::MultiLineComment(x) => format!("#|{x}|#"), + Token::Newline(x) | Token::Whitespace(x) => x.clone(), + Token::LParen => "(".to_string(), + Token::RParen => ")".to_string(), + Token::StringLiteral(x) => format!("\"{x}\""), + Token::Number(x) | Token::Literal(x) => x.clone(), + } + } +} + +pub fn lex(source: String) -> Result, String> { + let mut chars = source.chars().peekable(); + let mut tokens: Vec = Vec::new(); + + while let Some(&ch) = chars.peek() { + match ch { + ';' => { + chars.next(); + chars.next_if(|c| *c == ';'); + + tokens.push(Token::Comment( + chars.peeking_take_while(|c| *c != '\n').collect(), + )); + } + '\n' => { + tokens.push(Token::Newline( + chars.peeking_take_while(|c| *c == '\n').collect(), + )); + } + _ if ch.is_whitespace() => { + tokens.push(Token::Whitespace( + chars + .peeking_take_while(|c| c.is_whitespace() && *c != '\n') + .collect(), + )); + } + '(' => { + tokens.push(Token::LParen); + chars.next(); + } + ')' => { + tokens.push(Token::RParen); + chars.next(); + } + '"' => { + chars.next(); + tokens.push(Token::StringLiteral( + chars.by_ref().take_while(|c| *c != '"').collect(), + )); + } + _ => { + dbg!(ch); + tokens.push(Token::Literal( + chars + .peeking_take_while(|c| !c.is_whitespace() && *c != '(' && *c != ')') + .collect(), + )); + } + } + } + + Ok(tokens) +}