Start writing forth compiler
This commit is contained in:
parent
173a857a5a
commit
ec7a147ec9
27 changed files with 790 additions and 221 deletions
24
henceforth/Cargo.toml
Normal file
24
henceforth/Cargo.toml
Normal file
|
@ -0,0 +1,24 @@
|
|||
[package]
name = "henceforth"
version = "0.1.0"
edition = "2021"
authors = ["Dominic Grimm <dominic@dergrimm.net>"]
repository = "https://git.dergrimm.net/dergrimm/hence.git"

[lib]
name = "henceforth"
path = "src/lib/lib.rs"

[[bin]]
name = "henceforth"
path = "src/bin/main.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
hence = { path = "../hence" }                             # sibling assembler crate (ToCode trait, asm tooling)
clap = { version = "3.2.16", features = ["derive"] }      # CLI argument parsing
anyhow = { version = "1.0.62", features = ["backtrace"] } # application-level error handling
itertools = "0.10.2"                                      # iterator helpers (join, peeking_take_while)
num-parse = "0.1.2"                                       # string -> integer parsing
sailfish = "0.4.0"                                        # template engine for assembly output
|
3
henceforth/examples/test.fth
Normal file
3
henceforth/examples/test.fth
Normal file
|
@ -0,0 +1,3 @@
|
|||
\ Smoke-test program: push 40 and 2, add, then discard the result twice.
40 2 + drop drop
\ Define `test`, which leaves 42 on the stack, then call it and print.
: test ( -- 42 ) 40 2 + ;
test .
|
74
henceforth/src/bin/main.rs
Normal file
74
henceforth/src/bin/main.rs
Normal file
|
@ -0,0 +1,74 @@
|
|||
use anyhow::Result;
|
||||
use clap::{Parser, Subcommand};
|
||||
use std::fs;
|
||||
|
||||
use henceforth::*;
|
||||
|
||||
// Top-level CLI definition. NOTE: plain `//` comments are used on purpose —
// clap's derive turns `///` doc comments into user-visible help text.
#[derive(Debug, Parser)]
#[clap(author, version, about, long_about = None)]
struct Cli {
    // The selected subcommand (lex / parse / compile).
    #[clap(subcommand)]
    commands: Commands,
}
|
||||
|
||||
// Subcommands of the henceforth CLI. Each takes a path to a Forth source
// file; `compile` can additionally write and/or print the result.
// (`//` comments on purpose: clap turns `///` into help text.)
#[derive(Debug, Subcommand)]
enum Commands {
    #[clap(about = "Lexes source code and outputs tokens")]
    Lex {
        // Path to the Forth source file.
        #[clap(value_parser)]
        src: String,
    },
    #[clap(about = "Parses source code and outputs AST")]
    Parse {
        // Path to the Forth source file.
        #[clap(value_parser)]
        src: String,
    },
    #[clap(about = "Compiles assembly from source code")]
    Compile {
        // Path to the Forth source file.
        #[clap(value_parser)]
        src: String,
        // Optional output path for the generated assembly.
        #[clap(value_parser)]
        out: Option<String>,
        // When set, also print the generated assembly to stdout.
        #[clap(long, action)]
        dump: bool,
    },
}
|
||||
|
||||
fn main() -> Result<()> {
|
||||
let args = Cli::parse();
|
||||
match args.commands {
|
||||
Commands::Lex { src } => {
|
||||
let source = fs::read_to_string(src)?;
|
||||
let tokens = lexer::lex(source)?;
|
||||
dbg!(tokens);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Commands::Parse { src } => {
|
||||
let source = fs::read_to_string(src)?;
|
||||
let tokens = lexer::lex(source)?;
|
||||
let body = parser::parse(tokens)?;
|
||||
dbg!(body);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Commands::Compile { src, out, dump } => {
|
||||
let source = fs::read_to_string(&src)?;
|
||||
let tokens = lexer::lex(source)?;
|
||||
let ast = parser::parse(tokens)?;
|
||||
let assembly = compiler::compile(ast)?;
|
||||
|
||||
if let Some(x) = out {
|
||||
fs::write(x, &assembly)?;
|
||||
}
|
||||
if dump {
|
||||
println!("{}", assembly);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {} // placeholder: no unit tests yet
|
113
henceforth/src/lib/compiler.rs
Normal file
113
henceforth/src/lib/compiler.rs
Normal file
|
@ -0,0 +1,113 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use sailfish::TemplateOnce;
|
||||
|
||||
use crate::parser;
|
||||
|
||||
/// A single stack-machine instruction produced by the compiler.
#[derive(Debug, Clone, PartialEq)]
pub enum Instruction {
    /// Push a literal value onto the data stack.
    Push(u16),

    /// Discard the top of the stack.
    Drop,
    /// Pop two values, push their sum.
    Add,
    /// Pop two values, push their difference.
    Sub,
    /// Pop and print the top of the stack (Forth `.`).
    Dot,

    /// Invoke a user-defined word by name.
    Call(String),
}

impl Instruction {
    /// Maps a built-in Forth word to its primitive instruction.
    ///
    /// Returns `None` when `word` is not a compiler primitive (it may
    /// still be a user-defined word).
    pub fn from_word(word: &str) -> Option<Self> {
        match word {
            "drop" => Some(Self::Drop),
            "+" => Some(Self::Add),
            "-" => Some(Self::Sub),
            "." => Some(Self::Dot),
            _ => None,
        }
    }
}
|
||||
|
||||
/// A compiled user word definition.
#[derive(Debug)]
pub struct Word {
    // Sequential id assigned in definition order — presumably used as a
    // label by the assembly template; confirm against assembly.asm.
    id: u16,
    // The word's body, compiled down to primitive instructions.
    instructions: Vec<Instruction>,
}
|
||||
|
||||
/// Accumulated compilation state: the top-level instruction stream plus
/// the dictionary of user-defined words.
#[derive(Debug)]
pub struct Data {
    // Instructions of the top-level program (outside any word definition).
    instructions: Vec<Instruction>,
    // User-defined words, keyed by name.
    words: HashMap<String, Word>,
}
|
||||
|
||||
impl Data {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
instructions: vec![],
|
||||
words: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn generate_instructions(&mut self, body: parser::ast::Body) -> Result<()> {
|
||||
for node in body {
|
||||
match node {
|
||||
parser::ast::Node::Comment(_) => {}
|
||||
parser::ast::Node::String { mode, string } => {}
|
||||
parser::ast::Node::Number(x) => {
|
||||
self.instructions.push(Instruction::Push(x as u16));
|
||||
}
|
||||
parser::ast::Node::WordDefinition {
|
||||
name,
|
||||
stack: _,
|
||||
body,
|
||||
} => {
|
||||
if Instruction::from_word(&name).is_some() {
|
||||
bail!("Word already exists as compiler instruction: {}", name);
|
||||
} else if self.words.contains_key(&name) {
|
||||
bail!("Word already exists as user word definition: {}", name);
|
||||
}
|
||||
|
||||
let pre_instructions = self.instructions.clone();
|
||||
self.instructions.clear();
|
||||
self.generate_instructions(body)?;
|
||||
let instructions = self.instructions.clone();
|
||||
self.instructions = pre_instructions;
|
||||
|
||||
self.words.insert(
|
||||
name,
|
||||
Word {
|
||||
id: self.words.len() as u16,
|
||||
instructions,
|
||||
},
|
||||
);
|
||||
}
|
||||
parser::ast::Node::Word(x) => {
|
||||
if let Some(ins) = Instruction::from_word(&x) {
|
||||
self.instructions.push(ins);
|
||||
} else if self.words.contains_key(&x) {
|
||||
self.instructions.push(Instruction::Call(x));
|
||||
} else {
|
||||
bail!("Word does not exist: {}", x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Sailfish template wrapper: renders `templates/assembly.asm` with the
/// compiled program `data`.
#[derive(TemplateOnce)]
#[template(path = "assembly.asm")]
pub struct Template {
    pub data: Data,
}
|
||||
|
||||
pub fn compile(ast: parser::ast::AST) -> Result<String> {
|
||||
let mut data = Data::new();
|
||||
data.generate_instructions(ast.body)?;
|
||||
dbg!(&data);
|
||||
|
||||
Ok(Template { data }.render_once()?)
|
||||
}
|
74
henceforth/src/lib/lexer.rs
Normal file
74
henceforth/src/lib/lexer.rs
Normal file
|
@ -0,0 +1,74 @@
|
|||
use anyhow::Result;
|
||||
use hence::assembler::ToCode;
|
||||
use itertools::Itertools;
|
||||
|
||||
/// Lexical token. Whitespace and comments are kept as tokens so the
/// original source can be reconstructed via `ToCode`.
#[derive(Debug)]
pub enum Token {
    /// One or more consecutive newlines.
    Newline(String),
    /// A run of non-newline whitespace.
    Whitespace(String),

    /// Body of a `( … )` comment.
    ParenComment(String),
    /// Body of a `\ …` comment (runs to end of line).
    BackslashComment(String),
    /// Body of a `-- …` comment (runs to end of line).
    DoubleDashComment(String),

    /// A `mode" string"` literal, e.g. `s" hello"` (mode = `s`).
    StringLiteral { mode: String, string: String },
    /// Numeric literal text (validated later, in the parser).
    Number(String),
    /// Any other word.
    Word(String),
}
|
||||
|
||||
impl ToCode for Token {
    /// Renders the token back to its source form.
    fn to_code(&self) -> String {
        match self {
            Token::Newline(x) | Token::Whitespace(x) => x.clone(),
            // The lexer strips the single space after `(` but keeps any
            // trailing space before `)`, so only the lead space is restored.
            Token::ParenComment(x) => format!("( {})", x),
            Token::BackslashComment(x) => format!("\\{}", x),
            Token::DoubleDashComment(x) => format!("-- {}", x),
            Token::StringLiteral { mode, string } => format!("{}\" {}\"", mode, string),
            Token::Number(x) | Token::Word(x) => x.clone(),
        }
    }
}
|
||||
|
||||
/// Returns `true` for any character that separates Forth words.
///
/// `char::is_whitespace` already returns `true` for `'\n'`, so the
/// previous explicit newline check was redundant.
pub fn is_space(c: char) -> bool {
    c.is_whitespace()
}
|
||||
|
||||
/// Splits raw Forth source into a flat token stream.
///
/// Whitespace and comments are preserved as tokens; numbers are collected
/// as raw text and validated later by the parser.
pub fn lex(source: String) -> Result<Vec<Token>> {
    let mut chars = source.chars().peekable();
    let mut tokens: Vec<Token> = vec![];

    while let Some(&c) = chars.peek() {
        tokens.push(match c {
            // Runs of newlines / other whitespace collapse into one token each.
            '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').collect()),
            _ if c.is_whitespace() => {
                Token::Whitespace(chars.peeking_take_while(|&c| c.is_whitespace()).collect())
            }
            // `\ …` comment: consume the backslash, keep the rest of the line.
            '\\' => {
                chars.next();
                Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect())
            }
            // A leading digit starts a number token (contents unvalidated here).
            _ if c.is_numeric() => {
                Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect())
            }
            _ => {
                // Grab one whitespace-delimited word, then classify it.
                let x: String = chars.peeking_take_while(|&c| !is_space(c)).collect();

                match x.as_str() {
                    // `( …)` comment: skip(1) drops the space after `(`;
                    // everything up to (not including) `)` is the body.
                    "(" => Token::ParenComment(
                        chars.by_ref().skip(1).take_while(|&c| c != ')').collect(),
                    ),
                    "--" => Token::DoubleDashComment(
                        chars.by_ref().take_while(|&c| c != '\n').collect(),
                    ),
                    // e.g. `s" hello"`: mode is the prefix before the quote.
                    // NOTE(review): `x.len()` is a byte count fed to the
                    // char-based `take`; correct only while modes are ASCII — confirm.
                    _ if x.ends_with('"') => Token::StringLiteral {
                        mode: x.chars().take(x.len() - 1).collect(),
                        string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(),
                    },
                    _ => Token::Word(x),
                }
            }
        });
    }

    Ok(tokens)
}
|
3
henceforth/src/lib/lib.rs
Normal file
3
henceforth/src/lib/lib.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
//! henceforth: a small Forth compiler targeting hence assembly.

pub mod compiler; // AST -> assembly code generation
pub mod lexer; // source text -> tokens
pub mod parser; // tokens -> AST
|
107
henceforth/src/lib/parser.rs
Normal file
107
henceforth/src/lib/parser.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use anyhow::{bail, Result};
|
||||
use num_parse;
|
||||
|
||||
use crate::lexer;
|
||||
|
||||
pub mod ast;
|
||||
|
||||
/// Normalizes one side of a stack-effect comment into identifier tokens.
///
/// `split_whitespace` already skips empty pieces and yields trimmed
/// substrings, so no explicit emptiness guard or per-piece `trim` is
/// needed: a blank or missing side simply produces an empty vector.
fn process_raw_stack_result(s: Option<&str>) -> Vec<String> {
    s.map_or_else(Vec::new, |x| {
        x.split_whitespace().map(str::to_string).collect()
    })
}
||||
|
||||
pub fn parse_stack_result(s: String) -> ast::StackResult {
|
||||
let mut splitter = s.splitn(2, "--");
|
||||
|
||||
ast::StackResult {
|
||||
before: process_raw_stack_result(splitter.next()),
|
||||
after: process_raw_stack_result(splitter.next()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
|
||||
let mut iter = tokens.into_iter().peekable();
|
||||
let mut body: ast::Body = vec![];
|
||||
|
||||
while let Some(token) = iter.next() {
|
||||
match token {
|
||||
lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
|
||||
lexer::Token::ParenComment(x)
|
||||
| lexer::Token::BackslashComment(x)
|
||||
| lexer::Token::DoubleDashComment(x) => {
|
||||
body.push(ast::Node::Comment(x.trim().to_string()));
|
||||
}
|
||||
lexer::Token::StringLiteral { mode, string } => {
|
||||
body.push(ast::Node::String { mode, string });
|
||||
}
|
||||
lexer::Token::Number(x) => match num_parse::parse_int::<i32>(x.as_str()) {
|
||||
Some(n) => {
|
||||
body.push(ast::Node::Number(n));
|
||||
}
|
||||
None => bail!("Invalid number: {}", x),
|
||||
},
|
||||
lexer::Token::Word(x) => match x.as_str() {
|
||||
":" => {
|
||||
let mut depth: usize = 1;
|
||||
let mut content = iter
|
||||
.by_ref()
|
||||
.take_while(|t| match t {
|
||||
lexer::Token::Word(x) => match x.as_str() {
|
||||
":" => {
|
||||
depth += 1;
|
||||
true
|
||||
}
|
||||
";" => {
|
||||
depth -= 1;
|
||||
depth != 0
|
||||
}
|
||||
_ => true,
|
||||
},
|
||||
_ => true,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
.into_iter();
|
||||
if depth != 0 {
|
||||
bail!("Unbalanced word definitions");
|
||||
}
|
||||
|
||||
let name = match content.find(|t| {
|
||||
!matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
|
||||
}) {
|
||||
Some(t) => match t {
|
||||
lexer::Token::Word(x) => x.clone(),
|
||||
_ => bail!("Word definition name must be a word itself: {:?}", t),
|
||||
},
|
||||
None => bail!("Word definition can not be empty"),
|
||||
};
|
||||
let stack = match content.find(|t| {
|
||||
!matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
|
||||
}) {
|
||||
Some(t) => match t {
|
||||
lexer::Token::ParenComment(x)
|
||||
| lexer::Token::BackslashComment(x)
|
||||
| lexer::Token::DoubleDashComment(x) => Some(parse_stack_result(x)),
|
||||
_ => None,
|
||||
},
|
||||
None => None,
|
||||
};
|
||||
|
||||
body.push(ast::Node::WordDefinition {
|
||||
name,
|
||||
stack,
|
||||
body: parse(content.collect())?.body,
|
||||
});
|
||||
}
|
||||
_ => {
|
||||
body.push(ast::Node::Word(x));
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ast::AST { body })
|
||||
}
|
75
henceforth/src/lib/parser/ast.rs
Normal file
75
henceforth/src/lib/parser/ast.rs
Normal file
|
@ -0,0 +1,75 @@
|
|||
use hence::assembler::ToCode;
|
||||
use itertools::Itertools;
|
||||
|
||||
/// Parsed stack-effect comment, e.g. `( a b -- c )`:
/// `before` holds the inputs, `after` the outputs.
#[derive(Debug)]
pub struct StackResult {
    pub before: Vec<String>,
    pub after: Vec<String>,
}
|
||||
|
||||
impl ToCode for StackResult {
    /// Renders as `inputs -- outputs`, e.g. `a b -- c`.
    ///
    /// Fix: the `after` side was rendered with a trailing space and no
    /// separator space (`a b --c `); it now mirrors the `before` side so
    /// both sides are space-separated from `--` and carry no stray
    /// trailing whitespace.
    fn to_code(&self) -> String {
        format!(
            "{}--{}",
            if self.before.is_empty() {
                "".to_string()
            } else {
                format!("{} ", self.before.join(" "))
            },
            if self.after.is_empty() {
                "".to_string()
            } else {
                format!(" {}", self.after.join(" "))
            }
        )
    }
}
|
||||
|
||||
/// AST node of a Forth program.
#[derive(Debug)]
pub enum Node {
    /// Comment text (trimmed by the parser).
    Comment(String),
    /// String literal with its mode prefix, e.g. `s" …"`.
    String {
        mode: String,
        string: String,
    },
    /// Integer literal.
    Number(i32),
    /// `: name ( stack ) body ;` definition.
    WordDefinition {
        name: String,
        /// Optional stack-effect comment following the name.
        stack: Option<StackResult>,
        body: Body,
    },
    /// Plain word reference.
    Word(String),
}
|
||||
|
||||
impl ToCode for Node {
    /// Renders the node back to Forth-like source text.
    fn to_code(&self) -> String {
        match self {
            Node::Comment(x) => format!("\\ {}", x),
            Node::String { mode, string } => format!("{}\" {}\"", mode, string),
            Node::Number(x) => x.to_string(),
            Node::WordDefinition { name, stack, body } => format!(
                ": {}{} {} ;",
                name,
                // NOTE(review): the stack effect is emitted bare, without a
                // `( … )` wrapper, so this output may not re-lex as a
                // comment — confirm whether round-tripping is intended.
                match stack {
                    Some(x) => format!(" {}", x.to_code()),
                    None => "".to_string(),
                },
                body.iter().map(|x| x.to_code()).join(" ")
            ),
            Node::Word(x) => x.clone(),
        }
    }
}
||||
|
||||
/// A sequence of AST nodes.
pub type Body = Vec<Node>;

/// Root of a parsed program.
#[derive(Debug)]
pub struct AST {
    pub body: Body,
}
|
||||
|
||||
impl ToCode for AST {
    /// Renders the whole program as space-separated source text.
    fn to_code(&self) -> String {
        self.body.iter().map(|x| x.to_code()).join(" ")
    }
}
|
10
henceforth/templates/assembly.asm
Normal file
10
henceforth/templates/assembly.asm
Normal file
|
@ -0,0 +1,10 @@
|
|||
.include "$lib/core.asm"
|
||||
.include "$lib/std.asm"
|
||||
.include "$lib/main.asm"
|
||||
|
||||
.jump_main
|
||||
|
||||
data:
|
||||
|
||||
.main
|
||||
.std_stop
|
Loading…
Add table
Add a link
Reference in a new issue