Start writing Forth compiler

Dominic Grimm 2022-09-03 15:14:58 +02:00
parent 173a857a5a
commit ec7a147ec9
No known key found for this signature in database
GPG key ID: A6C051C716D2CE65
27 changed files with 790 additions and 221 deletions

henceforth/Cargo.toml (new file)

@@ -0,0 +1,24 @@
[package]
name = "henceforth"
version = "0.1.0"
edition = "2021"
authors = ["Dominic Grimm <dominic@dergrimm.net>"]
repository = "https://git.dergrimm.net/dergrimm/hence.git"

[lib]
name = "henceforth"
path = "src/lib/lib.rs"

[[bin]]
name = "henceforth"
path = "src/bin/main.rs"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
hence = { path = "../hence" }
clap = { version = "3.2.16", features = ["derive"] }
anyhow = { version = "1.0.62", features = ["backtrace"] }
itertools = "0.10.2"
num-parse = "0.1.2"
sailfish = "0.4.0"


@@ -0,0 +1,3 @@
40 2 + drop drop
: test ( -- 42 ) 40 2 + ;
test .


@@ -0,0 +1,74 @@
use anyhow::Result;
use clap::{Parser, Subcommand};
use std::fs;

use henceforth::*;

#[derive(Debug, Parser)]
#[clap(author, version, about, long_about = None)]
struct Cli {
    #[clap(subcommand)]
    commands: Commands,
}

#[derive(Debug, Subcommand)]
enum Commands {
    #[clap(about = "Lexes source code and outputs tokens")]
    Lex {
        #[clap(value_parser)]
        src: String,
    },
    #[clap(about = "Parses source code and outputs AST")]
    Parse {
        #[clap(value_parser)]
        src: String,
    },
    #[clap(about = "Compiles assembly from source code")]
    Compile {
        #[clap(value_parser)]
        src: String,
        #[clap(value_parser)]
        out: Option<String>,
        #[clap(long, action)]
        dump: bool,
    },
}

fn main() -> Result<()> {
    let args = Cli::parse();

    match args.commands {
        Commands::Lex { src } => {
            let source = fs::read_to_string(src)?;
            let tokens = lexer::lex(source)?;
            dbg!(tokens);

            Ok(())
        }
        Commands::Parse { src } => {
            let source = fs::read_to_string(src)?;
            let tokens = lexer::lex(source)?;
            let body = parser::parse(tokens)?;
            dbg!(body);

            Ok(())
        }
        Commands::Compile { src, out, dump } => {
            let source = fs::read_to_string(&src)?;
            let tokens = lexer::lex(source)?;
            let ast = parser::parse(tokens)?;
            let assembly = compiler::compile(ast)?;
            if let Some(x) = out {
                fs::write(x, &assembly)?;
            }
            if dump {
                println!("{}", assembly);
            }

            Ok(())
        }
    }
}

#[cfg(test)]
mod tests {}
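The `tests` module above is still empty. As an illustration only (not part of this commit), a test like the following could pin down how the derived CLI parses a `compile` invocation via clap's `Parser::parse_from`; the file name `test.fs` is made up:

#[cfg(test)]
mod cli_tests {
    use super::*;

    #[test]
    fn compile_args_parse() {
        // Equivalent to: henceforth compile test.fs --dump
        let cli = Cli::parse_from(["henceforth", "compile", "test.fs", "--dump"]);
        match cli.commands {
            Commands::Compile { src, out, dump } => {
                assert_eq!(src, "test.fs");
                assert_eq!(out, None);
                assert!(dump);
            }
            _ => panic!("expected the Compile subcommand"),
        }
    }
}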


@@ -0,0 +1,113 @@
use std::collections::HashMap;

use anyhow::{bail, Result};
use sailfish::TemplateOnce;

use crate::parser;

#[derive(Debug, Clone, PartialEq)]
pub enum Instruction {
    Push(u16),
    Drop,
    Add,
    Sub,
    Dot,
    Call(String),
}

impl Instruction {
    pub fn from_word(word: &str) -> Option<Self> {
        match word {
            "drop" => Some(Instruction::Drop),
            "+" => Some(Instruction::Add),
            "-" => Some(Instruction::Sub),
            "." => Some(Instruction::Dot),
            _ => None,
        }
    }
}

#[derive(Debug)]
pub struct Word {
    id: u16,
    instructions: Vec<Instruction>,
}

#[derive(Debug)]
pub struct Data {
    instructions: Vec<Instruction>,
    words: HashMap<String, Word>,
}

impl Data {
    fn new() -> Self {
        Self {
            instructions: vec![],
            words: HashMap::new(),
        }
    }

    pub fn generate_instructions(&mut self, body: parser::ast::Body) -> Result<()> {
        for node in body {
            match node {
                parser::ast::Node::Comment(_) => {}
                // String literals are not compiled yet.
                parser::ast::Node::String { .. } => {}
                parser::ast::Node::Number(x) => {
                    self.instructions.push(Instruction::Push(x as u16));
                }
                parser::ast::Node::WordDefinition {
                    name,
                    stack: _,
                    body,
                } => {
                    if Instruction::from_word(&name).is_some() {
                        bail!("Word already exists as compiler instruction: {}", name);
                    } else if self.words.contains_key(&name) {
                        bail!("Word already exists as user word definition: {}", name);
                    }

                    // Compile the definition body into a fresh buffer, then
                    // restore the instructions collected so far.
                    let pre_instructions = self.instructions.clone();
                    self.instructions.clear();
                    self.generate_instructions(body)?;
                    let instructions = self.instructions.clone();
                    self.instructions = pre_instructions;

                    self.words.insert(
                        name,
                        Word {
                            id: self.words.len() as u16,
                            instructions,
                        },
                    );
                }
                parser::ast::Node::Word(x) => {
                    if let Some(ins) = Instruction::from_word(&x) {
                        self.instructions.push(ins);
                    } else if self.words.contains_key(&x) {
                        self.instructions.push(Instruction::Call(x));
                    } else {
                        bail!("Word does not exist: {}", x);
                    }
                }
            }
        }

        Ok(())
    }
}

#[derive(TemplateOnce)]
#[template(path = "assembly.asm")]
pub struct Template {
    pub data: Data,
}

pub fn compile(ast: parser::ast::AST) -> Result<String> {
    let mut data = Data::new();
    data.generate_instructions(ast.body)?;
    dbg!(&data);

    Ok(Template { data }.render_once()?)
}
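As a hedged sketch (not in the commit), the lowering can be checked end to end: lexing and parsing `40 2 +` and feeding the AST through `Data::generate_instructions` should yield `Push(40)`, `Push(2)`, `Add`:

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{lexer, parser};

    #[test]
    fn lowers_simple_arithmetic() -> anyhow::Result<()> {
        let ast = parser::parse(lexer::lex("40 2 +".to_string())?)?;
        let mut data = Data::new();
        data.generate_instructions(ast.body)?;
        assert_eq!(
            data.instructions,
            vec![Instruction::Push(40), Instruction::Push(2), Instruction::Add]
        );
        Ok(())
    }
}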


@@ -0,0 +1,74 @@
use anyhow::Result;
use hence::assembler::ToCode;
use itertools::Itertools;

#[derive(Debug)]
pub enum Token {
    Newline(String),
    Whitespace(String),
    ParenComment(String),
    BackslashComment(String),
    DoubleDashComment(String),
    StringLiteral { mode: String, string: String },
    Number(String),
    Word(String),
}

impl ToCode for Token {
    fn to_code(&self) -> String {
        match self {
            Token::Newline(x) | Token::Whitespace(x) => x.clone(),
            // The lexed comment body keeps its trailing space but not the
            // closing paren, so "( {})" reconstructs "( ... )" exactly.
            Token::ParenComment(x) => format!("( {})", x),
            Token::BackslashComment(x) => format!("\\{}", x),
            Token::DoubleDashComment(x) => format!("-- {}", x),
            Token::StringLiteral { mode, string } => format!("{}\" {}\"", mode, string),
            Token::Number(x) | Token::Word(x) => x.clone(),
        }
    }
}

pub fn is_space(c: char) -> bool {
    c.is_whitespace() || c == '\n'
}

pub fn lex(source: String) -> Result<Vec<Token>> {
    let mut chars = source.chars().peekable();
    let mut tokens: Vec<Token> = vec![];

    while let Some(&c) = chars.peek() {
        tokens.push(match c {
            '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').collect()),
            _ if c.is_whitespace() => {
                Token::Whitespace(chars.peeking_take_while(|&c| c.is_whitespace()).collect())
            }
            '\\' => {
                chars.next();
                Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect())
            }
            _ if c.is_numeric() => {
                Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect())
            }
            _ => {
                // Read one whitespace-delimited word, then decide what it is.
                let x: String = chars.peeking_take_while(|&c| !is_space(c)).collect();
                match x.as_str() {
                    // skip(1) drops the space following the opening delimiter.
                    "(" => Token::ParenComment(
                        chars.by_ref().skip(1).take_while(|&c| c != ')').collect(),
                    ),
                    "--" => Token::DoubleDashComment(
                        chars.by_ref().take_while(|&c| c != '\n').collect(),
                    ),
                    // Words like `s"` start a string literal; the part
                    // before the quote is the mode.
                    _ if x.ends_with('"') => Token::StringLiteral {
                        mode: x.chars().take(x.len() - 1).collect(),
                        string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(),
                    },
                    _ => Token::Word(x),
                }
            }
        });
    }

    Ok(tokens)
}
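For illustration only (not part of the commit): `Token` derives only `Debug`, so a quick check of the lexer's output can compare the `Debug` rendering of the token stream:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn lexes_words_and_numbers() -> anyhow::Result<()> {
        let tokens = lex("40 2 +".to_string())?;
        // Whitespace is preserved as its own token.
        assert_eq!(
            format!("{:?}", tokens),
            r#"[Number("40"), Whitespace(" "), Number("2"), Whitespace(" "), Word("+")]"#
        );
        Ok(())
    }
}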


@@ -0,0 +1,3 @@
pub mod compiler;
pub mod lexer;
pub mod parser;


@@ -0,0 +1,107 @@
use anyhow::{bail, Result};

use crate::lexer;

pub mod ast;

fn process_raw_stack_result(s: Option<&str>) -> Vec<String> {
    match s {
        Some(x) if !x.trim().is_empty() => {
            x.split_whitespace().map(|x| x.trim().to_string()).collect()
        }
        _ => vec![],
    }
}

pub fn parse_stack_result(s: String) -> ast::StackResult {
    let mut splitter = s.splitn(2, "--");

    ast::StackResult {
        before: process_raw_stack_result(splitter.next()),
        after: process_raw_stack_result(splitter.next()),
    }
}

pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
    let mut iter = tokens.into_iter().peekable();
    let mut body: ast::Body = vec![];

    while let Some(token) = iter.next() {
        match token {
            lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
            lexer::Token::ParenComment(x)
            | lexer::Token::BackslashComment(x)
            | lexer::Token::DoubleDashComment(x) => {
                body.push(ast::Node::Comment(x.trim().to_string()));
            }
            lexer::Token::StringLiteral { mode, string } => {
                body.push(ast::Node::String { mode, string });
            }
            lexer::Token::Number(x) => match num_parse::parse_int::<i32>(x.as_str()) {
                Some(n) => {
                    body.push(ast::Node::Number(n));
                }
                None => bail!("Invalid number: {}", x),
            },
            lexer::Token::Word(x) => match x.as_str() {
                ":" => {
                    // Collect tokens up to the matching `;`, tracking nesting
                    // depth so nested definitions stay balanced.
                    let mut depth: usize = 1;
                    let mut content = iter
                        .by_ref()
                        .take_while(|t| match t {
                            lexer::Token::Word(x) => match x.as_str() {
                                ":" => {
                                    depth += 1;
                                    true
                                }
                                ";" => {
                                    depth -= 1;
                                    depth != 0
                                }
                                _ => true,
                            },
                            _ => true,
                        })
                        .collect::<Vec<_>>()
                        .into_iter()
                        .peekable();
                    if depth != 0 {
                        bail!("Unbalanced word definitions");
                    }

                    let name = match content.find(|t| {
                        !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
                    }) {
                        Some(t) => match t {
                            lexer::Token::Word(x) => x,
                            _ => bail!("Word definition name must be a word itself: {:?}", t),
                        },
                        None => bail!("Word definition can not be empty"),
                    };

                    // Only peek for the optional stack-effect comment: a
                    // definition without one must not lose its first body token.
                    while matches!(
                        content.peek(),
                        Some(lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
                    ) {
                        content.next();
                    }
                    let stack = match content.peek() {
                        Some(lexer::Token::ParenComment(x))
                        | Some(lexer::Token::BackslashComment(x))
                        | Some(lexer::Token::DoubleDashComment(x)) => {
                            let stack = parse_stack_result(x.clone());
                            content.next();
                            Some(stack)
                        }
                        _ => None,
                    };

                    body.push(ast::Node::WordDefinition {
                        name,
                        stack,
                        body: parse(content.collect())?.body,
                    });
                }
                _ => {
                    body.push(ast::Node::Word(x));
                }
            },
        }
    }

    Ok(ast::AST { body })
}
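A small sketch (not in the commit) of `parse_stack_result`: the comment body splits on the first `--` into the before and after stack pictures:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn splits_stack_comment() {
        let r = parse_stack_result("a b -- c".to_string());
        assert_eq!(r.before, vec!["a", "b"]);
        assert_eq!(r.after, vec!["c"]);
    }
}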


@@ -0,0 +1,75 @@
use hence::assembler::ToCode;
use itertools::Itertools;

#[derive(Debug)]
pub struct StackResult {
    pub before: Vec<String>,
    pub after: Vec<String>,
}

impl ToCode for StackResult {
    fn to_code(&self) -> String {
        format!(
            "{}--{}",
            if self.before.is_empty() {
                "".to_string()
            } else {
                format!("{} ", self.before.join(" "))
            },
            if self.after.is_empty() {
                "".to_string()
            } else {
                // Space before the after-picture, so "a b -- c" round-trips.
                format!(" {}", self.after.join(" "))
            }
        )
    }
}

#[derive(Debug)]
pub enum Node {
    Comment(String),
    String {
        mode: String,
        string: String,
    },
    Number(i32),
    WordDefinition {
        name: String,
        stack: Option<StackResult>,
        body: Body,
    },
    Word(String),
}

impl ToCode for Node {
    fn to_code(&self) -> String {
        match self {
            Node::Comment(x) => format!("\\ {}", x),
            Node::String { mode, string } => format!("{}\" {}\"", mode, string),
            Node::Number(x) => x.to_string(),
            Node::WordDefinition { name, stack, body } => format!(
                ": {}{} {} ;",
                name,
                match stack {
                    Some(x) => format!(" {}", x.to_code()),
                    None => "".to_string(),
                },
                body.iter().map(|x| x.to_code()).join(" ")
            ),
            Node::Word(x) => x.clone(),
        }
    }
}

pub type Body = Vec<Node>;

#[derive(Debug)]
pub struct AST {
    pub body: Body,
}

impl ToCode for AST {
    fn to_code(&self) -> String {
        self.body.iter().map(|x| x.to_code()).join(" ")
    }
}
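For illustration (not in the commit), `ToCode` renders an AST back to source-like text:

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn renders_nodes() {
        let ast = AST {
            body: vec![
                Node::Number(40),
                Node::Number(2),
                Node::Word("+".to_string()),
            ],
        };
        assert_eq!(ast.to_code(), "40 2 +");
    }
}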


@@ -0,0 +1,10 @@
.include "$lib/core.asm"
.include "$lib/std.asm"
.include "$lib/main.asm"
.jump_main
data:
.main
.std_stop