Start writing forth compiler

This commit is contained in:
Dominic Grimm 2022-09-03 15:14:58 +02:00
parent 173a857a5a
commit ec7a147ec9
No known key found for this signature in database
GPG key ID: A6C051C716D2CE65
27 changed files with 790 additions and 221 deletions

204
Cargo.lock generated
View file

@ -19,9 +19,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "anyhow"
version = "1.0.62"
version = "1.0.63"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1485d4d2cc45e7b201ee3767015c96faa5904387c9d87c6efdd0fb511f12d305"
checksum = "a26fa4d7e3f2eebadf743988fc8aec9fa9a9e82611acafd77c1462ed6262440a"
dependencies = [
"backtrace",
]
@ -87,9 +87,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "clap"
version = "3.2.17"
version = "3.2.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29e724a68d9319343bb3328c9cc2dfde263f4b3142ee1059a9980580171c954b"
checksum = "23b71c3ce99b7611011217b366d923f1d0a7e07a92bb2dbf1e84508c673ca3bd"
dependencies = [
"atty",
"bitflags",
@ -104,9 +104,9 @@ dependencies = [
[[package]]
name = "clap_derive"
version = "3.2.17"
version = "3.2.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13547f7012c01ab4a0e8f8967730ada8f9fdf419e8b6c792788f39cf4e46eefa"
checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
dependencies = [
"heck",
"proc-macro-error",
@ -168,6 +168,18 @@ version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f"
[[package]]
name = "filetime"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"windows-sys",
]
[[package]]
name = "generic-array"
version = "0.14.6"
@ -223,6 +235,18 @@ dependencies = [
"unescape",
]
[[package]]
name = "henceforth"
version = "0.1.0"
dependencies = [
"anyhow",
"clap",
"hence",
"itertools",
"num-parse",
"sailfish",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
@ -232,6 +256,15 @@ dependencies = [
"libc",
]
[[package]]
name = "home"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2456aef2e6b6a9784192ae780c0f15bc57df0e918585282325e8c8ac27737654"
dependencies = [
"winapi",
]
[[package]]
name = "indexmap"
version = "1.9.1"
@ -251,6 +284,12 @@ dependencies = [
"either",
]
[[package]]
name = "itoap"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8"
[[package]]
name = "libc"
version = "0.2.132"
@ -416,18 +455,18 @@ dependencies = [
[[package]]
name = "proc-macro2"
version = "1.0.43"
version = "1.0.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab"
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
dependencies = [
"unicode-ident",
"unicode-xid",
]
[[package]]
name = "quote"
version = "1.0.21"
version = "1.0.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179"
checksum = "f53dc8cf16a769a6f677e09e7ff2cd4be1ea0f48754aac39520536962011de0d"
dependencies = [
"proc-macro2",
]
@ -468,6 +507,15 @@ dependencies = [
"getrandom",
]
[[package]]
name = "redox_syscall"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a"
dependencies = [
"bitflags",
]
[[package]]
name = "rhexdump"
version = "0.1.1"
@ -514,6 +562,50 @@ version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342"
[[package]]
name = "ryu"
version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09"
[[package]]
name = "sailfish"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "948a7edfc2f03d7c58a097dda25ed29440a72e8528894a6e182fe9171195fed1"
dependencies = [
"itoap",
"ryu",
"sailfish-macros",
"version_check",
]
[[package]]
name = "sailfish-compiler"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3f0a01133d6ce146020e6416ac6a823f813f1cbb30ff77548b4fa20749524947"
dependencies = [
"filetime",
"home",
"memchr",
"proc-macro2",
"quote",
"serde",
"syn",
"toml",
]
[[package]]
name = "sailfish-macros"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "86326c1f1dce0b316e0a47071f683b185417dc64e1a704380b5c706b09e871b1"
dependencies = [
"proc-macro2",
"sailfish-compiler",
]
[[package]]
name = "same-file"
version = "1.0.6"
@ -523,6 +615,26 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "serde"
version = "1.0.144"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.144"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "sha2"
version = "0.9.9"
@ -544,13 +656,13 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.99"
version = "1.0.94"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13"
checksum = "a07e33e919ebcd69113d5be0e4d70c5707004ff45188910106854f38b960df4a"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
"unicode-xid",
]
[[package]]
@ -578,6 +690,15 @@ version = "0.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb"
[[package]]
name = "toml"
version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7"
dependencies = [
"serde",
]
[[package]]
name = "typenum"
version = "1.15.0"
@ -590,18 +711,18 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccb97dac3243214f8d8507998906ca3e2e0b900bf9bf4870477f125b82e68f6e"
[[package]]
name = "unicode-ident"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf"
[[package]]
name = "unicode-width"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
[[package]]
name = "unicode-xid"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04"
[[package]]
name = "version_check"
version = "0.9.4"
@ -655,3 +776,46 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "windows-sys"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2"
dependencies = [
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47"
[[package]]
name = "windows_i686_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6"
[[package]]
name = "windows_i686_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024"
[[package]]
name = "windows_x86_64_gnu"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1"
[[package]]
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

View file

@ -1,28 +1,2 @@
[package]
name = "hence"
version = "0.1.0"
edition = "2021"
authors = ["Dominic Grimm <dominic@dergrimm.net>"]
repository = "https://git.dergrimm.net/dergrimm/hence.git"
[lib]
name = "hence"
path = "src/lib/lib.rs"
[[bin]]
name = "hence"
path = "src/bin/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
itertools = "0.10.2"
num-parse = "0.1.2"
clap = { version = "3.2.16", features = ["derive"] }
rhexdump = "0.1.1"
radix_fmt = "1"
rand = "0.8.5"
console = "0.15.1"
anyhow = { version = "1.0.62", features = ["backtrace"] }
rust-embed = "6.4.0"
unescape = "0.1.0"
[workspace]
members = ["hence", "henceforth"]

View file

@ -1,6 +0,0 @@
.requires "$lib/core.asm"
.define MEM_INPUT_SIZE, 16
.define MEM_INPUT_DYN_END, CORE_MEM_MEM
.define MEM_INPUT_START, (MEM_INPUT_DYN_END + 1)
.define MEM_INPUT_END, (MEM_INPUT_START + MEM_INPUT_SIZE)

View file

@ -1,64 +0,0 @@
.include "$lib/core.asm"
.include "$lib/std.asm"
.include "$lib/main.asm"
.include "constants.asm"
.jump_main
data:
.main
.std_rset CORE_REG_C, MEM_INPUT_START
get_input_loop:
.std_get CORE_MEM_KEY
tlr CORE_REG_A
@ tlr CORE_REG_D
.std_set CORE_MEM_CHR
tsr CORE_REG_C
set
@ .std_rset CORE_REG_B, ' '
@ .std_alu CORE_ALU_EQ
@ tlr CORE_REG_A
@ .std_cond_jump get_input_loop
@ .std_cp CORE_REG_D, CORE_REG_A
tlr CORE_REG_A
.std_rset CORE_REG_B, 1
.std_alu CORE_ALU_ADD
tlr CORE_REG_A
tlr CORE_REG_C
.std_rset CORE_REG_B, MEM_INPUT_END
.std_alu CORE_ALU_LT
tlr CORE_REG_A
.std_cond_jump get_input_loop
.std_cp CORE_REG_C, CORE_REG_A
.std_set MEM_INPUT_DYN_END
.std_rset CORE_REG_A, '\n'
.std_set CORE_MEM_CHR
.std_rset CORE_REG_B, MEM_INPUT_START
.std_get MEM_INPUT_DYN_END
tlr CORE_REG_D
print_loop:
tsr CORE_REG_B
get
tlr CORE_REG_A
.std_set CORE_MEM_CHR
.std_rset CORE_REG_A, 1
.std_alu CORE_ALU_ADD
tlr CORE_REG_B
.std_cp CORE_REG_D, CORE_REG_A
.std_alu CORE_ALU_GT
tlr CORE_REG_A
.std_cond_jump print_loop
.std_rset CORE_REG_A, '\n'
.std_set CORE_MEM_CHR
.std_stop

28
hence/Cargo.toml Normal file
View file

@ -0,0 +1,28 @@
[package]
name = "hence"
version = "0.1.0"
edition = "2021"
authors = ["Dominic Grimm <dominic@dergrimm.net>"]
repository = "https://git.dergrimm.net/dergrimm/hence.git"
[lib]
name = "hence"
path = "src/lib/lib.rs"
[[bin]]
name = "hence"
path = "src/bin/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
itertools = "0.10.2"
num-parse = "0.1.2"
clap = { version = "3.2.16", features = ["derive"] }
rhexdump = "0.1.1"
radix_fmt = "1"
rand = "0.8.5"
console = "0.15.1"
anyhow = { version = "1.0.62", features = ["backtrace"] }
rust-embed = "6.4.0"
unescape = "0.1.0"

View file

@ -1,4 +1,5 @@
; hence core lib
core:
.define NULL, 0x0000
.define VOID, NULL

View file

@ -3,16 +3,16 @@
.requires "$lib/core.asm"
.requires "$lib/std.asm"
.define main_local_jump_main, (CORE_MEM_ST - 3 - 1)
.define lib_main_local_jump_main, (CORE_MEM_ST - 3 - 1)
.macro jump_main
.std_jump main_local_jump_main
.std_jump lib_main_local_jump_main
.endmacro
.macro main
main:
.org main_local_jump_main
.org lib_main_local_jump_main
ts main
tlr CORE_REG_PC
.org main

62
hence/lib/std.asm Normal file
View file

@ -0,0 +1,62 @@
; hence standard lib
.requires "$lib/core.asm"
std:
; std_tclr: expands to `ts NULL` (NULL is defined as 0x0000 in core.asm).
.macro std_tclr
ts NULL
.endmacro
; std_rclr <reg>: expands to `ts NULL` followed by `tlr <reg>`.
; NOTE(review): presumably this zeroes <reg> - confirm against the instruction set.
.macro std_rclr, lib_std_rclr_arg_0_reg
ts NULL
tlr lib_std_rclr_arg_0_reg
.endmacro
; std_alu <op>: expands to `ts <op>` followed by `alu`.
.macro std_alu, lib_std_alu_arg_0_op
ts lib_std_alu_arg_0_op
alu
.endmacro
; std_get <addr>: expands to `ts <addr>` followed by `get`.
.macro std_get, lib_std_get_arg_0_addr
ts lib_std_get_arg_0_addr
get
.endmacro
; std_set <addr>: expands to `ts <addr>` followed by `set`.
.macro std_set, lib_std_set_arg_0_addr
ts lib_std_set_arg_0_addr
set
.endmacro
; std_cp <from>, <to>: expands to `tsr <from>` followed by `tlr <to>`.
; NOTE(review): presumably copies register <from> into register <to> - confirm.
.macro std_cp, lib_std_cp_arg_0_from, lib_std_cp_arg_1_to
tsr lib_std_cp_arg_0_from
tlr lib_std_cp_arg_1_to
.endmacro
; std_mv <from>, <to>: transfers <from> into <to> (same sequence as std_cp)
; and then clears <from>.
; The body now refers to this macro's own arguments; it previously read the
; `lib_std_cp_arg_*` names belonging to std_cp and cleared the destination
; it had just written instead of the source.
.macro std_mv, lib_std_mv_arg_0_from, lib_std_mv_arg_1_to
tsr lib_std_mv_arg_0_from
tlr lib_std_mv_arg_1_to
.std_rclr lib_std_mv_arg_0_from
.endmacro
; std_rset <reg>, <val>: expands to `ts <val>` followed by `tlr <reg>`.
; NOTE(review): presumably loads <val> into <reg> - confirm against the instruction set.
.macro std_rset, lib_std_init_arg_0_reg, lib_std_init_arg_1_val
ts lib_std_init_arg_1_val
tlr lib_std_init_arg_0_reg
.endmacro
; std_jump <label>: std_rset with CORE_REG_PC as the register and <label> as the value.
.macro std_jump, lib_std_jump_arg_0_label
.std_rset CORE_REG_PC, lib_std_jump_arg_0_label
.endmacro
; std_cond_jump <label>: expands to `ts <label>` followed by `tlrc CORE_REG_PC`.
; NOTE(review): the condition evaluated by `tlrc` is not visible here - confirm.
.macro std_cond_jump, lib_std_cond_jump_arg_0_label
ts lib_std_cond_jump_arg_0_label
tlrc CORE_REG_PC
.endmacro
; std_stop: std_rset with CORE_REG_PC as the register and 0xffff as the value.
.macro std_stop
.std_rset CORE_REG_PC, 0xffff
.endmacro
; std_inc <reg>: std_rset <reg>, 1 followed by std_alu CORE_ALU_ADD.
; NOTE(review): this overwrites <reg> with 1 before the ALU call; where the
; other operand and the result live is not visible here - confirm.
.macro std_inc, lib_std_inc_arg_0_reg
.std_rset lib_std_inc_arg_0_reg, 1
.std_alu CORE_ALU_ADD
.endmacro

View file

@ -27,20 +27,15 @@ pub enum Arg {
},
}
impl assembler::ToAssembly for Arg {
fn to_assembly(&self) -> String {
impl assembler::ToCode for Arg {
fn to_code(&self) -> String {
match self {
Arg::Char(x) => format!("'{}'", x),
Arg::String(x) => format!("\"{}\"", x),
Arg::Number(x) => x.to_string(),
Arg::Variable(x) => x.clone(),
Arg::BinaryExpression { left, right, op } => {
format!(
"({} {} {})",
left.to_assembly(),
op.to_assembly(),
right.to_assembly()
)
format!("({} {} {})", left.to_code(), op.to_code(), right.to_code())
}
}
}
@ -199,8 +194,8 @@ pub enum BinaryExpressionOperator {
Rnd,
}
impl assembler::ToAssembly for BinaryExpressionOperator {
fn to_assembly(&self) -> String {
impl assembler::ToCode for BinaryExpressionOperator {
fn to_code(&self) -> String {
match self {
BinaryExpressionOperator::Not => "~".to_string(),
BinaryExpressionOperator::And => "&".to_string(),

View file

@ -11,8 +11,8 @@ use crate::arg;
use crate::lexer;
use crate::parser;
pub trait ToAssembly {
fn to_assembly(&self) -> String;
pub trait ToCode {
fn to_code(&self) -> String;
}
pub trait ByteResolvable<T> {
@ -114,7 +114,7 @@ impl Data {
parser::ast::Node::MacroCall { name, args } => match name.as_str() {
"debug" => {
for arg in args {
let assembly = arg.to_assembly().replace('\n', "\\n");
let assembly = arg.to_code().replace('\n', "\\n");
let num = arg.resolve_number(self)?;
let bytes = arg.resolve_bytes(self)?;
@ -356,7 +356,7 @@ impl Data {
call_args = if args.is_empty() {
"".to_string()
} else {
format!(" {}", args.iter().map(|a| a.to_assembly()).join(", "))
format!(" {}", args.iter().map(|a| a.to_code()).join(", "))
}
);
}

View file

@ -3,7 +3,7 @@ use itertools::Itertools;
use crate::assembler;
#[derive(Debug, PartialEq)]
#[derive(Debug, PartialEq, Eq)]
pub enum Token {
Comment(String),
@ -47,8 +47,8 @@ pub enum Token {
Whitespace(String),
}
impl assembler::ToAssembly for Token {
fn to_assembly(&self) -> String {
impl assembler::ToCode for Token {
fn to_code(&self) -> String {
match self {
Token::Comment(x) => format!(";{}", x),
Token::CharLiteral(x) => format!("'{}'", x),
@ -283,32 +283,29 @@ pub fn lex(source: String) -> Result<Vec<Token>> {
#[cfg(test)]
mod tests {
use super::*;
use crate::assembler::ToAssembly;
use crate::assembler::ToCode;
#[test]
fn test_token_to_assembly() {
assert_eq!(
Token::Comment(" \"main function\" like definition macro".to_string()).to_assembly(),
Token::Comment(" \"main function\" like definition macro".to_string()).to_code(),
"; \"main function\" like definition macro".to_string()
);
assert_eq!(
Token::CharLiteral("\\n".to_string()).to_assembly(),
Token::CharLiteral("\\n".to_string()).to_code(),
"'\\n'".to_string()
);
assert_eq!(
Token::MacroLiteral("xyz".to_string()).to_assembly(),
Token::MacroLiteral("xyz".to_string()).to_code(),
"xyz".to_string()
);
assert_eq!(
Token::Literal("xkcd".to_string()).to_assembly(),
Token::Literal("xkcd".to_string()).to_code(),
"xkcd".to_string()
);
assert_eq!(Token::Newline("\n".to_string()).to_code(), "\n".to_string());
assert_eq!(
Token::Newline("\n".to_string()).to_assembly(),
"\n".to_string()
);
assert_eq!(
Token::Whitespace(" ".to_string()).to_assembly(),
Token::Whitespace(" ".to_string()).to_code(),
" ".to_string()
);
}

View file

@ -8,7 +8,7 @@ pub mod ast;
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
let mut iter = tokens.iter().peekable();
let mut body: Vec<ast::Node> = Vec::new();
let mut body: ast::Body = vec![];
while let Some(&token) = iter.peek() {
match token {

View file

@ -11,14 +11,14 @@ pub enum Node {
MacroCall { name: String, args: Vec<arg::Arg> },
}
impl assembler::ToAssembly for Node {
fn to_assembly(&self) -> String {
impl assembler::ToCode for Node {
fn to_code(&self) -> String {
match self {
Node::Comment(x) => format!("; {x}"),
Node::Label(x) => format!("{x}:"),
Node::Call { name, arg } => {
if let Some(a) = arg {
format!("{name} {arg}", arg = a.to_assembly())
format!("{name} {arg}", arg = a.to_code())
} else {
name.clone()
}
@ -27,10 +27,7 @@ impl assembler::ToAssembly for Node {
if args.is_empty() {
format!(".{name}")
} else {
format!(
".{name} {}",
args.iter().map(|a| a.to_assembly()).join(", ")
)
format!(".{name} {}", args.iter().map(|a| a.to_code()).join(", "))
}
}
}
@ -44,8 +41,8 @@ pub struct AST {
pub body: Vec<Node>,
}
impl assembler::ToAssembly for AST {
fn to_assembly(&self) -> String {
self.body.iter().map(|n| n.to_assembly()).join("\n")
impl assembler::ToCode for AST {
fn to_code(&self) -> String {
self.body.iter().map(|n| n.to_code()).join("\n")
}
}

24
henceforth/Cargo.toml Normal file
View file

@ -0,0 +1,24 @@
[package]
name = "henceforth"
version = "0.1.0"
edition = "2021"
authors = ["Dominic Grimm <dominic@dergrimm.net>"]
repository = "https://git.dergrimm.net/dergrimm/hence.git"
[lib]
name = "henceforth"
path = "src/lib/lib.rs"
[[bin]]
name = "henceforth"
path = "src/bin/main.rs"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
hence = { path = "../hence" }
clap = { version = "3.2.16", features = ["derive"] }
anyhow = { version = "1.0.62", features = ["backtrace"] }
itertools = "0.10.2"
num-parse = "0.1.2"
sailfish = "0.4.0"

View file

@ -0,0 +1,3 @@
40 2 + drop drop
: test ( -- 42 ) 40 2 + ;
test .

View file

@ -0,0 +1,74 @@
use anyhow::Result;
use clap::{Parser, Subcommand};
use std::fs;
use henceforth::*;
// Top-level command line arguments; the parser is generated by clap's `Parser` derive.
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.
#[derive(Debug, Parser)]
#[clap(author, version, about, long_about = None)]
struct Cli {
// The sub-command selected on the command line (dispatched in `main`).
#[clap(subcommand)]
commands: Commands,
}
// Sub-commands of the CLI. Each one reads the file named by `src` and runs the
// pipeline up to a different stage (see `main`).
// Plain `//` comments are used on purpose: clap turns `///` doc comments into help text.
#[derive(Debug, Subcommand)]
enum Commands {
// Runs the lexer only and dumps the resulting tokens.
#[clap(about = "Lexes source code and outputs tokens")]
Lex {
// Path of the source file to read.
#[clap(value_parser)]
src: String,
},
// Runs lexer and parser and dumps the resulting AST.
#[clap(about = "Parses source code and outputs AST")]
Parse {
// Path of the source file to read.
#[clap(value_parser)]
src: String,
},
// Runs the whole pipeline and produces assembly text.
#[clap(about = "Compiles assembly from source code")]
Compile {
// Path of the source file to read.
#[clap(value_parser)]
src: String,
// Optional path the generated assembly is written to.
#[clap(value_parser)]
out: Option<String>,
// When set, the generated assembly is printed to stdout.
#[clap(long, action)]
dump: bool,
},
}
/// Entry point: parses the command line and runs the requested pipeline stage.
///
/// * `lex` reads the source file and dumps its tokens via `dbg!`.
/// * `parse` additionally parses the tokens and dumps the result via `dbg!`.
/// * `compile` generates assembly, writes it to `out` when given and prints
///   it to stdout when `--dump` is set.
///
/// Any I/O, lexer, parser or compiler error is propagated to the caller.
fn main() -> Result<()> {
    let args = Cli::parse();

    match args.commands {
        Commands::Lex { src } => {
            let tokens = lexer::lex(fs::read_to_string(src)?)?;
            dbg!(tokens);
        }
        Commands::Parse { src } => {
            let tokens = lexer::lex(fs::read_to_string(src)?)?;
            let body = parser::parse(tokens)?;
            dbg!(body);
        }
        Commands::Compile { src, out, dump } => {
            let tokens = lexer::lex(fs::read_to_string(&src)?)?;
            let assembly = compiler::compile(parser::parse(tokens)?)?;

            if let Some(x) = out {
                fs::write(x, &assembly)?;
            }
            if dump {
                println!("{}", assembly);
            }
        }
    }

    // Every arm either returned early through `?` or finished successfully.
    Ok(())
}
#[cfg(test)]
mod tests {}

View file

@ -0,0 +1,113 @@
use std::collections::HashMap;
use anyhow::{bail, Result};
use sailfish::TemplateOnce;
use crate::parser;
/// A single operation of the compiler's intermediate representation.
///
/// `Eq` is derived alongside `PartialEq` because every payload (`u16`,
/// `String`) has total equality.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Instruction {
    /// A literal 16-bit value taken from a number in the source.
    Push(u16),
    /// Produced for the built-in word `drop`.
    Drop,
    /// Produced for the built-in word `+`.
    Add,
    /// Produced for the built-in word `-`.
    Sub,
    /// Produced for the built-in word `.`.
    Dot,
    /// Reference to a user-defined word, identified by its name.
    Call(String),
}

impl Instruction {
    /// Maps a built-in word to its instruction.
    ///
    /// Returns `None` when `word` is not one of the built-ins (`drop`, `+`,
    /// `-`, `.`); the caller then treats it as a user-defined word.
    pub fn from_word(word: &str) -> Option<Self> {
        match word {
            "drop" => Some(Instruction::Drop),
            "+" => Some(Instruction::Add),
            "-" => Some(Instruction::Sub),
            "." => Some(Instruction::Dot),
            _ => None,
        }
    }
}
/// A user-defined word registered by `Data::generate_instructions`.
#[derive(Debug)]
pub struct Word {
/// Number of words that were already registered when this one was inserted.
id: u16,
/// Instructions generated from the body of the definition.
instructions: Vec<Instruction>,
}
/// Compilation state: what `generate_instructions` has produced so far.
#[derive(Debug)]
pub struct Data {
/// Instructions generated for code outside of word definitions.
instructions: Vec<Instruction>,
/// User-defined words, keyed by their name.
words: HashMap<String, Word>,
}
impl Data {
    /// Creates an empty compilation state.
    fn new() -> Self {
        Self {
            instructions: vec![],
            words: HashMap::new(),
        }
    }

    /// Translates `body` into instructions, appending them to `self.instructions`
    /// and registering every word definition in `self.words`.
    ///
    /// Comments and string literals produce no instructions. The body of a word
    /// definition is generated into a separate buffer that is stored with the
    /// word.
    ///
    /// # Errors
    ///
    /// Fails when
    /// * a number does not fit into a 16-bit cell (`-32768..=65535`),
    /// * a definition reuses the name of a built-in or of an existing word,
    /// * a word is used that is neither built-in nor defined earlier.
    pub fn generate_instructions(&mut self, body: parser::ast::Body) -> Result<()> {
        for node in body {
            match node {
                parser::ast::Node::Comment(_) => {}
                // String literals are accepted but not compiled yet.
                parser::ast::Node::String { .. } => {}
                parser::ast::Node::Number(x) => {
                    // Reject literals that would be silently truncated by the cast below.
                    if !(i32::from(i16::MIN)..=i32::from(u16::MAX)).contains(&x) {
                        bail!("Number does not fit into 16 bits: {}", x);
                    }
                    // In range: the cast only maps negative values to two's complement.
                    self.instructions.push(Instruction::Push(x as u16));
                }
                parser::ast::Node::WordDefinition {
                    name,
                    stack: _,
                    body,
                } => {
                    if Instruction::from_word(&name).is_some() {
                        bail!("Word already exists as compiler instruction: {}", name);
                    } else if self.words.contains_key(&name) {
                        bail!("Word already exists as user word definition: {}", name);
                    }

                    // Generate the definition into a fresh buffer, then put the outer
                    // buffer back *before* propagating an error so the state stays
                    // consistent. Moving the vectors avoids cloning them.
                    let outer = std::mem::take(&mut self.instructions);
                    let result = self.generate_instructions(body);
                    let instructions = std::mem::replace(&mut self.instructions, outer);
                    result?;

                    let id = self.words.len() as u16;
                    self.words.insert(name, Word { id, instructions });
                }
                parser::ast::Node::Word(x) => {
                    if let Some(ins) = Instruction::from_word(&x) {
                        self.instructions.push(ins);
                    } else if self.words.contains_key(&x) {
                        self.instructions.push(Instruction::Call(x));
                    } else {
                        bail!("Word does not exist: {}", x);
                    }
                }
            }
        }

        Ok(())
    }
}
/// Sailfish template backed by the file `assembly.asm`; rendered by `compile`.
#[derive(TemplateOnce)]
#[template(path = "assembly.asm")]
pub struct Template {
/// Compilation result handed to the template.
pub data: Data,
}
/// Compiles a parsed program into assembly text.
///
/// Generates the instructions for `ast` and renders them through [`Template`].
///
/// # Errors
///
/// Propagates errors from instruction generation and from template rendering.
pub fn compile(ast: parser::ast::AST) -> Result<String> {
    let mut data = Data::new();
    data.generate_instructions(ast.body)?;

    // The leftover `dbg!(&data)` was removed: a library function should not
    // dump its internal state to stderr on every call.
    Ok(Template { data }.render_once()?)
}

View file

@ -0,0 +1,74 @@
use anyhow::Result;
use hence::assembler::ToCode;
use itertools::Itertools;
/// Lexical token produced by `lex`. Every variant carries the text it was built from.
#[derive(Debug)]
pub enum Token {
/// A run of `'\n'` characters.
Newline(String),
/// A run of whitespace characters.
Whitespace(String),
/// Text between `(` (plus the one character following it) and the next `)`.
ParenComment(String),
/// Text after a `\` up to, but not including, the end of the line.
BackslashComment(String),
/// Text after `--` up to the end of the line.
DoubleDashComment(String),
/// Produced for a word ending in `"`: `mode` is the part of that word before
/// the quote, `string` is the text up to the next `"`.
StringLiteral { mode: String, string: String },
/// A word whose first character is numeric; not validated by the lexer.
Number(String),
/// Any other run of non-whitespace characters.
Word(String),
}
impl ToCode for Token {
    /// Renders the token back into source text.
    fn to_code(&self) -> String {
        match self {
            // Tokens that are stored verbatim.
            Token::Newline(text)
            | Token::Whitespace(text)
            | Token::Number(text)
            | Token::Word(text) => text.clone(),
            // Comments get their introducer (and closer) back.
            Token::ParenComment(comment) => format!("( {})", comment),
            Token::BackslashComment(comment) => format!("\\{}", comment),
            Token::DoubleDashComment(comment) => format!("-- {}", comment),
            Token::StringLiteral { mode, string } => {
                format!("{}\" {}\"", mode, string)
            }
        }
    }
}
/// Returns `true` when `c` separates words, i.e. for any whitespace character.
pub fn is_space(c: char) -> bool {
    // '\n' is itself whitespace, so no separate comparison is required.
    c.is_whitespace()
}
/// Splits `source` into tokens.
///
/// * runs of `'\n'` become [`Token::Newline`], other whitespace runs
///   [`Token::Whitespace`];
/// * `\` starts a comment that runs to the end of the line;
/// * a word starting with a numeric character becomes [`Token::Number`];
/// * the word `(` starts a comment that runs to the next `)`, the word `--`
///   one that runs to the end of the line;
/// * a word ending in `"` starts a string literal that runs to the next `"`;
///   the part of the word before the quote is the literal's `mode`;
/// * everything else is a [`Token::Word`].
///
/// The function currently never fails; the `Result` is part of the interface.
pub fn lex(source: String) -> Result<Vec<Token>> {
    let mut chars = source.chars().peekable();
    let mut tokens: Vec<Token> = vec![];

    while let Some(&c) = chars.peek() {
        tokens.push(match c {
            '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').collect()),
            _ if c.is_whitespace() => {
                Token::Whitespace(chars.peeking_take_while(|&c| c.is_whitespace()).collect())
            }
            '\\' => {
                chars.next();
                Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect())
            }
            _ if c.is_numeric() => {
                Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect())
            }
            _ => {
                let x: String = chars.peeking_take_while(|&c| !is_space(c)).collect();
                match x.as_str() {
                    // `skip(1)` drops the separator that follows the introducer.
                    "(" => Token::ParenComment(
                        chars.by_ref().skip(1).take_while(|&c| c != ')').collect(),
                    ),
                    "--" => Token::DoubleDashComment(
                        chars.by_ref().take_while(|&c| c != '\n').collect(),
                    ),
                    // Strip the quote by suffix instead of counting `len() - 1`
                    // chars: `len()` is a byte count, so a non-ASCII prefix used
                    // to keep the trailing quote inside `mode`.
                    _ => match x.strip_suffix('"').map(str::to_owned) {
                        Some(mode) => Token::StringLiteral {
                            mode,
                            string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(),
                        },
                        None => Token::Word(x),
                    },
                }
            }
        });
    }

    Ok(tokens)
}

View file

@ -0,0 +1,3 @@
pub mod compiler;
pub mod lexer;
pub mod parser;

View file

@ -0,0 +1,107 @@
use anyhow::{bail, Result};
use num_parse;
use crate::lexer;
pub mod ast;
/// Splits one side of a stack effect into its whitespace separated names.
///
/// `None`, an empty string and a blank string all yield an empty list.
fn process_raw_stack_result(s: Option<&str>) -> Vec<String> {
    // `split_whitespace` never yields empty or padded items, so blank input
    // naturally produces an empty vector.
    s.map_or_else(Vec::new, |raw| {
        raw.split_whitespace().map(str::to_string).collect()
    })
}
/// Parses the text of a stack effect comment such as `a b -- c`.
///
/// The text is split at the first `--`; the names on the left become
/// `before`, the names on the right `after`. Without a `--` everything is
/// treated as `before`.
pub fn parse_stack_result(s: String) -> ast::StackResult {
    let mut halves = s.splitn(2, "--");
    let before = process_raw_stack_result(halves.next());
    let after = process_raw_stack_result(halves.next());

    ast::StackResult { before, after }
}
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
let mut iter = tokens.into_iter().peekable();
let mut body: ast::Body = vec![];
while let Some(token) = iter.next() {
match token {
lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
lexer::Token::ParenComment(x)
| lexer::Token::BackslashComment(x)
| lexer::Token::DoubleDashComment(x) => {
body.push(ast::Node::Comment(x.trim().to_string()));
}
lexer::Token::StringLiteral { mode, string } => {
body.push(ast::Node::String { mode, string });
}
lexer::Token::Number(x) => match num_parse::parse_int::<i32>(x.as_str()) {
Some(n) => {
body.push(ast::Node::Number(n));
}
None => bail!("Invalid number: {}", x),
},
lexer::Token::Word(x) => match x.as_str() {
":" => {
let mut depth: usize = 1;
let mut content = iter
.by_ref()
.take_while(|t| match t {
lexer::Token::Word(x) => match x.as_str() {
":" => {
depth += 1;
true
}
";" => {
depth -= 1;
depth != 0
}
_ => true,
},
_ => true,
})
.collect::<Vec<_>>()
.into_iter();
if depth != 0 {
bail!("Unbalanced word definitions");
}
let name = match content.find(|t| {
!matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
}) {
Some(t) => match t {
lexer::Token::Word(x) => x.clone(),
_ => bail!("Word definition name must be a word itself: {:?}", t),
},
None => bail!("Word definition can not be empty"),
};
let stack = match content.find(|t| {
!matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
}) {
Some(t) => match t {
lexer::Token::ParenComment(x)
| lexer::Token::BackslashComment(x)
| lexer::Token::DoubleDashComment(x) => Some(parse_stack_result(x)),
_ => None,
},
None => None,
};
body.push(ast::Node::WordDefinition {
name,
stack,
body: parse(content.collect())?.body,
});
}
_ => {
body.push(ast::Node::Word(x));
}
},
}
}
Ok(ast::AST { body })
}

View file

@ -0,0 +1,75 @@
use hence::assembler::ToCode;
use itertools::Itertools;
/// Stack effect of a word definition, as written in its comment.
#[derive(Debug)]
pub struct StackResult {
/// Names listed before the `--` separator.
pub before: Vec<String>,
/// Names listed after the `--` separator.
pub after: Vec<String>,
}
impl ToCode for StackResult {
    /// Renders the stack effect as `before -- after`.
    ///
    /// An empty side is omitted together with its separating space, so the
    /// result is `--` when both sides are empty.
    fn to_code(&self) -> String {
        let before = if self.before.is_empty() {
            String::new()
        } else {
            format!("{} ", self.before.join(" "))
        };
        // The space belongs in front of the names: the previous trailing space
        // produced `--c ` instead of `-- c`.
        let after = if self.after.is_empty() {
            String::new()
        } else {
            format!(" {}", self.after.join(" "))
        };

        format!("{}--{}", before, after)
    }
}
/// A node of the parsed program.
#[derive(Debug)]
pub enum Node {
/// Text of a comment.
Comment(String),
/// A string literal; `mode` is the word prefix in front of the opening quote.
String {
mode: String,
string: String,
},
/// A numeric literal.
Number(i32),
/// A word definition: its name, its optional stack effect and its body.
WordDefinition {
name: String,
stack: Option<StackResult>,
body: Body,
},
/// A reference to a word by name.
Word(String),
}
impl ToCode for Node {
    /// Renders the node back into source text.
    ///
    /// Comments are emitted as `\` comments, word definitions as
    /// `: name ( stack ) body ;` with the body nodes separated by spaces.
    fn to_code(&self) -> String {
        match self {
            Node::Comment(x) => format!("\\ {}", x),
            Node::String { mode, string } => format!("{}\" {}\"", mode, string),
            Node::Number(x) => x.to_string(),
            Node::WordDefinition { name, stack, body } => format!(
                ": {}{} {} ;",
                name,
                match stack {
                    // The stack effect is a comment and needs its parentheses;
                    // without them it was emitted as ordinary words.
                    Some(x) => format!(" ( {} )", x.to_code()),
                    None => "".to_string(),
                },
                body.iter().map(|x| x.to_code()).join(" ")
            ),
            Node::Word(x) => x.clone(),
        }
    }
}
pub type Body = Vec<Node>;
/// Root of a parsed program.
#[derive(Debug)]
pub struct AST {
/// Top-level nodes in source order.
pub body: Body,
}
impl ToCode for AST {
fn to_code(&self) -> String {
self.body.iter().map(|x| x.to_code()).join(" ")
}
}

View file

@ -0,0 +1,10 @@
.include "$lib/core.asm"
.include "$lib/std.asm"
.include "$lib/main.asm"
.jump_main
data:
.main
.std_stop

View file

@ -1,62 +0,0 @@
; hence standard lib
.requires "$lib/core.asm"
std:
.macro std_tclr
ts NULL
.endmacro
.macro std_rclr, std_rclr_arg_0_reg
ts NULL
tlr std_rclr_arg_0_reg
.endmacro
.macro std_alu, std_alu_arg_0_op
ts std_alu_arg_0_op
alu
.endmacro
.macro std_get, std_get_arg_0_addr
ts std_get_arg_0_addr
get
.endmacro
.macro std_set, std_set_arg_0_addr
ts std_set_arg_0_addr
set
.endmacro
.macro std_cp, std_cp_arg_0_from, std_cp_arg_1_to
tsr std_cp_arg_0_from
tlr std_cp_arg_1_to
.endmacro
.macro std_mv, std_mv_arg_0_from, std_cp_arg_1_to
tsr std_cp_arg_0_from
tlr std_cp_arg_1_to
.std_rclr std_cp_arg_1_to
.endmacro
.macro std_rset, std_init_arg_0_reg, std_init_arg_1_val
ts std_init_arg_1_val
tlr std_init_arg_0_reg
.endmacro
.macro std_jump, std_jump_arg_0_label
.std_rset CORE_REG_PC, std_jump_arg_0_label
.endmacro
.macro std_cond_jump, std_cond_jump_arg_0_label
ts std_cond_jump_arg_0_label
tlrc CORE_REG_PC
.endmacro
.macro std_stop
.std_rset CORE_REG_PC, 0xffff
.endmacro
.macro std_inc, std_inc_arg_0_reg
.std_rset std_inc_arg_0_reg, 1
.std_alu CORE_ALU_ADD
.endmacro