diff --git a/Cargo.lock b/Cargo.lock index 2936acd..86732a8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -168,18 +168,6 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" -[[package]] -name = "filetime" -version = "0.2.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "windows-sys", -] - [[package]] name = "generic-array" version = "0.14.6" @@ -227,7 +215,7 @@ dependencies = [ "clap", "console", "itertools", - "num-parse", + "parse_int", "radix_fmt", "rand", "rhexdump", @@ -242,9 +230,9 @@ dependencies = [ "anyhow", "clap", "hence", + "indexmap", "itertools", - "num-parse", - "sailfish", + "parse_int", ] [[package]] @@ -256,15 +244,6 @@ dependencies = [ "libc", ] -[[package]] -name = "home" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2456aef2e6b6a9784192ae780c0f15bc57df0e918585282325e8c8ac27737654" -dependencies = [ - "winapi", -] - [[package]] name = "indexmap" version = "1.9.1" @@ -284,12 +263,6 @@ dependencies = [ "either", ] -[[package]] -name = "itoap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" - [[package]] name = "libc" version = "0.2.132" @@ -311,82 +284,6 @@ dependencies = [ "adler", ] -[[package]] -name = "num" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - -[[package]] -name = "num-bigint" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93ab6289c7b344a8a9f60f88d80aa20032336fe78da341afc91c8a2341fc75f" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-complex" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ae39348c8bc5fbd7f40c727a9925f03517afd2ab27d46702108b6a7e5414c19" -dependencies = [ - "num-traits", -] - -[[package]] -name = "num-integer" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" -dependencies = [ - "autocfg", - "num-traits", -] - -[[package]] -name = "num-iter" -version = "0.1.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-parse" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c794eedf4b22ca525c2c4602ea17ccd71f69eaaacf546551aba127b2c396a94" -dependencies = [ - "num", -] - -[[package]] -name = "num-rational" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" -dependencies = [ - "autocfg", - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.15" @@ -423,6 +320,15 @@ version = "6.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9ff7415e9ae3fff1225851df9e0d9e4e5479f947619774677a63572e55e80eff" +[[package]] +name = "parse_int" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d695b79916a2c08bcff7be7647ab60d1402885265005a6658ffe6d763553c5a" +dependencies = [ + "num-traits", +] + [[package]] name = "ppv-lite86" version = "0.2.16" @@ -507,15 +413,6 @@ dependencies = [ "getrandom", ] -[[package]] -name = "redox_syscall" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" -dependencies = [ - "bitflags", -] - [[package]] name = "rhexdump" version = "0.1.1" @@ -562,50 +459,6 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" -[[package]] -name = "ryu" -version = "1.0.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" - -[[package]] -name = "sailfish" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "948a7edfc2f03d7c58a097dda25ed29440a72e8528894a6e182fe9171195fed1" -dependencies = [ - "itoap", - "ryu", - "sailfish-macros", - "version_check", -] - -[[package]] -name = "sailfish-compiler" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f0a01133d6ce146020e6416ac6a823f813f1cbb30ff77548b4fa20749524947" -dependencies = [ - "filetime", - "home", - "memchr", - "proc-macro2", - "quote", - "serde", - "syn", - "toml", -] - -[[package]] -name = "sailfish-macros" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86326c1f1dce0b316e0a47071f683b185417dc64e1a704380b5c706b09e871b1" -dependencies = [ - "proc-macro2", - "sailfish-compiler", -] - [[package]] name = "same-file" version = "1.0.6" @@ -615,26 +468,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "serde" -version = "1.0.144" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" -dependencies = [ - "serde_derive", -] - -[[package]] -name = "serde_derive" -version = "1.0.144" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "sha2" version = "0.9.9" @@ -690,15 +523,6 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" -[[package]] -name = "toml" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" -dependencies = [ - "serde", -] - [[package]] name = "typenum" version = "1.15.0" @@ -776,46 +600,3 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-sys" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" -dependencies = [ - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_msvc", -] - -[[package]] -name = "windows_aarch64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" - -[[package]] -name = "windows_i686_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" - -[[package]] -name = "windows_i686_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" - -[[package]] -name = "windows_x86_64_gnu" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" - -[[package]] -name = "windows_x86_64_msvc" -version = "0.36.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" diff --git a/hence/Cargo.toml b/hence/Cargo.toml index 53f436c..4f258a0 100644 --- a/hence/Cargo.toml +++ b/hence/Cargo.toml @@ -17,7 +17,7 @@ path = "src/bin/main.rs" [dependencies] itertools = "0.10.2" -num-parse = "0.1.2" +# num-parse = "0.1.2" clap = { version = "3.2.16", features = ["derive"] } rhexdump = "0.1.1" radix_fmt = "1" @@ -26,3 +26,4 @@ console = "0.15.1" anyhow = { version = "1.0.62", features = ["backtrace"] } rust-embed = "6.4.0" unescape = "0.1.0" +parse_int = "0.6.0" diff --git a/hence/README.md b/hence/README.md index 44bad58..dadeaea 100644 --- a/hence/README.md +++ b/hence/README.md @@ -21,8 +21,7 @@ | `0x06` | `tlr` | Load `tmp`'s value into register | | | `0x07` | `tlrc` | Same as `tlr` but only executes if register `a`'s first bit is `1` | | | `0x08` | `tls` | Push value of `tmp` to stack | | -| `0x09` | `ld` | Loads top of stack into register | | -| `0x0a` | `dbg` | Debug | | -| `0x0b` | `alu` | Runs ALU with `tmp`'s value as operator | | -| `0x0c` | `get` | Sets `tmp` to memory at address in `tmp` | | -| `0x0d` | `set` | Sets memory to value at specific address | | +| `0x09` | `dbg` | Debug | | +| `0x0a` | `alu` | Runs ALU with `tmp`'s value as operator | | +| `0x0b` | `get` | Sets `tmp` to memory at address in `tmp` | | +| `0x0c` | `set` | Sets memory to value at specific address | | diff --git a/hence/lib/core.asm b/hence/lib/core.asm index 5c324e2..88e66e6 100644 --- a/hence/lib/core.asm +++ b/hence/lib/core.asm @@ -14,11 +14,13 @@ core: core_mem: .define CORE_MEM_PRG, (0 * CORE_KB) - .define CORE_MEM_ST, (32 * CORE_KB) + .define CORE_MEM_PRG_END, (32 * CORE_KB) + .define CORE_MEM_ST, CORE_MEM_PRG_END .define CORE_MEM_MEM, (40 * CORE_KB) - .define CORE_MEM_OUT, (56 * CORE_KB) - .define CORE_MEM_CHR, (56 * CORE_KB + 1) - .define CORE_MEM_KEY, (56 * CORE_KB + 2) + .define CORE_MEM_MEM_END, (56 * CORE_KB) + .define CORE_MEM_OUT, CORE_MEM_MEM_END + .define CORE_MEM_CHR, (CORE_MEM_MEM_END + 1) + .define CORE_MEM_KEY, (CORE_MEM_MEM_END + 2) core_reg: .define CORE_REG_PC, 0x0 diff --git a/hence/lib/main.asm b/hence/lib/main.asm index 6666d27..b061933 100644 --- a/hence/lib/main.asm +++ b/hence/lib/main.asm @@ -9,11 +9,9 @@ .std_jump lib_main_local_jump_main .endmacro -.macro main - main: - +.macro main, main_arg_0_label .org lib_main_local_jump_main - ts main + ts main_arg_0_label tlr CORE_REG_PC .org main .endmacro diff --git a/hence/lib/std.asm b/hence/lib/std.asm index 6d22b63..09972bf 100644 --- a/hence/lib/std.asm +++ b/hence/lib/std.asm @@ -60,3 +60,8 @@ std: .std_rset lib_std_inc_arg_0_reg, 1 .std_alu CORE_ALU_ADD .endmacro + + .macro std_ld + tss + pop + .endmacro diff --git a/hence/src/lib/arg.rs b/hence/src/lib/arg.rs index a0a9c98..0f39eaf 100644 --- a/hence/src/lib/arg.rs +++ b/hence/src/lib/arg.rs @@ -1,4 +1,5 @@ use anyhow::{bail, Result}; +use parse_int; use rand; use std::cmp::Ordering; use unescape::unescape; @@ -314,10 +315,7 @@ pub fn parse_args(tokens: Vec<&lexer::Token>) -> Result> { args.push(Arg::Variable(x.clone())); } lexer::Token::Number(x) => { - args.push(Arg::Number(match num_parse::parse_uint(x) { - Some(y) => y, - None => bail!("Error parsing number"), - })); + args.push(Arg::Number(parse_int::parse(x)?)); } lexer::Token::LParen => { let mut depth: usize = 1; diff --git a/hence/src/lib/assembler.rs b/hence/src/lib/assembler.rs index 484525a..26a522a 100644 --- a/hence/src/lib/assembler.rs +++ b/hence/src/lib/assembler.rs @@ -93,11 +93,11 @@ impl Data { "tlr" => 0x06, "tlrc" => 0x07, "tls" => 0x08, - "ld" => 0x09, - "dbg" => 0x0a, - "alu" => 0x0b, - "get" => 0x0c, - "set" => 0x0d, + // "ld" => 0x09, + "dbg" => 0x09, + "alu" => 0x0a, + "get" => 0x0b, + "set" => 0x0c, _ => bail!("Unknown opcode: {}", name), }; let a = match arg { diff --git a/hence/src/lib/emulator.rs b/hence/src/lib/emulator.rs index 0a059b8..8a0a575 100644 --- a/hence/src/lib/emulator.rs +++ b/hence/src/lib/emulator.rs @@ -265,12 +265,12 @@ pub fn emulate(data: &mut Data) -> Result<()> { data.stack[data.reg_sp as usize] = data.tmp; data.reg_sp = data.reg_sp.wrapping_add(1); } + // 0x09 => { + // data.reg_sp = data.reg_sp.wrapping_sub(1); + // data.set_register(data.reg_arg as u8, data.stack[data.reg_sp as usize]); + // data.stack[data.reg_sp as usize] = 0; + // } 0x09 => { - data.reg_sp = data.reg_sp.wrapping_sub(1); - data.set_register(data.reg_arg as u8, data.stack[data.reg_sp as usize]); - data.stack[data.reg_sp as usize] = 0; - } - 0x0a => { println!( "[DEBUG]: [{}]", data.stack.iter().take(data.reg_sp as usize).join(", ") @@ -279,13 +279,13 @@ pub fn emulate(data: &mut Data) -> Result<()> { io::stdout().flush()?; data.term.read_line()?; } - 0x0b => { + 0x0a => { data.alu(data.tmp as u8)?; } - 0x0c => { + 0x0b => { data.tmp = data.get_memory(data.tmp)?; } - 0x0d => { + 0x0c => { data.set_memory(data.tmp, data.reg_a)?; } _ => bail!("Invalid opcode: 0x{}", radix(data.reg_opc, 16)), diff --git a/henceforth/Cargo.toml b/henceforth/Cargo.toml index 3b86fb3..84496ac 100644 --- a/henceforth/Cargo.toml +++ b/henceforth/Cargo.toml @@ -20,5 +20,5 @@ hence = { path = "../hence" } clap = { version = "3.2.16", features = ["derive"] } anyhow = { version = "1.0.62", features = ["backtrace"] } itertools = "0.10.2" -num-parse = "0.1.2" -sailfish = "0.4.0" +parse_int = "0.6.0" +indexmap = "1.9.1" diff --git a/henceforth/examples/test.fth b/henceforth/examples/test.fth index 4e774ed..8455af2 100644 --- a/henceforth/examples/test.fth +++ b/henceforth/examples/test.fth @@ -1,3 +1 @@ -40 2 + drop drop -: test ( -- 42 ) 40 2 + ; -test . +." t;;;;;;;;;;;;;;;;;;;;;;est\n" diff --git a/henceforth/src/bin/main.rs b/henceforth/src/bin/main.rs index bd951d6..04cc3cd 100644 --- a/henceforth/src/bin/main.rs +++ b/henceforth/src/bin/main.rs @@ -1,5 +1,6 @@ use anyhow::Result; use clap::{Parser, Subcommand}; +use hence::assembler::ToCode; use std::fs; use henceforth::*; @@ -56,7 +57,8 @@ fn main() -> Result<()> { let source = fs::read_to_string(&src)?; let tokens = lexer::lex(source)?; let ast = parser::parse(tokens)?; - let assembly = compiler::compile(ast)?; + let ast = compiler::compile(ast)?; + let assembly = ast.to_code(); if let Some(x) = out { fs::write(x, &assembly)?; diff --git a/henceforth/src/lib/compiler.rs b/henceforth/src/lib/compiler.rs index 4490e0a..c69d3d3 100644 --- a/henceforth/src/lib/compiler.rs +++ b/henceforth/src/lib/compiler.rs @@ -1,61 +1,52 @@ -use std::collections::HashMap; - use anyhow::{bail, Result}; -use sailfish::TemplateOnce; +use indexmap::IndexSet; +use itertools::Itertools; +use std::collections::HashMap; use crate::parser; -#[derive(Debug, Clone, PartialEq)] -pub enum Instruction { - Push(u16), +pub mod instruction; +pub use crate::compiler::instruction::Instruction; - Drop, - Add, - Sub, - Dot, - - Call(String), -} - -impl Instruction { - pub fn from_word(word: &str) -> Option { - match word { - "drop" => Some(Instruction::Drop), - "+" => Some(Instruction::Add), - "-" => Some(Instruction::Sub), - "." => Some(Instruction::Dot), - _ => None, - } - } +pub trait Compilable { + fn compile(&self, data: &T) -> Result; } #[derive(Debug)] pub struct Word { - id: u16, - instructions: Vec, + pub id: usize, + pub instructions: Vec, + pub times_used: usize, } #[derive(Debug)] pub struct Data { - instructions: Vec, - words: HashMap, + pub words: HashMap, + pub strings: IndexSet, } impl Data { - fn new() -> Self { + pub fn new() -> Self { Self { - instructions: vec![], words: HashMap::new(), + strings: IndexSet::new(), } } - pub fn generate_instructions(&mut self, body: parser::ast::Body) -> Result<()> { + pub fn generate_instructions(&mut self, body: parser::ast::Body) -> Result> { + let mut instructions: Vec = vec![]; for node in body { match node { parser::ast::Node::Comment(_) => {} - parser::ast::Node::String { mode, string } => {} + parser::ast::Node::String { mode, string } => { + let id = self.strings.insert_full(string).0; + instructions.push(match mode.as_str() { + "." => Instruction::DotQuote(id), + _ => bail!("Unknown string mode: {}", mode), + }); + } parser::ast::Node::Number(x) => { - self.instructions.push(Instruction::Push(x as u16)); + instructions.push(instruction::Instruction::Push(x as u16)); } parser::ast::Node::WordDefinition { name, @@ -68,25 +59,23 @@ impl Data { bail!("Word already exists as user word definition: {}", name); } - let pre_instructions = self.instructions.clone(); - self.instructions.clear(); - self.generate_instructions(body)?; - let instructions = self.instructions.clone(); - self.instructions = pre_instructions; + let ins = self.generate_instructions(body)?; self.words.insert( name, Word { - id: self.words.len() as u16, - instructions, + id: self.words.len(), + instructions: ins, + times_used: 0, }, ); } parser::ast::Node::Word(x) => { if let Some(ins) = Instruction::from_word(&x) { - self.instructions.push(ins); - } else if self.words.contains_key(&x) { - self.instructions.push(Instruction::Call(x)); + instructions.push(ins); + } else if let Some(w) = self.words.get_mut(&x) { + instructions.push(Instruction::Call(x)); + w.times_used += 1; } else { bail!("Word does not exist: {}", x); } @@ -94,20 +83,198 @@ impl Data { } } - Ok(()) + Ok(instructions) + } + + pub fn embed(&self, body: hence::parser::ast::Body) -> hence::parser::ast::Body { + let mut x = vec![ + // includes + hence::parser::ast::Node::MacroCall { + name: "include".to_string(), + args: vec![hence::arg::Arg::String("$lib/core.asm".to_string())], + }, + hence::parser::ast::Node::MacroCall { + name: "include".to_string(), + args: vec![hence::arg::Arg::String("$lib/std.asm".to_string())], + }, + hence::parser::ast::Node::MacroCall { + name: "include".to_string(), + args: vec![hence::arg::Arg::String("$lib/main.asm".to_string())], + }, + // constants + hence::parser::ast::Node::MacroCall { + name: "define".to_string(), + args: vec![ + hence::arg::Arg::Variable("MEM_CALL_STACK".to_string()), + hence::arg::Arg::Variable("CORE_MEM_MEM".to_string()), + ], + }, + // macros + // stack_transfer_alu + hence::parser::ast::Node::MacroCall { + name: "macro".to_string(), + args: vec![hence::arg::Arg::Variable("stack_transfer_alu".to_string())], + }, + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "endmacro".to_string(), + args: vec![], + }, + // call_word + hence::parser::ast::Node::MacroCall { + name: "macro".to_string(), + args: vec![ + hence::arg::Arg::Variable("call_word".to_string()), + hence::arg::Arg::Variable("call_word_arg_0_label".to_string()), + ], + }, + hence::parser::ast::Node::Call { + name: "ts".to_string(), + arg: Some(hence::arg::Arg::BinaryExpression { + left: Box::new(hence::arg::Arg::Variable("OFFSET".to_string())), + right: Box::new(hence::arg::Arg::Number(3 + 3 + 3 + 1 + 3 + 1)), + op: hence::arg::BinaryExpressionOperator::Add, + }), + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::Call { + name: "ts".to_string(), + arg: Some(hence::arg::Arg::Variable("MEM_CALL_STACK".to_string())), + }, + hence::parser::ast::Node::Call { + name: "set".to_string(), + arg: None, + }, + hence::parser::ast::Node::Call { + name: "ts".to_string(), + arg: Some(hence::arg::Arg::Variable( + "call_word_arg_0_label".to_string(), + )), + }, + hence::parser::ast::Node::Call { + // tlr CORE_REG_PC -> tlr 0 -> tlr + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_PC".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "endmacro".to_string(), + args: vec![], + }, + // return_word + hence::parser::ast::Node::MacroCall { + name: "macro".to_string(), + args: vec![hence::arg::Arg::Variable("return_word".to_string())], + }, + hence::parser::ast::Node::MacroCall { + name: "std_get".to_string(), + args: vec![hence::arg::Arg::Variable("MEM_CALL_STACK".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_PC".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "endmacro".to_string(), + args: vec![], + }, + // jump_main + hence::parser::ast::Node::MacroCall { + name: "jump_main".to_string(), + args: vec![], + }, + // data + hence::parser::ast::Node::Label("data".to_string()), + hence::parser::ast::Node::Label("data_strings".to_string()), + ]; + + for (id, s) in self.strings.iter().enumerate() { + x.extend([ + hence::parser::ast::Node::Label(format!("data_strings_{}", id)), + hence::parser::ast::Node::MacroCall { + name: "bytes".to_string(), + args: vec![hence::arg::Arg::String(s.to_string())], + }, + hence::parser::ast::Node::Label(format!("data_strings_end_{}", id)), + ]); + } + + // words + x.push(hence::parser::ast::Node::Label("words".to_string())); + for (name, word) in &self + .words + .iter() + .filter(|(_, w)| w.times_used > 1) + .sorted_by(|a, b| Ord::cmp(&a.1.id, &b.1.id)) + .collect::>() + { + x.extend(vec![ + hence::parser::ast::Node::Label(format!("words_{}", word.id)), + hence::parser::ast::Node::Comment(format!("word: \"{}\"", name)), + ]); + x.extend( + word.instructions + .iter() + .map(|ins| ins.compile(self)) + .collect::>>() + .unwrap() + .into_iter() + .flatten(), + ); + x.push(hence::parser::ast::Node::MacroCall { + name: "return_word".to_string(), + args: vec![], + }); + } + + x.extend([ + hence::parser::ast::Node::Label("main".to_string()), + hence::parser::ast::Node::MacroCall { + name: "main".to_string(), + args: vec![hence::arg::Arg::Variable("main".to_string())], + }, + ]); + x.extend(body); + x.push(hence::parser::ast::Node::MacroCall { + name: "std_stop".to_string(), + args: vec![], + }); + + x } } -#[derive(TemplateOnce)] -#[template(path = "assembly.asm")] -pub struct Template { - pub data: Data, -} - -pub fn compile(ast: parser::ast::AST) -> Result { +pub fn compile(ast: parser::ast::AST) -> Result { let mut data = Data::new(); - data.generate_instructions(ast.body)?; - dbg!(&data); + let instructions = data.generate_instructions(ast.body)?; - Ok(Template { data }.render_once()?) + Ok(hence::parser::ast::AST { + body: data.embed( + instructions + .iter() + .map(|ins| ins.compile(&data)) + .collect::>>() + .unwrap() + .into_iter() + .flatten() + .collect(), + ), + }) } diff --git a/henceforth/src/lib/compiler/instruction.rs b/henceforth/src/lib/compiler/instruction.rs new file mode 100644 index 0000000..1be274f --- /dev/null +++ b/henceforth/src/lib/compiler/instruction.rs @@ -0,0 +1,526 @@ +use anyhow::{bail, Result}; + +use crate::compiler; + +#[derive(Debug, Clone, PartialEq)] +pub enum Instruction { + Nop, + Debug, + Quit, + Push(u16), + Drop, + Depth, + Pick, + Dup, + Swap, + Over, + Rot, + Nip, + I, + J, + Tuck, + Fetch, + FetchPrint, + Store, + PlusStore, + MinusStore, + Cells, + Allot, + Dot, + Emit, + Space, + Spaces, + Cr, + DotQuote(usize), + Count, + Not, + And, + Nand, + Or, + Nor, + Xor, + Xnor, + Lsh, + Rsh, + Compare, + Eq, + Neq, + Lt, + Leq, + Gt, + Geq, + Min, + Max, + Boolean, + Invert, + Plus, + OnePlus, + TwoPlus, + Minus, + OneMinus, + TwoMinus, + Times, + Divide, + Mod, + Call(String), +} + +impl Instruction { + pub fn from_word(word: &str) -> Option { + match word.to_lowercase().as_str() { + "nop" => Some(Instruction::Nop), + "debug" => Some(Instruction::Debug), + "quit" => Some(Instruction::Quit), + "drop" => Some(Instruction::Drop), + "depth" => Some(Instruction::Depth), + "pick" => Some(Instruction::Pick), + "dup" => Some(Instruction::Dup), + "swap" => Some(Instruction::Swap), + "over" => Some(Instruction::Over), + "rot" => Some(Instruction::Rot), + "nip" => Some(Instruction::Nip), + "i" => Some(Instruction::I), + "j" => Some(Instruction::J), + "tuck" => Some(Instruction::Tuck), + "@" => Some(Instruction::Fetch), + "?" => Some(Instruction::FetchPrint), + "!" => Some(Instruction::Store), + "+!" => Some(Instruction::PlusStore), + "-!" => Some(Instruction::MinusStore), + "cells" => Some(Instruction::Cells), + "allot" => Some(Instruction::Allot), + "." => Some(Instruction::Dot), + "emit" => Some(Instruction::Emit), + "space" => Some(Instruction::Space), + "spaces" => Some(Instruction::Spaces), + "cr" => Some(Instruction::Cr), + "count" => Some(Instruction::Count), + "not" => Some(Instruction::Not), + "and" => Some(Instruction::And), + "nand" => Some(Instruction::Nand), + "or" => Some(Instruction::Or), + "nor" => Some(Instruction::Nor), + "xor" => Some(Instruction::Xor), + "xnor" => Some(Instruction::Xnor), + "lshift" => Some(Instruction::Lsh), + "rshift" => Some(Instruction::Rsh), + "compare" => Some(Instruction::Compare), + "=" => Some(Instruction::Eq), + "!=" => Some(Instruction::Neq), + "<" => Some(Instruction::Lt), + "<=" => Some(Instruction::Leq), + ">" => Some(Instruction::Gt), + ">=" => Some(Instruction::Geq), + "min" => Some(Instruction::Min), + "max" => Some(Instruction::Max), + "boolean" => Some(Instruction::Boolean), + "invert" => Some(Instruction::Invert), + "+" => Some(Instruction::Plus), + "1+" => Some(Instruction::OnePlus), + "2+" => Some(Instruction::TwoPlus), + "-" => Some(Instruction::Minus), + "1-" => Some(Instruction::OneMinus), + "2-" => Some(Instruction::TwoMinus), + "*" => Some(Instruction::Times), + "/" => Some(Instruction::Divide), + "mod" => Some(Instruction::Mod), + _ => None, + } + } +} + +impl compiler::Compilable for Instruction { + fn compile(&self, data: &compiler::Data) -> Result { + match self { + Instruction::Nop => Ok(vec![hence::parser::ast::Node::Call { + name: "nop".to_string(), + arg: None, + }]), + Instruction::Debug => Ok(vec![hence::parser::ast::Node::Call { + name: "dbg".to_string(), + arg: None, + }]), + Instruction::Quit => Ok(vec![hence::parser::ast::Node::MacroCall { + name: "std_stop".to_string(), + args: vec![], + }]), + Instruction::Push(x) => Ok(vec![hence::parser::ast::Node::Call { + name: "push".to_string(), + arg: Some(hence::arg::Arg::Number(*x)), + }]), + Instruction::Drop => Ok(vec![hence::parser::ast::Node::Call { + name: "pop".to_string(), + arg: None, + }]), + Instruction::Depth => Ok(vec![ + hence::parser::ast::Node::Call { + name: "tsr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_SP".to_string())), + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Pick => Ok(vec![]), + Instruction::Dup => Ok(vec![ + hence::parser::ast::Node::Call { + name: "tss".to_string(), + arg: None, + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Swap => Ok(vec![ + // hence::parser::ast::Node::Call { + // name: "ld".to_string(), + // arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + // }, + // hence::parser::ast::Node::Call { + // name: "ld".to_string(), + // arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())), + // }, + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())), + }, + hence::parser::ast::Node::Call { + name: "tsr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + hence::parser::ast::Node::Call { + name: "tsr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())), + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Over => Ok(vec![]), + Instruction::Rot => Ok(vec![]), + Instruction::Nip => Ok(vec![]), + Instruction::I => Ok(vec![]), + Instruction::J => Ok(vec![]), + Instruction::Tuck => Ok(vec![]), + Instruction::Fetch => Ok(vec![]), + Instruction::FetchPrint => Ok(vec![]), + Instruction::Store => Ok(vec![]), + Instruction::PlusStore => Ok(vec![]), + Instruction::MinusStore => Ok(vec![]), + Instruction::Cells => Ok(vec![]), + Instruction::Allot => Ok(vec![]), + Instruction::Dot => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_set".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_MEM_OUT".to_string())], + }, + ]), + Instruction::Emit => Ok(vec![]), + Instruction::Space => Ok(vec![]), + Instruction::Spaces => Ok(vec![]), + Instruction::Cr => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_A".to_string()), + hence::arg::Arg::Number(0x0a), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_set".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_MEM_CHR".to_string())], + }, + ]), + Instruction::DotQuote(x) => { + let loop_label = format!("loop_strings_{}", x); + let data_label = format!("data_strings_{}", x); + let data_end_label = format!("data_strings_end_{}", x); + + Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_B".to_string()), + hence::arg::Arg::Variable(data_label), + ], + }, + hence::parser::ast::Node::Label(loop_label.clone()), + hence::parser::ast::Node::Call { + name: "tsr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())), + }, + hence::parser::ast::Node::Call { + name: "get".to_string(), + arg: None, + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_set".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_MEM_CHR".to_string())], + }, + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_A".to_string()), + hence::arg::Arg::Number(1), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_ADD".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_A".to_string()), + hence::arg::Arg::Variable(data_end_label), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_GT".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_cond_jump".to_string(), + args: vec![hence::arg::Arg::Variable(loop_label)], + }, + ]) + } + Instruction::Count => Ok(vec![]), + Instruction::Not => Ok(vec![]), + Instruction::And => Ok(vec![]), + Instruction::Nand => Ok(vec![]), + Instruction::Or => Ok(vec![]), + Instruction::Nor => Ok(vec![]), + Instruction::Xor => Ok(vec![]), + Instruction::Xnor => Ok(vec![]), + Instruction::Lsh => Ok(vec![]), + Instruction::Rsh => Ok(vec![]), + Instruction::Compare => Ok(vec![]), + Instruction::Eq => Ok(vec![]), + Instruction::Neq => Ok(vec![]), + Instruction::Lt => Ok(vec![]), + Instruction::Leq => Ok(vec![]), + Instruction::Gt => Ok(vec![]), + Instruction::Geq => Ok(vec![]), + Instruction::Min => Ok(vec![]), + Instruction::Max => Ok(vec![]), + Instruction::Boolean => Ok(vec![]), + Instruction::Invert => Ok(vec![]), + Instruction::Plus => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "stack_transfer_alu".to_string(), + args: vec![], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_ADD".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::OnePlus => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_B".to_string()), + hence::arg::Arg::Number(1), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_ADD".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::TwoPlus => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_B".to_string()), + hence::arg::Arg::Number(2), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_ADD".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Minus => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "stack_transfer_alu".to_string(), + args: vec![], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_SUB".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::OneMinus => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_B".to_string()), + hence::arg::Arg::Number(1), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_SUB".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::TwoMinus => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "std_ld".to_string(), + args: vec![], + }, + hence::parser::ast::Node::Call { + name: "tlr".to_string(), + arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())), + }, + hence::parser::ast::Node::MacroCall { + name: "std_rset".to_string(), + args: vec![ + hence::arg::Arg::Variable("CORE_REG_B".to_string()), + hence::arg::Arg::Number(2), + ], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_SUB".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Times => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "stack_transfer_alu".to_string(), + args: vec![], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_MUL".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Divide => Ok(vec![ + hence::parser::ast::Node::MacroCall { + name: "stack_transfer_alu".to_string(), + args: vec![], + }, + hence::parser::ast::Node::MacroCall { + name: "std_alu".to_string(), + args: vec![hence::arg::Arg::Variable("CORE_ALU_DIV".to_string())], + }, + hence::parser::ast::Node::Call { + name: "tls".to_string(), + arg: None, + }, + ]), + Instruction::Mod => Ok(vec![]), + Instruction::Call(x) => match data.words.get(x) { + Some(w) => { + if w.times_used > 1 { + Ok(vec![hence::parser::ast::Node::MacroCall { + name: "call_word".to_string(), + args: vec![hence::arg::Arg::Variable(format!("words_{}", w.id))], + }]) + } else { + Ok(w.instructions + .iter() + .map(|ins| ins.compile(data)) + .collect::>>()? + .into_iter() + .flatten() + .collect()) + } + } + None => bail!("Unknown word: {}", x), + }, + } + } +} diff --git a/henceforth/src/lib/parser.rs b/henceforth/src/lib/parser.rs index 120fb1f..14a4faa 100644 --- a/henceforth/src/lib/parser.rs +++ b/henceforth/src/lib/parser.rs @@ -1,11 +1,11 @@ use anyhow::{bail, Result}; -use num_parse; +use parse_int; use crate::lexer; pub mod ast; -fn process_raw_stack_result(s: Option<&str>) -> Vec { +pub fn parse_stack_state(s: Option<&str>) -> Vec { match s { Some(x) if !x.trim().is_empty() => { x.split_whitespace().map(|x| x.trim().to_string()).collect() @@ -18,8 +18,8 @@ pub fn parse_stack_result(s: String) -> ast::StackResult { let mut splitter = s.splitn(2, "--"); ast::StackResult { - before: process_raw_stack_result(splitter.next()), - after: process_raw_stack_result(splitter.next()), + before: parse_stack_state(splitter.next()), + after: parse_stack_state(splitter.next()), } } @@ -38,12 +38,7 @@ pub fn parse(tokens: Vec) -> Result { lexer::Token::StringLiteral { mode, string } => { body.push(ast::Node::String { mode, string }); } - lexer::Token::Number(x) => match num_parse::parse_int::(x.as_str()) { - Some(n) => { - body.push(ast::Node::Number(n)); - } - None => bail!("Invalid number: {}", x), - }, + lexer::Token::Number(x) => body.push(ast::Node::Number(parse_int::parse(&x)?)), lexer::Token::Word(x) => match x.as_str() { ":" => { let mut depth: usize = 1; @@ -64,7 +59,8 @@ pub fn parse(tokens: Vec) -> Result { _ => true, }) .collect::>() - .into_iter(); + .into_iter() + .peekable(); if depth != 0 { bail!("Unbalanced word definitions"); } @@ -78,16 +74,24 @@ pub fn parse(tokens: Vec) -> Result { }, None => bail!("Word definition can not be empty"), }; - let stack = match content.find(|t| { - !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_)) - }) { - Some(t) => match t { - lexer::Token::ParenComment(x) - | lexer::Token::BackslashComment(x) - | lexer::Token::DoubleDashComment(x) => Some(parse_stack_result(x)), - _ => None, - }, - None => None, + let stack = loop { + if let Some(t) = content.peek() { + match t { + lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => { + content.next(); + } + lexer::Token::ParenComment(x) + | lexer::Token::BackslashComment(x) + | lexer::Token::DoubleDashComment(x) => { + let y = x.to_string(); + content.next(); + break Some(parse_stack_result(y)); + } + _ => break None, + } + } else { + break None; + } }; body.push(ast::Node::WordDefinition { diff --git a/henceforth/templates/assembly.asm b/henceforth/templates/assembly.asm deleted file mode 100644 index e23a194..0000000 --- a/henceforth/templates/assembly.asm +++ /dev/null @@ -1,10 +0,0 @@ -.include "$lib/core.asm" -.include "$lib/std.asm" -.include "$lib/main.asm" - -.jump_main - -data: - -.main -.std_stop