From ec7a147ec95731b6d411fecf0cca5c6919585fb4 Mon Sep 17 00:00:00 2001 From: Dominic Grimm Date: Sat, 3 Sep 2022 15:14:58 +0200 Subject: [PATCH] Start writing forth compiler --- Cargo.lock | 204 ++++++++++++++++++++++++--- Cargo.toml | 30 +--- forth/constants.asm | 6 - forth/main.asm | 64 --------- hence/Cargo.toml | 28 ++++ README.md => hence/README.md | 0 {lib => hence/lib}/core.asm | 1 + {lib => hence/lib}/main.asm | 6 +- hence/lib/std.asm | 62 ++++++++ {src => hence/src}/bin/main.rs | 0 {src => hence/src}/lib/arg.rs | 15 +- {src => hence/src}/lib/assembler.rs | 8 +- {src => hence/src}/lib/emulator.rs | 0 {src => hence/src}/lib/lexer.rs | 23 ++- {src => hence/src}/lib/lib.rs | 0 {src => hence/src}/lib/parser.rs | 2 +- {src => hence/src}/lib/parser/ast.rs | 17 +-- henceforth/Cargo.toml | 24 ++++ henceforth/examples/test.fth | 3 + henceforth/src/bin/main.rs | 74 ++++++++++ henceforth/src/lib/compiler.rs | 113 +++++++++++++++ henceforth/src/lib/lexer.rs | 74 ++++++++++ henceforth/src/lib/lib.rs | 3 + henceforth/src/lib/parser.rs | 107 ++++++++++++++ henceforth/src/lib/parser/ast.rs | 75 ++++++++++ henceforth/templates/assembly.asm | 10 ++ lib/std.asm | 62 -------- 27 files changed, 790 insertions(+), 221 deletions(-) delete mode 100644 forth/constants.asm delete mode 100644 forth/main.asm create mode 100644 hence/Cargo.toml rename README.md => hence/README.md (100%) rename {lib => hence/lib}/core.asm (99%) rename {lib => hence/lib}/main.asm (61%) create mode 100644 hence/lib/std.asm rename {src => hence/src}/bin/main.rs (100%) rename {src => hence/src}/lib/arg.rs (97%) rename {src => hence/src}/lib/assembler.rs (98%) rename {src => hence/src}/lib/emulator.rs (100%) rename {src => hence/src}/lib/lexer.rs (94%) rename {src => hence/src}/lib/lib.rs (100%) rename {src => hence/src}/lib/parser.rs (98%) rename {src => hence/src}/lib/parser/ast.rs (65%) create mode 100644 henceforth/Cargo.toml create mode 100644 henceforth/examples/test.fth create mode 100644 
henceforth/src/bin/main.rs create mode 100644 henceforth/src/lib/compiler.rs create mode 100644 henceforth/src/lib/lexer.rs create mode 100644 henceforth/src/lib/lib.rs create mode 100644 henceforth/src/lib/parser.rs create mode 100644 henceforth/src/lib/parser/ast.rs create mode 100644 henceforth/templates/assembly.asm delete mode 100644 lib/std.asm diff --git a/Cargo.lock b/Cargo.lock index e88c93e..2936acd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -19,9 +19,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "anyhow" -version = "1.0.62" +version = "1.0.63" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1485d4d2cc45e7b201ee3767015c96faa5904387c9d87c6efdd0fb511f12d305" +checksum = "a26fa4d7e3f2eebadf743988fc8aec9fa9a9e82611acafd77c1462ed6262440a" dependencies = [ "backtrace", ] @@ -87,9 +87,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "clap" -version = "3.2.17" +version = "3.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29e724a68d9319343bb3328c9cc2dfde263f4b3142ee1059a9980580171c954b" +checksum = "23b71c3ce99b7611011217b366d923f1d0a7e07a92bb2dbf1e84508c673ca3bd" dependencies = [ "atty", "bitflags", @@ -104,9 +104,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "3.2.17" +version = "3.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13547f7012c01ab4a0e8f8967730ada8f9fdf419e8b6c792788f39cf4e46eefa" +checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65" dependencies = [ "heck", "proc-macro-error", @@ -168,6 +168,18 @@ version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" +[[package]] +name = "filetime" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e94a7bbaa59354bc20dd75b67f23e2797b4490e9d6928203fb105c79e448c86c" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "windows-sys", +] + [[package]] name = "generic-array" version = "0.14.6" @@ -223,6 +235,18 @@ dependencies = [ "unescape", ] +[[package]] +name = "henceforth" +version = "0.1.0" +dependencies = [ + "anyhow", + "clap", + "hence", + "itertools", + "num-parse", + "sailfish", +] + [[package]] name = "hermit-abi" version = "0.1.19" @@ -232,6 +256,15 @@ dependencies = [ "libc", ] +[[package]] +name = "home" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2456aef2e6b6a9784192ae780c0f15bc57df0e918585282325e8c8ac27737654" +dependencies = [ + "winapi", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -251,6 +284,12 @@ dependencies = [ "either", ] +[[package]] +name = "itoap" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" + [[package]] name = "libc" version = "0.2.132" @@ -416,18 +455,18 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.43" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a2ca2c61bc9f3d74d2886294ab7b9853abd9c1ad903a3ac7815c58989bb7bab" +checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029" dependencies = [ - "unicode-ident", + "unicode-xid", ] [[package]] name = "quote" -version = "1.0.21" +version = "1.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbe448f377a7d6961e30f5955f9b8d106c3f5e449d493ee1b125c1d43c2b5179" +checksum = "f53dc8cf16a769a6f677e09e7ff2cd4be1ea0f48754aac39520536962011de0d" dependencies = [ "proc-macro2", ] @@ -468,6 +507,15 @@ dependencies = [ "getrandom", ] +[[package]] +name = "redox_syscall" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"fb5a58c1855b4b6819d59012155603f0b22ad30cad752600aadfcb695265519a" +dependencies = [ + "bitflags", +] + [[package]] name = "rhexdump" version = "0.1.1" @@ -514,6 +562,50 @@ version = "0.1.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ef03e0a2b150c7a90d01faf6254c9c48a41e95fb2a8c2ac1c6f0d2b9aefc342" +[[package]] +name = "ryu" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" + +[[package]] +name = "sailfish" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "948a7edfc2f03d7c58a097dda25ed29440a72e8528894a6e182fe9171195fed1" +dependencies = [ + "itoap", + "ryu", + "sailfish-macros", + "version_check", +] + +[[package]] +name = "sailfish-compiler" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f0a01133d6ce146020e6416ac6a823f813f1cbb30ff77548b4fa20749524947" +dependencies = [ + "filetime", + "home", + "memchr", + "proc-macro2", + "quote", + "serde", + "syn", + "toml", +] + +[[package]] +name = "sailfish-macros" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86326c1f1dce0b316e0a47071f683b185417dc64e1a704380b5c706b09e871b1" +dependencies = [ + "proc-macro2", + "sailfish-compiler", +] + [[package]] name = "same-file" version = "1.0.6" @@ -523,6 +615,26 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "serde" +version = "1.0.144" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f747710de3dcd43b88c9168773254e809d8ddbdf9653b84e2554ab219f17860" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.144" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94ed3a816fb1d101812f83e789f888322c34e291f894f19590dc310963e87a00" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
[[package]] name = "sha2" version = "0.9.9" @@ -544,13 +656,13 @@ checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "syn" -version = "1.0.99" +version = "1.0.94" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58dbef6ec655055e20b86b15a8cc6d439cca19b667537ac6a1369572d151ab13" +checksum = "a07e33e919ebcd69113d5be0e4d70c5707004ff45188910106854f38b960df4a" dependencies = [ "proc-macro2", "quote", - "unicode-ident", + "unicode-xid", ] [[package]] @@ -578,6 +690,15 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" +[[package]] +name = "toml" +version = "0.5.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d82e1a7758622a465f8cee077614c73484dac5b836c02ff6a40d5d1010324d7" +dependencies = [ + "serde", +] + [[package]] name = "typenum" version = "1.15.0" @@ -590,18 +711,18 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccb97dac3243214f8d8507998906ca3e2e0b900bf9bf4870477f125b82e68f6e" -[[package]] -name = "unicode-ident" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4f5b37a154999a8f3f98cc23a628d850e154479cd94decf3414696e12e31aaf" - [[package]] name = "unicode-width" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973" +[[package]] +name = "unicode-xid" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "957e51f3646910546462e67d5f7599b9e4fb8acdd304b087a6494730f9eebf04" + [[package]] name = "version_check" version = "0.9.4" @@ -655,3 +776,46 @@ name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" diff --git a/Cargo.toml b/Cargo.toml index 53f436c..06c9a23 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,28 +1,2 @@ -[package] -name = "hence" -version = "0.1.0" -edition = "2021" -authors = ["Dominic Grimm "] -repository = "https://git.dergrimm.net/dergrimm/hence.git" - -[lib] -name = "hence" -path = "src/lib/lib.rs" - -[[bin]] -name = "hence" -path = "src/bin/main.rs" - -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - -[dependencies] -itertools = "0.10.2" -num-parse = "0.1.2" -clap = 
{ version = "3.2.16", features = ["derive"] } -rhexdump = "0.1.1" -radix_fmt = "1" -rand = "0.8.5" -console = "0.15.1" -anyhow = { version = "1.0.62", features = ["backtrace"] } -rust-embed = "6.4.0" -unescape = "0.1.0" +[workspace] +members = ["hence", "henceforth"] diff --git a/forth/constants.asm b/forth/constants.asm deleted file mode 100644 index aa3d4d5..0000000 --- a/forth/constants.asm +++ /dev/null @@ -1,6 +0,0 @@ -.requires "$lib/core.asm" - -.define MEM_INPUT_SIZE, 16 -.define MEM_INPUT_DYN_END, CORE_MEM_MEM -.define MEM_INPUT_START, (MEM_INPUT_DYN_END + 1) -.define MEM_INPUT_END, (MEM_INPUT_START + MEM_INPUT_SIZE) diff --git a/forth/main.asm b/forth/main.asm deleted file mode 100644 index ab18f2a..0000000 --- a/forth/main.asm +++ /dev/null @@ -1,64 +0,0 @@ -.include "$lib/core.asm" -.include "$lib/std.asm" -.include "$lib/main.asm" - -.include "constants.asm" - -.jump_main - -data: - -.main - .std_rset CORE_REG_C, MEM_INPUT_START - get_input_loop: - .std_get CORE_MEM_KEY - tlr CORE_REG_A - @ tlr CORE_REG_D - .std_set CORE_MEM_CHR - tsr CORE_REG_C - set - - @ .std_rset CORE_REG_B, ' ' - @ .std_alu CORE_ALU_EQ - @ tlr CORE_REG_A - @ .std_cond_jump get_input_loop - @ .std_cp CORE_REG_D, CORE_REG_A - - tlr CORE_REG_A - .std_rset CORE_REG_B, 1 - .std_alu CORE_ALU_ADD - tlr CORE_REG_A - tlr CORE_REG_C - - .std_rset CORE_REG_B, MEM_INPUT_END - .std_alu CORE_ALU_LT - tlr CORE_REG_A - .std_cond_jump get_input_loop - - .std_cp CORE_REG_C, CORE_REG_A - .std_set MEM_INPUT_DYN_END - - .std_rset CORE_REG_A, '\n' - .std_set CORE_MEM_CHR - - .std_rset CORE_REG_B, MEM_INPUT_START - .std_get MEM_INPUT_DYN_END - tlr CORE_REG_D - print_loop: - tsr CORE_REG_B - get - tlr CORE_REG_A - .std_set CORE_MEM_CHR - - .std_rset CORE_REG_A, 1 - .std_alu CORE_ALU_ADD - tlr CORE_REG_B - - .std_cp CORE_REG_D, CORE_REG_A - .std_alu CORE_ALU_GT - tlr CORE_REG_A - .std_cond_jump print_loop - - .std_rset CORE_REG_A, '\n' - .std_set CORE_MEM_CHR -.std_stop diff --git a/hence/Cargo.toml 
b/hence/Cargo.toml new file mode 100644 index 0000000..53f436c --- /dev/null +++ b/hence/Cargo.toml @@ -0,0 +1,28 @@ +[package] +name = "hence" +version = "0.1.0" +edition = "2021" +authors = ["Dominic Grimm "] +repository = "https://git.dergrimm.net/dergrimm/hence.git" + +[lib] +name = "hence" +path = "src/lib/lib.rs" + +[[bin]] +name = "hence" +path = "src/bin/main.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +itertools = "0.10.2" +num-parse = "0.1.2" +clap = { version = "3.2.16", features = ["derive"] } +rhexdump = "0.1.1" +radix_fmt = "1" +rand = "0.8.5" +console = "0.15.1" +anyhow = { version = "1.0.62", features = ["backtrace"] } +rust-embed = "6.4.0" +unescape = "0.1.0" diff --git a/README.md b/hence/README.md similarity index 100% rename from README.md rename to hence/README.md diff --git a/lib/core.asm b/hence/lib/core.asm similarity index 99% rename from lib/core.asm rename to hence/lib/core.asm index d87919d..5c324e2 100644 --- a/lib/core.asm +++ b/hence/lib/core.asm @@ -1,4 +1,5 @@ ; hence core lib + core: .define NULL, 0x0000 .define VOID, NULL diff --git a/lib/main.asm b/hence/lib/main.asm similarity index 61% rename from lib/main.asm rename to hence/lib/main.asm index a71f062..6666d27 100644 --- a/lib/main.asm +++ b/hence/lib/main.asm @@ -3,16 +3,16 @@ .requires "$lib/core.asm" .requires "$lib/std.asm" -.define main_local_jump_main, (CORE_MEM_ST - 3 - 1) +.define lib_main_local_jump_main, (CORE_MEM_ST - 3 - 1) .macro jump_main - .std_jump main_local_jump_main + .std_jump lib_main_local_jump_main .endmacro .macro main main: - .org main_local_jump_main + .org lib_main_local_jump_main ts main tlr CORE_REG_PC .org main diff --git a/hence/lib/std.asm b/hence/lib/std.asm new file mode 100644 index 0000000..6d22b63 --- /dev/null +++ b/hence/lib/std.asm @@ -0,0 +1,62 @@ +; hence standard lib + +.requires "$lib/core.asm" + +std: + .macro std_tclr + ts NULL + .endmacro + + .macro 
std_rclr, lib_std_rclr_arg_0_reg + ts NULL + tlr lib_std_rclr_arg_0_reg + .endmacro + + .macro std_alu, lib_std_alu_arg_0_op + ts lib_std_alu_arg_0_op + alu + .endmacro + + .macro std_get, lib_std_get_arg_0_addr + ts lib_std_get_arg_0_addr + get + .endmacro + + .macro std_set, lib_std_set_arg_0_addr + ts lib_std_set_arg_0_addr + set + .endmacro + + .macro std_cp, lib_std_cp_arg_0_from, lib_std_cp_arg_1_to + tsr lib_std_cp_arg_0_from + tlr lib_std_cp_arg_1_to + .endmacro + + .macro std_mv, lib_std_mv_arg_0_from, lib_std_cp_arg_1_to + tsr lib_std_cp_arg_0_from + tlr lib_std_cp_arg_1_to + .std_rclr lib_std_cp_arg_1_to + .endmacro + + .macro std_rset, lib_std_init_arg_0_reg, lib_std_init_arg_1_val + ts lib_std_init_arg_1_val + tlr lib_std_init_arg_0_reg + .endmacro + + .macro std_jump, lib_std_jump_arg_0_label + .std_rset CORE_REG_PC, lib_std_jump_arg_0_label + .endmacro + + .macro std_cond_jump, lib_std_cond_jump_arg_0_label + ts lib_std_cond_jump_arg_0_label + tlrc CORE_REG_PC + .endmacro + + .macro std_stop + .std_rset CORE_REG_PC, 0xffff + .endmacro + + .macro std_inc, lib_std_inc_arg_0_reg + .std_rset lib_std_inc_arg_0_reg, 1 + .std_alu CORE_ALU_ADD + .endmacro diff --git a/src/bin/main.rs b/hence/src/bin/main.rs similarity index 100% rename from src/bin/main.rs rename to hence/src/bin/main.rs diff --git a/src/lib/arg.rs b/hence/src/lib/arg.rs similarity index 97% rename from src/lib/arg.rs rename to hence/src/lib/arg.rs index 467270d..a0a9c98 100644 --- a/src/lib/arg.rs +++ b/hence/src/lib/arg.rs @@ -27,20 +27,15 @@ pub enum Arg { }, } -impl assembler::ToAssembly for Arg { - fn to_assembly(&self) -> String { +impl assembler::ToCode for Arg { + fn to_code(&self) -> String { match self { Arg::Char(x) => format!("'{}'", x), Arg::String(x) => format!("\"{}\"", x), Arg::Number(x) => x.to_string(), Arg::Variable(x) => x.clone(), Arg::BinaryExpression { left, right, op } => { - format!( - "({} {} {})", - left.to_assembly(), - op.to_assembly(), - right.to_assembly() - ) 
+ format!("({} {} {})", left.to_code(), op.to_code(), right.to_code()) } } } @@ -199,8 +194,8 @@ pub enum BinaryExpressionOperator { Rnd, } -impl assembler::ToAssembly for BinaryExpressionOperator { - fn to_assembly(&self) -> String { +impl assembler::ToCode for BinaryExpressionOperator { + fn to_code(&self) -> String { match self { BinaryExpressionOperator::Not => "~".to_string(), BinaryExpressionOperator::And => "&".to_string(), diff --git a/src/lib/assembler.rs b/hence/src/lib/assembler.rs similarity index 98% rename from src/lib/assembler.rs rename to hence/src/lib/assembler.rs index 2eb749b..484525a 100644 --- a/src/lib/assembler.rs +++ b/hence/src/lib/assembler.rs @@ -11,8 +11,8 @@ use crate::arg; use crate::lexer; use crate::parser; -pub trait ToAssembly { - fn to_assembly(&self) -> String; +pub trait ToCode { + fn to_code(&self) -> String; } pub trait ByteResolvable { @@ -114,7 +114,7 @@ impl Data { parser::ast::Node::MacroCall { name, args } => match name.as_str() { "debug" => { for arg in args { - let assembly = arg.to_assembly().replace('\n', "\\n"); + let assembly = arg.to_code().replace('\n', "\\n"); let num = arg.resolve_number(self)?; let bytes = arg.resolve_bytes(self)?; @@ -356,7 +356,7 @@ impl Data { call_args = if args.is_empty() { "".to_string() } else { - format!(" {}", args.iter().map(|a| a.to_assembly()).join(", ")) + format!(" {}", args.iter().map(|a| a.to_code()).join(", ")) } ); } diff --git a/src/lib/emulator.rs b/hence/src/lib/emulator.rs similarity index 100% rename from src/lib/emulator.rs rename to hence/src/lib/emulator.rs diff --git a/src/lib/lexer.rs b/hence/src/lib/lexer.rs similarity index 94% rename from src/lib/lexer.rs rename to hence/src/lib/lexer.rs index 9f319b2..2c4db7a 100644 --- a/src/lib/lexer.rs +++ b/hence/src/lib/lexer.rs @@ -3,7 +3,7 @@ use itertools::Itertools; use crate::assembler; -#[derive(Debug, PartialEq)] +#[derive(Debug, PartialEq, Eq)] pub enum Token { Comment(String), @@ -47,8 +47,8 @@ pub enum Token { 
Whitespace(String), } -impl assembler::ToAssembly for Token { - fn to_assembly(&self) -> String { +impl assembler::ToCode for Token { + fn to_code(&self) -> String { match self { Token::Comment(x) => format!(";{}", x), Token::CharLiteral(x) => format!("'{}'", x), @@ -283,32 +283,29 @@ pub fn lex(source: String) -> Result> { #[cfg(test)] mod tests { use super::*; - use crate::assembler::ToAssembly; + use crate::assembler::ToCode; #[test] fn test_token_to_assembly() { assert_eq!( - Token::Comment(" \"main function\" like definition macro".to_string()).to_assembly(), + Token::Comment(" \"main function\" like definition macro".to_string()).to_code(), "; \"main function\" like definition macro".to_string() ); assert_eq!( - Token::CharLiteral("\\n".to_string()).to_assembly(), + Token::CharLiteral("\\n".to_string()).to_code(), "'\\n'".to_string() ); assert_eq!( - Token::MacroLiteral("xyz".to_string()).to_assembly(), + Token::MacroLiteral("xyz".to_string()).to_code(), "xyz".to_string() ); assert_eq!( - Token::Literal("xkcd".to_string()).to_assembly(), + Token::Literal("xkcd".to_string()).to_code(), "xkcd".to_string() ); + assert_eq!(Token::Newline("\n".to_string()).to_code(), "\n".to_string()); assert_eq!( - Token::Newline("\n".to_string()).to_assembly(), - "\n".to_string() - ); - assert_eq!( - Token::Whitespace(" ".to_string()).to_assembly(), + Token::Whitespace(" ".to_string()).to_code(), " ".to_string() ); } diff --git a/src/lib/lib.rs b/hence/src/lib/lib.rs similarity index 100% rename from src/lib/lib.rs rename to hence/src/lib/lib.rs diff --git a/src/lib/parser.rs b/hence/src/lib/parser.rs similarity index 98% rename from src/lib/parser.rs rename to hence/src/lib/parser.rs index 01a2f16..ba9fead 100644 --- a/src/lib/parser.rs +++ b/hence/src/lib/parser.rs @@ -8,7 +8,7 @@ pub mod ast; pub fn parse(tokens: Vec) -> Result { let mut iter = tokens.iter().peekable(); - let mut body: Vec = Vec::new(); + let mut body: ast::Body = vec![]; while let Some(&token) = iter.peek() 
{ match token { diff --git a/src/lib/parser/ast.rs b/hence/src/lib/parser/ast.rs similarity index 65% rename from src/lib/parser/ast.rs rename to hence/src/lib/parser/ast.rs index 0042917..7963d5a 100644 --- a/src/lib/parser/ast.rs +++ b/hence/src/lib/parser/ast.rs @@ -11,14 +11,14 @@ pub enum Node { MacroCall { name: String, args: Vec }, } -impl assembler::ToAssembly for Node { - fn to_assembly(&self) -> String { +impl assembler::ToCode for Node { + fn to_code(&self) -> String { match self { Node::Comment(x) => format!("; {x}"), Node::Label(x) => format!("{x}:"), Node::Call { name, arg } => { if let Some(a) = arg { - format!("{name} {arg}", arg = a.to_assembly()) + format!("{name} {arg}", arg = a.to_code()) } else { name.clone() } @@ -27,10 +27,7 @@ impl assembler::ToAssembly for Node { if args.is_empty() { format!(".{name}") } else { - format!( - ".{name} {}", - args.iter().map(|a| a.to_assembly()).join(", ") - ) + format!(".{name} {}", args.iter().map(|a| a.to_code()).join(", ")) } } } @@ -44,8 +41,8 @@ pub struct AST { pub body: Vec, } -impl assembler::ToAssembly for AST { - fn to_assembly(&self) -> String { - self.body.iter().map(|n| n.to_assembly()).join("\n") +impl assembler::ToCode for AST { + fn to_code(&self) -> String { + self.body.iter().map(|n| n.to_code()).join("\n") } } diff --git a/henceforth/Cargo.toml b/henceforth/Cargo.toml new file mode 100644 index 0000000..3b86fb3 --- /dev/null +++ b/henceforth/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "henceforth" +version = "0.1.0" +edition = "2021" +authors = ["Dominic Grimm "] +repository = "https://git.dergrimm.net/dergrimm/hence.git" + +[lib] +name = "henceforth" +path = "src/lib/lib.rs" + +[[bin]] +name = "henceforth" +path = "src/bin/main.rs" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +hence = { path = "../hence" } +clap = { version = "3.2.16", features = ["derive"] } +anyhow = { version = "1.0.62", features = ["backtrace"] 
} +itertools = "0.10.2" +num-parse = "0.1.2" +sailfish = "0.4.0" diff --git a/henceforth/examples/test.fth b/henceforth/examples/test.fth new file mode 100644 index 0000000..4e774ed --- /dev/null +++ b/henceforth/examples/test.fth @@ -0,0 +1,3 @@ +40 2 + drop drop +: test ( -- 42 ) 40 2 + ; +test . diff --git a/henceforth/src/bin/main.rs b/henceforth/src/bin/main.rs new file mode 100644 index 0000000..bd951d6 --- /dev/null +++ b/henceforth/src/bin/main.rs @@ -0,0 +1,74 @@ +use anyhow::Result; +use clap::{Parser, Subcommand}; +use std::fs; + +use henceforth::*; + +#[derive(Debug, Parser)] +#[clap(author, version, about, long_about = None)] +struct Cli { + #[clap(subcommand)] + commands: Commands, +} + +#[derive(Debug, Subcommand)] +enum Commands { + #[clap(about = "Lexes source code and outputs tokens")] + Lex { + #[clap(value_parser)] + src: String, + }, + #[clap(about = "Parses source code and outputs AST")] + Parse { + #[clap(value_parser)] + src: String, + }, + #[clap(about = "Compiles assembly from source code")] + Compile { + #[clap(value_parser)] + src: String, + #[clap(value_parser)] + out: Option, + #[clap(long, action)] + dump: bool, + }, +} + +fn main() -> Result<()> { + let args = Cli::parse(); + match args.commands { + Commands::Lex { src } => { + let source = fs::read_to_string(src)?; + let tokens = lexer::lex(source)?; + dbg!(tokens); + + Ok(()) + } + Commands::Parse { src } => { + let source = fs::read_to_string(src)?; + let tokens = lexer::lex(source)?; + let body = parser::parse(tokens)?; + dbg!(body); + + Ok(()) + } + Commands::Compile { src, out, dump } => { + let source = fs::read_to_string(&src)?; + let tokens = lexer::lex(source)?; + let ast = parser::parse(tokens)?; + let assembly = compiler::compile(ast)?; + + if let Some(x) = out { + fs::write(x, &assembly)?; + } + if dump { + println!("{}", assembly); + } + + Ok(()) + } + } +} + +#[cfg(test)] +mod tests {} diff --git a/henceforth/src/lib/compiler.rs b/henceforth/src/lib/compiler.rs new 
file mode 100644 index 0000000..4490e0a --- /dev/null +++ b/henceforth/src/lib/compiler.rs @@ -0,0 +1,113 @@ +use std::collections::HashMap; + +use anyhow::{bail, Result}; +use sailfish::TemplateOnce; + +use crate::parser; + +#[derive(Debug, Clone, PartialEq)] +pub enum Instruction { + Push(u16), + + Drop, + Add, + Sub, + Dot, + + Call(String), +} + +impl Instruction { + pub fn from_word(word: &str) -> Option { + match word { + "drop" => Some(Instruction::Drop), + "+" => Some(Instruction::Add), + "-" => Some(Instruction::Sub), + "." => Some(Instruction::Dot), + _ => None, + } + } +} + +#[derive(Debug)] +pub struct Word { + id: u16, + instructions: Vec, +} + +#[derive(Debug)] +pub struct Data { + instructions: Vec, + words: HashMap, +} + +impl Data { + fn new() -> Self { + Self { + instructions: vec![], + words: HashMap::new(), + } + } + + pub fn generate_instructions(&mut self, body: parser::ast::Body) -> Result<()> { + for node in body { + match node { + parser::ast::Node::Comment(_) => {} + parser::ast::Node::String { mode, string } => {} + parser::ast::Node::Number(x) => { + self.instructions.push(Instruction::Push(x as u16)); + } + parser::ast::Node::WordDefinition { + name, + stack: _, + body, + } => { + if Instruction::from_word(&name).is_some() { + bail!("Word already exists as compiler instruction: {}", name); + } else if self.words.contains_key(&name) { + bail!("Word already exists as user word definition: {}", name); + } + + let pre_instructions = self.instructions.clone(); + self.instructions.clear(); + self.generate_instructions(body)?; + let instructions = self.instructions.clone(); + self.instructions = pre_instructions; + + self.words.insert( + name, + Word { + id: self.words.len() as u16, + instructions, + }, + ); + } + parser::ast::Node::Word(x) => { + if let Some(ins) = Instruction::from_word(&x) { + self.instructions.push(ins); + } else if self.words.contains_key(&x) { + self.instructions.push(Instruction::Call(x)); + } else { + bail!("Word does 
not exist: {}", x); + } + } + } + } + + Ok(()) + } +} + +#[derive(TemplateOnce)] +#[template(path = "assembly.asm")] +pub struct Template { + pub data: Data, +} + +pub fn compile(ast: parser::ast::AST) -> Result { + let mut data = Data::new(); + data.generate_instructions(ast.body)?; + dbg!(&data); + + Ok(Template { data }.render_once()?) +} diff --git a/henceforth/src/lib/lexer.rs b/henceforth/src/lib/lexer.rs new file mode 100644 index 0000000..bb49c8f --- /dev/null +++ b/henceforth/src/lib/lexer.rs @@ -0,0 +1,74 @@ +use anyhow::Result; +use hence::assembler::ToCode; +use itertools::Itertools; + +#[derive(Debug)] +pub enum Token { + Newline(String), + Whitespace(String), + + ParenComment(String), + BackslashComment(String), + DoubleDashComment(String), + + StringLiteral { mode: String, string: String }, + Number(String), + Word(String), +} + +impl ToCode for Token { + fn to_code(&self) -> String { + match self { + Token::Newline(x) | Token::Whitespace(x) => x.clone(), + Token::ParenComment(x) => format!("( {})", x), + Token::BackslashComment(x) => format!("\\{}", x), + Token::DoubleDashComment(x) => format!("-- {}", x), + Token::StringLiteral { mode, string } => format!("{}\" {}\"", mode, string), + Token::Number(x) | Token::Word(x) => x.clone(), + } + } +} + +pub fn is_space(c: char) -> bool { + c.is_whitespace() || c == '\n' +} + +pub fn lex(source: String) -> Result> { + let mut chars = source.chars().peekable(); + let mut tokens: Vec = vec![]; + + while let Some(&c) = chars.peek() { + tokens.push(match c { + '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').collect()), + _ if c.is_whitespace() => { + Token::Whitespace(chars.peeking_take_while(|&c| c.is_whitespace()).collect()) + } + '\\' => { + chars.next(); + Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect()) + } + _ if c.is_numeric() => { + Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect()) + } + _ => { + let x: String = chars.peeking_take_while(|&c| 
!is_space(c)).collect(); + + match x.as_str() { + "(" => Token::ParenComment( + chars.by_ref().skip(1).take_while(|&c| c != ')').collect(), + ), + "--" => Token::DoubleDashComment( + chars.by_ref().take_while(|&c| c != '\n').collect(), + ), + _ if x.ends_with('"') => Token::StringLiteral { + mode: x.chars().take(x.len() - 1).collect(), + string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(), + }, + _ => Token::Word(x), + } + } + }); + } + + Ok(tokens) +} diff --git a/henceforth/src/lib/lib.rs b/henceforth/src/lib/lib.rs new file mode 100644 index 0000000..ac7a00d --- /dev/null +++ b/henceforth/src/lib/lib.rs @@ -0,0 +1,3 @@ +pub mod compiler; +pub mod lexer; +pub mod parser; diff --git a/henceforth/src/lib/parser.rs b/henceforth/src/lib/parser.rs new file mode 100644 index 0000000..120fb1f --- /dev/null +++ b/henceforth/src/lib/parser.rs @@ -0,0 +1,107 @@ +use anyhow::{bail, Result}; +use num_parse; + +use crate::lexer; + +pub mod ast; +/** Splits one side of a stack effect into its whitespace-separated entries; a missing side (`None`) or a side containing only whitespace yields an empty vector. */ +fn process_raw_stack_result(s: Option<&str>) -> Vec { + match s { + Some(x) if !x.trim().is_empty() => { + x.split_whitespace().map(|x| x.trim().to_string()).collect() + } + _ => vec![], + } +} +/** Parses a stack effect string by splitting it at the first `--`: the entries left of it become `before`, the entries right of it become `after`. Without a `--` everything is treated as `before` and `after` stays empty. */ +pub fn parse_stack_result(s: String) -> ast::StackResult { + let mut splitter = s.splitn(2, "--"); + + ast::StackResult { + before: process_raw_stack_result(splitter.next()), + after: process_raw_stack_result(splitter.next()), + } +} +/** Converts a token stream into an AST. Newline and whitespace tokens are dropped; paren, backslash and double-dash comments become trimmed `Node::Comment`s; string literals and numbers map to their node kinds; a `:` word opens a word definition whose tokens are collected up to the matching `;` (nested `:` and `;` pairs are depth-counted, the closing `;` is consumed but not kept) and whose remaining tokens are parsed recursively as the body; every other word becomes `Node::Word`. Returns an error for a number that `num_parse::parse_int` turns into `None`, for a `:` without a matching `;`, for a definition without any non-blank token, and for a definition whose first non-blank token is not a word. NOTE(review): the second `content.find(..)` also consumes the token after the name when it is not a comment, so in a definition without a stack comment that token looks like it is dropped from the body - confirm, and consider peeking before consuming. NOTE(review): `iter` is wrapped in `peekable()` but no `peek` call is visible in this function. */ +pub fn parse(tokens: Vec) -> Result { + let mut iter = tokens.into_iter().peekable(); + let mut body: ast::Body = vec![]; + + while let Some(token) = iter.next() { + match token { + lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {} + lexer::Token::ParenComment(x) + | lexer::Token::BackslashComment(x) + | lexer::Token::DoubleDashComment(x) => { + body.push(ast::Node::Comment(x.trim().to_string())); + } + lexer::Token::StringLiteral { mode, string } => { + body.push(ast::Node::String { mode, string }); + } + lexer::Token::Number(x) => match num_parse::parse_int::(x.as_str()) { +
Some(n) => { + body.push(ast::Node::Number(n)); + } + None => bail!("Invalid number: {}", x), + }, + lexer::Token::Word(x) => match x.as_str() { + ":" => { + let mut depth: usize = 1; + let mut content = iter + .by_ref() + .take_while(|t| match t { + lexer::Token::Word(x) => match x.as_str() { + ":" => { + depth += 1; + true + } + ";" => { + depth -= 1; + depth != 0 + } + _ => true, + }, + _ => true, + }) + .collect::>() + .into_iter(); + if depth != 0 { + bail!("Unbalanced word definitions"); + } + + let name = match content.find(|t| { + !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_)) + }) { + Some(t) => match t { + lexer::Token::Word(x) => x.clone(), + _ => bail!("Word definition name must be a word itself: {:?}", t), + }, + None => bail!("Word definition can not be empty"), + }; + let stack = match content.find(|t| { + !matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_)) + }) { + Some(t) => match t { + lexer::Token::ParenComment(x) + | lexer::Token::BackslashComment(x) + | lexer::Token::DoubleDashComment(x) => Some(parse_stack_result(x)), + _ => None, + }, + None => None, + }; + + body.push(ast::Node::WordDefinition { + name, + stack, + body: parse(content.collect())?.body, + }); + } + _ => { + body.push(ast::Node::Word(x)); + } + }, + } + } + + Ok(ast::AST { body }) +} diff --git a/henceforth/src/lib/parser/ast.rs b/henceforth/src/lib/parser/ast.rs new file mode 100644 index 0000000..cd82756 --- /dev/null +++ b/henceforth/src/lib/parser/ast.rs @@ -0,0 +1,75 @@ +use hence::assembler::ToCode; +use itertools::Itertools; +/** Stack effect of a word definition: `before` holds the entries written left of the `--` separator, `after` the entries written right of it. */ +#[derive(Debug)] +pub struct StackResult { + pub before: Vec, + pub after: Vec, +} +/** Renders the effect as `before--after`. A non-empty side is written as its entries joined by single spaces plus one trailing space; an empty side is written as nothing. No space is emitted directly after the `--`. */ +impl ToCode for StackResult { + fn to_code(&self) -> String { + format!( + "{}--{}", + if self.before.is_empty() { + "".to_string() + } else { + format!("{} ", self.before.join(" ")) + }, + if self.after.is_empty() { + "".to_string() + } else { + format!("{} ", self.after.join(" ")) + } + ) + } +} +/** A single element of a parsed program: a comment, a string literal with its `mode`, a signed 32-bit number, a word definition (name, optional stack effect and nested body) or a plain word. */ +#[derive(Debug)] +pub
enum Node { + Comment(String), + String { + mode: String, + string: String, + }, + Number(i32), + WordDefinition { + name: String, + stack: Option, + body: Body, + }, + Word(String), +} +/** Renders a node back to source text: a comment as a backslash, a space and the text; a string as its mode, a double quote, a space, the text and a closing double quote; a number via `to_string`; a word definition as `: name`, the optional stack effect, the body nodes joined by single spaces and a final `;`; a plain word verbatim. NOTE(review): the stack effect is emitted without surrounding comment delimiters, and a backslash comment is followed on the same line by whatever is joined after it - verify that the generated text parses back as intended. */ +impl ToCode for Node { + fn to_code(&self) -> String { + match self { + Node::Comment(x) => format!("\\ {}", x), + Node::String { mode, string } => format!("{}\" {}\"", mode, string), + Node::Number(x) => x.to_string(), + Node::WordDefinition { name, stack, body } => format!( + ": {}{} {} ;", + name, + match stack { + Some(x) => format!(" {}", x.to_code()), + None => "".to_string(), + }, + body.iter().map(|x| x.to_code()).join(" ") + ), + Node::Word(x) => x.clone(), + } + } +} +/** List of nodes; used for the top-level program body and for the body of a word definition. */ +pub type Body = Vec; +/** Root of a parsed program; `body` holds the top-level nodes. */ +#[derive(Debug)] +pub struct AST { + pub body: Body, +} +/** Renders every top-level node with `to_code` and joins the results with single spaces. */ +impl ToCode for AST { + fn to_code(&self) -> String { + self.body.iter().map(|x| x.to_code()).join(" ") + } +} diff --git a/henceforth/templates/assembly.asm b/henceforth/templates/assembly.asm new file mode 100644 index 0000000..e23a194 --- /dev/null +++ b/henceforth/templates/assembly.asm @@ -0,0 +1,10 @@ +.include "$lib/core.asm" +.include "$lib/std.asm" +.include "$lib/main.asm" + +.jump_main + +data: + +.main +.std_stop diff --git a/lib/std.asm b/lib/std.asm deleted file mode 100644 index ed7d8d1..0000000 --- a/lib/std.asm +++ /dev/null @@ -1,62 +0,0 @@ -; hence standard lib - -.requires "$lib/core.asm" - -std: - .macro std_tclr - ts NULL - .endmacro - - .macro std_rclr, std_rclr_arg_0_reg - ts NULL - tlr std_rclr_arg_0_reg - .endmacro - - .macro std_alu, std_alu_arg_0_op - ts std_alu_arg_0_op - alu - .endmacro - - .macro std_get, std_get_arg_0_addr - ts std_get_arg_0_addr - get - .endmacro - - .macro std_set, std_set_arg_0_addr - ts std_set_arg_0_addr - set - .endmacro - - .macro std_cp, std_cp_arg_0_from, std_cp_arg_1_to - tsr std_cp_arg_0_from - tlr std_cp_arg_1_to - .endmacro - - .macro std_mv, std_mv_arg_0_from, std_cp_arg_1_to - tsr std_cp_arg_0_from - tlr std_cp_arg_1_to - .std_rclr std_cp_arg_1_to - .endmacro
- - .macro std_rset, std_init_arg_0_reg, std_init_arg_1_val - ts std_init_arg_1_val - tlr std_init_arg_0_reg - .endmacro - - .macro std_jump, std_jump_arg_0_label - .std_rset CORE_REG_PC, std_jump_arg_0_label - .endmacro - - .macro std_cond_jump, std_cond_jump_arg_0_label - ts std_cond_jump_arg_0_label - tlrc CORE_REG_PC - .endmacro - - .macro std_stop - .std_rset CORE_REG_PC, 0xffff - .endmacro - - .macro std_inc, std_inc_arg_0_reg - .std_rset std_inc_arg_0_reg, 1 - .std_alu CORE_ALU_ADD - .endmacro