Update after a long time!

This commit is contained in:
Dominic Grimm 2023-03-23 17:59:10 +01:00
parent 2bc7ee5f42
commit d6f7a51e11
No known key found for this signature in database
GPG key ID: 12EFFCAEA9E620BF
32 changed files with 3005 additions and 117 deletions

2
henceforth/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
examples/*.asm
examples/*.bin

17
henceforth/Cargo.toml Normal file
View file

@ -0,0 +1,17 @@
[package]
name = "henceforth"
version = "0.1.0"
edition = "2021"
authors = ["Dominic Grimm <dominic@dergrimm.net>"]
repository = "https://git.dergrimm.net/dergrimm/hence.git"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
hence = { path = "../hence" }
clap = { version = "3.2.16", features = ["derive"] }
anyhow = { version = "1.0.62", features = ["backtrace"] }
itertools = "0.10.2"
parse_int = "0.6.0"
indexmap = "1.9.1"
lazy_static = "1.4.0"

259
henceforth/src/compiler.rs Normal file
View file

@ -0,0 +1,259 @@
use anyhow::{bail, Context, Result};
use indexmap::IndexSet;
use itertools::Itertools;
use lazy_static::lazy_static;
use std::collections::HashMap;
use crate::parser;
mod instruction;
pub use instruction::Instruction;
/// Assembly source of the default program template, embedded at build time.
pub const TEMPLATE_ASM: &str = include_str!("compiler/templates/default.asm");

lazy_static! {
    /// Parsed body of [`TEMPLATE_ASM`], built the first time it is accessed.
    ///
    /// Panics on first access if the embedded template fails to lex or parse.
    #[derive(Debug)]
    pub static ref TEMPLATE: hence::parser::ast::Body = {
        let tokens = hence::lexer::lex(TEMPLATE_ASM).unwrap();
        hence::parser::parse(tokens).unwrap().body
    };
}
/// Something that can be lowered into an output of type `U`, given shared
/// read-only context of type `T`.
pub trait Compilable<T, U> {
    /// Compiles `self` using `data` as context, returning the lowered output
    /// or an error.
    fn compile(&self, data: &T) -> Result<U>;
}
/// A user-defined word as recorded by the [`Compiler`].
#[derive(Debug)]
pub struct Word {
    /// Numeric id; set to the number of words already stored when this one is inserted.
    pub id: usize,
    /// Instructions generated from the word's body.
    pub instructions: Vec<Instruction>,
    /// Incremented each time a reference to this word is lowered to a call.
    pub times_used: usize,
}
/// The two instruction sequences of an `if … else … then` construct.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Condition {
    /// Instructions of the `if` branch.
    pub if_instructions: Vec<Instruction>,
    /// Instructions of the `else` branch.
    pub else_instructions: Vec<Instruction>,
}
/// Identifies a callable unit: either a word (by name) or a condition (by index).
#[derive(Debug)]
pub enum CallableId {
    /// A word, identified by its name.
    Word(String),
    /// A condition, identified by a numeric index.
    Condition(usize),
}
/// Compilation state accumulated while lowering a program.
#[derive(Debug)]
pub struct Compiler {
    /// Interned string literals; a string's insertion index is used as its id.
    pub strings: IndexSet<String>,
    /// User-defined words, keyed by name.
    pub words: HashMap<String, Word>,
    /// Recorded conditions; the position in this list is used as the id when emitting labels.
    pub conditions: Vec<Condition>,
}
impl Compiler {
pub fn default() -> Self {
Self {
// words: HashMap::new(),
// conditions: IndexSet::new(),
strings: IndexSet::new(),
words: HashMap::new(),
conditions: vec![],
}
}
pub fn generate_instructions(
&mut self,
body: parser::ast::Body,
optimize: bool,
) -> Result<Vec<Instruction>> {
let mut instructions: Vec<Instruction> = vec![];
let mut iter = body.into_iter().peekable();
while let Some(node) = iter.next() {
match node {
_ if optimize && iter.next_if_eq(&node).is_some() => {
let count = iter.by_ref().peeking_take_while(|n| *n == node).count() + 2;
instructions.push(Instruction::Multiple {
instruction: Box::new(
self.generate_instructions(vec![node], optimize)?
.into_iter()
.next()
.unwrap(),
),
count,
});
}
parser::ast::Node::Comment(_) => {}
parser::ast::Node::String { mode, string } => {
instructions.push(match mode.as_str() {
"." => {
let id = self.strings.insert_full(string).0;
Instruction::StringPrint(id)
}
"r" => {
let id = self.strings.insert_full(string).0;
Instruction::StringReference(id)
}
"asm" => Instruction::AsmQuote(string),
_ => bail!("Unknown string mode: {}", mode),
});
}
parser::ast::Node::Number(x) => {
instructions.push(instruction::Instruction::Push(x));
}
parser::ast::Node::WordDefinition {
name,
stack: _,
body,
} => {
if Instruction::from_word(&name).is_some() {
bail!("Word already exists as compiler instruction: {}", name);
} else if self.words.contains_key(&name) {
bail!("Word already exists as user word definition: {}", name);
}
let instructions = self.generate_instructions(body, optimize)?;
self.words.insert(
name.to_string(),
Word {
id: self.words.len(),
instructions,
times_used: 0,
},
);
}
parser::ast::Node::Condition { if_body, else_body } => {
// let if_instructions = self.generate_instructions(if_body, optimize)?;
// let else_instructions = self.generate_instructions(else_body, optimize)?;
// let id = self.conditions.len();
// let origin = self.callable_graph.add_node(CallableId::Condition(id));
// self.conditions.push(Condition {
// if_instructions: if_instructions.clone(),
// else_instructions: else_instructions.clone(),
// callable_graph_node: origin,
// });
// instructions.push(Instruction::Condition(id));
// self.add_graph_edges(origin, if_instructions)?;
// self.add_graph_edges(origin, else_instructions)?;
// dbg!(&self);
}
parser::ast::Node::Word(x) => {
dbg!(&self.words, &x);
if let Some(ins) = Instruction::from_word(&x) {
instructions.push(ins);
} else if let Some(w) = self.words.get_mut(&x) {
w.times_used += 1;
instructions.push(Instruction::Call(x));
} else {
bail!("Word does not exist: {}", x);
}
}
}
}
Ok(instructions)
}
pub fn embed(&self, body: hence::parser::ast::Body) -> Result<hence::parser::ast::Body> {
let mut x = TEMPLATE.to_vec();
// strings
for (id, s) in self.strings.iter().enumerate() {
x.extend([
hence::parser::ast::Node::Label(format!("data_strings_{}", id)),
hence::parser::ast::Node::MacroCall {
name: "bytes".to_string(),
args: vec![hence::arg::Arg::String(s.to_string())],
},
hence::parser::ast::Node::Label(format!("data_strings_end_{}", id)),
]);
}
// conditions
for (id, c) in self.conditions.iter().enumerate() {
x.push(hence::parser::ast::Node::Label(format!(
"conditions_if_{}",
id
)));
x.extend(
c.if_instructions
.iter()
.map(|ins| ins.compile(self))
.collect::<Result<Vec<_>>>()?
.into_iter()
.flatten(),
);
x.push(hence::parser::ast::Node::Label(format!(
"conditions_else_{}",
id
)));
x.extend(
c.else_instructions
.iter()
.map(|ins| ins.compile(self))
.collect::<Result<Vec<_>>>()?
.into_iter()
.flatten(),
);
}
// words
for (name, word) in &self
.words
.iter()
.filter(|(_, w)| w.times_used > 1)
.sorted_by(|a, b| Ord::cmp(&a.1.id, &b.1.id))
.collect::<Vec<_>>()
{
x.extend(vec![
hence::parser::ast::Node::Label(format!("words_{}", word.id)),
hence::parser::ast::Node::Comment(format!("word: \"{}\"", name)),
]);
x.extend(
word.instructions
.iter()
.map(|ins| ins.compile(self))
.collect::<Result<Vec<hence::parser::ast::Body>>>()
.unwrap()
.into_iter()
.flatten(),
);
x.push(hence::parser::ast::Node::MacroCall {
name: "return_call_stack_jump".to_string(),
args: vec![],
});
}
x.extend([
hence::parser::ast::Node::Label("main".to_string()),
hence::parser::ast::Node::MacroCall {
name: "main".to_string(),
args: vec![hence::arg::Arg::Variable("main".to_string())],
},
]);
x.extend(body);
x.push(hence::parser::ast::Node::MacroCall {
name: "std_stop".to_string(),
args: vec![],
});
Ok(x)
}
}
/// Compiles a parsed source program into an assembly AST.
///
/// Generates instructions from `ast` with a fresh [`Compiler`], compiles each
/// instruction to assembly nodes, and embeds the result into the full program
/// layout via [`Compiler::embed`]. `optimize` is forwarded to instruction
/// generation.
///
/// # Errors
///
/// Returns the first error from instruction generation, instruction
/// compilation, or embedding.
pub fn compile(ast: parser::ast::AST, optimize: bool) -> Result<hence::parser::ast::AST> {
    let mut data = Compiler::default();
    let instructions = data.generate_instructions(ast.body, optimize)?;

    let mut program: hence::parser::ast::Body = vec![];
    for ins in &instructions {
        let nodes: hence::parser::ast::Body = ins.compile(&data)?;
        program.extend(nodes);
    }

    let body = data.embed(program)?;
    Ok(hence::parser::ast::AST { body })
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,67 @@
; NOTE(review): presumably the default template the compiler places ahead of
; generated code - confirm against the file path.
.include "$lib/core.asm"
.include "$lib/std.asm"
.include "$lib/main.asm"
; Memory cell addresses, laid out consecutively starting at CORE_MEM_MEM.
; MEM_LOOP_I / MEM_LOOP_J: presumably loop counter cells - TODO confirm.
.define MEM_LOOP_I, CORE_MEM_MEM
.define MEM_LOOP_J, (MEM_LOOP_I + 1)
; Call stack: a pointer cell at MEM_CALL_STACK_PTR; the end address is the
; pointer cell address plus MEM_CALL_STACK_LEN.
.define MEM_CALL_STACK_LEN, 16
.define MEM_CALL_STACK_PTR, (MEM_LOOP_J + 1)
.define MEM_CALL_STACK_END, (MEM_CALL_STACK_PTR + MEM_CALL_STACK_LEN)
; The allocation pointer cell shares the call-stack end address.
; NOTE(review): if the stack grows upward from PTR + 1, the 16th entry would
; land on this cell - verify the intended capacity.
.define MEM_ALLOC_PTR, MEM_CALL_STACK_END
; Macro stack_transfer_alu: two `.std_ld` / `tlr` pairs, targeting CORE_REG_B
; first and CORE_REG_A second.
; NOTE(review): presumably pops the top two data-stack values into the ALU
; operand registers - confirm against std.asm.
.macro stack_transfer_alu
.std_ld
tlr CORE_REG_B
.std_ld
tlr CORE_REG_A
.endmacro
; Macro call_stack_jump(label, offset): sets CORE_REG_C to the label and
; CORE_REG_D to (offset + 7), then targets the `call_stack_jump` routine via
; CORE_REG_PC.
; NOTE(review): the constant 7 presumably accounts for the size of this macro's
; expansion so the saved address points just past the call site - confirm.
.macro call_stack_jump, call_stack_jump_arg_0_label, call_stack_jump_arg_1_offset
.std_rset CORE_REG_C, call_stack_jump_arg_0_label
.std_rset CORE_REG_D, (call_stack_jump_arg_1_offset + 7)
ts call_stack_jump
tlr CORE_REG_PC
.endmacro
; Macro return_call_stack_jump: jumps to the `return_call_stack_jump` routine.
; The compiler appends this macro call after every emitted word body.
.macro return_call_stack_jump
.std_jump return_call_stack_jump
.endmacro
; Start-up code.
; NOTE(review): appears to initialise the call-stack pointer cell with its own
; address and the allocation pointer cell with the address following it,
; assuming `.std_set ADDR` stores CORE_REG_A at ADDR - confirm against std.asm.
.std_rset CORE_REG_A, MEM_CALL_STACK_PTR
.std_set MEM_CALL_STACK_PTR
.std_rset CORE_REG_A, (MEM_ALLOC_PTR + 1)
.std_set MEM_ALLOC_PTR
; Skip over the helper routines below (macro presumably provided by main.asm).
.jump_main
; Routine call_stack_jump.
; Inputs set up by the call_stack_jump macro: CORE_REG_C and CORE_REG_D.
; NOTE(review): appears to increment the call-stack pointer by one, store
; CORE_REG_D (the return address) at the new top, and then continue at the
; address held in CORE_REG_C - confirm instruction semantics in core.asm.
call_stack_jump:
.std_get MEM_CALL_STACK_PTR
tlr CORE_REG_A
.std_rset CORE_REG_B, 1
.std_alu CORE_ALU_ADD
tlr CORE_REG_A
tlr CORE_REG_B
.std_set MEM_CALL_STACK_PTR
tsr CORE_REG_D
tlr CORE_REG_A
tsr CORE_REG_B
set
tsr CORE_REG_C
tlr CORE_REG_PC
; Routine return_call_stack_jump.
; NOTE(review): appears to remember the current call-stack pointer in
; CORE_REG_C, decrement the stored pointer by one, read the entry at the
; remembered address and load it into CORE_REG_PC - confirm instruction
; semantics in core.asm.
return_call_stack_jump:
.std_get MEM_CALL_STACK_PTR
tlr CORE_REG_A
tlr CORE_REG_C
.std_rset CORE_REG_B, 1
.std_alu CORE_ALU_SUB
tlr CORE_REG_A
.std_set MEM_CALL_STACK_PTR
tsr CORE_REG_C
get
tlr CORE_REG_PC

84
henceforth/src/lexer.rs Normal file
View file

@ -0,0 +1,84 @@
use anyhow::Result;
use hence::assembler::ToCode;
use itertools::Itertools;
/// A lexical token of the source language.
#[derive(Debug)]
pub enum Token {
    /// A run of consecutive newline characters; the payload is the run length.
    Newline(usize),
    /// A run of whitespace characters; the payload is the run length.
    Whitespace(usize),
    /// Text of a `( … )` comment.
    ParenComment(String),
    /// Text of a `\` line comment.
    BackslashComment(String),
    /// Text of a `--` line comment.
    DoubleDashComment(String),
    /// A string literal: `mode` is the prefix before the opening quote,
    /// `string` is the text between the quotes.
    StringLiteral { mode: String, string: String },
    /// A numeric literal, kept as written in the source.
    Number(String),
    /// Any other whitespace-delimited word.
    Word(String),
}
impl ToCode for Token {
    /// Renders the token back into source text.
    fn to_code(&self) -> String {
        match self {
            Token::Newline(count) => "\n".repeat(*count),
            Token::Whitespace(count) => " ".repeat(*count),
            Token::ParenComment(text) => format!("( {})", text),
            Token::BackslashComment(text) => format!("\\{}", text),
            Token::DoubleDashComment(text) => format!("-- {}", text),
            Token::StringLiteral { mode, string } => format!("{}\" {}\"", mode, string),
            Token::Number(text) | Token::Word(text) => text.clone(),
        }
    }
}
/// Returns `true` if `c` separates words, i.e. it is any whitespace character.
///
/// A newline is itself whitespace, so it needs no separate check.
pub fn is_space(c: char) -> bool {
    char::is_whitespace(c)
}
/// Splits `source` into a flat list of [`Token`]s.
///
/// Recognised forms:
/// * runs of `\n` become [`Token::Newline`], runs of other whitespace become
///   [`Token::Whitespace`];
/// * `\ …` up to end of line becomes a [`Token::BackslashComment`] whose text
///   excludes the leading backslash;
/// * `( … )` and `-- …` (as standalone words) become comments;
/// * a word ending in `"` starts a string literal: the word without the quote
///   is the mode, and the text after one separator character up to the next
///   `"` is the string;
/// * a word starting with a numeric character, or with `-` followed by a
///   numeric character, is a [`Token::Number`];
/// * anything else is a [`Token::Word`].
///
/// Unterminated comments and strings run to the end of input.
///
/// # Errors
///
/// Currently never fails; the `Result` is kept for interface stability.
pub fn lex(source: &str) -> Result<Vec<Token>> {
    let mut chars = source.chars().peekable();
    let mut tokens: Vec<Token> = vec![];

    while let Some(&c) = chars.peek() {
        let token = match c {
            '\n' => Token::Newline(chars.peeking_take_while(|&c| c == '\n').count()),
            // Stop at newlines so they are counted by `Newline`, not folded
            // into a whitespace run.
            _ if c.is_whitespace() => Token::Whitespace(
                chars
                    .peeking_take_while(|&c| c.is_whitespace() && c != '\n')
                    .count(),
            ),
            '\\' => {
                // Drop the introducer so the comment text does not start with
                // a backslash (`to_code` adds it back).
                chars.next();
                Token::BackslashComment(chars.peeking_take_while(|&c| c != '\n').collect())
            }
            _ if c.is_numeric() => {
                Token::Number(chars.peeking_take_while(|&c| !is_space(c)).collect())
            }
            _ => {
                let x: String = chars.peeking_take_while(|&c| !is_space(c)).collect();
                // `-` directly followed by a numeric character: negative number.
                let negative_number = {
                    let mut it = x.chars();
                    it.next() == Some('-') && it.next().map_or(false, char::is_numeric)
                };
                match x.as_str() {
                    // `skip(1)` drops the separator after the opening word.
                    "(" => Token::ParenComment(
                        chars.by_ref().skip(1).take_while(|&c| c != ')').collect(),
                    ),
                    "--" => Token::DoubleDashComment(
                        chars.by_ref().take_while(|&c| c != '\n').collect(),
                    ),
                    _ if x.ends_with('"') => Token::StringLiteral {
                        // The quote is one byte, so this slice ends on a char
                        // boundary even when the mode contains non-ASCII text.
                        mode: x[..x.len() - 1].to_string(),
                        string: chars.by_ref().skip(1).take_while(|&c| c != '"').collect(),
                    },
                    _ if negative_number => Token::Number(x),
                    _ => Token::Word(x),
                }
            }
        };
        tokens.push(token);
    }

    Ok(tokens)
}

3
henceforth/src/lib.rs Normal file
View file

@ -0,0 +1,3 @@
pub mod compiler;
pub mod lexer;
pub mod parser;

80
henceforth/src/main.rs Normal file
View file

@ -0,0 +1,80 @@
use anyhow::Result;
use clap::{Parser, Subcommand};
use hence::assembler::ToCode;
use std::fs;
use henceforth::*;
// Top-level command-line interface; the actual work is selected by a subcommand.
// (Plain comments are used on purpose: doc comments would feed into clap's help text.)
#[derive(Debug, Parser)]
#[clap(author, version, about, long_about = None)]
struct Cli {
    // The subcommand to run.
    #[clap(subcommand)]
    commands: Commands,
}
// Available subcommands.
// (Plain comments are used on purpose: doc comments would feed into clap's help text.)
#[derive(Debug, Subcommand)]
enum Commands {
    #[clap(about = "Lexes source code and outputs tokens")]
    Lex {
        // Path of the source file to read.
        #[clap(value_parser)]
        src: String,
    },
    #[clap(about = "Parses source code and outputs AST")]
    Parse {
        // Path of the source file to read.
        #[clap(value_parser)]
        src: String,
    },
    #[clap(about = "Compiles assembly from source code")]
    Compile {
        // Path of the source file to read.
        #[clap(value_parser)]
        src: String,
        // Optional path the generated assembly is written to.
        #[clap(value_parser)]
        out: Option<String>,
        // Whether to optimize; treated as `true` when not given.
        #[clap(short, long, action)]
        optimize: Option<bool>,
        // Print the generated assembly to standard output.
        #[clap(long, action)]
        dump: bool,
    },
}
/// Entry point: parses the command line and runs the selected subcommand.
///
/// * `lex` pretty-prints the token list of the source file.
/// * `parse` pretty-prints its AST.
/// * `compile` produces assembly, prints it when `--dump` is set, and writes
///   it to `out` when a path is given.
///
/// Any I/O, lexing, parsing or compilation error is returned to the caller.
fn main() -> Result<()> {
    match Cli::parse().commands {
        Commands::Lex { src } => {
            let source = fs::read_to_string(src)?;
            println!("{:#?}", lexer::lex(&source)?);
        }
        Commands::Parse { src } => {
            let source = fs::read_to_string(src)?;
            let body = parser::parse(lexer::lex(&source)?)?;
            println!("{:#?}", body);
        }
        Commands::Compile {
            src,
            out,
            optimize,
            dump,
        } => {
            let source = fs::read_to_string(&src)?;
            let parsed = parser::parse(lexer::lex(&source)?)?;
            // Optimization is on unless explicitly disabled.
            let compiled = compiler::compile(parsed, optimize.unwrap_or(true))?;
            let assembly = format!("{}\n", compiled.to_code());

            if dump {
                print!("{}", assembly);
            }
            if let Some(path) = out {
                fs::write(path, &assembly)?;
            }
        }
    }

    Ok(())
}

168
henceforth/src/parser.rs Normal file
View file

@ -0,0 +1,168 @@
use anyhow::{bail, Result};
use parse_int;
use crate::lexer;
pub mod ast;
/// Splits one side of a stack-effect comment into its whitespace-separated names.
///
/// Returns an empty list when `s` is `None`, empty, or only whitespace.
pub fn parse_stack_state(s: Option<&str>) -> Vec<String> {
    s.map(|text| text.split_whitespace().map(str::to_string).collect())
        .unwrap_or_default()
}
/// Parses a stack-effect description of the form `before -- after`.
///
/// The text is split at the first `--`; without a `--`, the whole text is the
/// `before` side and `after` is empty.
pub fn parse_stack_result(s: &str) -> ast::StackResult {
    let (before, after) = match s.split_once("--") {
        Some((lhs, rhs)) => (Some(lhs), Some(rhs)),
        None => (Some(s), None),
    };

    ast::StackResult {
        before: parse_stack_state(before),
        after: parse_stack_state(after),
    }
}
/// Builds an AST from a token list.
///
/// Whitespace and newline tokens are dropped. Comments, string literals and
/// numbers map directly to nodes. The words `:` and `if` open nested
/// constructs whose inner tokens are collected and parsed recursively; every
/// other word becomes a plain `Node::Word`.
///
/// # Errors
///
/// Fails when a number cannot be parsed, when a word definition or condition
/// is not closed before the tokens run out, or when a word definition is
/// empty or its name is not a word.
pub fn parse(tokens: Vec<lexer::Token>) -> Result<ast::AST> {
let mut iter = tokens.into_iter().peekable();
let mut body: ast::Body = vec![];
while let Some(token) = iter.next() {
match token {
// Layout tokens carry no meaning for the AST.
lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {}
// All comment styles collapse into one node holding the trimmed text.
lexer::Token::ParenComment(x)
| lexer::Token::BackslashComment(x)
| lexer::Token::DoubleDashComment(x) => {
body.push(ast::Node::Comment(x.trim().to_string()));
}
lexer::Token::StringLiteral { mode, string } => {
body.push(ast::Node::String { mode, string });
}
lexer::Token::Number(x) => body.push(ast::Node::Number(parse_int::parse(&x)?)),
lexer::Token::Word(x) => match x.as_str() {
// Word definition: `: name [stack comment] body ;`
":" => {
// Nesting depth of `:` … `;` pairs; starts at 1 for the `:` just read.
let mut depth: usize = 1;
// Collect everything up to the matching `;`. `take_while` also consumes
// the terminating `;` without yielding it.
let mut content = iter
.by_ref()
.take_while(|t| match t {
lexer::Token::Word(x) => match x.as_str() {
":" => {
depth += 1;
true
}
";" => {
depth -= 1;
depth != 0
}
_ => true,
},
_ => true,
})
.collect::<Vec<_>>()
.into_iter()
.peekable();
// A non-zero depth means the tokens ran out before the closing `;`.
if depth != 0 {
bail!("Unbalanced word definitions");
}
// The first token that is not layout must be the word's name.
let name = match content.find(|t| {
!matches!(t, lexer::Token::Newline(_) | lexer::Token::Whitespace(_))
}) {
Some(t) => match t {
lexer::Token::Word(x) => x.clone(),
_ => bail!("Word definition name must be a word itself: {:?}", t),
},
None => bail!("Word definition can not be empty"),
};
// A comment directly after the name (layout skipped) is read as the stack
// effect; any other token, or the end of the definition, means none was given.
let stack = loop {
if let Some(t) = content.peek() {
match t {
lexer::Token::Newline(_) | lexer::Token::Whitespace(_) => {
content.next();
}
lexer::Token::ParenComment(x)
| lexer::Token::BackslashComment(x)
| lexer::Token::DoubleDashComment(x) => {
// Copy the text so the peeked borrow ends before advancing.
let y = &x.to_string();
content.next();
break Some(parse_stack_result(y));
}
_ => break None,
}
} else {
break None;
}
};
// The remaining tokens form the word's body and are parsed recursively.
body.push(ast::Node::WordDefinition {
name,
stack,
body: parse(content.collect())?.body,
});
}
// Condition: `if … [else …] then`
"if" => {
// Nesting depth of `if` … `then` pairs; shared by both collection passes below.
let mut depth: usize = 1;
let mut else_used = false;
// First pass: tokens of the `if` branch, ending at this condition's own
// `else` (seen at depth 1) or at its closing `then`.
let if_toks: Vec<_> = iter
.by_ref()
.take_while(|t| match t {
lexer::Token::Word(x) => match x.as_str() {
"if" => {
depth += 1;
true
}
"else" => {
if depth == 1 {
else_used = true;
false
} else {
true
}
}
"then" => {
depth -= 1;
depth != 0
}
_ => true,
},
_ => true,
})
.collect();
// Second pass, only after an `else`: tokens up to the closing `then`.
let else_toks: Vec<_> = if else_used {
iter.by_ref()
.take_while(|t| match t {
lexer::Token::Word(x) => match x.as_str() {
"if" => {
depth += 1;
true
}
"then" => {
depth -= 1;
depth != 0
}
_ => true,
},
_ => true,
})
.collect()
} else {
vec![]
};
// A non-zero depth means the tokens ran out before the closing `then`.
if depth != 0 {
bail!("Unbalanced conditions");
}
body.push(ast::Node::Condition {
if_body: parse(if_toks)?.body,
else_body: parse(else_toks)?.body,
});
}
// Any other word is kept as-is.
_ => {
body.push(ast::Node::Word(x));
}
},
}
}
Ok(ast::AST { body })
}

View file

@ -0,0 +1,90 @@
use hence::assembler::ToCode;
use itertools::Itertools;
/// A stack-effect annotation: the named stack items before and after a word runs.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct StackResult {
    /// Names listed on the left of `--`.
    pub before: Vec<String>,
    /// Names listed on the right of `--`.
    pub after: Vec<String>,
}
impl ToCode for StackResult {
    /// Renders the effect as `before--after`, where each non-empty side is its
    /// names joined by single spaces and followed by one trailing space.
    fn to_code(&self) -> String {
        let side = |names: &[String]| -> String {
            if names.is_empty() {
                String::new()
            } else {
                let mut text = names.join(" ");
                text.push(' ');
                text
            }
        };

        let mut out = side(&self.before);
        out.push_str("--");
        out.push_str(&side(&self.after));
        out
    }
}
/// A node of the source-language AST.
#[derive(Debug, PartialEq, Eq)]
pub enum Node {
    /// A comment's text.
    Comment(String),
    /// A string literal with its mode prefix.
    String {
        mode: String,
        string: String,
    },
    /// A numeric literal.
    Number(i32),
    /// A word definition with an optional stack-effect annotation.
    WordDefinition {
        name: String,
        stack: Option<StackResult>,
        body: Body,
    },
    /// An `if … else … then` construct; `else_body` is empty when there is no `else`.
    Condition {
        if_body: Body,
        else_body: Body,
    },
    /// A reference to a word.
    Word(String),
}
impl ToCode for Node {
fn to_code(&self) -> String {
match self {
Node::Comment(x) => format!("\\ {}", x),
Node::String { mode, string } => format!("{}\" {}\"", mode, string),
Node::Number(x) => x.to_string(),
Node::WordDefinition { name, stack, body } => format!(
": {}{} {} ;",
name,
match stack {
Some(x) => format!(" {}", x.to_code()),
None => "".to_string(),
},
body.iter().map(|x| x.to_code()).join(" ")
),
Node::Condition { if_body, else_body } => {
if else_body.is_empty() {
format!("if {} then", if_body.iter().map(|x| x.to_code()).join(" "))
} else {
format!(
"if {} else {} then",
if_body.iter().map(|x| x.to_code()).join(" "),
else_body.iter().map(|x| x.to_code()).join(" ")
)
}
}
Node::Word(x) => x.to_owned(),
}
}
}
/// A sequence of AST nodes.
pub type Body = Vec<Node>;

/// Root of a parsed program.
#[derive(Debug)]
pub struct AST {
    /// Top-level nodes in source order.
    pub body: Body,
}
impl ToCode for AST {
    /// Renders every top-level node and joins the results with single spaces.
    fn to_code(&self) -> String {
        let rendered: Vec<String> = self.body.iter().map(|node| node.to_code()).collect();
        rendered.join(" ")
    }
}