hence/henceforth/src/lib/compiler.rs

350 lines
13 KiB
Rust

use anyhow::{bail, Result};
use indexmap::IndexSet;
use itertools::Itertools;
use std::collections::HashMap;
use crate::parser;
mod instruction;
pub use crate::compiler::instruction::Instruction;
/// Something that can be compiled into a value of type `U`, given a shared context of type `T`.
///
/// In this file it is invoked on `Instruction` values with a `Data` context to produce
/// `hence::parser::ast::Body` fragments.
pub trait Compilable<T, U> {
/// Returns the compiled form of `self`, reading whatever it needs from `data`.
fn compile(&self, data: &T) -> Result<U>;
}
/// A user-defined word recorded by `Data::generate_instructions`.
#[derive(Debug)]
pub struct Word {
/// Number of words that were already registered when this one was defined;
/// `Data::embed` uses it to build the `words_{id}` label and to order the emitted words.
pub id: usize,
/// Instructions generated from the body of the word definition.
pub instructions: Vec<Instruction>,
/// Incremented each time `Data::generate_instructions` resolves a word node to this word.
/// `Data::embed` only emits words whose counter is greater than 1.
pub times_used: usize,
}
/// State accumulated while generating instructions and read back when emitting the final AST.
#[derive(Debug)]
pub struct Data {
/// User-defined words, keyed by word name.
pub words: HashMap<String, Word>,
/// Interned string literals. A string's insertion index is the id stored in
/// `Instruction::StringPrint` / `Instruction::StringReference` and used by `Data::embed`
/// for the `data_strings_{id}` labels.
pub strings: IndexSet<String>,
}
impl Data {
/// Creates an empty compiler state: no user-defined words and no interned strings.
pub fn default() -> Self {
    let words = HashMap::new();
    let strings = IndexSet::new();
    Self { words, strings }
}
/// Lowers a parsed `body` into a flat list of compiler [`Instruction`]s.
///
/// Word definitions are registered in `self.words` and `.`/`r` string literals are
/// interned in `self.strings`; comments and word definitions emit no instruction.
/// When `optimize` is set, a run of two or more identical consecutive nodes that emit
/// an instruction is collapsed into a single [`Instruction::Multiple`].
///
/// # Errors
///
/// Returns an error for an unknown string mode, for a word definition whose name
/// collides with a compiler instruction or with an earlier definition, and for a
/// reference to a word that has not been defined.
pub fn generate_instructions(
    &mut self,
    body: parser::ast::Body,
    optimize: bool,
) -> Result<Vec<Instruction>> {
    let mut instructions: Vec<Instruction> = vec![];
    let mut iter = body.into_iter().peekable();
    while let Some(node) = iter.next() {
        // Comments and word definitions emit no instruction, so a run of them has
        // nothing to wrap in `Multiple`. Sending them through their regular arms also
        // keeps the duplicate-definition error for a repeated word definition.
        let collapsible = optimize
            && !matches!(
                &node,
                parser::ast::Node::Comment(_) | parser::ast::Node::WordDefinition { .. }
            );
        match node {
            _ if collapsible && iter.next_if_eq(&node).is_some() => {
                // The guard already consumed one duplicate; together with the current
                // node that makes two, plus every identical node that still follows.
                let count = iter.by_ref().peeking_take_while(|n| *n == node).count() + 2;
                // NOTE(review): for a run of word calls the recursive call bumps
                // `times_used` only once, not `count` times — confirm that is intended.
                let repeated = match self
                    .generate_instructions(vec![node], optimize)?
                    .into_iter()
                    .next()
                {
                    Some(instruction) => instruction,
                    None => bail!("Repeated node did not produce an instruction"),
                };
                instructions.push(Instruction::Multiple {
                    instruction: Box::new(repeated),
                    count,
                });
            }
            parser::ast::Node::Comment(_) => {}
            parser::ast::Node::String { mode, string } => {
                instructions.push(match mode.as_str() {
                    "." => {
                        // `insert_full` returns the index of the (possibly already
                        // present) string; that index is the string id.
                        let id = self.strings.insert_full(string).0;
                        Instruction::StringPrint(id)
                    }
                    "r" => {
                        let id = self.strings.insert_full(string).0;
                        Instruction::StringReference(id)
                    }
                    "asm" => Instruction::AsmQuote(string),
                    _ => bail!("Unknown string mode: {}", mode),
                });
            }
            parser::ast::Node::Number(x) => {
                // NOTE(review): `as u16` silently narrows out-of-range values —
                // confirm this is the intended 16-bit behaviour.
                instructions.push(Instruction::Push(x as u16));
            }
            parser::ast::Node::WordDefinition {
                name,
                stack: _,
                body,
            } => {
                if Instruction::from_word(&name).is_some() {
                    bail!("Word already exists as compiler instruction: {}", name);
                } else if self.words.contains_key(&name) {
                    bail!("Word already exists as user word definition: {}", name);
                }
                let ins = self.generate_instructions(body, optimize)?;
                // Take the id after compiling the body so that words defined inside
                // the body (if any) are counted first, as before.
                let id = self.words.len();
                self.words.insert(
                    name,
                    Word {
                        id,
                        instructions: ins,
                        times_used: 0,
                    },
                );
            }
            parser::ast::Node::Word(x) => {
                // Built-in instructions take precedence over user-defined words.
                if let Some(ins) = Instruction::from_word(&x) {
                    instructions.push(ins);
                } else if let Some(w) = self.words.get_mut(&x) {
                    instructions.push(Instruction::Call(x));
                    w.times_used += 1;
                } else {
                    bail!("Word does not exist: {}", x);
                }
            }
        }
    }
    Ok(instructions)
}
pub fn embed(&self, body: hence::parser::ast::Body) -> hence::parser::ast::Body {
let mut x = vec![
// includes
hence::parser::ast::Node::MacroCall {
name: "include".to_string(),
args: vec![hence::arg::Arg::String("$lib/core.asm".to_string())],
},
hence::parser::ast::Node::MacroCall {
name: "include".to_string(),
args: vec![hence::arg::Arg::String("$lib/std.asm".to_string())],
},
hence::parser::ast::Node::MacroCall {
name: "include".to_string(),
args: vec![hence::arg::Arg::String("$lib/main.asm".to_string())],
},
// constants
hence::parser::ast::Node::MacroCall {
name: "define".to_string(),
args: vec![
hence::arg::Arg::Variable("MEM_CALL_STACK".to_string()),
hence::arg::Arg::Variable("CORE_MEM_MEM".to_string()),
],
},
hence::parser::ast::Node::MacroCall {
name: "define".to_string(),
args: vec![
hence::arg::Arg::Variable("MEM_LOOP_I".to_string()),
hence::arg::Arg::BinaryExpression {
left: Box::new(hence::arg::Arg::Variable("CORE_MEM_MEM".to_string())),
right: Box::new(hence::arg::Arg::Number(16)),
op: hence::arg::BinaryExpressionOperator::Add,
},
],
},
hence::parser::ast::Node::MacroCall {
name: "define".to_string(),
args: vec![
hence::arg::Arg::Variable("MEM_LOOP_J".to_string()),
hence::arg::Arg::BinaryExpression {
left: Box::new(hence::arg::Arg::Variable("MEM_LOOP_I".to_string())),
right: Box::new(hence::arg::Arg::Number(1)),
op: hence::arg::BinaryExpressionOperator::Add,
},
],
},
hence::parser::ast::Node::MacroCall {
name: "define".to_string(),
args: vec![
hence::arg::Arg::Variable("MEM_ALLOC_PTR".to_string()),
hence::arg::Arg::BinaryExpression {
left: Box::new(hence::arg::Arg::Variable("MEM_LOOP_J".to_string())),
right: Box::new(hence::arg::Arg::Number(1)),
op: hence::arg::BinaryExpressionOperator::Add,
},
],
},
// macros
// stack_transfer_alu
hence::parser::ast::Node::MacroCall {
name: "macro".to_string(),
args: vec![hence::arg::Arg::Variable("stack_transfer_alu".to_string())],
},
hence::parser::ast::Node::MacroCall {
name: "std_ld".to_string(),
args: vec![],
},
hence::parser::ast::Node::Call {
name: "tlr".to_string(),
arg: Some(hence::arg::Arg::Variable("CORE_REG_B".to_string())),
},
hence::parser::ast::Node::MacroCall {
name: "std_ld".to_string(),
args: vec![],
},
hence::parser::ast::Node::Call {
name: "tlr".to_string(),
arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())),
},
hence::parser::ast::Node::MacroCall {
name: "endmacro".to_string(),
args: vec![],
},
// call_word
hence::parser::ast::Node::MacroCall {
name: "macro".to_string(),
args: vec![
hence::arg::Arg::Variable("call_word".to_string()),
hence::arg::Arg::Variable("call_word_arg_0_label".to_string()),
],
},
hence::parser::ast::Node::Call {
name: "ts".to_string(),
arg: Some(hence::arg::Arg::BinaryExpression {
left: Box::new(hence::arg::Arg::Variable("OFFSET".to_string())),
right: Box::new(hence::arg::Arg::Number(3 + 3 + 3 + 1 + 3 + 1)),
op: hence::arg::BinaryExpressionOperator::Add,
}),
},
hence::parser::ast::Node::Call {
name: "tlr".to_string(),
arg: Some(hence::arg::Arg::Variable("CORE_REG_A".to_string())),
},
hence::parser::ast::Node::Call {
name: "ts".to_string(),
arg: Some(hence::arg::Arg::Variable("MEM_CALL_STACK".to_string())),
},
hence::parser::ast::Node::Call {
name: "set".to_string(),
arg: None,
},
hence::parser::ast::Node::Call {
name: "ts".to_string(),
arg: Some(hence::arg::Arg::Variable(
"call_word_arg_0_label".to_string(),
)),
},
hence::parser::ast::Node::Call {
// tlr CORE_REG_PC -> tlr 0 -> tlr
name: "tlr".to_string(),
arg: Some(hence::arg::Arg::Variable("CORE_REG_PC".to_string())),
},
hence::parser::ast::Node::MacroCall {
name: "endmacro".to_string(),
args: vec![],
},
// return_word
hence::parser::ast::Node::MacroCall {
name: "macro".to_string(),
args: vec![hence::arg::Arg::Variable("return_word".to_string())],
},
hence::parser::ast::Node::MacroCall {
name: "std_get".to_string(),
args: vec![hence::arg::Arg::Variable("MEM_CALL_STACK".to_string())],
},
hence::parser::ast::Node::Call {
name: "tlr".to_string(),
arg: Some(hence::arg::Arg::Variable("CORE_REG_PC".to_string())),
},
hence::parser::ast::Node::MacroCall {
name: "endmacro".to_string(),
args: vec![],
},
// setup
hence::parser::ast::Node::MacroCall {
name: "std_rset".to_string(),
args: vec![
hence::arg::Arg::Variable("CORE_REG_A".to_string()),
hence::arg::Arg::BinaryExpression {
left: Box::new(hence::arg::Arg::Variable("MEM_ALLOC_PTR".to_string())),
right: Box::new(hence::arg::Arg::Number(1)),
op: hence::arg::BinaryExpressionOperator::Add,
},
],
},
hence::parser::ast::Node::MacroCall {
name: "std_set".to_string(),
args: vec![hence::arg::Arg::Variable("MEM_ALLOC_PTR".to_string())],
},
// jump_main
hence::parser::ast::Node::MacroCall {
name: "jump_main".to_string(),
args: vec![],
},
// data
hence::parser::ast::Node::Label("data".to_string()),
hence::parser::ast::Node::Label("data_strings".to_string()),
];
for (id, s) in self.strings.iter().enumerate() {
x.extend([
hence::parser::ast::Node::Label(format!("data_strings_{}", id)),
hence::parser::ast::Node::MacroCall {
name: "bytes".to_string(),
args: vec![hence::arg::Arg::String(s.to_string())],
},
hence::parser::ast::Node::Label(format!("data_strings_end_{}", id)),
]);
}
// words
x.push(hence::parser::ast::Node::Label("words".to_string()));
for (name, word) in &self
.words
.iter()
.filter(|(_, w)| w.times_used > 1)
.sorted_by(|a, b| Ord::cmp(&a.1.id, &b.1.id))
.collect::<Vec<_>>()
{
x.extend(vec![
hence::parser::ast::Node::Label(format!("words_{}", word.id)),
hence::parser::ast::Node::Comment(format!("word: \"{}\"", name)),
]);
x.extend(
word.instructions
.iter()
.map(|ins| ins.compile(self))
.collect::<Result<Vec<hence::parser::ast::Body>>>()
.unwrap()
.into_iter()
.flatten(),
);
x.push(hence::parser::ast::Node::MacroCall {
name: "return_word".to_string(),
args: vec![],
});
}
x.extend([
hence::parser::ast::Node::Label("main".to_string()),
hence::parser::ast::Node::MacroCall {
name: "main".to_string(),
args: vec![hence::arg::Arg::Variable("main".to_string())],
},
]);
x.extend(body);
x.push(hence::parser::ast::Node::MacroCall {
name: "std_stop".to_string(),
args: vec![],
});
x
}
}
/// Compiles a parsed program into a `hence` assembly AST.
///
/// Generates instructions for `ast.body` (collapsing repeated nodes when `optimize` is
/// set), compiles every instruction against the collected [`Data`], and wraps the
/// result with [`Data::embed`].
///
/// # Errors
///
/// Returns any error raised while generating instructions or while compiling an
/// individual instruction.
pub fn compile(ast: parser::ast::AST, optimize: bool) -> Result<hence::parser::ast::AST> {
    let mut data = Data::default();
    let instructions = data.generate_instructions(ast.body, optimize)?;
    // Propagate a failing instruction as an error instead of panicking.
    let compiled = instructions
        .iter()
        .map(|ins| ins.compile(&data))
        .collect::<Result<Vec<hence::parser::ast::Body>>>()?;
    let body = data.embed(compiled.into_iter().flatten().collect());
    Ok(hence::parser::ast::AST { body })
}