hence/henceforth/src/lib/compiler.rs

298 lines
10 KiB
Rust

use anyhow::{bail, Context, Result};
use indexmap::IndexSet;
use itertools::Itertools;
use lazy_static::lazy_static;
use petgraph::{graph::NodeIndex, Graph};
use std::collections::HashMap;
use crate::parser;
mod instruction;
pub use crate::compiler::instruction::Instruction;
/// Source text of the default assembly template, embedded into the binary at
/// build time from `compiler/templates/default.asm`.
pub const TEMPLATE_ASM: &str = include_str!("compiler/templates/default.asm");
// Parsed form of `TEMPLATE_ASM`: the template text is run through the `hence`
// lexer and parser and only the resulting AST body is kept.
// Both `unwrap()` calls panic if the embedded template fails to lex or parse;
// with `lazy_static!` that happens on first access of `TEMPLATE`.
lazy_static! {
#[derive(Debug)]
pub static ref TEMPLATE: hence::parser::ast::Body = hence::parser::parse(
hence::lexer::lex(TEMPLATE_ASM).unwrap()
)
.unwrap()
.body;
}
/// Something that can be compiled into an output of type `U` given read-only
/// context of type `T`.
pub trait Compilable<T, U> {
/// Compiles `self` using `data` as context, returning the produced output or
/// an error.
fn compile(&self, data: &T) -> Result<U>;
}
/// A user-defined word together with its compiled body and bookkeeping.
#[derive(Debug)]
pub struct Word {
/// Sequential id, set to the number of words already registered when this
/// word was defined.
pub id: usize,
/// Instructions generated from the word's body.
pub instructions: Vec<Instruction>,
/// Incremented each time a word node referencing this word is compiled.
pub times_used: usize,
/// This word's node in `Data::callable_graph`.
pub callable_graph_node: NodeIndex,
}
/// A compiled conditional: the instructions of both branches plus its node in
/// the callable graph.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Condition {
/// Instructions generated from the `if` body.
pub if_instructions: Vec<Instruction>,
/// Instructions generated from the `else` body.
pub else_instructions: Vec<Instruction>,
/// This condition's node in `Data::callable_graph`.
pub callable_graph_node: NodeIndex,
}
/// Node weight of the callable graph, identifying what a node stands for.
#[derive(Debug)]
pub enum CallableId {
/// A user-defined word, identified by its name (key into `Data::words`).
Word(String),
/// A condition, identified by its index into `Data::conditions`.
Condition(usize),
}
/// Mutable state accumulated while turning a parsed program into instructions.
#[derive(Debug)]
pub struct Data {
/// String literals collected from `.` and `r` string nodes; a string's
/// insertion index is the id used by the string instructions and by the
/// `data_strings_{id}` labels.
pub strings: IndexSet<String>,
/// Graph with one node per word or condition; an edge points from a callable
/// to each word or condition its instructions reference.
pub callable_graph: Graph<CallableId, ()>,
/// User-defined words, keyed by name.
pub words: HashMap<String, Word>,
/// Conditions in creation order; the index is the id carried by
/// `Instruction::Condition`.
pub conditions: Vec<Condition>,
}
impl Data {
    /// Creates an empty compilation state: no strings, an empty callable
    /// graph, and no words or conditions.
    ///
    /// This is an inherent constructor, not an implementation of the
    /// `Default` trait.
    pub fn default() -> Self {
        Self {
            strings: IndexSet::new(),
            callable_graph: Graph::new(),
            words: HashMap::new(),
            conditions: vec![],
        }
    }

    /// Adds an edge from `origin` to the callable referenced by `instruction`,
    /// if it references one.
    ///
    /// * `Instruction::Call` links to the called word's graph node.
    /// * `Instruction::Condition` links to the condition's graph node.
    /// * `Instruction::Multiple` is unwrapped and its inner instruction is
    ///   handled recursively (a single edge regardless of the repeat count).
    /// * Every other instruction adds nothing.
    ///
    /// # Errors
    ///
    /// Fails if the referenced word or condition has not been registered in
    /// `self.words` / `self.conditions`.
    pub fn add_graph_edge(&mut self, origin: NodeIndex, instruction: Instruction) -> Result<()> {
        let target = match instruction {
            Instruction::Call(name) => {
                self.words
                    .get(&name)
                    // Build the message lazily; it is only needed on failure.
                    .with_context(|| {
                        format!("Could not get already resolved referenced word: {}", name)
                    })?
                    .callable_graph_node
            }
            Instruction::Condition(id) => {
                self.conditions
                    .get(id)
                    .with_context(|| {
                        format!("Could not get already resolved referenced condition: {}", id)
                    })?
                    .callable_graph_node
            }
            Instruction::Multiple { instruction, .. } => {
                return self.add_graph_edge(origin, *instruction);
            }
            _ => return Ok(()),
        };
        self.callable_graph.add_edge(origin, target, ());
        Ok(())
    }

    /// Calls [`Data::add_graph_edge`] for every instruction in `ins`, stopping
    /// at the first error.
    pub fn add_graph_edges(&mut self, origin: NodeIndex, ins: Vec<Instruction>) -> Result<()> {
        ins.into_iter()
            .try_for_each(|instruction| self.add_graph_edge(origin, instruction))
    }

    /// Translates a parsed body into a flat list of instructions, registering
    /// strings, words and conditions in `self` along the way.
    ///
    /// When `optimize` is set, a run of two or more identical consecutive
    /// nodes is collapsed into one `Instruction::Multiple` carrying the run
    /// length. Comments and word definitions are never collapsed: they emit
    /// no instruction of their own, and a repeated definition must still be
    /// reported as a duplicate.
    ///
    /// # Errors
    ///
    /// Fails on an unknown string mode, on a word definition that clashes
    /// with a compiler instruction or an existing word, and on a reference to
    /// an undefined word.
    pub fn generate_instructions(
        &mut self,
        body: parser::ast::Body,
        optimize: bool,
    ) -> Result<Vec<Instruction>> {
        let mut instructions: Vec<Instruction> = vec![];
        let mut iter = body.into_iter().peekable();
        while let Some(node) = iter.next() {
            match node {
                // Run-length arm. The guard consumes the second element of the
                // run when it matches, so nodes that yield no instruction must
                // be filtered out *before* `next_if_eq` is evaluated.
                _ if optimize
                    && !matches!(
                        &node,
                        parser::ast::Node::Comment(_) | parser::ast::Node::WordDefinition { .. }
                    )
                    && iter.next_if_eq(&node).is_some() =>
                {
                    // `+ 2`: the node just taken from the iterator plus the one
                    // consumed by the guard.
                    let count = iter.by_ref().peeking_take_while(|n| *n == node).count() + 2;
                    // NOTE(review): for a repeated word reference this bumps
                    // `times_used` once, not `count` times — confirm intended.
                    let repeated = self
                        .generate_instructions(vec![node], optimize)?
                        .into_iter()
                        .next()
                        .context("Repeated node did not produce an instruction")?;
                    instructions.push(Instruction::Multiple {
                        instruction: Box::new(repeated),
                        count,
                    });
                }
                parser::ast::Node::Comment(_) => {}
                parser::ast::Node::String { mode, string } => {
                    instructions.push(match mode.as_str() {
                        "." => {
                            let id = self.strings.insert_full(string).0;
                            Instruction::StringPrint(id)
                        }
                        "r" => {
                            let id = self.strings.insert_full(string).0;
                            Instruction::StringReference(id)
                        }
                        "asm" => Instruction::AsmQuote(string),
                        _ => bail!("Unknown string mode: {}", mode),
                    });
                }
                parser::ast::Node::Number(x) => {
                    instructions.push(Instruction::Push(x));
                }
                parser::ast::Node::WordDefinition {
                    name,
                    stack: _,
                    body,
                } => {
                    if Instruction::from_word(&name).is_some() {
                        bail!("Word already exists as compiler instruction: {}", name);
                    } else if self.words.contains_key(&name) {
                        bail!("Word already exists as user word definition: {}", name);
                    }

                    let origin = self
                        .callable_graph
                        .add_node(CallableId::Word(name.to_string()));
                    // Register the word before compiling its body, so the body
                    // can already look the word up (e.g. to reference itself).
                    self.words.insert(
                        name.to_string(),
                        Word {
                            id: self.words.len(),
                            instructions: vec![],
                            times_used: 0,
                            callable_graph_node: origin,
                        },
                    );

                    let ins = self.generate_instructions(body, optimize)?;
                    self.words
                        .get_mut(&name)
                        .with_context(|| format!("Could not get word: {}", name))?
                        .instructions = ins.clone();
                    self.add_graph_edges(origin, ins)?;
                }
                parser::ast::Node::Condition { if_body, else_body } => {
                    // Both branches are compiled first, so conditions nested
                    // inside them receive lower ids than this one.
                    let if_instructions = self.generate_instructions(if_body, optimize)?;
                    let else_instructions = self.generate_instructions(else_body, optimize)?;

                    let id = self.conditions.len();
                    let origin = self.callable_graph.add_node(CallableId::Condition(id));
                    self.conditions.push(Condition {
                        if_instructions: if_instructions.clone(),
                        else_instructions: else_instructions.clone(),
                        callable_graph_node: origin,
                    });
                    instructions.push(Instruction::Condition(id));

                    self.add_graph_edges(origin, if_instructions)?;
                    self.add_graph_edges(origin, else_instructions)?;
                }
                parser::ast::Node::Word(x) => {
                    // Built-in instructions take precedence over user words.
                    if let Some(ins) = Instruction::from_word(&x) {
                        instructions.push(ins);
                    } else if let Some(w) = self.words.get_mut(&x) {
                        w.times_used += 1;
                        instructions.push(Instruction::Call(x));
                    } else {
                        bail!("Word does not exist: {}", x);
                    }
                }
            }
        }
        Ok(instructions)
    }

    /// Wraps an already compiled program `body` into the assembly template.
    ///
    /// The result is, in order: a copy of [`TEMPLATE`]; for every collected
    /// string a `data_strings_{id}` label, a `bytes` macro call holding the
    /// string, and a `data_strings_end_{id}` label; a `main` label followed by
    /// a `main` macro call; `body`; and a final `std_stop` macro call.
    ///
    /// Currently always returns `Ok`.
    pub fn embed(&self, body: hence::parser::ast::Body) -> Result<hence::parser::ast::Body> {
        let mut x = TEMPLATE.to_vec();

        // strings
        for (id, s) in self.strings.iter().enumerate() {
            x.extend([
                hence::parser::ast::Node::Label(format!("data_strings_{}", id)),
                hence::parser::ast::Node::MacroCall {
                    name: "bytes".to_string(),
                    args: vec![hence::arg::Arg::String(s.to_string())],
                },
                hence::parser::ast::Node::Label(format!("data_strings_end_{}", id)),
            ]);
        }

        // TODO: the emitters for conditions and words below are disabled; the
        // drafts are kept for reference until they are finished.

        // conditions
        // for (id, c) in self.conditions.iter().enumerate() {
        //     x.push(hence::parser::ast::Node::Label(format!(
        //         "conditions_if_{}",
        //         id
        //     )));
        //     x.extend(c.if_instructions.iter().map(|ins| ins.compile(self)).collect::<Result<Vec<_>>>()?.into_iter().flatten());
        //     x.push(hence::parser::ast::Node::Label(format!("conditions_else_{}", id)));
        //     x.extend(c.else_instructions.iter().map(|ins| ins.compile(self)).collect::<Result<Vec<_>>>()?.into_iter().flatten());
        // }

        // words
        // for (name, word) in &self
        //     .words
        //     .iter()
        //     .filter(|(_, w)| w.times_used > 1)
        //     .sorted_by(|a, b| Ord::cmp(&a.1.id, &b.1.id))
        //     .collect::<Vec<_>>()
        // {
        //     x.extend(vec![
        //         hence::parser::ast::Node::Label(format!("words_{}", word.id)),
        //         hence::parser::ast::Node::Comment(format!("word: \"{}\"", name)),
        //     ]);
        //     x.extend(
        //         word.instructions
        //             .iter()
        //             .map(|ins| ins.compile(self))
        //             .collect::<Result<Vec<hence::parser::ast::Body>>>()
        //             .unwrap()
        //             .into_iter()
        //             .flatten()
        //     );
        //     x.push(hence::parser::ast::Node::MacroCall {
        //         name: "return_call_stack_jump".to_string(),
        //         args: vec![],
        //     });
        // }

        x.extend([
            hence::parser::ast::Node::Label("main".to_string()),
            hence::parser::ast::Node::MacroCall {
                name: "main".to_string(),
                args: vec![hence::arg::Arg::Variable("main".to_string())],
            },
        ]);
        x.extend(body);
        x.push(hence::parser::ast::Node::MacroCall {
            name: "std_stop".to_string(),
            args: vec![],
        });

        Ok(x)
    }
}
/// Compiles a parsed program into an assembly AST for the `hence` toolchain.
///
/// The program body is first turned into instructions (collapsing repeated
/// nodes when `optimize` is set), each instruction is then compiled to
/// assembly nodes, and the concatenated result is embedded into the template
/// via [`Data::embed`].
///
/// # Errors
///
/// Returns the first error raised while generating instructions, compiling an
/// instruction, or embedding the result. Instruction compile errors are
/// propagated to the caller rather than causing a panic.
pub fn compile(ast: parser::ast::AST, optimize: bool) -> Result<hence::parser::ast::AST> {
    let mut data = Data::default();
    let instructions = data.generate_instructions(ast.body, optimize)?;

    // Collecting into `Result` stops at the first failing instruction.
    let body: hence::parser::ast::Body = instructions
        .iter()
        .map(|ins| ins.compile(&data))
        .collect::<Result<Vec<hence::parser::ast::Body>>>()?
        .into_iter()
        .flatten()
        .collect();

    Ok(hence::parser::ast::AST {
        body: data.embed(body)?,
    })
}