From 36e1bf722a3d366cea20ab7315d63d588d23dc48 Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Sat, 20 Jun 2020 13:38:03 -0500 Subject: Working on a simple LISP/scheme interpreter. --- .gitignore | 5 + Cargo.toml | 8 ++ src/lib/eval/arith.rs | 34 ++++++ src/lib/eval/mod.rs | 54 +++++++++ src/lib/mod.rs | 3 + src/lib/parse.rs | 296 ++++++++++++++++++++++++++++++++++++++++++++++++ src/lib/sexpr.rs | 7 ++ src/lib/types.rs | 97 ++++++++++++++++ src/main.rs | 71 ++++++++++++ src/tests/mod.rs | 1 + src/tests/parse_test.rs | 67 +++++++++++ 11 files changed, 643 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 src/lib/eval/arith.rs create mode 100644 src/lib/eval/mod.rs create mode 100644 src/lib/mod.rs create mode 100644 src/lib/parse.rs create mode 100644 src/lib/sexpr.rs create mode 100644 src/lib/types.rs create mode 100644 src/main.rs create mode 100644 src/tests/mod.rs create mode 100644 src/tests/parse_test.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1b31bd7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/target +**/*.rs.bk +*~ +history.txt +Cargo.lock \ No newline at end of file diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..6414a93 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "chisaigo" +version = "0.1.0" +authors = ["Dominick Allen "] +edition = "2018" + +[dependencies] +rustyline = "6.2.0" \ No newline at end of file diff --git a/src/lib/eval/arith.rs b/src/lib/eval/arith.rs new file mode 100644 index 0000000..fae69de --- /dev/null +++ b/src/lib/eval/arith.rs @@ -0,0 +1,34 @@ +use std::ops::Add; +use super::super::types::Type; +use super::super::types::Type::*; +use super::super::types::Number; + +impl Add for Type { + type Output = Result; + + fn add(self, other: Type) -> Result { + match (self, other) { + (Bool(_), _) | (_, Bool(_)) | + (Str(_), _) | (_, Str(_)) | + (Operator(_), _) | (_, Operator(_)) => { + Err("Cannot add these types".to_string()) + }, + (Number(Number::Int(i)), Number(Number::Float(f))) | + (Number(Number::Float(f)), Number(Number::Int(i))) => { + Ok(Number(Number::Float(f + i as f32))) + }, + + (Number(Number::Int(a)), Number(Number::Int(b))) => { + Ok(Number(Number::Int(a + b))) + }, + + (Number(Number::Float(a)), Number(Number::Float(b))) => { + Ok(Number(Number::Float(a + b))) + }, + + (Symbol(_), Number(_)) | + (Number(_), Symbol(_)) | + (Symbol(_), Symbol(_)) => Err("Not yet implemented".to_string()) + } + } +} diff --git a/src/lib/eval/mod.rs b/src/lib/eval/mod.rs new file mode 100644 index 0000000..b582397 --- /dev/null +++ b/src/lib/eval/mod.rs @@ -0,0 +1,54 @@ +use std::collections::HashMap; +use super::types::Op; +use super::types::Op::*; +use super::types::SEXP; +use super::types::SEXP::*; +use super::types::Type::*; +use super::types::Number; + +pub mod arith; + +pub type Env = HashMap; + +pub fn eval(expr: &SEXP, env: &mut Env) -> Result { + match expr { + Atom(ref x) => Ok(Atom((*x).clone())), + Sexpr(ref s) => seval(s, env) + } +} + +fn seval(sexpr: &[SEXP], env: &mut Env) -> Result { + if sexpr.is_empty() { + return Err("Empty S Expression".to_string()) + } + + let op = match &sexpr[0] { + Atom(Operator(x)) => *x, + Atom(Symbol(_s)) => return Err("Not yet implemented".to_string()), + x => return Err(format!("{:?} is not a procedure", x)) + }; + + op_eval(op, &sexpr[1..], env) +} + +fn op_eval(op: Op, expr: &[SEXP], env: &mut Env) -> Result { + match op { + Add => op_add(expr, env), + //Define => define(expr, env), + _ => Err("Not yet implemented".to_string()) + } +} + +fn op_add(expr: &[SEXP], env: &mut Env) -> Result { + let mut acc = Number(Number::Int(0)); + let mut i = 0; + while i < expr.len() { + let b = match eval(&expr[i], env)? { + Atom(x) => x, + Sexpr(_) => panic!("This should have evaluated to an atom") + }; + acc = (acc + b)?; + i += 1; + } + Ok(SEXP::Atom(acc)) +} diff --git a/src/lib/mod.rs b/src/lib/mod.rs new file mode 100644 index 0000000..a547a2d --- /dev/null +++ b/src/lib/mod.rs @@ -0,0 +1,3 @@ +pub mod types; +pub mod parse; +pub mod eval; diff --git a/src/lib/parse.rs b/src/lib/parse.rs new file mode 100644 index 0000000..8e9c785 --- /dev/null +++ b/src/lib/parse.rs @@ -0,0 +1,296 @@ +use super::types::Type; +use super::types::Number; +use super::types::Op; +use super::types::SEXP; + +pub type MaybeToken = (Option>, usize); + +#[derive(PartialEq, Debug)] +pub enum Token { + LParen, + RParen, + Value(Type) +} + +pub struct TokenStream { + expr: String, + index: usize, + rules: Vec MaybeToken>, + on_err: String, +} + +impl TokenStream { + /// Creates a new TokenStream object with the provided string. + pub fn new(expr: String, rules: Vec MaybeToken>) -> TokenStream { + TokenStream { + expr, + index: 0, + rules, + on_err: "ERROR".to_string(), + } + } + + pub fn default(e: &str) -> TokenStream { + TokenStream { + expr: e.to_string(), + index: 0, + rules: vec!(is_paren, is_op, is_bool, is_var, is_string, is_number), + on_err: "ERROR".to_string(), + } + } + + pub fn peek(&self) -> Option> { + + let i = self.count_whitespace(); + if self.index + i == self.expr.len() { + return None + } + /* + let (token, _) = analyze(&self.expr[self.index + i..], + self.rules.as_slice(), + &self.on_err); + */ + let (token, _) = analyze2(&self.expr[self.index + i ..]); + token + } + + + fn count_whitespace(&self) -> usize { + let mut whitespace_count = 0; + for x in self.expr[self.index..].chars() { + if x.is_whitespace() { + whitespace_count += 1; + } else { + break + } + } + whitespace_count + } + + fn skip_whitespace(&mut self) { + if self.index < self.expr.len() { + self.index += self.count_whitespace(); + } + } +} + +impl Iterator for TokenStream { + type Item = Result; + + fn next(&mut self) -> Option { + if self.index == self.expr.len() { + return None + } + + self.skip_whitespace(); + /* + let (token, len) = analyze( + &self.expr[self.index..], + self.rules.as_ref(), &self.on_err); + */ + let (token, len) = analyze2(&self.expr[self.index ..]); + self.index += len; + token + } + + fn size_hint(&self) -> (usize, Option) { + if self.index == self.expr.len() { + (0, None) + } else { + (1, Some(self.expr.len() - self.index)) + } + } +} + +pub fn analyze(expr: &str, funs: &[fn(&str) -> MaybeToken], + on_err: &str) -> MaybeToken { + for &fun in funs.iter() { + let (token, len) = fun(expr); + if token.is_some() { + return (token, len) + } + } + + (Some(Err(on_err.to_string())), 0) +} + +fn analyze2(expr: &str) -> MaybeToken { + //is_var, is_number + let c = expr.chars().next().unwrap(); + /* Check for strings, ( and ) */ + if c == '"' { + let close = get_string_end(expr); + let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); + let expr_len = close + 2; + return (Some(Ok(value)), expr_len) + } else if c == '(' { + return (Some(Ok(Token::LParen)), 1) + } else if c == ')' { + return (Some(Ok(Token::RParen)), 1) + } + + let word = &expr[0 .. get_word_end(expr)]; + if word == "true" { + (Some(Ok(Token::Value(Type::Bool(true)))), 4) + } else if word == "false" { + (Some(Ok(Token::Value(Type::Bool(false)))), 5) + } else if let Ok(op) = word.parse::() { + (Some(Ok(Token::Value(Type::Operator(op)))), word.len()) + } else if c.is_alphabetic() { + (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) + } else if let (Some(x), len) = is_int(&word) { + (Some(x), len) + } else { + is_float(&word) + } +} + +pub fn make_word(expr: &str) -> String { + let word = expr.split(|c: char| { + c.is_whitespace() + }).next().unwrap(); + let termination = |c: char| { c == ')' || c == '('}; + let word_length = word.find(termination).unwrap_or_else(|| word.len()); + word[0..word_length].to_string() +} + +pub fn get_word_end(expr: &str) -> usize { + let word = expr.split(|c: char| { c.is_whitespace() }).next().unwrap(); + let termination_predicate = |c: char| { c == ')' || c == '('}; + word.find(termination_predicate).unwrap_or_else(|| word.len()) +} + +pub fn is_paren(expr: &str) -> MaybeToken { + match expr.chars().next().unwrap() { + '(' => (Some(Ok(Token::LParen)), 1), + ')' => (Some(Ok(Token::RParen)), 1), + _ => (None, 0) + } +} + +pub fn is_op(expr: &str) -> MaybeToken { + let word = make_word(expr); + match word.parse::() { + Ok(op) => (Some(Ok(Token::Value(Type::Operator(op)))), word.len()), + _ => (None, 0) + } +} + + +pub fn is_bool(expr: &str) -> MaybeToken { + let word = make_word(expr); + match word.as_ref() { + "true" => (Some(Ok(Token::Value(Type::Bool(true)))), 4), + "false" => (Some(Ok(Token::Value(Type::Bool(false)))), 5), + _ => (None, 0) + } +} + +pub fn is_var(expr: &str) -> MaybeToken { + let word = make_word(expr); + let c = word.chars().next().unwrap(); + if c.is_alphabetic() { + (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) + } else { + (None, 0) + } +} + +pub fn is_string(expr: &str) -> MaybeToken { + let c = expr.chars().next().unwrap(); + if c == '"' { + let close = get_string_end(expr); + let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); + let expr_len = close + 2; + (Some(Ok(value)), expr_len) + } else { + (None, 0) + } +} + +fn get_string_end(expr: &str) -> usize { + let mut previous = '"'; + let maybe_close = expr[1..].find(|current: char| { + if current == '"' && previous != '\\' { + true + } else { + previous = current; + false + } + }); + + match maybe_close { + Some(x) => x, + None => panic!("No string ending found!") + } +} + +pub fn is_number(expr: &str) -> MaybeToken { + let word = make_word(expr); + if let (Some(x), len) = is_int(&word) { + (Some(x), len) + } else { + is_float(&word) + } +} + +pub fn is_int(word: &str) -> MaybeToken { + //let word = make_word(expr); + match word.parse::() { + Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Int(x))))), word.len()), + _ => (None, 0) + } +} + +pub fn is_float(word: &str) -> MaybeToken { + //let word = make_word(expr); + match word.parse::() { + Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Float(x))))), word.len()), + _ => (None, 0) + } +} + +pub fn parse(expr: &str) -> Result { + let mut tokenstream = TokenStream::default(expr); + match tokenstream.peek() { + Some(Ok(Token::LParen)) => { + let _ = tokenstream.next(); + descend(&mut tokenstream) + }, + Some(Ok(Token::RParen)) => Err("Malformed expression".to_string()), + Some(Ok(Token::Value(x))) => Ok(SEXP::Atom(x)), + Some(Err(f)) => Err(f), + None => Err("Empty expression".to_string()) + } +} + +pub fn descend(tokenstream: &mut TokenStream) -> Result { + let mut sexp = Vec::new(); + loop { + let token = match tokenstream.next() { + Some(Ok(x)) => x, + Some(Err(f)) => return Err(f), + None => panic!("Empty string".to_string()) + }; + + match token { + Token::LParen => { + let sexp_inner = match descend(tokenstream) { + Ok(x) => x, + Err(f) => return Err(f) + }; + sexp.push(sexp_inner); + continue; + }, + Token::RParen => { + break; + }, + Token::Value(atom) => { + sexp.push(SEXP::Atom(atom)); + continue; + } + } + } + + Ok(SEXP::Sexpr(sexp)) +} diff --git a/src/lib/sexpr.rs b/src/lib/sexpr.rs new file mode 100644 index 0000000..0d37332 --- /dev/null +++ b/src/lib/sexpr.rs @@ -0,0 +1,7 @@ +use super::types::Type; + +pub enum SEXP { + Atom(Type), + List(Vec) +} + diff --git a/src/lib/types.rs b/src/lib/types.rs new file mode 100644 index 0000000..4c2900a --- /dev/null +++ b/src/lib/types.rs @@ -0,0 +1,97 @@ +use std::str::FromStr; + +#[derive(PartialEq, Debug, Clone, Copy)] +pub enum Op { + Add, + Sub, + Mul, + Div, + Modulo, + Equals, + Greater, + GrEqTo, + Less, + LsEqTo, + Define, +} + +impl FromStr for Op { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "+" => Ok(Op::Add), + "-" => Ok(Op::Sub), + "*" => Ok(Op::Mul), + "/" => Ok(Op::Div), + "%" => Ok(Op::Modulo), + "=" => Ok(Op::Equals), + ">" => Ok(Op::Greater), + ">=" => Ok(Op::GrEqTo), + "<" => Ok(Op::Less), + "<=" => Ok(Op::LsEqTo), + "define" => Ok(Op::Define), + x => Err(format!("{} is not an operator", x)) + } + } +} + +#[derive(PartialEq, Debug, Clone)] +pub enum Number { + Int(isize), + Float(f32) +} + +#[derive(PartialEq, Debug, Clone)] +pub enum Type { + Bool(bool), + Number(Number), + Str(String), + Symbol(String), + Operator(Op), +} + +#[derive(PartialEq, Debug)] +pub enum SEXP { + Atom(Type), + Sexpr(Vec) +} + +#[test] +fn construct() { + let atom1 = SEXP::Atom(Type::Number(Number::Int(1))); + let atom2 = SEXP::Atom(Type::Number(Number::Int(2))); + let atom3 = SEXP::Atom(Type::Number(Number::Int(3))); + let atom4 = SEXP::Atom(Type::Number(Number::Int(4))); + let sexp = SEXP::Sexpr(vec!(atom1, atom2, atom3, atom4)); + match sexp { + SEXP::Sexpr(ref x) => { + assert_eq!(x[0], SEXP::Atom(Type::Number(Number::Int(1)))); + }, + _ => panic!("What") + } +} + +#[test] +fn mutability() { + let atom1 = SEXP::Atom(Type::Number(Number::Int(1))); + let atom2 = SEXP::Atom(Type::Number(Number::Int(2))); + let atom3 = SEXP::Atom(Type::Number(Number::Int(3))); + let atom4 = SEXP::Atom(Type::Number(Number::Int(4))); + let mut sexp = SEXP::Sexpr(vec!(atom1, atom2, atom3, atom4)); + match sexp { + SEXP::Sexpr(ref mut x) => match x[0] { + SEXP::Atom(Type::Number(Number::Int(ref mut x))) => *x += 7, + _ => panic!("What") + }, + _ => panic!("What") + } + + match sexp { + SEXP::Sexpr(ref x) => { + assert_eq!(x[0], SEXP::Atom(Type::Number(Number::Int(8)))); + }, + _ => panic!("What") + } +} + diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..15cf60b --- /dev/null +++ b/src/main.rs @@ -0,0 +1,71 @@ +#[cfg(test)] +pub mod tests; + +pub mod lib; + +use rustyline::error::ReadlineError; +use rustyline::Editor; + +fn read(rl: &mut Editor) -> Option { + let readline = rl.readline(">> "); + match readline { + Ok(line) => { + rl.add_history_entry(line.as_str()); + Some(line) + }, + Err(ReadlineError::Interrupted) => { + println!("CTRL-C"); + None + }, + Err(ReadlineError::Eof) => { + println!("CTRL-D"); + None + }, + Err(err) => { + println!("Error: {:?}", err); + None + } + } +} + +fn means_exit(input: &str) -> bool { + match input { + "exit" | "quit" | ",q" => true, + _ => false + } +} + +fn eval(env: &mut lib::eval::Env, input: &str) -> String { + let sexp = match lib::parse::parse(input) { + Ok(x) => x, + Err(f) => return f + }; + + println!("{:?}", sexp); + let res = lib::eval::eval(&sexp, env); + match res { + Ok(x) => format!("{:?}", x), + Err(f) => f + } +} + +fn main() { + let mut env = lib::eval::Env::new(); + let hist_file = "history.txt"; + + // `()` can be used when no completer is required + let mut rl = Editor::<()>::new(); + if rl.load_history(hist_file).is_err() { + println!("No previous history."); + } + loop { + let input_line = read(&mut rl); + let line = match input_line { + None => break, + Some(expr) if means_exit(&expr) => break, + Some(expr) => expr + }; + println!("{}", eval(&mut env, &line)); + } + rl.save_history(hist_file).unwrap(); +} diff --git a/src/tests/mod.rs b/src/tests/mod.rs new file mode 100644 index 0000000..0b03ef2 --- /dev/null +++ b/src/tests/mod.rs @@ -0,0 +1 @@ +pub mod parse_test; diff --git a/src/tests/parse_test.rs b/src/tests/parse_test.rs new file mode 100644 index 0000000..b9e98b4 --- /dev/null +++ b/src/tests/parse_test.rs @@ -0,0 +1,67 @@ +use super::super::lib::types::Op::*; +use super::super::lib::types::Type::*; +use super::super::lib::types::Number; +use super::super::lib::parse::{is_paren, is_op, is_bool, is_var, is_number}; +use super::super::lib::parse::Token::*; +use super::super::lib::parse::TokenStream; + +#[test] +pub fn token_test() { + let add_string = "(+ 1 1)".to_string(); + let add_tokens = vec!(LParen, Value(Operator(Add)), Value(Number(Number::Int(1))), + Value(Number(Number::Int(1))), RParen); + + let tokenstream = TokenStream::new(add_string, + vec!(is_paren, is_op, is_bool, is_var, is_number)); + + let mut tokens = Vec::new(); + for token in tokenstream { + match token { + Ok(x) => tokens.push(x), + Err(f) => panic!(f) + } + } + assert!(tokens.len() == add_tokens.len()); + for (token1, token2) in tokens.iter().zip(add_tokens) { + assert!(token1 == &token2); + } +} + +#[test] +pub fn types_test() { + let foo = "(= (> 3.0 0) true)".to_string(); + + let tokenstream = TokenStream::new(foo, + vec!(is_paren, is_op, is_bool, is_var, is_number)); + + let correct_tokens = vec!(LParen, Value(Operator(Equals)), LParen, + Value(Operator(Greater)), + Value(Number(Number::Float(3.0))), + Value(Number(Number::Int(0))), + RParen, Value(Bool(true)), RParen); + + for (foo_token, correct_token) in tokenstream.zip(correct_tokens.into_iter()) { + match foo_token { + Ok(x) => assert!(x == correct_token), + Err(f) => panic!(f) + } + } +} + +#[test] +pub fn string_test() { + let bar = "(define foo \t \n \t \"Bar \\\" Baz\")"; + let tokenstream = TokenStream::default(bar); + let correct_tokens = vec!(LParen, Value(Operator(Define)), Value(Symbol("foo".to_string())), + Value(Str("Bar \\\" Baz".to_string())), RParen); + for (bar_token, correct_token) in tokenstream.zip(correct_tokens.into_iter()) { + match bar_token { + Ok(x) => { + if x != correct_token { + panic!(format!("{:?} != {:?}", x, correct_token)) + } + }, + Err(f) => panic!(f) + } + } +} -- cgit v1.2.3