From 2a62f88ca03d5db406756525b5a6e24eae027eab Mon Sep 17 00:00:00 2001 From: Dominick Allen Date: Fri, 26 Jun 2020 21:56:34 -0500 Subject: Rename parse to tokenize, add Environment. --- src/lib/environment.rs | 15 +++ src/lib/parse.rs | 296 ------------------------------------------------- src/lib/tokenize.rs | 296 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 311 insertions(+), 296 deletions(-) create mode 100644 src/lib/environment.rs delete mode 100644 src/lib/parse.rs create mode 100644 src/lib/tokenize.rs diff --git a/src/lib/environment.rs b/src/lib/environment.rs new file mode 100644 index 0000000..ef00e13 --- /dev/null +++ b/src/lib/environment.rs @@ -0,0 +1,15 @@ +use std::collections::HashMap; + +use super::sexpr::SExpr; + +pub struct Environment { + env: HashMap +} + +impl Environment { + pub fn new() -> Environment { + Environment { + env: HashMap::new() + } + } +} diff --git a/src/lib/parse.rs b/src/lib/parse.rs deleted file mode 100644 index 047b374..0000000 --- a/src/lib/parse.rs +++ /dev/null @@ -1,296 +0,0 @@ -use super::types::Type; -use super::types::Number; -use super::types::Op; -use super::sexpr::SExpr; - -pub type MaybeToken = (Option>, usize); - -#[derive(PartialEq, Debug)] -pub enum Token { - LParen, - RParen, - Value(Type) -} - -pub struct TokenStream { - expr: String, - index: usize, - rules: Vec MaybeToken>, - on_err: String, -} - -impl TokenStream { - /// Creates a new TokenStream object with the provided string. - pub fn new(expr: String, rules: Vec MaybeToken>) -> TokenStream { - TokenStream { - expr, - index: 0, - rules, - on_err: "ERROR".to_string(), - } - } - - pub fn default(e: &str) -> TokenStream { - TokenStream { - expr: e.to_string(), - index: 0, - rules: vec!(is_paren, is_op, is_bool, is_var, is_string, is_number), - on_err: "ERROR".to_string(), - } - } - - pub fn peek(&self) -> Option> { - - let i = self.count_whitespace(); - if self.index + i == self.expr.len() { - return None - } - /* - let (token, _) = analyze(&self.expr[self.index + i..], - self.rules.as_slice(), - &self.on_err); - */ - let (token, _) = analyze2(&self.expr[self.index + i ..]); - token - } - - - fn count_whitespace(&self) -> usize { - let mut whitespace_count = 0; - for x in self.expr[self.index..].chars() { - if x.is_whitespace() { - whitespace_count += 1; - } else { - break - } - } - whitespace_count - } - - fn skip_whitespace(&mut self) { - if self.index < self.expr.len() { - self.index += self.count_whitespace(); - } - } -} - -impl Iterator for TokenStream { - type Item = Result; - - fn next(&mut self) -> Option { - if self.index == self.expr.len() { - return None - } - - self.skip_whitespace(); - /* - let (token, len) = analyze( - &self.expr[self.index..], - self.rules.as_ref(), &self.on_err); - */ - let (token, len) = analyze2(&self.expr[self.index ..]); - self.index += len; - token - } - - fn size_hint(&self) -> (usize, Option) { - if self.index == self.expr.len() { - (0, None) - } else { - (1, Some(self.expr.len() - self.index)) - } - } -} - -pub fn analyze(expr: &str, funs: &[fn(&str) -> MaybeToken], - on_err: &str) -> MaybeToken { - for &fun in funs.iter() { - let (token, len) = fun(expr); - if token.is_some() { - return (token, len) - } - } - - (Some(Err(on_err.to_string())), 0) -} - -fn analyze2(expr: &str) -> MaybeToken { - //is_var, is_number - let c = expr.chars().next().unwrap(); - /* Check for strings, ( and ) */ - if c == '"' { - let close = get_string_end(expr); - let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); - let expr_len = close + 2; - return (Some(Ok(value)), expr_len) - } else if c == '(' { - return (Some(Ok(Token::LParen)), 1) - } else if c == ')' { - return (Some(Ok(Token::RParen)), 1) - } - - let word = &expr[0 .. get_word_end(expr)]; - if word == "true" { - (Some(Ok(Token::Value(Type::Bool(true)))), 4) - } else if word == "false" { - (Some(Ok(Token::Value(Type::Bool(false)))), 5) - } else if let Ok(op) = word.parse::() { - (Some(Ok(Token::Value(Type::Operator(op)))), word.len()) - } else if c.is_alphabetic() { - (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) - } else if let (Some(x), len) = is_int(&word) { - (Some(x), len) - } else { - is_float(&word) - } -} - -pub fn make_word(expr: &str) -> String { - let word = expr.split(|c: char| { - c.is_whitespace() - }).next().unwrap(); - let termination = |c: char| { c == ')' || c == '('}; - let word_length = word.find(termination).unwrap_or_else(|| word.len()); - word[0..word_length].to_string() -} - -pub fn get_word_end(expr: &str) -> usize { - let word = expr.split(|c: char| { c.is_whitespace() }).next().unwrap(); - let termination_predicate = |c: char| { c == ')' || c == '('}; - word.find(termination_predicate).unwrap_or_else(|| word.len()) -} - -pub fn is_paren(expr: &str) -> MaybeToken { - match expr.chars().next().unwrap() { - '(' => (Some(Ok(Token::LParen)), 1), - ')' => (Some(Ok(Token::RParen)), 1), - _ => (None, 0) - } -} - -pub fn is_op(expr: &str) -> MaybeToken { - let word = make_word(expr); - match word.parse::() { - Ok(op) => (Some(Ok(Token::Value(Type::Operator(op)))), word.len()), - _ => (None, 0) - } -} - - -pub fn is_bool(expr: &str) -> MaybeToken { - let word = make_word(expr); - match word.as_ref() { - "true" => (Some(Ok(Token::Value(Type::Bool(true)))), 4), - "false" => (Some(Ok(Token::Value(Type::Bool(false)))), 5), - _ => (None, 0) - } -} - -pub fn is_var(expr: &str) -> MaybeToken { - let word = make_word(expr); - let c = word.chars().next().unwrap(); - if c.is_alphabetic() { - (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) - } else { - (None, 0) - } -} - -pub fn is_string(expr: &str) -> MaybeToken { - let c = expr.chars().next().unwrap(); - if c == '"' { - let close = get_string_end(expr); - let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); - let expr_len = close + 2; - (Some(Ok(value)), expr_len) - } else { - (None, 0) - } -} - -fn get_string_end(expr: &str) -> usize { - let mut previous = '"'; - let maybe_close = expr[1..].find(|current: char| { - if current == '"' && previous != '\\' { - true - } else { - previous = current; - false - } - }); - - match maybe_close { - Some(x) => x, - None => panic!("No string ending found!") - } -} - -pub fn is_number(expr: &str) -> MaybeToken { - let word = make_word(expr); - if let (Some(x), len) = is_int(&word) { - (Some(x), len) - } else { - is_float(&word) - } -} - -pub fn is_int(word: &str) -> MaybeToken { - //let word = make_word(expr); - match word.parse::() { - Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Int(x))))), word.len()), - _ => (None, 0) - } -} - -pub fn is_float(word: &str) -> MaybeToken { - //let word = make_word(expr); - match word.parse::() { - Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Float(x))))), word.len()), - _ => (None, 0) - } -} - -pub fn parse(expr: &str) -> Result { - let mut tokenstream = TokenStream::default(expr); - match tokenstream.peek() { - Some(Ok(Token::LParen)) => { - let _ = tokenstream.next(); - descend(&mut tokenstream) - }, - Some(Ok(Token::RParen)) => Err("Malformed expression".to_string()), - Some(Ok(Token::Value(x))) => Ok(SExpr::Atom(x)), - Some(Err(f)) => Err(f), - None => Err("Empty expression".to_string()) - } -} - -pub fn descend(tokenstream: &mut TokenStream) -> Result { - let mut sexp = Vec::new(); - loop { - let token = match tokenstream.next() { - Some(Ok(x)) => x, - Some(Err(f)) => return Err(f), - None => panic!("Empty string".to_string()) - }; - - match token { - Token::LParen => { - let sexp_inner = match descend(tokenstream) { - Ok(x) => x, - Err(f) => return Err(f) - }; - sexp.push(sexp_inner); - continue; - }, - Token::RParen => { - break; - }, - Token::Value(atom) => { - sexp.push(SExpr::Atom(atom)); - continue; - } - } - } - - Ok(SExpr::Sexpr(sexp)) -} diff --git a/src/lib/tokenize.rs b/src/lib/tokenize.rs new file mode 100644 index 0000000..384b04f --- /dev/null +++ b/src/lib/tokenize.rs @@ -0,0 +1,296 @@ +use super::types::Type; +use super::types::Number; +use super::types::Op; +use super::types::SEXP; + +pub type MaybeToken = (Option>, usize); + +#[derive(PartialEq, Debug)] +pub enum Token { + LParen, + RParen, + Value(Type) +} + +pub struct TokenStream { + expr: String, + index: usize, + rules: Vec MaybeToken>, + on_err: String, +} + +impl TokenStream { + /// Creates a new TokenStream object with the provided string. + pub fn new(expr: String, rules: Vec MaybeToken>) -> TokenStream { + TokenStream { + expr, + index: 0, + rules, + on_err: "ERROR".to_string(), + } + } + + pub fn default(e: &str) -> TokenStream { + TokenStream { + expr: e.to_string(), + index: 0, + rules: vec!(is_paren, is_op, is_bool, is_var, is_string, is_number), + on_err: "ERROR".to_string(), + } + } + + pub fn peek(&self) -> Option> { + + let i = self.count_whitespace(); + if self.index + i == self.expr.len() { + return None + } + /* + let (token, _) = analyze(&self.expr[self.index + i..], + self.rules.as_slice(), + &self.on_err); + */ + let (token, _) = analyze2(&self.expr[self.index + i ..]); + token + } + + + fn count_whitespace(&self) -> usize { + let mut whitespace_count = 0; + for x in self.expr[self.index..].chars() { + if x.is_whitespace() { + whitespace_count += 1; + } else { + break + } + } + whitespace_count + } + + fn skip_whitespace(&mut self) { + if self.index < self.expr.len() { + self.index += self.count_whitespace(); + } + } +} + +impl Iterator for TokenStream { + type Item = Result; + + fn next(&mut self) -> Option { + if self.index == self.expr.len() { + return None + } + + self.skip_whitespace(); + /* + let (token, len) = analyze( + &self.expr[self.index..], + self.rules.as_ref(), &self.on_err); + */ + let (token, len) = analyze2(&self.expr[self.index ..]); + self.index += len; + token + } + + fn size_hint(&self) -> (usize, Option) { + if self.index == self.expr.len() { + (0, None) + } else { + (1, Some(self.expr.len() - self.index)) + } + } +} + +pub fn analyze(expr: &str, funs: &[fn(&str) -> MaybeToken], + on_err: &str) -> MaybeToken { + for &fun in funs.iter() { + let (token, len) = fun(expr); + if token.is_some() { + return (token, len) + } + } + + (Some(Err(on_err.to_string())), 0) +} + +fn analyze2(expr: &str) -> MaybeToken { + //is_var, is_number + let c = expr.chars().next().unwrap(); + /* Check for strings, ( and ) */ + if c == '"' { + let close = get_string_end(expr); + let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); + let expr_len = close + 2; + return (Some(Ok(value)), expr_len) + } else if c == '(' { + return (Some(Ok(Token::LParen)), 1) + } else if c == ')' { + return (Some(Ok(Token::RParen)), 1) + } + + let word = &expr[0 .. get_word_end(expr)]; + if word == "true" { + (Some(Ok(Token::Value(Type::Bool(true)))), 4) + } else if word == "false" { + (Some(Ok(Token::Value(Type::Bool(false)))), 5) + } else if let Ok(op) = word.tokenize::() { + (Some(Ok(Token::Value(Type::Operator(op)))), word.len()) + } else if c.is_alphabetic() { + (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) + } else if let (Some(x), len) = is_int(&word) { + (Some(x), len) + } else { + is_float(&word) + } +} + +pub fn make_word(expr: &str) -> String { + let word = expr.split(|c: char| { + c.is_whitespace() + }).next().unwrap(); + let termination = |c: char| { c == ')' || c == '('}; + let word_length = word.find(termination).unwrap_or_else(|| word.len()); + word[0..word_length].to_string() +} + +pub fn get_word_end(expr: &str) -> usize { + let word = expr.split(|c: char| { c.is_whitespace() }).next().unwrap(); + let termination_predicate = |c: char| { c == ')' || c == '('}; + word.find(termination_predicate).unwrap_or_else(|| word.len()) +} + +pub fn is_paren(expr: &str) -> MaybeToken { + match expr.chars().next().unwrap() { + '(' => (Some(Ok(Token::LParen)), 1), + ')' => (Some(Ok(Token::RParen)), 1), + _ => (None, 0) + } +} + +pub fn is_op(expr: &str) -> MaybeToken { + let word = make_word(expr); + match word.tokenize::() { + Ok(op) => (Some(Ok(Token::Value(Type::Operator(op)))), word.len()), + _ => (None, 0) + } +} + + +pub fn is_bool(expr: &str) -> MaybeToken { + let word = make_word(expr); + match word.as_ref() { + "true" => (Some(Ok(Token::Value(Type::Bool(true)))), 4), + "false" => (Some(Ok(Token::Value(Type::Bool(false)))), 5), + _ => (None, 0) + } +} + +pub fn is_var(expr: &str) -> MaybeToken { + let word = make_word(expr); + let c = word.chars().next().unwrap(); + if c.is_alphabetic() { + (Some(Ok(Token::Value(Type::Symbol(word.to_string())))), word.len()) + } else { + (None, 0) + } +} + +pub fn is_string(expr: &str) -> MaybeToken { + let c = expr.chars().next().unwrap(); + if c == '"' { + let close = get_string_end(expr); + let value = Token::Value(Type::Str(expr[1 .. close + 1].to_string())); + let expr_len = close + 2; + (Some(Ok(value)), expr_len) + } else { + (None, 0) + } +} + +fn get_string_end(expr: &str) -> usize { + let mut previous = '"'; + let maybe_close = expr[1..].find(|current: char| { + if current == '"' && previous != '\\' { + true + } else { + previous = current; + false + } + }); + + match maybe_close { + Some(x) => x, + None => panic!("No string ending found!") + } +} + +pub fn is_number(expr: &str) -> MaybeToken { + let word = make_word(expr); + if let (Some(x), len) = is_int(&word) { + (Some(x), len) + } else { + is_float(&word) + } +} + +pub fn is_int(word: &str) -> MaybeToken { + //let word = make_word(expr); + match word.tokenize::() { + Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Int(x))))), word.len()), + _ => (None, 0) + } +} + +pub fn is_float(word: &str) -> MaybeToken { + //let word = make_word(expr); + match word.tokenize::() { + Ok(x) => (Some(Ok(Token::Value(Type::Number(Number::Float(x))))), word.len()), + _ => (None, 0) + } +} + +pub fn tokenize(expr: &str) -> Result { + let mut tokenstream = TokenStream::default(expr); + match tokenstream.peek() { + Some(Ok(Token::LParen)) => { + let _ = tokenstream.next(); + descend(&mut tokenstream) + }, + Some(Ok(Token::RParen)) => Err("Malformed expression".to_string()), + Some(Ok(Token::Value(x))) => Ok(SEXP::Atom(x)), + Some(Err(f)) => Err(f), + None => Err("Empty expression".to_string()) + } +} + +pub fn descend(tokenstream: &mut TokenStream) -> Result { + let mut sexp = Vec::new(); + loop { + let token = match tokenstream.next() { + Some(Ok(x)) => x, + Some(Err(f)) => return Err(f), + None => panic!("Empty string".to_string()) + }; + + match token { + Token::LParen => { + let sexp_inner = match descend(tokenstream) { + Ok(x) => x, + Err(f) => return Err(f) + }; + sexp.push(sexp_inner); + continue; + }, + Token::RParen => { + break; + }, + Token::Value(atom) => { + sexp.push(SEXP::Atom(atom)); + continue; + } + } + } + + Ok(SEXP::Sexpr(sexp)) +} -- cgit v1.2.3