Expression Interpreter

Hello!
I am looking for an expression interpreter (rust library). I need to do arithmetic and bitwise operations, conditions.

What do you recommend to me?

You might find it fun and educational to write an expression parser yourself.

There is an excellent description of how to do that, with code examples in Pascal, in the article "Let's Build A Compiler" by Jack Crenshaw : Let's Build a Compiler

Chapters 2, 3 and 4 are all about expression interpretation. It's not so long and complicated.

2 Likes

Expression grammar

atom = number | "(" expression ")";
power = atom ["^" atom];
negation = ["-"] power;
multiplication = negation {("*" | "/") negation};
addition = multiplication {("+" | "-") multiplication};
expression = addition;

Interpreter

What follows is a recursive descent parser which produces an abstract syntax tree. The tree is then evaluated recursively.

enum Symbol {Number(i32), Symbol(char), None}
struct Token {symbol: Symbol, col: usize}
enum AST {Number(i32), Symbol(char), List(Box<[AST]>)}
struct SyntaxError {col: usize, text: String}

fn syntax_error<T: Into<String>>(col: usize, text: T) -> SyntaxError {
    SyntaxError {col, text: text.into()}
}

fn print_syntax_error(e: &SyntaxError, s: &str) {
    println!("\n{}", s);
    for _ in 0..e.col {print!(" ");}
    println!("^");
    println!("Syntax error: {}\n", e.text);
}

fn scan(s: &str) -> Result<Vec<Token>, SyntaxError> {
    let a = s.as_bytes();
    let mut acc: Vec<Token> = Vec::new();
    let mut i = 0;
    let mut col = 0;
    while let Some(&c) = a.get(i) {
        if let b'+' | b'-' | b'*' | b'/' | b'^' | b'(' | b')' = c {
            acc.push(Token {col, symbol: Symbol::Symbol(char::from(c))});
            i += 1; col += 1;
        } else if c.is_ascii_digit() {
            let j = i;
            while let Some(&c) = a.get(i) {
                if !c.is_ascii_digit() {break;}
                i += 1; col += 1;
            }
            let digits = std::str::from_utf8(&a[j..i]).unwrap();
            if let Ok(x) = digits.parse::<i32>() {
                acc.push(Token {col, symbol: Symbol::Number(x)});
            } else {
                return Err(syntax_error(col, "invalid number"));
            };
        } else if let b' ' | b'\t' | b'\n' = c {
            i += 1;
            if c == b'\n' {col = 0} else {col += 1;}
        } else {
            let c = s[i..].chars().next().unwrap();
            return Err(syntax_error(col,
                format!("unexpected character: '{}'", c)));
        }
    }
    acc.push(Token {col, symbol: Symbol::None});
    Ok(acc)
}

type SyntaxResult = Result<(AST, usize), SyntaxError>;

struct Parser<'a> {
    a: &'a [Token],
    level: u32
}

impl Parser<'_> {
    fn atom(&mut self, i: usize) -> SyntaxResult {
        match self.a[i].symbol {
            Symbol::Number(x) => Ok((AST::Number(x), i + 1)),
            Symbol::Symbol(c) => {
                if c == '(' {
                    if self.level == 40 {
                        return Err(syntax_error(self.a[i].col,
                            "too much nesting"));
                    }
                    self.level += 1;
                    let (x, i) = self.expression(i + 1)?;
                    self.level -= 1;
                    if let Symbol::Symbol(')') = self.a[i].symbol {
                        Ok((x, i + 1))
                    } else {
                        Err(syntax_error(self.a[i].col,
                            "a closing bracket was expected"))
                    }
                } else {
                    Err(syntax_error(self.a[i].col, format!(
                        "the symbol '{}' was not expected", c)))
                }
            },
            Symbol::None => {
                Err(syntax_error(self.a[i].col,
                    "unexpected end of input"))
            }
        }
    }

    fn power(&mut self, i: usize) -> SyntaxResult {
        let (x, i) = self.atom(i)?;
        if let Symbol::Symbol('^') = self.a[i].symbol {
            let (y, i) = self.atom(i + 1)?;
            Ok((AST::List(Box::new([AST::Symbol('^'),x , y])), i))
        } else {
            Ok((x, i))
        }
    }
    
    fn negation(&mut self, i: usize) -> SyntaxResult {
        if let Symbol::Symbol('-') = self.a[i].symbol {
            let (x, i) = self.power(i + 1)?;
            Ok((AST::List(Box::new([AST::Symbol('~'), x])), i))
        } else {
            self.power(i)
        }
    }
    
    fn multiplication(&mut self, i: usize) -> SyntaxResult {
        let (mut x, mut i) = self.negation(i)?;
        while let Symbol::Symbol(op)= self.a[i].symbol {
            if op == '*' || op == '/' {
                let (y, j) = self.negation(i + 1)?;
                x = AST::List(Box::new([AST::Symbol(op), x, y]));
                i = j;
            } else {
                break;
            }
        }
        Ok((x, i))
    }
    
    fn addition(&mut self, i: usize) -> SyntaxResult {
        let (mut x, mut i) = self.multiplication(i)?;
        while let Symbol::Symbol(op) = self.a[i].symbol {
            if op == '+' || op == '-' {
                let (y, j) = self.multiplication(i + 1)?;
                x = AST::List(Box::new([AST::Symbol(op), x, y]));
                i = j;
            } else {
                break;
            }
        }
        Ok((x, i))
    }
    
    fn expression(&mut self, i: usize) -> SyntaxResult {
        self.addition(i)
    }
    
    fn full_expression(&mut self) -> Result<AST, SyntaxError> {
        let (x, i) = self.expression(0)?;
        if let Symbol::None = self.a[i].symbol {
            Ok(x)
        } else {
            Err(syntax_error(self.a[i].col,
                "end of input was expected"))
        }
    }
}

fn pow(x: i32, y: i32) -> Option<i32> {
    use std::convert::TryFrom;
    if let Ok(y) = u32::try_from(y) {x.checked_pow(y)} else {None}
}

fn eval(t: &AST) -> Result<i32, String> {
    match *t {
        AST::List(ref a) => {
            let op = match a[0] {
                AST::Symbol(c) => c,
                _ => return Err(String::from("Unknown symbol"))
            };
            let value = match op {
                '+' => eval(&a[1])?.checked_add(eval(&a[2])?),
                '-' => eval(&a[1])?.checked_sub(eval(&a[2])?),
                '*' => eval(&a[1])?.checked_mul(eval(&a[2])?),
                '/' => eval(&a[1])?.checked_div(eval(&a[2])?),
                '^' => pow(eval(&a[1])?, eval(&a[2])?),
                '~' => eval(&a[1])?.checked_neg(),
                _ => return Err(format!("Unknown operator: '{}'", op))
            };
            value.ok_or_else(|| String::from("domain"))
        },
        AST::Number(x) => Ok(x),
        AST::Symbol(_) => Err(String::from("Unexpected operator"))
    }
}

fn ast(s: &str) -> Result<AST, SyntaxError> {
    Parser {a: &scan(s)?, level: 0}.full_expression()
}

fn input(prompt: &str) -> std::io::Result<String> {
    use std::{io, io::Write};
    let mut buffer = String::new();
    print!("{}", prompt);
    io::stdout().flush()?;
    io::stdin().read_line(&mut buffer)?;
    if buffer.ends_with('\n') {
        buffer.pop();
        if buffer.ends_with('\r') {buffer.pop();}
    }
    Ok(buffer)
}

fn main() {
    loop {
        let line = input("> ").unwrap();
        if line.is_empty() {continue;}
        let t = match ast(&line) {
            Ok(x) => x,
            Err(e) => {
                print_syntax_error(&e, &line);
                continue;
            }
        };
        match eval(&t) {
            Ok(value) => println!("Result: {}\n", value),
            Err(e) => println!("Evaluation error: {}\n", e)
        }
    }
}

1 Like

I'll add a recommendation for Kevin Hall's rust-peg crate here which implements Bryan Ford's Parsing Expression Grammars (PEG).

With almost no Rust background, I was able to write a small grammar at first try without major problems. Rust's style of unique ownership and automatic memory reclamation helps with the "packrat" nature of these grammars even in error cases that can provide for problematic code in other non-GC languages. It's not the most efficient (*), though, so don't use it in your inner loop without additional tuning.

As an example, consider this fully self-contained example:

peg::parser!( grammar arithmetic() for str {
    rule number() -> i64
        = n:$(['0'..='9']+) { n.parse().unwrap() }

    pub(crate) rule calculate() -> i64 = precedence!{
        x:(@) "+" y:@ { x + y }
        x:(@) "-" y:@ { x - y }
              "-" v:@ { - v }
        --
        x:(@) "*" y:@ { x * y }
        x:(@) "/" y:@ { x / y }
        --
        x:@   "^" y:(@) { x.pow(y as u32) }
        v:@   "!"       { (1..v+1).product() }
        --
        "(" v:calculate() ")" { v }
        n:number() {n}
    }
});

which then is invoked as:

fn main() {
    assert_eq!(arithmetic::calculate("3+3*3+3"), Ok(15));
    assert_eq!(arithmetic::calculate("2+2^2^2^2/2+2"), Ok(32772));
    assert_eq!(arithmetic::calculate("1024/2/2/2+1"), Ok(129));
    assert_eq!(arithmetic::calculate("1024/(1+1)/2/2+1"), Ok(129));
    assert_eq!(arithmetic::calculate("-1-2*-2"), Ok(3));
    assert_eq!(arithmetic::calculate("1+3!+1"), Ok(8));
}

which I find pretty neat.

(*) although it may be optimal for this kind of grammar.

To the interested reader. In my solution one might do the grammar change

power = atom ["^" negation];

and then implement this naively in power by replacing the line

let (y, i) = self.atom(i + 1)?;

by

let (y, i) = self.negation(i + 1)?;

Question: What's the flaw?

This grammar doesn't parse 3^2^3. (I'm referring to your original grammar, not the post immediately above).

After the change is made, 3^2^3 is parsed as 3^(2^3). It is another problem that emerges: You can type an input that leads to a panic. This input, however, must be rather long-winded.

Source files can be 20KSLOC long (in some cases, and depending on language, even longer). While I wouldn't like that myself for navigational reasons, a production parser must be able to handle input of arbitrary length.

Thanks for reply.

I found this:
https://crates.io/crates/evalexpr
but it has no bitwise operations ;(

That's a perfect opportunity to see if the author is open to a PR. If they are, then you could add those operations, which would benefit you both.

1 Like

An implementation of the shunting-yard algorithm, which is another classic alternative. I conflated input parsing, operator-precedence parsing and postfix code evaluation to make the program as small as possible.

fn op_info(op: u8) -> Option<u32> {
    Some(match op {
        b'+' | b'-' => 0, b'*' | b'/' => 1, b'~' => 2,
        _ => return None
    })
}

fn operate(op: u8, buff: &mut Vec<i32>) -> Option<()> {
    let y = buff.pop()?;
    if op == b'~' {
        buff.push(y.checked_neg()?);
        return Some(());
    }
    let x = buff.pop()?;
    buff.push(match op {
        b'+' => x.checked_add(y),
        b'-' => x.checked_sub(y),
        b'*' => x.checked_mul(y),
        b'/' => x.checked_div(y),
        _ => None
    }?);
    Some(())
}

fn calc(expression: &str) -> Option<i32> {
    let a = expression.as_bytes();
    let len = a.len();
    let mut buff: Vec<i32> = Vec::with_capacity(16);
    let mut stack: Vec<u8> = Vec::with_capacity(16);
    let mut first = true;
    let mut i = 0;
    while i < len {
        let mut c = a[i];
        i += 1;
        if c == b'-' && first {c = b'~';}
        if c.is_ascii_digit() {
            let mut x: i32 =  i32::from(c - b'0');
            while i < len && a[i].is_ascii_digit() {
                let digit = i32::from(a[i] - b'0');
                x = x.checked_mul(10)?.checked_add(digit)?;
                i += 1;
            }
            buff.push(x);
            first = false;
        } else if let Some(prec) = op_info(c) {
            while let Some(&top) = stack.last() {
                if let Some(top_prec) = op_info(top) {
                    if prec > top_prec {break;}
                } else {
                    break;
                }
                operate(top, &mut buff);
                stack.pop();
            }
            stack.push(c);
        } else if c == b'(' {
            stack.push(c);
            first = true;
        } else if c == b')' {
            loop {
                if let Some(top) = stack.pop() {
                    if top == b'(' {break;}
                    operate(top, &mut buff);
                } else {
                    return None;
                }
            }
        } else if !matches!(c, b' ' | b'\t' | b'\n') {
            return None;
        }
    }
    while let Some(top) = stack.pop() {
        if top == b'(' {return None;}
        operate(top, &mut buff);
    }
    if buff.len() == 1 {Some(buff[0])} else {None}
}

fn test() {
    assert_eq!(Some(3), calc("1 + 2"));
    assert_eq!(Some(1), calc("2 - 1"));
    assert_eq!(Some(9), calc("3*(1 + 2)"));
    assert_eq!(Some(14), calc("1*2 + 3*4"));
    assert_eq!(Some(21), calc("(1 + 2)*(3 + 4)"));
    assert_eq!(Some(-1), calc("-1"));
    assert_eq!(Some(1), calc("-1 + 2"));
    assert_eq!(Some(1), calc("2 + (-1)"));
    assert_eq!(None, calc("(1 + 2"));
    assert_eq!(None, calc("1 + 2)"));
    assert_eq!(None, calc("1 +"));
}

fn main() {
    test();
}