Help refactoring this code

#1

I am writing the following Forth VM code. There’s lots of obvious repetition but not lots of obvious refactoring. Suggestions ?

use std::string::String;
use std::collections::HashMap;


#[derive(Debug)]
#[repr(u8)]
pub enum Instr {
    Print = 1,
    Add = 2,
    Sub = 3,
    Mul = 4,
    Div = 5,
    Mod = 6,
    VarStore = 7,
    VarGet = 8,
    And = 9,
    Or = 10,
    Not = 11,
    AltPush = 12,
    AltPop = 13,
    AltCopy = 14,

    Lt = 15,
    Le = 16,
    Eq = 17,
    Ge = 18,
    Gt = 19,

    Drop = 20,
}

#[derive(Debug, Clone)]
pub enum Data {
    Bool(bool),
    I32(i32),
    F32(f32),
    Str(String),
}


pub struct State {
    code: Vec<Instr>,
    data: Vec<Data>,
    alt: Vec<Data>,
    variables: HashMap<String, Data>,
}


pub fn exec(state: &mut State) {
    let State { code, data, alt, variables } = state;

    while let Some(op_code) = code.pop() {
        // println!("data: {:?}", data);
        match op_code {
            Instr::Print => {
                let x = data.pop().unwrap();
                println!("{:?}", x); },
            Instr::Add => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::I32(lhs + rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::F32(lhs + rhs)),
                    _ => panic!("+ fail"), } },
            Instr::Sub => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::I32(lhs - rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::F32(lhs - rhs)),
                    _ => panic!("- fail"), } },
            Instr::Mul => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::I32(lhs * rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::F32(lhs * rhs)),
                    _ => panic!("* fail"), } },
            Instr::Div => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::I32(lhs / rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::F32(lhs / rhs)),
                    _ => panic!("/ fail"), } },
            Instr::Mod => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::I32(lhs % rhs)),
                    _ => panic!("mod fail"), } },
            Instr::VarStore => {
                match data.pop().unwrap() {
                    Data::Str(name) => {
                        let value = data.pop().unwrap();
                        variables.insert(name, value); } ,
                    _ => panic!(), } },
            Instr::VarGet => {
                match data.pop().unwrap() {
                    Data::Str(name) => {
                        let value = variables.get(&name).unwrap();
                        data.push(value.clone()); },
                    _ => panic!(), } }
            Instr::And => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::Bool(lhs), Data::Bool(rhs)) => data.push(Data::Bool(lhs && rhs)),
                    _ => panic!("and fail"), } }
            Instr::Or => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::Bool(lhs), Data::Bool(rhs)) => data.push(Data::Bool(lhs || rhs)),
                    _ => panic!("or fail"), } }
            Instr::Not => {
                let lhs = data.pop().unwrap();
                match lhs {
                    Data::Bool(lhs) => data.push(Data::Bool(! lhs)),
                    _ => panic!("not fail"), } }
            Instr::AltPush => {
                let lhs = data.pop().unwrap();
                alt.push(lhs); },
            Instr::AltPop => {
                let lhs = alt.pop().unwrap();
                data.push(lhs); },
            Instr::AltCopy => {
                let lhs = alt.pop().unwrap();
                data.push(lhs); },
            Instr::Drop => { data.pop(); },

            Instr::Lt => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::Bool(lhs < rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::Bool(lhs < rhs)),
                    _ => panic!("lt fail") } },
            Instr::Le => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::Bool(lhs <= rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::Bool(lhs <= rhs)),
                    _ => panic!("le fail") } },
            Instr::Eq => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::Bool(lhs == rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::Bool(lhs == rhs)),
                    _ => panic!("eq fail"), } },
            Instr::Ge => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::Bool(lhs >= rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::Bool(lhs >= rhs)),
                    _ => panic!("ge fail"), } },
            Instr::Gt => {
                let rhs = data.pop().unwrap();
                let lhs = data.pop().unwrap();
                match (lhs, rhs) {
                    (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::Bool(lhs > rhs)),
                    (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::Bool(lhs > rhs)),
                    _ => panic!("gt fail"), } },

            _ => panic!(format!("unrecognized instr: {:?}", op_code))
        }
    }
}
#2

This is the sort of stuff that macros are perfect for!

1 Like
#3

@KrishnaSannasi : Want to write some of these macros? It’s much easier to learn from good examples. :slight_smile:

#4

A post on the same quality as

would be ideal :slight_smile:

#5

A macro like this could be useful:

macro_rules! apply_num_op {
    ($data:expr, $op:tt) => {
        let rhs = $data.pop().unwrap();
        let lhs = $data.pop().unwrap();
        match (lhs, rhs) {
            (Data::I32(lhs), Data::I32(rhs)) => data.push(Data::I32(lhs $op rhs)),
            (Data::F32(lhs), Data::F32(rhs)) => data.push(Data::F32(lhs $op rhs)),
            _ => panic!("- fail"),
        }
    };
}

You could then use it like this:

            Instr::Add => {
                apply_num_op!(data, +)
            },
            Instr::Sub => {
                apply_num_op!(data, -)
            },
            ...

The macro apply_num_ops! then just expands to what we put inside the branch in macro_rules, replacing all the $variables with the arguments we specified. Note that rust macros are hygenic, which means that variables declared or used within a macro are considered different from variables of the same names outside of the macro. This is why we need to pass data in as a macro variable rather than using it directly inside the macro. Hygene like this prevents conflicts so that macros behave reliably no matter what variables are in scope and don’t overwrite variables the outer environment expects to stay the same.

https://doc.rust-lang.org/book/macros.html should give you a good overview of macros, and if you want an in-depth guide to some of the tricker things possible, https://danielkeep.github.io/tlborm/book/ is quite a good read.

1 Like
#6

@daboross : Thanks! This is a great macro for the arith / comparison functions.

Being a bit ambitious, it’d be amazing if there was a way to just write all the valid exprs, and have it auto derive the code. I ha e something like the following in mind:

[.... , lhs_i32, rhs_i32] + => [ ..., lhs_i32 + rhs_i32 ]
[.... , lhs_f32, rhs_f32] + => [ ..., lhs_f32 + rhs_f32 ]
[.... , lhs_i32, rhs_i32] * => [ ..., lhs_i32 * rhs_i32 ]
[.... , lhs_f32, rhs_f32] * => [ ..., lhs_f32 * rhs_f32 ]
[..., x] dup = [..., x, x]
[..., x] drop = [...]

It seems at the end of the day, everything is just a stack effect of popping off some elems,checking they are the right variant, and pushing some stuff back.

However, instead of 1-line defs, most defs are 5-10x as long as necessary.

#7

There is!

docs for macros: https://doc.rust-lang.org/book/ch19-06-macros.html?highlight=macro#macros

playground

Some things note:

  • don’t foret $ for macro variables!

  • the macro location matters, it is inside the exec function because it needs to capture the variable data.

  • @internal this tag is used to handle the brunt of the work and shouldn’t be called directly

  • $()* This means match zero or more of the pattern inside the parens if used in the pattern section, or expand something that was matched zero or more times.

    For example
    $($types:ident)* means match zero or more identifiers
    and
    $( println!("{}", $x); )* means print each value of x on its own line.

  • macro patterns are tried in order with no backtracking, so the order of the patterns matter.

  • use {}, (), or [] to group arbitrary things to be processed latter if you want to add more stuff after it. For example the bin_op, pre_op, stuff.
    $instr:ident => { $($inner:tt)* }, $inner will contain all the stuff inside the curly braces, and will be processed later, in a branch marked @internal.

  • This branch allows arbiray code to be run, for example in the one off cases

(@internal $($rest:tt)*) => {
    $($rest)*
};
4 Likes
#8

On a different note, I’d totally introduce a Trait Pop2 and implement it for Vec that let’s you do let (lhs, rhs) = data.pop2().unwrap();.

#9

@KrishnaSannasi

Lines 109-125 looks amazing.

Thanks for writing another highly instructive response.

1 Like