Detokenizing an expr designator in macro?


#1

I’m trying to implement a mini DSL for chemical reactions, e.g. converting expressions like “H2 + 2OH => 2H2O” into some sort of meaningful Rust structure. I thought using/abusing the Rust macro language would be a fun way to do this, but I am struggling to wrap my head around its nuances. Here’s some code:

// Placeholder for a more elaborate macro: for now it simply evaluates to the
// number of token trees it was handed.
macro_rules! complex {
    // Nothing left to count.
    () => {
        0
    };
    // Peel off the leading token tree, count it, and recurse on the remainder.
    ($head:tt $($tail:tt)*) => {
        1 + complex!($($tail)*)
    };
}

// First attempt: capture each side of the reaction as a whole expression.
// A captured `expr` is forwarded to `complex!` as one opaque unit, so the
// tokens it was built from are no longer individually visible there.
macro_rules! rxn {
    ($k:expr; $reactants:expr => $products:expr) => {
        ($k, complex!($reactants), complex!($products))
    };
}

// Second attempt, which does not work either: a `tt` repetition can itself
// match the `=>` token, so the matcher cannot tell where the left-hand side
// ends and reports an ambiguity.
macro_rules! rxn2 {
    ($k:expr; $($reactant:tt)+ => $($product:tt)+) => {
        ($k, complex!($($reactant)*), complex!($($product)*))
    };
}


fn main() {
    let k = 2.0;

    // Each side of the reaction is aggregated into a single `expr` token, so
    // this prints (2.0, 1, 1) rather than the hoped-for (2.0, 7, 3).
    let via_rxn = rxn!(k; 1*H2 + 2*OH => 2 * H2O);
    println!("{:?}", via_rxn);

    // Handing the raw token stream straight to the sub-macro gives the
    // desired counts: (7, 3).
    let counts = (complex!(1*H2+2*OH), complex!(2*H2O));
    println!("{:?}", counts);
}

rxn! was my first attempt at separating the two sides of the chemical reaction. It seems to fail because once the tokens are matched collectively as an expr, they appear to be inseparable afterwards. I don’t have any intuition as to why rxn2! fails, however. Any tips or insight?


#2

Taking an entirely different approach (separately maintaining lhs and rhs state), I did manage to make this work, though I’m still interested, academically speaking, in whether it is possible to ungroup an expr token. Working code:

use std::iter;

// Entry point: pairs the rate with the parsed reaction. Both accumulators
// handed to `rxn_h!` start out as empty iterators, and a leading `+` is
// prepended so that the first term reaches the helper in the form `+ term`.
macro_rules! rxn {
    ($k:expr; $($body:tt)*) => {
        (
            $k,
            rxn_h!(iter::empty(); iter::empty(); + $($body)*)
        )
    };
}

// Internal helper for `rxn!`: walks the reaction one term at a time.
//
// Invocation shape: `rxn_h![<first acc>; <second acc>; <remaining tokens>]`,
// where both accumulators are iterator expressions. Every term appends a
// `(species name, coefficient)` pair to the SECOND accumulator. When `->` is
// reached the two accumulators trade places, so the reactants end up in the
// first slot while the products keep accumulating in the second. Each term
// must be introduced by a `+`; the `->` rule re-inserts one for the first
// product.
//
// Expands to `(Vec<(String, usize)>, Vec<(String, usize)>)`.
macro_rules! rxn_h {
    // `+ n * X ...`: species `X` with an explicit coefficient `n`.
    [$lhs:expr; $rhs:expr; + $x:tt * $y:tt $($cdr:tt)*] => {
        rxn_h![$lhs; $rhs.chain(::std::iter::once((stringify!($y).to_string(), $x))); $($cdr)*]
    };
    // `+ X + ...`: bare species followed by another term; coefficient is 1.
    // The extra tokens matched to tell this apart from the rule above are
    // put back for the next step.
    [$lhs:expr; $rhs:expr; + $x:tt + $y:tt $($cdr:tt)*] => {
        rxn_h![$lhs; $rhs.chain(::std::iter::once((stringify!($x).to_string(), 1))); + $y $($cdr)*]
    };
    // `+ X -> ...`: bare species directly before the arrow; coefficient is 1.
    // Without this rule a reaction such as `E + S -> ES` matches nothing.
    [$lhs:expr; $rhs:expr; + $x:tt -> $($cdr:tt)*] => {
        rxn_h![$lhs; $rhs.chain(::std::iter::once((stringify!($x).to_string(), 1))); -> $($cdr)*]
    };
    // `+ X` as the very last term; coefficient is 1.
    [$lhs:expr; $rhs:expr; + $x:tt] => {
        rxn_h![$lhs; $rhs.chain(::std::iter::once((stringify!($x).to_string(), 1)));]
    };
    // `-> ...`: switch sides by swapping the accumulators, and re-insert the
    // leading `+` the term rules expect.
    [$lhs:expr; $rhs:expr; -> $($cdr:tt)*] => {
        rxn_h![$rhs; $lhs; + $($cdr)*]
    };
    // No tokens left: materialise both accumulators.
    [$lhs:expr; $rhs:expr;] => {
        ($lhs.collect::<Vec<(String,usize)>>(), $rhs.collect::<Vec<(String,usize)>>())
    };
}



// Builds one reaction with the macro and prints its Debug representation.
fn main() {
    let k = 2.0;
    let reaction = rxn!(k; H2+2*OH->2*H2O);
    println!("{:?}", reaction);
}

#3

The usual technique is called a “TT muncher”. It is similar to the approach in your second comment, but it operates on tokens only rather than doing work along the way (the iterator chains).

// Receives an expression and, after a comma, one or more token trees (callers
// pass the tokens of that same expression). Expands to a string literal that
// shows the stringified expression and the stringified tokens side by side.
macro_rules! complex {
    [$parsed:expr, $($raw:tt)+] => {
        concat![stringify![$parsed], " == ", stringify![$($raw)+]]
    };
}

// Entry point: hands the reaction tokens to `rxn_muncher!` together with two
// empty parenthesised accumulators, and pairs the result with the rate.
macro_rules! rxn {
    [$k:expr; $($reaction:tt)+] => {
        ($k, rxn_muncher![() () $($reaction)+])
    };
}

// Token muncher: `rxn_muncher!((left tokens) (right tokens) remaining...)`.
// Moves one token per step from the remaining input into one of the two
// accumulators, switching from left to right when `=>` is encountered.
// Rules are tried top to bottom, so their order matters.
macro_rules! rxn_muncher {
    // Input exhausted and both sides non-empty: hand each side to `complex!`,
    // once to be parsed as an expression and once as raw tokens.
    [($($left:tt)+) ($($right:tt)+)] => {
        (
            complex!($($left)+, $($left)+),
            complex!($($right)+, $($right)+)
        )
    };

    // Left side non-empty, right side still empty, next token is `=>`:
    // drop the separator and start the right side with the token after it.
    [($($left:tt)+) () => $next:tt $($tail:tt)*] => {
        rxn_muncher![($($left)+) ($next) $($tail)*]
    };

    // Both sides non-empty: the next token is appended to the right side.
    [($($left:tt)+) ($($right:tt)+) $next:tt $($tail:tt)*] => {
        rxn_muncher![($($left)+) ($($right)+ $next) $($tail)*]
    };

    // Right side empty (left side may be empty too): the next token is
    // appended to the left side. At least one further token must follow.
    [($($left:tt)*) () $next:tt $($tail:tt)+] => {
        rxn_muncher![($($left)* $next) () $($tail)+]
    };
}

// Builds one reaction with the macro and prints its Debug representation.
fn main() {
    let k = 2.0;
    let reaction = rxn!(k; H2+2*OH => 2*H2O);
    println!("{:?}", reaction);
}

#4

This is cool! Thank you very much for sharing.


#5

Thinking more about your example and reading through that book, here is the solution I am going to settle on. I realized that the iterator junk was just me not fully understanding the true power of Rust’s macro system:

// Entry point: pairs the rate with the parsed reaction. `rxn_h!` is started
// with two empty accumulators and a leading `+`, so that the first term
// arrives in the form `+ term`.
macro_rules! rxn {
    ($k:expr; $($body:tt)*) => {
        ($k, rxn_h!(() () + $($body)*))
    };
}

// Internal helper for `rxn!`: `rxn_h!((done) (active) remaining...)`.
// Every term appends a `(name, coefficient),` entry to the `active`
// accumulator. On `->` the accumulators trade places, so the reactants end up
// first and the products are collected second. Rules are tried in order.
macro_rules! rxn_h {
    // `+ n * X ...`: species `X` with an explicit coefficient `n`.
    (($($done:tt)*) ($($active:tt)*) + $n:tt * $species:tt $($tail:tt)*) => {
        rxn_h!(($($done)*) ($($active)* (stringify!($species).to_string(), $n),) $($tail)*)
    };
    // `+ X + ...`: bare species followed by another term, coefficient 1.
    // The second `+` is matched only to tell this apart from the `n * X`
    // form, and is put back for the next step.
    (($($done:tt)*) ($($active:tt)*) + $species:tt + $($tail:tt)+) => {
        rxn_h!(($($done)*) ($($active)* (stringify!($species).to_string(), 1),) + $($tail)*)
    };

    // `+ X -> ...`: bare species directly before the arrow, coefficient 1.
    // The arrow is put back so the swap rule below handles it.
    (($($done:tt)*) ($($active:tt)*) + $species:tt -> $($tail:tt)+) => {
        rxn_h!(($($done)*) ($($active)* (stringify!($species).to_string(), 1),) -> $($tail)*)
    };

    // `-> ...`: exchange the two accumulators and re-insert the leading `+`
    // that the term rules expect.
    (($($done:tt)*) ($($active:tt)*) -> $($tail:tt)+) => {
        rxn_h!(($($active)*) ($($done)*) + $($tail)*)
    };

    // `+ X` as the very last term, coefficient 1.
    (($($done:tt)*) ($($active:tt)*) + $species:tt) => {
        rxn_h!(($($done)*) ($($active)* (stringify!($species).to_string(), 1),))
    };

    // Nothing left: turn each accumulator into an array literal.
    (($($done:tt)*) ($($active:tt)*)) => {
        ([$($done)*], [$($active)*])
    };
}



// Describes a Michaelis-Menten reaction network with `rxn!` and prints it.
fn main() {
    // Rate constants: forward binding, backward unbinding, catalysis.
    let (k_forward, k_backward, k_cat) = (1.0, 10.0, 10.0);

    // The three reactions that make up the network.
    let network = (
        rxn!(k_forward; E + S -> ES),
        rxn!(k_backward; ES -> E + S),
        rxn!(k_cat; ES -> E + P),
    );

    println!("{:?}", network);
}