Lower-level Rust enums usage case


#1

This is an use case for a lower-level handling of Rust enums.

This is a small coding challenge:

I’ve translated to Rust this Python function:

def widest_leftmost_pair(s):
    index = {}
    prev_index = {}
    pair = [0, 0, 0, None]
    for i, j in enumerate(s):
        if j not in index:
            index[j] = prev_index[j] = i
        else:
            if i - index[j] > pair[0]:
                pair = [i - index[j], index[j], i, j]
            max_j = prev_index[j]
            index = {k: prev_index[k] for k, l in index.iteritems() if prev_index[k] >= max_j}
            prev_index[j] = i
    return pair if pair[3] else None

The “pair” list is nullable (its 4th item is None/Some and if it’s None the function returns just None). In Rust I return an Option. I use fixed-size arrays instead of Python dicts, and I use an Option for the values of the “index” array:

fn widest_leftmost_pair(arr: &[u8]) -> Option<Pair> {
    let mut index = [None; 256];
    let mut prev_index = [0; 256];
    let mut tup: Option<Pair> = None;

    for (i, &b) in arr.iter().enumerate() {
        let ub = usize::from(b);
        if let Some(idxb) = index[ub] {
            let t0 = if let Some(t) = tup { t.0 } else { 0 };
            if i - idxb > t0 {
                tup = Some((i - idxb, idxb, i));
            }
            let max_c = prev_index[ub];
            for j in 0 .. index.len() {
                if let Some(_) = index[j] {
                    index[j] = if prev_index[j] >= max_c { Some(prev_index[j]) }
                                                    else { None };
                }
            }
        } else {
            index[ub] = Some(i);
        }
        prev_index[ub] = i;
    }

    tup
}

Replacing the index array of type [Option; 256], with a simpler [usize; 256], and representing None with usize::MAX, the code gets faster:

fn widest_leftmost_pair(arr: &[u8]) -> Option<Pair> {
    const MISSING: usize = std::usize::MAX;
    let mut index = [MISSING; 256];
    let mut prev_index = [0; 256];
    let mut tup: Option<Pair> = None;

    for (i, &b) in arr.iter().enumerate() {
        let ub = usize::from(b);
        let idxb = index[ub];
        if idxb == MISSING {
            index[ub] = i;
        } else {
            let t0 = if let Some(t) = tup { t.0 } else { 0 };
            if i - idxb > t0 {
                tup = Some((i - idxb, idxb, i));
            }
            let max_c = prev_index[ub];
            for j in 0 .. index.len() {
                if index[j] != MISSING {
                    index[j] = if prev_index[j] >= max_c { prev_index[j] }
                                                    else { MISSING };
                }
            }
        }
        prev_index[ub] = i;
    }

    tup
}

With type-level integers you can create a more efficient OptionDef that represents None with a compile-time specified value, and is used like an nice Option (as done in D language standard library):

fn widest_leftmost_pair(arr: &[u8]) -> Option<Pair> {
    let mut index: [OptionDef<usize, std::usize::MAX>; 256] = [None; 256];
    let mut prev_index = [0; 256];
    let mut tup: Option<Pair> = None;

    for (i, &b) in arr.iter().enumerate() {
        let ub = usize::from(b);
        if let Some(idxb) = index[ub] {
            let t0 = if let Some(t) = tup { t.0 } else { 0 };
            if i - idxb > t0 {
                tup = Some((i - idxb, idxb, i));
            }
            let max_c = prev_index[ub];
            for j in 0 .. index.len() {
                if let Some(_) = index[j] {
                    index[j] = if prev_index[j] >= max_c { Some(prev_index[j]) }
                                                    else { None };
                }
            }
        } else {
            index[ub] = Some(i);
        }
        prev_index[ub] = i;
    }

    tup
}

This OptionDef<usize, std::usize::MAX> is less safe than Option (because assigning the special value turns it into a None, this is sometimes a bug), but it’s more memory-efficient (and performance efficient). And it’s a little safer and more handy than just an usize.