Strange behavior

Hello everyone! Please help me understand, my brain is completely broken.

Here's the code:

const FIX_MAP_SIZE: u8 = 0b_0000_1111;
const FIX_ARR_SIZE: u8 = 0b_0000_1111;
const FIX_STR_SIZE: u8 = 0b_0001_1111;

/// https://github.com/msgpack/msgpack/blob/master/spec.md
#[derive(Debug, Clone, Copy, PartialEq)]
#[repr(u8, C)]
pub enum Marker
{
    FixPos(u8) = 0x00,
    FixMap(u8) = 0x80,
    FixArray(u8) = 0x90,
    FixStr(u8) = 0xa0,
    Null = 0xc0,
    NeverUsed = 0xc1,
    False = 0xc2,
    True = 0xc3,
    Bin8 = 0xc4,
    Bin16 = 0xc5,
    Bin32 = 0xc6,
    Ext8 = 0xc7,
    Ext16 = 0xc8,
    Ext32 = 0xc9,
    Float32 = 0xca,
    Float64 = 0xcb,
    UInt8 = 0xcc,
    UInt16 = 0xcd,
    UInt32 = 0xce,
    UInt64 = 0xcf,
    Int8 = 0xd0,
    Int16 = 0xd1,
    Int32 = 0xd2,
    Int64 = 0xd3,
    FixExt1 = 0xd4,
    FixExt2 = 0xd5,
    FixExt4 = 0xd6,
    FixExt8 = 0xd7,
    FixExt16 = 0xd8,
    Str8 = 0xd9,
    Str16 = 0xda,
    Str32 = 0xdb,
    Array16 = 0xdc,
    Array32 = 0xdd,
    Map16 = 0xde,
    Map32 = 0xdf,
    FixNeg(i8) = 0xe0,
}

impl Marker
{
    #[inline]
    pub fn from_u8(num: u8) -> Marker
    {
        match num {
            0x00 ..= 0x7f => Marker::FixPos(num),
            0x80 ..= 0x8f => Marker::FixMap(num & FIX_MAP_SIZE),
            0x90 ..= 0x9f => Marker::FixArray(num & FIX_ARR_SIZE),
            0xa0 ..= 0xbf => Marker::FixStr(num & FIX_STR_SIZE),
            0xc0 => Marker::Null,
            0xc1 => Marker::NeverUsed,
            0xc2 => Marker::False,
            0xc3 => Marker::True,
            0xc4 => Marker::Bin8,
            0xc5 => Marker::Bin16,
            0xc6 => Marker::Bin32,
            0xc7 => Marker::Ext8,
            0xc8 => Marker::Ext16,
            0xc9 => Marker::Ext32,
            0xca => Marker::Float32,
            0xcb => Marker::Float64,
            0xcc => Marker::UInt8,
            0xcd => Marker::UInt16,
            0xce => Marker::UInt32,
            0xcf => Marker::UInt64,
            0xd0 => Marker::Int8,
            0xd1 => Marker::Int16,
            0xd2 => Marker::Int32,
            0xd3 => Marker::Int64,
            0xd4 => Marker::FixExt1,
            0xd5 => Marker::FixExt2,
            0xd6 => Marker::FixExt4,
            0xd7 => Marker::FixExt8,
            0xd8 => Marker::FixExt16,
            0xd9 => Marker::Str8,
            0xda => Marker::Str16,
            0xdb => Marker::Str32,
            0xdc => Marker::Array16,
            0xdd => Marker::Array32,
            0xde => Marker::Map16,
            0xdf => Marker::Map32,
            0xe0 ..= 0xff => Marker::FixNeg(num as i8),
        }
    }

    #[inline]
    pub fn to_u8(&self) -> u8
    {
        let (d, v) = unsafe { core::mem::transmute::<Self, (u8, u8)>(*self) };
        d | v
    }
}

impl From<u8> for Marker
{
    #[inline]
    fn from(value: u8) -> Self
    {
        Marker::from_u8(value)
    }
}

impl Into<u8> for Marker
{
    #[inline]
    fn into(self) -> u8
    {
        self.to_u8()
    }
}

#[cfg(test)]
mod tests;

and here is the test:

use crate::Marker;

#[test]
fn test_from_u8()
{
    let mut map: [Option<Marker>; 256] = [None; 256];

    for i in 0 .. 128 {
        map[i] = Some(Marker::FixPos(i.try_into().unwrap()));
    }

    for i in 0 .. 32 {
        map[0xe0 + i] = Some(Marker::FixNeg((0xe0 + i) as i8));
        map[0xa0 + i] = Some(Marker::FixStr(i.try_into().unwrap()));
    }

    for i in 0 .. 16 {
        map[0x80 + i] = Some(Marker::FixMap(i.try_into().unwrap()));
        map[0x90 + i] = Some(Marker::FixArray(i.try_into().unwrap()));
    }

    map[0xc0] = Some(Marker::Null);
    map[0xc1] = Some(Marker::NeverUsed);
    map[0xc2] = Some(Marker::False);
    map[0xc3] = Some(Marker::True);
    map[0xc4] = Some(Marker::Bin8);
    map[0xc5] = Some(Marker::Bin16);
    map[0xc6] = Some(Marker::Bin32);
    map[0xc7] = Some(Marker::Ext8);
    map[0xc8] = Some(Marker::Ext16);
    map[0xc9] = Some(Marker::Ext32);
    map[0xca] = Some(Marker::Float32);
    map[0xcb] = Some(Marker::Float64);
    map[0xcc] = Some(Marker::UInt8);
    map[0xcd] = Some(Marker::UInt16);
    map[0xce] = Some(Marker::UInt32);
    map[0xcf] = Some(Marker::UInt64);
    map[0xd0] = Some(Marker::Int8);
    map[0xd1] = Some(Marker::Int16);
    map[0xd2] = Some(Marker::Int32);
    map[0xd3] = Some(Marker::Int64);
    map[0xd4] = Some(Marker::FixExt1);
    map[0xd5] = Some(Marker::FixExt2);
    map[0xd6] = Some(Marker::FixExt4);
    map[0xd7] = Some(Marker::FixExt8);
    map[0xd8] = Some(Marker::FixExt16);
    map[0xd9] = Some(Marker::Str8);
    map[0xda] = Some(Marker::Str16);
    map[0xdb] = Some(Marker::Str32);
    map[0xdc] = Some(Marker::Array16);
    map[0xdd] = Some(Marker::Array32);
    map[0xde] = Some(Marker::Map16);
    map[0xdf] = Some(Marker::Map32);

    for i in 0u8 ..= 255 {
        let marker = i.into();
        assert_eq!(Some(marker), map[i as usize]);

        let code: u8 = marker.into();
        assert_eq!(code, i as u8);
    }
}

Previously the tests were passing. Now I'm getting an error:

running 1 test
thread 'marker::tests::test_from_u8' panicked at 'assertion failed: `(left == right)`
  left: `224`,
 right: `192`', msgpack/src/marker/tests.rs:61:9

But if I add the line

    for i in 0u8 ..= 255 {
        dbg!(i); // <-------- THIS
        let marker = i.into();

to the test, it magically passes as it should. Where am I making a mistake?

Probably right here:

let (d, v) = unsafe { core::mem::transmute::<Self, (u8, u8)>(*self) };

self is #[repr(u8)], not #[repr((u8, u8))].

Oh, I feel incredibly stupid. Thank you very much!

FWIW Here's the error reported by miri:

error: Undefined Behavior: constructing invalid value at .1: encountered uninitialized memory, but expected an integer
   --> src/main.rs:98:31
    |
98  |         let (d, v) = unsafe { core::mem::transmute::<Self, (u8, u8)>(*self) };
    |                               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ constructing invalid value at .1: encountered uninitialized memory, but expected an integer
    |
    = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
    = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
    = note: BACKTRACE:
    = note: inside `Marker::to_u8` at src/main.rs:98:31: 98:76
note: inside `<Marker as std::convert::Into<u8>>::into`
   --> src/main.rs:117:9
    |
117 |         self.to_u8()
    |         ^^^^^^^^^^^^
note: inside `main`
   --> src/main.rs:175:24
    |
175 |         let code: u8 = marker.into();
    |                        ^^^^^^^^^^^^^

The UB can be fixed with:

    pub fn to_u8(&self) -> u8
    {
        unsafe { *(self as *const Self as *const u8) }
    }

But tests still do not pass. With the extra dbg! line, the test fails at i = 1. Anyway, that's at least one bug caught and taken care of.

As far as I understand, with my definition of the layout of this struct, it is u8, u8 (the first one determines the marker and the second one represents the value for, for example, FixPos, FixNeg).

match d >= 0xc0 && d < 0xe0 {
            true => d,
            false => d | v,
        }

seems to solve the problem.. but is it correct?

Yes to the first part...

// [src/lib.rs:235] std::mem::size_of::<Marker>() = 2
dbg!(std::mem::size_of::<Marker>());

The discriminant for FixPos is always 0, regardless of its inner value. You can extract the inner value with a plain old match:

    pub fn to_u8(&self) -> u8
    {
        let d = unsafe { *(self as *const Self as *const u8) };

        match self {
            Self::FixPos(v)
            | Self::FixMap(v)
            | Self::FixArray(v)
            | Self::FixStr(v) => d | *v,
            Self::FixNeg(v) => d | *v as u8,
            _ => d,
        }
    }

thank you!

Why are you using unsafe and transmute for what appears to be simple bit manipulation? This really seems unwarranted.

2 Likes