How about creating a `Deserialize` trait like this? The idea is that we return an instance of `Self`
and the unread bytes in the happy case, or a `DeserializeError` if parsing fails
(e.g. a bounds check fails because there is insufficient data).
/// Parses a value of the implementing type from the front of a byte buffer.
trait Deserialize: Sized {
    /// Decodes one `Self` from the start of `buffer`.
    ///
    /// On success, returns the decoded value together with the unread
    /// remainder of `buffer`, so calls can be chained field by field.
    ///
    /// # Errors
    ///
    /// Returns [`DeserializeError`] if parsing fails, e.g. because `buffer`
    /// holds too few bytes for the value.
    fn deserialize(buffer: &[u8]) -> Result<(Self, &[u8]), DeserializeError>;
}

/// Error returned when a value could not be parsed from the supplied bytes.
///
/// Derives `Debug` and `PartialEq` so results can be `unwrap`ped and compared
/// in assertions, and implements `std::error::Error` so it composes with `?`
/// and boxed error types.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct DeserializeError;

impl std::fmt::Display for DeserializeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("failed to deserialize value from buffer")
    }
}

impl std::error::Error for DeserializeError {}
Then we use the `byteorder` crate to implement `deserialize()` using `LittleEndian::read_u32()`
and friends. A macro makes this process a lot less repetitive.
use byteorder::{ByteOrder, LittleEndian};
/// Generates a `Deserialize` impl for a fixed-width primitive.
///
/// * `$type` - the primitive type to implement the trait for.
/// * `$method` - the `LittleEndian` reader (e.g. `read_u32`) that decodes
///   `$type` from a byte slice.
///
/// The generated `deserialize` returns `DeserializeError` when `buffer` holds
/// fewer than `size_of::<$type>()` bytes; otherwise it decodes the leading
/// bytes and hands back the untouched tail of the buffer.
macro_rules! impl_deserialize {
    ($type:ty, $method:ident) => {
        impl Deserialize for $type {
            fn deserialize(buffer: &[u8]) -> Result<(Self, &[u8]), DeserializeError> {
                let width = std::mem::size_of::<Self>();
                if buffer.len() >= width {
                    // Length was verified above, so `split_at` cannot panic.
                    let (encoded, remaining) = buffer.split_at(width);
                    Ok((LittleEndian::$method(encoded), remaining))
                } else {
                    Err(DeserializeError)
                }
            }
        }
    };
}
// Little-endian `Deserialize` impls for the fixed-width primitives,
// each paired with the `LittleEndian` reader of matching width.

// Unsigned integers.
impl_deserialize!(u16, read_u16);
impl_deserialize!(u32, read_u32);
impl_deserialize!(u64, read_u64);

// Signed integers.
impl_deserialize!(i16, read_i16);
impl_deserialize!(i32, read_i32);
impl_deserialize!(i64, read_i64);

// Floating point.
impl_deserialize!(f32, read_f32);
impl_deserialize!(f64, read_f64);
(If you want to be generic over endianness, the `Deserialize` trait would accept a type parameter,
turning it into `Deserialize<T>` where `T: ByteOrder`.)
Now that we've done the primitives, you can use them to deserialize more complex types:
/// A two-component vector of `f32` values.
///
/// Only `PartialEq` (not `Eq`) is derived: floats do not compare equal when
/// they are NaN, so equality on this type is partial. `Debug` is derived so
/// values can be printed and used in `assert_eq!`.
#[derive(Debug, Default, Copy, Clone, PartialEq)]
pub struct LLVector2 {
    /// First component.
    pub x: f32,
    /// Second component.
    pub y: f32,
}
/// Decodes an `LLVector2` as two consecutive `f32` values: `x`, then `y`.
impl Deserialize for LLVector2 {
    /// Reads `x` and then `y` from the front of `buffer`, returning the
    /// vector and whatever bytes follow it. Any error from either `f32`
    /// read is propagated unchanged.
    fn deserialize(buffer: &[u8]) -> Result<(Self, &[u8]), DeserializeError> {
        let (x, after_x) = f32::deserialize(buffer)?;
        let (y, remaining) = f32::deserialize(after_x)?;
        let vector = Self { x, y };
        Ok((vector, remaining))
    }
}
(playground)
Inspecting the assembly shows simple move instructions and a bit of shuffling to create the `rest` slice, as you'd expect:
<f64 as playground::Deserialize>::deserialize:
mov rax, rdi
cmp rdx, 8
jae .LBB2_1
mov qword ptr [rax + 8], 0
ret
.LBB2_1:
add rdx, -8
mov rcx, qword ptr [rsi]
add rsi, 8
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdx
ret
I'm not sure why LLVM couldn't coalesce the `x` and `y` bounds checks into a single `cmp rdx, 8; jb .LBB0_3`, though.
<playground::LLVector2 as playground::Deserialize>::deserialize:
mov rax, rdi
cmp rdx, 4
jb .LBB0_3
mov rcx, rdx
and rcx, -4
cmp rcx, 4
jne .LBB0_2
.LBB0_3:
mov qword ptr [rax + 8], 0
ret
.LBB0_2:
mov rcx, qword ptr [rsi]
add rsi, 8
add rdx, -8
mov qword ptr [rax], rcx
mov qword ptr [rax + 8], rsi
mov qword ptr [rax + 16], rdx
ret