The code proves mathematically the total bytes copied will be exactly the vec pre-allocated capacity, yet the compiler generates a branch condition to check if there will be space. I imagine it's related to capacity != len but I'm still speculating on what could be the actual reason.
pub fn from_be_bytes(header: PacketHeader, bytes: &[u8]) -> Result<Self, PacketError> {
let payload_size: usize = bytes.len() - size_of::<u64>();
if payload_size != header.payload_size as usize {
return Err(PacketError::BufferSizeMismatch);
}
let mut payload = Vec::<u8>::with_capacity(payload_size);
payload.copy_from_slice(&bytes[..payload_size]);
let checksum_bytes: [u8; size_of::<u64>()] = bytes[payload_size..].try_into().unwrap();
let checksum = u64::from_be_bytes(checksum_bytes);
let packet = Self {
header,
payload,
checksum,
};
Ok(packet)
}
.cfi_startproc
add rcx, -8
mov eax, dword ptr [rsi + 24]
cmp rcx, rax
jne .LBB2_1
test rcx, rcx ; this is the vec branch
jne .LBB2_5
movbe rax, qword ptr [rdx]
vmovups ymm0, ymmword ptr [rsi]
vmovups ymmword ptr [rdi], ymm0
mov qword ptr [rdi + 32], 0
mov qword ptr [rdi + 40], 1
mov qword ptr [rdi + 48], 0
mov ecx, 56
mov qword ptr [rdi + rcx], rax
mov rax, rdi
vzeroupper
ret
.LBB2_1:
movabs rax, -9223372036854775808
mov ecx, 32
mov qword ptr [rdi + rcx], rax
mov rax, rdi
ret
.LBB2_5:
push rax
.cfi_def_cfa_offset 16
mov rax, qword ptr [rip + __rust_no_alloc_shim_is_unstable@GOTPCREL]
movzx eax, byte ptr [rax]
lea rdx, [rip + .L__unnamed_1]
xor edi, edi
mov rsi, rcx
call qword ptr [rip + core::slice::<impl [T]>::copy_from_slice::len_mismatch_fail@GOTPCREL]
Is it possible to remove that branch? I tried switching to a boxed slice, and other conversions like slice.to_vec()
, Vec::from(slice)
, etc but the generated assembly ends up much worse than this.