Small unecessary branch generated from Vec

The code proves mathematically the total bytes copied will be exactly the vec pre-allocated capacity, yet the compiler generates a branch condition to check if there will be space. I imagine it's related to capacity != len but I'm still speculating on what could be the actual reason.

pub fn from_be_bytes(header: PacketHeader, bytes: &[u8]) -> Result<Self, PacketError> {
	let payload_size: usize = bytes.len() - size_of::<u64>();

	if payload_size != header.payload_size as usize {
		return Err(PacketError::BufferSizeMismatch);
	}

	let mut payload = Vec::<u8>::with_capacity(payload_size);
	payload.copy_from_slice(&bytes[..payload_size]);

	let checksum_bytes: [u8; size_of::<u64>()] = bytes[payload_size..].try_into().unwrap();
	let checksum = u64::from_be_bytes(checksum_bytes);

	let packet = Self {
		header,
		payload,
		checksum,
	};

	Ok(packet)
}
.cfi_startproc
add rcx, -8
mov eax, dword ptr [rsi + 24]
cmp rcx, rax
jne .LBB2_1
test rcx, rcx    ; this is the vec branch
jne .LBB2_5
movbe rax, qword ptr [rdx]
vmovups ymm0, ymmword ptr [rsi]
vmovups ymmword ptr [rdi], ymm0
mov qword ptr [rdi + 32], 0
mov qword ptr [rdi + 40], 1
mov qword ptr [rdi + 48], 0
mov ecx, 56
mov qword ptr [rdi + rcx], rax
mov rax, rdi
vzeroupper
ret

.LBB2_1:
movabs rax, -9223372036854775808
mov ecx, 32
mov qword ptr [rdi + rcx], rax
mov rax, rdi
ret

.LBB2_5:
push rax
.cfi_def_cfa_offset 16
mov rax, qword ptr [rip + __rust_no_alloc_shim_is_unstable@GOTPCREL]
movzx eax, byte ptr [rax]
lea rdx, [rip + .L__unnamed_1]
xor edi, edi
mov rsi, rcx
call qword ptr [rip + core::slice::<impl [T]>::copy_from_slice::len_mismatch_fail@GOTPCREL]

Is it possible to remove that branch? I tried switching to a boxed slice, and other conversions like slice.to_vec(), Vec::from(slice), etc but the generated assembly ends up much worse than this.

1 Like

It branches when the rcx is zero which represents the variable payload_size. You can't avoid this check as requesting zero sized allocation is UB. But you can assert!() this condition out tell the compiler this route is unlikely.

3 Likes

Oh boy, the trap right in front of me, if bytes.len() is zero then that operation will be -8 assigned to usize, which can be pretty catastrophic as you might expect. Thanks for the heads up!

2 Likes

saturating_sub() might help you here.