Hi,
Now and then I take a look at the code Rust generates for what I write, to make sure it does what I expect. Seeing the expected output gives me confidence in my code, which I think is important in a systems language.
Anyway, I was playing around a bit with iterators, and since it's possible to cut quite a bit of code by using them, I tried this out.
I implemented this code
/// Returns the `rect` of the first dock whose `handle` field equals `handle`.
///
/// `self.d0` is searched first, then `self.d1`. The result is `None` when
/// neither collection holds a dock with a matching handle.
fn get_rect_by_handle(&self, handle: u64) -> Option<Rect> {
// First pass: the docks in `d0`.
for dock in &self.d0 {
if dock.handle == handle {
// Stop at the first match; later entries are never inspected.
return Some(dock.rect);
}
}
// Second pass: only reached when `d0` had no match.
for dock in &self.d1 {
if dock.handle == handle {
return Some(dock.rect);
}
}
// No dock in either collection carries this handle.
None
}
Then I rewrote it like this
fn get_rect_by_handle(&self, handle: u64) -> Option<Rect> {
self.d0.iter().chain(self.d1.iter()).find(|&dock| dock.handle == handle).map(|dock| dock.rect)
The first code (with two loops) gets generated like this:
# Compiler output for the two-loop version of get_rect_by_handle.
# NOTE(review): rsi is presumably `self` and rdi the caller-provided slot for
# the returned Option<Rect> -- confirm against the calling convention in use.
# The handle is compared against the immediate 2, so the `handle` argument
# appears to have been constant-propagated in this build.
_ZN5Split18get_rect_by_handle20h5c2c7519bc9bc06fmcaE:
.cfi_startproc
# rax = qword at [rsi], rcx = qword at [rsi + 16]
# (presumably d0's data pointer and element count).
mov rax, qword ptr [rsi]
mov rcx, qword ptr [rsi + 16]
# Bias the pointer back by 16 bytes: the loop reads at [rax + 16] and then advances.
add rax, -16
# Scale the count by 16 (the stride used below) to get the remaining byte count.
shl rcx, 4
.align 16, 0x90
# First loop.
.LBB0_1:
# No bytes left: fall over to the second loop.
test rcx, rcx
je .LBB0_5
# rax == -16 means the next element address (rax + 16) is 0; also treated as the end.
cmp rax, -16
je .LBB0_5
add rcx, -16
# Compare the first qword of the next element with 2, then step rax onto that element.
# lea leaves the flags untouched, so the jne still tests the cmp result.
cmp qword ptr [rax + 16], 2
lea rax, [rax + 16]
jne .LBB0_1
# Match in the first loop: go to the shared "found" exit.
jmp .LBB0_4
# Same setup for the second sequence, read from [rsi + 24] and [rsi + 40]
# (presumably d1's data pointer and element count).
.LBB0_5:
mov rax, qword ptr [rsi + 24]
mov rcx, qword ptr [rsi + 40]
add rax, -16
shl rcx, 4
.align 16, 0x90
# Second loop; identical in shape to the first, but exits to .LBB0_9 when exhausted.
.LBB0_6:
test rcx, rcx
je .LBB0_9
cmp rax, -16
je .LBB0_9
add rcx, -16
cmp qword ptr [rax + 16], 2
lea rax, [rax + 16]
jne .LBB0_6
# Found: copy the 8 bytes at offset 8 of the matching element to [rdi + 4]
# and write 1 to the dword at [rdi] (presumably the `Some` discriminant).
.LBB0_4:
mov rax, qword ptr [rax + 8]
mov qword ptr [rdi + 4], rax
mov dword ptr [rdi], 1
ret
# Not found: copy a 12-byte constant (8 + 4 bytes) into the return slot
# (presumably a pre-built `None`).
.LBB0_9:
mov eax, dword ptr [rip + const4699+8]
mov dword ptr [rdi + 8], eax
mov rax, qword ptr [rip + const4699]
mov qword ptr [rdi], rax
ret
And the second one like this
# Compiler output for the iterator-chain version (the symbol label is not shown in this listing).
# NOTE(review): rsi is presumably `self` and rdi the caller-provided slot for
# the returned Option<Rect> -- confirm against the calling convention in use.
.cfi_startproc
# rcx = begin and r9 = begin + count * 16 = end of the first sequence
# (fields at [rsi] and [rsi + 16]).
mov rcx, qword ptr [rsi]
mov r9, qword ptr [rsi + 16]
shl r9, 4
add r9, rcx
# rdx = begin and r8 = end of the second sequence (fields at [rsi + 24] and [rsi + 40]).
mov rdx, qword ptr [rsi + 24]
mov r8, qword ptr [rsi + 40]
shl r8, 4
add r8, rdx
# r10b is a state byte starting at 0; presumably the chain adaptor's internal state.
xor r10d, r10d
.align 16, 0x90
# Loop head: the state byte is re-dispatched on every iteration.
.LBB0_1:
movzx eax, r10b
cmp eax, 1
je .LBB0_6
cmp eax, 2
jne .LBB0_3
# State 2: take the next element from the second sequence only.
cmp rdx, r8
je .LBB0_15
lea rsi, [rdx + 16]
mov r10b, 2
mov r11, rcx
jmp .LBB0_12
.align 16, 0x90
# State 1: take the next element from the first sequence only.
# Within this listing, r10b is only ever set to 1 inside this branch.
.LBB0_6:
cmp rcx, r9
je .LBB0_15
lea r11, [rcx + 16]
mov r10b, 1
jmp .LBB0_5
.align 16, 0x90
# Any other state (initially 0): try the first sequence, and fall over to the
# second one once the first is exhausted.
.LBB0_3:
cmp rcx, r9
je .LBB0_10
lea r11, [rcx + 16]
xor r10d, r10d
# Candidate comes from the first sequence; the second cursor is carried over unchanged.
.LBB0_5:
mov rsi, rdx
mov rax, rcx
jmp .LBB0_13
# First sequence exhausted: switch to state 2 and take from the second sequence.
.LBB0_10:
cmp rdx, r8
je .LBB0_15
lea rsi, [rdx + 16]
mov r10b, 2
mov r11, r9
.LBB0_12:
mov rax, rdx
# rax = candidate element. Compare its first qword with 2, then commit the
# advanced cursors (r11 -> rcx, rsi -> rdx); mov leaves the flags untouched,
# so the jne still tests the cmp result.
.LBB0_13:
cmp qword ptr [rax], 2
mov rcx, r11
mov rdx, rsi
jne .LBB0_1
# Match: a null check on the element pointer remains before it is used.
test rax, rax
je .LBB0_15
# Found: copy the 8 bytes at offset 8 of the element to [rdi + 4] and write 1
# to the dword at [rdi] (presumably the `Some` discriminant).
mov rax, qword ptr [rax + 8]
mov qword ptr [rdi + 4], rax
mov dword ptr [rdi], 1
ret
# Not found: copy a 12-byte constant (8 + 4 bytes) into the return slot
# (presumably a pre-built `None`).
.LBB0_15:
mov eax, dword ptr [rip + const4376+8]
mov dword ptr [rdi + 8], eax
mov rax, qword ptr [rip + const4376]
mov qword ptr [rdi], rax
ret
The second one generates quite a few more instructions, so I'm wondering: is it to be expected that the second version will be slower than the first one (as it appears to be, at least)?
Cheers!