Ok, here is a non-simple example, the program:
use pstd::collections::BTreeSet;
fn main() {
let set = BTreeSet::from([2, 4, 6]);
let mut x = 1234;
for _rep in 0..100_000_000 {
for e in set.iter() { x += e; }
}
println!("x={}", x);
}
```
The assembler for main:
.section .text.btest::main,"ax",@progbits
.hidden btest::main
.globl btest::main
.p2align 4
.type btest::main,@function
btest::main:
.cfi_startproc
.cfi_personality 155, DW.ref.rust_eh_personality
.cfi_lsda 27, .Lexception3
push rbp
.cfi_def_cfa_offset 16
push r15
.cfi_def_cfa_offset 24
push r14
.cfi_def_cfa_offset 32
push r13
.cfi_def_cfa_offset 40
push r12
.cfi_def_cfa_offset 48
push rbx
.cfi_def_cfa_offset 56
sub rsp, 1480
.cfi_def_cfa_offset 1536
.cfi_offset rbx, -56
.cfi_offset r12, -48
.cfi_offset r13, -40
.cfi_offset r14, -32
.cfi_offset r15, -24
.cfi_offset rbp, -16
mov qword ptr [rsp + 56], 0
lea r12, [rsp + 64]
mov qword ptr [rsp + 64], 1
mov dword ptr [rsp + 72], 0
lea r15, [rsp + 80]
mov word ptr [rsp + 84], 0
mov dword ptr [rsp + 80], 1048640
mov byte ptr [rsp + 808], 1
mov qword ptr [rsp + 768], 0
mov qword ptr [rsp + 800], r15
lea rdx, [rsp + 768]
mov rdi, r12
mov esi, 2
call <pstd::collections::btree_map::Tree<i32, ()>>::insert::<pstd::collections::btree_map::CustomTuning>
cmp dword ptr [rsp + 768], 1
jne .LBB10_30
mov ebx, dword ptr [rsp + 776]
mov rbp, qword ptr [rsp + 784]
mov r13, qword ptr [rsp + 792]
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov edi, 32
mov esi, 8
call qword ptr [rip + __rustc::__rust_alloc@GOTPCREL]
test rax, rax
je .LBB10_3
mov r14, rax
mov qword ptr [rax], 1
mov dword ptr [rax + 8], 0
mov qword ptr [rax + 16], 8
mov dword ptr [rax + 24], 0
mov esi, 1
mov rdi, rax
mov rdx, r15
call <pstd::collections::btree_map::vecs::PairVec<i32, ()>>::set_alloc::<pstd::collections::btree_map::CustomTuning>
movzx eax, word ptr [r14 + 8]
cmp ax, word ptr [r14 + 10]
jae .LBB10_13
mov rdi, r14
add rdi, 16
mov rcx, qword ptr [r14]
mov dword ptr [rcx + 4*rax], ebx
inc eax
mov word ptr [r14 + 8], ax
mov esi, 2
mov rdx, r15
call <pstd::collections::btree_map::vecs::ShortVec<pstd::collections::btree_map::Tree<i32, ()>>>::set_alloc::<pstd::collections::btree_map::CustomTuning>
mov rdi, r12
mov r12, rbp
mov rbp, r13
mov rbx, qword ptr [rsp + 64]
mov r13, qword ptr [rsp + 72]
mov qword ptr [rsp + 64], 1
mov dword ptr [rsp + 72], 0
movzx eax, word ptr [r14 + 24]
movzx edx, word ptr [r14 + 26]
cmp ax, dx
jae .LBB10_16
mov rcx, qword ptr [r14 + 16]
mov esi, eax
shl esi, 4
mov qword ptr [rcx + rsi], rbx
mov qword ptr [rcx + rsi + 8], r13
lea rsi, [rax + 1]
mov word ptr [r14 + 24], si
cmp si, dx
jae .LBB10_21
shl esi, 4
mov qword ptr [rcx + rsi], r12
mov qword ptr [rcx + rsi + 8], rbp
add eax, 2
mov word ptr [r14 + 24], ax
mov qword ptr [rsp + 64], 0
mov qword ptr [rsp + 72], r14
mov r12, rdi
.LBB10_30:
cmp byte ptr [rsp + 808], 0
jne .LBB10_32
inc qword ptr [rsp + 56]
.LBB10_32:
mov byte ptr [rsp + 808], 1
mov qword ptr [rsp + 768], 0
mov qword ptr [rsp + 800], r15
lea rdx, [rsp + 768]
mov rdi, r12
mov esi, 4
call <pstd::collections::btree_map::Tree<i32, ()>>::insert::<pstd::collections::btree_map::CustomTuning>
cmp dword ptr [rsp + 768], 1
jne .LBB10_41
mov ebx, dword ptr [rsp + 776]
mov rbp, qword ptr [rsp + 784]
mov r13, qword ptr [rsp + 792]
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov edi, 32
mov esi, 8
call qword ptr [rip + __rustc::__rust_alloc@GOTPCREL]
test rax, rax
je .LBB10_3
mov r14, rax
mov qword ptr [rax], 1
mov dword ptr [rax + 8], 0
mov qword ptr [rax + 16], 8
mov dword ptr [rax + 24], 0
mov esi, 1
mov rdi, rax
mov rdx, r15
call <pstd::collections::btree_map::vecs::PairVec<i32, ()>>::set_alloc::<pstd::collections::btree_map::CustomTuning>
movzx eax, word ptr [r14 + 8]
cmp ax, word ptr [r14 + 10]
jae .LBB10_13
mov rdi, r14
add rdi, 16
mov rcx, qword ptr [r14]
mov dword ptr [rcx + 4*rax], ebx
inc eax
mov word ptr [r14 + 8], ax
mov esi, 2
mov rdx, r15
call <pstd::collections::btree_map::vecs::ShortVec<pstd::collections::btree_map::Tree<i32, ()>>>::set_alloc::<pstd::collections::btree_map::CustomTuning>
mov rdi, r12
mov r12, rbp
mov rbp, r13
mov rbx, qword ptr [rsp + 64]
mov r13, qword ptr [rsp + 72]
mov qword ptr [rsp + 64], 1
mov dword ptr [rsp + 72], 0
movzx eax, word ptr [r14 + 24]
movzx edx, word ptr [r14 + 26]
cmp ax, dx
jae .LBB10_16
mov rcx, qword ptr [r14 + 16]
mov esi, eax
shl esi, 4
mov qword ptr [rcx + rsi], rbx
mov qword ptr [rcx + rsi + 8], r13
lea rsi, [rax + 1]
mov word ptr [r14 + 24], si
cmp si, dx
jae .LBB10_21
shl esi, 4
mov qword ptr [rcx + rsi], r12
mov qword ptr [rcx + rsi + 8], rbp
add eax, 2
mov word ptr [r14 + 24], ax
mov qword ptr [rsp + 64], 0
mov qword ptr [rsp + 72], r14
mov r12, rdi
.LBB10_41:
cmp byte ptr [rsp + 808], 0
jne .LBB10_43
inc qword ptr [rsp + 56]
.LBB10_43:
mov byte ptr [rsp + 808], 1
mov qword ptr [rsp + 768], 0
mov qword ptr [rsp + 800], r15
lea rdx, [rsp + 768]
mov rdi, r12
mov esi, 6
call <pstd::collections::btree_map::Tree<i32, ()>>::insert::<pstd::collections::btree_map::CustomTuning>
cmp dword ptr [rsp + 768], 1
jne .LBB10_52
mov ebx, dword ptr [rsp + 776]
mov rbp, qword ptr [rsp + 784]
mov r13, qword ptr [rsp + 792]
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov edi, 32
mov esi, 8
call qword ptr [rip + __rustc::__rust_alloc@GOTPCREL]
test rax, rax
je .LBB10_3
mov r14, rax
mov qword ptr [rax], 1
mov dword ptr [rax + 8], 0
mov qword ptr [rax + 16], 8
mov dword ptr [rax + 24], 0
mov esi, 1
mov rdi, rax
mov rdx, r15
call <pstd::collections::btree_map::vecs::PairVec<i32, ()>>::set_alloc::<pstd::collections::btree_map::CustomTuning>
movzx eax, word ptr [r14 + 8]
cmp ax, word ptr [r14 + 10]
jae .LBB10_13
mov rdi, r14
add rdi, 16
mov rcx, qword ptr [r14]
mov dword ptr [rcx + 4*rax], ebx
inc eax
mov word ptr [r14 + 8], ax
mov esi, 2
mov rdx, r15
call <pstd::collections::btree_map::vecs::ShortVec<pstd::collections::btree_map::Tree<i32, ()>>>::set_alloc::<pstd::collections::btree_map::CustomTuning>
mov r12, rbp
mov rbp, r13
mov rbx, qword ptr [rsp + 64]
mov r13, qword ptr [rsp + 72]
mov qword ptr [rsp + 64], 1
mov dword ptr [rsp + 72], 0
movzx eax, word ptr [r14 + 24]
movzx edx, word ptr [r14 + 26]
cmp ax, dx
jae .LBB10_16
mov rcx, qword ptr [r14 + 16]
mov esi, eax
shl esi, 4
mov qword ptr [rcx + rsi], rbx
mov qword ptr [rcx + rsi + 8], r13
lea rsi, [rax + 1]
mov word ptr [r14 + 24], si
cmp si, dx
jae .LBB10_21
shl esi, 4
mov qword ptr [rcx + rsi], r12
mov qword ptr [rcx + rsi + 8], rbp
add eax, 2
mov word ptr [r14 + 24], ax
mov qword ptr [rsp + 64], 0
mov qword ptr [rsp + 72], r14
.LBB10_52:
cmp byte ptr [rsp + 808], 0
jne .LBB10_54
inc qword ptr [rsp + 56]
.LBB10_54:
movups xmm0, xmmword ptr [rsp + 56]
movups xmm1, xmmword ptr [rsp + 72]
movaps xmmword ptr [rsp + 16], xmm0
movaps xmmword ptr [rsp + 32], xmm1
mov dword ptr [rsp + 12], 1234
mov r12, qword ptr [rsp + 16]
xor ebp, ebp
mov r13d, 1234
lea r14, [rsp + 768]
lea r15, [rsp + 56]
mov rbx, qword ptr [rip + memcpy@GOTPCREL]
jmp .LBB10_55
.p2align 4
.LBB10_70:
inc ebp
cmp ebp, 100000000
je .LBB10_71
.LBB10_55:
mov edx, 704
mov rdi, r14
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
mov qword ptr [rsp + 1472], r12
mov rdi, r14
lea rsi, [rsp + 24]
mov edx, 1
call <pstd::collections::btree_map::Range<i32, ()>>::push_tree
mov edx, 712
mov rdi, r15
mov rsi, r14
call rbx
mov rax, qword ptr [rsp + 760]
test rax, rax
je .LBB10_70
.p2align 4
.LBB10_58:
dec rax
mov qword ptr [rsp + 760], rax
mov rcx, qword ptr [rsp + 64]
cmp rcx, qword ptr [rsp + 72]
je .LBB10_69
mov rax, qword ptr [rsp + 56]
movzx edx, word ptr [rax + 8]
cmp rcx, rdx
jae .LBB10_60
mov rax, qword ptr [rax]
lea rax, [rax + 4*rcx]
inc rcx
mov qword ptr [rsp + 64], rcx
.LBB10_77:
test rax, rax
je .LBB10_70
add r13d, dword ptr [rax]
mov dword ptr [rsp + 12], r13d
mov rax, qword ptr [rsp + 760]
test rax, rax
jne .LBB10_58
jmp .LBB10_70
.LBB10_69:
mov rdi, r15
call <pstd::collections::btree_map::Range<i32, ()>>::next_cold
jmp .LBB10_77
.LBB10_71:
lea rax, [rsp + 12]
mov qword ptr [rsp + 768], rax
mov rax, qword ptr [rip + <i32 as core::fmt::Display>::fmt@GOTPCREL]
mov qword ptr [rsp + 776], rax
lea rdi, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.31]
lea rsi, [rsp + 768]
call qword ptr [rip + std::io::stdio::_print@GOTPCREL]
lea rsi, [rsp + 40]
lea rdi, [rsp + 24]
call <pstd::collections::btree_map::Tree<i32, ()>>::dealloc::<pstd::collections::btree_map::CustomTuning>
cmp qword ptr [rsp + 24], 0
jne .LBB10_75
mov rdi, qword ptr [rsp + 32]
mov esi, 32
mov edx, 8
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB10_75:
add rsp, 1480
.cfi_def_cfa_offset 56
pop rbx
.cfi_def_cfa_offset 48
pop r12
.cfi_def_cfa_offset 40
pop r13
.cfi_def_cfa_offset 32
pop r14
.cfi_def_cfa_offset 24
pop r15
.cfi_def_cfa_offset 16
pop rbp
.cfi_def_cfa_offset 8
ret
.LBB10_60:
.cfi_def_cfa_offset 1536
lea rdi, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.26]
lea rdx, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.36]
mov esi, 32
call qword ptr [rip + core::panicking::panic@GOTPCREL]
jmp .LBB10_4
.LBB10_3:
mov edi, 8
mov esi, 32
call qword ptr [rip + alloc::alloc::handle_alloc_error@GOTPCREL]
jmp .LBB10_4
.LBB10_13:
lea rdi, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.38]
lea rdx, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.39]
mov esi, 39
call qword ptr [rip + core::panicking::panic@GOTPCREL]
jmp .LBB10_4
.LBB10_16:
lea rdi, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.38]
lea rdx, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.42]
mov esi, 39
call qword ptr [rip + core::panicking::panic@GOTPCREL]
jmp .LBB10_4
.LBB10_21:
lea rdi, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.38]
lea rdx, [rip + .Lanon.4e23a766b1161c58f315fd4481a40bbc.42]
mov esi, 39
call qword ptr [rip + core::panicking::panic@GOTPCREL]
.LBB10_4:
ud2
jmp .LBB10_65
mov r15, rax
cmp qword ptr [rsp + 24], 0
jne .LBB10_66
mov rdi, qword ptr [rsp + 32]
mov esi, 32
mov edx, 8
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
mov rdi, r15
call _Unwind_Resume@PLT
jmp .LBB10_25
mov r15, rax
cmp qword ptr [rsp + 768], 0
je .LBB10_28
cmp qword ptr [rsp + 784], 0
jne .LBB10_28
mov r14, qword ptr [rsp + 792]
jmp .LBB10_8
jmp .LBB10_65
mov r15, rax
test r12, r12
jne .LBB10_8
mov esi, 32
mov edx, 8
mov rdi, rbp
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
jmp .LBB10_8
mov r15, rax
test rbx, rbx
jne .LBB10_18
mov esi, 32
mov edx, 8
mov rdi, r13
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB10_18:
mov r13, rbp
mov rbp, r12
jmp .LBB10_26
.LBB10_25:
mov r15, rax
.LBB10_26:
mov esi, 32
mov edx, 8
mov rdi, r14
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
jmp .LBB10_27
mov r15, rax
.LBB10_27:
mov r14, r13
test rbp, rbp
jne .LBB10_28
.LBB10_8:
mov esi, 32
mov edx, 8
mov rdi, r14
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB10_28:
lea rdi, [rsp + 56]
call core::ptr::drop_in_place::<pstd::collections::btree_set::BTreeSet<i32>>
jmp .LBB10_66
call qword ptr [rip + core::panicking::panic_in_cleanup@GOTPCREL]
.LBB10_65:
mov r15, rax
lea rdi, [rsp + 16]
call core::ptr::drop_in_place::<pstd::collections::btree_set::BTreeSet<i32>>
.LBB10_66:
mov rdi, r15
call _Unwind_Resume@PLT
call qword ptr [rip + core::panicking::panic_in_cleanup@GOTPCREL]
Maybe I am mis-interpreting this code:
.LBB10_55:
mov edx, 704
mov rdi, r14
xor esi, esi
call qword ptr [rip + memset@GOTPCREL]
Now I look at it more carefully, it seems to be zeroing 704 bytes, but I don’t know why. Hmm, maybe it needs to zero various parts of the struct ( Iterator ) so it zeroes all of it rather than fiddling around. Except if it is big, that can be slow. I think…
[ There are 2 fixed-length vecs in the struct and some other stuff, I think the only thing that needs to be initialised is the “other stuff” and the two vec lengths, but instead of doing 2 word-stores, it is zeroing it all… Yes, I think that is it. ]