The following code
use std::mem::MaybeUninit;
// Foreign items resolved at link time. The ABI is written out explicitly;
// a bare `extern` block defaults to the same "C" ABI.
extern "C" {
    /// Externally defined routine that is handed exclusive access to an
    /// eight-element `f64` array.
    ///
    /// NOTE(review): presumably it fills in every element of `a` — its
    /// definition is not part of this snippet, so confirm.
    fn init(a: &mut [f64; 8]);
}
/// Two `i32` values (`rows`, `cols`) stored alongside a fixed array of eight
/// `f64`s.
///
/// NOTE(review): nothing in this type relates `rows`/`cols` to the eight
/// slots of `data`; presumably callers keep them consistent — confirm.
struct M {
    /// First `i32` supplied at construction.
    rows: i32,
    /// Second `i32` supplied at construction.
    cols: i32,
    /// Fixed-size payload of eight `f64` values.
    data: [f64; 8],
}
impl M {
fn new(rows: i32, cols: i32) -> M {
let mut m = M {
rows,
cols,
data: unsafe { MaybeUninit::uninit().assume_init() },
};
unsafe { init(&mut m.data); }
m
}
}
/// Appends one new element, built with `M::new(rows, cols)`, to the end of
/// `v`.
///
/// Exported under its unmangled name `push`.
#[no_mangle]
fn push(v: &mut Vec<M>, rows: i32, cols: i32) {
    let element = M::new(rows, cols);
    v.push(element);
}
#[no_mangle]
fn push2(v: &mut Vec<M>, rows: i32, cols: i32) {
let mut m = M {
rows,
cols,
data: unsafe { MaybeUninit::uninit().assume_init() },
};
unsafe { init(&mut m.data); }
v.push(m);
}
produces an unnecessary stack copy:
# Compiler output for `push` (x86-64, AT&T syntax).
# Assumes the System V AMD64 calling convention: %rdi = v, %esi = rows,
# %edx = cols.
push:
# Save the two registers used below and reserve 152 bytes of stack.
pushq %r14
pushq %rbx
subq $152, %rsp
# Keep `v` in %rbx across the calls.
movq %rdi, %rbx
# part 1: init m
# First copy of m occupies 80..151(%rsp): the 64-byte array at 80(%rsp),
# rows at 144(%rsp), cols at 148(%rsp).
movl %esi, 144(%rsp)
movl %edx, 148(%rsp)
# Pass the address of the array as the argument to `init`.
leaq 80(%rsp), %rdi
callq *init@GOTPCREL(%rip)
# part 2: copy m on stack??
# rows+cols (one 8-byte load) move from 144(%rsp) to 64(%rsp).
movq 144(%rsp), %rax
movq %rax, 64(%rsp)
# The 64-byte array moves from 80(%rsp) to 0(%rsp) in four 16-byte chunks,
# so a second copy of m now occupies 0..71(%rsp).
movups 80(%rsp), %xmm0
movups 96(%rsp), %xmm1
movups 112(%rsp), %xmm2
movups 128(%rsp), %xmm3
movaps %xmm3, 48(%rsp)
movaps %xmm2, 32(%rsp)
movaps %xmm1, 16(%rsp)
movaps %xmm0, (%rsp)
# Compare the vector's field at offset 16 with the one at offset 0 and call
# grow_one only when they are equal (these look like length and capacity).
movq 16(%rbx), %r14
cmpq (%rbx), %r14
jne .LBB2_2
movq %rbx, %rdi
callq alloc::raw_vec::RawVec<T,A>::grow_one::hbe3227e8b97c2791
.LBB2_2:
# part 3: copy m from stack to heap
# %rax = vector field at offset 8, used as the base address of the buffer.
movq 8(%rbx), %rax
# %rcx = 9 * %r14; scaled by 8 in the addressing below this gives a
# 72-byte stride per element.
leaq (%r14,%r14,8), %rcx
# rows+cols go to offset 64 of the destination slot.
movq 64(%rsp), %rdx
movq %rdx, 64(%rax,%rcx,8)
# The array is read back from the second stack copy and stored at offsets
# 0..63 of the destination slot.
movaps (%rsp), %xmm0
movaps 16(%rsp), %xmm1
movaps 32(%rsp), %xmm2
movaps 48(%rsp), %xmm3
movups %xmm3, 48(%rax,%rcx,8)
movups %xmm2, 32(%rax,%rcx,8)
movups %xmm1, 16(%rax,%rcx,8)
movups %xmm0, (%rax,%rcx,8)
# Store the incremented value back to the vector field at offset 16.
incq %r14
movq %r14, 16(%rbx)
# Release the stack frame, restore the saved registers and return.
addq $152, %rsp
popq %rbx
popq %r14
retq
As we can see, `# part 2` in the generated assembly code is redundant: we could simply copy from the first stack location to the heap. Rustc 1.70.0 and earlier versions don't create this stack copy. Interestingly enough, our manually inlined `push2` always has this stack copy. Have any (semantic) changes been made since Rust 1.70.0? Or is this a bug? Any comment/explanation would be welcome & appreciated ;D
Godbolt link: Compiler Explorer.