Beside Escape Analysis to remove some heap allocations, it's good to have some Bounds Check Elimination in the compiler, this is about recent changes in the Go compiler:
Discussion thread:
https://news.ycombinator.com/item?id=12336430
A paper linked in the ycombinator thread:
http://ps2pdf.com/tgetfile/E-235.pdf?key=1471982981&name=E-235.pdf
The first Go example in Rust:
#[inline(never)]
fn test1(s: &mut [usize]) {
s[0] += 1;
s[1] += 1;
s[2] += 1;
}
#[inline(never)]
fn test2(s: &mut [usize]) {
s[2] += 1;
s[1] += 1;
s[0] += 1;
}
#[inline(never)]
fn test3(s: &mut [usize], index: usize) {
s[index] += 1;
s[index] += 1;
}
#[inline(never)]
fn test4(a: &mut [usize; 5]) {
a[4] += 1;
}
fn main() {
let mut arr = std::env::args()
.skip(1)
.map(|s| s.parse().unwrap())
.collect::<Vec<usize>>();
println!("{:?}", arr);
let idx = arr[0];
test1(&mut arr);
println!("{:?}", arr);
test2(&mut arr);
println!("{:?}", arr);
test3(&mut arr, idx);
println!("{:?}", arr);
let mut arr5 = [0; 5];
arr5.copy_from_slice(&arr[0 .. 5]);
test4(&mut arr5);
println!("{:?}", arr);
}
Its asm shows the same number of bound tests as the Go code (compiled with -C opt-level=3 --emit asm
):
_ZN8example15test117hd1a6d72066f829d8E:
subq $40, %rsp
movq %rdx, %rax
testq %rax, %rax
je .LBB6_4
incq (%rcx)
cmpq $1, %rax
je .LBB6_5
incq 8(%rcx)
cmpq $3, %rax
jb .LBB6_6
incq 16(%rcx)
addq $40, %rsp
retq
.LBB6_4:
leaq panic_bounds_check_loc7695(%rip), %rcx
xorl %edx, %edx
xorl %r8d, %r8d
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
.LBB6_5:
leaq panic_bounds_check_loc7696(%rip), %rcx
movl $1, %edx
movl $1, %r8d
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
.LBB6_6:
leaq panic_bounds_check_loc7697(%rip), %rcx
movl $2, %edx
movq %rax, %r8
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
_ZN8example15test217h3510aa60084d7ad2E:
subq $40, %rsp
movq %rdx, %rax
cmpq $3, %rax
jb .LBB7_2
incq 16(%rcx)
incq 8(%rcx)
incq (%rcx)
addq $40, %rsp
retq
.LBB7_2:
leaq panic_bounds_check_loc7698(%rip), %rcx
movl $2, %edx
movq %rax, %r8
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
_ZN8example15test317h2b35347fd3633068E:
subq $40, %rsp
movq %rdx, %rax
cmpq %rax, %r8
jae .LBB8_2
addq $2, (%rcx,%r8,8)
addq $40, %rsp
retq
.LBB8_2:
leaq panic_bounds_check_loc7701(%rip), %rcx
movq %r8, %rdx
movq %rax, %r8
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
_ZN8example15test417h5faa34a7c08a00cbE:
incq 32(%rcx)
retq
This could be similar to the second Go program (but I am not sure this code is equivalent to the Go code regarding the array accesses):
#[inline(never)]
fn test5(s: &mut [usize]) {
for i in 0 .. s.len() {
s[i] += 1;
for j in i .. s.len() { s[j] += 1; }
for j in 0 .. i + 1 { s[j] += 1; }
}
}
#[inline(never)]
fn test6(s: &mut [usize]) {
let mut i = 0;
while i < s.len() {
s[i] += 1;
for j in i .. s.len() { s[j] += 1; }
for j in 0 .. i + 1 { s[j] += 1; }
i += 1;
}
}
#[inline(never)]
fn test7(s: &mut [usize]) {
for i in (0 .. s.len()).rev() {
s[i] += 1;
for j in i .. s.len() { s[j] += 1; }
}
}
#[inline(never)]
fn test8(s: &mut [usize], index: usize) {
if index > 0 && index < s.len() {
s[index] += 1;
for j in index .. s.len() { s[j] += 1; }
}
}
#[inline(never)]
fn test9(s: &mut [usize]) {
if s.len() > 2 {
s[0] += 1;
s[1] += 1;
s[2] += 1;
}
}
fn main() {
let mut arr = std::env::args()
.skip(1)
.map(|s| s.parse().unwrap())
.collect::<Vec<usize>>();
println!("{:?}", arr);
let idx = arr[0];
test5(&mut arr);
println!("{:?}", arr);
test6(&mut arr);
println!("{:?}", arr);
test7(&mut arr);
println!("{:?}", arr);
test8(&mut arr, idx);
println!("{:?}", arr);
test9(&mut arr);
println!("{:?}", arr);
}
Its asm shows more bound tests compared to the Go compiler:
_ZN8example25test517h1ec1f8ae0c4d1368E:
pushq %r15
pushq %r14
pushq %r12
pushq %rsi
pushq %rdi
pushq %rbx
subq $40, %rsp
movq %rdx, %r11
testq %r11, %r11
je .LBB6_19
leaq -1(%r11), %r8
leaq 56(%rcx), %r9
leaq -5(%r11), %r10
xorl %ebx, %ebx
movdqa LCPI6_0(%rip), %xmm0
.p2align 4, 0x90
.LBB6_3:
movq %r10, %rsi
shrq $2, %rsi
leal 1(%rsi), %edi
andl $1, %edi
leaq 1(%rbx), %rax
addq $2, (%rcx,%rbx,8)
cmpq %r11, %rax
je .LBB6_15
movq %r8, %rdx
subq %rbx, %rdx
cmpq $4, %rdx
movq %rax, %rbx
jb .LBB6_13
movq %rdx, %r12
andq $-4, %r12
movq %rdx, %r15
andq $-4, %r15
movq %rax, %rbx
je .LBB6_13
leaq -4(%rdx), %rbx
shrq $2, %rbx
leaq 1(%rbx), %r14
andl $1, %r14d
testq %rbx, %rbx
movl $0, %ebx
je .LBB6_9
decq %rdi
subq %rsi, %rdi
movq %r9, %rsi
xorl %ebx, %ebx
.p2align 4, 0x90
.LBB6_8:
movdqu -48(%rsi), %xmm1
movdqu -32(%rsi), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -48(%rsi)
movdqu %xmm2, -32(%rsi)
movdqu -16(%rsi), %xmm1
movdqu (%rsi), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -16(%rsi)
movdqu %xmm2, (%rsi)
addq $8, %rbx
addq $64, %rsi
addq $2, %rdi
jne .LBB6_8
.LBB6_9:
testq %r14, %r14
je .LBB6_11
addq %rax, %rbx
movdqu (%rcx,%rbx,8), %xmm1
movdqu 16(%rcx,%rbx,8), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, (%rcx,%rbx,8)
movdqu %xmm2, 16(%rcx,%rbx,8)
.LBB6_11:
cmpq %r15, %rdx
je .LBB6_15
addq %rax, %r12
movq %r12, %rbx
.p2align 4, 0x90
.LBB6_13:
leaq (%rcx,%rbx,8), %rdx
movq %r11, %rsi
subq %rbx, %rsi
.p2align 4, 0x90
.LBB6_14:
incq (%rdx)
addq $8, %rdx
decq %rsi
jne .LBB6_14
.LBB6_15:
xorl %edx, %edx
.p2align 4, 0x90
.LBB6_16:
cmpq %r11, %rdx
jae .LBB6_18
incq (%rcx,%rdx,8)
incq %rdx
cmpq %rax, %rdx
jb .LBB6_16
addq $8, %r9
decq %r10
cmpq %r11, %rax
movq %rax, %rbx
jb .LBB6_3
.LBB6_19:
addq $40, %rsp
popq %rbx
popq %rdi
popq %rsi
popq %r12
popq %r14
popq %r15
retq
.LBB6_18:
leaq panic_bounds_check_loc7880(%rip), %rcx
movq %r11, %r8
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
_ZN8example25test617h6ec492878b1c4a03E:
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rsi
pushq %rdi
pushq %rbx
subq $32, %rsp
movq %rdx, %r11
testq %r11, %r11
je .LBB7_19
leaq -1(%r11), %r8
leaq 56(%rcx), %r9
leaq -5(%r11), %r10
xorl %eax, %eax
movdqa LCPI7_0(%rip), %xmm0
.p2align 4, 0x90
.LBB7_3:
movq %r10, %rdx
shrq $2, %rdx
leal 1(%rdx), %ebx
andl $1, %ebx
leaq 1(%rax), %r13
addq $2, (%rcx,%rax,8)
cmpq %r11, %r13
je .LBB7_7
movq %r8, %rsi
subq %rax, %rsi
cmpq $3, %rsi
jbe .LBB7_5
movq %rsi, %r14
andq $-4, %r14
movq %rsi, %r12
andq $-4, %r12
je .LBB7_5
leaq -4(%rsi), %rdi
shrq $2, %rdi
leaq 1(%rdi), %r15
andl $1, %r15d
testq %rdi, %rdi
movl $0, %edi
je .LBB7_14
decq %rbx
subq %rdx, %rbx
movq %r9, %rdx
xorl %edi, %edi
.p2align 4, 0x90
.LBB7_13:
movdqu -48(%rdx), %xmm1
movdqu -32(%rdx), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -48(%rdx)
movdqu %xmm2, -32(%rdx)
movdqu -16(%rdx), %xmm1
movdqu (%rdx), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -16(%rdx)
movdqu %xmm2, (%rdx)
addq $8, %rdi
addq $64, %rdx
addq $2, %rbx
jne .LBB7_13
.LBB7_14:
testq %r15, %r15
je .LBB7_16
addq %r13, %rdi
movdqu (%rcx,%rdi,8), %xmm1
movdqu 16(%rcx,%rdi,8), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, (%rcx,%rdi,8)
movdqu %xmm2, 16(%rcx,%rdi,8)
.LBB7_16:
cmpq %r12, %rsi
je .LBB7_7
addq %r14, %r13
.p2align 4, 0x90
.LBB7_5:
leaq (%rcx,%r13,8), %rdx
movq %r11, %rsi
subq %r13, %rsi
.p2align 4, 0x90
.LBB7_6:
incq (%rdx)
addq $8, %rdx
decq %rsi
jne .LBB7_6
.LBB7_7:
incq %rax
xorl %edx, %edx
.p2align 4, 0x90
.LBB7_8:
cmpq %r11, %rdx
jae .LBB7_18
incq (%rcx,%rdx,8)
incq %rdx
cmpq %rax, %rdx
jb .LBB7_8
addq $8, %r9
decq %r10
cmpq %r11, %rax
jb .LBB7_3
.LBB7_19:
addq $32, %rsp
popq %rbx
popq %rdi
popq %rsi
popq %r12
popq %r13
popq %r14
popq %r15
retq
.LBB7_18:
leaq panic_bounds_check_loc7883(%rip), %rcx
movq %r11, %r8
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
_ZN8example25test717hc14d131564952402E:
pushq %r15
pushq %r14
pushq %r12
pushq %rsi
pushq %rdi
pushq %rbx
subq $40, %rsp
movq %rdx, %r8
testq %r8, %r8
je .LBB8_17
leaq 48(%rcx,%r8,8), %r9
movq $-4, %r10
xorl %r11d, %r11d
movdqa LCPI8_0(%rip), %xmm0
movq %r8, %rdx
.p2align 4, 0x90
.LBB8_3:
movq %rdx, %rax
movq %r10, %rbx
shrq $2, %rbx
leal 1(%rbx), %edi
andl $1, %edi
leaq -1(%rax), %rdx
cmpq %r8, %rdx
jae .LBB8_16
addq $2, -8(%rcx,%rax,8)
cmpq %r8, %rax
je .LBB8_2
cmpq $4, %r11
jb .LBB8_14
movq %r11, %r14
andq $-4, %r14
movq %r11, %r12
andq $-4, %r12
je .LBB8_14
leaq -4(%r11), %rsi
shrq $2, %rsi
leaq 1(%rsi), %r15
andl $1, %r15d
testq %rsi, %rsi
movl $0, %esi
je .LBB8_10
decq %rdi
subq %rbx, %rdi
movq %r9, %rbx
xorl %esi, %esi
.p2align 4, 0x90
.LBB8_9:
movdqu -48(%rbx), %xmm1
movdqu -32(%rbx), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -48(%rbx)
movdqu %xmm2, -32(%rbx)
movdqu -16(%rbx), %xmm1
movdqu (%rbx), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -16(%rbx)
movdqu %xmm2, (%rbx)
addq $8, %rsi
addq $64, %rbx
addq $2, %rdi
jne .LBB8_9
.LBB8_10:
testq %r15, %r15
je .LBB8_12
addq %rax, %rsi
movdqu (%rcx,%rsi,8), %xmm1
movdqu 16(%rcx,%rsi,8), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, (%rcx,%rsi,8)
movdqu %xmm2, 16(%rcx,%rsi,8)
.LBB8_12:
cmpq %r12, %r11
je .LBB8_2
addq %r14, %rax
.p2align 4, 0x90
.LBB8_14:
leaq (%rcx,%rax,8), %rsi
movq %r8, %rdi
subq %rax, %rdi
.p2align 4, 0x90
.LBB8_15:
incq (%rsi)
addq $8, %rsi
decq %rdi
jne .LBB8_15
.LBB8_2:
incq %r11
addq $-8, %r9
incq %r10
testq %rdx, %rdx
jne .LBB8_3
.LBB8_17:
addq $40, %rsp
popq %rbx
popq %rdi
popq %rsi
popq %r12
popq %r14
popq %r15
retq
.LBB8_16:
leaq panic_bounds_check_loc7884(%rip), %rcx
callq _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
ud2
_ZN8example25test817h80b36bd263b1d523E:
pushq %r15
pushq %r14
pushq %rsi
pushq %rdi
pushq %rbx
testq %r8, %r8
je .LBB9_14
cmpq %r8, %rdx
jbe .LBB9_14
leaq 1(%r8), %r9
addq $2, (%rcx,%r8,8)
cmpq %rdx, %r9
je .LBB9_14
leaq -1(%rdx), %rax
subq %r8, %rax
cmpq $4, %rax
jb .LBB9_12
movq %rax, %r10
andq $-4, %r10
movq %rax, %r11
andq $-4, %r11
je .LBB9_12
leaq -4(%r11), %r15
shrq $2, %r15
leal 1(%r15), %r14d
andl $1, %r14d
xorl %esi, %esi
testq %r15, %r15
je .LBB9_8
leaq 56(%rcx,%r8,8), %rdi
leaq -1(%r14), %rbx
subq %r15, %rbx
xorl %esi, %esi
movdqa LCPI9_0(%rip), %xmm0
.p2align 4, 0x90
.LBB9_7:
movdqu -48(%rdi,%rsi,8), %xmm1
movdqu -32(%rdi,%rsi,8), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -48(%rdi,%rsi,8)
movdqu %xmm2, -32(%rdi,%rsi,8)
movdqu -16(%rdi,%rsi,8), %xmm1
movdqu (%rdi,%rsi,8), %xmm2
paddq %xmm0, %xmm1
paddq %xmm0, %xmm2
movdqu %xmm1, -16(%rdi,%rsi,8)
movdqu %xmm2, (%rdi,%rsi,8)
addq $8, %rsi
addq $2, %rbx
jne .LBB9_7
.LBB9_8:
testq %r14, %r14
je .LBB9_10
addq %r9, %rsi
movdqu (%rcx,%rsi,8), %xmm0
movdqu 16(%rcx,%rsi,8), %xmm1
movdqa LCPI9_0(%rip), %xmm2
paddq %xmm2, %xmm0
paddq %xmm2, %xmm1
movdqu %xmm0, (%rcx,%rsi,8)
movdqu %xmm1, 16(%rcx,%rsi,8)
.LBB9_10:
cmpq %r11, %rax
je .LBB9_14
addq %r10, %r9
.LBB9_12:
leaq (%rcx,%r9,8), %rax
subq %r9, %rdx
.p2align 4, 0x90
.LBB9_13:
incq (%rax)
addq $8, %rax
decq %rdx
jne .LBB9_13
.LBB9_14:
popq %rbx
popq %rdi
popq %rsi
popq %r14
popq %r15
retq
_ZN8example25test917hc844fc6cd2df1f23E:
cmpq $3, %rdx
jb .LBB10_2
incq (%rcx)
incq 8(%rcx)
incq 16(%rcx)
.LBB10_2:
retq
This is derived from the third Go program:
#[inline(never)]
fn testb(s: &mut [usize], index: usize) -> usize {
s[.. index].iter().sum::<usize>() +
s[index ..].iter().sum::<usize>()
}
fn main() {
let mut arr = std::env::args()
.skip(1)
.map(|s| s.parse().unwrap())
.collect::<Vec<usize>>();
println!("{:?}", arr);
let idx = arr[0];
println!("{:?}", testb(&mut arr, idx));
}
Its asm:
_ZN8example35testb17heea21a17acd84092E:
subq $40, %rsp
movq %rdx, %r9
cmpq %r8, %r9
jb .LBB6_29
xorl %eax, %eax
testq %r8, %r8
je .LBB6_15
leaq (,%r8,8), %r10
xorl %eax, %eax
movq %rcx, %r11
.p2align 4, 0x90
.LBB6_3:
addq (%r11), %rax
jb .LBB6_4
addq $8, %r11
addq $-8, %r10
jne .LBB6_3
.LBB6_15:
xorl %r10d, %r10d
cmpq %r8, %r9
je .LBB6_28
leaq (%rcx,%r8,8), %rcx
shlq $3, %r9
shlq $3, %r8
subq %r8, %r9
xorl %r10d, %r10d
.p2align 4, 0x90
.LBB6_17:
addq (%rcx), %r10
jb .LBB6_18
addq $8, %rcx
addq $-8, %r9
jne .LBB6_17
.LBB6_28:
addq %rax, %r10
movq %r10, %rax
addq $40, %rsp
retq
.LBB6_4:
.Ltmp53:
leaq str7712(%rip), %rcx
movl $15, %edx
callq _ZN4core6option13expect_failed17h10bc1f1a367b75ceE
.Ltmp54:
ud2
.LBB6_18:
.Ltmp65:
leaq str7712(%rip), %rcx
movl $15, %edx
callq _ZN4core6option13expect_failed17h10bc1f1a367b75ceE
.Ltmp66:
ud2
.LBB6_29:
movq %r8, %rcx
movq %r9, %rdx
callq _ZN4core5slice20slice_index_len_fail17h88e9fffcb3bb41a6E
ud2
.LBB6_22:
.Ltmp67:
.Ltmp68:
movq %rax, %rcx
callq rust_eh_unwind_resume
.Ltmp69:
ud2
.LBB6_20:
.Ltmp70:
.Ltmp71:
movq %rax, %rcx
callq rust_eh_unwind_resume
.Ltmp72:
ud2
.LBB6_24:
.Ltmp73:
.Ltmp74:
movq %rax, %rcx
callq rust_eh_unwind_resume
.Ltmp75:
ud2
.LBB6_26:
.Ltmp76:
jmp .LBB6_13
.LBB6_8:
.Ltmp55:
.Ltmp56:
movq %rax, %rcx
callq rust_eh_unwind_resume
.Ltmp57:
ud2
.LBB6_6:
.Ltmp58:
.Ltmp59:
movq %rax, %rcx
callq rust_eh_unwind_resume
.Ltmp60:
ud2
.LBB6_10:
.Ltmp61:
.Ltmp62:
movq %rax, %rcx
callq rust_eh_unwind_resume
.Ltmp63:
ud2
.LBB6_12:
.Ltmp64:
.LBB6_13:
movq %rax, %rcx
callq rust_eh_unwind_resume
ud2