Bounds Check Elimination in Go 1.7

Besides Escape Analysis to remove some heap allocations, it's good to have some Bounds Check Elimination in the compiler. This thread is about recent changes in the Go compiler:

Discussion thread:
https://news.ycombinator.com/item?id=12336430

A paper linked in the ycombinator thread:
http://ps2pdf.com/tgetfile/E-235.pdf?key=1471982981&name=E-235.pdf

The first Go example in Rust:

#[inline(never)]
fn test1(s: &mut [usize]) {
    s[0] += 1;
    s[1] += 1;
    s[2] += 1;
}

#[inline(never)]
fn test2(s: &mut [usize]) {
    s[2] += 1;
    s[1] += 1;
    s[0] += 1;
}

#[inline(never)]
fn test3(s: &mut [usize], index: usize) {
    s[index] += 1;
    s[index] += 1;
}

#[inline(never)]
fn test4(a: &mut [usize; 5]) {
    a[4] += 1;
}

fn main() {
    let mut arr = std::env::args()
                  .skip(1)
                  .map(|s| s.parse().unwrap())
                  .collect::<Vec<usize>>();
    println!("{:?}", arr);

    let idx = arr[0];

    test1(&mut arr);
    println!("{:?}", arr);

    test2(&mut arr);
    println!("{:?}", arr);

    test3(&mut arr, idx);
    println!("{:?}", arr);

    let mut arr5 = [0; 5];
    arr5.copy_from_slice(&arr[0 .. 5]);
    test4(&mut arr5);
    println!("{:?}", arr5);
}

Its asm shows the same number of bounds checks as the Go code (compiled with -C opt-level=3 --emit asm):

_ZN8example15test117hd1a6d72066f829d8E:
    subq    $40, %rsp
    movq    %rdx, %rax
    testq    %rax, %rax
    je    .LBB6_4
    incq    (%rcx)
    cmpq    $1, %rax
    je    .LBB6_5
    incq    8(%rcx)
    cmpq    $3, %rax
    jb    .LBB6_6
    incq    16(%rcx)
    addq    $40, %rsp
    retq
.LBB6_4:
    leaq    panic_bounds_check_loc7695(%rip), %rcx
    xorl    %edx, %edx
    xorl    %r8d, %r8d
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2
.LBB6_5:
    leaq    panic_bounds_check_loc7696(%rip), %rcx
    movl    $1, %edx
    movl    $1, %r8d
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2
.LBB6_6:
    leaq    panic_bounds_check_loc7697(%rip), %rcx
    movl    $2, %edx
    movq    %rax, %r8
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2


_ZN8example15test217h3510aa60084d7ad2E:
    subq    $40, %rsp
    movq    %rdx, %rax
    cmpq    $3, %rax
    jb    .LBB7_2
    incq    16(%rcx)
    incq    8(%rcx)
    incq    (%rcx)
    addq    $40, %rsp
    retq
.LBB7_2:
    leaq    panic_bounds_check_loc7698(%rip), %rcx
    movl    $2, %edx
    movq    %rax, %r8
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2

_ZN8example15test317h2b35347fd3633068E:
    subq    $40, %rsp
    movq    %rdx, %rax
    cmpq    %rax, %r8
    jae    .LBB8_2
    addq    $2, (%rcx,%r8,8)
    addq    $40, %rsp
    retq
.LBB8_2:
    leaq    panic_bounds_check_loc7701(%rip), %rcx
    movq    %r8, %rdx
    movq    %rax, %r8
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2

_ZN8example15test417h5faa34a7c08a00cbE:
    incq    32(%rcx)
    retq
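
A sketch of my own (test1_fixed and test1_resliced are names I made up, not from the post): two ways to help LLVM drop the per-element checks in test1 are to take a fixed-size array reference, as test4 already does, or to reslice to a known length up front so that at most one check remains. Whether that remaining check is then folded away depends on the LLVM version, so it's worth verifying in the generated asm.

#[inline(never)]
fn test1_fixed(a: &mut [usize; 3]) {
    // No bounds checks: the length is part of the type.
    a[0] += 1;
    a[1] += 1;
    a[2] += 1;
}

#[inline(never)]
fn test1_resliced(s: &mut [usize]) {
    // One range check here; the three element accesses below should then
    // be check-free because the new slice has a known length of 3.
    let s = &mut s[..3];
    s[0] += 1;
    s[1] += 1;
    s[2] += 1;
}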

This could be similar to the second Go program (but I am not sure this code is equivalent to the Go code regarding the array accesses):

#[inline(never)]
fn test5(s: &mut [usize]) {
    for i in 0 .. s.len() {
        s[i] += 1;
        for j in i .. s.len() { s[j] += 1; }
        for j in 0 .. i + 1 { s[j] += 1; }
    }
}

#[inline(never)]
fn test6(s: &mut [usize]) {
    let mut i = 0;
    while i < s.len() {
        s[i] += 1;
        for j in i .. s.len() { s[j] += 1; }
        for j in 0 .. i + 1 { s[j] += 1; }
        i += 1;
    }
}

#[inline(never)]
fn test7(s: &mut [usize]) {
    for i in (0 .. s.len()).rev() {
        s[i] += 1;
        for j in i .. s.len() { s[j] += 1; }
    }
}

#[inline(never)]
fn test8(s: &mut [usize], index: usize) {
    if index > 0 && index < s.len() {
        s[index] += 1;
        for j in index .. s.len() { s[j] += 1; }
    }
}

#[inline(never)]
fn test9(s: &mut [usize]) {
    if s.len() > 2 {
        s[0] += 1;
        s[1] += 1;
        s[2] += 1;
    }
}

fn main() {
    let mut arr = std::env::args()
                  .skip(1)
                  .map(|s| s.parse().unwrap())
                  .collect::<Vec<usize>>();
    println!("{:?}", arr);

    let idx = arr[0];

    test5(&mut arr);
    println!("{:?}", arr);

    test6(&mut arr);
    println!("{:?}", arr);

    test7(&mut arr);
    println!("{:?}", arr);

    test8(&mut arr, idx);
    println!("{:?}", arr);

    test9(&mut arr);
    println!("{:?}", arr);
}

Its asm shows more bounds checks than the Go compiler produces:

_ZN8example25test517h1ec1f8ae0c4d1368E:
    pushq    %r15
    pushq    %r14
    pushq    %r12
    pushq    %rsi
    pushq    %rdi
    pushq    %rbx
    subq    $40, %rsp
    movq    %rdx, %r11
    testq    %r11, %r11
    je    .LBB6_19
    leaq    -1(%r11), %r8
    leaq    56(%rcx), %r9
    leaq    -5(%r11), %r10
    xorl    %ebx, %ebx
    movdqa    LCPI6_0(%rip), %xmm0
    .p2align    4, 0x90
.LBB6_3:
    movq    %r10, %rsi
    shrq    $2, %rsi
    leal    1(%rsi), %edi
    andl    $1, %edi
    leaq    1(%rbx), %rax
    addq    $2, (%rcx,%rbx,8)
    cmpq    %r11, %rax
    je    .LBB6_15
    movq    %r8, %rdx
    subq    %rbx, %rdx
    cmpq    $4, %rdx
    movq    %rax, %rbx
    jb    .LBB6_13
    movq    %rdx, %r12
    andq    $-4, %r12
    movq    %rdx, %r15
    andq    $-4, %r15
    movq    %rax, %rbx
    je    .LBB6_13
    leaq    -4(%rdx), %rbx
    shrq    $2, %rbx
    leaq    1(%rbx), %r14
    andl    $1, %r14d
    testq    %rbx, %rbx
    movl    $0, %ebx
    je    .LBB6_9
    decq    %rdi
    subq    %rsi, %rdi
    movq    %r9, %rsi
    xorl    %ebx, %ebx
    .p2align    4, 0x90
.LBB6_8:
    movdqu    -48(%rsi), %xmm1
    movdqu    -32(%rsi), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -48(%rsi)
    movdqu    %xmm2, -32(%rsi)
    movdqu    -16(%rsi), %xmm1
    movdqu    (%rsi), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -16(%rsi)
    movdqu    %xmm2, (%rsi)
    addq    $8, %rbx
    addq    $64, %rsi
    addq    $2, %rdi
    jne    .LBB6_8
.LBB6_9:
    testq    %r14, %r14
    je    .LBB6_11
    addq    %rax, %rbx
    movdqu    (%rcx,%rbx,8), %xmm1
    movdqu    16(%rcx,%rbx,8), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, (%rcx,%rbx,8)
    movdqu    %xmm2, 16(%rcx,%rbx,8)
.LBB6_11:
    cmpq    %r15, %rdx
    je    .LBB6_15
    addq    %rax, %r12
    movq    %r12, %rbx
    .p2align    4, 0x90
.LBB6_13:
    leaq    (%rcx,%rbx,8), %rdx
    movq    %r11, %rsi
    subq    %rbx, %rsi
    .p2align    4, 0x90
.LBB6_14:
    incq    (%rdx)
    addq    $8, %rdx
    decq    %rsi
    jne    .LBB6_14
.LBB6_15:
    xorl    %edx, %edx
    .p2align    4, 0x90
.LBB6_16:
    cmpq    %r11, %rdx
    jae    .LBB6_18
    incq    (%rcx,%rdx,8)
    incq    %rdx
    cmpq    %rax, %rdx
    jb    .LBB6_16
    addq    $8, %r9
    decq    %r10
    cmpq    %r11, %rax
    movq    %rax, %rbx
    jb    .LBB6_3
.LBB6_19:
    addq    $40, %rsp
    popq    %rbx
    popq    %rdi
    popq    %rsi
    popq    %r12
    popq    %r14
    popq    %r15
    retq
.LBB6_18:
    leaq    panic_bounds_check_loc7880(%rip), %rcx
    movq    %r11, %r8
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2


_ZN8example25test617h6ec492878b1c4a03E:
    pushq    %r15
    pushq    %r14
    pushq    %r13
    pushq    %r12
    pushq    %rsi
    pushq    %rdi
    pushq    %rbx
    subq    $32, %rsp
    movq    %rdx, %r11
    testq    %r11, %r11
    je    .LBB7_19
    leaq    -1(%r11), %r8
    leaq    56(%rcx), %r9
    leaq    -5(%r11), %r10
    xorl    %eax, %eax
    movdqa    LCPI7_0(%rip), %xmm0
    .p2align    4, 0x90
.LBB7_3:
    movq    %r10, %rdx
    shrq    $2, %rdx
    leal    1(%rdx), %ebx
    andl    $1, %ebx
    leaq    1(%rax), %r13
    addq    $2, (%rcx,%rax,8)
    cmpq    %r11, %r13
    je    .LBB7_7
    movq    %r8, %rsi
    subq    %rax, %rsi
    cmpq    $3, %rsi
    jbe    .LBB7_5
    movq    %rsi, %r14
    andq    $-4, %r14
    movq    %rsi, %r12
    andq    $-4, %r12
    je    .LBB7_5
    leaq    -4(%rsi), %rdi
    shrq    $2, %rdi
    leaq    1(%rdi), %r15
    andl    $1, %r15d
    testq    %rdi, %rdi
    movl    $0, %edi
    je    .LBB7_14
    decq    %rbx
    subq    %rdx, %rbx
    movq    %r9, %rdx
    xorl    %edi, %edi
    .p2align    4, 0x90
.LBB7_13:
    movdqu    -48(%rdx), %xmm1
    movdqu    -32(%rdx), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -48(%rdx)
    movdqu    %xmm2, -32(%rdx)
    movdqu    -16(%rdx), %xmm1
    movdqu    (%rdx), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -16(%rdx)
    movdqu    %xmm2, (%rdx)
    addq    $8, %rdi
    addq    $64, %rdx
    addq    $2, %rbx
    jne    .LBB7_13
.LBB7_14:
    testq    %r15, %r15
    je    .LBB7_16
    addq    %r13, %rdi
    movdqu    (%rcx,%rdi,8), %xmm1
    movdqu    16(%rcx,%rdi,8), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, (%rcx,%rdi,8)
    movdqu    %xmm2, 16(%rcx,%rdi,8)
.LBB7_16:
    cmpq    %r12, %rsi
    je    .LBB7_7
    addq    %r14, %r13
    .p2align    4, 0x90
.LBB7_5:
    leaq    (%rcx,%r13,8), %rdx
    movq    %r11, %rsi
    subq    %r13, %rsi
    .p2align    4, 0x90
.LBB7_6:
    incq    (%rdx)
    addq    $8, %rdx
    decq    %rsi
    jne    .LBB7_6
.LBB7_7:
    incq    %rax
    xorl    %edx, %edx
    .p2align    4, 0x90
.LBB7_8:
    cmpq    %r11, %rdx
    jae    .LBB7_18
    incq    (%rcx,%rdx,8)
    incq    %rdx
    cmpq    %rax, %rdx
    jb    .LBB7_8
    addq    $8, %r9
    decq    %r10
    cmpq    %r11, %rax
    jb    .LBB7_3
.LBB7_19:
    addq    $32, %rsp
    popq    %rbx
    popq    %rdi
    popq    %rsi
    popq    %r12
    popq    %r13
    popq    %r14
    popq    %r15
    retq
.LBB7_18:
    leaq    panic_bounds_check_loc7883(%rip), %rcx
    movq    %r11, %r8
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2


_ZN8example25test717hc14d131564952402E:
    pushq    %r15
    pushq    %r14
    pushq    %r12
    pushq    %rsi
    pushq    %rdi
    pushq    %rbx
    subq    $40, %rsp
    movq    %rdx, %r8
    testq    %r8, %r8
    je    .LBB8_17
    leaq    48(%rcx,%r8,8), %r9
    movq    $-4, %r10
    xorl    %r11d, %r11d
    movdqa    LCPI8_0(%rip), %xmm0
    movq    %r8, %rdx
    .p2align    4, 0x90
.LBB8_3:
    movq    %rdx, %rax
    movq    %r10, %rbx
    shrq    $2, %rbx
    leal    1(%rbx), %edi
    andl    $1, %edi
    leaq    -1(%rax), %rdx
    cmpq    %r8, %rdx
    jae    .LBB8_16
    addq    $2, -8(%rcx,%rax,8)
    cmpq    %r8, %rax
    je    .LBB8_2
    cmpq    $4, %r11
    jb    .LBB8_14
    movq    %r11, %r14
    andq    $-4, %r14
    movq    %r11, %r12
    andq    $-4, %r12
    je    .LBB8_14
    leaq    -4(%r11), %rsi
    shrq    $2, %rsi
    leaq    1(%rsi), %r15
    andl    $1, %r15d
    testq    %rsi, %rsi
    movl    $0, %esi
    je    .LBB8_10
    decq    %rdi
    subq    %rbx, %rdi
    movq    %r9, %rbx
    xorl    %esi, %esi
    .p2align    4, 0x90
.LBB8_9:
    movdqu    -48(%rbx), %xmm1
    movdqu    -32(%rbx), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -48(%rbx)
    movdqu    %xmm2, -32(%rbx)
    movdqu    -16(%rbx), %xmm1
    movdqu    (%rbx), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -16(%rbx)
    movdqu    %xmm2, (%rbx)
    addq    $8, %rsi
    addq    $64, %rbx
    addq    $2, %rdi
    jne    .LBB8_9
.LBB8_10:
    testq    %r15, %r15
    je    .LBB8_12
    addq    %rax, %rsi
    movdqu    (%rcx,%rsi,8), %xmm1
    movdqu    16(%rcx,%rsi,8), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, (%rcx,%rsi,8)
    movdqu    %xmm2, 16(%rcx,%rsi,8)
.LBB8_12:
    cmpq    %r12, %r11
    je    .LBB8_2
    addq    %r14, %rax
    .p2align    4, 0x90
.LBB8_14:
    leaq    (%rcx,%rax,8), %rsi
    movq    %r8, %rdi
    subq    %rax, %rdi
    .p2align    4, 0x90
.LBB8_15:
    incq    (%rsi)
    addq    $8, %rsi
    decq    %rdi
    jne    .LBB8_15
.LBB8_2:
    incq    %r11
    addq    $-8, %r9
    incq    %r10
    testq    %rdx, %rdx
    jne    .LBB8_3
.LBB8_17:
    addq    $40, %rsp
    popq    %rbx
    popq    %rdi
    popq    %rsi
    popq    %r12
    popq    %r14
    popq    %r15
    retq
.LBB8_16:
    leaq    panic_bounds_check_loc7884(%rip), %rcx
    callq    _ZN4core9panicking18panic_bounds_check17hb2d969c3cc11ed08E
    ud2


_ZN8example25test817h80b36bd263b1d523E:
    pushq    %r15
    pushq    %r14
    pushq    %rsi
    pushq    %rdi
    pushq    %rbx
    testq    %r8, %r8
    je    .LBB9_14
    cmpq    %r8, %rdx
    jbe    .LBB9_14
    leaq    1(%r8), %r9
    addq    $2, (%rcx,%r8,8)
    cmpq    %rdx, %r9
    je    .LBB9_14
    leaq    -1(%rdx), %rax
    subq    %r8, %rax
    cmpq    $4, %rax
    jb    .LBB9_12
    movq    %rax, %r10
    andq    $-4, %r10
    movq    %rax, %r11
    andq    $-4, %r11
    je    .LBB9_12
    leaq    -4(%r11), %r15
    shrq    $2, %r15
    leal    1(%r15), %r14d
    andl    $1, %r14d
    xorl    %esi, %esi
    testq    %r15, %r15
    je    .LBB9_8
    leaq    56(%rcx,%r8,8), %rdi
    leaq    -1(%r14), %rbx
    subq    %r15, %rbx
    xorl    %esi, %esi
    movdqa    LCPI9_0(%rip), %xmm0
    .p2align    4, 0x90
.LBB9_7:
    movdqu    -48(%rdi,%rsi,8), %xmm1
    movdqu    -32(%rdi,%rsi,8), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -48(%rdi,%rsi,8)
    movdqu    %xmm2, -32(%rdi,%rsi,8)
    movdqu    -16(%rdi,%rsi,8), %xmm1
    movdqu    (%rdi,%rsi,8), %xmm2
    paddq    %xmm0, %xmm1
    paddq    %xmm0, %xmm2
    movdqu    %xmm1, -16(%rdi,%rsi,8)
    movdqu    %xmm2, (%rdi,%rsi,8)
    addq    $8, %rsi
    addq    $2, %rbx
    jne    .LBB9_7
.LBB9_8:
    testq    %r14, %r14
    je    .LBB9_10
    addq    %r9, %rsi
    movdqu    (%rcx,%rsi,8), %xmm0
    movdqu    16(%rcx,%rsi,8), %xmm1
    movdqa    LCPI9_0(%rip), %xmm2
    paddq    %xmm2, %xmm0
    paddq    %xmm2, %xmm1
    movdqu    %xmm0, (%rcx,%rsi,8)
    movdqu    %xmm1, 16(%rcx,%rsi,8)
.LBB9_10:
    cmpq    %r11, %rax
    je    .LBB9_14
    addq    %r10, %r9
.LBB9_12:
    leaq    (%rcx,%r9,8), %rax
    subq    %r9, %rdx
    .p2align    4, 0x90
.LBB9_13:
    incq    (%rax)
    addq    $8, %rax
    decq    %rdx
    jne    .LBB9_13
.LBB9_14:
    popq    %rbx
    popq    %rdi
    popq    %rsi
    popq    %r14
    popq    %r15
    retq


_ZN8example25test917hc844fc6cd2df1f23E:
    cmpq    $3, %rdx
    jb    .LBB10_2
    incq    (%rcx)
    incq    8(%rcx)
    incq    16(%rcx)
.LBB10_2:
    retq
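
As a side note, here is a sketch of my own (test5_slices is not from the post): the per-iteration check that survives in the test5/test6 asm above appears to be the one on s[j] in the "for j in 0 .. i + 1" loop, and rewriting the inner loops over subslices pays each range check once per reslice instead of once per element.

#[inline(never)]
fn test5_slices(s: &mut [usize]) {
    for i in 0 .. s.len() {
        s[i] += 1;
        // At most one range check per reslice, then check-free iteration.
        for x in s[i ..].iter_mut() { *x += 1; }
        for x in s[.. i + 1].iter_mut() { *x += 1; }
    }
}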

This is derived from the third Go program:

#[inline(never)]
fn testb(s: &mut [usize], index: usize) -> usize {
    s[.. index].iter().sum::<usize>() +
    s[index ..].iter().sum::<usize>()
}

fn main() {
    let mut arr = std::env::args()
                  .skip(1)
                  .map(|s| s.parse().unwrap())
                  .collect::<Vec<usize>>();
    println!("{:?}", arr);

    let idx = arr[0];

    println!("{:?}", testb(&mut arr, idx));
}

Its asm:

_ZN8example35testb17heea21a17acd84092E:
    subq    $40, %rsp
    movq    %rdx, %r9
    cmpq    %r8, %r9
    jb    .LBB6_29
    xorl    %eax, %eax
    testq    %r8, %r8
    je    .LBB6_15
    leaq    (,%r8,8), %r10
    xorl    %eax, %eax
    movq    %rcx, %r11
    .p2align    4, 0x90
.LBB6_3:
    addq    (%r11), %rax
    jb    .LBB6_4
    addq    $8, %r11
    addq    $-8, %r10
    jne    .LBB6_3
.LBB6_15:
    xorl    %r10d, %r10d
    cmpq    %r8, %r9
    je    .LBB6_28
    leaq    (%rcx,%r8,8), %rcx
    shlq    $3, %r9
    shlq    $3, %r8
    subq    %r8, %r9
    xorl    %r10d, %r10d
    .p2align    4, 0x90
.LBB6_17:
    addq    (%rcx), %r10
    jb    .LBB6_18
    addq    $8, %rcx
    addq    $-8, %r9
    jne    .LBB6_17
.LBB6_28:
    addq    %rax, %r10
    movq    %r10, %rax
    addq    $40, %rsp
    retq
.LBB6_4:
.Ltmp53:
    leaq    str7712(%rip), %rcx
    movl    $15, %edx
    callq    _ZN4core6option13expect_failed17h10bc1f1a367b75ceE
.Ltmp54:
    ud2
.LBB6_18:
.Ltmp65:
    leaq    str7712(%rip), %rcx
    movl    $15, %edx
    callq    _ZN4core6option13expect_failed17h10bc1f1a367b75ceE
.Ltmp66:
    ud2
.LBB6_29:
    movq    %r8, %rcx
    movq    %r9, %rdx
    callq    _ZN4core5slice20slice_index_len_fail17h88e9fffcb3bb41a6E
    ud2
.LBB6_22:
.Ltmp67:
.Ltmp68:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
.Ltmp69:
    ud2
.LBB6_20:
.Ltmp70:
.Ltmp71:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
.Ltmp72:
    ud2
.LBB6_24:
.Ltmp73:
.Ltmp74:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
.Ltmp75:
    ud2
.LBB6_26:
.Ltmp76:
    jmp    .LBB6_13
.LBB6_8:
.Ltmp55:
.Ltmp56:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
.Ltmp57:
    ud2
.LBB6_6:
.Ltmp58:
.Ltmp59:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
.Ltmp60:
    ud2
.LBB6_10:
.Ltmp61:
.Ltmp62:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
.Ltmp63:
    ud2
.LBB6_12:
.Ltmp64:
.LBB6_13:
    movq    %rax, %rcx
    callq    rust_eh_unwind_resume
    ud2

Any case where the rust compiler could eliminate a bounds check using static analysis that isn't extraordinarily advanced, but doesn't, is a bug. I would just jump straight to filing this in the issue tracker.

I think it would be especially helpful if the comparison were between a recent clang (C/C++) and rustc, because any such bounds checks that aren't eliminated in Rust and are eliminated in clang make it clear that LLVM isn't preventing the optimization. It would help narrow down the possible cause.


After several months, there are some things in the Rust design that I still don't understand. Do you know why the Rust designers like to tie rustc optimizations so much to LLVM? What if someone someday wants to build a Rust compiler with a GCC back-end and still hopes for the same code performance? And aren't some optimizations, like Bounds Check Elimination, better performed in a front-end or middle-end, instead of leaving everything to the LLVM back-end?

I guess LLVM also implements most of the middle-end, right?

In some cases, the front end just needs to annotate the IR better so that the LLVM middle/back end has enough information to do the optimizations. In other cases, the front end has to do the optimizing itself. It is likely that improvements to bounds check optimization will use both approaches.

It seems they have introduced a compilation flag in Go 1.7:

-gcflags="-d=ssa/check_bce/debug=1"

It reports where the compiler wasn't able to remove a bounds check. I think I'd like the same feature in rustc.


In another post I've suggested introducing two new slice methods in Rust:

verified_get
verified_get_mut

They compile only if the compiler is able to remove that specific bounds check; otherwise they give a compilation error. So they help make your code both safe and fast, according to the Rust philosophy. If you leave the Bounds Check Elimination logic out of the front-end and compile your program with a different back-end, you will get different optimizations, and code that uses those functions will break.
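
To illustrate the idea, here is a purely hypothetical usage sketch (verified_get_mut does not exist; the signature shown is my assumption):

#[inline(never)]
fn bump_third(s: &mut [usize]) {
    if s.len() > 2 {
        // Would compile only because the length test above lets the
        // compiler prove the index is in bounds; otherwise it would be
        // a compile-time error rather than a runtime check.
        *s.verified_get_mut(2) += 1;
    }
}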


Maybe it should work the other way: places where one expects a bounds check could use Index::index() directly, places where we expect the bounds check to be eliminated should use [], and any use of [] that triggers a bounds check could result in a warning. Presumably, this would be a lint that each context (crate, module, item) could control.

However, I think that a "break-the-build-if-bounds-check-isn't-eliminated" option would put incredible pressure on rustc developers w.r.t. avoiding breakage. Taking that into consideration, it's probably too expensive to have the "break the build" option.
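
A small sketch of that convention (my own illustration; the lint itself is hypothetical and not part of rustc):

use std::ops::IndexMut;

fn example(s: &mut [usize], i: usize) {
    // An explicit, expected bounds check.
    *s.index_mut(i) += 1;
    if i < s.len() {
        // Plain indexing where the check is expected to be eliminated;
        // the proposed lint would warn if it is not.
        s[i] += 1;
    }
}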


MIR opens the door to finally performing meaningful optimizations in rustc itself. It's been suggested for example that some passes could run before monomorphization (and thus save work in later passes).

Until early 2015, the focus of rustc development was on creating an implementation of the Rust language, which was then a moving target. For the past year, the focus has been on refactoring rustc to use the AST/HIR/MIR multiple-representation architecture, partially to support defining optimization passes in rustc. It's not that people don't want rustc to perform optimizations, it's that Rust has so far been leveraging LLVM to avoid having to reimplement optimizations. However, conditions are changing and this will change as well.


@leonardo: You should really start to file bugs. You've found some, and last time you said you hadn't filed any bugs so far, which makes me kinda sad...

Putting the optimizations into the "front end" (read: pre-LLVM) does not really address the problem. Any slight change to the optimizations would be a breaking change, regardless of where they live. I hope I don't need to elaborate on how monumentally stupid freezing the optimization pipeline would be. The same (any change being breaking) is technically true for type inference, but at least there is a clear, semi-automatic workaround (add a type annotation). Furthermore, since the long-term goal is not merely to have multiple backends for one compiler, but to be a standardized language with multiple conforming implementations, the optimizations being used would have to be spec'd in detail to give some semblance of portability across compilers. In sum, this means that depending on optimizations firing for the code to compile is a huge no-no.

Of course, if the stakes are "merely" warnings (as @briansmith proposes), it becomes "only" a quality-of-implementation issue. But with how many bounds checks are not eliminated in practice, I doubt any implementation would want to bite that bullet and produce thousands of warnings even on moderately sized code bases. And again, which bounds checks are eliminated will vary by version and by all the surrounding code (possibly quite far away), so at the end of the day you'd just send programmers on a wild goose chase, switching between the warning and non-warning versions of indexing any time anything at all (the code or the compiler) changes.

An alternative that has been proposed occasionally is to lay out very specific rules that imply certain bounds checks must be eliminated, simple and detailed enough that they can be implementation-independent and reasoned about by the programmer at the source code level. Then, one could reasonably allow a static assertion that "this bounds check is eliminated". But this is a whole lot of infrastructure and spec work, and it does not match the reality of what is being compiled (i.e., many bounds checks might get reliably eliminated but not according to the spec'd rules), so as cool as the idea of programmer-controlled and -checkable optimization is in principle, I am not convinced it's practical in this case.


This ... isn't a language design issue? I'm not sure what your point is here. Deferring rustc optimizations to llvm doesn't mean that a gcc-backend compiler can't do the same. There's nothing tying this optimization to llvm as far as language design is concerned; indeed, all language design has to say about optimizations is "an implementation is free to implement this optimization", which doesn't tie to any backend.

You are right that most optimizations are deferred to llvm in rustc -- the reason behind this is that rustc itself was until very recently not designed in a way that permits the easy addition of optimizations. This changes with MIR. But alternative compiler implementations are irrelevant to this -- they are free to implement optimizations however they like.


@leonardo I reported this bug. It would be cool if you would offer a reduced test case!

https://github.com/rust-lang/rust/issues/35981#issuecomment-242442644
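
For what it's worth, a candidate reduced test case, based on my reading of the test5/test6 asm above (the surviving check seems to be the one on s[j] in the inner loop):

#[inline(never)]
fn inner_loop(s: &mut [usize]) {
    for i in 0 .. s.len() {
        for j in 0 .. i + 1 {
            // This bounds check is not eliminated, even though
            // j <= i < s.len() holds on every iteration.
            s[j] += 1;
        }
    }
}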


The reason we delegate optimizations to LLVM is that LLVM is very good at optimizing code in many cases.


A patch was submitted to LLVM and there are two new bug reports.
Please always report bugs to Rust or LLVM. This is the only way to make a difference.
