Update interpretation of the assembly : There is no bound check, but a check to determine the end of the loop, which is logically impossible to remove because it tells the CPU when to stop, using an iterator would still require this check. There is a capacity check in vector.push (I do not know can this be removed using Vec::with_capacity, but that would make the code comparison no longer apple to apple), and a division by zero check
CMIIW :>
The code
use std::time::Instant;
const SEGMENT_SIZE: usize = 1_000_000;
const MAX_N: usize = 50_000_000;
#[inline(never)]
fn simple_sieve(limit: usize) -> Vec<usize> {
let mut is_prime = vec![1u8; limit + 1];
//if limit >= 0 { is_prime[0] = 0; }
if limit >= 1 { is_prime[1] = 0; }
let sqrt = (limit as f64).sqrt() as usize;
for i in 2..=sqrt {
if is_prime[i] == 1 {
let mut j = i * i;
while j <= limit {
is_prime[j] = 0;
j += i;
}
}
}
let mut primes = Vec::new();
for i in 2..=limit {
if is_prime[i] == 1 {
primes.push(i);
}
}
primes
}
#[inline(never)]
fn segmented_sieve(start: usize, end: usize, base_primes: &[usize]) -> Vec<u8> {
let size = end - start + 1;
let mut segment = vec![1u8; size];
for &p in base_primes {
let p2 = p * p;
if p2 > end {
break;
}
let first_multiple = if p2 >= start {
p2
} else {
let rem = start % p;
if rem == 0 { start } else { start + (p - rem) }
};
let mut j = first_multiple;
let offset = start;
let end_limit = end;
while j <= end_limit {
unsafe {
*segment.get_unchecked_mut(j - offset) = 0;
}
j += p;
}
}
segment
}
#[inline(never)]
fn find_twin_primes(target: usize) -> f64 {
let start_time = Instant::now();
let base_limit = (MAX_N as f64).sqrt() as usize;
let base_primes = simple_sieve(base_limit);
let mut count = 0usize;
let mut prev_prime = 3usize;
let mut segment_start = 3usize;
while segment_start <= MAX_N {
let segment_end = (segment_start + SEGMENT_SIZE - 1).min(MAX_N);
let segment = segmented_sieve(segment_start, segment_end, &base_primes);
for i in 0..segment.len() {
if segment[i] == 1 {
let p = segment_start + i;
if p - prev_prime == 2 {
count += 1;
if count == target {
return start_time.elapsed().as_secs_f64();
}
}
prev_prime = p;
}
}
segment_start = segment_end + 1;
}
let time = start_time.elapsed().as_secs_f64();
// println!("count : {}", count);
time
}
fn main() {
let elapsed = find_twin_primes(500_000);
//println!("{}", elapsed);
std::hint::black_box(elapsed);
}
**playground::simple_sieve:**
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 24
mov r14, rdi
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov edi, 7072
mov esi, 1
call qword ptr [rip + __rustc::__rust_alloc@GOTPCREL]
test rax, rax
je .LBB0_19
mov rbx, rax
mov edx, 7072
mov rdi, rax
mov esi, 1
call qword ptr [rip + memset@GOTPCREL]
mov byte ptr [rbx + 1], 0
mov eax, 2
.LBB0_2:
lea rcx, [rax + 1]
cmp rax, 84
cmove rcx, rax
cmp byte ptr [rbx + rax], 1
jne .LBB0_5
mov rdx, rax
imul rdx, rax
.LBB0_4:
mov byte ptr [rbx + rdx], 0
add rdx, rax
cmp rdx, 7072
jb .LBB0_4
.LBB0_5:
cmp rax, 84
je .LBB0_6
mov rax, rcx
cmp rcx, 84
jbe .LBB0_2
.LBB0_6:
mov qword ptr [rsp], 0
mov qword ptr [rsp + 8], 8
mov qword ptr [rsp + 16], 0
mov r13d, 2
mov eax, 8
xor r12d, r12d
mov r15, rsp
.LBB0_7:
lea rbp, [r13 + 1]
cmp r13, 7071
cmove rbp, r13
cmp byte ptr [rbx + r13], 1
jne .LBB0_12
cmp r12, qword ptr [rsp]
jne .LBB0_11
mov rdi, r15
call alloc::raw_vec::RawVec<T,A>::grow_one
mov rax, qword ptr [rsp + 8]
.LBB0_11:
mov qword ptr [rax + 8*r12], r13
inc r12
mov qword ptr [rsp + 16], r12
.LBB0_12:
cmp r13, 7071
je .LBB0_14
mov r13, rbp
cmp rbp, 7071
jbe .LBB0_7
.LBB0_14:
mov rax, qword ptr [rsp + 16]
mov qword ptr [r14 + 16], rax
movups xmm0, xmmword ptr [rsp]
movups xmmword ptr [r14], xmm0
mov esi, 7072
mov edx, 1
mov rdi, rbx
add rsp, 24
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
jmp qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB0_19:
mov edi, 1
mov esi, 7072
call qword ptr [rip + alloc::raw_vec::handle_error@GOTPCREL]
mov r14, rax
mov rsi, qword ptr [rsp]
test rsi, rsi
je .LBB0_17
mov rdi, qword ptr [rsp + 8]
shl rsi, 3
mov edx, 8
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB0_17:
mov esi, 7072
mov edx, 1
mov rdi, rbx
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
mov rdi, r14
call _Unwind_Resume@PLT
**playground::segmented_sieve:**
push rbp
push r15
push r14
push r13
push r12
push rbx
push rax
mov rbx, rdx
sub rbx, rsi
inc rbx
jns .LBB1_3
xor r15d, r15d
.LBB1_2:
mov rdi, r15
mov rsi, rbx
call qword ptr [rip + alloc::raw_vec::handle_error@GOTPCREL]
.LBB1_3:
mov r14, r8
mov r12, rcx
mov r13, rdx
mov rbp, rsi
mov qword ptr [rsp], rdi
je .LBB1_4
call qword ptr [rip + __rustc::__rust_no_alloc_shim_is_unstable_v2@GOTPCREL]
mov r15d, 1
mov esi, 1
mov rdi, rbx
call qword ptr [rip + __rustc::__rust_alloc@GOTPCREL]
test rax, rax
je .LBB1_2
mov r15, rax
jmp .LBB1_7
.LBB1_4:
mov r15d, 1
.LBB1_7:
mov rdi, r15
mov esi, 1
mov rdx, rbx
call qword ptr [rip + memset@GOTPCREL]
test r14, r14
je .LBB1_24
lea rcx, [r12 + 8*r14]
mov rsi, r15
sub rsi, rbp
.LBB1_10:
mov rdi, qword ptr [r12]
mov rax, rdi
imul rax, rdi
cmp rax, r13
ja .LBB1_24
cmp rax, rbp
jae .LBB1_17
test rdi, rdi
je .LBB1_22
mov rdx, rbp
cmp rbp, rdi
jb .LBB1_15
mov eax, ebp
xor edx, edx
div edi
.LBB1_15:
mov rax, rbp
test rdx, rdx
je .LBB1_17
lea rax, [rdi + rbp]
sub rax, rdx
jmp .LBB1_17
.LBB1_18:
mov byte ptr [rsi + rax], 0
add rax, rdi
.LBB1_17:
cmp rax, r13
jbe .LBB1_18
add r12, 8
cmp r12, rcx
jne .LBB1_10
.LBB1_24:
mov rax, qword ptr [rsp]
mov qword ptr [rax], rbx
mov qword ptr [rax + 8], r15
mov qword ptr [rax + 16], rbx
add rsp, 8
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
.LBB1_22:
lea rdi, [rip + .Lanon.1d992ed0de9679575ee15502e1faffa0.1]
call qword ptr [rip + core::panicking::panic_const::panic_const_rem_by_zero@GOTPCREL]
ud2
mov r14, rax
test rbx, rbx
je .LBB1_21
mov edx, 1
mov rdi, r15
mov rsi, rbx
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB1_21:
mov rdi, r14
call _Unwind_Resume@PLT
.LCPI2_0:
.long 1127219200
.long 1160773632
.long 0
.long 0
.LCPI2_1:
.quad 0x4330000000000000
.quad 0x4530000000000000
.LCPI2_2:
.quad 0x41cdcd6500000000
**playground::find_twin_primes:**
push rbp
push r15
push r14
push r13
push r12
push rbx
sub rsp, 88
call qword ptr [rip + std::time::Instant::now@GOTPCREL]
mov qword ptr [rsp + 48], rax
mov dword ptr [rsp + 56], edx
lea rdi, [rsp + 64]
call playground::simple_sieve
mov rax, qword ptr [rsp + 72]
mov qword ptr [rsp + 8], rax
mov r14, qword ptr [rsp + 80]
mov r15d, 3
xor ebp, ebp
mov r12d, 3
jmp .LBB2_1
.LBB2_14:
add rbx, 1000000
cmp r12, 49000001
mov r12, rbx
jae .LBB2_15
.LBB2_1:
cmp r12, 49000001
mov ebx, 49000001
cmovb rbx, r12
lea rdx, [rbx + 999999]
lea rdi, [rsp + 16]
mov rsi, r12
mov rcx, qword ptr [rsp + 8]
mov r8, r14
call playground::segmented_sieve
mov rax, qword ptr [rsp + 32]
test rax, rax
je .LBB2_12
mov r13, qword ptr [rsp + 24]
mov rdx, r15
xor ecx, ecx
jmp .LBB2_4
.LBB2_5:
mov r15, rdx
.LBB2_23:
inc rcx
mov rdx, r15
cmp rax, rcx
je .LBB2_12
.LBB2_4:
cmp byte ptr [r13 + rcx], 1
jne .LBB2_5
lea r15, [r12 + rcx]
mov rsi, r15
sub rsi, rdx
cmp rsi, 2
jne .LBB2_23
inc rbp
cmp rbp, 500000
jne .LBB2_23
jmp .LBB2_25
.LBB2_12:
mov rsi, qword ptr [rsp + 16]
test rsi, rsi
je .LBB2_14
mov rdi, qword ptr [rsp + 24]
mov edx, 1
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
jmp .LBB2_14
.LBB2_25:
lea rdi, [rsp + 48]
call qword ptr [rip + std::time::Instant::elapsed@GOTPCREL]
mov r14, rax
mov ebp, edx
mov rsi, qword ptr [rsp + 16]
test rsi, rsi
je .LBB2_28
mov edx, 1
mov rdi, r13
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB2_28:
movq xmm2, r14
punpckldq xmm2, xmmword ptr [rip + .LCPI2_0]
subpd xmm2, xmmword ptr [rip + .LCPI2_1]
movapd xmm1, xmm2
unpckhpd xmm1, xmm2
cvtsi2sd xmm0, ebp
jmp .LBB2_17
.LBB2_15:
lea rdi, [rsp + 48]
call qword ptr [rip + std::time::Instant::elapsed@GOTPCREL]
movq xmm2, rax
punpckldq xmm2, xmmword ptr [rip + .LCPI2_0]
subpd xmm2, xmmword ptr [rip + .LCPI2_1]
movapd xmm1, xmm2
unpckhpd xmm1, xmm2
cvtsi2sd xmm0, edx
.LBB2_17:
divsd xmm0, qword ptr [rip + .LCPI2_2]
addsd xmm1, xmm2
addsd xmm0, xmm1
mov rsi, qword ptr [rsp + 64]
test rsi, rsi
je .LBB2_19
movsd qword ptr [rsp + 40], xmm0
shl rsi, 3
mov edx, 8
mov rdi, qword ptr [rsp + 8]
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
movsd xmm0, qword ptr [rsp + 40]
.LBB2_19:
add rsp, 88
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
jmp .LBB2_7
mov r14, rax
mov rsi, qword ptr [rsp + 16]
test rsi, rsi
je .LBB2_8
mov edx, 1
mov rdi, r13
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
jmp .LBB2_8
.LBB2_7:
mov r14, rax
.LBB2_8:
mov rsi, qword ptr [rsp + 64]
test rsi, rsi
je .LBB2_10
shl rsi, 3
mov edx, 8
mov rdi, qword ptr [rsp + 8]
call qword ptr [rip + __rustc::__rust_dealloc@GOTPCREL]
.LBB2_10:
mov rdi, r14
call _Unwind_Resume@PLT
**playground::main:**
push rax
call playground::find_twin_primes
movsd qword ptr [rsp], xmm0
mov rax, rsp
#APP
#NO_APP
pop rax
ret