rust code
/// Returns the sum of `a1 ^ a2 ^ ... ^ a8` over every non-decreasing tuple
/// `1 <= a1 <= a2 <= ... <= a8` of integers with `a1 + a2 + ... + a8 == n`.
///
/// Returns 0 when `n < 8`, because no such tuple exists.
fn xor_partition_sum(n: u64) -> u64 {
    let mut total = 0u64;
    // With `k` parts still to choose out of a remainder `r`, the next part can
    // be at most `r / k`; otherwise the later (larger-or-equal) parts would
    // not fit. This also guarantees that none of the subtractions underflow.
    for a1 in 1..=n / 8 {
        let r1 = n - a1;
        for a2 in a1..=r1 / 7 {
            let r2 = r1 - a2;
            for a3 in a2..=r2 / 6 {
                let r3 = r2 - a3;
                for a4 in a3..=r3 / 5 {
                    let r4 = r3 - a4;
                    for a5 in a4..=r4 / 4 {
                        let r5 = r4 - a5;
                        for a6 in a5..=r5 / 3 {
                            let r6 = r5 - a6;
                            // XOR of the six parts that are fixed for the
                            // whole innermost pass.
                            let head = a1 ^ a2 ^ a3 ^ a4 ^ a5 ^ a6;
                            // The hot loop uses internal iteration (`sum`)
                            // rather than a `for` over an inclusive range;
                            // this form is usually easier for the optimizer
                            // to turn into a tight (possibly vectorized) loop.
                            // The eighth part is whatever remains: `r6 - a7`.
                            total += (a6..=r6 / 2)
                                .map(|a7| head ^ a7 ^ (r6 - a7))
                                .sum::<u64>();
                        }
                    }
                }
            }
        }
    }
    total
}

/// Computes the XOR partition sum for `n`, then prints `n`, the result and the
/// elapsed wall-clock time on a single line of standard output.
fn calc_n(n: u64) {
    print!("N={},", n);
    let now = std::time::Instant::now();
    let ans = xor_partition_sum(n);
    println!("{}, cost={:?}", ans, now.elapsed());
}
fn main(){
calc_n(100);
calc_n(160);
calc_n(300);
calc_n(400);
calc_n(500);
calc_n(600);
}
julia code:
"""
    calcN(n::Int64)

Sum `a1 ⊻ a2 ⊻ … ⊻ a8` over every non-decreasing tuple of positive integers
`a1 ≤ a2 ≤ … ≤ a8` whose sum is `n`, then print `n`, the sum and the elapsed
wall-clock time in seconds.
"""
function calcN(n::Int64)
    t1 = time()
    ans::Int64 = 0
    # With k parts still to choose from a remainder r, the next part is at
    # most r ÷ k so that the remaining (larger-or-equal) parts still fit.
    for a1 in 1:(n ÷ 8)
        r1 = n - a1
        for a2 in a1:(r1 ÷ 7)
            r2 = r1 - a2
            for a3 in a2:(r2 ÷ 6)
                r3 = r2 - a3
                for a4 in a3:(r3 ÷ 5)
                    r4 = r3 - a4
                    for a5 in a4:(r4 ÷ 4)
                        r5 = r4 - a5
                        for a6 in a5:(r5 ÷ 3)
                            r6 = r5 - a6
                            for a7 in a6:(r6 ÷ 2)
                                # The eighth part is whatever remains: r6 - a7.
                                ans += a1 ⊻ a2 ⊻ a3 ⊻ a4 ⊻ a5 ⊻ a6 ⊻ a7 ⊻ (r6 - a7)
                            end
                        end
                    end
                end
            end
        end
    end
    println("n=$n, ans = $ans, cost = $(time() -t1)")
end
On my laptop (i7-8750H CPU), I found that Julia is about 5x faster than Rust.
julia> calcN(100);
n=100, ans = 29892426, cost = 0.0006000995635986328
julia> calcN(160);
n=160, ans = 901994896, cost = 0.009427070617675781
julia> calcN(300);
n=300, ans = 109638857854, cost = 0.4203529357910156
julia> calcN(400);
n=400, ans = 1260273347474, cost = 2.5435290336608887
julia> calcN(500);
n=500, ans = 6722928203618, cost = 10.702456951141357
julia> calcN(600);
n=600, ans = 25125831176186, cost = 34.18349599838257
$ rustc test.rs -O 2>/dev/null && ./test
N=100,29892426, cost=1.616831ms
N=160,901994896, cost=30.246651ms
N=300,109638857854, cost=1.737976853s
N=400,1260273347474, cost=11.636545741s
N=500,6722928203618, cost=51.702947067s
N=600,25125831176186, cost=177.460155807s
I asked the same question on a Chinese Rust forum, and one reply said that the code generated by Julia uses AVX2 instructions, which is why Julia is much faster than Rust.
To the best of my knowledge, both Julia and Rust use LLVM as their backend.
Why can Julia generate much faster code than Rust?
Furthermore, let us suppose every instruction in the Julia version is an AVX2 instruction. Since the input of calcN
is a 64-bit integer, a 256-bit AVX2 register holds only four such values, so AVX2 could give at most a ~4x speedup.
But in fact we have 34.18349599838257*4 < 177.460155807,
so AVX2 cannot be the whole reason for the performance gap.
Is this phenomenon worth opening an issue for?