That's because Rust doesn't define how it works. Given:
#[allow(dead_code)]
#[inline(never)]
pub fn count_ones(v: u64) -> u32 {
v.count_ones()
}
and rustc --crate-type lib count_ones.rs -O
as the base compiler invocation, you can get the generated LLVM IR by adding --emit llvm-ir
, which looks like:
; ModuleID = 'count_ones.0.rs'
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686-pc-windows-gnu"
; Function Attrs: noinline norecurse nounwind readnone uwtable
define i32 @_ZN10count_ones20hc73667123f942da6eaaE(i64) unnamed_addr #0 {
entry-block:
%1 = tail call i64 @llvm.ctpop.i64(i64 %0) #2
%2 = trunc i64 %1 to i32
ret i32 %2
}
; Function Attrs: nounwind readnone
declare i64 @llvm.ctpop.i64(i64) #1
attributes #0 = { noinline norecurse nounwind readnone uwtable }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
So it boils down to an LLVM intrinsic. If you use --emit asm
, you can get the assembly which looks like:
.text
.def @feat.00;
.scl 3;
.type 0;
.endef
.globl @feat.00
@feat.00 = 1
.def __ZN10count_ones20hc73667123f942da6eaaE;
.scl 2;
.type 32;
.endef
.globl __ZN10count_ones20hc73667123f942da6eaaE
.align 16, 0x90
__ZN10count_ones20hc73667123f942da6eaaE:
.cfi_startproc
movl 4(%esp), %eax
movl 8(%esp), %ecx
movl %ecx, %edx
shrl %edx
andl $1431655765, %edx
subl %edx, %ecx
movl %ecx, %edx
andl $858993459, %edx
shrl $2, %ecx
andl $858993459, %ecx
addl %edx, %ecx
movl %ecx, %edx
shrl $4, %edx
addl %ecx, %edx
andl $252645135, %edx
imull $16843009, %edx, %ecx
shrl $24, %ecx
movl %eax, %edx
shrl %edx
andl $1431655765, %edx
subl %edx, %eax
movl %eax, %edx
andl $858993459, %edx
shrl $2, %eax
andl $858993459, %eax
addl %edx, %eax
movl %eax, %edx
shrl $4, %edx
addl %eax, %edx
andl $252645135, %edx
imull $16843009, %edx, %eax
shrl $24, %eax
addl %ecx, %eax
retl
.cfi_endproc
However, if I override the target CPU and specify something more recent using --emit asm -C target-cpu=haswell
:
.text
.def @feat.00;
.scl 3;
.type 0;
.endef
.globl @feat.00
@feat.00 = 1
.def __ZN10count_ones20hc73667123f942da6eaaE;
.scl 2;
.type 32;
.endef
.globl __ZN10count_ones20hc73667123f942da6eaaE
.align 16, 0x90
__ZN10count_ones20hc73667123f942da6eaaE:
.cfi_startproc
popcntl 8(%esp), %ecx
popcntl 4(%esp), %eax
addl %ecx, %eax
retl
.cfi_endproc
And there's popcntl
.
So, the answer to your question is: uh, maybe; it depends.
Edit: oh, a more direct way to get it to use popcntl
: add -C target-feature=+popcnt
. I would have checked this before, but I got bored waiting for LLVM to install so I could list CPU features (llc -mcpu=help
).