I have a question concerning target feature detection, for the following code there are two ways to detect if certain feature is presented: one using is_x86_feature_detected!("...")
, one using cfg!(target_feature = "...")
. The problem is that in case of cfg!
, the code for the corresponding feature will never be executed even though these featurs are presented on my platform (see rustc --print at the end), only when is_x86_feature_detected
is used, the code segment will be executed. To make the cfg!
one work, I need to provide rustc flags explicitly when running cargo build: RUSTFLAGS='-C target_feature=+fma,+avx512vl' cargo build
Shouldn't cfg!
be enough for such use cases? Why it has such kind of unexpected behavior?
pub fn rcp(x: f32) -> f32 {
if cfg!(target_arch = "x86_64") {
use std::arch::x86_64::{_mm_cvtss_f32, _mm_mul_ss, _mm_rcp_ss, _mm_set_ss, _mm_sub_ss};
unsafe {
let a = _mm_set_ss(x);
let r = if is_x86_feature_detected!("avx512vl") {
println!("AVX512VL");
use std::arch::x86_64::_mm_rcp14_ss;
_mm_rcp14_ss(_mm_set_ss(0.0), a)
} else {
println!("AVX");
_mm_rcp_ss(a)
};
if cfg!(target_feature = "fma") {
//is_x86_feature_detected!("fma") {
println!("FMA");
use std::arch::x86_64::_mm_fnmadd_ss;
_mm_cvtss_f32(_mm_mul_ss(r, _mm_fnmadd_ss(r, a, _mm_set_ss(2.0))))
} else {
_mm_cvtss_f32(_mm_mul_ss(r, _mm_sub_ss(_mm_set_ss(2.0), _mm_mul_ss(r, a))))
}
}
} else {
1.0 / x
}
}
rustc --print cfg -C target-cpu=native | grep feature
target_feature="adx"
target_feature="aes"
target_feature="avx"
target_feature="avx2"
target_feature="avx512bitalg"
target_feature="avx512bw"
target_feature="avx512cd"
target_feature="avx512dq"
target_feature="avx512f"
target_feature="avx512gfni"
target_feature="avx512ifma"
target_feature="avx512vaes"
target_feature="avx512vbmi"
target_feature="avx512vbmi2"
target_feature="avx512vl"
target_feature="avx512vnni"
target_feature="avx512vpclmulqdq"
target_feature="avx512vpopcntdq"
target_feature="bmi1"
target_feature="bmi2"
target_feature="cmpxchg16b"
target_feature="ermsb"
target_feature="f16c"
target_feature="fma"
target_feature="fxsr"
target_feature="gfni"
target_feature="lzcnt"
target_feature="movbe"
target_feature="pclmulqdq"
target_feature="popcnt"
target_feature="rdrand"
target_feature="rdseed"
target_feature="sha"
target_feature="sse"
target_feature="sse2"
target_feature="sse3"
target_feature="sse4.1"
target_feature="sse4.2"
target_feature="ssse3"
target_feature="vaes"
target_feature="vpclmulqdq"
target_feature="xsave"
target_feature="xsavec"
target_feature="xsaveopt"
target_feature="xsaves"