Target_feature detection

I have a question concerning target feature detection. In the following code there are two ways to detect whether a certain feature is present: one using is_x86_feature_detected!("..."), the other using cfg!(target_feature = "..."). The problem is that with cfg!, the code for the corresponding feature is never executed, even though these features are present on my platform (see the rustc --print output at the end); the code segment is executed only when is_x86_feature_detected! is used. To make the cfg! version work, I need to pass the rustc flags explicitly when running cargo build: RUSTFLAGS='-C target_feature=+fma,+avx512vl' cargo build

Shouldn't cfg! be enough for such use cases? Why does it behave in this unexpected way?

/// Returns an approximation of `1.0 / x`.
///
/// On `x86_64` the result is a hardware reciprocal estimate `r` refined by one
/// Newton-Raphson step, `r * (2 - r * x)`. On every other architecture the
/// function falls back to a plain division.
///
/// Two different feature-detection mechanisms are used on purpose:
/// * `is_x86_feature_detected!("avx512vl")` is evaluated at run time, on the
///   machine that executes the binary.
/// * `cfg!(target_feature = "fma")` is evaluated at compile time and is `true`
///   only when the crate was built with that feature enabled (for example via
///   `RUSTFLAGS` or `-C target-cpu=native`).
///
/// The function also prints which code path was taken (`AVX512VL`/`AVX`, `FMA`).
pub fn rcp(x: f32) -> f32 {
    // `#[cfg]` (rather than `cfg!`) is required here: `cfg!` only yields a
    // boolean, so both branches would still have to compile on every target,
    // and the x86-only imports below do not exist on other architectures.
    #[cfg(target_arch = "x86_64")]
    {
        use std::arch::x86_64::{_mm_cvtss_f32, _mm_mul_ss, _mm_rcp_ss, _mm_set_ss, _mm_sub_ss};
        // SAFETY: the SSE intrinsics are part of the x86_64 baseline;
        // `_mm_rcp14_ss` is only reached after a successful run-time check and
        // `_mm_fnmadd_ss` only when FMA was enabled at compile time.
        unsafe {
            let a = _mm_set_ss(x);

            // Initial reciprocal estimate, picked at run time.
            // NOTE(review): `_mm_rcp14_ss` is listed as an AVX512F intrinsic;
            // presumably AVX512VL implies AVX512F — confirm.
            let r = if is_x86_feature_detected!("avx512vl") {
                println!("AVX512VL");
                use std::arch::x86_64::_mm_rcp14_ss;
                _mm_rcp14_ss(_mm_set_ss(0.0), a)
            } else {
                println!("AVX");
                _mm_rcp_ss(a)
            };

            // Compile-time check; the run-time alternative would be
            // `is_x86_feature_detected!("fma")`.
            if cfg!(target_feature = "fma") {
                println!("FMA");
                use std::arch::x86_64::_mm_fnmadd_ss;
                // r * (-(r * a) + 2) using a fused negate-multiply-add.
                _mm_cvtss_f32(_mm_mul_ss(r, _mm_fnmadd_ss(r, a, _mm_set_ss(2.0))))
            } else {
                // r * (2 - r * a) using separate multiply and subtract.
                _mm_cvtss_f32(_mm_mul_ss(r, _mm_sub_ss(_mm_set_ss(2.0), _mm_mul_ss(r, a))))
            }
        }
    }
    #[cfg(not(target_arch = "x86_64"))]
    {
        1.0 / x
    }
}
rustc --print cfg -C target-cpu=native | grep feature
target_feature="adx"
target_feature="aes"
target_feature="avx"
target_feature="avx2"
target_feature="avx512bitalg"
target_feature="avx512bw"
target_feature="avx512cd"
target_feature="avx512dq"
target_feature="avx512f"
target_feature="avx512gfni"
target_feature="avx512ifma"
target_feature="avx512vaes"
target_feature="avx512vbmi"
target_feature="avx512vbmi2"
target_feature="avx512vl"
target_feature="avx512vnni"
target_feature="avx512vpclmulqdq"
target_feature="avx512vpopcntdq"
target_feature="bmi1"
target_feature="bmi2"
target_feature="cmpxchg16b"
target_feature="ermsb"
target_feature="f16c"
target_feature="fma"
target_feature="fxsr"
target_feature="gfni"
target_feature="lzcnt"
target_feature="movbe"
target_feature="pclmulqdq"
target_feature="popcnt"
target_feature="rdrand"
target_feature="rdseed"
target_feature="sha"
target_feature="sse"
target_feature="sse2"
target_feature="sse3"
target_feature="sse4.1"
target_feature="sse4.2"
target_feature="ssse3"
target_feature="vaes"
target_feature="vpclmulqdq"
target_feature="xsave"
target_feature="xsavec"
target_feature="xsaveopt"
target_feature="xsaves"

That's correct. Presumably you could do -C target-cpu=native too.

Otherwise, the defaults for your target are used. The defaults are usually pretty paltry. For example, it doesn't look like FMA gets added until you use the x86-64-v3 level: x86-64 - Wikipedia

The default is just x86-64, which has only the barest level of SIMD support: SSE2.

2 Likes

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.