Although setting deubg = true
for relase profiles, the optimizations seem to remove debug symbols (and other information?) needed in perf to have nice results. Setting the optimization level to opt-level = 1
preserves that information. opt-level >= 2
removes it. Is this intended? Could inlining also elide some information/correlation with the source file?
Steps to reproduce
cargo new profile_test
cd profile_test
echo -e "[profile.release]\ndebug = true" >> Cargo.toml
Copy this into src/main.rs
and optional increase numbers in case it is too fast:
// ./src/main.rs
fn f1(n: i64) -> i64 {
let mut res = 0;
for i in 0..n {
res += i // + (res as f64).atan2(i as f64).floor()
}
res
}
fn f2(n: i64) -> i64 {
(0..n).into_iter().fold(0, |acc, x| acc + x // + (acc as f64).atan2(x as f64).sqrt().sin().floor() as i64)
}
fn main() {
let sum1 = f1(10000000); // add zeros if too fast
let sum2 = f2(10000000); // or do something more expensive
println!("1: {}, 2: {}", sum1, sum2);
}
Then run
cargo build
cargo build --release
perf record -o perf_debug.data -g ./target/debug/profile_test
perf record -o perf_relase.data -g ./target/release/profile_test
perf report -i perf_debug.data
perf report -i perf_release.data
It might be needed to run perf
as root or set echo 0 | sudo tee /proc/sys/kernel/perf_event_paranoid
.
Perf output
Output of perf for debug build
Samples: 7K of event 'cycles:uppp', Event count (approx.): 3596385188 Children Self Command Shared Object Symbol + 99.99% 0.00% profile_test [unknown] [.] 0x50f6258d4c544155 + 99.99% 0.00% profile_test libc-2.26.so [.] __libc_start_main + 99.99% 0.00% profile_test profile_test [.] main + 99.99% 0.00% profile_test profile_test [.] std::rt::lang_start + 99.99% 0.00% profile_test profile_test [.] std::rt::lang_start_internal + 99.98% 0.00% profile_test profile_test [.] __rust_maybe_catch_panic + 99.98% 0.00% profile_test profile_test [.] std::panicking::try::do_call + 99.98% 0.00% profile_test profile_test [.] std::rt::lang_start::_$u7b$$u7b$closure$u7d$$u7d$::he6ffe0e7a293ee + 99.98% 0.00% profile_test profile_test [.] profile_test::main + 71.40% 12.84% profile_test profile_test [.] core::iter::range::<impl core::iter::iterator::Iterator for core:: + 57.31% 0.00% profile_test profile_test [.] profile_test::f2 + 57.31% 0.00% profile_test profile_test [.] core::iter::iterator::Iterator::fold + 56.47% 9.46% profile_test profile_test [.] core::iter::iterator::Iterator::try_fold + 42.40% 6.76% profile_test profile_test [.] profile_test::f1 + 38.31% 2.49% profile_test profile_test [.] core::mem::swap + 36.28% 7.88% profile_test profile_test [.] core::ptr::swap_nonoverlapping + 19.45% 12.19% profile_test profile_test [.] core::ptr::swap_nonoverlapping_bytes + 17.59% 9.80% profile_test profile_test [.] <i64 as core::iter::range::Step>::add_usize + 8.46% 6.13% profile_test profile_test [.] core::iter::iterator::Iterator::fold::_$u7b$$u7b$closure$u7d$$u7d$ + 6.25% 6.25% profile_test profile_test [.] core::num::<impl u64>::wrapping_add + 5.63% 5.63% profile_test libc-2.26.so [.] __memmove_avx_unaligned_erms + 5.25% 5.25% profile_test profile_test [.] core::ptr::<impl *mut T>::offset + 3.14% 3.14% profile_test profile_test [.] core::cmp::impls::<impl core::cmp::PartialOrd for i64>::lt + 2.46% 2.46% profile_test profile_test [.] profile_test::f2::_$u7b$$u7b$closure$u7d$$u7d$::h48dd982df31c6245 + 2.03% 2.03% profile_test profile_test [.] <core::iter::AlwaysOk<T> as core::ops::try::Try>::into_result + 1.71% 1.71% profile_test profile_test [.] memcpy@plt + 1.64% 1.64% profile_test profile_test [.] core::mem::size_of + 1.57% 1.57% profile_test profile_test [.] core::iter::range::ptr_try_from_impls::<impl core::iter::range::Pr + 1.40% 1.40% profile_test profile_test [.] core::mem::size_of + 1.35% 1.35% profile_test profile_test [.] core::mem::uninitialized 0.01% 0.01% profile_test [unknown] [.] 0xffffffff9c800a00 0.00% 0.00% profile_test [unknown] [.] 0x0000000000000040 0.00% 0.00% profile_test ld-2.26.so [.] _dl_sysdep_start 0.00% 0.00% profile_test ld-2.26.so [.] dl_main 0.00% 0.00% profile_test ld-2.26.so [.] _dl_relocate_object 0.00% 0.00% profile_test ld-2.26.so [.] do_lookup_x 0.00% 0.00% profile_test ld-2.26.so [.] _dl_lookup_symbol_x 0.00% 0.00% profile_test ld-2.26.so [.] strcmp 0.00% 0.00% profile_test libpthread-2.26.so [.] pthread_attr_init@@GLIBC_2.2.5
Output of perf for release build
Samples: 31 of event 'cycles:uppp', Event count (approx.): 734030 Children Self Command Shared Object Symbol + 25.52% 0.00% profile_test [unknown] [.] 0x0000000000000040 + 25.52% 0.00% profile_test ld-2.26.so [.] _dl_sysdep_start + 25.52% 3.36% profile_test ld-2.26.so [.] dl_main + 22.17% 11.54% profile_test ld-2.26.so [.] _dl_relocate_object + 14.67% 14.67% profile_test [unknown] [.] 0xffffffff9c800a00 + 13.63% 0.00% profile_test [unknown] [.] 0000000000000000 + 10.62% 10.62% profile_test ld-2.26.so [.] _dl_lookup_symbol_x + 10.50% 10.50% profile_test ld-2.26.so [.] do_lookup_x + 8.14% 0.00% profile_test ld-2.26.so [.] _dl_map_object + 7.98% 7.98% profile_test libc-2.26.so [.] intel_check_word.isra.0 + 7.62% 7.62% profile_test libc-2.26.so [.] __get_nprocs + 7.62% 0.00% profile_test libc-2.26.so [.] __sysconf + 7.16% 7.16% profile_test ld-2.26.so [.] check_match + 7.16% 0.00% profile_test [unknown] [.] 0x42494c4700352e32 + 7.16% 0.00% profile_test [unknown] [.] 0x0000000009691a75 + 6.71% 6.71% profile_test libc-2.26.so [.] _IO_vfscanf + 6.71% 0.00% profile_test [unknown] [.] 0x5f64616572687470 + 6.71% 0.00% profile_test libc-2.26.so [.] _IO_vsscanf + 6.21% 0.00% profile_test [unknown] [.] 0x0000000100003dec + 6.01% 6.01% profile_test ld-2.26.so [.] _dl_fini + 6.01% 0.00% profile_test libc-2.26.so [.] 0x00000000003b3000 + 6.01% 0.00% profile_test libc-2.26.so [.] __run_exit_handlers + 5.67% 5.67% profile_test ld-2.26.so [.] strcmp + 5.67% 0.00% profile_test [unknown] [.] 0x4c47004554415649 + 5.67% 0.00% profile_test [unknown] [.] 0x000000000963cf85 + 3.66% 3.66% profile_test ld-2.26.so [.] match_symbol + 3.66% 0.00% profile_test ld-2.26.so [.] _dl_check_all_versions + 3.66% 0.00% profile_test ld-2.26.so [.] _dl_check_map_versions + 3.28% 3.28% profile_test ld-2.26.so [.] __GI___tunables_init + 1.91% 0.00% profile_test [unknown] [.] 0x313d44495f4e4f49 + 1.91% 0.00% profile_test [unknown] [.] 0x0000711030046bbb + 1.36% 0.00% profile_test [unknown] [.] 0x44454352554f535f + 1.36% 0.00% profile_test [unknown] [.] 0x0000711030045f90 + 0.87% 0.87% profile_test ld-2.26.so [.] _dl_start + 0.87% 0.00% profile_test ld-2.26.so [.] _dl_start_user 0.35% 0.35% profile_test ld-2.26.so [.] _start