I am using nightly rustc together with clang-cl from LLVM 19.1.
Versions
> rustc +nightly --version --verbose
rustc 1.83.0-nightly (9e394f551 2024-09-25)
binary: rustc
commit-hash: 9e394f551c050ff03c6fc57f190e0761cf0be6e8
commit-date: 2024-09-25
host: x86_64-pc-windows-msvc
release: 1.83.0-nightly
LLVM version: 19.1.0
> clang --version
clang version 19.1.0
Target: x86_64-pc-windows-msvc
Thread model: posix
InstalledDir: C:\Program Files\LLVM\bin
Code
a.rs
/// Returns `arg` added to itself.
///
/// Exported under its unmangled name with the C ABI so that C code can
/// declare and call it as `int32_t add_2_numbers(int32_t arg)`.
#[no_mangle]
extern "C" fn add_2_numbers(arg: i32) -> i32 {
    arg + arg
}
b.c
#include <stdint.h>
int32_t add_2_numbers(int32_t arg);
/*
 * Applies add_2_numbers to each argument and returns the product of the
 * two results.
 *
 * Marked noinline so that this function stays a separate symbol in the
 * final binary and its body can be inspected in the disassembly.
 */
[[clang::noinline]]
int32_t process_2_nums(int32_t x, int32_t y) {
    const int32_t doubled_x = add_2_numbers(x);
    const int32_t doubled_y = add_2_numbers(y);
    return doubled_x * doubled_y;
}
main.c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
int32_t process_2_nums(int32_t x, int32_t y);
/*
 * Reads two integers from stdin, passes them to process_2_nums and prints
 * the result to stdout.
 *
 * Returns 0 on success, or 1 if either integer could not be parsed.
 */
int main(void) {
    int32_t x = 0;
    int32_t y = 0;
    /* SCNd32 is the scanf conversion that matches int32_t; a bare "%d" is
     * only correct on platforms where int32_t happens to be int. */
    if (scanf("%" SCNd32, &x) != 1 || scanf("%" SCNd32, &y) != 1) {
        fprintf(stderr, "Expected two integers on stdin\n");
        return 1;
    }
    const int32_t value = process_2_nums(x, y);
    printf("Result is %" PRId32, value);
    return 0;
}
I also wrote a.c, a C equivalent of a.rs, to check whether LTO works when only C is used:
#include <stdint.h>
/* C counterpart of the Rust add_2_numbers: returns arg added to itself. */
int32_t add_2_numbers(int32_t arg) {
    const int32_t doubled = arg + arg;
    return doubled;
}
I compile and link code like this:
rustc +nightly -Copt-level=3 -Ccodegen-units=1 -Clto=fat -Clinker-plugin-lto --crate-type=staticlib --emit=obj a.rs
clang-cl /clang:-std=c17 /clang:-O3 -c /D_CRT_SECURE_NO_WARNINGS=1 /clang:-flto=thin /clang:-fuse-ld=lld-link b.c
clang-cl /clang:-std=c17 /clang:-O3 -c /D_CRT_SECURE_NO_WARNINGS=1 /clang:-flto=thin /clang:-fuse-ld=lld-link main.c
lld-link /out:c_wrap_rust.exe a.o b.obj main.obj -verbose /threads:1
For some reason, the Rust function isn't inlined into the C function that calls it.
Disassembled code on Windows:
sub_140001010 proc near
push rsi
push rdi
sub rsp, 28h
mov esi, edx
call sub_140001000
mov edi, eax
mov ecx, esi
call sub_140001000
imul eax, edi
add rsp, 28h
pop rdi
pop rsi
retn
sub_140001010 endp
; I would expect this function to be inlined.
sub_140001000 proc near
lea eax, [rcx+rcx]
retn
sub_140001000 endp
I also tried using -Clto=thin, compiling the Rust code to a static library and linking against it, and linking through clang-cl instead of calling lld-link directly; all of these gave the same result.
When I replace a.rs with a.c and link the three object files built from C, it works:
sub_140001000 proc near
imul ecx, edx
lea eax, ds:0[rcx*4]
retn
sub_140001000 endp
I also tried the same thing on a Linux machine, and everything works as expected there:
Commands
clang-19 -std=c17 -O3 -c -flto=thin -o b.o b.c
clang-19 -std=c17 -O3 -c -flto=thin -o main.o main.c
rustc +nightly -Copt-level=3 -Ccodegen-units=1 -Clto=fat -Clinker-plugin-lto --crate-type=staticlib --emit=obj a.rs
clang-19 -flto -fuse-ld=lld a.o b.o main.o -o c_wrap_rust -v
llvm-objdump-19 --demangle --disassemble-symbols=process_2_nums c_wrap_rust
Result:
c_wrap_rust: file format elf64-x86-64
Disassembly of section .text:
0000000000046820 <process_2_nums>:
46820: 0f af fe imull %esi, %edi
46823: 8d 04 bd 00 00 00 00 leal (,%rdi,4), %eax
4682a: c3 retq
4682b: cc int3
4682c: cc int3
4682d: cc int3
4682e: cc int3
4682f: cc int3
I looked at the output of lld-link with the /mllvm:-print-after-all option, and it seems to me that it starts processing the object files from C only after it has finished emitting machine code for Rust (it does optimize both the C and the Rust code during linking, but separately).
I also tried passing the /LTCG flag (the LTO flag of Microsoft's link.exe) to the linker, but that didn't work either.
How can I make cross-language link time optimization work on pc-windows-msvc?