Hi folks, I recently ran a benchmark with Rust and Zig, derived from ffi_overhead. The benchmark compares the overhead of calling into C through the FFI of various programming languages.
In my benchmark, I made modifications to the C code to ensure compatibility with Windows. Here's the modified C code:
#include "plus.h"
#include <windows.h>
int plus(int x, int y) { return x + y; }
int plusone(int x) { return x + 1; }
long long current_timestamp() {
FILETIME ft;
ULARGE_INTEGER uli;
GetSystemTimeAsFileTime(&ft);
uli.LowPart = ft.dwLowDateTime;
uli.HighPart = ft.dwHighDateTime;
long long milliseconds = uli.QuadPart / 10000;
return milliseconds;
}
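For reference, plus.h isn't shown above; it should be nothing more than the declarations of those three functions, roughly:

#ifndef PLUS_H
#define PLUS_H

int plus(int x, int y);
int plusone(int x);
long long current_timestamp();

#endif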
I ran the benchmark using Rust (1.79.0-nightly) with the following code:
extern "C" {
fn plusone(x: std::ffi::c_int) -> std::ffi::c_int;
fn current_timestamp() -> std::ffi::c_long;
}
use std::env;
fn run(count: std::ffi::c_int) {
unsafe {
// start immediately
let start = current_timestamp();
let mut x = 0;
while x < count {
x = plusone(x);
}
let end = current_timestamp();
let elapsed = end - start;
println!("{}", elapsed);
}
}
fn main() {
let args: Vec<String> = env::args().collect();
if args.len() == 1 {
println!("First arg (0 - 2000000000) is required.");
return;
}
let count = args[1].parse::<i32>().unwrap();
if count <= 0 || count > 2000000000 {
println!("Must be a positive number not exceeding 2 billion.");
return;
}
run(count as std::ffi::c_int);
}
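For anyone reproducing the Rust side: one way to compile and link plus.c is a small build.rs using the cc crate, along these lines (the plus/ path is just an example layout, adjust as needed):

// build.rs — compiles plus.c into a static library and links it into the binary.
// Requires `cc` as a [build-dependencies] entry in Cargo.toml.
fn main() {
    cc::Build::new()
        .file("plus/plus.c")
        .compile("plus");
    println!("cargo:rerun-if-changed=plus/plus.c");
}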
I also used Zig (0.12.0-dev.1684+ea4a07701) to run the benchmark. Here's the Zig code:
const std = @import("std");
const os = std.os;
const io = std.io;
const allocator = std.heap.c_allocator;

const c = @cImport({
    @cDefine("_NO_CRT_STDIO_INLINE", "1");
    @cInclude("stdio.h");
    @cInclude("plus/plus.h");
});

pub fn main() anyerror!void {
    const stdout_file = io.getStdOut();
    const stdout = stdout_file;
    const args = try std.process.argsAlloc(allocator);
    if (args.len == 1) {
        try stdout.writeAll("First arg (0 - 2000000000) is required.\n");
        return;
    }
    const count = try std.fmt.parseInt(i32, args[1], 10);
    if (count <= 0 or count > 2000000000) {
        try stdout.writeAll("Must be a positive number not exceeding 2 billion.\n");
        return;
    }
    run(count);
}

fn run(count: i32) void {
    const start = c.current_timestamp();
    var x: i32 = 0;
    while (x < count) {
        x |= c.plusone(x);
    }
    const elapsed = c.current_timestamp() - start;
    _ = c.printf("%d %lld\n", x, elapsed);
}
I built the Rust version with the following release profile:
[profile.release]
codegen-units = 1
debug-assertions = false
incremental = false
lto = true
opt-level = 3
overflow-checks = false
panic = "abort"
rpath = false
strip = true
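This is the [profile.release] section of Cargo.toml; a plain release build picks it up:

cargo build --release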
and the Zig version with:
-OReleaseFast
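Concretely, the Zig build command is something along these lines (paths depend on where plus.h and plus.c live; -lc links libc for printf):

zig build-exe main.zig plus/plus.c -OReleaseFast -lc -I.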
When I ran the benchmark with a small input value (e.g. 100,000), both Rust and Zig completed the task in 0 milliseconds.
However, when I used a larger input value (e.g. 2,000,000,000), Rust took approximately 2400 milliseconds, while Zig consistently completed it in 0 milliseconds.
This leads me to wonder whether Rust can achieve the same optimization as Zig to improve its performance in similar FFI scenarios.