Basically, I want thread-local storage that will be accessed very frequently.
I measured the performance of two ways of doing this:
- Use C’s `__thread` to declare the thread-local storage, and use C functions to access it from Rust. With this approach, I pay the cost of a function call on every access.
- Use Rust’s `thread_local!()` macro. Rust’s thread-local key implementation is pretty costly, and this destroys the performance.
These are the results that I get: using the thread-local directly in C took 0 msec, accessing C’s thread-local from Rust took 18 msec, and using Rust’s `thread_local!()` took 33 msec. I am hoping to get performance similar to C’s thread-local (0 msec) in Rust.
C: 12499997500000
C: 0 msec
Rust_C: 12499997500000
Rust_C: 18 msec
RUST: 12499997500000
RUST: 33 msec
I am wondering if there is a way to achieve this. One possibility that I can think of is to use Rust’s inline assembly to access C’s thread-local variable (through the FS register on x64), but that is a hack that I would like to avoid if possible.
The following is my test code:
(src/main.rs)
extern crate time;
use std::cell::RefCell;
/// Exclusive upper bound of the benchmark loops; each loop runs over `1..ITERATION`.
const ITERATION: i64 = 5_000_000;
/// Runs the three measurements in order: the pure-C loop, the Rust loop that
/// reaches C's thread-local through FFI, and the Rust `thread_local!` loop.
fn main() {
    // SAFETY: `c_thread_local` takes no arguments and returns nothing; it is
    // declared in the `extern "C"` block of this file and linked statically.
    unsafe {
        c_thread_local();
    }

    c_version();
    rust_version();
}
// Bindings to the statically linked `thread_local` library (src/c/thread_local.c).
#[link(name = "thread_local", kind = "static")]
extern "C" {
    /// Runs the pure-C benchmark loop and prints its own `C: ...` result lines.
    fn c_thread_local();

    /// Allocates the calling thread's slot that `thread_local()` hands out.
    fn create_thread_local();

    /// Returns the pointer stored by `create_thread_local()` on the calling thread.
    fn thread_local() -> *mut ThreadLocal;
}
/// Rust view of the C `ThreadLocal` struct: a single 64-bit accumulator.
///
/// `#[repr(C)]` keeps the layout compatible with the C definition so the
/// pointer returned over FFI can be dereferenced from Rust.
#[repr(C)]
struct ThreadLocal {
    /// Running sum updated by the benchmark loops.
    i: i64,
}
/// Benchmarks C's `__thread` storage accessed from Rust: every iteration
/// calls `thread_local()` over FFI and adds the loop counter to the slot.
///
/// Prints the final sum and the elapsed wall-clock time in milliseconds.
fn c_version() {
    // SAFETY: plain FFI call with no arguments; it only sets up the slot that
    // the `thread_local()` calls below read.
    unsafe { create_thread_local() };

    let began = time::now_utc();
    for step in 1..ITERATION {
        // The FFI call is deliberately repeated on every iteration: its cost
        // is what this benchmark measures.
        // SAFETY: dereferences the pointer stored by `create_thread_local()`
        // above. The pointer is not checked for null, so this assumes the
        // allocation on the C side succeeded.
        unsafe { (*thread_local()).i += step };
    }
    let finished = time::now_utc();

    // SAFETY: same pointer as in the loop above.
    let total = unsafe { (*thread_local()).i };
    let elapsed_ms = (finished - began).num_milliseconds();

    println!("Rust_C: {}", total);
    println!("Rust_C: {} msec", elapsed_ms);
}
// Per-thread accumulator for the pure-Rust benchmark. The `RefCell` provides
// the interior mutability needed to update the value inside `with`, at the
// price of a runtime borrow check on every access.
thread_local! {
    static RUST_THREAD_LOCAL: RefCell<ThreadLocal> = RefCell::new(ThreadLocal { i: 0 })
}
/// Benchmarks Rust's `thread_local!` storage: every iteration enters
/// `RUST_THREAD_LOCAL.with`, mutably borrows the `RefCell`, and adds the loop
/// counter to the accumulator.
///
/// Prints the final sum and the elapsed wall-clock time in milliseconds.
fn rust_version() {
    let began = time::now_utc();
    for step in 1..ITERATION {
        // One `with` + `borrow_mut` per iteration is the cost under test.
        RUST_THREAD_LOCAL.with(|cell| cell.borrow_mut().i += step);
    }
    let finished = time::now_utc();

    let total = RUST_THREAD_LOCAL.with(|cell| cell.borrow().i);
    let elapsed_ms = (finished - began).num_milliseconds();

    println!("RUST: {}", total);
    println!("RUST: {} msec", elapsed_ms);
}
(src/c/thread_local.c)
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
/*
 * Payload kept in thread-local storage: a single 64-bit accumulator.
 * The Rust side declares a matching #[repr(C)] struct with one i64 field.
 */
struct ThreadLocal {
    int64_t i; /* running sum updated by the benchmark loops */
};
typedef struct ThreadLocal ThreadLocal;
/* Per-thread pointer to the slot used by the Rust benchmark: assigned by
 * create_thread_local() and returned by thread_local(). */
__thread ThreadLocal* for_rust;
/*
 * Allocates the calling thread's ThreadLocal slot and stores the pointer in
 * the thread-local `for_rust`, where thread_local() picks it up.
 *
 * The slot is zero-initialised: the Rust caller immediately does `+=` on the
 * `i` field, so the accumulator must start at 0. Plain malloc() would leave
 * it indeterminate and the printed sum would depend on heap garbage.
 *
 * If the allocation fails, `for_rust` is left NULL; callers dereference the
 * result of thread_local() without checking. Each call allocates a new slot
 * and overwrites the previous pointer without freeing it.
 */
void create_thread_local() {
    for_rust = (ThreadLocal*) calloc(1, sizeof(ThreadLocal));
}
/*
 * Returns the calling thread's slot pointer as stored by
 * create_thread_local(); no allocation or checking happens here.
 *
 * NOTE(review): `thread_local` is a macro in C11 <threads.h> and a keyword in
 * C23, so this function name only compiles under older dialects that do not
 * include that header. It is kept because the Rust side links against it.
 */
ThreadLocal* thread_local() {
    ThreadLocal* slot = for_rust;
    return slot;
}
/* Per-thread pointer to the accumulator used by the pure-C benchmark in
 * c_thread_local(). */
__thread ThreadLocal* for_c;
/*
 * Pure-C baseline: allocates a slot behind the thread-local `for_c`, adds
 * every integer in [1, 5000000) to it, then prints the sum and the elapsed
 * wall-clock time in milliseconds.
 *
 * The slot is allocated with calloc() so the accumulator starts at 0; with
 * malloc() the first `for_c->i + i` would read an indeterminate value.
 *
 * NOTE(review): with optimisations enabled the compiler may keep the sum in a
 * register or fold the loop away entirely, which would explain a 0 msec
 * reading. Confirm against the generated assembly before comparing numbers.
 */
void c_thread_local() {
    for_c = (ThreadLocal*) calloc(1, sizeof(ThreadLocal));
    if (for_c == NULL) {
        /* Nothing to measure without a slot; report and bail out instead of
         * dereferencing NULL. */
        perror("calloc");
        return;
    }

    struct timeval start, end;
    gettimeofday(&start, NULL);
    int64_t i = 1;
    /* Same bound as ITERATION in src/main.rs. */
    for (; i < 5000000; i++)
        for_c->i = for_c->i + i;
    gettimeofday(&end, NULL);

    /* Microsecond and second differences are each converted to milliseconds
     * and summed; the result is truncated to whole milliseconds. */
    int duration = (int) ((double) (end.tv_usec - start.tv_usec) / 1000 + (double) (end.tv_sec - start.tv_sec) * 1000);
    printf("C: %" PRIi64 "\n", for_c->i);
    printf("C: %d msec\n", duration);
}
(Cargo.toml)
# Cargo manifest for the thread-local benchmark binary.
[package]
name = "hello_world"
version = "0.0.1"
authors = [ "Your name <you@example.com>" ]
# Build script that compiles src/c/thread_local.c into a static library.
build = "build.rs"
# Crate used by build.rs (`extern crate gcc`), taken from git rather than a
# published release.
[build-dependencies.gcc]
git = "https://github.com/alexcrichton/gcc-rs"
[[bin]]
name = "hello_world"
[dependencies]
# Provides `time::now_utc()`, which src/main.rs uses for its timing.
time = "0.1.34"
(build.rs)
extern crate gcc;
/// Build script: compiles the C half of the benchmark into the static
/// library `libthread_local.a`, which `src/main.rs` links against.
fn main() {
    let mut c_build = gcc::Config::new();
    c_build.file("src/c/thread_local.c");
    c_build.include("src");
    c_build.compile("libthread_local.a");
}