Why my rust code is much more slower than cpp

Two days before, our teacher let us to finish a linked list using cpp, when i finish it i wanna also know how it works in rust. so i rewrite a simple one for it. when i compare the performance , i found my cpp code using 305127 ms finish task while my rust code using almost 469046 ms.

Maybe is my fault that i do not understanding rust and write some bad code

Cpp code here

struct Node{
  int         v;
  struct Node *next;
};
struct LinkedList{
  struct Node   *head;
  unsigned long len;
public:
  void append(int v);
};

void LinkedList::append(int v){
  if(this->len == 0){
    this->insert(v);
    this->len +=1;
    return;
  } 
  this->len +=1;
  struct Node* cur = this->head;
  while(true){
    if (cur->next == 0){
      break;
    }
    cur = cur->next;
  }
  cur->next = new Node;

}

Rust code here

type LINKNODE = Option<NonNull<Node>>;
struct LinkList {
    head: LINKNODE,
    tail: LINKNODE,
    len: usize,
}

struct Node {
    elm: i32,
    next: LINKNODE,
}

 pub fn append(&mut self,elm:i32){
        let mut s = self.len;
        if self.len == 0 {
            self.push_front_elm(elm);
            self.len += 1;
            return;
        }
        self.len += 1;
        let new = Box::new(Node::from(elm));
        let new = NonNull::from(Box::leak(new));
        let head;
        unsafe{
            head = self.head.as_mut().unwrap().as_mut(); // &NonNull<Node> -> &Node
        }
        let mut cur = head;  //&mut Node
        loop{
            unsafe{
                if s == 1 { //cur.next.is_none(){
                    break;
                }
                cur = cur.next.as_mut().unwrap().as_mut(); // Option<&NonNull> -> 
                s -= 1;
            }
        }
        // Option<NonNull<Node>> 
        cur.next = Some(new);

I test the append for the same time

This is cpp

void fish(int i){
  struct LinkedList* l = new LinkedList;
    
   auto start = std::chrono::high_resolution_clock::now();
    for (int j = 0 ; j< i;j++){
      l->append(i);
    }
    
   auto end = std::chrono::high_resolution_clock::now();
   
   auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
  std::cout << "Function took " << duration.count() << " milliseconds to execute." << std::endl;
}
int main(){
  fish(500000);
  return 0;

This is rust

fn main() {
    let numset = [5_000_00];
    for i in numset{
        finish_f0r_h0m1(i);
    }
}

Here is all my cpp code

#include<iostream>
#include<chrono>

struct Node{
  int         v;
  struct Node *next;
};
struct LinkedList{
  struct Node   *head;
  unsigned long len;
public:
  void append(int v);
  void insert(int v);
};
void LinkedList::append(int v){
  if(this->len == 0){
    this->insert(v);
    this->len +=1;
    return;
  } 
  this->len +=1;
  struct Node* cur = this->head;
  while(true){
    if (cur->next == 0){
      break;
    }
    cur = cur->next;
  }
  cur->next = new Node;

}
void LinkedList::insert(int v){
  struct Node* old = this->head;
  this->head = new Node;
  this->head->v = v;
  this->head->next = old;
  this->len += 1;
}
#include<iostream>
void fish(int i){
   struct LinkedList* l = new LinkedList;
    
    auto start = std::chrono::high_resolution_clock::now();
    for (int j = 0 ; j< i;j++){
      l->append(i);
    }
    
   auto end = std::chrono::high_resolution_clock::now();
   auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
   std::cout << "Function took " << duration.count() << " milliseconds to execute." << std::endl;
}
int main(){
  fish(500000);
  return 0;
}

Here is all my rust code (i am learning rust,so i write much 'dead_code' )

use std::boxed::Box;
use std::ptr::NonNull;
use std::time::Instant;
#[allow(dead_code)]
const NODE_NUMBER: usize = 10_000_0;

#[allow(dead_code)]
fn main() {
    let numset = [5_000_00];
    for i in numset{
        finish_f0r_h0m1(i);
    }
}
fn finish_f0r_h0m1(i : usize){
    let mut l = LinkList::new();
    let now = Instant::now();
    for _ in 0..i {
        l.push_front(3);
    }
    println!("{} costs {} mills insert into head", i, now.elapsed().as_millis());
    l.clear();

    let mut l = LinkList::new();
    let now = Instant::now();
    for _ in 0..i {
        l.append(3);
    }
    println!("{} costs {} mills append into tail", i, now.elapsed().as_millis());
    l.clear();
}
#[test]
fn test1() {
    let mut l = LinkList::new();
    l.push_front(12);
    l.push_front(-12);
    l.push_front(0);
    assert_eq!(l.pop_front(), Some(0));
    assert_eq!(l.pop_front(), Some(-12));
    assert_eq!(l.pop_front(), Some(12));
}
#[test]
fn test2() {
    let mut l = LinkList::new();
    l.push_front(12);
    l.push_front(10);
    assert_eq!(l.as_vec().unwrap(), vec![10, 12]);
}
#[test]
fn test3(){
    let mut l = LinkList::new();
    l.append(12);
    l.append(12);
    l.append(12);
    l.append(0);
    assert_eq!(l.as_vec().unwrap(), vec![12, 12,12,0]);


}
type LINKNODE = Option<NonNull<Node>>;
struct LinkList {
    head: LINKNODE,
    tail: LINKNODE,
    len: usize,
}
#[derive(Debug)]
struct Node {
    elm: i32,
    next: LINKNODE,
}
impl Node {
    fn from(elm: i32) -> Self {
        Node { elm, next: None }
    }
}
//impl Drop for Node{
//    fn drop(&mut self){
//       //println!("I am dropped");
//    }
//}
impl Drop for LinkList {
    fn drop(&mut self) {
        self.clear();
    }
}
#[allow(dead_code)]
impl LinkList {
    pub fn new() -> Self {
        LinkList {
            head: None,
            tail: None,
            len: 0,
        }
    }
    pub fn push_front(&mut self, elm: i32) {
        self.push_front_elm(elm);
        self.len += 1;
    }
    pub fn pop_front(&mut self) -> Option<i32> {
        if self.len <= 0 {
            return None;
        }
        match self.drop_front_elm() {
            None => None,
            Some(v) => {
                self.len -= 1;
                return Some(v);
            }
        }
    }

    //pub fn pop_front(&mut self)->Option<Box<Node>>{
    //    if self.len <=0{
    //        return None;
    //    }
    //    match self.drop_front_elm(){
    //        None=>None,
    //        Some(v)=>{self.len -=1;return Some(v);}
    //    }
    //
    //}
    pub fn append(&mut self,elm:i32){
        let mut s = self.len;
        if self.len == 0 {
            self.push_front_elm(elm);
            self.len += 1;
            return;
        }
        self.len += 1;
        let new = Box::new(Node::from(elm));
        let new = NonNull::from(Box::leak(new));
        let head;
        unsafe{
            head = self.head.as_mut().unwrap().as_mut(); // &NonNull<Node> -> &Node
        }
        let mut cur = head;  //&mut Node
        loop{
            unsafe{
                if s == 1 { //cur.next.is_none(){
                    break;
                }
                cur = cur.next.as_mut().unwrap().as_mut(); // Option<&NonNull> -> 
                s -= 1;
            }
        }
        // Option<NonNull<Node>> 
        cur.next = Some(new);

    }
    pub fn clear(&mut self) {
        if self.len <= 0 {
            return ();
        }
        let mut ptr = self.head.unwrap().as_ptr(); // move NonNull
        loop {
            unsafe {
                self.head = (*ptr).next;
                // std::ptr::drop_in_place(ptr); // it will can old drop fn
                let _ = Box::from_raw(ptr);
                //dealloc(ptr,);
            }
            self.len -= 1;
            if self.head.is_none() {
                break;
            }
            ptr = self.head.unwrap().as_ptr();
        }
    }
    fn drop_front_elm(&mut self) -> Option<i32> {
        let old: NonNull<Node> = self.head.unwrap(); // it cannot move  it copy
        let old_ptr = old.as_ptr(); // *mut Node
        unsafe {
            self.head = (*old_ptr).next;
            let n = (*old_ptr).elm;
            let _ = Box::from_raw(old_ptr);
            return Some(n);
        }
    }

    //    fn drop_front_elm(&mut self)->Option<Box<Node>>{
    //        let old : NonNull<Node> = self.head.unwrap(); // it cannot move  it copy
    //        let old_ptr = old.as_ptr(); // *mut Node
    //        let node: Box<Node>;
    //        unsafe{
    //            node = Box::from_raw(old_ptr); // create a new one
    //            self.head = (*old_ptr).next;
    //            std::ptr::drop_in_place(old_ptr);
    //        }
    //        Some(node)
    //
    //    }
    //
    fn push_front_elm(&mut self, elm: i32) {
        let node: Box<Node> = Box::new(Node::from(elm));

        let node_ptr: NonNull<Node> = NonNull::from(Box::leak(node));
        let ptr = node_ptr.as_ptr();
        unsafe {
            (*ptr).next = self.head;
            (*ptr).elm = elm;
            if self.len == 0 {
                self.tail = Some(node_ptr);
            }
            self.head = Some(node_ptr);
        }
    }

    pub fn as_vec(&self) -> Option<Vec<i32>> {
        if self.len <= 0 {
            return None;
        }
        println!("len -> {}", self.len);
        let mut ptr: *mut Node = self.head.unwrap().as_ptr();
        let mut vec = Vec::with_capacity(self.len);
        loop {
            unsafe {
                vec.push((*ptr).elm);
                if (*ptr).next.is_none() {
                    break;
                }
                ptr = (*ptr).next.unwrap().as_ptr();
            }
        }
        return Some(vec);
    }
}

For picture that i had taken

Rust cost (when runing in release mode)

swappy-20240926_224750

cpp cost(when compile it with just g++ main.cpp )

swappy-20240926_231232

The standard answer of failing to test.

1 Like

Within the loop, you have twice the tests as in the CPP-code

loop{
    unsafe{
        if s == 1 { //cur.next.is_none(){
            break;
        }
        cur = cur.next.as_mut().unwrap().as_mut(); // Option<&NonNull> -> 
        s -= 1;
    }
}

The obvious one is checking your counter. Furthermore, you call .unwrap(), which asserts, that your Next-> Pointer is not null, and therefore makes a second check. Try this instead:

loop{
    unsafe{
        if let Some(x) = cur.next.as_mut() {
            cur = x.as_mut();
        } else {
            break;
        }
    }
}

Single linked lists should be possible without using unsafe code. I'd recommend you to change type LINKNODE = Option<NonNull<Node>>; to Option<Box<Node>>; as a good excercise to make your code more ideomatic. Good luck :slight_smile:

1 Like

I mean, your benchmarks for C++ and Rust don't even try to be equivalent (disregarding any other issues with your benchmarks or code), so it is unclear why you compare their timings.

Also, you should build your C++ code with clang if you're doing language comparison, because clang and rustc use the same optimization backend. This way you exclude any possibility of the same code optimizing in a different way.

4 Likes

There's a couple bugs in the c++ code. You don't initialize some values so there is undefined behavior, you're also leaking memory. However, this doesn't effect the benchmark times because you print the elapsed time before the linked list is dropped on the rust side. You increment the length more then it should be.

I would recommend you run your cpp code under valgrind to fix the bugs. Even correcting for those things the C++ implementation is still faster. You can check by running both implementations against perf:

perf record ./target/release/rust-linked-list
perf stat

A majority of the cpu time (~80%) is spent in the loop on 2-3 instructions chasing pointers in both the C++ and rust implementation.

Anyway, you already know that the option will be Some because of how it's constructed in the linked list, and the number of iterations is the length. You're already using unsafe. So let's use it when we do an unwrap in the tight loop.

Changing the following line of code causes the rust to outperform the c++

                cur = cur.next.as_mut().unwrap().as_mut(); // Option<&NonNull> ->
                cur = cur.next.as_mut().unwrap_unchecked().as_mut(); // Option<&NonNull> ->

Note I'm running with fewer iterations than you.

# original version
❯ cargo run --release                                                                                                                                                
    Finished `release` profile [optimized] target(s) in 0.12s
     Running `target/release/rust`
50000 costs 1 mills insert into head
50000 costs 2418 mills append into tail

# modified to use unwrap_unchecked in hot loop
❯ cargo run --release                                                                                                                                                
    Finished `release` profile [optimized] target(s) in 0.13s
     Running `target/release/rust`
50000 costs 1 mills insert into head
50000 costs 1425 mills append into tail

# c++ version
❯ env CXX=clang++ CPPFLAGS="-O3" make main                                                                                                                           
clang++  -O3   main.cpp   -o main
❯ ./main                                                                                                                                                              
Function took 1941 milliseconds to execute.

Looking at the generated assembly in the perf report it seems by changing it to use unwrap_unchecked the compiler is then able to unroll the hot loop:

Before:

       │       data16 data16 data16 data16 data16 cs nopw 0x0(%rax,%rax,1)                                                                                                   
 87.25 │280:   mov    (%rcx),%rcx                                                                                                                                            
 12.73 │       test   %rcx,%rcx                                                                                                                                              
       │     ↓ je     3f2                                                                                                                                                    
       │       inc    %rdx                                                                                                                                                   
  0.01 │     ↑ jne    280                                                                                                                                                    
       │     ↑ jmp    210                                                                                                                                                    
       │       cs     nopw 0x0(%rax,%rax,1)                                   

After:

       │       dec    %rdx                                                                                                                                                   
       │       data16 data16 data16 data16 cs nopw 0x0(%rax,%rax,1)                                                                                                          
 12.63 │2d0:   mov    (%rcx),%rcx                                                                                                                                            
 11.92 │       mov    (%rcx),%rcx                                                                                                                                            
 12.93 │       mov    (%rcx),%rcx                                                                                                                                            
 12.39 │       mov    (%rcx),%rcx                                                                                                                                            
 11.45 │       mov    (%rcx),%rcx                                                                                                                                            
 13.20 │       mov    (%rcx),%rcx                                                                                                                                            
 13.07 │       mov    (%rcx),%rcx                                                                                                                                            
 12.35 │       mov    (%rcx),%rcx                                                                                                                                            
       │       add    $0xfffffffffffffff8,%rdx                                                                                                                               
       │     ↑ jne    2d0                                                                                                                                                    
       │     ↑ jmp    210                                                
4 Likes

I second this. The last thing you want to do is get in the habit of using unsafe in Rust -- this should only be done after determining there is no practical alternative, and then only with a lot of scrutiny to ensure correctness.

6 Likes

after learning some about Option deepely , it seems that i abuse using unwrap too much ,i will us Some and let :sunny:

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.