Help redesigning attempt at rc_arena

TLDR: Advice on redesigning around Vec<Option<My_RcBox<T>>> appreciated.

high level

Warning: this is untested code. I haven't even checked for UB. It barely compiles. There is one design decision I absolutely loathe:

struct ChunkList<T> {
    current: Vec<Option<My_RcBox<T>>>,
    rest: Vec<Vec<Option<My_RcBox<T>>>>,
    free_list: Vec<My_Id>,
}

I'd much rather prefer

struct ChunkList<T> {
    current: Vec<My_RcBox<T>>,
    rest: Vec<Vec<My_RcBox<T>>>,
    free_list: Vec<My_Id>,
}

but I can't due to potential drop handlers.

Anyway, here is the current motivation, followed by the current code.

motivation

I want something like typed_arena, but instead of &mut T, we get Sorta_Rc<T>. The motivating problem here is a Json / XML / Virtual Dom / Sexp tree w/ lots of nodes.

Starting out, we're going to add a freelist to our ChunkList. This tracks nodes that have been freed:

#[derive(Clone, Copy)]
pub struct My_Id {
    vec_id: usize,
    loc_in_vec: usize,
}

struct ChunkList<T> {
    current: Vec<Option<My_RcBox<T>>>,
    rest: Vec<Vec<Option<My_RcBox<T>>>>,
    free_list: Vec<My_Id>,
}

Similarly, we are going to modify RcBox where we (1) drop weak count as we don't support Weak ptrs right now and (2) track the My_Id, to know what to append to the freelist when the RcBox is freed

#[repr(C)]
struct My_RcBox<T> {
    strong: Cell<usize>,
    free_list_idx: My_Id, // what to append to free list
    value: T,
}

The other design decision is that My_Rc<T> .as_ref() returns Option<&T>. Why? The choice here is this: suppose we drop the arena, but there are My_Rc's to the arena lying around. What do we do?

  1. keep all the memory of the Arena around
  2. free the memory, call the drop handlers , and have all the My_Rc return None.

I'm going with choice 2.

unhappy decision

The one decision I am unhappy with is:

struct ChunkList<T> {
    current: Vec<Option<My_RcBox<T>>>,
    rest: Vec<Vec<Option<My_RcBox<T>>>>,
    free_list: Vec<My_Id>,
}

In typed_arena, they could just store Vec<T> (or in this case Vec<My_RcBox<T>> , but we have the problem that we want to call the drop handler asap, which means some of the My_RcBox<T> would be storing invalid data.

One possible approach is Vec<MaybeUninit<My_RcBox<T>> but then no drop handlers (of the active elements at the time when the arena is dropped) will be called; so we need to track a separate list of items to call drop handlers on (and I have not worked this out in full).

full code

c/rust-typed-arena/tree/master

#[cfg(not(feature = "std"))]
extern crate alloc;

#[cfg(any(feature = "std", test))]
extern crate core;

use alloc::rc::{Rc, Weak};
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use core::{
    cell::{Cell, RefCell},
    cmp, mem,
    ptr::NonNull,
};

type T = std::rc::Rc<usize>;

#[repr(C)]
struct My_RcBox<T> {
    strong: Cell<usize>,
    free_list_idx: My_Id, // what to append to free list
    value: T,
}

pub struct My_Rc<T> {
    arena: Weak<Arena<T>>,
    ptr: NonNull<Option<My_RcBox<T>>>,
}

#[derive(Clone, Copy)]
pub struct My_Id {
    vec_id: usize,
    loc_in_vec: usize,
}

struct ChunkList<T> {
    current: Vec<Option<My_RcBox<T>>>,
    rest: Vec<Vec<Option<My_RcBox<T>>>>,
    free_list: Vec<My_Id>,
}

pub struct Arena<T> {
    chunks: RefCell<ChunkList<T>>,
}

impl<T> ChunkList<T> {
    unsafe fn get_mut(&mut self, idx: My_Id) -> *mut Option<My_RcBox<T>> {
        if idx.vec_id == self.rest.len() {
            self.current.as_mut_ptr().offset(idx.loc_in_vec as isize)
        } else {
            self.rest[idx.vec_id].as_mut_ptr().offset(idx.loc_in_vec as isize)
        }
    }

    fn double_size(&mut self) {
        let double_cap = self.current.capacity().checked_mul(2).expect("capacity overflow");
        let chunk = mem::replace(&mut self.current, Vec::with_capacity(double_cap));
        self.rest.push(chunk);
    }

    fn alloc(&mut self, v: T) -> *mut Option<My_RcBox<T>> {
        if let Some(idx) = self.free_list.pop() {
            unsafe {
                let mut t = self.get_mut(idx);
                *t = Some(My_RcBox {
                    strong: Cell::new(0),
                    free_list_idx: idx,
                    value: v,
                });
                return t;
            }
        }

        if self.current.len() == self.current.capacity() {
            self.double_size();
        }

        self.current.push(Some(My_RcBox {
            strong: Cell::new(0),
            free_list_idx: My_Id {
                vec_id: self.rest.len(),
                loc_in_vec: self.current.len(),
            },
            value: v,
        }));

        unsafe {
            return self.current.as_mut_ptr().offset(self.current.len() as isize);
        }
    }
}

impl<T> My_Rc<T> {
    /*
    pub unsafe fn get_mut_unchecked(this: &mut Self) -> &mut T {
        unsafe { &mut (*this.ptr.as_ptr()).value }
    }

     */

    unsafe fn dec_strong(&mut self) -> usize {
        match &self.ptr.as_ref() {
            None => 0,
            Some(x) => {
                let t = &x.strong;
                let n = t.get() - 1;
                t.set(n);
                n
            }
        }
    }

    pub fn as_ref(&self) -> Option<&T> {
        if self.arena.strong_count() == 0 {
            // arena has been dropped
            None
        } else {
            unsafe {
                match self.ptr.as_ref() {
                    None => None,
                    Some(x) => Some(&x.value),
                }
            }
        }
    }
}

impl<T> Drop for My_Rc<T> {
    fn drop(&mut self) {
        let cur_strong_count = unsafe { self.dec_strong() };
        if (cur_strong_count == 0) {
            unsafe {
                *self.ptr.as_ptr() = None;
            }
            if let Some(arena) = self.arena.upgrade() {
                // arena still active
                unsafe {
                    match &self.ptr.as_ref() {
                        None => {}
                        Some(t) => {
                            let free_list_idx = t.free_list_idx;
                            arena.chunks.borrow_mut().free_list.push(free_list_idx);
                        }
                    }
                }
            }
        }
    }
}

const INITIAL_SIZE: usize = 1024;
const MIN_CAPACITY: usize = 1;

impl<T> Arena<T> {
    pub fn new() -> Arena<T> {
        let size = cmp::max(1, mem::size_of::<T>());
        Arena::with_capacity(INITIAL_SIZE / size)
    }

    pub fn with_capacity(n: usize) -> Arena<T> {
        let n = cmp::max(MIN_CAPACITY, n);
        Arena {
            chunks: RefCell::new(ChunkList {
                current: Vec::with_capacity(n),
                rest: Vec::new(),
                free_list: vec![],
            }),
        }
    }

    #[inline]
    pub fn alloc(self: Rc<Self>, v: T) -> My_Rc<T> {
        let t = self.chunks.borrow_mut().alloc(v);
        My_Rc {
            arena: Rc::downgrade(&self),
            ptr: NonNull::new(t).unwrap(),
        }
        // self.alloc_fast_path(value).unwrap_or_else(|value| self.alloc_slow_path(value))
    }
}

Have not tested / thought deeply about UB yet. This re-design is a bit cleaner (by moving the Option to Option<T>). However, this still feels a bit icky. I am open to advice on how to redesign this to be cleaner.

Current code

// base on https://github.com/thomcc/rust-typed-arena/tree/master

#[cfg(not(feature = "std"))]
extern crate alloc;

#[cfg(any(feature = "std", test))]
extern crate core;

use alloc::rc::{Rc, Weak};
#[cfg(not(feature = "std"))]
use alloc::vec::Vec;

use core::{
    cell::{Cell, RefCell},
    cmp, mem,
    ptr::NonNull,
};

type T = std::rc::Rc<usize>;

#[repr(C)]
struct My_RcBox<T> {
    strong: Cell<usize>,
    free_list_idx: My_Id, // what to append to free list
    value: Option<T>,
}

pub struct My_Rc<T> {
    arena: Weak<Arena<T>>,
    ptr: NonNull<My_RcBox<T>>,
}

#[derive(Clone, Copy)]
pub struct My_Id {
    vec_id: usize,
    loc_in_vec: usize,
}

struct ChunkList<T> {
    current: Vec<My_RcBox<T>>,
    rest: Vec<Vec<My_RcBox<T>>>,
    free_list: Vec<My_Id>,
}

pub struct Arena<T> {
    chunks: RefCell<ChunkList<T>>,
}

impl<T> ChunkList<T> {
    unsafe fn get_mut(&mut self, idx: My_Id) -> *mut My_RcBox<T> {
        if idx.vec_id == self.rest.len() {
            self.current.as_mut_ptr().offset(idx.loc_in_vec as isize)
        } else {
            self.rest[idx.vec_id].as_mut_ptr().offset(idx.loc_in_vec as isize)
        }
    }

    fn double_size(&mut self) {
        let double_cap = self.current.capacity().checked_mul(2).expect("capacity overflow");
        let chunk = mem::replace(&mut self.current, Vec::with_capacity(double_cap));
        self.rest.push(chunk);
    }

    fn alloc(&mut self, v: T) -> *mut My_RcBox<T> {
        if let Some(idx) = self.free_list.pop() {
            unsafe {
                let mut t = self.get_mut(idx);
                *t = My_RcBox {
                    strong: Cell::new(0),
                    free_list_idx: idx,
                    value: Some(v),
                };
                return t;
            }
        }

        if self.current.len() == self.current.capacity() {
            self.double_size();
        }

        self.current.push(My_RcBox {
            strong: Cell::new(0),
            free_list_idx: My_Id {
                vec_id: self.rest.len(),
                loc_in_vec: self.current.len(),
            },
            value: Some(v),
        });

        unsafe {
            return self.current.as_mut_ptr().offset(self.current.len() as isize);
        }
    }
}

impl<T> My_Rc<T> {
    unsafe fn dec_strong(&mut self) -> usize {
        let x = &self.ptr.as_ref();
        let t = &x.strong;
        let n = t.get() - 1;
        t.set(n);
        n
    }

    pub fn as_ref(&self) -> Option<&T> {
        if self.arena.strong_count() == 0 {
            // arena has been dropped
            None
        } else {
            unsafe {
                let x = self.ptr.as_ref();
                x.value.as_ref()
            }
        }
    }
}

impl<T> Drop for My_Rc<T> {
    fn drop(&mut self) {
        let cur_strong_count = unsafe { self.dec_strong() };
        if (cur_strong_count == 0) {
            unsafe {
                self.ptr.as_mut().value = None;
            }
            if let Some(arena) = self.arena.upgrade() {
                // arena still active
                unsafe {
                    let t = &self.ptr.as_ref();
                    let free_list_idx = t.free_list_idx;
                    arena.chunks.borrow_mut().free_list.push(free_list_idx);
                }
            }
        }
    }
}

const INITIAL_SIZE: usize = 1024;
const MIN_CAPACITY: usize = 1;

impl<T> Arena<T> {
    pub fn new() -> Arena<T> {
        let size = cmp::max(1, mem::size_of::<T>());
        Arena::with_capacity(INITIAL_SIZE / size)
    }

    pub fn with_capacity(n: usize) -> Arena<T> {
        let n = cmp::max(MIN_CAPACITY, n);
        Arena {
            chunks: RefCell::new(ChunkList {
                current: Vec::with_capacity(n),
                rest: Vec::new(),
                free_list: vec![],
            }),
        }
    }

    #[inline]
    pub fn alloc(self: Rc<Self>, v: T) -> My_Rc<T> {
        let t = self.chunks.borrow_mut().alloc(v);
        My_Rc {
            arena: Rc::downgrade(&self),
            ptr: NonNull::new(t).unwrap(),
        }
    }
}

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.