Is a more compat data storage style possible?

It might be something related to IRLO, but I fear that experts already have a better solution than I am.


I had written C extension for R several years ago, and found something very useful:

R store a pointer ,*x, to the data, and store the data's length in x[-1].

Could we have the same implementation for Rust?

Currently, we have to dereference a pointer twice to get data from &Vec. If such implementation is used, we could index element from &Vec directly, which may save sometime.

Is it possible? Could we add an additional layer for Vecs?

IMHO, the memory layout could be:

struct VecSlice{
   cap:usize,
   len:usize,
   data:[T]// store data here directly.
}
impl Borrow for VecSlice{
    fn borrow(&self)->...suppose it could compile{
        &self.data
    }
}
impl VecSlice{
    fn len(&self)->usize{
        unsafe{*(self as *usize).sub(1)}
    }
}
struct Vec{
    vec_slice:*T // point to VecSlice .data field
}

minimal example to show the disadvantage of current Vec implementation

// we have to perform 2 instruction to get a number from &Vec<u8>, but only one instruction is enough for &[u8]
// Type your code here, or load an example.
pub fn vec(num: &Vec<u8>) -> u8 {
    if num.len()<=12  {
        unsafe{std::hint::unreachable_unchecked()}
    }
    num[12]
}
pub fn slice(num: &[u8]) -> u8 {
    if num.len()<=12  {
        unsafe{std::hint::unreachable_unchecked()}
    }
    num[12]
}
/* godbolt
example::vec:
        mov     rax, qword ptr [rdi]
        mov     al, byte ptr [rax + 12]
        ret

example::slice:
        mov     al, byte ptr [rdi + 12]
        ret
*/

real case:

we may use Vec<Vec<_>> to store arrays with different length, which seems to be very annoying:

let a=vec![vec![0],vec![1,2,3]];

if we want to get a[1][2], we actually compute something like:

let a1=a.buf.get(1);// a1== & vec![1,2,3]
let a2=a1.buf.get(2);// a2= & 3
let res=*a2 ; // res=3

This is a lot of waste, we might have a better implementation:

let a=&[&[0],&[1,2,3]]; // let us suppose it could be.

here, we could dereference the pointer in 2 instructions:

let a1=a.get(1);//a1=&[1,2,3]
let a2=a1[2];// since a1 is a pointer, we could directly dereference it, and get the answer, 3.

update before someone blame me:

the extra consumption is a global constant, EMPTY:

const EMPTY:VecSlice<()>=VecSlice{cap:0,len:0,data:[()]};

index, panic
push increase len, panic
pop decrease len, panic
grow the slice would alloc a memory directly, won't affect EMPTY (thus Vec::<T>::new() == EMPTY holds.)

Thus we could also have the ability to alloc a Vec when it is not empty

If I am not mistaken, the thin-vec crate provides such a type.

1 Like

What do you mean by this? ThinVec::push() takes &mut self.

you're right

I'll check it carefully.


edit: it seems thin_vec generates even more asm instructions than a Vec.

#[macro_use] extern crate thin_vec;
type T=u8;
use thin_vec::ThinVec;
fn main() {
    let p = thin_vec![thin_vec![0u8;17];17];
    println!("{}",thin(&p));
    println!("{}",slice(&[[0u8;17].as_slice();17].as_slice()));
}
#[no_mangle]
fn thin(num:&ThinVec<ThinVec<T>>)->T{
    if num.len()<=16 || num[16].len()<=16  {
        unsafe{std::hint::unreachable_unchecked()}
    }
    num[16][16]
}
#[no_mangle]
fn slice(num:&[&[T]])->T{
    if num.len()<=16 || num[16].len()<=16  {
        unsafe{std::hint::unreachable_unchecked()}
    }
    num[16][16]
}
thin:
        .cfi_startproc
        pushq   %r14
        .cfi_def_cfa_offset 16
        pushq   %rbx
        .cfi_def_cfa_offset 24
        pushq   %rax
        .cfi_def_cfa_offset 32
        .cfi_offset %rbx, -24
        .cfi_offset %r14, -16
        movq    (%rdi), %rbx
        movq    _ZN8thin_vec6Header3len17h9c8e2083710cdb0bE@GOTPCREL(%rip), %r14
        movq    %rbx, %rdi
        callq   *%r14
        movq    %rbx, %rdi
        callq   *%r14
        cmpq    $17, %rax
        jb      .LBB12_4
        movq    144(%rbx), %rdi
        callq   *%r14
        movq    %rbx, %rdi
        callq   *%r14
        cmpq    $16, %rax
        jbe     .LBB12_6
        movq    144(%rbx), %rbx
        movq    %rbx, %rdi
        callq   *_ZN8thin_vec6Header3len17h9c8e2083710cdb0bE@GOTPCREL(%rip)
        cmpq    $16, %rax
        jbe     .LBB12_6
        movb    32(%rbx), %al
        addq    $8, %rsp
        .cfi_def_cfa_offset 24
        popq    %rbx
        .cfi_def_cfa_offset 16
        popq    %r14
        .cfi_def_cfa_offset 8
        retq
.LBB12_6:
        .cfi_def_cfa_offset 32
        leaq    .L__unnamed_7(%rip), %rdx
        jmp     .LBB12_5
.LBB12_4:
        leaq    .L__unnamed_8(%rip), %rdx
.LBB12_5:
        movl    $16, %edi
        movq    %rax, %rsi
        callq   *_ZN4core9panicking18panic_bounds_check17h82dc21a0b39dc163E@GOTPCREL(%rip)
        ud2
.Lfunc_end12:
        .size   thin, .Lfunc_end12-thin
        .cfi_endproc

        .section        .text.slice,"ax",@progbits
        .globl  slice
        .p2align        4, 0x90
        .type   slice,@function
slice:
        .cfi_startproc
        movq    256(%rdi), %rax
        movb    16(%rax), %al
        retq
.Lfunc_end13:
        .size   slice, .Lfunc_end13-slice
        .cfi_endproc

        .section        .text.main,"ax",@progbits
        .globl  main
        .p2align        4, 0x90
        .type   main,@function

This topic was automatically closed 90 days after the last reply. We invite you to open a new topic if you have further questions or comments.