I'm working on a toy project to replicate pandas's `read_csv` function as a way to learn Rust. To start, I've implemented the `DataFrame` struct, but I was wondering whether this is the right approach, because it seems wasteful/wrong to create a `Box` pointer for each value in the `Vec`.
/// Marker trait for types that may be stored as a single cell value.
trait Value {}
/// Marker trait for column-level data; it has no methods.
trait ColumnData {}
// Cell types that are accepted as `Value`: `u8`, `f32` and `String`.
impl Value for u8 {}
impl Value for f32 {}
impl Value for String {}
/// A named column whose cells are stored as type-erased `Value` trait objects.
struct Column {
/// Name of the column.
name: String,
/// Cell values; each cell is wrapped in its own `Box<dyn Value>`, so cells of
/// different concrete types can live in the same vector.
data: Vec<Box<dyn Value>>,
}
/// A table represented as a list of columns.
struct DataFrame {
/// The columns that make up the table.
columns: Vec<Column>,
}
// No methods are defined on `DataFrame` yet.
impl DataFrame {}
pub fn test() -> DataFrame {
DataFrame {
columns: vec![
Column {
name: "col_ints".to_string(),
data: vec![Box::new(1), Box::new(2), Box::new(3)],
},
Column {
name: "col_str".to_string(),
data: vec![
Box::new("hello".to_string()),
Box::new("world".to_string()),
Box::new("ok".to_string()),
],
},
],
}
}
I took this approach because I'd only know the data types at runtime, when `read_csv` is called, and each column would store a different data type, e.g. one column might store strings and another floats.
How would you implement this?
Thanks.
EDIT: I came up with an alternative approach. This seems better to me?
/// Marker trait for types that may be used as the element type of a column.
trait Value {}
/// Marker trait for whole columns; it has no methods.
trait ColumnData {}
// Element types that are accepted as `Value`: `u8`, `f32` and `String`.
impl Value for u8 {}
impl Value for f32 {}
impl Value for String {}
/// A named column holding a homogeneous vector of cell values.
///
/// The struct itself places no trait bound on `T`. Requirements on the element
/// type belong on the `impl` blocks that rely on them, which keeps the type
/// constructible and `Debug`-derivable for any `T`.
#[derive(Debug)]
struct Column<T> {
    /// Name of the column.
    name: String,
    /// Cell values, all of the same concrete type `T`.
    data: Vec<T>,
}
// Any `Column` whose element type implements `Value` counts as `ColumnData`.
impl<T: Value> ColumnData for Column<T> {}
/// A table represented as a list of type-erased columns.
struct DataFrame {
/// Columns stored as boxed `ColumnData` trait objects, so columns with
/// different element types can share one vector.
columns: Vec<Box<dyn ColumnData>>
}
// No methods are defined on `DataFrame` yet.
impl DataFrame {}
pub fn test() -> DataFrame {
DataFrame {
columns: vec![
Box::new(Column {
name: "col_ints".to_string(),
data: vec![1, 2, 3, 4, 5],
}),
Box::new(Column {
name: "col_strs".to_string(),
data: vec![
"hello".to_string(),
"world".to_string(),
"ok".to_string(),
"123".to_string(),
"stuff".to_string(),
],
}),
],
}
}