This is related to a previous post on how to implement `num_traits::Zero` for an enum used to unify number-like primitives.
At a high level, the function has the type `Series -> Series`. I have the following code that computes `decile` for all the types that make sense for this computation. The `decile` function is `Series -> Vec<u32>`.
There are two dimensions to the question:
- is there a way to write this code more generically?
- how might I re-use the code for functions other than `decile`?
pub fn decile_series(series: &Series, name: &str) -> Result<Series> {
//
// execution requires type specific code
// return value for decile is Vec<u32>
//
let (idxs, data): (Vec<usize>, Vec<u32>) = match series.dtype() {
DataType::Int8 => {
let mut data_with_idx: Vec<(usize, i8)> =
series.i8()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::Int16 => {
let mut data_with_idx: Vec<(usize, i16)> =
series.i16()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::Int32 => {
let mut data_with_idx: Vec<(usize, i32)> =
series.i32()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::Int64 => {
let mut data_with_idx: Vec<(usize, i64)> =
series.i64()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::UInt8 => {
let mut data_with_idx: Vec<(usize, u8)> =
series.u8()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::UInt16 => {
let mut data_with_idx: Vec<(usize, u16)> =
series.u16()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::UInt32 => {
let mut data_with_idx: Vec<(usize, u32)> =
series.u32()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::UInt64 => {
let mut data_with_idx: Vec<(usize, u64)> =
series.u64()?.into_no_null_iter().enumerate().collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::Float32 => {
let mut data_with_idx: Vec<(usize, AnyNumber<f32>)> = series
.f32()?
.into_no_null_iter()
.map(AnyNumber)
.enumerate()
.collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
DataType::Float64 => {
let mut data_with_idx: Vec<(usize, AnyNumber<f64>)> = series
.f64()?
.into_no_null_iter()
.map(AnyNumber)
.enumerate()
.collect();
data_with_idx.sort_unstable_by(|(_, a), (_, b)| b.cmp(a));
// extract vectors
let (idxs, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let data: Vec<u32> = decile(&data);
Ok((idxs, data))
}
_ => Err(eyre!(
"The underlying type is not a AnyNumber: {}",
series.dtype()
)),
}?;
// zip and sort by the df idxs
let mut data_with_idx: Vec<(_, _)> = std::iter::zip(idxs, data).collect();
data_with_idx.sort_unstable_by(|(idx_a, _), (idx_b, _)| idx_a.cmp(idx_b));
// extract data (drop idxs)
let (_, data): (Vec<_>, Vec<_>) = data_with_idx.into_iter().unzip();
let new_series = Series::new(name, data);
Ok(new_series)
}
Note: `AnyNumber` is a wrapper used to implement `Zero`, `Ord` and others not already implemented by the float primitives.
/// Newtype wrapper around a single numeric value of type `T`.
///
/// `decile_series` wraps `f32`/`f64` values in it before sorting them with
/// `cmp`, which the bare float primitives do not provide (they are not `Ord`).
/// The trait implementations that make this work are not shown here.
struct AnyNumber<T>(T);