In the code base of arrow2 we use trait objects and I constantly have to write things like:
pub fn new_null_array(data_type: DataType, length: usize) -> Box<dyn Array> {
match data_type {
DataType::Null => Box::new(NullArray::new_null(length)),
DataType::Boolean => Box::new(BooleanArray::new_null(length)),
DataType::Int8 => Box::new(PrimitiveArray::<i8>::new_null(data_type, length)),
DataType::Int16 => Box::new(PrimitiveArray::<i16>::new_null(data_type, length)),
DataType::Int32
| DataType::Date32
| DataType::Time32(_)
| DataType::Interval(IntervalUnit::YearMonth) => {
Box::new(PrimitiveArray::<i32>::new_null(data_type, length))
}
DataType::Interval(IntervalUnit::DayTime) => {
Box::new(PrimitiveArray::<days_ms>::new_null(data_type, length))
}
DataType::Int64
| DataType::Date64
| DataType::Time64(_)
| DataType::Timestamp(_, _)
| DataType::Duration(_) => Box::new(PrimitiveArray::<i64>::new_null(data_type, length)),
DataType::Decimal(_, _) => Box::new(PrimitiveArray::<i128>::new_null(data_type, length)),
DataType::UInt8 => Box::new(PrimitiveArray::<u8>::new_null(data_type, length)),
DataType::UInt16 => Box::new(PrimitiveArray::<u16>::new_null(data_type, length)),
DataType::UInt32 => Box::new(PrimitiveArray::<u32>::new_null(data_type, length)),
DataType::UInt64 => Box::new(PrimitiveArray::<u64>::new_null(data_type, length)),
DataType::Float16 => unreachable!(),
DataType::Float32 => Box::new(PrimitiveArray::<f32>::new_null(data_type, length)),
DataType::Float64 => Box::new(PrimitiveArray::<f64>::new_null(data_type, length)),
DataType::Binary => Box::new(BinaryArray::<i32>::new_null(length)),
DataType::LargeBinary => Box::new(BinaryArray::<i64>::new_null(length)),
DataType::FixedSizeBinary(_) => Box::new(FixedSizeBinaryArray::new_null(data_type, length)),
DataType::Utf8 => Box::new(Utf8Array::<i32>::new_null(length)),
DataType::LargeUtf8 => Box::new(Utf8Array::<i64>::new_null(length)),
DataType::List(_) => Box::new(ListArray::<i32>::new_null(data_type, length)),
DataType::LargeList(_) => Box::new(ListArray::<i64>::new_null(data_type, length)),
DataType::FixedSizeList(_, _) => Box::new(FixedSizeListArray::new_null(data_type, length)),
DataType::Struct(fields) => Box::new(StructArray::new_null(&fields, length)),
DataType::Union(_, _, _) => Box::new(UnionArray::new_null(data_type, length)),
DataType::Dictionary(key_type, value_type) => match key_type.as_ref() {
DataType::Int8 => Box::new(DictionaryArray::<i8>::new_null(*value_type, length)),
DataType::Int16 => Box::new(DictionaryArray::<i16>::new_null(*value_type, length)),
DataType::Int32 => Box::new(DictionaryArray::<i32>::new_null(*value_type, length)),
DataType::Int64 => Box::new(DictionaryArray::<i64>::new_null(*value_type, length)),
DataType::UInt8 => Box::new(DictionaryArray::<u8>::new_null(*value_type, length)),
DataType::UInt16 => Box::new(DictionaryArray::<u16>::new_null(*value_type, length)),
DataType::UInt32 => Box::new(DictionaryArray::<u32>::new_null(*value_type, length)),
DataType::UInt64 => Box::new(DictionaryArray::<u64>::new_null(*value_type, length)),
_ => unreachable!(),
},
}
}
You can think of `DataType` as representing a logical (semantic) type, and of the different structs as representing the physical (in-memory) representation.
The `match` is needed because the `DataType` enum describes which physical representation a `Box<dyn Array>` is made of. There are probably 10+ places where this match, or a small variation of it, needs to be written.
Since this pattern shows up several times (with small variations), I wonder: is there a macro (or macro pattern) that can generate this dispatch without me having to write all of the above by hand?