Hi all, newbie here. I'm trying to implement a small autograd crate; this is what I've come up with so far:

```
use ndarray::{
Array, Array2, Array3, Axis, Data, Dim, Dimension, Ix, Ix1, Ix2, IxDyn, IxDynImpl,
ShapeBuilder, StrideShape,
};
use num_traits::{Float, One, Zero};
use std::ops::Add;
/// A node in the autograd graph: an n-dimensional array together with
/// the bookkeeping needed to backpropagate through it.
pub struct Tensor<A, D>
where
D: Dimension,
{
// The raw array of element type `A` and dimensionality `D`.
data: Array<A, D>,
// Accumulated gradient, stored with dynamic dimensionality. `None`
// everywhere in this file; presumably filled in by a backward pass
// defined elsewhere — TODO confirm.
grad: Option<Box<Tensor<A, IxDyn>>>,
// Backward step for the operation that produced this tensor: takes
// the incoming gradient and writes the two operands' gradients
// through the `&mut` out-parameters (see the `Add` impl below).
// `None` for leaf tensors. `FnOnce` because the closure may consume
// captured state when it runs.
grad_fn: Option<
Box<
dyn FnOnce(
Option<Box<Tensor<A, IxDyn>>>,
&mut Option<Box<Tensor<A, IxDyn>>>,
&mut Option<Box<Tensor<A, IxDyn>>>,
) -> (),
>,
>,
// Whether gradients should be tracked for this tensor.
requires_grad: bool,
// `true` for tensors created directly (e.g. via the `tensor!` macro),
// `false` for results of operations.
is_leaf: bool,
}
impl<A, D> Tensor<A, D>
where
D: Dimension,
{
    /// Reduce `self` (typically an incoming gradient) down to the shape of
    /// `target` by summing — i.e. undo the broadcasting that produced
    /// `self`. Extra leading axes are summed away until the ranks match;
    /// then every axis where `target` has length 1 is summed and
    /// re-inserted with length 1 so the ranks stay aligned.
    ///
    /// Consumes `self`; `requires_grad` / `is_leaf` carry over from `self`,
    /// while the returned tensor has no grad or grad_fn of its own.
    pub fn unbroadcast<T: Dimension>(self, target: &Tensor<A, T>) -> Tensor<A, IxDyn>
    where
        A: Clone + Zero + Add<Output = A>,
    {
        let mut reduced = self.data.into_dyn();

        // Sum away leading broadcast axes until the ranks match.
        while reduced.ndim() > target.data.ndim() {
            reduced = reduced.sum_axis(Axis(0));
        }

        // Collapse every axis the target holds with length 1: sum it,
        // then re-insert it so the result keeps the target's rank.
        for descr in target.data.axes() {
            if descr.len() == 1 {
                let ax = descr.axis();
                reduced = reduced.sum_axis(ax).insert_axis(ax);
            }
        }

        Tensor {
            data: reduced,
            grad: None,
            grad_fn: None,
            requires_grad: self.requires_grad,
            is_leaf: self.is_leaf,
        }
    }
}
// NOTE(review): this `Clone` is deliberately shallow with respect to
// autograd state — it copies the data but detaches the copy from the
// graph (no grad, no grad_fn). It also hard-resets `requires_grad` and
// `is_leaf` to `false`, which is surprising behavior for a `Clone`
// impl; the only in-file callers (the copies captured in `Add::add` and
// the grad clone inside its closure) never read those flags on the
// copy, but confirm no other caller relies on them being preserved.
impl<A, D> Clone for Tensor<A, D>
where
A: Clone,
D: Dimension,
{
fn clone(&self) -> Self {
Self {
data: self.data.clone(),
grad: None, // detached: gradient is not copied
grad_fn: None, // detached: backward closure is not copied
requires_grad: false, // NOTE(review): drops the original's flag
is_leaf: false,
}
}
}
impl<T: Dimension + 'static, U: Dimension + 'static, A: Clone + Zero + 'static> Add<&Tensor<A, T>>
for &Tensor<A, U>
{
type Output = Tensor<A, U>;
fn add(self, rhs: &Tensor<A, T>) -> Tensor<A, U> {
let new = &self.data + &rhs.data;
let rhs_copy = rhs.clone();
let self_copy = self.clone();
Tensor {
data: new,
grad: None,
grad_fn: Some(Box::new(move |grad, lhs_grad, rhs_grad| {
let unwrapped = grad.unwrap();
let grad_copy = unwrapped.clone();
*lhs_grad = Some(Box::new(grad_copy.unbroadcast(&self_copy)));
*rhs_grad = Some(Box::new(unwrapped.unbroadcast(&rhs_copy)));
})),
requires_grad: self.requires_grad || rhs.requires_grad,
is_leaf: false,
}
}
}
/// Build a leaf `Tensor` from nested bracketed literals plus a trailing
/// `requires_grad` flag, e.g. `tensor!([1., 2.], [3., 4.]; true)`.
/// Arms are matched most-nested first: 3-D, then 2-D, then 1-D.
///
/// NOTE(review): with `#[macro_export]`, `$crate::Tensor` and
/// `$crate::Array*` must resolve at the crate root for downstream users.
/// A plain `use ndarray::{...}` is not a re-export, and `Tensor`'s
/// fields are private outside this crate — consider
/// `pub use ndarray::{Array, Array2, Array3};` plus a public constructor
/// if the macro is meant to be usable from other crates.
#[macro_export]
macro_rules! tensor {
    // 3-D: tensor!([[[..],[..]], [[..],[..]]]; requires_grad)
    // (Previously this arm used a needless deferred-init
    // `{let new; { new = ... } new}` shape; normalized to match the
    // other arms — the expanded value is identical.)
    ($([$([$($x:expr),* $(,)*]),+ $(,)*]),+ $(,)*; $y:expr) => {{
        $crate::Tensor {
            data: $crate::Array3::from(vec![$([$([$($x,)*],)*],)*]),
            grad: None,
            grad_fn: None,
            requires_grad: $y,
            is_leaf: true,
        }
    }};
    // 2-D: tensor!([a, b], [c, d]; requires_grad)
    ($([$($x:expr),* $(,)*]),+ $(,)*; $y:expr) => {{
        $crate::Tensor {
            data: $crate::Array2::from(vec![$([$($x,)*],)*]),
            grad: None,
            grad_fn: None,
            requires_grad: $y,
            is_leaf: true,
        }
    }};
    // 1-D: tensor!(a, b, c; requires_grad)
    ($($x:expr),* $(,)*; $y:expr) => {{
        $crate::Tensor {
            data: $crate::Array::from(vec![$($x,)*]),
            grad: None,
            grad_fn: None,
            requires_grad: $y,
            is_leaf: true,
        }
    }};
}
```

The thing I'm concerned about so far is memory usage — is there a way to use less of it? More importantly, am I thinking about the problem the right way? This is a very early sketch and I have plenty to learn, so every comment is more than welcome. Thanks in advance for your time.