At https://github.com/bheisler/RustaCUDA/blob/master/examples/launch.rs#L19-L22
let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
How does Rustacuda know which device to allocate the memory on? Is it based on the most recent
let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?