For me, the main motivation is to be able to check some trivial (`const`-compatible) things, such as `.is_some()`, without needing to replace the value twice:
// Minimal usage example: builds a `ClosureCell` holding `None` and prints
// whether the closure observes `None` through the `&mut Option<i32>` it is given.
fn main ()
{
let cell = ClosureCell::new(None);
// NOTE(review): `closure_cell_with_inner!` is defined elsewhere; presumably it
// lends the closure a `&mut` to the cell's contents and evaluates to the
// closure's return value, which `dbg!` then prints.
dbg!(closure_cell_with_inner!(
&cell,
|mut_ref: &mut Option<i32>| mut_ref.is_none(),
));
}
For instance, I have compared the LLVM IR of these two functions:
/// Evaluates `closure_cell_with_inner!` on `cell` with a closure that returns
/// `is_none()` of the `&mut Option<NonZeroU32>` it receives, and returns that `bool`.
///
/// Exported with `#[no_mangle]`, i.e. under the unmangled symbol name `foo`.
#[no_mangle] pub
fn foo (cell: &'_ ClosureCell<Option<NonZeroU32>>)
-> bool
{
// NOTE(review): the macro is defined elsewhere; presumably it hands the closure
// a mutable reference to the value stored in `cell` — confirm against its definition.
closure_cell_with_inner!(
cell,
|it: &mut Option<NonZeroU32>| it.is_none(),
)
}
/// Reports whether `cell` currently holds `None`, using only `Cell`'s move-based API.
///
/// The value is moved out of the cell (which leaves `None` in its place), inspected,
/// and then moved back in, so `cell` holds the same value after the call as before.
///
/// Exported with `#[no_mangle]`, i.e. under the unmangled symbol name `bar`.
#[no_mangle]
pub fn bar(cell: &'_ ::core::cell::Cell<Option<NonZeroU32>>) -> bool {
    // `take()` is `replace(Default::default())`, i.e. `replace(None)` for an `Option`.
    let taken = cell.take();
    let was_empty = taken.is_none();
    // Put the original contents back before reporting the result.
    cell.set(taken);
    was_empty
}
and this yields:
; Function Attrs: norecurse nounwind nonlazybind readonly uwtable
define zeroext i1 @foo(i32* nocapture readonly align 4 dereferenceable(4) %cell) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
%_8.val.i = load i32, i32* %cell, align 4
%.not.i.not.i.i.i = icmp eq i32 %_8.val.i, 0
ret i1 %.not.i.not.i.i.i
}
; Function Attrs: nounwind nonlazybind uwtable
define zeroext i1 @bar(i32* nocapture align 4 dereferenceable(4) %cell) unnamed_addr #1 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
%x.val.i.i.i.i = load i32, i32* %cell, align 4
%.not.i.not.i = icmp eq i32 %x.val.i.i.i.i, 0
ret i1 %.not.i.not.i
}
We can see that Rust, in and of itself, already compiles those two functions to the same "logic" (a bitwise read of the discriminant; here, a comparison against `0`), so this whole thing may be overkill. We do notice, however, that the emitted LLVM IR states that `foo` does not mutate the pointee (the `readonly` annotation), whereas this is not guaranteed for `bar()`.
Indeed, my suspicion was right: that `readonly` can matter quite a lot. This time, let's compare the assembly of:
/// Mutable global, exported under the unmangled symbol name `SIDE_EFFECT`, that
/// `uses_foo` and `uses_bar` store their first read of the cell into.
// NOTE(review): presumably this exists so that the first read is observable and
// cannot be discarded by the optimizer — confirm intent.
#[no_mangle] pub static mut SIDE_EFFECT: Option<NonZeroU32> = None;
/// Reads `cell`, stores that first read into `SIDE_EFFECT`, calls `foo` (discarding
/// its result), then reads `cell` a second time and returns that second read.
///
/// # Safety
///
/// This function writes to the `static mut` `SIDE_EFFECT`; the caller must ensure
/// that nothing else accesses `SIDE_EFFECT` concurrently.
#[no_mangle] pub
unsafe
fn uses_foo (cell: &'_ ClosureCell<Option<NonZeroU32>>)
-> Option<NonZeroU32>
{
// First read of the cell.
let prev = cell.get();
// Store the first read in a global.
SIDE_EFFECT = prev;
// The returned `bool` is deliberately unused: only the call itself matters here.
foo(&cell);
// Second read of the cell, performed after the call to `foo`.
cell.get()
}
/// Reads `cell`, stores that first read into `SIDE_EFFECT`, calls `bar` (discarding
/// its result), then reads `cell` a second time and returns that second read.
///
/// # Safety
///
/// This function writes to the `static mut` `SIDE_EFFECT`; the caller must ensure
/// that nothing else accesses `SIDE_EFFECT` concurrently.
#[no_mangle] pub
unsafe
fn uses_bar (cell: &'_ ::core::cell::Cell<Option<NonZeroU32>>)
-> Option<NonZeroU32>
{
// First read of the cell.
let prev = cell.get();
// Store the first read in a global.
SIDE_EFFECT = prev;
// The returned `bool` is deliberately unused: only the call itself matters here.
bar(&cell);
// Second read of the cell, performed after the call to `bar`.
cell.get()
}
(I was basically trying to test whether the second `cell.get()` call gets elided by reusing the first `prev` read.)
We get:
uses_foo: # @uses_foo
# %bb.0:
movl (%rdi), %eax
movq SIDE_EFFECT@GOTPCREL(%rip), %rcx
movl %eax, (%rcx)
movl (%rdi), %eax
retq
# -- End function
uses_bar: # @uses_bar
# %bb.0:
subq $16, %rsp
movl (%rdi), %eax
movq SIDE_EFFECT@GOTPCREL(%rip), %rcx
movl %eax, (%rcx)
movq %rdi, (%rsp)
movl (%rdi), %eax
movl $0, (%rdi)
movq %rsp, %rcx
movq %rcx, 8(%rsp)
leaq 8(%rsp), %rcx
#APP
#NO_APP
movq (%rsp), %rcx
movl %eax, (%rcx)
movl (%rdi), %eax
addq $16, %rsp
retq
which basically shows that `uses_foo` has been optimized not to call `foo` (since nothing is done with the return value), whereas `uses_bar` has inlined the call to `bar`. That can be made obvious by `#[inline(never)]`-ing `foo` and `bar`, which then yields:
uses_foo: # @uses_foo
# %bb.0:
movl (%rdi), %eax
movq SIDE_EFFECT@GOTPCREL(%rip), %rcx
movl %eax, (%rcx)
movl (%rdi), %eax
retq
# -- End function
uses_bar: # @uses_bar
# %bb.0:
pushq %rbx
movq %rdi, %rbx
movl (%rdi), %eax
movq SIDE_EFFECT@GOTPCREL(%rip), %rcx
movl %eax, (%rcx)
callq *bar@GOTPCREL(%rip) # <--- FUNCTION CALL
movl (%rbx), %eax
popq %rbx
retq