Something about std::ptr::read

I found something interesting, and also learned more about Rust ownership.
Here is what happened

While reading the Rust std docs, I noticed the function std::ptr::read.
I looked at its source, and this function calls the read_via_copy intrinsic:

/// Reads a value of type `T` from `src` by delegating to the `read_via_copy` intrinsic.
///
/// In builds with `debug_assertions` enabled, a precondition check first verifies that
/// `src` is aligned for `T` and non-null.
///
/// # Safety
///
/// As listed in the documentation quoted later in this thread, the caller must ensure:
///
/// * `src` is valid for reads,
/// * `src` is properly aligned,
/// * `src` points to a properly initialized value of type `T`.
///
/// The returned value is a bitwise (shallow) copy. If `T` is not `Copy`, using both the
/// returned value and the value at `*src` can violate memory safety (e.g. a double free).
pub const unsafe fn read<T>(src: *const T) -> T {
  unsafe {
        #[cfg(debug_assertions)] // Too expensive to always enable (for now?)
        ub_checks::assert_unsafe_precondition!(
            check_language_ub,
            "ptr::read requires that the pointer argument is aligned and non-null",
            (
                addr: *const () = src as *const (),
                align: usize = align_of::<T>(),
            ) => ub_checks::is_aligned_and_not_null(addr, align)
        );
        // The actual read: despite the name, this is not a deep copy of what `src` owns.
        crate::intrinsics::read_via_copy(src)
    }
}

At first I thought it just made a deep copy of src and returned the new 'clone', as its name
read_via_copy suggests: just copy it and read it.

But reading via a deep copy does not seem unsafe, so why does Rust mark it unsafe?
So I wanted to check whether it is truly a deep copy.

I wrote some code to test it:

// Demonstration program: reads a `Vec` through `std::ptr::read` and prints the addresses of
// both values and of their heap buffers. It intentionally ends in a double free.
fn main(){
    // v0 owns a heap buffer holding the pushed elements.
    let mut v0 = Vec::new();
    v0.push(12);
    v0.push(20);
    let  v1;
    unsafe{
        // v1 is produced by reading v0 through a raw pointer; v0 stays usable afterwards,
        // so both v0 and v1 now claim ownership of the same heap buffer.
        v1 = std::ptr::read(&v0);
        // ptr::read calls read_via_copy, which turns out to be a shallow copy (see output).
    }
    println!("content v0 and v1");
    println!("v0 {:?} v1 {:?}",v0,v1);
    // Address of the Vec value itself vs. address of its heap buffer.
    println!("v0 addree {:p}",&v0 as *const _);
    println!("v0.data addree {:p}",v0.as_ptr());

    println!("v1 addree {:p}",&v1 as *const _);
    println!("v1.data addree {:p}",v1.as_ptr());

    // End of scope: v1 is dropped, then v0. Both free the same buffer -> double free.
}

It just reads from a vector,
but things go wrong when I execute the program:

content v0 and v1
v0 [12, 20] v1 [12, 20]
v0 addree 0x7ffd47b05dc8
v0.data addree 0x5c9f5eebaa80
v1 addree 0x7ffd47b05de0
v1.data addree 0x5c9f5eebaa80
free(): double free detected in tcache 2
Aborted (core dumped)

The Double Free

and I checked the decompiled code for more details

// Decompiler pseudocode for the Rust `main` shown earlier.
// `myV0` is the `v0` local; `also_myV1` (together with `v30`) is the `v1` local.
// Lines starting with "NOTE(review)" are reviewer annotations, not decompiler output.
__int64 _1::main()
{
  int v0; // edx
  int v1; // ecx
  int v2; // r8d
  int v3; // r9d
  int v4; // edx
  int v5; // ecx
  int v6; // r8d
  int v7; // r9d
  int v8; // edx
  int v9; // ecx
  int v10; // r8d
  int v11; // r9d
  int v12; // edx
  int v13; // ecx
  int v14; // r8d
  int v15; // r9d
  int v17; // [rsp+0h] [rbp-2F8h]
  int v18; // [rsp+0h] [rbp-2F8h]
  struct _Unwind_Exception *v19; // [rsp+0h] [rbp-2F8h]
  struct _Unwind_Exception *v20; // [rsp+0h] [rbp-2F8h]
  int v21; // [rsp+8h] [rbp-2F0h]
  int v22; // [rsp+8h] [rbp-2F0h]
  int v23[2]; // [rsp+8h] [rbp-2F0h]
  int v24; // [rsp+8h] [rbp-2F0h]
  int v25; // [rsp+10h] [rbp-2E8h]
  int myV0; // [rsp+18h] [rbp-2E0h] BYREF
  struct _Unwind_Exception *v27; // [rsp+20h] [rbp-2D8h]
  int v28; // [rsp+28h] [rbp-2D0h]
  int also_myV1[4]; // [rsp+30h] [rbp-2C8h] BYREF
  __int64 v30; // [rsp+40h] [rbp-2B8h]
  __int128 myV1; // [rsp+48h] [rbp-2B0h] BYREF
  __int64 v32; // [rsp+58h] [rbp-2A0h]
  char v33[48]; // [rsp+60h] [rbp-298h] BYREF
  char v34[48]; // [rsp+90h] [rbp-268h] BYREF
  _OWORD v35[2]; // [rsp+C0h] [rbp-238h] BYREF
  __int128 v36; // [rsp+E0h] [rbp-218h]
  __int128 v37; // [rsp+F0h] [rbp-208h]
  char v38[48]; // [rsp+100h] [rbp-1F8h] BYREF
  __int128 v39; // [rsp+130h] [rbp-1C8h] BYREF
  __int128 v40; // [rsp+148h] [rbp-1B0h]
  int *v41; // [rsp+158h] [rbp-1A0h] BYREF
  char v42[48]; // [rsp+160h] [rbp-198h] BYREF
  __int128 v43; // [rsp+190h] [rbp-168h] BYREF
  __int128 v44; // [rsp+1A8h] [rbp-150h]
  __int64 v45; // [rsp+1B8h] [rbp-140h] BYREF
  char v46[48]; // [rsp+1C0h] [rbp-138h] BYREF
  __int128 v47; // [rsp+1F0h] [rbp-108h] BYREF
  __int128 v48; // [rsp+208h] [rbp-F0h]
  int *v49; // [rsp+218h] [rbp-E0h] BYREF
  char v50[48]; // [rsp+220h] [rbp-D8h] BYREF
  __int128 v51; // [rsp+250h] [rbp-A8h] BYREF
  __int128 v52; // [rsp+268h] [rbp-90h]
  struct _Unwind_Exception *v53; // [rsp+278h] [rbp-80h] BYREF
  char v54; // [rsp+287h] [rbp-71h]
  int **v55; // [rsp+298h] [rbp-60h]
  __int64 (__fastcall *v56)(); // [rsp+2A0h] [rbp-58h]
  int **v57; // [rsp+2A8h] [rbp-50h]
  __int64 (__fastcall *v58)(); // [rsp+2B0h] [rbp-48h]
  struct _Unwind_Exception **v59; // [rsp+2B8h] [rbp-40h]
  __int64 (__fastcall *v60)(); // [rsp+2C0h] [rbp-38h]
  __int64 *v61; // [rsp+2C8h] [rbp-30h]
  __int64 (__fastcall *v62)(); // [rsp+2D0h] [rbp-28h]
  int *v63; // [rsp+2D8h] [rbp-20h]
  __int64 (__fastcall *v64)(); // [rsp+2E0h] [rbp-18h]
  int *v65; // [rsp+2E8h] [rbp-10h]
  __int64 (__fastcall *v66)(); // [rsp+2F0h] [rbp-8h]

  // NOTE(review): v54 looks like a compiler-generated drop flag for v1 - confirm.
  v54 = 0;
  // let mut v0 = Vec::new(); followed by the two pushes (12 and 20) into myV0.
  alloc::vec::Vec<T>::new();
  alloc::vec::Vec<T,A>::push((int)&myV0, 12, v0, v1, v2, v3, v17, v21, (int)&myV0, myV0, v27, v28);
  alloc::vec::Vec<T,A>::push((int)&myV0, 20, v4, v5, v6, v7, v18, v22, v25, myV0, v27, v28);
  // std::ptr::read(&v0): the result is written to the temporary at myV1 (16 bytes) + v32 (8 bytes).
  core::ptr::read(&myV1, &myV0);
  v54 = 1;
  // The temporary is then moved into also_myV1 (16 bytes) + v30 (8 bytes), i.e. the `v1` local.
  v30 = v32;
  *(_OWORD *)also_myV1 = myV1;
  // println!("content v0 and v1");
  core::fmt::Arguments::new_const(v33, &off_584F8);
  std::io::stdio::_print();
  // println! of v0 and v1 with Debug formatting.
  v65 = &myV0;
  v66 = <alloc::vec::Vec<T,A> as core::fmt::Debug>::fmt;
  *(_QWORD *)&v36 = &myV0;
  *((_QWORD *)&v36 + 1) = <alloc::vec::Vec<T,A> as core::fmt::Debug>::fmt;
  v63 = also_myV1;
  v64 = <alloc::vec::Vec<T,A> as core::fmt::Debug>::fmt;
  *(_QWORD *)&v37 = also_myV1;
  *((_QWORD *)&v37 + 1) = <alloc::vec::Vec<T,A> as core::fmt::Debug>::fmt;
  v35[0] = v36;
  v35[1] = v37;
  core::fmt::Arguments::new_v1(v34, &off_58508, v35);
  std::io::stdio::_print();
  // println! of &v0 as a pointer.
  v41 = &myV0;
  v57 = &v41;
  v58 = <*const T as core::fmt::Pointer>::fmt;
  *(_QWORD *)&v40 = &v41;
  *((_QWORD *)&v40 + 1) = <*const T as core::fmt::Pointer>::fmt;
  v39 = v40;
  core::fmt::Arguments::new_v1(v38, &off_58538, &v39);
  std::io::stdio::_print();
  // println! of v0.as_ptr().
  *(_QWORD *)v23 = alloc::vec::Vec<T,A>::as_ptr(&myV0);
  v45 = *(_QWORD *)v23;
  v61 = &v45;
  v62 = <*const T as core::fmt::Pointer>::fmt;
  *(_QWORD *)&v44 = &v45;
  *((_QWORD *)&v44 + 1) = <*const T as core::fmt::Pointer>::fmt;
  v43 = v44;
  core::fmt::Arguments::new_v1(v42, &off_58558, &v43);
  std::io::stdio::_print();
  // println! of &v1 as a pointer.
  v49 = also_myV1;
  v55 = &v49;
  v56 = <*const T as core::fmt::Pointer>::fmt;
  *(_QWORD *)&v48 = &v49;
  *((_QWORD *)&v48 + 1) = <*const T as core::fmt::Pointer>::fmt;
  v47 = v48;
  core::fmt::Arguments::new_v1(v46, &off_58578, &v47);
  std::io::stdio::_print();
  // println! of v1.as_ptr().
  v19 = (struct _Unwind_Exception *)alloc::vec::Vec<T,A>::as_ptr(also_myV1);
  v53 = v19;
  v59 = &v53;
  v60 = <*const T as core::fmt::Pointer>::fmt;
  *(_QWORD *)&v52 = &v53;
  *((_QWORD *)&v52 + 1) = <*const T as core::fmt::Pointer>::fmt;
  v51 = v52;
  core::fmt::Arguments::new_v1(v50, &off_58598, &v51);
  std::io::stdio::_print();
  // End of scope: `v1` (also_myV1) is dropped first ...
  core::ptr::drop_in_place<alloc::vec::Vec<i32>>((int)also_myV1, (int)&off_58598, v8, v9, v10, v11, v19, v23[0]);
  v54 = 0;
  // ... then `v0` (myV0) is dropped. Both hold the same heap pointer, so this is the double free.
  return core::ptr::drop_in_place<alloc::vec::Vec<i32>>((int)&myV0, (int)&off_58598, v12, v13, v14, v15, v20, v24);
}

At the end of the code,
it calls drop_in_place twice, once for v1 and once for v0, which causes the double free.

So, let's look at how the ptr::read function works:

// Decompiler pseudocode of core::ptr::read as instantiated for the Vec in the demo.
// a1: destination (where the returned value is written); a2: source pointer (`src`).
// It copies exactly three _QWORD-sized words from a2 to a1 and returns a1; nothing
// behind any pointer stored in those words is followed or duplicated.
_QWORD *__fastcall core::ptr::read(_QWORD *a1, _QWORD *a2)
{
  _QWORD *result; // rax

  result = a1;
  // Word-by-word copy of the source value into the destination.
  *a1 = *a2;
  a1[1] = a2[1];
  a1[2] = a2[2];
  return result;
}

There is no DEEP COPY at all.
It just does a shallow copy. I guess a[0], a[1] and a[2] are the Vec's metadata: a len, a cap, and a pointer to the heap.
Assuming a[2] is the pointer to the heap data,
here it overwrites a1's heap pointer with a2's, and returns a1.
So a1 is the new one and a2 is the old one:
it makes the new one point to the old one's heap data and returns the new one.
So read_via_copy does something like this:

new->ptr = old->ptr
new->len = old->len
new->cap = old->cap

All of these observations are based on Rust's Vec.

I am not sure about all of that, but that's what I have learned from std::ptr::read,
and I now know more about what 'move' means.

1 Like


It would be something like this: Vec::new() creates a stack variable v0, and ptr::read just copies the bits of v0. ptr::read is a low-level API, just like *ptr in C.

2 Likes

There is no operation in Rust which is a deep copy. Nothing in the language, the library, or even user-written code has the information of what would be a truly deep copy. The closest thing to a deep copy is the Clone trait, but implementations of Clone don’t copy e.g. the contents of an Arc or Rc shared pointer.

There are two reasons why std::ptr::read is marked as unsafe fn:

  1. You’ve correctly determined, later in your post, that std::ptr::read is a shallow copy. Therefore, it can cause duplication of a type that owns resources, like Vec (or like std::fs::File, which owns a file handle rather than memory), leading to a double-free later. Therefore, the caller must promise that either the value being read is safe to duplicate, or the original copy of the value will be forgotten.
  2. std::ptr::read could be given a *const T that points to uninitialized memory, or memory which contains something other than a valid T. The caller must promise that the pointer is valid to read a T from.

These are preconditions that the type system cannot check, so the function is an unsafe fn to indicate that the caller is responsible for ensuring they are true.

9 Likes

Why did you think that? Was there something in particular that misled you?

Did you read the https://doc.rust-lang.org/std/ptr/fn.read.html#ownership-of-the-returned-value section at all? (That specifically talks about "if T is not Copy".)


The intrinsics are named to make sense internally in the compiler; they're not there for end users of Rust to think about. The "copy" in the name read_via_copy is actually talking about Operand::Copy.

2 Likes

It does seem a bit odd that the docs have:

If T is not Copy, using both the returned value and the value at *src can violate memory safety

But also the safety section contains only:

  • src must be valid for reads.

  • src must be properly aligned. Use read_unaligned if this is not the case.

  • src must point to a properly initialized value of type T.

I thought the idea was if you meet the listed requirements in the safety section on every unsafe operation you can't do anything unsound...

Not sure what such a clause would look like... "If T is not Copy, at most one of the result or the target of ptr may be dropped" is a little clumsy...

In retrospect perhaps it should have returned ManuallyDrop<T>, to help address that issue.

Perhaps including a link to the ownership section is the low-hanging fruit solution? It doesn't seem like any other elaboration is required, just a nod to "don't forget about this detail".