Can you find the mistake which cause a invalid memory reference?

What I want is a dynamic iterator over usize:

use std::ptr::NonNull;

type Stream = Box<dyn Iterator<Item = usize>>;

fn func(mut stream: NonNull<Stream>) {
    let mut i = 0;

    *(unsafe { stream.as_mut() }) = Box::new(std::iter::from_fn(move || {
        if i < 5 {
            i += 1;
            Some(i)
        } else {
            // None
            func(stream);
            unsafe { stream.as_mut().next() }
        }
    }));
}

#[test]
fn test() {
    let mut stream: Stream = Box::new(std::iter::empty());

    func(NonNull::new(&mut stream).unwrap());

    for item in &mut stream {
        dbg!(item);
    }

    println!("finish!");
}

When I run this test, it said:

running 1 test
[src/main.rs:27] item = 1
[src/main.rs:27] item = 2
[src/main.rs:27] item = 3
[src/main.rs:27] item = 4
[src/main.rs:27] item = 5
error: test failed, to rerun pass '-p lab --bin lab'

Caused by:
  process didn't exit successfully: `/home/yrpciu/lab/target/debug/deps/lab-b276275ef604ffbd test --exact --nocapture` (signal: 11, SIGSEGV: invalid memory reference)

I've wasted several days to figure out the mistake but...

The func method deallocates the Box previously stored behind the pointer (overwriting the value destroys the old one), so when the func call in the else branch returns, you are suddenly in a function owned by something that has been destroyed.

In general, there are also a lot of other problems with this unsafe code. Mutable references must be unique, so the mutable reference created at &mut stream in the for loop invalidates all existing raw pointers to stream, and any further use of them undefined behavior.

4 Likes

If you first replace *stream with a dummy value, you can move the inner Stream into the closure... which might be what you meant? I'm not sure, but no unsafe required.

Note that this whole recursive self-modifying iterator mess is equivalent to (1..=5).chain(repeat(1)).

2 Likes

Here is a interesting fact, that it will fail if I drop through the pointer:

use std::ptr::NonNull;

type Stream = Box<dyn Iterator<Item = usize>>;

fn func1(mut stream: NonNull<Stream>) {
    let mut i = 0;

    unsafe {
        stream.as_ptr().drop_in_place();
    }

    unsafe {
        stream.as_ptr().write(Box::new(std::iter::from_fn(move || {
            if i < 5 {
                i += 1;
                Some(i)
            } else {
                // None
                func2(stream);
                unsafe { stream.as_mut().next() }
            }
        })))
    }
}

fn func2(mut stream: NonNull<Stream>) {
    let mut i = 10;

    unsafe {
        stream.as_ptr().drop_in_place();
    }

    unsafe {
        stream.as_ptr().write(Box::new(std::iter::from_fn(move || {
            if i < 15 {
                i += 1;
                Some(i)
            } else {
                None
            }
        })))
    }
}

#[test]
fn test() {
    let mut stream: Stream = Box::new(std::iter::empty());

    func1(NonNull::new(&mut stream).unwrap());

    for item in &mut stream {
        dbg!(item);
    }

    println!("finish!");
}
running 1 test
[src/main.rs:52] item = 1
[src/main.rs:52] item = 2
[src/main.rs:52] item = 3
[src/main.rs:52] item = 4
[src/main.rs:52] item = 5
error: test failed, to rerun pass '-p lab --bin lab'

Caused by:
  process didn't exit successfully: `/home/yrpciu/lab/target/debug/deps/lab-b276275ef604ffbd test --exact --nocapture` (signal: 11, SIGSEGV: invalid memory reference)

But if I leak the memory, it runs smoothly:

use std::ptr::NonNull;

type Stream = Box<dyn Iterator<Item = usize>>;

fn func1(mut stream: NonNull<Stream>) {
    let mut i = 0;

    unsafe {
        // stream.as_ptr().drop_in_place();
    }

    unsafe {
        stream.as_ptr().write(Box::new(std::iter::from_fn(move || {
            if i < 5 {
                i += 1;
                Some(i)
            } else {
                // None
                func2(stream);
                unsafe { stream.as_mut().next() }
            }
        })))
    }
}

fn func2(mut stream: NonNull<Stream>) {
    let mut i = 10;

    unsafe {
        // stream.as_ptr().drop_in_place();
    }

    unsafe {
        stream.as_ptr().write(Box::new(std::iter::from_fn(move || {
            if i < 15 {
                i += 1;
                Some(i)
            } else {
                None
            }
        })))
    }
}

#[test]
fn test() {
    let mut stream: Stream = Box::new(std::iter::empty());

    func1(NonNull::new(&mut stream).unwrap());

    for item in &mut stream {
        dbg!(item);
    }

    println!("finish!");
}
running 1 test
[src/main.rs:52] item = 1
[src/main.rs:52] item = 2
[src/main.rs:52] item = 3
[src/main.rs:52] item = 4
[src/main.rs:52] item = 5
[src/main.rs:52] item = 11
[src/main.rs:52] item = 12
[src/main.rs:52] item = 13
[src/main.rs:52] item = 14
[src/main.rs:52] item = 15
finish!
test test ... ok

test result: ok. 1 passed; 0 failed; 0 ignored; 0 measured; 0 filtered out; finished in 0.00s

To be absolutely clear, the version that leaks the memory is still incorrect because the for loop invalidates the raw pointer. You were just lucky that the compiler happens to not make any optimizations that breaks your program in that case.

You can see that the program has UB by running it with miri.

[src/main.rs:51] item = 1
[src/main.rs:51] item = 2
[src/main.rs:51] item = 3
[src/main.rs:51] item = 4
[src/main.rs:51] item = 5
error: Undefined Behavior: attempting a write access using <3299> at alloc1786[0x0], but that tag does not exist in the borrow stack for this location
    --> /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:1305:9
     |
1305 |         copy_nonoverlapping(&src as *const T, dst, 1);
     |         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
     |         |
     |         attempting a write access using <3299> at alloc1786[0x0], but that tag does not exist in the borrow stack for this location
     |         this error occurs as part of an access at alloc1786[0x0..0x10]
     |
     = help: this indicates a potential bug in the program: it performed an invalid operation, but the Stacked Borrows rules it violated are still experimental
     = help: see https://github.com/rust-lang/unsafe-code-guidelines/blob/master/wip/stacked-borrows.md for further information
help: <3299> was created by a retag at offsets [0x0..0x10]
    --> src/main.rs:48:24
     |
48   |     func1(NonNull::new(&mut stream).unwrap());
     |                        ^^^^^^^^^^^
help: <3299> was later invalidated at offsets [0x0..0x10]
    --> src/main.rs:50:17
     |
50   |     for item in &mut stream {
     |                 ^^^^^^^^^^^
     = note: backtrace:
     = note: inside `std::ptr::write::<std::boxed::Box<dyn std::iter::Iterator<Item = usize>>>` at /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mod.rs:1305:9
     = note: inside `std::ptr::mut_ptr::<impl *mut std::boxed::Box<dyn std::iter::Iterator<Item = usize>>>::write` at /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/ptr/mut_ptr.rs:1408:18
note: inside `func2` at src/main.rs:34:9
    --> src/main.rs:34:9
     |
34   | /         stream.as_ptr().write(Box::new(std::iter::from_fn(move || {
35   | |             if i < 15 {
36   | |                 i += 1;
37   | |                 Some(i)
...    |
40   | |             }
41   | |         })))
     | |____________^
note: inside closure at src/main.rs:19:17
    --> src/main.rs:19:17
     |
19   |                 func2(stream);
     |                 ^^^^^^^^^^^^^
     = note: inside `<std::iter::FromFn<[closure@src/main.rs:13:59: 13:66]> as std::iter::Iterator>::next` at /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/iter/sources/from_fn.rs:69:9
     = note: inside `<std::boxed::Box<dyn std::iter::Iterator<Item = usize>> as std::iter::Iterator>::next` at /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/alloc/src/boxed.rs:1868:9
     = note: inside `<&mut std::boxed::Box<dyn std::iter::Iterator<Item = usize>> as std::iter::Iterator>::next` at /playground/.rustup/toolchains/nightly-x86_64-unknown-linux-gnu/lib/rustlib/src/rust/library/core/src/iter/traits/iterator.rs:3825:9
note: inside `main` at src/main.rs:50:17
    --> src/main.rs:50:17
     |
50   |     for item in &mut stream {
     |                 ^^^^^^^^^^^
6 Likes

Though it's not what I want, but where does these so many

[src/main.rs:27] item = 1
[src/main.rs:27] item = 1
[src/main.rs:27] item = 1

come from? And why doesn't inner_stream get dropped?

Well, every time I call func(x) it replaces *x with a wrapped version of *x that has its own counter starting at 0. But the initial counter's i just goes to 5 and after that every time you call next it calls func again internally to wrap the inner iterator in another layer with another i initialized to 0. After which calling next() just increments it and returns 1.

Neither this nor the original code is sensible, but I'm at a loss to understand what it was you wanted the original code to do. This seemed to be the most obvious way to make sense of it.

1 Like

Oh... I understand. The outer stream is drained, so every time you call next on the outer stream you always have a None, and then the program always goes to the "i>=5" branch, and inner stream is dropped.

To clarify myself, I want to replace the outer stream with the inner stream, and the program should count from 1 to 5 and 1 to 5 and 1 to 5 ....

This could be done here. But as @alice said, this is wrong code, UB. I don't know why, I don't catch up @alice 's words.

It is UB because creating a mutable reference to something invalidates all existing raw pointers. By "invalidates", I mean "makes it UB to use them again".

You create a mutable reference here:

for item in (&mut stream).take(25) {
             ^^^^^^^^^^^

Using the NonNull<Stream> after starting the for loop is UB because the NonNull<Stream> has been invalidated.

Use the safe solution that @trentj posted.

2 Likes

So a UB occurs when I betray the promise to compiler that only have one &mut T or *mut T ? Despite the program access the right memory? Am I just be lucky that the compiler don't optimize that piece of memory?

The problem of @trentj's safe code for me is, I can't satisfy borrow checker all the way. :face_with_symbols_over_mouth: Either "you borrow mut twice" or lifetime problem.

Yes, though the promise is slightly different than that. You may have many *mut T at the same time (raw pointers have their own rules). However, once you make a &mut T, you can't have any *mut T anymore.

Yes.

Yes.

Ah, well, you are not going to be able to have the iterator replace itself like that. You would need to have two layers where the inner layer returns a special value that means "please replace me", and then the outer layer that owns the inner value does the actual replacement.

1 Like

Good advice!

type Stream = Box<dyn Iterator<Item = usize>>;

#[derive(Debug)]
enum State {
    Func1,
    Func2,
}

struct MyIterator {
    stream: Box<dyn Iterator<Item = Result<usize, State>>>,
}

impl Iterator for MyIterator {
    type Item = usize;

    fn next(&mut self) -> Option<Self::Item> {
        match self.stream.next() {
            Some(result) => match result {
                Ok(u) => Some(u),

                Err(state) => {
                    match dbg!(state) {
                        State::Func1 => self.func1(),
                        State::Func2 => self.func2(),
                    };

                    self.next()
                }
            },

            None => None,
        }
    }
}

impl MyIterator {
    fn new() -> Self {
        let mut this = Self {
            stream: Box::new(std::iter::empty()),
        };
        this.func1();
        this
    }

    fn func1(&mut self) {
        self.stream = Box::new(
            // (10..=15)
            //     .into_iter()
            //     .map(|i| Ok(i))
            //     .chain(Err(State::Func2)),
            [Ok(11), Ok(12), Ok(13), Ok(14), Ok(15), Err(State::Func2)].into_iter(),
        );
    }

    fn func2(&mut self) {
        self.stream = Box::new(
            // (20..=25)
            //     .into_iter()
            //     .map(|i| Ok(i))
            //     .chain(Err(State::Func1)),
            [Ok(21), Ok(22), Ok(23), Ok(24), Ok(25), Err(State::Func1)].into_iter(),
        );
    }
}

fn main() {
    let mut stream: Stream = Box::new(MyIterator::new());

    for item in stream.take(25) {
        dbg!(item);
    }
}

[src/main.rs:70] item = 11
[src/main.rs:70] item = 12
[src/main.rs:70] item = 13
[src/main.rs:70] item = 14
[src/main.rs:70] item = 15
[src/main.rs:22] state = Func2
[src/main.rs:70] item = 21
[src/main.rs:70] item = 22
[src/main.rs:70] item = 23
[src/main.rs:70] item = 24
[src/main.rs:70] item = 25
[src/main.rs:22] state = Func1
[src/main.rs:70] item = 11
[src/main.rs:70] item = 12
[src/main.rs:70] item = 13
[src/main.rs:70] item = 14
[src/main.rs:70] item = 15
[src/main.rs:22] state = Func2
[src/main.rs:70] item = 21
[src/main.rs:70] item = 22
[src/main.rs:70] item = 23
[src/main.rs:70] item = 24
[src/main.rs:70] item = 25
[src/main.rs:22] state = Func1
[src/main.rs:70] item = 11
[src/main.rs:70] item = 12
[src/main.rs:70] item = 13
[src/main.rs:70] item = 14
[src/main.rs:70] item = 15

Thank you @alice! I could have a good dream tonight :rofl: :rofl: and @trentj too!

2 Likes