Performance difference among Range, RangeInclusive and reversed

This is a good opportunity to remind people that `for` loops over iterators are sometimes slower (and never faster) than the internal iteration methods on those same iterators, such as `sum`, `fold`, or `for_each`. (Usual link: Rust’s iterators are inefficient, and here’s what we can do about it. | by Veedrac | Medium)

A quick demo: Rust Playground

/// Builds the iterator used by both summing functions: the values of
/// `0..100` followed by the values of `200..300` (upper bounds excluded).
fn test_iter() -> impl Iterator<Item=u32> {
    let head = 0..100;
    let tail = 200..300;
    head.chain(tail)
}

/// Adds up every value produced by `test_iter()` using the iterator's own
/// `sum` method (internal iteration) instead of a hand-written loop.
pub fn sum_iterator_method() -> u32 {
    let total: u32 = test_iter().sum();
    total
}

/// Adds up every value produced by `test_iter()` with an explicit `for` loop.
///
/// This is the external-iteration counterpart to `sum_iterator_method`; the
/// loop is written out by hand on purpose so the two forms can be compared.
pub fn sum_for_loop() -> u32 {
    // Running total; its type is inferred as `u32` from the return type.
    let mut sum = 0;
    // Pulls one item at a time out of the chained iterator.
    for x in test_iter() {
        sum += x;
    }
    sum
}

LLVM can completely remove all the loops in the iterator method:

playground::sum_iterator_method:
	mov	eax, 29900
	ret

But with the for loop it cannot:

playground::sum_for_loop:
	mov	edi, 200
	xor	ecx, ecx
	xor	esi, esi
	xor	eax, eax
	mov	edx, esi
	and	dl, 3
	cmp	dl, 1
	jne	.LBB2_2
	jmp	.LBB2_6

.LBB2_1:
	add	eax, ecx
	lea	ecx, [rcx + 1]
	mov	edx, esi
	and	dl, 3
	cmp	dl, 1
	je	.LBB2_6

.LBB2_2:
	cmp	dl, 2
	jne	.LBB2_7
	cmp	edi, 299
	ja	.LBB2_10
	add	eax, edi
	lea	edx, [rdi + 1]
	jmp	.LBB2_5

.LBB2_7:
	cmp	ecx, 100
	jb	.LBB2_1
	cmp	edi, 299
	ja	.LBB2_10
	add	eax, edi
	lea	edx, [rdi + 1]
	mov	sil, 2

.LBB2_5:
	mov	edi, edx
	mov	edx, esi
	and	dl, 3
	cmp	dl, 1
	jne	.LBB2_2

.LBB2_6:
	cmp	ecx, 99
	jbe	.LBB2_1

.LBB2_10:
	ret

TL;DR: If your loop body is very simple, consider phrasing it as `fold`, `try_fold`, `for_each`, etc. instead. It won't be slower, and might be faster, depending on the iterator type.

12 Likes