From 51458a8ee7bd52e2c98b7c3bdb795f1b2d5db468 Mon Sep 17 00:00:00 2001 From: Travis Cross Date: Sat, 30 Mar 2024 05:26:55 +0000 Subject: [PATCH] Use RLE as a stronger motivating example The motivating example we had given for `gen` blocks admitted too easy an implementation with existing stable iterator combinators. Let's make the example more *motivating* by showing a simple algorithm, run-length encoding, that's more difficult to implement in other ways. (Thanks to Ralf Jung for pointing out the need for a better example.) --- text/3513-gen-blocks.md | 122 +++++++++++++++++++++++++++++----------- 1 file changed, 90 insertions(+), 32 deletions(-) diff --git a/text/3513-gen-blocks.md b/text/3513-gen-blocks.md index fb86109aa76..0e4bcfb0a92 100644 --- a/text/3513-gen-blocks.md +++ b/text/3513-gen-blocks.md @@ -13,54 +13,112 @@ This RFC reserves the `gen` keyword in the Rust 2024 edition for generators and Writing iterators manually can be painful. Many iterators can be written by chaining together iterator combinators, but some need to be written with a manual implementation of `Iterator`. This can push people to avoid iterators and do worse things such as writing loops that eagerly store values to mutable state. With `gen` blocks, we can now write a simple `for` loop and still get a lazy iterator of values. -By way of example, consider these ways of expressing the same function: +By way of example, consider these alternate ways of expressing [run-length encoding][]: -```rust -// This example uses iterator combinators. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - xs.into_iter().filter(|x| x.is_odd()).map(|x| x * 2) -} +[run-length encoding]: https://en.wikipedia.org/wiki/Run-length_encoding -// This example uses `iter::from_fn`. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - let mut xs = xs.into_iter(); - std::iter::from_fn(move || { - while let Some(x) = xs.next() { - if x.is_odd() { - return Some(x * 2); +```rust +// This example uses `gen` blocks, introduced in this RFC. +fn rl_encode>( + xs: I, +) -> impl Iterator { + gen { + let mut xs = xs.into_iter(); + let (Some(mut cur), mut n) = (xs.next(), 0) else { return }; + for x in xs { + if x == cur && n < u8::MAX { + n += 1; + } else { + yield n; yield cur; + (cur, n) = (x, 0); } } - None - }) + yield n; yield cur; + }.into_iter() } // This example uses a manual implementation of `Iterator`. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - struct OddDup(T); - impl> Iterator for OddDup { - type Item = u32; - fn next(&mut self) -> Option { - while let Some(x) = self.0.next() { - if x.is_odd() { - return Some(x * 2) +fn rl_encode>( + xs: I, +) -> impl Iterator { + struct RlEncode> { + into_xs: Option, + xs: Option<::IntoIter>, + cur: Option<::Item>, + n: u8, + yield_x: Option<::Item>, + } + impl> Iterator for RlEncode { + type Item = u8; + fn next(&mut self) -> Option { + let xs = self.xs.get_or_insert_with(|| unsafe { + self.into_xs.take().unwrap_unchecked().into_iter() + }); + if let Some(x) = self.yield_x.take() { + return Some(x); + } + loop { + match (xs.next(), self.cur) { + (Some(x), Some(cx)) + if x == cx && self.n < u8::MAX => self.n += 1, + (Some(x), Some(cx)) => { + let n_ = self.n; + (self.cur, self.n) = (Some(x), 0); + self.yield_x = Some(cx); + return Some(n_); + } + (Some(x), None) => { + (self.cur, self.n) = (Some(x), 0); + } + (None, Some(cx)) => { + self.cur = None; + self.yield_x = Some(cx); + return Some(self.n); + } + (None, None) => return None, } } - None } } - OddDup(xs.into_iter()) + RlEncode { + into_xs: Some(xs), xs: None, cur: None, n: 0, yield_x: None, + } } -// This example uses `gen` blocks, introduced in this RFC. -fn odd_dup(xs: impl IntoIterator) -> impl Iterator { - gen { - for x in xs { - if x.is_odd() { - yield x * 2; +// This example uses `iter::from_fn`. +fn rl_encode>( + xs: I, +) -> impl Iterator { + let (mut cur, mut n, mut yield_x) = (None, 0, None); + let (mut into_xs, mut xs) = (Some(xs), None); + core::iter::from_fn(move || loop { + let xs = xs.get_or_insert_with(|| unsafe { + into_xs.take().unwrap_unchecked().into_iter() + }); + if let Some(x) = yield_x.take() { + return Some(x); + } + match (xs.next(), cur) { + (Some(x), Some(cx)) if x == cx && n < u8::MAX => n += 1, + (Some(x), Some(cx)) => { + let n_ = n; + (cur, n) = (Some(x), 0); + yield_x = Some(cx); + return Some(n_); + } + (Some(x), None) => (cur, n) = (Some(x), 0), + (None, Some(cx)) => { + cur = None; + yield_x = Some(cx); + return Some(n); } + (None, None) => return None, } - }.into_iter() + }) } + +// Writing a corresponding example using only existing stable +// iterator combinators is left to the reader as an exercise. ``` Iterators created with `gen` blocks return `None` from `next` once the `gen` block has returned (either implicitly at the end of the scope or explicitly with the `return` statement) and are fused (after `next` returns `None` once, it will keep returning `None` forever).