From f45fbfe3aee9856734d7461edd4c7b5dae4946ac Mon Sep 17 00:00:00 2001 From: Diggory Blake Date: Sun, 30 May 2021 17:16:50 +0100 Subject: [PATCH] Split out RNG from MarkovChain to allow reusing a model multiple times. --- Cargo.toml | 2 +- README.md | 60 ++++++++++-------- src/lib.rs | 183 +++++++++++++++++++++++++++++------------------------ 3 files changed, 134 insertions(+), 111 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 301d49a..4f1681c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "lipsum" -version = "0.7.0" +version = "0.8.0" authors = ["Martin Geisler "] description = """ Lipsum is a lorem ipsum text generation library. Use this if you need diff --git a/README.md b/README.md index 949d0d4..e04638d 100644 --- a/README.md +++ b/README.md @@ -17,26 +17,27 @@ used in publishing. It starts with: > aliquip ex ea commodo consequat… The text is generated using a [Markov chain] that has been trained on -the first book in Cicero's work *De finibus bonorum et malorum* (*On -the ends of good and evil*), of which the lorem ipsum text is a +the first book in Cicero's work _De finibus bonorum et malorum_ (_On +the ends of good and evil_), of which the lorem ipsum text is a scrambled subset. ## Usage Add this to your `Cargo.toml`: + ```toml [dependencies] -lipsum = "0.7" +lipsum = "0.8" ``` ## Documentation Please see the **[API documentation][api-docs]**. - ## Getting Started Use the `lipsum` function to generate lorem ipsum text: + ```rust use lipsum::lipsum; @@ -64,24 +65,29 @@ function looks like this: Small words are kept uncapitalized and punctuation is stripped from all words. - ## Release History This is a changelog with the most important changes in each release. +### Version 0.8.0 — May 30th, 2021 + +The random number generator has been separated from the `MarkovChain` +implementation to allow using the same trained model to generate +multiple outputs with different seeds. 
+ ### Version 0.7.0 — July 8th, 2020 -* The code has been updated to the [Rust 2018 edition][rust-2018]. +- The code has been updated to the [Rust 2018 edition][rust-2018]. -* Each new release will only support the latest stable version of - Rust. Trying to support older Rust versions has proven to be a - fool's errand: our dependencies keep releasing new patch versions - that require newer and newer versions of Rust. +- Each new release will only support the latest stable version of + Rust. Trying to support older Rust versions has proven to be a + fool's errand: our dependencies keep releasing new patch versions + that require newer and newer versions of Rust. -* [#65](https://github.com/mgeisler/lipsum/pull/65): A new - `lipsum_words_from_seed` function was added. It generates random but - deterministic lorem ipsum text. This is useful in unit tests when - you need fixed inputs. +- [#65](https://github.com/mgeisler/lipsum/pull/65): A new + `lipsum_words_from_seed` function was added. It generates random but + deterministic lorem ipsum text. This is useful in unit tests when + you need fixed inputs. ### Version 0.6.0 — December 9th, 2018 @@ -113,19 +119,19 @@ training the Markov chain now takes about twice as long as before. The `MarkovChain` struct has many new methods: -* `new_with_rng` makes it possible to specify the random number - generator used by the Markov chain. Use this to get deterministic - and thus reproducible output for tests. `MarkovChain` now owns the - RNG it uses and as a consequence, it has an extra type parameter. - This is a breaking change if you used struct directly in your code. +- `new_with_rng` makes it possible to specify the random number + generator used by the Markov chain. Use this to get deterministic + and thus reproducible output for tests. `MarkovChain` now owns the + RNG it uses and as a consequence, it has an extra type parameter. + This is a breaking change if you used the struct directly in your code. 
-* `iter` and `into_from` return iterators over words in the Markov - chain. The `generate` and `generate_from` methods are now - straight-forward convenience wrappers for the iterators. +- `iter` and `iter_from` return iterators over words in the Markov + chain. The `generate` and `generate_from` methods are now + straight-forward convenience wrappers for the iterators. -* `len` tells you the number of stats in the Markov chain and - `is_empty` tells you if the Markov chain is empty, meaning that it - hasn't been trained on anything yet. +- `len` tells you the number of states in the Markov chain and + `is_empty` tells you if the Markov chain is empty, meaning that it + hasn't been trained on anything yet. ### Version 0.2.0 — July 10th, 2017 @@ -135,18 +141,16 @@ Rust version 1.6.0 is now supported. This is checked with TravisCI. First public release. - ## License Lipsum can be distributed according to the [MIT license][mit]. Contributions will be accepted under the same license. - [crates-io]: https://crates.io/crates/lipsum [api-docs]: https://docs.rs/lipsum/ [codecov]: https://codecov.io/gh/mgeisler/lipsum [lorem ipsum]: https://en.wikipedia.org/wiki/Lorem_ipsum -[Markov chain]: https://en.wikipedia.org/wiki/Markov_chain +[markov chain]: https://en.wikipedia.org/wiki/Markov_chain [travis-ci]: https://travis-ci.org/mgeisler/lipsum [appveyor]: https://ci.appveyor.com/project/mgeisler/lipsum [rust-2018]: https://doc.rust-lang.org/edition-guide/rust-2018/ diff --git a/src/lib.rs b/src/lib.rs index 699210d..b340cb3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -23,15 +23,15 @@ //! [`learn`]: struct.MarkovChain.html#method.learn //! 
[Markov chain]: https://en.wikipedia.org/wiki/Markov_chain -#![doc(html_root_url = "https://docs.rs/lipsum/0.7.0")] +#![doc(html_root_url = "https://docs.rs/lipsum/0.8.0")] #![forbid(unsafe_code)] #![deny(missing_docs)] use rand::rngs::ThreadRng; use rand::seq::SliceRandom; +use rand::thread_rng; use rand::{Rng, SeedableRng}; use rand_chacha::ChaCha20Rng; -use std::cell::RefCell; use std::collections::HashMap; /// A bigram is simply two consecutive words. @@ -47,65 +47,36 @@ pub type Bigram<'a> = (&'a str, &'a str); /// /// [Markov chain]: https://en.wikipedia.org/wiki/Markov_chain /// [blog post]: https://blakewilliams.me/posts/generating-arbitrary-text-with-markov-chains-in-rust -pub struct MarkovChain<'a, R: Rng> { +#[derive(Debug, Clone, Default)] +pub struct MarkovChain<'a> { map: HashMap, Vec<&'a str>>, keys: Vec>, - rng: R, -} - -impl<'a> MarkovChain<'a, ThreadRng> { - /// Create a new empty Markov chain. It will use a default - /// thread-local random number generator. - /// - /// # Examples - /// - /// ``` - /// use lipsum::MarkovChain; - /// - /// let chain = MarkovChain::new(); - /// assert!(chain.is_empty()); - /// ``` - pub fn new() -> MarkovChain<'a, ThreadRng> { - MarkovChain::new_with_rng(rand::thread_rng()) - } } -impl<'a> Default for MarkovChain<'a, ThreadRng> { - /// Create a new empty Markov chain. It will use a default - /// thread-local random number generator. - fn default() -> Self { - Self::new() - } -} - -impl<'a, R: Rng> MarkovChain<'a, R> { - /// Create a new empty Markov chain that uses the given random - /// number generator. +impl<'a> MarkovChain<'a> { + /// Create a new empty Markov chain. 
/// /// # Examples /// /// ``` /// # fn main() { + /// use lipsum::MarkovChain; /// use rand::SeedableRng; /// use rand_chacha::ChaCha20Rng; - /// use lipsum::MarkovChain; /// - /// let rng = ChaCha20Rng::seed_from_u64(0); - /// let mut chain = MarkovChain::new_with_rng(rng); + /// let mut chain = MarkovChain::new(); /// chain.learn("infra-red red orange yellow green blue indigo x-ray"); /// + /// let mut rng = ChaCha20Rng::seed_from_u64(0); + /// /// // The chain jumps consistently like this: - /// assert_eq!(chain.generate(1), "Orange."); - /// assert_eq!(chain.generate(1), "Infra-red."); - /// assert_eq!(chain.generate(1), "Yellow."); + /// assert_eq!(chain.generate_with_rng(&mut rng, 1), "Orange."); + /// assert_eq!(chain.generate_with_rng(&mut rng, 1), "Infra-red."); + /// assert_eq!(chain.generate_with_rng(&mut rng, 1), "Yellow."); /// # } /// ``` - pub fn new_with_rng(rng: R) -> MarkovChain<'a, R> { - MarkovChain { - map: HashMap::new(), - keys: Vec::new(), - rng: rng, - } + pub fn new() -> MarkovChain<'a> { + Default::default() } /// Add new text to the Markov chain. This can be called several @@ -188,8 +159,42 @@ impl<'a, R: Rng> MarkovChain<'a, R> { /// Generate a sentence with `n` words of lorem ipsum text. The /// sentence will start from a random point in the Markov chain + /// generated using the specified random number generator, /// and a `.` will be added as necessary to form a full sentence. /// + /// See [`generate_with_rng_from`] if you want to control the + /// starting point for the generated text and see [`iter_with_rng`] + /// if you simply want a sequence of words. + /// + /// # Examples + /// + /// Generating the sounds of a grandfather clock: + /// + /// ``` + /// use lipsum::MarkovChain; + /// use rand::thread_rng; + /// + /// let mut chain = MarkovChain::new(); + /// chain.learn("Tick, Tock, Tick, Tock, Ding! Tick, Tock, Ding! 
Ding!"); + /// println!("{}", chain.generate_with_rng(thread_rng(), 15)); + /// ``` + /// + /// The output looks like this: + /// + /// > Ding! Tick, Tock, Tick, Tock, Ding! Ding! Tock, Ding! Tick, + /// > Tock, Tick, Tock, Tick, Tock. + /// + /// [`generate_with_rng_from`]: struct.MarkovChain.html#method.generate_with_rng_from + /// [`iter_with_rng`]: struct.MarkovChain.html#method.iter_with_rng + pub fn generate_with_rng(&self, rng: R, n: usize) -> String { + join_words(self.iter_with_rng(rng).take(n)) + } + + /// Generate a sentence with `n` words of lorem ipsum text. The + /// sentence will start from a random point in the Markov chain + /// generated using the default random number generator and a `.` + /// will be added as necessary to form a full sentence. + /// /// See [`generate_from`] if you want to control the starting /// point for the generated text and see [`iter`] if you simply /// want a sequence of words. @@ -213,8 +218,22 @@ impl<'a, R: Rng> MarkovChain<'a, R> { /// /// [`generate_from`]: struct.MarkovChain.html#method.generate_from /// [`iter`]: struct.MarkovChain.html#method.iter - pub fn generate(&mut self, n: usize) -> String { - join_words(self.iter().take(n)) + pub fn generate(&self, n: usize) -> String { + self.generate_with_rng(thread_rng(), n) + } + + /// Generate a sentence with `n` words of lorem ipsum text. The + /// sentence will start from the given bigram and a `.` will be + /// added as necessary to form a full sentence. + /// + /// Use [`generate_with_rng`] if the starting point is not important. See + /// [`iter_with_rng_from`] if you want a sequence of words that you can + /// format yourself. 
+ /// + /// [`generate_with_rng`]: struct.MarkovChain.html#method.generate_with_rng + /// [`iter_with_rng_from`]: struct.MarkovChain.html#method.iter_with_rng_from + pub fn generate_with_rng_from(&self, rng: R, n: usize, from: Bigram<'a>) -> String { + join_words(self.iter_with_rng_from(rng, from).take(n)) } /// Generate a sentence with `n` words of lorem ipsum text. The @@ -227,36 +246,43 @@ impl<'a, R: Rng> MarkovChain<'a, R> { /// /// [`generate`]: struct.MarkovChain.html#method.generate /// [`iter_from`]: struct.MarkovChain.html#method.iter_from - pub fn generate_from(&mut self, n: usize, from: Bigram<'a>) -> String { - join_words(self.iter_from(from).take(n)) + pub fn generate_from(&self, n: usize, from: Bigram<'a>) -> String { + self.generate_with_rng_from(thread_rng(), n, from) } /// Make a never-ending iterator over the words in the Markov /// chain. The iterator starts at a random point in the chain. - pub fn iter(&mut self) -> Words<'_, R> { - let state = if self.is_empty() { + pub fn iter_with_rng(&self, mut rng: R) -> Words<'_, R> { + let initial_bigram = if self.is_empty() { ("", "") } else { - *self.keys.choose(&mut self.rng).unwrap() + *self.keys.choose(&mut rng).unwrap() }; - Words { - map: &self.map, - rng: &mut self.rng, - keys: &self.keys, - state: state, - } + self.iter_with_rng_from(rng, initial_bigram) + } + + /// Make a never-ending iterator over the words in the Markov + /// chain. The iterator starts at a random point in the chain. + pub fn iter(&self) -> Words<'_, ThreadRng> { + self.iter_with_rng(thread_rng()) } /// Make a never-ending iterator over the words in the Markov /// chain. The iterator starts at the given bigram. - pub fn iter_from(&mut self, from: Bigram<'a>) -> Words<'_, R> { + pub fn iter_with_rng_from(&self, rng: R, from: Bigram<'a>) -> Words<'_, R> { Words { map: &self.map, - rng: &mut self.rng, + rng, keys: &self.keys, state: from, } } + + /// Make a never-ending iterator over the words in the Markov + /// chain. 
The iterator starts at the given bigram. + pub fn iter_from(&self, from: Bigram<'a>) -> Words<'_, ThreadRng> { + self.iter_with_rng_from(thread_rng(), from) + } } /// Never-ending iterator over words in the Markov chain. @@ -267,7 +293,7 @@ impl<'a, R: Rng> MarkovChain<'a, R> { /// [`iter_from`]: struct.MarkovChain.html#method.iter_from pub struct Words<'a, R: Rng> { map: &'a HashMap, Vec<&'a str>>, - rng: &'a mut R, + rng: R, keys: &'a Vec>, state: Bigram<'a>, } @@ -283,10 +309,10 @@ impl<'a, R: Rng> Iterator for Words<'a, R> { let result = Some(self.state.0); while !self.map.contains_key(&self.state) { - self.state = *self.keys.choose(self.rng).unwrap(); + self.state = *self.keys.choose(&mut self.rng).unwrap(); } let next_words = &self.map[&self.state]; - let next = next_words.choose(self.rng).unwrap(); + let next = next_words.choose(&mut self.rng).unwrap(); self.state = (self.state.1, next); result } @@ -360,13 +386,13 @@ pub const LIBER_PRIMUS: &'static str = include_str!("liber-primus.txt"); thread_local! { // Markov chain generating lorem ipsum text. - static LOREM_IPSUM_CHAIN: RefCell> = { + static LOREM_IPSUM_CHAIN: MarkovChain<'static> = { let mut chain = MarkovChain::new(); // The cost of learning increases as more and more text is // added, so we start with the smallest text. chain.learn(LOREM_IPSUM); chain.learn(LIBER_PRIMUS); - RefCell::new(chain) + chain } } @@ -388,10 +414,7 @@ thread_local! { /// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html /// [`lipsum_words`]: fn.lipsum_words.html pub fn lipsum(n: usize) -> String { - LOREM_IPSUM_CHAIN.with(|cell| { - let mut chain = cell.borrow_mut(); - chain.generate_from(n, ("Lorem", "ipsum")) - }) + LOREM_IPSUM_CHAIN.with(|chain| chain.generate_from(n, ("Lorem", "ipsum"))) } /// Generate `n` random words of lorem ipsum text. 
@@ -411,10 +434,7 @@ pub fn lipsum(n: usize) -> String { /// /// [`LOREM_IPSUM`]: constant.LOREM_IPSUM.html pub fn lipsum_words(n: usize) -> String { - LOREM_IPSUM_CHAIN.with(|cell| { - let mut chain = cell.borrow_mut(); - chain.generate(n) - }) + LOREM_IPSUM_CHAIN.with(|chain| chain.generate(n)) } /// Generate `n` random words of lorem ipsum text. The seed is used to @@ -434,10 +454,7 @@ pub fn lipsum_words(n: usize) -> String { /// [`lipsum_words`]: fn.lipsum_words.html pub fn lipsum_words_from_seed(n: usize, seed: u64) -> String { let rng = ChaCha20Rng::seed_from_u64(seed); - let mut chain = MarkovChain::new_with_rng(rng); - chain.learn(LOREM_IPSUM); - chain.learn(LIBER_PRIMUS); - chain.generate(n) + LOREM_IPSUM_CHAIN.with(|chain| chain.generate_with_rng(rng, n)) } /// Minimum number of words to include in a title. @@ -466,9 +483,8 @@ const TITLE_SMALL_WORD: usize = 3; /// which should be suitable for use in a document title for section /// heading. pub fn lipsum_title() -> String { - LOREM_IPSUM_CHAIN.with(|cell| { - let n = rand::thread_rng().gen_range(TITLE_MIN_WORDS..TITLE_MAX_WORDS); - let mut chain = cell.borrow_mut(); + LOREM_IPSUM_CHAIN.with(|chain| { + let n = thread_rng().gen_range(TITLE_MIN_WORDS..TITLE_MAX_WORDS); // The average word length with our corpus is 7.6 bytes so // this capacity will avoid most allocations. let mut title = String::with_capacity(8 * n); @@ -549,7 +565,7 @@ mod tests { #[test] fn empty_chain() { - let mut chain = MarkovChain::new(); + let chain = MarkovChain::new(); assert_eq!(chain.generate(10), ""); } @@ -598,11 +614,14 @@ mod tests { #[test] fn new_with_rng() { let rng = ChaCha20Rng::seed_from_u64(1234); - let mut chain = MarkovChain::new_with_rng(rng); + let mut chain = MarkovChain::new(); chain.learn("foo bar x y z"); chain.learn("foo bar a b c"); - assert_eq!(chain.generate(15), "A b bar a b a b bar a b x y b y x."); + assert_eq!( + chain.generate_with_rng(rng, 15), + "A b bar a b a b bar a b x y b y x." + ); } #[test]