@@ -479,31 +479,62 @@ pub trait Hasher {
479479 ///
480480 /// # Note to Implementers
481481 ///
482- /// The default implementation of this method includes a call to
483- /// [`Self::write_length_prefix`], so if your implementation of `Hasher`
484- /// doesn't care about prefix-freedom and you've thus overridden
485- /// that method to do nothing, there's no need to override this one.
486- ///
487- /// This method is available to be overridden separately from the others
488- /// as `str` being UTF-8 means that it never contains `0xFF` bytes, which
489- /// can be used to provide prefix-freedom cheaper than hashing a length.
490- ///
491- /// For example, if your `Hasher` works byte-by-byte (perhaps by accumulating
492- /// them into a buffer), then you can hash the bytes of the `str` followed
493- /// by a single `0xFF` byte.
494- ///
495- /// If your `Hasher` works in chunks, you can also do this by being careful
496- /// about how you pad partial chunks. If the chunks are padded with `0x00`
497- /// bytes then just hashing an extra `0xFF` byte doesn't necessarily
498- /// provide prefix-freedom, as `"ab"` and `"ab\u{0}"` would likely hash
499- /// the same sequence of chunks. But if you pad with `0xFF` bytes instead,
500- /// ensuring at least one padding byte, then it can often provide
501- /// prefix-freedom cheaper than hashing the length would.
482+ /// There are at least two reasonable default ways to implement this.
483+ /// Which one will be the default is not yet decided, so for now
484+ /// you probably want to override it specifically.
485+ ///
486+ /// ## The general answer
487+ ///
488+ /// It's always correct to implement this with a length prefix:
489+ ///
490+ /// ```
491+ /// # #![feature(hasher_prefixfree_extras)]
492+ /// # struct Foo;
493+ /// # impl std::hash::Hasher for Foo {
494+ /// # fn finish(&self) -> u64 { unimplemented!() }
495+ /// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
496+ /// fn write_str(&mut self, s: &str) {
497+ /// self.write_length_prefix(s.len());
498+ /// self.write(s.as_bytes());
499+ /// }
500+ /// # }
501+ /// ```
502+ ///
503+ /// And, if your `Hasher` works in `usize` chunks, this is likely a very
504+ /// efficient way to do it, as anything more complicated may well end up
505+ /// slower than just running the round with the length.
506+ ///
507+ /// ## If your `Hasher` works byte-wise
508+ ///
509+ /// One nice thing about `str` being UTF-8 is that the `b'\xFF'` byte
510+ /// never happens. That means that you can append that to the byte stream
511+ /// being hashed and maintain prefix-freedom:
512+ ///
513+ /// ```
514+ /// # #![feature(hasher_prefixfree_extras)]
515+ /// # struct Foo;
516+ /// # impl std::hash::Hasher for Foo {
517+ /// # fn finish(&self) -> u64 { unimplemented!() }
518+ /// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
519+ /// fn write_str(&mut self, s: &str) {
520+ /// self.write(s.as_bytes());
521+ /// self.write_u8(0xff);
522+ /// }
523+ /// # }
524+ /// ```
525+ ///
526+ /// This does require that your implementation not add extra padding, and
527+ /// thus generally requires that you maintain a buffer, running a round
528+ /// only once that buffer is full (or `finish` is called).
529+ ///
530+ /// That's because if `write` pads data out to a fixed chunk size, it's
531+ /// likely that it does it in such a way that `"a"` and `"a\x00"` would
532+ /// end up hashing the same sequence of things, introducing conflicts.
502533     #[inline]
503534     #[unstable(feature = "hasher_prefixfree_extras", issue = "96762")]
504535     fn write_str(&mut self, s: &str) {
505-         self.write_length_prefix(s.len());
506536         self.write(s.as_bytes());
537+         self.write_u8(0xff);
507538     }
508539 }
509540
0 commit comments