Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,15 @@ edition = "2018"
rust-version = "1.49.0"

[workspace]
members = [
".",
]
exclude = [
"fuzz",
"benches"
]
members = ["."]
exclude = ["fuzz", "benches"]

[features]
default = ["std"]
std = []

[dependencies]
bytes = "1"
fnv = "1.0.5"
itoa = "1"

[dev-dependencies]
Expand Down
38 changes: 31 additions & 7 deletions src/header/map.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3650,12 +3650,40 @@ fn probe_distance(mask: Size, hash: HashValue, current: usize) -> usize {
current.wrapping_sub(desired_pos(mask, hash)) & mask as usize
}

fn hash_elem_using<K>(danger: &Danger, k: &K) -> HashValue
/// Cheap fast-path hasher that derives the hash from a single byte of the key.
///
/// The key's `Hash` implementation may call `Hasher::write` several times; the
/// first byte of the most recent non-empty write is the one that is kept. A key
/// that never writes any bytes hashes to `0`.
///
/// The byte is stored in the low bits of the returned value. The caller,
/// `hash_elem_using`, masks the hash and truncates it to `u16`, so a byte
/// shifted into the top of the `u64` would be discarded and every key would
/// collapse to the same hash value.
///
/// This hasher is specialised for single-byte HTTP header keys and must not be
/// used for general-purpose hashing.
#[inline]
fn first_byte_hash<K>(k: &K) -> u64
where
    K: Hash + ?Sized,
{
    /// Hasher state: the first byte of the last non-empty write, widened to `u64`.
    struct FirstByteHasher {
        hash: u64,
    }

    impl Hasher for FirstByteHasher {
        #[inline]
        fn finish(&self) -> u64 {
            self.hash
        }

        #[inline]
        fn write(&mut self, bytes: &[u8]) {
            // Empty writes leave the previously recorded byte untouched.
            if let Some(&b) = bytes.first() {
                // Keep the byte in the low bits so it survives the caller's
                // mask-and-truncate step.
                self.hash = u64::from(b);
            }
        }
    }

    let mut hasher = FirstByteHasher { hash: 0 };
    k.hash(&mut hasher);
    hasher.finish()
}

fn hash_elem_using<K>(danger: &Danger, k: &K) -> HashValue
where
K: Hash + ?Sized,
{
const MASK: u64 = (MAX_SIZE as u64) - 1;

let hash = match *danger {
Expand All @@ -3666,11 +3694,7 @@ where
h.finish()
}
// Fast hash
_ => {
let mut h = FnvHasher::default();
k.hash(&mut h);
h.finish()
}
_ => first_byte_hash(k),
};

HashValue((hash & MASK) as u16)
Expand Down
187 changes: 174 additions & 13 deletions src/header/name.rs
Original file line number Diff line number Diff line change
Expand Up @@ -989,6 +989,9 @@ standard_headers! {
(XXssProtection, X_XSS_PROTECTION, b"x-xss-protection");
}

/// The size of a machine word in bytes.
const WORD_SIZE: usize = std::mem::size_of::<usize>();

/// Valid header name characters
///
/// ```not_rust
Expand Down Expand Up @@ -1070,6 +1073,59 @@ const HEADER_CHARS_H2: [u8; 256] = [
0, 0, 0, 0, 0, 0 // 25x
];

/// A helper structure for performing word-sized operations on bytes.
///
/// `WordRegister` wraps a `usize` so that a machine word's worth of bytes can
/// be collected and then inspected with a single word operation instead of one
/// byte at a time.
///
/// Byte index `0` is the most significant byte of the word, so
/// `value().to_be_bytes()` yields the bytes in the order of their indices.
pub(crate) struct WordRegister {
    val: usize,
}

impl WordRegister {
    /// Number of bytes held by the register (the size of a `usize`).
    const LEN: usize = core::mem::size_of::<usize>();

    /// Creates a new register with every byte set to zero.
    #[inline]
    pub const fn new() -> Self {
        Self { val: 0 }
    }

    /// Returns `true` if any byte in the register is zero.
    ///
    /// This uses the classic branch-free "has zero byte" bit trick (often
    /// attributed to Alan Mycroft): subtracting `0x01` from every byte only
    /// sets a byte's high bit, while that bit was previously clear, if a
    /// zero byte is present in the word.
    #[inline]
    pub fn contains_zero(&self) -> bool {
        // 0x01 repeated in every byte of the word.
        const ONES: usize = usize::MAX / 0xFF;
        // 0x80 repeated in every byte of the word.
        const HIGHS: usize = ONES << 7;

        let val = self.val;
        (val.wrapping_sub(ONES) & !val & HIGHS) != 0
    }

    /// Writes a single byte at the given byte index, replacing the previous
    /// byte at that position.
    ///
    /// # Arguments
    ///
    /// * `idx` - The byte position within the `usize` (0-indexed), where `0`
    ///   is the most significant byte. Must be less than the size of a `usize`.
    /// * `byte` - The `u8` value to write.
    ///
    /// # Panics
    ///
    /// In debug builds, panics if `idx` is not a valid byte position.
    #[inline]
    pub fn set(&mut self, idx: usize, byte: u8) {
        debug_assert!(
            idx < Self::LEN,
            "WordRegister::set: byte index out of range"
        );
        let shift = (Self::LEN - 1 - idx) * 8;
        // Clear the target byte, then OR the new value into place.
        let cleared = self.val & !(0xFFusize << shift);
        self.val = cleared | ((byte as usize) << shift);
    }

    /// Returns the raw `usize` value contained in the register.
    #[inline]
    pub const fn value(&self) -> usize {
        self.val
    }
}

fn parse_hdr<'a>(
data: &'a [u8],
b: &'a mut [MaybeUninit<u8>; SCRATCH_BUF_SIZE],
Expand All @@ -1079,20 +1135,49 @@ fn parse_hdr<'a>(
0 => Err(InvalidHeaderName::new()),
len @ 1..=SCRATCH_BUF_SIZE => {
// Read from data into the buffer - transforming using `table` as we go
data.iter()
.zip(b.iter_mut())
.for_each(|(index, out)| *out = MaybeUninit::new(table[*index as usize]));
let mut i = 0;

if WORD_SIZE >= 4 {
let mut register = WordRegister::new();
while i + WORD_SIZE <= len {
let chunk = &data[i..i + WORD_SIZE];

for (j, b) in chunk.iter().enumerate() {
let b = table[*b as usize];
register.set(j, b);
}

if register.contains_zero() {
// A zero byte means `table` rejected one of the input bytes, so the whole name is invalid.
return Err(InvalidHeaderName::new());
}

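// SAFETY: The loop condition guarantees `i + WORD_SIZE <= len`, and this match arm
// guarantees `len <= SCRATCH_BUF_SIZE`, so the `WORD_SIZE` bytes starting at offset `i`
// lie inside `b`. `MaybeUninit<u8>` has the same layout as `u8`, and the write is
// unaligned, so storing the already-initialized bytes of the register here is valid.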
unsafe {
let ptr = b.as_mut_ptr().add(i) as *mut [u8; WORD_SIZE];
std::ptr::write_unaligned(ptr, register.value().to_be_bytes());
}

i += WORD_SIZE;
}
}

// Process the remainder bytes
while i < len {
let v = table[data[i] as usize];
if v == 0 {
return Err(InvalidHeaderName::new());
}
b[i] = MaybeUninit::new(v);
i += 1;
}
// Safety: len bytes of b were just initialized.
let name: &'a [u8] = unsafe { slice_assume_init(&b[0..len]) };
match StandardHeader::from_bytes(name) {
Some(sh) => Ok(sh.into()),
None => {
if name.contains(&0) {
Err(InvalidHeaderName::new())
} else {
Ok(HdrName::custom(name, true))
}
}
None => Ok(HdrName::custom(name, true)),
}
}
SCRATCH_BUF_OVERFLOW..=super::MAX_HEADER_NAME_LEN => Ok(HdrName::custom(data, false)),
Expand Down Expand Up @@ -1123,10 +1208,41 @@ impl HeaderName {
let val = unsafe { ByteStr::from_utf8_unchecked(buf) };
Ok(Custom(val).into())
}
Repr::Custom(MaybeLower { buf, lower: false }) => {
Repr::Custom(MaybeLower {
mut buf,
lower: false,
}) => {
use bytes::BufMut;
let mut dst = BytesMut::with_capacity(buf.len());

if WORD_SIZE >= 4 {
let mut register = WordRegister::new();
while buf.len() >= WORD_SIZE {
let chunk = &buf[..WORD_SIZE];

for (i, b) in chunk.iter().enumerate() {
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
let b = HEADER_CHARS[*b as usize];
register.set(i, b);
}

if register.contains_zero() {
return Err(InvalidHeaderName::new());
}

#[cfg(target_pointer_width = "64")]
{
dst.put_u64(register.value() as u64);
}
#[cfg(target_pointer_width = "32")]
{
dst.put_u32(register.value() as u32);
}

buf = &buf[WORD_SIZE..];
}
}
// Process the remainder bytes
for b in buf.iter() {
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
let b = HEADER_CHARS[*b as usize];
Expand Down Expand Up @@ -1178,7 +1294,27 @@ impl HeaderName {
Ok(Custom(val).into())
}
Repr::Custom(MaybeLower { buf, lower: false }) => {
for &b in buf.iter() {
let mut check_buf = buf;

if WORD_SIZE >= 4 {
let mut register = WordRegister::new();
while check_buf.len() >= WORD_SIZE {
let chunk = &check_buf[..WORD_SIZE];

for (i, b) in chunk.iter().enumerate() {
let b = HEADER_CHARS_H2[*b as usize];
register.set(i, b);
}

if register.contains_zero() {
return Err(InvalidHeaderName::new());
}

check_buf = &check_buf[WORD_SIZE..];
}
}

for &b in check_buf.iter() {
// HEADER_CHARS_H2 maps all bytes that are not valid single-byte
// UTF-8 to 0 so this check returns an error for invalid UTF-8.
if HEADER_CHARS_H2[b as usize] == 0 {
Expand Down Expand Up @@ -1570,8 +1706,33 @@ impl<'a> From<HdrName<'a>> for HeaderName {
} else {
use bytes::BufMut;
let mut dst = BytesMut::with_capacity(maybe_lower.buf.len());
let mut buf = maybe_lower.buf;

if WORD_SIZE >= 4 {
let mut register = WordRegister::new();
while buf.len() >= WORD_SIZE {
let chunk = &maybe_lower.buf[..WORD_SIZE];

for (i, b) in chunk.iter().enumerate() {
// HEADER_CHARS maps all bytes to valid single-byte UTF-8
let b = HEADER_CHARS[*b as usize];
register.set(i, b);
}

#[cfg(target_pointer_width = "64")]
{
dst.put_u64(register.value() as u64);
}
#[cfg(target_pointer_width = "32")]
{
dst.put_u32(register.value() as u32);
}

buf = &buf[WORD_SIZE..];
}
}

for b in maybe_lower.buf.iter() {
for b in buf.iter() {
// HEADER_CHARS maps each byte to a valid single-byte UTF-8
// codepoint.
dst.put_u8(HEADER_CHARS[*b as usize]);
Expand Down
9 changes: 4 additions & 5 deletions src/header/value.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use bytes::{Bytes, BytesMut};
use bytes::Bytes;

use std::convert::TryFrom;
use std::error::Error;
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::str::FromStr;
use std::{cmp, fmt, str};
Expand Down Expand Up @@ -424,10 +423,10 @@ macro_rules! from_integers {
($($name:ident: $t:ident => $max_len:expr),*) => {$(
impl From<$t> for HeaderValue {
fn from(num: $t) -> HeaderValue {
let mut buf = BytesMut::with_capacity($max_len);
let _ = buf.write_str(::itoa::Buffer::new().format(num));
let mut buf = ::itoa::Buffer::new();
let s = buf.format(num);
HeaderValue {
inner: buf.freeze(),
inner: Bytes::copy_from_slice(s.as_bytes()),
is_sensitive: false,
}
}
Expand Down