Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use varint to encode string/bytes length #1060

Merged
merged 4 commits into from
Jul 11, 2023
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions common_types/src/row/contiguous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ use std::{
str,
};

use arrow::datatypes::ToByteSlice;
use prost::encoding::{decode_varint, encode_varint, encoded_len_varint};
use snafu::{ensure, Backtrace, Snafu};

use crate::{
Expand Down Expand Up @@ -98,10 +100,6 @@ impl Encoding {
const fn size_of_num_bits() -> usize {
mem::size_of::<u32>()
}

const fn size_of_var_len() -> usize {
mem::size_of::<u32>()
}
}

pub enum ContiguousRowReader<'a, T> {
Expand Down Expand Up @@ -352,10 +350,11 @@ impl<'a, T: RowBuffer + 'a> ContiguousRowWriter<'a, T> {
let value_buf = (*next_string_offset as u32).to_ne_bytes();
Self::write_slice_to_offset(inner, offset, &value_buf);

// Encode length of string as a u32.
// Encode length of string as a varint.
ensure!(v.len() <= MAX_STRING_LEN, StringTooLong { len: v.len() });
let string_len = v.len() as u32;
Self::write_slice_to_offset(inner, next_string_offset, &string_len.to_ne_bytes());
let mut buf = Vec::new();
encode_varint(v.len() as u64, &mut buf);
Self::write_slice_to_offset(inner, next_string_offset, buf.to_byte_slice());
Self::write_slice_to_offset(inner, next_string_offset, v);
}
Datum::String(v) => {
Expand All @@ -369,9 +368,12 @@ impl<'a, T: RowBuffer + 'a> ContiguousRowWriter<'a, T> {
// Encode the string offset as a u32.
let value_buf = (*next_string_offset as u32).to_ne_bytes();
Self::write_slice_to_offset(inner, offset, &value_buf);

// Encode length of string as a varint.
ensure!(v.len() <= MAX_STRING_LEN, StringTooLong { len: v.len() });
let bytes_len = v.len() as u32;
Self::write_slice_to_offset(inner, next_string_offset, &bytes_len.to_ne_bytes());
let mut buf = Vec::new();
encode_varint(v.len() as u64, &mut buf);
ShiKaiWi marked this conversation as resolved.
Show resolved Hide resolved
Self::write_slice_to_offset(inner, next_string_offset, buf.to_byte_slice());
Self::write_slice_to_offset(inner, next_string_offset, v.as_bytes());
}
Datum::UInt64(v) => {
Expand Down Expand Up @@ -467,7 +469,8 @@ impl<'a, T: RowBuffer + 'a> ContiguousRowWriter<'a, T> {

if !datum.is_fixed_sized() {
// For the datum content and the length of it
let size = datum.size() + Encoding::size_of_offset();
let len = datum.size();
let size = len + encoded_len_varint(len as u64);
num_bytes_of_variable_col += size;
encoded_len += size;
}
Expand Down Expand Up @@ -526,7 +529,8 @@ impl<'a, T: RowBuffer + 'a> ContiguousRowWriter<'a, T> {
let datum = &row[writer_index];
if !datum.is_fixed_sized() {
// For the datum content and the length of it
encoded_len += Encoding::size_of_var_len() + datum.size();
let len = datum.size();
encoded_len += encoded_len_varint(len as u64) + len;
}
}
}
Expand Down Expand Up @@ -690,12 +694,13 @@ fn must_read_bytes<'a>(datum_buf: &'a [u8], string_buf: &'a [u8]) -> &'a [u8] {
// Read offset of string in string buf.
let value_buf = datum_buf[..mem::size_of::<Offset>()].try_into().unwrap();
let offset = Offset::from_ne_bytes(value_buf) as usize;
let string_buf = &string_buf[offset..];
let mut string_buf = &string_buf[offset..];

// Read len of the string.
let len_buf = string_buf[..mem::size_of::<u32>()].try_into().unwrap();
let string_len = u32::from_ne_bytes(len_buf) as usize;
let string_buf = &string_buf[mem::size_of::<u32>()..];
let string_len = match decode_varint(&mut string_buf) {
Ok(len) => len as usize,
Err(_) => panic!("failed to decode varint"),
ShiKaiWi marked this conversation as resolved.
Show resolved Hide resolved
};

// Read string.
&string_buf[..string_len]
Expand Down