Skip to content

Autogenerate gix-packetline-blocking/src #1340

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
**/generated-archives/*.tar.xz filter=lfs-disabled diff=lfs merge=lfs -text

# assure line feeds don't interfere with our working copy hash
# assure line feeds don't interfere with our working copy hash
**/tests/fixtures/**/*.sh text crlf=input eol=lf
/justfile text crlf=input eol=lf

# have GitHub treat the gix-packetline-blocking src copy as auto-generated
gix-packetline-blocking/src/ linguist-generated=true
32 changes: 31 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ jobs:
# Let's not fail CI for this, it will fail locally often enough, and a crate a little bigger
# than allows is no problem either if it comes to that.
just check-size || true

cargo-deny:
runs-on: ubuntu-latest
strategy:
Expand All @@ -193,6 +193,7 @@ jobs:
- uses: EmbarkStudios/cargo-deny-action@v1
with:
command: check ${{ matrix.checks }}

wasm:
name: WebAssembly
runs-on: ubuntu-latest
Expand All @@ -213,3 +214,32 @@ jobs:
name: crates with 'wasm' feature
- run: cd gix-pack && cargo build --all-features --target ${{ matrix.target }}
name: gix-pack with all features (including wasm)

check-packetline:
strategy:
fail-fast: false
matrix:
os:
- ubuntu-latest
# We consider this script read-only and its effect is the same everywhere.
# However, when changes are made to `etc/copy-packetline.sh`, re-enable the other platforms for testing.
# - macos-latest
# - windows-latest
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v4
- name: Check that working tree is initially clean
run: |
set -x
git status
git diff --exit-code
- name: Regenerate gix-packetline-blocking/src
run: etc/copy-packetline.sh
- name: Check that gix-packetline-blocking/src was already up to date
run: |
set -x
git status
git diff --exit-code
152 changes: 152 additions & 0 deletions etc/copy-packetline.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
#!/bin/bash
# Regenerates gix-packetline-blocking/src as a copy of gix-packetline/src,
# stamping each Rust file with a do-not-edit header (see copy_with_header below).

# -e: exit on error; -u: error on unset vars; -C: no clobbering via '>'; pipefail: fail whole pipelines.
set -euC -o pipefail

# Source tree to copy from, and destination (relative to the repo root; see chdir_toplevel).
readonly input_dir='gix-packetline/src'
readonly output_parent_dir='gix-packetline-blocking'
readonly output_dir="$output_parent_dir/src"

# Print an error message to stderr, prefixed with the script name, and abort.
function fail () {
  local message="$1"
  printf '%s: error: %s\n' "$0" "$message" >&2
  exit 1
}

# Change into the top-level directory of the working tree.
function chdir_toplevel () {
  local top_padded top

  # Append a sentinel so a trailing newline in the path survives "$(...)".
  top_padded="$(git rev-parse --show-toplevel && echo -n .)" ||
    fail 'git-rev-parse failed to find top-level dir'
  top="${top_padded%$'\n.'}"

  cd -- "$top"
}

# Succeed if a merge is currently in progress (MERGE_HEAD exists).
function merging () {
  local gitdir_padded gitdir

  # Append a sentinel so a trailing newline in the path survives "$(...)".
  gitdir_padded="$(git rev-parse --git-dir && echo -n .)" ||
    fail 'git-rev-parse failed to find git dir'
  gitdir="${gitdir_padded%$'\n.'}"

  test -e "$gitdir/MERGE_HEAD"
}

# Emit porcelain status, including ignored files, for the output directory.
function output_dir_status () {
  if ! git status --porcelain --ignored=traditional -- "$output_dir"; then
    fail 'git-status failed'
  fi
}

# Refuse to proceed if replacing the existing output directory could lose work.
function check_output_dir () {
  if ! test -e "$output_dir"; then
    # The destination does not exist on disk, so nothing will be lost. Proceed.
    return
  fi

  if merging; then
    # In a merge, it would be confusing to replace anything at the destination.
    # grep -q '^' matches any line at all, i.e. any status output whatsoever.
    if output_dir_status | grep -q '^'; then
      fail 'output location exists, and a merge is in progress'
    fi
  else
    # We can lose data if anything of value at the destination is not in the
    # index. (This includes unstaged deletions, for two reasons. We could lose
    # track of which files had been deleted. More importantly, replacing a
    # staged symlink or regular file with an unstaged directory is shown by
    # git-status as only a deletion, even if the directory is non-empty.)
    # '^.[^ ]' matches any porcelain line whose second column (worktree status)
    # is not a space, i.e. unstaged changes or ignored/untracked entries.
    if output_dir_status | grep -q '^.[^ ]'; then
      fail 'output location exists, with unstaged changes or ignored files'
    fi
  fi
}

# Succeed if the first line of the file named by $1 ends in \r\n (CRLF).
function first_line_ends_crlf () {
  # This is tricky to check portably. In Cygwin-like environments including
  # MSYS2 and Git Bash, most text processing tools, including awk, sed, and
  # grep, automatically ignore \r before \n. Some ignore \r everywhere. Some
  # can be told to keep \r, but in non-portable ways that may affect other
  # implementations. Bash ignores \r in some places even without "-o igncr",
  # and ignores \r even more with it, including in all text from command
  # substitution. Simple checks may be non-portable to other OSes. Fortunately,
  # tools that treat input as binary data are exempt (even cat, but "-v" is
  # non-portable, and unreliable in general because lines can end in "^M").
  # This may be doable without od, by using tr more heavily, but it could be
  # hard to avoid false positives with unexpected characters or \r without \n.

  head -n 1 -- "$1" |  # Get the longest prefix with no non-trailing \n byte.
    od -An -ta |       # Represent all bytes symbolically, without addresses.
    tr -sd '\n' ' ' |  # Scrunch into one line, so "cr nl" appears as such.
    grep -q 'cr nl$'   # Check if the result signifies a \r\n line ending.
}

# Emit the do-not-edit banner for a generated file, using the given line ending
# (twice: once to end the banner line, once for a blank separator line).
function make_header () {
  local source_file line_ending

  source_file="$1"
  line_ending="$2"

  # shellcheck disable=SC2016 # The backticks are intentionally literal.
  printf '// DO NOT EDIT - this is a copy of %s. Run `just copy-packetline` to update it.%s%s' \
    "$source_file" "$line_ending" "$line_ending"
}

# Copy one source file to its destination, prepending the generated-file banner.
function copy_with_header () {
  local source_file dest_file line_ending

  source_file="$1"
  dest_file="$2"

  # Use the same line-ending convention in the banner as the source file itself.
  line_ending=$'\n'
  if first_line_ends_crlf "$source_file"; then
    line_ending=$'\r\n'
  fi

  make_header "$source_file" "$line_ending" | cat -- - "$source_file" >"$dest_file"
}

# Mirror a single input path into the output tree: directories are recreated,
# Rust sources are copied with a banner, anything else is an error.
function generate_one () {
  local source_path dest_path

  source_path="$1"
  dest_path="$output_dir${source_path#"$input_dir"}"

  if test -d "$source_path"; then
    mkdir -p -- "$dest_path"
    return
  fi

  # Cover symlinks separately, for more useful error messages.
  if test -L "$source_path"; then
    fail "input file is symbolic link: $source_path"
  fi

  # This covers less common kinds of files we can't or shouldn't process.
  if ! test -f "$source_path"; then
    fail "input file neither regular file nor directory: $source_path"
  fi

  case "$source_path" in
  *.rs)
    copy_with_header "$source_path" "$dest_path"
    ;;
  *)
    fail "input file not named as Rust source code: $source_path"
    ;;
  esac
}

# Validate preconditions, clear the old output location, and regenerate the
# whole output tree from the input tree.
function generate_all () {
  local source_path

  test -d "$input_dir" || fail "no input directory: $input_dir"
  test -d "$output_parent_dir" || fail "no output parent directory: $output_parent_dir"
  check_output_dir

  # The old output may be a directory, symlink, or regular file.
  rm -rf -- "$output_dir"
  test -e "$output_dir" && fail 'unable to remove output location'

  # NUL-delimited traversal, so arbitrary filenames round-trip safely.
  find "$input_dir" -print0 | while IFS= read -r -d '' source_path; do
    generate_one "$source_path"
  done
}

# Entry point: run from the repository root so the relative paths above resolve.
chdir_toplevel
generate_all
1 change: 0 additions & 1 deletion gix-packetline-blocking/src

This file was deleted.

148 changes: 148 additions & 0 deletions gix-packetline-blocking/src/decode.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
// DO NOT EDIT - this is a copy of gix-packetline/src/decode.rs. Run `just copy-packetline` to update it.

use bstr::BString;

use crate::{PacketLineRef, DELIMITER_LINE, FLUSH_LINE, MAX_DATA_LEN, MAX_LINE_LEN, RESPONSE_END_LINE, U16_HEX_BYTES};

/// The error used in the [`decode`][mod@crate::decode] module
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The 4-byte hex length prefix could not be parsed as hexadecimal.
    #[error("Failed to decode the first four hex bytes indicating the line length: {err}")]
    HexDecode { err: String },
    /// The announced length exceeds the allowed maximum. (Checked against
    /// `MAX_LINE_LEN` in [`to_data_line()`] and [`streaming()`], while the
    /// message cites `MAX_DATA_LEN` — the two presumably differ only by the
    /// prefix length; confirm in the crate root.)
    #[error("The data received claims to be larger than the maximum allowed size: got {length_in_bytes}, exceeds {MAX_DATA_LEN}")]
    DataLengthLimitExceeded { length_in_bytes: usize },
    /// A `0004` prefix was received — a line that carries no data at all.
    #[error("Received an invalid empty line")]
    DataIsEmpty,
    /// A `0003` prefix was received — too short to even hold its own prefix.
    #[error("Received an invalid line of length 3")]
    InvalidLineLength,
    /// NOTE(review): not constructed in this module; presumably represents a
    /// received error line — confirm at call sites.
    #[error("{data:?} - consumed {bytes_consumed} bytes")]
    Line { data: BString, bytes_consumed: usize },
    /// Not enough input was available to decode a complete packet line.
    #[error("Needing {bytes_needed} additional bytes to decode the line successfully")]
    NotEnoughData { bytes_needed: usize },
}

///
#[allow(clippy::empty_docs)]
pub mod band {
    /// The error used in [`PacketLineRef::decode_band()`][super::PacketLineRef::decode_band()].
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        /// The band identifier did not denote a known side-band channel, or the input was malformed.
        #[error("attempt to decode a non-side channel line or input was malformed: {band_id}")]
        InvalidSideBand { band_id: u8 },
        /// Only data lines can carry side-band content; other line kinds cannot be decoded into a band.
        #[error("attempt to decode a non-data line into a side-channel band")]
        NonDataLine,
    }
}

/// A utility return type to support incremental parsing of packet lines.
///
/// Returned by [`streaming()`].
#[derive(Debug, Clone)]
pub enum Stream<'a> {
    /// Indicate a single packet line was parsed completely
    Complete {
        /// The parsed packet line
        line: PacketLineRef<'a>,
        /// The amount of bytes consumed from input
        bytes_consumed: usize,
    },
    /// A packet line could not yet be parsed due to missing bytes
    Incomplete {
        /// The amount of additional bytes needed for the parsing to complete
        bytes_needed: usize,
    },
}

/// The result of [`hex_prefix()`] indicating either a special packet line or the amount of wanted bytes
pub enum PacketLineOrWantedSize<'a> {
    /// The special kind of packet line decoded from the hex prefix. It never contains actual data.
    Line(PacketLineRef<'a>),
    /// The amount of bytes indicated by the hex prefix of the packet line.
    ///
    /// This counts only payload bytes — the four bytes of the hex prefix
    /// itself have already been subtracted.
    Wanted(u16),
}

/// Decode the `four_bytes` packet line prefix provided in hexadecimal form and check it for validity.
pub fn hex_prefix(four_bytes: &[u8]) -> Result<PacketLineOrWantedSize<'_>, Error> {
debug_assert_eq!(four_bytes.len(), 4, "need four hex bytes");
for (line_bytes, line_type) in &[
(FLUSH_LINE, PacketLineRef::Flush),
(DELIMITER_LINE, PacketLineRef::Delimiter),
(RESPONSE_END_LINE, PacketLineRef::ResponseEnd),
] {
if four_bytes == *line_bytes {
return Ok(PacketLineOrWantedSize::Line(*line_type));
}
}

let mut buf = [0u8; U16_HEX_BYTES / 2];
faster_hex::hex_decode(four_bytes, &mut buf).map_err(|err| Error::HexDecode { err: err.to_string() })?;
let wanted_bytes = u16::from_be_bytes(buf);

if wanted_bytes == 3 {
return Err(Error::InvalidLineLength);
}
if wanted_bytes == 4 {
return Err(Error::DataIsEmpty);
}
debug_assert!(
wanted_bytes as usize > U16_HEX_BYTES,
"by now there should be more wanted bytes than prefix bytes"
);
Ok(PacketLineOrWantedSize::Wanted(wanted_bytes - U16_HEX_BYTES as u16))
}

/// Obtain a `PacketLine` from `data` after assuring `data` is small enough to fit.
pub fn to_data_line(data: &[u8]) -> Result<PacketLineRef<'_>, Error> {
if data.len() > MAX_LINE_LEN {
return Err(Error::DataLengthLimitExceeded {
length_in_bytes: data.len(),
});
}

Ok(PacketLineRef::Data(data))
}

/// Decode `data` as packet line while reporting whether the data is complete or not using a [`Stream`].
pub fn streaming(data: &[u8]) -> Result<Stream<'_>, Error> {
    let available = data.len();

    // The 4-byte hex prefix must be fully present before anything can be decoded.
    if available < U16_HEX_BYTES {
        return Ok(Stream::Incomplete {
            bytes_needed: U16_HEX_BYTES - available,
        });
    }

    let payload_len = match hex_prefix(&data[..U16_HEX_BYTES])? {
        // Special lines carry no payload; only the 4 prefix bytes were consumed.
        PacketLineOrWantedSize::Line(line) => {
            return Ok(Stream::Complete {
                line,
                bytes_consumed: 4,
            })
        }
        PacketLineOrWantedSize::Wanted(size) => size as usize,
    };
    // Total on-the-wire size of the line: payload plus the hex prefix.
    let total_len = payload_len + U16_HEX_BYTES;

    if total_len > MAX_LINE_LEN {
        return Err(Error::DataLengthLimitExceeded {
            length_in_bytes: total_len,
        });
    }
    if available < total_len {
        return Ok(Stream::Incomplete {
            bytes_needed: total_len - available,
        });
    }

    Ok(Stream::Complete {
        line: to_data_line(&data[U16_HEX_BYTES..total_len])?,
        bytes_consumed: total_len,
    })
}

/// Decode an entire packet line from data or fail.
///
/// Note that failure also happens if there is not enough data to parse a complete packet line, as opposed to [`streaming()`] decoding
/// succeeds in that case, stating how much more bytes are required.
pub fn all_at_once(data: &[u8]) -> Result<PacketLineRef<'_>, Error> {
    let parsed = streaming(data)?;
    match parsed {
        // Incomplete input is an error here, unlike in the streaming variant.
        Stream::Incomplete { bytes_needed } => Err(Error::NotEnoughData { bytes_needed }),
        Stream::Complete { line, .. } => Ok(line),
    }
}
Loading
Loading