Skip to content

RUST-2149 Sync additional vector tests #514

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions src/tests/spec/json/bson-binary-vector/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ Each JSON file contains three top-level keys.

- `description`: string describing the test.
- `valid`: boolean indicating if the vector, dtype, and padding should be considered a valid input.
- `vector`: list of numbers
- `vector`: (required if valid is true) list of numbers
- `dtype_hex`: string defining the data type in hex (e.g. "0x10", "0x27")
- `dtype_alias`: (optional) string defining the data type, perhaps as an Enum.
- `padding`: (optional) integer for byte padding. Defaults to 0.
Expand All @@ -50,7 +50,10 @@ MUST assert that the input float array is the same after encoding and decoding.

#### To prove correct in an invalid case (`valid:false`), one MUST

- raise an exception when attempting to encode a document from the numeric values, dtype, and padding.
- if the vector field is present, raise an exception when attempting to encode a document from the numeric values,
dtype, and padding.
- if the canonical_bson field is present, raise an exception when attempting to deserialize it into the corresponding
numeric values, as the field contains corrupted data.

## FAQ

Expand Down
18 changes: 16 additions & 2 deletions src/tests/spec/json/bson-binary-vector/float32.json
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,22 @@
"vector": [127.0, 7.0],
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"padding": 3
"padding": 3,
"canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000"
},
{
"description": "Insufficient vector data with 3 bytes FLOAT32",
"valid": false,
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"canonical_bson": "1700000005766563746F7200050000000927002A2A2A00"
},
{
"description": "Insufficient vector data with 5 bytes FLOAT32",
"valid": false,
"dtype_hex": "0x27",
"dtype_alias": "FLOAT32",
"canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00"
}
]
}

4 changes: 2 additions & 2 deletions src/tests/spec/json/bson-binary-vector/int8.json
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
"vector": [127, 7],
"dtype_hex": "0x03",
"dtype_alias": "INT8",
"padding": 3
"padding": 3,
"canonical_bson": "1600000005766563746F7200040000000903037F0700"
},
{
"description": "INT8 with float inputs",
Expand All @@ -54,4 +55,3 @@
}
]
}

23 changes: 4 additions & 19 deletions src/tests/spec/json/bson-binary-vector/packed_bit.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"vector": [],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 1
"padding": 1,
"canonical_bson": "1400000005766563746F72000200000009100100"
},
{
"description": "Simple Vector PACKED_BIT",
Expand Down Expand Up @@ -61,21 +62,14 @@
"dtype_alias": "PACKED_BIT",
"padding": 0
},
{
"description": "Padding specified with no vector data PACKED_BIT",
"valid": false,
"vector": [],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 1
},
{
"description": "Exceeding maximum padding PACKED_BIT",
"valid": false,
"vector": [1],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 8
"padding": 8,
"canonical_bson": "1500000005766563746F7200030000000910080100"
},
{
"description": "Negative padding PACKED_BIT",
Expand All @@ -84,15 +78,6 @@
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": -1
},
{
"description": "Vector with float values PACKED_BIT",
"valid": false,
"vector": [127.5],
"dtype_hex": "0x10",
"dtype_alias": "PACKED_BIT",
"padding": 0
}
]
}

77 changes: 47 additions & 30 deletions src/tests/spec/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ struct TestFile {
struct Test {
description: String,
valid: bool,
vector: Vec<Number>,
vector: Option<Vec<Number>>,
#[serde(
rename = "dtype_hex",
deserialize_with = "deserialize_u8_from_hex_string"
Expand Down Expand Up @@ -131,51 +131,68 @@ fn vector_from_numbers(
}
}

// Only return the binary if it represents a valid vector; otherwise, return an error.
fn binary_from_bytes(bson: &str, test_key: &str, description: &str) -> Result<Binary, String> {
let bytes = hex::decode(bson).expect(description);
let mut test_document = Document::from_reader(bytes.as_slice()).expect(description);
let bson = test_document.remove(test_key).expect(description);
let binary = match bson {
Bson::Binary(binary) => binary,
other => panic!("{}: expected binary, got {}", description, other),
};
if let Err(error) = Vector::try_from(&binary) {
Err(error.to_string())
} else {
Ok(binary)
}
}

fn run_test_file(test_file: TestFile) {
for test in test_file.tests {
let description = format!("{} ({})", test.description, test_file.description);

let test_vector = match (
vector_from_numbers(test.vector, test.d_type, test.padding),
test.valid,
) {
(Ok(vector), true) => vector,
(Err(_), false) => return,
(Ok(vector), false) => panic!(
"{}: valid was false but successfully constructed vector {:?}",
description, vector
),
(Err(error), true) => panic!(
"{}: valid was true but vector construction failed with error {}",
description, error
),
let test_vector = match test.vector {
Some(vector) => match vector_from_numbers(vector, test.d_type, test.padding) {
Ok(vector) => {
assert!(test.valid, "{}", description);
Some(vector)
}
Err(error) => {
assert!(!test.valid, "{}: {}", description, error);
None
}
},
None => None,
};

let test_binary = match test.canonical_bson {
Some(bson) => match binary_from_bytes(&bson, &test_file.test_key, &description) {
Ok(vector) => {
assert!(test.valid, "{}", description);
Some(vector)
}
Err(error) => {
assert!(!test.valid, "{}: {}", description, error);
None
}
},
None => None,
};

let Some(canonical_bson) = test.canonical_bson else {
let (Some(test_vector), Some(test_binary)) = (test_vector, test_binary) else {
return;
};

let bytes = hex::decode(canonical_bson).expect(&description);
let mut test_document = Document::from_reader(bytes.as_slice()).expect(&description);
// Rename the field to match the name used in the struct below.
let vector = test_document
.remove(&test_file.test_key)
.expect(&description);
test_document.insert("vector", vector);
let bson = test_document.get("vector").expect(&description);
let test_binary = match bson {
Bson::Binary(binary) => binary,
other => panic!("{}: expected binary, got {}", description, other),
};
let test_document = doc! { "vector": &test_binary };

// TryFrom<Binary> for Vector
let parsed_vector = Vector::try_from(test_binary).expect(&description);
let parsed_vector = Vector::try_from(&test_binary).expect(&description);
assert_eq!(parsed_vector, test_vector);

// From<Vector> for Binary
let binary = Binary::from(&test_vector);
assert_eq!(binary.subtype, BinarySubtype::Vector);
assert_eq!(&binary, test_binary);
assert_eq!(binary, test_binary);

// From<Vector> for Bson
let document = doc! { "vector": &test_vector };
Expand Down