Skip to content

Commit e62da2b

Browse files
authored
CSHARP-5603: Add Big Endian support in BinaryVectorReader and BinaryVectorWriter (#1682)
1 parent 27fd9e5 commit e62da2b

File tree

5 files changed

+229
-25
lines changed

5 files changed

+229
-25
lines changed

src/MongoDB.Bson/IO/BinaryPrimitivesCompat.cs

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
using System;
1717
using System.Buffers.Binary;
18+
using System.Runtime.InteropServices;
1819

1920
namespace MongoDB.Bson.IO
2021
{
@@ -31,5 +32,55 @@ public static void WriteDoubleLittleEndian(Span<byte> destination, double value)
3132
{
3233
BinaryPrimitives.WriteInt64LittleEndian(destination, BitConverter.DoubleToInt64Bits(value));
3334
}
35+
36+
/// <summary>
/// Reads a single-precision float stored in little-endian byte order from the start of <paramref name="source"/>.
/// </summary>
/// <param name="source">The bytes to decode; only the first four are read.</param>
/// <returns>The decoded value.</returns>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="source"/> holds fewer than four bytes.</exception>
public static float ReadSingleLittleEndian(ReadOnlySpan<byte> source)
{
    if (source.Length < 4)
    {
        throw new ArgumentOutOfRangeException(nameof(source.Length), "Source span is too small to contain a float.");
    }

#if NET6_0_OR_GREATER
    return BinaryPrimitives.ReadSingleLittleEndian(source);
#else
    // Targets such as net472 lack BitConverter.Int32BitsToSingle, so decode the 32-bit
    // little-endian pattern first and reinterpret it through the overlapping-field struct.
    FloatIntUnion reinterpret = default;
    reinterpret.IntValue = BinaryPrimitives.ReadInt32LittleEndian(source);
    return reinterpret.FloatValue;
#endif
}
58+
59+
/// <summary>
/// Writes a single-precision float into the first four bytes of <paramref name="destination"/> in little-endian byte order.
/// </summary>
/// <param name="destination">The span that receives the encoded bytes.</param>
/// <param name="value">The value to encode.</param>
/// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="destination"/> holds fewer than four bytes.</exception>
public static void WriteSingleLittleEndian(Span<byte> destination, float value)
{
    if (destination.Length < 4)
    {
        throw new ArgumentOutOfRangeException(nameof(destination.Length), "Destination span is too small to hold a float.");
    }

#if NET6_0_OR_GREATER
    BinaryPrimitives.WriteSingleLittleEndian(destination, value);
#else
    // Targets such as net472 lack BitConverter.SingleToInt32Bits, so obtain the raw bit
    // pattern through the overlapping-field struct and emit it least significant byte first.
    FloatIntUnion reinterpret = default;
    reinterpret.FloatValue = value;
    BinaryPrimitives.WriteInt32LittleEndian(destination, reinterpret.IntValue);
#endif
}
78+
79+
// Overlays a float and an int on the same storage (both fields at offset 0) so the bit
// pattern of one can be read back as the other without unsafe code.
[StructLayout(LayoutKind.Explicit)]
private struct FloatIntUnion
{
    [FieldOffset(0)]
    public int IntValue;

    [FieldOffset(0)]
    public float FloatValue;
}
3485
}
3586
}

src/MongoDB.Bson/Serialization/BinaryVectorReader.cs

Lines changed: 27 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
using System.Collections.Generic;
1818
using System.Linq;
1919
using System.Runtime.InteropServices;
20+
using MongoDB.Bson.IO;
2021

2122
namespace MongoDB.Bson.Serialization
2223
{
@@ -41,21 +42,8 @@ public static (TItem[] Items, byte Padding, BinaryVectorDataType VectorDataType)
4142
switch (vectorDataType)
4243
{
4344
case BinaryVectorDataType.Float32:
44-
45-
if ((vectorDataBytes.Span.Length & 3) != 0)
46-
{
47-
throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
48-
}
49-
50-
if (BitConverter.IsLittleEndian)
51-
{
52-
var singles = MemoryMarshal.Cast<byte, float>(vectorDataBytes.Span);
53-
items = (TItem[])(object)singles.ToArray();
54-
}
55-
else
56-
{
57-
throw new NotSupportedException("Binary vector data is not supported on Big Endian architecture yet.");
58-
}
45+
var floatArray = ReadSinglesArrayLittleEndian(vectorDataBytes.Span);
46+
items = (TItem[])(object)floatArray;
5947
break;
6048
case BinaryVectorDataType.Int8:
6149
var itemsSpan = MemoryMarshal.Cast<byte, TItem>(vectorDataBytes.Span);
@@ -123,6 +111,30 @@ TExpectedItem[] AsTypedArrayOrThrow<TExpectedItem>()
123111
return result;
124112
}
125113
}
114+
115+
/// <summary>
/// Decodes a sequence of little-endian single-precision floats into a new array.
/// </summary>
/// <param name="span">The encoded bytes; the length must be a multiple of 4.</param>
/// <returns>A new array with one float per four input bytes.</returns>
/// <exception cref="FormatException">Thrown when the length of <paramref name="span"/> is not a multiple of 4.</exception>
private static float[] ReadSinglesArrayLittleEndian(ReadOnlySpan<byte> span)
{
    if ((span.Length & 3) != 0)
    {
        throw new FormatException("Data length of binary vector of type Float32 must be a multiple of 4 bytes.");
    }

    // On a little-endian host the bytes already have the in-memory float layout.
    if (BitConverter.IsLittleEndian)
    {
        return MemoryMarshal.Cast<byte, float>(span).ToArray();
    }

    // Otherwise decode element by element, consuming four bytes at a time.
    var singles = new float[span.Length / 4];
    var remaining = span;
    for (var index = 0; index < singles.Length; index++)
    {
        singles[index] = BinaryPrimitivesCompat.ReadSingleLittleEndian(remaining.Slice(0, 4));
        remaining = remaining.Slice(4);
    }

    return singles;
}
126138

127139
public static void ValidateItemType<TItem>(BinaryVectorDataType binaryVectorDataType)
128140
{

src/MongoDB.Bson/Serialization/BinaryVectorWriter.cs

Lines changed: 31 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515

1616
using System;
1717
using System.Runtime.InteropServices;
18+
using MongoDB.Bson.IO;
1819

1920
namespace MongoDB.Bson.Serialization
2021
{
@@ -35,15 +36,39 @@ public static byte[] WriteToBytes<TItem>(BinaryVector<TItem> binaryVector)
3536
/// <summary>
/// Serializes vector items into a byte array laid out as: data type byte, padding byte, item data.
/// </summary>
/// <typeparam name="TItem">The item type of the vector.</typeparam>
/// <param name="vectorData">The vector items.</param>
/// <param name="binaryVectorDataType">The vector data type; written as the first byte.</param>
/// <param name="padding">The padding value; written as the second byte.</param>
/// <returns>The two header bytes followed by the item bytes. Float32 items are always written in little-endian order.</returns>
/// <exception cref="NotSupportedException">Thrown when <paramref name="binaryVectorDataType"/> is not Float32, Int8 or PackedBit.</exception>
public static byte[] WriteToBytes<TItem>(ReadOnlySpan<TItem> vectorData, BinaryVectorDataType binaryVectorDataType, byte padding)
    where TItem : struct
{
    switch (binaryVectorDataType)
    {
        case BinaryVectorDataType.Float32:
        {
            var floatSpan = MemoryMarshal.Cast<TItem, float>(vectorData);

            // Size the buffer from the reinterpreted span rather than from vectorData.Length * 4,
            // so the allocation always matches the number of floats actually written below
            // (the two only differ when TItem is not 4 bytes wide).
            var result = new byte[2 + floatSpan.Length * sizeof(float)];
            result[0] = (byte)binaryVectorDataType;
            result[1] = padding;

            var floatOutput = result.AsSpan(2);

            if (BitConverter.IsLittleEndian)
            {
                // In-memory layout is already little-endian: copy the raw bytes.
                MemoryMarshal.Cast<float, byte>(floatSpan).CopyTo(floatOutput);
            }
            else
            {
                // Big-endian host: encode each float explicitly in little-endian order.
                for (var i = 0; i < floatSpan.Length; i++)
                {
                    BinaryPrimitivesCompat.WriteSingleLittleEndian(floatOutput.Slice(i * sizeof(float), sizeof(float)), floatSpan[i]);
                }
            }

            return result;
        }

        case BinaryVectorDataType.Int8:
        case BinaryVectorDataType.PackedBit:
        {
            // These items are copied byte for byte, with no endianness handling.
            var vectorDataBytes = MemoryMarshal.Cast<TItem, byte>(vectorData);
            return [(byte)binaryVectorDataType, padding, .. vectorDataBytes];
        }

        default:
            throw new NotSupportedException($"Binary vector serialization is not supported for {binaryVectorDataType}.");
    }
}
4873
}
4974
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
/* Copyright 2010-present MongoDB Inc.
2+
*
3+
* Licensed under the Apache License, Version 2.0 (the "License");
4+
* you may not use this file except in compliance with the License.
5+
* You may obtain a copy of the License at
6+
*
7+
* http://www.apache.org/licenses/LICENSE-2.0
8+
*
9+
* Unless required by applicable law or agreed to in writing, software
10+
* distributed under the License is distributed on an "AS IS" BASIS,
11+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
* See the License for the specific language governing permissions and
13+
* limitations under the License.
14+
*/
15+
16+
using System;
17+
using Xunit;
18+
using FluentAssertions;
19+
using MongoDB.Bson.IO;
20+
21+
namespace MongoDB.Bson.Tests.IO
22+
{
23+
/// <summary>
/// Tests for the single-precision little-endian read/write helpers of <see cref="BinaryPrimitivesCompat"/>.
/// </summary>
public class BinaryPrimitivesCompatTests
{
    [Fact]
    public void ReadSingleLittleEndian_should_read_correctly()
    {
        // 1.0f is 0x3F800000; least significant byte comes first.
        byte[] encodedOne = { 0x00, 0x00, 0x80, 0x3F };

        var decoded = BinaryPrimitivesCompat.ReadSingleLittleEndian(encodedOne);

        decoded.Should().Be(1.0f);
    }

    [Fact]
    public void ReadSingleLittleEndian_should_throw_on_insufficient_length()
    {
        var tooShort = new byte[3];

        var thrown = Record.Exception(() => BinaryPrimitivesCompat.ReadSingleLittleEndian(tooShort));

        thrown.Should().BeOfType<ArgumentOutOfRangeException>()
            .Which.ParamName.Should().Be("Length");
    }

    [Fact]
    public void WriteSingleLittleEndian_should_throw_on_insufficient_length()
    {
        var tooShort = new byte[3];

        var thrown = Record.Exception(() => BinaryPrimitivesCompat.WriteSingleLittleEndian(tooShort, 1.23f));

        thrown.Should().BeOfType<ArgumentOutOfRangeException>()
            .Which.ParamName.Should().Be("Length");
    }

    [Fact]
    public void WriteSingleLittleEndian_should_write_correctly()
    {
        var written = new byte[4];

        BinaryPrimitivesCompat.WriteSingleLittleEndian(written, 1.0f);

        // 1.0f is 0x3F800000; least significant byte comes first.
        written.Should().Equal(new byte[] { 0x00, 0x00, 0x80, 0x3F });
    }

    [Theory]
    [InlineData(0f)]
    [InlineData(1.0f)]
    [InlineData(-1.5f)]
    [InlineData(float.MaxValue)]
    [InlineData(float.MinValue)]
    [InlineData(float.NaN)]
    [InlineData(float.PositiveInfinity)]
    [InlineData(float.NegativeInfinity)]
    public void WriteAndReadSingleLittleEndian_should_roundtrip_correctly(float value)
    {
        var scratch = new byte[4];

        BinaryPrimitivesCompat.WriteSingleLittleEndian(scratch, value);
        var decoded = BinaryPrimitivesCompat.ReadSingleLittleEndian(scratch);

        // NaN gets its own check via IsNaN instead of an equality assertion.
        if (!float.IsNaN(value))
        {
            Assert.Equal(value, decoded);
            return;
        }

        Assert.True(float.IsNaN(decoded));
    }
}
89+
}

tests/MongoDB.Bson.Tests/Serialization/Serializers/BinaryVectorSerializerTests.cs

Lines changed: 31 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -365,10 +365,16 @@ private BsonBinaryData SerializeToBinaryData<TCollection>(TCollection collection
365365
// Builds the items 0..elementsCount-1 converted to T, together with the expected vector bytes:
// data type byte, padding byte, then the items encoded in little-endian order.
private static (T[], byte[] VectorBson) GetTestData<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding)
    where T : struct
{
    var elements = Enumerable
        .Range(0, elementsCount)
        .Select(index => Convert.ChangeType(index, typeof(T)).As<T>())
        .ToArray();
    ReadOnlySpan<T> elementsSpan = elements;

    // On a little-endian host the in-memory bytes are used as-is; otherwise the
    // bytes come from BigEndianToLittleEndian.
    ReadOnlySpan<byte> elementsBytesLittleEndian;
    if (BitConverter.IsLittleEndian)
    {
        elementsBytesLittleEndian = MemoryMarshal.Cast<T, byte>(elementsSpan);
    }
    else
    {
        elementsBytesLittleEndian = BigEndianToLittleEndian(elementsSpan, dataType);
    }

    byte[] vectorBsonData = [(byte)dataType, bitsPadding, .. elementsBytesLittleEndian];

    return (elementsSpan.ToArray(), vectorBsonData);
}
373379

374380
private static (BinaryVector<T>, byte[] VectorBson) GetTestDataBinaryVector<T>(BinaryVectorDataType dataType, int elementsCount, byte bitsPadding)
@@ -409,6 +415,27 @@ private static IBsonSerializer CreateBinaryVectorSerializer<T>(BinaryVectorDataT
409415
return serializer;
410416
}
411417

418+
// Returns the bytes of the given items with the byte order of every multi-byte element reversed.
// Intended for big-endian hosts, where reversing the native in-memory representation yields
// little-endian order. Int8 and PackedBit data is returned unchanged.
private static byte[] BigEndianToLittleEndian<T>(ReadOnlySpan<T> span, BinaryVectorDataType dataType) where T : struct
{
    // Copy the raw in-memory representation once instead of converting element by element.
    var result = MemoryMarshal.Cast<T, byte>(span).ToArray();

    // Int8 and PackedBit need no conversion; an empty span has nothing to reverse.
    if (dataType == BinaryVectorDataType.Int8 || dataType == BinaryVectorDataType.PackedBit || span.IsEmpty)
    {
        return result;
    }

    // Derive the managed element size from the copied bytes; this avoids Marshal.SizeOf<T>(),
    // which reports the marshaled size rather than the managed size.
    var elementSize = result.Length / span.Length;

    for (var offset = 0; offset < result.Length; offset += elementSize)
    {
        Array.Reverse(result, offset, elementSize); // Ensure LE order
    }

    return result;
}
438+
412439
public class BinaryVectorNoAttributeHolder
413440
{
414441
public BinaryVectorInt8 ValuesInt8 { get; set; }

0 commit comments

Comments
 (0)