Skip to content

Commit 5d8d3cf

Browse files
committed
add FindFirst/FindLast of VarintEncoder #19
1 parent 406f6dd commit 5d8d3cf

File tree

3 files changed

+119
-4
lines changed

3 files changed

+119
-4
lines changed

src/InformationRetrieval.Test/Utility/VarintEncoderTest.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,5 +23,38 @@ public void VarintEncoderSmokeTest()
2323
Assert.Equal(new ulong[] { 1023, 5, ulong.MaxValue, ulong.MinValue }, encoded);
2424
Assert.Equal(4, VarintEncoder.GetIntegerCount(data.AsSpan()));
2525
}
26+
27+
[Fact]
public void FindFirstTest()
{
    // Arrange: pack four numbers back to back, advancing the write offset
    // by the value returned from each Encode call.
    byte[] data = new byte[30];
    int offset = 0;
    foreach (ulong value in new ulong[] { 1023, 5, ulong.MaxValue, ulong.MinValue })
    {
        offset += VarintEncoder.Encode(value, data.AsSpan(offset));
    }

    // Act: FindFirst yields the length of the first number, so slicing
    // [0, length) isolates its bytes for decoding.
    int firstLength = VarintEncoder.FindFirst(data.AsSpan());
    var firstBytes = data.AsSpan(0, firstLength);
    ulong decoded = VarintEncoder.Decode(firstBytes);

    // Assert
    Assert.Equal(1023ul, decoded);
}
42+
43+
[Fact]
public void FindLastTest()
{
    // Arrange: pack five numbers back to back, advancing the write offset
    // by the value returned from each Encode call. The rest of the buffer
    // stays zero-filled.
    byte[] data = new byte[30];
    int offset = 0;
    foreach (ulong value in new ulong[] { 1023, 5, ulong.MaxValue, ulong.MinValue, 719 })
    {
        offset += VarintEncoder.Encode(value, data.AsSpan(offset));
    }

    // Act: FindLast yields the start position of the last number; decoding
    // from that position reads it back.
    int lastStart = VarintEncoder.FindLast(data.AsSpan());
    var tail = data.AsSpan(lastStart);
    ulong decoded = VarintEncoder.Decode(tail);

    // Assert
    Assert.Equal(719ul, decoded);
}
2659
}
2760
}

src/InformationRetrieval/Indexing/PostingsList/VarintPostingsList.cs

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ public VarintPostingsList(byte[] buffer, int length)
3434
data = buffer;
3535
this.length = length;
3636
Count = VarintEncoder.GetIntegerCount(buffer.AsSpan(0, length));
37-
prevInserted = this.LastOrDefault().Id;
37+
prevInserted = this.LastOrDefault();
3838
}
3939

4040
public int Count { get; private set; }
@@ -71,6 +71,19 @@ public IEnumerator<DocumentId> GetEnumerator()
7171
}
7272
}
7373

74+
/// <summary>
/// Reads the last number stored in the used part of the buffer.
/// </summary>
/// <returns>
/// The last encoded number truncated to <see cref="uint"/>, or 0 when
/// <c>VarintEncoder.FindLast</c> reports that no number was found.
/// </returns>
private uint LastOrDefault()
{
    // Only the first `length` bytes of `data` are searched.
    int start = VarintEncoder.FindLast(data.AsSpan(0, length));
    if (start < 0)
    {
        return 0;
    }

    // Decode stops at the first terminating byte, so the slice may run to
    // the end of the array.
    ulong decoded = VarintEncoder.Decode(data.AsSpan(start));
    return (uint)decoded;
}
86+
7487
/// <summary>
/// Non-generic enumeration entry point; forwards to the generic
/// <c>GetEnumerator</c> of this class.
/// </summary>
IEnumerator IEnumerable.GetEnumerator()
{
    return GetEnumerator();
}
7689

src/InformationRetrieval/Utility/VarintEncoder.cs

Lines changed: 72 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ public static class VarintEncoder
77
{
88
public const int BufferLength = 10;
99

10+
private const byte LastByteMask = 0x80;
11+
1012
public static int Encode(ulong n, Span<byte> output)
1113
{
1214
Span<byte> buffer = stackalloc byte[BufferLength];
@@ -18,7 +20,7 @@ public static int Encode(ulong n, Span<byte> output)
1820
}
1921
while (n != 0);
2022

21-
buffer[BufferLength - 1] |= 0x80;
23+
buffer[BufferLength - 1] |= LastByteMask;
2224

2325
buffer.Slice(pos, BufferLength - pos).CopyTo(output);
2426

@@ -31,7 +33,7 @@ public static IEnumerable<ulong> Decode(byte[] data, int start, int length)
3133
ulong n = 0;
3234
for (int i = start; i < end; ++i)
3335
{
34-
if ((data[i] & 0x80) == 0x80)
36+
if (IsLastByte(data[i]))
3537
{
3638
n = (n << 7) + (ulong)(data[i] & 0x7f);
3739
yield return n;
@@ -44,19 +46,86 @@ public static IEnumerable<ulong> Decode(byte[] data, int start, int length)
4446
}
4547
}
4648

49+
/// <summary>
/// Decodes the first number from the buffer.
/// </summary>
/// <param name="buffer">
/// Buffer that starts with an encoded number. Bytes after the first
/// terminating byte (high bit set) are ignored.
/// </param>
/// <returns>The decoded number.</returns>
/// <exception cref="FormatException">
/// No byte in <paramref name="buffer"/> has the terminating bit set
/// (this includes an empty buffer).
/// </exception>
public static ulong Decode(ReadOnlySpan<byte> buffer)
{
    ulong n = 0;
    foreach (byte b in buffer)
    {
        // Every byte contributes its low 7 bits, most significant group
        // first. For non-terminating bytes the high bit is already clear,
        // so masking them is a no-op.
        n = (n << 7) | (ulong)(b & 0x7f);

        if (IsLastByte(b))
        {
            return n;
        }
    }

    // FormatException instead of the reserved base Exception type; callers
    // that catch Exception are unaffected.
    throw new FormatException("Failed to decode varint: last byte is missing");
}
72+
4773
/// <summary>
/// Counts the encoded numbers in the buffer by counting terminating bytes
/// (bytes for which <c>IsLastByte</c> is true).
/// </summary>
/// <param name="buffer">The buffer to scan.</param>
/// <returns>Number of terminating bytes found; 0 for an empty buffer.</returns>
public static int GetIntegerCount(ReadOnlySpan<byte> buffer)
{
    int terminators = 0;

    foreach (byte current in buffer)
    {
        if (IsLastByte(current))
        {
            terminators++;
        }
    }

    return terminators;
}
87+
88+
/// <summary>
/// Returns the length, in bytes, of the first number in the buffer.
/// </summary>
/// <param name="buffer">The buffer to scan from its start.</param>
/// <returns>
/// Count of bytes up to and including the first terminating byte,
/// or -1 when the buffer contains no terminating byte.
/// </returns>
public static int FindFirst(ReadOnlySpan<byte> buffer)
{
    int consumed = 0;

    foreach (byte current in buffer)
    {
        ++consumed;
        if (IsLastByte(current))
        {
            return consumed;
        }
    }

    return -1;
}
105+
106+
/// <summary>
/// Returns the position of the first byte of the last complete number.
/// </summary>
/// <param name="buffer">The buffer to scan from its end.</param>
/// <returns>
/// Index of the first byte of the last number that has a terminating byte.
/// This is 0 when the buffer holds exactly one such number, and -1 when the
/// buffer contains no terminating byte at all. Bytes after the last
/// terminating byte are ignored.
/// </returns>
public static int FindLast(ReadOnlySpan<byte> buffer)
{
    // Step 1: find the terminating byte that ends the last complete number.
    int end = buffer.Length - 1;
    while (end >= 0 && !IsLastByte(buffer[end]))
    {
        --end;
    }

    if (end < 0)
    {
        return -1;
    }

    // Step 2: walk back until the byte before `start` terminates the
    // previous number, or until the buffer start is reached. Reaching the
    // start means the buffer holds a single number beginning at index 0,
    // so 0 is returned rather than -1.
    int start = end;
    while (start > 0 && !IsLastByte(buffer[start - 1]))
    {
        --start;
    }

    return start;
}
127+
128+
/// <summary>
/// Tells whether a byte carries the terminating marker, i.e. all bits of
/// <c>LastByteMask</c> are set in it.
/// </summary>
private static bool IsLastByte(byte b)
{
    int masked = b & LastByteMask;
    return masked == LastByteMask;
}
61130
}
62131
}

0 commit comments

Comments
 (0)