Skip to content

Commit 338a9b1

Browse files
authored
feat: add SsFormat encoding library (#4292)
* feat: add SsFormat encoding library * remove unsigned methods * fix javadoc * refactored appendInt method names + added tests for target range
1 parent a98532a commit 338a9b1

File tree

5 files changed

+1671
-0
lines changed

5 files changed

+1671
-0
lines changed
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner.spi.v1;
18+
19+
import com.google.api.core.InternalApi;
20+
import com.google.protobuf.ByteString;
21+
import java.nio.ByteBuffer;
22+
import java.nio.ByteOrder;
23+
import java.nio.charset.StandardCharsets;
24+
25+
/**
26+
* Sortable String Format encoding utilities for Spanner keys.
27+
*
28+
* <p>This class provides methods to encode various data types into a byte format that preserves
29+
* lexicographic ordering. The encoding supports both increasing and decreasing sort orders.
30+
*/
31+
@InternalApi
32+
public final class SsFormat {
33+
34+
/**
35+
* Makes the given key a prefix successor. This means that the returned key is the smallest
36+
* possible key that is larger than the input key, and that does not have the input key as a
37+
* prefix.
38+
*
39+
* <p>This is done by flipping the least significant bit of the last byte of the key.
40+
*
41+
* @param key The key to make a prefix successor.
42+
* @return The prefix successor key.
43+
*/
44+
public static ByteString makePrefixSuccessor(ByteString key) {
45+
if (key == null || key.isEmpty()) {
46+
return ByteString.EMPTY;
47+
}
48+
byte[] bytes = key.toByteArray();
49+
bytes[bytes.length - 1] = (byte) (bytes[bytes.length - 1] | 1);
50+
return ByteString.copyFrom(bytes);
51+
}
52+
53+
// Static utility holder; never instantiated.
private SsFormat() {}

// Flag bit OR-ed into the header byte of every key component written by this class.
private static final int IS_KEY = 0x80;

// ---- HeaderType values -------------------------------------------------------------------

// Unsigned integers (variable length, 1-9 bytes).
private static final int TYPE_UINT_1 = 0;
private static final int TYPE_DECREASING_UINT_1 = 40;

// Signed integers (variable length, 1-8 bytes). The header is derived from these
// one-byte bases by adding (positive) or subtracting (negative) the extra payload length.
private static final int TYPE_NEG_INT_1 = 16;
private static final int TYPE_POS_INT_1 = 17;
private static final int TYPE_DECREASING_NEG_INT_1 = 48;
private static final int TYPE_DECREASING_POS_INT_1 = 49;

// Strings / byte sequences.
private static final int TYPE_STRING = 25;
private static final int TYPE_DECREASING_STRING = 57;

// Nullable markers.
private static final int TYPE_NULL_ORDERED_FIRST = 27;
private static final int TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_FIRST = 28;
private static final int TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_LAST = 59;
private static final int TYPE_NULL_ORDERED_LAST = 60;

// Doubles (variable length, 1-8 bytes; encoded as a transformed int64).
private static final int TYPE_NEG_DOUBLE_1 = 73;
private static final int TYPE_POS_DOUBLE_1 = 74;
private static final int TYPE_DECREASING_NEG_DOUBLE_1 = 89;
private static final int TYPE_DECREASING_POS_DOUBLE_1 = 90;

// ---- EscapeChar values -------------------------------------------------------------------

// Second byte of the escape pair emitted after a literal 0x00 payload byte.
private static final byte ASCENDING_ZERO_ESCAPE = (byte) 0xf0;
// Second byte of the escape pair emitted after a literal 0xFF payload byte.
private static final byte ASCENDING_FF_ESCAPE = (byte) 0x10;
// Final byte of the string terminator pair.
private static final byte SEP = (byte) 'x'; // 0x78

// ---- Composite tag limits ----------------------------------------------------------------

// Reserved tag value; rejected by appendCompositeTag.
private static final int K_OBJECT_EXISTENCE_TAG = 0x7e;
// Largest tag value accepted by appendCompositeTag.
private static final int K_MAX_FIELD_TAG = 0xffff;

// Offset to make negative timestamp seconds sort correctly (equal to 1L << 63).
private static final long TIMESTAMP_SECONDS_OFFSET = Long.MIN_VALUE;
95+
96+
public static void appendCompositeTag(UnsynchronizedByteArrayOutputStream out, int tag) {
97+
if (tag == K_OBJECT_EXISTENCE_TAG || tag <= 0 || tag > K_MAX_FIELD_TAG) {
98+
throw new IllegalArgumentException("Invalid tag value: " + tag);
99+
}
100+
101+
if (tag < 16) {
102+
// Short tag: 000 TTTT S (S is LSB of tag, but here tag is original, so S=0)
103+
// Encodes as (tag << 1)
104+
out.write((byte) (tag << 1));
105+
} else {
106+
// Long tag
107+
int shiftedTag = tag << 1; // LSB is 0 for prefix successor
108+
if (shiftedTag < (1 << (5 + 8))) { // Original tag < 4096
109+
// Header: num_extra_bytes=1 (01xxxxx), P=payload bits from tag
110+
// (1 << 5) is 00100000
111+
// (shiftedTag >> 8) are the 5 MSBs of the payload part of the tag
112+
out.write((byte) ((1 << 5) | (shiftedTag >> 8)));
113+
out.write((byte) (shiftedTag & 0xFF));
114+
} else { // Original tag >= 4096 and <= K_MAX_FIELD_TAG (65535)
115+
// Header: num_extra_bytes=2 (10xxxxx)
116+
// (2 << 5) is 01000000
117+
out.write((byte) ((2 << 5) | (shiftedTag >> 16)));
118+
out.write((byte) ((shiftedTag >> 8) & 0xFF));
119+
out.write((byte) (shiftedTag & 0xFF));
120+
}
121+
}
122+
}
123+
124+
public static void appendNullOrderedFirst(UnsynchronizedByteArrayOutputStream out) {
125+
out.write((byte) (IS_KEY | TYPE_NULL_ORDERED_FIRST));
126+
out.write((byte) 0);
127+
}
128+
129+
public static void appendNullOrderedLast(UnsynchronizedByteArrayOutputStream out) {
130+
out.write((byte) (IS_KEY | TYPE_NULL_ORDERED_LAST));
131+
out.write((byte) 0);
132+
}
133+
134+
public static void appendNotNullMarkerNullOrderedFirst(UnsynchronizedByteArrayOutputStream out) {
135+
out.write((byte) (IS_KEY | TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_FIRST));
136+
}
137+
138+
public static void appendNotNullMarkerNullOrderedLast(UnsynchronizedByteArrayOutputStream out) {
139+
out.write((byte) (IS_KEY | TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_LAST));
140+
}
141+
142+
/**
143+
* Appends a boolean value in ascending (increasing) sort order.
144+
*
145+
* <p>Boolean values are encoded using unsigned integer encoding where false=0 and true=1. This
146+
* preserves the natural ordering where false &lt; true.
147+
*
148+
* @param out the output stream to append to
149+
* @param value the boolean value to encode
150+
*/
151+
public static void appendBoolIncreasing(UnsynchronizedByteArrayOutputStream out, boolean value) {
152+
// BOOL uses unsigned int encoding: false=0, true=1
153+
// For values 0 and 1, payload is always 1 byte
154+
int encoded = value ? 1 : 0;
155+
out.write((byte) (IS_KEY | TYPE_UINT_1)); // Header for 1-byte unsigned int
156+
out.write(
157+
(byte) (encoded << 1)); // Payload: value shifted left by 1 (LSB is prefix-successor bit)
158+
}
159+
160+
/**
161+
* Appends a boolean value in descending (decreasing) sort order.
162+
*
163+
* <p>Boolean values are encoded using unsigned integer encoding where false=0 and true=1, then
164+
* inverted for descending order. This preserves reverse ordering where true &lt; false.
165+
*
166+
* @param out the output stream to append to
167+
* @param value the boolean value to encode
168+
*/
169+
public static void appendBoolDecreasing(UnsynchronizedByteArrayOutputStream out, boolean value) {
170+
// BOOL uses decreasing unsigned int encoding: false=0, true=1, then inverted
171+
// For values 0 and 1, payload is always 1 byte
172+
int encoded = value ? 1 : 0;
173+
out.write(
174+
(byte) (IS_KEY | TYPE_DECREASING_UINT_1)); // Header for 1-byte decreasing unsigned int
175+
out.write((byte) ((~encoded & 0x7F) << 1)); // Inverted payload
176+
}
177+
178+
private static void appendInt64Internal(
179+
UnsynchronizedByteArrayOutputStream out, long val, boolean decreasing, boolean isDouble) {
180+
if (decreasing) {
181+
val = ~val;
182+
}
183+
184+
byte[] buf = new byte[8]; // Max 8 bytes for payload
185+
int len = 0;
186+
long tempVal = val;
187+
188+
if (tempVal >= 0) {
189+
buf[7 - len] = (byte) ((tempVal & 0x7F) << 1);
190+
tempVal >>= 7;
191+
len++;
192+
while (tempVal > 0) {
193+
buf[7 - len] = (byte) (tempVal & 0xFF);
194+
tempVal >>= 8;
195+
len++;
196+
}
197+
} else { // tempVal < 0
198+
// For negative numbers, extend sign bit after shifting
199+
buf[7 - len] = (byte) ((tempVal & 0x7F) << 1);
200+
// Simulate sign extension for right shift of negative number
201+
// (x >> 7) | 0xFE00000000000000ULL; (if x has 64 bits)
202+
// In Java, right shift `>>` on negative longs performs sign extension.
203+
tempVal >>= 7;
204+
len++;
205+
while (tempVal != -1L) { // Loop until all remaining bits are 1s (sign extension)
206+
buf[7 - len] = (byte) (tempVal & 0xFF);
207+
tempVal >>= 8;
208+
len++;
209+
if (len > 8) {
210+
// Defensive assertion: unreachable for any valid 64-bit signed integer
211+
throw new AssertionError("Signed int encoding overflow");
212+
}
213+
}
214+
}
215+
216+
int type;
217+
if (val >= 0) { // Original val before potential bit-negation for decreasing
218+
if (!decreasing) {
219+
type = isDouble ? (TYPE_POS_DOUBLE_1 + len - 1) : (TYPE_POS_INT_1 + len - 1);
220+
} else {
221+
type =
222+
isDouble
223+
? (TYPE_DECREASING_POS_DOUBLE_1 + len - 1)
224+
: (TYPE_DECREASING_POS_INT_1 + len - 1);
225+
}
226+
} else {
227+
if (!decreasing) {
228+
type = isDouble ? (TYPE_NEG_DOUBLE_1 - len + 1) : (TYPE_NEG_INT_1 - len + 1);
229+
} else {
230+
type =
231+
isDouble
232+
? (TYPE_DECREASING_NEG_DOUBLE_1 - len + 1)
233+
: (TYPE_DECREASING_NEG_INT_1 - len + 1);
234+
}
235+
}
236+
out.write((byte) (IS_KEY | type));
237+
out.write(buf, 8 - len, len);
238+
}
239+
240+
public static void appendInt64Increasing(UnsynchronizedByteArrayOutputStream out, long value) {
241+
appendInt64Internal(out, value, false, false);
242+
}
243+
244+
public static void appendInt64Decreasing(UnsynchronizedByteArrayOutputStream out, long value) {
245+
appendInt64Internal(out, value, true, false);
246+
}
247+
248+
public static void appendDoubleIncreasing(UnsynchronizedByteArrayOutputStream out, double value) {
249+
long enc = Double.doubleToRawLongBits(value);
250+
if (enc < 0) {
251+
// Transform negative doubles to maintain lexicographic sort order
252+
enc = Long.MIN_VALUE - enc;
253+
}
254+
appendInt64Internal(out, enc, false, true);
255+
}
256+
257+
public static void appendDoubleDecreasing(UnsynchronizedByteArrayOutputStream out, double value) {
258+
long enc = Double.doubleToRawLongBits(value);
259+
if (enc < 0) {
260+
enc = Long.MIN_VALUE - enc;
261+
}
262+
appendInt64Internal(out, enc, true, true);
263+
}
264+
265+
private static void appendByteSequence(
266+
UnsynchronizedByteArrayOutputStream out, byte[] bytes, boolean decreasing) {
267+
out.write((byte) (IS_KEY | (decreasing ? TYPE_DECREASING_STRING : TYPE_STRING)));
268+
269+
for (byte b : bytes) {
270+
byte currentByte = decreasing ? (byte) ~b : b;
271+
int unsignedByte = currentByte & 0xFF;
272+
if (unsignedByte == 0x00) {
273+
// Escape sequence for 0x00: write 0x00 followed by 0xF0
274+
out.write((byte) 0x00);
275+
out.write(ASCENDING_ZERO_ESCAPE);
276+
} else if (unsignedByte == 0xFF) {
277+
// Escape sequence for 0xFF: write 0xFF followed by 0x10
278+
out.write((byte) 0xFF);
279+
out.write(ASCENDING_FF_ESCAPE);
280+
} else {
281+
out.write((byte) unsignedByte);
282+
}
283+
}
284+
// Terminator
285+
out.write((byte) (decreasing ? 0xFF : 0x00));
286+
out.write(SEP);
287+
}
288+
289+
public static void appendStringIncreasing(UnsynchronizedByteArrayOutputStream out, String value) {
290+
appendByteSequence(out, value.getBytes(StandardCharsets.UTF_8), false);
291+
}
292+
293+
public static void appendStringDecreasing(UnsynchronizedByteArrayOutputStream out, String value) {
294+
appendByteSequence(out, value.getBytes(StandardCharsets.UTF_8), true);
295+
}
296+
297+
public static void appendBytesIncreasing(UnsynchronizedByteArrayOutputStream out, byte[] value) {
298+
appendByteSequence(out, value, false);
299+
}
300+
301+
public static void appendBytesDecreasing(UnsynchronizedByteArrayOutputStream out, byte[] value) {
302+
appendByteSequence(out, value, true);
303+
}
304+
305+
/**
306+
* Encodes a timestamp as 12 bytes: 8 bytes for seconds since epoch (with offset to handle
307+
* negative), 4 bytes for nanoseconds.
308+
*/
309+
public static byte[] encodeTimestamp(long seconds, int nanos) {
310+
long offsetSeconds = seconds + TIMESTAMP_SECONDS_OFFSET;
311+
byte[] buf = new byte[12];
312+
ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN).putLong(offsetSeconds).putInt(nanos);
313+
return buf;
314+
}
315+
316+
/** Encodes a UUID (128-bit) as 16 bytes in big-endian order. */
317+
public static byte[] encodeUuid(long high, long low) {
318+
byte[] buf = new byte[16];
319+
ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN).putLong(high).putLong(low);
320+
return buf;
321+
}
322+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* Copyright 2026 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.spanner.spi.v1;
18+
19+
import com.google.api.core.InternalApi;
20+
import com.google.protobuf.ByteString;
21+
22+
/** Represents a key range with start and limit boundaries for routing. */
23+
@InternalApi
24+
public class TargetRange {
25+
public ByteString start;
26+
public ByteString limit;
27+
public boolean approximate;
28+
29+
public TargetRange(ByteString start, ByteString limit, boolean approximate) {
30+
this.start = start;
31+
this.limit = limit;
32+
this.approximate = approximate;
33+
}
34+
35+
public boolean isPoint() {
36+
return limit.isEmpty();
37+
}
38+
39+
/**
40+
* Merges another TargetRange into this one. The resulting range will be the union of the two
41+
* ranges, taking the minimum start key and maximum limit key.
42+
*/
43+
public void mergeFrom(TargetRange other) {
44+
if (ByteString.unsignedLexicographicalComparator().compare(other.start, this.start) < 0) {
45+
this.start = other.start;
46+
}
47+
if (other.isPoint()
48+
&& ByteString.unsignedLexicographicalComparator().compare(other.start, this.limit) >= 0) {
49+
this.limit = SsFormat.makePrefixSuccessor(other.start);
50+
} else if (ByteString.unsignedLexicographicalComparator().compare(other.limit, this.limit)
51+
> 0) {
52+
this.limit = other.limit;
53+
}
54+
this.approximate |= other.approximate;
55+
}
56+
}

0 commit comments

Comments
 (0)