|
| 1 | +/* |
| 2 | + * Copyright 2026 Google LLC |
| 3 | + * |
| 4 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | + * you may not use this file except in compliance with the License. |
| 6 | + * You may obtain a copy of the License at |
| 7 | + * |
| 8 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | + * |
| 10 | + * Unless required by applicable law or agreed to in writing, software |
| 11 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | + * See the License for the specific language governing permissions and |
| 14 | + * limitations under the License. |
| 15 | + */ |
| 16 | + |
| 17 | +package com.google.cloud.spanner.spi.v1; |
| 18 | + |
| 19 | +import com.google.api.core.InternalApi; |
| 20 | +import com.google.protobuf.ByteString; |
| 21 | +import java.nio.ByteBuffer; |
| 22 | +import java.nio.ByteOrder; |
| 23 | +import java.nio.charset.StandardCharsets; |
| 24 | + |
| 25 | +/** |
| 26 | + * Sortable String Format encoding utilities for Spanner keys. |
| 27 | + * |
| 28 | + * <p>This class provides methods to encode various data types into a byte format that preserves |
| 29 | + * lexicographic ordering. The encoding supports both increasing and decreasing sort orders. |
| 30 | + */ |
| 31 | +@InternalApi |
| 32 | +public final class SsFormat { |
| 33 | + |
| 34 | + /** |
| 35 | + * Makes the given key a prefix successor. This means that the returned key is the smallest |
| 36 | + * possible key that is larger than the input key, and that does not have the input key as a |
| 37 | + * prefix. |
| 38 | + * |
| 39 | + * <p>This is done by flipping the least significant bit of the last byte of the key. |
| 40 | + * |
| 41 | + * @param key The key to make a prefix successor. |
| 42 | + * @return The prefix successor key. |
| 43 | + */ |
| 44 | + public static ByteString makePrefixSuccessor(ByteString key) { |
| 45 | + if (key == null || key.isEmpty()) { |
| 46 | + return ByteString.EMPTY; |
| 47 | + } |
| 48 | + byte[] bytes = key.toByteArray(); |
| 49 | + bytes[bytes.length - 1] = (byte) (bytes[bytes.length - 1] | 1); |
| 50 | + return ByteString.copyFrom(bytes); |
| 51 | + } |
| 52 | + |
| 53 | + private SsFormat() {} |
| 54 | + |
| 55 | + private static final int IS_KEY = 0x80; |
| 56 | + |
| 57 | + // HeaderType enum values |
| 58 | + // Unsigned integers (variable length 1-9 bytes) |
| 59 | + private static final int TYPE_UINT_1 = 0; |
| 60 | + private static final int TYPE_DECREASING_UINT_1 = 40; |
| 61 | + |
| 62 | + // Signed integers (variable length 1-8 bytes) |
| 63 | + private static final int TYPE_NEG_INT_1 = 16; |
| 64 | + private static final int TYPE_POS_INT_1 = 17; |
| 65 | + private static final int TYPE_DECREASING_NEG_INT_1 = 48; |
| 66 | + private static final int TYPE_DECREASING_POS_INT_1 = 49; |
| 67 | + |
| 68 | + // Strings |
| 69 | + private static final int TYPE_STRING = 25; |
| 70 | + private static final int TYPE_DECREASING_STRING = 57; |
| 71 | + |
| 72 | + // Nullable markers |
| 73 | + private static final int TYPE_NULL_ORDERED_FIRST = 27; |
| 74 | + private static final int TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_FIRST = 28; |
| 75 | + private static final int TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_LAST = 59; |
| 76 | + private static final int TYPE_NULL_ORDERED_LAST = 60; |
| 77 | + |
| 78 | + // Doubles (variable length 1-8 bytes, encoded as transformed int64) |
| 79 | + private static final int TYPE_NEG_DOUBLE_1 = 73; |
| 80 | + private static final int TYPE_POS_DOUBLE_1 = 74; |
| 81 | + private static final int TYPE_DECREASING_NEG_DOUBLE_1 = 89; |
| 82 | + private static final int TYPE_DECREASING_POS_DOUBLE_1 = 90; |
| 83 | + |
| 84 | + // EscapeChar enum values |
| 85 | + private static final byte ASCENDING_ZERO_ESCAPE = (byte) 0xf0; |
| 86 | + private static final byte ASCENDING_FF_ESCAPE = (byte) 0x10; |
| 87 | + private static final byte SEP = (byte) 0x78; // 'x' |
| 88 | + |
| 89 | + // For AppendCompositeTag |
| 90 | + private static final int K_OBJECT_EXISTENCE_TAG = 0x7e; |
| 91 | + private static final int K_MAX_FIELD_TAG = 0xffff; |
| 92 | + |
| 93 | + // Offset to make negative timestamp seconds sort correctly |
| 94 | + private static final long TIMESTAMP_SECONDS_OFFSET = 1L << 63; |
| 95 | + |
| 96 | + public static void appendCompositeTag(UnsynchronizedByteArrayOutputStream out, int tag) { |
| 97 | + if (tag == K_OBJECT_EXISTENCE_TAG || tag <= 0 || tag > K_MAX_FIELD_TAG) { |
| 98 | + throw new IllegalArgumentException("Invalid tag value: " + tag); |
| 99 | + } |
| 100 | + |
| 101 | + if (tag < 16) { |
| 102 | + // Short tag: 000 TTTT S (S is LSB of tag, but here tag is original, so S=0) |
| 103 | + // Encodes as (tag << 1) |
| 104 | + out.write((byte) (tag << 1)); |
| 105 | + } else { |
| 106 | + // Long tag |
| 107 | + int shiftedTag = tag << 1; // LSB is 0 for prefix successor |
| 108 | + if (shiftedTag < (1 << (5 + 8))) { // Original tag < 4096 |
| 109 | + // Header: num_extra_bytes=1 (01xxxxx), P=payload bits from tag |
| 110 | + // (1 << 5) is 00100000 |
| 111 | + // (shiftedTag >> 8) are the 5 MSBs of the payload part of the tag |
| 112 | + out.write((byte) ((1 << 5) | (shiftedTag >> 8))); |
| 113 | + out.write((byte) (shiftedTag & 0xFF)); |
| 114 | + } else { // Original tag >= 4096 and <= K_MAX_FIELD_TAG (65535) |
| 115 | + // Header: num_extra_bytes=2 (10xxxxx) |
| 116 | + // (2 << 5) is 01000000 |
| 117 | + out.write((byte) ((2 << 5) | (shiftedTag >> 16))); |
| 118 | + out.write((byte) ((shiftedTag >> 8) & 0xFF)); |
| 119 | + out.write((byte) (shiftedTag & 0xFF)); |
| 120 | + } |
| 121 | + } |
| 122 | + } |
| 123 | + |
| 124 | + public static void appendNullOrderedFirst(UnsynchronizedByteArrayOutputStream out) { |
| 125 | + out.write((byte) (IS_KEY | TYPE_NULL_ORDERED_FIRST)); |
| 126 | + out.write((byte) 0); |
| 127 | + } |
| 128 | + |
| 129 | + public static void appendNullOrderedLast(UnsynchronizedByteArrayOutputStream out) { |
| 130 | + out.write((byte) (IS_KEY | TYPE_NULL_ORDERED_LAST)); |
| 131 | + out.write((byte) 0); |
| 132 | + } |
| 133 | + |
| 134 | + public static void appendNotNullMarkerNullOrderedFirst(UnsynchronizedByteArrayOutputStream out) { |
| 135 | + out.write((byte) (IS_KEY | TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_FIRST)); |
| 136 | + } |
| 137 | + |
| 138 | + public static void appendNotNullMarkerNullOrderedLast(UnsynchronizedByteArrayOutputStream out) { |
| 139 | + out.write((byte) (IS_KEY | TYPE_NULLABLE_NOT_NULL_NULL_ORDERED_LAST)); |
| 140 | + } |
| 141 | + |
| 142 | + /** |
| 143 | + * Appends a boolean value in ascending (increasing) sort order. |
| 144 | + * |
| 145 | + * <p>Boolean values are encoded using unsigned integer encoding where false=0 and true=1. This |
| 146 | + * preserves the natural ordering where false < true. |
| 147 | + * |
| 148 | + * @param out the output stream to append to |
| 149 | + * @param value the boolean value to encode |
| 150 | + */ |
| 151 | + public static void appendBoolIncreasing(UnsynchronizedByteArrayOutputStream out, boolean value) { |
| 152 | + // BOOL uses unsigned int encoding: false=0, true=1 |
| 153 | + // For values 0 and 1, payload is always 1 byte |
| 154 | + int encoded = value ? 1 : 0; |
| 155 | + out.write((byte) (IS_KEY | TYPE_UINT_1)); // Header for 1-byte unsigned int |
| 156 | + out.write( |
| 157 | + (byte) (encoded << 1)); // Payload: value shifted left by 1 (LSB is prefix-successor bit) |
| 158 | + } |
| 159 | + |
| 160 | + /** |
| 161 | + * Appends a boolean value in descending (decreasing) sort order. |
| 162 | + * |
| 163 | + * <p>Boolean values are encoded using unsigned integer encoding where false=0 and true=1, then |
| 164 | + * inverted for descending order. This preserves reverse ordering where true < false. |
| 165 | + * |
| 166 | + * @param out the output stream to append to |
| 167 | + * @param value the boolean value to encode |
| 168 | + */ |
| 169 | + public static void appendBoolDecreasing(UnsynchronizedByteArrayOutputStream out, boolean value) { |
| 170 | + // BOOL uses decreasing unsigned int encoding: false=0, true=1, then inverted |
| 171 | + // For values 0 and 1, payload is always 1 byte |
| 172 | + int encoded = value ? 1 : 0; |
| 173 | + out.write( |
| 174 | + (byte) (IS_KEY | TYPE_DECREASING_UINT_1)); // Header for 1-byte decreasing unsigned int |
| 175 | + out.write((byte) ((~encoded & 0x7F) << 1)); // Inverted payload |
| 176 | + } |
| 177 | + |
| 178 | + private static void appendInt64Internal( |
| 179 | + UnsynchronizedByteArrayOutputStream out, long val, boolean decreasing, boolean isDouble) { |
| 180 | + if (decreasing) { |
| 181 | + val = ~val; |
| 182 | + } |
| 183 | + |
| 184 | + byte[] buf = new byte[8]; // Max 8 bytes for payload |
| 185 | + int len = 0; |
| 186 | + long tempVal = val; |
| 187 | + |
| 188 | + if (tempVal >= 0) { |
| 189 | + buf[7 - len] = (byte) ((tempVal & 0x7F) << 1); |
| 190 | + tempVal >>= 7; |
| 191 | + len++; |
| 192 | + while (tempVal > 0) { |
| 193 | + buf[7 - len] = (byte) (tempVal & 0xFF); |
| 194 | + tempVal >>= 8; |
| 195 | + len++; |
| 196 | + } |
| 197 | + } else { // tempVal < 0 |
| 198 | + // For negative numbers, extend sign bit after shifting |
| 199 | + buf[7 - len] = (byte) ((tempVal & 0x7F) << 1); |
| 200 | + // Simulate sign extension for right shift of negative number |
| 201 | + // (x >> 7) | 0xFE00000000000000ULL; (if x has 64 bits) |
| 202 | + // In Java, right shift `>>` on negative longs performs sign extension. |
| 203 | + tempVal >>= 7; |
| 204 | + len++; |
| 205 | + while (tempVal != -1L) { // Loop until all remaining bits are 1s (sign extension) |
| 206 | + buf[7 - len] = (byte) (tempVal & 0xFF); |
| 207 | + tempVal >>= 8; |
| 208 | + len++; |
| 209 | + if (len > 8) { |
| 210 | + // Defensive assertion: unreachable for any valid 64-bit signed integer |
| 211 | + throw new AssertionError("Signed int encoding overflow"); |
| 212 | + } |
| 213 | + } |
| 214 | + } |
| 215 | + |
| 216 | + int type; |
| 217 | + if (val >= 0) { // Original val before potential bit-negation for decreasing |
| 218 | + if (!decreasing) { |
| 219 | + type = isDouble ? (TYPE_POS_DOUBLE_1 + len - 1) : (TYPE_POS_INT_1 + len - 1); |
| 220 | + } else { |
| 221 | + type = |
| 222 | + isDouble |
| 223 | + ? (TYPE_DECREASING_POS_DOUBLE_1 + len - 1) |
| 224 | + : (TYPE_DECREASING_POS_INT_1 + len - 1); |
| 225 | + } |
| 226 | + } else { |
| 227 | + if (!decreasing) { |
| 228 | + type = isDouble ? (TYPE_NEG_DOUBLE_1 - len + 1) : (TYPE_NEG_INT_1 - len + 1); |
| 229 | + } else { |
| 230 | + type = |
| 231 | + isDouble |
| 232 | + ? (TYPE_DECREASING_NEG_DOUBLE_1 - len + 1) |
| 233 | + : (TYPE_DECREASING_NEG_INT_1 - len + 1); |
| 234 | + } |
| 235 | + } |
| 236 | + out.write((byte) (IS_KEY | type)); |
| 237 | + out.write(buf, 8 - len, len); |
| 238 | + } |
| 239 | + |
| 240 | + public static void appendInt64Increasing(UnsynchronizedByteArrayOutputStream out, long value) { |
| 241 | + appendInt64Internal(out, value, false, false); |
| 242 | + } |
| 243 | + |
| 244 | + public static void appendInt64Decreasing(UnsynchronizedByteArrayOutputStream out, long value) { |
| 245 | + appendInt64Internal(out, value, true, false); |
| 246 | + } |
| 247 | + |
| 248 | + public static void appendDoubleIncreasing(UnsynchronizedByteArrayOutputStream out, double value) { |
| 249 | + long enc = Double.doubleToRawLongBits(value); |
| 250 | + if (enc < 0) { |
| 251 | + // Transform negative doubles to maintain lexicographic sort order |
| 252 | + enc = Long.MIN_VALUE - enc; |
| 253 | + } |
| 254 | + appendInt64Internal(out, enc, false, true); |
| 255 | + } |
| 256 | + |
| 257 | + public static void appendDoubleDecreasing(UnsynchronizedByteArrayOutputStream out, double value) { |
| 258 | + long enc = Double.doubleToRawLongBits(value); |
| 259 | + if (enc < 0) { |
| 260 | + enc = Long.MIN_VALUE - enc; |
| 261 | + } |
| 262 | + appendInt64Internal(out, enc, true, true); |
| 263 | + } |
| 264 | + |
| 265 | + private static void appendByteSequence( |
| 266 | + UnsynchronizedByteArrayOutputStream out, byte[] bytes, boolean decreasing) { |
| 267 | + out.write((byte) (IS_KEY | (decreasing ? TYPE_DECREASING_STRING : TYPE_STRING))); |
| 268 | + |
| 269 | + for (byte b : bytes) { |
| 270 | + byte currentByte = decreasing ? (byte) ~b : b; |
| 271 | + int unsignedByte = currentByte & 0xFF; |
| 272 | + if (unsignedByte == 0x00) { |
| 273 | + // Escape sequence for 0x00: write 0x00 followed by 0xF0 |
| 274 | + out.write((byte) 0x00); |
| 275 | + out.write(ASCENDING_ZERO_ESCAPE); |
| 276 | + } else if (unsignedByte == 0xFF) { |
| 277 | + // Escape sequence for 0xFF: write 0xFF followed by 0x10 |
| 278 | + out.write((byte) 0xFF); |
| 279 | + out.write(ASCENDING_FF_ESCAPE); |
| 280 | + } else { |
| 281 | + out.write((byte) unsignedByte); |
| 282 | + } |
| 283 | + } |
| 284 | + // Terminator |
| 285 | + out.write((byte) (decreasing ? 0xFF : 0x00)); |
| 286 | + out.write(SEP); |
| 287 | + } |
| 288 | + |
| 289 | + public static void appendStringIncreasing(UnsynchronizedByteArrayOutputStream out, String value) { |
| 290 | + appendByteSequence(out, value.getBytes(StandardCharsets.UTF_8), false); |
| 291 | + } |
| 292 | + |
| 293 | + public static void appendStringDecreasing(UnsynchronizedByteArrayOutputStream out, String value) { |
| 294 | + appendByteSequence(out, value.getBytes(StandardCharsets.UTF_8), true); |
| 295 | + } |
| 296 | + |
| 297 | + public static void appendBytesIncreasing(UnsynchronizedByteArrayOutputStream out, byte[] value) { |
| 298 | + appendByteSequence(out, value, false); |
| 299 | + } |
| 300 | + |
| 301 | + public static void appendBytesDecreasing(UnsynchronizedByteArrayOutputStream out, byte[] value) { |
| 302 | + appendByteSequence(out, value, true); |
| 303 | + } |
| 304 | + |
| 305 | + /** |
| 306 | + * Encodes a timestamp as 12 bytes: 8 bytes for seconds since epoch (with offset to handle |
| 307 | + * negative), 4 bytes for nanoseconds. |
| 308 | + */ |
| 309 | + public static byte[] encodeTimestamp(long seconds, int nanos) { |
| 310 | + long offsetSeconds = seconds + TIMESTAMP_SECONDS_OFFSET; |
| 311 | + byte[] buf = new byte[12]; |
| 312 | + ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN).putLong(offsetSeconds).putInt(nanos); |
| 313 | + return buf; |
| 314 | + } |
| 315 | + |
| 316 | + /** Encodes a UUID (128-bit) as 16 bytes in big-endian order. */ |
| 317 | + public static byte[] encodeUuid(long high, long low) { |
| 318 | + byte[] buf = new byte[16]; |
| 319 | + ByteBuffer.wrap(buf).order(ByteOrder.BIG_ENDIAN).putLong(high).putLong(low); |
| 320 | + return buf; |
| 321 | + } |
| 322 | +} |
0 commit comments