16
16
package com .google .cloud .dataflow .sdk .util ;
17
17
18
18
import static com .google .common .base .Preconditions .checkArgument ;
19
+ import static com .google .common .base .Preconditions .checkNotNull ;
19
20
20
21
import com .google .cloud .dataflow .sdk .coders .AtomicCoder ;
21
22
import com .google .cloud .dataflow .sdk .coders .ByteArrayCoder ;
22
23
import com .google .cloud .dataflow .sdk .coders .Coder ;
23
24
import com .google .cloud .dataflow .sdk .coders .CoderException ;
24
25
import com .google .common .base .MoreObjects ;
25
26
import com .google .common .io .ByteStreams ;
27
+ import com .google .common .primitives .UnsignedBytes ;
26
28
27
29
import com .fasterxml .jackson .annotation .JsonCreator ;
28
30
@@ -50,7 +52,9 @@ public class RandomAccessData {
50
52
/**
51
53
* A {@link Coder} which encodes the valid parts of this stream.
52
54
* This follows the same encoding scheme as {@link ByteArrayCoder}.
53
- * This coder is deterministic and the consistent with equals.
55
+ * This coder is deterministic and consistent with equals.
56
+ *
57
+ * This coder does not support encoding positive infinity.
54
58
*/
55
59
public static class RandomAccessDataCoder extends AtomicCoder <RandomAccessData > {
56
60
private static final RandomAccessDataCoder INSTANCE = new RandomAccessDataCoder ();
@@ -63,6 +67,9 @@ public static RandomAccessDataCoder of() {
63
67
@ Override
64
68
public void encode (RandomAccessData value , OutputStream outStream , Coder .Context context )
65
69
throws CoderException , IOException {
70
+ if (value == POSITIVE_INFINITY ) {
71
+ throw new CoderException ("Positive infinity can not be encoded." );
72
+ }
66
73
if (!context .isWholeStream ) {
67
74
VarInt .encode (value .size , outStream );
68
75
}
@@ -107,18 +114,45 @@ protected long getEncodedElementByteSize(RandomAccessData value, Coder.Context c
107
114
}
108
115
}
109
116
117
+ public static final UnsignedLexicographicalComparator UNSIGNED_LEXICOGRAPHICAL_COMPARATOR =
118
+ new UnsignedLexicographicalComparator ();
119
+
110
120
/**
111
121
* A {@link Comparator} that compares two byte arrays lexicographically. It compares
112
122
* values as a list of unsigned bytes. The first pair of values that follow any common prefix,
113
123
* or when one array is a prefix of the other, treats the shorter array as the lesser.
114
- * For example, [] < [0x01] < [0x01, 0x7F] < [0x01, 0x80] < [0x02].
124
+ * For example, [] < [0x01] < [0x01, 0x7F] < [0x01, 0x80] < [0x02] < POSITIVE INFINITY.
125
+ *
126
+ * <p>Note that a token type of positive infinity is supported and is greater than
127
+ * all other {@link RandomAccessData}.
115
128
*/
116
- public static final Comparator <RandomAccessData > UNSIGNED_LEXICOGRAPHICAL_COMPARATOR =
117
- new Comparator <RandomAccessData >() {
129
+ public static final class UnsignedLexicographicalComparator
130
+ implements Comparator <RandomAccessData > {
131
+ // Do not instantiate
132
+ private UnsignedLexicographicalComparator () {
133
+ }
134
+
118
135
@ Override
119
136
public int compare (RandomAccessData o1 , RandomAccessData o2 ) {
137
+ return compare (o1 , o2 , 0 /* start from the beginning */ );
138
+ }
139
+
140
+ /**
141
+ * Compare the two sets of bytes starting at the given offset.
142
+ */
143
+ public int compare (RandomAccessData o1 , RandomAccessData o2 , int startOffset ) {
144
+ if (o1 == o2 ) {
145
+ return 0 ;
146
+ }
147
+ if (o1 == POSITIVE_INFINITY ) {
148
+ return 1 ;
149
+ }
150
+ if (o2 == POSITIVE_INFINITY ) {
151
+ return -1 ;
152
+ }
153
+
120
154
int minBytesLen = Math .min (o1 .size , o2 .size );
121
- for (int i = 0 ; i < minBytesLen ; i ++) {
155
+ for (int i = startOffset ; i < minBytesLen ; i ++) {
122
156
// unsigned comparison
123
157
int b1 = o1 .buffer [i ] & 0xFF ;
124
158
int b2 = o2 .buffer [i ] & 0xFF ;
@@ -132,7 +166,45 @@ public int compare(RandomAccessData o1, RandomAccessData o2) {
132
166
// If both lengths are equal, then both streams are equal.
133
167
return o1 .size - o2 .size ;
134
168
}
135
- };
169
+
170
+ /**
171
+ * Compute the length of the common prefix of the two provided sets of bytes.
172
+ */
173
+ public int commonPrefixLength (RandomAccessData o1 , RandomAccessData o2 ) {
174
+ int minBytesLen = Math .min (o1 .size , o2 .size );
175
+ for (int i = 0 ; i < minBytesLen ; i ++) {
176
+ // unsigned comparison
177
+ int b1 = o1 .buffer [i ] & 0xFF ;
178
+ int b2 = o2 .buffer [i ] & 0xFF ;
179
+ if (b1 != b2 ) {
180
+ return i ;
181
+ }
182
+ }
183
+ return minBytesLen ;
184
+ }
185
+ }
186
+
187
+ /** A token type representing positive infinity. */
188
+ static final RandomAccessData POSITIVE_INFINITY = new RandomAccessData (0 );
189
+
190
+ /**
191
+ * Returns a RandomAccessData that is the smallest value of same length which
192
+ * is strictly greater than this. Note that if this is empty or is all 0xFF then
193
+ * a token value of positive infinity is returned.
194
+ *
195
+ * The {@link UnsignedLexicographicalComparator} supports comparing {@link RandomAccessData}
196
+ * with support for positive infinitiy.
197
+ */
198
+ public RandomAccessData increment () throws IOException {
199
+ RandomAccessData copy = copy ();
200
+ for (int i = copy .size - 1 ; i >= 0 ; --i ) {
201
+ if (copy .buffer [i ] != UnsignedBytes .MAX_VALUE ) {
202
+ copy .buffer [i ] = UnsignedBytes .checkedCast (UnsignedBytes .toInt (copy .buffer [i ]) + 1 );
203
+ return copy ;
204
+ }
205
+ }
206
+ return POSITIVE_INFINITY ;
207
+ }
136
208
137
209
private static final int DEFAULT_INITIAL_BUFFER_SIZE = 128 ;
138
210
@@ -141,10 +213,17 @@ public RandomAccessData() {
141
213
this (DEFAULT_INITIAL_BUFFER_SIZE );
142
214
}
143
215
216
+ /** Constructs a RandomAccessData with the initial buffer. */
217
+ public RandomAccessData (byte [] initialBuffer ) {
218
+ checkNotNull (initialBuffer );
219
+ this .buffer = initialBuffer ;
220
+ this .size = initialBuffer .length ;
221
+ }
222
+
144
223
/** Constructs a RandomAccessData with the given buffer size. */
145
224
public RandomAccessData (int initialBufferSize ) {
146
225
checkArgument (initialBufferSize >= 0 , "Expected initial buffer size to be greater than zero." );
147
- buffer = new byte [initialBufferSize ];
226
+ this . buffer = new byte [initialBufferSize ];
148
227
}
149
228
150
229
private byte [] buffer ;
@@ -220,6 +299,13 @@ public void readFrom(InputStream inStream, int offset, int length) throws IOExce
220
299
size = offset + length ;
221
300
}
222
301
302
+ /** Returns a copy of this RandomAccessData. */
303
+ public RandomAccessData copy () throws IOException {
304
+ RandomAccessData copy = new RandomAccessData (size );
305
+ writeTo (copy .asOutputStream (), 0 , size );
306
+ return copy ;
307
+ }
308
+
223
309
@ Override
224
310
public boolean equals (Object other ) {
225
311
if (other == this ) {
@@ -244,7 +330,7 @@ public int hashCode() {
244
330
@ Override
245
331
public String toString () {
246
332
return MoreObjects .toStringHelper (this )
247
- .add ("buffer" , buffer )
333
+ .add ("buffer" , Arrays . copyOf ( buffer , size ) )
248
334
.add ("size" , size )
249
335
.toString ();
250
336
}
0 commit comments