Skip to content

Commit

Permalink
AVRO-4061: Use Default Value of 1 For UTF8 Hash (#3177)
Browse files (browse the repository at this point in the history)
  • Loading branch information
belugabehr authored Sep 27, 2024
1 parent f99aa83 commit 1a2d200
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ private static int hashCode(HashData data, Schema schema) throws IOException {
case FIXED:
return hashBytes(1, data, schema.getFixedSize(), false);
case STRING:
- return hashBytes(0, data, decoder.readInt(), false);
+ return hashBytes(1, data, decoder.readInt(), false);
case BYTES:
return hashBytes(1, data, decoder.readInt(), true);
case NULL:
Expand Down
4 changes: 3 additions & 1 deletion lang/java/avro/src/main/java/org/apache/avro/util/Utf8.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ public class Utf8 implements Comparable<Utf8>, CharSequence, Externalizable {
private String string;

public Utf8() {
- bytes = EMPTY;
+ this.bytes = EMPTY;
+ this.hash = 1;
}

public Utf8(String string) {
Expand Down Expand Up @@ -174,6 +175,7 @@ public int hashCode() {
if (h == 0) {
byte[] bytes = this.bytes;
int length = this.length;
+ h = 1;
for (int i = 0; i < length; i++) {
h = h * 31 + bytes[i];
}
Expand Down
37 changes: 19 additions & 18 deletions lang/java/avro/src/test/java/org/apache/avro/util/TestUtf8.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,43 +59,44 @@ void arrayReusedWhenLargerThanRequestedSize() {

@Test
void hashCodeReused() {
- assertEquals(97, new Utf8("a").hashCode());
- assertEquals(3904, new Utf8("zz").hashCode());
- assertEquals(122, new Utf8("z").hashCode());
- assertEquals(99162322, new Utf8("hello").hashCode());
- assertEquals(3198781, new Utf8("hell").hashCode());
+ assertEquals(1, new Utf8().hashCode());
+ assertEquals(128, new Utf8("a").hashCode());
+ assertEquals(4865, new Utf8("zz").hashCode());
+ assertEquals(153, new Utf8("z").hashCode());
+ assertEquals(127791473, new Utf8("hello").hashCode());
+ assertEquals(4122302, new Utf8("hell").hashCode());

Utf8 u = new Utf8("a");
- assertEquals(97, u.hashCode());
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
+ assertEquals(128, u.hashCode());

u.set("a");
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());

u.setByteLength(1);
- assertEquals(97, u.hashCode());
+ assertEquals(128, u.hashCode());
u.setByteLength(2);
- assertNotEquals(97, u.hashCode());
+ assertNotEquals(128, u.hashCode());

u.set("zz");
- assertEquals(3904, u.hashCode());
+ assertEquals(4865, u.hashCode());
u.setByteLength(1);
- assertEquals(122, u.hashCode());
+ assertEquals(153, u.hashCode());

u.set("hello");
- assertEquals(99162322, u.hashCode());
+ assertEquals(127791473, u.hashCode());
u.setByteLength(4);
- assertEquals(3198781, u.hashCode());
+ assertEquals(4122302, u.hashCode());

u.set(new Utf8("zz"));
- assertEquals(3904, u.hashCode());
+ assertEquals(4865, u.hashCode());
u.setByteLength(1);
- assertEquals(122, u.hashCode());
+ assertEquals(153, u.hashCode());

u.set(new Utf8("hello"));
- assertEquals(99162322, u.hashCode());
+ assertEquals(127791473, u.hashCode());
u.setByteLength(4);
- assertEquals(3198781, u.hashCode());
+ assertEquals(4122302, u.hashCode());
}

@Test
Expand Down

0 comments on commit 1a2d200

Please sign in to comment.