Skip to content

Commit

Permalink
Use CONV to apply proper mysql chunking (#983)
Browse files Browse the repository at this point in the history
WARNING - Deploy carefully: this changes the partition hash predicate, so any queries for a bootstrap that is active during deployment will be impacted
  • Loading branch information
jogrogan authored May 21, 2024
1 parent 8691fde commit 05eb6d1
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 22 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public class MySqlChunkedQueryManager implements ChunkedQueryManager {
private static final String SELECT_FROM = "SELECT * FROM ( ";

/** Generate base predicate for sharding keys to given number of partitions.
* Ex: MOD ( MD5 ( CONCAT ( K1, K2, K3 ) ) , 10 ) for a table with 3 keys {K1, K2, K3} and 10 partitions */
* Ex: MOD ( CONV ( MD5 ( CONCAT ( K1, K2, K3 ) ) , 16, 10 ) , 10 ) for a table with 3 keys {K1, K2, K3} and 10 partitions */
private static String generatePerPartitionHashPredicate(List<String> keys, int partitionCount) {
StringBuilder query = new StringBuilder();
int keyCount = keys.size();
Expand All @@ -31,15 +31,17 @@ private static String generatePerPartitionHashPredicate(List<String> keys, int p
}
query.append(" )");

// Wrap that with MOD and MD5 to generate a hash for sharding
// MOD ( MD5 ( CONCAT ( A, B, C ) ) , 10 )
// Wrap that with MOD, CONV, and MD5 to generate a hash for sharding
// MOD ( CONV ( MD5 ( CONCAT ( A, B, C ) ) , 16, 10 ) , 10 )
query.insert(0, "MD5 ( ").append(" )");
// 16, 10 converts from HEX to DEC
query.insert(0, "CONV ( ").append(" , 16, 10 )");
query.insert(0, "MOD ( ").append(" , ").append(partitionCount).append(" )");
return query.toString();
}

/** Generate predicate for filtering rows hashing to the assigned partitions :
* Ex: WHERE ( MOD ( MD5 ( CONCAT ( K1, K2, K3 ) ) , 10 ) IN (1 , 6 ) )
* Ex: WHERE ( MOD ( CONV ( MD5 ( CONCAT ( K1, K2, K3 ) ) , 16, 10 ) , 10 ) IN (1 , 6 ) )
* where 1 and 6 are the assigned partitions, 10 the partition count and, {K1, K2, K3} the keys of the table
*/
private static String generateFullPartitionHashPredicate(String perPartitionPredicate, List<Integer> partitions) {
Expand Down Expand Up @@ -119,7 +121,7 @@ public void prepareChunkedQuery(PreparedStatement stmt, List<Object> values) thr
// SELECT * FROM
// (
// SELECT * FROM TABLE
// ) nestedTab1 WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 2 , 5 ) )
// ) nestedTab1 WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) )
// AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) )
// ORDER BY KEY1 , KEY2
// ) as nestedTab2 LIMIT 10;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,13 @@ public void testSimpleKeySinglePartition() {
* (
* SELECT * FROM TABLE
* ) nestedTab1
* WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) )
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) )
* ORDER BY KEY1
* ) as nestedTab2 LIMIT 10;
*/
String firstExpected =
"SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
+ "WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";

/**
* SELECT * FROM
Expand All @@ -42,12 +42,12 @@ public void testSimpleKeySinglePartition() {
* (
* SELECT * FROM TABLE
* ) nestedTab1
* WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) )
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) )
* ORDER BY KEY1
* ) as nestedTab2 LIMIT 10;
*/
String chunkedExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
+ "WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
testQueryString(MANAGER, firstExpected, chunkedExpected, NESTED_QUERY, KEY, CHUNK_SIZE, PARTITION_COUNT, PARTITION);
}

Expand All @@ -64,11 +64,11 @@ public void testSimpleKeyMultiPartition() {
* (
* SELECT * FROM TABLE
* ) nestedTab1
* WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 2 , 5 ) ) ORDER BY KEY1
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) ) ORDER BY KEY1
* ) as nestedTab2 LIMIT 10;
*/
String firstExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 2 , 5 ) ) "
+ "ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
String firstExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) "
+ "IN ( 2 , 5 ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";

/**
* SELECT * FROM
Expand All @@ -77,11 +77,11 @@ public void testSimpleKeyMultiPartition() {
* (
* SELECT * FROM TABLE
* ) nestedTab1
* WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 2 , 5 ) ) AND ( ( KEY1 > ? ) ) ORDER BY KEY1
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) ) AND ( ( KEY1 > ? ) ) ORDER BY KEY1
* ) as nestedTab2 LIMIT 10;
*/
String chunkedExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
+ "WHERE ( MOD ( MD5 ( CONCAT ( KEY1 ) ) , 10 ) IN ( 2 , 5 ) ) "
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) ) "
+ "AND ( ( KEY1 > ? ) ) ORDER BY KEY1 ) as nestedTab2 LIMIT 10";
testQueryString(MANAGER, firstExpected, chunkedExpected, NESTED_QUERY, KEY, CHUNK_SIZE, PARTITION_COUNT,
PARTITIONS);
Expand All @@ -96,12 +96,12 @@ public void testCompositeKeySinglePartition() {
* SELECT * FROM
* (
* SELECT * FROM TABLE
* ) nestedTab1 WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 3 ) )
* ) nestedTab1 WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 3 ) )
* ORDER BY KEY1 , KEY2
* ) as nestedTab2 LIMIT 10;
*/
String firstExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1"
+ " WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 3 ) ) ORDER BY KEY1 , KEY2 ) as nestedTab2 LIMIT 10";
+ " WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) ORDER BY KEY1 , KEY2 ) as nestedTab2 LIMIT 10";

/**
* SELECT * FROM
Expand All @@ -110,12 +110,12 @@ public void testCompositeKeySinglePartition() {
* (
* SELECT * FROM TABLE
* ) nestedTab1
* WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) )
* WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) )
* ORDER BY KEY1 , KEY2
* ) as nestedTab2 LIMIT 10;
*/
String chunkedExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
+ "WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) ) "
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 3 ) ) AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) ) "
+ "ORDER BY KEY1 , KEY2 ) as nestedTab2 LIMIT 10";
testQueryString(MANAGER, firstExpected, chunkedExpected, NESTED_QUERY, KEYS, CHUNK_SIZE, PARTITION_COUNT,
PARTITION);
Expand All @@ -130,12 +130,12 @@ public void testCompositeKeyMultiPartition() {
* SELECT * FROM
* (
* SELECT * FROM TABLE
* ) nestedTab1 WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 2 , 5 ) )
* ) nestedTab1 WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) )
* ORDER BY KEY1 , KEY2
* ) as nestedTab2 LIMIT 10;
*/
String firstExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
+ "WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 2 , 5 ) ) "
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) ) "
+ "ORDER BY KEY1 , KEY2 ) as nestedTab2 LIMIT 10";

/**
Expand All @@ -145,13 +145,13 @@ public void testCompositeKeyMultiPartition() {
* SELECT * FROM
* (
* SELECT * FROM TABLE
* ) nestedTab1 WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 2 , 5 ) )
* ) nestedTab1 WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) )
* AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) )
* ORDER BY KEY1 , KEY2
* ) as nestedTab2 LIMIT 10;
*/
String chunkedExpected = "SELECT * FROM ( SELECT * FROM ( SELECT * FROM TABLE ) nestedTab1 "
+ "WHERE ( MOD ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 10 ) IN ( 2 , 5 ) ) "
+ "WHERE ( MOD ( CONV ( MD5 ( CONCAT ( KEY1 , KEY2 ) ) , 16, 10 ) , 10 ) IN ( 2 , 5 ) ) "
+ "AND ( ( KEY1 > ? ) OR ( KEY1 = ? AND KEY2 > ? ) ) ORDER BY KEY1 , KEY2 ) as nestedTab2 LIMIT 10";
testQueryString(MANAGER, firstExpected, chunkedExpected, NESTED_QUERY, KEYS, CHUNK_SIZE, PARTITION_COUNT,
PARTITIONS);
Expand Down

0 comments on commit 05eb6d1

Please sign in to comment.