Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add faster scaling composite hash value encoding for remote path #13155

Merged
merged 7 commits into from
Apr 16, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Incoporate PR review feedback
Signed-off-by: Ashish Singh <ssashish@amazon.com>
  • Loading branch information
ashking94 committed Apr 15, 2024
commit 122bcd3832a3aa222321b1f6b36a76a38cac5895
Original file line number Diff line number Diff line change
Expand Up @@ -272,13 +272,13 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
.get();
assertEquals(RestStatus.ACCEPTED, restoreSnapshotResponse.status());
ensureGreen(restoredIndexName1version2);
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE);
validatePathType(restoredIndexName1version2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE_1);

// Create index with cluster setting cluster.remote_store.index.path.type as hashed_prefix.
indexSettings = getIndexSettings(1, 0).build();
createIndex(indexName2, indexSettings);
ensureGreen(indexName2);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE_1);

// Validating that custom data has not changed for indexes which were created before the cluster setting got updated
validatePathType(indexName1, PathType.FIXED);
Expand Down Expand Up @@ -309,7 +309,7 @@ public void testRemoteStoreCustomDataOnIndexCreationAndRestore() {
ensureGreen(indexName2);

// Validating that custom data has not changed for testindex2 which was created before the cluster setting got updated
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE);
validatePathType(indexName2, PathType.HASHED_PREFIX, PathHashAlgorithm.FNV_1A_COMPOSITE_1);
}

private void validatePathType(String index, PathType pathType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import static java.util.Collections.unmodifiableMap;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.DATA;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinaryEncodingUsing20Bits;
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding;
import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64;

/**
Expand Down Expand Up @@ -231,13 +231,13 @@ String hash(PathInput pathInput) {
* This hash algorithm will generate a hash value which will use 1st 6 bits to create bas64 character and next 14
* bits to create binary string.
*/
FNV_1A_COMPOSITE(1) {
FNV_1A_COMPOSITE_1(1) {
@Override
String hash(PathInput pathInput) {
String input = pathInput.indexUUID() + pathInput.shardId() + pathInput.dataCategory().getName() + pathInput.dataType()
.getName();
long hash = FNV1a.hash64(input);
return longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(hash);
return longToCompositeBase64AndBinaryEncoding(hash);
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,9 @@

import org.opensearch.common.collect.Tuple;

import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
Expand All @@ -32,17 +30,7 @@ public class RemoteStoreUtils {
/**
* URL safe base 64 character set. This must not be changed as this is used in deriving the base64 equivalent of binary.
*/
private static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();

private static Map<Character, Integer> BASE64_CHARSET_IDX_MAP;

static {
Map<Character, Integer> charToIndexMap = new HashMap<>();
for (int i = 0; i < URL_BASE64_CHARSET.length; i++) {
charToIndexMap.put(URL_BASE64_CHARSET[i], i);
}
BASE64_CHARSET_IDX_MAP = Collections.unmodifiableMap(charToIndexMap);
}
static final char[] URL_BASE64_CHARSET = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_".toCharArray();

/**
* This method subtracts given numbers from Long.MAX_VALUE and returns a string representation of the result.
Expand Down Expand Up @@ -147,14 +135,14 @@ static long urlBase64ToLong(String base64Str) {
* 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string.
* For the second part, we will use the next 14 bits. For eg - A010001010100010.
*/
static String longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(long value) {
static String longToCompositeBase64AndBinaryEncoding(long value) {
return longToCompositeBase64AndBinaryEncoding(value, 20);
}

/**
* Converts an input hash which occupies 64 bits of memory into a composite encoded string. The string will have 2 parts -
* 1. Base 64 string and 2. Binary String. We will use the first 6 bits for creating the base 64 string.
* For the second part, the rest of the bits will be used as is in string form.
* For the second part, the rest of the bits (of length {@code len}-6) will be used as is in string form.
*/
static String longToCompositeBase64AndBinaryEncoding(long value, int len) {
if (len < 7 || len > 64) {
Expand All @@ -167,12 +155,4 @@ static String longToCompositeBase64AndBinaryEncoding(long value, int len) {
assert base64DecimalValue >= 0 && base64DecimalValue < 64;
return URL_BASE64_CHARSET[base64DecimalValue] + binaryPart;
}

static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) {
char ch = encodedValue.charAt(0);
int base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch);
String base64PartBinary = Integer.toBinaryString(base64BitsIntValue);
String binaryString = base64PartBinary + encodedValue.substring(1);
return new BigInteger(binaryString, 2).longValue();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ public class IndicesService extends AbstractLifecycleComponent
*/
public static final Setting<PathHashAlgorithm> CLUSTER_REMOTE_STORE_PATH_HASH_ALGORITHM_SETTING = new Setting<>(
"cluster.remote_store.index.path.hash_algorithm",
PathHashAlgorithm.FNV_1A_COMPOSITE.toString(),
PathHashAlgorithm.FNV_1A_COMPOSITE_1.toString(),
PathHashAlgorithm::parseString,
Property.NodeScope,
Property.Dynamic
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1711,7 +1711,7 @@ public void testRemoteCustomData() {
validateRemoteCustomData(
indexMetadata.getCustomData(IndexMetadata.REMOTE_STORE_CUSTOM_KEY),
PathHashAlgorithm.NAME,
PathHashAlgorithm.FNV_1A_COMPOSITE.name()
PathHashAlgorithm.FNV_1A_COMPOSITE_1.name()
);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.LOCK_FILES;
import static org.opensearch.index.remote.RemoteStoreEnums.DataType.METADATA;
import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_BASE64;
import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE;
import static org.opensearch.index.remote.RemoteStoreEnums.PathHashAlgorithm.FNV_1A_COMPOSITE_1;
import static org.opensearch.index.remote.RemoteStoreEnums.PathType.FIXED;
import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_INFIX;
import static org.opensearch.index.remote.RemoteStoreEnums.PathType.HASHED_PREFIX;
Expand Down Expand Up @@ -330,10 +330,12 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
BlobPath result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A_COMPOSITE.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(
String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())
)
);

// assert with exact value for known base path
Expand All @@ -347,7 +349,7 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertEquals("D10000001001000/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/data/", result.buildAsString());

// Translog Metadata
Expand All @@ -359,10 +361,12 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A_COMPOSITE.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(
String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())
)
);

// assert with exact value for known base path
Expand All @@ -373,7 +377,7 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertEquals(
"o00101001010011/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/translog/metadata/",
result.buildAsString()
Expand Down Expand Up @@ -410,10 +414,12 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A_COMPOSITE.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(
String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())
)
);

// assert with exact value for known base path
Expand All @@ -424,7 +430,7 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertEquals("A01010000000101/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/data/", result.buildAsString());

// Segment Metadata
Expand All @@ -436,10 +442,12 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A_COMPOSITE.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(
String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())
)
);

// assert with exact value for known base path
Expand All @@ -450,7 +458,7 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertEquals(
"e10101111000001/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/metadata/",
result.buildAsString()
Expand All @@ -465,10 +473,12 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertTrue(
result.buildAsString()
.startsWith(String.join(SEPARATOR, FNV_1A_COMPOSITE.hash(pathInput), basePath, dataCategory.getName(), dataType.getName()))
.startsWith(
String.join(SEPARATOR, FNV_1A_COMPOSITE_1.hash(pathInput), basePath, dataCategory.getName(), dataType.getName())
)
);

// assert with exact value for known base path
Expand All @@ -479,7 +489,7 @@ public void testGeneratePathForHashedPrefixTypeAndFNVCompositeHashAlgorithm() {
.dataCategory(dataCategory)
.dataType(dataType)
.build();
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE);
result = HASHED_PREFIX.path(pathInput, FNV_1A_COMPOSITE_1);
assertEquals(
"K01111001100000/xjsdhj/ddjsha/yudy7sd/32hdhua7/89jdij/k2ijhe877d7yuhx7/10/segments/lock_files/",
result.buildAsString()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,14 +62,14 @@ public void testGetStrategy() {
clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
resolver = new RemoteStorePathStrategyResolver(clusterSettings, () -> Version.CURRENT);
assertEquals(PathType.HASHED_PREFIX, resolver.get().getType());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE, resolver.get().getHashAlgorithm());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE_1, resolver.get().getHashAlgorithm());

// HASHED_PREFIX type with FNV_1A_COMPOSITE
settings = Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX).build();
clusterSettings = new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS);
resolver = new RemoteStorePathStrategyResolver(clusterSettings, () -> Version.CURRENT);
assertEquals(PathType.HASHED_PREFIX, resolver.get().getType());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE, resolver.get().getHashAlgorithm());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE_1, resolver.get().getHashAlgorithm());

// HASHED_PREFIX type with FNV_1A_BASE64
settings = Settings.builder()
Expand Down Expand Up @@ -106,7 +106,7 @@ public void testGetStrategyWithDynamicUpdate() {
Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.HASHED_PREFIX).build()
);
assertEquals(PathType.HASHED_PREFIX, resolver.get().getType());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE, resolver.get().getHashAlgorithm());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE_1, resolver.get().getHashAlgorithm());

// Set HASHED_PREFIX with FNV_1A_BASE64 hash algorithm
clusterSettings.applySettings(
Expand All @@ -123,7 +123,7 @@ public void testGetStrategyWithDynamicUpdate() {
Settings.builder().put(CLUSTER_REMOTE_STORE_PATH_TYPE_SETTING.getKey(), PathType.HASHED_INFIX).build()
);
assertEquals(PathType.HASHED_INFIX, resolver.get().getType());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE, resolver.get().getHashAlgorithm());
assertEquals(PathHashAlgorithm.FNV_1A_COMPOSITE_1, resolver.get().getHashAlgorithm());

// Set HASHED_INFIX with FNV_1A_BASE64 hash algorithm
clusterSettings.applySettings(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@
import org.opensearch.index.translog.transfer.TranslogTransferMetadata;
import org.opensearch.test.OpenSearchTestCase;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

import static org.opensearch.index.remote.RemoteStoreUtils.compositeUrlBase64BinaryEncodingToLong;
import static org.opensearch.index.remote.RemoteStoreUtils.URL_BASE64_CHARSET;
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding;
import static org.opensearch.index.remote.RemoteStoreUtils.longToCompositeUrlBase64AndBinaryEncodingUsing20Bits;
import static org.opensearch.index.remote.RemoteStoreUtils.longToUrlBase64;
import static org.opensearch.index.remote.RemoteStoreUtils.urlBase64ToLong;
import static org.opensearch.index.remote.RemoteStoreUtils.verifyNoMultipleWriters;
Expand All @@ -32,6 +34,16 @@

public class RemoteStoreUtilsTests extends OpenSearchTestCase {

private static Map<Character, Integer> BASE64_CHARSET_IDX_MAP;

static {
Map<Character, Integer> charToIndexMap = new HashMap<>();
for (int i = 0; i < URL_BASE64_CHARSET.length; i++) {
charToIndexMap.put(URL_BASE64_CHARSET[i], i);
}
BASE64_CHARSET_IDX_MAP = Collections.unmodifiableMap(charToIndexMap);
}

private final String metadataFilename = RemoteSegmentStoreDirectory.MetadataFilenameUtils.getMetadataFilename(
12,
23,
Expand Down Expand Up @@ -247,7 +259,7 @@ public void testLongToCompositeUrlBase64AndBinaryEncodingUsing20Bits() {
"610010010111111"
);
for (Map.Entry<Long, String> entry : longToExpectedBase64String.entrySet()) {
String base64Str = longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(entry.getKey());
String base64Str = RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding(entry.getKey());
assertEquals(entry.getValue(), base64Str);
assertEquals(15, entry.getValue().length());
assertEquals(longToUrlBase64(entry.getKey()).charAt(0), base64Str.charAt(0));
Expand All @@ -256,7 +268,7 @@ public void testLongToCompositeUrlBase64AndBinaryEncodingUsing20Bits() {
int iters = randomInt(1000);
for (int i = 0; i < iters; i++) {
long value = randomLong();
assertEquals(longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(value).charAt(0), longToUrlBase64(value).charAt(0));
assertEquals(RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding(value).charAt(0), longToUrlBase64(value).charAt(0));
}
}

Expand Down Expand Up @@ -290,7 +302,7 @@ public void testLongToCompositeUrlBase64AndBinaryEncoding() {
assertEquals(expectedCompositeEncoding, actualCompositeEncoding);
assertEquals(59, expectedCompositeEncoding.length());
assertEquals(longToUrlBase64(entry.getKey()).charAt(0), actualCompositeEncoding.charAt(0));
assertEquals(longToCompositeUrlBase64AndBinaryEncodingUsing20Bits(hashValue), actualCompositeEncoding.substring(0, 15));
assertEquals(RemoteStoreUtils.longToCompositeBase64AndBinaryEncoding(hashValue), actualCompositeEncoding.substring(0, 15));

Long computedHashValue = compositeUrlBase64BinaryEncodingToLong(actualCompositeEncoding);
assertEquals(hashValue, computedHashValue);
Expand All @@ -303,4 +315,12 @@ public void testLongToCompositeUrlBase64AndBinaryEncoding() {
assertEquals(value, compositeUrlBase64BinaryEncodingToLong(compositeEncoding));
}
}

static long compositeUrlBase64BinaryEncodingToLong(String encodedValue) {
char ch = encodedValue.charAt(0);
int base64BitsIntValue = BASE64_CHARSET_IDX_MAP.get(ch);
String base64PartBinary = Integer.toBinaryString(base64BitsIntValue);
String binaryString = base64PartBinary + encodedValue.substring(1);
return new BigInteger(binaryString, 2).longValue();
}
}
Loading
Loading