-
Notifications
You must be signed in to change notification settings - Fork 3k
Core: Make namespace separator configurable #10877
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,29 +26,33 @@ | |
| import org.apache.iceberg.relocated.com.google.common.base.Joiner; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Preconditions; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Splitter; | ||
| import org.apache.iceberg.relocated.com.google.common.base.Strings; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; | ||
| import org.apache.iceberg.relocated.com.google.common.collect.Iterables; | ||
| import org.apache.iceberg.util.PropertyUtil; | ||
| import org.apache.iceberg.util.UUIDUtil; | ||
|
|
||
| @SuppressWarnings("UnicodeEscape") | ||
| public class RESTUtil { | ||
| private static final char NAMESPACE_SEPARATOR = '\u001f'; | ||
| private static final String NAMESPACE_ESCAPED_SEPARATOR = "%1F"; | ||
| private static final Joiner NAMESPACE_ESCAPED_JOINER = Joiner.on(NAMESPACE_ESCAPED_SEPARATOR); | ||
| private static final Splitter NAMESPACE_ESCAPED_SPLITTER = | ||
| Splitter.on(NAMESPACE_ESCAPED_SEPARATOR); | ||
| /** The namespace separator as Unicode character */ | ||
| private static final char NAMESPACE_SEPARATOR_AS_UNICODE = '\u001f'; | ||
|
|
||
| /** The namespace separator as url encoded UTF-8 character */ | ||
| static final String NAMESPACE_SEPARATOR_URLENCODED_UTF_8 = "%1F"; | ||
|
|
||
| /** | ||
| * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link | ||
| * RESTUtil#namespaceToQueryParam(Namespace)}} instead. | ||
| */ | ||
| @Deprecated public static final Joiner NAMESPACE_JOINER = Joiner.on(NAMESPACE_SEPARATOR); | ||
| @Deprecated | ||
| public static final Joiner NAMESPACE_JOINER = Joiner.on(NAMESPACE_SEPARATOR_AS_UNICODE); | ||
|
|
||
| /** | ||
| * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link | ||
| * RESTUtil#namespaceFromQueryParam(String)} instead. | ||
| */ | ||
| @Deprecated public static final Splitter NAMESPACE_SPLITTER = Splitter.on(NAMESPACE_SEPARATOR); | ||
| @Deprecated | ||
| public static final Splitter NAMESPACE_SPLITTER = Splitter.on(NAMESPACE_SEPARATOR_AS_UNICODE); | ||
|
|
||
| public static final String IDEMPOTENCY_KEY_HEADER = "Idempotency-Key"; | ||
|
|
||
|
|
@@ -179,8 +183,31 @@ public static String decodeString(String encoded) { | |
| * separated using the unicode character '\u001f' | ||
| */ | ||
| public static String namespaceToQueryParam(Namespace namespace) { | ||
| return namespaceToQueryParam(namespace, String.valueOf(NAMESPACE_SEPARATOR_AS_UNICODE)); | ||
| } | ||
|
|
||
| /** | ||
| * This converts the given namespace to a string and separates each part in a multipart namespace | ||
| * using the provided unicode separator. Note that this method is different from {@link | ||
| * RESTUtil#encodeNamespace(Namespace)}, which uses a UTF-8 escaped separator. | ||
| * | ||
| * <p>{@link #namespaceFromQueryParam(String, String)} should be used to convert the namespace | ||
| * string back to a {@link Namespace} instance. | ||
| * | ||
| * @param namespace The namespace to convert | ||
| * @param unicodeNamespaceSeparator The unicode namespace separator to use, such as '\u002e' | ||
| * @return The namespace converted to a string where each part in a multipart namespace is | ||
| * separated using the given unicode separator | ||
| */ | ||
| public static String namespaceToQueryParam( | ||
| Namespace namespace, String unicodeNamespaceSeparator) { | ||
| Preconditions.checkArgument(null != namespace, "Invalid namespace: null"); | ||
| return RESTUtil.NAMESPACE_JOINER.join(namespace.levels()); | ||
| Preconditions.checkArgument( | ||
| !Strings.isNullOrEmpty(unicodeNamespaceSeparator), "Invalid separator: null or empty"); | ||
|
|
||
| // decode in case the separator was already encoded with UTF-8 | ||
| String separator = URLDecoder.decode(unicodeNamespaceSeparator, StandardCharsets.UTF_8); | ||
| return Joiner.on(separator).join(namespace.levels()); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -191,13 +218,39 @@ public static String namespaceToQueryParam(Namespace namespace) { | |
| * a string. | ||
| * | ||
| * @param namespace The namespace to convert | ||
| * @return The namespace instance from the given namespace string, where each multipart separator | ||
| * ('\u001f') is converted to a separate namespace level | ||
| * @return The namespace instance from the given namespace string, where each part in a multipart | ||
| * namespace is converted using the unicode separator '\u001f' | ||
| */ | ||
| public static Namespace namespaceFromQueryParam(String namespace) { | ||
| return namespaceFromQueryParam(namespace, String.valueOf(NAMESPACE_SEPARATOR_AS_UNICODE)); | ||
| } | ||
|
|
||
| /** | ||
| * This converts a namespace where each part in a multipart namespace has been separated using the | ||
| * provided unicode separator to its original {@link Namespace} instance. | ||
| * | ||
| * <p>{@link #namespaceToQueryParam(Namespace, String)} should be used to convert the {@link | ||
| * Namespace} to a string. | ||
| * | ||
| * @param namespace The namespace to convert | ||
| * @param unicodeNamespaceSeparator The unicode namespace separator to use, such as '\u002e' | ||
| * @return The namespace instance from the given namespace string, where each part in a multipart | ||
| * namespace is converted using the given unicode namespace separator | ||
| */ | ||
| public static Namespace namespaceFromQueryParam( | ||
| String namespace, String unicodeNamespaceSeparator) { | ||
| Preconditions.checkArgument(null != namespace, "Invalid namespace: null"); | ||
| return Namespace.of( | ||
| RESTUtil.NAMESPACE_SPLITTER.splitToStream(namespace).toArray(String[]::new)); | ||
| Preconditions.checkArgument( | ||
| !Strings.isNullOrEmpty(unicodeNamespaceSeparator), "Invalid separator: null or empty"); | ||
|
|
||
| // decode in case the separator was already encoded with UTF-8 | ||
| String separator = URLDecoder.decode(unicodeNamespaceSeparator, StandardCharsets.UTF_8); | ||
| Splitter splitter = | ||
| namespace.contains(String.valueOf(NAMESPACE_SEPARATOR_AS_UNICODE)) | ||
| ? Splitter.on(NAMESPACE_SEPARATOR_AS_UNICODE) | ||
| : Splitter.on(separator); | ||
|
|
||
| return Namespace.of(splitter.splitToStream(namespace).toArray(String[]::new)); | ||
| } | ||
|
|
||
| /** | ||
|
|
@@ -210,17 +263,40 @@ public static Namespace namespaceFromQueryParam(String namespace) { | |
| * | ||
| * @param ns namespace to encode | ||
| * @return UTF-8 encoded string representing the namespace, suitable for use as a URL parameter | ||
| * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link | ||
| * RESTUtil#encodeNamespace(Namespace, String)} instead. | ||
| */ | ||
| @Deprecated | ||
| public static String encodeNamespace(Namespace ns) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. do we need this util method any more ? i wonder if we should mark it deprecated or explicity mention this will encoded with a certain seperator in the public doc ?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah yes, we should deprecate this one |
||
| Preconditions.checkArgument(ns != null, "Invalid namespace: null"); | ||
| String[] levels = ns.levels(); | ||
| return encodeNamespace(ns, NAMESPACE_SEPARATOR_URLENCODED_UTF_8); | ||
| } | ||
|
|
||
| /** | ||
| * Returns a String representation of a namespace that is suitable for use in a URL / URI. | ||
nastra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * | ||
| * <p>This function needs to be called when a namespace is used as a path variable (or query | ||
| * parameter etc.), to format the namespace per the spec. | ||
nastra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * | ||
| * <p>{@link RESTUtil#decodeNamespace(String, String)} should be used to parse the namespace from | ||
| * a URL parameter. | ||
| * | ||
| * @param namespace namespace to encode | ||
| * @param separator The namespace separator to be used for encoding. The separator will be used | ||
| * as-is and won't be UTF-8 encoded. | ||
| * @return UTF-8 encoded string representing the namespace, suitable for use as a URL parameter | ||
nastra marked this conversation as resolved.
Show resolved
Hide resolved
nastra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| */ | ||
| public static String encodeNamespace(Namespace namespace, String separator) { | ||
| Preconditions.checkArgument(namespace != null, "Invalid namespace: null"); | ||
| Preconditions.checkArgument( | ||
| !Strings.isNullOrEmpty(separator), "Invalid separator: null or empty"); | ||
| String[] levels = namespace.levels(); | ||
| String[] encodedLevels = new String[levels.length]; | ||
|
|
||
| for (int i = 0; i < levels.length; i++) { | ||
| encodedLevels[i] = encodeString(levels[i]); | ||
| } | ||
|
|
||
| return NAMESPACE_ESCAPED_JOINER.join(encodedLevels); | ||
| return Joiner.on(separator).join(encodedLevels); | ||
nastra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -231,10 +307,38 @@ public static String encodeNamespace(Namespace ns) { | |
| * | ||
| * @param encodedNs a namespace to decode | ||
| * @return a namespace | ||
| * @deprecated since 1.11.0, will be removed in 1.12.0; use {@link | ||
| * RESTUtil#decodeNamespace(String, String)} instead. | ||
| */ | ||
| @Deprecated | ||
| public static Namespace decodeNamespace(String encodedNs) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This function looks dangerous now.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you please elaborate what you mean by "dangerous" here? Nothing really changed for callers of this method
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Mixing this and the new function.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's still not clear to my why that would be "dangerous". There is no behavioral change for existing callers of this method. Can you please add a concrete example/explanation to justify the "dangerous" part?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Users can mix both - causing "confusion".
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think that argument is actually true. Users won't be using this code as this code is used on the client and the server but not by users in the classical sense of a user that uses Iceberg. For engine/catalog implementers the javadoc states that one should have used |
||
| Preconditions.checkArgument(encodedNs != null, "Invalid namespace: null"); | ||
| String[] levels = Iterables.toArray(NAMESPACE_ESCAPED_SPLITTER.split(encodedNs), String.class); | ||
| return decodeNamespace(encodedNs, NAMESPACE_SEPARATOR_URLENCODED_UTF_8); | ||
| } | ||
|
|
||
| /** | ||
| * Takes in a string representation of a namespace as used for a URL parameter and returns the | ||
nastra marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| * corresponding namespace. | ||
| * | ||
| * <p>See also {@link #encodeNamespace} for generating correctly formatted URLs. | ||
| * | ||
| * @param encodedNamespace a namespace to decode | ||
| * @param separator The namespace separator to be used as-is for decoding. This should be the same | ||
| * separator that was used when calling {@link RESTUtil#encodeNamespace(Namespace, String)} | ||
| * @return a namespace | ||
| */ | ||
| public static Namespace decodeNamespace(String encodedNamespace, String separator) { | ||
| Preconditions.checkArgument(encodedNamespace != null, "Invalid namespace: null"); | ||
| Preconditions.checkArgument( | ||
| !Strings.isNullOrEmpty(separator), "Invalid separator: null or empty"); | ||
|
|
||
| // use legacy splitter for backwards compatibility in case an old clients encoded the namespace | ||
| // with %1F | ||
| Splitter splitter = | ||
| Splitter.on( | ||
| encodedNamespace.contains(NAMESPACE_SEPARATOR_URLENCODED_UTF_8) | ||
| ? NAMESPACE_SEPARATOR_URLENCODED_UTF_8 | ||
| : separator); | ||
| String[] levels = Iterables.toArray(splitter.split(encodedNamespace), String.class); | ||
|
|
||
| // Decode levels in place | ||
| for (int i = 0; i < levels.length; i++) { | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.