Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,74 @@ private RegexpPatternConverterUtils() {
* Converts a LIKE pattern into REGEXP_LIKE pattern.
*/
public static String likeToRegexpLike(String likePattern) {
// NOTE(review): this one-line return reads like the pre-change implementation that the diff replaced.
// Taken literally it would make every statement below unreachable -- confirm it is meant to be removed.
return "^" + escapeMetaCharacters(likePattern).replace('_', '.').replace("%", ".*") + "$";
// [start, end) is the slice of likePattern that still has to be translated; prefix and suffix are the
// anchors put around it. The defaults translate the whole pattern and anchor it on both sides.
int start = 0;
int end = likePattern.length();
String prefix = "^";
String suffix = "$";
switch (likePattern.length()) {
case 0:
// An empty LIKE pattern is returned as the fully anchored empty regex.
return "^$";
case 1:
// A pattern that is just "%" is returned as the empty regex; any other single character falls
// through to the generic translation below with both anchors kept.
if (likePattern.charAt(0) == '%') {
return "";
}
break;
default:
// Two or more characters: a leading '%' is trimmed instead of being translated.
if (likePattern.charAt(0) == '%') {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"%%" becomes "", which should be fine?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we plan to optimize something similar to

LIKE '%%%%%%%%%%%%%zz'
REGEXP_LIKE(col, '((((((.*)*)*)*)*)*)*zz')

listed in this blog

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we plan to optimize something similar to

I don't think this is the place to do that, because we don't want to optimize only LIKE '%%%%%%%%%%%%%zz'; we also want to optimize REGEXP_LIKE(col, '((((((.*)*)*)*)*)*)*zz').

I mean:

  1. we transform LIKE expressions into REGEXP_LIKE
  2. we let users write their own REGEXP_LIKE expressions
  3. we know some regexes in REGEXP_LIKE are dangerous

We should not focus on making 1. safe; we should focus on making 3. safe. Otherwise an attacker may not be able to use LIKE to mount an attack, but they could still use REGEXP_LIKE.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"%%" becomes "", which should be fine?

% matches any string of zero or more characters, so LIKE('%%') will match any text and should therefore be equivalent to REGEXP_LIKE('').

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the explanation. It seems fine to prune every leading/trailing % rather than just one, e.g. instead of
start = 1; we can set start to the index of the first non-% character.

// Skip the whole leading run of '%', not only the first one.
start = indexOfFirstDifferent(likePattern, '%');
if (start == -1) {
// Every character is '%', so the result is the empty regex.
return "";
}
// The leading wildcard is gone, so the start anchor is dropped with it.
prefix = "";
}
if (likePattern.charAt(likePattern.length() - 1) == '%') {
// Likewise skip the whole trailing run of '%'.
end = indexOfLastDifferent(likePattern, '%');
// -1 would mean the pattern is all '%'; when it also starts with '%' that case has already returned
// above, and when it does not, the first character itself is a non-'%' match.
if (end == -1) { //this should never happen, but for clarity
return "";
}
// end is used as an exclusive bound by substring, so step past the last character that is not '%'.
end++;
// The trailing wildcard is gone, so the end anchor is dropped with it.
suffix = "";
}
break;
}

// Escape only the slice that survived trimming, then wrap it in whichever anchors are left.
String escaped = escapeMetaCharacters(likePattern.substring(start, end));
StringBuilder sb = new StringBuilder(escaped.length() + 2);
sb.append(prefix);
sb.append(escaped);
sb.append(suffix);

// Rewrite the remaining LIKE wildcards in place: '_' becomes "." and '%' becomes ".*".
int i = 0;
while (i < sb.length()) {
char c = sb.charAt(i);
if (c == '_') {
sb.replace(i, i + 1, ".");
} else if (c == '%') {
sb.replace(i, i + 1, ".*");
// The replacement is two characters long; this extra step moves past the '*' just inserted.
i++;
}
i++;
}

return sb.toString();
}

/**
 * Finds the first position in {@code str} whose character is not {@code character}.
 *
 * @param str the string to scan from the front
 * @param character the character to skip over
 * @return the index of the first differing character, or -1 when {@code str} is empty or consists
 *     solely of {@code character}
 */
private static int indexOfFirstDifferent(String str, char character) {
  final int length = str.length();
  int pos = 0;
  // Walk forward over the run of matching characters.
  while (pos < length && str.charAt(pos) == character) {
    pos++;
  }
  return pos < length ? pos : -1;
}

/**
 * Finds the last position in {@code str} whose character is not {@code character}.
 *
 * @param str the string to scan from the back
 * @param character the character to skip over
 * @return the index of the last differing character, or -1 when {@code str} is empty or consists
 *     solely of {@code character}
 */
private static int indexOfLastDifferent(String str, char character) {
  int pos = str.length() - 1;
  // Walk backward over the run of matching characters; pos ends at -1 when nothing else is left.
  while (pos >= 0 && str.charAt(pos) == character) {
    pos--;
  }
  return pos;
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,23 +39,23 @@ public class RegexpPatternConverterUtilsTest {
@Test
public void testLeadingWildcard() {
  // LIKE -> REGEXP_LIKE: the leading wildcard is trimmed together with the '^' anchor, so only the
  // escaped literal and the '$' anchor are expected. (LEADING_WILDCARD is declared outside this view;
  // presumably it starts with '%'.) The older expectation "^.*\\+\\+$" contradicted this one and has
  // been dropped.
  String regexpLikePattern = RegexpPatternConverterUtils.likeToRegexpLike(LEADING_WILDCARD);
  assertEquals(regexpLikePattern, "\\+\\+$");
  // REGEXP_LIKE -> Lucene: the expected Lucene form spells the leading wildcard out again.
  String luceneRegExpPattern = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePattern);
  assertEquals(luceneRegExpPattern, ".*\\+\\+");
}

@Test
public void testTrailingWildcard() {
  // LIKE -> REGEXP_LIKE: the trailing wildcard is trimmed together with the '$' anchor, so only the
  // '^' anchor and the escaped literal are expected. (TRAILING_WILDCARD is declared outside this view;
  // presumably it ends with '%'.) The older expectation "^C\\+.*$" contradicted this one and has been
  // dropped.
  String regexpLikePattern = RegexpPatternConverterUtils.likeToRegexpLike(TRAILING_WILDCARD);
  assertEquals(regexpLikePattern, "^C\\+");
  // REGEXP_LIKE -> Lucene: the expected Lucene form spells the trailing wildcard out again.
  String luceneRegExpPattern = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePattern);
  assertEquals(luceneRegExpPattern, "C\\+.*");
}

@Test
public void testBothSidesWildcard() {
  // LIKE -> REGEXP_LIKE: wildcards on both ends are trimmed along with both anchors, leaving just the
  // escaped literal. (BOTH_SIDES_WILDCARD is declared outside this view; presumably it starts and ends
  // with '%'.) The older expectation "^.*\\+.*$" contradicted this one and has been dropped.
  String regexpLikePattern = RegexpPatternConverterUtils.likeToRegexpLike(BOTH_SIDES_WILDCARD);
  assertEquals(regexpLikePattern, "\\+");
  // REGEXP_LIKE -> Lucene: the expected Lucene form spells both wildcards out again.
  String luceneRegExpPattern = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePattern);
  assertEquals(luceneRegExpPattern, ".*\\+.*");
}
Expand Down Expand Up @@ -97,8 +97,32 @@ public void testSingleCharacterInMiddle() {
@Test
public void testCombinationPattern() {
  // LIKE -> REGEXP_LIKE: the single-character wildcard is kept as '.', while the trailing '%' is
  // trimmed together with the '$' anchor. (COMBINATION_PATTERN is declared outside this view;
  // presumably it mixes '_' with a trailing '%'.) The older expectation "^C..*$" contradicted this one
  // and has been dropped.
  String regexpLikePattern = RegexpPatternConverterUtils.likeToRegexpLike(COMBINATION_PATTERN);
  assertEquals(regexpLikePattern, "^C.");
  // REGEXP_LIKE -> Lucene: the expected Lucene form spells the trailing wildcard out again.
  String luceneRegExpPattern = RegexpPatternConverterUtils.regexpLikeToLuceneRegExp(regexpLikePattern);
  assertEquals(luceneRegExpPattern, "C..*");
}

@Test
public void testLeadingRepeatedWildcards() {
  // A whole run of leading '%' collapses to nothing: no '^' anchor and no ".*" in the result.
  final String converted = RegexpPatternConverterUtils.likeToRegexpLike("%%%%%%%%%%%%%zz");
  assertEquals(converted, "zz$");
}

@Test
public void testTrailingRepeatedWildcards() {
  // A whole run of trailing '%' collapses to nothing: no '$' anchor and no ".*" in the result.
  final String converted = RegexpPatternConverterUtils.likeToRegexpLike("zz%%%%%%%%%%%%%");
  assertEquals(converted, "^zz");
}

@Test
public void testLeadingSize2() {
  // Shortest pattern with a leading wildcard: one '%' followed by one literal.
  final String converted = RegexpPatternConverterUtils.likeToRegexpLike("%z");
  assertEquals(converted, "z$");
}

@Test
public void testTrailingSize2() {
  // Shortest pattern with a trailing wildcard: one literal followed by one '%'.
  final String converted = RegexpPatternConverterUtils.likeToRegexpLike("z%");
  assertEquals(converted, "^z");
}
}