Skip to content

[SPARK-4061][SQL] We cannot use EOL character in the operand of LIKE predicate. #2908

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dev/run-tests
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ CURRENT_BLOCK=$BLOCK_SPARK_UNIT_TESTS
if [ -n "$_SQL_TESTS_ONLY" ]; then
# This must be an array of individual arguments. Otherwise, having one long string
#+ will be interpreted as a single test, which doesn't work.
SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test" "hive-thriftserver/test")
SBT_MAVEN_TEST_ARGS=("catalyst/test" "sql/test" "hive/test")
else
SBT_MAVEN_TEST_ARGS=("test")
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,31 +102,27 @@ case class Like(left: Expression, right: Expression)

// replace the _ with .{1} exactly match 1 time of any character
// replace the % with .*, match 0 or more times with any character
override def escape(v: String) = {
val sb = new StringBuilder()
var i = 0;
while (i < v.length) {
// Make a special case for "\\_" and "\\%"
val n = v.charAt(i);
if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) {
sb.append(v.charAt(i + 1))
i += 1
} else {
if (n == '_') {
sb.append(".");
} else if (n == '%') {
sb.append(".*");
} else {
sb.append(Pattern.quote(Character.toString(n)));
}
}

i += 1
override def escape(v: String) =
if (!v.isEmpty) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good catch for the empty case...

"(?s)" + (' ' +: v.init).zip(v).flatMap {
case (prev, '\\') => ""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we need this branch?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I see, we need to remove leading \\.

case ('\\', c) =>
c match {
case '_' => "_"
case '%' => "%"
case _ => Pattern.quote("\\" + c)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe we don't need the "\\" here. Otherwise "\\x" will be transformed into "\\\\x".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for my previous wrong and misleading sample code, you're correct :)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No problem, thanks for your suggestion.

}
case (prev, c) =>
c match {
case '_' => "."
case '%' => ".*"
case _ => Pattern.quote(Character.toString(c))
}
}.mkString
} else {
v
}

sb.toString()
}

override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like "a%", true)
checkEvaluation("abc" like "b%", false)
checkEvaluation("abc" like "bc%", false)
checkEvaluation("a\nb" like "a_b", true)
checkEvaluation("ab" like "a%b", true)
checkEvaluation("a\nb" like "a%b", true)
}

test("LIKE Non-literal Regular Expression") {
Expand All @@ -207,6 +210,9 @@ class ExpressionEvaluationSuite extends FunSuite {
checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%")))
checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%")))
checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a_b")))
checkEvaluation("ab" like regEx, true, new GenericRow(Array[Any]("a%b")))
checkEvaluation("a\nb" like regEx, true, new GenericRow(Array[Any]("a%b")))

checkEvaluation(Literal(null, StringType) like regEx, null, new GenericRow(Array[Any]("bc%")))
}
Expand Down