-
Notifications
You must be signed in to change notification settings - Fork 985
DRILL-5450: Fix initcap function to convert upper case characters cor… #821
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,17 +19,26 @@ | |
|
|
||
| import static org.junit.Assert.assertTrue; | ||
|
|
||
| import mockit.Mock; | ||
| import mockit.MockUp; | ||
| import mockit.integration.junit4.JMockit; | ||
| import org.apache.calcite.util.ConversionUtil; | ||
| import org.apache.calcite.util.Util; | ||
| import org.apache.commons.io.FileUtils; | ||
| import org.apache.drill.BaseTestQuery; | ||
| import org.apache.drill.exec.util.Text; | ||
| import org.junit.Ignore; | ||
| import org.junit.Test; | ||
|
|
||
| import com.google.common.collect.ImmutableList; | ||
| import org.junit.runner.RunWith; | ||
|
|
||
| import java.io.BufferedWriter; | ||
| import java.io.File; | ||
| import java.io.FileWriter; | ||
| import java.nio.charset.Charset; | ||
|
|
||
| @RunWith(JMockit.class) | ||
| public class TestStringFunctions extends BaseTestQuery { | ||
|
|
||
| @Test | ||
|
|
@@ -38,14 +47,14 @@ public void testStrPosMultiByte() throws Exception { | |
| .sqlQuery("select `position`('a', 'abc') res1 from (values(1))") | ||
| .ordered() | ||
| .baselineColumns("res1") | ||
| .baselineValues(1l) | ||
| .baselineValues(1L) | ||
| .go(); | ||
|
|
||
| testBuilder() | ||
| .sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))") | ||
| .ordered() | ||
| .baselineColumns("res1") | ||
| .baselineValues(1l) | ||
| .baselineValues(1L) | ||
| .go(); | ||
| } | ||
|
|
||
|
|
@@ -308,4 +317,84 @@ public void testReverseLongVarChars() throws Exception { | |
| FileUtils.deleteQuietly(path); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void testLower() throws Exception { | ||
| testBuilder() | ||
| .sqlQuery("select\n" + | ||
| "lower('ABC') col_upper,\n" + | ||
| "lower('abc') col_lower,\n" + | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please add tests for Greek and Cyrillic. Our source encoding is UTF-8, so you can enter the characters directly. Or, if that does not work, you can instead use the Java Unicode encoding: U1234. If the tests fail because of parsing of SQL, please file a bug. If they fail because the function above does not support UTF-8, please file a different bug. In either case, you can then comment out the test cases and add a comment that says that they fail due to DRILL-xxxx, whatever your bug number turns out to be.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Created Jira DRILL-5477 and added appropriate unit test which is ignored for now. |
||
| "lower('AbC aBc') col_space,\n" + | ||
| "lower('123ABC$!abc123.') as col_special,\n" + | ||
| "lower('') as col_empty,\n" + | ||
| "lower(cast(null as varchar(10))) as col_null\n" + | ||
| "from (values(1))") | ||
| .unOrdered() | ||
| .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null") | ||
| .baselineValues("abc", "abc", "abc abc", "123abc$!abc123.", "", null) | ||
| .build() | ||
| .run(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testUpper() throws Exception { | ||
| testBuilder() | ||
| .sqlQuery("select\n" + | ||
| "upper('ABC')as col_upper,\n" + | ||
| "upper('abc') as col_lower,\n" + | ||
| "upper('AbC aBc') as col_space,\n" + | ||
| "upper('123ABC$!abc123.') as col_special,\n" + | ||
| "upper('') as col_empty,\n" + | ||
| "upper(cast(null as varchar(10))) as col_null\n" + | ||
| "from (values(1))") | ||
| .unOrdered() | ||
| .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null") | ||
| .baselineValues("ABC", "ABC", "ABC ABC", "123ABC$!ABC123.", "", null) | ||
| .build() | ||
| .run(); | ||
| } | ||
|
|
||
| @Test | ||
| public void testInitcap() throws Exception { | ||
| testBuilder() | ||
| .sqlQuery("select\n" + | ||
| "initcap('ABC')as col_upper,\n" + | ||
| "initcap('abc') as col_lower,\n" + | ||
| "initcap('AbC aBc') as col_space,\n" + | ||
| "initcap('123ABC$!abc123.') as col_special,\n" + | ||
| "initcap('') as col_empty,\n" + | ||
| "initcap(cast(null as varchar(10))) as col_null\n" + | ||
| "from (values(1))") | ||
| .unOrdered() | ||
| .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null") | ||
| .baselineValues("Abc", "Abc", "Abc Abc", "123abc$!Abc123.", "", null) | ||
| .build() | ||
| .run(); | ||
| } | ||
|
|
||
| @Ignore("DRILL-5477") | ||
| @Test | ||
| public void testMultiByteEncoding() throws Exception { | ||
| // mock calcite util method to return utf charset | ||
| // instead of setting saffron.default.charset at system level | ||
| new MockUp<Util>() | ||
| { | ||
| @Mock | ||
| Charset getDefaultCharset() { | ||
| return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME); | ||
| } | ||
| }; | ||
|
|
||
| testBuilder() | ||
| .sqlQuery("select\n" + | ||
| "upper('привет')as col_upper,\n" + | ||
| "lower('ПРИВЕТ') as col_lower,\n" + | ||
| "initcap('приВЕТ') as col_initcap\n" + | ||
| "from (values(1))") | ||
| .unOrdered() | ||
| .baselineColumns("col_upper", "col_lower", "col_initcap") | ||
| .baselineValues("ПРИВЕТ", "привет", "Привет") | ||
| .build() | ||
| .run(); | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This code works only for ASCII, but not for UTF-8. UTF-8 is a multi-byte code that requires special encoding/decoding to convert to Unicode characters. Without that encoding, this method won't work for Cyrillic, Greek or any other character set with upper/lower distinctions.
Since this method never worked, it is probably OK to make it a bit less broken than before: at least now it works for ASCII. Please add unit tests below, then file a JIRA, for the fact that this function does not work with UTF-8 despite the fact that Drill claims it supports UTF-8.