apache · arina-ielchiieva · Apr 27, 2017 · paul-rogers · May 2, 2017 · paul-rogers
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
@@ -1,4 +1,4 @@
-/*******************************************************************************
+/*
 
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
@@ -15,7 +15,7 @@
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
- ******************************************************************************/
+*/
 package org.apache.drill.exec.expr.fn.impl;
 
 import io.netty.buffer.DrillBuf;
@@ -144,41 +144,28 @@ public static int varTypesToInt(final int start, final int end, DrillBuf buffer)
     return result;
   }
 
-  // Assumes Alpha as [A-Za-z0-9]
-  // white space is treated as everything else.
+  /**
+   * Capitalizes first letter in each word.
+   * Any symbol except digits and letters is considered as word delimiter.
+   *
+   * @param start start position in input buffer
+   * @param end end position in input buffer
+   * @param inBuf buffer with input characters
+   * @param outBuf buffer with output characters
+   */
   public static void initCap(int start, int end, DrillBuf inBuf, DrillBuf outBuf) {
-    boolean capNext = true;
+    boolean capitalizeNext = true;
     int out = 0;
     for (int id = start; id < end; id++, out++) {
-      byte currentByte = inBuf.getByte(id);
-
-      // 'A - Z' : 0x41 - 0x5A
-      // 'a - z' : 0x61 - 0x7A
-      // '0-9' : 0x30 - 0x39
-      if (capNext) { // curCh is whitespace or first character of word.
-        if (currentByte >= 0x30 && currentByte <= 0x39) { // 0-9
-          capNext = false;
-        } else if (currentByte >= 0x41 && currentByte <= 0x5A) { // A-Z
-          capNext = false;
-        } else if (currentByte >= 0x61 && currentByte <= 0x7A) { // a-z
-          capNext = false;
-          currentByte -= 0x20; // Uppercase this character
-        }
-        // else {} whitespace
-      } else { // Inside of a word or white space after end of word.
-        if (currentByte >= 0x30 && currentByte <= 0x39) { // 0-9
-          // noop
-        } else if (currentByte >= 0x41 && currentByte <= 0x5A) { // A-Z
-          currentByte -= 0x20; // Lowercase this character
-        } else if (currentByte >= 0x61 && currentByte <= 0x7A) { // a-z
-          // noop
-        } else { // whitespace
-          capNext = true;
-        }
+      int currentByte = inBuf.getByte(id);
+      if (Character.isLetterOrDigit(currentByte)) {
+        currentByte = capitalizeNext ? Character.toUpperCase(currentByte) : Character.toLowerCase(currentByte);
+        capitalizeNext = false;
+      } else {
+        capitalizeNext = true;
       }
-
       outBuf.setByte(out, currentByte);
-    } // end of for_loop
+    }
   }
 
   /**

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -501,13 +501,7 @@ public void eval() {
 
       for (int id = input.start; id < input.end; id++) {
         byte  currentByte = input.buffer.getByte(id);
-
-        // 'A - Z' : 0x41 - 0x5A
-        // 'a - z' : 0x61 - 0x7A
-        if (currentByte >= 0x41 && currentByte <= 0x5A) {
-          currentByte += 0x20;
-        }
-        out.buffer.setByte(id - input.start, currentByte) ;
+        out.buffer.setByte(id - input.start, Character.toLowerCase(currentByte)) ;
       }
     }
   }
@@ -534,13 +528,7 @@ public void eval() {
 
       for (int id = input.start; id < input.end; id++) {
         byte currentByte = input.buffer.getByte(id);
-
-        // 'A - Z' : 0x41 - 0x5A
-        // 'a - z' : 0x61 - 0x7A
-        if (currentByte >= 0x61 && currentByte <= 0x7A) {
-          currentByte -= 0x20;
-        }
-        out.buffer.setByte(id - input.start, currentByte) ;
+        out.buffer.setByte(id - input.start, Character.toUpperCase(currentByte)) ;
       }
     }
   }

diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -19,17 +19,26 @@
 
 import static org.junit.Assert.assertTrue;
 
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.integration.junit4.JMockit;
+import org.apache.calcite.util.ConversionUtil;
+import org.apache.calcite.util.Util;
 import org.apache.commons.io.FileUtils;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.exec.util.Text;
+import org.junit.Ignore;
 import org.junit.Test;
 
 import com.google.common.collect.ImmutableList;
+import org.junit.runner.RunWith;
 
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
+import java.nio.charset.Charset;
 
+@RunWith(JMockit.class)
 public class TestStringFunctions extends BaseTestQuery {
 
   @Test
@@ -38,14 +47,14 @@ public void testStrPosMultiByte() throws Exception {
         .sqlQuery("select `position`('a', 'abc') res1 from (values(1))")
         .ordered()
         .baselineColumns("res1")
-        .baselineValues(1l)
+        .baselineValues(1L)
         .go();
 
     testBuilder()
         .sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))")
         .ordered()
         .baselineColumns("res1")
-        .baselineValues(1l)
+        .baselineValues(1L)
         .go();
   }
 
@@ -308,4 +317,84 @@ public void testReverseLongVarChars() throws Exception {
       FileUtils.deleteQuietly(path);
     }
   }
+
+  @Test
+  public void testLower() throws Exception {
+    testBuilder()
+        .sqlQuery("select\n" +
+            "lower('ABC') col_upper,\n" +
+            "lower('abc') col_lower,\n" +
+            "lower('AbC aBc') col_space,\n" +
+            "lower('123ABC$!abc123.') as col_special,\n" +
+            "lower('') as col_empty,\n" +
+            "lower(cast(null as varchar(10))) as col_null\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+        .baselineValues("abc", "abc", "abc abc", "123abc$!abc123.", "", null)
+        .build()
+        .run();
+  }
+
+  @Test
+  public void testUpper() throws Exception {
+    testBuilder()
+        .sqlQuery("select\n" +
+            "upper('ABC')as col_upper,\n" +
+            "upper('abc') as col_lower,\n" +
+            "upper('AbC aBc') as col_space,\n" +
+            "upper('123ABC$!abc123.') as col_special,\n" +
+            "upper('') as col_empty,\n" +
+            "upper(cast(null as varchar(10))) as col_null\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+        .baselineValues("ABC", "ABC", "ABC ABC", "123ABC$!ABC123.", "", null)
+        .build()
+        .run();
+  }
+
+  @Test
+  public void testInitcap() throws Exception {
+    testBuilder()
+        .sqlQuery("select\n" +
+            "initcap('ABC')as col_upper,\n" +
+            "initcap('abc') as col_lower,\n" +
+            "initcap('AbC aBc') as col_space,\n" +
+            "initcap('123ABC$!abc123.') as col_special,\n" +
+            "initcap('') as col_empty,\n" +
+            "initcap(cast(null as varchar(10))) as col_null\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_space", "col_special", "col_empty", "col_null")
+        .baselineValues("Abc", "Abc", "Abc Abc", "123abc$!Abc123.", "", null)
+        .build()
+        .run();
+  }
+
+  @Ignore("DRILL-5477")
+  @Test
+  public void testMultiByteEncoding() throws Exception {
+    // mock calcite util method to return utf charset
+    // instead of setting saffron.default.charset at system level
+    new MockUp<Util>()
+    {
+      @Mock
+      Charset getDefaultCharset() {
+        return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME);
+      }
+    };
+
+    testBuilder()
+        .sqlQuery("select\n" +
+            "upper('привет')as col_upper,\n" +
+            "lower('ПРИВЕТ') as col_lower,\n" +
+            "initcap('приВЕТ') as col_initcap\n" +
+            "from (values(1))")
+        .unOrdered()
+        .baselineColumns("col_upper", "col_lower", "col_initcap")
+        .baselineValues("ПРИВЕТ", "привет", "Привет")
+        .build()
+        .run();
+  }
 }