[SPARK-27181][SQL]: Add public transform API #24117

Closed
wants to merge 6 commits
SqlBase.g4
@@ -92,7 +92,7 @@ statement
| SHOW DATABASES (LIKE? pattern=STRING)? #showDatabases
| createTableHeader ('(' colTypeList ')')? tableProvider
((OPTIONS options=tablePropertyList) |
(PARTITIONED BY partitionColumnNames=identifierList) |
(PARTITIONED BY partitioning=transformList) |
bucketSpec |
locationSpec |
(COMMENT comment=STRING) |
@@ -587,6 +587,21 @@ namedExpressionSeq
: namedExpression (',' namedExpression)*
;

transformList
Contributor

why is this not a general expression?

Contributor Author

Because in this context, only transforms are allowed. You can't partition by an arbitrary expression.

Contributor

Shouldn't the parser be generic, and we just throw errors on things that are not supported? That'd give better error messages (a typical technique used by more mature databases) and also reduce the number of parsing rules in the grammar file.

Contributor Author

We can definitely do that and you're right about being able to produce better error messages.

Is it okay with you if we do this in a follow-up?

This PR is blocking the addition of TableCatalog in PR #24246, and that PR is blocking many PRs that we can do in parallel (Select from SQL, CreateTable, CTAS, DropTable, etc.). If we are going to get this done in time, I'd like to get these changes in and work on improvements like this later. I'd rather have a functional v2 implementation than friendly error messages right now.

Contributor

It's a good idea to make the parser rule general and produce a better error message. However, this is an existing problem in our parser, and it's also non-trivial work for this case. We need to catch UnresolvedFunction and validate the arguments; the arguments can only be attributes, ExtractValues, or literals. I'm OK to do it in a follow-up.

Contributor Author

I've opened an issue for this follow-up: https://issues.apache.org/jira/browse/SPARK-27386

: '(' transforms+=transform (',' transforms+=transform)* ')'
;

transform
: qualifiedName #identityTransform
Contributor

It's used in the CREATE TABLE statement only; do we really need qualifiedName? I think identifier is good enough here.

Contributor Author

I think it is better to use qualifiedName. This may be a logical name in the current use, but later Spark may need to resolve the transform using this name. For example, this could be set to builtin.bucket to tell Spark that it is the built-in bucket transform function. Using that information, Spark would know it can run a bucketed join.

Contributor

How about the transform arguments? Do they need to be qualifiedName as well?

Contributor Author

Yes, arguments need to be qualifiedName because they may reference nested fields.

| transformName=identifier
'(' argument+=transformArgument (',' argument+=transformArgument)* ')' #applyTransform
;

transformArgument
: qualifiedName
| constant
;

expression
: booleanExpression
;
Expression.java
@@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalog.v2.expressions;
Contributor

An issue with using the sql.catalog namespace is that it is not clear whether this is a "source" or "connector". In general, I think we should have separate API packages for library / data source developers vs. end users. We already use sql.catalog for end-user-facing catalog functionality.

Contributor Author

How about org.apache.spark.sql.logical.expressions then? It doesn't matter to me.

Contributor

I think it is better to create a namespace for data sources (or connectors), for example org.apache.spark.sql.sources or org.apache.spark.sql.connectors, and within it have catalog and table?

Contributor Author

It sounds like you're suggesting a larger reorganization than just where the expressions go. Let's discuss this at tonight's sync.

Contributor

I like the org.apache.spark.sql.connectors package, but moving classes will cause code conflicts for all the DS v2 PRs. Maybe do it later?

Contributor

When are we going to do this?

Contributor Author

First, we have to decide what we need to move. I don't think that is clear yet. The next concern is getting #24246 in, because it is a blocker for a lot of work that can be done in parallel. After that, it would be nice to coordinate to avoid breaking lots of PRs, but that's less of a concern.

So to answer your question, I think we should do this after #24246 and after we've decided what needs to move and what the new organization should be.

The actual changes should be simple and quick to review, but would cause too much delay combined into a PR with other changes.


import org.apache.spark.annotation.Experimental;

/**
* Base class of the public logical expression API.
*/
@Experimental
public interface Expression {
Member

I am confused about why we should expose a custom expression API that is currently only used by the transform API.
Maybe there was a discussion about this plan. Do we plan to switch Spark's current expressions to this expression API entirely in the future, and how is it different from using a UDF?

Contributor

It's better to give the semantics to the data source instead of a concrete UDF. Data sources can implement partitioning semantics efficiently if they don't need to call a Java UDF here and there.

/**
* Format the expression as a human-readable SQL-like string.
*/
String describe();
Contributor

why not toSQL?

Contributor Author

I didn't want to require that implementations of Expression produce SQL that can be parsed. Spark should do that to ensure it is reliable when Spark needs to embed these in SQL strings. But it is good to have a readable string representation.

I can change this to toSQL if you think that implementations will be able to produce reliable SQL strings.

Contributor

Only Spark internally will do this, right? If that's the case, we should just make it clear it will produce SQL strings.

Contributor Author

What do you mean by "do this"?

Transform is a public API that can be implemented to pass transforms into Spark. I don't think we can avoid that. As a result, Spark should produce the SQL representation itself using the transform name and arguments.

Contributor Author

Alternatively, we could add a helper that does the formatting and remove this from the API entirely.

Contributor

I prefer toSQL, given it will basically be toSQL, and it'd be useful to enforce this across the board.

}
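As a sketch of the "helper that does the formatting" idea mentioned in the thread above (not part of this PR; the class and method names are illustrative, and it is assumed to live alongside Expression and Transform in the same package), Spark could format a transform itself from its name and arguments:

// Hypothetical helper, sketching the formatting idea discussed above.
final class ExpressionFormat {
  private ExpressionFormat() {
  }

  // Formats a transform as name(arg1, arg2, ...) using each argument's describe().
  static String format(Transform transform) {
    StringBuilder sb = new StringBuilder(transform.name()).append("(");
    Expression[] args = transform.arguments();
    for (int i = 0; i < args.length; i++) {
      if (i > 0) {
        sb.append(", ");
      }
      sb.append(args[i].describe());
    }
    return sb.append(")").toString();
  }
}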
Expressions.java
@@ -0,0 +1,162 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalog.v2.expressions;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.types.DataType;
import scala.collection.JavaConverters;

import java.util.Arrays;

/**
* Helper methods to create logical transforms to pass into Spark.
*/
@Experimental
public class Expressions {
private Expressions() {
}

/**
* Create a logical transform for applying a named transform.
* <p>
* This transform can represent applying any named transform.
*
* @param name the transform name
* @param args expression arguments to the transform
* @return a logical transform
*/
public static Transform apply(String name, Expression... args) {
return LogicalExpressions.apply(name,
JavaConverters.asScalaBuffer(Arrays.asList(args)).toSeq());
}

/**
* Create a named reference expression for a column.
*
* @param name a column name
* @return a named reference for the column
*/
public static NamedReference column(String name) {
return LogicalExpressions.reference(name);
}

/**
* Create a literal from a value.
* <p>
* The JVM type of the value held by a literal must be the type used by Spark's InternalRow API
* for the literal's {@link DataType SQL data type}.
*
* @param value a value
* @param <T> the JVM type of the value
* @return a literal expression for the value
*/
public static <T> Literal<T> literal(T value) {
return LogicalExpressions.literal(value);
}

/**
* Create a bucket transform for one or more columns.
* <p>
* This transform represents a logical mapping from a value to a bucket id in [0, numBuckets)
* based on a hash of the value.
* <p>
* The name reported by transforms created with this method is "bucket".
*
* @param numBuckets the number of output buckets
* @param columns input columns for the bucket transform
* @return a logical bucket transform with name "bucket"
*/
public static Transform bucket(int numBuckets, String... columns) {
return LogicalExpressions.bucket(numBuckets,
JavaConverters.asScalaBuffer(Arrays.asList(columns)).toSeq());
}

/**
* Create an identity transform for a column.
* <p>
* This transform represents a logical mapping from a value to itself.
* <p>
* The name reported by transforms created with this method is "identity".
*
* @param column an input column
* @return a logical identity transform with name "identity"
*/
public static Transform identity(String column) {
return LogicalExpressions.identity(column);
}

/**
* Create a yearly transform for a timestamp or date column.
* <p>
* This transform represents a logical mapping from a timestamp or date to a year, such as 2018.
* <p>
* The name reported by transforms created with this method is "years".
*
* @param column an input timestamp or date column
* @return a logical yearly transform with name "years"
*/
public static Transform years(String column) {
return LogicalExpressions.years(column);
}

/**
* Create a monthly transform for a timestamp or date column.
* <p>
* This transform represents a logical mapping from a timestamp or date to a month, such as
* 2018-05.
* <p>
* The name reported by transforms created with this method is "months".
*
* @param column an input timestamp or date column
* @return a logical monthly transform with name "months"
*/
public static Transform months(String column) {
return LogicalExpressions.months(column);
}

/**
* Create a daily transform for a timestamp or date column.
* <p>
* This transform represents a logical mapping from a timestamp or date to a date, such as
* 2018-05-13.
* <p>
* The name reported by transforms created with this method is "days".
*
* @param column an input timestamp or date column
* @return a logical daily transform with name "days"
*/
public static Transform days(String column) {
return LogicalExpressions.days(column);
}

/**
* Create an hourly transform for a timestamp column.
* <p>
* This transform represents a logical mapping from a timestamp to a date and hour, such as
* 2018-05-13, hour 19.
* <p>
* The name reported by transforms created with this method is "hours".
*
* @param column an input timestamp column
* @return a logical hourly transform with name "hours"
*/
public static Transform hours(String column) {
return LogicalExpressions.hours(column);
}

}
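For illustration, a brief usage sketch (not part of this PR) of the helpers above; the class name, column names, and the zorder transform are hypothetical:

import org.apache.spark.sql.catalog.v2.expressions.Expressions;
import org.apache.spark.sql.catalog.v2.expressions.Transform;

public class TransformExamples {
  public static void main(String[] args) {
    // Roughly equivalent to PARTITIONED BY (region, days(ts), bucket(16, id)) in SQL.
    Transform byRegion = Expressions.identity("region");
    Transform byDay = Expressions.days("ts");
    Transform byBucket = Expressions.bucket(16, "id");

    // Any named transform can be built directly, e.g. a source-specific one.
    Transform custom = Expressions.apply("zorder",
        Expressions.column("x"), Expressions.column("y"));

    System.out.println(byRegion.describe());
    System.out.println(custom.describe());
  }
}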
Literal.java
@@ -0,0 +1,42 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalog.v2.expressions;

import org.apache.spark.annotation.Experimental;
import org.apache.spark.sql.types.DataType;

/**
* Represents a constant literal value in the public expression API.
* <p>
* The JVM type of the value held by a literal must be the type used by Spark's InternalRow API for
* the literal's {@link DataType SQL data type}.
*
* @param <T> the JVM type of a value held by the literal
*/
@Experimental
public interface Literal<T> extends Expression {
Contributor

I'm a little hesitant to add a type parameter to an expression interface. I'm not sure how useful it is. When I deal with expressions, my method parameter and return types are usually Expression. Because of type erasure, I won't get the type parameter of a literal unless the method deals with literals only.

Contributor Author

What is the downside to using this? We have a typed literal in Iceberg and it is useful for maintaining type safety.

Contributor

The downside is that we may need to add a cast to read the value from this literal, e.g.

def func(e: Expression) = e match {
  case lit: Literal[_] => lit.asInstanceOf[Literal[Any]].value
}

Contributor

Actually, it would be good to see some examples. In general, my feeling is that adding a type parameter to a subclass but not the base class is not going to be very useful.

Contributor Author

The alternative is to cast the value instead, so you have to cast either way. You can't get around casting when the type is discarded. I don't think it is a good idea to throw away type information in all cases just because it isn't useful in some cases.

Here's an example of how it is used in Iceberg in expression evaluation:

    public <T> Boolean lt(BoundReference<T> ref, Literal<T> lit) {
      Comparator<T> cmp = lit.comparator();
      return cmp.compare(ref.get(struct), lit.value()) < 0;
    }

In Iceberg, expression binding guarantees that the literal's type matches the reference's type. With that information, this code knows that the value returned by the reference's get method matches the type of the comparator and of the literal value.

/**
* Returns the literal value.
*/
T value();

/**
* Returns the SQL data type of the literal.
*/
DataType dataType();
}
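To make the casting trade-off discussed above concrete, here is a small sketch (not part of this PR; the class and method names are illustrative) of a source reading a transform argument when all it has is an Expression:

// Sketch only: extracting an int from a transform argument. With only an
// Expression in hand, an instanceof check and a cast are needed either way.
final class TransformArguments {
  private TransformArguments() {
  }

  static int bucketCount(Expression arg) {
    if (arg instanceof Literal) {
      Object value = ((Literal<?>) arg).value();
      if (value instanceof Integer) {
        return (Integer) value;
      }
    }
    throw new IllegalArgumentException("Expected an integer literal: " + arg.describe());
  }
}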
NamedReference.java
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalog.v2.expressions;

import org.apache.spark.annotation.Experimental;

/**
* Represents a field or column reference in the public logical expression API.
*/
@Experimental
public interface NamedReference extends Expression {
/**
* Returns the referenced field name as an array of String parts.
* <p>
* Each string in the returned array represents a field name.
*/
String[] fieldNames();
}
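For example, a hypothetical implementation (not part of this PR) referencing the nested field point.x would report the parts {"point", "x"}:

// Hypothetical NamedReference implementation, included only to illustrate the
// contract of fieldNames() for a nested field such as point.x.
final class FieldRef implements NamedReference {
  private final String[] parts;

  FieldRef(String... parts) {
    this.parts = parts;
  }

  @Override
  public String[] fieldNames() {
    return parts; // e.g. {"point", "x"}
  }

  @Override
  public String describe() {
    return String.join(".", parts);
  }
}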
Transform.java
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql.catalog.v2.expressions;

import org.apache.spark.annotation.Experimental;

/**
* Represents a transform function in the public logical expression API.
* <p>
* For example, the transform date(ts) is used to derive a date value from a timestamp column. The
* transform name is "date" and its argument is a reference to the "ts" column.
*/
@Experimental
public interface Transform extends Expression {
/**
* Returns the transform function name.
*/
String name();
Contributor

how would a library developer know what names are out there?

Contributor Author (@rdblue, Apr 2, 2019)

There isn't a restriction on names. Using the current implementation, it would be possible to express a custom partition function like this:

PARTITION BY (range(last_name, 'A', 'C', 'G', ...))

Spark defines commonly used transformations like bucketing and date transforms. Those transforms will be well-defined so they can eventually be used to pass values to Spark for metadata-only queries.
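For reference, the same source-defined transform could be built through the helper API (a fragment sketch only; range is not a transform Spark defines, and the column and values here mirror the SQL example above):

// Sketch only: the custom range transform above, built programmatically.
Transform lastNameRange = Expressions.apply("range",
    Expressions.column("last_name"),
    Expressions.literal("A"),
    Expressions.literal("C"),
    Expressions.literal("G"));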

Contributor

Not sure if I asked the question clearly. Say I'm a developer who wants to build a new data source: will I need to know what the possible transform functions are? Or are those completely hidden from me?

Contributor Author

When the catalog API is added, it will link to the Expressions class that includes known transforms that have meaning to Spark. It will also note that any transform can be expressed, as I said above. The range transform example can be parsed and passed to the source; it just isn't one defined by Spark.


/**
* Returns all field references in the transform arguments.
*/
NamedReference[] references();
Contributor

why is this here?

Contributor Author

This is for validation. Spark should validate that there aren't any unresolved references.


/**
* Returns the arguments passed to the transform function.
*/
Expression[] arguments();
}
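To tie the pieces together, here is a sketch (not part of this PR; the supported transform names and the validation shown are hypothetical) of how a data source might inspect the transforms passed to it, for example through the TableCatalog API proposed in #24246:

// Sketch only: how a source might inspect requested partitioning.
final class PartitioningValidator {
  private PartitioningValidator() {
  }

  static void validate(Transform[] partitioning) {
    for (Transform t : partitioning) {
      switch (t.name()) {
        case "identity":
        case "bucket":
        case "days":
          break; // transforms this hypothetical source can implement natively
        default:
          throw new IllegalArgumentException("Unsupported transform: " + t.describe());
      }
      for (NamedReference ref : t.references()) {
        // Each reference should resolve against the table schema, e.g. by
        // checking String.join(".", ref.fieldNames()) against column names.
      }
    }
  }
}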