diff --git a/dev/java-code-format-template.xml b/dev/java-code-format-template.xml
index 24c03b55f15..e64b4770212 100644
--- a/dev/java-code-format-template.xml
+++ b/dev/java-code-format-template.xml
@@ -32,8 +32,26 @@
+
+
+
+
+
+
+
@@ -80,14 +98,23 @@
+
-
+
+
+
+
+
+
+
+
+
diff --git a/integration/spark/src/main/scala/org/apache/spark/rdd/DummyLoadRDD.scala b/integration/spark/src/main/scala/org/apache/spark/rdd/DummyLoadRDD.scala
index df4686b7b8d..3015e3e7417 100644
--- a/integration/spark/src/main/scala/org/apache/spark/rdd/DummyLoadRDD.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/rdd/DummyLoadRDD.scala
@@ -26,6 +26,7 @@ import org.carbondata.core.load.BlockDetails
/**
* this RDD is used to combine blocks at node level
* return (host,Array[BlockDetails])
+ *
* @param prev
*/
class DummyLoadRDD(prev: NewHadoopRDD[LongWritable, Text])
@@ -34,13 +35,14 @@ class DummyLoadRDD(prev: NewHadoopRDD[LongWritable, Text])
override def getPartitions: Array[Partition] = firstParent[(LongWritable, Text)].partitions
override def compute(theSplit: Partition,
- context: TaskContext): Iterator[(String, BlockDetails)] = {
+ context: TaskContext): Iterator[(String, BlockDetails)] = {
new Iterator[(String, BlockDetails)] {
val split = theSplit.asInstanceOf[NewHadoopPartition]
var finished = false
// added to make sure Spark does not schedule all tasks to a single node
// by giving Spark sufficient time to schedule
Thread.sleep(5000)
+
override def hasNext: Boolean = {
if (!finished) {
finished = true
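The compute method above hands Spark a hand-rolled single-element iterator. A dependency-free sketch of that pattern (the class name and the tuple payload are assumptions for illustration; the real code yields (host, BlockDetails) pairs built from the Hadoop split):

    // Minimal sketch: an Iterator that yields exactly one element, tracking
    // completion with a `finished` flag as DummyLoadRDD.compute does.
    class SingleElementIterator[T](value: T) extends Iterator[T] {
      private var finished = false

      override def hasNext: Boolean = !finished

      override def next(): T = {
        if (finished) throw new NoSuchElementException("iterator exhausted")
        finished = true
        value
      }
    }

    object SingleElementIteratorExample extends App {
      val it = new SingleElementIterator(("localhost", "block-0"))
      it.foreach(println)  // prints (localhost,block-0) exactly once
    }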
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonAggregate.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonAggregate.scala
index f3c92a0444c..93cf67581b5 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonAggregate.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonAggregate.scala
@@ -43,13 +43,13 @@ import org.apache.spark.sql.execution.{SparkPlan, UnaryNode}
*/
@DeveloperApi
case class CarbonAggregate(
- partial: Boolean,
- groupingExpressions: Seq[Expression],
- aggregateExpressions: Seq[NamedExpression],
- child: SparkPlan)(@transient sqlContext: SQLContext)
+ partial: Boolean,
+ groupingExpressions: Seq[Expression],
+ aggregateExpressions: Seq[NamedExpression],
+ child: SparkPlan)(@transient sqlContext: SQLContext)
extends UnaryNode {
- override def requiredChildDistribution: Seq[Distribution] =
+ override def requiredChildDistribution: Seq[Distribution] = {
if (partial) {
UnspecifiedDistribution :: Nil
} else {
@@ -59,6 +59,7 @@ case class CarbonAggregate(
ClusteredDistribution(groupingExpressions) :: Nil
}
}
+ }
override def otherCopyArgs: Seq[AnyRef] = sqlContext :: Nil
@@ -77,8 +78,8 @@ case class CarbonAggregate(
* output.
*/
case class ComputedAggregate(unbound: AggregateExpression1,
- aggregate: AggregateExpression1,
- resultAttribute: AttributeReference)
+ aggregate: AggregateExpression1,
+ resultAttribute: AttributeReference)
/** A list of aggregates that need to be computed for each group. */
private[this] val computedAggregates = aggregateExpressions.flatMap { agg =>
@@ -128,75 +129,78 @@ case class CarbonAggregate(
}
}
- override def doExecute(): RDD[InternalRow] = attachTree(this, "execute") {
- if (groupingExpressions.isEmpty) {
- child.execute().mapPartitions { iter =>
- val buffer = newAggregateBuffer()
- var currentRow: InternalRow = null
- while (iter.hasNext) {
- currentRow = iter.next()
- var i = 0
- while (i < buffer.length) {
- buffer(i).update(currentRow)
- i += 1
- }
- }
- val resultProjection = new InterpretedProjection(resultExpressions, computedSchema)
- val aggregateResults = new GenericMutableRow(computedAggregates.length)
-
- var i = 0
- while (i < buffer.length) {
- aggregateResults(i) = buffer(i).eval(EmptyRow)
- i += 1
- }
-
- Iterator(resultProjection(aggregateResults))
- }
- } else {
- child.execute().mapPartitions { iter =>
- val hashTable = new HashMap[InternalRow, Array[AggregateFunction1]]
- val groupingProjection = new InterpretedMutableProjection(groupingExpressions, childOutput)
-
- var currentRow: InternalRow = null
- while (iter.hasNext) {
- currentRow = iter.next()
- val currentGroup = groupingProjection(currentRow)
- var currentBuffer = hashTable.get(currentGroup)
- if (currentBuffer == null) {
- currentBuffer = newAggregateBuffer()
- hashTable.put(currentGroup.copy(), currentBuffer)
+ override def doExecute(): RDD[InternalRow] = {
+ attachTree(this, "execute") {
+ if (groupingExpressions.isEmpty) {
+ child.execute().mapPartitions { iter =>
+ val buffer = newAggregateBuffer()
+ var currentRow: InternalRow = null
+ while (iter.hasNext) {
+ currentRow = iter.next()
+ var i = 0
+ while (i < buffer.length) {
+ buffer(i).update(currentRow)
+ i += 1
+ }
}
+ val resultProjection = new InterpretedProjection(resultExpressions, computedSchema)
+ val aggregateResults = new GenericMutableRow(computedAggregates.length)
var i = 0
- while (i < currentBuffer.length) {
- currentBuffer(i).update(currentRow)
+ while (i < buffer.length) {
+ aggregateResults(i) = buffer(i).eval(EmptyRow)
i += 1
}
- }
-
- new Iterator[InternalRow] {
- private[this] val hashTableIter = hashTable.entrySet().iterator()
- private[this] val aggregateResults = new GenericMutableRow(computedAggregates.length)
- private[this] val resultProjection =
- new InterpretedMutableProjection(resultExpressions,
- computedSchema ++ namedGroups.map(_._2))
- private[this] val joinedRow = new JoinedRow
-
- override final def hasNext: Boolean = hashTableIter.hasNext
- override final def next(): InternalRow = {
- val currentEntry = hashTableIter.next()
- val currentGroup = currentEntry.getKey
- val currentBuffer = currentEntry.getValue
+ Iterator(resultProjection(aggregateResults))
+ }
+ } else {
+ child.execute().mapPartitions { iter =>
+ val hashTable = new HashMap[InternalRow, Array[AggregateFunction1]]
+ val groupingProjection = new InterpretedMutableProjection(groupingExpressions,
+ childOutput)
+
+ var currentRow: InternalRow = null
+ while (iter.hasNext) {
+ currentRow = iter.next()
+ val currentGroup = groupingProjection(currentRow)
+ var currentBuffer = hashTable.get(currentGroup)
+ if (currentBuffer == null) {
+ currentBuffer = newAggregateBuffer()
+ hashTable.put(currentGroup.copy(), currentBuffer)
+ }
var i = 0
while (i < currentBuffer.length) {
- // Evaluating an aggregate buffer returns the result. No row is required since we
- // already added all rows in the group using update.
- aggregateResults(i) = currentBuffer(i).eval(EmptyRow)
+ currentBuffer(i).update(currentRow)
i += 1
}
- resultProjection(joinedRow(aggregateResults, currentGroup))
+ }
+
+ new Iterator[InternalRow] {
+ private[this] val hashTableIter = hashTable.entrySet().iterator()
+ private[this] val aggregateResults = new GenericMutableRow(computedAggregates.length)
+ private[this] val resultProjection =
+ new InterpretedMutableProjection(resultExpressions,
+ computedSchema ++ namedGroups.map(_._2))
+ private[this] val joinedRow = new JoinedRow
+
+ override final def hasNext: Boolean = hashTableIter.hasNext
+
+ override final def next(): InternalRow = {
+ val currentEntry = hashTableIter.next()
+ val currentGroup = currentEntry.getKey
+ val currentBuffer = currentEntry.getValue
+
+ var i = 0
+ while (i < currentBuffer.length) {
+ // Evaluating an aggregate buffer returns the result. No row is required since we
+ // already added all rows in the group using update.
+ aggregateResults(i) = currentBuffer(i).eval(EmptyRow)
+ i += 1
+ }
+ resultProjection(joinedRow(aggregateResults, currentGroup))
+ }
}
}
}
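The grouped branch of doExecute above folds every input row into a per-group buffer held in a java.util.HashMap and then streams one result row per group. A stripped-down sketch of that hash-aggregation loop, summing Long values per String key instead of evaluating Catalyst aggregate functions (the element types and the sumByKey name are assumptions introduced only for illustration):

    import java.util.{HashMap => JHashMap}
    import scala.collection.JavaConverters._

    // Sketch of the per-partition hash aggregation pattern: look up (or create)
    // a buffer for the row's group key, update it in place, then emit one
    // result per group at the end.
    object HashAggregateSketch {
      def sumByKey(rows: Iterator[(String, Long)]): Iterator[(String, Long)] = {
        val hashTable = new JHashMap[String, Array[Long]]()
        while (rows.hasNext) {
          val (key, value) = rows.next()
          var buffer = hashTable.get(key)
          if (buffer == null) {
            buffer = Array(0L)   // stands in for newAggregateBuffer(): one SUM slot
            hashTable.put(key, buffer)
          }
          buffer(0) += value     // stands in for buffer(i).update(currentRow)
        }
        hashTable.asScala.iterator.map { case (key, buffer) => (key, buffer(0)) }
      }

      def main(args: Array[String]): Unit = {
        val out = sumByKey(Iterator(("a", 1L), ("b", 2L), ("a", 3L)))
        out.foreach(println)     // (a,4) and (b,2), order not guaranteed
      }
    }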
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala
index 17cc593eee8..2dd569bb59f 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonBoundReference.scala
@@ -30,11 +30,11 @@ case class CarbonBoundReference(colExp: ColumnExpression, dataType: DataType, nu
type EvaluatedType = Any
- override def toString: String = s"input[" + colExp.getColIndex() + "]"
+ override def toString: String = s"input[" + colExp.getColIndex + "]"
- override def eval(input: InternalRow): Any = input.get(colExp.getColIndex(), dataType)
+ override def eval(input: InternalRow): Any = input.get(colExp.getColIndex, dataType)
- override def name: String = colExp.getColumnName()
+ override def name: String = colExp.getColumnName
override def toAttribute: Attribute = throw new UnsupportedOperationException
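The change above drops the empty parentheses from Java-style getters such as getColIndex and getColumnName. Scala lets callers of a parameterless method omit the parens, and the usual convention is to do so when the method has no side effects. A tiny illustration with a hypothetical Column class (not Carbon's API):

    // Pure accessors declared and called without parentheses.
    class Column(name: String, index: Int) {
      def getColumnName: String = name
      def getColIndex: Int = index
    }

    object AccessorStyleExample extends App {
      val col = new Column("price", 3)
      println(s"input[${col.getColIndex}] -> ${col.getColumnName}")
    }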
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
index 585cccb6111..91df42d8839 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonCatalystOperators.scala
@@ -26,13 +26,13 @@ import org.apache.spark.sql.execution.command.tableModel
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.types.{BooleanType, StringType, TimestampType}
-import org.carbondata.spark.agg.{AverageCarbon, CountCarbon, CountDistinctCarbon, MaxCarbon, MeasureAggregatorUDT, MinCarbon, PositionLiteral, SumCarbon, SumDistinctCarbon}
+import org.carbondata.spark.agg._
/**
* Top command
*/
case class Top(count: Int, topOrBottom: Int, dim: NamedExpression, msr: NamedExpression,
- child: LogicalPlan) extends UnaryNode {
+ child: LogicalPlan) extends UnaryNode {
def output: Seq[Attribute] = child.output
override def references: AttributeSet = {
@@ -43,8 +43,9 @@ case class Top(count: Int, topOrBottom: Int, dim: NamedExpression, msr: NamedExp
object getDB {
- def getDatabaseName(dbName: Option[String], sqlContext: SQLContext): String =
+ def getDatabaseName(dbName: Option[String], sqlContext: SQLContext): String = {
dbName.getOrElse(sqlContext.asInstanceOf[HiveContext].catalog.client.currentDatabase)
+ }
}
@@ -54,8 +55,9 @@ object getDB {
case class ShowSchemaCommand(cmd: Option[String]) extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("result", StringType, nullable = false)())
+ }
}
/**
@@ -64,8 +66,9 @@ case class ShowSchemaCommand(cmd: Option[String]) extends LogicalPlan with Comma
case class ShowCreateCubeCommand(cm: tableModel) extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("createCubeCmd", StringType, nullable = false)())
+ }
}
/**
@@ -75,8 +78,9 @@ case class ShowAggregateTablesCommand(schemaNameOp: Option[String])
extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("tableName", StringType, nullable = false)())
+ }
}
/**
@@ -85,9 +89,10 @@ case class ShowAggregateTablesCommand(schemaNameOp: Option[String])
case class ShowCubeCommand(schemaNameOp: Option[String]) extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("cubeName", StringType, nullable = false)(),
AttributeReference("isRegisteredWithSpark", BooleanType, nullable = false)())
+ }
}
@@ -97,22 +102,24 @@ case class ShowCubeCommand(schemaNameOp: Option[String]) extends LogicalPlan wit
case class ShowAllCubeCommand() extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("schemaName", StringType, nullable = false)(),
AttributeReference("cubeName", StringType, nullable = false)(),
AttributeReference("isRegisteredWithSpark", BooleanType, nullable = false)())
+ }
}
case class SuggestAggregateCommand(
- script: Option[String],
- sugType: Option[String],
- schemaName: Option[String],
- cubeName: String) extends LogicalPlan with Command {
+ script: Option[String],
+ sugType: Option[String],
+ schemaName: Option[String],
+ cubeName: String) extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("SuggestionType", StringType, nullable = false)(),
AttributeReference("Suggestion", StringType, nullable = false)())
+ }
}
/**
@@ -122,12 +129,13 @@ case class ShowTablesDetailedCommand(schemaNameOp: Option[String])
extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("TABLE_CAT", StringType, nullable = true)(),
AttributeReference("TABLE_SCHEM", StringType, nullable = false)(),
AttributeReference("TABLE_NAME", StringType, nullable = false)(),
AttributeReference("TABLE_TYPE", StringType, nullable = false)(),
AttributeReference("REMARKS", StringType, nullable = false)())
+ }
}
/**
@@ -138,11 +146,12 @@ case class ShowLoadsCommand(schemaNameOp: Option[String], cube: String, limit: O
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[Attribute] =
+ override def output: Seq[Attribute] = {
Seq(AttributeReference("LoadSequenceId", StringType, nullable = false)(),
AttributeReference("Status", StringType, nullable = false)(),
AttributeReference("Load Start Time", TimestampType, nullable = false)(),
AttributeReference("Load End Time", TimestampType, nullable = false)())
+ }
}
/**
@@ -152,10 +161,11 @@ case class DescribeFormattedCommand(sql: String, tblIdentifier: Seq[String])
extends LogicalPlan with Command {
override def children: Seq[LogicalPlan] = Seq.empty
- override def output: Seq[AttributeReference] =
+ override def output: Seq[AttributeReference] = {
Seq(AttributeReference("col_name", StringType, nullable = false)(),
AttributeReference("data_type", StringType, nullable = false)(),
AttributeReference("comment", StringType, nullable = false)())
+ }
}
/**
@@ -170,7 +180,7 @@ object PhysicalOperation1 extends PredicateHelper {
Option[Seq[SortOrder]], Option[Expression], LogicalPlan)
def apply(plan: LogicalPlan): Option[ReturnType] = {
- val (fields, filters, child, aliases, groupby, sortOrder, limit) =
+ val (fields, filters, child, _, groupby, sortOrder, limit) =
collectProjectsAndFilters(plan)
Some((fields.getOrElse(child.output), filters, groupby, sortOrder, limit, child))
@@ -192,7 +202,7 @@ object PhysicalOperation1 extends PredicateHelper {
def collectProjectsAndFilters(plan: LogicalPlan):
(Option[Seq[NamedExpression]], Seq[Expression], LogicalPlan,
Map[Attribute, Expression], Option[Seq[Expression]],
- Option[Seq[SortOrder]], Option[Expression]) =
+ Option[Seq[SortOrder]], Option[Expression]) = {
plan match {
case Project(fields, child) =>
val (_, filters, other, aliases, groupby, sortOrder, limit) = collectProjectsAndFilters(
@@ -231,6 +241,7 @@ object PhysicalOperation1 extends PredicateHelper {
case other =>
(None, Nil, other, Map.empty, None, None, None)
}
+ }
def findAggreagateExpression(expr: Expression): Seq[AggregateExpression1] = {
val exprList = expr match {
@@ -250,7 +261,7 @@ object PhysicalOperation1 extends PredicateHelper {
def collectProjectsAndFilters1(plan: LogicalPlan):
(Option[Seq[NamedExpression]], Seq[Expression], LogicalPlan, Map[Attribute, Expression],
- Option[Seq[Expression]], Option[Seq[SortOrder]], Option[Expression]) =
+ Option[Seq[Expression]], Option[Seq[SortOrder]], Option[Expression]) = {
plan match {
case Project(fields, child) =>
val (_, filters, other, aliases, groupby, sortOrder, limit) = collectProjectsAndFilters(
@@ -271,7 +282,7 @@ object PhysicalOperation1 extends PredicateHelper {
child)
val aggExps = aggregateExpressions.map {
case Alias(ref, name) => ref
- case other => other
+ case others => others
}.filter {
case d: AggregateExpression1 => true
case _ => false
@@ -289,17 +300,22 @@ object PhysicalOperation1 extends PredicateHelper {
case other =>
(None, Nil, other, Map.empty, None, None, None)
}
+ }
- private def collectAliases(fields: Seq[Expression]) = fields.collect {
- case a@Alias(child, _) => a.toAttribute.asInstanceOf[Attribute] -> child
- }.toMap
+ private def collectAliases(fields: Seq[Expression]) = {
+ fields.collect {
+ case a@Alias(child, _) => a.toAttribute -> child
+ }.toMap
+ }
- private def substitute(aliases: Map[Attribute, Expression])(expr: Expression) = expr.transform {
- case a@Alias(ref: AttributeReference, name) =>
- aliases.get(ref).map(Alias(_, name)(a.exprId, a.qualifiers)).getOrElse(a)
+ private def substitute(aliases: Map[Attribute, Expression])(expr: Expression) = {
+ expr.transform {
+ case a@Alias(ref: AttributeReference, name) =>
+ aliases.get(ref).map(Alias(_, name)(a.exprId, a.qualifiers)).getOrElse(a)
- case a: AttributeReference =>
- aliases.get(a).map(Alias(_, a.name)(a.exprId, a.qualifiers)).getOrElse(a)
+ case a: AttributeReference =>
+ aliases.get(a).map(Alias(_, a.name)(a.exprId, a.qualifiers)).getOrElse(a)
+ }
}
}
@@ -323,7 +339,7 @@ object PartialAggregation {
(Seq[Attribute], Seq[NamedExpression], Seq[Expression], Seq[NamedExpression], LogicalPlan)
private def convertAggregatesForPushdown(convertUnknown: Boolean,
- rewrittenAggregateExpressions: Seq[Expression]) = {
+ rewrittenAggregateExpressions: Seq[Expression]) = {
var counter: Int = 0
var updatedExpressions = MutableList[Expression]()
rewrittenAggregateExpressions.foreach(v => {
@@ -331,7 +347,7 @@ object PartialAggregation {
updatedExpressions += updated
counter = counter + 1
})
- updatedExpressions.toSeq
+ updatedExpressions
}
def makePositionLiteral(expr: Expression, index: Int): PositionLiteral = {
@@ -340,7 +356,7 @@ object PartialAggregation {
posLiteral
}
- def convertAggregate(current: Expression, index: Int, convertUnknown: Boolean): Expression =
+ def convertAggregate(current: Expression, index: Int, convertUnknown: Boolean): Expression = {
if (convertUnknown) {
current.transform {
case a@SumCarbon(_, _) => a
@@ -372,8 +388,8 @@ object PartialAggregation {
makePositionLiteral(attr, index), cast.dataType)
case a@CountDistinct(attr: AttributeReference) => CountDistinctCarbon(
makePositionLiteral(attr, index))
- case a@CountDistinct(childSeq) if (childSeq.size == 1) =>
- childSeq(0) match {
+ case a@CountDistinct(childSeq) if childSeq.size == 1 =>
+ childSeq.head match {
case attr: AttributeReference => CountDistinctCarbon(makePositionLiteral(attr, index))
case _ => a
}
@@ -387,75 +403,86 @@ object PartialAggregation {
}
}
}
+ }
def unapply(plan: LogicalPlan): Option[ReturnType] = unapply((plan, false))
- def unapply(combinedPlan: (LogicalPlan, Boolean)): Option[ReturnType] = combinedPlan._1 match {
- case Aggregate(groupingExpressions, aggregateExpressionsOrig, child) =>
-
- // if detailed query dont convert aggregate expressions to Carbon Aggregate expressions
- val aggregateExpressions =
- if (combinedPlan._2) aggregateExpressionsOrig
- else convertAggregatesForPushdown(false, aggregateExpressionsOrig)
- // Collect all aggregate expressions.
- val allAggregates =
- aggregateExpressions.flatMap(_ collect { case a: AggregateExpression1 => a })
- // Collect all aggregate expressions that can be computed partially.
- val partialAggregates =
- aggregateExpressions.flatMap(_ collect { case p: PartialAggregate1 => p })
-
- // Only do partial aggregation if supported by all aggregate expressions.
- if (allAggregates.size == partialAggregates.size) {
- // Create a map of expressions to their partial evaluations for all aggregate expressions.
- val partialEvaluations: Map[TreeNodeRef, SplitEvaluation] =
- partialAggregates.map(a => (new TreeNodeRef(a), a.asPartial)).toMap
-
- // We need to pass all grouping expressions though so the grouping can happen a second
- // time. However some of them might be unnamed so we alias them allowing them to be
- // referenced in the second aggregation.
- val namedGroupingExpressions: Map[Expression, NamedExpression] = groupingExpressions.map {
- case n: NamedExpression => (n, n)
- case other => (other, Alias(other, "PartialGroup")())
- }.toMap
-
- // Replace aggregations with a new expression that computes the result from the already
- // computed partial evaluations and grouping values.
- val rewrittenAggregateExpressions = aggregateExpressions.map(_.transformUp {
- case e: Expression if partialEvaluations.contains(new TreeNodeRef(e)) =>
- partialEvaluations(new TreeNodeRef(e)).finalEvaluation
-
- case e: Expression =>
- // Should trim aliases around `GetField`s. These aliases are introduced while
- // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
- // (Should we just turn `GetField` into a `NamedExpression`?)
- namedGroupingExpressions.collectFirst {
- case (expr, ne) if expr semanticEquals e => ne.toAttribute
- }.getOrElse(e)
- }).asInstanceOf[Seq[NamedExpression]]
-
- val partialComputation =
- (namedGroupingExpressions.values ++
- partialEvaluations.values.flatMap(_.partialEvaluations)).toSeq
-
- // Convert the other aggregations for push down to Carbon layer. Here don't touch earlier
- // converted native carbon aggregators.
- val convertedPartialComputation =
- if (combinedPlan._2) partialComputation
- else convertAggregatesForPushdown(true, partialComputation)
- .asInstanceOf[Seq[NamedExpression]]
-
- val namedGroupingAttributes = namedGroupingExpressions.values.map(_.toAttribute).toSeq
-
- Some(
- (namedGroupingAttributes,
- rewrittenAggregateExpressions,
- groupingExpressions,
- convertedPartialComputation,
- child))
- } else {
- None
- }
- case _ => None
+ def unapply(combinedPlan: (LogicalPlan, Boolean)): Option[ReturnType] = {
+ combinedPlan._1 match {
+ case Aggregate(groupingExpressions, aggregateExpressionsOrig, child) =>
+
+          // if detailed query, don't convert aggregate expressions to Carbon Aggregate expressions
+ val aggregateExpressions =
+ if (combinedPlan._2) {
+ aggregateExpressionsOrig
+ }
+ else {
+ convertAggregatesForPushdown(false, aggregateExpressionsOrig)
+ }
+ // Collect all aggregate expressions.
+ val allAggregates =
+ aggregateExpressions.flatMap(_ collect { case a: AggregateExpression1 => a })
+ // Collect all aggregate expressions that can be computed partially.
+ val partialAggregates =
+ aggregateExpressions.flatMap(_ collect { case p: PartialAggregate1 => p })
+
+ // Only do partial aggregation if supported by all aggregate expressions.
+ if (allAggregates.size == partialAggregates.size) {
+ // Create a map of expressions to their partial evaluations for all aggregate expressions.
+ val partialEvaluations: Map[TreeNodeRef, SplitEvaluation] =
+ partialAggregates.map(a => (new TreeNodeRef(a), a.asPartial)).toMap
+
+ // We need to pass all grouping expressions though so the grouping can happen a second
+ // time. However some of them might be unnamed so we alias them allowing them to be
+ // referenced in the second aggregation.
+ val namedGroupingExpressions: Map[Expression, NamedExpression] = groupingExpressions.map {
+ case n: NamedExpression => (n, n)
+ case other => (other, Alias(other, "PartialGroup")())
+ }.toMap
+
+ // Replace aggregations with a new expression that computes the result from the already
+ // computed partial evaluations and grouping values.
+ val rewrittenAggregateExpressions = aggregateExpressions.map(_.transformUp {
+ case e: Expression if partialEvaluations.contains(new TreeNodeRef(e)) =>
+ partialEvaluations(new TreeNodeRef(e)).finalEvaluation
+
+ case e: Expression =>
+ // Should trim aliases around `GetField`s. These aliases are introduced while
+ // resolving struct field accesses, because `GetField` is not a `NamedExpression`.
+ // (Should we just turn `GetField` into a `NamedExpression`?)
+ namedGroupingExpressions.collectFirst {
+ case (expr, ne) if expr semanticEquals e => ne.toAttribute
+ }.getOrElse(e)
+ }).asInstanceOf[Seq[NamedExpression]]
+
+ val partialComputation =
+ (namedGroupingExpressions.values ++
+ partialEvaluations.values.flatMap(_.partialEvaluations)).toSeq
+
+ // Convert the other aggregations for push down to Carbon layer. Here don't touch earlier
+ // converted native carbon aggregators.
+ val convertedPartialComputation =
+ if (combinedPlan._2) {
+ partialComputation
+ }
+ else {
+ convertAggregatesForPushdown(true, partialComputation)
+ .asInstanceOf[Seq[NamedExpression]]
+ }
+
+ val namedGroupingAttributes = namedGroupingExpressions.values.map(_.toAttribute).toSeq
+
+ Some(
+ (namedGroupingAttributes,
+ rewrittenAggregateExpressions,
+ groupingExpressions,
+ convertedPartialComputation,
+ child))
+ } else {
+ None
+ }
+ case _ => None
+ }
}
}
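Most of the changes in this file wrap multi-line expression bodies (the ShowXxxCommand.output overrides, getDatabaseName, collectProjectsAndFilters, unapply) in braces, the style this patch standardises on. A small illustration with a hypothetical command class, not one of Carbon's:

    // A method whose expression body spans several lines gets explicit braces.
    case class ShowThingsCommand(filter: Option[String]) {
      def output: Seq[String] = {
        Seq("name",
          "status")
      }
    }

    object BraceStyleExample extends App {
      println(ShowThingsCommand(None).output.mkString(", "))  // name, status
    }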
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonContext.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonContext.scala
index 143811b919b..5779ad42030 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonContext.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonContext.scala
@@ -48,6 +48,8 @@ class CarbonContext(val sc: SparkContext, val storePath: String) extends HiveCon
experimental.extraStrategies = CarbonStrategy.getStrategy(self) :: Nil
+ val LOGGER = LogServiceFactory.getLogService(CarbonContext.getClass.getName)
+
override def sql(sql: String): SchemaRDD = {
// queryId will be unique for each query, creating query detail holder
val queryId: String = System.nanoTime() + ""
@@ -55,9 +57,7 @@ class CarbonContext(val sc: SparkContext, val storePath: String) extends HiveCon
CarbonContext.updateCarbonPorpertiesPath(this)
val sqlString = sql.toUpperCase
- val LOGGER = LogServiceFactory.getLogService(CarbonContext.getClass().getName())
- LOGGER
- .info(s"Query [$sqlString]")
+ LOGGER.info(s"Query [$sqlString]")
val logicPlan: LogicalPlan = parseSql(sql)
val result = new CarbonDataFrameRDD(sql: String, this, logicPlan)
@@ -83,22 +83,22 @@ object CarbonContext {
* @param escapeChar - This parameter will be null by default; there won't be any validation if the
* default escape character (\) is found in the raw CSV file
* @param multiLine - This parameter will be check for end of quote character if escape character
- * & quote character is set.
+ * & quote character is set.
* if set as false, it will check for end of quote character within the line
* and skips only 1 line if end of quote not found
* if set as true, By default it will check for 10000 characters in multiple
* lines for end of quote & skip all lines if end of quote not found.
*/
final def partitionData(
- schemaName: String = null,
- cubeName: String,
- factPath: String,
- targetPath: String,
- delimiter: String = ",",
- quoteChar: String = "\"",
- fileHeader: String = null,
- escapeChar: String = null,
- multiLine: Boolean = false)(hiveContext: HiveContext): String = {
+ schemaName: String = null,
+ cubeName: String,
+ factPath: String,
+ targetPath: String,
+ delimiter: String = ",",
+ quoteChar: String = "\"",
+ fileHeader: String = null,
+ escapeChar: String = null,
+ multiLine: Boolean = false)(hiveContext: HiveContext): String = {
updateCarbonPorpertiesPath(hiveContext)
var schemaNameLocal = schemaName
if (schemaNameLocal == null) {
@@ -118,7 +118,7 @@ object CarbonContext {
carbonPropertiesFilePath + "/" + "carbon.properties")
}
// configuring the zookeeper URl .
- var zooKeeperUrl = hiveContext.getConf("spark.deploy.zookeeper.url", "127.0.0.1:2181")
+ val zooKeeperUrl = hiveContext.getConf("spark.deploy.zookeeper.url", "127.0.0.1:2181")
CarbonProperties.getInstance().addProperty("spark.deploy.zookeeper.url", zooKeeperUrl)
@@ -133,8 +133,10 @@ object CarbonContext {
}
def addInstance(sc: SparkContext, cc: CarbonContext): Unit = {
- if (cache.contains(sc)) sys.error("creating multiple instances of CarbonContext is not " +
- "allowed using the same SparkContext instance")
+ if (cache.contains(sc)) {
+ sys.error("creating multiple instances of CarbonContext is not " +
+ "allowed using the same SparkContext instance")
+ }
cache(sc) = cc
}
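Two small idioms show up in this file: a local that is never reassigned becomes a val (zooKeeperUrl), and a single-statement if body gets explicit braces (addInstance). A dependency-free sketch of the guarded-registration shape, with a plain String key standing in for the SparkContext (an assumption for illustration):

    import scala.collection.mutable

    // Sketch of the addInstance guard with explicit braces and vals.
    object InstanceRegistry {
      private val cache = mutable.Map[String, String]()

      def addInstance(key: String, value: String): Unit = {
        if (cache.contains(key)) {
          sys.error("creating multiple instances is not " +
            "allowed for the same key")
        }
        cache(key) = value
      }
    }

    object InstanceRegistryExample extends App {
      InstanceRegistry.addInstance("sc-1", "cc-1")
      // A second call with the same key would now fail fast.
    }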
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceRelation.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceRelation.scala
index 2537470162f..2e6b8a60482 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceRelation.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonDatasourceRelation.scala
@@ -37,7 +37,7 @@ import org.carbondata.spark.{CarbonOption, _}
* Carbon relation provider compliant to data source api.
* Creates carbon relations
*/
-class CarbonSource extends RelationProvider with CreatableRelationProvider{
+class CarbonSource extends RelationProvider with CreatableRelationProvider {
/**
* Returns a new base relation with the given parameters.
@@ -45,18 +45,18 @@ class CarbonSource extends RelationProvider with CreatableRelationProvider{
* by the Map that is passed to the function.
*/
override def createRelation(
- sqlContext: SQLContext,
- parameters: Map[String, String]): BaseRelation = {
+ sqlContext: SQLContext,
+ parameters: Map[String, String]): BaseRelation = {
val options = new CarbonOption(parameters)
val tableIdentifier = options.tableIdentifier.split("""\.""").toSeq
CarbonDatasourceRelation(tableIdentifier, None)(sqlContext)
}
override def createRelation(
- sqlContext: SQLContext,
- mode: SaveMode,
- parameters: Map[String, String],
- data: SchemaRDD): BaseRelation = {
+ sqlContext: SQLContext,
+ mode: SaveMode,
+ parameters: Map[String, String],
+ data: SchemaRDD): BaseRelation = {
// To avoid the Derby problem, the dataframe needs to be written and read using CarbonContext
require(sqlContext.isInstanceOf[CarbonContext], "Error in saving dataframe to carbon file, " +
@@ -71,13 +71,13 @@ class CarbonSource extends RelationProvider with CreatableRelationProvider{
val storePath = CarbonContext.getInstance(sqlContext.sparkContext).storePath
val tablePath = new Path(storePath + "/" + options.dbName + "/" + options.tableName)
val isExists = tablePath.getFileSystem(sqlContext.sparkContext.hadoopConfiguration)
- .exists(tablePath)
+ .exists(tablePath)
val (doSave, doAppend) = (mode, isExists) match {
case (SaveMode.ErrorIfExists, true) =>
sys.error(s"ErrorIfExists mode, path $storePath already exists.")
case (SaveMode.Overwrite, true) =>
val cc = CarbonContext.getInstance(sqlContext.sparkContext)
- cc.sql(s"DROP CUBE IF EXISTS ${options.dbName}.${options.tableName}")
+ cc.sql(s"DROP CUBE IF EXISTS ${ options.dbName }.${ options.tableName }")
(true, false)
case (SaveMode.Overwrite, false) | (SaveMode.ErrorIfExists, false) =>
(true, false)
@@ -103,14 +103,15 @@ class CarbonSource extends RelationProvider with CreatableRelationProvider{
* This relation is stored to hive metastore
*/
private[sql] case class CarbonDatasourceRelation(
- tableIdentifier: Seq[String],
- alias: Option[String])
- (@transient context: SQLContext)
+ tableIdentifier: Seq[String],
+ alias: Option[String])
+ (@transient context: SQLContext)
extends BaseRelation with Serializable with Logging {
- def carbonRelation: CarbonRelation =
+ def carbonRelation: CarbonRelation = {
CarbonEnv.getInstance(context).carbonCatalog.lookupRelation2(tableIdentifier, None)(sqlContext)
.asInstanceOf[CarbonRelation]
+ }
def schema: StructType = carbonRelation.schema
@@ -122,29 +123,29 @@ private[sql] case class CarbonDatasourceRelation(
* Represents logical plan for one carbon cube
*/
case class CarbonRelation(schemaName: String,
- cubeName: String,
- metaData: CarbonMetaData,
- cubeMeta: TableMeta,
- alias: Option[String])(@transient sqlContext: SQLContext)
+ cubeName: String,
+ metaData: CarbonMetaData,
+ cubeMeta: TableMeta,
+ alias: Option[String])(@transient sqlContext: SQLContext)
extends LeafNode with MultiInstanceRelation {
def tableName: String = cubeName
def recursiveMethod(dimName: String): String = {
metaData.carbonTable.getChildren(dimName).asScala.map(childDim => {
- childDim.getDataType().toString.toLowerCase match {
- case "array" => s"array<${getArrayChildren(childDim.getColName)}>"
- case "struct" => s"struct<${getStructChildren(childDim.getColName)}>"
- case dType => s"${childDim.getColName()}:${dType}"
+ childDim.getDataType.toString.toLowerCase match {
+ case "array" => s"array<${ getArrayChildren(childDim.getColName) }>"
+ case "struct" => s"struct<${ getStructChildren(childDim.getColName) }>"
+ case dType => s"${ childDim.getColName }:${ dType }"
}
}).mkString(",")
}
def getArrayChildren(dimName: String): String = {
metaData.carbonTable.getChildren(dimName).asScala.map(childDim => {
- childDim.getDataType().toString.toLowerCase match {
- case "array" => s"array<${getArrayChildren(childDim.getColName())}>"
- case "struct" => s"struct<${getStructChildren(childDim.getColName())}>"
+ childDim.getDataType.toString.toLowerCase match {
+ case "array" => s"array<${ getArrayChildren(childDim.getColName) }>"
+ case "struct" => s"struct<${ getStructChildren(childDim.getColName) }>"
case dType => dType
}
}).mkString(",")
@@ -152,35 +153,36 @@ case class CarbonRelation(schemaName: String,
def getStructChildren(dimName: String): String = {
metaData.carbonTable.getChildren(dimName).asScala.map(childDim => {
- childDim.getDataType().toString.toLowerCase match {
+ childDim.getDataType.toString.toLowerCase match {
case "array" => s"${
- childDim.getColName().substring(dimName.length() + 1)
- }:array<${getArrayChildren(childDim.getColName())}>"
+ childDim.getColName.substring(dimName.length() + 1)
+ }:array<${ getArrayChildren(childDim.getColName) }>"
case "struct" => s"struct<${
metaData.carbonTable.getChildren(childDim.getColName)
- .asScala.map(f => s"${recursiveMethod(f.getColName)}")
+ .asScala.map(f => s"${ recursiveMethod(f.getColName) }")
}>"
- case dType => s"${childDim.getColName.substring(dimName.length() + 1)}:${dType}"
+ case dType => s"${ childDim.getColName.substring(dimName.length() + 1) }:${ dType }"
}
}).mkString(",")
}
- override def newInstance(): LogicalPlan =
+ override def newInstance(): LogicalPlan = {
CarbonRelation(schemaName, cubeName, metaData, cubeMeta, alias)(sqlContext)
.asInstanceOf[this.type]
+ }
val dimensionsAttr = {
val sett = new LinkedHashSet(
cubeMeta.carbonTable.getDimensionByTableName(cubeMeta.carbonTableIdentifier.getTableName)
- .asScala.toSeq.asJava)
+ .asScala.asJava)
sett.asScala.toSeq.map(dim => {
val output: DataType = metaData.carbonTable
- .getDimensionByName(metaData.carbonTable.getFactTableName, dim.getColName).getDataType()
+ .getDimensionByName(metaData.carbonTable.getFactTableName, dim.getColName).getDataType
.toString.toLowerCase match {
case "array" => CarbonMetastoreTypes
- .toDataType(s"array<${getArrayChildren(dim.getColName)}>")
+ .toDataType(s"array<${ getArrayChildren(dim.getColName) }>")
case "struct" => CarbonMetastoreTypes
- .toDataType(s"struct<${getStructChildren(dim.getColName)}>")
+ .toDataType(s"struct<${ getStructChildren(dim.getColName) }>")
case dType => CarbonMetastoreTypes.toDataType(dType)
}
@@ -196,26 +198,28 @@ case class CarbonRelation(schemaName: String,
new LinkedHashSet(
cubeMeta.carbonTable.
getMeasureByTableName(cubeMeta.carbonTable.getFactTableName).
- asScala.toSeq.asJava).asScala.toSeq.map(x => AttributeReference(
- x.getColName,
- CarbonMetastoreTypes.toDataType(
- metaData.carbonTable.getMeasureByName(factTable, x.getColName).getDataType.toString
- .toLowerCase match {
- case "int" => "double"
- case x => x
- }),
- nullable = true)(qualifiers = tableName +: alias.toSeq))
+ asScala.asJava).asScala.toSeq.map(x => AttributeReference(
+ x.getColName,
+ CarbonMetastoreTypes.toDataType(
+ metaData.carbonTable.getMeasureByName(factTable, x.getColName).getDataType.toString
+ .toLowerCase match {
+ case "int" => "double"
+ case others => others
+ }),
+ nullable = true)(qualifiers = tableName +: alias.toSeq))
}
- override val output = (dimensionsAttr ++ measureAttr).toSeq
+ override val output = dimensionsAttr ++ measureAttr
// TODO: Use data from the footers.
override lazy val statistics = Statistics(sizeInBytes = sqlContext.conf.defaultSizeInBytes)
- override def equals(other: Any): Boolean = other match {
- case p: CarbonRelation =>
- p.schemaName == schemaName && p.output == output && p.cubeName == cubeName
- case _ => false
+ override def equals(other: Any): Boolean = {
+ other match {
+ case p: CarbonRelation =>
+ p.schemaName == schemaName && p.output == output && p.cubeName == cubeName
+ case _ => false
+ }
}
}
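Besides the interpolation-spacing and accessor cleanups, this file's recursiveMethod / getArrayChildren / getStructChildren build Hive-style type strings such as array<...> and struct<...> for nested dimensions. A hedged, self-contained sketch of that recursion over a toy schema model (TypeNode and its subclasses are assumptions, not Carbon's metadata API):

    sealed trait TypeNode
    case class Primitive(name: String, dataType: String) extends TypeNode
    case class ArrayNode(name: String, element: TypeNode) extends TypeNode
    case class StructNode(name: String, fields: Seq[TypeNode]) extends TypeNode

    object TypeStringExample extends App {
      // Render a node as a Hive-style "name:type" string, recursing into children.
      def render(node: TypeNode): String = node match {
        case Primitive(name, dataType) => s"$name:$dataType"
        case ArrayNode(name, element)  => s"$name:array<${ render(element) }>"
        case StructNode(name, fields)  => s"$name:struct<${ fields.map(render).mkString(",") }>"
      }

      val schema = StructNode("address",
        Seq(Primitive("city", "string"), ArrayNode("phones", Primitive("phone", "string"))))
      println(render(schema))
      // address:struct<city:string,phones:array<phone:string>>
    }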
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
index 5120806fc26..11ae76f41b0 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonEnv.scala
@@ -22,7 +22,7 @@ import org.apache.spark.sql.hive.{CarbonMetastoreCatalog, HiveContext}
/**
* Carbon Environment for unified context
*/
-case class CarbonEnv(val carbonContext: HiveContext, val carbonCatalog: CarbonMetastoreCatalog)
+case class CarbonEnv(carbonContext: HiveContext, carbonCatalog: CarbonMetastoreCatalog)
object CarbonEnv {
val className = classOf[CarbonEnv].getCanonicalName
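The CarbonEnv change removes redundant val markers: parameters of a case class primary constructor are already public vals, so prefixing them with val adds nothing. A two-line demonstration (the Env name and fields are hypothetical):

    // Case class parameters are vals by default; no explicit `val` is needed.
    case class Env(contextName: String, catalogName: String)

    object EnvExample extends App {
      val env = Env("hive", "carbon")
      println(env.contextName)  // fields are accessible as public vals
    }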
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonOperators.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonOperators.scala
index c131c04dcb2..88797024923 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonOperators.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonOperators.scala
@@ -54,15 +54,14 @@ import org.carbondata.spark.rdd.CarbonQueryRDD
import org.carbondata.spark.util.{CarbonQueryUtil, CarbonScalaUtil, QueryPlanUtil}
case class CarbonCubeScan(
-
- var attributes: Seq[Attribute],
- relation: CarbonRelation,
- dimensionPredicates: Seq[Expression],
- aggExprs: Option[Seq[Expression]],
- sortExprs: Option[Seq[SortOrder]],
- limitExpr: Option[Expression],
- isGroupByPresent: Boolean,
- detailQuery: Boolean = false)(@transient val oc: SQLContext)
+ var attributes: Seq[Attribute],
+ relation: CarbonRelation,
+ dimensionPredicates: Seq[Expression],
+ aggExprs: Option[Seq[Expression]],
+ sortExprs: Option[Seq[SortOrder]],
+ limitExpr: Option[Expression],
+ isGroupByPresent: Boolean,
+ detailQuery: Boolean = false)(@transient val oc: SQLContext)
extends LeafNode {
val cubeName = relation.cubeName
@@ -76,19 +75,19 @@ case class CarbonCubeScan(
@transient val carbonCatalog = sqlContext.catalog.asInstanceOf[CarbonMetastoreCatalog]
def processAggregateExpr(plan: CarbonQueryPlan, currentAggregate: AggregateExpression1,
- queryOrder: Int): Int = {
+ queryOrder: Int): Int = {
currentAggregate match {
case SumCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _), _) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.SUM)
m1.setQueryOrder(queryOrder)
plan.addMeasure(m1)
} else {
val dims = selectedDims.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (dims.length > 0) {
+ if (dims.nonEmpty) {
val d1 = new QueryDimension(attr.name)
d1.setQueryOrder(queryOrder)
plan.addAggDimAggInfo(d1.getColumnName, "sum", d1.getQueryOrder)
@@ -99,14 +98,14 @@ case class CarbonCubeScan(
case CountCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _)) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.COUNT)
m1.setQueryOrder(queryOrder)
plan.addMeasure(m1)
} else {
val dims = selectedDims.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (dims.length > 0) {
+ if (dims.nonEmpty) {
val d1 = new QueryDimension(attr.name)
d1.setQueryOrder(queryOrder)
plan.addAggDimAggInfo(d1.getColumnName, "count", d1.getQueryOrder)
@@ -136,14 +135,14 @@ case class CarbonCubeScan(
queryOrder + 1
case CountDistinctCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _)) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.DISTINCT_COUNT)
m1.setQueryOrder(queryOrder)
plan.addMeasure(m1)
} else {
val dims = selectedDims.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (dims.length > 0) {
+ if (dims.nonEmpty) {
val d1 = new QueryDimension(attr.name)
d1.setQueryOrder(queryOrder)
plan.addAggDimAggInfo(d1.getColumnName, "distinct-count", d1.getQueryOrder)
@@ -154,14 +153,14 @@ case class CarbonCubeScan(
case AverageCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _), _) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.AVERAGE)
m1.setQueryOrder(queryOrder)
plan.addMeasure(m1)
} else {
val dims = selectedDims.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (dims.length > 0) {
+ if (dims.nonEmpty) {
val d1 = new QueryDimension(attr.name)
d1.setQueryOrder(queryOrder)
plan.addAggDimAggInfo(d1.getColumnName, "avg", d1.getQueryOrder)
@@ -172,7 +171,7 @@ case class CarbonCubeScan(
case MinCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _), _) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.MIN)
m1.setQueryOrder(queryOrder)
@@ -190,14 +189,14 @@ case class CarbonCubeScan(
case MaxCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _), _) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.MAX)
m1.setQueryOrder(queryOrder)
plan.addMeasure(m1)
} else {
val dims = selectedDims.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (dims.length > 0) {
+ if (dims.nonEmpty) {
val d1 = new QueryDimension(attr.name)
d1.setQueryOrder(queryOrder)
plan.addAggDimAggInfo(d1.getColumnName, "max", d1.getQueryOrder)
@@ -208,7 +207,7 @@ case class CarbonCubeScan(
case SumDistinctCarbon(posLiteral@PositionLiteral(attr: AttributeReference, _), _) =>
val msrs = selectedMsrs.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (msrs.length > 0) {
+ if (msrs.nonEmpty) {
val m1 = new QueryMeasure(attr.name)
m1.setAggregateFunction(CarbonCommonConstants.SUM_DISTINCT)
m1.setQueryOrder(queryOrder)
@@ -239,7 +238,7 @@ case class CarbonCubeScan(
attributes.map(
attr => {
val carbonDimension = carbonTable.getDimensionByName(carbonTable.getFactTableName
- , attr.name)
+ , attr.name)
if (carbonDimension != null) {
// TODO if we can add an ordinal in carbonDimension, it will be good
allDims += attr.name
@@ -248,8 +247,8 @@ case class CarbonCubeScan(
queryOrder = queryOrder + 1
selectedDims += dim
} else {
- val carbonMeasure = carbonTable.getMeasureByName(carbonTable.getFactTableName()
- , attr.name)
+ val carbonMeasure = carbonTable.getMeasureByName(carbonTable.getFactTableName
+ , attr.name)
if (carbonMeasure != null) {
val m1 = new QueryMeasure(attr.name)
m1.setQueryOrder(queryOrder)
@@ -264,12 +263,12 @@ case class CarbonCubeScan(
// measure and dimensions
// Unknown aggregates & Expressions will use custom aggregator
aggExprs match {
- case Some(a: Seq[Expression]) if (!forceDetailedQuery) =>
+ case Some(a: Seq[Expression]) if !forceDetailedQuery =>
a.foreach {
case attr@AttributeReference(_, _, _, _) => // Add all the references to carbon query
val carbonDimension = selectedDims
.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (carbonDimension.size > 0) {
+ if (carbonDimension.nonEmpty) {
val dim = new QueryDimension(attr.name)
dim.setQueryOrder(queryOrder)
plan.addDimension(dim)
@@ -277,7 +276,7 @@ case class CarbonCubeScan(
} else {
val carbonMeasure = selectedMsrs
.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (carbonMeasure.size > 0) {
+ if (carbonMeasure.nonEmpty) {
// added by vishal as we are adding for dimension so need to add to measure list
// Carbon does not support group by on measure column so throwing exception to
// make it detail query
@@ -289,14 +288,14 @@ case class CarbonCubeScan(
// So, let's fall back to detailed query flow
throw new Exception(
"Some attributes referred looks derived columns. So, force to detailequery " +
- attr.name)
+ attr.name)
}
}
outputColumns += attr
- case par: Alias if par.children(0).isInstanceOf[AggregateExpression1] =>
+ case par: Alias if par.children.head.isInstanceOf[AggregateExpression1] =>
outputColumns += par.toAttribute
queryOrder = processAggregateExpr(plan,
- par.children(0).asInstanceOf[AggregateExpression1], queryOrder)
+ par.children.head.asInstanceOf[AggregateExpression1], queryOrder)
case _ => forceDetailedQuery = true
}
@@ -305,18 +304,18 @@ case class CarbonCubeScan(
if (forceDetailedQuery) {
// First clear the model if Msrs, Expressions and AggDimAggInfo filled
- plan.getDimensions().clear()
- plan.getMeasures().clear()
- plan.getDimAggregatorInfos().clear()
- plan.getExpressions().clear()
+ plan.getDimensions.clear()
+ plan.getMeasures.clear()
+ plan.getDimAggregatorInfos.clear()
+ plan.getExpressions.clear()
// Fill the selected dimensions & measures obtained from
// attributes to query plan for detailed query
- selectedDims.foreach(plan.addDimension(_))
- selectedMsrs.foreach(plan.addMeasure(_))
+ selectedDims.foreach(plan.addDimension)
+ selectedMsrs.foreach(plan.addMeasure)
}
else {
- attributes = outputColumns.toSeq
+ attributes = outputColumns
}
val orderList = new ArrayList[QueryDimension]()
@@ -325,28 +324,28 @@ case class CarbonCubeScan(
sortExprs match {
case Some(a: Seq[SortOrder]) =>
a.foreach {
- case SortOrder(SumCarbon(attr: AttributeReference, _), order) => plan.getMeasures()
- .asScala.filter(m => m.getColumnName().equalsIgnoreCase(attr.name))(0)
+ case SortOrder(SumCarbon(attr: AttributeReference, _), order) => plan.getMeasures
+ .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name)).head
.setSortOrder(getSortDirection(order))
- case SortOrder(CountCarbon(attr: AttributeReference), order) => plan.getMeasures()
- .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))(0)
+ case SortOrder(CountCarbon(attr: AttributeReference), order) => plan.getMeasures
+ .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name)).head
.setSortOrder(getSortDirection(order))
- case SortOrder(CountDistinctCarbon(attr: AttributeReference), order) => plan.getMeasures()
- .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))(0)
+ case SortOrder(CountDistinctCarbon(attr: AttributeReference), order) => plan.getMeasures
+ .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name)).head
.setSortOrder(getSortDirection(order))
- case SortOrder(AverageCarbon(attr: AttributeReference, _), order) => plan.getMeasures()
- .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))(0)
+ case SortOrder(AverageCarbon(attr: AttributeReference, _), order) => plan.getMeasures
+ .asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name)).head
.setSortOrder(getSortDirection(order))
case SortOrder(attr: AttributeReference, order) =>
val dim = plan.getDimensions
.asScala.filter(m => m.getColumnName.equalsIgnoreCase(attr.name))
- if (!dim.isEmpty) {
- dim(0).setSortOrder(getSortDirection(order))
- orderList.add(dim(0))
+ if (dim.nonEmpty) {
+ dim.head.setSortOrder(getSortDirection(order))
+ orderList.add(dim.head)
} else {
allSortExprPushed = false
}
- case _ => allSortExprPushed = false
+          case _ => allSortExprPushed = false
}
case _ =>
}
@@ -354,18 +353,20 @@ case class CarbonCubeScan(
plan.setSortedDimemsions(orderList)
// limit can be pushed down only if sort is not present or all sort expressions are pushed
- if (sortExprs.isEmpty && forceDetailedQuery) limitExpr match {
- case Some(IntegerLiteral(limit)) =>
- // if (plan.getMeasures.size() == 0 && plan.getDimAggregatorInfos.size() == 0) {
- plan.setLimit(limit)
+ if (sortExprs.isEmpty && forceDetailedQuery) {
+ limitExpr match {
+ case Some(IntegerLiteral(limit)) =>
+ // if (plan.getMeasures.size() == 0 && plan.getDimAggregatorInfos.size() == 0) {
+ plan.setLimit(limit)
// }
- case _ =>
+ case _ =>
+ }
}
plan.setDetailQuery(forceDetailedQuery)
plan.setOutLocationPath(
CarbonProperties.getInstance().getProperty(CarbonCommonConstants.STORE_LOCATION_HDFS))
plan.setQueryId(System.nanoTime() + "")
- if (!dimensionPredicates.isEmpty) {
+ if (dimensionPredicates.nonEmpty) {
val exps = preProcessExpressions(dimensionPredicates)
val expressionVal = transformExpression(exps.head)
// adding dimension used in expression in querystats
@@ -421,7 +422,7 @@ case class CarbonCubeScan(
CarbonScalaUtil.convertSparkToCarbonDataType(dataType))
case Literal(name, dataType) => new
CarbonLiteralExpression(name, CarbonScalaUtil.convertSparkToCarbonDataType(dataType))
- case Cast(left, right) if (!left.isInstanceOf[Literal]) => transformExpression(left)
+ case Cast(left, right) if !left.isInstanceOf[Literal] => transformExpression(left)
case _ =>
new SparkUnknownExpression(expr.transform {
case AttributeReference(name, dataType, _, _) =>
@@ -440,7 +441,7 @@ case class CarbonCubeScan(
def addPushdownFilters(keys: Seq[Expression], filters: Array[Array[Expression]],
- conditions: Option[Expression]) {
+ conditions: Option[Expression]) {
// TODO Values in the IN filter are duplicated; replace the list with a set
val buffer = new ArrayBuffer[Expression]
@@ -463,10 +464,10 @@ case class CarbonCubeScan(
def inputRdd: CarbonQueryRDD[CarbonKey, CarbonValue] = {
val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
// Update the FilterExpressions with extra conditions added through join pushdown
- if (!extraPreds.isEmpty) {
- val exps = preProcessExpressions(extraPreds.toSeq)
+ if (extraPreds.nonEmpty) {
+ val exps = preProcessExpressions(extraPreds)
val expressionVal = transformExpression(exps.head)
- val oldExpressionVal = buildCarbonPlan.getFilterExpression()
+ val oldExpressionVal = buildCarbonPlan.getFilterExpression
if (null == oldExpressionVal) {
buildCarbonPlan.setFilterExpression(expressionVal)
} else {
@@ -485,27 +486,29 @@ case class CarbonCubeScan(
buildCarbonPlan.setQueryId(oc.getConf("queryId", System.nanoTime() + ""))
LOG.info("Selected Table to Query ****** "
- + model.getAbsoluteTableIdentifier.getCarbonTableIdentifier.getTableName)
+ + model.getAbsoluteTableIdentifier.getCarbonTableIdentifier.getTableName)
val cubeCreationTime = carbonCatalog.getCubeCreationTime(relation.schemaName, cubeName)
val schemaLastUpdatedTime =
carbonCatalog.getSchemaLastUpdatedTime(relation.schemaName, cubeName)
val big = new CarbonQueryRDD(
- oc.sparkContext,
- model,
- buildCarbonPlan.getFilterExpression,
- kv,
- conf,
- cubeCreationTime,
- schemaLastUpdatedTime,
- carbonCatalog.storePath)
+ oc.sparkContext,
+ model,
+ buildCarbonPlan.getFilterExpression,
+ kv,
+ conf,
+ cubeCreationTime,
+ schemaLastUpdatedTime,
+ carbonCatalog.storePath)
big
}
def doExecute(): RDD[InternalRow] = {
- def toType(obj: Any): Any = obj match {
- case s: String => UTF8String.fromString(s)
- case _ => obj
+ def toType(obj: Any): Any = {
+ obj match {
+ case s: String => UTF8String.fromString(s)
+ case _ => obj
+ }
}
// count(*) query executed in driver by querying from Btree
if (buildCarbonPlan.isCountStarQuery && null == buildCarbonPlan.getFilterExpression) {
@@ -524,7 +527,7 @@ case class CarbonCubeScan(
} else {
// all the other queries are sent to executor
inputRdd.map { row =>
- val dims = row._1.getKey.map(toType).toArray
+ val dims = row._1.getKey.map(toType)
val values = dims
new GenericMutableRow(values.asInstanceOf[Array[Any]])
}
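The bulk of this file swaps `.length > 0` and `.size > 0` for `.nonEmpty`, `(0)` for `.head`, and `!xs.isEmpty` for `xs.nonEmpty`. These collection idioms read as intent rather than arithmetic and, for List-like collections, nonEmpty avoids walking the whole collection just to compare a length. A small, dependency-free sketch of the same idioms:

    object CollectionIdiomExample extends App {
      val msrs: List[String] = List("sales", "quantity")

      if (msrs.nonEmpty) {        // instead of: msrs.length > 0
        println(msrs.head)        // instead of: msrs(0)
      }

      val dims: List[String] = Nil
      if (dims.isEmpty) {         // instead of: dims.length == 0
        println("no dimensions selected")
      }
    }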
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
index 0f60739e43f..40493f957bf 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/CarbonSqlParser.scala
@@ -20,8 +20,8 @@ package org.apache.spark.sql
import java.util.regex.{Matcher, Pattern}
import scala.collection.JavaConverters._
+import scala.collection.mutable.LinkedHashSet
import scala.language.implicitConversions
-import scala.util.control.Breaks.{break, breakable}
import org.apache.hadoop.hive.ql.lib.Node
import org.apache.hadoop.hive.ql.parse._
@@ -33,7 +33,6 @@ import org.apache.spark.sql.catalyst.trees.CurrentOrigin
import org.apache.spark.sql.execution.command.{DimensionRelation, _}
import org.apache.spark.sql.execution.datasources.DescribeCommand
import org.apache.spark.sql.hive.HiveQlWrapper
-import scala.collection.mutable.LinkedHashSet
/**
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/SparkUnknownCarbonAggregator.scala b/integration/spark/src/main/scala/org/apache/spark/sql/SparkUnknownCarbonAggregator.scala
index 3a90d515772..d5d8bee6dbc 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/SparkUnknownCarbonAggregator.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/SparkUnknownCarbonAggregator.scala
@@ -29,10 +29,10 @@ import org.carbondata.query.aggregator.{CustomMeasureAggregator, MeasureAggregat
import org.carbondata.query.carbonfilterinterface.RowIntf
import org.carbondata.query.expression.ColumnExpression
- /**
- * Custom Aggregator serialized and used to pushdown all aggregate functions from spark layer with
- * expressions to Carbon layer
- */
+/**
+ * Custom Aggregator serialized and used to pushdown all aggregate functions from spark layer with
+ * expressions to Carbon layer
+ */
@SerialVersionUID(-3787749110799088697L)
class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
extends CustomMeasureAggregator {
@@ -60,24 +60,24 @@ class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
)
}
- override def getByteArray(): Array[Byte] = {
+ override def getByteArray: Array[Byte] = {
throw new UnsupportedOperationException("getByteArray is not implemented yet")
}
- override def getDoubleValue(): java.lang.Double = {
+ override def getDoubleValue: java.lang.Double = {
throw new UnsupportedOperationException("getValue() is not a valid method for result")
}
- override def getLongValue(): java.lang.Long = {
+ override def getLongValue: java.lang.Long = {
throw new UnsupportedOperationException("getLongValue() is not a valid method for result")
}
- override def getBigDecimalValue(): java.math.BigDecimal = {
+ override def getBigDecimalValue: java.math.BigDecimal = {
throw new
UnsupportedOperationException("getBigDecimalValue() is not a valid method for result")
}
- override def getValueObject(): Object = {
+ override def getValueObject: Object = {
result.iterator.foreach(v => getPartialFunction.update(v))
@@ -87,7 +87,7 @@ class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
}
override def merge(aggregator: MeasureAggregator): Unit = {
- if (result.size > 0) {
+ if (result.nonEmpty) {
result.iterator.foreach(v => {
getPartialFunction.update(v)
}
@@ -97,15 +97,11 @@ class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
result.clear
}
- if (aggregator.isInstanceOf[SparkUnknownCarbonAggregator]) {
- aggregator.asInstanceOf[SparkUnknownCarbonAggregator].result.iterator.foreach(v => {
- getPartialFunction.update(v)
- }
- )
-
- aggregator.asInstanceOf[SparkUnknownCarbonAggregator].result.clear
- } else {
- throw new Exception("Invalid merge expected type is" + this.getClass().getName())
+ aggregator match {
+ case s: SparkUnknownCarbonAggregator =>
+ s.result.iterator.foreach(v => getPartialFunction.update(v))
+ s.result.clear
+      case _ => throw new Exception("Invalid merge, expected type is " + this.getClass.getName)
}
}
@@ -116,7 +112,7 @@ class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
partialFunction
}
- override def isFirstTime(): Boolean = {
+ override def isFirstTime: Boolean = {
isRowsAggregated
}
@@ -141,18 +137,18 @@ class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
}
override def compareTo(aggre: MeasureAggregator): Int = {
- return 0
+ 0
}
- override def getCopy(): MeasureAggregator = {
- return new SparkUnknownCarbonAggregator(partialAggregate)
+ override def getCopy: MeasureAggregator = {
+ new SparkUnknownCarbonAggregator(partialAggregate)
}
override def setNewValue(newVal: Object): Unit = {
}
- override def getColumns(): java.util.List[ColumnExpression] = {
+ override def getColumns: java.util.List[ColumnExpression] = {
if (allColumns == null) {
allColumns = partialAggregate.flatMap(_ collect { case a: CarbonBoundReference => a.colExp })
.asJava
@@ -162,18 +158,16 @@ class SparkUnknownCarbonAggregator(partialAggregate: AggregateExpression1)
override def agg(row: RowIntf): Unit = {
isRowsAggregated = true
- val values = row.getValues().toSeq.map { value =>
- value match {
- case s: String => org.apache.spark.unsafe.types.UTF8String.fromString(s)
- // solve: java.math.BigDecimal cannot be cast to org.apache.spark.sql.types.Decimal
- case d: java.math.BigDecimal =>
- val javaDecVal = new java.math.BigDecimal(d.toString())
- val scalaDecVal = new scala.math.BigDecimal(javaDecVal)
- val decConverter = new org.apache.spark.sql.types.Decimal()
-
- decConverter.set(scalaDecVal)
- case _ => value
- }
+ val values = row.getValues.toSeq.map {
+ case s: String => org.apache.spark.unsafe.types.UTF8String.fromString(s)
+ // solve: java.math.BigDecimal cannot be cast to org.apache.spark.sql.types.Decimal
+ case d: java.math.BigDecimal =>
+ val javaDecVal = new java.math.BigDecimal(d.toString)
+ val scalaDecVal = new scala.math.BigDecimal(javaDecVal)
+ val decConverter = new org.apache.spark.sql.types.Decimal()
+
+ decConverter.set(scalaDecVal)
+ case others => others
}
result += new GenericMutableRow(values.map(a => a.asInstanceOf[Any]).toArray)
}
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
index 4540a1e2aa6..85a348789d7 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/command/carbonTableSchema.scala
@@ -313,9 +313,8 @@ class TableNewProcessor(cm: tableModel, sqlContext: SQLContext) {
}
else {
- try { {
+ try {
Class.forName(part.partitionClass).newInstance()
- }
} catch {
case e: Exception =>
val cl = part.partitionClass
@@ -656,9 +655,8 @@ class TableProcessor(cm: tableModel, sqlContext: SQLContext) {
}
else {
- try { {
+ try {
Class.forName(part.partitionClass).newInstance()
- }
} catch {
case e: Exception =>
val cl = part.partitionClass
@@ -1047,9 +1045,8 @@ private[sql] case class ShowCreateTable(cm: tableModel, override val output: Seq
}
}
- try { {
+ try {
Class.forName(part.partitionClass).newInstance()
- }
} catch {
case e: Exception =>
val cl = part.partitionClass
@@ -1176,11 +1173,10 @@ private[sql] case class CreateCube(cm: tableModel) extends RunnableCommand {
val catalog = CarbonEnv.getInstance(sqlContext).carbonCatalog
// Need to fill partitioner class when we support partition
val cubePath = catalog.createCubeFromThrift(tableInfo, dbName, cubeName, null)(sqlContext)
- try { {
+ try {
sqlContext.sql(
s"""CREATE TABLE $dbName.$cubeName USING org.apache.spark.sql.CarbonSource""" +
s""" OPTIONS (cubename "$dbName.$cubeName", path "$cubePath") """).collect
- }
} catch {
case e: Exception =>
@@ -1302,7 +1298,7 @@ private[sql] case class LoadCube(
val carbonLock = CarbonLockFactory.getCarbonLockObj(org.carbondata.core.
carbon.metadata.CarbonMetadata.getInstance().getCarbonTable(schemaName + "_" + cubeName).
getMetaDataFilepath, LockUsage.METADATA_LOCK)
- try { {
+ try {
if (carbonLock.lockWithRetries()) {
logInfo("Successfully able to get the cube metadata file lock")
}
@@ -1382,44 +1378,42 @@ private[sql] case class LoadCube(
var partitionStatus = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
try {
// First system has to partition the data first and then call the load data
- {
- if (null == relation.cubeMeta.partitioner.partitionColumn ||
- relation.cubeMeta.partitioner.partitionColumn(0).isEmpty) {
- LOGGER.info("Initiating Direct Load for the Cube : (" +
- schemaName + "." + cubeName + ")")
- carbonLoadModel.setFactFilePath(factPath)
- carbonLoadModel.setCsvDelimiter(CarbonUtil.unescapeChar(delimiter))
- carbonLoadModel.setCsvHeader(fileHeader)
- carbonLoadModel.setDirectLoad(true)
- }
- else {
- val fileType = FileFactory.getFileType(partitionLocation)
- if (FileFactory.isFileExist(partitionLocation, fileType)) {
- val file = FileFactory.getCarbonFile(partitionLocation, fileType)
- CarbonUtil.deleteFoldersAndFiles(file)
- }
- partitionLocation += System.currentTimeMillis()
- FileFactory.mkdirs(partitionLocation, fileType)
- LOGGER.info("Initiating Data Partitioning for the Cube : (" +
- schemaName + "." + cubeName + ")")
- carbonLoadModel.setFactFilePath(partitionLocation)
- partitionStatus = CarbonContext.partitionData(
- schemaName,
- cubeName,
- factPath,
- partitionLocation,
- delimiter,
- quoteChar,
- fileHeader,
- escapeChar, booleanValForMultiLine)(sqlContext.asInstanceOf[HiveContext])
+ if (null == relation.cubeMeta.partitioner.partitionColumn ||
+ relation.cubeMeta.partitioner.partitionColumn(0).isEmpty) {
+ LOGGER.info("Initiating Direct Load for the Cube : (" +
+ schemaName + "." + cubeName + ")")
+ carbonLoadModel.setFactFilePath(factPath)
+ carbonLoadModel.setCsvDelimiter(CarbonUtil.unescapeChar(delimiter))
+ carbonLoadModel.setCsvHeader(fileHeader)
+ carbonLoadModel.setDirectLoad(true)
+ }
+ else {
+ val fileType = FileFactory.getFileType(partitionLocation)
+ if (FileFactory.isFileExist(partitionLocation, fileType)) {
+ val file = FileFactory.getCarbonFile(partitionLocation, fileType)
+ CarbonUtil.deleteFoldersAndFiles(file)
}
- GlobalDictionaryUtil
- .generateGlobalDictionary(sqlContext, carbonLoadModel, relation.cubeMeta.dataPath)
- CarbonDataRDDFactory
- .loadCarbonData(sqlContext, carbonLoadModel, storeLocation, relation.cubeMeta.dataPath,
- kettleHomePath,
- relation.cubeMeta.partitioner, columinar, isAgg = false, partitionStatus)
+ partitionLocation += System.currentTimeMillis()
+ FileFactory.mkdirs(partitionLocation, fileType)
+ LOGGER.info("Initiating Data Partitioning for the Cube : (" +
+ schemaName + "." + cubeName + ")")
+ carbonLoadModel.setFactFilePath(partitionLocation)
+ partitionStatus = CarbonContext.partitionData(
+ schemaName,
+ cubeName,
+ factPath,
+ partitionLocation,
+ delimiter,
+ quoteChar,
+ fileHeader,
+ escapeChar, booleanValForMultiLine)(sqlContext.asInstanceOf[HiveContext])
}
+ GlobalDictionaryUtil
+ .generateGlobalDictionary(sqlContext, carbonLoadModel, relation.cubeMeta.dataPath)
+ CarbonDataRDDFactory
+ .loadCarbonData(sqlContext, carbonLoadModel, storeLocation, relation.cubeMeta.dataPath,
+ kettleHomePath,
+ relation.cubeMeta.partitioner, columinar, isAgg = false, partitionStatus)
}
catch {
case ex: Exception =>
@@ -1429,14 +1423,13 @@ private[sql] case class LoadCube(
}
finally {
// Once the data load is successful delete the unwanted partition files
- try { {
+ try {
val fileType = FileFactory.getFileType(partitionLocation)
if (FileFactory.isFileExist(partitionLocation, fileType)) {
val file = FileFactory
.getCarbonFile(partitionLocation, fileType)
CarbonUtil.deleteFoldersAndFiles(file)
}
- }
} catch {
case ex: Exception =>
LOGGER.error(ex)
@@ -1445,7 +1438,6 @@ private[sql] case class LoadCube(
}
}
- }
} finally {
if (carbonLock != null) {
if (carbonLock.unlock()) {
@@ -1644,10 +1636,9 @@ private[sql] case class DropCubeCommand(ifExistsSet: Boolean, schemaNameOp: Opti
}
if (sqlContext.tableNames(schemaName).map(x => x.toLowerCase())
.contains(cubeName.toLowerCase())) {
- try { {
+ try {
sqlContext.asInstanceOf[HiveContext].catalog.client.
runSqlHive(s"DROP TABLE IF EXISTS $schemaName.$cubeName")
- }
} catch {
case e: RuntimeException =>
LOGGER.audit(
@@ -1661,7 +1652,7 @@ private[sql] case class DropCubeCommand(ifExistsSet: Boolean, schemaNameOp: Opti
CarbonProperties.getInstance().addProperty("zookeeper.enable.lock", "false")
val carbonLock = CarbonLockFactory
.getCarbonLockObj(tmpCube.getMetaDataFilepath, LockUsage.METADATA_LOCK)
- try { {
+ try {
if (carbonLock.lockWithRetries()) {
logInfo("Successfully able to get the cube metadata file lock")
} else {
@@ -1694,7 +1685,6 @@ private[sql] case class DropCubeCommand(ifExistsSet: Boolean, schemaNameOp: Opti
LOGGER.audit(s"Deleted cube [$cubeName] under schema [$schemaName]")
}
}
- }
finally {
if (carbonLock != null) {
if (carbonLock.unlock()) {
@@ -1768,11 +1758,10 @@ private[sql] case class ShowLoads(
loadMetadataDetailsSortedArray = loadMetadataDetailsSortedArray
.filter(load => load.getVisibility.equalsIgnoreCase("true"))
val limitLoads = limit.get
- try { {
+ try {
val lim = Integer.parseInt(limitLoads)
loadMetadataDetailsSortedArray = loadMetadataDetailsSortedArray.slice(0, lim)
}
- }
catch {
case ex: NumberFormatException => sys.error(s" Entered limit is not a valid Number")
}
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/CarbonJoins.scala b/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/CarbonJoins.scala
index f992a291970..b9f2ebcaa89 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/CarbonJoins.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/execution/joins/CarbonJoins.scala
@@ -29,12 +29,12 @@ import org.apache.spark.sql.execution.metric.SQLMetrics
import org.apache.spark.unsafe.types.UTF8String
case class FilterPushJoin(
- leftKeys: Seq[Expression],
- rightKeys: Seq[Expression],
- buildSide: BuildSide,
- left: SparkPlan,
- right: SparkPlan,
- condition: Option[Expression]) extends BinaryNode with HashJoin {
+ leftKeys: Seq[Expression],
+ rightKeys: Seq[Expression],
+ buildSide: BuildSide,
+ left: SparkPlan,
+ right: SparkPlan,
+ condition: Option[Expression]) extends BinaryNode with HashJoin {
override private[sql] lazy val metrics = Map(
"numLeftRows" -> SQLMetrics.createLongMetric(sparkContext, "number of left rows"),
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonMetastoreCatalog.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonMetastoreCatalog.scala
index f2838ca9180..d91a4c1c54a 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonMetastoreCatalog.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonMetastoreCatalog.scala
@@ -19,7 +19,7 @@ package org.apache.spark.sql.hive
import java.io.{ByteArrayInputStream, EOFException, File, ObjectInputStream}
import java.net.{InetAddress, InterfaceAddress, NetworkInterface}
-import java.util.{GregorianCalendar, HashMap}
+import java.util.GregorianCalendar
import scala.Array.canBuildFrom
import scala.collection.JavaConverters._
@@ -65,7 +65,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
@transient val LOGGER = LogServiceFactory
.getLogService("org.apache.spark.sql.CarbonMetastoreCatalog")
- val cubeModifiedTimeStore = new HashMap[String, Long]()
+ val cubeModifiedTimeStore = new java.util.HashMap[String, Long]()
cubeModifiedTimeStore.put("default", System.currentTimeMillis())
val metadata = loadMetadata(storePath)
@@ -96,9 +96,8 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
override def lookupRelation(tableIdentifier: Seq[String],
alias: Option[String] = None): LogicalPlan = {
- try { {
+ try {
super.lookupRelation(tableIdentifier, alias)
- }
} catch {
case s: java.lang.Exception =>
lookupRelation2(tableIdentifier, alias)(hive.asInstanceOf[SQLContext])
@@ -107,8 +106,8 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
def getCubeCreationTime(schemaName: String, cubeName: String): Long = {
val cubeMeta = metadata.cubesMeta.filter(
- c => (c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
- (c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))))
+ c => c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
+ c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))
val cubeCreationTime = cubeMeta.head.carbonTable.getTableLastUpdatedTime
cubeCreationTime
}
@@ -120,9 +119,9 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
tableIdentifier match {
case Seq(schemaName, cubeName) =>
val cubes = metadata.cubesMeta.filter(
- c => (c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
- (c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))))
- if (cubes.length > 0) {
+ c => c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
+ c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))
+ if (cubes.nonEmpty) {
CarbonRelation(schemaName, cubeName,
CarbonSparkUtil.createSparkMeta(cubes.head.carbonTable), cubes.head, alias)(sqlContext)
} else {
@@ -132,9 +131,9 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
case Seq(cubeName) =>
val currentDatabase = getDB.getDatabaseName(None, sqlContext)
val cubes = metadata.cubesMeta.filter(
- c => (c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(currentDatabase) &&
- (c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))))
- if (cubes.length > 0) {
+ c => c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(currentDatabase) &&
+ c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))
+ if (cubes.nonEmpty) {
CarbonRelation(currentDatabase, cubeName,
CarbonSparkUtil.createSparkMeta(cubes.head.carbonTable), cubes.head, alias)(sqlContext)
} else {
@@ -160,15 +159,15 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
tableIdentifier match {
case Seq(schemaName, cubeName) =>
val cubes = metadata.cubesMeta.filter(
- c => (c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
- (c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))))
- cubes.length > 0
+ c => c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
+ c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))
+ cubes.nonEmpty
case Seq(cubeName) =>
val currentDatabase = getDB.getDatabaseName(None, sqlContext)
val cubes = metadata.cubesMeta.filter(
- c => (c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(currentDatabase) &&
- (c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))))
- cubes.length > 0
+ c => c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(currentDatabase) &&
+ c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))
+ cubes.nonEmpty
case _ => false
}
}
@@ -185,15 +184,14 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val schemaFolders = file.listFiles()
schemaFolders.foreach(schemaFolder => {
- if (schemaFolder.isDirectory()) {
+ if (schemaFolder.isDirectory) {
val cubeFolders = schemaFolder.listFiles()
cubeFolders.foreach(cubeFolder => {
val schemaPath = metadataPath + "/" + schemaFolder.getName + "/" + cubeFolder.getName
- try { {
+ try {
fillMetaData(schemaPath, fileType, metaDataBuffer)
updateSchemasUpdatedTime(schemaFolder.getName, cubeFolder.getName)
- }
} catch {
case ex: org.apache.hadoop.security.AccessControlException =>
          // Ignore access control exceptions and get only accessible cube details
@@ -216,18 +214,18 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
private def fillMetaData(basePath: String, fileType: FileType,
metaDataBuffer: ArrayBuffer[TableMeta]): Unit = {
val schemasPath = basePath // + "/schemas"
- try { {
+ try {
if (FileFactory.isFileExist(schemasPath, fileType)) {
val file = FileFactory.getCarbonFile(schemasPath, fileType)
val schemaFolders = file.listFiles()
schemaFolders.foreach(schemaFolder => {
- if (schemaFolder.isDirectory()) {
+ if (schemaFolder.isDirectory) {
val dbName = schemaFolder.getName
val cubeFolders = schemaFolder.listFiles()
cubeFolders.foreach(cubeFolder => {
- if (cubeFolder.isDirectory()) {
+ if (cubeFolder.isDirectory) {
val carbonTablePath = CarbonStorePath.getCarbonTablePath(basePath,
new CarbonTableIdentifier(schemaFolder.getName, cubeFolder.getName))
val cubeMetadataFile = carbonTablePath.getSchemaFilePath
@@ -239,7 +237,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val createTBase = new ThriftReader.TBaseCreator() {
override def create(): org.apache.thrift.TBase[TableInfo, TableInfo._Fields] = {
- return new TableInfo()
+ new TableInfo()
}
}
val thriftReader = new ThriftReader(cubeMetadataFile, createTBase)
@@ -265,7 +263,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
// information and reload when required.
Partitioner("org.carbondata.spark.partition.api.impl." +
"SampleDataPartitionerImpl",
- Array(""), 1, getNodeList()))
+ Array(""), 1, getNodeList))
}
}
})
@@ -278,7 +276,6 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
}
}
- }
catch {
case s: java.io.FileNotFoundException =>
// Create folders and files.
@@ -293,12 +290,6 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
* Prepare Thrift Schema from wrapper TableInfo and write to schema file.
* Load CarbonTable from wrapper tableinfo
*
- * @param tableInfo
- * @param dbName
- * @param tableName
- * @param partitioner
- * @param sqlContext
- * @return
*/
def createCubeFromThrift(tableInfo: org.carbondata.core.carbon.metadata.schema.table.TableInfo,
dbName: String, tableName: String, partitioner: Partitioner)
@@ -327,7 +318,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
storePath,
CarbonMetadata.getInstance().getCarbonTable(dbName + "_" + tableName),
Partitioner("org.carbondata.spark.partition.api.impl.SampleDataPartitionerImpl",
- Array(""), 1, getNodeList()))
+ Array(""), 1, getNodeList))
val fileType = FileFactory.getFileType(schemaMetadataPath)
if (!FileFactory.isFileExist(schemaMetadataPath, fileType)) {
@@ -354,7 +345,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
   * Executors can be spawned on the same driver node, so we remove the first occurrence of
   * localhost when retrieving the executor list
*/
- def getNodeList(): Array[String] = {
+ def getNodeList: Array[String] = {
val arr =
hive.sparkContext.getExecutorMemoryStatus.map {
@@ -366,14 +357,14 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val nodelist: List[String] = withoutDriverIP(arr.toList)(selectedLocalIPList.contains(_))
val masterMode = hive.sparkContext.getConf.get("spark.master")
- if (nodelist.length > 0) {
+ if (nodelist.nonEmpty) {
// Specific for Yarn Mode
if ("yarn-cluster".equals(masterMode) || "yarn-client".equals(masterMode)) {
val nodeNames = nodelist.map { x =>
val addr = InetAddress.getByName(x)
- addr.getHostName()
+ addr.getHostName
}
- nodeNames.toSeq.toArray
+ nodeNames.toArray
}
else {
// For Standalone cluster, node IPs will be returned.
@@ -381,17 +372,17 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
}
}
else {
- Seq(InetAddress.getLocalHost().getHostName()).toArray
+ Seq(InetAddress.getLocalHost.getHostName).toArray
}
}
- private def getLocalhostIPs() = {
- val iface = NetworkInterface.getNetworkInterfaces()
+ private def getLocalhostIPs = {
+ val iface = NetworkInterface.getNetworkInterfaces
var addresses: List[InterfaceAddress] = List.empty
- while (iface.hasMoreElements()) {
- addresses = iface.nextElement().getInterfaceAddresses().asScala.toList ++ addresses
+ while (iface.hasMoreElements) {
+ addresses = iface.nextElement().getInterfaceAddresses.asScala.toList ++ addresses
}
- val inets = addresses.map(_.getAddress().getHostAddress())
+ val inets = addresses.map(_.getAddress.getHostAddress)
inets
}
@@ -403,7 +394,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
   * The resulting List contains List(slave1,Master,slave2,slave3)
*/
def withoutDriverIP[A](xs: List[A])(p: A => Boolean): List[A] = {
- xs.toList match {
+ xs match {
case x :: rest => if (p(x)) {
rest
} else {
@@ -417,13 +408,13 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
def getDimensions(carbonTable: CarbonTable,
aggregateAttributes: List[AggregateTableAttributes]): Array[String] = {
var dimArray = Array[String]()
- aggregateAttributes.filter { agg => null == agg.aggType }.map { agg =>
+ aggregateAttributes.filter { agg => null == agg.aggType }.foreach { agg =>
val colName = agg.colName
- if (null != carbonTable.getMeasureByName(carbonTable.getFactTableName(), colName)) {
+ if (null != carbonTable.getMeasureByName(carbonTable.getFactTableName, colName)) {
sys
.error(s"Measure must be provided along with aggregate function :: $colName")
}
- if (null == carbonTable.getDimensionByName(carbonTable.getFactTableName(), colName)) {
+ if (null == carbonTable.getDimensionByName(carbonTable.getFactTableName, colName)) {
sys
.error(s"Invalid column name. Cannot create an aggregate table :: $colName")
}
@@ -432,11 +423,11 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
}
dimArray :+= colName
}
- return dimArray
+ dimArray
}
def getAggregateTableName(carbonTable: CarbonTable, factTableName: String): String = {
- return CarbonUtil.getNewAggregateTableName(carbonTable.getAggregateTablesName, factTableName)
+ CarbonUtil.getNewAggregateTableName(carbonTable.getAggregateTablesName, factTableName)
}
/**
@@ -483,7 +474,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
def dropCube(partitionCount: Int, tableStorePath: String, schemaName: String, cubeName: String)
(sqlContext: SQLContext) {
- if (!cubeExists(Seq(schemaName, cubeName))((sqlContext))) {
+ if (!cubeExists(Seq(schemaName, cubeName))(sqlContext)) {
LOGGER.audit(s"Drop cube failed. Cube with $schemaName.$cubeName does not exist")
sys.error(s"Cube with $schemaName.$cubeName does not exist")
}
@@ -510,9 +501,8 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
}
}
- try { {
+ try {
sqlContext.sql(s"DROP TABLE $schemaName.$cubeName").collect()
- }
} catch {
case e: Exception =>
LOGGER.audit(
@@ -520,8 +510,8 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
}
metadata.cubesMeta -= metadata.cubesMeta.filter(
- c => (c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
- (c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))))(0)
+ c => c.carbonTableIdentifier.getDatabaseName.equalsIgnoreCase(schemaName) &&
+ c.carbonTableIdentifier.getTableName.equalsIgnoreCase(cubeName))(0)
org.carbondata.core.carbon.metadata.CarbonMetadata.getInstance
.removeTable(schemaName + "_" + cubeName)
      logInfo(s"Cube $cubeName of $schemaName schema dropped successfully.")
@@ -531,7 +521,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
private def getTimestampFileAndType(schemaName: String, cubeName: String) = {
- var timestampFile = if (useUniquePath) {
+ val timestampFile = if (useUniquePath) {
storePath + "/" + schemaName + "/" + cubeName + "/" +
CarbonCommonConstants.SCHEMAS_MODIFIED_TIME_FILE
}
@@ -555,11 +545,11 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
if (useUniquePath) {
cubeModifiedTimeStore.put(schemaName + '_' + cubeName,
- FileFactory.getCarbonFile(timestampFile, timestampFileType).getLastModifiedTime())
+ FileFactory.getCarbonFile(timestampFile, timestampFileType).getLastModifiedTime)
}
else {
cubeModifiedTimeStore.put("default",
- FileFactory.getCarbonFile(timestampFile, timestampFileType).getLastModifiedTime())
+ FileFactory.getCarbonFile(timestampFile, timestampFileType).getLastModifiedTime)
}
}
@@ -577,10 +567,10 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
c.carbonTableIdentifier.getDatabaseName, c.carbonTableIdentifier.getTableName)
if (FileFactory.isFileExist(timestampFile, timestampFileType)) {
- if (!(FileFactory.getCarbonFile(timestampFile, timestampFileType).getLastModifiedTime() ==
+ if (!(FileFactory.getCarbonFile(timestampFile, timestampFileType).getLastModifiedTime ==
cubeModifiedTimeStore.get(c.carbonTableIdentifier.getDatabaseName + "_" +
c.carbonTableIdentifier.getTableName))) {
- refreshCache
+ refreshCache()
}
}
})
@@ -588,8 +578,8 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val (timestampFile, timestampFileType) = getTimestampFileAndType("", "")
if (FileFactory.isFileExist(timestampFile, timestampFileType)) {
if (!(FileFactory.getCarbonFile(timestampFile, timestampFileType).
- getLastModifiedTime() == cubeModifiedTimeStore.get("default"))) {
- refreshCache
+ getLastModifiedTime == cubeModifiedTimeStore.get("default"))) {
+ refreshCache()
}
}
}
@@ -604,7 +594,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val (timestampFile, timestampFileType) = getTimestampFileAndType(schemaName, cubeName)
if (FileFactory.isFileExist(timestampFile, timestampFileType)) {
schemaLastUpdatedTime = FileFactory.getCarbonFile(timestampFile, timestampFileType)
- .getLastModifiedTime()
+ .getLastModifiedTime
}
schemaLastUpdatedTime
}
@@ -612,7 +602,7 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
def readCubeMetaDataFile(cubeFolder: CarbonFile,
fileType: FileFactory.FileType):
(String, String, String, String, Partitioner, Long) = {
- val cubeMetadataFile = cubeFolder.getAbsolutePath() + "/metadata"
+ val cubeMetadataFile = cubeFolder.getAbsolutePath + "/metadata"
var schema: String = ""
var schemaName: String = ""
@@ -620,15 +610,14 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
var dataPath: String = ""
var partitioner: Partitioner = null
val cal = new GregorianCalendar(2011, 1, 1)
- var cubeCreationTime = cal.getTime().getTime()
+ var cubeCreationTime = cal.getTime.getTime
if (FileFactory.isFileExist(cubeMetadataFile, fileType)) {
// load metadata
val in = FileFactory.getDataInputStream(cubeMetadataFile, fileType)
var len = 0
- try { {
+ try {
len = in.readInt()
- }
} catch {
case others: EOFException => len = 0
}
@@ -651,7 +640,6 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val versionLength = in.readInt()
val versionBytes = new Array[Byte](versionLength)
in.readFully(versionBytes)
- val version = new String(versionBytes, "UTF8")
val schemaLen = in.readInt()
val schemaBytes = new Array[Byte](schemaLen)
@@ -664,22 +652,20 @@ class CarbonMetastoreCatalog(hive: HiveContext, val storePath: String, client: C
val inStream = new ByteArrayInputStream(partitionBytes)
val objStream = new ObjectInputStream(inStream)
partitioner = objStream.readObject().asInstanceOf[Partitioner]
- objStream.close
+ objStream.close()
- try { {
+ try {
cubeCreationTime = in.readLong()
len = in.readInt()
- }
} catch {
case others: EOFException => len = 0
}
}
- in.close
- ()
+ in.close()
}
- return (schemaName, cubeName, dataPath, schema, partitioner, cubeCreationTime)
+ (schemaName, cubeName, dataPath, schema, partitioner, cubeCreationTime)
}
}
@@ -735,9 +721,12 @@ object CarbonMetastoreTypes extends RegexParsers {
def toMetastoreType(dt: DataType): String = {
dt match {
- case ArrayType(elementType, _) => s"array<${toMetastoreType(elementType)}>"
+ case ArrayType(elementType, _) => s"array<${ toMetastoreType(elementType) }>"
case StructType(fields) =>
- s"struct<${fields.map(f => s"${f.name}:${toMetastoreType(f.dataType)}").mkString(",")}>"
+ s"struct<${
+ fields.map(f => s"${ f.name }:${ toMetastoreType(f.dataType) }")
+ .mkString(",")
+ }>"
case StringType => "string"
case FloatType => "float"
case IntegerType => "int"
diff --git a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
index 96977f390d8..e9555cb7152 100644
--- a/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/sql/hive/CarbonStrategies.scala
@@ -31,7 +31,6 @@ import org.apache.spark.sql.execution.command._
import org.apache.spark.sql.execution.datasources.{DescribeCommand => LogicalDescribeCommand, LogicalRelation}
import org.apache.spark.sql.execution.joins.{BuildLeft, BuildRight, FilterPushJoin}
import org.apache.spark.sql.hive.execution.{DescribeHiveTableCommand, DropTable, HiveNativeCommand}
-import org.apache.spark.sql.types.{IntegerType, LongType}
import org.carbondata.common.logging.LogServiceFactory
@@ -41,7 +40,7 @@ object CarbonHiveSyntax {
protected val sqlParser = new CarbonSqlParser
def parse(sqlText: String): LogicalPlan = {
- sqlParser.parse(sqlText)
+ sqlParser.parse(sqlText)
}
}
@@ -61,184 +60,205 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
*/
private[sql] object CarbonCubeScans extends Strategy {
- def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
- case PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)) =>
- carbonScan(projectList, predicates, carbonRelation.carbonRelation, None, None, None, false,
- true) :: Nil
-
- case Limit(IntegerLiteral(limit),
- Sort(order, _, p@PartialAggregation(
- namedGroupingAttributes,
- rewrittenAggregateExpressions,
- groupingExpressions,
- partialComputation,
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))))) =>
- val aggPlan = handleAggregation(plan, p, projectList, predicates, carbonRelation,
- partialComputation, groupingExpressions, namedGroupingAttributes,
- rewrittenAggregateExpressions)
- org.apache.spark.sql.execution.TakeOrderedAndProject(limit,
- order,
- None,
- aggPlan(0)) :: Nil
-
- case Limit(IntegerLiteral(limit), p@PartialAggregation(
- namedGroupingAttributes,
- rewrittenAggregateExpressions,
- groupingExpressions,
- partialComputation,
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)))) =>
- val aggPlan = handleAggregation(plan, p, projectList, predicates, carbonRelation,
- partialComputation, groupingExpressions, namedGroupingAttributes,
- rewrittenAggregateExpressions)
- org.apache.spark.sql.execution.Limit(limit, aggPlan(0)) :: Nil
-
- case PartialAggregation(
- namedGroupingAttributes,
- rewrittenAggregateExpressions,
- groupingExpressions,
- partialComputation,
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))) =>
- handleAggregation(plan, plan, projectList, predicates, carbonRelation,
- partialComputation, groupingExpressions, namedGroupingAttributes,
- rewrittenAggregateExpressions)
-
- case Limit(IntegerLiteral(limit),
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))) =>
- val (_, _, _, aliases, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
- val s = carbonScan(projectList, predicates, carbonRelation.carbonRelation, groupExprs,
- substitutesortExprs, limitExpr, false, true)
- org.apache.spark.sql.execution.Limit(limit, s) :: Nil
-
-
- case Limit(IntegerLiteral(limit),
- Sort(order, _,
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)))) =>
- val (_, _, _, aliases, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
- val s = carbonScan(projectList, predicates, carbonRelation.carbonRelation, groupExprs,
- substitutesortExprs, limitExpr, false, true)
- org.apache.spark.sql.execution.TakeOrderedAndProject(limit,
- order,
- None,
- s) :: Nil
-
- case ExtractEquiJoinKeys(Inner, leftKeys, rightKeys, condition,
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)), right)
- if (canPushDownJoin(right, condition)) =>
- LOGGER.info(s"pushing down for ExtractEquiJoinKeys:right")
- val carbon = carbonScan(projectList, predicates, carbonRelation.carbonRelation, None, None,
- None, false, true)
- val pushedDownJoin = FilterPushJoin(
- leftKeys: Seq[Expression],
- rightKeys: Seq[Expression],
- BuildRight,
- carbon,
- planLater(right),
- condition)
-
- condition.map(Filter(_, pushedDownJoin)).getOrElse(pushedDownJoin) :: Nil
-
- case ExtractEquiJoinKeys(Inner, leftKeys, rightKeys, condition, left,
- PhysicalOperation(projectList, predicates,
- l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)))
- if (canPushDownJoin(left, condition)) =>
- LOGGER.info(s"pushing down for ExtractEquiJoinKeys:left")
- val carbon = carbonScan(projectList, predicates, carbonRelation.carbonRelation, None, None,
- None, false, true)
-
- val pushedDownJoin = FilterPushJoin(
- leftKeys: Seq[Expression],
- rightKeys: Seq[Expression],
- BuildLeft,
- planLater(left),
- carbon,
- condition)
- condition.map(Filter(_, pushedDownJoin)).getOrElse(pushedDownJoin) :: Nil
-
- case ShowCubeCommand(schemaName) =>
- ExecutedCommand(ShowAllTablesInSchema(schemaName, plan.output)) :: Nil
- case c@ShowAllCubeCommand() =>
- ExecutedCommand(ShowAllTables(plan.output)) :: Nil
- case ShowCreateCubeCommand(cm) =>
- ExecutedCommand(ShowCreateTable(cm, plan.output)) :: Nil
- case ShowTablesDetailedCommand(schemaName) =>
- ExecutedCommand(ShowAllTablesDetail(schemaName, plan.output)) :: Nil
- case DropTable(tableName, ifNotExists)
- if (CarbonEnv.getInstance(sqlContext).carbonCatalog.cubeExists(Seq(tableName))
- (sqlContext)) =>
- ExecutedCommand(DropCubeCommand(ifNotExists, None, tableName)) :: Nil
- case ShowAggregateTablesCommand(schemaName) =>
- ExecutedCommand(ShowAggregateTables(schemaName, plan.output)) :: Nil
- case ShowLoadsCommand(schemaName, cube, limit) =>
- ExecutedCommand(ShowLoads(schemaName, cube, limit, plan.output)) :: Nil
- case LoadCube(schemaNameOp, cubeName, factPathFromUser, dimFilesPath,
+ def apply(plan: LogicalPlan): Seq[SparkPlan] = {
+ plan match {
+ case PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)) =>
+ carbonScan(projectList,
+ predicates,
+ carbonRelation.carbonRelation,
+ None,
+ None,
+ None,
+ isGroupByPresent = false,
+ detailQuery = true) :: Nil
+
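+        // sorted and limited partial aggregation: plan the aggregation, then apply TakeOrderedAndProject on top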
+ case Limit(IntegerLiteral(limit),
+ Sort(order, _,
+ p@PartialAggregation(namedGroupingAttributes,
+ rewrittenAggregateExpressions,
+ groupingExpressions,
+ partialComputation,
+ PhysicalOperation(
+ projectList,
+ predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))))) =>
+ val aggPlan = handleAggregation(plan, p, projectList, predicates, carbonRelation,
+ partialComputation, groupingExpressions, namedGroupingAttributes,
+ rewrittenAggregateExpressions)
+ org.apache.spark.sql.execution.TakeOrderedAndProject(limit,
+ order,
+ None,
+ aggPlan.head) :: Nil
+
+ case Limit(IntegerLiteral(limit), p@PartialAggregation(
+ namedGroupingAttributes,
+ rewrittenAggregateExpressions,
+ groupingExpressions,
+ partialComputation,
+ PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)))) =>
+ val aggPlan = handleAggregation(plan, p, projectList, predicates, carbonRelation,
+ partialComputation, groupingExpressions, namedGroupingAttributes,
+ rewrittenAggregateExpressions)
+ org.apache.spark.sql.execution.Limit(limit, aggPlan.head) :: Nil
+
+ case PartialAggregation(
+ namedGroupingAttributes,
+ rewrittenAggregateExpressions,
+ groupingExpressions,
+ partialComputation,
+ PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))) =>
+ handleAggregation(plan, plan, projectList, predicates, carbonRelation,
+ partialComputation, groupingExpressions, namedGroupingAttributes,
+ rewrittenAggregateExpressions)
+
+ case Limit(IntegerLiteral(limit),
+ PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))) =>
+ val (_, _, _, _, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
+ val s = carbonScan(projectList, predicates, carbonRelation.carbonRelation, groupExprs,
+ substitutesortExprs, limitExpr, isGroupByPresent = false, detailQuery = true)
+ org.apache.spark.sql.execution.Limit(limit, s) :: Nil
+
+ case Limit(IntegerLiteral(limit),
+ Sort(order, _,
+ PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)))) =>
+ val (_, _, _, _, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
+ val s = carbonScan(projectList, predicates, carbonRelation.carbonRelation, groupExprs,
+ substitutesortExprs, limitExpr, isGroupByPresent = false, detailQuery = true)
+ org.apache.spark.sql.execution.TakeOrderedAndProject(limit,
+ order,
+ None,
+ s) :: Nil
+
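+        // equi-join whose left child is a Carbon relation: push the scan down and use the right plan as the build side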
+ case ExtractEquiJoinKeys(Inner, leftKeys, rightKeys, condition,
+ PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)), right)
+ if canPushDownJoin(right, condition) =>
+ LOGGER.info(s"pushing down for ExtractEquiJoinKeys:right")
+ val carbon = carbonScan(projectList,
+ predicates,
+ carbonRelation.carbonRelation,
+ None,
+ None,
+ None,
+ isGroupByPresent = false,
+ detailQuery = true)
+ val pushedDownJoin = FilterPushJoin(
+ leftKeys: Seq[Expression],
+ rightKeys: Seq[Expression],
+ BuildRight,
+ carbon,
+ planLater(right),
+ condition)
+
+ condition.map(Filter(_, pushedDownJoin)).getOrElse(pushedDownJoin) :: Nil
+
+ case ExtractEquiJoinKeys(Inner, leftKeys, rightKeys, condition, left,
+ PhysicalOperation(projectList, predicates,
+ l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _)))
+ if canPushDownJoin(left, condition) =>
+ LOGGER.info(s"pushing down for ExtractEquiJoinKeys:left")
+ val carbon = carbonScan(projectList,
+ predicates,
+ carbonRelation.carbonRelation,
+ None,
+ None,
+ None,
+ isGroupByPresent = false,
+ detailQuery = true)
+
+ val pushedDownJoin = FilterPushJoin(
+ leftKeys: Seq[Expression],
+ rightKeys: Seq[Expression],
+ BuildLeft,
+ planLater(left),
+ carbon,
+ condition)
+ condition.map(Filter(_, pushedDownJoin)).getOrElse(pushedDownJoin) :: Nil
+
+ case ShowCubeCommand(schemaName) =>
+ ExecutedCommand(ShowAllTablesInSchema(schemaName, plan.output)) :: Nil
+ case c@ShowAllCubeCommand() =>
+ ExecutedCommand(ShowAllTables(plan.output)) :: Nil
+ case ShowCreateCubeCommand(cm) =>
+ ExecutedCommand(ShowCreateTable(cm, plan.output)) :: Nil
+ case ShowTablesDetailedCommand(schemaName) =>
+ ExecutedCommand(ShowAllTablesDetail(schemaName, plan.output)) :: Nil
+ case DropTable(tableName, ifNotExists)
+ if CarbonEnv.getInstance(sqlContext).carbonCatalog
+ .cubeExists(Seq(tableName))(sqlContext) =>
+ ExecutedCommand(DropCubeCommand(ifNotExists, None, tableName)) :: Nil
+ case ShowAggregateTablesCommand(schemaName) =>
+ ExecutedCommand(ShowAggregateTables(schemaName, plan.output)) :: Nil
+ case ShowLoadsCommand(schemaName, cube, limit) =>
+ ExecutedCommand(ShowLoads(schemaName, cube, limit, plan.output)) :: Nil
+ case LoadCube(schemaNameOp, cubeName, factPathFromUser, dimFilesPath,
partionValues, isOverwriteExist, inputSqlString) =>
- val isCarbonTable = CarbonEnv.getInstance(sqlContext).carbonCatalog
- .cubeExists(schemaNameOp, cubeName)(sqlContext)
- if (isCarbonTable) {
- ExecutedCommand(LoadCube(schemaNameOp, cubeName, factPathFromUser,
- dimFilesPath, partionValues, isOverwriteExist, inputSqlString)) :: Nil
- } else {
- ExecutedCommand(HiveNativeCommand(inputSqlString)) :: Nil
- }
- case d: HiveNativeCommand =>
- try {
- val resolvedTable = sqlContext.executePlan(CarbonHiveSyntax.parse(d.sql)).analyzed
- planLater(resolvedTable) :: Nil
- } catch {
- case _ => ExecutedCommand(d) :: Nil
- }
- case DescribeFormattedCommand(sql, tblIdentifier) =>
- val isCube = CarbonEnv.getInstance(sqlContext).carbonCatalog
- .cubeExists(tblIdentifier)(sqlContext)
- if (isCube) {
- val describe = LogicalDescribeCommand(UnresolvedRelation(tblIdentifier, None), false)
+ val isCarbonTable = CarbonEnv.getInstance(sqlContext).carbonCatalog
+ .cubeExists(schemaNameOp, cubeName)(sqlContext)
+ if (isCarbonTable) {
+ ExecutedCommand(LoadCube(schemaNameOp, cubeName, factPathFromUser,
+ dimFilesPath, partionValues, isOverwriteExist, inputSqlString)) :: Nil
+ } else {
+ ExecutedCommand(HiveNativeCommand(inputSqlString)) :: Nil
+ }
+ case d: HiveNativeCommand =>
+ try {
+ val resolvedTable = sqlContext.executePlan(CarbonHiveSyntax.parse(d.sql)).analyzed
+ planLater(resolvedTable) :: Nil
+ } catch {
+              case _: Exception => ExecutedCommand(d) :: Nil
+ }
+ case DescribeFormattedCommand(sql, tblIdentifier) =>
+ val isCube = CarbonEnv.getInstance(sqlContext).carbonCatalog
+ .cubeExists(tblIdentifier)(sqlContext)
+ if (isCube) {
+ val describe =
+ LogicalDescribeCommand(UnresolvedRelation(tblIdentifier, None), isExtended = false)
+ val resolvedTable = sqlContext.executePlan(describe.table).analyzed
+ val resultPlan = sqlContext.executePlan(resolvedTable).executedPlan
+ ExecutedCommand(DescribeCommandFormatted(resultPlan, plan.output, tblIdentifier)) :: Nil
+ }
+ else {
+ ExecutedCommand(DescribeNativeCommand(sql, plan.output)) :: Nil
+ }
+ case describe@LogicalDescribeCommand(table, isExtended) =>
val resolvedTable = sqlContext.executePlan(describe.table).analyzed
- val resultPlan = sqlContext.executePlan(resolvedTable).executedPlan
- ExecutedCommand(DescribeCommandFormatted(resultPlan, plan.output, tblIdentifier)) :: Nil
- }
- else {
- ExecutedCommand(DescribeNativeCommand(sql, plan.output)) :: Nil
- }
- case describe@LogicalDescribeCommand(table, isExtended) =>
- val resolvedTable = sqlContext.executePlan(describe.table).analyzed
- resolvedTable match {
- case t: MetastoreRelation =>
- ExecutedCommand(DescribeHiveTableCommand(t, describe.output, describe.isExtended)) ::
- Nil
- case o: LogicalPlan =>
- val resultPlan = sqlContext.executePlan(o).executedPlan
- ExecutedCommand(
- RunnableDescribeCommand(resultPlan, describe.output, describe.isExtended)) :: Nil
- }
- case _ =>
- Nil
+ resolvedTable match {
+ case t: MetastoreRelation =>
+ ExecutedCommand(
+ DescribeHiveTableCommand(t, describe.output, describe.isExtended)) :: Nil
+ case o: LogicalPlan =>
+ val resultPlan = sqlContext.executePlan(o).executedPlan
+ ExecutedCommand(
+ RunnableDescribeCommand(resultPlan, describe.output, describe.isExtended)) :: Nil
+ }
+ case _ => Nil
+ }
}
def handleAggregation(plan: LogicalPlan,
- aggPlan: LogicalPlan,
- projectList: Seq[NamedExpression],
- predicates: Seq[Expression],
- carbonRelation: CarbonDatasourceRelation,
- partialComputation: Seq[NamedExpression],
- groupingExpressions: Seq[Expression],
- namedGroupingAttributes: Seq[Attribute],
- rewrittenAggregateExpressions: Seq[NamedExpression]):
+ aggPlan: LogicalPlan,
+ projectList: Seq[NamedExpression],
+ predicates: Seq[Expression],
+ carbonRelation: CarbonDatasourceRelation,
+ partialComputation: Seq[NamedExpression],
+ groupingExpressions: Seq[Expression],
+ namedGroupingAttributes: Seq[Attribute],
+ rewrittenAggregateExpressions: Seq[NamedExpression]):
Seq[SparkPlan] = {
- val (_, _, _, aliases, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
+ val (_, _, _, _, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
val s =
try {
carbonScan(projectList, predicates, carbonRelation.carbonRelation,
- Some(partialComputation), substitutesortExprs, limitExpr, !groupingExpressions.isEmpty)
+ Some(partialComputation), substitutesortExprs, limitExpr, groupingExpressions.nonEmpty)
} catch {
- case _ => null
+        case _: Exception => null
}
if (s != null) {
@@ -261,12 +281,12 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
partialComputation,
PhysicalOperation(projectList, predicates,
l@LogicalRelation(carbonRelation: CarbonDatasourceRelation, _))) =>
- val (_, _, _, aliases, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
+ val (_, _, _, _, groupExprs, substitutesortExprs, limitExpr) = extractPlan(plan)
val s = carbonScan(projectList, predicates, carbonRelation.carbonRelation,
Some(partialComputation), substitutesortExprs, limitExpr,
- !groupingExpressions.isEmpty, true)
+ groupingExpressions.nonEmpty, detailQuery = true)
CarbonAggregate(
partial = false,
@@ -281,23 +301,14 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
}
}
- private def canBeCodeGened(aggs: Seq[AggregateExpression]) = !aggs.exists {
- case _: Sum | _: Count | _: Max | _: CombineSetsAndCount => false
- // The generated set implementation is pretty limited ATM.
- case CollectHashSet(exprs) if exprs.size == 1 &&
- Seq(IntegerType, LongType).contains(exprs.head.dataType) => false
- case _ => true
- }
-
- private def allAggregates(exprs: Seq[Expression]) =
- exprs.flatMap(_.collect { case a: AggregateExpression => a })
-
private def canPushDownJoin(otherRDDPlan: LogicalPlan,
- joinCondition: Option[Expression]): Boolean = {
+ joinCondition: Option[Expression]): Boolean = {
val pushdowmJoinEnabled = sqlContext.sparkContext.conf
- .getBoolean("spark.carbon.pushdown.join.as.filter", true)
+ .getBoolean("spark.carbon.pushdown.join.as.filter", defaultValue = true)
- if (!pushdowmJoinEnabled) return false
+ if (!pushdowmJoinEnabled) {
+ return false
+ }
val isJoinOnCarbonCube = otherRDDPlan match {
case other@PhysicalOperation(projectList, predicates,
@@ -313,10 +324,9 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
otherRDDPlan match {
case BroadcastHint(p) => true
case p if sqlContext.conf.autoBroadcastJoinThreshold > 0 &&
- p.statistics.sizeInBytes <= sqlContext.conf.autoBroadcastJoinThreshold => {
+ p.statistics.sizeInBytes <= sqlContext.conf.autoBroadcastJoinThreshold =>
LOGGER.info("canPushDownJoin statistics:" + p.statistics.sizeInBytes)
true
- }
case _ => false
}
}
@@ -325,15 +335,15 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
* Create carbon scan
*/
private def carbonScan(projectList: Seq[NamedExpression],
- predicates: Seq[Expression],
- relation: CarbonRelation,
- groupExprs: Option[Seq[Expression]],
- substitutesortExprs: Option[Seq[SortOrder]],
- limitExpr: Option[Expression],
- isGroupByPresent: Boolean,
- detailQuery: Boolean = false) = {
-
- if (detailQuery == false) {
+ predicates: Seq[Expression],
+ relation: CarbonRelation,
+ groupExprs: Option[Seq[Expression]],
+ substitutesortExprs: Option[Seq[SortOrder]],
+ limitExpr: Option[Expression],
+ isGroupByPresent: Boolean,
+ detailQuery: Boolean = false) = {
+
+ if (!detailQuery) {
val projectSet = AttributeSet(projectList.flatMap(_.references))
CarbonCubeScan(
projectSet.toSeq,
@@ -368,7 +378,7 @@ class CarbonStrategies(sqlContext: SQLContext) extends QueryPlanner[SparkPlan] {
Some(sort.map {
case SortOrder(a: Alias, direction) =>
val ref = aliases.getOrElse(a.toAttribute, a) match {
- case Alias(ref, name) => ref
+ case Alias(reference, name) => reference
case others => others
}
SortOrder(ref, direction)
diff --git a/integration/spark/src/main/scala/org/apache/spark/util/FileUtils.scala b/integration/spark/src/main/scala/org/apache/spark/util/FileUtils.scala
index 05d31ddd2ca..88b30128fa4 100644
--- a/integration/spark/src/main/scala/org/apache/spark/util/FileUtils.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/util/FileUtils.scala
@@ -15,8 +15,6 @@
* limitations under the License.
*/
-
-
package org.apache.spark.util
import org.carbondata.core.datastorage.store.filesystem.CarbonFile
@@ -25,15 +23,13 @@ import org.carbondata.core.datastorage.store.impl.FileFactory
object FileUtils {
/**
* append all csv file path to a String, file path separated by comma
- * @param carbonFile
- * @return
*/
def getPathsFromCarbonFile(carbonFile: CarbonFile): String = {
- if (carbonFile.isDirectory()) {
+ if (carbonFile.isDirectory) {
val files = carbonFile.listFiles()
val stringBuilder = new StringBuilder()
for (j <- 0 until files.size) {
- if (files(j).getName().endsWith(".csv")) {
+ if (files(j).getName.endsWith(".csv")) {
stringBuilder.append(getPathsFromCarbonFile(files(j))).append(",")
}
}
@@ -46,8 +42,7 @@ object FileUtils {
/**
* append all file path to a String, inputPath path separated by comma
- * @param inputPath
- * @return
+ *
*/
def getPaths(inputPath: String): String = {
if (inputPath == null || inputPath.isEmpty) {
diff --git a/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala b/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
index 525f9d8a14f..d882d3d70b4 100644
--- a/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
+++ b/integration/spark/src/main/scala/org/apache/spark/util/SplitUtils.scala
@@ -34,9 +34,6 @@ object SplitUtils {
/**
* get file splits,return Array[BlockDetails], if file path is empty,then return empty Array
*
- * @param path
- * @param sc
- * @return
*/
def getSplits(path: String, sc: SparkContext): Array[BlockDetails] = {
val filePath = FileUtils.getPaths(path)
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/CarbonOption.scala b/integration/spark/src/main/scala/org/carbondata/spark/CarbonOption.scala
index 2b36b500d9d..67cb1ff00df 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/CarbonOption.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/CarbonOption.scala
@@ -29,7 +29,9 @@ class CarbonOption(options: Map[String, String]) {
def partitionCount: String = options.getOrElse("partitionCount", "1")
- def partitionClass: String = options.getOrElse("partitionClass",
- "org.carbondata.spark.partition.api.impl.SampleDataPartitionerImpl")
+ def partitionClass: String = {
+ options.getOrElse("partitionClass",
+ "org.carbondata.spark.partition.api.impl.SampleDataPartitionerImpl")
+ }
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/KeyVal.scala b/integration/spark/src/main/scala/org/carbondata/spark/KeyVal.scala
index ff916eeeaa1..a21ca6dff71 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/KeyVal.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/KeyVal.scala
@@ -43,8 +43,9 @@ trait Result[K, V] extends Serializable {
}
class ResultImpl extends Result[Int, LoadMetadataDetails] {
- override def getKey(key: Int, value: LoadMetadataDetails): (Int, LoadMetadataDetails) =
+ override def getKey(key: Int, value: LoadMetadataDetails): (Int, LoadMetadataDetails) = {
(key, value)
+ }
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala b/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
index c3f9189d9f0..68c9469ae0c 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/agg/CarbonAggregates.scala
@@ -90,10 +90,11 @@ case class CountDistinctCarbonFinal(inputSet: Expression, origDataType: DataType
override def dataType: DataType = origDataType
- override def toString: String = s"COUNTFINAL(DISTINCT ${inputSet}})"
+  override def toString: String = s"COUNTFINAL(DISTINCT ${ inputSet })"
- override def newInstance(): AggregateFunction1 =
+ override def newInstance(): AggregateFunction1 = {
new CountDistinctFunctionCarbonFinal(inputSet, this)
+ }
}
case class AverageCarbon(child: Expression, castedDataType: DataType = null)
@@ -255,12 +256,17 @@ case class SumDistinctCarbon(child: Expression, castedDataType: DataType = null)
val partialSum = Alias(SumDistinctCarbon(child), "PartialSumDistinct")()
SplitEvaluation(
SumDistinctFinalCarbon(partialSum.toAttribute,
- if (castedDataType != null) castedDataType else child.dataType),
+ if (castedDataType != null) {
+ castedDataType
+ } else {
+ child.dataType
+ }),
partialSum :: Nil)
}
- override def newInstance(): AggregateFunction1 =
+ override def newInstance(): AggregateFunction1 = {
new SumDisctinctFunctionCarbon(child, this, false)
+ }
}
case class SumDistinctFinalCarbon(child: Expression, origDataType: DataType)
@@ -319,20 +325,19 @@ case class AverageFunctionCarbon(expr: Expression, base: AggregateExpression1, f
case s =>
var dc: MeasureAggregator = null
if (s != null) {
- if (s.isInstanceOf[java.math.BigDecimal]) {
- dc = new AvgBigDecimalAggregator
- dc.agg(new java.math.BigDecimal(s.toString))
- dc.setNewValue(new java.math.BigDecimal(s.toString))
- }
- else if (s.isInstanceOf[Long]) {
- dc = new AvgLongAggregator
- dc.agg(s.toString.toLong)
- dc.setNewValue(s.toString.toLong)
- }
- else {
- dc = new AvgDoubleAggregator
- dc.agg(s.toString.toDouble)
- dc.setNewValue(s.toString.toDouble)
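+          // pick the average aggregator implementation based on the runtime type of the measure value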
+ s match {
+ case v: java.math.BigDecimal =>
+ dc = new AvgBigDecimalAggregator
+ dc.agg(new java.math.BigDecimal(s.toString))
+ dc.setNewValue(new java.math.BigDecimal(s.toString))
+ case l: Long =>
+ dc = new AvgLongAggregator
+ dc.agg(s.toString.toLong)
+ dc.setNewValue(s.toString.toLong)
+ case _ =>
+ dc = new AvgDoubleAggregator
+ dc.agg(s.toString.toDouble)
+ dc.setNewValue(s.toString.toDouble)
}
}
else {
@@ -340,27 +345,31 @@ case class AverageFunctionCarbon(expr: Expression, base: AggregateExpression1, f
}
dc
}
- if (avg == null) avg = agg else avg.merge(agg)
+ if (avg == null) {
+ avg = agg
+ } else {
+ avg.merge(agg)
+ }
}
- override def eval(input: InternalRow): Any =
+ override def eval(input: InternalRow): Any = {
if (finalAgg) {
- if (avg.isFirstTime()) {
+ if (avg.isFirstTime) {
null
} else {
- if (avg.isInstanceOf[AvgBigDecimalAggregator]) {
- Cast(Literal(avg.getBigDecimalValue), base.dataType).eval(null)
- }
- else if (avg.isInstanceOf[AvgLongAggregator]) {
- Cast(Literal(avg.getLongValue), base.dataType).eval(null)
- }
- else {
- Cast(Literal(avg.getDoubleValue), base.dataType).eval(null)
+ avg match {
+            case _: AvgBigDecimalAggregator =>
+ Cast(Literal(avg.getBigDecimalValue), base.dataType).eval(null)
+            case _: AvgLongAggregator =>
+ Cast(Literal(avg.getLongValue), base.dataType).eval(null)
+ case _ =>
+ Cast(Literal(avg.getDoubleValue), base.dataType).eval(null)
}
}
} else {
avg
}
+ }
}
case class CountFunctionCarbon(expr: Expression, base: AggregateExpression1, finalAgg: Boolean)
@@ -386,12 +395,16 @@ case class CountFunctionCarbon(expr: Expression, base: AggregateExpression1, fin
}
agg1
}
- if (count == null) count = agg else count.merge(agg)
+ if (count == null) {
+ count = agg
+ } else {
+ count.merge(agg)
+ }
}
- override def eval(input: InternalRow): Any =
+ override def eval(input: InternalRow): Any = {
if (finalAgg && count != null) {
- if (count.isFirstTime()) {
+ if (count.isFirstTime) {
0L
} else {
Cast(Literal(count.getDoubleValue), base.dataType).eval(null)
@@ -399,6 +412,7 @@ case class CountFunctionCarbon(expr: Expression, base: AggregateExpression1, fin
} else {
count
}
+ }
}
@@ -422,20 +436,19 @@ case class SumFunctionCarbon(expr: Expression, base: AggregateExpression1, final
case s =>
var dc: MeasureAggregator = null
if (s != null) {
- if (s.isInstanceOf[java.math.BigDecimal]) {
- dc = new SumBigDecimalAggregator
- dc.agg(new java.math.BigDecimal(s.toString))
- dc.setNewValue(new java.math.BigDecimal(s.toString))
- }
- else if (s.isInstanceOf[Long]) {
- dc = new SumLongAggregator
- dc.agg(s.toString.toLong)
- dc.setNewValue(s.toString.toLong)
- }
- else {
- dc = new SumDoubleAggregator
- dc.agg(s.toString.toDouble)
- dc.setNewValue(s.toString.toDouble)
+ s match {
+            case _: java.math.BigDecimal =>
+ dc = new SumBigDecimalAggregator
+ dc.agg(new java.math.BigDecimal(s.toString))
+ dc.setNewValue(new java.math.BigDecimal(s.toString))
+            case _: Long =>
+ dc = new SumLongAggregator
+ dc.agg(s.toString.toLong)
+ dc.setNewValue(s.toString.toLong)
+ case _ =>
+ dc = new SumDoubleAggregator
+ dc.agg(s.toString.toDouble)
+ dc.setNewValue(s.toString.toDouble)
}
}
else {
@@ -443,27 +456,31 @@ case class SumFunctionCarbon(expr: Expression, base: AggregateExpression1, final
}
dc
}
- if (sum == null) sum = agg else sum.merge(agg)
+ if (sum == null) {
+ sum = agg
+ } else {
+ sum.merge(agg)
+ }
}
- override def eval(input: InternalRow): Any =
+ override def eval(input: InternalRow): Any = {
if (finalAgg && sum != null) {
- if (sum.isFirstTime()) {
+ if (sum.isFirstTime) {
null
} else {
- if (sum.isInstanceOf[SumBigDecimalAggregator]) {
- Cast(Literal(sum.getBigDecimalValue), base.dataType).eval(input)
- }
- else if (sum.isInstanceOf[SumLongAggregator]) {
- Cast(Literal(sum.getLongValue), base.dataType).eval(input)
- }
- else {
- Cast(Literal(sum.getDoubleValue), base.dataType).eval(input)
+ sum match {
+            case _: SumBigDecimalAggregator =>
+ Cast(Literal(sum.getBigDecimalValue), base.dataType).eval(input)
+            case _: SumLongAggregator =>
+ Cast(Literal(sum.getLongValue), base.dataType).eval(input)
+ case _ =>
+ Cast(Literal(sum.getDoubleValue), base.dataType).eval(input)
}
}
} else {
sum
}
+ }
}
case class MaxFunctionCarbon(expr: Expression, base: AggregateExpression1, finalAgg: Boolean)
@@ -487,8 +504,8 @@ case class MaxFunctionCarbon(expr: Expression, base: AggregateExpression1, final
if (s != null) {
dc.agg(s.toString.toDouble)
dc.setNewValue(s.toString.toDouble)
- }
- dc
+ }
+ dc
}
if (max == null) {
max = agg
@@ -497,9 +514,9 @@ case class MaxFunctionCarbon(expr: Expression, base: AggregateExpression1, final
}
}
- override def eval(input: InternalRow): Any =
+ override def eval(input: InternalRow): Any = {
if (finalAgg && max != null) {
- if (max.isFirstTime()) {
+ if (max.isFirstTime) {
null
} else {
Cast(Literal(max.getValueObject), base.dataType).eval(null)
@@ -507,6 +524,7 @@ case class MaxFunctionCarbon(expr: Expression, base: AggregateExpression1, final
} else {
max
}
+ }
}
case class MinFunctionCarbon(expr: Expression, base: AggregateExpression1, finalAgg: Boolean)
@@ -530,8 +548,8 @@ case class MinFunctionCarbon(expr: Expression, base: AggregateExpression1, final
if (s != null) {
dc.agg(s.toString.toDouble)
dc.setNewValue(s.toString.toDouble)
- }
- dc
+ }
+ dc
}
if (min == null) {
min = agg
@@ -542,9 +560,11 @@ case class MinFunctionCarbon(expr: Expression, base: AggregateExpression1, final
override def eval(input: InternalRow): Any = {
if (finalAgg && min != null) {
- if (min.isFirstTime()) {
+ if (min.isFirstTime) {
null
- } else Cast(Literal(min.getValueObject), base.dataType).eval(null)
+ } else {
+ Cast(Literal(min.getValueObject), base.dataType).eval(null)
+ }
} else {
min
}
@@ -552,7 +572,7 @@ case class MinFunctionCarbon(expr: Expression, base: AggregateExpression1, final
}
case class SumDisctinctFunctionCarbon(expr: Expression, base: AggregateExpression1,
- isFinal: Boolean)
+ isFinal: Boolean)
extends AggregateFunction1 {
def this() = this(null, null, false) // Required for serialization.
@@ -573,23 +593,21 @@ case class SumDisctinctFunctionCarbon(expr: Expression, base: AggregateExpressio
case null => null
case s =>
var dc: MeasureAggregator = null
- if (s.isInstanceOf[Double]) {
- dc = new SumDistinctDoubleAggregator
- dc.setNewValue(s.toString.toDouble)
- }
- else if (s.isInstanceOf[Int]) {
- dc = new SumDistinctLongAggregator
- dc.setNewValue(s.toString.toLong)
- }
- else if (s.isInstanceOf[java.math.BigDecimal]) {
- dc = new SumDistinctBigDecimalAggregator
- dc.setNewValue(new java.math.BigDecimal(s.toString))
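+          // dc stays null for unsupported value types; the null check on agg below skips such values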
+ s match {
+            case _: Double =>
+              dc = new SumDistinctDoubleAggregator
+              dc.setNewValue(s.toString.toDouble)
+            case _: Int =>
+              dc = new SumDistinctLongAggregator
+              dc.setNewValue(s.toString.toLong)
+            case _: java.math.BigDecimal =>
+ dc = new SumDistinctBigDecimalAggregator
+ dc.setNewValue(new java.math.BigDecimal(s.toString))
+ case _ =>
}
dc
}
-      if (agg == null) {
-        distinct
-      } else if (distinct == null) {
+      if (agg == null) {
+        // skip null values so merge is never invoked with a null aggregator
+      } else if (distinct == null) {
distinct = agg
} else {
distinct.merge(agg)
@@ -597,13 +615,15 @@ case class SumDisctinctFunctionCarbon(expr: Expression, base: AggregateExpressio
}
override def eval(input: InternalRow): Any =
- // in case of empty load it was failing so added null check.
+  {
+    // in case of empty load it was failing so added null check.
if (isFinal && distinct != null) {
Cast(Literal(distinct.getValueObject), base.dataType).eval(null)
}
else {
distinct
}
+ }
}
case class CountDistinctFunctionCarbon(expr: Expression, base: AggregateExpression1)
@@ -629,9 +649,7 @@ case class CountDistinctFunctionCarbon(expr: Expression, base: AggregateExpressi
dc.setNewValue(s.toString)
dc
}
-      if (agg == null) {
-        count
-      } else if (count == null) {
+      if (agg == null) {
+        // skip null values so merge is never invoked with a null aggregator
+      } else if (count == null) {
count = agg
} else {
count.merge(agg)
@@ -664,23 +682,22 @@ case class CountDistinctFunctionCarbonFinal(expr: Expression, base: AggregateExp
dc.setNewValue(s.toString)
dc
}
-      if (agg == null) {
-        count
-      } else if (count == null) {
+      if (agg == null) {
+        // skip null values so merge is never invoked with a null aggregator
+      } else if (count == null) {
count = agg
} else {
count.merge(agg)
}
}
- override def eval(input: InternalRow): Any =
+ override def eval(input: InternalRow): Any = {
if (count == null) {
Cast(Literal(0), base.dataType).eval(null)
- } else if (count.isFirstTime()) {
+ } else if (count.isFirstTime) {
Cast(Literal(0), base.dataType).eval(null)
} else {
Cast(Literal(count.getDoubleValue), base.dataType).eval(null)
}
+ }
}
case class FirstFunctionCarbon(expr: Expression, base: AggregateExpression1)
@@ -725,7 +742,7 @@ case class FlattenExpr(expr: Expression) extends Expression with CodegenFallback
override def eval(input: InternalRow): Any = {
expr.eval(input) match {
- case d: MeasureAggregator => d.getDoubleValue()
+ case d: MeasureAggregator => d.getDoubleValue
case others => others
}
}
@@ -751,7 +768,7 @@ case class FlatAggregatorsExpr(expr: Expression) extends Expression with Codegen
override def eval(input: InternalRow): Any = {
expr.eval(input) match {
case d: MeasureAggregator =>
- d.setNewValue(d.getDoubleValue())
+ d.setNewValue(d.getDoubleValue)
d
case others => others
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/agg/MeasureAggregatorUDT.scala b/integration/spark/src/main/scala/org/carbondata/spark/agg/MeasureAggregatorUDT.scala
index 97055e93a43..ea0c8afdf00 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/agg/MeasureAggregatorUDT.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/agg/MeasureAggregatorUDT.scala
@@ -29,7 +29,9 @@ import org.carbondata.query.aggregator.MeasureAggregator
class MeasureAggregatorUDT extends UserDefinedType[MeasureAggregator] {
// the default DoubleType is Ok as we are not going to pass to spark sql to
// evaluate,need to add this for compilation errors
- override def sqlType: DataType = ArrayType(DoubleType, false)
+ override def sqlType: DataType = {
+ ArrayType(DoubleType, containsNull = false)
+ }
override def serialize(obj: Any): Any = {
obj match {
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/package.scala b/integration/spark/src/main/scala/org/carbondata/spark/package.scala
index dab463c2593..274522962df 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/package.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/package.scala
@@ -44,10 +44,10 @@ package object spark {
// temporary solution: write to csv file, then load the csv into carbon
val tempCSVFolder = s"$storePath/$dbName/$tableName/tempCSV"
dataFrame.write
- .format(csvPackage)
- .option("header", "true")
- .mode(SaveMode.Overwrite)
- .save(tempCSVFolder)
+ .format(csvPackage)
+ .option("header", "true")
+ .mode(SaveMode.Overwrite)
+ .save(tempCSVFolder)
val cc = CarbonContext.getInstance(dataFrame.sqlContext.sparkContext)
val tempCSVPath = new Path(tempCSVFolder)
@@ -61,9 +61,9 @@ package object spark {
while (itor.hasNext) {
val f = itor.next()
if (f.getPath.getName.startsWith("part-")) {
- val newPath = s"${f.getPath.getParent}/${f.getPath.getName}.csv"
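+          // give spark's part file a .csv suffix, otherwise the csv based load path skips it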
+ val newPath = s"${ f.getPath.getParent }/${ f.getPath.getName }.csv"
if (!fs.rename(f.getPath, new Path(newPath))) {
- cc.sql(s"DROP CUBE ${options.tableName}")
+ cc.sql(s"DROP CUBE ${ options.tableName }")
throw new RuntimeException("File system rename failed when loading data into carbon")
}
}
@@ -94,12 +94,12 @@ package object spark {
private def makeCreateTableString(schema: StructType, option: CarbonOption): String = {
val tableName = option.tableName
val carbonSchema = schema.map { field =>
- s"${field.name} ${convertToCarbonType(field.dataType)}"
- }
+ s"${ field.name } ${ convertToCarbonType(field.dataType) }"
+ }
s"""
CREATE TABLE IF NOT EXISTS $tableName
- (${carbonSchema.mkString(", ")})
- STORED BY '${CarbonContext.datasourceName}'
+ (${ carbonSchema.mkString(", ") })
+ STORED BY '${ CarbonContext.datasourceName }'
"""
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonCleanFilesRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonCleanFilesRDD.scala
index 5a071a13af3..b947478523c 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonCleanFilesRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonCleanFilesRDD.scala
@@ -29,11 +29,11 @@ import org.carbondata.spark.util.CarbonQueryUtil
class CarbonCleanFilesRDD[K, V](
- sc: SparkContext,
- keyClass: KeyVal[K, V],
- schemaName: String,
- cubeName: String,
- partitioner: Partitioner)
+ sc: SparkContext,
+ keyClass: KeyVal[K, V],
+ schemaName: String,
+ cubeName: String,
+ partitioner: Partitioner)
extends RDD[(K, V)](sc, Nil) with Logging {
sc.setLocalProperty("spark.scheduler.pool", "DDL")
@@ -41,7 +41,7 @@ class CarbonCleanFilesRDD[K, V](
override def getPartitions: Array[Partition] = {
val splits = CarbonQueryUtil.getTableSplits(schemaName, cubeName, null, partitioner)
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonLoadPartition(id, i, splits(i))
}
result
@@ -59,7 +59,7 @@ class CarbonCleanFilesRDD[K, V](
override def hasNext: Boolean = {
if (!finished && !havePair) {
- finished = !false
+ finished = true
havePair = !finished
}
!finished
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataLoadRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataLoadRDD.scala
index 2b65f59cbb7..ca0aee61caf 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataLoadRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataLoadRDD.scala
@@ -43,10 +43,6 @@ import org.carbondata.spark.util.CarbonQueryUtil
/**
* This partition class use to split by TableSplit
*
- * @param rddId
- * @param idx
- * @param tableSplit
- * @param blocksDetails
*/
class CarbonTableSplitPartition(rddId: Int, val idx: Int, @transient val tableSplit: TableSplit,
val blocksDetails: Array[BlockDetails])
@@ -62,10 +58,6 @@ class CarbonTableSplitPartition(rddId: Int, val idx: Int, @transient val tableSp
/**
* This partition class use to split by Host
*
- * @param rddId
- * @param idx
- * @param host
- * @param blocksDetails
*/
class CarbonNodePartition(rddId: Int, val idx: Int, host: String,
val blocksDetails: Array[BlockDetails])
@@ -123,17 +115,17 @@ class CarbonDataLoadRDD[K, V](
case true =>
// for table split partition
var splits = Array[TableSplit]()
- if (carbonLoadModel.isDirectLoad()) {
- splits = CarbonQueryUtil.getTableSplitsForDirectLoad(carbonLoadModel.getFactFilePath(),
+ if (carbonLoadModel.isDirectLoad) {
+ splits = CarbonQueryUtil.getTableSplitsForDirectLoad(carbonLoadModel.getFactFilePath,
partitioner.nodeList, partitioner.partitionCount)
}
else {
- splits = CarbonQueryUtil.getTableSplits(carbonLoadModel.getDatabaseName(),
- carbonLoadModel.getTableName(), null, partitioner)
+ splits = CarbonQueryUtil.getTableSplits(carbonLoadModel.getDatabaseName,
+ carbonLoadModel.getTableName, null, partitioner)
}
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
// filter the same partition unique id, because only one will match, so get 0 element
val blocksDetails: Array[BlockDetails] = blocksGroupBy.filter(p =>
p._1 == splits(i).getPartition.getUniqueID)(0)._2
@@ -143,7 +135,7 @@ class CarbonDataLoadRDD[K, V](
case false =>
// for node partition
val result = new Array[Partition](blocksGroupBy.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonNodePartition(id, i, blocksGroupBy(i)._1, blocksGroupBy(i)._2)
}
result
@@ -155,13 +147,13 @@ class CarbonDataLoadRDD[K, V](
}
override def compute(theSplit: Partition, context: TaskContext): Iterator[(K, V)] = {
- val LOGGER = LogServiceFactory.getLogService(this.getClass().getName())
+ val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
val iter = new Iterator[(K, V)] {
var dataloadStatus = CarbonCommonConstants.STORE_LOADSTATUS_FAILURE
var partitionID = "0"
var model: CarbonLoadModel = _
- try { {
+ try {
val carbonPropertiesFilePath = System.getProperty("carbon.properties.filepath", null)
if (null == carbonPropertiesFilePath) {
System.setProperty("carbon.properties.filepath",
@@ -184,17 +176,16 @@ class CarbonDataLoadRDD[K, V](
dataloadStatus = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
- if (model.isRetentionRequest()) {
+ if (model.isRetentionRequest) {
recreateAggregationTableForRetention
}
- else if (model.isAggLoadRequest()) {
+ else if (model.isAggLoadRequest) {
dataloadStatus = createManualAggregateTable
}
else {
- try { {
+ try {
CarbonLoaderUtil.executeGraph(model, storeLocation, hdfsStoreLocation, kettleHomePath,
currentRestructNumber)
- }
} catch {
case e: DataLoadingException => if (e.getErrorCode ==
DataProcessorConstants.BAD_REC_FOUND) {
@@ -228,8 +219,6 @@ class CarbonDataLoadRDD[K, V](
}
}
}
- }
-
} catch {
case e: Exception =>
dataloadStatus = CarbonCommonConstants.STORE_LOADSTATUS_FAILURE
@@ -245,16 +234,16 @@ class CarbonDataLoadRDD[K, V](
val blocksID = gernerateBlocksID
carbonLoadModel.setBlocksID(blocksID)
carbonLoadModel.setTaskNo(String.valueOf(theSplit.index))
- if (carbonLoadModel.isDirectLoad()) {
+ if (carbonLoadModel.isDirectLoad) {
model = carbonLoadModel.getCopyWithPartition(
- split.serializableHadoopSplit.value.getPartition().getUniqueID(),
- split.serializableHadoopSplit.value.getPartition().getFilesPath,
- carbonLoadModel.getCsvHeader(), carbonLoadModel.getCsvDelimiter())
+ split.serializableHadoopSplit.value.getPartition.getUniqueID,
+ split.serializableHadoopSplit.value.getPartition.getFilesPath,
+ carbonLoadModel.getCsvHeader, carbonLoadModel.getCsvDelimiter)
} else {
model = carbonLoadModel.getCopyWithPartition(
- split.serializableHadoopSplit.value.getPartition().getUniqueID())
+ split.serializableHadoopSplit.value.getPartition.getUniqueID)
}
- partitionID = split.serializableHadoopSplit.value.getPartition().getUniqueID()
+ partitionID = split.serializableHadoopSplit.value.getPartition.getUniqueID
// get this partition data blocks and put it to global static map
GraphGenerator.blockInfo.put(blocksID, split.partitionBlocksDetail)
StandardLogService.setThreadName(partitionID, null)
@@ -268,13 +257,13 @@ class CarbonDataLoadRDD[K, V](
carbonLoadModel.setTaskNo(String.valueOf(theSplit.index))
// set this node blocks info to global static map
GraphGenerator.blockInfo.put(blocksID, split.nodeBlocksDetail)
- if (carbonLoadModel.isDirectLoad()) {
+ if (carbonLoadModel.isDirectLoad) {
val filelist: java.util.List[String] = new java.util.ArrayList[String](
CarbonCommonConstants.CONSTANT_SIZE_TEN)
CarbonQueryUtil.getAllFiles(carbonLoadModel.getFactFilePath, filelist,
FileFactory.getFileType(carbonLoadModel.getFactFilePath))
model = carbonLoadModel.getCopyWithPartition(partitionID, filelist,
- carbonLoadModel.getCsvHeader(), carbonLoadModel.getCsvDelimiter())
+ carbonLoadModel.getCsvHeader, carbonLoadModel.getCsvDelimiter)
}
else {
model = carbonLoadModel.getCopyWithPartition(partitionID)
@@ -288,7 +277,7 @@ class CarbonDataLoadRDD[K, V](
*
* @return
*/
- def gernerateBlocksID(): String = {
+ def gernerateBlocksID: String = {
isTableSplitPartition match {
case true =>
carbonLoadModel.getDatabaseName + "_" + carbonLoadModel.getTableName + "_" +
@@ -300,11 +289,11 @@ class CarbonDataLoadRDD[K, V](
}
}
- def checkAndLoadAggregationTable(): String = {
+ def checkAndLoadAggregationTable: String = {
val schema = model.getCarbonDataLoadSchema
val aggTables = schema.getCarbonTable.getAggregateTablesName
if (null != aggTables && !aggTables.isEmpty) {
- val details = model.getLoadMetadataDetails.asScala.toSeq.toArray
+ val details = model.getLoadMetadataDetails.asScala.toArray
val newSlice = CarbonCommonConstants.LOAD_FOLDER + loadCount
var listOfLoadFolders = CarbonLoaderUtil.getListOfValidSlices(details)
listOfLoadFolders = CarbonLoaderUtil.addNewSliceNameToList(newSlice, listOfLoadFolders)
@@ -334,7 +323,7 @@ class CarbonDataLoadRDD[K, V](
logInfo("Aggregate tables creation successfull")
}
}
- return dataloadStatus
+ dataloadStatus
}
def loadCubeSlices(listOfAllLoadFolders: java.util.List[String],
@@ -343,8 +332,8 @@ class CarbonDataLoadRDD[K, V](
// TODO: Implement it
}
- def createManualAggregateTable(): String = {
- val details = model.getLoadMetadataDetails.asScala.toSeq.toArray
+ def createManualAggregateTable: String = {
+ val details = model.getLoadMetadataDetails.asScala.toArray
val listOfAllLoadFolders = CarbonQueryUtil.getListOfSlices(details)
val listOfLoadFolders = CarbonLoaderUtil.getListOfValidSlices(details)
val listOfUpdatedLoadFolders = CarbonLoaderUtil.getListOfUpdatedSlices(details)
@@ -368,11 +357,11 @@ class CarbonDataLoadRDD[K, V](
dataloadStatus
}
- def recreateAggregationTableForRetention() = {
+ def recreateAggregationTableForRetention = {
val schema = model.getCarbonDataLoadSchema
val aggTables = schema.getCarbonTable.getAggregateTablesName
if (null != aggTables && !aggTables.isEmpty) {
- val details = model.getLoadMetadataDetails.asScala.toSeq.toArray
+ val details = model.getLoadMetadataDetails.asScala.toArray
val listOfLoadFolders = CarbonLoaderUtil.getListOfValidSlices(details)
val listOfUpdatedLoadFolders = CarbonLoaderUtil.getListOfUpdatedSlices(details)
val listOfAllLoadFolder = CarbonQueryUtil.getListOfSlices(details)
@@ -406,14 +395,14 @@ class CarbonDataLoadRDD[K, V](
return dataloadStatus
}
}
- return dataloadStatus
+ dataloadStatus
}
def loadAggregationTable(listOfLoadFolders: java.util.List[String],
listOfUpdatedLoadFolders: java.util.List[String],
loadFolders: Array[String]): String = {
// TODO: Implement it
- return dataloadStatus
+ dataloadStatus
}
var finished = false
@@ -432,7 +421,7 @@ class CarbonDataLoadRDD[K, V](
override def next(): (K, V) = {
val loadMetadataDetails = new LoadMetadataDetails()
loadMetadataDetails.setPartitionCount(partitionID)
- loadMetadataDetails.setLoadStatus(dataloadStatus.toString())
+ loadMetadataDetails.setLoadStatus(dataloadStatus)
result.getKey(loadCount, loadMetadataDetails)
}
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataPartitionRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataPartitionRDD.scala
index 17269dc35d4..45feac6880f 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataPartitionRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataPartitionRDD.scala
@@ -47,19 +47,19 @@ class CarbonSparkRawDataPartition(rddId: Int, val idx: Int, @transient val table
* .
*/
class CarbonDataPartitionRDD[K, V](
- sc: SparkContext,
- results: PartitionResult[K, V],
- schemaName: String,
- cubeName: String,
- sourcePath: String,
- targetFolder: String,
- requiredColumns: Array[String],
- headers: String,
- delimiter: String,
- quoteChar: String,
- escapeChar: String,
- multiLine: Boolean,
- partitioner: Partitioner)
+ sc: SparkContext,
+ results: PartitionResult[K, V],
+ schemaName: String,
+ cubeName: String,
+ sourcePath: String,
+ targetFolder: String,
+ requiredColumns: Array[String],
+ headers: String,
+ delimiter: String,
+ quoteChar: String,
+ escapeChar: String,
+ multiLine: Boolean,
+ partitioner: Partitioner)
extends RDD[(K, V)](sc, Nil) with Logging {
sc.setLocalProperty("spark.scheduler.pool", "DDL")
@@ -69,7 +69,7 @@ class CarbonDataPartitionRDD[K, V](
.getPartitionSplits(sourcePath, partitioner.nodeList, partitioner.partitionCount)
//
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonSparkRawDataPartition(id, i, splits(i))
}
result
@@ -79,12 +79,12 @@ class CarbonDataPartitionRDD[K, V](
new Iterator[(K, V)] {
val split = theSplit.asInstanceOf[CarbonSparkRawDataPartition]
StandardLogService
- .setThreadName(split.serializableHadoopSplit.value.getPartition().getUniqueID(), null)
+ .setThreadName(split.serializableHadoopSplit.value.getPartition.getUniqueID, null)
logInfo("Input split: " + split.serializableHadoopSplit.value)
val csvPart = new CSVFilePartitioner(partitioner.partitionClass, sourcePath)
csvPart.splitFile(schemaName, cubeName,
- split.serializableHadoopSplit.value.getPartition().getFilesPath, targetFolder,
+ split.serializableHadoopSplit.value.getPartition.getFilesPath, targetFolder,
partitioner.nodeList.toList.asJava, partitioner.partitionCount, partitioner.partitionColumn,
requiredColumns, delimiter, quoteChar, headers, escapeChar, multiLine)
@@ -109,7 +109,7 @@ class CarbonDataPartitionRDD[K, V](
override def getPreferredLocations(split: Partition): Seq[String] = {
val theSplit = split.asInstanceOf[CarbonSparkRawDataPartition]
val s = theSplit.serializableHadoopSplit.value.getLocations.asScala
- logInfo("Host Name : " + s(0) + s.length)
+ logInfo("Host Name : " + s.head + s.length)
s
}
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataRDDFactory.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataRDDFactory.scala
index b07fa917c52..b2e981fb62f 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataRDDFactory.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDataRDDFactory.scala
@@ -52,7 +52,7 @@ import org.carbondata.spark.util.{CarbonQueryUtil, LoadMetadataUtil}
*/
object CarbonDataRDDFactory extends Logging {
- val logger = LogServiceFactory.getLogService(CarbonDataRDDFactory.getClass().getName())
+ val logger = LogServiceFactory.getLogService(CarbonDataRDDFactory.getClass.getName)
// scalastyle:off
def partitionCarbonData(sc: SparkContext,
@@ -77,7 +77,7 @@ object CarbonDataRDDFactory extends Logging {
var loadStatus = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
status.foreach {
case (key, value) =>
- if (value == true) {
+ if (value) {
loadStatus = CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS
}
}
@@ -90,10 +90,9 @@ object CarbonDataRDDFactory extends Logging {
storeLocation: String,
hdfsStoreLocation: String,
partitioner: Partitioner) {
- val kv: KeyVal[CarbonKey, CarbonValue] = new KeyValImpl()
val cube = CarbonMetadata.getInstance()
- .getCarbonTable(carbonLoadModel.getDatabaseName() + "_" + carbonLoadModel.getTableName())
- val metaDataPath: String = cube.getMetaDataFilepath()
+ .getCarbonTable(carbonLoadModel.getDatabaseName + "_" + carbonLoadModel.getTableName)
+ val metaDataPath: String = cube.getMetaDataFilepath
var currentRestructNumber = CarbonUtil
.checkAndReturnCurrentRestructFolderNumber(metaDataPath, "RS_", false)
if (-1 == currentRestructNumber) {
@@ -115,15 +114,15 @@ object CarbonDataRDDFactory extends Logging {
val sc = sqlContext
// Delete the records based on data
- var cube = org.carbondata.core.carbon.metadata.CarbonMetadata.getInstance
+ val cube = org.carbondata.core.carbon.metadata.CarbonMetadata.getInstance
.getCarbonTable(schemaName + "_" + cubeName)
var currentRestructNumber = CarbonUtil
- .checkAndReturnCurrentRestructFolderNumber(cube.getMetaDataFilepath(), "RS_", false)
+ .checkAndReturnCurrentRestructFolderNumber(cube.getMetaDataFilepath, "RS_", false)
if (-1 == currentRestructNumber) {
currentRestructNumber = 0
}
- val loadMetadataDetailsArray = CarbonUtil.readLoadMetadata(cube.getMetaDataFilepath()).toList
+ val loadMetadataDetailsArray = CarbonUtil.readLoadMetadata(cube.getMetaDataFilepath).toList
val resultMap = new CarbonDeleteLoadByDateRDD(
sc.sparkContext,
new DeletedLoadResultImpl(),
@@ -137,10 +136,10 @@ object CarbonDataRDDFactory extends Logging {
tableName,
hdfsStoreLocation,
loadMetadataDetailsArray,
- currentRestructNumber).collect.groupBy(_._1).toMap
+ currentRestructNumber).collect.groupBy(_._1)
var updatedLoadMetadataDetailsList = new ListBuffer[LoadMetadataDetails]()
- if (!resultMap.isEmpty) {
+ if (resultMap.nonEmpty) {
if (resultMap.size == 1) {
if (resultMap.contains("")) {
logError("Delete by Date request is failed")
@@ -149,13 +148,13 @@ object CarbonDataRDDFactory extends Logging {
}
}
val updatedloadMetadataDetails = loadMetadataDetailsArray.map { elem => {
- var statusList = resultMap.get(elem.getLoadName())
+ var statusList = resultMap.get(elem.getLoadName)
// check for the merged load folder.
- if (statusList == None && null != elem.getMergedLoadName()) {
- statusList = resultMap.get(elem.getMergedLoadName())
+ if (statusList.isEmpty && null != elem.getMergedLoadName) {
+ statusList = resultMap.get(elem.getMergedLoadName)
}
- if (statusList != None) {
+ if (statusList.isDefined) {
elem.setModificationOrdeletionTimesStamp(CarbonLoaderUtil.readCurrentTime())
// if atleast on CarbonCommonConstants.MARKED_FOR_UPDATE status exist,
// use MARKED_FOR_UPDATE
@@ -175,12 +174,12 @@ object CarbonDataRDDFactory extends Logging {
}
// Save the load metadata
- var carbonLock = CarbonLockFactory
- .getCarbonLockObj(cube.getMetaDataFilepath(), LockUsage.METADATA_LOCK)
- try { {
+ val carbonLock = CarbonLockFactory
+ .getCarbonLockObj(cube.getMetaDataFilepath, LockUsage.METADATA_LOCK)
+ try {
if (carbonLock.lockWithRetries()) {
logInfo("Successfully got the cube metadata file lock")
- if (!updatedLoadMetadataDetailsList.isEmpty) {
+ if (updatedLoadMetadataDetailsList.nonEmpty) {
LoadAggregateTabAfterRetention(schemaName, cube.getFactTableName, cube.getFactTableName,
sqlContext, schema, updatedLoadMetadataDetailsList)
}
@@ -192,7 +191,6 @@ object CarbonDataRDDFactory extends Logging {
cube.getDatabaseName,
updatedloadMetadataDetails.asJava)
}
- }
} finally {
if (carbonLock.unlock()) {
logInfo("unlock the cube metadata file successfully")
@@ -252,7 +250,7 @@ object CarbonDataRDDFactory extends Logging {
storeLocation,
relation.cubeMeta.dataPath,
kettleHomePath,
- relation.cubeMeta.partitioner, columinar, true)
+ relation.cubeMeta.partitioner, columinar, isAgg = true)
}
}
@@ -267,11 +265,11 @@ object CarbonDataRDDFactory extends Logging {
partitionStatus: String = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS) {
val carbonTable = carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
var currentRestructNumber = -1
- try { {
+ try {
logger.audit("The data load request has been received.")
currentRestructNumber = CarbonUtil
- .checkAndReturnCurrentRestructFolderNumber(carbonTable.getMetaDataFilepath(), "RS_", false)
+ .checkAndReturnCurrentRestructFolderNumber(carbonTable.getMetaDataFilepath, "RS_", false)
if (-1 == currentRestructNumber) {
currentRestructNumber = 0
}
@@ -285,9 +283,9 @@ object CarbonDataRDDFactory extends Logging {
}
var currentLoadCount = -1
- if (carbonLoadModel.getLoadMetadataDetails().size() > 0) {
- for (eachLoadMetaData <- carbonLoadModel.getLoadMetadataDetails().asScala) {
- val loadCount = Integer.parseInt(eachLoadMetaData.getLoadName())
+ if (carbonLoadModel.getLoadMetadataDetails.size() > 0) {
+ for (eachLoadMetaData <- carbonLoadModel.getLoadMetadataDetails.asScala) {
+ val loadCount = Integer.parseInt(eachLoadMetaData.getLoadName)
if (currentLoadCount < loadCount) {
currentLoadCount = loadCount
}
@@ -324,9 +322,9 @@ object CarbonDataRDDFactory extends Logging {
* 3) output Array[(partitionID,Array[BlockDetails])] to blocksGroupBy
*/
var splits = Array[TableSplit]()
- if (carbonLoadModel.isDirectLoad()) {
+ if (carbonLoadModel.isDirectLoad) {
// get all table Splits, this part means files were divide to different partitions
- splits = CarbonQueryUtil.getTableSplitsForDirectLoad(carbonLoadModel.getFactFilePath(),
+ splits = CarbonQueryUtil.getTableSplitsForDirectLoad(carbonLoadModel.getFactFilePath,
partitioner.nodeList, partitioner.partitionCount)
// get all partition blocks from file list
blocksGroupBy = splits.map {
@@ -335,7 +333,7 @@ object CarbonDataRDDFactory extends Logging {
for (path <- split.getPartition.getFilesPath.asScala) {
pathBuilder.append(path).append(",")
}
- if (pathBuilder.size > 0) {
+ if (pathBuilder.nonEmpty) {
pathBuilder.substring(0, pathBuilder.size - 1)
}
(split.getPartition.getUniqueID, SplitUtils.getSplits(pathBuilder.toString(),
@@ -371,7 +369,7 @@ object CarbonDataRDDFactory extends Logging {
*/
val hadoopConfiguration = new Configuration(sc.sparkContext.hadoopConfiguration)
// FileUtils will skip file which is no csv, and return all file path which split by ','
- val filePaths = FileUtils.getPaths(carbonLoadModel.getFactFilePath())
+ val filePaths = FileUtils.getPaths(carbonLoadModel.getFactFilePath)
hadoopConfiguration.set("mapreduce.input.fileinputformat.inputdir", filePaths)
hadoopConfiguration.set("mapreduce.input.fileinputformat.input.dir.recursive", "true")
val newHadoopRDD = new NewHadoopRDD[LongWritable, Text](
@@ -394,13 +392,14 @@ object CarbonDataRDDFactory extends Logging {
val newStatusMap = scala.collection.mutable.Map.empty[String, String]
status.foreach { eachLoadStatus =>
val state = newStatusMap.get(eachLoadStatus._2.getPartitionCount)
- if (null == state || None == state ||
- state == CarbonCommonConstants.STORE_LOADSTATUS_FAILURE) {
- newStatusMap.put(eachLoadStatus._2.getPartitionCount, eachLoadStatus._2.getLoadStatus)
- } else if (state == CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS &&
- eachLoadStatus._2.getLoadStatus ==
- CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS) {
- newStatusMap.put(eachLoadStatus._2.getPartitionCount, eachLoadStatus._2.getLoadStatus)
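+      // update the recorded status for a partition only when there is no status yet,
+      // the previous attempt failed, or a partial success is followed by a full success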
+ state match {
+        case None | Some(CarbonCommonConstants.STORE_LOADSTATUS_FAILURE) =>
+          newStatusMap.put(eachLoadStatus._2.getPartitionCount, eachLoadStatus._2.getLoadStatus)
+        case Some(CarbonCommonConstants.STORE_LOADSTATUS_PARTIAL_SUCCESS)
+          if eachLoadStatus._2.getLoadStatus == CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS =>
+          newStatusMap.put(eachLoadStatus._2.getPartitionCount, eachLoadStatus._2.getLoadStatus)
+        case _ =>
}
}
@@ -429,7 +428,7 @@ object CarbonDataRDDFactory extends Logging {
currentRestructNumber)
message = "Aggregate table creation failure"
} else {
- val (result, metadataDetails) = status(0)
+ val (result, _) = status(0)
val newSlice = CarbonCommonConstants.LOAD_FOLDER + result
CarbonLoaderUtil.deleteSlice(partitioner.partitionCount, carbonLoadModel.getDatabaseName,
carbonLoadModel.getTableName, carbonLoadModel.getTableName, hdfsStoreLocation,
@@ -454,18 +453,18 @@ object CarbonDataRDDFactory extends Logging {
if (!isAgg) {
CarbonLoaderUtil
.recordLoadMetadata(result, metadataDetails, carbonLoadModel, loadStatus, loadStartTime)
- } else if (!carbonLoadModel.isRetentionRequest()) {
+ } else if (!carbonLoadModel.isRetentionRequest) {
// TODO : Handle it
logInfo("********schema updated**********")
}
logger.audit("The data loading is successful.")
if (CarbonDataMergerUtil
- .checkIfLoadMergingRequired(carbonTable.getMetaDataFilepath(), carbonLoadModel,
+ .checkIfLoadMergingRequired(carbonTable.getMetaDataFilepath, carbonLoadModel,
hdfsStoreLocation, partitioner.partitionCount, currentRestructNumber)) {
val loadsToMerge = CarbonDataMergerUtil.getLoadsToMergeFromHDFS(
hdfsStoreLocation, FileFactory.getFileType(hdfsStoreLocation),
- carbonTable.getMetaDataFilepath(), carbonLoadModel, currentRestructNumber,
+ carbonTable.getMetaDataFilepath, carbonLoadModel, currentRestructNumber,
partitioner.partitionCount)
if (loadsToMerge.size() == 2) {
@@ -479,7 +478,7 @@ object CarbonDataRDDFactory extends Logging {
hdfsStoreLocation,
partitioner,
currentRestructNumber,
- carbonTable.getMetaDataFilepath(),
+ carbonTable.getMetaDataFilepath,
loadsToMerge,
MergedLoadName,
kettleHomePath,
@@ -493,14 +492,13 @@ object CarbonDataRDDFactory extends Logging {
}
if (finalMergeStatus) {
CarbonDataMergerUtil
- .updateLoadMetadataWithMergeStatus(loadsToMerge, carbonTable.getMetaDataFilepath(),
+ .updateLoadMetadataWithMergeStatus(loadsToMerge, carbonTable.getMetaDataFilepath,
MergedLoadName, carbonLoadModel)
}
}
}
}
}
- }
}
@@ -527,7 +525,7 @@ object CarbonDataRDDFactory extends Logging {
.deleteLoadFoldersFromFileSystem(carbonLoadModel, partitioner.partitionCount,
hdfsStoreLocation, isForceDeletion, currentRestructNumber, details)
- if (isUpdationRequired == true) {
+ if (isUpdationRequired) {
// Update load metadate file after cleaning deleted nodes
CarbonLoaderUtil.writeLoadMetadata(
carbonLoadModel.getCarbonDataLoadSchema,
@@ -567,21 +565,24 @@ object CarbonDataRDDFactory extends Logging {
partitioner: Partitioner) {
val cube = org.carbondata.core.carbon.metadata.CarbonMetadata.getInstance
.getCarbonTable(carbonLoadModel.getDatabaseName + "_" + carbonLoadModel.getTableName)
- val metaDataPath: String = cube.getMetaDataFilepath()
+ val metaDataPath: String = cube.getMetaDataFilepath
var currentRestructNumber = CarbonUtil
.checkAndReturnCurrentRestructFolderNumber(metaDataPath, "RS_", false)
if (-1 == currentRestructNumber) {
currentRestructNumber = 0
}
var carbonLock = CarbonLockFactory
- .getCarbonLockObj(cube.getMetaDataFilepath(), LockUsage.METADATA_LOCK)
- try { {
+ .getCarbonLockObj(cube.getMetaDataFilepath, LockUsage.METADATA_LOCK)
+ try {
if (carbonLock.lockWithRetries()) {
- deleteLoadsAndUpdateMetadata(carbonLoadModel, cube, partitioner, hdfsStoreLocation, true,
+ deleteLoadsAndUpdateMetadata(carbonLoadModel,
+ cube,
+ partitioner,
+ hdfsStoreLocation,
+ isForceDeletion = true,
currentRestructNumber)
}
}
- }
finally {
if (carbonLock.unlock()) {
logInfo("unlock the cube metadata file successfully")
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDeleteLoadByDateRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDeleteLoadByDateRDD.scala
index 2bd7aa7a560..8ccd017ae08 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDeleteLoadByDateRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDeleteLoadByDateRDD.scala
@@ -51,7 +51,7 @@ class CarbonDeleteLoadByDateRDD[K, V](
override def getPartitions: Array[Partition] = {
val splits = CarbonQueryUtil.getTableSplits(schemaName, cubeName, null, partitioner)
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonLoadPartition(id, i, splits(i))
}
result
@@ -64,15 +64,13 @@ class CarbonDeleteLoadByDateRDD[K, V](
logInfo("Input split: " + split.serializableHadoopSplit.value)
logInfo("Input split: " + split.serializableHadoopSplit.value)
- val partitionID = split.serializableHadoopSplit.value.getPartition().getUniqueID()
+ val partitionID = split.serializableHadoopSplit.value.getPartition.getUniqueID
// TODO call CARBON delete API
logInfo("Applying data retention as per date value " + dateValue)
var dateFormat = ""
- try { {
- val dateValueAsDate = DateTimeUtils.stringToTime(dateValue)
+    try {
+      // parse once to validate the supplied date before assuming the default timestamp format
+      DateTimeUtils.stringToTime(dateValue)
dateFormat = CarbonCommonConstants.CARBON_TIMESTAMP_DEFAULT_FORMAT
- }
} catch {
case e: Exception => logInfo("Unable to parse with default time format " + dateValue)
}
@@ -92,7 +90,7 @@ class CarbonDeleteLoadByDateRDD[K, V](
override def getPreferredLocations(split: Partition): Seq[String] = {
val theSplit = split.asInstanceOf[CarbonLoadPartition]
val s = theSplit.serializableHadoopSplit.value.getLocations.asScala
- logInfo("Host Name : " + s(0) + s.length)
+ logInfo("Host Name : " + s.head + s.length)
s
}
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropAggregateTableRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropAggregateTableRDD.scala
index fad1ae5f49a..9f885a22a63 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropAggregateTableRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropAggregateTableRDD.scala
@@ -29,11 +29,11 @@ import org.carbondata.spark.util.CarbonQueryUtil
class CarbonDropAggregateTableRDD[K, V](
- sc: SparkContext,
- keyClass: KeyVal[K, V],
- schemaName: String,
- cubeName: String,
- partitioner: Partitioner)
+ sc: SparkContext,
+ keyClass: KeyVal[K, V],
+ schemaName: String,
+ cubeName: String,
+ partitioner: Partitioner)
extends RDD[(K, V)](sc, Nil) with Logging {
sc.setLocalProperty("spark.scheduler.pool", "DDL")
@@ -41,7 +41,7 @@ class CarbonDropAggregateTableRDD[K, V](
override def getPartitions: Array[Partition] = {
val splits = CarbonQueryUtil.getTableSplits(schemaName, cubeName, null, partitioner)
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonLoadPartition(id, i, splits(i))
}
result
@@ -53,14 +53,12 @@ class CarbonDropAggregateTableRDD[K, V](
logInfo("Input split: " + split.serializableHadoopSplit.value)
// TODO call CARBON delete API
- // Register an on-task-completion callback to close the input stream.
- context.addOnCompleteCallback(() => close())
var havePair = false
var finished = false
override def hasNext: Boolean = {
if (!finished && !havePair) {
- finished = !false
+ finished = true
havePair = !finished
}
!finished
@@ -75,14 +73,6 @@ class CarbonDropAggregateTableRDD[K, V](
val value = new CarbonValue(null)
keyClass.getKey(row, value)
}
-
- private def close() {
- try {
- // reader.close()
- } catch {
- case e: Exception => logWarning("Exception in RecordReader.close()", e)
- }
- }
}
iter
}
@@ -90,7 +80,7 @@ class CarbonDropAggregateTableRDD[K, V](
override def getPreferredLocations(split: Partition): Seq[String] = {
val theSplit = split.asInstanceOf[CarbonLoadPartition]
val s = theSplit.serializableHadoopSplit.value.getLocations.asScala
- logInfo("Host Name : " + s(0) + s.length)
+ logInfo("Host Name : " + s.head + s.length)
s
}
}
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropCubeRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropCubeRDD.scala
index 95d63fc85e8..d6044c604ff 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropCubeRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonDropCubeRDD.scala
@@ -21,7 +21,6 @@ import org.apache.spark.{Logging, Partition, SparkContext, TaskContext}
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.execution.command.Partitioner
-import org.carbondata.core.carbon.metadata.CarbonMetadata
import org.carbondata.query.scanner.impl.{CarbonKey, CarbonValue}
import org.carbondata.spark.KeyVal
import org.carbondata.spark.util.CarbonQueryUtil
@@ -39,7 +38,7 @@ class CarbonDropCubeRDD[K, V](
override def getPartitions: Array[Partition] = {
val splits = CarbonQueryUtil.getTableSplits(schemaName, cubeName, null, partitioner)
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonLoadPartition(id, i, splits(i))
}
result
@@ -52,8 +51,6 @@ class CarbonDropCubeRDD[K, V](
val partitionCount = partitioner.partitionCount
for (a <- 0 until partitionCount) {
- val cubeUniqueName = schemaName + "_" + a + "_" + cubeName + "_" + a
- val carbonTable = CarbonMetadata.getInstance().getCarbonTable(cubeUniqueName)
// TODO: Clear Btree from memory
}
@@ -62,7 +59,7 @@ class CarbonDropCubeRDD[K, V](
override def hasNext: Boolean = {
if (!finished && !havePair) {
- finished = !false
+ finished = true
havePair = !finished
}
!finished
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
index 90abf7749b7..cb36970e2e2 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonGlobalDictionaryRDD.scala
@@ -38,7 +38,7 @@ import org.carbondata.spark.util.GlobalDictionaryUtil
* A partitioner partition by column.
*
* @constructor create a partitioner
- * @param numParts the number of partitions
+ * @param numParts the number of partitions
*/
class ColumnPartitioner(numParts: Int) extends Partitioner {
override def numPartitions: Int = numParts
@@ -48,12 +48,14 @@ class ColumnPartitioner(numParts: Int) extends Partitioner {
trait GenericParser {
val dimension: CarbonDimension
+
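+  /** register a child parser for a nested (array or struct) dimension */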
def addChild(child: GenericParser): Unit
+
def parseString(input: String): Unit
}
case class PrimitiveParser(dimension: CarbonDimension,
- setOpt: Option[HashSet[String]]) extends GenericParser {
+ setOpt: Option[HashSet[String]]) extends GenericParser {
val (hasDictEncoding, set: HashSet[String]) = setOpt match {
case None => (false, new HashSet[String])
case Some(x) => (true, x)
@@ -69,17 +71,18 @@ case class PrimitiveParser(dimension: CarbonDimension,
}
}
-case class ArrayParser(val dimension: CarbonDimension,
- val format: DataFormat) extends GenericParser {
+case class ArrayParser(dimension: CarbonDimension, format: DataFormat) extends GenericParser {
var children: GenericParser = _
+
def addChild(child: GenericParser): Unit = {
children = child
}
+
def parseString(input: String): Unit = {
if (StringUtils.isNotEmpty(input)) {
val splits = format.getSplits(input)
if (ArrayUtils.isNotEmpty(splits)) {
- for (i <- 0 until splits.length) {
+ for (i <- splits.indices) {
children.parseString(splits(i))
}
}
@@ -88,11 +91,13 @@ case class ArrayParser(val dimension: CarbonDimension,
}
case class StructParser(dimension: CarbonDimension,
- format: DataFormat) extends GenericParser {
+ format: DataFormat) extends GenericParser {
val children = new ArrayBuffer[GenericParser]
+
def addChild(child: GenericParser): Unit = {
children += child
}
+
def parseString(input: String): Unit = {
if (StringUtils.isNotEmpty(input)) {
val splits = format.getSplits(input)
@@ -105,8 +110,8 @@ case class StructParser(dimension: CarbonDimension,
}
case class DataFormat(delimiters: Array[String],
- var delimiterIndex: Int,
- patterns: Array[Pattern]) extends Serializable {
+ var delimiterIndex: Int,
+ patterns: Array[Pattern]) extends Serializable {
self =>
def getSplits(input: String): Array[String] = {
// -1 in case after splitting the last column is empty, the surrogate key ahs to be generated
@@ -123,31 +128,32 @@ case class DataFormat(delimiters: Array[String],
* a case class to package some attributes
*/
case class DictionaryLoadModel(table: CarbonTableIdentifier,
- dimensions: Array[CarbonDimension],
- hdfsLocation: String,
- dictfolderPath: String,
- dictFilePaths: Array[String],
- dictFileExists: Array[Boolean],
- isComplexes: Array[Boolean],
- primDimensions: Array[CarbonDimension],
- delimiters: Array[String]) extends Serializable
+ dimensions: Array[CarbonDimension],
+ hdfsLocation: String,
+ dictfolderPath: String,
+ dictFilePaths: Array[String],
+ dictFileExists: Array[Boolean],
+ isComplexes: Array[Boolean],
+ primDimensions: Array[CarbonDimension],
+ delimiters: Array[String]) extends Serializable
+
/**
* A RDD to combine distinct values in block.
*
* @constructor create a RDD with RDD[Row]
- * @param prev the input RDD[Row]
+ * @param prev the input RDD[Row]
* @param model a model package load info
*/
class CarbonBlockDistinctValuesCombineRDD(
- prev: RDD[Row],
- model: DictionaryLoadModel)
- extends RDD[(Int, Array[String])](prev) with Logging {
+ prev: RDD[Row],
+ model: DictionaryLoadModel)
+ extends RDD[(Int, Array[String])](prev) with Logging {
override def getPartitions: Array[Partition] = firstParent[Row].partitions
override def compute(split: Partition, context: TaskContext
- ): Iterator[(Int, Array[String])] = {
- val LOGGER = LogServiceFactory.getLogService(this.getClass().getName())
+ ): Iterator[(Int, Array[String])] = {
+ val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
val distinctValuesList = new ArrayBuffer[(Int, HashSet[String])]
try {
@@ -169,7 +175,6 @@ class CarbonBlockDistinctValuesCombineRDD(
mapColumnValuesWithId).get
}
var row: Row = null
- var value: String = null
val rddIter = firstParent[Row].iterator(split, context)
// generate block distinct value set
while (rddIter.hasNext) {
@@ -195,18 +200,18 @@ class CarbonBlockDistinctValuesCombineRDD(
* A RDD to generate dictionary file for each column
*
* @constructor create a RDD with RDD[Row]
- * @param prev the input RDD[Row]
+ * @param prev the input RDD[Row]
* @param model a model package load info
*/
class CarbonGlobalDictionaryGenerateRDD(
- prev: RDD[(Int, Array[String])],
- model: DictionaryLoadModel)
- extends RDD[(String, String)](prev) with Logging {
+ prev: RDD[(Int, Array[String])],
+ model: DictionaryLoadModel)
+ extends RDD[(String, String)](prev) with Logging {
override def getPartitions: Array[Partition] = firstParent[(Int, Array[String])].partitions
override def compute(split: Partition, context: TaskContext): Iterator[(String, String)] = {
- val LOGGER = LogServiceFactory.getLogService(this.getClass().getName())
+ val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
var status = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
val iter = new Iterator[(String, String)] {
// generate distinct value list
@@ -247,12 +252,12 @@ class CarbonGlobalDictionaryGenerateRDD(
val t5 = System.currentTimeMillis
LOGGER.info("\n columnName:" + model.primDimensions(split.index).getColName +
- "\n columnId:" + model.primDimensions(split.index).getColumnId +
- "\n new distinct values count:" + distinctValueCount +
- "\n create dictionary cache:" + (t2 - t1) +
- "\n combine lists:" + (t3 - t2) +
- "\n sort list, distinct and write:" + (t4 - t3) +
- "\n write sort info:" + (t5 - t4))
+ "\n columnId:" + model.primDimensions(split.index).getColumnId +
+ "\n new distinct values count:" + distinctValueCount +
+ "\n create dictionary cache:" + (t2 - t1) +
+ "\n combine lists:" + (t3 - t2) +
+ "\n sort list, distinct and write:" + (t4 - t3) +
+ "\n write sort info:" + (t5 - t4))
}
} catch {
case ex: Exception =>
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonMergerRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonMergerRDD.scala
index 624ea519763..fdaecdbdeb8 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonMergerRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonMergerRDD.scala
@@ -17,8 +17,7 @@
package org.carbondata.spark.rdd
-import java.text.SimpleDateFormat
-import java.util.{Date, List}
+import java.util.List
import scala.collection.JavaConverters._
@@ -52,19 +51,14 @@ class CarbonMergerRDD[K, V](
sc.setLocalProperty("spark.scheduler.pool", "DDL")
- private val jobtrackerId: String = {
- val formatter = new SimpleDateFormat("yyyyMMddHHmm")
- formatter.format(new Date())
- }
-
override def compute(theSplit: Partition, context: TaskContext): Iterator[(K, V)] = {
val iter = new Iterator[(K, V)] {
var dataloadStatus = CarbonCommonConstants.STORE_LOADSTATUS_FAILURE
val split = theSplit.asInstanceOf[CarbonLoadPartition]
logInfo("Input split: " + split.serializableHadoopSplit.value)
- val partitionId = split.serializableHadoopSplit.value.getPartition().getUniqueID()
+ val partitionId = split.serializableHadoopSplit.value.getPartition.getUniqueID
val model = carbonLoadModel
- .getCopyWithPartition(split.serializableHadoopSplit.value.getPartition().getUniqueID())
+ .getCopyWithPartition(split.serializableHadoopSplit.value.getPartition.getUniqueID)
val mergedLoadMetadataDetails = CarbonDataMergerUtil
.executeMerging(model, storeLocation, hdfsStoreLocation, currentRestructNumber,
@@ -73,7 +67,7 @@ class CarbonMergerRDD[K, V](
model.setLoadMetadataDetails(CarbonUtil
.readLoadMetadata(metadataFilePath).toList.asJava)
- if (mergedLoadMetadataDetails == true) {
+ if (mergedLoadMetadataDetails) {
CarbonLoaderUtil.copyMergedLoadToHDFS(model, currentRestructNumber, mergedLoadName)
dataloadStatus = checkAndLoadAggregationTable
@@ -84,7 +78,7 @@ class CarbonMergerRDD[K, V](
override def hasNext: Boolean = {
if (!finished && !havePair) {
- finished = !false
+ finished = true
havePair = !finished
}
!finished
@@ -99,12 +93,12 @@ class CarbonMergerRDD[K, V](
}
- def checkAndLoadAggregationTable(): String = {
+ def checkAndLoadAggregationTable: String = {
var dataloadStatus = CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
val carbonTable = model.getCarbonDataLoadSchema.getCarbonTable
val aggTables = carbonTable.getAggregateTablesName
if (null != aggTables && !aggTables.isEmpty) {
- val details = model.getLoadMetadataDetails.asScala.toSeq.toArray
+ val details = model.getLoadMetadataDetails.asScala.toArray
val newSlice = CarbonCommonConstants.LOAD_FOLDER + mergedLoadName
var listOfLoadFolders = CarbonLoaderUtil.getListOfValidSlices(details)
listOfLoadFolders = CarbonLoaderUtil.addNewSliceNameToList(newSlice, listOfLoadFolders)
@@ -167,7 +161,7 @@ class CarbonMergerRDD[K, V](
loadFolders: Array[String]): String = {
loadFolders.foreach { loadFolder =>
val restructNumber = CarbonUtil.getRestructureNumber(loadFolder, model.getTableName)
- try { {
+ try {
if (CarbonLoaderUtil
.isSliceValid(loadFolder, listOfLoadFolders, listOfUpdatedLoadFolders,
model.getTableName)) {
@@ -179,7 +173,6 @@ class CarbonMergerRDD[K, V](
CarbonLoaderUtil
.createEmptyLoadFolder(model, loadFolder, hdfsStoreLocation, restructNumber)
}
- }
} catch {
case e: Exception => dataloadStatus = CarbonCommonConstants.STORE_LOADSTATUS_FAILURE
} finally {
@@ -193,7 +186,7 @@ class CarbonMergerRDD[K, V](
}
}
}
- return CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
+ CarbonCommonConstants.STORE_LOADSTATUS_SUCCESS
}
@@ -204,16 +197,16 @@ class CarbonMergerRDD[K, V](
override def getPreferredLocations(split: Partition): Seq[String] = {
val theSplit = split.asInstanceOf[CarbonLoadPartition]
val s = theSplit.serializableHadoopSplit.value.getLocations.asScala
- logInfo("Host Name : " + s(0) + s.length)
+ logInfo("Host Name : " + s.head + s.length)
s
}
override def getPartitions: Array[Partition] = {
val splits = CarbonQueryUtil
- .getTableSplits(carbonLoadModel.getDatabaseName(), carbonLoadModel.getTableName(), null,
+ .getTableSplits(carbonLoadModel.getDatabaseName, carbonLoadModel.getTableName, null,
partitioner)
val result = new Array[Partition](splits.length)
- for (i <- 0 until result.length) {
+ for (i <- result.indices) {
result(i) = new CarbonLoadPartition(id, i, splits(i))
}
result
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonQueryRDD.scala b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonQueryRDD.scala
index b1c848289b3..5743322521d 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonQueryRDD.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/rdd/CarbonQueryRDD.scala
@@ -18,9 +18,7 @@
package org.carbondata.spark.rdd
-import java.text.SimpleDateFormat
import java.util
-import java.util.Date
import scala.collection.JavaConverters._
@@ -43,37 +41,35 @@ import org.carbondata.spark.load.CarbonLoaderUtil
import org.carbondata.spark.util.QueryPlanUtil
class CarbonSparkPartition(rddId: Int, val idx: Int,
- val locations: Array[String],
- val tableBlockInfos: util.List[TableBlockInfo])
+ val locations: Array[String],
+ val tableBlockInfos: util.List[TableBlockInfo])
extends Partition {
override val index: Int = idx
+
// val serializableHadoopSplit = new SerializableWritable[Array[String]](locations)
- override def hashCode(): Int = 41 * (41 + rddId) + idx
+ override def hashCode(): Int = {
+ 41 * (41 + rddId) + idx
+ }
}
- /**
- * This RDD is used to perform query.
- */
- class CarbonQueryRDD[K, V](
- sc: SparkContext,
- queryModel: QueryModel,
- filterExpression: Expression,
- keyClass: KeyVal[K, V],
- @transient conf: Configuration,
- cubeCreationTime: Long,
- schemaLastUpdatedTime: Long,
- baseStoreLocation: String)
+/**
+ * This RDD is used to perform query.
+ */
+class CarbonQueryRDD[K, V](
+ sc: SparkContext,
+ queryModel: QueryModel,
+ filterExpression: Expression,
+ keyClass: KeyVal[K, V],
+ @transient conf: Configuration,
+ cubeCreationTime: Long,
+ schemaLastUpdatedTime: Long,
+ baseStoreLocation: String)
extends RDD[(K, V)](sc, Nil) with Logging {
val defaultParallelism = sc.defaultParallelism
- private val jobtrackerId: String = {
- val formatter = new SimpleDateFormat("yyyyMMddHHmm")
- formatter.format(new Date())
- }
-
override def getPartitions: Array[Partition] = {
val startTime = System.currentTimeMillis()
val (carbonInputFormat: CarbonInputFormat[RowResult], job: Job) =
@@ -81,7 +77,7 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
val result = new util.ArrayList[Partition](defaultParallelism)
val validSegments = job.getConfiguration.get(CarbonInputFormat.INPUT_SEGMENT_NUMBERS)
- if(!validSegments.isEmpty) {
+ if (!validSegments.isEmpty) {
var filterResolver: FilterResolverIntf = null
if (filterExpression != null) {
// set filter resolver tree
@@ -98,7 +94,7 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
inputSplit.getLocations, inputSplit.getLength
)
)
- if(!blockList.isEmpty) {
+ if (blockList.nonEmpty) {
// group blocks to nodes, tasks
val nodeBlockMapping =
CarbonLoaderUtil.nodeBlockTaskMapping(blockList.asJava, -1, defaultParallelism)
@@ -117,16 +113,15 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
val noOfNodes = nodeBlockMapping.size
val noOfTasks = result.size()
logInfo(s"Identified no.of.Blocks: $noOfBlocks,"
- + s"parallelism: $defaultParallelism , no.of.nodes: $noOfNodes, no.of.tasks: $noOfTasks"
+ + s"parallelism: $defaultParallelism , no.of.nodes: $noOfNodes, no.of.tasks: $noOfTasks"
)
logInfo("Time taken to identify Blocks to scan : " + (System
- .currentTimeMillis() - startTime)
+ .currentTimeMillis() - startTime)
)
- for (j <- 0 to result.size()-1)
- {
+ for (j <- 0 until result.size()) {
val cp = result.get(j).asInstanceOf[CarbonSparkPartition]
logInfo(s"Node : " + cp.locations.toSeq.mkString(",")
- + ", No.Of Blocks : " + cp.tableBlockInfos.size())
+ + ", No.Of Blocks : " + cp.tableBlockInfos.size())
}
} else {
logInfo("No blocks identified to scan")
@@ -143,15 +138,15 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
}
- override def compute(thepartition: Partition, context: TaskContext): Iterator[(K, V)] = {
- val LOGGER = LogServiceFactory.getLogService(this.getClass().getName())
+ override def compute(thepartition: Partition, context: TaskContext): Iterator[(K, V)] = {
+ val LOGGER = LogServiceFactory.getLogService(this.getClass.getName)
val iter = new Iterator[(K, V)] {
var rowIterator: CarbonIterator[RowResult] = _
var queryStartTime: Long = 0
try {
val carbonSparkPartition = thepartition.asInstanceOf[CarbonSparkPartition]
- if(!carbonSparkPartition.tableBlockInfos.isEmpty) {
- queryModel.setQueryId(queryModel.getQueryId() + "_" + carbonSparkPartition.idx)
+ if (!carbonSparkPartition.tableBlockInfos.isEmpty) {
+ queryModel.setQueryId(queryModel.getQueryId + "_" + carbonSparkPartition.idx)
// fill table block info
queryModel.setTableBlockInfos(carbonSparkPartition.tableBlockInfos)
queryStartTime = System.currentTimeMillis
@@ -160,7 +155,7 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
logInfo("*************************" + carbonPropertiesFilePath)
if (null == carbonPropertiesFilePath) {
System.setProperty("carbon.properties.filepath", System.getProperty("user.dir")
- + '/' + "conf" + '/' + "carbon.properties"
+ + '/' + "conf" + '/' + "carbon.properties"
)
}
// execute query
@@ -184,7 +179,7 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
override def hasNext: Boolean = {
if (!finished && !havePair) {
- finished = (null == rowIterator) || (!rowIterator.hasNext())
+ finished = (null == rowIterator) || (!rowIterator.hasNext)
havePair = !finished
}
if (finished) {
@@ -199,22 +194,22 @@ class CarbonSparkPartition(rddId: Int, val idx: Int,
}
havePair = false
val row = rowIterator.next()
- val key = row.getKey()
- val value = row.getValue()
+ val key = row.getKey
+ val value = row.getValue
keyClass.getKey(key, value)
}
logInfo("*************************** Total Time Taken to execute the query in Carbon Side: " +
- (System.currentTimeMillis - queryStartTime)
+ (System.currentTimeMillis - queryStartTime)
)
}
iter
}
- /**
- * Get the preferred locations where to launch this task.
- */
+ /**
+ * Get the preferred locations where to launch this task.
+ */
override def getPreferredLocations(partition: Partition): Seq[String] = {
val theSplit = partition.asInstanceOf[CarbonSparkPartition]
theSplit.locations.filter(_ != "localhost")
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/thriftserver/CarbonThriftServer.scala b/integration/spark/src/main/scala/org/carbondata/spark/thriftserver/CarbonThriftServer.scala
index a64634d19d3..201a068fdc5 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/thriftserver/CarbonThriftServer.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/thriftserver/CarbonThriftServer.scala
@@ -27,7 +27,7 @@ import org.carbondata.core.util.CarbonProperties
object CarbonThriftServer {
def main(args: Array[String]): Unit = {
- var conf = new SparkConf()
+ val conf = new SparkConf()
.setAppName("Carbon Thrift Server")
val sparkHome = System.getenv.get("SPARK_HOME")
if (null != sparkHome) {
@@ -40,14 +40,14 @@ object CarbonThriftServer {
try {
Thread.sleep(Integer.parseInt(warmUpTime))
} catch {
- case _ =>
+      case _: Exception =>
val LOG = LogServiceFactory.getLogService(this.getClass.getCanonicalName)
LOG.error("Wrong value for carbon.spark.warmUpTime " + warmUpTime +
- "Using default Value and proceeding")
+ "Using default Value and proceeding")
Thread.sleep(30000)
}
- val carbonContext = new CarbonContext(sc, args(0))
+ val carbonContext = new CarbonContext(sc, args.head)
HiveThriftServer2.startWithContext(carbonContext)
}
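The warm-up handling above sleeps for carbon.spark.warmUpTime milliseconds and falls back to 30 seconds when the configured value cannot be parsed. A small standalone sketch of that fallback (the helper name warmUpMillis is illustrative and not part of CarbonThriftServer):

  // Parse the configured warm-up time, defaulting to 30 s on bad input,
  // mirroring the catch block above.
  def warmUpMillis(raw: String): Long = {
    try {
      Integer.parseInt(raw).toLong
    } catch {
      case _: NumberFormatException => 30000L
    }
  }

  Thread.sleep(warmUpMillis("5000")) // sleeps 5 s; a bad value would sleep 30 s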
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
index 3a39b638872..601cf172027 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/CarbonScalaUtil.scala
@@ -29,9 +29,9 @@ import org.carbondata.core.constants.CarbonCommonConstants
import org.carbondata.query.expression.{DataType => CarbonDataType}
object CarbonScalaUtil {
- def convertSparkToCarbonDataType(dataType: org.apache.spark.sql.types.DataType): CarbonDataType =
+ def convertSparkToCarbonDataType(
+ dataType: org.apache.spark.sql.types.DataType): CarbonDataType = {
dataType match {
-
case StringType => CarbonDataType.StringType
case IntegerType => CarbonDataType.IntegerType
case LongType => CarbonDataType.LongType
@@ -45,9 +45,9 @@ object CarbonScalaUtil {
case NullType => CarbonDataType.NullType
case _ => CarbonDataType.DecimalType
}
+ }
- def convertSparkToCarbonSchemaDataType(dataType: String): String =
-
+ def convertSparkToCarbonSchemaDataType(dataType: String): String = {
dataType match {
case CarbonCommonConstants.STRING_TYPE => CarbonCommonConstants.STRING
case CarbonCommonConstants.INTEGER_TYPE => CarbonCommonConstants.INTEGER
@@ -62,8 +62,9 @@ object CarbonScalaUtil {
case CarbonCommonConstants.TIMESTAMP_TYPE => CarbonCommonConstants.TIMESTAMP
case anyType => anyType
}
+ }
- def convertSparkColumnToCarbonLevel(field: (String, String)): Seq[Level] =
+ def convertSparkColumnToCarbonLevel(field: (String, String)): Seq[Level] = {
field._2 match {
case CarbonCommonConstants.STRING_TYPE => Seq(
Level(field._1, field._1, Int.MaxValue, CarbonCommonConstants.STRING))
@@ -88,6 +89,7 @@ object CarbonScalaUtil {
case CarbonCommonConstants.TIMESTAMP_TYPE => Seq(
Level(field._1, field._1, Int.MaxValue, CarbonCommonConstants.TIMESTAMP))
}
+ }
case class TransformHolder(rdd: Any, mataData: CarbonMetaData)
@@ -97,7 +99,7 @@ object CarbonScalaUtil {
val relation = CarbonEnv.getInstance(carbonContext).carbonCatalog
.lookupRelation1(Option(carbonTable.getDatabaseName),
carbonTable.getFactTableName, None)(carbonContext).asInstanceOf[CarbonRelation]
- var rdd = new SchemaRDD(carbonContext, relation)
+ val rdd = new SchemaRDD(carbonContext, relation)
rdd.registerTempTable(carbonTable.getFactTableName)
TransformHolder(rdd, createSparkMeta(carbonTable))
}
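With the explicit method bodies above, the converters are plain matches over Spark SQL types. A short usage sketch, assuming the same imports that CarbonScalaUtil already has in scope; the results simply follow the match arms shown in the hunk:

  import org.apache.spark.sql.types.{IntegerType, NullType, StringType}

  // Illustrative calls; results follow the match arms above.
  val a = CarbonScalaUtil.convertSparkToCarbonDataType(StringType)  // CarbonDataType.StringType
  val b = CarbonScalaUtil.convertSparkToCarbonDataType(IntegerType) // CarbonDataType.IntegerType
  val c = CarbonScalaUtil.convertSparkToCarbonDataType(NullType)    // CarbonDataType.NullType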
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
index 5700c739813..80addd537b5 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/GlobalDictionaryUtil.scala
@@ -59,18 +59,19 @@ object GlobalDictionaryUtil extends Logging {
* find columns which need to generate global dictionary.
*
* @param dimensions dimension list of schema
- * @param columns column list of csv file
+ * @param columns column list of csv file
* @return (Array[CarbonDimension], Array[String])
*/
- def pruneDimensions(dimensions: Array[CarbonDimension], headers: Array[String],
- columns: Array[String]): ( Array[CarbonDimension], Array[String] ) = {
+ def pruneDimensions(dimensions: Array[CarbonDimension],
+ headers: Array[String],
+ columns: Array[String]): (Array[CarbonDimension], Array[String]) = {
val dimensionBuffer = new ArrayBuffer[CarbonDimension]
val columnNameBuffer = new ArrayBuffer[String]
val dimensionsWithDict = dimensions.filter(hasEncoding(_, Encoding.DICTIONARY,
Encoding.DIRECT_DICTIONARY))
for (dim <- dimensionsWithDict) {
breakable {
- for (i <- 0 until headers.length) {
+ for (i <- headers.indices) {
if (dim.getColName.equalsIgnoreCase(headers(i))) {
dimensionBuffer += dim
columnNameBuffer += columns(i)
@@ -83,13 +84,14 @@ object GlobalDictionaryUtil extends Logging {
}
/**
- * use this method to judge whether CarbonDimension use some encoding or not
+ * use this method to judge whether CarbonDimension uses some encoding or not
*/
- def hasEncoding(dimension: CarbonDimension, encoding: Encoding,
- excludeEncoding: Encoding): Boolean = {
+ def hasEncoding(dimension: CarbonDimension,
+ encoding: Encoding,
+ excludeEncoding: Encoding): Boolean = {
if (dimension.isComplex()) {
var has = false
- var children = dimension.getListOfChildDimensions
+ val children = dimension.getListOfChildDimensions
breakable {
for (i <- 0 until children.size) {
has = has || hasEncoding(children.get(i), encoding, excludeEncoding)
@@ -101,23 +103,23 @@ object GlobalDictionaryUtil extends Logging {
has
} else {
dimension.hasEncoding(encoding) &&
- ( excludeEncoding == null || ! dimension.hasEncoding(excludeEncoding))
+ (excludeEncoding == null || !dimension.hasEncoding(excludeEncoding))
}
}
def gatherDimensionByEncoding(dimension: CarbonDimension,
- encoding: Encoding,
- excludeEncoding: Encoding,
- dimensionsWithEncoding: ArrayBuffer[CarbonDimension]) {
+ encoding: Encoding,
+ excludeEncoding: Encoding,
+ dimensionsWithEncoding: ArrayBuffer[CarbonDimension]) {
if (dimension.isComplex()) {
- val children = dimension.getListOfChildDimensions()
+ val children = dimension.getListOfChildDimensions
for (i <- 0 until children.size) {
gatherDimensionByEncoding(children.get(i), encoding, excludeEncoding,
dimensionsWithEncoding)
}
} else {
if (dimension.hasEncoding(encoding) &&
- ( excludeEncoding == null || ! dimension.hasEncoding(excludeEncoding))) {
+ (excludeEncoding == null || !dimension.hasEncoding(excludeEncoding))) {
dimensionsWithEncoding += dimension
}
}
@@ -133,13 +135,13 @@ object GlobalDictionaryUtil extends Logging {
/**
* invoke CarbonDictionaryWriter to write dictionary to file.
*
- * @param model instance of DictionaryLoadModel
+ * @param model instance of DictionaryLoadModel
* @param columnIndex the index of current column in column list
- * @param iter distinct value list of dictionary
+ * @param iter distinct value list of dictionary
*/
def writeGlobalDictionaryToFile(model: DictionaryLoadModel,
- columnIndex: Int,
- iter: Iterator[String]): Unit = {
+ columnIndex: Int,
+ iter: Iterator[String]): Unit = {
val writer: CarbonDictionaryWriter = new CarbonDictionaryWriterImpl(
model.hdfsLocation, model.table,
model.primDimensions(columnIndex).getColumnId)
@@ -148,19 +150,18 @@ object GlobalDictionaryUtil extends Logging {
writer.write(iter.next)
}
} finally {
- writer.close
+ writer.close()
}
}
/**
* invokes the CarbonDictionarySortIndexWriter to write column sort info
* sortIndex and sortIndexInverted data to the sortindex file.
- *
- * @param model
- * @param index
+ *
*/
- def writeGlobalDictionaryColumnSortInfo(model: DictionaryLoadModel, index: Int,
- dictionary: Dictionary): Unit = {
+ def writeGlobalDictionaryColumnSortInfo(model: DictionaryLoadModel,
+ index: Int,
+ dictionary: Dictionary): Unit = {
val preparator: CarbonDictionarySortInfoPreparator =
new CarbonDictionarySortInfoPreparator(model.hdfsLocation, model.table)
val dictionarySortInfo: CarbonDictionarySortInfo =
@@ -180,10 +181,9 @@ object GlobalDictionaryUtil extends Logging {
/**
* read global dictionary from cache
*/
- def readGlobalDictionaryFromCache(model: DictionaryLoadModel
- ): HashMap[String, Dictionary] = {
+ def readGlobalDictionaryFromCache(model: DictionaryLoadModel): HashMap[String, Dictionary] = {
val dictMap = new HashMap[String, Dictionary]
- for (i <- 0 until model.primDimensions.length) {
+ for (i <- model.primDimensions.indices) {
if (model.dictFileExists(i)) {
val dict = CarbonLoaderUtil.getDictionary(model.table,
model.primDimensions(i).getColumnId, model.hdfsLocation,
@@ -197,22 +197,19 @@ object GlobalDictionaryUtil extends Logging {
/**
* invoke CarbonDictionaryReader to read dictionary from files.
- *
- * @param model
- * @return: scala.Tuple2[],boolean[]>
*/
- def readGlobalDictionaryFromFile(model: DictionaryLoadModel
- ): HashMap[String, HashSet[String]] = {
+ def readGlobalDictionaryFromFile(model: DictionaryLoadModel): HashMap[String, HashSet[String]] = {
val dictMap = new HashMap[String, HashSet[String]]
- for (i <- 0 until model.primDimensions.length) {
+ for (i <- model.primDimensions.indices) {
val set = new HashSet[String]
if (model.dictFileExists(i)) {
val reader: CarbonDictionaryReader = new CarbonDictionaryReaderImpl(
model.hdfsLocation, model.table, model.primDimensions(i).getColumnId)
val values = reader.read
if (values != null) {
- for (j <- 0 until values.size)
+ for (j <- 0 until values.size) {
set.add(new String(values.get(j)))
+ }
}
}
dictMap.put(model.primDimensions(i).getColumnId, set)
@@ -221,25 +218,24 @@ object GlobalDictionaryUtil extends Logging {
}
def generateParserForChildrenDimension(dim: CarbonDimension,
- format: DataFormat,
- mapColumnValuesWithId:
- HashMap[String, HashSet[String]],
- generic: GenericParser): Unit = {
+ format: DataFormat,
+ mapColumnValuesWithId: HashMap[String, HashSet[String]],
+ generic: GenericParser): Unit = {
val children = dim.getListOfChildDimensions.asScala
- for (i <- 0 until children.length) {
+ for (i <- children.indices) {
generateParserForDimension(Some(children(i)), format.cloneAndIncreaseIndex,
mapColumnValuesWithId) match {
- case Some(childDim) =>
- generic.addChild(childDim)
- case None =>
- }
+ case Some(childDim) =>
+ generic.addChild(childDim)
+ case None =>
+ }
}
}
def generateParserForDimension(dimension: Option[CarbonDimension],
- format: DataFormat,
- mapColumnValuesWithId: HashMap[String, HashSet[String]]
- ): Option[GenericParser] = {
+ format: DataFormat,
+ mapColumnValuesWithId: HashMap[String, HashSet[String]]): Option[GenericParser] = {
dimension match {
case None =>
None
@@ -262,8 +258,12 @@ object GlobalDictionaryUtil extends Logging {
def createDataFormat(delimiters: Array[String]): DataFormat = {
if (ArrayUtils.isNotEmpty(delimiters)) {
val patterns = new Array[Pattern](delimiters.length)
- for (i <- 0 until patterns.length) {
- patterns(i) = Pattern.compile(if (delimiters(i)== null) "" else delimiters(i))
+ for (i <- patterns.indices) {
+ patterns(i) = Pattern.compile(if (delimiters(i) == null) {
+ ""
+ } else {
+ delimiters(i)
+ })
}
DataFormat(delimiters, 0, patterns)
} else {
@@ -274,30 +274,30 @@ object GlobalDictionaryUtil extends Logging {
/**
* create an instance of DictionaryLoadModel
*
- * @param table CarbonTableIdentifier
- * @param columnNames column list
- * @param hdfsLocation store location in HDFS
+ * @param table CarbonTableIdentifier
+ * @param dimensions dimension list
+ * @param hdfsLocation store location in HDFS
* @param dictfolderPath path of dictionary folder
* @return: org.carbondata.spark.rdd.DictionaryLoadModel
*/
def createDictionaryLoadModel(carbonLoadModel: CarbonLoadModel,
- table: CarbonTableIdentifier,
- dimensions: Array[CarbonDimension],
- hdfsLocation: String,
- dictfolderPath: String): DictionaryLoadModel = {
+ table: CarbonTableIdentifier,
+ dimensions: Array[CarbonDimension],
+ hdfsLocation: String,
+ dictfolderPath: String): DictionaryLoadModel = {
val primDimensionsBuffer = new ArrayBuffer[CarbonDimension]
- for (i <- 0 until dimensions.length) {
+ for (i <- dimensions.indices) {
val dims = getPrimDimensionWithDict(dimensions(i))
- for (j <- 0 until dims.length) {
+ for (j <- dims.indices) {
primDimensionsBuffer += dims(j)
}
}
- val primDimensions = primDimensionsBuffer.toSeq.map { x => x }.toArray
+ val primDimensions = primDimensionsBuffer.toArray
// list dictionary file path
val dictFilePaths = new Array[String](primDimensions.length)
val dictFileExists = new Array[Boolean](primDimensions.length)
val carbonTablePath = CarbonStorePath.getCarbonTablePath(hdfsLocation, table)
- for (i <- 0 until primDimensions.length) {
+ for (i <- primDimensions.indices) {
dictFilePaths(i) = carbonTablePath.getDictionaryFilePath(primDimensions(i).getColumnId)
dictFileExists(i) = CarbonUtil.isFileExists(dictFilePaths(i))
}
@@ -307,7 +307,7 @@ object GlobalDictionaryUtil extends Logging {
dictfolderPath,
dictFilePaths,
dictFileExists,
- dimensions.map(_.isComplex() == true),
+ dimensions.map(_.isComplex),
primDimensions,
carbonLoadModel.getDelimiters)
}
@@ -316,23 +316,27 @@ object GlobalDictionaryUtil extends Logging {
* append all file path to a String, file path separated by comma
*/
def getCsvRecursivePathsFromCarbonFile(carbonFile: CarbonFile): String = {
- if (carbonFile.isDirectory()) {
+ if (carbonFile.isDirectory) {
val files = carbonFile.listFiles()
val stringbuild = new StringBuilder()
for (j <- 0 until files.size) {
val filePath = getCsvRecursivePathsFromCarbonFile(files(j))
- if (!filePath.isEmpty()) {
+ if (!filePath.isEmpty) {
stringbuild.append(filePath).append(",")
}
}
- if (!stringbuild.isEmpty) {
+ if (stringbuild.nonEmpty) {
stringbuild.substring(0, stringbuild.size - 1)
} else {
stringbuild.toString()
}
} else {
val path = carbonFile.getPath
- if (path.toLowerCase().endsWith(".csv")) path else ""
+ if (path.toLowerCase().endsWith(".csv")) {
+ path
+ } else {
+ ""
+ }
}
}
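The recursion above collects every *.csv path under a directory into one comma-separated string. A purely illustrative, standalone restatement of that traversal using java.io.File rather than the CarbonFile API:

  import java.io.File

  // Gather all .csv paths below f, comma-separated, as the method above does.
  def csvPaths(f: File): String = {
    if (f.isDirectory) {
      f.listFiles()
        .map(csvPaths)      // recurse into children
        .filter(_.nonEmpty) // drop subtrees without CSV files
        .mkString(",")      // no trailing comma, matching the stringbuild handling
    } else if (f.getPath.toLowerCase.endsWith(".csv")) {
      f.getPath
    } else {
      ""
    }
  }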
@@ -349,11 +353,11 @@ object GlobalDictionaryUtil extends Logging {
val fileType = FileFactory.getFileType(filePaths(i))
val carbonFile = FileFactory.getCarbonFile(filePaths(i), fileType)
val filePath = getCsvRecursivePathsFromCarbonFile(carbonFile)
- if (!filePath.isEmpty()) {
+ if (!filePath.isEmpty) {
stringbuild.append(filePath).append(",")
}
}
- if (!stringbuild.isEmpty) {
+ if (stringbuild.nonEmpty) {
stringbuild.substring(0, stringbuild.size - 1)
} else {
stringbuild.toString()
@@ -365,19 +369,29 @@ object GlobalDictionaryUtil extends Logging {
* load CSV files to DataFrame by using datasource "com.databricks.spark.csv"
*
* @param sqlContext SQLContext
- * @param filePath file or directory path
+ * @param carbonLoadModel CarbonLoadModel
* @return: org.apache.spark.sql.DataFrame
*/
private def loadDataFrame(sqlContext: SQLContext,
- carbonLoadModel: CarbonLoadModel): DataFrame = {
+ carbonLoadModel: CarbonLoadModel): DataFrame = {
val df = sqlContext.read
.format("com.databricks.spark.csv")
- .option("header",
- {if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) "true"
- else "false" })
- .option("delimiter",
- {if (StringUtils.isEmpty(carbonLoadModel.getCsvDelimiter))"" + CSVWriter.DEFAULT_SEPARATOR
- else carbonLoadModel.getCsvDelimiter})
+ .option("header", {
+ if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) {
+ "true"
+ } else {
+ "false"
+ }
+ })
+ .option("delimiter", {
+ if (StringUtils.isEmpty(carbonLoadModel.getCsvDelimiter)) {
+ "" + CSVWriter.DEFAULT_SEPARATOR
+ } else {
+ carbonLoadModel.getCsvDelimiter
+ }
+ })
.option("parserLib", "univocity")
.load(getCsvRecursivePaths(carbonLoadModel.getFactFilePath))
df
@@ -401,12 +415,10 @@ object GlobalDictionaryUtil extends Logging {
/**
* generate global dictionary with SQLContext and CarbonLoadModel
*
- * @param sqlContext
- * @param carbonLoadModel
*/
def generateGlobalDictionary(sqlContext: SQLContext,
- carbonLoadModel: CarbonLoadModel,
- hdfsLocation: String): Unit = {
+ carbonLoadModel: CarbonLoadModel,
+ hdfsLocation: String): Unit = {
try {
val table = new CarbonTableIdentifier(carbonLoadModel.getDatabaseName,
carbonLoadModel.getTableName)
@@ -425,10 +437,14 @@ object GlobalDictionaryUtil extends Logging {
val carbonTable = carbonLoadModel.getCarbonDataLoadSchema.getCarbonTable
val dimensions = carbonTable.getDimensionByTableName(
carbonTable.getFactTableName).asScala.toArray
- val headers = if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) df.columns
- else carbonLoadModel.getCsvHeader.split("" + CSVWriter.DEFAULT_SEPARATOR)
+ val headers = if (StringUtils.isEmpty(carbonLoadModel.getCsvHeader)) {
+ df.columns
+ } else {
+ carbonLoadModel.getCsvHeader.split("" + CSVWriter.DEFAULT_SEPARATOR)
+ }
val (requireDimension, requireColumnNames) = pruneDimensions(dimensions, headers, df.columns)
- if (requireDimension.size >= 1) {
+ if (requireDimension.length >= 1) {
// select column to push down pruning
df = df.select(requireColumnNames.head, requireColumnNames.tail: _*)
val model = createDictionaryLoadModel(carbonLoadModel, table, requireDimension,
@@ -448,11 +464,10 @@ object GlobalDictionaryUtil extends Logging {
val fileMapArray = carbonLoadModel.getDimFolderPath.split(",")
for (fileMap <- fileMapArray) {
val dimTableName = fileMap.split(":")(0)
- val dimFilePath = fileMap.substring(dimTableName.length + 1)
var dimDataframe = loadDataFrame(sqlContext, carbonLoadModel)
val (requireDimensionForDim, requireColumnNamesForDim) =
pruneDimensions(dimensions, dimDataframe.columns, dimDataframe.columns)
- if (requireDimensionForDim.size >= 1) {
+ if (requireDimensionForDim.length >= 1) {
dimDataframe = dimDataframe.select(requireColumnNamesForDim.head,
requireColumnNamesForDim.tail: _*)
val modelforDim = createDictionaryLoadModel(carbonLoadModel, table,
@@ -476,8 +491,8 @@ object GlobalDictionaryUtil extends Logging {
}
def generateAndWriteNewDistinctValueList(valuesBuffer: mutable.HashSet[String],
- dictionary: Dictionary,
- model: DictionaryLoadModel, columnIndex: Int): Int = {
+ dictionary: Dictionary,
+ model: DictionaryLoadModel, columnIndex: Int): Int = {
val values = valuesBuffer.toArray
java.util.Arrays.sort(values, Ordering[String])
var distinctValueCount: Int = 0
@@ -501,7 +516,7 @@ object GlobalDictionaryUtil extends Logging {
for (i <- 1 until values.length) {
if (preValue != values(i)) {
if (dictionary.getSurrogateKey(values(i)) ==
- CarbonCommonConstants.INVALID_SURROGATE_KEY) {
+ CarbonCommonConstants.INVALID_SURROGATE_KEY) {
writer.write(values(i))
preValue = values(i)
distinctValueCount += 1
@@ -522,7 +537,7 @@ object GlobalDictionaryUtil extends Logging {
}
}
} finally {
- writer.close
+ writer.close()
}
distinctValueCount
}
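generateAndWriteNewDistinctValueList above sorts the buffered values and writes only those the existing dictionary does not yet contain. A condensed sketch of that sort-and-deduplicate step; the dictionary lookup (getSurrogateKey) is stubbed out as a plain Set, which is an assumption for illustration, not the Carbon Dictionary API:

  import scala.collection.mutable

  // Sort the buffered values, then keep the first occurrence of every value
  // that the "dictionary" (here just a Set) does not already know.
  def newDistinctValues(valuesBuffer: mutable.HashSet[String],
      existing: Set[String]): Seq[String] = {
    val values = valuesBuffer.toArray
    java.util.Arrays.sort(values, Ordering[String])
    val out = mutable.ArrayBuffer[String]()
    var prev: String = null
    for (v <- values) {
      if (v != prev && !existing.contains(v)) {
        out += v // genuinely new distinct value
        prev = v
      }
    }
    out.toSeq
  }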
diff --git a/integration/spark/src/main/scala/org/carbondata/spark/util/QueryPlanUtil.scala b/integration/spark/src/main/scala/org/carbondata/spark/util/QueryPlanUtil.scala
index 11965356138..035c563cc0b 100644
--- a/integration/spark/src/main/scala/org/carbondata/spark/util/QueryPlanUtil.scala
+++ b/integration/spark/src/main/scala/org/carbondata/spark/util/QueryPlanUtil.scala
@@ -38,8 +38,8 @@ object QueryPlanUtil {
/**
* createCarbonInputFormat from query model
*/
- def createCarbonInputFormat(absoluteTableIdentifier: AbsoluteTableIdentifier) :
- (CarbonInputFormat[RowResult], Job) = {
+ def createCarbonInputFormat(
+ absoluteTableIdentifier: AbsoluteTableIdentifier): (CarbonInputFormat[RowResult], Job) = {
val carbonInputFormat = new CarbonInputFormat[RowResult]()
val jobConf: JobConf = new JobConf(new Configuration)
val job: Job = new Job(jobConf)