[SPARK-2883][SQL] Orc support through datasource api #3753

Status: Closed · wants to merge 5 commits
36 changes: 17 additions & 19 deletions core/src/main/scala/org/apache/spark/SparkHadoopWriter.scala
@@ -41,20 +41,20 @@ class SparkHadoopWriter(@transient jobConf: JobConf)
with SparkHadoopMapRedUtil
with Serializable {

- private val now = new Date()
- private val conf = new SerializableWritable(jobConf)
+ protected val now = new Date()
+ protected val conf = new SerializableWritable(jobConf)

- private var jobID = 0
- private var splitID = 0
- private var attemptID = 0
- private var jID: SerializableWritable[JobID] = null
- private var taID: SerializableWritable[TaskAttemptID] = null
+ protected var jobID = 0
+ protected var splitID = 0
+ protected var attemptID = 0
+ protected var jID: SerializableWritable[JobID] = null
+ protected var taID: SerializableWritable[TaskAttemptID] = null

- @transient private var writer: RecordWriter[AnyRef,AnyRef] = null
- @transient private var format: OutputFormat[AnyRef,AnyRef] = null
- @transient private var committer: OutputCommitter = null
- @transient private var jobContext: JobContext = null
- @transient private var taskContext: TaskAttemptContext = null
+ @transient protected var writer: RecordWriter[AnyRef,AnyRef] = null
+ @transient protected var format: OutputFormat[AnyRef,AnyRef] = null
+ @transient protected var committer: OutputCommitter = null
+ @transient protected var jobContext: JobContext = null
+ @transient protected var taskContext: TaskAttemptContext = null
Comment from the PR author:

I changed the visibility of these vals/vars and defs in SparkHadoopWriter from private to protected so that this code can be reused by the ORC writing API implementation.
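To make the intended reuse concrete, here is a minimal sketch (not code from this PR) of how a format-specific writer could extend SparkHadoopWriter once these members are protected. The package, the class name OrcHadoopWriter, and the method writeRecord are invented for illustration; the actual ORC path in this PR may be structured differently.

```scala
package org.apache.spark.sql.hive.orc  // hypothetical location, illustration only

import org.apache.hadoop.mapred.{JobConf, Reporter}
import org.apache.spark.SparkHadoopWriter

// Sketch only: shows that a subclass can now reach the formerly private state
// (writer, conf, taID) and helpers (getOutputFormat()) of SparkHadoopWriter.
class OrcHadoopWriter(@transient jobConf: JobConf) extends SparkHadoopWriter(jobConf) {

  // The inherited setup/commit plumbing (preSetup, commitJob, ...) is reused as-is;
  // only the record-writing step would differ for an ORC-backed OutputFormat.
  def writeRecord(key: AnyRef, value: AnyRef): Unit = {
    if (writer == null) {
      // Lazily obtain a RecordWriter from whatever OutputFormat the JobConf
      // is configured with (an ORC OutputFormat would be set beforehand).
      writer = getOutputFormat().getRecordWriter(
        null, conf.value, taID.value.toString, Reporter.NULL)
    }
    writer.write(key, value)
  }
}
```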


def preSetup() {
setIDs(0, 0, 0)
@@ -112,38 +112,36 @@ class SparkHadoopWriter(@transient jobConf: JobConf)
cmtr.commitJob(getJobContext())
}

- // ********* Private Functions *********
-
- private def getOutputFormat(): OutputFormat[AnyRef,AnyRef] = {
+ def getOutputFormat(): OutputFormat[AnyRef,AnyRef] = {
if (format == null) {
format = conf.value.getOutputFormat()
.asInstanceOf[OutputFormat[AnyRef,AnyRef]]
}
format
}

- private def getOutputCommitter(): OutputCommitter = {
+ def getOutputCommitter(): OutputCommitter = {
if (committer == null) {
committer = conf.value.getOutputCommitter
}
committer
}

- private def getJobContext(): JobContext = {
+ def getJobContext(): JobContext = {
if (jobContext == null) {
jobContext = newJobContext(conf.value, jID.value)
}
jobContext
}

- private def getTaskContext(): TaskAttemptContext = {
+ def getTaskContext(): TaskAttemptContext = {
if (taskContext == null) {
taskContext = newTaskAttemptContext(conf.value, taID.value)
}
taskContext
}

- private def setIDs(jobid: Int, splitid: Int, attemptid: Int) {
+ def setIDs(jobid: Int, splitid: Int, attemptid: Int) {
jobID = jobid
splitID = splitid
attemptID = attemptid
@@ -330,7 +330,7 @@ private[hive] object HadoopTableReader extends HiveInspectors {
}

val (fieldRefs, fieldOrdinals) = nonPartitionKeyAttrs.map { case (attr, ordinal) =>
- soi.getStructFieldRef(attr.name) -> ordinal
+ soi.getStructFieldRef(attr.name.toLowerCase) -> ordinal
}.unzip
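A note on the .toLowerCase change (my reading; the diff does not say this explicitly): Hive keeps column names in lower case, while the Catalyst attribute may preserve the caller's original casing, and some struct object inspectors look fields up case-sensitively. Lower-casing the attribute name makes the lookup match either way. A tiny illustration, where `soi` and the lower-cased field registration are assumptions:

```scala
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector

// Illustration only: `soi` stands for the table's struct inspector; assume the
// column was registered as "userid" because Hive lower-cases column names.
def lookupField(soi: StructObjectInspector, attrName: String) = {
  // A case-sensitive inspector would miss "userId"; lower-casing makes it match.
  soi.getStructFieldRef(attrName.toLowerCase)
}
```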

/**