@@ -60,9 +60,8 @@ import org.apache.spark.util.Utils
  *
  * @param paths A list of file system paths that hold data. These will be globbed before and
  *              qualified. This option only works when reading from a [[FileFormat]].
- * @param inputSchema An optional specification of the schema of the data. When present we skip
- *                    attempting to infer the schema.
- * @param isSchemaFromUsers A flag to indicate whether the schema is specified by users.
+ * @param userSpecifiedSchema An optional specification of the schema of the data. When present
+ *                            we skip attempting to infer the schema.
  * @param partitionColumns A list of column names that the relation is partitioned by. When this
  *                         list is empty, the relation is unpartitioned.
  * @param bucketSpec An optional specification for bucketing (hash-partitioning) of the data.
@@ -71,8 +70,7 @@ case class DataSource(
     sparkSession: SparkSession,
     className: String,
     paths: Seq[String] = Nil,
-    inputSchema: Option[StructType] = None,
-    isSchemaFromUsers: Boolean = false,
+    userSpecifiedSchema: Option[StructType] = None,
     partitionColumns: Seq[String] = Seq.empty,
     bucketSpec: Option[BucketSpec] = None,
     options: Map[String, String] = Map.empty) extends Logging {
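For orientation, `userSpecifiedSchema` is what ultimately carries a schema passed through the public `DataFrameReader.schema(...)` API. A minimal sketch of exercising the renamed parameter directly (`DataSource` is Spark-internal API; the path and session setup here are hypothetical):

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.execution.datasources.DataSource
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()

// A caller-provided schema: when present, schema inference is skipped.
val schema = StructType(Seq(
  StructField("id", LongType),
  StructField("name", StringType)))

// After this patch the parameter is userSpecifiedSchema; the separate
// isSchemaFromUsers flag is gone, and a bad schema is instead detected
// later by comparing it against the relation's actual schema.
val ds = DataSource(
  sparkSession = spark,
  className = "csv",                  // resolved to a FileFormat provider
  paths = Seq("/tmp/people.csv"),     // hypothetical input path
  userSpecifiedSchema = Some(schema))

val relation = ds.resolveRelation()
```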
@@ -189,7 +187,7 @@ case class DataSource(
   }
 
   private def inferFileFormatSchema(format: FileFormat): StructType = {
-    inputSchema.orElse {
+    userSpecifiedSchema.orElse {
       val caseInsensitiveOptions = new CaseInsensitiveMap(options)
       val allPaths = caseInsensitiveOptions.get("path")
       val globbedPaths = allPaths.toSeq.flatMap { path =>
@@ -213,7 +211,7 @@ case class DataSource(
     providingClass.newInstance() match {
       case s: StreamSourceProvider =>
         val (name, schema) = s.sourceSchema(
-          sparkSession.sqlContext, inputSchema, className, options)
+          sparkSession.sqlContext, userSpecifiedSchema, className, options)
         SourceInfo(name, schema)
 
       case format: FileFormat =>
@@ -236,7 +234,7 @@ case class DataSource(
         val isSchemaInferenceEnabled = sparkSession.sessionState.conf.streamingSchemaInference
         val isTextSource = providingClass == classOf[text.TextFileFormat]
         // If schema inference is disabled, all sources except text require the schema to be specified
-        if (!isSchemaInferenceEnabled && !isTextSource && inputSchema.isEmpty) {
+        if (!isSchemaInferenceEnabled && !isTextSource && userSpecifiedSchema.isEmpty) {
           throw new IllegalArgumentException(
             "Schema must be specified when creating a streaming source DataFrame. " +
             "If some files already exist in the directory, then depending on the file format " +
@@ -255,7 +253,8 @@ case class DataSource(
   def createSource(metadataPath: String): Source = {
     providingClass.newInstance() match {
       case s: StreamSourceProvider =>
-        s.createSource(sparkSession.sqlContext, metadataPath, inputSchema, className, options)
+        s.createSource(
+          sparkSession.sqlContext, metadataPath, userSpecifiedSchema, className, options)
 
       case format: FileFormat =>
         val path = new CaseInsensitiveMap(options).getOrElse("path", {
@@ -320,28 +319,29 @@ case class DataSource(
    */
   def resolveRelation(): BaseRelation = {
     val caseInsensitiveOptions = new CaseInsensitiveMap(options)
-    val relation = (providingClass.newInstance(), inputSchema) match {
+    val relation = (providingClass.newInstance(), userSpecifiedSchema) match {
       // TODO: Throw when too much is given.
       case (dataSource: SchemaRelationProvider, Some(schema)) =>
         dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions, schema)
       case (dataSource: RelationProvider, None) =>
         dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions)
       case (_: SchemaRelationProvider, None) =>
         throw new AnalysisException(s"A schema needs to be specified when using $className.")
-      case (dataSource: RelationProvider, Some(_)) =>
-        if (isSchemaFromUsers) {
-          throw new AnalysisException(s"$className does not allow user-specified schemas.")
-        } else {
+      case (dataSource: RelationProvider, Some(schema)) =>
+        val baseRelation =
           dataSource.createRelation(sparkSession.sqlContext, caseInsensitiveOptions)
+        if (baseRelation.schema != schema) {
+          throw new AnalysisException(s"$className does not allow user-specified schemas.")
         }
+        baseRelation
 
       // We are reading from the results of a streaming query. Load files from the metadata log
       // instead of listing them using HDFS APIs.
       case (format: FileFormat, _)
           if hasMetadata(caseInsensitiveOptions.get("path").toSeq ++ paths) =>
         val basePath = new Path((caseInsensitiveOptions.get("path").toSeq ++ paths).head)
         val fileCatalog = new MetadataLogFileCatalog(sparkSession, basePath)
-        val dataSchema = inputSchema.orElse {
+        val dataSchema = userSpecifiedSchema.orElse {
           format.inferSchema(
             sparkSession,
             caseInsensitiveOptions,
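The rewritten `RelationProvider` branch is the behavioral heart of this change: a user-specified schema is no longer rejected outright, only one that disagrees with the schema the relation reports for itself. A self-contained sketch with a toy provider (the provider and all its names are hypothetical, not part of Spark):

```scala
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.{Row, SparkSession, SQLContext}
import org.apache.spark.sql.sources.{BaseRelation, RelationProvider, TableScan}
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

// A toy RelationProvider whose relation has a fixed, self-describing schema.
class OneColumnProvider extends RelationProvider {
  override def createRelation(
      ctx: SQLContext,
      parameters: Map[String, String]): BaseRelation =
    new BaseRelation with TableScan {
      override val sqlContext: SQLContext = ctx
      override val schema: StructType = StructType(Seq(StructField("id", IntegerType)))
      override def buildScan(): RDD[Row] = ctx.sparkContext.parallelize(Seq(Row(1), Row(2)))
    }
}

val spark = SparkSession.builder().master("local[*]").appName("demo").getOrCreate()
val provider = classOf[OneColumnProvider].getName

// New behavior: an explicit schema equal to the relation's schema is accepted.
val matching = StructType(Seq(StructField("id", IntegerType)))
spark.read.schema(matching).format(provider).load()         // OK

// A mismatching schema still fails with the same AnalysisException as before.
val mismatched = StructType(Seq(StructField("id", StringType)))
// spark.read.schema(mismatched).format(provider).load()    // AnalysisException
```

Previously even the matching call threw, because any schema flagged with `isSchemaFromUsers = true` was treated as an error for a plain `RelationProvider`.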
@@ -381,7 +381,7 @@ case class DataSource(
 
         // If they gave a schema, then we try and figure out the types of the partition columns
         // from that schema.
-        val partitionSchema = inputSchema.map { schema =>
+        val partitionSchema = userSpecifiedSchema.map { schema =>
           StructType(
             partitionColumns.map { c =>
               // TODO: Case sensitivity.
@@ -395,7 +395,7 @@ case class DataSource(
           new ListingFileCatalog(
             sparkSession, globbedPaths, options, partitionSchema)
 
-        val dataSchema = inputSchema.map { schema =>
+        val dataSchema = userSpecifiedSchema.map { schema =>
           val equality = sparkSession.sessionState.conf.resolver
           StructType(schema.filterNot(f => partitionColumns.exists(equality(_, f.name))))
         }.orElse {
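To make the `filterNot` above concrete: a user-specified schema describes data and partition columns together, and the data schema is whatever remains once the partition columns are resolved away. A small worked example; the resolver stand-in assumes the default case-insensitive analysis (`spark.sql.caseSensitive` off):

```scala
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

val userSpecifiedSchema = StructType(Seq(
  StructField("id", LongType),
  StructField("value", StringType),
  StructField("date", StringType)))  // also a partition column on disk
val partitionColumns = Seq("DATE")   // deliberately different case

// Stand-in for sparkSession.sessionState.conf.resolver.
val equality: (String, String) => Boolean = _.equalsIgnoreCase(_)

val dataSchema = StructType(
  userSpecifiedSchema.filterNot(f => partitionColumns.exists(equality(_, f.name))))
// dataSchema keeps only "id" and "value"; "date" belongs to the partition
// schema built from the same user schema a few lines earlier.
```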
@@ -505,7 +505,7 @@ case class DataSource(
             mode)
         sparkSession.sessionState.executePlan(plan).toRdd
         // Replace the schema with that of the DataFrame we just wrote out to avoid re-inferring it.
-        copy(inputSchema = Some(data.schema.asNullable)).resolveRelation()
+        copy(userSpecifiedSchema = Some(data.schema.asNullable)).resolveRelation()
 
       case _ =>
         sys.error(s"${providingClass.getCanonicalName} does not allow create table as select.")