
[SPARK-14460] [SQL] Properly handle column names containing spaces #12252


Closed
wants to merge 11 commits
@@ -96,8 +96,9 @@ object JdbcUtils extends Logging {
   /**
    * Returns a PreparedStatement that inserts a row into table via conn.
    */
-  def insertStatement(conn: Connection, table: String, rddSchema: StructType): PreparedStatement = {
-    val columns = rddSchema.fields.map(_.name).mkString(",")
+  def insertStatement(dialect: JdbcDialect, conn: Connection, table: String, rddSchema: StructType)
+    : PreparedStatement = {
+    val columns = rddSchema.fields.map(f => quoteColumnName(dialect, f.name)).mkString(",")
     val placeholders = rddSchema.fields.map(_ => "?").mkString(",")
     val sql = s"INSERT INTO $table ($columns) VALUES ($placeholders)"
     conn.prepareStatement(sql)
@@ -169,7 +170,7 @@ object JdbcUtils extends Logging {
     if (supportsTransactions) {
       conn.setAutoCommit(false) // Everything in the same db transaction.
     }
-    val stmt = insertStatement(conn, table, rddSchema)
+    val stmt = insertStatement(dialect, conn, table, rddSchema)
     try {
       var rowCount = 0
       while (iterator.hasNext) {
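
To make the effect of the quoting concrete, here is a standalone sketch (not the actual Spark classes; `quote` is a hypothetical stand-in for `JdbcDialect.quoteIdentifier`, shown with MySQL-style backticks as an assumed example) of how the INSERT statement is assembled once every column name goes through the dialect's quoting:

```scala
object InsertSqlSketch {
  // Hypothetical stand-in for JdbcDialect.quoteIdentifier (MySQL-style backticks assumed).
  def quote(columnName: String): String = s"`$columnName`"

  // Mirrors the shape of insertStatement above: quoted column list plus "?" placeholders.
  def insertSql(table: String, columnNames: Seq[String]): String = {
    val columns = columnNames.map(quote).mkString(",")
    val placeholders = columnNames.map(_ => "?").mkString(",")
    s"INSERT INTO $table ($columns) VALUES ($placeholders)"
  }

  def main(args: Array[String]): Unit = {
    // A column name with a space stays valid once quoted:
    println(insertSql("TEST.PEOPLE1", Seq("name", "the id")))
    // prints: INSERT INTO TEST.PEOPLE1 (`name`,`the id`) VALUES (?,?)
  }
}
```

Without the quoting, the space in `the id` produces a statement the database cannot parse, which is the failure SPARK-14460 describes.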
@@ -245,14 +246,25 @@ object JdbcUtils extends Logging {
       Array[Byte]().iterator
   }
 
+  /**
+   * The utility to add quote to the column name based on its dialect
+   * @param dialect the JDBC dialect
+   * @param columnName the input column name
+   * @return the quoted column name
+   */
+  private def quoteColumnName(dialect: JdbcDialect, columnName: String): String = {
+    dialect.quoteIdentifier(columnName)
+  }
+
   /**
    * Compute the schema string for this RDD.
    */
   def schemaString(df: DataFrame, url: String): String = {
     val sb = new StringBuilder()
     val dialect = JdbcDialects.get(url)

Review thread on `val dialect = JdbcDialects.get(url)`:

Contributor:
Is the new dialect you're passing in different from this one in some way?

Contributor Author:
The purpose of passing in `dialect` is to get the proper quoting for columns based on the data source. Any suggestions?

Contributor:
You pass a parameter named `dialect` to the `schemaString` method, but there's also the `dialect` that comes from `JdbcDialects.get(url)` --- that's the duplicate I was trying to point out.

Contributor Author:
Thanks for pointing that out. I've modified the code. Please take a look.

     df.schema.fields foreach { field =>
-      val name = field.name
+
+      val name = quoteColumnName(dialect, field.name)
       val typ: String = getJdbcType(field.dataType, dialect).databaseTypeDefinition
       val nullable = if (field.nullable) "" else "NOT NULL"
       sb.append(s", $name $typ $nullable")
@@ -52,7 +52,11 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
conn1.prepareStatement("insert into test.people values ('mary', 2)").executeUpdate()
conn1.prepareStatement("drop table if exists test.people1").executeUpdate()
conn1.prepareStatement(
"create table test.people1 (name TEXT(32) NOT NULL, theid INTEGER NOT NULL)").executeUpdate()
"create table test.people1 (name TEXT(32) NOT NULL, `the id` INTEGER NOT NULL)")
.executeUpdate()
conn1.prepareStatement(
"create table test.orders (`order` TEXT(32) NOT NULL, `order id` INTEGER NOT NULL)")
.executeUpdate()
conn1.commit()

sql(
@@ -68,6 +72,13 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
         |USING org.apache.spark.sql.jdbc
         |OPTIONS (url '$url1', dbtable 'TEST.PEOPLE1', user 'testUser', password 'testPass')
       """.stripMargin.replaceAll("\n", " "))
+
+    sql(
+      s"""
+        |CREATE TEMPORARY TABLE ORDERS
+        |USING org.apache.spark.sql.jdbc
+        |OPTIONS (url '$url1', dbtable 'TEST.ORDERS', user 'testUser', password 'testPass')
+      """.stripMargin.replaceAll("\n", " "))
   }
 
   after {
@@ -151,4 +162,13 @@ class JDBCWriteSuite extends SharedSQLContext with BeforeAndAfter {
     assert(2 === sqlContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count)
     assert(2 === sqlContext.read.jdbc(url1, "TEST.PEOPLE1", properties).collect()(0).length)
   }
+
+  test("SPARK-14460: Insert into table with column containing space") {
+    val df = sqlContext.createDataFrame(sparkContext.parallelize(arr2x2), schema2)
+    df.write.insertInto("PEOPLE1")
+    assert(2 === sqlContext.read.jdbc(url1, "TEST.PEOPLE1", properties).count)
+
+    df.write.insertInto("ORDERS")
+    assert(2 === sqlContext.read.jdbc(url1, "TEST.ORDERS", properties).count)
+  }
 }
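
For reference, a minimal end-to-end sketch of the scenario these tests cover (it assumes the `url1`, `properties`, `sqlContext`, and `sparkContext` that `JDBCWriteSuite` and `SharedSQLContext` set up above, plus a hypothetical target table `TEST.ORDERS2`): a DataFrame whose schema contains a reserved word and a column name with a space is written through the JDBC data source and read back.

```scala
// Sketch only: url1, properties, sqlContext and sparkContext are the ones the suite
// sets up; TEST.ORDERS2 is a hypothetical table name created by the write below.
import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val schemaWithSpace = StructType(Seq(
  StructField("order", StringType),       // reserved word as a column name
  StructField("order id", IntegerType)))  // column name containing a space

val rows = sparkContext.parallelize(Seq(Row("book", 1), Row("pen", 2)))
val df = sqlContext.createDataFrame(rows, schemaWithSpace)

// Before this patch the generated CREATE TABLE / INSERT INTO statements embedded the
// raw names and failed to parse; with dialect-aware quoting the round trip succeeds.
df.write.jdbc(url1, "TEST.ORDERS2", properties)
assert(2 === sqlContext.read.jdbc(url1, "TEST.ORDERS2", properties).count)
```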