Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 30 additions & 0 deletions data/scripts/q_test_case_sensitive.mariadb.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
-- Create a case-sensitive schema (database)
CREATE SCHEMA `WorldData`;

-- Case-Sensitive Schema and Table
-- Backtick quoting preserves the mixed-case names in MariaDB/MySQL.
CREATE TABLE `WorldData`.`Country`
(
    id   int,
    name varchar(20)
);

INSERT INTO `WorldData`.`Country` VALUES (1, 'India'), (2, 'USA'), (3, 'Japan'), (4, 'Germany');


-- Case-Sensitive Partition Column
CREATE TABLE `WorldData`.`Cities`
(
    id   int,
    name varchar(20),
    `RegionID` int
);
INSERT INTO `WorldData`.`Cities` VALUES (1, 'Mumbai', 10), (2, 'New York', 20), (3, 'Tokyo', 30), (4, 'Berlin', 40), (5, 'New Delhi', 10), (6, 'Kyoto', 30);


-- Case-Sensitive Query Field Names
CREATE TABLE `WorldData`.`Geography`
(
    id int,
    `Description` varchar(50)
);
INSERT INTO `WorldData`.`Geography` VALUES (1, 'Asia'), (2, 'North America'), (3, 'Asia'), (4, 'Europe');
34 changes: 34 additions & 0 deletions data/scripts/q_test_case_sensitive.mssql.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- Create a case-sensitive schema; bracket quoting preserves the mixed-case name.
CREATE SCHEMA [WorldData];

-- Case-Sensitive Schema and Table
CREATE TABLE [WorldData].[Country]
(
    id   int,
    name varchar(20)
);
INSERT INTO [WorldData].[Country] VALUES (1, 'India'), (2, 'USA'), (3, 'Japan'), (4, 'Germany');


-- Case-Sensitive Partition Column
CREATE TABLE [WorldData].[Cities]
(
    id   int,
    name varchar(20),
    [RegionID] int
);
INSERT INTO [WorldData].[Cities] VALUES (1, 'Mumbai', 10), (2, 'New York', 20), (3, 'Tokyo', 30), (4, 'Berlin', 40), (5, 'New Delhi', 10), (6, 'Kyoto', 30);


-- Case-Sensitive Query Field Names
CREATE TABLE [WorldData].[Geography]
(
    id int,
    [Description] varchar(50)
);
INSERT INTO [WorldData].[Geography] VALUES (1, 'Asia'), (2, 'North America'), (3, 'Asia'), (4, 'Europe');

-- Create a user and associate them with the case-sensitive default schema.
CREATE LOGIN greg WITH PASSWORD = 'GregPass123!$';
-- Fix: the default schema must be the schema created above. SQL Server accepts a
-- DEFAULT_SCHEMA that does not exist ('bob' was never created here), in which case
-- greg's unqualified names would silently resolve against dbo instead of [WorldData].
CREATE USER greg FOR LOGIN greg WITH DEFAULT_SCHEMA = [WorldData];
-- Allow the user to connect to the database and run queries
GRANT CONNECT, SELECT TO greg;
53 changes: 53 additions & 0 deletions data/scripts/q_test_case_sensitive.oracle.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
-- The comments below are from q_test_country_table_with_schema.oracle.sql

-- In Oracle dividing the tables in different namespaces/schemas is achieved via different users. The CREATE SCHEMA
-- statement exists in Oracle but has different semantics than those defined by SQL Standard and those adopted in other
-- DBMS.

-- In order to create the so-called "local" users in oracle you need to be connected to the Pluggable Database (PDB)
-- and not to the Container Database (CDB). In Oracle XE edition, used by these tests, the default and only PDB is
-- XEPDB1.
ALTER SESSION SET CONTAINER = XEPDB1;

-- Create a case-sensitive user, which also acts as the schema.
-- Double quotes preserve the mixed-case "WorldData" name; the unquoted password is case-folded by Oracle.
CREATE USER "WorldData" IDENTIFIED BY QTestPassword123;
ALTER USER "WorldData" QUOTA UNLIMITED ON users;
GRANT CREATE SESSION, CREATE TABLE, CREATE VIEW TO "WorldData";

-- Case-Sensitive Schema and Table
-- One row per INSERT below: Oracle does not accept the multi-row VALUES list used in the other dialect scripts.
CREATE TABLE "WorldData"."Country"
(
id int,
name varchar(20)
);
INSERT INTO "WorldData"."Country" VALUES (1, 'India');
INSERT INTO "WorldData"."Country" VALUES (2, 'USA');
INSERT INTO "WorldData"."Country" VALUES (3, 'Japan');
INSERT INTO "WorldData"."Country" VALUES (4, 'Germany');


-- Case-Sensitive Partition Column
CREATE TABLE "WorldData"."Cities"
(
id int,
name varchar(20),
"RegionID" int
);
INSERT INTO "WorldData"."Cities" VALUES (1, 'Mumbai', 10);
INSERT INTO "WorldData"."Cities" VALUES (2, 'New York', 20);
INSERT INTO "WorldData"."Cities" VALUES (3, 'Tokyo', 30);
INSERT INTO "WorldData"."Cities" VALUES (4, 'Berlin', 40);
INSERT INTO "WorldData"."Cities" VALUES (5, 'New Delhi', 10);
INSERT INTO "WorldData"."Cities" VALUES (6, 'Kyoto', 30);


-- Case-Sensitive Query Field Names
CREATE TABLE "WorldData"."Geography"
(
id int,
"Description" varchar(50)
);
INSERT INTO "WorldData"."Geography" VALUES (1, 'Asia');
INSERT INTO "WorldData"."Geography" VALUES (2, 'North America');
INSERT INTO "WorldData"."Geography" VALUES (3, 'Asia');
INSERT INTO "WorldData"."Geography" VALUES (4, 'Europe');
37 changes: 37 additions & 0 deletions data/scripts/q_test_case_sensitive.postgres.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
-- Create a case-sensitive schema
-- Double quoting preserves the mixed-case name; unquoted identifiers are folded to lower case in Postgres.
CREATE SCHEMA "WorldData";

-- Case-Sensitive Schema and Table
CREATE TABLE "WorldData"."Country"
(
id int,
name varchar(20)
);

INSERT INTO "WorldData"."Country" VALUES (1, 'India'), (2, 'USA'), (3, 'Japan'), (4, 'Germany');


-- Case-Sensitive Partition Column
CREATE TABLE "WorldData"."Cities"
(
id int,
name varchar(20),
"RegionID" int
);
INSERT INTO "WorldData"."Cities" VALUES (1, 'Mumbai', 10), (2, 'New York', 20), (3, 'Tokyo', 30), (4, 'Berlin', 40), (5, 'New Delhi', 10), (6, 'Kyoto', 30);


-- Case-Sensitive Query Field Names
CREATE TABLE "WorldData"."Geography"
(
id int,
"Description" varchar(50)
);
INSERT INTO "WorldData"."Geography" VALUES (1, 'Asia'), (2, 'North America'), (3, 'Asia'), (4, 'Europe');

-- Create a user and associate them with a default schema <=> search_path
CREATE ROLE greg WITH LOGIN PASSWORD 'GregPass123!$';
ALTER ROLE greg SET search_path TO "WorldData";
-- Grant the necessary permissions to be able to access the schema
-- Note: GRANT ... ON ALL TABLES covers only tables existing at grant time, which is sufficient for this fixture.
GRANT USAGE ON SCHEMA "WorldData" TO greg;
GRANT SELECT ON ALL TABLES IN SCHEMA "WorldData" TO greg;
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,13 @@ public void write(PreparedStatement statement) throws SQLException {
ParameterMetaData parameterMetaData = statement.getParameterMetaData();
for (int i = 0; i < columnValues.length; i++) {
Object value = columnValues[i];
if ((parameterMetaData.getParameterType(i + 1) == Types.CHAR) && value != null && value instanceof Boolean) {
value = ((Boolean) value).booleanValue() ? "1" : "0";
try {
if (value instanceof Boolean b && (parameterMetaData.getParameterType(i + 1) == Types.CHAR)) {
value = b ? "1" : "0";
}
} catch (SQLException e) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Catching and ignoring any kind of SQLException is suspicious. It gives the impression that something is broken in the code.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I can probably add a WARN when getParameterType throws an exception. I had to add this because I was getting an exception for Oracle; it's not fatal, so we can suppress the driver error and move forward.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It may be that the exception that you encountered was not fatal but if we catch every kind of exception and suppress it we cannot really know what is fatal or not.

// Suppress the driver error and rely on setObject to handle the type inference.
// One example of when this happens is when we use quoted table names in Oracle.
}
statement.setObject(i + 1, value);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@
import java.net.URI;
import java.net.URISyntaxException;

import static org.apache.hadoop.hive.ql.exec.Utilities.unescapeHiveJdbcIdentifier;

public class JdbcStorageHandler implements HiveStorageHandler {

private static final Logger LOGGER = LoggerFactory.getLogger(JdbcStorageHandler.class);
Expand Down Expand Up @@ -101,12 +103,12 @@ public void configureInputJobProperties(TableDesc tableDesc, Map<String, String>
@Override
public URI getURIForAuth(Table table) throws URISyntaxException {
Map<String, String> tableProperties = HiveCustomStorageHandlerUtils.getTableProperties(table);
DatabaseType dbType = DatabaseType.valueOf(
DatabaseType dbType = DatabaseType.from(
tableProperties.get(JdbcStorageConfig.DATABASE_TYPE.getPropertyName()));
String host_url = DatabaseType.METASTORE == dbType ?
String hostUrl = DatabaseType.METASTORE == dbType ?
"jdbc:metastore://" : tableProperties.get(Constants.JDBC_URL);
String table_name = tableProperties.get(Constants.JDBC_TABLE);
return new URI(host_url+"/"+table_name);
String tableName = unescapeHiveJdbcIdentifier(tableProperties.get(Constants.JDBC_TABLE));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we "unescape" some content of the property aren't we risking conflicts/ambiguity.
For instance:

  • "hive.sql.table"="\"Country\""
  • "hive.sql.table"="Country"

Moreover, it seems that the current unescaping is not gonna work for other quoting chars:

  • "hive.sql.table"="[Country]"
  • "hive.sql.table"="`Country`"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I checked that we do run into conflicts here. I added

CREATE TABLE "WorldData".Country
(
    id   int,
    name varchar(20)
);

INSERT INTO "WorldData".Country VALUES (5, 'France'), (6, 'Italy'), (7, 'Spain'), (8, 'Portugal');

to q_test_case_sensitive.postgres.sql

While creating the external table, if we pass "hive.sql.table" = "country", the select outputs the new data shown above, but if we pass "hive.sql.table" = "Country" or "hive.sql.table" = "\"Country\"", the select returns data from the case sensitive table.

I think we can check if the value of "hive.sql.table" is escaped or not. If not, then we can probably always just use lower case. Let me know what you think about this.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Using lower case may not always be an option since as far as I recall there are DBMS that store unquoted identifers using different conventions (e.g., upper case).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apart from ambiguity quoted identifiers may contain arbitrary characters inside the quotes that are not URI friendly (e.g., ?%+-/&$). We may have to come up with a more robust normalization strategy but this depends on how getURIForAuth is used and if we care about supporting such use-cases.

return new URI(hostUrl+"/" + tableName);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,68 @@
package org.apache.hive.storage.jdbc.conf;

public enum DatabaseType {
  // DBMS whose identifier quote is the SQL-standard double quote (").
  H2,
  DB2,
  DERBY,
  ORACLE,
  POSTGRES,
  MSSQL,
  JETHRO_DATA,

  // DBMS that quote identifiers with backticks.
  MYSQL("`"),
  MARIADB("`"),
  HIVE("`"),

  // METASTORE is a pseudo-type that is resolved to a concrete type elsewhere;
  // it defines no quoting of its own (null quotes).
  METASTORE(null, null);

  // Keeping start and end quote separate to allow for future DBs
  // that may have different start and end quotes (e.g. [ident] in T-SQL).
  private final String startQuote;
  private final String endQuote;

  DatabaseType() {
    this("\"", "\"");
  }

  DatabaseType(String quote) {
    this(quote, quote);
  }

  DatabaseType(String startQuote, String endQuote) {
    this.startQuote = startQuote;
    this.endQuote = endQuote;
  }

  /**
   * Helper to safely get the DatabaseType from a string.
   *
   * @param dbType The string from configuration properties; matched case-insensitively.
   * @return The matching DatabaseType.
   * @throws IllegalArgumentException if the dbType is null or not a valid type.
   */
  public static DatabaseType from(String dbType) {
    if (dbType == null) {
      throw new IllegalArgumentException("Database type string cannot be null");
    }
    try {
      // valueOf resolves every constant, including METASTORE; no special casing is needed.
      return valueOf(dbType.toUpperCase());
    } catch (IllegalArgumentException e) {
      throw new IllegalArgumentException("Invalid database type: " + dbType, e);
    }
  }

  /** @return the opening identifier quote, or null when this type defines no quoting (METASTORE). */
  public String getStartQuote() {
    return startQuote;
  }

  /** @return the closing identifier quote, or null when this type defines no quoting (METASTORE). */
  public String getEndQuote() {
    return endQuote;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ public static void copyConfigurationToJob(Properties props, Map<String, String>
if (!key.equals(CONFIG_PWD) &&
!key.equals(CONFIG_PWD_KEYSTORE) &&
!key.equals(CONFIG_PWD_KEY) &&
!key.equals(CONFIG_PWD_URI) &&
!key.equals(CONFIG_USERNAME)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we changing this code? Why username is relevant to case-sensitivity of identifiers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Without passing in username, we get this error for oracle:

[ERROR] Failures:
[ERROR]   TestMiniLlapLocalCliDriver.testCliDriver:62 Client execution failed with error code = 1
running
SELECT * FROM country_test
fname=jdbc_case_sensitive_oracle.q

See ./ql/target/tmp/log/hive.log or ./itests/qtest/target/tmp/log/hive.log, or check ./ql/target/surefire-reports or ./itests/qtest/target/surefire-reports/ for specific test cases logs.
 java.lang.RuntimeException: java.io.IOException: java.io.IOException: org.apache.hive.storage.jdbc.exception.HiveJdbcDatabaseAccessException: Caught exception while trying to execute query: Cannot create PoolableConnectionFactory (ORA-01017: invalid username/password; logon denied
)
	at org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:210)
	at org.apache.hadoop.hive.ql.exec.FetchTask.execute(FetchTask.java:95)
	at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:196)
	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:142)
	at org.apache.hadoop.hive.ql.Driver.run(Driver.java:137)
	at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:190)
	at org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:235)
	at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:259)
	at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:203)
	at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:129)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:430)
	at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:358)
	at org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:760)
	at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:730)
	at org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:115)
	at org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:139)
	at org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62)
	at java.base/jdk.internal.reflect.DirectMethodHandleAccessor.invoke(DirectMethodHandleAccessor.java:103)
	at java.base/java.lang.reflect.Method.invoke(Method.java:580)
	at org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:59)
	at org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12)
	at org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:56)
	at org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17)
	at org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:118)
	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
	at org.junit.runners.BlockJUnit4ClassRunner$1.evaluate(BlockJUnit4ClassRunner.java:100)
	at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:366)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:103)
	at org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:63)
	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
	at org.junit.runners.Suite.runChild(Suite.java:128)
	at org.junit.runners.Suite.runChild(Suite.java:27)
	at org.junit.runners.ParentRunner$4.run(ParentRunner.java:331)
	at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:79)
	at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:329)
	at org.junit.runners.ParentRunner.access$100(ParentRunner.java:66)
	at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:293)
	at org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:89)
	at org.junit.rules.RunRules.evaluate(RunRules.java:20)
	at org.junit.runners.ParentRunner$3.evaluate(ParentRunner.java:306)
	at org.junit.runners.ParentRunner.run(ParentRunner.java:413)
	at org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:316)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:240)
	at org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:214)
	at org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:155)
	at org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:385)
	at org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:162)
	at org.apache.maven.surefire.booter.ForkedBooter.run(ForkedBooter.java:507)
	at org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:495)
Caused by: java.io.IOException: java.io.IOException: org.apache.hive.storage.jdbc.exception.HiveJdbcDatabaseAccessException: Caught exception while trying to execute query: Cannot create PoolableConnectionFactory (ORA-01017: invalid username/password; logon denied
)
	at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:628)
	at org.apache.hadoop.hive.ql.exec.FetchOperator.pushRow(FetchOperator.java:535)
	at org.apache.hadoop.hive.ql.exec.FetchTask.executeInner(FetchTask.java:194)
	... 53 more
Caused by: java.io.IOException: org.apache.hive.storage.jdbc.exception.HiveJdbcDatabaseAccessException: Caught exception while trying to execute query: Cannot create PoolableConnectionFactory (ORA-01017: invalid username/password; logon denied
)
	at org.apache.hive.storage.jdbc.JdbcRecordReader.next(JdbcRecordReader.java:85)
	at org.apache.hive.storage.jdbc.JdbcRecordReader.next(JdbcRecordReader.java:35)
	at org.apache.hadoop.hive.ql.exec.FetchOperator.getNextRow(FetchOperator.java:595)
	... 55 more
Caused by: org.apache.hive.storage.jdbc.exception.HiveJdbcDatabaseAccessException: Caught exception while trying to execute query: Cannot create PoolableConnectionFactory (ORA-01017: invalid username/password; logon denied
)
	at org.apache.hive.storage.jdbc.dao.GenericJdbcDatabaseAccessor.getRecordIterator(GenericJdbcDatabaseAccessor.java:252)
	at org.apache.hive.storage.jdbc.JdbcRecordReader.next(JdbcRecordReader.java:58)
	... 57 more
Caused by: java.sql.SQLException: Cannot create PoolableConnectionFactory (ORA-01017: invalid username/password; logon denied
)
	at org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:636)
	at org.apache.commons.dbcp2.BasicDataSource.createDataSource(BasicDataSource.java:538)
	at org.apache.commons.dbcp2.BasicDataSource.getLogWriter(BasicDataSource.java:1052)
	at org.apache.commons.dbcp2.BasicDataSourceFactory.createDataSource(BasicDataSourceFactory.java:310)
	at org.apache.hive.storage.jdbc.dao.GenericJdbcDatabaseAccessor.initializeDatabaseConnection(GenericJdbcDatabaseAccessor.java:410)
	at org.apache.hive.storage.jdbc.dao.GenericJdbcDatabaseAccessor.getRecordIterator(GenericJdbcDatabaseAccessor.java:229)
	... 58 more
Caused by: java.sql.SQLException: ORA-01017: invalid username/password; logon denied

	at oracle.jdbc.driver.T4CTTIoer11.processError(T4CTTIoer11.java:630)
	at oracle.jdbc.driver.T4CTTIoer11.processError(T4CTTIoer11.java:559)
	at oracle.jdbc.driver.T4CTTIoer11.processError(T4CTTIoer11.java:554)
	at oracle.jdbc.driver.T4CTTIfun.processError(T4CTTIfun.java:1376)
	at oracle.jdbc.driver.T4CTTIoauthenticate.processError(T4CTTIoauthenticate.java:700)
	at oracle.jdbc.driver.T4CTTIfun.receive(T4CTTIfun.java:771)
	at oracle.jdbc.driver.T4CTTIfun.doRPC(T4CTTIfun.java:299)
	at oracle.jdbc.driver.T4CTTIoauthenticate.doOAUTH(T4CTTIoauthenticate.java:390)
	at oracle.jdbc.driver.T4CTTIoauthenticate.doOAUTHWithO5Logon(T4CTTIoauthenticate.java:1471)
	at oracle.jdbc.driver.T4CTTIoauthenticate.doOAUTH(T4CTTIoauthenticate.java:1220)
	at oracle.jdbc.driver.T4CTTIoauthenticate.doOAUTH(T4CTTIoauthenticate.java:1174)
	at oracle.jdbc.driver.T4CConnection.authenticateUserForLogon(T4CConnection.java:1032)
	at oracle.jdbc.driver.T4CConnection.logon(T4CConnection.java:648)
	at oracle.jdbc.driver.PhysicalConnection.connect(PhysicalConnection.java:1039)
	at oracle.jdbc.driver.T4CDriverExtension.getConnection(T4CDriverExtension.java:90)
	at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:728)
	at oracle.jdbc.driver.OracleDriver.connect(OracleDriver.java:649)
	at org.apache.commons.dbcp2.DriverConnectionFactory.createConnection(DriverConnectionFactory.java:52)
	at org.apache.commons.dbcp2.PoolableConnectionFactory.makeObject(PoolableConnectionFactory.java:414)
	at org.apache.commons.dbcp2.BasicDataSource.validateConnectionFactory(BasicDataSource.java:113)
	at org.apache.commons.dbcp2.BasicDataSource.createPoolableConnectionFactory(BasicDataSource.java:632)
	... 63 more

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The error is completely irrelevant to support for case sensitivity. I would suggest to log a separate ticket with a minimal test and isolate the necessary changes in a dedicated PR.

!key.equals(CONFIG_PWD_URI)
) {
jobProps.put(String.valueOf(entry.getKey()), String.valueOf(entry.getValue()));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

import org.apache.hive.storage.jdbc.conf.DatabaseType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -52,6 +53,7 @@
import java.util.regex.Pattern;

import static com.google.common.base.MoreObjects.firstNonNull;
import static org.apache.hadoop.hive.ql.exec.Utilities.unescapeHiveJdbcIdentifier;

/**
* A data accessor that should in theory work with all JDBC compliant database drivers.
Expand Down Expand Up @@ -364,7 +366,7 @@ protected String addBoundaryToQuery(String tableName, String sql, String partiti
Matcher m = fromPattern.matcher(sql);
Preconditions.checkArgument(m.matches());

if (!tableName.equals(m.group(2).replaceAll("[`\"]", ""))) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems to change current behavior. We don't need to handle the single quote character anymore?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we don't need to replace anything here as the tableName is already "quoted" correctly by this point, and we expect sql to be correctly quoted by the user via "hive.sql.query" or generated via calcite.

However, this might change if we conditionally quote the table name, so I might have to revisit this after we finalize prior discussions.

if (!tableName.equals(m.group(2))) {
throw new RuntimeException("Cannot find " + tableName + " in sql query " + sql);
}
result = String.format("%s (%s) tmptable %s", m.group(1), boundaryQuery, m.group(3));
Expand Down Expand Up @@ -537,12 +539,27 @@ public boolean needColumnQuote() {
return true;
}

/**
* Quotes an identifier based on the database type.
*
* @param identifier The identifier to quote
* @param dbType The DatabaseType enum
* @return A database-specific quoted identifier (e.g., "\"Country\"" or "`Country`")
*/
protected static String quoteIdentifier(String identifier, DatabaseType dbType) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the record, Calcite provides the following APIs:

  • org.apache.calcite.sql.SqlDialect.Context#identifierQuoteString
  • org.apache.calcite.sql.SqlDialect#quoteIdentifier(java.lang.String)

It may be possible to use them instead of introducing code that does very similar things.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will try to use them in the next run.

if (identifier == null || dbType == null || dbType.getStartQuote() == null) {
return identifier;
}
return dbType.getStartQuote() + identifier + dbType.getEndQuote();
}

private static String getQualifiedTableName(Configuration conf) {
String tableName = conf.get(Constants.JDBC_TABLE);
DatabaseType dbType = DatabaseType.from(conf.get(JdbcStorageConfig.DATABASE_TYPE.getPropertyName()));
String tableName = quoteIdentifier(unescapeHiveJdbcIdentifier(conf.get(Constants.JDBC_TABLE)), dbType);
if (tableName == null) {
return null;
}
String schemaName = conf.get(Constants.JDBC_SCHEMA);
String schemaName = quoteIdentifier(unescapeHiveJdbcIdentifier(conf.get(Constants.JDBC_SCHEMA)), dbType);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we always quote the schema/table name then we are making all code relying on this method case-sensitive. In other words, if previously the user had defined "hive.sql.table" = "Country" then queries over this table may stop working due to case sensitivity. Quoting everything seems to be a breaking change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method getQualifiedTableName is used to build queries that are run in the backend db. We have to make sure to quote the table and schema names here, if we want to support case sensitivity. Currently, if the user defines "hive.sql.table" = "Country", the table name in the query that is run in the backend db is "essentially" country. And if we quote all the time we will be changing the table name to Country, which might break existing jobs.

I think it's easy to check if the table and schema names were "escaped" by the user before we "unescape" and quote. If they were not escaped, we could skip it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Conditional escaping is a good idea. I have the impression though that we may also need to make quoting conditional. For the conditional we have various options:

  • Based on the content/value of the hive.sql.table property as proposed above
  • Based on a new global config property (e.g., hive.jdbc.identifiers.casesensitive)
  • Based on table level property (e.g., hive.sql.identifiers.casesensitive)

Not sure whats the best option to move forward. Maybe we can take some inspiration by checking what other engines (Presto/Trino/Spark/etc) are doing.

return schemaName == null ? tableName : schemaName + "." + tableName;
}

Expand Down
16 changes: 12 additions & 4 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

package org.apache.hadoop.hive.ql.exec;

import static org.apache.hadoop.hive.conf.Constants.JDBC_USERNAME;
import static org.apache.hadoop.hive.serde.serdeConstants.STRING_TYPE_NAME;

import java.io.ByteArrayOutputStream;
Expand Down Expand Up @@ -74,6 +75,7 @@
import org.apache.hadoop.hive.ql.plan.ExplainWork;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PlanUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TezWork;
import org.apache.hadoop.hive.ql.plan.api.StageType;
Expand Down Expand Up @@ -107,6 +109,7 @@ public class ExplainTask extends Task<ExplainWork> implements Serializable {
private static final String CBO_INFO_JSON_LABEL = "cboInfo";
private static final String CBO_PLAN_JSON_LABEL = "CBOPlan";
private static final String CBO_PLAN_TEXT_LABEL = "CBO PLAN:";
public static final String JOB_PROPERTIES = "jobProperties";
private final Map<Operator<?>, Integer> operatorVisits = new HashMap<>();
private boolean isLogical = false;

Expand Down Expand Up @@ -1132,15 +1135,20 @@ public JSONObject outputPlan(Object work, PrintStream out,
}

// Try this as a map
if (val instanceof Map) {
// Go through the map and print out the stuff
Map<?, ?> mp = (Map<?, ?>) val;
if (val instanceof Map<?, ?> mp) {

// Remove JDBC username from job properties
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we removing the username? How is this related to the sensitivity of identifiers?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because we don't want to print usernames in explain plans. This is explained in #6197 (comment) and #6197 (comment)

// Not handled in TableDesc because getJobProperties() is used in other places too.
if (header.strip().contains(JOB_PROPERTIES)) {
mp = PlanUtils.getPropertiesForExplain((Map<String, String>) mp, JDBC_USERNAME);
}

if (out != null && !skipHeader && mp != null && !mp.isEmpty()) {
if (out != null && !skipHeader && !mp.isEmpty()) {
out.print(header);
}

JSONObject jsonOut = outputMap(mp, !skipHeader && !emptyHeader, out, extended, jsonOutput, ind);
// Go through the map and print out the stuff
if (jsonOutput && !mp.isEmpty()) {
json.put(header, jsonOut);
}
Expand Down
Loading