Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ORC, Hive: Fix column projection when reading ORC files and orc.force.positional.evolution is set to true on the default configuration #2111

Merged
merged 2 commits into from
Jan 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,26 @@ public class TestHiveMetastore {
private HiveClientPool clientPool;

/**
* Starts a TestHiveMetastore with the default connection pool size (5).
* Starts a TestHiveMetastore with the default connection pool size (5) and the default HiveConf.
*/
public void start() {
start(DEFAULT_POOL_SIZE);
start(new HiveConf(new Configuration(), TestHiveMetastore.class), DEFAULT_POOL_SIZE);
}

/**
* Starts a TestHiveMetastore with a provided connection pool size.
* Starts a TestHiveMetastore with the default connection pool size (5) with the provided HiveConf.
* @param hiveConf The hive configuration to use
*/
public void start(HiveConf conf) {
start(conf, DEFAULT_POOL_SIZE);
}

/**
* Starts a TestHiveMetastore with a provided connection pool size and HiveConf.
* @param hiveConf The hive configuration to use
* @param poolSize The number of threads in the executor pool
*/
public void start(int poolSize) {
public void start(HiveConf conf, int poolSize) {
try {
this.hiveLocalDir = createTempDirectory("hive", asFileAttribute(fromString("rwxrwxrwx"))).toFile();
File derbyLogFile = new File(hiveLocalDir, "derby.log");
Expand All @@ -106,7 +115,9 @@ public void start(int poolSize) {

TServerSocket socket = new TServerSocket(0);
int port = socket.getServerSocket().getLocalPort();
this.hiveConf = newHiveConf(port);
initConf(conf, port);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: if initConf returned the configuration, this could be one line still: this.hiveConf = initConf(conf, port);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Before the change the configuration was created in newHiveConf(). The new method changes values in the conf parameter. The change meant to emphasize that we are not creating a new conf, only changing values in the one got in the parameter list.

Shall we move back to create a new config and initialize that?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If conf is passed in, I think the change to initConf makes sense. I just think the patch could be slightly smaller. Not a blocker, though.


this.hiveConf = conf;
this.server = newThriftServer(socket, poolSize, hiveConf);
this.executorService = Executors.newSingleThreadExecutor();
this.executorService.submit(() -> server.serve());
Expand Down Expand Up @@ -196,14 +207,12 @@ private TServer newThriftServer(TServerSocket socket, int poolSize, HiveConf con
return new TThreadPoolServer(args);
}

private HiveConf newHiveConf(int port) {
HiveConf newHiveConf = new HiveConf(new Configuration(), TestHiveMetastore.class);
newHiveConf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port);
newHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath());
newHiveConf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
newHiveConf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
newHiveConf.set("iceberg.hive.client-pool-size", "2");
return newHiveConf;
private void initConf(HiveConf conf, int port) {
conf.set(HiveConf.ConfVars.METASTOREURIS.varname, "thrift://localhost:" + port);
conf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, "file:" + hiveLocalDir.getAbsolutePath());
conf.set(HiveConf.ConfVars.METASTORE_TRY_DIRECT_SQL.varname, "false");
conf.set(HiveConf.ConfVars.METASTORE_DISALLOW_INCOMPATIBLE_COL_TYPE_CHANGES.varname, "false");
conf.set("iceberg.hive.client-pool-size", "2");
}

private void setupMetastoreDB(String dbURL) throws SQLException, IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.iceberg.data.Record;
import org.apache.iceberg.mr.TestHelper;
import org.apache.iceberg.types.Types;
import org.apache.orc.OrcConf;
import org.junit.rules.TemporaryFolder;

import static org.apache.iceberg.types.Types.NestedField.optional;
Expand Down Expand Up @@ -61,6 +62,9 @@ static TestHiveShell shell() {
TestHiveShell shell = new TestHiveShell();
shell.setHiveConfValue("hive.notification.event.poll.interval", "-1");
shell.setHiveConfValue("hive.tez.exec.print.summary", "true");
// We would like to make sure that ORC reading overrides this config, so reading Iceberg tables could work in
// systems (like Hive 3.2 and higher) where this value is set to true explicitly.
shell.setHiveConfValue(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), "true");
shell.start();
return shell;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ public void setHiveSessionValue(String key, boolean value) {
}

public void start() {
metastore.start();
// Create a copy of the HiveConf for the metastore
metastore.start(new HiveConf(hs2Conf));
hs2Conf.setVar(HiveConf.ConfVars.METASTOREURIS, metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREURIS));
hs2Conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE,
metastore.hiveConf().getVar(HiveConf.ConfVars.METASTOREWAREHOUSE));
Expand Down
3 changes: 3 additions & 0 deletions orc/src/main/java/org/apache/iceberg/orc/ORC.java
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ private ReadBuilder(InputFile file) {
} else {
this.conf = new Configuration();
}

// We need to turn positional schema evolution off since we use column name based schema evolution for projection
this.conf.setBoolean(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), false);
}

/**
Expand Down