Skip to content

Commit

Permalink
[fix](external catalog) Fix missing fields when rebuilding metadata f…
Browse files Browse the repository at this point in the history
…rom image (apache#47603)

### What problem does this PR solve?

Issue Number: close #xxx

Related PR: apache#41510

Problem Summary:

In PR apache#41510, we added several fields to External Catalog. However, we
only handled the upgrade scenario for EditLog but not for Image. This
causes Catalogs rebuilt from Image to miss these fields, resulting in
NullPointerException during queries. This PR fixes this issue.

Specifically:
1. Added null check and initialization for fields in gsonPostProcess
2. Ensured consistent behavior between EditLog replay and Image
deserialization
3. Added proper logging for better troubleshooting
  • Loading branch information
zy-kkk authored and lzyy2024 committed Feb 21, 2025
1 parent 6cd1185 commit b4b087d
Show file tree
Hide file tree
Showing 4 changed files with 66 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -399,7 +399,7 @@ private void init() {
db.setRemoteName(remoteDbName);
}
tmpIdToDb.put(dbId, db);
initCatalogLog.addRefreshDb(dbId);
initCatalogLog.addRefreshDb(dbId, remoteDbName);
} else {
dbId = Env.getCurrentEnv().getNextId();
tmpDbNameToId.put(localDbName, dbId);
Expand Down Expand Up @@ -734,8 +734,12 @@ private void removeAccessController() {
}

public void replayInitCatalog(InitCatalogLog log) {
// If the remote name is missing during upgrade, all databases in the Map will be reinitialized.
if (log.getCreateCount() > 0 && (log.getRemoteDbNames() == null || log.getRemoteDbNames().isEmpty())) {
// If the remote name is missing during upgrade, or
// the refresh db's remote name is empty,
// all databases in the Map will be reinitialized.
if ((log.getCreateCount() > 0 && (log.getRemoteDbNames() == null || log.getRemoteDbNames().isEmpty()))
|| (log.getRefreshCount() > 0
&& (log.getRefreshRemoteDbNames() == null || log.getRefreshRemoteDbNames().isEmpty()))) {
dbNameToId = Maps.newConcurrentMap();
idToDb = Maps.newConcurrentMap();
lastUpdateTime = log.getLastUpdateTime();
Expand All @@ -755,6 +759,7 @@ public void replayInitCatalog(InitCatalogLog log) {
log.getRefreshDbIds().get(i), name);
continue;
}
db.get().setRemoteName(log.getRefreshRemoteDbNames().get(i));
Preconditions.checkNotNull(db.get());
tmpDbNameToId.put(db.get().getFullName(), db.get().getId());
tmpIdToDb.put(db.get().getId(), db.get());
Expand All @@ -771,6 +776,18 @@ public void replayInitCatalog(InitCatalogLog log) {
db.getFullName(), db.getId(), log.getRemoteDbNames().get(i));
}
}
// Check whether the remoteName of db in tmpIdToDb is empty
for (ExternalDatabase<? extends ExternalTable> db : tmpIdToDb.values()) {
if (Strings.isNullOrEmpty(db.getRemoteName())) {
LOG.info("Database [{}] remoteName is empty in catalog [{}], mark as uninitialized",
db.getFullName(), name);
dbNameToId = Maps.newConcurrentMap();
idToDb = Maps.newConcurrentMap();
lastUpdateTime = log.getLastUpdateTime();
initialized = false;
return;
}
}
dbNameToId = tmpDbNameToId;
idToDb = tmpIdToDb;
lastUpdateTime = log.getLastUpdateTime();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,9 @@ public final synchronized void makeSureInitialized() {

public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) {
// If the remote name is missing during upgrade, all tables in the Map will be reinitialized.
if (log.getCreateCount() > 0 && (log.getRemoteTableNames() == null || log.getRemoteTableNames().isEmpty())) {
if ((log.getCreateCount() > 0 && (log.getRemoteTableNames() == null || log.getRemoteTableNames().isEmpty()))
|| (log.getRefreshCount() > 0
&& (log.getRefreshRemoteTableNames() == null || log.getRefreshRemoteTableNames().isEmpty()))) {
tableNameToId = Maps.newConcurrentMap();
idToTbl = Maps.newConcurrentMap();
lastUpdateTime = log.getLastUpdateTime();
Expand All @@ -209,6 +211,7 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) {
// So we need add a validation here to avoid table(s) not found, this is just a temporary solution
// because later we will remove all the logics about InitCatalogLog/InitDatabaseLog.
if (table.isPresent()) {
table.get().setRemoteName(log.getRefreshRemoteTableNames().get(i));
tmpTableNameToId.put(table.get().getName(), table.get().getId());
tmpIdToTbl.put(table.get().getId(), table.get());

Expand All @@ -234,6 +237,19 @@ public void replayInitDb(InitDatabaseLog log, ExternalCatalog catalog) {
LOG.info("Synchronized table (create): [Name: {}, ID: {}, Remote Name: {}]",
table.getName(), table.getId(), log.getRemoteTableNames().get(i));
}
// Check whether the remoteName and db Tbl db in idToTbl is empty
for (T table : idToTbl.values()) {
if (Strings.isNullOrEmpty(table.getRemoteName())
|| table.getDb() == null) {
LOG.info("Table [{}] remoteName or database is empty, mark as uninitialized",
table.getName());
tableNameToId = Maps.newConcurrentMap();
idToTbl = Maps.newConcurrentMap();
lastUpdateTime = log.getLastUpdateTime();
initialized = false;
return;
}
}
tableNameToId = tmpTableNameToId;
idToTbl = tmpIdToTbl;
lastUpdateTime = log.getLastUpdateTime();
Expand Down Expand Up @@ -266,7 +282,7 @@ private void init() {
table.setDb(this);
}
tmpIdToTbl.put(tblId, table);
initDatabaseLog.addRefreshTable(tblId);
initDatabaseLog.addRefreshTable(tblId, remoteTableName);
} else {
tblId = Env.getCurrentEnv().getNextId();
tmpTableNameToId.put(localTableName, tblId);
Expand Down Expand Up @@ -620,14 +636,22 @@ public void gsonPostProcess() throws IOException {
case "ExternalInfoSchemaTable":
ExternalInfoSchemaTable infoSchemaTable = GsonUtils.GSON.fromJson(GsonUtils.GSON.toJson(obj),
ExternalInfoSchemaTable.class);
if (infoSchemaTable.getDb() == null) {
infoSchemaTable.setDb(this);
}
tmpIdToTbl.put(infoSchemaTable.getId(), (T) infoSchemaTable);
tableNameToId.put(infoSchemaTable.getName(), infoSchemaTable.getId());
lowerCaseToTableName.put(infoSchemaTable.getName().toLowerCase(), infoSchemaTable.getName());
break;
case "ExternalMysqlTable":
ExternalMysqlTable mysqlTable = GsonUtils.GSON.fromJson(GsonUtils.GSON.toJson(obj),
ExternalMysqlTable.class);
if (mysqlTable.getDb() == null) {
mysqlTable.setDb(this);
}
tmpIdToTbl.put(mysqlTable.getId(), (T) mysqlTable);
tableNameToId.put(mysqlTable.getName(), mysqlTable.getId());
lowerCaseToTableName.put(mysqlTable.getName().toLowerCase(), mysqlTable.getName());
break;
default:
break;
Expand All @@ -640,6 +664,14 @@ public void gsonPostProcess() throws IOException {
((ExternalTable) obj).getName());
}
}
// Check whether the remoteName and db Tbl db in idToTbl is empty
for (T table : idToTbl.values()) {
if (Strings.isNullOrEmpty(table.getRemoteName())
|| table.getDb() == null) {
initialized = false;
break;
}
}
idToTbl = tmpIdToTbl;
rwLock = new MonitoredReentrantReadWriteLock(true);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ public enum Type {
@SerializedName(value = "refreshDbIds")
private List<Long> refreshDbIds;

@SerializedName(value = "refreshRemoteDbNames")
private List<String> refreshRemoteDbNames;

@SerializedName(value = "createDbIds")
private List<Long> createDbIds;

Expand All @@ -78,15 +81,17 @@ public InitCatalogLog() {
createCount = 0;
catalogId = 0;
refreshDbIds = Lists.newArrayList();
refreshRemoteDbNames = Lists.newArrayList();
createDbIds = Lists.newArrayList();
createDbNames = Lists.newArrayList();
remoteDbNames = Lists.newArrayList();
type = Type.UNKNOWN;
}

public void addRefreshDb(long id) {
public void addRefreshDb(long id, String remoteName) {
refreshCount += 1;
refreshDbIds.add(id);
refreshRemoteDbNames.add(remoteName);
}

public void addCreateDb(long id, String name, String remoteName) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ public enum Type {
@SerializedName(value = "refreshTableIds")
private List<Long> refreshTableIds;

@SerializedName(value = "refreshRemoteTableNames")
private List<String> refreshRemoteTableNames;

@SerializedName(value = "createTableIds")
private List<Long> createTableIds;

Expand All @@ -83,15 +86,17 @@ public InitDatabaseLog() {
catalogId = 0;
dbId = 0;
refreshTableIds = Lists.newArrayList();
refreshRemoteTableNames = Lists.newArrayList();
createTableIds = Lists.newArrayList();
createTableNames = Lists.newArrayList();
remoteTableNames = Lists.newArrayList();
type = Type.UNKNOWN;
}

public void addRefreshTable(long id) {
public void addRefreshTable(long id, String remoteName) {
refreshCount += 1;
refreshTableIds.add(id);
refreshRemoteTableNames.add(remoteName);
}

public void addCreateTable(long id, String name, String remoteName) {
Expand Down

0 comments on commit b4b087d

Please sign in to comment.