Skip to content

Commit 0ed1706

Browse files
epgifalamb
authored andcommitted
feat: add SchemaProvider::table_type(table_name: &str) (apache#16401)
* feat: add SchemaProvider::table_type(table_name: &str) InformationSchemaConfig::make_tables only needs the TableType not the whole TableProvider, and the former may require an expensive catalog operation to construct and the latter may not. This allows avoiding `SELECT * FROM information_schema.tables` having to make 1 of those potentially expensive operations per table. * test: new InformationSchemaConfig::make_tables behavior * Move tests to same file to fix CI --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent 0b003c3 commit 0ed1706

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

datafusion/catalog/src/information_schema.rs

Lines changed: 93 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -102,12 +102,14 @@ impl InformationSchemaConfig {
102102
// schema name may not exist in the catalog, so we need to check
103103
if let Some(schema) = catalog.schema(&schema_name) {
104104
for table_name in schema.table_names() {
105-
if let Some(table) = schema.table(&table_name).await? {
105+
if let Some(table_type) =
106+
schema.table_type(&table_name).await?
107+
{
106108
builder.add_table(
107109
&catalog_name,
108110
&schema_name,
109111
&table_name,
110-
table.table_type(),
112+
table_type,
111113
);
112114
}
113115
}
@@ -1348,3 +1350,92 @@ impl PartitionStream for InformationSchemaParameters {
13481350
))
13491351
}
13501352
}
1353+
1354+
#[cfg(test)]
1355+
mod tests {
1356+
use super::*;
1357+
use crate::CatalogProvider;
1358+
1359+
#[tokio::test]
1360+
async fn make_tables_uses_table_type() {
1361+
let config = InformationSchemaConfig {
1362+
catalog_list: Arc::new(Fixture),
1363+
};
1364+
let mut builder = InformationSchemaTablesBuilder {
1365+
catalog_names: StringBuilder::new(),
1366+
schema_names: StringBuilder::new(),
1367+
table_names: StringBuilder::new(),
1368+
table_types: StringBuilder::new(),
1369+
schema: Arc::new(Schema::empty()),
1370+
};
1371+
1372+
assert!(config.make_tables(&mut builder).await.is_ok());
1373+
1374+
assert_eq!("BASE TABLE", builder.table_types.finish().value(0));
1375+
}
1376+
1377+
#[derive(Debug)]
1378+
struct Fixture;
1379+
1380+
#[async_trait]
1381+
impl SchemaProvider for Fixture {
1382+
// InformationSchemaConfig::make_tables should use this.
1383+
async fn table_type(&self, _: &str) -> Result<Option<TableType>> {
1384+
Ok(Some(TableType::Base))
1385+
}
1386+
1387+
// InformationSchemaConfig::make_tables used this before `table_type`
1388+
// existed but should not, as it may be expensive.
1389+
async fn table(&self, _: &str) -> Result<Option<Arc<dyn TableProvider>>> {
1390+
panic!("InformationSchemaConfig::make_tables called SchemaProvider::table instead of table_type")
1391+
}
1392+
1393+
fn as_any(&self) -> &dyn Any {
1394+
unimplemented!("not required for these tests")
1395+
}
1396+
1397+
fn table_names(&self) -> Vec<String> {
1398+
vec!["atable".to_string()]
1399+
}
1400+
1401+
fn table_exist(&self, _: &str) -> bool {
1402+
unimplemented!("not required for these tests")
1403+
}
1404+
}
1405+
1406+
impl CatalogProviderList for Fixture {
1407+
fn as_any(&self) -> &dyn Any {
1408+
unimplemented!("not required for these tests")
1409+
}
1410+
1411+
fn register_catalog(
1412+
&self,
1413+
_: String,
1414+
_: Arc<dyn CatalogProvider>,
1415+
) -> Option<Arc<dyn CatalogProvider>> {
1416+
unimplemented!("not required for these tests")
1417+
}
1418+
1419+
fn catalog_names(&self) -> Vec<String> {
1420+
vec!["acatalog".to_string()]
1421+
}
1422+
1423+
fn catalog(&self, _: &str) -> Option<Arc<dyn CatalogProvider>> {
1424+
Some(Arc::new(Self))
1425+
}
1426+
}
1427+
1428+
impl CatalogProvider for Fixture {
1429+
fn as_any(&self) -> &dyn Any {
1430+
unimplemented!("not required for these tests")
1431+
}
1432+
1433+
fn schema_names(&self) -> Vec<String> {
1434+
vec!["aschema".to_string()]
1435+
}
1436+
1437+
fn schema(&self, _: &str) -> Option<Arc<dyn SchemaProvider>> {
1438+
Some(Arc::new(Self))
1439+
}
1440+
}
1441+
}

datafusion/catalog/src/schema.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use std::sync::Arc;
2626

2727
use crate::table::TableProvider;
2828
use datafusion_common::Result;
29+
use datafusion_expr::TableType;
2930

3031
/// Represents a schema, comprising a number of named tables.
3132
///
@@ -54,6 +55,14 @@ pub trait SchemaProvider: Debug + Sync + Send {
5455
name: &str,
5556
) -> Result<Option<Arc<dyn TableProvider>>, DataFusionError>;
5657

58+
/// Retrieves the type of a specific table from the schema by name, if it exists, otherwise
59+
/// returns `None`. Implementations for which this operation is cheap but [Self::table] is
60+
/// expensive can override this to improve operations that only need the type, e.g.
61+
/// `SELECT * FROM information_schema.tables`.
62+
async fn table_type(&self, name: &str) -> Result<Option<TableType>> {
63+
self.table(name).await.map(|o| o.map(|t| t.table_type()))
64+
}
65+
5766
/// If supported by the implementation, adds a new table named `name` to
5867
/// this schema.
5968
///

0 commit comments

Comments
 (0)