Skip to content

Commit ea3dbb6

Browse files
authored
Document ObjectStoreProvider (apache#3619)
1 parent 7325b83 commit ea3dbb6

File tree

1 file changed

+49
-7
lines changed

1 file changed

+49
-7
lines changed

datafusion/core/src/datasource/object_store.rs

Lines changed: 49 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,13 +81,48 @@ impl std::fmt::Display for ObjectStoreUrl {
8181
}
8282
}
8383

84-
/// Object store provider can detector an object store based on the url
84+
/// Provides a mechanism for lazy, on-demand creation of [`ObjectStore`]
85+
///
86+
/// See [`ObjectStoreRegistry::new_with_provider`]
8587
pub trait ObjectStoreProvider: Send + Sync + 'static {
86-
/// Return an ObjectStore for the provided url based on its scheme and authority
88+
/// Return an ObjectStore for the provided url, called by [`ObjectStoreRegistry::get_by_url`]
89+
/// when no matching store has already been registered. The result will be cached based
90+
/// on its scheme and authority. Any error will be returned to the caller
8791
fn get_by_url(&self, url: &Url) -> Result<Arc<dyn ObjectStore>>;
8892
}
8993

90-
/// Object store registry
94+
/// [`ObjectStoreRegistry`] stores [`ObjectStore`] keyed by url scheme and authority, that is
95+
/// the part of a URL preceding the path
96+
///
97+
/// This is used by DataFusion to find an appropriate [`ObjectStore`] for a [`ListingTableUrl`]
98+
/// provided in a query such as
99+
///
100+
/// ```sql
101+
/// create external table unicorns stored as parquet location 's3://my_bucket/lineitem/';
102+
/// ```
103+
///
104+
/// In this particular case the url `s3://my_bucket/lineitem/` will be provided to
105+
/// [`ObjectStoreRegistry::get_by_url`] and one of three things will happen:
106+
///
107+
/// - If an [`ObjectStore`] has been registered with [`ObjectStoreRegistry::register_store`] with
108+
/// scheme `s3` and host `my_bucket`, this [`ObjectStore`] will be returned
109+
///
110+
/// - If an [`ObjectStoreProvider`] has been associated with this [`ObjectStoreRegistry`] using
111+
/// [`ObjectStoreRegistry::new_with_provider`], [`ObjectStoreProvider::get_by_url`] will be invoked,
112+
/// and the returned [`ObjectStore`] registered on this [`ObjectStoreRegistry`]. Any error will
113+
/// be returned to the caller
114+
///
115+
/// - Otherwise an error will be returned, indicating that no suitable [`ObjectStore`] could
116+
/// be found
117+
///
118+
/// This allows for two different use-cases:
119+
///
120+
/// * DBMS systems where object store buckets are explicitly created using DDL, can register these
121+
/// buckets using [`ObjectStoreRegistry::register_store`]
122+
/// * DBMS systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`]
123+
/// lazily, on-demand using [`ObjectStoreProvider`]
124+
///
125+
/// [`ListingTableUrl`]: crate::datasource::listing::ListingTableUrl
91126
pub struct ObjectStoreRegistry {
92127
/// A map from scheme to object store that serve list / read operations for the store
93128
object_stores: RwLock<HashMap<String, Arc<dyn ObjectStore>>>,
@@ -112,13 +147,19 @@ impl Default for ObjectStoreRegistry {
112147
}
113148

114149
impl ObjectStoreRegistry {
115-
/// By default the self detector is None
150+
/// Create an [`ObjectStoreRegistry`] with no [`ObjectStoreProvider`].
151+
///
152+
/// This will register [`LocalFileSystem`] to handle `file://` paths; further stores
153+
/// will need to be explicitly registered with calls to [`ObjectStoreRegistry::register_store`]
116154
pub fn new() -> Self {
117155
ObjectStoreRegistry::new_with_provider(None)
118156
}
119157

120-
/// Create the registry that object stores can registered into.
121-
/// ['LocalFileSystem'] store is registered in by default to support read local files natively.
158+
/// Create an [`ObjectStoreRegistry`] with the provided [`ObjectStoreProvider`]
159+
///
160+
/// This will register [`LocalFileSystem`] to handle `file://` paths; further stores
161+
/// may be explicitly registered with calls to [`ObjectStoreRegistry::register_store`] or
162+
/// created lazily, on-demand by the provided [`ObjectStoreProvider`]
122163
pub fn new_with_provider(provider: Option<Arc<dyn ObjectStoreProvider>>) -> Self {
123164
let mut map: HashMap<String, Arc<dyn ObjectStore>> = HashMap::new();
124165
map.insert("file://".to_string(), Arc::new(LocalFileSystem::new()));
@@ -129,7 +170,8 @@ impl ObjectStoreRegistry {
129170
}
130171

131172
/// Adds a new store to this registry.
132-
/// If a store of the same prefix existed before, it is replaced in the registry and returned.
173+
///
174+
/// If a store with the same scheme and host existed before, it is replaced and returned
133175
pub fn register_store(
134176
&self,
135177
scheme: impl AsRef<str>,

0 commit comments

Comments
 (0)