@@ -81,13 +81,48 @@ impl std::fmt::Display for ObjectStoreUrl {
8181 }
8282}
8383
84- /// Object store provider can detector an object store based on the url
84+ /// Provides a mechanism for lazy, on-demand creation of [`ObjectStore`]
85+ ///
86+ /// See [`ObjectStoreRegistry::new_with_provider`]
8587pub trait ObjectStoreProvider : Send + Sync + ' static {
86- /// Return an ObjectStore for the provided url based on its scheme and authority
88+ /// Return an ObjectStore for the provided url, called by [`ObjectStoreRegistry::get_by_url`]
89+ /// when no matching store has already been registered. The result will be cached based
90+ /// on its scheme and authority. Any error will be returned to the caller
8791 fn get_by_url ( & self , url : & Url ) -> Result < Arc < dyn ObjectStore > > ;
8892}
8993
90- /// Object store registry
94+ /// [`ObjectStoreRegistry`] stores [`ObjectStore`] keyed by url scheme and authority, that is
95+ /// the part of a URL preceding the path
96+ ///
97+ /// This is used by DataFusion to find an appropriate [`ObjectStore`] for a [`ListingTableUrl`]
98+ /// provided in a query such as
99+ ///
100+ /// ```sql
101+ /// create external table unicorns stored as parquet location 's3://my_bucket/lineitem/';
102+ /// ```
103+ ///
104+ /// In this particular case the url `s3://my_bucket/lineitem/` will be provided to
105+ /// [`ObjectStoreRegistry::get_by_url`] and one of three things will happen:
106+ ///
107+ /// - If an [`ObjectStore`] has been registered with [`ObjectStoreRegistry::register_store`] with
108+ /// scheme `s3` and host `my_bucket`, this [`ObjectStore`] will be returned
109+ ///
110+ /// - If an [`ObjectStoreProvider`] has been associated with this [`ObjectStoreRegistry`] using
111+ /// [`ObjectStoreRegistry::new_with_provider`], [`ObjectStoreProvider::get_by_url`] will be invoked,
112+ /// and the returned [`ObjectStore`] registered on this [`ObjectStoreRegistry`]. Any error will
113+ /// be returned to the caller
114+ ///
115+ /// - Otherwise an error will be returned, indicating that no suitable [`ObjectStore`] could
116+ /// be found
117+ ///
118+ /// This allows for two different use-cases:
119+ ///
120+ /// * DBMS systems where object store buckets are explicitly created using DDL, can register these
121+ /// buckets using [`ObjectStoreRegistry::register_store`]
122+ /// * DBMS systems relying on ad-hoc discovery, without corresponding DDL, can create [`ObjectStore`]
123+ /// lazily, on-demand using [`ObjectStoreProvider`]
124+ ///
125+ /// [`ListingTableUrl`]: crate::datasource::listing::ListingTableUrl
91126pub struct ObjectStoreRegistry {
92127 /// A map from scheme to object store that serve list / read operations for the store
93128 object_stores : RwLock < HashMap < String , Arc < dyn ObjectStore > > > ,
@@ -112,13 +147,19 @@ impl Default for ObjectStoreRegistry {
112147}
113148
114149impl ObjectStoreRegistry {
115- /// By default the self detector is None
150+ /// Create an [`ObjectStoreRegistry`] with no [`ObjectStoreProvider`].
151+ ///
152+ /// This will register [`LocalFileSystem`] to handle `file://` paths, further stores
153+ /// will need to be explicitly registered with calls to [`ObjectStoreRegistry::register_store`]
116154 pub fn new ( ) -> Self {
117155 ObjectStoreRegistry :: new_with_provider ( None )
118156 }
119157
120- /// Create the registry that object stores can registered into.
121- /// ['LocalFileSystem'] store is registered in by default to support read local files natively.
158+ /// Create an [`ObjectStoreRegistry`] with the provided [`ObjectStoreProvider`]
159+ ///
160+ /// This will register [`LocalFileSystem`] to handle `file://` paths, further stores
161+ /// may be explicitly registered with calls to [`ObjectStoreRegistry::register_store`] or
162+ /// created lazily, on-demand by the provided [`ObjectStoreProvider`]
122163 pub fn new_with_provider ( provider : Option < Arc < dyn ObjectStoreProvider > > ) -> Self {
123164 let mut map: HashMap < String , Arc < dyn ObjectStore > > = HashMap :: new ( ) ;
124165 map. insert ( "file://" . to_string ( ) , Arc :: new ( LocalFileSystem :: new ( ) ) ) ;
@@ -129,7 +170,8 @@ impl ObjectStoreRegistry {
129170 }
130171
131172 /// Adds a new store to this registry.
132- /// If a store of the same prefix existed before, it is replaced in the registry and returned.
173+ ///
174+ /// If a store with the same scheme and host existed before, it is replaced and returned
133175 pub fn register_store (
134176 & self ,
135177 scheme : impl AsRef < str > ,
0 commit comments