From 4ec21f6dc0760b056e9ecc10f33c5a40b40a3907 Mon Sep 17 00:00:00 2001 From: Calvin Min Date: Tue, 21 Oct 2025 11:43:57 -0400 Subject: [PATCH 1/2] v0.2.2 Doc Update --- CMakeLists.txt | 2 +- Doxyfile | 2 +- include/databricks/version.h | 4 ++-- vcpkg.json | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b41894d..03122c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.14) project(databricks_sdk - VERSION 0.2.0 + VERSION 0.2.2 DESCRIPTION "Databricks C++ SDK" LANGUAGES CXX) diff --git a/Doxyfile b/Doxyfile index 84d08a4..fe42f90 100644 --- a/Doxyfile +++ b/Doxyfile @@ -2,7 +2,7 @@ # Project information PROJECT_NAME = "Databricks C++ SDK" -PROJECT_NUMBER = "0.2.0" +PROJECT_NUMBER = "0.2.2" PROJECT_BRIEF = "Interact with Databricks via an SDK" OUTPUT_DIRECTORY = docs diff --git a/include/databricks/version.h b/include/databricks/version.h index 1d331e8..5d74d98 100644 --- a/include/databricks/version.h +++ b/include/databricks/version.h @@ -11,9 +11,9 @@ namespace databricks /** * @brief SDK version information */ - constexpr const char *VERSION = "0.2.0"; + constexpr const char *VERSION = "0.2.2"; constexpr int VERSION_MAJOR = 0; constexpr int VERSION_MINOR = 2; - constexpr int VERSION_PATCH = 0; + constexpr int VERSION_PATCH = 2; } // namespace databricks diff --git a/vcpkg.json b/vcpkg.json index 94c2260..5c9c8c6 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,6 +1,6 @@ { "name": "databricks-sdk-cpp", - "version": "0.2.0", + "version": "0.2.2", "description": "C++ SDK for Databricks, providing an interface for interacting with Databricks services via ODBC", "homepage": "https://github.com/calvinjmin/databricks-sdk-cpp", "license": "MIT", From feb334c39cae82652ce3ec3b37454acfef1258d4 Mon Sep 17 00:00:00 2001 From: Calvin Min <32576363+Calvinjmin@users.noreply.github.com> Date: Fri, 24 Oct 2025 13:44:01 -0400 Subject: [PATCH 2/2] Feature/Unity Catalog API (#10) * 
Unity Catalog Boiler Plate * Unity Catalog Impl * PR Template Update on Versions * Doxyfile update * v0.2.3 --- .github/pull_request_template.md | 2 +- CMakeLists.txt | 6 +- Doxyfile | 10 +- README.md | 2 +- examples/CMakeLists.txt | 5 + examples/unity_catalog_example.cpp | 129 +++++ .../databricks/unity_catalog/unity_catalog.h | 218 +++++++++ .../unity_catalog/unity_catalog_types.h | 202 ++++++++ include/databricks/version.h | 4 +- src/internal/http_client.cpp | 5 +- src/internal/http_client.h | 3 +- src/unity_catalog/unity_catalog.cpp | 454 ++++++++++++++++++ src/unity_catalog/unity_catalog_types.cpp | 153 ++++++ vcpkg.json | 2 +- 14 files changed, 1181 insertions(+), 14 deletions(-) create mode 100644 examples/unity_catalog_example.cpp create mode 100644 include/databricks/unity_catalog/unity_catalog.h create mode 100644 include/databricks/unity_catalog/unity_catalog_types.h create mode 100644 src/unity_catalog/unity_catalog.cpp create mode 100644 src/unity_catalog/unity_catalog_types.cpp diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 62d67db..2153578 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -18,4 +18,4 @@ - [ ] Code builds successfully - [ ] Tests pass - [ ] Documentation updated (if needed) - +- [ ] Update version and tag diff --git a/CMakeLists.txt b/CMakeLists.txt index 03122c5..de38db0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.14) project(databricks_sdk - VERSION 0.2.2 + VERSION 0.2.3 DESCRIPTION "Databricks C++ SDK" LANGUAGES CXX) @@ -97,6 +97,8 @@ set(SOURCES src/compute/compute_types.cpp src/compute/compute.cpp src/connection_pool.cpp + src/unity_catalog/unity_catalog_types.cpp + src/unity_catalog/unity_catalog.cpp src/internal/pool_manager.cpp src/internal/logger.cpp src/internal/http_client.cpp @@ -110,6 +112,8 @@ set(HEADERS include/databricks/jobs/jobs.h include/databricks/compute/compute.h 
include/databricks/compute/compute_types.h + include/databricks/unity_catalog/unity_catalog.h + include/databricks/unity_catalog/unity_catalog_types.h ) # Internal headers (not installed) diff --git a/Doxyfile b/Doxyfile index fe42f90..dbf2784 100644 --- a/Doxyfile +++ b/Doxyfile @@ -2,16 +2,16 @@ # Project information PROJECT_NAME = "Databricks C++ SDK" -PROJECT_NUMBER = "0.2.2" +PROJECT_NUMBER = "0.2.3" PROJECT_BRIEF = "Interact with Databricks via an SDK" OUTPUT_DIRECTORY = docs # Input configuration -INPUT = include/databricks src README.md -FILE_PATTERNS = *.h *.cpp *.md +INPUT = include/databricks README.md +FILE_PATTERNS = *.h *.md RECURSIVE = YES -EXCLUDE = src/internal -EXCLUDE_PATTERNS = */build/* */cmake/* +EXCLUDE = +EXCLUDE_PATTERNS = */build/* */cmake/* */internal/* # Output formats GENERATE_HTML = YES diff --git a/README.md b/README.md index 1547364..f16f19a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ A C++ SDK for Databricks, providing an interface for interacting with Databricks services. 
-**Latest Release**: [v0.2.2](https://github.com/calvinjmin/databricks-sdk-cpp/releases/tag/v0.2.2) +**Latest Release**: [v0.2.3](https://github.com/calvinjmin/databricks-sdk-cpp/releases/tag/v0.2.3) **Author**: Calvin Min (calvinjmin@gmail.com) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ae28bb8..063ab16 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -12,11 +12,16 @@ target_link_libraries(jobs_example PRIVATE databricks_sdk) add_executable(compute_example compute_example.cpp) target_link_libraries(compute_example PRIVATE databricks_sdk) +# ========== Unity Catalog API Examples ========== +add_executable(unity_catalog_example unity_catalog_example.cpp) +target_link_libraries(unity_catalog_example PRIVATE databricks_sdk) + # Set RPATH for all examples to find ODBC libraries set_target_properties( simple_query jobs_example compute_example + unity_catalog_example PROPERTIES BUILD_RPATH "${CMAKE_BINARY_DIR};/opt/homebrew/lib;/usr/local/lib" INSTALL_RPATH "/opt/homebrew/lib;/usr/local/lib" diff --git a/examples/unity_catalog_example.cpp b/examples/unity_catalog_example.cpp new file mode 100644 index 0000000..1dd5188 --- /dev/null +++ b/examples/unity_catalog_example.cpp @@ -0,0 +1,129 @@ +/** + * @file unity_catalog_example.cpp + * @brief Example demonstrating the Databricks Unity Catalog API + * + * This example shows how to: + * 1. List all catalogs in your metastore + * 2. Get details for a specific catalog + * 3. List schemas in a catalog + * 4. 
List tables in a schema + */ + +#include "databricks/unity_catalog/unity_catalog.h" +#include "databricks/core/config.h" +#include +#include + +int main() { + try { + // Load configuration from environment + databricks::AuthConfig auth = databricks::AuthConfig::from_environment(); + + std::cout << "Connecting to: " << auth.host << std::endl; + std::cout << "======================================\n" << std::endl; + + // Create Unity Catalog API client (uses API version 2.1 by default) + databricks::UnityCatalog uc(auth); + + // =================================================================== + // Example 1: List all catalogs + // =================================================================== + std::cout << "1. Listing all catalogs:" << std::endl; + std::cout << "------------------------" << std::endl; + + auto catalogs = uc.list_catalogs(); + std::cout << "Found " << catalogs.size() << " catalogs:\n" << std::endl; + + for ( int i = 0; i < std::min(static_cast(catalogs.size()), 10); i++ ) { + const auto& catalog = catalogs[i]; + std::cout << " Catalog: " << catalog.name << std::endl; + std::cout << " Owner: " << catalog.owner << std::endl; + std::cout << " Type: " << catalog.catalog_type << std::endl; + std::cout << " Metastore: " << catalog.metastore_id << std::endl; + if (!catalog.comment.empty()) { + std::cout << " Comment: " << catalog.comment << std::endl; + } + std::cout << std::endl; + } + + // =================================================================== + // Example 2: Get details for a specific catalog + // =================================================================== + if (!catalogs.empty()) { + std::string catalog_name = catalogs[0].name; + + std::cout << "\n2. 
Getting details for catalog '" << catalog_name << "':" << std::endl; + std::cout << "-----------------------------------------------------" << std::endl; + + auto catalog_details = uc.get_catalog(catalog_name); + std::cout << " Name: " << catalog_details.name << std::endl; + std::cout << " Full Name: " << catalog_details.full_name << std::endl; + std::cout << " Owner: " << catalog_details.owner << std::endl; + std::cout << " Type: " << catalog_details.catalog_type << std::endl; + std::cout << " Created At: " << catalog_details.created_at << std::endl; + std::cout << " Updated At: " << catalog_details.updated_at << std::endl; + + if (!catalog_details.properties.empty()) { + std::cout << " Properties:" << std::endl; + for (const auto& [key, value] : catalog_details.properties) { + std::cout << " " << key << ": " << value << std::endl; + } + } + std::cout << std::endl; + + // =============================================================== + // Example 3: List schemas in the catalog + // =============================================================== + std::cout << "\n3. Listing schemas in catalog '" << catalog_name << "':" << std::endl; + std::cout << "-----------------------------------------------------------" << std::endl; + + auto schemas = uc.list_schemas(catalog_name); + std::cout << "Found " << schemas.size() << " schemas:\n" << std::endl; + + for (const auto& schema : schemas) { + std::cout << " Schema: " << schema.name << std::endl; + std::cout << " Full Name: " << schema.full_name << std::endl; + std::cout << " Owner: " << schema.owner << std::endl; + if (!schema.comment.empty()) { + std::cout << " Comment: " << schema.comment << std::endl; + } + std::cout << std::endl; + } + + // =============================================================== + // Example 4: List tables in the first schema + // =============================================================== + if (!schemas.empty()) { + std::string schema_name = schemas[0].name; + + std::cout << "\n4. 
Listing tables in '" << catalog_name << "." << schema_name << "':" << std::endl; + std::cout << "----------------------------------------------------------------" << std::endl; + + auto tables = uc.list_tables(catalog_name, schema_name); + std::cout << "Found " << tables.size() << " tables:\n" << std::endl; + + for (const auto& table : tables) { + std::cout << " Table: " << table.name << std::endl; + std::cout << " Full Name: " << table.full_name << std::endl; + std::cout << " Type: " << table.table_type << std::endl; + std::cout << " Format: " << table.data_source_format << std::endl; + std::cout << " Owner: " << table.owner << std::endl; + std::cout << " Columns: " << table.columns.size() << std::endl; + if (!table.comment.empty()) { + std::cout << " Comment: " << table.comment << std::endl; + } + std::cout << std::endl; + } + } + } + + std::cout << "\n======================================" << std::endl; + std::cout << "Unity Catalog API example completed successfully!" << std::endl; + + } catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + return 0; +} diff --git a/include/databricks/unity_catalog/unity_catalog.h b/include/databricks/unity_catalog/unity_catalog.h new file mode 100644 index 0000000..3a36687 --- /dev/null +++ b/include/databricks/unity_catalog/unity_catalog.h @@ -0,0 +1,218 @@ +#pragma once + +#include "databricks/core/config.h" +#include "databricks/unity_catalog/unity_catalog_types.h" + +#include +#include +#include + +namespace databricks { + // Forward declaration for dependency injection + namespace internal { + class IHttpClient; + } + + /** + * @brief Client for interacting with the Databricks Unity Catalog API + * + * Unity Catalog provides a unified governance solution for data and AI assets. + * This implementation uses Unity Catalog REST API 2.1. 
+ * + * Example usage: + * @code + * databricks::AuthConfig auth = databricks::AuthConfig::from_environment(); + * databricks::UnityCatalog uc(auth); + * + * // List all catalogs + * auto catalogs = uc.list_catalogs(); + * + * // Get specific catalog details + * auto catalog = uc.get_catalog("main"); + * + * // Create a new catalog + * databricks::CreateCatalogRequest req; + * req.name = "my_catalog"; + * req.comment = "My data catalog"; + * uc.create_catalog(req); + * + * // List schemas in a catalog + * auto schemas = uc.list_schemas("main"); + * + * // List tables in a schema + * auto tables = uc.list_tables("main", "default"); + * @endcode + */ + class UnityCatalog { + public: + /** + * @brief Construct a Unity Catalog API client + * @param auth Authentication configuration with host and token + * @param api_version Unity Catalog API version to use (default: "2.1") + */ + explicit UnityCatalog(const AuthConfig& auth, const std::string& api_version = "2.1"); + + /** + * @brief Construct a Unity Catalog API client with dependency injection (for testing) + * @param http_client Injected HTTP client (use MockHttpClient for unit tests) + * @note This constructor is primarily for testing with mock HTTP clients + */ + explicit UnityCatalog(std::shared_ptr http_client); + + /** + * @brief Destructor + */ + ~UnityCatalog(); + + // Disable copy + UnityCatalog(const UnityCatalog&) = delete; + UnityCatalog& operator=(const UnityCatalog&) = delete; + + // ==================== CATALOG OPERATIONS ==================== + + /** + * @brief List all catalogs in the metastore + * + * @return Vector of CatalogInfo objects + * @throws std::runtime_error if the API request fails + */ + std::vector list_catalogs(); + + /** + * @brief Get detailed information about a specific catalog + * + * @param catalog_name The name of the catalog + * @return CatalogInfo object with full details + * @throws std::runtime_error if the catalog is not found or the API request fails + */ + CatalogInfo 
get_catalog(const std::string& catalog_name); + + /** + * @brief Create a new catalog + * + * @param request Configuration for the new catalog + * @return CatalogInfo object representing the created catalog + * @throws std::runtime_error if the API request fails + */ + CatalogInfo create_catalog(const CreateCatalogRequest& request); + + /** + * @brief Update an existing catalog + * + * @param request Configuration for updating the catalog + * @return CatalogInfo object representing the updated catalog + * @throws std::runtime_error if the API request fails + */ + CatalogInfo update_catalog(const UpdateCatalogRequest& request); + + /** + * @brief Delete a catalog + * + * @param catalog_name The name of the catalog to delete + * @param force If true, deletes the catalog even if it's not empty + * @return true if the operation was successful + * @throws std::runtime_error if the API request fails + * + * @note By default, you cannot delete a catalog that contains schemas. + * Set force=true to delete a catalog and all its contents. 
+ */ + bool delete_catalog(const std::string& catalog_name, bool force = false); + + // ==================== SCHEMA OPERATIONS ==================== + + /** + * @brief List all schemas in a catalog + * + * @param catalog_name The name of the catalog + * @return Vector of SchemaInfo objects + * @throws std::runtime_error if the API request fails + */ + std::vector list_schemas(const std::string& catalog_name); + + /** + * @brief Get detailed information about a specific schema + * + * @param full_name The full name of the schema (catalog.schema) + * @return SchemaInfo object with full details + * @throws std::runtime_error if the schema is not found or the API request fails + */ + SchemaInfo get_schema(const std::string& full_name); + + /** + * @brief Create a new schema + * + * @param request Configuration for the new schema + * @return SchemaInfo object representing the created schema + * @throws std::runtime_error if the API request fails + */ + SchemaInfo create_schema(const CreateSchemaRequest& request); + + /** + * @brief Update an existing schema + * + * @param request Configuration for updating the schema + * @return SchemaInfo object representing the updated schema + * @throws std::runtime_error if the API request fails + */ + SchemaInfo update_schema(const UpdateSchemaRequest& request); + + /** + * @brief Delete a schema + * + * @param full_name The full name of the schema to delete (catalog.schema) + * @return true if the operation was successful + * @throws std::runtime_error if the API request fails + * + * @note The schema must be empty (no tables) before deletion + */ + bool delete_schema(const std::string& full_name); + + // ==================== TABLE OPERATIONS ==================== + + /** + * @brief List all tables in a schema + * + * @param catalog_name The name of the catalog + * @param schema_name The name of the schema + * @return Vector of TableInfo objects + * @throws std::runtime_error if the API request fails + */ + std::vector 
list_tables(const std::string& catalog_name, + const std::string& schema_name); + + /** + * @brief Get detailed information about a specific table + * + * @param full_name The full name of the table (catalog.schema.table) + * @return TableInfo object with full details + * @throws std::runtime_error if the table is not found or the API request fails + */ + TableInfo get_table(const std::string& full_name); + + /** + * @brief Delete a table + * + * @param full_name The full name of the table to delete (catalog.schema.table) + * @return true if the operation was successful + * @throws std::runtime_error if the API request fails + * + * @note For managed tables, this also deletes the underlying data. + * For external tables, only the metadata is deleted. + */ + bool delete_table(const std::string& full_name); + + private: + class Impl; + std::unique_ptr pimpl_; + + // Parsing methods + static CatalogInfo parse_catalog(const std::string& json_str); + static std::vector parse_catalog_list(const std::string& json_str); + static SchemaInfo parse_schema(const std::string& json_str); + static std::vector parse_schema_list(const std::string& json_str); + static TableInfo parse_table(const std::string& json_str); + static std::vector parse_table_list(const std::string& json_str); + static ColumnInfo parse_column(const std::string& json_str); + }; + +} // namespace databricks diff --git a/include/databricks/unity_catalog/unity_catalog_types.h b/include/databricks/unity_catalog/unity_catalog_types.h new file mode 100644 index 0000000..b7428c1 --- /dev/null +++ b/include/databricks/unity_catalog/unity_catalog_types.h @@ -0,0 +1,202 @@ +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace databricks { + + /** + * @brief Enumeration of catalog types + */ + enum class CatalogTypeEnum { + MANAGED_CATALOG, ///< Databricks-managed catalog + EXTERNAL_CATALOG, ///< External catalog (e.g., AWS Glue, Azure) + SYSTEM_CATALOG, ///< System catalog + UNKNOWN 
///< Unknown catalog type + }; + + /** + * @brief Parse a catalog type string into CatalogTypeEnum + * @param type_str String representation of the catalog type + * @return CatalogTypeEnum corresponding to the string + */ + CatalogTypeEnum parse_catalog_type(const std::string& type_str); + + /** + * @brief Convert CatalogTypeEnum to string representation + * @param type CatalogTypeEnum value + * @return String representation of the catalog type + */ + std::string catalog_type_to_string(CatalogTypeEnum type); + + /** + * @brief Enumeration of table types + */ + enum class TableTypeEnum { + MANAGED, ///< Managed table + EXTERNAL, ///< External table + VIEW, ///< View + MATERIALIZED_VIEW, ///< Materialized view + STREAMING_TABLE, ///< Streaming table + UNKNOWN ///< Unknown table type + }; + + /** + * @brief Parse a table type string into TableTypeEnum + * @param type_str String representation of the table type + * @return TableTypeEnum corresponding to the string + */ + TableTypeEnum parse_table_type(const std::string& type_str); + + /** + * @brief Convert TableTypeEnum to string representation + * @param type TableTypeEnum value + * @return String representation of the table type + */ + std::string table_type_to_string(TableTypeEnum type); + + /** + * @brief Represents a Unity Catalog catalog + * + * Catalogs are the top-level container for organizing data objects in Unity Catalog. + */ + struct CatalogInfo { + std::string name; ///< Name of the catalog + std::string comment; ///< User-provided description + std::string owner; ///< Owner of the catalog + std::string catalog_type; ///< Type of catalog (MANAGED_CATALOG, etc.) 
+ uint64_t created_at = 0; ///< Unix timestamp in milliseconds when created + uint64_t updated_at = 0; ///< Unix timestamp in milliseconds when last updated + std::string metastore_id; ///< ID of the metastore containing this catalog + std::string full_name; ///< Full name of the catalog + std::map properties; ///< Catalog properties/metadata + std::optional storage_root; ///< Storage root location (for external catalogs) + std::optional storage_location; ///< Storage location (for managed catalogs) + }; + + /** + * @brief Represents a Unity Catalog schema + * + * Schemas are containers for tables, views, and functions within a catalog. + */ + struct SchemaInfo { + std::string name; ///< Name of the schema + std::string catalog_name; ///< Parent catalog name + std::string comment; ///< User-provided description + std::string owner; ///< Owner of the schema + uint64_t created_at = 0; ///< Unix timestamp in milliseconds when created + uint64_t updated_at = 0; ///< Unix timestamp in milliseconds when last updated + std::string metastore_id; ///< ID of the metastore containing this schema + std::string full_name; ///< Full name (catalog.schema) + std::map properties; ///< Schema properties/metadata + std::optional storage_root; ///< Storage root location + std::optional storage_location; ///< Storage location + }; + + /** + * @brief Represents column information + */ + struct ColumnInfo { + std::string name; ///< Column name + std::string type_text; ///< Data type as text + std::string type_name; ///< Type name (e.g., INT, STRING) + int position = 0; ///< Ordinal position in table + std::string comment; ///< Column description + bool nullable = true; ///< Whether column can be null + std::optional partition_index; ///< Partition index if partitioned + }; + + /** + * @brief Represents a Unity Catalog table + * + * Tables are the primary data storage objects in Unity Catalog. 
+ */ + struct TableInfo { + std::string name; ///< Table name + std::string catalog_name; ///< Parent catalog name + std::string schema_name; ///< Parent schema name + std::string table_type; ///< Type of table (MANAGED, EXTERNAL, VIEW, etc.) + std::string data_source_format; ///< Format (DELTA, PARQUET, CSV, etc.) + std::string comment; ///< User-provided description + std::string owner; ///< Owner of the table + uint64_t created_at = 0; ///< Unix timestamp in milliseconds when created + uint64_t updated_at = 0; ///< Unix timestamp in milliseconds when last updated + std::string metastore_id; ///< ID of the metastore containing this table + std::string full_name; ///< Full name (catalog.schema.table) + std::optional storage_location; ///< Storage location + std::map properties; ///< Table properties/metadata + std::vector columns; ///< Column definitions + std::optional view_definition; ///< SQL definition for views + std::optional table_id; ///< Unique table identifier + }; + + /** + * @brief Configuration for creating a catalog + */ + struct CreateCatalogRequest { + std::string name; ///< Name of the catalog (required) + std::string comment; ///< User-provided description + std::map properties; ///< Catalog properties/metadata + std::optional storage_root; ///< Storage root location + }; + + /** + * @brief Configuration for updating a catalog + */ + struct UpdateCatalogRequest { + std::string name; ///< Name of the catalog (required) + std::optional new_name; ///< New name for the catalog + std::optional comment; ///< Updated description + std::optional owner; ///< New owner + std::map properties; ///< Updated properties + }; + + /** + * @brief Configuration for creating a schema + */ + struct CreateSchemaRequest { + std::string name; ///< Name of the schema (required) + std::string catalog_name; ///< Parent catalog name (required) + std::string comment; ///< User-provided description + std::map properties; ///< Schema properties/metadata + std::optional 
storage_root; ///< Storage root location + }; + + /** + * @brief Configuration for updating a schema + */ + struct UpdateSchemaRequest { + std::string full_name; ///< Full name (catalog.schema) (required) + std::optional new_name; ///< New name for the schema + std::optional comment; ///< Updated description + std::optional owner; ///< New owner + std::map properties; ///< Updated properties + }; + + // ==================== JSON SERIALIZATION ==================== + + /** + * @brief Convert CreateCatalogRequest to JSON + */ + void to_json(nlohmann::json& j, const CreateCatalogRequest& req); + + /** + * @brief Convert UpdateCatalogRequest to JSON + */ + void to_json(nlohmann::json& j, const UpdateCatalogRequest& req); + + /** + * @brief Convert CreateSchemaRequest to JSON + */ + void to_json(nlohmann::json& j, const CreateSchemaRequest& req); + + /** + * @brief Convert UpdateSchemaRequest to JSON + */ + void to_json(nlohmann::json& j, const UpdateSchemaRequest& req); + +} // namespace databricks diff --git a/include/databricks/version.h b/include/databricks/version.h index 5d74d98..d63a54c 100644 --- a/include/databricks/version.h +++ b/include/databricks/version.h @@ -11,9 +11,9 @@ namespace databricks /** * @brief SDK version information */ - constexpr const char *VERSION = "0.2.2"; + constexpr const char *VERSION = "0.2.3"; constexpr int VERSION_MAJOR = 0; constexpr int VERSION_MINOR = 2; - constexpr int VERSION_PATCH = 2; + constexpr int VERSION_PATCH = 3; } // namespace databricks diff --git a/src/internal/http_client.cpp b/src/internal/http_client.cpp index e5d858c..d805407 100644 --- a/src/internal/http_client.cpp +++ b/src/internal/http_client.cpp @@ -40,7 +40,8 @@ namespace databricks { return total_size; } - HttpClient::HttpClient(const AuthConfig& auth) : auth_(auth) { + HttpClient::HttpClient(const AuthConfig& auth, const std::string& api_version) + : auth_(auth), api_version_(api_version) { static bool curl_initialized = false; // Initialize Curl Client 
@@ -51,7 +52,7 @@ namespace databricks { } std::string HttpClient::get_base_url() const { - return auth_.host + "/api/2.2"; + return auth_.host + "/api/" + api_version_; } std::map HttpClient::get_headers() const { diff --git a/src/internal/http_client.h b/src/internal/http_client.h index 8bbee0e..48336af 100644 --- a/src/internal/http_client.h +++ b/src/internal/http_client.h @@ -14,7 +14,7 @@ namespace databricks { */ class HttpClient : public IHttpClient { public: - explicit HttpClient( const AuthConfig& auth ); + explicit HttpClient( const AuthConfig& auth, const std::string& api_version = "2.2" ); /** * @brief Wrapper around a GET REST API Call @@ -39,6 +39,7 @@ namespace databricks { private: AuthConfig auth_; + std::string api_version_; std::string get_base_url() const; std::map get_headers() const; diff --git a/src/unity_catalog/unity_catalog.cpp b/src/unity_catalog/unity_catalog.cpp new file mode 100644 index 0000000..bdd6b04 --- /dev/null +++ b/src/unity_catalog/unity_catalog.cpp @@ -0,0 +1,454 @@ +#include "databricks/unity_catalog/unity_catalog.h" +#include "../internal/http_client.h" +#include "../internal/http_client_interface.h" +#include "../internal/logger.h" + +#include + +using json = nlohmann::json; + +namespace databricks { + // ==================== PIMPL IMPLEMENTATION ==================== + + class UnityCatalog::Impl { + public: + // Constructor for production use (creates real HttpClient with Unity Catalog API version) + explicit Impl(const AuthConfig& auth, const std::string& api_version = "2.1") + : http_client_(std::make_shared(auth, api_version)) {} + + // Constructor for testing (accepts injected client) + explicit Impl(std::shared_ptr client) + : http_client_(std::move(client)) {} + + std::shared_ptr http_client_; + }; + + // ==================== CONSTRUCTORS & DESTRUCTOR ==================== + + UnityCatalog::UnityCatalog(const AuthConfig& auth, const std::string& api_version) + : pimpl_(std::make_unique(auth, api_version)) {} + + 
UnityCatalog::UnityCatalog(std::shared_ptr http_client) + : pimpl_(std::make_unique(std::move(http_client))) {} + + UnityCatalog::~UnityCatalog() = default; + + // ==================== CATALOG OPERATIONS ==================== + + std::vector UnityCatalog::list_catalogs() { + internal::get_logger()->info("Listing Unity Catalog catalogs"); + + auto response = pimpl_->http_client_->get("/unity-catalog/catalogs"); + pimpl_->http_client_->check_response(response, "listCatalogs"); + + internal::get_logger()->debug("Catalogs list response: " + response.body); + return parse_catalog_list(response.body); + } + + CatalogInfo UnityCatalog::get_catalog(const std::string& catalog_name) { + internal::get_logger()->info("Getting catalog details for catalog=" + catalog_name); + + auto response = pimpl_->http_client_->get("/unity-catalog/catalogs/" + catalog_name); + pimpl_->http_client_->check_response(response, "getCatalog"); + + internal::get_logger()->debug("Catalog details response: " + response.body); + return parse_catalog(response.body); + } + + CatalogInfo UnityCatalog::create_catalog(const CreateCatalogRequest& request) { + internal::get_logger()->info("Creating catalog: " + request.name); + + json body_json = request; + std::string body = body_json.dump(); + internal::get_logger()->debug("Create catalog request body: " + body); + + auto response = pimpl_->http_client_->post("/unity-catalog/catalogs", body); + pimpl_->http_client_->check_response(response, "createCatalog"); + + internal::get_logger()->info("Successfully created catalog: " + request.name); + return parse_catalog(response.body); + } + + CatalogInfo UnityCatalog::update_catalog(const UpdateCatalogRequest& request) { + internal::get_logger()->info("Updating catalog: " + request.name); + + json body_json = request; + std::string body = body_json.dump(); + internal::get_logger()->debug("Update catalog request body: " + body); + + auto response = pimpl_->http_client_->post("/unity-catalog/catalogs/" + 
request.name, body); + pimpl_->http_client_->check_response(response, "updateCatalog"); + + internal::get_logger()->info("Successfully updated catalog: " + request.name); + return parse_catalog(response.body); + } + + bool UnityCatalog::delete_catalog(const std::string& catalog_name, bool force) { + internal::get_logger()->info("Deleting catalog: " + catalog_name); + + // Relative path, matching every other endpoint in this file: the client's base URL already carries "/api/" + api_version + std::string endpoint = "/unity-catalog/catalogs/" + catalog_name; + if (force) { + endpoint += "?force=true"; + } + + internal::get_logger()->debug("Delete catalog endpoint: " + endpoint); + + auto response = pimpl_->http_client_->post(endpoint, ""); + pimpl_->http_client_->check_response(response, "deleteCatalog"); + + internal::get_logger()->info("Successfully deleted catalog: " + catalog_name); + return true; + } + + // ==================== SCHEMA OPERATIONS ==================== + + std::vector UnityCatalog::list_schemas(const std::string& catalog_name) { + internal::get_logger()->info("Listing schemas in catalog: " + catalog_name); + + auto response = pimpl_->http_client_->get("/unity-catalog/schemas?catalog_name=" + catalog_name); + pimpl_->http_client_->check_response(response, "listSchemas"); + + internal::get_logger()->debug("Schemas list response: " + response.body); + return parse_schema_list(response.body); + } + + SchemaInfo UnityCatalog::get_schema(const std::string& full_name) { + internal::get_logger()->info("Getting schema details for: " + full_name); + + auto response = pimpl_->http_client_->get("/unity-catalog/schemas/" + full_name); + pimpl_->http_client_->check_response(response, "getSchema"); + + internal::get_logger()->debug("Schema details response: " + response.body); + return parse_schema(response.body); + } + + SchemaInfo UnityCatalog::create_schema(const CreateSchemaRequest& request) { + internal::get_logger()->info("Creating schema: " + request.catalog_name + "." 
+ request.name); + + json body_json = request; + std::string body = body_json.dump(); + internal::get_logger()->debug("Create schema request body: " + body); + + auto response = pimpl_->http_client_->post("/unity-catalog/schemas", body); + pimpl_->http_client_->check_response(response, "createSchema"); + + internal::get_logger()->info("Successfully created schema: " + request.catalog_name + "." + request.name); + return parse_schema(response.body); + } + + SchemaInfo UnityCatalog::update_schema(const UpdateSchemaRequest& request) { + internal::get_logger()->info("Updating schema: " + request.full_name); + + json body_json = request; + std::string body = body_json.dump(); + internal::get_logger()->debug("Update schema request body: " + body); + + auto response = pimpl_->http_client_->post("/unity-catalog/schemas/" + request.full_name, body); + pimpl_->http_client_->check_response(response, "updateSchema"); + + internal::get_logger()->info("Successfully updated schema: " + request.full_name); + return parse_schema(response.body); + } + + bool UnityCatalog::delete_schema(const std::string& full_name) { + internal::get_logger()->info("Deleting schema: " + full_name); + + auto response = pimpl_->http_client_->post("/unity-catalog/schemas/" + full_name, ""); + pimpl_->http_client_->check_response(response, "deleteSchema"); + + internal::get_logger()->info("Successfully deleted schema: " + full_name); + return true; + } + + // ==================== TABLE OPERATIONS ==================== + + std::vector UnityCatalog::list_tables(const std::string& catalog_name, + const std::string& schema_name) { + internal::get_logger()->info("Listing tables in " + catalog_name + "." 
+ schema_name); + + // Create Endpoint with Catalog and Schema name + std::string endpoint = "/unity-catalog/tables?catalog_name=" + catalog_name + + "&schema_name=" + schema_name; + auto response = pimpl_->http_client_->get(endpoint); + pimpl_->http_client_->check_response(response, "listTables"); + + internal::get_logger()->debug("Tables list response: " + response.body); + return parse_table_list(response.body); + } + + TableInfo UnityCatalog::get_table(const std::string& full_name) { + internal::get_logger()->info("Getting table details for: " + full_name); + + auto response = pimpl_->http_client_->get("/unity-catalog/tables/" + full_name); + pimpl_->http_client_->check_response(response, "getTable"); + + internal::get_logger()->debug("Table details response: " + response.body); + return parse_table(response.body); + } + + bool UnityCatalog::delete_table(const std::string& full_name) { + internal::get_logger()->info("Deleting table: " + full_name); + + auto response = pimpl_->http_client_->post("/unity-catalog/tables/" + full_name, ""); + pimpl_->http_client_->check_response(response, "deleteTable"); + + internal::get_logger()->info("Successfully deleted table: " + full_name); + return true; + } + + // ==================== PRIVATE PARSING METHODS ==================== + + CatalogInfo UnityCatalog::parse_catalog(const std::string& json_str) { + try { + auto j = json::parse(json_str); + CatalogInfo catalog; + + catalog.name = j.value("name", ""); + catalog.comment = j.value("comment", ""); + catalog.owner = j.value("owner", ""); + catalog.catalog_type = j.value("catalog_type", ""); + catalog.created_at = j.value("created_at", uint64_t(0)); + catalog.updated_at = j.value("updated_at", uint64_t(0)); + catalog.metastore_id = j.value("metastore_id", ""); + catalog.full_name = j.value("full_name", ""); + + // Parse properties if present + if (j.contains("properties") && j["properties"].is_object()) { + for (auto& [key, value] : j["properties"].items()) { + if 
(value.is_string()) { + catalog.properties[key] = value.get(); + } + } + } + + // Parse optional fields + if (j.contains("storage_root") && !j["storage_root"].is_null()) { + catalog.storage_root = j["storage_root"].get(); + } + + if (j.contains("storage_location") && !j["storage_location"].is_null()) { + catalog.storage_location = j["storage_location"].get(); + } + + return catalog; + } catch (const json::exception& e) { + throw std::runtime_error("Failed to parse Catalog JSON: " + std::string(e.what())); + } + } + + std::vector UnityCatalog::parse_catalog_list(const std::string& json_str) { + std::vector catalogs; + + try { + auto j = json::parse(json_str); + + if (!j.contains("catalogs") || !j["catalogs"].is_array()) { + internal::get_logger()->warn("No catalogs array found in response"); + return catalogs; + } + + for (const auto& catalog_json : j["catalogs"]) { + catalogs.push_back(parse_catalog(catalog_json.dump())); + } + + internal::get_logger()->info("Parsed " + std::to_string(catalogs.size()) + " catalogs"); + } catch (const json::exception& e) { + internal::get_logger()->error("Failed to parse catalogs list: " + std::string(e.what())); + throw std::runtime_error("Failed to parse catalogs list: " + std::string(e.what())); + } + + return catalogs; + } + + SchemaInfo UnityCatalog::parse_schema(const std::string& json_str) { + try { + auto j = json::parse(json_str); + SchemaInfo schema; + + schema.name = j.value("name", ""); + schema.catalog_name = j.value("catalog_name", ""); + schema.comment = j.value("comment", ""); + schema.owner = j.value("owner", ""); + schema.created_at = j.value("created_at", uint64_t(0)); + schema.updated_at = j.value("updated_at", uint64_t(0)); + schema.metastore_id = j.value("metastore_id", ""); + schema.full_name = j.value("full_name", ""); + + // Parse properties if present + if (j.contains("properties") && j["properties"].is_object()) { + for (auto& [key, value] : j["properties"].items()) { + if (value.is_string()) { + 
schema.properties[key] = value.get(); + } + } + } + + // Parse optional fields + if (j.contains("storage_root") && !j["storage_root"].is_null()) { + schema.storage_root = j["storage_root"].get(); + } + + if (j.contains("storage_location") && !j["storage_location"].is_null()) { + schema.storage_location = j["storage_location"].get(); + } + + return schema; + } catch (const json::exception& e) { + throw std::runtime_error("Failed to parse Schema JSON: " + std::string(e.what())); + } + } + + std::vector UnityCatalog::parse_schema_list(const std::string& json_str) { + std::vector schemas; + + try { + auto j = json::parse(json_str); + + if (!j.contains("schemas") || !j["schemas"].is_array()) { + internal::get_logger()->warn("No schemas array found in response"); + return schemas; + } + + for (const auto& schema_json : j["schemas"]) { + schemas.push_back(parse_schema(schema_json.dump())); + } + + internal::get_logger()->info("Parsed " + std::to_string(schemas.size()) + " schemas"); + } catch (const json::exception& e) { + internal::get_logger()->error("Failed to parse schemas list: " + std::string(e.what())); + throw std::runtime_error("Failed to parse schemas list: " + std::string(e.what())); + } + + return schemas; + } + + ColumnInfo UnityCatalog::parse_column(const std::string& json_str) { + try { + auto j = json::parse(json_str); + ColumnInfo column; + + column.name = j.value("name", ""); + column.type_text = j.value("type_text", ""); + column.type_name = j.value("type_name", ""); + + // Parse position (can be number or string) + if (j.contains("position")) { + if (j["position"].is_number()) { + column.position = j["position"].get(); + } else if (j["position"].is_string()) { + try { + column.position = std::stoi(j["position"].get()); + } catch (...) 
{ + column.position = 0; + } + } + } + + column.comment = j.value("comment", ""); + column.nullable = j.value("nullable", true); + + // Parse optional partition index (can be number or string) + if (j.contains("partition_index") && !j["partition_index"].is_null()) { + if (j["partition_index"].is_string()) { + column.partition_index = j["partition_index"].get(); + } else if (j["partition_index"].is_number()) { + column.partition_index = std::to_string(j["partition_index"].get()); + } + } + + return column; + } catch (const json::exception& e) { + throw std::runtime_error("Failed to parse Column JSON: " + std::string(e.what())); + } + } + + TableInfo UnityCatalog::parse_table(const std::string& json_str) { + try { + auto j = json::parse(json_str); + TableInfo table; + + table.name = j.value("name", ""); + table.catalog_name = j.value("catalog_name", ""); + table.schema_name = j.value("schema_name", ""); + table.table_type = j.value("table_type", ""); + table.data_source_format = j.value("data_source_format", ""); + table.comment = j.value("comment", ""); + table.owner = j.value("owner", ""); + table.created_at = j.value("created_at", uint64_t(0)); + table.updated_at = j.value("updated_at", uint64_t(0)); + table.metastore_id = j.value("metastore_id", ""); + table.full_name = j.value("full_name", ""); + + // Parse optional storage location + if (j.contains("storage_location") && !j["storage_location"].is_null()) { + table.storage_location = j["storage_location"].get(); + } + + // Parse properties if present + if (j.contains("properties") && j["properties"].is_object()) { + for (auto& [key, value] : j["properties"].items()) { + if (value.is_string()) { + table.properties[key] = value.get(); + } + } + } + + // Parse columns if present + if (j.contains("columns") && j["columns"].is_array()) { + for (const auto& col_json : j["columns"]) { + table.columns.push_back(parse_column(col_json.dump())); + } + } + + // Parse optional view definition + if 
(j.contains("view_definition") && !j["view_definition"].is_null()) { + table.view_definition = j["view_definition"].get(); + } + + // Parse optional table_id (can be string or number) + if (j.contains("table_id") && !j["table_id"].is_null()) { + if (j["table_id"].is_string()) { + // Parse string to uint64_t + try { + table.table_id = std::stoull(j["table_id"].get()); + } catch (...) { + // If conversion fails, leave it unset + internal::get_logger()->warn("Failed to parse table_id as uint64_t"); + } + } else if (j["table_id"].is_number()) { + table.table_id = j["table_id"].get(); + } + } + + return table; + } catch (const json::exception& e) { + throw std::runtime_error("Failed to parse Table JSON: " + std::string(e.what())); + } + } + + std::vector UnityCatalog::parse_table_list(const std::string& json_str) { + std::vector tables; + + try { + auto j = json::parse(json_str); + + if (!j.contains("tables") || !j["tables"].is_array()) { + internal::get_logger()->warn("No tables array found in response"); + return tables; + } + + for (const auto& table_json : j["tables"]) { + tables.push_back(parse_table(table_json.dump())); + } + + internal::get_logger()->info("Parsed " + std::to_string(tables.size()) + " tables"); + } catch (const json::exception& e) { + internal::get_logger()->error("Failed to parse tables list: " + std::string(e.what())); + throw std::runtime_error("Failed to parse tables list: " + std::string(e.what())); + } + + return tables; + } + +} // namespace databricks diff --git a/src/unity_catalog/unity_catalog_types.cpp b/src/unity_catalog/unity_catalog_types.cpp new file mode 100644 index 0000000..d7e6f6b --- /dev/null +++ b/src/unity_catalog/unity_catalog_types.cpp @@ -0,0 +1,153 @@ +#include "databricks/unity_catalog/unity_catalog_types.h" + +#include +#include + +using json = nlohmann::json; + +namespace databricks { + + // ==================== CATALOG TYPE ENUM HELPERS ==================== + + CatalogTypeEnum parse_catalog_type(const std::string& 
type_str) { + // Map a catalog type string to its enum value; any unrecognized string yields UNKNOWN + if (type_str == "MANAGED_CATALOG") { + return CatalogTypeEnum::MANAGED_CATALOG; + } else if (type_str == "EXTERNAL_CATALOG") { + return CatalogTypeEnum::EXTERNAL_CATALOG; + } else if (type_str == "SYSTEM_CATALOG") { + return CatalogTypeEnum::SYSTEM_CATALOG; + } + return CatalogTypeEnum::UNKNOWN; + } + + std::string catalog_type_to_string(CatalogTypeEnum type) { + // Map a catalog type enum value to its string name; UNKNOWN and unlisted values yield "UNKNOWN" + switch (type) { + case CatalogTypeEnum::MANAGED_CATALOG: + return "MANAGED_CATALOG"; + case CatalogTypeEnum::EXTERNAL_CATALOG: + return "EXTERNAL_CATALOG"; + case CatalogTypeEnum::SYSTEM_CATALOG: + return "SYSTEM_CATALOG"; + case CatalogTypeEnum::UNKNOWN: + default: + return "UNKNOWN"; + } + } + + // ==================== TABLE TYPE ENUM HELPERS ==================== + + TableTypeEnum parse_table_type(const std::string& type_str) { + // Map a table type string to its enum value; any unrecognized string yields UNKNOWN + if (type_str == "MANAGED") { + return TableTypeEnum::MANAGED; + } else if (type_str == "EXTERNAL") { + return TableTypeEnum::EXTERNAL; + } else if (type_str == "VIEW") { + return TableTypeEnum::VIEW; + } else if (type_str == "MATERIALIZED_VIEW") { + return TableTypeEnum::MATERIALIZED_VIEW; + } else if (type_str == "STREAMING_TABLE") { + return TableTypeEnum::STREAMING_TABLE; + } + return TableTypeEnum::UNKNOWN; + } + + std::string table_type_to_string(TableTypeEnum type) { + // Map a table type enum value to its string name; UNKNOWN and unlisted values yield "UNKNOWN" + switch (type) { + case TableTypeEnum::MANAGED: + return "MANAGED"; + case TableTypeEnum::EXTERNAL: + return "EXTERNAL"; + case TableTypeEnum::VIEW: + return "VIEW"; + case TableTypeEnum::MATERIALIZED_VIEW: + return "MATERIALIZED_VIEW"; + case TableTypeEnum::STREAMING_TABLE: + return "STREAMING_TABLE"; + case TableTypeEnum::UNKNOWN: + default: + return "UNKNOWN"; + } + } + + // ==================== JSON SERIALIZATION ==================== + + void to_json(json& j, const CreateCatalogRequest& req) { + j = 
json{{"name", req.name}}; + + if (!req.comment.empty()) { + j["comment"] = req.comment; + } + + if (!req.properties.empty()) { + j["properties"] = req.properties; + } + + if (req.storage_root.has_value()) { + j["storage_root"] = req.storage_root.value(); + } + } + + void to_json(json& j, const UpdateCatalogRequest& req) { + j = json{{"name", req.name}}; + + if (req.new_name.has_value()) { + j["new_name"] = req.new_name.value(); + } + + if (req.comment.has_value()) { + j["comment"] = req.comment.value(); + } + + if (req.owner.has_value()) { + j["owner"] = req.owner.value(); + } + + if (!req.properties.empty()) { + j["properties"] = req.properties; + } + } + + void to_json(json& j, const CreateSchemaRequest& req) { + j = json{ + {"name", req.name}, + {"catalog_name", req.catalog_name} + }; + + if (!req.comment.empty()) { + j["comment"] = req.comment; + } + + if (!req.properties.empty()) { + j["properties"] = req.properties; + } + + if (req.storage_root.has_value()) { + j["storage_root"] = req.storage_root.value(); + } + } + + void to_json(json& j, const UpdateSchemaRequest& req) { + j = json{{"full_name", req.full_name}}; + + if (req.new_name.has_value()) { + j["new_name"] = req.new_name.value(); + } + + if (req.comment.has_value()) { + j["comment"] = req.comment.value(); + } + + if (req.owner.has_value()) { + j["owner"] = req.owner.value(); + } + + if (!req.properties.empty()) { + j["properties"] = req.properties; + } + } + +} // namespace databricks diff --git a/vcpkg.json b/vcpkg.json index 5c9c8c6..544fd35 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -1,6 +1,6 @@ { "name": "databricks-sdk-cpp", - "version": "0.2.2", + "version": "0.2.3", "description": "C++ SDK for Databricks, providing an interface for interacting with Databricks services via ODBC", "homepage": "https://github.com/calvinjmin/databricks-sdk-cpp", "license": "MIT",