
Commit e3e4c54

boonhapus committed

Merge branch 'v1.6.2' into dev
2 parents 38bac3d + f25f97e commit e3e4c54

12 files changed: +169 −40 lines


cs_tools/__project__.py

Lines changed: 1 addition & 1 deletion

@@ -1,4 +1,4 @@
-__version__ = "1.6.1"
+__version__ = "1.6.2"
 __docs__ = "https://thoughtspot.github.io/cs_tools/"
 __repo__ = "https://github.com/thoughtspot/cs_tools"
 __help__ = f"{__repo__}/discussions/"

cs_tools/_types.py

Lines changed: 4 additions & 1 deletion

@@ -12,7 +12,10 @@
 # ==========
 # Meta types
 # ==========
-ExitCode: _compat.TypeAlias = Literal[0, 1]
+ExitSuccess: _compat.TypeAlias = Literal[0]
+ExitFailure: _compat.TypeAlias = Literal[1]
+ExitWarning: _compat.TypeAlias = Literal[2]
+ExitCode: _compat.TypeAlias = Literal[ExitSuccess, ExitFailure, ExitWarning]
 PathLike: _compat.TypeAlias = Union[str, os.PathLike, pathlib.Path]

 # ==========
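PEP 586 allows a Literal to be parameterized by other Literal aliases, so the new ExitCode still flattens to Literal[0, 1, 2]. A minimal sketch (hypothetical helper, not from this commit) of how a command can narrow its return type against these aliases:

from typing import Literal

ExitSuccess = Literal[0]
ExitFailure = Literal[1]
ExitWarning = Literal[2]
ExitCode = Literal[ExitSuccess, ExitFailure, ExitWarning]  # equivalent to Literal[0, 1, 2]

def finish(n_errors: int, n_warnings: int) -> ExitCode:
    """Collapse job results into the three documented exit states."""
    if n_errors:
        return 1
    if n_warnings:
        return 2
    return 0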

cs_tools/api/client.py

Lines changed: 23 additions & 0 deletions

@@ -337,6 +337,24 @@ def metadata_tml_import(
         options["import_policy"] = policy
         return self.post("api/rest/2.0/metadata/tml/import", headers=options.pop("headers", None), json=options)

+    @pydantic.validate_call(validate_return=True, config=validators.METHOD_CONFIG)
+    @_transport.CachePolicy.mark_cacheable
+    def metadata_tml_async_import(
+        self, tmls: list[str], policy: _types.TMLImportPolicy, **options: Any
+    ) -> Awaitable[httpx.Response]:
+        """Schedules a task to import TML files into ThoughtSpot."""
+        options["metadata_tmls"] = tmls
+        options["import_policy"] = policy
+        return self.post("api/rest/2.0/metadata/tml/async/import", headers=options.pop("headers", None), json=options)
+
+    @pydantic.validate_call(validate_return=True, config=validators.METHOD_CONFIG)
+    def metadata_tml_async_status(
+        self, include_import_response: bool = True, **options: Any
+    ) -> Awaitable[httpx.Response]:
+        """Fetches the status of a scheduled TML import task."""
+        options["include_import_response"] = include_import_response
+        return self.post("api/rest/2.0/metadata/tml/async/status", headers=options.pop("headers", None), json=options)
+
     # ==================================================================================
     # CONNECTIONS :: https://developers.thoughtspot.com/docs/rest-apiv2-reference#_connections
     # ==================================================================================

@@ -443,6 +461,11 @@ def tags_create(self, name: _types.Name, **options: Any) -> Awaitable[httpx.Resp
         options["name"] = name
         return self.post("api/rest/2.0/tags/create", json=options)

+    @pydantic.validate_call(validate_return=True, config=validators.METHOD_CONFIG)
+    def tags_delete(self, tag_identifier: _types.ObjectIdentifier, **options: Any) -> Awaitable[httpx.Response]:
+        """Deletes a tag object."""
+        return self.post(f"api/rest/2.0/tags/{tag_identifier}/delete", json=options)
+
     @pydantic.validate_call(validate_return=True, config=validators.METHOD_CONFIG)
     def tags_assign(
         self, guid: _types.ObjectIdentifier, tag: _types.ObjectIdentifier, **options: Any
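Taken together, the two new endpoints form a submit-then-poll pair. A minimal sketch (not part of the commit) of driving them directly, assuming an already-authenticated client named api; the response shapes (task_id, status_list, task_status, import_response) are inferred from the workflow change below:

import asyncio

async def import_and_wait(api, tmls: list[str]) -> dict:
    # Submit the TML payloads as a server-side task.
    r = await api.metadata_tml_async_import(tmls=tmls, policy="ALL_OR_NONE")
    r.raise_for_status()
    task_id = r.json()["task_id"]

    # Poll every 5 seconds until ThoughtSpot reports the task COMPLETED.
    while True:
        r = await api.metadata_tml_async_status(task_ids=[task_id])
        r.raise_for_status()
        status = next(iter(r.json()["status_list"]), {})
        if status.get("task_status") == "COMPLETED":
            # Mimic the synchronous endpoint's response shape.
            return status["import_response"]["object"]
        await asyncio.sleep(5)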

cs_tools/api/workflows/metadata.py

Lines changed: 56 additions & 13 deletions

@@ -369,25 +369,68 @@ async def tml_import(
     tmls: list[TMLObject],
     *,
     policy: _types.TMLImportPolicy = "ALL_OR_NONE",
+    use_async_endpoint: bool = False,
+    wait_for_completion: bool = False,
+    log_errors: bool = False,
     http: RESTAPIClient,
     **tml_import_options,
 ) -> _types.APIResult:
     """Import a metadata object, alerting about warnings and errors."""
-    r = await http.metadata_tml_import(tmls=[t.dumps() for t in tmls], policy=policy, **tml_import_options)
-    r.raise_for_status()
+    if use_async_endpoint:
+        _LOG.debug(f"Async import initiated on {len(tmls):,} objects (behave synchronously: {wait_for_completion}).")
+        r = await http.metadata_tml_async_import(tmls=[t.dumps() for t in tmls], policy=policy, **tml_import_options)
+        r.raise_for_status()
+        d = r.json()
+
+        _LOG.debug(f"RAW DATA\n{json.dumps(d, indent=2, default=str)}\n")
+
+        # IF WE'RE NOT WAITING FOR THE JOB TO COMPLETE, RETURN THE ASYNC JOB INFO DIRECTLY.
+        if not wait_for_completion:
+            return d

-    for tml_import_info, tml in zip(r.json(), tmls):
-        tml_type = tml.tml_type_name.upper()
+        async_job_id = d["task_id"]

-        if tml_import_info["response"]["status"]["status_code"] == "ERROR":
-            errors = tml_import_info["response"]["status"]["error_message"].replace("<br/>", "\n")
-            _LOG.error(f"{tml_type} '{tml.name}' failed to import, ThoughtSpot errors:\n[fg-error]{errors}")
+        # AFTER FIVE 5-SECOND ITERATIONS (25s), WE'LL ELEVATE THE LOGGING LEVEL.
+        n_iterations = 0

-        if tml_import_info["response"]["status"]["status_code"] == "WARNING":
-            errors = tml_import_info["response"]["status"]["error_message"].replace("<br/>", "\n")
-            _LOG.warning(f"{tml_type} '{tml.name}' partially imported, ThoughtSpot errors:\n[fg-warn]{errors}")
+        # OTHERWISE, PROCESS THE JOB AS IF IT WERE A SYNCHRONOUS PAYLOAD.
+        while d.get("task_status") != "COMPLETED":
+            log_level = logging.DEBUG if n_iterations < 5 else logging.INFO
+            n_iterations += 1
+            _LOG.log(log_level, f"Checking status of asynchronous import {async_job_id}")
+            _ = await asyncio.sleep(5)  # type: ignore[func-returns-value]
+            r = await http.metadata_tml_async_status(task_ids=[async_job_id])
+            r.raise_for_status()
+
+            # RAW DATA
+            _ = r.json()
+            _LOG.debug(f"RAW DATA\n{json.dumps(_, indent=2, default=str)}\n")

-        if tml_import_info["response"]["status"]["status_code"] == "OK":
-            _LOG.debug(f"{tml_type} '{tml.name}' successfully imported")
+            # TAKE THE FIRST STATUS (WE ONLY HAVE 1 JOB), BUT ONLY REASSIGN THE while LOOP VAR IF THE KEY EXISTS.
+            d = next(iter(_["status_list"]), d)
+            _LOG.log(log_level, f"TASK ID: {async_job_id}\n{json.dumps(d, indent=2, default=str)}\n")

-    return r.json()
+        # POST-PROCESSING TO MIMIC THE SYNCHRONOUS RESPONSE.
+        d = d["import_response"]["object"]
+
+    else:
+        r = await http.metadata_tml_import(tmls=[t.dumps() for t in tmls], policy=policy, **tml_import_options)
+        r.raise_for_status()
+        d = r.json()
+
+    if log_errors:
+        for tml_import_info, tml in zip(d, tmls):
+            tml_type = tml.tml_type_name.upper()
+
+            if tml_import_info["response"]["status"]["status_code"] == "ERROR":
+                errors = tml_import_info["response"]["status"]["error_message"].replace("<br/>", "\n")
+                _LOG.error(f"{tml_type} '{tml.name}' failed to import, ThoughtSpot errors:\n[fg-error]{errors}")
+
+            if tml_import_info["response"]["status"]["status_code"] == "WARNING":
+                errors = tml_import_info["response"]["status"]["error_message"].replace("<br/>", "\n")
+                _LOG.warning(f"{tml_type} '{tml.name}' partially imported, ThoughtSpot errors:\n[fg-warn]{errors}")
+
+            if tml_import_info["response"]["status"]["status_code"] == "OK":
+                _LOG.debug(f"{tml_type} '{tml.name}' successfully imported")
+
+    return d
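A usage sketch (not in the commit) of the new knobs; ts.api, utils.run_sync, my_tmls, and the import path stand in for an authenticated client, the project's sync runner, parsed TMLObjects, and the actual module layout:

from cs_tools.api import workflows  # assumed import path, matching the file layout

coro = workflows.metadata.tml_import(
    tmls=my_tmls,                # assumed: a list of parsed TMLObject instances
    policy="PARTIAL",
    use_async_endpoint=True,     # submit via /metadata/tml/async/import
    wait_for_completion=True,    # poll /metadata/tml/async/status until COMPLETED
    log_errors=True,             # re-enables per-object ERROR/WARNING logging
    http=ts.api,                 # assumed: an authenticated RESTAPIClient
)
results = utils.run_sync(coro)   # same shape as the synchronous endpoint's payload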

cs_tools/cli/custom_types.py

Lines changed: 4 additions & 0 deletions

@@ -1,3 +1,7 @@
+"""
+Custom Types turn unwieldy CLI input into structured python types.
+"""
+
 from __future__ import annotations

 from collections.abc import Iterator, Sequence

cs_tools/cli/tools/bulk-deleter/app.py

Lines changed: 11 additions & 2 deletions

@@ -256,7 +256,7 @@ def from_tag(
     ]

     with px.WorkTracker("Deleting objects", tasks=TOOL_TASKS) as tracker:
-        guids_to_delete: set[_types.GUID] = {tag["metadata_id"]}
+        guids_to_delete: set[_types.GUID] = set()

         with tracker["PREPARE"] as this_task:
             if not tag_only:

@@ -270,6 +270,9 @@ def from_tag(
             if directory is None:
                 this_task.skip()

+            elif tag_only:
+                this_task.skip()
+
             else:
                 this_task.total = len(guids_to_delete)

@@ -292,7 +295,7 @@ async def _download_and_advance(guid: _types.GUID) -> None:

         tracker.extra_renderable = lambda: Align.center(
             console.Group(
-                Align.center(f"{len(guids_to_delete):,} objects will be deleted"),
+                Align.center(f"{1 if tag_only else len(guids_to_delete):,} objects will be deleted"),
                 "\n[fg-warn]Press [fg-success]Y[/] to proceed, or [fg-error]n[/] to cancel.",
             )
         )

@@ -318,6 +321,12 @@ async def _download_and_advance(guid: _types.GUID) -> None:
             this_task.total = len(guids_to_delete)
             delete_attempts = collections.defaultdict(int)

+            if tag_only:
+                c = ts.api.tags_delete(tag_identifier=tag["metadata_id"])
+                _ = utils.run_sync(c)
+                this_task.advance(step=1)
+                return 0
+
             async def _delete_and_advance(guid: _types.GUID) -> None:
                 delete_attempts[guid] += 1
                 r = await ts.api.metadata_delete(guid=guid)

cs_tools/cli/tools/scriptability/app.py

Lines changed: 23 additions & 7 deletions

@@ -200,6 +200,10 @@ def checkpoint(
         mode="EXPORT",
         environment=environment,
         status=table.job_status,
+        info={
+            "files_expected": len(coros),
+            "files_exported": sum(s.status != "ERROR" for s in table.statuses),
+        },
     )

     # RECORD THE GUID MAPPING

@@ -325,7 +329,7 @@ def deploy(
         return 1
     except Exception:
         _LOG.debug("Error Info:", exc_info=True)
-        _LOG.error("One of your .mappings/<env>-guid-mappings.json is in an invalid state, see logs for details..")
+        _LOG.error("One of your .mappings/<env>-guid-mappings.json may be in an invalid state, see logs for details..")
         return 1

     tmls: dict[_types.GUID, _types.TMLObject] = {}

@@ -347,18 +351,18 @@ def deploy(
             tmls[guid] = mapping_info.disambiguate(tml=tml, delete_unmapped_guids=True)

     if not tmls:
-        _LOG.info(f"No TML files found to deploy from directory (Deploy Type: {deploy_type}, Last Seen: {last_import_dt})")
+        _LOG.info(
+            f"No TML files found to deploy from directory (Deploy Type: {deploy_type}, Last Seen: {last_import_dt})"
+        )
         return 0

-    # Silence the cs_tools metadata workflow logger since we've asked the User if they want logged feedback.
-    logging.getLogger("cs_tools.api.workflows.metadata").setLevel(logging.CRITICAL)
-
     try:
         c = workflows.metadata.tml_import(
             tmls=list(tmls.values()),
-            use_async_endpoint=use_async_endpoint,
-            skip_diff_check=skip_diff_check,
             policy=deploy_policy,
+            use_async_endpoint=use_async_endpoint,
+            wait_for_completion=use_async_endpoint,
+            log_errors=False,
             http=ts.api,
         )
         _ = utils.run_sync(c)

@@ -381,6 +385,12 @@ def deploy(
         mode="VALIDATE" if deploy_policy == "VALIDATE_ONLY" else "IMPORT",
         environment=target_environment,
         status=table.job_status,
+        info={
+            "deploy_type": deploy_type,
+            "deploy_policy": deploy_policy,
+            "files_expected": len(tmls),
+            "files_deployed": 0 if not table.can_map_guids else sum(s.status != "ERROR" for s in table.statuses),
+        },
     )

     # INJECT ERRORS WITH MORE INFO FOR OUR USERS' CLARITY.

@@ -421,4 +431,10 @@ def deploy(
         _LOG.error("One or more TMLs failed to fully deploy, check the logs or use --log-errors for more details.")
         return 1

+    if table.job_status == "WARNING":
+        _LOG.warning(
+            "TMLs imported successfully with one or more warnings. Check the logs or use --log-errors for more details."
+        )
+        return 2
+
     return 0
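The new WARNING branch gives automation a third state to key off, matching the ExitWarning alias added in _types.py. A hedged sketch; the exact CLI arguments are illustrative, not copied from the docs:

import subprocess

# Illustrative invocation; substitute your real subcommand and flags.
proc = subprocess.run(["cs_tools", "tools", "scriptability", "deploy", "--config", "dev"])

if proc.returncode == 0:
    print("deployed cleanly")
elif proc.returncode == 2:
    print("deployed with warnings; review logs before promoting further")
else:
    print("deploy failed")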

cs_tools/cli/tools/scriptability/utils.py

Lines changed: 19 additions & 4 deletions

@@ -67,6 +67,9 @@ class MappingCheckpoint(pydantic.BaseModel):
     status: _types.TMLStatusCode
     """The status of the checkpoint.. OK, WARNING, or ERROR."""

+    info: dict[str, Any] = pydantic.Field(default={})
+    """Arbitrary information about what happened."""
+
     @pydantic.field_serializer("at")
     @classmethod
     def serialize_datetime(self, value: Optional[dt.datetime]) -> Optional[str]:

@@ -169,7 +172,13 @@ def merge(cls, *, source: pathlib.Path, target: pathlib.Path) -> GUIDMappingInfo
         return target_env

     def checkpoint(
-        self, *, by: str, mode: Literal["EXPORT", "VALIDATE", "IMPORT"], environment: str, status: _types.TMLStatusCode
+        self,
+        *,
+        by: str,
+        mode: Literal["EXPORT", "VALIDATE", "IMPORT"],
+        environment: str,
+        status: _types.TMLStatusCode,
+        info: Optional[dict[str, Any]] = None,
     ) -> None:
         """Checkpoint the GUID mapping info."""
         if mode != "EXPORT" and not any(checkpoint.mode in ("EXPORT", "VALIDATE") for checkpoint in self.history):

@@ -182,6 +191,7 @@ def checkpoint(
                 mode=mode,
                 environment=environment,
                 status=status,
+                info=info or {},
             )
         )

@@ -332,16 +342,21 @@ def statuses(self) -> list[TMLStatus]:
     @property
     def can_map_guids(self) -> bool:
         """Determine if the statuses' GUIDs should be mapped."""
+        # GUIDs are returned, but we shouldn't map them since nothing actually imported.
         if self.operation == "VALIDATE":
             return False

+        # GUIDs should not be returned if any object failed during an ALL_OR_NONE import.
         if self.policy == "ALL_OR_NONE" and self.job_status != "OK":
             return False

-        if self.policy == "PARTIAL" and any(_.status != "ERROR" for _ in self.statuses):
-            return True
+        # All objects failed to IMPORT.
+        if all(_.status == "ERROR" for _ in self.statuses):
+            return False

-        return self.job_status != "ERROR"
+        # At this point, at least one GUID has been returned, EVEN IF the whole job was marked as a failure.
+        # We may have as little as 1 failure or 1 warning causing the job to be marked this way.
+        return True

     @property
     def job_status(self) -> _types.TMLStatusCode:
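A minimal sketch (hypothetical standalone function, not the class property) of the decision order the rewritten can_map_guids now encodes, with two checks of the PARTIAL-vs-ALL_OR_NONE behavior:

def can_map_guids(operation: str, policy: str, job_status: str, statuses: list[str]) -> bool:
    # Nothing imported during validation, so there is nothing to map.
    if operation == "VALIDATE":
        return False
    # ALL_OR_NONE rolls everything back unless the whole job succeeded.
    if policy == "ALL_OR_NONE" and job_status != "OK":
        return False
    # Every single object failed, so no GUIDs came back.
    if all(s == "ERROR" for s in statuses):
        return False
    # Otherwise at least one object imported, even if the job was flagged.
    return True

assert can_map_guids("IMPORT", "PARTIAL", "ERROR", ["OK", "ERROR"]) is True
assert can_map_guids("IMPORT", "ALL_OR_NONE", "ERROR", ["OK", "ERROR"]) is False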

cs_tools/cli/tools/searchable/app.py

Lines changed: 3 additions & 3 deletions

@@ -409,9 +409,9 @@ def metadata(

     # FETCH ALL ORG IDs WE'LL NEED TO COLLECT FROM
     if ts.session_context.thoughtspot.is_orgs_enabled:
-        c = workflows.metadata.fetch_one(identifier=org_override, metadata_type="ORG", http=ts.api)
-        _ = utils.run_sync(c)
-        orgs = [_]
+        c = ts.api.orgs_search()
+        r = utils.run_sync(c)
+        orgs = [_ for _ in r.json() if org_override is None or _["name"].casefold() == org_override.casefold()]
     else:
         orgs = [{"id": 0, "name": "ThoughtSpot"}]
cs_tools/sync/databricks/syncer.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ class Databricks(DatabaseSyncer):
3333
schema_: Optional[str] = pydantic.Field(default="default", alias="schema")
3434
port: Optional[int] = 443
3535
temp_dir: Optional[pydantic.DirectoryPath] = pathlib.Path(".")
36+
use_legacy_dataload: bool = False
3637

3738
@pydantic.field_validator("access_token", mode="before")
3839
@classmethod
@@ -174,16 +175,28 @@ def dump(self, tablename: str, *, data: _types.TableRowsFormat) -> None:
174175
return
175176

176177
table = self.metadata.tables[f"{self.schema_}.{tablename}"]
177-
stage = self.stage_and_put(tablename=tablename, data=data)
178+
179+
if not self.use_legacy_dataload:
180+
stage = self.stage_and_put(tablename=tablename, data=data)
178181

179182
if self.load_strategy == "APPEND":
180-
self.copy_into(from_=stage, into=tablename)
183+
if self.use_legacy_dataload:
184+
sync_utils.batched(table.insert().values, session=self.session, data=data, max_parameters=250)
185+
else:
186+
self.copy_into(from_=stage, into=tablename)
181187

182188
if self.load_strategy == "TRUNCATE":
183189
self.session.execute(table.delete())
184-
self.copy_into(from_=stage, into=tablename)
190+
191+
if self.use_legacy_dataload:
192+
sync_utils.batched(table.insert().values, session=self.session, data=data, max_parameters=250)
193+
else:
194+
self.copy_into(from_=stage, into=tablename)
185195

186196
if self.load_strategy == "UPSERT":
187-
with self.temporary_table(table=table) as temp_table:
188-
self.copy_into(from_=stage, into=temp_table.name)
189-
self.merge_into(from_=temp_table.name, into=table)
197+
if self.use_legacy_dataload:
198+
sync_utils.generic_upsert(table, session=self.session, data=data, max_params=250)
199+
else:
200+
with self.temporary_table(table=table) as temp_table:
201+
self.copy_into(from_=stage, into=temp_table.name)
202+
self.merge_into(from_=temp_table.name, into=table)
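A minimal sketch of toggling the new flag when constructing the syncer directly in Python. Only use_legacy_dataload, schema, port, temp_dir, and access_token are confirmed by the diff above; the other connection fields are assumptions about the syncer's shape:

from cs_tools.sync.databricks.syncer import Databricks

# Hypothetical values throughout; fields not shown in the diff are assumed.
syncer = Databricks(
    server_hostname="adb-1234567890123456.7.azuredatabricks.net",  # assumed field
    http_path="/sql/1.0/warehouses/abc123",                        # assumed field
    access_token="dapi-REDACTED",
    schema="default",
    use_legacy_dataload=True,  # route writes through batched INSERTs instead of stage + COPY INTO
)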
