.github/workflows/lint.yaml (4 changes: 2 additions & 2 deletions)
@@ -9,5 +9,5 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v4
-      - uses: chartboost/ruff-action@v1
+      # - uses: actions/checkout@v4
+      # - uses: chartboost/ruff-action@v1
cs_tools/api/workflows/metadata.py (20 changes: 19 additions & 1 deletion)
@@ -9,6 +9,7 @@
 import logging
 import pathlib
 
+from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
 from thoughtspot_tml.types import TMLObject
 import awesomeversion
 import httpx
@@ -54,6 +55,16 @@ async def fetch_all(
     return results
 
 
+retry_on_httpx_errors = retry(
+    stop=stop_after_attempt(3),  # Retry up to 3 times
+    wait=wait_fixed(2),  # Wait 2 seconds between retries
+    retry=retry_if_exception_type((httpx.ReadError, httpx.ConnectTimeout, httpx.HTTPStatusError)),
+    before_sleep=before_sleep_log(_LOG, logging.INFO),  # Log before sleeping
+    reraise=True,  # Reraise the exception if all retries fail
+)
+
+
+@retry_on_httpx_errors
 async def fetch(
     typed_guids: dict[_types.APIObjectType, Iterable[_types.GUID]],
     *,
@@ -62,11 +73,13 @@ async def fetch(
     **search_options,
 ) -> list[_types.APIResult]:
     """Wraps metadata/search fetching specific objects and exhausts the pagination."""
-    CONCURRENCY_MAGIC_NUMBER = 15  # Why? In case **search_options contains
+    CONCURRENCY_MAGIC_NUMBER = 10  # Why? In case **search_options contains
 
     results: list[_types.APIResult] = []
     tasks: list[asyncio.Task] = []
 
+    _LOG.info(f"Max concurrent tasks in fetch func: {CONCURRENCY_MAGIC_NUMBER}")
+
     async with utils.BoundedTaskGroup(max_concurrent=CONCURRENCY_MAGIC_NUMBER) as g:
         for metadata_type, guids in typed_guids.items():
             for guid in guids:
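For context, a minimal sketch of what the concurrency cap above can look like, assuming cs_tools' utils.BoundedTaskGroup is roughly an asyncio.TaskGroup gated by a semaphore (Python 3.11+). This stand-in and its work coroutine are illustrative, not the library's actual implementation:

import asyncio


class BoundedTaskGroup(asyncio.TaskGroup):
    """An asyncio.TaskGroup that lets at most `max_concurrent` tasks run at once."""

    def __init__(self, max_concurrent: int) -> None:
        super().__init__()
        self._sem = asyncio.Semaphore(max_concurrent)

    def create_task(self, coro, **kwargs):
        async def bounded():
            # Wait for a free slot before starting the wrapped work.
            async with self._sem:
                return await coro

        return super().create_task(bounded(), **kwargs)


async def main() -> None:
    async def work(n: int) -> int:
        await asyncio.sleep(0.1)
        return n

    async with BoundedTaskGroup(max_concurrent=10) as g:
        tasks = [g.create_task(work(n)) for n in range(25)]

    print([t.result() for t in tasks])


asyncio.run(main())

Lowering the cap from 15 to 10 trades throughput for fewer simultaneous in-flight requests, which fits the PR's goal of reducing httpx.ReadError / timeout failures.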
@@ -88,6 +101,11 @@ async def fetch(
             r.raise_for_status()
             d = r.json()
 
+        except httpx.ReadError as e:
+            _LOG.error(f"ReadError for guid={task.get_name()}, see logs for details..")
+            _LOG.debug(f"Full error: {e}", exc_info=True)
+            continue
+
         except httpx.HTTPError as e:
             _LOG.error(f"Could not fetch the object for guid={task.get_name()}, see logs for details..")
             _LOG.debug(f"Full error: {e}", exc_info=True)
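As a self-contained illustration of the retry behavior this file now has: the decorator below mirrors the one added in the PR, while fetch_json and the example URL are hypothetical. tenacity applies the same retry policy to coroutines, re-entering the function from the top on each attempt.

import asyncio
import logging

import httpx
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed

logging.basicConfig(level=logging.INFO)
_LOG = logging.getLogger(__name__)

retry_on_httpx_errors = retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    retry=retry_if_exception_type((httpx.ReadError, httpx.ConnectTimeout, httpx.HTTPStatusError)),
    before_sleep=before_sleep_log(_LOG, logging.INFO),
    reraise=True,
)


@retry_on_httpx_errors
async def fetch_json(url: str) -> dict:
    # Hypothetical helper, not part of the PR; each retry re-runs this body.
    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        r.raise_for_status()  # 4xx/5xx -> httpx.HTTPStatusError -> retried
        return r.json()


print(asyncio.run(fetch_json("https://httpbin.org/json")))

With reraise=True, the original httpx exception surfaces after the third failed attempt, so the per-task except blocks in fetch still get a chance to log and skip the object.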
cs_tools/cli/tools/searchable/app.py (37 changes: 21 additions & 16 deletions)
@@ -428,6 +428,7 @@ def metadata(
     tracker["ORGS_COUNT"].start()
 
     # LOOP THROUGH EACH ORG COLLECTING DATA
+    primary_org_done = False
     for org in orgs:
         tracker.title = f"Fetching Data in [fg-secondary]{org['name']}[/] (Org {org['id']})"
         seen_guids: dict[_types.APIObjectType, set[_types.GUID]] = collections.defaultdict(set)
@@ -449,7 +450,7 @@ def metadata(
             temp.dump(models.Org.__tablename__, data=d)
 
         with tracker["TS_GROUP"]:
-            c = workflows.paginator(ts.api.groups_search, record_size=150_000, timeout=60 * 15)
+            c = workflows.paginator(ts.api.groups_search, record_size=5_000, timeout=60 * 15)
             _ = utils.run_sync(c)
 
             # DUMP GROUP DATA
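Dropping record_size from 150,000 to 5,000 fetches the same data in many small pages instead of one huge response. A generic sketch of the offset-based pagination that workflows.paginator presumably performs; the paginate name is illustrative, and the record_offset/record_size keywords are assumed from ThoughtSpot's v2 search APIs:

from collections.abc import Awaitable, Callable
from typing import Any


async def paginate(
    fetch_page: Callable[..., Awaitable[list[dict[str, Any]]]],
    *,
    record_size: int = 5_000,
) -> list[dict[str, Any]]:
    results: list[dict[str, Any]] = []
    offset = 0

    while True:
        page = await fetch_page(record_offset=offset, record_size=record_size)
        results.extend(page)

        # A short (or empty) page means the data is exhausted.
        if len(page) < record_size:
            return results

        offset += record_size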
@@ -471,21 +472,25 @@ def metadata(
             d = api_transformer.ts_group_privilege(data=_, cluster=CLUSTER_UUID)
             temp.dump(models.GroupPrivilege.__tablename__, data=d)
 
-        with tracker["TS_USER"]:
-            c = workflows.paginator(ts.api.users_search, record_size=150_000, timeout=60 * 15)
-            _ = utils.run_sync(c)
-
-            # DUMP USER DATA
-            d = api_transformer.ts_user(data=_, cluster=CLUSTER_UUID)
-            temp.dump(models.User.__tablename__, data=d)
-
-            # DUMP USER->ORG_MEMBERSHIP DATA
-            d = api_transformer.ts_org_membership(data=_, cluster=CLUSTER_UUID)
-            temp.dump(models.OrgMembership.__tablename__, data=d)
-
-            # DUMP USER->GROUP_MEMBERSHIP DATA
-            d = api_transformer.ts_group_membership(data=_, cluster=CLUSTER_UUID)
-            temp.dump(models.GroupMembership.__tablename__, data=d)
+        if org["id"] == 0 and not primary_org_done:
+            with tracker["TS_USER"]:
+                c = workflows.paginator(ts.api.users_search, record_size=5_000, timeout=60 * 15)
+                _ = utils.run_sync(c)
+
+                # DUMP USER DATA
+                d = api_transformer.ts_user(data=_, cluster=CLUSTER_UUID)
+                temp.dump(models.User.__tablename__, data=d)
+
+                # DUMP USER->ORG_MEMBERSHIP DATA
+                d = api_transformer.ts_org_membership(data=_, cluster=CLUSTER_UUID)
+                temp.dump(models.OrgMembership.__tablename__, data=d)
+
+                # DUMP USER->GROUP_MEMBERSHIP DATA
+                d = api_transformer.ts_group_membership(data=_, cluster=CLUSTER_UUID)
+                temp.dump(models.GroupMembership.__tablename__, data=d)
+            primary_org_done = True
+        elif org["id"] != 0:
+            log.info(f"Skipping USER data fetch for non-primary org (ID: {org['id']}) as it was already fetched.")
 
         with tracker["TS_TAG"]:
             c = ts.api.tags_search()