.github/workflows/lint.yaml (4 changes: 2 additions & 2 deletions)
@@ -9,5 +9,5 @@ jobs:
     runs-on: ubuntu-latest
 
     steps:
-      - uses: actions/checkout@v4
-      - uses: chartboost/ruff-action@v1
+      # - uses: actions/checkout@v4
+      # - uses: chartboost/ruff-action@v1
cs_tools/api/workflows/metadata.py (20 changes: 19 additions & 1 deletion)
@@ -9,6 +9,7 @@
 import logging
 import pathlib
 
+from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
 from thoughtspot_tml.types import TMLObject
 import awesomeversion
 import httpx
@@ -54,6 +55,16 @@ async def fetch_all(
     return results
 
 
+retry_on_httpx_errors = retry(
+    stop=stop_after_attempt(3),  # Retry up to 3 times
+    wait=wait_fixed(2),  # Wait 2 seconds between retries
+    retry=retry_if_exception_type((httpx.ReadError, httpx.ConnectTimeout, httpx.HTTPStatusError)),
+    before_sleep=before_sleep_log(_LOG, logging.INFO),  # Log before sleeping
+    reraise=True,  # Reraise the exception if all retries fail
+)
+
+
+@retry_on_httpx_errors
 async def fetch(
     typed_guids: dict[_types.APIObjectType, Iterable[_types.GUID]],
     *,
@@ -62,11 +73,13 @@ async def fetch(
     **search_options,
 ) -> list[_types.APIResult]:
     """Wraps metadata/search fetching specific objects and exhausts the pagination."""
-    CONCURRENCY_MAGIC_NUMBER = 15  # Why? In case **search_options contains
+    CONCURRENCY_MAGIC_NUMBER = 10  # Why? In case **search_options contains
 
     results: list[_types.APIResult] = []
     tasks: list[asyncio.Task] = []
 
+    _LOG.info(f"Max concurrent tasks in fetch func: {CONCURRENCY_MAGIC_NUMBER}")
+
     async with utils.BoundedTaskGroup(max_concurrent=CONCURRENCY_MAGIC_NUMBER) as g:
         for metadata_type, guids in typed_guids.items():
             for guid in guids:
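For context, a minimal sketch of what the concurrency cap above can look like, assuming cs_tools' utils.BoundedTaskGroup is roughly an asyncio.TaskGroup gated by a semaphore (Python 3.11+). This stand-in and its work coroutine are illustrative, not the library's actual implementation:

import asyncio


class BoundedTaskGroup(asyncio.TaskGroup):
    """An asyncio.TaskGroup that lets at most `max_concurrent` tasks run at once."""

    def __init__(self, max_concurrent: int) -> None:
        super().__init__()
        self._sem = asyncio.Semaphore(max_concurrent)

    def create_task(self, coro, **kwargs):
        async def bounded():
            # Wait for a free slot before starting the wrapped work.
            async with self._sem:
                return await coro

        return super().create_task(bounded(), **kwargs)


async def main() -> None:
    async def work(n: int) -> int:
        await asyncio.sleep(0.1)
        return n

    async with BoundedTaskGroup(max_concurrent=10) as g:
        tasks = [g.create_task(work(n)) for n in range(25)]

    print([t.result() for t in tasks])


asyncio.run(main())

Lowering the cap from 15 to 10 trades throughput for fewer simultaneous in-flight requests, which fits the PR's goal of reducing httpx.ReadError / timeout failures.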
@@ -88,6 +101,11 @@ async def fetch(
             r.raise_for_status()
             d = r.json()
 
+        except httpx.ReadError as e:
+            _LOG.error(f"ReadError for guid={task.get_name()}, see logs for details..")
+            _LOG.debug(f"Full error: {e}", exc_info=True)
+            continue
+
         except httpx.HTTPError as e:
             _LOG.error(f"Could not fetch the object for guid={task.get_name()}, see logs for details..")
             _LOG.debug(f"Full error: {e}", exc_info=True)
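As a self-contained illustration of the retry behavior this file now has: the decorator below mirrors the one added in the PR, while fetch_json and the example URL are hypothetical. tenacity applies the same retry policy to coroutines, re-entering the function from the top on each attempt.

import asyncio
import logging

import httpx
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed

logging.basicConfig(level=logging.INFO)
_LOG = logging.getLogger(__name__)

retry_on_httpx_errors = retry(
    stop=stop_after_attempt(3),
    wait=wait_fixed(2),
    retry=retry_if_exception_type((httpx.ReadError, httpx.ConnectTimeout, httpx.HTTPStatusError)),
    before_sleep=before_sleep_log(_LOG, logging.INFO),
    reraise=True,
)


@retry_on_httpx_errors
async def fetch_json(url: str) -> dict:
    # Hypothetical helper, not part of the PR; each retry re-runs this body.
    async with httpx.AsyncClient() as client:
        r = await client.get(url)
        r.raise_for_status()  # 4xx/5xx -> httpx.HTTPStatusError -> retried
        return r.json()


print(asyncio.run(fetch_json("https://httpbin.org/json")))

With reraise=True, the original httpx exception surfaces after the third failed attempt, so the per-task except blocks in fetch still get a chance to log and skip the object.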
cs_tools/cli/tools/searchable/app.py (37 changes: 21 additions & 16 deletions)
@@ -428,6 +428,7 @@ def metadata(
     tracker["ORGS_COUNT"].start()
 
     # LOOP THROUGH EACH ORG COLLECTING DATA
+    primary_org_done = False
     for org in orgs:
         tracker.title = f"Fetching Data in [fg-secondary]{org['name']}[/] (Org {org['id']})"
         seen_guids: dict[_types.APIObjectType, set[_types.GUID]] = collections.defaultdict(set)
@@ -449,7 +450,7 @@ def metadata(
             temp.dump(models.Org.__tablename__, data=d)
 
         with tracker["TS_GROUP"]:
-            c = workflows.paginator(ts.api.groups_search, record_size=150_000, timeout=60 * 15)
+            c = workflows.paginator(ts.api.groups_search, record_size=5_000, timeout=60 * 15)
             _ = utils.run_sync(c)
 
             # DUMP GROUP DATA
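Dropping record_size from 150,000 to 5,000 fetches the same data in many small pages instead of one huge response. A generic sketch of the offset-based pagination that workflows.paginator presumably performs; the paginate name is illustrative, and the record_offset/record_size keywords are assumed from ThoughtSpot's v2 search APIs:

from collections.abc import Awaitable, Callable
from typing import Any


async def paginate(
    fetch_page: Callable[..., Awaitable[list[dict[str, Any]]]],
    *,
    record_size: int = 5_000,
) -> list[dict[str, Any]]:
    results: list[dict[str, Any]] = []
    offset = 0

    while True:
        page = await fetch_page(record_offset=offset, record_size=record_size)
        results.extend(page)

        # A short (or empty) page means the data is exhausted.
        if len(page) < record_size:
            return results

        offset += record_size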
@@ -471,21 +472,25 @@ def metadata(
             d = api_transformer.ts_group_privilege(data=_, cluster=CLUSTER_UUID)
             temp.dump(models.GroupPrivilege.__tablename__, data=d)
 
-        with tracker["TS_USER"]:
-            c = workflows.paginator(ts.api.users_search, record_size=150_000, timeout=60 * 15)
-            _ = utils.run_sync(c)
-
-            # DUMP USER DATA
-            d = api_transformer.ts_user(data=_, cluster=CLUSTER_UUID)
-            temp.dump(models.User.__tablename__, data=d)
-
-            # DUMP USER->ORG_MEMBERSHIP DATA
-            d = api_transformer.ts_org_membership(data=_, cluster=CLUSTER_UUID)
-            temp.dump(models.OrgMembership.__tablename__, data=d)
-
-            # DUMP USER->GROUP_MEMBERSHIP DATA
-            d = api_transformer.ts_group_membership(data=_, cluster=CLUSTER_UUID)
-            temp.dump(models.GroupMembership.__tablename__, data=d)
+        if org["id"] == 0 and not primary_org_done:
+            with tracker["TS_USER"]:
+                c = workflows.paginator(ts.api.users_search, record_size=5_000, timeout=60 * 15)
+                _ = utils.run_sync(c)
+
+                # DUMP USER DATA
+                d = api_transformer.ts_user(data=_, cluster=CLUSTER_UUID)
+                temp.dump(models.User.__tablename__, data=d)
+
+                # DUMP USER->ORG_MEMBERSHIP DATA
+                d = api_transformer.ts_org_membership(data=_, cluster=CLUSTER_UUID)
+                temp.dump(models.OrgMembership.__tablename__, data=d)
+
+                # DUMP USER->GROUP_MEMBERSHIP DATA
+                d = api_transformer.ts_group_membership(data=_, cluster=CLUSTER_UUID)
+                temp.dump(models.GroupMembership.__tablename__, data=d)
+            primary_org_done = True
+        elif org["id"] != 0:
+            log.info(f"Skipping USER data fetch for non-primary org (ID: {org['id']}) as it was already fetched.")
 
         with tracker["TS_TAG"]:
             c = ts.api.tags_search()