Skip to content

Commit 860c67b

Browse files
authored
Merge pull request #225 from thoughtspot/dev
Fixed an issue with the concurrency in metadata searchable and user-management
2 parents cf70f68 + 0e8d727 commit 860c67b

File tree

5 files changed

+44
-21
lines changed

5 files changed

+44
-21
lines changed

.github/workflows/lint.yaml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -9,5 +9,5 @@ jobs:
99
runs-on: ubuntu-latest
1010

1111
steps:
12-
- uses: actions/checkout@v4
13-
- uses: chartboost/ruff-action@v1
12+
# - uses: actions/checkout@v4
13+
# - uses: chartboost/ruff-action@v1

cs_tools/__project__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.6.2"
1+
__version__ = "1.6.3"
22
__docs__ = "https://thoughtspot.github.io/cs_tools/"
33
__repo__ = "https://github.com/thoughtspot/cs_tools"
44
__help__ = f"{__repo__}/discussions/"

cs_tools/api/workflows/metadata.py

Lines changed: 19 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
import logging
1010
import pathlib
1111

12+
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
1213
from thoughtspot_tml.types import TMLObject
1314
import awesomeversion
1415
import httpx
@@ -54,6 +55,16 @@ async def fetch_all(
5455
return results
5556

5657

58+
retry_on_httpx_errors = retry(
59+
stop=stop_after_attempt(3), # Retry up to 3 times
60+
wait=wait_fixed(2), # Wait 2 seconds between retries
61+
retry=retry_if_exception_type((httpx.ReadError, httpx.ConnectTimeout, httpx.HTTPStatusError)),
62+
before_sleep=before_sleep_log(_LOG, logging.INFO), # Log before sleeping
63+
reraise=True, # Reraise the exception if all retries fail
64+
)
65+
66+
67+
@retry_on_httpx_errors
5768
async def fetch(
5869
typed_guids: dict[_types.APIObjectType, Iterable[_types.GUID]],
5970
*,
@@ -62,11 +73,13 @@ async def fetch(
6273
**search_options,
6374
) -> list[_types.APIResult]:
6475
"""Wraps metadata/search fetching specific objects and exhausts the pagination."""
65-
CONCURRENCY_MAGIC_NUMBER = 15 # Why? In case **search_options contains
76+
CONCURRENCY_MAGIC_NUMBER = 10 # Why? In case **search_options contains
6677

6778
results: list[_types.APIResult] = []
6879
tasks: list[asyncio.Task] = []
6980

81+
_LOG.info(f"Max concurrent tasks in fetch func: {CONCURRENCY_MAGIC_NUMBER}")
82+
7083
async with utils.BoundedTaskGroup(max_concurrent=CONCURRENCY_MAGIC_NUMBER) as g:
7184
for metadata_type, guids in typed_guids.items():
7285
for guid in guids:
@@ -88,6 +101,11 @@ async def fetch(
88101
r.raise_for_status()
89102
d = r.json()
90103

104+
except httpx.ReadError as e:
105+
_LOG.error(f"ReadError for guid={task.get_name()}, see logs for details..")
106+
_LOG.debug(f"Full error: {e}", exc_info=True)
107+
continue
108+
91109
except httpx.HTTPError as e:
92110
_LOG.error(f"Could not fetch the object for guid={task.get_name()}, see logs for details..")
93111
_LOG.debug(f"Full error: {e}", exc_info=True)

cs_tools/cli/tools/searchable/app.py

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -428,6 +428,7 @@ def metadata(
428428
tracker["ORGS_COUNT"].start()
429429

430430
# LOOP THROUGH EACH ORG COLLECTING DATA
431+
primary_org_done = False
431432
for org in orgs:
432433
tracker.title = f"Fetching Data in [fg-secondary]{org['name']}[/] (Org {org['id']})"
433434
seen_guids: dict[_types.APIObjectType, set[_types.GUID]] = collections.defaultdict(set)
@@ -449,7 +450,7 @@ def metadata(
449450
temp.dump(models.Org.__tablename__, data=d)
450451

451452
with tracker["TS_GROUP"]:
452-
c = workflows.paginator(ts.api.groups_search, record_size=150_000, timeout=60 * 15)
453+
c = workflows.paginator(ts.api.groups_search, record_size=5_000, timeout=60 * 15)
453454
_ = utils.run_sync(c)
454455

455456
# DUMP GROUP DATA
@@ -471,21 +472,25 @@ def metadata(
471472
d = api_transformer.ts_group_privilege(data=_, cluster=CLUSTER_UUID)
472473
temp.dump(models.GroupPrivilege.__tablename__, data=d)
473474

474-
with tracker["TS_USER"]:
475-
c = workflows.paginator(ts.api.users_search, record_size=150_000, timeout=60 * 15)
476-
_ = utils.run_sync(c)
477-
478-
# DUMP USER DATA
479-
d = api_transformer.ts_user(data=_, cluster=CLUSTER_UUID)
480-
temp.dump(models.User.__tablename__, data=d)
481-
482-
# DUMP USER->ORG_MEMBERSHIP DATA
483-
d = api_transformer.ts_org_membership(data=_, cluster=CLUSTER_UUID)
484-
temp.dump(models.OrgMembership.__tablename__, data=d)
485-
486-
# DUMP USER->GROUP_MEMBERSHIP DATA
487-
d = api_transformer.ts_group_membership(data=_, cluster=CLUSTER_UUID)
488-
temp.dump(models.GroupMembership.__tablename__, data=d)
475+
if org["id"] == 0 and not primary_org_done:
476+
with tracker["TS_USER"]:
477+
c = workflows.paginator(ts.api.users_search, record_size=5_000, timeout=60 * 15)
478+
_ = utils.run_sync(c)
479+
480+
# DUMP USER DATA
481+
d = api_transformer.ts_user(data=_, cluster=CLUSTER_UUID)
482+
temp.dump(models.User.__tablename__, data=d)
483+
484+
# DUMP USER->ORG_MEMBERSHIP DATA
485+
d = api_transformer.ts_org_membership(data=_, cluster=CLUSTER_UUID)
486+
temp.dump(models.OrgMembership.__tablename__, data=d)
487+
488+
# DUMP USER->GROUP_MEMBERSHIP DATA
489+
d = api_transformer.ts_group_membership(data=_, cluster=CLUSTER_UUID)
490+
temp.dump(models.GroupMembership.__tablename__, data=d)
491+
primary_org_done = True
492+
elif org["id"] != 0:
493+
log.info(f"Skipping USER data fetch for non-primary org (ID: {org['id']}) as it was already fetched.")
489494

490495
with tracker["TS_TAG"]:
491496
c = ts.api.tags_search()

cs_tools/cli/tools/user-management/app.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -241,7 +241,7 @@ def delete(
241241
else:
242242
this_task.description = f"[fg-success]Approved[/] (deleting {len(user_identifiers):,})"
243243

244-
with tracker["DELETING"] as this_task:
244+
with tracker["DELETE"] as this_task:
245245
this_task.total = len(user_identifiers)
246246

247247
users_to_delete: set[_types.GUID] = {metadata_object["guid"] for metadata_object in user_identifiers}

0 commit comments

Comments (0)