Skip to content

Commit a142a27

Browse files
Merge pull request #272 from thoughtspot/dev
1.6.5 Release
2 parents ce69eb3 + b64059f commit a142a27

32 files changed

+1516
-1285
lines changed

cs_tools/__project__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "1.6.4"
1+
__version__ = "1.6.5"
22
__docs__ = "https://thoughtspot.github.io/cs_tools/"
33
__repo__ = "https://github.com/thoughtspot/cs_tools"
44
__help__ = f"{__repo__}/discussions/"

cs_tools/cli/commands/config.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def create(
4444
None, help="the password you type on the ThoughtSpot login screen, use [b magenta]prompt[/] to type it hidden"
4545
),
4646
secret: str = typer.Option(None, help="the trusted authentication secret key, found in the developer tab"),
47+
concurrency: int = typer.Option(None, help="change the number call sending to TS, By default 15"),
4748
token: str = typer.Option(None, help="the V2 API bearer token"),
4849
default_org: int = typer.Option(None, help="org ID to sign into by default"),
4950
temp_dir: custom_types.Directory = typer.Option(None, help="the temporary directory to use for uploading files"),
@@ -80,6 +81,7 @@ def create(
8081
"default_org": default_org,
8182
"disable_ssl": disable_ssl,
8283
"proxy": proxy,
84+
"concurrency": concurrency,
8385
},
8486
"verbose": verbose,
8587
"temp_dir": temp_dir or cs_tools_venv.subdir(".tmp"),
@@ -117,6 +119,7 @@ def modify(
117119
),
118120
secret: str = typer.Option(None, help="the trusted authentication secret key"),
119121
token: str = typer.Option(None, help="the V2 API bearer token"),
122+
concurrency: int = typer.Option(None, help="change the number call sending to TS, By default 15"),
120123
temp_dir: custom_types.Directory = typer.Option(None, help="the temporary directory to use for uploading files"),
121124
disable_ssl: bool = typer.Option(
122125
None, "--disable-ssl", help="whether or not to turn off checking the SSL certificate"
@@ -162,6 +165,9 @@ def modify(
162165
if proxy is not None:
163166
data["thoughtspot"]["proxy"] = proxy
164167

168+
if concurrency is not None:
169+
data["thoughtspot"]["concurrency"] = concurrency
170+
165171
conf = CSToolsConfig.model_validate(data)
166172
conf.save()
167173

cs_tools/cli/tools/searchable/api_transformer.py

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -665,3 +665,81 @@ def ts_audit_logs(data: list[_types.APIResult], *, cluster: _types.GUID) -> _typ
665665
reshaped.sort(key=operator.itemgetter(*CLUSTER_KEY))
666666

667667
return reshaped
668+
669+
670+
def ts_ai_stats(data: list[_types.APIResult], *, cluster: _types.GUID) -> _types.TableRowsFormat:
    """
    Reshapes /searchdata -> searchable.models.AIStats.

    Rows are sorted on the clustering key, grouped by the calendar date of their
    "ThoughtSpot Start Time", de-duplicated on the clustering key, and numbered
    within each date so every emitted row gets a surrogate key of the form
    "{cluster}-{date}-{row_number}".

    Parameters
    ----------
    data
      rows keyed by the search's column names (e.g. "ThoughtSpot Start Time")

    cluster
      identifier stored on every row as cluster_guid and used as the sk_dummy prefix

    Returns
    -------
    one dumped models.AIStats row per unique input row
    """
    reshaped: _types.TableRowsFormat = []

    CLUSTER_KEY = ("ThoughtSpot Start Time", "User ID", "Visualization ID")

    def partition_key(row):
        # ROW NUMBERS RESTART FOR EVERY CALENDAR DATE.
        return row["ThoughtSpot Start Time"].date()

    def cluster_key(row):
        # "User ID" AND "Visualization ID" MAY BE NULL, AND None IS NOT ORDERABLE AGAINST
        # str. PAIR EVERY VALUE WITH AN IS-NULL FLAG SO NULLS SORT LAST INSTEAD OF RAISING.
        return tuple((row[column] is None, row[column]) for column in CLUSTER_KEY)

    # KEEP TRACK OF DUPLICATE ROWS DUE TO DATA MANAGEMENT ISSUES.
    seen: set[str] = set()

    # ENSURE ALL DATA IS IN UTC PRIOR TO GENERATING ROW_NUMBERS.
    data = [
        {**row, "ThoughtSpot Start Time": validators.ensure_datetime_is_utc.func(row["ThoughtSpot Start Time"])}
        for row in data
    ]

    # SORT PRIOR TO GROUP BY SO WE MAINTAIN CLUSTERING KEY SEMANTICS
    data.sort(key=cluster_key)

    for row_date, rows in it.groupby(data, key=partition_key):
        # MANUAL ENUMERATION BECAUSE WE NEED TO ACCOUNT FOR DEDUPLICATION.
        row_number = 0

        for row in rows:
            if (unique := f"{row['ThoughtSpot Start Time']}-{row['User ID']}-{row['Visualization ID']}") in seen:
                continue

            row_number += 1

            reshaped.append(
                models.AIStats.validated_init(
                    **{
                        "cluster_guid": cluster,
                        "sk_dummy": f"{cluster}-{row_date}-{row_number}",
                        "answer_session_id": row["Answer Session ID"],
                        "query_latency": row["Average Query Latency (External)"],
                        "system_latency": row["Average System Latency (Overall)"],
                        "connection": row["Connection"],
                        "connection_id": row["Connection ID"],
                        "db_auth_type": row["DB Auth Type"],
                        "db_type": row["DB Type"],
                        "error_message": row["Error Message"],
                        "external_database_query_id": row["External Database Query ID"],
                        "impressions": row["Impressions"],
                        "is_billable": row["Is Billable"],
                        "is_system": row["Is System"],
                        "model": row["Model"],
                        "model_id": row["Model ID"],
                        "object": row["Object"],
                        "object_id": row["Object ID"],
                        "object_subtype": row["Object Subtype"],
                        "object_type": row["Object Type"],
                        "org": row["Org"],
                        "org_id": row["Org ID"],
                        "query_count": row["Query Count"],
                        "query_end_time": row["Query End Time"],
                        "query_errors": row["Query Errors"],
                        "query_start_time": row["Query Start Time"],
                        "query_status": row["Query Status"],
                        "sql_query": row["SQL Query"],
                        "thoughtspot_query_id": row["ThoughtSpot Query ID"],
                        "thoughtspot_start_time": row["ThoughtSpot Start Time"],
                        "credits": row["Total Credits"],
                        "nums_rows_fetched": row["Total Nums Rows Fetched"],
                        "trace_id": row["Trace ID"],
                        "user": row["User"],
                        "user_action": row["User Action"],
                        "user_action_count": row["User Action Count"],
                        "user_count": row["User Count"],
                        "user_display_name": row["User Display Name"],
                        "user_id": row["User ID"],
                        "visualization_id": row["Visualization ID"],
                    }
                ).model_dump()
            )

            seen.add(unique)

    return reshaped

cs_tools/cli/tools/searchable/app.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,3 +773,103 @@ def tml(
773773
syncer.dump(models.MetadataTML.__tablename__, data=rows)
774774

775775
return 0
776+
777+
@app.command()
@depends_on(thoughtspot=ThoughtSpot())
def ts_ai_stats(
    ctx: typer.Context,
    syncer: Syncer = typer.Option(
        ...,
        click_type=custom_types.Syncer(models=[models.AIStats]),
        help="protocol and path for options to pass to the syncer",
        rich_help_panel="Syncer Options",
    ),
    from_date: custom_types.Date = typer.Option(
        ..., help="inclusive lower bound of rows to select from TS: AI and BI Stats"
    ),
    to_date: custom_types.Date = typer.Option(
        ..., help="inclusive upper bound of rows to select from TS: AI and BI Stats"
    ),
    org_override: str = typer.Option(None, "--org", help="The Org to switch to before performing actions."),
    compact: bool = typer.Option(True, "--compact / --full", help="If compact, add [User Action] != {null} 'invalid'"),
) -> _types.ExitCode:
    """
    Extract query performance metrics for each query made against an external database

    To extract one day of data, set [b cyan]--from-date[/] and [b cyan]--to-date[/] to the same value.

    \b
    Fields extracted from TS: AI and BI Stats
    - Answer Session ID - Average Query Latency (External) - Average System Latency (Overall) - Impressions
    - Connection - Connection ID - DB Auth Type - Is System
    - DB Type - Error Message - External Database Query ID - Is Billable
    - Model - Model ID - Object - Object ID
    - Object Subtype - Object Type - Org - Org ID
    - Query Count - Query End Time - Query Errors - Query Start Time
    - Query Status - SQL Query - ThoughtSpot Query ID - ThoughtSpot Start Time
    - Total Credits - Total Nums Rows Fetched - Trace ID - User
    - User Action - User Action Count - User Count - User Display Name
    - User ID - Visualization ID
    """
    assert isinstance(from_date, dt.date), f"Could not coerce from_date '{from_date}' to a date."
    assert isinstance(to_date, dt.date), f"Could not coerce to_date '{to_date}' to a date."
    ts = ctx.obj.thoughtspot

    CLUSTER_UUID = ts.session_context.thoughtspot.cluster_id

    TZ_UTC = zoneinfo.ZoneInfo("UTC")
    TS_AI_TIMEZONE = TZ_UTC if ts.session_context.thoughtspot.is_cloud else ts.session_context.thoughtspot.timezone
    # DIAGNOSTIC ONLY: GOES THROUGH THE LOGGER SO IT DOES NOT POLLUTE THE COMMAND'S STDOUT.
    log.debug("TS_AI_TIMEZONE -> %s", TS_AI_TIMEZONE)

    if syncer.protocol == "falcon":
        log.error("Falcon Syncer is not supported for TS: AI and BI Stats reflection.")
        # THE TABLE IS DROPPED FROM THE SYNCER'S ENGINE BEFORE EXITING WITH A FAILURE CODE.
        models.AIStats.__table__.drop(syncer.engine)
        return 1

    if (to_date - from_date) > dt.timedelta(days=31):  # type: ignore[operator]
        log.warning("Due to how the Search API functions, it's recommended to request no more than 1 month at a time.")

    # DEV NOTE: @boonhapus
    # As of 9.10.0.cl , TS: BI Server only resides in the Primary Org(0), so switch to it
    # NOTE(review): presumably the same holds for TS: AI and BI Stats -- confirm.
    if ts.session_context.thoughtspot.is_orgs_enabled:
        ts.switch_org(org_id=0)

    if org_override is not None:
        # RESOLVE THE USER-SUPPLIED ORG IDENTIFIER TO ITS ID, WHICH THE SEARCH FILTER BELOW USES.
        c = workflows.metadata.fetch_one(identifier=org_override, metadata_type="ORG", attr_path="id", http=ts.api)
        org_override = utils.run_sync(c)

    SEARCH_DATA_DATE_FMT = "%m/%d/%Y"
    # EVERY LITERAL BELOW ENDS IN A SPACE: ADJACENT LITERALS ARE CONCATENATED AS-IS, SO A
    # MISSING SPACE WOULD FUSE TWO TOKENS TOGETHER (eg. "[Org][Query Status]").
    SEARCH_TOKENS = (
        "[Query Start Time] [Query Start Time].detailed [Query End Time] [Query End Time].detailed [Org] "
        "[Query Status] [Connection] [User] [Nums Rows Fetched] [ThoughtSpot Query ID] [Is Billable] "
        "[ThoughtSpot Start Time] [ThoughtSpot Start Time].detailed [User Action] [Is System] [Visualization ID] "
        "[External Database Query ID] [Query Latency (External)] "
        "[Object] [User ID] [Org ID] [Credits] [Impressions] [Query Count] [Query Errors] "
        "[System Latency (Overall)] [User Action Count] "
        "[User Count] [Answer Session ID] [Connection ID] [DB Auth Type] [DB Type] [Error Message] [Model] "
        "[Model ID] [Object ID] [Object Subtype] [Object Type] [SQL Query] [User Display Name] [Trace ID] "
        # FOR DATA QUALITY PURPOSES
        "[ThoughtSpot Start Time] != 'today'"
        # CONDITIONALS BASED ON CLI OPTIONS OR ENVIRONMENT
        + ("" if not compact else " [user action] != [user action].invalid [user action].{null}")
        + f" [ThoughtSpot Start Time] >= '{from_date.strftime(SEARCH_DATA_DATE_FMT)}'"
        + f" [ThoughtSpot Start Time] <= '{to_date.strftime(SEARCH_DATA_DATE_FMT)}'"
        + ("" if not ts.session_context.thoughtspot.is_orgs_enabled else " [org id]")
        + ("" if org_override is None else f" [org id] = {org_override}")
    )

    TOOL_TASKS = [
        px.WorkTask(id="SEARCH", description="Fetching data from ThoughtSpot"),
        px.WorkTask(id="CLEAN", description="Transforming API results"),
        px.WorkTask(id="DUMP_DATA", description=f"Sending data to {syncer.name}"),
    ]

    # DEV NOTE: @saurabhsingh1608. 09/15/2025
    # Currently worksheet name is "TS: AI and BI Stats (Beta)" change it in future as need arise

    with px.WorkTracker("Fetching TS: AI and BI Stats", tasks=TOOL_TASKS) as tracker:
        with tracker["SEARCH"]:
            c = workflows.search(
                worksheet="TS: AI and BI Stats (Beta)", query=SEARCH_TOKENS, timezone=TS_AI_TIMEZONE, http=ts.api
            )
            _ = utils.run_sync(c)

        with tracker["CLEAN"]:
            d = api_transformer.ts_ai_stats(data=_, cluster=CLUSTER_UUID)

        with tracker["DUMP_DATA"]:
            syncer.dump(models.AIStats.__tablename__, data=d)

    return 0

cs_tools/cli/tools/searchable/models.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -365,6 +365,72 @@ def export_reserved_characters_are_escaped(self, query_text: Optional[str]) -> O
365365

366366
return query_text
367367

368+
class AIStats(ValidatedSQLModel, table=True):
    # Row model backing the "ts_ai_stats" table.
    __tablename__ = "ts_ai_stats"

    # Composite primary key.
    cluster_guid: str = Field(primary_key=True)
    sk_dummy: str = Field(primary_key=True)

    # Remaining columns, kept in their declared order.
    answer_session_id: Optional[str]
    query_latency: Optional[int]
    system_latency: Optional[int]
    connection: Optional[str]
    connection_id: Optional[str]
    db_auth_type: Optional[str]
    db_type: Optional[str]
    error_message: Optional[str]
    external_database_query_id: Optional[str]
    impressions: Optional[int]
    is_billable: Optional[bool]
    is_system: Optional[bool]
    model: Optional[str]
    model_id: Optional[str]
    object: Optional[str]
    object_id: Optional[str]
    object_subtype: Optional[str]
    object_type: Optional[str]
    org: Optional[str]
    org_id: int = 0
    query_count: Optional[int]
    query_end_time: dt.datetime = Field(sa_column=Column(TIMESTAMP))
    query_errors: Optional[int]
    query_start_time: dt.datetime = Field(sa_column=Column(TIMESTAMP))
    query_status: Optional[str]
    sql_query: Optional[str] = Field(sa_column=Column(Text, info={"length_override": "MAX"}))
    thoughtspot_query_id: Optional[str]
    thoughtspot_start_time: dt.datetime = Field(sa_column=Column(TIMESTAMP))
    credits: Optional[int]
    nums_rows_fetched: Optional[int]
    trace_id: Optional[str]
    user: Optional[str]
    user_action: Optional[str]
    user_action_count: Optional[int]
    user_count: Optional[int]
    user_display_name: Optional[str]
    user_id: Optional[str]
    visualization_id: Optional[str]

    @pydantic.field_validator("thoughtspot_start_time", mode="before")
    @classmethod
    def check_valid_utc_datetime(cls, value: Any) -> dt.datetime:
        """Pass the raw start time through the shared UTC datetime validator."""
        return validators.ensure_datetime_is_utc.func(value)

    @pydantic.field_validator("user_action", mode="after")
    @classmethod
    def ensure_is_case_sensitive_thoughtspot_enum_value(cls, value: Optional[str]) -> Optional[str]:
        """Upper-case the user action, leaving a missing value as None."""
        # Annotated[str, pydantic.StringContraints(to_upper=True)] is not used here because of
        # sqlmodel#67: https://github.com/tiangolo/sqlmodel/issues/67
        if value is None:
            return None

        return value.upper()

    @pydantic.field_serializer("sql_query")
    def export_reserved_characters_are_escaped(self, sql_query: Optional[str]) -> Optional[str]:
        """Prefix every reserved character in the SQL text with a backslash on export."""
        escaped = sql_query

        if escaped is not None:
            for reserved in ("\\",):
                escaped = escaped.replace(reserved, f"\\{reserved}")

        return escaped
368434

369435
METADATA_MODELS = [
370436
Cluster,

0 commit comments

Comments
 (0)