Skip to content

Commit 1ce3379

Browse files
author
boonhapus
committed
Merge branch 'dev'
2 parents 823e1f1 + e52d76a commit 1ce3379

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

88 files changed

+4447
-1736
lines changed

cs_tools/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = '1.3.1'
1+
__version__ = '1.3.2'

cs_tools/api/_rest_api_v1.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ def __init__(self, config, ts):
3434
self._http = httpx.Client(
3535
headers={'X-Requested-By': 'ThoughtSpot'},
3636
verify=not config.thoughtspot.disable_ssl,
37-
timeout=180.0,
37+
timeout=5 * 60.0,
3838
base_url=config.thoughtspot.fullpath
3939
)
4040

cs_tools/api/middlewares/answer.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,11 +73,19 @@ def all(
7373
answers.extend(to_extend)
7474

7575
if not to_extend and not answers:
76-
rzn = f"'{category.value}' category ("
77-
rzn += 'excluding ' if exclude_system_content else 'including '
78-
rzn += 'admin-generated answers)'
79-
rzn += '' if tags is None else ' and tags: ' + ', '.join(tags)
80-
raise ContentDoesNotExist(type='ANSWER', reason=rzn)
76+
info = {
77+
"incl": "exclude" if exclude_system_content else "include",
78+
"category": category,
79+
"tags": ", ".join(tags),
80+
"reason": (
81+
"Zero {type} matched the following filters"
82+
"\n"
83+
"\n - [blue]{category.value}[/] {type}"
84+
"\n - [blue]{incl}[/] admin-generated {type}"
85+
"\n - with tags [blue]{tags}"
86+
)
87+
}
88+
raise ContentDoesNotExist(type="answers", **info)
8189

8290
if data['isLastBatch']:
8391
break

cs_tools/api/middlewares/metadata.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -97,11 +97,19 @@ def all(
9797
break
9898

9999
if not content:
100-
rzn = f"'{category.value}' category ("
101-
rzn += 'excluding ' if exclude_system_content else 'including '
102-
rzn += 'admin-generated content)'
103-
rzn += '' if tags is None else ' and tags: ' + ', '.join(tags)
104-
raise ContentDoesNotExist(type=content, reason=rzn)
100+
info = {
101+
"incl": "exclude" if exclude_system_content else "include",
102+
"category": category,
103+
"tags": ", ".join(tags),
104+
"reason": (
105+
"Zero {type} matched the following filters"
106+
"\n"
107+
"\n - [blue]{category.value}[/] {type}"
108+
"\n - [blue]{incl}[/] admin-generated {type}"
109+
"\n - with tags [blue]{tags}"
110+
)
111+
}
112+
raise ContentDoesNotExist(type="content", **info)
105113

106114
return content
107115

cs_tools/api/middlewares/pinboard.py

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,19 @@ def all(
6464
offset += len(data['headers'])
6565

6666
if not data['headers'] and not pinboards:
67-
rzn = f"'{category.value}' category ("
68-
rzn += 'excluding ' if exclude_system_content else 'including '
69-
rzn += 'admin-generated pinboards)'
70-
rzn += '' if tags is None else ' and tags: ' + ', '.join(tags)
71-
raise ContentDoesNotExist(type='PINBOARD', reason=rzn)
67+
info = {
68+
"incl": "exclude" if exclude_system_content else "include",
69+
"category": category,
70+
"tags": ", ".join(tags),
71+
"reason": (
72+
"Zero {type} matched the following filters"
73+
"\n"
74+
"\n - [blue]{category.value}[/] {type}"
75+
"\n - [blue]{incl}[/] admin-generated {type}"
76+
"\n - with tags [blue]{tags}"
77+
)
78+
}
79+
raise ContentDoesNotExist(type="pinboards", **info)
7280

7381
if data['isLastBatch']:
7482
break

cs_tools/api/middlewares/search.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,12 +118,15 @@ def __call__(
118118
).json()
119119

120120
if not d['headers']:
121-
raise ContentDoesNotExist(type='LOGICAL_TABLE', name=guid)
121+
raise ContentDoesNotExist(
122+
type='LOGICAL_TABLE',
123+
reason="No table or worksheet found with the name [blue]{name}"
124+
)
122125

123126
d = [_ for _ in d['headers'] if _['name'].casefold() == guid.casefold()]
124127

125128
if len(d) > 1:
126-
raise AmbiguousContentError(name=guid, type='LOGICAL_TABLE')
129+
raise AmbiguousContentError(type='LOGICAL_TABLE', name=guid)
127130

128131
guid = d[0]['id']
129132

@@ -151,9 +154,9 @@ def __call__(
151154

152155
if offset % 500_000 == 0:
153156
log.warning(
154-
f'using the Search API to extract >= {offset / 1000: >6,.0f}K rows '
155-
f'is not a scalable practice, please consider adding a filter or '
156-
f'extracting records directly from the underlying data source '
157+
f'using the Search API to extract >= {offset / 1_000_000: >3,.1f}M '
158+
f'rows is not a scalable practice, please consider adding a filter '
159+
f'or extracting records directly from the underlying data source '
157160
f'instead!'
158161
)
159162

cs_tools/api/middlewares/tag.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,9 @@ def get(self, tag_name: str, *, create_if_not_exists: bool = False) -> Dict[str,
116116
tag = self.create(tag_name)
117117

118118
if tag is None:
119-
raise ContentDoesNotExist(type='TAG', name=tag_name)
119+
raise ContentDoesNotExist(
120+
type='tag',
121+
reason=f"No tag found with the name [blue]{tag_name}"
122+
)
120123

121124
return tag

cs_tools/api/middlewares/tql.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pydantic import validate_arguments, Field
88

99
from cs_tools.data.enums import Privilege
10-
from cs_tools.errors import InsufficientPrivileges, TableAlreadyExists
10+
from cs_tools.errors import InsufficientPrivileges
1111

1212

1313
log = logging.getLogger(__name__)
@@ -39,7 +39,7 @@ def _check_privileges(self) -> None:
3939
raise InsufficientPrivileges(
4040
user=self.ts.me,
4141
service='remote TQL',
42-
required_privileges=REQUIRED_PRIVILEGES
42+
required_privileges=', '.join(REQUIRED_PRIVILEGES)
4343
)
4444

4545
@validate_arguments

cs_tools/api/middlewares/tsload.py

Lines changed: 115 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
from typing import Any, Dict, List, Union
22
from io import BufferedIOBase, TextIOWrapper
33
from tempfile import _TemporaryFileWrapper
4+
import datetime as dt
45
import logging
6+
import json
57
import time
68

79
from pydantic import validate_arguments
810

911
from cs_tools.data.enums import Privilege
1012
from cs_tools.errors import InsufficientPrivileges, TSLoadServiceUnreachable
1113
from cs_tools.const import (
12-
FMT_TSLOAD_DATETIME, FMT_TSLOAD_DATE, FMT_TSLOAD_TIME, FMT_TSLOAD_TRUE_FALSE
14+
FMT_TSLOAD_DATETIME, FMT_TSLOAD_DATE, FMT_TSLOAD_TIME, FMT_TSLOAD_TRUE_FALSE,
15+
APP_DIR
1316
)
1417

1518

@@ -25,22 +28,96 @@ class TSLoadMiddleware:
2528
"""
2629
def __init__(self, ts):
    """
    Bind the middleware to a client and set the node-redirect cache location.

    Parameters
    ----------
    ts
        client object kept on `self.ts`; the other methods of this middleware
        reach the API through `self.ts.api` and the current user through
        `self.ts.me`
    """
    self.ts = ts
    # The load server resides on a different port compared to standard ThoughtSpot
    # services. This is because the service tends to carry heavy file-load
    # operations, and having a separate web server creates the needed isolation
    # between standard ThoughtSpot services and tsload operations. By default, this
    # service runs on all nodes of a ThoughtSpot cluster. This provides load
    # distribution to address possible simultaneous loads. The tsload server uses
    # its own load balancer. If an external load balancer is used, the tsload
    # requests must be sticky, and the tsload load balancer should be disabled.
    #
    # To turn off the load balancer, issue the following tscli commands
    # tscli --adv service add-gflag etl_http_server.etl_http_server etl_server_enable_load_balancer false
    # tscli --adv service add-gflag etl_http_server.etl_http_server etl_server_always_expose_node_ip true
    #
    # DEV NOTE
    # Each public method in this middleware accepts a keyword argument called
    # `ignore_node_redirect`, which removes the redirection logic from
    # further calls to the tsload service api. Since this is handled on a
    # client-by-client basis with no input from the API itself, we expose it as
    # a kwarg.
    #
    # Further reading:
    # https://docs.thoughtspot.com/latest/admin/loading/load-with-tsload.html
    #
    # JSON file mapping a load cycle id to the node which owns that cycle.
    self._cache_fp = APP_DIR / '.cache/tsload-node-redirect-by-cycle-id.json'
55+
56+
def _cache_node_redirect(self, cycle_id: str, *, node_info: Dict = None) -> Dict[str, Dict]:
    """
    Read, and optionally update, the on-disk cache of tsload node redirects.

    The cache is a JSON file at `self._cache_fp` which maps a load cycle id to
    the node details given for that cycle, plus a `load_datetime` epoch
    timestamp recording when the entry was written.

    Parameters
    ----------
    cycle_id : str
        load cycle to record node details for; ignored when only reading

    node_info : Dict [default: None]
        node details to store for `cycle_id`; when None, the cache is only
        read and nothing is written to disk

    Returns
    -------
    cache : Dict[str, Dict]
        the cache contents, after any update and pruning
    """
    try:
        with self._cache_fp.open(mode='r') as j:
            cache = json.load(j)
    except (FileNotFoundError, json.JSONDecodeError):
        # a missing or unreadable cache simply means no redirects are known yet,
        # it should never prevent a data load from happening
        cache = {}

    # nothing to write, or we should be reading
    if node_info is None:
        return cache

    # write to cache
    #
    # a timezone-aware "now" is required here: calling .timestamp() on the naive
    # value returned by utcnow() interprets it as local time, which shifts the
    # stored epoch by the machine's UTC offset
    now = dt.datetime.now(tz=dt.timezone.utc).timestamp()
    cache[cycle_id] = {**node_info, 'load_datetime': now}

    # keep only recent data
    max_age_seconds = 10 * 86400  # 10 days
    cache = {
        cycle: details
        for cycle, details in cache.items()
        if (now - details['load_datetime']) <= max_age_seconds
    }

    # the cache directory is not guaranteed to exist on first use
    self._cache_fp.parent.mkdir(parents=True, exist_ok=True)

    with self._cache_fp.open(mode='w') as j:
        json.dump(cache, j, indent=4, sort_keys=True)

    return cache
85+
86+
def _check_for_redirect_auth(self, cycle_id: str) -> None:
    """
    Point the dataservice at the node cached for a load cycle and log in there.

    The tsload service API sits behind a load balancer by default. Starting a
    new load cycle may return the node which file uploads must be sent to, and
    talking to a node other than the main one requires authorizing again.

    Parameters
    ----------
    cycle_id : str
        load cycle to look up in the node-redirect cache; when it has no cached
        entry, nothing is changed and no login is attempted
    """
    redirects = self._cache_node_redirect(cycle_id)

    # no redirect recorded for this cycle, keep talking to the current node
    if cycle_id not in redirects:
        return

    node = redirects[cycle_id]
    dataservice = self.ts.api.ts_dataservice
    dataservice._tsload_node = node['host']
    dataservice._tsload_port = node['port']
    log.debug(f'redirecting to: {dataservice.etl_server_fullpath}')
    dataservice.load_auth()
28103

29104
def _check_privileges(self) -> None:
    """
    Determine if the user has necessary Data Manager privileges.

    Raises
    ------
    InsufficientPrivileges
        when the logged-in user holds none of the REQUIRED_PRIVILEGES
    """
    if not set(self.ts.me.privileges).intersection(REQUIRED_PRIVILEGES):
        raise InsufficientPrivileges(
            user=self.ts.me,
            # this middleware guards the tsload service, so the error must name
            # it rather than the TQL service
            service='remote tsload',
            required_privileges=', '.join(REQUIRED_PRIVILEGES)
        )
38114

39115
@validate_arguments(config=dict(arbitrary_types_allowed=True))
40116
def upload(
41117
self,
42118
fd: Union[BufferedIOBase, TextIOWrapper, _TemporaryFileWrapper],
43119
*,
120+
ignore_node_redirect: bool = False,
44121
database: str,
45122
table: str,
46123
schema_: str = 'falcon_default_schema',
@@ -85,6 +162,11 @@ def upload(
85162
fp : pathlib.Path
86163
file to load to thoughtspot
87164
165+
ignore_node_redirect : bool [default: False]
166+
whether or not to ignore node redirection
167+
168+
**tsload_options
169+
88170
Returns
89171
-------
90172
cycle_id
@@ -150,17 +232,43 @@ def upload(
150232
http_error=e
151233
)
152234

153-
cycle_id = r.json()['cycle_id']
154-
self.ts.api.ts_dataservice.load_start(cycle_id, fd=fd)
155-
self.ts.api.ts_dataservice.load_commit(cycle_id)
156-
return cycle_id
235+
data = r.json()
236+
self._cache_node_redirect(data['cycle_id'], node_info=data.get('node_address', None))
237+
238+
if not ignore_node_redirect:
239+
self._check_for_redirect_auth(data['cycle_id'])
240+
241+
self.ts.api.ts_dataservice.load_start(data['cycle_id'], fd=fd)
242+
self.ts.api.ts_dataservice.load_commit(data['cycle_id'])
243+
return data['cycle_id']
157244

158245
@validate_arguments
159-
def status(self, cycle_id: str, *, wait_for_complete: bool = False):
246+
def status(
247+
self,
248+
cycle_id: str,
249+
*,
250+
ignore_node_redirect: bool = False,
251+
wait_for_complete: bool = False
252+
) -> Dict[str, Any]:
160253
"""
254+
Get the status of a previously started data load.
255+
256+
Parameters
257+
----------
258+
cycle_id : str
259+
data load to check on
260+
261+
ignore_node_redirect : bool [default: False]
262+
whether or not to ignore node redirection
263+
264+
wait_for_complete: bool [default: False]
265+
poll the load server until it responds with OK or ERROR
161266
"""
162267
self._check_privileges()
163268

269+
if not ignore_node_redirect:
270+
self._check_for_redirect_auth(cycle_id=cycle_id)
271+
164272
while True:
165273
r = self.ts.api.ts_dataservice.load_status(cycle_id)
166274
data = r.json()

cs_tools/api/middlewares/user.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,11 +86,14 @@ def get(
8686
user = r.json()['headers']
8787

8888
if not user:
89-
raise ContentDoesNotExist(type='USER', name=principal)
89+
raise ContentDoesNotExist(
90+
type='USER',
91+
reason=f"No user found with the name [blue]{principal}"
92+
)
9093

9194
if error_if_ambiguous:
9295
if len(user) > 1:
93-
raise AmbiguousContentError(type='USER', name=principal)
96+
raise AmbiguousContentError(type='user', name=principal)
9497
user = user[0]
9598

9699
return user

0 commit comments

Comments
 (0)