Skip to content

Commit 6209de9

Browse files
committed
0.12.5rc1
1 parent 8240bc2 commit 6209de9

18 files changed

+224
-122
lines changed

nbox/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@
1414
from nbox.hyperloop.common.common_pb2 import Resource
1515
from nbox.nbxlib.logger import lo
1616
from nbox.projects import Project
17+
from nbox.auth import secret, AuthConfig, auth_info_pb, inside_pod

nbox/assets/exe.jinja

+5-1
Original file line numberDiff line numberDiff line change
@@ -54,5 +54,9 @@ if __name__ == "__main__":
5454
nbxlet = NBXLet(op = get_op(cloud = True))
5555
fire.Fire({
5656
"run": nbxlet.run, # NBX-Jobs
57-
"serve": partial(nbxlet.serve, model_name = "{{ model_name }}"), # NBX-Deploy
57+
"serve": partial(
58+
nbxlet.serve,
59+
model_name = "{{ model_name }}",
60+
serving_type = "{{ serving_type }}"
61+
), # NBX-Deploy
5862
})

nbox/auth.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
from functools import lru_cache
1919

2020
import nbox.utils as U
21-
from nbox.utils import join, logger
21+
from nbox.utils import join, logger, lo
2222

2323

2424
class AuthConfig():
@@ -33,7 +33,7 @@ class AuthConfig():
3333

3434
# things for the pod
3535
nbx_pod_run = "run"
36-
# nbx_pod_deploy = "deploy"
36+
nbx_pod_deploy = "deploy"
3737

3838
def items():
3939
return [AuthConfig.workspace_id, AuthConfig.workspace_name, AuthConfig.cache]
@@ -208,7 +208,13 @@ def init_secret():
208208
# add any logic here for creating secrets
209209
if not U.env.NBOX_NO_AUTH(False):
210210
secret = NBXClient()
211-
logger.info(f"Current workspace id: {secret(AuthConfig.workspace_id)} ({secret(AuthConfig.workspace_name)})")
211+
logger.info(lo(
212+
f"workspace details",
213+
workspace_id = secret.workspace_id,
214+
workspace_name = AuthConfig.workspace_name,
215+
token_present = len(secret.access_token) > 0,
216+
nbx_url = secret.nbx_url,
217+
))
212218
return secret
213219
else:
214220
logger.info(f"Skipping authentication as NBOX_NO_AUTH is set to True")
@@ -231,4 +237,4 @@ def auth_info_pb():
231237
)
232238

233239
def inside_pod():
234-
return secret(AuthConfig.nbx_pod_run, False)
240+
return secret(AuthConfig.nbx_pod_run, False) or secret(AuthConfig.nbx_pod_deploy, False)

nbox/cli.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from nbox.nbxlib.fire import NBXFire
3333
from nbox.projects import Project
3434
from nbox.utils import logger, lo
35+
from nbox.plugins.base import PluginCLI
3536

3637
class Config(object):
3738
def update(self, workspace_id: str):
@@ -182,6 +183,7 @@ def main():
182183
"lmao" : LmaoCLI,
183184
"login" : login,
184185
"open" : open_home,
186+
"plugins" : PluginCLI,
185187
"projects" : Project,
186188
"relics" : Relics,
187189
"serve" : Serve,

nbox/jobs.py

+20-5
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from nbox.hyperloop.jobs.nbox_ws_pb2 import JobRequest
2525
from nbox.hyperloop.jobs.job_pb2 import Job as JobProto
2626
from nbox.hyperloop.jobs.dag_pb2 import DAG as DAGProto
27-
from nbox.hyperloop.common.common_pb2 import Resource, Code
27+
from nbox.hyperloop.common.common_pb2 import Resource
2828
from nbox.hyperloop.jobs.nbox_ws_pb2 import ListJobsRequest, ListJobsResponse, UpdateJobRequest
2929
from nbox.hyperloop.deploy.serve_pb2 import ServingListResponse, ServingRequest, Serving, ServingListRequest, ModelRequest, Model as ModelProto, UpdateModelRequest
3030

@@ -164,7 +164,7 @@ def upload_job_folder(
164164
method: str,
165165
init_folder: str,
166166
id: str = "",
167-
# name: str = "",
167+
project_id: str = "",
168168
trigger: bool = False,
169169

170170
# all the things for resources
@@ -206,6 +206,7 @@ def upload_job_folder(
206206
init_folder (str): folder with all the relevant files or ``file_path:fn_name`` pair so you can use it as the entrypoint.
207207
name (str, optional): Name of the job. Defaults to "".
208208
id (str, optional): ID of the job. Defaults to "".
209+
project_id (str, optional): Project ID, if None uses the one from config. Defaults to "".
209210
trigger (bool, optional): If uploading a "job" trigger the job after uploading. Defaults to False.
210211
resource_cpu (str, optional): CPU resource. Defaults to "100m".
211212
resource_memory (str, optional): Memory resource. Defaults to "128Mi".
@@ -221,6 +222,8 @@ def upload_job_folder(
221222
from nbox.network import deploy_job, deploy_serving
222223
import nbox.nbxlib.operator_spec as ospec
223224
from nbox.nbxlib.serving import SupportedServingTypes as SST
225+
from nbox.projects import Project
226+
224227
OT = ospec.OperatorType
225228

226229
if method not in OT._valid_deployment_types():
@@ -231,6 +234,17 @@ def upload_job_folder(
231234
raise ValueError(f"Trigger can only be used with '{OT.JOB}' or '{OT.SERVING}'")
232235
if model_name and method != OT.SERVING:
233236
raise ValueError(f"model_name can only be used with '{OT.SERVING}'")
237+
238+
# get the correct ID based on the project_id
239+
if (not project_id and not id) or (project_id and id):
240+
raise ValueError("Either --project-id or --id must be present")
241+
if project_id:
242+
p = Project(project_id)
243+
if method == OT.JOB:
244+
id = p.get_job_id()
245+
else:
246+
id = p.get_deployment_id()
247+
logger.info(f"Using project_id: {project_id}, found id: {id}")
234248

235249
if ":" not in init_folder:
236250
# this means we are uploading a traditonal folder that contains a `nbx_user.py` file
@@ -274,8 +288,8 @@ def upload_job_folder(
274288
if method == OT.SERVING:
275289
if serving_type not in SST.all():
276290
raise ValueError(f"Invalid serving_type: {serving_type}, should be one of {SST.all()}")
277-
if serving_type == SST.FASTAPI:
278-
logger.warning(f"You have selected serving_type='{SST.FASTAPI}', this assumes the object: {fn_name} is a FastAPI app")
291+
if serving_type == SST.FASTAPI or serving_type == SST.FASTAPI_V2:
292+
logger.warning(f"You have selected serving_type='{serving_type}', this assumes the object: {fn_name} is a FastAPI app")
279293
init_code = fn_name
280294
perform_tea = False
281295
load_operator = False
@@ -311,7 +325,8 @@ def upload_job_folder(
311325
"file_name": file_name,
312326
"fn_name": fn_name,
313327
"init_code": init_code,
314-
"load_operator": load_operator
328+
"load_operator": load_operator,
329+
"serving_type": serving_type,
315330
}
316331

317332
# create a requirements.txt file if it doesn't exist with the latest nbox version

nbox/lib/dist.py

+17-29
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
"""
77

88
import os
9+
import json
910

1011
import nbox.utils as U
1112
from nbox.utils import logger, lo
@@ -16,7 +17,7 @@
1617
from nbox.hyperloop.jobs.job_pb2 import Job
1718
from nbox.nbxlib.serving import serve_operator
1819

19-
from nbox.lmao import ExperimentConfig, LMAO_RM_PREFIX
20+
from nbox.lmao import ExperimentConfig, LiveConfig, LMAO_RM_PREFIX, LMAO_SERVING_FILE
2021
from nbox.projects import Project, ProjectState
2122

2223
# Manager
@@ -100,11 +101,11 @@ def run(self):
100101
elif run_tag.startswith(SILK_RM_PREFIX):
101102
# 27/03/2023: We are adding a new job type called Silk
102103
trace_id = run_tag[len(SILK_RM_PREFIX):]
103-
logger.info(f"Running trace: {trace_id}")
104104
kwargs = {"trace_id": trace_id}
105105

106106
# call the damn thing
107107
st = U.SimplerTimes.get_now_i64()
108+
logger.debug(f"Args: {args}\nKwargs: {kwargs}")
108109
out = self.op(*args, **kwargs)
109110

110111
# save the output to the relevant place, LMAO jobs are not saved to the relic
@@ -130,36 +131,23 @@ def run(self):
130131
tracer._rpc(f"RPC error in ending job {job_id}")
131132
U.hard_exit_program()
132133

133-
def serve(self, host: str = "0.0.0.0", port: int = 8000, *, model_name: str = None):
134+
def serve(self, **serve_kwargs):
134135
"""Run a serving API endpoint"""
135-
136-
run_tag = os.getenv("NBOX_RUN_METADATA", "")
137-
logger.info(f"Run Tag: {run_tag}")
138-
139-
# Unlike a run above where it is only going to be triggered once and all the metadata is already indexed
140-
# in the DB, this is not the case with deploy. But with deploy we can get away with something much simpler
141-
# which is using a more complicated ModelProto.metadata object
142-
# init_folder = U.env.NBOX_JOB_FOLDER("")
143-
# if not init_folder:
144-
# raise RuntimeError("NBOX_JOB_FOLDER not set")
145-
# if not os.path.exists(init_folder):
146-
# raise RuntimeError(f"NBOX_JOB_FOLDER {init_folder} does not exist")
147-
148-
# fp_bin = U.join(init_folder, "model_proto.msg")
149-
# serving_type = SST.NBOX
150-
# if fp_bin:
151-
# model_proto: ModelProto = read_file_to_binary(fp_bin, ModelProto())
152-
# logger.info(model_proto)
153-
# serving_type = model_proto.metadata.get("serving_type", SST.NBOX)
136+
# run_tag = os.getenv("NBOX_RUN_METADATA", "")
137+
# logger.info(f"Run Tag: {run_tag}")
138+
139+
# while we prepare to ship run_tag for serving, we can leverage the existing code to provide the exact
140+
# same functionality
141+
if os.path.exists(LMAO_SERVING_FILE):
142+
logger.info(f"Found {LMAO_SERVING_FILE}")
143+
with open(LMAO_SERVING_FILE, "r") as f:
144+
cfg = LiveConfig.from_json(f.read())
145+
ProjectState.project_id = cfg.get("project_id")
146+
ProjectState.serving_id = cfg.get("serving_id")
147+
logger.info(lo("Project data:", **ProjectState.data))
154148

155149
try:
156-
serve_operator(
157-
op_or_app = self.op,
158-
# serving_type = serving_type,
159-
host = host,
160-
port = port,
161-
model_name = model_name
162-
)
150+
serve_operator(op_or_app = self.op, **serve_kwargs)
163151
except Exception as e:
164152
U.log_traceback()
165153
logger.error(f"Failed to serve operator: {e}")

nbox/lmao/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from nbox.lmao.exp import Lmao, _lmaoConfig
22
from nbox.lmao.live import LmaoLive
33
from nbox.lmao.cli import LmaoCLI
4-
from nbox.lmao.common import get_lmao_stub, get_git_details, get_record, ExperimentConfig, LMAO_RM_PREFIX
4+
from nbox.lmao.common import get_lmao_stub, get_git_details, get_record, ExperimentConfig, LMAO_RM_PREFIX, LiveConfig, LMAO_SERVING_FILE
55
from nbox.lmao.proto import lmao_v2_pb2
66
from nbox.lmao.lmao_rpc_client import LMAO_Stub

nbox/lmao/common.py

+57-10
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,18 @@ def get_project(project_id: str) -> ProjectProto:
110110
))
111111
return project
112112

113+
114+
def resource_from_dict(d: Dict[str, Any]):
115+
return Resource(
116+
cpu = str(d["cpu"]),
117+
memory = str(d["memory"]),
118+
disk_size = str(d["disk_size"]),
119+
gpu = str(d["gpu"]),
120+
gpu_count = str(d["gpu_count"]),
121+
timeout = int(d["timeout"]),
122+
max_retries = int(d["max_retries"]),
123+
)
124+
113125
"""
114126
Common structs
115127
"""
@@ -159,24 +171,59 @@ def to_json(self):
159171
@classmethod
160172
def from_json(cls, json_str):
161173
d = json.loads(json_str)
162-
# print(d["resource"])
163-
d["resource"] = Resource(
164-
cpu = str(d["resource"]["cpu"]),
165-
memory = str(d["resource"]["memory"]),
166-
disk_size = str(d["resource"]["disk_size"]),
167-
gpu = str(d["resource"]["gpu"]),
168-
gpu_count = str(d["resource"]["gpu_count"]),
169-
timeout = int(d["resource"]["timeout"]),
170-
max_retries = int(d["resource"]["max_retries"]),
171-
)
174+
d["resource"] = resource_from_dict(d["resource"])
172175
return cls(**d)
173176

174177

178+
179+
class LiveConfig:
180+
def __init__(
181+
self,
182+
resource: Resource,
183+
cli_comm: str,
184+
enable_system_monitoring: bool = False,
185+
):
186+
self.resource = resource
187+
self.cli_comm = cli_comm
188+
self.enable_system_monitoring = enable_system_monitoring
189+
self.keys = set()
190+
191+
def to_dict(self):
192+
out = {
193+
"resource": mpb.message_to_dict(self.resource),
194+
"cli_comm": self.cli_comm,
195+
"enable_system_monitoring": self.enable_system_monitoring,
196+
}
197+
for k in self.keys:
198+
out[k] = getattr(self, k)
199+
return out
200+
201+
def to_json(self):
202+
return json.dumps(self.to_dict())
203+
204+
@classmethod
205+
def from_json(cls, json_str) -> 'LiveConfig':
206+
d = json.loads(json_str)
207+
d["resource"] = resource_from_dict(d["resource"])
208+
_cls = cls(**d)
209+
for k in d:
210+
if k not in ["resource", "cli_comm", "enable_system_monitoring"]:
211+
_cls.add(k, d[k])
212+
213+
def add(self, key, value):
214+
setattr(self, key, value)
215+
self.keys.add(key)
216+
217+
def get(self, key):
218+
return getattr(self, key)
219+
220+
175221
"""
176222
Constants
177223
"""
178224

179225
# do not change these it can become a huge pain later on
180226
LMAO_RELIC_NAME = "experiments"
181227
LMAO_RM_PREFIX = "NBXLmao-"
228+
LMAO_SERVING_FILE = "NBXLmaoServingCfg.json"
182229
LMAO_ENV_VAR_PREFIX = "NBX_LMAO_"

nbox/lmao/live.py

+3
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,6 @@ def log(self, y: Dict[str, Union[int, float, str]]):
165165
raise Exception("Server Error")
166166

167167
self._total_logged_elements += 1
168+
169+
170+
# helper class

nbox/nbxlib/serving.py

+12-3
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,15 @@
3232
class SupportedServingTypes():
3333
NBOX = "nbox"
3434
FASTAPI = "fastapi"
35+
FASTAPI_V2 = "fastapi_v2"
3536

3637
def all():
37-
return [SupportedServingTypes.NBOX, SupportedServingTypes.FASTAPI]
38+
return [getattr(SupportedServingTypes, x) for x in dir(SupportedServingTypes) if not x.startswith("__") and not x == "all"]
3839

3940

4041
def serve_operator(
4142
op_or_app: Operator,
42-
# serving_type: str,
43+
serving_type: str,
4344
host: str = "0.0.0.0",
4445
port: int = 8000,
4546
*,
@@ -88,10 +89,18 @@ async def who_are_you():
8889
for route, fn in get_fastapi_routes(op_or_app):
8990
app.add_api_route(route, fn, methods=["POST"], response_class=JSONResponse)
9091

91-
elif type(op_or_app) == FastAPI:
92+
elif serving_type == SupportedServingTypes.FASTAPI:
9293
# app.mount("/x", op_or_app)
94+
logger.info("Mounting FastAPI app at /x/...")
9395
app.add_api_route("/x", op_or_app, methods=["POST"], response_class=JSONResponse)
9496

97+
elif serving_type == SupportedServingTypes.FASTAPI_V2:
98+
# load all the paths from op_or_app and add them to the app except / and /metadata
99+
logger.info("Loading FastAPI app routes...")
100+
for route in op_or_app.routes:
101+
if route.path not in ["/", "/metadata"]:
102+
app.add_api_route(route.path, route.endpoint, methods=route.methods, response_class=JSONResponse)
103+
95104
# TODO: @yashbonde can we mount flask/django? https://fastapi.tiangolo.com/advanced/wsgi/
96105

97106
else:

nbox/nbxlib/tracer.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def init(self, run_data, start_heartbeat):
9191

9292
# start heartbeat in a different thread
9393
if start_heartbeat:
94+
self.thread_stop = threading.Event()
9495
self.thread = threading.Thread(target=self.hearbeat_thread_worker)
9596
self.thread.start()
9697

@@ -128,7 +129,9 @@ def __call__(self, node: Node, verbose: bool = False):
128129
def hearbeat_thread_worker(self):
129130
while True:
130131
self._rpc()
131-
sleep(self.heartbeat_every)
132+
for _ in range(self.hearbeat_thread_worker):
133+
# in future add a way to stop the thread
134+
sleep(1)
132135

133136
def stop(self):
134137
if not self.network_tracer:

0 commit comments

Comments
 (0)