- # --- Braincell Orchestrator (Middleman Proxy) ---
+ # --- bitnet Orchestrator (Middleman Proxy) ---
from pydantic import BaseModel

from fastapi import FastAPI, HTTPException, Query, Depends
import time
import httpx
+ from typing import List
+ from pydantic import BaseModel, Field
+ from fastapi import HTTPException
+ import asyncio
+
# --- Server Process Management ---
# Each server instance is tracked by a unique (host, port) key
server_processes = {}
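Note (not part of the diff): the orchestrator keys every spawned llama-server instance by its (host, port) pair and tracks it in the two registries above. A minimal sketch of that pattern, with the subprocess command reduced to a placeholder (the real command is built later in initialize_server_endpoint):

import subprocess
import sys

host, port = "127.0.0.1", 8081
key = (host, port)
# Placeholder child process standing in for a llama-server instance.
server_processes[key] = subprocess.Popen([sys.executable, "-c", "import time; time.sleep(60)"])
server_configs[key] = {"threads": 1, "ctx_size": 2048, "host": host, "port": port}

# Liveness check used later by the proxy: poll() is None while the process is still running.
proc = server_processes.get(key)
running = proc is not None and proc.poll() is None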
@@ -40,12 +45,11 @@ def _max_threads():
    return os.cpu_count() or 1

async def initialize_server_endpoint(
-     model: ModelEnum,
-     threads: int = Query(os.cpu_count() // 2, gt=0, le=os.cpu_count()),
+     threads: int = Query(1, gt=0, le=os.cpu_count()),
    ctx_size: int = Query(2048, gt=0),
-     port: int = Query(8081, gt=1023),
+     port: int = Query(8081, gt=8080, le=65535),
    system_prompt: str = Query("You are a helpful assistant.", description="Unique system prompt for this server instance"),
-     n_predict: int = Query(4096, gt=0, description="Number of tokens to predict for the server instance"),
+     n_predict: int = Query(256, gt=0, description="Number of tokens to predict for the server instance."),
    temperature: float = Query(0.8, gt=0.0, le=2.0, description="Temperature for sampling")
):
    """
@@ -71,7 +75,7 @@ async def initialize_server_endpoint(
        raise HTTPException(status_code=429, detail=f"Cannot start server: would oversubscribe CPU threads (in use: {threads_in_use}, requested: {threads}, max: {max_threads})")
    command = [
        server_path,
-         '-m', model.value,
+         '-m', "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf",
        '-c', str(ctx_size),
        '-t', str(threads),
        '-n', str(n_predict),
@@ -96,7 +100,7 @@ async def initialize_server_endpoint(
        raise HTTPException(status_code=500, detail=f"Server failed to start. Stderr: {stderr_output}")
    server_processes[key] = proc
    server_configs[key] = {
-         "model": model.value,
+         "model": "models/BitNet-b1.58-2B-4T/ggml-model-i2_s.gguf",
        "threads": threads,
        "ctx_size": ctx_size,
        "host": host,
@@ -241,43 +245,70 @@ def get_model_sizes():

class ChatRequest(BaseModel):
    message: str
-     port: int
-     # Optionally add user/session id, etc.
+     port: int = 8081
+     threads: int = 1
+     ctx_size: int = 2048
+     n_predict: int = 256
+     temperature: float = 0.8

- def chat_with_braincell(
+ def chat_with_bitnet(
    chat: ChatRequest
):
    """
-     Middleman endpoint: receives a chat message and forwards it to the specified braincell (llama server instance) by port.
-     Returns the response from the braincell.
+     Middleman endpoint: receives a chat message and forwards it to the specified bitnet (llama server instance) by port.
+     Returns the response from the bitnet.
    """
    host = "127.0.0.1"
    key = (host, chat.port)
    proc = server_processes.get(key)
    cfg = server_configs.get(key)
    if not (proc and proc.poll() is None and cfg):
-         raise HTTPException(status_code=503, detail=f"Braincell server not running on {host}:{chat.port}. Initialize it first.")
+         raise HTTPException(status_code=503, detail=f"bitnet server not running on {host}:{chat.port}. Initialize it first.")
    server_url = f"http://{host}:{chat.port}/completion"
    payload = {
-         "prompt": chat.message
+         "prompt": chat.message,
+         "threads": chat.threads,
+         "ctx_size": chat.ctx_size,
+         "n_predict": chat.n_predict,
+         "temperature": chat.temperature
    }
    async def _chat():
        async with httpx.AsyncClient() as client:
            try:
-                 response = await client.post(server_url, json=payload, timeout=120.0)
+                 response = await client.post(server_url, json=payload, timeout=180.0)
                response.raise_for_status()
                result_data = response.json()
                content = result_data.get("content", result_data)
                return {"result": content}
            except httpx.TimeoutException:
-                 raise HTTPException(status_code=504, detail="Request to braincell server timed out.")
+                 raise HTTPException(status_code=504, detail="Request to bitnet server timed out.")
            except httpx.ConnectError:
-                 raise HTTPException(status_code=503, detail=f"Could not connect to braincell server at {server_url}. Is it running?")
+                 raise HTTPException(status_code=503, detail=f"Could not connect to bitnet server at {server_url}. Is it running?")
            except httpx.RequestError as e:
-                 raise HTTPException(status_code=500, detail=f"Error during request to braincell server: {str(e)}")
+                 raise HTTPException(status_code=500, detail=f"Error during request to bitnet server: {str(e)}")
            except httpx.HTTPStatusError as e:
                error_detail = e.response.text or str(e)
-                 raise HTTPException(status_code=e.response.status_code, detail=f"Braincell server returned error: {error_detail}")
+                 raise HTTPException(status_code=e.response.status_code, detail=f"bitnet server returned error: {error_detail}")
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Unexpected error during chat: {str(e)}")
    return _chat
+
+ class MultiChatRequest(BaseModel):
+     requests: List[ChatRequest]
+
+ async def multichat_with_bitnet(multichat: MultiChatRequest):
+     async def run_chat(chat_req: ChatRequest):
+         chat_fn = chat_with_bitnet(chat_req)
+         return await chat_fn()
+     results = await asyncio.gather(*(run_chat(req) for req in multichat.requests), return_exceptions=True)
+     # Format results: if exception, return error message
+     formatted = []
+     for res in results:
+         if isinstance(res, Exception):
+             if isinstance(res, HTTPException):
+                 formatted.append({"error": res.detail, "status_code": res.status_code})
+             else:
+                 formatted.append({"error": str(res)})
+         else:
+             formatted.append(res)
+     return {"results": formatted}