Pass client_metadata around to track behavior
pseudotensor committed Oct 18, 2024
1 parent 048d389 commit 49355e9
Showing 13 changed files with 69 additions and 12 deletions.
3 changes: 2 additions & 1 deletion gradio_utils/grclient.py
@@ -391,6 +391,7 @@ def query_or_summarize_or_extract(
image_guidance_scale: float = 3.0,
image_num_inference_steps: int = 30,
visible_models: Union[str, int, list] = None,
client_metadata: str = '',
# don't use the below (no doc string stuff) block
num_return_sequences: int = None,
chat: bool = True,
@@ -582,7 +583,7 @@ def query_or_summarize_or_extract(
Note that unlike h2ogpt_key, this visible_models only applies to this running h2oGPT server,
and the value is not used to access the inference server.
If need a visible_models for an inference server, then use --model_lock and group together.
:param client_metadata: free-form metadata string supplied by the client; passed through and logged so per-request behavior can be tracked
:param asserts: whether to do asserts to ensure handling is correct
Returns: summary/answer: str or extraction List[str]
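For context on the new parameter above, a hedged sketch of passing it through the Gradio client follows; the GradioClient class name, the server URL, and the instruction argument name are assumptions not shown in this hunk — only visible_models and client_metadata come from the signature change itself.

# Minimal sketch, not part of the commit: assumes gradio_utils/grclient.py exposes
# a GradioClient class and that an h2oGPT server is running locally.
from gradio_utils.grclient import GradioClient

client = GradioClient("http://localhost:7860")  # placeholder server URL

# client_metadata is a free-form string; the server passes it along and logs it,
# so any request/trace identifier the caller wants to track works here.
answer = client.query_or_summarize_or_extract(
    instruction="Summarize the attached report.",  # argument name assumed, not in this hunk
    visible_models=0,                               # from the signature shown above
    client_metadata="my-app:request-42",            # new parameter added by this commit
)
print(answer)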
14 changes: 8 additions & 6 deletions openai_server/agent_utils.py
@@ -193,7 +193,8 @@ def get_ret_dict_and_handle_files(chat_result, chat_result_planning,
autogen_run_code_in_docker, autogen_stop_docker_executor, executor,
agent_venv_dir, agent_code_writer_system_message, agent_system_site_packages,
system_message_parts,
autogen_code_restrictions_level, autogen_silent_exchange):
autogen_code_restrictions_level, autogen_silent_exchange,
client_metadata=''):
# DEBUG
if agent_verbose:
print("chat_result:", chat_result_planning)
@@ -221,8 +222,8 @@ def get_ret_dict_and_handle_files(chat_result, chat_result_planning,
internal_file_names_norm_paths = [os.path.normpath(f) for f in internal_file_names]
# filter out internal files for RAG case
file_list = [f for f in file_list if os.path.normpath(f) not in internal_file_names_norm_paths]
if agent_verbose:
print("file_list:", file_list)
if agent_verbose or client_metadata:
print(f"client_metadata: {client_metadata} file_list: {file_list}", flush=True)

image_files, non_image_files = identify_image_files(file_list)
# keep no more than 10 image files among latest files created
@@ -266,6 +267,7 @@ def cleanup_response(x):
if file_ids:
ret_dict.update(dict(file_ids=file_ids))
if chat_result and hasattr(chat_result, 'chat_history'):
print(f"client_metadata: {client_metadata}: chat history: {len(chat_result.chat_history)}", file=sys.stderr)
ret_dict.update(dict(chat_history=chat_result.chat_history))
if chat_result and hasattr(chat_result, 'cost'):
if hasattr(chat_result_planning, 'cost'):
@@ -294,13 +296,13 @@ def cleanup_response(x):
if not summary and len(chat_result.chat_history) >= 3:
summary = cleanup_response(chat_result.chat_history[-3]['content'])
if summary:
print("Made summary from chat history: %s" % summary, file=sys.stderr)
print(f"Made summary from chat history: {summary} : {client_metadata}", file=sys.stderr)
chat_result.summary = summary
else:
print("Did NOT make and could not make summary", file=sys.stderr)
print(f"Did NOT make and could not make summary {client_metadata}", file=sys.stderr)
chat_result.summary = 'No summary or chat history available'
else:
print("Did NOT make any summary", file=sys.stderr)
print(f"Did NOT make any summary {client_metadata}", file=sys.stderr)
chat_result.summary = 'No summary available'

if chat_result:
14 changes: 12 additions & 2 deletions openai_server/autogen_2agent_backend.py
@@ -31,7 +31,10 @@ def run_autogen_2agent(query=None,
agent_system_site_packages=None,
autogen_code_restrictions_level=None,
autogen_silent_exchange=None,
client_metadata=None,
agent_verbose=None) -> dict:
if client_metadata:
print("BEGIN 2AGENT: client_metadata: %s" % client_metadata, flush=True)
assert agent_type in ['autogen_2agent', 'auto'], "Invalid agent_type: %s" % agent_type
# raise openai.BadRequestError("Testing Error Handling")
# raise ValueError("Testing Error Handling")
@@ -120,7 +123,9 @@ def code_writer_terminate_func(msg):

code_writer_kwargs = dict(system_message=system_message,
llm_config={'timeout': autogen_timeout,
'extra_body': dict(enable_caching=enable_caching),
'extra_body': dict(enable_caching=enable_caching,
client_metadata=client_metadata,
),
"config_list": [{"model": model,
"api_key": api_key,
"base_url": base_url,
@@ -198,6 +203,8 @@ def code_writer_terminate_func(msg):
else:
chat_result = code_executor_agent.initiate_chat(**chat_kwargs)

if client_metadata:
print("END 2AGENT: client_metadata: %s" % client_metadata, flush=True)
ret_dict = get_ret_dict_and_handle_files(chat_result,
chat_result_planning,
model,
@@ -206,6 +213,9 @@
agent_venv_dir, agent_code_writer_system_message,
agent_system_site_packages,
system_message_parts,
autogen_code_restrictions_level, autogen_silent_exchange)
autogen_code_restrictions_level, autogen_silent_exchange,
client_metadata=client_metadata)
if client_metadata:
print("END FILES FOR 2AGENT: client_metadata: %s" % client_metadata, flush=True)

return ret_dict
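A hedged sketch of driving the two-agent backend directly with the new argument; apart from query and client_metadata, which appear in the hunks above, the keyword arguments and their values are assumptions about the full run_autogen_2agent signature (in practice the OpenAI-compatible server calls this function).

# Minimal sketch, not part of the commit: parameter names other than query and
# client_metadata are assumptions about run_autogen_2agent's full signature.
from openai_server.autogen_2agent_backend import run_autogen_2agent

ret_dict = run_autogen_2agent(
    query="Plot a sine wave and save it to sine.png",
    model="h2oai/h2ogpt-model",             # placeholder backing-LLM name
    api_key="EMPTY",                        # placeholder key for the LLM endpoint
    base_url="http://localhost:5000/v1",    # placeholder LLM endpoint
    client_metadata="my-app:request-42",    # echoed in the BEGIN/END 2AGENT log lines
)
# chat_history and file_ids are attached to ret_dict by get_ret_dict_and_handle_files
print(ret_dict.get("chat_history", []))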
14 changes: 13 additions & 1 deletion openai_server/server.py
@@ -147,6 +147,7 @@ class H2oGPTParams(BaseModel):
video_file: Union[str, list] | None = None

model_lock: dict | None = None
client_metadata: str | None = ''

response_format: Optional[ResponseFormat] = Field(
default=None,
@@ -604,7 +605,10 @@ async def openai_chat_completions(request: Request,
request_data_dict['authorization'] = authorization

str_uuid = str(uuid.uuid4())
logging.info(f"Chat Completions request {str_uuid}: {len(request_data_dict)} items")
if 'client_metadata' in request_data_dict:
logging.info(f"Chat Completions request {str_uuid}: {len(request_data_dict)} items client_metadata: {request_data_dict['client_metadata']}")
else:
logging.info(f"Chat Completions request {str_uuid}: {len(request_data_dict)} items")

# don't allow tool use with guided_json for now
if request_data_dict['guided_json'] and request_data_dict.get('tools'):
@@ -650,9 +654,13 @@ async def generator():
try:
async for resp1 in astream_chat_completions(request_data_dict, stream_output=True):
if await request.is_disconnected():
if 'client_metadata' in request_data_dict:
logging.info(f"Chat Completions disconnected {str_uuid}: client_metadata: {request_data_dict['client_metadata']}")
return

yield {"data": json.dumps(resp1)}
if 'client_metadata' in request_data_dict:
logging.info(f"Chat Completions streaming finished {str_uuid}: client_metadata: {request_data_dict['client_metadata']}")
except Exception as e1:
print(traceback.format_exc())
# Instead of raising an HTTPException, we'll yield a special error message
@@ -665,6 +673,8 @@ async def generator():
}
}
print(error_response)
if 'client_metadata' in request_data_dict:
logging.info(f"Chat Completions error {str_uuid}: client_metadata: {request_data_dict['client_metadata']}: {error_response}")
yield {"data": json.dumps(error_response)}
# After yielding the error, we'll close the connection
return
@@ -680,6 +690,8 @@ async def generator():
if await request.is_disconnected():
return
response = resp
if 'client_metadata' in request_data_dict:
logging.info(f"Chat Completions non-streaming finished {str_uuid}: client_metadata: {request_data_dict['client_metadata']}")
return JSONResponse(response)
except Exception as e:
traceback.print_exc()
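A hedged sketch of how an OpenAI-compatible caller could supply client_metadata so it appears in the request, disconnect, finish, and error log lines added above; base_url and model are placeholders, and extra_body is the openai-python mechanism for sending non-standard fields (the same pattern the agent backend uses in its llm_config).

# Minimal sketch, not part of the commit: endpoint and model name are placeholders.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:5000/v1", api_key="EMPTY")

resp = client.chat.completions.create(
    model="h2oai/h2ogpt-model",                             # placeholder model name
    messages=[{"role": "user", "content": "Hello there"}],
    # Non-standard fields ride in extra_body and land in H2oGPTParams.client_metadata.
    extra_body={"client_metadata": "my-app:request-42"},
)
print(resp.choices[0].message.content)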
2 changes: 2 additions & 0 deletions src/cli.py
@@ -115,6 +115,8 @@ def run_cli( # for local function:
guided_grammar=None,
guided_whitespace_pattern=None,

client_metadata=None,

# for evaluate kwargs
captions_model=None,
caption_loader=None,
1 change: 1 addition & 0 deletions src/client_test.py
@@ -219,6 +219,7 @@ def get_args(prompt, prompt_type=None, chat=False,
guided_whitespace_pattern=None,

model_lock=None,
client_metadata=None,
)
diff = 0
from evaluate_params import eval_func_param_names
1 change: 1 addition & 0 deletions src/eval.py
@@ -133,6 +133,7 @@ def run_eval( # for local function:
guided_choice=None,
guided_grammar=None,
guided_whitespace_pattern=None,
client_metadata=None,

# for evaluate kwargs:
captions_model=None,
1 change: 1 addition & 0 deletions src/evaluate_params.py
@@ -127,6 +127,7 @@
"guided_whitespace_pattern",

"model_lock",
"client_metadata",
]
)

20 changes: 19 additions & 1 deletion src/gen.py
@@ -541,6 +541,7 @@ def main(

enable_heap_analytics: bool = True,
heap_app_id: str = "1680123994",
client_metadata: str = '',

cert_lookup_directory: str = "/etc/ssl/more-certs",
):
@@ -2019,6 +2020,7 @@ def main(
guided_choice,
guided_grammar,
guided_whitespace_pattern,
client_metadata,

verbose,
)
@@ -2573,6 +2575,7 @@ def evaluate(
guided_whitespace_pattern,

model_lock, # not really used by evaluate, just pure API
client_metadata,

# END NOTE: Examples must have same order of parameters
captions_model=None,
@@ -2675,6 +2678,8 @@ def evaluate(

stream_map=None,
):
if client_metadata:
print(f"evaluate start client_metadata: {client_metadata}", flush=True)
# ensure passed these
assert concurrency_count is not None
assert memory_restriction_level is not None
@@ -2778,6 +2783,8 @@ def evaluate(
yield dict(response=response, sources=[], save_dict=save_dict, llm_answers=dict(response_raw=''),
response_no_refs="Generated image for %s" % instruction,
sources_str="", prompt_raw=instruction)
if client_metadata:
print(f"evaluate finish image client_metadata: {client_metadata}", flush=True)
return

no_model_msg = "Please choose a base model with --base_model (CLI) or load in Models Tab (gradio).\n" \
@@ -3416,6 +3423,7 @@ def evaluate(
guided_choice=guided_choice,
guided_grammar=guided_grammar,
guided_whitespace_pattern=guided_whitespace_pattern,
client_metadata=client_metadata,

json_vllm=json_vllm,

@@ -3459,8 +3467,12 @@ def evaluate(
# so nothing to give to LLM), then slip through and ask LLM
# Or if llama/gptj, then just return since they had no response and can't go down below code path
# don't clear torch cache here, delays multi-generation, and bot(), all_bot(), and evaluate_nochat() do it
if client_metadata:
print(f"evaluate finish run_qa_db client_metadata: {client_metadata}", flush=True)
return

if client_metadata:
print(f"evaluate middle non-langchain client_metadata: {client_metadata}", flush=True)
# NOT LANGCHAIN PATH, raw LLM
# restrict instruction + , typically what has large input
prompt, \
@@ -3929,6 +3941,7 @@ def evaluate(
guided_whitespace_pattern=guided_whitespace_pattern,

model_lock=None, # already set
client_metadata=client_metadata,
)
assert len(set(list(client_kwargs.keys())).symmetric_difference(eval_func_param_names)) == 0
api_name = '/submit_nochat_api' # NOTE: like submit_nochat but stable API for string dict passing
@@ -4058,6 +4071,8 @@ def evaluate(
# if not streaming, only place yield should be done
yield dict(response=response, sources=sources, save_dict=save_dict, llm_answers=dict(response_raw=response_raw),
response_no_refs=response, sources_str='', prompt_raw=prompt)
if client_metadata:
print(f"evaluate finish inference server client_metadata: {client_metadata}", flush=True)
return
else:
assert not inference_server, "inference_server=%s not supported" % inference_server
@@ -4294,6 +4309,8 @@ def evaluate(
if verbose:
print('Post-Generate: %s decoded_output: %s' % (
str(datetime.now()), len(decoded_output) if decoded_output else -1), flush=True)
if client_metadata:
print(f"evaluate HF finish client_metadata: {client_metadata}", flush=True)


inputs_list_names = list(inspect.signature(evaluate).parameters)
@@ -4514,6 +4531,7 @@ def get_generate_params(model_lower,
guided_choice,
guided_grammar,
guided_whitespace_pattern,
client_metadata,

verbose,
):
@@ -4764,8 +4782,8 @@ def mean(a):""", ''] + params_list,
guided_choice,
guided_grammar,
guided_whitespace_pattern,

None, # model_lock, only client, don't need default value
client_metadata,
]
# adjust examples if non-chat mode
if not chat:
7 changes: 7 additions & 0 deletions src/gpt_langchain.py
@@ -980,6 +980,7 @@ class GradioInference(AGenerateStreamFirst, H2Oagenerate, LLM):
guided_choice: Any = None
guided_grammar: Any = None
guided_whitespace_pattern: Any = None
client_metadata: Any = ''

async_sem: Any = None
count_input_tokens: Any = 0
@@ -1146,6 +1147,7 @@ def setup_call(self, prompt):
guided_choice=self.guided_choice,
guided_grammar=self.guided_grammar,
guided_whitespace_pattern=self.guided_whitespace_pattern,
client_metadata=self.client_metadata,
)
api_name = '/submit_nochat_api' # NOTE: like submit_nochat but stable API for string dict passing
# let inner gradio count input tokens
@@ -7013,6 +7015,7 @@ def _run_qa_db(query=None,
guided_choice=None,
guided_grammar=None,
guided_whitespace_pattern=None,
client_metadata=None,

json_vllm=False,

@@ -7044,6 +7047,8 @@ def _run_qa_db(query=None,
:param answer_with_sources
:return:
"""
if client_metadata:
print("RUNQADB START client_metadata: %s" % client_metadata, flush=True)
t_run = time.time()
if LangChainAgent.SMART.value in langchain_agents:
# FIXME: support whatever model/user supports
@@ -7480,6 +7485,8 @@ def _run_qa_db(query=None,
print('response: %s' % ret)
yield dict(prompt_raw=prompt, response=ret, sources=sources, num_prompt_tokens=num_prompt_tokens,
llm_answers=llm_answers, response_no_refs=ret_no_refs, sources_str=sources_str)
if client_metadata:
print("RUNQADB FINISH client_metadata: %s" % client_metadata, flush=True)
return


1 change: 1 addition & 0 deletions src/gradio_runner.py
@@ -1565,6 +1565,7 @@ def show_sys(x):
info="prompt to remind LLM to use json code when no schema",
value=kwargs[
'json_code2_post_prompt_reminder'])
client_metadata = gr.Textbox(value='', visible=False)

def show_llava(x):
return x
2 changes: 1 addition & 1 deletion src/version.py
@@ -1 +1 @@
__version__ = "5e55aa69ee0113fcc8b55269af36254b5109ec02"
__version__ = "048d3891525bc87788e4241291e647a35a675bf2"
1 change: 1 addition & 0 deletions tests/test_eval.py
@@ -194,6 +194,7 @@ def run_eval1(cpu=False, bits=None, base_model='h2oai/h2ogpt-oig-oasst1-512-6_9b
'guided_choice': '',
'guided_grammar': '',
'guided_whitespace_pattern': None,
'client_metadata': None,
}
if cpu and bits == 32:
expected1.update({'image_audio_loaders': np.array([], dtype=object)})
