from llmware.models import ModelCatalog, LLMWareModel
from llmware.prompts import Prompt

# launch_llmware_inference_client.py
#
# This example is intended to work in conjunction with the "launch_llmware_inference_server.py" example.
# It shows how to set up the client, e.g., a developer laptop, to quickly integrate the 'pop up' inference server.
# Before using this script, please complete the steps in the "launch_llmware_inference_server.py" example.
#
# Note: this is not intended to be a production-grade inference server. It is intended to be a fast,
# simple, easy-to-use and platform-agnostic way to rapidly test and develop LLM-based apps in a
# private-cloud, self-hosted environment.
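
# For reference, the server-side counterpart is only a few lines. The commented snippet below is a
# hedged sketch of the companion example, not verbatim: the model name and home_path are illustrative
# assumptions, so check "launch_llmware_inference_server.py" for the exact parameters.
#
#   from llmware.models import LLMWareInferenceServer
#
#   LLMWareInferenceServer("llmware/bling-1b-0.1",        # assumption: any generative model in the catalog
#                          secret_api_key="demo-test",    # must exact-match the client's secret key
#                          home_path="/home/ubuntu/",     # assumption: server-side working path
#                          verbose=True).start()          # launches the server, e.g., on port 8080
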
def client_code_setup_inference_server(server_uri_string, server_secret_key):

    # inputs were set up on the server:
    # -- example server_uri_string: concatenation of URL and PORT, e.g., http://11.123.456.789:8080
    # -- example secret key: "demo-test"

    # Step 1 - one-line 'setup' to register the new inference server
    ModelCatalog().setup_custom_llmware_inference_server(server_uri_string, secret_key=server_secret_key)
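
    # note: under the hood, this registers a model card in the ModelCatalog so that the reserved
    # model name "llmware-inference-server" resolves to the remote URI + secret key above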

    # test query and context
    query = "What is the total amount of the invoice?"

    context = "Services Vendor Inc. \n100 Elm Street Pleasantville, NY \nTO Alpha Inc. 5900 1st Street " \
              "Los Angeles, CA \nDescription Front End Engineering Service $5000.00 \n Back End Engineering " \
              "Service $7500.00 \n Quality Assurance Manager $10,000.00 \n Total Amount $22,500.00 \n" \
              "Make all checks payable to Services Vendor Inc. Payment is due within 30 days. " \
              "If you have any questions concerning this invoice, contact Bia Hermes."

    # USE CASE #1 - use LLMWareModel directly in a Prompt, like any other model
    # -- "llmware-inference-server" is a special reserved keyword that points to the registered inference server

    print("\nupdate: Use Case #1 - loading into Prompt")

    prompter = Prompt().load_model("llmware-inference-server")
    output = prompter.prompt_main(query, context=context)
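    # note: prompt_main returns a structured response - as an assumption, the generated text is
    # typically found under the "llm_response" key of the returned dictionary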
print("update: llm response - ", output)
# USE CASE # 2 - load LLMWareModel directly and invoke
print("\nupdate: Use Case #2 - loading as LLMWareModel")
llmware_gpt = LLMWareModel(model_name="llmware-inference-server")
response = llmware_gpt.inference(query, add_context=context)
print("update: llm response - ", response)
return output, response


if __name__ == "__main__":

    # insert your uri & port
    my_uri_string = "http://11.123.456.788:8080"

    # insert your secret key [note: this can be any string, but it must exactly match the key on the server]
    my_secret_key = "demo-test"

    output, response = client_code_setup_inference_server(my_uri_string, my_secret_key)
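
# usage: start the inference server first (per "launch_llmware_inference_server.py"), then run:
#   python launch_llmware_inference_client.py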