Skip to content

Commit

Permalink
Add OpenAI backend to the CI test (#869)
Browse files Browse the repository at this point in the history
  • Loading branch information
Ying1123 authored Aug 1, 2024
1 parent 9e8d2c7 commit 4075677
Show file tree
Hide file tree
Showing 16 changed files with 30 additions and 38 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/pr-e2e-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,13 @@ jobs:
pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
pip install --upgrade transformers
- name: Launch server and run benchmark
- name: Test OpenAI Backend
run: |
export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
cd sglang/test/lang
python3 test_openai_backend.py
- name: Benchmark Serving
run: |
python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-docker.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: publish docker
name: Release Docker
on:
push:
branches:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-fake-tag.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: fake tag
name: Release Fake Tag
on:
push:
branches:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-github.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: release tag
name: Release GitHub
on:
workflow_dispatch:
jobs:
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/release-pypi.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: publish to pypi
name: Release PyPI
on:
push:
branches:
Expand Down
Binary file removed assets/llama_7b.jpg
Binary file not shown.
Binary file removed assets/mixtral_8x7b.jpg
Binary file not shown.
22 changes: 0 additions & 22 deletions docs/en/benchmark_results.md

This file was deleted.

2 changes: 1 addition & 1 deletion docs/en/model_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ To port a model from vLLM to SGLang, you can compare these two files [SGLang LLa
- Change `forward()` functions, and add `input_metadata`.
- Add `EntryClass` at the end.
- Test correctness by comparing the final logits and outputs of the two following commands:
- `python3 playground/reference_hf.py --model [new model]`
- `python3 scripts/playground/reference_hf.py --model [new model]`
- `python3 -m sglang.bench_latency --model [new model] --correct --output-len 16 --trust-remote-code`
- Update [Supported Models](https://github.com/sgl-project/sglang/tree/main?tab=readme-ov-file#supported-models) at [README](../README.md).
File renamed without changes.
File renamed without changes.
File renamed without changes.
Empty file removed test/__init__.py
Empty file.
6 changes: 6 additions & 0 deletions test/lang/test_bind_cache.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
"""
Usage:
python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
python3 test_bind_cache.py
"""

import unittest

import sglang as sgl
Expand Down
4 changes: 3 additions & 1 deletion test/lang/test_srt_backend.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
Usage:
python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
python3 test_srt_backend.py
"""

import json
Expand Down
18 changes: 9 additions & 9 deletions test/lang/test_tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def few_shot_qa(s, question):
s += "A:" + sgl.gen("answer", stop="\n")

tracer = few_shot_qa.trace()
print(tracer.last_node.print_graph_dfs() + "\n")
# print(tracer.last_node.print_graph_dfs() + "\n")

def test_select(self):
@sgl.function
Expand All @@ -26,7 +26,7 @@ def capital(s):
s += "It is a city" + sgl.gen("description", stop=".")

tracer = capital.trace()
print(tracer.last_node.print_graph_dfs() + "\n")
# print(tracer.last_node.print_graph_dfs() + "\n")

def test_raise_warning(self):
@sgl.function
Expand Down Expand Up @@ -66,11 +66,11 @@ def tip_suggestion(s, topic):
s += "In summary" + sgl.gen("summary")

compiled = tip_suggestion.compile()
compiled.print_graph()
# compiled.print_graph()

sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
state = compiled.run(topic="staying healthy")
print(state.text() + "\n")
# print(state.text() + "\n")

states = compiled.run_batch(
[
Expand All @@ -80,8 +80,8 @@ def tip_suggestion(s, topic):
],
temperature=0,
)
for s in states:
print(s.text() + "\n")
# for s in states:
# print(s.text() + "\n")

def test_role(self):
@sgl.function
Expand All @@ -95,7 +95,7 @@ def multi_turn_chat(s):
backend.chat_template = get_chat_template("llama-2-chat")

compiled = multi_turn_chat.compile(backend=backend)
compiled.print_graph()
# compiled.print_graph()

def test_fork(self):
@sgl.function
Expand All @@ -118,10 +118,10 @@ def tip_suggestion(s):
s += "In summary" + sgl.gen("summary")

tracer = tip_suggestion.trace()
print(tracer.last_node.print_graph_dfs())
# print(tracer.last_node.print_graph_dfs())

a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
print(a.text())
# print(a.text())


if __name__ == "__main__":
Expand Down

0 comments on commit 4075677

Please sign in to comment.