Skip to content

Commit

Permalink
fix: remove deprecated models bagel & psyfighter1 (OpenRouterTeam#92)
Browse files: browse the repository at this point in the history
  • Loading branch information
sambarnes authored Apr 24, 2024
1 parent 5202823 commit d6e2819
Showing 1 changed file with 0 additions and 47 deletions.
47 changes: 0 additions & 47 deletions modal/runner/containers/vllm_unified.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -125,17 +125,6 @@ def __init__(self):
quantization="GPTQ",
)

# Psyfighter 13B, served from its GPTQ-quantized build on a single A10G.
_psyfighter = "TheBloke/Psyfighter-13B-GPTQ"
VllmContainer_JebCarterPsyfighter13B = _make_container(
    # Name is passed by keyword, matching every other _make_container call
    # in this module (previously the lone positional argument).
    name="VllmContainer_JebCarterPsyfighter13B",
    model_name=_psyfighter,
    gpu=modal.gpu.A10G(count=1),
    concurrent_inputs=4,
    max_containers=5,
    # NOTE(review): presumably seconds (i.e. 2 minutes) -- confirm against
    # _make_container's signature.
    container_idle_timeout=2 * 60,
    quantization="GPTQ",
)

_psyfighter2 = "TheBloke/LLaMA2-13B-Psyfighter2-GPTQ"
VllmContainer_KoboldAIPsyfighter2 = _make_container(
name="VllmContainer_KoboldAIPsyfighter2",
Expand All @@ -146,38 +135,6 @@ def __init__(self):
quantization="GPTQ",
)

# Noromaid Mixtral 8x7B Instruct v3, served from its GPTQ-quantized build.
# Runs on one A100 (memory=40) and is capped at a single container.
_noromaid = "TheBloke/Noromaid-v0.1-mixtral-8x7b-Instruct-v3-GPTQ"
VllmContainer_NeverSleepNoromaidMixtral8x7B = _make_container(
name="VllmContainer_NeverSleepNoromaidMixtral8x7B",
model_name=_noromaid,
gpu=modal.gpu.A100(count=1, memory=40),
concurrent_inputs=4,
max_containers=1,
quantization="GPTQ",
# dtype is pinned explicitly rather than left to auto-detection:
dtype="float16", # vLLM errors when using dtype="auto" with this model
)

# Bagel 34B v0.2, served from its GPTQ-quantized build.
# Runs on one A100 (memory=40) and is capped at a single container.
_bagel = "TheBloke/bagel-34b-v0.2-GPTQ"
VllmContainer_JohnDurbinBagel34B = _make_container(
name="VllmContainer_JohnDurbinBagel34B",
model_name=_bagel,
gpu=modal.gpu.A100(count=1, memory=40),
concurrent_inputs=4,
max_containers=1,
# Context length is deliberately capped well below the model's native limit.
max_model_len=8_000, # Reduced from original 200k
quantization="GPTQ",
# dtype is pinned explicitly rather than left to auto-detection:
dtype="float16", # vLLM errors when using dtype="auto" with this model
)

# Midnight Rose 70B v2.0.3, served from its GPTQ-quantized build.
# Runs on one H100 and is capped at a single container.
_midnight_rose = "sambarnes/Midnight-Rose-70B-v2.0.3-GPTQ"
VllmContainer_MidnightRose70B = _make_container(
name="VllmContainer_MidnightRose70B",
model_name=_midnight_rose,
gpu=modal.gpu.H100(count=1),
concurrent_inputs=4,
max_containers=1,
quantization="GPTQ",
)

# A re-mapping of model names to their respective quantized models.
# From the outside, the model name is the original, but internally,
Expand All @@ -189,9 +146,5 @@ def __init__(self):
# Keys are the original model names as seen from the outside; values are the
# module-level names of the quantized builds that are actually loaded.
QUANTIZED_MODELS = {
"microsoft/phi-2": _phi2,
"Intel/neural-chat-7b-v3-1": _neural_chat,
"jebcarter/Psyfighter-13B": _psyfighter,
"KoboldAI/LLaMA2-13B-Psyfighter2": _psyfighter2,
"NeverSleep/Noromaid-v0.1-mixtral-8x7b-Instruct-v3": _noromaid,
"jondurbin/bagel-34b-v0.2": _bagel,
"sophosympatheia/Midnight-Rose-70B-v2.0.3": _midnight_rose,
}

0 comments on commit d6e2819

Please sign in to comment.