39 changes: 39 additions & 0 deletions solo_server/commands/aid.py
@@ -0,0 +1,39 @@
import sys
import typer
from litgpt import LLM
from rich.console import Console

console = Console()

# Cache the loaded model so interactive mode does not reload it on every query.
_llm = None

def get_llm():
    global _llm
    if _llm is None:
        _llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct")
    return _llm

def query_llm(query: str):
    # Reject queries longer than 9000 characters.
    if len(query) > 9000:
        typer.echo("Error: Your query exceeds the maximum allowed length of 9000 characters. It's over 9000!")
        raise typer.Exit(1)

    # Generate a response while showing a spinner.
    with console.status("Generating response...", spinner="dots"):
        response = get_llm().generate(query)
    typer.echo(response)

def interactive_mode():
    console.print("Interactive Mode (type 'exit' or 'quit' to end):", style="bold green")
    while True:
        try:
            query_text = input(">> ")
        except EOFError:
            # Treat Ctrl-D / end of input as a request to exit.
            break
        if query_text.lower() in ("exit", "quit"):
            break
        if not query_text.strip():
            continue
        query_llm(query_text)

if __name__ == "__main__":
    # If invoked with "@@" as the first argument, treat the rest as the query.
    # Otherwise, launch interactive mode.
    if len(sys.argv) > 1 and sys.argv[1] == "@@":
        if len(sys.argv) > 2:
            query_text = " ".join(sys.argv[2:])
        else:
            typer.echo("Enter your query (end with EOF / Ctrl-D):")
            query_text = sys.stdin.read().strip()
        query_llm(query_text)
    else:
        interactive_mode()
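
For context, litgpt's Python API, as used by aid.py above, loads a checkpoint once and then serves any number of generate() calls. A minimal standalone sketch (the prompt strings are illustrative only, not part of this PR):

from litgpt import LLM

# Load the checkpoint once; it can then answer repeated prompts.
llm = LLM.load("Qwen/Qwen2.5-1.5B-Instruct")

# Illustrative prompts only.
print(llm.generate("Summarize what Typer is in one sentence."))
print(llm.generate("Name one use case for a local LLM CLI."))

Invoked as a script, the command itself would look like python solo_server/commands/aid.py @@ "your question", or with no arguments to enter interactive mode.
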
46 changes: 46 additions & 0 deletions solo_server/solo.ensemble.yaml
@@ -0,0 +1,46 @@
system_information:
  operating_system: "Windows"
  cpu: "AMD64 Family 23 Model 96 Stepping 1, AuthenticAMD"
  cpu_cores: 8
  memory: "15.42GB"
  gpu:
    vendor: "NVIDIA"
    model: "NVIDIA GeForce GTX 1660 Ti"
    memory: "6144MB"
    compute_backend: "CUDA"

server_options:
  - name: "Ollama"
    recommended: true
    details: "Optimized for systems with NVIDIA GPUs and CUDA support. (Recommended for your system.)"
  - name: "vLLM"
    recommended: false
    details: "High-performance inference engine, best suited for Linux environments."
  - name: "Llama.cpp"
    recommended: false
    details: "Lightweight and cross-platform; runs efficiently on CPU-only systems."
  - name: "LitGPT"
    recommended: false
    details: "Lightning AI's PyTorch-based implementation."

default_server: "Ollama"

models:
  solo-core-model:
    model: "Qwen/Qwen2.5-1.5B-Instruct"
    description: "Primary general-purpose model."
  coding:
    model: "qwen2.5-3b-coder"
    description: "Optimized for code generation and programming tasks."
  chat:
    model: "deepseekr1-instruct-distill"
    description: "Fine-tuned for conversational and chat applications."
  robots:
    model: "ottonomy-distill"
    description: "Targeted for robotics and automation-related tasks."
  healthcare_classification:
    model: "palm"
    description: "Optimized for healthcare data classification and analysis."
  general:
    model: "Qwen/Qwen2.5-1.5B-Instruct"
    description: "Primary general-purpose model."
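
The PR does not show how solo.ensemble.yaml is consumed. A plausible sketch of the intended lookup, using a hypothetical resolve_model helper (the function name, fallback behavior, and lookup scheme are assumptions, not part of this PR):

import yaml

# Hypothetical helper: map a task alias from solo.ensemble.yaml to a model name.
# Name and fallback behavior are assumptions, not part of this PR.
def resolve_model(config_path: str, task: str) -> str:
    with open(config_path) as f:
        config = yaml.safe_load(f)
    models = config["models"]
    # Unknown tasks fall back to the general-purpose entry.
    entry = models.get(task, models["general"])
    return entry["model"]

print(resolve_model("solo_server/solo.ensemble.yaml", "coding"))   # qwen2.5-3b-coder
print(resolve_model("solo_server/solo.ensemble.yaml", "general"))  # Qwen/Qwen2.5-1.5B-Instruct
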