Add Qwen 2.5 Coder Model Integration · Add/qwen coder · #6

Open · wants to merge 5 commits into `master`
46 changes: 39 additions & 7 deletions README.md
@@ -8,20 +8,23 @@

---

**Open Codex** is a fully open-source command-line AI assistant inspired by OpenAI Codex, supporting local language models like `phi-4-mini`.
**Open Codex** is a fully open-source command-line AI assistant inspired by OpenAI Codex, supporting optimized local language models.

No API key is required. Everything runs locally.
No API key is required for the default model. Everything runs locally.

Supports:
- **One-shot mode**: `open-codex "list all folders"` -> returns shell command
- 🧠 Local-only execution using supported OS models (currently `phi-4-mini`)
- 🧠 Local-only execution using optimized models:
- phi-4-mini (default, no auth required)
- qwen-2.5-coder (auth required, enhanced for coding tasks)

---
## ✨ Features

- Natural Language to Shell Command (via local models)
- Works on macOS, Linux, and Windows (Python-based)
- Confirmation before execution
- Smart command validation and error handling
- Real-time command output streaming
- Add to clipboard / abort / execute prompt
- One-shot interaction mode (interactive and function-calling coming soon)
- Colored terminal output for better readability
@@ -76,13 +79,42 @@ Once installed, you can use the `open-codex` CLI globally.

### One-shot mode

Basic usage with default model (phi-4-mini):
```bash
open-codex "untar file abc.tar"
open-codex "list all python files"
```

✅ Codex suggests a shell command
Using the Qwen model for enhanced coding tasks:
```bash
# First, set your Hugging Face token
export HUGGINGFACE_TOKEN=your_token_here

# Then use the Qwen model
open-codex --model qwen-2.5-coder "find python files modified today"

# Or provide token directly
open-codex --model qwen-2.5-coder --hf-token your_token_here "your command"
```

✅ Codex suggests a validated shell command
✅ Shows real-time command output
✅ Provides clear error messages
✅ Asks for confirmation / add to clipboard / abort
✅ Executes if approved

### Model Overview

#### phi-4-mini (Default)
- Fast and lightweight
- No authentication required
- Optimized for quick shell commands
- Best for basic file operations and system tasks

#### qwen-2.5-coder
- Enhanced for coding tasks
- Requires Hugging Face authentication
- Improved command validation
- Better for complex development tasks

---

15 changes: 12 additions & 3 deletions src/open_codex/agent_builder.py
@@ -1,14 +1,23 @@
from importlib.resources import files
from typing import Literal, Optional

from open_codex.agents.phi_4_mini import AgentPhi4Mini
from open_codex.agents.qwen_25_coder import AgentQwen25Coder
from open_codex.interfaces.llm_agent import LLMAgent

class AgentBuilder:
ModelType = Literal["phi-4-mini", "qwen-2.5-coder"]

class AgentBuilder:
@staticmethod
def get_agent() -> LLMAgent:
def get_agent(model: ModelType = "phi-4-mini", hf_token: Optional[str] = None) -> LLMAgent:
system_prompt = files("open_codex.resources").joinpath("prompt.txt").read_text(encoding="utf-8")
return AgentPhi4Mini(system_prompt=system_prompt)

if model == "phi-4-mini":
return AgentPhi4Mini(system_prompt=system_prompt)
elif model == "qwen-2.5-coder":
return AgentQwen25Coder(system_prompt=system_prompt, hf_token=hf_token)
else:
raise ValueError(f"Unsupported model: {model}")

@staticmethod
def read_file(file_path: str) -> str:
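For context, here is a hypothetical sketch (not part of this diff) of how a CLI entry point could forward the `--model` and `--hf-token` flags from the README into the new `AgentBuilder.get_agent` signature. The `main` function and argument parsing are assumptions for illustration; only `get_agent(model=..., hf_token=...)` and `one_shot_mode` come from the code in this PR.

```python
import argparse
import os

from open_codex.agent_builder import AgentBuilder


def main() -> None:
    # Flag names mirror the README examples; this wiring is illustrative only.
    parser = argparse.ArgumentParser(prog="open-codex")
    parser.add_argument("prompt", help="natural-language request")
    parser.add_argument("--model", choices=["phi-4-mini", "qwen-2.5-coder"],
                        default="phi-4-mini")
    parser.add_argument("--hf-token", default=os.environ.get("HUGGINGFACE_TOKEN"),
                        help="Hugging Face token (only needed for qwen-2.5-coder)")
    args = parser.parse_args()

    # Dispatch to the selected agent via the extended AgentBuilder API.
    agent = AgentBuilder.get_agent(model=args.model, hf_token=args.hf_token)
    print(agent.one_shot_mode(args.prompt))


if __name__ == "__main__":
    main()
```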
65 changes: 38 additions & 27 deletions src/open_codex/agents/phi_4_mini.py
@@ -1,6 +1,7 @@
import contextlib
import os
import time
import multiprocessing
from typing import List, cast

from huggingface_hub import hf_hub_download # type: ignore
@@ -13,21 +14,19 @@ def download_model(self, model_filename: str,
repo_id: str,
local_dir: str) -> str:
print(
"\nπŸ€– Thank you for using Open Codex!\n"
"πŸ“¦ For the first run, we need to download the model from Hugging Face.\n"
"⏬ This only happens once – it’ll be cached locally for future use.\n"
"πŸ”„ Sit tight, the download will begin now...\n"
"\nπŸ€– Welcome to Open Codex!\n"
"πŸ“¦ First run requires downloading the model.\n"
"⚑️ This model is optimized for quick responses.\n"
)
print("\n⏬ Downloading model phi4-mini ...")

start = time.time()
model_path:str = hf_hub_download(
repo_id=repo_id,
filename=model_filename,
local_dir=local_dir,
)
end = time.time()
print(f"βœ… Model downloaded in {end - start:.2f}s\n")
duration = time.time() - start
print(f"βœ… Model downloaded ({duration:.1f}s)")
return model_path

def __init__(self, system_prompt: str):
Expand All @@ -36,41 +35,53 @@ def __init__(self, system_prompt: str):
local_dir = os.path.expanduser("~/.cache/open-codex")
model_path = os.path.join(local_dir, model_filename)

# check if the model is already downloaded
if not os.path.exists(model_path):
# download the model
model_path = self.download_model(model_filename, repo_id, local_dir)
else:
print(f"We are locking and loading the model for you...\n")
print("πŸš€ Loading Phi-4-mini model...")

# Get optimal thread count for the system
n_threads = min(4, multiprocessing.cpu_count())

# suppress the stderr output from llama_cpp
# this is a workaround for the llama_cpp library
# which prints a lot of warnings and errors to stderr
# when loading the model
# this is a temporary solution until the library is fixed
with AgentPhi4Mini.suppress_native_stderr():
lib_dir = os.path.join(os.path.dirname(__file__), "llama_cpp", "lib")
self.llm: Llama = Llama(
lib_path=os.path.join(lib_dir, "libllama.dylib"),
model_path=model_path)
lib_path = os.path.join(os.path.dirname(__file__), "llama_cpp", "lib", "libllama.dylib")
llama_kwargs = {
"model_path": model_path,
"n_ctx": 2048,
"n_threads": n_threads,
"n_batch": 256,
"use_mlock": True,
"use_mmap": True,
}

self.system_prompt = system_prompt
if os.path.exists(lib_path):
llama_kwargs["lib_path"] = lib_path

self.llm: Llama = Llama(**llama_kwargs)
print("✨ Model ready!")


self.system_prompt = system_prompt

def one_shot_mode(self, user_input: str) -> str:
chat_history = [{"role": "system", "content": self.system_prompt}]
chat_history.append({"role": "user", "content": user_input})
full_prompt = self.format_chat(chat_history)

with AgentPhi4Mini.suppress_native_stderr():
output_raw = self.llm(prompt=full_prompt, max_tokens=100, temperature=0.2, stream=False)
output_raw = self.llm(
prompt=full_prompt,
max_tokens=100,
temperature=0.2,
stream=False,
top_p=0.1, # More focused responses
repeat_penalty=1.1 # Reduce repetition
)

# unfortunately llama_cpp has a union type for the output
output = cast(CreateCompletionResponse, output_raw)

assistant_reply : str = output["choices"][0]["text"].strip()
return assistant_reply


assistant_reply: str = output["choices"][0]["text"].strip()
return assistant_reply

def format_chat(self, messages: List[dict[str, str]]) -> str:
chat_prompt = ""
for msg in messages:
124 changes: 124 additions & 0 deletions src/open_codex/agents/qwen_25_coder.py
@@ -0,0 +1,124 @@
import time
import os
import multiprocessing
from typing import cast, Optional, List
from llama_cpp import CreateCompletionResponse, Llama
from open_codex.interfaces.llm_agent import LLMAgent
import contextlib
from huggingface_hub import hf_hub_download, login

class AgentQwen25Coder(LLMAgent):
def download_model(self, model_filename: str,
repo_id: str,
local_dir: str,
token: Optional[str] = None) -> str:
print(
"\nπŸ€– Welcome to Open Codex!\n"
"πŸ“¦ First run requires downloading the model.\n"
"⚑️ This model is optimized for quick responses.\n"
)

start = time.time()
model_path:str = hf_hub_download(
repo_id=repo_id,
filename=model_filename,
local_dir=local_dir,
token=token,
force_download=True, # Force download to ensure the latest version
)
duration = time.time() - start
print(f"βœ… Model downloaded ({duration:.1f}s)")
return model_path

def __init__(self, system_prompt: str, hf_token: Optional[str] = None):
model_filename = "Qwen2.5-Coder-1.5B-Instruct-F16.gguf" # Using correct model filename
repo_id = "unsloth/Qwen2.5-Coder-1.5B-Instruct-GGUF" # Using TheBloke's repository
local_dir = os.path.expanduser("~/.cache/open-codex")
model_path = os.path.join(local_dir, model_filename)

if not hf_token:
hf_token = os.environ.get("HUGGINGFACE_TOKEN")

if not os.path.exists(model_path):
model_path = self.download_model(model_filename, repo_id, local_dir, token=hf_token)
else:
print("πŸš€ Loading Qwen model...\n")

# Get optimal thread count for the system
n_threads = min(4, multiprocessing.cpu_count())

with AgentQwen25Coder.suppress_native_stderr():
self.llm: Llama = Llama(
model_path=model_path,
n_ctx=2048, # Smaller context for faster responses
n_threads=n_threads, # Use optimal thread count
n_batch=256, # Balanced batch size
use_mlock=True, # Lock memory to prevent swapping
use_mmap=True, # Use memory mapping for faster loading
)
print("✨ Model ready!")

self.system_prompt = system_prompt

def one_shot_mode(self, user_input: str) -> str:
chat_history = [
{"role": "system", "content": self.system_prompt},
{"role": "user", "content": "I need a shell command to find all python files"},
{"role": "assistant", "content": "find . -name \"*.py\""},
{"role": "user", "content": user_input}
]
full_prompt = self.format_chat(chat_history)

with AgentQwen25Coder.suppress_native_stderr():
try:
output_raw = self.llm(
prompt=full_prompt,
max_tokens=100, # Limit response length
temperature=0.1, # Lower temperature for more deterministic output
top_p=0.1, # Focus on most likely tokens
top_k=10, # Limit vocabulary for shell commands
repeat_penalty=1.1, # Prevent repetition
stop=["<|im_end|>", "<|im_start|>", "\n"], # Stop at appropriate tokens
stream=False
)

output = cast(CreateCompletionResponse, output_raw)
assistant_reply: str = output["choices"][0]["text"].strip()

# Clean up response
assistant_reply = assistant_reply.split('\n')[0].strip()
assistant_reply = assistant_reply.replace("<|im_end|>", "").strip()

# Basic validation: reject output that still contains chat-template residue
if any(marker in assistant_reply for marker in ("<|", "|>")):
return "find . -name \"*.py\"" # fallback to safe command

return assistant_reply

except Exception as e:
print(f"⚠️ Model error: {str(e)}")
return ""

def format_chat(self, messages: List[dict[str, str]]) -> str:
# Qwen2.5-Instruct models use the ChatML template, which matches the
# <|im_start|>/<|im_end|> stop tokens passed to the model above.
chat_prompt = ""
for msg in messages:
chat_prompt += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n"
chat_prompt += "<|im_start|>assistant\n"
return chat_prompt

@staticmethod
@contextlib.contextmanager
def suppress_native_stderr():
"""
Redirect C-level stderr (fd 2) into /dev/null, so llama.cpp logs vanish.
"""
devnull_fd = os.open(os.devnull, os.O_WRONLY)
saved_stderr_fd = os.dup(2)
try:
os.dup2(devnull_fd, 2)
yield
finally:
os.dup2(saved_stderr_fd, 2)
os.close(devnull_fd)
os.close(saved_stderr_fd)
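For reference, a minimal, non-authoritative snippet showing the new agent driven directly from Python rather than through the CLI. The prompt-loading line mirrors `AgentBuilder.get_agent` above, and a valid `HUGGINGFACE_TOKEN` is assumed to be exported; everything else comes from the class added in this file.

```python
import os
from importlib.resources import files

from open_codex.agents.qwen_25_coder import AgentQwen25Coder

# Load the same system prompt that AgentBuilder uses (see agent_builder.py above).
system_prompt = files("open_codex.resources").joinpath("prompt.txt").read_text(encoding="utf-8")

# The token falls back to the environment, matching the logic inside __init__.
agent = AgentQwen25Coder(system_prompt=system_prompt,
                         hf_token=os.environ.get("HUGGINGFACE_TOKEN"))
print(agent.one_shot_mode("list all python files modified today"))
```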