Added image generation functionality

Column01 · Column01 · commit 2858c2a36640 · 2025-02-08T20:10:11.000-05:00
diff --git a/README.md b/README.md
@@ -1,6 +1,10 @@
-# AI Function Agent
+# AI Function Agent (WIP!)
 
-A simple script using Qwen-Agent to access a locally running model for function/tool calling.
+A simple script using Qwen-Agent for LLM-inference-based Python function calling.
+
+- Chat with your own local or remote AI assistant that can run Python code implicitly
+- Build an AI-mediated automation ecosystem tailored to your workload
+- Generate images using the included image generation tool and the [Lumina-Image-2.0](https://huggingface.co/Alpha-VLLM/Lumina-Image-2.0) model!
 
 ## Setup
 
@@ -15,7 +19,7 @@ A simple script using Qwen-Agent to access a locally running model for function/
         - `python -m venv aiAgentVenv`
     - Once created, it can be activated with:
         - `./aiAgentVenv/Scripts/activate`
-    - If you want to run the main LLM locally, we recommend using [llamma.cpp](https://github.com/ggerganov/llama.cpp/releases)'s OpenAI API [compatable server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md). 
+    - If you want to run the main LLM locally, we recommend using [llama.cpp](https://github.com/ggerganov/llama.cpp/releases)'s OpenAI API [compatible server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md).
 
 
 ### Modules
@@ -24,14 +28,15 @@ Installing modules in the correct order helps make sure everything installs with
 
 1. [Install torch, torchvision, and torchaudio](https://pytorch.org/get-started/locally/) with CUDA/ROCM if possible
 2. `pip install duckduckgo-search qwen-agent transformers usearch`
+3. `pip install git+https://github.com/zhuole1025/diffusers.git@lumina2` (fork of diffusers with new lumina2 image pipeline)
 
 ## Usage
 
 ### Backend Configuration
 
 You'll need to open and edit the [config.json](/config.json) file locally to have the correct URL and API key for your tool calling model. This can be a locally hosted model, or a remote model so long as the backend uses an OpenAI API compatable server.
 
-As stated above, for local use we recommend using [llamma.cpp](https://github.com/ggerganov/llama.cpp/releases)'s OpenAI API [compatable server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md).
+As stated above, for local use we recommend using [llama.cpp](https://github.com/ggerganov/llama.cpp/releases)'s OpenAI API [compatible server](https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md).
 
 ### Running the Script
 
@@ -47,7 +52,7 @@ There are a few commands you can use in the prompt, type `help` to list them all
 
 ### Available tools
 
-Pre-made tools exist and can be found in the [functions](/functions) folder of the repo
+Pre-made tools exist and can be found in the [functions](/functions) folder of the repo.
 
 ### Making your own tooling
 
diff --git a/functions/system/image_gen.py b/functions/system/image_gen.py
@@ -0,0 +1,92 @@
+import os
+import random
+import time
+
+import numpy as np
+import torch
+from diffusers import Lumina2Text2ImgPipeline
+
+
+# Pick the device and its matching dtype once, at import time:
+# bfloat16 on CUDA when torch reports it available, float32 on the CPU otherwise.
+device, torch_dtype = (
+    ("cuda", torch.bfloat16)
+    if torch.cuda.is_available()
+    else ("cpu", torch.float32)
+)
+
+
+def gen_image(prompt: str, width: int = 512, height: int = 512, open: bool = True) -> str:
+    """Generate an image from a text prompt with Lumina-Image-2.0 and save it as a PNG.
+
+    The pipeline is loaded on every call, moved to the module-level ``device``
+    with ``torch_dtype``, and run with a freshly drawn random seed.
+
+    Args:
+        prompt: Text prompt passed to the image pipeline.
+        width: Requested width; values above 1024 are reduced to 1024.
+        height: Requested height; values above 1024 are reduced to 1024.
+        open: When true, display the result via ``image.show()`` after saving.
+
+    Returns:
+        A message of the form ``"Image created at path: images/image_<unix time>.png"``.
+    """
+    # Upper bound applied to both sides (subject to change)
+    side_limit = 1024
+    width = min(width, side_limit)
+    height = min(height, side_limit)
+
+    print("Loading image generation model...")
+    pipeline = Lumina2Text2ImgPipeline.from_pretrained(
+        "Alpha-VLLM/Lumina-Image-2.0",
+        torch_dtype=torch_dtype,
+    )
+    pipeline.to(device, torch_dtype)
+
+    # Optimizations (enable_model_cpu_offload() is left switched off)
+    pipeline.enable_vae_slicing()
+    pipeline.enable_vae_tiling()
+
+    # Draw a random seed between 0 and the int32 maximum
+    seed_ceiling = np.iinfo(np.int32).max
+    seed = random.randint(0, seed_ceiling)
+    rng = torch.Generator()
+    rng.manual_seed(seed)
+
+    # Run the pipeline and keep the first image of the result
+    result = pipeline(
+        prompt,
+        width=width,
+        height=height,
+        num_inference_steps=30,
+        guidance_scale=4.0,
+        cfg_trunc_ratio=0.25,
+        cfg_normalization=True,
+        generator=rng,
+    )
+    image = result.images[0]
+
+    # The file name carries the current Unix time truncated to whole seconds
+    os.makedirs("images", exist_ok=True)
+    timestamp = int(time.time())
+    out_path = f"images/image_{timestamp}.png"
+    image.save(out_path)
+
+    if open:
+        image.show()
+    return f"Image created at path: {out_path}"
+
+
+# Callable paired with function_spec below (presumably looked up by the tool loader — confirm)
+function = gen_image
+# Schema describing the gen_image tool; `parameters` is a JSON Schema object.
+# Only prompt/width/height are described; gen_image's `open` flag is not part of it.
+function_spec = {
+    "type": "function",
+    "function": {
+        "name": "gen_image",
+        "description": "Generates an image when requested by the user. If they did not specify a prompt, you MUST ask them for one before using this.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "prompt": {
+                    "type": "string",
+                    "description": "The user's prompt for the image generation model",
+                },
+                "width": {
+                    # JSON Schema spells the whole-number type "integer"; "int" is not a valid type name
+                    "type": "integer",
+                    "description": "The user defined width of the image (or 512 if not specified)",
+                },
+                "height": {
+                    "type": "integer",
+                    "description": "The user defined height of the image (or 512 if not specified)",
+                },
+            },
+            "required": ["prompt"],
+        },
+    },
+}
+
+
+if __name__ == "__main__":
+    # Manual test loop: keep asking for prompts and generating images until the user quits.
+    while True:
+        try:
+            prompt = input("Image Generation Prompt: ")
+        except (EOFError, KeyboardInterrupt):
+            # End-of-input or Ctrl-C at the prompt ends the session without a traceback
+            print()
+            break
+        if not prompt.strip():
+            # Nothing to generate from; skip the model load and ask again
+            continue
+        gen_image(prompt)
diff --git a/tool_calling.py b/tool_calling.py
@@ -164,7 +164,9 @@ def print_help():
     print("clear")
     print("    Clears the console and the chat history for a new convo")
     print("load")
-    print("    Load's the functions in the user folder. (can be enabled by default in the config)")
+    print(
+        "    Load's the functions in the user folder. (can be enabled by default in the config)"
+    )
 
 
 def main():
@@ -176,14 +178,17 @@ def main():
         }
     )
 
+    system_prompt = inspect.cleandoc(f"""
+                    You are JARVIS, a helpful and witty assistant. 
+                    You help a user with their tasks by using any of the functions available to you and your replies should always aim to be short but informative.
+                    When a user refers to themselves in a prompt to create or recall a memory in the first person, change it to refer to 'The User'.
+                    If you cannot answer a prompt based on information you have available, use your tools to find more information.
+                    The current date is {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}
+                    """)
+
     system_message = {
         "role": "system",
-        "content": f"""You are JARVIS, a helpful and witty assistant. 
-        You help a user with their tasks by using any of the functions available to you and your replies should always aim to be short but informative.
-        When a user refers to themselves in a prompt to create or recall a memory in the first person, change it to refer to 'The User'.
-        If you cannot answer a prompt based on information you have available, use your tools to find more information.
-        The current date is {datetime.today().strftime('%Y-%m-%d %H:%M:%S')}
-        """,
+        "content": system_prompt,
     }
 
     print("Type 'help' for chat commands")