
Experimenting with Llama-2 support
KillianLucas committed Aug 6, 2023
1 parent 21abd56 commit 7fa1fd9
Showing 8 changed files with 157 additions and 43 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -153,7 +153,7 @@ Since generated code is executed in your local environment, it can interact with

**⚠️ Open Interpreter will ask for user confirmation before executing code.**

You can run `interpreter -y` or set `interpreter.no_confirm = True` to bypass this confirmation, in which case:
You can run `interpreter -y` or set `interpreter.auto_run = True` to bypass this confirmation, in which case:

- Be cautious when requesting commands that modify files or system settings.
- Watch Open Interpreter like a self-driving car, and be prepared to end the process by closing your terminal.
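For context, a minimal sketch of what the renamed setting looks like from Python (the prompt string is illustrative; `auto_run` replaces the old `no_confirm` attribute, matching the updated tests later in this commit):

```python
# Sketch: bypass the confirmation prompt programmatically.
import interpreter

interpreter.auto_run = True  # equivalent to launching with `interpreter -y`
interpreter.chat("Print the numbers 1 through 5.")  # illustrative prompt
```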
Binary file added diskcache-5.6.1-py3-none-any.whl
Binary file not shown.
7 changes: 7 additions & 0 deletions interpreter/cli.py
@@ -20,6 +20,10 @@ def cli(interpreter):
'--yes',
action='store_true',
help='execute code without user confirmation')
parser.add_argument('-l',
'--local',
action='store_true',
help='run fully local with llama-2')
args = parser.parse_args()

if args.yes:
@@ -28,5 +32,8 @@ def cli(interpreter):
# Print message with newlines on either side (aesthetic choice)
print('', Markdown(confirm_mode_message), '')

if args.local:
interpreter.local = True

# Now run the chat method
interpreter.chat()
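A brief usage sketch for the new flag (the `interpreter` console command is assumed from the README; the `local` attribute is what `-l` / `--local` sets):

```python
# Sketch: run fully local with Llama-2 instead of GPT-4.
# CLI equivalent (assumed entry point): `interpreter --local` or `interpreter -l`
import interpreter

interpreter.local = True  # what the --local flag toggles on the instance
interpreter.chat()        # interactive chat, now backed by the local Llama-2 model
```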
140 changes: 101 additions & 39 deletions interpreter/interpreter.py
@@ -13,7 +13,7 @@
from rich import print
from rich.markdown import Markdown

# Function schema for function-calling GPTs
# Function schema for GPT-4
function_schema = {
"name": "run_code",
"description":
@@ -54,6 +54,7 @@ def __init__(self):
self.temperature = 0.01
self.api_key = None
self.auto_run = False
self.local = False

# Get default system message
# here = os.path.abspath(os.path.dirname(__file__))
@@ -68,6 +69,11 @@ def __init__(self):
# (blocks are visual representation of messages on the terminal)
self.active_block = None

# Note: While Open Interpreter can use Llama, we will prioritize GPT-4.
# GPT-4 is faster, smarter, can call functions, and is all-around easier to use.
    # This makes GPT-4 better aligned with Open Interpreter's priority of being easy to use.
self.llama_instance = None

def cli(self):
# The cli takes the current instance of Interpreter,
# modifies it according to command line flags, then runs chat.
@@ -86,14 +92,21 @@ def get_info_for_system_message(self):
operating_system = os.name if os.name != 'nt' else os.uname().sysname
info += f"\n\n[User Info]\nName: {username}\nCWD: {current_working_directory}\nOS: {operating_system}"

# Open Procedures is an open-source database of tiny, structured coding tutorials.
# We can query it semantically and append relevant tutorials to our system message:

# Get a procedure that's relevant to the last message
query = str(self.messages[-1])
url = f"https://open-procedures.replit.app/search/?query={query}"
relevant_procedure = requests.get(url).json()["procedure"]
info += "\n\n[Related Recommended Procedure] (might be irrelevant)\n" + relevant_procedure
if not self.local:

# Open Procedures is an open-source database of tiny, structured coding tutorials.
# We can query it semantically and append relevant tutorials to our system message:

# Get a procedure that's relevant to the last message
query = str(self.messages[-1])
url = f"https://open-procedures.replit.app/search/?query={query}"
relevant_procedure = requests.get(url).json()["procedure"]
info += "\n\n[Related Recommended Procedure]\n" + relevant_procedure

elif self.local:

# Tell Llama-2 how to run code.
      info += "\n\nTo run Python code, simply write a Python code block (i.e. ```python) in markdown. When you close it with ```, it will be run. You'll then be given its output."

return info

@@ -105,7 +118,17 @@ def load(self, messages):
self.messages = messages

def chat(self, message=None, return_messages=False):
self.verify_api_key()

# Connect to an LLM
if not self.local:
# GPT-4
self.verify_api_key()
elif self.local:
# Llama-2
if self.llama_instance == None:
        # Find or install Llama-2
from .llama_2 import llama_2
self.llama_instance = llama_2

# Message won't be None if we're passing one in via interpreter.chat(message)
    # In that case, we respond non-interactively and return:
@@ -163,29 +186,38 @@ def end_active_block(self):
self.active_block = None

def respond(self):

# Add relevant info to system_message
# (e.g. current working directory, username, os, etc.)
info = self.get_info_for_system_message()
system_message = self.system_message + "\n\n" + info

print("system_message:\n\n", system_message)

# Make OpenAI call
model = "gpt-4-0613"
response = openai.ChatCompletion.create(
model=model,
messages=tt.trim(self.messages,
model,
system_message=system_message),
functions=[function_schema],
stream=True,
temperature=self.temperature,
)

# Initialize
# Make LLM call
if not self.local:
# GPT-4
model = "gpt-4-0613"
response = openai.ChatCompletion.create(
model=model,
messages=tt.trim(self.messages,
model,
system_message=system_message),
functions=[function_schema],
stream=True,
temperature=self.temperature,
)
elif self.local:
# Llama-2
response = self.llama_instance.create_chat_completion(
messages=tt.trim(self.messages,
"gpt-3.5-turbo",
system_message=system_message),
stream=True,
temperature=self.temperature,
)

# Initialize message, function call trackers, and active block
self.messages.append({})
in_function_call = False
llama_function_call_finished = False
self.active_block = None

for chunk in response:
@@ -196,7 +228,15 @@ def respond(self):
self.messages[-1] = merge_deltas(self.messages[-1], delta)

# Check if we're in a function call
if "function_call" in self.messages[-1]:
if not self.local:
condition = "function_call" in self.messages[-1]
elif self.local:
# Since Llama-2 can't call functions, we just check if we're in a code block.
# This simply returns true if the number of "```" in the message is odd.
condition = self.messages[-1]["content"].count("```") % 2 == 1

if condition:
# We are in a function call.

# Check if we just entered a function call
if in_function_call == False:
@@ -216,18 +256,40 @@
# Remember we're in a function_call
in_function_call = True

# Parse arguments and save to parsed_arguments, under function_call
if "arguments" in self.messages[-1]["function_call"]:
arguments = self.messages[-1]["function_call"]["arguments"]
new_parsed_arguments = parse_partial_json(arguments)

if new_parsed_arguments:
# Only overwrite what we have if it's not None (which means it failed to parse)
self.messages[-1]["function_call"]["parsed_arguments"] = new_parsed_arguments
# Now let's parse the function's arguments:

if not self.local:
# GPT-4
# Parse arguments and save to parsed_arguments, under function_call
if "arguments" in self.messages[-1]["function_call"]:
arguments = self.messages[-1]["function_call"]["arguments"]
new_parsed_arguments = parse_partial_json(arguments)
if new_parsed_arguments:
# Only overwrite what we have if it's not None (which means it failed to parse)
self.messages[-1]["function_call"]["parsed_arguments"] = new_parsed_arguments

elif self.local:
# Llama-2
# Get contents of current code block and save to parsed_arguments, under function_call
current_code_block = self.messages[-1]["content"].split("```")[-1]
arguments = {"language": "python", "code": current_code_block}
self.messages[-1]["function_call"]["parsed_arguments"] = arguments

else:
# We are not in a function call.

# Check if we just left a function call
if in_function_call == True:

if self.local:
# This is the same as when GPT-4 gives finish_reason as function_call.
# We have just finished a code block, so now we should run it.
llama_function_call_finished = True

# Remember we're not in a function_call
in_function_call = False

# If we're not in a function call and there's no active block,
# If there's no active block,
if self.active_block == None:

# Create a message block
@@ -237,8 +299,8 @@ def respond(self):
self.active_block.update_from_message(self.messages[-1])

# Check if we're finished
if chunk.choices[0].finish_reason:
if chunk.choices[0].finish_reason == "function_call":
if chunk.choices[0].finish_reason or llama_function_call_finished:
if chunk.choices[0].finish_reason == "function_call" or llama_function_call_finished:
# Time to call the function!
# (Because this is Open Interpreter, we only have one function.)

@@ -300,4 +362,4 @@ def respond(self):
if chunk.choices[0].finish_reason != "function_call":
# Done!
self.active_block.end()
return
return
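Because Llama-2 has no function-calling API, the changes above treat an odd number of triple-backtick fences as meaning a code block is still open, and take everything after the last fence as the code to run. A standalone sketch of that heuristic (helper names are illustrative, not part of the commit):

```python
# Sketch of the code-block heuristic used for Llama-2 in respond() above.
def in_open_code_block(content: str) -> bool:
    # An odd number of "```" fences means a block was opened but not yet closed.
    return content.count("```") % 2 == 1

def current_code_block(content: str) -> str:
    # Everything after the last fence is the (partial) code the model is writing.
    return content.split("```")[-1]

message = "Sure, let's compute it:\n```\nprint(27073 * 7397)\n"
assert in_open_code_block(message)                                   # still inside the block
assert current_code_block(message).strip() == "print(27073 * 7397)"
```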
41 changes: 41 additions & 0 deletions interpreter/llama_2.py
@@ -0,0 +1,41 @@
import os
import time
import subprocess

# Define the file name to search for
file_name = "llama-2-13b-chat.ggmlv3.q4_0.bin"

# Start the timer
start_time = time.time()

# Check for the file in each path
for path in [os.path.expanduser("~"), os.getcwd()]:
print(f"Searching for Llama-2 in {path} ...")
for root, _, files in os.walk(path):
if time.time() - start_time > 5:
print("Search timed out after 5 seconds.")
break
if file_name in files:
model_path = os.path.join(root, file_name)
print(f"Found Llama-2 at {model_path}")
break
else:
continue
break
else:
# If the file was not found, download it
download_path = os.path.expanduser("~") + "/llama-2/" + file_name
print(f"Llama-2 not found. Downloading it to {download_path} ...")
url = "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGML/resolve/main/llama-2-13b-chat.ggmlv3.q4_0.bin"
subprocess.run(f"curl -L '{url}' -o {download_path}", shell=True)
model_path = download_path

try:
from llama_cpp import Llama
except:
print("Downloading Llama-2 interface (llama-cpp-python)...")
subprocess.run(["pip", "install", "llama-cpp-python"])
from llama_cpp import Llama

# Initialize Llama-2
llama_2 = Llama(model_path=model_path)
Binary file not shown.
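The interpreter.py changes stream completions from this `llama_2` instance with `create_chat_completion`. A self-contained sketch of that call with llama-cpp-python (the model path and prompt are illustrative; streamed chunks follow an OpenAI-style delta format):

```python
# Sketch: streaming a chat completion with llama-cpp-python, as respond() does above.
import os
from llama_cpp import Llama

# Illustrative path — llama_2.py searches the home directory and CWD for this file.
model_path = os.path.expanduser("~/llama-2/llama-2-13b-chat.ggmlv3.q4_0.bin")
llama_2 = Llama(model_path=model_path)

response = llama_2.create_chat_completion(
    messages=[{"role": "user", "content": "Write a Python code block that prints 2 + 2."}],
    stream=True,
    temperature=0.01,
)

for chunk in response:                        # each chunk is a dict with a "delta" payload
    delta = chunk["choices"][0]["delta"]
    print(delta.get("content", ""), end="", flush=True)
```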
10 changes: 7 additions & 3 deletions tests/test_interpreter.py
@@ -1,20 +1,24 @@
import interpreter
interpreter.no_confirm = True
interpreter.auto_run = True
interpreter.temperature = 0

def test_delayed_exec():
interpreter.reset()
interpreter.chat("""Can you write a single block of code and run_code it that prints something, then delays 5 seconds, then prints something else? No talk just code. Thanks!""", return_messages=True)

def test_nested_loops_and_multiple_newlines():
interpreter.reset()
interpreter.chat("""Can you write a nested for loop in python and shell and run them? Also put 1-3 newlines between each line in the code. Thanks!""", return_messages=True)

def test_math():
interpreter.reset()
messages = interpreter.chat("""Please perform the calculation 27073*7397 then reply with just the integer answer, nothing else.""", return_messages=True)
messages = interpreter.chat("""Please perform the calculation 27073*7397 then reply with just the integer answer with no commas or anything, nothing else.""", return_messages=True)
assert messages[-1] == {'role': 'assistant', 'content': '200258981'}

def test_hello_world():
interpreter.reset()
messages = interpreter.chat("""Please reply with just the words "Hello, World!" and nothing else.""", return_messages=True)
assert messages == [{'role': 'user', 'content': 'Please reply with just the words "Hello, World!" and nothing else.'}, {'role': 'assistant', 'content': 'Hello, World!'}]
assert messages == [{'role': 'user', 'content': 'Please reply with just the words "Hello, World!" and nothing else. Do not run code.'}, {'role': 'assistant', 'content': 'Hello, World!'}]

def test_markdown():
interpreter.reset()
Binary file added typing_extensions-4.7.1-py3-none-any.whl
Binary file not shown.
