Hi, I'm pretty new to Python, but I'm attempting to follow the tutorial for adding an open-source model for usage. I have the following file, and I believe I'm registering the model client before attempting to use it; however, I'm getting the error below:
The model:
# custom client with custom model loader
import random
from types import SimpleNamespace

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, AutoModelForSequenceClassification


class BertModelClient:
    def __init__(self, config, **kwargs):
        print(f"CustomModelClient config: {config}")
        self.device = config.get("device", "cpu")
        self.model = AutoModelForCausalLM.from_pretrained(config["model"]).to(self.device)
        self.model_name = config["model"]
        self.tokenizer = AutoTokenizer.from_pretrained(config["model"], use_fast=False)
        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        self.label_mapping = {
            0: "Algebra",
            1: "Counting & Probability",
            2: "Geometry",
            3: "Intermediate Algebra",
            4: "Number Theory",
            5: "Prealgebra",
            6: "Precalculus",
        }
        # params are set by the user and consumed by the user since they are providing a custom model
        # so anything can be done here
        gen_config_params = config.get("params", {})
        self.max_length = gen_config_params.get("max_length", 256)
        print(f"Loaded model {config['model']} to {self.device}")

    def create(self, params):
        if params.get("stream", False) and "messages" in params:
            raise NotImplementedError("Local models do not support streaming.")
        else:
            num_of_responses = params.get("n", 1)
            text = params.get("text", "What is 5+5?")
            response = SimpleNamespace()
            # inputs = self.tokenizer.apply_chat_template(
            #     params["messages"], return_tensors="pt", add_generation_prompt=True
            # ).to(self.device)
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
            inputs = {key: val.to(self.device) for key, val in inputs.items()}
            inputs_length = inputs["input_ids"].shape[-1]
            # add inputs_length to max_length
            max_length = self.max_length + inputs_length
            generation_config = GenerationConfig(
                max_length=max_length,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.eos_token_id,
            )
            response.choices = []
            response.model = self.model_name

            # classify the question type and return the predicted class as the message content
            with torch.no_grad():
                outputs = self.model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predicted_class = torch.argmax(predictions, dim=-1).item()
            choice = SimpleNamespace()
            choice.message = SimpleNamespace()
            choice.message.content = predicted_class
            choice.message.function_call = None
            response.choices.append(choice)
            return response

            # note: the generation path below is unreachable because of the return above
            for _ in range(num_of_responses):
                outputs = self.model.generate(**inputs, generation_config=generation_config)
                # Decode only the newly generated text, excluding the prompt
                text = self.tokenizer.decode(outputs[0, inputs_length:])
                choice = SimpleNamespace()
                choice.message = SimpleNamespace()
                choice.message.content = text
                choice.message.function_call = None
                response.choices.append(choice)
            return response

    def message_retrieval(self, response):
        """Retrieve the messages from the response."""
        choices = response.choices
        return [choice.message.content for choice in choices]

    def cost(self, response) -> float:
        """Calculate the cost of the response."""
        response.cost = 0
        return 0

    def predictMathType(self, text):
        try:
            inputs = self.tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=512)
            inputs = {key: val.to(self.device) for key, val in inputs.items()}
            with torch.no_grad():
                outputs = self.model(**inputs)
                predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
                predicted_class = torch.argmax(predictions, dim=-1).item()
            return self.label_mapping[predicted_class]
        except Exception as e:
            # fall back to a random category if classification fails
            predicted_class = random.randint(0, 6)
            return self.label_mapping[predicted_class]

    @staticmethod
    def get_usage(response):
        # returns a dict of prompt_tokens, completion_tokens, total_tokens, cost, model
        # if usage needs to be tracked, else None
        return {}
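
For context, the client can also be exercised on its own before wiring it into AutoGen; a rough sketch (the checkpoint name below is a placeholder, not the one I'm actually using):

# Hypothetical standalone check; "my-math-topic-classifier" is a placeholder checkpoint name.
client = BertModelClient({"model": "my-math-topic-classifier", "device": "cpu"})
print(client.predictMathType(r"Solve for $x$: $x^2 - 5x + 6 = 0$"))  # e.g. "Algebra"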
import autogen

# passes in questions
user_proxy = autogen.UserProxyAgent(
    name="User_proxy",
    system_message="A human admin.",
    code_execution_config=False,
    human_input_mode="TERMINATE",
    is_termination_msg=lambda x: x.get("content", "") and x.get("content", "").rstrip().endswith("TERMINATE"),
)

# bert -> classifies the question type
classifier = autogen.AssistantAgent(
    name="Classifier",
    system_message="""You are an advanced AI acting as a mathematics procedural assistant; you will take a given math problem written in LaTeX format and make a guess at the type of math problem.""",
    llm_config=llm_config,
    description="""This is a math problem procedure assistant that is capable of taking a problem in LaTeX format and suggesting the appropriate category.""",
)

procedure = autogen.AssistantAgent(
    name="Procedure",
    system_message="""You are an advanced AI acting as a mathematics procedural assistant; you will take a given math problem written in LaTeX format, create a high-fidelity summary of what the problem is asking you to achieve, then break the task down into a list of individual steps that must be completed to solve the task.""",
    llm_config=llm_config,
    description="""This is a math problem procedure assistant that is capable of taking a problem in LaTeX format, summarizing the question, and creating a procedure for solving it. The procedure assistant is open to any comments and recommendations for improving the summary and procedure. Ask the procedure assistant to iterate every time there is a new change recommendation from the editor.""",
)

# deepseek vs opencode for solutioning
solver = autogen.AssistantAgent(
    name="Solver",
    system_message="""You are an advanced AI acting as a mathematics procedural assistant; you will take a given math problem written in LaTeX format and make a guess at the type of math problem.""",
    llm_config=llm_config,
    description="""This agent will solve the actual math problems.""",
)

groupchat = autogen.GroupChat(agents=[user_proxy, classifier, solver], messages=[], max_round=6)
manager = autogen.GroupChatManager(groupchat=groupchat, llm_config=llm_config)
manager.register_model_client(model_client_cls=BertModelClient)
user_proxy.initiate_chat(manager, message="What is 5 + 5")
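
The llm_config referenced above isn't shown here; for reference, the AutoGen custom-model-client tutorial expects the config list entry to name the client class via model_client_cls, roughly like this (placeholder values, not my exact config):

llm_config = {
    "config_list": [
        {
            "model": "my-math-topic-classifier",    # placeholder checkpoint name
            "model_client_cls": "BertModelClient",  # tells AutoGen to expect a registered custom client
            "device": "cpu",
        }
    ]
}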
The error:
Any help is much appreciated!