From 5f451965e29b561cf578f5e07654c1d9df08687f Mon Sep 17 00:00:00 2001
From: Remichu
Date: Fri, 27 Sep 2024 19:31:57 +0800
Subject: [PATCH 1/3] Segregate format enforcement into a separate py file

---
 src/gallama/backend/chatgenerator.py   | 92 ++++----------------------
 src/gallama/backend/format_enforcer.py | 80 ++++++++++++++++++++++
 2 files changed, 94 insertions(+), 78 deletions(-)
 create mode 100644 src/gallama/backend/format_enforcer.py

diff --git a/src/gallama/backend/chatgenerator.py b/src/gallama/backend/chatgenerator.py
index a1e10b9..c03ad3b 100755
--- a/src/gallama/backend/chatgenerator.py
+++ b/src/gallama/backend/chatgenerator.py
@@ -19,6 +19,17 @@ from lmformatenforcer.tokenenforcer import TokenEnforcerTokenizerData
 from concurrent.futures import ThreadPoolExecutor
 from importlib.metadata import version
 
+from .format_enforcer import FormatEnforcer
+from formatron.schemas.pydantic import ClassSchema
+
+try:
+    from formatron.formatter import FormatterBuilder
+    from formatron.integrations.exllamav2 import create_formatter_filter
+
+except ImportError:
+    FormatterBuilder = None
+    create_formatter_filter = None
+
 
 try:
     from exllamav2 import (
@@ -64,14 +75,6 @@
 assert ExLlamaV2Cache or LogitsProcessorList, "Please install ExllamaV2 or LLama CPP Python as backend"
 
 
-# experimental support for formatron
-try:
-    from formatron.formatter import FormatterBuilder
-    from formatron.integrations.exllamav2 import create_formatter_filter
-
-except:
-    FormatterBuilder = None
-    create_formatter_filter = None
 
 TOOL_THINKING = THINKING_TEMPLATE["tool_necessity_evaluation"]
 TOOL_FORCE_THINKING = THINKING_TEMPLATE["tool_forced_evaluation"]
@@ -94,71 +97,6 @@ def get_queue(self) -> GenQueue | None:
         return self.gen_queue()
 
 
-class FormatEnforcer:
-    """ this class will help to create filter for generation enforcement"""
-
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def get_default_engine(backend:str = "exllama") -> Literal["formatron", "lm_enforcer"]:
-        """ this function will select the format enforcer engine to use if not selected by user"""
-
-        # formatron doesnt support llama cpp at the moment
-        if backend == "llama_cpp":
-            return "lm_enforcer"
-        elif backend == "exllama":
-            # use formatron if it is available if it is exllama
-            if FormatterBuilder:
-                return "formatron"
-            else:
-                # return "formatron"
-                return "lm_enforcer"
-        else:
-            raise "Invalid backend"
-
-        # return "lm_enforcer"
-
-
-    def regex(self, regex_pattern: str, filter_engine: Literal[
-        "formatron", "lm_enforcer"] = None, backend: str = "exllama") -> FormatterBuilder | TokenEnforcerTokenizerData:
-        logger.info(backend)
-        # set the filter engine to use
-        if not filter_engine:
-            filter_engine = FormatEnforcer.get_default_engine(backend=backend)  # if engine is specified, use it
-
-        # create filter if engine is lm_enforcer
-        if filter_engine == "lm_enforcer":
-            return RegexParser(regex_pattern)
-
-        # create filter if engine is formatron
-        if filter_engine == "formatron":
-            f = FormatterBuilder()
-            _regex = f.regex(regex_pattern, capture_name='regex')
-            f.append_line(f"{_regex}")
-            return f
-
-    def json(self, pydantic_model, filter_engine: Literal[
-        "formatron", "lm_enforcer"] = None, backend: str = "exllama") -> FormatterBuilder | TokenEnforcerTokenizerData:
-        """ this function will return the filters for format enforcer to generate json output based on Pyantic model"""
-
-        # set the filter engine to use
-        if not filter_engine:
-            filter_engine = FormatEnforcer.get_default_engine(backend=backend)  # if engine is specified, use it
-
-        # create filter if engine is lm_enforcer
-        if filter_engine == "lm_enforcer" or filter_engine == "formatron":    # TODO currently formatron and nested pydantic model is having issue
-        # if filter_engine == "lm_enforcer":    # TODO currently formatron and nested pydantic model is having issue
-            json_schema = Tools.replace_refs_with_definitions_v2(pydantic_model.model_json_schema())
-            return JsonSchemaParser(json_schema)
-
-        # # create filter if engine is formatron
-        # if filter_engine == "formatron":
-        #     f = FormatterBuilder()
-        #     f.append_line(f"{f.json(pydantic_model, capture_name='json')}")
-        #     return f
-
-
 class ChatGenerator(Model):
     def __init__(
         self,
@@ -472,12 +410,10 @@ async def chat_with_tool(self, query: ChatMLQuery, prompt_eng, gen_queue, reques
             # create the pydantic schema to enforce generation
             tool_combined_pydantic = create_function_models_v2(tool_handler.tool_dict)
 
-            class ToolCalling(BaseModel):
+            class ToolCalling(ClassSchema):
                 """ The format to call one or multiple tools """
-                functions_calling: List[Union[tuple(tool_combined_pydantic)]] = Field(
-                    description='the list of functions to call in chronological order',
-                    default=[]
-                )
+                functions_calling: List[Union[tuple(tool_combined_pydantic)]] = []
+
 
             # class ItemModel(BaseModel):
             #     Use: Literal['Yes', 'No']
diff --git a/src/gallama/backend/format_enforcer.py b/src/gallama/backend/format_enforcer.py
new file mode 100644
index 0000000..f78abc8
--- /dev/null
+++ b/src/gallama/backend/format_enforcer.py
@@ -0,0 +1,80 @@
+from typing import List, Union, Literal, Optional
+from formatron.schemas.pydantic import ClassSchema
+from lmformatenforcer import JsonSchemaParser, RegexParser
+from lmformatenforcer.tokenenforcer import TokenEnforcerTokenizerData
+from gallama.logger.logger import logger
+from .tools import Tools
+
+
+# experimental support for formatron
+try:
+    from formatron.formatter import FormatterBuilder
+    from formatron.integrations.exllamav2 import create_formatter_filter
+
+except ImportError:
+    FormatterBuilder = None
+    create_formatter_filter = None
+
+class FormatEnforcer:
+    """ this class helps create filters for generation format enforcement"""
+
+    def __init__(self):
+        pass
+
+    @staticmethod
+    def get_default_engine(backend: str = "exllama") -> Literal["formatron", "lm_enforcer"]:
+        """ this function will select the format enforcer engine to use if not selected by user"""
+
+        # formatron doesn't support llama cpp at the moment
+        if backend == "llama_cpp":
+            return "lm_enforcer"
+        elif backend == "exllama":
+            # use formatron if it is available and the backend is exllama
+            if FormatterBuilder:
+                return "formatron"
+            else:
+                # return "formatron"
+                return "lm_enforcer"
+        else:
+            raise ValueError("Invalid backend")
+
+        # return "lm_enforcer"
+
+
+    def regex(self, regex_pattern: str, filter_engine: Literal[
+        "formatron", "lm_enforcer"] = None, backend: str = "exllama") -> FormatterBuilder | TokenEnforcerTokenizerData:
+        logger.info(backend)
+        # set the filter engine to use
+        if not filter_engine:
+            filter_engine = FormatEnforcer.get_default_engine(backend=backend)  # if engine is not specified, pick a default
+
+        # create filter if engine is lm_enforcer
+        if filter_engine == "lm_enforcer":
+            return RegexParser(regex_pattern)
+
+        # create filter if engine is formatron
+        if filter_engine == "formatron":
+            f = FormatterBuilder()
+            _regex = f.regex(regex_pattern, capture_name='regex')
+            f.append_line(f"{_regex}")
+            return f
+
+    def json(self, pydantic_model, filter_engine: Literal[
+        "formatron", "lm_enforcer"] = None, backend: str = "exllama") -> FormatterBuilder | TokenEnforcerTokenizerData:
+        """ this function will return the filters for format enforcer to generate json output based on Pydantic model"""
+
+        # set the filter engine to use
+        if not filter_engine:
+            filter_engine = FormatEnforcer.get_default_engine(backend=backend)  # if engine is not specified, pick a default
+
+        # create filter if engine is lm_enforcer
+        # if filter_engine == "lm_enforcer" or filter_engine == "formatron":    # TODO currently formatron and nested pydantic model is having issue
+        if filter_engine == "lm_enforcer":    # TODO currently formatron and nested pydantic model is having issue
+            json_schema = Tools.replace_refs_with_definitions_v2(pydantic_model.model_json_schema())
+            return JsonSchemaParser(json_schema)
+
+        # create filter if engine is formatron
+        if filter_engine == "formatron":
+            f = FormatterBuilder()
+            f.append_line(f"{f.json(pydantic_model, capture_name='json')}")
+            return f
\ No newline at end of file

From 26ca0cf6c200cd16e6182abb7b0cf8dfba18134d Mon Sep 17 00:00:00 2001
From: Remichu
Date: Fri, 27 Sep 2024 21:36:01 +0800
Subject: [PATCH 2/3] Segregate format enforcement into a separate py file

---
 src/gallama/backend/chatgenerator.py |  9 ++++--
 src/gallama/backend/tools.py         | 41 ++++++++++++++++++++++++++--
 2 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/src/gallama/backend/chatgenerator.py b/src/gallama/backend/chatgenerator.py
index c03ad3b..69fc8e3 100755
--- a/src/gallama/backend/chatgenerator.py
+++ b/src/gallama/backend/chatgenerator.py
@@ -408,12 +408,15 @@ async def chat_with_tool(self, query: ChatMLQuery, prompt_eng, gen_queue, reques
         # USE TOOL
         if use_tool_bool:
             # create the pydantic schema to enforce generation
-            tool_combined_pydantic = create_function_models_v2(tool_handler.tool_dict)
+            # tool_combined_pydantic = create_function_models_v2(tool_handler.tool_dict)
+            #
+            # class ToolCalling(ClassSchema):
+            #     """ The format to call one or multiple tools """
+            #     functions_calling: List[Union[tuple(tool_combined_pydantic)]] = []
 
             class ToolCalling(ClassSchema):
                 """ The format to call one or multiple tools """
-                functions_calling: List[Union[tuple(tool_combined_pydantic)]] = []
-
+                functions_calling: List[Union[tuple(tool_handler.tools_list_formatron)]] = []
 
             # class ItemModel(BaseModel):
             #     Use: Literal['Yes', 'No']
diff --git a/src/gallama/backend/tools.py b/src/gallama/backend/tools.py
index e036776..5d3fbc4 100755
--- a/src/gallama/backend/tools.py
+++ b/src/gallama/backend/tools.py
@@ -1,12 +1,14 @@
 from pydantic import BaseModel, Field, ValidationError, create_model
 from typing import Literal, Type, Union, Optional, List, Any, Dict
+from formatron.schemas.pydantic import ClassSchema, Schema
 
 
 class Tools:
     def __init__(self, prompt_eng, tools, tool_choice):
         self.prompt_eng = prompt_eng
         self.tools = tools
-        self.tools_list = self.create_pydantic_model_from_tools(self.tools)
+        self.tools_list = self.create_pydantic_model_from_tools(self.tools, mode="pydantic_v2")
+        self.tools_list_formatron = self.create_pydantic_model_from_tools(self.tools, mode="formatron")
         self.tool_dict = {tool.schema()['title']: tool for tool in self.tools_list}
         self.answer_format = None
         self.json_parser = None
@@ -125,7 +127,33 @@ def create_pydantic_model_v2(function_info: dict) -> Type[BaseModel]:
         return model
 
     @staticmethod
-    def create_pydantic_model_from_tools(tools: list):
+    def create_class_schema(function_info: dict) -> Type[ClassSchema]:
+        parameters = function_info['parameters']
+        properties = parameters.get('properties', {})
+        required_properties = set(parameters.get('required', []))
+
+        attributes = {}
+        for prop_name, prop_info in properties.items():
+            field_type = Tools.type_from_json_schema(prop_info)
+            field_description = prop_info.get('description', None)
+
+            if prop_name in required_properties:
+                attributes[prop_name] = (field_type, Field(description=field_description))
+            else:
+                attributes[prop_name] = (Optional[field_type], Field(default=None, description=field_description))
+
+        namespace = {'__annotations__': {}}
+        for attr_name, (attr_type, field_info) in attributes.items():
+            namespace['__annotations__'][attr_name] = attr_type
+            namespace[attr_name] = field_info
+
+        model = type(function_info['name'], (ClassSchema,), namespace)
+        model.__doc__ = function_info.get('description', '')
+
+        return model
+
+    @staticmethod
+    def create_pydantic_model_from_tools(tools: list, mode: Literal["pydantic_v2", "formatron", "pydantic_v1"] = "pydantic_v2"):
         """
         this function create a list of pydantic model based on tool_list in the API call
@@ -133,7 +161,14 @@ def create_pydantic_model_from_tools(tools: list):
         models = []
         for tool in tools:
             function_info = tool.dict()['function']
-            model = Tools.create_pydantic_model_v2(function_info)
+            if mode == "pydantic_v2":
+                model = Tools.create_pydantic_model_v2(function_info)
+            elif mode == "formatron":
+                model = Tools.create_class_schema(function_info)
+            elif mode == "pydantic_v1":
+                model = Tools.create_pydantic_model_v1(function_info)
+            else:
+                raise ValueError(f"Unsupported mode: {mode}")
             models.append(model)
         return models

From a8cdc09942e061405e8d23e0c58bb05074ba4889 Mon Sep 17 00:00:00 2001
From: Remichu
Date: Sat, 28 Sep 2024 15:52:24 +0800
Subject: [PATCH 3/3] Full formatron support for both regex and json
 generation

Formatron is used as the preferred format enforcement backend unless the
user specifies otherwise.

---
 pyproject.toml                         |  2 +-
 requirements.txt                       |  2 +-
 src/gallama/backend/chatgenerator.py   | 46 +++++++++----------
 src/gallama/backend/format_enforcer.py | 61 +++++++++++++++++++-------
 src/gallama/backend/model.py           |  1 +
 src/gallama/backend/tools.py           | 23 ++++++++++
 src/gallama/data_classes/data_class.py | 10 +++--
 7 files changed, 100 insertions(+), 45 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 421514d..81822e1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "gallama"
-version = "0.0.5"
+version = "0.0.6"
 description = "An oppinionated Llama Server engine with focus on agentic task"
 authors = [{name = "David", email = "trantrungduc91@example.com"}]
 license = {text = "MIT"}
diff --git a/requirements.txt b/requirements.txt
index 5dd65c4..924cf20 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -23,4 +23,4 @@ pyzmq
 pygments
 httpx
 psutil
-formatron
\ No newline at end of file
+formatron>=0.4.4
\ No newline at end of file
diff --git a/src/gallama/backend/chatgenerator.py b/src/gallama/backend/chatgenerator.py
index 69fc8e3..869e008 100755
--- a/src/gallama/backend/chatgenerator.py
+++ b/src/gallama/backend/chatgenerator.py
@@ -9,7 +9,7 @@ from fastapi import HTTPException, Request
 from .model import Model
 from gallama.data_classes.data_class import GenerationStats, GenEnd, GenText, GenQueue, ChatMLQuery, GenStart
-from .tools import Tools, create_function_models_v2
+from .tools import Tools, create_function_models_v2, create_function_models_formatron
 from dataclasses import dataclass
 from gallama.utils.utils import get_token_length
 from gallama.logger.logger import logger
@@ -176,9 +176,9 @@ async def chat_no_tool(self, query: ChatMLQuery, prompt_eng, gen_queue, request:
         )
 
         formatter_prefix_regex = self.formatter.regex(
-            query.regex_prefix_pattern, backend=self.backend) if query.regex_prefix_pattern else None
+            query.regex_prefix_pattern, backend=self.backend, preference=query.guided_decoding_backend) if query.regex_prefix_pattern else None
 
-        formatter_regex = self.formatter.regex(query.regex_pattern, backend=self.backend) if query.regex_pattern else None
+        formatter_regex = self.formatter.regex(query.regex_pattern, backend=self.backend, preference=query.guided_decoding_backend) if query.regex_pattern else None
 
         token_length_prompt = get_token_length(self.tokenizer, prompt)
         self.validate_token_length(token_length_prompt)
@@ -383,7 +383,7 @@ async def chat_with_tool(self, query: ChatMLQuery, prompt_eng, gen_queue, reques
         # perform generation with tool thinking to evaluate if it is necessity
         tool_thinking_queue_fallback = GenQueue()
 
-        formatter_regex = self.formatter.regex('(needed|not needed)', backend=self.backend)
+        formatter_regex = self.formatter.regex('(needed|not needed)', backend=self.backend, preference=query.guided_decoding_backend)
 
         await self.generate(
             prompt,
@@ -408,26 +408,24 @@ async def chat_with_tool(self, query: ChatMLQuery, prompt_eng, gen_queue, reques
         # USE TOOL
         if use_tool_bool:
             # create the pydantic schema to enforce generation
-            # tool_combined_pydantic = create_function_models_v2(tool_handler.tool_dict)
-            #
-            # class ToolCalling(ClassSchema):
-            #     """ The format to call one or multiple tools """
-            #     functions_calling: List[Union[tuple(tool_combined_pydantic)]] = []
+            tool_combined_pydantic_lmfe = create_function_models_v2(tool_handler.tool_dict)
 
-            class ToolCalling(ClassSchema):
+            class ToolCalling_LMFE(ClassSchema):
                 """ The format to call one or multiple tools """
-                functions_calling: List[Union[tuple(tool_handler.tools_list_formatron)]] = []
+                functions_calling: List[Union[tuple(tool_combined_pydantic_lmfe)]] = []
 
-            # class ItemModel(BaseModel):
-            #     Use: Literal['Yes', 'No']
-            #     reason: str
-
-            # answer_format_schema = tool_handler.replace_refs_with_definitions_v2(ToolCalling.schema())
-            #
-            # # get format enforcer
-            # formatter = JsonSchemaParser(answer_format_schema)
+            # create the pydantic schema to enforce generation for formatron which uses ClassSchema
+            tool_combined_pydantic_formatron = create_function_models_formatron(tool_handler.tool_dict_formatron)
+            class ToolCalling_formatron(ClassSchema):
+                """ The format to call one or multiple tools """
+                functions_calling: List[Union[tuple(tool_combined_pydantic_formatron)]] = []
 
-            formatter_json = self.formatter.json(pydantic_model=ToolCalling, backend=self.backend)
+            formatter_json = self.formatter.json(
+                pydantic_model_lmfe=ToolCalling_LMFE,
+                pydantic_model_formatron=ToolCalling_formatron,
+                backend=self.backend,
+                preference=query.guided_decoding_backend
+            )
 
             # Experiment feature, formulate function calling as python programming. Which is more natural than a random Json output as part of conversation
             tool_as_code_prompt = """
@@ -721,11 +719,13 @@ async def generate(
 
             # Depending on settings, the result dict can contain top-K probabilities, logits and more, but we'll just
             # grab the output text stream.
-            # generate_text += result.get("text", "")
             # logger.info(f'{datetime.now()} {result.get("text", "")}')
-            chunk = GenText(content=result.get("text", ""), text_type=gen_type_str)
+            chunk_text = result.get("text", "")
+            chunk = GenText(content=chunk_text, text_type=gen_type_str)
             for g_queue in gen_queue_list:
-                g_queue.get_queue().put_nowait(chunk)
+                if chunk_text not in self.eos_token_str_set:    # formatron returns the eos token
+                    # generate_text += result.get("text", "")
+                    g_queue.get_queue().put_nowait(chunk)
 
             # logger.info(result.get("text", ""))
             # logger.info(self.tokenizer.encode(result.get("text", "")))
diff --git a/src/gallama/backend/format_enforcer.py b/src/gallama/backend/format_enforcer.py
index f78abc8..f6d8740 100644
--- a/src/gallama/backend/format_enforcer.py
+++ b/src/gallama/backend/format_enforcer.py
@@ -1,8 +1,9 @@
 from typing import List, Union, Literal, Optional
 from formatron.schemas.pydantic import ClassSchema
 from lmformatenforcer import JsonSchemaParser, RegexParser
 from lmformatenforcer.tokenenforcer import TokenEnforcerTokenizerData
 from gallama.logger.logger import logger
+from pydantic import BaseModel
 from .tools import Tools
@@ -22,31 +23,50 @@ def __init__(self):
         pass
 
     @staticmethod
-    def get_default_engine(backend: str = "exllama") -> Literal["formatron", "lm_enforcer"]:
+    def get_default_engine(
+            backend: str = "exllama",
+            preference: Literal["auto", "formatron", "lm-format-enforcer"] = "auto",
+    ) -> Literal["formatron", "lm_enforcer"]:
         """ this function will select the format enforcer engine to use if not selected by user"""
 
+        if preference != "auto":
+            logger.info(f"guided decoding preference: {preference}")
+
         # formatron doesn't support llama cpp at the moment
         if backend == "llama_cpp":
             return "lm_enforcer"
         elif backend == "exllama":
             # use formatron if it is available and the backend is exllama
-            if FormatterBuilder:
-                return "formatron"
+            if preference == "auto":
+                if FormatterBuilder:
+                    return "formatron"
+                else:
+                    return "lm_enforcer"
             else:
-                # return "formatron"
-                return "lm_enforcer"
+                if preference == "formatron" and FormatterBuilder:
+                    return "formatron"
+                elif preference == "lm-format-enforcer":
+                    return "lm_enforcer"
+                else:
+                    raise ValueError("Invalid preference")
         else:
             raise ValueError("Invalid backend")
 
-        # return "lm_enforcer"
-
 
-    def regex(self, regex_pattern: str, filter_engine: Literal[
-        "formatron", "lm_enforcer"] = None, backend: str = "exllama") -> FormatterBuilder | TokenEnforcerTokenizerData:
+    def regex(
+            self,
+            regex_pattern: str,
+            filter_engine: Literal[
+                "formatron", "lm_enforcer"] = None,
+            backend: str = "exllama",
+            preference: Literal["auto", "formatron", "lm-format-enforcer"] = "auto",
+    ) -> FormatterBuilder | TokenEnforcerTokenizerData:
+
         logger.info(backend)
         # set the filter engine to use
         if not filter_engine:
-            filter_engine = FormatEnforcer.get_default_engine(backend=backend)  # if engine is not specified, pick a default
+            filter_engine = FormatEnforcer.get_default_engine(backend=backend, preference=preference)  # if engine is not specified, pick a default
 
         # create filter if engine is lm_enforcer
         if filter_engine == "lm_enforcer":
@@ -59,22 +79,30 @@ def regex(self, regex_pattern: str, filter_engine: Literal[
             f.append_line(f"{_regex}")
             return f
 
-    def json(self, pydantic_model, filter_engine: Literal[
-        "formatron", "lm_enforcer"] = None, backend: str = "exllama") -> FormatterBuilder | TokenEnforcerTokenizerData:
+    def json(
+            self,
+            pydantic_model_lmfe: BaseModel,
+            pydantic_model_formatron: ClassSchema,
+            filter_engine: Literal["formatron", "lm_enforcer"] = None,
+            backend: Literal["llama_cpp", "exllama"] = "exllama",
+            preference: Literal["auto", "formatron", "lm-format-enforcer"] = "auto",
+    ) -> FormatterBuilder | TokenEnforcerTokenizerData:
         """ this function will return the filters for format enforcer to generate json output based on Pydantic model"""
 
         # set the filter engine to use
         if not filter_engine:
-            filter_engine = FormatEnforcer.get_default_engine(backend=backend)  # if engine is not specified, pick a default
+            filter_engine = FormatEnforcer.get_default_engine(backend=backend, preference=preference)  # if engine is not specified, pick a default
+
+        assert filter_engine in ("lm_enforcer", "formatron")
 
         # create filter if engine is lm_enforcer
         # if filter_engine == "lm_enforcer" or filter_engine == "formatron":    # TODO currently formatron and nested pydantic model is having issue
        if filter_engine == "lm_enforcer":    # TODO currently formatron and nested pydantic model is having issue
-            json_schema = Tools.replace_refs_with_definitions_v2(pydantic_model.model_json_schema())
+            json_schema = Tools.replace_refs_with_definitions_v2(pydantic_model_lmfe.model_json_schema())
             return JsonSchemaParser(json_schema)
 
         # create filter if engine is formatron
-        if filter_engine == "formatron":
+        elif filter_engine == "formatron":
             f = FormatterBuilder()
-            f.append_line(f"{f.json(pydantic_model, capture_name='json')}")
-            return f
\ No newline at end of file
+            f.append_line(f"{f.json(pydantic_model_formatron, capture_name='json')}")
+            return f
diff --git a/src/gallama/backend/model.py b/src/gallama/backend/model.py
index a076f5e..120a9a3 100755
--- a/src/gallama/backend/model.py
+++ b/src/gallama/backend/model.py
@@ -82,6 +82,7 @@ def __init__(self,
         # TODO, to auto detect
         # get the eos_token_str by merging the default config with anything set by user
         self.eos_token_str = list(set(model_config.get("eos_token_list", []) + eos_token_list_from_prompt_template))
+        self.eos_token_str_set = set(self.eos_token_str)    # set for more efficient membership checks
         self.eos_token_ids = self.generate_eos_tokens_id()
diff --git a/src/gallama/backend/tools.py b/src/gallama/backend/tools.py
index 5d3fbc4..bc7f572 100755
--- a/src/gallama/backend/tools.py
+++ b/src/gallama/backend/tools.py
@@ -10,6 +10,7 @@ def __init__(self, prompt_eng, tools, tool_choice):
         self.tools_list = self.create_pydantic_model_from_tools(self.tools, mode="pydantic_v2")
         self.tools_list_formatron = self.create_pydantic_model_from_tools(self.tools, mode="formatron")
         self.tool_dict = {tool.schema()['title']: tool for tool in self.tools_list}
+        self.tool_dict_formatron = {tool.schema()['title']: tool for tool in self.tools_list_formatron}
         self.answer_format = None
         self.json_parser = None
         self.tool_choice = tool_choice
@@ -269,5 +270,24 @@ class Config:
             arguments=(arg_model, Field(...)),
             __config__=Config
         )
+        function_model_list.append(NewModel)
+    return function_model_list
+
+
+def create_function_models_formatron(functions: Dict[str, Type[ClassSchema]]) -> List[Type[ClassSchema]]:
+    """Create a list of ClassSchema models for the function schemas passed in via OpenAI request call."""
+    function_model_list: List[Type[ClassSchema]] = []
+    for func_name, arg_model in functions.items():
+        # Create a new ClassSchema subclass
+        class NewModel(ClassSchema):
+            name: Literal[func_name] = Field(...)
+            arguments: arg_model = Field(...)
+
+            class Config:
+                arbitrary_types_allowed = True
+
+        # Set the name of the class to match the function name
+        NewModel.__name__ = func_name.title()
+
         function_model_list.append(NewModel)
     return function_model_list
\ No newline at end of file
diff --git a/src/gallama/data_classes/data_class.py b/src/gallama/data_classes/data_class.py
index 6e6a549..972e993 100755
--- a/src/gallama/data_classes/data_class.py
+++ b/src/gallama/data_classes/data_class.py
@@ -121,15 +121,17 @@ class StreamOption(BaseModel):
     prefix_strings: Optional[Union[str, List[str]]] = Field(default=None, description="String or list of strings to start the generation with. Can not be used together with regex_prefix_pattern")
     regex_pattern: Optional[constr(min_length=1)] = None    # regex to enforce
     regex_prefix_pattern: Optional[constr(min_length=1)] = Field(default=None, description="regex to enforce in the beginning of the generation, can not be used together with prefix_string")
-    stop_words: Optional[List[str]] = None
+    stop_words: Optional[List[str]] = Field(default=None, alias="stop")    # OpenAI uses "stop"
     thinking_template: Optional[str] = None
-    # thinking_template: Optional[str] = test_thinking
     artifact: Optional[Literal["No", "Fast", "Slow"]] = Field(default="No", description="Normal will parse the streamed output for artifact, whereas Strict is slower and will use format enforcer to enforce")
-    #thinking_template: Optional[str] = DEFAULT_THINKING    # the xml template for thinking
     return_thinking: Optional[Literal[False, True, "separate"]] = Field(
         default=False,
         description="Return the generated thinking to front end. False - not return, True - return, 'separate' - return separately as .thinking field. If used together with artifact, True will return as separate."
     )
+    guided_decoding_backend: Optional[Literal["auto", "formatron", "lm-format-enforcer"]] = Field(
+        default="auto",
+        description="Guided decoding backend. auto will choose the most suitable one. If the selected backend does not work with the current llm backend (e.g. formatron does not work with llama cpp), the selection falls back to auto."
+    )
 
     # not yet supported options from here    # TODO
     frequency_penalty: Optional[float] = None
@@ -140,7 +142,7 @@
     response_format: Optional[ResponseFormat] = None
     seed: Optional[int] = None
     stream_options: Optional[StreamOption] = None
-
+    parallel_tool_calls: bool = True    # default: let the model handle it; this can not be toggled
 
     @validator('regex_pattern', 'regex_prefix_pattern')
     def validate_regex(cls, v):