Skip to content

Commit

Permalink
feat: support LLM process document file (langgenius#10966)
Browse files: browse the repository at this point in the history
Co-authored-by: -LAN- <laipz8200@outlook.com>
  • Loading branch information
hjlarry and laipz8200 authored Nov 22, 2024
1 parent 556de44 commit 08ac368
Show file tree
Hide file tree
Showing 37 changed files with 233 additions and 88 deletions.
12 changes: 5 additions & 7 deletions api/core/memory/token_buffer_memory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from core.app.app_config.features.file_upload.manager import FileUploadConfigManager
from core.file import file_manager
from core.file.models import FileType
from core.model_manager import ModelInstance
from core.model_runtime.entities import (
AssistantPromptMessage,
Expand Down Expand Up @@ -103,12 +102,11 @@ def get_history_prompt_messages(
prompt_message_contents: list[PromptMessageContent] = []
prompt_message_contents.append(TextPromptMessageContent(data=message.query))
for file in file_objs:
if file.type in {FileType.IMAGE, FileType.AUDIO}:
prompt_message = file_manager.to_prompt_message_content(
file,
image_detail_config=detail,
)
prompt_message_contents.append(prompt_message)
prompt_message = file_manager.to_prompt_message_content(
file,
image_detail_config=detail,
)
prompt_message_contents.append(prompt_message)

prompt_messages.append(UserPromptMessage(content=prompt_message_contents))

Expand Down
2 changes: 1 addition & 1 deletion api/core/model_runtime/entities/message_entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class PromptMessageFunction(BaseModel):
function: PromptMessageTool


class PromptMessageContentType(Enum):
class PromptMessageContentType(str, Enum):
"""
Enum class for prompt message content type.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 1048576
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 2097152
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ features:
- vision
- tool-call
- stream-tool-call
- document
model_properties:
mode: chat
context_size: 32767
Expand Down
22 changes: 22 additions & 0 deletions api/core/model_runtime/model_providers/google/llm/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
AssistantPromptMessage,
DocumentPromptMessageContent,
ImagePromptMessageContent,
PromptMessage,
PromptMessageContentType,
Expand All @@ -35,6 +36,21 @@
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel

# MIME types accepted for document prompt content in this provider module.
# When a DOCUMENT content part is formatted for the model, its ``mime_type``
# is checked against this list and a ValueError ("Unsupported mime type ...")
# is raised if it is not present.
# NOTE(review): presumably mirrors the document MIME types documented for the
# Gemini API - verify against the upstream docs before adding or removing
# entries.
GOOGLE_AVAILABLE_MIMETYPE = [
"application/pdf",
"application/x-javascript",
"text/javascript",
"application/x-python",
"text/x-python",
"text/plain",
"text/html",
"text/css",
"text/md",
"text/csv",
"text/xml",
"text/rtf",
]


class GoogleLargeLanguageModel(LargeLanguageModel):
def _invoke(
Expand Down Expand Up @@ -370,6 +386,12 @@ def _format_message_to_glm_content(self, message: PromptMessage) -> ContentType:
raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
blob = {"inline_data": {"mime_type": mime_type, "data": base64_data}}
glm_content["parts"].append(blob)
elif c.type == PromptMessageContentType.DOCUMENT:
message_content = cast(DocumentPromptMessageContent, c)
if message_content.mime_type not in GOOGLE_AVAILABLE_MIMETYPE:
raise ValueError(f"Unsupported mime type {message_content.mime_type}")
blob = {"inline_data": {"mime_type": message_content.mime_type, "data": message_content.data}}
glm_content["parts"].append(blob)

return glm_content
elif isinstance(message, AssistantPromptMessage):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 32768
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ model_type: llm
features:
- vision
- agent-thought
- video
model_properties:
mode: chat
context_size: 8000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ model_properties:
mode: chat
features:
- vision
- video
parameter_rules:
- name: temperature
use_template: temperature
Expand Down
10 changes: 8 additions & 2 deletions api/core/workflow/nodes/llm/exc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,15 @@ class NoPromptFoundError(LLMNodeError):
"""Raised when no prompt is found in the LLM configuration."""


class NotSupportedPromptTypeError(LLMNodeError):
"""Raised when the prompt type is not supported."""
class TemplateTypeNotSupportError(LLMNodeError):
    """Raised when a prompt template is of a type that cannot be handled.

    Args:
        type_name: Textual name of the offending template type. It is
            embedded verbatim in the error message.
    """

    def __init__(self, *, type_name: str):
        message = f"Prompt type {type_name} is not supported."
        super().__init__(message)


class MemoryRolePrefixRequiredError(LLMNodeError):
    """Raised when a completion model requires a memory role prefix."""


class FileTypeNotSupportError(LLMNodeError):
    """Raised when a file/content type is not supported by the selected model.

    Args:
        type_name: Name of the unsupported type. Enum members are accepted
            too and are rendered by their ``value`` so the message reads the
            same on every Python version.
    """

    def __init__(self, *, type_name: str):
        # Local import keeps this block self-contained; the module's import
        # section is not touched.
        from enum import Enum

        # Callers pass enum members such as ``content_item.type``. Formatting
        # a ``(str, Enum)`` member in an f-string yields the bare value on
        # Python <= 3.10 but "ClassName.MEMBER" on 3.11+, so normalise to the
        # underlying value explicitly.
        if isinstance(type_name, Enum):
            type_name = type_name.value
        super().__init__(f"{type_name} type is not supported by this model")
9 changes: 4 additions & 5 deletions api/core/workflow/nodes/llm/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,15 @@
ModelConfig,
)
from .exc import (
FileTypeNotSupportError,
InvalidContextStructureError,
InvalidVariableTypeError,
LLMModeRequiredError,
LLMNodeError,
MemoryRolePrefixRequiredError,
ModelNotExistError,
NoPromptFoundError,
NotSupportedPromptTypeError,
TemplateTypeNotSupportError,
VariableNotFoundError,
)

Expand Down Expand Up @@ -621,9 +622,7 @@ def _fetch_prompt_messages(
prompt_content = prompt_messages[0].content.replace("#sys.query#", user_query)
prompt_messages[0].content = prompt_content
else:
errmsg = f"Prompt type {type(prompt_template)} is not supported"
logger.warning(errmsg)
raise NotSupportedPromptTypeError(errmsg)
raise TemplateTypeNotSupportError(type_name=str(type(prompt_template)))

if vision_enabled and user_files:
file_prompts = []
Expand Down Expand Up @@ -671,7 +670,7 @@ def _fetch_prompt_messages(
and ModelFeature.AUDIO not in model_config.model_schema.features
)
):
continue
raise FileTypeNotSupportError(type_name=content_item.type)
prompt_message_content.append(content_item)
if len(prompt_message_content) == 1 and prompt_message_content[0].type == PromptMessageContentType.TEXT:
prompt_message.content = prompt_message_content[0].data
Expand Down
53 changes: 0 additions & 53 deletions api/tests/unit_tests/core/workflow/nodes/llm/test_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,59 +400,6 @@ def test_fetch_prompt_messages__basic(faker, llm_node, model_config):
)
},
),
LLMNodeTestScenario(
description="Prompt template with variable selector of File without vision feature",
user_query=fake_query,
user_files=[],
vision_enabled=True,
vision_detail=fake_vision_detail,
features=[],
window_size=fake_window_size,
prompt_template=[
LLMNodeChatModelMessage(
text="{{#input.image#}}",
role=PromptMessageRole.USER,
edition_type="basic",
),
],
expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
file_variables={
"input.image": File(
tenant_id="test",
type=FileType.IMAGE,
filename="test1.jpg",
transfer_method=FileTransferMethod.REMOTE_URL,
remote_url=fake_remote_url,
)
},
),
LLMNodeTestScenario(
description="Prompt template with variable selector of File with video file and vision feature",
user_query=fake_query,
user_files=[],
vision_enabled=True,
vision_detail=fake_vision_detail,
features=[ModelFeature.VISION],
window_size=fake_window_size,
prompt_template=[
LLMNodeChatModelMessage(
text="{{#input.image#}}",
role=PromptMessageRole.USER,
edition_type="basic",
),
],
expected_messages=mock_history[fake_window_size * -2 :] + [UserPromptMessage(content=fake_query)],
file_variables={
"input.image": File(
tenant_id="test",
type=FileType.VIDEO,
filename="test1.mp4",
transfer_method=FileTransferMethod.REMOTE_URL,
remote_url=fake_remote_url,
extension="mp4",
)
},
),
]

for scenario in test_scenarios:
Expand Down
Loading

0 comments on commit 08ac368

Please sign in to comment.