+
diff --git a/docs/source/use_cases/classification.rst b/docs/source/use_cases/classification.rst
index 0ba09159..f004f120 100644
--- a/docs/source/use_cases/classification.rst
+++ b/docs/source/use_cases/classification.rst
@@ -11,6 +11,8 @@
+.. _classification_end_to_end:
+
Classification Optimization
=============================
diff --git a/docs/source/use_cases/question_answering.rst b/docs/source/use_cases/question_answering.rst
index c5c2fc1f..bfb69917 100644
--- a/docs/source/use_cases/question_answering.rst
+++ b/docs/source/use_cases/question_answering.rst
@@ -1,3 +1,5 @@
+.. _question_answering:
+
.. raw:: html
@@ -10,7 +12,6 @@
-
Question Answering
===============================
diff --git a/notebooks/integration/adalflow_together_deepseek_r1.ipynb b/notebooks/integration/adalflow_together_deepseek_r1.ipynb
index bd5f49a6..ff451c08 100644
--- a/notebooks/integration/adalflow_together_deepseek_r1.ipynb
+++ b/notebooks/integration/adalflow_together_deepseek_r1.ipynb
@@ -96,7 +96,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "# 😇 Have the fun\n",
+ "# 😇 Have fun\n",
"\n",
"Let's get started! 🚀\n",
"\n"
diff --git a/tutorials/adalflow_embedder.py b/tutorials/adalflow_embedder.py
index c73ac13e..89d491b6 100644
--- a/tutorials/adalflow_embedder.py
+++ b/tutorials/adalflow_embedder.py
@@ -1,13 +1,14 @@
from adalflow.core.embedder import Embedder, BatchEmbedder
from adalflow.components.model_client import OpenAIClient, TransformersClient
-from adalflow.core.types import Embedding, EmbedderOutput
+from adalflow.core.types import Embedding, EmbedderOutput, Document
from adalflow.core.functional import normalize_vector
from typing import List
-from adalflow.core.component import Component
+from adalflow.core.component import DataComponent
from copy import deepcopy
+from adalflow.components.data_process.data_components import ToEmbeddings
-class DecreaseEmbeddingDim(Component):
+class DecreaseEmbeddingDim(DataComponent):
def __init__(self, old_dim: int, new_dim: int, normalize: bool = True):
super().__init__()
self.old_dim = old_dim
@@ -38,7 +39,7 @@ def test_openai_embedder():
"encoding_format": "float",
}
- query = "What is the capital of China?"
+ query = "What is LLM?"
queries = [query] * 100
embedder = Embedder(model_client=OpenAIClient(), model_kwargs=model_kwargs)
@@ -54,6 +55,26 @@ def test_openai_embedder():
print(f"Batch queries - Length: {output.length}, Dimension: {output.embedding_dim}")
+def test_to_embeddings():
+ print("\nTesting ToEmbeddings:")
+ model_kwargs = {
+ "model": "text-embedding-3-small",
+ "dimensions": 256,
+ }
+ embedder = Embedder(model_client=OpenAIClient(), model_kwargs=model_kwargs)
+
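+    # ToEmbeddings runs the documents through the embedder in batches of
+    # `batch_size` and attaches the resulting vector to each Document.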
+ to_embeddings = ToEmbeddings(embedder=embedder, batch_size=50)
+
+ query = "What is LLM?"
+ queries = [Document(text=query)] * 1000
+
+ print("Starting embedding processing...")
+ response = to_embeddings(queries)
+ print(f"Embedding processing complete - Total queries processed: {len(queries)}")
+
+ print(f"Response - Length: {len(response)}, vector: {response[0].vector}")
+
+
def test_local_embedder():
print("\nTesting Local Embedder (HuggingFace):")
model_kwargs = {"model": "thenlper/gte-base"}
@@ -61,7 +82,7 @@ def test_local_embedder():
model_client=TransformersClient(), model_kwargs=model_kwargs
)
- query = "What is the capital of China?"
+ query = "What is LLM?"
queries = [query] * 100
# Test single query
@@ -86,7 +107,7 @@ def test_custom_embedder():
output_processors=DecreaseEmbeddingDim(768, 256),
)
- query = "What is the capital of China?"
+ query = "What is LLM?"
output = local_embedder_256(query)
print(
f"Reduced dimension output - Length: {output.length}, Dimension: {output.embedding_dim}, Normalized: {output.is_normalized}"
@@ -101,22 +122,24 @@ def test_batch_embedder():
)
batch_embedder = BatchEmbedder(embedder=local_embedder, batch_size=100)
- query = "What is the capital of China?"
+ query = "What is LLM?"
queries = [query] * 1000
print("Starting batch processing...")
response = batch_embedder(queries)
print(f"Batch processing complete - Total queries processed: {len(queries)}")
- print(f"Response - Length: {response.length}, Dimension: {response.embedding_dim}")
+
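+    # BatchEmbedder returns one EmbedderOutput per batch, so index into the list.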
+ print(f"Response - Length: {len(response)}, Dimension: {response[0].embedding_dim}")
def main():
# Run all tests
- test_openai_embedder()
- test_local_embedder()
- test_custom_embedder()
- test_batch_embedder()
+ # test_openai_embedder()
+ # test_local_embedder()
+ # test_custom_embedder()
+ # test_batch_embedder()
+ test_to_embeddings()
if __name__ == "__main__":
diff --git a/tutorials/generator_note.py b/tutorials/generator_note.py
index 1a1693b7..e8024b7c 100644
--- a/tutorials/generator_note.py
+++ b/tutorials/generator_note.py
@@ -213,6 +213,154 @@ def create_purely_from_config_2():
print(output)
+def simple_query():
+
+ from adalflow.core import Generator
+ from adalflow.components.model_client.openai_client import OpenAIClient
+
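+    # Minimal Generator usage: the default template exposes an `input_str`
+    # variable, so a plain prompt_kwargs dict is enough for a simple query.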
+ gen = Generator(
+ model_client=OpenAIClient(),
+ model_kwargs={
+ "model": "o3-mini",
+ },
+ )
+
+ response = gen({"input_str": "What is LLM?"})
+ print(response)
+
+
+def customize_template():
+
+ import adalflow as adal
+
+ # the template has three variables: system_prompt, few_shot_demos, and input_str
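+    # {{ }} renders a variable and {# #} is a Jinja2 comment; the few-shot block
+    # is only rendered when few_shot_demos is provided at call time.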
+ few_shot_template = r"""
+{{system_prompt}}
+{# Few shot demos #}
+{% if few_shot_demos is not none %}
+Here are some examples:
+{{few_shot_demos}}
+{% endif %}
+
+
+{{input_str}}
+"""
+
+ object_counter = Generator(
+ model_client=adal.GroqAPIClient(),
+ model_kwargs={
+ "model": "llama3-8b-8192",
+ },
+ template=few_shot_template,
+ prompt_kwargs={
+ "system_prompt": "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
+ },
+ )
+
+ question = "I have a flute, a piano, a trombone, four stoves, a violin, an accordion, a clarinet, a drum, two lamps, and a trumpet. How many musical instruments do I have?"
+ response = object_counter(prompt_kwargs={"input_str": question})
+ print(response)
+
+ object_counter.print_prompt(input_str=question)
+
+ # use an int parser
+
+ from adalflow.core.string_parser import IntParser
+
+ object_counter = Generator(
+ model_client=adal.GroqAPIClient(),
+ model_kwargs={
+ "model": "llama3-8b-8192",
+ },
+ template=few_shot_template,
+ prompt_kwargs={
+ "system_prompt": "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
+ },
+ output_processors=IntParser(),
+ )
+
+ response = object_counter(prompt_kwargs={"input_str": question})
+ print(response)
+ print(type(response.data))
+
+    # use a custom parser
+ import re
+
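+    # func_to_data_component wraps the function as a data component so it can be
+    # passed directly as the Generator's output_processors.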
+ @adal.func_to_data_component
+ def parse_integer_answer(answer: str):
+ try:
+ numbers = re.findall(r"\d+", answer)
+ if numbers:
+ answer = int(numbers[-1])
+ else:
+ answer = -1
+ except ValueError:
+ answer = -1
+
+ return answer
+
+ object_counter = Generator(
+ model_client=adal.GroqAPIClient(),
+ model_kwargs={
+ "model": "llama3-8b-8192",
+ },
+ template=few_shot_template,
+ prompt_kwargs={
+ "system_prompt": "You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
+ },
+ output_processors=parse_integer_answer,
+ )
+
+ response = object_counter(prompt_kwargs={"input_str": question})
+ print(response)
+ print(type(response.data))
+
+ template = r"""
+{{system_prompt}}
+
+{{output_format_str}}
+
+
+
+{{input_str}}
+"""
+
+ from dataclasses import dataclass, field
+
+ @dataclass
+    class QAOutput(adal.DataClass):
+ thought: str = field(
+ metadata={
+ "desc": "Your thought process for the question to reach the answer."
+ }
+ )
+ answer: int = field(metadata={"desc": "The answer to the question."})
+
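+        # __output_fields__ fixes which fields (and in what order) appear in the
+        # generated output format.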
+ __output_fields__ = ["thought", "answer"]
+
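+    # DataClassParser derives the JSON format instructions from QAOutput and parses
+    # the model's reply back into a QAOutput instance (return_data_class=True).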
+ parser = adal.DataClassParser(
+ data_class=QAOutput, return_data_class=True, format_type="json"
+ )
+
+ object_counter = Generator(
+ model_client=adal.GroqAPIClient(),
+ model_kwargs={
+ "model": "llama3-8b-8192",
+ },
+ template=template,
+ prompt_kwargs={
+ "system_prompt": "You will answer a reasoning question. Think step by step. ",
+ "output_format_str": parser.get_output_format_str(),
+ },
+ output_processors=parser,
+ )
+
+ response = object_counter(prompt_kwargs={"input_str": question})
+ print(response)
+
+ object_counter.print_prompt(input_str=question)
+
+
if __name__ == "__main__":
qa1 = SimpleQA()
answer = qa1("What is adalflow?")
@@ -228,6 +376,8 @@ def create_purely_from_config_2():
)
minimum_generator()
+ simple_query()
+ customize_template()
# use_a_json_parser()
# use_its_own_template()
# use_model_client_enum_to_switch_client()
diff --git a/use_cases/classification/data.py b/use_cases/classification/data.py
index 78fb8107..4031909a 100644
--- a/use_cases/classification/data.py
+++ b/use_cases/classification/data.py
@@ -14,12 +14,15 @@
@dataclass
class TRECExtendedData(TrecData):
+    """Dataclass for the TREC dataset, extending TrecData with a rationale field."""
+
rationale: str = field(
metadata={
"desc": "Your step-by-step reasoning to classify the question to class_name"
},
default=None,
)
+
__input_fields__ = ["question"]
__output_fields__ = ["rationale", "class_name"]
diff --git a/use_cases/classification/train.py b/use_cases/classification/train.py
index 22914935..34afd5be 100644
--- a/use_cases/classification/train.py
+++ b/use_cases/classification/train.py
@@ -39,7 +39,7 @@ def __init__(
)
def prepare_task(self, sample: TRECExtendedData):
- return self.task.call, {"question": sample.question, "id": sample.id}
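+        # bicall supports both the eval path (GeneratorOutput) and the training path (Parameter).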
+ return self.task.bicall, {"question": sample.question, "id": sample.id}
def prepare_eval(
self, sample: TRECExtendedData, y_pred: adal.GeneratorOutput
diff --git a/use_cases/classification/trec_task_structured_output.py b/use_cases/classification/trec_task_structured_output.py
index 56014cc6..797930ad 100644
--- a/use_cases/classification/trec_task_structured_output.py
+++ b/use_cases/classification/trec_task_structured_output.py
@@ -43,11 +43,8 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
template=task_desc_template, prompt_kwargs={"classes": label_desc}
)()
- self.data_class = TRECExtendedData
- self.data_class.set_task_desc(task_desc_str)
-
self.parser = adal.DataClassParser(
- data_class=self.data_class, return_data_class=True, format_type="yaml"
+ data_class=TRECExtendedData, return_data_class=True, format_type="yaml"
)
prompt_kwargs = {
@@ -55,7 +52,7 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
# it is better to split it into two prompts; it is more effective at training
# 0.8056 val, 0.903 test
"system_prompt": adal.Parameter(
- data=self.parser.get_task_desc_str(),
+ data=task_desc_str,
# data="You are a classifier. Given a question, classify it into one of the following classes based on what the question is seeking:\n\nFormat: class_index. class_name, class_description\n\n0. ABBR, Abbreviation\n1. ENTY, Entity\n2. DESC, Description and abstract concept\n3. HUM, Human being\n4. LOC, Location\n5. NUM, Numeric value\n\nPay close attention to whether a question asks for specific terms, traditions, entities, or people, versus a general description or numerical detail. Do not try to answer the question:",
# data="You are a classifier. Given a question, classify it into one of the following classes based on what the question is seeking:\n\nFormat: class_index. class_name, class_description\n\n0. ABBR, Abbreviation\n1. ENTY, Entity\n2. DESC, Description and abstract concept\n3. HUM, Human being\n4. LOC, Location\n5. NUM, Numeric value\n\nPay special attention to questions about entities versus descriptions, as well as those asking for specific terms or people. Do not try to answer the question:",
# best # data="You are a classifier. For each question given, classify it into one of the following classes:\n\nFormat: class_index. class_name, class_description\n\n0. ABBR, Abbreviation (includes initials)\n1. ENTY, Entity (includes products, languages, objects, etc.)\n2. DESC, Description and abstract concept (includes explanations)\n3. HUM, Human being (includes individuals, groups, etc.)\n4. LOC, Location (includes addresses, places, etc.)\n5. NUM, Numeric value (includes distances, dates, ages, etc.)\n\n- Focus on identifying the primary subject of the question and classifying based on what is being explicitly asked for.",
@@ -87,27 +84,10 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
use_cache=True,
)
- # TODO: can automatically convert everything to parameter if it is not already
- # inside of the forward function instead of doing it here.
- # and this conversion will give input type automatically
- def _prepare_input(self, question: str):
- input_data = self.data_class(question=question)
- input_str = self.parser.get_input_str(input_data)
- prompt_kwargs = {
- "input_str": adal.Parameter(
- data=input_str,
- requires_opt=False,
- role_desc="input to the LLM",
- param_type=adal.ParameterType.INPUT,
- )
- }
- return prompt_kwargs
-
- def call(
+ def bicall(
self, question: str, id: Optional[str] = None
) -> Union[adal.GeneratorOutput, adal.Parameter]:
- prompt_kwargs = self._prepare_input(question)
- output = self.llm(prompt_kwargs=prompt_kwargs, id=id)
+ output = self.llm(prompt_kwargs={"input_str": question}, id=id)
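+        # In training mode the generator returns a Parameter; render only the raw
+        # response when this output appears in downstream prompts.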
if isinstance(output, adal.Parameter):
output.data_in_prompt = lambda x: x.data.raw_response
return output
diff --git a/use_cases/question_answering/bbh/data.py b/use_cases/question_answering/bbh/data.py
index d363248d..86d8221d 100644
--- a/use_cases/question_answering/bbh/data.py
+++ b/use_cases/question_answering/bbh/data.py
@@ -66,12 +66,9 @@ class QuestionAnswer(DataClass):
@func_to_data_component
def parse_integer_answer(answer: str):
- """A function that parses the last integer from a string using regular expressions."""
try:
- # Use regular expression to find all sequences of digits
numbers = re.findall(r"\d+", answer)
if numbers:
- # Get the last number found
answer = int(numbers[-1])
else:
answer = -1
diff --git a/use_cases/question_answering/bbh/object_count/task.py b/use_cases/question_answering/bbh/object_count/task.py
index 4892fe0f..adcc9120 100644
--- a/use_cases/question_answering/bbh/object_count/task.py
+++ b/use_cases/question_answering/bbh/object_count/task.py
@@ -30,7 +30,6 @@ def __init__(self, model_client: adal.ModelClient, model_kwargs: Dict):
super().__init__()
system_prompt = adal.Parameter(
- # data="You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
data="You will answer a reasoning question. Think step by step. The last line of your response should be of the following format: 'Answer: $VALUE' where VALUE is a numerical value.",
role_desc="To give task instruction to the language model in the system prompt",
requires_opt=True,