Skip to content

Commit

Permalink
complete first version parser
Browse files Browse the repository at this point in the history
  • Loading branch information
liyin2015 committed Jul 3, 2024
1 parent af9bbfe commit f949989
Show file tree
Hide file tree
Showing 3 changed files with 223 additions and 11 deletions.
46 changes: 46 additions & 0 deletions developer_notes/parser_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,50 @@ def yaml_parser():
print(parser(yaml_list_str))


def json_output_parser():
    """Walk through ``JsonOutputParser`` on a small ``User`` data class.

    Prints, in order: the parser itself, the format-instruction string it
    generates, and the result of calling the parser on a JSON string.
    """
    from dataclasses import dataclass, field
    from lightrag.components.output_parsers import JsonOutputParser
    from lightrag.core import DataClass

    @dataclass
    class User(DataClass):
        id: int = field(default=1, metadata={"description": "User ID"})
        name: str = field(default="John", metadata={"description": "User name"})

    # One example instance is handed to the parser alongside the data class.
    json_parser_demo = JsonOutputParser(
        data_class=User,
        examples=[User(id=1, name="John")],
    )
    print(json_parser_demo)

    # The instruction string produced by the parser.
    print(json_parser_demo.format_instructions())

    # Parse a raw JSON string describing a different user.
    raw_json = '{"id": 2, "name": "Jane"}'
    print(json_parser_demo(raw_json))


def yaml_output_parser():
    """Walk through ``YamlOutputParser`` on a small ``User`` data class.

    Prints, in order: the parser itself, the format-instruction string it
    generates, and the result of calling the parser on a YAML string.
    """
    from dataclasses import dataclass, field
    from lightrag.components.output_parsers import YamlOutputParser
    from lightrag.core import DataClass

    @dataclass
    class User(DataClass):
        id: int = field(default=1, metadata={"description": "User ID"})
        name: str = field(default="John", metadata={"description": "User name"})

    # One example instance is handed to the parser alongside the data class.
    yaml_parser_demo = YamlOutputParser(
        data_class=User,
        examples=[User(id=1, name="John")],
    )
    print(yaml_parser_demo)

    # The instruction string produced by the parser.
    print(yaml_parser_demo.format_instructions())

    # Parse a raw YAML string describing a different user.
    raw_yaml = "id: 2\nname: Jane"
    print(yaml_parser_demo(raw_yaml))


if __name__ == "__main__":
examples_of_different_ways_to_parse_string()
int_parser()
Expand All @@ -235,3 +279,5 @@ def yaml_parser():
list_parser()
json_parser()
yaml_parser()
json_output_parser()
yaml_output_parser()
160 changes: 159 additions & 1 deletion docs/source/developer_notes/output_parsers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ Thus, ``JsonOutputParser`` and ``YamlOutputParser`` both takes the following arg
- ``examples``: the examples of the data class instance if you want to show the examples in the prompt.
- ``exclude``: the fields to exclude from both the data format and the examples.

.. TODO: a summary table
.. TODO: a summary table and a diagram
Parser in Action
------------------
Expand Down Expand Up @@ -327,6 +327,164 @@ The output will be:
Output Parsers in Action
--------------------------


We will create the following simple ``DataClass`` along with one example instance.
We will then demonstrate how to use ``JsonOutputParser`` and ``YamlOutputParser`` to parse another example into a dict object.

.. code-block:: python
from dataclasses import dataclass, field
from lightrag.core import DataClass
@dataclass
class User(DataClass):
id: int = field(default=1, metadata={"description": "User ID"})
name: str = field(default="John", metadata={"description": "User name"})
user_example = User(id=1, name="John")
**JsonOutputParser**

Here is how to use ``JsonOutputParser``:

.. code-block:: python
from lightrag.components.output_parsers import JsonOutputParser
parser = JsonOutputParser(data_class=User, examples=[user_example])
print(parser)
Printing the parser shows its structure:

.. code-block::
JsonOutputParser(
data_class=User, examples=[json_output_parser.<locals>.User(id=1, name='John')], exclude_fields=None
(json_output_format_prompt): Prompt(
template: Your output should be formatted as a standard JSON instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}
-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!
-Use double quotes for the keys and string values.
-Follow the JSON formatting conventions., prompt_variables: ['example', 'schema']
)
(output_processors): JsonParser()
)
The output format string will be:

.. code-block::
Your output should be formatted as a standard JSON instance with the following schema:
```
{
"id": " (int) (optional)",
"name": " (str) (optional)"
}
```
Examples:
```
{
"id": 1,
"name": "John"
}
________
```
-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!
-Use double quotes for the keys and string values.
-Follow the JSON formatting conventions.
Call the parser with the following string:

.. code-block:: python
user_to_parse = '{"id": 2, "name": "Jane"}'
parsed_user = parser(user_to_parse)
print(parsed_user)
The output will be:

.. code-block:: python
{'id': 2, 'name': 'Jane'}
**YamlOutputParser**

The steps are exactly the same as for ``JsonOutputParser``.

.. code-block:: python
from lightrag.components.output_parsers import YamlOutputParser
parser = YamlOutputParser(data_class=User, examples=[user_example])
print(parser)
Printing the parser shows its structure:

.. code-block::
YamlOutputParser(
data_class=<class '__main__.yaml_output_parser.<locals>.User'>, examples=[yaml_output_parser.<locals>.User(id=1, name='John')]
(yaml_output_format_prompt): Prompt(
template: Your output should be formatted as a standard YAML instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}
-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!
-Follow the YAML formatting conventions with an indent of 2 spaces.
-Quote the string values properly., prompt_variables: ['schema', 'example']
)
(output_processors): YamlParser()
)
The output format string will be:

.. code-block::
Your output should be formatted as a standard YAML instance with the following schema:
```
id: (int) (optional)
name: (str) (optional)
```
Examples:
```
id: 1
name: John
________
```
-Make sure to always enclose the YAML output in triple backticks (```). Please do not add anything other than valid YAML output!
-Follow the YAML formatting conventions with an indent of 2 spaces.
-Quote the string values properly.
Now, let us parse the following string:

.. code-block:: python
user_to_parse = "id: 2\nname: Jane"
parsed_user = parser(user_to_parse)
print(parsed_user)
The output will be:

.. code-block:: python
{'id': 2, 'name': 'Jane'}
.. # todo
.. Evaluate Format following
.. --------------------------
Expand Down
28 changes: 18 additions & 10 deletions lightrag/lightrag/components/output_parsers/outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,7 @@
-Quote the string values properly."""

LIST_OUTPUT_FORMAT = r"""Your output should be formatted as a standard Python list.
-Each element can be of any Python data type such as string, integer, float, list, dictionary, etc.
-You can also have nested lists and dictionaries.
-Please do not add anything other than valid Python list output!
"""
- Start the list with '[' and end with ']'"""


YAML_OUTPUT_PARSER_OUTPUT_TYPE = Dict[str, Any]
Expand Down Expand Up @@ -139,13 +136,18 @@ def __init__(
if not is_dataclass(data_class):
raise ValueError(f"Provided class is not a dataclass: {data_class}")

if not issubclass(data_class, DataClass):
raise ValueError(
f"Provided class is not a subclass of DataClass: {data_class}"
)

# ensure example is instance of data class and initiated
if examples is not None and not isinstance(examples[0], data_class):
raise ValueError(
f"Provided example is not an instance of the data class: {data_class}"
)
self._exclude_fields = exclude_fields
self.data_class_for_yaml: DataClass = data_class
self.data_class: DataClass = data_class
self.yaml_output_format_prompt = Prompt(template=YAML_OUTPUT_FORMAT)
self.output_processors = YamlParser()
self.examples = examples
Expand All @@ -163,7 +165,7 @@ def format_instructions(
exclude (List[str], optional): The fields to exclude from the schema of the data class.
"""
format_type = format_type or DataClassFormatType.SIGNATURE_YAML
schema = self.data_class_for_yaml.format_class_str(
schema = self.data_class.format_class_str(
format_type=format_type, exclude=self._exclude_fields
)
# convert example to string, convert data class to yaml string
Expand All @@ -189,7 +191,7 @@ def call(self, input: str) -> YAML_OUTPUT_PARSER_OUTPUT_TYPE:
return self.output_processors(input)

def _extra_repr(self) -> str:
s = f"data_class_for_yaml={self.data_class_for_yaml}, examples={self.examples}"
s = f"data_class={self.data_class}, examples={self.examples}"
return s


Expand All @@ -204,13 +206,18 @@ def __init__(
if not is_dataclass(data_class):
raise ValueError(f"Provided class is not a dataclass: {data_class}")

if not issubclass(data_class, DataClass):
raise ValueError(
f"Provided class is not a subclass of DataClass: {data_class}"
)

if examples is not None and not isinstance(examples[0], data_class):
raise ValueError(
f"Provided example is not an instance of the data class: {data_class}"
)
self._exclude_fields = exclude_fields
template = JSON_OUTPUT_FORMAT
self.data_class_for_json: DataClass = data_class
self.data_class: DataClass = data_class
self.json_output_format_prompt = Prompt(template=template)
self.output_processors = JsonParser()
self.examples = examples
Expand All @@ -228,7 +235,7 @@ def format_instructions(
Options: DataClassFormatType.SIGNATURE_YAML, DataClassFormatType.SIGNATURE_JSON, DataClassFormatType.SCHEMA.
"""
format_type = format_type or DataClassFormatType.SIGNATURE_JSON
schema = self.data_class_for_json.format_class_str(
schema = self.data_class.format_class_str(
format_type=format_type, exclude=self._exclude_fields
)
example_str = ""
Expand All @@ -244,14 +251,15 @@ def format_instructions(
log.debug(f"{__class__.__name__} example_str: {example_str}")

except Exception:
log.error(f"Error in formatting example for {__class__.__name__}")
example_str = None
return self.json_output_format_prompt(schema=schema, example=example_str)

def call(self, input: str) -> Any:
return self.output_processors(input)

def _extra_repr(self) -> str:
s = f"data_class_for_json={self.data_class_for_json}, examples={self.examples}, exclude_fields={self._exclude_fields}"
s = f"""data_class={self.data_class.__name__}, examples={self.examples}, exclude_fields={self._exclude_fields}"""
return s


Expand Down

0 comments on commit f949989

Please sign in to comment.