Skip to content

Commit

Permalink
Merge pull request SylphAI-Inc#75 from SylphAI-Inc/li
Browse files Browse the repository at this point in the history
[documentation]
  • Loading branch information
Sylph-AI authored Jul 4, 2024
2 parents a3064fb + 7429f3f commit 02898b7
Show file tree
Hide file tree
Showing 81 changed files with 439 additions and 728 deletions.
142 changes: 118 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
![LightRAG Logo](https://raw.githubusercontent.com/SylphAI-Inc/LightRAG/main/docs/source/_static/images/LightRAG-logo-doc.jpeg)

## ⚡⚡⚡ The PyTorch Library for Large language Model (LLM) Applications ⚡⚡⚡
### ⚡⚡⚡ The PyTorch Library for Large Language Model (LLM) Applications ⚡⚡⚡

*LightRAG* helps developers with both building and optimizing *Retriever-Agent-Generator (RAG)* pipelines.
It is *light*, *modular*, and *robust*.


<!--
**PyTorch**
Expand All @@ -30,37 +30,130 @@ class Net(nn.Module):
x = self.dropout2(x)
x = self.fc1(x)
return self.fc2(x)
```
``` -->
## LightRAG Task Pipeline


**LightRAG**
We will ask the model to respond with an ``explaination`` and an ``example`` of a concept, and we build the pipeline to return the structured output as a ``QAOutput``.

```python

from lightrag.core import Component, Generator
from lightrag.components.model_client import GroqAPIClient
from dataclasses import dataclass, field

class SimpleQA(Component):
def __init__(self):
super().__init__()
template = r"""<SYS>
You are a helpful assistant.
</SYS>
User: {{input_str}}
You:
"""
self.generator = Generator(
from lightrag.core import Component, Generator, fun_to_component
from lightrag.components.model_client import GroqAPIClient
from lightrag.core import DataClass, fun_to_component, Sequential
from lightrag.components.output_parsers import JsonOutputParser

# Structured answer type for the QA pipeline: the generator's parsed JSON
# reply is converted into an instance of this class.
@dataclass
class QAOutput(DataClass):
    # One-sentence explanation of the concept. The field name's spelling is
    # part of the output schema key, so it is kept exactly as-is.
    explaination: str = field(
        metadata={
            "desc": "A brief explaination of the concept in one sentence.",
        },
    )
    # One-sentence example illustrating the concept.
    example: str = field(
        metadata={
            "desc": "An example of the concept in a sentence.",
        },
    )


@fun_to_component
def to_qa_output(data: dict) -> QAOutput:
    """Build a ``QAOutput`` from an already-parsed dictionary.

    Wrapped with ``fun_to_component`` so it can be chained after the JSON
    parser inside a ``Sequential`` of output processors.
    """
    qa_output = QAOutput.from_dict(data)
    return qa_output


class QA(Component):
    """Question-answering component whose generator output is post-processed into ``QAOutput``."""

    def __init__(self):
        super().__init__()
        # Prompt with two placeholders: ``output_format_str`` is bound below
        # through prompt_kwargs, ``input_str`` is supplied on every call.
        prompt_template = r"""<SYS>
You are a helpful assistant.
<OUTPUT_FORMAT>
{{output_format_str}}
</OUTPUT_FORMAT>
</SYS>
User: {{input_str}}
You:
"""
        json_parser = JsonOutputParser(data_class=QAOutput)
        groq_client = GroqAPIClient()
        format_kwargs = {"output_format_str": json_parser.format_instructions()}
        # Post-processing chain: the JSON parser runs first, then the result
        # is converted into a QAOutput.
        post_processors = Sequential(json_parser, to_qa_output)
        self.generator = Generator(
            model_client=groq_client,
            model_kwargs={"model": "llama3-8b-8192"},
            template=prompt_template,
            prompt_kwargs=format_kwargs,
            output_processors=post_processors,
        )

    def call(self, query: str):
        """Run the generator with ``query`` bound to the ``input_str`` placeholder."""
        generator_input = {"input_str": query}
        return self.generator.call(generator_input)

    async def acall(self, query: str):
        """Async counterpart of ``call``; awaits the generator's ``acall``."""
        generator_input = {"input_str": query}
        return await self.generator.acall(generator_input)
```


Run the following code to visualize the pipeline structure and call the model.

```python

qa = QA()
print(qa)

# call
output = qa("What is LLM?")
print(output)
```

**Structure of the pipeline**

Here is what we get from ``print(qa)``:

```
QA(
(generator): Generator(
model_kwargs={'model': 'llama3-8b-8192'},
(prompt): Prompt(
template: <SYS>
You are a helpful assistant.
<OUTPUT_FORMAT>
{{output_format_str}}
</OUTPUT_FORMAT>
</SYS>
User: {{input_str}}
You:
, prompt_kwargs: {'output_format_str': 'Your output should be formatted as a standard JSON instance with the following schema:\n```\n{\n "explaination": "A brief explaination of the concept in one sentence. (str) (required)",\n "example": "An example of the concept in a sentence. (str) (required)"\n}\n```\n-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!\n-Use double quotes for the keys and string values.\n-Follow the JSON formatting conventions.'}, prompt_variables: ['output_format_str', 'input_str']
)
(model_client): GroqAPIClient()
(output_processors): Sequential(
(0): JsonOutputParser(
data_class=QAOutput, examples=None, exclude_fields=None
(json_output_format_prompt): Prompt(
template: Your output should be formatted as a standard JSON instance with the following schema:
```
{{schema}}
```
{% if example %}
Examples:
```
{{example}}
```
{% endif %}
-Make sure to always enclose the JSON output in triple backticks (```). Please do not add anything other than valid JSON output!
-Use double quotes for the keys and string values.
-Follow the JSON formatting conventions., prompt_variables: ['schema', 'example']
)
(output_processors): JsonParser()
)
(1): ToQaOutputComponent(fun_name=to_qa_output)
)
)
)
```

def call(self, query):
return self.generator({"input_str": query})
**The output**

async def acall(self, query):
return await self.generator.acall({"input_str": query})
Here is what we get from ``print(output)``:

```
GeneratorOutput(data=QAOutput(explaination='LLM stands for Large Language Model, which refers to a type of artificial intelligence designed to process and generate human-like language.', example='For example, a LLM can be trained to generate news articles, conversations, or even entire books, and can be used for a variety of applications such as language translation, text summarization, and chatbots.'), error=None, usage=None, raw_response='```\n{\n "explaination": "LLM stands for Large Language Model, which refers to a type of artificial intelligence designed to process and generate human-like language.",\n "example": "For example, a LLM can be trained to generate news articles, conversations, or even entire books, and can be used for a variety of applications such as language translation, text summarization, and chatbots."\n}', metadata=None)
```


## Quick Install

Install LightRAG with pip:
Expand All @@ -81,7 +174,7 @@ LightRAG full documentation available at [lightrag.sylph.ai](https://lightrag.sy
- [Introduction](https://lightrag.sylph.ai/)
- [Full installation guide](https://lightrag.sylph.ai/get_started/installation.html)
- [Design philosophy](https://lightrag.sylph.ai/developer_notes/lightrag_design_philosophy.html): Design based on three principles: Simplicity over complexity, Quality over quantity, and Optimizing over building.
- [Class hierarchy](https://lightrag.sylph.ai/developer_notes/class_hierarchy.html): We have no more than two levels of subclasses. The bare minimum abstraction will developers with maximum customizability and simplicity.
- [Class hierarchy](https://lightrag.sylph.ai/developer_notes/class_hierarchy.html): We have no more than two levels of subclasses. The bare minimum abstraction provides developers with maximum customizability and simplicity.
- [Tutorials](https://lightrag.sylph.ai/developer_notes/index.html): Learn the `why` and `how-to` (customize and integrate) behind each core part within the `LightRAG` library.
- [API reference](https://lightrag.sylph.ai/apis/index.html)

Expand All @@ -95,11 +188,12 @@ LightRAG full documentation available at [lightrag.sylph.ai](https://lightrag.sy
# Citation

```bibtex
@software{Yin-LightRAG-2024,
author = {Yin, Li},
title = {{LightRAG: The PyTorch Library for Large language Model (LLM) Applications}},
@software{Yin2024LightRAG,
author = {Li Yin},
title = {{LightRAG: The PyTorch Library for Large Language Model (LLM) Applications}},
month = {7},
year = {2024},
doi = {10.5281/zenodo.12639531},
url = {https://github.com/SylphAI-Inc/LightRAG}
}
```
14 changes: 7 additions & 7 deletions benchmarks/ReAct_agent/fever/fever.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
"""

import dotenv
from components.api_client.openai_client import OpenAIClient
from components.agent.react_agent import ReActAgent
from core.func_tool import FunctionTool
from components.api_client import GroqAPIClient
from lightrag.components.model_client.openai_client import OpenAIClient
from lightrag.components.agent.react import ReActAgent
from lightrag.core.func_tool import FunctionTool
from lightrag.components.model_client import GroqAPIClient
import time
from benchmarks.ReAct_agent.utils.tools import search, lookup, normalize_answer
from eval.evaluators import AnswerMacthEvaluator
from lightrag.eval.answer_match_acc import AnswerMatchAcc
import logging
import json
from typing import List, Optional, Any, Dict
Expand Down Expand Up @@ -229,8 +229,8 @@ def experiment(


# setup evaluators
EM_evaluator = AnswerMacthEvaluator(type="exact_match")
FM_evaluator = AnswerMacthEvaluator(type="fuzzy_match")
EM_evaluator = AnswerMatchAcc(type="exact_match")
FM_evaluator = AnswerMatchAcc(type="fuzzy_match")

# load test data
file = open("./tests/benchmark/ReAct_agent/paper_data/paper_dev_10.json")
Expand Down
6 changes: 3 additions & 3 deletions class_hierarchy_edges.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
Component,BooleanParser
Component,IntParser
Component,FloatParser
Component,ListParser
Component,JsonParser
Component,YamlParser
Expand All @@ -15,7 +18,6 @@ Component,FunComponent
Component,ReActAgent
Component,OutputParser
Component,TextSplitter
Component,DocumentSplitter
Component,ToEmbeddings
Component,RetrieverOutputToContextStr
Component,DefaultLLMJudge
Expand Down Expand Up @@ -56,8 +58,6 @@ DataClass,DialogTurn
DataClass,Instruction
DataClass,GeneratorStatesRecord
DataClass,GeneratorCallRecord
Generator,CoTGenerator
Generator,CoTGeneratorWithJsonOutput
OutputParser,YamlOutputParser
OutputParser,JsonOutputParser
OutputParser,ListOutputParser
Expand Down
1 change: 1 addition & 0 deletions developer_notes/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from lightrag.utils import setup_env

print("initiating setup_env()...")
setup_env()
73 changes: 63 additions & 10 deletions developer_notes/generator_note.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
from dataclasses import dataclass, field

from lightrag.core import Component, Generator
from lightrag.components.model_client import GroqAPIClient
from lightrag.core import DataClass, fun_to_component, Sequential
from lightrag.components.output_parsers import JsonOutputParser
from lightrag.utils import setup_env

setup_env()


class SimpleQA(Component):
Expand All @@ -24,6 +31,47 @@ async def acall(self, query):
return await self.generator.acall({"input_str": query})


# Structured answer type for the QA pipeline: the generator's parsed JSON
# reply is converted into an instance of this class.
@dataclass
class QAOutput(DataClass):
    # One-sentence explanation of the concept. The field name's spelling is
    # part of the output schema key, so it is kept exactly as-is.
    explaination: str = field(
        metadata={
            "desc": "A brief explaination of the concept in one sentence.",
        },
    )
    # One-sentence example illustrating the concept.
    example: str = field(
        metadata={
            "desc": "An example of the concept in a sentence.",
        },
    )


@fun_to_component
def to_qa_output(data: dict) -> QAOutput:
    """Build a ``QAOutput`` from an already-parsed dictionary.

    Wrapped with ``fun_to_component`` so it can be chained after the JSON
    parser inside a ``Sequential`` of output processors.
    """
    qa_output = QAOutput.from_dict(data)
    return qa_output


class QA(Component):
    """Question-answering component whose generator output is post-processed into ``QAOutput``."""

    def __init__(self):
        super().__init__()
        # Prompt with two placeholders: ``output_format_str`` is bound below
        # through prompt_kwargs, ``input_str`` is supplied on every call.
        prompt_template = r"""<SYS>
You are a helpful assistant.
<OUTPUT_FORMAT>
{{output_format_str}}
</OUTPUT_FORMAT>
</SYS>
User: {{input_str}}
You:
"""
        json_parser = JsonOutputParser(data_class=QAOutput)
        groq_client = GroqAPIClient()
        format_kwargs = {"output_format_str": json_parser.format_instructions()}
        # Post-processing chain: the JSON parser runs first, then the result
        # is converted into a QAOutput.
        post_processors = Sequential(json_parser, to_qa_output)
        self.generator = Generator(
            model_client=groq_client,
            model_kwargs={"model": "llama3-8b-8192"},
            template=prompt_template,
            prompt_kwargs=format_kwargs,
            output_processors=post_processors,
        )

    def call(self, query: str):
        """Run the generator with ``query`` bound to the ``input_str`` placeholder."""
        generator_input = {"input_str": query}
        return self.generator.call(generator_input)

    async def acall(self, query: str):
        """Async counterpart of ``call``; awaits the generator's ``acall``."""
        generator_input = {"input_str": query}
        return await self.generator.acall(generator_input)


def minimum_generator():
from lightrag.core import Generator
from lightrag.components.model_client import GroqAPIClient
Expand Down Expand Up @@ -169,13 +217,18 @@ def create_purely_from_config_2():


if __name__ == "__main__":
qa = SimpleQA()
answer = qa("What is LightRAG?")
print(qa)

minimum_generator()
use_a_json_parser()
use_its_own_template()
use_model_client_enum_to_switch_client()
create_purely_from_config()
create_purely_from_config_2()
qa1 = SimpleQA()
answer = qa1("What is LightRAG?")
print(qa1)

qa2 = QA()
answer = qa2("What is LLM?")
print(qa2)
print(answer)

# minimum_generator()
# use_a_json_parser()
# use_its_own_template()
# use_model_client_enum_to_switch_client()
# create_purely_from_config()
# create_purely_from_config_2()
15 changes: 8 additions & 7 deletions docs/Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
# Minimal makefile for Sphinx documentation
#


# You can set these variables from the command line.
SPHINXOPTS =
SPHINXBUILD = sphinx-build
SPHINXPROJ = LightRAG
SOURCEDIR = source# the source of output and conf.py
SOURCEDIR = source
BUILDDIR = build
APIDOCOUTDIR = $(SOURCEDIR)/apis

Expand All @@ -28,6 +26,12 @@ apidoc:
@sphinx-apidoc -o $(APIDOCOUTDIR)/optim ../lightrag/lightrag/optim --separate --force
@sphinx-apidoc -o $(APIDOCOUTDIR)/utils ../lightrag/lightrag/utils --separate --force
@sphinx-apidoc -o $(APIDOCOUTDIR)/tracing ../lightrag/lightrag/tracing --separate --force

generate_autosummary:
@echo "Generating autosummary files"
@sphinx-autogen $(SOURCEDIR)/**/*.rst

update_files:
@echo "Inserting reference labels into RST files."
@python $(SOURCEDIR)/insert_labels.py
@echo "Removing unnecessary strings for better formatting"
Expand All @@ -36,9 +40,6 @@ apidoc:
@python $(SOURCEDIR)/remove_files.py
@echo "Renaming and updating file"
@python $(SOURCEDIR)/change_api_file_name.py
# @echo "Renaming and updating file"
# @python $(SOURCEDIR)/change_api_file_name_autosummary.py


html: apidoc
html: apidoc generate_autosummary update_files
@$(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
Loading

0 comments on commit 02898b7

Please sign in to comment.