Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
Merge branch 'emptycrown:main' into main
Browse files: browse the repository at this point in the history
  • Loading branch information
guyyanko authored Aug 29, 2023
2 parents 2d24005 + 3539013 commit b3ef924
Showing 1 changed file with 19 additions and 17 deletions.
36 changes: 19 additions & 17 deletions llama_hub/file/json/base.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -54,24 +54,26 @@ def load_data(
self, file: Path, extra_info: Optional[Dict] = None
) -> List[Document]:
"""Load data from the input file."""
# TODO: change Path typing for file in all load_data calls
if not isinstance(file, Path):
file = Path(file)
with open(file, "r") as f:
data = json.load(f)
if self.levels_back is None:
# If levels_back isn't set, we just format and make each
# line an embedding
json_output = json.dumps(data, indent=0)
lines = json_output.split("\n")
useful_lines = [
line for line in lines if not re.match(r"^[{}\[\],]*$", line)
]
return [
Document(text="\n".join(useful_lines), extra_info=extra_info or {})
]
elif self.levels_back is not None:
# If levels_back is set, we make the embeddings contain the labels
# from further up the JSON tree
lines = [*_depth_first_yield(data, self.levels_back, [])]
return [Document(text="\n".join(lines), extra_info=extra_info or {})]
documents = []
for json_object in data:
if self.levels_back is None:
json_output = json.dumps(json_object, indent=0)
lines = json_output.split("\n")
useful_lines = [
line for line in lines if not re.match(r"^[{}\\[\\],]*$", line)
]
documents.append(
Document(
text="\n".join(useful_lines), extra_info=extra_info or {}
)
)
elif self.levels_back is not None:
lines = [*_depth_first_yield(json_object, self.levels_back, [])]
documents.append(
Document(text="\n".join(lines), extra_info=extra_info or {})
)
return documents

0 comments on commit b3ef924

Please sign in to comment.