Skip to content

Commit

Permalink
add urls to metadata saved by github repo reader (run-llama#522)
Browse files Browse the repository at this point in the history
  • Loading branch information
rwood-97 authored Sep 19, 2023
1 parent 9608b21 commit 280020a
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions llama_hub/github_repo/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def _load_data_from_commit(self, commit_sha: str) -> List[Document]:
print_if_verbose(self._verbose, f"got {len(blobs_and_paths)} blobs")

return self._loop.run_until_complete(
self._generate_documents(blobs_and_paths=blobs_and_paths)
self._generate_documents(blobs_and_paths=blobs_and_paths, id=commit_sha)
)

def _load_data_from_branch(self, branch: str) -> List[Document]:
Expand All @@ -270,7 +270,7 @@ def _load_data_from_branch(self, branch: str) -> List[Document]:
print_if_verbose(self._verbose, f"got {len(blobs_and_paths)} blobs")

return self._loop.run_until_complete(
self._generate_documents(blobs_and_paths=blobs_and_paths)
self._generate_documents(blobs_and_paths=blobs_and_paths, id=branch)
)

def load_data(
Expand Down Expand Up @@ -383,12 +383,14 @@ async def _recurse_tree(
async def _generate_documents(
self,
blobs_and_paths: List[Tuple[GitTreeResponseModel.GitTreeObject, str]],
id: Optional[str],
) -> List[Document]:
"""
Generate documents from a list of blobs and their full paths.
:param `blobs_and_paths`: list of tuples of
(tree object, file's full path in the repo relative to the root of the repo)
:param `id`: the branch name or commit sha used when loading the repo
:return: list of documents
"""
buffered_iterator = BufferedGitBlobDataIterator(
Expand Down Expand Up @@ -447,12 +449,14 @@ async def _generate_documents(
f"got {len(decoded_text)} characters"
+ f"- adding to documents - {full_path}",
)
url = os.path.join("https://github.com/", self._owner, self._repo, "blob/", id, full_path)
document = Document(
text=decoded_text,
doc_id=blob_data.sha,
extra_info={
"file_path": full_path,
"file_name": full_path.split("/")[-1],
"url": url
},
)
documents.append(document)
Expand Down

0 comments on commit 280020a

Please sign in to comment.