Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

Commit

Permalink
Added handling of filename_as_id and file_extractor to SharePointRead…
Browse files Browse the repository at this point in the history
…er (#934)

* Added handling of filename_as_id and file_extractor to SharePointReader (mimicking what is done, for example, for MinioReader)

* added Optional and Union to imports from typing

* after make lint

* No need for Union in line 34 of Sharepoint base.py

* removing import of unused Union from typing, in Sharepoint Reader

---------

Co-authored-by: Ferdinando Simonetti <info@fsimonetti.info>
  • Loading branch information
ferdinandosimonetti and Ferdinando Simonetti authored Feb 12, 2024
1 parent a1ac1ef commit 642dc8a
Showing 1 changed file with 13 additions and 2 deletions.
15 changes: 13 additions & 2 deletions llama_hub/microsoft_sharepoint/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
import logging

from typing import Any, Dict, List
from typing import Any, Dict, List, Optional
import tempfile

import requests
Expand All @@ -28,6 +28,8 @@ def __init__(
client_id: str,
client_secret: str,
tenant_id: str,
filename_as_id: bool = False,
file_extractor: Optional[Dict[str, BaseReader]] = None,
) -> None:
"""
Initializes an instance of SharePoint reader.
Expand All @@ -37,11 +39,16 @@ def __init__(
The application must also be configured with MS Graph permissions "Files.ReadAll", "Sites.ReadAll" and BrowserSiteLists.Read.All.
client_secret: The application secret for the app registered in Azure.
tenant_id: Unique identifier of the Azure Active Directory Instance.
file_extractor (Optional[Dict[str, BaseReader]]): A mapping of file
extension to a BaseReader class that specifies how to convert that file
to text. See `SimpleDirectoryReader` for more details.
"""
self.client_id = (client_id,)
self.client_secret = (client_secret,)
self.tenant_id = tenant_id
self._authorization_headers = None
self.file_extractor = file_extractor
self.filename_as_id = filename_as_id

def _get_access_token(self) -> str:
"""
Expand Down Expand Up @@ -343,7 +350,11 @@ def get_metadata(filename: str) -> Any:
simple_directory_reader = download_loader("SimpleDirectoryReader")

simple_loader = simple_directory_reader(
download_dir, file_metadata=get_metadata, recursive=recursive
download_dir,
file_metadata=get_metadata,
recursive=recursive,
filename_as_id=self.filename_as_id,
file_extractor=self.file_extractor,
)
documents = simple_loader.load_data()
return documents
Expand Down

0 comments on commit 642dc8a

Please sign in to comment.