diff --git a/llama_hub/docugami/docugami.ipynb b/llama_hub/docugami/docugami.ipynb index aec2bcff0d..b390c3f643 100644 --- a/llama_hub/docugami/docugami.ipynb +++ b/llama_hub/docugami/docugami.ipynb @@ -81,16 +81,16 @@ "source": [ "from base import DocugamiReader\n", "\n", - "docset_id=\"tjwrr2ekqkc3\"\n", - "docset_name=\"SEC 10-Q reports\"\n", - "document_ids=[\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n", + "docset_id = \"tjwrr2ekqkc3\"\n", + "docset_name = \"SEC 10-Q reports\"\n", + "document_ids = [\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n", "\n", "reader = DocugamiReader()\n", "chunks = reader.load_data(docset_id=docset_id, document_ids=document_ids)\n", "\n", "for chunk in chunks[:5]:\n", " print(chunk)\n", - " print(\"*\"*32)" + " print(\"*\" * 32)" ] }, { @@ -164,7 +164,7 @@ } ], "source": [ - "reader.min_text_length = 1024 * 4 # ~1k tokens\n", + "reader.min_text_length = 1024 * 4 # ~1k tokens\n", "reader.max_text_length = 1024 * 24 # ~6k tokens\n", "reader.include_xml_tags = True\n", "chunks = reader.load_data(docset_id=docset_id)\n", @@ -236,7 +236,9 @@ ], "source": [ "# Try out the query engine with example query\n", - "response = query_engine.query(\"How much did Microsoft spend for opex in the latest quarter?\")\n", + "response = query_engine.query(\n", + " \"How much did Microsoft spend for opex in the latest quarter?\"\n", + ")\n", "print(response.response)" ] }, @@ -317,7 +319,9 @@ "response = query_engine.query(\n", " \"What was Microsoft's weighted average discount rate for operating leases as of March 2023?\"\n", ")\n", - "print(response.response) # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\"" + "print(\n", + " response.response\n", + ") # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\"" ] }, { @@ -428,7 +432,11 @@ "outputs": [], "source": [ "from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever\n", - "from llama_index.vector_stores.types import MetadataInfo, 
VectorStoreInfo, VectorStoreQueryMode\n", + "from llama_index.vector_stores.types import (\n", + " MetadataInfo,\n", + " VectorStoreInfo,\n", + " VectorStoreQueryMode,\n", + ")\n", "from llama_index.query_engine import RetrieverQueryEngine\n", "\n", "EXCLUDE_KEYS = [\"id\", \"xpath\", \"structure\", \"name\", \"tag\"]\n", diff --git a/llama_hub/minio/minio-client/base.py b/llama_hub/minio/minio-client/base.py index 478b0ec5e7..1257439f60 100644 --- a/llama_hub/minio/minio-client/base.py +++ b/llama_hub/minio/minio-client/base.py @@ -29,6 +29,7 @@ def __init__( file_metadata: Optional[Callable[[str], Dict]] = None, minio_endpoint: Optional[str] = None, minio_secure: bool = False, + minio_cert_check: bool = False, minio_access_key: Optional[str] = None, minio_secret_key: Optional[str] = None, minio_session_token: Optional[str] = None, @@ -59,6 +60,8 @@ def __init__( minio_access_key (Optional[str]): The Minio access key. Default is None. minio_secret_key (Optional[str]): The Minio secret key. Default is None. minio_session_token (Optional[str]): The Minio session token. 
+ minio_secure (bool): Whether to connect to the MinIO server over TLS. Default is False. + minio_cert_check (bool): Whether to verify the MinIO server's TLS certificate; leave it False to allow a self-signed certificate. Default is False. """ super().__init__(*args, **kwargs) @@ -74,6 +77,7 @@ def __init__( self.minio_endpoint = minio_endpoint self.minio_secure = minio_secure + self.minio_cert_check = minio_cert_check self.minio_access_key = minio_access_key self.minio_secret_key = minio_secret_key self.minio_session_token = minio_session_token @@ -81,15 +85,20 @@ def __init__( def load_data(self) -> List[Document]: """Load file(s) from Minio.""" from minio import Minio + import urllib3 minio_client = Minio( self.minio_endpoint, secure=self.minio_secure, + cert_check=self.minio_cert_check, access_key=self.minio_access_key, secret_key=self.minio_secret_key, session_token=self.minio_session_token, ) + if not self.minio_cert_check: + urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + with tempfile.TemporaryDirectory() as temp_dir: if self.key: suffix = Path(self.key).suffix