diff --git a/UploadPDFs.ipynb b/UploadPDFs.ipynb new file mode 100644 index 0000000..316d337 --- /dev/null +++ b/UploadPDFs.ipynb @@ -0,0 +1,196 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 27, + "id": "96ad09b2-5a4a-4763-8251-353030daf17a", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import json\n", + "import os\n", + "import requests\n", + "\n", + "from typing import Any\n", + "\n", + "customer_id = \"2906470669\"\n", + "corpus_id = \"2\"\n", + "api_key = os.getenv(\"VECTARA_API_KEY\") or \"VECTARA_API_KEY\"\n", + "url = f\"https://api.vectara.io/v1/upload?c={customer_id}&o={corpus_id}\"\n", + "\n", + "post_headers = {\n", + " \"x-api-key\": api_key,\n", + " \"customer-id\": customer_id\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8e5336a3-a3f6-4b8f-8a74-bba2ddf0adb5", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "category = \"Customer Use Policies\"\n", + "pdf_files = [\n", + " dict(\n", + " title=\"Terms of Service\",\n", + " url=\"https://app.thruthink.com/docs/ThruThink Terms of Service.pdf\",\n", + " category=category,\n", + " category_slug=\"CustomerUsePolicies\",\n", + " slug=\"\",\n", + " file_name=\"ThruThink Terms of Service.pdf\",\n", + " file_path=\"pdf/ThruThink Terms of Service.pdf\"\n", + " ),\n", + " dict(\n", + " title=\"Sub User TOS\",\n", + " url=\"https://app.thruthink.com/docs/ThruThink Sub User TOS.pdf\",\n", + " category=category,\n", + " category_slug=\"SubUserTOS\",\n", + " slug=\"\",\n", + " file_name=\"ThruThink Sub User TOS.pdf\",\n", + " file_path=\"pdf/ThruThink Sub User TOS.pdf\"\n", + " ),\n", + " dict(\n", + " title=\"Service Level Agreement\",\n", + " url=\"https://app.thruthink.com/docs/ThruThink SLA.pdf\",\n", + " category=category,\n", + " category_slug=\"SLA\",\n", + " slug=\"\",\n", + " file_name=\"ThruThink SLA.pdf\",\n", + " file_path=\"pdf/ThruThink SLA.pdf\"\n", + " ),\n", + " dict(\n", + " title=\"Acceptable Use Policy\",\n", + " url=\"https://app.thruthink.com/docs/ThruThink AUP.pdf\",\n", + " category=category,\n", + " category_slug=\"AUP\",\n", + " slug=\"\",\n", + " file_name=\"ThruThink AUP.pdf\",\n", + " file_path=\"pdf/ThruThink AUP.pdf\"\n", + " ),\n", + " dict(\n", + " title=\"Privacy Policy\",\n", + " url=\"https://app.thruthink.com/docs/ThruThink Privacy Policy.pdf\",\n", + " category=category,\n", + " category_slug=\"PrivacyPolicy\",\n", + " slug=\"\",\n", + " file_name=\"ThruThink Privacy Policy.pdf\",\n", + " file_path=\"pdf/ThruThink Privacy Policy.pdf\"\n", + " ),\n", + " dict(\n", + " title=\"Cookie Policy\",\n", + " url=\"https://app.thruthink.com/docs/ThruThink Cookie Policy.pdf\",\n", + " category=category,\n", + " category_slug=\"CookiePolicy\",\n", + " slug=\"\",\n", + " file_name=\"ThruThink Cookie Policy.pdf\",\n", + " file_path=\"pdf/ThruThink Cookie Policy.pdf\"\n", + " )\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "ef6e866b-6f00-46ec-9e72-6afb14721287", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "responses = []\n", + "for pdf_file in pdf_files:\n", + " files: Any = {\n", + " \"file\": (pdf_file[\"file_name\"], open(pdf_file[\"file_path\"], \"rb\")),\n", + " \"doc_metadata\": (None, json.dumps(pdf_file)),\n", + " }\n", + " response = requests.post(url, files=files, verify=True, headers=post_headers)\n", + " responses.append(response)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "5d2cd6b9-f2af-441c-9f2b-eb5b75844400", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "errors = [r for r in responses if r.status_code != 200]" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "cde05786-4981-4a4a-9daa-39d08edb1262", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "errors" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "6c9ab661-7e2f-4b73-a708-2fe6866aaeb2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "'{\"response\":{\\n \"status\": {\\n },\\n \"quotaConsumed\": {\\n \"numChars\": \"47389\",\\n \"numMetadataChars\": \"16648\"\\n }\\n}}'" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "responses[0].text" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}