Skip to content

Commit

Permalink
Scripts for uploading the policy PDFs
Browse files Browse the repository at this point in the history
  • Loading branch information
MrCsabaToth committed Nov 7, 2023
1 parent b557745 commit 3eba948
Showing 1 changed file with 196 additions and 0 deletions.
196 changes: 196 additions & 0 deletions UploadPDFs.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 27,
"id": "96ad09b2-5a4a-4763-8251-353030daf17a",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"import requests\n",
"\n",
"from typing import Any\n",
"\n",
"# Vectara account and target corpus for the uploads.\n",
"customer_id = \"2906470669\"\n",
"corpus_id = \"2\"\n",
"\n",
"# Take the key from the environment when it is set to a non-empty value;\n",
"# otherwise the literal placeholder string is used.\n",
"api_key = os.environ.get(\"VECTARA_API_KEY\") or \"VECTARA_API_KEY\"\n",
"\n",
"# Upload endpoint: `c` carries the customer id and `o` the corpus id.\n",
"url = f\"https://api.vectara.io/v1/upload?c={customer_id}&o={corpus_id}\"\n",
"\n",
"# Headers passed along with the upload requests.\n",
"post_headers = {\"x-api-key\": api_key, \"customer-id\": customer_id}"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "8e5336a3-a3f6-4b8f-8a74-bba2ddf0adb5",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Descriptors for the policy PDFs: one dict per document, all sharing the same category.\n",
"# NOTE(review): `slug` is empty for every entry while `category_slug` differs per\n",
"# document even though `category` is constant - confirm this is intended.\n",
"category = \"Customer Use Policies\"\n",
"pdf_files = [\n",
"    {\n",
"        \"title\": \"Terms of Service\",\n",
"        \"url\": \"https://app.thruthink.com/docs/ThruThink Terms of Service.pdf\",\n",
"        \"category\": category,\n",
"        \"category_slug\": \"CustomerUsePolicies\",\n",
"        \"slug\": \"\",\n",
"        \"file_name\": \"ThruThink Terms of Service.pdf\",\n",
"        \"file_path\": \"pdf/ThruThink Terms of Service.pdf\",\n",
"    },\n",
"    {\n",
"        \"title\": \"Sub User TOS\",\n",
"        \"url\": \"https://app.thruthink.com/docs/ThruThink Sub User TOS.pdf\",\n",
"        \"category\": category,\n",
"        \"category_slug\": \"SubUserTOS\",\n",
"        \"slug\": \"\",\n",
"        \"file_name\": \"ThruThink Sub User TOS.pdf\",\n",
"        \"file_path\": \"pdf/ThruThink Sub User TOS.pdf\",\n",
"    },\n",
"    {\n",
"        \"title\": \"Service Level Agreement\",\n",
"        \"url\": \"https://app.thruthink.com/docs/ThruThink SLA.pdf\",\n",
"        \"category\": category,\n",
"        \"category_slug\": \"SLA\",\n",
"        \"slug\": \"\",\n",
"        \"file_name\": \"ThruThink SLA.pdf\",\n",
"        \"file_path\": \"pdf/ThruThink SLA.pdf\",\n",
"    },\n",
"    {\n",
"        \"title\": \"Acceptable Use Policy\",\n",
"        \"url\": \"https://app.thruthink.com/docs/ThruThink AUP.pdf\",\n",
"        \"category\": category,\n",
"        \"category_slug\": \"AUP\",\n",
"        \"slug\": \"\",\n",
"        \"file_name\": \"ThruThink AUP.pdf\",\n",
"        \"file_path\": \"pdf/ThruThink AUP.pdf\",\n",
"    },\n",
"    {\n",
"        \"title\": \"Privacy Policy\",\n",
"        \"url\": \"https://app.thruthink.com/docs/ThruThink Privacy Policy.pdf\",\n",
"        \"category\": category,\n",
"        \"category_slug\": \"PrivacyPolicy\",\n",
"        \"slug\": \"\",\n",
"        \"file_name\": \"ThruThink Privacy Policy.pdf\",\n",
"        \"file_path\": \"pdf/ThruThink Privacy Policy.pdf\",\n",
"    },\n",
"    {\n",
"        \"title\": \"Cookie Policy\",\n",
"        \"url\": \"https://app.thruthink.com/docs/ThruThink Cookie Policy.pdf\",\n",
"        \"category\": category,\n",
"        \"category_slug\": \"CookiePolicy\",\n",
"        \"slug\": \"\",\n",
"        \"file_name\": \"ThruThink Cookie Policy.pdf\",\n",
"        \"file_path\": \"pdf/ThruThink Cookie Policy.pdf\",\n",
"    },\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "ef6e866b-6f00-46ec-9e72-6afb14721287",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# POST every PDF listed in `pdf_files` to `url`, keeping each HTTP response\n",
"# in `responses` so failures can be inspected afterwards.\n",
"responses = []\n",
"for pdf_file in pdf_files:\n",
"    # Open the PDF in a context manager so the handle is closed as soon as the\n",
"    # request completes, even when the POST raises (previously it was never closed).\n",
"    with open(pdf_file[\"file_path\"], \"rb\") as pdf_handle:\n",
"        files: Any = {\n",
"            \"file\": (pdf_file[\"file_name\"], pdf_handle),\n",
"            # The whole descriptor dict is JSON-encoded and sent as `doc_metadata`.\n",
"            \"doc_metadata\": (None, json.dumps(pdf_file)),\n",
"        }\n",
"        response = requests.post(url, files=files, verify=True, headers=post_headers)\n",
"    responses.append(response)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "5d2cd6b9-f2af-441c-9f2b-eb5b75844400",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# Keep only the responses whose HTTP status code is not 200.\n",
"errors = list(filter(lambda resp: resp.status_code != 200, responses))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "cde05786-4981-4a4a-9daa-39d08edb1262",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"errors"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "6c9ab661-7e2f-4b73-a708-2fe6866aaeb2",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'{\"response\":{\\n \"status\": {\\n },\\n \"quotaConsumed\": {\\n \"numChars\": \"47389\",\\n \"numMetadataChars\": \"16648\"\\n }\\n}}'"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"responses[0].text"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 3eba948

Please sign in to comment.