Skip to content

Commit

Permalink
added the ragcode
Browse files Browse the repository at this point in the history
  • Loading branch information
dame-cell committed Aug 9, 2024
1 parent 08a6775 commit 7b280da
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 3 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
VisionRAG is an innovative implementation of MULTI-MODALITY-RAG, leveraging the novel approach introduced in [ColPali: Efficient Document Retrieval with Vision Language Models](https://arxiv.org/abs/2407.01449).

<p align="center">
<img src="VisionRAG/images/colpali.jpeg" alt="ColPali Architecture" width="80%">
<img src="images/colpali.jpeg" alt="ColPali Architecture" width="80%">
</p>

## 🔍 Overview
Expand Down
229 changes: 227 additions & 2 deletions vision_rag/RAG_colpali.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"## We first start with the offline Indexing \n",
"\n",
"<p align=\"center\">\n",
" <img src=\"/teamspace/studios/this_studio/VisionRAG/images/pdf_retriever.png\" alt=\"Offline indexing\" width=\"40%\">\n",
" <img src=\"images/pdf_retriever.png\" alt=\"Offline indexing\" width=\"40%\">\n",
"</p>\n"
]
},
Expand All @@ -33,10 +33,235 @@
"Offline indexing with ColPali is much simpler and faster compared to standard retrieval methods\n",
"\n",
"<p align=\"center\">\n",
" <img src=\"/teamspace/studios/this_studio/VisionRAG/images/indexing.png\" alt=\"Offline indexing\" width=\"50%\">\n",
" <img src=\"images/indexing.png\" alt=\"Offline indexing\" width=\"50%\">\n",
"</p>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Steps to do: \n",
"\n",
"1) First, download a PDF (or use one you already have locally).\n",
"2) Save each page of the PDF as an image.\n",
"3) Pass each page image to ColPali and store the resulting embeddings in a vector database; here we use FAISS because it is very simple.\n",
"4) Pass the query to ColPali as well to get the query embeddings.\n",
"5) Retrieve the image embeddings from the database and compare them with the query embeddings using MaxSim.\n",
"6) Select the image (or images) with the highest similarity to the query.\n",
"7) Pass the selected image(s) and the question to any vision language model: closed source (GPT-4V, Gemini Flash) or open source (IDEFICS-2).\n",
"\n",
"### MaxSim Operation: \n",
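"For each query token, MaxSim computes the maximum similarity score with any document token, and these maxima are then summed into a single query-document score. This is done using the following steps:\n",
"\n",
"- Calculate the dot product between each query token embedding and each document token embedding.\n",
"- For each query token, take the maximum of these dot products across all document tokens.\n",
"- Sum these maxima over all query tokens to obtain the final similarity score between the query and the document page.\n"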
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!git clone https://github.com/illuin-tech/colpali.git\n",
"%cd colpali \n",
"!pip install -r requirements.txt \n",
"!pip install einops"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Using ColPali requires a Hugging Face access token\n",
"from huggingface_hub import notebook_login\n",
"notebook_login()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"PDF_NAME = \"\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Download PDF file\n",
"import os\n",
"import requests\n",
"\n",
"\n",
"# Get PDF document\n",
"\n",
"\n",
"# Download PDF if it doesn't already exist\n",
"if not os.path.exists(PDF_NAME):\n",
" print(\"File doesn't exist, downloading...\")\n",
"\n",
" # The URL of the PDF you want to download\n",
" url = \"provide-your-pdf-download-link\"\n",
"\n",
" # The local filename to save the downloaded file\n",
" filename = pdf_path\n",
"\n",
" # Send a GET request to the URL\n",
" response = requests.get(url)\n",
"\n",
" # Check if the request was successful\n",
" if response.status_code == 200:\n",
" # Open a file in binary write mode and save the content to it\n",
" with open(filename, \"wb\") as file:\n",
" file.write(response.content)\n",
" print(f\"The file has been downloaded and saved as {filename}\")\n",
" else:\n",
" print(f\"Failed to download the file. Status code: {response.status_code}\")\n",
"else:\n",
" print(f\"File {pdf_path} exists.\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Save each page of the PDF as an image\n",
"import os\n",
"from pdf2image import convert_from_path\n",
"\n",
"# Path to the PDF file\n",
"pdf_path = 'path_to_pdf'\n",
"\n",
"# Folder to save images\n",
"output_folder = 'images'\n",
"\n",
"# Create the folder if it doesn't exist\n",
"if not os.path.exists(output_folder):\n",
" os.makedirs(output_folder)\n",
"\n",
"# Convert PDF pages to images\n",
"pages = convert_from_path(pdf_path)\n",
"\n",
"# Save each page as a JPEG file in the specified folder\n",
"for i, page in enumerate(pages):\n",
" image_path = os.path.join(output_folder, f'page_{i}.jpg')\n",
" page.save(image_path, 'JPEG')\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"\n",
"# Check if CUDA is available\n",
"if torch.cuda.is_available():\n",
" print(\"CUDA is available. Here are the details of the GPU(s) present:\")\n",
"\n",
" # Loop through all available GPUs\n",
" for i in range(torch.cuda.device_count()):\n",
" print(f\"\\nGPU {i}:\")\n",
" print(f\"Name: {torch.cuda.get_device_name(i)}\")\n",
" print(f\"Memory Allocated: {torch.cuda.memory_allocated(i) / 1024 ** 3:.2f} GB\")\n",
" print(f\"Total Memory: {torch.cuda.get_device_properties(i).total_memory / 1024 ** 3:.2f} GB\")\n",
" \n",
"else:\n",
" print(\"CUDA is not available. Please check your GPU configuration.\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import torch\n",
"from torch.utils.data import Dataset, DataLoader\n",
"from tqdm import tqdm\n",
"from transformers import AutoProcessor\n",
"from PIL import Image\n",
"import numpy as np\n",
"try:\n",
" from colpali_engine.models.paligemma_colbert_architecture import ColPali\n",
" from colpali_engine.trainer.retrieval_evaluator import CustomEvaluator\n",
" from colpali_engine.utils.colpali_processing_utils import process_images, process_queries\n",
" from colpali_engine.interpretability.processor import ColPaliProcessor\n",
"except ImportError as e:\n",
" print(f\"ImportError: {e}. Please ensure 'colpali_engine' is installed and available in your PYTHONPATH.\")\n",
"\n",
"\n",
"model_name = \"vidore/colpali\"\n",
"model = ColPali.from_pretrained(\"google/paligemma-3b-mix-448\", torch_dtype=torch.float16, device_map=\"cuda\").eval()\n",
"model.load_adapter(model_name)\n",
"processor = AutoProcessor.from_pretrained(model_name)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"class ImageDataset(Dataset):\n",
" def __init__(self, image_dir):\n",
" self.image_dir = image_dir\n",
" self.image_files = [f for f in os.listdir(image_dir) if f.endswith('.jpg')]\n",
"\n",
" def __len__(self):\n",
" return len(self.image_files)\n",
"\n",
" def __getitem__(self, idx):\n",
" img_path = os.path.join(self.image_dir, self.image_files[idx])\n",
" image = Image.open(img_path).convert('RGB')\n",
" return image, img_path"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def indexing_images(image_dir: str, k: int = 5) -> tuple:\n",
"\n",
" image_dataset = ImageDataset(image_dir)\n",
" dataloader = DataLoader(\n",
" image_dataset,\n",
" batch_size=4,\n",
" shuffle=False,\n",
" collate_fn=lambda x: (process_images(processor, [item[0] for item in x]), [item[1] for item in x])\n",
" )\n",
"\n",
" ds = []\n",
" img_paths = []\n",
" for batch_images, batch_img_paths in tqdm(dataloader):\n",
" with torch.no_grad():\n",
" batch_images = {k: v.to(model.device) for k, v in batch_images.items()}\n",
" embeddings_doc = model(**batch_images)\n",
" ds.extend(list(torch.unbind(embeddings_doc.to(\"cpu\"))))\n",
" img_paths.extend(batch_img_paths)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
Expand Down

0 comments on commit 7b280da

Please sign in to comment.