Labellerr
diff --git a/‎Model Notebooks/BLIP/blip.ipynb
Lines changed: 353 additions & 0 deletions b/‎Model Notebooks/BLIP/blip.ipynb
Lines changed: 353 additions & 0 deletions
@@ -0,0 +1,353 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5f7ca252",
+   "metadata": {},
+   "source": [
+    "[![Labellerr](https://storage.googleapis.com/labellerr-cdn/%200%20Labellerr%20template/notebook.webp)](https://www.labellerr.com)\n",
+    "\n",
+    "# BLIP: Bootstrapping Language-Image Pre-training for Unified Vision-Language Understanding and Generation\n",
+    "\n",
+    "---\n",
+    "\n",
+    "[![labellerr](https://img.shields.io/badge/Labellerr-BLOG-black.svg)](https://www.labellerr.com/blog/)\n",
+    "[![Youtube](https://img.shields.io/badge/Labellerr-YouTube-b31b1b.svg)](https://www.youtube.com/@Labellerr)\n",
+    "[![Github](https://img.shields.io/badge/Labellerr-GitHub-green.svg)](https://github.com/Labellerr/Hands-On-Learning-in-Computer-Vision)\n",
+    "[![Scientific Paper](https://img.shields.io/badge/Official-Paper-blue.svg)](https://arxiv.org/abs/2201.12086)\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3ab4c690",
+   "metadata": {},
+   "source": [
+    "## Installing Required Libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9de785ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# `requests` is imported directly by this notebook, so it is installed explicitly\n",
+    "# instead of relying on it arriving as a transitive dependency.\n",
+    "%pip install torch transformers pillow accelerate requests"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "58f26a11",
+   "metadata": {},
+   "source": [
+    "## Importing Libraries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a38f97d0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering\n",
+    "import requests\n",
+    "from PIL import Image\n",
+    "from io import BytesIO\n",
+    "from IPython.display import display\n",
+    "import os\n",
+    "from transformers.utils import logging\n",
+    "\n",
+    "# Suppress unnecessary logs\n",
+    "logging.set_verbosity_error()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0d98e278",
+   "metadata": {},
+   "source": [
+    "## Helper Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5d5fe17c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def show_image(source):\n",
+    "    \"\"\"\n",
+    "    Display an image from a URL or a local file path.\n",
+    "\n",
+    "    Any failure (invalid source, HTTP error, unreadable image) is printed\n",
+    "    rather than raised, so a missing image does not stop the notebook.\n",
+    "\n",
+    "    Args:\n",
+    "        source (str): The URL or local file path of the image.\n",
+    "    \"\"\"\n",
+    "    try:\n",
+    "        if source.startswith((\"http://\", \"https://\")):\n",
+    "            # Load image from URL; the timeout keeps an unresponsive host from hanging the cell\n",
+    "            response = requests.get(source, timeout=30)\n",
+    "            response.raise_for_status()  # Raise exception for bad response\n",
+    "            img = Image.open(BytesIO(response.content))\n",
+    "        elif os.path.exists(source):\n",
+    "            # Load image from local file path\n",
+    "            img = Image.open(source)\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid source. Provide a valid URL or local file path.\")\n",
+    "\n",
+    "        display(img)\n",
+    "\n",
+    "    except Exception as e:\n",
+    "        # Deliberate best-effort: report the problem and let the notebook continue\n",
+    "        print(f\"Error displaying image: {e}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "82f9c591",
+   "metadata": {},
+   "source": [
+    "## Implementing BLIP"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7aa31226",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "_BLIP_MODEL_ID = \"Salesforce/blip-vqa-base\"\n",
+    "_blip_cache = {}  # holds the loaded processor/model so repeated questions reuse them\n",
+    "\n",
+    "\n",
+    "def _load_blip():\n",
+    "    \"\"\"Load the BLIP VQA processor and model on first use and reuse them afterwards.\"\"\"\n",
+    "    if \"model\" not in _blip_cache:\n",
+    "        # Half precision is only used when a CUDA device is available; otherwise float32.\n",
+    "        dtype = torch.float16 if torch.cuda.is_available() else torch.float32\n",
+    "        processor = AutoProcessor.from_pretrained(_BLIP_MODEL_ID)\n",
+    "        model = AutoModelForVisualQuestionAnswering.from_pretrained(\n",
+    "            _BLIP_MODEL_ID,\n",
+    "            torch_dtype=dtype,\n",
+    "            device_map=\"auto\",\n",
+    "        )\n",
+    "        # Store both together so a failed load never leaves a half-filled cache.\n",
+    "        _blip_cache.update(processor=processor, model=model)\n",
+    "    return _blip_cache[\"processor\"], _blip_cache[\"model\"]\n",
+    "\n",
+    "\n",
+    "def blip(ques: str, img_url: str) -> str:\n",
+    "    \"\"\"\n",
+    "    Perform visual question answering using the BLIP model.\n",
+    "\n",
+    "    Args:\n",
+    "        ques (str): The question to ask about the image.\n",
+    "        img_url (str): The URL or local file path of the image.\n",
+    "\n",
+    "    Returns:\n",
+    "        str: The decoded answer generated by the model.\n",
+    "\n",
+    "    Raises:\n",
+    "        requests.HTTPError: If the image URL responds with an error status.\n",
+    "    \"\"\"\n",
+    "    processor, model = _load_blip()\n",
+    "\n",
+    "    if img_url.startswith((\"http://\", \"https://\")):\n",
+    "        response = requests.get(img_url, timeout=30)\n",
+    "        response.raise_for_status()  # fail clearly instead of handing PIL an error page\n",
+    "        image = Image.open(BytesIO(response.content))\n",
+    "    else:\n",
+    "        image = Image.open(img_url)\n",
+    "\n",
+    "    # Follow the model's own device and dtype instead of assuming a CUDA GPU.\n",
+    "    inputs = processor(images=image, text=ques, return_tensors=\"pt\").to(model.device, model.dtype)\n",
+    "\n",
+    "    output = model.generate(**inputs)\n",
+    "    return processor.batch_decode(output, skip_special_tokens=True)[0]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e10ff77c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
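+    "# Alternative implementation built on the transformers `pipeline` API, kept for reference.\n",
+    "# To use it, uncomment this cell and add `from transformers import pipeline` to the imports.\n",
+    "# def blip(ques: str, img: str) -> str:\n",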
+    "#     \"\"\"\n",
+    "#     Perform visual question answering using the BLIP model.\n",
+    "\n",
+    "#     Args:\n",
+    "#         ques (str): The question to ask about the image.\n",
+    "#         img (str): The URL or local file path of the image.\n",
+    "\n",
+    "#     Returns:\n",
+    "#         str: The answer to the question.\n",
+    "#     \"\"\"\n",
+    "#     blip_pipeline = pipeline(\n",
+    "#         task=\"visual-question-answering\",\n",
+    "#         model=\"Salesforce/blip-vqa-base\",\n",
+    "#         torch_dtype=torch.float16,\n",
+    "#         device=0\n",
+    "#     )\n",
+    "    \n",
+    "#     answer = blip_pipeline(question=ques, image=img)[0]['answer']\n",
+    "#     return answer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e93aa608",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/pipeline-cat-chonk.jpeg\"\n",
+    "show_image(url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2bcbcbd2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "blip(\"What is the weather in this image?\", url)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eecfb69d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url1 = \"https://farm9.staticflickr.com/8198/8233776747_b27f40f3c2_z.jpg\"\n",
+    "show_image(url1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "85200d60",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "question = \"how many animals in this image?\"\n",
+    "blip(question, url1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "63e3bcd5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Questions put to the model about the image at `url1`.\n",
+    "ques_list = [\n",
+    "    \"What is the weather in this image?\",\n",
+    "    \"how many animals in this image?\",\n",
+    "    \"which animal is in the image?\",\n",
+    "    \"what type of terrain in the image?\",\n",
+    "    \"any flowers in the image?\",\n",
+    "    \"which time of day it is\",\n",
+    "]\n",
+    "\n",
+    "# Print each question first, then the model's answer followed by a blank line.\n",
+    "for ques in ques_list:\n",
+    "    print(\"Question:\", ques)\n",
+    "    answer = blip(ques, url1)\n",
+    "    print(\"Answer:\", answer, end=\"\\n\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3b137ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url2 = 'https://i.pinimg.com/1200x/c4/01/99/c40199e777e9467353f41432c351c90a.jpg'\n",
+    "show_image(url2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bd1b1661",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Questions put to the model about the image at `url2`.\n",
+    "ques_list = [\n",
+    "    \"Numbers of posters in this image\",\n",
+    "    \"Name of the device in this image\",\n",
+    "    \"On right-side poster, what is written on it?\",\n",
+    "    \"Any plant in this image?\",\n",
+    "]\n",
+    "\n",
+    "# Print each question first, then the model's answer followed by a blank line.\n",
+    "for ques in ques_list:\n",
+    "    print(\"Question:\", ques)\n",
+    "    answer = blip(ques, url2)\n",
+    "    print(\"Answer:\", answer, end=\"\\n\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "753b7b2b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url3 = \"https://i.pinimg.com/1200x/0b/41/71/0b417194ea4f479af82c1269b96a81d2.jpg\"\n",
+    "show_image(url3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "034910e6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Questions put to the model about the image at `url3`.\n",
+    "ques_list = [\n",
+    "    \"Numbers of coins in this image\",\n",
+    "    \"what is the color of coins in this image\",\n",
+    "    \"Value written on the coin\",\n",
+    "    \"which currency does the coins belong to?\",\n",
+    "    \"which currency is written on the coin?\",\n",
+    "]\n",
+    "\n",
+    "# Print each question first, then the model's answer followed by a blank line.\n",
+    "for ques in ques_list:\n",
+    "    print(\"Question:\", ques)\n",
+    "    answer = blip(ques, url3)\n",
+    "    print(\"Answer:\", answer, end=\"\\n\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19c2b834",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "url4 = \"https://i.pinimg.com/736x/f9/0a/08/f90a0858d9271593f2be424cd62b38ba.jpg\"\n",
+    "show_image(url4)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "687425cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Questions put to the model about the image at `url4`.\n",
+    "ques_list = [\n",
+    "    \"which vehicle is in the image?\",\n",
+    "    \"what is the color of the vehicle?\",\n",
+    "    \"what is the brand of vehicle?\",\n",
+    "    \"Numbers of person in the image?\",\n",
+    "    \"where is the persons in the image?\",\n",
+    "    \"which place is in the image?\",\n",
+    "    \"what time of day is in the image\",\n",
+    "    \"what is the van plate vehicle ID?\",\n",
+    "]\n",
+    "\n",
+    "# Print each question first, then the model's answer followed by a blank line.\n",
+    "for ques in ques_list:\n",
+    "    print(\"Question:\", ques)\n",
+    "    answer = blip(ques, url4)\n",
+    "    print(\"Answer:\", answer, end=\"\\n\\n\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "VLM",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.18"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}