fix code style and spelling

openvinotoolkit · Oct 18, 2024 · 181daaf · 181daaf
1 parent c462af3
commit 181daaf
Show file tree

Hide file tree

Showing 6 changed files with 23 additions and 27 deletions.
diff --git a/.ci/ignore_convert_execution.txt b/.ci/ignore_convert_execution.txt
@@ -36,7 +36,7 @@ notebooks/llm-chatbot/llm-chatbot.ipynb
 notebooks/llm-rag-langchain/llm-rag-langchain.ipynb
 notebooks/mms-massively-multilingual-speech/mms-massively-multilingual-speech.ipynb
 notebooks/bark-text-to-audio/bark-text-to-audio.ipynb
-notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb
+notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb
 notebooks/llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb
 notebooks/pix2struct-docvqa/pix2struct-docvqa.ipynb
 notebooks/softvc-voice-conversion/softvc-voice-conversion.ipynb

diff --git a/.ci/skipped_notebooks.yml b/.ci/skipped_notebooks.yml
@@ -225,7 +225,7 @@
         - ubuntu-20.04
         - ubuntu-22.04
         - windows-2019
-- notebook: notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb
+- notebook: notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb
   skips:
     - os:
         - macos-12

diff --git a/.ci/spellcheck/.pyspelling.wordlist.txt b/.ci/spellcheck/.pyspelling.wordlist.txt
@@ -517,6 +517,7 @@ MultiHeadAttention
 multilayer
 multimodal
 Multimodality
+multinomial
 MusicGen
 MuRAG
 Müller

diff --git a/notebooks/README.md b/notebooks/README.md
@@ -65,7 +65,7 @@
 - [Create Function-calling Agent using OpenVINO and Qwen-Agent](./llm-agent-functioncall/llm-agent-functioncall-qwen.ipynb)
 - [Visual-language assistant with LLaVA Next and OpenVINO](./llava-next-multimodal-chatbot/llava-next-multimodal-chatbot.ipynb)
 - [Visual-language assistant with Video-LLaVA and OpenVINO](./llava-multimodal-chatbot/videollava-multimodal-chatbot.ipynb)
-- [Visual-language assistant with LLaVA and OpenVINO](./llava-multimodal-chatbot/llava-multimodal-chatbot.ipynb)
+- [Visual-language assistant with LLaVA and OpenVINO](./llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb)
 - [Text-to-Image Generation with LCM LoRA and ControlNet Conditioning](./latent-consistency-models-image-generation/lcm-lora-controlnet.ipynb)
 - [Latent Consistency Model using Optimum-Intel OpenVINO](./latent-consistency-models-image-generation/latent-consistency-models-optimum-demo.ipynb)
 - [Image generation with Latent Consistency Model and OpenVINO](./latent-consistency-models-image-generation/latent-consistency-models-image-generation.ipynb)

diff --git a/notebooks/llava-multimodal-chatbot/gradio_helper.py b/notebooks/llava-multimodal-chatbot/gradio_helper.py
@@ -28,20 +28,20 @@
 def make_demo_llava(model):
     import openvino_genai
     import openvino as ov
-    
+
     def read_image(path: str) -> ov.Tensor:
-        '''
+        """
 
         Args:
             path: The path to the image.
 
         Returns: the ov.Tensor containing the image.
 
-        '''
+        """
         pic = Image.open(path).convert("RGB")
         image_data = np.array(pic.getdata()).reshape(1, 3, pic.size[1], pic.size[0]).astype(np.byte)
         return ov.Tensor(image_data)
-    
+
     class TextQueue:
         def __init__(self) -> None:
             self.text_queue = Queue()
@@ -66,20 +66,18 @@ def reset(self):
 
         def end(self):
             self.text_queue.put(self.stop_signal)
-
-
 
     def bot_streaming(message, history):
         print(f"message is - {message}")
         print(f"history is - {history}")
-        
+
         if not history:
             model.start_chat()
         generation_config = openvino_genai.GenerationConfig()
         generation_config.max_new_tokens = 128
         files = message["files"] if isinstance(message, dict) else message.files
         message_text = message["text"] if isinstance(message, dict) else message.text
-        
+
         image = None
         if files:
             # message["files"][-1] is a Dict or just a string
@@ -97,11 +95,7 @@ def generate_and_signal_complete():
             generation function for single thread
             """
             streamer.reset()
-            generation_kwargs = {
-                "prompt": message_text,
-                "generation_config": generation_config,
-                "streamer": streamer
-            }
+            generation_kwargs = {"prompt": message_text, "generation_config": generation_config, "streamer": streamer}
             if image is not None:
                 generation_kwargs["image"] = image
             model.generate(**generation_kwargs)
@@ -115,7 +109,7 @@ def generate_and_signal_complete():
         for new_text in streamer:
             buffer += new_text
             yield buffer
-    
+
     demo = gr.ChatInterface(
         fn=bot_streaming,
         title="LLaVA OpenVINO Chatbot",
@@ -131,7 +125,6 @@ def generate_and_signal_complete():
     return demo
 
 
-
 def make_demo_videollava(fn: Callable):
     examples_dir = Path("Video-LLaVA/videollava/serve/examples")
     gr.close_all()

diff --git a/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb b/notebooks/llava-multimodal-chatbot/llava-multimodal-chatbot-genai.ipynb
@@ -235,16 +235,17 @@
     "\n",
     "core = ov.Core()\n",
     "\n",
+    "\n",
     "def compress_model_weights(precision):\n",
     "    int4_compression_config = {\n",
     "        \"mode\": nncf.CompressWeightsMode.INT4_ASYM,\n",
     "        \"group_size\": 128,\n",
     "        \"ratio\": 1,\n",
     "    }\n",
     "    int8_compression_config = {\"mode\": nncf.CompressWeightsMode.INT8_ASYM}\n",
-    "    \n",
+    "\n",
     "    compressed_model_path = model_path.parent / precision\n",
-    "    \n",
+    "\n",
     "    if not compressed_model_path.exists():\n",
     "        ov_model = core.read_model(model_path / \"openvino_language_model.xml\")\n",
     "        compression_config = int4_compression_config if precision == \"INT4\" else int8_compression_config\n",
@@ -258,7 +259,7 @@
     "                continue\n",
     "            shutil.copy(file_name, compressed_model_path)\n",
     "\n",
-    "            \n",
+    "\n",
     "compress_model_weights(compression_mode.value)"
    ]
   },
@@ -407,7 +408,7 @@
     "### Load OpenVINO model\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "For pipeline initialization we should provide path to model directory and infernce device."
+    "For pipeline initialization we should provide path to model directory and inference device."
    ]
   },
   {
@@ -441,7 +442,7 @@
     "### Prepare input data\n",
     "[back to top ⬆️](#Table-of-contents:)\n",
     "\n",
-    "For preparing input data, VLPPipeline use tokenizer and image processor inside, we just need to convert image to input OpenVINO tensor and provide question as string. Additionally, we can provides options for controlling generation process (e.g. number of maximum generated tokens or using multinomial sampling for decoding instead of greedy search approach) using `GenerationConfig`.\n",
+    "For preparing input data, `VLMPipeline` uses a tokenizer and an image processor internally, so we only need to convert the image to an input OpenVINO tensor and provide the question as a string. Additionally, we can provide options for controlling the generation process (e.g. the maximum number of generated tokens, or using multinomial sampling for decoding instead of the greedy search approach) using `GenerationConfig`.\n",
     "\n",
     "The generation process for a long response may be time-consuming. To access partial results as soon as they are generated, without waiting for the whole process to finish, the Streaming API can be used. Token streaming is the mode in which the generative system returns the tokens one by one as the model generates them. This enables showing progressive generations to the user rather than waiting for the whole generation. Streaming is an essential aspect of the end-user experience as it reduces latency, one of the most critical aspects of a smooth experience."
    ]
@@ -458,7 +459,7 @@
     "from io import BytesIO\n",
     "import numpy as np\n",
     "\n",
-    "config  = GenerationConfig()\n",
+    "config = GenerationConfig()\n",
     "config.max_new_tokens = 100\n",
     "\n",
     "\n",
@@ -471,16 +472,17 @@
     "    image_data = np.array(image.getdata()).reshape(1, 3, image.size[1], image.size[0]).astype(np.byte)\n",
     "    return image, ov.Tensor(image_data)\n",
     "\n",
+    "\n",
     "def streamer(subword: str) -> bool:\n",
-    "    '''\n",
+    "    \"\"\"\n",
     "\n",
     "    Args:\n",
     "        subword: sub-word of the generated text.\n",
     "\n",
     "    Returns: flag indicating whether generation should be stopped.\n",
     "\n",
-    "    '''\n",
-    "    print(subword, end='', flush=True)\n",
+    "    \"\"\"\n",
+    "    print(subword, end=\"\", flush=True)\n",
     "\n",
     "\n",
     "image_file = \"https://github.com/openvinotoolkit/openvino_notebooks/assets/29454499/d5fbbd1a-d484-415c-88cb-9986625b7b11\"\n",