Add files via upload

thejaswin123 · Aug 19, 2021 · 374614d · 374614d
1 parent fc44d0b
commit 374614d
Show file tree

Hide file tree

Showing 5 changed files with 266 additions and 0 deletions.
diff --git a/Handwritten_text_Recog.ipynb b/Handwritten_text_Recog.ipynb
@@ -0,0 +1,149 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "The accuracy of the model is 83.44370860927152%\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "from tkinter import *\n",
+    "from tkinter import filedialog\n",
+    "import fpdf\n",
+    "import cv2\n",
+    "import numpy as np\n",
+    "\n",
+    "from PIL import Image\n",
+    "# pytesseract must be imported before its tesseract_cmd attribute is set;\n",
+    "# the reverse order raises NameError on a fresh kernel.\n",
+    "import pytesseract\n",
+    "# Location of the Tesseract executable (adjust for the local installation)\n",
+    "pytesseract.pytesseract.tesseract_cmd = r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\"\n",
+    "\n",
+    "def browseFiles():\n",
+    "    py=r\"*.png *.jpg *jpeg\"\n",
+    "    global result\n",
+    "    filename = filedialog.askopenfilename(initialdir = \"/\",title = \"Select a File\",filetypes = ((\"images\",py),\n",
+    "                                                                                                (\"all files\",\"*.*\")))\n",
+    "    if filename == \"\":\n",
+    "        return\n",
+    "    \n",
+    "    # Read image with opencv\n",
+    "    img = cv2.imread(filename)\n",
+    "\n",
+    "    # Convert to gray\n",
+    "    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
+    "\n",
+    "    # Apply dilation and erosion to remove some noise\n",
+    "    kernel = np.ones((1, 1), np.uint8)\n",
+    "    img = cv2.dilate(img, kernel, iterations=1)\n",
+    "    img = cv2.erode(img, kernel, iterations=1)\n",
+    "\n",
+    "    # Write image after removed noise\n",
+    "    cv2.imwrite(\"removed_noise.png\", img)\n",
+    "\n",
+    "    # NOTE: no thresholding is applied here; the grayscale image is used as-is\n",
+    "    \n",
+    "\n",
+    "    # Overwrite the selected file with the preprocessed image (it is read back for OCR below)\n",
+    "    cv2.imwrite(filename, img)\n",
+    "\n",
+    "    # Recognize text with tesseract for python\n",
+    "    result = pytesseract.image_to_string(Image.open(filename))\n",
+    "\n",
+    "    # Show the recognized text in the output label\n",
+    "    label_file_explorer.configure(text=result)\n",
+    "    \n",
+    "    \n",
+    "\n",
+    "def pdf():\n",
+    "    global result\n",
+    "    pdf = fpdf.FPDF(format='letter')\n",
+    "    pdf.add_page()\n",
+    "    pdf.set_font(\"Arial\", size=12)\n",
+    "    pdf.write(5,result)\n",
+    "    pdf.ln()\n",
+    "    pdf.output(\"converted.pdf\")\n",
+    "\n",
+    "window = Tk()\n",
+    "  \n",
+    "# Set window title\n",
+    "window.title('File Explorer')\n",
+    "  \n",
+    "# Set window size\n",
+    "window.geometry(\"700x350\")\n",
+    "reg_info = Label(window,text = \"Handwritten Text Recognition Using Pytesseract\",width='80',height='2',font= (\"ariel\",12,\"bold\"),fg = \"black\",bg='lightgrey')\n",
+    "reg_info.place(x=370,y=18,anchor='center')  \n",
+    "#Set window background color\n",
+    "window.config(background = \"white\")\n",
+    "  \n",
+    "# Create a File Explorer label\n",
+    "label_file_explorer = Label(window,\n",
+    "                            text = \"See the Output Here\",font= (\"ariel\",10,\"bold\"),\n",
+    "                            width = 90, height = 12,\n",
+    "                            fg = \"blue\")\n",
+    "  \n",
+    "label_file_explorer.place(x=0,y=35) \n",
+    "\n",
+    "button_explore = Button(window,\n",
+    "                        text = \"Browse Files\",fg=\"white\",bg=\"black\",font= (\"ariel\",10,\"bold\"),width=10,\n",
+    "                        command = browseFiles)\n",
+    "button_explore.place(x=250,y=270)\n",
+    "\n",
+    "text=Label(window,text=\"(Select an image)\",bg=\"white\",fg=\"black\",font= (\"ariel\",8,\"bold\"))\n",
+    "text.place(x=242,y=300)\n",
+    "\n",
+    "button1 = Button(window,\n",
+    "                        text = \"convert text to pdf\",fg=\"white\",bg=\"black\",font= (\"ariel\",10,\"bold\"),width=15,\n",
+    "                        command = pdf)\n",
+    "button1.place(x=370,y=270)\n",
+    "\n",
+    "window.mainloop()\n",
+    "\n",
+    "from difflib import SequenceMatcher\n",
+    "if result is not None:\n",
+    "    s=\"We start With good\\n\\nBecause all businesses should\\n\\nbe doing something good\"\n",
+    "    s1=result\n",
+    "    def similar(a, b):\n",
+    "        return \"\\nThe accuracy of the model is \"+str(SequenceMatcher(None, a, b).ratio()*100)+\"%\\n\"\n",
+    "    print(similar(s,s1))\n",
+    "    result=None"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/Handwritten_text_Recog.py b/Handwritten_text_Recog.py
@@ -0,0 +1,112 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[29]:
+
+
+from tkinter import *
+from tkinter import filedialog
+import fpdf
+import cv2
+import numpy as np
+
+from PIL import Image
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+import pytesseract
+# Path of working folder on Disk
+
+def browseFiles():
+    py=r"*.png *.jpg *jpeg"
+    global result
+    filename = filedialog.askopenfilename(initialdir = "/",title = "Select a File",filetypes = (("images",py),
+                                                                                                ("all files","*.*")))
+    if filename == "":
+        return
+
+    # Read image with opencv
+    img = cv2.imread(filename)
+
+    # Convert to gray
+    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+
+    # Apply dilation and erosion to remove some noise
+    kernel = np.ones((1, 1), np.uint8)
+    img = cv2.dilate(img, kernel, iterations=1)
+    img = cv2.erode(img, kernel, iterations=1)
+
+    # Write image after removed noise
+    cv2.imwrite("removed_noise.png", img)
+
+    #  Apply threshold to get image with only black and white
+
+
+    # Write the image after apply opencv to do some ...
+    cv2.imwrite(filename, img)
+
+    # Recognize text with tesseract for python
+    result = pytesseract.image_to_string(Image.open(filename))
+
+    # Remove template file
+    label_file_explorer.configure(text=result)
+
+
+
+def pdf():
+    global result
+    pdf = fpdf.FPDF(format='letter')
+    pdf.add_page()
+    pdf.set_font("Arial", size=12)
+    pdf.write(5,result)
+    pdf.ln()
+    pdf.output("converted.pdf")
+
+window = Tk()
+
+# Set window title
+window.title('File Explorer')
+
+# Set window size
+window.geometry("700x350")
+reg_info = Label(window,text = "Handwritten Text Recognition Using Pytesseract",width='80',height='2',font= ("ariel",12,"bold"),fg = "black",bg='lightgrey')
+reg_info.place(x=370,y=18,anchor='center')  
+#Set window background color
+window.config(background = "white")
+
+# Create a File Explorer label
+label_file_explorer = Label(window,
+                            text = "See the Output Here",font= ("ariel",10,"bold"),
+                            width = 90, height = 12,
+                            fg = "blue")
+
+label_file_explorer.place(x=0,y=35) 
+
+button_explore = Button(window,
+                        text = "Browse Files",fg="white",bg="black",font= ("ariel",10,"bold"),width=10,
+                        command = browseFiles)
+button_explore.place(x=250,y=270)
+
+text=Label(window,text="(Select an image)",bg="white",fg="black",font= ("ariel",8,"bold"))
+text.place(x=242,y=300)
+
+button1 = Button(window,
+                        text = "convert text to pdf",fg="white",bg="black",font= ("ariel",10,"bold"),width=15,
+                        command = pdf)
+button1.place(x=370,y=270)
+
+window.mainloop()
+
+from difflib import SequenceMatcher
+if result is not None:
+    s="We start With good\n\nBecause all businesses should\n\nbe doing something good"
+    s1=result
+    def similar(a, b):
+        return "\nThe accuracy of the model is "+str(SequenceMatcher(None, a, b).ratio()*100)+"%\n"
+    print(similar(s,s1))
+    result=None
+
+
+# In[ ]:
+
+
+
+
diff --git a/SAMPLE1.jpg b/SAMPLE1.jpg
diff --git a/Screenshot .png b/Screenshot .png
diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,5 @@
+fpdf==1.7.2
+opencv_python==4.5.1.48
+numpy==1.20.0
+Pillow==8.2.0
+pytesseract==0.3.8