Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
thejaswin123 authored Aug 19, 2021
1 parent fc44d0b commit 374614d
Show file tree
Hide file tree
Showing 5 changed files with 266 additions and 0 deletions.
149 changes: 149 additions & 0 deletions Handwritten_text_Recog.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"The accuracy of the model is 83.44370860927152%\n",
"\n"
]
}
],
"source": [
"from tkinter import *\n",
"from tkinter import filedialog\n",
"import fpdf\n",
"import cv2\n",
"import numpy as np\n",
"\n",
"from PIL import Image\n",
"pytesseract.pytesseract.tesseract_cmd = r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\"\n",
"import pytesseract\n",
"# Path of working folder on Disk\n",
"\n",
"def browseFiles():\n",
" py=r\"*.png *.jpg *jpeg\"\n",
" global result\n",
" filename = filedialog.askopenfilename(initialdir = \"/\",title = \"Select a File\",filetypes = ((\"images\",py),\n",
" (\"all files\",\"*.*\")))\n",
" if filename == \"\":\n",
" return\n",
" \n",
" # Read image with opencv\n",
" img = cv2.imread(filename)\n",
"\n",
" # Convert to gray\n",
" img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n",
"\n",
" # Apply dilation and erosion to remove some noise\n",
" kernel = np.ones((1, 1), np.uint8)\n",
" img = cv2.dilate(img, kernel, iterations=1)\n",
" img = cv2.erode(img, kernel, iterations=1)\n",
"\n",
" # Write image after removed noise\n",
" cv2.imwrite(\"removed_noise.png\", img)\n",
"\n",
" # Apply threshold to get image with only black and white\n",
" \n",
"\n",
" # Write the image after apply opencv to do some ...\n",
" cv2.imwrite(filename, img)\n",
"\n",
" # Recognize text with tesseract for python\n",
" result = pytesseract.image_to_string(Image.open(filename))\n",
"\n",
" # Remove template file\n",
" label_file_explorer.configure(text=result)\n",
" \n",
" \n",
"\n",
"def pdf():\n",
" global result\n",
" pdf = fpdf.FPDF(format='letter')\n",
" pdf.add_page()\n",
" pdf.set_font(\"Arial\", size=12)\n",
" pdf.write(5,result)\n",
" pdf.ln()\n",
" pdf.output(\"converted.pdf\")\n",
"\n",
"window = Tk()\n",
" \n",
"# Set window title\n",
"window.title('File Explorer')\n",
" \n",
"# Set window size\n",
"window.geometry(\"700x350\")\n",
"reg_info = Label(window,text = \"Handwritten Text Recognition Using Pytesseract\",width='80',height='2',font= (\"ariel\",12,\"bold\"),fg = \"black\",bg='lightgrey')\n",
"reg_info.place(x=370,y=18,anchor='center') \n",
"#Set window background color\n",
"window.config(background = \"white\")\n",
" \n",
"# Create a File Explorer label\n",
"label_file_explorer = Label(window,\n",
" text = \"See the Output Here\",font= (\"ariel\",10,\"bold\"),\n",
" width = 90, height = 12,\n",
" fg = \"blue\")\n",
" \n",
"label_file_explorer.place(x=0,y=35) \n",
"\n",
"button_explore = Button(window,\n",
" text = \"Browse Files\",fg=\"white\",bg=\"black\",font= (\"ariel\",10,\"bold\"),width=10,\n",
" command = browseFiles)\n",
"button_explore.place(x=250,y=270)\n",
"\n",
"text=Label(window,text=\"(Select an image)\",bg=\"white\",fg=\"black\",font= (\"ariel\",8,\"bold\"))\n",
"text.place(x=242,y=300)\n",
"\n",
"button1 = Button(window,\n",
" text = \"convert text to pdf\",fg=\"white\",bg=\"black\",font= (\"ariel\",10,\"bold\"),width=15,\n",
" command = pdf)\n",
"button1.place(x=370,y=270)\n",
"\n",
"window.mainloop()\n",
"\n",
"from difflib import SequenceMatcher\n",
"if result is not None:\n",
" s=\"We start With good\\n\\nBecause all businesses should\\n\\nbe doing something good\"\n",
" s1=result\n",
" def similar(a, b):\n",
" return \"\\nThe accuracy of the model is \"+str(SequenceMatcher(None, a, b).ratio()*100)+\"%\\n\"\n",
" print(similar(s,s1))\n",
" result=None"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
112 changes: 112 additions & 0 deletions Handwritten_text_Recog.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python
# coding: utf-8

# In[29]:


from tkinter import *
from tkinter import filedialog
import fpdf
import cv2
import numpy as np

from PIL import Image
import os

import pytesseract

# Location of the Tesseract executable in a default Windows installation.
# pytesseract must be imported before its configuration can be touched, and
# the override is applied only when the file exists so that other setups keep
# pytesseract's own default command lookup.
_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_EXE

# Text recognized from the most recently processed image; None until an image
# has been processed. Shared between the GUI callbacks through ``global``.
result = None

def browseFiles():
    """Ask the user for an image, run OCR on it and display the text.

    The chosen image is converted to grayscale, passed through a
    dilate/erode step, saved as ``removed_noise.png`` in the current working
    directory and handed to Tesseract. The recognized text is stored in the
    module-level ``result`` and shown in ``label_file_explorer``.

    The file selected by the user is only read, never written back.
    Nothing happens when the dialog is cancelled; an unreadable image is
    reported in the output label.
    """
    global result

    image_patterns = r"*.png *.jpg *.jpeg"
    filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select a File",
        filetypes=(("images", image_patterns), ("all files", "*.*")),
    )
    # A cancelled dialog yields an empty value.
    if not filename:
        return

    # Read image with opencv; imread returns None instead of raising when
    # the file cannot be decoded.
    img = cv2.imread(filename)
    if img is None:
        label_file_explorer.configure(text="Could not read the selected image.")
        return

    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Dilation followed by erosion, intended to remove some noise.
    # NOTE(review): with a 1x1 kernel these operations leave the pixels
    # unchanged; a larger kernel is needed for an actual clean-up effect.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)

    # Keep a copy of the pre-processed image for inspection.
    cv2.imwrite("removed_noise.png", img)

    # Recognize text straight from the in-memory image so the user's
    # original file is not overwritten with the grayscale version.
    result = pytesseract.image_to_string(Image.fromarray(img))

    label_file_explorer.configure(text=result)



def pdf():
    """Write the most recently recognized text to ``converted.pdf``.

    Reads the module-level ``result`` that ``browseFiles`` assigns. When no
    text is available yet, a notice is printed and no file is written.
    The PDF is created in the current working directory.
    """
    # Look the global up defensively: a plain read raises NameError when the
    # button is pressed before any image has been processed.
    recognized = globals().get("result")
    if recognized is None:
        print("No recognized text to export; select an image first.")
        return

    document = fpdf.FPDF(format='letter')
    document.add_page()
    document.set_font("Arial", size=12)
    # The built-in Arial font is limited to Latin-1; substitute characters
    # outside that range so FPDF does not fail while encoding the page.
    printable = recognized.encode("latin-1", "replace").decode("latin-1")
    document.write(5, printable)
    document.ln()
    document.output("converted.pdf")

# ---- Main window: title and fixed size -------------------------------------
window = Tk()
window.title('File Explorer')
window.geometry("700x350")

# Grey heading banner, anchored by its centre near the top of the window.
reg_info = Label(
    window,
    text="Handwritten Text Recognition Using Pytesseract",
    width='80',
    height='2',
    font=("ariel", 12, "bold"),
    fg="black",
    bg='lightgrey',
)
reg_info.place(x=370, y=18, anchor='center')

# White backdrop for the rest of the window.
window.config(background="white")

# Output area: browseFiles replaces this placeholder with the OCR text.
label_file_explorer = Label(
    window,
    text="See the Output Here",
    font=("ariel", 10, "bold"),
    width=90,
    height=12,
    fg="blue",
)
label_file_explorer.place(x=0, y=35)

# Button that opens the file dialog and starts recognition.
button_explore = Button(
    window,
    text="Browse Files",
    fg="white",
    bg="black",
    font=("ariel", 10, "bold"),
    width=10,
    command=browseFiles,
)
button_explore.place(x=250, y=270)

# Small hint shown underneath the browse button.
text = Label(
    window,
    text="(Select an image)",
    bg="white",
    fg="black",
    font=("ariel", 8, "bold"),
)
text.place(x=242, y=300)

# Button that exports the recognized text through pdf().
button1 = Button(
    window,
    text="convert text to pdf",
    fg="white",
    bg="black",
    font=("ariel", 10, "bold"),
    width=15,
    command=pdf,
)
button1.place(x=370, y=270)

# Hand control to the Tk event loop.
window.mainloop()

from difflib import SequenceMatcher


def similar(a, b):
    """Return a printable message stating how similar *a* and *b* are.

    The figure is ``SequenceMatcher(None, a, b).ratio()`` expressed as a
    percentage, i.e. a string-similarity score between the two texts.
    """
    percentage = SequenceMatcher(None, a, b).ratio() * 100
    return "\nThe accuracy of the model is " + str(percentage) + "%\n"


# Reached after window.mainloop() returns. ``result`` is looked up
# defensively because it is never assigned when no image was processed, and a
# plain read would then raise NameError.
if globals().get("result") is not None:
    # Reference transcription the OCR output is compared against
    # (presumably the text of the bundled sample image -- confirm).
    s = "We start With good\n\nBecause all businesses should\n\nbe doing something good"
    s1 = result
    print(similar(s, s1))
    result = None


# In[ ]:




Binary file added SAMPLE1.jpg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added Screenshot .png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
5 changes: 5 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
fpdf==1.7.2
opencv_python==4.5.1.48
numpy==1.20.0
Pillow==8.2.0
pytesseract==0.3.8

0 comments on commit 374614d

Please sign in to comment.