-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
fc44d0b
commit 374614d
Showing
5 changed files
with
266 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 29, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"\n", | ||
"The accuracy of the model is 83.44370860927152%\n", | ||
"\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"from tkinter import *\n", | ||
"from tkinter import filedialog\n", | ||
"import fpdf\n", | ||
"import cv2\n", | ||
"import numpy as np\n", | ||
"\n", | ||
"from PIL import Image\n", | ||
"pytesseract.pytesseract.tesseract_cmd = r\"C:\\Program Files\\Tesseract-OCR\\tesseract.exe\"\n", | ||
"import pytesseract\n", | ||
"# Path of working folder on Disk\n", | ||
"\n", | ||
"def browseFiles():\n", | ||
" py=r\"*.png *.jpg *jpeg\"\n", | ||
" global result\n", | ||
" filename = filedialog.askopenfilename(initialdir = \"/\",title = \"Select a File\",filetypes = ((\"images\",py),\n", | ||
" (\"all files\",\"*.*\")))\n", | ||
" if filename == \"\":\n", | ||
" return\n", | ||
" \n", | ||
" # Read image with opencv\n", | ||
" img = cv2.imread(filename)\n", | ||
"\n", | ||
" # Convert to gray\n", | ||
" img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)\n", | ||
"\n", | ||
" # Apply dilation and erosion to remove some noise\n", | ||
" kernel = np.ones((1, 1), np.uint8)\n", | ||
" img = cv2.dilate(img, kernel, iterations=1)\n", | ||
" img = cv2.erode(img, kernel, iterations=1)\n", | ||
"\n", | ||
" # Write image after removed noise\n", | ||
" cv2.imwrite(\"removed_noise.png\", img)\n", | ||
"\n", | ||
" # Apply threshold to get image with only black and white\n", | ||
" \n", | ||
"\n", | ||
" # Write the image after apply opencv to do some ...\n", | ||
" cv2.imwrite(filename, img)\n", | ||
"\n", | ||
" # Recognize text with tesseract for python\n", | ||
" result = pytesseract.image_to_string(Image.open(filename))\n", | ||
"\n", | ||
" # Remove template file\n", | ||
" label_file_explorer.configure(text=result)\n", | ||
" \n", | ||
" \n", | ||
"\n", | ||
"def pdf():\n", | ||
" global result\n", | ||
" pdf = fpdf.FPDF(format='letter')\n", | ||
" pdf.add_page()\n", | ||
" pdf.set_font(\"Arial\", size=12)\n", | ||
" pdf.write(5,result)\n", | ||
" pdf.ln()\n", | ||
" pdf.output(\"converted.pdf\")\n", | ||
"\n", | ||
"window = Tk()\n", | ||
" \n", | ||
"# Set window title\n", | ||
"window.title('File Explorer')\n", | ||
" \n", | ||
"# Set window size\n", | ||
"window.geometry(\"700x350\")\n", | ||
"reg_info = Label(window,text = \"Handwritten Text Recognition Using Pytesseract\",width='80',height='2',font= (\"ariel\",12,\"bold\"),fg = \"black\",bg='lightgrey')\n", | ||
"reg_info.place(x=370,y=18,anchor='center') \n", | ||
"#Set window background color\n", | ||
"window.config(background = \"white\")\n", | ||
" \n", | ||
"# Create a File Explorer label\n", | ||
"label_file_explorer = Label(window,\n", | ||
" text = \"See the Output Here\",font= (\"ariel\",10,\"bold\"),\n", | ||
" width = 90, height = 12,\n", | ||
" fg = \"blue\")\n", | ||
" \n", | ||
"label_file_explorer.place(x=0,y=35) \n", | ||
"\n", | ||
"button_explore = Button(window,\n", | ||
" text = \"Browse Files\",fg=\"white\",bg=\"black\",font= (\"ariel\",10,\"bold\"),width=10,\n", | ||
" command = browseFiles)\n", | ||
"button_explore.place(x=250,y=270)\n", | ||
"\n", | ||
"text=Label(window,text=\"(Select an image)\",bg=\"white\",fg=\"black\",font= (\"ariel\",8,\"bold\"))\n", | ||
"text.place(x=242,y=300)\n", | ||
"\n", | ||
"button1 = Button(window,\n", | ||
" text = \"convert text to pdf\",fg=\"white\",bg=\"black\",font= (\"ariel\",10,\"bold\"),width=15,\n", | ||
" command = pdf)\n", | ||
"button1.place(x=370,y=270)\n", | ||
"\n", | ||
"window.mainloop()\n", | ||
"\n", | ||
"from difflib import SequenceMatcher\n", | ||
"if result is not None:\n", | ||
" s=\"We start With good\\n\\nBecause all businesses should\\n\\nbe doing something good\"\n", | ||
" s1=result\n", | ||
" def similar(a, b):\n", | ||
" return \"\\nThe accuracy of the model is \"+str(SequenceMatcher(None, a, b).ratio()*100)+\"%\\n\"\n", | ||
" print(similar(s,s1))\n", | ||
" result=None" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.8.5" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 4 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#!/usr/bin/env python | ||
# coding: utf-8 | ||
|
||
# In[29]: | ||
|
||
|
||
from tkinter import * | ||
from tkinter import filedialog | ||
import fpdf | ||
import cv2 | ||
import numpy as np | ||
|
||
from PIL import Image | ||
import os

# pytesseract must be imported BEFORE its tesseract_cmd attribute is set;
# assigning first raises NameError on a fresh interpreter.
import pytesseract

# Location of the Tesseract OCR executable on a default Windows install.
# Only override pytesseract's default when that file actually exists, so that
# on other machines pytesseract keeps looking for `tesseract` on PATH.
_TESSERACT_EXE = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_TESSERACT_EXE):
    pytesseract.pytesseract.tesseract_cmd = _TESSERACT_EXE
|
||
def browseFiles():
    """Let the user pick an image, run OCR on it and display the text.

    Opens a file-selection dialog, loads the chosen image with OpenCV,
    converts it to grayscale, applies the dilate/erode pass, saves the
    pre-processed image as ``removed_noise.png`` in the current directory and
    passes it to Tesseract. The recognised text is stored in the module-level
    ``result`` and shown in ``label_file_explorer``.

    The selected file itself is never modified.

    Returns:
        None. Nothing happens if the dialog is cancelled; if the file cannot
        be decoded as an image, an error message is shown in the label and
        ``result`` is left unchanged.
    """
    global result

    image_patterns = r"*.png *.jpg *.jpeg"
    filename = filedialog.askopenfilename(
        initialdir="/",
        title="Select a File",
        filetypes=(("images", image_patterns), ("all files", "*.*")),
    )
    # A cancelled dialog yields an empty value ("" or an empty tuple).
    if not filename:
        return

    # Read image with opencv; imread signals failure by returning None
    # instead of raising, so check before using the array.
    img = cv2.imread(filename)
    if img is None:
        label_file_explorer.configure(text="Could not read image: " + str(filename))
        return

    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Apply dilation and erosion to remove some noise.
    # NOTE(review): with a 1x1 kernel these two calls effectively leave the
    # image unchanged -- presumably a larger kernel was intended; confirm.
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)

    # Keep a copy of the pre-processed image for inspection.
    cv2.imwrite("removed_noise.png", img)

    # Recognize text straight from the in-memory array. The previous version
    # wrote the grayscale image back over `filename`, destroying the user's
    # original file, only to reopen it for OCR.
    result = pytesseract.image_to_string(img)

    # Show the recognised text in the window.
    label_file_explorer.configure(text=result)
|
||
|
||
|
||
def pdf():
    """Export the most recent OCR text to ``converted.pdf``.

    Reads the module-level ``result`` set by ``browseFiles`` and writes it to
    a single letter-format PDF in the current working directory.

    Returns:
        None. If no image has been processed yet (``result`` is undefined or
        None) a message is printed and no file is written.
    """
    # Look the global up defensively: it does not exist until an image has
    # been processed, and a bare reference would raise NameError.
    text = globals().get("result")
    if text is None:
        print("No recognized text to export; select an image first.")
        return

    # The classic fpdf core fonts are encoded as Latin-1; characters outside
    # that range (e.g. curly quotes produced by OCR) would make output() fail,
    # so replace them with '?' up front.
    printable = text.encode("latin-1", "replace").decode("latin-1")

    document = fpdf.FPDF(format='letter')
    document.add_page()
    document.set_font("Arial", size=12)
    document.write(5, printable)
    document.ln()
    document.output("converted.pdf")
|
||
# OCR output shared with the button callbacks through ``global result``.
# Initialised up front so that closing the window before any image has been
# processed does not raise NameError in the accuracy check below.
result = None

window = Tk()

# Set window title
window.title('File Explorer')

# Set window size
window.geometry("700x350")

# Banner across the top of the window.
# NOTE(review): "ariel" looks like a typo for "Arial"; the literal is left
# unchanged here so the rendered UI stays the same -- confirm and fix.
reg_info = Label(
    window,
    text="Handwritten Text Recognition Using Pytesseract",
    width='80',
    height='2',
    font=("ariel", 12, "bold"),
    fg="black",
    bg='lightgrey',
)
reg_info.place(x=370, y=18, anchor='center')

# Set window background color
window.config(background="white")

# Label that displays the recognised text
label_file_explorer = Label(
    window,
    text="See the Output Here",
    font=("ariel", 10, "bold"),
    width=90,
    height=12,
    fg="blue",
)
label_file_explorer.place(x=0, y=35)

# Button that opens the file dialog and runs OCR
button_explore = Button(
    window,
    text="Browse Files",
    fg="white",
    bg="black",
    font=("ariel", 10, "bold"),
    width=10,
    command=browseFiles,
)
button_explore.place(x=250, y=270)

text = Label(
    window,
    text="(Select an image)",
    bg="white",
    fg="black",
    font=("ariel", 8, "bold"),
)
text.place(x=242, y=300)

# Button that exports the recognised text to converted.pdf
button1 = Button(
    window,
    text="convert text to pdf",
    fg="white",
    bg="black",
    font=("ariel", 10, "bold"),
    width=15,
    command=pdf,
)
button1.place(x=370, y=270)

# Blocks until the window is closed.
window.mainloop()

from difflib import SequenceMatcher


def similar(a, b):
    """Return a printable message with the SequenceMatcher ratio of a and b as a percentage."""
    return "\nThe accuracy of the model is "+str(SequenceMatcher(None, a, b).ratio()*100)+"%\n"


# After the GUI closes, compare the last OCR output against the hard-coded
# reference text and report the similarity.
if result is not None:
    s = "We start With good\n\nBecause all businesses should\n\nbe doing something good"
    s1 = result
    print(similar(s, s1))
    result = None
|
||
|
||
# In[ ]: | ||
|
||
|
||
|
||
|
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
fpdf==1.7.2 | ||
opencv_python==4.5.1.48 | ||
numpy==1.20.0 | ||
Pillow==8.2.0 | ||
pytesseract==0.3.8 |