Implement document scanner functionality
- Added code to handle document scanning
- Updated README with installation instructions
1 parent 04bd2a6 · commit c4aad05
Showing 6 changed files with 208 additions and 1 deletion.
README.md
@@ -1 +1,42 @@
# Document-Scanner

# Install packages/modules
Clone the project from the repository:
```
git clone https://github.com/HoDoTenHuy/Document-Scanner.git
```

Create an environment using Conda:
```
conda create --name env_name python=3.8
```

Activate the environment:
```
conda activate env_name
```

To install the packages/modules, run:
```
python -m pip install -r requirements.txt
```

Run the app:
```
python app.py --image path/to/image
```

# Building a Document Scanner with OpenCV

Creating a document scanner with OpenCV is a straightforward process that involves three simple steps:

## Step 1: Detect Edges
Use OpenCV to detect edges in the image.

## Step 2: Find the Contour
Use the detected edges to find the contour (outline) representing the piece of paper being scanned.

## Step 3: Perspective Transform
Apply a perspective transform to obtain a top-down view of the document.

Follow these steps to implement your document scanner with OpenCV and enhance your document processing capabilities.
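For a quick picture of how these three steps map onto the code added in this commit, the helpers can be chained in a few lines (a minimal sketch; the function names are the ones defined in helpers/document_scanner.py below, and data/test.PNG is the default sample image used by app.py):

```python
# Minimal sketch: chain the three scanner steps using this repo's helpers.
from helpers.document_scanner import detection_edge, find_contours, perspective_transform

# Step 1: load, resize, and compute an edge map (also returns the resize ratio and the original image)
image, edged, ratio, orig = detection_edge(path="data/test.PNG")

# Step 2: locate the four-point contour outlining the page
screen_cnt = find_contours(edged=edged)

# Step 3: warp to a top-down, black-and-white "scanned" view
warped = perspective_transform(orig=orig, screen_cnt=screen_cnt, ratio=ratio)
```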
app.py
@@ -0,0 +1,40 @@
# import the necessary packages
import argparse

import cv2
import imutils

from helpers.document_scanner import detection_edge, find_contours, perspective_transform


def main():
    # construct the argument parser and parse the arguments
    ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", default='data/test.PNG', help="Path to the image to be scanned")

    args = vars(ap.parse_args())

    image, edged, ratio, orig = detection_edge(path=args["image"])
    # show the original and the edge-detected images
    print("Step 1: Edge Detection")
    cv2.imshow("Image", image)
    cv2.imshow("Edged", edged)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    screen_cnt = find_contours(edged=edged)
    # show the contour (outline) of the piece of paper
    print("Step 2: Find contours of paper")
    cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 2)
    cv2.imshow("Outline", image)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

    warped = perspective_transform(orig=orig, screen_cnt=screen_cnt, ratio=ratio)
    # show the original and scanned images side by side
    print("Step 3: Apply perspective transform")
    cv2.imshow("Original", imutils.resize(orig, height=650))
    cv2.imshow("Scanned", imutils.resize(warped, height=650))
    cv2.waitKey(0)
    cv2.destroyAllWindows()


if __name__ == '__main__':
    main()
(Binary or invalid file; not displayed.)
helpers/document_scanner.py
@@ -0,0 +1,54 @@
# import the necessary packages
import cv2
import imutils

from skimage.filters import threshold_local
from utils.common import four_point_transform


def detection_edge(path):
    """----- Step 1 -----"""
    # load the image, compute the ratio of the old height to the new height, clone it, and resize it
    image = cv2.imread(path)
    ratio = image.shape[0] / 500.0
    orig = image.copy()
    image = imutils.resize(image, height=500)

    # convert the image to grayscale, blur it, and find edges in the image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
    edged = cv2.Canny(gray, 75, 200)
    return image, edged, ratio, orig


def find_contours(edged):
    """----- Step 2 -----"""
    # find the contours in the edged image, keeping only the largest ones, and initialize the screen contour
    cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
    screen_cnt = None

    # loop over the contours
    for c in cnts:
        # approximate the contour
        peri = cv2.arcLength(c, True)
        approx = cv2.approxPolyDP(c, 0.02 * peri, True)

        # if our approximated contour has four points, then we can assume that we have found the document
        if len(approx) == 4:
            screen_cnt = approx
            break

    # fail loudly if no four-point contour was found, instead of returning an unbound name
    if screen_cnt is None:
        raise ValueError("Could not find a four-point contour outlining the document")

    return screen_cnt


def perspective_transform(orig, screen_cnt, ratio):
    """----- Step 3 -----"""
    # apply the four-point transform to obtain a top-down view of the original image
    warped = four_point_transform(orig, screen_cnt.reshape(4, 2) * ratio)

    # convert the warped image to grayscale, then threshold it to give it that 'black and white' paper effect
    warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
    T = threshold_local(warped, 11, offset=10, method='gaussian')
    warped = (warped > T).astype("uint8") * 255
    return warped
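For intuition on the final thresholding step: threshold_local computes a separate threshold for each pixel from its local neighbourhood, so uneven lighting across the page does not wash out the text the way a single global cutoff would. A small illustrative sketch (the array values below are made up purely for demonstration):

```python
import numpy as np
from skimage.filters import threshold_local

# A toy grayscale patch with a lighting gradient: the left half is much darker overall.
patch = np.array([
    [ 40,  60, 200, 220],
    [ 50,  45, 210, 205],
    [ 55,  65, 215, 225],
    [ 42,  58, 198, 230],
], dtype="uint8")

# block_size=3 compares each pixel against a Gaussian-weighted mean of its 3x3
# neighbourhood (minus `offset`), rather than against one global threshold,
# which is what lets the real code keep text readable under uneven lighting.
T = threshold_local(patch, block_size=3, offset=10, method="gaussian")
binary = (patch > T).astype("uint8") * 255
print(binary)  # 255 wherever a pixel is brighter than its local threshold
```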
requirements.txt
@@ -0,0 +1,6 @@
python-imagesearch==1.3.0
scikit-image==0.21.0
numpy==1.24.4
argparse==1.4.0
opencv-python==4.8.1.78
imutils==0.5.4
utils/common.py
@@ -0,0 +1,66 @@
# import the necessary packages
import numpy as np
import cv2


def order_points(pts):
    # initialize a list of coordinates that will be ordered
    # such that the first entry in the list is the top-left,
    # the second entry is the top-right, the third is the
    # bottom-right, and the fourth is the bottom-left
    rect = np.zeros((4, 2), dtype="float32")

    # the top-left point will have the smallest sum, whereas
    # the bottom-right point will have the largest sum
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # now, compute the difference between the points; the
    # top-right point will have the smallest difference,
    # whereas the bottom-left will have the largest difference
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]

    # return the ordered coordinates
    return rect


def four_point_transform(image, pts):
    # obtain a consistent order of the points and unpack them
    # individually
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # compute the width of the new image, which will be the
    # maximum distance between the bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
    widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
    widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
    maxWidth = max(int(widthA), int(widthB))

    # compute the height of the new image, which will be the
    # maximum distance between the top-right and bottom-right
    # y-coordinates or the top-left and bottom-left y-coordinates
    heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
    heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
    maxHeight = max(int(heightA), int(heightB))

    # now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "birds eye view"
    # (i.e. top-down view) of the image, again specifying points
    # in top-left, top-right, bottom-right, and bottom-left
    # order
    dst = np.array([
        [0, 0],
        [maxWidth - 1, 0],
        [maxWidth - 1, maxHeight - 1],
        [0, maxHeight - 1]], dtype="float32")

    # compute the perspective transform matrix and then apply it
    M = cv2.getPerspectiveTransform(rect, dst)
    warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

    # return the warped image
    return warped
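As a quick sanity check of the corner-ordering trick above (the smallest/largest coordinate sum picks out the top-left/bottom-right corner, and the smallest/largest y - x difference picks out the top-right/bottom-left), here is a worked example with made-up coordinates:

```python
import numpy as np
from utils.common import order_points

# Four corners of a skewed quadrilateral, deliberately given out of order.
pts = np.array([
    [310,  20],   # top-right:    sum = 330, diff (y - x) = -290 (smallest diff)
    [ 15, 240],   # bottom-left:  sum = 255, diff (y - x) =  225 (largest diff)
    [ 20,  30],   # top-left:     sum =  50 (smallest sum)
    [300, 250],   # bottom-right: sum = 550 (largest sum)
], dtype="float32")

rect = order_points(pts)
print(rect)
# Expected order: top-left [20, 30], top-right [310, 20],
# bottom-right [300, 250], bottom-left [15, 240]
```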