Commit

Implement document scanner functionality
- Added code to handle document scanning
- Updated README with installation instructions
HoDoTenHuy committed Dec 20, 2023
1 parent 04bd2a6 commit c4aad05
Showing 6 changed files with 208 additions and 1 deletion.
43 changes: 42 additions & 1 deletion README.md
@@ -1 +1,42 @@
# Document-Scanner

# Install packages/modules
Clone the project from the repository:
```
git clone https://github.com/HoDoTenHuy/Document-Scanner.git
```

Create a Conda environment:
```
conda create --name env_name python=3.8
```

Activate the environment:
```
conda activate env_name
```

Install the required packages:
```
python -m pip install -r requirements.txt
```

Run the app:
```
python app.py --image path/to/image
```

# Building a Document Scanner with OpenCV

Creating a document scanner with OpenCV is a straightforward process that involves three simple steps:

## Step 1: Detect Edges
Utilize OpenCV to detect edges in the image.

## Step 2: Find Contour
Use the detected edges to find the contour (outline) representing the piece of paper being scanned.

## Step 3: Perspective Transform
Apply a perspective transform to obtain the top-down view of the document.

Follow these steps to implement your document scanner using OpenCV and enhance your document processing capabilities.
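For reference, the helpers added in `helpers/document_scanner.py` can be chained directly; a minimal sketch of the three steps, using the bundled sample image `data/test.PNG`:
```
from helpers.document_scanner import detection_edge, find_contours, perspective_transform

# Step 1: edge map of the downscaled image (plus the original and the resize ratio)
image, edged, ratio, orig = detection_edge(path="data/test.PNG")

# Step 2: four-point contour outlining the sheet of paper
screen_cnt = find_contours(edged=edged)

# Step 3: top-down, thresholded "scan" of the original image
warped = perspective_transform(orig=orig, screen_cnt=screen_cnt, ratio=ratio)
```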
40 changes: 40 additions & 0 deletions app.py
@@ -0,0 +1,40 @@
# import the necessary packages
import argparse

import cv2
import imutils

from helpers.document_scanner import *


def main():
# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
    ap.add_argument("-i", "--image", default='data/test.PNG', help="path to the image to be scanned")

args = vars(ap.parse_args())

image, edged, ratio, orig = detection_edge(path=args["image"])
# show the original and the edge detected images
print("Step 1: Edge Detection")
cv2.imshow("Image", image)
cv2.imshow("Edged", edged)
cv2.waitKey(0)
cv2.destroyAllWindows()

screen_cnt = find_contours(edged=edged)
# show the contour (outline) of the piece of paper
print("Step 2: Find contours of paper")
cv2.drawContours(image, [screen_cnt], -1, (0, 255, 0), 2)
cv2.imshow("Outline", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

warped = perspective_transform(orig=orig, screen_cnt=screen_cnt, ratio=ratio)
# show the original and scanned images
print("Step 3: Apply perspective transform")
cv2.imshow("Original", imutils.resize(orig, height=650))
cv2.imshow("Scanned", imutils.resize(warped, height=650))
cv2.waitKey(0)
cv2.destroyAllWindows()


if __name__ == '__main__':
main()
Binary file added data/test.PNG
54 changes: 54 additions & 0 deletions helpers/document_scanner.py
@@ -0,0 +1,54 @@
# import the necessary packages
import cv2
import imutils

from skimage.filters import threshold_local
from utils.common import four_point_transform


def detection_edge(path):
"""----- Step 1 -----"""
# load the image and compute the ratio of the old height to the new height, clone it, and resize it
image = cv2.imread(path)
ratio = image.shape[0] / 500.0
orig = image.copy()
image = imutils.resize(image, height=500)

    # convert the image to grayscale, blur it, and find edges in the image
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (5, 5), 0)
edged = cv2.Canny(gray, 75, 200)
return image, edged, ratio, orig


def find_contours(edged):
"""----- Step 2 -----"""
# find the contours in the edged image, keeping only the largest ones, and initialize the screen contour
cnts = cv2.findContours(edged.copy(), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
cnts = imutils.grab_contours(cnts)
    cnts = sorted(cnts, key=cv2.contourArea, reverse=True)[:5]
    screen_cnt = None

# loop over the contours
for c in cnts:
# approximate the contour
peri = cv2.arcLength(c, True)
approx = cv2.approxPolyDP(c, 0.02 * peri, True)

# if our approximated contour has four points, then we can assume that we have found our screen
if len(approx) == 4:
screen_cnt = approx
break

    if screen_cnt is None:
        raise ValueError("Could not find a four-point contour outlining the document")

    return screen_cnt


def perspective_transform(orig, screen_cnt, ratio):
"""----- Step 3 -----"""
# apply the four points transform to obtain a top-down view of the original image
warped = four_point_transform(orig, screen_cnt.reshape(4, 2) * ratio)

# convert the warped image to grayscale, then threshold it to give it that 'black and white' paper effect
warped = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
T = threshold_local(warped, 11, offset=10, method='gaussian')
warped = (warped > T).astype("uint8") * 255
return warped
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
python-imagesearch==1.3.0
scikit-image==0.21.0
numpy==1.24.4
argparse==1.4.0
opencv-python==4.8.1.78
imutils==0.5.4
66 changes: 66 additions & 0 deletions utils/common.py
@@ -0,0 +1,66 @@
# import the necessary packages
import numpy as np
import cv2


def order_points(pts):
    # initialize a list of coordinates that will be ordered
# such that the first entry in the list is the top-left,
# the second entry is the top-right, the third is the
# bottom-right, and the fourth is the bottom-left
rect = np.zeros((4, 2), dtype="float32")

# the top-left point will have the smallest sum, whereas
# the bottom-right point will have the largest sum
s = pts.sum(axis=1)
rect[0] = pts[np.argmin(s)]
rect[2] = pts[np.argmax(s)]

# now, compute the difference between the points, the
# top-right point will have the smallest difference,
# whereas the bottom-left will have the largest difference
diff = np.diff(pts, axis=1)
rect[1] = pts[np.argmin(diff)]
rect[3] = pts[np.argmax(diff)]

# return the ordered coordinates
return rect


def four_point_transform(image, pts):
# obtain a consistent order of the points and unpack them
# individually
rect = order_points(pts)
(tl, tr, br, bl) = rect

# compute the width of the new image, which will be the
# maximum distance between bottom-right and bottom-left
    # x-coordinates or the top-right and top-left x-coordinates
widthA = np.sqrt(((br[0] - bl[0]) ** 2) + ((br[1] - bl[1]) ** 2))
widthB = np.sqrt(((tr[0] - tl[0]) ** 2) + ((tr[1] - tl[1]) ** 2))
maxWidth = max(int(widthA), int(widthB))

# compute the height of the new image, which will be the
# maximum distance between the top-right and bottom-right
# y-coordinates or the top-left and bottom-left y-coordinates
heightA = np.sqrt(((tr[0] - br[0]) ** 2) + ((tr[1] - br[1]) ** 2))
heightB = np.sqrt(((tl[0] - bl[0]) ** 2) + ((tl[1] - bl[1]) ** 2))
maxHeight = max(int(heightA), int(heightB))

# now that we have the dimensions of the new image, construct
    # the set of destination points to obtain a "bird's-eye view",
# (i.e. top-down view) of the image, again specifying points
# in the top-left, top-right, bottom-right, and bottom-left
# order
dst = np.array([
[0, 0],
[maxWidth - 1, 0],
[maxWidth - 1, maxHeight - 1],
[0, maxHeight - 1]], dtype="float32")

# compute the perspective transform matrix and then apply it
M = cv2.getPerspectiveTransform(rect, dst)
warped = cv2.warpPerspective(image, M, (maxWidth, maxHeight))

# return the warped image
return warped
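
As a quick sanity check of the corner-ordering logic in `order_points`, a small sketch with made-up coordinates (not taken from the project): the top-left corner has the smallest x+y sum, the bottom-right the largest, and the top-right/bottom-left are picked out by the smallest and largest y−x difference.
```
import numpy as np
from utils.common import order_points

# hypothetical corner points (x, y) of a tilted sheet, given in arbitrary order
pts = np.array([[320, 15], [25, 30], [40, 410], [300, 395]], dtype="float32")

rect = order_points(pts)
# rect now lists the corners as top-left (25, 30), top-right (320, 15),
# bottom-right (300, 395), bottom-left (40, 410)
print(rect)
```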
