larymak · larymak · Mar 28, 2022 · Mar 26, 2022 · Mar 27, 2022
diff --git a/IMAGES & PHOTO SCRIPTS/Extract Texts from Image/README.md b/IMAGES & PHOTO SCRIPTS/Extract Texts from Image/README.md
@@ -0,0 +1,24 @@
+# Extract text from images in a given directory
+
+## Description
+This script extracts the text from the images in a specified directory and appends it to a .txt file (`output.txt`). The text of each image is written to the file in the order in which the image files are read from the directory.
+
+## Requirements
+
+`$ pip install Pillow`
+`$ pip install pytesseract`
+
+Download and install the required tesseract.exe file here: https://osdn.net/projects/sfnet_tesseract-ocr-alt/downloads/tesseract-ocr-setup-3.02.02.exe/
+
+## Steps To Execution
+- Fork this repo and navigate to the `Extract Texts from Image` folder in your local copy.
+- Edit `image-text.py`, replacing the placeholder strings with the path to your images directory and the path to `tesseract.exe`.
+- Run the script like so: `$ python image-text.py`
+- After a short while you will have the .txt file containing the extracted text.
+- Enjoy, and good luck with your freelance copy-typing jobs! (That is how the idea for this script came about: I really couldn't type out the text in TONS of image files, lol.)
+
+## Code Output
+`"IMAGE_TITLE" done` for each image in directory when text extraction is complete for said image
+`Text extract script completed!` - at the end of the script.
+
+Hit `Ctrl-C` to exit script.
diff --git a/IMAGES & PHOTO SCRIPTS/Extract Texts from Image/image-text.py b/IMAGES & PHOTO SCRIPTS/Extract Texts from Image/image-text.py
@@ -0,0 +1,36 @@
+import os
+import pytesseract
+import signal
+import time
+from PIL import Image
+from os import closerange
+
+def handler(signum, frame):
+    """Announce the interruption and stop the script with exit status 1.
+
+    The function has the signature of a signal handler: it receives the
+    signal number and the interrupted stack frame. Neither value is used.
+
+    Raises:
+        SystemExit: always, with exit code 1.
+    """
+    print("Text extraction script exited!")
+    # The bare ``exit`` helper is injected by the ``site`` module for
+    # interactive sessions and is missing under ``python -S`` or in frozen
+    # builds; raising SystemExit works everywhere and needs no import.
+    raise SystemExit(1)
+
+# --- Settings to edit before running the script ---------------------------
+# Directory that holds the images to read.
+IMAGES_DIRECTORY = r"image files directory"
+# check Program Files(x86) for tesseract.exe (Windows machines)
+TESSERACT_CMD = r"tesseract.exe directory"
+# File the extracted text is appended to (relative to the working directory).
+OUTPUT_FILE = "output.txt"
+# Lower-case extensions of the files that are handed to the OCR engine.
+IMAGE_EXTENSIONS = (".img", ".jpeg", ".jpg")
+
+
+def extract_texts(images_directory=IMAGES_DIRECTORY, output_file=OUTPUT_FILE):
+    """Run OCR on every image in *images_directory* and append the results.
+
+    For each file whose name ends with one of ``IMAGE_EXTENSIONS`` (compared
+    case-insensitively), the English text recognised by Tesseract is appended
+    to *output_file*, preceded by a ``[NEW IMAGE]`` marker and the image path.
+    A progress line is printed once the text of an image has been written.
+
+    Args:
+        images_directory: Path of the directory to scan (not recursive).
+        output_file: Path of the UTF-8 text file to append to.
+    """
+    # os.listdir returns names in arbitrary order; sorting makes the order of
+    # the sections in the output file reproducible.
+    for filename in sorted(os.listdir(images_directory)):
+        if not filename.lower().endswith(IMAGE_EXTENSIONS):
+            continue
+
+        image = os.path.join(images_directory, filename)
+
+        # The context manager closes the underlying file handle as soon as
+        # the OCR call returns, instead of leaving it to the garbage collector.
+        with Image.open(image) as picture:
+            text = pytesseract.image_to_string(picture, lang="eng")
+
+        # Appending per image keeps finished results on disk even if the
+        # script is interrupted while a later image is being processed.
+        with open(output_file, "a", encoding="utf-8") as o:
+            o.write("\n\n\n[NEW IMAGE]\n")
+            o.write(image)
+            o.write("\n")
+            o.write(text)
+
+        # Reported only after the text has actually been written.
+        print(filename + "\r" + filename + " done")
+
+
+if __name__ == "__main__":
+    # Let Ctrl-C end the script through ``handler`` instead of a traceback.
+    signal.signal(signal.SIGINT, handler)
+    # The Tesseract location never changes, so it is configured once here
+    # rather than on every loop iteration.
+    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
+    extract_texts()
+    print("Text extract script completed!")