Add handwriting dataset generation tools

mcshih · Feb 19, 2022 · df921be · df921be
1 parent dda8bf6
commit df921be
Show file tree

Hide file tree

Showing 2 changed files with 109 additions and 0 deletions.
diff --git a/IAM_brush.py b/IAM_brush.py
@@ -0,0 +1,43 @@
+'''
+Make images transparent
+
+Walks the IAM words/ tree, adds an alpha channel to each image, sets alpha to
+0 wherever all three colour channels exceed 200, and writes the result to the
+same relative path under words_a/.
+'''
+import cv2
+import numpy as np
+import os
+from tqdm import tqdm
+
+for root, dirnames, filenames in os.walk("/home/user/ACM/shih/IAM/words/"):
+    # Derive the output directory once per source directory.  `root` itself
+    # must not be rebound: it is still needed to build the input path of
+    # every remaining file in this directory.
+    out_root = root.replace("/words/", "/words_a/")
+    pbar = tqdm(filenames)
+    for filename in pbar:
+        path = os.path.join(root, filename)
+        out_path = os.path.join(out_root, filename)
+        try:
+            # cv2.imread signals failure by returning None, not by raising.
+            img = cv2.imread(path)
+            if img is None:
+                raise IOError("cannot read image: %s" % path)
+
+            result = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
+
+            # A pixel counts as background when B, G and R are all above 200;
+            # clear its alpha in one vectorised step.
+            background = np.all(img > 200, axis=2)
+            result[background, 3] = 0
+
+            pbar.set_description("Processing %s" % out_path)
+            # cv2.imwrite cannot write into a directory that does not exist.
+            os.makedirs(out_root, exist_ok=True)
+            if not cv2.imwrite(out_path, result, [int(cv2.IMWRITE_PNG_COMPRESSION), 0]):
+                raise IOError("cannot write image: %s" % out_path)
+        except Exception:
+            # Best effort: report the file and carry on with the rest.
+            print(path, "NOT SUCESS!")
+
diff --git a/handwriting_stamp_generator.py b/handwriting_stamp_generator.py
@@ -0,0 +1,117 @@
+import cv2
+import numpy as np
+import os
+import json
+import random
+import hashlib
+from tqdm import tqdm
+
+# Word List: every file under the transparent-word tree is a candidate stamp.
+word_file_list = []
+for root, dirnames, filenames in os.walk("/home/user/ACM/shih/IAM/words_a/"):
+    word_file_list.extend(os.path.join(root, filename) for filename in filenames)
+
+def images_process(image_path, image_final_name):
+    """Paste random handwritten words onto a document image and label them.
+
+    Reads ``image_path``, alpha-blends up to 20 randomly chosen images from
+    ``word_file_list`` at random positions, writes the result into the
+    ``my_dataset`` folder as ``image_final_name`` and returns its annotation.
+
+    Args:
+        image_path: path of the source document image.
+        image_final_name: file name to save the stamped image under.
+
+    Returns:
+        dict with ``img_dimensions`` as (height, width), ``img_hash`` as the
+        SHA-256 hex digest of the written file, and ``polygons`` as one list
+        of four [x, y] corners per pasted word.
+
+    Raises:
+        IOError: if the source image cannot be read or the result cannot be
+            written.
+    """
+    result_path = "/home/user/ACM/shih/DDI-100/my_dataset/"
+
+    # cv2.imread signals failure by returning None, not by raising.
+    img = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
+    if img is None:
+        raise IOError("cannot read image: %s" % image_path)
+    if img.ndim == 2:
+        # IMREAD_UNCHANGED keeps grayscale as 2-D; the per-channel blend
+        # below needs an explicit channel axis.
+        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+    canvas = np.copy(img)
+    height, width = img.shape[:2]
+    # random.sample raises ValueError when asked for more items than exist.
+    n_words = min(random.randint(1, 20), len(word_file_list))
+    selected_words = random.sample(word_file_list, n_words)
+
+    polys = []
+    for word_path in selected_words:
+        word_img = cv2.imread(word_path, cv2.IMREAD_UNCHANGED)
+        # The blend reads channel 3 as alpha, so the stamp must be 4-channel.
+        if word_img is None or word_img.ndim != 3 or word_img.shape[2] != 4:
+            print("ERROR:",image_path, word_path)
+            continue
+        word_height, word_width = word_img.shape[:2]
+
+        try:
+            # Centre of the stamp, kept far enough from every edge that the
+            # whole word lies inside the document.
+            height_ = random.randint(word_height+5, height-word_height-5)
+            width_ = random.randint(word_width+5, width-word_width-5)
+        except ValueError:
+            # Empty range: the word is too large for this document.
+            print("ERROR:",image_path, word_path)
+            continue
+
+        alpha_s = word_img[:, :, 3] / 255.0
+        alpha_l = 1.0 - alpha_s
+
+        # Rotated rect as ((centre x, centre y), (width, height), angle).
+        rect = ((width_, height_), (word_width, word_height), 0)
+        box = cv2.boxPoints(rect)
+        polys.append(box.tolist())
+
+        # Top-left corner of the paste region, truncated to whole pixels.
+        rectCnt = np.int_(box)
+        y1 = int(rectCnt[:, 1].min())
+        x1 = int(rectCnt[:, 0].min())
+
+        # Alpha-blend the colour channels; `region` is a view into canvas.
+        region = canvas[y1:y1+word_height, x1:x1+word_width, :3]
+        region[...] = (alpha_s[:, :, None] * word_img[:, :, :3] + alpha_l[:, :, None] * region)
+
+    # cv2.imwrite cannot write into a directory that does not exist.
+    os.makedirs(result_path, exist_ok=True)
+    out_file = os.path.join(result_path, image_final_name)
+    if not cv2.imwrite(out_file, canvas):
+        raise IOError("cannot write image: %s" % out_file)
+
+    # perform annotations
+    img_dict = {}
+    img_dict['img_dimensions'] = (height, width)
+    with open(out_file, "rb") as f:
+        img_dict['img_hash'] = hashlib.sha256(f.read()).hexdigest()
+    img_dict['polygons'] = polys
+    return img_dict
+
+
+# main
+
+label_file = "/home/user/ACM/shih/DDI-100/05_my_labels.json"
+labels_dict = {}
+origin_img_folder = "/home/user/ACM/shih/DDI-100/dataset_v1.3/05/orig_texts/"
+pbar = tqdm(os.listdir(origin_img_folder))
+for doc in pbar:
+    save_name = "05_"+doc
+    try:
+        labels_dict[save_name] = images_process(os.path.join(origin_img_folder,doc), save_name)
+    except IOError as err:
+        # Skip this document instead of losing every label collected so far.
+        print("ERROR:", err)
+# The with-block closes the file; no explicit close() is needed.
+with open(label_file, "w") as outfile:
+    json.dump(labels_dict, outfile, indent = 4)
+