"""
Coder : ENG.omar
Version : v2.0B
Version Date : 19 / 5 / 2023
Code Type : Python
Title : Smart Parking System
Interpreter : CPython v3.11.0 [Compiler : MSC v.1933 AMD64]
"""
import os , sys , time , io
import cv2
import psutil
import cProfile
import textract
import pyautogui
import numpy as np
import parking_db as db
import pytesseract as tsr
import multiprocessing as mp
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
# TODO : finish CUDA accel function and use it in ocr_main()
# TODO : car plate / license id simple template detection
# TODO : tracking
# TODO : fix skew_angle most of the time returning 1 (something fails in the deskew process)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def make_timer_obj ( frame , id_pos , timer , **extraArgs ) :
    ''' Returns:
        frame with rectangle and timer obj
    '''
x1 = id_pos[0][0] + 500 #magic nums are text offset from center rectangle
y2 = id_pos[1][1] + 630
if timer > 1 :
text = str(timer) + 's'
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
color = (0, 0, 255)
thickness = 2
else :
text = 'processing...'
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1
color = (0, 255, 0)
thickness = 2
text_size, _ = cv2.getTextSize(text, font, font_scale, thickness)
text_x = (x1 - text_size[0]) // 2
text_y = (y2 + text_size[1]) // 2
frame_rect_timer = cv2.putText(frame , text, (text_x, text_y), font, font_scale, color, thickness)
return frame_rect_timer
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def make_rectangle_obj( frame , id_dimension , color , **extraArgs ) :
"""
1. specs of rectangle to be rendered in live video for guiding end user
2. get points to position helper rectangle in center
3. define rectangle object to be rendered on video frame
    4. anything outside the rectangle is blurred
---
* Returns:
rectangle_obj , [(x1,y1) , (x2 , y2)]
"""
#specs of rectangle to be rendered in live video for guiding end user
blue = (255 , 0 , 0)
green = ( 0 , 255 , 0)
red = ( 0 , 0 , 255)
frame_shape = extraArgs['_frame_shape']
x1 , x2 , y1 , y2 = [-1 for i in range(4)]
rec_spec = {
'top_left_coordinate' : tuple ,
'bot_right_coordinate' : tuple ,
'color' : red if color == 'red' else green ,
'thickness' : 1 ,
}
    #keep a separate copy of the original video frame without the rectangle and timer objects for better processing
    frame_to_show = cv2.UMat(np.array(frame.get())) #deep copy -> shares no data with frame (may take more mem. but more efficient than a COW copy + solves the '=' aliasing problem)
if testing_mode == True : #TESTING
print (f"frame_to_show var type: {type(frame_to_show)}")
#get points to position helper rectangle in center
y_center , x_center = [ int(x) // 2 for x in frame_shape ] #frame_to_show has no shape attribute (UMat obj)
#top left
x1 , y1 = x_center - id_dimension[0] // 2 , y_center - id_dimension[1] // 2
#bot_right
x2 , y2 = x_center + id_dimension[0] // 2 , y_center + id_dimension[1] // 2
#make rec_in_center
rec_spec['top_left_coordinate'] , rec_spec['bot_right_coordinate'] = (x1 , y1) , (x2 , y2)
#focus on rectangle and blur all else
blur_kernel = (25 , 25)
    #blur the whole image into a copy, then take a rectangle mask from the original and combine: voila! the outside is blurred but the inside stays crystal clear
blured_1 = cv2.blur( frame_to_show , blur_kernel )
#mask
mask = np.zeros( (frame_shape[0] , frame_shape[1]) , dtype= np.uint8 )
mask[y1:y2 , x1:x2] = 255
#apply mask
    frame_masked = cv2.bitwise_and( frame_to_show , frame_to_show , mask= mask) #outside is ANDed with zeroes, leaving only the inner region with original values (2nd param is a ref only, to keep the original channels)
    blured_1_masked = cv2.bitwise_and(blured_1 , blured_1 , mask= ~mask) #notice the negate in mask= ~mask
#compine
frame_to_show = cv2.add(frame_masked , blured_1_masked)
frame_rect_obj = cv2.rectangle ( frame_to_show , *rec_spec.values() )
pos = [(x1 , y1 ) , (x2 , y2)]
return frame_rect_obj , pos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
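#NOTE: a minimal standalone sketch of the blur-outside / sharp-inside trick used in
#make_rectangle_obj() above, on a plain np.ndarray instead of cv2.UMat. The helper
#name and its arguments are hypothetical, kept here only to document the technique.
def _demo_focus_rectangle ( img , x1 , y1 , x2 , y2 ) :
    blured = cv2.blur( img , (25 , 25) ) #blur a full copy
    mask = np.zeros( img.shape[:2] , dtype= np.uint8 )
    mask[y1:y2 , x1:x2] = 255 #white rectangle = region to keep sharp
    inside = cv2.bitwise_and( img , img , mask= mask ) #original pixels inside the rectangle
    outside = cv2.bitwise_and( blured , blured , mask= ~mask ) #blurred pixels outside it
    return cv2.add( inside , outside ) #the masks are disjoint, so add() just merges them
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#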
def get_pos ( frame_shape , id_dimension) :
''' return : [(x1 , y1 ) , (x2 , y2)] '''
y_center , x_center = [ int(x) // 2 for x in frame_shape ] #frame_to_show has no shape attribute (UMat obj)
#top left
x1 , y1 = x_center - id_dimension[0] // 2 , y_center - id_dimension[1] // 2
#bot_right
x2 , y2 = x_center + id_dimension[0] // 2 , y_center + id_dimension[1] // 2
pos = [(x1 , y1 ) , (x2 , y2)]
return pos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def check_gpu_accl (_default_gpu = 1) : #default gpu set to nvidia cuda == 1
"""
* use only inside video_settings()
* NOTE 1:
        this function detects exactly which API is enabled, CUDA or OpenCL,
        and prefers OpenCL if both are enabled,
        since it is open source and this was written on a PC with an AMD GPU
* NOTE 2:
depending on used GPU acceleration main function will call
the appropriate ocr reader function
* NOTE 3:
        if OpenCL is enabled, use cv2.UMat to store the frames and process them,
        then convert them back to cv2.Mat;
        if CUDA is enabled, use cv2.cuda_GpuMat instead of cv2.UMat
        and cv2.cuda.foo() instead of cv2.foo() for some opencv-python functions
---
* Returns:
tuple (is_enabled? , gpu_type 1== cuda 2==opencl)
"""
#check if cuda / opencl is enabled
cuda_available = cv2.cuda.getCudaEnabledDeviceCount() > 0
cuda_enabled = 'CUDA' in cv2.getBuildInformation()
opencl_enabled = cv2.ocl.haveOpenCL()
    if opencl_enabled : cv2.ocl.setUseOpenCL(True)
    #alternative check for opencl (but i'll do it also XD)
    opencl_enabled = 'OpenCL' in cv2.getBuildInformation()
    if opencl_enabled : cv2.ocl.setUseOpenCL(True)
#even if no gpu accel available default function is Nvidia cuda function => 2
gpu_accel_enabled = [False , 2]
if cuda_enabled and cuda_available :
gpu_accel_enabled [:2] = True , 1
        if _default_gpu == 1 : return tuple ( gpu_accel_enabled ) #even if opencl is available, the gpu default is nvidia if == 1
if opencl_enabled :
#no need to check if its default ( it will auto override if nvidia is not default )
gpu_accel_enabled [:2] = True , 2
if testing_mode == True : #TESTING
print (f"number of threadas in your CPU : {mp.cpu_count()}")
return tuple ( gpu_accel_enabled )
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
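#NOTE: a minimal sketch of the cv2.UMat round trip described in NOTE 3 of
#check_gpu_accl() : wrap -> process through the T-API (OpenCL when enabled) -> download.
#The helper name is hypothetical; bgr_frame is assumed to be a np.ndarray frame.
def _demo_umat_roundtrip ( bgr_frame ) :
    gpu_frame = cv2.UMat(bgr_frame) #wrap; runs via OpenCL if enabled, plain host memory otherwise
    gpu_gray = cv2.cvtColor(gpu_frame , cv2.COLOR_BGR2GRAY) #any cv2.foo() call stays on the T-API
    return gpu_gray.get() #download back to a plain np.ndarray (cv2.Mat)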
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def enable_multithreading (thread_no = 4) : #in case gpu acceleration is not enough to maintain stable 30fps
    cv2.setNumThreads(thread_no) #enable cv2 multithreading
    ret = f"number of cv2 used threads: {cv2.getNumThreads()} " #getNumThreads() returns the thread count; getThreadNum() returns the current thread index
return ret
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def video_settings_setup (cam_indx = 0, fps = 30 , vid_length_sec = 10, res = (640 , 480), flscreen = True) :
"""
* This function does set up the video objects
* res is set by default to 640 x 480
* NOTE 1:
* gpu_accel == 0 no GPU acceleration found
* gpu_accel == 1 Cuda GPU acceleration found
* gpu_accel == 2 OpenCl GPU acceleration found
* NOTE 2:
* active_gpu_api == 0 (no active accel api)
* active_gpu_api == 1 (Cuda) (disabled for now)
* active_gpu_api == 2 (OpenCL)
---
* Returns :
vid , fps , frametime , vid_length_sec , active_gpu_api
"""
#set the path to the tesseract dir ( not needed if you added it to win. env. variables)
tsr.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# Get the process object for the current process
process = psutil.Process(os.getpid())
# Set the process priority to "high"
process.nice(psutil.HIGH_PRIORITY_CLASS)
enable_multithreading(12)
is_gpu_accel_enabled , gpu_api = check_gpu_accl ()
if testing_mode == True :
print ( f"gpu api is : {gpu_api}") #TESTING
if is_gpu_accel_enabled :
        if gpu_api == 1 : #use cuda vid obj
            #you can change the video I/O backend used by changing the second argument :
            #cv2.CAP_DSHOW (DShow is the default on my omar-pc)
            #NOTE: OpenCV has no cv2.CAP_CUDA capture backend, so capture stays on DShow;
            #CUDA-side decode would go through cv2.cudacodec (the CUDA path is disabled for now)
            vid = cv2.VideoCapture(cam_indx , cv2.CAP_DSHOW )
else : #OpencL
vid = cv2.VideoCapture(cam_indx , cv2.CAP_DSHOW )
else : #DEFAULT
vid = cv2.VideoCapture(cam_indx , cv2.CAP_DSHOW )
if testing_mode == True :
# TO GET YOUR DEFAULT CV2 RES :
        # default res on omar-pc OBS virtual cam plugin -> (height_Y : 480 , width_X : 640)
wSCREEN , hSCREEN = pyautogui.size()#TESTING
hCV2 = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)#TESTING
wCV2 = vid.get(cv2.CAP_PROP_FRAME_WIDTH)#TESTING
print (f"cv2 res is : {wCV2, hCV2}") #TESTING
# TO GET YOUR DEFAULT SCREEN RES :
print ( f"screen res is : {wSCREEN , hSCREEN}")#TESTING
#set custom res
if flscreen :
wSCREEN , hSCREEN = pyautogui.size()
vid.set(cv2.CAP_PROP_FRAME_WIDTH , wSCREEN )
vid.set(cv2.CAP_PROP_FRAME_HEIGHT, hSCREEN )
# cv2.namedWindow('Camera')
# cv2.setWindowProperty('Camera', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
else:
vid.set(cv2.CAP_PROP_FRAME_WIDTH , res[0] )
vid.set(cv2.CAP_PROP_FRAME_HEIGHT, res[1] )
#frametime is needed to put as cv2.waitkey() argument
frametime = 1000 // fps
return vid , fps , frametime , vid_length_sec , gpu_api
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def compare_img ( img_to_comp , ref_img ) :
''' Returns : good_matches , (ref_kp , img_kp) '''
#NOTE: DETECT keypoints and create the decriptors for ref.img and img_to_cmp
detect_obj = cv2.ORB_create() #two algorithms in ORB : corner detection and binary descriptor extraction
    #instead of ORB you could use SIFT; it's more accurate but slower.
    #since our objects are relatively simple and we need a high processing rate (30FPS+), we'll use ORB for now.
#brute force matching + norm_hamming (is better for cv and images than L1 , L2)
match_obj = cv2.BFMatcher(cv2.NORM_HAMMING)
ref_kp , ref_desc = detect_obj.detectAndCompute(ref_img , mask= None)
img_kp , img_desc = detect_obj.detectAndCompute(img_to_comp , mask= None)
    if img_desc is None or ref_desc is None :
return -1 , -1
#NOTE: MATCH and save best matches
matched_descs = match_obj.match(ref_desc , img_desc)
#match() returns DMatch_Obj = [img_desc_indx , ref_img_desc_indx , distance]
matched_descs = sorted(matched_descs , key= lambda x : x.distance) # each x is a matched_descs obj
if testing_mode == True : #TESTING
print (f"ref_desc type : {type(ref_desc)} img_desc type {type(img_desc)}")
print (f"Tot number of matched descriptors before filter {len(matched_descs)}")
tolerance = 50 #TODO : tune this value #TODO : try cross checking descs
good_matches = matched_descs[:tolerance] #get only best n matches (smallest distance)
return ( good_matches , (ref_kp , img_kp) )
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
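#NOTE: one possible shape of the 'cross checking' mentioned in the TODO above (an
#assumption, not the implemented behavior): crossCheck=True keeps a match only when
#the two descriptors pick each other as the best match, which usually replaces the
#manual 'best n matches' tolerance filter. The helper name is hypothetical.
def _demo_cross_check_match ( ref_desc , img_desc ) :
    match_obj = cv2.BFMatcher(cv2.NORM_HAMMING , crossCheck= True) #mutual-best-match filter
    matches = match_obj.match(ref_desc , img_desc)
    return sorted(matches , key= lambda m : m.distance)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#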
def get_trans_mat( good_matches , key_points) :
''' Returns : homography transformation matrix '''
#NOTE: homography transformation matrix :
# [ cos(theta) -sin(theta) 0 ]
# [ sin(theta) cos(theta) 0 ]
# [ 0 0 1 ]
#trans. mat will be pure rotation mat (like above)
# if and ONLY if the image is only rotated (there is angle between x-axis and y-axis only) with no any other transformation
ref_kp , img_kp = key_points
ref_good_pts = np.float32( [ref_kp[m.queryIdx].pt for m in good_matches] ) # 'pt' is the pixel coordinate of a keypoint
    ref_good_pts = ref_good_pts.reshape(-1,1,2) #make a 3D mat of coordinates [ [x1,y1] , [x2,y2] ..]
img_good_pts = np.float32( [img_kp[m.trainIdx].pt for m in good_matches] )
img_good_pts = img_good_pts.reshape(-1,1,2)
    #NOTE: in findHomography(): cv2.RANSAC is a match-outlier excluding algorithm and 6.0 is the reprojection threshold
    trans_mat , choosed_matches_mask = cv2.findHomography( ref_good_pts , img_good_pts , cv2.RANSAC , 6.0 ) #TODO: tune thresh between 1 and 10 (increments of 1 or 0.5)
return trans_mat
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def get_skew_angle( homograph_rot_mat , img_to_skew_shape ) :
"""
    #### A pure rotation matrix (rotation between the x-axis and y-axis only) should look like:
        [ cos(theta)   -sin(theta)   0 ]
        [ sin(theta)    cos(theta)   0 ]
        [ 0             0            1 ]
    * other affine transformation matrices can hold scaling, skew, translation and rotation -> one or several at once, depending on the image
    * strictly speaking this function gets a rotation angle; a skew angle and a rotation angle differ, but for simplicity this code keeps the looser 'skew_angle' naming throughout
    * NOTE: details of the upper-left 2x2 sub-matrix (it is the rotation matrix between the x & y axes):
        * assume a 2D plane only: a ref img point is at x1,y1 -> x1 = r*cos(phi) , y1 = r*sin(phi)
        * the scanned image point is at x2,y2 -> r*cos(phi + theta) , r*sin(phi + theta)
        * expand with the trig. sum identities and substitute
        * the final formulas look like:
            * x2 = x1*cos(theta) - y1*sin(theta)
            * y2 = x1*sin(theta) + y1*cos(theta)
        * and that is exactly the upper 2x2 sub-matrix (easy, right?)
        * what we want is the rotation angle (named 'skew_angle' here and across the code, though they are not the same)
        * so where is rot_angle? it's theta! the angle between the ref. img and the scanned img is the wanted rot_angle
    ---
Returns:
        ( skew_angle , (center , w , h) )
        on failure returns -1 , -1
"""
    if homograph_rot_mat is not None :
if homograph_rot_mat[0, 1] != -homograph_rot_mat[1, 0]:
if testing_mode == True :
print ( f"This image has complex transformation")#TESTING
# If not, calculate the skew angle using the SVD decomposition of the rotation matrix
U, S, Vt = np.linalg.svd(homograph_rot_mat[:2, :2])
unitary = U @ Vt
skew_angle = np.arctan2(unitary[1, 0], unitary[0, 0]) * (180 / np.pi)
else:
if testing_mode == True :
print ( f"this image had lite transfromation moslty rotation")#TESTING
# If the rotation matrix is a pure rotation matrix, calculate the skew angle using the sine and cosine of the rotation angle
sine = homograph_rot_mat[1, 0]
cosine = homograph_rot_mat[0, 0]
skew_angle = np.arctan2(sine, cosine) * (180 / np.pi)
if testing_mode == True :
print ( f"IMAGE ROTATION ANGLE IS : {skew_angle} DEGREES")#TESTING
else :
return -1 , -1 #skip this frame
h , w = img_to_skew_shape
center = ( w // 2 , h // 2)
return ( skew_angle , (center , w , h) )
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
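#NOTE: a small self-check for the angle extraction above : build a pure 2D rotation
#matrix for a known theta and confirm arctan2 recovers it. Hypothetical helper, kept
#only to document the math in get_skew_angle().
def _demo_recover_rotation_angle ( theta_deg = 30.0 ) :
    t = np.deg2rad(theta_deg)
    rot = np.array([ [np.cos(t) , -np.sin(t)] , [np.sin(t) , np.cos(t)] ])
    recovered = np.arctan2(rot[1 , 0] , rot[0 , 0]) * (180 / np.pi)
    return recovered #equals theta_deg (up to float error)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#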
def affine_trans ( _skew_angle , img_coord, _image_to_skew ) :
"""
Returns:
deskewed_image : np.ndarray
"""
center , w , h = img_coord
affine_rot_mat = cv2.getRotationMatrix2D(center , _skew_angle , scale= 1.0) #1.0 is image scale factor
rotated_img = cv2.warpAffine(_image_to_skew , affine_rot_mat , (w,h) , flags= cv2.INTER_CUBIC, borderMode= cv2.BORDER_REPLICATE )
    deskewed_img = rotated_img #TODO : try cv2.warpPerspective()
return deskewed_img
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def rotate_180 (img_to_rotate_180 ) :
    rot_angle = 180
    #affine_trans() needs the rotation center and size; assumes a np.ndarray frame (UMat has no shape attribute)
    h , w = img_to_rotate_180.shape[:2]
    center = ( w // 2 , h // 2 )
    rotated_180_img = affine_trans (rot_angle , (center , w , h) , img_to_rotate_180)
    return rotated_180_img
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def deskew_img( img_to_skew , ref_img , **extraArgs ) : #lets call it de-rotate for now
"""
---
Returns:
np.ndarray: image after deskew
int : skew_angle
if error return -1
"""
img_shape = extraArgs['img_shape']
match_err = False
good_matches , key_points = compare_img( img_to_skew , ref_img)
if good_matches == -1 or key_points == -1 :
return -1 , 0
trans_mat = get_trans_mat( good_matches , key_points)
skewed_angle , coordinates = get_skew_angle ( trans_mat , img_shape)
if skewed_angle == -1 or coordinates == -1:
return -1 , 0
else :
deskewed_img = affine_trans( skewed_angle , coordinates , img_to_skew)
return deskewed_img , skewed_angle
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
skipped_cnt = 0 #TESTING
sec_passed = 0
def process_vid_frame( _frame , _id_dimension , _is_valid , _is_valid2 , _ref_img , **extraArgs) :
"""
#### Main process in the function :
    0. the img is mirrored only for the user's visual aid
1. outer part of ID rectangle is blurred only for user visual aid
2. cvt to gray-scale
3. inv frame
4. threshold frame
    5. deskew frame ( here we keep a non-deskewed copy in case it doesn't need deskewing)
6. sharpen the frame
    7. dilate frame by one iteration
    -----
    #### NOTE: the image background is black and the foreground is white; the foreground is what we need, and this makes the ocr better
    #### to use the opposite convention, use erode() instead of dilate()
---
Returns:
final_img_deskew , final_img_no_deskew
if error return -1
"""
#TODO may not need two rectangle obj (find use or delete one)
skip = False
skew_angle = extraArgs['skew_angle'] #if skew angle is big render red rect for now
clr = None
shown_timer_control_var = extraArgs['fps']
shown_frames_control_var = 2 #2 here shows half of frames if pc can render this fast , 3 show 1/3 of tot frames ...
    frames_to_skip_procs = extraArgs['timer_sec'] // 1.5 #NOTE: saves around 1~2 frames per second to process if fps = 30
timer= extraArgs['timer_sec']
pos = []
global sec_passed
if extraArgs['count'] % shown_timer_control_var == 0 : #control shown timer
sec_passed +=1
if extraArgs['count'] % shown_frames_control_var == 0 : #control fps and shown frames to user
timer -= sec_passed
if skew_angle <= 5 and skew_angle >= -5 :
clr = "green"
else :
clr = "red"
frame_mirrored = cv2.flip(_frame , 1) #to make it easier for user but the original frame is the one we process
if _is_valid == True :
frame , pos = make_rectangle_obj( frame_mirrored , _id_dimension , clr , _frame_shape = extraArgs['frame_shape'] )
frame = make_timer_obj(frame , pos , timer)
cv2.imshow("Camera", frame)
elif _is_valid2 == True :
frame2 , pos2 = make_rectangle_obj( frame_mirrored , _id_dimension , clr, _frame_shape = extraArgs['frame_shape'] )
            frame2 = make_timer_obj( frame2 , pos2 , timer)
cv2.imshow("Camera", frame2)
else : #show any red
frame , pos = make_rectangle_obj( frame_mirrored , _id_dimension , clr , _frame_shape = extraArgs['frame_shape'] )
frame = make_timer_obj( frame , pos , timer)
cv2.imshow("Camera", frame)
elif extraArgs['count'] <= 2:
pos = get_pos(extraArgs['frame_shape'] , _id_dimension)
processing = cv2.imread("./ai_data/progress.png")
processing = cv2.resize(processing , (extraArgs['frame_shape'][1] , extraArgs['frame_shape'][0]))
cv2.imshow("Camera", processing)
if extraArgs['count'] % frames_to_skip_procs == 0 : #control procced frames
pos = get_pos(extraArgs['frame_shape'] , _id_dimension )
skip = False
else :
skip = True
if testing_mode == True : #TESTING
global skipped_cnt
skipped_cnt += 1
print( f"skip frame? {skip}")
print( f"color of box {clr}")
return skip , skip , skip , 1
if testing_mode == True : #TESTING
print( f"skip frame? {skip}")
print( f"angle? {skew_angle}")
x1 , y1 = pos[0][0] , pos[0][1]
x2 , y2 = pos[1][0] , pos[1][1]
    #crop the image to get the id card only (with +5px margin over the actual id size)
_frame = cv2.UMat( _frame , [y1 , y2] , [x1 , x2])
    #convert the image to grayscale because THRESH_OTSU needs that
_frame = cv2.cvtColor(_frame , cv2.COLOR_BGR2GRAY)
# if testing_mode == True :
# cv2.imshow("TESTING : show image before edit *grayed*" , _frame) #TESTING
_frame = cv2.bitwise_not(_frame)
# if testing_mode == True :
# cv2.imshow("TESTING : show image bitwise not" , _frame)#TESTING
_frame = cv2.threshold(_frame , 0 , 255 , cv2.THRESH_BINARY_INV+ cv2.THRESH_OTSU)[1]
# if testing_mode == True :
# cv2.imshow("TESTING : show image threshed" , _frame)#TESTING
    no_deskew = cv2.UMat(np.array(_frame.get())) #deep copy of _frame (efficient but takes more mem.)
    #a COW copy of _frame would save mem. but adds overhead + some issues
f_shape = ((y2 - y1) , (x2 - x1))
_frame , skew_angle = deskew_img( _frame , _ref_img , img_shape= f_shape )
if type(_frame) == type(-1) : #skip this frame
return skip , skip , skip , 1
# #continue process
_frame = cv2.Laplacian(_frame, cv2.CV_8U, ksize=3)
no_deskew = cv2.Laplacian(no_deskew, cv2.CV_8U, ksize=3)
    kernel = np.ones((1,3) , dtype= np.uint8) #a little better for text (more horizontally focused)
kernel2 = np.ones((3,3) , dtype= np.uint8)
_frame = cv2.dilate(_frame , kernel= kernel)
no_deskew = cv2.dilate(no_deskew , kernel= kernel)
img_final_deskew = _frame
img_final_no_deskew = no_deskew
if testing_mode == True : #TESTING
print (f"f_shape {f_shape}")
print (f" UMat final imgs sizes are (not the window the img itself in gpu) ")
temp_deskew_shape = _frame.get().shape[0] , _frame.get().shape[1]
temp_no_deskew_shape = no_deskew.get().shape[0] , no_deskew.get().shape[1]
print (f" not_deskewed shape : {temp_no_deskew_shape}")
print (f" deskewed shape : {temp_deskew_shape}")
cv2.imshow("TESTING : show image final_deskewed" , img_final_deskew)#TESTING
cv2.imshow("TESTING : show image final_not_deskewed" , img_final_no_deskew)#TESTING
return img_final_deskew , img_final_no_deskew , skip , skew_angle
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
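#NOTE: a minimal offline sketch of the same preprocessing chain used in
#process_vid_frame() (grayscale -> invert -> Otsu threshold -> Laplacian -> dilate),
#handy for tuning on a saved still instead of a live frame. The helper name and the
#image path are hypothetical.
def _demo_preprocess_still ( img_path = './extra/sample_id.jpg' ) :
    img = cv2.imread(img_path , cv2.IMREAD_GRAYSCALE)
    img = cv2.bitwise_not(img) #invert so the foreground text ends up white
    img = cv2.threshold(img , 0 , 255 , cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    img = cv2.Laplacian(img , cv2.CV_8U , ksize= 3) #sharpen edges
    return cv2.dilate(img , np.ones((1 , 3) , dtype= np.uint8)) #thicken text horizontally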
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def search_id( id_char_type , scanned_image , valid_ids_freq ) :
"""
---
Returns:
is_valid , (some times extracted_id_val)
"""
id_values_type : str = id_char_type #always 'numeric' for type 0 id (default)
scanned_image = scanned_image.split()
if testing_mode == True :
print ( f" {scanned_image} {type(scanned_image)} ") #TESTING
is_valid , extracted_id_val = False , None
prev_obj = None
for id_obj in scanned_image :
if testing_mode == True :
print ( f'obj before strip and replace : {id_obj} ' )#TESTING
#deal with small misses in ocr
id_obj = id_obj.strip().replace(" " , "")
id_obj = id_obj.strip().replace("," , "")
id_obj = id_obj.strip().replace("." , "")
sz = len(id_obj)
ok_type= id_obj.isnumeric()
if testing_mode == True :
            print (f'scanned_image obj no. : {len(scanned_image)} ')#TESTING
print (f' id_obj: {id_obj} is numeric? {ok_type} ') #TESTING
if id_char_type == 'numeric' and ok_type == True and sz == 14 :
extracted_id_val = id_obj
db_ok= db.db_check_ai_id(id_obj)
if testing_mode == True :
print(f"db id_obj check RESULT case 1: {db_ok}") #TESTING
# id frequency array to get only most frequent ID
if db_ok == True :
is_valid = True
if extracted_id_val in valid_ids_freq:
valid_ids_freq[extracted_id_val] += 1
else:
valid_ids_freq[extracted_id_val] = 1
else :
continue
        elif id_char_type == 'numeric' and ok_type :
            # in some cases, especially with a higher good_matches tolerance and --psm 11,
            # the ocr splits the id into at most 2 objects (as far as i've detected)
            # this elif handles that case, since joining the 2 consecutive objects may give a valid id
            if prev_obj is not None :
                joined_numeric_conseq_obj = prev_obj + id_obj
                if len(joined_numeric_conseq_obj) == 14 :
                    extracted_id_val = joined_numeric_conseq_obj
                    db_ok = db.db_check_ai_id(extracted_id_val)
                    if testing_mode == True :
                        print(f"db id_obj check RESULT case 2: {db_ok}") #TESTING
                    if db_ok :
                        is_valid = True
                        if extracted_id_val in valid_ids_freq:
                            valid_ids_freq[extracted_id_val] += 1
                        else:
                            valid_ids_freq[extracted_id_val] = 1
            prev_obj = id_obj
elif sz == 14:
id_obj = id_obj.replace('S' , '5')
id_obj = id_obj.replace('s' , '5')
id_obj = id_obj.replace('I' , '1')
id_obj = id_obj.replace('l' , '1')
extracted_id_val = id_obj
db_ok = db.db_check_ai_id(id_obj)
if testing_mode == True :
print(f"db id_obj check RESULT case 3: {db_ok}") #TESTING
if db_ok :
is_valid = True
if extracted_id_val in valid_ids_freq:
valid_ids_freq[extracted_id_val] += 1
else:
valid_ids_freq[extracted_id_val] = 1
else :
pass
return is_valid , extracted_id_val
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
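#NOTE: a hedged alternative to the chained replace() calls in search_id() : a single
#str.translate() pass over the same confusable-character mapping (S/s -> 5 , I/l -> 1).
#The table and helper names are hypothetical.
_OCR_CONFUSABLES = str.maketrans({'S' : '5' , 's' : '5' , 'I' : '1' , 'l' : '1'})
def _demo_normalize_id ( raw_id ) :
    return raw_id.translate(_OCR_CONFUSABLES) #one scan instead of four replace() passes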
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def read_simple_card_cuda (vid , vid_specs , id_card_specs , is_valid = False) :
#gpu index to use in acceleration
cv2.cuda.setDevice(0)
#vid = cv2.VideoCapture(0, cv2.CAP_CUDA)
# Check if the camera is successfully opened
    return vid.isOpened()
#use cuda functions and cuda matrices
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
def read_simple_card_opencl( vid , vid_specs , id_card_specs , is_valid = False , is_valid2 = False) : #NOTE: also use it when no GPU
"""
* Args:
vid
vid_specs : fps , frametime(1000 ms / fps) , vid_length_sec , active_gpu_api
id_dimension
is_valid
---
* Returns:
final_status
scanned_id_string
status is 'False' when ocr process has low confidence
id is -1 when error reading frame or scan id
"""
no_deskew_img_buff = []
deskew_img_buff = []
vid_length_sec = vid_specs[2]
fps = vid_specs[0]
vid_time_cnt = fps * vid_length_sec #fps * length_of_video = total number of frames
id_dimension = id_card_specs['dimension']
skew_angle = 0
frametime = vid_specs[1]
#needed in deskew() (pre-allocate it 'one-time' saves huge overhead)
ref_img = get_ref_img_db( img_name= 'ref_id_img_hassan')
ref_img = cv2.UMat(ref_img)
no_error , frame = vid.read() #pre-allocate frame to save some overhead
cv2.namedWindow('Camera', cv2.WINDOW_NORMAL)
cv2.setWindowProperty('Camera', cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)
if not no_error :
print(f"""
warninig!: could not read this frame : {vid_time_cnt}
skipping to next frame>> """)
raise RuntimeError('Error reading video stream')
        # return False , "Fail error reading frame" #Fatal: fail error reading frame
frame_shape = frame.shape[:2]
while vid_time_cnt > 0: #start capture camera for vid_length_sec
if testing_mode == True :#TESTING
start_time = time.time()
no_error , frame = vid.read()
frame = cv2.UMat(frame)
if not no_error :
print(f"""
warninig!: could not read this frame : {vid_time_cnt}
skipping to next frame>> """)
vid_time_cnt -= 1
continue
img_final_deskew , img_final_no_deskew , skip , skew_angle = process_vid_frame(frame , _id_dimension= id_dimension , _is_valid = is_valid , _is_valid2 = is_valid2 , _ref_img = ref_img , frame_shape = frame_shape , count= vid_time_cnt , skew_angle = skew_angle , timer_sec= vid_length_sec , fps= fps)
        if type(img_final_deskew) == bool or type(img_final_no_deskew) == bool or skip == True : #skip this frame for speed or err handling
            print (
                f"""
                warning!: this frame : {vid_time_cnt} will be skipped from ocr and the id db check
                because cv2 was unable to generate a homography transformation mat
                """
            )
if testing_mode == True :#TESTING
end_time = time.time()
actual_frametime = end_time - start_time
actual_fps = 1 / actual_frametime
                max_rec_frametime_min_fps[0] = max (max_rec_frametime_min_fps[0] , actual_frametime)
                max_rec_frametime_min_fps[1] = min (max_rec_frametime_min_fps[1] , actual_fps)
                min_rec_frametime_max_fps[0] = min (min_rec_frametime_max_fps[0] , actual_frametime)
                min_rec_frametime_max_fps[1] = max (min_rec_frametime_max_fps[1] , actual_fps)
print(f"#### actual Frametime(sec) and FPS: {actual_frametime} , {actual_fps} ####")
print(f"#### TARGET frametime and fps (sec) {frametime / 1000} , {fps} ####")
vid_time_cnt -= 1
cv2.waitKey( 1 )
continue
if testing_mode == True :
print (f"this frame no : {vid_time_cnt}")
deskew_img_buff.append(img_final_deskew)
no_deskew_img_buff.append(img_final_no_deskew)
vid_time_cnt -= 1
if testing_mode == True :#TESTING
end_time = time.time()
print( f"type img_final_deskew before cvt to mat: {type(img_final_deskew)}")
print( f"type img_final_no_deskew before cvt to mat: {type(img_final_no_deskew)}")
actual_frametime = end_time - start_time
actual_fps = 1 / actual_frametime
            max_rec_frametime_min_fps[0] = max (max_rec_frametime_min_fps[0] , actual_frametime)
            max_rec_frametime_min_fps[1] = min (max_rec_frametime_min_fps[1] , actual_fps)
            min_rec_frametime_max_fps[0] = min (min_rec_frametime_max_fps[0] , actual_frametime)
            min_rec_frametime_max_fps[1] = max (min_rec_frametime_max_fps[1] , actual_fps)
print(f"#### actual Frametime(sec) and FPS: {actual_frametime} , {actual_fps} ####")
print(f"#### TARGET frametime and fps (sec) {frametime / 1000} , {fps} ####")
cv2.waitKey( 1 )
#now ocr all read frames
return ocr_ready_id(deskew_img_buff , no_deskew_img_buff , id_card_specs , fps)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#
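#NOTE: a possible tesseract tweak for this numeric-id use case (an assumption, not
#what ocr_ready_id() currently passes) : add a digit whitelist so letters like 'S' or
#'l' are never emitted in the first place. The helper name is hypothetical.
def _demo_ocr_digits_only ( img ) :
    cfg_digits = "--psm 11 --oem 3 -c tessedit_char_whitelist=0123456789"
    return tsr.image_to_string(img , lang= 'eng' , config= cfg_digits)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#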
def ocr_ready_id( deskew_frames_buff , no_deskew_frames_buff , id_card_specs , fps ) :
'''
---
* Returns:
final_status
scanned_id_string
status is 'False' when ocr process has low confidence
id is -1 when error reading frame or scan id
'''
#loop to read saved frames
valid_ids_freq = {} #updated at search_id()
valid_ids_freq2 = {} #updated at search_id()
valid_cnt , valid_cnt2 = 0 , 0
    buff1_sz = len(deskew_frames_buff)
    buff2_sz = len(no_deskew_frames_buff)
    i = max ( buff1_sz , buff2_sz )
no_frame_to_ocr = i
if testing_mode == True :
print (f"number of frames to ocr : {i}")
while i > 0:
#OCR USING PYTESSERACT
#custom tesser configuration if needed
cfg = "--psm 11 --oem 3" # Sparse text. Find as much text as possible in no particular order.
cfg2 = "--psm 12 --oem 3" # Sparse text with osd. Find as much text as possible in no particular order.
        #download the images from the gpu to memory to use in ocr
        img_final_deskew = deskew_frames_buff[i - 1].get()
        img_final_no_deskew = no_deskew_frames_buff[i - 1].get()
        #ocr (pass the configs above; they were defined but never wired in before)
        imgstr = tsr.image_to_string(img_final_deskew , lang='eng' , config= cfg)
        imgstr2 = tsr.image_to_string(img_final_no_deskew , lang='eng' , config= cfg2)
# #Save the UMat to a file
# cv2.imwrite(r'./extra/img_final_deskew.jpg', img_final_deskew)
# cv2.imwrite(r'./extra/img_final_no_deskew.jpg', img_final_no_deskew)
# #OCR USING TEXTRACT
# imgstr = textract.process(r"./extra/img_final_deskew.jpg", method='ocrpus', language='eng')
# imgstr2 = textract.process(r"./extra/img_final_no_deskew.jpg", method='ocrpus', language='eng')
# imgstr = imgstr.decode('utf-8')
# imgstr2 = imgstr2.decode('utf-8')
# #OCR USING EASYOCR
# no_threads = mp.cpu_count()
# reader = easyocr.Reader(['en'] , gpu= True , workers= no_threads) #workers are no of threads
# result1 = reader.readtext(img_final_deskew)
# imgstr = '\n'.join([res[1] for res in result1])
# result2 = reader.readtext(img_final_no_deskew)
# imgstr2 = '\n'.join([res[1] for res in result2])
# if testing_mode == True : #TESTING
# print (f"#raw result1 of easy ocr : {result1}")
# print (f"#raw result2 of easy ocr : {result2}")
# print (f"#imgstr of easy ocr : {type(imgstr)} , {imgstr} ")
# print (f"#imgstr2 of easy ocr : {type(imgstr2)} , {imgstr2}")
#search id in rotated and non rotated images
is_valid , *_ = search_id ( id_card_specs['id_char'] , scanned_image= imgstr , valid_ids_freq= valid_ids_freq) #edits valid_ids_freq inside
is_valid2 , *_ = search_id ( id_card_specs['id_char'] , scanned_image= imgstr2 , valid_ids_freq= valid_ids_freq2) #edits valid_ids_freq inside
if is_valid : valid_cnt += 1
#render a green rectangle + must stay green for one second
elif not is_valid: valid_cnt = 0
#render a red rectangle
if is_valid2 : valid_cnt2 += 1
#render a green rectangle + must stay green for one second
elif not is_valid2 : valid_cnt2 = 0
#render a red rectangle
if testing_mode == True :
print ( 'valid counter : ' , valid_cnt)#TESTING
print ( 'valid counter2 :' , valid_cnt2)#TESTING
i -= 1
n = 4
    validate_after = n #n frames that are db-checked, valid, and in sequence is enough
if valid_cnt >= validate_after : #perfect match if still valid for a whole n (sec)
all_success = True
        #get only one id -> the highest freq (if two have the same freq, choose the first one entered into freq_arr)
        final_value = max ( valid_ids_freq , key= valid_ids_freq.get )
return all_success , final_value
elif valid_cnt >= validate_after // 2 :
        return True , max ( valid_ids_freq , key= valid_ids_freq.get ) #Success but not a perfect match
if valid_cnt2 >= validate_after: #perfect match if still valid for a whole n (sec)
all_success = True
        #get only one id -> the highest freq (if two have the same freq, choose the first one entered into freq_arr)
final_value = max ( valid_ids_freq2 , key= valid_ids_freq2.get )
return all_success , final_value
elif valid_cnt2 >= validate_after // 2 :
        return True , max ( valid_ids_freq2 , key= valid_ids_freq2.get ) #Success but not a perfect match
    #now check if an id was detected multiple times but not consecutively
max_skewed , max_un_skewed = -1 , -1
valid_non_conseq_1 , valid_non_conseq_2 = 0 , 0
if len(valid_ids_freq) != 0 :
        valid_non_conseq_1 = max(valid_ids_freq.values())
max_skewed = max ( valid_ids_freq , key= valid_ids_freq.get )
if testing_mode == True :
print ( 'max freq 1 ' , max ( valid_ids_freq , key= valid_ids_freq.get ))#TESTING
if len(valid_ids_freq2) != 0 :
        valid_non_conseq_2 = max(valid_ids_freq2.values()) # THIS IS THE UNSKEWED FRAMES (MORE LIKELY TO HAVE A HIGHER CNT)
max_un_skewed = max ( valid_ids_freq2 , key= valid_ids_freq2.get )
if testing_mode == True :
print ( 'max freq 2 ' , max ( valid_ids_freq2 , key= valid_ids_freq2.get ) )#TESTING
#now check for valid id frames that are not conseq
if valid_non_conseq_2 >= valid_non_conseq_1:
valid_non_conseq_frames = valid_non_conseq_2
if valid_non_conseq_frames >= validate_after // 2 :
return True , max_un_skewed
else: