55
66""" 
77
8+ import  re 
89import  warnings 
910import  hashlib 
1011from  typing  import  Any , Dict 
1516
# Maps commonly mistyped domains to the domain the user almost certainly
# meant. Keys must be exact, already-stripped, lowercase domains because the
# table is consulted with a plain ``dict.get`` in ``_clean_domain``.
#
# Digit-prefixed gmail typos (e.g. "35gmai.com") and ".com" followed by junk
# (e.g. "gmail.comu") are intentionally absent: ``_clean_domain`` rewrites
# those with regular expressions before this table is consulted.
_TYPO_DOMAINS = {
    # gmail.com
    "gmai.com": "gmail.com",
    "gamil.com": "gmail.com",
    "gmali.com": "gmail.com",
    "gmial.com": "gmail.com",
    "gmil.com": "gmail.com",
    "gmaill.com": "gmail.com",
    "gmailm.com": "gmail.com",
    "gmailo.com": "gmail.com",
    "gmailyhoo.com": "gmail.com",
    "yahoogmail.com": "gmail.com",
    # outlook.com
    "putlook.com": "outlook.com",
}
2832
# Maps alternate domains onto a single canonical domain so that addresses
# written with either spelling normalize (and therefore hash) identically.
# Applied by ``_clean_domain`` after typo correction.
_EQUIVALENT_DOMAINS = {
    "googlemail.com": "gmail.com",
    "pm.me": "protonmail.com",
    "proton.me": "protonmail.com",
    "yandex.by": "yandex.ru",
    "yandex.com": "yandex.ru",
    "yandex.kz": "yandex.ru",
    "yandex.ua": "yandex.ru",
    "ya.ru": "yandex.ru",
}
43+ 
# Domains for which ``_clean_email`` applies subdomain addressing: when an
# address's domain is ``<label>.<one of these>``, the leading label replaces
# the local part and the domain is reduced to the listed domain.
_FASTMAIL_DOMAINS = {
    "123mail.org",
    "150mail.com",
    "150ml.com",
    "16mail.com",
    "2-mail.com",
    "4email.net",
    "50mail.com",
    "airpost.net",
    "allmail.net",
    "bestmail.us",
    "cluemail.com",
    "elitemail.org",
    "emailcorner.net",
    "emailengine.net",
    "emailengine.org",
    "emailgroups.net",
    "emailplus.org",
    "emailuser.net",
    "eml.cc",
    "f-m.fm",
    "fast-email.com",
    "fast-mail.org",
    "fastem.com",
    "fastemail.us",
    "fastemailer.com",
    "fastest.cc",
    "fastimap.com",
    "fastmail.cn",
    "fastmail.co.uk",
    "fastmail.com",
    "fastmail.com.au",
    "fastmail.de",
    "fastmail.es",
    "fastmail.fm",
    "fastmail.fr",
    "fastmail.im",
    "fastmail.in",
    "fastmail.jp",
    "fastmail.mx",
    "fastmail.net",
    "fastmail.nl",
    "fastmail.org",
    "fastmail.se",
    "fastmail.to",
    "fastmail.tw",
    "fastmail.uk",
    "fastmail.us",
    "fastmailbox.net",
    "fastmessaging.com",
    "fea.st",
    "fmail.co.uk",
    "fmailbox.com",
    "fmgirl.com",
    "fmguy.com",
    "ftml.net",
    "h-mail.us",
    "hailmail.net",
    "imap-mail.com",
    "imap.cc",
    "imapmail.org",
    "inoutbox.com",
    "internet-e-mail.com",
    "internet-mail.org",
    "internetemails.net",
    "internetmailing.net",
    "jetemail.net",
    "justemail.net",
    "letterboxes.org",
    "mail-central.com",
    "mail-page.com",
    "mailandftp.com",
    "mailas.com",
    "mailbolt.com",
    "mailc.net",
    "mailcan.com",
    "mailforce.net",
    "mailftp.com",
    "mailhaven.com",
    "mailingaddress.org",
    "mailite.com",
    "mailmight.com",
    "mailnew.com",
    "mailsent.net",
    "mailservice.ms",
    "mailup.net",
    "mailworks.org",
    "ml1.net",
    "mm.st",
    "myfastmail.com",
    "mymacmail.com",
    "nospammail.net",
    "ownmail.net",
    "petml.com",
    "postinbox.com",
    "postpro.net",
    "proinbox.com",
    "promessage.com",
    "realemail.net",
    "reallyfast.biz",
    "reallyfast.info",
    "rushpost.com",
    "sent.as",
    "sent.at",
    "sent.com",
    "speedpost.net",
    "speedymail.org",
    "ssl-mail.com",
    "swift-mail.com",
    "the-fastest.net",
    "the-quickest.com",
    "theinternetemail.com",
    "veryfast.biz",
    "veryspeedy.net",
    "warpmail.net",
    "xsmail.com",
    "yepmail.net",
    "your-mail.com",
}
163+ 
# Domains for which ``_clean_email`` treats "-" (rather than "+") as the
# separator between the real local part and an alias suffix.
_YAHOO_DOMAINS = {
    "y7mail.com",
    "yahoo.at",
    "yahoo.be",
    "yahoo.bg",
    "yahoo.ca",
    "yahoo.cl",
    "yahoo.co.id",
    "yahoo.co.il",
    "yahoo.co.in",
    "yahoo.co.kr",
    "yahoo.co.nz",
    "yahoo.co.th",
    "yahoo.co.uk",
    "yahoo.co.za",
    "yahoo.com",
    "yahoo.com.ar",
    "yahoo.com.au",
    "yahoo.com.br",
    "yahoo.com.co",
    "yahoo.com.hk",
    "yahoo.com.hr",
    "yahoo.com.mx",
    "yahoo.com.my",
    "yahoo.com.pe",
    "yahoo.com.ph",
    "yahoo.com.sg",
    "yahoo.com.tr",
    "yahoo.com.tw",
    "yahoo.com.ua",
    "yahoo.com.ve",
    "yahoo.com.vn",
    "yahoo.cz",
    "yahoo.de",
    "yahoo.dk",
    "yahoo.ee",
    "yahoo.es",
    "yahoo.fi",
    "yahoo.fr",
    "yahoo.gr",
    "yahoo.hu",
    "yahoo.ie",
    "yahoo.in",
    "yahoo.it",
    "yahoo.lt",
    "yahoo.lv",
    "yahoo.nl",
    "yahoo.no",
    "yahoo.pl",
    "yahoo.pt",
    "yahoo.ro",
    "yahoo.se",
    "yahoo.sk",
    "ymail.com",
}
219+ 
29220
30221def  prepare_report (request : Dict [str , Any ], validate : bool ):
31222    """Validate and prepare minFraud report""" 
@@ -91,29 +282,42 @@ def maybe_hash_email(transaction):
91282    if  address  is  None :
92283        return 
93284
94-     address  =  address .lower ().strip ()
95- 
96-     at_idx  =  address .rfind ("@" )
97-     if  at_idx  ==  - 1 :
285+     address , domain  =  _clean_email (address )
286+     if  not  address :
98287        return 
99288
100-     domain  =  _clean_domain (address [at_idx  +  1  :])  # noqa 
101-     local_part  =  address [:at_idx ]
102- 
103289    if  domain  !=  ""  and  "domain"  not  in   email :
104290        email ["domain" ] =  domain 
105291
106-     email ["address" ] =  _hash_email ( local_part ,  domain )
292+     email ["address" ] =  hashlib . md5 ( address . encode ( "UTF-8" )). hexdigest ( )
107293
108294
def _clean_domain(domain: str) -> str:
    """Return a normalized form of an email domain.

    The steps run in this order:

    1. Strip surrounding whitespace and trailing dots, then IDNA-encode so
       the rest of the pipeline only sees ASCII.
    2. Repair common ``.com`` mistakes with regular expressions: repeated
       ``.com`` suffixes, junk after ``.com``, misspelled ``.com`` TLDs, and
       digit-prefixed ``gmail.com`` / ``gmai.com``.
    3. Map known typo domains to the intended domain (``_TYPO_DOMAINS``).
    4. Collapse equivalent domains to one canonical domain
       (``_EQUIVALENT_DOMAINS``).

    :param domain: The part of the email address after the last ``@``.
    :return: The normalized domain.
    :raises UnicodeError: May be raised by the ``idna`` codec when the
        domain cannot be encoded.
    """
    domain = domain.strip().rstrip(".").encode("idna").decode("ASCII")

    # The order matters: each substitution sees the previous one's output.
    # "example.com.com" -> "example.com"
    domain = re.sub(r"(?:\.com){2,}$", ".com", domain)
    # "example.comfoo" -> "example.com"
    domain = re.sub(r"\.com[^.]+$", ".com", domain)
    # "example.cam" / "example.con" / "example.xom" -> "example.com"
    domain = re.sub(
        r"(?:\.(?:com|c[a-z]{1,2}m|co[ln]|[dsvx]o[mn]|))$", ".com", domain
    )
    # "123gmail.com" / "123gmai.com" -> "gmail.com"
    domain = re.sub(r"^\d+(?:gmail?\.com)$", "gmail.com", domain)

    # Typo correction runs first so a fixed typo can still be canonicalized.
    domain = _TYPO_DOMAINS.get(domain, domain)
    domain = _EQUIVALENT_DOMAINS.get(domain, domain)

    return domain
307+ 
308+ 
def _clean_email(address):
    """Normalize an email address and split out its domain.

    The address is lower-cased and stripped, its domain is normalized with
    ``_clean_domain``, and the local part is reduced as follows:

    * An alias suffix is removed: everything from the first ``-`` for
      domains in ``_YAHOO_DOMAINS``, or from the first ``+`` otherwise. A
      divider at position 0 is left alone.
    * For ``gmail.com``, dots in the local part are removed.
    * If the domain has more than two labels and everything after the first
      label is in ``_FASTMAIL_DOMAINS``, the domain is reduced to that
      listed domain and a non-empty local part is replaced by the first
      label.

    :param address: The raw email address.
    :return: A ``(normalized_address, domain)`` tuple, or ``(None, None)``
        when ``address`` contains no ``@``.
    """
    address = address.lower().strip()

    at_idx = address.rfind("@")
    if at_idx == -1:
        return None, None

    domain = _clean_domain(address[at_idx + 1 :])  # noqa
    local_part = address[:at_idx]

    # Strip off aliased part of email address.
    divider = "-" if domain in _YAHOO_DOMAINS else "+"

    # NOTE(review): the diff hunk this block was recovered from elides the
    # assignment of alias_idx; reconstructed as the first occurrence of the
    # divider. Confirm against the full file.
    alias_idx = local_part.find(divider)
    if alias_idx > 0:
        local_part = local_part[:alias_idx]

    if domain == "gmail.com":
        local_part = local_part.replace(".", "")

    domain_parts = domain.split(".")
    if len(domain_parts) > 2:
        possible_domain = ".".join(domain_parts[1:])
        if possible_domain in _FASTMAIL_DOMAINS:
            domain = possible_domain
            if local_part != "":
                local_part = domain_parts[0]

    # No padding around "@": the caller hashes this string, so stray
    # whitespace would change the digest.
    return f"{local_part}@{domain}", domain
0 commit comments