Skip to content

Commit 588b765

Browse files
author
Yang Yang
committed
fix cellphone extraction bugs
1 parent bdd7b8d commit 588b765

File tree

3 files changed

+5
-4
lines changed

3 files changed

+5
-4
lines changed

cocoNLP/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
__title__ = "cocoNLP"
1111
__description__ = "Python implementation of many nlp algorithms"
1212
__url__ = "https://github.com/fighting41love"
13-
__version__ = "0.0.11"
13+
__version__ = "0.0.12"
1414
__author__ = "Yang Yang"
1515
__author_email__ = "yangyangfuture@gmail.com"
1616
__license__ = "MIT"

cocoNLP/extractor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,11 +61,11 @@ def extract_cellphone(self, text, nation):
6161
:return: email_addresses_list<list>
6262
"""
6363
eng_texts = self.replace_chinese(text)
64-
sep = ',!?:; :,。!?《》、|\\/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
64+
sep = ',!?:; :,.。!?《》、|\\/abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
6565
eng_split_texts = [''.join(g) for k, g in groupby(eng_texts, sep.__contains__) if not k]
6666
eng_split_texts_clean = [ele for ele in eng_split_texts if len(ele)>=7 and len(ele)<17]
6767
if nation=='CHN':
68-
phone_pattern = '((\+86)?([- ])?)?(|(13[0-9])|(14[0-9])|(15[0-9])|(17[0-9])|(18[0-9])|(19[0-9]))([- ])?\d{3}([- ])?\d{4}([- ])?\d{4}'
68+
phone_pattern = r'^((\+86)?([- ])?)?(|(13[0-9])|(14[0-9])|(15[0-9])|(17[0-9])|(18[0-9])|(19[0-9]))([- ])?\d{3}([- ])?\d{4}([- ])?\d{4}$'
6969

7070
phones = []
7171
for eng_text in eng_split_texts_clean:

test.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33

44
ex = extractor()
55

6-
text = '急寻特朗普,男孩,于2018年11月27号11时在陕西省安康市汉滨区走失。身份证号码410105196904010537丢失发型短发,...如有线索,请迅速与警方联系:18100065143,132-6156-2938,baizhantang@sina.com.cn 和yangyangfuture at gmail dot com'
6+
text = '急寻特朗普,男孩,于2018年11月27号11时在陕西省安康市汉滨区走失。身份证号码410105196904010537丢失发型短发,...' \
7+
'如有线索,请迅速与警方联系:18100065143,132-6156-2938,baizhantang@sina.com.cn 和yangyangfuture at gmail dot com13673630861'
78

89
# 抽取邮箱
910
emails = ex.extract_email(text)

0 commit comments

Comments
 (0)