-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
separate pyfiles and optimize group cases
- Loading branch information
Showing
11 changed files
with
871 additions
and
788 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Wed Sep 11 11:41:46 2019 | ||
@author: autol | ||
""" | ||
|
||
import configparser | ||
|
||
#%% config and default values | ||
|
||
def write_config(cfgfile):
    """Write the default configuration to *cfgfile* and return its section.

    Every default value carries a trailing ``# ...`` remark so that the
    generated file is self-documenting.  Those remarks are only stripped when
    the file is parsed (``inline_comment_prefixes``), so the file is parsed
    again after writing and the parsed ``[config]`` section is returned.
    Returning the in-memory section directly would hand back values such as
    ``'data_main.xlsx # ...'`` with the remark still attached.

    Args:
        cfgfile: path of the INI file to (re)create; written as UTF-8 with BOM.

    Returns:
        The ``[config]`` section of the freshly written file.  All values are
        strings (flags are ``'0'``/``'1'``, unset options are ``''``).
    """
    parser_options = dict(allow_no_value=True,
                          inline_comment_prefixes=('#', ';'))
    cfg = configparser.ConfigParser(**parser_options)

    cfg['config'] = dict(
        data_xlsx = 'data_main.xlsx # 数据模板地址',
        data_oa_xlsx = 'data_oa.xlsx # OA数据地址',
        sheet_docx = 'sheet.docx # 邮单模板地址',
        flag_fill_jdocs_infos = '1 # 是否填充判决书地址',
        flag_append_oa = '1 # 是否导入OA数据',
        flag_to_postal = '1 # 是否打印邮单',
        flag_check_jdocs = '0 # 是否检查用户格式,输出提示信息',
        flag_check_postal = '0 # 是否检查邮单格式,输出提示信息',
        data_case_codes = ' # 指定打印案号,可接多个,示例:AAA,BBB,优先级1',
        data_date_range = ' # 指定打印数据日期范围示例:2018-09-01:2018-12-01,优先级2',
        data_last_lines = '3 # 指定打印最后行数,优先级3',
    )

    with open(cfgfile, 'w', encoding='utf-8-sig') as configfile:
        cfg.write(configfile)
    print('>>> 重新生成配置 %s ...'%cfgfile)

    # Parse what was just written so the inline remarks are stripped from the
    # values handed back to the caller.
    parsed = configparser.ConfigParser(**parser_options)
    parsed.read(cfgfile, encoding='utf-8-sig')
    return parsed['config']
|
||
|
||
#%% | ||
def read_config(cfgfile):
    """Load the ``[config]`` section of *cfgfile* into a plain dict.

    The file is read as UTF-8 (a BOM is accepted) and inline ``#`` / ``;``
    comments are stripped.  Path and selection options are returned as
    strings; the ``flag_*`` options are converted with ``int``.

    Raises:
        KeyError: the section or one of the expected options is missing.
        ValueError: a ``flag_*`` option is not an integer literal.
    """
    text_options = (
        'data_xlsx',
        'data_oa_xlsx',
        'sheet_docx',
        'data_case_codes',
        'data_date_range',
        'data_last_lines',
    )
    flag_options = (
        'flag_fill_jdocs_infos',
        'flag_append_oa',
        'flag_to_postal',
        'flag_check_jdocs',
        'flag_check_postal',
    )

    parser = configparser.ConfigParser(allow_no_value=True,
                                       inline_comment_prefixes=('#', ';'))
    parser.read(cfgfile, encoding='utf-8-sig')

    settings = {}
    for option in text_options:
        settings[option] = parser['config'][option]
    for option in flag_options:
        settings[option] = int(parser['config'][option])
    return settings
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# -*- coding: utf-8 -*- | ||
""" | ||
Created on Wed Sep 11 12:08:08 2019 | ||
@author: autol | ||
""" | ||
|
||
|
||
#%% | ||
import re | ||
from collections import Counter | ||
from util import split_list,user_to_list,save_adjust_xlsx | ||
from globalvar import * | ||
|
||
#%% | ||
|
||
def copy_users_compare(jrow,df,errs=list(' ')):
    '''Compare the parties of a case row with the parties of a judgment row.

    Returns ``df['当事人']`` when the two rows are considered to describe the
    same parties, otherwise one of the tags in ``errs``:

    * ``errs[0]`` - no party in common, or no other rule applied
    * ``errs[1]`` - the differing names share most characters (suspected typo)
    * ``errs[2]`` - a party name occurs more than once on either side
    * ``errs[3]`` - the judgment covers (almost) all case parties but its
      source number differs from the case's first-instance number; treated
      as a series case

    Args:
        jrow: judgment row; reads ``jrow['判决书源号']`` and the keys of
            ``jrow['new_adr']`` (used as the judgment's party names).
        df: case row; reads '案号', '原一审案号' and '当事人'.
        errs: tags indexed 0..3.
            NOTE(review): the default holds a single element, so callers are
            expected to pass all four tags.
    '''
    code0 = str(df['案号']).strip()
    code1 = str(df['原一审案号']).strip()
    jcode = str(jrow['判决书源号']).strip()
    # presumably user_to_list splits the party field into names - verify in util
    x = Counter(user_to_list(df['当事人']))      # parties of the current case
    y = Counter(list(jrow['new_adr'].keys()))   # parties of the judgment

    shared = sum((x & y).values())
    total = sum((x | y).values())
    # Decide "no overlap" before dividing: an empty party list on either side
    # would otherwise raise ZeroDivisionError instead of reporting a mismatch.
    if shared == 0:
        return errs[0]
    rxy = shared / total                         # overlap ratio, in (0, 1]

    if max(x.values()) > 1 or max(y.values()) > 1:  # duplicated names
        xdu = [k for k,v in x.items() if v > 1]
        ydu = [k for k,v in y.items() if v > 1]
        print_log('>>> %s 用户有字段重复【%s】-【案件:%s】 vs 【判决书:%s】'
                  %(code0,'{0:.0%}'.format(rxy),xdu,ydu))
        return errs[2]
    if rxy == 1:                                 # identical party lists
        return df['当事人']

    # Partial overlap: compare the characters of the names that differ to
    # tell a misspelling from genuinely different parties.
    dx = list((x-y).elements())
    dy = list((y-x).elements())
    chars_x = set(''.join(dx))
    chars_y = set(''.join(dy))
    all_chars = chars_x | chars_y
    rxxyy = len(chars_x & chars_y)/len(all_chars) if all_chars else 0.0
    if rxxyy >= .6:
        print_log('>>> %s 认为【错别字率 %s】->【案件:%s vs 判决书:%s】'
                  %(code0,'{0:.0%}'.format(1-rxxyy),dx,dy))
        return errs[1]
    elif rxxyy >= .2:
        print_log('>>> %s 认为【不好判断当正常处理【差异率 %s】vs【相同范围:%s】->【差异范围:案件:%s vs 判决书:%s】 '
                  %(code0,'{0:.0%}'.format(1-rxxyy),
                    list((x&y).elements()),
                    dx,dy))
        return df['当事人']

    # shared > 0 guarantees both sides are non-empty here.
    rxyx = shared / sum(x.values())              # share of case parties matched
    rxyy = shared / sum(y.values())              # share of judgment parties matched
    if rxyx > .8:
        print_log('>>> %s 案件 %s人 < 判决书 %s人'%(code0,len(x),len(y)))
        if jcode != code1:                       # series case
            print_log('>>> %s 认为【系列案,判决书人员 %s 多出地址】'%(code0,list((y-x).elements())))
            return errs[3]
        else:
            return df['当事人']
    elif rxyy > .8:
        print_log('>>> %s 案件 %s人 > 判决书 %s人'%(code0,len(x),len(y)))
        print_log('>>> %s 认为【当前案件人员 %s 缺地址】'%(code0,list((x-y).elements())))
        return df['当事人']
    return errs[0]
|
||
|
||
def copy_rows_adr1(x,n_adr):
    '''Merge addresses taken from a judgment into a row's address field.

    ``x`` is a row providing '当事人', '诉讼代理人', '地址' and '案号'.
    ``n_adr`` maps a party name to an address; when it is not a dict the
    existing address is returned untouched.

    For every name in ``n_adr``:
    * if the name appears in an agent group of the form 'XX、XX/XX_123123'
      and in the current address, the address entries mentioning that name
      are removed (a represented party gets no address of its own);
    * otherwise, if the name is a party of the row and not yet present in
      the address, ``name + adr_tag + address`` is appended.

    Returns the address entries joined with ','.
    '''
    user = x['当事人']
    agent = x['诉讼代理人']
    adr = x['地址']
    codes = x['案号']
    if not isinstance(n_adr,dict):
        return adr

    entries = split_list(r'[,,]',adr)
    before = entries.copy()
    is_exact_dict = type(n_adr) == dict
    for name in n_adr:
        agent_groups = re.findall(r'[\w+、]*\/[\w+]*',agent)
        by_agent = any(name in group for group in agent_groups)
        if by_agent and name in adr:
            # represented party: drop its own address entries
            entries = [entry for entry in entries if name not in entry]
        needs_address = (is_exact_dict and name not in adr
                         and name in user and not by_agent)
        if needs_address:
            entries.append(name+adr_tag+n_adr.get(name))
    after = entries.copy()
    adr = ','.join(filter(None, entries))
    # only report when the set of entries actually changed
    if Counter(before) != Counter(after) and adr and flag_check_jdocs:
        print_log('>>> 【%s】成功复制判决书地址=>【%s】'%(codes,adr))
    return adr
|
||
# File name passed to save_adjust_xlsx when copy_rows_user_func saves the
# judgment rows it worked with.
address_tmp_xlsx = 'address_tmp.xlsx'
|
||
def copy_rows_user_func(dfj,dfo):
    '''Copy judgment addresses into the matching case rows.

    ``dfj`` holds judgment rows ('判决书源号', 'new_adr'), ``dfo`` the case
    rows ('案号', '原一审案号', '当事人', '地址', ...).  For each case row the
    judgment rows are scanned in order and the first one that matches is
    applied: either its source number equals the case's first-instance
    number, or copy_users_compare does not return one of the error tags.
    Judgment rows whose 'new_adr' is not a dict, or is an empty dict, are
    skipped.

    ``dfo`` is modified in place (a '判决书源号' column is added) and
    returned; ``dfj`` with empty cells filled by '' is saved to
    ``address_tmp_xlsx``.
    '''
    errs = ['【OA无用户记录】','【用户错别字】','【字段重复】','【系列案】']

    dfo['判决书源号'] = ''

    def apply_match(row_index, case_row, addresses, case_no, origin_no, source_no):
        # record the judgment on the case row and merge its addresses
        print_log('\n>>> 判决书信息 | 案号=%s | 源号=%s | 判决书源号=%s'%(case_no,origin_no,source_no))
        dfo.loc[row_index,'地址'] = copy_rows_adr1(case_row,addresses)
        dfo.loc[row_index,'判决书源号'] = source_no

    for row_index, case_row in dfo.iterrows():
        for _, jdoc_row in dfj.iterrows():
            case_no = str(case_row['案号']).strip()
            origin_no = str(case_row['原一审案号']).strip()
            source_no = str(jdoc_row['判决书源号']).strip()
            addresses = jdoc_row['new_adr']
            if not isinstance(addresses,dict):
                continue
            if not addresses:  # extracting the judgment fields failed
                continue
            if origin_no != source_no:
                # no matching case number: fall back to comparing the parties
                verdict = copy_users_compare(jdoc_row,case_row,errs)
                if verdict in errs:
                    continue
            apply_match(row_index,case_row,addresses,case_no,origin_no,source_no)
            break
    dfj = dfj.fillna('')
    save_adjust_xlsx(dfj,address_tmp_xlsx,textfit=('判决书源号','new_adr'))  # save the temporary extraction
    return dfo
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
[config] | ||
data_xlsx = data_main.xlsx # 数据模板地址 | ||
data_oa_xlsx = data_oa.xlsx # OA数据地址 | ||
sheet_docx = sheet.docx # 邮单模板地址 | ||
flag_fill_jdocs_infos = 1 # 是否填充判决书地址 | ||
flag_append_oa = 1 # 是否导入OA数据 | ||
flag_to_postal = 1 # 是否打印邮单 | ||
flag_check_jdocs = 0 # 是否检查用户格式,输出提示信息 | ||
flag_check_postal = 0 # 是否检查邮单格式,输出提示信息 | ||
data_case_codes = # 指定打印案号,可接多个,示例:AAA,BBB,优先级1 | ||
data_date_range = # 指定打印数据日期范围示例:2018-09-01:2018-12-01,优先级2 | ||
data_last_lines = 3 # 指定打印最后行数,优先级3 | ||
|
Oops, something went wrong.