Skip to content

Commit 9750c6d

Browse files
authored
fix: garbled zip import file names (#2747)
1 parent a2b6620 commit 9750c6d

File tree

1 file changed

+12
-0
lines changed

1 file changed

+12
-0
lines changed

apps/common/handle/impl/zip_split_handle.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from typing import List
1515
from urllib.parse import urljoin
1616

17+
from charset_normalizer import detect
1718
from django.db.models import QuerySet
1819

1920
from common.handle.base_split_handle import BaseSplitHandle
@@ -100,6 +101,15 @@ def get_image_list(result_list: list, zip_files: List[str]):
100101
return image_file_list
101102

102103

104+
def get_file_name(file_name):
    """Best-effort repair of a garbled ZIP member name.

    The name is re-encoded to its cp437 bytes, the real charset of those
    bytes is guessed with ``detect``, and the bytes are decoded again with
    the guessed charset.
    NOTE(review): presumably needed because ``zipfile`` decodes member names
    that lack the UTF-8 flag as cp437 -- confirm against the zipfile docs.

    :param file_name: member name as reported by the archive reader
    :return: the re-decoded name, or ``file_name`` unchanged whenever it
             cannot be safely re-decoded
    """
    # Non-string values and pure-ASCII names have nothing to repair; skipping
    # them also avoids running charset detection on the common case.
    if not isinstance(file_name, str) or file_name.isascii():
        return file_name
    try:
        raw_name = file_name.encode('cp437')
        charset = detect(raw_name)['encoding']
        # Detection can come back empty (None); keep the original name then
        # instead of relying on decode(None) raising.
        if not charset:
            return file_name
        return raw_name.decode(charset)
    except (UnicodeError, LookupError):
        # UnicodeError: the name is not representable in cp437, or the bytes
        # are invalid for the guessed charset.
        # LookupError: the guessed charset is unknown to Python, or the
        # detection result has no 'encoding' key.
        return file_name
111+
112+
103113
def filter_image_file(result_list: list, image_list):
    """Drop the entries of ``result_list`` that are image source files.

    :param result_list: list of dicts; each entry is matched by its ``name``
                        key (a missing key is treated as ``''``)
    :param image_list:  list of dicts; the ``source_file`` value of each one
                        marks a name to exclude
    :return: a new list with the entries of ``result_list`` whose name is not
             the ``source_file`` of any image, in their original order
    """
    # Build the lookup once as a set so each membership test is O(1).
    # NOTE(review): assumes the source_file / name values are hashable
    # (strings or None) -- confirm against the callers.
    image_source_files = {image.get('source_file') for image in image_list}
    return [r for r in result_list if r.get('name', '') not in image_source_files]
@@ -121,6 +131,8 @@ def handle(self, file, pattern_list: List, with_filter: bool, limit: int, get_bu
121131
with zip_ref.open(file) as f:
122132
# 对文件内容进行处理
123133
try:
134+
# 处理一下文件名
135+
f.name = get_file_name(f.name)
124136
value = file_to_paragraph(f, pattern_list, with_filter, limit)
125137
if isinstance(value, list):
126138
result = [*result, *value]

0 commit comments

Comments
 (0)