Skip to content

Commit

Permalink
Feat: download model online
Browse files Browse the repository at this point in the history
  • Loading branch information
jianchang512 committed Oct 6, 2024
1 parent 92d3fdf commit ce50242
Show file tree
Hide file tree
Showing 6 changed files with 248 additions and 67 deletions.
4 changes: 2 additions & 2 deletions videotrans/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# -*- coding: utf-8 -*-

VERSION = "v2.71"
VERSION_NUM = 120071
VERSION = "v2.72"
VERSION_NUM = 120072
2 changes: 1 addition & 1 deletion videotrans/configure/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ def getset_params(obj=None):

"tts_type": 0, # 所选的tts顺序
"split_type": "all",
"model_name": "tiny", # 模型名
"model_name": "medium" if Path(ROOT_DIR+"/models/models--Systran--faster-whisper-medium/snapshots").is_dir() else "tiny", # 模型名
"recogn_type": 0, # 语音识别方式,数字代表显示顺序

"voice_autorate": False,
Expand Down
86 changes: 45 additions & 41 deletions videotrans/mainwin/_main_win.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,41 +109,28 @@ def bind_action(self):
self.voice_role.setCurrentText(config.params['voice_role'])
self.win_action.show_listen_btn(config.params['voice_role'])

# tts_type 改变时,重设角色
self.tts_type.currentIndexChanged.connect(self.win_action.tts_type_change)
self.translate_type.currentIndexChanged.connect(self.win_action.set_translate_type)
self.voice_role.currentTextChanged.connect(self.win_action.show_listen_btn)
self.target_language.currentTextChanged.connect(self.win_action.set_voice_role)
self.model_name.addItems(config.WHISPER_MODEL_LIST)
if config.params['model_name'] in config.WHISPER_MODEL_LIST:
self.model_name.setCurrentText(config.params['model_name'])

try:
config.params['recogn_type'] = int(config.params['recogn_type'])
except Exception:
config.params['recogn_type'] = 0

self.recogn_type.setCurrentIndex(config.params['recogn_type'])

self.set_line_role.clicked.connect(self.win_action.set_line_role_fun)
self.proxy.textChanged.connect(self.win_action.change_proxy)
self.import_sub.clicked.connect(self.win_action.import_sub_fun)
self.export_sub.clicked.connect(self.win_action.export_sub_fun)
self.startbtn.clicked.connect(self.win_action.check_start)
self.btn_save_dir.clicked.connect(self.win_action.get_save_dir)
self.btn_get_video.clicked.connect(self.win_action.get_mp4)
self.stop_djs.clicked.connect(self.win_action.reset_timeid)
self.continue_compos.clicked.connect(self.win_action.set_djs_timeout)
self.listen_btn.clicked.connect(self.win_action.listen_voice_fun)
self.split_type.currentIndexChanged.connect(self.win_action.check_split_type)
self.model_name.currentTextChanged.connect(self.win_action.check_model_name)
self.recogn_type.currentIndexChanged.connect(self.win_action.recogn_type_change)
self.voice_rate.valueChanged.connect(self.win_action.voice_rate_changed)
self.voice_autorate.stateChanged.connect(
lambda: self.win_action.autorate_changed(self.voice_autorate.isChecked(), "voice"))
self.video_autorate.stateChanged.connect(
lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "video"))
self.append_video.stateChanged.connect(
lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "append_video"))
self.addbackbtn.clicked.connect(self.win_action.get_background)
self.enable_cuda.toggled.connect(self.win_action.check_cuda)

self.moshis = {
"biaozhun_jd": self.action_xinshoujandan,
"biaozhun": self.action_biaozhun,
"tiqu": self.action_tiquzimu
}

w = self.size().width()
h = self.size().height()
self.move(QPoint(int((self.width - w) / 2), int((self.height - h) / 2)))

def _bindsignal(self):
try:
from videotrans.task.check_update import CheckUpdateWorker
Expand Down Expand Up @@ -174,9 +161,7 @@ def _set_cache_set(self):
self.stop_djs.setStyleSheet("""background-color:#148CD2;color:#ffffff""")
self.proxy.setText(config.params['proxy'])
self.continue_compos.setToolTip(config.transobj['Click to start the next step immediately'])

self.split_type.addItems([config.transobj['whisper_type_all'], config.transobj['whisper_type_avg']])
self.model_name.addItems(config.WHISPER_MODEL_LIST)
self.export_sub.setText(config.transobj['Export srt'])
self.subtitle_type.addItems(
[
Expand All @@ -188,16 +173,11 @@ def _set_cache_set(self):
])
self.subtitle_type.setCurrentIndex(config.params['subtitle_type'])

try:
config.params['recogn_type'] = int(config.params['recogn_type'])
except Exception:
config.params['recogn_type'] = 0

self.recogn_type.setCurrentIndex(config.params['recogn_type'])
if config.params['recogn_type'] > 1:
self.model_name_help.setVisible(False)
else:
self.model_name_help.clicked.connect(self.win_action.show_model_help)

try:
config.params['tts_type'] = int(config.params['tts_type'])
except Exception:
Expand All @@ -210,14 +190,19 @@ def _set_cache_set(self):
if config.params['subtitle_type'] and int(config.params['subtitle_type']) > 0:
self.subtitle_type.setCurrentIndex(int(config.params['subtitle_type']))

if config.params['model_name'] in config.WHISPER_MODEL_LIST:
self.model_name.setCurrentText(config.params['model_name'])

try:
self.voice_rate.setValue(int(config.params['voice_rate'].replace('%', '')))
except Exception:
self.voice_rate.setValue(0)

self.voice_autorate.stateChanged.connect(
lambda: self.win_action.autorate_changed(self.voice_autorate.isChecked(), "voice"))
self.video_autorate.stateChanged.connect(
lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "video"))
self.append_video.stateChanged.connect(
lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "append_video"))
self.addbackbtn.clicked.connect(self.win_action.get_background)

self.split_type.setDisabled(True if config.params['recogn_type'] > 0 else False)
self.voice_autorate.setChecked(bool(config.params['voice_autorate']))
self.video_autorate.setChecked(bool(config.params['video_autorate']))
Expand All @@ -227,9 +212,28 @@ def _set_cache_set(self):
self.only_video.setChecked(True if config.params['only_video'] else False)
self.is_separate.setChecked(True if config.params['is_separate'] else False)

w=self.size().width()
h=self.size().height()
self.move(QPoint(int((self.width - w) / 2), int((self.height - h) / 2)))
self.enable_cuda.toggled.connect(self.win_action.check_cuda)
# tts_type 改变时,重设角色
self.tts_type.currentIndexChanged.connect(self.win_action.tts_type_change)
self.translate_type.currentIndexChanged.connect(self.win_action.set_translate_type)
self.voice_role.currentTextChanged.connect(self.win_action.show_listen_btn)
self.target_language.currentTextChanged.connect(self.win_action.set_voice_role)

self.set_line_role.clicked.connect(self.win_action.set_line_role_fun)
self.proxy.textChanged.connect(self.win_action.change_proxy)
self.import_sub.clicked.connect(self.win_action.import_sub_fun)
self.export_sub.clicked.connect(self.win_action.export_sub_fun)
self.startbtn.clicked.connect(self.win_action.check_start)
self.btn_save_dir.clicked.connect(self.win_action.get_save_dir)
self.btn_get_video.clicked.connect(self.win_action.get_mp4)
self.stop_djs.clicked.connect(self.win_action.reset_timeid)
self.continue_compos.clicked.connect(self.win_action.set_djs_timeout)
self.listen_btn.clicked.connect(self.win_action.listen_voice_fun)
self.split_type.currentIndexChanged.connect(self.win_action.check_split_type)
self.model_name.currentTextChanged.connect(self.win_action.check_model_name)
self.recogn_type.currentIndexChanged.connect(self.win_action.recogn_type_change)
self.voice_rate.valueChanged.connect(self.win_action.voice_rate_changed)


def start_subform(self):
self.import_sub.setCursor(Qt.PointingHandCursor)
Expand Down
43 changes: 27 additions & 16 deletions videotrans/recognition/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,24 @@ def re_segment_sentences(self, data):
Returns:
重新划分后的字幕数据,格式与输入相同。
"""
flags=r'[,?!,。?!]|(\. )'
if self.detect_language[:2] in ['zh', 'ja', 'ko']:
maxlen =config.settings['cjk_len']
flags=r'[,?!,。?!]|(\. )'
else:
maxlen = config.settings['other_len']
shound_rephase=False
for segment in data:
if segment['words'][0]['end']-segment['words'][0]['start']>15000:
shound_rephase=True
break
if len(segment['text'])>3*maxlen:
shound_rephase=True
break
print([f"{t['text']}\n" for t in data])

new_data = []
if not config.settings['rephrase']:
if not config.settings['rephrase'] or not shound_rephase:
for segment in data:
tmp = {
"line": len(new_data) + 1,
Expand All @@ -205,17 +221,12 @@ def re_segment_sentences(self, data):
sentence = ""
sentence_start = data[0]["words"][0]['start']
sentence_end = 0
flags=r'[,?!,。?!]|(\. )'
if self.detect_language[:2] in ['zh', 'ja', 'ko']:
maxlen =config.settings['cjk_len']
flags=r'[,?!,。?!]|(\. )'
else:
maxlen = config.settings['other_len']
print("需要分词")


data_len=len(data)
for seg_i,segment in enumerate(data):
current_len=len(segment["words"])
# print(f'\n\n{segment["words"]=}')
for i, word_info in enumerate(segment["words"]):
word = word_info["word"]
start = word_info["start"]
Expand Down Expand Up @@ -248,15 +259,16 @@ def re_segment_sentences(self, data):
next2_word=''


if ( next_word and re.search(r'[,?!,。!?]|(\. )',next_word) ) or ( next2_word and re.search(r'[,?!,。!?]|(\. )',next2_word) ):
if len(sentence.strip()) < 1.2*maxlen and ( \
( next_word and re.search(flags,next_word) and len(next_word)<0.2*maxlen ) \
or ( next2_word and re.search(flags,next2_word) and len(next2_word)<0.2*maxlen ) \
):
continue

if next_start> end:
if next_start >= end+1000:
is_insert=True
elif next_start>=end+250 and len(sentence.strip())>=0.2*maxlen:
is_insert=True
elif next_start >= end+50 and re.search(flags, word) and len(sentence.strip())>=0.3*maxlen:
elif next_start>=end+200 and len(sentence.strip())>=0.2*maxlen:
is_insert=True
elif re.search(flags, word) and len(sentence.strip())>=maxlen*0.5:
is_insert=True
Expand All @@ -265,14 +277,14 @@ def re_segment_sentences(self, data):
is_insert=True

if not is_insert:
if self.subtitle_type>0 and len(sentence.strip())>=maxlen*2:
if self.subtitle_type>0 and len(sentence.strip())>=maxlen*1.5:
is_insert=True
elif self.subtitle_type==0 and len(sentence.strip())>=maxlen*2.8:
elif self.subtitle_type==0 and len(sentence.strip())>=maxlen*2:
is_insert=True

if not is_insert:
continue
# print(f'{sentence=}')

tmp = {
"line": len(new_data) + 1,
"start_time": sentence_start,
Expand Down Expand Up @@ -300,7 +312,6 @@ def re_segment_sentences(self, data):
tmp["endraw"]=tools.ms_to_time_string(ms=tmp["end_time"])
tmp['time'] = f'{tmp["startraw"]} --> {tmp["endraw"]}'
new_data.append(tmp)
# print(f'\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{new_data=}')
return new_data

# True 退出
Expand Down
20 changes: 19 additions & 1 deletion videotrans/ui/downmodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,13 @@ def setupUi(self, downmodel):
self.hlayout_name = QHBoxLayout()
self.hlayout_name.setObjectName(u"hlayout_name")
self.label_name = QLabel()
self.proxy=QtWidgets.QLineEdit()
self.proxy.setMinimumWidth(150)
self.proxy.setPlaceholderText('如果无法连接到GitHub,请填写代理地址' if config.defaulelang == 'zh' else "proxy address")
self.proxy.setToolTip('如果无法连接到GitHub,请填写代理地址' if config.defaulelang == 'zh' else "proxy address")
self.hlayout_name.addWidget(self.label_name)
self.hlayout_name.addStretch()
self.hlayout_name.addWidget(self.proxy)

self.hlayout_url = QHBoxLayout()
self.hlayout_url.setObjectName(u"hlayout_url")
Expand All @@ -46,11 +52,22 @@ def setupUi(self, downmodel):
self.hlayout_btn = QHBoxLayout()
self.hlayout_btn.setObjectName(u"hlayout_btn")

self.online_btn = QPushButton()
self.online_btn.setObjectName(u"online_btn")
self.online_btn.setMinimumSize(QSize(200, 35))
self.online_btn.setCursor(QCursor(Qt.PointingHandCursor))
self.online_btn.setMouseTracking(False)

self.down_btn = QPushButton()
self.down_btn.setObjectName(u"down_btn")
self.down_btn.setMinimumSize(QSize(200, 35))
self.down_btn.setCursor(QCursor(Qt.PointingHandCursor))
self.down_btn.setMouseTracking(False)




self.hlayout_btn.addWidget(self.online_btn)
self.hlayout_btn.addWidget(self.down_btn)
self.verticalLayout.addLayout(self.hlayout_btn)

Expand All @@ -69,7 +86,7 @@ def setupUi(self, downmodel):

self.text_help = QtWidgets.QPlainTextEdit()
self.text_help.setReadOnly(True)
self.text_help.setMinimumSize(QSize(0, 150))
self.text_help.setMinimumSize(QSize(0, 50))
self.verticalLayout.addWidget(self.text_help)

self.retranslateUi(downmodel)
Expand All @@ -79,3 +96,4 @@ def setupUi(self, downmodel):
def retranslateUi(self, downmodel):
    """Apply the user-visible captions of the model download dialog.

    Chinese captions are used when ``config.defaulelang`` equals ``'zh'``;
    English captions are used otherwise.

    Args:
        downmodel: The dialog object whose window title is set.
    """
    use_zh = config.defaulelang == 'zh'
    if use_zh:
        window_title = "下载模型"
        browser_caption = "点击打开浏览器下载"
        online_caption = "在线下载模型"
    else:
        window_title = 'Download Models'
        browser_caption = 'Click to open browser to download'
        online_caption = 'Download Model Online'

    # Same call order as before: window title, browser button, online button.
    downmodel.setWindowTitle(window_title)
    self.down_btn.setText(browser_caption)
    self.online_btn.setText(online_caption)
Loading

0 comments on commit ce50242

Please sign in to comment.