Feat: download model online

jianchang512 · Oct 6, 2024 · ce50242
1 parent 92d3fdf
commit ce50242
Show file tree

Hide file tree

Showing 6 changed files with 248 additions and 67 deletions.
diff --git a/videotrans/__init__.py b/videotrans/__init__.py
@@ -1,4 +1,4 @@
 # -*- coding: utf-8 -*-
 
-VERSION = "v2.71"
-VERSION_NUM = 120071
+VERSION = "v2.72"
+VERSION_NUM = 120072
diff --git a/videotrans/configure/config.py b/videotrans/configure/config.py
@@ -536,7 +536,7 @@ def getset_params(obj=None):
 
         "tts_type": 0,  # 所选的tts顺序
         "split_type": "all",
-        "model_name": "tiny",  # 模型名
+        "model_name": "medium" if Path(ROOT_DIR+"/models/models--Systran--faster-whisper-medium/snapshots").is_dir() else "tiny",  # 模型名
         "recogn_type": 0,  # 语音识别方式，数字代表显示顺序
 
         "voice_autorate": False,

diff --git a/videotrans/mainwin/_main_win.py b/videotrans/mainwin/_main_win.py
@@ -109,41 +109,28 @@ def bind_action(self):
                 self.voice_role.setCurrentText(config.params['voice_role'])
                 self.win_action.show_listen_btn(config.params['voice_role'])
 
-        # tts_type 改变时，重设角色
-        self.tts_type.currentIndexChanged.connect(self.win_action.tts_type_change)
-        self.translate_type.currentIndexChanged.connect(self.win_action.set_translate_type)
-        self.voice_role.currentTextChanged.connect(self.win_action.show_listen_btn)
-        self.target_language.currentTextChanged.connect(self.win_action.set_voice_role)
+        self.model_name.addItems(config.WHISPER_MODEL_LIST)
+        if config.params['model_name'] in config.WHISPER_MODEL_LIST:
+            self.model_name.setCurrentText(config.params['model_name'])
+
+        try:
+            config.params['recogn_type'] = int(config.params['recogn_type'])
+        except Exception:
+            config.params['recogn_type'] = 0
+
+        self.recogn_type.setCurrentIndex(config.params['recogn_type'])
 
-        self.set_line_role.clicked.connect(self.win_action.set_line_role_fun)
-        self.proxy.textChanged.connect(self.win_action.change_proxy)
-        self.import_sub.clicked.connect(self.win_action.import_sub_fun)
-        self.export_sub.clicked.connect(self.win_action.export_sub_fun)
-        self.startbtn.clicked.connect(self.win_action.check_start)
-        self.btn_save_dir.clicked.connect(self.win_action.get_save_dir)
-        self.btn_get_video.clicked.connect(self.win_action.get_mp4)
-        self.stop_djs.clicked.connect(self.win_action.reset_timeid)
-        self.continue_compos.clicked.connect(self.win_action.set_djs_timeout)
-        self.listen_btn.clicked.connect(self.win_action.listen_voice_fun)
-        self.split_type.currentIndexChanged.connect(self.win_action.check_split_type)
-        self.model_name.currentTextChanged.connect(self.win_action.check_model_name)
-        self.recogn_type.currentIndexChanged.connect(self.win_action.recogn_type_change)
-        self.voice_rate.valueChanged.connect(self.win_action.voice_rate_changed)
-        self.voice_autorate.stateChanged.connect(
-            lambda: self.win_action.autorate_changed(self.voice_autorate.isChecked(), "voice"))
-        self.video_autorate.stateChanged.connect(
-            lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "video"))
-        self.append_video.stateChanged.connect(
-            lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "append_video"))
-        self.addbackbtn.clicked.connect(self.win_action.get_background)
-        self.enable_cuda.toggled.connect(self.win_action.check_cuda)
 
         self.moshis = {
             "biaozhun_jd": self.action_xinshoujandan,
             "biaozhun": self.action_biaozhun,
             "tiqu": self.action_tiquzimu
         }
 
+        w = self.size().width()
+        h = self.size().height()
+        self.move(QPoint(int((self.width - w) / 2), int((self.height - h) / 2)))
+
     def _bindsignal(self):
         try:
             from videotrans.task.check_update import CheckUpdateWorker
@@ -174,9 +161,7 @@ def _set_cache_set(self):
         self.stop_djs.setStyleSheet("""background-color:#148CD2;color:#ffffff""")
         self.proxy.setText(config.params['proxy'])
         self.continue_compos.setToolTip(config.transobj['Click to start the next step immediately'])
-
         self.split_type.addItems([config.transobj['whisper_type_all'], config.transobj['whisper_type_avg']])
-        self.model_name.addItems(config.WHISPER_MODEL_LIST)
         self.export_sub.setText(config.transobj['Export srt'])
         self.subtitle_type.addItems(
             [
@@ -188,16 +173,11 @@ def _set_cache_set(self):
             ])
         self.subtitle_type.setCurrentIndex(config.params['subtitle_type'])
 
-        try:
-            config.params['recogn_type'] = int(config.params['recogn_type'])
-        except Exception:
-            config.params['recogn_type'] = 0
-
-        self.recogn_type.setCurrentIndex(config.params['recogn_type'])
         if config.params['recogn_type'] > 1:
             self.model_name_help.setVisible(False)
         else:
             self.model_name_help.clicked.connect(self.win_action.show_model_help)
+
         try:
             config.params['tts_type'] = int(config.params['tts_type'])
         except Exception:
@@ -210,14 +190,19 @@ def _set_cache_set(self):
         if config.params['subtitle_type'] and int(config.params['subtitle_type']) > 0:
             self.subtitle_type.setCurrentIndex(int(config.params['subtitle_type']))
 
-        if config.params['model_name'] in config.WHISPER_MODEL_LIST:
-            self.model_name.setCurrentText(config.params['model_name'])
-
         try:
             self.voice_rate.setValue(int(config.params['voice_rate'].replace('%', '')))
         except Exception:
             self.voice_rate.setValue(0)
 
+        self.voice_autorate.stateChanged.connect(
+            lambda: self.win_action.autorate_changed(self.voice_autorate.isChecked(), "voice"))
+        self.video_autorate.stateChanged.connect(
+            lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "video"))
+        self.append_video.stateChanged.connect(
+            lambda: self.win_action.autorate_changed(self.video_autorate.isChecked(), "append_video"))
+        self.addbackbtn.clicked.connect(self.win_action.get_background)
+
         self.split_type.setDisabled(True if config.params['recogn_type'] > 0 else False)
         self.voice_autorate.setChecked(bool(config.params['voice_autorate']))
         self.video_autorate.setChecked(bool(config.params['video_autorate']))
@@ -227,9 +212,28 @@ def _set_cache_set(self):
         self.only_video.setChecked(True if config.params['only_video'] else False)
         self.is_separate.setChecked(True if config.params['is_separate'] else False)
 
-        w=self.size().width()
-        h=self.size().height()
-        self.move(QPoint(int((self.width - w) / 2), int((self.height - h) / 2)))
+        self.enable_cuda.toggled.connect(self.win_action.check_cuda)
+        # tts_type 改变时，重设角色
+        self.tts_type.currentIndexChanged.connect(self.win_action.tts_type_change)
+        self.translate_type.currentIndexChanged.connect(self.win_action.set_translate_type)
+        self.voice_role.currentTextChanged.connect(self.win_action.show_listen_btn)
+        self.target_language.currentTextChanged.connect(self.win_action.set_voice_role)
+
+        self.set_line_role.clicked.connect(self.win_action.set_line_role_fun)
+        self.proxy.textChanged.connect(self.win_action.change_proxy)
+        self.import_sub.clicked.connect(self.win_action.import_sub_fun)
+        self.export_sub.clicked.connect(self.win_action.export_sub_fun)
+        self.startbtn.clicked.connect(self.win_action.check_start)
+        self.btn_save_dir.clicked.connect(self.win_action.get_save_dir)
+        self.btn_get_video.clicked.connect(self.win_action.get_mp4)
+        self.stop_djs.clicked.connect(self.win_action.reset_timeid)
+        self.continue_compos.clicked.connect(self.win_action.set_djs_timeout)
+        self.listen_btn.clicked.connect(self.win_action.listen_voice_fun)
+        self.split_type.currentIndexChanged.connect(self.win_action.check_split_type)
+        self.model_name.currentTextChanged.connect(self.win_action.check_model_name)
+        self.recogn_type.currentIndexChanged.connect(self.win_action.recogn_type_change)
+        self.voice_rate.valueChanged.connect(self.win_action.voice_rate_changed)
+
 
     def start_subform(self):
         self.import_sub.setCursor(Qt.PointingHandCursor)

diff --git a/videotrans/recognition/_base.py b/videotrans/recognition/_base.py
@@ -179,8 +179,24 @@ def re_segment_sentences(self, data):
         Returns:
             重新划分后的字幕数据，格式与输入相同。
         """
+        flags=r'[,?!，。？！]|(\. )'
+        if self.detect_language[:2] in ['zh', 'ja', 'ko']:
+            maxlen =config.settings['cjk_len']
+            flags=r'[,?!，。？！]|(\. )'
+        else:
+            maxlen = config.settings['other_len']
+        shound_rephase=False
+        for segment in data:
+            if segment['words'][0]['end']-segment['words'][0]['start']>15000:
+                shound_rephase=True
+                break
+            if len(segment['text'])>3*maxlen:
+                shound_rephase=True
+                break
+        print([f"{t['text']}\n" for t in data])
+
         new_data = []
-        if not config.settings['rephrase']:
+        if not config.settings['rephrase'] or not shound_rephase:
             for segment in data:
                 tmp = {
                     "line": len(new_data) + 1,
@@ -205,17 +221,12 @@ def re_segment_sentences(self, data):
         sentence = ""
         sentence_start = data[0]["words"][0]['start']
         sentence_end = 0
-        flags=r'[,?!，。？！]|(\. )'
-        if self.detect_language[:2] in ['zh', 'ja', 'ko']:
-            maxlen =config.settings['cjk_len']
-            flags=r'[,?!，。？！]|(\. )'
-        else:
-            maxlen = config.settings['other_len']
+        print("需要分词")
+
 
         data_len=len(data)
         for seg_i,segment in enumerate(data):
             current_len=len(segment["words"])
-            # print(f'\n\n{segment["words"]=}')
             for i, word_info in enumerate(segment["words"]):
                 word = word_info["word"]
                 start = word_info["start"]
@@ -248,15 +259,16 @@ def re_segment_sentences(self, data):
                     next2_word=''
 
 
-                if ( next_word and re.search(r'[,?!，。！？]|(\. )',next_word) ) or ( next2_word and re.search(r'[,?!，。！？]|(\. )',next2_word) ):
+                if len(sentence.strip()) < 1.2*maxlen  and (  \
+                    ( next_word and re.search(flags,next_word) and len(next_word)<0.2*maxlen ) \
+                    or ( next2_word and re.search(flags,next2_word) and len(next2_word)<0.2*maxlen ) \
+                ):
                     continue
 
                 if next_start> end:
                     if next_start >= end+1000:
                         is_insert=True
-                    elif next_start>=end+250 and len(sentence.strip())>=0.2*maxlen:
-                        is_insert=True
-                    elif next_start >= end+50 and re.search(flags, word) and len(sentence.strip())>=0.3*maxlen:
+                    elif next_start>=end+200 and len(sentence.strip())>=0.2*maxlen:
                         is_insert=True
                     elif re.search(flags, word) and len(sentence.strip())>=maxlen*0.5:
                         is_insert=True
@@ -265,14 +277,14 @@ def re_segment_sentences(self, data):
                     is_insert=True
 
                 if not is_insert:
-                    if self.subtitle_type>0 and len(sentence.strip())>=maxlen*2:
+                    if self.subtitle_type>0 and len(sentence.strip())>=maxlen*1.5:
                         is_insert=True
-                    elif  self.subtitle_type==0 and len(sentence.strip())>=maxlen*2.8:
+                    elif  self.subtitle_type==0 and len(sentence.strip())>=maxlen*2:
                         is_insert=True
 
                 if not is_insert:
                     continue
-                # print(f'{sentence=}')
+
                 tmp = {
                     "line": len(new_data) + 1,
                     "start_time": sentence_start,
@@ -300,7 +312,6 @@ def re_segment_sentences(self, data):
                 tmp["endraw"]=tools.ms_to_time_string(ms=tmp["end_time"])
                 tmp['time'] = f'{tmp["startraw"]} --> {tmp["endraw"]}'
                 new_data.append(tmp)
-        # print(f'\n\n%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%{new_data=}')
         return new_data
 
     # True 退出

diff --git a/videotrans/ui/downmodel.py b/videotrans/ui/downmodel.py
@@ -31,7 +31,13 @@ def setupUi(self, downmodel):
         self.hlayout_name = QHBoxLayout()
         self.hlayout_name.setObjectName(u"hlayout_name")
         self.label_name = QLabel()
+        self.proxy=QtWidgets.QLineEdit()
+        self.proxy.setMinimumWidth(150)
+        self.proxy.setPlaceholderText('如果无法连接到GitHub，请填写代理地址' if config.defaulelang == 'zh' else "proxy address")
+        self.proxy.setToolTip('如果无法连接到GitHub，请填写代理地址' if config.defaulelang == 'zh' else "proxy address")
         self.hlayout_name.addWidget(self.label_name)
+        self.hlayout_name.addStretch()
+        self.hlayout_name.addWidget(self.proxy)
 
         self.hlayout_url = QHBoxLayout()
         self.hlayout_url.setObjectName(u"hlayout_url")
@@ -46,11 +52,22 @@ def setupUi(self, downmodel):
         self.hlayout_btn = QHBoxLayout()
         self.hlayout_btn.setObjectName(u"hlayout_btn")
 
+        self.online_btn = QPushButton()
+        self.online_btn.setObjectName(u"online_btn")
+        self.online_btn.setMinimumSize(QSize(200, 35))
+        self.online_btn.setCursor(QCursor(Qt.PointingHandCursor))
+        self.online_btn.setMouseTracking(False)
+
         self.down_btn = QPushButton()
         self.down_btn.setObjectName(u"down_btn")
         self.down_btn.setMinimumSize(QSize(200, 35))
         self.down_btn.setCursor(QCursor(Qt.PointingHandCursor))
         self.down_btn.setMouseTracking(False)
+
+
+
+
+        self.hlayout_btn.addWidget(self.online_btn)
         self.hlayout_btn.addWidget(self.down_btn)
         self.verticalLayout.addLayout(self.hlayout_btn)
 
@@ -69,7 +86,7 @@ def setupUi(self, downmodel):
 
         self.text_help = QtWidgets.QPlainTextEdit()
         self.text_help.setReadOnly(True)
-        self.text_help.setMinimumSize(QSize(0, 150))
+        self.text_help.setMinimumSize(QSize(0, 50))
         self.verticalLayout.addWidget(self.text_help)
 
         self.retranslateUi(downmodel)
@@ -79,3 +96,4 @@ def setupUi(self, downmodel):
     def retranslateUi(self, downmodel):
         downmodel.setWindowTitle("下载模型" if config.defaulelang == 'zh' else 'Download Models')
         self.down_btn.setText("点击打开浏览器下载" if config.defaulelang == 'zh' else 'Click to open browser to download')
+        self.online_btn.setText("在线下载模型" if config.defaulelang=='zh' else 'Download Model Online')