Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions examples/multi_extractor_compare.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
from pathlib import Path

# 全局LLM配置
LLM_CONFIG = {
'llm_base_url': '',
'llm_api_key': '',
'llm_model': '',
'use_llm': True
}
# 如需调用LLM修正抽取结果,在 webmainbench/config.py 中配置 LLM api

def all_extractor_comparison():
"""演示多抽取器对比"""

print("\n=== 多抽取器对比演示 ===\n")

# 创建数据集
dataset_path = Path("../data/test_math.jsonl")
dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
dataset = DataLoader.load_jsonl(dataset_path)

# 创建webkit抽取器
Expand Down
11 changes: 11 additions & 0 deletions webmainbench/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
全局配置文件
"""

# LLM配置,用于修正抽取工具的抽取结果
LLM_CONFIG = {
'llm_base_url': '',
'llm_api_key': '',
'llm_model': 'deepseek-chat',
'use_llm': True
}
74 changes: 71 additions & 3 deletions webmainbench/evaluator/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dataclasses import dataclass
from typing import Dict, Any, List, Optional, Union, Iterator
import time
import time, sys
import itertools
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -85,10 +85,78 @@ def __init__(self, metric_config: Dict[str, Any] = None):
Args:
metric_config: Configuration for metrics
"""

self._validate_llm_config()

self.metric_calculator = MetricCalculator(metric_config)
self.metric_config = metric_config or {}

def evaluate(self,

def _validate_llm_config(self):
    """Validate the completeness and usability of the LLM configuration.

    Reads ``LLM_CONFIG`` from ``webmainbench/config.py``. Behavior:

    * ``use_llm`` is True but ``llm_base_url``/``llm_api_key`` are missing:
      print setup instructions and terminate via ``sys.exit(1)``.
    * ``use_llm`` is True and credentials are present: send a tiny test
      chat-completion request to confirm the API actually works; on any
      failure, print diagnostics and terminate via ``sys.exit(1)``.
    * ``use_llm`` is False: print a notice and pause 5 seconds so the user
      sees that the baseline (non-LLM) pipeline will be used.

    Side effects: console output, network I/O (one test request), possible
    process exit. Returns None.
    """
    # Deferred project import keeps config loading at call time.
    from ..config import LLM_CONFIG

    if not LLM_CONFIG.get('use_llm', False):
        # LLM disabled: tell the user how to enable it, then continue
        # with the baseline pipeline after a short, deliberate pause.
        print("\n" + "=" * 60)
        print("⚠️ 注意:当前未启用LLM增强提取效果功能")
        print(" 如需启用LLM增强提取效果,请在 webmainbench/config.py 中配置:")
        print(" - 设置 use_llm = True")
        print(" - 填写 llm_base_url")
        print(" - 填写 llm_api_key")
        print("=" * 60)
        print(" (5秒后使用基础方案进行对比...)")
        time.sleep(5)  # give the user time to notice before evaluation starts
        print()
        return

    # Completeness check: both endpoint and key are required.
    if not LLM_CONFIG.get('llm_base_url') or not LLM_CONFIG.get('llm_api_key'):
        print("\n" + "=" * 60)
        print("❌ 错误:LLM配置不完整!")
        print("-" * 60)
        print("当前 use_llm = True,但缺少必要的API配置。")
        print("\n请在 webmainbench/config.py 中完成以下配置:")
        print(" 1. llm_base_url (例如: 'https://api.deepseek.com')")
        print(" 2. llm_api_key (例如: 'sk-xxxxxxxxxxxx')")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print("=" * 60 + "\n")
        sys.exit(1)

    # Liveness check: issue a minimal real request so misconfiguration
    # fails fast here rather than mid-evaluation.
    try:
        from openai import OpenAI

        print("正在验证LLM API配置...")
        client = OpenAI(
            base_url=LLM_CONFIG.get('llm_base_url'),
            api_key=LLM_CONFIG.get('llm_api_key')
        )

        # Cheapest possible probe: 5-token, temperature-0 test message.
        # The response content is irrelevant; only success matters.
        client.chat.completions.create(
            model=LLM_CONFIG.get('llm_model', 'deepseek-chat'),
            messages=[{"role": "user", "content": "test"}],
            max_tokens=5,
            temperature=0
        )

        print("✅ LLM API配置验证成功!\n使用 基础方案➕LLM增强提取效果 进行评测。")

    except Exception as e:
        # Broad catch is intentional: import errors, auth errors, and
        # network errors all mean "configuration unusable" here.
        print("\n" + "=" * 60)
        print("❌ 错误:LLM API配置无效!")
        print("-" * 60)
        print(f"验证失败原因: {str(e)}")
        print("\n请检查 webmainbench/config.py 中的配置:")
        print(" 1. llm_base_url 是否正确")
        print(" 2. llm_api_key 是否有效")
        print(" 3. llm_model 是否支持")
        print(" 4. 网络连接是否正常")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print("=" * 60 + "\n")
        sys.exit(1)

def evaluate(self,
dataset: BenchmarkDataset,
extractor: Union[BaseExtractor, str],
extractor_config: Dict[str, Any] = None,
Expand Down
3 changes: 2 additions & 1 deletion webmainbench/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
return {'code': '', 'formula': '', 'table': '', 'text': ''}

# 加载 llm 配置
from examples.multi_extractor_compare import LLM_CONFIG
from ..config import LLM_CONFIG

# 直接创建具体的提取器实例
from .code_extractor import CodeSplitter
from .formula_extractor import FormulaSplitter
Expand Down
1 change: 0 additions & 1 deletion webmainbench/metrics/base_content_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def should_use_llm(self, field_name: str) -> bool:

# 默认逻辑:对groundtruth内容不使用LLM,对其他内容使用
if field_name == "groundtruth_content":
print(f"[DEBUG] 检测到groundtruth内容,不使用LLM")
return False
return True

Expand Down