Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 2 additions & 8 deletions examples/multi_extractor_compare.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,15 @@
from webmainbench import DataLoader, Evaluator, ExtractorFactory, DataSaver
from pathlib import Path

# 全局LLM配置
LLM_CONFIG = {
'llm_base_url': '',
'llm_api_key': '',
'llm_model': '',
'use_llm': True
}
# 如需调用LLM修正抽取结果,在 webmainbench/config.py 中配置 LLM api

def all_extractor_comparison():
"""演示多抽取器对比"""

print("\n=== 多抽取器对比演示 ===\n")

# 创建数据集
dataset_path = Path("../data/test_math.jsonl")
dataset_path = Path("../data/WebMainBench_llm-webkit_v1_WebMainBench_7887_within_formula.jsonl")
dataset = DataLoader.load_jsonl(dataset_path)

# 创建webkit抽取器
Expand Down
11 changes: 11 additions & 0 deletions webmainbench/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
全局配置文件
"""

# LLM配置,用于修正抽取工具的抽取结果
LLM_CONFIG = {
'llm_base_url': '',
'llm_api_key': '',
'llm_model': 'deepseek-chat',
'use_llm': True
}
74 changes: 71 additions & 3 deletions webmainbench/evaluator/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from dataclasses import dataclass
from typing import Dict, Any, List, Optional, Union, Iterator
import time
import time, sys
import itertools
from datetime import datetime
from pathlib import Path
Expand Down Expand Up @@ -85,10 +85,78 @@ def __init__(self, metric_config: Dict[str, Any] = None):
Args:
metric_config: Configuration for metrics
"""

self._validate_llm_config()

self.metric_calculator = MetricCalculator(metric_config)
self.metric_config = metric_config or {}

def evaluate(self,

def _validate_llm_config(self):
    """Validate the completeness and usability of the LLM configuration.

    Reads ``LLM_CONFIG`` from ``webmainbench/config.py``. Behavior:

    * ``use_llm`` is True but ``llm_base_url``/``llm_api_key`` are missing:
      print setup instructions and terminate via ``sys.exit(1)``.
    * ``use_llm`` is True and credentials are present: send a tiny test
      chat-completion request to confirm the API actually works; on any
      failure, print diagnostics and terminate via ``sys.exit(1)``.
    * ``use_llm`` is False: print a notice and pause 5 seconds so the user
      sees that the baseline (non-LLM) pipeline will be used.

    Side effects: console output, network I/O (one test request), possible
    process exit. Returns None.
    """
    # Deferred project import keeps config loading at call time.
    from ..config import LLM_CONFIG

    if not LLM_CONFIG.get('use_llm', False):
        # LLM disabled: tell the user how to enable it, then continue
        # with the baseline pipeline after a short, deliberate pause.
        print("\n" + "=" * 60)
        print("⚠️ 注意:当前未启用LLM增强提取效果功能")
        print(" 如需启用LLM增强提取效果,请在 webmainbench/config.py 中配置:")
        print(" - 设置 use_llm = True")
        print(" - 填写 llm_base_url")
        print(" - 填写 llm_api_key")
        print("=" * 60)
        print(" (5秒后使用基础方案进行对比...)")
        time.sleep(5)  # give the user time to notice before evaluation starts
        print()
        return

    # Completeness check: both endpoint and key are required.
    if not LLM_CONFIG.get('llm_base_url') or not LLM_CONFIG.get('llm_api_key'):
        print("\n" + "=" * 60)
        print("❌ 错误:LLM配置不完整!")
        print("-" * 60)
        print("当前 use_llm = True,但缺少必要的API配置。")
        print("\n请在 webmainbench/config.py 中完成以下配置:")
        print(" 1. llm_base_url (例如: 'https://api.deepseek.com')")
        print(" 2. llm_api_key (例如: 'sk-xxxxxxxxxxxx')")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print("=" * 60 + "\n")
        sys.exit(1)

    # Liveness check: issue a minimal real request so misconfiguration
    # fails fast here rather than mid-evaluation.
    try:
        from openai import OpenAI

        print("正在验证LLM API配置...")
        client = OpenAI(
            base_url=LLM_CONFIG.get('llm_base_url'),
            api_key=LLM_CONFIG.get('llm_api_key')
        )

        # Cheapest possible probe: 5-token, temperature-0 test message.
        # The response content is irrelevant; only success matters.
        client.chat.completions.create(
            model=LLM_CONFIG.get('llm_model', 'deepseek-chat'),
            messages=[{"role": "user", "content": "test"}],
            max_tokens=5,
            temperature=0
        )

        print("✅ LLM API配置验证成功!\n使用 基础方案➕LLM增强提取效果 进行评测。")

    except Exception as e:
        # Broad catch is intentional: import errors, auth errors, and
        # network errors all mean "configuration unusable" here.
        print("\n" + "=" * 60)
        print("❌ 错误:LLM API配置无效!")
        print("-" * 60)
        print(f"验证失败原因: {str(e)}")
        print("\n请检查 webmainbench/config.py 中的配置:")
        print(" 1. llm_base_url 是否正确")
        print(" 2. llm_api_key 是否有效")
        print(" 3. llm_model 是否支持")
        print(" 4. 网络连接是否正常")
        print("\n或者设置 use_llm = False 来禁用LLM功能。")
        print("=" * 60 + "\n")
        sys.exit(1)

def evaluate(self,
dataset: BenchmarkDataset,
extractor: Union[BaseExtractor, str],
extractor_config: Dict[str, Any] = None,
Expand Down
3 changes: 2 additions & 1 deletion webmainbench/metrics/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,8 @@ def _extract_from_markdown(text: str, field_name: str = None) -> Dict[str, str]:
return {'code': '', 'formula': '', 'table': '', 'text': ''}

# 加载 llm 配置
from examples.multi_extractor_compare import LLM_CONFIG
from ..config import LLM_CONFIG

# 直接创建具体的提取器实例
from .code_extractor import CodeSplitter
from .formula_extractor import FormulaSplitter
Expand Down
1 change: 0 additions & 1 deletion webmainbench/metrics/base_content_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def should_use_llm(self, field_name: str) -> bool:

# 默认逻辑:对groundtruth内容不使用LLM,对其他内容使用
if field_name == "groundtruth_content":
print(f"[DEBUG] 检测到groundtruth内容,不使用LLM")
return False
return True

Expand Down