Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions examples/basic_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def quicksort(arr):
return dataset


- def demo_basic_evaluation():
+ def demo_basic_mock_evaluation():
"""演示基本评测流程"""

print("=== WebMainBench 基本使用示例 ===\n")
Expand Down Expand Up @@ -382,12 +382,12 @@ def _extract_content(self, html, url=None):
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)

- results_path = results_dir / "evaluation_results.json"
+ results_path = results_dir / "mock_evaluation_results.json"
DataSaver.save_evaluation_results(result, results_path)
print(f"\n结果已保存到: {results_path}")

# 10. 生成报告
- report_path = results_dir / "evaluation_report.csv"
+ report_path = results_dir / "mock_evaluation_report.csv"
DataSaver.save_summary_report(result, report_path)
print(f"报告已保存到: {report_path}")

Expand Down Expand Up @@ -701,8 +701,8 @@ def hello_world():

if __name__ == "__main__":
try:
- demo_basic_evaluation()
- # demo_llm_webkit_evaluation() # 使用新的LLM-WebKit评测示例
+ demo_basic_mock_evaluation()
+ demo_llm_webkit_evaluation() # 使用LLM-WebKit评测示例
print("\n✅ 示例运行完成!")

except Exception as e:
Expand Down
8 changes: 4 additions & 4 deletions results/llm_webkit_evaluation_results.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"metadata": {
"dataset_name": "llm_webkit_test",
"extractor_name": "llm-webkit",
- "timestamp": "2025-07-31T13:52:12.948959",
+ "timestamp": "2025-07-31T14:59:59.169188",
"total_samples": 3
},
"overall_metrics": {
Expand All @@ -17,7 +17,7 @@
{
"sample_id": "text_code_sample",
"extraction_success": true,
- "extraction_time": 3.6406631469726562,
+ "extraction_time": 3.614135980606079,
"metrics": {
"code_edit": {
"score": 0.488,
Expand Down Expand Up @@ -113,7 +113,7 @@
{
"sample_id": "table_sample",
"extraction_success": true,
- "extraction_time": 1.6590700149536133,
+ "extraction_time": 1.6187489032745361,
"metrics": {
"code_edit": {
"score": 1.0,
Expand Down Expand Up @@ -209,7 +209,7 @@
{
"sample_id": "formula_sample",
"extraction_success": true,
- "extraction_time": 1.5354089736938477,
+ "extraction_time": 1.5252501964569092,
"metrics": {
"code_edit": {
"score": 1.0,
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"metadata": {
"dataset_name": "sample_dataset",
"extractor_name": "mock",
- "timestamp": "2025-07-31T14:29:43.477342",
+ "timestamp": "2025-07-31T14:59:49.917729",
"total_samples": 2
},
"overall_metrics": {
Expand All @@ -17,7 +17,7 @@
{
"sample_id": "sample-001-programming-tutorial",
"extraction_success": true,
- "extraction_time": 4.0531158447265625e-06,
+ "extraction_time": 7.3909759521484375e-06,
"metrics": {
"code_edit": {
"score": 1.0,
Expand Down
2 changes: 1 addition & 1 deletion webmainbench/extractors/jina_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def _extract_content(self, html: str, url: str = None) -> ExtractionResult:

return ExtractionResult(
content=content,
- content_list=content_list,
+ # content_list=content_list,
title=title,
# confidence_score=self._calculate_confidence(content, content_list),
success=True
Expand Down
2 changes: 1 addition & 1 deletion webmainbench/extractors/llm_webkit_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -661,7 +661,7 @@ def _extract_content(self, html: str, url: str = None) -> ExtractionResult:
# 创建结果对象
result = ExtractionResult(
content=main_content,
- content_list=content_list,
+ # content_list=content_list,
title=self._extract_title(html),
language=self._detect_language(main_content),
confidence_score=confidence,
Expand Down