validate_metrics.py
"""
Validation script for cognitive metrics.
This script validates our cognitive metrics implementation,
following British English standards and proper type safety.
"""
import sys
from pathlib import Path
# Add project root to Python path
project_root = Path(__file__).parent
sys.path.insert(0, str(project_root))
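# Keeping the project root on sys.path lets the `src` package imports
# below resolve when this script is run directly rather than installed.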
def validate_imports():
    """Validate core module imports."""
    try:
        print("\nValidating Core Imports:")

        # Test spaCy
        print("Importing spaCy...", end=" ")
        import spacy
        print(f"✓ (version {spacy.__version__})")

        # Test core modules
        print("Importing cognitive metrics...", end=" ")
        from src.metrics.cognitive_metrics import (
            ReasoningEvaluator,
            InstructionFollowingEvaluator,
            CognitiveBiasEvaluator
        )
        print("✓")

        print("Importing evaluation protocols...", end=" ")
        from src.evaluation_protocols import (
            TestCase,
            MetricResult,
            MetricCategory
        )
        print("✓")

        return True

    except Exception as e:
        print(f"\n✗ Import validation failed: {e}")
        return False
def validate_nlp():
    """Validate NLP functionality."""
    try:
        print("\nValidating NLP Components:")

        # Load model
        print("Loading English model...", end=" ")
        import spacy
        nlp = spacy.load("en_core_web_sm")
        print("✓")

        # Test text processing
        test_text = """
        First, let's examine the evidence carefully. According to recent studies,
        the approach shows promising results. However, we must consider some
        limitations. For example, the sample size was relatively small.
        Therefore, while the data suggests positive outcomes, further validation
        would be beneficial.
        """
        print("Processing test text...", end=" ")
        doc = nlp(test_text)
        print("✓")

        # Test pattern matching
        print("\nTesting Pattern Recognition:")
        patterns = {
            "Logical Steps": [
                "first", "therefore", "while"
            ],
            "Evidence": [
                "according to", "studies", "data suggests"
            ],
            "Counterarguments": [
                "however", "limitations"
            ]
        }
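        # Note: the matching below is plain substring search on the raw text;
        # the spaCy `doc` above only confirms the pipeline runs end to end.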
        for category, terms in patterns.items():
            matches = [term for term in terms if term.lower() in test_text.lower()]
            if matches:
                print(f"✓ {category}: Found {len(matches)} patterns")
                print(f"  - {', '.join(matches)}")

        return True

    except Exception as e:
        print(f"\n✗ NLP validation failed: {e}")
        return False
def validate_metrics():
    """Validate metrics implementation."""
    try:
        print("\nValidating Metrics Implementation:")

        # Import required modules
        from src.metrics.cognitive_metrics import ReasoningEvaluator

        # Create mock environment
        class MockEnv:
            async def get_model_response(self, model_id, test_case):
                return (
                    "First, let's examine the evidence. According to studies, "
                    "the results are promising. However, we must consider "
                    "limitations. Therefore, further validation is needed."
                )
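        # MockEnv stands in for the real environment: the canned reply contains
        # the reasoning markers the evaluator looks for, so no model call is made.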
        # Create evaluator instance
        print("Initialising ReasoningEvaluator...", end=" ")
        evaluator = ReasoningEvaluator(MockEnv())
        print("✓")

        # Validate component structure
        print("Validating evaluator structure...", end=" ")
        assert hasattr(evaluator, '_extract_reasoning_components')
        assert hasattr(evaluator, '_analyse_reasoning_quality')
        print("✓")

        return True

    except Exception as e:
        print(f"\n✗ Metrics validation failed: {e}")
        return False
def main():
    """Run validation checks."""
    print("Cognitive Metrics Framework Validation")
    print("======================================")
    try:
        # Run validation checks
        checks = [
            ("Core Imports", validate_imports),
            ("NLP Components", validate_nlp),
            ("Metrics Implementation", validate_metrics)
        ]

        failed = False
        for name, check in checks:
            if not check():
                print(f"\n✗ {name} validation failed")
                failed = True
                break

        if not failed:
            print("\n✓ All validation checks passed!")
            return 0
        return 1

    except Exception as e:
        print(f"\n✗ Validation failed: {e}")
        import traceback
        print("\nTraceback:")
        traceback.print_exc()
        return 1
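# To extend the suite, append a ("Name", callable) pair to `checks` in main();
# each callable should print its own progress and return True on success.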
if __name__ == "__main__":
    sys.exit(main())