Skip to content

Commit 41355a6

Browse files
committed
Refactor repness smoke test
Similar to how we refactored the "direct PCA" tests
1 parent c3947d1 commit 41355a6

File tree

2 files changed

+145
-110
lines changed

2 files changed

+145
-110
lines changed

delphi/tests/direct_repness_test.py

Lines changed: 0 additions & 110 deletions
This file was deleted.

delphi/tests/test_repness_smoke.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
#!/usr/bin/env python3
2+
"""
3+
Smoke tests for representativeness calculation with real data.
4+
5+
Tests representativeness functions directly (not through full Conversation class
6+
pipeline) to verify they work in isolation.
7+
8+
⚠️ WARNING: These are smoke tests only - they verify the code runs without
9+
crashing, but do NOT validate correctness or compare against Clojure results.
10+
"""
11+
12+
import pytest
13+
import logging
14+
import sys
15+
import os
16+
from typing import Dict
17+
18+
# Add the parent directory to the path to import the module
19+
sys.path.append(os.path.abspath(os.path.dirname(__file__)))
20+
21+
from polismath.pca_kmeans_rep.repness import conv_repness, participant_stats
22+
from common_utils import create_test_conversation
23+
from dataset_config import list_available_datasets
24+
25+
logger = logging.getLogger(__name__)
26+
27+
28+
class TestRepnessImplementation:
    """
    Smoke tests for representativeness implementation with real data.

    Tests representativeness functions directly, bypassing full pipeline.
    Every test is parametrized over the dataset names returned by
    ``list_available_datasets()``; the ``conversation`` fixture builds a
    conversation for the dataset currently under test.

    These checks only confirm that the functions run and return the expected
    top-level keys. They do NOT validate numerical correctness.
    """

    # Evaluated once at class-definition time so that all parametrized tests
    # are guaranteed to iterate over the very same list of dataset names.
    _DATASET_NAMES = list(list_available_datasets().keys())

    # Keys every per-participant stats entry is required to expose.
    _REQUIRED_PARTICIPANT_KEYS = (
        'group',
        'n_votes',
        'n_agree',
        'n_disagree',
        'n_pass',
        'group_correlations',
    )

    @pytest.fixture(scope="class", autouse=True)
    def log_warning(self):
        """Log warning that these are smoke tests only."""
        logger.warning(
            "⚠️ These tests verify representativeness functions run without crashing, "
            "but do NOT validate correctness or compare against Clojure results. "
            "For comparison tests, run test_repness_comparison.py manually."
        )

    @pytest.fixture
    def conversation(self, dataset_name: str):
        """Create conversation with PCA and clustering computed.

        Args:
            dataset_name: Name of the dataset to load; supplied by the
                ``parametrize`` decorator of the requesting test.

        Returns:
            The object returned by ``create_test_conversation`` after its
            ``_compute_pca()`` and ``_compute_clusters()`` steps have run.
        """
        logger.debug(f"Creating conversation for {dataset_name}")
        conv = create_test_conversation(dataset_name)

        logger.debug(f"Participants: {conv.participant_count}, Comments: {conv.comment_count}")
        logger.debug(f"Matrix shape: {conv.rating_mat.values.shape}")

        # Run PCA and clustering (needed for repness)
        logger.debug("Computing PCA and clustering...")
        conv._compute_pca()
        conv._compute_clusters()

        logger.debug(f"Number of clusters: {len(conv.group_clusters)}")

        return conv

    @pytest.mark.parametrize("dataset_name", _DATASET_NAMES)
    def test_repness_runs_without_error(self, dataset_name: str, conversation):
        """Test representativeness calculation runs successfully on real data (smoke test)."""
        logger.info(f"Testing representativeness on {dataset_name} dataset")

        # Preconditions: the fixture must have produced usable inputs.
        assert conversation is not None
        assert conversation.rating_mat is not None
        assert conversation.group_clusters is not None
        assert len(conversation.group_clusters) > 0

        # Run representativeness calculation
        repness_results = conv_repness(conversation.rating_mat, conversation.group_clusters)

        assert repness_results is not None
        assert 'comment_ids' in repness_results
        assert 'group_repness' in repness_results
        assert len(repness_results['comment_ids']) > 0
        assert len(repness_results['group_repness']) > 0

        logger.debug(f"Comment IDs: {len(repness_results['comment_ids'])}")
        logger.debug(f"Groups with repness: {len(repness_results['group_repness'])}")

        logger.info(f"✓ Representativeness runs without error for {dataset_name}")

    @pytest.mark.parametrize("dataset_name", _DATASET_NAMES)
    def test_repness_structure(self, dataset_name: str, conversation):
        """Test representativeness results have expected structure."""
        logger.debug(f"Testing representativeness structure for {dataset_name}")

        repness_results = conv_repness(conversation.rating_mat, conversation.group_clusters)

        # Check structure of group_repness
        for group_id, comments in repness_results['group_repness'].items():
            assert isinstance(comments, list)
            assert len(comments) > 0

            # Non-emptiness was just asserted, so the first entry always
            # exists; only that first comment is spot-checked.
            comment = comments[0]
            assert 'comment_id' in comment
            assert 'repful' in comment  # 'agree', 'disagree', or other type
            logger.debug(f"Group {group_id}: {len(comments)} representative comments")

        # Check consensus comments if present (the key is optional, and the
        # collection may legitimately be empty).
        if 'consensus_comments' in repness_results:
            consensus = repness_results['consensus_comments']
            logger.debug(f"Consensus comments: {len(consensus)}")

            if len(consensus) > 0:
                comment = consensus[0]
                assert 'comment_id' in comment

        logger.debug("✓ Representativeness structure validated")

    @pytest.mark.parametrize("dataset_name", _DATASET_NAMES)
    def test_participant_stats(self, dataset_name: str, conversation):
        """Test participant statistics calculation."""
        logger.debug(f"Testing participant stats for {dataset_name}")

        ptpt_stats = participant_stats(conversation.rating_mat, conversation.group_clusters)

        assert ptpt_stats is not None
        assert 'participant_ids' in ptpt_stats
        assert 'stats' in ptpt_stats
        assert len(ptpt_stats['participant_ids']) > 0
        assert len(ptpt_stats['stats']) > 0

        logger.debug(f"Participant IDs: {len(ptpt_stats['participant_ids'])}")
        logger.debug(f"Participants with stats: {len(ptpt_stats['stats'])}")

        # Check structure of first participant. 'stats' was asserted non-empty
        # above, so next(iter(...)) cannot raise StopIteration here.
        sample_id = next(iter(ptpt_stats['stats']))
        ptpt_data = ptpt_stats['stats'][sample_id]

        for required_key in self._REQUIRED_PARTICIPANT_KEYS:
            assert required_key in ptpt_data

        logger.debug(f"Sample participant {sample_id}: group={ptpt_data['group']}, votes={ptpt_data['n_votes']}")

        logger.debug("✓ Participant statistics validated")

0 commit comments

Comments
 (0)