@@ -310,12 +310,10 @@ def _apply_moderation(self) -> None:
310310
311311 def _compute_vote_stats (self ) -> None :
312312 """
313- Compute statistics on votes.
313+ Compute statistics on votes using vectorized operations .
314314 """
315- # Make sure pandas is imported
316315 import numpy as np
317- import pandas as pd
318-
316+
319317 # Initialize stats
320318 self .vote_stats = {
321319 'n_votes' : 0 ,
@@ -325,84 +323,65 @@ def _compute_vote_stats(self) -> None:
325323 'comment_stats' : {},
326324 'participant_stats' : {}
327325 }
328-
329- # Get matrix values and ensure they are numeric
326+
330327 try :
331- # Make a clean copy that's definitely numeric
328+ # Get clean numeric matrix
332329 clean_mat = self ._get_clean_matrix ()
333- # TODO: we can probably count without needing to convert to numpy array
334330 values = clean_mat .to_numpy ()
335331
336- # Count votes safely
332+ # Create boolean masks once for the entire matrix.
333+ # These are 2D arrays of the same shape as values.
334+ non_null_mask = ~ np .isnan (values )
335+ agree_mask = np .abs (values - 1.0 ) < 0.001 # Close to 1
336+ disagree_mask = np .abs (values + 1.0 ) < 0.001 # Close to -1
337+
338+ # Global stats: sum over entire matrix
337339 try :
338- # Create masks, handling non-numeric data
339- non_null_mask = ~ np .isnan (values )
340- agree_mask = np .abs (values - 1.0 ) < 0.001 # Close to 1
341- disagree_mask = np .abs (values + 1.0 ) < 0.001 # Close to -1
342-
343340 self .vote_stats ['n_votes' ] = int (np .sum (non_null_mask ))
344341 self .vote_stats ['n_agree' ] = int (np .sum (agree_mask ))
345342 self .vote_stats ['n_disagree' ] = int (np .sum (disagree_mask ))
346- self .vote_stats ['n_pass' ] = int (np .sum (np . isnan ( values ) ))
343+ self .vote_stats ['n_pass' ] = int (np .sum (~ non_null_mask ))
347344 except Exception as e :
348- logger .error (f"Error counting votes: { e } " )
349- # Set defaults if counting fails
350- self .vote_stats ['n_votes' ] = 0
351- self .vote_stats ['n_agree' ] = 0
352- self .vote_stats ['n_disagree' ] = 0
353- self .vote_stats ['n_pass' ] = 0
354-
355- # Compute comment stats
356- for i , cid in enumerate (clean_mat .columns ):
357- if i >= values .shape [1 ]:
358- continue
359-
360- try :
361- col = values [:, i ]
362- n_votes = np .sum (~ np .isnan (col ))
363- n_agree = np .sum (np .abs (col - 1.0 ) < 0.001 )
364- n_disagree = np .sum (np .abs (col + 1.0 ) < 0.001 )
365-
366- self .vote_stats ['comment_stats' ][cid ] = {
367- 'n_votes' : int (n_votes ),
368- 'n_agree' : int (n_agree ),
369- 'n_disagree' : int (n_disagree ),
370- 'agree_ratio' : float (n_agree / max (n_votes , 1 ))
371- }
372- except Exception as e :
373- logger .error (f"Error computing stats for comment { cid } : { e } " )
345+ logger .error (f"Error counting global votes: { e } " )
346+
347+ # Per-comment stats: sum along axis=0 (collapses the row dimension).
348+ # axis=0 sums over rows, giving one value per column (comment).
349+ try :
350+ comment_n_votes = np .sum (non_null_mask , axis = 0 )
351+ comment_n_agree = np .sum (agree_mask , axis = 0 )
352+ comment_n_disagree = np .sum (disagree_mask , axis = 0 )
353+ # Avoid division by zero: use np.maximum to ensure denominator >= 1
354+ comment_agree_ratio = comment_n_agree / np .maximum (comment_n_votes , 1 )
355+
356+ # Build comment_stats dict from the arrays.
357+ for i , cid in enumerate (clean_mat .columns ):
374358 self .vote_stats ['comment_stats' ][cid ] = {
375- 'n_votes' : 0 ,
376- 'n_agree' : 0 ,
377- 'n_disagree' : 0 ,
378- 'agree_ratio' : 0.0
359+ 'n_votes' : int ( comment_n_votes [ i ]) ,
360+ 'n_agree' : int ( comment_n_agree [ i ]) ,
361+ 'n_disagree' : int ( comment_n_disagree [ i ]) ,
362+ 'agree_ratio' : float ( comment_agree_ratio [ i ])
379363 }
380-
381- # Compute participant stats
382- for i , pid in enumerate (clean_mat .index ):
383- if i >= values .shape [0 ]:
384- continue
385-
386- try :
387- row = values [i , :]
388- n_votes = np .sum (~ np .isnan (row ))
389- n_agree = np .sum (np .abs (row - 1.0 ) < 0.001 )
390- n_disagree = np .sum (np .abs (row + 1.0 ) < 0.001 )
391-
392- self .vote_stats ['participant_stats' ][pid ] = {
393- 'n_votes' : int (n_votes ),
394- 'n_agree' : int (n_agree ),
395- 'n_disagree' : int (n_disagree ),
396- 'agree_ratio' : float (n_agree / max (n_votes , 1 ))
397- }
398- except Exception as e :
399- logger .error (f"Error computing stats for participant { pid } : { e } " )
364+ except Exception as e :
365+ logger .error (f"Error computing comment stats: { e } " )
366+
367+ # Per-participant stats: sum along axis=1 (collapses the column dimension).
368+ # axis=1 sums over columns, giving one value per row (participant).
369+ try :
370+ ptpt_n_votes = np .sum (non_null_mask , axis = 1 )
371+ ptpt_n_agree = np .sum (agree_mask , axis = 1 )
372+ ptpt_n_disagree = np .sum (disagree_mask , axis = 1 )
373+ ptpt_agree_ratio = ptpt_n_agree / np .maximum (ptpt_n_votes , 1 )
374+
375+ # Build participant_stats dict from the arrays.
376+ for i , pid in enumerate (clean_mat .index ):
400377 self .vote_stats ['participant_stats' ][pid ] = {
401- 'n_votes' : 0 ,
402- 'n_agree' : 0 ,
403- 'n_disagree' : 0 ,
404- 'agree_ratio' : 0.0
378+ 'n_votes' : int ( ptpt_n_votes [ i ]) ,
379+ 'n_agree' : int ( ptpt_n_agree [ i ]) ,
380+ 'n_disagree' : int ( ptpt_n_disagree [ i ]) ,
381+ 'agree_ratio' : float ( ptpt_agree_ratio [ i ])
405382 }
383+ except Exception as e :
384+ logger .error (f"Error computing participant stats: { e } " )
406385 except Exception as e :
407386 logger .error (f"Error in vote stats computation: { e } " )
408387 # Initialize with empty stats if computation fails
0 commit comments