1+ from math import isnan
2+
13import pytest
24from helpers import is_within_expected
35from statsmodels .stats .proportion import proportions_ztest
4- from test_helpers import next_sample_size , next_success_rate
6+ from test_helpers import (
7+ next_sample_size_no_failure ,
8+ next_sample_size_via_loop_with_1_failure ,
9+ next_sample_size_with_1_failure ,
10+ next_success_rate ,
11+ )
512
613
714def test_proportions_ztest_improvement ():
@@ -19,6 +26,14 @@ def test_proportions_ztest_exact_match():
1926 assert p_value == 1.0 , "statistically insignificant result"
2027 assert stat == 0
2128
29+ stat , p_value = proportions_ztest (7 , 10 , 0.7 , prop_var = 1 )
30+ assert isnan (p_value )
31+ assert isnan (stat )
32+
33+ stat , p_value = proportions_ztest (1 , 10 , 0.7 , prop_var = 0.5 )
34+ assert p_value == pytest .approx (0.00014 , rel = 0.1 )
35+ assert stat == pytest .approx (- 3.79 , rel = 0.01 )
36+
2237
2338def test_proportions_ztest_significantly_better ():
2439 stat , p_value = proportions_ztest (9 , 10 , 0.7 )
@@ -58,7 +73,7 @@ def calculate_ztest(success, failure, sample_size) -> tuple[float, float]:
5873
5974
6075def is_statistically_significant (success , failure , sample_size ):
61- return calculate_p_value (success , failure , sample_size ) < 0.05
76+ return calculate_p_value (success , failure , sample_size ) <= 0.05
6277
6378
6479def test_not_is_statistically_significant ():
@@ -79,7 +94,7 @@ def test_is_statistically_significant_with_next_success_rate():
7994 sample_size = 10
8095 assert is_statistically_significant (next_success_rate (sample_size ), 0 , sample_size )
8196 assert is_statistically_significant (
82- next_success_rate (sample_size ), 0 , next_sample_size (sample_size )
97+ next_success_rate (sample_size ), 0 , next_sample_size_with_1_failure (sample_size )
8398 )
8499 assert is_statistically_significant (next_success_rate (35 ), 0 , 109 )
85100
@@ -90,39 +105,38 @@ def test_example_on_wiki():
90105 assert is_within_expected (success_rate , 1 , sample_size )
91106 assert not is_statistically_significant (success_rate , 1 , sample_size )
92107 next_rate = next_success_rate (sample_size )
93- next_size = next_sample_size (sample_size )
94- assert next_size == 193
108+ next_size = next_sample_size_no_failure (sample_size )
109+ assert next_sample_size_via_loop_with_1_failure (sample_size ) == 193
110+ assert next_size == 97
95111 assert next_rate == pytest .approx (0.98 , rel = 0.01 )
96112
97113 assert not is_within_expected (0.95 , 1 , next_size )
98114 assert not is_within_expected (next_rate , 0 , next_size )
99- assert not is_within_expected (next_rate , 1 , next_size )
100- assert is_within_expected (next_rate , 2 , next_size )
115+ assert is_within_expected (next_rate , 1 , next_size )
101116
102117 assert is_statistically_significant (next_rate , 0 , next_size )
103- assert is_statistically_significant (next_rate , 1 , next_size )
104- assert not is_statistically_significant (next_rate , 2 , next_size )
118+ assert not is_statistically_significant (next_rate , 1 , next_size )
105119
106120
107121def test_compare_is_within_expected_and_is_statistically_significant ():
108122 assert is_within_expected (0.7 , 3 , 10 ), "not significant result for 3/10=70%"
109123 assert not is_statistically_significant (0.7 , 3 , 10 ), "not significant for 3/10=70%"
110124
111125 assert is_within_expected (0.7 , 0 , 3 ), "not significant result for 0 out of 3"
112- assert not is_statistically_significant (0.7 , 0 , 3 ), "not significant result for 0 out of 3"
126+ assert is_statistically_significant (0.7 , 0 , 1000 ), "not significant result for 0 out of 3"
113127
114128
115129def test_improvement_from_70_percent ():
116130 assert is_within_expected (0.7 , 0 , 3 ), "no improvement detected at 3"
117- assert not is_statistically_significant (0.7 , 0 , 10 ), "no improvement detected at 10"
131+ assert is_statistically_significant (0.7 , 0 , 10 ), "no improvement detected at 10"
118132
119133 assert not is_within_expected (0.7 , 0 , 4 ), "improvement detected at 4"
120134 assert is_statistically_significant (0.7 , 0 , 11 ), "improvement detected at 11"
121135
122136
123137def test_improvement_from_97_percent ():
124138 assert is_within_expected (0.97 , 0 , 66 ), "no improvement detected at 66"
125- assert not is_statistically_significant (0.97 , 0 , 100 ), "no improvement detected at 100"
139+ assert is_statistically_significant (0.97 , 0 , 100 ), "no improvement detected at 100"
126140
127141 assert not is_within_expected (0.97 , 0 , 67 ), "significantly better at 67"
128142 assert is_statistically_significant (0.97 , 0 , 101 ), "significantly better at 101"
0 commit comments