Skip to content

Commit fbad722

Browse files
committed
[SPARK-3077][MLLIB] fix some chisq-test
- promote nullHypothesis field in ChiSqTestResult to TestResult. Every test should have a null hypothesis
- correct null hypothesis statement for independence test
- p-value: 0.01 -> 0.1

Author: Xiangrui Meng <meng@databricks.com>

Closes #1982 from mengxr/fix-chisq and squashes the following commits:

5f0de02 [Xiangrui Meng] make ChiSqTestResult constructor package private
bc74ea1 [Xiangrui Meng] update chisq-test
1 parent bc95fe0 commit fbad722

File tree

2 files changed

+17
-13
lines changed

2 files changed

+17
-13
lines changed

mllib/src/main/scala/org/apache/spark/mllib/stat/test/ChiSqTest.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ private[stat] object ChiSqTest extends Logging {
5656
object NullHypothesis extends Enumeration {
5757
type NullHypothesis = Value
5858
val goodnessOfFit = Value("observed follows the same distribution as expected.")
59-
val independence = Value("observations in each column are statistically independent.")
59+
val independence = Value("the occurrence of the outcomes is statistically independent.")
6060
}
6161

6262
// Method identification based on input methodName string

mllib/src/main/scala/org/apache/spark/mllib/stat/test/TestResult.scala

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,11 @@ trait TestResult[DF] {
4444
*/
4545
def statistic: Double
4646

47+
/**
48+
* Null hypothesis of the test.
49+
*/
50+
def nullHypothesis: String
51+
4752
/**
4853
* String explaining the hypothesis test result.
4954
* Specific classes implementing this trait should override this method to output test-specific
@@ -53,13 +58,13 @@ trait TestResult[DF] {
5358

5459
// String explaining what the p-value indicates.
5560
val pValueExplain = if (pValue <= 0.01) {
56-
"Very strong presumption against null hypothesis."
61+
s"Very strong presumption against null hypothesis: $nullHypothesis."
5762
} else if (0.01 < pValue && pValue <= 0.05) {
58-
"Strong presumption against null hypothesis."
59-
} else if (0.05 < pValue && pValue <= 0.01) {
60-
"Low presumption against null hypothesis."
63+
s"Strong presumption against null hypothesis: $nullHypothesis."
64+
} else if (0.05 < pValue && pValue <= 0.1) {
65+
s"Low presumption against null hypothesis: $nullHypothesis."
6166
} else {
62-
"No presumption against null hypothesis."
67+
s"No presumption against null hypothesis: $nullHypothesis."
6368
}
6469

6570
s"degrees of freedom = ${degreesOfFreedom.toString} \n" +
@@ -70,19 +75,18 @@ trait TestResult[DF] {
7075

7176
/**
7277
* :: Experimental ::
73-
* Object containing the test results for the chi squared hypothesis test.
78+
* Object containing the test results for the chi-squared hypothesis test.
7479
*/
7580
@Experimental
76-
class ChiSqTestResult(override val pValue: Double,
81+
class ChiSqTestResult private[stat] (override val pValue: Double,
7782
override val degreesOfFreedom: Int,
7883
override val statistic: Double,
7984
val method: String,
80-
val nullHypothesis: String) extends TestResult[Int] {
85+
override val nullHypothesis: String) extends TestResult[Int] {
8186

8287
override def toString: String = {
83-
"Chi squared test summary: \n" +
84-
s"method: $method \n" +
85-
s"null hypothesis: $nullHypothesis \n" +
86-
super.toString
88+
"Chi squared test summary:\n" +
89+
s"method: $method\n" +
90+
super.toString
8791
}
8892
}

0 commit comments

Comments
 (0)