Skip to content

Commit

Permalink
[aehrc#104] Implement regression tests (aehrc#109)
Browse files Browse the repository at this point in the history
* Fixed the issue with bootstrap sample, by using the actual sample indexes (with repeats) rather than the distinct set of indexes (aehrc#101)

* Tech/104/0.2 (aehrc#105)

* Added regression test cases generation scripts

* Added regression cases and unit test

* Moved execution of regression tests to 'regression-test' profile

* Enabled all regression test cases

* Update command line for regression tests

* Fixed chr22 regression cmd line

* Clean up: removed all splitting classes

* Refactored regression tests to use Parameterized

* Update the regression cases generation scripts
  • Loading branch information
piotrszul authored Apr 10, 2019
1 parent 1e34581 commit 53fdc1b
Show file tree
Hide file tree
Showing 27 changed files with 313 additions and 682 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,4 @@ build
dist
_build
spark-warehouse
.*.crc
4 changes: 2 additions & 2 deletions dev/test-gen-regression-cases.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ PREFIX="CNAE-9"
"${FWDIR}/bin/variant-spark" --spark --master local[2] -- importance -if "${DATA_DIR}/${PREFIX}-wide.csv" -ff "${DATA_DIR}/${PREFIX}-labels.csv" \
-fc "${RESP}" \
-on 100 -of "${OUTPUT_DIR}/${PREFIX}-imp_${RESP}.csv" \
-ivo 10 \
-io "{\"defVariableType\":\"ORDINAL(10)\"}" \
-it csv -v -ro -rn 100 -rbs 50 -sp 4 -sr 17

#
Expand All @@ -49,7 +49,7 @@ for CASE in ${FWDIR}/src/test/data/synth/*-meta.txt; do
"${FWDIR}/bin/variant-spark" --spark --master local[2] -- importance -if "${DATA_DIR}/${PREFIX}-wide.csv" -ff "${DATA_DIR}/${PREFIX}-labels.csv" \
-fc "${RESP}" \
-on 100 -of "${OUTPUT_DIR}/${PREFIX}-imp_${RESP}.csv" \
-ivo ${IVO} \
-io "{\"defVariableType\":\"ORDINAL(${IVO})\"}" \
-it csv -v -ro -rn 100 -rbs 50 -sp 4 -sr 17
done
done

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ protected SplitInfo doFindSplit(int[] splitIndices) {
if (!thisAggregator.isEmpty()) {
// only consider value that appeared at least once in the split
impurityCalc.update(thisAggregator);
double thisImpurity = impurityCalc.getValue(leftRightImpurity);
if (thisImpurity < minImpurity) {
result = new SplitInfo(sp, thisImpurity, leftRightImpurity.left(), leftRightImpurity.right());
minImpurity = thisImpurity;
if (impurityCalc.hasProperSplit()) {
double thisImpurity = impurityCalc.getValue(leftRightImpurity);
if (thisImpurity < minImpurity) {
result = new SplitInfo(sp, thisImpurity, leftRightImpurity.left(), leftRightImpurity.right());
minImpurity = thisImpurity;
}
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ public SplitInfo doFindSplit(int[] splitIndices) {
impurityCalc.update(i);
}
}
double g = impurityCalc.getValue(leftRightImpurity);
if (g < minImpurity ) {
result = new SplitInfo(sp, g, leftRightImpurity.left(), leftRightImpurity.right());
minImpurity = g;
if (impurityCalc.hasProperSplit()) {
double g = impurityCalc.getValue(leftRightImpurity);
if (g < minImpurity ) {
result = new SplitInfo(sp, g, leftRightImpurity.left(), leftRightImpurity.right());
minImpurity = g;
}
}
}
return result;
Expand Down
10 changes: 10 additions & 0 deletions src/main/scala/au/csiro/variantspark/algo/Split.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,16 @@ trait IndexedSplitAggregator {
left.add(agg)
right.sub(agg)
}

/**
* Tells whether the current split is a proper one, that is, a split that
* does not put all elements on one side.
* Returns true only when both `left` and `right` report themselves as non-empty.
*/
def hasProperSplit:Boolean = !left.isEmpty() && !right.isEmpty()

/**
* Get the impurity value of the current split.
* Delegates to `left.splitValue(right, outSplitImp)` and returns its result.
* NOTE(review): `outSplitImp` looks like an output argument that `splitValue`
* fills in with the per-side impurities; confirm against its implementation.
*/
def getValue(outSplitImp:SplitImpurity):Double = {
left.splitValue(right, outSplitImp)
}
Expand Down
Binary file removed src/test/data/regression/.CNAE-9-imp_category.csv.crc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit 53fdc1b

Please sign in to comment.