Commit 92f7118

Added partly written DTStatsAggregator

1 parent fd8df30 commit 92f7118

3 files changed: +53 −3 lines changed
mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DTStatsAggregator.scala

Lines changed: 52 additions & 0 deletions
@@ -0,0 +1,52 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.mllib.tree.impl

import scala.collection.mutable


/**
 * :: Experimental ::
 * DecisionTree statistics aggregator.
 * This holds a flat array of statistics for a set of (nodes, features, bins)
 * and helps with indexing.
 * TODO: Allow views of Vector types to replace some of the code in here.
 */
private[tree] class DTStatsAggregator(
    val numNodes: Int,
    val numFeatures: Int,
    val numBins: Array[Int],
    val statsSize: Int) {

  require(numBins.size == numFeatures, s"DTStatsAggregator was given numBins" +
    s" (of size ${numBins.size}) which did not match numFeatures = $numFeatures.")

  // Offset of each feature's bins within one node's block of stats.
  // The statsSize factor is already folded into these offsets.
  val featureOffsets: Array[Int] = numBins.scanLeft(0)(_ + _).map(statsSize * _)

  // Total storage for all (node, feature, bin) statistics.
  // featureOffsets.last is the size of one node's block and already includes statsSize.
  val allStatsSize: Int = numNodes * featureOffsets.last

  val allStats: Array[Double] = new Array[Double](allStatsSize)

  // TODO: Make views
  /*
   Uses:
     point access
   */

}
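
The scaladoc above says the class "holds a flat array of statistics for a set of (nodes, features, bins) and helps with indexing", but the indexing helpers are not written yet in this commit. A minimal sketch of how an offset into allStats could be computed, assuming a node-major layout and a hypothetical statsIndex helper (neither is part of the commit), is:

// Hypothetical sketch, not from this commit: compute the offset of one
// (node, feature, bin) triple in the flat allStats array, assuming stats are
// laid out node-major, then by feature, then by bin.
object DTStatsIndexingSketch {
  def statsIndex(
      featureOffsets: Array[Int],
      statsSize: Int,
      nodeIndex: Int,
      featureIndex: Int,
      binIndex: Int): Int = {
    // featureOffsets.last is the size of one node's block, since featureOffsets
    // already folds the statsSize factor into each bin offset.
    val nodeStride = featureOffsets.last
    nodeIndex * nodeStride + featureOffsets(featureIndex) + binIndex * statsSize
  }
}

Under that layout, the statistics for the triple would occupy allStats(i) through allStats(i + statsSize - 1), where i is the returned offset.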

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Gini.scala

Lines changed: 0 additions & 3 deletions
@@ -89,9 +89,6 @@ private[tree] class GiniAggregator(numClasses: Int)
       throw new IllegalArgumentException(s"GiniAggregator given label $label" +
         s" but requires label < numClasses (= ${counts.size}).")
     }
-    if (label.toInt >= counts.size) {
-      throw new RuntimeException(s"label = $label, counts = $counts")
-    }
     counts(label.toInt) += 1
   }

mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala

Lines changed: 1 addition & 0 deletions
@@ -113,6 +113,7 @@ private[tree] abstract class ImpurityAggregator(statsSize: Int) extends Serializ
   /**
    * Return the index of the largest element in this array.
    * If there are ties, the first maximal element is chosen.
+   * TODO: Move this elsewhere in Spark?
    */
   protected def indexOfLargestArrayElement(array: Array[Double]): Int = {
     val result = array.foldLeft(-1, Double.MinValue, 0) {
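
The hunk above only shows the first line of indexOfLargestArrayElement as context. A plausible completion of the foldLeft-based argmax described by its scaladoc (first maximal element wins on ties) could look like the sketch below; it is consistent with the signature shown but is not necessarily the exact body in Impurity.scala.

object ArgMaxSketch {
  def indexOfLargestArrayElement(array: Array[Double]): Int = {
    // Accumulator: (index of max so far, max value so far, current index).
    val result = array.foldLeft((-1, Double.MinValue, 0)) {
      case ((maxIndex, maxValue, currentIndex), value) =>
        if (value > maxValue) {
          // Strictly greater-than, so ties keep the first maximal element.
          (currentIndex, value, currentIndex + 1)
        } else {
          (maxIndex, maxValue, currentIndex + 1)
        }
    }
    require(result._1 >= 0, "indexOfLargestArrayElement requires a non-empty array.")
    result._1
  }
}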

0 commit comments