Skip to content
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ml.combust.mleap.core.classification

import com.microsoft.ml.spark.lightgbm.LightGBMBooster
import org.apache.spark.ml.linalg.{Vector, Vectors}

object LightGBMClassifierModel{
def apply(model: String,
labelColName: String,
featuresColName: String,
predictionColName: String,
probColName: String,
rawPredictionColName: String,
actualNumClasses: Int): LightGBMClassifierModel = LightGBMClassifierModel(
model, labelColName, featuresColName, predictionColName, probColName, rawPredictionColName,
actualNumClasses = actualNumClasses)
}

case class LightGBMClassifierModel(
override val booster: LightGBMBooster,
override val labelColName: String,
override val featuresColName: String,
override val predictionColName: String,
override val probColName: String,
override val rawPredictionColName: String,
override val thresholdValues: Option[Seq[Double]],
override val actualNumClasses: Int)
extends ProbabilisticClassificationModel with LightGBMClassifierModelBase with Serializable {
override val numClasses: Int = actualNumClasses

override def rawToProbabilityInPlace(raw: Vector): Vector = {
throw new NotImplementedError("Unexpected error in LightGBMClassificationModel:" +
" raw2probabilityInPlace should not be called!")
}

override def predictRaw(features: Vector): Vector = {
Vectors.dense(booster.score(features, true, true))
}

override def predictProbabilities(features: Vector): Vector = {
Vectors.dense(booster.score(features, false, true))
}

override def predict(features: Vector): Double = {
rawToPrediction(predictRaw(features))
}

override val numFeatures: Int = 0
}

trait LightGBMClassifierModelBase {
def booster: LightGBMBooster
def labelColName: String
def featuresColName: String
def predictionColName: String
def probColName: String
def rawPredictionColName: String
def thresholdValues: Option[Seq[Double]]
def actualNumClasses: Int
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package ml.combust.mleap.core.regression

import com.microsoft.ml.spark.lightgbm.LightGBMBooster
import ml.combust.mleap.core.Model
import ml.combust.mleap.core.types.{ScalarType, StructType, TensorType}
import org.apache.spark.ml.linalg.{Vector, Vectors}

object LightGBMRegressionModel{
def apply(model: String,
featuresColName: String,
predictionColName: String): LightGBMRegressionModel = LightGBMRegressionModel(
model, featuresColName, predictionColName)
}

case class LightGBMRegressionModel(
override val booster: LightGBMBooster,
override val featuresColName: String,
override val predictionColName: String)
extends LightGBMRegressionModelBase with Model {

override def inputSchema: StructType = StructType("features" -> TensorType.Double()).get

override def outputSchema: StructType = StructType("prediction" -> ScalarType.Double.nonNullable).get

def predict(features: Vector): Double = {
booster.score(features, false, false)(0)
}
}

trait LightGBMRegressionModelBase {
def booster: LightGBMBooster
def featuresColName: String
def predictionColName: String
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package ml.combust.mleap.bundle.ops.classification

import com.microsoft.ml.spark.lightgbm.LightGBMBooster
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl.{Model, Value}
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.core.classification.LightGBMClassifierModel
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.runtime.transformer.classification.LightGBMClassifier

class LightGBMClassificationModelOp extends MleapOp[LightGBMClassifier, LightGBMClassifierModel] {
override val Model: OpModel[MleapContext, LightGBMClassifierModel] =
new OpModel[MleapContext, LightGBMClassifierModel] {
override val klazz: Class[LightGBMClassifierModel] = classOf[LightGBMClassifierModel]

override def opName: String = "lightgbm_classifier"

override def store(model: Model, obj: LightGBMClassifierModel)(
implicit context: BundleContext[MleapContext]): Model = {
model
.withValue("booster", Value.string(obj.booster.model))
.withValue("labelColName", Value.string(obj.labelColName))
.withValue("featuresColName", Value.string(obj.featuresColName))
.withValue("predictionColName", Value.string(obj.predictionColName))
.withValue("probColName", Value.string(obj.probColName))
.withValue(
"rawPredictionColName",
Value.string(obj.rawPredictionColName)
)
.withValue(
"thresholdValues",
obj.thresholds.map(_.toSeq).map(Value.doubleList)
).withValue("actualNumClasses", Value.int(obj.numClasses))
}

override def load(model: Model)(implicit context: BundleContext[MleapContext]): LightGBMClassifierModel =
{
val booster = new LightGBMBooster(model.value("booster").getString)
new LightGBMClassifierModel(
booster,
model.value("labelColName").getString,
model.value("featuresColName").getString,
model.value("predictionColName").getString,
model.value("probColName").getString,
model.value("rawPredictionColName").getString,
model.getValue("thresholdValues").map(_.getDoubleList.toArray),
model.value("actualNumClasses").getInt
)

}
}

override def model(node: LightGBMClassifier): LightGBMClassifierModel = node.model
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package ml.combust.mleap.bundle.ops.regression

import com.microsoft.ml.spark.lightgbm.LightGBMBooster
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl.{Model, Value}
import ml.combust.bundle.op.OpModel
import ml.combust.mleap.bundle.ops.MleapOp
import ml.combust.mleap.core.regression.LightGBMRegressionModel
import ml.combust.mleap.runtime.MleapContext
import ml.combust.mleap.runtime.transformer.regression.LightGBMRegression

class LightGBMRegressionModelOp extends MleapOp[LightGBMRegression, LightGBMRegressionModel] {
override val Model: OpModel[MleapContext, LightGBMRegressionModel] =
new OpModel[MleapContext, LightGBMRegressionModel] {
override val klazz: Class[LightGBMRegressionModel] = classOf[LightGBMRegressionModel]

override def opName: String = "lightgbm_regression"

override def store(model: Model, obj: LightGBMRegressionModel)(
implicit context: BundleContext[MleapContext]): Model = {
model
.withValue("booster", Value.string(obj.booster.model))
.withValue("featuresColName", Value.string(obj.featuresColName))
.withValue("predictionColName", Value.string(obj.predictionColName))
}

override def load(model: Model)(implicit context: BundleContext[MleapContext]): LightGBMRegressionModel =
{
val booster = new LightGBMBooster(model.value("booster").getString)
new LightGBMRegressionModel(
booster,
model.value("featuresColName").getString,
model.value("predictionColName").getString)
}
}

override def model(node: LightGBMRegression): LightGBMRegressionModel = node.model
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package ml.combust.mleap.runtime.transformer.classification

import ml.combust.mleap.core.annotation.SparkCode
import ml.combust.mleap.core.classification.LightGBMClassifierModel
import ml.combust.mleap.core.types.NodeShape
import ml.combust.mleap.core.util.VectorConverters._
import ml.combust.mleap.runtime.frame.{MultiTransformer, Row, Transformer}
import ml.combust.mleap.runtime.function.UserDefinedFunction
import ml.combust.mleap.tensor.Tensor
import org.apache.spark.ml.linalg.{Vector, Vectors}

@SparkCode(uri = "https://github.com/Azure/mmlspark/blob/f07e5584459e909223a470e6d2e11135b292f3ea/" +
"src/main/scala/com/microsoft/ml/spark/lightgbm/LightGBMClassifier.scala")
case class LightGBMClassifier(override val uid: String = Transformer.uniqueName("lightgbm_classifier"),
override val shape: NodeShape,
override val model: LightGBMClassifierModel) extends MultiTransformer {
override val exec: UserDefinedFunction = {
val f = (features: Tensor[Double]) => {

if (model.thresholdValues.isDefined) {
require(model.thresholdValues.get.length == model.numClasses, this.getClass.getSimpleName +
".transform() called with non-matching numClasses and thresholds.length." +
s" numClasses=$model.numClasses, but thresholds has length ${model.thresholdValues.get.length}")
}

val rawPrediction: Vector =
if (shape.getOutput("raw_prediction").nonEmpty)
model.predictRaw(features)
else
Vectors.dense(Array.empty[Double])

val probability: Vector =
if (shape.getOutput("probability").nonEmpty)
model.predictProbabilities(features)
else
Vectors.dense(Array.empty[Double])

val prediction =
if (shape.getOutput("prediction").isDefined) {
if (shape.getOutput("raw_prediction").nonEmpty && model.thresholdValues.isEmpty) {
// Note: Only call raw2prediction if thresholds not defined
model.rawToPrediction(rawPrediction)
} else if (shape.getOutput("prediction").nonEmpty) {
model.probabilityToPrediction(probability)
} else {
model.predict(features)
}
}
else
Double.NaN

Row(rawPrediction: Tensor[Double], probability: Tensor[Double], prediction)
}
UserDefinedFunction(f, outputSchema, inputSchema)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ml.combust.mleap.runtime.transformer.regression

import ml.combust.mleap.core.regression.LightGBMRegressionModel
import ml.combust.mleap.core.types._
import ml.combust.mleap.core.util.VectorConverters._
import ml.combust.mleap.runtime.frame.{MultiTransformer, Row, Transformer}
import ml.combust.mleap.runtime.function.UserDefinedFunction
import ml.combust.mleap.tensor.Tensor

case class LightGBMRegression(override val uid: String = Transformer.uniqueName("lightgbm_regression"),
override val shape: NodeShape,
override val model: LightGBMRegressionModel) extends MultiTransformer {
override val exec: UserDefinedFunction = {
val f = (features: Tensor[Double]) => {
val prediction = model.predict(features)
Row(prediction: Double)
}
UserDefinedFunction(f, outputSchema, inputSchema)
}
}
2 changes: 2 additions & 0 deletions mleap-spark/src/main/resources/reference.conf
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ml.combust.mleap.spark.registry.builtin-ops = [
"org.apache.spark.ml.bundle.ops.classification.MultiLayerPerceptronClassifierOp",
"org.apache.spark.ml.bundle.ops.classification.OneVsRestOp",
"org.apache.spark.ml.bundle.ops.classification.RandomForestClassifierOp",
"org.apache.spark.ml.bundle.ops.classification.LightGBMClassificationModelOp",

"org.apache.spark.ml.bundle.ops.clustering.GaussianMixtureOp",
"org.apache.spark.ml.bundle.ops.clustering.KMeansOp",
Expand Down Expand Up @@ -42,6 +43,7 @@ ml.combust.mleap.spark.registry.builtin-ops = [
"org.apache.spark.ml.bundle.ops.regression.IsotonicRegressionOp",
"org.apache.spark.ml.bundle.ops.regression.LinearRegressionOp",
"org.apache.spark.ml.bundle.ops.regression.RandomForestRegressionOp",
"org.apache.spark.ml.bundle.ops.regression.LightGBMRegressionModelOp",

"org.apache.spark.ml.bundle.ops.recommendation.ALSOp",

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
package org.apache.spark.ml.bundle.ops.classification

import com.microsoft.ml.spark.lightgbm.{LightGBMBooster, LightGBMClassificationModel}
import ml.combust.bundle.BundleContext
import ml.combust.bundle.dsl.{Model, NodeShape, Value}
import ml.combust.bundle.op.OpModel
import org.apache.spark.ml.bundle._

class LightGBMClassificationModelOp
extends SimpleSparkOp[LightGBMClassificationModel] {
override def sparkInputs(obj: LightGBMClassificationModel): Seq[ParamSpec] = {
Seq("features" -> obj.featuresCol)
}
override def sparkOutputs(obj: LightGBMClassificationModel): Seq[ParamSpec] = {
Seq(
"raw_prediction" -> obj.rawPredictionCol,
"probability" -> obj.probabilityCol,
"prediction" -> obj.predictionCol
)
}
override def sparkLoad(
uid: String,
shape: NodeShape,
model: LightGBMClassificationModel
): LightGBMClassificationModel = {
val booster = new LightGBMBooster(model.getModel.model)
new LightGBMClassificationModel(
"",
booster,
model.getLabelCol,
model.getFeaturesCol,
model.getPredictionCol,
model.getProbabilityCol,
model.getRawPredictionCol,
Some(model.getThresholds),
model.numClasses
)
}

override val Model: OpModel[SparkBundleContext, LightGBMClassificationModel] =
new OpModel[SparkBundleContext, LightGBMClassificationModel] {
override val klazz: Class[LightGBMClassificationModel] =
classOf[LightGBMClassificationModel]
override def opName: String = "lightgbm_classifier"
override def store(model: Model, obj: LightGBMClassificationModel)(
implicit context: BundleContext[SparkBundleContext]
): Model = {
assert(
context.context.dataset.isDefined,
BundleHelper.sampleDataframeMessage(klazz)
)
val thresholds = if (obj.isSet(obj.thresholds)) {
Some(obj.getThresholds)
} else None

model
.withValue("booster", Value.string(obj.getModel.model))
.withValue("labelColName", Value.string(obj.getLabelCol))
.withValue("featuresColName", Value.string(obj.getFeaturesCol))
.withValue("predictionColName", Value.string(obj.getPredictionCol))
.withValue("probColName", Value.string(obj.getProbabilityCol))
.withValue("rawPredictionColName", Value.string(obj.getRawPredictionCol))
.withValue("thresholdValues", thresholds.map(_.toSeq).map(Value.doubleList))
.withValue("actualNumClasses", Value.int(obj.numClasses))
}

override def load(model: Model)(
implicit context: BundleContext[SparkBundleContext]
): LightGBMClassificationModel = {
val booster = new LightGBMBooster(model.value("booster").getString)
new LightGBMClassificationModel(
"",
booster,
model.value("labelColName").getString,
model.value("featuresColName").getString,
model.value("predictionColName").getString,
model.value("probColName").getString,
model.value("rawPredictionColName").getString,
model.getValue("thresholdValues").map(_.getDoubleList.toArray),
model.value("actualNumClasses").getInt
)
}
}
}
Loading