I get the following exception in my local PySpark pipeline when I try to serialize the model using MLeap:
Traceback (most recent call last):
File ".\examples\src\main\python\ml\random_forest_classifier_example.py", line 88, in <module>
model.serializeToBundle("jar:file:/Users/fwollsch/Downloads/test.zip", model.transform(trainingData))
File "C:\Program Files\Python37\lib\site-packages\mleap\pyspark\spark_support.py", line 25, in serializeToBundle
serializer.serializeToBundle(self, path, dataset=dataset)
File "C:\Program Files\Python37\lib\site-packages\mleap\pyspark\spark_support.py", line 42, in serializeToBundle
self._java_obj.serializeToBundle(transformer._to_java(), path, dataset._jdf)
File "C:\Program Files\Python37\lib\site-packages\py4j\java_gateway.py", line 1257, in __call__
answer, self.gateway_client, self.target_id, self.name)
File "C:\Program Files\Python37\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
return f(*a, **kw)
File "C:\Program Files\Python37\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o408.serializeToBundle.
: java.lang.NoClassDefFoundError: scalapb/Message
at java.lang.ClassLoader.defineClass1(Native Method)
at java.lang.ClassLoader.defineClass(ClassLoader.java:763)
at java.security.SecureClassLoader.defineClass(SecureClassLoader.java:142)
at java.net.URLClassLoader.defineClass(URLClassLoader.java:467)
at java.net.URLClassLoader.access$100(URLClassLoader.java:73)
at java.net.URLClassLoader$1.run(URLClassLoader.java:368)
at java.net.URLClassLoader$1.run(URLClassLoader.java:362)
at java.security.AccessController.doPrivileged(Native Method)
at java.net.URLClassLoader.findClass(URLClassLoader.java:361)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:335)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
at ml.combust.bundle.dsl.Value$.stringList(Value.scala:207)
at org.apache.spark.ml.bundle.ops.feature.StringIndexerOp$$anon$1.store(StringIndexerOp.scala:20)
at org.apache.spark.ml.bundle.ops.feature.StringIndexerOp$$anon$1.store(StringIndexerOp.scala:13)
at ml.combust.bundle.serializer.ModelSerializer$$anonfun$write$1.apply(ModelSerializer.scala:87)
at ml.combust.bundle.serializer.ModelSerializer$$anonfun$write$1.apply(ModelSerializer.scala:83)
at scala.util.Try$.apply(Try.scala:192)
at ml.combust.bundle.serializer.ModelSerializer.write(ModelSerializer.scala:83)
at ml.combust.bundle.serializer.NodeSerializer$$anonfun$write$1.apply(NodeSerializer.scala:85)
at ml.combust.bundle.serializer.NodeSerializer$$anonfun$write$1.apply(NodeSerializer.scala:81)
at scala.util.Try$.apply(Try.scala:192)
at ml.combust.bundle.serializer.NodeSerializer.write(NodeSerializer.scala:81)
at ml.combust.bundle.serializer.GraphSerializer$$anonfun$writeNode$1.apply(GraphSerializer.scala:34)
at ml.combust.bundle.serializer.GraphSerializer$$anonfun$writeNode$1.apply(GraphSerializer.scala:30)
at scala.util.Try$.apply(Try.scala:192)
at ml.combust.bundle.serializer.GraphSerializer.writeNode(GraphSerializer.scala:30)
at ml.combust.bundle.serializer.GraphSerializer$$anonfun$write$2.apply(GraphSerializer.scala:21)
at ml.combust.bundle.serializer.GraphSerializer$$anonfun$write$2.apply(GraphSerializer.scala:21)
at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57)
at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66)
at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35)
at ml.combust.bundle.serializer.GraphSerializer.write(GraphSerializer.scala:20)
at org.apache.spark.ml.bundle.ops.PipelineOp$$anon$1.store(PipelineOp.scala:21)
at org.apache.spark.ml.bundle.ops.PipelineOp$$anon$1.store(PipelineOp.scala:14)
at ml.combust.bundle.serializer.ModelSerializer$$anonfun$write$1.apply(ModelSerializer.scala:87)
at ml.combust.bundle.serializer.ModelSerializer$$anonfun$write$1.apply(ModelSerializer.scala:83)
at scala.util.Try$.apply(Try.scala:192)
at ml.combust.bundle.serializer.ModelSerializer.write(ModelSerializer.scala:83)
at ml.combust.bundle.serializer.NodeSerializer$$anonfun$write$1.apply(NodeSerializer.scala:85)
at ml.combust.bundle.serializer.NodeSerializer$$anonfun$write$1.apply(NodeSerializer.scala:81)
at scala.util.Try$.apply(Try.scala:192)
at ml.combust.bundle.serializer.NodeSerializer.write(NodeSerializer.scala:81)
at ml.combust.bundle.serializer.BundleSerializer$$anonfun$write$1.apply(BundleSerializer.scala:34)
at ml.combust.bundle.serializer.BundleSerializer$$anonfun$write$1.apply(BundleSerializer.scala:29)
at scala.util.Try$.apply(Try.scala:192)
at ml.combust.bundle.serializer.BundleSerializer.write(BundleSerializer.scala:29)
at ml.combust.bundle.BundleWriter.save(BundleWriter.scala:31)
at ml.combust.mleap.spark.SimpleSparkSerializer$$anonfun$serializeToBundleWithFormat$2.apply(SimpleSparkSerializer.scala:26)
at ml.combust.mleap.spark.SimpleSparkSerializer$$anonfun$serializeToBundleWithFormat$2.apply(SimpleSparkSerializer.scala:25)
at resource.AbstractManagedResource$$anonfun$5.apply(AbstractManagedResource.scala:88)
at scala.util.control.Exception$Catch$$anonfun$either$1.apply(Exception.scala:125)
at scala.util.control.Exception$Catch$$anonfun$either$1.apply(Exception.scala:125)
at scala.util.control.Exception$Catch.apply(Exception.scala:103)
at scala.util.control.Exception$Catch.either(Exception.scala:125)
at resource.AbstractManagedResource.acquireFor(AbstractManagedResource.scala:88)
at resource.ManagedResourceOperations$class.apply(ManagedResourceOperations.scala:26)
at resource.AbstractManagedResource.apply(AbstractManagedResource.scala:50)
at resource.DeferredExtractableManagedResource$$anonfun$tried$1.apply(AbstractManagedResource.scala:33)
at scala.util.Try$.apply(Try.scala:192)
at resource.DeferredExtractableManagedResource.tried(AbstractManagedResource.scala:33)
at ml.combust.mleap.spark.SimpleSparkSerializer.serializeToBundleWithFormat(SimpleSparkSerializer.scala:27)
at ml.combust.mleap.spark.SimpleSparkSerializer.serializeToBundle(SimpleSparkSerializer.scala:17)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
... 74 more
I'm using the random_forest_classifier_example.py from the pyspark-examples, with the addition of MLeap:
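The MLeap-related additions look roughly like the sketch below; the serializeToBundle call is the one from the traceback, and the surrounding pipeline code follows the stock example (abbreviated, so treat it as an approximation rather than the exact diff):

```python
import mleap.pyspark
# Importing this module attaches serializeToBundle to Spark ML transformers
# (it is the spark_support.py shown in the traceback above).
from mleap.pyspark.spark_support import SimpleSparkSerializer  # noqa: F401

from pyspark.sql import SparkSession
from pyspark.ml import Pipeline
from pyspark.ml.classification import RandomForestClassifier
from pyspark.ml.feature import StringIndexer, VectorIndexer

spark = SparkSession.builder.appName("RandomForestClassifierExample").getOrCreate()

# Data and pipeline as in random_forest_classifier_example.py (abbreviated):
data = spark.read.format("libsvm").load("data/mllib/sample_libsvm_data.txt")
(trainingData, testData) = data.randomSplit([0.7, 0.3])

labelIndexer = StringIndexer(inputCol="label", outputCol="indexedLabel").fit(data)
featureIndexer = VectorIndexer(inputCol="features", outputCol="indexedFeatures",
                               maxCategories=4).fit(data)
rf = RandomForestClassifier(labelCol="indexedLabel", featuresCol="indexedFeatures",
                            numTrees=10)
model = Pipeline(stages=[labelIndexer, featureIndexer, rf]).fit(trainingData)

# MLeap addition: serialize the fitted PipelineModel to a bundle zip.
# This is the call that raises the NoClassDefFoundError above.
model.serializeToBundle("jar:file:/Users/fwollsch/Downloads/test.zip",
                        model.transform(trainingData))
```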
My environment:

OS: Windows 10
MLeap: 0.15.0 (installed using pip)
PySpark: 2.4.5
Python: 3.7.2
I have added the missing jars to the jars directory of my PySpark installation. The following jars are currently in my /jars directory:
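As a cross-check on that jar set, the same dependencies could in principle be resolved by Spark itself through spark.jars.packages rather than copied into /jars manually. A sketch of that setup, assuming the Scala 2.11 / Spark 2.4 coordinate for MLeap 0.15.0 (ml.combust.mleap:mleap-spark_2.11:0.15.0):

```python
from pyspark.sql import SparkSession

# Sketch: let Spark pull mleap-spark and its transitive dependencies from Maven,
# which should include the ScalaPB runtime that defines scalapb.Message,
# instead of placing jars in <SPARK_HOME>/jars by hand.
spark = (SparkSession.builder
         .appName("RandomForestClassifierExample")
         .config("spark.jars.packages", "ml.combust.mleap:mleap-spark_2.11:0.15.0")
         .getOrCreate())
```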