Skip to content

[SPARK-10237] [MLLIB] update since versions in mllib.fpm #8429

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,15 @@ class AssociationRules private[fpm] (
}.filter(_.confidence >= minConfidence)
}

/**
 * Java-friendly version of [[run]].
 *
 * Unwraps the underlying RDD from the `JavaRDD` argument and delegates to the RDD-based
 * [[run]], supplying the class tag for `Item` explicitly.
 */
@Since("1.5.0")
def run[Item](freqItemsets: JavaRDD[FreqItemset[Item]]): JavaRDD[Rule[Item]] = {
// This overload declares no ClassTag bound on Item, so the tag is obtained from fakeClassTag
// and passed by hand in the second argument list.
val tag = fakeClassTag[Item]
run(freqItemsets.rdd)(tag)
}
}

@Since("1.5.0")
object AssociationRules {

/**
Expand All @@ -104,8 +107,8 @@ object AssociationRules {
@Since("1.5.0")
@Experimental
class Rule[Item] private[fpm] (
val antecedent: Array[Item],
val consequent: Array[Item],
@Since("1.5.0") val antecedent: Array[Item],
@Since("1.5.0") val consequent: Array[Item],
freqUnion: Double,
freqAntecedent: Double) extends Serializable {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ import org.apache.spark.storage.StorageLevel
*/
@Since("1.3.0")
@Experimental
class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable {
class FPGrowthModel[Item: ClassTag] @Since("1.3.0") (
@Since("1.3.0") val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable {
/**
* Generates association rules for the [[Item]]s in [[freqItemsets]].
* @param confidence minimal confidence of the rules produced
Expand Down Expand Up @@ -126,6 +127,8 @@ class FPGrowth private (
new FPGrowthModel(freqItemsets)
}

/** Java-friendly version of [[run]]. */
@Since("1.3.0")
def run[Item, Basket <: JavaIterable[Item]](data: JavaRDD[Basket]): FPGrowthModel[Item] = {
implicit val tag = fakeClassTag[Item]
run(data.rdd.map(_.asScala.toArray))
Expand Down Expand Up @@ -226,7 +229,9 @@ object FPGrowth {
*
*/
@Since("1.3.0")
class FreqItemset[Item](val items: Array[Item], val freq: Long) extends Serializable {
class FreqItemset[Item] @Since("1.3.0") (
@Since("1.3.0") val items: Array[Item],
@Since("1.3.0") val freq: Long) extends Serializable {

/**
* Returns items in a Java List.
Expand Down
23 changes: 20 additions & 3 deletions mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ import scala.collection.JavaConverters._
import scala.reflect.ClassTag

import org.apache.spark.Logging
import org.apache.spark.annotation.Experimental
import org.apache.spark.annotation.{Experimental, Since}
import org.apache.spark.api.java.JavaRDD
import org.apache.spark.api.java.JavaSparkContext.fakeClassTag
import org.apache.spark.rdd.RDD
Expand All @@ -51,6 +51,7 @@ import org.apache.spark.storage.StorageLevel
* (Wikipedia)]]
*/
@Experimental
@Since("1.5.0")
class PrefixSpan private (
private var minSupport: Double,
private var maxPatternLength: Int,
Expand All @@ -61,17 +62,20 @@ class PrefixSpan private (
* Constructs a default instance with default parameters
* {minSupport: `0.1`, maxPatternLength: `10`, maxLocalProjDBSize: `32000000L`}.
*/
@Since("1.5.0")
// Delegates to the private primary constructor with the documented defaults; the argument order
// is presumably (minSupport, maxPatternLength, maxLocalProjDBSize) -- the third parameter's
// declaration is not visible in this view, confirm against the class header.
def this() = this(0.1, 10, 32000000L)

/**
 * Gets the minimal support (i.e. the frequency of occurrence before a pattern is considered
 * frequent).
 *
 * @return the currently configured minimal support
 */
@Since("1.5.0")
def getMinSupport: Double = minSupport

/**
* Sets the minimal support level (default: `0.1`).
*/
@Since("1.5.0")
def setMinSupport(minSupport: Double): this.type = {
require(minSupport >= 0 && minSupport <= 1,
s"The minimum support value must be in [0, 1], but got $minSupport.")
Expand All @@ -82,11 +86,13 @@ class PrefixSpan private (
/**
 * Gets the maximal pattern length (i.e. the length of the longest sequential pattern to
 * consider).
 *
 * @return the currently configured maximal pattern length
 */
@Since("1.5.0")
def getMaxPatternLength: Int = maxPatternLength

/**
* Sets maximal pattern length (default: `10`).
*/
@Since("1.5.0")
def setMaxPatternLength(maxPatternLength: Int): this.type = {
// TODO: support unbounded pattern length when maxPatternLength = 0
require(maxPatternLength >= 1,
Expand All @@ -98,12 +104,14 @@ class PrefixSpan private (
/**
 * Gets the maximum number of items allowed in a projected database before local processing.
 *
 * @return the currently configured maximum local projected database size
 */
@Since("1.5.0")
def getMaxLocalProjDBSize: Long = maxLocalProjDBSize

/**
* Sets the maximum number of items (including delimiters used in the internal storage format)
* allowed in a projected database before local processing (default: `32000000L`).
*/
@Since("1.5.0")
def setMaxLocalProjDBSize(maxLocalProjDBSize: Long): this.type = {
require(maxLocalProjDBSize >= 0L,
s"The maximum local projected database size must be nonnegative, but got $maxLocalProjDBSize")
Expand All @@ -116,6 +124,7 @@ class PrefixSpan private (
* @param data sequences of itemsets.
* @return a [[PrefixSpanModel]] that contains the frequent patterns
*/
@Since("1.5.0")
def run[Item: ClassTag](data: RDD[Array[Array[Item]]]): PrefixSpanModel[Item] = {
if (data.getStorageLevel == StorageLevel.NONE) {
logWarning("Input data is not cached.")
Expand Down Expand Up @@ -202,6 +211,7 @@ class PrefixSpan private (
* @tparam Sequence sequence type, which is an Iterable of Itemsets
* @return a [[PrefixSpanModel]] that contains the frequent sequential patterns
*/
@Since("1.5.0")
def run[Item, Itemset <: jl.Iterable[Item], Sequence <: jl.Iterable[Itemset]](
data: JavaRDD[Sequence]): PrefixSpanModel[Item] = {
implicit val tag = fakeClassTag[Item]
Expand All @@ -211,6 +221,7 @@ class PrefixSpan private (
}

@Experimental
@Since("1.5.0")
object PrefixSpan extends Logging {

/**
Expand Down Expand Up @@ -535,10 +546,14 @@ object PrefixSpan extends Logging {
* @param freq frequency
* @tparam Item item type
*/
class FreqSequence[Item](val sequence: Array[Array[Item]], val freq: Long) extends Serializable {
@Since("1.5.0")
class FreqSequence[Item] @Since("1.5.0") (
    @Since("1.5.0") val sequence: Array[Array[Item]],
    @Since("1.5.0") val freq: Long) extends Serializable {

  /**
   * Java-facing view of [[sequence]]: the outer list holds one inner list per itemset, in the
   * same order as the underlying arrays.
   */
  @Since("1.5.0")
  def javaSequence: ju.List[ju.List[Item]] = {
    // Convert each inner array to a Java list first, then wrap the outer list.
    val itemsets: List[ju.List[Item]] = sequence.toList.map(itemset => itemset.toList.asJava)
    itemsets.asJava
  }
}
}
Expand All @@ -548,5 +563,7 @@ object PrefixSpan extends Logging {
* @param freqSequences frequent sequences
* @tparam Item item type
*/
class PrefixSpanModel[Item](val freqSequences: RDD[PrefixSpan.FreqSequence[Item]])
// Serializable holder for the mined result: the only state is the public `freqSequences` RDD
// passed to the constructor.
@Since("1.5.0")
class PrefixSpanModel[Item] @Since("1.5.0") (
@Since("1.5.0") val freqSequences: RDD[PrefixSpan.FreqSequence[Item]])
extends Serializable