Skip to content

Commit f4d0fe6

Browse files
committed
use LabeledDocument and Document in example
1 parent 05e3e40 commit f4d0fe6

File tree

1 file changed

+4
-2
lines changed

1 file changed

+4
-2
lines changed

examples/src/main/python/ml/simple_text_classification_pipeline.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -34,12 +34,13 @@
3434
if __name__ == "__main__":
3535
sc = SparkContext(appName="SimpleTextClassificationPipeline")
3636
sqlCtx = SQLContext(sc)
37+
LabeledDocument = Row('id', 'text', 'label')
3738
training = sqlCtx.inferSchema(
3839
sc.parallelize([(0L, "a b c d e spark", 1.0),
3940
(1L, "b d", 0.0),
4041
(2L, "spark f g h", 1.0),
4142
(3L, "hadoop mapreduce", 0.0)])
42-
.map(lambda x: Row(id=x[0], text=x[1], label=x[2])))
43+
.map(lambda x: LabeledDocument(*x)))
4344

4445
tokenizer = Tokenizer() \
4546
.setInputCol("text") \
@@ -55,12 +56,13 @@
5556

5657
model = pipeline.fit(training)
5758

59+
Document = Row('id', 'text')
5860
test = sqlCtx.inferSchema(
5961
sc.parallelize([(4L, "spark i j k"),
6062
(5L, "l m n"),
6163
(6L, "mapreduce spark"),
6264
(7L, "apache hadoop")])
63-
.map(lambda x: Row(id=x[0], text=x[1])))
65+
.map(lambda x: Document(*x)))
6466

6567
prediction = model.transform(test)
6668

0 commit comments

Comments
 (0)