@@ -84,7 +84,8 @@ import org.apache.spark.graphx._
84
84
import org.apache.spark.rdd.RDD
85
85
{% endhighlight %}
86
86
87
- If you are not using the Spark shell you will also need a Spark context.
87
+ If you are not using the Spark shell you will also need a `SparkContext`. To learn more about
88
+ getting started with Spark refer to the [Spark Quick Start Guide](quick-start.html).
88
89
89
90
# The Property Graph
90
91
<a name="property_graph"></a>
@@ -190,7 +191,7 @@ and `graph.edges` members respectively.
190
191
{% highlight scala %}
191
192
val graph: Graph[(String, String), String] // Constructed from above
192
193
// Count all users which are postdocs
193
- graph.vertices.filter { case (id, (name, pos)) => pos == "postdoc"}.count
194
+ graph.vertices.filter { case (id, (name, pos)) => pos == "postdoc" }.count
194
195
// Count all the edges where src > dst
195
196
graph.edges.filter(e => e.srcId > e.dstId).count
196
197
{% endhighlight %}
@@ -258,8 +259,10 @@ val graph: Graph[(String, String), String]
258
259
val inDegrees: VertexRDD[Int] = graph.inDegrees
259
260
{% endhighlight %}
260
261
261
- The reason for differentiating between core graph operations and GraphOps is to be able to support
262
- various graph representations in the future.
262
+ The reason for differentiating between core graph operations and [`GraphOps`][GraphOps] is to be
263
+ able to support different graph representations in the future. Each graph representation must
264
+ provide implementations of the core operations and reuse many of the useful operations defined in
265
+ [`GraphOps`][GraphOps].
263
266
264
267
## Property Operators
265
268
@@ -334,14 +337,32 @@ interest or eliminate broken links. For example in the following code we remove
334
337
[Graph.subgraph]: api/graphx/index.html#org.apache.spark.graphx.Graph@subgraph((EdgeTriplet[VD,ED])⇒Boolean,(VertexID,VD)⇒Boolean):Graph[VD,ED]
335
338
336
339
{% highlight scala %}
337
- val users: RDD[(VertexId, (String, String))]
338
- val edges: RDD[Edge[String]]
340
+ // Create an RDD for the vertices
341
+ val users: RDD[(VertexID, (String, String))] =
342
+ sc.parallelize(Array((3L, ("rxin", "student")), (7L, ("jgonzal", "postdoc")),
343
+ (5L, ("franklin", "prof")), (2L, ("istoica", "prof")),
344
+ (4L, ("peter", "student"))))
345
+ // Create an RDD for edges
346
+ val relationships: RDD[Edge[String]] =
347
+ sc.parallelize(Array(Edge(3L, 7L, "collab"), Edge(5L, 3L, "advisor"),
348
+ Edge(2L, 5L, "colleague"), Edge(5L, 7L, "pi"),
349
+ Edge(4L, 0L, "student"), Edge(5L, 0L, "colleague")))
339
350
// Define a default user in case there are relationships with missing users
340
351
val defaultUser = ("John Doe", "Missing")
341
352
// Build the initial Graph
342
353
val graph = Graph(users, relationships, defaultUser)
354
+ // Notice that there is a user 0 (for which we have no information) connecting users
355
+ // 4 (peter) and 5 (franklin).
356
+ graph.triplets.map(
357
+ triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
358
+ ).collect.foreach(println(_))
343
359
// Remove missing vertices as well as the edges connected to them
344
360
val validGraph = graph.subgraph(vpred = (id, attr) => attr._2 != "Missing")
361
+ // The valid subgraph will disconnect users 4 and 5 by removing user 0
362
+ validGraph.vertices.collect.foreach(println(_))
363
+ validGraph.triplets.map(
364
+ triplet => triplet.srcAttr._1 + " is the " + triplet.attr + " of " + triplet.dstAttr._1
365
+ ).collect.foreach(println(_))
345
366
{% endhighlight %}
346
367
347
368
> Note in the above example only the vertex predicate is provided. The `subgraph` operator defaults
0 commit comments