|
| 1 | +package org.apache.spark.graph |
| 2 | + |
| 3 | + |
| 4 | +/** |
| 5 | + * This object implements a Pregel-like bulk-synchronous |
| 6 | + * message-passing API. However, unlike the original Pregel API the |
| 7 | + * GraphX pregel API factors the sendMessage computation over edges, |
| 8 | + * enables the message sending computation to read both vertex |
| 9 | + * attributes, and finally constrains messages to the graph structure. |
| 10 | + * These changes allow for substantially more efficient distributed |
| 11 | + * execution while also exposing greater flexibility for graph based |
| 12 | + * computation. |
| 13 | + * |
| 14 | + * @example We can use the Pregel abstraction to implement PageRank |
| 15 | + * {{{ |
| 16 | + * val pagerankGraph: Graph[Double, Double] = graph |
| 17 | + * // Associate the degree with each vertex |
| 18 | + * .outerJoinVertices(graph.outDegrees){ |
| 19 | + * (vid, vdata, deg) => deg.getOrElse(0) |
| 20 | + * } |
| 21 | + * // Set the weight on the edges based on the degree |
| 22 | + * .mapTriplets( e => 1.0 / e.srcAttr ) |
| 23 | + * // Set the vertex attributes to the initial pagerank values |
| 24 | + * .mapVertices( (id, attr) => 1.0 ) |
| 25 | + * |
| 26 | + * def vertexProgram(id: Vid, attr: Double, msgSum: Double): Double = |
| 27 | + * resetProb + (1.0 - resetProb) * msgSum |
| 28 | + * def sendMessage(id: Vid, edge: EdgeTriplet[Double, Double]): Option[Double] = |
| 29 | + * Some(edge.srcAttr * edge.attr) |
| 30 | + * def messageCombiner(a: Double, b: Double): Double = a + b |
| 31 | + * val initialMessage = 0.0 |
| 32 | + * // Execute pregel for a fixed number of iterations. |
| 33 | + * Pregel(pagerankGraph, initialMessage, numIter)( |
| 34 | + * vertexProgram, sendMessage, messageCombiner) |
| 35 | + * }}} |
| 36 | + * |
| 37 | + */ |
| 38 | +object Pregel { |
| 39 | + |
| 40 | + /** |
| 41 | + * Execute a Pregel-like iterative vertex-parallel abstraction. The |
| 42 | + * user-defined vertex-program `vprog` is executed in parallel on |
| 43 | + * each vertex receiving any inbound messages and computing a new |
| 44 | + * value for the vertex. The `sendMsg` function is then invoked on |
| 45 | + * all out-edges and is used to compute an optional message to the |
| 46 | + * destination vertex. The `mergeMsg` function is a commutative |
| 47 | + * associative function used to combine messages destined to the |
| 48 | + * same vertex. |
| 49 | + * |
| 50 | + * On the first iteration all vertices receive the `initialMsg` and |
| 51 | + * on subsequent iterations if a vertex does not receive a message |
| 52 | + * then the vertex-program is not invoked. |
| 53 | + * |
| 54 | + * This function iterates until there are no remaining messages, or |
| 55 | + * for maxIterations iterations. |
| 56 | + * |
| 57 | + * @tparam VD the vertex data type |
| 58 | + * @tparam ED the edge data type |
| 59 | + * @tparam A the Pregel message type |
| 60 | + * |
| 61 | + * @param graph the input graph. |
| 62 | + * |
| 63 | + * @param initialMsg the message each vertex will receive at the on |
| 64 | + * the first iteration. |
| 65 | + * |
| 66 | + * @param maxIterations the maximum number of iterations to run for. |
| 67 | + * |
| 68 | + * @param vprog the user-defined vertex program which runs on each |
| 69 | + * vertex and receives the inbound message and computes a new vertex |
| 70 | + * value. On the first iteration the vertex program is invoked on |
| 71 | + * all vertices and is passed the default message. On subsequent |
| 72 | + * iterations the vertex program is only invoked on those vertices |
| 73 | + * that receive messages. |
| 74 | + * |
| 75 | + * @param sendMsg a user supplied function that is applied to out |
| 76 | + * edges of vertices that received messages in the current |
| 77 | + * iteration. |
| 78 | + * |
| 79 | + * @param mergeMsg a user supplied function that takes two incoming |
| 80 | + * messages of type A and merges them into a single message of type |
| 81 | + * A. ''This function must be commutative and associative and |
| 82 | + * ideally the size of A should not increase.'' |
| 83 | + * |
| 84 | + * @return the resulting graph at the end of the computation |
| 85 | + * |
| 86 | + */ |
| 87 | + def apply[VD: ClassManifest, ED: ClassManifest, A: ClassManifest] |
| 88 | + (graph: Graph[VD, ED], initialMsg: A, maxIterations: Int = Int.MaxValue)( |
| 89 | + vprog: (Vid, VD, A) => VD, |
| 90 | + sendMsg: EdgeTriplet[VD, ED] => Iterator[(Vid,A)], |
| 91 | + mergeMsg: (A, A) => A) |
| 92 | + : Graph[VD, ED] = { |
| 93 | + |
| 94 | + var g = graph.mapVertices( (vid, vdata) => vprog(vid, vdata, initialMsg) ) |
| 95 | + // compute the messages |
| 96 | + var messages = g.mapReduceTriplets(sendMsg, mergeMsg).cache() |
| 97 | + var activeMessages = messages.count() |
| 98 | + // Loop |
| 99 | + var i = 0 |
| 100 | + while (activeMessages > 0 && i < maxIterations) { |
| 101 | + // Receive the messages. Vertices that didn't get any messages do not appear in newVerts. |
| 102 | + val newVerts = g.vertices.innerJoin(messages)(vprog).cache() |
| 103 | + // Update the graph with the new vertices. |
| 104 | + g = g.outerJoinVertices(newVerts) { (vid, old, newOpt) => newOpt.getOrElse(old) } |
| 105 | + |
| 106 | + val oldMessages = messages |
| 107 | + // Send new messages. Vertices that didn't get any messages don't appear in newVerts, so don't |
| 108 | + // get to send messages. |
| 109 | + messages = g.mapReduceTriplets(sendMsg, mergeMsg, Some((newVerts, EdgeDirection.Out))).cache() |
| 110 | + activeMessages = messages.count() |
| 111 | + // after counting we can unpersist the old messages |
| 112 | + oldMessages.unpersist(blocking=false) |
| 113 | + // count the iteration |
| 114 | + i += 1 |
| 115 | + } |
| 116 | + |
| 117 | + g |
| 118 | + } // end of apply |
| 119 | + |
| 120 | + |
| 121 | + // runs Pregel but treats graph as undirected (e.g. sends messages along both in and out edges) |
| 122 | + def undirectedRun[VD: ClassManifest, ED: ClassManifest, A: ClassManifest] |
| 123 | + (graph: Graph[VD, ED], initialMsg: A, maxIterations: Int = Int.MaxValue)( |
| 124 | + vprog: (Vid, VD, A) => VD, |
| 125 | + sendMsg: EdgeTriplet[VD, ED] => Iterator[(Vid,A)], |
| 126 | + mergeMsg: (A, A) => A) |
| 127 | + : Graph[VD, ED] = { |
| 128 | + |
| 129 | + var g = graph.mapVertices( (vid, vdata) => vprog(vid, vdata, initialMsg) ) |
| 130 | + // compute the messages |
| 131 | + var messages = g.mapReduceTriplets(sendMsg, mergeMsg).cache() |
| 132 | + var activeMessages = messages.count() |
| 133 | + // Loop |
| 134 | + var i = 0 |
| 135 | + while (activeMessages > 0 && i < maxIterations) { |
| 136 | + // Receive the messages. Vertices that didn't get any messages do not appear in newVerts. |
| 137 | + val newVerts = g.vertices.innerJoin(messages)(vprog).cache() |
| 138 | + // Update the graph with the new vertices. |
| 139 | + g = g.outerJoinVertices(newVerts) { (vid, old, newOpt) => newOpt.getOrElse(old) } |
| 140 | + |
| 141 | + val oldMessages = messages |
| 142 | + // Send new messages. Vertices that didn't get any messages don't appear in newVerts, so don't |
| 143 | + // get to send messages. |
| 144 | + messages = g.mapReduceTriplets(sendMsg, mergeMsg, Some((newVerts, EdgeDirection.Both))).cache() |
| 145 | + activeMessages = messages.count() |
| 146 | + // after counting we can unpersist the old messages |
| 147 | + oldMessages.unpersist(blocking=false) |
| 148 | + // count the iteration |
| 149 | + i += 1 |
| 150 | + } |
| 151 | + |
| 152 | + g |
| 153 | + } // end of apply |
| 154 | + |
| 155 | +} // end of class Pregel |
| 156 | + |
| 157 | + |
0 commit comments