Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to add edge without metadata in GraphJet #90

Merged
merged 32 commits into from
Aug 3, 2017
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fix optimized edge pool
  • Loading branch information
jerryjiang committed Jul 25, 2017
commit 6cc1988ec82a89f44ec2d3e09190c8e007c8da86
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
package com.twitter.graphjet.bipartite.edgepool;

import java.util.Random;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.twitter.graphjet.bipartite.api.ReusableNodeIntIterator;
import com.twitter.graphjet.bipartite.api.ReusableNodeRandomIntIterator;
import com.twitter.graphjet.hashing.IntToIntPairArrayIndexBasedMap;
import com.twitter.graphjet.hashing.IntToIntPairHashMap;
import com.twitter.graphjet.stats.StatsReceiver;

import it.unimi.dsi.fastutil.ints.IntIterator;

/**
 * This edge pool stores edges compactly in a two-dimensional array. The edge pool assumes that
 * the degree of each node is fixed when the edge pool is constructed, and it is able to allocate
 * the exact memory used in the two-dimensional array. This edge pool does not handle
 * synchronization between writer and reader threads, and it accepts reader access only after it
 * is completely populated by a writer thread. It assumes that no new edges will be added to the
 * pool after it accepts reader access.
 *
 * Assuming n nodes and m edges, the amount of memory used by this pool is:
 * - 4*m bytes for edges (which is expected to dominate)
 * - O(4*3*n) bytes for nodes
 *
 * Single-edge insertion and edge removal are not supported and throw
 * {@link UnsupportedOperationException}; edges are added in batches via
 * {@link #addEdges(int, int, int[], long[], int, int)}.
 */
public abstract class AbstractOptimizedEdgePool implements EdgePool {

  // This is the only reader-accessible data
  protected EdgePoolReaderAccessibleInfo readerAccessibleInfo;

  // Maps a node to the pair (start position of its edges, degree); filled in by the constructor.
  protected IntToIntPairHashMap intToIntPairHashMap;

  protected int currentNumEdgesStored;
  protected int maxNumEdges;
  protected int maxDegree;
  protected int numOfNodes;

  protected static final Logger LOG = LoggerFactory.getLogger("graph");
  protected StatsReceiver scopedStatsReceiver;

  /**
   * Cumulative powers of two: entry 0 is 0 and entry i (i >= 1) holds 2^1 + 2^2 + ... + 2^i,
   * i.e. 2^(i+1) - 2. The largest entry (i = 29) is 2^30 - 2, which still fits in an int.
   * The table is not read inside this class; it is exposed to subclasses.
   */
  protected static final int[] POW_TABLE_30;
  static {
    POW_TABLE_30 = new int[30];
    POW_TABLE_30[0] = 0;
    for (int i = 1; i < 30; i++) {
      // Integer shift instead of Math.pow: same values, no floating-point round trip.
      POW_TABLE_30[i] = (1 << i) + POW_TABLE_30[i - 1];
    }
  }

  /**
   * OptimizedEdgePool
   *
   * Lays out the nodes back to back: every node with a non-zero degree is assigned the running
   * start position of its edge slice together with its degree. Nodes with degree zero get no
   * entry in the node map.
   *
   * @param nodeDegrees node degree map, indexed by node id
   * @param maxNumEdges the max number of edges will be added in the pool
   * @param statsReceiver stats receiver; it is scoped by the simple name of the concrete class
   */
  public AbstractOptimizedEdgePool(
      int[] nodeDegrees,
      int maxNumEdges,
      StatsReceiver statsReceiver
  ) {
    numOfNodes = nodeDegrees.length;
    currentNumEdgesStored = 0;
    scopedStatsReceiver = statsReceiver.scope(this.getClass().getSimpleName());

    this.maxNumEdges = maxNumEdges;

    intToIntPairHashMap = new IntToIntPairArrayIndexBasedMap(numOfNodes, -1, scopedStatsReceiver);

    int position = 0;
    maxDegree = 0;

    for (int i = 0; i < numOfNodes; i++) {
      int nodeDegree = nodeDegrees[i];
      if (nodeDegree == 0) {
        // Nodes without edges occupy no slots and are not registered in the node map.
        continue;
      }

      maxDegree = Math.max(maxDegree, nodeDegree);

      intToIntPairHashMap.put(i, position, nodeDegree);
      // The next node's edges start right after this node's slice.
      position += nodeDegree;
    }
  }

  /**
   * Get a specified edge for the node: note that it is the caller's responsibility to check that
   * the edge number is within the degree bounds.
   *
   * @param position is the position index for the node
   * @param edgeNumber is the required edge number
   * @return the requested edge node number
   */
  protected int getNodeEdge(int position, int edgeNumber) {
    return readerAccessibleInfo.getEdges().getEntry(position + edgeNumber);
  }

  /**
   * Get the metadata of a specified edge for the node: note that it is the caller's
   * responsibility to check that the edge number is within the degree bounds.
   *
   * @param position is the position index for the node
   * @param edgeNumber is the required edge number
   * @return the requested edge metadata
   */
  protected long getEdgeMetadata(int position, int edgeNumber) {
    return readerAccessibleInfo.getMetadata().getEntry(position + edgeNumber);
  }

  /**
   * Looks up the first value stored for the node in the reader-accessible node info.
   * NOTE(review): presumably this is the start position assigned in the constructor; confirm
   * that subclasses publish the constructor's node map as the reader-accessible node info.
   *
   * @param node is the node to look up
   * @return the first value of the node's info pair
   */
  protected int getNodePosition(int node) {
    return readerAccessibleInfo.getNodeInfo().getFirstValue(node);
  }

  /**
   * Looks up the second value stored for the node in the reader-accessible node info.
   *
   * @param node is the node to look up
   * @return the second value of the node's info pair
   */
  @Override
  public int getNodeDegree(int node) {
    return readerAccessibleInfo.getNodeInfo().getSecondValue(node);
  }

  /**
   * Returns an iterator over the node's edges, backed by a freshly allocated
   * {@link OptimizedEdgeIterator}.
   *
   * @param node is the node whose edges are being returned
   * @return an iterator reset over the node's edges
   */
  @Override
  public IntIterator getNodeEdges(int node) {
    return getNodeEdges(node, new OptimizedEdgeIterator(this));
  }

  /**
   * Reuses the given iterator to point to the current node's edges.
   *
   * @param node is the node whose edges are being returned
   * @param optimizedEdgeRandomIterator is the iterator to reuse
   * @return the result of resetting the given iterator for the node
   */
  @Override
  public IntIterator getNodeEdges(int node, ReusableNodeIntIterator optimizedEdgeRandomIterator) {
    return optimizedEdgeRandomIterator.resetForNode(node);
  }

  /**
   * Returns a random-sampling iterator over the node's edges, backed by a freshly allocated
   * {@link OptimizedEdgeRandomIterator}.
   *
   * @param node is the node whose edges are being sampled
   * @param numSamples is the number of samples passed on to the iterator
   * @param random is the random number generator passed on to the iterator
   * @return an iterator reset for the node
   */
  @Override
  public IntIterator getRandomNodeEdges(int node, int numSamples, Random random) {
    return getRandomNodeEdges(node, numSamples, random, new OptimizedEdgeRandomIterator(this));
  }

  /**
   * Reuses the given random iterator to sample the node's edges.
   *
   * @param node is the node whose edges are being sampled
   * @param numSamples is the number of samples passed on to the iterator
   * @param random is the random number generator passed on to the iterator
   * @param optimizedEdgeRandomIterator is the iterator to reuse
   * @return the result of resetting the given iterator for the node
   */
  @Override
  public IntIterator getRandomNodeEdges(
      int node,
      int numSamples,
      Random random,
      ReusableNodeRandomIntIterator optimizedEdgeRandomIterator) {
    return optimizedEdgeRandomIterator.resetForNode(node, numSamples, random);
  }

  /**
   * Not supported: this pool only accepts edges in batches.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void addEdge(int nodeA, int nodeB) {
    throw new UnsupportedOperationException("add a single edge one by one is not supported in "
        + "OptimizedEdgePool");
  }

  /**
   * Not supported: this pool only accepts edges in batches.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void addEdge(int nodeA, int nodeB, long metadata) {
    throw new UnsupportedOperationException("add a single edge one by one is not supported in "
        + "OptimizedEdgePool");
  }

  /**
   * Batch add edges in optimized segment.
   *
   * @param node the node id which the edges are associated to
   * @param pool the pool id which the edges are associated to
   * @param src the source int edge array
   * @param metadata the source long edge metadata array
   * @param srcPos the starting position in the source array
   * @param length the number of edges to be copied
   */
  public abstract void addEdges(
      int node,
      int pool,
      int[] src,
      long[] metadata,
      int srcPos,
      int length
  );

  @Override
  public boolean isOptimized() {
    return true;
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void removeEdge(int nodeA, int nodeB) {
    throw new UnsupportedOperationException("The remove operation is currently not supported");
  }

  /**
   * Returns the number of stored edges as a percentage of the maximum number of edges.
   *
   * @return 100 * currentNumEdgesStored / maxNumEdges, or 0.0 when the pool was created with a
   *         maximum of zero edges (a plain division would yield NaN or Infinity there)
   */
  @Override
  public double getFillPercentage() {
    if (maxNumEdges == 0) {
      return 0.0;
    }
    return 100.0 * (double) currentNumEdgesStored / maxNumEdges;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,8 @@ protected void expandArray(int nodeA) {
numNodesCounter.incr();
}

/**
 * NOTE(review): no implementation is visible here; judging by the name, this presumably reports
 * whether the edge pool stores per-edge metadata. Confirm against the concrete subclasses.
 *
 * @return a boolean supplied by the concrete subclass
 */
public abstract boolean hasEdgeMetadata();

@Override
public boolean isOptimized() {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@
import com.twitter.graphjet.bipartite.api.ReusableNodeIntIterator;

/**
* Returns an iterator over the edges stored in an {@link OptimizedEdgePool}. The iterator is
* meant to be reusable via the resetForIndex method.
* Returns an iterator over the edges stored in an {@link AbstractOptimizedEdgePool}. The iterator
* is meant to be reusable via the resetForNode method.
*/
public class OptimizedEdgeIterator extends ReadOnlyIntIterator
implements WithEdgeMetadataIntIterator, ReusableNodeIntIterator {
protected final OptimizedEdgePool optimizedDegreeEdgePool;
protected final AbstractOptimizedEdgePool optimizedDegreeEdgePool;
protected int position;
protected int degree;
protected int currentEdge;
Expand All @@ -36,9 +36,9 @@ public class OptimizedEdgeIterator extends ReadOnlyIntIterator
* Creates an iterator that can be reused. Note that the client needs to call the resetForNode
* method before using the iterator.
*
* @param optimizedDegreeEdgePool is the underlying {@link OptimizedEdgePool}
* @param optimizedDegreeEdgePool is the underlying {@link AbstractOptimizedEdgePool}
*/
public OptimizedEdgeIterator(OptimizedEdgePool optimizedDegreeEdgePool) {
public OptimizedEdgeIterator(AbstractOptimizedEdgePool optimizedDegreeEdgePool) {
this.optimizedDegreeEdgePool = optimizedDegreeEdgePool;
}

Expand Down
Loading