diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..d2d6f36 --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +*.py[cod] + +# C extensions +*.so + +# Packages +*.egg +*.egg-info +dist +build +eggs +parts +bin +var +sdist +develop-eggs +.installed.cfg +lib +lib64 + +# Installer logs +pip-log.txt + +# Unit test / coverage reports +.coverage +.tox +nosetests.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject diff --git a/linkpred/__init__.py b/linkpred/__init__.py new file mode 100644 index 0000000..308afc8 --- /dev/null +++ b/linkpred/__init__.py @@ -0,0 +1 @@ +"""linkpred, a Python module for link prediction""" diff --git a/linkpred/cli.py b/linkpred/cli.py new file mode 100644 index 0000000..9821a91 --- /dev/null +++ b/linkpred/cli.py @@ -0,0 +1,140 @@ +import json +from optparse import OptionParser + +from .predictors import all_predictors +from .util import log + +__all__ = ["load_profile", "get_profile", "get_profile_by_options", + "options_n_args"] + + +def data_from_profile(fname): + data = {} + try: + with open(fname) as f: + if fname.endswith(".yaml"): + import yaml + data = yaml.safe_load(f) + else: + data = json.load(f) + except (AttributeError, TypeError) as e: + log.logger.warning("Encountered error '%s'" % e) + finally: + return data + + +def fancy_update(base, new): + updated = dict(base.iteritems()) + for k, v in new.iteritems(): + if k not in base: + updated[k] = v + elif type(base[k]) == type(v) == dict: + updated[k] = fancy_update(base[k], v) + elif type(base[k]) == type(v) == list: + updated[k].extend(v) + else: + updated[k] = v + return updated + + +def load_profile(*fnames): + """ + Load profile from one or more files + + Arguments + --------- + fnames : one or more strings (file names) + + """ + profile = {} + for fname in fnames: + data = data_from_profile(fname) + profile = fancy_update(profile, data) + return profile + + +def get_profile(**kwargs): + options, args = options_n_args(**kwargs) + if args: + log.logger.warning("Ignoring arguments: %s" % str(args)) + return get_profile_by_options(options) + + +def get_profile_by_options(options): + """Determine a profile based on available options + + If multiple profiles are passed through the CLI interface, + they are merged into one. In case of conflicts, the last profile + supersedes the previous ones. + Other CLI options supersede the profiles. + + Arguments + --------- + + options : an optparse.Options object + + Returns + ------- + + profile : a dict + + """ + profile = load_profile(*options.profile) + + option_names = ["charts", "filetype", "interpolation", "steps", "only_new"] + for option_name in option_names: + try: + option = getattr(options, option_name) + except AttributeError: + continue + profile[option_name] = option + + if hasattr(options, 'predictors') and options.predictors: + profile['predictors'] = [] + for p in options.predictors: + profile['predictors'].append({'name': p}) + + return profile + + +def options_n_args(choose_chart=True, choose_profile=True, + choose_predictor=True, choose_filetype=False, + choose_weight=False, choose_interpolation=False): + """Get nice CLI interface and return options 'n arguments.""" + + parser = OptionParser() + parser.add_option("--debug", action="store_true", dest="debug", + default=False, help="Log debug messages") + if choose_chart: + chart_help = "Type of chart(s) to produce (default: all available)." 
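+        # NB: optparse's "append" action appends user-supplied values to the
+        # default list object itself, so an explicit -c/--chart adds to,
+        # rather than replaces, the default set of chart types defined below.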
+ chart_types = ["recall-precision", "F-score", "ROC"] + parser.add_option("-c", "--chart", help=chart_help, action="append", + choices=chart_types, dest="charts", default=chart_types) + if choose_filetype: + parser.add_option("-f", "--filetype", + help="Output file type (default: %default)", default="pdf") + if choose_interpolation: + parser.add_option("-i", "--no-interpolation", + help="Do not interpolate precision", action="store_false", + dest="interpolation", default=True) + if choose_predictor: + predictors = [p.__name__ for p in all_predictors()] + parser.add_option( + "-p", "--predictors", action="append", dest="predictors", + help="Predicting methods to use (default: all available)", + choices=predictors, default=[]) + parser.add_option( + "-n", "--only-new", action="store_true", dest="only_new", + default=False, + help="Only consider new (unattested) predictions") + if choose_profile: + parser.add_option("-P", "--profile", action="append", + help="JSON profile file", default=[]) + + options, args = parser.parse_args() + if options.debug: + log.logger.setLevel(log.logging.DEBUG) + else: + log.logger.setLevel(log.logging.INFO) + + return options, args diff --git a/linkpred/core.py b/linkpred/core.py new file mode 100644 index 0000000..5c1c99a --- /dev/null +++ b/linkpred/core.py @@ -0,0 +1,150 @@ +from . import predictors +from .evaluation import Comparison +from .result import ResultDict # XXX +from .util import log + + +def training_test_data(profile, minimum_degree=1, **kwargs): + endpoint = profile["sparql_endpoint"] + query = profile.get("query") + training = profile["training"] + test = profile["test"] + + results = ResultDict() + for dataprofile in (training, test): + name = dataprofile["name"] + parameters = dataprofile["parameters"] + if query is None: + query = dataprofile["query"] + + log.logger.info("Collecting data (%s)..." % name) + # XXX TODO XXX + results[name] = "TODO" + log.logger.info("Finished collecting data.") + + if minimum_degree: + results.filter_all_low_degree_nodes(minimum_degree) + return results[training['name']], results[test['name']] + + +def pretty_print(name, bipartite=False, tfidf=False, params={}): + """Pretty print a predictor name""" + retval = name + if bipartite: + retval += " bipartite" + if tfidf: + retval += " TF-IDF" + if not params: + return retval + + pretty_params = ", ".join("%s = %s" % (k, str(v)) + for k, v in params.iteritems()) + return "%s (%s)" % (retval, pretty_params) + + +def to_tfidf(G): + """TF-IDF transform the edges of G + + This is done by transforming its adjacency matrix and then converting back + to a network. + """ + import networkx as nx + from linkpred.matrix import tfidf_matrix + from linkpred.network import from_biadjacency_matrix + + assert nx.is_bipartite(G) + row_items = [n for n in G.nodes_iter() if G.node[n]['eligible']] + col_items = [n for n in G.nodes_iter() if not G.node[n]['eligible']] + matrix = nx.bipartite.biadjacency_matrix(G, row_items, col_items) + matrix = tfidf_matrix(matrix) + G2 = from_biadjacency_matrix(matrix, row_items, col_items) + G2.node = G.node + return G2 + + +def do_predict(G, predictortype, label, eligible=None, only_new=False, **kwargs): + log.logger.info("Executing %s..." % label) + predictor = predictortype(G, eligible=eligible, only_new=only_new) + scoresheet = predictor(**kwargs) + log.logger.info("Finished executing %s." 
% label) + return scoresheet + + +def predict(training, profile, only_new=False, eligible=None): + """Generator that yields predictions on the basis of training + + Arguments + --------- + training : a Result + Training data + + profile : a dict + Profile detailing which predictors should be used + + only_new : True|False + Whether or not we should restrict ourselves to predicting only new links + + eligible : a string or None + If a string, the attribute according to which 'eligible' nodes are found. + If None, this is ignored. + + Returns + ------- + (label, scoresheet) : a 2-tuple + 2-tuple consisting of a string (label of the prediction) and + a Scoresheet (actual predictions) + + """ + for predictor_profile in profile['predictors']: + bipartite = predictor_profile.get('bipartite', False) + tfidf = predictor_profile.get('tfidf', False) + parameters = predictor_profile.get('parameters', {}) + name = predictor_profile['name'] + predictortype = getattr(predictors, name) + label = predictor_profile.get('displayname', + pretty_print(name, bipartite, tfidf, parameters)) + + if bipartite and tfidf: + # Create a reusable TF-IDF network, so we don't have to do this + # transformation for each predictor. + if not hasattr(predict, 'tfidf_network'): + predict.tfidf_network = to_tfidf(training.pathspec) + G = predict.tfidf_network + elif bipartite: + G = training.pathspec + else: + G = training.network + + scoresheet = do_predict( + G, predictortype, label, eligible, only_new, **parameters) + + yield label, scoresheet + + +def connect_signals(listeners): + from linkpred.evaluation import signals + for listener in listeners: + signals.new_evaluation.connect(listener.on_new_evaluation) + signals.datagroup_finished.connect(listener.on_datagroup_finished) + signals.dataset_finished.connect(listener.on_dataset_finished) + signals.run_finished.connect(listener.on_run_finished) + + +def evaluate(datasets, name, filetype="pdf", interpolate=True, steps=1): + import linkpred.evaluation.listeners as l + # TODO figure out easy way to specify which listeners we want + cache = l.CachingListener() + rp = l.RecallPrecisionPlotter(name, filetype=filetype, + interpolate=interpolate) + f = l.FScorePlotter(name, filetype=filetype, xlabel="# predictions", + steps=steps) + roc = l.ROCPlotter(name, filetype=filetype) + fmax = l.FMaxListener(name) + connect_signals((cache, rp, f, roc, fmax)) + + comp = Comparison() + try: + comp.register_datasets(datasets) + except TypeError: # Oops, not iterable! 
+ comp.register_dataset(datasets) + comp.run() diff --git a/linkpred/evaluation/__init__.py b/linkpred/evaluation/__init__.py new file mode 100644 index 0000000..6870b0d --- /dev/null +++ b/linkpred/evaluation/__init__.py @@ -0,0 +1,5 @@ +"""Module for evaluating link prediction results""" +from .comparison import Comparison, DataSet +from .listeners import * +from .scoresheet import * +from .signals import * diff --git a/linkpred/evaluation/comparison.py b/linkpred/evaluation/comparison.py new file mode 100644 index 0000000..ed06338 --- /dev/null +++ b/linkpred/evaluation/comparison.py @@ -0,0 +1,49 @@ +from .ranked import ranked_evaluation +from .signals import new_evaluation, datagroup_finished,\ + dataset_finished, run_finished +from ..util import log + +__all__ = ["DataSet", "Comparison"] + + +class DataSet(object): + def __init__(self, name, predictions, test, exclude=set(), steps=1): + self.name = name + self.predictions = predictions + self.test = test.for_comparison(exclude=exclude) + self.steps = steps + nnodes = len(test) + # Universe = all possible edges, except for the ones that we no longer + # consider (because they're already in the training network) + self.num_universe = nnodes * (nnodes - 1) / 2 - len(exclude) + log.logger.debug("Constructed dataset '%s': " + "num_universe = %d" % (self.name, self.num_universe)) + + +class Comparison(object): + + def __init__(self): + self.datasets = [] + + def __iter__(self): + return iter(self.datasets) + + def register_dataset(self, dataset): + self.datasets.append(dataset) + + def register_datasets(self, datasets): + for d in datasets: + self.register_dataset(d) + + def run(self): + for d in self.datasets: + for predictorname, scoresheet in d.predictions: + for evaluation in ranked_evaluation(scoresheet, d.test, + n=d.steps, + universe=d.num_universe): + new_evaluation.send(sender=self, evaluation=evaluation, + dataset=d.name, predictor=predictorname) + datagroup_finished.send(sender=self, dataset=d.name, + predictor=predictorname) + dataset_finished.send(sender=self, dataset=d.name) + run_finished.send(sender=self) diff --git a/linkpred/evaluation/listeners.py b/linkpred/evaluation/listeners.py new file mode 100644 index 0000000..14bd9df --- /dev/null +++ b/linkpred/evaluation/listeners.py @@ -0,0 +1,225 @@ +import copy +import matplotlib.pyplot as plt + +from time import localtime, strftime + +from ..util import interpolate + +__all__ = ["Listener", "Plotter", "CachingListener", "FMaxListener", + "RecallPrecisionPlotter", "FScorePlotter", "ROCPlotter", + "PrecisionAtKListener", "MarkednessPlotter"] + + +class Listener(object): + + def on_new_evaluation(self, sender, **kwargs): + pass + + def on_datagroup_finished(self, sender, **kwargs): + pass + + def on_dataset_finished(self, sender, **kwargs): + pass + + def on_run_finished(self, sender, **kwargs): + pass + + +class CachingListener(Listener): + + def __init__(self): + self.cachefile = None + + def writeline(self, *args): + line = "\t".join(map(str, args)) + self.cachefile.write("%s\n" % line) + + def on_new_evaluation(self, sender, **kwargs): + evaluation, dataset, predictor = kwargs['evaluation'], \ + kwargs['dataset'], kwargs['predictor'] + tp, fp, fn, tn = evaluation.num_tp, evaluation.num_fp, \ + evaluation.num_fn, evaluation.num_tn + + if not self.cachefile: + fname = "%s-%s-cache.txt" % (dataset, predictor) + self.cachefile = open(fname, 'w') + # Header row + self.writeline('tp', 'fp', 'fn', 'tn') + self.writeline(tp, fp, fn, tn) + + def on_datagroup_finished(self, sender, 
**kwargs): + if not self.cachefile: + return + self.cachefile.close() + self.cachefile = None + + +class FMaxListener(Listener): + def __init__(self, name, beta=1): + self.beta = beta + self.reset_data() + self.fname = "%s-Fmax" % name + \ + strftime("_%Y-%m-%d_%H.%M.txt", localtime()) + + def reset_data(self): + self._f = [] + + def on_new_evaluation(self, sender, **kwargs): + evaluation = kwargs['evaluation'] + self._f.append(evaluation.f_score(self.beta)) + + def on_datagroup_finished(self, sender, **kwargs): + fmax = max(self._f) if self._f else 0 + self.reset_data() + + status = "%s\t%s\t%.4f\n" % ( + kwargs['dataset'], kwargs['predictor'], fmax) + + with open(self.fname, 'a') as f: + f.write(status) + print status + + +class PrecisionAtKListener(Listener): + def __init__(self, name, k=10, steps=1): + self.k = k + self.steps = steps + self.reset_data() + + self.fname = "%s-precision-at-%d" % (name, self.k) + \ + strftime("_%Y-%m-%d_%H.%M.txt", localtime()) + + def reset_data(self): + self.precision = 0.0 + self.count = 0 + + def on_new_evaluation(self, sender, **kwargs): + self.count += 1 + if self.count / self.steps == self.k: + self.precision = kwargs['evaluation'].precision() + + def on_datagroup_finished(self, sender, **kwargs): + status = "%s\t%s\t%.4f\n" % (kwargs['dataset'], + kwargs['predictor'], + self.precision) + + with open(self.fname, 'a') as f: + f.write(status) + print status + + self.reset_data() + + +generic_chart_looks = ['k-', 'k--', 'k.-', 'k:', + 'r-', 'r--', 'r.-', 'r:', + 'b-', 'b--', 'b.-', 'b:', + 'g-', 'g--', 'g.-', 'g:', + 'c-', 'c--', 'c.-', 'c:', + 'y-', 'y--', 'y.-', 'y:'] + + +class Plotter(Listener): + def __init__(self, name, xlabel="", ylabel="", filetype="pdf", chart_looks=[]): + self.name = name + self.filetype = filetype + self.chart_looks = chart_looks + self._charttype = "" + self._legend_props = {'prop': {'size': 'x-small'}} + self.fig = plt.figure() + self.fig.add_axes([0.1, 0.1, 0.8, 0.8], xlabel=xlabel, ylabel=ylabel) + self.reset_data() + + def reset_data(self): + self._x = [] + self._y = [] + + def add_line(self, dataset="", predictor="", default_look=generic_chart_looks): + label = self.build_label(dataset, predictor) + ax = self.fig.axes[0] + ax.plot(self._x, self._y, self.chart_look(default_look), label=label) + + def build_label(self, dataset="", predictor=""): + return predictor + + def chart_look(self, default): + if not self.chart_looks: + self.chart_looks = copy.copy(default) + return self.chart_looks.pop(0) + + def on_datagroup_finished(self, sender, **kwargs): + self.add_line(kwargs['dataset'], kwargs['predictor']) + self.reset_data() + + def on_run_finished(self, sender, **kwargs): + # Fix looks + for ax in self.fig.axes: + ax.legend(**self._legend_props) + + # Save to file + fname = "%s-%s" % (self.name, self._charttype) + \ + strftime("_%Y-%m-%d_%H.%M.", localtime()) + self.filetype + self.fig.savefig(fname) + + +class RecallPrecisionPlotter(Plotter): + def __init__(self, name, xlabel="Recall", ylabel="Precision", + interpolate=True, **kwargs): + Plotter.__init__(self, name, xlabel, ylabel, **kwargs) + self._charttype = "recall-precision" + self.interpolate = interpolate + + def reset_data(self): + # Make sure that we always start in the top-left corner + self._x = [0.] + self._y = [1.] 
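+        # Note: add_line (below) runs these precision values through the
+        # interpolate helper imported from linkpred.util before plotting,
+        # when interpolation is enabled; that helper's implementation is not
+        # part of this diff.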
+ + def add_line(self, dataset="", predictor=""): + if self.interpolate: + self._y = interpolate(self._y) + Plotter.add_line(self, dataset, predictor) + + def on_new_evaluation(self, sender, **kwargs): + evaluation = kwargs['evaluation'] + + self._x.append(evaluation.recall()) + self._y.append(evaluation.precision()) + + +class FScorePlotter(Plotter): + def __init__(self, name, xlabel="#", ylabel="F-score", beta=1, steps=1, **kwargs): + Plotter.__init__(self, name, xlabel, ylabel, **kwargs) + self._charttype = "F-Score" + self.beta = beta + self.steps = steps + + def on_new_evaluation(self, sender, **kwargs): + evaluation = kwargs['evaluation'] + + self._x.append(self.steps * len(self._x)) + self._y.append(evaluation.f_score(self.beta)) + + +class ROCPlotter(Plotter): + def __init__(self, name, xlabel="False pos. rate", + ylabel="True pos. rate", **kwargs): + Plotter.__init__(self, name, xlabel, ylabel, **kwargs) + self._charttype = "ROC" + + def on_new_evaluation(self, sender, **kwargs): + evaluation = kwargs['evaluation'] + + self._x.append(evaluation.fallout()) + self._y.append(evaluation.recall()) + + +class MarkednessPlotter(Plotter): + def __init__(self, name, xlabel="Miss", ylabel="Precision", **kwargs): + Plotter.__init__(self, name, xlabel, ylabel, **kwargs) + self._charttype = "Markedness" + self._legend_props["loc"] = "upper left" + + def on_new_evaluation(self, sender, **kwargs): + evaluation = kwargs['evaluation'] + + self._x.append(evaluation.miss()) + self._y.append(evaluation.precision()) diff --git a/linkpred/evaluation/ranked.py b/linkpred/evaluation/ranked.py new file mode 100644 index 0000000..a7644b4 --- /dev/null +++ b/linkpred/evaluation/ranked.py @@ -0,0 +1,24 @@ +from .static import StaticEvaluation + + +def ranked_evaluation(retrieved, relevant, n=None, **kwargs): + """Generator for ranked evaluation of IR + + Arguments + --------- + retrieved : a Scoresheet + score sheet of ranked retrieved results + + relevant : a set + set of relevant results + + n : an integer + At each step, the next n items on the retrieved score sheet are + added to the set of retrieved items that are compared to the relevant + ones. + + """ + evaluation = StaticEvaluation(relevant=relevant, **kwargs) + for ret in retrieved.successive_sets(n=n): + evaluation.update_retrieved(ret) + yield evaluation diff --git a/linkpred/evaluation/scoresheet.py b/linkpred/evaluation/scoresheet.py new file mode 100644 index 0000000..a6b3726 --- /dev/null +++ b/linkpred/evaluation/scoresheet.py @@ -0,0 +1,181 @@ +import networkx as nx + +from collections import defaultdict +from ..util import log + +__all__ = ["Pair", "BaseScoresheet", "Scoresheet"] + + +def _boundaries(start, steps, threshold, successive): + for i in range(start, threshold, steps): + begin = i if successive else start + end = i + steps + yield begin, end + + +class BaseScoresheet(defaultdict): + """Score sheet for evaluation of IR and similar + + This is a simple dict-like object, whose values are typically numeric + (floats). It adds the methods `sets`, `successive_sets` and `top`. + + Example + ------- + >>> data = {('a', 'b'): 0.8, ('b', 'c'): 0.5, ('c', 'a'): 0.2} + >>> sheet = Scoresheet(data) + >>> for s in sheet.sets(steps=2): + ... 
print s + + """ + def __init__(self, data=None, n=100): + defaultdict.__init__(self, float) + if data: + self.update(self.process_data(data)) + self.n = n + + def __setitem__(self, key, val): + dict.__setitem__(self, key, float(val)) + + def process_data(self, data): + """Can be overridden by child classes""" + return data + + def sets(self, n=None, threshold=None, successive=False, as_dict=False): + """Return sets of items on the scoresheet in decreasing order + + Arguments + --------- + + start : int + Where to start for first set + + n : int + Number of items per set + + threshold : int + Maximum number of items to return (in total) + Note that this is treated as a size hint, rather than a strict limit. + + successive : True|False + if True, return successive sets; if False, return incremental sets + + """ + n = n or self.n + threshold = threshold or len(self) + log.logger.debug("Called Scoresheet.sets(): n=%d, " + "threshold=%d" % (n, threshold)) + + # Sort first by score, then by key. This way, we always get the same + # ranking, even in case of ties. + # We use the tmp structure because it is much faster than + # itemgetter(1, 0). + tmp = ((score, key) for key, score in self.iteritems()) + if as_dict: + ranked_data = [(key, score) for score, + key in sorted(tmp, reverse=True)] + else: + ranked_data = [key for _, key in sorted(tmp, reverse=True)] + size = len(ranked_data) + + for begin, end in _boundaries(0, n, threshold, successive): + if begin >= size: + raise StopIteration + if as_dict: + yield dict(ranked_data[begin:end]) + else: + yield set(ranked_data[begin:end]) + + def successive_sets(self, n=None, threshold=None): + return self.sets(n, threshold, True) + + def top(self, n=10): + top_n = self.sets(n=n).next() + return {k: self[k] for k in top_n} + + +class Pair(object): + """An unsorted pair of things. + + We could probably also use frozenset for this, but a Pair class opens + possibilities for the future, such as extensions to 'directed' pairs + (where the order is important) or to self-loops (where the two elements + are the same). + + Example + ------- + >>> t = ('a', 'b') + >>> Pair(t) == Pair(*t) == Pair('b', 'a') + True + + """ + def __init__(self, *args): + if len(args) == 1: + key = args[0] + if isinstance(key, Pair): + a, b = key.elements + elif isinstance(key, tuple) and len(key) == 2: + a, b = key + else: + raise TypeError("Key '%s' is not a Pair or tuple." % (key)) + pass + elif len(args) == 2: + a, b = args + else: + raise TypeError( + "__init__() takes 1 or 2 arguments in addition to self") + # For link prediction, a and b are two different nodes + assert a != b, "Predicted link (%s, %s) is a self-loop!" % (a, b) + self.elements = (a, b) if a > b else (b, a) + + def __eq__(self, other): + return self.elements == other.elements + + def __ne__(self, other): + return self.elements != other.elements + + def __lt__(self, other): + return self.elements < other.elements + + def __gt__(self, other): + return self.elements > other.elements + + def __getitem__(self, idx): + return self.elements[idx] + + def __hash__(self): + return hash(self.elements) + + def __str__(self): + a, b = self.elements + return "Pair(%s, %s)" % (str(a), str(b)) + + def __repr__(self): + return str(self) + + def __iter__(self): + return iter(self.elements) + + +class Scoresheet(BaseScoresheet): + """Scoresheet for link prediction + + Scoresheet's keys are always Pairs. 
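+
+    Example
+    -------
+    A small illustrative sketch (mirroring the behaviour exercised in the
+    tests): keys are normalised to Pair objects and values to floats, so
+    the node order used for lookup does not matter.
+
+    >>> sheet = Scoresheet()
+    >>> sheet[('a', 'b')] = 5
+    >>> sheet[('b', 'a')]
+    5.0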
+ + """ + def __getitem__(self, key): + return BaseScoresheet.__getitem__(self, Pair(key)) + + def __setitem__(self, key, val): + BaseScoresheet.__setitem__(self, Pair(key), float(val)) + + def __delitem__(self, key): + return dict.__delitem__(self, Pair(key)) + + def process_data(self, data, weight='weight'): + if isinstance(data, dict): + return {Pair(k): float(v) for k, v in data.iteritems()} + if isinstance(data, nx.Graph): + return {Pair(u, v): float(d[weight]) for u, v, d + in data.edges(data=True)} + # We assume that data is some sort of iterable, like a list or tuple + return {Pair(k): float(v) for k, v in data} diff --git a/linkpred/evaluation/signals.py b/linkpred/evaluation/signals.py new file mode 100644 index 0000000..c324b4a --- /dev/null +++ b/linkpred/evaluation/signals.py @@ -0,0 +1,10 @@ +import dispatch + +__all__ = ["new_evaluation", "datagroup_finished",\ + "dataset_finished", "run_finished"] + +new_evaluation = dispatch.Signal(providing_args=["evaluation", "dataset",\ + "predictor"]) +datagroup_finished = dispatch.Signal(providing_args=["dataset", "predictor"]) +dataset_finished = dispatch.Signal(providing_args=["dataset"]) +run_finished = dispatch.Signal() diff --git a/linkpred/evaluation/static.py b/linkpred/evaluation/static.py new file mode 100644 index 0000000..9345fc7 --- /dev/null +++ b/linkpred/evaluation/static.py @@ -0,0 +1,187 @@ +from ..util import log + + +class StaticEvaluation(object): + """ + Static evaluation of IR + """ + def __init__(self, retrieved=[], relevant=[], universe=None): + """ + Initialize IR evaluation. + + We determine the following table: + + +--------------+---------------+ + | tp | fp | + | ret & rel | ret & ~rel | + +--------------+---------------+ + | fn | tn | + | ~ret & rel | ~ret & ~rel | + +--------------+---------------+ + + Arguments + --------- + retrieved : a list or set + iterable of the retrieved items + + relevant : a list or set + iterable of the relevant items + + universe : a list or set, an int or None + If universe is an iterable, it is interpreted as the set of all items + in the system. + If universe is an int, it is interpreted as the *number* of items in + the system. This allows for fewer checks but is more memory-efficient. + If universe is None, it is supposed to be unknown. This still allows for + some measures, including precision and recall, to be calculated. 
+ + """ + retrieved = set(retrieved) + relevant = set(relevant) + + self.fp = retrieved - relevant + self.fn = relevant - retrieved + self.tp = retrieved & relevant + if universe is None: + self.tn = None + self.num_universe = -1 + elif isinstance(universe, int): + self.tn = None + self.num_universe = universe + if len(retrieved) > self.num_universe: + raise ValueError("Retrieved cannot be larger than universe.") + if len(relevant) > self.num_universe: + raise ValueError("Retrieved cannot be larger than universe.") + else: + universe = set(universe) + if not (retrieved <= universe and relevant <= universe): + raise ValueError("Retrieved and relevant should be " + "subsets of universe.") + self.tn = universe - retrieved - relevant + del universe + self.update_counts() + + def update_counts(self): + self.num_fp = len(self.fp) + self.num_fn = len(self.fn) + self.num_tp = len(self.tp) + if self.tn is not None: + self.num_tn = len(self.tn) + elif self.num_universe == -1: + self.num_tn = -1 + else: + self.num_tn = self.num_universe - self.num_fp \ + - self.num_fn - self.num_tp + assert self.num_tn >= 0 + + def update_retrieved(self, new): + new = set(new) + + if not (new.isdisjoint(self.tp) and new.isdisjoint(self.fp)): + raise ValueError("One or more elements in `new` have " + "already been retrieved.") + + relevant_new = new & self.fn + nonrelevant_new = new - relevant_new + + self.tp |= relevant_new + self.fp |= nonrelevant_new + if self.tn: + if not new <= self.fn | self.tn: + raise ValueError("Newly retrieved items should be a subset " + "of currently unretrieved items.") + self.tn -= nonrelevant_new + self.fn -= relevant_new + self.update_counts() + + def precision(self): + try: + return float(self.num_tp) / (self.num_tp + self.num_fp) + except ZeroDivisionError: + log.logger.warning("Division by 0 in calculating precision: " + "tp = %d, fp = %d, fn = %d, tn = %d" % + (self.num_tp, self.num_fp, self.num_tn, self.num_tn)) + return 0.0 + + def recall(self): + try: + return float(self.num_tp) / (self.num_tp + self.num_fn) + except ZeroDivisionError: + log.logger.warning("Division by 0 in calculating recall: " + "tp = %d, fp = %d, fn = %d, tn = %d" % + (self.num_tp, self.num_fp, self.num_tn, self.num_tn)) + return 0.0 + + def fallout(self): + if self.num_tn == -1: + raise ValueError( + "Cannot determine fallout if universe is undefined") + try: + return float(self.num_fp) / (self.num_fp + self.num_tn) + except ZeroDivisionError: + log.logger.warning("Division by 0 in calculating fallout: " + "tp = %d, fp = %d, fn = %d, tn = %d" % + (self.num_tp, self.num_fp, self.num_tn, self.num_tn)) + return 0.0 + + def miss(self): + if self.num_tn == -1: + raise ValueError("Cannot determine miss if universe is undefined") + try: + return float(self.num_fn) / (self.num_fn + self.num_tn) + except ZeroDivisionError: + log.logger.warning("Division by 0 in calculating miss: " + "tp = %d, fp = %d, fn = %d, tn = %d" % + (self.num_tp, self.num_fp, self.num_tn, self.num_tn)) + return 0.0 + + def accuracy(self): + """Compute accuracy = |correct| / |universe| + + Not appropriate for IR, since over 99.9% is nonrelevant. A system that + labels everything as nonrelevant, would still have high accuracy. 
+ """ + if self.num_tn == -1: + raise ValueError( + "Cannot determine accuracy if universe is undefined") + try: + return float(self.num_tp + self.num_tn) / \ + (self.num_tp + self.num_fp + self.num_tn + self.num_fn) + except ZeroDivisionError: + log.logger.warning("Division by 0 in calculating accuracy: " + "tp = %d, fp = %d, fn = %d, tn = %d" % + (self.num_tp, self.num_fp, self.num_tn, self.num_tn)) + return 0.0 + + def f_score(self, beta=1): + """Compute F-measure or F-score. + + F is the weighted harmonic mean of recall R and precision P: + F = 2PR / (P + R) + In this case, R and P are evenly weighted. More generally: + F = (1 + b^2)PR / (b^2 * P + R) + If beta = 2, R is weighted twice as much as P. + If beta = 0.5, R is weighted half as much as P. + + """ + p = self.precision() + r = self.recall() + beta_squared = beta ** 2 + try: + return float((1 + beta_squared) * p * r) / (beta_squared * p + r) + except ZeroDivisionError: + return 0.0 + + def generality(self): + """Compute generality = |relevant| / |universe|""" + if self.num_tn == -1: + raise ValueError( + "Cannot determine generality if universe is undefined") + try: + return float(self.num_tp + self.num_fn) / \ + (self.num_tp + self.num_fp + self.num_tn + self.num_fn) + except ZeroDivisionError: + log.logger.warning("Division by 0 in calculating generality: " + "tp = %d, fp = %d, fn = %d, tn = %d" % + (self.num_tp, self.num_fp, self.num_tn, self.num_tn)) + return 0.0 diff --git a/linkpred/evaluation/tests/test_comparison.py b/linkpred/evaluation/tests/test_comparison.py new file mode 100644 index 0000000..b9966f8 --- /dev/null +++ b/linkpred/evaluation/tests/test_comparison.py @@ -0,0 +1,24 @@ +import networkx as nx +from nose.tools import assert_equal + +from linkpred.evaluation.comparison import DataSet +from linkpred.result import Result + + +def test_dataset_init(): + name = "test" + predictions = {("a", "b"): 1, ("b", "c"): 2} + test_network = nx.Graph() + test_network.add_edges_from([("a", "b"), ("b", "c"), ("c", "d"), ("c", "e")]) + test = Result(test_network, eligible=None) + steps = 5 + + d = DataSet(name, predictions, test, steps=steps) + assert_equal(d.name, name) + assert_equal(d.predictions, predictions) + assert_equal(d.steps, steps) + assert_equal(d.num_universe, 10) + + d = DataSet(name, predictions, test, exclude=set([("c", "d"), ("d", "e")]), + steps=steps) + assert_equal(d.num_universe, 8) diff --git a/linkpred/evaluation/tests/test_scoresheet.py b/linkpred/evaluation/tests/test_scoresheet.py new file mode 100644 index 0000000..5f1ec2a --- /dev/null +++ b/linkpred/evaluation/tests/test_scoresheet.py @@ -0,0 +1,99 @@ +import networkx as nx +from nose.tools import assert_dict_equal, assert_equal, assert_less, raises + +from linkpred.evaluation.scoresheet import BaseScoresheet, Pair, Scoresheet + + +class TestBaseScoresheet: + def setup(self): + self.n = 3 + self.scoresheet = BaseScoresheet( + zip("abcdefghijklmnopqrstuvwx", range(24)), n=self.n) + + def test_sets(self): + for i, s in enumerate(self.scoresheet.sets(), start=1): + assert_equal(len(s), i * self.n) + for i, s in enumerate(self.scoresheet.successive_sets()): + assert_equal(len(s), self.n) + + def test_sets_with_n(self): + n = 8 + for i, s in enumerate(self.scoresheet.sets(n=n), start=1): + assert_equal(len(s), i * n) + for s in self.scoresheet.successive_sets(n=n): + assert_equal(len(s), n) + + def test_sets_with_even_threshold(self): + threshold = 12 + for i, s in enumerate(self.scoresheet.sets(threshold=threshold), start=1): + assert_equal(len(s), 
i * self.n) + for s in self.scoresheet.successive_sets(threshold=threshold): + assert_equal(len(s), self.n) + + def test_with_too_large_threshold(self): + threshold = 25 + for s in self.scoresheet.sets(threshold=threshold): + assert_less(len(s), threshold) + for s in self.scoresheet.successive_sets(threshold=threshold): + assert_equal(len(s), self.n) + + def test_sets_with_uneven_threshold(self): + """ + If the threshold does not nicely fit a 'boundary', only the last set + should be affected. + """ + threshold = 10 + + result = list(enumerate(self.scoresheet.sets(threshold=threshold), start=1)) + for i, s in result: + assert_equal(len(s), i * self.n) + + result = list(self.scoresheet.successive_sets(threshold=threshold)) + for s in result: + assert_equal(len(s), self.n) + + def test_top(self): + top = self.scoresheet.top() + assert_dict_equal(top, dict(zip("opqrstuvwx", range(14, 24)))) + + top = self.scoresheet.top(2) + assert_dict_equal(top, dict(zip("wx", range(22, 24)))) + + top = self.scoresheet.top(100) + assert_equal(len(top), 24) + + +def test_pair(): + t = ('a', 'b') + pair = Pair(t) + assert_equal(pair, Pair(*t)) + assert_equal(pair, Pair('b', 'a')) + assert_equal(str(pair), "Pair(b, a)") + + +@raises(AssertionError) +def test_pair_identical_elements(): + Pair('a', 'a') + + +def test_scoresheet(): + sheet = Scoresheet() + t = ('a', 'b') + sheet[t] = 5 + assert_equal(len(sheet), 1) + assert_equal(sheet.items(), [(Pair('a', 'b'), 5.0)]) + assert_equal(sheet[t], 5.0) + del sheet[t] + assert_equal(len(sheet), 0) + + +def test_scoresheet_process_data(): + t = ('a', 'b') + d = {t: 5} + G = nx.Graph() + G.add_edge(*t, weight=5) + s = [(t, 5)] + + for x in (d, G, s): + sheet = Scoresheet(x) + assert_equal(sheet[t], 5.0) diff --git a/linkpred/evaluation/tests/test_static.py b/linkpred/evaluation/tests/test_static.py new file mode 100644 index 0000000..a6b9d84 --- /dev/null +++ b/linkpred/evaluation/tests/test_static.py @@ -0,0 +1,145 @@ +from nose.tools import * + +from linkpred.evaluation.static import StaticEvaluation + +class TestStaticEvaluation: + def setup(self): + self.ret = range(5) + self.rel = [3, 4, 5, 6] + self.num_universe = 20 + self.universe = range(self.num_universe) + + def test_init(self): + e = StaticEvaluation(self.ret, self.rel, self.universe) + assert_equal(len(e.tp), 2) + assert_equal(len(e.fp), 3) + assert_equal(len(e.tn), 13) + assert_equal(len(e.fn), 2) + + e_no_universe = StaticEvaluation(self.ret, self.rel) + assert_equal(len(e.tp), len(e_no_universe.tp)) + assert_equal(len(e.fp), len(e_no_universe.fp)) + assert_equal(len(e.fn), len(e_no_universe.fn)) + assert_equal(e_no_universe.tn, None) + + e_num_universe = StaticEvaluation(self.ret, self.rel, self.num_universe) + assert_equal(len(e.tp), 2) + assert_equal(len(e.fp), 3) + assert_equal(len(e.fn), 2) + assert_equal(len(e.tp), e.num_tp) + assert_equal(len(e.fp), e.num_fp) + assert_equal(len(e.fn), e.num_fn) + assert_equal(e.num_tn, 13) + + def test_update_retrieved(self): + e = StaticEvaluation(self.ret, self.rel, self.universe) + e.update_retrieved([6, 7]) + assert_equal(len(e.tp), 3) + assert_equal(len(e.fp), 4) + assert_equal(len(e.tn), 12) + assert_equal(len(e.fn), 1) + + assert_raises(ValueError, e.update_retrieved, [1]) # fp + assert_raises(ValueError, e.update_retrieved, [3]) # tp + assert_raises(ValueError, e.update_retrieved, ['a']) + + def test_update_retrieved_num_universe(self): + e = StaticEvaluation(self.ret, self.rel, self.num_universe) + e.update_retrieved([6, 7]) + assert_equal(len(e.tp), 3) + 
assert_equal(len(e.fp), 4) + assert_equal(len(e.fn), 1) + assert_equal(e.num_tp, 3) + assert_equal(e.num_fp, 4) + assert_equal(e.num_tn, 12) + assert_equal(e.num_fn, 1) + + assert_raises(ValueError, e.update_retrieved, [1]) # fp + assert_raises(ValueError, e.update_retrieved, [3]) # tp + + def test_update_retrieved_full(self): + e = StaticEvaluation(relevant=range(5), universe=20) + e.update_retrieved(range(10)) + e.update_retrieved(range(10, 20)) + assert_equal(e.num_tp, 5) + assert_equal(e.num_fp, 15) + assert_equal(e.num_fn, 0) + assert_equal(e.num_tn, 0) + + @raises(ValueError) + def test_ret_no_universe_subset(self): + e = StaticEvaluation([1, 2, 'a'], [2, 3], range(10)) + + @raises(ValueError) + def test_rel_no_universe_subset(self): + e = StaticEvaluation([1, 2], [2, 3, 'a'], range(10)) + + @raises(ValueError) + def test_ret_larger_than_universe(self): + e = StaticEvaluation(range(11), [2, 3], 10) + + @raises(ValueError) + def test_rel_larger_than_universe(self): + e = StaticEvaluation([1, 2], range(11), 10) + + def test_measures(self): + e = StaticEvaluation(self.ret, self.rel, self.universe) + assert_equal(e.precision(), float(2) / 5) + assert_equal(e.recall(), float(2) / 4) + assert_equal(e.fallout(), float(3) / 16) + assert_equal(e.miss(), float(2) / 15) + assert_equal(e.accuracy(), float(15) / 20) + assert_equal(e.generality(), float(4) / 20) + + e = StaticEvaluation(self.ret, self.rel) + assert_equal(e.precision(), float(2) / 5) + assert_equal(e.recall(), float(2) / 4) + assert_raises(ValueError, e.fallout) + assert_raises(ValueError, e.miss) + assert_raises(ValueError, e.accuracy) + assert_raises(ValueError, e.generality) + + e = StaticEvaluation(self.ret, self.rel, self.num_universe) + assert_equal(e.precision(), float(2) / 5) + assert_equal(e.recall(), float(2) / 4) + assert_equal(e.fallout(), float(3) / 16) + assert_equal(e.miss(), float(2) / 15) + assert_equal(e.accuracy(), float(15) / 20) + assert_equal(e.generality(), float(4) / 20) + + def test_measures_with_zero_universe(self): + e = StaticEvaluation([], [], []) + assert_equal(e.precision(), 0.) + assert_equal(e.recall(), 0.) + assert_equal(e.f_score(), 0.) + assert_equal(e.fallout(), 0.) + assert_equal(e.miss(), 0.) + assert_equal(e.accuracy(), 0.) + assert_equal(e.generality(), 0.) + + def test_measures_with_zero_num_universe(self): + e = StaticEvaluation([], [], 0) + assert_equal(e.precision(), 0.) + assert_equal(e.recall(), 0.) + assert_equal(e.f_score(), 0.) + assert_equal(e.fallout(), 0.) + assert_equal(e.miss(), 0.) + assert_equal(e.accuracy(), 0.) + assert_equal(e.generality(), 0.) + + def test_measures_with_zero_no_universe(self): + e = StaticEvaluation([], []) + assert_equal(e.precision(), 0.) + assert_equal(e.recall(), 0.) + assert_equal(e.f_score(), 0.) + assert_raises(ValueError, e.fallout) + assert_raises(ValueError, e.miss) + assert_raises(ValueError, e.accuracy) + assert_raises(ValueError, e.generality) + + def test_f_score(self): + e = StaticEvaluation(self.ret, self.rel) + assert_almost_equal(e.f_score(), 4. / 9.) + # $F_\beta = \frac{\beta^2 + 1 |rel \cap ret|}{\beta^2 |rel| + |ret|}$ + assert_almost_equal(e.f_score(0.5), 1.25 * 2. / 6.) + assert_almost_equal(e.f_score(2), 10. / 21.) 
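+        # Worked numbers for the assertions above (ret = range(5) and
+        # rel = [3, 4, 5, 6] from setup): P = 2/5 and R = 2/4, so
+        # F1 = 2PR / (P + R) = 0.4 / 0.9 = 4/9,
+        # F0.5 = 1.25PR / (0.25P + R) = 0.25 / 0.6 = 1.25 * 2/6, and
+        # F2 = 5PR / (4P + R) = 1 / 2.1 = 10/21.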
diff --git a/linkpred/network/__init__.py b/linkpred/network/__init__.py new file mode 100644 index 0000000..91e707f --- /dev/null +++ b/linkpred/network/__init__.py @@ -0,0 +1,5 @@ +from .addremove import * +from .algorithms import * +from .community import * +from .misc import * +from .pajek import * diff --git a/linkpred/network/addremove.py b/linkpred/network/addremove.py new file mode 100644 index 0000000..2a41e08 --- /dev/null +++ b/linkpred/network/addremove.py @@ -0,0 +1,38 @@ +from random import sample +from ..util import all_pairs, log + +__all__ = ['add_random_edges', 'remove_random_edges', + 'add_remove_random_edges'] + + +def add_random_edges(G, pct): + edges = G.edges() + m = len(edges) + to_add = int(m * pct) + log.logger.debug("Will add %d edges to %d (%f)" % (to_add, m, pct)) + + new_edges = set(all_pairs(G.nodes())) - set(edges) + G.add_edges_from(sample(new_edges, to_add), weight=1) + + +def remove_random_edges(G, pct): + edges = G.edges() + m = len(edges) + to_remove = int(m * pct) + + log.logger.debug("Will remove %d edges of %d (%f)" % (to_remove, m, pct)) + G.remove_edges_from(sample(edges, to_remove)) + + +def add_remove_random_edges(G, pct_add, pct_remove): + edges = G.edges() + m = len(edges) + to_add = int(m * pct_add) + to_remove = int(m * pct_remove) + log.logger.debug("Will add %d (%f) edges to and remove" + "%d (%f) edges from %d" % + (to_add, pct_add, to_remove, pct_remove, m)) + + new_edges = set(all_pairs(G.nodes())) - set(edges) + G.remove_edges_from(sample(edges, to_remove)) + G.add_edges_from(sample(new_edges, to_add)) diff --git a/linkpred/network/algorithms.py b/linkpred/network/algorithms.py new file mode 100644 index 0000000..79b2844 --- /dev/null +++ b/linkpred/network/algorithms.py @@ -0,0 +1,75 @@ +import networkx +import numpy + +from ..util import log + +__all__ = ["rooted_pagerank", "simrank"] + + +def rooted_pagerank(G, root, alpha=0.85, beta=0, weight='weight'): + """Return the rooted PageRank of all nodes with respect to node `root`. + + Parameters + ---------- + + G : a networkx.(Di)Graph + network to compute PR on + + root : a node from the network + the node that will be the starting point of all random walks + + alpha : float + PageRank probability that we will advance to a neighbour of the + current node in a random walk + + beta : float or int + Normally, we return to the root node with probability 1 - alpha. + With this parameter, we can also advance to a random other node in the + network with probability beta. Thus, we get back to the root node with + probability 1 - alpha - beta. This is off (0) by default. + + weight : string or None + The edge attribute that holds the numerical value used for + the edge weight. If None then treat as unweighted. 
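+
+    Example
+    -------
+    A minimal illustrative sketch on a small path graph (default parameters):
+
+    >>> import networkx as nx
+    >>> pr = rooted_pagerank(nx.path_graph(4), root=0)
+    >>> pr[0] > pr[3]  # nodes near the root outrank distant ones
+    True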
+ + """ + personalization = dict.fromkeys(G, beta) + personalization[root] = 1 - beta + + return networkx.pagerank_scipy(G, alpha, personalization, weight=weight) + + +def simrank(G, nodelist=None, c=0.8, num_iterations=10, weight='weight'): + r"""Calculate SimRank matrix for nodes in nodelist + + SimRank is defined as + + sim(u, v) = \frac{c}{|\Gamma(u)| |\Gamma(v)|} \sum_{p \in \Gamma(u)} + \sum_{q \in \Gamma(v)} sim(p, q) + + """ + n = len(G) + M = raw_google_matrix(G, nodelist=nodelist, weight=weight) + sim = numpy.identity(n, dtype=numpy.float32) + for i in range(num_iterations): + log.logger.debug("Starting SimRank iteration %d" % i) + temp = c * M.T * sim * M + sim = temp + numpy.identity(n) - numpy.diag(numpy.diag(temp)) + return sim + + +def raw_google_matrix(G, nodelist=None, weight='weight'): + """Calculate the raw Google matrix (stochastic without teleportation)""" + M = networkx.to_numpy_matrix(G, nodelist=nodelist, dtype=numpy.float32, + weight=weight) + n, m = M.shape # should be square + assert n == m and n > 0 + # Find 'dangling' nodes, i.e. nodes whose row's sum = 0 + dangling = numpy.where(M.sum(axis=1) == 0) + # add constant to dangling nodes' row + for d in dangling[0]: + M[d] = 1.0 / n + # Normalize. We now have the 'raw' Google matrix (cf. example on p. 11 of + # Langville & Meyer (2006)). + M = M / M.sum(axis=1) + return M diff --git a/linkpred/network/community.py b/linkpred/network/community.py new file mode 100644 index 0000000..125a797 --- /dev/null +++ b/linkpred/network/community.py @@ -0,0 +1,523 @@ +""" +This module implements community detection. +""" +__all__ = ["partition_at_level", "modularity", "best_partition", + "generate_dendogram", "induced_graph"] +__author__ = """Thomas Aynaud (thomas.aynaud@lip6.fr)""" +# Copyright (C) 2009 by +# Thomas Aynaud +# All rights reserved. +# BSD license. + +__PASS_MAX = -1 +__MIN = 0.0000001 + +import networkx as nx +import sys +import types +import array + + +def partition_at_level(dendogram, level): + """Return the partition of the nodes at the given level + + A dendogram is a tree and each level is a partition of the graph nodes. + Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1. + The higher the level is, the bigger are the communities + + Parameters + ---------- + dendogram : list of dict + a list of partitions, ie dictionnaries where keys of the i+1 are the values of the i. 
+ level : int + the level which belongs to [0..len(dendogram)-1] + + Returns + ------- + partition : dictionnary + A dictionary where keys are the nodes and the values are the set it belongs to + + Raises + ------ + KeyError + If the dendogram is not well formed or the level is too high + + See Also + -------- + best_partition which directly combines partition_at_level and generate_dendogram to obtain the partition of highest modularity + + Examples + -------- + >>> G=nx.erdos_renyi_graph(100, 0.01) + >>> dendo = generate_dendogram(G) + >>> for level in range(len(dendo) - 1) : + >>> print "partition at level", level, "is", partition_at_level(dendo, level) + """ + partition = dendogram[0].copy() + for index in range(1, level + 1): + for node, community in partition.iteritems(): + partition[node] = dendogram[index][community] + return partition + + +def modularity(partition, graph): + """Compute the modularity of a partition of a graph + + Parameters + ---------- + partition : dict + the partition of the nodes, i.e a dictionary where keys are their nodes and values the communities + graph : networkx.Graph + the networkx graph which is decomposed + + Returns + ------- + modularity : float + The modularity + + Raises + ------ + KeyError + If the partition is not a partition of all graph nodes + ValueError + If the graph has no link + TypeError + If graph is not a networkx.Graph + + References + ---------- + .. 1. Newman, M.E.J. & Girvan, M. Finding and evaluating community structure in networks. Physical Review E 69, 26113(2004). + + Examples + -------- + >>> G=nx.erdos_renyi_graph(100, 0.01) + >>> part = best_partition(G) + >>> modularity(part, G) + """ + if not isinstance(graph, nx.Graph): + raise TypeError("Bad graph type, use only non directed graph") + + inc = {} + deg = {} + links = graph.size(weight='weight') + if links == 0: + raise ValueError("A graph without link has an undefined modularity") + + for node in graph: + com = partition[node] + deg[com] = deg.get(com, 0.) + graph.degree(node, weight='weight') + for neighbor, datas in graph[node].iteritems(): + weight = datas.get("weight", 1) + if partition[neighbor] == com: + if neighbor == node: + inc[com] = inc.get(com, 0.) + float(weight) + else: + inc[com] = inc.get(com, 0.) + float(weight) / 2. + + res = 0. + for com in set(partition.values()): + res += ( + inc.get(com, 0.) / links) - (deg.get(com, 0.) / (2. * links)) ** 2 + return res + + +def best_partition(graph, partition=None): + """Compute the partition of the graph nodes which maximises the modularity + (or try..) using the Louvain heuristices + + This is the partition of highest modularity, i.e. the highest partition of the dendogram + generated by the Louvain algorithm. + + Parameters + ---------- + graph : networkx.Graph + the networkx graph which is decomposed + partition : dict, optionnal + the algorithm will start using this partition of the nodes. It's a dictionary where keys are their nodes and values the communities + + Returns + ------- + partition : dictionnary + The partition, with communities numbered from 0 to number of communities + + Raises + ------ + NetworkXError + If the graph is not Eulerian. + + See Also + -------- + generate_dendogram to obtain all the decompositions levels + + Notes + ----- + Uses Louvain algorithm + + References + ---------- + .. 1. Blondel, V.D. et al. Fast unfolding of communities in large networks. J. Stat. Mech 10008, 1-12(2008). 
+ + Examples + -------- + >>> #Basic usage + >>> G=nx.erdos_renyi_graph(100, 0.01) + >>> part = best_partition(G) + + >>> #other example to display a graph with its community : + >>> #better with karate_graph() as defined in networkx examples + >>> #erdos renyi don't have true community structure + >>> G = nx.erdos_renyi_graph(30, 0.05) + >>> #first compute the best partition + >>> partition = community.best_partition(G) + >>> #drawing + >>> size = float(len(set(partition.values()))) + >>> pos = nx.spring_layout(G) + >>> count = 0. + >>> for com in set(partition.values()) : + >>> count = count + 1. + >>> list_nodes = [nodes for nodes in partition.keys() + >>> if partition[nodes] == com] + >>> nx.draw_networkx_nodes(G, pos, list_nodes, node_size = 20, + node_color = str(count / size)) + >>> nx.draw_networkx_edges(G,pos, alpha=0.5) + >>> plt.show() + """ + dendo = generate_dendogram(graph, partition) + return partition_at_level(dendo, len(dendo) - 1) + + +def generate_dendogram(graph, part_init=None): + """Find communities in the graph and return the associated dendogram + + A dendogram is a tree and each level is a partition of the graph nodes. Level 0 is the first partition, which contains the smallest communities, and the best is len(dendogram) - 1. The higher the level is, the bigger are the communities + + + Parameters + ---------- + graph : networkx.Graph + the networkx graph which will be decomposed + part_init : dict, optionnal + the algorithm will start using this partition of the nodes. It's a dictionary where keys are their nodes and values the communities + + Returns + ------- + dendogram : list of dictionaries + a list of partitions, ie dictionnaries where keys of the i+1 are the values of the i. and where keys of the first are the nodes of graph + + Raises + ------ + TypeError + If the graph is not a networkx.Graph + + See Also + -------- + best_partition + + Notes + ----- + Uses Louvain algorithm + + References + ---------- + .. 1. Blondel, V.D. et al. Fast unfolding of communities in large networks. J. Stat. Mech 10008, 1-12(2008). 
+ + Examples + -------- + >>> G=nx.erdos_renyi_graph(100, 0.01) + >>> dendo = generate_dendogram(G) + >>> for level in range(len(dendo) - 1) : + >>> print "partition at level", level, "is", partition_at_level(dendo, level) + """ + if not isinstance(graph, nx.Graph): + raise TypeError("Bad graph type, use only non directed graph") + current_graph = graph.copy() + status = Status() + status.init(current_graph, part_init) + mod = __modularity(status) + status_list = list() + __one_level(current_graph, status) + new_mod = __modularity(status) + partition = __renumber(status.node2com) + status_list.append(partition) + mod = new_mod + current_graph = induced_graph(partition, current_graph) + status.init(current_graph) + + while True: + __one_level(current_graph, status) + new_mod = __modularity(status) + if new_mod - mod < __MIN: + break + partition = __renumber(status.node2com) + status_list.append(partition) + mod = new_mod + current_graph = induced_graph(partition, current_graph) + status.init(current_graph) + return status_list[:] + + +def induced_graph(partition, graph): + """Produce the graph where nodes are the communities + + there is a link of weight w between communities if the sum of the weights of the links between their elements is w + + Parameters + ---------- + partition : dict + a dictionary where keys are graph nodes and values the part the node belongs to + graph : networkx.Graph + the initial graph + + Returns + ------- + g : networkx.Graph + a networkx graph where nodes are the parts + + Examples + -------- + >>> n = 5 + >>> g = nx.complete_graph(2*n) + >>> part = {} + >>> for node in g.nodes() : + >>> part[node] = node % 2 + >>> ind = induced_graph(part, g) + >>> goal = nx.Graph() + >>> goal.add_weighted_edges_from([(0,1,n*n),(0,0,n*(n-1)/2), (1, 1, n*(n-1)/2)]) + >>> nx.is_isomorphic(int, goal) + True + """ + ret = nx.Graph() + ret.add_nodes_from(partition.values()) + + for node1, node2, datas in graph.edges_iter(data=True): + weight = datas.get("weight", 1) + com1 = partition[node1] + com2 = partition[node2] + w_prec = ret.get_edge_data(com1, com2, {"weight": 0}).get("weight", 1) + ret.add_edge(com1, com2, weight=w_prec + weight) + + return ret + + +def __renumber(dictionary): + """Renumber the values of the dictionary from 0 to n + """ + count = 0 + ret = dictionary.copy() + new_values = {} + + for key in dictionary.keys(): + value = dictionary[key] + new_value = new_values.get(value, -1) + if new_value == -1: + new_values[value] = count + new_value = count + count = count + 1 + ret[key] = new_value + + return ret + + +def __load_binary(data): + """Load binary graph as used by the cpp implementation of this algorithm + """ + if isinstance(data, types.StringType): + data = open(data, "rb") + + reader = array.array("I") + reader.fromfile(data, 1) + num_nodes = reader.pop() + reader = array.array("I") + reader.fromfile(data, num_nodes) + cum_deg = reader.tolist() + num_links = reader.pop() + reader = array.array("I") + reader.fromfile(data, num_links) + links = reader.tolist() + graph = nx.Graph() + graph.add_nodes_from(range(num_nodes)) + prec_deg = 0 + + for index in range(num_nodes): + last_deg = cum_deg[index] + neighbors = links[prec_deg:last_deg] + graph.add_edges_from([(index, int(neigh)) for neigh in neighbors]) + prec_deg = last_deg + + return graph + + +def __one_level(graph, status): + """Compute one level of communities + """ + modif = True + nb_pass_done = 0 + cur_mod = __modularity(status) + new_mod = cur_mod + + while modif and nb_pass_done != __PASS_MAX: + 
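+        # One "pass" over all nodes: each node is tentatively removed from its
+        # current community and re-inserted into the neighbouring community
+        # with the largest modularity gain (possibly staying where it was).
+        # Passes repeat as long as at least one node moved and the overall
+        # modularity improvement is at least __MIN.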
cur_mod = new_mod + modif = False + nb_pass_done += 1 + + for node in graph.nodes(): + com_node = status.node2com[node] + degc_totw = status.gdegrees.get( + node, 0.) / (status.total_weight * 2.) + neigh_communities = __neighcom(node, graph, status) + __remove(node, com_node, + neigh_communities.get(com_node, 0.), status) + best_com = com_node + best_increase = 0 + for com, dnc in neigh_communities.iteritems(): + incr = dnc - status.degrees.get(com, 0.) * degc_totw + if incr > best_increase: + best_increase = incr + best_com = com + __insert(node, best_com, + neigh_communities.get(best_com, 0.), status) + if best_com != com_node: + modif = True + new_mod = __modularity(status) + if new_mod - cur_mod < __MIN: + break + + +class Status: + """ + To handle several data in one struct. + + Could be replaced by named tuple, but don't want to depend on python 2.6 + """ + node2com = {} + total_weight = 0 + internals = {} + degrees = {} + gdegrees = {} + + def __init__(self): + self.node2com = {} + self.total_weight = 0 + self.degrees = {} + self.gdegrees = {} + self.internals = {} + self.loops = {} + + def __str__(self): + return ("node2com : " + str(self.node2com) + " degrees : " + + str(self.degrees) + " internals : " + str(self.internals) + + " total_weight : " + str(self.total_weight)) + + def copy(self): + """Perform a deep copy of status""" + new_status = Status() + new_status.node2com = self.node2com.copy() + new_status.internals = self.internals.copy() + new_status.degrees = self.degrees.copy() + new_status.gdegrees = self.gdegrees.copy() + new_status.total_weight = self.total_weight + + def init(self, graph, part=None): + """Initialize the status of a graph with every node in one community""" + count = 0 + self.node2com = {} + self.total_weight = 0 + self.degrees = {} + self.gdegrees = {} + self.internals = {} + self.total_weight = graph.size(weight='weight') + if part is None: + for node in graph.nodes(): + self.node2com[node] = count + deg = float(graph.degree(node, weight='weight')) + self.degrees[count] = deg + self.gdegrees[node] = deg + self.loops[node] = float(graph.get_edge_data(node, node, + {"weight": 0}).get("weight", 1)) + self.internals[count] = self.loops[node] + count = count + 1 + else: + for node in graph.nodes(): + com = part[node] + self.node2com[node] = com + deg = float(graph.degree(node, weigh='weight')) + self.degrees[com] = self.degrees.get(com, 0) + deg + self.gdegrees[node] = deg + inc = 0. + for neighbor, datas in graph[node].iteritems(): + weight = datas.get("weight", 1) + if part[neighbor] == com: + if neighbor == node: + inc += float(weight) + else: + inc += float(weight) / 2. + self.internals[com] = self.internals.get(com, 0) + inc + + +def __neighcom(node, graph, status): + """ + Compute the communities in the neighborood of node in the graph given + with the decomposition node2com + """ + weights = {} + for neighbor, datas in graph[node].iteritems(): + if neighbor != node: + weight = datas.get("weight", 1) + neighborcom = status.node2com[neighbor] + weights[neighborcom] = weights.get(neighborcom, 0) + weight + + return weights + + +def __remove(node, com, weight, status): + """ Remove node from community com and modify status""" + status.degrees[com] = (status.degrees.get(com, 0.) + - status.gdegrees.get(node, 0.)) + status.internals[com] = float(status.internals.get(com, 0.) 
- + weight - status.loops.get(node, 0.)) + status.node2com[node] = -1 + + +def __insert(node, com, weight, status): + """ Insert node into community and modify status""" + status.node2com[node] = com + status.degrees[com] = (status.degrees.get(com, 0.) + + status.gdegrees.get(node, 0.)) + status.internals[com] = float(status.internals.get(com, 0.) + + weight + status.loops.get(node, 0.)) + + +def __modularity(status): + """ + Compute the modularity of the partition of the graph faslty using status precomputed + """ + links = float(status.total_weight) + result = 0. + for community in set(status.node2com.values()): + in_degree = status.internals.get(community, 0.) + degree = status.degrees.get(community, 0.) + if links > 0: + result = result + in_degree / links - ((degree / ( + 2. * links)) ** 2) + return result + + +def __main(): + """Main function to mimic C++ version behavior""" + try: + filename = sys.argv[1] + graphfile = __load_binary(filename) + partition = best_partition(graphfile) + print >> sys.stderr, str(modularity(partition, graphfile)) + for elem, part in partition.iteritems(): + print str(elem) + " " + str(part) + except (IndexError, IOError): + print "Usage : ./community filename" + print "find the communities in graph filename and display the dendogram" + print "Parameters:" + print "filename is a binary file as generated by the " + print "convert utility distributed with the C implementation" + + +if __name__ == "__main__": + __main() diff --git a/linkpred/network/misc.py b/linkpred/network/misc.py new file mode 100644 index 0000000..9994fb7 --- /dev/null +++ b/linkpred/network/misc.py @@ -0,0 +1,73 @@ +import networkx as nx + +#TODO Examine if we can use nx.single-source_shortest_path_length here + + +def neighbourhood_search(G, n, k=1): + """Get k-neighbourhood of node n""" + dist = {} + dist[n] = 0 + queue = [n] + while queue: + v = queue.pop(0) + if dist[v] == k: + break + for w in G[v]: + if w not in dist: + queue.append(w) + dist[w] = dist[v] + 1 + return dist + + +def neighbourhood_graph(G, n, k=1): + """Get k-neighbourhood subgraph of node n""" + dist = neighbourhood_search(G, n, k) + return G.subgraph(dist.keys()) + + +def edge_weights(G, weight='weight'): + """Iterator over edge weights in G""" + for u, nbrdict in G.adjacency_iter(): + for edgedata in nbrdict.itervalues(): + yield edgedata[weight] + + +def from_biadjacency_matrix(A, row_items=None, col_items=None, weight='weight'): + import numpy + + kind_to_python_type = {'f': float, + 'i': int, + 'u': int, + 'b': bool, + 'c': complex, + 'S': str} + + dt = A.dtype + nrows, ncols = A.shape + try: + python_type = kind_to_python_type[dt.kind] + except: + raise TypeError("Unknown numpy data type: %s" % dt) + + if row_items is None: + row_items = range(nrows) + elif len(row_items) != nrows: + raise ValueError("Expected %d row items, but got %d instead" % + (nrows, len(row_items))) + if col_items is None: + col_items = range(nrows, nrows + ncols) + elif len(col_items) != ncols: + raise ValueError("Expected %d col items, but got %d instead" % + (ncols, len(col_items))) + + G = nx.Graph() + G.add_nodes_from(row_items) + G.add_nodes_from(col_items) + # get a list of edges + x, y = numpy.asarray(A).nonzero() + + # handle numpy constructed data type + G.add_edges_from((row_items[u], col_items[v], {weight: python_type(A[u, v])}) + for u, v in zip(x, y)) + + return G diff --git a/linkpred/network/pajek.py b/linkpred/network/pajek.py new file mode 100644 index 0000000..b309c65 --- /dev/null +++ b/linkpred/network/pajek.py @@ 
-0,0 +1,202 @@ +# Fork of networkx.readwrite.pajek +import csv +import networkx +from networkx.utils import is_string_like + +__all__ = ['read_pajek', 'parse_pajek', 'write_pajek'] + + +def write_pajek(G, path, weight='weight', clusterpath=None, clusterlabel='cluster'): + """Write in Pajek format to path. + + Parameters + ---------- + G : graph + A networkx graph + path : file or string + File or filename to write. + Filenames ending in .gz or .bz2 will be compressed. + weight : string + Edge attribute for edge weight + clusterpath : file or string + Optional path of partition file + clusterlabel : string + Label of the partition. Default: 'cluster' + + Examples + -------- + >>> G=nx.path_graph(4) + >>> nx.write_pajek(G, "test.net") + """ + + with open(path, mode="w") as fh: + if G.name: + fh.write("*network \"%s\"\n" % G.name) + + # write nodes with attributes + fh.write("*vertices %s\n" % G.order()) + clu = "*vertices %s\n" % G.order() + nodes = G.nodes() + # make dictionary mapping nodes to integers + nodenumber = dict(zip(nodes, range(1, len(nodes) + 1))) + clusters = {} + i = 0 + for n in nodes: + na = G.node[n].copy() + x = na.pop('x', None) + y = na.pop('y', None) + # It seems better if we just avoid the node_id in the dict altogether... + node_id = nodenumber[n] + shape = na.pop('shape', None) + fh.write("%d \"%s\" %f %f %s " % (node_id, n, + float(x), float(y), shape)) + fh.write("%d \"%s\" " % (node_id, n)) + for attr in (x, y): + if attr is not None: + fh.write("%f " % float(x)) + if shape: + fh.write("%s " % shape) + for k, v in na.iteritems(): + fh.write("%s \"%s\" " % (k, v)) + fh.write("\n") + + if clusterpath: + if G.node[n][clusterlabel] not in clusters: + i += 1 + clusters[G.node[n][clusterlabel]] = i + clu += "%d\n" % clusters[G.node[n][clusterlabel]] + + # write edges with attributes + if G.is_directed(): + fh.write("*arcs\n") + else: + fh.write("*edges\n") + for u, v, edgedata in G.edges(data=True): + d = edgedata.copy() + value = d.pop(weight, 1.0) # use 1 as default edge value + fh.write("%d %d %f" % (nodenumber[u], nodenumber[v], float(value))) + for k, v in d.iteritems(): + if is_string_like(v) and " " in v: + # add quotes to any values with a blank space + v = "\"%s\"" % v + fh.write("%s %s " % (k, v)) + fh.write("\n") + fh.close() + + if clusterpath: + with open(clusterpath, mode="w") as fh: + clusterpath.write(clu) + + +def read_pajek(path, weight='weight'): + """Read graph in Pajek format from path. + + Returns a MultiGraph or MultiDiGraph. + + Parameters + ---------- + path : file or string + File or filename to write. + Filenames ending in .gz or .bz2 will be compressed. + weight : string + Edge attribute for edge weight + + Examples + -------- + >>> G=nx.path_graph(4) + >>> nx.write_pajek(G, "test.net") + >>> G=nx.read_pajek("test.net") + + To create a Graph instead of a MultiGraph use + + >>> G1=nx.Graph(G) + + """ + with open(path) as fh: + G = parse_pajek(fh, weight=weight) + return G + + +def parse_line(l): + # XXX This is not ideal: we instantiate a new object for each line... + return csv.reader(l, delimiter=' ', skipinitialspace=True).next() + + +def parse_pajek(lines, weight='weight'): + """Parse pajek format graph from string or iterable. + + Primarily used as a helper for read_pajek(). 
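A minimal sketch of driving parse_pajek with an in-memory list of Pajek lines (the three-node network below is made up, and the snippet assumes the Python 2 / networkx 1.x environment the rest of this module targets):

    from linkpred.network.pajek import parse_pajek

    pajek_lines = [
        '*vertices 3',
        '1 "A"',
        '2 "B"',
        '3 "C"',
        '*edges',
        '1 2 2.0',
        '2 3 1.0',
    ]
    G = parse_pajek(pajek_lines)
    print(sorted(G.nodes()))         # ['A', 'B', 'C']
    print(G['A']['B'][0]['weight'])  # 2.0 (a MultiGraph, hence the [0] edge key)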
+ + See Also + -------- + read_pajek() + + """ + G = networkx.MultiDiGraph() + nodelabels = {} + nnodes = 0 + for l in lines: + if not l.split(): # Ignore empty lines + pass + elif l.startswith("*"): + if l.lower().startswith("*network"): + try: + G.name = l.split()[1] + except: + pass + elif l.lower().startswith("*vertices"): + state = "vertices" + nnodes = int(l.split()[1]) + elif l.lower().startswith("*edges"): + state = "edges" + elif l.lower().startswith("*arcs"): + state = "arcs" + elif l.lower().startswith("*matrix"): + raise NotImplementedError( + "Pajek matrix format is not yet supported.") + elif state == "vertices": + splitline = parse_line([l]) + node_id, label = splitline[0:2] + if label in G.adj: + raise networkx.NetworkXException( + "Node already added: " + label) + G.add_node(label) + nodelabels[node_id] = label + G.node[label] = {'node_id': node_id} + try: + x = float(splitline[2]) + y = float(splitline[3]) + try: + z = float(splitline[4]) + shape = splitline.pop(5) + G.node[label].update({'z': z}) + except ValueError: + shape = splitline[4] + G.node[label].update({'x': x, 'y': y, 'shape': shape}) + extra_attr = zip(splitline[5::2], splitline[6::2]) + except (ValueError, IndexError): + extra_attr = zip(splitline[2::2], splitline[3::2]) + G.node[label].update(extra_attr) + elif state == "edges" or state == "arcs": + if G.is_directed() and state == "edges": + # The Pajek format supports networks with both directed and + # edges. Since networkx does not, make this an undirected + # network as soon as we encounter one undirected edge. + G = networkx.MultiGraph(G) + splitline = l.split() + ui, vi = splitline[0:2] + u = nodelabels.get(ui, ui) + v = nodelabels.get(vi, vi) + edge_data = {} + try: + w = float(splitline[2]) + edge_data.update({weight: w}) + extra_attr = zip(splitline[3::2], splitline[4::2]) + except (ValueError, IndexError): + extra_attr = zip(splitline[2::2], splitline[3::2]) + edge_data.update(extra_attr) + G.add_edge(u, v, **edge_data) + if nnodes != len(G): + raise networkx.NetworkXException( + "Wrong number of nodes in Pajek stream!") + return G diff --git a/linkpred/network/tests/test_misc.py b/linkpred/network/tests/test_misc.py new file mode 100644 index 0000000..9935783 --- /dev/null +++ b/linkpred/network/tests/test_misc.py @@ -0,0 +1,27 @@ +import networkx as nx +from linkpred.network.misc import from_biadjacency_matrix +from nose.tools import * + +class TestMisc: + + def setup(self): + self.G = nx.bipartite_gnmk_random_graph(40, 60, 50) + self.M = nx.bipartite.biadjacency_matrix(self.G, range(40)) + + def test_biadjacency_matrix1(self): + H = from_biadjacency_matrix(self.M, range(40), range(40, 100)) + assert_equal(sorted(self.G.edges()), sorted(H.edges())) + assert_equal(sorted(self.G.nodes()), sorted(H.nodes())) + + def test_biadjacency_matrix2(self): + H = from_biadjacency_matrix(self.M) + assert_equal(sorted(self.G.edges()), sorted(H.edges())) + assert_equal(sorted(self.G.nodes()), sorted(H.nodes())) + + @raises(ValueError) + def test_biadjacency_matrix_wrong_row_items(self): + from_biadjacency_matrix(self.M, range(41), range(41, 101)) + + @raises(ValueError) + def test_biadjacency_matrix_wrong_col_items(self): + from_biadjacency_matrix(self.M, range(40), range(40, 101)) diff --git a/linkpred/predictors/__init__.py b/linkpred/predictors/__init__.py new file mode 100644 index 0000000..1c40d8b --- /dev/null +++ b/linkpred/predictors/__init__.py @@ -0,0 +1,5 @@ +from .base import * +from .eigenvector import * +from .misc import * +from .neighbour import 
* +from .path import * diff --git a/linkpred/predictors/base.py b/linkpred/predictors/base.py new file mode 100644 index 0000000..05c5715 --- /dev/null +++ b/linkpred/predictors/base.py @@ -0,0 +1,137 @@ +from .util import neighbourhood + +__all__ = ["Predictor", + "all_predictors"] + + +class Predictor(object): + """ + Predictor based on graph structure + + This can also be used for bipartite networks or other networks + involving nodes that should not be included in the predictions. + To distinguish between 'eligible' and 'non-eligible' nodes, the + graph can set a node attribute that returns true for eligible + nodes and false for non-eligible ones. + + For instance: + + >>> B = nx.Graph() + >>> B.add_nodes_from([1,2,3,4], bipartite=0) # Add the node attribute "bipartite" + >>> B.add_nodes_from(['a','b','c'], bipartite=1) + >>> B.add_edges_from([(1,'a'), (1,'b'), (2,'b'), (2,'c'), (3,'c'), (4,'a')]) + >>> p = Predictor(B, eligible='bipartite') + >>> p.eligible_node(1) + 0 + >>> sorted(p.eligible_nodes()) + ['a', 'b', 'c'] + + """ + def __init__(self, G, eligible=None, only_new=False): + """ + Initialize predictor + + Arguments + --------- + G : nx.Graph + a graph + + eligible : a string or None + If this is a string, it is used to distinguish between eligible + and non-eligible nodes. We only try to predict links between + two eligible nodes. + + only_new : True|False + If True, this ensures that we only predict 'new' links that are not + yet present in G. Otherwise, we predict all links, regardless of whether + or not they are in G. + + """ + self.G = G + self.eligible_attr = eligible + self.only_new = only_new + + # Add a decorator to predict(), to do the necessary postprocessing for + # filtering out new links if only_new is False. We do this in __init__() such + # that child classes need not be changed. 
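A hedged sketch of what the wrapper defined below buys us, mirroring the behaviour exercised by test_postprocessing in the tests further down: with only_new=True, no predicted pair should coincide with an edge that already exists in the graph.

    import networkx as nx
    from linkpred.predictors.neighbour import CommonNeighbours

    G = nx.karate_club_graph()
    prediction = CommonNeighbours(G, only_new=True)()
    for link in prediction:           # Scoresheet behaves like a dict keyed by node pairs
        assert not G.has_edge(*link)  # existing edges have been filtered out by the wrapper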
+ def add_postprocessing(func): + def predict_and_postprocess(*args, **kwargs): + scoresheet = func(*args, **kwargs) + if self.only_new: + for u, v in self.G.edges_iter(): + try: + del scoresheet[(u, v)] + except KeyError: + pass + return scoresheet + predict_and_postprocess.__name__ = func.__name__ + predict_and_postprocess.__doc__ = func.__doc__ + predict_and_postprocess.__dict__.update(func.__dict__) + return predict_and_postprocess + + self.predict = add_postprocessing(self.predict) + + def __str__(self): + if not self.name: + self.name = self.__class__.__name__ + return self.name + + def __call__(self, *args, **kwargs): + return self.predict(*args, **kwargs) + + def predict(self, *args, **kwargs): + raise NotImplementedError + + @classmethod + def arguments(cls): + import inspect + + eligible = lambda x: isinstance(x, (int, float, bool)) or x == 'weight' + a = inspect.getargspec(cls.predict) + if a.defaults: + args = {k: v for k, v in zip(a.args[1:], a.defaults) if eligible(v)} + else: + args = {} + + return args + + def eligible(self, u, v): + return self.eligible_node(u) and self.eligible_node(v) and u != v + + def eligible_node(self, v): + if self.eligible_attr is None: + return True + return self.G.node[v][self.eligible_attr] + + def eligible_nodes(self): + return [v for v in self.G if self.eligible_node(v)] + + def likely_pairs(self, k=2): + """ + Yield node pairs from the same neighbourhood + + Arguments + --------- + k : int + size of the neighbourhood (e.g., if k = 2, the neighbourhood + consists of all nodes that are two links away) + + """ + for a in self.G.nodes_iter(): + if not self.eligible_node(a): + continue + for b in neighbourhood(self.G, a, k): + if not self.eligible_node(b): + continue + yield (a, b) + + +def all_predictors(): + """ + Returns a list of all subclasses of `Predictor` + """ + from linkpred.util import itersubclasses + from operator import itemgetter + + predictors = sorted([(s, s.__name__) for s in itersubclasses(Predictor)], key=itemgetter(1)) + return zip(*predictors)[0] diff --git a/linkpred/predictors/eigenvector.py b/linkpred/predictors/eigenvector.py new file mode 100644 index 0000000..acbcdc2 --- /dev/null +++ b/linkpred/predictors/eigenvector.py @@ -0,0 +1,80 @@ +from ..evaluation import Scoresheet +from ..network import neighbourhood_graph, rooted_pagerank, simrank +from ..util import progressbar +from .base import Predictor + + +class RootedPageRank(Predictor): + def predict(self, nbunch=None, alpha=0.85, beta=0, weight='weight', k=None): + """Predict using rooted PageRank. + + Parameters + ---------- + + G : a networkx.Graph + + nbunch : iterable collection of nodes + node(s) to calculate PR for (default: all) + + alpha : float + PageRank probability that we will advance to a neighbour of the + current node in a random walk + + + beta : float or int + Normally, we return to the root node with probability 1 - alpha. + With this parameter, we can also advance to a random other node in the + network with probability beta. Thus, we get back to the root node with + probability 1 - alpha - beta. This is off (0) by default. + + weight : string or None + The edge attribute that holds the numerical value used for + the edge weight. If None then treat as unweighted. + + k : int or None + If `k` is `None`, this predictor is applied to the entire network. + If `k` is an int, the predictor is applied to a subgraph consisting + of the k-neighbourhood of the current node. + Results are often very similar but much faster. 
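A hedged usage sketch of the k parameter (the karate club graph is only a stand-in, and exact scores depend on linkpred.network.rooted_pagerank): restricting the random walk to each node's 2-neighbourhood typically yields rankings close to the full computation at much lower cost.

    import networkx as nx
    from linkpred.predictors.eigenvector import RootedPageRank

    G = nx.karate_club_graph()
    full = RootedPageRank(G)()      # rooted PageRank over the whole graph
    local = RootedPageRank(G)(k=2)  # restricted to each node's 2-neighbourhood subgraph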
+ + See documentation for linkpred.network.rooted_pagerank() for these + parameters. + + """ + res = Scoresheet() + if nbunch is None: + nbunch = self.G.nodes() + for u in progressbar(nbunch): + if not self.eligible_node(u): + continue + if k is None: + G = self.G + else: + # Restrict to the k-neighbourhood subgraph + G = neighbourhood_graph(self.G, u, k) + pagerank_scores = rooted_pagerank(G, u, alpha, beta, weight) + for v, w in pagerank_scores.iteritems(): + if w > 0 and u != v and self.eligible_node(v): + res[(u, v)] += w + return res + + +class SimRank(Predictor): + def predict(self, c=0.8, num_iterations=10, weight='weight'): + res = Scoresheet() + nodelist = self.G.nodes() + sim = simrank(self.G, nodelist, c, num_iterations, weight) + (m, n) = sim.shape + assert m == n + + for i in range(m): + # sim(a, b) = sim(b, a), leading to a 'mirrored' matrix. + # We start the column range at i + 1, such that we only look at the + # upper triangle in the matrix, excluding the diagonal: sim(a, a) = 1. + u = nodelist[i] + for j in range(i + 1, n): + if sim[i, j] > 0: + v = nodelist[j] + if self.eligible(u, v): + res[(u, v)] = sim[i, j] + return res diff --git a/linkpred/predictors/misc.py b/linkpred/predictors/misc.py new file mode 100644 index 0000000..45abc98 --- /dev/null +++ b/linkpred/predictors/misc.py @@ -0,0 +1,47 @@ +from ..evaluation import Scoresheet +from ..util import all_pairs +from .base import Predictor + +__all__ = ["Community", + "Copy", + "Random"] + + +class Community(Predictor): + def predict(self): + from collections import defaultdict + from linkpred.network import generate_dendogram, partition_at_level + + res = Scoresheet() + dendogram = generate_dendogram(self.G) + + for i in range(len(dendogram)): + partition = partition_at_level(dendogram, i) + communities = defaultdict(list) + weight = len(dendogram) - i # Lower i, smaller communities + + for n, com in partition.iteritems(): + communities[com].append(n) + for nodes in communities.itervalues(): + for u, v in all_pairs(nodes): + if not self.eligible(u, v): + continue + res[(u, v)] += weight + return res + + +class Copy(Predictor): + def predict(self, weight=None): + if weight is None: + return Scoresheet.fromkeys(self.G.edges_iter(), 1) + return Scoresheet(((u, v), d[weight]) for u, v, d in self.G.edges(data=True)) + + +class Random(Predictor): + def predict(self): + import random + + res = Scoresheet() + for a, b in all_pairs(self.eligible_nodes()): + res[(a, b)] = random.random() + return res diff --git a/linkpred/predictors/neighbour.py b/linkpred/predictors/neighbour.py new file mode 100644 index 0000000..06d11a1 --- /dev/null +++ b/linkpred/predictors/neighbour.py @@ -0,0 +1,348 @@ +import math + +from ..evaluation import Scoresheet +from ..util import all_pairs +from .base import Predictor +from .util import neighbourhood, neighbourhood_size,\ + neighbourhood_intersection_size, neighbourhood_union_size + +__all__ = ["AdamicAdar", + "AssociationStrength", + "CommonNeighbours", + "CommonKNeighbours", + "Cosine", + "DegreeProduct", + "Euclidean", + "HirschCore", + "Jaccard", + "K50", + "Manhattan", + "Minkowski", + "MaxOverlap", + "MinOverlap", + "NMeasure", + "Pearson", + "ResourceAllocation"] + + +class AdamicAdar(Predictor): + def predict(self, weight=None): + res = Scoresheet() + for a, b in self.likely_pairs(): + intersection = set(neighbourhood(self.G, a)) & \ + set(neighbourhood(self.G, b)) + w = 0 + for c in intersection: + if weight is not None: + numerator = self.G[a][c][weight] * self.G[b][c][weight] + 
else: + numerator = 1.0 + w += numerator / \ + math.log(neighbourhood_size(self.G, c, weight)) + if w > 0: + res[(a, b)] = w + return res + + +class AssociationStrength(Predictor): + def predict(self, weight=None): + res = Scoresheet() + for a, b in self.likely_pairs(): + w = neighbourhood_intersection_size(self.G, a, b, weight) / \ + float(neighbourhood_size(self.G, a, weight) * + neighbourhood_size(self.G, b, weight)) + if w > 0: + res[(a, b)] = w + return res + + +class CommonNeighbours(Predictor): + def predict(self, alpha=1.0, weight=None): + r"""Predict using common neighbours + + This is loosely based on Opsahl et al. (2010): + + k(u, v) = |N(u) \cap N(v)| + s(u, v) = \sum_{i=1}^n x_i \cdot y_i + w(u, v) = k(u, v)^{1 - \alpha} \cdot s(u, v)^{\alpha} + + """ + res = Scoresheet() + for a, b in self.likely_pairs(): + if weight is None or alpha == 0.0: + w = neighbourhood_intersection_size(self.G, a, b, weight=None) + elif alpha == 1.0: + w = neighbourhood_intersection_size( + self.G, a, b, weight=weight) + else: + k = neighbourhood_intersection_size(self.G, a, b, weight=None) + s = neighbourhood_intersection_size( + self.G, a, b, weight=weight) + w = (k ** (1.0 - alpha)) * (s ** alpha) + if w > 0: + res[(a, b)] = w + return res + + +class CommonKNeighbours(Predictor): + def predict(self, beta=0.01, max_k=3, weight=None): + r"""A generalized version of common neighbours, somewhat inspired by Katz + + $w(u, v) = \sum_{k=1}^\infty \beta^k |\self.Gamma_k(u) \cap \self.Gamma_k(v)|$ + + """ + res = Scoresheet() + #for a, b in all_pairs(self.G.nodes()): + for a, b in self.likely_pairs(): + w = 0 + for k in range(1, max_k + 1): + w += (beta ** k) *\ + neighbourhood_intersection_size(self.G, a, b, weight, k) + if w > 0: + res[(a, b)] = w + return res + + +class Cosine(Predictor): + def predict(self, weight=None): + res = Scoresheet() + for a, b in self.likely_pairs(): + w = neighbourhood_intersection_size(self.G, a, b, weight) / \ + math.sqrt(neighbourhood_size(self.G, a, weight) * + neighbourhood_size(self.G, b, weight)) + if w > 0: + res[(a, b)] = w + return res + + +class DegreeProduct(Predictor): + def predict(self, weight=None, minimum=1): + res = Scoresheet() + for a, b in all_pairs(self.eligible_nodes()): + w = neighbourhood_size(self.G, a, weight) *\ + neighbourhood_size(self.G, b, weight) + if w >= minimum: + res[(a, b)] = w + return res + + +class Minkowski(Predictor): + r""" + Predictor based on Minkowski distance + + The distance `d` is defined as: + + .. math:: + + d = ( \sum |x_i - x_j|^r )^{1/r} + + and hence the likelihood score `w` is: + + .. 
math:: + + w = \frac{1}{d} + + """ + def predict(self, r=1, weight='weight'): + + def size(G, u, v, weight=None): + if weight is None and G.has_edge(u, v): + return 1 + try: + return G[u][v][weight] + except KeyError: + return 0 + + res = Scoresheet() + for a, b in self.likely_pairs(): + nbr_a = set(neighbourhood(self.G, a)) + nbr_b = set(neighbourhood(self.G, b)) + d = sum(abs(size(self.G, a, v, weight) - size(self.G, b, v, weight)) ** r + for v in nbr_a & nbr_b) + d += sum(size(self.G, a, v, weight) ** r for v in nbr_a - nbr_b) + d += sum(size(self.G, b, v, weight) ** r for v in nbr_b - nbr_a) + d = d ** 1.0 / r + if d > 0: + # d is a distance measure, so we take the inverse + res[(a, b)] = 1.0 / d + return res + + +class Euclidean(Minkowski): + def predict(self, weight='weight'): + return Minkowski.predict(self, r=2, weight=weight) + + +class HirschCore(Predictor): + """ + Predictor based on overlap between the h-cores of nodes + + The h-index of a node n is the largest number h, such that each node has at + least h neighbours. + The h-core of a node n is then defined as the set of neighbours of n with h + or more neighbours. + + References + ---------- + Schubert, A. (2010). A reference-based Hirschian similarity measure for + journals. Scientometrics 84(1), 133-147. + + Schubert, A. & Soos, S. (2010). Mapping of science journals based on + h-similarity. Scientometrics 83(2), 589-600. + + """ + def predict(self): + + def h_core_set(G, nodes): + from hirsch import h_index + + degree_dict = {n: len(G[n]) for n in nodes} + h_degree = h_index(degree_dict.values()) + + return set(k for k, v in degree_dict.iteritems() if v >= h_degree) + + res = Scoresheet() + for a, b in self.likely_pairs(): + a_neighbours = set(neighbourhood(self.G, a)) + b_neighbours = set(neighbourhood(self.G, b)) + if a_neighbours & b_neighbours: + a_core = h_core_set(self.G, a_neighbours) + b_core = h_core_set(self.G, b_neighbours) + if a_core & b_core: + # Jaccard index of Hirsch cores or peripheries + res[(a, b)] = len( + a_core & b_core) / float(len(a_core | b_core)) + return res + + +class Jaccard(Predictor): + def predict(self, weight=None): + """Predict by Jaccard index, based on neighbours of a and b + + Jaccard index J = |A \cap B| / |A \cup B| + + """ + res = Scoresheet() + for a, b in self.likely_pairs(): + # Best performance: weighted numerator, unweighted denominator. 
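+            # Worked toy example (hypothetical numbers): if a and b share 2 neighbours
+            # and have 5 distinct neighbours in total, J = 2 / 5 = 0.4.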
+ numerator = neighbourhood_intersection_size(self.G, a, b, weight) + denominator = neighbourhood_union_size(self.G, a, b, weight) + w = numerator / float(denominator) + if w > 0: + res[(a, b)] = w + return res + + +class K50(Predictor): + def predict(self, weight=None): + """K50, proposed by Boyack & Klavans (2006)""" + res = Scoresheet() + nbr_all = sum(neighbourhood_size(self.G, x, weight) + for x in self.G.nodes_iter()) + for a, b in self.likely_pairs(): + intersection = neighbourhood_intersection_size( + self.G, a, b, weight) + nbr_a = neighbourhood_size(self.G, a, weight) + nbr_b = neighbourhood_size(self.G, b, weight) + den = nbr_a * nbr_b + expected = min( + den / float(nbr_all - nbr_a), den / float(nbr_all - nbr_b)) + w = (intersection - expected) / math.sqrt(den) + if w > 0: + res[(a, b)] = w + return res + + +class Manhattan(Minkowski): + def predict(self, weight='weight'): + return Minkowski.predict(self, r=1, weight=weight) + + +class NMeasure(Predictor): + def predict(self, weight=None): + r"""Predict by N measure (Egghe, 2009) + + $N(A, B) = \srqt{2} \frac{|A \cap B|}{\sqrt{|A|^2 + |B|^2}}$ + + """ + res = Scoresheet() + for a, b in self.likely_pairs(): + w = math.sqrt(2) *\ + neighbourhood_intersection_size(self.G, a, b, weight) / \ + math.sqrt(neighbourhood_size(self.G, a, weight) ** 2 + + neighbourhood_size(self.G, b, weight) ** 2) + if w > 0: + res[(a, b)] = w + return res + + +class Overlap(Predictor): + def predict(self, function, weight=None): + res = Scoresheet() + for a, b in self.likely_pairs(): + # Best performance: weighted numerator, unweighted denominator. + numerator = neighbourhood_intersection_size(self.G, a, b, weight) + denominator = function(neighbourhood_size(self.G, a, weight), + neighbourhood_size(self.G, b, weight)) + w = numerator / float(denominator) + if w > 0: + res[(a, b)] = w + return res + + +class MaxOverlap(Overlap): + def predict(self, weight=None): + return Overlap.predict(self, max, weight) + + +class MinOverlap(Overlap): + def predict(self, weight=None): + return Overlap.predict(self, min, weight) + + +class Pearson(Predictor): + def predict(self, weight=None): + res = Scoresheet() + # 'Full' Pearson looks at all possible pairs. Since those are likely + # of little value for link prediction, we restrict ourselves to pairs + # with at least one common neighbour. + for a, b in self.likely_pairs(): + n = len(self.G) - 1 + a_l2norm = neighbourhood_size(self.G, a, weight) + b_l2norm = neighbourhood_size(self.G, b, weight) + a_l1norm = neighbourhood_size(self.G, a, weight, pow=1) + b_l1norm = neighbourhood_size(self.G, b, weight, pow=1) + intersect = neighbourhood_intersection_size(self.G, a, b, weight) + + numerator = (n * intersect) - (a_l1norm * b_l1norm) + denominator = math.sqrt(n * a_l2norm - a_l1norm ** 2) * \ + math.sqrt(n * b_l2norm - b_l1norm ** 2) + + w = numerator / denominator + if w > 0: + res[(a, b)] = w + return res + + +class ResourceAllocation(Predictor): + def predict(self, weight=None): + """Predict with Resource Allocation index + + See T. Zhou, L. Lu, YC. Zhang (2009). Eur. Phys. J. 
B, 71, 623 + + """ + res = Scoresheet() + for a, b in self.likely_pairs(): + intersection = set(neighbourhood(self.G, a)) & \ + set(neighbourhood(self.G, b)) + w = 0 + for c in intersection: + if weight is not None: + numerator = float(self.G[a][c][weight] * + self.G[b][c][weight]) + else: + numerator = 1.0 + w += numerator / neighbourhood_size(self.G, c, weight) + if w > 0: + res[(a, b)] = w + return res diff --git a/linkpred/predictors/path.py b/linkpred/predictors/path.py new file mode 100644 index 0000000..942751d --- /dev/null +++ b/linkpred/predictors/path.py @@ -0,0 +1,125 @@ +import networkx as nx + +from ..evaluation import Scoresheet +from .base import Predictor + +__all__ = ["GraphDistance", + "WeightedGraphDistance", + "Katz"] + + +class GraphDistance(Predictor): + def predict(self): + res = Scoresheet() + shortest_paths = nx.shortest_path_length(self.G) + for a, reachables in shortest_paths.iteritems(): + if not self.eligible_node(a): + continue + for b, length in reachables.iteritems(): + if a == b or not self.eligible_node(b): + continue + if length > 0: # same node + w = 1.0 / length + res[(a, b)] = w + return res + + +class WeightedGraphDistance(Predictor): + def predict(self, weight='weight', alpha=1): + r"""Predict by weighted graph distance + + This is based on the dissimilarity measures of Egghe & Rousseau (2003): + + $d(i, j) = \min(\sum 1/w_k)$ + + The parameter alpha was introduced by Opsahl et al. (2010): + + $d_\alpha(i, j) = \min(\sum 1 / w_k^\alpha)$ + + If alpha = 0, this reduces to unweighted graph distance, i.e. only keep + track of number of intermediate nodes and not of edge weights. If alpha = 1, + we only keep track of edge weights and not of the number of intermediate + nodes. (In practice, setting alpha equal to around 0.1 seems to yield the + best results.) + + """ + res = Scoresheet() + inverted = nx.Graph() + inverted.add_weighted_edges_from((u, v, 1.0 / d[weight] ** alpha) + for u, v, d in self.G.edges_iter(data=True)) + dist = nx.shortest_path_length(inverted, weight=weight) + for a, others in dist.iteritems(): + if not self.eligible_node(a): + continue + for b, length in others.iteritems(): + if a == b or not self.eligible_node(b): + continue + if a != b: + w = 1.0 / length + res[(a, b)] = w + return res + + +class Katz(Predictor): + def predict(self, beta=0.001, max_power=5, weight='weight', all_walks=True, + dtype=None): + """Predict by Katz (1953) measure + + Let $A$ be an adjacency matrix for the directed network $self.G$. + We assume that $self.G$ is unweighted, hence $A$ only contains values 1 and 0. + Then, each element $a_{ij}^{(k)}$ of $A^k$ (the $k$-th power of $A$) has a + value equal to the number of walks with length $k$ from $i$ to $j$. + + The probability of a link rapidly decreases as the walks grow longer. + Katz therefore introduces an extra parameter (here beta) to weigh + longer walks less. + + Parameters + ---------- + beta : a float + the value of beta in the formula of the Katz equation + + max_power : an int + the maximum number of powers to take into account + + weight : string or None + The edge attribute that holds the numerical value used for + the edge weight. If None then treat as unweighted. + + all_walks : True|False + can walks contain the same node/link more than once? 
+ + dtype : a data type + data type of edge weights (default numpy.int32) + + """ + from linkpred.util import progressbar + from itertools import izip + + if dtype is None: + import numpy + dtype = numpy.int32 + + nodelist = self.G.nodes() + adj = nx.to_scipy_sparse_matrix( + self.G, dtype=dtype, weight=weight) + res = Scoresheet() + + if not all_walks: + from scipy.sparse import triu + # Make triangular upper matrix + adj = triu(adj) + + for k in progressbar(range(1, max_power + 1), "Computing matrix powers: "): + # The below method is found to be fastest for iterating through a + # sparse matrix, see + # http://stackoverflow.com/questions/4319014/iterating-through-a-scipy-sparse-vector-or-matrix + matrix = (adj ** k).tocoo() + for i, j, d in izip(matrix.row, matrix.col, matrix.data): + if i == j: + continue + u, v = nodelist[i], nodelist[j] + if self.eligible(u, v): + w = d * (beta ** k) + res[(u, v)] += w + return res diff --git a/linkpred/predictors/tests/__init__.py b/linkpred/predictors/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/linkpred/predictors/tests/test_base.py b/linkpred/predictors/tests/test_base.py new file mode 100644 index 0000000..a646c8a --- /dev/null +++ b/linkpred/predictors/tests/test_base.py @@ -0,0 +1,38 @@ +from nose.tools import assert_dict_equal, assert_equal, assert_not_in +import networkx as nx + +from linkpred.evaluation import Pair +from linkpred.predictors.neighbour import CommonNeighbours +from linkpred.predictors.misc import Copy + + +def test_bipartite_common_neighbour(): + B = nx.Graph() + B.add_nodes_from(range(1, 5), eligible=0) + B.add_nodes_from('abc', eligible=1) + B.add_edges_from([(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b'), (2, 'c'), + (3, 'c'), (4, 'a')]) + + expected = {Pair('a', 'b'): 2, Pair('b', 'c'): 1, Pair('a', 'c'): 1} + assert_dict_equal(CommonNeighbours(B, eligible='eligible').predict(), expected) + + +def test_bipartite_common_neighbours_equivalent_projection(): + B = nx.bipartite_random_graph(30, 50, 0.1) + nodes = [v for v in B if B.node[v]['bipartite']] + G = nx.bipartite.weighted_projected_graph(B, nodes) + + expected = CommonNeighbours(B, eligible='bipartite')() + assert_dict_equal(Copy(G).predict(weight='weight'), expected) + + +def test_postprocessing(): + G = nx.karate_club_graph() + prediction_all_links = CommonNeighbours(G)() + prediction_only_new_links = CommonNeighbours(G, only_new=True)() + + for link, score in prediction_all_links.iteritems(): + if G.has_edge(*link): + assert_not_in(link, prediction_only_new_links) + else: + assert_equal(score, prediction_only_new_links[link]) diff --git a/linkpred/predictors/tests/test_eigenvector.py b/linkpred/predictors/tests/test_eigenvector.py new file mode 100644 index 0000000..bf468d9 --- /dev/null +++ b/linkpred/predictors/tests/test_eigenvector.py @@ -0,0 +1,30 @@ +from nose.tools import * +import networkx as nx + +from linkpred.predictors.eigenvector import * + +class TestEigenvector: + + def test_rooted_pagerank(self): + pass + + def test_rooted_pagerank_weighted(self): + pass + + def test_rooted_pagerank_alpha(self): + pass + + def test_rooted_pagerank_beta(self): + pass + + def test_rooted_pagerank_k(self): + pass + + def test_simrank(self): + pass + + def test_simrank_c(self): + pass + + def test_simrank_weighted(self): + pass diff --git a/linkpred/predictors/tests/test_misc.py b/linkpred/predictors/tests/test_misc.py new file mode 100644 index 0000000..a1e33d7 --- /dev/null +++ b/linkpred/predictors/tests/test_misc.py @@ -0,0 +1,38 @@ +from 
nose.tools import * +import networkx as nx + +from linkpred.evaluation import Pair +from linkpred.predictors.misc import * + +class TestCopy: + def setup(self): + self.G = nx.Graph() + self.G.add_weighted_edges_from([(0, 1, 3.0),(1, 2, 7.5)]) + + def test_copy_unweighted(self): + expected = {Pair(0, 1): 1, Pair(1, 2): 1} + assert_dict_equal(Copy(self.G).predict(), expected) + + def test_copy_weighted(self): + expected = {Pair(0, 1): 3.0, Pair(1, 2): 7.5} + assert_dict_equal(Copy(self.G).predict(weight="weight"), expected) + + def test_community(self): + pass + + def test_random(self): + G = nx.Graph() + G.add_nodes_from(range(10), eligible=True) + prediction = Random(G).predict() + assert_equal(len(prediction), 45) + + def test_random_exclude_noneligible(self): + G = nx.Graph() + G.add_nodes_from(range(5), eligible=True) + G.add_nodes_from(range(5, 10), eligible=False) + prediction = Random(G, eligible='eligible').predict() + assert_equal(len(prediction), 10) + for i in range(5): + for j in range(5): + if i != j: + assert Pair(i, j) in prediction diff --git a/linkpred/predictors/tests/test_neighbour.py b/linkpred/predictors/tests/test_neighbour.py new file mode 100644 index 0000000..733e070 --- /dev/null +++ b/linkpred/predictors/tests/test_neighbour.py @@ -0,0 +1,793 @@ +from nose.tools import * +import networkx as nx + +from linkpred.evaluation import Pair +from linkpred.predictors.neighbour import * + + +class TestFlorentineFamily: + + def setup(self): + self.G = nx.florentine_families_graph() + nx.set_node_attributes(self.G, 'eligible', dict.fromkeys(self.G, True)) + + def test_adamic_adar(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.55811062655124721, + Pair('Medici', 'Guadagni'): 1.8204784532536746, + Pair('Peruzzi', 'Bischeri'): 0.72134752044448169, + Pair('Lamberteschi', 'Bischeri'): 0.72134752044448169, + Pair('Salviati', 'Albizzi'): 0.55811062655124721, + Pair('Lamberteschi', 'Albizzi'): 0.72134752044448169, + Pair('Peruzzi', 'Guadagni'): 0.91023922662683732, + Pair('Strozzi', 'Medici'): 0.91023922662683732, + Pair('Pazzi', 'Medici'): 1.4426950408889634, + Pair('Ridolfi', 'Albizzi'): 0.55811062655124721, + Pair('Tornabuoni', 'Lamberteschi'): 0.72134752044448169, + Pair('Tornabuoni', 'Salviati'): 0.55811062655124721, + Pair('Ridolfi', 'Acciaiuoli'): 0.55811062655124721, + Pair('Strozzi', 'Guadagni'): 0.91023922662683732, + Pair('Salviati', 'Acciaiuoli'): 0.55811062655124721, + Pair('Guadagni', 'Ginori'): 0.91023922662683732, + Pair('Strozzi', 'Barbadori'): 0.91023922662683732, + Pair('Peruzzi', 'Barbadori'): 0.91023922662683732, + Pair('Tornabuoni', 'Ridolfi'): 0.55811062655124721, + Pair('Albizzi', 'Acciaiuoli'): 0.55811062655124721, + Pair('Tornabuoni', 'Medici'): 0.91023922662683732, + Pair('Ridolfi', 'Medici'): 0.91023922662683732, + Pair('Peruzzi', 'Castellani'): 0.72134752044448169, + Pair('Tornabuoni', 'Strozzi'): 0.91023922662683732, + Pair('Tornabuoni', 'Bischeri'): 0.72134752044448169, + Pair('Barbadori', 'Albizzi'): 0.55811062655124721, + Pair('Castellani', 'Bischeri'): 1.631586747071319, + Pair('Ridolfi', 'Guadagni'): 0.91023922662683732, + Pair('Ridolfi', 'Bischeri'): 0.72134752044448169, + Pair('Ridolfi', 'Peruzzi'): 0.72134752044448169, + Pair('Medici', 'Castellani'): 1.4426950408889634, + Pair('Bischeri', 'Albizzi'): 0.72134752044448169, + Pair('Medici', 'Ginori'): 0.91023922662683732, + Pair('Salviati', 'Ridolfi'): 0.55811062655124721, + Pair('Tornabuoni', 'Barbadori'): 0.55811062655124721, + Pair('Strozzi', 'Castellani'): 0.91023922662683732, + 
Pair('Salviati', 'Barbadori'): 0.55811062655124721, + Pair('Strozzi', 'Peruzzi'): 1.8204784532536746, + Pair('Strozzi', 'Bischeri'): 0.91023922662683732, + Pair('Tornabuoni', 'Albizzi'): 1.2794581469957289, + Pair('Barbadori', 'Acciaiuoli'): 0.55811062655124721, + Pair('Ridolfi', 'Castellani'): 0.72134752044448169, + Pair('Tornabuoni', 'Acciaiuoli'): 0.55811062655124721} + assert_dict_equal(AdamicAdar(self.G).predict(), answer) + + def test_adamic_adar_weighted(self): + pass + + def test_association_strength(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.16666666666666666, + Pair('Medici', 'Guadagni'): 0.083333333333333329, + Pair('Peruzzi', 'Bischeri'): 0.1111111111111111, + Pair('Lamberteschi', 'Bischeri'): 0.33333333333333331, + Pair('Salviati', 'Albizzi'): 0.16666666666666666, + Pair('Lamberteschi', 'Albizzi'): 0.33333333333333331, + Pair('Peruzzi', 'Guadagni'): 0.083333333333333329, + Pair('Strozzi', 'Medici'): 0.041666666666666664, + Pair('Pazzi', 'Medici'): 0.16666666666666666, + Pair('Ridolfi', 'Albizzi'): 0.1111111111111111, + Pair('Tornabuoni', 'Lamberteschi'): 0.33333333333333331, + Pair('Tornabuoni', 'Salviati'): 0.16666666666666666, + Pair('Ridolfi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Strozzi', 'Guadagni'): 0.0625, + Pair('Salviati', 'Acciaiuoli'): 0.5, + Pair('Guadagni', 'Ginori'): 0.25, + Pair('Strozzi', 'Barbadori'): 0.125, + Pair('Peruzzi', 'Barbadori'): 0.16666666666666666, + Pair('Tornabuoni', 'Ridolfi'): 0.1111111111111111, + Pair('Albizzi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Tornabuoni', 'Medici'): 0.055555555555555552, + Pair('Ridolfi', 'Medici'): 0.055555555555555552, + Pair('Peruzzi', 'Castellani'): 0.1111111111111111, + Pair('Tornabuoni', 'Strozzi'): 0.083333333333333329, + Pair('Tornabuoni', 'Bischeri'): 0.1111111111111111, + Pair('Barbadori', 'Albizzi'): 0.16666666666666666, + Pair('Castellani', 'Bischeri'): 0.22222222222222221, + Pair('Ridolfi', 'Guadagni'): 0.083333333333333329, + Pair('Ridolfi', 'Bischeri'): 0.1111111111111111, + Pair('Ridolfi', 'Peruzzi'): 0.1111111111111111, + Pair('Medici', 'Castellani'): 0.055555555555555552, + Pair('Bischeri', 'Albizzi'): 0.1111111111111111, + Pair('Medici', 'Ginori'): 0.16666666666666666, + Pair('Salviati', 'Ridolfi'): 0.16666666666666666, + Pair('Tornabuoni', 'Barbadori'): 0.16666666666666666, + Pair('Strozzi', 'Castellani'): 0.083333333333333329, + Pair('Salviati', 'Barbadori'): 0.25, + Pair('Strozzi', 'Peruzzi'): 0.16666666666666666, + Pair('Strozzi', 'Bischeri'): 0.083333333333333329, + Pair('Tornabuoni', 'Albizzi'): 0.22222222222222221, + Pair('Barbadori', 'Acciaiuoli'): 0.5, + Pair('Ridolfi', 'Castellani'): 0.1111111111111111, + Pair('Tornabuoni', 'Acciaiuoli'): 0.33333333333333331} + assert_dict_equal(AssociationStrength(self.G).predict(), answer) + + def test_association_strength_weighted(self): + pass + + def test_chi_square(self): + pass + + def test_common_neighbours(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 1.0, + Pair('Medici', 'Guadagni'): 2.0, + Pair('Peruzzi', 'Bischeri'): 1.0, + Pair('Lamberteschi', 'Bischeri'): 1.0, + Pair('Salviati', 'Albizzi'): 1.0, + Pair('Lamberteschi', 'Albizzi'): 1.0, + Pair('Peruzzi', 'Guadagni'): 1.0, + Pair('Strozzi', 'Medici'): 1.0, + Pair('Pazzi', 'Medici'): 1.0, + Pair('Ridolfi', 'Albizzi'): 1.0, + Pair('Tornabuoni', 'Lamberteschi'): 1.0, + Pair('Tornabuoni', 'Salviati'): 1.0, + Pair('Ridolfi', 'Acciaiuoli'): 1.0, + Pair('Strozzi', 'Guadagni'): 1.0, + Pair('Salviati', 'Acciaiuoli'): 1.0, + Pair('Guadagni', 'Ginori'): 1.0, + Pair('Strozzi', 
'Barbadori'): 1.0, + Pair('Peruzzi', 'Barbadori'): 1.0, + Pair('Tornabuoni', 'Ridolfi'): 1.0, + Pair('Albizzi', 'Acciaiuoli'): 1.0, + Pair('Tornabuoni', 'Medici'): 1.0, + Pair('Ridolfi', 'Medici'): 1.0, + Pair('Peruzzi', 'Castellani'): 1.0, + Pair('Tornabuoni', 'Strozzi'): 1.0, + Pair('Tornabuoni', 'Bischeri'): 1.0, + Pair('Barbadori', 'Albizzi'): 1.0, + Pair('Castellani', 'Bischeri'): 2.0, + Pair('Ridolfi', 'Guadagni'): 1.0, + Pair('Ridolfi', 'Bischeri'): 1.0, + Pair('Ridolfi', 'Peruzzi'): 1.0, + Pair('Medici', 'Castellani'): 1.0, + Pair('Bischeri', 'Albizzi'): 1.0, + Pair('Medici', 'Ginori'): 1.0, + Pair('Salviati', 'Ridolfi'): 1.0, + Pair('Tornabuoni', 'Barbadori'): 1.0, + Pair('Strozzi', 'Castellani'): 1.0, + Pair('Salviati', 'Barbadori'): 1.0, + Pair('Strozzi', 'Peruzzi'): 2.0, + Pair('Strozzi', 'Bischeri'): 1.0, + Pair('Tornabuoni', 'Albizzi'): 2.0, + Pair('Barbadori', 'Acciaiuoli'): 1.0, + Pair('Ridolfi', 'Castellani'): 1.0, + Pair('Tornabuoni', 'Acciaiuoli'): 1.0} + assert_dict_equal(CommonNeighbours(self.G).predict(), answer) + + def test_common_neighbours_alpha(self): + pass + + def test_common_k_neighbours(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.010812, + Pair('Medici', 'Guadagni'): 0.020512000000000002, + Pair('Peruzzi', 'Bischeri'): 0.010409, + Pair('Lamberteschi', 'Bischeri'): 0.010307999999999999, + Pair('Salviati', 'Albizzi'): 0.01051, + Pair('Lamberteschi', 'Guadagni'): 0.00030800000000000001, + Pair('Lamberteschi', 'Albizzi'): 0.010307999999999999, + Pair('Peruzzi', 'Guadagni'): 0.010309, + Pair('Salviati', 'Pazzi'): 0.000106, + Pair('Strozzi', 'Medici'): 0.010511000000000001, + Pair('Pazzi', 'Medici'): 0.010106, + Pair('Strozzi', 'Barbadori'): 0.01051, + Pair('Strozzi', 'Ridolfi'): 0.00071100000000000004, + Pair('Tornabuoni', 'Lamberteschi'): 0.010307999999999999, + Pair('Tornabuoni', 'Salviati'): 0.01051, + Pair('Ridolfi', 'Acciaiuoli'): 0.01051, + Pair('Strozzi', 'Guadagni'): 0.010511000000000001, + Pair('Medici', 'Acciaiuoli'): 0.00051000000000000004, + Pair('Guadagni', 'Ginori'): 0.010209000000000001, + Pair('Castellani', 'Barbadori'): 0.00041000000000000005, + Pair('Guadagni', 'Bischeri'): 0.00061000000000000008, + Pair('Ridolfi', 'Albizzi'): 0.010713, + Pair('Barbadori', 'Albizzi'): 0.010512000000000001, + Pair('Medici', 'Barbadori'): 0.00071199999999999996, + Pair('Peruzzi', 'Barbadori'): 0.010307999999999999, + Pair('Strozzi', 'Bischeri'): 0.010509000000000001, + Pair('Albizzi', 'Acciaiuoli'): 0.01051, + Pair('Tornabuoni', 'Medici'): 0.010713, + Pair('Guadagni', 'Albizzi'): 0.00061200000000000002, + Pair('Ridolfi', 'Medici'): 0.010813000000000001, + Pair('Peruzzi', 'Castellani'): 0.010407999999999999, + Pair('Tornabuoni', 'Strozzi'): 0.010511000000000001, + Pair('Tornabuoni', 'Bischeri'): 0.01051, + Pair('Medici', 'Albizzi'): 0.00071299999999999998, + Pair('Castellani', 'Bischeri'): 0.020308000000000003, + Pair('Salviati', 'Barbadori'): 0.01051, + Pair('Ridolfi', 'Guadagni'): 0.010612, + Pair('Ridolfi', 'Bischeri'): 0.01061, + Pair('Ridolfi', 'Peruzzi'): 0.010509000000000001, + Pair('Tornabuoni', 'Guadagni'): 0.00061200000000000002, + Pair('Bischeri', 'Albizzi'): 0.010409999999999999, + Pair('Ridolfi', 'Castellani'): 0.01051, + Pair('Medici', 'Ginori'): 0.010209000000000001, + Pair('Salviati', 'Ridolfi'): 0.01051, + Pair('Tornabuoni', 'Barbadori'): 0.010612, + Pair('Salviati', 'Acciaiuoli'): 0.01051, + Pair('Strozzi', 'Castellani'): 0.01051, + Pair('Salviati', 'Medici'): 0.00061000000000000008, + Pair('Strozzi', 'Peruzzi'): 0.020508999999999999, + 
Pair('Tornabuoni', 'Ridolfi'): 0.010813000000000001, + Pair('Tornabuoni', 'Albizzi'): 0.020812999999999998, + Pair('Barbadori', 'Acciaiuoli'): 0.01051, + Pair('Medici', 'Castellani'): 0.01031, + Pair('Tornabuoni', 'Acciaiuoli'): 0.01051, + Pair('Ginori', 'Albizzi'): 0.00020900000000000001} + assert_dict_equal(CommonKNeighbours(self.G).predict(), answer) + + def test_cosine(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.40824829046386307, + Pair('Medici', 'Guadagni'): 0.40824829046386307, + Pair('Peruzzi', 'Bischeri'): 0.33333333333333331, + Pair('Lamberteschi', 'Bischeri'): 0.57735026918962584, + Pair('Salviati', 'Albizzi'): 0.40824829046386307, + Pair('Lamberteschi', 'Albizzi'): 0.57735026918962584, + Pair('Peruzzi', 'Guadagni'): 0.28867513459481292, + Pair('Strozzi', 'Medici'): 0.20412414523193154, + Pair('Pazzi', 'Medici'): 0.40824829046386307, + Pair('Ridolfi', 'Albizzi'): 0.33333333333333331, + Pair('Tornabuoni', 'Lamberteschi'): 0.57735026918962584, + Pair('Tornabuoni', 'Salviati'): 0.40824829046386307, + Pair('Ridolfi', 'Acciaiuoli'): 0.57735026918962584, + Pair('Strozzi', 'Guadagni'): 0.25, + Pair('Salviati', 'Acciaiuoli'): 0.70710678118654746, + Pair('Guadagni', 'Ginori'): 0.5, + Pair('Strozzi', 'Barbadori'): 0.35355339059327373, + Pair('Peruzzi', 'Barbadori'): 0.40824829046386307, + Pair('Tornabuoni', 'Ridolfi'): 0.33333333333333331, + Pair('Albizzi', 'Acciaiuoli'): 0.57735026918962584, + Pair('Tornabuoni', 'Medici'): 0.23570226039551587, + Pair('Ridolfi', 'Medici'): 0.23570226039551587, + Pair('Peruzzi', 'Castellani'): 0.33333333333333331, + Pair('Tornabuoni', 'Strozzi'): 0.28867513459481292, + Pair('Tornabuoni', 'Bischeri'): 0.33333333333333331, + Pair('Barbadori', 'Albizzi'): 0.40824829046386307, + Pair('Castellani', 'Bischeri'): 0.66666666666666663, + Pair('Ridolfi', 'Guadagni'): 0.28867513459481292, + Pair('Ridolfi', 'Bischeri'): 0.33333333333333331, + Pair('Ridolfi', 'Peruzzi'): 0.33333333333333331, + Pair('Medici', 'Castellani'): 0.23570226039551587, + Pair('Bischeri', 'Albizzi'): 0.33333333333333331, + Pair('Medici', 'Ginori'): 0.40824829046386307, + Pair('Salviati', 'Ridolfi'): 0.40824829046386307, + Pair('Tornabuoni', 'Barbadori'): 0.40824829046386307, + Pair('Strozzi', 'Castellani'): 0.28867513459481292, + Pair('Salviati', 'Barbadori'): 0.5, + Pair('Strozzi', 'Peruzzi'): 0.57735026918962584, + Pair('Strozzi', 'Bischeri'): 0.28867513459481292, + Pair('Tornabuoni', 'Albizzi'): 0.66666666666666663, + Pair('Barbadori', 'Acciaiuoli'): 0.70710678118654746, + Pair('Ridolfi', 'Castellani'): 0.33333333333333331, + Pair('Tornabuoni', 'Acciaiuoli'): 0.57735026918962584} + assert_dict_equal(Cosine(self.G).predict(), answer) + + def test_cosine_weighted(self): + pass + + def test_degree_product(self): + answer = { + Pair('Peruzzi', 'Bischeri'): 9.0, + Pair('Lamberteschi', 'Albizzi'): 3.0, + Pair('Tornabuoni', 'Ginori'): 3.0, + Pair('Salviati', 'Pazzi'): 2.0, + Pair('Guadagni', 'Castellani'): 12.0, + Pair('Tornabuoni', 'Castellani'): 9.0, + Pair('Castellani', 'Albizzi'): 9.0, + Pair('Ginori', 'Barbadori'): 2.0, + Pair('Pazzi', 'Guadagni'): 4.0, + Pair('Castellani', 'Barbadori'): 6.0, + Pair('Lamberteschi', 'Acciaiuoli'): 1.0, + Pair('Ginori', 'Acciaiuoli'): 1.0, + Pair('Lamberteschi', 'Ginori'): 1.0, + Pair('Peruzzi', 'Barbadori'): 6.0, + Pair('Medici', 'Castellani'): 18.0, + Pair('Ginori', 'Castellani'): 3.0, + Pair('Guadagni', 'Barbadori'): 8.0, + Pair('Salviati', 'Medici'): 12.0, + Pair('Ridolfi', 'Lamberteschi'): 3.0, + Pair('Salviati', 'Ginori'): 2.0, + Pair('Salviati', 
'Barbadori'): 4.0, + Pair('Strozzi', 'Pazzi'): 4.0, + Pair('Pazzi', 'Acciaiuoli'): 1.0, + Pair('Tornabuoni', 'Medici'): 18.0, + Pair('Strozzi', 'Albizzi'): 12.0, + Pair('Guadagni', 'Acciaiuoli'): 4.0, + Pair('Lamberteschi', 'Bischeri'): 3.0, + Pair('Ridolfi', 'Ginori'): 3.0, + Pair('Castellani', 'Bischeri'): 9.0, + Pair('Strozzi', 'Medici'): 24.0, + Pair('Bischeri', 'Acciaiuoli'): 3.0, + Pair('Strozzi', 'Guadagni'): 16.0, + Pair('Medici', 'Acciaiuoli'): 6.0, + Pair('Medici', 'Albizzi'): 18.0, + Pair('Pazzi', 'Albizzi'): 3.0, + Pair('Peruzzi', 'Medici'): 18.0, + Pair('Guadagni', 'Albizzi'): 12.0, + Pair('Strozzi', 'Acciaiuoli'): 4.0, + Pair('Bischeri', 'Barbadori'): 6.0, + Pair('Peruzzi', 'Castellani'): 9.0, + Pair('Strozzi', 'Ridolfi'): 12.0, + Pair('Barbadori', 'Albizzi'): 6.0, + Pair('Ridolfi', 'Peruzzi'): 9.0, + Pair('Bischeri', 'Albizzi'): 9.0, + Pair('Ridolfi', 'Barbadori'): 6.0, + Pair('Peruzzi', 'Pazzi'): 3.0, + Pair('Strozzi', 'Peruzzi'): 12.0, + Pair('Pazzi', 'Ginori'): 1.0, + Pair('Medici', 'Lamberteschi'): 6.0, + Pair('Strozzi', 'Bischeri'): 12.0, + Pair('Salviati', 'Lamberteschi'): 2.0, + Pair('Ridolfi', 'Castellani'): 9.0, + Pair('Peruzzi', 'Lamberteschi'): 3.0, + Pair('Ginori', 'Albizzi'): 3.0, + Pair('Peruzzi', 'Guadagni'): 12.0, + Pair('Strozzi', 'Lamberteschi'): 4.0, + Pair('Medici', 'Guadagni'): 24.0, + Pair('Salviati', 'Bischeri'): 6.0, + Pair('Tornabuoni', 'Salviati'): 6.0, + Pair('Medici', 'Barbadori'): 12.0, + Pair('Guadagni', 'Bischeri'): 12.0, + Pair('Salviati', 'Ridolfi'): 6.0, + Pair('Salviati', 'Peruzzi'): 6.0, + Pair('Pazzi', 'Barbadori'): 2.0, + Pair('Ridolfi', 'Medici'): 18.0, + Pair('Ridolfi', 'Guadagni'): 12.0, + Pair('Ridolfi', 'Bischeri'): 9.0, + Pair('Tornabuoni', 'Guadagni'): 12.0, + Pair('Castellani', 'Acciaiuoli'): 3.0, + Pair('Tornabuoni', 'Barbadori'): 6.0, + Pair('Ginori', 'Bischeri'): 3.0, + Pair('Lamberteschi', 'Castellani'): 3.0, + Pair('Tornabuoni', 'Albizzi'): 9.0, + Pair('Salviati', 'Guadagni'): 8.0, + Pair('Tornabuoni', 'Pazzi'): 3.0, + Pair('Salviati', 'Albizzi'): 6.0, + Pair('Lamberteschi', 'Guadagni'): 4.0, + Pair('Ridolfi', 'Pazzi'): 3.0, + Pair('Peruzzi', 'Albizzi'): 9.0, + Pair('Strozzi', 'Salviati'): 8.0, + Pair('Strozzi', 'Barbadori'): 8.0, + Pair('Tornabuoni', 'Lamberteschi'): 3.0, + Pair('Pazzi', 'Medici'): 6.0, + Pair('Ridolfi', 'Acciaiuoli'): 3.0, + Pair('Guadagni', 'Ginori'): 4.0, + Pair('Ridolfi', 'Albizzi'): 9.0, + Pair('Albizzi', 'Acciaiuoli'): 3.0, + Pair('Tornabuoni', 'Strozzi'): 12.0, + Pair('Tornabuoni', 'Bischeri'): 9.0, + Pair('Tornabuoni', 'Peruzzi'): 9.0, + Pair('Salviati', 'Castellani'): 6.0, + Pair('Peruzzi', 'Ginori'): 3.0, + Pair('Medici', 'Ginori'): 6.0, + Pair('Peruzzi', 'Acciaiuoli'): 3.0, + Pair('Pazzi', 'Lamberteschi'): 1.0, + Pair('Pazzi', 'Castellani'): 3.0, + Pair('Strozzi', 'Castellani'): 12.0, + Pair('Lamberteschi', 'Barbadori'): 2.0, + Pair('Salviati', 'Acciaiuoli'): 2.0, + Pair('Pazzi', 'Bischeri'): 3.0, + Pair('Strozzi', 'Ginori'): 4.0, + Pair('Tornabuoni', 'Ridolfi'): 9.0, + Pair('Barbadori', 'Acciaiuoli'): 2.0, + Pair('Tornabuoni', 'Acciaiuoli'): 3.0, + Pair('Medici', 'Bischeri'): 18.0} + assert_dict_equal(DegreeProduct(self.G).predict(), answer) + + def test_degree_product_weighted(self): + pass + + def test_minkowski(self): + pass + + def test_euclidean(self): + pass + + def test_hirsch_core(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.25, + Pair('Medici', 'Guadagni'): 0.5, + Pair('Peruzzi', 'Bischeri'): 0.20000000000000001, + Pair('Lamberteschi', 'Bischeri'): 0.33333333333333331, + 
Pair('Salviati', 'Albizzi'): 0.33333333333333331, + Pair('Lamberteschi', 'Albizzi'): 0.5, + Pair('Peruzzi', 'Guadagni'): 0.20000000000000001, + Pair('Strozzi', 'Medici'): 0.16666666666666666, + Pair('Ridolfi', 'Albizzi'): 0.25, + Pair('Tornabuoni', 'Lamberteschi'): 0.33333333333333331, + Pair('Tornabuoni', 'Salviati'): 0.25, + Pair('Ridolfi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Strozzi', 'Guadagni'): 0.16666666666666666, + Pair('Salviati', 'Acciaiuoli'): 0.5, + Pair('Guadagni', 'Ginori'): 0.33333333333333331, + Pair('Strozzi', 'Barbadori'): 0.20000000000000001, + Pair('Peruzzi', 'Barbadori'): 0.25, + Pair('Tornabuoni', 'Ridolfi'): 0.20000000000000001, + Pair('Albizzi', 'Acciaiuoli'): 0.5, + Pair('Tornabuoni', 'Medici'): 0.20000000000000001, + Pair('Ridolfi', 'Medici'): 0.20000000000000001, + Pair('Peruzzi', 'Castellani'): 0.20000000000000001, + Pair('Tornabuoni', 'Strozzi'): 0.16666666666666666, + Pair('Tornabuoni', 'Bischeri'): 0.20000000000000001, + Pair('Barbadori', 'Albizzi'): 0.33333333333333331, + Pair('Castellani', 'Bischeri'): 0.5, + Pair('Ridolfi', 'Guadagni'): 0.20000000000000001, + Pair('Ridolfi', 'Bischeri'): 0.20000000000000001, + Pair('Ridolfi', 'Peruzzi'): 0.20000000000000001, + Pair('Bischeri', 'Albizzi'): 0.25, + Pair('Medici', 'Ginori'): 0.33333333333333331, + Pair('Salviati', 'Ridolfi'): 0.25, + Pair('Tornabuoni', 'Barbadori'): 0.25, + Pair('Strozzi', 'Castellani'): 0.16666666666666666, + Pair('Salviati', 'Barbadori'): 0.33333333333333331, + Pair('Strozzi', 'Peruzzi'): 0.40000000000000002, + Pair('Strozzi', 'Bischeri'): 0.16666666666666666, + Pair('Tornabuoni', 'Albizzi'): 0.66666666666666663, + Pair('Barbadori', 'Acciaiuoli'): 0.5, + Pair('Ridolfi', 'Castellani'): 0.20000000000000001, + Pair('Tornabuoni', 'Acciaiuoli'): 0.33333333333333331} + assert_dict_equal(HirschCore(self.G).predict(), answer) + + def test_jaccard(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.25, + Pair('Medici', 'Guadagni'): 0.25, + Pair('Peruzzi', 'Bischeri'): 0.20000000000000001, + Pair('Lamberteschi', 'Bischeri'): 0.33333333333333331, + Pair('Salviati', 'Albizzi'): 0.25, + Pair('Lamberteschi', 'Albizzi'): 0.33333333333333331, + Pair('Peruzzi', 'Guadagni'): 0.16666666666666666, + Pair('Strozzi', 'Medici'): 0.1111111111111111, + Pair('Pazzi', 'Medici'): 0.16666666666666666, + Pair('Ridolfi', 'Albizzi'): 0.20000000000000001, + Pair('Tornabuoni', 'Lamberteschi'): 0.33333333333333331, + Pair('Tornabuoni', 'Salviati'): 0.25, + Pair('Ridolfi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Strozzi', 'Guadagni'): 0.14285714285714285, + Pair('Salviati', 'Acciaiuoli'): 0.5, + Pair('Guadagni', 'Ginori'): 0.25, + Pair('Strozzi', 'Barbadori'): 0.20000000000000001, + Pair('Peruzzi', 'Barbadori'): 0.25, + Pair('Tornabuoni', 'Ridolfi'): 0.20000000000000001, + Pair('Albizzi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Tornabuoni', 'Medici'): 0.125, + Pair('Ridolfi', 'Medici'): 0.125, + Pair('Peruzzi', 'Castellani'): 0.20000000000000001, + Pair('Tornabuoni', 'Strozzi'): 0.16666666666666666, + Pair('Tornabuoni', 'Bischeri'): 0.20000000000000001, + Pair('Barbadori', 'Albizzi'): 0.25, + Pair('Castellani', 'Bischeri'): 0.5, + Pair('Ridolfi', 'Guadagni'): 0.16666666666666666, + Pair('Ridolfi', 'Bischeri'): 0.20000000000000001, + Pair('Ridolfi', 'Peruzzi'): 0.20000000000000001, + Pair('Medici', 'Castellani'): 0.125, + Pair('Bischeri', 'Albizzi'): 0.20000000000000001, + Pair('Medici', 'Ginori'): 0.16666666666666666, + Pair('Salviati', 'Ridolfi'): 0.25, + Pair('Tornabuoni', 'Barbadori'): 0.25, + 
Pair('Strozzi', 'Castellani'): 0.16666666666666666, + Pair('Salviati', 'Barbadori'): 0.33333333333333331, + Pair('Strozzi', 'Peruzzi'): 0.40000000000000002, + Pair('Strozzi', 'Bischeri'): 0.16666666666666666, + Pair('Tornabuoni', 'Albizzi'): 0.5, + Pair('Barbadori', 'Acciaiuoli'): 0.5, + Pair('Ridolfi', 'Castellani'): 0.20000000000000001, + Pair('Tornabuoni', 'Acciaiuoli'): 0.33333333333333331} + assert_dict_equal(Jaccard(self.G).predict(), answer) + + def test_jaccard_weighted(self): + pass + + def test_k50(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.34378803407483205, + Pair('Medici', 'Guadagni'): 0.27216552697590873, + Pair('Peruzzi', 'Bischeri'): 0.25225225225225228, + Pair('Lamberteschi', 'Bischeri'): 0.53293871002119308, + Pair('Salviati', 'Albizzi'): 0.34378803407483205, + Pair('Lamberteschi', 'Albizzi'): 0.53293871002119308, + Pair('Peruzzi', 'Guadagni'): 0.19505076661811682, + Pair('Strozzi', 'Medici'): 0.068041381743977183, + Pair('Pazzi', 'Medici'): 0.34544086116173028, + Pair('Ridolfi', 'Albizzi'): 0.25225225225225228, + Pair('Tornabuoni', 'Lamberteschi'): 0.53293871002119308, + Pair('Tornabuoni', 'Salviati'): 0.34378803407483205, + Pair('Ridolfi', 'Acciaiuoli'): 0.53293871002119308, + Pair('Strozzi', 'Guadagni'): 0.1388888888888889, + Pair('Salviati', 'Acciaiuoli'): 0.67084489497185273, + Pair('Guadagni', 'Ginori'): 0.44871794871794873, + Pair('Strozzi', 'Barbadori'): 0.27912109783679506, + Pair('Peruzzi', 'Barbadori'): 0.34378803407483205, + Pair('Tornabuoni', 'Ridolfi'): 0.25225225225225228, + Pair('Albizzi', 'Acciaiuoli'): 0.53293871002119308, + Pair('Tornabuoni', 'Medici'): 0.12103629587877841, + Pair('Ridolfi', 'Medici'): 0.12103629587877841, + Pair('Peruzzi', 'Castellani'): 0.25225225225225228, + Pair('Tornabuoni', 'Strozzi'): 0.19505076661811682, + Pair('Tornabuoni', 'Bischeri'): 0.25225225225225228, + Pair('Barbadori', 'Albizzi'): 0.34378803407483205, + Pair('Castellani', 'Bischeri'): 0.5855855855855856, + Pair('Ridolfi', 'Guadagni'): 0.19505076661811682, + Pair('Ridolfi', 'Bischeri'): 0.25225225225225228, + Pair('Ridolfi', 'Peruzzi'): 0.25225225225225228, + Pair('Medici', 'Castellani'): 0.12103629587877841, + Pair('Bischeri', 'Albizzi'): 0.25225225225225228, + Pair('Medici', 'Ginori'): 0.34544086116173028, + Pair('Salviati', 'Ridolfi'): 0.34378803407483205, + Pair('Tornabuoni', 'Barbadori'): 0.34378803407483205, + Pair('Strozzi', 'Castellani'): 0.19505076661811682, + Pair('Salviati', 'Barbadori'): 0.44736842105263158, + Pair('Strozzi', 'Peruzzi'): 0.48372590121292974, + Pair('Strozzi', 'Bischeri'): 0.19505076661811682, + Pair('Tornabuoni', 'Albizzi'): 0.5855855855855856, + Pair('Barbadori', 'Acciaiuoli'): 0.67084489497185273, + Pair('Ridolfi', 'Castellani'): 0.25225225225225228, + Pair('Tornabuoni', 'Acciaiuoli'): 0.53293871002119308} + assert_dict_equal(K50(self.G).predict(), answer) + + def test_manhattan(self): + pass + + def test_n_measure(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.39223227027636809, + Pair('Medici', 'Guadagni'): 0.39223227027636809, + Pair('Peruzzi', 'Bischeri'): 0.33333333333333337, + Pair('Lamberteschi', 'Bischeri'): 0.44721359549995793, + Pair('Salviati', 'Albizzi'): 0.39223227027636809, + Pair('Lamberteschi', 'Albizzi'): 0.44721359549995793, + Pair('Peruzzi', 'Guadagni'): 0.28284271247461901, + Pair('Strozzi', 'Medici'): 0.19611613513818404, + Pair('Pazzi', 'Medici'): 0.2324952774876386, + Pair('Ridolfi', 'Albizzi'): 0.33333333333333337, + Pair('Tornabuoni', 'Lamberteschi'): 0.44721359549995793, + Pair('Tornabuoni', 
'Salviati'): 0.39223227027636809, + Pair('Ridolfi', 'Acciaiuoli'): 0.44721359549995793, + Pair('Strozzi', 'Guadagni'): 0.25, + Pair('Salviati', 'Acciaiuoli'): 0.63245553203367588, + Pair('Guadagni', 'Ginori'): 0.34299717028501769, + Pair('Strozzi', 'Barbadori'): 0.31622776601683794, + Pair('Peruzzi', 'Barbadori'): 0.39223227027636809, + Pair('Tornabuoni', 'Ridolfi'): 0.33333333333333337, + Pair('Albizzi', 'Acciaiuoli'): 0.44721359549995793, + Pair('Tornabuoni', 'Medici'): 0.21081851067789195, + Pair('Ridolfi', 'Medici'): 0.21081851067789195, + Pair('Peruzzi', 'Castellani'): 0.33333333333333337, + Pair('Tornabuoni', 'Strozzi'): 0.28284271247461901, + Pair('Tornabuoni', 'Bischeri'): 0.33333333333333337, + Pair('Barbadori', 'Albizzi'): 0.39223227027636809, + Pair('Castellani', 'Bischeri'): 0.66666666666666674, + Pair('Ridolfi', 'Guadagni'): 0.28284271247461901, + Pair('Ridolfi', 'Bischeri'): 0.33333333333333337, + Pair('Ridolfi', 'Peruzzi'): 0.33333333333333337, + Pair('Medici', 'Castellani'): 0.21081851067789195, + Pair('Bischeri', 'Albizzi'): 0.33333333333333337, + Pair('Medici', 'Ginori'): 0.2324952774876386, + Pair('Salviati', 'Ridolfi'): 0.39223227027636809, + Pair('Tornabuoni', 'Barbadori'): 0.39223227027636809, + Pair('Strozzi', 'Castellani'): 0.28284271247461901, + Pair('Salviati', 'Barbadori'): 0.5, + Pair('Strozzi', 'Peruzzi'): 0.56568542494923801, + Pair('Strozzi', 'Bischeri'): 0.28284271247461901, + Pair('Tornabuoni', 'Albizzi'): 0.66666666666666674, + Pair('Barbadori', 'Acciaiuoli'): 0.63245553203367588, + Pair('Ridolfi', 'Castellani'): 0.33333333333333337, + Pair('Tornabuoni', 'Acciaiuoli'): 0.44721359549995793} + assert_dict_equal(NMeasure(self.G).predict(), answer) + + def test_n_measure_weighted(self): + pass + + def test_max_overlap(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.33333333333333331, + Pair('Medici', 'Guadagni'): 0.33333333333333331, + Pair('Peruzzi', 'Bischeri'): 0.33333333333333331, + Pair('Lamberteschi', 'Bischeri'): 0.33333333333333331, + Pair('Salviati', 'Albizzi'): 0.33333333333333331, + Pair('Lamberteschi', 'Albizzi'): 0.33333333333333331, + Pair('Peruzzi', 'Guadagni'): 0.25, + Pair('Strozzi', 'Medici'): 0.16666666666666666, + Pair('Pazzi', 'Medici'): 0.16666666666666666, + Pair('Ridolfi', 'Albizzi'): 0.33333333333333331, + Pair('Tornabuoni', 'Lamberteschi'): 0.33333333333333331, + Pair('Tornabuoni', 'Salviati'): 0.33333333333333331, + Pair('Ridolfi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Strozzi', 'Guadagni'): 0.25, + Pair('Salviati', 'Acciaiuoli'): 0.5, + Pair('Guadagni', 'Ginori'): 0.25, + Pair('Strozzi', 'Barbadori'): 0.25, + Pair('Peruzzi', 'Barbadori'): 0.33333333333333331, + Pair('Tornabuoni', 'Ridolfi'): 0.33333333333333331, + Pair('Albizzi', 'Acciaiuoli'): 0.33333333333333331, + Pair('Tornabuoni', 'Medici'): 0.16666666666666666, + Pair('Ridolfi', 'Medici'): 0.16666666666666666, + Pair('Peruzzi', 'Castellani'): 0.33333333333333331, + Pair('Tornabuoni', 'Strozzi'): 0.25, + Pair('Tornabuoni', 'Bischeri'): 0.33333333333333331, + Pair('Barbadori', 'Albizzi'): 0.33333333333333331, + Pair('Castellani', 'Bischeri'): 0.66666666666666663, + Pair('Ridolfi', 'Guadagni'): 0.25, + Pair('Ridolfi', 'Bischeri'): 0.33333333333333331, + Pair('Ridolfi', 'Peruzzi'): 0.33333333333333331, + Pair('Medici', 'Castellani'): 0.16666666666666666, + Pair('Bischeri', 'Albizzi'): 0.33333333333333331, + Pair('Medici', 'Ginori'): 0.16666666666666666, + Pair('Salviati', 'Ridolfi'): 0.33333333333333331, + Pair('Tornabuoni', 'Barbadori'): 0.33333333333333331, + 
Pair('Strozzi', 'Castellani'): 0.25, + Pair('Salviati', 'Barbadori'): 0.5, + Pair('Strozzi', 'Peruzzi'): 0.5, + Pair('Strozzi', 'Bischeri'): 0.25, + Pair('Tornabuoni', 'Albizzi'): 0.66666666666666663, + Pair('Barbadori', 'Acciaiuoli'): 0.5, + Pair('Ridolfi', 'Castellani'): 0.33333333333333331, + Pair('Tornabuoni', 'Acciaiuoli'): 0.33333333333333331} + assert_dict_equal(MaxOverlap(self.G).predict(), answer) + + def test_max_overlap_weighted(self): + pass + + def test_min_overlap(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.5, + Pair('Medici', 'Guadagni'): 0.5, + Pair('Peruzzi', 'Bischeri'): 0.33333333333333331, + Pair('Lamberteschi', 'Bischeri'): 1.0, + Pair('Salviati', 'Albizzi'): 0.5, + Pair('Lamberteschi', 'Albizzi'): 1.0, + Pair('Peruzzi', 'Guadagni'): 0.33333333333333331, + Pair('Strozzi', 'Medici'): 0.25, + Pair('Pazzi', 'Medici'): 1.0, + Pair('Ridolfi', 'Albizzi'): 0.33333333333333331, + Pair('Tornabuoni', 'Lamberteschi'): 1.0, + Pair('Tornabuoni', 'Salviati'): 0.5, + Pair('Ridolfi', 'Acciaiuoli'): 1.0, + Pair('Strozzi', 'Guadagni'): 0.25, + Pair('Salviati', 'Acciaiuoli'): 1.0, + Pair('Guadagni', 'Ginori'): 1.0, + Pair('Strozzi', 'Barbadori'): 0.5, + Pair('Peruzzi', 'Barbadori'): 0.5, + Pair('Tornabuoni', 'Ridolfi'): 0.33333333333333331, + Pair('Albizzi', 'Acciaiuoli'): 1.0, + Pair('Tornabuoni', 'Medici'): 0.33333333333333331, + Pair('Ridolfi', 'Medici'): 0.33333333333333331, + Pair('Peruzzi', 'Castellani'): 0.33333333333333331, + Pair('Tornabuoni', 'Strozzi'): 0.33333333333333331, + Pair('Tornabuoni', 'Bischeri'): 0.33333333333333331, + Pair('Barbadori', 'Albizzi'): 0.5, + Pair('Castellani', 'Bischeri'): 0.66666666666666663, + Pair('Ridolfi', 'Guadagni'): 0.33333333333333331, + Pair('Ridolfi', 'Bischeri'): 0.33333333333333331, + Pair('Ridolfi', 'Peruzzi'): 0.33333333333333331, + Pair('Medici', 'Castellani'): 0.33333333333333331, + Pair('Bischeri', 'Albizzi'): 0.33333333333333331, + Pair('Medici', 'Ginori'): 1.0, + Pair('Salviati', 'Ridolfi'): 0.5, + Pair('Tornabuoni', 'Barbadori'): 0.5, + Pair('Strozzi', 'Castellani'): 0.33333333333333331, + Pair('Salviati', 'Barbadori'): 0.5, + Pair('Strozzi', 'Peruzzi'): 0.66666666666666663, + Pair('Strozzi', 'Bischeri'): 0.33333333333333331, + Pair('Tornabuoni', 'Albizzi'): 0.66666666666666663, + Pair('Barbadori', 'Acciaiuoli'): 1.0, + Pair('Ridolfi', 'Castellani'): 0.33333333333333331, + Pair('Tornabuoni', 'Acciaiuoli'): 1.0} + assert_dict_equal(MinOverlap(self.G).predict(), answer) + + def test_min_overlap_weighted(self): + pass + + def test_pearson(self): + answer = { + Pair('Medici', 'Guadagni'): 0.091287092917527679, + Pair('Peruzzi', 'Bischeri'): 0.15151515151515152, + Pair('Lamberteschi', 'Bischeri'): 0.53108500454379437, + Pair('Salviati', 'Albizzi'): 0.28426762180748061, + Pair('Lamberteschi', 'Albizzi'): 0.53108500454379437, + Pair('Peruzzi', 'Guadagni'): 0.055048188256318034, + Pair('Strozzi', 'Barbadori'): 0.19364916731037085, + Pair('Pazzi', 'Medici'): 0.32025630761017432, + Pair('Ridolfi', 'Albizzi'): 0.15151515151515152, + Pair('Tornabuoni', 'Lamberteschi'): 0.53108500454379437, + Pair('Tornabuoni', 'Salviati'): 0.28426762180748061, + Pair('Ridolfi', 'Acciaiuoli'): 0.53108500454379437, + Pair('Barbadori', 'Acciaiuoli'): 0.67936622048675754, + Pair('Guadagni', 'Ginori'): 0.43852900965351466, + Pair('Peruzzi', 'Barbadori'): 0.28426762180748061, + Pair('Tornabuoni', 'Ridolfi'): 0.15151515151515152, + Pair('Albizzi', 'Acciaiuoli'): 0.53108500454379437, + Pair('Strozzi', 'Castellani'): 0.055048188256318034, + Pair('Ridolfi', 
'Barbadori'): 0.28426762180748061, + Pair('Peruzzi', 'Castellani'): 0.15151515151515152, + Pair('Tornabuoni', 'Strozzi'): 0.055048188256318034, + Pair('Tornabuoni', 'Bischeri'): 0.15151515151515152, + Pair('Barbadori', 'Albizzi'): 0.28426762180748061, + Pair('Castellani', 'Bischeri'): 0.5757575757575758, + Pair('Ridolfi', 'Guadagni'): 0.055048188256318034, + Pair('Ridolfi', 'Bischeri'): 0.15151515151515152, + Pair('Ridolfi', 'Peruzzi'): 0.15151515151515152, + Pair('Bischeri', 'Albizzi'): 0.15151515151515152, + Pair('Medici', 'Ginori'): 0.32025630761017432, + Pair('Salviati', 'Ridolfi'): 0.28426762180748061, + Pair('Tornabuoni', 'Barbadori'): 0.28426762180748061, + Pair('Salviati', 'Acciaiuoli'): 0.67936622048675754, + Pair('Salviati', 'Barbadori'): 0.41666666666666674, + Pair('Strozzi', 'Peruzzi'): 0.44038550605054427, + Pair('Strozzi', 'Bischeri'): 0.055048188256318034, + Pair('Tornabuoni', 'Albizzi'): 0.5757575757575758, + Pair('Ridolfi', 'Castellani'): 0.15151515151515152, + Pair('Tornabuoni', 'Acciaiuoli'): 0.53108500454379437} + assert_dict_equal(Pearson(self.G).predict(), answer) + + def test_pearson_weighted(self): + pass + + def test_resource_allocation(self): + answer = { + Pair('Ridolfi', 'Barbadori'): 0.16666666666666666, + Pair('Medici', 'Guadagni'): 0.66666666666666663, + Pair('Peruzzi', 'Bischeri'): 0.25, + Pair('Lamberteschi', 'Bischeri'): 0.25, + Pair('Salviati', 'Albizzi'): 0.16666666666666666, + Pair('Lamberteschi', 'Albizzi'): 0.25, + Pair('Peruzzi', 'Guadagni'): 0.33333333333333331, + Pair('Strozzi', 'Medici'): 0.33333333333333331, + Pair('Pazzi', 'Medici'): 0.5, + Pair('Ridolfi', 'Albizzi'): 0.16666666666666666, + Pair('Tornabuoni', 'Lamberteschi'): 0.25, + Pair('Tornabuoni', 'Salviati'): 0.16666666666666666, + Pair('Ridolfi', 'Acciaiuoli'): 0.16666666666666666, + Pair('Strozzi', 'Guadagni'): 0.33333333333333331, + Pair('Salviati', 'Acciaiuoli'): 0.16666666666666666, + Pair('Guadagni', 'Ginori'): 0.33333333333333331, + Pair('Strozzi', 'Barbadori'): 0.33333333333333331, + Pair('Peruzzi', 'Barbadori'): 0.33333333333333331, + Pair('Tornabuoni', 'Ridolfi'): 0.16666666666666666, + Pair('Albizzi', 'Acciaiuoli'): 0.16666666666666666, + Pair('Tornabuoni', 'Medici'): 0.33333333333333331, + Pair('Ridolfi', 'Medici'): 0.33333333333333331, + Pair('Peruzzi', 'Castellani'): 0.25, + Pair('Tornabuoni', 'Strozzi'): 0.33333333333333331, + Pair('Tornabuoni', 'Bischeri'): 0.25, + Pair('Barbadori', 'Albizzi'): 0.16666666666666666, + Pair('Castellani', 'Bischeri'): 0.58333333333333326, + Pair('Ridolfi', 'Guadagni'): 0.33333333333333331, + Pair('Ridolfi', 'Bischeri'): 0.25, + Pair('Ridolfi', 'Peruzzi'): 0.25, + Pair('Medici', 'Castellani'): 0.5, + Pair('Bischeri', 'Albizzi'): 0.25, + Pair('Medici', 'Ginori'): 0.33333333333333331, + Pair('Salviati', 'Ridolfi'): 0.16666666666666666, + Pair('Tornabuoni', 'Barbadori'): 0.16666666666666666, + Pair('Strozzi', 'Castellani'): 0.33333333333333331, + Pair('Salviati', 'Barbadori'): 0.16666666666666666, + Pair('Strozzi', 'Peruzzi'): 0.66666666666666663, + Pair('Strozzi', 'Bischeri'): 0.33333333333333331, + Pair('Tornabuoni', 'Albizzi'): 0.41666666666666663, + Pair('Barbadori', 'Acciaiuoli'): 0.16666666666666666, + Pair('Ridolfi', 'Castellani'): 0.25, + Pair('Tornabuoni', 'Acciaiuoli'): 0.16666666666666666} + assert_dict_equal(ResourceAllocation(self.G).predict(), answer) + + def test_resource_allocation_weighted(self): + pass + + +class TestTrivialNetwork: + def setup(self): + self.G = nx.Graph() + self.G.add_edges_from([(1, 2), (1, 3), (1, 4), (2, 5), 
(3, 5), (4, 5)]) + nx.set_node_attributes(self.G, 'eligible', dict.fromkeys(self.G, True)) + + def test_common_neighbours(self): + expected = {Pair(1, 5): 3, Pair(2, 3): 2, Pair(3, 4): 2, Pair(2, 4): 2} + assert_dict_equal(CommonNeighbours(self.G).predict(), expected) diff --git a/linkpred/predictors/tests/test_path.py b/linkpred/predictors/tests/test_path.py new file mode 100644 index 0000000..9fd3274 --- /dev/null +++ b/linkpred/predictors/tests/test_path.py @@ -0,0 +1,27 @@ +from nose.tools import * +import networkx as nx + +from linkpred.predictors.neighbour import * + +class TestPath: + + def test_graph_distance(self): + pass + + def test_weighted_graph_distance(self): + pass + + def test_weighted_graph_distance_alpha(self): + pass + + def test_katz(self): + pass + + def test_katz_beta(self): + pass + + def test_katz_weighted(self): + pass + + def test_katz_paths_only(self): + pass diff --git a/linkpred/predictors/util.py b/linkpred/predictors/util.py new file mode 100644 index 0000000..463dca2 --- /dev/null +++ b/linkpred/predictors/util.py @@ -0,0 +1,60 @@ +from ..network import neighbourhood_search + + +def neighbourhood(G, n, k=1): + """Get k-neighbourhood of node n""" + if k == 1: + return G[n] + dist = neighbourhood_search(G, n, k) + del dist[n] + return dist.keys() + + +def neighbourhood_intersection_size(G, a, b, weight=None, k=1): + """Get the summed weight of the common neighbours of a and b + + If weighted, we use the sum of the weight products. This is equivalent + to the vector-based interpretation (dot product of the two vectors). + + """ + common_neighbours = set(neighbourhood(G, a, k)) &\ + set(neighbourhood(G, b, k)) + if weight: + w = sum(G[a][n][weight] * G[b][n][weight] + for n in common_neighbours) + else: + w = len(common_neighbours) + return w + + +def neighbourhood_size(G, u, weight=None, k=1, pow=2): + """Get the weight of the neighbours of u + + If weighted, we use the sum of the squared edge weight for compatibility + with the vector-based measures. 
+ + """ + # The fast route for default options + if weight is None and k == 1: + return len(G[u]) + # The slow route for everything else + neighbours = neighbourhood(G, u, k) + if weight: + w = sum(G[u][v][weight] ** pow for v in neighbours) + else: + w = len(neighbours) + return w + + +def neighbourhood_union_size(G, a, b, weight=None, k=1, pow=2): + """Get the weight of the neighbours union of a and b""" + a_neighbours = set(neighbourhood(G, a, k)) + b_neighbours = set(neighbourhood(G, b, k)) + if weight: + w = sum(G[a][n][weight] ** pow for n in a_neighbours) +\ + sum(G[b][n][weight] ** pow for n in b_neighbours) -\ + sum(G[a][n][weight] * G[b][n][weight] + for n in a_neighbours & b_neighbours) + else: + w = len(a_neighbours | b_neighbours) + return w diff --git a/linkpred/result.py b/linkpred/result.py new file mode 100644 index 0000000..0ff8857 --- /dev/null +++ b/linkpred/result.py @@ -0,0 +1,168 @@ +import networkx as nx +from networkx.algorithms import bipartite + +from .util import log + +__all__ = ["ResultDict", "Result", "filter_low_degree_nodes"] + + +class Result(object): + """Result represents a query result as a pathspec, a network, or both.""" + + def __init__(self, data, eligible='eligible', + project=bipartite.weighted_projected_graph): + self.eligible = eligible + if isinstance(data, nx.Graph): + if nx.is_bipartite(data): + self.pathspec = data + if self.eligible is None: + bottom = [n for n, d in data.nodes(data=True)] + else: + bottom = [n for n, d in data.nodes( + data=True) if d[self.eligible]] + self.network = project(data, bottom) + else: + self.network = data + elif isinstance(data, Result): + self.pathspec = data.pathspec + self.network = data.network + else: + raise TypeError("Unexpected data type!") + + def __iter__(self): + return iter(self.network) + + def __len__(self): + return len(self.network) + + def for_comparison(self, exclude=set()): + """Return the result in a format, suitable for comparison. + + In practice this means we return it as a set of Pairs. + + """ + from .evaluation import Pair + + exclude = set(Pair(u, v) for u, v in exclude) + return set(Pair(u, v) for u, v in self.network.edges_iter()) - exclude + + def remove_items_from(self, l): + self.network.remove_nodes_from(l) + try: + self.pathspec.remove_nodes_from(l) + except AttributeError: + pass + + def add_remove_random_edges(self, pct_to_remove=None, pct_to_add=None): + from . 
import network + + if not pct_to_remove and not pct_to_add: + return + + # For simplicity, we do not do this for pathspecs + self.pathspec = None + + if pct_to_remove and pct_to_add: + network.add_remove_random_edges( + self.network, pct_to_add, pct_to_remove) + elif pct_to_remove: + network.remove_random_edges(self.network, pct_to_remove) + elif pct_to_add: + network.add_random_edges(self.network, pct_to_add) + + def low_degree(self, threshold): + """ + Find low-degree nodes + + Parameters + ---------- + threshold : int + Only nodes whose degree is below the threshold are retained + + """ + if self.eligible is not None: + return [n for n, d in self.network.degree_iter() + if d < threshold and self.network.node[n][self.eligible]] + else: + return [n for n, d in self.network.degree_iter() if d < threshold] + + def items_outside(self, container): + if self.eligible is not None: + return [n for n in self.network.nodes_iter() + if self.network.node[n][self.eligible] and n not in container] + else: + return [n for n in self.network.nodes_iter() if n not in container] + + +class ResultDict(dict): + """A dict of Results, along with some methods for manipulating them.""" + + def merge(self, mergespec, skipzero=True, weight='weight'): + """Merge Results according to mergespec into new Result with given name + + Parameters + ---------- + mergespec : a dict + dictionary of result names and their weight + (more weight = more importance) + + skipzero : True|False + If an entry in the mergespec has zero weight, skip it + (default: True) + + weight : string + Edge attribute for edge weight + + Returns + ------- + A networkx.Graph instance + + """ + log.logger.info("Merging...") + g = nx.Graph() + for resultname, resultweight in mergespec.iteritems(): + if resultweight == 0 and skipzero: + continue + result = self[resultname].network + # We also copy node data, so that 'eligible' keywords are retained + # in the merged network. + g.add_nodes_from(result.nodes(data=True)) + for u, v, edgedata in result.edges_iter(data=True): + w = edgedata[weight] * resultweight + if g.has_edge(u, v): + g.edge[u][v][weight] += w + else: + g.add_edge(u, v, attr_dict={weight: w}) + log.logger.info("Finished merging.") + return g + + def filter_all_low_degree_nodes(self, minimum=1): + networks = self.values() + filter_low_degree_nodes(networks, minimum) + + +def filter_low_degree_nodes(results, minimum=1): + """ + Only retain nodes that occur in all networks with at least a degree of k + + Changes are made in place. 
+ + Arguments + --------- + results : a list or iterable of Result instances + + minimum : int + minimum node degree + + """ + log.logger.info("Filtering low degree nodes...") + for res in results: + to_remove = res.low_degree(minimum) + res.remove_items_from(to_remove) + log.logger.info("Removed %d items" % len(to_remove)) + common = set.intersection(*[set(res) for res in results]) + for res in results: + to_remove = res.items_outside(common) + res.remove_items_from(to_remove) + log.logger.info("Removed %d items" % len(to_remove)) + log.logger.info("Finished filtering low degree nodes.") diff --git a/linkpred/tests/test_result.py b/linkpred/tests/test_result.py new file mode 100644 index 0000000..b10e80e --- /dev/null +++ b/linkpred/tests/test_result.py @@ -0,0 +1,42 @@ +from nose.tools import * + +import networkx as nx +from linkpred.result import * + +class TestResult: + def setup(self): + self.B = nx.bipartite_random_graph(50, 60, 0.2) + nodes = [n for n in self.B if self.B.node[n]['bipartite']] + self.G = nx.bipartite.weighted_projected_graph(self.B, nodes) + + def test_result_init(self): + res = Result(self.B, eligible='bipartite') + assert_equal(len(res), len(self.G)) + assert_equal(len(res.network), len(self.G)) + assert_equal(len(res.pathspec), len(self.B)) + + res = Result(self.G) + assert_equal(len(res), len(self.G)) + assert_equal(len(res.network), len(self.G)) + with assert_raises(AttributeError): + res.pathspec + + def test_result_remove_items(self): + res = Result(self.B, eligible='bipartite') + # the bottom nodes (bipartite=True) start from 50. + res.remove_items_from(range(50, 60)) + assert_equal(len(res), 50) + +def test_filter_low_degree_nodes(): + B1 = nx.bipartite_random_graph(50, 60, 0.2) + B2 = nx.bipartite_random_graph(50, 60, 0.2) + res1 = Result(B1, eligible='bipartite') + res2 = Result(B2, eligible='bipartite') + + filter_low_degree_nodes([res1, res2]) + assert_less_equal(len(res1), 60) + assert_less_equal(len(res2), 60) + assert_equal(len(res1), len([n for n in res1.pathspec\ + if res1.pathspec.node[n]['bipartite']])) + assert_equal(len(res2), len([n for n in res2.pathspec\ + if res2.pathspec.node[n]['bipartite']])) diff --git a/linkpred/util/__init__.py b/linkpred/util/__init__.py new file mode 100644 index 0000000..ed00929 --- /dev/null +++ b/linkpred/util/__init__.py @@ -0,0 +1,114 @@ +import re +import sys + + +def all_pairs(l): + """Return list of all possible pairs in l""" + try: + from itertools import combinations + return combinations(l, 2) + except ImportError: + return (tuple(sorted((x, y))) for i, x in enumerate(l, start=1) + for y in l[:i] if x != y) + + +def slugify(value): + """ + Normalize string to 'slug' + + Converts to lowercase, removes non-alpha characters, + and converts spaces to hyphens. + + Taken from http://stackoverflow.com/questions/295135/turn-a-string-into-a-valid-filename-in-python/295466#295466 + + """ + import unicodedata + value = unicodedata.normalize( + 'NFKD', unicode(value)).encode('ascii', 'ignore') + value = unicode(re.sub('[^\w\s-]', '', value).strip().lower()) + return unicode(re.sub('[-\s]+', '-', value)) + + +def progressbar(it, prefix="", size=60): + """Show progress bar + + Taken from http://code.activestate.com/recipes/576986-progress-bar-for-console-programs-as-iterator/ + + """ + count = len(it) + + def _show(_i): + x = int(size * _i / count) + sys.stdout.write( + "%s[%s%s] %i/%i\r" % (prefix, "#" * x, "." 
* (size - x), + _i, count)) + sys.stdout.flush() + + _show(0) + for i, item in enumerate(it, start=1): + yield item + _show(i) + sys.stdout.write("\n") + sys.stdout.flush() + + +def load_function(functionname): + """Return the function given by functionname + + This loads function names of the form 'module.function', + e.g. 'os.path.join'. + + """ + try: + # Find rightmost point. Everything to the left is module name. + index = functionname.rindex('.') + modulename = functionname[:index] + except ValueError: + raise Exception("No module name given in " + functionname) + # Dynamically load module and function + __import__(modulename) + module = sys.modules[modulename] + function = getattr(module, functionname[index + 1:]) + return function + + +def ensure_dir(fname): + """Make sure all the intermediate directories exist for given file name""" + import os + + d = os.path.dirname(fname) + if not os.path.isdir(d): + os.makedirs(d) + + +def interpolate(l): + """Make curve l non-increasing.""" + l.reverse() + for i in xrange(len(l) - 1): + if l[i] >= l[i + 1]: + l[i + 1] = l[i] + l.reverse() + return l + + +def itersubclasses(cls, _seen=None): + """Generator over all subclasses of a given class, in depth first order. + + Source: + http://code.activestate.com/recipes/576949-find-all-subclasses-of-a-given-class/ + + """ + if not isinstance(cls, type): + raise TypeError('itersubclasses must be called with ' + 'new-style classes, not %.100r' % cls) + if _seen is None: + _seen = set() + try: + subs = cls.__subclasses__() + except TypeError: # fails only when cls is type + subs = cls.__subclasses__(cls) + for sub in subs: + if sub not in _seen: + _seen.add(sub) + yield sub + for sub in itersubclasses(sub, _seen): + yield sub diff --git a/linkpred/util/log.py b/linkpred/util/log.py new file mode 100644 index 0000000..05fe33a --- /dev/null +++ b/linkpred/util/log.py @@ -0,0 +1,25 @@ +import logging +import sys + +logger = logging.getLogger('linkpred') +streamhandler = logging.StreamHandler(sys.stdout) +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s", + "%H:%M:%S") +streamhandler.setFormatter(formatter) +logger.setLevel(logging.INFO) +logger.addHandler(streamhandler) + + +def called_by(n=0): + """Returns caller of current function, useful for debugging + + Example + ------- + >>> def foo(): + .... from linkpred.util import log + .... log.logger.debug("Called by %s, %s, l. %s" % log.called_by(2)) + + """ + f = sys._getframe(n) + c = f.f_code + return c.co_filename, c.co_name, f.f_lineno diff --git a/scripts/linkpred b/scripts/linkpred new file mode 100644 index 0000000..58f93e9 --- /dev/null +++ b/scripts/linkpred @@ -0,0 +1,57 @@ +#!/usr/bin/env python + +"""linkpred main script""" + +import os +import sys + +# LIBDIR trick start (marker for removal on platforms that don't need it) +# This trick is inspired by Nikola +# . +# It allows running from a direct checkout as well as a 'properly' installed +# package. +libdir = '@LIBDIR@' + +# Two cases: +if libdir != '@' 'LIBDIR' '@': + # Changed by our distutils hook; use the given path. + + if not os.path.isabs(libdir): + libdir = os.path.join(os.path.dirname( + os.path.realpath(__file__)), libdir) + libdir = os.path.abspath(libdir) +else: + # Unchanged, running from checkout, + # use the parent directory, the linkpred package ought to be there. 
+ libdir = os.path.join(os.path.dirname(__file__), "..") + +sys.path.insert(0, libdir) + +if "PYTHONPATH" not in os.environ: + os.environ["PYTHONPATH"] = libdir +else: + os.environ["PYTHONPATH"] = os.environ["PYTHONPATH"] + ":" + libdir + +# LIBDIR trick end (marker for removal on platforms that don't need it) + +from linkpred.cli import get_profile +from linkpred.core import training_test_data, predict, evaluate +from linkpred.evaluation import DataSet + + +def main(): + profile = get_profile(choose_interpolation=True, choose_filetype=True) + only_new = profile.get("only_new", False) + filetype = profile.get('filetype', 'pdf') + steps = profile.get('steps', 1) + interpolate = profile.get('interpolation', True) + label = profile['training']['name'] + + training, test = training_test_data(profile) + predictions = predict(training, profile, only_new, eligible="eligible") + exclude = set(training.network.edges_iter()) if only_new else set() + dataset = DataSet(label, predictions, test, exclude=exclude, steps=steps) + evaluate(dataset, label, filetype, interpolate, steps) + +if __name__ == "__main__": + main()
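The neighbourhood helpers added in linkpred/predictors/util.py treat weighted overlap as a dot product: with a weight key, the intersection size is the sum of weight products over common neighbours, and a node's own weighted size is the sum of its squared edge weights. A minimal sketch of that behaviour on a toy weighted graph, assuming the linkpred package from this patch is importable and the networkx 1.x API used throughout (the graph and weights are made up for illustration):

    import networkx as nx

    from linkpred.predictors.util import (neighbourhood_intersection_size,
                                          neighbourhood_size)

    # Toy weighted graph: 'a' and 'b' share the single neighbour 'x'.
    G = nx.Graph()
    G.add_weighted_edges_from([('a', 'x', 2.0), ('a', 'y', 1.0),
                               ('b', 'x', 3.0), ('b', 'z', 4.0)])

    print(neighbourhood_intersection_size(G, 'a', 'b'))                   # 1 common neighbour
    print(neighbourhood_intersection_size(G, 'a', 'b', weight='weight'))  # 2.0 * 3.0 = 6.0
    print(neighbourhood_size(G, 'a', weight='weight'))                    # 2.0**2 + 1.0**2 = 5.0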
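ResultDict.merge in linkpred/result.py combines several Result networks into one weighted graph: each result's edge weights are scaled by its mergespec weight and summed where edges coincide, and node data (including the 'eligible' flag) is copied into the merged network. A sketch of the intended use, with made-up graphs and weights, again assuming a Python 2 / networkx 1.x environment with this patch on the path; triangles are used so the input graphs are not treated as bipartite pathspecs:

    import networkx as nx

    from linkpred.result import Result, ResultDict

    g1 = nx.Graph()
    g1.add_weighted_edges_from([('a', 'b', 1.0), ('b', 'c', 1.0), ('a', 'c', 1.0)])
    g2 = nx.Graph()
    g2.add_weighted_edges_from([('a', 'b', 2.0), ('b', 'c', 1.0), ('a', 'c', 1.0)])
    for g in (g1, g2):
        # Mark all nodes as eligible, as the rest of the package expects.
        nx.set_node_attributes(g, 'eligible', dict.fromkeys(g, True))

    results = ResultDict(first=Result(g1), second=Result(g2))
    # 'second' counts twice as heavily as 'first'.
    merged = results.merge({'first': 1, 'second': 2})
    print(merged['a']['b']['weight'])   # 1.0 * 1 + 2.0 * 2 = 5.0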
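load_function in linkpred/util/__init__.py resolves a dotted name by importing everything left of the last dot as a module and looking up the remainder as an attribute. A small usage sketch (the dotted name is only an example):

    from linkpred.util import load_function

    join = load_function("os.path.join")
    print(join("results", "chart.pdf"))   # results/chart.pdf (separator is platform-dependent)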