From e37e7e2f25154ccfeeb1b1e1001dcef05bfccb87 Mon Sep 17 00:00:00 2001
From: ekzhu <ekzhu@cs.toronto.edu>
Date: Thu, 21 Jan 2016 14:31:25 -0500
Subject: [PATCH] convert course project into golang library

---
 .gitmodules                                   |  12 --
 CMakeLists.txt                                |  37 ----
 LuaSHkit                                      |   1 -
 dist_dist.py                                  |  77 -------
 src/lsh/experiment.go => experiment.go        |   0
 flann                                         |   1 -
 src/lsh/forest_index.go => forest_index.go    |   0
 ...rest_index_test.go => forest_index_test.go |   0
 src/lsh/index.go => index.go                  |   0
 src/lsh/knn.go => knn.go                      |   0
 src/lsh/knn_test.go => knn_test.go            |   0
 src/lsh/lsh.go => lsh.go                      |   0
 src/lsh/lsh_test.go => lsh_test.go            |   0
 src/lsh/metric.go => metric.go                |   0
 ...multiprobe_index.go => multiprobe_index.go |   0
 ..._index_test.go => multiprobe_index_test.go |   0
 opencv                                        |   1 -
 opt_param.py                                  | 134 ------------
 plot.py                                       |  93 ---------
 src/lsh/run_forest.go => run_forest.go        |   0
 run_image.go                                  | 194 ------------------
 .../run_multiprobe.go => run_multiprobe.go    |   0
 src/lsh/run_simple.go => run_simple.go        |   0
 sample_gist.go                                |  38 ----
 sample_image.go                               |  41 ----
 scikit-learn                                  |   1 -
 setenv.sh                                     |   3 -
 src/lsh/simple_index.go => simple_index.go    |   0
 ...mple_index_test.go => simple_index_test.go |   0
 src/cpp/CMakeLists.txt                        |   6 -
 src/cpp/main.cpp                              |  93 ---------
 ...tinyimage_parser.go => tinyimage_parser.go |   0
 src/lsh/util.go => util.go                    |   0
 src/lsh/util_test.go => util_test.go          |   0
 34 files changed, 732 deletions(-)
 delete mode 100644 .gitmodules
 delete mode 100644 CMakeLists.txt
 delete mode 160000 LuaSHkit
 delete mode 100644 dist_dist.py
 rename src/lsh/experiment.go => experiment.go (100%)
 delete mode 160000 flann
 rename src/lsh/forest_index.go => forest_index.go (100%)
 rename src/lsh/forest_index_test.go => forest_index_test.go (100%)
 rename src/lsh/index.go => index.go (100%)
 rename src/lsh/knn.go => knn.go (100%)
 rename src/lsh/knn_test.go => knn_test.go (100%)
 rename src/lsh/lsh.go => lsh.go (100%)
 rename src/lsh/lsh_test.go => lsh_test.go (100%)
 rename src/lsh/metric.go => metric.go (100%)
 rename src/lsh/multiprobe_index.go => multiprobe_index.go (100%)
 rename src/lsh/multiprobe_index_test.go => multiprobe_index_test.go (100%)
 delete mode 160000 opencv
 delete mode 100644 opt_param.py
 delete mode 100644 plot.py
 rename src/lsh/run_forest.go => run_forest.go (100%)
 delete mode 100644 run_image.go
 rename src/lsh/run_multiprobe.go => run_multiprobe.go (100%)
 rename src/lsh/run_simple.go => run_simple.go (100%)
 delete mode 100644 sample_gist.go
 delete mode 100644 sample_image.go
 delete mode 160000 scikit-learn
 delete mode 100644 setenv.sh
 rename src/lsh/simple_index.go => simple_index.go (100%)
 rename src/lsh/simple_index_test.go => simple_index_test.go (100%)
 delete mode 100644 src/cpp/CMakeLists.txt
 delete mode 100644 src/cpp/main.cpp
 rename src/lsh/tinyimage_parser.go => tinyimage_parser.go (100%)
 rename src/lsh/util.go => util.go (100%)
 rename src/lsh/util_test.go => util_test.go (100%)

diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index a1c8388..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,12 +0,0 @@
-[submodule "flann"]
-	path = flann
-	url = git://github.com/mariusmuja/flann
-[submodule "opencv"]
-	path = opencv
-	url = git@github.com:Itseez/opencv.git
-[submodule "LuaSHkit"]
-	path = LuaSHkit
-	url = git@github.com:ocallaco/LuaSHkit.git
-[submodule "scikit-learn"]
-	path = scikit-learn
-	url = git@github.com:scikit-learn/scikit-learn.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
deleted file mode 100644
index e5864de..0000000
--- a/CMakeLists.txt
+++ /dev/null
@@ -1,37 +0,0 @@
-cmake_minimum_required(VERSION 2.6)
-
-project(CSC2515Project)
-string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER)
-
-# detect if using the Clang compiler
-if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
-  set(CMAKE_COMPILER_IS_CLANG 1)
-endif ()
-
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
-  set(CMAKE_COMPILER_IS_CLANGXX 1)
-endif ()
-
-#set the default path for built executables to the "bin" directory
-set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin")
-#set the default path for built libraries to the "lib" directory
-set(LIBRARY_OUTPUT_PATH "${PROJECT_BINARY_DIR}/lib")
-
-# Include Flann
-include("${PROJECT_SOURCE_DIR}/flann/cmake/FindFlann.cmake")
-
-#set the C/C++ include path to the "include" directory
-include_directories(BEFORE "${PROJECT_SOURCE_DIR}/flann/src/cpp")
-
-# require proper c++
-#add_definitions( "-Wall -ansi -pedantic" )
-# HDF5 uses long long which is not ansi
-if(CMAKE_C_COMPILER_ID MATCHES "MSVC" OR CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
-    # lots of warnings with cl.exe right now, use /W1
-    add_definitions("/W1 -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS /bigobj")
-else()
-    add_definitions( "-Wall -Wno-unknown-pragmas -Wno-unused-function" )
-endif()
-
-add_subdirectory(src)
-
diff --git a/LuaSHkit b/LuaSHkit
deleted file mode 160000
index c77186e..0000000
--- a/LuaSHkit
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit c77186ef123505c7e2065dcde05d8b699d6a93d9
diff --git a/dist_dist.py b/dist_dist.py
deleted file mode 100644
index f869106..0000000
--- a/dist_dist.py
+++ /dev/null
@@ -1,77 +0,0 @@
-'''
-Plot the histogram of top-k distances
-'''
-
-import json, sys, collections
-import numpy as np
-import scipy
-from scipy.stats import gamma
-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-
-def load_all_pair_sample(datafile):
-    with open(datafile) as f:
-        data = json.load(f)
-    dists = collections.deque([])
-    for query_result in data:
-        for neighbour in query_result["neighbours"]:
-            dists.append(neighbour["distance"])
-    dists = np.array(list(dists))
-    dists_squared = np.square(dists)
-    gamma_x = gamma.fit(dists_squared)
-    print("Distance-squared distribution: ", gamma_x)
-    return gamma_x, dists_squared
-
-def load_topk_sample(datafile):
-    with open(datafile) as f:
-        data = json.load(f)
-    topks = collections.deque([])
-    for query_result in data:
-        dists = collections.deque([])
-        for neighbour in query_result["neighbours"]:
-            dists.append(neighbour["distance"])
-        dists_squared = np.square(np.sort(list(dists)))
-        topks.append(dists_squared)
-    topks = np.array(list(topks))
-    gamma_xk = []
-    for i in ks:
-        params = gamma.fit(topks[:,i])
-        gamma_xk.append(params)
-        print("k = %d distance-squared distribution: " % i, params)
-    return gamma_xk, topks 
-
-all_pairs_sample = "./_image_all_pair_distance_sample"
-topk_sample = "./_image_query_distance_sample"
-ks = [10, 50, 200]
-max_w = 15000.0 
-max_m = 12
-required_recall = 0.5
-
-gamma_x, dists_squared = load_all_pair_sample(all_pairs_sample)
-gamma_xk, topk_dists_squared = load_topk_sample(topk_sample)
-max_x = np.max(dists_squared)
-
-
-fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharex=True)
-
-# Plot all pair distance distribution
-x = np.linspace(0.0, max_x/2.0, num=100)
-pdf = gamma.pdf(x, gamma_x[0], gamma_x[1], gamma_x[2]) 
-axes[0].plot(x, pdf)
-axes[0].grid()
-axes[0].set_ylabel("Probability")
-axes[0].set_xlabel("Sqaured L2 distance") 
-
-# Plot kth nearest neighbour distance distribution
-for i, k in enumerate(ks):
-    shape, loc, scale = gamma_xk[i]
-    pdf = gamma.pdf(x, shape, loc, scale) 
-    axes[1].plot(x, pdf, label="%d-NN" % k)
-axes[1].legend()
-axes[1].set_ylabel("Probability")
-axes[1].set_xlabel("Sqaured L2 distance") 
-axes[1].grid()
-
-plt.savefig("dist_dist.png")
-plt.close()
diff --git a/src/lsh/experiment.go b/experiment.go
similarity index 100%
rename from src/lsh/experiment.go
rename to experiment.go
diff --git a/flann b/flann
deleted file mode 160000
index b8a442f..0000000
--- a/flann
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit b8a442fd98f8ce32ae3465bfd3427b5cbc36f6a5
diff --git a/src/lsh/forest_index.go b/forest_index.go
similarity index 100%
rename from src/lsh/forest_index.go
rename to forest_index.go
diff --git a/src/lsh/forest_index_test.go b/forest_index_test.go
similarity index 100%
rename from src/lsh/forest_index_test.go
rename to forest_index_test.go
diff --git a/src/lsh/index.go b/index.go
similarity index 100%
rename from src/lsh/index.go
rename to index.go
diff --git a/src/lsh/knn.go b/knn.go
similarity index 100%
rename from src/lsh/knn.go
rename to knn.go
diff --git a/src/lsh/knn_test.go b/knn_test.go
similarity index 100%
rename from src/lsh/knn_test.go
rename to knn_test.go
diff --git a/src/lsh/lsh.go b/lsh.go
similarity index 100%
rename from src/lsh/lsh.go
rename to lsh.go
diff --git a/src/lsh/lsh_test.go b/lsh_test.go
similarity index 100%
rename from src/lsh/lsh_test.go
rename to lsh_test.go
diff --git a/src/lsh/metric.go b/metric.go
similarity index 100%
rename from src/lsh/metric.go
rename to metric.go
diff --git a/src/lsh/multiprobe_index.go b/multiprobe_index.go
similarity index 100%
rename from src/lsh/multiprobe_index.go
rename to multiprobe_index.go
diff --git a/src/lsh/multiprobe_index_test.go b/multiprobe_index_test.go
similarity index 100%
rename from src/lsh/multiprobe_index_test.go
rename to multiprobe_index_test.go
diff --git a/opencv b/opencv
deleted file mode 160000
index d6f8a75..0000000
--- a/opencv
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit d6f8a75e91112f4b98f895658a747a077f580684
diff --git a/opt_param.py b/opt_param.py
deleted file mode 100644
index 29a0e4b..0000000
--- a/opt_param.py
+++ /dev/null
@@ -1,134 +0,0 @@
-import json, sys, collections
-import numpy as np
-import scipy
-from scipy.stats import gamma, norm
-from scipy.integrate import quad
-from scipy.constants import pi
-import matplotlib.pyplot as plt
-
-def _integration(a, b, f, p):
-    area = 0.0
-    x = a
-    while x < b:
-        area += f(x+0.5*p)*p
-        x += p
-    return area
-
-def _collision_probability(w, r):
-    a = 1.0 - 2.0 * norm.cdf(- w / r)
-    b = 2.0 / (np.sqrt(2.0 * pi) * w / r)
-    c = 1.0 - np.exp(- (w * w) / (2.0 * r * r))
-    return a - b * c
-
-def _hash_probability(m, l, w, r):
-    p = 1.0 - (1.0 - _collision_probability(w, r)**float(m))**float(l)
-    if p < 0.0:
-        print(m, l, w, r)
-        raise ValueError()
-    return p
-
-def _recall(m, l, w, gamma_params, max_x):
-    k = len(gamma_params)
-    s = 0.0
-    for i in range(k):
-        shape, loc, scale = gamma_params[i]
-        join_prob_func = lambda x : _hash_probability(m, l, w, np.sqrt(x)) * gamma.pdf(x, shape, loc, scale)
-        prob, _ = quad(join_prob_func, 0.0, max_x) 
-        s += prob
-    return s / float(k)
-
-def _selectivity(m, l, w, gamma_param, max_x):
-    shape, loc, scale = gamma_param
-    join_prob_func = lambda x : _hash_probability(m, l, w, np.sqrt(x)) * gamma.pdf(x, shape, loc, scale)
-    prob, _ = quad(join_prob_func, 0.0, max_x) 
-    return prob
-
-def optimization(max_m, l, max_w, max_x, gamma_x, gamma_xk, required_recall):
-    best_m = 0
-    best_w = 0.0
-    best_selectivity = float('inf')
-    for m in range(1, max_m):
-        # Search for the m and w that gives the smallest recall just above the required_recall
-        # Use binary search
-        right_bound = max_w
-        left_bound = 0.0
-        w = (right_bound + left_bound) / 2.0
-        delta = float('inf')
-        last_recall = float("inf")
-        while delta > 1.0:
-            recall = _recall(m, l, w, gamma_xk, max_x)
-            print("recall", recall)
-            if recall < required_recall:
-                left_bound = w
-            else:
-                right_bound = w
-            print("New search interval", (left_bound, right_bound))
-            new_w = (right_bound + left_bound) / 2.0
-            if new_w < 0.0:
-                print(left_bound, right_bound, w, new_w, m, l, recall)
-                raise ValueError()
-            delta = np.abs(new_w - w)
-            w = new_w
-            last_recall = recall
-        if recall < required_recall - 0.01:
-            print("Failed for l = %d m = %d is w =  %f, recall = %f" % (l, m, w, recall))
-            continue
-        selectivity = _selectivity(m, l, w, gamma_x, max_x) 
-        print("Best for l = %d m = %d is w =  %f, recall = %f, selectivity = %f" % (l, m, w, recall, selectivity))
-        if selectivity < best_selectivity:
-            best_selectivity = selectivity
-            best_m = m
-            best_w = w
-    print("Best overall for l = %d is m = %d, w = %d" % (l, best_m, best_w))
-    return best_m, best_w
-
-def load_all_pair_sample(datafile):
-    with open(datafile) as f:
-        data = json.load(f)
-    dists = collections.deque([])
-    for query_result in data:
-        for neighbour in query_result["neighbours"]:
-            dists.append(neighbour["distance"])
-    dists = np.array(list(dists))
-    dists_squared = np.square(dists)
-    gamma_x = gamma.fit(dists_squared)
-    print("Distance-squared distribution: ", gamma_x)
-    return gamma_x, dists_squared
-
-def load_topk_sample(datafile):
-    with open(datafile) as f:
-        data = json.load(f)
-    topks = collections.deque([])
-    for query_result in data:
-        dists = collections.deque([])
-        for neighbour in query_result["neighbours"]:
-            dists.append(neighbour["distance"])
-        dists_squared = np.square(np.sort(list(dists)))
-        topks.append(dists_squared)
-    topks = np.array(list(topks))
-    gamma_xk = []
-    for i in range(k):
-        params = gamma.fit(topks[:,i])
-        gamma_xk.append(params)
-        print("k = %d distance-squared distribution: " % i, params)
-    return gamma_xk, topks 
-
-all_pairs_sample = "./_image_all_pair_distance_sample"
-topk_sample = "./_image_query_distance_sample"
-k = 50
-max_w = 15000.0 
-max_m = 12
-required_recall = 0.5
-output = "opt_param_k_%d_recall_%.2f.json" % (k, required_recall)
-
-gamma_x, dists_squared = load_all_pair_sample(all_pairs_sample)
-gamma_xk, topk_dists_squared = load_topk_sample(topk_sample)
-max_x = np.max(dists_squared)
-ls = [2, 4, 8, 16, 32, 64]
-out = []
-for l in ls:
-    m, w = optimization(max_m, l, max_w, max_x, 
-            gamma_x, gamma_xk, required_recall)
-    out.append({"L" : l, "M" : m, "W" : w})
-with open(output, 'w') as f:
-    json.dump(f, out)
diff --git a/plot.py b/plot.py
deleted file mode 100644
index 10a089f..0000000
--- a/plot.py
+++ /dev/null
@@ -1,93 +0,0 @@
-import json, sys, argparse, os
-import numpy as np
-import matplotlib
-matplotlib.use("Agg")
-import matplotlib.pyplot as plt
-
-def get_analysis(meta):
-    a = {}
-    for analysis_result in meta["analysis_results"]:
-        label = analysis_result["algorithm"]
-        result_files = analysis_result["result_files"]
-        error_ratios = []
-        recalls = []
-        times = []
-        for result_file in result_files:
-            with open(result_file) as f:
-                analysis = json.load(f)
-            error_ratios.append(np.mean(analysis["errorratios"]))
-            recalls.append(np.mean(analysis["recalls"]))
-            times.append(np.percentile(analysis["times"], 90))
-        a[label] = {"error_ratios" : error_ratios,
-                    "recalls" : recalls,
-                    "times" : times}
-    return a
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser()
-    parser.add_argument("varlout")
-    parser.add_argument("vartout")
-    args = parser.parse_args(sys.argv[1:])
-
-    # var L experiments
-    metafile = os.path.join(args.varlout, ".meta")
-    with open(metafile) as f:
-        varlmeta = json.load(f)
-    ls = varlmeta["Ls"]
-    ms = varlmeta["Ms"]
-    ws = varlmeta["Ws"]
-    varl_analysis = get_analysis(varlmeta)
-    
-    # var T experiments
-    metafile = os.path.join(args.vartout, ".meta")
-    with open(metafile) as f:
-        vartmeta = json.load(f)
-    ts = vartmeta["Ts"]
-    vart_analysis = get_analysis(vartmeta)
-    
-    
-    # Plot recall
-    fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
-    #axes[0].set_ylim(0.5, 1.0)
-    axes[0].set_xscale('log', basex=2)
-    axes[0].grid()
-    for label in varl_analysis:
-        recall = varl_analysis[label]["recalls"]
-        axes[0].plot(ls, recall, label=label, marker="+")
-    axes[0].set_xlabel("Number of hash tables")
-    axes[0].set_ylabel("Recall")
-    axes[0].legend(loc="lower right")
-    axes[0].set_title("M = %d, W = %d, T = %d" % (ms[0], ws[0], varlmeta["T"]))
-    axes[1].grid()
-    axes[1].set_xscale('log', basex=2)
-    for label in vart_analysis:
-        recall = vart_analysis[label]["recalls"]
-        axes[1].plot(ts, recall, label=label, marker="+")
-    axes[1].set_xlabel("Number of probes")
-    axes[1].set_ylabel("Recall")
-    axes[1].set_title("M = %d, L = %d, W = %d" % (vartmeta["M"], vartmeta["L"], vartmeta["W"]))
-    fig.savefig("recall.png")
-    plt.close()
-
-    # Plot time
-    fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=True)
-    axes[0].set_xscale('log', basex=2)
-    axes[0].grid()
-    for label in varl_analysis:
-        times = varl_analysis[label]["times"]
-        axes[0].plot(ls, times, label=label, marker="+")
-    axes[0].set_xlabel("Number of hash tables")
-    axes[0].set_ylabel("90 percentil query time (ms)")
-    axes[0].legend(loc="upper left")
-    axes[0].set_title("T = %d" % (varlmeta["T"]))
-    axes[1].grid()
-    axes[1].set_xscale('log', basex=2)
-    for label in vart_analysis:
-        times = vart_analysis[label]["times"]
-        axes[1].plot(ts, times, label=label, marker="+")
-    axes[1].set_xlabel("Number of probes")
-    axes[1].set_ylabel("90 percentil query time (ms)")
-    axes[1].set_title("M = %d, L = %d, W = %d" % (vartmeta["M"], vartmeta["L"], vartmeta["W"]))
-    fig.savefig("time.png")
-    plt.close()
-
diff --git a/src/lsh/run_forest.go b/run_forest.go
similarity index 100%
rename from src/lsh/run_forest.go
rename to run_forest.go
diff --git a/run_image.go b/run_image.go
deleted file mode 100644
index 5d0e901..0000000
--- a/run_image.go
+++ /dev/null
@@ -1,194 +0,0 @@
-package main
-
-import (
-	"flag"
-	"fmt"
-	"log"
-	"lsh"
-	"os"
-	"path/filepath"
-)
-
-const (
-	dim = 3072
-)
-
-var (
-	datafile   string
-	knnresult  string
-	varloutdir string
-	vartoutdir string
-	nWorker    int
-	nQuery     int
-	k          int
-	m          int
-	l          int
-	w          float64
-	t          int
-	ls         []int
-	ts         []int
-	ms         []int
-	ws         []float64
-)
-
-func init() {
-	flag.IntVar(&k, "k", 50, "Number of nearest neighbours")
-	flag.StringVar(&datafile, "d", "./data/tiny_images_10k.bin",
-		"tiny image data file")
-	flag.StringVar(&varloutdir, "varlout", "",
-		"Output directory for experiment with different Ls")
-	flag.StringVar(&vartoutdir, "vartout", "",
-		"Output directory for experiment with different Ts")
-	flag.StringVar(&knnresult, "knnresult", "_knn_image_10k_k_50",
-		"Exact k-NN result file, will re-run exact k-NN if not exist")
-	flag.IntVar(&nWorker, "t", 200, "Number of threads for query tests")
-	flag.IntVar(&nQuery, "q", 1000, "Number of queries")
-	flag.IntVar(&t, "T", 64, "Length of probing sequence in Multi-probe")
-	flag.IntVar(&m, "M", 9, "Size of combined hash function")
-	flag.Float64Var(&w, "W", 8000.0, "projection slot size")
-	flag.IntVar(&l, "L", 4, "Number of hash tables")
-	ls = []int{2, 4, 8, 16, 32, 64}
-	//ms = []int{9, 9, 9, 9, 9, 9}
-	//ws = []float64{8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0}
-	ms = []int{5, 7, 9, 11, 11, 11}
-	ws = []float64{12398.0, 11683.0, 11153.0, 10778.0, 9093.0, 7889.0}
-	ts = []int{2, 4, 8, 16, 32, 64, 128}
-}
-
-type AnalysisResult struct {
-	Algorithm   string   `json:"algorithm"`
-	ResultFiles []string `json:"result_files"`
-}
-
-type VarLMeta struct {
-	AnalysisResults []AnalysisResult `json:"analysis_results"`
-	Ms              []int
-	Ws              []float64
-	K               int `json:"k"`
-	T               int
-	Ls              []int
-}
-
-type VarTMeta struct {
-	AnalysisResults []AnalysisResult `json:"analysis_results"`
-	M               int
-	W               float64
-	K               int `json:"k"`
-	L               int
-	Ts              []int
-}
-
-func resultFileName(outdir, algorithm, paramName string, paramVal int) string {
-	filename := fmt.Sprintf("%s_%s_%d", algorithm, paramName, paramVal)
-	return filepath.Join(outdir, filename)
-}
-
-func analysisFileName(outdir, algorithm, paramName string, paramVal int) string {
-	f := resultFileName(outdir, algorithm, paramName, paramVal)
-	return fmt.Sprintf("%s_%s", f, "analysis")
-}
-
-func exists(path string) (bool, error) {
-	_, err := os.Stat(path)
-	if err == nil {
-		return true, nil
-	}
-	if os.IsNotExist(err) {
-		return false, nil
-	}
-	return true, err
-}
-
-func main() {
-	flag.Parse()
-	if vartoutdir == "" || varloutdir == "" {
-		log.Fatal("No output directory given")
-		return
-	}
-	parser := lsh.NewTinyImagePointParser()
-	data := lsh.LoadData(datafile, parser)
-	queries := lsh.SelectQueriesAsSubset(data, nQuery)
-
-	exist, err := exists(knnresult)
-	if err != nil {
-		panic(err.Error())
-	}
-	if !exist {
-		// Run exact kNN
-		log.Println("Running exact kNN")
-		lsh.RunKnn(data, queries, knnresult, k, nWorker)
-	}
-
-	var analysisResults []string
-
-	// Run Var L experiments
-	varlmeta := VarLMeta{
-		AnalysisResults: make([]AnalysisResult, 0),
-		Ls:              ls,
-		Ms:              ms,
-		Ws:              ws,
-		K:               k,
-		T:               t,
-	}
-	// Basic LSH
-	analysisResults = make([]string, 0)
-	for i, l := range ls {
-		log.Printf("Running Basic LSH: l = %d\n", l)
-		result := resultFileName(varloutdir, "basic", "l", l)
-		lsh.RunSimple(data, queries, result, k, nWorker, dim, ms[i], l, ws[i])
-		analysis := analysisFileName(varloutdir, "basic", "l", l)
-		lsh.RunAnalysis(result, knnresult, analysis)
-		analysisResults = append(analysisResults, analysis)
-	}
-	varlmeta.AnalysisResults = append(varlmeta.AnalysisResults,
-		AnalysisResult{"Basic", analysisResults})
-	// LSH Forest
-	analysisResults = make([]string, 0)
-	for i, l := range ls {
-		log.Printf("Running LSH Forest: l = %d\n", l)
-		result := resultFileName(varloutdir, "forest", "l", l)
-		lsh.RunForest(data, queries, result, k, nWorker, dim, ms[i], l, ws[i])
-		analysis := analysisFileName(varloutdir, "forest", "l", l)
-		lsh.RunAnalysis(result, knnresult, analysis)
-		analysisResults = append(analysisResults, analysis)
-	}
-	varlmeta.AnalysisResults = append(varlmeta.AnalysisResults,
-		AnalysisResult{"Forest", analysisResults})
-	// Multi-probe
-	analysisResults = make([]string, 0)
-	for i, l := range ls {
-		log.Printf("Running Multi-probe LSH: l = %d\n", l)
-		result := resultFileName(varloutdir, "multiprobe", "l", l)
-		lsh.RunMultiprobe(data, queries, result, k, nWorker, dim, ms[i], l, ws[i], t)
-		analysis := analysisFileName(varloutdir, "multiprobe", "l", l)
-		lsh.RunAnalysis(result, knnresult, analysis)
-		analysisResults = append(analysisResults, analysis)
-	}
-	varlmeta.AnalysisResults = append(varlmeta.AnalysisResults,
-		AnalysisResult{"Multi-probe", analysisResults})
-	lsh.DumpJson(filepath.Join(varloutdir, ".meta"), &varlmeta)
-
-	// Run Var T experiments
-	vartmeta := VarTMeta{
-		AnalysisResults: make([]AnalysisResult, 0),
-		M:               m,
-		W:               w,
-		K:               k,
-		L:               l,
-		Ts:              ts,
-	}
-	// Multi-probe
-	analysisResults = make([]string, 0)
-	for _, t := range ts {
-		log.Printf("Running Multi-probe LSH: t = %d\n", t)
-		result := resultFileName(vartoutdir, "multiprobe", "t", t)
-		lsh.RunMultiprobe(data, queries, result, k, nWorker, dim, m, l, w, t)
-		analysis := analysisFileName(vartoutdir, "multiprobe", "t", t)
-		lsh.RunAnalysis(result, knnresult, analysis)
-		analysisResults = append(analysisResults, analysis)
-	}
-	vartmeta.AnalysisResults = append(vartmeta.AnalysisResults,
-		AnalysisResult{"Multi-probe", analysisResults})
-	lsh.DumpJson(filepath.Join(vartoutdir, ".meta"), &vartmeta)
-
-}
diff --git a/src/lsh/run_multiprobe.go b/run_multiprobe.go
similarity index 100%
rename from src/lsh/run_multiprobe.go
rename to run_multiprobe.go
diff --git a/src/lsh/run_simple.go b/run_simple.go
similarity index 100%
rename from src/lsh/run_simple.go
rename to run_simple.go
diff --git a/sample_gist.go b/sample_gist.go
deleted file mode 100644
index 7a82a1d..0000000
--- a/sample_gist.go
+++ /dev/null
@@ -1,38 +0,0 @@
-package main
-
-import (
-	"flag"
-	"lsh"
-)
-
-var (
-	datafile    string
-	nWorker     int
-	nSample     int
-	k           int
-	distOutput  string
-	kDistOutput string
-)
-
-func init() {
-	flag.IntVar(&k, "k", 1000, "K")
-	flag.StringVar(&datafile, "d", "",
-		"tiny image gist data file")
-	flag.IntVar(&nWorker, "w", 200, "Number of threads")
-	flag.IntVar(&nSample, "n", 1000, "sample size")
-	distOutput = "_gist_query_distance_sample"
-	kDistOutput = "_gist_all_pair_distance_sample"
-}
-
-func main() {
-	flag.Parse()
-	if datafile == "" {
-		panic("No datafile given")
-	}
-	// Query distance sample
-	parser := lsh.NewTinyImageGistParser()
-	lsh.RunKnn(datafile, distOutput, k, nSample, nWorker, parser)
-
-	// All pair distance sample
-	lsh.RunKnnSampleAllPair(datafile, kDistOutput, nSample, nWorker, parser)
-}
diff --git a/sample_image.go b/sample_image.go
deleted file mode 100644
index 1a04d4f..0000000
--- a/sample_image.go
+++ /dev/null
@@ -1,41 +0,0 @@
-package main
-
-import (
-	"flag"
-	"lsh"
-)
-
-var (
-	datafile    string
-	nWorker     int
-	nSample     int
-	k           int
-	distOutput  string
-	kDistOutput string
-)
-
-func init() {
-	flag.IntVar(&k, "k", 1000, "K")
-	flag.StringVar(&datafile, "d", "",
-		"tiny image data file")
-	flag.IntVar(&nWorker, "w", 200, "Number of threads")
-	flag.IntVar(&nSample, "n", 1000, "sample size")
-	distOutput = "_image_query_distance_sample"
-	kDistOutput = "_image_all_pair_distance_sample"
-}
-
-func main() {
-	flag.Parse()
-	if datafile == "" {
-		panic("No datafile given")
-	}
-	parser := lsh.NewTinyImagePointParser()
-	data := lsh.LoadData(datafile, parser)
-	queries := lsh.SelectQueriesAsSubset(data, nSample)
-
-	// Query distance sample
-	lsh.RunKnn(data, queries, distOutput, k, nWorker)
-
-	// All pair distance sample
-	lsh.RunKnnSampleAllPair(queries, kDistOutput, nWorker)
-}
diff --git a/scikit-learn b/scikit-learn
deleted file mode 160000
index 8d0a299..0000000
--- a/scikit-learn
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit 8d0a29936ee11dc175e462c9f705bbc85dba4594
diff --git a/setenv.sh b/setenv.sh
deleted file mode 100644
index a343828..0000000
--- a/setenv.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-export GOPATH=$PWD
-export PATH=$PATH:$GOPATH/bin
-export GOMAXPROCS=64
diff --git a/src/lsh/simple_index.go b/simple_index.go
similarity index 100%
rename from src/lsh/simple_index.go
rename to simple_index.go
diff --git a/src/lsh/simple_index_test.go b/simple_index_test.go
similarity index 100%
rename from src/lsh/simple_index_test.go
rename to simple_index_test.go
diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt
deleted file mode 100644
index fcb610a..0000000
--- a/src/cpp/CMakeLists.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-add_custom_target(project_test ALL)
-
-add_executable(main main.cpp)
-target_link_libraries(main flann_cpp)
-add_dependencies(project_test main)
-
diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp
deleted file mode 100644
index 6071fb1..0000000
--- a/src/cpp/main.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-#include <fstream>
-#include <iostream>
-#include <memory>
-// #include <iomanip>
-#include <vector>
-
-#include <flann/flann.hpp>
-
-using namespace std;
-
-using namespace flann;
-
-namespace {
-// Constants.
-const int kWidth = 32;
-const int kHeight = 32;
-const int kChannels = 3;
-const int kSize = kWidth * kHeight * kChannels;
-}  // namespace
-
-
-// Reads input dataset into a matrix.
-Matrix<float> ReadData(const string& filename) {
-    std::ifstream is(filename.c_str(), std::ifstream::binary);
-
-
-    // Read entire file into local vector.
-    std::vector<char> buffer(
-        (std::istreambuf_iterator<char>(is)), 
-        (std::istreambuf_iterator<char>()));
-
-    cout << "Read: " << buffer.size() << " values." << endl;
-
-    // Convert into matrix.
-    float* data = new float[buffer.size()];
-    for (int i = 0; i < buffer.size(); i++) {
-        data[i] = static_cast<float>(buffer[i] & 0x0000FF);
-    }
-
-    
-    
-/*
-        char* buffer = new char[kWidth*kHeight*kChannels];
-        is.read(buffer, kWidth * kHeight * kChannels);
-        for (int i = 0; i < kSize; i++) {
-            std::cout << (int) (buffer[i] & 0x0000FF);
-            if (i % kWidth == 31) {
-                std::cout << std::endl;
-            } else {
-                std::cout << " ";
-            }
-        }
-
-        delete[] buffer;
-*/
-
-
-    return Matrix<float>(data, buffer.size() / kSize, kSize);
-}
-
-
-int main(int argc, char *argv[]) {
-    Matrix<float> dataset = ReadData(argv[1]);
-
-    // int nn = 3;
-
-    // Matrix<float> dataset;
-    // Matrix<float> query;
-    // load_from_file(dataset, "dataset.hdf5","dataset");
-    // load_from_file(query, "dataset.hdf5","query");
-
-    Matrix<int> indices(new int[dataset.rows * dataset.rows], dataset.rows, dataset.rows);
-    Matrix<float> dists(new float[dataset.rows * dataset.rows], dataset.rows, dataset.rows);
-
-    // construct an randomized kd-tree index using 4 kd-trees
-    // Index<L2<float> > index(dataset, flann::KDTreeIndexParams(4));
-    Index<L2<float> > index(dataset, flann::LinearIndexParams());
-    index.buildIndex();                                                                                               
-
-    // do a knn search, using 128 checks
-    index.knnSearch(dataset, indices, dists, dataset.rows, flann::SearchParams(-1));
-
-    // flann::save_to_file(indices,"result.hdf5","result");
-
-    delete[] dataset.ptr();
-    // delete[] query.ptr();
-    delete[] indices.ptr();
-    delete[] dists.ptr();
-
-
-    return 0;
-}
-
diff --git a/src/lsh/tinyimage_parser.go b/tinyimage_parser.go
similarity index 100%
rename from src/lsh/tinyimage_parser.go
rename to tinyimage_parser.go
diff --git a/src/lsh/util.go b/util.go
similarity index 100%
rename from src/lsh/util.go
rename to util.go
diff --git a/src/lsh/util_test.go b/util_test.go
similarity index 100%
rename from src/lsh/util_test.go
rename to util_test.go