From e37e7e2f25154ccfeeb1b1e1001dcef05bfccb87 Mon Sep 17 00:00:00 2001 From: ekzhu Date: Thu, 21 Jan 2016 14:31:25 -0500 Subject: [PATCH] convert course project into golang libary --- .gitmodules | 12 -- CMakeLists.txt | 37 ---- LuaSHkit | 1 - dist_dist.py | 77 ------- src/lsh/experiment.go => experiment.go | 0 flann | 1 - src/lsh/forest_index.go => forest_index.go | 0 ...rest_index_test.go => forest_index_test.go | 0 src/lsh/index.go => index.go | 0 src/lsh/knn.go => knn.go | 0 src/lsh/knn_test.go => knn_test.go | 0 src/lsh/lsh.go => lsh.go | 0 src/lsh/lsh_test.go => lsh_test.go | 0 src/lsh/metric.go => metric.go | 0 ...multiprobe_index.go => multiprobe_index.go | 0 ..._index_test.go => multiprobe_index_test.go | 0 opencv | 1 - opt_param.py | 134 ------------ plot.py | 93 --------- src/lsh/run_forest.go => run_forest.go | 0 run_image.go | 194 ------------------ .../run_multiprobe.go => run_multiprobe.go | 0 src/lsh/run_simple.go => run_simple.go | 0 sample_gist.go | 38 ---- sample_image.go | 41 ---- scikit-learn | 1 - setenv.sh | 3 - src/lsh/simple_index.go => simple_index.go | 0 ...mple_index_test.go => simple_index_test.go | 0 src/cpp/CMakeLists.txt | 6 - src/cpp/main.cpp | 93 --------- ...tinyimage_parser.go => tinyimage_parser.go | 0 src/lsh/util.go => util.go | 0 src/lsh/util_test.go => util_test.go | 0 34 files changed, 732 deletions(-) delete mode 100644 .gitmodules delete mode 100644 CMakeLists.txt delete mode 160000 LuaSHkit delete mode 100644 dist_dist.py rename src/lsh/experiment.go => experiment.go (100%) delete mode 160000 flann rename src/lsh/forest_index.go => forest_index.go (100%) rename src/lsh/forest_index_test.go => forest_index_test.go (100%) rename src/lsh/index.go => index.go (100%) rename src/lsh/knn.go => knn.go (100%) rename src/lsh/knn_test.go => knn_test.go (100%) rename src/lsh/lsh.go => lsh.go (100%) rename src/lsh/lsh_test.go => lsh_test.go (100%) rename src/lsh/metric.go => metric.go (100%) rename src/lsh/multiprobe_index.go => multiprobe_index.go (100%) rename src/lsh/multiprobe_index_test.go => multiprobe_index_test.go (100%) delete mode 160000 opencv delete mode 100644 opt_param.py delete mode 100644 plot.py rename src/lsh/run_forest.go => run_forest.go (100%) delete mode 100644 run_image.go rename src/lsh/run_multiprobe.go => run_multiprobe.go (100%) rename src/lsh/run_simple.go => run_simple.go (100%) delete mode 100644 sample_gist.go delete mode 100644 sample_image.go delete mode 160000 scikit-learn delete mode 100644 setenv.sh rename src/lsh/simple_index.go => simple_index.go (100%) rename src/lsh/simple_index_test.go => simple_index_test.go (100%) delete mode 100644 src/cpp/CMakeLists.txt delete mode 100644 src/cpp/main.cpp rename src/lsh/tinyimage_parser.go => tinyimage_parser.go (100%) rename src/lsh/util.go => util.go (100%) rename src/lsh/util_test.go => util_test.go (100%) diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a1c8388..0000000 --- a/.gitmodules +++ /dev/null @@ -1,12 +0,0 @@ -[submodule "flann"] - path = flann - url = git://github.com/mariusmuja/flann -[submodule "opencv"] - path = opencv - url = git@github.com:Itseez/opencv.git -[submodule "LuaSHkit"] - path = LuaSHkit - url = git@github.com:ocallaco/LuaSHkit.git -[submodule "scikit-learn"] - path = scikit-learn - url = git@github.com:scikit-learn/scikit-learn.git diff --git a/CMakeLists.txt b/CMakeLists.txt deleted file mode 100644 index e5864de..0000000 --- a/CMakeLists.txt +++ /dev/null @@ -1,37 +0,0 @@ -cmake_minimum_required(VERSION 2.6) - -project(CSC2515Project) -string(TOLOWER ${PROJECT_NAME} PROJECT_NAME_LOWER) - -# detect if using the Clang compiler -if("${CMAKE_C_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_CLANG 1) -endif () - -if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang") - set(CMAKE_COMPILER_IS_CLANGXX 1) -endif () - -#set the default path for built executables to the "bin" directory -set(EXECUTABLE_OUTPUT_PATH "${PROJECT_BINARY_DIR}/bin") -#set the default path for built libraries to the "lib" directory -set(LIBRARY_OUTPUT_PATH "${PROJECT_BINARY_DIR}/lib") - -# Include Flann -include("${PROJECT_SOURCE_DIR}/flann/cmake/FindFlann.cmake") - -#set the C/C++ include path to the "include" directory -include_directories(BEFORE "${PROJECT_SOURCE_DIR}/flann/src/cpp") - -# require proper c++ -#add_definitions( "-Wall -ansi -pedantic" ) -# HDF5 uses long long which is not ansi -if(CMAKE_C_COMPILER_ID MATCHES "MSVC" OR CMAKE_CXX_COMPILER_ID MATCHES "MSVC") - # lots of warnings with cl.exe right now, use /W1 - add_definitions("/W1 -D_CRT_SECURE_NO_WARNINGS -D_SCL_SECURE_NO_WARNINGS /bigobj") -else() - add_definitions( "-Wall -Wno-unknown-pragmas -Wno-unused-function" ) -endif() - -add_subdirectory(src) - diff --git a/LuaSHkit b/LuaSHkit deleted file mode 160000 index c77186e..0000000 --- a/LuaSHkit +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c77186ef123505c7e2065dcde05d8b699d6a93d9 diff --git a/dist_dist.py b/dist_dist.py deleted file mode 100644 index f869106..0000000 --- a/dist_dist.py +++ /dev/null @@ -1,77 +0,0 @@ -''' -Plot the histogram of top-k distances -''' - -import json, sys, collections -import numpy as np -import scipy -from scipy.stats import gamma -import matplotlib -matplotlib.use("Agg") -import matplotlib.pyplot as plt - -def load_all_pair_sample(datafile): - with open(datafile) as f: - data = json.load(f) - dists = collections.deque([]) - for query_result in data: - for neighbour in query_result["neighbours"]: - dists.append(neighbour["distance"]) - dists = np.array(list(dists)) - dists_squared = np.square(dists) - gamma_x = gamma.fit(dists_squared) - print("Distance-squared distribution: ", gamma_x) - return gamma_x, dists_squared - -def load_topk_sample(datafile): - with open(datafile) as f: - data = json.load(f) - topks = collections.deque([]) - for query_result in data: - dists = collections.deque([]) - for neighbour in query_result["neighbours"]: - dists.append(neighbour["distance"]) - dists_squared = np.square(np.sort(list(dists))) - topks.append(dists_squared) - topks = np.array(list(topks)) - gamma_xk = [] - for i in ks: - params = gamma.fit(topks[:,i]) - gamma_xk.append(params) - print("k = %d distance-squared distribution: " % i, params) - return gamma_xk, topks - -all_pairs_sample = "./_image_all_pair_distance_sample" -topk_sample = "./_image_query_distance_sample" -ks = [10, 50, 200] -max_w = 15000.0 -max_m = 12 -required_recall = 0.5 - -gamma_x, dists_squared = load_all_pair_sample(all_pairs_sample) -gamma_xk, topk_dists_squared = load_topk_sample(topk_sample) -max_x = np.max(dists_squared) - - -fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharex=True) - -# Plot all pair distance distribution -x = np.linspace(0.0, max_x/2.0, num=100) -pdf = gamma.pdf(x, gamma_x[0], gamma_x[1], gamma_x[2]) -axes[0].plot(x, pdf) -axes[0].grid() -axes[0].set_ylabel("Probability") -axes[0].set_xlabel("Sqaured L2 distance") - -# Plot kth nearest neighbour distance distribution -for i, k in enumerate(ks): - shape, loc, scale = gamma_xk[i] - pdf = gamma.pdf(x, shape, loc, scale) - axes[1].plot(x, pdf, label="%d-NN" % k) -axes[1].legend() -axes[1].set_ylabel("Probability") -axes[1].set_xlabel("Sqaured L2 distance") -axes[1].grid() - -plt.savefig("dist_dist.png") -plt.close() diff --git a/src/lsh/experiment.go b/experiment.go similarity index 100% rename from src/lsh/experiment.go rename to experiment.go diff --git a/flann b/flann deleted file mode 160000 index b8a442f..0000000 --- a/flann +++ /dev/null @@ -1 +0,0 @@ -Subproject commit b8a442fd98f8ce32ae3465bfd3427b5cbc36f6a5 diff --git a/src/lsh/forest_index.go b/forest_index.go similarity index 100% rename from src/lsh/forest_index.go rename to forest_index.go diff --git a/src/lsh/forest_index_test.go b/forest_index_test.go similarity index 100% rename from src/lsh/forest_index_test.go rename to forest_index_test.go diff --git a/src/lsh/index.go b/index.go similarity index 100% rename from src/lsh/index.go rename to index.go diff --git a/src/lsh/knn.go b/knn.go similarity index 100% rename from src/lsh/knn.go rename to knn.go diff --git a/src/lsh/knn_test.go b/knn_test.go similarity index 100% rename from src/lsh/knn_test.go rename to knn_test.go diff --git a/src/lsh/lsh.go b/lsh.go similarity index 100% rename from src/lsh/lsh.go rename to lsh.go diff --git a/src/lsh/lsh_test.go b/lsh_test.go similarity index 100% rename from src/lsh/lsh_test.go rename to lsh_test.go diff --git a/src/lsh/metric.go b/metric.go similarity index 100% rename from src/lsh/metric.go rename to metric.go diff --git a/src/lsh/multiprobe_index.go b/multiprobe_index.go similarity index 100% rename from src/lsh/multiprobe_index.go rename to multiprobe_index.go diff --git a/src/lsh/multiprobe_index_test.go b/multiprobe_index_test.go similarity index 100% rename from src/lsh/multiprobe_index_test.go rename to multiprobe_index_test.go diff --git a/opencv b/opencv deleted file mode 160000 index d6f8a75..0000000 --- a/opencv +++ /dev/null @@ -1 +0,0 @@ -Subproject commit d6f8a75e91112f4b98f895658a747a077f580684 diff --git a/opt_param.py b/opt_param.py deleted file mode 100644 index 29a0e4b..0000000 --- a/opt_param.py +++ /dev/null @@ -1,134 +0,0 @@ -import json, sys, collections -import numpy as np -import scipy -from scipy.stats import gamma, norm -from scipy.integrate import quad -from scipy.constants import pi -import matplotlib.pyplot as plt - -def _integration(a, b, f, p): - area = 0.0 - x = a - while x < b: - area += f(x+0.5*p)*p - x += p - return area - -def _collision_probability(w, r): - a = 1.0 - 2.0 * norm.cdf(- w / r) - b = 2.0 / (np.sqrt(2.0 * pi) * w / r) - c = 1.0 - np.exp(- (w * w) / (2.0 * r * r)) - return a - b * c - -def _hash_probability(m, l, w, r): - p = 1.0 - (1.0 - _collision_probability(w, r)**float(m))**float(l) - if p < 0.0: - print(m, l, w, r) - raise ValueError() - return p - -def _recall(m, l, w, gamma_params, max_x): - k = len(gamma_params) - s = 0.0 - for i in range(k): - shape, loc, scale = gamma_params[i] - join_prob_func = lambda x : _hash_probability(m, l, w, np.sqrt(x)) * gamma.pdf(x, shape, loc, scale) - prob, _ = quad(join_prob_func, 0.0, max_x) - s += prob - return s / float(k) - -def _selectivity(m, l, w, gamma_param, max_x): - shape, loc, scale = gamma_param - join_prob_func = lambda x : _hash_probability(m, l, w, np.sqrt(x)) * gamma.pdf(x, shape, loc, scale) - prob, _ = quad(join_prob_func, 0.0, max_x) - return prob - -def optimization(max_m, l, max_w, max_x, gamma_x, gamma_xk, required_recall): - best_m = 0 - best_w = 0.0 - best_selectivity = float('inf') - for m in range(1, max_m): - # Search for the m and w that gives the smallest recall just above the required_recall - # Use binary search - right_bound = max_w - left_bound = 0.0 - w = (right_bound + left_bound) / 2.0 - delta = float('inf') - last_recall = float("inf") - while delta > 1.0: - recall = _recall(m, l, w, gamma_xk, max_x) - print("recall", recall) - if recall < required_recall: - left_bound = w - else: - right_bound = w - print("New search interval", (left_bound, right_bound)) - new_w = (right_bound + left_bound) / 2.0 - if new_w < 0.0: - print(left_bound, right_bound, w, new_w, m, l, recall) - raise ValueError() - delta = np.abs(new_w - w) - w = new_w - last_recall = recall - if recall < required_recall - 0.01: - print("Failed for l = %d m = %d is w = %f, recall = %f" % (l, m, w, recall)) - continue - selectivity = _selectivity(m, l, w, gamma_x, max_x) - print("Best for l = %d m = %d is w = %f, recall = %f, selectivity = %f" % (l, m, w, recall, selectivity)) - if selectivity < best_selectivity: - best_selectivity = selectivity - best_m = m - best_w = w - print("Best overall for l = %d is m = %d, w = %d" % (l, best_m, best_w)) - return best_m, best_w - -def load_all_pair_sample(datafile): - with open(datafile) as f: - data = json.load(f) - dists = collections.deque([]) - for query_result in data: - for neighbour in query_result["neighbours"]: - dists.append(neighbour["distance"]) - dists = np.array(list(dists)) - dists_squared = np.square(dists) - gamma_x = gamma.fit(dists_squared) - print("Distance-squared distribution: ", gamma_x) - return gamma_x, dists_squared - -def load_topk_sample(datafile): - with open(datafile) as f: - data = json.load(f) - topks = collections.deque([]) - for query_result in data: - dists = collections.deque([]) - for neighbour in query_result["neighbours"]: - dists.append(neighbour["distance"]) - dists_squared = np.square(np.sort(list(dists))) - topks.append(dists_squared) - topks = np.array(list(topks)) - gamma_xk = [] - for i in range(k): - params = gamma.fit(topks[:,i]) - gamma_xk.append(params) - print("k = %d distance-squared distribution: " % i, params) - return gamma_xk, topks - -all_pairs_sample = "./_image_all_pair_distance_sample" -topk_sample = "./_image_query_distance_sample" -k = 50 -max_w = 15000.0 -max_m = 12 -required_recall = 0.5 -output = "opt_param_k_%d_recall_%.2f.json" % (k, required_recall) - -gamma_x, dists_squared = load_all_pair_sample(all_pairs_sample) -gamma_xk, topk_dists_squared = load_topk_sample(topk_sample) -max_x = np.max(dists_squared) -ls = [2, 4, 8, 16, 32, 64] -out = [] -for l in ls: - m, w = optimization(max_m, l, max_w, max_x, - gamma_x, gamma_xk, required_recall) - out.append({"L" : l, "M" : m, "W" : w}) -with open(output, 'w') as f: - json.dump(f, out) diff --git a/plot.py b/plot.py deleted file mode 100644 index 10a089f..0000000 --- a/plot.py +++ /dev/null @@ -1,93 +0,0 @@ -import json, sys, argparse, os -import numpy as np -import matplotlib -matplotlib.use("Agg") -import matplotlib.pyplot as plt - -def get_analysis(meta): - a = {} - for analysis_result in meta["analysis_results"]: - label = analysis_result["algorithm"] - result_files = analysis_result["result_files"] - error_ratios = [] - recalls = [] - times = [] - for result_file in result_files: - with open(result_file) as f: - analysis = json.load(f) - error_ratios.append(np.mean(analysis["errorratios"])) - recalls.append(np.mean(analysis["recalls"])) - times.append(np.percentile(analysis["times"], 90)) - a[label] = {"error_ratios" : error_ratios, - "recalls" : recalls, - "times" : times} - return a - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument("varlout") - parser.add_argument("vartout") - args = parser.parse_args(sys.argv[1:]) - - # var L experiments - metafile = os.path.join(args.varlout, ".meta") - with open(metafile) as f: - varlmeta = json.load(f) - ls = varlmeta["Ls"] - ms = varlmeta["Ms"] - ws = varlmeta["Ws"] - varl_analysis = get_analysis(varlmeta) - - # var T experiments - metafile = os.path.join(args.vartout, ".meta") - with open(metafile) as f: - vartmeta = json.load(f) - ts = vartmeta["Ts"] - vart_analysis = get_analysis(vartmeta) - - - # Plot recall - fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=True) - #axes[0].set_ylim(0.5, 1.0) - axes[0].set_xscale('log', basex=2) - axes[0].grid() - for label in varl_analysis: - recall = varl_analysis[label]["recalls"] - axes[0].plot(ls, recall, label=label, marker="+") - axes[0].set_xlabel("Number of hash tables") - axes[0].set_ylabel("Recall") - axes[0].legend(loc="lower right") - axes[0].set_title("M = %d, W = %d, T = %d" % (ms[0], ws[0], varlmeta["T"])) - axes[1].grid() - axes[1].set_xscale('log', basex=2) - for label in vart_analysis: - recall = vart_analysis[label]["recalls"] - axes[1].plot(ts, recall, label=label, marker="+") - axes[1].set_xlabel("Number of probes") - axes[1].set_ylabel("Recall") - axes[1].set_title("M = %d, L = %d, W = %d" % (vartmeta["M"], vartmeta["L"], vartmeta["W"])) - fig.savefig("recall.png") - plt.close() - - # Plot time - fig, axes = plt.subplots(1, 2, figsize=(10, 5), sharey=True) - axes[0].set_xscale('log', basex=2) - axes[0].grid() - for label in varl_analysis: - times = varl_analysis[label]["times"] - axes[0].plot(ls, times, label=label, marker="+") - axes[0].set_xlabel("Number of hash tables") - axes[0].set_ylabel("90 percentil query time (ms)") - axes[0].legend(loc="upper left") - axes[0].set_title("T = %d" % (varlmeta["T"])) - axes[1].grid() - axes[1].set_xscale('log', basex=2) - for label in vart_analysis: - times = vart_analysis[label]["times"] - axes[1].plot(ts, times, label=label, marker="+") - axes[1].set_xlabel("Number of probes") - axes[1].set_ylabel("90 percentil query time (ms)") - axes[1].set_title("M = %d, L = %d, W = %d" % (vartmeta["M"], vartmeta["L"], vartmeta["W"])) - fig.savefig("time.png") - plt.close() - diff --git a/src/lsh/run_forest.go b/run_forest.go similarity index 100% rename from src/lsh/run_forest.go rename to run_forest.go diff --git a/run_image.go b/run_image.go deleted file mode 100644 index 5d0e901..0000000 --- a/run_image.go +++ /dev/null @@ -1,194 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "log" - "lsh" - "os" - "path/filepath" -) - -const ( - dim = 3072 -) - -var ( - datafile string - knnresult string - varloutdir string - vartoutdir string - nWorker int - nQuery int - k int - m int - l int - w float64 - t int - ls []int - ts []int - ms []int - ws []float64 -) - -func init() { - flag.IntVar(&k, "k", 50, "Number of nearest neighbours") - flag.StringVar(&datafile, "d", "./data/tiny_images_10k.bin", - "tiny image data file") - flag.StringVar(&varloutdir, "varlout", "", - "Output directory for experiment with different Ls") - flag.StringVar(&vartoutdir, "vartout", "", - "Output directory for experiment with different Ts") - flag.StringVar(&knnresult, "knnresult", "_knn_image_10k_k_50", - "Exact k-NN result file, will re-run exact k-NN if not exist") - flag.IntVar(&nWorker, "t", 200, "Number of threads for query tests") - flag.IntVar(&nQuery, "q", 1000, "Number of queries") - flag.IntVar(&t, "T", 64, "Length of probing sequence in Multi-probe") - flag.IntVar(&m, "M", 9, "Size of combined hash function") - flag.Float64Var(&w, "W", 8000.0, "projection slot size") - flag.IntVar(&l, "L", 4, "Number of hash tables") - ls = []int{2, 4, 8, 16, 32, 64} - //ms = []int{9, 9, 9, 9, 9, 9} - //ws = []float64{8000.0, 8000.0, 8000.0, 8000.0, 8000.0, 8000.0} - ms = []int{5, 7, 9, 11, 11, 11} - ws = []float64{12398.0, 11683.0, 11153.0, 10778.0, 9093.0, 7889.0} - ts = []int{2, 4, 8, 16, 32, 64, 128} -} - -type AnalysisResult struct { - Algorithm string `json:"algorithm"` - ResultFiles []string `json:"result_files"` -} - -type VarLMeta struct { - AnalysisResults []AnalysisResult `json:"analysis_results"` - Ms []int - Ws []float64 - K int `json:"k"` - T int - Ls []int -} - -type VarTMeta struct { - AnalysisResults []AnalysisResult `json:"analysis_results"` - M int - W float64 - K int `json:"k"` - L int - Ts []int -} - -func resultFileName(outdir, algorithm, paramName string, paramVal int) string { - filename := fmt.Sprintf("%s_%s_%d", algorithm, paramName, paramVal) - return filepath.Join(outdir, filename) -} - -func analysisFileName(outdir, algorithm, paramName string, paramVal int) string { - f := resultFileName(outdir, algorithm, paramName, paramVal) - return fmt.Sprintf("%s_%s", f, "analysis") -} - -func exists(path string) (bool, error) { - _, err := os.Stat(path) - if err == nil { - return true, nil - } - if os.IsNotExist(err) { - return false, nil - } - return true, err -} - -func main() { - flag.Parse() - if vartoutdir == "" || varloutdir == "" { - log.Fatal("No output directory given") - return - } - parser := lsh.NewTinyImagePointParser() - data := lsh.LoadData(datafile, parser) - queries := lsh.SelectQueriesAsSubset(data, nQuery) - - exist, err := exists(knnresult) - if err != nil { - panic(err.Error()) - } - if !exist { - // Run exact kNN - log.Println("Running exact kNN") - lsh.RunKnn(data, queries, knnresult, k, nWorker) - } - - var analysisResults []string - - // Run Var L experiments - varlmeta := VarLMeta{ - AnalysisResults: make([]AnalysisResult, 0), - Ls: ls, - Ms: ms, - Ws: ws, - K: k, - T: t, - } - // Basic LSH - analysisResults = make([]string, 0) - for i, l := range ls { - log.Printf("Running Basic LSH: l = %d\n", l) - result := resultFileName(varloutdir, "basic", "l", l) - lsh.RunSimple(data, queries, result, k, nWorker, dim, ms[i], l, ws[i]) - analysis := analysisFileName(varloutdir, "basic", "l", l) - lsh.RunAnalysis(result, knnresult, analysis) - analysisResults = append(analysisResults, analysis) - } - varlmeta.AnalysisResults = append(varlmeta.AnalysisResults, - AnalysisResult{"Basic", analysisResults}) - // LSH Forest - analysisResults = make([]string, 0) - for i, l := range ls { - log.Printf("Running LSH Forest: l = %d\n", l) - result := resultFileName(varloutdir, "forest", "l", l) - lsh.RunForest(data, queries, result, k, nWorker, dim, ms[i], l, ws[i]) - analysis := analysisFileName(varloutdir, "forest", "l", l) - lsh.RunAnalysis(result, knnresult, analysis) - analysisResults = append(analysisResults, analysis) - } - varlmeta.AnalysisResults = append(varlmeta.AnalysisResults, - AnalysisResult{"Forest", analysisResults}) - // Multi-probe - analysisResults = make([]string, 0) - for i, l := range ls { - log.Printf("Running Multi-probe LSH: l = %d\n", l) - result := resultFileName(varloutdir, "multiprobe", "l", l) - lsh.RunMultiprobe(data, queries, result, k, nWorker, dim, ms[i], l, ws[i], t) - analysis := analysisFileName(varloutdir, "multiprobe", "l", l) - lsh.RunAnalysis(result, knnresult, analysis) - analysisResults = append(analysisResults, analysis) - } - varlmeta.AnalysisResults = append(varlmeta.AnalysisResults, - AnalysisResult{"Multi-probe", analysisResults}) - lsh.DumpJson(filepath.Join(varloutdir, ".meta"), &varlmeta) - - // Run Var T experiments - vartmeta := VarTMeta{ - AnalysisResults: make([]AnalysisResult, 0), - M: m, - W: w, - K: k, - L: l, - Ts: ts, - } - // Multi-probe - analysisResults = make([]string, 0) - for _, t := range ts { - log.Printf("Running Multi-probe LSH: t = %d\n", t) - result := resultFileName(vartoutdir, "multiprobe", "t", t) - lsh.RunMultiprobe(data, queries, result, k, nWorker, dim, m, l, w, t) - analysis := analysisFileName(vartoutdir, "multiprobe", "t", t) - lsh.RunAnalysis(result, knnresult, analysis) - analysisResults = append(analysisResults, analysis) - } - vartmeta.AnalysisResults = append(vartmeta.AnalysisResults, - AnalysisResult{"Multi-probe", analysisResults}) - lsh.DumpJson(filepath.Join(vartoutdir, ".meta"), &vartmeta) - -} diff --git a/src/lsh/run_multiprobe.go b/run_multiprobe.go similarity index 100% rename from src/lsh/run_multiprobe.go rename to run_multiprobe.go diff --git a/src/lsh/run_simple.go b/run_simple.go similarity index 100% rename from src/lsh/run_simple.go rename to run_simple.go diff --git a/sample_gist.go b/sample_gist.go deleted file mode 100644 index 7a82a1d..0000000 --- a/sample_gist.go +++ /dev/null @@ -1,38 +0,0 @@ -package main - -import ( - "flag" - "lsh" -) - -var ( - datafile string - nWorker int - nSample int - k int - distOutput string - kDistOutput string -) - -func init() { - flag.IntVar(&k, "k", 1000, "K") - flag.StringVar(&datafile, "d", "", - "tiny image gist data file") - flag.IntVar(&nWorker, "w", 200, "Number of threads") - flag.IntVar(&nSample, "n", 1000, "sample size") - distOutput = "_gist_query_distance_sample" - kDistOutput = "_gist_all_pair_distance_sample" -} - -func main() { - flag.Parse() - if datafile == "" { - panic("No datafile given") - } - // Query distance sample - parser := lsh.NewTinyImageGistParser() - lsh.RunKnn(datafile, distOutput, k, nSample, nWorker, parser) - - // All pair distance sample - lsh.RunKnnSampleAllPair(datafile, kDistOutput, nSample, nWorker, parser) -} diff --git a/sample_image.go b/sample_image.go deleted file mode 100644 index 1a04d4f..0000000 --- a/sample_image.go +++ /dev/null @@ -1,41 +0,0 @@ -package main - -import ( - "flag" - "lsh" -) - -var ( - datafile string - nWorker int - nSample int - k int - distOutput string - kDistOutput string -) - -func init() { - flag.IntVar(&k, "k", 1000, "K") - flag.StringVar(&datafile, "d", "", - "tiny image data file") - flag.IntVar(&nWorker, "w", 200, "Number of threads") - flag.IntVar(&nSample, "n", 1000, "sample size") - distOutput = "_image_query_distance_sample" - kDistOutput = "_image_all_pair_distance_sample" -} - -func main() { - flag.Parse() - if datafile == "" { - panic("No datafile given") - } - parser := lsh.NewTinyImagePointParser() - data := lsh.LoadData(datafile, parser) - queries := lsh.SelectQueriesAsSubset(data, nSample) - - // Query distance sample - lsh.RunKnn(data, queries, distOutput, k, nWorker) - - // All pair distance sample - lsh.RunKnnSampleAllPair(queries, kDistOutput, nWorker) -} diff --git a/scikit-learn b/scikit-learn deleted file mode 160000 index 8d0a299..0000000 --- a/scikit-learn +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 8d0a29936ee11dc175e462c9f705bbc85dba4594 diff --git a/setenv.sh b/setenv.sh deleted file mode 100644 index a343828..0000000 --- a/setenv.sh +++ /dev/null @@ -1,3 +0,0 @@ -export GOPATH=$PWD -export PATH=$PATH:$GOPATH/bin -export GOMAXPROCS=64 diff --git a/src/lsh/simple_index.go b/simple_index.go similarity index 100% rename from src/lsh/simple_index.go rename to simple_index.go diff --git a/src/lsh/simple_index_test.go b/simple_index_test.go similarity index 100% rename from src/lsh/simple_index_test.go rename to simple_index_test.go diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt deleted file mode 100644 index fcb610a..0000000 --- a/src/cpp/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -add_custom_target(project_test ALL) - -add_executable(main main.cpp) -target_link_libraries(main flann_cpp) -add_dependencies(project_test main) - diff --git a/src/cpp/main.cpp b/src/cpp/main.cpp deleted file mode 100644 index 6071fb1..0000000 --- a/src/cpp/main.cpp +++ /dev/null @@ -1,93 +0,0 @@ -#include -#include -#include -// #include -#include - -#include - -using namespace std; - -using namespace flann; - -namespace { -// Constants. -const int kWidth = 32; -const int kHeight = 32; -const int kChannels = 3; -const int kSize = kWidth * kHeight * kChannels; -} // namespace - - -// Reads input dataset into a matrix. -Matrix ReadData(const string& filename) { - std::ifstream is(filename.c_str(), std::ifstream::binary); - - - // Read entire file into local vector. - std::vector buffer( - (std::istreambuf_iterator(is)), - (std::istreambuf_iterator())); - - cout << "Read: " << buffer.size() << " values." << endl; - - // Convert into matrix. - float* data = new float[buffer.size()]; - for (int i = 0; i < buffer.size(); i++) { - data[i] = static_cast(buffer[i] & 0x0000FF); - } - - - -/* - char* buffer = new char[kWidth*kHeight*kChannels]; - is.read(buffer, kWidth * kHeight * kChannels); - for (int i = 0; i < kSize; i++) { - std::cout << (int) (buffer[i] & 0x0000FF); - if (i % kWidth == 31) { - std::cout << std::endl; - } else { - std::cout << " "; - } - } - - delete[] buffer; -*/ - - - return Matrix(data, buffer.size() / kSize, kSize); -} - - -int main(int argc, char *argv[]) { - Matrix dataset = ReadData(argv[1]); - - // int nn = 3; - - // Matrix dataset; - // Matrix query; - // load_from_file(dataset, "dataset.hdf5","dataset"); - // load_from_file(query, "dataset.hdf5","query"); - - Matrix indices(new int[dataset.rows * dataset.rows], dataset.rows, dataset.rows); - Matrix dists(new float[dataset.rows * dataset.rows], dataset.rows, dataset.rows); - - // construct an randomized kd-tree index using 4 kd-trees - // Index > index(dataset, flann::KDTreeIndexParams(4)); - Index > index(dataset, flann::LinearIndexParams()); - index.buildIndex(); - - // do a knn search, using 128 checks - index.knnSearch(dataset, indices, dists, dataset.rows, flann::SearchParams(-1)); - - // flann::save_to_file(indices,"result.hdf5","result"); - - delete[] dataset.ptr(); - // delete[] query.ptr(); - delete[] indices.ptr(); - delete[] dists.ptr(); - - - return 0; -} - diff --git a/src/lsh/tinyimage_parser.go b/tinyimage_parser.go similarity index 100% rename from src/lsh/tinyimage_parser.go rename to tinyimage_parser.go diff --git a/src/lsh/util.go b/util.go similarity index 100% rename from src/lsh/util.go rename to util.go diff --git a/src/lsh/util_test.go b/util_test.go similarity index 100% rename from src/lsh/util_test.go rename to util_test.go