Skip to content

Commit

Permalink
inc search destroyed
Browse files Browse the repository at this point in the history
  • Loading branch information
CaucherWang committed Sep 7, 2022
1 parent b33bbd4 commit 70372e9
Show file tree
Hide file tree
Showing 16 changed files with 1,359 additions and 104 deletions.
48 changes: 26 additions & 22 deletions config.ini
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
; Test config file for ini_example.c and INIReaderTest.cpp

[expr]
dataset = dna
dataset = rand
;0 for fadas in-memory
;1 for fadas, 2 for fadas fuzzy;
;(3-7 are experimental indexes and can be ignored) 3 for fadas pos, 4 for dynamic, 5 for grid, 6 for cluster, 7 for dynamic cluster,
Expand All @@ -13,29 +13,32 @@ materialized = 1
;0 for build index, 1 for approx query, 2 for exact search expr,
;3 for only exact search, 4 for index stats,
;5 for incremental search
;6 for approx dtw search, 7 for exact dtw search, 8 for inc approx. dtw
ops = 0
;6 for approx dtw search, 7 for exact dtw search,
;8 for inc approx. dtw, 9 for complete workload
;10 for ng-search
ops = 5
;50 for ecg on-disk and rand-100m, rand-300m, deep-10m, dna-26m
;500 for seismic,deep1b,
;100 for others
maxK = 50
;only for exact, ng, and in-memory search and ground truth generation
k = 50
;rand:256, dna:1024, ecg:320, deep:96
tsLength = 1024
tsLength = 256
query_num = 200
;dna:26339815
series_num = -1
;only for exact search and in-memory search and ground truth generation
k = 50
series_num = 10000000

dtw_window_percent = 0.05
init_batch_size = 30000000
batch_size =
batch_size = 1400000
batch_num = 50

[parameter]
th = 10000
segmentNum = 16
bitsCardinality = 8
; in MB; this strongly affects index building time (output time), so make it as large as possible
fbl_size = 40960
fbl_size = 32768
max_diff = 3
; fuzzy boundary ratio f
boundary_1st = 0.3
Expand All @@ -47,7 +50,7 @@ max_radius = 6
imbalance = 0.3

; define a small leaf node
small_perc = 0.2
small_perc = 1
f_low = 0.5
f_high = 1.5
; weighting factor
Expand All @@ -62,16 +65,16 @@ graphfn = ../RowGraph_16_3.bin
bitsReserve = 3

[rand]
paafn = /mnt/c/Series4Similarity_Search/rand/paa/rand-16384-1.5m-16.bin
saxfn = /mnt/c/Series4Similarity_Search/rand/sax/rand-16384-1.5m-16.bin
paafn = /mnt/c/Series4Similarity_Search/rand/paa/rand-256-100m-16.bin
saxfn = /mnt/c/Series4Similarity_Search/rand/sax/rand-256-100m-16.bin
idxfn = /mnt/c/Series4Similarity_Search/rand/non-mat/cluster/
tardisfn = /mnt/c/Series4Similarity_Search/rand/tardis/
posidxfn = ../index-pos/rand/
fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/index-16384/
fidxfn = /mnt/c/Series4Similarity_Search/rand/index-16384/
datafn = /mnt/c/Series4Similarity_Search/rand/rand-16384-1.5m.bin
queryfn = /mnt/c/Series4Similarity_Search/rand/rand-16384-200.bin
resfn = /mnt/c/Series4Similarity_Search/rand/rand-16384-200-50.bin
;fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/index-16384/
;fidxfn = /mnt/c/Series4Similarity_Search/rand/index-16384/
;datafn = /mnt/c/Series4Similarity_Search/rand/rand-16384-1.5m.bin
;queryfn = /mnt/c/Series4Similarity_Search/rand/rand-16384-200.bin
;resfn = /mnt/c/Series4Similarity_Search/rand/rand-16384-200-50.bin
;fidxfn = /mnt/c/Series4Similarity_Search/rand/index-4096/
;datafn = /mnt/c/Series4Similarity_Search/rand/rand-4096-6m.bin
;queryfn = /mnt/c/Series4Similarity_Search/rand/rand-4096-200.bin
Expand All @@ -82,10 +85,11 @@ resfn = /mnt/c/Series4Similarity_Search/rand/rand-16384-200-50.bin
;queryfn = /mnt/c/Series4Similarity_Search/rand/rand-1024-200.bin
;resfn = /mnt/c/Series4Similarity_Search/rand/rand-1024-200-50.bin
;fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/index-1024/
;fidxfn = /mnt/c/Series4Similarity_Search/rand/index/
;datafn = /mnt/c/Series4Similarity_Search/rand/rand-256-100m.bin
;queryfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200-dtw2.bin
;resfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200-50.bin
fidxfn = /mnt/c/Series4Similarity_Search/rand/index/
fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/fuzzy/
datafn = /mnt/c/Series4Similarity_Search/rand/rand-256-100m.bin
queryfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200.bin
resfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200-50.bin
dtwresfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200-50-dtw2.bin
dstreefn = /mnt/c/Series4Similarity_Search/ds-tree/rand/

Expand Down
9 changes: 8 additions & 1 deletion include/Const.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class Const {

// sec:expr
static string dataset, method;
static int tsLength, maxK, index, ops, materialized, method_code, query_num, series_num, k, dtw_window_size;
static int tsLength, maxK, index, ops, materialized, method_code, query_num, series_num, k, dtw_window_size,
batch_size, batch_num;
static double dtw_window_percent;

//sec: parameter
Expand Down Expand Up @@ -87,6 +88,12 @@ class Const {
k = reader.GetInteger("expr", "k", -1);
cout << "k: " << k << endl;

batch_size = reader.GetInteger("expr", "batch_size", -1);
cout << "batch_size: " << batch_size << endl;

batch_num = reader.GetInteger("expr", "batch_num", -1);
cout << "batch_num: " << batch_num << endl;

dtw_window_percent = reader.GetReal("expr", "dtw_window_percent", -1);
cout << "dtw_window_percent: " << dtw_window_percent << endl;

Expand Down
25 changes: 22 additions & 3 deletions include/DataStructures/FADASNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,9 @@ class FADASNode {
void growIndexLessPack();
void growIndexFuzzy(unordered_map<FADASNode *, NODE_RECORDER> &navigating_tbl, vector<vector<int>> *g);

void collectSAXwords(unsigned short *node_saxes, int *cur, vector<string> &leaf_files, vector<string> &sax_files);
void deleteSubtree();

void fuzzySeriesInPartUnit(partUnit *part_units, int actual_size, int chosen_num, vector<int> &node_offsets,
vector<int> &series_index_list,
unordered_map<FADASNode *, NODE_RECORDER> &navigating_tbl, int _id) const;
Expand Down Expand Up @@ -149,6 +152,7 @@ class FADASNode {
vector<int> chosenSegments{};
vector<FADASNode*>children;
int size = 0;
int leaf_num = 0;
int id = -1;
string file_id{}; // to identify a particular leaf node
int layer = 0;
Expand All @@ -173,10 +177,16 @@ class FADASNode {
if(isLeafPack()) return "P-" + to_string(layer) + "-" + file_id;
if(isLeafNode()) return to_string(layer) + "-" + file_id;
}
void getFileNameInsert(const string &index_dir, string &sax_file, string &data_file) const;
// True when this node's size does not exceed Const::th and partition_id is -1.
// NOTE(review): partition_id == -1 presumably means "not packed into a partition" — confirm.
[[nodiscard]] bool isLeafNode() const {
    if (partition_id != -1) {
        return false;
    }
    return size <= Const::th;
}
// True when this node's size does not exceed Const::th and partition_id is anything other than -1.
// NOTE(review): a non-negative partition_id presumably identifies the pack this node belongs to — confirm.
[[nodiscard]] bool isLeafPack() const {
    if (partition_id == -1) {
        return false;
    }
    return size <= Const::th;
}
// True when this node's size is strictly greater than Const::th.
[[nodiscard]] bool isInternalNode() const {
    const bool exceeds_threshold = size > Const::th;
    return exceeds_threshold;
}
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir,
float *query_reordered, int *ordering) const;
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir,
std::unordered_set<float *, createhash, isEqual> *hash_set) const;

void search_offset(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;

static FADASNode* BuildIndexFuzzy(const string & datafn, const string & saxfn, const string &paafn, vector<vector<int>>* g);
Expand All @@ -185,7 +195,7 @@ class FADASNode {
template<class Archive>
void serialize(Archive &ar, const unsigned int version) {
ar & partition_id; ar & layer;
ar & file_id;
ar & file_id; ar & id;
ar & size;
ar & sax; ar & bits_cardinality;
ar & children;
Expand All @@ -201,12 +211,11 @@ class FADASNode {

void getIndexStats();
int getLeafNodeNum();
int assignLeafNum();
int getBiasLeafNodeNum();

FADASNode *route1step(const unsigned short *_sax);

void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir,
std::unordered_set<float *, createhash, isEqual> *hash_set) const;

SAX_INFO *statSAX();

Expand Down Expand Up @@ -258,6 +267,8 @@ class FADASNode {

void determineSegmentsAvgVariance();

void determineSegmentsNaive();

void searchDTW(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;

static long generateSaxTbl();
Expand All @@ -267,6 +278,14 @@ class FADASNode {
static void generateSaxTbl(const float *tss, int series_num);

void routeDuringInsertion(const unsigned short *_sax, int pos);

void determineSegments(unsigned short *node_saxes);

void growIndex(unsigned short *node_saxes, bool need_free);

void reorganize(float *tss, FADASNode *parent);

void insertBatch(float *tss, int batch_size);
};

struct NODE_RECORDER{
Expand Down
4 changes: 4 additions & 0 deletions include/Expr/Recall.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class Recall {

static void doExprWithResFADAS(FADASNode *root, vector<vector<int>> *g, const string &index_dir);

static void ngSearchDumpy(FADASNode *root, vector<vector<int>> *g);

static void exactSearchFADAS(FADASNode *root, vector<vector<int>> *g);

static void exactSearchFADASDTW(FADASNode *root, vector<vector<int>>*g);
Expand Down Expand Up @@ -80,6 +82,8 @@ class Recall {
static void approxDTWTARDISORIGIN(TARGNode *root);

static void exactSearchTARDISORIGINDTW(TARGNode *root);

static void completeWorkload();
};


Expand Down
16 changes: 14 additions & 2 deletions include/Searchers/FADASSearcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,16 @@ class FADASSearcher {
approxSearchPos(FADASNode *root, float *query, int k, vector<vector<int>> *g, const string &index_dir);

static vector<PqItemSeries *> *
approxIncSearch(FADASNode *root, float *query, int k, const string &index_dir, int node_num);
approxIncSearch(FADASNode *root, float *query, int k, const string &index_dir, int node_num, float * query_reordered, int*ordering);

static
vector<PqItemSeries *> *ngSearch(FADASNode *root, float *query, float *query_reordered, int *ordering, int k,
vector<vector<int>> *g, int nprobes);

static void
approxIncSearchInterNode(FADASNode *root, TimeSeries *queryTs, unsigned short *sax, int k,
vector<PqItemSeries *> *heap, const string &index_dir,
int &node_num);
int &node_num, float *query_reordered, int *ordering);

static void approxIncSearchInterNodeFuzzy(FADASNode *root, TimeSeries *queryTs, unsigned short *sax, int k,
vector<PqItemSeries *> *heap, const string &index_dir, int &node_num,
Expand All @@ -61,6 +65,14 @@ class FADASSearcher {

static void approxSearchInterNodeLessPack(FADASNode *root, TimeSeries *queryTs, unsigned short *sax, int k,
vector<PqItemSeries *> *heap, const string &index_dir);

static vector<PqItemSeries *> *
ngSearchNaive(FADASNode *root, float *query, float *query_reordered, int *ordering, int k, vector<vector<int>> *g,
int nprobes);

static vector<PqItemSeries *> *
ngSearchIdLevelNaive(FADASNode *root, float *query, float *query_reordered, int *ordering, int k,
vector<vector<int>> *g, int nprobes);
};


Expand Down
2 changes: 2 additions & 0 deletions include/Utils/FileUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ class FileUtil {
static float *readSeries(FILE *f, const vector<int> &offsets);

static float *readSeriesOffset(FILE *f, int offset);

static void renameFile(const string &old_file, const string &new_file);
};


Expand Down
2 changes: 2 additions & 0 deletions include/Utils/TimeSeriesUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ class TimeSeriesUtil {

static double dtw(const float* A, const float* B, int len, int r, double bsf);

static double euclideanDist(float *query_reordered, float *ts, int size, double bound, int *order);

// static void knnWithBsf(const DSTreeNode &node, InsertedSeries &queryTs, int k, vector<PqItemSeriesVector *> &heap);
//
// static vector<PqItemSeriesVector *> & knnVector(const DSTreeNode &node, InsertedSeries &q, int k);
Expand Down
3 changes: 2 additions & 1 deletion src/Const.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
// sec:expr
string Const::dataset = "",Const:: method = "";
int Const::tsLength = -1, Const::maxK = -1, Const::index = -1, Const::ops = -1, Const::materialized = -1,
Const::method_code = -1, Const::query_num = -1, Const::series_num = -1, Const::k = -1, Const::dtw_window_size = -1;
Const::method_code = -1, Const::query_num = -1, Const::series_num = -1, Const::k = -1, Const::dtw_window_size = -1,
Const::batch_size = -1, Const::batch_num = -1;
double Const::dtw_window_percent = -1;

//sec: parameter
Expand Down
Loading

0 comments on commit 70372e9

Please sign in to comment.