Skip to content

Commit

Permalink
it sometimes wrong
Browse files Browse the repository at this point in the history
  • Loading branch information
CaucherWang committed Dec 21, 2022
1 parent 71f2552 commit 4f7aee2
Show file tree
Hide file tree
Showing 13 changed files with 2,924 additions and 25 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@ project(FADAS)
set(CMAKE_CXX_STANDARD 23)
#set(CMAKE_BUILD_TYPE Release)
#set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)

#add_definitions(-DMALLOC_CHECK_=3)
# Threads supplies the Threads::Threads imported target used by target_link_libraries below.
find_package(Threads REQUIRED)
# FindGSL publishes GSL_INCLUDE_DIRS and GSL_LIBRARIES. The variable was previously
# misspelled as GSL_INCLUDE_DIRES, which expands to nothing, so the GSL header
# directory was never added to the include path.
# NOTE(review): GSLCBLAS_INCLUDE_DIRS / GSLBLAS_LIBRARIES do not appear to be set by
# FindGSL and probably expand to empty; kept as-is in case they are provided externally.
find_package(GSL REQUIRED)
include_directories(${GSL_INCLUDE_DIRS} ${GSLCBLAS_INCLUDE_DIRS})
link_libraries(${GSL_LIBRARIES} ${GSLBLAS_LIBRARIES})
add_compile_options(-march=haswell)
#add_compile_options("-mavx2")
add_executable(FADAS src/main.cpp include/DataStructures/TimeSeries.h include/DataStructures/PqItemSeries.h src/DSTree/DSTreeNode.cpp include/DSTree/DSTreeNode.h include/DSTree/InsertedSeries.h include/DSTree/Sketch.h include/DSTree/PqItemNode.h include/DSTree/SplitInfo.h include/DSTree/INodeSegmentSplitPolicy.h src/DSTree/DSTreeNodeConstruction.cpp include/DSTree/DSTreeNodeConstruction.h src/DSTree/MeanNodeSegmentSplitPolicy.cpp include/DSTree/MeanNodeSegmentSplitPolicy.h src/DSTree/StdevNodeSegmentSplitPolicy.cpp include/DSTree/StdevNodeSegmentSplitPolicy.h src/Utils/FileUtil.cpp include/Utils/FileUtil.h src/Utils/TimeSeriesUtil.cpp include/Utils/TimeSeriesUtil.h src/Utils/SaxUtil.cpp include/Utils/SaxUtil.h include/DataStructures/PqItemIndex.h src/Utils/MathUtil.cpp include/Utils/MathUtil.h src/SearchEngine/ExactSearcher.cpp include/Searchers/ExactSearcher.h src/SearchEngine/DSTreeApproxSearcher.cpp include/Searchers/DSTreeApproxSearcher.h src/SearchEngine/DSTreeExactSearcher.cpp include/Searchers/DSTreeExactSearcher.h src/IndexConstruction/GraphConstruction.cpp include/DataStructures/GraphConstruction.h src/PqItemSeries.cpp src/DSTree/INodeSegmentSplitPolicy.cpp src/Expr/Recall.cpp include/Expr/Recall.h include/Const.h src/Expr/DataDistribution.cpp include/Expr/DataDistribution.h src/Tardis/TardisTreeNode.cpp include/Tardis/TardisTreeNode.h src/SearchEngine/TardisApproxSearch.cpp include/Searchers/TardisApproxSearch.h src/IndexConstruction/IPGNode.cpp include/DataStructures/IPGNode.h src/IndexConstruction/IPGPartition.cpp include/DataStructures/IPGPartition.h src/SearchEngine/IPGApproxSearcher.cpp include/Searchers/IPGApproxSearcher.h src/iSAX/iSAXNode.cpp include/DataStructures/iSAXNode.h src/SearchEngine/iSAXSearcher.cpp include/Searchers/iSAXSearcher.h src/Expr/RandDataGenerator.cpp include/Expr/RandDataGenerator.h src/Expr/DNATranslator.cpp include/Expr/DNATranslator.h src/IndexConstruction/IPGDataNode.cpp include/DataStructures/IPGDataNode.h 
src/Utils/INIReader.cpp include/Utils/INIReader.h src/Utils/ini.c include/DataStructures/OffsetDist.h src/Const.cpp src/IndexConstruction/FADASNode.cpp include/DataStructures/FADASNode.h src/SearchEngine/FADASSearcher.cpp include/Searchers/FADASSearcher.h src/IndexConstruction/FADASFuzzy.cpp src/Expr/ECGParser.cpp include/Expr/ECGParser.h src/TAR/TARGNode.cpp include/TAR/TARGNode.h src/TAR/TARLNode.cpp include/TAR/TARLNode.h src/SearchEngine/TARSearcher.cpp include/TAR/TARSearcher.h)
add_executable(FADAS src/main.cpp include/DataStructures/TimeSeries.h include/DataStructures/PqItemSeries.h src/DSTree/DSTreeNode.cpp include/DSTree/DSTreeNode.h include/DSTree/InsertedSeries.h include/DSTree/Sketch.h include/DSTree/PqItemNode.h include/DSTree/SplitInfo.h include/DSTree/INodeSegmentSplitPolicy.h src/DSTree/DSTreeNodeConstruction.cpp include/DSTree/DSTreeNodeConstruction.h src/DSTree/MeanNodeSegmentSplitPolicy.cpp include/DSTree/MeanNodeSegmentSplitPolicy.h src/DSTree/StdevNodeSegmentSplitPolicy.cpp include/DSTree/StdevNodeSegmentSplitPolicy.h src/Utils/FileUtil.cpp include/Utils/FileUtil.h src/Utils/TimeSeriesUtil.cpp include/Utils/TimeSeriesUtil.h src/Utils/SaxUtil.cpp include/Utils/SaxUtil.h include/DataStructures/PqItemIndex.h src/Utils/MathUtil.cpp include/Utils/MathUtil.h src/SearchEngine/ExactSearcher.cpp include/Searchers/ExactSearcher.h src/SearchEngine/DSTreeApproxSearcher.cpp include/Searchers/DSTreeApproxSearcher.h src/SearchEngine/DSTreeExactSearcher.cpp include/Searchers/DSTreeExactSearcher.h src/IndexConstruction/GraphConstruction.cpp include/DataStructures/GraphConstruction.h src/PqItemSeries.cpp src/DSTree/INodeSegmentSplitPolicy.cpp src/Expr/Recall.cpp include/Expr/Recall.h include/Const.h src/Expr/DataDistribution.cpp include/Expr/DataDistribution.h src/Tardis/TardisTreeNode.cpp include/Tardis/TardisTreeNode.h src/SearchEngine/TardisApproxSearch.cpp include/Searchers/TardisApproxSearch.h src/IndexConstruction/IPGNode.cpp include/DataStructures/IPGNode.h src/IndexConstruction/IPGPartition.cpp include/DataStructures/IPGPartition.h src/SearchEngine/IPGApproxSearcher.cpp include/Searchers/IPGApproxSearcher.h src/iSAX/iSAXNode.cpp include/DataStructures/iSAXNode.h src/SearchEngine/iSAXSearcher.cpp include/Searchers/iSAXSearcher.h src/Expr/RandDataGenerator.cpp include/Expr/RandDataGenerator.h src/Expr/DNATranslator.cpp include/Expr/DNATranslator.h src/IndexConstruction/IPGDataNode.cpp include/DataStructures/IPGDataNode.h 
src/Utils/INIReader.cpp include/Utils/INIReader.h src/Utils/ini.c include/DataStructures/OffsetDist.h src/Const.cpp src/IndexConstruction/FADASNode.cpp include/DataStructures/FADASNode.h src/SearchEngine/FADASSearcher.cpp include/Searchers/FADASSearcher.h src/IndexConstruction/FADASFuzzy.cpp src/Expr/ECGParser.cpp include/Expr/ECGParser.h src/TAR/TARGNode.cpp include/TAR/TARGNode.h src/TAR/TARLNode.cpp include/TAR/TARLNode.h src/SearchEngine/TARSearcher.cpp include/TAR/TARSearcher.h include/DataStructures/SafePq.h include/DataStructures/ThreadPool.h include/DataStructures/SafeHashMap.h)
#target_compile_options(FADAS PUBLIC "-mavx2")
target_link_libraries(FADAS Threads::Threads libboost_serialization-mt-x64.a libboost_serialization-mt-x64.so libboost_serialization.so.1.76.0 ${GSL_LIBRARIES} ${GSLBLAS_LIBRARIES} liburing.so)
17 changes: 10 additions & 7 deletions config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ k = 50
query_num = 40
;dna:26339815
series_num = 100000000
thread_num = 40
messi_pq_num = 10
SSD_pq_num = 5

dtw_window_percent = 0.05
batch_size = 1400000
Expand Down Expand Up @@ -64,11 +67,11 @@ bitsReserve = 3

[rand]
tsLength = 256
paafn = /mnt/c/Series4Similarity_Search/rand/paa/rand-256-100m-16.bin
saxfn = /mnt/c/Series4Similarity_Search/rand/sax/rand-256-100m-16.bin
idxfn = /mnt/c/Series4Similarity_Search/rand/non-mat/cluster/
tardisfn = /mnt/c/Series4Similarity_Search/rand/tardis/
posidxfn = ../index-pos/rand/
paafn = /home/wzy/data/rand/paa/rand-256-100m-16.bin
saxfn = /home/wzy/data/rand/sax/rand-256-100m-16.bin
;idxfn = /mnt/c/Series4Similarity_Search/rand/non-mat/cluster/
;tardisfn = /mnt/c/Series4Similarity_Search/rand/tardis/
;posidxfn = ../index-pos/rand/
;fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/index-16384/
;fidxfn = /mnt/c/Series4Similarity_Search/rand/index-16384/
;datafn = /mnt/c/Series4Similarity_Search/rand/rand-16384-1.5m.bin
Expand All @@ -86,8 +89,8 @@ posidxfn = ../index-pos/rand/
;fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/index-1024/
;fidxfn = /mnt/c/Series4Similarity_Search/rand/index/
fidxfn = /home/wzy/data/rand/index/
fuzzyidxfn = /mnt/c/Series4Similarity_Search/rand/fuzzy/
datafn = /mnt/c/Series4Similarity_Search/rand/rand-256-100m.bin
fuzzyidxfn = /home/wzy/data/rand/fuzzy/
datafn = /home/wzy/data/rand/rand-256-100m.bin
;queryfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200.bin
;resfn = /mnt/c/Series4Similarity_Search/rand/rand-256-200-50.bin
queryfn = /home/wzy/data/rand/rand-256-200.bin
Expand Down
14 changes: 12 additions & 2 deletions include/Const.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#define MULGIFT_CONST_H
#include <string>
#include <iostream>
#include <cassert>
#include <sys/time.h>
#include "../include/Utils/INIReader.h"

Expand All @@ -25,7 +26,7 @@ class Const {
// sec:expr
static string dataset, method;
static int tsLength, maxK, index, ops, materialized, method_code, query_num, series_num, k, dtw_window_size,
batch_size, batch_num, pre_read;
batch_size, batch_num, pre_read, thread_num, messi_pq_num, SSD_pq_num;
static double dtw_window_percent;

//sec: parameter
Expand Down Expand Up @@ -78,12 +79,21 @@ class Const {
query_num = reader.GetInteger("expr", "query_num", -1);
cout << "query_num: " << query_num << endl;

messi_pq_num = reader.GetInteger("expr", "messi_pq_num", -1);
cout << "messi_pq_num: " << messi_pq_num << endl;

SSD_pq_num = reader.GetInteger("expr", "SSD_pq_num", -1);
cout << "SSD_pq_num: " << SSD_pq_num << endl;

series_num = reader.GetInteger("expr", "series_num", -1);
cout << "series_num: " << series_num << endl;

k = reader.GetInteger("expr", "k", -1);
cout << "k: " << k << endl;

thread_num = reader.GetInteger("expr", "thread_num", -1);
cout << "thread_num: " << thread_num << endl;

batch_size = reader.GetInteger("expr", "batch_size", -1);
cout << "batch_size: " << batch_size << endl;

Expand Down Expand Up @@ -214,8 +224,8 @@ class Const {
for(int i=1;i<=bitsReserve;++i){
neighborNum += nChooseK(Const::segmentNum, i);
}
cout << "Neighbor number : " << neighborNum << endl;

assert(thread_num >= messi_pq_num);
}

static int nChooseK(int n, int r) {
Expand Down
11 changes: 10 additions & 1 deletion include/DataStructures/FADASNode.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,13 @@ class FADASNode {
if(layer == 1) return "1_" + to_string(partition_id);
return to_string(layer) + "-" + file_id;
}
// Builds a file name from this node's fields:
//   layer == 1  -> "1_<partition_id>"
//   otherwise   -> "<layer>-<file_id>"
// and appends the suffix "_L" whenever partition_id is -1.
inline string getFileNameWrapper() const {
    string name = (layer == 1) ? ("1_" + to_string(partition_id))
                               : (to_string(layer) + "-" + file_id);
    if (partition_id == -1) {
        name += "_L";
    }
    return name;
}
[[nodiscard]] string getFileNamePack() const{
if(layer == 1) {
if(isLeafPack()) return "1_P_" + to_string(partition_id);
Expand All @@ -188,7 +195,9 @@ class FADASNode {
void search_SIMD_reordered(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir,
float *query_reordered, int *ordering) const;
void search_SIMD(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;
vector<PqItemSeries *>* search_SIMD(int k, TimeSeries* queryTs, const string &index_dir, double bsf) const;
void search_SIMD_series_prune(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir) const;
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir,
std::unordered_set<float *, createhash, isEqual> *hash_set) const;
void search(int k, TimeSeries *queryTs, vector<PqItemSeries *> &heap, const string &index_dir,unordered_set<float*, createhash, isEqual>*hash_set,
Expand Down
237 changes: 237 additions & 0 deletions include/DataStructures/SafeHashMap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,237 @@
//
// Created by pengwang5 on 2022/12/12.
//

#ifndef FADAS_SAFEHASHMAP_H
#define FADAS_SAFEHASHMAP_H
#include <cstdint>
#include <functional>
#include <iostream>
#include <mutex>
#include <shared_mutex>

// One <key, value> entry of a bucket's singly linked chain.
//
// A node stores its key and value by copy and exposes the raw `next` link so
// that the owning bucket can splice nodes in and out of the chain. Nodes are
// neither copyable nor movable.
template <typename K, typename V>
class HashNode
{
  public:
    // Creates a node with default-initialized key and value and no successor.
    HashNode() {}

    // Creates a node holding copies of `k` and `v`, with no successor.
    HashNode(K k, V v) : key(k), value(v) {}

    // Detaches the link on destruction; the successor itself is NOT deleted.
    ~HashNode() { next = nullptr; }

    HashNode(const HashNode &) = delete;
    HashNode(HashNode &&) = delete;
    HashNode &operator=(const HashNode &) = delete;
    HashNode &operator=(HashNode &&) = delete;

    // Read-only access to the key this node was created with.
    const K &getKey() const { return key; }

    // Overwrites the stored value with a copy of `v`.
    void setValue(V v) { value = v; }

    // Read-only access to the currently stored value.
    const V &getValue() const { return value; }

    HashNode *next = nullptr; // Next node in the same bucket, or nullptr at the tail

  private:
    K key;   // The hash key
    V value; // The value associated with the key
};

template <typename K, typename V> class HashBucket
{
public:
HashBucket()
{
}

~HashBucket() // delete the bucket
{
clear();
}

// Function to find an entry in the bucket matching the key
// If key is found, the corresponding value is copied into the parameter "value" and function returns true.
// If key is not found, function returns false
bool find(const K &key, V &value) const
{
// A shared mutex is used to enable mutiple concurrent reads
std::shared_lock lock(mutex_);
HashNode<K, V> *node = head;

while (node != nullptr)
{
if (node->getKey() == key)
{
value = node->getValue();
return true;
}
node = node->next;
}
return false;
}

// Function to insert into the bucket
// If key already exists, update the value, else insert a new node in the bucket with the <key, value> pair
void insert(const K &key, const V &value)
{
// Exclusive lock to enable single write in the bucket
std::unique_lock lock(mutex_);
HashNode<K, V> *prev = nullptr;
HashNode<K, V> *node = head;

while (node != nullptr && node->getKey() != key)
{
prev = node;
node = node->next;
}

if (nullptr == node) // New entry, create a node and add to bucket
{
if (nullptr == head)
{
head = new HashNode<K, V>(key, value);
}
else
{
prev->next = new HashNode<K, V>(key, value);
}
}
else
{
node->setValue(value); // Key found in bucket, update the value
}
}

// Function to remove an entry from the bucket, if found
void erase(const K &key)
{
// Exclusive lock to enable single write in the bucket
std::unique_lock lock(mutex_);
HashNode<K, V> *prev = nullptr;
HashNode<K, V> *node = head;

while (node != nullptr && node->getKey() != key)
{
prev = node;
node = node->next;
}

if (nullptr == node) // Key not found, nothing to be done
{
return;
}
else // Remove the node from the bucket
{
if (head == node)
{
head = node->next;
}
else
{
prev->next = node->next;
}
delete node; // Free up the memory
}
}

// Function to clear the bucket
void clear()
{
// Exclusive lock to enable single write in the bucket
std::unique_lock lock(mutex_);
HashNode<K, V> *prev = nullptr;
HashNode<K, V> *node = head;
while (node != nullptr)
{
prev = node;
node = node->next;
delete prev;
}
head = nullptr;
}

private:
HashNode<K, V> *head = nullptr; // The head node of the bucket
mutable std::shared_timed_mutex mutex_; // The mutex for this bucket
};

constexpr size_t HASH_SIZE_DEFAULT = 1031; // A prime number as hash size gives a better distribution of values in buckets

// The class representing the hash map.
// For user-defined key types the caller is expected to supply the hash functor F;
// by default std::hash<K> is used.
// If the hash size is not provided, a default size of 1031 is used; a requested
// size of 0 is raised to 1 so that the bucket index computation is always defined.
// The hash table itself is a fixed-size array of hash buckets, all of which are
// created during the construction of the map. Each bucket is a singly linked list
// that starts out empty; nodes are allocated as entries are inserted.
// Locks are taken per bucket, hence multiple threads can write simultaneously
// to different buckets of the hash map.
template <typename K, typename V, typename F = std::hash<K>> class SafeHashMap
{
  public:
    // Creates a map with `hashSize_` buckets (at least one).
    // The bucket count is fixed for the lifetime of the map.
    SafeHashMap(size_t hashSize_ = HASH_SIZE_DEFAULT)
        : hashSize(hashSize_ == 0 ? 1 : hashSize_) // 0 would make `% hashSize` a remainder by zero
    {
        hashTable = new HashBucket<K, V>[hashSize]; // create the hash table as an array of hash buckets
    }

    // Destroys all buckets, which in turn free their nodes.
    ~SafeHashMap()
    {
        delete[] hashTable;
    }

    // Copy and move of the hash map are not supported at this moment
    SafeHashMap(const SafeHashMap &) = delete;
    SafeHashMap(SafeHashMap &&) = delete;
    SafeHashMap &operator=(const SafeHashMap &) = delete;
    SafeHashMap &operator=(SafeHashMap &&) = delete;

    // Finds the entry matching `key`.
    // If the key is found, the corresponding value is copied into `value` and
    // the function returns true. If the key is not found, it returns false.
    bool find(const K &key, V &value) const
    {
        size_t hashValue = hashFn(key) % hashSize;
        return hashTable[hashValue].find(key, value);
    }

    // Inserts into the hash map.
    // If the key already exists its value is updated, otherwise a new node
    // holding the <key, value> pair is added to the key's bucket.
    void insert(const K &key, const V &value)
    {
        size_t hashValue = hashFn(key) % hashSize;
        hashTable[hashValue].insert(key, value);
    }

    // Removes the entry for `key` from its bucket, if found.
    void erase(const K &key)
    {
        size_t hashValue = hashFn(key) % hashSize;
        hashTable[hashValue].erase(key);
    }

    // Cleans up the hash map, i.e. removes all entries from it.
    // Buckets are cleared one after another, each under its own lock, so the
    // operation is not a single atomic step across the whole map.
    void clear()
    {
        for (size_t i = 0; i < hashSize; i++)
        {
            (hashTable[i]).clear();
        }
    }

  private:
    HashBucket<K, V> *hashTable; // Array of `hashSize` buckets, owned by the map
    F hashFn;                    // Hash functor mapping a key to an unsigned value
    const size_t hashSize;       // Number of buckets; always >= 1
};

#endif //FADAS_SAFEHASHMAP_H
Loading

0 comments on commit 4f7aee2

Please sign in to comment.