-
Notifications
You must be signed in to change notification settings - Fork 3
A GPU accelerated find() method
Hüseyin Tuğrul BÜYÜKIŞIK edited this page Feb 15, 2021
·
18 revisions
VirtualMultiArray has a find(T object, S objectMember, int maxIndices)
method that searches for objects with same member value (same byte representation) and returns a std::vector containing them (but with maxIndices elements at most, if found).
- Search value (
S objectMember
) needs to be in same parentT object
for proper calculations. Using different parent objects for both parameters leads to errors. - If an array member (like
struct test{ int data[100]; }
) needs to be searched for exact byte representation, then it needs to be inside another struct (likestruct search_layer { int data[100]; }); struct test { search_layer };
and that struct can be used for searching. - If no elements are found, returned vector size is 0.
- Multiple index order is not guaranteed to be same between repeated searches. OpenCL kernel finds elements and save results using atomic functions. This lets find() compute quicker than a reduction algorithm when there are only few indices to find.
It's not very accelerated currently(14/02/2021) but it can do this:
preparing virtual array...
initializing data...
benchmarking find()...
test object: 5
found object at index: 5
data search: 40MB 358ms
data: 5
----------------------
----------------------
----------------------
test object: 100005
found object at index: 100005
data search: 40MB 6ms
data: 100005
----------------------
----------------------
----------------------
test object: 200005
found object at index: 200005
data search: 40MB 6ms
data: 200005
#include "GraphicsCardSupplyDepot.h"
#include "VirtualMultiArray.h"
#include "PcieBandwidthBenchmarker.h"
// testing
#include <iostream>
#include "omp.h"
struct ArrayPassByValue
{
char buf[3];
};
class Obj
{
public:
Obj(){b=-1;}
Obj(int i){b=i;}
char a;
int b;
ArrayPassByValue arr;
char tmp;
};
int main(int argC, char ** argV)
{
std::cout<<"preparing virtual array..."<<std::endl;
size_t n = 10000000;
size_t p = 10000;
GraphicsCardSupplyDepot gpu;
VirtualMultiArray<Obj> arr(n,gpu.requestGpus(),p,3,{4,2,2});
std::cout<<"initializing data..."<<std::endl;
#pragma omp parallel for
for(int i=0;i<10;i++)
{
arr.mappedReadWriteAccess(i*(n/10),n/10,[&](Obj * ptr){
for(size_t j=i*(n/10);j<i*(n/10)+(n/10);j++)
{
Obj obj(j);
for(int k=0;k<3;k++)
obj.arr.buf[k]=' ';
obj.arr.buf[j%3]='X';
ptr[j]=obj;
}
},false,false,true);
}
std::cout<<"benchmarking find()..."<<std::endl;
Obj searchObj;
for(int i=0;i<80;i++)
{
std::chrono::milliseconds t1 = std::chrono::duration_cast< std::chrono::milliseconds >(std::chrono::system_clock::now().time_since_epoch());
searchObj.b=i*100000+5;
searchObj.arr.buf[0]='X';
searchObj.arr.buf[1]=' ';
searchObj.arr.buf[2]=' ';
// integer member as search value
// find at most 100 indices that has same member value
std::vector<size_t> found = arr.find(searchObj,searchObj.b,100);
// buffer member as search value
// find at most 100 indices that has same member value
//std::vector<size_t> found = arr.find(searchObj,searchObj.arr,100);
std::chrono::milliseconds t2 = std::chrono::duration_cast< std::chrono::milliseconds >(std::chrono::system_clock::now().time_since_epoch());
std::cout<<"test object: "<<i*100000+5<<std::endl;
std::cout<<"found object at index: "<<(found.size()>0?found[0]:-1)<<std::endl;
std::cout<<"data search: "<<n*sizeof(int)/1000000.0<<"MB "<<(t2.count()-t1.count())<<"ms"<<std::endl;
std::cout<<"data: "<<arr.get(found[0]).b<<std::endl;
// integer search error check
if(arr.get(found[0]).b!=i*100000+5)
{
std::cout<<"ERROR ERROR ERROR ERROR ERROR ERROR ERROR ERROR ERROR ERROR ERROR ERROR"<<std::endl;
return 0;
}
std::cout<<"----------------------"<<std::endl;
std::cout<<"----------------------"<<std::endl;
std::cout<<"----------------------"<<std::endl;
}
return 0;
}