Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions src/backend/cpu/Array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,14 @@ Node_ptr bufferNodePtr()
// Constructs an Array for the given dimensions: offset 0, strides taken from
// calcStrides(dims), a buffer of dims.elements() elements obtained from
// memAlloc<T> with memFree<T> passed alongside it to `data`, and the flags
// ready/owner both set to true.
// NOTE(review): this text is a diff rendering without +/- markers. The two
// consecutive `data(...)` lines below look like the removed and the added form
// of one initializer, not two initializers -- confirm against the real file.
template<typename T>
Array<T>::Array(dim4 dims):
info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
// Form that hands memAlloc's return value to `data` unchanged.
data(memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
// Form that calls .release() on memAlloc's return value first, so `data`
// receives the released pointer and memFree<T> is the only deleter attached.
data(memAlloc<T>(dims.elements()).release(), memFree<T>), data_dims(dims),
node(bufferNodePtr<T>()), ready(true), owner(true)
{ }

template<typename T>
Array<T>::Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device):
info(getActiveDeviceId(), dims, 0, calcStrides(dims), (af_dtype)dtype_traits<T>::af_type),
data((is_device & !copy_device) ? (T*)in_data : memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
data((is_device & !copy_device) ? (T*)in_data : memAlloc<T>(dims.elements()).release(), memFree<T>), data_dims(dims),
node(bufferNodePtr<T>()), ready(true), owner(true)
{
static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
Expand Down Expand Up @@ -79,7 +79,7 @@ template<typename T>
Array<T>::Array(af::dim4 dims, af::dim4 strides, dim_t offset_,
const T * const in_data, bool is_device) :
info(getActiveDeviceId(), dims, offset_, strides, (af_dtype)dtype_traits<T>::af_type),
data(is_device ? (T*)in_data : memAlloc<T>(info.total()), memFree<T>),
data(is_device ? (T*)in_data : memAlloc<T>(info.total()).release(), memFree<T>),
data_dims(dims),
node(bufferNodePtr<T>()),
ready(true),
Expand All @@ -100,7 +100,7 @@ void Array<T>::eval()

this->setId(getActiveDeviceId());

data = std::shared_ptr<T>(memAlloc<T>(elements()), memFree<T>);
data = std::shared_ptr<T>(memAlloc<T>(elements()).release(), memFree<T>);

getQueue().enqueue(kernel::evalArray<T>, *this, this->node);
// Reset shared_ptr
Expand Down Expand Up @@ -135,7 +135,7 @@ void evalMultiple(std::vector<Array<T>*> array_ptrs)
if (array->ready) continue;
if (isWorker) AF_ERROR("Array not evaluated", AF_ERR_INTERNAL);
array->setId(getActiveDeviceId());
array->data = std::shared_ptr<T>(memAlloc<T>(array->elements()), memFree<T>);
array->data = std::shared_ptr<T>(memAlloc<T>(array->elements()).release(), memFree<T>);
arrays.push_back(*array);
nodes.push_back(array->node);
}
Expand Down
8 changes: 4 additions & 4 deletions src/backend/cpu/harris.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,16 @@ unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out
dim4 idims = in.dims();

// Window filter
convAccT* h_filter = memAlloc<convAccT>(filter_len);
auto h_filter = memAlloc<convAccT>(filter_len);
// Decide between rectangular or circular filter
if (sigma < 0.5f) {
for (unsigned i = 0; i < filter_len; i++)
h_filter[i] = (T)1.f / (filter_len);
} else {
gaussian1D<convAccT>(h_filter, (int)filter_len, sigma);
gaussian1D<convAccT>(h_filter.get(), (int)filter_len, sigma);
}
Array<convAccT> filter = createDeviceDataArray<convAccT>(dim4(filter_len), (const void*)h_filter);

Array<convAccT> filter = createDeviceDataArray<convAccT>(dim4(filter_len),
(const void*)h_filter.release());
unsigned border_len = filter_len / 2 + 1;

Array<T> ix = createEmptyArray<T>(idims);
Expand Down
152 changes: 60 additions & 92 deletions src/backend/cpu/kernel/sift_nonfree.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,9 @@ unsigned sift_impl(Array<float>& x, Array<float>& y, Array<float>& score,
const float img_scale, const float feature_ratio,
const bool compute_GLOH)
{
using std::vector;
using std::unique_ptr;
using std::function;
in.eval();
getQueue().sync();
af::dim4 idims = in.dims();
Expand All @@ -983,13 +986,13 @@ unsigned sift_impl(Array<float>& x, Array<float>& y, Array<float>& score,

std::vector< Array<T> > dog_pyr = buildDoGPyr<T>(gauss_pyr, n_octaves, n_layers);

std::vector<float*> x_pyr(n_octaves, NULL);
std::vector<float*> y_pyr(n_octaves, NULL);
std::vector<float*> response_pyr(n_octaves, NULL);
std::vector<float*> size_pyr(n_octaves, NULL);
std::vector<float*> ori_pyr(n_octaves, NULL);
std::vector<float*> desc_pyr(n_octaves, NULL);
std::vector<unsigned> feat_pyr(n_octaves, 0);
vector<uptr<float>> x_pyr(n_octaves);
vector<uptr<float>> y_pyr(n_octaves);
vector<uptr<float>> response_pyr(n_octaves);
vector<uptr<float>> size_pyr(n_octaves);
vector<uptr<float>> ori_pyr(n_octaves);
vector<uptr<float>> desc_pyr(n_octaves);
vector<unsigned> feat_pyr(n_octaves, 0);
unsigned total_feat = 0;

const unsigned d = DescrWidth;
Expand All @@ -1008,9 +1011,9 @@ unsigned sift_impl(Array<float>& x, Array<float>& y, Array<float>& score,
const unsigned imel = ddims[0] * ddims[1];
const unsigned max_feat = ceil(imel * feature_ratio);

float* extrema_x = memAlloc<float>(max_feat);
float* extrema_y = memAlloc<float>(max_feat);
unsigned* extrema_layer = memAlloc<unsigned>(max_feat);
auto extrema_x = memAlloc<float>(max_feat);
auto extrema_y = memAlloc<float>(max_feat);
auto extrema_layer = memAlloc<unsigned>(max_feat);
unsigned extrema_feat = 0;

for (unsigned j = 1; j <= n_layers; j++) {
Expand All @@ -1021,130 +1024,103 @@ unsigned sift_impl(Array<float>& x, Array<float>& y, Array<float>& score,
unsigned layer = j;

float extrema_thr = 0.5f * contrast_thr / n_layers;
detectExtrema<T>(extrema_x, extrema_y, extrema_layer, &extrema_feat,
detectExtrema<T>(extrema_x.get(), extrema_y.get(), extrema_layer.get(), &extrema_feat,
dog_pyr[prev], dog_pyr[center], dog_pyr[next],
layer, max_feat, extrema_thr);
}

extrema_feat = min(extrema_feat, max_feat);

if (extrema_feat == 0) {
memFree(extrema_x);
memFree(extrema_y);
memFree(extrema_layer);

continue;
}

unsigned interp_feat = 0;

float* interp_x = memAlloc<float>(extrema_feat);
float* interp_y = memAlloc<float>(extrema_feat);
unsigned* interp_layer = memAlloc<unsigned>(extrema_feat);
float* interp_response = memAlloc<float>(extrema_feat);
float* interp_size = memAlloc<float>(extrema_feat);
auto interp_x = memAlloc<float>(extrema_feat);
auto interp_y = memAlloc<float>(extrema_feat);
auto interp_layer = memAlloc<unsigned>(extrema_feat);
auto interp_response = memAlloc<float>(extrema_feat);
auto interp_size = memAlloc<float>(extrema_feat);

interpolateExtrema<T>(interp_x, interp_y, interp_layer,
interp_response, interp_size, &interp_feat,
extrema_x, extrema_y, extrema_layer, extrema_feat,
interpolateExtrema<T>(interp_x.get(), interp_y.get(), interp_layer.get(),
interp_response.get(), interp_size.get(), &interp_feat,
extrema_x.get(), extrema_y.get(), extrema_layer.get(), extrema_feat,
dog_pyr, max_feat, i, n_layers,
contrast_thr, edge_thr, init_sigma, img_scale);

interp_feat = min(interp_feat, max_feat);

if (interp_feat == 0) {
memFree(interp_x);
memFree(interp_y);
memFree(interp_layer);
memFree(interp_response);
memFree(interp_size);

continue;
}

std::vector<feat_t> sorted_feat;
array_to_feat(sorted_feat, interp_x, interp_y, interp_layer, interp_response, interp_size, interp_feat);
array_to_feat(sorted_feat, interp_x.get(), interp_y.get(), interp_layer.get(),
interp_response.get(), interp_size.get(), interp_feat);
std::stable_sort(sorted_feat.begin(), sorted_feat.end(), feat_cmp);

memFree(interp_x);
memFree(interp_y);
memFree(interp_layer);
memFree(interp_response);
memFree(interp_size);

unsigned nodup_feat = 0;

float* nodup_x = memAlloc<float>(interp_feat);
float* nodup_y = memAlloc<float>(interp_feat);
unsigned* nodup_layer = memAlloc<unsigned>(interp_feat);
float* nodup_response = memAlloc<float>(interp_feat);
float* nodup_size = memAlloc<float>(interp_feat);
auto nodup_x = memAlloc<float>(interp_feat);
auto nodup_y = memAlloc<float>(interp_feat);
auto nodup_layer = memAlloc<unsigned>(interp_feat);
auto nodup_response = memAlloc<float>(interp_feat);
auto nodup_size = memAlloc<float>(interp_feat);

removeDuplicates(nodup_x, nodup_y, nodup_layer,
nodup_response, nodup_size, &nodup_feat,
removeDuplicates(nodup_x.get(), nodup_y.get(), nodup_layer.get(),
nodup_response.get(), nodup_size.get(), &nodup_feat,
sorted_feat);

const unsigned max_oriented_feat = nodup_feat * 3;

float* oriented_x = memAlloc<float>(max_oriented_feat);
float* oriented_y = memAlloc<float>(max_oriented_feat);
unsigned* oriented_layer = memAlloc<unsigned>(max_oriented_feat);
float* oriented_response = memAlloc<float>(max_oriented_feat);
float* oriented_size = memAlloc<float>(max_oriented_feat);
float* oriented_ori = memAlloc<float>(max_oriented_feat);
auto oriented_x = memAlloc<float>(max_oriented_feat);
auto oriented_y = memAlloc<float>(max_oriented_feat);
auto oriented_layer = memAlloc<unsigned>(max_oriented_feat);
auto oriented_response = memAlloc<float>(max_oriented_feat);
auto oriented_size = memAlloc<float>(max_oriented_feat);
auto oriented_ori = memAlloc<float>(max_oriented_feat);

unsigned oriented_feat = 0;

calcOrientation<T>(oriented_x, oriented_y, oriented_layer,
oriented_response, oriented_size, oriented_ori, &oriented_feat,
nodup_x, nodup_y, nodup_layer,
nodup_response, nodup_size, nodup_feat,
calcOrientation<T>(oriented_x.get(), oriented_y.get(), oriented_layer.get(),
oriented_response.get(), oriented_size.get(), oriented_ori.get(), &oriented_feat,
nodup_x.get(), nodup_y.get(), nodup_layer.get(),
nodup_response.get(), nodup_size.get(), nodup_feat,
gauss_pyr, max_oriented_feat, i, n_layers, double_input);

memFree(nodup_x);
memFree(nodup_y);
memFree(nodup_layer);
memFree(nodup_response);
memFree(nodup_size);

if (oriented_feat == 0) {
memFree(oriented_x);
memFree(oriented_y);
memFree(oriented_layer);
memFree(oriented_response);
memFree(oriented_size);
memFree(oriented_ori);

continue;
}

float* desc = memAlloc<float>(oriented_feat * desc_len);
auto desc = memAlloc<float>(oriented_feat * desc_len);

float scale = 1.f/(1 << i);
if (double_input) scale *= 2.f;

if (compute_GLOH)
computeGLOHDescriptor<T>(desc, desc_len,
oriented_x, oriented_y, oriented_layer,
oriented_response, oriented_size, oriented_ori,
computeGLOHDescriptor<T>(desc.get(), desc_len,
oriented_x.get(), oriented_y.get(), oriented_layer.get(),
oriented_response.get(), oriented_size.get(), oriented_ori.get(),
oriented_feat, gauss_pyr, d, rb, ab, hb,
scale, i, n_layers);
else
computeDescriptor<T>(desc, desc_len,
oriented_x, oriented_y, oriented_layer,
oriented_response, oriented_size, oriented_ori,
computeDescriptor<T>(desc.get(), desc_len,
oriented_x.get(), oriented_y.get(), oriented_layer.get(),
oriented_response.get(), oriented_size.get(), oriented_ori.get(),
oriented_feat, gauss_pyr, d, n, scale, i, n_layers);

total_feat += oriented_feat;
feat_pyr[i] = oriented_feat;

if (oriented_feat > 0) {
x_pyr[i] = oriented_x;
y_pyr[i] = oriented_y;
response_pyr[i] = oriented_response;
ori_pyr[i] = oriented_ori;
size_pyr[i] = oriented_size;
desc_pyr[i] = desc;
x_pyr[i] = std::move(oriented_x);
y_pyr[i] = std::move(oriented_y);
response_pyr[i] = std::move(oriented_response);
ori_pyr[i] = std::move(oriented_ori);
size_pyr[i] = std::move(oriented_size);
desc_pyr[i] = std::move(desc);
}
}

Expand Down Expand Up @@ -1172,21 +1148,13 @@ unsigned sift_impl(Array<float>& x, Array<float>& y, Array<float>& score,
if (feat_pyr[i] == 0)
continue;

memcpy(x_ptr+offset, x_pyr[i], feat_pyr[i] * sizeof(float));
memcpy(y_ptr+offset, y_pyr[i], feat_pyr[i] * sizeof(float));
memcpy(score_ptr+offset, response_pyr[i], feat_pyr[i] * sizeof(float));
memcpy(ori_ptr+offset, ori_pyr[i], feat_pyr[i] * sizeof(float));
memcpy(size_ptr+offset, size_pyr[i], feat_pyr[i] * sizeof(float));

memcpy(desc_ptr+(offset*desc_len), desc_pyr[i], feat_pyr[i] * desc_len * sizeof(float));

memFree(x_pyr[i]);
memFree(y_pyr[i]);
memFree(response_pyr[i]);
memFree(ori_pyr[i]);
memFree(size_pyr[i]);
memFree(desc_pyr[i]);
memcpy(x_ptr+offset, x_pyr[i].get(), feat_pyr[i] * sizeof(float));
memcpy(y_ptr+offset, y_pyr[i].get(), feat_pyr[i] * sizeof(float));
memcpy(score_ptr+offset, response_pyr[i].get(), feat_pyr[i] * sizeof(float));
memcpy(ori_ptr+offset, ori_pyr[i].get(), feat_pyr[i] * sizeof(float));
memcpy(size_ptr+offset, size_pyr[i].get(), feat_pyr[i] * sizeof(float));

memcpy(desc_ptr+(offset*desc_len), desc_pyr[i].get(), feat_pyr[i] * desc_len * sizeof(float));
offset += feat_pyr[i];
}
}
Expand Down
21 changes: 13 additions & 8 deletions src/backend/cpu/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
#define AF_CPU_MEM_DEBUG 0
#endif

using std::unique_ptr;
using std::function;

namespace cpu
{
void setMemStepSize(size_t step_bytes)
Expand Down Expand Up @@ -54,11 +57,13 @@ void printMemInfo(const char *msg, const int device)
}

// Allocates storage for `elements` objects of type T by requesting
// elements * sizeof(T) bytes from memoryManager().alloc (second argument false).
// NOTE(review): this text is a diff rendering without +/- markers. The two
// signatures and the two return statements below look like the removed/added
// pair of one function -- confirm against the real file.
// NOTE(review): no overflow check on elements * sizeof(T) is visible here.
template<typename T>
// Signature form returning the bare pointer.
T* memAlloc(const size_t &elements)
// Signature form returning a unique_ptr to T[] with a std::function deleter.
unique_ptr<T[], function<void(T *)>>
memAlloc(const size_t &elements)
{
T *ptr = nullptr;

ptr = (T *)memoryManager().alloc(elements * sizeof(T), false);
// Return matching the bare-pointer signature.
return ptr;
// Return matching the unique_ptr signature: memFree<T> is installed as the
// deleter, so the buffer goes back through memFree<T> when the unique_ptr
// is destroyed without release() having been called.
return unique_ptr<T[], function<void(T *)>>(ptr, memFree<T>);
}

void* memAllocUser(const size_t &bytes)
Expand Down Expand Up @@ -118,12 +123,12 @@ bool checkMemoryLimit()
return memoryManager().checkMemoryLimit();
}

// Explicit-instantiation helper: for a type T, emits explicit instantiations
// of memAlloc, memFree, pinnedAlloc and pinnedFree.
// NOTE(review): this text is a diff rendering without +/- markers. The two
// #define blocks below look like the removed and the added form of the same
// macro; they differ only in the memAlloc return type.
#define INSTANTIATE(T) \
template T* memAlloc(const size_t &elements); \
template void memFree(T* ptr); \
template T* pinnedAlloc(const size_t &elements); \
template void pinnedFree(T* ptr); \

// NOTE(review): the last line of the macro below ends with a continuation
// backslash. The first form is closed by a blank line; in this view the second
// form is followed directly by INSTANTIATE(float), which would be spliced into
// the macro body. Confirm a blank line follows it in the real file.
#define INSTANTIATE(T) \
template std::unique_ptr<T[], std::function<void(T *)>> memAlloc(const size_t &elements); \
template void memFree(T* ptr); \
template T* pinnedAlloc(const size_t &elements); \
template void pinnedFree(T* ptr); \
INSTANTIATE(float)
INSTANTIATE(cfloat)
INSTANTIATE(double)
Expand Down
7 changes: 6 additions & 1 deletion src/backend/cpu/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@

#include <af/defines.h>
#include <common/MemoryManager.hpp>
#include <memory>

namespace cpu
{
// Declaration of memAlloc in its bare-pointer form.
// NOTE(review): this text is a diff rendering without +/- markers. This line
// and the unique_ptr declaration further down look like the removed/added pair
// of one declaration -- confirm against the real file.
template<typename T> T* memAlloc(const size_t &elements);

// Alias for a unique_ptr to an array of T whose deleter is a std::function.
// The deleter signature is written void(T[]); an array parameter type is
// adjusted to a pointer, so this names the same type as the
// std::function<void(T *)> used in the memAlloc declaration below.
// NOTE(review): std::function lives in <functional>; only <memory> is visibly
// included here -- confirm <functional> is reachable.
template<typename T>
using uptr = std::unique_ptr<T[], std::function<void(T[])>>;

// Declaration of memAlloc in its owning form: the result is a unique_ptr to
// T[] carrying a std::function<void(T *)> deleter.
template<typename T> std::unique_ptr<T[], std::function<void(T *)>> memAlloc(const size_t &elements);
void *memAllocUser(const size_t &bytes);

// Need these as 2 separate function and not a default argument
Expand Down
Loading