KNN-regressor n-d output #214

Merged: 14 commits, Feb 21, 2023 (diff shows changes from all commits)
include/algorithms/public/KNNRegressor.hpp (30 additions, 28 deletions)

```diff
@@ -29,46 +29,48 @@ class KNNRegressor
 public:
   using DataSet = FluidDataSet<std::string, double, 1>;
 
-  double predict(KDTree const& tree, DataSet const& targets,
-                 RealVectorView point, index k, bool weighted,
+  void predict(KDTree const& tree, DataSet const& targets,
+               RealVectorView input, RealVectorView output,
+               index k, bool weighted,
                Allocator& alloc = FluidDefaultAllocator()) const
   {
     using namespace std;
-    double prediction = 0;
-    auto [distances, ids] = tree.kNearest(point, k, 0, alloc);
-    double uniformWeight = 1.0 / k;
-    rt::vector<double> weights(asUnsigned(k), weighted ? 0 : uniformWeight,
-                               alloc);
-    double sum = 0;
+    using _impl::asEigen;
+    using Eigen::Array;
+
+    auto [distances, ids] = tree.kNearest(input, k, 0, alloc);
+
+    ScopedEigenMap<Eigen::VectorXd> weights(k, alloc);
+    weights.setConstant(weighted ? 0 : (1.0 / k));
+
     if (weighted)
     {
-      bool binaryWeights = false;
-      for (size_t i = 0; i < asUnsigned(k); i++)
+      auto distanceArray =
+          Eigen::Map<Eigen::ArrayXd>(distances.data(), distances.size());
+
+      if ((distanceArray < epsilon).any())
       {
-        if (distances[i] < epsilon)
-        {
-          binaryWeights = true;
-          weights[i] = 1;
-        }
-        else
-          sum += (1.0 / distances[i]);
+        weights = (distanceArray < epsilon).select(1.0, weights);
       }
-      if (!binaryWeights)
+      else
       {
-        for (size_t i = 0; i < asUnsigned(k); i++)
-        {
-          weights[i] = (1.0 / distances[i]) / sum;
-        }
+        double sum = (1.0 / distanceArray).sum();
+        weights = (1.0 / distanceArray) / sum;
      }
     }
 
-    for (size_t i = 0; i < asUnsigned(k); i++)
-    {
-      auto point = targets.get(*ids[i]);
-      prediction += (weights[i] * point(0));
-    }
-    return prediction;
+    output.fill(0);
+
+    rt::vector<index> indices(ids.size(), alloc);
+
+    transform(ids.cbegin(), ids.cend(), indices.begin(),
+              [&targets](const string* id) { return targets.getIndex(*id); });
+
+    auto targetPoints = asEigen<Array>(targets.getData())(indices, Eigen::all);
+
+    asEigen<Array>(output) =
+        (targetPoints.colwise() * weights.array()).colwise().sum().transpose();
   }
 };
 } // namespace algorithm
 } // namespace fluid
```
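Summing up the algorithm change: predict no longer returns a single double read from the first column of the target DataSet; it now fills an n-dimensional output view with the weighted sum of the k nearest target rows. The weights are uniform (1/k), normalised inverse distances (w_i = (1/d_i) / sum_j (1/d_j)), or binary whenever some neighbour lies within epsilon of the query. Below is a minimal standalone sketch of that computation in plain Eigen; the name knnPredict, the epsilon default, and the ArrayXd/MatrixXd types standing in for FluCoMa's ScopedEigenMap and FluidDataSet are illustrative assumptions, not library API.

```cpp
#include <Eigen/Dense>
#include <iostream>

// Illustrative only: weighted k-NN regression with an n-dimensional target.
// `distances` holds the k nearest-neighbour distances and `targetPoints`
// the matching k rows of the target data set (k x outputDims).
Eigen::VectorXd knnPredict(const Eigen::ArrayXd& distances,
                           const Eigen::MatrixXd& targetPoints,
                           bool weighted, double epsilon = 1e-9)
{
  const Eigen::Index k = distances.size();
  Eigen::ArrayXd weights =
      Eigen::ArrayXd::Constant(k, weighted ? 0.0 : 1.0 / k);

  if (weighted)
  {
    if ((distances < epsilon).any())
      // A neighbour coincides with the query: those neighbours get weight 1,
      // all others keep weight 0 (the "binary weights" branch).
      weights = (distances < epsilon).select(1.0, weights);
    else
      // Inverse-distance weights, normalised so they sum to 1:
      // w_i = (1 / d_i) / sum_j (1 / d_j)
      weights = (1.0 / distances) / (1.0 / distances).sum();
  }

  // Scale each neighbour's target row by its weight, then sum the rows.
  return (targetPoints.array().colwise() * weights)
      .colwise().sum().matrix().transpose();
}

int main()
{
  Eigen::ArrayXd distances(3);
  distances << 1.0, 2.0, 4.0;

  Eigen::MatrixXd targets(3, 2); // 3 neighbours, 2 output dimensions
  targets << 1.0, 10.0,
             2.0, 20.0,
             3.0, 30.0;

  std::cout << knnPredict(distances, targets, true).transpose() << '\n';
}
```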

include/clients/nrt/KNNRegressorClient.hpp (32 additions, 25 deletions)

```diff
@@ -119,23 +119,29 @@ class KNNRegressorClient : public FluidBaseClient,
     return {};
   }
 
-  MessageResult<double> predictPoint(InputBufferPtr data) const
+  MessageResult<void> predictPoint(InputBufferPtr in, BufferPtr out) const
   {
     index k = get<kNumNeighbors>();
     bool weight = get<kWeight>() != 0;
-    if (k == 0) return Error<double>(SmallK);
-    if (mAlgorithm.tree.size() == 0) return Error<double>(NoDataFitted);
-    if (mAlgorithm.tree.size() < k) return Error<double>(NotEnoughData);
+    if (k == 0) return Error(SmallK);
+    if (mAlgorithm.tree.size() == 0) return Error(NoDataFitted);
+    if (mAlgorithm.tree.size() < k) return Error(NotEnoughData);
 
     InBufferCheck bufCheck(mAlgorithm.tree.dims());
-    if (!bufCheck.checkInputs(data.get()))
-      return Error<double>(bufCheck.error());
+    if (!bufCheck.checkInputs(in.get()))
+      return Error(bufCheck.error());
+    BufferAdaptor::ReadAccess inBuf(in.get());
+    BufferAdaptor::Access outBuf(out.get());
+    if (!outBuf.exists()) return Error(InvalidBuffer);
+    Result resizeResult = outBuf.resize(mAlgorithm.target.dims(), 1, inBuf.sampleRate());
+    if (!resizeResult.ok()) return Error(BufferAlloc);
     algorithm::KNNRegressor regressor;
-    RealVector point(mAlgorithm.tree.dims());
-    point <<= BufferAdaptor::ReadAccess(data.get())
-                  .samps(0, mAlgorithm.tree.dims(), 0);
-    double result =
-        regressor.predict(mAlgorithm.tree, mAlgorithm.target, point, k, weight);
-    return result;
+    RealVector input(mAlgorithm.tree.dims());
+    RealVector output(mAlgorithm.target.dims());
+    input <<= inBuf.samps(0, mAlgorithm.tree.dims(), 0);
+    regressor.predict(mAlgorithm.tree, mAlgorithm.target, input, output, k, weight);
+    outBuf.samps(0) <<= output;
+    return OK();
   }
 
   MessageResult<void> predict(InputDataSetClientRef source,
```
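predictPoint now validates and resizes the output buffer before running the regressor: the buffer must exist, and it is resized to one channel of mAlgorithm.target.dims() samples at the input buffer's sample rate, so the full n-dimensional prediction lands in a single buffer. Here is a rough sketch of the same contract in plain C++, with std::optional<std::string> standing in for MessageResult<void> and std::vector standing in for the buffer types; all names are illustrative, not FluCoMa API.

```cpp
#include <cstddef>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Illustrative only: success/failure travels in the return value (cf.
// MessageResult<void>) while the n-dimensional prediction travels through
// an output container resized to the target dimensionality first.
std::optional<std::string> predictPoint(const std::vector<double>& input,
                                        std::vector<double>& output,
                                        std::size_t inputDims,
                                        std::size_t targetDims)
{
  if (input.size() != inputDims) return "wrong point size"; // cf. bufCheck
  output.assign(targetDims, 0.0); // cf. outBuf.resize(target.dims(), 1, sr)
  for (std::size_t d = 0; d < targetDims; ++d)
    output[d] = static_cast<double>(d); // stand-in for the regressor call
  return std::nullopt; // cf. OK()
}

int main()
{
  std::vector<double> out;
  if (auto err = predictPoint({0.1, 0.2}, out, 2, 3))
    std::cerr << *err << '\n';
  else
    std::cout << out.size() << " predicted values\n";
}
```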
```diff
@@ -158,12 +164,12 @@
     algorithm::KNNRegressor regressor;
     auto ids = dataSet.getIds();
     auto data = dataSet.getData();
-    DataSet result(1);
+    DataSet result(mAlgorithm.target.dims());
+    RealVector prediction(mAlgorithm.target.dims());
     for (index i = 0; i < dataSet.size(); i++)
     {
       RealVectorView point = data.row(i);
-      RealVector prediction = {regressor.predict(
-          mAlgorithm.tree, mAlgorithm.target, point, k, weight)};
+      regressor.predict(mAlgorithm.tree, mAlgorithm.target, point, prediction, k, weight);
       result.add(ids(i), prediction);
     }
     destPtr->setDataSet(result);
```
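In the batch predict message, the result DataSet is now built with mAlgorithm.target.dims() columns, and a single prediction vector is allocated once and overwritten on every iteration, rather than constructing a fresh one-element RealVector per point. A schematic of that allocation pattern, with containers and names that are illustrative rather than the FluidDataSet API:

```cpp
#include <cstddef>
#include <string>
#include <utility>
#include <vector>

// Illustrative sketch: hoist the n-dimensional output row out of the loop
// and reuse it; each append copies the current contents into the result.
using Row = std::vector<double>;

void predictBatch(const std::vector<Row>& points,
                  std::vector<std::pair<std::string, Row>>& result,
                  std::size_t targetDims)
{
  Row prediction(targetDims); // allocated once, reused for every point
  for (std::size_t i = 0; i < points.size(); ++i)
  {
    // regressor.predict(tree, targets, points[i], prediction, k, weight);
    result.emplace_back("id" + std::to_string(i), prediction); // copy out
  }
}
```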
```diff
@@ -226,11 +232,11 @@ class KNNRegressorQuery : public FluidBaseClient, ControlIn, ControlOut
 
 
   template <typename T>
-  void process(std::vector<FluidTensorView<T, 1>>& input,
-               std::vector<FluidTensorView<T, 1>>& output, FluidContext&)
+  void process(std::vector<FluidTensorView<T, 1>>& in,
+               std::vector<FluidTensorView<T, 1>>& out, FluidContext& c)
   {
-    output[0] <<= input[0];
-    if (input[0](0) > 0)
+    out[0] <<= in[0];
+    if (in[0](0) > 0)
     {
       auto knnPtr = get<kModel>().get().lock();
       if (!knnPtr)
```
```diff
@@ -248,17 +254,18 @@
             get<kOutputBuffer>().get()))
         return;
       auto outBuf = BufferAdaptor::Access(get<kOutputBuffer>().get());
-      if (outBuf.samps(0).size() != 1) return;
+      if (outBuf.samps(0).size() != algorithm.target.dims()) return;
 
       algorithm::KNNRegressor regressor;
 
-      RealVector point(algorithm.tree.dims());
+      RealVector input(algorithm.tree.dims(), c.allocator());
+      RealVector output(algorithm.target.dims(), c.allocator());
```

Member Author commented on the two RealVector lines above: These, on the other hand, probably would benefit from being allocated elsewhere, and using an allocator.

```diff
-      point <<= BufferAdaptor::ReadAccess(get<kInputBuffer>().get())
+      input <<= BufferAdaptor::ReadAccess(get<kInputBuffer>().get())
                     .samps(0, algorithm.tree.dims(), 0);
 
-      double result =
-          regressor.predict(algorithm.tree, algorithm.target, point, k, weight);
-      outBuf.samps(0)[0] = result;
+      regressor.predict(algorithm.tree, algorithm.target, input, output, k,
+                        weight, c.allocator());
+      outBuf.samps(0) <<= output;
     }
   }
```
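In the real-time query path the scratch vectors are now built with c.allocator(), the allocator carried by the FluidContext, and the same allocator is passed down to predict, so the per-block work can avoid touching the global heap. A self-contained illustration of that idea using the standard library's polymorphic allocators; std::pmr is a stand-in here, since FluCoMa has its own Allocator type:

```cpp
#include <array>
#include <cstddef>
#include <iostream>
#include <memory_resource>
#include <vector>

// Illustrative only: scratch buffers drawing on a pre-reserved arena, the
// way a real-time allocator avoids locks and syscalls inside process().
int main()
{
  std::array<std::byte, 4096> arena{};
  std::pmr::monotonic_buffer_resource resource(arena.data(), arena.size());

  // Stand-ins for the RealVector input/output built from c.allocator():
  std::pmr::vector<double> input(13, 0.0, &resource); // algorithm.tree.dims()
  std::pmr::vector<double> output(4, 0.0, &resource); // algorithm.target.dims()

  std::cout << input.size() << " in, " << output.size() << " out\n";
}
```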
