Skip to content

Commit

Permalink
Fixing format (#560)
Browse files Browse the repository at this point in the history
* fixing format

* Adding other unformatted files
  • Loading branch information
tanmayv25 authored and GuanLuo committed Aug 16, 2019
1 parent 664625e commit 4d5a55c
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 33 deletions.
28 changes: 16 additions & 12 deletions qa/common/busy_op_kernel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,43 +26,47 @@

#include <time.h>

#include "tensorflow/core/framework/device_base.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/device_base.h"

using namespace tensorflow; // NOLINT(build/namespaces)

// Registers the custom "BusyLoop" TensorFlow op: consumes an int32 tensor
// whose first element is the number of GPU clock cycles to busy-wait, and
// produces a same-shaped int32 output. (Deduplicated: the diff residue left
// both the pre- and post-format registration in the file.)
REGISTER_OP("BusyLoop").Input("input: int32").Output("output: int32").Doc(R"doc(
Busy waits for input number of clock cycles
)doc");

// Forward declaration of the host-side launcher implemented in
// busy_op_kernel.cu.cc. 'num_delay_cycles' and 'out' are device pointers
// (the kernel dereferences them in device code). Deduplicated: the diff
// residue left both the old and the reformatted declaration in place.
void BusyLoopKernelLauncher(
    const Eigen::GpuDevice& device, const int* num_delay_cycles, int* out);

class BusyLoopOp : public OpKernel {
public:
explicit BusyLoopOp(OpKernelConstruction* context) : OpKernel(context) {}

void Compute(OpKernelContext* context) override {
void Compute(OpKernelContext* context) override
{
// Grab the input
const Tensor& input_tensor = context->input(0);
auto num_delay_cycles = input_tensor.flat<int32>();

// Create dummy output
Tensor* output_tensor = nullptr;
OP_REQUIRES_OK(context, context->allocate_output(0, input_tensor.shape(),
&output_tensor));
OP_REQUIRES_OK(
context,
context->allocate_output(0, input_tensor.shape(), &output_tensor));
auto output = output_tensor->template flat<int32>();

// Verify input dimension
OP_REQUIRES(context, TensorShapeUtils::IsVector(input_tensor.shape()),
errors::InvalidArgument("BusyLoop expects a single value as a 1-D Vector"));
OP_REQUIRES(
context, TensorShapeUtils::IsVector(input_tensor.shape()),
errors::InvalidArgument(
"BusyLoop expects a single value as a 1-D Vector"));

// Call the cuda kernel launcher
BusyLoopKernelLauncher(context->eigen_device<Eigen::GpuDevice>(), num_delay_cycles.data(), output.data());
BusyLoopKernelLauncher(
context->eigen_device<Eigen::GpuDevice>(), num_delay_cycles.data(),
output.data());
}
};

Expand Down
36 changes: 21 additions & 15 deletions qa/common/busy_op_kernel.cu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,36 @@
#if GOOGLE_CUDA
#define EIGEN_USE_GPU

#include <time.h>
#include <cuda_runtime.h>
#include <time.h>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

__device__ long store_now[1];

// Device kernel: spins until num_delay_cycles[0] clock cycles have elapsed
// on the executing SM. 'out' is unused by the loop itself; the kernel's
// observable effect is only the time spent. (Deduplicated: the diff residue
// left both the pre- and post-format kernel bodies in the file.)
__global__ void
BusyLoopKernel(const int* num_delay_cycles, int* out)
{
  // As shown in
  // https://stackoverflow.com/questions/11217117/equivalent-of-usleep-in-cuda-kernel
  clock_t start = clock();

  for (;;) {
    clock_t now = clock();
    // Adjust for overflow: clock() wraps around a 32-bit counter, so when
    // 'now' has wrapped below 'start', rebase against 0xffffffff.
    clock_t cycles = now > start ? now - start : now + (0xffffffff - start);
    if (cycles >= num_delay_cycles[0]) {
      break;
    }
    // Prevent nvcc optimizations from eliding the busy loop: write the
    // current cycle count to a device global so the loop has a side effect.
    store_now[0] = cycles;
  }
}

// Host-side launcher: enqueues BusyLoopKernel (1 block of 256 threads) on
// the device's stream. Does not synchronize; the delay completes
// asynchronously on that stream. (Deduplicated: the diff residue left the
// old one-line signature alongside the reformatted definition.)
void
BusyLoopKernelLauncher(
    const Eigen::GpuDevice& device, const int* num_delay_cycles, int* out)
{
  auto stream = device.stream();
  BusyLoopKernel<<<1, 256, 0, stream>>>(num_delay_cycles, out);
}
Expand Down
3 changes: 2 additions & 1 deletion src/clients/c++/inference_profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,8 @@ class InferenceProfiler {
std::unique_ptr<LoadManager> manager);

/// A helper function to construct the map of ensemble models to its composing
/// models.
/// \param model_name The ensemble model to be added into the map
/// \param model_version The version of the model to be added
/// \param server_status The server status response from TRTIS.
/// \return Error object indicating success or failure
Expand Down
10 changes: 5 additions & 5 deletions src/core/ensemble_scheduler.cc
Original file line number Diff line number Diff line change
Expand Up @@ -281,11 +281,11 @@ EnsembleContext::ResponseAlloc(
<< memory_type << " for " << tensor_name;
return nullptr; // Success
} else {
auto it = tensor_data_map
->emplace(
tensor_name,
std::make_shared<AllocatedSystemMemory>(byte_size))
.first;
auto it =
tensor_data_map
->emplace(
tensor_name, std::make_shared<AllocatedSystemMemory>(byte_size))
.first;
if (byte_size > 0) {
*buffer = static_cast<void*>(it->second->MutableBuffer());
}
Expand Down

0 comments on commit 4d5a55c

Please sign in to comment.