Skip to content

Commit

Permalink
Don't use compat libcuda on CPU-only systems (triton-inference-server…
Browse files Browse the repository at this point in the history
  • Loading branch information
deadeyegoodwin authored Aug 28, 2019
1 parent 110a760 commit 265fa41
Show file tree
Hide file tree
Showing 6 changed files with 9 additions and 19 deletions.
2 changes: 0 additions & 2 deletions nvidia_entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,7 @@ if [[ "$(find /usr -name libcuda.so.1 | grep -v "compat") " == " " || "$(ls /dev
echo "WARNING: The NVIDIA Driver was not detected. GPU functionality will not be available."
echo " Use 'nvidia-docker run' to start this container; see"
echo " https://github.com/NVIDIA/nvidia-docker/wiki/nvidia-docker ."
ln -s `find / -name libcuda.so.1 -print -quit` /opt/tensorrtserver/lib/libcuda.so.1
ln -s `find / -name libnvidia-ml.so -print -quit` /opt/tensorrtserver/lib/libnvidia-ml.so.1
ln -s `find / -name libnvidia-fatbinaryloader.so.${CUDA_DRIVER_VERSION} -print -quit` /opt/tensorrtserver/lib/libnvidia-fatbinaryloader.so.${CUDA_DRIVER_VERSION}
export TENSORRT_SERVER_CPU_ONLY=1
else
( /usr/local/bin/checkSMVER.sh )
Expand Down
2 changes: 1 addition & 1 deletion src/core/backend_context.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ BackendContext::CreateCudaStream(const int cuda_stream_priority)
auto cuerr = cudaGetDeviceCount(&device_cnt);
// Do nothing if there is no CUDA device since all data transfer will be done
// within CPU memory
if (cuerr != cudaErrorNoDevice) {
if ((cuerr != cudaErrorNoDevice) && (cuerr != cudaErrorInsufficientDriver)) {
if (cuerr == cudaSuccess) {
cuerr = cudaStreamCreateWithPriority(
&stream_, cudaStreamDefault, cuda_stream_priority);
Expand Down
4 changes: 2 additions & 2 deletions src/core/model_config_utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -946,12 +946,12 @@ GetSupportedGPUs(std::set<int>& supported_gpus)

int device_cnt;
cudaError_t cuerr = cudaGetDeviceCount(&device_cnt);
if (cuerr == cudaErrorNoDevice) {
if ((cuerr == cudaErrorNoDevice) || (cuerr == cudaErrorInsufficientDriver)) {
device_cnt = 0;
} else if (cuerr != cudaSuccess) {
return Status(
RequestStatusCode::INTERNAL,
"unable to get number of CUDA devices" +
"unable to get number of CUDA devices: " +
std::string(cudaGetErrorString(cuerr)));
}

Expand Down
7 changes: 4 additions & 3 deletions src/core/model_repository_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -93,12 +93,13 @@ BuildBackendConfigMap(
#ifdef TRTIS_ENABLE_GPU
int device_cnt = 0;
cudaError_t cuerr = cudaGetDeviceCount(&device_cnt);
if (cuerr == cudaErrorNoDevice) {
if ((cuerr == cudaErrorNoDevice) ||
(cuerr == cudaErrorInsufficientDriver)) {
device_cnt = 0;
} else if (cuerr != cudaSuccess) {
LOG_ERROR << "unable to get number of CUDA devices while building "
"BackendConfigMap: "
<< cudaGetErrorString(cuerr);
"BackendConfigMap: ("
<< cuerr << ") " << cudaGetErrorString(cuerr);
device_cnt = 0;
}

Expand Down
4 changes: 2 additions & 2 deletions src/core/profile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ ProfileStartAll()
#ifdef TRTIS_ENABLE_GPU
int dcnt;
cudaError_t cuerr = cudaGetDeviceCount(&dcnt);
if (cuerr == cudaErrorNoDevice) {
if ((cuerr == cudaErrorNoDevice) || (cuerr == cudaErrorInsufficientDriver)) {
dcnt = 0;
} else if (cuerr != cudaSuccess) {
return Status(
Expand Down Expand Up @@ -76,7 +76,7 @@ ProfileStopAll()
#ifdef TRTIS_ENABLE_GPU
int dcnt;
cudaError_t cuerr = cudaGetDeviceCount(&dcnt);
if (cuerr == cudaErrorNoDevice) {
if ((cuerr == cudaErrorNoDevice) || (cuerr == cudaErrorInsufficientDriver)) {
dcnt = 0;
} else if (cuerr != cudaSuccess) {
return Status(
Expand Down
9 changes: 0 additions & 9 deletions src/core/server.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@
#include "src/core/model_config.pb.h"
#include "src/core/model_config_utils.h"
#include "src/core/model_repository_manager.h"
#include "src/core/profile.h"
#include "src/core/provider.h"
#include "src/core/server.h"
#include "src/core/server_status.pb.h"
Expand Down Expand Up @@ -116,14 +115,6 @@ InferenceServer::Init()
RequestStatusCode::INVALID_ARG, "--model-repository must be specified");
}

// Disable profiling at server start. Server API can be used to
// start/stop profiling.
status = ProfileStopAll();
if (!status.IsOk()) {
ready_state_ = ServerReadyState::SERVER_FAILED_TO_INITIALIZE;
return status;
}

// Create the shared memory manager that registers / unregisters and returns
// the shared memory regions that are currently registered.
status =
Expand Down

0 comments on commit 265fa41

Please sign in to comment.