Skip to content

Commit

Permalink
Fixing pthreadpool symbol conflict issue. (pytorch#33869)
Browse files Browse the repository at this point in the history
Summary:
Mainly renaming pthread_create of C2, the only one referred internally in NNPACK, that
is conflicting, to pthread_create_c2.
Removed 2 other conflicting symbols that are not used internally at all.
Pointing XNNPACK to original repo instead of the fork.

Copy pasted the new interface and implementation to
caffe2/utils/threadpool, so that for internal builds we compile against
this.

When threadpool is unified this will be removed.
Pull Request resolved: pytorch#33869

Differential Revision: D20140580

Pulled By: kimishpatel

fbshipit-source-id: de70df0af9c7d6bc065e85ede0e1c4dd6a9e6be3
  • Loading branch information
kimishpatel authored and facebook-github-bot committed Feb 29, 2020
1 parent 85b1c45 commit 0e52627
Show file tree
Hide file tree
Showing 13 changed files with 1,517 additions and 6 deletions.
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,6 @@
path = android/libs/fbjni
url = https://github.com/facebookincubator/fbjni.git
[submodule "third_party/XNNPACK"]
ignore = dirty
path = third_party/XNNPACK
url = https://github.com/AshkanAliabadi/XNNPACK.git
branch = xnnpack_pytorch_merge_temp
url = https://github.com/google/XNNPACK.git
18 changes: 18 additions & 0 deletions caffe2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,21 @@ endif()
# Note: the folders that are being commented out have not been properly
# addressed yet.

# FXdiv is pulled in for pthreadpool_new_if_impl.
# TODO: Remove when threadpools are unified.
if(NOT MSVC)
  # Only add the FXdiv subproject when no fxdiv target exists yet.
  if(NOT TARGET fxdiv)
    # Build FXdiv without its tests and benchmarks.
    set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
    set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
    add_subdirectory("${FXDIV_SOURCE_DIR}" "${CMAKE_BINARY_DIR}/FXdiv")
  endif()
  # Everywhere except the (INTERN_BUILD_MOBILE AND NOT BUILD_CAFFE2_MOBILE)
  # configuration, compile the C implementation with -fno-openmp.
  if((NOT INTERN_BUILD_MOBILE) OR BUILD_CAFFE2_MOBILE)
    set_source_files_properties(
      utils/threadpool/pthreadpool_new_if_impl.c
      PROPERTIES COMPILE_FLAGS -fno-openmp)
  endif()
endif()

add_subdirectory(core)
add_subdirectory(serialize)
add_subdirectory(utils)
Expand Down Expand Up @@ -677,6 +692,9 @@ ELSEIF(USE_CUDA)
ENDIF()


# Link FXdiv privately into torch_cpu; skipped on MSVC.
if(NOT MSVC)
  target_link_libraries(torch_cpu PRIVATE fxdiv)
endif()

# ==========================================================
# formerly-libtorch flags
Expand Down
8 changes: 8 additions & 0 deletions caffe2/utils/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
# TODO: Add ThreadPoolXNNPACK.cc when XNNPACK integration is updated
# to pass the actual threadpool ptr instead of nullptr.
if (INTERN_BUILD_MOBILE AND NOT BUILD_CAFFE2_MOBILE)
add_definitions(-DUSE_INTERNAL_THREADPOOL_IMPL)
list(APPEND Caffe2_CPU_SRCS
utils/string_utils.cc
utils/threadpool/pthreadpool.cc
utils/threadpool/pthreadpool_impl.cc
utils/threadpool/pthreadpool_new_if_impl.c
utils/threadpool/ThreadPool.cc
utils/threadpool/ThreadPoolMobile.cc
utils/threadpool/ThreadPoolXNNPACK.cc
)
set(Caffe2_CPU_SRCS ${Caffe2_CPU_SRCS} PARENT_SCOPE)
return()
Expand Down Expand Up @@ -32,10 +37,13 @@ list(APPEND Caffe2_CPU_SRCS
# pthreadpool with a very similar interface. Neither NNPACK, nor this
# thread pool supports Windows.
if(NOT MSVC)
  # Makes ThreadPoolXNNPACK.cc select the pthreadpool_*_xnnpack entry points.
  add_definitions(-DUSE_INTERNAL_THREADPOOL_IMPL)
  list(APPEND Caffe2_CPU_SRCS
    utils/threadpool/pthreadpool.cc
    utils/threadpool/pthreadpool_impl.cc
    utils/threadpool/pthreadpool_new_if_impl.c
    utils/threadpool/ThreadPoolMobile.cc
    utils/threadpool/ThreadPoolXNNPACK.cc
  )
endif()

Expand Down
2 changes: 0 additions & 2 deletions caffe2/utils/threadpool/ThreadPool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ C10_DEFINE_int(caffe2_threadpool_ios_cap, true, "");

namespace caffe2 {

namespace {
size_t getDefaultNumThreads() {
CAFFE_ENFORCE(cpuinfo_initialize(), "cpuinfo initialization failed");
int numThreads = cpuinfo_get_processors_count();
Expand Down Expand Up @@ -72,7 +71,6 @@ size_t getDefaultNumThreads() {
}
return numThreads;
}
} // namespace

// Default smallest amount of work that will be partitioned between
// multiple threads; the runtime value is configurable
Expand Down
1 change: 1 addition & 0 deletions caffe2/utils/threadpool/ThreadPoolMobile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@ caffe2::ThreadPool* mobile_threadpool() {
pthreadpool_t mobile_pthreadpool() {
  // Expose the pointer returned by mobile_threadpool() through the
  // pthreadpool_t handle type.
  auto* const pool = mobile_threadpool();
  return reinterpret_cast<pthreadpool_t>(pool);
}

} // namespace caffe2
1 change: 1 addition & 0 deletions caffe2/utils/threadpool/ThreadPoolMobile.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ ThreadPool* mobile_threadpool();
// "at::parallel_for" primitive to replace pthreadpool_compute_1d for Pytorch;
pthreadpool_t mobile_pthreadpool();

size_t getDefaultNumThreads();
} // namespace caffe2
22 changes: 22 additions & 0 deletions caffe2/utils/threadpool/ThreadPoolXNNPACK.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#include <caffe2/utils/threadpool/pthreadpool.h>
#include <caffe2/utils/threadpool/ThreadPoolMobile.h>
#include <caffe2/utils/threadpool/ThreadPoolXNNPACK.h>
#include <memory>

namespace caffe2 {

// Returns the pthreadpool handle used for XNNPACK. Will be unified.
//
// The pool lives in a function-local static: it is created on the first call
// with getDefaultNumThreads() threads, and every call returns the same pointer.
// The static unique_ptr owns the pool and releases it through the matching
// destroy function.
pthreadpool_t xnnpack_threadpool() {
  // Depending on internal implementation vs. OSS we link against
  // pthreadpool_create_xnnpack or pthreadpool_create (and the matching destroy
  // function). This is only temporary. It will be unified soon.
#ifdef USE_INTERNAL_THREADPOOL_IMPL
  using PoolDeleter = decltype(&pthreadpool_destroy_xnnpack);
  static std::unique_ptr<pthreadpool, PoolDeleter> pool(
      pthreadpool_create_xnnpack(getDefaultNumThreads()),
      &pthreadpool_destroy_xnnpack);
#else
  using PoolDeleter = decltype(&pthreadpool_destroy);
  static std::unique_ptr<pthreadpool, PoolDeleter> pool(
      pthreadpool_create(getDefaultNumThreads()),
      &pthreadpool_destroy);
#endif
  return pool.get();
}

} // namespace caffe2
7 changes: 7 additions & 0 deletions caffe2/utils/threadpool/ThreadPoolXNNPACK.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once
// Creating a separate .h/.cc file for creating threadpool for XNNPACK
// to avoid touching existing internal builds.
// When we unify threadpools this should all go away.
//
// NOTE(review): this header uses pthreadpool_t but does not include a header
// that declares it, so it has to be included after one that does
// (ThreadPoolXNNPACK.cc includes caffe2/utils/threadpool/pthreadpool.h first).
// Confirm whether leaving the include to the consumer is intentional.
namespace caffe2 {
// Returns the thread pool handle used for XNNPACK. The implementation keeps
// the pool in a function-local static that owns it, so every call returns the
// same pointer and callers must not destroy it.
pthreadpool_t xnnpack_threadpool();
} // namespace caffe2
167 changes: 166 additions & 1 deletion caffe2/utils/threadpool/pthreadpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,48 @@ typedef void (*pthreadpool_function_4d_tiled_t)(
size_t,
size_t);

#include <stdint.h> // for uint32_t

// Callback types accepted by the pthreadpool_parallelize_* functions declared
// later in this header. Every callback receives a void* followed by a number
// of size_t values that grows with the dimensionality of the variant.
// NOTE(review): presumably the void* is the caller-supplied `argument`, and the
// size_t values are the item indices followed (for *_tile_* variants) by the
// tile extents, as in upstream pthreadpool — confirm against the implementation.
typedef void (*pthreadpool_task_1d_t)(void*, size_t);
typedef void (*pthreadpool_task_1d_tile_1d_t)(void*, size_t, size_t);
typedef void (*pthreadpool_task_2d_t)(void*, size_t, size_t);
typedef void (*pthreadpool_task_2d_tile_1d_t)(void*, size_t, size_t, size_t);
typedef void (*pthreadpool_task_2d_tile_2d_t)(void*, size_t, size_t, size_t, size_t);
typedef void (*pthreadpool_task_3d_tile_2d_t)(
void*,
size_t,
size_t,
size_t,
size_t,
size_t);
typedef void (*pthreadpool_task_4d_tile_2d_t)(
void*,
size_t,
size_t,
size_t,
size_t,
size_t,
size_t);
typedef void (*pthreadpool_task_5d_tile_2d_t)(
void*,
size_t,
size_t,
size_t,
size_t,
size_t,
size_t,
size_t);
typedef void (*pthreadpool_task_6d_tile_2d_t)(
void*,
size_t,
size_t,
size_t,
size_t,
size_t,
size_t,
size_t,
size_t);

#ifdef __cplusplus
extern "C" {
#endif
Expand All @@ -47,6 +89,8 @@ extern "C" {
* @returns A pointer to an opaque thread pool object.
* On error the function returns NULL and sets errno accordingly.
*/

//Returns internal threadpool impl.
pthreadpool_t pthreadpool_create(size_t threads_count);

/**
Expand All @@ -58,7 +102,6 @@ pthreadpool_t pthreadpool_create(size_t threads_count);
*/
size_t pthreadpool_get_threads_count(pthreadpool_t threadpool);


/**
* Processes items in parallel using threads from a thread pool.
*
Expand Down Expand Up @@ -137,6 +180,128 @@ void pthreadpool_compute_4d_tiled(
*/
void pthreadpool_destroy(pthreadpool_t threadpool);

// New interface copy/pasted from pthreadpool.
// We will merge the internal and third-party/pthreadpool eventually.
// For now copy-paste to get past build issues.

// Flag bit (value 0x1).
// NOTE(review): presumably passed via the `flags` argument of the
// pthreadpool_parallelize_* functions to disable denormals while tasks run,
// as the name suggests — confirm against the implementation.
#define PTHREADPOOL_FLAG_DISABLE_DENORMALS 0x00000001

// Returns the copied threadpool impl of third-party/pthreadpool
// A pool obtained here is released with pthreadpool_destroy_xnnpack (this is
// the pairing used by caffe2::xnnpack_threadpool()).
pthreadpool_t pthreadpool_create_xnnpack(size_t threads_count);

// Copied third-party impl.
// NOTE(review): presumably the counterpart of pthreadpool_get_threads_count
// for pools created by pthreadpool_create_xnnpack — confirm.
size_t pthreadpool_get_threads_count_xnnpack(pthreadpool_t threadpool);

// Copied third-party impl.
// Destroys a pool created by pthreadpool_create_xnnpack.
void pthreadpool_destroy_xnnpack(pthreadpool_t threadpool);

/**
* Processes items in parallel using threads from a thread pool.
*
* When the call returns, all items have been processed and the thread pool is
* ready for a new task.
*
* @note If multiple threads call this function with the same thread pool, the
* calls are serialized.
*
* @param[in] threadpool The thread pool to use for parallelisation.
* @param[in] function The function to call for each item.
* @param[in] argument The first argument passed to the @a function.
* @param[in] range The number of items to process. The @a function
* will be called once for each item.
* @param[in] flags Flag bits for this call. NOTE(review): presumably a
* bitwise OR of PTHREADPOOL_FLAG_* values (e.g.
* PTHREADPOOL_FLAG_DISABLE_DENORMALS), 0 for none — confirm.
*/
void pthreadpool_parallelize_1d(
pthreadpool_t threadpool,
pthreadpool_task_1d_t function,
void* argument,
size_t range,
uint32_t flags);

// Multi-dimensional and tiled variants of pthreadpool_parallelize_1d. They all
// take the thread pool, a task callback of the matching pthreadpool_task_*
// type, a user `argument`, one `range_*` per dimension, optional `tile_*`
// sizes, and `flags`.
// NOTE(review): presumably each call covers the full range_i x range_j x ...
// grid, with the dimensions that have a `tile_*` parameter handed to the task
// in chunks of at most that size, as in upstream pthreadpool — confirm against
// the implementation.

// 1D range with a tile size for that dimension.
void pthreadpool_parallelize_1d_tile_1d(
pthreadpool_t threadpool,
pthreadpool_task_1d_tile_1d_t function,
void* argument,
size_t range,
size_t tile,
uint32_t flags);

// 2D range, no tile sizes.
void pthreadpool_parallelize_2d(
pthreadpool_t threadpool,
pthreadpool_task_2d_t function,
void* argument,
size_t range_i,
size_t range_j,
uint32_t flags);

// 2D range with a tile size for the j dimension.
void pthreadpool_parallelize_2d_tile_1d(
pthreadpool_t threadpool,
pthreadpool_task_2d_tile_1d_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t tile_j,
uint32_t flags);

// 2D range with tile sizes for both dimensions.
void pthreadpool_parallelize_2d_tile_2d(
pthreadpool_t threadpool,
pthreadpool_task_2d_tile_2d_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t tile_i,
size_t tile_j,
uint32_t flags);

// 3D range with tile sizes for the last two dimensions (j, k).
void pthreadpool_parallelize_3d_tile_2d(
pthreadpool_t threadpool,
pthreadpool_task_3d_tile_2d_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t range_k,
size_t tile_j,
size_t tile_k,
uint32_t flags);

// 4D range with tile sizes for the last two dimensions (k, l).
void pthreadpool_parallelize_4d_tile_2d(
pthreadpool_t threadpool,
pthreadpool_task_4d_tile_2d_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t range_k,
size_t range_l,
size_t tile_k,
size_t tile_l,
uint32_t flags);

// 5D range with tile sizes for the last two dimensions (l, m).
void pthreadpool_parallelize_5d_tile_2d(
pthreadpool_t threadpool,
pthreadpool_task_5d_tile_2d_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t range_k,
size_t range_l,
size_t range_m,
size_t tile_l,
size_t tile_m,
uint32_t flags);

// 6D range with tile sizes for the last two dimensions (m, n).
void pthreadpool_parallelize_6d_tile_2d(
pthreadpool_t threadpool,
pthreadpool_task_6d_tile_2d_t function,
void* argument,
size_t range_i,
size_t range_j,
size_t range_k,
size_t range_l,
size_t range_m,
size_t range_n,
size_t tile_m,
size_t tile_n,
uint32_t flags);

#ifdef __cplusplus
} /* extern "C" */
#endif
Expand Down
Loading

0 comments on commit 0e52627

Please sign in to comment.