Skip to content
This repository was archived by the owner on Dec 28, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
build/
.vscode/
.vs/
.pytest_cache/
*.pyc
5 changes: 4 additions & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,8 @@
path = lib/spdlog
url = git@github.com:gabime/spdlog.git
[submodule "lib/msgpack-c"]
path = lib/msgpack-c
path = example/lib/msgpack-c
url = git@github.com:msgpack/msgpack-c.git
[submodule "example/lib/libzmq"]
path = example/lib/libzmq
url = git@github.com:Omegastick/libzmq.git
5 changes: 3 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,15 @@ set(CMAKE_CXX_STANDARD 17)
list(APPEND CPPCHECK_ARGS
--enable=warning
--std=c++14
--verbose
--force
--verbose
--quiet
--inline-suppr
--error-exitcode=1
--language=c++
--config-exclude=${CMAKE_CURRENT_LIST_DIR}/src/third_party
--config-exclude=${CMAKE_CURRENT_LIST_DIR}/lib
-i${CMAKE_CURRENT_LIST_DIR}/example/lib
--suppressions-list=${CMAKE_CURRENT_LIST_DIR}/CppCheckSuppressions.txt
-I ${CMAKE_CURRENT_LIST_DIR}/src
-I ${CMAKE_CURRENT_LIST_DIR}/include
Expand Down Expand Up @@ -54,7 +55,7 @@ endif(CPPRL_BUILD_TESTS)

# Enable all warnings
if(MSVC)
target_compile_options(cpprl PRIVATE /W4 /WX)
target_compile_options(cpprl PRIVATE /W0)
else(MSVC)
target_compile_options(cpprl PRIVATE -Wall -Wextra -pedantic)
endif(MSVC)
Expand Down
16 changes: 13 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,15 +44,25 @@ Note: The Gym server and client aren't very well optimized, especially when it c
CMake is used for the build system.
Most dependencies are included as submodules (run `git submodule update --init --recursive` to get them).
Libtorch has to be [installed separately](https://pytorch.org/cppdocs/installing.html).
The OpenAI Gym client also uses [Zmqpp](https://github.com/zeromq/zmqpp), which can be installed with `sudo apt-get install libzmqpp-dev`.

### Linux
```bash
cd pytorch-cpp-rl
mkdir build && cd build
cmake ..
make -j4
```

Windows build instructions coming soon.
### Windows
```
cd pytorch-cpp-rl
mkdir build && cd build
cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_PREFIX_PATH=C:/path/to/libtorch ..
cmake --build . --config Release
```
Before running, make sure to add `libtorch/lib` to your `PATH` environment variable.

Windows performance is currently about 75% of Linux performance. I'm looking into how to speed things up.

## Testing
You can run the tests with `build/cpprl_tests`.
You can run the tests with `build/cpprl_tests` (`build/Release/cpprl_tests.exe` on Windows).
30 changes: 30 additions & 0 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
version: '1.0.0-{build}'

clone_folder: C:\pytorch-cpp-rl

image:
- Visual Studio 2017

platform:
- x64

before_build:
# Load submodules
- sed -i 's/git@github.com:/https:\/\/github.com\//' .gitmodules
- git submodule update --init --recursive
# Install libtorch
- curl -fsS -o libtorch.zip https://download.pytorch.org/libtorch/cpu/libtorch-win-shared-with-deps-latest.zip
- 7z x libtorch.zip -y -oC:\
- set PATH=C:\libtorch\lib;%PATH%
# Run CMake
- mkdir build
- cd build
- cmake -G "Visual Studio 15 2017 Win64" -DCMAKE_PREFIX_PATH=C:\libtorch ..

build_script:
- cmake --build . --config Release

test_script:
- dir
- cd Release
- cpprl_tests.exe
15 changes: 13 additions & 2 deletions example/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,22 @@
add_executable(gym_client gym_client.cpp communicator.cpp)

set(LIB_DIR ${CMAKE_CURRENT_LIST_DIR}/lib)
set(CPPZMQ_DIR ${LIB_DIR}/cppzmq)
set(MSGPACK_DIR ${LIB_DIR}/msgpack-c)
set(ZMQ_DIR ${LIB_DIR}/libzmq)

# ZMQ
option(ZMQ_BUILD_TESTS "" OFF)
add_subdirectory(${ZMQ_DIR})

target_include_directories(gym_client
PRIVATE
.
../include
../lib/msgpack-c/include
../lib/spdlog/include
${CPPZMQ_DIR}
${MSGPACK_DIR}/include
${ZMQ_DIR}/include
)

target_link_libraries(gym_client PRIVATE zmq cpprl)
target_link_libraries(gym_client PRIVATE libzmq-static cpprl)
2 changes: 1 addition & 1 deletion example/communicator.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
#include <memory>
#include <string>

#include <zmq.hpp>
#include <spdlog/spdlog.h>

#include "communicator.h"
#include "requests.h"
#include "third_party/zmq.hpp"

namespace gym_client
{
Expand Down
2 changes: 1 addition & 1 deletion example/communicator.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
#include <string>

#include <msgpack.hpp>
#include <zmq.hpp>
#include <spdlog/spdlog.h>
#include <spdlog/fmt/bundled/ostream.h>

#include "requests.h"
#include "third_party/zmq.hpp"

namespace gym_client
{
Expand Down
2 changes: 1 addition & 1 deletion example/gym_client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ int main(int argc, char *argv[])
storage.get_masks()[step]);
}
auto actions_tensor = act_result[1].cpu();
long *actions_array = actions_tensor.data<long>();
int64_t *actions_array = actions_tensor.data<int64_t>();
std::vector<std::vector<int>> actions(num_envs);
for (int i = 0; i < num_envs; ++i)
{
Expand Down
1 change: 1 addition & 0 deletions example/lib/libzmq
Submodule libzmq added at 7d2631
4 changes: 2 additions & 2 deletions example/requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ struct StepParam
struct InfoResponse
{
std::string action_space_type;
std::vector<long> action_space_shape;
std::vector<int64_t> action_space_shape;
std::string observation_space_type;
std::vector<long> observation_space_shape;
std::vector<int64_t> observation_space_shape;
MSGPACK_DEFINE_MAP(action_space_type, action_space_shape,
observation_space_type, observation_space_shape);
};
Expand Down
6 changes: 3 additions & 3 deletions include/cpprl/distributions/categorical.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,12 @@ class Categorical : public Distribution
private:
torch::Tensor probs;
torch::Tensor logits;
std::vector<long> batch_shape;
std::vector<long> event_shape;
std::vector<int64_t> batch_shape;
std::vector<int64_t> event_shape;
torch::Tensor param;
int num_events;

std::vector<long> extended_shape(c10::ArrayRef<int64_t> sample_shape);
std::vector<int64_t> extended_shape(c10::ArrayRef<int64_t> sample_shape);

public:
Categorical(const torch::Tensor *probs, const torch::Tensor *logits);
Expand Down
2 changes: 1 addition & 1 deletion include/cpprl/spaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ namespace cpprl
struct ActionSpace
{
std::string type;
std::vector<long> shape;
std::vector<int64_t> shape;
};
}
10 changes: 5 additions & 5 deletions include/cpprl/storage.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,15 @@ class RolloutStorage
torch::Tensor observations, hidden_states, rewards, value_predictions,
returns, action_log_probs, actions, masks;
torch::Device device;
unsigned int num_steps;
unsigned int step;
int64_t num_steps;
int64_t step;

public:
RolloutStorage(unsigned int num_steps,
unsigned int num_processes,
RolloutStorage(int64_t num_steps,
int64_t num_processes,
c10::ArrayRef<int64_t> obs_shape,
ActionSpace action_space,
unsigned int hidden_state_size,
int64_t hidden_state_size,
torch::Device device);

void after_update();
Expand Down
22 changes: 11 additions & 11 deletions src/distributions/categorical.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,9 @@ torch::Tensor Categorical::entropy()
return -p_log_p.sum(-1);
}

std::vector<long> Categorical::extended_shape(c10::ArrayRef<int64_t> sample_shape)
std::vector<int64_t> Categorical::extended_shape(c10::ArrayRef<int64_t> sample_shape)
{
std::vector<long> output_shape;
std::vector<int64_t> output_shape;
output_shape.insert(output_shape.end(),
sample_shape.begin(),
sample_shape.end());
Expand All @@ -68,7 +68,7 @@ std::vector<long> Categorical::extended_shape(c10::ArrayRef<int64_t> sample_shap

torch::Tensor Categorical::log_prob(torch::Tensor value)
{
value = value.to(torch::ScalarType::Long).unsqueeze(-1);
value = value.to(torch::kLong).unsqueeze(-1);
auto broadcasted_tensors = torch::broadcast_tensors({value, logits});
value = broadcasted_tensors[0];
value = value.narrow(-1, 0, 1);
Expand All @@ -82,7 +82,7 @@ torch::Tensor Categorical::sample(c10::ArrayRef<int64_t> sample_shape)
param_shape.insert(param_shape.end(), {num_events});
auto exp_probs = probs.expand(param_shape);
torch::Tensor probs_2d;
if (probs.dim() == 1 or probs.size(0) == 1)
if (probs.dim() == 1 || probs.size(0) == 1)
{
probs_2d = exp_probs.view({-1, num_events});
}
Expand Down Expand Up @@ -121,9 +121,9 @@ TEST_CASE("Categorical")
auto probabilities_tensor = torch::from_blob(probabilities, {5});
auto dist = Categorical(&probabilities_tensor, nullptr);

CHECK(dist.sample({20}).sizes().vec() == std::vector<long>{20});
CHECK(dist.sample({2, 20}).sizes().vec() == std::vector<long>{2, 20});
CHECK(dist.sample({1, 2, 3, 4, 5}).sizes().vec() == std::vector<long>{1, 2, 3, 4, 5});
CHECK(dist.sample({20}).sizes().vec() == std::vector<int64_t>{20});
CHECK(dist.sample({2, 20}).sizes().vec() == std::vector<int64_t>{2, 20});
CHECK(dist.sample({1, 2, 3, 4, 5}).sizes().vec() == std::vector<int64_t>{1, 2, 3, 4, 5});
}

SUBCASE("Multi-dimensional input probabilities are handled correctly")
Expand All @@ -135,8 +135,8 @@ TEST_CASE("Categorical")
auto probabilities_tensor = torch::from_blob(probabilities, {2, 4});
auto dist = Categorical(&probabilities_tensor, nullptr);

CHECK(dist.sample({20}).sizes().vec() == std::vector<long>{20, 2});
CHECK(dist.sample({10, 5}).sizes().vec() == std::vector<long>{10, 5, 2});
CHECK(dist.sample({20}).sizes().vec() == std::vector<int64_t>{20, 2});
CHECK(dist.sample({10, 5}).sizes().vec() == std::vector<int64_t>{10, 5, 2});
}

SUBCASE("Generated tensors have correct probabilities")
Expand Down Expand Up @@ -174,7 +174,7 @@ TEST_CASE("Categorical")

SUBCASE("Output tensor is the correct size")
{
CHECK(entropies.sizes().vec() == std::vector<long>{2});
CHECK(entropies.sizes().vec() == std::vector<int64_t>{2});
}
}

Expand Down Expand Up @@ -205,7 +205,7 @@ TEST_CASE("Categorical")

SUBCASE("Output tensor is correct size")
{
CHECK(log_probs.sizes().vec() == std::vector<long>{2, 2});
CHECK(log_probs.sizes().vec() == std::vector<int64_t>{2, 2});
}
}
}
Expand Down
16 changes: 8 additions & 8 deletions src/generators/feed_forward_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,14 +94,14 @@ TEST_CASE("FeedForwardGenerator")
{
auto minibatch = generator.next();

CHECK(minibatch.observations.sizes().vec() == std::vector<long>{5, 4});
CHECK(minibatch.hidden_states.sizes().vec() == std::vector<long>{5, 3});
CHECK(minibatch.actions.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.value_predictions.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.returns.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.masks.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.action_log_probs.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.advantages.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.observations.sizes().vec() == std::vector<int64_t>{5, 4});
CHECK(minibatch.hidden_states.sizes().vec() == std::vector<int64_t>{5, 3});
CHECK(minibatch.actions.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.value_predictions.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.returns.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.masks.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.action_log_probs.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.advantages.sizes().vec() == std::vector<int64_t>{5, 1});
}

SUBCASE("done() indicates whether the generator has finished")
Expand Down
18 changes: 9 additions & 9 deletions src/generators/recurrent_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ MiniBatch RecurrentGenerator::next()

// Fill minibatch with tensors of shape (timestep, process, *whatever)
// Except hidden states, that is just (process, *whatever)
long env_index = indices[index].item().toLong();
int64_t env_index = indices[index].item().toLong();
mini_batch.observations = observations
.narrow(0, 0, observations.size(0) - 1)
.narrow(1, env_index, num_envs_per_batch);
Expand Down Expand Up @@ -110,14 +110,14 @@ TEST_CASE("RecurrentGenerator")
{
auto minibatch = generator.next();

CHECK(minibatch.observations.sizes().vec() == std::vector<long>{5, 4});
CHECK(minibatch.hidden_states.sizes().vec() == std::vector<long>{1, 3});
CHECK(minibatch.actions.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.value_predictions.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.returns.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.masks.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.action_log_probs.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.advantages.sizes().vec() == std::vector<long>{5, 1});
CHECK(minibatch.observations.sizes().vec() == std::vector<int64_t>{5, 4});
CHECK(minibatch.hidden_states.sizes().vec() == std::vector<int64_t>{1, 3});
CHECK(minibatch.actions.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.value_predictions.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.returns.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.masks.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.action_log_probs.sizes().vec() == std::vector<int64_t>{5, 1});
CHECK(minibatch.advantages.sizes().vec() == std::vector<int64_t>{5, 1});
}

SUBCASE("done() indicates whether the generator has finished")
Expand Down
2 changes: 1 addition & 1 deletion src/model/nn_base.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ std::vector<torch::Tensor> NNBase::forward_gru(torch::Tensor x,
auto gru_output = gru(
x.index({torch::arange(start_idx,
end_idx,
TensorOptions(ScalarType::Long))}),
TensorOptions(torch::kLong))}),
rnn_hxs * masks[start_idx].view({1, -1, 1}));

outputs.push_back(gru_output.output);
Expand Down
2 changes: 1 addition & 1 deletion src/model/output_layers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ TEST_CASE("CategoricalOutput")

auto output = dist->sample();

CHECK(output.sizes().vec() == std::vector<long>{2});
CHECK(output.sizes().vec() == std::vector<int64_t>{2});
}
}
}
Loading