Skip to content

Commit 00519f4

Browse files
authored
dev-0.9.0 (#87)
* remove deprecated APIs * tensor type name * #include <climits> * move ttl::experimental::zip to upstream * fix namespace * fix namespace * use size_t in basic_allocator<R, cuda_memory> * size_t * show cuda error string (#88)
1 parent 86f9d66 commit 00519f4

18 files changed

+424
-73
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
#pragma once

#if defined(__GNUC__) && !defined(__clang__)
#pragma message("ttl::experimental::zip is error-prone, use with care!")
#endif

#include <array>
#include <functional>
#include <numeric>
#include <tuple>

namespace ttl
{
namespace experimental
{
namespace internal
{
// Wraps N ranges so a range-based for loop can walk them in lock-step,
// yielding a std::tuple of the current elements of each range.
// Holds const references only: the zipped ranges must outlive the zipper.
template <typename... Ts>
class zipper_t
{
    static constexpr auto arity = sizeof...(Ts);

    const std::tuple<const Ts &...> ranges_;

    // One underlying iterator per zipped range, all advanced together.
    template <typename... Iters>
    class iterator
    {
        std::tuple<Iters...> is_;

        // Dereferences every component iterator into a fresh tuple.
        template <size_t... Is>
        auto operator*(std::index_sequence<Is...>)
        {
            return std::make_tuple(*std::get<Is>(is_)...);
        }

        // Swallows its arguments; exists only to force evaluation of a
        // parameter pack expanded in a function-argument list (pre-C++17
        // substitute for a fold expression).
        template <typename... P>
        static void noop(const P &...)
        {
        }

        // Pre-increments one component iterator; the dummy int is what
        // gets passed to noop() above.
        template <typename Iter>
        static int incr(Iter &it)
        {
            ++it;
            return 0;
        }

        // Steps every component iterator forward by one.
        template <size_t... Is>
        void _advance(std::index_sequence<Is...>)
        {
            noop(incr(std::get<Is>(is_))...);
        }

        // Component-wise inequality: true while ANY component differs.
        // A range-for therefore stops only when ALL iterators reach end
        // at once — zipping ranges of different lengths runs the shorter
        // iterators past their end (hence the pragma warning above).
        template <size_t... Is>
        bool neq(std::index_sequence<Is...>, const iterator &other) const
        {
            // TODO: expand the expression
            const std::array<bool, arity> diffs(
                {(std::get<Is>(is_) != std::get<Is>(other.is_))...});
            for (const bool d : diffs) {
                if (d) { return true; }
            }
            return false;
        }

      public:
        iterator(const Iters &... i) : is_(i...) {}

        bool operator!=(const iterator &other) const
        {
            return neq(std::make_index_sequence<arity>(), other);
        }

        void operator++() { _advance(std::make_index_sequence<arity>()); }

        auto operator*()
        {
            return (operator*)(std::make_index_sequence<arity>());
        }
    };

    // Deduces iterator<Iters...> from the component iterators.
    template <typename... Iters>
    static iterator<Iters...> make_iterator(const Iters &... is)
    {
        return iterator<Iters...>(is...);
    }

    template <size_t... Is>
    auto begin(std::index_sequence<Is...>) const
    {
        return make_iterator(std::get<Is>(ranges_).begin()...);
    }

    template <size_t... Is>
    auto end(std::index_sequence<Is...>) const
    {
        return make_iterator(std::get<Is>(ranges_).end()...);
    }

  public:
    zipper_t(const Ts &... ranges) : ranges_(ranges...) {}

    auto begin() const { return begin(std::make_index_sequence<arity>()); }

    auto end() const { return end(std::make_index_sequence<arity>()); }
};

// zip(r1, r2, ...) -> a zipper_t viewing the given ranges by const reference.
template <typename... Ts>
zipper_t<Ts...> zip(const Ts &... ranges)
{
    return zipper_t<Ts...>(ranges...);
}
}  // namespace internal
}  // namespace experimental
}  // namespace ttl

include/ttl/bits/fake_cuda_runtime.hpp

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pragma once
2+
#include <cstdio>
23
#include <cstring>
34
#include <map>
45
#include <stdexcept>
@@ -12,7 +13,7 @@ constexpr const cudaMemcpyKind cudaMemcpyHostToDevice = 1;
1213
constexpr const cudaMemcpyKind cudaMemcpyDeviceToHost = 2;
1314
constexpr const cudaMemcpyKind cudaMemcpyDeviceToDevice = 3;
1415

15-
class fake_device
16+
class fake_cuda_device
1617
{
1718
std::map<const void *, size_t> _allocs;
1819

@@ -33,7 +34,9 @@ class fake_device
3334
}
3435

3536
public:
36-
~fake_device() { check_leak(); }
37+
fake_cuda_device() { std::printf("using fake_cuda_device!\n"); }
38+
39+
~fake_cuda_device() { check_leak(); }
3740

3841
void *alloc(size_t size)
3942
{
@@ -51,7 +54,8 @@ class fake_device
5154
_allocs.erase(data);
5255
}
5356

54-
void memcpy(void *dst, const void *src, int size, cudaMemcpyKind dir) const
57+
void memcpy(void *dst, const void *src, size_t size,
58+
cudaMemcpyKind dir) const
5559
{
5660
switch (dir) {
5761
case cudaMemcpyHostToDevice:
@@ -67,9 +71,9 @@ class fake_device
6771
}
6872
};
6973

70-
fake_device fake_cuda;
74+
fake_cuda_device fake_cuda;
7175

72-
cudaError_t cudaMalloc(void **ptr, int count)
76+
cudaError_t cudaMalloc(void **ptr, size_t count)
7377
{
7478
*ptr = fake_cuda.alloc(count);
7579
return cudaSuccess;
@@ -87,3 +91,8 @@ cudaError_t cudaMemcpy(void *dst, const void *src, size_t size,
8791
fake_cuda.memcpy(dst, src, size, dir);
8892
return cudaSuccess;
8993
}
94+
95+
// Fake stand-in for the CUDA runtime's cudaGetErrorString: produces a
// readable tag that embeds the numeric error code.
std::string cudaGetErrorString(const cudaError_t err)
{
    const std::string code = std::to_string(static_cast<int>(err));
    return "fake_cudaError_t(" + code + ")";
}

include/ttl/bits/flat_tensor_mixin.hpp

Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,6 @@ class flat_tensor_mixin
2727
template <rank_t r, typename A1 = typename trait::Access>
2828
using T = basic_tensor<R, basic_shape<r, Dim>, D, A1>;
2929

30-
template <rank_t r, typename A1>
31-
[[deprecated]] T<r, A1> ranked_as() const
32-
{
33-
return T<r, A1>(data_.get(), shape_.template as_ranked<r>());
34-
}
35-
3630
protected:
3731
using allocator = basic_allocator<R, D>;
3832

@@ -67,18 +61,6 @@ class flat_tensor_mixin
6761
using T = basic_tensor<R, basic_shape<r, Dim>, D, Access>;
6862
return T(data(), shape_.template ranked<r>());
6963
}
70-
71-
template <rank_t r>
72-
[[deprecated]] T<r, readwrite> ref_as() const
73-
{
74-
return ranked_as<r, readwrite>();
75-
}
76-
77-
template <rank_t r>
78-
[[deprecated]] T<r, readonly> view_as() const
79-
{
80-
return ranked_as<r, readonly>();
81-
}
8264
};
8365
} // namespace internal
8466
} // namespace ttl

include/ttl/bits/raw_tensor_mixin.hpp

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -113,21 +113,6 @@ class raw_tensor_mixin
113113
using T = basic_tensor<R, basic_shape<r, Dim>, D, Access>;
114114
return T(data<R>(), shape_.template ranked<r>());
115115
}
116-
117-
template <typename R, rank_t r, typename A1 = A>
118-
[[deprecated]] basic_tensor<R, basic_shape<r, Dim>, D, A1>
119-
ranked_as() const {
120-
return basic_tensor<R, basic_shape<r, Dim>, D, A1>(
121-
data<R>(), shape_.template ranked<r>());
122-
}
123-
124-
template <typename R, rank_t r>
125-
[[deprecated]] basic_tensor<R, basic_shape<r, Dim>, D, readwrite> ref_as()
126-
const { return ranked_as<R, r, readwrite>(); }
127-
128-
template <typename R, rank_t r>
129-
[[deprecated]] basic_tensor<R, basic_shape<r, Dim>, D, readonly> view_as()
130-
const { return ranked_as<R, r, readonly>(); }
131116
};
132117
} // namespace internal
133118
} // namespace ttl

include/ttl/bits/std_cuda_allocator.hpp

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#pragma once
22
#include <cstddef>
33
#include <stdexcept>
4+
#include <string>
45

56
#include <ttl/bits/std_cuda_runtime.hpp>
67
#include <ttl/bits/std_device.hpp>
@@ -10,6 +11,23 @@ namespace ttl
1011
{
1112
namespace internal
1213
{
14+
class std_cuda_error_checker_t
15+
{
16+
const std::string func_name_;
17+
18+
public:
19+
std_cuda_error_checker_t(const char *func_name) : func_name_(func_name) {}
20+
21+
void operator<<(const cudaError_t err) const
22+
{
23+
if (err != cudaSuccess) {
24+
throw std::runtime_error(func_name_ + " failed with: " +
25+
std::to_string(static_cast<int>(err)) +
26+
": " + cudaGetErrorString(err));
27+
}
28+
}
29+
}; // namespace ttl
30+
1331
struct cuda_copier {
1432
static constexpr auto h2d = cudaMemcpyHostToDevice;
1533
static constexpr auto d2h = cudaMemcpyDeviceToHost;
@@ -18,10 +36,8 @@ struct cuda_copier {
1836
template <cudaMemcpyKind dir>
1937
static void copy(void *dst, const void *src, size_t size)
2038
{
21-
const cudaError_t err = cudaMemcpy(dst, src, size, dir);
22-
if (err != cudaSuccess) {
23-
throw std::runtime_error("cudaMemcpy failed");
24-
}
39+
static std_cuda_error_checker_t check("cudaMemcpy");
40+
check << cudaMemcpy(dst, src, size, dir);
2541
}
2642
};
2743

@@ -49,15 +65,13 @@ template <typename R>
4965
class basic_allocator<R, cuda_memory>
5066
{
5167
public:
52-
R *operator()(int count)
68+
R *operator()(size_t count)
5369
{
5470
void *deviceMem;
5571
// cudaMalloc<R>(&deviceMem, count);
5672
// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY
57-
const cudaError_t err = cudaMalloc(&deviceMem, count * sizeof(R));
58-
if (err != cudaSuccess) {
59-
throw std::runtime_error("cudaMalloc failed");
60-
}
73+
static std_cuda_error_checker_t check("cudaMalloc");
74+
check << cudaMalloc(&deviceMem, count * sizeof(R));
6175
return reinterpret_cast<R *>(deviceMem);
6276
}
6377
};
@@ -68,8 +82,8 @@ class basic_deallocator<R, cuda_memory>
6882
public:
6983
void operator()(R *data)
7084
{
71-
const cudaError_t err = cudaFree(data);
72-
if (err != cudaSuccess) { throw std::runtime_error("cudaFree failed"); }
85+
static std_cuda_error_checker_t check("cudaFree");
86+
check << cudaFree(data);
7387
}
7488
};
7589
} // namespace internal

include/ttl/bits/std_encoding.hpp

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,21 @@ class basic_scalar_encoding
3737
public:
3838
static constexpr V value = (category << 16) | (byte_num << 8) | byte_size;
3939
};
40+
41+
enum class scaler_type : uint32_t {
42+
u8 = basic_scalar_encoding<uint8_t, uint32_t>::value,
43+
u16 = basic_scalar_encoding<uint16_t, uint32_t>::value,
44+
u32 = basic_scalar_encoding<uint32_t, uint32_t>::value,
45+
u64 = basic_scalar_encoding<uint64_t, uint32_t>::value,
46+
47+
i8 = basic_scalar_encoding<int8_t, uint32_t>::value,
48+
i16 = basic_scalar_encoding<int16_t, uint32_t>::value,
49+
i32 = basic_scalar_encoding<int32_t, uint32_t>::value,
50+
i64 = basic_scalar_encoding<int64_t, uint32_t>::value,
51+
52+
f32 = basic_scalar_encoding<float, uint32_t>::value,
53+
f64 = basic_scalar_encoding<double, uint32_t>::value,
54+
};
4055
} // namespace internal
4156

4257
namespace experimental
@@ -52,7 +67,7 @@ struct std_encoding {
5267
template <typename R>
5368
static constexpr value_type value()
5469
{
55-
return internal::basic_scalar_encoding<R, value_type>::value;
70+
return ttl::internal::basic_scalar_encoding<R, value_type>::value;
5671
}
5772
};
5873
} // namespace experimental

include/ttl/bits/std_reflect.hpp

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
#pragma once
#include <cxxabi.h>

#include <climits>
#include <cstdlib>
#include <string>
#include <type_traits>
#include <typeinfo>

namespace ttl
{
namespace internal
{
// Returns the human-readable (demangled) name of T, e.g. "unsigned int"
// instead of the mangled "j".  Falls back to the raw mangled name if
// demangling fails.
template <typename T>
std::string demangled_type_info_name()
{
    int status = 0;
    // abi::__cxa_demangle returns a malloc(3)-allocated buffer (or nullptr
    // on failure) that the caller must free.  The original code returned it
    // directly, leaking the buffer — and invoking UB when nullptr was
    // converted to std::string.
    char *const buf =
        abi::__cxa_demangle(typeid(T).name(), nullptr, nullptr, &status);
    if (buf == nullptr || status != 0) {
        std::free(buf);  // free(nullptr) is a no-op
        return typeid(T).name();
    }
    std::string name(buf);
    std::free(buf);
    return name;
}

// Single-character category tag for a scalar type:
// 'f' = floating point, 'i' = signed integral, 'u' = unsigned integral,
// 's' = anything else (user-defined / non-arithmetic).
template <typename R>
constexpr char scalar_type_prefix()
{
    if (std::is_floating_point<R>::value) {
        return 'f';
    } else if (std::is_integral<R>::value) {
        return std::is_signed<R>::value ? 'i' : 'u';
    } else {
        return 's';
    }
}

// scalar_type_name<IsScalar, R>()() yields a short name for R.  The bool
// parameter selects the compact "<prefix><bits>" form (true) or the
// demangled RTTI name (false).
template <bool, typename R>
class scalar_type_name;

// Non-scalar types: fall back to the demangled RTTI name.
template <typename R>
class scalar_type_name<false, R>
{
  public:
    std::string operator()() const { return demangled_type_info_name<R>(); }
};

// Scalar types: "<prefix><bit width>", e.g. int32_t -> "i32", double -> "f64".
template <typename R>
class scalar_type_name<true, R>
{
  public:
    std::string operator()() const
    {
        return scalar_type_prefix<R>() + std::to_string(sizeof(R) * CHAR_BIT);
    }
};
}  // namespace internal
}  // namespace ttl

0 commit comments

Comments (0)