#pragma once

#include <ideep.hpp>
#include <caffe2/core/operator.h>
#include <caffe2/proto/caffe2_pb.h>

namespace caffe2 {
C10_DECLARE_REGISTRY(
    IDEEPOperatorRegistry,
    OperatorBase,
    const OperatorDef&,
    Workspace*);

#define REGISTER_IDEEP_OPERATOR_CREATOR(key, ...) \
  C10_REGISTER_CREATOR(IDEEPOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_IDEEP_OPERATOR(name, ...) \
  C10_REGISTER_CLASS(IDEEPOperatorRegistry, name, __VA_ARGS__)
#define REGISTER_IDEEP_OPERATOR_WITH_ENGINE(name, engine, ...) \
  C10_REGISTER_CLASS(IDEEPOperatorRegistry, name##_ENGINE_##engine, __VA_ARGS__)
#define REGISTER_IDEEP_OPERATOR_STR(str_name, ...) \
  C10_REGISTER_TYPED_CLASS(IDEEPOperatorRegistry, str_name, __VA_ARGS__)

#define REGISTER_IDEEP_COMPARE_OPERATOR(Op)                   \
  REGISTER_IDEEP_OPERATOR(                                    \
      Op,                                                     \
      IDEEPFallbackOp<BinaryElementwiseOp<                    \
          TensorTypes<bool, int32_t, int64_t, float, double>, \
          CPUContext,                                         \
          Op##Functor<CPUContext>,                            \
          FixedType<bool>>>)
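
// Illustrative usage of the registration macros above. The operator classes
// named here (IDEEPReluOp, IDEEPTunedReluOp) and the TUNED engine are
// hypothetical; real registrations live in the individual operator .cc files:
//
//   REGISTER_IDEEP_OPERATOR(Relu, IDEEPReluOp);
//   REGISTER_IDEEP_OPERATOR_WITH_ENGINE(Relu, TUNED, IDEEPTunedReluOp);
//   REGISTER_IDEEP_COMPARE_OPERATOR(EQ);  // wraps the CPU EQ functor via IDEEPFallbackOp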

// IDEEPOperator is the base scaffolding for operators that use IDEEP. It
// provides a few helpers that are useful to IDEEP-specific implementations.
class IDEEPOperator : public OperatorBase {
 public:
  explicit IDEEPOperator(const OperatorDef& operator_def, Workspace* ws)
      : OperatorBase(operator_def, ws),
        context_(operator_def.device_option()),
        order_(StringToStorageOrder(
            OperatorBase::GetSingleArgument<string>("order", "NCHW"))) {}
  virtual ~IDEEPOperator() {}

  inline const ideep::tensor& Input(int index) {
    return OperatorBase::template Input<ideep::tensor>(index);
  }
  inline ideep::tensor* Output(int index) {
    return OperatorBase::template Output<ideep::tensor>(index);
  }

  // The run function of Operator switches to the device, and then carries out
  // the actual computation with RunOnDevice(). You should implement RunOnDevice
  // instead of Run().
  bool Run(int /* unused */ /*stream_id*/) final {
    // Since IDEEP does not need to do SwitchToDevice and
    // FinishDeviceComputation, this is always just a re-route to RunOnDevice().
    try {
      StartAllObservers();
      bool result = RunOnDevice();
      StopAllObservers();
      return result;
    } catch (EnforceNotMet& err) {
      TORCH_RETHROW(err, getErrorMsg());
    } catch (ideep::error& e) {
      LOG(ERROR) << "IDEEP error: " << e.message;
      throw;
    }
  }

  // Waits for a previous event. Note that to properly wait and run
  // asynchronously, WaitEvent, RunAsync and Record should all be executed
  // on the same CPU thread.
  void WaitEvent(const Event& ev, int /* unused */) final {
    context_.WaitEvent(ev);
  }

  void WaitEvents(const std::vector<const Event*>& events, int /* unused */)
      final {
    for (const auto& ev : events) {
      context_.WaitEvent(*ev);
    }
  }

  void RecordEvent(const char* err_msg = nullptr) final {
    if (event_) {
      context_.Record(event_.get(), err_msg);
    }
  }

  virtual bool RunOnDevice() = 0;

 protected:
  std::string getErrorMsg() {
    if (has_debug_def()) {
      return "Error from operator: " + ProtoDebugString(debug_def());
    } else {
      return "Error from operator: no op def";
    }
  }

  IDEEPContext context_;
  StorageOrder order_;
};

#define USE_IDEEP_OPERATOR_FUNCTIONS()                 \
  USE_OPERATOR_BASE_FUNCTIONS;                         \
  /* using override */ using IDEEPOperator::Input;     \
  /* using override */ using IDEEPOperator::Output;    \
  /* using override */ using IDEEPOperator::order_;    \
  /* using override */ using IDEEPOperator::context_;

#define USE_SIMPLE_IDEEP_CTOR_DTOR(name)               \
  name(const OperatorDef& operator_def, Workspace* ws) \
      : IDEEPOperator(operator_def, ws) {}             \
  virtual ~name() {}
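
// Minimal subclass sketch (IDEEPCopyLikeOp is hypothetical and shown only to
// illustrate the intended pattern): a subclass pulls in the base helpers with
// the macros above and implements RunOnDevice() instead of Run().
//
//   class IDEEPCopyLikeOp final : public IDEEPOperator {
//    public:
//     USE_IDEEP_OPERATOR_FUNCTIONS();
//     USE_SIMPLE_IDEEP_CTOR_DTOR(IDEEPCopyLikeOp);
//
//     bool RunOnDevice() override {
//       const auto& X = Input(0);
//       auto* Y = Output(0);
//       *Y = X;  // stand-in for a real IDEEP kernel call
//       return true;
//     }
//   };
//   REGISTER_IDEEP_OPERATOR(CopyLike, IDEEPCopyLikeOp);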

// Convert zero_point scales to min_max scales
// NOTE:
//   The scales in the operator are saved in FBGEMM format,
//   and FBGEMM scales are the reciprocals of MKL-DNN scales.
//   This function converts scales from FBGEMM to MKL-DNN format.
inline ideep::scale_t ConvertScales(
    const std::vector<float> scales_z) {
  ideep::scale_t scales(scales_z);
  for (auto it = scales.begin(); it != scales.end(); it++) {
    *it = 1.0f / *it;
  }
  return scales;
}
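
// Worked example: each entry is replaced by its reciprocal, so FBGEMM scales
// {0.5f, 2.0f} become MKL-DNN scales {2.0f, 0.5f}.
//
//   ideep::scale_t s = ConvertScales({0.5f, 2.0f});  // s == {2.0f, 0.5f}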

inline ideep::tensor::dims CanonicalDims(
    ideep::tensor::dims adims, int32_t axis) {
  CAFFE_ENFORCE(axis < (int32_t)adims.size(), "Invalid axis!");
  CAFFE_ENFORCE(axis > (int32_t)-adims.size(), "Invalid axis!");
  if (adims.size() == 2 || axis == 1)
    return adims;
  if (axis < 0) {
    axis += (int32_t)adims.size();
  }

  auto dim0 = std::accumulate(adims.begin(), adims.begin() + axis, 1,
                              std::multiplies<ideep::tensor::dim_t>());
  auto dim1 = std::accumulate(adims.begin() + axis, adims.end(), 1,
                              std::multiplies<ideep::tensor::dim_t>());
  return ideep::tensor::dims({dim0, dim1});
}
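
// Worked example: CanonicalDims({2, 3, 4, 5}, 2) collapses the shape into 2-D
// as {6, 20} (2*3 before the axis, 4*5 from the axis on). A negative axis
// counts from the end, so axis == -2 gives the same result here.
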
} // namespace caffe2