This repository was archived by the owner on Dec 11, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathir.lisp
373 lines (312 loc) · 14.7 KB
/
ir.lisp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
(in-package :cl-waffe2/vm)
;; [TODO] Add WfInstrucion to the docstring
;; WfInstruciton = cl-waffe2 IR
;; {GRAD} <- Gradient Adder
;; {INTERNAL} <- Rewritten by cl-waffe2 compiler
(defstruct (WfInstruction
(:conc-name wfop-)
(:constructor make-wfop (op self node args &key (sv4bw nil) (out-to nil) (block-iseq nil) (grad-adder-p nil) (loadp nil) (comment nil) (lut-cache-p nil))))
"
## [struct] WfInstruction
WfInstruction is IR for cl-waffe2 VM. Its form is represented by an extended Wengert list which is able to return multiple outputs. In this document, we call this *cl-waffe2 IR*, and compiled cl-waffe2 code, that is, the list of WfInstruction is called **InstructionSeq** or **iseq**. Unlike other frameworks, this IR is not only used to represent backpropagation but also forward propagation.
In cl-waffe2, WfInstruction is created by compiling AbstractNode, and its operation can be obtained by compiling lisp code or passing lambda functions.
A single WfInstruction represents:
```
out_to[0], out_to[1], ... <- λ(Args[0], Args[1], Args[2], ...)
^wfop-out-to ^wfop-op ^wfop-args
```
where λ represents the operation. And, if any, `ArgsN` is wrapped with `SV4BW`.
```
out_to[0], out_to[1], ... <- λ(SV4BW(Args[0]), Args[1], Args[2], ...)
^ wfop-sv4bw
```
SV4BW (i.e: save-for-backward) is a temporary tensor to compute backwards and cl-waffe2 reads the `:save-for-backward` slots in the `define-impl` macro, and the corresponding tensors are copied.
### Slots
`wfop-op[function]` corresponds with compiled λ function.
`wfop-node[AbstractNode or string or function]` The node which generates λ function. For the most case, this slot is set to `AbstractNode`, but the node is something special, (e.g.: `CodeBlock`, `IfNode` etc...), set to `function`.
`wfop-out-to[list of AbstractTensor]` indicates list of tensors that results are to be stored.
`wfop-self[AbstractTensor]` corresponds with `out_target`, that is, the tensor to store the results
`wfop-args[list of AbstractTensor]` corresponds with `(tensor-variable wfop-self)`. tensors to be called with: `arg1 arg2 arg3...`.
`wfop-sv4bw[list of AbstractTensor]` indicates list of tensors storing save-for-backward tensors. if the corresponding position is `save-for-backward=nil`, the corresponding position also become nil.
`wfop-comment[string or null]` If any, the value of slot is displayed when printing IR.
`wfop-loadp[boolean]` If set to t, the operation is interpreted as `load-pointer`. See also: (read-loadp instruction)
`wfop-lut-cache-p[boolean]` If set to T, indicates `wfop-op` is already compiled and cached in LUT.
"
(op op :type function)
(node node :type (or function string null AbstractNode))
(out-to out-to :type list)
(self self :type AbstractTensor)
(block-iseq nil :type list)
(args args :type list)
(args-last-count-p nil :type list)
(sv4bw sv4bw :type list)
(error-check-p nil :type boolean) ;; Indicates the first shape-inspection has done?
(bw-is-leaf-p nil :type boolean)
(grad-adder-p grad-adder-p :type boolean)
(loadp loadp :type boolean)
(lut-cache-p lut-cache-p :type boolean)
(comment comment :type (or null string)))
(defparameter *omit-args-n* 5)
(defparameter *opname-indent-to* 0 "Adds a space for this param times")
(defparameter *no-newline* nil)
(defmethod print-object ((inst WFInstruction) stream)
;; loadp has a special form when displayed:
;; LOADP: A* = B*
(when (wfop-loadp inst)
(let ((opname "<WfInst[load_pointer{SYS}]"))
(multiple-value-bind (from to) (read-loadp inst)
(format
stream
"~a~a : ~a* = ~a*>~a~a"
opname
(with-output-to-string (out)
(dotimes (i (- *opname-indent-to* (length opname))) (princ " " out)))
(tensor-id from)
(tensor-id to)
(if (wfop-comment inst)
(format nil " ;; ~a" (wfop-comment inst))
"")
(if *no-newline*
""
(format nil "~%"))))
(return-from print-object)))
(let ((ignored-p (and (movetensor-p (wfop-node inst))
(movetensor-ignore-me (wfop-node inst)))))
(format stream
"~a~a : ~a<= op(~a)>~a~a"
(instruction-opname inst)
(with-output-to-string (out)
(dotimes (i (- *opname-indent-to* (length (instruction-opname inst)))) (princ " " out)))
(with-output-to-string (out)
(dolist (o (wfop-out-to inst))
(if (slot-value o 'cl-waffe2/vm.generic-tensor:requires-grad)
(princ "<Param>" out)
(when (not (eql (cl-waffe2/vm.generic-tensor:tensor-attribute o) :chain))
(princ "<Input>" out)))
(princ (tensor-id o) out)
(princ " " out)))
(if ignored-p
(with-output-to-string (out)
(format out "~a{~(~a~), ~a}" (tensor-id (second (wfop-args inst))) (dtype (second (wfop-args inst))) (shape (second (wfop-args inst)))))
(if (>= (length (wfop-args inst)) *omit-args-n*)
(format nil "..., x~a,..." (length (wfop-args inst)))
(with-output-to-string (out)
(dotimes (i (length (wfop-args inst)))
(let ((var (nth i (wfop-args inst)))
(sv4 (and (not *no-grad*) (nth i (wfop-sv4bw inst)))))
(format out "~a~a~a{~(~a~), ~a}~a~a" ;;
(if (slot-value var 'cl-waffe2/vm.generic-tensor::requires-grad)
"<Param>"
(if (eql (cl-waffe2/vm.generic-tensor:tensor-attribute var) :chain)
""
"<Input>"))
(if sv4
"SV4BW("
"")
(tensor-id var)
(dtype var)
(shape var)
(if sv4
")"
"")
(if (nth (1+ i) (wfop-args inst))
" "
"")))))))
(if (wfop-comment inst)
(format nil " ;; ~a" (wfop-comment inst))
"")
(if *no-newline*
""
(format nil "~%")))))
(defmethod instruction-opname ((inst WFInstruction))
(format nil "<WfInst[op=~a~a]"
(if (wfop-grad-adder-p inst)
"{GRAD}"
"")
(if (functionp (wfop-node inst))
(funcall (wfop-node inst))
(if (movetensor-p (wfop-node inst))
(if (movetensor-ignore-me (wfop-node inst))
"Setq{Pruned}"
(if (cl-waffe2/base-impl:mv-lazy-sv4bw (wfop-node inst))
(if (scalar-p (wfop-self inst))
"MoveScalarNode(SAVE_FOR_BACKWARD)"
"MoveTensorNode(SAVE_FOR_BACKWARD)")
(class-name (class-of (wfop-node inst)))))
(class-name (class-of (wfop-node inst)))))))
(defmethod instruction-opname-table ((inst WFInstruction))
(cl-ppcre:regex-replace-all
"(\\n|\\s*$)"
(format nil "~a~a"
(if (wfop-grad-adder-p inst)
"{GRAD}"
"")
(if (functionp (wfop-node inst))
(funcall (wfop-node inst))
(if (movetensor-p (wfop-node inst))
(if (movetensor-ignore-me (wfop-node inst))
"Setq{Pruned}"
(if (cl-waffe2/base-impl:mv-lazy-sv4bw (wfop-node inst))
(if (scalar-p (wfop-self inst))
"MoveScalarNode(SAVE_FOR_BACKWARD)"
"MoveTensorNode(SAVE_FOR_BACKWARD)")
(class-name (class-of (wfop-node inst)))))
(class-name (class-of (wfop-node inst))))))
""))
(defun area-indent-to (iseq)
"Returns the largest length of iseq name"
(loop for i in iseq
if (not (functionp (wfop-node i)))
maximize (length (instruction-opname i))))
(defmacro with-indent-to (iseq &body body)
`(let ((*opname-indent-to* (area-indent-to ,iseq)))
,@body))
;; In-place mutation
;; v judge:is this usage of A is the last?
;; A <- A B
;; ...
;; K <- A B
;; The Goal:
;; [Move(Save-For-Backward)]
;; [Move(Deleted)]
;; [Sin]
;; [Move(Save-For-Backward)]
;; [Move(Deleted)]
;; ...
;;
;; Time(s) | Instruction ( * - Beyonds the average execution time)
;;0.007294 | <WfInst[Compiled: MoveTensorNode(SAVE_FOR_BACKWARD)] : TID1082631 <= op(TID1082631(128 128) <Param>TID1082615(128 128))>
;;0.005084 | <WfInst[Compiled: MOVETENSORNODE-CPUTENSOR] : TID1082620 <= op(TID1082620(128 128) <Param>TID1082615(128 128))> <- Should be deleted!
;;0.289967* | <WfInst[Compiled: SINNODE-LISPTENSOR] : TID1082620 <= op(TID1082631(128 128) TID1082620(128 128))>
;;0.006844 | <WfInst[Compiled: MoveTensorNode(SAVE_FOR_BACKWARD)] : TID1082659 <= op(TID1082659(128 128) TID1082620(128 128))>
;;0.005356 | <WfInst[Compiled: MOVETENSORNODE-CPUTENSOR] : TID1082648 <= op(TID1082648(128 128) TID1082620(128 128))> <- Should be deleted!
;;0.262948* | <WfInst[Compiled: SINNODE-LISPTENSOR] : TID1082648 <= op(TID1082659(128 128) TID1082648(128 128))>
;;0.006389 | <WfInst[Compiled: MoveTensorNode(SAVE_FOR_BACKWARD)] : TID1082687 <= op(TID1082687(128 128) TID1082648(128 128))>
;;0.005264 | <WfInst[Compiled: MOVETENSORNODE-CPUTENSOR] : TID1082676 <= op(TID1082676(128 128) TID1082648(128 128))> <- Should be deleted!
;;0.219833* | <WfInst[Compiled: SINNODE-LISPTENSOR] : TID1082676 <= op(TID1082687(128 128) TID1082676(128 128))>
;; In the iseq above, nodes that should be inplace is following:
;; ...
;; TID1082620 <= op(TID1082620, TID1082612)
;;
;; ...
;;
;; The algorithm that detects such a node is simple:
;;
;; reverse-iseq .. Set T if the node is compiled as a toplevel of reverse mode
(defun apply-in-place-mutation! (iseq leaves &key (reverse-iseq nil))
(declare (type list iseq leaves)
(type boolean reverse-iseq)
(optimize (speed 3)))
(let ((ref-table (make-hash-table)))
;; First, Register all tensors appeared in the computation node
(mapc
#'(lambda (variable)
;; Tensors that can be destructed is:
;; InputTensor (set=0)
(if (eql (tensor-attribute variable) :chain)
(setf (gethash (tensor-id variable) ref-table) 0)
(setf (gethash (tensor-id variable) ref-table) nil))) ;; attirubute=:input -> The tensor is parameter/input data, so never destruct it.
leaves)
;; Nodes are the sequence of:
;; OUT_PLACE1 OUT_PLACE2 <- OP(ARG1, ARG2, ARG3, ...)
;; Usually, OUT_PLACE appears in the ARGn (e.g.: out <- op(x, out))
;; Here, we start reading the iseq from the bottom (as the same order of reverse mode), and register all tensors that appeared at `ref-table`.
;; For the first appearance, the number in the ref-table should be 0, and also it is the equivalent to "JUST READING THE LAST USE OF TENSOR"
;; So the latest MoveTensor related to this, should be ignored.
;; If the count is set to >= 1 or nil, the MoveTensorNode is needed (also, we have to pay attention for movetensor-save-for-backward parameter)
;; Which indicatest the call of MoveTensorNode is intended, and shouldn't be ignored.
(loop for instruction in iseq
for nth fixnum upfrom 0 do
(let* ((move-p (movetensor-p (wfop-node instruction)))
(node (wfop-node instruction)))
;; ^ Reading in this way:
;; | [MoveTensorNode] X <- X, Z
;; | [SinNode] OUT <- (OUT, X) ... Reading the value of SinNode, we decide whether delete above MoveTensorNode or not.
;; Instruction: OUT <- OP(out, arg1 arg2, ...)
;; Counting up the number of reference counts, arg1, arg2 ... += 1
;; In this case, we set `out` = 0, because the result overwrites the `out`
;; When found MoveTensorNode:
(when (and
move-p
;; As far as I remember, this condition is intended that the copying for re-aranging memory-layout?
;; But it could be possible to delete it, and delete more MoveTensorNode, especially for viewed tensor.
(not (tensor-protect-me (second (wfop-args instruction))))
(not (movetensor-save-for-backward node)) ;; when :force=t, never deleted.
)
;; Invoking this form == The MoveTensorNode can be deleted as long as ref-table condition is ok.
;; If the instruction corresponds with MoveTensorNode or MoveScalarTensorNode
;; Before counting up the reference, we judge whether MoveTensor is needed.
(let (;;(place (car (wfop-args instruction)))
(target (second (wfop-args instruction))))
;; MoveTensorNode: out <- out, tensor_to_be_copied
;; But with ignored:
;; [Deleted] : out <- _, tensor_to_be_copied ;; <- _ is never allocated
;; The condition to become [deleted] is:
;; tensor_to_be_copied is the last reference in the computation node
(when (or reverse-iseq
(and
(numberp (gethash (tensor-id target) ref-table))
(= 0 (the fixnum (gethash (tensor-id target) ref-table)))))
;; Replace op with the lambda function just returning y
;; The target is allocated:
(if (and reverse-iseq
(cl-waffe2/vm.generic-tensor::vec (car (wfop-args instruction))))
(setf (wfop-op instruction)
#'(lambda (x y)
(setf (tensor-vec x) (tensor-vec y))
y))
(setf (wfop-op instruction) #'(lambda (x y)
(declare (ignore x))
y)))
(setf (movetensor-ignore-me (wfop-node instruction)) t))))
;; Counting up the ref-table
;; OUT <- OP(arg1, arg2, arg3)
;; OUT ... set ref-n=0 because the value is overwritten by OP
;; arg1 arg2 arg3 ... set +=1 as long as registered in the table.
(mapc #'(lambda (arg)
(when (and
move-p
(numberp (gethash (tensor-id arg) ref-table)))
(incf (the fixnum (gethash (tensor-id arg) ref-table)))))
(if (and reverse-iseq move-p (= (length (wfop-args instruction)) 3))
(cdr (wfop-args instruction))
(wfop-args instruction)))
(let ((out-to (wfop-out-to instruction)))
(dolist (out out-to)
(when (numberp (gethash (tensor-id out) ref-table))
(setf (gethash (tensor-id out) ref-table) 0)))))))
nil)
(defun %in-place-vm-ops! (iseq)
;; Make ViewTensorNode/ReshapeNode/PermuteNode In-place
;; Iseq ... Follows the execution order
(declare (type list iseq))
(loop for inst of-type WfInstruction in iseq do
;; ViewTensorNode: Result Base -> Result
;; Set Result.id <- Base.id
(when (or (typep (wfop-node inst) 'cl-waffe2/base-impl::ViewTensorNode))
(iseq-update-tensor-name! iseq
(tensor-id (second (wfop-args inst)))
(tensor-id (car (wfop-args inst)))))
;; Reshape, Permute: Result Base -> Base
;; Base.id <- Result.id
(when (or (typep (wfop-node inst) 'cl-waffe2/base-impl::ReshapeTensorNode)
(typep (wfop-node inst) 'cl-waffe2/base-impl::Permute-Node))
(iseq-update-tensor-name! iseq
(tensor-id (car (wfop-args inst)))
(tensor-id (second (wfop-args inst)))))))
(defun lazy-clone-wreckage (place tensor)
(declare (type AbstractTensor place tensor)
(ignore tensor))
(tensor-vec place)
place)
;; Replaces all MoveTensorNode where :maybe-in-place=t with compiler functions
(defun %prune-maybe-in-place! (iseq)
(declare (type list iseq)
(optimize (speed 3)))
(loop for inst of-type WfInstruction in iseq
if (and (movetensor-p (wfop-node inst))
(not (movetensor-ignore-me (wfop-node inst)))
(move-maybe-in-place (wfop-node inst)))
do (setf (wfop-op inst) #'lazy-clone-wreckage
(wfop-node inst) #'(lambda () "ALLOC{INTERNAL}")))
nil)