Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor: Prerequisite for AutoScheduler #362

Merged
merged 57 commits into from
Dec 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
57 commits
Select commit Hold shift + click to select a range
6782d42
gemm
hikettei Dec 20, 2024
44a490f
Tile
hikettei Dec 20, 2024
610316a
search tile
hikettei Dec 20, 2024
0cd518a
Merge branch 'main' into opt-gemm
hikettei Dec 20, 2024
f6bcc57
OpenBLAS
hikettei Dec 20, 2024
253cf6f
90GFLops
hikettei Dec 20, 2024
d4c08c4
wip
hikettei Dec 20, 2024
db52790
wip
hikettei Dec 20, 2024
082ae9d
wip
hikettei Dec 20, 2024
9446987
tile param search
hikettei Dec 20, 2024
6a92698
finish an experiment
hikettei Dec 20, 2024
0e435c1
apply tiling without causing segv
hikettei Dec 20, 2024
e32cec2
tiling all bands
hikettei Dec 20, 2024
c4215bc
things are moved
hikettei Dec 20, 2024
4c2de70
move everything into auto-scheduler
hikettei Dec 21, 2024
785d2eb
asd
hikettei Dec 21, 2024
34656d2
fix: tiling-size related gc error
hikettei Dec 21, 2024
6a0c211
An initial attempt to unroll: tile based
hikettei Dec 21, 2024
0ab186d
moved
hikettei Dec 21, 2024
2ce3075
split
hikettei Dec 21, 2024
b22d20f
.
hikettei Dec 21, 2024
874cd17
moved isl things to auto-scheduler
hikettei Dec 21, 2024
b47fd09
refactor
hikettei Dec 21, 2024
e92725d
synchronize baseline
hikettei Dec 21, 2024
e1e5dc7
updt
hikettei Dec 21, 2024
dfca38d
updt
hikettei Dec 21, 2024
0ed2740
typo
hikettei Dec 21, 2024
0adb742
typo
hikettei Dec 21, 2024
378c310
typo
hikettei Dec 21, 2024
d2de5a1
Tweak
hikettei Dec 21, 2024
cd01fe9
nested unroll directive
hikettei Dec 21, 2024
64b8c7c
Unroll for static and simple case
hikettei Dec 21, 2024
3ac9213
Fold MOD
hikettei Dec 21, 2024
0d4095f
nope
hikettei Dec 21, 2024
642acc9
Initial attempt of unrolling
hikettei Dec 21, 2024
ea829fd
copy and remove UNROLL_PARENT for the reminder
hikettei Dec 21, 2024
b7ba19f
Loop Unrolling is worked
hikettei Dec 21, 2024
6df4461
Unrolling softmax
hikettei Dec 21, 2024
994a615
wip: generating a sketch
hikettei Dec 21, 2024
ca032f8
Identify the sketch
hikettei Dec 21, 2024
4cdec95
TODO
hikettei Dec 21, 2024
1c5096e
remove extra unroll reminder
hikettei Dec 21, 2024
bfb2d11
coincident for elwise
hikettei Dec 22, 2024
771fa79
fix: unroll
hikettei Dec 22, 2024
aad075a
quickload
hikettei Dec 22, 2024
1b56342
MemoryPlanner should produce the same result
hikettei Dec 22, 2024
e77703a
Fix for unroll reminder
hikettei Dec 22, 2024
0e9fff8
try unrolling tiled bands
hikettei Dec 22, 2024
6674d7b
ignore tiled band dims
hikettei Dec 22, 2024
ce60a52
base-item
hikettei Dec 22, 2024
5ba4fa3
remove: caten/polyhedral
hikettei Dec 22, 2024
26a4a66
clean up
hikettei Dec 22, 2024
e63ac07
rem dep
hikettei Dec 22, 2024
546d0c4
caten instead
hikettei Dec 22, 2024
f73c49f
updt
hikettei Dec 22, 2024
04cb131
updt
hikettei Dec 22, 2024
c832f5e
updt
hikettei Dec 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
move everything into auto-scheduler
  • Loading branch information
hikettei committed Dec 21, 2024
commit 4c2de7041bd4300b9065bf6b634f1ed71c92a132
6 changes: 2 additions & 4 deletions external/backends/metal.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@
(:use :cl :caten/air :cffi :caten/codegen/renderer :caten/codegen/helpers
:caten/codegen/shape-inference :caten/avm :caten/codegen/expr :cl-metal)
(:import-from
:caten/polyhedral
#:define-auto-scheduler
#:make-schedule-options))
:caten/codegen/config
#:define-auto-scheduler))

(in-package :caten/metal)

Expand All @@ -17,7 +16,6 @@

(define-auto-scheduler
(Metal-Auto-Scheduler ())
:cost-functions '(:validity :proximity :coincidence)
:n-global-loop 3)
(define-hook-auto-scheduler (Metal-Renderer Metal-Auto-Scheduler))
(defmethod initialize-instance :after ((metal Metal-Renderer) &rest initargs &key &allow-other-keys)
Expand Down
12 changes: 10 additions & 2 deletions source/codegen/auto-scheduler/auto-scheduler.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,15 @@
(:use :cl))

(in-package :caten/codegen/auto-scheduler)

(defun auto-schedule ()
;; Scheduling commands
;; - [ ] apply-tile
;; - [ ] apply-parallel
;; - [ ] apply-collapse
;; - [ ] apply-fuse
;; - [ ] apply-interchange
;; - [ ] apply-vectorize
;; Purpose: get a list of optimal scheduling commands
;; Note: apply the transformations while actively converting back and forth between the ISL AST and the Blueprint
(defun auto-schedule (item)

)
36 changes: 36 additions & 0 deletions source/codegen/auto-scheduler/config.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
(defpackage :caten/codegen/config
(:use :cl)
(:export
#:Auto-Scheduler-Config
#:define-auto-scheduler
#:auto-scheduler-n-global-loops
#:auto-scheduler-tile-size))

(in-package :caten/codegen/config)

(defclass Auto-Scheduler-Config ()
  ((n-global-loops
    :initarg :n-global-loops
    :initform 0
    :type fixnum
    :accessor auto-scheduler-n-global-loops
    :documentation "Value set from the :n-global-loop option of define-auto-scheduler (default 0).
NOTE(review): presumably the number of outermost loops treated as global/parallel -- confirm against the backends.")
   (auto-scheduler-tile-size
    :initarg :tile-size
    :initform 0
    :type fixnum
    :accessor auto-scheduler-tile-size
    :documentation "Value set from the :tile-size option of define-auto-scheduler (default 0)."))
  (:documentation "Base class holding the auto-scheduler parameters.
Classes created by define-auto-scheduler inherit from this class.
Both slots default to 0 so that a freshly created instance can always be read and printed."))

(defmethod print-object ((config Auto-Scheduler-Config) stream)
  ;; Prints the configuration as an unreadable object showing N-Global-Loops.
  ;; The slot has an initform, so reading it here does not signal UNBOUND-SLOT
  ;; for instances created with a bare MAKE-INSTANCE.
  (print-unreadable-object (config stream :type t)
    (format stream " N-Global-Loops | ~a |~%" (slot-value config 'n-global-loops))))

(defmacro define-auto-scheduler ((name (&rest args))
                                 &key
                                   (n-global-loop 0)
                                   (tile-size 0)
                                   (documentation ""))
  "Defines an auto-scheduler configuration named NAME.

Expands into two definitions:
- a class NAME inheriting from Auto-Scheduler-Config, documented with DOCUMENTATION;
- a function NAME with the lambda list ARGS that creates an instance of that class,
  stores the values of the N-GLOBAL-LOOP and TILE-SIZE forms in it, and returns it.

N-GLOBAL-LOOP and TILE-SIZE are spliced into the body of the constructor function,
so they are evaluated on every call and may refer to the variables bound by ARGS."
  (let* ((config (gensym))
         (class-definition
           `(defclass ,name (Auto-Scheduler-Config)
              ()
              (:documentation ,documentation)))
         (constructor-definition
           `(defun ,name ,args
              (let ((,config (make-instance ',name)))
                ;; Same order as the option list: n-global-loops first, then tile-size.
                (setf (auto-scheduler-n-global-loops ,config) ,n-global-loop)
                (setf (auto-scheduler-tile-size ,config) ,tile-size)
                ,config))))
    `(progn
       ,class-definition
       ,constructor-definition)))
24 changes: 23 additions & 1 deletion source/codegen/auto-scheduler/polyhedral.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
#:poly-schedule
#:poly-domain
#:poly-dependencies
#:map-schedule-nodes))
#:map-schedule-nodes
#:->ast))

(in-package :caten/codegen/polyhedral)

Expand Down Expand Up @@ -166,3 +167,24 @@ This function returns a list of the results of applying f to each node. NIL is e
(when (= (length next-nodes) 0) (return-from map-search))
(setf node (pop next-nodes)))
(nreverse outputs)))

(defun gid (n)
  "Returns the symbol whose name is _gid followed by the printed form of N (e.g. _gid0),
interned in the package that is current when the function is called."
  (let ((symbol-name (concatenate 'string "_gid" (princ-to-string n))))
    (intern symbol-name)))

(defmethod ->ast ((poly Polyhedral-IR) rank)
  "Builds and returns an ISL AST node from the schedule stored in POLY.

The method first sets several ISL AST-build options on the current ISL context
(exploit_nested_bounds, detect_min_max and scale_strides are set to 1; allow_else and
allow_or are set to 0). It then generates the AST from a copy of the schedule
(with the :separate option applied), naming the iterators _gid0, _gid1, ...
Twice as many iterator names as RANK are supplied."
  (macrolet ((set-option (name level)
               ;; Expands to a call of the C function isl_options_set_<name>(ctx, level).
               `(foreign-funcall ,(format nil "isl_options_set_~(~a~)" name)
                                 :pointer (isl::context-handle isl::*context*)
                                 :int ,level
                                 :void)))
    (set-option "ast_build_exploit_nested_bounds" 1)
    (set-option "ast_build_detect_min_max" 1)
    (set-option "ast_build_scale_strides" 1)
    (set-option "ast_build_allow_else" 0)
    (set-option "ast_build_allow_or" 0))
  (let* ((schedule (isl:schedule-set-options (isl:copy (poly-schedule poly)) :separate))
         (base-build (isl:ast-build-from-context (isl:set-from-str "{:}")))
         ;; Original note: rank * tile_bands * vectorizing.
         ;; NOTE(review): the factor 2 presumably leaves room for the extra loops
         ;; introduced by tiling -- confirm.
         (n-iterators (* 2 rank))
         (iterator-names (loop for i upfrom 0 below n-iterators collect (gid i)))
         (build-with-iterators
           (isl:ast-build-set-iterators base-build (apply #'isl:make-id-list iterator-names)))
         (configured-build
           (isl:ast-build-set-options build-with-iterators (isl:union-map-from-str "{}")))
         (ast-node (isl:ast-build-node-from-schedule configured-build schedule)))
    ast-node))
39 changes: 39 additions & 0 deletions source/codegen/auto-scheduler/unroll.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
(defpackage :caten/codegen/unroll
(:use :cl :caten/aasm :caten/air :caten/codegen/polyhedral)
(:export #:apply-packed-funcall))

(in-package :caten/codegen/unroll)

(defun schedule-apply-schedule-option (si idx)
  "Collects the band nodes found in the schedule of SI and returns them.

SI is a Polyhedral-IR. map-schedule-nodes is called with a function that keeps a
node only when its type is :schedule-node-band.

IDX is accepted but not used yet.
NOTE(review): this looks like an unfinished helper; no schedule option is applied so far."
  (declare (type Polyhedral-IR si)
           ;; IDX is reserved for later use; avoid an unused-variable warning.
           (ignorable idx))
  ;; The previous version also PRINTed the bands to standard output (debug leftover);
  ;; the returned value is the same.
  (let ((bands (map-schedule-nodes
                #'(lambda (type node)
                    (when (eql type :schedule-node-band) node))
                si)))
    bands))

(defun apply-packed-funcall (schedule-node gid unroll-by)
  "Groups the iterations of a loop into several packed-funcalls.
A packed-funcall can later be transformed into unrolling or vectorizing.

Intended transformation (UNROLL_BY = 4 in this example). The following code:
```
for (int i=0; i<a; i++) {
  T0(i);
}
```
is mutated into:
```
for (int i=0; i < a - (a % UNROLL_BY); i+=UNROLL_BY) {
  [packed_funcall]
                   { T0(i+0)
  T0'(i, 0~4)  =   { T0(i+1)
                   { T0(i+2)
                   { T0(i+3)
}
for (int i = a - (a % UNROLL_BY); i<a; i+=1) {
  T0(i) // Loop remainder (TODO: Optimize Index Computation)
}
```
The main loop stops at the largest multiple of UNROLL_BY not exceeding a, and the
second loop processes the remaining (a % UNROLL_BY) iterations, so every iteration
is executed exactly once.

Current state: the function only calls schedule-apply-schedule-option on the
:polyhedral attribute of SCHEDULE-NODE and returns its result; GID and UNROLL-BY
are type-checked by the declarations but not used yet."
  (declare (type node schedule-node)
           (type integer unroll-by)
           (type symbol gid)
           ;; Not consumed yet; avoid unused-variable warnings.
           (ignorable gid unroll-by))
  (schedule-apply-schedule-option (getattr schedule-node :polyhedral) nil))
6 changes: 6 additions & 0 deletions source/codegen/auto-scheduler/vectorize.lisp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
(defpackage :caten/codegen/vectorize
(:use :cl))

(in-package :caten/codegen/vectorize)

;; Generic function taking a single argument OP. No methods are defined in this file.
;; NOTE(review): presumably methods will rewrite OP into a vectorized form -- confirm
;; once implementations exist.
(defgeneric %mutate-vectorize (op))
9 changes: 3 additions & 6 deletions source/codegen/backends/clang.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,13 @@
(:use :cl :caten/air :cffi :caten/codegen/renderer :caten/codegen/helpers
:caten/codegen/shape-inference :caten/avm :caten/codegen/expr)
(:import-from
:caten/polyhedral
#:define-auto-scheduler
#:make-schedule-options))
:caten/codegen/config
#:define-auto-scheduler))

(in-package :caten/codegen/backends/clang)

(define-auto-scheduler (Clang-Auto-Scheduler (&key (fuse-softmax 0) (n-global-loop (1- (ctx:getenv :OMP)))))
(define-auto-scheduler (Clang-Auto-Scheduler (&key (n-global-loop (1- (ctx:getenv :OMP)))))
;; Use outermost-loop parallelism to maximize memory locality (better softmax/layernorm scheduling)
:schedule-option (make-schedule-options :schedule-outer-coincidence fuse-softmax)
:cost-functions '(:validity :proximity :coincidence)
:n-global-loop n-global-loop ;; OMP=1 -> The outermost loop is GLOBAL, otherwise everything is a local loop
:tile-size 32 ;; [TODO] Automatic Parameter Tuning
)
Expand Down
7 changes: 4 additions & 3 deletions source/codegen/scop.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -305,16 +305,17 @@ Reference: https://www.researchgate.net/publication/347152973_PET-to-MLIR_A_poly
(assert (eql (node-type node) :Schedule-Item))
(assert (getattr node :blueprint) () "Cannot create a domain w/o lowered blueprint")
(multiple-value-bind (domain read write schedule) (analyze-scop node (map 'list #'car (Getattr node :dynamic-shapes)))
(setf (getattr node :polyhedral) (make-polyhedral-ir domain read write schedule))
(setf (getattr node :polyhedral) (caten/codegen/polyhedral:make-polyhedral-ir domain read write schedule))
(when (>= (ctx:getenv :JIT_DEBUG) 2)
(format t "~a~%" (getattr node :polyhedral)))
node)))

(defmethod auto-schedule (auto-scheduler (node Node))
(assert (getattr node :polyhedral))
(caten/polyhedral:auto-schedule auto-scheduler (getattr node :polyhedral))
;;(caten/polyhedral:auto-schedule auto-scheduler (getattr node :polyhedral))
;; (caten/codegen/unroll::apply-packed-funcall node nil 4)
;; Load blueprint from optimized polyhedral IR
(setf (getattr node :blueprint)
(lower-into-bp-from-polyhedral
(caten/polyhedral:->ast (getattr node :polyhedral) (getattr node :rank))
(caten/codegen/polyhedral:->ast (getattr node :polyhedral) (getattr node :rank))
node)))
1 change: 1 addition & 0 deletions source/isl/package.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -367,6 +367,7 @@
#:isl-printer-to-str
;; from schedule-node.lisp
#:schedule-get-root
#:schedule-node-band-set-ast-build-options
#:schedule-node-delete
#:schedule-node-first-child
#:schedule-node-insert-mark
Expand Down
5 changes: 5 additions & 0 deletions source/isl/schedule-node.lisp
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,8 @@
;; Lisp wrapper SCHEDULE-NODE-FIRST-CHILD around %isl-schedule-node-first-child.
;; Declared as taking one schedule-node and giving back a schedule-node.
(define-isl-function schedule-node-first-child %isl-schedule-node-first-child
(:give schedule-node)
(:take schedule-node))

;; Lisp wrapper SCHEDULE-NODE-BAND-SET-AST-BUILD-OPTIONS around
;; %isl-schedule-node-band-set-ast-build-options.
;; Declared as taking a schedule-node and a union-set (the AST build options)
;; and giving back a schedule-node.
(define-isl-function schedule-node-band-set-ast-build-options %isl-schedule-node-band-set-ast-build-options
(:give schedule-node)
(:take schedule-node)
(:take union-set))
Loading