-
Notifications
You must be signed in to change notification settings - Fork 77
/
Copy pathamd64nt.asm
466 lines (432 loc) · 15 KB
/
amd64nt.asm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
;**************************************************************************
;* *
;* OCaml *
;* *
;* Xavier Leroy, projet Gallium, INRIA Rocquencourt *
;* *
;* Copyright 2006 Institut National de Recherche en Informatique et *
;* en Automatique. *
;* *
;* All rights reserved. This file is distributed under the terms of *
;* the GNU Lesser General Public License version 2.1, with the *
;* special exception on linking described in the file LICENSE. *
;* *
;**************************************************************************
; Asm part of the runtime system, AMD64 processor, Intel syntax
; Notes on Win64 calling conventions:
; function arguments in RCX, RDX, R8, R9 / XMM0 - XMM3
; caller must reserve 32 bytes of stack space
; callee must preserve RBX, RBP, RSI, RDI, R12-R15, XMM6-XMM15
EXTRN caml_garbage_collection: NEAR
EXTRN caml_apply2: NEAR
EXTRN caml_apply3: NEAR
EXTRN caml_program: NEAR
EXTRN caml_array_bound_error: NEAR
EXTRN caml_array_align_error: NEAR
EXTRN caml_stash_backtrace: NEAR
INCLUDE domain_state64.inc
.CODE
PUBLIC caml_system__code_begin
caml_system__code_begin:
ret ; just one instruction, so that debuggers don't display
; caml_system__code_begin instead of caml_call_gc
; Allocation
PUBLIC caml_call_gc
ALIGN 16
caml_call_gc:
; Record lowest stack address and return address
mov r11, [rsp]
Store_last_return_address r11
lea r11, [rsp+8]
Store_bottom_of_stack r11
; Touch the stack to trigger a recoverable segfault
; if insufficient space remains
sub rsp, 01000h
mov [rsp], r11
add rsp, 01000h
; Save young_ptr
Store_young_ptr r15
; Build array of registers, save it into Caml_state(gc_regs)
push rbp
push r11
push r10
push r13
push r12
push r9
push r8
push rcx
push rdx
push rsi
push rdi
push rbx
push rax
Store_gc_regs rsp
; Save floating-point registers
sub rsp, 16*16
movupd QWORD PTR [rsp + 0*16], xmm0
movupd QWORD PTR [rsp + 1*16], xmm1
movupd QWORD PTR [rsp + 2*16], xmm2
movupd QWORD PTR [rsp + 3*16], xmm3
movupd QWORD PTR [rsp + 4*16], xmm4
movupd QWORD PTR [rsp + 5*16], xmm5
movupd QWORD PTR [rsp + 6*16], xmm6
movupd QWORD PTR [rsp + 7*16], xmm7
movupd QWORD PTR [rsp + 8*16], xmm8
movupd QWORD PTR [rsp + 9*16], xmm9
movupd QWORD PTR [rsp + 10*16], xmm10
movupd QWORD PTR [rsp + 11*16], xmm11
movupd QWORD PTR [rsp + 12*16], xmm12
movupd QWORD PTR [rsp + 13*16], xmm13
movupd QWORD PTR [rsp + 14*16], xmm14
movupd QWORD PTR [rsp + 15*16], xmm15
; Call the garbage collector
sub rsp, 32 ; PR#5008: bottom 32 bytes are reserved for callee
call caml_garbage_collection
add rsp, 32 ; PR#5008
; Restore all regs used by the code generator
movupd xmm0, QWORD PTR [rsp + 0*16]
movupd xmm1, QWORD PTR [rsp + 1*16]
movupd xmm2, QWORD PTR [rsp + 2*16]
movupd xmm3, QWORD PTR [rsp + 3*16]
movupd xmm4, QWORD PTR [rsp + 4*16]
movupd xmm5, QWORD PTR [rsp + 5*16]
movupd xmm6, QWORD PTR [rsp + 6*16]
movupd xmm7, QWORD PTR [rsp + 7*16]
movupd xmm8, QWORD PTR [rsp + 8*16]
movupd xmm9, QWORD PTR [rsp + 9*16]
movupd xmm10, QWORD PTR [rsp + 10*16]
movupd xmm11, QWORD PTR [rsp + 11*16]
movupd xmm12, QWORD PTR [rsp + 12*16]
movupd xmm13, QWORD PTR [rsp + 13*16]
movupd xmm14, QWORD PTR [rsp + 14*16]
movupd xmm15, QWORD PTR [rsp + 15*16]
add rsp, 16*16
pop rax
pop rbx
pop rdi
pop rsi
pop rdx
pop rcx
pop r8
pop r9
pop r12
pop r13
pop r10
pop r11
pop rbp
; Restore Caml_state(young_ptr)
Load_young_ptr r15
; Return to caller
ret
PUBLIC caml_alloc1
ALIGN 16
caml_alloc1:
sub r15, 16
Cmp_young_limit r15
jb caml_call_gc
ret
PUBLIC caml_alloc2
ALIGN 16
caml_alloc2:
sub r15, 24
Cmp_young_limit r15
jb caml_call_gc
ret
PUBLIC caml_alloc3
ALIGN 16
caml_alloc3:
sub r15, 32
Cmp_young_limit r15
jb caml_call_gc
ret
PUBLIC caml_allocN
ALIGN 16
caml_allocN:
Cmp_young_limit r15
jb caml_call_gc
ret
; Call a C function from OCaml
PUBLIC caml_c_call
ALIGN 16
caml_c_call:
; Record lowest stack address and return address
pop r12
Store_last_return_address r12
Store_bottom_of_stack rsp
; Touch the stack to trigger a recoverable segfault
; if insufficient space remains
sub rsp, 01000h
mov [rsp], rax
add rsp, 01000h
; Make the alloc ptr available to the C code
Store_young_ptr r15
; Call the function (address in rax)
call rax
; Reload alloc ptr
Load_young_ptr r15
; Return to caller
push r12
ret
; Start the OCaml program
PUBLIC caml_start_program
ALIGN 16
caml_start_program:
; Save callee-save registers
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 8+10*16 ; stack 16-aligned + 10 saved xmm regs
movapd OWORD PTR [rsp + 0*16], xmm6
movapd OWORD PTR [rsp + 1*16], xmm7
movapd OWORD PTR [rsp + 2*16], xmm8
movapd OWORD PTR [rsp + 3*16], xmm9
movapd OWORD PTR [rsp + 4*16], xmm10
movapd OWORD PTR [rsp + 5*16], xmm11
movapd OWORD PTR [rsp + 6*16], xmm12
movapd OWORD PTR [rsp + 7*16], xmm13
movapd OWORD PTR [rsp + 8*16], xmm14
movapd OWORD PTR [rsp + 9*16], xmm15
; First argument (rcx) is Caml_state. Load it in r14
mov r14, rcx
; Initial entry point is caml_program
lea r12, caml_program
; Common code for caml_start_program and caml_callback*
L106:
; Build a callback link
sub rsp, 8 ; stack 16-aligned
Push_gc_regs
Push_last_return_address
Push_bottom_of_stack
; Setup alloc ptr
Load_young_ptr r15
; Build an exception handler
lea r13, L108
push r13
Push_exception_pointer
Store_exception_pointer rsp
; Call the OCaml code
call r12
L107:
; Pop the exception handler
Pop_exception_pointer
pop r12 ; dummy register
L109:
; Update alloc ptr
Store_young_ptr r15
; Pop the callback restoring, link the global variables
Pop_bottom_of_stack
Pop_last_return_address
Pop_gc_regs
add rsp, 8
; Restore callee-save registers.
movapd xmm6, OWORD PTR [rsp + 0*16]
movapd xmm7, OWORD PTR [rsp + 1*16]
movapd xmm8, OWORD PTR [rsp + 2*16]
movapd xmm9, OWORD PTR [rsp + 3*16]
movapd xmm10, OWORD PTR [rsp + 4*16]
movapd xmm11, OWORD PTR [rsp + 5*16]
movapd xmm12, OWORD PTR [rsp + 6*16]
movapd xmm13, OWORD PTR [rsp + 7*16]
movapd xmm14, OWORD PTR [rsp + 8*16]
movapd xmm15, OWORD PTR [rsp + 9*16]
add rsp, 8+10*16
pop r15
pop r14
pop r13
pop r12
pop rdi
pop rsi
pop rbp
pop rbx
; Return to caller
ret
L108:
; Exception handler
; Mark the bucket as an exception result and return it
or rax, 2
jmp L109
; Raise an exception from OCaml
PUBLIC caml_raise_exn
ALIGN 16
caml_raise_exn:
Load_backtrace_active r11
test r11, 1
jne L110
Load_exception_pointer rsp ; Cut stack
; Recover previous exception handler
Pop_exception_pointer
ret ; Branch to handler
L110:
mov r12, rax ; Save exception bucket in r12
mov rcx, rax ; Arg 1: exception bucket
mov rdx, [rsp] ; Arg 2: PC of raise
lea r8, [rsp+8] ; Arg 3: SP of raise
Load_exception_pointer r9 ; Arg 4: SP of handler
sub rsp, 32 ; Reserve 32 bytes on stack
call caml_stash_backtrace
mov rax, r12 ; Recover exception bucket
Load_exception_pointer rsp ; Cut stack
; Recover previous exception handler
Pop_exception_pointer
ret ; Branch to handler
; Raise an exception from C
PUBLIC caml_raise_exception
ALIGN 16
caml_raise_exception:
mov r14, rcx ; First argument is Caml_state
Load_backtrace_active r11
test r11, 1
jne L112
mov rax, rdx ; Second argument is exn bucket
Load_exception_pointer rsp
; Recover previous exception handler
Pop_exception_pointer
Load_young_ptr r15 ; Reload alloc ptr
ret
L112:
mov r12, rdx ; Save exception bucket in r12
mov rcx, rdx ; Arg 1: exception bucket
Load_last_return_address rdx ; Arg 2: PC of raise
Load_bottom_of_stack r8 ; Arg 3: SP of raise
Load_exception_pointer r9 ; Arg 4: SP of handler
sub rsp, 32 ; Reserve 32 bytes on stack
call caml_stash_backtrace
mov rax, r12 ; Recover exception bucket
Load_exception_pointer rsp
; Recover previous exception handler
Pop_exception_pointer
Load_young_ptr r15; Reload alloc ptr
ret
; Callback from C to OCaml
PUBLIC caml_callback_asm
ALIGN 16
caml_callback_asm:
; Save callee-save registers
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 8+10*16 ; stack 16-aligned + 10 saved xmm regs
movapd OWORD PTR [rsp + 0*16], xmm6
movapd OWORD PTR [rsp + 1*16], xmm7
movapd OWORD PTR [rsp + 2*16], xmm8
movapd OWORD PTR [rsp + 3*16], xmm9
movapd OWORD PTR [rsp + 4*16], xmm10
movapd OWORD PTR [rsp + 5*16], xmm11
movapd OWORD PTR [rsp + 6*16], xmm12
movapd OWORD PTR [rsp + 7*16], xmm13
movapd OWORD PTR [rsp + 8*16], xmm14
movapd OWORD PTR [rsp + 9*16], xmm15
; Initial loading of arguments
mov r14, rcx ; Caml_state
mov rbx, rdx ; closure
mov rax, [r8] ; argument
mov r12, [rbx] ; code pointer
jmp L106
PUBLIC caml_callback2_asm
ALIGN 16
caml_callback2_asm:
; Save callee-save registers
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 8+10*16 ; stack 16-aligned + 10 saved xmm regs
movapd OWORD PTR [rsp + 0*16], xmm6
movapd OWORD PTR [rsp + 1*16], xmm7
movapd OWORD PTR [rsp + 2*16], xmm8
movapd OWORD PTR [rsp + 3*16], xmm9
movapd OWORD PTR [rsp + 4*16], xmm10
movapd OWORD PTR [rsp + 5*16], xmm11
movapd OWORD PTR [rsp + 6*16], xmm12
movapd OWORD PTR [rsp + 7*16], xmm13
movapd OWORD PTR [rsp + 8*16], xmm14
movapd OWORD PTR [rsp + 9*16], xmm15
; Initial loading of arguments
mov r14, rcx ; Caml_state
mov rdi, rdx ; closure
mov rax, [r8] ; first argument
mov rbx, [r8 + 8] ; second argument
lea r12, caml_apply2 ; code pointer
jmp L106
PUBLIC caml_callback3_asm
ALIGN 16
caml_callback3_asm:
; Save callee-save registers
push rbx
push rbp
push rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 8+10*16 ; stack 16-aligned + 10 saved xmm regs
movapd OWORD PTR [rsp + 0*16], xmm6
movapd OWORD PTR [rsp + 1*16], xmm7
movapd OWORD PTR [rsp + 2*16], xmm8
movapd OWORD PTR [rsp + 3*16], xmm9
movapd OWORD PTR [rsp + 4*16], xmm10
movapd OWORD PTR [rsp + 5*16], xmm11
movapd OWORD PTR [rsp + 6*16], xmm12
movapd OWORD PTR [rsp + 7*16], xmm13
movapd OWORD PTR [rsp + 8*16], xmm14
movapd OWORD PTR [rsp + 9*16], xmm15
; Initial loading of arguments
mov r14, rcx ; Caml_state
mov rsi, rdx ; closure
mov rax, [r8] ; first argument
mov rbx, [r8 + 8] ; second argument
mov rdi, [r8 + 16] ; third argument
lea r12, caml_apply3 ; code pointer
jmp L106
PUBLIC caml_ml_array_bound_error
ALIGN 16
caml_ml_array_bound_error:
lea rax, caml_array_bound_error
jmp caml_c_call
PUBLIC caml_ml_array_align_error
ALIGN 16
caml_ml_array_align_error:
lea rax, caml_array_align_error
jmp caml_c_call
PUBLIC caml_system__code_end
caml_system__code_end:
.DATA
PUBLIC caml_system__frametable
caml_system__frametable LABEL QWORD
QWORD 1 ; one descriptor
DWORD L107 - THIS BYTE ; return address into callback
WORD -1 ; negative frame size => use callback link
WORD 0 ; no roots here
ALIGN 8
PUBLIC caml_negf_mask
ALIGN 16
caml_negf_mask LABEL QWORD
QWORD 8000000000000000H, 0
PUBLIC caml_absf_mask
ALIGN 16
caml_absf_mask LABEL QWORD
QWORD 7FFFFFFFFFFFFFFFH, 0FFFFFFFFFFFFFFFFH
PUBLIC caml_negf32_mask
ALIGN 16
caml_negf32_mask LABEL QWORD
QWORD 80000000H, 0
PUBLIC caml_absf32_mask
ALIGN 16
caml_absf32_mask LABEL QWORD
QWORD FFFFFFFF7FFFFFFFH, 0FFFFFFFFFFFFFFFFH
END