-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathmemmove-power.s
170 lines (154 loc) · 2.43 KB
/
memmove-power.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/*
* BDNZ label expands to BC 16,0,label and closes each counted copy loop
* in this file (the loop count is moved into CTR before the loop).
* NOTE(review): BO=16 is the PowerPC "decrement CTR, branch if CTR != 0"
* encoding; confirm against the ISA branch-conditional table.
*/
#define BDNZ BC 16,0,
/*
* memcpy entry point: has no code of its own, it branches straight to
* move, the shared copy body that follows the memmove TEXT directive.
* Arguments and return value are therefore exactly those of memmove.
*/
TEXT memcpy(SB), $0
BR move
/*
* memmove: copy n bytes from s2 to s1 and return s1.
* The copy runs backwards when s1 is above s2 (unsigned compare) and
* forwards otherwise, so overlapping regions are copied correctly.
* memcpy enters the same body through the move label.
*
* in:  R3 = s1 (to-pointer), s2 at s2+4(FP), n at n+8(FP)
* out: R3 = s1, reloaded from s1+0(FP) at ret
* written: R7, R9-R14, R16-R19, XER, CTR, condition codes
*
* n == 0 returns at once; n < 0 (signed) goes to trap, which stores
* through R0 and then falls through to ret.
*/
TEXT memmove(SB), $0
move:
/*
* performance:
* (tba)
*/
MOVW R3, s1+0(FP) /* save s1; ret reloads it as the return value */
MOVW n+8(FP), R9 /* R9 is count */
MOVW R3, R10 /* R10 is to-pointer */
CMP R9, $0
BEQ ret /* nothing to copy */
BLT trap /* count is negative as a signed word */
MOVW s2+4(FP), R11 /* R11 is from-pointer */
/*
* if no more than 16 bytes, just use one lsw/stsw
* (at fout the LSW loads every byte before the STSW stores any,
* so this path needs no direction test)
*/
CMP R9, $16
BLE fout
ADD R9,R11, R13 /* R13 is end from-pointer */
ADD R9,R10, R12 /* R12 is end to-pointer */
/*
* easiest test is copy backwards if
* destination string has higher mem address
*/
CMPU R10, R11 /* unsigned: these are addresses */
BGT back
/*
* test if both pointers
* are similarly word aligned
* (the low two bits of to and from agree)
*/
XOR R10,R11, R7
ANDCC $3,R7
BNE fbad
/*
* move a few bytes to align pointers:
* 4 - (to & 3) bytes go through R16 and are taken off the count
*/
ANDCC $3,R10,R7
BEQ f2 /* already word aligned */
SUBC R7, $4, R7 /* R7 = 4 - R7, bytes needed to reach alignment */
SUB R7, R9 /* R9 -= R7 */
MOVW R7, XER /* byte count for the LSW/STSW pair */
LSW (R11), R16
ADD R7, R11
STSW R16, (R10)
ADD R7, R10
/*
* R14 = R9 >> 4, the number of 16-byte blocks (four words each);
* copy 16 bytes at a time while there's room.
*/
f2:
SRAWCC $4, R9, R14
BLE fout /* fewer than 16 bytes left: finish with one lsw/stsw */
MOVW R14, CTR
SUB $4, R11 /* bias both pointers by -4: MOVWU 4(Rx) accesses Rx+4 and leaves Rx advanced */
SUB $4, R10
f3:
MOVWU 4(R11), R16
MOVWU R16, 4(R10)
MOVWU 4(R11), R17
MOVWU R17, 4(R10)
MOVWU 4(R11), R16
MOVWU R16, 4(R10)
MOVWU 4(R11), R17
MOVWU R17, 4(R10)
BDNZ f3
RLWNMCC $0, R9, $15, R9 /* residue: R9 &= 15, sets condition codes */
BEQ ret
ADD $4, R11 /* undo the -4 bias before the tail copy */
ADD $4, R10
/*
* move up to 16 bytes through R16 .. R19; aligned and unaligned
* (byte count is passed in XER)
*/
fout:
MOVW R9, XER
LSW (R11), R16
STSW R16, (R10)
BR ret
/*
* loop for unaligned copy, then copy up to 15 remaining bytes
*/
fbad:
SRAWCC $4, R9, R14 /* R14 = number of 16-byte blocks */
BLE f6
MOVW R14, CTR
f5:
LSW (R11), $16, R16 /* 16 bytes into R16 .. R19 */
ADD $16, R11
STSW R16, $16, (R10)
ADD $16, R10
BDNZ f5
RLWNMCC $0, R9, $15, R9 /* residue */
BEQ ret
f6:
MOVW R9, XER /* 1 .. 15 leftover bytes in one lsw/stsw */
LSW (R11), R16
STSW R16, (R10)
BR ret
/*
* whole thing repeated for backwards:
* the end pointers R13 (from) and R12 (to) walk downwards.
* unlike the forward case, differently aligned ends are copied
* one byte at a time at bout.
*/
back:
CMP R9, $4 /* NOTE(review): R9 > 16 whenever back is reached, so this never branches as written */
BLT bout
XOR R12,R13, R7
ANDCC $3,R7
BNE bout /* ends not similarly aligned: byte copy */
b1:
ANDCC $3,R13, R7 /* copy single bytes until the from end is word aligned */
BEQ b2
MOVBZU -1(R13), R16
MOVBZU R16, -1(R12)
SUB $1, R9
BR b1
b2:
SRAWCC $4, R9, R14 /* R14 = number of 16-byte blocks */
BLE b4
MOVW R14, CTR
b3:
MOVWU -4(R13), R16
MOVWU R16, -4(R12)
MOVWU -4(R13), R17
MOVWU R17, -4(R12)
MOVWU -4(R13), R16
MOVWU R16, -4(R12)
MOVWU -4(R13), R17
MOVWU R17, -4(R12)
BDNZ b3
RLWNMCC $0, R9, $15, R9 /* residue */
BEQ ret
b4:
SRAWCC $2, R9, R14 /* R14 = number of whole words left */
BLE bout
MOVW R14, CTR
b5:
MOVWU -4(R13), R16
MOVWU R16, -4(R12)
BDNZ b5
RLWNMCC $0, R9, $3, R9 /* residue */
BEQ ret
/*
* byte-at-a-time backward copy; finished once the from end pointer
* has come back down to the from start (R11, untouched on this path)
*/
bout:
CMPU R13, R11
BLE ret
MOVBZU -1(R13), R16
MOVBZU R16, -1(R12)
BR bout
/*
* negative count: store R0 through address 0(R0), then fall into ret.
* NOTE(review): presumably meant to fault at address 0 with R0 == 0;
* the disabled MOVW below suggests R0 is expected to be zero already.
*/
trap:
/* MOVW $0, R0 */
MOVW R0, 0(R0)
ret:
MOVW s1+0(FP), R3 /* return value: the original to-pointer */
RETURN