@@ -40,12 +40,26 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
40
40
#else
41
41
cmp r1 , # 1
42
42
bcc LOCAL_LABEL(divby0)
43
+ #if __ARM_ARCH_ISA_THUMB == 1
44
+ bne LOCAL_LABEL(num_neq_denom)
45
+ JMP (lr)
46
+ LOCAL_LABEL(num_neq_denom):
47
+ #else
43
48
IT(eq)
44
49
JMPc(lr , eq)
50
+ #endif
45
51
cmp r0 , r1
52
+ #if __ARM_ARCH_ISA_THUMB == 1
53
+ bhs LOCAL_LABEL(num_ge_denom)
54
+ movs r0 , # 0
55
+ JMP (lr)
56
+ LOCAL_LABEL(num_ge_denom):
57
+ #else
46
58
ITT(cc)
47
59
movcc r0 , # 0
48
60
JMPc(lr , cc)
61
+ #endif
62
+
49
63
/ *
50
64
* Implement division using binary long division algorithm.
51
65
*
@@ -62,7 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
62
76
* that (r0 << shift) < 2 * r1. The quotient is stored in r3.
63
77
* /
64
78
65
- # ifdef __ARM_FEATURE_CLZ
79
+ # if defined( __ARM_FEATURE_CLZ)
66
80
clz ip , r0
67
81
clz r3 , r1
68
82
/ * r0 >= r1 implies clz(r0) <= clz(r1) , so ip <= r3. * /
@@ -77,49 +91,128 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
77
91
sub ip , ip , r3 , lsl # 3
78
92
mov r3 , # 0
79
93
bx ip
80
- # else
94
+ # else / * No CLZ Feature * /
81
95
# if __ARM_ARCH_ISA_THUMB == 2
82
96
# error THUMB mode requires CLZ or UDIV
83
97
# endif
98
+ # if __ARM_ARCH_ISA_THUMB == 1
99
+ # define BLOCK_SIZE 10
100
+ # else
101
+ # define BLOCK_SIZE 12
102
+ # endif
103
+
84
104
mov r2 , r0
105
+ # if __ARM_ARCH_ISA_THUMB == 1
106
+ mov ip , r0
107
+ adr r0 , LOCAL_LABEL(div0block)
108
+ adds r0 , # 1
109
+ # else
85
110
adr ip , LOCAL_LABEL(div0block)
86
-
87
- lsr r3 , r2 , # 16
111
+ # endif
112
+ lsrs r3 , r2 , # 16
88
113
cmp r3 , r1
114
+ # if __ARM_ARCH_ISA_THUMB == 1
115
+ blo LOCAL_LABEL(skip_16)
116
+ movs r2 , r3
117
+ subs r0 , r0 , #( 16 * BLOCK_SIZE)
118
+ LOCAL_LABEL(skip_16):
119
+ # else
89
120
movhs r2 , r3
90
- subhs ip , ip , #( 16 * 12 )
121
+ subhs ip , ip , #( 16 * BLOCK_SIZE)
122
+ # endif
91
123
92
- lsr r3 , r2 , # 8
124
+ lsrs r3 , r2 , # 8
93
125
cmp r3 , r1
126
+ # if __ARM_ARCH_ISA_THUMB == 1
127
+ blo LOCAL_LABEL(skip_8)
128
+ movs r2 , r3
129
+ subs r0 , r0 , #( 8 * BLOCK_SIZE)
130
+ LOCAL_LABEL(skip_8):
131
+ # else
94
132
movhs r2 , r3
95
- subhs ip , ip , #( 8 * 12 )
133
+ subhs ip , ip , #( 8 * BLOCK_SIZE)
134
+ # endif
96
135
97
- lsr r3 , r2 , # 4
136
+ lsrs r3 , r2 , # 4
98
137
cmp r3 , r1
138
+ # if __ARM_ARCH_ISA_THUMB == 1
139
+ blo LOCAL_LABEL(skip_4)
140
+ movs r2 , r3
141
+ subs r0 , r0 , #( 4 * BLOCK_SIZE)
142
+ LOCAL_LABEL(skip_4):
143
+ # else
99
144
movhs r2 , r3
100
- subhs ip , #( 4 * 12 )
145
+ subhs ip , #( 4 * BLOCK_SIZE)
146
+ # endif
101
147
102
- lsr r3 , r2 , # 2
148
+ lsrs r3 , r2 , # 2
103
149
cmp r3 , r1
150
+ # if __ARM_ARCH_ISA_THUMB == 1
151
+ blo LOCAL_LABEL(skip_2)
152
+ movs r2 , r3
153
+ subs r0 , r0 , #( 2 * BLOCK_SIZE)
154
+ LOCAL_LABEL(skip_2):
155
+ # else
104
156
movhs r2 , r3
105
- subhs ip , ip , #( 2 * 12 )
157
+ subhs ip , ip , #( 2 * BLOCK_SIZE)
158
+ # endif
106
159
107
160
/ * Last block , no need to update r2 or r3. * /
161
+ # if __ARM_ARCH_ISA_THUMB == 1
162
+ lsrs r3 , r2 , # 1
163
+ cmp r3 , r1
164
+ blo LOCAL_LABEL(skip_1)
165
+ subs r0 , r0 , #( 1 * BLOCK_SIZE)
166
+ LOCAL_LABEL(skip_1):
167
+ movs r2 , r0
168
+ mov r0 , ip
169
+ movs r3 , # 0
170
+ JMP (r2)
171
+
172
+ # else
108
173
cmp r1 , r2 , lsr # 1
109
- subls ip , ip , #( 1 * 12 )
174
+ subls ip , ip , #( 1 * BLOCK_SIZE )
110
175
111
- mov r3 , # 0
176
+ movs r3 , # 0
112
177
113
178
JMP ( ip )
114
- # endif
179
+ # endif
180
+ # endif / * __ARM_FEATURE_CLZ * /
181
+
115
182
116
183
#define IMM #
184
+ / * Due to the limited range of branches in Thumb1 , this block has to be
185
+ placed close to the branch that targets it. * /
186
+ LOCAL_LABEL(divby0):
187
+ movs r0 , # 0
188
+ # if defined(__ARM_EABI__)
189
+ bl __aeabi_idiv0 // due to relocation limit , can't use b.
190
+ # endif
191
+ JMP (lr)
117
192
193
+
194
+ #if __ARM_ARCH_ISA_THUMB == 1
195
+ #define block(shift) \
196
+ lsls r2 , r1 , IMM shift ; \
197
+ cmp r0 , r2 ; \
198
+ blo LOCAL_LABEL(block_skip_##shift) ; \
199
+ subs r0 , r0 , r2 ; \
200
+ LOCAL_LABEL(block_skip_##shift) : ; \
201
+ adcs r3 , r3 / * same as ((r3 << 1 ) | Carry). Carry is set if r0 >= r2. * /
202
+
203
+ / * TODO: if current location counter is not word aligned , we don't
204
+ need the .p2align and nop * /
205
+ / * Label div0block must be word - aligned. First align block 31 * /
206
+ .p2align 2
207
+ nop / * Padding so that div0block ends up word-aligned: the 31 blocks before it occupy 31 * 10 = 310 bytes * /
208
+
209
+ #else
118
210
#define block(shift) \
119
211
cmp r0 , r1 , lsl IMM shift ; \
120
212
ITT(hs) ; \
121
213
WIDE(addhs) r3 , r3 , IMM ( 1 << shift) ; \
122
214
WIDE(subhs) r0 , r0 , r1 , lsl IMM shift
215
+ #endif
123
216
124
217
block( 31 )
125
218
block( 30 )
@@ -159,12 +252,14 @@ LOCAL_LABEL(div0block):
159
252
JMP (lr)
160
253
#endif / * __ARM_ARCH_EXT_IDIV__ * /
161
254
255
+ #if __ARM_ARCH_EXT_IDIV__
162
256
LOCAL_LABEL(divby0):
163
- mov r0 , # 0
164
- #ifdef __ARM_EABI__
165
- b __aeabi_idiv0
166
- #else
167
- JMP (lr)
257
+ mov r0 , # 0
258
+ # ifdef __ARM_EABI__
259
+ b __aeabi_idiv0
260
+ # else
261
+ JMP (lr)
262
+ # endif
168
263
#endif
169
264
170
265
END_COMPILERRT_FUNCTION(__udivsi3)
0 commit comments