Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 2fb759f

Browse files
author
Weiming Zhao
committed
builtins: Add ARM Thumb1 implementation for uidiv and uidivmod
This is a resubmit of r288710 due to breakage of Darwin armv7em. git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@288777 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent a358984 commit 2fb759f

File tree

3 files changed

+131
-20
lines changed

3 files changed

+131
-20
lines changed

lib/builtins/arm/aeabi_uidivmod.S

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,20 @@
2323
.syntax unified
2424
.p2align 2
2525
DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
26+
#if __ARM_ARCH_ISA_THUMB == 1
27+
/*
 * Thumb1 implementation of __aeabi_uidivmod.
 *
 * In:   r0 = numerator, r1 = denominator
 * Out:  r0 = quotient,  r1 = remainder
 *
 * When the numerator is smaller than the denominator the answer is known
 * without dividing (quotient 0, remainder = numerator).  Otherwise the
 * quotient comes from __aeabi_uidiv and the remainder is reconstructed as
 * numerator - quotient * denominator.
 */
        cmp     r0, r1
        bcc     LOCAL_LABEL(case_denom_larger)
        push    {r0, r1, lr}            // keep numerator, denominator and return address
        bl      SYMBOL_NAME(__aeabi_uidiv)
        pop     {r1, r2, r3}            // r1 = numerator, r2 = denominator, r3 = return address
        /*
         * Thumb1 MULS requires the destination to be the last source operand
         * (MULS Rdm, Rn, Rdm), so the operands are written in that order.
         */
        muls    r2, r0, r2              // r2 = quot * denom
        subs    r1, r1, r2              // r1 = numerator - quot * denom
        JMP     (r3)
LOCAL_LABEL(case_denom_larger):
        movs    r1, r0                  // remainder = numerator
        movs    r0, #0                  // quotient  = 0
        JMP     (lr)
39+
#else
2640
push { lr }
2741
sub sp, sp, #4
2842
mov r2, sp
@@ -35,6 +49,7 @@ DEFINE_COMPILERRT_FUNCTION(__aeabi_uidivmod)
3549
ldr r1, [sp]
3650
add sp, sp, #4
3751
pop { pc }
52+
#endif
3853
END_COMPILERRT_FUNCTION(__aeabi_uidivmod)
3954

4055
NO_EXEC_STACK_DIRECTIVE

lib/builtins/arm/udivsi3.S

Lines changed: 114 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -40,12 +40,26 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
4040
#else
4141
cmp r1, #1
4242
bcc LOCAL_LABEL(divby0)
43+
#if __ARM_ARCH_ISA_THUMB == 1
44+
bne LOCAL_LABEL(num_neq_denom)
45+
JMP(lr)
46+
LOCAL_LABEL(num_neq_denom):
47+
#else
4348
IT(eq)
4449
JMPc(lr, eq)
50+
#endif
4551
cmp r0, r1
52+
#if __ARM_ARCH_ISA_THUMB == 1
53+
bhs LOCAL_LABEL(num_ge_denom)
54+
movs r0, #0
55+
JMP(lr)
56+
LOCAL_LABEL(num_ge_denom):
57+
#else
4658
ITT(cc)
4759
movcc r0, #0
4860
JMPc(lr, cc)
61+
#endif
62+
4963
/*
5064
* Implement division using binary long division algorithm.
5165
*
@@ -62,7 +76,7 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
6276
* that (r0 << shift) < 2 * r1. The quotient is stored in r3.
6377
*/
6478

65-
# ifdef __ARM_FEATURE_CLZ
79+
# if defined(__ARM_FEATURE_CLZ)
6680
clz ip, r0
6781
clz r3, r1
6882
/* r0 >= r1 implies clz(r0) <= clz(r1), so ip <= r3. */
@@ -77,49 +91,128 @@ DEFINE_COMPILERRT_FUNCTION(__udivsi3)
7791
sub ip, ip, r3, lsl #3
7892
mov r3, #0
7993
bx ip
80-
# else
94+
# else /* No CLZ Feature */
8195
# if __ARM_ARCH_ISA_THUMB == 2
8296
# error THUMB mode requires CLZ or UDIV
8397
# endif
98+
# if __ARM_ARCH_ISA_THUMB == 1
99+
# define BLOCK_SIZE 10
100+
# else
101+
# define BLOCK_SIZE 12
102+
# endif
103+
84104
mov r2, r0
105+
# if __ARM_ARCH_ISA_THUMB == 1
106+
mov ip, r0
107+
adr r0, LOCAL_LABEL(div0block)
108+
adds r0, #1
109+
# else
85110
adr ip, LOCAL_LABEL(div0block)
86-
87-
lsr r3, r2, #16
111+
# endif
112+
lsrs r3, r2, #16
88113
cmp r3, r1
114+
# if __ARM_ARCH_ISA_THUMB == 1
115+
blo LOCAL_LABEL(skip_16)
116+
movs r2, r3
117+
subs r0, r0, #(16 * BLOCK_SIZE)
118+
LOCAL_LABEL(skip_16):
119+
# else
89120
movhs r2, r3
90-
subhs ip, ip, #(16 * 12)
121+
subhs ip, ip, #(16 * BLOCK_SIZE)
122+
# endif
91123

92-
lsr r3, r2, #8
124+
lsrs r3, r2, #8
93125
cmp r3, r1
126+
# if __ARM_ARCH_ISA_THUMB == 1
127+
blo LOCAL_LABEL(skip_8)
128+
movs r2, r3
129+
subs r0, r0, #(8 * BLOCK_SIZE)
130+
LOCAL_LABEL(skip_8):
131+
# else
94132
movhs r2, r3
95-
subhs ip, ip, #(8 * 12)
133+
subhs ip, ip, #(8 * BLOCK_SIZE)
134+
# endif
96135

97-
lsr r3, r2, #4
136+
lsrs r3, r2, #4
98137
cmp r3, r1
138+
# if __ARM_ARCH_ISA_THUMB == 1
139+
blo LOCAL_LABEL(skip_4)
140+
movs r2, r3
141+
subs r0, r0, #(4 * BLOCK_SIZE)
142+
LOCAL_LABEL(skip_4):
143+
# else
99144
movhs r2, r3
100-
subhs ip, #(4 * 12)
145+
subhs ip, #(4 * BLOCK_SIZE)
146+
# endif
101147

102-
lsr r3, r2, #2
148+
lsrs r3, r2, #2
103149
cmp r3, r1
150+
# if __ARM_ARCH_ISA_THUMB == 1
151+
blo LOCAL_LABEL(skip_2)
152+
movs r2, r3
153+
subs r0, r0, #(2 * BLOCK_SIZE)
154+
LOCAL_LABEL(skip_2):
155+
# else
104156
movhs r2, r3
105-
subhs ip, ip, #(2 * 12)
157+
subhs ip, ip, #(2 * BLOCK_SIZE)
158+
# endif
106159

107160
/* Last block, no need to update r2 or r3. */
161+
# if __ARM_ARCH_ISA_THUMB == 1
162+
lsrs r3, r2, #1
163+
cmp r3, r1
164+
blo LOCAL_LABEL(skip_1)
165+
subs r0, r0, #(1 * BLOCK_SIZE)
166+
LOCAL_LABEL(skip_1):
167+
movs r2, r0
168+
mov r0, ip
169+
movs r3, #0
170+
JMP (r2)
171+
172+
# else
108173
cmp r1, r2, lsr #1
109-
subls ip, ip, #(1 * 12)
174+
subls ip, ip, #(1 * BLOCK_SIZE)
110175

111-
mov r3, #0
176+
movs r3, #0
112177

113178
JMP(ip)
114-
# endif
179+
# endif
180+
# endif /* __ARM_FEATURE_CLZ */
181+
115182

116183
#define IMM #
184+
/*
 * Division-by-zero handler.
 *
 * Conditional branches have a limited range in Thumb1, so this handler is
 * placed here, close to the branch that targets it.
 *
 * The result register r0 is set to 0.  Under the ARM EABI, __aeabi_idiv0 is
 * then called; it has to be reached with bl because of relocation limits,
 * and bl overwrites lr.  The return address is therefore saved on the stack
 * across the call and the function returns through the saved copy rather
 * than through lr.
 */
LOCAL_LABEL(divby0):
        movs    r0, #0
#  if defined(__ARM_EABI__)
        push    {r3, lr}                // save return address; r3 only pads the frame to 8 bytes
        bl      __aeabi_idiv0           // due to relocation limit, can't use b.
        pop     {r2, r3}                // r2 = padding (discarded), r3 = saved return address
        JMP(r3)
#  else
        JMP(lr)
#  endif
117192

193+
194+
#if __ARM_ARCH_ISA_THUMB == 1
195+
#define block(shift) \
196+
lsls r2, r1, IMM shift; \
197+
cmp r0, r2; \
198+
blo LOCAL_LABEL(block_skip_##shift); \
199+
subs r0, r0, r2; \
200+
LOCAL_LABEL(block_skip_##shift) :; \
201+
adcs r3, r3 /* same as ((r3 << 1) | Carry). Carry is set if r0 >= r2. */
202+
203+
/* TODO: if current location counter is not word aligned, we don't
204+
need the .p2align and nop */
205+
/* Label div0block must be word-aligned. First align block 31 */
206+
.p2align 2
207+
nop /* Padding to align div0block as 31 blocks = 310 bytes */
208+
209+
#else
118210
#define block(shift) \
119211
cmp r0, r1, lsl IMM shift; \
120212
ITT(hs); \
121213
WIDE(addhs) r3, r3, IMM (1 << shift); \
122214
WIDE(subhs) r0, r0, r1, lsl IMM shift
215+
#endif
123216

124217
block(31)
125218
block(30)
@@ -159,12 +252,14 @@ LOCAL_LABEL(div0block):
159252
JMP(lr)
160253
#endif /* __ARM_ARCH_EXT_IDIV__ */
161254

255+
#if __ARM_ARCH_EXT_IDIV__
162256
LOCAL_LABEL(divby0):
163-
mov r0, #0
164-
#ifdef __ARM_EABI__
165-
b __aeabi_idiv0
166-
#else
167-
JMP(lr)
257+
mov r0, #0
258+
# ifdef __ARM_EABI__
259+
b __aeabi_idiv0
260+
# else
261+
JMP(lr)
262+
# endif
168263
#endif
169264

170265
END_COMPILERRT_FUNCTION(__udivsi3)

lib/builtins/assembly.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,8 @@
7171
#define ARM_HAS_BX
7272
#endif
7373
#if !defined(__ARM_FEATURE_CLZ) && \
74-
(__ARM_ARCH >= 6 || (__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
74+
((__ARM_ARCH >= 6 && __ARM_ARCH_PROFILE != 'M') || \
75+
(__ARM_ARCH == 5 && !defined(__ARM_ARCH_5__)))
7576
#define __ARM_FEATURE_CLZ
7677
#endif
7778

0 commit comments

Comments
 (0)