
Commit b78324f

Author: Jessica Paquette

[AArch64][GlobalISel] Teach AArch64CallLowering to handle basic sibling calls

This adds support for basic sibling call lowering in AArch64. The intent here is to only handle tail calls which do not change the ABI (hence, sibling calls.)

At this point, it is very restricted. It does not handle

- Vararg calls.
- Calls with outgoing arguments.
- Calls whose calling conventions differ from the caller's calling convention.
- Tail/sibling calls with BTI enabled.

This patch adds

- `AArch64CallLowering::isEligibleForTailCallOptimization`, which is equivalent to the same function in AArch64ISelLowering.cpp (albeit with the restrictions above.)
- `mayTailCallThisCC` and `canGuaranteeTCO`, which are identical to those in AArch64ISelLowering.cpp.
- `getCallOpcode`, which is exactly what it sounds like.

Tail/sibling calls are lowered by checking if they pass target-independent tail call positioning checks, and checking if they satisfy `isEligibleForTailCallOptimization`. If they do, then a tail call instruction is emitted instead of a normal call. If we have a sibling call (which is always the case in this patch), then we do not emit any stack adjustment operations. When we go to lower a return, we check if we've already emitted a tail call. If so, then we skip the return lowering.

For testing, this patch

- Adds call-translator-tail-call.ll to test which tail calls we currently lower, which ones we don't, and which ones we shouldn't.
- Updates branch-target-enforcement-indirect-calls.ll to show that we fall back as expected.

Differential Revision: https://reviews.llvm.org/D67189

llvm-svn: 370996
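
As an illustration (the function names below are hypothetical, not from the patch), here is a minimal IR sketch of a call this patch can now lower as a sibling call, next to one that still falls back to a normal call because it has outgoing arguments. The same cases appear in the new test as tail_call and test_outgoing_args.

declare void @no_args_callee()
declare void @one_arg_callee(i32)

; Same calling convention, no outgoing arguments, not variadic: with this
; patch the IRTranslator should emit TCRETURNdi instead of BL, with no
; ADJCALLSTACKDOWN/ADJCALLSTACKUP pair.
define void @sibling_caller() {
  tail call void @no_args_callee()
  ret void
}

; Outgoing arguments are not handled yet, so this is still lowered as a
; normal BL bracketed by stack adjustment instructions.
define void @not_yet_sibling_caller(i32 %x) {
  tail call void @one_arg_callee(i32 %x)
  ret void
}
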
1 parent 4be6706 commit b78324f

File tree

7 files changed, +321 -9 lines changed

llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h

Lines changed: 7 additions & 0 deletions
@@ -90,6 +90,13 @@ class CallLowering {
 
     /// True if the call must be tail call optimized.
     bool IsMustTailCall = false;
+
+    /// True if the call passes all target-independent checks for tail call
+    /// optimization.
+    bool IsTailCall = false;
+
+    /// True if the call is to a vararg function.
+    bool IsVarArg = false;
   };
 
   /// Argument handling is mostly uniform between the four places that

llvm/lib/CodeGen/GlobalISel/CallLowering.cpp

Lines changed: 3 additions & 1 deletion
@@ -64,7 +64,9 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
   Info.CallConv = CS.getCallingConv();
   Info.SwiftErrorVReg = SwiftErrorVReg;
   Info.IsMustTailCall = CS.isMustTailCall();
-
+  Info.IsTailCall = CS.isTailCall() &&
+                    isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+  Info.IsVarArg = CS.getFunctionType()->isVarArg();
   return lowerCall(MIRBuilder, Info);
 }
 

llvm/lib/Target/AArch64/AArch64CallLowering.cpp

Lines changed: 169 additions & 7 deletions
@@ -233,6 +233,17 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
                                       ArrayRef<Register> VRegs,
                                       Register SwiftErrorVReg) const {
+
+  // Check if a tail call was lowered in this block. If so, we already handled
+  // the terminator.
+  MachineFunction &MF = MIRBuilder.getMF();
+  if (MF.getFrameInfo().hasTailCall()) {
+    MachineBasicBlock &MBB = MIRBuilder.getMBB();
+    auto FirstTerm = MBB.getFirstTerminator();
+    if (FirstTerm != MBB.end() && FirstTerm->isCall())
+      return true;
+  }
+
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
@@ -403,6 +414,129 @@ bool AArch64CallLowering::lowerFormalArguments(
   return true;
 }
 
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+  return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::C:
+  case CallingConv::PreserveMost:
+  case CallingConv::Swift:
+    return true;
+  default:
+    return canGuaranteeTCO(CC);
+  }
+}
+
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
+  CallingConv::ID CalleeCC = Info.CallConv;
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &CallerF = MF.getFunction();
+  CallingConv::ID CallerCC = CallerF.getCallingConv();
+  bool CCMatch = CallerCC == CalleeCC;
+
+  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+  if (!mayTailCallThisCC(CalleeCC)) {
+    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+    return false;
+  }
+
+  if (Info.IsVarArg) {
+    LLVM_DEBUG(dbgs() << "... Tail calling varargs not supported yet.\n");
+    return false;
+  }
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call. Working around this *is* possible (see
+  // X86).
+  //
+  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+  // it?
+  //
+  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+  // In this case, it is necessary to save/restore X0 in the callee. Tail
+  // call opt interferes with this. So we disable tail call opt when the
+  // caller has an argument with "inreg" attribute.
+  //
+  // FIXME: Check whether the callee also has an "inreg" argument.
+  if (any_of(CallerF.args(), [](const Argument &A) {
+        return A.hasByValAttr() || A.hasInRegAttr();
+      })) {
+    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval or "
+                         "inreg arguments.\n");
+    return false;
+  }
+
+  // Externally-defined functions with weak linkage should not be
+  // tail-called on AArch64 when the OS does not support dynamic
+  // pre-emption of symbols, as the AAELF spec requires normal calls
+  // to undefined weak functions to be replaced with a NOP or jump to the
+  // next instruction. The behaviour of branch instructions in this
+  // situation (as used for tail calls) is implementation-defined, so we
+  // cannot rely on the linker replacing the tail call with a return.
+  if (Info.Callee.isGlobal()) {
+    const GlobalValue *GV = Info.Callee.getGlobal();
+    const Triple &TT = MF.getTarget().getTargetTriple();
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+         TT.isOSBinFormatMachO())) {
+      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+                           "with weak linkage for this OS.\n");
+      return false;
+    }
+  }
+
+  // If we have -tailcallopt and matching CCs, at this point, we could return
+  // true. However, we don't have full tail call support yet. So, continue
+  // checking. We want to emit a sibling call.
+
+  // I want anyone implementing a new calling convention to think long and hard
+  // about this assert.
+  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+         "Unexpected variadic calling convention");
+
+  // For now, only support the case where the calling conventions match.
+  if (!CCMatch) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call with mismatched calling conventions yet.\n");
+    return false;
+  }
+
+  // For now, only handle callees that take no arguments.
+  if (!Info.OrigArgs.empty()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call callees with outgoing arguments yet.\n");
+    return false;
+  }
+
+  LLVM_DEBUG(
+      dbgs() << "... Call is eligible for tail call optimization.\n");
+  return true;
+}
+
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+                              bool IsTailCall) {
+  if (!IsTailCall)
+    return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+  if (!IsIndirect)
+    return AArch64::TCRETURNdi;
+
+  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+  // x16 or x17.
+  if (CallerF.hasFnAttribute("branch-target-enforcement"))
+    return AArch64::TCRETURNriBTI;
+
+  return AArch64::TCRETURNri;
+}
+
 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                     CallLoweringInfo &Info) const {
   MachineFunction &MF = MIRBuilder.getMF();
@@ -411,6 +545,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   auto &DL = F.getParent()->getDataLayout();
 
   if (Info.IsMustTailCall) {
+    // TODO: Until we lower all tail calls, we should fall back on this.
     LLVM_DEBUG(dbgs() << "Cannot lower musttail calls yet.\n");
     return false;
   }
@@ -423,21 +558,45 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     SplitArgs.back().Flags[0].setZExt();
   }
 
+  bool IsSibCall =
+      Info.IsTailCall && isEligibleForTailCallOptimization(MIRBuilder, Info);
+  if (IsSibCall)
+    MF.getFrameInfo().setHasTailCall();
+
   // Find out which ABI gets to decide where things go.
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   CCAssignFn *AssignFnFixed =
       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
   CCAssignFn *AssignFnVarArg =
       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
 
-  auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+  // If we have a sibling call, then we don't have to adjust the stack.
+  // Otherwise, we need to adjust it.
+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
-  auto MIB = MIRBuilder.buildInstrNoInsert(Info.Callee.isReg() ? AArch64::BLR
-                                                               : AArch64::BL);
+  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), IsSibCall);
+
+  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+  // register class. Until we can do that, we should fall back here.
+  if (Opc == AArch64::TCRETURNriBTI) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+    return false;
+  }
+
+  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
+  // Add the byte offset for the tail call. We only have sibling calls, so this
+  // is always 0.
+  // TODO: Handle tail calls where we will have a different value here.
+  if (IsSibCall)
+    MIB.addImm(0);
+
   // Tell the call which registers are clobbered.
   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
@@ -486,10 +645,13 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }
 
-  CallSeqStart.addImm(Handler.StackSize).addImm(0);
-  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
-      .addImm(Handler.StackSize)
-      .addImm(0);
+  if (!IsSibCall) {
+    // If we aren't sibcalling, we need to move the stack.
+    CallSeqStart.addImm(Handler.StackSize).addImm(0);
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+        .addImm(Handler.StackSize)
+        .addImm(0);
+  }
 
   return true;
 }

llvm/lib/Target/AArch64/AArch64CallLowering.h

Lines changed: 4 additions & 0 deletions
@@ -43,6 +43,10 @@ class AArch64CallLowering: public CallLowering {
   bool lowerCall(MachineIRBuilder &MIRBuilder,
                  CallLoweringInfo &Info) const override;
 
+  /// Returns true if the call can be lowered as a tail call.
+  bool isEligibleForTailCallOptimization(MachineIRBuilder &MIRBuilder,
+                                         CallLoweringInfo &Info) const;
+
   bool supportSwiftError() const override { return true; }
 
 private:
call-translator-tail-call.ll (new file)

Lines changed: 134 additions & 0 deletions
@@ -0,0 +1,134 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-apple-darwin -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=DARWIN,COMMON
+; RUN: llc %s -stop-after=irtranslator -verify-machineinstrs -mtriple aarch64-windows -global-isel -o - 2>&1 | FileCheck %s --check-prefixes=WINDOWS,COMMON
+
+declare void @simple_fn()
+define void @tail_call() {
+  ; COMMON-LABEL: name: tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   TCRETURNdi @simple_fn, 0, csr_aarch64_aapcs, implicit $sp
+  tail call void @simple_fn()
+  ret void
+}
+
+; We should get a TCRETURNri here.
+; FIXME: We don't need the COPY.
+define void @indirect_tail_call(void()* %func) {
+  ; COMMON-LABEL: name: indirect_tail_call
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   liveins: $x0
+  ; COMMON:   [[COPY:%[0-9]+]]:tcgpr64(p0) = COPY $x0
+  ; COMMON:   TCRETURNri [[COPY]](p0), 0, csr_aarch64_aapcs, implicit $sp
+  tail call void %func()
+  ret void
+}
+
+declare void @outgoing_args_fn(i32)
+; Right now, callees with outgoing arguments should not be tail called.
+; TODO: Support this.
+define void @test_outgoing_args(i32 %a) {
+  ; COMMON-LABEL: name: test_outgoing_args
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   liveins: $w0
+  ; COMMON:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   $w0 = COPY [[COPY]](s32)
+  ; COMMON:   BL @outgoing_args_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void @outgoing_args_fn(i32 %a)
+  ret void
+}
+
+; Right now, this should not be tail called.
+; TODO: Support this.
+declare void @varargs(i32, double, i64, ...)
+define void @test_varargs() {
+  ; COMMON-LABEL: name: test_varargs
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 42
+  ; COMMON:   [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
+  ; COMMON:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   $w0 = COPY [[C]](s32)
+  ; COMMON:   $d0 = COPY [[C1]](s64)
+  ; COMMON:   $x1 = COPY [[C2]](s64)
+  ; COMMON:   BL @varargs, csr_aarch64_aapcs, implicit-def $lr, implicit $sp, implicit $w0, implicit $d0, implicit $x1
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void(i32, double, i64, ...) @varargs(i32 42, double 1.0, i64 12)
+  ret void
+}
+
+; Unsupported calling convention for tail calls. Make sure we never tail call
+; it.
+declare ghccc void @bad_call_conv_fn()
+define void @test_bad_call_conv() {
+  ; COMMON-LABEL: name: test_bad_call_conv
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @bad_call_conv_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call ghccc void @bad_call_conv_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has byval arguments.
+define void @test_byval(i8* byval %ptr) {
+  ; COMMON-LABEL: name: test_byval
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; COMMON:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.0, align 1)
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the caller has inreg arguments.
+define void @test_inreg(i8* inreg %ptr) {
+  ; COMMON-LABEL: name: test_inreg
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   liveins: $x0
+  ; COMMON:   [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @simple_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call void @simple_fn()
+  ret void
+}
+
+; Shouldn't tail call when the OS doesn't support it. Windows supports this,
+; so we should be able to tail call there.
+declare extern_weak void @extern_weak_fn()
+define void @test_extern_weak() {
+  ; DARWIN-LABEL: name: test_extern_weak
+  ; DARWIN: bb.1 (%ir-block.0):
+  ; DARWIN:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN:   BL @extern_weak_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; DARWIN:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; DARWIN:   RET_ReallyLR
+  ; WINDOWS-LABEL: name: test_extern_weak
+  ; WINDOWS: bb.1 (%ir-block.0):
+  ; WINDOWS:   TCRETURNdi @extern_weak_fn, 0, csr_aarch64_aapcs, implicit $sp
+  tail call void @extern_weak_fn()
+  ret void
+}
+
+; Right now, mismatched calling conventions should not be tail called.
+; TODO: Support this.
+declare fastcc void @fast_fn()
+define void @test_mismatched_caller() {
+  ; COMMON-LABEL: name: test_mismatched_caller
+  ; COMMON: bb.1 (%ir-block.0):
+  ; COMMON:   ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   BL @fast_fn, csr_aarch64_aapcs, implicit-def $lr, implicit $sp
+  ; COMMON:   ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+  ; COMMON:   RET_ReallyLR
+  tail call fastcc void @fast_fn()
+  ret void
+}
