@@ -233,6 +233,17 @@ bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
                                       const Value *Val,
                                       ArrayRef<Register> VRegs,
                                       Register SwiftErrorVReg) const {
+
+  // Check if a tail call was lowered in this block. If so, we already handled
+  // the terminator.
+  MachineFunction &MF = MIRBuilder.getMF();
+  if (MF.getFrameInfo().hasTailCall()) {
+    MachineBasicBlock &MBB = MIRBuilder.getMBB();
+    auto FirstTerm = MBB.getFirstTerminator();
+    if (FirstTerm != MBB.end() && FirstTerm->isCall())
+      return true;
+  }
+
   auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
   assert(((Val && !VRegs.empty()) || (!Val && VRegs.empty())) &&
          "Return value without a vreg");
@@ -403,6 +414,129 @@ bool AArch64CallLowering::lowerFormalArguments(
   return true;
 }
 
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+  return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+  switch (CC) {
+  case CallingConv::C:
+  case CallingConv::PreserveMost:
+  case CallingConv::Swift:
+    return true;
+  default:
+    return canGuaranteeTCO(CC);
+  }
+}
+
+bool AArch64CallLowering::isEligibleForTailCallOptimization(
+    MachineIRBuilder &MIRBuilder, CallLoweringInfo &Info) const {
+  CallingConv::ID CalleeCC = Info.CallConv;
+  MachineFunction &MF = MIRBuilder.getMF();
+  const Function &CallerF = MF.getFunction();
+  CallingConv::ID CallerCC = CallerF.getCallingConv();
+  bool CCMatch = CallerCC == CalleeCC;
+
+  LLVM_DEBUG(dbgs() << "Attempting to lower call as tail call\n");
+
+  if (!mayTailCallThisCC(CalleeCC)) {
+    LLVM_DEBUG(dbgs() << "... Calling convention cannot be tail called.\n");
+    return false;
+  }
+
+  if (Info.IsVarArg) {
+    LLVM_DEBUG(dbgs() << "... Tail calling varargs not supported yet.\n");
+    return false;
+  }
+
+  // Byval parameters hand the function a pointer directly into the stack area
+  // we want to reuse during a tail call. Working around this *is* possible (see
+  // X86).
+  //
+  // FIXME: In AArch64ISelLowering, this isn't worked around. Can/should we try
+  // it?
+  //
+  // On Windows, "inreg" attributes signify non-aggregate indirect returns.
+  // In this case, it is necessary to save/restore X0 in the callee. Tail
+  // call opt interferes with this. So we disable tail call opt when the
+  // caller has an argument with "inreg" attribute.
+  //
+  // FIXME: Check whether the callee also has an "inreg" argument.
+  if (any_of(CallerF.args(), [](const Argument &A) {
+        return A.hasByValAttr() || A.hasInRegAttr();
+      })) {
+    LLVM_DEBUG(dbgs() << "... Cannot tail call from callers with byval or "
+                         "inreg arguments.\n");
+    return false;
+  }
+
+  // Externally-defined functions with weak linkage should not be
+  // tail-called on AArch64 when the OS does not support dynamic
+  // pre-emption of symbols, as the AAELF spec requires normal calls
+  // to undefined weak functions to be replaced with a NOP or jump to the
+  // next instruction. The behaviour of branch instructions in this
+  // situation (as used for tail calls) is implementation-defined, so we
+  // cannot rely on the linker replacing the tail call with a return.
+  if (Info.Callee.isGlobal()) {
+    const GlobalValue *GV = Info.Callee.getGlobal();
+    const Triple &TT = MF.getTarget().getTargetTriple();
+    if (GV->hasExternalWeakLinkage() &&
+        (!TT.isOSWindows() || TT.isOSBinFormatELF() ||
+         TT.isOSBinFormatMachO())) {
+      LLVM_DEBUG(dbgs() << "... Cannot tail call externally-defined function "
+                           "with weak linkage for this OS.\n");
+      return false;
+    }
+  }
+
+  // If we have -tailcallopt and matching CCs, at this point, we could return
+  // true. However, we don't have full tail call support yet. So, continue
+  // checking. We want to emit a sibling call.
+
+  // I want anyone implementing a new calling convention to think long and hard
+  // about this assert.
+  assert((!Info.IsVarArg || CalleeCC == CallingConv::C) &&
+         "Unexpected variadic calling convention");
+
+  // For now, only support the case where the calling conventions match.
+  if (!CCMatch) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call with mismatched calling conventions yet.\n");
+    return false;
+  }
+
+  // For now, only handle callees that take no arguments.
+  if (!Info.OrigArgs.empty()) {
+    LLVM_DEBUG(
+        dbgs()
+        << "... Cannot tail call callees with outgoing arguments yet.\n");
+    return false;
+  }
+
+  LLVM_DEBUG(
+      dbgs() << "... Call is eligible for tail call optimization.\n");
+  return true;
+}
+
+static unsigned getCallOpcode(const Function &CallerF, bool IsIndirect,
+                              bool IsTailCall) {
+  if (!IsTailCall)
+    return IsIndirect ? AArch64::BLR : AArch64::BL;
+
+  if (!IsIndirect)
+    return AArch64::TCRETURNdi;
+
+  // When BTI is enabled, we need to use TCRETURNriBTI to make sure that we use
+  // x16 or x17.
+  if (CallerF.hasFnAttribute("branch-target-enforcement"))
+    return AArch64::TCRETURNriBTI;
+
+  return AArch64::TCRETURNri;
+}
+
 bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
                                     CallLoweringInfo &Info) const {
   MachineFunction &MF = MIRBuilder.getMF();
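
Aside (not part of the patch): the eligibility checks added above reduce to a small decision procedure. The following is a minimal standalone sketch in plain C++ of how the calling-convention gating and the temporary sibling-call restrictions combine; the CC enum, CallSite struct, and isEligibleForSibCall are invented stand-ins, not the LLVM API, and the weak-linkage check is simplified into a single flag-free omission.

#include <cstdio>

// Simplified stand-ins for LLVM's CallingConv IDs (names are illustrative).
enum class CC { C, Fast, PreserveMost, Swift, Win64 };

// Mirrors canGuaranteeTCO in the patch: only fastcc guarantees TCO.
static bool canGuaranteeTCO(CC Conv) { return Conv == CC::Fast; }

// Mirrors mayTailCallThisCC: conventions we might ever tail call.
static bool mayTailCallThisCC(CC Conv) {
  switch (Conv) {
  case CC::C:
  case CC::PreserveMost:
  case CC::Swift:
    return true;
  default:
    return canGuaranteeTCO(Conv);
  }
}

// A reduced model of the sibling-call checks (weak-linkage handling omitted).
struct CallSite {
  CC CallerCC;
  CC CalleeCC;
  bool IsVarArg;
  bool CallerHasByValOrInReg;
  unsigned NumOutgoingArgs;
};

static bool isEligibleForSibCall(const CallSite &CS) {
  if (!mayTailCallThisCC(CS.CalleeCC))
    return false;                  // convention can never be tail called
  if (CS.IsVarArg)
    return false;                  // varargs not supported yet
  if (CS.CallerHasByValOrInReg)
    return false;                  // byval/inreg args block stack reuse
  if (CS.CallerCC != CS.CalleeCC)
    return false;                  // mismatched conventions not handled yet
  if (CS.NumOutgoingArgs != 0)
    return false;                  // only argument-free callees for now
  return true;
}

int main() {
  CallSite Simple{CC::C, CC::C, false, false, 0};
  CallSite VarArg{CC::C, CC::C, true, false, 0};
  std::printf("simple:  %s\n", isEligibleForSibCall(Simple) ? "sibcall" : "normal call");
  std::printf("varargs: %s\n", isEligibleForSibCall(VarArg) ? "sibcall" : "normal call");
}

Running it prints "sibcall" for the argument-free C-convention call and "normal call" once varargs are involved, which is exactly the split the patch makes before choosing a call opcode.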
@@ -411,6 +545,7 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
   auto &DL = F.getParent()->getDataLayout();
 
   if (Info.IsMustTailCall) {
+    // TODO: Until we lower all tail calls, we should fall back on this.
     LLVM_DEBUG(dbgs() << "Cannot lower musttail calls yet.\n");
     return false;
   }
@@ -423,21 +558,45 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     SplitArgs.back().Flags[0].setZExt();
   }
 
+  bool IsSibCall =
+      Info.IsTailCall && isEligibleForTailCallOptimization(MIRBuilder, Info);
+  if (IsSibCall)
+    MF.getFrameInfo().setHasTailCall();
+
   // Find out which ABI gets to decide where things go.
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
   CCAssignFn *AssignFnFixed =
       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
   CCAssignFn *AssignFnVarArg =
       TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
 
-  auto CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+  // If we have a sibling call, then we don't have to adjust the stack.
+  // Otherwise, we need to adjust it.
+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
   // Create a temporarily-floating call instruction so we can add the implicit
   // uses of arg registers.
-  auto MIB = MIRBuilder.buildInstrNoInsert(Info.Callee.isReg() ? AArch64::BLR
-                                                               : AArch64::BL);
+  unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), IsSibCall);
+
+  // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
+  // register class. Until we can do that, we should fall back here.
+  if (Opc == AArch64::TCRETURNriBTI) {
+    LLVM_DEBUG(
+        dbgs() << "Cannot lower indirect tail calls with BTI enabled yet.\n");
+    return false;
+  }
+
+  auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
+  // Add the byte offset for the tail call. We only have sibling calls, so this
+  // is always 0.
+  // TODO: Handle tail calls where we will have a different value here.
+  if (IsSibCall)
+    MIB.addImm(0);
+
   // Tell the call which registers are clobbered.
   auto TRI = MF.getSubtarget<AArch64Subtarget>().getRegisterInfo();
   const uint32_t *Mask = TRI->getCallPreservedMask(MF, F.getCallingConv());
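
Aside (not part of the patch): the opcode choice in this hunk funnels through getCallOpcode from the earlier hunk. A tiny standalone illustration of that decision follows, with a plain enum standing in for the TableGen-generated AArch64 opcodes; the names and pickCallOpcode helper below are placeholders, not LLVM definitions.

#include <cstdio>

// Placeholder names for the real TableGen-generated AArch64 opcodes.
enum class Opcode { BL, BLR, TCRETURNdi, TCRETURNri, TCRETURNriBTI };

// Mirrors getCallOpcode: ordinary calls use BL/BLR; sibling calls use the
// tc-return pseudos, with the BTI variant forcing x16/x17 for indirect calls.
static Opcode pickCallOpcode(bool IsIndirect, bool IsTailCall, bool CallerHasBTI) {
  if (!IsTailCall)
    return IsIndirect ? Opcode::BLR : Opcode::BL;
  if (!IsIndirect)
    return Opcode::TCRETURNdi;
  return CallerHasBTI ? Opcode::TCRETURNriBTI : Opcode::TCRETURNri;
}

int main() {
  // A direct sibling call from a non-BTI caller selects TCRETURNdi.
  bool IsTCRETURNdi =
      pickCallOpcode(/*IsIndirect=*/false, /*IsTailCall=*/true,
                     /*CallerHasBTI=*/false) == Opcode::TCRETURNdi;
  std::printf("direct sibcall -> TCRETURNdi? %s\n", IsTCRETURNdi ? "yes" : "no");
}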
@@ -486,10 +645,13 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }
 
-  CallSeqStart.addImm(Handler.StackSize).addImm(0);
-  MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
-      .addImm(Handler.StackSize)
-      .addImm(0);
+  if (!IsSibCall) {
+    // If we aren't sibcalling, we need to move the stack.
+    CallSeqStart.addImm(Handler.StackSize).addImm(0);
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
+        .addImm(Handler.StackSize)
+        .addImm(0);
+  }
 
   return true;
 }
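
Aside (not part of the patch): a rough sketch of how the pieces interact end to end, in plain C++ with invented ToyBlock/ToyFrame types rather than the real MachineIRBuilder API. lowerCall emits a tc-return terminator, skips the call-sequence pseudos, and records the tail call on the frame info; lowerReturn then sees that the block is already terminated by a call and skips emitting RET_ReallyLR, which is what the lowerReturn hunk at the top of this diff implements.

#include <cstdio>
#include <string>
#include <vector>

// Toy stand-ins for a machine basic block and frame info (illustrative only).
struct ToyBlock {
  std::vector<std::string> Insts;
  bool terminatedByCall() const {
    return !Insts.empty() && Insts.back().rfind("TCRETURN", 0) == 0;
  }
};

struct ToyFrame {
  bool HasTailCall = false;
};

// Roughly what the patched lowerCall does: pick BL vs. TCRETURNdi and skip the
// ADJCALLSTACKDOWN/UP pair when emitting a sibling call.
static void lowerCall(ToyBlock &MBB, ToyFrame &MFI, bool IsSibCall) {
  if (!IsSibCall)
    MBB.Insts.push_back("ADJCALLSTACKDOWN 0, 0");
  MBB.Insts.push_back(IsSibCall ? "TCRETURNdi @callee, 0" : "BL @callee");
  if (!IsSibCall)
    MBB.Insts.push_back("ADJCALLSTACKUP 0, 0");
  if (IsSibCall)
    MFI.HasTailCall = true;
}

// Roughly what the patched lowerReturn does: if the block was already
// terminated by a tail call, there is nothing left to emit.
static void lowerReturn(ToyBlock &MBB, const ToyFrame &MFI) {
  if (MFI.HasTailCall && MBB.terminatedByCall())
    return;
  MBB.Insts.push_back("RET_ReallyLR");
}

int main() {
  ToyBlock MBB;
  ToyFrame MFI;
  lowerCall(MBB, MFI, /*IsSibCall=*/true);
  lowerReturn(MBB, MFI);
  for (const std::string &I : MBB.Insts)
    std::printf("%s\n", I.c_str()); // prints only the TCRETURNdi terminator
}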