 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
 // Stack hazard padding size. 0 = disabled.
 static cl::opt<unsigned> StackHazardSize("aarch64-stack-hazard-size",
                                          cl::init(0), cl::Hidden);
+// Stack hazard size for analysis remarks. StackHazardSize takes precedence.
+static cl::opt<unsigned>
+    StackHazardRemarkSize("aarch64-stack-hazard-remark-size", cl::init(0),
+                          cl::Hidden);
 // Whether to insert padding into non-streaming functions (for testing).
 static cl::opt<bool>
     StackHazardInNonStreaming("aarch64-stack-hazard-in-non-streaming",
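Note: the new option lets the hazard analysis run purely for diagnostics, without inserting any padding; when both options are set, StackHazardSize takes precedence. As a usage sketch (hypothetical input file; "sme" is the remark pass name used by emitRemarks below), something like:

    llc -mtriple=aarch64 -aarch64-stack-hazard-remark-size=64 \
        -pass-remarks-analysis=sme input.ll

should surface the remarks for each function.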
@@ -2616,9 +2621,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
   const auto &MFI = MF.getFrameInfo();
 
   int64_t ObjectOffset = MFI.getObjectOffset(FI);
+  StackOffset SVEStackSize = getSVEStackSize(MF);
+
+  // For VLA-area objects, just emit an offset at the end of the stack frame.
+  // Whilst not quite correct, these objects do live at the end of the frame and
+  // so it is more useful for analysis for the offset to reflect this.
+  if (MFI.isVariableSizedObjectIndex(FI)) {
+    return StackOffset::getFixed(-((int64_t)MFI.getStackSize())) - SVEStackSize;
+  }
 
   // This is correct in the absence of any SVE stack objects.
-  StackOffset SVEStackSize = getSVEStackSize(MF);
   if (!SVEStackSize)
     return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());
 
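Note: as a worked example with hypothetical sizes, for MFI.getStackSize() == 32 and an SVE area of 16 * vscale bytes, a variable-sized object is now reported at StackOffset::getFixed(-32) - 16 * vscale, i.e. [SP-32-16 * vscale], the end of the frame.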
@@ -3529,13 +3541,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
   return true;
 }
 
-// Return the FrameID for a Load/Store instruction by looking at the MMO.
-static std::optional<int> getLdStFrameID(const MachineInstr &MI,
-                                         const MachineFrameInfo &MFI) {
-  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
-    return std::nullopt;
-
-  MachineMemOperand *MMO = *MI.memoperands_begin();
+// Return the FrameID for a MMO.
+static std::optional<int> getMMOFrameID(MachineMemOperand *MMO,
+                                        const MachineFrameInfo &MFI) {
   auto *PSV =
       dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue());
   if (PSV)
@@ -3553,6 +3561,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
   return std::nullopt;
 }
 
+// Return the FrameID for a Load/Store instruction by looking at the first MMO.
+static std::optional<int> getLdStFrameID(const MachineInstr &MI,
+                                         const MachineFrameInfo &MFI) {
+  if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+    return std::nullopt;
+
+  return getMMOFrameID(*MI.memoperands_begin(), MFI);
+}
+
 // Check if a Hazard slot is needed for the current function, and if so create
 // one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
 // which can be used to determine if any hazard padding is needed.
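Note: getLdStFrameID is now a thin wrapper over the new getMMOFrameID, so callers can also classify every memory operand of an instruction rather than only the first. A minimal sketch of that pattern (it is what emitRemarks below does):

    for (MachineMemOperand *MMO : MI.memoperands())
      if (std::optional<int> FI = getMMOFrameID(MMO, MFI))
        ; // record an access to frame index *FI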
@@ -5030,3 +5047,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
       MI->eraseFromParent();
   }
 }
+
+struct StackAccess {
+  enum AccessType {
+    NotAccessed = 0, // Stack object not accessed by load/store instructions.
+    GPR = 1 << 0,    // A general purpose register.
+    PPR = 1 << 1,    // A predicate register.
+    FPR = 1 << 2,    // A floating point/Neon/SVE register.
+  };
+
+  int Idx;
+  StackOffset Offset;
+  int64_t Size;
+  unsigned AccessTypes;
+
+  StackAccess() : Idx(0), Offset(), Size(0), AccessTypes(NotAccessed) {}
+
+  bool operator<(const StackAccess &Rhs) const {
+    return std::make_tuple(start(), Idx) <
+           std::make_tuple(Rhs.start(), Rhs.Idx);
+  }
+
+  bool isCPU() const {
+    // Predicate register load and store instructions execute on the CPU.
+    return AccessTypes & (AccessType::GPR | AccessType::PPR);
+  }
+  bool isSME() const { return AccessTypes & AccessType::FPR; }
+  bool isMixed() const { return isCPU() && isSME(); }
+
+  int64_t start() const { return Offset.getFixed() + Offset.getScalable(); }
+  int64_t end() const { return start() + Size; }
+
+  std::string getTypeString() const {
+    switch (AccessTypes) {
+    case AccessType::FPR:
+      return "FPR";
+    case AccessType::PPR:
+      return "PPR";
+    case AccessType::GPR:
+      return "GPR";
+    case AccessType::NotAccessed:
+      return "NA";
+    default:
+      return "Mixed";
+    }
+  }
+
+  void print(raw_ostream &OS) const {
+    OS << getTypeString() << " stack object at [SP"
+       << (Offset.getFixed() < 0 ? "" : "+") << Offset.getFixed();
+    if (Offset.getScalable())
+      OS << (Offset.getScalable() < 0 ? "" : "+") << Offset.getScalable()
+         << " * vscale";
+    OS << "]";
+  }
+};
+
+static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
+  SA.print(OS);
+  return OS;
+}
+
+void AArch64FrameLowering::emitRemarks(
+    const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
+
+  SMEAttrs Attrs(MF.getFunction());
+  if (Attrs.hasNonStreamingInterfaceAndBody())
+    return;
+
+  const uint64_t HazardSize =
+      (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
+
+  if (HazardSize == 0)
+    return;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  // Bail if function has no stack objects.
+  if (!MFI.hasStackObjects())
+    return;
+
+  std::vector<StackAccess> StackAccesses(MFI.getNumObjects());
+
+  size_t NumFPLdSt = 0;
+  size_t NumNonFPLdSt = 0;
+
+  // Collect stack accesses via Load/Store instructions.
+  for (const MachineBasicBlock &MBB : MF) {
+    for (const MachineInstr &MI : MBB) {
+      if (!MI.mayLoadOrStore() || MI.getNumMemOperands() < 1)
+        continue;
+      for (MachineMemOperand *MMO : MI.memoperands()) {
+        std::optional<int> FI = getMMOFrameID(MMO, MFI);
+        if (FI && !MFI.isDeadObjectIndex(*FI)) {
+          int FrameIdx = *FI;
+
+          size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects();
+          if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
+            StackAccesses[ArrIdx].Idx = FrameIdx;
+            StackAccesses[ArrIdx].Offset =
+                getFrameIndexReferenceFromSP(MF, FrameIdx);
+            StackAccesses[ArrIdx].Size = MFI.getObjectSize(FrameIdx);
+          }
+
+          unsigned RegTy = StackAccess::AccessType::GPR;
+          if (MFI.getStackID(FrameIdx) == TargetStackID::ScalableVector) {
+            if (AArch64::PPRRegClass.contains(MI.getOperand(0).getReg()))
+              RegTy = StackAccess::PPR;
+            else
+              RegTy = StackAccess::FPR;
+          } else if (AArch64InstrInfo::isFpOrNEON(MI)) {
+            RegTy = StackAccess::FPR;
+          }
+
+          StackAccesses[ArrIdx].AccessTypes |= RegTy;
+
+          if (RegTy == StackAccess::FPR)
+            ++NumFPLdSt;
+          else
+            ++NumNonFPLdSt;
+        }
+      }
+    }
+  }
+
+  if (NumFPLdSt == 0 || NumNonFPLdSt == 0)
+    return;
+
+  llvm::sort(StackAccesses);
+  StackAccesses.erase(llvm::remove_if(StackAccesses,
+                                      [](const StackAccess &S) {
+                                        return S.AccessTypes ==
+                                               StackAccess::NotAccessed;
+                                      }),
+                      StackAccesses.end());
+
+  SmallVector<const StackAccess *> MixedObjects;
+  SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
+
+  if (StackAccesses.front().isMixed())
+    MixedObjects.push_back(&StackAccesses.front());
+
+  for (auto It = StackAccesses.begin(), End = std::prev(StackAccesses.end());
+       It != End; ++It) {
+    const auto &First = *It;
+    const auto &Second = *(It + 1);
+
+    if (Second.isMixed())
+      MixedObjects.push_back(&Second);
+
+    if ((First.isSME() && Second.isCPU()) ||
+        (First.isCPU() && Second.isSME())) {
+      uint64_t Distance = static_cast<uint64_t>(Second.start() - First.end());
+      if (Distance < HazardSize)
+        HazardPairs.emplace_back(&First, &Second);
+    }
+  }
+
+  auto EmitRemark = [&](llvm::StringRef Str) {
+    ORE->emit([&]() {
+      auto R = MachineOptimizationRemarkAnalysis(
+          "sme", "StackHazard", MF.getFunction().getSubprogram(), &MF.front());
+      return R << formatv("stack hazard in '{0}': ", MF.getName()).str() << Str;
+    });
+  };
+
+  for (const auto &P : HazardPairs)
+    EmitRemark(formatv("{0} is too close to {1}", *P.first, *P.second).str());
+
+  for (const auto *Obj : MixedObjects)
+    EmitRemark(
+        formatv("{0} accessed by both GP and FP instructions", *Obj).str());
+}
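Note: with hypothetical offsets, the emitted analysis remarks read like:

    stack hazard in 'foo': FPR stack object at [SP-16-16 * vscale] is too close to GPR stack object at [SP-8]
    stack hazard in 'foo': Mixed stack object at [SP-24] accessed by both GP and FP instructions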