Skip to content

Commit 5d0afac

Browse files
authored
[BOLT][NFCI] Emit uniform diagnostics in DataAggregator (#136530)
DataAggregator supports reading different kinds of profile data: - perf data: branch records or IP samples, - pre-aggregated branch data. Make profile quality reporting uniform across all kinds of input: - out-of-range and mismatching samples, - samples in cold code in BAT mode (profiled BOLTed binary). Test Plan: NFCI
1 parent 8baa212 commit 5d0afac

File tree

2 files changed

+104
-129
lines changed

2 files changed

+104
-129
lines changed

bolt/include/bolt/Profile/DataAggregator.h

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -223,14 +223,16 @@ class DataAggregator : public DataReader {
223223
bool recordExit(BinaryFunction &BF, uint64_t From, bool Mispred,
224224
uint64_t Count = 1) const;
225225

226-
/// Aggregation statistics
226+
/// Branch stacks aggregation statistics
227+
uint64_t NumTraces{0};
227228
uint64_t NumInvalidTraces{0};
228229
uint64_t NumLongRangeTraces{0};
229230
/// Specifies how many samples were recorded in cold areas if we are dealing
230231
/// with profiling data collected in a bolted binary. For LBRs, incremented
231232
/// for the source of the branch to avoid counting cold activity twice (one
232233
/// for source and another for destination).
233234
uint64_t NumColdSamples{0};
235+
uint64_t NumTotalSamples{0};
234236

235237
/// Looks into system PATH for Linux Perf and set up the aggregator to use it
236238
void findPerfExecutable();
@@ -327,8 +329,8 @@ class DataAggregator : public DataReader {
327329
/// Parse a single LBR entry as output by perf script -Fbrstack
328330
ErrorOr<LBREntry> parseLBREntry();
329331

330-
/// Parse LBR sample, returns the number of traces.
331-
uint64_t parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
332+
/// Parse LBR sample.
333+
void parseLBRSample(const PerfBranchSample &Sample, bool NeedsSkylakeFix);
332334

333335
/// Parse and pre-aggregate branch events.
334336
std::error_code parseBranchEvents();
@@ -487,6 +489,13 @@ class DataAggregator : public DataReader {
487489
void dump(const PerfBranchSample &Sample) const;
488490
void dump(const PerfMemSample &Sample) const;
489491

492+
/// Profile diagnostics print methods
493+
void printColdSamplesDiagnostic() const;
494+
void printLongRangeTracesDiagnostic() const;
495+
void printBranchSamplesDiagnostics() const;
496+
void printBasicSamplesDiagnostics(uint64_t OutOfRangeSamples) const;
497+
void printBranchStacksDiagnostics(uint64_t IgnoredSamples) const;
498+
490499
public:
491500
/// If perf.data was collected without build ids, the buildid-list may contain
492501
/// incomplete entries. Return true if the buffer containing

bolt/lib/Profile/DataAggregator.cpp

Lines changed: 92 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -634,7 +634,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
634634
uint64_t Count) {
635635
BinaryFunction *ParentFunc = getBATParentFunction(OrigFunc);
636636
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
637-
if (ParentFunc)
637+
if (ParentFunc || (BAT && !BAT->isBATFunction(OrigFunc.getAddress())))
638638
NumColdSamples += Count;
639639

640640
auto I = NamesToSamples.find(Func.getOneName());
@@ -756,12 +756,13 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
756756
Addr = BAT->translate(Func->getAddress(), Addr, IsFrom);
757757

758758
BinaryFunction *ParentFunc = getBATParentFunction(*Func);
759+
if (IsFrom &&
760+
(ParentFunc || (BAT && !BAT->isBATFunction(Func->getAddress()))))
761+
NumColdSamples += Count;
762+
759763
if (!ParentFunc)
760764
return std::pair{Func, IsRetOrCallCont};
761765

762-
if (IsFrom)
763-
NumColdSamples += Count;
764-
765766
return std::pair{ParentFunc, IsRetOrCallCont};
766767
};
767768

@@ -1422,9 +1423,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
14221423
return std::error_code();
14231424
}
14241425

1425-
uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1426-
bool NeedsSkylakeFix) {
1427-
uint64_t NumTraces{0};
1426+
void DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1427+
bool NeedsSkylakeFix) {
14281428
// LBRs are stored in reverse execution order. NextLBR refers to the next
14291429
// executed branch record.
14301430
const LBREntry *NextLBR = nullptr;
@@ -1487,19 +1487,93 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
14871487
++Info.TakenCount;
14881488
Info.MispredCount += LBR.Mispred;
14891489
}
1490-
return NumTraces;
1490+
}
1491+
1492+
void DataAggregator::printColdSamplesDiagnostic() const {
1493+
if (NumColdSamples > 0) {
1494+
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1495+
outs() << "PERF2BOLT: " << NumColdSamples
1496+
<< format(" (%.1f%%)", ColdSamples)
1497+
<< " samples recorded in cold regions of split functions.\n";
1498+
if (ColdSamples > 5.0f)
1499+
outs()
1500+
<< "WARNING: The BOLT-processed binary where samples were collected "
1501+
"likely used bad data or your service observed a large shift in "
1502+
"profile. You may want to audit this\n";
1503+
}
1504+
}
1505+
1506+
void DataAggregator::printLongRangeTracesDiagnostic() const {
1507+
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1508+
<< NumLongRangeTraces;
1509+
if (NumTraces > 0)
1510+
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1511+
outs() << "\n";
1512+
}
1513+
1514+
static float printColoredPct(uint64_t Numerator, uint64_t Denominator, float T1,
1515+
float T2) {
1516+
if (Denominator == 0) {
1517+
outs() << "\n";
1518+
return 0;
1519+
}
1520+
float Percent = Numerator * 100.0f / Denominator;
1521+
outs() << " (";
1522+
if (outs().has_colors()) {
1523+
if (Percent > T2)
1524+
outs().changeColor(raw_ostream::RED);
1525+
else if (Percent > T1)
1526+
outs().changeColor(raw_ostream::YELLOW);
1527+
else
1528+
outs().changeColor(raw_ostream::GREEN);
1529+
}
1530+
outs() << format("%.1f%%", Percent);
1531+
if (outs().has_colors())
1532+
outs().resetColor();
1533+
outs() << ")\n";
1534+
return Percent;
1535+
}
1536+
1537+
void DataAggregator::printBranchSamplesDiagnostics() const {
1538+
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1539+
<< NumInvalidTraces;
1540+
if (printColoredPct(NumInvalidTraces, NumTraces, 5, 10) > 10)
1541+
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1542+
"binary is probably not the same binary used during profiling "
1543+
"collection. The generated data may be ineffective for improving "
1544+
"performance\n\n";
1545+
printLongRangeTracesDiagnostic();
1546+
printColdSamplesDiagnostic();
1547+
}
1548+
1549+
void DataAggregator::printBasicSamplesDiagnostics(
1550+
uint64_t OutOfRangeSamples) const {
1551+
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1552+
<< OutOfRangeSamples;
1553+
if (printColoredPct(OutOfRangeSamples, NumTotalSamples, 40, 60) > 80)
1554+
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1555+
"binary is probably not the same binary used during profiling "
1556+
"collection. The generated data may be ineffective for improving "
1557+
"performance\n\n";
1558+
printColdSamplesDiagnostic();
1559+
}
1560+
1561+
void DataAggregator::printBranchStacksDiagnostics(
1562+
uint64_t IgnoredSamples) const {
1563+
outs() << "PERF2BOLT: ignored samples: " << IgnoredSamples;
1564+
if (printColoredPct(IgnoredSamples, NumTotalSamples, 20, 50) > 50)
1565+
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1566+
"were attributed to the input binary\n";
14911567
}
14921568

14931569
std::error_code DataAggregator::parseBranchEvents() {
14941570
outs() << "PERF2BOLT: parse branch events...\n";
14951571
NamedRegionTimer T("parseBranch", "Parsing branch events", TimerGroupName,
14961572
TimerGroupDesc, opts::TimeAggregator);
14971573

1498-
uint64_t NumTotalSamples = 0;
14991574
uint64_t NumEntries = 0;
15001575
uint64_t NumSamples = 0;
15011576
uint64_t NumSamplesNoLBR = 0;
1502-
uint64_t NumTraces = 0;
15031577
bool NeedsSkylakeFix = false;
15041578

15051579
while (hasData() && NumTotalSamples < opts::MaxSamples) {
@@ -1526,30 +1600,14 @@ std::error_code DataAggregator::parseBranchEvents() {
15261600
NeedsSkylakeFix = true;
15271601
}
15281602

1529-
NumTraces += parseLBRSample(Sample, NeedsSkylakeFix);
1603+
parseLBRSample(Sample, NeedsSkylakeFix);
15301604
}
15311605

15321606
for (const Trace &Trace : llvm::make_first_range(BranchLBRs))
15331607
for (const uint64_t Addr : {Trace.From, Trace.To})
15341608
if (BinaryFunction *BF = getBinaryFunctionContainingAddress(Addr))
15351609
BF->setHasProfileAvailable();
15361610

1537-
auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1538-
OS << " (";
1539-
if (OS.has_colors()) {
1540-
if (Percent > T2)
1541-
OS.changeColor(raw_ostream::RED);
1542-
else if (Percent > T1)
1543-
OS.changeColor(raw_ostream::YELLOW);
1544-
else
1545-
OS.changeColor(raw_ostream::GREEN);
1546-
}
1547-
OS << format("%.1f%%", Percent);
1548-
if (OS.has_colors())
1549-
OS.resetColor();
1550-
OS << ")";
1551-
};
1552-
15531611
outs() << "PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
15541612
<< " LBR entries\n";
15551613
if (NumTotalSamples) {
@@ -1561,47 +1619,10 @@ std::error_code DataAggregator::parseBranchEvents() {
15611619
"in no-LBR mode with -nl (the performance improvement in -nl "
15621620
"mode may be limited)\n";
15631621
} else {
1564-
const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1565-
const float PercentIgnored = 100.0f * IgnoredSamples / NumTotalSamples;
1566-
outs() << "PERF2BOLT: " << IgnoredSamples << " samples";
1567-
printColored(outs(), PercentIgnored, 20, 50);
1568-
outs() << " were ignored\n";
1569-
if (PercentIgnored > 50.0f)
1570-
errs() << "PERF2BOLT-WARNING: less than 50% of all recorded samples "
1571-
"were attributed to the input binary\n";
1622+
printBranchStacksDiagnostics(NumTotalSamples - NumSamples);
15721623
}
15731624
}
1574-
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1575-
<< NumInvalidTraces;
1576-
float Perc = 0.0f;
1577-
if (NumTraces > 0) {
1578-
Perc = NumInvalidTraces * 100.0f / NumTraces;
1579-
printColored(outs(), Perc, 5, 10);
1580-
}
1581-
outs() << "\n";
1582-
if (Perc > 10.0f)
1583-
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1584-
"binary is probably not the same binary used during profiling "
1585-
"collection. The generated data may be ineffective for improving "
1586-
"performance.\n\n";
1587-
1588-
outs() << "PERF2BOLT: out of range traces involving unknown regions: "
1589-
<< NumLongRangeTraces;
1590-
if (NumTraces > 0)
1591-
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1592-
outs() << "\n";
1593-
1594-
if (NumColdSamples > 0) {
1595-
const float ColdSamples = NumColdSamples * 100.0f / NumTotalSamples;
1596-
outs() << "PERF2BOLT: " << NumColdSamples
1597-
<< format(" (%.1f%%)", ColdSamples)
1598-
<< " samples recorded in cold regions of split functions.\n";
1599-
if (ColdSamples > 5.0f)
1600-
outs()
1601-
<< "WARNING: The BOLT-processed binary where samples were collected "
1602-
"likely used bad data or your service observed a large shift in "
1603-
"profile. You may want to audit this.\n";
1604-
}
1625+
printBranchSamplesDiagnostics();
16051626

16061627
return std::error_code();
16071628
}
@@ -1658,11 +1679,10 @@ void DataAggregator::processBasicEvents() {
16581679
NamedRegionTimer T("processBasic", "Processing basic events", TimerGroupName,
16591680
TimerGroupDesc, opts::TimeAggregator);
16601681
uint64_t OutOfRangeSamples = 0;
1661-
uint64_t NumSamples = 0;
16621682
for (auto &Sample : BasicSamples) {
16631683
const uint64_t PC = Sample.first;
16641684
const uint64_t HitCount = Sample.second;
1665-
NumSamples += HitCount;
1685+
NumTotalSamples += HitCount;
16661686
BinaryFunction *Func = getBinaryFunctionContainingAddress(PC);
16671687
if (!Func) {
16681688
OutOfRangeSamples += HitCount;
@@ -1671,33 +1691,9 @@ void DataAggregator::processBasicEvents() {
16711691

16721692
doSample(*Func, PC, HitCount);
16731693
}
1674-
outs() << "PERF2BOLT: read " << NumSamples << " samples\n";
1694+
outs() << "PERF2BOLT: read " << NumTotalSamples << " samples\n";
16751695

1676-
outs() << "PERF2BOLT: out of range samples recorded in unknown regions: "
1677-
<< OutOfRangeSamples;
1678-
float Perc = 0.0f;
1679-
if (NumSamples > 0) {
1680-
outs() << " (";
1681-
Perc = OutOfRangeSamples * 100.0f / NumSamples;
1682-
if (outs().has_colors()) {
1683-
if (Perc > 60.0f)
1684-
outs().changeColor(raw_ostream::RED);
1685-
else if (Perc > 40.0f)
1686-
outs().changeColor(raw_ostream::YELLOW);
1687-
else
1688-
outs().changeColor(raw_ostream::GREEN);
1689-
}
1690-
outs() << format("%.1f%%", Perc);
1691-
if (outs().has_colors())
1692-
outs().resetColor();
1693-
outs() << ")";
1694-
}
1695-
outs() << "\n";
1696-
if (Perc > 80.0f)
1697-
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1698-
"binary is probably not the same binary used during profiling "
1699-
"collection. The generated data may be ineffective for improving "
1700-
"performance.\n\n";
1696+
printBasicSamplesDiagnostics(OutOfRangeSamples);
17011697
}
17021698

17031699
std::error_code DataAggregator::parseMemEvents() {
@@ -1775,13 +1771,13 @@ void DataAggregator::processPreAggregated() {
17751771
NamedRegionTimer T("processAggregated", "Processing aggregated branch events",
17761772
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
17771773

1778-
uint64_t NumTraces = 0;
17791774
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
17801775
switch (AggrEntry.EntryType) {
17811776
case AggregatedLBREntry::BRANCH:
17821777
case AggregatedLBREntry::TRACE:
17831778
doBranch(AggrEntry.From.Offset, AggrEntry.To.Offset, AggrEntry.Count,
17841779
AggrEntry.Mispreds);
1780+
NumTotalSamples += AggrEntry.Count;
17851781
break;
17861782
case AggregatedLBREntry::FT:
17871783
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
@@ -1799,37 +1795,7 @@ void DataAggregator::processPreAggregated() {
17991795

18001796
outs() << "PERF2BOLT: read " << AggregatedLBRs.size()
18011797
<< " aggregated LBR entries\n";
1802-
outs() << "PERF2BOLT: traces mismatching disassembled function contents: "
1803-
<< NumInvalidTraces;
1804-
float Perc = 0.0f;
1805-
if (NumTraces > 0) {
1806-
outs() << " (";
1807-
Perc = NumInvalidTraces * 100.0f / NumTraces;
1808-
if (outs().has_colors()) {
1809-
if (Perc > 10.0f)
1810-
outs().changeColor(raw_ostream::RED);
1811-
else if (Perc > 5.0f)
1812-
outs().changeColor(raw_ostream::YELLOW);
1813-
else
1814-
outs().changeColor(raw_ostream::GREEN);
1815-
}
1816-
outs() << format("%.1f%%", Perc);
1817-
if (outs().has_colors())
1818-
outs().resetColor();
1819-
outs() << ")";
1820-
}
1821-
outs() << "\n";
1822-
if (Perc > 10.0f)
1823-
outs() << "\n !! WARNING !! This high mismatch ratio indicates the input "
1824-
"binary is probably not the same binary used during profiling "
1825-
"collection. The generated data may be ineffective for improving "
1826-
"performance.\n\n";
1827-
1828-
outs() << "PERF2BOLT: Out of range traces involving unknown regions: "
1829-
<< NumLongRangeTraces;
1830-
if (NumTraces > 0)
1831-
outs() << format(" (%.1f%%)", NumLongRangeTraces * 100.0f / NumTraces);
1832-
outs() << "\n";
1798+
printBranchSamplesDiagnostics();
18331799
}
18341800

18351801
std::optional<int32_t> DataAggregator::parseCommExecEvent() {

0 commit comments

Comments
 (0)