@@ -634,7 +634,7 @@ bool DataAggregator::doSample(BinaryFunction &OrigFunc, uint64_t Address,
634
634
uint64_t Count) {
635
635
BinaryFunction *ParentFunc = getBATParentFunction (OrigFunc);
636
636
BinaryFunction &Func = ParentFunc ? *ParentFunc : OrigFunc;
637
- if (ParentFunc)
637
+ if (ParentFunc || (BAT && !BAT-> isBATFunction (OrigFunc. getAddress ())) )
638
638
NumColdSamples += Count;
639
639
640
640
auto I = NamesToSamples.find (Func.getOneName ());
@@ -756,12 +756,13 @@ bool DataAggregator::doBranch(uint64_t From, uint64_t To, uint64_t Count,
756
756
Addr = BAT->translate (Func->getAddress (), Addr, IsFrom);
757
757
758
758
BinaryFunction *ParentFunc = getBATParentFunction (*Func);
759
+ if (IsFrom &&
760
+ (ParentFunc || (BAT && !BAT->isBATFunction (Func->getAddress ()))))
761
+ NumColdSamples += Count;
762
+
759
763
if (!ParentFunc)
760
764
return std::pair{Func, IsRetOrCallCont};
761
765
762
- if (IsFrom)
763
- NumColdSamples += Count;
764
-
765
766
return std::pair{ParentFunc, IsRetOrCallCont};
766
767
};
767
768
@@ -1422,9 +1423,8 @@ std::error_code DataAggregator::printLBRHeatMap() {
1422
1423
return std::error_code ();
1423
1424
}
1424
1425
1425
- uint64_t DataAggregator::parseLBRSample (const PerfBranchSample &Sample,
1426
- bool NeedsSkylakeFix) {
1427
- uint64_t NumTraces{0 };
1426
+ void DataAggregator::parseLBRSample (const PerfBranchSample &Sample,
1427
+ bool NeedsSkylakeFix) {
1428
1428
// LBRs are stored in reverse execution order. NextLBR refers to the next
1429
1429
// executed branch record.
1430
1430
const LBREntry *NextLBR = nullptr ;
@@ -1487,19 +1487,93 @@ uint64_t DataAggregator::parseLBRSample(const PerfBranchSample &Sample,
1487
1487
++Info.TakenCount ;
1488
1488
Info.MispredCount += LBR.Mispred ;
1489
1489
}
1490
- return NumTraces;
1490
+ }
1491
+
1492
+ void DataAggregator::printColdSamplesDiagnostic () const {
1493
+ if (NumColdSamples > 0 ) {
1494
+ const float ColdSamples = NumColdSamples * 100 .0f / NumTotalSamples;
1495
+ outs () << " PERF2BOLT: " << NumColdSamples
1496
+ << format (" (%.1f%%)" , ColdSamples)
1497
+ << " samples recorded in cold regions of split functions.\n " ;
1498
+ if (ColdSamples > 5 .0f )
1499
+ outs ()
1500
+ << " WARNING: The BOLT-processed binary where samples were collected "
1501
+ " likely used bad data or your service observed a large shift in "
1502
+ " profile. You may want to audit this\n " ;
1503
+ }
1504
+ }
1505
+
1506
+ void DataAggregator::printLongRangeTracesDiagnostic () const {
1507
+ outs () << " PERF2BOLT: out of range traces involving unknown regions: "
1508
+ << NumLongRangeTraces;
1509
+ if (NumTraces > 0 )
1510
+ outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1511
+ outs () << " \n " ;
1512
+ }
1513
+
1514
+ static float printColoredPct (uint64_t Numerator, uint64_t Denominator, float T1,
1515
+ float T2) {
1516
+ if (Denominator == 0 ) {
1517
+ outs () << " \n " ;
1518
+ return 0 ;
1519
+ }
1520
+ float Percent = Numerator * 100 .0f / Denominator;
1521
+ outs () << " (" ;
1522
+ if (outs ().has_colors ()) {
1523
+ if (Percent > T2)
1524
+ outs ().changeColor (raw_ostream::RED);
1525
+ else if (Percent > T1)
1526
+ outs ().changeColor (raw_ostream::YELLOW);
1527
+ else
1528
+ outs ().changeColor (raw_ostream::GREEN);
1529
+ }
1530
+ outs () << format (" %.1f%%" , Percent);
1531
+ if (outs ().has_colors ())
1532
+ outs ().resetColor ();
1533
+ outs () << " )\n " ;
1534
+ return Percent;
1535
+ }
1536
+
1537
+ void DataAggregator::printBranchSamplesDiagnostics () const {
1538
+ outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1539
+ << NumInvalidTraces;
1540
+ if (printColoredPct (NumInvalidTraces, NumTraces, 5 , 10 ) > 10 )
1541
+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1542
+ " binary is probably not the same binary used during profiling "
1543
+ " collection. The generated data may be ineffective for improving "
1544
+ " performance\n\n " ;
1545
+ printLongRangeTracesDiagnostic ();
1546
+ printColdSamplesDiagnostic ();
1547
+ }
1548
+
1549
+ void DataAggregator::printBasicSamplesDiagnostics (
1550
+ uint64_t OutOfRangeSamples) const {
1551
+ outs () << " PERF2BOLT: out of range samples recorded in unknown regions: "
1552
+ << OutOfRangeSamples;
1553
+ if (printColoredPct (OutOfRangeSamples, NumTotalSamples, 40 , 60 ) > 80 )
1554
+ outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1555
+ " binary is probably not the same binary used during profiling "
1556
+ " collection. The generated data may be ineffective for improving "
1557
+ " performance\n\n " ;
1558
+ printColdSamplesDiagnostic ();
1559
+ }
1560
+
1561
+ void DataAggregator::printBranchStacksDiagnostics (
1562
+ uint64_t IgnoredSamples) const {
1563
+ outs () << " PERF2BOLT: ignored samples: " << IgnoredSamples;
1564
+ if (printColoredPct (IgnoredSamples, NumTotalSamples, 20 , 50 ) > 50 )
1565
+ errs () << " PERF2BOLT-WARNING: less than 50% of all recorded samples "
1566
+ " were attributed to the input binary\n " ;
1491
1567
}
1492
1568
1493
1569
std::error_code DataAggregator::parseBranchEvents () {
1494
1570
outs () << " PERF2BOLT: parse branch events...\n " ;
1495
1571
NamedRegionTimer T (" parseBranch" , " Parsing branch events" , TimerGroupName,
1496
1572
TimerGroupDesc, opts::TimeAggregator);
1497
1573
1498
- uint64_t NumTotalSamples = 0 ;
1499
1574
uint64_t NumEntries = 0 ;
1500
1575
uint64_t NumSamples = 0 ;
1501
1576
uint64_t NumSamplesNoLBR = 0 ;
1502
- uint64_t NumTraces = 0 ;
1503
1577
bool NeedsSkylakeFix = false ;
1504
1578
1505
1579
while (hasData () && NumTotalSamples < opts::MaxSamples) {
@@ -1526,30 +1600,14 @@ std::error_code DataAggregator::parseBranchEvents() {
1526
1600
NeedsSkylakeFix = true ;
1527
1601
}
1528
1602
1529
- NumTraces += parseLBRSample (Sample, NeedsSkylakeFix);
1603
+ parseLBRSample (Sample, NeedsSkylakeFix);
1530
1604
}
1531
1605
1532
1606
for (const Trace &Trace : llvm::make_first_range (BranchLBRs))
1533
1607
for (const uint64_t Addr : {Trace.From , Trace.To })
1534
1608
if (BinaryFunction *BF = getBinaryFunctionContainingAddress (Addr))
1535
1609
BF->setHasProfileAvailable ();
1536
1610
1537
- auto printColored = [](raw_ostream &OS, float Percent, float T1, float T2) {
1538
- OS << " (" ;
1539
- if (OS.has_colors ()) {
1540
- if (Percent > T2)
1541
- OS.changeColor (raw_ostream::RED);
1542
- else if (Percent > T1)
1543
- OS.changeColor (raw_ostream::YELLOW);
1544
- else
1545
- OS.changeColor (raw_ostream::GREEN);
1546
- }
1547
- OS << format (" %.1f%%" , Percent);
1548
- if (OS.has_colors ())
1549
- OS.resetColor ();
1550
- OS << " )" ;
1551
- };
1552
-
1553
1611
outs () << " PERF2BOLT: read " << NumSamples << " samples and " << NumEntries
1554
1612
<< " LBR entries\n " ;
1555
1613
if (NumTotalSamples) {
@@ -1561,47 +1619,10 @@ std::error_code DataAggregator::parseBranchEvents() {
1561
1619
" in no-LBR mode with -nl (the performance improvement in -nl "
1562
1620
" mode may be limited)\n " ;
1563
1621
} else {
1564
- const uint64_t IgnoredSamples = NumTotalSamples - NumSamples;
1565
- const float PercentIgnored = 100 .0f * IgnoredSamples / NumTotalSamples;
1566
- outs () << " PERF2BOLT: " << IgnoredSamples << " samples" ;
1567
- printColored (outs (), PercentIgnored, 20 , 50 );
1568
- outs () << " were ignored\n " ;
1569
- if (PercentIgnored > 50 .0f )
1570
- errs () << " PERF2BOLT-WARNING: less than 50% of all recorded samples "
1571
- " were attributed to the input binary\n " ;
1622
+ printBranchStacksDiagnostics (NumTotalSamples - NumSamples);
1572
1623
}
1573
1624
}
1574
- outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1575
- << NumInvalidTraces;
1576
- float Perc = 0 .0f ;
1577
- if (NumTraces > 0 ) {
1578
- Perc = NumInvalidTraces * 100 .0f / NumTraces;
1579
- printColored (outs (), Perc, 5 , 10 );
1580
- }
1581
- outs () << " \n " ;
1582
- if (Perc > 10 .0f )
1583
- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1584
- " binary is probably not the same binary used during profiling "
1585
- " collection. The generated data may be ineffective for improving "
1586
- " performance.\n\n " ;
1587
-
1588
- outs () << " PERF2BOLT: out of range traces involving unknown regions: "
1589
- << NumLongRangeTraces;
1590
- if (NumTraces > 0 )
1591
- outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1592
- outs () << " \n " ;
1593
-
1594
- if (NumColdSamples > 0 ) {
1595
- const float ColdSamples = NumColdSamples * 100 .0f / NumTotalSamples;
1596
- outs () << " PERF2BOLT: " << NumColdSamples
1597
- << format (" (%.1f%%)" , ColdSamples)
1598
- << " samples recorded in cold regions of split functions.\n " ;
1599
- if (ColdSamples > 5 .0f )
1600
- outs ()
1601
- << " WARNING: The BOLT-processed binary where samples were collected "
1602
- " likely used bad data or your service observed a large shift in "
1603
- " profile. You may want to audit this.\n " ;
1604
- }
1625
+ printBranchSamplesDiagnostics ();
1605
1626
1606
1627
return std::error_code ();
1607
1628
}
@@ -1658,11 +1679,10 @@ void DataAggregator::processBasicEvents() {
1658
1679
NamedRegionTimer T (" processBasic" , " Processing basic events" , TimerGroupName,
1659
1680
TimerGroupDesc, opts::TimeAggregator);
1660
1681
uint64_t OutOfRangeSamples = 0 ;
1661
- uint64_t NumSamples = 0 ;
1662
1682
for (auto &Sample : BasicSamples) {
1663
1683
const uint64_t PC = Sample.first ;
1664
1684
const uint64_t HitCount = Sample.second ;
1665
- NumSamples += HitCount;
1685
+ NumTotalSamples += HitCount;
1666
1686
BinaryFunction *Func = getBinaryFunctionContainingAddress (PC);
1667
1687
if (!Func) {
1668
1688
OutOfRangeSamples += HitCount;
@@ -1671,33 +1691,9 @@ void DataAggregator::processBasicEvents() {
1671
1691
1672
1692
doSample (*Func, PC, HitCount);
1673
1693
}
1674
- outs () << " PERF2BOLT: read " << NumSamples << " samples\n " ;
1694
+ outs () << " PERF2BOLT: read " << NumTotalSamples << " samples\n " ;
1675
1695
1676
- outs () << " PERF2BOLT: out of range samples recorded in unknown regions: "
1677
- << OutOfRangeSamples;
1678
- float Perc = 0 .0f ;
1679
- if (NumSamples > 0 ) {
1680
- outs () << " (" ;
1681
- Perc = OutOfRangeSamples * 100 .0f / NumSamples;
1682
- if (outs ().has_colors ()) {
1683
- if (Perc > 60 .0f )
1684
- outs ().changeColor (raw_ostream::RED);
1685
- else if (Perc > 40 .0f )
1686
- outs ().changeColor (raw_ostream::YELLOW);
1687
- else
1688
- outs ().changeColor (raw_ostream::GREEN);
1689
- }
1690
- outs () << format (" %.1f%%" , Perc);
1691
- if (outs ().has_colors ())
1692
- outs ().resetColor ();
1693
- outs () << " )" ;
1694
- }
1695
- outs () << " \n " ;
1696
- if (Perc > 80 .0f )
1697
- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1698
- " binary is probably not the same binary used during profiling "
1699
- " collection. The generated data may be ineffective for improving "
1700
- " performance.\n\n " ;
1696
+ printBasicSamplesDiagnostics (OutOfRangeSamples);
1701
1697
}
1702
1698
1703
1699
std::error_code DataAggregator::parseMemEvents () {
@@ -1775,13 +1771,13 @@ void DataAggregator::processPreAggregated() {
1775
1771
NamedRegionTimer T (" processAggregated" , " Processing aggregated branch events" ,
1776
1772
TimerGroupName, TimerGroupDesc, opts::TimeAggregator);
1777
1773
1778
- uint64_t NumTraces = 0 ;
1779
1774
for (const AggregatedLBREntry &AggrEntry : AggregatedLBRs) {
1780
1775
switch (AggrEntry.EntryType ) {
1781
1776
case AggregatedLBREntry::BRANCH:
1782
1777
case AggregatedLBREntry::TRACE:
1783
1778
doBranch (AggrEntry.From .Offset , AggrEntry.To .Offset , AggrEntry.Count ,
1784
1779
AggrEntry.Mispreds );
1780
+ NumTotalSamples += AggrEntry.Count ;
1785
1781
break ;
1786
1782
case AggregatedLBREntry::FT:
1787
1783
case AggregatedLBREntry::FT_EXTERNAL_ORIGIN: {
@@ -1799,37 +1795,7 @@ void DataAggregator::processPreAggregated() {
1799
1795
1800
1796
outs () << " PERF2BOLT: read " << AggregatedLBRs.size ()
1801
1797
<< " aggregated LBR entries\n " ;
1802
- outs () << " PERF2BOLT: traces mismatching disassembled function contents: "
1803
- << NumInvalidTraces;
1804
- float Perc = 0 .0f ;
1805
- if (NumTraces > 0 ) {
1806
- outs () << " (" ;
1807
- Perc = NumInvalidTraces * 100 .0f / NumTraces;
1808
- if (outs ().has_colors ()) {
1809
- if (Perc > 10 .0f )
1810
- outs ().changeColor (raw_ostream::RED);
1811
- else if (Perc > 5 .0f )
1812
- outs ().changeColor (raw_ostream::YELLOW);
1813
- else
1814
- outs ().changeColor (raw_ostream::GREEN);
1815
- }
1816
- outs () << format (" %.1f%%" , Perc);
1817
- if (outs ().has_colors ())
1818
- outs ().resetColor ();
1819
- outs () << " )" ;
1820
- }
1821
- outs () << " \n " ;
1822
- if (Perc > 10 .0f )
1823
- outs () << " \n !! WARNING !! This high mismatch ratio indicates the input "
1824
- " binary is probably not the same binary used during profiling "
1825
- " collection. The generated data may be ineffective for improving "
1826
- " performance.\n\n " ;
1827
-
1828
- outs () << " PERF2BOLT: Out of range traces involving unknown regions: "
1829
- << NumLongRangeTraces;
1830
- if (NumTraces > 0 )
1831
- outs () << format (" (%.1f%%)" , NumLongRangeTraces * 100 .0f / NumTraces);
1832
- outs () << " \n " ;
1798
+ printBranchSamplesDiagnostics ();
1833
1799
}
1834
1800
1835
1801
std::optional<int32_t > DataAggregator::parseCommExecEvent () {
0 commit comments