llvm · mshockwave · Mar 25, 2025 · Feb 5, 2025 · Mar 10, 2025 · Mar 10, 2025
diff --git a/llvm/docs/CommandGuide/llvm-mca.rst b/llvm/docs/CommandGuide/llvm-mca.rst
@@ -197,14 +197,32 @@ option specifies "``-``", then the output will also be sent to standard output.
 
   Enable all the view.
 
-.. option:: -instruction-tables
+.. option:: -instruction-tables=<level>
 
   Prints resource pressure information based on the static information
   available from the processor model. This differs from the resource pressure
   view because it doesn't require that the code is simulated. It instead prints
   the theoretical uniform distribution of resource pressure for every
   instruction in sequence.
 
+  The choice of `<level>` controls the amount of information printed.
+  `<level>` may be `none` (default), `normal`, or `full`.
+  Note: If the option is used without `<level>`, the default is `normal` (legacy).
+
+  When `<level>` is `full`, the following additional information is printed:
+
+  - `<Bypass Latency>`: Latency when a bypass is implemented between operands
+    in pipelines (see SchedReadAdvance).
+  - `<LLVM Opcode Name>`: mnemonic plus operands identifier.
+  - `<Resources units>`: Used resources associated with LLVM Opcode.
+  - `<instruction comment>`: reports comment if any from source assembly.
+
+  `<Resources units>` syntax can be:
+
+  - `<Resource Name>`: ReleaseAtCycle is 1.
+  - `<Resource Name>[<ReleaseAtCycle>]`: ReleaseAtCycle is greater than 1
+    and AcquireAtCycle is 0.
+  - `<Resource Name>[<AcquireAtCycle>,<ReleaseAtCycle>]`: ReleaseAtCycle
+    is greater than 1 and AcquireAtCycle is greater than 0.
+
 .. option:: -bottleneck-analysis
 
   Print information about bottlenecks that affect the throughput. This analysis

diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h
@@ -402,6 +402,10 @@ struct MCSchedModel {
   static unsigned getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
                                            unsigned WriteResourceIdx = 0);
 
+  /// Returns the ReadAdvance (bypass) cycles associated with the write that
+  /// has the maximum latency in \p SCDesc, or 0 if there is none.
+  static unsigned getBypassDelayCycles(const MCSubtargetInfo &STI,
+                                       const MCSchedClassDesc &SCDesc);
+
   /// Returns the default initialized model.
   static const MCSchedModel Default;
 };

diff --git a/llvm/lib/MC/MCSchedule.cpp b/llvm/lib/MC/MCSchedule.cpp
@@ -174,3 +174,37 @@ MCSchedModel::getForwardingDelayCycles(ArrayRef<MCReadAdvanceEntry> Entries,
 
   return std::abs(DelayCycles);
 }
+
+unsigned MCSchedModel::getBypassDelayCycles(const MCSubtargetInfo &STI,
+                                            const MCSchedClassDesc &SCDesc) {
+  // Without any ReadAdvance entry there is no bypass to report.
+  const ArrayRef<MCReadAdvanceEntry> ReadAdvances =
+      STI.getReadAdvanceEntries(SCDesc);
+  if (ReadAdvances.empty())
+    return 0;
+
+  // Select the write with the largest latency. Non-positive (invalid)
+  // latencies count as 0 cycles and the first write wins on ties; when no
+  // write has a positive latency the selected resource ID stays 0.
+  unsigned LongestLatency = 0;
+  unsigned LongestWriteID = 0;
+  for (unsigned Def = 0, NumDefs = SCDesc.NumWriteLatencyEntries;
+       Def != NumDefs; ++Def) {
+    const MCWriteLatencyEntry *Write = STI.getWriteLatencyEntry(&SCDesc, Def);
+    const unsigned Latency = Write->Cycles > 0 ? (unsigned)Write->Cycles : 0;
+    if (Latency <= LongestLatency)
+      continue;
+    LongestLatency = Latency;
+    LongestWriteID = Write->WriteResourceID;
+  }
+
+  // Report the cycles of the first ReadAdvance entry matching that write.
+  // NOTE(review): the value is returned as unsigned; if
+  // MCReadAdvanceEntry::Cycles can be negative it wraps here - confirm.
+  for (const MCReadAdvanceEntry &Advance : ReadAdvances)
+    if (Advance.WriteResourceID == LongestWriteID)
+      return Advance.Cycles;
+
+  // No ReadAdvance entry refers to the selected write.
+  return 0;
+}
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V1-sve-instructions.s
diff --git a/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp b/llvm/tools/llvm-mca/Views/InstructionInfoView.cpp
@@ -12,15 +12,47 @@
 //===----------------------------------------------------------------------===//
 
 #include "Views/InstructionInfoView.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/JSON.h"
+#include "llvm/Support/WithColor.h"
 
 namespace llvm {
 namespace mca {
 
+void InstructionInfoView::getComment(const MCInst &MCI,
+                                     std::string &CommentString) const {
+  // Sets CommentString to the comment following the instruction on its source
+  // line, if any. It is useful to add in comment scheduling information from
+  // the architecture specification. '#' comment mark is not supported.
+  CommentString.clear();
+
+  // Only a line terminated by '\n' is scanned.
+  StringRef S = MCI.getLoc().getPointer();
+  size_t LineEnd = S.find('\n');
+  if (LineEnd == StringRef::npos)
+    return;
+  StringRef InstrStr = S.take_front(LineEnd);
+
+  // C style comment: substr() takes a length, not an end position, so keep
+  // exactly the text from "/*" through the first "*/" that follows it.
+  size_t PosCmt = InstrStr.find("/*");
+  size_t PosEnd =
+      PosCmt == StringRef::npos ? PosCmt : InstrStr.find("*/", PosCmt + 2);
+  if (PosEnd != StringRef::npos) {
+    CommentString = InstrStr.substr(PosCmt, PosEnd + 2 - PosCmt).str();
+    return;
+  }
+
+  // C++ style comment
+  if ((PosCmt = InstrStr.find("//")) != StringRef::npos)
+    CommentString = InstrStr.substr(PosCmt).str();
+}
+
 void InstructionInfoView::printView(raw_ostream &OS) const {
   std::string Buffer;
   raw_string_ostream TempStream(Buffer);
+  formatted_raw_ostream FOS(TempStream);
 
   ArrayRef<llvm::MCInst> Source = getSource();
   if (!Source.size())
@@ -29,82 +61,139 @@ void InstructionInfoView::printView(raw_ostream &OS) const {
   IIVDVec IIVD(Source.size());
   collectData(IIVD);
 
-  TempStream << "\n\nInstruction Info:\n";
-  TempStream << "[1]: #uOps\n[2]: Latency\n[3]: RThroughput\n"
-             << "[4]: MayLoad\n[5]: MayStore\n[6]: HasSideEffects (U)\n";
+  if (PrintFullInfo) {
+    FOS << "\n\nResources:\n";
+    const MCSchedModel &SM = getSubTargetInfo().getSchedModel();
+    for (unsigned I = 1, ResourceIndex = 0, E = SM.getNumProcResourceKinds();
+         I < E; ++I) {
+      const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
+      unsigned NumUnits = ProcResource.NumUnits;
+      // Skip invalid resources with zero units.
+      if (!NumUnits)
+        continue;
+
+      FOS << '[' << ResourceIndex << ']';
+      FOS.PadToColumn(6);
+      FOS << "- " << ProcResource.Name << ':' << NumUnits;
+      if (ProcResource.SubUnitsIdxBegin) {
+        FOS.PadToColumn(20);
+        for (unsigned U = 0; U < NumUnits; ++U) {
+          FOS << SM.getProcResource(ProcResource.SubUnitsIdxBegin[U])->Name;
+          if ((U + 1) < NumUnits)
+            FOS << ", ";
+        }
+      }
+      FOS << '\n';
+      ResourceIndex++;
+    }
+  }
+
+  SmallVector<unsigned, 16> Paddings = {0, 7, 14, 21, 28, 35};
+  SmallVector<StringRef, 16> Fields = {"#uOps",       "Latency",
+                                       "RThroughput", "MayLoad",
+                                       "MayStore",    "HasSideEffects (U)"};
+  SmallVector<StringRef, 8> EndFields;
+  unsigned LastPadding = Paddings.back();
+  if (PrintFullInfo) {
+    Fields.push_back("Bypass Latency");
+    Paddings.push_back(LastPadding += 7);
+    Fields.push_back("Resources (<Name> | <Name>[<ReleaseAtCycle>] | "
+                     "<Name>[<AcquireAtCycle>,<ReleaseAtCycle])");
+    Paddings.push_back(LastPadding += 7);
+    Fields.push_back("LLVM Opcode Name");
+    Paddings.push_back(LastPadding += 43);
+  }
   if (PrintBarriers) {
-    TempStream << "[7]: LoadBarrier\n[8]: StoreBarrier\n";
+    Fields.push_back("LoadBarrier");
+    Paddings.push_back(LastPadding += 7);
+    Fields.push_back("StoreBarrier");
+    Paddings.push_back(LastPadding += 7);
   }
   if (PrintEncodings) {
-    if (PrintBarriers) {
-      TempStream << "[9]: Encoding Size\n";
-      TempStream << "\n[1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    "
-                 << "[9]    Encodings:                    Instructions:\n";
-    } else {
-      TempStream << "[7]: Encoding Size\n";
-      TempStream << "\n[1]    [2]    [3]    [4]    [5]    [6]    [7]    "
-                 << "Encodings:                    Instructions:\n";
-    }
+    Paddings.push_back(LastPadding += 7);
+    Paddings.push_back(LastPadding += 7);
+    Paddings.push_back(LastPadding += 30);
+    Fields.push_back("Encoding Size");
+    EndFields.push_back("Encodings:");
+    EndFields.push_back("Instructions:");
   } else {
-    if (PrintBarriers) {
-      TempStream << "\n[1]    [2]    [3]    [4]    [5]    [6]    [7]    [8]    "
-                 << "Instructions:\n";
-    } else {
-      TempStream << "\n[1]    [2]    [3]    [4]    [5]    [6]    "
-                 << "Instructions:\n";
-    }
+    if (PrintFullInfo)
+      Paddings.push_back(LastPadding += 27);
+    else
+      Paddings.push_back(LastPadding += 7);
+    EndFields.push_back("Instructions:");
   }
 
-  for (const auto &[Index, IIVDEntry, Inst] : enumerate(IIVD, Source)) {
-    TempStream << ' ' << IIVDEntry.NumMicroOpcodes << "    ";
-    if (IIVDEntry.NumMicroOpcodes < 10)
-      TempStream << "  ";
-    else if (IIVDEntry.NumMicroOpcodes < 100)
-      TempStream << ' ';
-    TempStream << IIVDEntry.Latency << "   ";
-    if (IIVDEntry.Latency < 10)
-      TempStream << "  ";
-    else if (IIVDEntry.Latency < 100)
-      TempStream << ' ';
+  FOS << "\n\nInstruction Info:\n";
+  for (unsigned i = 0, N = Fields.size(); i < N; i++)
+    FOS << "[" << i + 1 << "]: " << Fields[i] << "\n";
+  FOS << "\n";
+
+  for (unsigned i = 0, N = Paddings.size(); i < N; i++) {
+    if (Paddings[i])
+      FOS.PadToColumn(Paddings[i]);
+    if (i < Fields.size())
+      FOS << "[" << i + 1 << "]";
+    else
+      FOS << EndFields[i - Fields.size()];
+  }
+  FOS << "\n";
 
+  for (const auto &[Index, IIVDEntry, Inst] : enumerate(IIVD, Source)) {
+    FOS.PadToColumn(Paddings[0] + 1);
+    FOS << IIVDEntry.NumMicroOpcodes;
+    FOS.PadToColumn(Paddings[1] + 1);
+    FOS << IIVDEntry.Latency;
+    FOS.PadToColumn(Paddings[2]);
     if (IIVDEntry.RThroughput) {
       double RT = *IIVDEntry.RThroughput;
-      TempStream << format("%.2f", RT) << ' ';
-      if (RT < 10.0)
-        TempStream << "  ";
-      else if (RT < 100.0)
-        TempStream << ' ';
+      FOS << format("%.2f", RT);
     } else {
-      TempStream << " -     ";
+      FOS << " -";
+    }
+    FOS.PadToColumn(Paddings[3] + 1);
+    FOS << (IIVDEntry.mayLoad ? "*" : " ");
+    FOS.PadToColumn(Paddings[4] + 1);
+    FOS << (IIVDEntry.mayStore ? "*" : " ");
+    FOS.PadToColumn(Paddings[5] + 1);
+    FOS << (IIVDEntry.hasUnmodeledSideEffects ? "U" : " ");
+    unsigned LastPaddingIdx = 5;
+
+    if (PrintFullInfo) {
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1] + 1);
+      FOS << IIVDEntry.Bypass;
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1] + 1);
+      FOS << IIVDEntry.Resources;
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1] + 1);
+      FOS << IIVDEntry.OpcodeName;
     }
-    TempStream << (IIVDEntry.mayLoad ? " *     " : "       ");
-    TempStream << (IIVDEntry.mayStore ? " *     " : "       ");
-    TempStream << (IIVDEntry.hasUnmodeledSideEffects ? " U     " : "       ");
 
     if (PrintBarriers) {
-      TempStream << (LoweredInsts[Index]->isALoadBarrier() ? " *     "
-                                                           : "       ");
-      TempStream << (LoweredInsts[Index]->isAStoreBarrier() ? " *     "
-                                                            : "       ");
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1] + 1);
+      FOS << (LoweredInsts[Index]->isALoadBarrier() ? "*" : " ");
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1] + 1);
+      FOS << (LoweredInsts[Index]->isAStoreBarrier() ? "*" : " ");
     }
 
     if (PrintEncodings) {
       StringRef Encoding(CE.getEncoding(Index));
       unsigned EncodingSize = Encoding.size();
-      TempStream << " " << EncodingSize
-                 << (EncodingSize < 10 ? "     " : "    ");
-      TempStream.flush();
-      formatted_raw_ostream FOS(TempStream);
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1] + 1);
+      FOS << EncodingSize;
+      FOS.PadToColumn(Paddings[LastPaddingIdx += 1]);
       for (unsigned i = 0, e = Encoding.size(); i != e; ++i)
         FOS << format("%02x ", (uint8_t)Encoding[i]);
-      FOS.PadToColumn(30);
-      FOS.flush();
     }
-
-    TempStream << printInstructionString(Inst) << '\n';
+    FOS.PadToColumn(Paddings[LastPaddingIdx += 1]);
+    FOS << printInstructionString(Inst);
+    if (PrintFullInfo) {
+      std::string CommentString;
+      getComment(Inst, CommentString);
+      FOS << "\t" << CommentString;
+    }
+    FOS << '\n';
   }
 
-  TempStream.flush();
   OS << Buffer;
 }
 
@@ -141,6 +230,37 @@ void InstructionInfoView::collectData(
     IIVDEntry.mayLoad = MCDesc.mayLoad();
     IIVDEntry.mayStore = MCDesc.mayStore();
     IIVDEntry.hasUnmodeledSideEffects = MCDesc.hasUnmodeledSideEffects();
+
+    if (PrintFullInfo) {
+      // Get latency with bypass
+      IIVDEntry.Bypass =
+          IIVDEntry.Latency - MCSchedModel::getBypassDelayCycles(STI, SCDesc);
+      IIVDEntry.OpcodeName = MCII.getName(Inst.getOpcode());
+      raw_string_ostream TempStream(IIVDEntry.Resources);
+      const MCWriteProcResEntry *Index = STI.getWriteProcResBegin(&SCDesc);
+      const MCWriteProcResEntry *Last = STI.getWriteProcResEnd(&SCDesc);
+      ListSeparator LS(",");
+      for (; Index != Last; ++Index) {
+        if (!Index->ReleaseAtCycle)
+          continue;
+        const MCProcResourceDesc *MCProc =
+            SM.getProcResource(Index->ProcResourceIdx);
+        if (Index->ReleaseAtCycle > 1) {
+          // Output ReleaseAtCycle between [] if not 1 (default)
+          // This is to be able to evaluate throughput.
+          // See getReciprocalThroughput in MCSchedule.cpp
+          if (Index->AcquireAtCycle > 0)
+            TempStream << LS
+                       << format("%s[%d,%d]", MCProc->Name,
+                                 Index->AcquireAtCycle, Index->ReleaseAtCycle);
+          else
+            TempStream << LS
+                       << format("%s[%d]", MCProc->Name, Index->ReleaseAtCycle);
+        } else {
+          TempStream << LS << MCProc->Name;
+        }
+      }
+    }
   }
 }