From ae5c013c238d21ac56579bae19c4bd51aea86778 Mon Sep 17 00:00:00 2001
From: Matt Liberty <mliberty@precisioninno.com>
Date: Thu, 9 Apr 2026 17:37:17 +0000
Subject: [PATCH] [gpl] Include Liberty internal power in MBFF clustering cost

The MBFF algorithm previously used only leakage power to decide whether
to replace single-bit flip-flops with multi-bit cells.  For flip-flops,
internal power dominates total power, and MBFF cells share scan (SE/SI)
and clock structures across bits, giving 40-90% savings on those pins.
Ignoring internal power caused clustering to increase total power on
some PDKs.

Add getInternalEnergy() which sums average internal energy across all
pins (CK, D, Q, SE, SI) from Liberty internal_power tables.  Use this
alongside leakage in three places:

- SetRatios: norm_power_ uses total estimated power (leakage +
  internal_energy * clock_activity) so the ILP cost function reflects
  total power, not just leakage.
- ReadLibs/SelectBestTrays: ReadLibs now records every tray candidate,
  and the new SelectBestTrays picks the best tray per size by minimum
  total estimated power instead of minimum leakage, so cells with lower
  total power (e.g. SVT over LVT) are preferred even when their leakage
  is higher.
- SetVars: select the single-bit baseline cell by lowest total estimated
  power, with both leakage and internal energy paired from the same cell.

Clock period is obtained from SDC for each flop group in Run, right
before SelectBestTrays is called, so tray selection can account for
internal power.  Falls back to leakage-only when no clock is defined or
no internal_power tables exist.

Signed-off-by: Matt Liberty <mliberty@precisioninno.com>
---
 src/gpl/src/mbff.cpp            | 346 ++++++++++++++++++++++++--------
 src/gpl/src/mbff.h              |  20 ++
 src/gpl/test/mbff_orig_name.ok  |  18 +-
 src/gpl/test/mbff_orig_name.tcl |   2 +-
 4 files changed, 290 insertions(+), 96 deletions(-)

diff --git a/src/gpl/src/mbff.cpp b/src/gpl/src/mbff.cpp
index dd3c15bc0fc..6d11a1ec033 100644
--- a/src/gpl/src/mbff.cpp
+++ b/src/gpl/src/mbff.cpp
@@ -2178,20 +2178,129 @@ float MBFF::getLeakage(odb::dbMaster* master)
   return cell_leakage;
 }
 
+float MBFF::getInternalEnergy(odb::dbInst* inst)
+{
+  odb::dbMaster* master = inst->getMaster();
+  sta::Cell* cell = network_->dbToSta(master);
+  sta::LibertyCell* lib_cell = network_->libertyCell(cell);
+  sta::LibertyCell* corner_cell
+      = lib_cell ? lib_cell->sceneCell(corner_, sta::MinMax::max()) : nullptr;
+  if (!corner_cell) {
+    return 0.0;
+  }
+
+  // Sum the average internal energy over all non-supply pins (CK, D, Q, SE,
+  // SI, ...).  Each pin's Liberty `when` groups partition its input states,
+  // so groups are averaged uniformly, and rise/fall tables are averaged at
+  // mid-axis.  MBFF cells share SE/SI/CK structures across bits, a saving
+  // that clock-pin-only analysis would miss.  Returns 0 without Liberty data.
+  float total_energy = 0.0;
+  for (odb::dbITerm* iterm : inst->getITerms()) {
+    if (IsSupplyPin(iterm)) {
+      continue;
+    }
+    const sta::Pin* pin = network_->dbToSta(iterm);
+    const sta::LibertyPort* port = network_->libertyPort(pin);
+    if (!port) {
+      continue;
+    }
+    const sta::LibertyPort* scene_port
+        = port->scenePort(corner_, sta::MinMax::max());
+    if (!scene_port) {
+      continue;
+    }
+    float port_energy_sum = 0.0;
+    int group_count = 0;
+    for (const sta::InternalPower* pwr :
+         corner_cell->internalPowers(scene_port)) {
+      float energy = 0.0;
+      int rf_count = 0;
+      for (const sta::RiseFall* rf : sta::RiseFall::range()) {
+        const sta::InternalPowerModel& model = pwr->model(rf);
+        const sta::TableModel* tbl = model.model();
+        if (!tbl) {
+          continue;
+        }
+        float v1 = 0, v2 = 0;  // table lookup point: mid-range of each axis
+        if (tbl->axis1()) {
+          v1 = (tbl->axis1()->min() + tbl->axis1()->max()) / 2.0f;
+        }
+        if (tbl->axis2()) {
+          v2 = (tbl->axis2()->min() + tbl->axis2()->max()) / 2.0f;
+        }
+        energy += tbl->findValue(v1, v2, 0.0f);
+        rf_count++;
+      }
+      if (rf_count > 0) {
+        port_energy_sum += energy / rf_count;
+        group_count++;
+      }
+    }
+    if (group_count > 0) {
+      const float pin_energy = port_energy_sum / group_count;
+      total_energy += pin_energy;
+      debugPrint(log_,
+                 GPL,
+                 "mbff",
+                 2,
+                 "  pin {} groups={} energy={}",
+                 port->name(),
+                 group_count,
+                 pin_energy);
+    }
+  }
+  return total_energy;
+}
+
+float MBFF::clockActivity() const
+{
+  return clock_period_ > 0.0f ? 2.0f / clock_period_ : 0.0f;  // 2 edges/period
+}
+
+float MBFF::getClockPeriod(odb::dbInst* ff_inst)
+{
+  float period = 0.0;  // stays 0 when no clock reaches the clock pin
+  for (odb::dbITerm* term : ff_inst->getITerms()) {
+    if (!IsClockPin(term)) {
+      continue;
+    }
+    const sta::Pin* clk_pin = network_->dbToSta(term);
+    for (const sta::Clock* clk : sta_->clocks(clk_pin, corner_->mode())) {
+      const float clk_period = clk->period();
+      period = (period == 0.0) ? clk_period : std::min(period, clk_period);
+    }
+    break;  // only the first clock pin is inspected
+  }
+  return period;
+}
+
 void MBFF::SetVars(const std::vector<Flop>& flops)
 {
   // get min height and width
   single_bit_height_ = std::numeric_limits<float>::max();
   single_bit_width_ = std::numeric_limits<float>::max();
   single_bit_power_ = std::numeric_limits<float>::max();
+  const float activity = clockActivity();  // 0 when clock_period_ <= 0
+  std::map<dbMaster*, float> energy_cache;  // internal energy per master
   for (const Flop& flop : flops) {
     dbMaster* master = insts_[flop.idx]->getMaster();
     single_bit_height_
         = std::min(single_bit_height_, master->getHeight() / multiplier_);
     single_bit_width_
         = std::min(single_bit_width_, master->getWidth() / multiplier_);
-    const float leakage = getLeakage(insts_[flop.idx]->getMaster());
-    single_bit_power_ = std::min(single_bit_power_, leakage);
+    auto [it, inserted] = energy_cache.try_emplace(master, 0.0f);
+    if (inserted) {  // first flop with this master: compute its energy once
+      it->second = getInternalEnergy(insts_[flop.idx]);
+    }
+    const float leakage = getLeakage(master);
+    const float total_power = leakage + it->second * activity;
+    // Select the single-bit cell with lowest total estimated power as
+    // the baseline.  Both leakage and internal energy must come from the
+    // same cell to avoid an artificially low baseline.
+    if (total_power < single_bit_power_) {
+      single_bit_power_ = total_power;
+      single_bit_master_ = master;
+    }
   }
 }
 
@@ -2202,6 +2311,18 @@ void MBFF::SetRatios(const Mask& array_mask)
   norm_power_.clear();
   norm_power_.push_back(1.00);
 
+  const float activity = clockActivity();
+
+  debugPrint(log_,
+             GPL,
+             "mbff",
+             1,
+             "mask: {} sb_cell: {} sb_power: {} clock_period: {}",
+             array_mask.to_string(),
+             single_bit_master_ ? single_bit_master_->getName() : "none",
+             single_bit_power_,
+             clock_period_);
+
   for (int i = 1; i < num_sizes_; i++) {
     norm_area_.push_back(std::numeric_limits<float>::max());
     norm_power_.push_back(std::numeric_limits<float>::max());
@@ -2210,8 +2331,24 @@ void MBFF::SetRatios(const Mask& array_mask)
       norm_area_[i] = (tray_area_[array_mask][i]
                        / (single_bit_height_ * single_bit_width_))
                       / slot_cnt;
-      norm_power_[i]
-          = (tray_power_[array_mask][i] / slot_cnt) / single_bit_power_;
+      if (single_bit_power_ > 0) {
+        const float tray_total
+            = tray_power_[array_mask][i]
+              + tray_internal_energy_[array_mask][i] * activity;
+        norm_power_[i] = (tray_total / slot_cnt) / single_bit_power_;
+        debugPrint(log_,
+                   GPL,
+                   "mbff",
+                   1,
+                   "  {}-bit {}: tray_leakage: {} tray_internal_energy: {} "
+                   "tray_total: {} norm_power: {}",
+                   slot_cnt,
+                   best_master_[array_mask][i]->getName(),
+                   tray_power_[array_mask][i],
+                   tray_internal_energy_[array_mask][i],
+                   tray_total,
+                   norm_power_[i]);
+      }
     }
   }
 }
@@ -2285,8 +2422,8 @@ void MBFF::Run(const int mx_sz, const float alpha, const float beta)
   for (int i = 0; i < num_chunks; i++) {
     dbInst* ff_inst = insts_[FFs[i].back().idx];
     const Mask array_mask = GetArrayMask(ff_inst, false);
-    // do we even have trays to cluster these flops?
-    if (best_master_[array_mask].empty()) {
+    // do we even have tray candidates to cluster these flops?
+    if (!tray_candidates_.contains(array_mask)) {
       tot_ilp += (alpha * FFs[i].size());
       tray_sizes_used_[1] += FFs[i].size();
       log_->info(GPL,
@@ -2297,6 +2434,8 @@ void MBFF::Run(const int mx_sz, const float alpha, const float beta)
       continue;
     }
     any_found = true;
+    clock_period_ = getClockPeriod(ff_inst);
+    SelectBestTrays(array_mask, clockActivity());
     SetVars(FFs[i]);
     SetRatios(array_mask);
     tot_ilp += RunClustering(FFs[i], mx_sz, alpha, beta, array_mask);
@@ -2383,105 +2522,138 @@ void MBFF::ReadLibs()
       const int idx = GetBitIdx(num_slots);
       const Mask array_mask = GetArrayMask(tmp_tray, true);
 
-      if (best_master_[array_mask].empty()) {
-        best_master_[array_mask].resize(num_sizes_, nullptr);
-        tray_area_[array_mask].resize(num_sizes_,
-                                      std::numeric_limits<float>::max());
-        tray_power_[array_mask].resize(num_sizes_,
-                                       std::numeric_limits<float>::max());
-        tray_width_[array_mask].resize(num_sizes_);
-        pin_mappings_[array_mask].resize(num_sizes_);
-
-        slot_to_tray_x_[array_mask].resize(num_sizes_);
-        slot_to_tray_y_[array_mask].resize(num_sizes_);
+      if (tray_candidates_[array_mask].empty()) {
+        tray_candidates_[array_mask].resize(num_sizes_);
       }
 
       const float cur_area = (master->getHeight() / multiplier_)
                              * (master->getWidth() / multiplier_);
       const float cur_leakage = getLeakage(tmp_tray->getMaster());
+      const float cur_internal_energy = getInternalEnergy(tmp_tray);
 
       debugPrint(log_,
                  GPL,
                  "mbff",
                  1,
-                 "Found tray {} mask: {} area: {} leakage power: {}",
+                 "Found tray {} mask: {} area: {} leakage: {} "
+                 "internal_energy: {}",
                  master->getName(),
                  array_mask.to_string(),
                  cur_area,
-                 cur_leakage);
-
-      if (std::tie(tray_power_[array_mask][idx], tray_area_[array_mask][idx])
-          > std::tie(cur_leakage, cur_area)) {
-        tray_area_[array_mask][idx] = cur_area;
-        tray_power_[array_mask][idx] = cur_leakage;
-        best_master_[array_mask][idx] = master;
-        pin_mappings_[array_mask][idx] = GetPinMapping(tmp_tray);
-        tray_width_[array_mask][idx] = master->getWidth() / multiplier_;
-
-        // save slot info
-        tmp_tray->setLocation(0, 0);
-        tmp_tray->setPlacementStatus(odb::dbPlacementStatus::PLACED);
-
-        slot_to_tray_x_[array_mask][idx].clear();
-        slot_to_tray_y_[array_mask][idx].clear();
-
-        std::vector<Point> d;
-        std::vector<Point> q;
-        std::vector<Point> qn;
-
-        for (const auto& p : pin_mappings_[array_mask][idx]) {
-          dbITerm* d_pin = tmp_tray->findITerm(p.first->name().c_str());
-          dbITerm* q_pin
-              = (p.second.q ? tmp_tray->findITerm(p.second.q->name().c_str())
-                            : nullptr);
-          dbITerm* qn_pin
-              = (p.second.qn ? tmp_tray->findITerm(p.second.qn->name().c_str())
-                             : nullptr);
-
-          d.push_back(Point{
-              d_pin->getBBox().xCenter() / multiplier_,
-              d_pin->getBBox().yCenter() / multiplier_,
+                 cur_leakage,
+                 cur_internal_energy);
+
+      // Collect slot geometry from the temporary instance.
+      tmp_tray->setLocation(0, 0);
+      tmp_tray->setPlacementStatus(odb::dbPlacementStatus::PLACED);
+
+      DataToOutputsMap pin_mapping = GetPinMapping(tmp_tray);
+
+      std::vector<Point> d;
+      std::vector<Point> q;
+      std::vector<Point> qn;
+
+      for (const auto& p : pin_mapping) {
+        dbITerm* d_pin = tmp_tray->findITerm(p.first->name().c_str());
+        dbITerm* q_pin
+            = (p.second.q ? tmp_tray->findITerm(p.second.q->name().c_str())
+                          : nullptr);
+        dbITerm* qn_pin
+            = (p.second.qn ? tmp_tray->findITerm(p.second.qn->name().c_str())
+                           : nullptr);
+
+        d.push_back(Point{
+            d_pin->getBBox().xCenter() / multiplier_,
+            d_pin->getBBox().yCenter() / multiplier_,
+        });
+
+        if (q_pin) {
+          q.push_back(Point{
+              q_pin->getBBox().xCenter() / multiplier_,
+              q_pin->getBBox().yCenter() / multiplier_,
           });
+        }
 
-          if (q_pin) {
-            q.push_back(Point{
-                q_pin->getBBox().xCenter() / multiplier_,
-                q_pin->getBBox().yCenter() / multiplier_,
-            });
-          }
-
-          if (qn_pin) {
-            qn.push_back(Point{
-                qn_pin->getBBox().xCenter() / multiplier_,
-                qn_pin->getBBox().yCenter() / multiplier_,
-            });
-          }
+        if (qn_pin) {
+          qn.push_back(Point{
+              qn_pin->getBBox().xCenter() / multiplier_,
+              qn_pin->getBBox().yCenter() / multiplier_,
+          });
         }
+      }
 
-        // slots w.r.t. bottom-left corner
-        for (int i = 0; i < num_slots; i++) {
-          if (!q.empty() && !qn.empty()) {
-            slot_to_tray_x_[array_mask][idx].push_back(
-                (std::max(d[i].x, std::max(q[i].x, qn[i].x))
-                 + std::min(d[i].x, std::min(q[i].x, qn[i].x)))
-                / 2.0);
-            slot_to_tray_y_[array_mask][idx].push_back(
-                (std::max(d[i].y, std::max(q[i].y, qn[i].y))
-                 + std::min(d[i].y, std::min(q[i].y, qn[i].y)))
-                / 2.0);
-          } else if (!q.empty()) {
-            slot_to_tray_x_[array_mask][idx].push_back(
-                (std::max(d[i].x, q[i].x) + std::min(d[i].x, q[i].x)) / 2.0);
-            slot_to_tray_y_[array_mask][idx].push_back(
-                (std::max(d[i].y, q[i].y) + std::min(d[i].y, q[i].y)) / 2.0);
-          } else {
-            slot_to_tray_x_[array_mask][idx].push_back(
-                (std::max(d[i].x, qn[i].x) + std::min(d[i].x, qn[i].x)) / 2.0);
-            slot_to_tray_y_[array_mask][idx].push_back(
-                (std::max(d[i].y, qn[i].y) + std::min(d[i].y, qn[i].y)) / 2.0);
-          }
+      std::vector<float> slot_x;
+      std::vector<float> slot_y;
+      for (int i = 0; i < num_slots; i++) {
+        if (!q.empty() && !qn.empty()) {
+          slot_x.push_back((std::max(d[i].x, std::max(q[i].x, qn[i].x))
+                            + std::min(d[i].x, std::min(q[i].x, qn[i].x)))
+                           / 2.0);
+          slot_y.push_back((std::max(d[i].y, std::max(q[i].y, qn[i].y))
+                            + std::min(d[i].y, std::min(q[i].y, qn[i].y)))
+                           / 2.0);
+        } else if (!q.empty()) {
+          slot_x.push_back((d[i].x + q[i].x) / 2.0);
+          slot_y.push_back((d[i].y + q[i].y) / 2.0);
+        } else {
+          slot_x.push_back((d[i].x + qn[i].x) / 2.0);
+          slot_y.push_back((d[i].y + qn[i].y) / 2.0);
         }
       }
+
+      tray_candidates_[array_mask][idx].push_back(
+          TrayCandidate{master,
+                        cur_area,
+                        cur_leakage,
+                        cur_internal_energy,
+                        master->getWidth() / multiplier_,
+                        std::move(pin_mapping),
+                        std::move(slot_x),
+                        std::move(slot_y)});
+    }
+  }
+}
+
+// For each tray size of `mask`, publish the candidate minimizing
+// leakage + internal_energy * activity (ties: smaller area).
+void MBFF::SelectBestTrays(const Mask& mask, const float activity)
+{
+  const auto found = tray_candidates_.find(mask);
+  if (found == tray_candidates_.end()) {
+    return;
+  }
+  const auto& per_size = found->second;
+
+  best_master_[mask].assign(num_sizes_, nullptr);
+  tray_area_[mask].assign(num_sizes_, std::numeric_limits<float>::max());
+  tray_power_[mask].assign(num_sizes_, std::numeric_limits<float>::max());
+  tray_internal_energy_[mask].assign(num_sizes_, 0.0);
+  tray_width_[mask].assign(num_sizes_, 0.0f);
+  pin_mappings_[mask].assign(num_sizes_, DataToOutputsMap{});
+  slot_to_tray_x_[mask].assign(num_sizes_, {});
+  slot_to_tray_y_[mask].assign(num_sizes_, {});
+
+  for (int idx = 0; idx < num_sizes_; idx++) {
+    const TrayCandidate* winner = nullptr;
+    float winner_power = std::numeric_limits<float>::max();
+    float winner_area = std::numeric_limits<float>::max();
+    for (const TrayCandidate& cand : per_size[idx]) {
+      const float power = cand.leakage + cand.internal_energy * activity;
+      if (std::tie(power, cand.area) < std::tie(winner_power, winner_area)) {
+        winner = &cand;
+        winner_power = power;
+        winner_area = cand.area;
+      }
+    }
+    if (winner) {  // sizes without candidates keep the defaults set above
+      best_master_[mask][idx] = winner->master;
+      tray_area_[mask][idx] = winner->area;
+      tray_power_[mask][idx] = winner->leakage;
+      tray_internal_energy_[mask][idx] = winner->internal_energy;
+      tray_width_[mask][idx] = winner->width;
+      pin_mappings_[mask][idx] = winner->pin_mapping;
+      slot_to_tray_x_[mask][idx] = winner->slot_x;
+      slot_to_tray_y_[mask][idx] = winner->slot_y;
+    }
+  }
+}
@@ -2597,6 +2769,8 @@ MBFF::MBFF(odb::dbDatabase* db,
       single_bit_height_(0.0),
       single_bit_width_(0.0),
       single_bit_power_(0.0),
+      clock_period_(0.0),
+      single_bit_master_(nullptr),
       test_idx_(-1)
 {
   graphics_->setDebugOn(debug_graphics);
diff --git a/src/gpl/src/mbff.h b/src/gpl/src/mbff.h
index 3279094881c..13e76eed369 100644
--- a/src/gpl/src/mbff.h
+++ b/src/gpl/src/mbff.h
@@ -94,6 +94,18 @@ class MBFF
       = std::map<const sta::LibertyPort*, FlopOutputs, sta::LibertyPortLess>;
   DataToOutputsMap GetPinMapping(odb::dbInst* tray);
 
+  struct TrayCandidate  // field order is relied on by ReadLibs' brace-init
+  {
+    odb::dbMaster* master;         // library cell of this tray
+    float area;                    // height * width, each / multiplier_
+    float leakage;                 // getLeakage(master)
+    float internal_energy;         // getInternalEnergy() of a temp instance
+    float width;                   // master width / multiplier_
+    DataToOutputsMap pin_mapping;  // GetPinMapping() of a temp instance
+    std::vector<float> slot_x;     // per-slot x, tray placed at (0, 0)
+    std::vector<float> slot_y;     // per-slot y, tray placed at (0, 0)
+  };
+
   // MBFF functions
   const sta::LibertyCell* getLibertyCell(const sta::Cell* cell);
   float GetDist(const Point& a, const Point& b);
@@ -225,12 +237,16 @@ class MBFF
   void ReadFFs();
   void ReadPaths();
   void ReadLibs();
+  void SelectBestTrays(const Mask& mask, float activity);
   void SetTrayNames();
 
   void displayFlopClusters(const char* stage,
                            std::vector<std::vector<Flop>>& clusters);
 
   float getLeakage(odb::dbMaster* master);
+  float getInternalEnergy(odb::dbInst* inst);
+  float clockActivity() const;
+  float getClockPeriod(odb::dbInst* ff_inst);
 
   // OpenROAD vars
   odb::dbDatabase* db_;
@@ -254,6 +270,8 @@ class MBFF
   float single_bit_height_;
   float single_bit_width_;
   float single_bit_power_;
+  float clock_period_;
+  odb::dbMaster* single_bit_master_;
 
   // launch-capture FF-pair vars
   std::map<std::string, int> name_to_idx_;
@@ -270,9 +288,11 @@ class MBFF
   ArrayMaskVector<DataToOutputsMap> pin_mappings_;
   ArrayMaskVector<float> tray_area_;
   ArrayMaskVector<float> tray_power_;
+  ArrayMaskVector<float> tray_internal_energy_;
   ArrayMaskVector<float> tray_width_;
   ArrayMaskVector<std::vector<float>> slot_to_tray_x_;
   ArrayMaskVector<std::vector<float>> slot_to_tray_y_;
+  ArrayMaskVector<std::vector<TrayCandidate>> tray_candidates_;
   std::vector<float> norm_area_;
   std::vector<float> norm_power_;
   std::vector<int> unused_;
diff --git a/src/gpl/test/mbff_orig_name.ok b/src/gpl/test/mbff_orig_name.ok
index 6e25ca9640e..40f414e2c39 100644
--- a/src/gpl/test/mbff_orig_name.ok
+++ b/src/gpl/test/mbff_orig_name.ok
@@ -10,14 +10,14 @@
 [INFO ODB-0131]     Created 4 components and 20 component-terminals.
 [INFO ODB-0133]     Created 9 nets and 12 connections.
 Alpha = 40.0, Beta = 1.0, #paths = 0, max size = -1
-Total ILP Cost: 97.228
+Total ILP Cost: 112.643
 Total Timing Critical Path Displacement: 0.0
-Average slot-to-flop displacement: 0.865
-Final Objective Value: 97.228
+Average slot-to-flop displacement: 1.730
+Final Objective Value: 112.643
 Sizes used
-  2-bit: 2
-Startpoint: d1 (input port clocked by clk)
-Endpoint: _tray_size2_7 (rising edge-triggered flip-flop clocked by clk)
+  4-bit: 1
+Startpoint: d3 (input port clocked by clk)
+Endpoint: _tray_size4_7 (rising edge-triggered flip-flop clocked by clk)
 Path Group: clk
 Path Type: max
 
@@ -26,14 +26,14 @@ Path Type: max
    0.00    0.00   clock clk (rise edge)
    0.00    0.00   clock network delay (ideal)
    0.00    0.00 ^ input external delay
-   0.00    0.00 ^ d1 (in)
-   0.00    0.00 ^ _tray_size2_7/D1 (DFFHQNV2Xx1_ASAP7_75t_L) ff1/D
+   0.00    0.00 ^ d3 (in)
+   0.00    0.00 ^ _tray_size4_7/D1 (DFFHQNV4Xx1_ASAP7_75t_L) ff3/D
            0.00   data arrival time
 
 1000.00 1000.00   clock clk (rise edge)
    0.00 1000.00   clock network delay (ideal)
    0.00 1000.00   clock reconvergence pessimism
-        1000.00 ^ _tray_size2_7/CLK (DFFHQNV2Xx1_ASAP7_75t_L)
+        1000.00 ^ _tray_size4_7/CLK (DFFHQNV4Xx1_ASAP7_75t_L)
  -22.99  977.01   library setup time
          977.01   data required time
 ---------------------------------------------------------------------------------------------
diff --git a/src/gpl/test/mbff_orig_name.tcl b/src/gpl/test/mbff_orig_name.tcl
index 6b3957b1645..69d7969da32 100644
--- a/src/gpl/test/mbff_orig_name.tcl
+++ b/src/gpl/test/mbff_orig_name.tcl
@@ -26,4 +26,4 @@ cluster_flops -tray_weight 40.0 \
 
 # Report timing to verify original FF names appear in the path report.
 # After clustering the tray pin descriptions should show in the Orig Name column.
-report_checks -path_delay max -fields {orig_name} -through [get_pins _tray_size2_7/D1]
+report_checks -path_delay max -fields {orig_name} -through [get_pins _tray_size4_7/D1]