From c4e516e92f936616a015de4877fabfb352cc7c58 Mon Sep 17 00:00:00 2001
From: Ethan Mahintorabi
Date: Tue, 7 Apr 2026 23:47:47 +0000
Subject: [PATCH] BFS Atomic Counter Task

Signed-off-by: Ethan Mahintorabi
---
 dcalc/GraphDelayCalc.cc       |  20 ++--
 include/sta/Bfs.hh            |  33 ++++++
 include/sta/GraphDelayCalc.hh |   3 +-
 search/Bfs.cc                 | 209 ++++++++++++++++++++++++++++++++++
 4 files changed, 253 insertions(+), 12 deletions(-)

diff --git a/dcalc/GraphDelayCalc.cc b/dcalc/GraphDelayCalc.cc
index f8984474..67eeeef6 100644
--- a/dcalc/GraphDelayCalc.cc
+++ b/dcalc/GraphDelayCalc.cc
@@ -28,6 +28,7 @@
 #include <cmath>
 #include <memory>
 #include <mutex>
+#include <atomic>
 
 #include "ContainerHelpers.hh"
 #include "Debug.hh"
@@ -150,7 +151,7 @@ GraphDelayCalc::GraphDelayCalc(StaState *sta) :
   invalid_delays_(makeVertexSet(this)),
   search_pred_(new DcalcPred(sta)),
   search_non_latch_pred_(new DcalcNonLatchPred(sta)),
-  iter_(new BfsFwdIterator(BfsIndex::dcalc, search_non_latch_pred_, sta)),
+  iter_(new BfsFwdInDegreeIterator(BfsIndex::dcalc, search_non_latch_pred_, sta)),
   incremental_delay_tolerance_(0.0)
 {
 }
@@ -343,20 +344,17 @@ GraphDelayCalc::findDelays(Level level)
   int dcalc_count = 0;
   debugPrint(debug_, "delay_calc", 1, "find delays to level {}", level);
   if (!delays_seeded_) {
-    iter_->clear();
-    seedRootSlews();
+    iter_->computeInDegrees();
     delays_seeded_ = true;
   }
-  else
-    iter_->ensureSize();
-  if (incremental_)
-    seedInvalidDelays();
-
-  if (!iter_->empty()) {
-    FindVertexDelays visitor(this);
-    dcalc_count += iter_->visitParallel(level, &visitor);
+  else if (incremental_) {
+    iter_->computeInDegrees(invalid_delays_);
+    invalid_delays_.clear();
   }
+  FindVertexDelays visitor(this);
+  dcalc_count += iter_->visitParallel(level, &visitor);
+
   // Timing checks require slews at both ends of the arc,
   // so find their delays after all slews are known.
   for (Edge *check_edge : invalid_check_edges_)
diff --git a/include/sta/Bfs.hh b/include/sta/Bfs.hh
index 162bd9a8..10afbad1 100644
--- a/include/sta/Bfs.hh
+++ b/include/sta/Bfs.hh
@@ -26,6 +26,9 @@
 #include <mutex>
 #include <vector>
+#include <atomic>
+#include <memory>
+#include <set>
 
 #include "Iterator.hh"
 #include "GraphClass.hh"
@@ -168,4 +170,42 @@ protected:
   void incrLevel(Level &level) const override;
 };
 
+// Forward BFS traversal driven by atomic in-degree counters: a vertex is
+// visited once all of its searchable in-edges have been relaxed, so no
+// level-ordered queue is needed.
+class BfsFwdInDegreeIterator : public StaState
+{
+public:
+  BfsFwdInDegreeIterator(BfsIndex bfs_index,
+                         SearchPred *search_pred,
+                         StaState *sta);
+  virtual ~BfsFwdInDegreeIterator();
+
+  // Seed a full traversal: count in-degrees for the whole graph.
+  void computeInDegrees();
+  // Seed an incremental traversal restricted to the subgraph reachable
+  // from invalid_delays.
+  void computeInDegrees(const VertexSet &invalid_delays);
+  // Visit all seeded vertices in dependency order; returns visit count.
+  int visitParallel(Level to_level, VertexVisitor *visitor);
+  void clear();
+  void enqueue(Vertex *vertex);
+  void remove(Vertex *) {}
+  void deleteVertexBefore(Vertex *) {}
+  void enqueueAdjacentVertices(Vertex *vertex);
+
+protected:
+  std::vector<VertexVisitor*> visitors_;  // one visitor per worker thread
+
+  BfsIndex bfs_index_;
+  SearchPred *search_pred_;
+  std::unique_ptr<std::atomic<int>[]> in_degrees_;  // indexed by objectIdx
+  size_t in_degrees_size_;
+  std::vector<Vertex*> roots_;
+  std::mutex roots_lock_;
+  std::atomic<int> *visit_count_;  // valid only during visitParallel
+  std::mutex mutex_;               // guards processed_edges_
+  std::set<Edge*> processed_edges_;
+};
+
 } // namespace
diff --git a/include/sta/GraphDelayCalc.hh b/include/sta/GraphDelayCalc.hh
index 85d198c6..3772fa38 100644
--- a/include/sta/GraphDelayCalc.hh
+++ b/include/sta/GraphDelayCalc.hh
@@ -43,6 +43,7 @@ class MultiDrvrNet;
 class FindVertexDelays;
 class NetCaps;
 class SearchPred;
+class BfsFwdInDegreeIterator;
 
 using MultiDrvrNetMap = std::map<const Pin*, MultiDrvrNet*>;
 using DrvrLoadSlews = std::vector<Slew>;
@@ -308,7 +309,7 @@ protected:
   std::mutex invalid_edge_lock_;
   SearchPred *search_pred_;
   SearchPred *search_non_latch_pred_;
-  BfsFwdIterator *iter_;
+  BfsFwdInDegreeIterator *iter_;
   MultiDrvrNetMap multi_drvr_net_map_;
   std::mutex multi_drvr_lock_;
   // Percentage (0.0:1.0) change in delay that causes downstream
diff --git a/search/Bfs.cc b/search/Bfs.cc
index 4cc7f769..09099dd9 100644
--- a/search/Bfs.cc
+++ b/search/Bfs.cc
@@ -24,6 +24,8 @@
 // This notice may
not be removed or altered from any source distribution. #include "Bfs.hh" +#include +#include #include "Report.hh" #include "Debug.hh" @@ -486,4 +488,211 @@ BfsBkwdIterator::enqueueAdjacentVertices(Vertex *vertex, } } +thread_local int current_thread_id = 0; + +BfsFwdInDegreeIterator::BfsFwdInDegreeIterator(BfsIndex bfs_index, + SearchPred *search_pred, + StaState *sta) : + StaState(sta), + bfs_index_(bfs_index), + search_pred_(search_pred) +{ +} + +BfsFwdInDegreeIterator::~BfsFwdInDegreeIterator() +{ +} + +void BfsFwdInDegreeIterator::clear() +{ + in_degrees_.reset(); + in_degrees_size_ = 0; + roots_.clear(); +} + +void BfsFwdInDegreeIterator::computeInDegrees() +{ + size_t vertex_count = graph_->vertexCount(); + in_degrees_ = std::make_unique[]>(vertex_count + 1); + in_degrees_size_ = vertex_count + 1; + for (size_t i = 0; i < in_degrees_size_; i++) { + in_degrees_[i].store(0, std::memory_order_relaxed); + } + roots_.clear(); + processed_edges_.clear(); + + VertexIterator vertex_iter(graph_); + while (vertex_iter.hasNext()) { + Vertex *vertex = vertex_iter.next(); + vertex->setVisited(false); + std::set counted_successors; + VertexOutEdgeIterator edge_iter(vertex, graph_); + while (edge_iter.hasNext()) { + Edge *edge = edge_iter.next(); + Vertex *to_vertex = edge->to(graph_); + if (search_pred_->searchThru(edge)) { + if (counted_successors.insert(to_vertex).second) { + in_degrees_[to_vertex->objectIdx()].fetch_add(1, std::memory_order_relaxed); + } + } + } + } + + + + VertexIterator vertex_iter2(graph_); + while (vertex_iter2.hasNext()) { + Vertex *vertex = vertex_iter2.next(); + if (search_pred_->searchFrom(vertex)) { + if (in_degrees_[vertex->objectIdx()].load(std::memory_order_relaxed) == 0) { + roots_.push_back(vertex); + } + } + } +} + +void BfsFwdInDegreeIterator::computeInDegrees(const VertexSet &invalid_delays) +{ + // For incremental, we do a reachability pass to find the affected subgraph. + // Then we compute in-degrees within that subgraph. + + // 1. 
Find reachable subgraph from invalid_delays. + std::set reachable; + std::vector work_list; + for (Vertex *v : invalid_delays) { + work_list.push_back(v); + reachable.insert(v); + } + + size_t idx = 0; + while (idx < work_list.size()) { + Vertex *v = work_list[idx++]; + VertexOutEdgeIterator edge_iter(v, graph_); + while (edge_iter.hasNext()) { + Edge *edge = edge_iter.next(); + Vertex *to_vertex = edge->to(graph_); + if (search_pred_->searchThru(edge)) { + if (reachable.insert(to_vertex).second) { + work_list.push_back(to_vertex); + } + } + } + } + + // 2. Compute in-degrees within the reachable subgraph. + size_t vertex_count = graph_->vertexCount(); + in_degrees_ = std::make_unique[]>(vertex_count + 1); + in_degrees_size_ = vertex_count + 1; + for (size_t i = 0; i < in_degrees_size_; i++) { + in_degrees_[i].store(0, std::memory_order_relaxed); + } + roots_.clear(); + + for (Vertex *v : reachable) { + VertexOutEdgeIterator edge_iter(v, graph_); + while (edge_iter.hasNext()) { + Edge *edge = edge_iter.next(); + Vertex *to_vertex = edge->to(graph_); + if (search_pred_->searchThru(edge)) { + if (reachable.count(to_vertex)) { + in_degrees_[to_vertex->objectIdx()].fetch_add(1, std::memory_order_relaxed); + } + } + } + } + + // 3. Find roots within the reachable subgraph. 
+ for (Vertex *v : reachable) { + if (in_degrees_[v->objectIdx()].load(std::memory_order_relaxed) == 0) { + roots_.push_back(v); + } + } +} + +void BfsFwdInDegreeIterator::enqueue(Vertex *vertex) +{ + visitors_[current_thread_id]->visit(vertex); + visit_count_->fetch_add(1, std::memory_order_relaxed); + enqueueAdjacentVertices(vertex); +} + +void BfsFwdInDegreeIterator::enqueueAdjacentVertices(Vertex *vertex) +{ + VertexOutEdgeIterator edge_iter(vertex, graph_); + while (edge_iter.hasNext()) { + Edge *edge = edge_iter.next(); + Vertex *to_vertex = edge->to(graph_); + if (search_pred_->searchThru(edge)) { + if (!to_vertex->visited()) { + bool inserted = false; + { + std::lock_guard lock(mutex_); + inserted = processed_edges_.insert(edge).second; + } + if (inserted) { + int old_deg = in_degrees_[to_vertex->objectIdx()].fetch_sub(1, std::memory_order_acq_rel); + if (old_deg == 1) { + to_vertex->setVisited(true); + if (dispatch_queue_) { + dispatch_queue_->dispatch([this, to_vertex](size_t tid) { + current_thread_id = tid; + visitors_[tid]->visit(to_vertex); + visit_count_->fetch_add(1, std::memory_order_relaxed); + enqueueAdjacentVertices(to_vertex); + }); + } else { + current_thread_id = 0; + visitors_[0]->visit(to_vertex); + visit_count_->fetch_add(1, std::memory_order_relaxed); + enqueueAdjacentVertices(to_vertex); + } + } + } + } + } + } +} + +int BfsFwdInDegreeIterator::visitParallel(Level to_level, VertexVisitor *visitor) +{ + size_t thread_count = dispatch_queue_ ? 
dispatch_queue_->getThreadCount() : 1; + visitors_.clear(); + if (dispatch_queue_) { + for (size_t k = 0; k < thread_count; k++) + visitors_.push_back(visitor->copy()); + } else { + visitors_.push_back(visitor); + } + + std::atomic visit_count(0); + visit_count_ = &visit_count; + + for (Vertex *root : roots_) { + if (dispatch_queue_) { + dispatch_queue_->dispatch([this, root](size_t tid) { + current_thread_id = tid; + visitors_[tid]->visit(root); + visit_count_->fetch_add(1, std::memory_order_relaxed); + enqueueAdjacentVertices(root); + }); + } else { + current_thread_id = 0; + visitors_[0]->visit(root); + visit_count_->fetch_add(1, std::memory_order_relaxed); + enqueueAdjacentVertices(root); + } + } + + if (dispatch_queue_) + dispatch_queue_->finishTasks(); + + if (dispatch_queue_) { + for (VertexVisitor *v : visitors_) + delete v; + } + visitors_.clear(); + + return visit_count.load(std::memory_order_relaxed); +} + } // namespace sta