[llvm-exegesis] Clustering: don't enqueue a point multiple times

Summary: SetVector uses both DenseSet and vector, which is time/memory inefficient. The points are represented as natural numbers so we can replace the DenseSet part by indexing into a vector<char> instead. Don't cargo cult the pseudocode on the wikipedia DBSCAN page. This is a standard BFS style algorithm (the similar loops have been used several times in other LLVM components): every point is processed at most once, thus the queue has at most NumPoints elements. We represent it with a vector and allocate it outside of the loop to avoid allocation in the loop body. We check `Processed[P]` to avoid enqueueing a point more than once, which also nicely saves us a `ClusterIdForPoint_[Q].isUndef()` check. Many people hate the oneshot abstraction but some favor it, therefore we make a compromise, use a lambda to abstract away the neighbor adding process. Delete the comment `assert(Neighbors.capacity() == (Points_.size() - 1));` as it is wrong.
author: Fangrui Song <maskray@google.com> 2018-12-23 20:48:52 +0000
committer: Fangrui Song <maskray@google.com> 2018-12-23 20:48:52 +0000
commit: 67cc0b45fcacba34f64b61babf41d865f0d5a26a (patch)
tree: 47f6154177669edb0e74fa2e1c64e527cde0ca13
parent: 3e54333a70ff0c671803e886626278877674207b (diff)
1 files changed, 35 insertions, 33 deletions
diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp
index b2cd97c12eb..56b1a939c41 100644
--- a/llvm/tools/llvm-exegesis/lib/Clustering.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp
@@ -8,7 +8,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "Clustering.h"
-#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include <string>
 
@@ -92,8 +91,14 @@ llvm::Error InstructionBenchmarkClustering::validateAndSetup() {
 }
 
 void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
-  std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs.
-  for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
+  const size_t NumPoints = Points_.size();
+
+  // Persistent buffers to avoid allocs.
+  std::vector<size_t> Neighbors;
+  std::vector<size_t> ToProcess(NumPoints);
+  std::vector<char> Processed(NumPoints);
+
+  for (size_t P = 0; P < NumPoints; ++P) {
     if (!ClusterIdForPoint_[P].isUndef())
       continue; // Previously processed in inner loop.
     rangeQuery(P, Neighbors);
@@ -109,43 +114,40 @@ void InstructionBenchmarkClustering::dbScan(const size_t MinPts) {
     Cluster &CurrentCluster = Clusters_.back();
     ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */
     CurrentCluster.PointIndices.push_back(P);
+    Processed[P] = 1;
 
-    // Process P's neighbors.
-    llvm::SetVector<size_t, std::deque<size_t>> ToProcess;
-    ToProcess.insert(Neighbors.begin(), Neighbors.end());
-    while (!ToProcess.empty()) {
-      // Retrieve a point from the set.
-      const size_t Q = *ToProcess.begin();
-      ToProcess.erase(ToProcess.begin());
-
-      if (ClusterIdForPoint_[Q].isNoise()) {
-        // Change noise point to border point.
-        ClusterIdForPoint_[Q] = CurrentCluster.Id;
-        CurrentCluster.PointIndices.push_back(Q);
+    // Enqueue P's neighbors.
+    size_t Tail = 0;
+    auto EnqueueUnprocessed = [&](const std::vector<size_t> &Neighbors) {
+      for (size_t Q : Neighbors)
+        if (!Processed[Q]) {
+          ToProcess[Tail++] = Q;
+          Processed[Q] = 1;
+        }
+    };
+    EnqueueUnprocessed(Neighbors);
+
+    for (size_t Head = 0; Head < Tail; ++Head) {
+      // Retrieve a point from the queue and add it to the current cluster.
+      P = ToProcess[Head];
+      ClusterId OldCID = ClusterIdForPoint_[P];
+      ClusterIdForPoint_[P] = CurrentCluster.Id;
+      CurrentCluster.PointIndices.push_back(P);
+      if (OldCID.isNoise())
         continue;
-      }
-      if (!ClusterIdForPoint_[Q].isUndef()) {
-        continue; // Previously processed.
-      }
-      // Add Q to the current custer.
-      ClusterIdForPoint_[Q] = CurrentCluster.Id;
-      CurrentCluster.PointIndices.push_back(Q);
-      // And extend to the neighbors of Q if the region is dense enough.
-      rangeQuery(Q, Neighbors);
-      if (Neighbors.size() + 1 >= MinPts) {
-        ToProcess.insert(Neighbors.begin(), Neighbors.end());
-      }
+      assert(OldCID.isUndef());
+
+      // And extend to the neighbors of P if the region is dense enough.
+      rangeQuery(P, Neighbors);
+      if (Neighbors.size() + 1 >= MinPts)
+        EnqueueUnprocessed(Neighbors);
     }
   }
-  // assert(Neighbors.capacity() == (Points_.size() - 1));
-  // ^ True, but it is not quaranteed to be true in all the cases.
 
   // Add noisy points to noise cluster.
-  for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
-    if (ClusterIdForPoint_[P].isNoise()) {
+  for (size_t P = 0; P < NumPoints; ++P)
+    if (ClusterIdForPoint_[P].isNoise())
       NoiseCluster_.PointIndices.push_back(P);
-    }
-  }
 }
 
 llvm::Expected<InstructionBenchmarkClustering>
author	Fangrui Song <maskray@google.com>	2018-12-23 20:48:52 +0000
committer	Fangrui Song <maskray@google.com>	2018-12-23 20:48:52 +0000
commit	67cc0b45fcacba34f64b61babf41d865f0d5a26a (patch)
tree	47f6154177669edb0e74fa2e1c64e527cde0ca13
parent	3e54333a70ff0c671803e886626278877674207b (diff)