diff options
author | Fangrui Song <maskray@google.com> | 2018-12-23 20:48:52 +0000 |
---|---|---|
committer | Fangrui Song <maskray@google.com> | 2018-12-23 20:48:52 +0000 |
commit | 67cc0b45fcacba34f64b61babf41d865f0d5a26a (patch) | |
tree | 47f6154177669edb0e74fa2e1c64e527cde0ca13 | |
parent | 3e54333a70ff0c671803e886626278877674207b (diff) |
[llvm-exegesis] Clustering: don't enqueue a point multiple times
Summary:
SetVector uses both DenseSet and vector, which is time/memory inefficient. The points are represented as natural numbers so we can replace the DenseSet part by indexing into a vector<char> instead.
Don't cargo cult the pseudocode on the wikipedia DBSCAN page. This is a standard BFS style algorithm (the similar loops have been used several times in other LLVM components): every point is processed at most once, thus the queue has at most NumPoints elements. We represent it with a vector and allocate it outside of the loop to avoid allocation in the loop body.
We check `Processed[P]` to avoid enqueueing a point more than once, which also nicely saves us a `ClusterIdForPoint_[Q].isUndef()` check.
Many people hate the oneshot abstraction but some favor it, therefore we make a compromise, use a lambda to abstract away the neighbor adding process.
Delete the comment `assert(Neighbors.capacity() == (Points_.size() - 1));` as it is wrong.
-rw-r--r-- | llvm/tools/llvm-exegesis/lib/Clustering.cpp | 68 |
1 files changed, 35 insertions, 33 deletions
diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp index b2cd97c12eb..56b1a939c41 100644 --- a/llvm/tools/llvm-exegesis/lib/Clustering.cpp +++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp @@ -8,7 +8,6 @@ //===----------------------------------------------------------------------===// #include "Clustering.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include <string> @@ -92,8 +91,14 @@ llvm::Error InstructionBenchmarkClustering::validateAndSetup() { } void InstructionBenchmarkClustering::dbScan(const size_t MinPts) { - std::vector<size_t> Neighbors; // Persistent buffer to avoid allocs. - for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { + const size_t NumPoints = Points_.size(); + + // Persistent buffers to avoid allocs. + std::vector<size_t> Neighbors; + std::vector<size_t> ToProcess(NumPoints); + std::vector<char> Processed(NumPoints); + + for (size_t P = 0; P < NumPoints; ++P) { if (!ClusterIdForPoint_[P].isUndef()) continue; // Previously processed in inner loop. rangeQuery(P, Neighbors); @@ -109,43 +114,40 @@ void InstructionBenchmarkClustering::dbScan(const size_t MinPts) { Cluster &CurrentCluster = Clusters_.back(); ClusterIdForPoint_[P] = CurrentCluster.Id; /* Label initial point */ CurrentCluster.PointIndices.push_back(P); + Processed[P] = 1; - // Process P's neighbors. - llvm::SetVector<size_t, std::deque<size_t>> ToProcess; - ToProcess.insert(Neighbors.begin(), Neighbors.end()); - while (!ToProcess.empty()) { - // Retrieve a point from the set. - const size_t Q = *ToProcess.begin(); - ToProcess.erase(ToProcess.begin()); - - if (ClusterIdForPoint_[Q].isNoise()) { - // Change noise point to border point. - ClusterIdForPoint_[Q] = CurrentCluster.Id; - CurrentCluster.PointIndices.push_back(Q); + // Enqueue P's neighbors. + size_t Tail = 0; + auto EnqueueUnprocessed = [&](const std::vector<size_t> &Neighbors) { + for (size_t Q : Neighbors) + if (!Processed[Q]) { + ToProcess[Tail++] = Q; + Processed[Q] = 1; + } + }; + EnqueueUnprocessed(Neighbors); + + for (size_t Head = 0; Head < Tail; ++Head) { + // Retrieve a point from the queue and add it to the current cluster. + P = ToProcess[Head]; + ClusterId OldCID = ClusterIdForPoint_[P]; + ClusterIdForPoint_[P] = CurrentCluster.Id; + CurrentCluster.PointIndices.push_back(P); + if (OldCID.isNoise()) continue; - } - if (!ClusterIdForPoint_[Q].isUndef()) { - continue; // Previously processed. - } - // Add Q to the current custer. - ClusterIdForPoint_[Q] = CurrentCluster.Id; - CurrentCluster.PointIndices.push_back(Q); - // And extend to the neighbors of Q if the region is dense enough. - rangeQuery(Q, Neighbors); - if (Neighbors.size() + 1 >= MinPts) { - ToProcess.insert(Neighbors.begin(), Neighbors.end()); - } + assert(OldCID.isUndef()); + + // And extend to the neighbors of P if the region is dense enough. + rangeQuery(P, Neighbors); + if (Neighbors.size() + 1 >= MinPts) + EnqueueUnprocessed(Neighbors); } } - // assert(Neighbors.capacity() == (Points_.size() - 1)); - // ^ True, but it is not quaranteed to be true in all the cases. // Add noisy points to noise cluster. - for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) { - if (ClusterIdForPoint_[P].isNoise()) { + for (size_t P = 0; P < NumPoints; ++P) + if (ClusterIdForPoint_[P].isNoise()) NoiseCluster_.PointIndices.push_back(P); - } - } } llvm::Expected<InstructionBenchmarkClustering> |