From 2f70e05db6223fbe45e9a00a31cb1d01f887dab5 Mon Sep 17 00:00:00 2001 From: Horst Schirmeier Date: Wed, 14 Jan 2015 23:37:18 +0100 Subject: [PATCH] util: rename SumTree::get -> remove, add r/o get SumTree::get now non-intrusively picks an element and returns a reference to it, SumTree::remove removes and returns a copy. The former is needed for sampling with replacement. Change-Id: Iefef2fdf0b7df6ea7a9949f2588528ec9e86bb7a --- src/core/util/SumTree.hpp | 60 +++++++++++++++++++++++---- src/core/util/testing/SumTreeTest.cc | 2 +- tools/prune-trace/FESamplingPruner.cc | 4 +- 3 files changed, 54 insertions(+), 12 deletions(-) diff --git a/src/core/util/SumTree.hpp b/src/core/util/SumTree.hpp index 337e7db8..9d50269b 100644 --- a/src/core/util/SumTree.hpp +++ b/src/core/util/SumTree.hpp @@ -7,7 +7,7 @@ // The SumTree implements an efficient tree data structure for // "roulette-wheel" sampling, or "sampling with fault expansion", i.e., -// sampling of trace entries / pilots without replacement and with a +// sampling of trace entries / pilots with/without replacement and with a // picking probability proportional to the entries' sizes. // // For every sample, the naive approach picks a random number between 0 @@ -24,9 +24,9 @@ // // Note that the current implementation is built for a pure growth phase // (when the tree gets filled with pilots from the database), followed by -// a sampling phase when the tree gets emptied. It does not handle a -// mixed add/remove case very smartly, although it should remain -// functional. +// a sampling phase when the tree gets sampled from (with replacement) or +// emptied (without replacement). It does not handle a mixed add/remove case +// very smartly, although it should remain functional. namespace fail { @@ -53,15 +53,19 @@ public: ~SumTree() { delete m_root; } //! Adds a copy of a new element to the tree. The copy is created internally. void add(const T& element); - //! Retrieves (and removes) element at random number position. 
- T get(typename T::size_type pos) { return get(pos, m_root, 0); } + //! Retrieves and removes element at random number position. + T remove(typename T::size_type pos) { return remove(pos, m_root, 0); } + //! Retrieves reference to element at random number position. + T& get(typename T::size_type pos) { return get(pos, m_root, 0); } //! Yields the sum over all elements in the tree. typename T::size_type get_size() const { return m_root->size; } private: //! Internal, recursive version of add(). bool add(Bucket **node, const T& element, unsigned depth_remaining); + //! Internal, recursive version of remove(). + T remove(typename T::size_type pos, Bucket *node, typename T::size_type sum); //! Internal, recursive version of get(). - T get(typename T::size_type pos, Bucket *node, typename T::size_type sum); + T& get(typename T::size_type pos, Bucket *node, typename T::size_type sum); }; // template implementation @@ -137,7 +141,7 @@ bool SumTree::add(Bucket **node, const T& element, unsigned depth } template -T SumTree::get(typename T::size_type pos, Bucket *node, typename T::size_type sum) +T SumTree::remove(typename T::size_type pos, Bucket *node, typename T::size_type sum) { // sanity check assert(pos >= sum && pos < sum + node->size); @@ -153,7 +157,7 @@ T SumTree::get(typename T::size_type pos, Bucket *node, typename // found containing bucket, recurse sum -= (*it)->size; - T e = get(pos, *it, sum); + T e = remove(pos, *it, sum); node->size -= e.size(); // remove empty (or, at least, zero-sized) child? 
if ((*it)->size == 0) { @@ -184,6 +188,44 @@ T SumTree::get(typename T::size_type pos, Bucket *node, typename return T(); } +template +T& SumTree::get(typename T::size_type pos, Bucket *node, typename T::size_type sum) +{ + // sanity check + assert(pos >= sum && pos < sum + node->size); + + // will only be entered for inner nodes + for (typename std::vector::iterator it = node->children.begin(); + it != node->children.end(); ) { + sum += (*it)->size; + if (sum <= pos) { + ++it; + continue; + } + + // found containing bucket, recurse + sum -= (*it)->size; + return get(pos, *it, sum); + } + + // will only be entered for leaf nodes + for (typename std::vector::iterator it = node->elements.begin(); + it != node->elements.end(); ) { + sum += it->size(); + if (sum <= pos) { + ++it; + continue; + } + + // found pilot + return *it; + } + + // this should never happen; if asserts are disabled, return a shared dummy instead of leaking a heap-allocated T + assert(0); + static T unreachable_dummy; return unreachable_dummy; +} + } // namespace #endif diff --git a/src/core/util/testing/SumTreeTest.cc b/src/core/util/testing/SumTreeTest.cc index 1757cd17..9824a034 100644 --- a/src/core/util/testing/SumTreeTest.cc +++ b/src/core/util/testing/SumTreeTest.cc @@ -28,7 +28,7 @@ int main() uint64_t pos = tree.get_size() / 2; LOG << "MAIN tree.get_size() = " << tree.get_size() << ", trying to retrieve pos = " << pos << endl; - Pilot p = tree.get(pos); + Pilot p = tree.remove(pos); LOG << "MAIN retrieved pilot with duration " << p.duration << endl; } } diff --git a/tools/prune-trace/FESamplingPruner.cc b/tools/prune-trace/FESamplingPruner.cc index cebd7292..e8f0710f 100644 --- a/tools/prune-trace/FESamplingPruner.cc +++ b/tools/prune-trace/FESamplingPruner.cc @@ -161,7 +161,7 @@ bool FESamplingPruner::sampling_prune(const fail::Database::Variant& variant) for (uint64_t i = 0; i < samplerows; ++i) { uint64_t pos = my_rand(pop.get_size() - 1); - Pilot p = pop.get(pos); + Pilot p = pop.remove(pos); ss << "(0," << variant.id << "," << p.instr2 << "," << p.instr2 << "," << p.instr2_absolute << "," << 
p.data_address << ",1," << m_method_id << ")"; @@ -203,7 +203,7 @@ bool FESamplingPruner::sampling_prune(const fail::Database::Variant& variant) for (uint64_t i = 0; i < samplerows; ++i) { uint64_t pos = my_rand(pop.get_size() - 1); - Pilot p = pop.get(pos); + Pilot p = pop.remove(pos); ss << "(" << variant.id << "," << p.instr2 << "," << p.data_address << "," << m_method_id << "," << p.id << ")";