Merge commit '0da8ba0dec111d78292455bb5f17c6045820db25'

2014-08-28 12:04:34 +02:00
parent e9db4ee4e7 0da8ba0dec
commit 84cfa2ae42
13 changed files with 585 additions and 51 deletions
--- a/src/core/util/BlackholeLogger.hpp
+++ b/src/core/util/BlackholeLogger.hpp
@ -0,0 +1,25 @@
+#ifndef __BLACKHOLE_LOGGER_HPP__
+#define __BLACKHOLE_LOGGER_HPP__
+
+#include <iostream>
+#include <string>
+
+namespace fail {
+
+/**
+ * \class BlackholeLogger
+ * A /dev/null sink intended as a drop-in replacement for Logger.  All
+ * configuration calls are empty and everything streamed into it is
+ * discarded, so most of it should be optimized away on non-trivial
+ * optimization levels.
+ */
+class BlackholeLogger {
+public:
+	/**
+	 * Accepts and ignores a description, a timestamp flag and a destination
+	 * stream.
+	 */
+	BlackholeLogger(const std::string& description = "Fail*", bool show_time = true,
+		   std::ostream& dest = std::cout) { }
+	//! Ignores the new description.
+	void setDescription(const std::string& descr) { }
+	//! Ignores the timestamp setting.
+	void showTime(bool choice) { }
+	/**
+	 * Discards \a v.
+	 * \return a stream without a stream buffer, so everything chained onto it
+	 *         (further values, manipulators such as std::endl) is dropped too
+	 */
+	template<class T>
+	inline std::ostream& operator <<(const T& v) { return sink(); }
+private:
+	//! Stream constructed with a null buffer: it is in a failed state from
+	//! the start, so every write to it is a no-op.
+	static std::ostream& sink()
+	{
+		static std::ostream null_stream(static_cast<std::streambuf *>(0));
+		return null_stream;
+	}
+};
+
+} // end-of-namespace: fail
+
+#endif // __BLACKHOLE_LOGGER_HPP__
--- a/src/core/util/CMakeLists.txt
+++ b/src/core/util/CMakeLists.txt
@ -89,3 +89,6 @@ endif (BUILD_LLVM_DISASSEMBLER)
 add_executable(memorymap-test testing/memorymap-test.cc)
 target_link_libraries(memorymap-test fail-util)
 add_test(NAME memorymap-test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testing COMMAND memorymap-test)
+
+# Self-test for the SumTree container.  testing/SumTreeTest.cc only includes
+# util/SumTree.hpp and <iostream>, so no target_link_libraries() entry is listed.
+add_executable(sumtree-test testing/SumTreeTest.cc)
+add_test(NAME sumtree-test WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/testing COMMAND sumtree-test)
--- a/src/core/util/Database.cc
+++ b/src/core/util/Database.cc
@ -141,34 +141,81 @@ my_ulonglong Database::insert_id()
 	return mysql_insert_id(handle);
 }

-std::vector<Database::Variant> Database::get_variants(const std::string &variant, const std::string &benchmark) {
-	std::vector<Variant> result;
-
+/**
+ * Creates the "variant" table (columns id, variant, benchmark; unique on the
+ * variant/benchmark pair) unless it already exists.
+ * \return false if the CREATE TABLE query reported failure, true otherwise
+ */
+bool Database::create_variants_table()
+{
 	if (!query("CREATE TABLE IF NOT EXISTS variant ("
-		  "	 id int(11) NOT NULL AUTO_INCREMENT,"
-		  "	 variant varchar(255) NOT NULL,"
-		  "	 benchmark varchar(255) NOT NULL,"
-		  "	 PRIMARY KEY (id),"
-		  "UNIQUE KEY variant (variant,benchmark)) ENGINE=MyISAM")) {
+		"      id int(11) NOT NULL AUTO_INCREMENT,"
+		"      variant varchar(255) NOT NULL,"
+		"      benchmark varchar(255) NOT NULL,"
+		"      PRIMARY KEY (id),"
+		"UNIQUE KEY variant (variant,benchmark)) ENGINE=MyISAM")) {
+		return false;
+	}
+	return true;
+}
+
+std::vector<Database::Variant> Database::get_variants(const std::string &variant, const std::string &benchmark)
+{
+	// Wrap each single pattern into a one-element list and delegate to the
+	// list-based overload; both exclude lists stay empty.
+	const std::vector<std::string> variant_patterns(1, variant);
+	const std::vector<std::string> benchmark_patterns(1, benchmark);
+	const std::vector<std::string> no_excludes;
+
+	return get_variants(variant_patterns, no_excludes, benchmark_patterns, no_excludes);
+}
+
+std::vector<Database::Variant> Database::get_variants(
+	const std::vector<std::string>& variants,
+	const std::vector<std::string>& variants_exclude,
+	const std::vector<std::string>& benchmarks,
+	const std::vector<std::string>& benchmarks_exclude)
+{
+	std::vector<Variant> result;
+	std::stringstream ss;
+
+	// make sure variant table exists
+	if (!create_variants_table()) {
 		return result;
 	}

-	std::stringstream ss;
-	// FIXME SQL injection possible
-	ss << "SELECT id, variant, benchmark FROM variant WHERE variant LIKE '" << variant << "' AND benchmark LIKE '" << benchmark << "'";
-	MYSQL_RES *variant_id_res = query(ss.str().c_str(), true);
+	// FIXME string escaping
+	ss << "SELECT id, variant, benchmark FROM variant WHERE ";
+	ss << "(";
+	for (std::vector<std::string>::const_iterator it = variants.begin();
+	     it != variants.end(); ++it) {
+		ss << "variant LIKE '" << *it << "' OR ";
+	}
+	ss << "0) AND (";
+	for (std::vector<std::string>::const_iterator it = benchmarks.begin();
+	     it != benchmarks.end(); ++it) {
+		ss << "benchmark LIKE '" << *it << "' OR ";
+	}
+	// dummy terminator to avoid special cases in query construction above
+	ss << "0) AND NOT (";
+	for (std::vector<std::string>::const_iterator it = variants_exclude.begin();
+	     it != variants_exclude.end(); ++it) {
+		ss << "variant LIKE '" << *it << "' OR ";
+	}
+	for (std::vector<std::string>::const_iterator it = benchmarks_exclude.begin();
+	     it != benchmarks_exclude.end(); ++it) {
+		ss << "benchmark LIKE '" << *it << "' OR ";
+	}
+	// dummy terminator to avoid special cases in query construction above
+	ss << "0)";

+	MYSQL_RES *variant_id_res = query(ss.str().c_str(), true);
 	if (!variant_id_res) {
 		return result;
-	} else if (mysql_num_rows(variant_id_res)) {
-		for (unsigned int i = 0; i < mysql_num_rows(variant_id_res); ++i) {
-			MYSQL_ROW row = mysql_fetch_row(variant_id_res);
-			Variant var;
-			var.id = atoi(row[0]);
-			var.variant = std::string(row[1]);
-			var.benchmark = std::string(row[2]);
-			result.push_back(var);
-		}
+	}
+
+	MYSQL_ROW row;
+	while ((row = mysql_fetch_row(variant_id_res))) {
+		Variant var;
+		var.id = atoi(row[0]);
+		var.variant = row[1];
+		var.benchmark = row[2];
+		result.push_back(var);
 	}

 	return result;
--- a/src/core/util/Database.hpp
+++ b/src/core/util/Database.hpp
@ -54,6 +54,16 @@ namespace fail {
 		 */
 		std::vector<Variant> get_variants(const std::string &variant, const std::string &benchmark);

+		/**
+		 * Get all variants that fit one of the variant, one of the benchmark,
+		 * and none of the variant/benchmark exclude patterns (will be queried
+		 * with SQL LIKE).
+		 *
+		 * The patterns are inserted into the query text as they are (no
+		 * escaping), so they must come from a trusted source.
+		 *
+		 * \param variants            variant patterns; at least one must match.
+		 *                            An empty list matches nothing.
+		 * \param variants_exclude    variant patterns that must not match
+		 * \param benchmarks          benchmark patterns; at least one must match.
+		 *                            An empty list matches nothing.
+		 * \param benchmarks_exclude  benchmark patterns that must not match
+		 * \return the matching variants; empty if nothing matches, the variant
+		 *         table could not be created, or the query failed
+		 */
+		std::vector<Variant> get_variants(
+			const std::vector<std::string>& variants,
+			const std::vector<std::string>& variants_exclude,
+			const std::vector<std::string>& benchmarks,
+			const std::vector<std::string>& benchmarks_exclude);

 		/**
 		 * Get the fault space pruning method id for a specific
@ -113,6 +123,9 @@ namespace fail {
 		 */
 		static void cmdline_setup();
 		static Database * cmdline_connect();
+
+	private:
+		/**
+		 * Create the variant table unless it already exists.
+		 * \return false if the CREATE TABLE query failed, true otherwise
+		 */
+		bool create_variants_table();
 	};

 }
--- a/src/core/util/SumTree.hpp
+++ b/src/core/util/SumTree.hpp
@ -0,0 +1,189 @@
+#ifndef __SUM_TREE_HPP__
+#define __SUM_TREE_HPP__
+
+#include <assert.h>
+#include <stdint.h>
+#include <vector>
+
+// The SumTree implements an efficient tree data structure for
+// "roulette-wheel" sampling, or "sampling with fault expansion", i.e.,
+// sampling of trace entries / pilots without replacement and with a
+// picking probability proportional to the entries' sizes.
+//
+// For every sample, the naive approach picks a random number between 0
+// and the sum of all entry sizes minus one.  It then iterates over all
+// entries and sums their sizes until the sum exceeds the random number.
+// The current entry gets picked.  The main disadvantage is the linear
+// complexity, which gets unpleasant for millions of entries.
+//
+// The core idea behind the SumTree implementation is to maintain the
+// size sum of groups of entries, kept in "buckets".  Thereby, a bucket
+// can be quickly jumped over.  To keep bucket sizes (and thereby linear
+// search times) bounded, more bucket hierarchy levels are introduced
+// when a defined bucket size limit is reached.
+//
+// Note that the current implementation is built for a pure growth phase
+// (when the tree gets filled with pilots from the database), followed by
+// a sampling phase when the tree gets emptied.  It does not handle a
+// mixed add/remove case very smartly, although it should remain
+// functional.
+
+namespace fail {
+
+/**
+ * Bucket tree for size-proportional sampling without replacement.
+ *
+ * T must provide a nested size_type, a size() member, and be copyable and
+ * default-constructible.  BUCKETSIZE bounds the number of children per inner
+ * node and of elements per leaf; it must be at least 2, because a tree of
+ * single-slot buckets cannot grow beyond one element.
+ *
+ * The tree owns its nodes through raw pointers and is therefore not copyable.
+ */
+template <typename T, unsigned BUCKETSIZE = 1024>
+class SumTree {
+	//! Bucket data structure for tree nodes
+	struct Bucket {
+		Bucket() : size(0) {}
+		//! Deletes all sub-buckets.
+		~Bucket();
+		//! Sum of all children / elements
+		typename T::size_type size;
+		//! Sub-buckets, empty for leaf nodes
+		std::vector<Bucket *> children;
+		//! Contained elements, empty for inner nodes
+		std::vector<T> elements;
+	private:
+		// not copyable: a copy would delete the same children a second time
+		// (declared only, intentionally left undefined)
+		Bucket(const Bucket&);
+		Bucket& operator=(const Bucket&);
+	};
+
+	//! Root node
+	Bucket *m_root;
+	//! Tree depth: nodes at level m_depth are leaf nodes, others are inner nodes
+	unsigned m_depth;
+public:
+	SumTree() : m_root(new Bucket), m_depth(0) {}
+	~SumTree() { delete m_root; }
+	//! Adds a new element to the tree.  Elements with size() == 0 are ignored.
+	void add(const T& element);
+	//! Retrieves (and removes) element at random number position.
+	//! pos must be smaller than get_size().
+	T get(typename T::size_type pos) { return get(pos, m_root, 0); }
+	//! Yields the sum over all elements in the tree.
+	typename T::size_type get_size() const { return m_root->size; }
+private:
+	// not copyable: a copy would delete m_root a second time
+	// (declared only, intentionally left undefined)
+	SumTree(const SumTree&);
+	SumTree& operator=(const SumTree&);
+	//! Internal, recursive version of add().  Returns false if the subtree
+	//! below *node is full.
+	bool add(Bucket **node, const T& element, unsigned depth_remaining);
+	//! Internal, recursive version of get().  sum is the total size of
+	//! everything located before node.
+	T get(typename T::size_type pos, Bucket *node, typename T::size_type sum);
+};
+
+// template implementation
+
+template <typename T, unsigned BUCKETSIZE>
+SumTree<T, BUCKETSIZE>::Bucket::~Bucket()
+{
+	// release the owned sub-buckets front to back; each one cleans up its
+	// own subtree in turn
+	for (typename std::vector<Bucket *>::size_type idx = 0; idx < children.size(); ++idx) {
+		delete children[idx];
+	}
+}
+
+/**
+ * Adds a copy of element to the tree, growing the tree by one level when all
+ * buckets of the current depth are full.  Elements of size 0 are ignored.
+ */
+template <typename T, unsigned BUCKETSIZE>
+void SumTree<T, BUCKETSIZE>::add(const T& element)
+{
+	if (element.size() == 0) {
+		// pilots with size == 0 cannot be picked anyways
+		return;
+	}
+
+	if (add(&m_root, element, m_depth)) {
+		// tree wasn't full yet, add succeeded
+		return;
+	}
+
+	// tree is full, move everything one level down
+	++m_depth;
+	Bucket *b = new Bucket;
+	b->children.push_back(m_root);
+	b->size = m_root->size;
+	m_root = b;
+
+	// retry: the new root has room for another child unless BUCKETSIZE < 2,
+	// in which case the element could not be stored at all
+	const bool added = add(&m_root, element, m_depth);
+	assert(added);
+	(void) added; // silences the unused warning in NDEBUG builds
+}
+
+// Recursive helper of add(): stores element below *node, where
+// depth_remaining is the number of levels between *node and the leaves.
+// Returns false if the whole subtree is full.
+template <typename T, unsigned BUCKETSIZE>
+bool SumTree<T, BUCKETSIZE>::add(Bucket **node, const T& element, unsigned depth_remaining)
+{
+	Bucket *bucket = *node;
+
+	// leaf node: store the element right here if there is room left
+	if (depth_remaining == 0) {
+		if (bucket->elements.size() >= BUCKETSIZE) {
+			return false;
+		}
+		bucket->elements.push_back(element);
+		bucket->size += element.size();
+		return true;
+	}
+
+	// inner node: make sure there is a child to descend into
+	if (bucket->children.size() == 0) {
+		bucket->children.push_back(new Bucket);
+	}
+
+	// only the newest child can still have room
+	const bool stored = add(&bucket->children.back(), element, depth_remaining - 1);
+	if (!stored) {
+		// newest child is full; give up if no further child may be created
+		if (bucket->children.size() >= BUCKETSIZE) {
+			return false;
+		}
+		bucket->children.push_back(new Bucket);
+		add(&bucket->children.back(), element, depth_remaining - 1);
+	}
+
+	bucket->size += element.size();
+	return true;
+}
+
+// Recursive helper of get(): removes and returns the element covering
+// position pos.  sum is the total size of everything located before node.
+template <typename T, unsigned BUCKETSIZE>
+T SumTree<T, BUCKETSIZE>::get(typename T::size_type pos, Bucket *node, typename T::size_type sum)
+{
+	typedef typename T::size_type size_type;
+
+	// pos must lie within the range covered by this node
+	assert(pos >= sum && pos < sum + node->size);
+
+	// inner nodes: skip whole sub-buckets until the one covering pos is found
+	std::vector<Bucket *>& kids = node->children;
+	for (typename std::vector<Bucket *>::size_type i = 0; i < kids.size(); ++i) {
+		Bucket *child = kids[i];
+		const size_type upper = sum + child->size;
+		if (upper <= pos) {
+			sum = upper;
+			continue;
+		}
+
+		// pos lies inside this child, descend
+		T picked = get(pos, child, sum);
+		node->size -= picked.size();
+		// drop the child once nothing of non-zero size is left in it
+		if (child->size == 0) {
+			delete child;
+			kids.erase(kids.begin() + i);
+		}
+		return picked;
+	}
+
+	// leaf nodes: linear search over the stored elements
+	std::vector<T>& items = node->elements;
+	for (typename std::vector<T>::size_type i = 0; i < items.size(); ++i) {
+		sum += items[i].size();
+		if (sum <= pos) {
+			continue;
+		}
+
+		// found the element covering pos
+		T picked = items[i];
+		node->size -= picked.size();
+		items.erase(items.begin() + i);
+		return picked;
+	}
+
+	// not reachable as long as the bucket sums are consistent
+	assert(0);
+	return T();
+}
+
+} // namespace
+
+#endif
--- a/src/core/util/testing/SumTreeTest.cc
+++ b/src/core/util/testing/SumTreeTest.cc
@ -0,0 +1,34 @@
+#include "util/SumTree.hpp"
+
+#include <iostream>
+#define LOG std::cerr
+
+using std::endl;
+
+//! Minimal element type for the SumTree test; its size is its duration.
+struct Pilot {
+	uint32_t id;
+	uint32_t instr2;
+	uint32_t data_address;
+	uint64_t duration;
+
+	typedef uint64_t size_type;
+	//! Zeroes all fields, so copying a freshly created Pilot never reads
+	//! indeterminate values.
+	Pilot() : id(0), instr2(0), data_address(0), duration(0) {}
+	//! Weight of this pilot for sampling.
+	size_type size() const { return duration; }
+};
+
+/**
+ * Fills a small SumTree with pilots of duration 0..20 and drains it again,
+ * checking the bookkeeping on the way.  Returns 0 on success and 1 on the
+ * first inconsistency, so that CTest reports a failure.
+ */
+int main()
+{
+	fail::SumTree<Pilot, 2> tree;
+	uint64_t expected_total = 0;
+	for (int i = 0; i <= 20; ++i) {
+		Pilot p;
+		p.duration = i;
+		tree.add(p);
+		expected_total += i;
+	}
+
+	if (tree.get_size() != expected_total) {
+		LOG << "MAIN unexpected tree size " << tree.get_size()
+			<< ", expected " << expected_total << endl;
+		return 1;
+	}
+
+	uint64_t retrieved_total = 0;
+	unsigned retrieved_count = 0;
+	while (tree.get_size() > 0) {
+		uint64_t size_before = tree.get_size();
+		uint64_t pos = size_before / 2;
+		LOG << "MAIN tree.get_size() = " << tree.get_size()
+			<< ", trying to retrieve pos = " << pos << endl;
+		Pilot p = tree.get(pos);
+		LOG << "MAIN retrieved pilot with duration " << p.duration << endl;
+
+		// every retrieved pilot must have a non-zero size, and the tree must
+		// shrink by exactly that size
+		if (p.duration == 0 || tree.get_size() != size_before - p.duration) {
+			LOG << "MAIN inconsistent tree size " << tree.get_size()
+				<< " after retrieval" << endl;
+			return 1;
+		}
+		retrieved_total += p.duration;
+		++retrieved_count;
+	}
+
+	// the pilot with duration 0 is never stored, the other 20 must come back
+	if (retrieved_total != expected_total || retrieved_count != 20) {
+		LOG << "MAIN retrieved " << retrieved_count << " pilots with total duration "
+			<< retrieved_total << ", expected 20 pilots with total duration "
+			<< expected_total << endl;
+		return 1;
+	}
+	return 0;
+}