diff --git a/tools/import-trace/AdvancedMemoryImporter.cc b/tools/import-trace/AdvancedMemoryImporter.cc
index d8532dd9..6cf96962 100644
--- a/tools/import-trace/AdvancedMemoryImporter.cc
+++ b/tools/import-trace/AdvancedMemoryImporter.cc
@@ -15,6 +15,104 @@ std::string AdvancedMemoryImporter::database_additional_columns()
 		"jumphistory INT UNSIGNED NULL, ";
 }
 
+/**
+ * Names the additional columns this importer fills in on every INSERT.
+ * The snippet begins with a comma because it is appended to the parent's
+ * column list; num_columns has to match the number of columns named in it.
+ */
+void AdvancedMemoryImporter::database_insert_columns(std::string& sql, unsigned& num_columns)
+{
+	// FIXME upcall?
+	sql = ", opcode, jumphistory";
+	num_columns = 2;
+}
+
+//#include <google/protobuf/text_format.h>
+
+/**
+ * Binds the opcode and jumphistory values for the row about to be INSERTed.
+ * Both columns are bound as NULL for fake events and whenever no delayed
+ * entry is queued, as there is no opcode to take from delayed_entries then.
+ */
+bool AdvancedMemoryImporter::database_insert_data(Trace_Event &ev, MYSQL_BIND *bind, unsigned num_columns, bool is_fake)
+{
+	static my_bool null = true;
+	// FIXME upcall?
+	assert(num_columns == 2);
+#if 0
+	// sanity check
+	if (!is_fake && delayed_entries.size() > 0 && ev.ip() != delayed_entries.front().ev.ip()) {
+		std::string out;
+		google::protobuf::TextFormat::PrintToString(ev, &out);
+		std::cout << "ev: " << out << std::endl;
+		google::protobuf::TextFormat::PrintToString(delayed_entries.front().ev, &out);
+		std::cout << "delayed_entries.front.ev: " << out << std::endl;
+	}
+#endif
+	assert(is_fake || delayed_entries.size() == 0 || ev.ip() == delayed_entries.front().ev.ip());
+	// front() on an empty deque is undefined behaviour, so only reference the
+	// queued entry if there is one.
+	const bool have_entry = !delayed_entries.empty();
+	bind[0].buffer_type = MYSQL_TYPE_LONG;
+	bind[0].is_unsigned = 1;
+	if (have_entry) {
+		bind[0].buffer = &delayed_entries.front().opcode;
+	}
+	bind[1].buffer_type = MYSQL_TYPE_LONG;
+	bind[1].is_unsigned = 1;
+	bind[1].buffer = &m_cur_branchmask;
+	if (is_fake || !have_entry) {
+		bind[0].is_null = &null;
+		bind[1].is_null = &null;
+	}
+	return true;
+}
+
+/**
+ * Passes queued memory events on to MemoryImporter::handle_mem_event().
+ * An entry is passed on once BRANCH_WINDOW_SIZE branch decisions following
+ * it are recorded in branches_taken; with finalizing set, all remaining
+ * entries are passed on regardless.
+ */
+void AdvancedMemoryImporter::insert_delayed_entries(bool finalizing)
+{
+	unsigned branchmask;
+	unsigned last_branches_before = UINT_MAX;
+	// If we don't know enough future, and there's a chance we'll learn more,
+	// delay further.
+	for (std::deque<DelayedTraceEntry>::iterator it = delayed_entries.begin();
+	     it != delayed_entries.end() &&
+	     (it->branches_before + BRANCH_WINDOW_SIZE <= branches_taken.size() ||
+	      finalizing);
+	     it = delayed_entries.erase(it)) {
+		// determine branch decisions before / after this mem event
+		if (it->branches_before != last_branches_before) {
+			branchmask = 0;
+			int pos = std::max(-(signed)BRANCH_WINDOW_SIZE, - (signed) it->branches_before);
+			int maxpos = std::min((signed) BRANCH_WINDOW_SIZE,
+				(signed) (branches_taken.size() - it->branches_before));
+			for (; pos < maxpos; ++pos) {
+				branchmask |=
+					((unsigned) branches_taken[it->branches_before + pos])
+					<< ((signed) BRANCH_WINDOW_SIZE - pos - 1);
+			}
+			m_cur_branchmask = branchmask;
+			// remember which position the mask belongs to, so consecutive
+			// entries with the same branch history reuse it
+			last_branches_before = it->branches_before;
+		}
+
+		//LOG << "AdvancedMemoryImporter::insert_delayed_entries instr = " << it->instr << " data_address = " << it->ev.memaddr() << std::endl;
+
+		// trigger INSERT
+		// (will call back via database_insert_data() and ask for additional data)
+		MemoryImporter::handle_mem_event(it->curtime, it->instr, it->ev);
+	}
+
+	// FIXME branches_taken could be shrunk here to stay within a bounded
+	// memory footprint
+}
+
 bool AdvancedMemoryImporter::handle_ip_event(fail::simtime_t curtime, instruction_count_t instr,
 	Trace_Event &ev)
 {
@@ -24,6 +122,10 @@ bool AdvancedMemoryImporter::handle_ip_event(fail::simtime_t curtime, instructio
 		branches_taken.push_back(ev.ip() != m_ip_jump_not_taken);
 	}
 
+	// Check whether we know enough branch-taken future to INSERT a few more
+	// (delayed) trace entries
+	insert_delayed_entries(false);
+
 	if (!binary) {
 		/* Disassemble the binary if necessary */
 		llvm::InitializeAllTargetInfos();
@@ -67,7 +169,10 @@ bool AdvancedMemoryImporter::handle_ip_event(fail::simtime_t curtime, instructio
 		m_ip_jump_not_taken = opcode.address + opcode.length;
 	}
 
-	return MemoryImporter::handle_ip_event(curtime, instr, ev);
+	// IP events may need to be delayed, too, if the parent Importer draws any
+	// information from them.  MemoryImporter does not, though.
+	//return MemoryImporter::handle_ip_event(curtime, instr, ev);
+	return true;
 }
 
 bool AdvancedMemoryImporter::handle_mem_event(fail::simtime_t curtime, instruction_count_t instr,
@@ -75,67 +180,21 @@ bool AdvancedMemoryImporter::handle_mem_event(fail::simtime_t curtime, instructi
 {
 	const LLVMDisassembler::InstrMap &instr_map = disas->getInstrMap();
 	const LLVMDisassembler::Instr &opcode = instr_map.at(ev.ip());
-	TraceEntry entry = { instr, ev.memaddr(), ev.width(), opcode.opcode, branches_taken.size() };
-	update_entries.push_back(entry);
-
-	return MemoryImporter::handle_mem_event(curtime, instr, ev);
-}
-
-bool AdvancedMemoryImporter::finalize()
-{
-	LOG << "adding opcodes and jump history to trace events ..." << std::endl;
-
-	MYSQL_STMT *stmt = 0;
-	std::stringstream sql;
-	sql << "UPDATE trace SET opcode = ?, jumphistory = ? "
-	       "WHERE variant_id = " << m_variant_id << " AND data_address BETWEEN ? AND ? AND instr2 = ?";
-	stmt = mysql_stmt_init(db->getHandle());
-	if (mysql_stmt_prepare(stmt, sql.str().c_str(), sql.str().length())) {
-		LOG << "query '" << sql.str() << "' failed: " << mysql_error(db->getHandle()) << std::endl;
-		return false;
-	}
-	MYSQL_BIND bind[5];
-
-	unsigned rowcount = 0, rowcount_blocks = 0;
-	for (std::vector<TraceEntry>::iterator it = update_entries.begin();
-	     it != update_entries.end(); ++it) {
-		// determine branche decisions before / after this mem event
-		unsigned branchmask = 0;
-		int pos = std::max(-16, - (signed) it->branches_before);
-		int maxpos = std::min((unsigned) 16, branches_taken.size() - it->branches_before);
-		for (; pos < maxpos; ++pos) {
-			branchmask |=
-				((unsigned) branches_taken[it->branches_before + pos])
-				<< (16 - pos - 1);
-		}
-
-		memset(bind, 0, sizeof(bind));
-		for (unsigned i = 0; i < sizeof(bind)/sizeof(*bind); ++i) {
-			bind[i].buffer_type = MYSQL_TYPE_LONG;
-			bind[i].is_unsigned = 1;
-		}
-		bind[0].buffer = &it->opcode;
-		bind[1].buffer = &branchmask;
-		bind[2].buffer = &it->data_address;
-		unsigned rightmargin = it->data_address + it->data_width - 1;
-		bind[3].buffer = &rightmargin;
-		bind[4].buffer = &it->instr2;
-
-		if (mysql_stmt_bind_param(stmt, bind)) {
-			LOG << "mysql_stmt_bind_param() failed: " << mysql_stmt_error(stmt) << std::endl;
-			return false;
-		} else if (mysql_stmt_execute(stmt)) {
-			LOG << "mysql_stmt_execute() failed: " << mysql_stmt_error(stmt) << std::endl;
-			return false;
-		}
-		rowcount += mysql_stmt_affected_rows(stmt);
-
-		if (rowcount >= rowcount_blocks + 10000) {
-			LOG << "Updated " << rowcount << " trace events" << std::endl;
-			rowcount_blocks += 10000;
-		}
-	}
-	LOG << "Updated " << rowcount << " trace events.  Done." << std::endl;
+	DelayedTraceEntry entry = { curtime, instr, ev, opcode.opcode, (unsigned) branches_taken.size() };
+	delayed_entries.push_back(entry);
+	// delay upcall to handle_mem_event until we know enough future branch decisions
+	return true;
+}
+
+/**
+ * Called once the last trace event was consumed: no further branch decisions
+ * will be recorded, so the still delayed entries are passed on now.
+ */
+bool AdvancedMemoryImporter::trace_end_reached()
+{
+	LOG << "inserting remaining trace events ..." << std::endl;
+	// INSERT the remaining entries (with incomplete branch future)
+	insert_delayed_entries(true);
 	return true;
 }
 
diff --git a/tools/import-trace/AdvancedMemoryImporter.hpp b/tools/import-trace/AdvancedMemoryImporter.hpp
index bd698228..cb0ed43a 100644
--- a/tools/import-trace/AdvancedMemoryImporter.hpp
+++ b/tools/import-trace/AdvancedMemoryImporter.hpp
@@ -2,6 +2,7 @@
 #define __ADVANCED_MEMORY_IMPORTER_H__
 
 #include <vector>
+#include <deque>
 #include "MemoryImporter.hpp"
 #include "util/llvmdisassembler/LLVMDisassembler.hpp"
 
@@ -10,6 +11,17 @@
  * A MemoryImporter that additionally imports Relyzer-style conditional branch
  * history, instruction opcodes, and a virtual duration = time2 - time1 + 1
  * column (MariaDB 5.2+ only!) for fault-space pruning purposes.
+ *
+ * Initially this was implemented by directly passing through trace events to
+ * the MemoryImporter, keeping a record of conditional jumps and opcodes, and
+ * UPDATEing all inserted rows in a second pass when the MemoryImporter is
+ * finished.
+ *
+ * Unfortunately, UPDATE is very slow, and keeping all information in memory
+ * till the end doesn't scale indefinitely.  Therefore the implementation now
+ * delays passing memory access events upwards to the MemoryImporter only until
+ * enough branch history is aggregated, and taps into Importer's database
+ * operations with a set of new virtual functions that are called downwards.
  */
 class AdvancedMemoryImporter : public MemoryImporter {
 	llvm::OwningPtr<llvm::object::Binary> binary;
@@ -17,23 +29,32 @@ class AdvancedMemoryImporter : public MemoryImporter {
 	bool m_last_was_conditional_branch;
 	fail::guest_address_t m_ip_jump_not_taken;
 	std::vector<bool> branches_taken;
-	struct TraceEntry {
-		unsigned instr2;
-		uint64_t data_address;
-		unsigned data_width;
+	//! A memory event held back until insert_delayed_entries() passes it on.
+	struct DelayedTraceEntry {
+		fail::simtime_t curtime;
+		instruction_count_t instr;
+		Trace_Event ev;
 		unsigned opcode;
 		unsigned branches_before;
 	};
-	std::vector<TraceEntry> update_entries;
+	std::deque<DelayedTraceEntry> delayed_entries;
+	static const unsigned BRANCH_WINDOW_SIZE = 16; //!< increasing this requires changing the underlying data types
+
+	unsigned m_cur_branchmask; //!< jumphistory value bound by database_insert_data()
+
+	//! Passes on delayed entries; with finalizing set, all of them.
+	void insert_delayed_entries(bool finalizing);
 
 public:
 	AdvancedMemoryImporter() : m_last_was_conditional_branch(false) {}
 	virtual std::string database_additional_columns();
+	virtual void database_insert_columns(std::string& sql, unsigned& num_columns);
+	virtual bool database_insert_data(Trace_Event &ev, MYSQL_BIND *bind, unsigned num_columns, bool is_fake);
 	virtual bool handle_ip_event(fail::simtime_t curtime, instruction_count_t instr,
 		Trace_Event &ev);
 	virtual bool handle_mem_event(fail::simtime_t curtime, instruction_count_t instr,
 		Trace_Event &ev);
-	virtual bool finalize();
+	virtual bool trace_end_reached();
 };
 
 #endif
diff --git a/tools/import-trace/Importer.cc b/tools/import-trace/Importer.cc
index c36f7c81..4803a5bd 100644
--- a/tools/import-trace/Importer.cc
+++ b/tools/import-trace/Importer.cc
@@ -108,6 +108,11 @@ bool Importer::copy_to_database(fail::ProtoIStream &ps) {
 		}
 	}
 
+	if (!trace_end_reached()) {
+		LOG << "trace_end_reached() failed" << std::endl;
+		return false;
+	}
+
 	// Why -1?  In most cases it does not make sense to inject before the
 	// very last instruction, as we won't execute it anymore.  This *only*
 	// makes sense if we also inject into parts of the result vector.  This
@@ -256,6 +261,8 @@ bool Importer::add_trace_event(margin_info_t &begin, margin_info_t &end,
 	stmt = extended ? &stmt_extended : &stmt_basic;
 	columns = extended ? &columns_extended : &columns_basic;
 
+	static unsigned num_additional_columns = 0;
+
 	if (!*stmt) {
 		std::stringstream sql;
 		sql << "INSERT INTO trace (variant_id, instr1, instr1_absolute, instr2, instr2_absolute, time1, time2, "
@@ -270,8 +277,18 @@ bool Importer::add_trace_event(margin_info_t &begin, margin_info_t &end,
 			}
 		}
 
+		// Ask specialized importers whether they want to INSERT additional
+		// columns.
+		std::string additional_columns;
+		database_insert_columns(additional_columns, num_additional_columns);
+		sql << additional_columns;
+
 		sql << ") VALUES (?";
-		for (unsigned i = 1; i < *columns + (extended ? m_extended_trace_regs->count() * 2 : 0); ++i) {
+		for (unsigned i = 1;
+		     i < *columns +
+		         (extended ? m_extended_trace_regs->count() * 2 : 0) +
+		         num_additional_columns;
+		     ++i) {
 			sql << ",?";
 		}
 		sql << ")";
@@ -299,7 +316,7 @@ bool Importer::add_trace_event(margin_info_t &begin, margin_info_t &end,
 	}
 
 	// C99 / g++ extension VLA to the rescue:
-	MYSQL_BIND bind[*columns + m_extended_trace_regs->count() * 2];
+	MYSQL_BIND bind[*columns + m_extended_trace_regs->count() * 2 + num_additional_columns];
 	my_bool fake_null = is_fake;
 	my_bool null = true, not_null = false;
 	long unsigned accesstype_len = 1;
@@ -366,6 +383,15 @@ bool Importer::add_trace_event(margin_info_t &begin, margin_info_t &end,
 			}
 		}
 	}
+
+	// Ask specialized importers what concrete data they want to INSERT.
+	if (num_additional_columns) {
+		unsigned pos = *columns + (extended ? m_extended_trace_regs->count() * 2 : 0);
+		if (!database_insert_data(event, bind + pos, num_additional_columns, is_fake)) {
+			return false;
+		}
+	}
+
 	if (mysql_stmt_bind_param(*stmt, bind)) {
 		LOG << "mysql_stmt_bind_param() failed: " << mysql_stmt_error(*stmt) << std::endl;
 		return false;
diff --git a/tools/import-trace/Importer.hpp b/tools/import-trace/Importer.hpp
index f6465794..2cb20df4 100644
--- a/tools/import-trace/Importer.hpp
+++ b/tools/import-trace/Importer.hpp
@@ -82,6 +82,19 @@ public:
 	 * pass through their parent's implementation.
 	 */
 	virtual std::string database_additional_columns() { return ""; }
+	/**
+	 * Similar to database_additional_columns(), this allows specialized
+	 * importers to define which additional columns they want to INSERT
+	 * alongside what add_trace_event() adds by itself.  This may be identical
+	 * to or a subset of what database_additional_columns() specifies.  The SQL
+	 * snippet should *begin* with a comma if non-empty.
+	 */
+	virtual void database_insert_columns(std::string& sql, unsigned& num_columns) { num_columns = 0; }
+	/**
+	 * Will be called back from add_trace_event() to fill in data for the
+	 * columns specified by database_insert_columns().
+	 */
+	virtual bool database_insert_data(Trace_Event &ev, MYSQL_BIND *bind, unsigned num_columns, bool is_fake) { return true; }
 	virtual bool copy_to_database(fail::ProtoIStream &ps);
 	virtual bool clear_database();
 	/**
@@ -107,7 +120,7 @@ public:
 	 * May be overridden by importers that need to do stuff after the last
 	 * event was consumed.
 	 */
-	virtual bool finalize() { return true; }
+	virtual bool trace_end_reached() { return true; }
 
 	void set_elf(fail::ElfReader *elf) { m_elf = elf; }
 
diff --git a/tools/import-trace/main.cc b/tools/import-trace/main.cc
index d5eac798..2deebfef 100644
--- a/tools/import-trace/main.cc
+++ b/tools/import-trace/main.cc
@@ -238,9 +238,4 @@ int main(int argc, char *argv[]) {
 		LOG << "copy_to_database() failed" << endl;
 		exit(-1);
 	}
-
-	if (!importer->finalize()) {
-		LOG << "finalize() failed" << endl;
-		exit(-1);
-	}
 }