diff --git a/.gitignore b/.gitignore
index 2d7c46e1..931513da 100644
--- a/.gitignore
+++ b/.gitignore
@@ -54,3 +54,5 @@ debuggers/openocd/src/startup.tcl
 debuggers/openocd/src/startup_tcl.c
 debuggers/openocd/src/target/xscale_debug.h
 debuggers/openocd/stamp-h1
+
+.idea
\ No newline at end of file
diff --git a/cmake/FindCapstone.cmake b/cmake/FindCapstone.cmake
new file mode 100644
index 00000000..3ffaebfb
--- /dev/null
+++ b/cmake/FindCapstone.cmake
@@ -0,0 +1,33 @@
+# - Try to find CAPSTONE
+# Once done, this will define
+#
+#  CAPSTONE_FOUND - system has CAPSTONE
+#  CAPSTONE_INCLUDE_DIRS - the CAPSTONE include directories
+#  CAPSTONE_LIBRARIES - link these to use CAPSTONE
+
+include(LibFindMacros)
+message(STATUS "[FAIL*] looking for Capstone library")
+
+# Dependencies
+# libfind_package(CAPSTONE capstone)
+
+# Use pkg-config to get hints about paths (currently disabled, so the CAPSTONE_PKGCONF_* hints used below are not set here)
+# libfind_pkg_check_modules(CAPSTONE_PKGCONF capstone)
+
+# Include dir
+find_path(CAPSTONE_INCLUDE_DIR
+  NAMES capstone/capstone.h
+  PATHS ${CAPSTONE_PKGCONF_INCLUDE_DIRS}
+  )
+
+# Finally the library itself
+find_library(CAPSTONE_LIBRARY
+  NAMES capstone
+  PATHS ${CAPSTONE_PKGCONF_LIBRARY_DIRS}
+  )
+
+# Set the include dir variables and the libraries and let libfind_process do the rest.
+# NOTE: Singular variables for this library, plural for libraries that this lib depends on.
+set(CAPSTONE_PROCESS_INCLUDES CAPSTONE_INCLUDE_DIR CAPSTONE_INCLUDE_DIRS)
+set(CAPSTONE_PROCESS_LIBS CAPSTONE_LIBRARY CAPSTONE_LIBRARIES)
+libfind_process(CAPSTONE)
diff --git a/cmake/LibFindMacros.cmake b/cmake/LibFindMacros.cmake
new file mode 100644
index 00000000..78fd5f38
--- /dev/null
+++ b/cmake/LibFindMacros.cmake
@@ -0,0 +1,99 @@
+# Works the same as find_package, but forwards the "REQUIRED" and "QUIET" arguments
+# used for the current package. For this to work, the first parameter must be the
+# prefix of the current package, then the prefix of the new package etc, which are
+# passed to find_package.
+macro (libfind_package PREFIX) + set (LIBFIND_PACKAGE_ARGS ${ARGN}) + if (${PREFIX}_FIND_QUIETLY) + set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} QUIET) + endif (${PREFIX}_FIND_QUIETLY) + if (${PREFIX}_FIND_REQUIRED) + set (LIBFIND_PACKAGE_ARGS ${LIBFIND_PACKAGE_ARGS} REQUIRED) + endif (${PREFIX}_FIND_REQUIRED) + find_package(${LIBFIND_PACKAGE_ARGS}) +endmacro (libfind_package) + +# CMake developers made the UsePkgConfig system deprecated in the same release (2.6) +# where they added pkg_check_modules. Consequently I need to support both in my scripts +# to avoid those deprecated warnings. Here's a helper that does just that. +# Works identically to pkg_check_modules, except that no checks are needed prior to use. +macro (libfind_pkg_check_modules PREFIX PKGNAME) + if (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4) + include(UsePkgConfig) + pkgconfig(${PKGNAME} ${PREFIX}_INCLUDE_DIRS ${PREFIX}_LIBRARY_DIRS ${PREFIX}_LDFLAGS ${PREFIX}_CFLAGS) + else (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4) + find_package(PkgConfig) + if (PKG_CONFIG_FOUND) + pkg_check_modules(${PREFIX} ${PKGNAME}) + endif (PKG_CONFIG_FOUND) + endif (${CMAKE_MAJOR_VERSION} EQUAL 2 AND ${CMAKE_MINOR_VERSION} EQUAL 4) +endmacro (libfind_pkg_check_modules) + +# Do the final processing once the paths have been detected. +# If include dirs are needed, ${PREFIX}_PROCESS_INCLUDES should be set to contain +# all the variables, each of which contain one include directory. +# Ditto for ${PREFIX}_PROCESS_LIBS and library files. +# Will set ${PREFIX}_FOUND, ${PREFIX}_INCLUDE_DIRS and ${PREFIX}_LIBRARIES. +# Also handles errors in case library detection was required, etc. 
+macro (libfind_process PREFIX) + # Skip processing if already processed during this run + if (NOT ${PREFIX}_FOUND) + # Start with the assumption that the library was found + set (${PREFIX}_FOUND TRUE) + + # Process all includes and set _FOUND to false if any are missing + foreach (i ${${PREFIX}_PROCESS_INCLUDES}) + if (${i}) + set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIRS} ${${i}}) + mark_as_advanced(${i}) + else (${i}) + set (${PREFIX}_FOUND FALSE) + endif (${i}) + endforeach (i) + + # Process all libraries and set _FOUND to false if any are missing + foreach (i ${${PREFIX}_PROCESS_LIBS}) + if (${i}) + set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARIES} ${${i}}) + mark_as_advanced(${i}) + else (${i}) + set (${PREFIX}_FOUND FALSE) + endif (${i}) + endforeach (i) + + # Print message and/or exit on fatal error + if (${PREFIX}_FOUND) + if (NOT ${PREFIX}_FIND_QUIETLY) + message (STATUS "Found ${PREFIX} include directory: ${${PREFIX}_INCLUDE_DIR}") + message (STATUS "Found ${PREFIX} library: ${${PREFIX}_LIBRARY}") + endif (NOT ${PREFIX}_FIND_QUIETLY) + else (${PREFIX}_FOUND) + if (${PREFIX}_FIND_REQUIRED) + foreach (i ${${PREFIX}_PROCESS_INCLUDES} ${${PREFIX}_PROCESS_LIBS}) + message("${i}=${${i}}") + endforeach (i) + message (FATAL_ERROR "Required library ${PREFIX} NOT FOUND.\nInstall the library (dev version) and try again. If the library is already installed, use ccmake to set the missing variables manually.") + endif (${PREFIX}_FIND_REQUIRED) + endif (${PREFIX}_FOUND) + endif (NOT ${PREFIX}_FOUND) +endmacro (libfind_process) + +macro(libfind_library PREFIX basename) + set(TMP "") + if(MSVC80) + set(TMP -vc80) + endif(MSVC80) + if(MSVC90) + set(TMP -vc90) + endif(MSVC90) + set(${PREFIX}_LIBNAMES ${basename}${TMP}) + if(${ARGC} GREATER 2) + set(${PREFIX}_LIBNAMES ${basename}${TMP}-${ARGV2}) + string(REGEX REPLACE "\\." 
"_" TMP ${${PREFIX}_LIBNAMES}) + set(${PREFIX}_LIBNAMES ${${PREFIX}_LIBNAMES} ${TMP}) + endif(${ARGC} GREATER 2) + find_library(${PREFIX}_LIBRARY + NAMES ${${PREFIX}_LIBNAMES} + PATHS ${${PREFIX}_PKGCONF_LIBRARY_DIRS} + ) +endmacro(libfind_library) diff --git a/doc/how-to-build.txt b/doc/how-to-build.txt index 92323213..dd5a9994 100644 --- a/doc/how-to-build.txt +++ b/doc/how-to-build.txt @@ -23,6 +23,16 @@ Required for FAIL*: "-D__NO_MATH_INLINES -D__STRICT_ANSI__" or "--c_compiler clang++" (the latter requires the clang++ compiler). - optional: + * the Capstone disassembler library, version 4.0 or newer + - Capstone is highly recommended over using LLVM, which provides + incomplete def/use information for many opcodes. + - Note that Debian's/Ubuntu's libcapstone-dev 4.0.1+really+3.0.5 does not + suffice. As of this writing, you need to build/install the Capstone + library yourself on these distributions. + - If the build system cannot automatically locate the Capstone library, + manually set CAPSTONE_INCLUDE_DIR and CAPSTONE_LIBRARY in the cmake + configuration. Consider using the static libcapstone.a library instead + of the .so variant. 
* LLVM 3.9, 4.0, 5.0, or 6.0 (needed for several importers in tools/import-trace): llvm-3.9-dev, llvm-4.0-dev, llvm-5.0-dev or llvm-6.0-dev diff --git a/src/core/config/VariantConfig.hpp.in b/src/core/config/VariantConfig.hpp.in index 19cffea6..73c7efc0 100644 --- a/src/core/config/VariantConfig.hpp.in +++ b/src/core/config/VariantConfig.hpp.in @@ -12,6 +12,7 @@ #cmakedefine BUILD_ARM #cmakedefine BUILD_LLVM_DISASSEMBLER +#cmakedefine BUILD_CAPSTONE_DISASSEMBLER #define ARCH_TOOL_PREFIX "@ARCH_TOOL_PREFIX@" diff --git a/src/core/efw/CMakeLists.txt b/src/core/efw/CMakeLists.txt index ab699a04..18ba10b7 100644 --- a/src/core/efw/CMakeLists.txt +++ b/src/core/efw/CMakeLists.txt @@ -17,6 +17,9 @@ target_link_libraries(fail-efw fail-util) # WallclockTimer if (BUILD_LLVM_DISASSEMBLER) target_link_libraries(fail-efw fail-llvmdisassembler) endif() +if (BUILD_CAPSTONE_DISASSEMBLER) + target_link_libraries(fail-efw fail-capstonedisassembler) +endif() find_package(LibPCL REQUIRED) include_directories(${LIBPCL_INCLUDE_DIRS}) diff --git a/src/core/efw/DatabaseExperiment.cc b/src/core/efw/DatabaseExperiment.cc index 550836e4..06389ff5 100644 --- a/src/core/efw/DatabaseExperiment.cc +++ b/src/core/efw/DatabaseExperiment.cc @@ -15,7 +15,9 @@ #include "efw/DatabaseExperiment.hpp" #include "comm/DatabaseCampaignMessage.pb.h" -#ifdef BUILD_LLVM_DISASSEMBLER +#if defined(BUILD_CAPSTONE_DISASSEMBLER) +# include "util/capstonedisassembler/CapstoneToFailTranslator.hpp" +#elif defined(BUILD_LLVM_DISASSEMBLER) # include "util/llvmdisassembler/LLVMtoFailTranslator.hpp" #endif @@ -80,12 +82,17 @@ unsigned DatabaseExperiment::injectFault( /* First 128 registers, TODO use LLVMtoFailTranslator::getMaxDataAddress() */ if (data_address < (128 << 4) && inject_registers) { -#ifdef BUILD_LLVM_DISASSEMBLER +#if defined(BUILD_LLVM_DISASSEMBLER) || defined(BUILD_CAPSTONE_DISASSEMBLER) +#if defined(BUILD_LLVM_DISASSEMBLER) + typedef LLVMtoFailTranslator XtoFailTranslator; +#elif 
defined(BUILD_CAPSTONE_DISASSEMBLER) + typedef CapstoneToFailTranslator XtoFailTranslator; +#endif // register FI - LLVMtoFailTranslator::reginfo_t reginfo = - LLVMtoFailTranslator::reginfo_t::fromDataAddress(data_address, 1); + XtoFailTranslator::reginfo_t reginfo = + XtoFailTranslator::reginfo_t::fromDataAddress(data_address, 1); - value = LLVMtoFailTranslator::getRegisterContent(simulator.getCPU(0), reginfo); + value = XtoFailTranslator::getRegisterContent(simulator.getCPU(0), reginfo); if (inject_burst) { injected_value = value ^ 0xff; m_log << "INJECTING BURST at: REGISTER " << dec << reginfo.id @@ -95,14 +102,14 @@ unsigned DatabaseExperiment::injectFault( m_log << "INJECTING BIT-FLIP at: REGISTER " << dec << reginfo.id << " bitpos " << (reginfo.offset + bitpos) << endl; } - LLVMtoFailTranslator::setRegisterContent(simulator.getCPU(0), reginfo, injected_value); + XtoFailTranslator::setRegisterContent(simulator.getCPU(0), reginfo, injected_value); if (reginfo.id == RID_PC) { // FIXME move this into the Bochs backend m_log << "Redecode current instruction" << endl; redecodeCurrentInstruction(); } #else - m_log << "ERROR: Not compiled with LLVM. Enable BUILD_LLVM_DISASSEMBLER at buildtime." << endl; + m_log << "ERROR: Not compiled with LLVM or Capstone. Enable BUILD_LLVM_DISASSEMBLER OR BUILD_CAPSTONE_DISASSEMBLER at buildtime." 
<< endl; simulator.terminate(1); #endif } else if (!force_registers) { diff --git a/src/core/util/CMakeLists.txt b/src/core/util/CMakeLists.txt index a2ec8bf0..5d8e9492 100644 --- a/src/core/util/CMakeLists.txt +++ b/src/core/util/CMakeLists.txt @@ -81,12 +81,22 @@ endif(CONFIG_INJECTIONPOINT_HOPS) add_library(fail-util ${SRCS}) add_dependencies(fail-util fail-comm) -target_link_libraries(fail-util fail-comm ${ADDITIONAL_LIBS} ${PROTOBUF_LIBRARY} ${Boost_LIBRARIES} ${LibIberty_LIBRARIES} ${ZLIB_LIBRARIES} ${LIBDWARF_LIBRARIES} ${LIBELF_LIBRARIES}) +target_link_libraries(fail-util fail-comm fail-sal ${ADDITIONAL_LIBS} ${PROTOBUF_LIBRARY} ${Boost_LIBRARIES} ${LibIberty_LIBRARIES} ${ZLIB_LIBRARIES} ${LIBDWARF_LIBRARIES} ${LIBELF_LIBRARIES}) + +option(BUILD_CAPSTONE_DISASSEMBLER "Build the Capstone-based disassembler (Capstone 4.0.1 required, higher versions may not work)" OFF) +if (BUILD_CAPSTONE_DISASSEMBLER) + add_subdirectory(capstonedisassembler) +endif (BUILD_CAPSTONE_DISASSEMBLER) option(BUILD_LLVM_DISASSEMBLER "Build the LLVM-based disassembler (LLVM 3.9, 4.0, 5.0 or 6.0 preferred, other versions may not work)" OFF) if (BUILD_LLVM_DISASSEMBLER) add_subdirectory(llvmdisassembler) endif (BUILD_LLVM_DISASSEMBLER) + +if (BUILD_CAPSTONE_DISASSEMBLER AND BUILD_LLVM_DISASSEMBLER) + message(FATAL_ERROR "BUILD_CAPSTONE_DISASSEMBLER and BUILD_LLVM_DISASSEMBLER can't be on at the same time!") +endif (BUILD_CAPSTONE_DISASSEMBLER AND BUILD_LLVM_DISASSEMBLER) + ### Tests add_executable(memorymap-test testing/memorymap-test.cc) target_link_libraries(memorymap-test fail-util) diff --git a/src/core/util/ElfReader.cc b/src/core/util/ElfReader.cc index a3018ac0..76669864 100644 --- a/src/core/util/ElfReader.cc +++ b/src/core/util/ElfReader.cc @@ -97,7 +97,7 @@ void ElfReader::setup(const char* path) { printf("Error: wrong Section to read\n"); } else { process_section(&sec_hdr, buff); - } + } } free(buff); @@ -218,6 +218,7 @@ bool ElfReader::read_ELF_file_header(FILE *fp, 
Elf64_Ehdr *filehdr) return false; } + m_machine = filehdr32.e_machine; m_elfclass = filehdr32.e_ident[EI_CLASS]; if (m_elfclass == ELFCLASS32) { Elf32to64_Ehdr(&filehdr32, filehdr); diff --git a/src/core/util/ElfReader.hpp b/src/core/util/ElfReader.hpp index b7babbdd..d6712b93 100644 --- a/src/core/util/ElfReader.hpp +++ b/src/core/util/ElfReader.hpp @@ -81,6 +81,8 @@ public: typedef container_t::const_iterator symbol_iterator; typedef container_t::const_iterator section_iterator; + int m_machine; + int m_elfclass; /** * Constructor. @@ -157,7 +159,6 @@ public: private: Logger m_log; std::string m_filename; - int m_elfclass; void setup(const char*); bool process_symboltable(FILE *fp, Elf64_Ehdr const *ehdr, int sect_num); diff --git a/src/core/util/capstonedisassembler/CMakeLists.txt b/src/core/util/capstonedisassembler/CMakeLists.txt new file mode 100644 index 00000000..390b6b18 --- /dev/null +++ b/src/core/util/capstonedisassembler/CMakeLists.txt @@ -0,0 +1,26 @@ +set(SRCS + CapstoneDisassembler.cpp + CapstoneDisassembler.hpp + CapstoneToFailBochs.cpp + CapstoneToFailBochs.hpp + CapstoneToFailGem5.hpp + CapstoneToFailGem5.cpp + CapstoneToFailTranslator.cpp + CapstoneToFailTranslator.hpp +) + +include(FindCapstone) + +add_library(fail-capstonedisassembler ${SRCS}) + +target_link_libraries(fail-capstonedisassembler ${CAPSTONE_LIBRARY}) +include_directories(${CAPSTONE_INCLUDE_DIR}) + +### Tests +add_executable(capstoneDisTest testing/capstoneDisTest.cc) +target_link_libraries(capstoneDisTest fail-capstonedisassembler fail-sal) + +add_test(NAME capstoneDisx86Test COMMAND capstoneDisTest ${CMAKE_CURRENT_SOURCE_DIR}/testing/x86 ) +add_test(NAME capstoneDisx86_64Test COMMAND capstoneDisTest ${CMAKE_CURRENT_SOURCE_DIR}/testing/x86_64 ) +add_test(NAME capstoneDisARMM3Test COMMAND capstoneDisTest ${CMAKE_CURRENT_SOURCE_DIR}/testing/armm3 ) +add_test(NAME capstoneDisARM9Test COMMAND capstoneDisTest ${CMAKE_CURRENT_SOURCE_DIR}/testing/arm9 ) \ No newline at end of file diff 
--git a/src/core/util/capstonedisassembler/CapstoneDisassembler.cpp b/src/core/util/capstonedisassembler/CapstoneDisassembler.cpp
new file mode 100644
index 00000000..5302c81b
--- /dev/null
+++ b/src/core/util/capstonedisassembler/CapstoneDisassembler.cpp
@@ -0,0 +1,344 @@
+#include <capstone/capstone.h>
+#if CS_MAKE_VERSION(CS_API_MAJOR, CS_API_MINOR) < CS_MAKE_VERSION(4, 0)
+#  error Need libcapstone >= 4.0
+#endif
+
+#include "CapstoneDisassembler.hpp"
+
+using namespace fail;
+
+namespace {
+
+/**
+ * Owns the file descriptor and the libelf handle opened by disassemble()
+ * and releases both on every exit path.
+ */
+struct ElfFileGuard {
+    int fd;
+    Elf *elf;
+
+    ElfFileGuard() : fd(-1), elf(NULL) { }
+    // Non-copyable: a copy would release the same handles twice.
+    ElfFileGuard(const ElfFileGuard &) = delete;
+    ElfFileGuard &operator=(const ElfFileGuard &) = delete;
+    ~ElfFileGuard() {
+        if (elf != NULL) {
+            elf_end(elf);
+        }
+        if (fd >= 0) {
+            close(fd);
+        }
+    }
+};
+
+} // end of anonymous namespace
+
+/**
+ * Returns the Capstone-to-FAIL* register translator matching the machine
+ * type of the ELF file, creating it on first use. Terminates the process
+ * if the machine type is not supported.
+ */
+CapstoneToFailTranslator *CapstoneDisassembler::getTranslator() {
+    if (ctofail == 0) {
+        switch (m_elf->m_machine) {
+        case EM_386:
+        case EM_X86_64:
+            ctofail = new CapstoneToFailBochs();
+            break;
+        case EM_ARM:
+            ctofail = new CapstoneToFailGem5();
+            break;
+        default:
+            std::cerr << "ArchType "
+                << m_elf->m_machine
+                << " not supported\n";
+            exit(1);
+        }
+    }
+    return ctofail;
+}
+
+/**
+ * Collects the function symbols that start inside the section
+ * [sect_addr, sect_addr + sect_size).
+ *
+ * A symbol without a size is extended up to the next symbol address (or to
+ * the section end if it is the last symbol). Sizes are clipped so that no
+ * symbol reaches beyond the section end.
+ *
+ * @param sect_addr virtual start address of the section
+ * @param sect_size size of the section in bytes
+ * @return map from symbol start address to symbol size in bytes
+ */
+std::map<uint64_t, uint64_t> CapstoneDisassembler::get_symtab_map(uint64_t sect_addr, uint64_t sect_size) {
+    // Make a list of all function symbols (virtual address, size).
+    std::vector<std::pair<uint64_t, uint64_t> > symbols;
+    for (ElfReader::container_t::const_iterator it = m_elf->sym_begin(); it != m_elf->sym_end(); ++it) {
+
+        if (it->getSymbolType() != 2 /*SST_FUNC*/) {
+            continue;
+        }
+
+        symbols.push_back(std::make_pair(it->getAddress(), it->getSize()));
+#if 0
+        std::cout << std::hex << it->getAddress() << "\t" << it->getSymbolType() << "\t" << it->getName().c_str() << "\t" << it->getSize() << std::endl;
+#endif
+    }
+
+    // Sort the symbols by address, just in case they didn't come in that way.
+    std::sort(symbols.begin(), symbols.end());
+
+    std::map<uint64_t, uint64_t> symtab_map; // start (virtual address), size
+    uint64_t start;
+    uint64_t size;
+    for (unsigned si = 0, se = symbols.size(); si != se; ++si) {
+        start = symbols[si].first;
+        size = symbols[si].second;
+        // only admit symbols that start within this section
+        if (start < sect_addr || sect_addr + sect_size <= start) {
+            continue;
+        }
+        if (size == 0) {
+            // The end is either the end of the section or the beginning of the next symbol.
+            if (si == se - 1)
+                // Last symbol? Span until section end.
+                size = sect_addr + sect_size - start;
+            else if (symbols[si + 1].first != start)
+                // There is distance to the next symbol? Cover it.
+                size = symbols[si + 1].first - start;
+            else
+                // This symbol has the same address as the next symbol. Skip it.
+                continue;
+
+            symbols[si].second = size;
+        }
+        // limit the symbol size to within this section
+        if (start + size > sect_addr + sect_size) {
+            size = sect_addr + sect_size - start;
+        }
+        symtab_map[symbols[si].first] = size;
+    }
+#if 0
+    for (std::map<uint64_t, uint64_t>::iterator it=symtab_map.begin(); it!=symtab_map.end(); ++it)
+        std::cout << std::hex << it->first << " => " << it->second << std::endl;
+#endif
+    return symtab_map;
+}
+
+/**
+ * Disassembles every symbol of symtab_map and records the registers each
+ * instruction reads and writes in the instruction map.
+ *
+ * Data that Capstone cannot decode is skipped (SKIPDATA mode); the resulting
+ * "db" pseudo instructions carry no detail information and are not recorded.
+ *
+ * @param data raw contents of the section to disassemble
+ * @param shdr32 header of that section for ELFCLASS32 files, unused otherwise
+ * @param shdr64 header of that section for all other files, unused for ELFCLASS32
+ * @param symtab_map symbol start address => symbol size, see get_symtab_map()
+ * @return 0 on success, -1 if the input is unusable, the machine type is
+ *         unsupported or Capstone could not be initialized
+ */
+int CapstoneDisassembler::disassemble_section(Elf_Data *data, Elf32_Shdr *shdr32, Elf64_Shdr *shdr64, std::map<uint64_t, uint64_t> symtab_map) {
+#if 0
+    std::cout << std::dec << "bit: " << m_elf->m_elfclass << " 32: "<< ELFCLASS32 << " 64: " << ELFCLASS64 << " arch: " << m_elf->m_machine << " arm:" << EM_ARM << " x86: " << EM_386 << " x86_64: "<< EM_X86_64 << std::endl;
+#endif
+    const bool is32 = (m_elf->m_elfclass == ELFCLASS32);
+    if (data == NULL || data->d_buf == NULL || (is32 ? shdr32 == NULL : shdr64 == NULL)) {
+        std::cerr << "ERROR: No section data to disassemble!" << std::endl;
+        return -1;
+    }
+    const uint64_t sect_addr = is32 ? shdr32->sh_addr : shdr64->sh_addr;
+
+    // Select the Capstone architecture up front, so that an unsupported
+    // machine type never leaves the handle uninitialized.
+    cs_arch arch;
+    cs_mode mode;
+    switch (m_elf->m_machine) {
+    case EM_386:
+        arch = CS_ARCH_X86;
+        mode = CS_MODE_32;
+        break;
+    case EM_X86_64:
+        arch = CS_ARCH_X86;
+        mode = CS_MODE_64;
+        break;
+    case EM_ARM:
+        // Arm may not work, because thumb is a problem
+        arch = is32 ? CS_ARCH_ARM : CS_ARCH_ARM64;
+        mode = CS_MODE_ARM;
+        break;
+    default:
+        std::cerr << "ArchType "
+            << m_elf->m_machine
+            << " not supported\n";
+        return -1;
+    }
+
+    csh handle;
+    if (cs_open(arch, mode, &handle) != CS_ERR_OK) {
+        return -1;
+    }
+
+    cs_opt_skipdata skipdata = {};
+    skipdata.mnemonic = "db";
+
+    cs_option(handle, CS_OPT_DETAIL, CS_OPT_ON);
+    cs_option(handle, CS_OPT_SKIPDATA, CS_OPT_ON);
+    cs_option(handle, CS_OPT_SKIPDATA_SETUP, (size_t)&skipdata);
+
+    const bool is_x86 = (m_elf->m_machine == EM_386 || m_elf->m_machine == EM_X86_64);
+
+    for (std::map<uint64_t, uint64_t>::const_iterator it = symtab_map.begin(); it != symtab_map.end(); ++it) {
+        // Never read beyond the section data that is actually available.
+        const uint64_t offset = it->first - sect_addr;
+        if (it->first < sect_addr || offset >= data->d_size) {
+            continue;
+        }
+        uint64_t length = it->second;
+        if (length > data->d_size - offset) {
+            length = data->d_size - offset;
+        }
+
+        cs_insn *insn = NULL;
+        const size_t count = cs_disasm(handle, (const uint8_t *) data->d_buf + offset,
+            length, it->first, 0, &insn);
+        if (count == 0) {
+            std::cerr << "ERROR: Failed to disassemble given code!" << std::endl;
+            continue;
+        }
+
+        for (size_t j = 0; j < count; j++) {
+            // SKIPDATA pseudo instructions have id 0 and no detail record.
+            if (insn[j].id == 0 || insn[j].detail == NULL) {
+                continue;
+            }
+
+            unsigned int opcode = 0; // placeholder for non-x86 targets
+            if (is_x86) {
+                // Widen before shifting: the opcode bytes are uint8_t.
+                const uint8_t *op = insn[j].detail->x86.opcode;
+                opcode = ((unsigned int) op[3] << 24) | ((unsigned int) op[2] << 16) |
+                    ((unsigned int) op[1] << 8) | op[0];
+            }
+#if 0
+            printf("%s\t%s\n", insn[j].mnemonic, insn[j].op_str);
+            printf("Opcode: %x\t Adress: %lx\t Size: %d\n", opcode, insn[j].address, insn[j].size);
+#endif
+            cs_regs regs_read, regs_write;
+            uint8_t read_count, write_count;
+            if (cs_regs_access(handle, &insn[j], regs_read, &read_count, regs_write, &write_count) != 0) {
+                continue;
+            }
+
+            Instr instr;
+            instr.opcode = opcode;
+            instr.length = insn[j].size;
+            instr.address = insn[j].address;
+            // FIXME could not find a functionality in capstone
+            instr.conditional_branch = false;
+            instr.reg_uses.assign(regs_read, regs_read + read_count);
+            instr.reg_defs.assign(regs_write, regs_write + write_count);
+            (*instrs)[instr.address] = instr;
+        }
+
+        cs_free(insn, count);
+    }
+    cs_close(&handle);
+    return 0;
+}
+
+/**
+ * Disassembles the .text section of the ELF file and fills the instruction
+ * map returned by getInstrMap().
+ *
+ * Every failure is reported on stderr and ends the run early; the
+ * instruction map then keeps whatever had been recorded so far.
+ */
+void CapstoneDisassembler::disassemble() {
+    if (elf_version(EV_CURRENT) == EV_NONE) {
+        std::cerr << "ELF library initialization failed" << std::endl;
+        return;
+    }
+
+    const bool is32 = (m_elf->m_elfclass == ELFCLASS32);
+    if (!is32 && m_elf->m_elfclass != ELFCLASS64) {
+        std::cerr << m_elf->getFilename().c_str() << " has an unsupported ELF class" << std::endl;
+        return;
+    }
+
+    ElfFileGuard file; // closes the ELF handle and the descriptor on return
+    if ((file.fd = open(m_elf->getFilename().c_str(), O_RDONLY, 0)) < 0) {
+        std::cerr << "open " << m_elf->getFilename().c_str() << " failed" << std::endl;
+        return;
+    }
+
+    if ((file.elf = elf_begin(file.fd, ELF_C_READ, NULL)) == NULL) {
+        std::cerr << "elf_begin() failed" << std::endl;
+        return;
+    }
+
+    if (elf_kind(file.elf) != ELF_K_ELF) {
+        std::cerr << m_elf->getFilename().c_str() << " is not an Elf object" << std::endl;
+        return;
+    }
+
+    size_t shstrndx;
+    if (elf_getshdrstrndx(file.elf, &shstrndx) != 0) {
+        std::cerr << "elf_getshdrstrndx() failed" << std::endl;
+        return;
+    }
+
+    Elf_Data *data = NULL;
+    Elf32_Shdr *shdr32 = NULL; // Section struct 32 Bit
+    Elf64_Shdr *shdr64 = NULL; // Section struct 64 Bit
+    uint64_t sect_addr = 0;
+    uint64_t sect_size = 0;
+    bool found_text = false;
+
+    // Loop over all sections in the ELF object until .text is found
+    for (Elf_Scn *scn = elf_nextscn(file.elf, NULL); scn != NULL; scn = elf_nextscn(file.elf, scn)) {
+        // Given an Elf Scn pointer, retrieve the associated section header
+        size_t name_offset;
+        if (is32) {
+            if ((shdr32 = elf32_getshdr(scn)) == NULL) {
+                std::cerr << "getshdr() failed" << std::endl;
+                continue;
+            }
+            name_offset = shdr32->sh_name;
+            sect_addr = shdr32->sh_addr;
+            sect_size = shdr32->sh_size;
+        } else {
+            if ((shdr64 = elf64_getshdr(scn)) == NULL) {
+                std::cerr << "getshdr() failed" << std::endl;
+                continue;
+            }
+            name_offset = shdr64->sh_name;
+            sect_addr = shdr64->sh_addr;
+            sect_size = shdr64->sh_size;
+        }
+
+        // Retrieve the name of the section
+        const char *section_name = elf_strptr(file.elf, shstrndx, name_offset);
+        if (section_name == NULL) {
+            std::cerr << "elf_strptr() failed" << std::endl;
+            continue;
+        }
+
+        if (!strcmp(section_name, ".text")) {
+            found_text = true;
+            data = elf_rawdata(scn, NULL);
+            break;
+        }
+    }
+
+    if (!found_text || data == NULL) {
+        std::cerr << "No section data available" << std::endl;
+        return;
+    }
+
+    std::map<uint64_t, uint64_t> symtab_map = get_symtab_map(sect_addr, sect_size);
+    if (disassemble_section(data, shdr32, shdr64, symtab_map) != 0) {
+        std::cerr << "ERROR: Failed to disassemble section .text" << std::endl;
+    }
+}
diff --git a/src/core/util/capstonedisassembler/CapstoneDisassembler.hpp b/src/core/util/capstonedisassembler/CapstoneDisassembler.hpp
new file mode 100644
index 00000000..749a089c
--- /dev/null
+++ b/src/core/util/capstonedisassembler/CapstoneDisassembler.hpp
@@ -0,0 +1,83 @@
+#ifndef
__CAPSTONEDISASSEMBLER_HPP__ +#define __CAPSTONEDISASSEMBLER_HPP__ + +#include +#include +#include +#include +#include // unique_ptr +#include + +#include + +#include "CapstoneToFailTranslator.hpp" +#include "CapstoneToFailBochs.hpp" +#include "CapstoneToFailGem5.hpp" + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util/ElfReader.hpp" + +namespace fail { + +class CapstoneDisassembler { + +public: + typedef uint16_t register_t; + typedef unsigned int address_t; + struct Instr { + unsigned int opcode; + unsigned int address; + unsigned char length; + bool conditional_branch; + std::vector reg_uses; + std::vector reg_defs; + }; + + typedef std::map InstrMap; + +private: + std::unique_ptr instrs; + fail::CapstoneToFailTranslator *ctofail; + fail::ElfReader *m_elf; + + static bool error(std::error_code ec) { + if (!ec) return false; + + std::cerr << "DIS error: " << ec.message() << ".\n"; + return true; + } + +public: + CapstoneDisassembler(fail::ElfReader *elf) : ctofail(0) { + this->m_elf = elf; + this->instrs.reset(new InstrMap()); + } + + ~CapstoneDisassembler() { delete ctofail; }; + + InstrMap &getInstrMap() { return *instrs; }; + fail::CapstoneToFailTranslator *getTranslator(); + + void disassemble(); + +private: + int disassemble_section(Elf_Data *data, Elf32_Shdr *shdr, Elf64_Shdr *shdr64, std::map symtab_map); + std::map get_symtab_map(uint64_t sect_addr, uint64_t sect_size); +}; + +} +#endif // __CAPSTONEDISASSEMBLER_HPP__ diff --git a/src/core/util/capstonedisassembler/CapstoneToFailBochs.cpp b/src/core/util/capstonedisassembler/CapstoneToFailBochs.cpp new file mode 100644 index 00000000..209fdca0 --- /dev/null +++ b/src/core/util/capstonedisassembler/CapstoneToFailBochs.cpp @@ -0,0 +1,103 @@ +#include +#include "CapstoneToFailBochs.hpp" +#include "sal/x86/X86Architecture.hpp" + +using namespace fail; + +CapstoneToFailBochs::CapstoneToFailBochs() { + 
capstone_to_fail_map[X86_REG_AH] = reginfo_t(RID_CAX, 8, 8); + capstone_to_fail_map[X86_REG_AL] = reginfo_t(RID_CAX, 8); + capstone_to_fail_map[X86_REG_AX] = reginfo_t(RID_CAX, 16); + capstone_to_fail_map[X86_REG_EAX] = reginfo_t(RID_CAX, 32); + capstone_to_fail_map[X86_REG_RAX] = reginfo_t(RID_CAX, 64); + + capstone_to_fail_map[X86_REG_BH] = reginfo_t(RID_CBX, 8, 8); + capstone_to_fail_map[X86_REG_BL] = reginfo_t(RID_CBX, 8); + capstone_to_fail_map[X86_REG_BX] = reginfo_t(RID_CBX, 16); + capstone_to_fail_map[X86_REG_EBX] = reginfo_t(RID_CBX, 32); + capstone_to_fail_map[X86_REG_RBX] = reginfo_t(RID_CBX, 64); + + capstone_to_fail_map[X86_REG_CH] = reginfo_t(RID_CCX, 8, 8); + capstone_to_fail_map[X86_REG_CL] = reginfo_t(RID_CCX, 8); + capstone_to_fail_map[X86_REG_CX] = reginfo_t(RID_CCX, 16); + capstone_to_fail_map[X86_REG_ECX] = reginfo_t(RID_CCX, 32); + capstone_to_fail_map[X86_REG_RCX] = reginfo_t(RID_CCX, 64); + + capstone_to_fail_map[X86_REG_DH] = reginfo_t(RID_CDX, 8, 8); + capstone_to_fail_map[X86_REG_DL] = reginfo_t(RID_CDX, 8); + capstone_to_fail_map[X86_REG_DX] = reginfo_t(RID_CDX, 16); + capstone_to_fail_map[X86_REG_EDX] = reginfo_t(RID_CDX, 32); + capstone_to_fail_map[X86_REG_RDX] = reginfo_t(RID_CDX, 64); + + capstone_to_fail_map[X86_REG_R8] = reginfo_t(RID_R8, 64); + capstone_to_fail_map[X86_REG_R8D] = reginfo_t(RID_R8, 32); + capstone_to_fail_map[X86_REG_R8W] = reginfo_t(RID_R8, 16); + capstone_to_fail_map[X86_REG_R8B] = reginfo_t(RID_R8, 8); + capstone_to_fail_map[X86_REG_R9] = reginfo_t(RID_R9, 64); + capstone_to_fail_map[X86_REG_R9D] = reginfo_t(RID_R9, 32); + capstone_to_fail_map[X86_REG_R9W] = reginfo_t(RID_R9, 16); + capstone_to_fail_map[X86_REG_R9B] = reginfo_t(RID_R9, 8); + capstone_to_fail_map[X86_REG_R10] = reginfo_t(RID_R10, 64); + capstone_to_fail_map[X86_REG_R10D] = reginfo_t(RID_R10, 32); + capstone_to_fail_map[X86_REG_R10W] = reginfo_t(RID_R10, 16); + capstone_to_fail_map[X86_REG_R10B] = reginfo_t(RID_R10, 8); + 
capstone_to_fail_map[X86_REG_R11] = reginfo_t(RID_R11, 64); + capstone_to_fail_map[X86_REG_R11D] = reginfo_t(RID_R11, 32); + capstone_to_fail_map[X86_REG_R11W] = reginfo_t(RID_R11, 16); + capstone_to_fail_map[X86_REG_R11B] = reginfo_t(RID_R11, 8); + capstone_to_fail_map[X86_REG_R12] = reginfo_t(RID_R12, 64); + capstone_to_fail_map[X86_REG_R12D] = reginfo_t(RID_R12, 32); + capstone_to_fail_map[X86_REG_R12W] = reginfo_t(RID_R12, 16); + capstone_to_fail_map[X86_REG_R12B] = reginfo_t(RID_R12, 8); + capstone_to_fail_map[X86_REG_R13] = reginfo_t(RID_R13, 64); + capstone_to_fail_map[X86_REG_R13D] = reginfo_t(RID_R13, 32); + capstone_to_fail_map[X86_REG_R13W] = reginfo_t(RID_R13, 16); + capstone_to_fail_map[X86_REG_R13B] = reginfo_t(RID_R13, 8); + capstone_to_fail_map[X86_REG_R14] = reginfo_t(RID_R14, 64); + capstone_to_fail_map[X86_REG_R14D] = reginfo_t(RID_R14, 32); + capstone_to_fail_map[X86_REG_R14W] = reginfo_t(RID_R14, 16); + capstone_to_fail_map[X86_REG_R14B] = reginfo_t(RID_R14, 8); + capstone_to_fail_map[X86_REG_R15] = reginfo_t(RID_R15, 64); + capstone_to_fail_map[X86_REG_R15D] = reginfo_t(RID_R15, 32); + capstone_to_fail_map[X86_REG_R15W] = reginfo_t(RID_R15, 16); + capstone_to_fail_map[X86_REG_R15B] = reginfo_t(RID_R15, 8); + + capstone_to_fail_map[X86_REG_DIL] = reginfo_t(RID_CDI, 8); + capstone_to_fail_map[X86_REG_DI] = reginfo_t(RID_CDI, 16); + capstone_to_fail_map[X86_REG_EDI] = reginfo_t(RID_CDI, 32); + capstone_to_fail_map[X86_REG_RDI] = reginfo_t(RID_CDI, 64); + + capstone_to_fail_map[X86_REG_BPL] = reginfo_t(RID_CBP, 8); + capstone_to_fail_map[X86_REG_BP] = reginfo_t(RID_CBP, 16); + capstone_to_fail_map[X86_REG_EBP] = reginfo_t(RID_CBP, 32); + capstone_to_fail_map[X86_REG_RBP] = reginfo_t(RID_CBP, 64); + + capstone_to_fail_map[X86_REG_EFLAGS] = reginfo_t(RID_FLAGS, 64); + // RFLAGS doesn't exist in the x86.h of capstone, therefore X86_REG_EFLAGS is set to 64bit + // capstone_to_fail_map[RFLAGS] = reginfo_t(RID_FLAGS, 64); + + 
capstone_to_fail_map[X86_REG_EIP] = reginfo_t(RID_PC, 32); + capstone_to_fail_map[X86_REG_RIP] = reginfo_t(RID_PC, 64); + + capstone_to_fail_map[X86_REG_SIL] = reginfo_t(RID_CSI, 8); + capstone_to_fail_map[X86_REG_SI] = reginfo_t(RID_CSI, 16); + capstone_to_fail_map[X86_REG_ESI] = reginfo_t(RID_CSI, 32); + capstone_to_fail_map[X86_REG_RSI] = reginfo_t(RID_CSI, 64); + + capstone_to_fail_map[X86_REG_SPL] = reginfo_t(RID_CSP, 8); + capstone_to_fail_map[X86_REG_SP] = reginfo_t(RID_CSP, 16); + capstone_to_fail_map[X86_REG_ESP] = reginfo_t(RID_CSP, 32); + capstone_to_fail_map[X86_REG_RSP] = reginfo_t(RID_CSP, 64); + + capstone_to_fail_map[X86_REG_CR0] = reginfo_t(RID_CR0); + capstone_to_fail_map[X86_REG_CR2] = reginfo_t(RID_CR2); + capstone_to_fail_map[X86_REG_CR3] = reginfo_t(RID_CR3); + capstone_to_fail_map[X86_REG_CR4] = reginfo_t(RID_CR4); + + capstone_to_fail_map[X86_REG_CS] = reginfo_t(RID_CS, 16); + capstone_to_fail_map[X86_REG_DS] = reginfo_t(RID_DS, 16); + capstone_to_fail_map[X86_REG_ES] = reginfo_t(RID_ES, 16); + capstone_to_fail_map[X86_REG_FS] = reginfo_t(RID_FS, 16); + capstone_to_fail_map[X86_REG_GS] = reginfo_t(RID_GS, 16); + capstone_to_fail_map[X86_REG_SS] = reginfo_t(RID_SS, 16); +} diff --git a/src/core/util/capstonedisassembler/CapstoneToFailBochs.hpp b/src/core/util/capstonedisassembler/CapstoneToFailBochs.hpp new file mode 100644 index 00000000..22763931 --- /dev/null +++ b/src/core/util/capstonedisassembler/CapstoneToFailBochs.hpp @@ -0,0 +1,14 @@ +#ifndef __CAPSTONETOFAILBOCHS_HPP_ +#define __CAPSTONETOFAILBOCHS_HPP_ + +#include "CapstoneToFailTranslator.hpp" + +namespace fail { + +class CapstoneToFailBochs : public CapstoneToFailTranslator { +public: + CapstoneToFailBochs(); +}; +} // end of namespace + +#endif diff --git a/src/core/util/capstonedisassembler/CapstoneToFailGem5.cpp b/src/core/util/capstonedisassembler/CapstoneToFailGem5.cpp new file mode 100644 index 00000000..15d8322c --- /dev/null +++ 
b/src/core/util/capstonedisassembler/CapstoneToFailGem5.cpp
@@ -0,0 +1,27 @@
+#include <capstone/capstone.h>
+#include "CapstoneToFailGem5.hpp"
+#include "sal/arm/ArmArchitecture.hpp"
+
+using namespace fail;
+
+/**
+ * Fills the translation table with the ARM core registers R0-R12, SP, LR and PC.
+ */
+CapstoneToFailGem5::CapstoneToFailGem5() {
+    capstone_to_fail_map[ARM_REG_R0] = reginfo_t(RI_R0);
+    capstone_to_fail_map[ARM_REG_R1] = reginfo_t(RI_R1);
+    capstone_to_fail_map[ARM_REG_R2] = reginfo_t(RI_R2);
+    capstone_to_fail_map[ARM_REG_R3] = reginfo_t(RI_R3);
+    capstone_to_fail_map[ARM_REG_R4] = reginfo_t(RI_R4);
+    capstone_to_fail_map[ARM_REG_R5] = reginfo_t(RI_R5);
+    capstone_to_fail_map[ARM_REG_R6] = reginfo_t(RI_R6);
+    capstone_to_fail_map[ARM_REG_R7] = reginfo_t(RI_R7);
+    capstone_to_fail_map[ARM_REG_R8] = reginfo_t(RI_R8);
+    capstone_to_fail_map[ARM_REG_R9] = reginfo_t(RI_R9);
+    capstone_to_fail_map[ARM_REG_R10] = reginfo_t(RI_R10);
+    capstone_to_fail_map[ARM_REG_R11] = reginfo_t(RI_R11);
+    capstone_to_fail_map[ARM_REG_R12] = reginfo_t(RI_R12);
+    capstone_to_fail_map[ARM_REG_SP] = reginfo_t(RI_SP);
+    capstone_to_fail_map[ARM_REG_LR] = reginfo_t(RI_LR);
+    capstone_to_fail_map[ARM_REG_PC] = reginfo_t(RI_IP);
+}
diff --git a/src/core/util/capstonedisassembler/CapstoneToFailGem5.hpp b/src/core/util/capstonedisassembler/CapstoneToFailGem5.hpp
new file mode 100644
index 00000000..ebc9d973
--- /dev/null
+++ b/src/core/util/capstonedisassembler/CapstoneToFailGem5.hpp
@@ -0,0 +1,14 @@
+#ifndef __CAPSTONETOFAILGEM5_HPP_
+#define __CAPSTONETOFAILGEM5_HPP_
+
+#include "CapstoneToFailTranslator.hpp"
+
+namespace fail {
+
+class CapstoneToFailGem5 : public CapstoneToFailTranslator {
+public:
+    CapstoneToFailGem5();
+};
+} // end of namespace
+
+#endif // __CAPSTONETOFAILGEM5_HPP_
diff --git a/src/core/util/capstonedisassembler/CapstoneToFailTranslator.cpp b/src/core/util/capstonedisassembler/CapstoneToFailTranslator.cpp
new file mode 100644
index 00000000..a406c0ea
--- /dev/null
+++ b/src/core/util/capstonedisassembler/CapstoneToFailTranslator.cpp
@@ -0,0 +1,70 @@
+#include <algorithm>
+#include "CapstoneToFailTranslator.hpp"
+#include "sal/SALInst.hpp"
+
+using namespace fail;
+
+/**
+ * Looks up the FAIL* register description for a Capstone register id.
+ * Returns `notfound` when the id has no entry in the translation table.
+ */
+const CapstoneToFailTranslator::reginfo_t & CapstoneToFailTranslator::getFailRegisterInfo(unsigned int regid) {
+    ctof_map_t::iterator it = capstone_to_fail_map.find(regid);
+    if ( it != capstone_to_fail_map.end() ) {// found
+        return (*it).second;
+    } else { // not found
+//      std::cout << "Fail ID for Capstone Register id " << std::dec << regid << " not found :(" << std::endl;
+//      exit(EXIT_FAILURE);
+        return notfound;
+    }
+}
+
+/**
+ * Reads the (sub-)register described by reginfo from the given CPU.
+ * The value is masked with reginfo.mask and shifted down by reginfo.offset.
+ */
+regdata_t CapstoneToFailTranslator::getRegisterContent(ConcreteCPU& cpu, const reginfo_t &reginfo){
+    regdata_t result;
+
+    Register* reg = cpu.getRegister(reginfo.id);
+    result = cpu.getRegisterContent(reg);
+
+    result &= reginfo.mask;
+    result >>= reginfo.offset;
+
+    return result;
+}
+
+/**
+ * Writes value into the (sub-)register described by reginfo; all register
+ * bits outside reginfo.mask keep their current content.
+ */
+void CapstoneToFailTranslator::setRegisterContent(ConcreteCPU & cpu, const reginfo_t &reginfo, regdata_t value){
+    Register* reg = cpu.getRegister(reginfo.id);
+
+    regdata_t origval = cpu.getRegisterContent(reg); // Get register Value from fail
+    origval &= ~(reginfo.mask); // clear bits to write
+
+    value <<= reginfo.offset; // shift value to write up to position
+    value &= reginfo.mask; // mask out trailing and leading bits
+    value |= origval; // set bits to write
+
+    cpu.setRegisterContent( reg, value ); // write back register content
+}
+
+/**
+ * Returns the largest FAIL* register id any Capstone register maps to,
+ * or -1 (the id of a default-constructed reginfo_t) if nothing is mapped.
+ */
+int CapstoneToFailTranslator::getMaxFailRegisterID()
+{
+    auto max = std::max_element(capstone_to_fail_map.cbegin(), capstone_to_fail_map.cend(),
+        [] (const ctof_map_t::value_type& v1, const ctof_map_t::value_type& v2) {
+            return v1.second.id < v2.second.id;
+        });
+    // max_element returns the end iterator for an empty map; never dereference it.
+    if (max == capstone_to_fail_map.cend()) {
+        return -1;
+    }
+    return max->second.id;
+}
diff --git a/src/core/util/capstonedisassembler/CapstoneToFailTranslator.hpp b/src/core/util/capstonedisassembler/CapstoneToFailTranslator.hpp
new file mode 100644
index 00000000..e1e777fb
--- /dev/null
+++ b/src/core/util/capstonedisassembler/CapstoneToFailTranslator.hpp
@@ -0,0 +1,93 @@
+#ifndef __CAPSTONETOFAILTRANSLATOR_HPP_
+#define
__CAPSTONETOFAILTRANSLATOR_HPP_ + +#include "sal/SALConfig.hpp" +#include "sal/ConcreteCPU.hpp" +#include + +namespace fail { + +/** + * Translates Capstone disassembler ids + * to FAIL* SAL representations. + */ +class CapstoneToFailTranslator { +public: + /** + * Maps registers to/from linear addresses usable for def/use-pruning + * purposes and storage in the database. Takes care that the linear + * addresses of x86 subregisters (e.g., AX represents the lower 16 bits of + * EAX) overlap with their siblings. + */ + struct reginfo_t { + int id; + regwidth_t width; + regdata_t mask; + byte_t offset; + + int toDataAddress() const { + // .. 5 4 | 3 2 1 0 + // | + return (id << 4) | (offset / 8); + } + + static reginfo_t fromDataAddress(int addr, int width) { + int id = addr >> 4; + byte_t offset = (addr & 0xf) * 8; + return reginfo_t(id, width * 8, offset); + } + + reginfo_t(int id=-1, regwidth_t width = 32, byte_t offs = 0) + : id(id), width(width), mask((((regdata_t) 1 << width) - 1) << offs), offset(offs) + { + if (width >= sizeof(regdata_t) * 8) { // all ones, (1 << width) == 0! + mask = -1; + } +#if 0 + std::cerr << "constructing reginfo_t: " << std::dec << id << " " << width << " " << ((int)offs) << std::hex << " 0x" << mask << std::endl; +#endif + } + }; +protected: + + CapstoneToFailTranslator(){}; + + typedef std::map ctof_map_t; + ctof_map_t capstone_to_fail_map; + +public: + /** + * Translates a backend-specific register ID to a Fail register ID. + * @param regid A backend-specific register ID. + * @return A FAIL* register-info struct, or CapstonetoFailTranslator::notfound + * if no mapping was found. 
+ */ + const reginfo_t & getFailRegisterInfo(unsigned int regid); + + static regdata_t getRegisterContent(ConcreteCPU & cpu, const reginfo_t & reg); + static void setRegisterContent(ConcreteCPU & cpu, const reginfo_t ®, regdata_t value); + regdata_t getRegisterContent(ConcreteCPU & cpu, unsigned int csid) { + return getRegisterContent(cpu, getFailRegisterInfo(csid)); + } + void setRegisterContent(ConcreteCPU & cpu, unsigned int csid, regdata_t value) { + setRegisterContent(cpu, getFailRegisterInfo(csid), value); + } + + /** + * Translates a backend-specific register ID to a Fail register ID. + * @param regid A backend-specific register ID. + * @return A FAIL* register ID. May do funny things if regid does not exist. + */ + int getFailRegisterID(unsigned int regid) { return this->getFailRegisterInfo(regid).id; }; + + int getMaxFailRegisterID(); + fail::address_t getMaxDataAddress() { reginfo_t r(getMaxFailRegisterID() + 1); return r.toDataAddress() - 1; } + + reginfo_t notfound; + +// static CapstoneToFailTranslator* createFromBinary(const std::string elf_path); +}; + +} // end of namespace + +#endif diff --git a/src/core/util/capstonedisassembler/testing/arm9 b/src/core/util/capstonedisassembler/testing/arm9 new file mode 100755 index 00000000..854f81e4 Binary files /dev/null and b/src/core/util/capstonedisassembler/testing/arm9 differ diff --git a/src/core/util/capstonedisassembler/testing/armm3 b/src/core/util/capstonedisassembler/testing/armm3 new file mode 100755 index 00000000..637f00bc Binary files /dev/null and b/src/core/util/capstonedisassembler/testing/armm3 differ diff --git a/src/core/util/capstonedisassembler/testing/armregs.txt b/src/core/util/capstonedisassembler/testing/armregs.txt new file mode 100644 index 00000000..3b6c21cb --- /dev/null +++ b/src/core/util/capstonedisassembler/testing/armregs.txt @@ -0,0 +1,281 @@ +Triple: arm-unknown-unknown + +Number of Registers: 278 +0 - NOREG +1 - APSR +2 - CPSR +3 - D0 +4 - D1 +5 - D2 +6 - D3 +7 - D4 +8 - 
D5 +9 - D6 +10 - D7 +11 - D8 +12 - D9 +13 - D10 +14 - D11 +15 - D12 +16 - D13 +17 - D14 +18 - D15 +19 - D16 +20 - D17 +21 - D18 +22 - D19 +23 - D20 +24 - D21 +25 - D22 +26 - D23 +27 - D24 +28 - D25 +29 - D26 +30 - D27 +31 - D28 +32 - D29 +33 - D30 +34 - D31 +35 - FPEXC +36 - FPSCR +37 - FPSCR_NZCV +38 - FPSID +39 - ITSTATE +40 - LR +41 - MVFR0 +42 - MVFR1 +43 - PC +44 - Q0 +45 - Q1 +46 - Q2 +47 - Q3 +48 - Q4 +49 - Q5 +50 - Q6 +51 - Q7 +52 - Q8 +53 - Q9 +54 - Q10 +55 - Q11 +56 - Q12 +57 - Q13 +58 - Q14 +59 - Q15 +60 - R0 +61 - R1 +62 - R2 +63 - R3 +64 - R4 +65 - R5 +66 - R6 +67 - R7 +68 - R8 +69 - R9 +70 - R10 +71 - R11 +72 - R12 +73 - S0 +74 - S1 +75 - S2 +76 - S3 +77 - S4 +78 - S5 +79 - S6 +80 - S7 +81 - S8 +82 - S9 +83 - S10 +84 - S11 +85 - S12 +86 - S13 +87 - S14 +88 - S15 +89 - S16 +90 - S17 +91 - S18 +92 - S19 +93 - S20 +94 - S21 +95 - S22 +96 - S23 +97 - S24 +98 - S25 +99 - S26 +100 - S27 +101 - S28 +102 - S29 +103 - S30 +104 - S31 +105 - SP +106 - SPSR +107 - D0_D2 +108 - D1_D3 +109 - D2_D4 +110 - D3_D5 +111 - D4_D6 +112 - D5_D7 +113 - D6_D8 +114 - D7_D9 +115 - D8_D10 +116 - D9_D11 +117 - D10_D12 +118 - D11_D13 +119 - D12_D14 +120 - D13_D15 +121 - D14_D16 +122 - D15_D17 +123 - D16_D18 +124 - D17_D19 +125 - D18_D20 +126 - D19_D21 +127 - D20_D22 +128 - D21_D23 +129 - D22_D24 +130 - D23_D25 +131 - D24_D26 +132 - D25_D27 +133 - D26_D28 +134 - D27_D29 +135 - D28_D30 +136 - D29_D31 +137 - Q0_Q1 +138 - Q1_Q2 +139 - Q2_Q3 +140 - Q3_Q4 +141 - Q4_Q5 +142 - Q5_Q6 +143 - Q6_Q7 +144 - Q7_Q8 +145 - Q8_Q9 +146 - Q9_Q10 +147 - Q10_Q11 +148 - Q11_Q12 +149 - Q12_Q13 +150 - Q13_Q14 +151 - Q14_Q15 +152 - Q0_Q1_Q2_Q3 +153 - Q1_Q2_Q3_Q4 +154 - Q2_Q3_Q4_Q5 +155 - Q3_Q4_Q5_Q6 +156 - Q4_Q5_Q6_Q7 +157 - Q5_Q6_Q7_Q8 +158 - Q6_Q7_Q8_Q9 +159 - Q7_Q8_Q9_Q10 +160 - Q8_Q9_Q10_Q11 +161 - Q9_Q10_Q11_Q12 +162 - Q10_Q11_Q12_Q13 +163 - Q11_Q12_Q13_Q14 +164 - Q12_Q13_Q14_Q15 +165 - D0_D1_D2 +166 - D1_D2_D3 +167 - D2_D3_D4 +168 - D3_D4_D5 +169 - D4_D5_D6 +170 - D5_D6_D7 +171 - D6_D7_D8 +172 - 
D7_D8_D9 +173 - D8_D9_D10 +174 - D9_D10_D11 +175 - D10_D11_D12 +176 - D11_D12_D13 +177 - D12_D13_D14 +178 - D13_D14_D15 +179 - D14_D15_D16 +180 - D15_D16_D17 +181 - D16_D17_D18 +182 - D17_D18_D19 +183 - D18_D19_D20 +184 - D19_D20_D21 +185 - D20_D21_D22 +186 - D21_D22_D23 +187 - D22_D23_D24 +188 - D23_D24_D25 +189 - D24_D25_D26 +190 - D25_D26_D27 +191 - D26_D27_D28 +192 - D27_D28_D29 +193 - D28_D29_D30 +194 - D29_D30_D31 +195 - D0_D2_D4 +196 - D1_D3_D5 +197 - D2_D4_D6 +198 - D3_D5_D7 +199 - D4_D6_D8 +200 - D5_D7_D9 +201 - D6_D8_D10 +202 - D7_D9_D11 +203 - D8_D10_D12 +204 - D9_D11_D13 +205 - D10_D12_D14 +206 - D11_D13_D15 +207 - D12_D14_D16 +208 - D13_D15_D17 +209 - D14_D16_D18 +210 - D15_D17_D19 +211 - D16_D18_D20 +212 - D17_D19_D21 +213 - D18_D20_D22 +214 - D19_D21_D23 +215 - D20_D22_D24 +216 - D21_D23_D25 +217 - D22_D24_D26 +218 - D23_D25_D27 +219 - D24_D26_D28 +220 - D25_D27_D29 +221 - D26_D28_D30 +222 - D27_D29_D31 +223 - D0_D2_D4_D6 +224 - D1_D3_D5_D7 +225 - D2_D4_D6_D8 +226 - D3_D5_D7_D9 +227 - D4_D6_D8_D10 +228 - D5_D7_D9_D11 +229 - D6_D8_D10_D12 +230 - D7_D9_D11_D13 +231 - D8_D10_D12_D14 +232 - D9_D11_D13_D15 +233 - D10_D12_D14_D16 +234 - D11_D13_D15_D17 +235 - D12_D14_D16_D18 +236 - D13_D15_D17_D19 +237 - D14_D16_D18_D20 +238 - D15_D17_D19_D21 +239 - D16_D18_D20_D22 +240 - D17_D19_D21_D23 +241 - D18_D20_D22_D24 +242 - D19_D21_D23_D25 +243 - D20_D22_D24_D26 +244 - D21_D23_D25_D27 +245 - D22_D24_D26_D28 +246 - D23_D25_D27_D29 +247 - D24_D26_D28_D30 +248 - D25_D27_D29_D31 +249 - D1_D2 +250 - D3_D4 +251 - D5_D6 +252 - D7_D8 +253 - D9_D10 +254 - D11_D12 +255 - D13_D14 +256 - D15_D16 +257 - D17_D18 +258 - D19_D20 +259 - D21_D22 +260 - D23_D24 +261 - D25_D26 +262 - D27_D28 +263 - D29_D30 +264 - D1_D2_D3_D4 +265 - D3_D4_D5_D6 +266 - D5_D6_D7_D8 +267 - D7_D8_D9_D10 +268 - D9_D10_D11_D12 +269 - D11_D12_D13_D14 +270 - D13_D14_D15_D16 +271 - D15_D16_D17_D18 +272 - D17_D18_D19_D20 +273 - D19_D20_D21_D22 +274 - D21_D22_D23_D24 +275 - D23_D24_D25_D26 +276 - 
D25_D26_D27_D28
+277 - D27_D28_D29_D30
diff --git a/src/core/util/capstonedisassembler/testing/bubble.cc b/src/core/util/capstonedisassembler/testing/bubble.cc
new file mode 100644
index 00000000..c9685e33
--- /dev/null
+++ b/src/core/util/capstonedisassembler/testing/bubble.cc
@@ -0,0 +1,74 @@
+#include <cassert>
+
+/** Deterministic pseudo-random sequence: last_val = a*last_val + b. */
+class random_generator_t
+{
+private:
+    unsigned int a; // (sqrt(5)-1)/2 = 0.61803398875
+    unsigned int b;
+    unsigned int last_val;
+    unsigned int sd;
+public:
+    void forth() {
+        last_val = a*last_val + b;
+    }
+
+
+    random_generator_t(unsigned int seed = 1)
+        : a(2654435769), b(seed), last_val(1), sd(seed){
+        forth();
+    }
+
+    unsigned int item() const {
+        return last_val;
+    }
+
+    void reset() {
+        last_val = 1;
+        b = sd;
+        forth();
+    }
+};
+
+
+
+
+/** Sorts arr[0..len-1] ascending by swapping adjacent elements until a full pass needs no swap. */
+void sort(int len, int arr[] )
+{
+    int tmp;
+    int again;
+    int i;
+
+    for(again=1; again; )
+        for( again=0, i=0; i < (len-1); ++i){
+            assert(0<=i && i+1<len);
+            if( arr[i] > arr[i+1] ){
+                tmp = arr[i];
+                arr[i] = arr[i+1];
+                arr[i+1] = tmp;
+                again = 1;
+            }
+        }
+}
+
+
+
+
+/** Fills an array with generator output and sorts it (built into the test binaries, see howtobuild.txt). */
+int main()
+{
+    const unsigned int arr_size = 10000; //50000;
+
+    int arr[arr_size];
+    int i;
+    random_generator_t rand;
+
+    for(i=0; i!=arr_size; ++i){
+        arr[i] = rand.item();
+        rand.forth();
+    }
+
+    sort(arr_size, arr);
+}
+
diff --git a/src/core/util/capstonedisassembler/testing/capstoneDisTest.cc b/src/core/util/capstonedisassembler/testing/capstoneDisTest.cc
new file mode 100644
index 00000000..ba3d9247
--- /dev/null
+++ b/src/core/util/capstonedisassembler/testing/capstoneDisTest.cc
@@ -0,0 +1,71 @@
+#include "util/ElfReader.hpp"
+#include <iostream>
+#include "../CapstoneDisassembler.hpp"
+
+using namespace fail;
+
+/**
+ * Prints "(<capstone id>-><FAIL* id>) " for one register, or NOTFOUND! in
+ * place of the FAIL* id if the translator has no mapping for it.
+ * @return true if a mapping exists
+ */
+bool show_mapping(fail::CapstoneToFailTranslator *ctof, unsigned llvmid)
+{
+    const CapstoneToFailTranslator::reginfo_t& failreg = ctof->getFailRegisterInfo(llvmid);
+    std::cout /*<< reg_info.getName(llvmid)*/ << "(" << std::dec << llvmid << "->";
+    if (&failreg != &ctof->notfound) {
+        std::cout << failreg.id;
+    } else {
+        std::cout << "NOTFOUND!";
+    }
+    std::cout << ") ";
+    return &failreg != &ctof->notfound;
+}
+
+/**
+ * Disassembles the ELF file given as first argument and prints, per
+ * instruction, its address, opcode, used/defined registers and whether it is
+ * a conditional branch.
+ */
+int main(int argc, char* argv[]) {
+    std::string file;
+
+    if(argc > 1){
+        std::cout << "Trying to disassemble: " << argv[1] << std::endl;
+        file = argv[1];
+    } else {
+        std::cerr << "No file to disassemble :(" << std::endl;
+        return -1;
+    }
+
+    ElfReader *m_elf = new ElfReader(file.c_str());
+
+    CapstoneDisassembler disas(m_elf);
+    disas.disassemble();
+
+    CapstoneDisassembler::InstrMap &instr_map = disas.getInstrMap();
+    std::cout << "Map Size: " << instr_map.size() << std::endl;
+
+    CapstoneDisassembler::InstrMap::const_iterator itr;
+
+    fail::CapstoneToFailTranslator *ctof = disas.getTranslator();
+
+    for (itr = instr_map.begin(); itr != instr_map.end(); ++itr){
+        const CapstoneDisassembler::Instr &instr = (*itr).second;
+        std::cout << std::hex << (*itr).first << " | " << instr.opcode << std::endl;
+        std::cout << std::dec << "USES: ";
+        for (const auto &reg : instr.reg_uses) {
+            show_mapping(ctof, reg);
+        }
+
+        std::cout << " | DEFS: ";
+        for (const auto &reg : instr.reg_defs) {
+            show_mapping(ctof, reg);
+        }
+
+        if (instr.conditional_branch) {
+            std::cout << "(conditional branch)";
+        }
+        std::cout << std::endl;
+    }
+}
diff --git a/src/core/util/capstonedisassembler/testing/howtobuild.txt b/src/core/util/capstonedisassembler/testing/howtobuild.txt
new file mode 100644
index 00000000..5b678fd1
--- /dev/null
+++ b/src/core/util/capstonedisassembler/testing/howtobuild.txt
@@ -0,0 +1,4 @@
+g++ -m32 bubble.cc -o x86
+g++ -m64 bubble.cc -o x86_64
+arm-none-eabi-g++ -mcpu=cortex-m3 bubble.cc -o armm3
+arm-none-eabi-g++ -mcpu=arm9 bubble.cc -o arm9
diff --git a/src/core/util/capstonedisassembler/testing/x86 b/src/core/util/capstonedisassembler/testing/x86
new file mode 100755
index 00000000..3dac5193
Binary files /dev/null and b/src/core/util/capstonedisassembler/testing/x86 differ
diff 
--git a/src/core/util/capstonedisassembler/testing/x86_64 b/src/core/util/capstonedisassembler/testing/x86_64 new file mode 100755 index 00000000..513f3211 Binary files /dev/null and b/src/core/util/capstonedisassembler/testing/x86_64 differ diff --git a/src/core/util/capstonedisassembler/testing/x86regs.txt b/src/core/util/capstonedisassembler/testing/x86regs.txt new file mode 100644 index 00000000..bc01c6c5 --- /dev/null +++ b/src/core/util/capstonedisassembler/testing/x86regs.txt @@ -0,0 +1,163 @@ +Triple: x86_64-unknown-unknown + +Number of Registers: 160 +0 - NOREG +1 - AH +2 - AL +3 - AX +4 - BH +5 - BL +6 - BP +7 - BPL +8 - BX +9 - CH +10 - CL +11 - CR0 +12 - CR1 +13 - CR2 +14 - CR3 +15 - CR4 +16 - CR5 +17 - CR6 +18 - CR7 +19 - CR8 +20 - CR9 +21 - CR10 +22 - CR11 +23 - CR12 +24 - CR13 +25 - CR14 +26 - CR15 +27 - CS +28 - CX +29 - DH +30 - DI +31 - DIL +32 - DL +33 - DR0 +34 - DR1 +35 - DR2 +36 - DR3 +37 - DR4 +38 - DR5 +39 - DR6 +40 - DR7 +41 - DS +42 - DX +43 - EAX +44 - EBP +45 - EBX +46 - ECX +47 - EDI +48 - EDX +49 - EFLAGS +50 - EIP +51 - EIZ +52 - ES +53 - ESI +54 - ESP +55 - FP0 +56 - FP1 +57 - FP2 +58 - FP3 +59 - FP4 +60 - FP5 +61 - FP6 +62 - FS +63 - GS +64 - IP +65 - MM0 +66 - MM1 +67 - MM2 +68 - MM3 +69 - MM4 +70 - MM5 +71 - MM6 +72 - MM7 +73 - R8 +74 - R8B +75 - R8D +76 - R8W +77 - R9 +78 - R9B +79 - R9D +80 - R9W +81 - R10 +82 - R10B +83 - R10D +84 - R10W +85 - R11 +86 - R11B +87 - R11D +88 - R11W +89 - R12 +90 - R12B +91 - R12D +92 - R12W +93 - R13 +94 - R13B +95 - R13D +96 - R13W +97 - R14 +98 - R14B +99 - R14D +100 - R14W +101 - R15 +102 - R15B +103 - R15D +104 - R15W +105 - RAX +106 - RBP +107 - RBX +108 - RCX +109 - RDI +110 - RDX +111 - RIP +112 - RIZ +113 - RSI +114 - RSP +115 - SI +116 - SIL +117 - SP +118 - SPL +119 - SS +120 - ST0 +121 - ST1 +122 - ST2 +123 - ST3 +124 - ST4 +125 - ST5 +126 - ST6 +127 - ST7 +128 - XMM0 +129 - XMM1 +130 - XMM2 +131 - XMM3 +132 - XMM4 +133 - XMM5 +134 - XMM6 +135 - XMM7 +136 - XMM8 +137 - XMM9 +138 - 
XMM10
+139 - XMM11
+140 - XMM12
+141 - XMM13
+142 - XMM14
+143 - XMM15
+144 - YMM0
+145 - YMM1
+146 - YMM2
+147 - YMM3
+148 - YMM4
+149 - YMM5
+150 - YMM6
+151 - YMM7
+152 - YMM8
+153 - YMM9
+154 - YMM10
+155 - YMM11
+156 - YMM12
+157 - YMM13
+158 - YMM14
+159 - YMM15
diff --git a/tools/import-trace/AdvancedMemoryImporter.cc b/tools/import-trace/AdvancedMemoryImporter.cc
index eb36cd33..61eaead8 100644
--- a/tools/import-trace/AdvancedMemoryImporter.cc
+++ b/tools/import-trace/AdvancedMemoryImporter.cc
@@ -2,8 +2,10 @@
 #include
 #include "AdvancedMemoryImporter.hpp"
 
+#ifdef BUILD_LLVM_DISASSEMBLER
 using namespace llvm;
 using namespace llvm::object;
+#endif
 using namespace fail;
 
 static fail::Logger LOG("AdvancedMemoryImporter");
@@ -98,6 +100,38 @@ bool AdvancedMemoryImporter::handle_ip_event(fail::simtime_t curtime, instructio
     // (delayed) trace entries
     insert_delayed_entries(false);
 
+#if defined(BUILD_CAPSTONE_DISASSEMBLER)
+    if (!isDisassembled) {
+        if (!m_elf) {
+            LOG << "Please give an ELF binary as parameter (-e/--elf)." << std::endl;
+            return false;
+        }
+
+        disas.reset(new CapstoneDisassembler(m_elf));
+
+        disas->disassemble();
+        CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap();
+        LOG << "instructions disassembled: " << std::dec << instr_map.size() << std::endl;
+#if 0
+        for (CapstoneDisassembler::InstrMap::const_iterator it = instr_map.begin();
+             it != instr_map.end(); ++it) {
+            LOG << "DIS " << std::hex << it->second.address << " " << (int) it->second.length << std::endl;
+        }
+#endif
+        // Remember that the binary has been disassembled; without this the
+        // whole ELF would be disassembled again for every IP event.
+        isDisassembled = true;
+    }
+
+    const CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap();
+    const CapstoneDisassembler::InstrMap::const_iterator it = instr_map.find(ev.ip());
+    if (it == instr_map.end()) {
+        LOG << "WARNING: CapstoneDisassembler hasn't disassembled instruction at 0x"
+            << std::hex << ev.ip() << std::dec << " -- are you using Capstone < 4.0?" << std::endl;
+        return true; // probably weird things will happen now
+    }
+    const CapstoneDisassembler::Instr &opcode = it->second;
+#elif defined(BUILD_LLVM_DISASSEMBLER)
     if (!binary) {
         /* Disassemble the binary if necessary */
         llvm::InitializeAllTargetInfos();
@@ -144,6 +178,7 @@ bool AdvancedMemoryImporter::handle_ip_event(fail::simtime_t curtime, instructio
         return true; // probably weird things will happen now
     }
     const LLVMDisassembler::Instr &opcode = it->second;
+#endif
 
     /* Now we've got the opcode and know whether it's a conditional branch. If
      * it is, the next IP event will tell us whether it was taken or not. */
@@ -161,8 +196,13 @@ bool AdvancedMemoryImporter::handle_ip_event(fail::simtime_t curtime, instructio
 bool AdvancedMemoryImporter::handle_mem_event(fail::simtime_t curtime, instruction_count_t instr,
     Trace_Event &ev)
 {
+#if defined(BUILD_CAPSTONE_DISASSEMBLER)
+    const CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap();
+    const CapstoneDisassembler::Instr &opcode = instr_map.at(ev.ip());
+#elif defined(BUILD_LLVM_DISASSEMBLER)
     const LLVMDisassembler::InstrMap &instr_map = disas->getInstrMap();
     const LLVMDisassembler::Instr &opcode = instr_map.at(ev.ip());
+#endif
     DelayedTraceEntry entry = { curtime, instr, ev, opcode.opcode,
         (unsigned) branches_taken.size() };
     delayed_entries.push_back(entry);
diff --git a/tools/import-trace/AdvancedMemoryImporter.hpp b/tools/import-trace/AdvancedMemoryImporter.hpp
index 183418b0..1165d2b0 100644
--- a/tools/import-trace/AdvancedMemoryImporter.hpp
+++ b/tools/import-trace/AdvancedMemoryImporter.hpp
@@ -5,7 +5,11 @@
 #include
 #include "MemoryImporter.hpp"
 
+#if defined(BUILD_CAPSTONE_DISASSEMBLER)
+#include "util/capstonedisassembler/CapstoneDisassembler.hpp"
+#elif defined(BUILD_LLVM_DISASSEMBLER)
 #include "util/llvmdisassembler/LLVMDisassembler.hpp"
+#endif
 
 /**
  * A MemoryImporter that additionally imports Relyzer-style conditional branch
@@ -24,8 +28,13 @@
  * operations with a set of new virtual functions 
that are called downwards. */ class AdvancedMemoryImporter : public MemoryImporter { +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + bool isDisassembled = false; + std::unique_ptr disas; +#elif defined(BUILD_LLVM_DISASSEMBLER) llvm::object::Binary *binary = 0; std::unique_ptr disas; +#endif bool m_last_was_conditional_branch; fail::guest_address_t m_ip_jump_not_taken; std::vector branches_taken; diff --git a/tools/import-trace/CMakeLists.txt b/tools/import-trace/CMakeLists.txt index 93ab3603..dc1f6f13 100644 --- a/tools/import-trace/CMakeLists.txt +++ b/tools/import-trace/CMakeLists.txt @@ -28,9 +28,26 @@ if (BUILD_LLVM_DISASSEMBLER) find_package(LibDwarf REQUIRED) include_directories(${LIBELF_INCLUDE_DIRS}) include_directories(${LIBDWARF_INCLUDE_DIRS}) - endif(BUILD_LLVM_DISASSEMBLER) +if (BUILD_CAPSTONE_DISASSEMBLER) + set(SRCS ${SRCS} + InstructionImporter.cc + RegisterImporter.cc + RandomJumpImporter.cc + AdvancedMemoryImporter.cc + ElfImporter.cc + ) + + include(FindCapstone) + + # libelf and libdwarf required by ElfImporter + find_package(LibElf REQUIRED) + find_package(LibDwarf REQUIRED) + include_directories(${LIBELF_INCLUDE_DIRS}) + include_directories(${LIBDWARF_INCLUDE_DIRS}) +endif(BUILD_CAPSTONE_DISASSEMBLER) + find_package(MySQL REQUIRED) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MYSQL_CFLAGS}") @@ -48,5 +65,9 @@ if (BUILD_LLVM_DISASSEMBLER) target_link_libraries(import-trace fail-llvmdisassembler fail-sal ${LLVM_LIBS} ${LLVM_LDFLAGS} ${Boost_LIBRARIES}) endif (BUILD_LLVM_DISASSEMBLER) +if (BUILD_CAPSTONE_DISASSEMBLER) + target_link_libraries(import-trace fail-capstonedisassembler fail-sal) +endif (BUILD_CAPSTONE_DISASSEMBLER) + install(TARGETS import-trace RUNTIME DESTINATION bin) install(PROGRAMS import-symbols.sh DESTINATION bin) diff --git a/tools/import-trace/ElfImporter.cc b/tools/import-trace/ElfImporter.cc index 329980ab..e2e91ce4 100644 --- a/tools/import-trace/ElfImporter.cc +++ b/tools/import-trace/ElfImporter.cc @@ -9,9 +9,10 @@ #include 
#endif - +#ifdef BUILD_LLVM_DISASSEMBLER using namespace llvm; using namespace llvm::object; +#endif using namespace fail; using namespace std; diff --git a/tools/import-trace/ElfImporter.hpp b/tools/import-trace/ElfImporter.hpp index 13c8106b..4b64c6a1 100644 --- a/tools/import-trace/ElfImporter.hpp +++ b/tools/import-trace/ElfImporter.hpp @@ -7,7 +7,13 @@ #include "libelf.h" #include "Importer.hpp" + +#if defined(BUILD_CAPSTONE_DISASSEMBLER) +#include "util/capstonedisassembler/CapstoneDisassembler.hpp" +#elif defined(BUILD_LLVM_DISASSEMBLER) #include "util/llvmdisassembler/LLVMDisassembler.hpp" +#endif + #include "util/CommandLine.hpp" #include "util/DwarfReader.hpp" @@ -27,8 +33,11 @@ into the database. */ class ElfImporter : public Importer { - std::unique_ptr binary; +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + std::unique_ptr disas; +#elif defined(BUILD_LLVM_DISASSEMBLER) std::unique_ptr disas; +#endif fail::CommandLine::option_handle OBJDUMP; fail::CommandLine::option_handle SOURCECODE; diff --git a/tools/import-trace/InstructionImporter.cc b/tools/import-trace/InstructionImporter.cc index c0eab477..60243cf4 100644 --- a/tools/import-trace/InstructionImporter.cc +++ b/tools/import-trace/InstructionImporter.cc @@ -3,8 +3,10 @@ #include "InstructionImporter.hpp" #include "util/Logger.hpp" +#ifdef BUILD_LLVM_DISASSEMBLER using namespace llvm; using namespace llvm::object; +#endif using namespace fail; @@ -12,6 +14,28 @@ static Logger LOG("InstructionImporter"); bool InstructionImporter::handle_ip_event(fail::simtime_t curtime, instruction_count_t instr, Trace_Event &ev) { +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + if (!isDisassembled) { + if (!m_elf) { + LOG << "Please give an ELF binary as parameter (-e/--elf)." 
<< std::endl; + return false; + } + + disas.reset(new CapstoneDisassembler(m_elf)); + + disas->disassemble(); + CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap(); + LOG << "instructions disassembled: " << instr_map.size() << std::endl; + isDisassembled = true; + } + const CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap(); + if (instr_map.find(ev.ip()) == instr_map.end()) { + LOG << "Could not find instruction for IP " << std::hex << ev.ip() + << ", skipping" << std::endl; + return true; + } + const CapstoneDisassembler::Instr &opcode = instr_map.at(ev.ip()); +#elif defined(BUILD_LLVM_DISASSEMBLER) if (!binary) { /* Disassemble the binary if necessary */ llvm::InitializeAllTargetInfos(); @@ -46,6 +70,7 @@ bool InstructionImporter::handle_ip_event(fail::simtime_t curtime, instruction_c const LLVMDisassembler::InstrMap &instr_map = disas->getInstrMap(); const LLVMDisassembler::Instr &opcode = instr_map.at(ev.ip()); +#endif address_t from = ev.ip(), to = ev.ip() + opcode.length; diff --git a/tools/import-trace/InstructionImporter.hpp b/tools/import-trace/InstructionImporter.hpp index 27391a30..9640e230 100644 --- a/tools/import-trace/InstructionImporter.hpp +++ b/tools/import-trace/InstructionImporter.hpp @@ -3,11 +3,20 @@ #include "Importer.hpp" +#if defined(BUILD_CAPSTONE_DISASSEMBLER) +#include "util/capstonedisassembler/CapstoneDisassembler.hpp" +#elif defined(BUILD_LLVM_DISASSEMBLER) #include "util/llvmdisassembler/LLVMDisassembler.hpp" +#endif class InstructionImporter : public Importer { +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + bool isDisassembled = false; + std::unique_ptr disas; +#elif defined(BUILD_LLVM_DISASSEMBLER) llvm::object::Binary *binary = 0; std::unique_ptr disas; +#endif protected: virtual bool handle_ip_event(fail::simtime_t curtime, instruction_count_t instr, diff --git a/tools/import-trace/RandomJumpImporter.cc b/tools/import-trace/RandomJumpImporter.cc index d426f12c..a92af9d9 100644 --- 
a/tools/import-trace/RandomJumpImporter.cc +++ b/tools/import-trace/RandomJumpImporter.cc @@ -3,8 +3,10 @@ #include "util/Logger.hpp" #include "RandomJumpImporter.hpp" +#ifdef BUILD_LLVM_DISASSEMBLER using namespace llvm; using namespace llvm::object; +#endif using namespace fail; using namespace std; @@ -60,6 +62,22 @@ bool RandomJumpImporter::handle_ip_event(fail::simtime_t curtime, instruction_co return false; } +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + disas.reset(new CapstoneDisassembler(m_elf)); + + disas->disassemble(); + CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap(); + LOG << "instructions disassembled: " << instr_map.size() << std::endl; + + /* Collect all addresses we want to jump to */ + for (CapstoneDisassembler::InstrMap::const_iterator instr = instr_map.begin(); + instr != instr_map.end(); ++instr) { + if (m_mm_to && m_mm_to->isMatching(instr->first)) { + m_jump_to_addresses.push_back(instr->first); + } + } + binary = true; +#elif defined(BUILD_LLVM_DISASSEMBLER) /* Disassemble the binary if necessary */ llvm::InitializeAllTargetInfos(); llvm::InitializeAllTargetMCs(); @@ -92,6 +110,7 @@ bool RandomJumpImporter::handle_ip_event(fail::simtime_t curtime, instruction_co m_jump_to_addresses.push_back(instr->first); } } +#endif LOG << "we will jump to " << m_jump_to_addresses.size() << " addresses" << endl; } diff --git a/tools/import-trace/RandomJumpImporter.hpp b/tools/import-trace/RandomJumpImporter.hpp index c48c7b38..48096bd3 100644 --- a/tools/import-trace/RandomJumpImporter.hpp +++ b/tools/import-trace/RandomJumpImporter.hpp @@ -5,11 +5,20 @@ #include "util/CommandLine.hpp" #include "Importer.hpp" +#if defined(BUILD_CAPSTONE_DISASSEMBLER) +#include "util/capstonedisassembler/CapstoneDisassembler.hpp" +#elif defined(BUILD_LLVM_DISASSEMBLER) #include "util/llvmdisassembler/LLVMDisassembler.hpp" +#endif class RandomJumpImporter : public Importer { +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + bool binary = false; + std::unique_ptr 
disas;
+#elif defined(BUILD_LLVM_DISASSEMBLER)
     llvm::object::Binary *binary = 0;
     std::unique_ptr disas;
+#endif
     fail::CommandLine::option_handle FROM, TO;
diff --git a/tools/import-trace/RegisterImporter.cc b/tools/import-trace/RegisterImporter.cc
index 2d3e14ad..ad240180 100644
--- a/tools/import-trace/RegisterImporter.cc
+++ b/tools/import-trace/RegisterImporter.cc
@@ -3,8 +3,11 @@
 #include "RegisterImporter.hpp"
 #include "util/Logger.hpp"
 
+#ifdef BUILD_LLVM_DISASSEMBLER
 using namespace llvm;
 using namespace llvm::object;
+#endif
+
 using namespace fail;
 
 
@@ -29,7 +32,236 @@ bool RegisterImporter::cb_commandline_init() {
     return true;
 }
 
+#if defined(BUILD_CAPSTONE_DISASSEMBLER)
+/**
+ * Records one register access as trace event(s): for every data address
+ * covered by the access, the currently open equivalence class (EC) is
+ * closed by a trace event and a new EC starting at the next instruction is
+ * opened.
+ *
+ * @param curtime     time stamp of the accessing instruction
+ * @param instr       dynamic instruction count of the accessing instruction
+ * @param ev          trace event of the instruction; its width, memaddr and
+ *                    access type fields are overwritten here
+ * @param info        FAIL* description of the accessed (sub-)register
+ * @param access_type 'R' for a read; any other value is recorded as a write
+ * @return false if add_trace_event() failed, true otherwise
+ */
+bool RegisterImporter::addRegisterTrace(simtime_t curtime, instruction_count_t instr,
+                                        Trace_Event &ev,
+                                        const CapstoneToFailTranslator::reginfo_t &info,
+                                        char access_type) {
+    // Skip events outside a possibly supplied memory map.  The check only
+    // depends on the instruction address, so it is done once up front
+    // instead of once per accessed byte.
+    if (m_mm && !m_mm->isMatching(ev.ip())) {
+        return true;
+    }
+
+    address_t from, to;
+    int chunk_width;
+    if (do_split_registers) {
+        /* If we want to split the registers into one byte chunks (to
+           enable proper pruning), we use a one byte window register
+           to determine beginning and end address */
+        CapstoneToFailTranslator::reginfo_t one_byte_window = info;
+        one_byte_window.width = 8;
+        from = one_byte_window.toDataAddress();
+        to = one_byte_window.toDataAddress() + info.width / 8;
+        chunk_width = 1; // One byte chunks
+    } else {
+        /* We trace whole registers */
+        from = info.toDataAddress();
+        to = from + 1; /* exactly one trace event per register access */
+        chunk_width = (info.width / 8);
+    }
+
+    // Iterate over all accessed bytes
+    for (address_t data_address = from; data_address < to; ++data_address) {
+        margin_info_t left_margin = getOpenEC(data_address);
+        margin_info_t right_margin;
+        right_margin.time = curtime;
+        right_margin.dyninstr = instr; // !< The current instruction
+        right_margin.ip = ev.ip();
+
+        // skip zero-sized intervals: these can occur when an instruction
+        // accesses a memory location more than once (e.g., INC, CMPXCHG)
+        if (left_margin.dyninstr > right_margin.dyninstr) {
+            continue;
+        }
+
+        // we now have an interval-terminating R/W event to the memaddr
+        // we're currently looking at; the EC is defined by
+        // data_address, dynamic instruction start/end, the absolute PC at
+        // the end, and time start/end
+
+        // pass through potentially available extended trace information
+        ev.set_width(chunk_width);
+        ev.set_memaddr(data_address);
+        ev.set_accesstype(access_type == 'R' ? ev.READ : ev.WRITE);
+        if (!add_trace_event(left_margin, right_margin, ev)) {
+            LOG << "add_trace_event failed" << std::endl;
+            return false;
+        }
+
+        // next interval must start at next instruction; the aforementioned
+        // skipping mechanism wouldn't work otherwise
+        newOpenEC(data_address, curtime + 1, instr + 1, ev.ip());
+    }
+    return true;
+}
+
+
+/**
+ * Processes one executed instruction of the trace.  The first call reads
+ * the register selection from the command line and disassembles the ELF
+ * binary; every call then emits read/write register trace events for the
+ * instruction at ev.ip().
+ *
+ * @return false on a fatal error (command line, missing ELF binary, failed
+ *         trace event); true otherwise, also when the instruction is
+ *         unknown to the disassembler and therefore skipped
+ */
+bool RegisterImporter::handle_ip_event(fail::simtime_t curtime, instruction_count_t instr,
+                                       Trace_Event &ev) {
+    if (!isDisassembled) {
+        // Parse command line again, for jump-from and jump-to
+        // operations
+        CommandLine &cmd = CommandLine::Inst();
+        if (!cmd.parse()) {
+            std::cerr << "Error parsing arguments." << std::endl;
+            return false;
+        }
+        do_gp = !cmd[NO_GP];
+        do_flags = cmd[FLAGS];
+        do_ip = cmd[IP];
+        do_split_registers = !cmd[NO_SPLIT];
+
+        // retrieve register IDs for general-purpose and flags register(s) for
+        // the configured architecture
+        fail::Architecture arch;
+        m_ip_register_id =
+            (*arch.getRegisterSetOfType(RT_IP)->begin())->getId();
+        fail::UniformRegisterSet *regset;
+        if (do_gp) {
+            regset = arch.getRegisterSetOfType(RT_GP);
+            for (fail::UniformRegisterSet::iterator it = regset->begin();
+                 it != regset->end(); ++it) {
+                m_register_ids.insert((*it)->getId());
+            }
+        }
+        if (do_flags) {
+            regset = arch.getRegisterSetOfType(RT_ST);
+            for (fail::UniformRegisterSet::iterator it = regset->begin();
+                 it != regset->end(); ++it) {
+                m_register_ids.insert((*it)->getId());
+            }
+        }
+
+
+        if (!m_elf) {
+            LOG << "Please give an ELF binary as parameter (-e/--elf)." << std::endl;
+            return false;
+        }
+        disas.reset(new CapstoneDisassembler(m_elf));
+        LOG << "Start to dissamble" << std::endl;
+        disas->disassemble();
+        LOG << "Get instr map" << std::endl;
+        CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap();
+        LOG << "instructions disassembled: " << std::dec << instr_map.size() << std::endl;
+        m_ctof = disas->getTranslator();
+        isDisassembled = true;
+    }
+
+    // instruction pointer is read + written at each instruction
+    const CapstoneToFailTranslator::reginfo_t info_pc(m_ip_register_id);
+    if (do_ip &&
+        (!addRegisterTrace(curtime, instr, ev, info_pc, 'R') ||
+         !addRegisterTrace(curtime, instr, ev, info_pc, 'W'))) {
+        return false;
+    }
+
+    const CapstoneDisassembler::InstrMap &instr_map = disas->getInstrMap();
+    // one lookup serves as both existence check and access
+    const CapstoneDisassembler::InstrMap::const_iterator instr_it = instr_map.find(ev.ip());
+    if (instr_it == instr_map.end()) {
+        LOG << "Could not find instruction for IP " << std::hex << ev.ip()
+            << ", skipping" << std::endl;
+        return true;
+    }
+    const CapstoneDisassembler::Instr &opcode = instr_it->second;
+
+    // registers read by the instruction
+    for (const auto &reg : opcode.reg_uses) {
+        const CapstoneToFailTranslator::reginfo_t &info = m_ctof->getFailRegisterInfo(reg);
+        if (&info == &m_ctof->notfound) {
+            // record failed translation, report later
+            m_regnotfound[reg].count++;
+            m_regnotfound[reg].address.insert(ev.ip());
+            continue;
+        }
+
+        /* only proceed if we want to inject into this register */
+        if (m_register_ids.find(info.id) == m_register_ids.end()) {
+            continue;
+        }
+
+        if (!addRegisterTrace(curtime, instr, ev, info, 'R')) {
+            return false;
+        }
+    }
+
+    // registers written by the instruction
+    for (const auto &reg : opcode.reg_defs) {
+        const CapstoneToFailTranslator::reginfo_t &info = m_ctof->getFailRegisterInfo(reg);
+        if (&info == &m_ctof->notfound) {
+            // record failed translation, report later
+            m_regnotfound[reg].count++;
+            m_regnotfound[reg].address.insert(ev.ip());
+            continue;
+        }
+
+        /* only proceed if we want to inject into this register */
+        if (m_register_ids.find(info.id) == m_register_ids.end()) {
+            continue;
+        }
+
+        if (!addRegisterTrace(curtime, instr, ev, info, 'W')) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+bool RegisterImporter::trace_end_reached()
+{
+    // report failed LLVM -> FAIL* register mappings, if any
+    if (m_regnotfound.empty()) {
+        return true;
+    }
+
+    LOG << "WARNING: Some LLVM -> FAIL* register mappings failed during import, these will not be injected into:" << std::endl;
+    for (auto it = m_regnotfound.cbegin(); it != m_regnotfound.cend(); ++it) {
+        const CapstoneDisassembler::register_t id = it->first;
+        const RegNotFound& rnf = it->second;
+        LOG << "Capstone register " << std::dec << id
+ 
/* << " \"" << disas->getRegisterInfo().getName(id) << "\": " */ + << "seen " << rnf.count << " times in the trace" << std::endl; + std::ostream& o = LOG << " corresponding instruction addresses in ELF binary: " << std::hex; + for (auto addr_it = rnf.address.cbegin(); addr_it != rnf.address.cend(); ++addr_it) { + if (addr_it != rnf.address.cbegin()) { + o << ", "; + } + o << "0x" << *addr_it; + } + o << std::endl; + } + + return true; +} +#elif defined(BUILD_LLVM_DISASSEMBLER) bool RegisterImporter::addRegisterTrace(simtime_t curtime, instruction_count_t instr, Trace_Event &ev, const LLVMtoFailTranslator::reginfo_t &info, @@ -177,9 +387,12 @@ bool RegisterImporter::handle_ip_event(fail::simtime_t curtime, instruction_coun } const LLVMDisassembler::Instr &opcode = instr_map.at(ev.ip()); //const MCRegisterInfo ®_info = disas->getRegisterInfo(); +// LOG << std::hex << "Address: " << opcode.address << " Opcode: " << opcode.opcode << std::endl; +// std::string log_regs = "Uses: "; for (std::vector::const_iterator it = opcode.reg_uses.begin(); it != opcode.reg_uses.end(); ++it) { +// log_regs += std::to_string(*it) + " "; const LLVMtoFailTranslator::reginfo_t &info = m_ltof->getFailRegisterInfo(*it); if (&info == &m_ltof->notfound) { // record failed translation, report later @@ -198,8 +411,10 @@ bool RegisterImporter::handle_ip_event(fail::simtime_t curtime, instruction_coun } } +// log_regs += "Defs: "; for (std::vector::const_iterator it = opcode.reg_defs.begin(); it != opcode.reg_defs.end(); ++it) { +// log_regs += std::to_string(*it) + " "; const LLVMtoFailTranslator::reginfo_t &info = m_ltof->getFailRegisterInfo(*it); if (&info == &m_ltof->notfound) { // record failed translation, report later @@ -210,12 +425,14 @@ bool RegisterImporter::handle_ip_event(fail::simtime_t curtime, instruction_coun /* only proceed if we want to inject into this register */ if (m_register_ids.find(info.id) == m_register_ids.end()) { +// log_regs += "n "; continue; } if 
(!addRegisterTrace(curtime, instr, ev, info, 'W')) return false; } +// LOG << log_regs.c_str() << std::endl; return true; } @@ -246,3 +463,4 @@ bool RegisterImporter::trace_end_reached() return true; } +#endif diff --git a/tools/import-trace/RegisterImporter.hpp b/tools/import-trace/RegisterImporter.hpp index 64218097..b0e927c3 100644 --- a/tools/import-trace/RegisterImporter.hpp +++ b/tools/import-trace/RegisterImporter.hpp @@ -5,10 +5,38 @@ #include "util/CommandLine.hpp" #include "Importer.hpp" +#if defined(BUILD_CAPSTONE_DISASSEMBLER) +#include "util/capstonedisassembler/CapstoneDisassembler.hpp" +#elif defined(BUILD_LLVM_DISASSEMBLER) #include "util/llvmdisassembler/LLVMDisassembler.hpp" - +#endif class RegisterImporter : public Importer { +#if defined(BUILD_CAPSTONE_DISASSEMBLER) + bool isDisassembled = false; + std::unique_ptr disas; + fail::CapstoneToFailTranslator *m_ctof = 0; + + bool addRegisterTrace(fail::simtime_t curtime, instruction_count_t instr, + Trace_Event &ev, + const fail::CapstoneToFailTranslator::reginfo_t &info, + char access_type); + + fail::CommandLine::option_handle NO_GP, FLAGS, IP, NO_SPLIT; + bool do_gp, do_flags, do_ip, do_split_registers; + + std::set m_register_ids; + unsigned m_ip_register_id; + + // Data structures for recording failed LLVM -> FAIL* register mappings, + // including occurrence counts in the trace (to give an estimate on the + // impact) and instruction addresses (for debugging purposes). 
+ struct RegNotFound { + uint64_t count = 0; + std::set address; + }; + std::map m_regnotfound; +#elif defined(BUILD_LLVM_DISASSEMBLER) llvm::object::Binary *binary = 0; std::unique_ptr disas; fail::LLVMtoFailTranslator *m_ltof = 0; @@ -32,6 +60,8 @@ class RegisterImporter : public Importer { std::set address; }; std::map m_regnotfound; +#endif + public: RegisterImporter() : Importer(), do_gp(true), do_flags(false), do_ip(false), diff --git a/tools/import-trace/main.cc b/tools/import-trace/main.cc index c3be1237..064eb4ad 100644 --- a/tools/import-trace/main.cc +++ b/tools/import-trace/main.cc @@ -10,12 +10,14 @@ #include "FullTraceImporter.hpp" #include "util/AliasedRegistry.hpp" +#if defined(BUILD_LLVM_DISASSEMBLER) || defined(BUILD_CAPSTONE_DISASSEMBLER) #ifdef BUILD_LLVM_DISASSEMBLER #include "llvm/Support/ManagedStatic.h" -#include "InstructionImporter.hpp" -#include "RegisterImporter.hpp" +#endif #include "RandomJumpImporter.hpp" #include "AdvancedMemoryImporter.hpp" +#include "InstructionImporter.hpp" +#include "RegisterImporter.hpp" #include "ElfImporter.hpp" #endif @@ -67,18 +69,20 @@ int main(int argc, char *argv[]) { FullTraceImporter fti; registry.add(&fti); +#if defined(BUILD_LLVM_DISASSEMBLER) || defined(BUILD_CAPSTONE_DISASSEMBLER) #ifdef BUILD_LLVM_DISASSEMBLER llvm::llvm_shutdown_obj Y; - RegisterImporter reg; - registry.add(®); - RandomJumpImporter rjump; - registry.add(&rjump); +#endif AdvancedMemoryImporter adv; registry.add(&adv); - ElfImporter elf; - registry.add(&elf); + RandomJumpImporter rjump; + registry.add(&rjump); InstructionImporter instr; registry.add(&instr); + RegisterImporter reg; + registry.add(®); + ElfImporter elf; + registry.add(&elf); #endif std::string importers = registry.getPrimeAliasesCSV();