util/llvmdisassembler: map registers by names

Internal LLVM register IDs can and did change between LLVM versions.
These magic integers are replaced by iterating over all LLVM registers
and mapping them to FAIL* registers by name.
As this iteration requires an LLVM object created from a binary, a static
convenience function is added to LLVMtoFailTranslator which creates a
translator given the binary filename. Building this functionality inside
libfail-llvmdisassembler prevents experiments from needing to add LLVM
includes and library definitions.

Change-Id: I27927f40d5cb6d9a22bb2caf21ca2450f6bcb0b8
This commit is contained in:
Florian Lukas
2014-02-26 14:33:48 +01:00
parent 21f5f681e0
commit 0799e52fde
7 changed files with 111 additions and 57 deletions

View File

@ -12,10 +12,10 @@ LLVMtoFailTranslator & LLVMDisassembler::getTranslator() {
switch ( llvm::Triple::ArchType(object->getArch()) ) { switch ( llvm::Triple::ArchType(object->getArch()) ) {
case llvm::Triple::x86: case llvm::Triple::x86:
case llvm::Triple::x86_64: case llvm::Triple::x86_64:
ltofail = new LLVMtoFailBochs; ltofail = new LLVMtoFailBochs(this);
break; break;
case llvm::Triple::arm: case llvm::Triple::arm:
ltofail = new LLVMtoFailGem5; ltofail = new LLVMtoFailGem5(this);
break; break;
default: default:
std::cout << " not supported :("; std::cout << " not supported :(";

View File

@ -1,48 +1,68 @@
#include "LLVMDisassembler.hpp"
#include "LLVMtoFailBochs.hpp" #include "LLVMtoFailBochs.hpp"
#include "sal/x86/X86Architecture.hpp" #include "sal/x86/X86Architecture.hpp"
using namespace fail; using namespace fail;
LLVMtoFailBochs::LLVMtoFailBochs() { LLVMtoFailBochs::LLVMtoFailBochs(LLVMDisassembler *disas) {
/* These magic numbers are taken from the llvm compiler (MC), they std::map<std::string, struct reginfo_t> reg_name_map;
do not appear in any header. They hopefully will never
change */
llvm_to_fail_map[1] = reginfo_t(RID_CAX, 8, 8) ; // AH
llvm_to_fail_map[2] = reginfo_t(RID_CAX, 8, 0); // AL
llvm_to_fail_map[3] = reginfo_t(RID_CAX, 16, 0); // AX
llvm_to_fail_map[43] = reginfo_t(RID_CAX, 32, 0); // EAX
llvm_to_fail_map[4] = reginfo_t(RID_CBX, 8, 8); // BH reg_name_map["AH"] = reginfo_t(RID_CAX, 8, 8);
llvm_to_fail_map[5] = reginfo_t(RID_CBX, 8, 0); // BL reg_name_map["AL"] = reginfo_t(RID_CAX, 8, 0);
llvm_to_fail_map[8] = reginfo_t(RID_CBX, 16, 0); // BX reg_name_map["AX"] = reginfo_t(RID_CAX, 16, 0);
llvm_to_fail_map[45] = reginfo_t(RID_CBX, 32, 0); // EBX reg_name_map["EAX"] = reginfo_t(RID_CAX, 32, 0);
llvm_to_fail_map[9] = reginfo_t(RID_CCX, 8, 8); // CH reg_name_map["BH"] = reginfo_t(RID_CBX, 8, 8);
llvm_to_fail_map[10] = reginfo_t(RID_CCX, 8, 0); // CL reg_name_map["BL"] = reginfo_t(RID_CBX, 8, 0);
llvm_to_fail_map[28] = reginfo_t(RID_CCX, 16, 0); // CX reg_name_map["BX"] = reginfo_t(RID_CBX, 16, 0);
llvm_to_fail_map[46] = reginfo_t(RID_CCX); // ECX reg_name_map["EBX"] = reginfo_t(RID_CBX, 32, 0);
llvm_to_fail_map[29] = reginfo_t(RID_CDX, 8, 8); // DH reg_name_map["CH"] = reginfo_t(RID_CCX, 8, 8);
llvm_to_fail_map[32] = reginfo_t(RID_CDX, 8, 0); // DL reg_name_map["CL"] = reginfo_t(RID_CCX, 8, 0);
llvm_to_fail_map[42] = reginfo_t(RID_CDX, 16, 0); // DX reg_name_map["CX"] = reginfo_t(RID_CCX, 16, 0);
llvm_to_fail_map[48] = reginfo_t(RID_CDX); // EDX reg_name_map["ECX"] = reginfo_t(RID_CCX);
llvm_to_fail_map[30] = reginfo_t(RID_CDI, 16, 0); // DI reg_name_map["DH"] = reginfo_t(RID_CDX, 8, 8);
llvm_to_fail_map[31] = reginfo_t(RID_CDI, 8, 0); // DIL reg_name_map["DL"] = reginfo_t(RID_CDX, 8, 0);
llvm_to_fail_map[47] = reginfo_t(RID_CDI); // EDI reg_name_map["DX"] = reginfo_t(RID_CDX, 16, 0);
reg_name_map["EDX"] = reginfo_t(RID_CDX);
llvm_to_fail_map[6] = reginfo_t(RID_CBP, 16, 0); // BP reg_name_map["DI"] = reginfo_t(RID_CDI, 16, 0);
llvm_to_fail_map[7] = reginfo_t(RID_CBP, 8, 0); // BPL reg_name_map["DIL"] = reginfo_t(RID_CDI, 8, 0);
llvm_to_fail_map[44] = reginfo_t(RID_CBP); // EBP reg_name_map["EDI"] = reginfo_t(RID_CDI);
llvm_to_fail_map[49] = reginfo_t(RID_FLAGS); // EFLAGS reg_name_map["BP"] = reginfo_t(RID_CBP, 16, 0);
reg_name_map["BPL"] = reginfo_t(RID_CBP, 8, 0);
reg_name_map["EBP"] = reginfo_t(RID_CBP);
llvm_to_fail_map[50] = reginfo_t(RID_PC); // EIP reg_name_map["EFLAGS"] = reginfo_t(RID_FLAGS);
llvm_to_fail_map[115] = reginfo_t(RID_CSI, 16, 0); // SI reg_name_map["EIP"] = reginfo_t(RID_PC);
llvm_to_fail_map[53] = reginfo_t(RID_CSI); // ESI
llvm_to_fail_map[54] = reginfo_t(RID_CSP); // ESP reg_name_map["SI"] = reginfo_t(RID_CSI, 16, 0);
llvm_to_fail_map[117] = reginfo_t(RID_CSP, 16, 0); // SP reg_name_map["ESI"] = reginfo_t(RID_CSI);
llvm_to_fail_map[118] = reginfo_t(RID_CSP, 8, 0); // SPL
reg_name_map["ESP"] = reginfo_t(RID_CSP);
reg_name_map["SP"] = reginfo_t(RID_CSP, 16, 0);
reg_name_map["SPL"] = reginfo_t(RID_CSP, 8, 0);
reg_name_map["CR0"] = reginfo_t(RID_CR0);
reg_name_map["CR2"] = reginfo_t(RID_CR2);
reg_name_map["CR3"] = reginfo_t(RID_CR3);
reg_name_map["CR4"] = reginfo_t(RID_CR4);
reg_name_map["CS"] = reginfo_t(RID_CS, 16, 0);
reg_name_map["DS"] = reginfo_t(RID_DS, 16, 0);
reg_name_map["ES"] = reginfo_t(RID_ES, 16, 0);
reg_name_map["FS"] = reginfo_t(RID_FS, 16, 0);
reg_name_map["GS"] = reginfo_t(RID_GS, 16, 0);
reg_name_map["SS"] = reginfo_t(RID_SS, 16, 0);
const llvm::MCRegisterInfo &reg_info = disas->getRegisterInfo();
for (unsigned int i = 0; i < reg_info.getNumRegs(); ++i){
std::string name = reg_info.getName(i);
if (reg_name_map.count(name) > 0) {
llvm_to_fail_map[i] = reg_name_map[name];
}
}
} }

View File

@ -7,11 +7,13 @@
namespace fail { namespace fail {
class LLVMDisassembler;
class LLVMtoFailBochs : public LLVMtoFailTranslator { class LLVMtoFailBochs : public LLVMtoFailTranslator {
public: public:
LLVMtoFailBochs(); LLVMtoFailBochs(LLVMDisassembler *disas);
}; };
} // end of namespace } // end of namespace

View File

@ -1,26 +1,34 @@
#include "LLVMDisassembler.hpp"
#include "LLVMtoFailGem5.hpp" #include "LLVMtoFailGem5.hpp"
#include "sal/arm/ArmArchitecture.hpp" #include "sal/arm/ArmArchitecture.hpp"
using namespace fail; using namespace fail;
LLVMtoFailGem5::LLVMtoFailGem5() { LLVMtoFailGem5::LLVMtoFailGem5(LLVMDisassembler *disas) {
/* These magic numbers are taken from the machine descriptions of std::map<std::string, struct reginfo_t> reg_name_map;
LLVM they (hopefully) will not change, since they are not exported
via a header */ reg_name_map["R0"] = reginfo_t(RI_R0);
llvm_to_fail_map[60] = reginfo_t(RI_R0); reg_name_map["R1"] = reginfo_t(RI_R1);
llvm_to_fail_map[61] = reginfo_t(RI_R1); reg_name_map["R2"] = reginfo_t(RI_R2);
llvm_to_fail_map[62] = reginfo_t(RI_R2); reg_name_map["R3"] = reginfo_t(RI_R3);
llvm_to_fail_map[63] = reginfo_t(RI_R3); reg_name_map["R4"] = reginfo_t(RI_R4);
llvm_to_fail_map[64] = reginfo_t(RI_R4); reg_name_map["R5"] = reginfo_t(RI_R5);
llvm_to_fail_map[65] = reginfo_t(RI_R5); reg_name_map["R6"] = reginfo_t(RI_R6);
llvm_to_fail_map[66] = reginfo_t(RI_R6); reg_name_map["R7"] = reginfo_t(RI_R7);
llvm_to_fail_map[67] = reginfo_t(RI_R7); reg_name_map["R8"] = reginfo_t(RI_R8);
llvm_to_fail_map[68] = reginfo_t(RI_R8); reg_name_map["R9"] = reginfo_t(RI_R9);
llvm_to_fail_map[69] = reginfo_t(RI_R9); reg_name_map["R10"] = reginfo_t(RI_R10);
llvm_to_fail_map[70] = reginfo_t(RI_R10); reg_name_map["R11"] = reginfo_t(RI_R11);
llvm_to_fail_map[71] = reginfo_t(RI_R11); reg_name_map["R12"] = reginfo_t(RI_R12);
llvm_to_fail_map[72] = reginfo_t(RI_R12); reg_name_map["SP"] = reginfo_t(RI_SP);
llvm_to_fail_map[105] = reginfo_t(RI_SP); reg_name_map["LR"] = reginfo_t(RI_LR);
llvm_to_fail_map[40] = reginfo_t(RI_LR); reg_name_map["PC"] = reginfo_t(RI_IP);
llvm_to_fail_map[43] = reginfo_t(RI_IP);
const llvm::MCRegisterInfo &reg_info = disas->getRegisterInfo();
for (unsigned int i = 0; i < reg_info.getNumRegs(); ++i){
std::string name = reg_info.getName(i);
if (reg_name_map.count(name) > 0) {
llvm_to_fail_map[i] = reg_name_map[name];
}
}
} }

View File

@ -7,11 +7,13 @@
namespace fail { namespace fail {
class LLVMDisassembler;
class LLVMtoFailGem5 : public LLVMtoFailTranslator { class LLVMtoFailGem5 : public LLVMtoFailTranslator {
public: public:
LLVMtoFailGem5(); LLVMtoFailGem5(LLVMDisassembler *disas);
}; };
} // end of namespace } // end of namespace

View File

@ -1,7 +1,10 @@
#include "LLVMDisassembler.hpp"
#include "LLVMtoFailTranslator.hpp" #include "LLVMtoFailTranslator.hpp"
#include "sal/SALInst.hpp" #include "sal/SALInst.hpp"
using namespace fail; using namespace fail;
using namespace llvm;
using namespace llvm::object;
const LLVMtoFailTranslator::reginfo_t & LLVMtoFailTranslator::getFailRegisterID(unsigned int regid) { const LLVMtoFailTranslator::reginfo_t & LLVMtoFailTranslator::getFailRegisterID(unsigned int regid) {
ltof_map_t::iterator it = llvm_to_fail_map.find(regid); ltof_map_t::iterator it = llvm_to_fail_map.find(regid);
@ -38,3 +41,20 @@ void LLVMtoFailTranslator::setRegisterContent(ConcreteCPU & cpu, const reginfo_t
cpu.setRegisterContent( reg, value ); // write back register content cpu.setRegisterContent( reg, value ); // write back register content
} }
/**
 * Creates the LLVM-to-FAIL* register translator for the binary at the given
 * path.
 *
 * Initializes LLVM's target infos, target MCs and disassemblers, loads the
 * binary, wraps it in an LLVMDisassembler and returns the address of the
 * translator obtained from LLVMDisassembler::getTranslator().
 *
 * \param elf_path path of the binary to load
 * \return pointer to the translator; 0 if the binary could not be loaded
 *         (release builds only, debug builds abort via assert), if it is not
 *         an object file, or if __puma is defined
 */
LLVMtoFailTranslator* LLVMtoFailTranslator::createFromBinary(const std::string elf_path) {
	llvm_shutdown_obj Y;
	llvm::InitializeAllTargetInfos();
	llvm::InitializeAllTargetMCs();
	llvm::InitializeAllDisassemblers();

	OwningPtr<Binary> binary;
	// Load the binary outside of assert(): with NDEBUG defined the asserted
	// expression is not evaluated at all, which would skip the load entirely
	// and leave 'binary' empty.
	const bool loaded = (createBinary(elf_path, binary) == 0);
	assert(loaded);
	if (!loaded) {
		return 0;
	}

#ifndef __puma
	// dyn_cast yields a null pointer if the loaded binary is not an ObjectFile.
	ObjectFile *object = dyn_cast<ObjectFile>(binary.get());
	if (object == 0) {
		return 0;
	}

	LLVMDisassembler disas(object);
	// NOTE(review): 'disas', 'binary' and 'Y' are destroyed when this function
	// returns. Confirm that the translator returned by getTranslator() is not
	// freed by ~LLVMDisassembler() and keeps no references into them;
	// otherwise the returned pointer dangles.
	return &disas.getTranslator();
#else
	return 0;
#endif
}

View File

@ -68,6 +68,8 @@ public:
int getFailRegisterId(unsigned int regid) { return this->getFailRegisterID(regid).id; }; int getFailRegisterId(unsigned int regid) { return this->getFailRegisterID(regid).id; };
reginfo_t notfound; reginfo_t notfound;
static LLVMtoFailTranslator* createFromBinary(const std::string elf_path);
}; };
} // end of namespace } // end of namespace