Adding gem5 source to svn.

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1819 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
2012-10-24 19:18:57 +00:00
parent f7ff71bd46
commit b41eec3f65
3222 changed files with 658579 additions and 1 deletions
--- a/simulators/gem5/src/cpu/simple/AtomicSimpleCPU.py
+++ b/simulators/gem5/src/cpu/simple/AtomicSimpleCPU.py
@ -0,0 +1,49 @@
+# Copyright (c) 2012 ARM Limited
+# All rights reserved.
+#
+# The license below extends only to copyright in the software and shall
+# not be construed as granting a license to any other intellectual
+# property including but not limited to intellectual property relating
+# to a hardware implementation of the functionality of the software
+# licensed hereunder.  You may use the software subject to the license
+# terms below provided that you ensure that this notice is replicated
+# unmodified and in its entirety in all distributions of the software,
+# modified or unmodified, in source code or in binary form.
+#
+# Copyright (c) 2007 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+from m5.params import *
+from BaseSimpleCPU import BaseSimpleCPU
+
+# Configuration-side declaration of the AtomicSimpleCPU SimObject: it adds
+# the parameters below on top of whatever BaseSimpleCPU provides.
+class AtomicSimpleCPU(BaseSimpleCPU):
+    # SimObject type name (presumably matches the C++ class -- confirm).
+    type = 'AtomicSimpleCPU'
+    # CPU width; defaults to 1.
+    width = Param.Int(1, "CPU width")
+    # Whether dcache stall cycles are simulated; off by default.
+    simulate_data_stalls = Param.Bool(False, "Simulate dcache stall cycles")
+    # Whether icache stall cycles are simulated; off by default.
+    simulate_inst_stalls = Param.Bool(False, "Simulate icache stall cycles")
+    # Whether memory is accessed directly; off by default.
+    fastmem = Param.Bool(False, "Access memory directly")
--- a/simulators/gem5/src/cpu/simple/BaseSimpleCPU.py
+++ b/simulators/gem5/src/cpu/simple/BaseSimpleCPU.py
@ -0,0 +1,47 @@
+# Copyright (c) 2008 The Hewlett-Packard Development Company
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Gabe Black
+
+from m5.defines import buildEnv
+from m5.params import *
+from BaseCPU import BaseCPU
+from DummyChecker import DummyChecker
+
+class BaseSimpleCPU(BaseCPU):
+    type = 'BaseSimpleCPU'
+    abstract = True
+
+    def addCheckerCpu(self):
+        if buildEnv['TARGET_ISA'] in ['arm']:
+            from ArmTLB import ArmTLB
+
+            self.checker = DummyChecker(workload = self.workload)
+            self.checker.itb = ArmTLB(size = self.itb.size)
+            self.checker.dtb = ArmTLB(size = self.dtb.size)
+        else:
+            print "ERROR: Checker only supported under ARM ISA!"
+            exit(1)
--- a/simulators/gem5/src/cpu/simple/SConscript
+++ b/simulators/gem5/src/cpu/simple/SConscript
@ -0,0 +1,50 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+need_simple_base = False
+if 'AtomicSimpleCPU' in env['CPU_MODELS']:
+    need_simple_base = True
+    SimObject('AtomicSimpleCPU.py')
+    Source('atomic.cc')
+
+if 'TimingSimpleCPU' in env['CPU_MODELS']:
+    need_simple_base = True
+    SimObject('TimingSimpleCPU.py')
+    Source('timing.cc')
+
+if 'AtomicSimpleCPU' in env['CPU_MODELS'] or \
+       'TimingSimpleCPU' in env['CPU_MODELS']:
+    DebugFlag('SimpleCPU')
+
+if need_simple_base:
+    Source('base.cc')
+    SimObject('BaseSimpleCPU.py')
--- a/simulators/gem5/src/cpu/simple/SConsopts
+++ b/simulators/gem5/src/cpu/simple/SConsopts
@ -0,0 +1,40 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+Import('*')
+
+CpuModel('AtomicSimpleCPU', 'atomic_simple_cpu_exec.cc',
+         '#include "cpu/simple/atomic.hh"',
+         { 'CPU_exec_context': 'AtomicSimpleCPU' },
+         default=True)
+CpuModel('TimingSimpleCPU', 'timing_simple_cpu_exec.cc',
+         '#include "cpu/simple/timing.hh"',
+         { 'CPU_exec_context': 'TimingSimpleCPU' },
+         default=True)
--- a/simulators/gem5/src/cpu/simple/TimingSimpleCPU.py
+++ b/simulators/gem5/src/cpu/simple/TimingSimpleCPU.py
@ -0,0 +1,33 @@
+# Copyright (c) 2007 The Regents of The University of Michigan
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Nathan Binkert
+
+from m5.params import *
+from BaseSimpleCPU import BaseSimpleCPU
+
+# Configuration-side declaration of the TimingSimpleCPU SimObject.  It
+# declares no parameters of its own here; everything comes from
+# BaseSimpleCPU.
+class TimingSimpleCPU(BaseSimpleCPU):
+    type = 'TimingSimpleCPU'
--- a/simulators/gem5/src/cpu/simple/atomic.cc
+++ b/simulators/gem5/src/cpu/simple/atomic.cc
@ -0,0 +1,561 @@
+/*
+ * Copyright (c) 2012 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#include "arch/locked_mem.hh"
+#include "arch/mmapped_ipr.hh"
+#include "arch/utility.hh"
+#include "base/bigint.hh"
+#include "config/the_isa.hh"
+#include "cpu/simple/atomic.hh"
+#include "cpu/exetrace.hh"
+#include "debug/ExecFaulting.hh"
+#include "debug/SimpleCPU.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
+#include "mem/physical.hh"
+#include "params/AtomicSimpleCPU.hh"
+#include "sim/faults.hh"
+#include "sim/system.hh"
+#include "sim/full_system.hh"
+
+using namespace std;
+using namespace TheISA;
+
+// Create the tick event for CPU 'c'.  The event is constructed with the
+// CPU_Tick_Pri priority and keeps a pointer back to its CPU.
+AtomicSimpleCPU::TickEvent::TickEvent(AtomicSimpleCPU *c)
+    : Event(CPU_Tick_Pri), cpu(c)
+{
+}
+
+
+// Event callback: forwards to the owning CPU's tick().
+void
+AtomicSimpleCPU::TickEvent::process()
+{
+    cpu->tick();
+}
+
+// Return a fixed, human-readable label for this event.
+const char *
+AtomicSimpleCPU::TickEvent::description() const
+{
+    return "AtomicSimpleCPU tick";
+}
+
+// Initialise the base CPU, the thread context's memory proxies and the
+// three statically allocated requests (ifetch, data read, data write).
+void
+AtomicSimpleCPU::init()
+{
+    BaseCPU::init();
+
+    // Initialise the ThreadContext's memory proxies
+    tcBase()->initMemProxies(tcBase());
+
+    // In full-system mode, unless registration is deferred, let the ISA
+    // initialise every thread context (including its PC).
+    const bool initContexts = FullSystem && !params()->defer_registration;
+    if (initContexts) {
+        const ThreadID numContexts = threadContexts.size();
+        for (ThreadID tid = 0; tid < numContexts; ++tid) {
+            ThreadContext *ctx = threadContexts[tid];
+            TheISA::initCPU(ctx, ctx->contextId());
+        }
+    }
+
+    // Atomic doesn't do MT right now, so contextId == threadId.  Thread 0
+    // is hard-coded for all three requests; add the real thread ID here
+    // if MT support is ever added.
+    ifetch_req.setThreadContext(_cpuId, 0);
+    data_read_req.setThreadContext(_cpuId, 0);
+    data_write_req.setThreadContext(_cpuId, 0);
+}
+
+// Construct the CPU from its parameter struct 'p'.  Width, the two
+// stall-simulation switches and fastmem are copied from the parameters;
+// the CPU starts unlocked and in the Idle state.
+//
+// The instruction and data ports get distinct names ("-iport" and
+// "-dport").  The data port used to reuse the "-iport" suffix, which made
+// the two ports indistinguishable by name.
+AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
+    : BaseSimpleCPU(p), tickEvent(this), width(p->width), locked(false),
+      simulate_data_stalls(p->simulate_data_stalls),
+      simulate_inst_stalls(p->simulate_inst_stalls),
+      icachePort(name() + "-iport", this), dcachePort(name() + "-dport", this),
+      fastmem(p->fastmem)
+{
+    _status = Idle;
+}
+
+
+// Make sure no tick event is left scheduled when the CPU is destroyed.
+AtomicSimpleCPU::~AtomicSimpleCPU()
+{
+    if (!tickEvent.scheduled())
+        return;
+
+    deschedule(tickEvent);
+}
+
+// Write this CPU's checkpoint state to 'os', in this order: the SimObject
+// state enum, the 'locked' flag, the BaseSimpleCPU state, and finally the
+// tick event under the name "<cpu name>.tickEvent".
+void
+AtomicSimpleCPU::serialize(ostream &os)
+{
+    // The local must be called so_state: the macro serializes it by name.
+    SimObject::State so_state = SimObject::getState();
+    SERIALIZE_ENUM(so_state);
+    SERIALIZE_SCALAR(locked);
+    BaseSimpleCPU::serialize(os);
+    // presumably nameOut() opens a separate section for the event -- confirm
+    nameOut(os, csprintf("%s.tickEvent", name()));
+    tickEvent.serialize(os);
+}
+
+// Restore this CPU's state from checkpoint 'cp', reading from 'section':
+// the SimObject state enum, the 'locked' flag, the BaseSimpleCPU state,
+// and the tick event from "<section>.tickEvent".
+void
+AtomicSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    // NOTE(review): so_state is read from the checkpoint but not used
+    // again in this function.
+    SimObject::State so_state;
+    UNSERIALIZE_ENUM(so_state);
+    UNSERIALIZE_SCALAR(locked);
+    BaseSimpleCPU::unserialize(cp, section);
+    tickEvent.unserialize(cp, csprintf("%s.tickEvent", section));
+}
+
+// Resume execution.  Does nothing when the CPU is Idle or SwitchedOut;
+// otherwise marks the SimObject as Running, schedules the tick event if
+// the thread is active and no tick is pending, and resets the system's
+// total instruction count.
+void
+AtomicSimpleCPU::resume()
+{
+    if (_status == Idle || _status == SwitchedOut)
+        return;
+
+    DPRINTF(SimpleCPU, "Resume\n");
+    // This CPU model requires the system to be in atomic memory mode.
+    assert(system->getMemoryMode() == Enums::atomic);
+
+    changeState(SimObject::Running);
+
+    if (thread->status() == ThreadContext::Active && !tickEvent.scheduled())
+        schedule(tickEvent, nextCycle());
+
+    system->totalNumInsts = 0;
+}
+
+// Switch this CPU out: it must currently be Running or Idle.  The status
+// becomes SwitchedOut and the tick event is squashed.
+void
+AtomicSimpleCPU::switchOut()
+{
+    assert(_status == Running || _status == Idle);
+    _status = SwitchedOut;
+
+    tickEvent.squash();
+}
+
+
+// Take over execution from 'oldCPU'.  After the base-class hand-over, the
+// CPU becomes Running (with a tick scheduled) if one of its thread
+// contexts is active, and Idle otherwise.  The three static requests are
+// then re-bound to this CPU's id.
+void
+AtomicSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
+
+    assert(!tickEvent.scheduled());
+
+    // Find the first active ThreadContext; if there is one and we are not
+    // already running, start running and schedule the tick event.
+    const ThreadID numContexts = threadContexts.size();
+    for (ThreadID tid = 0; tid < numContexts; ++tid) {
+        ThreadContext *ctx = threadContexts[tid];
+        if (ctx->status() != ThreadContext::Active || _status == Running)
+            continue;
+
+        _status = Running;
+        schedule(tickEvent, nextCycle());
+        break;
+    }
+
+    if (_status != Running) {
+        _status = Idle;
+    }
+
+    // Only a single thread context is supported, hence thread 0 below;
+    // add the real thread ID if MT support is ever added.
+    assert(threadContexts.size() == 1);
+    ifetch_req.setThreadContext(_cpuId, 0);
+    data_read_req.setThreadContext(_cpuId, 0);
+    data_write_req.setThreadContext(_cpuId, 0);
+}
+
+
+// Activate thread 'thread_num' (only thread 0 is supported) after 'delay'
+// cycles.  The CPU must be Idle with no tick pending; it ends up Running
+// with the tick event scheduled.
+void
+AtomicSimpleCPU::activateContext(ThreadID thread_num, int delay)
+{
+    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
+
+    assert(thread_num == 0);
+    assert(thread);
+    assert(_status == Idle);
+    assert(!tickEvent.scheduled());
+
+    notIdleFraction++;
+    numCycles += tickToCycles(thread->lastActivate - thread->lastSuspend);
+
+    // Make sure ticks are still on multiples of cycles
+    const Tick wakeupTick = nextCycle(curTick() + ticks(delay));
+    schedule(tickEvent, wakeupTick);
+    _status = Running;
+}
+
+
+// Suspend thread 'thread_num' (only thread 0 is supported).  A CPU that is
+// already Idle is left untouched; otherwise it must be Running, any
+// pending tick is descheduled and the CPU becomes Idle.
+void
+AtomicSimpleCPU::suspendContext(ThreadID thread_num)
+{
+    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
+
+    assert(thread_num == 0);
+    assert(thread);
+
+    // Nothing to do if we are already idle.
+    if (_status == Idle)
+        return;
+
+    assert(_status == Running);
+
+    // tick event may not be scheduled if this gets called from inside
+    // an instruction's execution, e.g. "quiesce"
+    if (tickEvent.scheduled())
+        deschedule(tickEvent);
+
+    notIdleFraction--;
+    _status = Idle;
+}
+
+
+// Atomically read 'size' bytes from virtual address 'addr' into 'data'.
+//
+// An access that crosses a block boundary of the dcache port's peer is
+// split into two accesses, one per block.  Each part is translated through
+// the data TLB and then serviced by the memory-mapped IPR handler, directly
+// by physical memory (fastmem), or through the dcache port.  The latency of
+// IPR and dcache-port accesses is accumulated in dcache_latency.
+//
+// Returns the translation fault, if any; a fault on a prefetch request is
+// reported as NoFault.
+Fault
+AtomicSimpleCPU::readMem(Addr addr, uint8_t * data,
+                         unsigned size, unsigned flags)
+{
+    // use the CPU's statically allocated read request and packet objects
+    Request *req = &data_read_req;
+
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    //The block size of our peer.
+    unsigned blockSize = dcachePort.peerBlockSize();
+    //The size of the data we're trying to read.
+    int fullSize = size;
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+
+    //If the access is split, the first part only runs up to the boundary.
+    if (secondAddr > addr)
+        size = secondAddr - addr;
+
+    dcache_latency = 0;
+
+    //One iteration per part of the access (at most two).
+    while (1) {
+        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+
+        // translate to physical address
+        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Read);
+
+        // Now do the access.
+        if (fault == NoFault && !req->getFlags().isSet(Request::NO_ACCESS)) {
+            Packet pkt = Packet(req,
+                                req->isLLSC() ? MemCmd::LoadLockedReq :
+                                MemCmd::ReadReq);
+            pkt.dataStatic(data);
+
+            if (req->isMmappedIpr())
+                dcache_latency += TheISA::handleIprRead(thread->getTC(), &pkt);
+            else {
+                //With fastmem, addresses backed by memory bypass the port.
+                if (fastmem && system->isMemAddr(pkt.getAddr()))
+                    system->getPhysMem().access(&pkt);
+                else
+                    dcache_latency += dcachePort.sendAtomic(&pkt);
+            }
+            dcache_access = true;
+
+            assert(!pkt.isError());
+
+            if (req->isLLSC()) {
+                TheISA::handleLockedRead(thread, req);
+            }
+        }
+
+        //If there's a fault, return it (prefetch faults are swallowed).
+        if (fault != NoFault) {
+            if (req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
+        }
+
+        //If we don't need to access a second cache line, stop now.
+        if (secondAddr <= addr)
+        {
+            //A successful locked read sets the 'locked' flag; tick()
+            //keeps executing instructions while it is set.
+            if (req->isLocked() && fault == NoFault) {
+                assert(!locked);
+                locked = true;
+            }
+            return fault;
+        }
+
+        /*
+         * Set up for accessing the second cache line.
+         */
+
+        //Move the pointer we're reading into to the correct location.
+        data += size;
+        //Adjust the size to get the remaining bytes.
+        size = addr + fullSize - secondAddr;
+        //And access the right address.
+        addr = secondAddr;
+    }
+}
+
+
+// Atomically write 'size' bytes from 'data' to virtual address 'addr'.
+//
+// An access that crosses a block boundary of the dcache port's peer is
+// split into two accesses, one per block.  Each part is translated through
+// the data TLB.  Store-conditional requests are first checked by the ISA's
+// locked-write handler, which may suppress the access; swap requests use
+// SwapReq and, for conditional swaps, take their compare value from *res.
+// The access itself goes to the memory-mapped IPR handler, directly to
+// physical memory (fastmem), or through the dcache port.
+//
+// 'res' may be NULL unless the request is a swap.  For swaps it receives
+// the data returned in the packet; otherwise, when non-NULL, it receives
+// the request's extra data.
+//
+// Returns the translation fault, if any; a fault on a prefetch request is
+// reported as NoFault.
+Fault
+AtomicSimpleCPU::writeMem(uint8_t *data, unsigned size,
+                          Addr addr, unsigned flags, uint64_t *res)
+{
+    // use the CPU's statically allocated write request and packet objects
+    Request *req = &data_write_req;
+
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    //The block size of our peer.
+    unsigned blockSize = dcachePort.peerBlockSize();
+    //The size of the data we're trying to write.
+    int fullSize = size;
+
+    //The address of the second part of this access if it needs to be split
+    //across a cache line boundary.
+    Addr secondAddr = roundDown(addr + size - 1, blockSize);
+
+    //If the access is split, the first part only runs up to the boundary.
+    if(secondAddr > addr)
+        size = secondAddr - addr;
+
+    dcache_latency = 0;
+
+    //One iteration per part of the access (at most two).
+    while(1) {
+        req->setVirt(0, addr, size, flags, dataMasterId(), thread->pcState().instAddr());
+
+        // translate to physical address
+        Fault fault = thread->dtb->translateAtomic(req, tc, BaseTLB::Write);
+
+        // Now do the access.
+        if (fault == NoFault) {
+            MemCmd cmd = MemCmd::WriteReq; // default
+            bool do_access = true;  // flag to suppress cache access
+
+            if (req->isLLSC()) {
+                cmd = MemCmd::StoreCondReq;
+                do_access = TheISA::handleLockedWrite(thread, req);
+            } else if (req->isSwap()) {
+                cmd = MemCmd::SwapReq;
+                if (req->isCondSwap()) {
+                    assert(res);
+                    req->setExtraData(*res);
+                }
+            }
+
+            if (do_access && !req->getFlags().isSet(Request::NO_ACCESS)) {
+                Packet pkt = Packet(req, cmd);
+                pkt.dataStatic(data);
+
+                if (req->isMmappedIpr()) {
+                    dcache_latency +=
+                        TheISA::handleIprWrite(thread->getTC(), &pkt);
+                } else {
+                    //With fastmem, addresses backed by memory bypass the
+                    //port.
+                    if (fastmem && system->isMemAddr(pkt.getAddr()))
+                        system->getPhysMem().access(&pkt);
+                    else
+                        dcache_latency += dcachePort.sendAtomic(&pkt);
+                }
+                dcache_access = true;
+                assert(!pkt.isError());
+
+                //A swap hands the packet's data back to the caller.
+                //NOTE(review): copies fullSize bytes into a uint64_t;
+                //assumes swaps are at most 8 bytes -- confirm.
+                if (req->isSwap()) {
+                    assert(res);
+                    memcpy(res, pkt.getPtr<uint8_t>(), fullSize);
+                }
+            }
+
+            if (res && !req->isSwap()) {
+                *res = req->getExtraData();
+            }
+        }
+
+        //If there's a fault or we don't need to access a second cache line,
+        //stop now.
+        if (fault != NoFault || secondAddr <= addr)
+        {
+            //A successful locked write clears the 'locked' flag.
+            if (req->isLocked() && fault == NoFault) {
+                assert(locked);
+                locked = false;
+            }
+            if (fault != NoFault && req->isPrefetch()) {
+                return NoFault;
+            } else {
+                return fault;
+            }
+        }
+
+        /*
+         * Set up for accessing the second cache line.
+         */
+
+        //Move the pointer we're writing from to the correct location.
+        data += size;
+        //Adjust the size to get the remaining bytes.
+        size = addr + fullSize - secondAddr;
+        //And access the right address.
+        addr = secondAddr;
+    }
+}
+
+
+// Main execution step, run from the tick event.
+//
+// Executes up to 'width' instructions, and keeps going beyond that while
+// the 'locked' flag is set.  For each instruction it checks for interrupts
+// and PC events, fetches (when a new instruction is needed), executes, and
+// advances the PC.  If stall simulation is enabled, the icache/dcache
+// latencies are rounded up to whole cycles and added to this tick's
+// latency.  Unless the CPU went idle, the tick event is rescheduled after
+// that latency, which is at least one cycle.
+void
+AtomicSimpleCPU::tick()
+{
+    DPRINTF(SimpleCPU, "Tick\n");
+
+    Tick latency = 0;
+
+    for (int i = 0; i < width || locked; ++i) {
+        numCycles++;
+
+        // Interrupts are not taken while the current instruction is a
+        // delayed-commit one.
+        if (!curStaticInst || !curStaticInst->isDelayedCommit())
+            checkForInterrupts();
+
+        checkPcEventQueue();
+        // We must have just got suspended by a PC event
+        if (_status == Idle)
+            return;
+
+        Fault fault = NoFault;
+
+        TheISA::PCState pcState = thread->pcState();
+
+        // No fetch is needed while executing from the micro-code ROM or
+        // while a macro instruction is still in flight.
+        bool needToFetch = !isRomMicroPC(pcState.microPC()) &&
+                           !curMacroStaticInst;
+        if (needToFetch) {
+            setupFetchRequest(&ifetch_req);
+            fault = thread->itb->translateAtomic(&ifetch_req, tc,
+                                                 BaseTLB::Execute);
+        }
+
+        if (fault == NoFault) {
+            Tick icache_latency = 0;
+            bool icache_access = false;
+            dcache_access = false; // assume no dcache access
+
+            if (needToFetch) {
+                // This is commented out because the decoder would act like
+                // a tiny cache otherwise. It wouldn't be flushed when needed
+                // like the I cache. It should be flushed, and when that works
+                // this code should be uncommented.
+                //Fetch more instruction memory if necessary
+                //if(decoder.needMoreBytes())
+                //{
+                    icache_access = true;
+                    Packet ifetch_pkt = Packet(&ifetch_req, MemCmd::ReadReq);
+                    ifetch_pkt.dataStatic(&inst);
+
+                    // With fastmem, addresses backed by memory bypass the
+                    // icache port.
+                    if (fastmem && system->isMemAddr(ifetch_pkt.getAddr()))
+                        system->getPhysMem().access(&ifetch_pkt);
+                    else
+                        icache_latency = icachePort.sendAtomic(&ifetch_pkt);
+
+                    assert(!ifetch_pkt.isError());
+
+                    // ifetch_req is initialized to read the instruction directly
+                    // into the CPU object's inst field.
+                //}
+            }
+
+            preExecute();
+
+            if (curStaticInst) {
+                fault = curStaticInst->execute(this, traceData);
+
+                // keep an instruction count
+                if (fault == NoFault)
+                    countInst();
+                else if (traceData && !DTRACE(ExecFaulting)) {
+                    // Drop the trace record of a faulting instruction
+                    // unless ExecFaulting tracing is enabled.
+                    delete traceData;
+                    traceData = NULL;
+                }
+
+                postExecute();
+            }
+
+            // @todo remove me after debugging with legion done
+            if (curStaticInst && (!curStaticInst->isMicroop() ||
+                        curStaticInst->isFirstMicroop()))
+                instCnt++;
+
+            Tick stall_ticks = 0;
+            if (simulate_inst_stalls && icache_access)
+                stall_ticks += icache_latency;
+
+            if (simulate_data_stalls && dcache_access)
+                stall_ticks += dcache_latency;
+
+            if (stall_ticks) {
+                Tick stall_cycles = stall_ticks / ticks(1);
+                Tick aligned_stall_ticks = ticks(stall_cycles);
+
+                // Round a partial cycle up to the next whole cycle.  This
+                // must add one cycle's worth of ticks, not a single tick,
+                // or the result is no longer cycle-aligned.
+                if (aligned_stall_ticks < stall_ticks)
+                    aligned_stall_ticks += ticks(1);
+
+                latency += aligned_stall_ticks;
+            }
+
+        }
+        if(fault != NoFault || !stayAtPC)
+            advancePC(fault);
+    }
+
+    // instruction takes at least one cycle
+    if (latency < ticks(1))
+        latency = ticks(1);
+
+    if (_status != Idle)
+        schedule(tickEvent, curTick() + latency);
+}
+
+
+// Forward an address-print request for 'a' to the data cache port.
+void
+AtomicSimpleCPU::printAddr(Addr a)
+{
+    dcachePort.printAddr(a);
+}
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//  AtomicSimpleCPU Simulation Object
+//
+// Factory hook: build an AtomicSimpleCPU from these parameters.  The CPU
+// is always single-threaded, and outside full-system mode exactly one
+// workload must be supplied.
+AtomicSimpleCPU *
+AtomicSimpleCPUParams::create()
+{
+    numThreads = 1;
+
+    if (!(FullSystem || workload.size() == 1))
+        panic("only one workload allowed");
+
+    AtomicSimpleCPU *cpu = new AtomicSimpleCPU(this);
+    return cpu;
+}
--- a/simulators/gem5/src/cpu/simple/atomic.hh
+++ b/simulators/gem5/src/cpu/simple/atomic.hh
@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2012 ARM Limited
+ * All rights reserved.
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#ifndef __CPU_SIMPLE_ATOMIC_HH__
+#define __CPU_SIMPLE_ATOMIC_HH__
+
+#include "cpu/simple/base.hh"
+#include "params/AtomicSimpleCPU.hh"
+
+/**
+ * Simple CPU model derived from BaseSimpleCPU.  Execution is driven by a
+ * self-rescheduling TickEvent that invokes tick(); instruction and data
+ * accesses go through the two AtomicCPUPort members below.
+ */
+class AtomicSimpleCPU : public BaseSimpleCPU
+{
+  public:
+
+    AtomicSimpleCPU(AtomicSimpleCPUParams *params);
+    virtual ~AtomicSimpleCPU();
+
+    virtual void init();
+
+  private:
+
+    /** Event type used to drive the CPU; holds a back-pointer to it. */
+    struct TickEvent : public Event
+    {
+        AtomicSimpleCPU *cpu;
+
+        TickEvent(AtomicSimpleCPU *c);
+        void process();
+        const char *description() const;
+    };
+
+    /** The event instance that tick() re-schedules for the next cycle. */
+    TickEvent tickEvent;
+
+    // NOTE(review): presumably the number of instructions attempted per
+    // tick() -- confirm against the tick() implementation.
+    const int width;
+    bool locked;
+    /** When set, tick() adds the data-access latency to the stall time. */
+    const bool simulate_data_stalls;
+    /** When set, tick() adds the fetch-access latency to the stall time. */
+    const bool simulate_inst_stalls;
+
+    // main simulation loop (one cycle)
+    void tick();
+
+    /**
+     * An AtomicCPUPort overrides recvAtomicSnoop() so that a snooped
+     * coherence request is ignored (a latency of 0 is returned)
+     * instead of panicking.
+     */
+    class AtomicCPUPort : public CpuPort
+    {
+
+      public:
+
+        AtomicCPUPort(const std::string &_name, BaseCPU* _cpu)
+            : CpuPort(_name, _cpu)
+        { }
+
+      protected:
+
+        /** Ignore the snooped packet and report zero ticks of latency. */
+        virtual Tick recvAtomicSnoop(PacketPtr pkt)
+        {
+            // Snooping a coherence request, just return
+            return 0;
+        }
+
+    };
+
+    /** Port returned by getInstPort(). */
+    AtomicCPUPort icachePort;
+    /** Port returned by getDataPort(); also used by printAddr(). */
+    AtomicCPUPort dcachePort;
+
+    bool fastmem;
+    // Request objects kept as members (one each for instruction fetch,
+    // data read and data write).
+    Request ifetch_req;
+    Request data_read_req;
+    Request data_write_req;
+
+    // Data-access bookkeeping read by tick(): whether a data access
+    // happened and the latency to charge for it.
+    bool dcache_access;
+    Tick dcache_latency;
+
+  protected:
+
+    /** Return a reference to the data port. */
+    virtual CpuPort &getDataPort() { return dcachePort; }
+
+    /** Return a reference to the instruction port. */
+    virtual CpuPort &getInstPort() { return icachePort; }
+
+  public:
+
+    virtual void serialize(std::ostream &os);
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+    virtual void resume();
+
+    void switchOut();
+    void takeOverFrom(BaseCPU *oldCPU);
+
+    virtual void activateContext(ThreadID thread_num, int delay);
+    virtual void suspendContext(ThreadID thread_num);
+
+    /** Memory read entry point; returns a Fault. */
+    Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags);
+
+    /** Memory write entry point; returns a Fault. */
+    Fault writeMem(uint8_t *data, unsigned size,
+                   Addr addr, unsigned flags, uint64_t *res);
+
+    /**
+     * Print state of address in memory system via PrintReq (for
+     * debugging).
+     */
+    void printAddr(Addr a);
+};
+
+#endif // __CPU_SIMPLE_ATOMIC_HH__
--- a/simulators/gem5/src/cpu/simple/base.cc
+++ b/simulators/gem5/src/cpu/simple/base.cc
@ -0,0 +1,554 @@
+/*
+ * Copyright (c) 2010-2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#include "arch/kernel_stats.hh"
+#include "arch/stacktrace.hh"
+#include "arch/tlb.hh"
+#include "arch/utility.hh"
+#include "arch/vtophys.hh"
+#include "base/loader/symtab.hh"
+#include "base/cp_annotate.hh"
+#include "base/cprintf.hh"
+#include "base/inifile.hh"
+#include "base/misc.hh"
+#include "base/pollevent.hh"
+#include "base/range.hh"
+#include "base/trace.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "cpu/simple/base.hh"
+#include "cpu/base.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/checker/thread_context.hh"
+#include "cpu/exetrace.hh"
+#include "cpu/profile.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/smt.hh"
+#include "cpu/static_inst.hh"
+#include "cpu/thread_context.hh"
+#include "debug/Decode.hh"
+#include "debug/Fetch.hh"
+#include "debug/Quiesce.hh"
+#include "mem/mem_object.hh"
+#include "mem/packet.hh"
+#include "mem/request.hh"
+#include "params/BaseSimpleCPU.hh"
+#include "sim/byteswap.hh"
+#include "sim/debug.hh"
+#include "sim/faults.hh"
+#include "sim/full_system.hh"
+#include "sim/sim_events.hh"
+#include "sim/sim_object.hh"
+#include "sim/stats.hh"
+#include "sim/system.hh"
+
+using namespace std;
+using namespace TheISA;
+
+/**
+ * Construct the CPU: create its single SimpleThread, optionally wrap the
+ * thread context with a checker context, register the context with the
+ * base CPU and reset all counters and fetch state.
+ *
+ * @param p Parameter struct; supplies system, itb, dtb, the optional
+ *          checker and (outside full-system mode) workload[0].
+ */
+BaseSimpleCPU::BaseSimpleCPU(BaseSimpleCPUParams *p)
+    : BaseCPU(p), traceData(NULL), thread(NULL)
+{
+    // In full-system mode the thread is built without a workload;
+    // otherwise it is bound to the first workload.
+    if (FullSystem)
+        thread = new SimpleThread(this, 0, p->system, p->itb, p->dtb);
+    else
+        thread = new SimpleThread(this, /* thread_num */ 0, p->system,
+                p->workload[0], p->itb, p->dtb);
+
+    thread->setStatus(ThreadContext::Halted);
+
+    tc = thread->getTC();
+
+    if (p->checker) {
+        BaseCPU *temp_checker = p->checker;
+        checker = dynamic_cast<CheckerCPU *>(temp_checker);
+        // dynamic_cast yields NULL when the object is not a CheckerCPU;
+        // fail with a clear message instead of dereferencing NULL.
+        if (!checker)
+            panic("checker parameter is not a CheckerCPU");
+        checker->setSystem(p->system);
+        // Manipulate thread context
+        ThreadContext *cpu_tc = tc;
+        tc = new CheckerThreadContext<ThreadContext>(cpu_tc, this->checker);
+    } else {
+        checker = NULL;
+    }
+
+    numInst = 0;
+    startNumInst = 0;
+    numOp = 0;
+    startNumOp = 0;
+    numLoad = 0;
+    startNumLoad = 0;
+    lastIcacheStall = 0;
+    lastDcacheStall = 0;
+    // The retry counters were previously left uninitialized.
+    lastIcacheRetry = 0;
+    lastDcacheRetry = 0;
+
+    threadContexts.push_back(tc);
+
+    fetchOffset = 0;
+    stayAtPC = false;
+}
+
+/**
+ * Destructor; performs no work of its own.
+ *
+ * NOTE(review): the SimpleThread (and, when a checker is configured, the
+ * CheckerThreadContext) allocated with new in the constructor are not
+ * deleted here -- confirm this is intentional.
+ */
+BaseSimpleCPU::~BaseSimpleCPU()
+{
+}
+
+/**
+ * Deallocate a thread context.  Currently implemented by simply
+ * delegating to suspendContext().
+ *
+ * @param thread_num Thread to deallocate.
+ */
+void
+BaseSimpleCPU::deallocateContext(ThreadID thread_num)
+{
+    // for now, these are equivalent
+    suspendContext(thread_num);
+}
+
+
+/**
+ * Halt a thread context.  Currently implemented by simply delegating to
+ * suspendContext().
+ *
+ * @param thread_num Thread to halt.
+ */
+void
+BaseSimpleCPU::haltContext(ThreadID thread_num)
+{
+    // for now, these are equivalent
+    suspendContext(thread_num);
+}
+
+
+/**
+ * Give every statistic of this CPU its name and description, then define
+ * the derived formulas.  The base-class statistics are registered first.
+ */
+void
+BaseSimpleCPU::regStats()
+{
+    using namespace Stats;
+
+    BaseCPU::regStats();
+
+    // --- committed instruction / op counts ---
+    numInsts.name(name() + ".committedInsts")
+        .desc("Number of instructions committed");
+
+    numOps.name(name() + ".committedOps")
+        .desc("Number of ops (including micro ops) committed");
+
+    // --- functional-unit and instruction-class counts ---
+    numIntAluAccesses.name(name() + ".num_int_alu_accesses")
+        .desc("Number of integer alu accesses");
+
+    numFpAluAccesses.name(name() + ".num_fp_alu_accesses")
+        .desc("Number of float alu accesses");
+
+    numCallsReturns.name(name() + ".num_func_calls")
+        .desc("number of times a function call or return occured");
+
+    numCondCtrlInsts.name(name() + ".num_conditional_control_insts")
+        .desc("number of instructions that are conditional controls");
+
+    numIntInsts.name(name() + ".num_int_insts")
+        .desc("number of integer instructions");
+
+    numFpInsts.name(name() + ".num_fp_insts")
+        .desc("number of float instructions");
+
+    // --- register-file traffic ---
+    numIntRegReads.name(name() + ".num_int_register_reads")
+        .desc("number of times the integer registers were read");
+
+    numIntRegWrites.name(name() + ".num_int_register_writes")
+        .desc("number of times the integer registers were written");
+
+    numFpRegReads.name(name() + ".num_fp_register_reads")
+        .desc("number of times the floating registers were read");
+
+    numFpRegWrites.name(name() + ".num_fp_register_writes")
+        .desc("number of times the floating registers were written");
+
+    // --- memory references ---
+    numMemRefs.name(name() + ".num_mem_refs")
+        .desc("number of memory refs");
+
+    numStoreInsts.name(name() + ".num_store_insts")
+        .desc("Number of store instructions");
+
+    numLoadInsts.name(name() + ".num_load_insts")
+        .desc("Number of load instructions");
+
+    // --- idle / busy accounting ---
+    notIdleFraction.name(name() + ".not_idle_fraction")
+        .desc("Percentage of non-idle cycles");
+
+    idleFraction.name(name() + ".idle_fraction")
+        .desc("Percentage of idle cycles");
+
+    numBusyCycles.name(name() + ".num_busy_cycles")
+        .desc("Number of busy cycles");
+
+    numIdleCycles.name(name() + ".num_idle_cycles")
+        .desc("Number of idle cycles");
+
+    // --- cache stall / retry cycles; each stat is its own prerequisite ---
+    icacheStallCycles.name(name() + ".icache_stall_cycles")
+        .desc("ICache total stall cycles")
+        .prereq(icacheStallCycles);
+
+    dcacheStallCycles.name(name() + ".dcache_stall_cycles")
+        .desc("DCache total stall cycles")
+        .prereq(dcacheStallCycles);
+
+    icacheRetryCycles.name(name() + ".icache_retry_cycles")
+        .desc("ICache total retry cycles")
+        .prereq(icacheRetryCycles);
+
+    dcacheRetryCycles.name(name() + ".dcache_retry_cycles")
+        .desc("DCache total retry cycles")
+        .prereq(dcacheRetryCycles);
+
+    // --- derived formulas ---
+    idleFraction = constant(1.0) - notIdleFraction;
+    numIdleCycles = idleFraction * numCycles;
+    numBusyCycles = notIdleFraction * numCycles;
+}
+
+/**
+ * Re-seed the not-idle statistic from the current CPU status.
+ * Note: startNumInst is not updated here.
+ */
+void
+BaseSimpleCPU::resetStats()
+{
+    const bool not_idle = (_status != Idle);
+    notIdleFraction = not_idle;
+}
+
+/**
+ * Write the CPU status, the base-CPU state and then the thread state
+ * (under a "<name>.xc.0" section header) to the checkpoint stream.
+ *
+ * @param os Output stream of the checkpoint.
+ */
+void
+BaseSimpleCPU::serialize(ostream &os)
+{
+    SERIALIZE_ENUM(_status);
+    BaseCPU::serialize(os);
+
+    // Start the section that holds the thread's own state.
+    const string thread_section = csprintf("%s.xc.0", name());
+    nameOut(os, thread_section);
+    thread->serialize(os);
+}
+
+/**
+ * Restore the CPU status, the base-CPU state and then the thread state
+ * (from the "<section>.xc.0" sub-section) out of a checkpoint.
+ *
+ * @param cp      Checkpoint to read from.
+ * @param section Name of this CPU's section in the checkpoint.
+ */
+void
+BaseSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    UNSERIALIZE_ENUM(_status);
+    BaseCPU::unserialize(cp, section);
+
+    const string thread_section = csprintf("%s.xc.0", section);
+    thread->unserialize(cp, thread_section);
+}
+
+/**
+ * Stub with an empty body: all three arguments are ignored and nothing
+ * is done.
+ */
+void
+change_thread_state(ThreadID tid, int activate, int priority)
+{
+}
+
+/**
+ * Debugging helper: returns vtophys() of the given address evaluated
+ * with this CPU's thread context (presumably a virtual-to-physical
+ * translation, judging by the name).
+ *
+ * @param addr Address to pass to vtophys().
+ * @return The address returned by vtophys().
+ */
+Addr
+BaseSimpleCPU::dbg_vtophys(Addr addr)
+{
+    return vtophys(tc, addr);
+}
+
+/**
+ * Activate the thread again if, and only if, it is currently suspended;
+ * in any other state this is a no-op.
+ */
+void
+BaseSimpleCPU::wakeup()
+{
+    if (thread->status() == ThreadContext::Suspended) {
+        DPRINTF(Quiesce,"Suspended Processor awoke\n");
+        thread->activate();
+    }
+}
+
+/**
+ * If interrupts should be checked and one is pending, invoke it on the
+ * thread context, restart fetching at offset 0 and reset the decoder.
+ */
+void
+BaseSimpleCPU::checkForInterrupts()
+{
+    if (!checkInterrupts(tc))
+        return;
+
+    Fault pending = interrupts->getInterrupt(tc);
+    if (pending == NoFault)
+        return;
+
+    fetchOffset = 0;
+    interrupts->updateIntrInfo(tc);
+    pending->invoke(tc);
+    thread->decoder.reset();
+}
+
+
+/**
+ * Fill in a request for the next instruction fetch.  The fetch address
+ * is the thread's instruction address masked with PCMask plus the
+ * current fetchOffset; the unmasked instruction address is passed along
+ * as the request's PC.
+ *
+ * @param req Request object to set up.
+ */
+void
+BaseSimpleCPU::setupFetchRequest(Request *req)
+{
+    const Addr pc = thread->instAddr();
+
+    DPRINTF(Fetch, "Fetch: PC:%08p\n", pc);
+
+    const Addr fetchAddr = (pc & PCMask) + fetchOffset;
+    req->setVirt(0, fetchAddr, sizeof(MachInst), Request::INST_FETCH,
+                 instMasterId(), pc);
+}
+
+
+/**
+ * Prepare the next instruction for execution.
+ *
+ * Resets the zero register(s), services instruction-count-based events,
+ * then selects curStaticInst from one of three sources: the microcode
+ * ROM, a freshly decoded instruction, or the next micro-op of the
+ * macro-op currently in flight.  If the decoder cannot yet produce an
+ * instruction, stayAtPC is set and fetchOffset is advanced so that more
+ * bytes are fetched; curStaticInst is then left null.
+ */
+void
+BaseSimpleCPU::preExecute()
+{
+    // maintain $r0 semantics
+    thread->setIntReg(ZeroReg, 0);
+#if THE_ISA == ALPHA_ISA
+    thread->setFloatReg(ZeroReg, 0.0);
+#endif // ALPHA_ISA
+
+    // check for instruction-count-based events
+    comInstEventQueue[0]->serviceEvents(numInst);
+    system->instEventQueue.serviceEvents(system->totalNumInsts);
+
+    // decode the instruction
+    inst = gtoh(inst);
+
+    TheISA::PCState pcState = thread->pcState();
+
+    if (isRomMicroPC(pcState.microPC())) {
+        // The micro PC points into the microcode ROM: fetch from there.
+        stayAtPC = false;
+        curStaticInst = microcodeRom.fetchMicroop(pcState.microPC(),
+                                                  curMacroStaticInst);
+    } else if (!curMacroStaticInst) {
+        //We're not in the middle of a macro instruction
+        StaticInstPtr instPtr = NULL;
+
+        TheISA::Decoder *decoder = &(thread->decoder);
+
+        //Predecode, ie bundle up an ExtMachInst
+        //This should go away once the constructor can be set up properly
+        decoder->setTC(thread->getTC());
+        //If more fetch data is needed, pass it in.
+        Addr fetchPC = (pcState.instAddr() & PCMask) + fetchOffset;
+        //if(decoder->needMoreBytes())
+            decoder->moreBytes(pcState, fetchPC, inst);
+        //else
+        //    decoder->process();
+
+        //Decode an instruction if one is ready. Otherwise, we'll have to
+        //fetch beyond the MachInst at the current pc.
+        instPtr = decoder->decode(pcState);
+        if (instPtr) {
+            // Decode succeeded: write the PC state back to the thread.
+            stayAtPC = false;
+            thread->pcState(pcState);
+        } else {
+            // Not enough bytes yet: stay put and fetch the next MachInst.
+            stayAtPC = true;
+            fetchOffset += sizeof(MachInst);
+        }
+
+        //If we decoded an instruction and it's microcoded, start pulling
+        //out micro ops
+        if (instPtr && instPtr->isMacroop()) {
+            curMacroStaticInst = instPtr;
+            curStaticInst = curMacroStaticInst->fetchMicroop(pcState.microPC());
+        } else {
+            curStaticInst = instPtr;
+        }
+    } else {
+        //Read the next micro op from the macro op
+        curStaticInst = curMacroStaticInst->fetchMicroop(pcState.microPC());
+    }
+
+    //If we decoded an instruction this "tick", record information about it.
+    if (curStaticInst) {
+#if TRACING_ON
+        // Trace record creation is compiled in only when TRACING_ON is set.
+        traceData = tracer->getInstRecord(curTick(), tc,
+                curStaticInst, thread->pcState(), curMacroStaticInst);
+
+        DPRINTF(Decode,"Decode: Decoded %s instruction: %#x\n",
+                curStaticInst->getName(), curStaticInst->machInst);
+#endif // TRACING_ON
+    }
+}
+
+/**
+ * Bookkeeping performed after an instruction has executed: profiling,
+ * load-count events, power-model statistics, function tracing and
+ * flushing of the instruction trace record.  Requires curStaticInst to
+ * be non-null.
+ */
+void
+BaseSimpleCPU::postExecute()
+{
+    assert(curStaticInst);
+
+    TheISA::PCState curPC = tc->pcState();
+    Addr curInstAddr = curPC.instAddr();
+
+    // Feed the profiler, if one is attached (full-system mode only).
+    if (FullSystem && thread->profile) {
+        thread->profilePC = TheISA::inUserMode(tc) ? 1 : curInstAddr;
+        if (ProfileNode *node = thread->profile->consume(tc, curStaticInst))
+            thread->profileNode = node;
+    }
+
+    if (curStaticInst->isMemRef())
+        numMemRefs++;
+
+    // Count the load and service any load-count-based events.
+    if (curStaticInst->isLoad()) {
+        ++numLoad;
+        comLoadEventQueue[0]->serviceEvents(numLoad);
+    }
+
+    if (CPA::available())
+        CPA::cpa()->swAutoBegin(tc, curPC.nextInstAddr());
+
+    /* Power model statistics */
+
+    // integer ALU accesses
+    if (curStaticInst->isInteger()) {
+        numIntAluAccesses++;
+        numIntInsts++;
+    }
+
+    // floating-point ALU accesses
+    if (curStaticInst->isFloating()) {
+        numFpAluAccesses++;
+        numFpInsts++;
+    }
+
+    // function calls/returns, used to derive window accesses
+    if (curStaticInst->isCall() || curStaticInst->isReturn())
+        numCallsReturns++;
+
+    // conditional control instructions (branch predictions to be made)
+    if (curStaticInst->isCondCtrl())
+        numCondCtrlInsts++;
+
+    // result bus accesses
+    if (curStaticInst->isLoad())
+        numLoadInsts++;
+
+    if (curStaticInst->isStore())
+        numStoreInsts++;
+
+    /* End power model statistics */
+
+    if (FullSystem)
+        traceFunctions(curInstAddr);
+
+    // Emit and release the trace record, if there is one.
+    if (!traceData)
+        return;
+
+    traceData->dump();
+    delete traceData;
+    traceData = NULL;
+}
+
+
+/**
+ * Move on to the next PC.  On a fault the fault handler is invoked, any
+ * macro-op in flight is dropped and the decoder is reset; otherwise the
+ * thread's PC state is advanced past the current instruction (if any).
+ *
+ * @param fault Fault raised by the current instruction, or NoFault.
+ */
+void
+BaseSimpleCPU::advancePC(Fault fault)
+{
+    // A new PC always starts fetching at offset zero.
+    fetchOffset = 0;
+
+    if (fault != NoFault) {
+        curMacroStaticInst = StaticInst::nullStaticInstPtr;
+        fault->invoke(tc, curStaticInst);
+        thread->decoder.reset();
+        return;
+    }
+
+    if (!curStaticInst)
+        return;
+
+    // Leaving the last micro-op ends the current macro-op.
+    if (curStaticInst->isLastMicroop())
+        curMacroStaticInst = StaticInst::nullStaticInstPtr;
+
+    TheISA::PCState nextPC = thread->pcState();
+    TheISA::advancePC(nextPC, curStaticInst);
+    thread->pcState(nextPC);
+}
+
+/*Fault
+BaseSimpleCPU::CacheOp(uint8_t Op, Addr EffAddr)
+{
+    // translate to physical address
+    Fault fault = NoFault;
+    int CacheID = Op & 0x3; // Lower 2 bits identify Cache
+    int CacheOP = Op >> 2; // Upper 3 bits identify Cache Operation
+    if(CacheID > 1)
+      {
+        warn("CacheOps not implemented for secondary/tertiary caches\n");
+      }
+    else
+      {
+        switch(CacheOP)
+          { // Fill Packet Type
+          case 0: warn("Invalidate Cache Op\n");
+            break;
+          case 1: warn("Index Load Tag Cache Op\n");
+            break;
+          case 2: warn("Index Store Tag Cache Op\n");
+            break;
+          case 4: warn("Hit Invalidate Cache Op\n");
+            break;
+          case 5: warn("Fill/Hit Writeback Invalidate Cache Op\n");
+            break;
+          case 6: warn("Hit Writeback\n");
+            break;
+          case 7: warn("Fetch & Lock Cache Op\n");
+            break;
+          default: warn("Unimplemented Cache Op\n");
+          }
+      }
+    return fault;
+}*/
--- a/simulators/gem5/src/cpu/simple/base.hh
+++ b/simulators/gem5/src/cpu/simple/base.hh
@ -0,0 +1,429 @@
+/*
+ * Copyright (c) 2011 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ *          Dave Greene
+ *          Nathan Binkert
+ */
+
+#ifndef __CPU_SIMPLE_BASE_HH__
+#define __CPU_SIMPLE_BASE_HH__
+
+#include "base/statistics.hh"
+#include "config/the_isa.hh"
+#include "cpu/base.hh"
+#include "cpu/checker/cpu.hh"
+#include "cpu/pc_event.hh"
+#include "cpu/simple_thread.hh"
+#include "cpu/static_inst.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+#include "mem/request.hh"
+#include "sim/eventq.hh"
+#include "sim/full_system.hh"
+#include "sim/system.hh"
+
+// forward declarations
+class Checkpoint;
+class Process;
+class Processor;
+class ThreadContext;
+
+namespace TheISA
+{
+    class DTB;
+    class ITB;
+}
+
+namespace Trace {
+    class InstRecord;
+}
+
+struct BaseSimpleCPUParams;
+
+
+/**
+ * Common base of the simple CPU models.  It owns one SimpleThread and
+ * its ThreadContext, keeps the currently fetched/decoded instruction,
+ * provides the fetch/decode/commit helper steps (setupFetchRequest,
+ * preExecute, postExecute, advancePC), the statistics, and the register
+ * and state accessor methods, which forward to the thread.
+ */
+class BaseSimpleCPU : public BaseCPU
+{
+  protected:
+    typedef TheISA::MiscReg MiscReg;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
+
+  protected:
+    /** Trace record of the instruction in flight, or NULL if none. */
+    Trace::InstRecord *traceData;
+
+    /**
+     * Service the system's PC event queue repeatedly until servicing it
+     * no longer changes the thread's instruction address.
+     */
+    inline void checkPcEventQueue() {
+        Addr oldpc, pc = thread->instAddr();
+        do {
+            oldpc = pc;
+            system->pcEventQueue.service(tc);
+            pc = thread->instAddr();
+        } while (oldpc != pc);
+    }
+
+  public:
+    void wakeup();
+
+    /** Not implemented: only emits a warning, and only on the first call. */
+    void zero_fill_64(Addr addr) {
+      static int warned = 0;
+      if (!warned) {
+        warn ("WH64 is not implemented");
+        warned = 1;
+      }
+    };
+
+  public:
+    BaseSimpleCPU(BaseSimpleCPUParams *params);
+    virtual ~BaseSimpleCPU();
+
+  public:
+    /** SimpleThread object, provides all the architectural state. */
+    SimpleThread *thread;
+
+    /** ThreadContext object, provides an interface for external
+     * objects to modify this thread's state.
+     */
+    ThreadContext *tc;
+
+    /** Checker CPU, or NULL when no checker is configured. */
+    CheckerCPU *checker;
+
+  protected:
+
+    /** Execution states of the CPU model. */
+    enum Status {
+        Idle,
+        Running,
+        Faulting,
+        ITBWaitResponse,
+        IcacheRetry,
+        IcacheWaitResponse,
+        IcacheWaitSwitch,
+        DTBWaitResponse,
+        DcacheRetry,
+        DcacheWaitResponse,
+        DcacheWaitSwitch,
+        SwitchedOut
+    };
+
+    /** Current state; written to and read from checkpoints. */
+    Status _status;
+
+  public:
+
+    Addr dbg_vtophys(Addr addr);
+
+    bool interval_stats;
+
+    // current instruction
+    TheISA::MachInst inst;
+
+    // Instruction (or micro-op) selected by preExecute(), and the
+    // macro-op it belongs to, if any.
+    StaticInstPtr curStaticInst;
+    StaticInstPtr curMacroStaticInst;
+
+    //This is the offset from the current pc that fetch should be performed at
+    Addr fetchOffset;
+    //This flag says to stay at the current pc. This is useful for
+    //instructions which go beyond MachInst boundaries.
+    bool stayAtPC;
+
+    void checkForInterrupts();
+    void setupFetchRequest(Request *req);
+    void preExecute();
+    void postExecute();
+    void advancePC(Fault fault);
+
+    virtual void deallocateContext(ThreadID thread_num);
+    virtual void haltContext(ThreadID thread_num);
+
+    // statistics
+    virtual void regStats();
+    virtual void resetStats();
+
+    // number of simulated instructions
+    Counter numInst;
+    Counter startNumInst;
+    Stats::Scalar numInsts;
+    // number of simulated ops (micro-ops included)
+    Counter numOp;
+    Counter startNumOp;
+    Stats::Scalar numOps;
+
+    /**
+     * Account for one executed op.  The instruction counters advance
+     * only for a non-microcoded instruction or for the last micro-op of
+     * a macro-op; the op counters, the system-wide instruction total and
+     * the thread's funcExeInst advance on every call.
+     */
+    void countInst()
+    {
+        if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+            numInst++;
+            numInsts++;
+        }
+        numOp++;
+        numOps++;
+
+        system->totalNumInsts++;
+        thread->funcExeInst++;
+    }
+
+    /** Instructions counted relative to the startNumInst baseline. */
+    virtual Counter totalInsts() const
+    {
+        return numInst - startNumInst;
+    }
+
+    /** Ops counted relative to the startNumOp baseline. */
+    virtual Counter totalOps() const
+    {
+        return numOp - startNumOp;
+    }
+
+    //number of integer alu accesses
+    Stats::Scalar numIntAluAccesses;
+
+    //number of float alu accesses
+    Stats::Scalar numFpAluAccesses;
+
+    //number of function calls/returns
+    Stats::Scalar numCallsReturns;
+
+    //conditional control instructions;
+    Stats::Scalar numCondCtrlInsts;
+
+    //number of int instructions
+    Stats::Scalar numIntInsts;
+
+    //number of float instructions
+    Stats::Scalar numFpInsts;
+
+    //number of integer register file accesses
+    Stats::Scalar numIntRegReads;
+    Stats::Scalar numIntRegWrites;
+
+    //number of float register file accesses
+    Stats::Scalar numFpRegReads;
+    Stats::Scalar numFpRegWrites;
+
+    // number of simulated memory references
+    Stats::Scalar numMemRefs;
+    Stats::Scalar numLoadInsts;
+    Stats::Scalar numStoreInsts;
+
+    // number of idle cycles
+    Stats::Formula numIdleCycles;
+
+    // number of busy cycles
+    Stats::Formula numBusyCycles;
+
+    // number of simulated loads
+    Counter numLoad;
+    Counter startNumLoad;
+
+    // non-idle fraction and its complement (idleFraction is defined as
+    // 1 - notIdleFraction in regStats())
+    Stats::Average notIdleFraction;
+    Stats::Formula idleFraction;
+
+    // number of cycles stalled for I-cache responses
+    Stats::Scalar icacheStallCycles;
+    Counter lastIcacheStall;
+
+    // number of cycles stalled for I-cache retries
+    Stats::Scalar icacheRetryCycles;
+    Counter lastIcacheRetry;
+
+    // number of cycles stalled for D-cache responses
+    Stats::Scalar dcacheStallCycles;
+    Counter lastDcacheStall;
+
+    // number of cycles stalled for D-cache retries
+    Stats::Scalar dcacheRetryCycles;
+    Counter lastDcacheRetry;
+
+    virtual void serialize(std::ostream &os);
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+    // These functions are only used in CPU models that split
+    // effective address computation from the actual memory access.
+    void setEA(Addr EA) { panic("BaseSimpleCPU::setEA() not implemented\n"); }
+    Addr getEA()        { panic("BaseSimpleCPU::getEA() not implemented\n");
+        M5_DUMMY_RETURN}
+
+    // The register accessor methods provide the index of the
+    // instruction's operand (e.g., 0 or 1), not the architectural
+    // register index, to simplify the implementation of register
+    // renaming.  We find the architectural register index by indexing
+    // into the instruction's own operand index table.  Note that a
+    // raw pointer to the StaticInst is provided instead of a
+    // ref-counted StaticInstPtr to reduce overhead.  This is fine as
+    // long as these methods don't copy the pointer into any long-term
+    // storage (which is pretty hard to imagine they would have reason
+    // to do).
+    //
+    // Each accessor also bumps the matching register read/write
+    // statistic before forwarding to the thread.
+
+    uint64_t readIntRegOperand(const StaticInst *si, int idx)
+    {
+        numIntRegReads++;
+        return thread->readIntReg(si->srcRegIdx(idx));
+    }
+
+    FloatReg readFloatRegOperand(const StaticInst *si, int idx)
+    {
+        numFpRegReads++;
+        // Operand indices of FP registers are offset by FP_Base_DepTag.
+        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+        return thread->readFloatReg(reg_idx);
+    }
+
+    FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
+    {
+        numFpRegReads++;
+        int reg_idx = si->srcRegIdx(idx) - TheISA::FP_Base_DepTag;
+        return thread->readFloatRegBits(reg_idx);
+    }
+
+    void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
+    {
+        numIntRegWrites++;
+        thread->setIntReg(si->destRegIdx(idx), val);
+    }
+
+    void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
+    {
+        numFpRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+        thread->setFloatReg(reg_idx, val);
+    }
+
+    void setFloatRegOperandBits(const StaticInst *si, int idx,
+                                FloatRegBits val)
+    {
+        numFpRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::FP_Base_DepTag;
+        thread->setFloatRegBits(reg_idx, val);
+    }
+
+    bool readPredicate() { return thread->readPredicate(); }
+    /** Set the predicate on the thread and mirror it into the trace record. */
+    void setPredicate(bool val)
+    {
+        thread->setPredicate(val);
+        if (traceData) {
+            traceData->setPredicate(val);
+        }
+    }
+    TheISA::PCState pcState() { return thread->pcState(); }
+    void pcState(const TheISA::PCState &val) { thread->pcState(val); }
+    Addr instAddr() { return thread->instAddr(); }
+    Addr nextInstAddr() { return thread->nextInstAddr(); }
+    MicroPC microPC() { return thread->microPC(); }
+
+    /** Forwards to the thread; unlike readMiscReg(), no statistic is updated. */
+    MiscReg readMiscRegNoEffect(int misc_reg)
+    {
+        return thread->readMiscRegNoEffect(misc_reg);
+    }
+
+    // Misc register accesses are counted in the integer register
+    // read/write statistics.
+    MiscReg readMiscReg(int misc_reg)
+    {
+        numIntRegReads++;
+        return thread->readMiscReg(misc_reg);
+    }
+
+    void setMiscReg(int misc_reg, const MiscReg &val)
+    {
+        numIntRegWrites++;
+        return thread->setMiscReg(misc_reg, val);
+    }
+
+    MiscReg readMiscRegOperand(const StaticInst *si, int idx)
+    {
+        numIntRegReads++;
+        // Operand indices of misc registers are offset by Ctrl_Base_DepTag.
+        int reg_idx = si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
+        return thread->readMiscReg(reg_idx);
+    }
+
+    void setMiscRegOperand(
+            const StaticInst *si, int idx, const MiscReg &val)
+    {
+        numIntRegWrites++;
+        int reg_idx = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
+        return thread->setMiscReg(reg_idx, val);
+    }
+
+    // Page demap requests are forwarded unchanged to the thread.
+    void demapPage(Addr vaddr, uint64_t asn)
+    {
+        thread->demapPage(vaddr, asn);
+    }
+
+    void demapInstPage(Addr vaddr, uint64_t asn)
+    {
+        thread->demapInstPage(vaddr, asn);
+    }
+
+    void demapDataPage(Addr vaddr, uint64_t asn)
+    {
+        thread->demapDataPage(vaddr, asn);
+    }
+
+    unsigned readStCondFailures() {
+        return thread->readStCondFailures();
+    }
+
+    void setStCondFailures(unsigned sc_failures) {
+        thread->setStCondFailures(sc_failures);
+    }
+
+     /** Unsupported in the simple CPU models: always panics. */
+     MiscReg readRegOtherThread(int regIdx, ThreadID tid = InvalidThreadID)
+     {
+        panic("Simple CPU models do not support multithreaded "
+              "register access.\n");
+     }
+
+     /** Unsupported in the simple CPU models: always panics. */
+     void setRegOtherThread(int regIdx, const MiscReg &val,
+                            ThreadID tid = InvalidThreadID)
+     {
+        panic("Simple CPU models do not support multithreaded "
+              "register access.\n");
+     }
+
+    //Fault CacheOp(uint8_t Op, Addr EA);
+
+    Fault hwrei() { return thread->hwrei(); }
+    bool simPalCheck(int palFunc) { return thread->simPalCheck(palFunc); }
+
+    /** Forward a system call to the thread; panics in full-system mode. */
+    void
+    syscall(int64_t callnum)
+    {
+        if (FullSystem)
+            panic("Syscall emulation isn't available in FS mode.\n");
+
+        thread->syscall(callnum);
+    }
+
+    bool misspeculating() { return thread->misspeculating(); }
+    ThreadContext *tcBase() { return tc; }
+};
+
+#endif // __CPU_SIMPLE_BASE_HH__
--- a/simulators/gem5/src/cpu/simple/timing.cc
+++ b/simulators/gem5/src/cpu/simple/timing.cc
@ -0,0 +1,960 @@
+/*
+ * Copyright (c) 2010-2012 ARM Limited
+ * All rights reserved
+ *
+ * The license below extends only to copyright in the software and shall
+ * not be construed as granting a license to any other intellectual
+ * property including but not limited to intellectual property relating
+ * to a hardware implementation of the functionality of the software
+ * licensed hereunder.  You may use the software subject to the license
+ * terms below provided that you ensure that this notice is replicated
+ * unmodified and in its entirety in all distributions of the software,
+ * modified or unmodified, in source code or in binary form.
+ *
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#include "arch/locked_mem.hh"
+#include "arch/mmapped_ipr.hh"
+#include "arch/utility.hh"
+#include "base/bigint.hh"
+#include "config/the_isa.hh"
+#include "cpu/simple/timing.hh"
+#include "cpu/exetrace.hh"
+#include "debug/Config.hh"
+#include "debug/ExecFaulting.hh"
+#include "debug/SimpleCPU.hh"
+#include "mem/packet.hh"
+#include "mem/packet_access.hh"
+#include "params/TimingSimpleCPU.hh"
+#include "sim/faults.hh"
+#include "sim/full_system.hh"
+#include "sim/system.hh"
+
+using namespace std;
+using namespace TheISA;
+
+// Initialise the base CPU and the thread context's memory proxies.
+// In full-system mode, unless registration is deferred, also run the
+// ISA-specific CPU initialisation on every thread context.
+void
+TimingSimpleCPU::init()
+{
+    BaseCPU::init();
+
+    // Initialise the ThreadContext's memory proxies
+    tcBase()->initMemProxies(tcBase());
+
+    // Nothing more to do outside full-system mode, or when
+    // registration has been deferred.
+    if (!FullSystem || params()->defer_registration)
+        return;
+
+    for (int ctx = 0; ctx < threadContexts.size(); ++ctx) {
+        // initialize CPU, including PC
+        TheISA::initCPU(threadContexts[ctx], _cpuId);
+    }
+}
+
+// Store _pkt in this event and schedule the event on the owning CPU
+// at tick t.
+void
+TimingSimpleCPU::TimingCPUPort::TickEvent::schedule(PacketPtr _pkt, Tick t)
+{
+    pkt = _pkt;
+    cpu->schedule(this, t);
+}
+
+// Construct an idle CPU with no packets or drain request pending.
+TimingSimpleCPU::TimingSimpleCPU(TimingSimpleCPUParams *p)
+    : BaseSimpleCPU(p),
+      fetchTranslation(this),
+      icachePort(this),
+      dcachePort(this),
+      fetchEvent(this)
+{
+    _status = Idle;
+
+    // No data or fetch packet is waiting to be (re)sent yet.
+    dcache_pkt = NULL;
+    ifetch_pkt = NULL;
+
+    drainEvent = NULL;
+    previousTick = 0;
+    changeState(SimObject::Running);
+    system->totalNumInsts = 0;
+}
+
+
+// The destructor body is intentionally empty; nothing is released here.
+TimingSimpleCPU::~TimingSimpleCPU()
+{
+}
+
+// Write the current SimObject state, followed by the BaseSimpleCPU
+// state, to the checkpoint stream os.
+void
+TimingSimpleCPU::serialize(ostream &os)
+{
+    // NOTE(review): SERIALIZE_ENUM takes the variable itself; it
+    // presumably uses the name "so_state" as the checkpoint key, so do
+    // not rename this local without checking the macro.
+    SimObject::State so_state = SimObject::getState();
+    SERIALIZE_ENUM(so_state);
+    BaseSimpleCPU::serialize(os);
+}
+
+// Read back what serialize() wrote: the SimObject state entry, then the
+// BaseSimpleCPU state from the given checkpoint section.
+void
+TimingSimpleCPU::unserialize(Checkpoint *cp, const string &section)
+{
+    // NOTE(review): so_state is read from the checkpoint but not used
+    // afterwards in this function -- confirm that is intended.
+    SimObject::State so_state;
+    UNSERIALIZE_ENUM(so_state);
+    BaseSimpleCPU::unserialize(cp, section);
+}
+
+// Ask the CPU to drain.
+// Returns 0 if the CPU is already drained, or 1 if it is still waiting
+// for an access; in that case drain_event is stored for later use.
+unsigned int
+TimingSimpleCPU::drain(Event *drain_event)
+{
+    // TimingSimpleCPU is ready to drain if it's not waiting for
+    // an access to complete.
+    const bool quiescent =
+        _status == Idle || _status == Running || _status == SwitchedOut;
+
+    if (!quiescent) {
+        changeState(SimObject::Draining);
+        drainEvent = drain_event;
+        return 1;
+    }
+
+    changeState(SimObject::Drained);
+    return 0;
+}
+
+// Return the CPU to the Running SimObject state. Unless the CPU is
+// switched out or idle, a fetch is (re)scheduled for the next cycle.
+void
+TimingSimpleCPU::resume()
+{
+    DPRINTF(SimpleCPU, "Resume\n");
+
+    const bool inactive = (_status == SwitchedOut || _status == Idle);
+    if (!inactive) {
+        assert(system->getMemoryMode() == Enums::timing);
+
+        // Replace any already-pending fetch with one on the next cycle.
+        if (fetchEvent.scheduled()) {
+            deschedule(fetchEvent);
+        }
+        schedule(fetchEvent, nextCycle());
+    }
+
+    changeState(SimObject::Running);
+}
+
+// Switch this CPU out: mark it SwitchedOut, account for the cycles
+// elapsed since previousTick, and cancel any pending fetch event.
+// Only legal while the CPU is Running or Idle (asserted).
+void
+TimingSimpleCPU::switchOut()
+{
+    assert(_status == Running || _status == Idle);
+    _status = SwitchedOut;
+    // previousTick is not updated here; takeOverFrom() resets it.
+    numCycles += tickToCycles(curTick() - previousTick);
+
+    // If we've been scheduled to resume but are then told to switch out,
+    // we'll need to cancel it.
+    if (fetchEvent.scheduled())
+        deschedule(fetchEvent);
+}
+
+
+// Take over execution from oldCPU and derive this CPU's status from its
+// thread contexts: Running if one is active, Idle otherwise.
+// No event is scheduled by this function itself.
+void
+TimingSimpleCPU::takeOverFrom(BaseCPU *oldCPU)
+{
+    BaseCPU::takeOverFrom(oldCPU);
+
+    // if any of this CPU's ThreadContexts are active, mark the CPU as
+    // running.
+    for (int ctx = 0; ctx < threadContexts.size(); ++ctx) {
+        const bool ctxActive =
+            threadContexts[ctx]->status() == ThreadContext::Active;
+        if (ctxActive && _status != Running) {
+            _status = Running;
+            break;
+        }
+    }
+
+    // Anything that did not end up Running is treated as Idle.
+    if (_status != Running)
+        _status = Idle;
+
+    assert(threadContexts.size() == 1);
+    previousTick = curTick();
+}
+
+
+// Activate thread 0 (the only supported thread) after delay cycles.
+// The CPU must currently be Idle; it becomes Running and the first
+// fetch is scheduled.
+void
+TimingSimpleCPU::activateContext(ThreadID thread_num, int delay)
+{
+    DPRINTF(SimpleCPU, "ActivateContext %d (%d cycles)\n", thread_num, delay);
+
+    assert(thread_num == 0);
+    assert(thread);
+    assert(_status == Idle);
+
+    notIdleFraction++;
+    _status = Running;
+
+    // kick things off by initiating the fetch of the next instruction
+    const Tick firstFetch = nextCycle(curTick() + ticks(delay));
+    schedule(fetchEvent, firstFetch);
+}
+
+
+// Suspend thread 0 (the only supported thread). A CPU that is already
+// Idle is left untouched; otherwise it must be Running and becomes Idle.
+void
+TimingSimpleCPU::suspendContext(ThreadID thread_num)
+{
+    DPRINTF(SimpleCPU, "SuspendContext %d\n", thread_num);
+
+    assert(thread_num == 0);
+    assert(thread);
+
+    if (_status != Idle) {
+        assert(_status == Running);
+
+        // just change status to Idle... if status != Running,
+        // completeInst() will not initiate fetch of next instruction.
+        notIdleFraction--;
+        _status = Idle;
+    }
+}
+
+// Try to issue the read packet pkt.
+//
+// Memory-mapped IPR requests are handled through TheISA::handleIprRead()
+// and completed by an IprEvent after the returned delay; all other
+// requests are sent through dcachePort.
+//
+// Returns true when dcache_pkt is NULL afterwards, i.e. the packet was
+// accepted and no retry is pending; false when the port refused it and
+// it was parked in dcache_pkt for DcachePort::recvRetry().
+bool
+TimingSimpleCPU::handleReadPacket(PacketPtr pkt)
+{
+    RequestPtr req = pkt->req;
+    if (req->isMmappedIpr()) {
+        Tick delay;
+        delay = TheISA::handleIprRead(thread->getTC(), pkt);
+        // The IprEvent schedules itself on this CPU in its constructor.
+        new IprEvent(pkt, this, nextCycle(curTick() + delay));
+        _status = DcacheWaitResponse;
+        dcache_pkt = NULL;
+    } else if (!dcachePort.sendTimingReq(pkt)) {
+        // Port is busy: keep the packet until a retry arrives.
+        _status = DcacheRetry;
+        dcache_pkt = pkt;
+    } else {
+        _status = DcacheWaitResponse;
+        // memory system takes ownership of packet
+        dcache_pkt = NULL;
+    }
+    return dcache_pkt == NULL;
+}
+
+// Build a packet for the (already translated) request req, attach data
+// to it, and start the access.
+//
+//  req   request to send
+//  data  buffer handed to the packet via dataDynamicArray()
+//  res   value used as extra data for conditional swaps; only
+//        dereferenced when req->isCondSwap()
+//  read  true for a load, false for a store
+void
+TimingSimpleCPU::sendData(RequestPtr req, uint8_t *data, uint64_t *res,
+                          bool read)
+{
+    PacketPtr pkt;
+    buildPacket(pkt, req, read);
+    pkt->dataDynamicArray<uint8_t>(data);
+    if (req->getFlags().isSet(Request::NO_ACCESS)) {
+        // NO_ACCESS requests never go to memory: turn the packet into
+        // a response and complete the access immediately.
+        assert(!dcache_pkt);
+        pkt->makeResponse();
+        completeDataAccess(pkt);
+    } else if (read) {
+        handleReadPacket(pkt);
+    } else {
+        bool do_access = true;  // flag to suppress cache access
+
+        if (req->isLLSC()) {
+            // The ISA hook decides whether the locked write is sent.
+            do_access = TheISA::handleLockedWrite(thread, req);
+        } else if (req->isCondSwap()) {
+            assert(res);
+            req->setExtraData(*res);
+        }
+
+        if (do_access) {
+            dcache_pkt = pkt;
+            handleWritePacket();
+        } else {
+            // Access suppressed: complete it without sending anything.
+            _status = DcacheWaitResponse;
+            completeDataAccess(pkt);
+        }
+    }
+}
+
+// Start an access that was split into two requests.
+//
+//  req1, req2  the two fragment requests
+//  req         the original (whole) request
+//  data        buffer for the whole access
+//  read        true for a load, false for a store
+//
+// The second fragment is only attempted once the first one has been
+// accepted; a refused fragment is left for DcachePort::recvRetry().
+void
+TimingSimpleCPU::sendSplitData(RequestPtr req1, RequestPtr req2,
+                               RequestPtr req, uint8_t *data, bool read)
+{
+    PacketPtr pkt1, pkt2;
+    buildSplitPacket(pkt1, pkt2, req1, req2, req, data, read);
+    if (req->getFlags().isSet(Request::NO_ACCESS)) {
+        // buildSplitPacket() built a single packet for the whole
+        // request in this case; complete it without sending.
+        assert(!dcache_pkt);
+        pkt1->makeResponse();
+        completeDataAccess(pkt1);
+    } else if (read) {
+        // The sender state is fetched before the send attempt --
+        // presumably because the packet may no longer be ours to touch
+        // once it has been accepted (TODO confirm).
+        // NOTE(review): the dynamic_cast results are used unchecked.
+        SplitFragmentSenderState * send_state =
+            dynamic_cast<SplitFragmentSenderState *>(pkt1->senderState);
+        if (handleReadPacket(pkt1)) {
+            send_state->clearFromParent();
+            send_state = dynamic_cast<SplitFragmentSenderState *>(
+                    pkt2->senderState);
+            if (handleReadPacket(pkt2)) {
+                send_state->clearFromParent();
+            }
+        }
+    } else {
+        // handleWritePacket() works on dcache_pkt, so each fragment is
+        // placed there before it is sent.
+        dcache_pkt = pkt1;
+        SplitFragmentSenderState * send_state =
+            dynamic_cast<SplitFragmentSenderState *>(pkt1->senderState);
+        if (handleWritePacket()) {
+            send_state->clearFromParent();
+            dcache_pkt = pkt2;
+            send_state = dynamic_cast<SplitFragmentSenderState *>(
+                    pkt2->senderState);
+            if (handleWritePacket()) {
+                send_state->clearFromParent();
+            }
+        }
+    }
+}
+
+// Finish an instruction whose data translation did not lead to a memory
+// access: account cycles, drop any trace record, run postExecute(), and
+// either complete a pending drain or move on to the next instruction.
+void
+TimingSimpleCPU::translationFault(Fault fault)
+{
+    // fault may be NoFault in cases where a fault is suppressed,
+    // for instance prefetches.
+    numCycles += tickToCycles(curTick() - previousTick);
+    previousTick = curTick();
+
+    // Since there was a fault, we shouldn't trace this instruction.
+    // (Deleting a null pointer is a no-op, so no guard is required.)
+    delete traceData;
+    traceData = NULL;
+
+    postExecute();
+
+    const bool draining = (getState() == SimObject::Draining);
+    if (!draining) {
+        advanceInst(fault);
+        return;
+    }
+
+    advancePC(fault);
+    completeDrain();
+}
+
+// Allocate a new packet for req and store it in pkt. The memory command
+// is chosen from the access direction and the request's LLSC/swap flags.
+void
+TimingSimpleCPU::buildPacket(PacketPtr &pkt, RequestPtr req, bool read)
+{
+    MemCmd cmd;
+    if (read && req->isLLSC()) {
+        cmd = MemCmd::LoadLockedReq;
+    } else if (read) {
+        cmd = MemCmd::ReadReq;
+    } else if (req->isLLSC()) {
+        cmd = MemCmd::StoreCondReq;
+    } else if (req->isSwap()) {
+        cmd = MemCmd::SwapReq;
+    } else {
+        cmd = MemCmd::WriteReq;
+    }
+
+    pkt = new Packet(req, cmd);
+}
+
+// Build the packets for a split access.
+//
+// For a NO_ACCESS request only pkt1 is built (from the whole request)
+// and pkt2 stays NULL. Otherwise pkt1/pkt2 are built from req1/req2 and
+// tied to a third, "main" packet for req through sender-state objects:
+// the main packet's SplitMainSenderState lists both fragments, and each
+// fragment's SplitFragmentSenderState points back at the main packet.
+void
+TimingSimpleCPU::buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
+        RequestPtr req1, RequestPtr req2, RequestPtr req,
+        uint8_t *data, bool read)
+{
+    pkt1 = pkt2 = NULL;
+
+    assert(!req1->isMmappedIpr() && !req2->isMmappedIpr());
+
+    if (req->getFlags().isSet(Request::NO_ACCESS)) {
+        // NOTE(review): data is not attached to any packet on this
+        // path, and req1/req2 are not used -- confirm who frees them.
+        buildPacket(pkt1, req, read);
+        return;
+    }
+
+    buildPacket(pkt1, req1, read);
+    buildPacket(pkt2, req2, read);
+
+    // The main request takes the first fragment's physical address and
+    // flags, but keeps its own (full) size.
+    req->setPhys(req1->getPaddr(), req->getSize(), req1->getFlags(), dataMasterId());
+    PacketPtr pkt = new Packet(req, pkt1->cmd.responseCommand());
+
+    // The main packet gets data as a dynamic array; the fragments only
+    // point into that same buffer (second one offset by req1's size).
+    pkt->dataDynamicArray<uint8_t>(data);
+    pkt1->dataStatic<uint8_t>(data);
+    pkt2->dataStatic<uint8_t>(data + req1->getSize());
+
+    SplitMainSenderState * main_send_state = new SplitMainSenderState;
+    pkt->senderState = main_send_state;
+    main_send_state->fragments[0] = pkt1;
+    main_send_state->fragments[1] = pkt2;
+    main_send_state->outstanding = 2;
+    pkt1->senderState = new SplitFragmentSenderState(pkt, 0);
+    pkt2->senderState = new SplitFragmentSenderState(pkt, 1);
+}
+
+// Start a timing-mode load of size bytes at virtual address addr.
+//
+// This only creates the request(s) and kicks off address translation;
+// the access itself is sent later from finishTranslation(). An access
+// that crosses a block boundary of the data port's peer is split into
+// two requests that are translated separately.
+//
+//  addr   virtual address to read
+//  data   not used by this function; a fresh buffer of size bytes is
+//         allocated for the access instead
+//  size   access size in bytes
+//  flags  request flags
+//
+// Always returns NoFault.
+Fault
+TimingSimpleCPU::readMem(Addr addr, uint8_t *data,
+                         unsigned size, unsigned flags)
+{
+    const int asid = 0;
+    const ThreadID tid = 0;
+    const Addr pc = thread->instAddr();
+    unsigned block_size = dcachePort.peerBlockSize();
+    BaseTLB::Mode mode = BaseTLB::Read;
+
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    RequestPtr req  = new Request(asid, addr, size,
+                                  flags, dataMasterId(), pc, _cpuId, tid);
+
+    // Start of the block that contains the last byte of the access; if
+    // it lies above addr, the access straddles a block boundary.
+    Addr split_addr = roundDown(addr + size - 1, block_size);
+    assert(split_addr <= addr || split_addr - addr < block_size);
+
+    _status = DTBWaitResponse;
+    if (split_addr > addr) {
+        RequestPtr req1, req2;
+        assert(!req->isLLSC() && !req->isSwap());
+        req->splitOnVaddr(split_addr, req1, req2);
+
+        // Both fragment translations share one WholeTranslationState.
+        WholeTranslationState *state =
+            new WholeTranslationState(req, req1, req2, new uint8_t[size],
+                                      NULL, mode);
+        DataTranslation<TimingSimpleCPU *> *trans1 =
+            new DataTranslation<TimingSimpleCPU *>(this, state, 0);
+        DataTranslation<TimingSimpleCPU *> *trans2 =
+            new DataTranslation<TimingSimpleCPU *>(this, state, 1);
+
+        thread->dtb->translateTiming(req1, tc, trans1, mode);
+        thread->dtb->translateTiming(req2, tc, trans2, mode);
+    } else {
+        WholeTranslationState *state =
+            new WholeTranslationState(req, new uint8_t[size], NULL, mode);
+        DataTranslation<TimingSimpleCPU *> *translation
+            = new DataTranslation<TimingSimpleCPU *>(this, state);
+        thread->dtb->translateTiming(req, tc, translation, mode);
+    }
+
+    // Translation faults will be returned via finishTranslation()
+    return NoFault;
+}
+
+// Try to issue the write packet currently held in dcache_pkt.
+//
+// Memory-mapped IPR requests are handled through
+// TheISA::handleIprWrite() and completed by an IprEvent after the
+// returned delay; all other requests are sent through dcachePort.
+//
+// Returns true when dcache_pkt is NULL afterwards, i.e. the packet was
+// accepted; false when the port refused it, in which case dcache_pkt is
+// left in place for DcachePort::recvRetry().
+bool
+TimingSimpleCPU::handleWritePacket()
+{
+    RequestPtr req = dcache_pkt->req;
+    if (req->isMmappedIpr()) {
+        Tick delay;
+        delay = TheISA::handleIprWrite(thread->getTC(), dcache_pkt);
+        // The IprEvent schedules itself on this CPU in its constructor.
+        new IprEvent(dcache_pkt, this, nextCycle(curTick() + delay));
+        _status = DcacheWaitResponse;
+        dcache_pkt = NULL;
+    } else if (!dcachePort.sendTimingReq(dcache_pkt)) {
+        // Port is busy: dcache_pkt stays set until a retry arrives.
+        _status = DcacheRetry;
+    } else {
+        _status = DcacheWaitResponse;
+        // memory system takes ownership of packet
+        dcache_pkt = NULL;
+    }
+    return dcache_pkt == NULL;
+}
+
+// Start a timing-mode store of size bytes from data to virtual address
+// addr.
+//
+// This only creates the request(s) and kicks off address translation;
+// the access itself is sent later from finishTranslation(). An access
+// that crosses a block boundary of the data port's peer is split into
+// two requests that are translated separately.
+//
+//  data   bytes to store; copied into a newly allocated buffer
+//  size   access size in bytes
+//  addr   virtual address to write
+//  flags  request flags
+//  res    passed on to the translation state (see sendData() for how
+//         it is used with conditional swaps)
+//
+// Always returns NoFault.
+Fault
+TimingSimpleCPU::writeMem(uint8_t *data, unsigned size,
+                          Addr addr, unsigned flags, uint64_t *res)
+{
+    // Private copy of the store data; presumably so the caller's buffer
+    // need not outlive the deferred access (TODO confirm).
+    uint8_t *newData = new uint8_t[size];
+    memcpy(newData, data, size);
+
+    const int asid = 0;
+    const ThreadID tid = 0;
+    const Addr pc = thread->instAddr();
+    unsigned block_size = dcachePort.peerBlockSize();
+    BaseTLB::Mode mode = BaseTLB::Write;
+
+    if (traceData) {
+        traceData->setAddr(addr);
+    }
+
+    RequestPtr req = new Request(asid, addr, size,
+                                 flags, dataMasterId(), pc, _cpuId, tid);
+
+    // Start of the block that contains the last byte of the access; if
+    // it lies above addr, the access straddles a block boundary.
+    Addr split_addr = roundDown(addr + size - 1, block_size);
+    assert(split_addr <= addr || split_addr - addr < block_size);
+
+    _status = DTBWaitResponse;
+    if (split_addr > addr) {
+        RequestPtr req1, req2;
+        assert(!req->isLLSC() && !req->isSwap());
+        req->splitOnVaddr(split_addr, req1, req2);
+
+        // Both fragment translations share one WholeTranslationState.
+        WholeTranslationState *state =
+            new WholeTranslationState(req, req1, req2, newData, res, mode);
+        DataTranslation<TimingSimpleCPU *> *trans1 =
+            new DataTranslation<TimingSimpleCPU *>(this, state, 0);
+        DataTranslation<TimingSimpleCPU *> *trans2 =
+            new DataTranslation<TimingSimpleCPU *>(this, state, 1);
+
+        thread->dtb->translateTiming(req1, tc, trans1, mode);
+        thread->dtb->translateTiming(req2, tc, trans2, mode);
+    } else {
+        WholeTranslationState *state =
+            new WholeTranslationState(req, newData, res, mode);
+        DataTranslation<TimingSimpleCPU *> *translation =
+            new DataTranslation<TimingSimpleCPU *>(this, state);
+        thread->dtb->translateTiming(req, tc, translation, mode);
+    }
+
+    // Translation faults will be returned via finishTranslation()
+    return NoFault;
+}
+
+
+// Called with the translation state of a data access started by
+// readMem()/writeMem(). On a fault the access is abandoned and handled
+// by translationFault(); otherwise the (possibly split) access is sent.
+// The state object is deleted before returning.
+void
+TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
+{
+    _status = Running;
+
+    if (state->getFault() != NoFault) {
+        // A faulting prefetch has its fault cleared, so NoFault is what
+        // gets passed to translationFault() below.
+        if (state->isPrefetch()) {
+            state->setNoFault();
+        }
+        // No packet will be built, so free the buffer and requests here.
+        delete [] state->data;
+        state->deleteReqs();
+        translationFault(state->getFault());
+    } else {
+        if (!state->isSplit) {
+            sendData(state->mainReq, state->data, state->res,
+                     state->mode == BaseTLB::Read);
+        } else {
+            sendSplitData(state->sreqLow, state->sreqHigh, state->mainReq,
+                          state->data, state->mode == BaseTLB::Read);
+        }
+    }
+
+    delete state;
+}
+
+
+// Begin fetching the next instruction.
+//
+// After checking interrupts and PC events, either start a timing
+// translation of the fetch address (the fetch itself is sent later via
+// fetchTranslation), or, when no memory fetch is needed, go straight to
+// completeIfetch() with a NULL packet.
+void
+TimingSimpleCPU::fetch()
+{
+    DPRINTF(SimpleCPU, "Fetch\n");
+
+    // Interrupts are not checked while the current instruction is
+    // marked as delayed-commit.
+    if (!curStaticInst || !curStaticInst->isDelayedCommit())
+        checkForInterrupts();
+
+    checkPcEventQueue();
+
+    // We must have just got suspended by a PC event
+    if (_status == Idle)
+        return;
+
+    TheISA::PCState pcState = thread->pcState();
+    // No memory fetch is needed when the micro PC is in the ROM or a
+    // macro instruction is already in progress.
+    bool needToFetch = !isRomMicroPC(pcState.microPC()) && !curMacroStaticInst;
+
+    if (needToFetch) {
+        _status = Running;
+        Request *ifetch_req = new Request();
+        ifetch_req->setThreadContext(_cpuId, /* thread ID */ 0);
+        setupFetchRequest(ifetch_req);
+        DPRINTF(SimpleCPU, "Translating address %#x\n", ifetch_req->getVaddr());
+        thread->itb->translateTiming(ifetch_req, tc, &fetchTranslation,
+                BaseTLB::Execute);
+    } else {
+        // completeIfetch() asserts this status on entry.
+        _status = IcacheWaitResponse;
+        completeIfetch(NULL);
+
+        numCycles += tickToCycles(curTick() - previousTick);
+        previousTick = curTick();
+    }
+}
+
+
+// Continue an instruction fetch once its address translation is done.
+//
+//  fault  result of the translation
+//  req    the fetch request; deleted here if the translation faulted
+//  tc     thread context (not used in this function)
+//
+// Without a fault, a read packet whose data lands in the inst member is
+// sent through icachePort. With a fault, the CPU advances directly with
+// that fault.
+void
+TimingSimpleCPU::sendFetch(Fault fault, RequestPtr req, ThreadContext *tc)
+{
+    if (fault == NoFault) {
+        DPRINTF(SimpleCPU, "Sending fetch for addr %#x(pa: %#x)\n",
+                req->getVaddr(), req->getPaddr());
+        ifetch_pkt = new Packet(req, MemCmd::ReadReq);
+        ifetch_pkt->dataStatic(&inst);
+        DPRINTF(SimpleCPU, " -- pkt addr: %#x\n", ifetch_pkt->getAddr());
+
+        if (!icachePort.sendTimingReq(ifetch_pkt)) {
+            // Need to wait for retry
+            // (ifetch_pkt stays set for IcachePort::recvRetry())
+            _status = IcacheRetry;
+        } else {
+            // Need to wait for cache to respond
+            _status = IcacheWaitResponse;
+            // ownership of packet transferred to memory system
+            ifetch_pkt = NULL;
+        }
+    } else {
+        DPRINTF(SimpleCPU, "Translation of addr %#x faulted\n", req->getVaddr());
+        delete req;
+        // fetch fault: advance directly to next instruction (fault handler)
+        _status = Running;
+        advanceInst(fault);
+    }
+
+    numCycles += tickToCycles(curTick() - previousTick);
+    previousTick = curTick();
+}
+
+
+// Move on to the next instruction after the current one finished with
+// the given fault (NoFault for a normal completion).
+//
+// With a fault, the PC is advanced with that fault, a fetch is
+// rescheduled for the next cycle and the CPU enters the Faulting state.
+// While Faulting, further calls do nothing. Without a fault, the PC is
+// advanced (unless stayAtPC is set) and the next fetch starts
+// immediately if the CPU is still Running.
+void
+TimingSimpleCPU::advanceInst(Fault fault)
+{
+
+    if (_status == Faulting)
+        return;
+
+    if (fault != NoFault) {
+        advancePC(fault);
+        DPRINTF(SimpleCPU, "Fault occured, scheduling fetch event\n");
+        // NOTE(review): the trailing 'true' presumably allows the event
+        // to be scheduled even if it is not pending -- confirm.
+        reschedule(fetchEvent, nextCycle(), true);
+        _status = Faulting;
+        return;
+    }
+
+
+    if (!stayAtPC)
+        advancePC(fault);
+
+    if (_status == Running) {
+        // kick off fetch of next instruction... callback from icache
+        // response will cause that instruction to be executed,
+        // keeping the CPU running.
+        fetch();
+    }
+}
+
+
+// Complete an instruction fetch and execute the fetched instruction.
+//
+//  pkt  the icache response, or NULL when fetch() determined that no
+//       memory fetch was needed. A non-NULL packet and its request are
+//       deleted before this function returns.
+//
+// Memory instructions only initiate their access here and complete
+// later; other instructions are executed completely. If the CPU is
+// draining, nothing is executed and the drain is completed instead.
+void
+TimingSimpleCPU::completeIfetch(PacketPtr pkt)
+{
+    DPRINTF(SimpleCPU, "Complete ICache Fetch for addr %#x\n", pkt ?
+            pkt->getAddr() : 0);
+
+    // received a response from the icache: execute the received
+    // instruction
+
+    assert(!pkt || !pkt->isError());
+    assert(_status == IcacheWaitResponse);
+
+    _status = Running;
+
+    numCycles += tickToCycles(curTick() - previousTick);
+    previousTick = curTick();
+
+    if (getState() == SimObject::Draining) {
+        // Drop the fetched data without executing anything.
+        if (pkt) {
+            delete pkt->req;
+            delete pkt;
+        }
+
+        completeDrain();
+        return;
+    }
+
+    preExecute();
+    if (curStaticInst && curStaticInst->isMemRef()) {
+        // load or store: just send to dcache
+        Fault fault = curStaticInst->initiateAcc(this, traceData);
+
+        // If we're not running now the instruction will complete in a dcache
+        // response callback or the instruction faulted and has started an
+        // ifetch
+        if (_status == Running) {
+            if (fault != NoFault && traceData) {
+                // If there was a fault, we shouldn't trace this instruction.
+                delete traceData;
+                traceData = NULL;
+            }
+
+            postExecute();
+            // @todo remove me after debugging with legion done
+            if (curStaticInst && (!curStaticInst->isMicroop() ||
+                        curStaticInst->isFirstMicroop()))
+                instCnt++;
+            advanceInst(fault);
+        }
+    } else if (curStaticInst) {
+        // non-memory instruction: execute completely now
+        Fault fault = curStaticInst->execute(this, traceData);
+
+        // keep an instruction count
+        if (fault == NoFault)
+            countInst();
+        else if (traceData && !DTRACE(ExecFaulting)) {
+            // Faulting instructions are only traced when the
+            // ExecFaulting debug flag is on.
+            delete traceData;
+            traceData = NULL;
+        }
+
+        postExecute();
+        // @todo remove me after debugging with legion done
+        if (curStaticInst && (!curStaticInst->isMicroop() ||
+                    curStaticInst->isFirstMicroop()))
+            instCnt++;
+        advanceInst(fault);
+    } else {
+        // No instruction to execute yet: just move on.
+        advanceInst(NoFault);
+    }
+
+    if (pkt) {
+        delete pkt->req;
+        delete pkt;
+    }
+}
+
+// Event callback: complete the instruction fetch for the stored packet.
+void
+TimingSimpleCPU::IcachePort::ITickEvent::process()
+{
+    cpu->completeIfetch(pkt);
+}
+
+// Receive a timing response on the instruction port. Always returns
+// true.
+//
+// A nacked packet is re-initialised and resent (or parked for a retry).
+// A real response is processed on the next CPU clock edge: immediately
+// if that is the current tick, otherwise via tickEvent.
+bool
+TimingSimpleCPU::IcachePort::recvTimingResp(PacketPtr pkt)
+{
+    if (pkt->wasNacked()) {
+        assert(cpu->_status == IcacheWaitResponse);
+        pkt->reinitNacked();
+        if (!sendTimingReq(pkt)) {
+            // Resend refused: keep the packet for recvRetry().
+            cpu->_status = IcacheRetry;
+            cpu->ifetch_pkt = pkt;
+        }
+        return true;
+    }
+
+    DPRINTF(SimpleCPU, "Received timing response %#x\n", pkt->getAddr());
+    // delay processing of returned data until next CPU clock edge
+    Tick next_tick = cpu->nextCycle(curTick());
+
+    if (next_tick == curTick()) {
+        cpu->completeIfetch(pkt);
+    } else {
+        tickEvent.schedule(pkt, next_tick);
+    }
+
+    return true;
+}
+
+// The instruction port may accept requests again: resend the parked
+// fetch packet. If it is refused again, nothing changes.
+void
+TimingSimpleCPU::IcachePort::recvRetry()
+{
+    // we shouldn't get a retry unless we have a packet that we're
+    // waiting to transmit
+    assert(cpu->ifetch_pkt != NULL);
+    assert(cpu->_status == IcacheRetry);
+
+    if (!sendTimingReq(cpu->ifetch_pkt))
+        return;
+
+    cpu->_status = IcacheWaitResponse;
+    cpu->ifetch_pkt = NULL;
+}
+
+// Complete a load or store once its data access has finished.
+//
+//  pkt  the response packet. It and its request are deleted here. For
+//       a fragment of a split access, the fragment is deleted and the
+//       instruction only completes (using the main packet) once no
+//       fragment is outstanding any more.
+//
+// Afterwards either a pending drain is completed or the CPU advances
+// to the next instruction.
+void
+TimingSimpleCPU::completeDataAccess(PacketPtr pkt)
+{
+    // received a response from the dcache: complete the load or store
+    // instruction
+    assert(!pkt->isError());
+    assert(_status == DcacheWaitResponse || _status == DTBWaitResponse ||
+           pkt->req->getFlags().isSet(Request::NO_ACCESS));
+
+    numCycles += tickToCycles(curTick() - previousTick);
+    previousTick = curTick();
+
+    // A sender state marks this packet as a fragment of a split access.
+    if (pkt->senderState) {
+        SplitFragmentSenderState * send_state =
+            dynamic_cast<SplitFragmentSenderState *>(pkt->senderState);
+        assert(send_state);
+        delete pkt->req;
+        delete pkt;
+        PacketPtr big_pkt = send_state->bigPkt;
+        delete send_state;
+        
+        SplitMainSenderState * main_send_state =
+            dynamic_cast<SplitMainSenderState *>(big_pkt->senderState);
+        assert(main_send_state);
+        // Record the fact that this packet is no longer outstanding.
+        assert(main_send_state->outstanding != 0);
+        main_send_state->outstanding--;
+
+        if (main_send_state->outstanding) {
+            // Still waiting for the other fragment.
+            return;
+        } else {
+            // Last fragment: carry on with the main packet.
+            delete main_send_state;
+            big_pkt->senderState = NULL;
+            pkt = big_pkt;
+        }
+    }
+
+    _status = Running;
+
+    Fault fault = curStaticInst->completeAcc(pkt, this, traceData);
+
+    // keep an instruction count
+    if (fault == NoFault)
+        countInst();
+    else if (traceData) {
+        // If there was a fault, we shouldn't trace this instruction.
+        delete traceData;
+        traceData = NULL;
+    }
+
+    // the locked flag may be cleared on the response packet, so check
+    // pkt->req and not pkt to see if it was a load-locked
+    if (pkt->isRead() && pkt->req->isLLSC()) {
+        TheISA::handleLockedRead(thread, pkt->req);
+    }
+
+    delete pkt->req;
+    delete pkt;
+
+    postExecute();
+
+    if (getState() == SimObject::Draining) {
+        advancePC(fault);
+        completeDrain();
+
+        return;
+    }
+
+    advanceInst(fault);
+}
+
+
+// Finish a drain: mark the CPU as Drained and process the drain event.
+// drainEvent is dereferenced without a check; it is the event that
+// drain() stores when it puts the CPU into the Draining state.
+void
+TimingSimpleCPU::completeDrain()
+{
+    DPRINTF(Config, "Done draining\n");
+    changeState(SimObject::Drained);
+    drainEvent->process();
+}
+
+// Receive a timing response on the data port.
+//
+// A real response is processed on the next CPU clock edge: immediately
+// if that is the current tick, otherwise via tickEvent. If tickEvent is
+// already in use, the response is refused (returns false) and
+// retryEvent is scheduled for that edge. A nacked packet is
+// re-initialised and resent (or parked for a retry).
+//
+// Returns false only in the refused case described above.
+bool
+TimingSimpleCPU::DcachePort::recvTimingResp(PacketPtr pkt)
+{
+    if (!pkt->wasNacked()) {
+        // delay processing of returned data until next CPU clock edge
+        Tick next_tick = cpu->nextCycle(curTick());
+
+        if (next_tick == curTick()) {
+            cpu->completeDataAccess(pkt);
+        } else {
+            if (!tickEvent.scheduled()) {
+                tickEvent.schedule(pkt, next_tick);
+            } else {
+                // In the case of a split transaction and a cache that is
+                // faster than a CPU we could get two responses before
+                // next_tick expires
+                if (!retryEvent.scheduled())
+                    cpu->schedule(retryEvent, next_tick);
+                return false;
+            }
+        }
+
+        return true;
+    } else  {
+        assert(cpu->_status == DcacheWaitResponse);
+        pkt->reinitNacked();
+        if (!sendTimingReq(pkt)) {
+            // Resend refused: keep the packet for recvRetry().
+            cpu->_status = DcacheRetry;
+            cpu->dcache_pkt = pkt;
+        }
+    }
+
+    return true;
+}
+
+// Event callback: complete the data access for the stored packet.
+void
+TimingSimpleCPU::DcachePort::DTickEvent::process()
+{
+    cpu->completeDataAccess(pkt);
+}
+
+// The data port may accept requests again: resend the packet parked in
+// cpu->dcache_pkt. For a fragment of a split access, a successful
+// resend is followed by an attempt to send the other pending fragment.
+// If the resend is refused again, nothing changes.
+void
+TimingSimpleCPU::DcachePort::recvRetry()
+{
+    // we shouldn't get a retry unless we have a packet that we're
+    // waiting to transmit
+    assert(cpu->dcache_pkt != NULL);
+    assert(cpu->_status == DcacheRetry);
+    PacketPtr tmp = cpu->dcache_pkt;
+    if (tmp->senderState) {
+        // This is a packet from a split access.
+        SplitFragmentSenderState * send_state =
+            dynamic_cast<SplitFragmentSenderState *>(tmp->senderState);
+        assert(send_state);
+        PacketPtr big_pkt = send_state->bigPkt;
+        
+        SplitMainSenderState * main_send_state =
+            dynamic_cast<SplitMainSenderState *>(big_pkt->senderState);
+        assert(main_send_state);
+
+        if (sendTimingReq(tmp)) {
+            // If we were able to send without retrying, record that fact
+            // and try sending the other fragment.
+            send_state->clearFromParent();
+            int other_index = main_send_state->getPendingFragment();
+            // NOTE(review): index 0 is treated like "nothing pending"
+            // here -- confirm getPendingFragment()'s return convention.
+            if (other_index > 0) {
+                tmp = main_send_state->fragments[other_index];
+                cpu->dcache_pkt = tmp;
+                if ((big_pkt->isRead() && cpu->handleReadPacket(tmp)) ||
+                        (big_pkt->isWrite() && cpu->handleWritePacket())) {
+                    main_send_state->fragments[other_index] = NULL;
+                }
+            } else {
+                cpu->_status = DcacheWaitResponse;
+                // memory system takes ownership of packet
+                cpu->dcache_pkt = NULL;
+            }
+        }
+    } else if (sendTimingReq(tmp)) {
+        cpu->_status = DcacheWaitResponse;
+        // memory system takes ownership of packet
+        cpu->dcache_pkt = NULL;
+    }
+}
+
+// Store the packet and CPU, then schedule this event on the CPU at
+// tick t.
+TimingSimpleCPU::IprEvent::IprEvent(Packet *_pkt, TimingSimpleCPU *_cpu,
+    Tick t)
+    : pkt(_pkt), cpu(_cpu)
+{
+    cpu->schedule(this, t);
+}
+
+// Event callback: complete the data access for the stored packet.
+void
+TimingSimpleCPU::IprEvent::process()
+{
+    cpu->completeDataAccess(pkt);
+}
+
+// Return a fixed, human-readable description of this event type.
+const char *
+TimingSimpleCPU::IprEvent::description() const
+{
+    return "Timing Simple CPU Delay IPR event";
+}
+
+
+// Forward address a to the data port's printAddr().
+void
+TimingSimpleCPU::printAddr(Addr a)
+{
+    dcachePort.printAddr(a);
+}
+
+
+////////////////////////////////////////////////////////////////////////
+//
+//  TimingSimpleCPU Simulation Object
+//
+// Create a TimingSimpleCPU from these parameters.
+// numThreads is forced to 1. Outside full-system mode, exactly one
+// workload must be configured; anything else panics.
+TimingSimpleCPU *
+TimingSimpleCPUParams::create()
+{
+    numThreads = 1;
+    if (!FullSystem && workload.size() != 1)
+        panic("only one workload allowed");
+    return new TimingSimpleCPU(this);
+}
--- a/simulators/gem5/src/cpu/simple/timing.hh
+++ b/simulators/gem5/src/cpu/simple/timing.hh
@ -0,0 +1,299 @@
+/*
+ * Copyright (c) 2002-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Steve Reinhardt
+ */
+
+#ifndef __CPU_SIMPLE_TIMING_HH__
+#define __CPU_SIMPLE_TIMING_HH__
+
+#include "cpu/simple/base.hh"
+#include "cpu/translation.hh"
+#include "params/TimingSimpleCPU.hh"
+
+class TimingSimpleCPU : public BaseSimpleCPU
+{
+  public:
+
+    /** Construct the CPU from its parameter object (defined out of line). */
+    TimingSimpleCPU(TimingSimpleCPUParams * params);
+    virtual ~TimingSimpleCPU();
+
+    /** Initialization hook; virtual and defined out of line. */
+    virtual void init();
+
+  public:
+    // NOTE(review): presumably the event handed to drain() and kept until
+    // draining completes -- confirm against the implementation file.
+    Event *drainEvent;
+
+  private:
+
+    /*
+     * If an access needs to be broken into fragments, currently at most two,
+     * the following two classes are used as the sender state of the
+     * packets so the CPU can keep track of everything. In the main packet
+     * sender state, there's an array with a spot for each fragment. If a
+     * fragment has already been accepted by the CPU, aka isn't waiting for
+     * a retry, its pointer is NULL. After each fragment has successfully
+     * been processed, the "outstanding" counter is decremented. Once the
+     * count is zero, the entire larger access is complete.
+     */
+    /**
+     * Sender state for the main packet of a split access: holds the
+     * "outstanding" counter and one slot per fragment.
+     */
+    class SplitMainSenderState : public Packet::SenderState
+    {
+      public:
+        /** Count of fragments that have not yet been processed. */
+        int outstanding;
+        /** One slot per fragment; a NULL slot is not waiting for a retry. */
+        PacketPtr fragments[2];
+
+        /**
+         * Locate the first fragment slot that is still populated.
+         *
+         * @return 0 or 1 for the lowest non-NULL slot, or -1 when both
+         *         slots are NULL.
+         */
+        int
+        getPendingFragment()
+        {
+            if (fragments[0])
+                return 0;
+            if (fragments[1])
+                return 1;
+            return -1;
+        }
+    };
+
+    /**
+     * Sender state attached to one fragment of a split access. It records
+     * the original packet and the slot this fragment occupies in that
+     * packet's SplitMainSenderState.
+     */
+    class SplitFragmentSenderState : public Packet::SenderState
+    {
+      public:
+        /**
+         * @param _bigPkt Original packet whose sender state is a
+         *                SplitMainSenderState.
+         * @param _index  Slot of this fragment in the parent's fragments[].
+         */
+        SplitFragmentSenderState(PacketPtr _bigPkt, int _index) :
+            bigPkt(_bigPkt), index(_index)
+        {}
+        /** The original, unsplit packet this fragment belongs to. */
+        PacketPtr bigPkt;
+        /** Index of this fragment in the parent's fragments[] array. */
+        int index;
+
+        /**
+         * Clear this fragment's slot in the parent packet's
+         * SplitMainSenderState, marking it as no longer pending.
+         */
+        void
+        clearFromParent()
+        {
+            SplitMainSenderState * main_send_state =
+                dynamic_cast<SplitMainSenderState *>(bigPkt->senderState);
+            // dynamic_cast yields NULL when bigPkt carries some other kind
+            // of sender state; fail loudly instead of dereferencing NULL.
+            assert(main_send_state);
+            main_send_state->fragments[index] = NULL;
+        }
+    };
+
+    /**
+     * Translation callback object for fetches: it forwards the outcome of
+     * a translation to the owning CPU's sendFetch().
+     */
+    class FetchTranslation : public BaseTLB::Translation
+    {
+      protected:
+        /** CPU that receives the translation result. */
+        TimingSimpleCPU *cpu;
+
+      public:
+        FetchTranslation(TimingSimpleCPU *_cpu)
+            : cpu(_cpu)
+        {}
+
+        /**
+         * Record that the translation is delayed: the CPU is expected to
+         * be in the Running state and is moved to ITBWaitResponse.
+         */
+        void
+        markDelayed()
+        {
+            assert(cpu->_status == Running);
+            cpu->_status = ITBWaitResponse;
+        }
+
+        /**
+         * Translation finished: pass the fault, request and thread context
+         * on to TimingSimpleCPU::sendFetch(). The mode argument is unused.
+         */
+        void
+        finish(Fault fault, RequestPtr req, ThreadContext *tc,
+               BaseTLB::Mode mode)
+        {
+            cpu->sendFetch(fault, req, tc);
+        }
+    };
+    /** The FetchTranslation instance held by this CPU. */
+    FetchTranslation fetchTranslation;
+
+    // Private helpers for issuing data accesses. All of them are only
+    // declared here; their definitions are not part of this header.
+    void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
+    void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
+                       uint8_t *data, bool read);
+
+    void translationFault(Fault fault);
+
+    // The build functions return the packet(s) through the reference
+    // parameters.
+    void buildPacket(PacketPtr &pkt, RequestPtr req, bool read);
+    void buildSplitPacket(PacketPtr &pkt1, PacketPtr &pkt2,
+            RequestPtr req1, RequestPtr req2, RequestPtr req,
+            uint8_t *data, bool read);
+
+    // NOTE(review): the retry code treats a true result from either handler
+    // as "this fragment is no longer pending" -- confirm the exact contract
+    // against the definitions.
+    bool handleReadPacket(PacketPtr pkt);
+    // This function always implicitly uses dcache_pkt.
+    bool handleWritePacket();
+
+    /**
+     * A TimingCPUPort overrides the default behaviour of the
+     * recvTiming and recvRetry and implements events for the
+     * scheduling of handling of incoming packets in the following
+     * cycle.
+     *
+     * It is the common base of IcachePort and DcachePort.
+     */
+    class TimingCPUPort : public CpuPort
+    {
+      public:
+
+        /**
+         * @param _name Name passed through to the CpuPort base.
+         * @param _cpu  Owning CPU; stored and also passed to CpuPort.
+         */
+        TimingCPUPort(const std::string& _name, TimingSimpleCPU* _cpu)
+            : CpuPort(_name, _cpu), cpu(_cpu), retryEvent(this)
+        { }
+
+      protected:
+
+        /**
+         * Snooping a coherence request, do nothing.
+         */
+        virtual void recvTimingSnoopReq(PacketPtr pkt) { }
+
+        /** CPU this port belongs to. */
+        TimingSimpleCPU* cpu;
+
+        /**
+         * Base event carrying a packet together with the CPU pointer.
+         * The packet starts out NULL; schedule() is defined out of line.
+         */
+        struct TickEvent : public Event
+        {
+            PacketPtr pkt;
+            TimingSimpleCPU *cpu;
+
+            TickEvent(TimingSimpleCPU *_cpu) : pkt(NULL), cpu(_cpu) {}
+            const char *description() const { return "Timing CPU tick"; }
+            void schedule(PacketPtr _pkt, Tick t);
+        };
+
+        /** Event wrapper that invokes Port::sendRetry on this port. */
+        EventWrapper<Port, &Port::sendRetry> retryEvent;
+    };
+
+    /**
+     * Instruction-side port. Named "<cpu name>-iport"; owns an ITickEvent.
+     */
+    class IcachePort : public TimingCPUPort
+    {
+      public:
+
+        IcachePort(TimingSimpleCPU *_cpu)
+            : TimingCPUPort(_cpu->name() + "-iport", _cpu),
+              tickEvent(_cpu)
+        { }
+
+      protected:
+
+        // Both handlers are defined out of line.
+        virtual bool recvTimingResp(PacketPtr pkt);
+
+        virtual void recvRetry();
+
+        /** TickEvent specialisation for this port; process() is defined
+         *  out of line. */
+        struct ITickEvent : public TickEvent
+        {
+
+            ITickEvent(TimingSimpleCPU *_cpu)
+                : TickEvent(_cpu) {}
+            void process();
+            const char *description() const { return "Timing CPU icache tick"; }
+        };
+
+        /** The tick event instance belonging to this port. */
+        ITickEvent tickEvent;
+
+    };
+
+    /**
+     * Data-side port. Named "<cpu name>-dport"; owns a DTickEvent.
+     */
+    class DcachePort : public TimingCPUPort
+    {
+      public:
+
+        DcachePort(TimingSimpleCPU *_cpu)
+            : TimingCPUPort(_cpu->name() + "-dport", _cpu), tickEvent(_cpu)
+        { }
+
+      protected:
+
+        // Both handlers are defined out of line.
+        virtual bool recvTimingResp(PacketPtr pkt);
+
+        virtual void recvRetry();
+
+        /** TickEvent specialisation for this port; process() is defined
+         *  out of line. */
+        struct DTickEvent : public TickEvent
+        {
+            DTickEvent(TimingSimpleCPU *_cpu)
+                : TickEvent(_cpu) {}
+            void process();
+            const char *description() const { return "Timing CPU dcache tick"; }
+        };
+
+        /** The tick event instance belonging to this port. */
+        DTickEvent tickEvent;
+
+    };
+
+    /** Port instances returned by getInstPort() and getDataPort(). */
+    IcachePort icachePort;
+    DcachePort dcachePort;
+
+    // NOTE(review): presumably the instruction-fetch packet currently being
+    // handled -- confirm against the implementation file.
+    PacketPtr ifetch_pkt;
+    // Data packet currently held by the CPU; the sending code resets it to
+    // NULL once the memory system has taken ownership of the packet.
+    PacketPtr dcache_pkt;
+
+    // NOTE(review): usage is not visible in this header -- confirm meaning.
+    Tick previousTick;
+
+  protected:
+
+    /** Return a reference to the data port (dcachePort). */
+    virtual CpuPort &getDataPort() { return dcachePort; }
+
+    /** Return a reference to the instruction port (icachePort). */
+    virtual CpuPort &getInstPort() { return icachePort; }
+
+  public:
+
+    // Everything in this section is declared only; the definitions are not
+    // part of this header.
+
+    /** Write state to the given stream / read it back from a checkpoint. */
+    virtual void serialize(std::ostream &os);
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+    // NOTE(review): meaning of drain()'s unsigned return value is not
+    // visible here -- confirm against the base-class documentation.
+    virtual unsigned int drain(Event *drain_event);
+    virtual void resume();
+
+    void switchOut();
+    void takeOverFrom(BaseCPU *oldCPU);
+
+    virtual void activateContext(ThreadID thread_num, int delay);
+    virtual void suspendContext(ThreadID thread_num);
+
+    /** Memory access entry points; each returns a Fault. */
+    Fault readMem(Addr addr, uint8_t *data, unsigned size, unsigned flags);
+
+    Fault writeMem(uint8_t *data, unsigned size,
+                   Addr addr, unsigned flags, uint64_t *res);
+
+    /** fetch() is the callback bound to FetchEvent. */
+    void fetch();
+    /** Called by FetchTranslation::finish() with the translation result. */
+    void sendFetch(Fault fault, RequestPtr req, ThreadContext *tc);
+    void completeIfetch(PacketPtr );
+    /** Called by IprEvent::process() with the event's stored packet. */
+    void completeDataAccess(PacketPtr pkt);
+    void advanceInst(Fault fault);
+
+    /**
+     * Print state of address in memory system via PrintReq (for
+     * debugging).
+     */
+    void printAddr(Addr a);
+
+    /**
+     * Finish a DTB translation.
+     * @param state The DTB translation state.
+     */
+    void finishTranslation(WholeTranslationState *state);
+
+  private:
+
+    /** Event wrapper whose callback is TimingSimpleCPU::fetch(). */
+    typedef EventWrapper<TimingSimpleCPU, &TimingSimpleCPU::fetch> FetchEvent;
+    FetchEvent fetchEvent;
+
+    /**
+     * Event that keeps a packet and the CPU it belongs to. The
+     * constructor, process() and description() are defined out of line.
+     */
+    struct IprEvent : Event {
+        Packet *pkt;
+        TimingSimpleCPU *cpu;
+        IprEvent(Packet *_pkt, TimingSimpleCPU *_cpu, Tick t);
+        virtual void process();
+        virtual const char *description() const;
+    };
+
+    // Declared only; definition is not part of this header.
+    void completeDrain();
+};
+
+#endif // __CPU_SIMPLE_TIMING_HH__