Adding gem5 source to svn.

git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1819 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
friemel
2012-10-24 19:18:57 +00:00
parent f7ff71bd46
commit b41eec3f65
3222 changed files with 658579 additions and 1 deletions

View File

@ -0,0 +1,40 @@
# Copyright (c) 2006-2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Kevin Lim
from m5.SimObject import SimObject
from m5.params import *
from FuncUnit import *
from FuncUnitConfig import *
class FUPool(SimObject):
    # SimObject describing a pool of functional units.  The pool itself is
    # nothing more than a vector of FUDesc entries.
    type = 'FUPool'

    FUList = VectorParam.FUDesc("list of FU's for this pool")
class DefaultFUPool(FUPool):
    # Pool pre-populated with one descriptor instance of each functional
    # unit type brought in by the FuncUnitConfig import.  The order of the
    # entries is kept exactly as originally written.
    FUList = [
        IntALU(),
        IntMultDiv(),
        FP_ALU(),
        FP_MultDiv(),
        ReadPort(),
        SIMD_Unit(),
        WritePort(),
        RdWrPort(),
        IprPort(),
    ]

View File

@ -0,0 +1,104 @@
# Copyright (c) 2010 ARM Limited
# All rights reserved.
#
# The license below extends only to copyright in the software and shall
# not be construed as granting a license to any other intellectual
# property including but not limited to intellectual property relating
# to a hardware implementation of the functionality of the software
# licensed hereunder. You may use the software subject to the license
# terms below provided that you ensure that this notice is replicated
# unmodified and in its entirety in all distributions of the software,
# modified or unmodified, in source code or in binary form.
#
# Copyright (c) 2006-2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Kevin Lim
from m5.SimObject import SimObject
from m5.params import *
from FuncUnit import *
class IntALU(FUDesc):
    # Handles the 'IntAlu' op class only; no latency overrides are given,
    # so OpDesc's own defaults apply.
    opList = [
        OpDesc(opClass='IntAlu'),
    ]
    count = 6
class IntMultDiv(FUDesc):
    # Integer multiply and divide.  Divide overrides both the operation
    # latency (20) and the issue latency (19).
    opList = [
        OpDesc(opClass='IntMult', opLat=3),
        OpDesc(opClass='IntDiv', opLat=20, issueLat=19),
    ]
    count = 2
class FP_ALU(FUDesc):
    # Floating-point add, compare and convert, each with opLat=2.
    opList = [
        OpDesc(opClass='FloatAdd', opLat=2),
        OpDesc(opClass='FloatCmp', opLat=2),
        OpDesc(opClass='FloatCvt', opLat=2),
    ]
    count = 4
class FP_MultDiv(FUDesc):
    # Floating-point multiply, divide and square root.  Divide and sqrt
    # set issueLat equal to opLat.
    opList = [
        OpDesc(opClass='FloatMult', opLat=4),
        OpDesc(opClass='FloatDiv', opLat=12, issueLat=12),
        OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24),
    ]
    count = 2
class SIMD_Unit(FUDesc):
    # One OpDesc per SIMD op class, all without latency overrides.
    # A generator expression (rather than a list comprehension or a
    # helper variable) is used so that nothing besides opList and count
    # ends up in the class namespace.
    opList = list(OpDesc(opClass=simd_op) for simd_op in (
        'SimdAdd',
        'SimdAddAcc',
        'SimdAlu',
        'SimdCmp',
        'SimdCvt',
        'SimdMisc',
        'SimdMult',
        'SimdMultAcc',
        'SimdShift',
        'SimdShiftAcc',
        'SimdSqrt',
        'SimdFloatAdd',
        'SimdFloatAlu',
        'SimdFloatCmp',
        'SimdFloatCvt',
        'SimdFloatDiv',
        'SimdFloatMisc',
        'SimdFloatMult',
        'SimdFloatMultAcc',
        'SimdFloatSqrt',
    ))
    count = 4
class ReadPort(FUDesc):
    # Read-only memory port descriptor; count is 0 in this configuration.
    opList = [
        OpDesc(opClass='MemRead'),
    ]
    count = 0
class WritePort(FUDesc):
    # Write-only memory port descriptor; count is 0 in this configuration.
    opList = [
        OpDesc(opClass='MemWrite'),
    ]
    count = 0
class RdWrPort(FUDesc):
    # Combined memory port descriptor accepting both reads and writes.
    opList = [
        OpDesc(opClass='MemRead'),
        OpDesc(opClass='MemWrite'),
    ]
    count = 4
class IprPort(FUDesc):
    # Descriptor for 'IprAccess' operations, with opLat and issueLat of 3.
    opList = [
        OpDesc(opClass='IprAccess', opLat=3, issueLat=3),
    ]
    count = 1

View File

@ -0,0 +1,147 @@
# Copyright (c) 2005-2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Kevin Lim
from m5.defines import buildEnv
from m5.params import *
from m5.proxy import *
from BaseCPU import BaseCPU
from FUPool import *
from O3Checker import O3Checker
class DerivO3CPU(BaseCPU):
    # Parameter description of the O3 CPU SimObject.  Every attribute
    # below is declared as Param.<Type>(default, description); the
    # grouping comments only reflect what the descriptions themselves say.
    type = 'DerivO3CPU'
    activity = Param.Unsigned(0, "Initial count")

    cachePorts = Param.Unsigned(200, "Cache Ports")

    # Delays into fetch, and the fetch width.
    decodeToFetchDelay = Param.Unsigned(1, "Decode to fetch delay")
    renameToFetchDelay = Param.Unsigned(1, "Rename to fetch delay")
    iewToFetchDelay = Param.Unsigned(1, "Issue/Execute/Writeback to fetch "
                                     "delay")
    commitToFetchDelay = Param.Unsigned(1, "Commit to fetch delay")
    fetchWidth = Param.Unsigned(8, "Fetch width")

    # Delays into decode, and the decode width.
    renameToDecodeDelay = Param.Unsigned(1, "Rename to decode delay")
    iewToDecodeDelay = Param.Unsigned(1, "Issue/Execute/Writeback to decode "
                                      "delay")
    commitToDecodeDelay = Param.Unsigned(1, "Commit to decode delay")
    fetchToDecodeDelay = Param.Unsigned(1, "Fetch to decode delay")
    decodeWidth = Param.Unsigned(8, "Decode width")

    # Delays into rename, and the rename width.
    iewToRenameDelay = Param.Unsigned(1, "Issue/Execute/Writeback to rename "
                                      "delay")
    commitToRenameDelay = Param.Unsigned(1, "Commit to rename delay")
    decodeToRenameDelay = Param.Unsigned(1, "Decode to rename delay")
    renameWidth = Param.Unsigned(8, "Rename width")

    # Delays into issue/execute/writeback (IEW), its widths and FU pool.
    commitToIEWDelay = Param.Unsigned(1, "Commit to "
                                      "Issue/Execute/Writeback delay")
    renameToIEWDelay = Param.Unsigned(2, "Rename to "
                                      "Issue/Execute/Writeback delay")
    issueToExecuteDelay = Param.Unsigned(1, "Issue to execute delay (internal "
                                         "to the IEW stage)")
    dispatchWidth = Param.Unsigned(8, "Dispatch width")
    issueWidth = Param.Unsigned(8, "Issue width")
    wbWidth = Param.Unsigned(8, "Writeback width")
    wbDepth = Param.Unsigned(1, "Writeback depth")
    fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool")

    # Delays into commit, commit/squash widths and trap latencies.
    iewToCommitDelay = Param.Unsigned(1, "Issue/Execute/Writeback to commit "
                                      "delay")
    renameToROBDelay = Param.Unsigned(1, "Rename to reorder buffer delay")
    commitWidth = Param.Unsigned(8, "Commit width")
    squashWidth = Param.Unsigned(8, "Squash width")
    trapLatency = Param.Tick(13, "Trap latency")
    fetchTrapLatency = Param.Tick(1, "Fetch trap latency")

    # Time buffer sizes.
    backComSize = Param.Unsigned(
        5, "Time buffer size for backwards communication")
    forwardComSize = Param.Unsigned(
        5, "Time buffer size for forward communication")

    # Branch predictor, BTB and RAS sizing.
    predType = Param.String(
        "tournament", "Branch predictor type ('local', 'tournament')")
    localPredictorSize = Param.Unsigned(2048, "Size of local predictor")
    localCtrBits = Param.Unsigned(2, "Bits per counter")
    localHistoryTableSize = Param.Unsigned(2048, "Size of local history table")
    localHistoryBits = Param.Unsigned(11, "Bits for the local history")
    globalPredictorSize = Param.Unsigned(8192, "Size of global predictor")
    globalCtrBits = Param.Unsigned(2, "Bits per counter")
    globalHistoryBits = Param.Unsigned(13, "Bits of history")
    choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor")
    choiceCtrBits = Param.Unsigned(2, "Bits of choice counters")

    BTBEntries = Param.Unsigned(4096, "Number of BTB entries")
    BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits")

    RASSize = Param.Unsigned(16, "RAS size")

    # Load/store queue and memory dependence (store set) parameters.
    LQEntries = Param.Unsigned(32, "Number of load queue entries")
    SQEntries = Param.Unsigned(32, "Number of store queue entries")
    LSQDepCheckShift = Param.Unsigned(
        4, "Number of places to shift addr before check")
    LSQCheckLoads = Param.Bool(
        True,
        "Should dependency violations be checked for loads & "
        "stores or just stores")
    store_set_clear_period = Param.Unsigned(
        250000,
        "Number of load/store insts before the dep predictor "
        "should be invalidated")
    LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
    SSITSize = Param.Unsigned(1024, "Store set ID table size")

    # Reorder buffer, physical register file and instruction queue sizes.
    numRobs = Param.Unsigned(1, "Number of Reorder Buffers")

    numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
    numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
                                      "registers")
    numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
    numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries")

    instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")

    # SMT policies and thresholds.
    smtNumFetchingThreads = Param.Unsigned(1, "SMT Number of Fetching Threads")
    smtFetchPolicy = Param.String('SingleThread', "SMT Fetch policy")
    smtLSQPolicy = Param.String('Partitioned', "SMT LSQ Sharing Policy")
    smtLSQThreshold = Param.Int(100, "SMT LSQ Threshold Sharing Parameter")
    smtIQPolicy = Param.String('Partitioned', "SMT IQ Sharing Policy")
    smtIQThreshold = Param.Int(100, "SMT IQ Threshold Sharing Parameter")
    smtROBPolicy = Param.String('Partitioned', "SMT ROB Sharing Policy")
    smtROBThreshold = Param.Int(100, "SMT ROB Threshold Sharing Parameter")
    smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy")

    # The default is True only when the build targets x86.
    needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86',
                          "Enable TSO Memory model")

    def addCheckerCpu(self):
        # Attach an O3Checker to this CPU as self.checker, giving it the
        # same workload and ArmTLB instances sized like this CPU's own
        # itb/dtb.  Only the ARM target ISA is supported; for any other
        # ISA an error is printed and the process exits with status 1.
        if buildEnv['TARGET_ISA'] in ['arm']:
            # Imported here so non-ARM builds never need ArmTLB.
            from ArmTLB import ArmTLB

            self.checker = O3Checker(workload=self.workload,
                                     exitOnError=False,
                                     updateOnError=True,
                                     warnOnlyOnLoadError=True)
            self.checker.itb = ArmTLB(size=self.itb.size)
            self.checker.dtb = ArmTLB(size=self.dtb.size)
        else:
            # sys.exit is used instead of the site-provided exit() builtin,
            # which is not guaranteed to exist in every interpreter setup.
            import sys

            # Single-argument call form: valid as both the Python 2 print
            # statement and the Python 3 print function.
            print("ERROR: Checker only supported under ARM ISA!")
            sys.exit(1)

View File

@ -0,0 +1,40 @@
# Copyright (c) 2007 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
from m5.params import *
from BaseCPU import BaseCPU
class O3Checker(BaseCPU):
    # Parameter description of the checker CPU used alongside the O3 CPU.
    type = 'O3Checker'

    # Error-handling switches.
    exitOnError = Param.Bool(False, "Exit on an error")
    updateOnError = Param.Bool(
        False,
        "Update the checker with the main CPU's state on an error")
    warnOnlyOnLoadError = Param.Bool(
        True,
        "If a load result is incorrect, only print a warning and do not exit")

    # Function tracing.
    function_trace = Param.Bool(False, "Enable function trace")
    function_trace_start = Param.Tick(0, "Cycle to start function trace")

View File

@ -0,0 +1,82 @@
# -*- mode:python -*-
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
import sys
# SCons build description for the O3 CPU model sources in this directory.
# Bring the variables exported by the calling SConscript (env, and the
# SimObject/Source/DebugFlag/CompoundFlag helpers used below) into scope.
Import('*')
# Debug flags that are needed when either the O3 or the Ozone CPU model is
# part of the build.
# NOTE(review): indentation is not visible in this view, so which of the
# following statements belong to each conditional body should be confirmed
# against the original file.
if 'O3CPU' in env['CPU_MODELS'] or 'OzoneCPU' in env['CPU_MODELS']:
DebugFlag('CommitRate')
DebugFlag('IEW')
DebugFlag('IQ')
# Everything specific to the O3 CPU model: Python SimObject descriptions,
# C++ sources, and debug flags (presumably all guarded by this check).
if 'O3CPU' in env['CPU_MODELS']:
# Python-side SimObject definitions.
SimObject('FUPool.py')
SimObject('FuncUnitConfig.py')
SimObject('O3CPU.py')
# C++ translation units of the model.
Source('base_dyn_inst.cc')
Source('bpred_unit.cc')
Source('commit.cc')
Source('cpu.cc')
Source('cpu_builder.cc')
Source('decode.cc')
Source('dyn_inst.cc')
Source('fetch.cc')
Source('free_list.cc')
Source('fu_pool.cc')
Source('iew.cc')
Source('inst_queue.cc')
Source('lsq.cc')
Source('lsq_unit.cc')
Source('mem_dep_unit.cc')
Source('rename.cc')
Source('rename_map.cc')
Source('rob.cc')
Source('scoreboard.cc')
Source('store_set.cc')
Source('thread_context.cc')
# Individual debug flags declared for the O3 model.
DebugFlag('LSQ')
DebugFlag('LSQUnit')
DebugFlag('MemDepUnit')
DebugFlag('O3CPU')
DebugFlag('ROB')
DebugFlag('Rename')
DebugFlag('Scoreboard')
DebugFlag('StoreSet')
DebugFlag('Writeback')
# Umbrella flag grouping the listed flags under the single name 'O3CPUAll'.
# Some members (e.g. 'Fetch', 'Decode', 'Commit') are not declared in this
# file and are presumably declared elsewhere in the build.
CompoundFlag('O3CPUAll', [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit',
'IQ', 'ROB', 'FreeList', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit',
'DynInst', 'O3CPU', 'Activity', 'Scoreboard', 'Writeback' ])
# Checker CPU description and its builder source.
SimObject('O3Checker.py')
Source('checker_builder.cc')

View File

@ -0,0 +1,36 @@
# -*- mode:python -*-
# Copyright (c) 2006 The Regents of The University of Michigan
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met: redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer;
# redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution;
# neither the name of the copyright holders nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Authors: Nathan Binkert
# Bring the names exported by the caller (including CpuModel) into scope.
Import('*')

# Declare the 'O3CPU' CPU model to the build.
# NOTE(review): the positional arguments look like (model name, generated
# exec source file, include line for that file, substitution dictionary);
# confirm against the CpuModel definition.
CpuModel(
    'O3CPU',
    'o3_cpu_exec.cc',
    '#include "cpu/o3/isa_specific.hh"',
    {'CPU_exec_context': 'O3DynInst'},
    default=True
)

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/cpu.hh"
#include "cpu/o3/isa_specific.hh"
#include "cpu/base_dyn_inst_impl.hh"
// Explicit instantiation of BaseDynInst for the O3 implementation type
// (O3CPUImpl), so that the template's member definitions, made visible by
// including cpu/base_dyn_inst_impl.hh above, are compiled into this
// translation unit.
template class BaseDynInst<O3CPUImpl>;

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/bpred_unit_impl.hh"
#include "cpu/o3/isa_specific.hh"
// Explicit instantiation of BPredUnit for the O3 implementation type
// (O3CPUImpl); the member definitions come from cpu/o3/bpred_unit_impl.hh,
// included above.
template class BPredUnit<O3CPUImpl>;

View File

@ -0,0 +1,294 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_BPRED_UNIT_HH__
#define __CPU_O3_BPRED_UNIT_HH__
#include <list>
#include "base/statistics.hh"
#include "base/types.hh"
#include "cpu/pred/2bit_local.hh"
#include "cpu/pred/btb.hh"
#include "cpu/pred/ras.hh"
#include "cpu/pred/tournament.hh"
#include "cpu/inst_seq.hh"
struct DerivO3CPUParams;
/**
* Basically a wrapper class to hold both the branch predictor
* and the BTB.
*/
template<class Impl>
class BPredUnit
{
private:
typedef typename Impl::DynInstPtr DynInstPtr;
enum PredType {
Local,
Tournament
};
PredType predictor;
const std::string _name;
public:
/**
* @param params The params object, that has the size of the BP and BTB.
*/
BPredUnit(DerivO3CPUParams *params);
const std::string &name() const { return _name; }
/**
* Registers statistics.
*/
void regStats();
void switchOut();
void takeOverFrom();
/**
* Predicts whether or not the instruction is a taken branch, and the
* target of the branch if it is taken.
* @param inst The branch instruction.
* @param PC The predicted PC is passed back through this parameter.
* @param tid The thread id.
* @return Returns if the branch is taken or not.
*/
bool predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid);
// @todo: Rename this function.
void BPUncond(void * &bp_history);
/**
* Tells the branch predictor to commit any updates until the given
* sequence number.
* @param done_sn The sequence number to commit any older updates up until.
* @param tid The thread id.
*/
void update(const InstSeqNum &done_sn, ThreadID tid);
/**
* Squashes all outstanding updates until a given sequence number.
* @param squashed_sn The sequence number to squash any younger updates up
* until.
* @param tid The thread id.
*/
void squash(const InstSeqNum &squashed_sn, ThreadID tid);
/**
* Squashes all outstanding updates until a given sequence number, and
* corrects that sn's update with the proper address and taken/not taken.
* @param squashed_sn The sequence number to squash any younger updates up
* until.
* @param corr_target The correct branch target.
* @param actually_taken The correct branch direction.
* @param tid The thread id.
*/
void squash(const InstSeqNum &squashed_sn,
const TheISA::PCState &corr_target,
bool actually_taken, ThreadID tid);
/**
* @param bp_history Pointer to the history object. The predictor
* will need to update any state and delete the object.
*/
void BPSquash(void *bp_history);
/**
* Looks up a given PC in the BP to see if it is taken or not taken.
* @param inst_PC The PC to look up.
* @param bp_history Pointer that will be set to an object that
* has the branch predictor state associated with the lookup.
* @return Whether the branch is taken or not taken.
*/
bool BPLookup(Addr instPC, void * &bp_history);
/**
* If a branch is not taken, because the BTB address is invalid or missing,
* this function sets the appropriate counter in the global and local
* predictors to not taken.
* @param inst_PC The PC to look up the local predictor.
* @param bp_history Pointer that will be set to an object that
* has the branch predictor state associated with the lookup.
*/
void BPBTBUpdate(Addr instPC, void * &bp_history);
/**
* Looks up a given PC in the BTB to see if a matching entry exists.
* @param inst_PC The PC to look up.
* @return Whether the BTB contains the given PC.
*/
bool BTBValid(Addr instPC)
{ return BTB.valid(instPC, 0); }
/**
* Looks up a given PC in the BTB to get the predicted target.
* @param inst_PC The PC to look up.
* @return The address of the target of the branch.
*/
TheISA::PCState BTBLookup(Addr instPC)
{ return BTB.lookup(instPC, 0); }
/**
* Updates the BP with taken/not taken information.
* @param inst_PC The branch's PC that will be updated.
* @param taken Whether the branch was taken or not taken.
* @param bp_history Pointer to the branch predictor state that is
* associated with the branch lookup that is being updated.
* @param squashed Set to true when this function is called during a
* squash operation.
* @todo Make this update flexible enough to handle a global predictor.
*/
void BPUpdate(Addr instPC, bool taken, void *bp_history, bool squashed);
/**
* Updates the BTB with the target of a branch.
* @param inst_PC The branch's PC that will be updated.
* @param target_PC The branch's target that will be added to the BTB.
*/
void BTBUpdate(Addr instPC, const TheISA::PCState &target)
{ BTB.update(instPC, target, 0); }
void dump();
private:
struct PredictorHistory {
/**
* Makes a predictor history struct that contains any
* information needed to update the predictor, BTB, and RAS.
*/
PredictorHistory(const InstSeqNum &seq_num, Addr instPC,
bool pred_taken, void *bp_history,
ThreadID _tid)
: seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
wasCall(0), wasReturn(0), validBTB(0)
{}
bool operator==(const PredictorHistory &entry) const {
return this->seqNum == entry.seqNum;
}
/** The sequence number for the predictor history entry. */
InstSeqNum seqNum;
/** The PC associated with the sequence number. */
Addr pc;
/** Pointer to the history object passed back from the branch
* predictor. It is used to update or restore state of the
* branch predictor.
*/
void *bpHistory;
/** The RAS target (only valid if a return). */
TheISA::PCState RASTarget;
/** The RAS index of the instruction (only valid if a call). */
unsigned RASIndex;
/** The thread id. */
ThreadID tid;
/** Whether or not it was predicted taken. */
bool predTaken;
/** Whether or not the RAS was used. */
bool usedRAS;
/** Whether or not the instruction was a call. */
bool wasCall;
/** Whether or not the instruction was a return. */
bool wasReturn;
/** Whether or not the instruction had a valid BTB entry. */
bool validBTB;
};
typedef std::list<PredictorHistory> History;
typedef typename History::iterator HistoryIt;
/**
* The per-thread predictor history. This is used to update the predictor
* as instructions are committed, or restore it to the proper state after
* a squash.
*/
History predHist[Impl::MaxThreads];
/** The local branch predictor. */
LocalBP *localBP;
/** The tournament branch predictor. */
TournamentBP *tournamentBP;
/** The BTB. */
DefaultBTB BTB;
/** The per-thread return address stack. */
ReturnAddrStack RAS[Impl::MaxThreads];
/** Stat for number of BP lookups. */
Stats::Scalar lookups;
/** Stat for number of conditional branches predicted. */
Stats::Scalar condPredicted;
/** Stat for number of conditional branches predicted incorrectly. */
Stats::Scalar condIncorrect;
/** Stat for number of BTB lookups. */
Stats::Scalar BTBLookups;
/** Stat for number of BTB hits. */
Stats::Scalar BTBHits;
/** Stat for number of times the BTB is correct. */
Stats::Scalar BTBCorrect;
/** Stat for number of times the RAS is used to get a target. */
Stats::Scalar usedRAS;
/** Stat for number of times the RAS is incorrect. */
Stats::Scalar RASIncorrect;
};
#endif // __CPU_O3_BPRED_UNIT_HH__

View File

@ -0,0 +1,517 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include <algorithm>
#include "arch/isa_traits.hh"
#include "arch/types.hh"
#include "arch/utility.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/o3/bpred_unit.hh"
#include "debug/Fetch.hh"
#include "params/DerivO3CPU.hh"
/**
 * Builds the branch prediction unit: sizes the BTB, instantiates the
 * direction predictor named by params->predType ("local" or "tournament")
 * and initializes the return address stack of every thread. Any other
 * predType value is a fatal configuration error.
 *
 * @param params CPU parameters supplying the predictor, BTB and RAS sizes.
 */
template<class Impl>
BPredUnit<Impl>::BPredUnit(DerivO3CPUParams *params)
    : _name(params->name + ".BPredUnit"),
      BTB(params->BTBEntries,
          params->BTBTagSize,
          params->instShiftAmt)
{
    // Only one of the two predictor pointers is populated below; null both
    // first so the unused one never holds an indeterminate value.
    localBP = NULL;
    tournamentBP = NULL;

    // Setup the selected predictor.
    if (params->predType == "local") {
        localBP = new LocalBP(params->localPredictorSize,
                              params->localCtrBits,
                              params->instShiftAmt);
        predictor = Local;
    } else if (params->predType == "tournament") {
        tournamentBP = new TournamentBP(params->localPredictorSize,
                                        params->localCtrBits,
                                        params->localHistoryTableSize,
                                        params->localHistoryBits,
                                        params->globalPredictorSize,
                                        params->globalHistoryBits,
                                        params->globalCtrBits,
                                        params->choicePredictorSize,
                                        params->choiceCtrBits,
                                        params->instShiftAmt);
        predictor = Tournament;
    } else {
        fatal("Invalid BP selected!");
    }

    // Every thread owns its own return address stack of the same size.
    for (int i = 0; i < Impl::MaxThreads; ++i)
        RAS[i].init(params->RASSize);
}
/**
 * Registers the branch prediction statistics, naming each one relative to
 * this unit's name() and attaching its description.
 */
template <class Impl>
void
BPredUnit<Impl>::regStats()
{
    // Every stat name shares the unit's name as its prefix.
    const std::string prefix = name();

    lookups.name(prefix + ".lookups").desc("Number of BP lookups");

    condPredicted.name(prefix + ".condPredicted")
        .desc("Number of conditional branches predicted");

    condIncorrect.name(prefix + ".condIncorrect")
        .desc("Number of conditional branches incorrect");

    BTBLookups.name(prefix + ".BTBLookups").desc("Number of BTB lookups");

    BTBHits.name(prefix + ".BTBHits").desc("Number of BTB hits");

    BTBCorrect.name(prefix + ".BTBCorrect")
        .desc("Number of correct BTB predictions (this stat may not "
              "work properly.");

    usedRAS.name(prefix + ".usedRAS")
        .desc("Number of times the RAS was used to get a target.");

    RASIncorrect.name(prefix + ".RASInCorrect")
        .desc("Number of incorrect RAS predictions.");
}
/**
 * Drops the outstanding prediction history of every thread when the CPU is
 * switched out, by squashing each thread back to sequence number 0.
 */
template <class Impl>
void
BPredUnit<Impl>::switchOut()
{
    int thread = 0;
    while (thread < Impl::MaxThreads) {
        squash(0, thread);
        ++thread;
    }
}
/**
 * Hook invoked when this unit takes over from another CPU. It is
 * intentionally a no-op: the predictor, BTB and RAS contents are kept as
 * they are.
 */
template <class Impl>
void
BPredUnit<Impl>::takeOverFrom()
{
// Can reset all predictor state, but it's not necessarily better
// than leaving it be.
// NOTE(review): the disabled code below refers to a member named BP, which
// is not among the members visible in this class -- confirm before enabling.
/*
for (int i = 0; i < Impl::MaxThreads; ++i)
RAS[i].reset();
BP.reset();
BTB.reset();
*/
}
/**
 * Predicts the outcome of a control instruction and moves the PC to the
 * predicted next instruction.
 *
 * Unconditional control instructions are always predicted taken; all others
 * consult the direction predictor through BPLookup(). When the prediction is
 * taken, returns take their target from the thread's return address stack
 * (which is then popped), and every other instruction looks its target up in
 * the BTB, with calls first pushing the current PC onto the RAS. A BTB miss
 * turns the prediction into not-taken. A PredictorHistory record describing
 * the prediction is always pushed to the front of predHist[tid].
 *
 * @param inst The instruction being predicted.
 * @param pc In: PC of the instruction. Out: the predicted next PC.
 * @param tid Thread the instruction belongs to.
 * @return True if the instruction is predicted taken.
 */
template <class Impl>
bool
BPredUnit<Impl>::predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid)
{
// See if branch predictor predicts taken.
// If so, get its target addr either from the BTB or the RAS.
// Save off record of branch stuff so the RAS can be fixed
// up once it's done.
bool pred_taken = false;
TheISA::PCState target = pc;
++lookups;
// Opaque predictor state filled in by BPUncond()/BPLookup() and stored in
// the history record.
void *bp_history = NULL;
if (inst->isUncondCtrl()) {
DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid);
pred_taken = true;
// Tell the BP there was an unconditional branch.
BPUncond(bp_history);
} else {
++condPredicted;
pred_taken = BPLookup(pc.instAddr(), bp_history);
DPRINTF(Fetch, "BranchPred:[tid:%i]: [sn:%i] Branch predictor"
" predicted %i for PC %s\n",
tid, inst->seqNum, pred_taken, inst->pcState());
}
DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction history "
"for PC %s\n",
tid, inst->seqNum, inst->pcState());
PredictorHistory predict_record(inst->seqNum, pc.instAddr(),
pred_taken, bp_history, tid);
// Now lookup in the BTB or RAS.
if (pred_taken) {
if (inst->isReturn()) {
++usedRAS;
predict_record.wasReturn = true;
// If it's a function return call, then look up the address
// in the RAS.
TheISA::PCState rasTop = RAS[tid].top();
target = TheISA::buildRetPC(pc, rasTop);
// Record the top entry of the RAS, and its index.
predict_record.usedRAS = true;
predict_record.RASIndex = RAS[tid].topIdx();
predict_record.RASTarget = rasTop;
// NOTE(review): 16 is a hard-coded bound, while the RAS is sized from
// params->RASSize in the constructor -- confirm larger stacks cannot
// trip this assert.
assert(predict_record.RASIndex < 16);
RAS[tid].pop();
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s is a return, "
"RAS predicted target: %s, RAS index: %i.\n",
tid, inst->pcState(), target, predict_record.RASIndex);
} else {
++BTBLookups;
if (inst->isCall()) {
RAS[tid].push(pc);
// Record that it was a call so that the top RAS entry can
// be popped off if the speculation is incorrect.
predict_record.wasCall = true;
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s was a "
"call, adding %s to the RAS index: %i.\n",
tid, inst->pcState(), pc, RAS[tid].topIdx());
}
if (BTB.valid(pc.instAddr(), tid)) {
++BTBHits;
predict_record.validBTB = true;
// If it's not a return, use the BTB to get the target addr.
target = BTB.lookup(pc.instAddr(), tid);
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s predicted"
" target is %s.\n", tid, inst->pcState(), target);
} else {
DPRINTF(Fetch, "BranchPred: [tid:%i]: BTB doesn't have a "
"valid entry.\n",tid);
pred_taken = false;
// The Direction of the branch predictor is altered because the
// BTB did not have an entry
// The predictor needs to be updated accordingly
if (!inst->isCall() && !inst->isReturn()) {
BPBTBUpdate(pc.instAddr(), bp_history);
DPRINTF(Fetch, "BranchPred: [tid:%i]:[sn:%i] BPBTBUpdate"
" called for %s\n",
tid, inst->seqNum, inst->pcState());
} else if (inst->isCall() && !inst->isUncondCtrl()) {
// Undo the RAS push made above: this conditional call is now
// predicted not taken.
RAS[tid].pop();
}
// No BTB target available, so step target with advancePC() instead
// (presumably to the fall-through PC -- confirm).
TheISA::advancePC(target, inst->staticInst);
}
}
} else {
if (inst->isReturn()) {
predict_record.wasReturn = true;
}
TheISA::advancePC(target, inst->staticInst);
}
// Hand the predicted next PC back to the caller.
pc = target;
// Newest predictions live at the front of the per-thread history.
predHist[tid].push_front(predict_record);
DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added."
"predHist.size(): %i\n", tid, inst->seqNum, predHist[tid].size());
return pred_taken;
}
/**
 * Retires prediction history for a thread: every entry whose sequence
 * number is at or below done_sn is passed to BPUpdate() (as a
 * non-squashing update using the recorded prediction) and then removed,
 * starting from the back of the history list.
 *
 * @param done_sn Sequence number up to which entries are retired.
 * @param tid Thread whose history is processed.
 */
template <class Impl>
void
BPredUnit<Impl>::update(const InstSeqNum &done_sn, ThreadID tid)
{
    DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until "
            "[sn:%lli].\n", tid, done_sn);

    History &thread_hist = predHist[tid];

    for (;;) {
        if (thread_hist.empty())
            break;

        // New entries are pushed at the front, so the back is the oldest.
        PredictorHistory &oldest = thread_hist.back();
        if (oldest.seqNum > done_sn)
            break;

        // Update the branch predictor with the correct results.
        BPUpdate(oldest.pc, oldest.predTaken, oldest.bpHistory, false);
        thread_hist.pop_back();
    }
}
/**
 * Squashes all prediction history of a thread that is younger than
 * squashed_sn. For each removed entry the RAS is repaired (restored if the
 * entry used the RAS, popped if it was a call with a valid BTB entry) and
 * the entry's predictor state is handed to BPSquash().
 *
 * @param squashed_sn Entries with a larger sequence number are removed.
 * @param tid Thread whose history is squashed.
 */
template <class Impl>
void
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, ThreadID tid)
{
    History &pred_hist = predHist[tid];

    while (!pred_hist.empty()) {
        PredictorHistory &youngest = pred_hist.front();
        if (youngest.seqNum <= squashed_sn)
            break;

        if (youngest.usedRAS) {
            DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
                    " target: %s.\n", tid,
                    youngest.RASIndex, youngest.RASTarget);
            RAS[tid].restore(youngest.RASIndex, youngest.RASTarget);
        } else if (youngest.wasCall && youngest.validBTB) {
            // A squashed call had pushed onto the RAS; take that entry off.
            DPRINTF(Fetch, "BranchPred: [tid: %i] Squashing"
                    " Call [sn:%i] PC: %s Popping RAS\n", tid,
                    youngest.seqNum, youngest.pc);
            RAS[tid].pop();
        }

        // This call should delete the bpHistory.
        BPSquash(youngest.bpHistory);

        DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
                "PC %s.\n", tid, youngest.seqNum,
                youngest.pc);

        // 'youngest' is invalid once the entry is removed.
        pred_hist.pop_front();

        DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n",
                tid, predHist[tid].size());
    }
}
/**
 * Handles a branch misprediction: squashes all history younger than the
 * mispredicted branch, then corrects the predictor, BTB and RAS using the
 * branch's own history entry and removes that entry.
 *
 * @param squashed_sn Sequence number of the mispredicted instruction.
 * @param corrTarget The correct target, written to the BTB when the branch
 *                   was actually taken.
 * @param actually_taken The resolved direction of the branch.
 * @param tid Thread the branch belongs to.
 */
template <class Impl>
void
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
                        const TheISA::PCState &corrTarget,
                        bool actually_taken,
                        ThreadID tid)
{
    // Now that we know that a branch was mispredicted, we need to undo
    // all the branches that have been seen up until this branch and
    // fix up everything.
    // NOTE: This should conceivably be called in 2 scenarios:
    // (1) After a branch is executed, it updates its status in the ROB.
    //     The commit stage then checks the ROB update and sends a signal to
    //     the fetch stage to squash history after the mispredict.
    // (2) In the decode stage, you can find out early if an unconditional
    //     PC-relative branch was predicted incorrectly. If so, a signal
    //     to the fetch stage is sent to squash history after the mispredict.
    History &pred_hist = predHist[tid];

    ++condIncorrect;

    DPRINTF(Fetch, "BranchPred: [tid:%i]: Squashing from sequence number %i, "
            "setting target to %s.\n",
            tid, squashed_sn, corrTarget);

    // Squash all branches AFTER this mispredicted branch.
    squash(squashed_sn, tid);

    // If there's a squash due to a syscall, there may not be an entry
    // corresponding to the squash. In that case, don't bother trying to
    // fix up the entry.
    if (pred_hist.empty())
        return;

    // After the squash above, the front entry is expected to be the
    // mispredicted branch itself.
    HistoryIt mispredicted = pred_hist.begin();

    if (mispredicted->seqNum != squashed_sn) {
        DPRINTF(Fetch, "Front sn %i != Squash sn %i\n",
                mispredicted->seqNum, squashed_sn);
        assert(pred_hist.front().seqNum == squashed_sn);
    }

    if (mispredicted->usedRAS)
        ++RASIncorrect;

    BPUpdate(mispredicted->pc, actually_taken,
             mispredicted->bpHistory, true);

    if (actually_taken) {
        if (mispredicted->wasReturn && !mispredicted->usedRAS) {
            DPRINTF(Fetch, "BranchPred: [tid: %i] Incorrectly predicted"
                    " return [sn:%i] PC: %s\n", tid, mispredicted->seqNum,
                    mispredicted->pc);
            RAS[tid].pop();
        }

        DPRINTF(Fetch,"BranchPred: [tid: %i] BTB Update called for [sn:%i]"
                " PC: %s\n", tid, mispredicted->seqNum, mispredicted->pc);
        BTB.update(mispredicted->pc, corrTarget, tid);
    } else if (mispredicted->usedRAS) {
        // Actually not taken, but the prediction consumed a RAS entry.
        DPRINTF(Fetch,"BranchPred: [tid: %i] Incorrectly predicted"
                " return [sn:%i] PC: %s Restoring RAS\n", tid,
                mispredicted->seqNum, mispredicted->pc);
        DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS"
                " to: %i, target: %s.\n", tid,
                mispredicted->RASIndex, mispredicted->RASTarget);
        RAS[tid].restore(mispredicted->RASIndex, mispredicted->RASTarget);
    } else if (mispredicted->wasCall && mispredicted->validBTB) {
        // Actually not taken, but the call had pushed onto the RAS.
        DPRINTF(Fetch, "BranchPred: [tid: %i] Incorrectly predicted"
                " Call [sn:%i] PC: %s Popping RAS\n", tid,
                mispredicted->seqNum, mispredicted->pc);
        RAS[tid].pop();
    }

    DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i]"
            " PC %s Actually Taken: %i\n", tid, mispredicted->seqNum,
            mispredicted->pc, actually_taken);

    pred_hist.erase(mispredicted);

    DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid,
            predHist[tid].size());
}
/**
 * Notifies the direction predictor of an unconditional branch. Only the
 * tournament predictor is informed; for any other predictor this does
 * nothing and bp_history is left untouched.
 *
 * @param bp_history Predictor history pointer passed on to the predictor.
 */
template <class Impl>
void
BPredUnit<Impl>::BPUncond(void * &bp_history)
{
    // Only the tournament predictor cares about unconditional branches.
    if (predictor != Tournament)
        return;

    tournamentBP->uncondBr(bp_history);
}
/**
 * Forwards a squash of one prediction to the active direction predictor.
 * Panics if the predictor type is neither Local nor Tournament.
 *
 * @param bp_history Predictor history object of the squashed prediction.
 */
template <class Impl>
void
BPredUnit<Impl>::BPSquash(void *bp_history)
{
    switch (predictor) {
      case Local:
        localBP->squash(bp_history);
        break;
      case Tournament:
        tournamentBP->squash(bp_history);
        break;
      default:
        panic("Predictor type is unexpected value!");
    }
}
/**
 * Asks the active direction predictor for a prediction. Panics if the
 * predictor type is neither Local nor Tournament.
 *
 * @param instPC Address of the instruction to predict.
 * @param bp_history Predictor history pointer passed on to the predictor.
 * @return The value returned by the selected predictor's lookup().
 */
template <class Impl>
bool
BPredUnit<Impl>::BPLookup(Addr instPC, void * &bp_history)
{
    switch (predictor) {
      case Local:
        return localBP->lookup(instPC, bp_history);
      case Tournament:
        return tournamentBP->lookup(instPC, bp_history);
      default:
        panic("Predictor type is unexpected value!");
    }
}
/**
 * Forwards a BTB-miss notification to the active direction predictor.
 * Panics if the predictor type is neither Local nor Tournament.
 *
 * @param instPC Address of the instruction whose BTB lookup failed.
 * @param bp_history Predictor history pointer passed on to the predictor.
 */
template <class Impl>
void
BPredUnit<Impl>::BPBTBUpdate(Addr instPC, void * &bp_history)
{
    switch (predictor) {
      case Local:
        localBP->BTBUpdate(instPC, bp_history);
        break;
      case Tournament:
        tournamentBP->BTBUpdate(instPC, bp_history);
        break;
      default:
        panic("Predictor type is unexpected value!");
    }
}
/**
 * Feeds a resolved branch outcome to the active direction predictor.
 * The squashed flag is only passed on to the tournament predictor. Panics
 * if the predictor type is neither Local nor Tournament.
 *
 * @param instPC Address of the branch.
 * @param taken Direction reported to the predictor.
 * @param bp_history Predictor history object of the prediction.
 * @param squashed Whether the update results from a squash.
 */
template <class Impl>
void
BPredUnit<Impl>::BPUpdate(Addr instPC, bool taken, void *bp_history,
                          bool squashed)
{
    switch (predictor) {
      case Local:
        localBP->update(instPC, taken, bp_history);
        break;
      case Tournament:
        tournamentBP->update(instPC, taken, bp_history, squashed);
        break;
      default:
        panic("Predictor type is unexpected value!");
    }
}
/**
 * Prints the prediction history of every thread that has any entries:
 * the list size followed by one line per entry, front to back.
 */
template <class Impl>
void
BPredUnit<Impl>::dump()
{
    for (int i = 0; i < Impl::MaxThreads; ++i) {
        // Threads without outstanding history print nothing at all.
        if (predHist[i].empty())
            continue;

        cprintf("predHist[%i].size(): %i\n", i, predHist[i].size());

        for (HistoryIt entry = predHist[i].begin();
             entry != predHist[i].end(); ++entry) {
            cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, "
                    "bpHistory:%#x\n",
                    entry->seqNum, entry->pc,
                    entry->tid, entry->predTaken,
                    entry->bpHistory);
        }

        cprintf("\n");
    }
}

View File

@ -0,0 +1,110 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include <string>
#include "cpu/checker/cpu_impl.hh"
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/impl.hh"
#include "cpu/inst_seq.hh"
#include "params/O3Checker.hh"
#include "sim/process.hh"
#include "sim/sim_object.hh"
// Forward declaration; only the name is needed in this file.
class MemObject;
// Explicitly instantiate the Checker template for the O3 CPU implementation
// so that O3Checker below can derive from it.
template
class Checker<O3CPUImpl>;
/**
 * Concrete, non-templated checker for the O3 CPU. It adds nothing to
 * Checker<O3CPUImpl>; it exists so that SimObject configuration has a
 * plain class to instantiate.
 */
class O3Checker : public Checker<O3CPUImpl>
{
  public:
    /** Passes the parameter object straight through to the Checker base. */
    O3Checker(Params *p) : Checker<O3CPUImpl>(p) {}
};
////////////////////////////////////////////////////////////////////////
//
// CheckerCPU Simulation Object
//
/**
 * Creates the O3Checker described by this parameter object. A fresh
 * O3Checker::Params is filled in field by field; the instruction/load
 * limits and the progress interval are forced to 0 and interrupts to NULL
 * rather than copied.
 *
 * @return The newly allocated checker.
 */
O3Checker *
O3CheckerParams::create()
{
    O3Checker::Params *checker_params = new O3Checker::Params();

    checker_params->name = name;
    checker_params->numThreads = numThreads;

    // The checker never terminates simulation on instruction/load counts.
    checker_params->max_insts_any_thread = 0;
    checker_params->max_insts_all_threads = 0;
    checker_params->max_loads_any_thread = 0;
    checker_params->max_loads_all_threads = 0;

    checker_params->exitOnError = exitOnError;
    checker_params->updateOnError = updateOnError;
    checker_params->warnOnlyOnLoadError = warnOnlyOnLoadError;
    checker_params->clock = clock;
    checker_params->tracer = tracer;
    checker_params->progress_interval = 0;
    checker_params->itb = itb;
    checker_params->dtb = dtb;
    checker_params->system = system;
    checker_params->cpu_id = cpu_id;
    checker_params->profile = profile;
    checker_params->interrupts = NULL;
    checker_params->workload = workload;

    // Hack to touch all parameters: the values that are overridden above
    // are still read once here. Consider not deriving Checker from
    // BaseCPU; it's not really a CPU in the end.
    Counter touched_count = max_insts_any_thread;
    touched_count = max_insts_all_threads;
    touched_count = max_loads_any_thread;
    touched_count = max_loads_all_threads;
    touched_count++;
    Tick touched_tick = progress_interval;
    touched_tick++;

    return new O3Checker(checker_params);
}

View File

@ -0,0 +1,222 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_COMM_HH__
#define __CPU_O3_COMM_HH__
#include <vector>
#include "arch/types.hh"
#include "base/types.hh"
#include "cpu/inst_seq.hh"
#include "sim/faults.hh"
// Typedef for the physical register index type (a signed short). Although
// the Impl would be the most likely location for this, there are a few
// classes that need this typedef yet are not templated on the Impl. For now
// it is defined here.
typedef short int PhysRegIndex;
/** Struct that defines the information passed from fetch to decode. */
template<class Impl>
struct DefaultFetchDefaultDecode {
typedef typename Impl::DynInstPtr DynInstPtr;
// NOTE(review): presumably the number of valid entries in insts -- confirm.
int size;
// Instruction slots; the array holds at most Impl::MaxWidth instructions.
DynInstPtr insts[Impl::MaxWidth];
// NOTE(review): the three fields below carry fetch-fault information; who
// sets and clears them is not visible here -- confirm against the fetch and
// decode stages.
Fault fetchFault;
InstSeqNum fetchFaultSN;
bool clearFetchFault;
};
/** Struct that defines the information passed from decode to rename. */
template<class Impl>
struct DefaultDecodeDefaultRename {
typedef typename Impl::DynInstPtr DynInstPtr;
// NOTE(review): presumably the number of valid entries in insts -- confirm.
int size;
// Instruction slots; the array holds at most Impl::MaxWidth instructions.
DynInstPtr insts[Impl::MaxWidth];
};
/** Struct that defines the information passed from rename to IEW. */
template<class Impl>
struct DefaultRenameDefaultIEW {
typedef typename Impl::DynInstPtr DynInstPtr;
// NOTE(review): presumably the number of valid entries in insts -- confirm.
int size;
// Instruction slots; the array holds at most Impl::MaxWidth instructions.
DynInstPtr insts[Impl::MaxWidth];
};
/** Struct that defines the information passed from IEW to commit. */
template<class Impl>
struct DefaultIEWDefaultCommit {
typedef typename Impl::DynInstPtr DynInstPtr;
// NOTE(review): presumably the number of valid entries in insts -- confirm.
int size;
// Instruction slots; the array holds at most Impl::MaxWidth instructions.
DynInstPtr insts[Impl::MaxWidth];
// Everything below is kept per thread (one slot per Impl::MaxThreads).
// NOTE(review): the names suggest squash / misprediction details reported
// by IEW; exact semantics are not visible here -- confirm against the IEW
// and commit stages.
DynInstPtr mispredictInst[Impl::MaxThreads];
Addr mispredPC[Impl::MaxThreads];
InstSeqNum squashedSeqNum[Impl::MaxThreads];
TheISA::PCState pc[Impl::MaxThreads];
bool squash[Impl::MaxThreads];
bool branchMispredict[Impl::MaxThreads];
bool branchTaken[Impl::MaxThreads];
bool includeSquashInst[Impl::MaxThreads];
};
/**
 * Struct holding a group of instructions, with the same layout as the
 * decode-to-rename and rename-to-IEW structs.
 * NOTE(review): presumably used for instructions travelling from issue to
 * execute -- confirm against its users.
 */
template<class Impl>
struct IssueStruct {
typedef typename Impl::DynInstPtr DynInstPtr;
// NOTE(review): presumably the number of valid entries in insts -- confirm.
int size;
// Instruction slots; the array holds at most Impl::MaxWidth instructions.
DynInstPtr insts[Impl::MaxWidth];
};
/**
 * Struct that defines all backwards communication. It bundles one
 * per-thread record for each of decode, rename, IEW and commit, plus
 * per-thread block/unblock flags for those four stages.
 */
template<class Impl>
struct TimeBufStruct {
typedef typename Impl::DynInstPtr DynInstPtr;
// Information sent backwards by decode.
// NOTE(review): field meanings are inferred from their names only --
// confirm against the decode and fetch stages.
struct decodeComm {
uint64_t branchAddr;
InstSeqNum doneSeqNum;
DynInstPtr mispredictInst;
DynInstPtr squashInst;
Addr mispredPC;
TheISA::PCState nextPC;
unsigned branchCount;
bool squash;
bool predIncorrect;
bool branchMispredict;
bool branchTaken;
};
decodeComm decodeInfo[Impl::MaxThreads];
// Information sent backwards by rename; currently carries no fields.
struct renameComm {
};
renameComm renameInfo[Impl::MaxThreads];
// Information sent backwards by IEW: IQ/LSQ occupancy and dispatch counts.
struct iewComm {
// Also eventually include skid buffer space.
bool usedIQ;
unsigned freeIQEntries;
bool usedLSQ;
unsigned freeLSQEntries;
unsigned iqCount;
unsigned ldstqCount;
unsigned dispatched;
unsigned dispatchedToLSQ;
};
iewComm iewInfo[Impl::MaxThreads];
// Information sent backwards by commit, grouped by consuming stage.
struct commitComm {
/////////////// For Decode, IEW, Rename, Fetch ///////////
bool squash;
bool robSquashing;
////////// For Fetch & IEW /////////////
// Represents the instruction that has either been retired or
// squashed. Similar to having a single bus that broadcasts the
// retired or squashed sequence number.
InstSeqNum doneSeqNum;
////////////// For Rename /////////////////
// Rename should re-read number of free rob entries
bool usedROB;
// Notify Rename that the ROB is empty
bool emptyROB;
// Tell Rename how many free entries it has in the ROB
unsigned freeROBEntries;
///////////// For Fetch //////////////////
// Provide fetch the instruction that mispredicted; if this
// pointer is non-null a misprediction occurred
DynInstPtr mispredictInst;
// Was the branch taken or not
bool branchTaken;
// The pc of the next instruction to execute. This is the next
// instruction for a branch mispredict, but the same instruction for
// order violation and the like
TheISA::PCState pc;
// Instruction that caused a non-mispredict squash
DynInstPtr squashInst;
// If an interrupt is pending and fetch should stall
bool interruptPending;
// If the interrupt ended up being cleared before being handled
bool clearInterrupt;
//////////// For IEW //////////////////
// Communication specifically to the IQ to tell the IQ that it can
// schedule a non-speculative instruction.
InstSeqNum nonSpecSeqNum;
// Hack for now to send back an uncached access to the IEW stage.
bool uncached;
DynInstPtr uncachedLoad;
};
commitComm commitInfo[Impl::MaxThreads];
// Per-thread block / unblock flags, one pair per stage.
// NOTE(review): presumably raised by the named stage to stall or resume
// the stages before it -- confirm against the stage implementations.
bool decodeBlock[Impl::MaxThreads];
bool decodeUnblock[Impl::MaxThreads];
bool renameBlock[Impl::MaxThreads];
bool renameUnblock[Impl::MaxThreads];
bool iewBlock[Impl::MaxThreads];
bool iewUnblock[Impl::MaxThreads];
bool commitBlock[Impl::MaxThreads];
bool commitUnblock[Impl::MaxThreads];
};
#endif //__CPU_O3_COMM_HH__

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/commit_impl.hh"
#include "cpu/o3/isa_specific.hh"
// Explicitly instantiate the commit stage template for the O3 CPU
// implementation in this translation unit.
template class DefaultCommit<O3CPUImpl>;

View File

@ -0,0 +1,497 @@
/*
* Copyright (c) 2010 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#ifndef __CPU_O3_COMMIT_HH__
#define __CPU_O3_COMMIT_HH__
#include <queue>
#include "base/statistics.hh"
#include "cpu/exetrace.hh"
#include "cpu/inst_seq.hh"
#include "cpu/timebuf.hh"
// Forward declarations: this header only uses these types by pointer or as
// a typedef target, so their full definitions are not needed here.
struct DerivO3CPUParams;
template <class>
struct O3ThreadState;
/**
* DefaultCommit handles single threaded and SMT commit. Its width is
* specified by the parameters; each cycle it tries to commit that
* many instructions. The SMT policy decides which thread it tries to
* commit instructions from. Non-speculative instructions must reach
* the head of the ROB before they are ready to execute; once they
* reach the head, commit will broadcast the instruction's sequence
* number to the previous stages so that they can issue/execute the
* instruction. Only one non-speculative instruction is handled per
* cycle. Commit is responsible for handling all back-end initiated
* redirects. It receives the redirect, and then broadcasts it to all
* stages, indicating the sequence number they should squash until,
* and any necessary branch misprediction information as well. It
* prioritizes redirects by instruction's age, only broadcasting a
* redirect if it corresponds to an instruction that should currently
* be in the ROB. This is done by tracking the sequence number of the
* youngest instruction in the ROB, which gets updated to any
* squashing instruction's sequence number, and only broadcasting a
* redirect if it corresponds to an older instruction. Commit also
* supports multiple cycle squashing, to model a ROB that can only
* remove a certain number of instructions per cycle.
*/
template<class Impl>
class DefaultCommit
{
public:
// Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol CPUPol;
typedef typename CPUPol::RenameMap RenameMap;
typedef typename CPUPol::ROB ROB;
typedef typename CPUPol::TimeStruct TimeStruct;
typedef typename CPUPol::FetchStruct FetchStruct;
typedef typename CPUPol::IEWStruct IEWStruct;
typedef typename CPUPol::RenameStruct RenameStruct;
typedef typename CPUPol::Fetch Fetch;
typedef typename CPUPol::IEW IEW;
typedef O3ThreadState<Impl> Thread;
/** Event class used to schedule a squash due to a trap (fault or
* interrupt) to happen on a specific cycle.
*/
class TrapEvent : public Event {
private:
/** The commit stage this event was created for. */
DefaultCommit<Impl> *commit;
/** The thread the trap applies to. */
ThreadID tid;
public:
/** Creates a trap event for the given commit stage and thread. */
TrapEvent(DefaultCommit<Impl> *_commit, ThreadID _tid);
// NOTE(review): process() and description() presumably override the
// Event interface; their bodies are defined outside this header.
void process();
const char *description() const;
};
/** Overall commit status. Used to determine if the CPU can deschedule
* itself due to a lack of activity. This is the type of the _status and
* _nextStatus members.
*/
enum CommitStatus{
Active,
Inactive
};
/** Individual thread status. One value is kept per thread in the
* commitStatus array.
*/
enum ThreadStatus {
Running,
Idle,
ROBSquashing,
TrapPending,
FetchTrapPending
};
/** Commit policy for SMT mode. The selected value is stored in the
* commitPolicy member.
*/
enum CommitPolicy {
Aggressive,
RoundRobin,
OldestReady
};
private:
/** Overall commit status. */
CommitStatus _status;
/** Next commit status, to be set at the end of the cycle. */
CommitStatus _nextStatus;
/** Per-thread status. */
ThreadStatus commitStatus[Impl::MaxThreads];
/** Commit policy used in SMT mode. */
CommitPolicy commitPolicy;
public:
/** Construct a DefaultCommit with the given parameters. */
DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params);
/** Returns the name of the DefaultCommit. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Sets the list of threads. */
void setThreads(std::vector<Thread *> &threads);
/** Sets the main time buffer pointer, used for backwards communication. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
/** Sets the pointer to the queue coming from rename. */
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
/** Sets the pointer to the queue coming from IEW. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
/** Sets the pointer to the IEW stage. */
void setIEWStage(IEW *iew_stage);
/** Skid buffer between rename and commit. */
std::queue<DynInstPtr> skidBuffer;
/** The pointer to the IEW stage. Used solely to ensure that
* various events (traps, interrupts, syscalls) do not occur until
* all stores have written back.
*/
IEW *iewStage;
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to the committed state rename map. */
void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
/** Sets pointer to the ROB. */
void setROB(ROB *rob_ptr);
/** Initializes stage by sending back the number of free entries. */
void initStage();
/** Initializes the draining of commit. */
bool drain();
/** Resumes execution after draining. */
void resume();
/** Completes the switch out of commit. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Ticks the commit stage, which tries to commit instructions. */
void tick();
/** Handles any squashes that are sent from IEW, and adds instructions
* to the ROB and tries to commit instructions.
*/
void commit();
/** Returns the number of free ROB entries for a specific thread. */
size_t numROBFreeEntries(ThreadID tid);
/** Generates an event to schedule a squash due to a trap. */
void generateTrapEvent(ThreadID tid);
/** Records that commit needs to initiate a squash due to an
* external state update through the TC.
*/
void generateTCEvent(ThreadID tid);
private:
/** Updates the overall status of commit with the nextStatus, and
* tell the CPU if commit is active/inactive.
*/
void updateStatus();
/** Sets the next status based on threads' statuses, which becomes the
* current status at the end of the cycle.
*/
void setNextStatus();
/** Checks if the ROB is completed with squashing. This is for the case
* where the ROB can take multiple cycles to complete squashing.
*/
bool robDoneSquashing();
/** Returns if any of the threads have the number of ROB entries changed
* on this cycle. Used to determine if the number of free ROB entries needs
* to be sent back to previous stages.
*/
bool changedROBEntries();
/** Squashes all in flight instructions. */
void squashAll(ThreadID tid);
/** Handles squashing due to a trap. */
void squashFromTrap(ThreadID tid);
/** Handles squashing due to a TC write. */
void squashFromTC(ThreadID tid);
/** Handles squashing from instruction with SquashAfter set.
* This differs from the other squashes as it squashes following
* instructions instead of the current instruction and doesn't
* clean up various status bits about traps/tc writes pending.
*/
void squashAfter(ThreadID tid, DynInstPtr &head_inst,
uint64_t squash_after_seq_num);
/** Handles processing an interrupt. */
void handleInterrupt();
/** Get fetch redirecting so we can handle an interrupt */
void propagateInterrupt();
/** Commits as many instructions as possible. */
void commitInsts();
/** Tries to commit the head ROB instruction passed in.
* @param head_inst The instruction to be committed.
*/
bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
/** Gets instructions from rename and inserts them into the ROB. */
void getInsts();
/** Insert all instructions from rename into skidBuffer */
void skidInsert();
/** Marks completed instructions using information sent from IEW. */
void markCompletedInsts();
/** Gets the thread to commit, based on the SMT policy. */
ThreadID getCommittingThread();
/** Returns the thread ID to use based on a round robin policy. */
ThreadID roundRobin();
/** Returns the thread ID to use based on an oldest instruction policy. */
ThreadID oldestReady();
public:
/** Returns a copy of the commit PC state of the given thread. */
TheISA::PCState
pcState(ThreadID tid)
{
    return pc[tid];
}

/** Overwrites the commit PC state of the given thread with val. */
void
pcState(const TheISA::PCState &val, ThreadID tid)
{
    pc[tid] = val;
}

/** Returns the instruction address held in the thread's PC state. */
Addr
instAddr(ThreadID tid)
{
    TheISA::PCState &state = pc[tid];
    return state.instAddr();
}

/** Returns the next instruction address held in the thread's PC state. */
Addr
nextInstAddr(ThreadID tid)
{
    TheISA::PCState &state = pc[tid];
    return state.nextInstAddr();
}

/** Returns the micro PC held in the thread's PC state. */
Addr
microPC(ThreadID tid)
{
    TheISA::PCState &state = pc[tid];
    return state.microPC();
}
private:
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to write information heading to previous stages. */
typename TimeBuffer<TimeStruct>::wire toIEW;
/** Wire to read information from IEW (for ROB). */
typename TimeBuffer<TimeStruct>::wire robInfoFromIEW;
TimeBuffer<FetchStruct> *fetchQueue;
typename TimeBuffer<FetchStruct>::wire fromFetch;
/** IEW instruction queue interface. */
TimeBuffer<IEWStruct> *iewQueue;
/** Wire to read information from IEW queue. */
typename TimeBuffer<IEWStruct>::wire fromIEW;
/** Rename instruction queue interface, for ROB. */
TimeBuffer<RenameStruct> *renameQueue;
/** Wire to read information from rename queue. */
typename TimeBuffer<RenameStruct>::wire fromRename;
public:
/** ROB interface. */
ROB *rob;
private:
/** Pointer to O3CPU. */
O3CPU *cpu;
/** Vector of all of the threads. */
std::vector<Thread *> thread;
/** Records that commit has written to the time buffer this cycle. Used for
* the CPU to determine if it can deschedule itself if there is no activity.
*/
bool wroteToTimeBuffer;
/** Records if the number of ROB entries has changed this cycle. If it has,
* then the number of free entries must be re-broadcast.
*/
bool changedROBNumEntries[Impl::MaxThreads];
/** A counter of how many threads are currently squashing. */
ThreadID squashCounter;
/** Records if a thread has to squash this cycle due to a trap. */
bool trapSquash[Impl::MaxThreads];
/** Records if a thread has to squash this cycle due to a TC write. */
bool tcSquash[Impl::MaxThreads];
/** Priority List used for Commit Policy */
std::list<ThreadID> priority_list;
/** IEW to Commit delay, in ticks. */
unsigned iewToCommitDelay;
/** Commit to IEW delay, in ticks. */
unsigned commitToIEWDelay;
/** Rename to ROB delay, in ticks. */
unsigned renameToROBDelay;
unsigned fetchToCommitDelay;
/** Rename width, in instructions. Used so ROB knows how many
* instructions to get from the rename instruction queue.
*/
unsigned renameWidth;
/** Commit width, in instructions. */
unsigned commitWidth;
/** Number of Reorder Buffers */
unsigned numRobs;
/** Number of Active Threads */
ThreadID numThreads;
/** Is a drain pending. */
bool drainPending;
/** Is commit switched out. */
bool switchedOut;
/** The latency to handle a trap. Used when scheduling trap
* squash event.
*/
Tick trapLatency;
/** The interrupt fault. */
Fault interrupt;
/** The commit PC state of each thread. Refers to the instruction that
* is currently being processed/committed.
*/
TheISA::PCState pc[Impl::MaxThreads];
/** The sequence number of the youngest valid instruction in the ROB. */
InstSeqNum youngestSeqNum[Impl::MaxThreads];
/** The sequence number of the last committed instruction. */
InstSeqNum lastCommitedSeqNum[Impl::MaxThreads];
/** Records if there is a trap currently in flight. */
bool trapInFlight[Impl::MaxThreads];
/** Records if there were any stores committed this cycle. */
bool committedStores[Impl::MaxThreads];
/** Records if commit should check if the ROB is truly empty (see
commit_impl.hh). */
bool checkEmptyROB[Impl::MaxThreads];
/** Pointer to the list of active threads. */
std::list<ThreadID> *activeThreads;
/** Rename map interface. */
RenameMap *renameMap[Impl::MaxThreads];
/** True if last committed microop can be followed by an interrupt */
bool canHandleInterrupts;
/** Updates commit stats based on this instruction. */
void updateComInstStats(DynInstPtr &inst);
/** Stat for the total number of committed instructions. */
Stats::Scalar commitCommittedInsts;
/** Stat for the total number of committed ops. */
Stats::Scalar commitCommittedOps;
/** Stat for the total number of squashed instructions discarded by commit.
*/
Stats::Scalar commitSquashedInsts;
/** Stat for the total number of times commit is told to squash.
* @todo: Actually increment this stat.
*/
Stats::Scalar commitSquashEvents;
/** Stat for the total number of times commit has had to stall due to a non-
* speculative instruction reaching the head of the ROB.
*/
Stats::Scalar commitNonSpecStalls;
/** Stat for the total number of branch mispredicts that caused a squash. */
Stats::Scalar branchMispredicts;
/** Distribution of the number of committed instructions each cycle. */
Stats::Distribution numCommittedDist;
/** Total number of instructions committed. */
Stats::Vector instsCommitted;
/** Total number of ops (including micro ops) committed. */
Stats::Vector opsCommitted;
/** Total number of software prefetches committed. */
Stats::Vector statComSwp;
/** Stat for the total number of committed memory references. */
Stats::Vector statComRefs;
/** Stat for the total number of committed loads. */
Stats::Vector statComLoads;
/** Total number of committed memory barriers. */
Stats::Vector statComMembars;
/** Total number of committed branches. */
Stats::Vector statComBranches;
/** Total number of floating point instructions */
Stats::Vector statComFloating;
/** Total number of integer instructions */
Stats::Vector statComInteger;
/** Total number of function calls */
Stats::Vector statComFunctionCalls;
/** Number of cycles where the commit bandwidth limit is reached. */
Stats::Scalar commitEligibleSamples;
/** Number of instructions not committed due to bandwidth limits. */
Stats::Vector commitEligible;
};
#endif // __CPU_O3_COMMIT_HH__

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,822 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* Copyright (c) 2011 Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
* Rick Strong
*/
#ifndef __CPU_O3_CPU_HH__
#define __CPU_O3_CPU_HH__
#include <iostream>
#include <list>
#include <queue>
#include <set>
#include <vector>
#include "arch/types.hh"
#include "base/statistics.hh"
#include "config/the_isa.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/cpu_policy.hh"
#include "cpu/o3/scoreboard.hh"
#include "cpu/o3/thread_state.hh"
#include "cpu/activity.hh"
#include "cpu/base.hh"
#include "cpu/simple_thread.hh"
#include "cpu/timebuf.hh"
//#include "cpu/o3/thread_context.hh"
#include "params/DerivO3CPU.hh"
#include "sim/process.hh"
template <class>
class Checker;
class ThreadContext;
template <class>
class O3ThreadContext;
class Checkpoint;
class MemObject;
class Process;
struct BaseCPUParams;
/** Non-templated base class of the O3 CPU; derives from BaseCPU and is
* in turn the base of the templated FullO3CPU.
*/
class BaseO3CPU : public BaseCPU
{
//Stuff that's pretty ISA independent will go here.
public:
/** Constructs the base from generic BaseCPU parameters. */
BaseO3CPU(BaseCPUParams *params);
/** Registers statistics; defined out of line. */
void regStats();
};
/**
* FullO3CPU class, has each of the stages (fetch through commit)
* within it, as well as all of the time buffers between stages. The
* tick() function for the CPU is defined here.
*/
template <class Impl>
class FullO3CPU : public BaseO3CPU
{
public:
// Typedefs from the Impl here.
typedef typename Impl::CPUPol CPUPolicy;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
typedef O3ThreadState<Impl> ImplState;
typedef O3ThreadState<Impl> Thread;
typedef typename std::list<DynInstPtr>::iterator ListIt;
friend class O3ThreadContext<Impl>;
public:
/** Status values, used both for the CPU as a whole (_status) and for
 * each thread (_threadStatus). Numbered explicitly, in declaration order.
 */
enum Status {
    Running = 0,
    Idle = 1,
    Halted = 2,
    Blocked = 3,
    SwitchedOut = 4
};
TheISA::TLB * itb;
TheISA::TLB * dtb;
/** Overall CPU status. */
Status _status;
/** Per-thread status in CPU, used for SMT. */
Status _threadStatus[Impl::MaxThreads];
private:
/**
* IcachePort class for instruction fetch.
*
* A CpuPort that also keeps a pointer to the fetch stage it was built
* for.
*/
class IcachePort : public CpuPort
{
protected:
/** Pointer to fetch. */
DefaultFetch<Impl> *fetch;
public:
/** Constructs the port, naming it "<fetch name>-iport" and storing
* the fetch pointer. Both arguments are required, so this is not a
* default constructor. */
IcachePort(DefaultFetch<Impl> *_fetch, FullO3CPU<Impl>* _cpu)
: CpuPort(_fetch->name() + "-iport", _cpu), fetch(_fetch)
{ }
protected:
/** Timing version of receive. Handles setting fetch to the
* proper status to start fetching. */
virtual bool recvTimingResp(PacketPtr pkt);
/** Timing snoop requests are ignored on this port (empty body). */
virtual void recvTimingSnoopReq(PacketPtr pkt) { }
/** Handles doing a retry of a failed fetch. */
virtual void recvRetry();
};
/**
* DcachePort class for the load/store queue.
*
* A CpuPort that also keeps a pointer to the LSQ it was built for and
* reports itself as snooping.
*/
class DcachePort : public CpuPort
{
protected:
/** Pointer to LSQ. */
LSQ<Impl> *lsq;
public:
/** Constructs the port, naming it "<LSQ name>-dport" and storing the
* LSQ pointer. Both arguments are required, so this is not a default
* constructor. */
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
: CpuPort(_lsq->name() + "-dport", _cpu), lsq(_lsq)
{ }
protected:
/** Timing version of receive. Handles writing back and
* completing the load or store that has returned from
* memory. */
virtual bool recvTimingResp(PacketPtr pkt);
/** Receives a timing snoop request; defined out of line. */
virtual void recvTimingSnoopReq(PacketPtr pkt);
/** Handles doing a retry of the previous send. */
virtual void recvRetry();
/**
* As this CPU requires snooping to maintain the load store queue
* change the behaviour from the base CPU port.
*
* @return true since we have to snoop
*/
virtual bool isSnooping() const { return true; }
};
class TickEvent : public Event
{
private:
/** Pointer to the CPU. */
FullO3CPU<Impl> *cpu;
public:
/** Constructs a tick event. */
TickEvent(FullO3CPU<Impl> *c);
/** Processes a tick event, calling tick() on the CPU. */
void process();
/** Returns the description of the tick event. */
const char *description() const;
};
/** The tick event used for scheduling CPU ticks. */
TickEvent tickEvent;
/** Schedule tick event, regardless of its current state. */
void scheduleTickEvent(int delay)
{
if (tickEvent.squashed())
reschedule(tickEvent, nextCycle(curTick() + ticks(delay)));
else if (!tickEvent.scheduled())
schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
}
/** Unschedule tick event, regardless of its current state. Does nothing
 * unless the event is scheduled, in which case it is squashed.
 */
void
unscheduleTickEvent()
{
    if (!tickEvent.scheduled())
        return;

    tickEvent.squash();
}
/** Event used to activate a thread on the CPU; one instance per thread
* is kept in activateThreadEvent[].
*/
class ActivateThreadEvent : public Event
{
private:
/** Number of Thread to Activate */
ThreadID tid;
/** Pointer to the CPU. */
FullO3CPU<Impl> *cpu;
public:
/** Constructs the event. */
ActivateThreadEvent();
/** Initialize Event.
* NOTE(review): thread_num is an int while the stored tid is a
* ThreadID; presumably init() copies one into the other -- confirm
* against the definition. */
void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
/** Processes the event, calling activateThread() on the CPU. */
void process();
/** Returns the description of the event. */
const char *description() const;
};
/** Schedule thread to activate, regardless of its current state.
 *
 * A squashed activation event is rescheduled and an unscheduled one is
 * scheduled; an event that is already scheduled is left alone.
 *
 * @param tid   Thread whose activation event is scheduled.
 * @param delay Delay, converted with ticks(), added to the current tick.
 */
void
scheduleActivateThreadEvent(ThreadID tid, int delay)
{
    ActivateThreadEvent &activate = activateThreadEvent[tid];

    if (activate.squashed()) {
        reschedule(activate, nextCycle(curTick() + ticks(delay)));
        return;
    }

    if (activate.scheduled())
        return;

    Tick when = nextCycle(curTick() + ticks(delay));

    // Check if the deallocateEvent is also scheduled, and make
    // sure they do not happen at same time causing a sleep that
    // is never woken from.
    DeallocateContextEvent &dealloc = deallocateContextEvent[tid];
    const bool collides = dealloc.scheduled() && dealloc.when() == when;
    if (collides)
        when++;

    schedule(activate, when);
}
/** Unschedule activate thread event, regardless of its current state.
 * Does nothing unless the thread's event is scheduled, in which case it
 * is squashed.
 */
void
unscheduleActivateThreadEvent(ThreadID tid)
{
    ActivateThreadEvent &activate = activateThreadEvent[tid];
    if (!activate.scheduled())
        return;

    activate.squash();
}
/** Per-thread events used for scheduling thread activation. */
ActivateThreadEvent activateThreadEvent[Impl::MaxThreads];
/** Event used to deallocate a thread context on the CPU; one instance
* per thread is kept in deallocateContextEvent[].
*/
class DeallocateContextEvent : public Event
{
private:
/** Number of Thread to deactivate */
ThreadID tid;
/** Should the thread be removed from the CPU? */
bool remove;
/** Pointer to the CPU. */
FullO3CPU<Impl> *cpu;
public:
/** Constructs the event. */
DeallocateContextEvent();
/** Initialize Event */
void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
/** Processes the event; defined out of line.
* NOTE(review): this comment used to say "calling activateThread()
* on the CPU", which looks copied from ActivateThreadEvent --
* confirm what the definition really calls. */
void process();
/** Sets whether the thread should also be removed from the CPU. */
void setRemove(bool _remove) { remove = _remove; }
/** Returns the description of the event. */
const char *description() const;
};
/** Schedule cpu to deallocate thread context.
 *
 * A squashed deallocation event is rescheduled and an unscheduled one is
 * scheduled; in both cases the caller's remove request is recorded on
 * the event first. An event that is already scheduled (and not squashed)
 * is left untouched, including its remove flag.
 *
 * @param tid    Thread whose context should be deallocated.
 * @param remove Whether the thread should also be removed from the CPU;
 *               forwarded to the event through setRemove().
 * @param delay  Delay, converted with ticks(), added to the current tick.
 */
void
scheduleDeallocateContextEvent(ThreadID tid, bool remove, int delay)
{
    DeallocateContextEvent &dealloc = deallocateContextEvent[tid];

    if (dealloc.squashed()) {
        // Previously the remove argument was silently dropped, so the
        // event always ran with whatever flag it happened to hold.
        dealloc.setRemove(remove);
        reschedule(dealloc, nextCycle(curTick() + ticks(delay)));
    } else if (!dealloc.scheduled()) {
        dealloc.setRemove(remove);
        schedule(dealloc, nextCycle(curTick() + ticks(delay)));
    }
}
/** Unschedule thread deallocation in CPU. Does nothing unless the
 * thread's deallocation event is scheduled, in which case it is squashed.
 */
void
unscheduleDeallocateContextEvent(ThreadID tid)
{
    DeallocateContextEvent &dealloc = deallocateContextEvent[tid];
    if (!dealloc.scheduled())
        return;

    dealloc.squash();
}
/** Per-thread events used for scheduling thread context deallocation. */
DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads];
public:
/** Constructs a CPU with the given parameters. */
FullO3CPU(DerivO3CPUParams *params);
/** Destructor. */
~FullO3CPU();
/** Registers statistics. */
void regStats();
/** Forwards a page demap request to both the instruction TLB (itb) and
 * the data TLB (dtb), in that order.
 */
void
demapPage(Addr vaddr, uint64_t asn)
{
    itb->demapPage(vaddr, asn);
    dtb->demapPage(vaddr, asn);
}

/** Forwards a page demap request to the instruction TLB only. */
void
demapInstPage(Addr vaddr, uint64_t asn)
{
    itb->demapPage(vaddr, asn);
}

/** Forwards a page demap request to the data TLB only. */
void
demapDataPage(Addr vaddr, uint64_t asn)
{
    dtb->demapPage(vaddr, asn);
}
/** Ticks CPU, calling tick() on each stage, and checking the overall
* activity to see if the CPU should deschedule itself.
*/
void tick();
/** Initialize the CPU */
void init();
/** Returns the number of entries in the active threads list, converted
 * to int.
 */
int
numActiveThreads()
{
    const int active_count = activeThreads.size();
    return active_count;
}
/** Add Thread to Active Threads List */
void activateThread(ThreadID tid);
/** Remove Thread from Active Threads List */
void deactivateThread(ThreadID tid);
/** Setup CPU to insert a thread's context */
void insertThread(ThreadID tid);
/** Remove all of a thread's context from CPU */
void removeThread(ThreadID tid);
/** Count the Total Instructions Committed in the CPU. */
virtual Counter totalInsts() const;
/** Count the Total Ops (including micro ops) committed in the CPU. */
virtual Counter totalOps() const;
/** Add Thread to Active Threads List. */
void activateContext(ThreadID tid, int delay);
/** Remove Thread from Active Threads List */
void suspendContext(ThreadID tid);
/** Remove Thread from Active Threads List &&
* Possibly Remove Thread Context from CPU.
*/
bool scheduleDeallocateContext(ThreadID tid, bool remove, int delay = 1);
/** Remove Thread from Active Threads List &&
* Remove Thread Context from CPU.
*/
void haltContext(ThreadID tid);
/** Activate a Thread When CPU Resources are Available. */
void activateWhenReady(ThreadID tid);
/** Add or Remove a Thread Context in the CPU. */
void doContextSwitch();
/** Update The Order In Which We Process Threads. */
void updateThreadPriority();
/** Serialize state. */
virtual void serialize(std::ostream &os);
/** Unserialize from a checkpoint. */
virtual void unserialize(Checkpoint *cp, const std::string &section);
public:
/** Executes a syscall.
* @todo: Determine if this needs to be virtual.
*/
void syscall(int64_t callnum, ThreadID tid);
/** Starts draining the CPU's pipeline of all instructions in
* order to stop all memory accesses. */
virtual unsigned int drain(Event *drain_event);
/** Resumes execution after a drain. */
virtual void resume();
/** Signals to this CPU that a stage has completed switching out. */
void signalDrained();
/** Switches out this CPU. */
virtual void switchOut();
/** Takes over from another CPU. */
virtual void takeOverFrom(BaseCPU *oldCPU);
/** Returns the current global instruction sequence number and then
 * advances the counter (post-increment).
 */
InstSeqNum
getAndIncrementInstSeq()
{
    const InstSeqNum issued = globalSeqNum++;
    return issued;
}
/** Traps to handle given fault. */
void trap(Fault fault, ThreadID tid, StaticInstPtr inst);
/** HW return from error interrupt. */
Fault hwrei(ThreadID tid);
bool simPalCheck(int palFunc, ThreadID tid);
/** Returns the Fault for any valid interrupt. */
Fault getInterrupts();
/** Processes an interrupt fault. */
void processInterrupts(Fault interrupt);
/** Halting is not supported: calling this always panics. */
void
halt()
{
    panic("Halt not implemented!\n");
}

/** Check if this address is a valid instruction address. The address is
 * not inspected; every address is reported as valid.
 */
bool
validInstAddr(Addr addr)
{
    return true;
}

/** Check if this address is a valid data address. The address is not
 * inspected; every address is reported as valid.
 */
bool
validDataAddr(Addr addr)
{
    return true;
}
/** Register accessors. Index refers to the physical register index. */
/** Reads a miscellaneous register. */
TheISA::MiscReg readMiscRegNoEffect(int misc_reg, ThreadID tid);
/** Reads a misc. register, including any side effects the read
* might have as defined by the architecture.
*/
TheISA::MiscReg readMiscReg(int misc_reg, ThreadID tid);
/** Sets a miscellaneous register. */
void setMiscRegNoEffect(int misc_reg, const TheISA::MiscReg &val,
ThreadID tid);
/** Sets a misc. register, including any side effects the write
* might have as defined by the architecture.
*/
void setMiscReg(int misc_reg, const TheISA::MiscReg &val,
ThreadID tid);
uint64_t readIntReg(int reg_idx);
TheISA::FloatReg readFloatReg(int reg_idx);
TheISA::FloatRegBits readFloatRegBits(int reg_idx);
void setIntReg(int reg_idx, uint64_t val);
void setFloatReg(int reg_idx, TheISA::FloatReg val);
void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val);
uint64_t readArchIntReg(int reg_idx, ThreadID tid);
float readArchFloatReg(int reg_idx, ThreadID tid);
uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid);
/** Architectural register accessors. Looks up in the commit
* rename table to obtain the true physical index of the
* architected register first, then accesses that physical
* register.
*/
void setArchIntReg(int reg_idx, uint64_t val, ThreadID tid);
void setArchFloatReg(int reg_idx, float val, ThreadID tid);
void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid);
/** Sets the commit PC state of a specific thread. */
void pcState(const TheISA::PCState &newPCState, ThreadID tid);
/** Reads the commit PC state of a specific thread. */
TheISA::PCState pcState(ThreadID tid);
/** Reads the commit PC of a specific thread. */
Addr instAddr(ThreadID tid);
/** Reads the commit micro PC of a specific thread. */
MicroPC microPC(ThreadID tid);
/** Reads the next PC of a specific thread. */
Addr nextInstAddr(ThreadID tid);
/** Initiates a squash of all in-flight instructions for a given
* thread. The source of the squash is an external update of
* state through the TC.
*/
void squashFromTC(ThreadID tid);
/** Function to add instruction onto the head of the list of the
* instructions. Used when new instructions are fetched.
*/
ListIt addInst(DynInstPtr &inst);
/** Function to tell the CPU that an instruction has completed. */
void instDone(ThreadID tid, DynInstPtr &inst);
/** Remove an instruction from the front end of the list. There's
* no restriction on location of the instruction.
*/
void removeFrontInst(DynInstPtr &inst);
/** Remove all instructions that are not currently in the ROB.
* There's also an option to not squash delay slot instructions.*/
void removeInstsNotInROB(ThreadID tid);
/** Remove all instructions younger than the given sequence number. */
void removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid);
/** Removes the instruction pointed to by the iterator. */
inline void squashInstIt(const ListIt &instIt, ThreadID tid);
/** Cleans up all instructions on the remove list. */
void cleanUpRemovedInsts();
/** Debug function to print all instructions on the list. */
void dumpInsts();
public:
#ifndef NDEBUG
/** Count of total number of dynamic instructions in flight. */
int instcount;
#endif
/** List of all the instructions in flight. */
std::list<DynInstPtr> instList;
/** List of all the instructions that will be removed at the end of this
* cycle.
*/
std::queue<ListIt> removeList;
#ifdef DEBUG
/** Debug structure to keep track of the sequence numbers still in
* flight.
*/
std::set<InstSeqNum> snList;
#endif
/** Records if instructions need to be removed this cycle due to
* being retired or squashed.
*/
bool removeInstsThisCycle;
protected:
/** The fetch stage. */
typename CPUPolicy::Fetch fetch;
/** The decode stage. */
typename CPUPolicy::Decode decode;
/** The rename stage. */
typename CPUPolicy::Rename rename;
/** The issue/execute/writeback stages. */
typename CPUPolicy::IEW iew;
/** The commit stage. */
typename CPUPolicy::Commit commit;
/** The register file. */
typename CPUPolicy::RegFile regFile;
/** The free list. */
typename CPUPolicy::FreeList freeList;
/** The rename map. */
typename CPUPolicy::RenameMap renameMap[Impl::MaxThreads];
/** The commit rename map. */
typename CPUPolicy::RenameMap commitRenameMap[Impl::MaxThreads];
/** The re-order buffer. */
typename CPUPolicy::ROB rob;
/** Active Threads List */
std::list<ThreadID> activeThreads;
/** Integer Register Scoreboard */
Scoreboard scoreboard;
TheISA::ISA isa[Impl::MaxThreads];
/** Instruction port. Note that it has to appear after the fetch stage. */
IcachePort icachePort;
/** Data port. Note that it has to appear after the iew stages */
DcachePort dcachePort;
public:
/** Enum to give each stage a specific index, so when calling
 * activateStage() or deactivateStage(), they can specify which stage
 * is being activated/deactivated. NumStages comes last and therefore
 * equals the number of stage indices declared before it.
 */
enum StageIdx {
    FetchIdx = 0,
    DecodeIdx = 1,
    RenameIdx = 2,
    IEWIdx = 3,
    CommitIdx = 4,
    NumStages = 5
};
/** Typedefs from the Impl to get the structs that each of the
* time buffers should use.
*/
typedef typename CPUPolicy::TimeStruct TimeStruct;
typedef typename CPUPolicy::FetchStruct FetchStruct;
typedef typename CPUPolicy::DecodeStruct DecodeStruct;
typedef typename CPUPolicy::RenameStruct RenameStruct;
typedef typename CPUPolicy::IEWStruct IEWStruct;
/** The main time buffer to do backwards communication. */
TimeBuffer<TimeStruct> timeBuffer;
/** The fetch stage's instruction queue. */
TimeBuffer<FetchStruct> fetchQueue;
/** The decode stage's instruction queue. */
TimeBuffer<DecodeStruct> decodeQueue;
/** The rename stage's instruction queue. */
TimeBuffer<RenameStruct> renameQueue;
/** The IEW stage's instruction queue. */
TimeBuffer<IEWStruct> iewQueue;
private:
/** The activity recorder; used to tell if the CPU has any
* activity remaining or if it can go to idle and deschedule
* itself.
*/
ActivityRecorder activityRec;
public:
/** Records that there was time buffer activity this cycle by notifying
 * the activity recorder.
 */
void
activityThisCycle()
{
    activityRec.activity();
}

/** Changes a stage's status to active within the activity recorder. */
void
activateStage(const StageIdx idx)
{
    activityRec.activateStage(idx);
}

/** Changes a stage's status to inactive within the activity recorder. */
void
deactivateStage(const StageIdx idx)
{
    activityRec.deactivateStage(idx);
}
/** Wakes the CPU, rescheduling the CPU if it's not already active. */
void wakeCPU();
virtual void wakeup();
/** Gets a free thread id. Use if thread ids change across system. */
ThreadID getFreeTid();
public:
/** Returns the thread context obtained from the given thread's state
 * object via getTC(). No bounds check is made on tid.
 */
ThreadContext *
tcBase(ThreadID tid)
{
    Thread *thread_state = thread[tid];
    return thread_state->getTC();
}
/** The global sequence number counter. */
InstSeqNum globalSeqNum;//[Impl::MaxThreads];
/** Pointer to the checker, which can dynamically verify
* instruction results at run time. This can be set to NULL if it
* is not being used.
*/
Checker<Impl> *checker;
/** Pointer to the system. */
System *system;
/** Event to call process() on once draining has completed. */
Event *drainEvent;
/** Counter of how many stages have completed draining. */
int drainCount;
/** Pointers to all of the threads in the CPU. */
std::vector<Thread *> thread;
/** Whether or not the CPU should defer its registration. */
bool deferRegistration;
/** Is there a context switch pending? */
bool contextSwitch;
/** Threads Scheduled to Enter CPU */
std::list<int> cpuWaitList;
/** The cycle that the CPU was last running, used for statistics. */
Tick lastRunningCycle;
/** The cycle that the CPU was last activated by a new thread*/
Tick lastActivatedCycle;
/** Mapping for system thread id to cpu id */
std::map<ThreadID, unsigned> threadMap;
/** Available thread ids in the cpu*/
std::vector<ThreadID> tids;
/** CPU read function: passes all arguments unchanged to the load/store
 * queue owned by the IEW stage and returns its Fault.
 */
Fault
read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
     uint8_t *data, int load_idx)
{
    return iew.ldstQueue.read(req, sreqLow, sreqHigh, data, load_idx);
}
/** CPU write function: passes all arguments unchanged to the load/store
 * queue owned by the IEW stage and returns its Fault.
 */
Fault
write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
      uint8_t *data, int store_idx)
{
    return iew.ldstQueue.write(req, sreqLow, sreqHigh, data, store_idx);
}
/** Used by the fetch unit to get a hold of the instruction port.
 * Returns the CPU's IcachePort as a CpuPort reference.
 */
virtual CpuPort &
getInstPort()
{
    return icachePort;
}

/** Get the dcache port (used to find block size for translations).
 * Returns the CPU's DcachePort as a CpuPort reference.
 */
virtual CpuPort &
getDataPort()
{
    return dcachePort;
}
/** Address that accompanies lockFlag.
 * NOTE(review): undocumented in the original; presumably the address the
 * lock flag refers to -- confirm against the users of lockFlag.
 */
Addr lockAddr;
/** Temporary fix for the lock flag, works in the UP case. */
bool lockFlag;
/** Stat for total number of times the CPU is descheduled. */
Stats::Scalar timesIdled;
/** Stat for total number of cycles the CPU spends descheduled. */
Stats::Scalar idleCycles;
/** Stat for total number of cycles the CPU spends descheduled due to a
* quiesce operation or waiting for an interrupt. */
Stats::Scalar quiesceCycles;
/** Stat for the number of committed instructions per thread. */
Stats::Vector committedInsts;
/** Stat for the number of committed ops (including micro ops) per thread. */
Stats::Vector committedOps;
/** Stat for the total number of committed instructions. */
Stats::Scalar totalCommittedInsts;
/** Stat for the CPI per thread. */
Stats::Formula cpi;
/** Stat for the total CPI. */
Stats::Formula totalCpi;
/** Stat for the IPC per thread. */
Stats::Formula ipc;
/** Stat for the total IPC. */
Stats::Formula totalIpc;
/** Number of integer register file accesses (reads, then writes). */
Stats::Scalar intRegfileReads;
Stats::Scalar intRegfileWrites;
/** Number of floating-point register file accesses (reads, then writes). */
Stats::Scalar fpRegfileReads;
Stats::Scalar fpRegfileWrites;
/** Number of misc register file accesses (reads, then writes). */
Stats::Scalar miscRegfileReads;
Stats::Scalar miscRegfileWrites;
};
#endif // __CPU_O3_CPU_HH__

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include <string>
#include "cpu/o3/cpu.hh"
#include "cpu/o3/impl.hh"
#include "params/DerivO3CPU.hh"
/**
 * Concrete O3 CPU type: FullO3CPU instantiated with O3CPUImpl. It adds no
 * state or behaviour of its own; the only member is a constructor that
 * passes the parameter struct on to the base class.
 */
class DerivO3CPU : public FullO3CPU<O3CPUImpl>
{
  public:
    /** Builds the CPU from its parameter struct. */
    DerivO3CPU(DerivO3CPUParams *p) : FullO3CPU<O3CPUImpl>(p) {}
};
/**
 * Builds a DerivO3CPU from this parameter struct.
 *
 * Before constructing the CPU the parameters are normalised in place:
 *  - numThreads is forced to 1 in full-system mode; otherwise the workload
 *    list is validated against it (it must be non-empty and must not exceed
 *    numThreads) and numThreads is kept.
 *  - smtFetchPolicy is switched from "SingleThread" to "RoundRobin" when
 *    more than one thread is configured; any other value is left untouched.
 *  - instShiftAmt is set to 2.
 *
 * @return A newly allocated DerivO3CPU; ownership passes to the caller.
 */
DerivO3CPU *
DerivO3CPUParams::create()
{
    ThreadID actual_num_threads;
    if (FullSystem) {
        // Full-system only supports a single thread for the moment.
        actual_num_threads = 1;
    } else {
        if (workload.size() > numThreads) {
            fatal("Workload Size (%i) > Max Supported Threads (%i) on This CPU",
                  workload.size(), numThreads);
        } else if (workload.size() == 0) {
            fatal("Must specify at least one workload!");
        }

        // Use the larger of the configured thread count and the number of
        // workloads. Given the check above this is numThreads whenever
        // fatal() does not return.
        actual_num_threads =
            (numThreads >= workload.size()) ? numThreads : workload.size();
    }

    numThreads = actual_num_threads;

    // A multi-threaded CPU left on the "SingleThread" fetch policy is moved
    // to "RoundRobin". The original code also had an `else` branch that
    // assigned smtFetchPolicy to itself; that no-op has been removed.
    const std::string round_robin_policy = "RoundRobin";
    const std::string single_thread = "SingleThread";

    if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0)
        smtFetchPolicy = round_robin_policy;

    instShiftAmt = 2;

    return new DerivO3CPU(this);
}

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_CPU_POLICY_HH__
#define __CPU_O3_CPU_POLICY_HH__
#include "cpu/o3/bpred_unit.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/commit.hh"
#include "cpu/o3/decode.hh"
#include "cpu/o3/fetch.hh"
#include "cpu/o3/free_list.hh"
#include "cpu/o3/iew.hh"
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/o3/lsq_unit.hh"
#include "cpu/o3/mem_dep_unit.hh"
#include "cpu/o3/regfile.hh"
#include "cpu/o3/rename.hh"
#include "cpu/o3/rename_map.hh"
#include "cpu/o3/rob.hh"
#include "cpu/o3/store_set.hh"
/**
* Struct that defines the key classes to be used by the CPU. All
* classes use the typedefs defined here to determine what are the
* classes of the other stages and communication buffers. In order to
* change a structure such as the IQ, simply change the typedef here
* to use the desired class instead, and recompile. In order to
* create a different CPU to be used simultaneously with this one, see
* the alpha_impl.hh file for instructions.
*
* Several typedefs below reuse the name of the template they alias
* (e.g. BPredUnit, ROB, LSQ). In those cases the right-hand side is
* written with a leading `::` so that it names the global-scope template
* rather than the member typedef being declared.
*/
template<class Impl>
struct SimpleCPUPolicy
{
/** Typedef for the branch prediction unit (which includes the BP,
* RAS, and BTB).
*/
typedef ::BPredUnit<Impl> BPredUnit;
/** Typedef for the register file. Most classes assume a unified
* physical register file.
*/
typedef PhysRegFile<Impl> RegFile;
/** Typedef for the freelist of registers. */
typedef SimpleFreeList FreeList;
/** Typedef for the rename map. */
typedef SimpleRenameMap RenameMap;
/** Typedef for the ROB. */
typedef ::ROB<Impl> ROB;
/** Typedef for the instruction queue/scheduler. */
typedef InstructionQueue<Impl> IQ;
/** Typedef for the memory dependence unit (instantiated with StoreSet). */
typedef ::MemDepUnit<StoreSet, Impl> MemDepUnit;
/** Typedef for the LSQ. */
typedef ::LSQ<Impl> LSQ;
/** Typedef for the thread-specific LSQ units. */
typedef ::LSQUnit<Impl> LSQUnit;
/** Typedef for fetch. */
typedef DefaultFetch<Impl> Fetch;
/** Typedef for decode. */
typedef DefaultDecode<Impl> Decode;
/** Typedef for rename. */
typedef DefaultRename<Impl> Rename;
/** Typedef for Issue/Execute/Writeback. */
typedef DefaultIEW<Impl> IEW;
/** Typedef for commit. */
typedef DefaultCommit<Impl> Commit;
/** The struct for communication between fetch and decode. */
typedef DefaultFetchDefaultDecode<Impl> FetchStruct;
/** The struct for communication between decode and rename. */
typedef DefaultDecodeDefaultRename<Impl> DecodeStruct;
/** The struct for communication between rename and IEW. */
typedef DefaultRenameDefaultIEW<Impl> RenameStruct;
/** The struct for communication between IEW and commit. */
typedef DefaultIEWDefaultCommit<Impl> IEWStruct;
/** The struct for communication within the IEW stage. */
typedef ::IssueStruct<Impl> IssueStruct;
/** The struct for all backwards communication. */
typedef TimeBufStruct<Impl> TimeStruct;
};
#endif //__CPU_O3_CPU_POLICY_HH__

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/decode_impl.hh"
#include "cpu/o3/isa_specific.hh"
// Explicit instantiation: emits the DefaultDecode member definitions pulled
// in from decode_impl.hh for the O3CPUImpl configuration in this
// translation unit.
template class DefaultDecode<O3CPUImpl>;

View File

@ -0,0 +1,314 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_DECODE_HH__
#define __CPU_O3_DECODE_HH__
#include <queue>
#include "base/statistics.hh"
#include "cpu/timebuf.hh"
struct DerivO3CPUParams;
/**
* DefaultDecode class handles both single threaded and SMT
* decode. Its width is specified by the parameters; each cycle it
* tries to decode that many instructions. Because instructions are
* actually decoded when the StaticInst is created, this stage does
* not do much other than check any PC-relative branches.
*/
template<class Impl>
class DefaultDecode
{
private:
// Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol CPUPol;
// Typedefs from the CPU policy.
typedef typename CPUPol::FetchStruct FetchStruct;
typedef typename CPUPol::DecodeStruct DecodeStruct;
typedef typename CPUPol::TimeStruct TimeStruct;
public:
/** Overall decode stage status. Used to determine if the CPU can
* deschedule itself due to a lack of activity.
*/
enum DecodeStatus {
Active,
Inactive
};
/** Individual thread status. */
enum ThreadStatus {
Running,
Idle,
StartSquash,
Squashing,
Blocked,
Unblocking
};
private:
/** Decode status. */
DecodeStatus _status;
/** Per-thread status. */
ThreadStatus decodeStatus[Impl::MaxThreads];
public:
/** DefaultDecode constructor. */
DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params);
/** Returns the name of decode. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Sets the main backwards communication time buffer pointer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Sets pointer to time buffer used to communicate to the next stage. */
void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
/** Sets pointer to time buffer coming from fetch. */
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Drains the decode stage. */
bool drain();
/** Resumes execution after a drain. Intentionally a no-op. */
void resume() { }
/** Switches out the decode stage. Intentionally a no-op. */
void switchOut() { }
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Ticks decode, processing all input signals and decoding as many
* instructions as possible.
*/
void tick();
/** Determines what to do based on decode's current status.
* @param status_change decode() sets this variable if there was a status
* change (i.e. switching from blocking to unblocking).
* @param tid Thread id to decode instructions from.
*/
void decode(bool &status_change, ThreadID tid);
/** Processes instructions from fetch and passes them on to rename.
* Decoding of instructions actually happens when they are created in
* fetch, so this function mostly checks if PC-relative branches are
* correct.
*/
void decodeInsts(ThreadID tid);
private:
/** Inserts a thread's instructions into the skid buffer, to be decoded
* once decode unblocks.
*/
void skidInsert(ThreadID tid);
/** Returns true if all of the skid buffers are empty. */
bool skidsEmpty();
/** Updates overall decode status based on all of the threads' statuses. */
void updateStatus();
/** Separates instructions from fetch into individual lists of instructions
* sorted by thread.
*/
void sortInsts();
/** Reads all stall signals from the backwards communication timebuffer. */
void readStallSignals(ThreadID tid);
/** Checks all input signals and updates decode's status appropriately. */
bool checkSignalsAndUpdate(ThreadID tid);
/** Checks all stall signals, and returns true if any are set. */
bool checkStall(ThreadID tid) const;
/** Returns true if there are any instructions from fetch on this cycle. */
inline bool fetchInstsValid();
/** Switches decode to blocking, and signals back that decode has
* become blocked.
* @return Returns true if there is a status change.
*/
bool block(ThreadID tid);
/** Switches decode to unblocking if the skid buffer is empty, and
* signals back that decode has unblocked.
* @return Returns true if there is a status change.
*/
bool unblock(ThreadID tid);
/** Squashes if there is a PC-relative branch that was predicted
* incorrectly. Sends squash information back to fetch.
*/
void squash(DynInstPtr &inst, ThreadID tid);
public:
/** Squashes due to commit signalling a squash. Changes status to
* squashing and clears block/unblock signals as needed.
* @return The number of incoming instructions from fetch that were
* marked squashed.
*/
unsigned squash(ThreadID tid);
private:
// Interfaces to objects outside of decode.
/** CPU interface. */
O3CPU *cpu;
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to get rename's output from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromRename;
/** Wire to get iew's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromIEW;
/** Wire to get commit's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
/** Wire to write information heading to previous stages. */
// Might not be the best name as not only fetch will read it.
typename TimeBuffer<TimeStruct>::wire toFetch;
/** Decode instruction queue. */
TimeBuffer<DecodeStruct> *decodeQueue;
/** Wire used to write any information heading to rename. */
typename TimeBuffer<DecodeStruct>::wire toRename;
/** Fetch instruction queue interface. */
TimeBuffer<FetchStruct> *fetchQueue;
/** Wire to get fetch's output from fetch queue. */
typename TimeBuffer<FetchStruct>::wire fromFetch;
/** Queue of all instructions coming from fetch this cycle. */
std::queue<DynInstPtr> insts[Impl::MaxThreads];
/** Skid buffer between fetch and decode. */
std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
/** Variable that tracks if decode has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
*/
bool wroteToTimeBuffer;
/** Source of possible stalls. */
struct Stalls {
bool rename;
bool iew;
bool commit;
};
/** Tracks which stages are telling decode to stall. */
Stalls stalls[Impl::MaxThreads];
// NOTE(review): the four delays below are documented as "in ticks", but
// they are passed (negated) to TimeBuffer::getWire() as wire offsets --
// confirm the intended unit.
/** Rename to decode delay, in ticks. */
unsigned renameToDecodeDelay;
/** IEW to decode delay, in ticks. */
unsigned iewToDecodeDelay;
/** Commit to decode delay, in ticks. */
unsigned commitToDecodeDelay;
/** Fetch to decode delay, in ticks. */
unsigned fetchToDecodeDelay;
/** The width of decode, in instructions. */
unsigned decodeWidth;
/** Index of instructions being sent to rename. */
unsigned toRenameIndex;
/** Number of threads, taken from the CPU parameters. */
ThreadID numThreads;
/** List of active thread ids. */
std::list<ThreadID> *activeThreads;
/** Number of branches in flight. */
unsigned branchCount[Impl::MaxThreads];
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;
/** SeqNum of Squashing Branch Delay Instruction (used for MIPS).
* NOTE(review): declared as Addr although it is described as a sequence
* number -- confirm the intended type.
*/
Addr bdelayDoneSeqNum[Impl::MaxThreads];
/** Instruction used for squashing branch (used for MIPS). */
DynInstPtr squashInst[Impl::MaxThreads];
/** Tells when there is a pending delay slot inst. to send
* to rename. If there is, then wait to squash until after the next
* instruction (used for MIPS).
*/
bool squashAfterDelaySlot[Impl::MaxThreads];
/** Stat for total number of idle cycles. */
Stats::Scalar decodeIdleCycles;
/** Stat for total number of blocked cycles. */
Stats::Scalar decodeBlockedCycles;
/** Stat for total number of normal running cycles. */
Stats::Scalar decodeRunCycles;
/** Stat for total number of unblocking cycles. */
Stats::Scalar decodeUnblockCycles;
/** Stat for total number of squashing cycles. */
Stats::Scalar decodeSquashCycles;
/** Stat for number of times a branch is resolved at decode. */
Stats::Scalar decodeBranchResolved;
/** Stat for number of times a branch mispredict is detected. */
Stats::Scalar decodeBranchMispred;
/** Stat for number of times decode detected a non-control instruction
* incorrectly predicted as a branch.
*/
Stats::Scalar decodeControlMispred;
/** Stat for total number of decoded instructions. */
Stats::Scalar decodeDecodedInsts;
/** Stat for total number of squashed instructions. */
Stats::Scalar decodeSquashedInsts;
};
#endif // __CPU_O3_DECODE_HH__

View File

@ -0,0 +1,761 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "arch/types.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/o3/decode.hh"
#include "cpu/inst_seq.hh"
#include "debug/Activity.hh"
#include "debug/Decode.hh"
#include "params/DerivO3CPU.hh"
#include "sim/full_system.hh"
// clang complains about std::set being overloaded with Packet::set if
// we open up the entire namespace std
using std::list;
/**
 * Constructs the decode stage.
 *
 * Copies the inter-stage delays, the decode width and the thread count out
 * of @p params, sizes the skid buffer, and puts the stage and each of the
 * first numThreads threads into an inactive/idle state with all stall flags
 * cleared.
 *
 * @param _cpu    CPU this stage belongs to.
 * @param params  CPU parameter struct the configuration is read from.
 */
template<class Impl>
DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params)
    : cpu(_cpu),
      renameToDecodeDelay(params->renameToDecodeDelay),
      iewToDecodeDelay(params->iewToDecodeDelay),
      commitToDecodeDelay(params->commitToDecodeDelay),
      fetchToDecodeDelay(params->fetchToDecodeDelay),
      decodeWidth(params->decodeWidth),
      numThreads(params->numThreads)
{
    // @todo: Make into a parameter
    skidBufferMax = (fetchToDecodeDelay + 1) * params->fetchWidth;

    // The stage as a whole starts out inactive.
    _status = Inactive;

    // Every configured thread starts idle with no pending stall requests.
    for (ThreadID t = 0; t < numThreads; ++t) {
        stalls[t].rename = false;
        stalls[t].iew = false;
        stalls[t].commit = false;
        decodeStatus[t] = Idle;
    }
}
/** Returns this stage's name: the owning CPU's name followed by
 *  ".decode".
 */
template <class Impl>
std::string
DefaultDecode<Impl>::name() const
{
    std::string stage_name = cpu->name();
    stage_name += ".decode";
    return stage_name;
}
/**
 * Gives each decode statistic its name (prefixed with this stage's name())
 * and description. Every statistic is also set as its own prerequisite.
 */
template <class Impl>
void
DefaultDecode<Impl>::regStats()
{
    // All statistics share the same "<cpu>.decode" prefix.
    const std::string prefix = name();

    // Cycle counters, one per thread status.
    decodeIdleCycles.name(prefix + ".IdleCycles")
        .desc("Number of cycles decode is idle")
        .prereq(decodeIdleCycles);

    decodeBlockedCycles.name(prefix + ".BlockedCycles")
        .desc("Number of cycles decode is blocked")
        .prereq(decodeBlockedCycles);

    decodeRunCycles.name(prefix + ".RunCycles")
        .desc("Number of cycles decode is running")
        .prereq(decodeRunCycles);

    decodeUnblockCycles.name(prefix + ".UnblockCycles")
        .desc("Number of cycles decode is unblocking")
        .prereq(decodeUnblockCycles);

    decodeSquashCycles.name(prefix + ".SquashCycles")
        .desc("Number of cycles decode is squashing")
        .prereq(decodeSquashCycles);

    // Branch handling counters.
    decodeBranchResolved.name(prefix + ".BranchResolved")
        .desc("Number of times decode resolved a branch")
        .prereq(decodeBranchResolved);

    decodeBranchMispred.name(prefix + ".BranchMispred")
        .desc("Number of times decode detected a branch misprediction")
        .prereq(decodeBranchMispred);

    decodeControlMispred.name(prefix + ".ControlMispred")
        .desc("Number of times decode detected an instruction incorrectly"
              " predicted as a control")
        .prereq(decodeControlMispred);

    // Instruction throughput counters.
    decodeDecodedInsts.name(prefix + ".DecodedInsts")
        .desc("Number of instructions handled by decode")
        .prereq(decodeDecodedInsts);

    decodeSquashedInsts.name(prefix + ".SquashedInsts")
        .desc("Number of squashed instructions handled by decode")
        .prereq(decodeSquashedInsts);
}
/**
 * Stores the backwards-communication time buffer and creates the wires
 * decode uses on it: one at offset 0 for writing towards earlier stages,
 * and one per later stage (rename, IEW, commit) at the negated
 * stage-to-decode delay.
 *
 * @param tb_ptr Time buffer to attach to.
 */
template<class Impl>
void
DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
{
    timeBuffer = tb_ptr;

    // Outgoing wire: what decode writes this cycle.
    toFetch = tb_ptr->getWire(0);

    // Incoming wires: what rename, IEW and commit wrote, each read at its
    // own delay.
    fromRename = tb_ptr->getWire(-renameToDecodeDelay);
    fromIEW = tb_ptr->getWire(-iewToDecodeDelay);
    fromCommit = tb_ptr->getWire(-commitToDecodeDelay);
}
/**
 * Stores the decode-to-rename queue and creates the offset-0 wire through
 * which decode writes its output for rename.
 *
 * @param dq_ptr Decode queue to attach to.
 */
template<class Impl>
void
DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
{
    decodeQueue = dq_ptr;

    // Decode writes into the current slot of the queue.
    toRename = dq_ptr->getWire(0);
}
/**
 * Stores the fetch-to-decode queue and creates the wire decode reads from,
 * positioned at the negated fetch-to-decode delay.
 *
 * @param fq_ptr Fetch queue to attach to.
 */
template<class Impl>
void
DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
{
    fetchQueue = fq_ptr;

    // Decode sees what fetch produced fetchToDecodeDelay slots ago.
    fromFetch = fq_ptr->getWire(-fetchToDecodeDelay);
}
/**
 * Records the list of active thread ids that the per-cycle loops iterate
 * over. Only the pointer is stored; the list itself is not copied.
 *
 * @param at_ptr Pointer to the active-thread list.
 */
template<class Impl>
void
DefaultDecode<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
{
    this->activeThreads = at_ptr;
}
/**
 * Drains the decode stage. Decode needs no work to drain, so it notifies
 * the CPU right away.
 *
 * @return Always true.
 */
template <class Impl>
bool
DefaultDecode<Impl>::drain()
{
    // Nothing to wait for: report completion to the CPU immediately.
    cpu->signalDrained();

    const bool drained = true;
    return drained;
}
/**
 * Resets the stage when taking over from another CPU: the stage becomes
 * inactive and, for each of the first numThreads threads, the status goes
 * back to Idle, stall flags are cleared, queued instructions are dropped
 * and the in-flight branch count is zeroed.
 */
template <class Impl>
void
DefaultDecode<Impl>::takeOverFrom()
{
    _status = Inactive;

    for (ThreadID t = 0; t < numThreads; ++t) {
        decodeStatus[t] = Idle;

        // No stage is stalling this thread any more.
        stalls[t].rename = false;
        stalls[t].iew = false;
        stalls[t].commit = false;

        // Discard instructions left over from the previous owner, oldest
        // first.
        std::queue<DynInstPtr> &incoming = insts[t];
        while (!incoming.empty()) {
            incoming.pop();
        }

        std::queue<DynInstPtr> &skid = skidBuffer[t];
        while (!skid.empty()) {
            skid.pop();
        }

        branchCount[t] = 0;
    }

    wroteToTimeBuffer = false;
}
/**
 * Reports whether any later stage is currently asking thread @p tid to
 * stall. The flags are tested in the order rename, IEW, commit, and only
 * the first one that is set is traced.
 *
 * @param tid Thread to check.
 * @return true if at least one stall flag is set.
 */
template<class Impl>
bool
DefaultDecode<Impl>::checkStall(ThreadID tid) const
{
    const Stalls &pending = stalls[tid];

    if (pending.rename) {
        DPRINTF(Decode,"[tid:%i]: Stall fom Rename stage detected.\n", tid);
        return true;
    }

    if (pending.iew) {
        DPRINTF(Decode,"[tid:%i]: Stall fom IEW stage detected.\n", tid);
        return true;
    }

    if (pending.commit) {
        DPRINTF(Decode,"[tid:%i]: Stall fom Commit stage detected.\n", tid);
        return true;
    }

    return false;
}
/** Returns true when the wire from fetch carries at least one instruction
 *  this cycle.
 */
template<class Impl>
inline bool
DefaultDecode<Impl>::fetchInstsValid()
{
    const bool has_insts = fromFetch->size > 0;
    return has_insts;
}
/**
 * Switches thread @p tid to the Blocked state.
 *
 * The thread's pending input instructions are always moved into its skid
 * buffer first. If the thread was already Blocked nothing else happens.
 * Otherwise the status becomes Blocked and, unless the thread was still
 * Unblocking, fetch is told to block through the time buffer.
 *
 * @param tid Thread to block.
 * @return true if the thread's status changed, false if it was already
 *         Blocked.
 */
template<class Impl>
bool
DefaultDecode<Impl>::block(ThreadID tid)
{
    DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);

    // Add the current inputs to the skid buffer so they can be
    // reprocessed when this stage unblocks.
    skidInsert(tid);

    if (decodeStatus[tid] == Blocked)
        return false;

    // Sample the previous status *before* overwriting it. The original code
    // tested `decodeStatus[tid] != Unblocking` after already storing
    // Blocked, so the test was always true and the block signal was sent
    // even from the Unblocking state.
    const bool was_unblocking = (decodeStatus[tid] == Unblocking);

    // Set the status to Blocked.
    decodeStatus[tid] = Blocked;

    // While Unblocking, decode has not yet signalled fetch to unblock
    // (unblock() only raises decodeUnblock when it moves to Running), so
    // there is no need to tell fetch to block again.
    if (!was_unblocking) {
        toFetch->decodeBlock[tid] = true;
        wroteToTimeBuffer = true;
    }

    return true;
}
/**
 * Tries to finish unblocking thread @p tid.
 *
 * Unblocking completes only once the thread's skid buffer is empty; at that
 * point fetch is told to unblock and the thread goes back to Running.
 *
 * @param tid Thread to unblock.
 * @return true if the thread switched to Running, false if its skid buffer
 *         still holds instructions.
 */
template<class Impl>
bool
DefaultDecode<Impl>::unblock(ThreadID tid)
{
    // Still have buffered instructions to work through: not done yet.
    if (!skidBuffer[tid].empty()) {
        DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
        return false;
    }

    DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);

    toFetch->decodeUnblock[tid] = true;
    wroteToTimeBuffer = true;

    decodeStatus[tid] = Running;
    return true;
}
/**
 * Squashes thread @p tid because decode found that @p inst was predicted
 * incorrectly.
 *
 * Publishes the mispredict information to earlier stages through the time
 * buffer, releases fetch if decode had it blocked, moves the thread to
 * Squashing, marks younger incoming instructions of this thread as
 * squashed, empties the thread's instruction queue and skid buffer, and
 * finally asks the CPU to remove instructions up to @p inst.
 *
 * @param inst The mispredicted instruction.
 * @param tid  Thread the instruction belongs to.
 */
template<class Impl>
void
DefaultDecode<Impl>::squash(DynInstPtr &inst, ThreadID tid)
{
    DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch "
            "prediction detected at decode.\n", tid, inst->seqNum);

    // Everything younger than this sequence number is on the wrong path.
    InstSeqNum squash_seq_num = inst->seqNum;

    // Send back mispredict information.
    toFetch->decodeInfo[tid].branchMispredict = true;
    toFetch->decodeInfo[tid].predIncorrect = true;
    toFetch->decodeInfo[tid].mispredictInst = inst;
    toFetch->decodeInfo[tid].squash = true;
    toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
    toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
    toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching();
    toFetch->decodeInfo[tid].squashInst = inst;

    // An unconditional control instruction is always reported as taken.
    if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) {
        toFetch->decodeInfo[tid].branchTaken = true;
    }

    // Might have to tell fetch to unblock.
    const bool fetch_was_held =
        decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking;
    if (fetch_was_held) {
        toFetch->decodeUnblock[tid] = 1;
    }

    // Set status to squashing.
    decodeStatus[tid] = Squashing;

    // Mark this thread's younger instructions arriving from fetch.
    for (int idx = 0; idx < fromFetch->size; ++idx) {
        if (fromFetch->insts[idx]->threadNumber != tid)
            continue;

        if (fromFetch->insts[idx]->seqNum > squash_seq_num) {
            fromFetch->insts[idx]->setSquashed();
        }
    }

    // Clear the instruction list and skid buffer in case they have any
    // insts in them.
    while (!insts[tid].empty())
        insts[tid].pop();

    while (!skidBuffer[tid].empty())
        skidBuffer[tid].pop();

    // Squash instructions up until this one
    cpu->removeInstsUntil(squash_seq_num, tid);
}
/**
 * Squashes thread @p tid in response to a squash signalled by commit.
 *
 * If decode had the thread Blocked or Unblocking, the block/unblock signal
 * to fetch is adjusted first. The thread then moves to Squashing, every
 * incoming instruction of this thread is marked squashed, and the thread's
 * instruction queue and skid buffer are emptied.
 *
 * @param tid Thread to squash.
 * @return Number of incoming instructions from fetch that were marked
 *         squashed.
 */
template<class Impl>
unsigned
DefaultDecode<Impl>::squash(ThreadID tid)
{
    DPRINTF(Decode, "[tid:%i]: Squashing.\n",tid);

    if (decodeStatus[tid] == Blocked || decodeStatus[tid] == Unblocking) {
        // In syscall emulation, we can have both a block and a squash due
        // to a syscall in the same cycle. This would cause both signals
        // to be high. This shouldn't happen in full system.
        // @todo: Determine if this still happens.
        if (!FullSystem && toFetch->decodeBlock[tid]) {
            // Cancel the block raised this cycle instead of also raising
            // an unblock.
            toFetch->decodeBlock[tid] = 0;
        } else {
            toFetch->decodeUnblock[tid] = 1;
        }
    }

    // Set status to squashing.
    decodeStatus[tid] = Squashing;

    // Go through incoming instructions from fetch and squash them.
    unsigned squash_count = 0;
    for (int idx = 0; idx < fromFetch->size; ++idx) {
        if (fromFetch->insts[idx]->threadNumber != tid)
            continue;

        fromFetch->insts[idx]->setSquashed();
        ++squash_count;
    }

    // Clear the instruction list and skid buffer in case they have any
    // insts in them.
    while (!insts[tid].empty())
        insts[tid].pop();

    while (!skidBuffer[tid].empty())
        skidBuffer[tid].pop();

    return squash_count;
}
/**
 * Moves every instruction waiting in thread @p tid's input queue into that
 * thread's skid buffer, preserving order. Asserts that each instruction
 * belongs to @p tid and that the skid buffer does not grow beyond
 * skidBufferMax.
 *
 * @param tid Thread whose pending instructions are buffered.
 */
template<class Impl>
void
DefaultDecode<Impl>::skidInsert(ThreadID tid)
{
    std::queue<DynInstPtr> &pending = insts[tid];

    while (!pending.empty()) {
        DynInstPtr inst = pending.front();
        pending.pop();

        assert(tid == inst->threadNumber);

        DPRINTF(Decode,"Inserting [sn:%lli] PC: %s into decode skidBuffer %i\n",
                inst->seqNum, inst->pcState(), inst->threadNumber);

        skidBuffer[tid].push(inst);
    }

    // @todo: Eventually need to enforce this by not letting a thread
    // fetch past its skidbuffer
    assert(skidBuffer[tid].size() <= skidBufferMax);
}
/**
 * Checks the skid buffers of all active threads.
 *
 * @return true if no active thread has anything in its skid buffer, false
 *         as soon as one non-empty buffer is found.
 */
template<class Impl>
bool
DefaultDecode<Impl>::skidsEmpty()
{
    const list<ThreadID>::iterator last = activeThreads->end();

    for (list<ThreadID>::iterator it = activeThreads->begin();
         it != last; ++it) {
        const ThreadID tid = *it;
        if (!skidBuffer[tid].empty())
            return false;
    }

    return true;
}
/**
 * Recomputes the stage-wide status from the per-thread statuses.
 *
 * The stage is Active while at least one active thread is Unblocking and
 * Inactive otherwise; the CPU is notified only when the stage status
 * actually flips.
 */
template<class Impl>
void
DefaultDecode<Impl>::updateStatus()
{
    // Look for any active thread that is still unblocking.
    bool any_unblocking = false;
    const list<ThreadID>::iterator last = activeThreads->end();

    for (list<ThreadID>::iterator it = activeThreads->begin();
         it != last; ++it) {
        if (decodeStatus[*it] == Unblocking) {
            any_unblocking = true;
            break;
        }
    }

    if (any_unblocking && _status == Inactive) {
        // Decode will have activity if it's unblocking.
        _status = Active;

        DPRINTF(Activity, "Activating stage.\n");

        cpu->activateStage(O3CPU::DecodeIdx);
    } else if (!any_unblocking && _status == Active) {
        // If it's not unblocking, then decode will not have any internal
        // activity. Switch it to inactive.
        _status = Inactive;

        DPRINTF(Activity, "Deactivating stage.\n");

        cpu->deactivateStage(O3CPU::DecodeIdx);
    }
}
/**
 * Distributes this cycle's instructions from fetch into the per-thread
 * input queues, keyed by each instruction's threadNumber and keeping the
 * order in which fetch delivered them.
 */
template <class Impl>
void
DefaultDecode<Impl>::sortInsts()
{
    const int num_incoming = fromFetch->size;

    int slot = 0;
    while (slot < num_incoming) {
        insts[fromFetch->insts[slot]->threadNumber].push(
            fromFetch->insts[slot]);
        ++slot;
    }
}
/**
 * Updates thread @p tid's stall flags from the block/unblock signals that
 * rename, IEW and commit placed in the backwards time buffer.
 *
 * For each stage the block signal is applied before the unblock signal, so
 * an unblock seen in the same cycle wins. An unblock is only legal while
 * the matching stall flag is set (asserted).
 *
 * @param tid Thread whose stall flags are refreshed.
 */
template<class Impl>
void
DefaultDecode<Impl>::readStallSignals(ThreadID tid)
{
    Stalls &flags = stalls[tid];

    // Rename
    if (fromRename->renameBlock[tid])
        flags.rename = true;

    if (fromRename->renameUnblock[tid]) {
        assert(flags.rename);
        flags.rename = false;
    }

    // IEW
    if (fromIEW->iewBlock[tid])
        flags.iew = true;

    if (fromIEW->iewUnblock[tid]) {
        assert(flags.iew);
        flags.iew = false;
    }

    // Commit
    if (fromCommit->commitBlock[tid])
        flags.commit = true;

    if (fromCommit->commitUnblock[tid]) {
        assert(flags.commit);
        flags.commit = false;
    }
}
/**
 * Reads this cycle's control signals for thread @p tid and updates the
 * thread's decode status accordingly.
 *
 * Checks are made in priority order:
 *  1. squash from commit      -> squash(tid), report a change;
 *  2. ROB still squashing     -> stay in Squashing, report a change;
 *  3. any stall flag set      -> block(tid) and report its result;
 *  4. currently Blocked       -> go to Unblocking and try unblock(tid),
 *                                report a change;
 *  5. currently Squashing     -> go to Running, report no change;
 *  6. otherwise               -> nothing to do.
 *
 * @param tid Thread to update.
 * @return true if the caller should treat this as a status change.
 */
template <class Impl>
bool
DefaultDecode<Impl>::checkSignalsAndUpdate(ThreadID tid)
{
    // Update the per thread stall statuses.
    readStallSignals(tid);

    // Check squash signals from commit.
    if (fromCommit->commitInfo[tid].squash) {
        DPRINTF(Decode, "[tid:%u]: Squashing instructions due to squash "
                "from commit.\n", tid);

        squash(tid);
        return true;
    }

    // Check ROB squash signals from commit.
    if (fromCommit->commitInfo[tid].robSquashing) {
        DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid);

        // Continue to squash.
        decodeStatus[tid] = Squashing;
        return true;
    }

    if (checkStall(tid))
        return block(tid);

    // No squash and no stall this cycle: leave Blocked/Squashing if the
    // thread is in one of those states.
    switch (decodeStatus[tid]) {
      case Blocked:
        DPRINTF(Decode, "[tid:%u]: Done blocking, switching to unblocking.\n",
                tid);

        decodeStatus[tid] = Unblocking;
        unblock(tid);
        return true;

      case Squashing:
        // Switch status to running if decode isn't being told to block or
        // squash this cycle.
        DPRINTF(Decode, "[tid:%u]: Done squashing, switching to running.\n",
                tid);

        decodeStatus[tid] = Running;
        return false;

      default:
        break;
    }

    // If we've reached this point, we have not gotten any signals that
    // cause decode to change its status. Decode remains the same as before.
    return false;
}
/**
 * Runs the decode stage for one cycle: sorts the incoming instructions by
 * thread, then for every active thread checks its signals and runs
 * decode(). Afterwards the stage status is refreshed if any thread changed
 * status, and the CPU is notified if decode wrote to the time buffer.
 */
template<class Impl>
void
DefaultDecode<Impl>::tick()
{
    wroteToTimeBuffer = false;
    toRenameIndex = 0;

    bool status_change = false;

    // The iteration range is captured before the instructions are sorted.
    list<ThreadID>::iterator cur = activeThreads->begin();
    list<ThreadID>::iterator last = activeThreads->end();

    sortInsts();

    // Check stall and squash signals, then decode, thread by thread.
    while (cur != last) {
        // Advance before running the body, as the thread id is all that
        // is needed from the iterator.
        ThreadID tid = *cur;
        ++cur;

        DPRINTF(Decode,"Processing [tid:%i]\n",tid);

        if (checkSignalsAndUpdate(tid))
            status_change = true;

        decode(status_change, tid);
    }

    if (status_change)
        updateStatus();

    if (wroteToTimeBuffer) {
        DPRINTF(Activity, "Activity this cycle.\n");

        cpu->activityThisCycle();
    }
}
/**
 * Performs one cycle of decode work for a thread according to its status.
 *
 *  - Blocked / Squashing: only the matching cycle counter is incremented.
 *  - Running / Idle: decodeInsts() is called.
 *  - Unblocking: decodeInsts() drains the skid buffer, any valid input
 *    from fetch is pushed onto the skid buffer, and unblock() is tried.
 *
 * @param status_change Set to true if unblock() reports a status change;
 *                      never cleared here.
 * @param tid Thread to decode for.
 */
template<class Impl>
void
DefaultDecode<Impl>::decode(bool &status_change, ThreadID tid)
{
    if (decodeStatus[tid] == Blocked) {
        ++decodeBlockedCycles;
    } else if (decodeStatus[tid] == Squashing) {
        ++decodeSquashCycles;
    } else if (decodeStatus[tid] == Running ||
               decodeStatus[tid] == Idle) {
        // Decode should try to decode as many instructions as its
        // bandwidth will allow, as long as it is not currently blocked.
        DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run "
                "stage.\n",tid);

        decodeInsts(tid);
    } else if (decodeStatus[tid] == Unblocking) {
        // The skid buffer must hold something while unblocking.
        assert(!skidsEmpty());

        // Consume instructions from the skid buffer first.
        decodeInsts(tid);

        // Park this cycle's input from fetch on the skid buffer so it is
        // reprocessed once the stage unblocks.
        if (fetchInstsValid())
            skidInsert(tid);

        if (unblock(tid))
            status_change = true;
    }
}
/**
 * Decodes instructions for one thread and hands them to rename.
 *
 * Instructions are taken from the thread's skid buffer when its status is
 * Unblocking, and from the thread's insts queue otherwise. Instructions
 * are consumed while some are available and toRenameIndex is below
 * decodeWidth. Squashed instructions are counted and dropped. Every other
 * instruction is written to the toRename wire. Direct unconditional
 * branches have their target checked against the predicted target, and a
 * mismatch squashes and stops decoding for this call.
 *
 * If the source queue is not empty afterwards, block() is called. If
 * anything has been written to toRename, wroteToTimeBuffer is set.
 *
 * @param tid Thread whose instructions are decoded.
 */
template <class Impl>
void
DefaultDecode<Impl>::decodeInsts(ThreadID tid)
{
// Instructions can come either from the skid buffer or the list of
// instructions coming from fetch, depending on decode's status.
int insts_available = decodeStatus[tid] == Unblocking ?
skidBuffer[tid].size() : insts[tid].size();
if (insts_available == 0) {
DPRINTF(Decode, "[tid:%u] Nothing to do, breaking out"
" early.\n",tid);
// Should I change the status to idle?
++decodeIdleCycles;
return;
} else if (decodeStatus[tid] == Unblocking) {
DPRINTF(Decode, "[tid:%u] Unblocking, removing insts from skid "
"buffer.\n",tid);
++decodeUnblockCycles;
} else if (decodeStatus[tid] == Running) {
++decodeRunCycles;
}
DynInstPtr inst;
// Reference to the queue being drained; selected the same way as
// insts_available above.
std::queue<DynInstPtr>
&insts_to_decode = decodeStatus[tid] == Unblocking ?
skidBuffer[tid] : insts[tid];
DPRINTF(Decode, "[tid:%u]: Sending instruction to rename.\n",tid);
// toRenameIndex is not reset here, so the decodeWidth limit applies to
// everything written to toRename since it was last zeroed.
while (insts_available > 0 && toRenameIndex < decodeWidth) {
assert(!insts_to_decode.empty());
inst = insts_to_decode.front();
insts_to_decode.pop();
DPRINTF(Decode, "[tid:%u]: Processing instruction [sn:%lli] with "
"PC %s\n", tid, inst->seqNum, inst->pcState());
// Squashed instructions are dropped without using a toRename slot.
if (inst->isSquashed()) {
DPRINTF(Decode, "[tid:%u]: Instruction %i with PC %s is "
"squashed, skipping.\n",
tid, inst->seqNum, inst->pcState());
++decodeSquashedInsts;
--insts_available;
continue;
}
// Also check if instructions have no source registers. Mark
// them as ready to issue at any time. Not sure if this check
// should exist here or at a later stage; however it doesn't matter
// too much for function correctness.
if (inst->numSrcRegs() == 0) {
inst->setCanIssue();
}
// This current instruction is valid, so add it into the decode
// queue. The next instruction may not be valid, so check to
// see if branches were predicted correctly.
toRename->insts[toRenameIndex] = inst;
++(toRename->size);
++toRenameIndex;
++decodeDecodedInsts;
--insts_available;
#if TRACING_ON
// Stored as an offset from the instruction's fetch tick.
inst->decodeTick = curTick() - inst->fetchTick;
#endif
// Ensure that if it was predicted as a branch, it really is a
// branch.
if (inst->readPredTaken() && !inst->isControl()) {
// NOTE(review): if panic() does not return, the three statements
// following it are unreachable — confirm whether a recoverable squash
// was intended here instead.
panic("Instruction predicted as a branch!");
++decodeControlMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
break;
}
// Go ahead and compute any PC-relative branches.
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
++decodeBranchResolved;
if (!(inst->branchTarget() == inst->readPredTarg())) {
++decodeBranchMispred;
// Might want to set some sort of boolean and just do
// a check at the end
squash(inst, inst->threadNumber);
TheISA::PCState target = inst->branchTarget();
DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %s\n",
inst->seqNum, target);
//The micro pc after an instruction level branch should be 0
inst->setPredTarg(target);
break;
}
}
}
// If we didn't process all instructions, then we will need to block
// and put all those instructions into the skid buffer.
if (!insts_to_decode.empty()) {
block(tid);
}
// Record that decode has written to the time buffer for activity
// tracking.
if (toRenameIndex) {
wroteToTimeBuffer = true;
}
}

View File

@ -0,0 +1,272 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_DEP_GRAPH_HH__
#define __CPU_O3_DEP_GRAPH_HH__
#include "cpu/o3/comm.hh"
/** Node in a singly linked list of instructions. A default-constructed
 * node holds no instruction and has no successor.
 */
template <class DynInstPtr>
class DependencyEntry
{
public:
/** Creates an empty node: no instruction, no next node. */
DependencyEntry()
: inst(NULL), next(NULL)
{ }
/** Instruction stored in this node. */
DynInstPtr inst;
//Might want to include data about what arch. register the
//dependence is waiting on.
/** Next node in the list, or NULL if this is the last node. */
DependencyEntry<DynInstPtr> *next;
};
/** Array of linked list that maintains the dependencies between
* producing instructions and consuming instructions. Each linked
* list represents a single physical register, having the future
* producer of the register's value, and all consumers waiting on that
* value on the list. The head node of each linked list represents
* the producing instruction of that register. Instructions are put
* on the list upon reaching the IQ, and are removed from the list
* either when the producer completes, or the instruction is squashed.
*/
template <class DynInstPtr>
class DependencyGraph
{
public:
typedef DependencyEntry<DynInstPtr> DepEntry;
/** Default construction. Must call resize() prior to use. */
DependencyGraph()
: numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
{ }
~DependencyGraph();
/** Resize the dependency graph to have num_entries registers. */
void resize(int num_entries);
/** Clears all of the linked lists. */
void reset();
/** Inserts an instruction to be dependent on the given index. */
void insert(PhysRegIndex idx, DynInstPtr &new_inst);
/** Sets the producing instruction of a given register. */
void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
{ dependGraph[idx].inst = new_inst; }
/** Clears the producing instruction. */
void clearInst(PhysRegIndex idx)
{ dependGraph[idx].inst = NULL; }
/** Removes an instruction from a single linked list. */
void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
/** Removes and returns the newest dependent of a specific register. */
DynInstPtr pop(PhysRegIndex idx);
/** Checks if there are any dependents on a specific register. */
bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
/** Debugging function to dump out the dependency graph.
*/
void dump();
private:
/** Array of linked lists. Each linked list is a list of all the
* instructions that depend upon a given register. The actual
* register's index is used to index into the graph; ie all
* instructions in flight that are dependent upon r34 will be
* in the linked list of dependGraph[34].
*/
DepEntry *dependGraph;
/** Number of linked lists; identical to the number of registers. */
int numEntries;
// Debug variable, remove when done testing.
unsigned memAllocCounter;
public:
// Debug variable, remove when done testing.
uint64_t nodesTraversed;
// Debug variable, remove when done testing.
uint64_t nodesRemoved;
};
/** Releases the array of list heads allocated by resize(). Consumer nodes
 * still linked from the heads are not freed here; call reset() beforehand
 * to release them.
 */
template <class DynInstPtr>
DependencyGraph<DynInstPtr>::~DependencyGraph()
{
delete [] dependGraph;
}
/** Allocates a fresh array of num_entries list heads and records the
 * count. Any array from an earlier call is neither freed nor copied, so
 * this is meant to be called once before the graph is used.
 */
template <class DynInstPtr>
void
DependencyGraph<DynInstPtr>::resize(int num_entries)
{
numEntries = num_entries;
dependGraph = new DepEntry[numEntries];
}
/** Empties the whole graph: for every register, frees all consumer nodes,
 * drops the producer instruction and leaves the list head with no
 * successor.
 */
template <class DynInstPtr>
void
DependencyGraph<DynInstPtr>::reset()
{
    for (int reg = 0; reg < numEntries; ++reg) {
        DepEntry &head = dependGraph[reg];

        // Walk the consumer chain, releasing each node as we go.
        DepEntry *node = head.next;
        while (node) {
            memAllocCounter--;

            DepEntry *doomed = node;
            node = doomed->next;

            // Drop the instruction reference before freeing the node.
            doomed->inst = NULL;
            delete doomed;
        }

        // Forget the producer, if one was registered.
        if (head.inst) {
            head.inst = NULL;
        }

        head.next = NULL;
    }
}
/** Records new_inst as a consumer of register idx by linking a freshly
 * allocated node directly behind the list head, ahead of all consumers
 * inserted earlier.
 */
template <class DynInstPtr>
void
DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
{
    DepEntry &head = dependGraph[idx];

    // Build the node that will become the first consumer in the chain.
    DepEntry *node = new DepEntry;
    node->inst = new_inst;
    node->next = head.next;

    // Splice it in right after the head.
    head.next = node;

    ++memAllocCounter;
}
/** Unlinks and frees the consumer node holding inst_to_remove from the
 * list of register idx. Returns without doing anything if the register
 * has no consumers. The instruction is otherwise expected to be on the
 * list; reaching the end of the list without finding it trips an assert.
 */
template <class DynInstPtr>
void
DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
                                    DynInstPtr &inst_to_remove)
{
    DepEntry *prev = &dependGraph[idx];
    DepEntry *curr = prev->next;

    // An empty chain means there is nothing to remove; this can happen
    // when the dependent instruction is already ready.
    if (!curr)
        return;

    nodesRemoved++;

    // Advance until curr is the node that holds the instruction.
    while (curr->inst != inst_to_remove) {
        DepEntry *following = curr->next;
        nodesTraversed++;
        assert(following != NULL);

        prev = curr;
        curr = following;
    }

    // Unlink the node, then release it.
    prev->next = curr->next;
    --memAllocCounter;

    // Could push this off to the destructor of DependencyEntry
    curr->inst = NULL;
    delete curr;
}
/** Detaches the first consumer node of register idx (the most recently
 * inserted one) and returns its instruction. Returns a NULL instruction
 * pointer if the register has no consumers.
 */
template <class DynInstPtr>
DynInstPtr
DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
{
    DynInstPtr popped = NULL;

    DepEntry *first = dependGraph[idx].next;
    if (first) {
        // Keep the instruction, then unlink and free its node.
        popped = first->inst;
        dependGraph[idx].next = first->next;

        first->inst = NULL;
        memAllocCounter--;
        delete first;
    }

    return popped;
}
/** Debugging aid: prints, for every register, its producer (if any)
 * followed by all consumers on its list, and finally the value of
 * memAllocCounter.
 */
template <class DynInstPtr>
void
DependencyGraph<DynInstPtr>::dump()
{
    for (int reg = 0; reg < numEntries; ++reg) {
        DepEntry *head = &dependGraph[reg];

        if (head->inst) {
            cprintf("dependGraph[%i]: producer: %s [sn:%lli] consumer: ",
                    reg, head->inst->pcState(), head->inst->seqNum);
        } else {
            cprintf("dependGraph[%i]: No producer. consumer: ", reg);
        }

        // Consumers follow the head node.
        for (DepEntry *node = head->next; node != NULL; node = node->next) {
            cprintf("%s [sn:%lli] ",
                    node->inst->pcState(), node->inst->seqNum);
        }

        cprintf("\n");
    }

    cprintf("memAllocCounter: %i\n", memAllocCounter);
}
#endif // __CPU_O3_DEP_GRAPH_HH__

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Gabe Black
*/
#include "cpu/o3/dyn_inst_impl.hh"
#include "cpu/o3/impl.hh"
// Force instantiation of BaseO3DynInst for all the implementations that
// are needed. The member definitions come from dyn_inst_impl.hh, included
// above; O3CPUImpl is the only implementation instantiated here.
template class BaseO3DynInst<O3CPUImpl>;

View File

@ -0,0 +1,298 @@
/*
* Copyright (c) 2010 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_DYN_INST_HH__
#define __CPU_O3_DYN_INST_HH__
#include "arch/isa_traits.hh"
#include "config/the_isa.hh"
#include "cpu/o3/cpu.hh"
#include "cpu/o3/isa_specific.hh"
#include "cpu/base_dyn_inst.hh"
#include "cpu/inst_seq.hh"
class Packet;
/**
 * Dynamic instruction class for the O3 CPU model (BaseO3DynInst). As with
 * most other classes in the new CPU model, it is templated on the Impl to
 * allow for passing in of all types, such as the CPU type and the ISA
 * type. It forwards register, misc. register, trap and syscall accesses
 * made by an executing instruction to the CPU.
 */
template <class Impl>
class BaseO3DynInst : public BaseDynInst<Impl>
{
public:
/** Typedef for the CPU. */
typedef typename Impl::O3CPU O3CPU;
/** Binary machine instruction type. */
typedef TheISA::MachInst MachInst;
/** Extended machine instruction type. */
typedef TheISA::ExtMachInst ExtMachInst;
/** Logical register index type. */
typedef TheISA::RegIndex RegIndex;
/** Integer register index type. */
typedef TheISA::IntReg IntReg;
/** Floating point register value type. */
typedef TheISA::FloatReg FloatReg;
/** Floating point register type, viewed as raw bits. */
typedef TheISA::FloatRegBits FloatRegBits;
/** Misc register index type. */
typedef TheISA::MiscReg MiscReg;
enum {
MaxInstSrcRegs = TheISA::MaxInstSrcRegs, ///< Max source regs
MaxInstDestRegs = TheISA::MaxInstDestRegs ///< Max dest regs
};
public:
/** BaseDynInst constructor given a binary instruction. */
BaseO3DynInst(StaticInstPtr staticInst, StaticInstPtr macroop,
TheISA::PCState pc, TheISA::PCState predPC,
InstSeqNum seq_num, O3CPU *cpu);
/** BaseDynInst constructor given a static inst pointer. */
BaseO3DynInst(StaticInstPtr _staticInst, StaticInstPtr _macroop);
/** Executes the instruction.*/
Fault execute();
/** Initiates the access. Only valid for memory operations. */
Fault initiateAcc();
/** Completes the access. Only valid for memory operations. */
Fault completeAcc(PacketPtr pkt);
private:
/** Initializes variables. */
void initVars();
protected:
/** Values to be written to the destination misc. registers. */
MiscReg _destMiscRegVal[TheISA::MaxMiscDestRegs];
/** Indexes of the destination misc. registers. They are needed to defer
* the write accesses to the misc. registers until the commit stage, when
* the instruction is out of its speculative state.
*/
short _destMiscRegIdx[TheISA::MaxMiscDestRegs];
/** Number of destination misc. registers. */
uint8_t _numDestMiscRegs;
public:
#if TRACING_ON
/** Tick records used for the pipeline activity viewer. */
Tick fetchTick;
uint32_t decodeTick;
uint32_t renameTick;
uint32_t dispatchTick;
uint32_t issueTick;
uint32_t completeTick;
#endif
/** Reads a misc. register, including any side-effects the read
* might have as defined by the architecture.
*/
MiscReg readMiscReg(int misc_reg)
{
return this->cpu->readMiscReg(misc_reg, this->threadNumber);
}
/** Sets a misc. register, including any side-effects the write
* might have as defined by the architecture.
*/
void setMiscReg(int misc_reg, const MiscReg &val)
{
/** Writes to misc. registers are recorded and deferred until the
* commit stage, when updateMiscRegs() is called.
*/
// Only an assert guards the capacity of the deferred-write arrays.
assert(_numDestMiscRegs < TheISA::MaxMiscDestRegs);
_destMiscRegIdx[_numDestMiscRegs] = misc_reg;
_destMiscRegVal[_numDestMiscRegs] = val;
_numDestMiscRegs++;
}
/** Reads the misc. register named by source operand idx of si,
* including any side-effects the read might have as defined by the
* architecture.
*/
TheISA::MiscReg readMiscRegOperand(const StaticInst *si, int idx)
{
// Operand indexes for misc. registers are offset by Ctrl_Base_DepTag.
return this->cpu->readMiscReg(
si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
this->threadNumber);
}
/** Sets the misc. register named by destination operand idx of si.
* The write is recorded through setMiscReg() and therefore deferred.
*/
void setMiscRegOperand(const StaticInst *si, int idx,
const MiscReg &val)
{
int misc_reg = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
setMiscReg(misc_reg, val);
}
/** Called at the commit stage to update the misc. registers. */
void updateMiscRegs()
{
// @todo: Pretty convoluted way to avoid squashing from happening when
// using the TC during an instruction's execution (specifically for
// instructions that have side-effects that use the TC). Fix this.
// See cpu/o3/dyn_inst_impl.hh.
bool in_syscall = this->thread->inSyscall;
this->thread->inSyscall = true;
// Replay the recorded writes in the order they were made.
for (int i = 0; i < _numDestMiscRegs; i++)
this->cpu->setMiscReg(
_destMiscRegIdx[i], _destMiscRegVal[i], this->threadNumber);
this->thread->inSyscall = in_syscall;
}
/** Copies, for every destination register, the value held in the
* previous physical register of that destination into the current
* destination register. Integer and floating point destinations are
* handled; destinations at or above Ctrl_Base_DepTag are left alone.
*/
void forwardOldRegs()
{
for (int idx = 0; idx < this->numDestRegs(); idx++) {
PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx);
TheISA::RegIndex original_dest_reg = this->staticInst->destRegIdx(idx);
if (original_dest_reg < TheISA::FP_Base_DepTag)
this->setIntRegOperand(this->staticInst.get(), idx, this->cpu->readIntReg(prev_phys_reg));
else if (original_dest_reg < TheISA::Ctrl_Base_DepTag)
this->setFloatRegOperandBits(this->staticInst.get(), idx, this->cpu->readFloatRegBits(prev_phys_reg));
}
}
/** Calls hardware return from error interrupt. */
Fault hwrei();
/** Traps to handle specified fault. */
void trap(Fault fault);
/** Forwards a PAL function check to the CPU for this thread. */
bool simPalCheck(int palFunc);
/** Emulates a syscall. */
void syscall(int64_t callnum);
public:
// The register accessor methods provide the index of the
// instruction's operand (e.g., 0 or 1), not the architectural
// register index, to simplify the implementation of register
// renaming. We find the architectural register index by indexing
// into the instruction's own operand index table. Note that a
// raw pointer to the StaticInst is provided instead of a
// ref-counted StaticInstPtr to reduce overhead. This is fine as
// long as these methods don't copy the pointer into any long-term
// storage (which is pretty hard to imagine they would have reason
// to do).
uint64_t readIntRegOperand(const StaticInst *si, int idx)
{
return this->cpu->readIntReg(this->_srcRegIdx[idx]);
}
FloatReg readFloatRegOperand(const StaticInst *si, int idx)
{
return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
}
FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
{
return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
}
/** @todo: Make results into arrays so they can handle multiple dest
* registers.
*/
// Each setter writes the CPU register first and then forwards the same
// call to BaseDynInst.
void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
{
this->cpu->setIntReg(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
}
void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
{
this->cpu->setFloatReg(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
}
void setFloatRegOperandBits(const StaticInst *si, int idx,
FloatRegBits val)
{
this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
}
#if THE_ISA == MIPS_ISA
/** Not supported on the O3 CPU; always panics. */
uint64_t readRegOtherThread(int misc_reg)
{
panic("MIPS MT not defined for O3 CPU.\n");
return 0;
}
/** Not supported on the O3 CPU; always panics. */
void setRegOtherThread(int misc_reg, const TheISA::MiscReg &val)
{
panic("MIPS MT not defined for O3 CPU.\n");
}
#endif
public:
/** Calculates EA part of a memory instruction. Currently unused,
* though it may be useful in the future if we want to split
* memory operations into EA calculation and memory access parts.
*/
Fault calcEA()
{
return this->staticInst->eaCompInst()->execute(this, this->traceData);
}
/** Does the memory access part of a memory instruction. Currently unused,
* though it may be useful in the future if we want to split
* memory operations into EA calculation and memory access parts.
*/
Fault memAccess()
{
return this->staticInst->memAccInst()->execute(this, this->traceData);
}
};
#endif // __CPU_O3_DYN_INST_HH__

View File

@ -0,0 +1,216 @@
/*
* Copyright (c) 2010-2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "base/cp_annotate.hh"
#include "cpu/o3/dyn_inst.hh"
#include "sim/full_system.hh"
/** Constructs a dynamic instruction from a static instruction, its
 * macro-op, the current and predicted PC states, a sequence number and
 * the owning CPU. All arguments are forwarded to BaseDynInst, after which
 * the O3-specific state is set up by initVars().
 */
template <class Impl>
BaseO3DynInst<Impl>::BaseO3DynInst(StaticInstPtr staticInst,
StaticInstPtr macroop,
TheISA::PCState pc, TheISA::PCState predPC,
InstSeqNum seq_num, O3CPU *cpu)
: BaseDynInst<Impl>(staticInst, macroop, pc, predPC, seq_num, cpu)
{
initVars();
}
/** Constructs a dynamic instruction from only a static instruction and
 * its macro-op. Both are forwarded to BaseDynInst, after which the
 * O3-specific state is set up by initVars().
 */
template <class Impl>
BaseO3DynInst<Impl>::BaseO3DynInst(StaticInstPtr _staticInst,
StaticInstPtr _macroop)
: BaseDynInst<Impl>(_staticInst, _macroop)
{
initVars();
}
/** Initializes the O3-specific state of a new instruction: the renamed
 * register index tables start out as copies of the static instruction's
 * register indexes, no source register is marked ready, no misc. register
 * writes are pending and (when tracing) all tick records are zero.
 */
template <class Impl>
void
BaseO3DynInst<Impl>::initVars()
{
    // Seed the renamed register entries with the normal register entries
    // so that the IQ works without any modifications.
    const int num_dest = this->staticInst->numDestRegs();
    for (int d = 0; d < num_dest; ++d)
        this->_destRegIdx[d] = this->staticInst->destRegIdx(d);

    const int num_src = this->staticInst->numSrcRegs();
    for (int s = 0; s < num_src; ++s)
        this->_srcRegIdx[s] = this->staticInst->srcRegIdx(s);

    this->_readySrcRegIdx.reset();

    _numDestMiscRegs = 0;

#if TRACING_ON
    fetchTick = 0;
    decodeTick = 0;
    renameTick = 0;
    dispatchTick = 0;
    issueTick = 0;
    completeTick = 0;
#endif
}
/** Executes the static instruction on behalf of this dynamic instruction
 * and records the resulting fault.
 *
 * @return The fault produced by the static instruction's execute().
 */
template <class Impl>
Fault
BaseO3DynInst<Impl>::execute()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    const bool saved_in_syscall = this->thread->inSyscall;
    this->thread->inSyscall = true;

    this->fault = this->staticInst->execute(this, this->traceData);

    // Put the flag back the way it was found.
    this->thread->inSyscall = saved_in_syscall;

    return this->fault;
}
/** Starts the memory access of this instruction through the static
 * instruction and records the resulting fault.
 *
 * @return The fault produced by the static instruction's initiateAcc().
 */
template <class Impl>
Fault
BaseO3DynInst<Impl>::initiateAcc()
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    const bool saved_in_syscall = this->thread->inSyscall;
    this->thread->inSyscall = true;

    this->fault = this->staticInst->initiateAcc(this, this->traceData);

    // Put the flag back the way it was found.
    this->thread->inSyscall = saved_in_syscall;

    return this->fault;
}
/** Finishes the memory access of this instruction using the response
 * packet and records the resulting fault.
 *
 * @param pkt Packet carrying the response of the memory access.
 * @return The fault produced by the static instruction's completeAcc().
 */
template <class Impl>
Fault
BaseO3DynInst<Impl>::completeAcc(PacketPtr pkt)
{
    // @todo: Pretty convoluted way to avoid squashing from happening
    // when using the TC during an instruction's execution
    // (specifically for instructions that have side-effects that use
    // the TC).  Fix this.
    const bool saved_in_syscall = this->thread->inSyscall;
    this->thread->inSyscall = true;

    // With a checker attached, a store conditional copies the request's
    // extra data over to the request kept for verification.
    if (this->cpu->checker && this->isStoreConditional()) {
        this->reqToVerify->setExtraData(pkt->req->getExtraData());
    }

    this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);

    // Put the flag back the way it was found.
    this->thread->inSyscall = saved_in_syscall;

    return this->fault;
}
/** Performs the hardware return from error interrupt. Only does work when
 * built for the Alpha ISA; for every other ISA it simply returns NoFault.
 *
 * @return An UnimplementedOpcodeFault if the low two bits of the
 *         instruction address are clear (Alpha only), NoFault otherwise.
 */
template <class Impl>
Fault
BaseO3DynInst<Impl>::hwrei()
{
#if THE_ISA == ALPHA_ISA
    // Can only do a hwrei when in pal mode.
    if ((this->instAddr() & 0x3) == 0)
        return new AlphaISA::UnimplementedOpcodeFault;

    // Set the next PC based on the value of the EXC_ADDR IPR.
    AlphaISA::PCState return_pc = this->pcState();
    return_pc.npc(this->cpu->readMiscRegNoEffect(AlphaISA::IPR_EXC_ADDR,
                                                 this->threadNumber));
    this->pcState(return_pc);

    if (CPA::available()) {
        ThreadContext *tc = this->cpu->tcBase(this->threadNumber);
        CPA::cpa()->swAutoBegin(tc, this->nextInstAddr());
    }

    // Tell CPU to clear any state it needs to if a hwrei is taken.
    this->cpu->hwrei(this->threadNumber);
#endif
    // FIXME: XXX check for interrupts? XXX
    return NoFault;
}
/** Hands the given fault to the CPU's trap() for this instruction's
 * thread, passing along this instruction's static instruction.
 */
template <class Impl>
void
BaseO3DynInst<Impl>::trap(Fault fault)
{
this->cpu->trap(fault, this->threadNumber, this->staticInst);
}
/** Forwards a PAL function check to the CPU for this instruction's
 * thread and returns the CPU's answer. When built for any ISA other than
 * Alpha, panic() is called before the check is forwarded.
 */
template <class Impl>
bool
BaseO3DynInst<Impl>::simPalCheck(int palFunc)
{
#if THE_ISA != ALPHA_ISA
panic("simPalCheck called, but PAL only exists in Alpha!\n");
#endif
return this->cpu->simPalCheck(palFunc, this->threadNumber);
}
/** Emulates a system call through the CPU. Panics when FullSystem is set.
 * If the CPU's PC state for this thread differs after the call, this
 * instruction's PC state is updated to the new one.
 *
 * @param callnum Number of the system call to emulate.
 */
template <class Impl>
void
BaseO3DynInst<Impl>::syscall(int64_t callnum)
{
    if (FullSystem)
        panic("Syscall emulation isn't available in FS mode.\n");

    // HACK: compare the CPU's PC state before and after the syscall. A
    // difference means the syscall redirected control flow, so this
    // instruction has to pick up the new PC state.
    TheISA::PCState pc_before = this->cpu->pcState(this->threadNumber);
    this->cpu->syscall(callnum, this->threadNumber);
    TheISA::PCState pc_after = this->cpu->pcState(this->threadNumber);

    if (pc_before == pc_after)
        return;

    this->pcState(pc_after);
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/fetch_impl.hh"
#include "cpu/o3/isa_specific.hh"
// Explicitly instantiate the DefaultFetch template for the O3CPUImpl
// implementation in this translation unit.
template class DefaultFetch<O3CPUImpl>;

View File

@ -0,0 +1,558 @@
/*
* Copyright (c) 2010-2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#ifndef __CPU_O3_FETCH_HH__
#define __CPU_O3_FETCH_HH__
#include "arch/decoder.hh"
#include "arch/utility.hh"
#include "base/statistics.hh"
#include "config/the_isa.hh"
#include "cpu/pc_event.hh"
#include "cpu/timebuf.hh"
#include "cpu/translation.hh"
#include "mem/packet.hh"
#include "mem/port.hh"
#include "sim/eventq.hh"
struct DerivO3CPUParams;
/**
* DefaultFetch class handles both single threaded and SMT fetch. Its
* width is specified by the parameters; each cycle it tries to fetch
* that many instructions. It supports using a branch predictor to
* predict direction and targets.
* It supports the idling functionality of the CPU by indicating to
* the CPU when it is active and inactive.
*/
template <class Impl>
class DefaultFetch
{
public:
/** Typedefs from Impl. */
typedef typename Impl::CPUPol CPUPol;
typedef typename Impl::DynInst DynInst;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
/** Typedefs from the CPU policy. */
typedef typename CPUPol::BPredUnit BPredUnit;
typedef typename CPUPol::FetchStruct FetchStruct;
typedef typename CPUPol::TimeStruct TimeStruct;
/** Typedefs from ISA. */
typedef TheISA::MachInst MachInst;
typedef TheISA::ExtMachInst ExtMachInst;
/**
* BaseTLB::Translation implementation that forwards the result of a
* translation to DefaultFetch::finishTranslation(). The object deletes
* itself at the end of finish(), so it must be heap-allocated and must
* not be touched after finish() has been called.
*/
class FetchTranslation : public BaseTLB::Translation
{
protected:
/** The fetch stage that receives the translation result. */
DefaultFetch<Impl> *fetch;
public:
FetchTranslation(DefaultFetch<Impl> *_fetch)
: fetch(_fetch)
{}
/** No action is taken when the translation is marked as delayed. */
void
markDelayed()
{}
/**
* Hands the fault and request to the fetch stage and then destroys
* this object.
* @param fault Fault passed on to finishTranslation().
* @param req Request passed on to finishTranslation().
* @param tc Unused here.
* @param mode Must be BaseTLB::Execute (asserted).
*/
void
finish(Fault fault, RequestPtr req, ThreadContext *tc,
BaseTLB::Mode mode)
{
assert(mode == BaseTLB::Execute);
fetch->finishTranslation(fault, req);
// Self-destruct: nothing may use this object past this point.
delete this;
}
};
private:
/* Event used to delay delivery of a fetch translation result in case
* of a fault when the nop that carries the fault cannot be generated
* immediately. */
class FinishTranslationEvent : public Event
{
private:
/** The fetch stage that receives the delayed result. */
DefaultFetch<Impl> *fetch;
/** Fault to deliver; set through setFault(). */
Fault fault;
/** Request to deliver; set through setReq(). It is not initialized
* by the constructor. */
RequestPtr req;
public:
FinishTranslationEvent(DefaultFetch<Impl> *_fetch)
: fetch(_fetch)
{}
/** Stores the fault that process() will deliver. */
void setFault(Fault _fault)
{
fault = _fault;
}
/** Stores the request that process() will deliver. */
void setReq(RequestPtr _req)
{
req = _req;
}
/** Process the delayed finish translation. Asserts that fewer than
* fetchWidth instructions have been fetched this cycle before
* handing the stored fault and request to finishTranslation(). */
void process()
{
assert(fetch->numInst < fetch->fetchWidth);
fetch->finishTranslation(fault, req);
}
const char *description() const
{
return "FullO3CPU FetchFinishTranslation";
}
};
public:
/** Overall fetch status. Used to determine if the CPU can
* deschedule itself due to a lack of activity.
*/
enum FetchStatus {
Active,
Inactive
};
/** Individual thread status. */
enum ThreadStatus {
Running,
Idle,
Squashing,
Blocked,
Fetching,
TrapPending,
QuiescePending,
SwitchOut,
ItlbWait,
IcacheWaitResponse,
IcacheWaitRetry,
IcacheAccessComplete,
NoGoodAddr
};
/** Fetching Policy, Add new policies here.*/
enum FetchPriority {
SingleThread,
RoundRobin,
Branch,
IQ,
LSQ
};
private:
/** Fetch status. */
FetchStatus _status;
/** Per-thread status. */
ThreadStatus fetchStatus[Impl::MaxThreads];
/** Fetch policy. */
FetchPriority fetchPolicy;
/** List that has the threads organized by priority. */
std::list<ThreadID> priorityList;
public:
/** DefaultFetch constructor. */
DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params);
/** Returns the name of fetch. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Sets the main backwards communication time buffer pointer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to time buffer used to communicate to the next stage. */
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
/** Initialize stage. */
void initStage();
/** Tells the fetch stage that the Icache is set. */
void setIcache();
/** Handles retrying the fetch access. */
void recvRetry();
/** Processes cache completion event. */
void processCacheCompletion(PacketPtr pkt);
/** Begins the drain of the fetch stage. */
bool drain();
/** Resumes execution after a drain. */
void resume();
/** Tells fetch stage to prepare to be switched out. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Checks if the fetch stage is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Tells fetch to wake up from a quiesce instruction. */
void wakeFromQuiesce();
private:
/** Changes the status of this stage to active, and indicates this
* to the CPU.
*/
inline void switchToActive();
/** Changes the status of this stage to inactive, and indicates
* this to the CPU.
*/
inline void switchToInactive();
/**
* Looks up in the branch predictor to see if the next PC should be
* either next PC+=MachInst or a branch target.
* @param inst The instruction to look up, passed by reference.
* @param pc PC state passed in by reference. Per the original
* description it is expected to hold the current PC on entry and is
* updated with what the next PC will be.
* @return Whether or not a branch was predicted as taken.
* NOTE(review): this comment previously described next_PC / next_NPC
* parameters that do not exist in the signature.
*/
bool lookupAndUpdateNextPC(DynInstPtr &inst, TheISA::PCState &pc);
/**
* Fetches the cache line that contains the fetch PC. Puts the data
* into the class variable cacheData.
* @param vaddr The memory address that is being fetched from.
* @param tid Thread id.
* @param pc The actual PC of the current instruction.
* @return A bool. NOTE(review): the meaning of the return value is not
* visible in this header; the earlier comment described a ret_fault
* parameter and a returned fault, neither of which matches the
* signature. Confirm against the implementation.
*/
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc);
/** Receives the outcome of a translation. Called from
* FetchTranslation::finish() and FinishTranslationEvent::process()
* with the fault and the memory request of that translation.
*/
void finishTranslation(Fault fault, RequestPtr mem_req);
/** Checks whether an interrupt is pending and needs to be handled at
* this PC. Returns true when interruptPending is set and either the
* ISA is not Alpha or the two low bits of pc are clear.
*/
bool
checkInterrupt(Addr pc)
{
return (interruptPending && (THE_ISA != ALPHA_ISA || !(pc & 0x3)));
}
/** Squashes a specific thread and resets the PC. */
inline void doSquash(const TheISA::PCState &newPC,
const DynInstPtr squashInst, ThreadID tid);
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions between fetch and decode that should be squashed.
*/
void squashFromDecode(const TheISA::PCState &newPC,
const DynInstPtr squashInst,
const InstSeqNum seq_num, ThreadID tid);
/** Checks if a thread is stalled. */
bool checkStall(ThreadID tid) const;
/** Updates overall fetch stage status; to be called at the end of each
* cycle. */
FetchStatus updateFetchStatus();
public:
/** Squashes a specific thread and resets the PC. Also tells the CPU to
* remove any instructions that are not in the ROB. The source of this
* squash should be the commit stage.
*/
void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
DynInstPtr squashInst, ThreadID tid);
/** Ticks the fetch stage, processing all input signals and fetching
* as many instructions as possible.
*/
void tick();
/** Checks all input signals and updates the status as necessary.
* @return: Returns if the status has changed due to input signals.
*/
bool checkSignalsAndUpdate(ThreadID tid);
/** Does the actual fetching of instructions and passing them on to the
* next stage.
* @param status_change fetch() sets this variable if there was a status
* change. NOTE(review): the earlier comment gave "IcacheMissStall" as an
* example, which is not a ThreadStatus value declared in this class.
*/
void fetch(bool &status_change);
/** Align a PC to the start of an I-cache block by clearing the bits
* that are set in cacheBlkMask. */
Addr icacheBlockAlignPC(Addr addr)
{
return (addr & ~(cacheBlkMask));
}
/** The decoder. */
TheISA::Decoder *decoder[Impl::MaxThreads];
private:
/** NOTE(review): presumably builds the dynamic instruction for
* staticInst on thread tid; the implementation is not visible in this
* header, so confirm the role of each argument there. */
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
StaticInstPtr curMacroop, TheISA::PCState thisPC,
TheISA::PCState nextPC, bool trace);
/** Returns the appropriate thread to fetch, given the fetch policy. */
ThreadID getFetchingThread(FetchPriority &fetch_priority);
/** Returns the appropriate thread to fetch using a round robin policy. */
ThreadID roundRobin();
/** Returns the appropriate thread to fetch using the IQ count policy. */
ThreadID iqCount();
/** Returns the appropriate thread to fetch using the LSQ count policy. */
ThreadID lsqCount();
/** Returns the appropriate thread to fetch using the branch count
* policy. */
ThreadID branchCount();
/** Pipeline the next I-cache access to the current one. */
void pipelineIcacheAccesses(ThreadID tid);
/** Profile the reasons of fetch stall. */
void profileStall(ThreadID tid);
private:
/** Pointer to the O3CPU. */
O3CPU *cpu;
/** Time buffer interface. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to get decode's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromDecode;
/** Wire to get rename's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromRename;
/** Wire to get iew's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromIEW;
/** Wire to get commit's information from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
/** Internal fetch instruction queue. */
TimeBuffer<FetchStruct> *fetchQueue;
//Might be annoying how this name is different than the queue.
/** Wire used to write any information heading to decode. */
typename TimeBuffer<FetchStruct>::wire toDecode;
/** BPredUnit. */
BPredUnit branchPred;
/** PC state, one entry per possible thread. */
TheISA::PCState pc[Impl::MaxThreads];
/** Fetch offset, one entry per possible thread.
* NOTE(review): exact meaning is not visible in this header. */
Addr fetchOffset[Impl::MaxThreads];
/** Macro-op static instruction, one entry per possible thread.
* NOTE(review): presumably the macro-op currently being fetched. */
StaticInstPtr macroop[Impl::MaxThreads];
/** Can the fetch stage redirect from an interrupt on this instruction? */
bool delayedCommit[Impl::MaxThreads];
/** Memory request used to access cache. */
RequestPtr memReq[Impl::MaxThreads];
/** Variable that tracks if fetch has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
*/
bool wroteToTimeBuffer;
/** Tracks how many instructions have been fetched this cycle. */
int numInst;
/** Source of possible stalls. */
struct Stalls {
bool decode;
bool rename;
bool iew;
bool commit;
};
/** Tracks which stages are telling fetch to stall. */
Stalls stalls[Impl::MaxThreads];
/** Decode to fetch delay, in ticks. */
unsigned decodeToFetchDelay;
/** Rename to fetch delay, in ticks. */
unsigned renameToFetchDelay;
/** IEW to fetch delay, in ticks. */
unsigned iewToFetchDelay;
/** Commit to fetch delay, in ticks. */
unsigned commitToFetchDelay;
/** The width of fetch in instructions. */
unsigned fetchWidth;
/** Is the cache blocked? If so no threads can access it. */
bool cacheBlocked;
/** The packet that is waiting to be retried. */
PacketPtr retryPkt;
/** The thread that is waiting on the cache to tell fetch to retry. */
ThreadID retryTid;
/** Cache block size. */
int cacheBlkSize;
/** Mask to get a cache block's address. */
Addr cacheBlkMask;
/** The cache line being fetched. */
uint8_t *cacheData[Impl::MaxThreads];
/** The PC of the cacheline that has been loaded. */
Addr cacheDataPC[Impl::MaxThreads];
/** Whether or not the cache data is valid. */
bool cacheDataValid[Impl::MaxThreads];
/** Size of instructions. */
int instSize;
/** Icache stall statistics. */
Counter lastIcacheStall[Impl::MaxThreads];
/** List of Active Threads */
std::list<ThreadID> *activeThreads;
/** Number of threads. */
ThreadID numThreads;
/** Number of threads that are actively fetching. */
ThreadID numFetchingThreads;
/** Thread ID being fetched. */
ThreadID threadFetched;
/** Checks if there is an interrupt pending. If there is, fetch
* must stop once it is not fetching PAL instructions.
*/
bool interruptPending;
/** Is there a drain pending. */
bool drainPending;
/** Records if fetch is switched out. */
bool switchedOut;
/** Set to true if a pipelined I-cache request should be issued. */
bool issuePipelinedIfetch[Impl::MaxThreads];
/** Event used to delay fault generation of translation faults */
FinishTranslationEvent finishTranslationEvent;
// @todo: Consider making these vectors and tracking on a per thread basis.
/** Stat for total number of cycles stalled due to an icache miss. */
Stats::Scalar icacheStallCycles;
/** Stat for total number of fetched instructions. */
Stats::Scalar fetchedInsts;
/** Total number of fetched branches. */
Stats::Scalar fetchedBranches;
/** Stat for total number of predicted branches. */
Stats::Scalar predictedBranches;
/** Stat for total number of cycles spent fetching. */
Stats::Scalar fetchCycles;
/** Stat for total number of cycles spent squashing. */
Stats::Scalar fetchSquashCycles;
/** Stat for total number of cycles spent waiting for translation */
Stats::Scalar fetchTlbCycles;
/** Stat for total number of cycles spent blocked due to other stages in
* the pipeline.
*/
Stats::Scalar fetchIdleCycles;
/** Total number of cycles spent blocked. */
Stats::Scalar fetchBlockedCycles;
/** Total number of cycles spent in any other state. */
Stats::Scalar fetchMiscStallCycles;
/** Total number of cycles spent in waiting for drains. */
Stats::Scalar fetchPendingDrainCycles;
/** Total number of stall cycles caused by no active threads to run. */
Stats::Scalar fetchNoActiveThreadStallCycles;
/** Total number of stall cycles caused by pending traps. */
Stats::Scalar fetchPendingTrapStallCycles;
/** Total number of stall cycles caused by pending quiesce instructions. */
Stats::Scalar fetchPendingQuiesceStallCycles;
/** Total number of stall cycles caused by I-cache wait retries. */
Stats::Scalar fetchIcacheWaitRetryStallCycles;
/** Stat for total number of fetched cache lines. */
Stats::Scalar fetchedCacheLines;
/** Total number of outstanding icache accesses that were dropped
* due to a squash.
*/
Stats::Scalar fetchIcacheSquashes;
/** Total number of outstanding tlb accesses that were dropped
* due to a squash.
*/
Stats::Scalar fetchTlbSquashes;
/** Distribution of number of instructions fetched each cycle. */
Stats::Distribution fetchNisnDist;
/** Rate of how often fetch was idle. */
Stats::Formula idleRate;
/** Number of branch fetches per cycle. */
Stats::Formula branchRate;
/** Number of instructions fetched per cycle. */
Stats::Formula fetchRate;
};
#endif //__CPU_O3_FETCH_HH__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "base/trace.hh"
#include "cpu/o3/free_list.hh"
#include "debug/FreeList.hh"
/**
 * Builds the free list and seeds it with every physical register that is
 * not needed to back the logical registers of the active threads.
 *
 * Integer registers occupy indices [0, numPhysicalIntRegs); floating point
 * registers follow in [numPhysicalIntRegs, numPhysicalRegs).
 *
 * @param activeThreads Number of active threads.
 * @param _numLogicalIntRegs Number of logical integer registers.
 * @param _numPhysicalIntRegs Number of physical integer registers.
 * @param _numLogicalFloatRegs Number of logical fp registers.
 * @param _numPhysicalFloatRegs Number of physical fp registers.
 */
SimpleFreeList::SimpleFreeList(ThreadID activeThreads,
                               unsigned _numLogicalIntRegs,
                               unsigned _numPhysicalIntRegs,
                               unsigned _numLogicalFloatRegs,
                               unsigned _numPhysicalFloatRegs)
    : numLogicalIntRegs(_numLogicalIntRegs),
      numPhysicalIntRegs(_numPhysicalIntRegs),
      numLogicalFloatRegs(_numLogicalFloatRegs),
      numPhysicalFloatRegs(_numPhysicalFloatRegs),
      numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs)
{
    DPRINTF(FreeList, "Creating new free list object.\n");

    // The first (numLogicalIntRegs * activeThreads) integer registers are
    // excluded; everything after them up to the end of the integer range
    // starts out free.
    PhysRegIndex int_reg = numLogicalIntRegs * activeThreads;
    while (int_reg < numPhysicalIntRegs) {
        freeIntRegs.push(int_reg);
        ++int_reg;
    }

    // Same for the floating point registers, except that their indices
    // begin where the integer registers end, so the first free one is
    // offset by numPhysicalIntRegs.
    PhysRegIndex float_reg =
        numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
    while (float_reg < numPhysicalRegs) {
        freeFloatRegs.push(float_reg);
        ++float_reg;
    }
}
/** Returns the fixed name of the free list, "cpu.freelist". The name does
* not depend on any state of the object. */
std::string
SimpleFreeList::name() const
{
return "cpu.freelist";
}

View File

@ -0,0 +1,207 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_FREE_LIST_HH__
#define __CPU_O3_FREE_LIST_HH__
#include <iostream>
#include <queue>
#include "arch/registers.hh"
#include "base/misc.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/o3/comm.hh"
#include "debug/FreeList.hh"
/**
* FreeList class that simply holds the list of free integer and floating
* point registers. Can request for a free register of either type, and
* also send back free registers of either type. This is a very simple
* class, but it should be sufficient for most implementations. Like all
* other classes, it assumes that the indices for the floating point
* registers starts after the integer registers end. Hence the variable
* numPhysicalIntRegs is logically equivalent to the baseFP dependency.
* Note that while this most likely should be called FreeList, the name
* "FreeList" is used in a typedef within the CPU Policy, and therefore no
* class can be named simply "FreeList".
* @todo: Give a better name to the base FP dependency.
*/
class SimpleFreeList
{
private:
/** The list of free integer registers. */
std::queue<PhysRegIndex> freeIntRegs;
/** The list of free floating point registers. */
std::queue<PhysRegIndex> freeFloatRegs;
/** Number of logical integer registers. */
int numLogicalIntRegs;
/** Number of physical integer registers. */
int numPhysicalIntRegs;
/** Number of logical floating point registers. */
int numLogicalFloatRegs;
/** Number of physical floating point registers. */
int numPhysicalFloatRegs;
/** Total number of physical registers. */
int numPhysicalRegs;
public:
/** Constructs a free list.
* @param activeThreads Number of active threads.
* @param _numLogicalIntRegs Number of logical integer registers.
* @param _numPhysicalIntRegs Number of physical integer registers.
* @param _numLogicalFloatRegs Number of logical fp registers.
* @param _numPhysicalFloatRegs Number of physical fp registers.
*/
SimpleFreeList(ThreadID activeThreads,
unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
unsigned _numLogicalFloatRegs,
unsigned _numPhysicalFloatRegs);
/** Gives the name of the freelist. */
std::string name() const;
/** Gets a free integer register. */
inline PhysRegIndex getIntReg();
/** Gets a free fp register. */
inline PhysRegIndex getFloatReg();
/** Adds a register back to the free list. */
inline void addReg(PhysRegIndex freed_reg);
/** Adds an integer register back to the free list. */
inline void addIntReg(PhysRegIndex freed_reg);
/** Adds a fp register back to the free list. */
inline void addFloatReg(PhysRegIndex freed_reg);
/** Checks if there are any free integer registers. */
bool hasFreeIntRegs()
{ return !freeIntRegs.empty(); }
/** Checks if there are any free fp registers. */
bool hasFreeFloatRegs()
{ return !freeFloatRegs.empty(); }
/** Returns the number of free integer registers. */
int numFreeIntRegs()
{ return freeIntRegs.size(); }
/** Returns the number of free fp registers. */
int numFreeFloatRegs()
{ return freeFloatRegs.size(); }
};
/**
 * Removes and returns the integer register at the front of the free
 * integer queue. Panics if the queue is empty.
 */
inline PhysRegIndex
SimpleFreeList::getIntReg()
{
    DPRINTF(FreeList, "Trying to get free integer register.\n");

    // Running out of integer registers is treated as fatal.
    if (freeIntRegs.empty()) {
        panic("No free integer registers!");
    }

    // Oldest freed register is handed out first.
    const PhysRegIndex next_free = freeIntRegs.front();
    freeIntRegs.pop();
    return next_free;
}
/**
 * Removes and returns the floating point register at the front of the free
 * float queue. Panics if the queue is empty.
 */
inline PhysRegIndex
SimpleFreeList::getFloatReg()
{
    DPRINTF(FreeList, "Trying to get free float register.\n");

    // The message names the float pool; it previously repeated the
    // integer-pool text, which made the panic misleading.
    if (freeFloatRegs.empty()) {
        panic("No free float registers!");
    }

    // Oldest freed register is handed out first.
    PhysRegIndex free_reg = freeFloatRegs.front();
    freeFloatRegs.pop();
    return free_reg;
}
/**
 * Returns a register to the free list, choosing the integer or floating
 * point queue from the register index.
 *
 * - Indices below numPhysicalIntRegs go to the integer queue, except
 *   TheISA::ZeroReg, which is never queued.
 * - Indices below numPhysicalRegs go to the float queue. When building for
 *   Alpha, the index (TheISA::ZeroReg + numPhysicalIntRegs) is not queued.
 * - Any other index is ignored without a diagnostic.
 *
 * No check is made for a register that is already on the list. Size
 * assertions (freeIntRegs.size() <= numPhysicalIntRegs and
 * freeFloatRegs.size() <= numPhysicalFloatRegs) would catch double frees,
 * but the file keeps them disabled so as not to potentially break CMP and
 * single-threaded workloads.
 */
inline void
SimpleFreeList::addReg(PhysRegIndex freed_reg)
{
    DPRINTF(FreeList,"Freeing register %i.\n", freed_reg);

    // Integer range.
    if (freed_reg < numPhysicalIntRegs) {
        if (freed_reg != TheISA::ZeroReg)
            freeIntRegs.push(freed_reg);
        return;
    }

    // Past the end of the register file: nothing to do.
    if (!(freed_reg < numPhysicalRegs))
        return;

#if THE_ISA == ALPHA_ISA
    // Alpha only: the zero register of the float range is never queued.
    if (!(freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs)))
        return;
#endif

    freeFloatRegs.push(freed_reg);
}
/** Appends freed_reg to the free integer register queue. The index is
* pushed as given: unlike addReg(), there is no range check and no special
* case for the zero register. */
inline void
SimpleFreeList::addIntReg(PhysRegIndex freed_reg)
{
DPRINTF(FreeList,"Freeing int register %i.\n", freed_reg);
freeIntRegs.push(freed_reg);
}
/** Appends freed_reg to the free floating point register queue. The index
* is pushed as given: unlike addReg(), there is no range check and no
* special case for a zero register. */
inline void
SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
{
DPRINTF(FreeList,"Freeing float register %i.\n", freed_reg);
freeFloatRegs.push(freed_reg);
}
#endif // __CPU_O3_FREE_LIST_HH__

View File

@ -0,0 +1,282 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include <sstream>
#include "cpu/o3/fu_pool.hh"
#include "cpu/func_unit.hh"
using namespace std;
////////////////////////////////////////////////////////////////////////////
//
// A pool of function units
//
/** Appends the function unit index fu_idx to this queue and increments
* the queue's element count. */
inline void
FUPool::FUIdxQueue::addFU(int fu_idx)
{
funcUnitsIdx.push_back(fu_idx);
++size;
}
/**
 * Returns the function unit index at the queue's current position and
 * advances the position by one, wrapping back to 0 once it reaches size.
 * No check is made for an empty queue.
 */
inline int
FUPool::FUIdxQueue::getFU()
{
    const int current_fu = funcUnitsIdx[idx];

    // Step the cursor, treating the queue as circular.
    ++idx;
    if (idx == size) {
        idx = 0;
    }

    return current_fu;
}
/** Destroys the pool and deletes every function unit held in funcUnits. */
FUPool::~FUPool()
{
    for (fuListIterator unit = funcUnits.begin();
         unit != funcUnits.end(); ++unit) {
        delete *unit;
    }
}
// Constructor
/**
 * Builds the pool from the FU descriptions in p->FUList.
 *
 * For every description with a non-zero unit count, one FuncUnit is created
 * and given every capability in the description's operation list; further
 * units are made as copies of that first one. Each unit is named
 * "<description name>(<copy number>)". While walking the operation lists
 * the constructor also records which op classes the pool supports, which
 * unit indices serve each op class, and the largest op/issue latency seen
 * per op class. All units start out not busy.
 */
FUPool::FUPool(const Params *p)
    : SimObject(p)
{
    numFU = 0;
    funcUnits.clear();

    // No latency recorded yet for any op class.
    for (int op_class = 0; op_class < Num_OpClasses; ++op_class) {
        maxOpLatencies[op_class] = 0;
        maxIssueLatencies[op_class] = 0;
    }

    // Walk the list of FU descriptions handed in through the params.
    const vector<FUDesc *> &paramList = p->FUList;
    for (FUDDiterator desc = paramList.begin();
         desc != paramList.end(); ++desc) {

        // A description that asks for no units contributes nothing.
        if (!(*desc)->number)
            continue;

        // The first unit is built from scratch and receives the
        // capabilities; the remaining ones are cloned from it below.
        FuncUnit *first_unit = new FuncUnit;

        OPDDiterator op = (*desc)->opDescList.begin();
        OPDDiterator op_end = (*desc)->opDescList.end();
        while (op != op_end) {
            // The pool as a whole now offers this op class.
            capabilityList.set((*op)->opClass);

            // Every unit of this description serves the op class. Their
            // indices are numFU .. numFU + number - 1 because numFU has
            // not yet been advanced for this description.
            for (int k = 0; k < (*desc)->number; ++k)
                fuPerCapList[(*op)->opClass].addFU(numFU + k);

            // Give the unit itself the capability.
            first_unit->addCapability((*op)->opClass, (*op)->opLat,
                                      (*op)->issueLat);

            // Track the worst-case latencies per op class.
            if ((*op)->opLat > maxOpLatencies[(*op)->opClass])
                maxOpLatencies[(*op)->opClass] = (*op)->opLat;
            if ((*op)->issueLat > maxIssueLatencies[(*op)->opClass])
                maxIssueLatencies[(*op)->opClass] = (*op)->issueLat;

            ++op;
        }

        // Register the first unit as copy number 0.
        numFU++;
        ostringstream first_name;
        first_name << (*desc)->name() << "(0)";
        first_unit->name = first_name.str();
        funcUnits.push_back(first_unit);

        // Register the remaining copies, each cloned from the first unit
        // and then renamed with its own copy number.
        for (int copy = 1; copy < (*desc)->number; ++copy) {
            ostringstream copy_name;
            numFU++;
            FuncUnit *clone = new FuncUnit(*first_unit);
            copy_name << (*desc)->name() << "(" << copy << ")";
            clone->name = copy_name.str();
            funcUnits.push_back(clone);
        }
    }

    // One busy flag per unit, all initially clear.
    unitBusy.resize(numFU);
    for (int unit = 0; unit < numFU; unit++) {
        unitBusy[unit] = false;
    }
}
void
FUPool::annotateMemoryUnits(unsigned hit_latency)
{
maxOpLatencies[MemReadOp] = hit_latency;
fuListIterator i = funcUnits.begin();
fuListIterator iend = funcUnits.end();
for (; i != iend; ++i) {
if ((*i)->provides(MemReadOp))
(*i)->opLatency(MemReadOp) = hit_latency;
if ((*i)->provides(MemWriteOp))
(*i)->opLatency(MemWriteOp) = hit_latency;
}
}
/**
 * Claims a free function unit that provides the given capability.
 *
 * @param capability Op class the unit must support.
 * @return The index of the claimed unit (now marked busy), -2 if the pool
 * has no unit with this capability at all, or -1 if every such unit is
 * currently busy.
 */
int
FUPool::getUnit(OpClass capability)
{
    // Capability unknown to this pool: report that to the caller.
    if (!capabilityList[capability])
        return -2;

    // Walk the capability's circular queue starting from its current
    // position; coming back around to the first index means every unit
    // was busy.
    const int first_idx = fuPerCapList[capability].getFU();
    for (int candidate = first_idx; ; ) {
        if (!unitBusy[candidate]) {
            assert(candidate < numFU);
            unitBusy[candidate] = true;
            return candidate;
        }

        candidate = fuPerCapList[capability].getFU();
        if (candidate == first_idx) {
            // No FU available
            return -1;
        }
    }
}
/** Queues function unit fu_idx for release. The unit must currently be
* busy (asserted); it stays busy until processFreeUnits() clears the
* queued entries. */
void
FUPool::freeUnitNextCycle(int fu_idx)
{
assert(unitBusy[fu_idx]);
unitsToBeFreed.push_back(fu_idx);
}
/**
 * Releases every function unit queued in unitsToBeFreed, taking entries
 * from the back of the queue until it is empty. Each queued unit is
 * expected to still be busy (asserted) and is marked not busy.
 */
void
FUPool::processFreeUnits()
{
    for (; !unitsToBeFreed.empty(); unitsToBeFreed.pop_back()) {
        const int pending_idx = unitsToBeFreed.back();
        assert(unitBusy[pending_idx]);
        unitBusy[pending_idx] = false;
    }
}
/**
 * Prints the pool to standard output: a title line with the pool's name,
 * then the units that are not busy under "Free List:", then the busy units
 * under "Busy List:". Each unit is shown with its index and name.
 */
void
FUPool::dump()
{
    cout << "Function Unit Pool (" << name() << ")\n";

    // Pass 0 lists the free units, pass 1 the busy ones.
    for (int pass = 0; pass < 2; ++pass) {
        const bool want_busy = (pass == 1);

        cout << "======================================\n";
        cout << (want_busy ? "Busy List:\n" : "Free List:\n");

        for (int unit = 0; unit < numFU; ++unit) {
            const bool is_busy = unitBusy[unit];
            if (is_busy == want_busy) {
                cout << " [" << unit << "] : "
                     << funcUnits[unit]->name << " "
                     << "\n";
            }
        }
    }
}
/** Intentionally empty: this function performs no work. */
void
FUPool::switchOut()
{
}
/**
 * Resets the pool's occupancy state: every function unit is marked not
 * busy and the queue of units waiting to be freed is emptied.
 */
void
FUPool::takeOver()
{
    int unit = 0;
    while (unit < numFU) {
        unitBusy[unit] = false;
        ++unit;
    }

    unitsToBeFreed.clear();
}
//
////////////////////////////////////////////////////////////////////////////
//
// The SimObjects we use to get the FU information into the simulator
//
////////////////////////////////////////////////////////////////////////////
//
// FUPool - Contains a list of FUDesc objects to make available
//
//
// The FuPool object
//
/** Creates a new heap-allocated FUPool configured from this parameter
* object and returns it; the caller receives the raw pointer. */
FUPool *
FUPoolParams::create()
{
return new FUPool(this);
}

View File

@ -0,0 +1,167 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_FU_POOL_HH__
#define __CPU_O3_FU_POOL_HH__
#include <bitset>
#include <list>
#include <string>
#include <vector>
#include "cpu/op_class.hh"
#include "params/FUPool.hh"
#include "sim/sim_object.hh"
class FUDesc;
class FuncUnit;
/**
* Pool of FU's, specific to the new CPU model. The old FU pool had lists of
* free units and busy units, and whenever a FU was needed it would iterate
* through the free units to find a FU that provided the capability. This pool
* has lists of units specific to each of the capabilities, and whenever a FU
* is needed, it iterates through that list to find a free unit. The previous
* FU pool would have to be ticked each cycle to update which units became
* free. This FU pool lets the IEW stage handle freeing units, which frees
* them as their scheduled execution events complete. This limits units in this
* model to either have identical issue and op latencies, or 1 cycle issue
* latencies.
*/
class FUPool : public SimObject
{
private:
/** Maximum op execution latencies, per op class (indexed by OpClass). */
unsigned maxOpLatencies[Num_OpClasses];
/** Maximum issue latencies, per op class (indexed by OpClass). */
unsigned maxIssueLatencies[Num_OpClasses];
/** Bitvector listing capabilities of this FU pool; one bit per op class. */
std::bitset<Num_OpClasses> capabilityList;
/** Per-FU busy flags, indexed by FU index. Set by getUnit() and cleared by
* processFreeUnits() and takeOver().
*/
std::vector<bool> unitBusy;
/** FU indices queued by freeUnitNextCycle(); the units stay busy until
* processFreeUnits() drains this list.
*/
std::vector<int> unitsToBeFreed;
/**
* Class that implements a circular queue to hold FU indices. The hope is
* that FUs that have been just used will be moved to the end of the queue
* by iterating through it, thus leaving free units at the head of the
* queue.
*/
class FUIdxQueue {
public:
/** Constructs an empty circular queue of FU indices. */
FUIdxQueue()
: idx(0), size(0)
{ }
/** Adds a FU to the queue. */
inline void addFU(int fu_idx);
/** Returns the index of the FU at the head of the queue, and changes
* the index to the next element.
*/
inline int getFU();
private:
/** Circular queue index. */
int idx;
/** Size of the queue. */
int size;
/** Queue of FU indices. */
std::vector<int> funcUnitsIdx;
};
/** Per op class queues of FUs that provide that capability. */
FUIdxQueue fuPerCapList[Num_OpClasses];
/** Number of FUs. */
int numFU;
/** Functional units, indexed by FU index. */
std::vector<FuncUnit *> funcUnits;
/** Iterator type over the list of functional units. */
typedef std::vector<FuncUnit *>::iterator fuListIterator;
public:
/** Parameter type used to construct the pool. */
typedef FUPoolParams Params;
/** Constructs a FU pool. */
FUPool(const Params *p);
~FUPool();
/** Annotates units that provide memory operations. Included only because
* old FU pool provided this function.
*/
void annotateMemoryUnits(unsigned hit_latency);
/**
* Gets a FU providing the requested capability. Will mark the unit as busy,
* but leaves the freeing of the unit up to the IEW stage.
* @param capability The capability requested.
* @return Returns -2 if the FU pool does not have the capability, -1 if
* there is no free FU, and the FU's index otherwise.
*/
int getUnit(OpClass capability);
/** Queues a busy FU to be freed; the unit remains marked busy until
* processFreeUnits() is called.
* @param fu_idx Index of the FU to free.
*/
void freeUnitNextCycle(int fu_idx);
/** Frees all FUs on the to-be-freed list and empties that list. */
void processFreeUnits();
/** Returns the total number of FUs. */
int size() { return numFU; }
/** Debugging function used to dump FU information. */
void dump();
/** Returns the operation execution latency of the given capability. */
unsigned getOpLatency(OpClass capability) {
return maxOpLatencies[capability];
}
/** Returns the issue latency of the given capability. */
unsigned getIssueLatency(OpClass capability) {
return maxIssueLatencies[capability];
}
/** Switches out functional unit pool. */
void switchOut();
/** Takes over from another CPU's thread: marks every FU as free and
* discards any pending frees.
*/
void takeOver();
};
#endif // __CPU_O3_FU_POOL_HH__

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/iew_impl.hh"
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/isa_specific.hh"
// Force instantiation of DefaultIEW.
template class DefaultIEW<O3CPUImpl>;

View File

@ -0,0 +1,548 @@
/*
* Copyright (c) 2010 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_IEW_HH__
#define __CPU_O3_IEW_HH__
#include <queue>
#include <set>
#include "base/statistics.hh"
#include "cpu/o3/comm.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/o3/scoreboard.hh"
#include "cpu/timebuf.hh"
#include "debug/IEW.hh"
struct DerivO3CPUParams;
class FUPool;
/**
* DefaultIEW handles both single threaded and SMT IEW
* (issue/execute/writeback). It handles the dispatching of
* instructions to the LSQ/IQ as part of the issue stage, and has the
* IQ try to issue instructions each cycle. The execute latency is
* actually tied into the issue latency to allow the IQ to be able to
* do back-to-back scheduling without having to speculatively schedule
* instructions. This happens by having the IQ have access to the
* functional units, and the IQ gets the execution latencies from the
* FUs when it issues instructions. Instructions reach the execute
* stage on the last cycle of their execution, which is when the IQ
* knows to wake up any dependent instructions, allowing back to back
* scheduling. The execute portion of IEW separates memory
* instructions from non-memory instructions, either telling the LSQ
* to execute the instruction, or executing the instruction directly.
* The writeback portion of IEW completes the instructions by waking
* up any dependents, and marking the register ready on the
* scoreboard.
*/
template<class Impl>
class DefaultIEW
{
private:
//Typedefs from Impl
typedef typename Impl::CPUPol CPUPol;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
typedef typename CPUPol::IQ IQ;
typedef typename CPUPol::RenameMap RenameMap;
typedef typename CPUPol::LSQ LSQ;
typedef typename CPUPol::TimeStruct TimeStruct;
typedef typename CPUPol::IEWStruct IEWStruct;
typedef typename CPUPol::RenameStruct RenameStruct;
typedef typename CPUPol::IssueStruct IssueStruct;
public:
/** Overall IEW stage status. Used to determine if the CPU can
* deschedule itself due to a lack of activity.
*/
enum Status {
Active,
Inactive
};
/** Status for Issue, Execute, and Writeback stages. */
enum StageStatus {
Running,
Blocked,
Idle,
StartSquash,
Squashing,
Unblocking
};
private:
/** Overall stage status. */
Status _status;
/** Dispatch status. */
StageStatus dispatchStatus[Impl::MaxThreads];
/** Execute status. */
StageStatus exeStatus;
/** Writeback status. */
StageStatus wbStatus;
public:
/** Constructs a DefaultIEW with the given parameters. */
DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params);
/** Returns the name of the DefaultIEW stage. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Initializes stage; sends back the number of free IQ and LSQ entries. */
void initStage();
/** Sets main time buffer used for backwards communication. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Sets time buffer for getting instructions coming from rename. */
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
/** Sets time buffer to pass on instructions to commit. */
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *sb_ptr);
/** Drains IEW stage. */
bool drain();
/** Resumes execution after a drain. */
void resume();
/** Completes switch out of IEW stage. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Returns if IEW is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Squashes instructions in IEW for a specific thread. */
void squash(ThreadID tid);
/** Wakes all dependents of a completed instruction. */
void wakeDependents(DynInstPtr &inst);
/** Tells memory dependence unit that a memory instruction needs to be
* rescheduled. It will re-execute once replayMemInst() is called.
*/
void rescheduleMemInst(DynInstPtr &inst);
/** Re-executes all rescheduled memory instructions. */
void replayMemInst(DynInstPtr &inst);
/** Sends an instruction to commit through the time buffer. */
void instToCommit(DynInstPtr &inst);
/** Inserts unused instructions of a thread into the skid buffer. */
void skidInsert(ThreadID tid);
/** Returns the max of the number of entries in all of the skid buffers. */
int skidCount();
/** Returns if all of the skid buffers are empty. */
bool skidsEmpty();
/** Updates overall IEW status based on all of the stages' statuses. */
void updateStatus();
/** Resets entries of the IQ and the LSQ. */
void resetEntries();
/** Tells the CPU to wakeup if it has descheduled itself due to no
* activity. Used mainly by the LdWritebackEvent.
*/
void wakeCPU();
/** Reports to the CPU that there is activity this cycle. */
void activityThisCycle();
/** Tells CPU that the IEW stage is active and running. */
inline void activateStage();
/** Tells CPU that the IEW stage is inactive and idle. */
inline void deactivateStage();
/** Returns if the LSQ has any stores to writeback. */
bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); }
/** Returns if the LSQ reports any stores to writeback for thread tid. */
bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); }
/** Records one more outstanding writeback. Issue is disabled once the
 * count reaches wbMax.
 * @param sn Sequence number of the instruction, used for tracing and, in
 * DEBUG builds, tracked in wbList.
 */
void incrWb(InstSeqNum &sn)
{
    ++wbOutstanding;
    if (wbOutstanding == wbMax) {
        ableToIssue = false;
    }

    DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);
    assert(wbOutstanding <= wbMax);

#ifdef DEBUG
    wbList.insert(sn);
#endif
}
/** Records that one outstanding writeback has finished. Issue is enabled
 * again if the count was at wbMax before the decrement.
 * @param sn Sequence number of the instruction, used for tracing and, in
 * DEBUG builds, removed from wbList.
 */
void decrWb(InstSeqNum &sn)
{
    // Sample the limit check before decrementing (post-decrement semantics).
    const bool was_at_limit = (wbOutstanding == wbMax);
    --wbOutstanding;
    if (was_at_limit) {
        ableToIssue = true;
    }

    DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);
    assert(wbOutstanding >= 0);

#ifdef DEBUG
    assert(wbList.find(sn) != wbList.end());
    wbList.erase(sn);
#endif
}
#ifdef DEBUG
std::set<InstSeqNum> wbList;
void dumpWb()
{
std::set<InstSeqNum>::iterator wb_it = wbList.begin();
while (wb_it != wbList.end()) {
cprintf("[sn:%lli]\n",
(*wb_it));
wb_it++;
}
}
#endif
/** Returns the current value of ableToIssue. */
bool canIssue() { return ableToIssue; }
/** Cleared by incrWb() when wbOutstanding reaches wbMax, and set again by
* decrWb() when the count drops back below it.
*/
bool ableToIssue;
/** Check misprediction */
void checkMisprediction(DynInstPtr &inst);
private:
/** Sends commit proper information for a squash due to a branch
* mispredict.
*/
void squashDueToBranch(DynInstPtr &inst, ThreadID tid);
/** Sends commit proper information for a squash due to a memory order
* violation.
*/
void squashDueToMemOrder(DynInstPtr &inst, ThreadID tid);
/** Sends commit proper information for a squash due to memory becoming
* blocked (younger issued instructions must be retried).
*/
void squashDueToMemBlocked(DynInstPtr &inst, ThreadID tid);
/** Sets Dispatch to blocked, and signals back to other stages to block. */
void block(ThreadID tid);
/** Unblocks Dispatch if the skid buffer is empty, and signals back to
* other stages to unblock.
*/
void unblock(ThreadID tid);
/** Determines proper actions to take given Dispatch's status. */
void dispatch(ThreadID tid);
/** Dispatches instructions to IQ and LSQ. */
void dispatchInsts(ThreadID tid);
/** Executes instructions. In the case of memory operations, it informs the
* LSQ to execute the instructions. Also handles any redirects that occur
* due to the executed instructions.
*/
void executeInsts();
/** Writebacks instructions. In our model, the instruction's execute()
* function atomically reads registers, executes, and writes registers.
* Thus this writeback only wakes up dependent instructions, and informs
* the scoreboard of registers becoming ready.
*/
void writebackInsts();
/** Returns the number of valid, non-squashed instructions coming from
* rename to dispatch.
*/
unsigned validInstsFromRename();
/** Reads the stall signals. */
void readStallSignals(ThreadID tid);
/** Checks if any of the stall conditions are currently true. */
bool checkStall(ThreadID tid);
/** Processes inputs and changes state accordingly. */
void checkSignalsAndUpdate(ThreadID tid);
/** Removes instructions from rename from a thread's instruction list. */
void emptyRenameInsts(ThreadID tid);
/** Sorts instructions coming from rename into lists separated by thread. */
void sortInsts();
public:
/** Ticks IEW stage, causing Dispatch, the IQ, the LSQ, Execute, and
* Writeback to run for one cycle.
*/
void tick();
private:
/** Updates execution stats based on the instruction. */
void updateExeInstStats(DynInstPtr &inst);
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to write information heading to previous stages. */
typename TimeBuffer<TimeStruct>::wire toFetch;
/** Wire to get commit's output from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
/** Wire to write information heading to previous stages. */
typename TimeBuffer<TimeStruct>::wire toRename;
/** Rename instruction queue interface. */
TimeBuffer<RenameStruct> *renameQueue;
/** Wire to get rename's output from rename queue. */
typename TimeBuffer<RenameStruct>::wire fromRename;
/** Issue stage queue. */
TimeBuffer<IssueStruct> issueToExecQueue;
/** Wire to read information from the issue stage time queue. */
typename TimeBuffer<IssueStruct>::wire fromIssue;
/**
* IEW stage time buffer. Holds ROB indices of instructions that
* can be marked as completed.
*/
TimeBuffer<IEWStruct> *iewQueue;
/** Wire to write information heading to commit. */
typename TimeBuffer<IEWStruct>::wire toCommit;
/** Queue of all instructions coming from rename this cycle. */
std::queue<DynInstPtr> insts[Impl::MaxThreads];
/** Skid buffer between rename and IEW. */
std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
/** Scoreboard pointer. */
Scoreboard* scoreboard;
private:
/** CPU pointer. */
O3CPU *cpu;
/** Records if IEW has written to the time buffer this cycle, so that the
* CPU can deschedule itself if there is no activity.
*/
bool wroteToTimeBuffer;
/** Source of possible stalls. */
struct Stalls {
bool commit;
};
/** Stages that are telling IEW to stall. */
Stalls stalls[Impl::MaxThreads];
/** Debug function to print instructions that are issued this cycle. */
void printAvailableInsts();
public:
/** Instruction queue. */
IQ instQueue;
/** Load / store queue. */
LSQ ldstQueue;
/** Pointer to the functional unit pool. */
FUPool *fuPool;
/** Records if the LSQ needs to be updated on the next cycle, so that
* IEW knows if there will be activity on the next cycle.
*/
bool updateLSQNextCycle;
private:
/** Records if there is a fetch redirect on this cycle for each thread. */
bool fetchRedirect[Impl::MaxThreads];
/** Records if the queues have been changed (inserted or issued insts),
* so that IEW knows to broadcast the updated amount of free entries.
*/
bool updatedQueues;
/** Commit to IEW delay, in ticks. */
unsigned commitToIEWDelay;
/** Rename to IEW delay, in ticks. */
unsigned renameToIEWDelay;
/**
* Issue to execute delay, in ticks. What this actually represents is
* the amount of time it takes for an instruction to wake up, be
* scheduled, and sent to a FU for execution.
*/
unsigned issueToExecuteDelay;
/** Width of dispatch, in instructions. */
unsigned dispatchWidth;
/** Width of issue, in instructions. */
unsigned issueWidth;
/** Index into queue of instructions being written back. */
unsigned wbNumInst;
/** Cycle number within the queue of instructions being written back.
* Used in case there are too many instructions writing back at the current
* cycle and writesbacks need to be scheduled for the future. See comments
* in instToCommit().
*/
unsigned wbCycle;
/** Number of instructions in flight that will writeback. */
int wbOutstanding;
/** Writeback width. */
unsigned wbWidth;
/** Writeback width * writeback depth, where writeback depth is
* the number of cycles of writing back instructions that can be
* buffered. */
unsigned wbMax;
/** Number of active threads. */
ThreadID numThreads;
/** Pointer to list of active threads. */
std::list<ThreadID> *activeThreads;
/** Maximum size of the skid buffer. */
unsigned skidBufferMax;
/** Is this stage switched out. */
bool switchedOut;
/** Stat for total number of idle cycles. */
Stats::Scalar iewIdleCycles;
/** Stat for total number of squashing cycles. */
Stats::Scalar iewSquashCycles;
/** Stat for total number of blocking cycles. */
Stats::Scalar iewBlockCycles;
/** Stat for total number of unblocking cycles. */
Stats::Scalar iewUnblockCycles;
/** Stat for total number of instructions dispatched. */
Stats::Scalar iewDispatchedInsts;
/** Stat for total number of squashed instructions dispatch skips. */
Stats::Scalar iewDispSquashedInsts;
/** Stat for total number of dispatched load instructions. */
Stats::Scalar iewDispLoadInsts;
/** Stat for total number of dispatched store instructions. */
Stats::Scalar iewDispStoreInsts;
/** Stat for total number of dispatched non speculative instructions. */
Stats::Scalar iewDispNonSpecInsts;
/** Stat for number of times the IQ becomes full. */
Stats::Scalar iewIQFullEvents;
/** Stat for number of times the LSQ becomes full. */
Stats::Scalar iewLSQFullEvents;
/** Stat for total number of memory ordering violation events. */
Stats::Scalar memOrderViolationEvents;
/** Stat for total number of incorrect predicted taken branches. */
Stats::Scalar predictedTakenIncorrect;
/** Stat for total number of incorrect predicted not taken branches. */
Stats::Scalar predictedNotTakenIncorrect;
/** Stat for total number of mispredicted branches detected at execute. */
Stats::Formula branchMispredicts;
/** Stat for total number of executed instructions. */
Stats::Scalar iewExecutedInsts;
/** Stat for total number of executed load instructions. */
Stats::Vector iewExecLoadInsts;
/** Stat for total number of executed store instructions. */
// Stats::Scalar iewExecStoreInsts;
/** Stat for total number of squashed instructions skipped at execute. */
Stats::Scalar iewExecSquashedInsts;
/** Number of executed software prefetches. */
Stats::Vector iewExecutedSwp;
/** Number of executed nops. */
Stats::Vector iewExecutedNop;
/** Number of executed memory references. */
Stats::Vector iewExecutedRefs;
/** Number of executed branches. */
Stats::Vector iewExecutedBranches;
/** Number of executed store instructions. */
Stats::Formula iewExecStoreInsts;
/** Number of instructions executed per cycle. */
Stats::Formula iewExecRate;
/** Number of instructions sent to commit. */
Stats::Vector iewInstsToCommit;
/** Number of instructions that writeback. */
Stats::Vector writebackCount;
/** Number of instructions that wake consumers. */
Stats::Vector producerInst;
/** Number of instructions that wake up from producers. */
Stats::Vector consumerInst;
/** Number of instructions that were delayed in writing back due
* to resource contention.
*/
Stats::Vector wbPenalized;
/** Number of instructions per cycle written back. */
Stats::Formula wbRate;
/** Average number of woken instructions per writeback. */
Stats::Formula wbFanout;
/** Number of instructions per cycle delayed in writing back . */
Stats::Formula wbPenalizedRate;
};
#endif // __CPU_O3_IEW_HH__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,84 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_IMPL_HH__
#define __CPU_O3_IMPL_HH__
#include "arch/isa_traits.hh"
#include "config/the_isa.hh"
#include "cpu/o3/cpu_policy.hh"
// Forward declarations.
template <class Impl>
class BaseO3DynInst;
template <class Impl>
class FullO3CPU;
/** Implementation specific struct that defines several key types to the
* CPU, the stages within the CPU, the time buffers, and the DynInst.
* The struct defines the ISA, the CPU policy, the specific DynInst, the
* specific O3CPU, and all of the structs from the time buffers to do
* communication.
* This is one of the key things that must be defined for each hardware
* specific CPU implementation.
*/
struct O3CPUImpl
{
/** The type of MachInst. */
typedef TheISA::MachInst MachInst;
/** The CPU policy to be used, which defines all of the CPU stages. */
typedef SimpleCPUPolicy<O3CPUImpl> CPUPol;
/** The DynInst type to be used. */
typedef BaseO3DynInst<O3CPUImpl> DynInst;
/** The refcounted DynInst pointer to be used. In most cases this is
* what should be used, and not DynInst *.
*/
typedef RefCountingPtr<DynInst> DynInstPtr;
/** The O3CPU type to be used. */
typedef FullO3CPU<O3CPUImpl> O3CPU;
/** Same typedef, but for CPUType. BaseDynInst may not always use
* an O3 CPU, so it's clearer to call it CPUType instead in that
* case.
*/
typedef O3CPU CPUType;
/** Compile-time limits of this implementation.
* NOTE(review): MaxThreads is presumably the bound for per-thread arrays
* in the pipeline stages and MaxWidth the bound on stage widths; confirm
* against the users of these constants.
*/
enum {
MaxWidth = 8,
MaxThreads = 4
};
};
#endif // __CPU_O3_IMPL_HH__

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/inst_queue_impl.hh"
#include "cpu/o3/isa_specific.hh"
// Force instantiation of InstructionQueue for the O3CPUImpl implementation,
// so that its template member definitions are emitted in this translation
// unit.
template class InstructionQueue<O3CPUImpl>;

View File

@ -0,0 +1,538 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved.
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_INST_QUEUE_HH__
#define __CPU_O3_INST_QUEUE_HH__
#include <list>
#include <map>
#include <queue>
#include <vector>
#include "base/statistics.hh"
#include "base/types.hh"
#include "cpu/o3/dep_graph.hh"
#include "cpu/inst_seq.hh"
#include "cpu/op_class.hh"
#include "cpu/timebuf.hh"
#include "sim/eventq.hh"
struct DerivO3CPUParams;
class FUPool;
class MemInterface;
/**
* A standard instruction queue class. It holds ready instructions, in
* order, in separate priority queues to facilitate the scheduling of
* instructions. The IQ uses a separate linked list to track dependencies.
* Similar to the rename map and the free list, it expects that
* floating point registers have their indices start after the integer
* registers (i.e. with 96 int and 96 fp registers, regs 0-95 are integer
* and 96-191 are fp). This remains true even for both logical and
* physical register indices. The IQ depends on the memory dependence unit to
* track when memory operations are ready in terms of ordering; register
* dependencies are tracked normally. Right now the IQ also handles the
* execution timing; this is mainly to allow back-to-back scheduling without
* requiring IEW to be able to peek into the IQ. At the end of the execution
* latency, the instruction is put into the queue to execute, where it will
* have the execute() function called on it.
*
* NOTE(review): this header uses std::string (see name()) without including
* <string> directly; presumably it arrives transitively through one of the
* project headers -- confirm before reordering includes.
*
* @todo: Make IQ able to handle multiple FU pools.
*/
template <class Impl>
class InstructionQueue
{
public:
//Typedefs from the Impl.
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
typedef typename Impl::CPUPol::TimeStruct TimeStruct;
// Typedef of iterator through the list of instructions.
typedef typename std::list<DynInstPtr>::iterator ListIt;
/** FU completion event class. */
class FUCompletion : public Event {
private:
/** Executing instruction. */
DynInstPtr inst;
/** Index of the FU used for executing. */
int fuIdx;
/** Pointer back to the instruction queue. */
InstructionQueue<Impl> *iqPtr;
/** Should the FU be added to the list to be freed upon
* completing this event.
*/
bool freeFU;
public:
/** Construct a FU completion event. */
FUCompletion(DynInstPtr &_inst, int fu_idx,
InstructionQueue<Impl> *iq_ptr);
/** Event callback; defined out of line. Presumably hands the instruction
* back to the IQ via processFUCompletion() -- confirm in the
* implementation file.
*/
virtual void process();
/** Returns a description string for this event; defined out of line. */
virtual const char *description() const;
/** Marks the FU as one that should be freed when this event completes. */
void setFreeFU() { freeFU = true; }
};
/** Constructs an IQ. */
InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
/** Destructs the IQ. */
~InstructionQueue();
/** Returns the name of the IQ. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Resets all instruction queue state. */
void resetState();
/** Sets active threads list. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets the timer buffer between issue and execute. */
void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
/** Sets the global time buffer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Switches out the instruction queue. */
void switchOut();
/** Takes over execution from another CPU's thread. */
void takeOverFrom();
/** Returns if the IQ is switched out. */
bool isSwitchedOut() { return switchedOut; }
/** Number of entries needed for given amount of threads. */
int entryAmount(ThreadID num_threads);
/** Resets max entries for all threads. */
void resetEntries();
/** Returns total number of free entries. */
unsigned numFreeEntries();
/** Returns number of free entries for a thread. */
unsigned numFreeEntries(ThreadID tid);
/** Returns whether or not the IQ is full. */
bool isFull();
/** Returns whether or not the IQ is full for a specific thread. */
bool isFull(ThreadID tid);
/** Returns if there are any ready instructions in the IQ. */
bool hasReadyInsts();
/** Inserts a new instruction into the IQ. */
void insert(DynInstPtr &new_inst);
/** Inserts a new, non-speculative instruction into the IQ. */
void insertNonSpec(DynInstPtr &new_inst);
/** Inserts a memory or write barrier into the IQ to make sure
* loads and stores are ordered properly.
*/
void insertBarrier(DynInstPtr &barr_inst);
/** Returns the oldest scheduled instruction, and removes it from
* the list of instructions waiting to execute.
*/
DynInstPtr getInstToExecute();
/** Returns a memory instruction that was deferred due to a delayed DTB
* translation if it is now ready to execute.
*/
DynInstPtr getDeferredMemInstToExecute();
/**
* Records the instruction as the producer of a register without
* adding it to the rest of the IQ. Simply forwards to addToProducers().
*/
void recordProducer(DynInstPtr &inst)
{ addToProducers(inst); }
/** Process FU completion event. */
void processFUCompletion(DynInstPtr &inst, int fu_idx);
/**
* Schedules ready instructions, adding the ready ones (oldest first) to
* the queue to execute.
*/
void scheduleReadyInsts();
/** Schedules a single specific non-speculative instruction. */
void scheduleNonSpec(const InstSeqNum &inst);
/**
* Commits all instructions up to and including the given sequence number,
* for a specific thread.
*/
void commit(const InstSeqNum &inst, ThreadID tid = 0);
/** Wakes all dependents of a completed instruction. */
int wakeDependents(DynInstPtr &completed_inst);
/** Adds a ready memory instruction to the ready list. */
void addReadyMemInst(DynInstPtr &ready_inst);
/**
* Reschedules a memory instruction. It will be ready to issue once
* replayMemInst() is called.
*/
void rescheduleMemInst(DynInstPtr &resched_inst);
/** Replays a memory instruction. It must be rescheduled first. */
void replayMemInst(DynInstPtr &replay_inst);
/** Completes a memory operation. */
void completeMemInst(DynInstPtr &completed_inst);
/**
* Defers a memory instruction when its DTB translation incurs a hw
* page table walk.
*/
void deferMemInst(DynInstPtr &deferred_inst);
/** Indicates an ordering violation between a store and a load. */
void violation(DynInstPtr &store, DynInstPtr &faulting_load);
/**
* Squashes instructions for a thread. Squashing information is obtained
* from the time buffer.
*/
void squash(ThreadID tid);
/** Returns the number of used entries for a thread (reads count[tid]). */
unsigned getCount(ThreadID tid) { return count[tid]; };
/** Debug function to print all instructions. */
void printInsts();
private:
/** Does the actual squashing. */
void doSquash(ThreadID tid);
/////////////////////////
// Various pointers
/////////////////////////
/** Pointer to the CPU. */
O3CPU *cpu;
/** Cache interface. */
MemInterface *dcacheInterface;
/** Pointer to IEW stage. */
IEW *iewStage;
/** The memory dependence unit, which tracks/predicts memory dependences
* between instructions. One unit per possible thread.
*/
MemDepUnit memDepUnit[Impl::MaxThreads];
/** The queue to the execute stage. Issued instructions will be written
* into it.
*/
TimeBuffer<IssueStruct> *issueToExecuteQueue;
/** The backwards time buffer. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to read information from timebuffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
/** Function unit pool. */
FUPool *fuPool;
//////////////////////////////////////
// Instruction lists, ready queues, and ordering
//////////////////////////////////////
/** List of all the instructions in the IQ (some of which may be issued). */
std::list<DynInstPtr> instList[Impl::MaxThreads];
/** List of instructions that are ready to be executed. */
std::list<DynInstPtr> instsToExecute;
/** List of instructions waiting for their DTB translation to
* complete (hw page table walk in progress).
*/
std::list<DynInstPtr> deferredMemInsts;
/**
* Struct for comparing entries to be added to the priority queue.
* This gives reverse ordering to the instructions in terms of
* sequence numbers: the instructions with smaller sequence
* numbers (and hence are older) will be at the top of the
* priority queue.
*/
struct pqCompare {
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
{
// "Greater than" makes std::priority_queue surface the smallest seqNum.
return lhs->seqNum > rhs->seqNum;
}
};
/** Priority queue of instructions ordered oldest-first via pqCompare. */
typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
ReadyInstQueue;
/** List of ready instructions, per op class. They are separated by op
* class to allow for easy mapping to FUs.
*/
ReadyInstQueue readyInsts[Num_OpClasses];
/** List of non-speculative instructions that will be scheduled
* once the IQ gets a signal from commit. While it's redundant to
* have the key be a part of the value (the sequence number is stored
* inside of DynInst), when these instructions are woken up only
* the sequence number will be available. Thus it is most efficient to be
* able to search by the sequence number alone.
*/
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
/** Iterator type over the non-speculative instruction map. */
typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
/** Entry for the list age ordering by op class. */
struct ListOrderEntry {
/** Op class of the ready queue this entry stands for. */
OpClass queueType;
/** Sequence number of the oldest instruction in that ready queue. */
InstSeqNum oldestInst;
};
/** List that contains the age order of the oldest instruction of each
* ready queue. Used to select the oldest instruction available
* among op classes.
* @todo: Might be better to just move these entries around instead
* of creating new ones every time the position changes due to an
* instruction issuing. Not sure std::list supports this.
*/
std::list<ListOrderEntry> listOrder;
/** Iterator type over the age order list. */
typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
/** Tracks if each ready queue is on the age order list. */
bool queueOnList[Num_OpClasses];
/** Iterators of each ready queue. Points to their spot in the age order
* list.
*/
ListOrderIt readyIt[Num_OpClasses];
/** Add an op class to the age order list. */
void addToOrderList(OpClass op_class);
/**
* Called when the oldest instruction has been removed from a ready queue;
* this places that ready queue into the proper spot in the age order list.
*/
void moveToYoungerInst(ListOrderIt age_order_it);
/** Dependency graph over instructions. Presumably the structure that
* addToDependents()/addToProducers() populate -- confirm in the
* implementation file.
*/
DependencyGraph<DynInstPtr> dependGraph;
//////////////////////////////////////
// Various parameters
//////////////////////////////////////
/** IQ Resource Sharing Policy */
enum IQPolicy {
Dynamic,
Partitioned,
Threshold
};
/** IQ sharing policy for SMT. */
IQPolicy iqPolicy;
/** Number of Total Threads*/
ThreadID numThreads;
/** Pointer to list of active threads. */
std::list<ThreadID> *activeThreads;
/** Per Thread IQ count */
unsigned count[Impl::MaxThreads];
/** Max IQ Entries Per Thread */
unsigned maxEntries[Impl::MaxThreads];
/** Number of free IQ entries left. */
unsigned freeEntries;
/** The number of entries in the instruction queue. */
unsigned numEntries;
/** The total number of instructions that can be issued in one cycle. */
unsigned totalWidth;
/** The number of physical registers in the CPU. */
unsigned numPhysRegs;
/** The number of physical integer registers in the CPU. */
unsigned numPhysIntRegs;
/** The number of floating point registers in the CPU. */
unsigned numPhysFloatRegs;
/** Delay between commit stage and the IQ.
* @todo: Make there be a distinction between the delays within IEW.
*/
unsigned commitToIEWDelay;
/** Is the IQ switched out. */
bool switchedOut;
/** The sequence number of the squashed instruction. */
InstSeqNum squashedSeqNum[Impl::MaxThreads];
/** A cache of the recently woken registers. It is 1 if the register
* has been woken up recently, and 0 if the register has been added
* to the dependency graph and has not yet received its value. It
* is basically a secondary scoreboard, and should pretty much mirror
* the scoreboard that exists in the rename map.
*/
std::vector<bool> regScoreboard;
/** Adds an instruction to the dependency graph, as a consumer. */
bool addToDependents(DynInstPtr &new_inst);
/** Adds an instruction to the dependency graph, as a producer. */
void addToProducers(DynInstPtr &new_inst);
/** Moves an instruction to the ready queue if it is ready. */
void addIfReady(DynInstPtr &inst);
/** Debugging function to count how many entries are in the IQ. It does
* a linear walk through the instructions, so do not call this function
* during normal execution.
*/
int countInsts();
/** Debugging function to dump all the list sizes, as well as print
* out the list of nonspeculative instructions. Should not be used
* in any other capacity, but it has no harmful side effects.
*/
void dumpLists();
/** Debugging function to dump out all instructions that are in the
* IQ.
*/
void dumpInsts();
/** Stat for number of instructions added. */
Stats::Scalar iqInstsAdded;
/** Stat for number of non-speculative instructions added. */
Stats::Scalar iqNonSpecInstsAdded;
/** Presumably the stat for the total number of instructions issued
* (undocumented in the original; inferred from the name) -- confirm
* against regStats().
*/
Stats::Scalar iqInstsIssued;
/** Stat for number of integer instructions issued. */
Stats::Scalar iqIntInstsIssued;
/** Stat for number of floating point instructions issued. */
Stats::Scalar iqFloatInstsIssued;
/** Stat for number of branch instructions issued. */
Stats::Scalar iqBranchInstsIssued;
/** Stat for number of memory instructions issued. */
Stats::Scalar iqMemInstsIssued;
/** Stat for number of miscellaneous instructions issued. */
Stats::Scalar iqMiscInstsIssued;
/** Stat for number of squashed instructions that were ready to issue. */
Stats::Scalar iqSquashedInstsIssued;
/** Stat for number of squashed instructions examined when squashing. */
Stats::Scalar iqSquashedInstsExamined;
/** Stat for number of squashed instruction operands examined when
* squashing.
*/
Stats::Scalar iqSquashedOperandsExamined;
/** Stat for number of non-speculative instructions removed due to a squash.
*/
Stats::Scalar iqSquashedNonSpecRemoved;
// Also include number of instructions rescheduled and replayed.
/** Distribution of number of instructions in the queue.
* @todo: Need to create struct to track the entry time for each
* instruction. */
// Stats::VectorDistribution queueResDist;
/** Distribution of the number of instructions issued. */
Stats::Distribution numIssuedDist;
/** Distribution of the cycles it takes to issue an instruction.
* @todo: Need to create struct to track the ready time for each
* instruction. */
// Stats::VectorDistribution issueDelayDist;
/** Number of times an instruction could not be issued because a
* FU was busy.
*/
Stats::Vector statFuBusy;
// Stats::Vector dist_unissued;
/** Stat for total number issued for each instruction type. */
Stats::Vector2d statIssuedInstType;
/** Number of instructions issued per cycle. */
Stats::Formula issueRate;
/** Number of times the FU was busy. */
Stats::Vector fuBusy;
/** Number of times the FU was busy per instruction issued. */
Stats::Formula fuBusyRate;
public:
// Publicly accessible access counters. Their exact meaning is not
// documented here; judging by the names they count integer/floating-point
// IQ reads, writes and wakeup accesses plus ALU accesses -- confirm
// against the code that increments them.
Stats::Scalar intInstQueueReads;
Stats::Scalar intInstQueueWrites;
Stats::Scalar intInstQueueWakeupAccesses;
Stats::Scalar fpInstQueueReads;
Stats::Scalar fpInstQueueWrites;
// NOTE(review): "Qccesses" looks like a typo for "Accesses" (compare
// intInstQueueWakeupAccesses). Left unchanged because code outside this
// header may refer to the member by this exact name.
Stats::Scalar fpInstQueueWakeupQccesses;
Stats::Scalar intAluAccesses;
Stats::Scalar fpAluAccesses;
};
#endif //__CPU_O3_INST_QUEUE_HH__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Korey Sewell
*/
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/impl.hh"
#include "cpu/base.hh"

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Korey Sewell
*/
#include "cpu/o3/isa_specific.hh"
#include "cpu/o3/lsq_impl.hh"
// Force the instantiation of LSQ for all the implementations we care about
// (currently only O3CPUImpl). The member definitions come from
// cpu/o3/lsq_impl.hh, included above.
template class LSQ<O3CPUImpl>;

View File

@ -0,0 +1,359 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Korey Sewell
*/
#ifndef __CPU_O3_LSQ_HH__
#define __CPU_O3_LSQ_HH__
#include <map>
#include <queue>
#include "cpu/o3/lsq_unit.hh"
#include "cpu/inst_seq.hh"
#include "mem/port.hh"
#include "sim/sim_object.hh"
struct DerivO3CPUParams;
/**
* Load/store queue front end for the O3 CPU. It owns one LSQUnit per
* possible thread (thread[Impl::MaxThreads]); most per-thread member
* functions below simply forward to the LSQUnit of the given thread.
*
* NOTE(review): this header uses std::list and std::string without
* including <list>/<string> directly; presumably they arrive transitively
* through the project headers above -- confirm before reordering includes.
*/
template <class Impl>
class LSQ {
public:
typedef typename Impl::O3CPU O3CPU;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::CPUPol::IEW IEW;
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
/** SMT policy. */
enum LSQPolicy {
Dynamic,
Partitioned,
Threshold
};
/** Constructs an LSQ with the given parameters. */
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
/** Returns the name of the LSQ. */
std::string name() const;
/** Registers statistics of each LSQ unit. */
void regStats();
/** Sets the pointer to the list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Switches out the LSQ. */
void switchOut();
/** Takes over execution from another CPU's thread. */
void takeOverFrom();
/** Number of entries needed for the given amount of threads.*/
int entryAmount(ThreadID num_threads);
/** Undocumented in the original; presumably releases the entries held by
* the given thread -- confirm in the implementation file.
*/
void removeEntries(ThreadID tid);
/** Reset the max entries for each thread. */
void resetEntries();
/** Resize the max entries for a thread. */
void resizeEntries(unsigned size, ThreadID tid);
/** Ticks the LSQ. */
void tick();
/** Ticks a specific LSQ Unit. */
void tick(ThreadID tid)
{ thread[tid].tick(); }
/** Inserts a load into the LSQ. */
void insertLoad(DynInstPtr &load_inst);
/** Inserts a store into the LSQ. */
void insertStore(DynInstPtr &store_inst);
/** Executes a load. */
Fault executeLoad(DynInstPtr &inst);
/** Executes a store. */
Fault executeStore(DynInstPtr &inst);
/**
* Commits loads up until the given sequence number for a specific thread.
*/
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
{ thread[tid].commitLoads(youngest_inst); }
/**
* Commits stores up until the given sequence number for a specific thread.
*/
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
{ thread[tid].commitStores(youngest_inst); }
/**
* Attempts to write back stores until all cache ports are used or the
* interface becomes blocked.
*/
void writebackStores();
/** Same as above, but only for one thread. */
void writebackStores(ThreadID tid);
/**
* Squash instructions from a thread until the specified sequence number.
*/
void squash(const InstSeqNum &squashed_num, ThreadID tid)
{ thread[tid].squash(squashed_num); }
/** Returns whether or not there was a memory ordering violation. */
bool violation();
/**
* Returns whether or not there was a memory ordering violation for a
* specific thread.
*/
bool violation(ThreadID tid)
{ return thread[tid].violation(); }
/** Returns if a load is blocked due to the memory system for a specific
* thread.
*/
bool loadBlocked(ThreadID tid)
{ return thread[tid].loadBlocked(); }
/** Forwards to the thread's LSQ unit; reports whether its blocked load
* has been marked as handled.
*/
bool isLoadBlockedHandled(ThreadID tid)
{ return thread[tid].isLoadBlockedHandled(); }
/** Forwards to the thread's LSQ unit; marks its blocked load as handled. */
void setLoadBlockedHandled(ThreadID tid)
{ thread[tid].setLoadBlockedHandled(); }
/** Gets the instruction that caused the memory ordering violation. */
DynInstPtr getMemDepViolator(ThreadID tid)
{ return thread[tid].getMemDepViolator(); }
/** Returns the head index of the load queue for a specific thread. */
int getLoadHead(ThreadID tid)
{ return thread[tid].getLoadHead(); }
/** Returns the sequence number of the head of the load queue. */
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
{
return thread[tid].getLoadHeadSeqNum();
}
/** Returns the head index of the store queue. */
int getStoreHead(ThreadID tid)
{ return thread[tid].getStoreHead(); }
/** Returns the sequence number of the head of the store queue. */
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
{
return thread[tid].getStoreHeadSeqNum();
}
/** Returns the number of instructions in all of the queues. */
int getCount();
/** Returns the number of instructions in the queues of one thread. */
int getCount(ThreadID tid)
{ return thread[tid].getCount(); }
/** Returns the total number of loads in the load queue. */
int numLoads();
/** Returns the total number of loads for a single thread. */
int numLoads(ThreadID tid)
{ return thread[tid].numLoads(); }
/** Returns the total number of stores in the store queue. */
int numStores();
/** Returns the total number of stores for a single thread. */
int numStores(ThreadID tid)
{ return thread[tid].numStores(); }
/** Returns the total number of loads that are ready. */
int numLoadsReady();
/** Returns the number of loads that are ready for a single thread. */
int numLoadsReady(ThreadID tid)
{ return thread[tid].numLoadsReady(); }
/** Returns the number of free entries. */
unsigned numFreeEntries();
/** Returns the number of free entries for a specific thread. */
unsigned numFreeEntries(ThreadID tid);
/** Returns if the LSQ is full (either LQ or SQ is full). */
bool isFull();
/**
* Returns if the LSQ is full for a specific thread (either LQ or SQ is
* full).
*/
bool isFull(ThreadID tid);
/** Returns if any of the LQs are full. */
bool lqFull();
/** Returns if the LQ of a given thread is full. */
bool lqFull(ThreadID tid);
/** Returns if any of the SQs are full. */
bool sqFull();
/** Returns if the SQ of a given thread is full. */
bool sqFull(ThreadID tid);
/**
* Returns if the LSQ is stalled due to a memory operation that must be
* replayed.
*/
bool isStalled();
/**
* Returns if the LSQ of a specific thread is stalled due to a memory
* operation that must be replayed.
*/
bool isStalled(ThreadID tid);
/** Returns whether or not there are any stores to write back to memory. */
bool hasStoresToWB();
/** Returns whether or not a specific thread has any stores to write back
* to memory.
*/
bool hasStoresToWB(ThreadID tid)
{ return thread[tid].hasStoresToWB(); }
/** Returns the number of stores a specific thread has to write back. */
int numStoresToWB(ThreadID tid)
{ return thread[tid].numStoresToWB(); }
/** Returns if the LSQ will write back to memory this cycle. */
bool willWB();
/** Returns if the LSQ of a specific thread will write back to memory this
* cycle.
*/
bool willWB(ThreadID tid)
{ return thread[tid].willWB(); }
/** Returns if the cache is currently blocked, i.e. some thread id has been
* recorded in retryTid (it differs from InvalidThreadID).
*/
bool cacheBlocked()
{ return retryTid != InvalidThreadID; }
/** Sets the retry thread id, indicating that one of the LSQUnits
* tried to access the cache but the cache was blocked. */
void setRetryTid(ThreadID tid)
{ retryTid = tid; }
/** Debugging function to print out all instructions. */
void dumpInsts();
/** Debugging function to print out instructions from a specific thread. */
void dumpInsts(ThreadID tid)
{ thread[tid].dumpInsts(); }
/** Executes a read operation, using the load specified at the load
* index.
*/
Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
uint8_t *data, int load_idx);
/** Executes a store operation, using the store specified at the store
* index.
*/
Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
uint8_t *data, int store_idx);
/**
* Retry the previous send that failed.
*/
void recvRetry();
/**
* Handles writing back and completing the load or store that has
* returned from memory.
*
* @param pkt Response packet from the memory sub-system
*/
bool recvTimingResp(PacketPtr pkt);
/** Undocumented in the original; judging by the name this receives a
* timing snoop request packet -- confirm in the implementation file.
*/
void recvTimingSnoopReq(PacketPtr pkt);
/** The CPU pointer. */
O3CPU *cpu;
/** The IEW stage pointer. */
IEW *iewStage;
protected:
/** The LSQ policy for SMT mode. */
LSQPolicy lsqPolicy;
/** The LSQ units for individual threads. */
LSQUnit thread[Impl::MaxThreads];
/** List of Active Threads in System. */
std::list<ThreadID> *activeThreads;
/** Total Size of LQ Entries. */
unsigned LQEntries;
/** Total Size of SQ Entries. */
unsigned SQEntries;
/** Max LQ Size - Used to Enforce Sharing Policies. */
unsigned maxLQEntries;
/** Max SQ Size - Used to Enforce Sharing Policies. */
unsigned maxSQEntries;
/** Number of Threads. */
ThreadID numThreads;
/** The thread id of the LSQ Unit that is currently waiting for a
* retry. */
ThreadID retryTid;
};
/**
 * Dispatches a read to the LSQ unit of the thread that issued the request.
 * The owning thread is taken from req->threadId(); all arguments are passed
 * through unchanged and the unit's Fault is returned.
 */
template <class Impl>
Fault
LSQ<Impl>::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
                uint8_t *data, int load_idx)
{
    // Resolve the per-thread unit first, then delegate to it.
    LSQUnit &unit = thread[req->threadId()];

    return unit.read(req, sreqLow, sreqHigh, data, load_idx);
}
/**
 * Dispatches a write to the LSQ unit of the thread that issued the request.
 * The owning thread is taken from req->threadId(); all arguments are passed
 * through unchanged and the unit's Fault is returned.
 */
template <class Impl>
Fault
LSQ<Impl>::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
                 uint8_t *data, int store_idx)
{
    // Resolve the per-thread unit first, then delegate to it.
    LSQUnit &unit = thread[req->threadId()];

    return unit.write(req, sreqLow, sreqHigh, data, store_idx);
}
#endif // __CPU_O3_LSQ_HH__

View File

@ -0,0 +1,608 @@
/*
* Copyright (c) 2011-2012 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2005-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Korey Sewell
*/
#include <algorithm>
#include <list>
#include <string>
#include "cpu/o3/lsq.hh"
#include "debug/Fetch.hh"
#include "debug/LSQ.hh"
#include "debug/Writeback.hh"
#include "params/DerivO3CPU.hh"
using namespace std;
/**
 * Constructs the LSQ.
 *
 * Reads the total LQ/SQ sizes and the thread count from params, derives the
 * per-thread maximum LQ/SQ sizes from the SMT sharing policy string
 * (params->smtLSQPolicy, compared case-insensitively against "dynamic",
 * "partitioned" and "threshold"), and then initializes one LSQ unit per
 * thread and hands it the CPU's data port.
 *
 * @param cpu_ptr Owning CPU.
 * @param iew_ptr Owning IEW stage.
 * @param params  CPU parameters (LQEntries, SQEntries, numThreads,
 *                smtLSQPolicy, smtLSQThreshold are read here).
 */
template <class Impl>
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
    : cpu(cpu_ptr), iewStage(iew_ptr),
      // Start from a well-defined null pointer; the real list is supplied
      // later through setActiveThreads().
      activeThreads(0),
      LQEntries(params->LQEntries),
      SQEntries(params->SQEntries),
      numThreads(params->numThreads),
      // Same sentinel that cacheBlocked() compares against.
      retryTid(InvalidThreadID)
{
    //**********************************************/
    //************ Handle SMT Parameters ***********/
    //**********************************************/
    std::string policy = params->smtLSQPolicy;

    //Convert string to lowercase
    std::transform(policy.begin(), policy.end(), policy.begin(),
                   (int(*)(int)) tolower);

    //Figure out LSQ sharing policy
    if (policy == "dynamic") {
        lsqPolicy = Dynamic;

        // Every thread may use the whole queue.
        maxLQEntries = LQEntries;
        maxSQEntries = SQEntries;

        DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
    } else if (policy == "partitioned") {
        lsqPolicy = Partitioned;

        //@todo:make work if part_amt doesnt divide evenly.
        maxLQEntries = LQEntries / numThreads;
        maxSQEntries = SQEntries / numThreads;

        // Logged under the LSQ flag like the sibling branches (the original
        // used the Fetch flag here).
        DPRINTF(LSQ, "LSQ sharing policy set to Partitioned: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries,maxSQEntries);
    } else if (policy == "threshold") {
        lsqPolicy = Threshold;

        assert(params->smtLSQThreshold > LQEntries);
        assert(params->smtLSQThreshold > SQEntries);

        //Divide up by threshold amount
        //@todo: Should threads check the max and the total
        //amount of the LSQ
        maxLQEntries = params->smtLSQThreshold;
        maxSQEntries = params->smtLSQThreshold;

        DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
                "%i entries per LQ | %i entries per SQ\n",
                maxLQEntries,maxSQEntries);
    } else {
        // NOTE(review): when asserts are compiled out, lsqPolicy and the
        // max entry counts stay unset on this path.
        assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
               "Partitioned, Threshold}");
    }

    //Initialize LSQs
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        thread[tid].init(cpu, iew_ptr, params, this,
                         maxLQEntries, maxSQEntries, tid);
        thread[tid].setDcachePort(&cpu_ptr->getDataPort());
    }
}
/**
 * Returns the name of this LSQ: the owning IEW stage's name with the
 * suffix ".lsq" appended.
 */
template <class Impl>
std::string
LSQ<Impl>::name() const
{
    std::string full_name = iewStage->name();
    full_name += ".lsq";
    return full_name;
}
/** Registers the statistics of every per-thread LSQ unit. */
template <class Impl>
void
LSQ<Impl>::regStats()
{
    ThreadID tid = 0;
    while (tid < numThreads) {
        thread[tid].regStats();
        ++tid;
    }
}
/**
 * Stores the pointer to the list of active threads.
 * @param at_ptr list of active thread ids; asserted to be non-null.
 */
template <class Impl>
void
LSQ<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
{
    // The pointer is stored first and validated afterwards.
    activeThreads = at_ptr;

    assert(activeThreads != 0);
}
/** Forwards the switch-out request to every per-thread LSQ unit. */
template <class Impl>
void
LSQ<Impl>::switchOut()
{
    ThreadID tid = 0;
    while (tid < numThreads) {
        thread[tid].switchOut();
        ++tid;
    }
}
/** Forwards the take-over request to every per-thread LSQ unit. */
template <class Impl>
void
LSQ<Impl>::takeOverFrom()
{
    ThreadID tid = 0;
    while (tid < numThreads) {
        thread[tid].takeOverFrom();
        ++tid;
    }
}
/**
 * Computes the per-thread share of load queue entries.
 * @param num_threads number of threads to divide the LQ between.
 * @return LQEntries / num_threads under the Partitioned policy, and 0 for
 *         every other policy.
 */
template <class Impl>
int
LSQ<Impl>::entryAmount(ThreadID num_threads)
{
    // Only the Partitioned policy hands out a fixed share.
    if (lsqPolicy != Partitioned)
        return 0;

    return LQEntries / num_threads;
}
/**
 * Recomputes the per-thread queue size from the current set of active
 * threads and resizes the LQ and SQ of every active thread accordingly.
 *
 * Nothing happens for a single-threaded Dynamic configuration, or when no
 * thread is currently active. Under the Partitioned policy each active
 * thread gets LQEntries divided by the number of active threads; under
 * every other policy each active thread gets the full LQEntries.
 */
template <class Impl>
void
LSQ<Impl>::resetEntries()
{
    if (lsqPolicy == Dynamic && numThreads <= 1)
        return;

    // With no active threads there is nothing to resize, and the
    // Partitioned split below would otherwise divide by zero.
    if (activeThreads->empty())
        return;

    const int active_threads = activeThreads->size();

    // Only Partitioned splits the entries; the Threshold and Dynamic
    // cases both fall back to the full queue size.
    const int maxEntries = (lsqPolicy == Partitioned)
        ? LQEntries / active_threads
        : LQEntries;

    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        // The same size is applied to both the LQ and the SQ.
        resizeEntries(maxEntries, tid);
    }
}
/**
 * Clears both the load queue and the store queue of one thread.
 * @param tid thread whose LSQ unit is cleared.
 */
template <class Impl>
void
LSQ<Impl>::removeEntries(ThreadID tid)
{
    // Load queue first, then store queue.
    thread[tid].clearLQ();
    thread[tid].clearSQ();
}
/**
 * Resizes both queues of one thread to the same size.
 * @param size new size requested for the LQ and for the SQ.
 * @param tid  thread whose LSQ unit is resized.
 */
template <class Impl>
void
LSQ<Impl>::resizeEntries(unsigned size, ThreadID tid)
{
    // Load queue first, then store queue.
    thread[tid].resizeLQ(size);
    thread[tid].resizeSQ(size);
}
/** Ticks the LSQ unit of every active thread. */
template <class Impl>
void
LSQ<Impl>::tick()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        thread[*it].tick();
    }
}
/**
 * Inserts a load into the LSQ unit of the thread the instruction
 * belongs to.
 * @param load_inst the load instruction to insert.
 */
template <class Impl>
void
LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
{
    const ThreadID owner = load_inst->threadNumber;

    thread[owner].insertLoad(load_inst);
}
/**
 * Inserts a store into the LSQ unit of the thread the instruction
 * belongs to.
 * @param store_inst the store instruction to insert.
 */
template <class Impl>
void
LSQ<Impl>::insertStore(DynInstPtr &store_inst)
{
    const ThreadID owner = store_inst->threadNumber;

    thread[owner].insertStore(store_inst);
}
/**
 * Executes a load on the LSQ unit of the instruction's thread.
 * @param inst the load instruction to execute.
 * @return the fault reported by that LSQ unit.
 */
template <class Impl>
Fault
LSQ<Impl>::executeLoad(DynInstPtr &inst)
{
    const ThreadID owner = inst->threadNumber;

    return thread[owner].executeLoad(inst);
}
/**
 * Executes a store on the LSQ unit of the instruction's thread.
 * @param inst the store instruction to execute.
 * @return the fault reported by that LSQ unit.
 */
template <class Impl>
Fault
LSQ<Impl>::executeStore(DynInstPtr &inst)
{
    const ThreadID owner = inst->threadNumber;

    return thread[owner].executeStore(inst);
}
/**
 * Asks the LSQ unit of every active thread to write back its stores,
 * tracing how many stores are available when there is at least one.
 */
template <class Impl>
void
LSQ<Impl>::writebackStores()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        const ThreadID tid = *it;

        if (numStoresToWB(tid) > 0) {
            DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
                    "available for Writeback.\n", tid, numStoresToWB(tid));
        }

        // The unit is invoked regardless of the count traced above.
        thread[tid].writebackStores();
    }
}
/**
 * Answers whether any active thread currently reports a violation.
 * @return true as soon as one active thread's LSQ unit reports a
 *         violation, false if none does.
 */
template <class Impl>
bool
LSQ<Impl>::violation()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        if (thread[*it].violation())
            return true;
    }

    return false;
}
/**
 * Handles a retry notification by forwarding it to the LSQ unit of the
 * thread recorded in retryTid. If no thread is recorded (retryTid holds
 * InvalidThreadID, e.g. because the access was squashed) the retry is
 * dropped.
 */
template <class Impl>
void
LSQ<Impl>::recvRetry()
{
    if (retryTid == InvalidThreadID)
    {
        //Squashed, so drop it
        return;
    }

    const int curr_retry_tid = retryTid;

    // Speculatively clear the retry Tid.  This will get set again if
    // the LSQUnit was unable to complete its access. Use the same
    // sentinel the guard above tests for instead of a raw literal.
    retryTid = InvalidThreadID;

    thread[curr_retry_tid].recvRetry();
}
/**
 * Receives a timing response and hands it to the LSQ unit of the thread
 * named by the packet's request. Error packets are traced but still
 * forwarded.
 * @param pkt the response packet.
 * @return always true.
 */
template <class Impl>
bool
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
{
    if (pkt->isError()) {
        DPRINTF(LSQ, "Got error packet back for address: %#X\n",
                pkt->getAddr());
    }

    thread[pkt->req->threadId()].completeDataAccess(pkt);

    return true;
}
/**
 * Receives a timing snoop request. Invalidating snoops are passed to the
 * checkSnoop() of every thread's LSQ unit; any other packet is only
 * traced.
 * @param pkt the snoop packet.
 */
template <class Impl>
void
LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
{
    DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
            pkt->cmdString());

    // must be a snoop; only invalidations need further handling
    if (!pkt->isInvalidate())
        return;

    DPRINTF(LSQ, "received invalidation for addr:%#x\n",
            pkt->getAddr());

    // All threads are checked, not just the active ones.
    for (ThreadID tid = 0; tid < numThreads; ++tid) {
        thread[tid].checkSnoop(pkt);
    }
}
/**
 * Sums getCount(tid) over all active threads.
 * @return the accumulated count.
 */
template <class Impl>
int
LSQ<Impl>::getCount()
{
    unsigned sum = 0;

    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        sum += getCount(*it);
    }

    return sum;
}
/**
 * Sums numLoads(tid) over all active threads.
 * @return the accumulated number of loads.
 */
template <class Impl>
int
LSQ<Impl>::numLoads()
{
    unsigned sum = 0;

    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        sum += numLoads(*it);
    }

    return sum;
}
/**
 * Sums the number of stores held by the LSQ units of all active threads.
 * @return the accumulated number of stores.
 */
template <class Impl>
int
LSQ<Impl>::numStores()
{
    unsigned sum = 0;

    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        sum += thread[*it].numStores();
    }

    return sum;
}
/**
 * Sums the number of ready loads reported by the LSQ units of all active
 * threads.
 * @return the accumulated number of ready loads.
 */
template <class Impl>
int
LSQ<Impl>::numLoadsReady()
{
    unsigned sum = 0;

    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        sum += thread[*it].numLoadsReady();
    }

    return sum;
}
/**
 * Sums the free entries reported by the LSQ units of all active threads.
 * @return the accumulated number of free entries.
 */
template <class Impl>
unsigned
LSQ<Impl>::numFreeEntries()
{
    unsigned sum = 0;

    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        sum += thread[*it].numFreeEntries();
    }

    return sum;
}
/**
 * Returns the free entries reported by one thread's LSQ unit.
 * @param tid thread to query.
 * @return that unit's number of free entries.
 */
template <class Impl>
unsigned
LSQ<Impl>::numFreeEntries(ThreadID tid)
{
    // The Dynamic policy is deliberately not special-cased here; the
    // variant that returned the all-thread total stays disabled:
    //   if (lsqPolicy == Dynamic) return numFreeEntries();
    const unsigned free_entries = thread[tid].numFreeEntries();
    return free_entries;
}
/**
 * Reports whether the LSQ is full across all active threads.
 * @return false as soon as one active thread has neither a full LQ nor a
 *         full SQ; true otherwise (including when no thread is active).
 */
template <class Impl>
bool
LSQ<Impl>::isFull()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        const ThreadID tid = *it;

        // A thread with room in both queues means the LSQ is not full.
        if (!thread[tid].lqFull() && !thread[tid].sqFull())
            return false;
    }

    return true;
}
/**
 * Reports whether the LSQ is full from the point of view of one thread.
 * @param tid thread to query.
 * @return under the Dynamic policy, the result of the all-threads
 *         isFull(); otherwise whether this thread's LQ or SQ is full.
 */
template <class Impl>
bool
LSQ<Impl>::isFull(ThreadID tid)
{
    if (lsqPolicy != Dynamic)
        return thread[tid].lqFull() || thread[tid].sqFull();

    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    return isFull();
}
/**
 * Reports whether the load queues of all active threads are full.
 * @return false as soon as one active thread's LQ is not full; true
 *         otherwise (including when no thread is active).
 */
template <class Impl>
bool
LSQ<Impl>::lqFull()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        if (!thread[*it].lqFull())
            return false;
    }

    return true;
}
/**
 * Reports whether the load queue is full from the point of view of one
 * thread.
 * @param tid thread to query.
 * @return under the Dynamic policy, the result of the all-threads
 *         lqFull(); otherwise whether this thread's LQ is full.
 */
template <class Impl>
bool
LSQ<Impl>::lqFull(ThreadID tid)
{
    if (lsqPolicy != Dynamic)
        return thread[tid].lqFull();

    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    return lqFull();
}
/**
 * Reports whether the store queues of all active threads are full.
 *
 * Each LSQ unit is queried directly. Going through sqFull(ThreadID)
 * instead would bounce straight back into this function whenever the
 * policy is Dynamic, recursing without bound.
 *
 * @return false as soon as one active thread's SQ is not full; true
 *         otherwise (including when no thread is active).
 */
template<class Impl>
bool
LSQ<Impl>::sqFull()
{
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (!thread[tid].sqFull())
            return false;
    }

    return true;
}
/**
 * Reports whether the store queue is full from the point of view of one
 * thread.
 * @param tid thread to query.
 * @return under the Dynamic policy, the result of the all-threads
 *         sqFull(); otherwise whether this thread's SQ is full.
 */
template <class Impl>
bool
LSQ<Impl>::sqFull(ThreadID tid)
{
    if (lsqPolicy != Dynamic)
        return thread[tid].sqFull();

    //@todo: Change to Calculate All Entries for
    //Dynamic Policy
    // NOTE(review): the no-argument overload must query the LSQ units
    // directly rather than this overload, or the Dynamic path recurses.
    return sqFull();
}
/**
 * Reports whether the LSQ units of all active threads are stalled.
 * @return false as soon as one active thread is not stalled; true
 *         otherwise (including when no thread is active).
 */
template <class Impl>
bool
LSQ<Impl>::isStalled()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        if (!thread[*it].isStalled())
            return false;
    }

    return true;
}
/**
 * Reports whether the LSQ is stalled from the point of view of one
 * thread.
 * @param tid thread to query.
 * @return under the Dynamic policy, the result of the all-threads
 *         isStalled(); otherwise whether this thread's unit is stalled.
 */
template <class Impl>
bool
LSQ<Impl>::isStalled(ThreadID tid)
{
    if (lsqPolicy != Dynamic)
        return thread[tid].isStalled();

    return isStalled();
}
/**
 * Reports whether any active thread has stores to write back.
 * @return true as soon as hasStoresToWB(tid) holds for one active
 *         thread, false if it holds for none.
 */
template <class Impl>
bool
LSQ<Impl>::hasStoresToWB()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        if (hasStoresToWB(*it))
            return true;
    }

    return false;
}
/**
 * Reports whether any active thread will write back.
 * @return true as soon as willWB(tid) holds for one active thread, false
 *         if it holds for none.
 */
template <class Impl>
bool
LSQ<Impl>::willWB()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        if (willWB(*it))
            return true;
    }

    return false;
}
/** Asks the LSQ unit of every active thread to dump its instructions. */
template <class Impl>
void
LSQ<Impl>::dumpInsts()
{
    for (std::list<ThreadID>::iterator it = activeThreads->begin(),
             last = activeThreads->end();
         it != last; ++it) {
        thread[*it].dumpInsts();
    }
}

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#include "cpu/o3/isa_specific.hh"
#include "cpu/o3/lsq_unit_impl.hh"
// Force the instantiation of LSQUnit for all the implementations we care
// about, so the template member definitions included above are compiled
// for O3CPUImpl in this translation unit.
template class LSQUnit<O3CPUImpl>;

View File

@ -0,0 +1,914 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#ifndef __CPU_O3_LSQ_UNIT_HH__
#define __CPU_O3_LSQ_UNIT_HH__
#include <algorithm>
#include <cstring>
#include <map>
#include <queue>
#include "arch/generic/debugfaults.hh"
#include "arch/isa_traits.hh"
#include "arch/locked_mem.hh"
#include "arch/mmapped_ipr.hh"
#include "base/hashmap.hh"
#include "config/the_isa.hh"
#include "cpu/inst_seq.hh"
#include "cpu/timebuf.hh"
#include "debug/LSQUnit.hh"
#include "mem/packet.hh"
#include "mem/port.hh"
#include "sim/fault_fwd.hh"
struct DerivO3CPUParams;
/**
 * Class that implements the actual LQ and SQ for each specific
 * thread.  Both are circular queues; load entries are freed upon
 * committing, while store entries are freed once they writeback. The
 * LSQUnit tracks if there are memory ordering violations, and also
 * detects partial load to store forwarding cases (a store only has
 * part of a load's data) that requires the load to wait until the
 * store writes back. In the former case it holds onto the instruction
 * until the dependence unit looks at it, and in the latter it stalls
 * the LSQ until the store writes back. At that point the load is
 * replayed.
 */
template <class Impl>
class LSQUnit {
  public:
    typedef typename Impl::O3CPU O3CPU;
    typedef typename Impl::DynInstPtr DynInstPtr;
    typedef typename Impl::CPUPol::IEW IEW;
    typedef typename Impl::CPUPol::LSQ LSQ;
    typedef typename Impl::CPUPol::IssueStruct IssueStruct;

  public:
    /** Constructs an LSQ unit. init() must be called prior to use. */
    LSQUnit();

    /** Initializes the LSQ unit with the specified number of entries. */
    void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
            LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
            unsigned id);

    /** Returns the name of the LSQ unit. */
    std::string name() const;

    /** Registers statistics. */
    void regStats();

    /** Sets the pointer to the dcache port. */
    void setDcachePort(MasterPort *dcache_port);

    /** Switches out LSQ unit. */
    void switchOut();

    /** Takes over from another CPU's thread. */
    void takeOverFrom();

    /** Returns if the LSQ is switched out. */
    bool isSwitchedOut() { return switchedOut; }

    /** Ticks the LSQ unit, which in this case only resets the number of
     * used cache ports.
     * @todo: Move the number of used ports up to the LSQ level so it can
     * be shared by all LSQ units.
     */
    void tick() { usedPorts = 0; }

    /** Inserts an instruction. */
    void insert(DynInstPtr &inst);
    /** Inserts a load instruction. */
    void insertLoad(DynInstPtr &load_inst);
    /** Inserts a store instruction. */
    void insertStore(DynInstPtr &store_inst);

    /** Check for ordering violations in the LSQ. For a store, squash if we
     * ever find a conflicting load. For a load, only squash if
     * an external snoop invalidate has been seen for that load address.
     * @param load_idx index to start checking at
     * @param inst the instruction to check
     */
    Fault checkViolations(int load_idx, DynInstPtr &inst);

    /** Check if an incoming invalidate hits in the lsq on a load
     * that might have issued out of order wrt another load because
     * of the intermediate invalidate.
     */
    void checkSnoop(PacketPtr pkt);

    /** Executes a load instruction. */
    Fault executeLoad(DynInstPtr &inst);

    /** Index-based overload; not implemented, panics when called. */
    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
    /** Executes a store instruction. */
    Fault executeStore(DynInstPtr &inst);

    /** Commits the head load. */
    void commitLoad();
    /** Commits loads older than a specific sequence number. */
    void commitLoads(InstSeqNum &youngest_inst);

    /** Commits stores older than a specific sequence number. */
    void commitStores(InstSeqNum &youngest_inst);

    /** Writes back stores. */
    void writebackStores();

    /** Completes the data access that has been returned from the
     * memory system. */
    void completeDataAccess(PacketPtr pkt);

    /** Clears all the entries in the LQ. */
    void clearLQ();

    /** Clears all the entries in the SQ. */
    void clearSQ();

    /** Resizes the LQ to a given size. */
    void resizeLQ(unsigned size);

    /** Resizes the SQ to a given size. */
    void resizeSQ(unsigned size);

    /** Squashes all instructions younger than a specific sequence number. */
    void squash(const InstSeqNum &squashed_num);

    /** Returns if there is a memory ordering violation. Value is reset upon
     * call to getMemDepViolator().
     */
    bool violation() { return memDepViolator; }

    /** Returns the memory ordering violator. */
    DynInstPtr getMemDepViolator();

    /** Returns if a load became blocked due to the memory system. */
    bool loadBlocked()
    { return isLoadBlocked; }

    /** Clears the signal that a load became blocked. */
    void clearLoadBlocked()
    { isLoadBlocked = false; }

    /** Returns if the blocked load was handled. */
    bool isLoadBlockedHandled()
    { return loadBlockedHandled; }

    /** Records the blocked load as being handled. */
    void setLoadBlockedHandled()
    { loadBlockedHandled = true; }

    /** Returns the number of free entries (min of free LQ and SQ entries). */
    unsigned numFreeEntries();

    /** Returns the number of loads ready to execute. */
    int numLoadsReady();

    /** Returns the number of loads in the LQ. */
    int numLoads() { return loads; }

    /** Returns the number of stores in the SQ. */
    int numStores() { return stores; }

    /** Returns if either the LQ or SQ is full. */
    bool isFull() { return lqFull() || sqFull(); }

    /** Returns if the LQ is full (LQEntries includes a sentinel entry). */
    bool lqFull() { return loads >= (LQEntries - 1); }

    /** Returns if the SQ is full (SQEntries includes a sentinel entry). */
    bool sqFull() { return stores >= (SQEntries - 1); }

    /** Returns the number of instructions in the LSQ. */
    unsigned getCount() { return loads + stores; }

    /** Returns if there are any stores to writeback. */
    bool hasStoresToWB() { return storesToWB; }

    /** Returns the number of stores to writeback. */
    int numStoresToWB() { return storesToWB; }

    /** Returns if the LSQ unit will writeback on this cycle. */
    bool willWB() { return storeQueue[storeWBIdx].canWB &&
                        !storeQueue[storeWBIdx].completed &&
                        !isStoreBlocked; }

    /** Handles doing the retry. */
    void recvRetry();

  private:
    /** Writes back the instruction, sending it to IEW. */
    void writeback(DynInstPtr &inst, PacketPtr pkt);

    /** Writes back a store that couldn't be completed the previous cycle. */
    void writebackPendingStore();

    /** Handles completing the send of a store to memory. */
    void storePostSend(PacketPtr pkt);

    /** Completes the store at the specified index. */
    void completeStore(int store_idx);

    /** Attempts to send a store to the cache. */
    bool sendStore(PacketPtr data_pkt);

    /** Increments the given store index (circular queue). */
    inline void incrStIdx(int &store_idx);
    /** Decrements the given store index (circular queue). */
    inline void decrStIdx(int &store_idx);
    /** Increments the given load index (circular queue). */
    inline void incrLdIdx(int &load_idx);
    /** Decrements the given load index (circular queue). */
    inline void decrLdIdx(int &load_idx);

  public:
    /** Debugging function to dump instructions in the LSQ. */
    void dumpInsts();

  private:
    /** Pointer to the CPU. */
    O3CPU *cpu;

    /** Pointer to the IEW stage. */
    IEW *iewStage;

    /** Pointer to the LSQ. */
    LSQ *lsq;

    /** Pointer to the dcache port.  Used only for sending. */
    MasterPort *dcachePort;

    /** Derived class to hold any sender state the LSQ needs. */
    class LSQSenderState : public Packet::SenderState
    {
      public:
        /** Default constructor. Every scalar member gets a defined
         * value; idx and isLoad are expected to be overwritten by the
         * code that creates the state. */
        LSQSenderState()
            : mainPkt(NULL), pendingPacket(NULL), idx(0), outstanding(1),
              isLoad(false), noWB(false), isSplit(false), pktToSend(false)
        { }

        /** Instruction who initiated the access to memory. */
        DynInstPtr inst;
        /** The main packet from a split load, used during writeback. */
        PacketPtr mainPkt;
        /** A second packet from a split store that needs sending. */
        PacketPtr pendingPacket;
        /** The LQ/SQ index of the instruction. */
        uint8_t idx;
        /** Number of outstanding packets to complete. */
        uint8_t outstanding;
        /** Whether or not it is a load. */
        bool isLoad;
        /** Whether or not the instruction will need to writeback. */
        bool noWB;
        /** Whether or not this access is split in two. */
        bool isSplit;
        /** Whether or not there is a packet that needs sending. */
        bool pktToSend;

        /** Completes a packet and returns whether the access is finished. */
        inline bool complete() { return --outstanding == 0; }
    };

    /** Writeback event, specifically for when stores forward data to loads. */
    class WritebackEvent : public Event {
      public:
        /** Constructs a writeback event. */
        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);

        /** Processes the writeback event. */
        void process();

        /** Returns the description of this event. */
        const char *description() const;

      private:
        /** Instruction whose results are being written back. */
        DynInstPtr inst;

        /** The packet that would have been sent to memory. */
        PacketPtr pkt;

        /** The pointer to the LSQ unit that issued the store. */
        LSQUnit<Impl> *lsqPtr;
    };

  public:
    struct SQEntry {
        /** Constructs an empty store queue entry. All members are
         * initialized, matching the per-instruction constructor. */
        SQEntry()
            : inst(NULL), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
              isSplit(0), canWB(0), committed(0), completed(0)
        {
            std::memset(data, 0, sizeof(data));
        }

        /** Constructs a store queue entry for a given instruction. */
        SQEntry(DynInstPtr &_inst)
            : inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
              isSplit(0), canWB(0), committed(0), completed(0)
        {
            std::memset(data, 0, sizeof(data));
        }

        /** The store data. */
        char data[16];
        /** The store instruction. */
        DynInstPtr inst;
        /** The request for the store. */
        RequestPtr req;
        /** The split requests for the store. */
        RequestPtr sreqLow;
        RequestPtr sreqHigh;
        /** The size of the store. */
        uint8_t size;
        /** Whether or not the store is split into two requests. */
        bool isSplit;
        /** Whether or not the store can writeback. */
        bool canWB;
        /** Whether or not the store is committed. */
        bool committed;
        /** Whether or not the store is completed. */
        bool completed;
    };

  private:
    /** The LSQUnit thread id. */
    ThreadID lsqID;

    /** The store queue. */
    std::vector<SQEntry> storeQueue;

    /** The load queue. */
    std::vector<DynInstPtr> loadQueue;

    /** The number of LQ entries, plus a sentinel entry (circular queue).
     *  @todo: Consider having var that records the true number of LQ entries.
     */
    unsigned LQEntries;
    /** The number of SQ entries, plus a sentinel entry (circular queue).
     *  @todo: Consider having var that records the true number of SQ entries.
     */
    unsigned SQEntries;

    /** The number of places to shift addresses in the LSQ before checking
     * for dependency violations
     */
    unsigned depCheckShift;

    /** Should loads be checked for dependency issues */
    bool checkLoads;

    /** The number of load instructions in the LQ. */
    int loads;
    /** The number of store instructions in the SQ. */
    int stores;
    /** The number of store instructions in the SQ waiting to writeback. */
    int storesToWB;

    /** The index of the head instruction in the LQ. */
    int loadHead;
    /** The index of the tail instruction in the LQ. */
    int loadTail;

    /** The index of the head instruction in the SQ. */
    int storeHead;
    /** The index of the first instruction that may be ready to be
     * written back, and has not yet been written back.
     */
    int storeWBIdx;
    /** The index of the tail instruction in the SQ. */
    int storeTail;

    /// @todo Consider moving to a more advanced model with write vs read ports
    /** The number of cache ports available each cycle. */
    int cachePorts;

    /** The number of used cache ports in this cycle. */
    int usedPorts;

    /** Is the LSQ switched out. */
    bool switchedOut;

    //list<InstSeqNum> mshrSeqNums;

    /** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
    Addr cacheBlockMask;

    /** Wire to read information from the issue stage time queue. */
    typename TimeBuffer<IssueStruct>::wire fromIssue;

    /** Whether or not the LSQ is stalled. */
    bool stalled;
    /** The store that causes the stall due to partial store to load
     * forwarding.
     */
    InstSeqNum stallingStoreIsn;
    /** The LQ index of the load that is stalled on the above store. */
    int stallingLoadIdx;

    /** The packet that needs to be retried. */
    PacketPtr retryPkt;

    /** Whether or not a store is blocked due to the memory system. */
    bool isStoreBlocked;

    /** Whether or not a load is blocked due to the memory system. */
    bool isLoadBlocked;

    /** Has the blocked load been handled. */
    bool loadBlockedHandled;

    /** Whether or not a store is in flight. */
    bool storeInFlight;

    /** The sequence number of the blocked load. */
    InstSeqNum blockedLoadSeqNum;

    /** The oldest load that caused a memory ordering violation. */
    DynInstPtr memDepViolator;

    /** Whether or not there is a packet that couldn't be sent because of
     * a lack of cache ports. */
    bool hasPendingPkt;

    /** The packet that is pending free cache ports. */
    PacketPtr pendingPkt;

    /** Flag for memory model. */
    bool needsTSO;

    // Will also need how many read/write ports the Dcache has.  Or keep track
    // of that in stage that is one level up, and only call executeLoad/Store
    // the appropriate number of times.
    /** Total number of loads forwarded from LSQ stores. */
    Stats::Scalar lsqForwLoads;

    /** Total number of loads ignored due to invalid addresses. */
    Stats::Scalar invAddrLoads;

    /** Total number of squashed loads. */
    Stats::Scalar lsqSquashedLoads;

    /** Total number of responses from the memory system that are
     * ignored due to the instruction already being squashed. */
    Stats::Scalar lsqIgnoredResponses;

    /** Total number of memory ordering violations. */
    Stats::Scalar lsqMemOrderViolation;

    /** Total number of squashed stores. */
    Stats::Scalar lsqSquashedStores;

    /** Total number of software prefetches ignored due to invalid addresses. */
    Stats::Scalar invAddrSwpfs;

    /** Ready loads blocked due to partial store-forwarding. */
    Stats::Scalar lsqBlockedLoads;

    /** Number of loads that were rescheduled. */
    Stats::Scalar lsqRescheduledLoads;

    /** Number of times the LSQ is blocked due to the cache. */
    Stats::Scalar lsqCacheBlocked;

  public:
    /** Executes the load at the given index. */
    Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
               uint8_t *data, int load_idx);

    /** Executes the store at the given index. */
    Fault write(Request *req, Request *sreqLow, Request *sreqHigh,
                uint8_t *data, int store_idx);

    /** Returns the index of the head load instruction. */
    int getLoadHead() { return loadHead; }
    /** Returns the sequence number of the head load instruction, or 0 if
     * the head LQ slot holds no instruction. */
    InstSeqNum getLoadHeadSeqNum()
    {
        if (loadQueue[loadHead]) {
            return loadQueue[loadHead]->seqNum;
        } else {
            return 0;
        }

    }

    /** Returns the index of the head store instruction. */
    int getStoreHead() { return storeHead; }
    /** Returns the sequence number of the head store instruction, or 0 if
     * the head SQ slot holds no instruction. */
    InstSeqNum getStoreHeadSeqNum()
    {
        if (storeQueue[storeHead].inst) {
            return storeQueue[storeHead].inst->seqNum;
        } else {
            return 0;
        }

    }

    /** Returns whether or not the LSQ unit is stalled. */
    bool isStalled()  { return stalled; }
};
template <class Impl>
Fault
LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
uint8_t *data, int load_idx)
{
DynInstPtr load_inst = loadQueue[load_idx];
assert(load_inst);
assert(!load_inst->isExecuted());
// Make sure this isn't an uncacheable access
// A bit of a hackish way to get uncached accesses to work only if they're
// at the head of the LSQ and are ready to commit (at the head of the ROB
// too).
if (req->isUncacheable() &&
(load_idx != loadHead || !load_inst->isAtCommit())) {
iewStage->rescheduleMemInst(load_inst);
++lsqRescheduledLoads;
DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the proper
// place to really handle request deletes.
delete req;
if (TheISA::HasUnalignedMemAcc && sreqLow) {
delete sreqLow;
delete sreqHigh;
}
return new GenericISA::M5PanicFault(
"Uncachable load [sn:%llx] PC %s\n",
load_inst->seqNum, load_inst->pcState());
}
// Check the SQ for any previous stores that might lead to forwarding
int store_idx = load_inst->sqIdx;
int store_size = 0;
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
"storeHead: %i addr: %#x%s\n",
load_idx, store_idx, storeHead, req->getPaddr(),
sreqLow ? " split" : "");
if (req->isLLSC()) {
assert(!sreqLow);
// Disable recording the result temporarily. Writing to misc
// regs normally updates the result, but this is not the
// desired behavior when handling store conditionals.
load_inst->recordResult(false);
TheISA::handleLockedRead(load_inst.get(), req);
load_inst->recordResult(true);
}
if (req->isMmappedIpr()) {
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
ThreadContext *thread = cpu->tcBase(lsqID);
Tick delay;
PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
data_pkt->dataStatic(load_inst->memData);
delay = TheISA::handleIprRead(thread, data_pkt);
} else {
assert(sreqLow->isMmappedIpr() && sreqHigh->isMmappedIpr());
PacketPtr fst_data_pkt = new Packet(sreqLow, MemCmd::ReadReq);
PacketPtr snd_data_pkt = new Packet(sreqHigh, MemCmd::ReadReq);
fst_data_pkt->dataStatic(load_inst->memData);
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
delay = TheISA::handleIprRead(thread, fst_data_pkt);
unsigned delay2 = TheISA::handleIprRead(thread, snd_data_pkt);
if (delay2 > delay)
delay = delay2;
delete sreqLow;
delete sreqHigh;
delete fst_data_pkt;
delete snd_data_pkt;
}
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
cpu->schedule(wb, curTick() + delay);
return NoFault;
}
while (store_idx != -1) {
// End once we've reached the top of the LSQ
if (store_idx == storeWBIdx) {
break;
}
// Move the index to one younger
if (--store_idx < 0)
store_idx += SQEntries;
assert(storeQueue[store_idx].inst);
store_size = storeQueue[store_idx].size;
if (store_size == 0)
continue;
else if (storeQueue[store_idx].inst->uncacheable())
continue;
assert(storeQueue[store_idx].inst->effAddrValid());
// Check if the store data is within the lower and upper bounds of
// addresses that the request needs.
bool store_has_lower_limit =
req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
bool store_has_upper_limit =
(req->getVaddr() + req->getSize()) <=
(storeQueue[store_idx].inst->effAddr + store_size);
bool lower_load_has_store_part =
req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
store_size);
bool upper_load_has_store_part =
(req->getVaddr() + req->getSize()) >
storeQueue[store_idx].inst->effAddr;
// If the store's data has all of the data needed, we can forward.
if ((store_has_lower_limit && store_has_upper_limit)) {
// Get shift amount for offset into the store's data.
int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr;
memcpy(data, storeQueue[store_idx].data + shift_amt,
req->getSize());
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
memcpy(load_inst->memData,
storeQueue[store_idx].data + shift_amt, req->getSize());
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
"addr %#x, data %#x\n",
store_idx, req->getVaddr(), data);
PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
data_pkt->dataStatic(load_inst->memData);
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
// We'll say this has a 1 cycle load-store forwarding latency
// for now.
// @todo: Need to make this a parameter.
cpu->schedule(wb, curTick());
// Don't need to do anything special for split loads.
if (TheISA::HasUnalignedMemAcc && sreqLow) {
delete sreqLow;
delete sreqHigh;
}
++lsqForwLoads;
return NoFault;
} else if ((store_has_lower_limit && lower_load_has_store_part) ||
(store_has_upper_limit && upper_load_has_store_part) ||
(lower_load_has_store_part && upper_load_has_store_part)) {
// This is the partial store-load forwarding case where a store
// has only part of the load's data.
// If it's already been written back, then don't worry about
// stalling on it.
if (storeQueue[store_idx].completed) {
panic("Should not check one of these");
continue;
}
// Must stall load and force it to retry, so long as it's the oldest
// load that needs to do so.
if (!stalled ||
(stalled &&
load_inst->seqNum <
loadQueue[stallingLoadIdx]->seqNum)) {
stalled = true;
stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
stallingLoadIdx = load_idx;
}
// Tell IQ/mem dep unit that this instruction will need to be
// rescheduled eventually
iewStage->rescheduleMemInst(load_inst);
iewStage->decrWb(load_inst->seqNum);
load_inst->clearIssued();
++lsqRescheduledLoads;
// Do not generate a writeback event as this instruction is not
// complete.
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
"Store idx %i to load addr %#x\n",
store_idx, req->getVaddr());
// Must delete request now that it wasn't handed off to
// memory. This is quite ugly. @todo: Figure out the
// proper place to really handle request deletes.
delete req;
if (TheISA::HasUnalignedMemAcc && sreqLow) {
delete sreqLow;
delete sreqHigh;
}
return NoFault;
}
}
// If there's no forwarding case, then go access memory
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
load_inst->seqNum, load_inst->pcState());
assert(!load_inst->memData);
load_inst->memData = new uint8_t[64];
++usedPorts;
// if we the cache is not blocked, do cache access
bool completedFirst = false;
if (!lsq->cacheBlocked()) {
MemCmd command =
req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
PacketPtr data_pkt = new Packet(req, command);
PacketPtr fst_data_pkt = NULL;
PacketPtr snd_data_pkt = NULL;
data_pkt->dataStatic(load_inst->memData);
LSQSenderState *state = new LSQSenderState;
state->isLoad = true;
state->idx = load_idx;
state->inst = load_inst;
data_pkt->senderState = state;
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
// Point the first packet at the main data packet.
fst_data_pkt = data_pkt;
} else {
// Create the split packets.
fst_data_pkt = new Packet(sreqLow, command);
snd_data_pkt = new Packet(sreqHigh, command);
fst_data_pkt->dataStatic(load_inst->memData);
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
fst_data_pkt->senderState = state;
snd_data_pkt->senderState = state;
state->isSplit = true;
state->outstanding = 2;
state->mainPkt = data_pkt;
}
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
// Delete state and data packet because a load retry
// initiates a pipeline restart; it does not retry.
delete state;
delete data_pkt->req;
delete data_pkt;
if (TheISA::HasUnalignedMemAcc && sreqLow) {
delete fst_data_pkt->req;
delete fst_data_pkt;
delete snd_data_pkt->req;
delete snd_data_pkt;
sreqLow = NULL;
sreqHigh = NULL;
}
req = NULL;
// If the access didn't succeed, tell the LSQ by setting
// the retry thread id.
lsq->setRetryTid(lsqID);
} else if (TheISA::HasUnalignedMemAcc && sreqLow) {
completedFirst = true;
// The first packet was sent without problems, so send this one
// too. If there is a problem with this packet then the whole
// load will be squashed, so indicate this to the state object.
// The first packet will return in completeDataAccess and be
// handled there.
++usedPorts;
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
// The main packet will be deleted in completeDataAccess.
delete snd_data_pkt->req;
delete snd_data_pkt;
state->complete();
req = NULL;
sreqHigh = NULL;
lsq->setRetryTid(lsqID);
}
}
}
// If the cache was blocked, or has become blocked due to the access,
// handle it.
if (lsq->cacheBlocked()) {
if (req)
delete req;
if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
delete sreqLow;
delete sreqHigh;
}
++lsqCacheBlocked;
// If the first part of a split access succeeds, then let the LSQ
// handle the decrWb when completeDataAccess is called upon return
// of the requested first part of data
if (!completedFirst)
iewStage->decrWb(load_inst->seqNum);
// There's an older load that's already going to squash.
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
return NoFault;
// Record that the load was blocked due to memory. This
// load will squash all instructions after it, be
// refetched, and re-executed.
isLoadBlocked = true;
loadBlockedHandled = false;
blockedLoadSeqNum = load_inst->seqNum;
// No fault occurred, even though the interface is blocked.
return NoFault;
}
return NoFault;
}
/**
 * Records a store's request(s) and data in the store queue entry at
 * store_idx. Nothing is sent to memory here; the data is only copied into
 * the entry's buffer, so the call always returns NoFault.
 */
template <class Impl>
Fault
LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
                     uint8_t *data, int store_idx)
{
    assert(storeQueue[store_idx].inst);

    DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
            " | storeHead:%i [sn:%i]\n",
            store_idx, req->getPaddr(), data, storeHead,
            storeQueue[store_idx].inst->seqNum);

    const unsigned num_bytes = req->getSize();

    // Remember the main request and the two halves of a split access.
    storeQueue[store_idx].req = req;
    storeQueue[store_idx].sreqLow = sreqLow;
    storeQueue[store_idx].sreqHigh = sreqHigh;
    storeQueue[store_idx].size = num_bytes;

    // The entry's data buffer has a fixed size; the store must fit in it.
    assert(num_bytes <= sizeof(storeQueue[store_idx].data));

    // Only ISAs with unaligned accesses can split a store, in which case
    // the low (and high) sub-requests are non-null.
    if (TheISA::HasUnalignedMemAcc && sreqLow)
        storeQueue[store_idx].isSplit = true;

    memcpy(storeQueue[store_idx].data, data, num_bytes);

    return NoFault;
}
#endif // __CPU_O3_LSQ_UNIT_HH__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/isa_specific.hh"
#include "cpu/o3/mem_dep_unit_impl.hh"
#include "cpu/o3/store_set.hh"
// Debug-only bookkeeping counters declared as static members of
// MemDepEntry. The explicit specializations below define them for the
// <StoreSet, O3CPUImpl> instantiation, each starting at zero.
#ifdef DEBUG
template <>
int
MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_count = 0;
template <>
int
MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_insert = 0;
template <>
int
MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_erase = 0;
#endif
// Force instantiation of the memory dependency unit using store sets and
// O3CPUImpl.
template class MemDepUnit<StoreSet, O3CPUImpl>;

View File

@ -0,0 +1,270 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_MEM_DEP_UNIT_HH__
#define __CPU_O3_MEM_DEP_UNIT_HH__
#include <list>
#include <set>
#include "base/hashmap.hh"
#include "base/refcnt.hh"
#include "base/statistics.hh"
#include "cpu/inst_seq.hh"
#include "debug/MemDepUnit.hh"
/**
 * Hash functor for instruction sequence numbers. Only the low bits that
 * survive the cast to unsigned take part: the value shifted right by 14 is
 * XORed with 16 bits taken from bit 2 upwards, and the top bit is cleared.
 */
struct SNHash {
    size_t operator() (const InstSeqNum &seq_num) const {
        const unsigned truncated = (unsigned)seq_num;
        const unsigned upper_part = truncated >> 14;
        const unsigned lower_part = (truncated >> 2) & 0xffff;
        return (upper_part ^ lower_part) & 0x7FFFFFFF;
    }
};
// Forward declarations: this header only uses pointers to these types
// (constructor/init parameters and the IQ back-pointer), so the full
// definitions are not needed here.
struct DerivO3CPUParams;
template <class Impl>
class InstructionQueue;
/**
* Memory dependency unit class. This holds the memory dependence predictor.
* As memory operations are issued to the IQ, they are also issued to this
* unit, which then looks up the prediction as to what they are dependent
* upon. This unit must be checked prior to a memory operation being able
* to issue. Although this is templated, it's somewhat hard to make a generic
* memory dependence unit. This one is mostly for store sets; it will be
* quite limited in what other memory dependence predictions it can also
* utilize. Thus this class should be most likely be rewritten for other
* dependence prediction schemes.
*/
template <class MemDepPred, class Impl>
class MemDepUnit
{
protected:
std::string _name;
public:
typedef typename Impl::DynInstPtr DynInstPtr;
/** Empty constructor. Must call init() prior to using in this case. */
MemDepUnit();
/** Constructs a MemDepUnit with given parameters. */
MemDepUnit(DerivO3CPUParams *params);
/** Frees up any memory allocated. */
~MemDepUnit();
/** Returns the name of the memory dependence unit. */
std::string name() const { return _name; }
/** Initializes the unit with parameters and a thread id. */
void init(DerivO3CPUParams *params, ThreadID tid);
/** Registers statistics. */
void regStats();
/** Switches out the memory dependence predictor. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Sets the pointer to the IQ. */
void setIQ(InstructionQueue<Impl> *iq_ptr);
/** Inserts a memory instruction. */
void insert(DynInstPtr &inst);
/** Inserts a non-speculative memory instruction. */
void insertNonSpec(DynInstPtr &inst);
/** Inserts a barrier instruction. */
void insertBarrier(DynInstPtr &barr_inst);
/** Indicate that an instruction has its registers ready. */
void regsReady(DynInstPtr &inst);
/** Indicate that a non-speculative instruction is ready. */
void nonSpecInstReady(DynInstPtr &inst);
/** Reschedules an instruction to be re-executed. */
void reschedule(DynInstPtr &inst);
/** Replays all instructions that have been rescheduled by moving them to
* the ready list.
*/
void replay(DynInstPtr &inst);
/** Completes a memory instruction. */
void completed(DynInstPtr &inst);
/** Completes a barrier instruction. */
void completeBarrier(DynInstPtr &inst);
/** Wakes any dependents of a memory instruction. */
void wakeDependents(DynInstPtr &inst);
/** Squashes all instructions up until a given sequence number for a
* specific thread.
*/
void squash(const InstSeqNum &squashed_num, ThreadID tid);
/** Indicates an ordering violation between a store and a younger load. */
void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
/** Issues the given instruction */
void issue(DynInstPtr &inst);
/** Debugging function to dump the lists of instructions. */
void dumpLists();
private:
typedef typename std::list<DynInstPtr>::iterator ListIt;
class MemDepEntry;
typedef RefCountingPtr<MemDepEntry> MemDepEntryPtr;
/** Memory dependence entries that track memory operations, marking
* when the instruction is ready to execute and what instructions depend
* upon it.
*/
class MemDepEntry : public RefCounted {
public:
/** Constructs a memory dependence entry. */
MemDepEntry(DynInstPtr &new_inst)
: inst(new_inst), regsReady(false), memDepReady(false),
completed(false), squashed(false)
{
#ifdef DEBUG
++memdep_count;
DPRINTF(MemDepUnit, "Memory dependency entry created. "
"memdep_count=%i %s\n", memdep_count, inst->pcState());
#endif
}
/** Frees any pointers. */
~MemDepEntry()
{
for (int i = 0; i < dependInsts.size(); ++i) {
dependInsts[i] = NULL;
}
#ifdef DEBUG
--memdep_count;
DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
"memdep_count=%i %s\n", memdep_count, inst->pcState());
#endif
}
/** Returns the name of the memory dependence entry. */
std::string name() const { return "memdepentry"; }
/** The instruction being tracked. */
DynInstPtr inst;
/** The iterator to the instruction's location inside the list. */
ListIt listIt;
/** A vector of any dependent instructions. */
std::vector<MemDepEntryPtr> dependInsts;
/** If the registers are ready or not. */
bool regsReady;
/** If all memory dependencies have been satisfied. */
bool memDepReady;
/** If the instruction is completed. */
bool completed;
/** If the instruction is squashed. */
bool squashed;
/** For debugging. */
#ifdef DEBUG
static int memdep_count;
static int memdep_insert;
static int memdep_erase;
#endif
};
/** Finds the memory dependence entry in the hash map. */
inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);
/** Moves an entry to the ready list. */
inline void moveToReady(MemDepEntryPtr &ready_inst_entry);
typedef m5::hash_map<InstSeqNum, MemDepEntryPtr, SNHash> MemDepHash;
typedef typename MemDepHash::iterator MemDepHashIt;
/** A hash map of all memory dependence entries. */
MemDepHash memDepHash;
/** A list of all instructions in the memory dependence unit. */
std::list<DynInstPtr> instList[Impl::MaxThreads];
/** A list of all instructions that are going to be replayed. */
std::list<DynInstPtr> instsToReplay;
/** The memory dependence predictor. It is accessed upon new
* instructions being added to the IQ, and responds by telling
* this unit what instruction the newly added instruction is dependent
* upon.
*/
MemDepPred depPred;
/** Is there an outstanding load barrier that loads must wait on. */
bool loadBarrier;
/** The sequence number of the load barrier. */
InstSeqNum loadBarrierSN;
/** Is there an outstanding store barrier that loads must wait on. */
bool storeBarrier;
/** The sequence number of the store barrier. */
InstSeqNum storeBarrierSN;
/** Pointer to the IQ. */
InstructionQueue<Impl> *iqPtr;
/** The thread id of this memory dependence unit. */
int id;
/** Stat for number of inserted loads. */
Stats::Scalar insertedLoads;
/** Stat for number of inserted stores. */
Stats::Scalar insertedStores;
/** Stat for number of conflicting loads that had to wait for a store. */
Stats::Scalar conflictingLoads;
/** Stat for number of conflicting stores that had to wait for a store. */
Stats::Scalar conflictingStores;
};
#endif // __CPU_O3_MEM_DEP_UNIT_HH__

View File

@ -0,0 +1,587 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include <map>
#include "cpu/o3/inst_queue.hh"
#include "cpu/o3/mem_dep_unit.hh"
#include "debug/MemDepUnit.hh"
#include "params/DerivO3CPU.hh"
/**
 * Empty constructor; init() must be called before the unit is used.
 * Barrier tracking starts cleared and no instruction queue is attached.
 */
template <class MemDepPred, class Impl>
MemDepUnit<MemDepPred, Impl>::MemDepUnit()
    : loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
      storeBarrierSN(0), iqPtr(NULL),
      // Give the thread id a defined placeholder value instead of leaving
      // it indeterminate; init() overwrites it with the real thread id.
      id(-1)
{
}
/**
 * Constructs the unit from CPU parameters: names it "<cpu>.memdepunit" and
 * builds the dependence predictor from the store-set parameters. Barrier
 * tracking starts cleared and no instruction queue is attached.
 */
template <class MemDepPred, class Impl>
MemDepUnit<MemDepPred, Impl>::MemDepUnit(DerivO3CPUParams *params)
    : _name(params->name + ".memdepunit"),
      depPred(params->store_set_clear_period, params->SSITSize,
              params->LFSTSize),
      loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
      storeBarrierSN(0), iqPtr(NULL),
      // Give the thread id a defined placeholder value instead of leaving
      // it indeterminate; init() overwrites it when it is called.
      id(-1)
{
    DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
}
/**
 * Tears the unit down: every thread's instruction list is drained from the
 * front, and for each instruction its hash entry is removed before the
 * instruction itself leaves the list.
 */
template <class MemDepPred, class Impl>
MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
{
    for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
        std::list<DynInstPtr> &thread_insts = instList[tid];

        while (!thread_insts.empty()) {
            ListIt front_it = thread_insts.begin();

            // Every tracked instruction must have a matching hash entry.
            MemDepHashIt entry_it = memDepHash.find((*front_it)->seqNum);
            assert(entry_it != memDepHash.end());

            memDepHash.erase(entry_it);
            thread_insts.erase(front_it);
        }
    }

#ifdef DEBUG
    // All entries should have been released by now.
    assert(MemDepEntry::memdep_count == 0);
#endif
}
/**
 * Late initialization for units built with the empty constructor: records
 * the thread id, names the unit "<cpu>.memDep<tid>" and initializes the
 * dependence predictor from the store-set parameters.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::init(DerivO3CPUParams *params, ThreadID tid)
{
    DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid);

    id = tid;
    _name = csprintf("%s.memDep%d", params->name, tid);

    depPred.init(params->store_set_clear_period,
                 params->SSITSize,
                 params->LFSTSize);
}
/**
 * Names and describes the four statistics of this unit. Each stat name is
 * the unit's name followed by a fixed suffix.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::regStats()
{
    const std::string unit_name = name();

    insertedLoads
        .name(unit_name + ".insertedLoads")
        .desc("Number of loads inserted to the mem dependence unit.");

    insertedStores
        .name(unit_name + ".insertedStores")
        .desc("Number of stores inserted to the mem dependence unit.");

    conflictingLoads
        .name(unit_name + ".conflictingLoads")
        .desc("Number of conflicting loads.");

    conflictingStores
        .name(unit_name + ".conflictingStores")
        .desc("Number of conflicting stores.");
}
/**
 * Switches the unit out. The unit is expected to be empty at this point:
 * no pending replays, no hash entries and no tracked instructions for any
 * thread. The containers are cleared afterwards regardless, so builds in
 * which assert() is compiled out (NDEBUG) still end up with clean state.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::switchOut()
{
    assert(instsToReplay.empty());
    assert(memDepHash.empty());

    // Clear any state. Every thread's list is checked, not just thread 0,
    // so a leftover instruction on any thread is caught.
    for (ThreadID tid = 0; tid < Impl::MaxThreads; ++tid) {
        assert(instList[tid].empty());
        instList[tid].clear();
    }
    instsToReplay.clear();
    memDepHash.clear();
}
/**
 * Resets the unit when taking over from another CPU's thread: no barrier
 * is outstanding afterwards and the dependence predictor is cleared.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
{
    // Forget any barrier that was being tracked.
    loadBarrier = false;
    storeBarrier = false;
    loadBarrierSN = 0;
    storeBarrierSN = 0;

    // Drop all learned dependence state.
    depPred.clear();
}
// Stores the pointer to the instruction queue. moveToReady() hands ready
// memory instructions to this queue through addReadyMemInst(), so the
// pointer has to be set before any instruction becomes ready.
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
{
iqPtr = iq_ptr;
}
/**
 * Inserts a memory instruction into the unit.
 *
 * A MemDepEntry is created for the instruction and registered in the hash
 * and in the per-thread instruction list. The instruction is then made
 * dependent on an in-flight barrier of the matching kind, or else on the
 * store the dependence predictor names for its PC. If no such producer is
 * currently tracked, the instruction only has to wait for its registers.
 *
 * @param inst The load or store to insert; any other instruction type
 *             triggers a panic.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
{
    ThreadID tid = inst->threadNumber;

    MemDepEntryPtr inst_entry = new MemDepEntry(inst);

    // Add the MemDepEntry to the hash.
    memDepHash.insert(
        std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
#ifdef DEBUG
    MemDepEntry::memdep_insert++;
#endif

    instList[tid].push_back(inst);

    // Remember where the instruction sits in the list so that it can be
    // erased directly later on.
    inst_entry->listIt = --(instList[tid].end());

    // Check any barriers and the dependence predictor for any
    // producing memrefs/stores.
    InstSeqNum producing_store;
    if (inst->isLoad() && loadBarrier) {
        DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
                loadBarrierSN);
        producing_store = loadBarrierSN;
    } else if (inst->isStore() && storeBarrier) {
        DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
                storeBarrierSN);
        producing_store = storeBarrierSN;
    } else {
        producing_store = depPred.checkInst(inst->instAddr());
    }

    MemDepEntryPtr store_entry = NULL;

    // If there is a producing store, try to find the entry.
    if (producing_store != 0) {
        DPRINTF(MemDepUnit, "Searching for producer\n");
        MemDepHashIt hash_it = memDepHash.find(producing_store);

        if (hash_it != memDepHash.end()) {
            store_entry = (*hash_it).second;
            DPRINTF(MemDepUnit, "Producer found\n");
        }
    }

    // If no store entry, then instruction can issue as soon as the registers
    // are ready.
    if (!store_entry) {
        DPRINTF(MemDepUnit, "No dependency for inst PC "
                "%s [sn:%lli].\n", inst->pcState(), inst->seqNum);

        inst_entry->memDepReady = true;

        if (inst->readyToIssue()) {
            inst_entry->regsReady = true;

            moveToReady(inst_entry);
        }
    } else {
        // Otherwise make the instruction dependent on the store/barrier.
        DPRINTF(MemDepUnit, "Adding to dependency list; "
                "inst PC %s is dependent on [sn:%lli].\n",
                inst->pcState(), producing_store);

        if (inst->readyToIssue()) {
            inst_entry->regsReady = true;
        }

        // Clear the bit saying this instruction can issue.
        inst->clearCanIssue();

        // Add this instruction to the list of dependents.
        store_entry->dependInsts.push_back(inst_entry);

        if (inst->isLoad()) {
            ++conflictingLoads;
        } else {
            ++conflictingStores;
        }
    }

    if (inst->isStore()) {
        DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
                inst->pcState(), inst->seqNum);

        // Let the predictor know about the new in-flight store.
        depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);

        ++insertedStores;
    } else if (inst->isLoad()) {
        ++insertedLoads;
    } else {
        panic("Unknown type! (most likely a barrier).");
    }
}
/**
 * Inserts a non-speculative memory instruction. The instruction gets a
 * MemDepEntry in the hash and a slot in its thread's list, but no
 * dependence lookup is performed. Stores are still reported to the
 * predictor. Anything that is neither a store nor a load panics.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
{
    MemDepEntryPtr new_entry = new MemDepEntry(inst);

    // Register the entry under the instruction's sequence number.
    memDepHash.insert(
        std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, new_entry));
#ifdef DEBUG
    MemDepEntry::memdep_insert++;
#endif

    // Append to the thread's list and keep the resulting position.
    std::list<DynInstPtr> &thread_insts = instList[inst->threadNumber];
    new_entry->listIt = thread_insts.insert(thread_insts.end(), inst);

    // Same bookkeeping as at the end of insert().
    if (inst->isStore()) {
        DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
                inst->pcState(), inst->seqNum);

        depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);

        ++insertedStores;
        return;
    }

    if (inst->isLoad()) {
        ++insertedLoads;
        return;
    }

    panic("Unknown type! (most likely a barrier).");
}
/**
 * Inserts a barrier. A memory barrier becomes the outstanding barrier for
 * both loads and stores; a write barrier only for stores. In either case
 * the barrier gets a MemDepEntry and a slot in its thread's list so that
 * later instructions can be made dependent on it.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
{
    const InstSeqNum barrier_seq = barr_inst->seqNum;

    if (barr_inst->isMemBarrier()) {
        // Blocks loads as well as stores.
        loadBarrierSN = barrier_seq;
        loadBarrier = true;
        storeBarrierSN = barrier_seq;
        storeBarrier = true;

        DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
                barr_inst->pcState(),barrier_seq);
    } else if (barr_inst->isWriteBarrier()) {
        // Blocks stores only.
        storeBarrierSN = barrier_seq;
        storeBarrier = true;

        DPRINTF(MemDepUnit, "Inserted a write barrier\n");
    }

    MemDepEntryPtr barrier_entry = new MemDepEntry(barr_inst);

    // Register the entry under the barrier's sequence number.
    memDepHash.insert(
        std::pair<InstSeqNum, MemDepEntryPtr>(barrier_seq, barrier_entry));
#ifdef DEBUG
    MemDepEntry::memdep_insert++;
#endif

    // Append to the thread's list and keep the resulting position.
    std::list<DynInstPtr> &thread_insts = instList[barr_inst->threadNumber];
    barrier_entry->listIt = thread_insts.insert(thread_insts.end(), barr_inst);
}
/**
 * Notes that an instruction's registers are ready. If its memory
 * dependencies are already satisfied it is moved to the ready list;
 * otherwise it keeps waiting.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst)
{
    DPRINTF(MemDepUnit, "Marking registers as ready for "
            "instruction PC %s [sn:%lli].\n",
            inst->pcState(), inst->seqNum);

    MemDepEntryPtr entry = findInHash(inst);
    entry->regsReady = true;

    if (!entry->memDepReady) {
        DPRINTF(MemDepUnit, "Instruction still waiting on "
                "memory dependency.\n");
        return;
    }

    DPRINTF(MemDepUnit, "Instruction has its memory "
            "dependencies resolved, adding it to the ready list.\n");
    moveToReady(entry);
}
/**
 * Marks a non-speculative instruction as ready: its entry is looked up and
 * moved straight to the ready list without checking any readiness flags.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst)
{
    DPRINTF(MemDepUnit, "Marking non speculative "
            "instruction PC %s as ready [sn:%lli].\n",
            inst->pcState(), inst->seqNum);

    MemDepEntryPtr ready_entry = findInHash(inst);

    moveToReady(ready_entry);
}
// Queues an instruction for re-execution by appending it to instsToReplay.
// Nothing else happens here; replay() later drains that list and moves
// each queued instruction to the ready list.
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::reschedule(DynInstPtr &inst)
{
instsToReplay.push_back(inst);
}
/**
 * Replays every instruction queued by reschedule(), oldest request first,
 * by moving each one to the ready list. The inst argument is not used:
 * the whole replay list is drained regardless of which instruction
 * triggered the call.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
{
    DynInstPtr pending_inst;

    while (!instsToReplay.empty()) {
        pending_inst = instsToReplay.front();

        MemDepEntryPtr pending_entry = findInHash(pending_inst);

        DPRINTF(MemDepUnit, "Replaying mem instruction PC %s [sn:%lli].\n",
                pending_inst->pcState(), pending_inst->seqNum);

        moveToReady(pending_entry);

        instsToReplay.pop_front();
    }
}
/**
 * Stops tracking a completed memory instruction: it is removed from its
 * thread's instruction list and its entry is dropped from the hash.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
{
    DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n",
            inst->pcState(), inst->seqNum);

    MemDepHashIt entry_it = memDepHash.find(inst->seqNum);
    assert(entry_it != memDepHash.end());

    // The entry remembers the instruction's position in the list.
    instList[inst->threadNumber].erase(entry_it->second->listIt);

    // Release the hash's reference before erasing the slot itself.
    entry_it->second = NULL;
    memDepHash.erase(entry_it);
#ifdef DEBUG
    MemDepEntry::memdep_erase++;
#endif
}
/**
 * Completes a barrier: its dependents are woken, the barrier stops being
 * tracked, and the load/store barrier flags are cleared if this barrier is
 * still the one recorded for them.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::completeBarrier(DynInstPtr &inst)
{
    wakeDependents(inst);
    completed(inst);

    const InstSeqNum done_seq = inst->seqNum;

    DPRINTF(MemDepUnit, "barrier completed: %s SN:%lli\n", inst->pcState(),
            inst->seqNum);

    const bool mem_barrier = inst->isMemBarrier();

    // Only a memory barrier can release loads.
    if (mem_barrier && loadBarrierSN == done_seq)
        loadBarrier = false;

    // Memory and write barriers both release stores.
    if ((mem_barrier || inst->isWriteBarrier()) && storeBarrierSN == done_seq)
        storeBarrier = false;
}
/**
 * Wakes every instruction that was made dependent on the given store or
 * barrier. A dependent whose registers are ready and which has not been
 * squashed is moved to the ready list; any other dependent is only marked
 * as having its memory dependence satisfied. The dependents list is
 * emptied afterwards.
 *
 * @param inst The completing instruction; calls for anything other than a
 *             store, memory barrier or write barrier return immediately.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
{
    // Only stores and barriers have dependents.
    if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
        return;
    }

    MemDepEntryPtr inst_entry = findInHash(inst);

    // Unsigned index to match the vector's unsigned size().
    for (size_t i = 0; i < inst_entry->dependInsts.size(); ++i) {
        MemDepEntryPtr woken_inst = inst_entry->dependInsts[i];

        if (!woken_inst->inst) {
            // Potentially removed mem dep entries could be on this list
            continue;
        }

        DPRINTF(MemDepUnit, "Waking up a dependent inst, "
                "[sn:%lli].\n",
                woken_inst->inst->seqNum);

        if (woken_inst->regsReady && !woken_inst->squashed) {
            moveToReady(woken_inst);
        } else {
            woken_inst->memDepReady = true;
        }
    }

    inst_entry->dependInsts.clear();
}
/**
 * Squashes all instructions of a thread that are younger than the given
 * sequence number.
 *
 * Pending replays of such instructions are dropped first. Then the
 * thread's instruction list is trimmed from the back while its last
 * element has a sequence number above squashed_num: a matching barrier
 * flag is cleared, the entry is marked squashed and removed from the hash,
 * and the instruction is removed from the list. Finally the dependence
 * predictor is told to squash as well.
 *
 * @param squashed_num Instructions with a larger sequence number go away.
 * @param tid Thread whose instructions are squashed.
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
                                     ThreadID tid)
{
    // Drop replay requests that belong to squashed instructions.
    ListIt replay_it = instsToReplay.begin();
    while (replay_it != instsToReplay.end()) {
        if ((*replay_it)->threadNumber == tid &&
            (*replay_it)->seqNum > squashed_num) {
            replay_it = instsToReplay.erase(replay_it);
        } else {
            ++replay_it;
        }
    }

    // Work on the back of the list directly. This avoids decrementing
    // end() of an empty list or stepping an iterator before begin(), both
    // of which are invalid iterator operations.
    std::list<DynInstPtr> &thread_insts = instList[tid];

    while (!thread_insts.empty() &&
           thread_insts.back()->seqNum > squashed_num) {
        const InstSeqNum squashed_sn = thread_insts.back()->seqNum;

        DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n",
                squashed_sn);

        // A squashed barrier no longer blocks anything.
        if (squashed_sn == loadBarrierSN)
            loadBarrier = false;

        if (squashed_sn == storeBarrierSN)
            storeBarrier = false;

        MemDepHashIt hash_it = memDepHash.find(squashed_sn);

        assert(hash_it != memDepHash.end());

        // Other entries may still reference this one through their
        // dependents list, so flag it before releasing the hash's pointer.
        (*hash_it).second->squashed = true;

        (*hash_it).second = NULL;

        memDepHash.erase(hash_it);
#ifdef DEBUG
        MemDepEntry::memdep_erase++;
#endif

        thread_insts.pop_back();
    }

    // Tell the dependency predictor to squash as well.
    depPred.squash(squashed_num, tid);
}
// Reports a memory ordering violation to the dependence predictor.
// Only the instruction addresses of the store and of the violating load
// are passed on; no state of this unit itself is modified here.
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst,
DynInstPtr &violating_load)
{
DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
" load: %#x, store: %#x\n", violating_load->instAddr(),
store_inst->instAddr());
// Tell the memory dependence predictor of the violation. Note the
// argument order: store address first, then load address.
depPred.violation(store_inst->instAddr(), violating_load->instAddr());
}
// Notifies the dependence predictor that an instruction has issued,
// passing its instruction address, sequence number and whether it is a
// store. No state of this unit itself is modified here.
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
{
DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
inst->instAddr(), inst->seqNum);
depPred.issued(inst->instAddr(), inst->seqNum, inst->isStore());
}
/**
 * Looks up the dependence entry of an instruction by sequence number and
 * returns a reference to the pointer stored in the hash. The instruction
 * must be tracked; a missing entry trips the assertion.
 */
template <class MemDepPred, class Impl>
inline typename MemDepUnit<MemDepPred,Impl>::MemDepEntryPtr &
MemDepUnit<MemDepPred, Impl>::findInHash(const DynInstPtr &inst)
{
    MemDepHashIt entry_it = memDepHash.find(inst->seqNum);

    assert(entry_it != memDepHash.end());

    return entry_it->second;
}
/**
 * Hands the instruction of the given entry to the instruction queue as a
 * ready memory instruction. The entry must not have been squashed.
 */
template <class MemDepPred, class Impl>
inline void
MemDepUnit<MemDepPred, Impl>::moveToReady(MemDepEntryPtr &woken_inst_entry)
{
    DynInstPtr &ready_inst = woken_inst_entry->inst;

    DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] "
            "to the ready list.\n", ready_inst->seqNum);

    assert(!woken_inst_entry->squashed);

    iqPtr->addReadyMemInst(ready_inst);
}
/**
 * Debugging aid: prints, for every thread, the size of its instruction
 * list followed by one record per tracked instruction, then the size of
 * the dependence hash (and, in DEBUG builds, the live entry count).
 */
template <class MemDepPred, class Impl>
void
MemDepUnit<MemDepPred, Impl>::dumpLists()
{
    for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
        std::list<DynInstPtr> &thread_insts = instList[tid];

        cprintf("Instruction list %i size: %i\n",
                tid, thread_insts.size());

        int num = 0;

        for (ListIt it = thread_insts.begin(); it != thread_insts.end();
             ++it, ++num) {
            DynInstPtr &cur = *it;

            cprintf("Instruction:%i\nPC: %s\n[sn:%i]\n[tid:%i]\nIssued:%i\n"
                    "Squashed:%i\n\n",
                    num, cur->pcState(),
                    cur->seqNum,
                    cur->threadNumber,
                    cur->isIssued(),
                    cur->isSquashed());
        }
    }

    cprintf("Memory dependence hash size: %i\n", memDepHash.size());

#ifdef DEBUG
    cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
#endif
}

View File

@ -0,0 +1,200 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Gabe Black
*/
#ifndef __CPU_O3_REGFILE_HH__
#define __CPU_O3_REGFILE_HH__
#include <vector>
#include "arch/isa_traits.hh"
#include "arch/kernel_stats.hh"
#include "arch/types.hh"
#include "base/trace.hh"
#include "config/the_isa.hh"
#include "cpu/o3/comm.hh"
#include "debug/IEW.hh"
/**
 * Simple physical register file class.
 * Right now this is specific to Alpha until we decide if/how to make things
 * generic enough to support other ISAs.
 *
 * Integer and floating point registers live in two separate arrays.
 * Callers address floating point registers with an index that is offset
 * by numPhysicalIntRegs; every float accessor subtracts that offset before
 * touching floatRegFile.
 *
 * The arrays are allocated with new[] in the constructor; this class does
 * not declare a destructor.
 */
template <class Impl>
class PhysRegFile
{
  protected:
    typedef TheISA::IntReg IntReg;
    typedef TheISA::FloatReg FloatReg;
    typedef TheISA::FloatRegBits FloatRegBits;

    /** Storage for one floating point register, readable either as a
     *  floating point value (d) or as its raw bit pattern (q).
     */
    typedef union {
        FloatReg d;
        FloatRegBits q;
    } PhysFloatReg;

    // Note that most of the definitions of the IntReg, FloatReg, etc. exist
    // within the Impl/ISA class and not within this PhysRegFile class.

    // Will make these registers public for now, but they probably should
    // be private eventually with some accessor functions.
  public:
    typedef typename Impl::O3CPU O3CPU;

    /**
     * Constructs a physical register file with the specified amount of
     * integer and floating point registers.
     */
    PhysRegFile(O3CPU *_cpu, unsigned _numPhysicalIntRegs,
                unsigned _numPhysicalFloatRegs);

    //Everything below should be pretty well identical to the normal
    //register file that exists within AlphaISA class.
    //The duplication is unfortunate but it's better than having
    //different ways to access certain registers.

    /** Reads an integer register. */
    uint64_t readIntReg(PhysRegIndex reg_idx)
    {
        assert(reg_idx < numPhysicalIntRegs);

        DPRINTF(IEW, "RegFile: Access to int register %i, has data "
                "%#x\n", int(reg_idx), intRegFile[reg_idx]);
        return intRegFile[reg_idx];
    }

    /** Reads a floating point register (double precision). */
    FloatReg readFloatReg(PhysRegIndex reg_idx)
    {
        // Remove the base Float reg dependency.
        reg_idx = reg_idx - numPhysicalIntRegs;

        // The rebased index addresses floatRegFile directly, so it has to
        // be checked against the float register count alone (the same
        // bound the setters use).
        assert(reg_idx < numPhysicalFloatRegs);

        FloatReg floatReg = floatRegFile[reg_idx].d;

        DPRINTF(IEW, "RegFile: Access to float register %i, has "
                "data %#x\n", int(reg_idx), floatRegFile[reg_idx].q);

        return floatReg;
    }

    /** Reads a floating point register as its raw bit pattern. */
    FloatRegBits readFloatRegBits(PhysRegIndex reg_idx)
    {
        // Remove the base Float reg dependency.
        reg_idx = reg_idx - numPhysicalIntRegs;

        // Same bound as readFloatReg(): the index is already rebased.
        assert(reg_idx < numPhysicalFloatRegs);

        FloatRegBits floatRegBits = floatRegFile[reg_idx].q;

        DPRINTF(IEW, "RegFile: Access to float register %i as int, "
                "has data %#x\n", int(reg_idx), (uint64_t)floatRegBits);

        return floatRegBits;
    }

    /** Sets an integer register to the given value. */
    void setIntReg(PhysRegIndex reg_idx, uint64_t val)
    {
        assert(reg_idx < numPhysicalIntRegs);

        DPRINTF(IEW, "RegFile: Setting int register %i to %#x\n",
                int(reg_idx), val);

        // Writes aimed at the ISA's zero register index are dropped.
        if (reg_idx != TheISA::ZeroReg)
            intRegFile[reg_idx] = val;
    }

    /** Sets a double precision floating point register to the given value. */
    void setFloatReg(PhysRegIndex reg_idx, FloatReg val)
    {
        // Remove the base Float reg dependency.
        reg_idx = reg_idx - numPhysicalIntRegs;

        assert(reg_idx < numPhysicalFloatRegs);

        DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                int(reg_idx), (uint64_t)val);

        // Only the Alpha build skips writes to the zero register index;
        // on every other ISA the store below is unconditional.
#if THE_ISA == ALPHA_ISA
        if (reg_idx != TheISA::ZeroReg)
#endif
            floatRegFile[reg_idx].d = val;
    }

    /** Sets a floating point register from a raw bit pattern. */
    void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
    {
        // Remove the base Float reg dependency.
        reg_idx = reg_idx - numPhysicalIntRegs;

        assert(reg_idx < numPhysicalFloatRegs);

        DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
                int(reg_idx), (uint64_t)val);

        floatRegFile[reg_idx].q = val;
    }

  public:
    /** (signed) integer register file. */
    IntReg *intRegFile;

    /** Floating point register file. */
    PhysFloatReg *floatRegFile;

  private:
    int intrflag;                       // interrupt flag

  private:
    /** CPU pointer. */
    O3CPU *cpu;

  public:
    /** Number of physical integer registers. */
    unsigned numPhysicalIntRegs;
    /** Number of physical floating point registers. */
    unsigned numPhysicalFloatRegs;
};
/**
 * Stores the CPU pointer and register counts, then allocates both register
 * arrays and fills them with zero bytes.
 */
template <class Impl>
PhysRegFile<Impl>::PhysRegFile(O3CPU *_cpu, unsigned _numPhysicalIntRegs,
                               unsigned _numPhysicalFloatRegs)
    : cpu(_cpu),
      numPhysicalIntRegs(_numPhysicalIntRegs),
      numPhysicalFloatRegs(_numPhysicalFloatRegs)
{
    // Allocate both arrays first, then clear them.
    intRegFile = new IntReg[numPhysicalIntRegs];
    floatRegFile = new PhysFloatReg[numPhysicalFloatRegs];

    const size_t int_bytes = sizeof(IntReg) * numPhysicalIntRegs;
    const size_t float_bytes = sizeof(PhysFloatReg) * numPhysicalFloatRegs;

    memset(intRegFile, 0, int_bytes);
    memset(floatRegFile, 0, float_bytes);
}
#endif

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/isa_specific.hh"
#include "cpu/o3/rename_impl.hh"
// Explicitly instantiate the DefaultRename template for O3CPUImpl in this
// translation unit.
template class DefaultRename<O3CPUImpl>;

View File

@ -0,0 +1,487 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_RENAME_HH__
#define __CPU_O3_RENAME_HH__
#include <list>
#include "base/statistics.hh"
#include "config/the_isa.hh"
#include "cpu/timebuf.hh"
struct DerivO3CPUParams;
/**
* DefaultRename handles both single threaded and SMT rename. Its
* width is specified by the parameters; each cycle it tries to rename
* that many instructions. It holds onto the rename history of all
* instructions with destination registers, storing the
* arch. register, the new physical register, and the old physical
* register, to allow for undoing of mappings if squashing happens, or
* freeing up registers upon commit. Rename handles blocking if the
* ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
* and does so by stalling on the instruction until the ROB is empty
* and there are no instructions in flight to the ROB.
*/
template<class Impl>
class DefaultRename
{
public:
// Typedefs from the Impl.
typedef typename Impl::CPUPol CPUPol;
typedef typename Impl::DynInstPtr DynInstPtr;
typedef typename Impl::O3CPU O3CPU;
// Typedefs from the CPUPol
typedef typename CPUPol::DecodeStruct DecodeStruct;
typedef typename CPUPol::RenameStruct RenameStruct;
typedef typename CPUPol::TimeStruct TimeStruct;
typedef typename CPUPol::FreeList FreeList;
typedef typename CPUPol::RenameMap RenameMap;
// These are used only for initialization.
typedef typename CPUPol::IEW IEW;
typedef typename CPUPol::Commit Commit;
// Typedefs from the ISA.
typedef TheISA::RegIndex RegIndex;
// A list is used to queue the instructions. Barrier insts must
// be added to the front of the list, which is the only reason for
// using a list instead of a queue. (Most other stages use a
// queue)
typedef std::list<DynInstPtr> InstQueue;
typedef typename std::list<DynInstPtr>::iterator ListIt;
public:
/** Overall rename status. Used to determine if the CPU can
* deschedule itself due to a lack of activity.
*/
enum RenameStatus {
Active,
Inactive
};
/** Individual thread status. */
enum ThreadStatus {
Running,
Idle,
StartSquash,
Squashing,
Blocked,
Unblocking,
SerializeStall
};
private:
/** Rename status. */
RenameStatus _status;
/** Per-thread status. */
ThreadStatus renameStatus[Impl::MaxThreads];
public:
/** DefaultRename constructor. */
DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params);
/** Returns the name of rename. */
std::string name() const;
/** Registers statistics. */
void regStats();
/** Sets the main backwards communication time buffer pointer. */
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
/** Sets pointer to time buffer used to communicate to the next stage. */
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
/** Sets pointer to time buffer coming from decode. */
void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
/** Sets pointer to IEW stage. Used only for initialization. */
void setIEWStage(IEW *iew_stage)
{ iew_ptr = iew_stage; }
/** Sets pointer to commit stage. Used only for initialization. */
void setCommitStage(Commit *commit_stage)
{ commit_ptr = commit_stage; }
private:
/** Pointer to IEW stage. Used only for initialization. */
IEW *iew_ptr;
/** Pointer to commit stage. Used only for initialization. */
Commit *commit_ptr;
public:
/** Initializes variables for the stage. */
void initStage();
/** Sets pointer to list of active threads. */
void setActiveThreads(std::list<ThreadID> *at_ptr);
/** Sets pointer to rename maps (per-thread structures). */
void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
/** Sets pointer to the free list. */
void setFreeList(FreeList *fl_ptr);
/** Sets pointer to the scoreboard. */
void setScoreboard(Scoreboard *_scoreboard);
/** Drains the rename stage. */
bool drain();
/** Resumes execution after a drain. The body is empty in this class. */
void resume() { }
/** Switches out the rename stage. */
void switchOut();
/** Takes over from another CPU's thread. */
void takeOverFrom();
/** Squashes all instructions in a thread. */
void squash(const InstSeqNum &squash_seq_num, ThreadID tid);
/** Ticks rename, which processes all input signals and attempts to rename
* as many instructions as possible.
*/
void tick();
/** Debugging function used to dump history buffer of renamings. */
void dumpHistory();
private:
/** Determines what to do based on rename's current status.
* @param status_change rename() sets this variable if there was a status
* change (i.e. switching from blocking to unblocking).
* @param tid Thread id to rename instructions from.
*/
void rename(bool &status_change, ThreadID tid);
/** Renames instructions for the given thread. Also handles serializing
* instructions.
*/
void renameInsts(ThreadID tid);
/** Inserts unused instructions from a given thread into the skid buffer,
* to be renamed once rename unblocks.
*/
void skidInsert(ThreadID tid);
/** Separates instructions from decode into individual lists of instructions
* sorted by thread.
*/
void sortInsts();
/** Returns if all of the skid buffers are empty. */
bool skidsEmpty();
/** Updates overall rename status based on all of the threads' statuses. */
void updateStatus();
/** Switches rename to blocking, and signals back that rename has become
* blocked.
* @return Returns true if there is a status change.
*/
bool block(ThreadID tid);
/** Switches rename to unblocking if the skid buffer is empty, and signals
* back that rename has unblocked.
* @return Returns true if there is a status change.
*/
bool unblock(ThreadID tid);
/** Executes actual squash, removing squashed instructions. */
void doSquash(const InstSeqNum &squash_seq_num, ThreadID tid);
/** Removes a committed instruction's rename history. */
void removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid);
/** Renames the source registers of an instruction. */
inline void renameSrcRegs(DynInstPtr &inst, ThreadID tid);
/** Renames the destination registers of an instruction. */
inline void renameDestRegs(DynInstPtr &inst, ThreadID tid);
/** Calculates the number of free ROB entries for a specific thread. */
inline int calcFreeROBEntries(ThreadID tid);
/** Calculates the number of free IQ entries for a specific thread. */
inline int calcFreeIQEntries(ThreadID tid);
/** Calculates the number of free LSQ entries for a specific thread. */
inline int calcFreeLSQEntries(ThreadID tid);
/** Returns the number of valid instructions coming from decode. */
unsigned validInsts();
/** Reads signals telling rename to block/unblock. */
void readStallSignals(ThreadID tid);
/** Checks if any stages are telling rename to block. */
bool checkStall(ThreadID tid);
/** Gets the number of free entries for a specific thread. */
void readFreeEntries(ThreadID tid);
/** Checks the signals and updates the status. */
bool checkSignalsAndUpdate(ThreadID tid);
/** Either serializes on the next instruction available in the InstQueue,
* or records that it must serialize on the next instruction to enter
* rename.
* @param inst_list The list of younger, unprocessed instructions for the
* thread that has the serializeAfter instruction.
* @param tid The thread id.
*/
void serializeAfter(InstQueue &inst_list, ThreadID tid);
/** Holds the information for each destination register rename. It holds
* the instruction's sequence number, the arch register, the old physical
* register for that arch. register, and the new physical register.
*/
struct RenameHistory {
/** Stores the four values verbatim; the body does no other work. */
RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg,
PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg)
: instSeqNum(_instSeqNum), archReg(_archReg),
newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg)
{
}
/** The sequence number of the instruction that renamed. */
InstSeqNum instSeqNum;
/** The architectural register index that was renamed. */
RegIndex archReg;
/** The new physical register that the arch. register is renamed to. */
PhysRegIndex newPhysReg;
/** The old physical register that the arch. register was renamed to. */
PhysRegIndex prevPhysReg;
};
/** A per-thread list of all destination register renames, used to either
* undo rename mappings or free old physical registers.
*/
std::list<RenameHistory> historyBuffer[Impl::MaxThreads];
/** Pointer to CPU. */
O3CPU *cpu;
/** Pointer to main time buffer used for backwards communication. */
TimeBuffer<TimeStruct> *timeBuffer;
/** Wire to get IEW's output from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromIEW;
/** Wire to get commit's output from backwards time buffer. */
typename TimeBuffer<TimeStruct>::wire fromCommit;
/** Wire to write information heading to previous stages. */
typename TimeBuffer<TimeStruct>::wire toDecode;
/** Rename instruction queue. */
TimeBuffer<RenameStruct> *renameQueue;
/** Wire to write any information heading to IEW. */
typename TimeBuffer<RenameStruct>::wire toIEW;
/** Decode instruction queue interface. */
TimeBuffer<DecodeStruct> *decodeQueue;
/** Wire to get decode's output from decode queue. */
typename TimeBuffer<DecodeStruct>::wire fromDecode;
/** Queue of all instructions coming from decode this cycle. */
InstQueue insts[Impl::MaxThreads];
/** Skid buffer between rename and decode. */
InstQueue skidBuffer[Impl::MaxThreads];
/** Rename map interface. */
RenameMap *renameMap[Impl::MaxThreads];
/** Free list interface. */
FreeList *freeList;
/** Pointer to the list of active threads. */
std::list<ThreadID> *activeThreads;
/** Pointer to the scoreboard. */
Scoreboard *scoreboard;
/** Count of instructions in progress that have been sent off to the IQ
* and ROB, but are not yet included in their occupancy counts.
*/
int instsInProgress[Impl::MaxThreads];
/** Variable that tracks if decode has written to the time buffer this
* cycle. Used to tell CPU if there is activity this cycle.
* NOTE(review): "decode" here looks copied from the decode stage;
* presumably this tracks writes made by rename -- confirm against the
* implementation.
*/
bool wroteToTimeBuffer;
/** Structures whose free entries impact the amount of instructions that
* can be renamed.
*/
struct FreeEntries {
unsigned iqEntries;
unsigned lsqEntries;
unsigned robEntries;
};
/** Per-thread tracking of the number of free entries of back-end
* structures.
*/
FreeEntries freeEntries[Impl::MaxThreads];
/** Records if the ROB is empty. In SMT mode the ROB may be dynamically
* partitioned between threads, so the ROB must tell rename when it is
* empty.
*/
bool emptyROB[Impl::MaxThreads];
/** Source of possible stalls. */
struct Stalls {
bool iew;
bool commit;
};
/** Tracks which stages are telling decode to stall.
* NOTE(review): as a member of the rename stage this presumably tracks
* stalls aimed at rename rather than decode -- confirm.
*/
Stalls stalls[Impl::MaxThreads];
/** The serialize instruction that rename has stalled on. */
DynInstPtr serializeInst[Impl::MaxThreads];
/** Records if rename needs to serialize on the next instruction for any
* thread.
*/
bool serializeOnNextInst[Impl::MaxThreads];
/** Delay between iew and rename, in ticks. */
int iewToRenameDelay;
/** Delay between decode and rename, in ticks. */
int decodeToRenameDelay;
/** Delay between commit and rename, in ticks. */
unsigned commitToRenameDelay;
/** Rename width, in instructions. */
unsigned renameWidth;
/** Commit width, in instructions. Used so rename knows how many
* instructions might have freed registers in the previous cycle.
*/
unsigned commitWidth;
/** The index of the instruction in the time buffer to IEW that rename is
* currently using.
*/
unsigned toIEWIndex;
/** Whether or not rename needs to block this cycle. */
bool blockThisCycle;
/** Whether or not rename needs to resume a serialize instruction
* after squashing. */
bool resumeSerialize;
/** Whether or not rename needs to resume clearing out the skidbuffer
* after squashing. */
bool resumeUnblocking;
/** The number of threads active in rename. */
ThreadID numThreads;
/** The maximum skid buffer size. */
unsigned skidBufferMax;
/** NOTE(review): undocumented in the original; presumably the total number
* of physical registers -- confirm where it is assigned.
*/
PhysRegIndex maxPhysicalRegs;
/** Enum to record the source of a structure full stall. Can come from
* either ROB, IQ, LSQ, and it is prioritized in that order.
*/
enum FullSource {
ROB,
IQ,
LSQ,
NONE
};
/** Function used to increment the stat that corresponds to the source of
* the stall.
*/
inline void incrFullStat(const FullSource &source);
/** Stat for total number of cycles spent squashing. */
Stats::Scalar renameSquashCycles;
/** Stat for total number of cycles spent idle. */
Stats::Scalar renameIdleCycles;
/** Stat for total number of cycles spent blocking. */
Stats::Scalar renameBlockCycles;
/** Stat for total number of cycles spent stalling for a serializing inst. */
Stats::Scalar renameSerializeStallCycles;
/** Stat for total number of cycles spent running normally. */
Stats::Scalar renameRunCycles;
/** Stat for total number of cycles spent unblocking. */
Stats::Scalar renameUnblockCycles;
/** Stat for total number of renamed instructions. */
Stats::Scalar renameRenamedInsts;
/** Stat for total number of squashed instructions that rename discards. */
Stats::Scalar renameSquashedInsts;
/** Stat for total number of times that the ROB starts a stall in rename. */
Stats::Scalar renameROBFullEvents;
/** Stat for total number of times that the IQ starts a stall in rename. */
Stats::Scalar renameIQFullEvents;
/** Stat for total number of times that the LSQ starts a stall in rename. */
Stats::Scalar renameLSQFullEvents;
/** Stat for total number of times that rename runs out of free registers
* to use to rename. */
Stats::Scalar renameFullRegistersEvents;
/** Stat for total number of renamed destination registers. */
Stats::Scalar renameRenamedOperands;
/** Stat for total number of source register rename lookups. */
Stats::Scalar renameRenameLookups;
/** NOTE(review): undocumented in the original; by name, presumably the
* integer-register share of the rename lookups -- confirm in regStats().
*/
Stats::Scalar intRenameLookups;
/** NOTE(review): undocumented in the original; by name, presumably the
* floating-point share of the rename lookups -- confirm in regStats().
*/
Stats::Scalar fpRenameLookups;
/** Stat for total number of committed renaming mappings. */
Stats::Scalar renameCommittedMaps;
/** Stat for total number of mappings that were undone due to a squash. */
Stats::Scalar renameUndoneMaps;
/** Number of serialize instructions handled. */
Stats::Scalar renamedSerializing;
/** Number of instructions marked as temporarily serializing. */
Stats::Scalar renamedTempSerializing;
/** Number of instructions inserted into skid buffers. */
Stats::Scalar renameSkidInsts;
};
#endif // __CPU_O3_RENAME_HH__

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,255 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include <vector>
#include "cpu/o3/rename_map.hh"
#include "debug/Rename.hh"
using namespace std;
// @todo: Consider making inline bool functions that determine if the
// register is a logical int, logical fp, physical int, physical fp,
// etc.
/** Destructor. The body is empty. */
SimpleRenameMap::~SimpleRenameMap() {}
void
SimpleRenameMap::init(unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
PhysRegIndex &ireg_idx,
unsigned _numLogicalFloatRegs,
unsigned _numPhysicalFloatRegs,
PhysRegIndex &freg_idx,
unsigned _numMiscRegs,
RegIndex _intZeroReg,
RegIndex _floatZeroReg,
int map_id,
bool bindRegs)
{
id = map_id;
numLogicalIntRegs = _numLogicalIntRegs;
numLogicalFloatRegs = _numLogicalFloatRegs;
numPhysicalIntRegs = _numPhysicalIntRegs;
numPhysicalFloatRegs = _numPhysicalFloatRegs;
numMiscRegs = _numMiscRegs;
intZeroReg = _intZeroReg;
floatZeroReg = _floatZeroReg;
DPRINTF(Rename, "Creating rename map %i. Phys: %i / %i, Float: "
"%i / %i.\n", id, numLogicalIntRegs, numPhysicalIntRegs,
numLogicalFloatRegs, numPhysicalFloatRegs);
numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;
//Create the rename maps
intRenameMap.resize(numLogicalIntRegs);
floatRenameMap.resize(numLogicalRegs);
if (bindRegs) {
DPRINTF(Rename, "Binding registers into rename map %i\n",id);
// Initialize the entries in the integer rename map to point to the
// physical registers of the same index
for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
{
intRenameMap[index].physical_reg = ireg_idx++;
}
// Initialize the entries in the floating point rename map to point to
// the physical registers of the same index
// Although the index refers purely to architected registers, because
// the floating reg indices come after the integer reg indices, they
// may exceed the size of a normal RegIndex (short).
for (PhysRegIndex index = numLogicalIntRegs;
index < numLogicalRegs; ++index)
{
floatRenameMap[index].physical_reg = freg_idx++;
}
} else {
DPRINTF(Rename, "Binding registers into rename map %i\n",id);
PhysRegIndex temp_ireg = ireg_idx;
for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
{
intRenameMap[index].physical_reg = temp_ireg++;
}
PhysRegIndex temp_freg = freg_idx;
for (PhysRegIndex index = numLogicalIntRegs;
index < numLogicalRegs; ++index)
{
floatRenameMap[index].physical_reg = temp_freg++;
}
}
}
/** Stores the free list pointer that rename() and numFreeEntries() use. */
void
SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
{
    this->freeList = fl_ptr;
}
/**
 * Maps an architected register to a fresh physical register.
 *
 * Integer and float registers take a new register from the free list,
 * unless the architected register is the zero register (for floats, only
 * on Alpha builds), in which case the zero register index is returned and
 * the map is left alone. Indices at or above numLogicalRegs are treated as
 * misc registers, which are not renamed: they map to a fixed slot after
 * all physical registers.
 *
 * @return Pair of (new physical register, previous physical register).
 */
SimpleRenameMap::RenameInfo
SimpleRenameMap::rename(RegIndex arch_reg)
{
    PhysRegIndex renamed_reg;
    PhysRegIndex previous_reg;

    if (arch_reg < numLogicalIntRegs) {
        RenameEntry &entry = intRenameMap[arch_reg];

        // Remember the mapping that is about to be replaced.
        previous_reg = entry.physical_reg;

        if (arch_reg == intZeroReg) {
            // The zero register is never remapped.
            renamed_reg = intZeroReg;
        } else {
            renamed_reg = freeList->getIntReg();
            entry.physical_reg = renamed_reg;

            assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
        }
    } else if (arch_reg < numLogicalRegs) {
        RenameEntry &entry = floatRenameMap[arch_reg];

        // Remember the mapping that is about to be replaced.
        previous_reg = entry.physical_reg;

        // Only Alpha builds special-case the float zero register.
#if THE_ISA == ALPHA_ISA
        const bool keep_zero = (arch_reg == floatZeroReg);
#else
        const bool keep_zero = false;
#endif

        if (keep_zero) {
            // The zero register is never remapped.
            renamed_reg = floatZeroReg;
        } else {
            renamed_reg = freeList->getFloatReg();
            entry.physical_reg = renamed_reg;

            assert(renamed_reg < numPhysicalRegs &&
                   renamed_reg >= numPhysicalIntRegs);
        }
    } else {
        // Misc register: drop the base offset to get its own index.
        arch_reg = arch_reg - numLogicalRegs;

        DPRINTF(Rename, "Renamed misc reg %d\n", arch_reg);

        // Misc registers are not renamed; they sit directly after all the
        // physical registers.
        renamed_reg = arch_reg + numPhysicalRegs;

        // Report the same register as the previous mapping. It lies outside
        // the normal register range, so the free list can avoid adding it.
        previous_reg = renamed_reg;
    }

    DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
            arch_reg, renamed_reg, previous_reg);

    return RenameInfo(renamed_reg, previous_reg);
}
/**
 * Returns the physical register currently mapped to an architected
 * register, without changing any mapping.
 */
PhysRegIndex
SimpleRenameMap::lookup(RegIndex arch_reg)
{
    if (arch_reg < numLogicalIntRegs)
        return intRenameMap[arch_reg].physical_reg;

    if (arch_reg < numLogicalRegs)
        return floatRenameMap[arch_reg].physical_reg;

    // Misc. regs don't rename: take the index relative to the first misc
    // register and place it after all the physical registers.
    const RegIndex misc_idx = arch_reg - numLogicalRegs;
    return numPhysicalRegs + misc_idx;
}
/**
 * Overwrites the mapping of an integer or float architected register.
 * Misc registers do not rename in this implementation, so any index beyond
 * the int + float range is ignored.
 */
void
SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
{
    if (arch_reg < numLogicalIntRegs) {
        DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n",
                (int)arch_reg, renamed_reg);

        intRenameMap[arch_reg].physical_reg = renamed_reg;
        return;
    }

    if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) {
        // The trace shows the float-relative index; the map itself is
        // indexed by the combined architected index.
        DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
                (int)arch_reg - numLogicalIntRegs, renamed_reg);

        floatRenameMap[arch_reg].physical_reg = renamed_reg;
    }
}
/**
 * Returns the smaller of the free list's free integer register count and
 * free float register count.
 */
int
SimpleRenameMap::numFreeEntries()
{
    const int free_int_regs = freeList->numFreeIntRegs();
    const int free_float_regs = freeList->numFreeFloatRegs();

    return (free_int_regs < free_float_regs) ? free_int_regs
                                             : free_float_regs;
}

View File

@ -0,0 +1,168 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
// Todo: Give the variable that marks the beginning of the FP registers a
// more meaningful name.
// Note: the earlier "create destructor" todo is stale; SimpleRenameMap
// declares a destructor (its body is currently empty).
#ifndef __CPU_O3_RENAME_MAP_HH__
#define __CPU_O3_RENAME_MAP_HH__
#include <iostream>
#include <utility>
#include <vector>
#include "arch/types.hh"
#include "config/the_isa.hh"
#include "cpu/o3/free_list.hh"
class SimpleRenameMap
{
protected:
typedef TheISA::RegIndex RegIndex;
public:
/**
* Pair of a logical register and a physical register. Tells the
* previous mapping of a logical register to a physical register.
* Used to roll back the rename map to a previous state.
*/
typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
/**
* Pair of a physical register and a physical register. Used to
* return the physical register that a logical register has been
* renamed to, and the previous physical register that the same
* logical register was previously mapped to.
*/
typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
public:
/** Default constructor. init() must be called prior to use. */
SimpleRenameMap() {};
/** Destructor. */
~SimpleRenameMap();
/** Initializes rename map with given parameters. */
void init(unsigned _numLogicalIntRegs,
unsigned _numPhysicalIntRegs,
PhysRegIndex &_int_reg_start,
unsigned _numLogicalFloatRegs,
unsigned _numPhysicalFloatRegs,
PhysRegIndex &_float_reg_start,
unsigned _numMiscRegs,
RegIndex _intZeroReg,
RegIndex _floatZeroReg,
int id,
bool bindRegs);
/** Sets the free list used with this rename map. */
void setFreeList(SimpleFreeList *fl_ptr);
//Tell rename map to get a free physical register for a given
//architected register. Not sure it should have a return value,
//but perhaps it should have some sort of fault in case there are
//no free registers.
RenameInfo rename(RegIndex arch_reg);
PhysRegIndex lookup(RegIndex phys_reg);
/**
* Marks the given register as ready, meaning that its value has been
* calculated and written to the register file.
* @param ready_reg The index of the physical register that is now ready.
*/
void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg);
int numFreeEntries();
private:
/** Rename Map ID */
int id;
/** Number of logical integer registers. */
int numLogicalIntRegs;
/** Number of physical integer registers. */
int numPhysicalIntRegs;
/** Number of logical floating point registers. */
int numLogicalFloatRegs;
/** Number of physical floating point registers. */
int numPhysicalFloatRegs;
/** Number of miscellaneous registers. */
int numMiscRegs;
/** Number of logical integer + float registers. */
int numLogicalRegs;
/** Number of physical integer + float registers. */
int numPhysicalRegs;
/** The integer zero register. This implementation assumes it is always
* zero and never can be anything else.
*/
RegIndex intZeroReg;
/** The floating point zero register. This implementation assumes it is
* always zero and never can be anything else.
*/
RegIndex floatZeroReg;
class RenameEntry
{
public:
PhysRegIndex physical_reg;
bool valid;
RenameEntry()
: physical_reg(0), valid(false)
{ }
};
private:
/** Integer rename map. */
std::vector<RenameEntry> intRenameMap;
/** Floating point rename map. */
std::vector<RenameEntry> floatRenameMap;
private:
/** Free list interface. */
SimpleFreeList *freeList;
};
#endif //__CPU_O3_RENAME_MAP_HH__

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Nathan Binkert
*/
#include "cpu/o3/isa_specific.hh"
#include "cpu/o3/rob_impl.hh"
// Force instantiation of the ROB template for the O3 CPU implementation.
template class ROB<O3CPUImpl>;

View File

@ -0,0 +1,332 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#ifndef __CPU_O3_ROB_HH__
#define __CPU_O3_ROB_HH__
#include <string>
#include <utility>
#include <vector>
#include "arch/registers.hh"
#include "base/types.hh"
#include "config/the_isa.hh"
/**
 * ROB class.  The ROB is largely what drives squashing.
 *
 * Instructions are stored in one list per thread (instList).  The public
 * head and tail iterators track the first and last instruction across
 * the thread lists.
 */
template <class Impl>
class ROB
{
  protected:
    typedef TheISA::RegIndex RegIndex;

  public:
    //Typedefs from the Impl.
    typedef typename Impl::O3CPU O3CPU;
    typedef typename Impl::DynInstPtr DynInstPtr;

    typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
    typedef typename std::list<DynInstPtr>::iterator InstIt;

    /** Possible ROB statuses. */
    enum Status {
        Running,
        Idle,
        ROBSquashing
    };

    /** SMT ROB Sharing Policy */
    enum ROBPolicy {
        Dynamic,
        Partitioned,
        Threshold
    };

  private:
    /** Per-thread ROB status. */
    Status robStatus[Impl::MaxThreads];

    /** ROB resource sharing policy for SMT mode. */
    ROBPolicy robPolicy;

  public:
    /** ROB constructor.
     *  @param _cpu Pointer to the CPU that owns this ROB.
     *  @param _numEntries Number of entries in ROB.
     *  @param _squashWidth Number of instructions that can be squashed in a
     *  single cycle.
     *  @param _smtROBPolicy ROB Partitioning Scheme for SMT.
     *  @param _smtROBThreshold Max Resources(by %) a thread can have in the
     *  ROB.
     *  @param _numThreads The number of threads.
     */
    ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth,
        std::string _smtROBPolicy, unsigned _smtROBThreshold,
        ThreadID _numThreads);

    /** Returns the name of the ROB. */
    std::string name() const;

    /** Sets pointer to the list of active threads.
     *  @param at_ptr Pointer to the list of active threads.
     */
    void setActiveThreads(std::list<ThreadID> *at_ptr);

    /** Switches out the ROB. */
    void switchOut();

    /** Takes over another CPU's thread. */
    void takeOverFrom();

    /** Function to insert an instruction into the ROB.  Note that whatever
     *  calls this function must ensure that there is enough space within the
     *  ROB for the new instruction.
     *  @param inst The instruction being inserted into the ROB.
     */
    void insertInst(DynInstPtr &inst);

    /** Returns pointer to the head instruction within the ROB.  There is
     *  no guarantee as to the return value if the ROB is empty.
     *  @retval Pointer to the DynInst that is at the head of the ROB.
     */
//    DynInstPtr readHeadInst();

    /** Returns a pointer to the head instruction of a specific thread within
     *  the ROB.
     *  @return Pointer to the DynInst that is at the head of the ROB.
     */
    DynInstPtr readHeadInst(ThreadID tid);

    /** Returns a pointer to the instruction with the given sequence if it is
     *  in the ROB.
     */
    DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);

    /** Returns pointer to the tail instruction within the ROB.  There is
     *  no guarantee as to the return value if the ROB is empty.
     *  @retval Pointer to the DynInst that is at the tail of the ROB.
     */
//    DynInstPtr readTailInst();

    /** Returns a pointer to the tail instruction of a specific thread within
     *  the ROB.
     *  @return Pointer to the DynInst that is at the tail of the ROB.
     */
    DynInstPtr readTailInst(ThreadID tid);

    /** Retires the head instruction, removing it from the ROB. */
//    void retireHead();

    /** Retires the head instruction of a specific thread, removing it from the
     *  ROB.
     */
    void retireHead(ThreadID tid);

    /** Is the oldest instruction across all threads ready. */
//    bool isHeadReady();

    /** Is the oldest instruction of a particular thread ready. */
    bool isHeadReady(ThreadID tid);

    /** Is the head instruction of any active thread ready to commit. */
    bool canCommit();

    /** Re-adjust ROB partitioning. */
    void resetEntries();

    /** Number of entries needed For 'num_threads' amount of threads. */
    int entryAmount(ThreadID num_threads);

    /** Returns the number of total free entries in the ROB. */
    unsigned numFreeEntries();

    /** Returns the number of free entries in a specific ROB partition. */
    unsigned numFreeEntries(ThreadID tid);

    /** Returns the maximum number of entries for a specific thread. */
    unsigned getMaxEntries(ThreadID tid) const
    { return maxEntries[tid]; }

    /** Returns the number of entries being used by a specific thread. */
    unsigned getThreadEntries(ThreadID tid) const
    { return threadEntries[tid]; }

    /** Returns if the ROB is full. */
    bool isFull() const
    { return numInstsInROB == numEntries; }

    /** Returns if a specific thread's partition is full.  The limit is the
     *  thread's own maxEntries, the same limit numFreeEntries(tid) uses;
     *  comparing against the size of the whole ROB would report a full
     *  partition as not full whenever maxEntries[tid] < numEntries.
     */
    bool isFull(ThreadID tid) const
    { return threadEntries[tid] >= maxEntries[tid]; }

    /** Returns if the ROB is empty. */
    bool isEmpty() const
    { return numInstsInROB == 0; }

    /** Returns if a specific thread's partition is empty. */
    bool isEmpty(ThreadID tid) const
    { return threadEntries[tid] == 0; }

    /** Executes the squash, marking squashed instructions. */
    void doSquash(ThreadID tid);

    /** Squashes all instructions younger than the given sequence number for
     *  the specific thread.
     */
    void squash(InstSeqNum squash_num, ThreadID tid);

    /** Updates the head instruction with the new oldest instruction. */
    void updateHead();

    /** Updates the tail instruction with the new youngest instruction. */
    void updateTail();

    /** Reads the PC of the oldest head instruction. */
//    uint64_t readHeadPC();

    /** Reads the PC of the head instruction of a specific thread. */
//    uint64_t readHeadPC(ThreadID tid);

    /** Reads the next PC of the oldest head instruction. */
//    uint64_t readHeadNextPC();

    /** Reads the next PC of the head instruction of a specific thread. */
//    uint64_t readHeadNextPC(ThreadID tid);

    /** Reads the sequence number of the oldest head instruction. */
//    InstSeqNum readHeadSeqNum();

    /** Reads the sequence number of the head instruction of a specific thread.
     */
//    InstSeqNum readHeadSeqNum(ThreadID tid);

    /** Reads the PC of the youngest tail instruction. */
//    uint64_t readTailPC();

    /** Reads the PC of the tail instruction of a specific thread. */
//    uint64_t readTailPC(ThreadID tid);

    /** Reads the sequence number of the youngest tail instruction. */
//    InstSeqNum readTailSeqNum();

    /** Reads the sequence number of tail instruction of a specific thread. */
//    InstSeqNum readTailSeqNum(ThreadID tid);

    /** Checks whether the ROB has finished squashing instructions for the
     *  given thread.
     *  @retval Whether or not the ROB is done squashing.
     */
    bool isDoneSquashing(ThreadID tid) const
    { return doneSquashing[tid]; }

    /** Checks if the ROB is still in the process of squashing instructions for
     *  any thread.
     */
    bool isDoneSquashing();

    /** This is more of a debugging function than anything.  Use
     *  numInstsInROB to get the instructions in the ROB unless you are
     *  double checking that variable.
     */
    int countInsts();

    /** This is more of a debugging function than anything.  Use
     *  threadEntries to get the instructions in the ROB unless you are
     *  double checking that variable.
     */
    int countInsts(ThreadID tid);

    /** Registers statistics. */
    void regStats();

  private:
    /** Pointer to the CPU. */
    O3CPU *cpu;

    /** Active Threads in CPU */
    std::list<ThreadID> *activeThreads;

    /** Total number of entries (capacity) of the ROB. */
    unsigned numEntries;

    /** Entries Per Thread */
    unsigned threadEntries[Impl::MaxThreads];

    /** Max Insts a Thread Can Have in the ROB */
    unsigned maxEntries[Impl::MaxThreads];

    /** ROB List of Instructions */
    std::list<DynInstPtr> instList[Impl::MaxThreads];

    /** Number of instructions that can be squashed in a single cycle. */
    unsigned squashWidth;

  public:
    /** Iterator pointing to the instruction which is the last instruction
     *  in the ROB.  This may at times be invalid (ie when the ROB is empty),
     *  however it should never be incorrect.
     */
    InstIt tail;

    /** Iterator pointing to the instruction which is the first instruction
     *  in the ROB. */
    InstIt head;

  private:
    /** Iterator used for walking through the list of instructions when
     *  squashing.  Used so that there is persistent state between cycles;
     *  when squashing, the instructions are marked as squashed but not
     *  immediately removed, meaning the tail iterator remains the same before
     *  and after a squash.
     *  This will always be set to instList[tid].end() if it is invalid.
     */
    InstIt squashIt[Impl::MaxThreads];

  public:
    /** Number of instructions in the ROB. */
    int numInstsInROB;

    /** Dummy instruction returned if there are no insts left. */
    DynInstPtr dummyInst;

  private:
    /** The sequence number of the squashed instruction. */
    InstSeqNum squashedSeqNum[Impl::MaxThreads];

    /** Is the ROB done squashing. */
    bool doneSquashing[Impl::MaxThreads];

    /** Number of threads. */
    ThreadID numThreads;

    // The number of rob_reads
    Stats::Scalar robReads;
    // The number of rob_writes
    Stats::Scalar robWrites;
};
#endif //__CPU_O3_ROB_HH__

View File

@ -0,0 +1,557 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#include <list>
#include "cpu/o3/rob.hh"
#include "debug/Fetch.hh"
#include "debug/ROB.hh"
using namespace std;
/**
 * Builds an empty ROB and applies the requested SMT sharing policy.
 *
 * @param _cpu             CPU that owns this ROB.
 * @param _numEntries      Total number of ROB entries.
 * @param _squashWidth     Instructions that may be squashed per doSquash().
 * @param _smtROBPolicy    "dynamic", "partitioned" or "threshold"
 *                         (compared case-insensitively).
 * @param _smtROBThreshold Per-thread entry limit used by the threshold policy.
 * @param _numThreads      Number of threads whose state is initialised.
 */
template <class Impl>
ROB<Impl>::ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth,
               std::string _smtROBPolicy, unsigned _smtROBThreshold,
               ThreadID _numThreads)
    : cpu(_cpu),
      // Start from a null list pointer so that using the ROB before
      // setActiveThreads() fails deterministically instead of reading an
      // indeterminate pointer.
      activeThreads(NULL),
      numEntries(_numEntries),
      squashWidth(_squashWidth),
      numInstsInROB(0),
      numThreads(_numThreads)
{
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        squashedSeqNum[tid] = 0;
        doneSquashing[tid] = true;
        threadEntries[tid] = 0;
    }

    std::string policy = _smtROBPolicy;

    //Convert string to lowercase
    std::transform(policy.begin(), policy.end(), policy.begin(),
                   (int(*)(int)) tolower);

    //Figure out rob policy
    if (policy == "dynamic") {
        robPolicy = Dynamic;

        //Set Max Entries to Total ROB Capacity
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            maxEntries[tid] = numEntries;
        }

    } else if (policy == "partitioned") {
        robPolicy = Partitioned;
        DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");

        //@todo:make work if part_amt doesnt divide evenly.
        int part_amt = numEntries / numThreads;

        //Divide ROB up evenly
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            maxEntries[tid] = part_amt;
        }

    } else if (policy == "threshold") {
        robPolicy = Threshold;
        DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");

        int threshold = _smtROBThreshold;

        //Divide up by threshold amount
        for (ThreadID tid = 0; tid < numThreads; tid++) {
            maxEntries[tid] = threshold;
        }
    } else {
        assert(0 && "Invalid ROB Sharing Policy.Options Are:{Dynamic,"
                    "Partitioned, Threshold}");
    }

    // Set the per-thread iterators to the end of the instruction list.
    for (ThreadID tid = 0; tid < numThreads; tid++) {
        squashIt[tid] = instList[tid].end();
    }

    // Initialize the "universal" ROB head & tail point to invalid
    // pointers
    head = instList[0].end();
    tail = instList[0].end();
}
/** Returns the owning CPU's name with ".rob" appended. */
template <class Impl>
std::string
ROB<Impl>::name() const
{
    std::string rob_name = cpu->name();
    rob_name += ".rob";
    return rob_name;
}
/**
 * Records the pointer to the list of active threads.
 * @param at_ptr Pointer to the list of active threads.
 */
template <class Impl>
void
ROB<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
{
DPRINTF(ROB, "Setting active threads list pointer.\n");
// Only the pointer is stored; the list itself is not copied here.
activeThreads = at_ptr;
}
/** Switches out the ROB by emptying the instruction list of every thread. */
template <class Impl>
void
ROB<Impl>::switchOut()
{
    ThreadID tid = 0;
    while (tid < numThreads) {
        instList[tid].clear();
        tid++;
    }
}
/**
 * Resets the bookkeeping of the ROB: instruction count, per-thread entry
 * counts, squash state, and the global head/tail iterators.
 */
template <class Impl>
void
ROB<Impl>::takeOverFrom()
{
    numInstsInROB = 0;

    // The "universal" head and tail start out as invalid (end) iterators.
    head = instList[0].end();
    tail = instList[0].end();

    ThreadID tid = 0;
    while (tid < numThreads) {
        doneSquashing[tid] = true;
        threadEntries[tid] = 0;
        squashIt[tid] = instList[tid].end();
        tid++;
    }
}
/**
 * Re-adjusts the per-thread entry limits to the current number of active
 * threads.  Partitioned: each active thread gets an equal share.
 * Threshold: a lone active thread gets the whole ROB.
 */
template <class Impl>
void
ROB<Impl>::resetEntries()
{
    // Nothing to do for a single-threaded dynamic ROB.
    if (robPolicy == Dynamic && !(numThreads > 1)) {
        return;
    }

    int active_threads = activeThreads->size();

    for (std::list<ThreadID>::iterator it = activeThreads->begin();
         it != activeThreads->end(); ++it) {
        const ThreadID tid = *it;

        if (robPolicy == Partitioned) {
            maxEntries[tid] = numEntries / active_threads;
        } else if (robPolicy == Threshold && active_threads == 1) {
            maxEntries[tid] = numEntries;
        }
    }
}
/**
 * Number of entries each thread gets when the ROB is shared by
 * num_threads threads.  Only the partitioned policy yields a non-zero
 * amount.
 */
template <class Impl>
int
ROB<Impl>::entryAmount(ThreadID num_threads)
{
    if (robPolicy != Partitioned) {
        return 0;
    }

    return numEntries / num_threads;
}
/** Counts the instructions in the ROB by summing every thread's list. */
template <class Impl>
int
ROB<Impl>::countInsts()
{
    int inst_count = 0;

    ThreadID tid = 0;
    while (tid < numThreads) {
        inst_count += countInsts(tid);
        tid++;
    }

    return inst_count;
}
/** Counts the instructions held in the given thread's instruction list. */
template <class Impl>
int
ROB<Impl>::countInsts(ThreadID tid)
{
    const int thread_inst_count = instList[tid].size();
    return thread_inst_count;
}
/**
 * Appends an instruction to the list of its thread and updates the
 * counters and the head/tail iterators.  The caller must make sure the
 * ROB has room.
 * @param inst The instruction being inserted into the ROB.
 */
template <class Impl>
void
ROB<Impl>::insertInst(DynInstPtr &inst)
{
    assert(inst);

    robWrites++;

    DPRINTF(ROB, "Adding inst PC %s to the ROB.\n", inst->pcState());

    assert(numInstsInROB != numEntries);

    ThreadID tid = inst->threadNumber;
    std::list<DynInstPtr> &thread_list = instList[tid];

    thread_list.push_back(inst);

    // The very first instruction in the ROB also becomes the global head.
    if (numInstsInROB == 0) {
        head = thread_list.begin();
        assert((*head) == inst);
    }

    // end() is one past the last instruction, so step back once to get a
    // dereferenceable tail iterator.
    tail = thread_list.end();
    --tail;

    inst->setInROB();

    ++numInstsInROB;
    ++threadEntries[tid];

    assert((*tail) == inst);

    DPRINTF(ROB, "[tid:%i] Now has %d instructions.\n", tid, threadEntries[tid]);
}
/**
 * Retires the head instruction of a thread: marks it committed, removes
 * it from the ROB and refreshes the global head (and, if necessary, tail)
 * iterators.
 * @param tid Thread whose head instruction is retired.  The thread must
 *            have at least one instruction and its head must be ready to
 *            commit.
 */
template <class Impl>
void
ROB<Impl>::retireHead(ThreadID tid)
{
    robWrites++;

    assert(numInstsInROB > 0);
    // begin() of an empty list must not be dereferenced.
    assert(!instList[tid].empty());

    // Get the head ROB instruction.
    InstIt head_it = instList[tid].begin();

    DynInstPtr head_inst = (*head_it);

    assert(head_inst->readyToCommit());

    DPRINTF(ROB, "[tid:%u]: Retiring head instruction, "
            "instruction PC %s, [sn:%lli]\n", tid, head_inst->pcState(),
            head_inst->seqNum);

    --numInstsInROB;
    --threadEntries[tid];

    head_inst->clearInROB();
    head_inst->setCommitted();

    instList[tid].erase(head_it);

    //Update "Global" Head of ROB
    updateHead();

    // If that was the thread's only instruction it was also the thread's
    // tail, so the global tail iterator may have pointed at the element
    // just erased.  Recompute it rather than leaving it dangling.
    if (instList[tid].empty()) {
        updateTail();
    }

    cpu->removeFrontInst(head_inst);
}
/**
 * Tells whether the head instruction of the given thread is ready to
 * commit.  A thread without entries is never ready.
 */
template <class Impl>
bool
ROB<Impl>::isHeadReady(ThreadID tid)
{
    robReads++;

    if (threadEntries[tid] == 0) {
        return false;
    }

    return instList[tid].front()->readyToCommit();
}
/** Returns true as soon as one active thread has a head that is ready. */
template <class Impl>
bool
ROB<Impl>::canCommit()
{
    //@todo: set ActiveThreads through ROB or CPU
    for (std::list<ThreadID>::iterator it = activeThreads->begin();
         it != activeThreads->end(); ++it) {
        const ThreadID tid = *it;

        if (isHeadReady(tid)) {
            return true;
        }
    }

    return false;
}
/** Number of entries of the whole ROB that are not occupied. */
template <class Impl>
unsigned
ROB<Impl>::numFreeEntries()
{
    const unsigned free_entries = numEntries - numInstsInROB;
    return free_entries;
}
/** Number of entries the given thread may still use: limit minus usage. */
template <class Impl>
unsigned
ROB<Impl>::numFreeEntries(ThreadID tid)
{
    const unsigned free_thread_entries = maxEntries[tid] - threadEntries[tid];
    return free_thread_entries;
}
/**
 * Performs one step of squashing for a thread.  Starting at squashIt[tid]
 * and walking towards the front of the thread's list, marks at most
 * squashWidth instructions whose sequence number is greater than
 * squashedSeqNum[tid] as squashed and able to commit.  Once the walk is
 * over, squashIt[tid] is reset to end() and doneSquashing[tid] is set.
 * @param tid Thread to squash; squashIt[tid] must be a valid iterator.
 */
template <class Impl>
void
ROB<Impl>::doSquash(ThreadID tid)
{
robWrites++;
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
tid, squashedSeqNum[tid]);
assert(squashIt[tid] != instList[tid].end());
// The instruction under the iterator is already older than the squash
// point: nothing is left to squash.
if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
squashIt[tid] = instList[tid].end();
doneSquashing[tid] = true;
return;
}
// Set when the thread's last (youngest) instruction gets squashed.
bool robTailUpdate = false;
// Squash at most squashWidth instructions per call, moving backwards
// through the list while instructions are younger than the squash point.
for (int numSquashed = 0;
numSquashed < squashWidth &&
squashIt[tid] != instList[tid].end() &&
(*squashIt[tid])->seqNum > squashedSeqNum[tid];
++numSquashed)
{
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %s, seq num %i.\n",
(*squashIt[tid])->threadNumber,
(*squashIt[tid])->pcState(),
(*squashIt[tid])->seqNum);
// Mark the instruction as squashed, and ready to commit so that
// it can drain out of the pipeline.
(*squashIt[tid])->setSquashed();
(*squashIt[tid])->setCanCommit();
// The front of the list cannot be stepped past; finish here.
// NOTE(review): this early return happens before the robTailUpdate
// check below, so updateTail() is not called on this path.
if (squashIt[tid] == instList[tid].begin()) {
DPRINTF(ROB, "Reached head of instruction list while "
"squashing.\n");
squashIt[tid] = instList[tid].end();
doneSquashing[tid] = true;
return;
}
// Compare against the thread's last instruction.
InstIt tail_thread = instList[tid].end();
tail_thread--;
if ((*squashIt[tid]) == (*tail_thread))
robTailUpdate = true;
squashIt[tid]--;
}
// Check if ROB is done squashing.
if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
tid);
squashIt[tid] = instList[tid].end();
doneSquashing[tid] = true;
}
if (robTailUpdate) {
updateTail();
}
}
/**
 * Points the global head iterator at the head instruction with the lowest
 * sequence number among all active threads.  If every active thread is
 * empty the head is set to instList[0].end().
 */
template <class Impl>
void
ROB<Impl>::updateHead()
{
    InstSeqNum lowest_num = 0;
    bool found_head = false;

    // @todo: set ActiveThreads through ROB or CPU
    std::list<ThreadID>::iterator threads = activeThreads->begin();
    std::list<ThreadID>::iterator end = activeThreads->end();

    while (threads != end) {
        ThreadID tid = *threads++;

        if (instList[tid].empty())
            continue;

        InstIt head_thread = instList[tid].begin();

        DynInstPtr head_inst = (*head_thread);

        assert(head_inst != 0);

        // The first non-empty thread is taken unconditionally; later ones
        // only replace it when their head is older.
        if (!found_head || head_inst->seqNum < lowest_num) {
            head = head_thread;
            lowest_num = head_inst->seqNum;
            found_head = true;
        }
    }

    if (!found_head) {
        head = instList[0].end();
    }
}
/**
 * Points the global tail iterator at the last instruction with the highest
 * sequence number among all active threads.  If every active thread is
 * empty the tail stays at instList[0].end().
 */
template <class Impl>
void
ROB<Impl>::updateTail()
{
    tail = instList[0].end();

    bool have_tail = false;

    for (std::list<ThreadID>::iterator it = activeThreads->begin();
         it != activeThreads->end(); ++it) {
        const ThreadID tid = *it;

        if (instList[tid].empty()) {
            continue;
        }

        // Last instruction of this thread.
        InstIt thread_tail = instList[tid].end();
        --thread_tail;

        // The first non-empty thread is assigned without comparison; after
        // that only a younger tail replaces the current one.
        if (!have_tail) {
            tail = thread_tail;
            have_tail = true;
        } else if ((*thread_tail)->seqNum > (*tail)->seqNum) {
            tail = thread_tail;
        }
    }
}
/**
 * Starts squashing all instructions of a thread that are younger than the
 * given sequence number and performs the first squash step.
 * @param squash_num Sequence number up to which instructions are squashed.
 * @param tid Thread to squash.
 */
template <class Impl>
void
ROB<Impl>::squash(InstSeqNum squash_num, ThreadID tid)
{
    // Check this thread, not the whole ROB: if only other threads hold
    // instructions there is nothing to squash here, and marking the thread
    // as "not done squashing" would leave doneSquashing[tid] false with no
    // doSquash() walk to reset it.
    if (isEmpty(tid) || instList[tid].empty()) {
        DPRINTF(ROB, "Does not need to squash due to being empty "
                "[sn:%i]\n",
                squash_num);

        return;
    }

    DPRINTF(ROB, "Starting to squash within the ROB.\n");

    robStatus[tid] = ROBSquashing;

    doneSquashing[tid] = false;

    squashedSeqNum[tid] = squash_num;

    // Squashing walks backwards starting from the thread's last instruction.
    InstIt tail_thread = instList[tid].end();
    tail_thread--;

    squashIt[tid] = tail_thread;

    doSquash(tid);
}
/**
 * Returns the head instruction of the given thread, or dummyInst when the
 * thread has no entries.
 */
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readHeadInst(ThreadID tid)
{
    if (threadEntries[tid] == 0) {
        return dummyInst;
    }

    InstIt thread_head = instList[tid].begin();

    assert((*thread_head)->isInROB()==true);

    return *thread_head;
}
/**
 * Returns the last instruction of the given thread, or dummyInst when the
 * thread's list is empty (mirroring readHeadInst).  Decrementing end() of
 * an empty list is undefined, hence the guard.
 */
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::readTailInst(ThreadID tid)
{
    if (instList[tid].empty()) {
        return dummyInst;
    }

    InstIt tail_thread = instList[tid].end();
    tail_thread--;

    return *tail_thread;
}
/** Registers the ROB read and write counters under this ROB's name. */
template <class Impl>
void
ROB<Impl>::regStats()
{
    using namespace Stats;

    // Both statistics share the same name prefix.
    const std::string stat_prefix = name();

    robReads
        .name(stat_prefix + ".rob_reads")
        .desc("The number of ROB reads");

    robWrites
        .name(stat_prefix + ".rob_writes")
        .desc("The number of ROB writes");
}
/**
 * Searches the given thread's list front to back for the instruction with
 * the given sequence number.  Returns NULL when no instruction matches.
 */
template <class Impl>
typename Impl::DynInstPtr
ROB<Impl>::findInst(ThreadID tid, InstSeqNum squash_inst)
{
    InstIt it = instList[tid].begin();

    while (it != instList[tid].end()) {
        if ((*it)->seqNum == squash_inst) {
            return *it;
        }
        it++;
    }

    return NULL;
}

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "base/misc.hh"
#include "cpu/o3/sat_counter.hh"
/**
 * Default constructor.  The counter starts at zero.  maxVal is set to zero
 * as well, so that it holds a defined value until setBits() is called
 * (increment() compares the counter against maxVal).
 */
SatCounter::SatCounter()
    : initialVal(0), maxVal(0), counter(0)
{
}
/**
 * Creates a counter that is bits wide and starts at zero.
 * @param bits How many bits the counter will have.
 */
SatCounter::SatCounter(unsigned bits)
{
    initialVal = 0;
    // Largest value representable with the requested number of bits.
    maxVal = (1 << bits) - 1;
    counter = 0;
}
/**
 * Creates a counter that is bits wide and starts at initial_val.
 * @param bits How many bits the counter will have.
 * @param initial_val Starting value of the counter.
 */
SatCounter::SatCounter(unsigned bits, uint8_t initial_val)
    // initialVal must be taken from the parameter; initialising it from
    // itself would leave it indeterminate.
    : initialVal(initial_val), maxVal((1 << bits) - 1), counter(initial_val)
{
    // Check to make sure initial value doesn't exceed the max counter value.
    if (initial_val > maxVal) {
        fatal("BP: Initial counter value exceeds max size.");
    }
}
/** Sets the number of bits by recomputing the largest counter value. */
void
SatCounter::setBits(unsigned bits)
{
    const int largest_value = (1 << bits) - 1;
    maxVal = largest_value;
}

View File

@ -0,0 +1,117 @@
/*
* Copyright (c) 2005-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_SAT_COUNTER_HH__
#define __CPU_O3_SAT_COUNTER_HH__
#include "base/misc.hh"
#include "base/types.hh"
/**
 * Private counter class for the internal saturating counters.
 * Implements an n bit saturating counter and provides methods to
 * increment, decrement, and read it.
 * @todo Consider making this something that more closely mimics a
 * built in class so you can use ++ or --.
 */
class SatCounter
{
  public:
    /**
     * Constructor for the counter.  The counter starts at zero.  maxVal is
     * zeroed too, so that it holds a defined value until setBits() is
     * called.
     */
    SatCounter()
        : initialVal(0), maxVal(0), counter(0)
    { }

    /**
     * Constructor for the counter.
     * @param bits How many bits the counter will have.
     */
    SatCounter(unsigned bits)
        : initialVal(0), maxVal((1 << bits) - 1), counter(0)
    { }

    /**
     * Constructor for the counter.
     * @param bits How many bits the counter will have.
     * @param initial_val Starting value for each counter.
     */
    SatCounter(unsigned bits, uint8_t initial_val)
        : initialVal(initial_val), maxVal((1 << bits) - 1),
          counter(initial_val)
    {
        // Check to make sure initial value doesn't exceed the max
        // counter value.
        if (initial_val > maxVal) {
            fatal("BP: Initial counter value exceeds max size.");
        }
    }

    /**
     * Sets the number of bits.
     */
    void setBits(unsigned bits) { maxVal = (1 << bits) - 1; }

    /**
     * Sets the counter back to the value it was constructed with.
     */
    void reset() { counter = initialVal; }

    /**
     * Increments the counter's current value.  Saturates at maxVal.
     */
    void increment()
    {
        if (counter < maxVal) {
            ++counter;
        }
    }

    /**
     * Decrements the counter's current value.  Saturates at zero.
     */
    void decrement()
    {
        if (counter > 0) {
            --counter;
        }
    }

    /**
     * Read the counter's value.
     */
    uint8_t read() const
    { return counter; }

  private:
    /** Value the counter returns to on reset(). */
    uint8_t initialVal;
    /** Largest value the counter can reach. */
    uint8_t maxVal;
    /** Current value of the counter. */
    uint8_t counter;
};
#endif // __CPU_O3_SAT_COUNTER_HH__

View File

@ -0,0 +1,131 @@
/*
* Copyright (c) 2005-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Korey Sewell
* Kevin Lim
*/
#include "config/the_isa.hh"
#include "cpu/o3/scoreboard.hh"
#include "debug/Scoreboard.hh"
/**
 * Sizes the scoreboard and marks the initially ready entries:
 * the first (logical int regs * threads) integer entries, the first
 * (logical fp regs * threads) entries after the physical integer
 * registers, and every misc register entry.
 */
Scoreboard::Scoreboard(unsigned activeThreads,
                       unsigned _numLogicalIntRegs,
                       unsigned _numPhysicalIntRegs,
                       unsigned _numLogicalFloatRegs,
                       unsigned _numPhysicalFloatRegs,
                       unsigned _numMiscRegs,
                       unsigned _zeroRegIdx)
    : numLogicalIntRegs(_numLogicalIntRegs),
      numPhysicalIntRegs(_numPhysicalIntRegs),
      numLogicalFloatRegs(_numLogicalFloatRegs),
      numPhysicalFloatRegs(_numPhysicalFloatRegs),
      numMiscRegs(_numMiscRegs),
      zeroRegIdx(_zeroRegIdx)
{
    // Combined (int + fp) register counts.
    numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
    numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;

    // One entry per physical register plus one per misc register of
    // every thread.
    resize(numPhysicalRegs + (numMiscRegs * activeThreads));

    // Integer entries that start out ready.
    const unsigned int_ready_end = numLogicalIntRegs * activeThreads;
    for (int reg = 0; reg < int_ready_end; ++reg) {
        assert(indexInBounds(reg));
        regScoreBoard[reg] = 1;
    }

    // Floating point entries that start out ready; the fp range begins
    // right after the physical integer registers.
    const unsigned fp_ready_end =
        numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
    for (int reg = numPhysicalIntRegs; reg < fp_ready_end; ++reg) {
        assert(indexInBounds(reg));
        regScoreBoard[reg] = 1;
    }

    // Misc register entries follow the physical registers and are all
    // marked ready.
    const unsigned misc_end =
        numPhysicalRegs + (numMiscRegs * activeThreads);
    for (int reg = numPhysicalRegs; reg < misc_end; ++reg) {
        assert(indexInBounds(reg));
        regScoreBoard[reg] = 1;
    }
}
/** Returns the fixed name string of the scoreboard. */
std::string
Scoreboard::name() const
{
    return std::string("cpu.scoreboard");
}
/**
 * Reports whether a physical register is marked ready. The zero
 * register (and, on Alpha, the fp zero register as well) is always
 * reported as ready without consulting the table.
 */
bool
Scoreboard::getReg(PhysRegIndex phys_reg)
{
#if THE_ISA == ALPHA_ISA
    // Int or fp zero reg.
    const bool is_zero_reg = phys_reg == zeroRegIdx ||
        phys_reg == (zeroRegIdx + numPhysicalIntRegs);
#else
    // Int zero reg only.
    const bool is_zero_reg = phys_reg == zeroRegIdx;
#endif

    if (is_zero_reg) {
        return true;
    }

    assert(indexInBounds(phys_reg));

    return regScoreBoard[phys_reg];
}
/** Marks the given physical register as ready. */
void
Scoreboard::setReg(PhysRegIndex phys_reg)
{
    DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg);

    assert(indexInBounds(phys_reg));

    regScoreBoard[phys_reg] = true;
}
/**
 * Marks the given physical register as not ready. Requests for the
 * zero register (and, on Alpha, the fp zero register) are ignored.
 */
void
Scoreboard::unsetReg(PhysRegIndex ready_reg)
{
#if THE_ISA == ALPHA_ISA
    // Int or fp zero reg.
    const bool is_zero_reg = ready_reg == zeroRegIdx ||
        ready_reg == (zeroRegIdx + numPhysicalIntRegs);
#else
    // Int zero reg only.
    const bool is_zero_reg = ready_reg == zeroRegIdx;
#endif

    if (!is_zero_reg) {
        assert(indexInBounds(ready_reg));

        regScoreBoard[ready_reg] = false;
    }
}

View File

@ -0,0 +1,131 @@
/*
* Copyright (c) 2005-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Korey Sewell
* Kevin Lim
*/
#ifndef __CPU_O3_SCOREBOARD_HH__
#define __CPU_O3_SCOREBOARD_HH__
#include <iostream>
#include <utility>
#include <vector>
#include "base/trace.hh"
#include "cpu/o3/comm.hh"
/**
 * Implements a simple scoreboard to track which registers are ready.
 * This class assumes that the fp registers start, index wise, right after
 * the integer registers. The misc. registers start, index wise, right after
 * the fp registers.
 * @todo: Fix up handling of the zero register in case the decoder does not
 * automatically make insts that write the zero register into nops.
 */
class Scoreboard
{
  public:
    /** Constructs a scoreboard.
     *  @param activeThreads The number of active threads.
     *  @param _numLogicalIntRegs Number of logical integer registers.
     *  @param _numPhysicalIntRegs Number of physical integer registers.
     *  @param _numLogicalFloatRegs Number of logical fp registers.
     *  @param _numPhysicalFloatRegs Number of physical fp registers.
     *  @param _numMiscRegs Number of miscellaneous registers.
     *  @param _zeroRegIdx Index of the zero register.
     */
    Scoreboard(unsigned activeThreads,
               unsigned _numLogicalIntRegs,
               unsigned _numPhysicalIntRegs,
               unsigned _numLogicalFloatRegs,
               unsigned _numPhysicalFloatRegs,
               unsigned _numMiscRegs,
               unsigned _zeroRegIdx);

    /** Destructor. */
    ~Scoreboard() {}

    /** Returns the name of the scoreboard. */
    std::string name() const;

    /** Checks if the register is ready. */
    bool getReg(PhysRegIndex ready_reg);

    /** Sets the register as ready. */
    void setReg(PhysRegIndex phys_reg);

    /** Sets the register as not ready. */
    void unsetReg(PhysRegIndex ready_reg);

  private:
    /** Ready bit for every tracked register. The table is indexed by
     *  physical register index and covers the integer registers, the
     *  fp registers and the misc. registers, in that order.
     */
    std::vector<bool> regScoreBoard;

    /** Number of logical integer registers. */
    int numLogicalIntRegs;

    /** Number of physical integer registers. */
    int numPhysicalIntRegs;

    /** Number of logical floating point registers. */
    int numLogicalFloatRegs;

    /** Number of physical floating point registers. */
    int numPhysicalFloatRegs;

    /** Number of miscellaneous registers. */
    int numMiscRegs;

    /** Number of logical integer + float registers. */
    int numLogicalRegs;

    /** Number of physical integer + float registers. */
    int numPhysicalRegs;

    /** The logical index of the zero register. */
    int zeroRegIdx;

    /** Number of entries currently held by regScoreBoard. */
    int currentSize;

    /** Resizes the table and records the new entry count. */
    void
    resize(int newSize)
    {
        currentSize = newSize;
        regScoreBoard.resize(newSize);
    }

    /** Returns true if idx is a valid position in the table. Negative
     *  indices are rejected as well, so the bounds assertions also
     *  catch a negative register index before it is used to subscript
     *  the table.
     */
    bool
    indexInBounds(int idx) const
    {
        return idx >= 0 && idx < currentSize;
    }
};
#endif

View File

@ -0,0 +1,367 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "base/intmath.hh"
#include "base/misc.hh"
#include "base/trace.hh"
#include "cpu/o3/store_set.hh"
#include "debug/StoreSet.hh"
/**
 * Creates the predictor tables. Both table sizes must be powers of
 * two; otherwise fatal() is invoked.
 */
StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size)
    : clearPeriod(clear_period), SSITSize(_SSIT_size), LFSTSize(_LFST_size)
{
    DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
    DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
            SSITSize, LFSTSize);

    if (!isPowerOf2(SSITSize)) {
        fatal("Invalid SSIT size!\n");
    }

    // Size the SSIT and start with every entry invalid.
    SSIT.resize(SSITSize);
    validSSIT.assign(SSITSize, false);

    if (!isPowerOf2(LFSTSize)) {
        fatal("Invalid LFST size!\n");
    }

    // Size the LFST with zeroed sequence numbers, all invalid.
    LFST.assign(LFSTSize, InstSeqNum(0));
    validLFST.assign(LFSTSize, false);

    indexMask = SSITSize - 1;
    offsetBits = 2;
    memOpsPred = 0;
}
// Destructor. The body is empty; no explicit cleanup is performed here.
StoreSet::~StoreSet()
{
}
void
StoreSet::init(uint64_t clear_period, int _SSIT_size, int _LFST_size)
{
SSITSize = _SSIT_size;
LFSTSize = _LFST_size;
clearPeriod = clear_period;
DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
SSITSize, LFSTSize);
SSIT.resize(SSITSize);
validSSIT.resize(SSITSize);
for (int i = 0; i < SSITSize; ++i)
validSSIT[i] = false;
LFST.resize(LFSTSize);
validLFST.resize(LFSTSize);
for (int i = 0; i < LFSTSize; ++i) {
validLFST[i] = false;
LFST[i] = 0;
}
indexMask = SSITSize - 1;
offsetBits = 2;
memOpsPred = 0;
}
/**
 * Records a memory ordering violation between a load and a store by
 * placing both PCs in a common store set. Which set is used depends on
 * which of the two SSIT entries are already valid.
 */
void
StoreSet::violation(Addr store_PC, Addr load_PC)
{
    const int load_index = calcIndex(load_PC);
    const int store_index = calcIndex(store_PC);

    assert(load_index < SSITSize && store_index < SSITSize);

    const bool load_has_set = validSSIT[load_index];
    const bool store_has_set = validSSIT[store_index];

    if (load_has_set && store_has_set) {
        // Both already belong to a set.
        const SSID load_SSID = SSIT[load_index];
        const SSID store_SSID = SSIT[store_index];

        assert(load_SSID < LFSTSize && store_SSID < LFSTSize);

        // The store set with the lower number wins
        if (store_SSID > load_SSID) {
            SSIT[store_index] = load_SSID;

            DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; "
                    "for load %#x, store %#x\n",
                    load_SSID, load_PC, store_PC);
        } else {
            SSIT[load_index] = store_SSID;

            DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; "
                    "for load %#x, store %#x\n",
                    store_SSID, load_PC, store_PC);
        }
        return;
    }

    if (load_has_set) {
        // Only the load has a set: the store joins it.
        const SSID load_SSID = SSIT[load_index];

        validSSIT[store_index] = true;
        SSIT[store_index] = load_SSID;

        assert(load_SSID < LFSTSize);

        DPRINTF(StoreSet, "StoreSet: Load had a valid store set.  Adding "
                "store to that set: %i for load %#x, store %#x\n",
                load_SSID, load_PC, store_PC);
        return;
    }

    if (store_has_set) {
        // Only the store has a set: the load joins it.
        const SSID store_SSID = SSIT[store_index];

        validSSIT[load_index] = true;
        SSIT[load_index] = store_SSID;

        DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for "
                "load %#x, store %#x\n",
                store_SSID, load_PC, store_PC);
        return;
    }

    // Neither has a set yet: calculate a new SSID from the load PC and
    // assign it to both.
    const SSID new_set = calcSSID(load_PC);

    validSSIT[load_index] = true;
    SSIT[load_index] = new_set;

    validSSIT[store_index] = true;
    SSIT[store_index] = new_set;

    assert(new_set < LFSTSize);

    DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid "
            "storeset, creating a new one: %i for load %#x, store %#x\n",
            new_set, load_PC, store_PC);
}
/**
 * Counts one more memory operation and wipes the predictor once the
 * count exceeds clearPeriod.
 */
void
StoreSet::checkClear()
{
    ++memOpsPred;

    if (memOpsPred <= clearPeriod) {
        return;
    }

    DPRINTF(StoreSet, "Wiping predictor state beacuse %d ld/st executed\n",
            clearPeriod);
    memOpsPred = 0;
    clear();
}
/**
 * Notes that a load was inserted. Nothing is recorded for the load
 * itself; it only counts toward the periodic clear.
 */
void
StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num)
{
    checkClear();
}
/**
 * Inserts a store. If the store's SSIT entry is valid, the store
 * becomes the last fetched store of its set and is added to the list
 * of in-flight stores.
 */
void
StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid)
{
    const int index = calcIndex(store_PC);

    // May wipe the tables, so it has to run before the SSIT lookup.
    checkClear();

    assert(index < SSITSize);

    // Do nothing if there's no valid entry.
    if (!validSSIT[index]) {
        return;
    }

    const int store_SSID = SSIT[index];

    assert(store_SSID < LFSTSize);

    // Update the last store that was fetched with the current one.
    LFST[store_SSID] = store_seq_num;
    validLFST[store_SSID] = true;

    storeList[store_seq_num] = store_SSID;

    DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n",
            store_PC, store_SSID);
}
/**
 * Looks up the store the instruction at the given PC depends on.
 * @return The sequence number held in the LFST for the instruction's
 * store set, or 0 when the SSIT or LFST entry is not valid.
 */
InstSeqNum
StoreSet::checkInst(Addr PC)
{
    const int index = calcIndex(PC);

    assert(index < SSITSize);

    if (!validSSIT[index]) {
        DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n",
                PC, index);

        // Return 0 if there's no valid entry.
        return 0;
    }

    const int inst_SSID = SSIT[index];

    assert(inst_SSID < LFSTSize);

    if (validLFST[inst_SSID]) {
        DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST "
                "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]);

        return LFST[inst_SSID];
    }

    DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no "
            "dependency\n", PC, index, inst_SSID);

    return 0;
}
/**
 * Records that an instruction was issued. Only stores have an effect:
 * the store is dropped from the in-flight list and, if it is still the
 * last fetched store of its set, that LFST entry is invalidated.
 */
void
StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
{
    // This only is updated upon a store being issued.
    if (!is_store) {
        return;
    }

    const int index = calcIndex(issued_PC);

    assert(index < SSITSize);

    // Remove the store from the in-flight list (no-op if absent).
    storeList.erase(issued_seq_num);

    // Make sure the SSIT still has a valid entry for the issued store.
    if (!validSSIT[index]) {
        return;
    }

    const int store_SSID = SSIT[index];

    assert(store_SSID < LFSTSize);

    // If the last fetched store in the store set refers to the store that
    // was just issued, then invalidate the entry.
    const bool is_last_fetched =
        validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num;
    if (is_last_fetched) {
        DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n");
        validLFST[store_SSID] = false;
    }
}
/**
 * Squashes all in-flight stores younger than the given sequence number.
 *
 * storeList is ordered from the youngest (largest) sequence number to
 * the oldest, so the walk starts at the front and stops at the first
 * entry that is not younger than squashed_num. Every squashed store is
 * removed from storeList; the LFST entry of its store set is invalidated
 * only if it is valid and itself refers to a squashed store.
 *
 * @param squashed_num Stores with a sequence number greater than this
 * are squashed.
 * @param tid Thread the squash applies to (currently unused, see todo).
 */
void
StoreSet::squash(InstSeqNum squashed_num, ThreadID tid)
{
    DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
            squashed_num);

    SeqNumMapIt store_list_it = storeList.begin();

    //@todo:Fix to only delete from correct thread
    // The iterator is checked against end() before every dereference,
    // and each pass erases the current entry, so the loop always makes
    // progress and terminates.
    while (store_list_it != storeList.end() &&
           store_list_it->first > squashed_num) {
        const int idx = store_list_it->second;

        if (validLFST[idx] && LFST[idx] > squashed_num) {
            DPRINTF(StoreSet, "Squashed [sn:%lli]\n", LFST[idx]);
            validLFST[idx] = false;
        }

        // Post-increment hands the old position to erase() while the
        // iterator already points at the next entry.
        storeList.erase(store_list_it++);
    }
}
/**
 * Resets the predictor: every SSIT and LFST entry becomes invalid and
 * the list of in-flight stores is emptied.
 */
void
StoreSet::clear()
{
    validSSIT.assign(SSITSize, false);
    validLFST.assign(LFSTSize, false);

    storeList.clear();
}
void
StoreSet::dump()
{
cprintf("storeList.size(): %i\n", storeList.size());
SeqNumMapIt store_list_it = storeList.begin();
int num = 0;
while (store_list_it != storeList.end()) {
cprintf("%i: [sn:%lli] SSID:%i\n",
num, (*store_list_it).first, (*store_list_it).second);
num++;
store_list_it++;
}
}

View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 2004-2005 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_STORE_SET_HH__
#define __CPU_O3_STORE_SET_HH__
#include <list>
#include <map>
#include <utility>
#include <vector>
#include "base/types.hh"
#include "cpu/inst_seq.hh"
/**
 * Ordering functor for sequence numbers. Note that it returns true when
 * the left operand is the larger one, so containers using it are
 * ordered from the largest sequence number to the smallest.
 */
struct ltseqnum {
    bool
    operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
    {
        return rhs < lhs;
    }
};
/**
 * Implements a store set predictor for determining if memory
 * instructions are dependent upon each other.  See paper "Memory
 * Dependence Prediction using Store Sets" by Chrysos and Emer.  SSID
 * stands for Store Set ID, SSIT stands for Store Set ID Table, and
 * LFST is Last Fetched Store Table.
 */
class StoreSet
{
  public:
    typedef unsigned SSID;

  public:
    /** Default constructor.  init() must be called prior to use.
     *  All scalar members are given defined values (empty tables, a
     *  zero clear period) so that the object is in a determinate state
     *  until init() is called.
     */
    StoreSet()
        : clearPeriod(0), SSITSize(0), LFSTSize(0), indexMask(0),
          offsetBits(2), memOpsPred(0)
    { }

    /** Creates store set predictor with given table sizes. */
    StoreSet(uint64_t clear_period, int SSIT_size, int LFST_size);

    /** Default destructor. */
    ~StoreSet();

    /** Initializes the store set predictor with the given table sizes. */
    void init(uint64_t clear_period, int SSIT_size, int LFST_size);

    /** Records a memory ordering violation between the younger load
     * and the older store. */
    void violation(Addr store_PC, Addr load_PC);

    /** Clears the store set predictor every so often so that all the
     * entries aren't used and stores are constantly predicted as
     * conflicting.
     */
    void checkClear();

    /** Inserts a load into the store set predictor.  This does nothing but
     * is included in case other predictors require a similar function.
     */
    void insertLoad(Addr load_PC, InstSeqNum load_seq_num);

    /** Inserts a store into the store set predictor.  Updates the
     * LFST if the store has a valid SSID. */
    void insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid);

    /** Checks if the instruction with the given PC is dependent upon
     * any store.  @return Returns the sequence number of the store
     * instruction this PC is dependent upon.  Returns 0 if none.
     */
    InstSeqNum checkInst(Addr PC);

    /** Records this PC/sequence number as issued. */
    void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);

    /** Squashes for a specific thread until the given sequence number. */
    void squash(InstSeqNum squashed_num, ThreadID tid);

    /** Resets all tables. */
    void clear();

    /** Debug function to dump the contents of the store list. */
    void dump();

  private:
    /** Calculates the index into the SSIT based on the PC. */
    inline int calcIndex(Addr PC)
    { return (PC >> offsetBits) & indexMask; }

    /** Calculates a Store Set ID based on the PC. */
    inline SSID calcSSID(Addr PC)
    { return ((PC ^ (PC >> 10)) % LFSTSize); }

    /** The Store Set ID Table. */
    std::vector<SSID> SSIT;

    /** Bit vector to tell if the SSIT has a valid entry. */
    std::vector<bool> validSSIT;

    /** Last Fetched Store Table. */
    std::vector<InstSeqNum> LFST;

    /** Bit vector to tell if the LFST has a valid entry. */
    std::vector<bool> validLFST;

    /** Map of stores that have been inserted into the store set, but
     * not yet issued or squashed.  Ordered by ltseqnum, i.e. from the
     * largest sequence number to the smallest.
     */
    std::map<InstSeqNum, int, ltseqnum> storeList;

    typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;

    /** Number of loads/stores to process before wiping predictor so all
     * entries don't get saturated
     */
    uint64_t clearPeriod;

    /** Store Set ID Table size, in entries. */
    int SSITSize;

    /** Last Fetched Store Table size, in entries. */
    int LFSTSize;

    /** Mask to obtain the index. */
    int indexMask;

    // HACK: Hardcoded for now.
    int offsetBits;

    /** Number of memory operations predicted since last clear of predictor.
     *  Kept at the same width as clearPeriod so that the count can reach
     *  any configured period without overflowing.
     */
    uint64_t memOpsPred;
};
#endif // __CPU_O3_STORE_SET_HH__

View File

@ -0,0 +1,36 @@
/*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#include "cpu/o3/impl.hh"
#include "cpu/o3/thread_context.hh"
#include "cpu/o3/thread_context_impl.hh"
// Explicit instantiation of the O3ThreadContext template for O3CPUImpl.
template class O3ThreadContext<O3CPUImpl>;

View File

@ -0,0 +1,263 @@
/*
* Copyright (c) 2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_THREAD_CONTEXT_HH__
#define __CPU_O3_THREAD_CONTEXT_HH__
#include "config/the_isa.hh"
#include "cpu/o3/isa_specific.hh"
#include "cpu/thread_context.hh"
// Forward declaration; this header only refers to EndQuiesceEvent
// through a pointer (see getQuiesceEvent()).
class EndQuiesceEvent;
// Forward declaration of the Statistics class in namespace Kernel.
namespace Kernel {
class Statistics;
}
/**
* Derived ThreadContext class for use with the O3CPU. It
* provides the interface for any external objects to access a
* single thread's state and some general CPU state. Any time
* external objects try to update state through this interface,
* the CPU will create an event to squash all in-flight
* instructions in order to ensure state is maintained correctly.
* It must be defined specifically for the O3CPU because
* not all architectural state is located within the O3ThreadState
* (such as the commit PC, and registers), and specific actions
* must be taken when using this interface (such as squashing all
* in-flight instructions when doing a write to this interface).
*
* Most of the inline members below simply forward to either the CPU
* (cpu) or the per-thread state (thread), passing this thread's ID
* where the CPU needs it.
*/
template <class Impl>
class O3ThreadContext : public ThreadContext
{
public:
/** The CPU type, as named by the Impl template parameter. */
typedef typename Impl::O3CPU O3CPU;
/** Pointer to the CPU. */
O3CPU *cpu;
/** Pointer to the thread state that this TC corresponds to. */
O3ThreadState<Impl> *thread;
/** Returns a pointer to the ITB. */
TheISA::TLB *getITBPtr() { return cpu->itb; }
/** Returns a pointer to the DTB. */
TheISA::TLB *getDTBPtr() { return cpu->dtb; }
/** Returns the checker CPU pointer; this implementation always
* returns NULL. */
CheckerCPU *getCheckerCpuPtr() { return NULL; }
/** Returns the entry of cpu->fetch.decoder selected by this
* thread's ID. */
TheISA::Decoder *
getDecoderPtr()
{
return cpu->fetch.decoder[thread->threadId()];
}
/** Returns a pointer to this CPU. */
virtual BaseCPU *getCpuPtr() { return cpu; }
/** Reads this CPU's ID. */
virtual int cpuId() { return cpu->cpuId(); }
/** Returns the context ID stored in the thread state. */
virtual int contextId() { return thread->contextId(); }
/** Stores a new context ID in the thread state. */
virtual void setContextId(int id) { thread->setContextId(id); }
/** Returns this thread's ID number. */
virtual int threadId() { return thread->threadId(); }
/** Stores a new thread ID in the thread state. */
virtual void setThreadId(int id) { return thread->setThreadId(id); }
/** Returns a pointer to the system. */
virtual System *getSystemPtr() { return cpu->system; }
/** Returns a pointer to this thread's kernel statistics. */
virtual TheISA::Kernel::Statistics *getKernelStats()
{ return thread->kernelStats; }
/** Returns a pointer to this thread's process. */
virtual Process *getProcessPtr() { return thread->getProcessPtr(); }
/** Returns the thread state's physical port proxy. */
virtual PortProxy &getPhysProxy() { return thread->getPhysProxy(); }
/** Returns the virtual (FS translating) port proxy; defined out of
* line. */
virtual FSTranslatingPortProxy &getVirtProxy();
/** Forwards to the thread state's initMemProxies() with the given TC. */
virtual void initMemProxies(ThreadContext *tc)
{ thread->initMemProxies(tc); }
/** Returns the thread state's SE translating memory proxy. */
virtual SETranslatingPortProxy &getMemProxy()
{ return thread->getMemProxy(); }
/** Returns this thread's status. */
virtual Status status() const { return thread->status(); }
/** Sets this thread's status. */
virtual void setStatus(Status new_status)
{ thread->setStatus(new_status); }
/** Set the status to Active. Optional delay indicates number of
* cycles to wait before beginning execution. */
virtual void activate(int delay = 1);
/** Set the status to Suspended. */
virtual void suspend(int delay = 0);
/** Set the status to Halted. */
virtual void halt(int delay = 0);
/** Dumps the function profiling information.
* @todo: Implement.
*/
virtual void dumpFuncProfile();
/** Takes over execution of a thread from another CPU. */
virtual void takeOverFrom(ThreadContext *old_context);
/** Registers statistics associated with this TC. */
virtual void regStats(const std::string &name);
/** Serializes state. */
virtual void serialize(std::ostream &os);
/** Unserializes state. */
virtual void unserialize(Checkpoint *cp, const std::string &section);
/** Reads the last tick that this thread was activated on. */
virtual Tick readLastActivate();
/** Reads the last tick that this thread was suspended on. */
virtual Tick readLastSuspend();
/** Clears the function profiling information. */
virtual void profileClear();
/** Samples the function profiling information. */
virtual void profileSample();
/** Copies the architectural registers from another TC into this TC. */
virtual void copyArchRegs(ThreadContext *tc);
/** Resets all architectural registers to 0. */
virtual void clearArchRegs();
/** Reads an integer register. */
virtual uint64_t readIntReg(int reg_idx);
/** Reads a floating point register (defined out of line). */
virtual FloatReg readFloatReg(int reg_idx);
/** Reads a floating point register as FloatRegBits (defined out of
* line). */
virtual FloatRegBits readFloatRegBits(int reg_idx);
/** Sets an integer register to a value. */
virtual void setIntReg(int reg_idx, uint64_t val);
/** Sets a floating point register to a value (defined out of line). */
virtual void setFloatReg(int reg_idx, FloatReg val);
/** Sets a floating point register from a FloatRegBits value (defined
* out of line). */
virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
/** Reads this thread's PC state. */
virtual TheISA::PCState pcState()
{ return cpu->pcState(thread->threadId()); }
/** Sets this thread's PC state. */
virtual void pcState(const TheISA::PCState &val);
/** Sets this thread's PC state through the "no record" variant
* (defined out of line; NOTE(review): exact difference from
* pcState() is not visible here). */
virtual void pcStateNoRecord(const TheISA::PCState &val);
/** Reads this thread's PC. */
virtual Addr instAddr()
{ return cpu->instAddr(thread->threadId()); }
/** Reads this thread's next PC. */
virtual Addr nextInstAddr()
{ return cpu->nextInstAddr(thread->threadId()); }
/** Reads this thread's micro PC. */
virtual MicroPC microPC()
{ return cpu->microPC(thread->threadId()); }
/** Reads a miscellaneous register. */
virtual MiscReg readMiscRegNoEffect(int misc_reg)
{ return cpu->readMiscRegNoEffect(misc_reg, thread->threadId()); }
/** Reads a misc. register, including any side-effects the
* read might have as defined by the architecture. */
virtual MiscReg readMiscReg(int misc_reg)
{ return cpu->readMiscReg(misc_reg, thread->threadId()); }
/** Sets a misc. register. */
virtual void setMiscRegNoEffect(int misc_reg, const MiscReg &val);
/** Sets a misc. register, including any side-effects the
* write might have as defined by the architecture. */
virtual void setMiscReg(int misc_reg, const MiscReg &val);
/** Flattens an integer register index (defined out of line). */
virtual int flattenIntIndex(int reg);
/** Flattens a floating point register index (defined out of line). */
virtual int flattenFloatIndex(int reg);
/** Returns the number of consecutive store conditional failures. */
// @todo: Figure out where these store cond failures should go.
virtual unsigned readStCondFailures()
{ return thread->storeCondFailures; }
/** Sets the number of consecutive store conditional failures. */
virtual void setStCondFailures(unsigned sc_failures)
{ thread->storeCondFailures = sc_failures; }
// Only really makes sense for old CPU model. Lots of code
// outside the CPU still checks this function, so it will
// always return false to keep everything working.
/** Checks if the thread is misspeculating. Because it is
* very difficult to determine if the thread is
* misspeculating, this is set as false. */
virtual bool misspeculating() { return false; }
/** Executes a syscall in SE mode. */
virtual void syscall(int64_t callnum)
{ return cpu->syscall(callnum, thread->threadId()); }
/** Reads the funcExeInst counter. */
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
/** Returns pointer to the quiesce event. */
virtual EndQuiesceEvent *getQuiesceEvent()
{
return this->thread->quiesceEvent;
}
};
#endif

View File

@ -0,0 +1,348 @@
/*
* Copyright (c) 2010-2011 ARM Limited
* All rights reserved
*
* The license below extends only to copyright in the software and shall
* not be construed as granting a license to any other intellectual
* property including but not limited to intellectual property relating
* to a hardware implementation of the functionality of the software
* licensed hereunder. You may use the software subject to the license
* terms below provided that you ensure that this notice is replicated
* unmodified and in its entirety in all distributions of the software,
* modified or unmodified, in source code or in binary form.
*
* Copyright (c) 2004-2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
* Korey Sewell
*/
#include "arch/kernel_stats.hh"
#include "arch/registers.hh"
#include "config/the_isa.hh"
#include "cpu/o3/thread_context.hh"
#include "cpu/quiesce_event.hh"
#include "debug/O3CPU.hh"
/**
 * Returns the FSTranslatingPortProxy held by this context's thread state.
 * Pure forwarder to thread->getVirtProxy().
 */
template <class Impl>
FSTranslatingPortProxy&
O3ThreadContext<Impl>::getVirtProxy()
{
    FSTranslatingPortProxy &proxy = thread->getVirtProxy();
    return proxy;
}
/**
 * Asks the thread state to dump its function profile.
 * Pure forwarder to thread->dumpFuncProfile().
 */
template <class Impl>
void
O3ThreadContext<Impl>::dumpFuncProfile()
{
    this->thread->dumpFuncProfile();
}
/**
 * Makes this context take over from @p old_context.
 *
 * Copies status, architectural registers, context id and thread id from
 * the old context, re-targets quiesce events (full-system only) or copies
 * the functional instruction count (otherwise), then marks the old context
 * Halted and clears this thread's inSyscall / trapPending flags.
 *
 * @param old_context The context being replaced; it must refer to the same
 *                    system and process as this one (asserted).
 */
template <class Impl>
void
O3ThreadContext<Impl>::takeOverFrom(ThreadContext *old_context)
{
    // Both contexts are expected to share system and process already.
    assert(getSystemPtr() == old_context->getSystemPtr());
    assert(getProcessPtr() == old_context->getProcessPtr());

    // Pull the functional state across from the outgoing context.
    setStatus(old_context->status());
    copyArchRegs(old_context);
    setContextId(old_context->contextId());
    setThreadId(old_context->threadId());

    if (!FullSystem) {
        thread->funcExeInst = old_context->readFuncExeInst();
    } else {
        // Re-target the old context's quiesce event at this TC so that
        // it wakes up the proper CPU.
        EndQuiesceEvent *const old_quiesce = old_context->getQuiesceEvent();
        if (old_quiesce)
            old_quiesce->tc = this;

        // Same for our own quiesce event, if one exists.
        if (thread->quiesceEvent)
            thread->quiesceEvent->tc = this;

        // Kernel statistics move over to this CPU's thread.
        thread->kernelStats = old_context->getKernelStats();
        cpu->lockFlag = false;
    }

    // The outgoing context is finished; this one starts with clean flags.
    old_context->setStatus(ThreadContext::Halted);
    thread->inSyscall = false;
    thread->trapPending = false;
}
/**
 * Marks the thread Active and tells the CPU to activate it.
 * Does nothing beyond the debug trace if the thread is already Active.
 *
 * @param delay Passed through unchanged to cpu->activateContext().
 */
template <class Impl>
void
O3ThreadContext<Impl>::activate(int delay)
{
    DPRINTF(O3CPU, "Calling activate on Thread Context %d\n",
            threadId());

    if (thread->status() != ThreadContext::Active) {
        // Record the activation time before flipping the status.
        thread->lastActivate = curTick();
        thread->setStatus(ThreadContext::Active);

        cpu->activateContext(thread->threadId(), delay);
    }
}
/**
 * Marks the thread Suspended and tells the CPU to suspend it.
 * Does nothing beyond the debug trace if the thread is already Suspended.
 *
 * @param delay Not used by this implementation.
 */
template <class Impl>
void
O3ThreadContext<Impl>::suspend(int delay)
{
    DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n",
            threadId());

    if (thread->status() != ThreadContext::Suspended) {
        // NOTE(review): lastActivate is refreshed here as well as
        // lastSuspend; kept as-is, but confirm this is intentional.
        thread->lastActivate = curTick();
        thread->lastSuspend = curTick();

        thread->setStatus(ThreadContext::Suspended);
        cpu->suspendContext(thread->threadId());
    }
}
/**
 * Marks the thread Halted and tells the CPU to halt it.
 * Does nothing beyond the debug trace if the thread is already Halted.
 *
 * @param delay Not used by this implementation.
 */
template <class Impl>
void
O3ThreadContext<Impl>::halt(int delay)
{
    DPRINTF(O3CPU, "Calling halt on Thread Context %d\n",
            threadId());

    if (thread->status() != ThreadContext::Halted) {
        thread->setStatus(ThreadContext::Halted);
        cpu->haltContext(thread->threadId());
    }
}
/**
 * In full-system mode, allocates the thread's kernel statistics object and
 * registers it under "<name>.kern". Does nothing otherwise.
 *
 * @param name Prefix used for the registered statistics name.
 */
template <class Impl>
void
O3ThreadContext<Impl>::regStats(const std::string &name)
{
    if (!FullSystem)
        return;

    thread->kernelStats = new TheISA::Kernel::Statistics(cpu->system);
    thread->kernelStats->regStats(name + ".kern");
}
/**
 * Serializes the thread's kernel statistics to @p os.
 * Only acts in full-system mode and when kernel statistics exist.
 *
 * @param os Stream handed to kernelStats->serialize().
 */
template <class Impl>
void
O3ThreadContext<Impl>::serialize(std::ostream &os)
{
    if (!FullSystem || !thread->kernelStats)
        return;

    thread->kernelStats->serialize(os);
}
/**
 * Restores the thread's kernel statistics from a checkpoint.
 * Only acts in full-system mode and when kernel statistics exist.
 *
 * @param cp      Checkpoint handed to kernelStats->unserialize().
 * @param section Checkpoint section name handed on with it.
 */
template <class Impl>
void
O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string &section)
{
    if (!FullSystem || !thread->kernelStats)
        return;

    thread->kernelStats->unserialize(cp, section);
}
/**
 * Returns the tick stored in the thread state's lastActivate field.
 */
template <class Impl>
Tick
O3ThreadContext<Impl>::readLastActivate()
{
    return this->thread->lastActivate;
}
/**
 * Returns the tick stored in the thread state's lastSuspend field.
 */
template <class Impl>
Tick
O3ThreadContext<Impl>::readLastSuspend()
{
    return this->thread->lastSuspend;
}
/**
 * Pure forwarder to thread->profileClear().
 */
template <class Impl>
void
O3ThreadContext<Impl>::profileClear()
{
    this->thread->profileClear();
}
/**
 * Pure forwarder to thread->profileSample().
 */
template <class Impl>
void
O3ThreadContext<Impl>::profileSample()
{
    this->thread->profileSample();
}
/**
 * Copies the architectural registers of @p tc into this context.
 *
 * inSyscall is raised for the duration of the copy so that the register
 * setters on this context do not squash. Outside full-system mode the
 * functional instruction count is copied too.
 *
 * @param tc Context to copy the registers from.
 */
template <class Impl>
void
O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc)
{
    // Suppress squashes while the registers are being overwritten.
    thread->inSyscall = true;
    TheISA::copyRegs(tc, this);
    thread->inSyscall = false;

    if (FullSystem)
        return;

    thread->funcExeInst = tc->readFuncExeInst();
}
/**
 * Calls clear() on the CPU's ISA object belonging to this thread.
 */
template <class Impl>
void
O3ThreadContext<Impl>::clearArchRegs()
{
    this->cpu->isa[this->thread->threadId()].clear();
}
/**
 * Reads an architectural integer register of this thread.
 *
 * @param reg_idx Register index; it is flattened through the thread's ISA
 *                object before the CPU is queried.
 * @return Value returned by cpu->readArchIntReg() for the flattened index.
 */
template <class Impl>
uint64_t
O3ThreadContext<Impl>::readIntReg(int reg_idx)
{
    const int flat_idx =
        cpu->isa[thread->threadId()].flattenIntIndex(reg_idx);

    return cpu->readArchIntReg(flat_idx, thread->threadId());
}
/**
 * Reads an architectural floating-point register of this thread.
 *
 * @param reg_idx Register index; it is flattened through the thread's ISA
 *                object before the CPU is queried.
 * @return Value returned by cpu->readArchFloatReg() for the flattened index.
 */
template <class Impl>
TheISA::FloatReg
O3ThreadContext<Impl>::readFloatReg(int reg_idx)
{
    const int flat_idx =
        cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);

    return cpu->readArchFloatReg(flat_idx, thread->threadId());
}
/**
 * Reads an architectural floating-point register of this thread through
 * the CPU's integer-typed accessor.
 *
 * @param reg_idx Register index; it is flattened through the thread's ISA
 *                object before the CPU is queried.
 * @return Value returned by cpu->readArchFloatRegInt() for the flattened
 *         index.
 */
template <class Impl>
TheISA::FloatRegBits
O3ThreadContext<Impl>::readFloatRegBits(int reg_idx)
{
    const int flat_idx =
        cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);

    return cpu->readArchFloatRegInt(flat_idx, thread->threadId());
}
/**
 * Writes an architectural integer register of this thread.
 *
 * Afterwards the CPU is asked to squash the thread unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param reg_idx Register index; flattened through the thread's ISA object.
 * @param val     Value to store.
 */
template <class Impl>
void
O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val)
{
    const int flat_idx =
        cpu->isa[thread->threadId()].flattenIntIndex(reg_idx);
    cpu->setArchIntReg(flat_idx, val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}
/**
 * Writes an architectural floating-point register of this thread.
 *
 * Afterwards the CPU is asked to squash the thread unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param reg_idx Register index; flattened through the thread's ISA object.
 * @param val     Value to store.
 */
template <class Impl>
void
O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val)
{
    const int flat_idx =
        cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
    cpu->setArchFloatReg(flat_idx, val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}
/**
 * Writes an architectural floating-point register of this thread through
 * the CPU's integer-typed accessor.
 *
 * Afterwards the CPU is asked to squash the thread unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param reg_idx Register index; flattened through the thread's ISA object.
 * @param val     Value to store.
 */
template <class Impl>
void
O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
{
    const int flat_idx =
        cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
    cpu->setArchFloatRegInt(flat_idx, val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}
/**
 * Sets the PC state of this thread on the CPU.
 *
 * Afterwards the CPU is asked to squash the thread unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param val New PC state.
 */
template <class Impl>
void
O3ThreadContext<Impl>::pcState(const TheISA::PCState &val)
{
    cpu->pcState(val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}
/**
 * Sets the PC state of this thread on the CPU.
 *
 * In this implementation the body is the same as pcState(): the value is
 * forwarded to cpu->pcState() and the thread is squashed unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param val New PC state.
 */
template <class Impl>
void
O3ThreadContext<Impl>::pcStateNoRecord(const TheISA::PCState &val)
{
    cpu->pcState(val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}
/**
 * Flattens an integer register index using this thread's ISA object.
 *
 * @param reg Register index to flatten.
 * @return Result of the ISA object's flattenIntIndex().
 */
template <class Impl>
int
O3ThreadContext<Impl>::flattenIntIndex(int reg)
{
    const int flat_idx = cpu->isa[thread->threadId()].flattenIntIndex(reg);
    return flat_idx;
}
/**
 * Flattens a floating-point register index using this thread's ISA object.
 *
 * @param reg Register index to flatten.
 * @return Result of the ISA object's flattenFloatIndex().
 */
template <class Impl>
int
O3ThreadContext<Impl>::flattenFloatIndex(int reg)
{
    const int flat_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg);
    return flat_idx;
}
/**
 * Writes a miscellaneous register via cpu->setMiscRegNoEffect().
 *
 * Afterwards the CPU is asked to squash the thread unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param misc_reg Index of the miscellaneous register.
 * @param val      Value to store.
 */
template <class Impl>
void
O3ThreadContext<Impl>::setMiscRegNoEffect(int misc_reg, const MiscReg &val)
{
    cpu->setMiscRegNoEffect(misc_reg, val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}
/**
 * Writes a miscellaneous register via cpu->setMiscReg().
 *
 * Afterwards the CPU is asked to squash the thread unless a trap is
 * pending or the thread is in a syscall.
 *
 * @param misc_reg Index of the miscellaneous register.
 * @param val      Value to store.
 */
template <class Impl>
void
O3ThreadContext<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
{
    cpu->setMiscReg(misc_reg, val, thread->threadId());

    // Already in a state-update mode: no squash required.
    if (thread->trapPending || thread->inSyscall)
        return;

    cpu->squashFromTC(thread->threadId());
}

View File

@ -0,0 +1,113 @@
/*
* Copyright (c) 2006 The Regents of The University of Michigan
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met: redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer;
* redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution;
* neither the name of the copyright holders nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Authors: Kevin Lim
*/
#ifndef __CPU_O3_THREAD_STATE_HH__
#define __CPU_O3_THREAD_STATE_HH__
#include "base/callback.hh"
#include "base/output.hh"
#include "cpu/thread_context.hh"
#include "cpu/thread_state.hh"
#include "sim/full_system.hh"
#include "sim/sim_exit.hh"
class EndQuiesceEvent;
class Event;
class FunctionalMemory;
class FunctionProfile;
class Process;
class ProfileNode;
/**
* Class that has various thread state, such as the status, the
* current instruction being processed, whether or not the thread has
* a trap pending or is being externally updated, the ThreadContext
* pointer, etc. It also handles anything related to a specific
* thread's process, such as syscalls and checking valid addresses.
*/
template <class Impl>
struct O3ThreadState : public ThreadState {
typedef ThreadContext::Status Status;
typedef typename Impl::O3CPU O3CPU;
private:
/** Pointer to the CPU. */
O3CPU *cpu;
public:
/** Whether or not the thread is currently in syscall mode, and
* thus able to be externally updated without squashing.
*/
bool inSyscall;
/** Whether or not the thread is currently waiting on a trap, and
* thus able to be externally updated without squashing.
*/
bool trapPending;
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process)
: ThreadState(_cpu, _thread_num, _process),
cpu(_cpu), inSyscall(0), trapPending(0)
{
if (!FullSystem)
return;
if (cpu->params()->profile) {
profile = new FunctionProfile(
cpu->params()->system->kernelSymtab);
Callback *cb =
new MakeCallback<O3ThreadState,
&O3ThreadState::dumpFuncProfile>(this);
registerExitCallback(cb);
}
// let's fill with a dummy node for now so we don't get a segfault
// on the first cycle when there's no node available.
static ProfileNode dummyNode;
profileNode = &dummyNode;
profilePC = 3;
}
/** Pointer to the ThreadContext of this thread. */
ThreadContext *tc;
/** Returns a pointer to the TC of this thread. */
ThreadContext *getTC() { return tc; }
/** Handles the syscall. */
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
void dumpFuncProfile()
{
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
profile->dump(tc, *os);
}
};
#endif // __CPU_O3_THREAD_STATE_HH__