Adding gem5 source to svn.
git-svn-id: https://www4.informatik.uni-erlangen.de/i4svn/danceos/trunk/devel/fail@1819 8c4709b5-6ec9-48aa-a5cd-a96041d1645a
This commit is contained in:
40
simulators/gem5/src/cpu/o3/FUPool.py
Normal file
40
simulators/gem5/src/cpu/o3/FUPool.py
Normal file
@ -0,0 +1,40 @@
|
||||
# Copyright (c) 2006-2007 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Kevin Lim
|
||||
|
||||
from m5.SimObject import SimObject
|
||||
from m5.params import *
|
||||
from FuncUnit import *
|
||||
from FuncUnitConfig import *
|
||||
|
||||
# SimObject describing a pool of functional units. FUList is a vector
# parameter of FUDesc entries ("list of FU's for this pool").
class FUPool(SimObject):
    type = 'FUPool'
    FUList = VectorParam.FUDesc("list of FU's for this pool")
|
||||
|
||||
# Default functional-unit pool: FUList holds one instance of every FUDesc
# subclass defined in FuncUnitConfig.py.
class DefaultFUPool(FUPool):
    FUList = [ IntALU(), IntMultDiv(), FP_ALU(), FP_MultDiv(), ReadPort(),
               SIMD_Unit(), WritePort(), RdWrPort(), IprPort() ]
|
||||
104
simulators/gem5/src/cpu/o3/FuncUnitConfig.py
Normal file
104
simulators/gem5/src/cpu/o3/FuncUnitConfig.py
Normal file
@ -0,0 +1,104 @@
|
||||
# Copyright (c) 2010 ARM Limited
|
||||
# All rights reserved.
|
||||
#
|
||||
# The license below extends only to copyright in the software and shall
|
||||
# not be construed as granting a license to any other intellectual
|
||||
# property including but not limited to intellectual property relating
|
||||
# to a hardware implementation of the functionality of the software
|
||||
# licensed hereunder. You may use the software subject to the license
|
||||
# terms below provided that you ensure that this notice is replicated
|
||||
# unmodified and in its entirety in all distributions of the software,
|
||||
# modified or unmodified, in source code or in binary form.
|
||||
#
|
||||
# Copyright (c) 2006-2007 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Kevin Lim
|
||||
|
||||
from m5.SimObject import SimObject
|
||||
from m5.params import *
|
||||
from FuncUnit import *
|
||||
|
||||
# Functional-unit descriptor serving the 'IntAlu' op class.
# NOTE(review): `count` is presumably the number of such units in a pool -
# confirm against FUDesc.
class IntALU(FUDesc):
    opList = [ OpDesc(opClass='IntAlu') ]
    count = 6
|
||||
|
||||
# Functional-unit descriptor serving the 'IntMult' and 'IntDiv' op classes.
# NOTE(review): opLat / issueLat are presumably the operation and issue
# latencies - confirm against OpDesc.
class IntMultDiv(FUDesc):
    opList = [ OpDesc(opClass='IntMult', opLat=3),
               OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ]
    count = 2
|
||||
|
||||
# Functional-unit descriptor serving the floating-point add, compare and
# convert op classes, each declared with opLat=2.
class FP_ALU(FUDesc):
    opList = [ OpDesc(opClass='FloatAdd', opLat=2),
               OpDesc(opClass='FloatCmp', opLat=2),
               OpDesc(opClass='FloatCvt', opLat=2) ]
    count = 4
|
||||
|
||||
# Functional-unit descriptor serving the floating-point multiply, divide and
# square-root op classes. Divide and square root set issueLat equal to opLat.
class FP_MultDiv(FUDesc):
    opList = [ OpDesc(opClass='FloatMult', opLat=4),
               OpDesc(opClass='FloatDiv', opLat=12, issueLat=12),
               OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ]
    count = 2
|
||||
|
||||
# Functional-unit descriptor serving every 'Simd*' op class listed below
# (integer SIMD first, then the 'SimdFloat*' variants). All entries rely on
# OpDesc's default latencies.
class SIMD_Unit(FUDesc):
    opList = [ OpDesc(opClass='SimdAdd'),
               OpDesc(opClass='SimdAddAcc'),
               OpDesc(opClass='SimdAlu'),
               OpDesc(opClass='SimdCmp'),
               OpDesc(opClass='SimdCvt'),
               OpDesc(opClass='SimdMisc'),
               OpDesc(opClass='SimdMult'),
               OpDesc(opClass='SimdMultAcc'),
               OpDesc(opClass='SimdShift'),
               OpDesc(opClass='SimdShiftAcc'),
               OpDesc(opClass='SimdSqrt'),
               OpDesc(opClass='SimdFloatAdd'),
               OpDesc(opClass='SimdFloatAlu'),
               OpDesc(opClass='SimdFloatCmp'),
               OpDesc(opClass='SimdFloatCvt'),
               OpDesc(opClass='SimdFloatDiv'),
               OpDesc(opClass='SimdFloatMisc'),
               OpDesc(opClass='SimdFloatMult'),
               OpDesc(opClass='SimdFloatMultAcc'),
               OpDesc(opClass='SimdFloatSqrt') ]
    count = 4
|
||||
|
||||
# Functional-unit descriptor serving only the 'MemRead' op class.
# count is 0 here; RdWrPort (count = 4) also lists 'MemRead'.
class ReadPort(FUDesc):
    opList = [ OpDesc(opClass='MemRead') ]
    count = 0
|
||||
|
||||
# Functional-unit descriptor serving only the 'MemWrite' op class.
# count is 0 here; RdWrPort (count = 4) also lists 'MemWrite'.
class WritePort(FUDesc):
    opList = [ OpDesc(opClass='MemWrite') ]
    count = 0
|
||||
|
||||
# Functional-unit descriptor serving both the 'MemRead' and 'MemWrite' op
# classes.
class RdWrPort(FUDesc):
    opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='MemWrite') ]
    count = 4
|
||||
|
||||
# Functional-unit descriptor serving the 'IprAccess' op class, declared with
# opLat and issueLat both set to 3.
class IprPort(FUDesc):
    opList = [ OpDesc(opClass='IprAccess', opLat=3, issueLat=3) ]
    count = 1
|
||||
|
||||
147
simulators/gem5/src/cpu/o3/O3CPU.py
Normal file
147
simulators/gem5/src/cpu/o3/O3CPU.py
Normal file
@ -0,0 +1,147 @@
|
||||
# Copyright (c) 2005-2007 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Kevin Lim
|
||||
|
||||
from m5.defines import buildEnv
|
||||
from m5.params import *
|
||||
from m5.proxy import *
|
||||
from BaseCPU import BaseCPU
|
||||
from FUPool import *
|
||||
from O3Checker import O3Checker
|
||||
|
||||
# Parameter declaration for the O3 CPU SimObject ('DerivO3CPU').
# Each attribute is a Param with its default value and description string;
# addCheckerCpu() attaches an O3Checker on ARM builds.
class DerivO3CPU(BaseCPU):
    type = 'DerivO3CPU'
    activity = Param.Unsigned(0, "Initial count")

    cachePorts = Param.Unsigned(200, "Cache Ports")

    # Delays of signals arriving at fetch, and the fetch width.
    decodeToFetchDelay = Param.Unsigned(1, "Decode to fetch delay")
    renameToFetchDelay = Param.Unsigned(1, "Rename to fetch delay")
    iewToFetchDelay = Param.Unsigned(1, "Issue/Execute/Writeback to fetch "
                                     "delay")
    commitToFetchDelay = Param.Unsigned(1, "Commit to fetch delay")
    fetchWidth = Param.Unsigned(8, "Fetch width")

    # Delays of signals arriving at decode, and the decode width.
    renameToDecodeDelay = Param.Unsigned(1, "Rename to decode delay")
    iewToDecodeDelay = Param.Unsigned(1, "Issue/Execute/Writeback to decode "
                                      "delay")
    commitToDecodeDelay = Param.Unsigned(1, "Commit to decode delay")
    fetchToDecodeDelay = Param.Unsigned(1, "Fetch to decode delay")
    decodeWidth = Param.Unsigned(8, "Decode width")

    # Delays of signals arriving at rename, and the rename width.
    iewToRenameDelay = Param.Unsigned(1, "Issue/Execute/Writeback to rename "
                                      "delay")
    commitToRenameDelay = Param.Unsigned(1, "Commit to rename delay")
    decodeToRenameDelay = Param.Unsigned(1, "Decode to rename delay")
    renameWidth = Param.Unsigned(8, "Rename width")

    # Issue/Execute/Writeback (IEW) stage delays, widths and FU pool.
    commitToIEWDelay = Param.Unsigned(1, "Commit to "
                                      "Issue/Execute/Writeback delay")
    renameToIEWDelay = Param.Unsigned(2, "Rename to "
                                      "Issue/Execute/Writeback delay")
    issueToExecuteDelay = Param.Unsigned(1, "Issue to execute delay (internal "
                                         "to the IEW stage)")
    dispatchWidth = Param.Unsigned(8, "Dispatch width")
    issueWidth = Param.Unsigned(8, "Issue width")
    wbWidth = Param.Unsigned(8, "Writeback width")
    wbDepth = Param.Unsigned(1, "Writeback depth")
    fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool")

    # Commit stage delays, widths and trap latencies.
    iewToCommitDelay = Param.Unsigned(1, "Issue/Execute/Writeback to commit "
                                      "delay")
    renameToROBDelay = Param.Unsigned(1, "Rename to reorder buffer delay")
    commitWidth = Param.Unsigned(8, "Commit width")
    squashWidth = Param.Unsigned(8, "Squash width")
    trapLatency = Param.Tick(13, "Trap latency")
    fetchTrapLatency = Param.Tick(1, "Fetch trap latency")

    # Time buffer sizes.
    backComSize = Param.Unsigned(5, "Time buffer size for backwards communication")
    forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication")

    # Branch predictor configuration.
    predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')")
    localPredictorSize = Param.Unsigned(2048, "Size of local predictor")
    localCtrBits = Param.Unsigned(2, "Bits per counter")
    localHistoryTableSize = Param.Unsigned(2048, "Size of local history table")
    localHistoryBits = Param.Unsigned(11, "Bits for the local history")
    globalPredictorSize = Param.Unsigned(8192, "Size of global predictor")
    globalCtrBits = Param.Unsigned(2, "Bits per counter")
    globalHistoryBits = Param.Unsigned(13, "Bits of history")
    choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor")
    choiceCtrBits = Param.Unsigned(2, "Bits of choice counters")

    BTBEntries = Param.Unsigned(4096, "Number of BTB entries")
    BTBTagSize = Param.Unsigned(16, "Size of the BTB tags, in bits")

    RASSize = Param.Unsigned(16, "RAS size")

    # Load/store queue and memory-dependence (store set) configuration.
    LQEntries = Param.Unsigned(32, "Number of load queue entries")
    SQEntries = Param.Unsigned(32, "Number of store queue entries")
    LSQDepCheckShift = Param.Unsigned(4, "Number of places to shift addr before check")
    LSQCheckLoads = Param.Bool(True,
        "Should dependency violations be checked for loads & stores or just stores")
    store_set_clear_period = Param.Unsigned(250000,
        "Number of load/store insts before the dep predictor should be invalidated")
    LFSTSize = Param.Unsigned(1024, "Last fetched store table size")
    SSITSize = Param.Unsigned(1024, "Store set ID table size")

    numRobs = Param.Unsigned(1, "Number of Reorder Buffers")

    numPhysIntRegs = Param.Unsigned(256, "Number of physical integer registers")
    numPhysFloatRegs = Param.Unsigned(256, "Number of physical floating point "
                                      "registers")
    numIQEntries = Param.Unsigned(64, "Number of instruction queue entries")
    numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries")

    instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by")

    # SMT policies and thresholds.
    smtNumFetchingThreads = Param.Unsigned(1, "SMT Number of Fetching Threads")
    smtFetchPolicy = Param.String('SingleThread', "SMT Fetch policy")
    smtLSQPolicy = Param.String('Partitioned', "SMT LSQ Sharing Policy")
    smtLSQThreshold = Param.Int(100, "SMT LSQ Threshold Sharing Parameter")
    smtIQPolicy = Param.String('Partitioned', "SMT IQ Sharing Policy")
    smtIQThreshold = Param.Int(100, "SMT IQ Threshold Sharing Parameter")
    smtROBPolicy = Param.String('Partitioned', "SMT ROB Sharing Policy")
    smtROBThreshold = Param.Int(100, "SMT ROB Threshold Sharing Parameter")
    smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy")

    # Defaults to True only when the build's TARGET_ISA is 'x86'.
    needsTSO = Param.Bool(buildEnv['TARGET_ISA'] == 'x86',
                          "Enable TSO Memory model")

    def addCheckerCpu(self):
        """Attach an O3Checker to this CPU as `self.checker`.

        Only supported when buildEnv['TARGET_ISA'] is 'arm': the checker
        shares this CPU's workload and gets its own ArmTLB instances sized
        like this CPU's itb/dtb. On any other ISA an error is printed and
        the process exits with status 1.
        """
        if buildEnv['TARGET_ISA'] in ['arm']:
            from ArmTLB import ArmTLB

            self.checker = O3Checker(workload=self.workload,
                                     exitOnError=False,
                                     updateOnError=True,
                                     warnOnlyOnLoadError=True)
            self.checker.itb = ArmTLB(size = self.itb.size)
            self.checker.dtb = ArmTLB(size = self.dtb.size)

        else:
            # Local import: sys.exit() is the programmatic exit; the bare
            # exit() helper is injected by `site` for interactive use.
            import sys

            # Parenthesised single-argument form prints identically under
            # Python 2 and is valid Python 3.
            print("ERROR: Checker only supported under ARM ISA!")
            sys.exit(1)
|
||||
40
simulators/gem5/src/cpu/o3/O3Checker.py
Normal file
40
simulators/gem5/src/cpu/o3/O3Checker.py
Normal file
@ -0,0 +1,40 @@
|
||||
# Copyright (c) 2007 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Nathan Binkert
|
||||
|
||||
from m5.params import *
|
||||
from BaseCPU import BaseCPU
|
||||
|
||||
# Parameter declaration for the 'O3Checker' SimObject, a BaseCPU subclass.
# The Bool parameters select how the checker reacts to errors; the
# function_trace parameters control function tracing.
class O3Checker(BaseCPU):
    type = 'O3Checker'
    exitOnError = Param.Bool(False, "Exit on an error")
    updateOnError = Param.Bool(False,
        "Update the checker with the main CPU's state on an error")
    warnOnlyOnLoadError = Param.Bool(True,
        "If a load result is incorrect, only print a warning and do not exit")
    function_trace = Param.Bool(False, "Enable function trace")
    function_trace_start = Param.Tick(0, "Cycle to start function trace")
|
||||
82
simulators/gem5/src/cpu/o3/SConscript
Executable file
82
simulators/gem5/src/cpu/o3/SConscript
Executable file
@ -0,0 +1,82 @@
|
||||
# -*- mode:python -*-
|
||||
|
||||
# Copyright (c) 2006 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Nathan Binkert
|
||||
|
||||
import sys
|
||||
|
||||
Import('*')
|
||||
|
||||
# Register debug flags when either the O3CPU or the OzoneCPU model is listed
# in env['CPU_MODELS'].
# NOTE(review): assumes all three DebugFlag() calls below belong to this
# conditional - confirm against the original file.
if 'O3CPU' in env['CPU_MODELS'] or 'OzoneCPU' in env['CPU_MODELS']:
|
||||
DebugFlag('CommitRate')
|
||||
DebugFlag('IEW')
|
||||
DebugFlag('IQ')
|
||||
|
||||
if 'O3CPU' in env['CPU_MODELS']:
|
||||
SimObject('FUPool.py')
|
||||
SimObject('FuncUnitConfig.py')
|
||||
SimObject('O3CPU.py')
|
||||
|
||||
Source('base_dyn_inst.cc')
|
||||
Source('bpred_unit.cc')
|
||||
Source('commit.cc')
|
||||
Source('cpu.cc')
|
||||
Source('cpu_builder.cc')
|
||||
Source('decode.cc')
|
||||
Source('dyn_inst.cc')
|
||||
Source('fetch.cc')
|
||||
Source('free_list.cc')
|
||||
Source('fu_pool.cc')
|
||||
Source('iew.cc')
|
||||
Source('inst_queue.cc')
|
||||
Source('lsq.cc')
|
||||
Source('lsq_unit.cc')
|
||||
Source('mem_dep_unit.cc')
|
||||
Source('rename.cc')
|
||||
Source('rename_map.cc')
|
||||
Source('rob.cc')
|
||||
Source('scoreboard.cc')
|
||||
Source('store_set.cc')
|
||||
Source('thread_context.cc')
|
||||
|
||||
DebugFlag('LSQ')
|
||||
DebugFlag('LSQUnit')
|
||||
DebugFlag('MemDepUnit')
|
||||
DebugFlag('O3CPU')
|
||||
DebugFlag('ROB')
|
||||
DebugFlag('Rename')
|
||||
DebugFlag('Scoreboard')
|
||||
DebugFlag('StoreSet')
|
||||
DebugFlag('Writeback')
|
||||
|
||||
# Umbrella flag 'O3CPUAll' grouping the per-stage and per-structure debug
# flags listed below.
CompoundFlag('O3CPUAll', [ 'Fetch', 'Decode', 'Rename', 'IEW', 'Commit',
|
||||
'IQ', 'ROB', 'FreeList', 'LSQ', 'LSQUnit', 'StoreSet', 'MemDepUnit',
|
||||
'DynInst', 'O3CPU', 'Activity', 'Scoreboard', 'Writeback' ])
|
||||
|
||||
SimObject('O3Checker.py')
|
||||
Source('checker_builder.cc')
|
||||
36
simulators/gem5/src/cpu/o3/SConsopts
Normal file
36
simulators/gem5/src/cpu/o3/SConsopts
Normal file
@ -0,0 +1,36 @@
|
||||
# -*- mode:python -*-
|
||||
|
||||
# Copyright (c) 2006 The Regents of The University of Michigan
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met: redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer;
|
||||
# redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution;
|
||||
# neither the name of the copyright holders nor the names of its
|
||||
# contributors may be used to endorse or promote products derived from
|
||||
# this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
# Authors: Nathan Binkert
|
||||
|
||||
Import('*')
|
||||
|
||||
# Register the 'O3CPU' CPU model as a default model (default=True).
# NOTE(review): the remaining arguments look like the generated exec file
# name, the include line it needs, and template substitutions - confirm
# against the CpuModel definition.
CpuModel('O3CPU', 'o3_cpu_exec.cc',
         '#include "cpu/o3/isa_specific.hh"',
         { 'CPU_exec_context': 'O3DynInst' },
         default=True)
|
||||
36
simulators/gem5/src/cpu/o3/base_dyn_inst.cc
Normal file
36
simulators/gem5/src/cpu/o3/base_dyn_inst.cc
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/cpu.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/base_dyn_inst_impl.hh"
|
||||
|
||||
// Explicit instantiation
|
||||
template class BaseDynInst<O3CPUImpl>;
|
||||
34
simulators/gem5/src/cpu/o3/bpred_unit.cc
Normal file
34
simulators/gem5/src/cpu/o3/bpred_unit.cc
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/bpred_unit_impl.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
|
||||
// Explicit instantiation of BPredUnit with O3CPUImpl.
template class BPredUnit<O3CPUImpl>;
|
||||
294
simulators/gem5/src/cpu/o3/bpred_unit.hh
Normal file
294
simulators/gem5/src/cpu/o3/bpred_unit.hh
Normal file
@ -0,0 +1,294 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_BPRED_UNIT_HH__
|
||||
#define __CPU_O3_BPRED_UNIT_HH__
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/pred/2bit_local.hh"
|
||||
#include "cpu/pred/btb.hh"
|
||||
#include "cpu/pred/ras.hh"
|
||||
#include "cpu/pred/tournament.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
|
||||
/**
|
||||
* Basically a wrapper class to hold both the branch predictor
|
||||
* and the BTB.
|
||||
*/
|
||||
template<class Impl>
|
||||
class BPredUnit
|
||||
{
|
||||
private:
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
enum PredType {
|
||||
Local,
|
||||
Tournament
|
||||
};
|
||||
|
||||
PredType predictor;
|
||||
|
||||
const std::string _name;
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* @param params The params object, which holds the sizes of the BP and BTB.
|
||||
*/
|
||||
BPredUnit(DerivO3CPUParams *params);
|
||||
|
||||
const std::string &name() const { return _name; }
|
||||
|
||||
/**
|
||||
* Registers statistics.
|
||||
*/
|
||||
void regStats();
|
||||
|
||||
void switchOut();
|
||||
|
||||
void takeOverFrom();
|
||||
|
||||
/**
|
||||
* Predicts whether or not the instruction is a taken branch, and the
|
||||
* target of the branch if it is taken.
|
||||
* @param inst The branch instruction.
|
||||
* @param pc The predicted PC is passed back through this parameter.
|
||||
* @param tid The thread id.
|
||||
* @return True if the branch is predicted taken, false otherwise.
|
||||
*/
|
||||
bool predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid);
|
||||
|
||||
// @todo: Rename this function.
|
||||
void BPUncond(void * &bp_history);
|
||||
|
||||
/**
|
||||
* Tells the branch predictor to commit any updates until the given
|
||||
* sequence number.
|
||||
* @param done_sn The sequence number to commit any older updates up until.
|
||||
* @param tid The thread id.
|
||||
*/
|
||||
void update(const InstSeqNum &done_sn, ThreadID tid);
|
||||
|
||||
/**
|
||||
* Squashes all outstanding updates until a given sequence number.
|
||||
* @param squashed_sn The sequence number to squash any younger updates up
|
||||
* until.
|
||||
* @param tid The thread id.
|
||||
*/
|
||||
void squash(const InstSeqNum &squashed_sn, ThreadID tid);
|
||||
|
||||
/**
|
||||
* Squashes all outstanding updates until a given sequence number, and
|
||||
* corrects that sn's update with the proper address and taken/not taken.
|
||||
* @param squashed_sn The sequence number to squash any younger updates up
|
||||
* until.
|
||||
* @param corr_target The correct branch target.
|
||||
* @param actually_taken The correct branch direction.
|
||||
* @param tid The thread id.
|
||||
*/
|
||||
void squash(const InstSeqNum &squashed_sn,
|
||||
const TheISA::PCState &corr_target,
|
||||
bool actually_taken, ThreadID tid);
|
||||
|
||||
/**
|
||||
* @param bp_history Pointer to the history object. The predictor
|
||||
* will need to update any state and delete the object.
|
||||
*/
|
||||
void BPSquash(void *bp_history);
|
||||
|
||||
/**
|
||||
* Looks up a given PC in the BP to see if it is taken or not taken.
|
||||
* @param inst_PC The PC to look up.
|
||||
* @param bp_history Pointer that will be set to an object that
|
||||
* has the branch predictor state associated with the lookup.
|
||||
* @return Whether the branch is taken or not taken.
|
||||
*/
|
||||
bool BPLookup(Addr instPC, void * &bp_history);
|
||||
|
||||
/**
|
||||
* If a branch is not taken, because the BTB address is invalid or missing,
|
||||
* this function sets the appropriate counter in the global and local
|
||||
* predictors to not taken.
|
||||
* @param inst_PC The PC to look up the local predictor.
|
||||
* @param bp_history Pointer that will be set to an object that
|
||||
* has the branch predictor state associated with the lookup.
|
||||
*/
|
||||
void BPBTBUpdate(Addr instPC, void * &bp_history);
|
||||
|
||||
/**
|
||||
* Looks up a given PC in the BTB to see if a matching entry exists.
|
||||
* @param inst_PC The PC to look up.
|
||||
* @return Whether the BTB contains the given PC.
|
||||
*/
|
||||
bool BTBValid(Addr instPC)
|
||||
{ return BTB.valid(instPC, 0); }
|
||||
|
||||
/**
|
||||
* Looks up a given PC in the BTB to get the predicted target.
|
||||
* @param inst_PC The PC to look up.
|
||||
* @return The address of the target of the branch.
|
||||
*/
|
||||
TheISA::PCState BTBLookup(Addr instPC)
|
||||
{ return BTB.lookup(instPC, 0); }
|
||||
|
||||
/**
|
||||
* Updates the BP with taken/not taken information.
|
||||
* @param inst_PC The branch's PC that will be updated.
|
||||
* @param taken Whether the branch was taken or not taken.
|
||||
* @param bp_history Pointer to the branch predictor state that is
|
||||
* associated with the branch lookup that is being updated.
|
||||
* @param squashed Set to true when this function is called during a
|
||||
* squash operation.
|
||||
* @todo Make this update flexible enough to handle a global predictor.
|
||||
*/
|
||||
void BPUpdate(Addr instPC, bool taken, void *bp_history, bool squashed);
|
||||
|
||||
/**
|
||||
* Updates the BTB with the target of a branch.
|
||||
* @param inst_PC The branch's PC that will be updated.
|
||||
* @param target_PC The branch's target that will be added to the BTB.
|
||||
*/
|
||||
void BTBUpdate(Addr instPC, const TheISA::PCState &target)
|
||||
{ BTB.update(instPC, target, 0); }
|
||||
|
||||
void dump();
|
||||
|
||||
private:
|
||||
struct PredictorHistory {
|
||||
/**
|
||||
* Makes a predictor history struct that contains any
|
||||
* information needed to update the predictor, BTB, and RAS.
|
||||
*/
|
||||
PredictorHistory(const InstSeqNum &seq_num, Addr instPC,
|
||||
bool pred_taken, void *bp_history,
|
||||
ThreadID _tid)
|
||||
: seqNum(seq_num), pc(instPC), bpHistory(bp_history), RASTarget(0),
|
||||
RASIndex(0), tid(_tid), predTaken(pred_taken), usedRAS(0),
|
||||
wasCall(0), wasReturn(0), validBTB(0)
|
||||
{}
|
||||
|
||||
bool operator==(const PredictorHistory &entry) const {
|
||||
return this->seqNum == entry.seqNum;
|
||||
}
|
||||
|
||||
/** The sequence number for the predictor history entry. */
|
||||
InstSeqNum seqNum;
|
||||
|
||||
/** The PC associated with the sequence number. */
|
||||
Addr pc;
|
||||
|
||||
/** Pointer to the history object passed back from the branch
|
||||
* predictor. It is used to update or restore state of the
|
||||
* branch predictor.
|
||||
*/
|
||||
void *bpHistory;
|
||||
|
||||
/** The RAS target (only valid if a return). */
|
||||
TheISA::PCState RASTarget;
|
||||
|
||||
/** The RAS index of the instruction (only valid if a call). */
|
||||
unsigned RASIndex;
|
||||
|
||||
/** The thread id. */
|
||||
ThreadID tid;
|
||||
|
||||
/** Whether or not it was predicted taken. */
|
||||
bool predTaken;
|
||||
|
||||
/** Whether or not the RAS was used. */
|
||||
bool usedRAS;
|
||||
|
||||
/** Whether or not the instruction was a call. */
|
||||
bool wasCall;
|
||||
|
||||
/** Whether or not the instruction was a return. */
|
||||
bool wasReturn;
|
||||
/** Whether or not the instruction had a valid BTB entry. */
|
||||
bool validBTB;
|
||||
};
|
||||
|
||||
typedef std::list<PredictorHistory> History;
|
||||
typedef typename History::iterator HistoryIt;
|
||||
|
||||
/**
|
||||
* The per-thread predictor history. This is used to update the predictor
|
||||
* as instructions are committed, or restore it to the proper state after
|
||||
* a squash.
|
||||
*/
|
||||
History predHist[Impl::MaxThreads];
|
||||
|
||||
/** The local branch predictor. */
|
||||
LocalBP *localBP;
|
||||
|
||||
/** The tournament branch predictor. */
|
||||
TournamentBP *tournamentBP;
|
||||
|
||||
/** The BTB. */
|
||||
DefaultBTB BTB;
|
||||
|
||||
/** The per-thread return address stack. */
|
||||
ReturnAddrStack RAS[Impl::MaxThreads];
|
||||
|
||||
/** Stat for number of BP lookups. */
|
||||
Stats::Scalar lookups;
|
||||
/** Stat for number of conditional branches predicted. */
|
||||
Stats::Scalar condPredicted;
|
||||
/** Stat for number of conditional branches predicted incorrectly. */
|
||||
Stats::Scalar condIncorrect;
|
||||
/** Stat for number of BTB lookups. */
|
||||
Stats::Scalar BTBLookups;
|
||||
/** Stat for number of BTB hits. */
|
||||
Stats::Scalar BTBHits;
|
||||
/** Stat for number of times the BTB is correct. */
|
||||
Stats::Scalar BTBCorrect;
|
||||
/** Stat for number of times the RAS is used to get a target. */
|
||||
Stats::Scalar usedRAS;
|
||||
/** Stat for number of times the RAS is incorrect. */
|
||||
Stats::Scalar RASIncorrect;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_BPRED_UNIT_HH__
|
||||
517
simulators/gem5/src/cpu/o3/bpred_unit_impl.hh
Normal file
517
simulators/gem5/src/cpu/o3/bpred_unit_impl.hh
Normal file
@ -0,0 +1,517 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "arch/types.hh"
|
||||
#include "arch/utility.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/bpred_unit.hh"
|
||||
#include "debug/Fetch.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
|
||||
template<class Impl>
|
||||
BPredUnit<Impl>::BPredUnit(DerivO3CPUParams *params)
|
||||
: _name(params->name + ".BPredUnit"),
|
||||
BTB(params->BTBEntries,
|
||||
params->BTBTagSize,
|
||||
params->instShiftAmt)
|
||||
{
|
||||
// Setup the selected predictor.
|
||||
if (params->predType == "local") {
|
||||
localBP = new LocalBP(params->localPredictorSize,
|
||||
params->localCtrBits,
|
||||
params->instShiftAmt);
|
||||
predictor = Local;
|
||||
} else if (params->predType == "tournament") {
|
||||
tournamentBP = new TournamentBP(params->localPredictorSize,
|
||||
params->localCtrBits,
|
||||
params->localHistoryTableSize,
|
||||
params->localHistoryBits,
|
||||
params->globalPredictorSize,
|
||||
params->globalHistoryBits,
|
||||
params->globalCtrBits,
|
||||
params->choicePredictorSize,
|
||||
params->choiceCtrBits,
|
||||
params->instShiftAmt);
|
||||
predictor = Tournament;
|
||||
} else {
|
||||
fatal("Invalid BP selected!");
|
||||
}
|
||||
|
||||
for (int i=0; i < Impl::MaxThreads; i++)
|
||||
RAS[i].init(params->RASSize);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::regStats()
|
||||
{
|
||||
lookups
|
||||
.name(name() + ".lookups")
|
||||
.desc("Number of BP lookups")
|
||||
;
|
||||
|
||||
condPredicted
|
||||
.name(name() + ".condPredicted")
|
||||
.desc("Number of conditional branches predicted")
|
||||
;
|
||||
|
||||
condIncorrect
|
||||
.name(name() + ".condIncorrect")
|
||||
.desc("Number of conditional branches incorrect")
|
||||
;
|
||||
|
||||
BTBLookups
|
||||
.name(name() + ".BTBLookups")
|
||||
.desc("Number of BTB lookups")
|
||||
;
|
||||
|
||||
BTBHits
|
||||
.name(name() + ".BTBHits")
|
||||
.desc("Number of BTB hits")
|
||||
;
|
||||
|
||||
BTBCorrect
|
||||
.name(name() + ".BTBCorrect")
|
||||
.desc("Number of correct BTB predictions (this stat may not "
|
||||
"work properly.")
|
||||
;
|
||||
|
||||
usedRAS
|
||||
.name(name() + ".usedRAS")
|
||||
.desc("Number of times the RAS was used to get a target.")
|
||||
;
|
||||
|
||||
RASIncorrect
|
||||
.name(name() + ".RASInCorrect")
|
||||
.desc("Number of incorrect RAS predictions.")
|
||||
;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::switchOut()
|
||||
{
|
||||
// Clear any state upon switch out.
|
||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||
squash(0, i);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::takeOverFrom()
|
||||
{
|
||||
// Can reset all predictor state, but it's not necessarily better
|
||||
// than leaving it be.
|
||||
/*
|
||||
for (int i = 0; i < Impl::MaxThreads; ++i)
|
||||
RAS[i].reset();
|
||||
|
||||
BP.reset();
|
||||
BTB.reset();
|
||||
*/
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
BPredUnit<Impl>::predict(DynInstPtr &inst, TheISA::PCState &pc, ThreadID tid)
|
||||
{
|
||||
// See if branch predictor predicts taken.
|
||||
// If so, get its target addr either from the BTB or the RAS.
|
||||
// Save off record of branch stuff so the RAS can be fixed
|
||||
// up once it's done.
|
||||
|
||||
bool pred_taken = false;
|
||||
TheISA::PCState target = pc;
|
||||
|
||||
++lookups;
|
||||
|
||||
void *bp_history = NULL;
|
||||
|
||||
if (inst->isUncondCtrl()) {
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid);
|
||||
pred_taken = true;
|
||||
// Tell the BP there was an unconditional branch.
|
||||
BPUncond(bp_history);
|
||||
} else {
|
||||
++condPredicted;
|
||||
pred_taken = BPLookup(pc.instAddr(), bp_history);
|
||||
|
||||
DPRINTF(Fetch, "BranchPred:[tid:%i]: [sn:%i] Branch predictor"
|
||||
" predicted %i for PC %s\n",
|
||||
tid, inst->seqNum, pred_taken, inst->pcState());
|
||||
}
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i] Creating prediction history "
|
||||
"for PC %s\n",
|
||||
tid, inst->seqNum, inst->pcState());
|
||||
|
||||
PredictorHistory predict_record(inst->seqNum, pc.instAddr(),
|
||||
pred_taken, bp_history, tid);
|
||||
|
||||
// Now lookup in the BTB or RAS.
|
||||
if (pred_taken) {
|
||||
if (inst->isReturn()) {
|
||||
++usedRAS;
|
||||
predict_record.wasReturn = true;
|
||||
// If it's a function return call, then look up the address
|
||||
// in the RAS.
|
||||
TheISA::PCState rasTop = RAS[tid].top();
|
||||
target = TheISA::buildRetPC(pc, rasTop);
|
||||
|
||||
// Record the top entry of the RAS, and its index.
|
||||
predict_record.usedRAS = true;
|
||||
predict_record.RASIndex = RAS[tid].topIdx();
|
||||
predict_record.RASTarget = rasTop;
|
||||
|
||||
assert(predict_record.RASIndex < 16);
|
||||
|
||||
RAS[tid].pop();
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s is a return, "
|
||||
"RAS predicted target: %s, RAS index: %i.\n",
|
||||
tid, inst->pcState(), target, predict_record.RASIndex);
|
||||
} else {
|
||||
++BTBLookups;
|
||||
|
||||
if (inst->isCall()) {
|
||||
RAS[tid].push(pc);
|
||||
|
||||
// Record that it was a call so that the top RAS entry can
|
||||
// be popped off if the speculation is incorrect.
|
||||
predict_record.wasCall = true;
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s was a "
|
||||
"call, adding %s to the RAS index: %i.\n",
|
||||
tid, inst->pcState(), pc, RAS[tid].topIdx());
|
||||
}
|
||||
|
||||
if (BTB.valid(pc.instAddr(), tid)) {
|
||||
++BTBHits;
|
||||
predict_record.validBTB = true;
|
||||
|
||||
// If it's not a return, use the BTB to get the target addr.
|
||||
target = BTB.lookup(pc.instAddr(), tid);
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Instruction %s predicted"
|
||||
" target is %s.\n", tid, inst->pcState(), target);
|
||||
|
||||
} else {
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: BTB doesn't have a "
|
||||
"valid entry.\n",tid);
|
||||
pred_taken = false;
|
||||
// The Direction of the branch predictor is altered because the
|
||||
// BTB did not have an entry
|
||||
// The predictor needs to be updated accordingly
|
||||
if (!inst->isCall() && !inst->isReturn()) {
|
||||
BPBTBUpdate(pc.instAddr(), bp_history);
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]:[sn:%i] BPBTBUpdate"
|
||||
" called for %s\n",
|
||||
tid, inst->seqNum, inst->pcState());
|
||||
} else if (inst->isCall() && !inst->isUncondCtrl()) {
|
||||
RAS[tid].pop();
|
||||
}
|
||||
TheISA::advancePC(target, inst->staticInst);
|
||||
}
|
||||
|
||||
}
|
||||
} else {
|
||||
if (inst->isReturn()) {
|
||||
predict_record.wasReturn = true;
|
||||
}
|
||||
TheISA::advancePC(target, inst->staticInst);
|
||||
}
|
||||
|
||||
pc = target;
|
||||
|
||||
predHist[tid].push_front(predict_record);
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: [sn:%i]: History entry added."
|
||||
"predHist.size(): %i\n", tid, inst->seqNum, predHist[tid].size());
|
||||
|
||||
return pred_taken;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::update(const InstSeqNum &done_sn, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Committing branches until "
|
||||
"[sn:%lli].\n", tid, done_sn);
|
||||
|
||||
while (!predHist[tid].empty() &&
|
||||
predHist[tid].back().seqNum <= done_sn) {
|
||||
// Update the branch predictor with the correct results.
|
||||
BPUpdate(predHist[tid].back().pc,
|
||||
predHist[tid].back().predTaken,
|
||||
predHist[tid].back().bpHistory, false);
|
||||
|
||||
predHist[tid].pop_back();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn, ThreadID tid)
|
||||
{
|
||||
History &pred_hist = predHist[tid];
|
||||
|
||||
while (!pred_hist.empty() &&
|
||||
pred_hist.front().seqNum > squashed_sn) {
|
||||
if (pred_hist.front().usedRAS) {
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS to: %i,"
|
||||
" target: %s.\n", tid,
|
||||
pred_hist.front().RASIndex, pred_hist.front().RASTarget);
|
||||
|
||||
RAS[tid].restore(pred_hist.front().RASIndex,
|
||||
pred_hist.front().RASTarget);
|
||||
} else if(pred_hist.front().wasCall && pred_hist.front().validBTB) {
|
||||
// Was a call but predicated false. Pop RAS here
|
||||
DPRINTF(Fetch, "BranchPred: [tid: %i] Squashing"
|
||||
" Call [sn:%i] PC: %s Popping RAS\n", tid,
|
||||
pred_hist.front().seqNum, pred_hist.front().pc);
|
||||
RAS[tid].pop();
|
||||
}
|
||||
|
||||
// This call should delete the bpHistory.
|
||||
BPSquash(pred_hist.front().bpHistory);
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i] "
|
||||
"PC %s.\n", tid, pred_hist.front().seqNum,
|
||||
pred_hist.front().pc);
|
||||
|
||||
pred_hist.pop_front();
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n",
|
||||
tid, predHist[tid].size());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::squash(const InstSeqNum &squashed_sn,
|
||||
const TheISA::PCState &corrTarget,
|
||||
bool actually_taken,
|
||||
ThreadID tid)
|
||||
{
|
||||
// Now that we know that a branch was mispredicted, we need to undo
|
||||
// all the branches that have been seen up until this branch and
|
||||
// fix up everything.
|
||||
// NOTE: This should be call conceivably in 2 scenarios:
|
||||
// (1) After an branch is executed, it updates its status in the ROB
|
||||
// The commit stage then checks the ROB update and sends a signal to
|
||||
// the fetch stage to squash history after the mispredict
|
||||
// (2) In the decode stage, you can find out early if a unconditional
|
||||
// PC-relative, branch was predicted incorrectly. If so, a signal
|
||||
// to the fetch stage is sent to squash history after the mispredict
|
||||
|
||||
History &pred_hist = predHist[tid];
|
||||
|
||||
++condIncorrect;
|
||||
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Squashing from sequence number %i, "
|
||||
"setting target to %s.\n",
|
||||
tid, squashed_sn, corrTarget);
|
||||
|
||||
// Squash All Branches AFTER this mispredicted branch
|
||||
squash(squashed_sn, tid);
|
||||
|
||||
// If there's a squash due to a syscall, there may not be an entry
|
||||
// corresponding to the squash. In that case, don't bother trying to
|
||||
// fix up the entry.
|
||||
if (!pred_hist.empty()) {
|
||||
|
||||
HistoryIt hist_it = pred_hist.begin();
|
||||
//HistoryIt hist_it = find(pred_hist.begin(), pred_hist.end(),
|
||||
// squashed_sn);
|
||||
|
||||
//assert(hist_it != pred_hist.end());
|
||||
if (pred_hist.front().seqNum != squashed_sn) {
|
||||
DPRINTF(Fetch, "Front sn %i != Squash sn %i\n",
|
||||
pred_hist.front().seqNum, squashed_sn);
|
||||
|
||||
assert(pred_hist.front().seqNum == squashed_sn);
|
||||
}
|
||||
|
||||
|
||||
if ((*hist_it).usedRAS) {
|
||||
++RASIncorrect;
|
||||
}
|
||||
|
||||
BPUpdate((*hist_it).pc, actually_taken,
|
||||
pred_hist.front().bpHistory, true);
|
||||
if (actually_taken) {
|
||||
if (hist_it->wasReturn && !hist_it->usedRAS) {
|
||||
DPRINTF(Fetch, "BranchPred: [tid: %i] Incorrectly predicted"
|
||||
" return [sn:%i] PC: %s\n", tid, hist_it->seqNum,
|
||||
hist_it->pc);
|
||||
RAS[tid].pop();
|
||||
}
|
||||
DPRINTF(Fetch,"BranchPred: [tid: %i] BTB Update called for [sn:%i]"
|
||||
" PC: %s\n", tid,hist_it->seqNum, hist_it->pc);
|
||||
|
||||
|
||||
BTB.update((*hist_it).pc, corrTarget, tid);
|
||||
|
||||
} else {
|
||||
//Actually not Taken
|
||||
if (hist_it->usedRAS) {
|
||||
DPRINTF(Fetch,"BranchPred: [tid: %i] Incorrectly predicted"
|
||||
" return [sn:%i] PC: %s Restoring RAS\n", tid,
|
||||
hist_it->seqNum, hist_it->pc);
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Restoring top of RAS"
|
||||
" to: %i, target: %s.\n", tid,
|
||||
hist_it->RASIndex, hist_it->RASTarget);
|
||||
RAS[tid].restore(hist_it->RASIndex, hist_it->RASTarget);
|
||||
|
||||
} else if (hist_it->wasCall && hist_it->validBTB) {
|
||||
//Was a Call but predicated false. Pop RAS here
|
||||
DPRINTF(Fetch, "BranchPred: [tid: %i] Incorrectly predicted"
|
||||
" Call [sn:%i] PC: %s Popping RAS\n", tid,
|
||||
hist_it->seqNum, hist_it->pc);
|
||||
RAS[tid].pop();
|
||||
}
|
||||
}
|
||||
DPRINTF(Fetch, "BranchPred: [tid:%i]: Removing history for [sn:%i]"
|
||||
" PC %s Actually Taken: %i\n", tid, hist_it->seqNum,
|
||||
hist_it->pc, actually_taken);
|
||||
|
||||
pred_hist.erase(hist_it);
|
||||
|
||||
DPRINTF(Fetch, "[tid:%i]: predHist.size(): %i\n", tid,
|
||||
predHist[tid].size());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::BPUncond(void * &bp_history)
|
||||
{
|
||||
// Only the tournament predictor cares about unconditional branches.
|
||||
if (predictor == Tournament) {
|
||||
tournamentBP->uncondBr(bp_history);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::BPSquash(void *bp_history)
|
||||
{
|
||||
if (predictor == Local) {
|
||||
localBP->squash(bp_history);
|
||||
} else if (predictor == Tournament) {
|
||||
tournamentBP->squash(bp_history);
|
||||
} else {
|
||||
panic("Predictor type is unexpected value!");
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
BPredUnit<Impl>::BPLookup(Addr instPC, void * &bp_history)
|
||||
{
|
||||
if (predictor == Local) {
|
||||
return localBP->lookup(instPC, bp_history);
|
||||
} else if (predictor == Tournament) {
|
||||
return tournamentBP->lookup(instPC, bp_history);
|
||||
} else {
|
||||
panic("Predictor type is unexpected value!");
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::BPBTBUpdate(Addr instPC, void * &bp_history)
|
||||
{
|
||||
if (predictor == Local) {
|
||||
return localBP->BTBUpdate(instPC, bp_history);
|
||||
} else if (predictor == Tournament) {
|
||||
return tournamentBP->BTBUpdate(instPC, bp_history);
|
||||
} else {
|
||||
panic("Predictor type is unexpected value!");
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::BPUpdate(Addr instPC, bool taken, void *bp_history,
|
||||
bool squashed)
|
||||
{
|
||||
if (predictor == Local) {
|
||||
localBP->update(instPC, taken, bp_history);
|
||||
} else if (predictor == Tournament) {
|
||||
tournamentBP->update(instPC, taken, bp_history, squashed);
|
||||
} else {
|
||||
panic("Predictor type is unexpected value!");
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BPredUnit<Impl>::dump()
|
||||
{
|
||||
HistoryIt pred_hist_it;
|
||||
|
||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||
if (!predHist[i].empty()) {
|
||||
pred_hist_it = predHist[i].begin();
|
||||
|
||||
cprintf("predHist[%i].size(): %i\n", i, predHist[i].size());
|
||||
|
||||
while (pred_hist_it != predHist[i].end()) {
|
||||
cprintf("[sn:%lli], PC:%#x, tid:%i, predTaken:%i, "
|
||||
"bpHistory:%#x\n",
|
||||
pred_hist_it->seqNum, pred_hist_it->pc,
|
||||
pred_hist_it->tid, pred_hist_it->predTaken,
|
||||
pred_hist_it->bpHistory);
|
||||
pred_hist_it++;
|
||||
}
|
||||
|
||||
cprintf("\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
110
simulators/gem5/src/cpu/o3/checker_builder.cc
Normal file
110
simulators/gem5/src/cpu/o3/checker_builder.cc
Normal file
@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "cpu/checker/cpu_impl.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "params/O3Checker.hh"
|
||||
#include "sim/process.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
class MemObject;
|
||||
|
||||
template
|
||||
class Checker<O3CPUImpl>;
|
||||
|
||||
/**
|
||||
* Specific non-templated derived class used for SimObject configuration.
|
||||
*/
|
||||
class O3Checker : public Checker<O3CPUImpl>
|
||||
{
|
||||
public:
|
||||
O3Checker(Params *p)
|
||||
: Checker<O3CPUImpl>(p)
|
||||
{ }
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// CheckerCPU Simulation Object
|
||||
//
|
||||
/**
 * Builds an O3Checker from this parameter object.
 *
 * A fresh O3Checker::Params is allocated and filled in: most fields are
 * copied from this object, while the instruction/load limits and the
 * progress interval are forced to 0 and the interrupt controller is set
 * to NULL.
 *
 * @return The newly allocated checker.
 */
O3Checker *
O3CheckerParams::create()
{
    O3Checker::Params *checker_params = new O3Checker::Params();

    checker_params->name = name;
    checker_params->numThreads = numThreads;

    // The checker never terminates simulation on its own counts.
    checker_params->max_insts_any_thread = 0;
    checker_params->max_insts_all_threads = 0;
    checker_params->max_loads_any_thread = 0;
    checker_params->max_loads_all_threads = 0;

    checker_params->exitOnError = exitOnError;
    checker_params->updateOnError = updateOnError;
    checker_params->warnOnlyOnLoadError = warnOnlyOnLoadError;
    checker_params->clock = clock;
    checker_params->tracer = tracer;

    // Hack to touch all parameters.  Consider not deriving Checker
    // from BaseCPU..it's not really a CPU in the end.
    Counter touched_counter;
    touched_counter = max_insts_any_thread;
    touched_counter = max_insts_all_threads;
    touched_counter = max_loads_any_thread;
    touched_counter = max_loads_all_threads;
    touched_counter++;

    Tick touched_tick = progress_interval;
    checker_params->progress_interval = 0;
    touched_tick++;

    checker_params->itb = itb;
    checker_params->dtb = dtb;
    checker_params->system = system;
    checker_params->cpu_id = cpu_id;
    checker_params->profile = profile;
    checker_params->interrupts = NULL;
    checker_params->workload = workload;

    return new O3Checker(checker_params);
}
|
||||
222
simulators/gem5/src/cpu/o3/comm.hh
Normal file
222
simulators/gem5/src/cpu/o3/comm.hh
Normal file
@ -0,0 +1,222 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_COMM_HH__
|
||||
#define __CPU_O3_COMM_HH__
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "sim/faults.hh"
|
||||
|
||||
// Typedef for the physical register index type. Although the Impl would be
// the most likely location for this, there are a few classes that need this
// typedef yet are not templated on the Impl. For now it is defined here.
typedef short int PhysRegIndex;
|
||||
|
||||
/** Struct that defines the information passed from fetch to decode. */
|
||||
template<class Impl>
|
||||
struct DefaultFetchDefaultDecode {
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[Impl::MaxWidth];
|
||||
Fault fetchFault;
|
||||
InstSeqNum fetchFaultSN;
|
||||
bool clearFetchFault;
|
||||
};
|
||||
|
||||
/**
 * Struct that defines the information passed from decode to rename.
 *
 * @tparam Impl Supplies the DynInstPtr type and the MaxWidth constant that
 *              bounds the instruction array.
 */
template<class Impl>
struct DefaultDecodeDefaultRename {
    typedef typename Impl::DynInstPtr DynInstPtr;

    /** Instruction count; presumably the number of valid entries in insts
     *  (TODO confirm against the decode stage). */
    int size;

    /** Instruction slots, at most Impl::MaxWidth of them. */
    DynInstPtr insts[Impl::MaxWidth];
};
|
||||
|
||||
/**
 * Struct that defines the information passed from rename to IEW.
 *
 * @tparam Impl Supplies the DynInstPtr type and the MaxWidth constant that
 *              bounds the instruction array.
 */
template<class Impl>
struct DefaultRenameDefaultIEW {
    typedef typename Impl::DynInstPtr DynInstPtr;

    /** Instruction count; presumably the number of valid entries in insts
     *  (TODO confirm against the rename stage). */
    int size;

    /** Instruction slots, at most Impl::MaxWidth of them. */
    DynInstPtr insts[Impl::MaxWidth];
};
|
||||
|
||||
/** Struct that defines the information passed from IEW to commit. */
|
||||
template<class Impl>
|
||||
struct DefaultIEWDefaultCommit {
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
int size;
|
||||
|
||||
DynInstPtr insts[Impl::MaxWidth];
|
||||
DynInstPtr mispredictInst[Impl::MaxThreads];
|
||||
Addr mispredPC[Impl::MaxThreads];
|
||||
InstSeqNum squashedSeqNum[Impl::MaxThreads];
|
||||
TheISA::PCState pc[Impl::MaxThreads];
|
||||
|
||||
bool squash[Impl::MaxThreads];
|
||||
bool branchMispredict[Impl::MaxThreads];
|
||||
bool branchTaken[Impl::MaxThreads];
|
||||
bool includeSquashInst[Impl::MaxThreads];
|
||||
};
|
||||
|
||||
/**
 * Struct that carries a group of instructions together with a count.
 * NOTE(review): which pipeline stages exchange this struct is not visible
 * here; confirm against its users.
 *
 * @tparam Impl Supplies the DynInstPtr type and the MaxWidth constant that
 *              bounds the instruction array.
 */
template<class Impl>
struct IssueStruct {
    typedef typename Impl::DynInstPtr DynInstPtr;

    /** Instruction count; presumably the number of valid entries in insts
     *  (TODO confirm). */
    int size;

    /** Instruction slots, at most Impl::MaxWidth of them. */
    DynInstPtr insts[Impl::MaxWidth];
};
|
||||
|
||||
/** Struct that defines all backwards communication. */
|
||||
template<class Impl>
|
||||
struct TimeBufStruct {
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
struct decodeComm {
|
||||
uint64_t branchAddr;
|
||||
InstSeqNum doneSeqNum;
|
||||
DynInstPtr mispredictInst;
|
||||
DynInstPtr squashInst;
|
||||
Addr mispredPC;
|
||||
TheISA::PCState nextPC;
|
||||
unsigned branchCount;
|
||||
bool squash;
|
||||
bool predIncorrect;
|
||||
bool branchMispredict;
|
||||
bool branchTaken;
|
||||
};
|
||||
|
||||
decodeComm decodeInfo[Impl::MaxThreads];
|
||||
|
||||
struct renameComm {
|
||||
};
|
||||
|
||||
renameComm renameInfo[Impl::MaxThreads];
|
||||
|
||||
struct iewComm {
|
||||
// Also eventually include skid buffer space.
|
||||
bool usedIQ;
|
||||
unsigned freeIQEntries;
|
||||
bool usedLSQ;
|
||||
unsigned freeLSQEntries;
|
||||
|
||||
unsigned iqCount;
|
||||
unsigned ldstqCount;
|
||||
|
||||
unsigned dispatched;
|
||||
unsigned dispatchedToLSQ;
|
||||
};
|
||||
|
||||
iewComm iewInfo[Impl::MaxThreads];
|
||||
|
||||
struct commitComm {
|
||||
|
||||
/////////////// For Decode, IEW, Rename, Fetch ///////////
|
||||
bool squash;
|
||||
bool robSquashing;
|
||||
|
||||
////////// For Fetch & IEW /////////////
|
||||
// Represents the instruction that has either been retired or
|
||||
// squashed. Similar to having a single bus that broadcasts the
|
||||
// retired or squashed sequence number.
|
||||
InstSeqNum doneSeqNum;
|
||||
|
||||
////////////// For Rename /////////////////
|
||||
// Rename should re-read number of free rob entries
|
||||
bool usedROB;
|
||||
// Notify Rename that the ROB is empty
|
||||
bool emptyROB;
|
||||
// Tell Rename how many free entries it has in the ROB
|
||||
unsigned freeROBEntries;
|
||||
|
||||
|
||||
///////////// For Fetch //////////////////
|
||||
// Provide fetch the instruction that mispredicted, if this
|
||||
// pointer is not-null a misprediction occured
|
||||
DynInstPtr mispredictInst;
|
||||
// Was the branch taken or not
|
||||
bool branchTaken;
|
||||
// The pc of the next instruction to execute. This is the next
|
||||
// instruction for a branch mispredict, but the same instruction for
|
||||
// order violation and the like
|
||||
TheISA::PCState pc;
|
||||
|
||||
// Instruction that caused the a non-mispredict squash
|
||||
DynInstPtr squashInst;
|
||||
// If an interrupt is pending and fetch should stall
|
||||
bool interruptPending;
|
||||
// If the interrupt ended up being cleared before being handled
|
||||
bool clearInterrupt;
|
||||
|
||||
//////////// For IEW //////////////////
|
||||
// Communication specifically to the IQ to tell the IQ that it can
|
||||
// schedule a non-speculative instruction.
|
||||
InstSeqNum nonSpecSeqNum;
|
||||
|
||||
// Hack for now to send back an uncached access to the IEW stage.
|
||||
bool uncached;
|
||||
DynInstPtr uncachedLoad;
|
||||
|
||||
};
|
||||
|
||||
commitComm commitInfo[Impl::MaxThreads];
|
||||
|
||||
bool decodeBlock[Impl::MaxThreads];
|
||||
bool decodeUnblock[Impl::MaxThreads];
|
||||
bool renameBlock[Impl::MaxThreads];
|
||||
bool renameUnblock[Impl::MaxThreads];
|
||||
bool iewBlock[Impl::MaxThreads];
|
||||
bool iewUnblock[Impl::MaxThreads];
|
||||
bool commitBlock[Impl::MaxThreads];
|
||||
bool commitUnblock[Impl::MaxThreads];
|
||||
};
|
||||
|
||||
#endif //__CPU_O3_COMM_HH__
|
||||
34
simulators/gem5/src/cpu/o3/commit.cc
Normal file
34
simulators/gem5/src/cpu/o3/commit.cc
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/commit_impl.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
|
||||
// Explicitly instantiate the DefaultCommit template for O3CPUImpl so that its
// member definitions (pulled in via commit_impl.hh) are compiled in this file.
template class DefaultCommit<O3CPUImpl>;
|
||||
497
simulators/gem5/src/cpu/o3/commit.hh
Normal file
497
simulators/gem5/src/cpu/o3/commit.hh
Normal file
@ -0,0 +1,497 @@
|
||||
/*
|
||||
* Copyright (c) 2010 ARM Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_COMMIT_HH__
|
||||
#define __CPU_O3_COMMIT_HH__
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/exetrace.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
|
||||
// Forward declarations: only the names are needed by the declarations below.
struct DerivO3CPUParams;

template <class>
struct O3ThreadState;
|
||||
|
||||
/**
|
||||
* DefaultCommit handles single threaded and SMT commit. Its width is
|
||||
* specified by the parameters; each cycle it tries to commit that
|
||||
* many instructions. The SMT policy decides which thread it tries to
|
||||
* commit instructions from. Non- speculative instructions must reach
|
||||
* the head of the ROB before they are ready to execute; once they
|
||||
* reach the head, commit will broadcast the instruction's sequence
|
||||
* number to the previous stages so that they can issue/ execute the
|
||||
* instruction. Only one non-speculative instruction is handled per
|
||||
* cycle. Commit is responsible for handling all back-end initiated
|
||||
* redirects. It receives the redirect, and then broadcasts it to all
|
||||
* stages, indicating the sequence number they should squash until,
|
||||
* and any necessary branch misprediction information as well. It
|
||||
* priortizes redirects by instruction's age, only broadcasting a
|
||||
* redirect if it corresponds to an instruction that should currently
|
||||
* be in the ROB. This is done by tracking the sequence number of the
|
||||
* youngest instruction in the ROB, which gets updated to any
|
||||
* squashing instruction's sequence number, and only broadcasting a
|
||||
* redirect if it corresponds to an older instruction. Commit also
|
||||
* supports multiple cycle squashing, to model a ROB that can only
|
||||
* remove a certain number of instructions per cycle.
|
||||
*/
|
||||
template<class Impl>
|
||||
class DefaultCommit
|
||||
{
|
||||
public:
|
||||
// Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::CPUPol CPUPol;
|
||||
|
||||
typedef typename CPUPol::RenameMap RenameMap;
|
||||
typedef typename CPUPol::ROB ROB;
|
||||
|
||||
typedef typename CPUPol::TimeStruct TimeStruct;
|
||||
typedef typename CPUPol::FetchStruct FetchStruct;
|
||||
typedef typename CPUPol::IEWStruct IEWStruct;
|
||||
typedef typename CPUPol::RenameStruct RenameStruct;
|
||||
|
||||
typedef typename CPUPol::Fetch Fetch;
|
||||
typedef typename CPUPol::IEW IEW;
|
||||
|
||||
typedef O3ThreadState<Impl> Thread;
|
||||
|
||||
/** Event class used to schedule a squash due to a trap (fault or
|
||||
* interrupt) to happen on a specific cycle.
|
||||
*/
|
||||
class TrapEvent : public Event {
|
||||
private:
|
||||
DefaultCommit<Impl> *commit;
|
||||
ThreadID tid;
|
||||
|
||||
public:
|
||||
TrapEvent(DefaultCommit<Impl> *_commit, ThreadID _tid);
|
||||
|
||||
void process();
|
||||
const char *description() const;
|
||||
};
|
||||
|
||||
/** Overall commit status. Used to determine if the CPU can deschedule
|
||||
* itself due to a lack of activity.
|
||||
*/
|
||||
enum CommitStatus{
|
||||
Active,
|
||||
Inactive
|
||||
};
|
||||
|
||||
/** Individual thread status. */
|
||||
enum ThreadStatus {
|
||||
Running,
|
||||
Idle,
|
||||
ROBSquashing,
|
||||
TrapPending,
|
||||
FetchTrapPending
|
||||
};
|
||||
|
||||
/** Commit policy for SMT mode. */
|
||||
enum CommitPolicy {
|
||||
Aggressive,
|
||||
RoundRobin,
|
||||
OldestReady
|
||||
};
|
||||
|
||||
private:
|
||||
/** Overall commit status. */
|
||||
CommitStatus _status;
|
||||
/** Next commit status, to be set at the end of the cycle. */
|
||||
CommitStatus _nextStatus;
|
||||
/** Per-thread status. */
|
||||
ThreadStatus commitStatus[Impl::MaxThreads];
|
||||
/** Commit policy used in SMT mode. */
|
||||
CommitPolicy commitPolicy;
|
||||
|
||||
public:
|
||||
/** Construct a DefaultCommit with the given parameters. */
|
||||
DefaultCommit(O3CPU *_cpu, DerivO3CPUParams *params);
|
||||
|
||||
/** Returns the name of the DefaultCommit. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Sets the list of threads. */
|
||||
void setThreads(std::vector<Thread *> &threads);
|
||||
|
||||
/** Sets the main time buffer pointer, used for backwards communication. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||
|
||||
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
|
||||
|
||||
/** Sets the pointer to the queue coming from rename. */
|
||||
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
|
||||
|
||||
/** Sets the pointer to the queue coming from IEW. */
|
||||
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
|
||||
|
||||
/** Sets the pointer to the IEW stage. */
|
||||
void setIEWStage(IEW *iew_stage);
|
||||
|
||||
/** Skid buffer between rename and commit. */
|
||||
std::queue<DynInstPtr> skidBuffer;
|
||||
|
||||
/** The pointer to the IEW stage. Used solely to ensure that
|
||||
* various events (traps, interrupts, syscalls) do not occur until
|
||||
* all stores have written back.
|
||||
*/
|
||||
IEW *iewStage;
|
||||
|
||||
/** Sets pointer to list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Sets pointer to the commited state rename map. */
|
||||
void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
|
||||
|
||||
/** Sets pointer to the ROB. */
|
||||
void setROB(ROB *rob_ptr);
|
||||
|
||||
/** Initializes stage by sending back the number of free entries. */
|
||||
void initStage();
|
||||
|
||||
/** Initializes the draining of commit. */
|
||||
bool drain();
|
||||
|
||||
/** Resumes execution after draining. */
|
||||
void resume();
|
||||
|
||||
/** Completes the switch out of commit. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Ticks the commit stage, which tries to commit instructions. */
|
||||
void tick();
|
||||
|
||||
/** Handles any squashes that are sent from IEW, and adds instructions
|
||||
* to the ROB and tries to commit instructions.
|
||||
*/
|
||||
void commit();
|
||||
|
||||
/** Returns the number of free ROB entries for a specific thread. */
|
||||
size_t numROBFreeEntries(ThreadID tid);
|
||||
|
||||
/** Generates an event to schedule a squash due to a trap. */
|
||||
void generateTrapEvent(ThreadID tid);
|
||||
|
||||
/** Records that commit needs to initiate a squash due to an
|
||||
* external state update through the TC.
|
||||
*/
|
||||
void generateTCEvent(ThreadID tid);
|
||||
|
||||
private:
|
||||
/** Updates the overall status of commit with the nextStatus, and
|
||||
* tell the CPU if commit is active/inactive.
|
||||
*/
|
||||
void updateStatus();
|
||||
|
||||
/** Sets the next status based on threads' statuses, which becomes the
|
||||
* current status at the end of the cycle.
|
||||
*/
|
||||
void setNextStatus();
|
||||
|
||||
/** Checks if the ROB is completed with squashing. This is for the case
|
||||
* where the ROB can take multiple cycles to complete squashing.
|
||||
*/
|
||||
bool robDoneSquashing();
|
||||
|
||||
/** Returns if any of the threads have the number of ROB entries changed
|
||||
* on this cycle. Used to determine if the number of free ROB entries needs
|
||||
* to be sent back to previous stages.
|
||||
*/
|
||||
bool changedROBEntries();
|
||||
|
||||
/** Squashes all in flight instructions. */
|
||||
void squashAll(ThreadID tid);
|
||||
|
||||
/** Handles squashing due to a trap. */
|
||||
void squashFromTrap(ThreadID tid);
|
||||
|
||||
/** Handles squashing due to an TC write. */
|
||||
void squashFromTC(ThreadID tid);
|
||||
|
||||
/** Handles squashing from instruction with SquashAfter set.
|
||||
* This differs from the other squashes as it squashes following
|
||||
* instructions instead of the current instruction and doesn't
|
||||
* clean up various status bits about traps/tc writes pending.
|
||||
*/
|
||||
void squashAfter(ThreadID tid, DynInstPtr &head_inst,
|
||||
uint64_t squash_after_seq_num);
|
||||
|
||||
/** Handles processing an interrupt. */
|
||||
void handleInterrupt();
|
||||
|
||||
/** Get fetch redirecting so we can handle an interrupt */
|
||||
void propagateInterrupt();
|
||||
|
||||
/** Commits as many instructions as possible. */
|
||||
void commitInsts();
|
||||
|
||||
/** Tries to commit the head ROB instruction passed in.
|
||||
* @param head_inst The instruction to be committed.
|
||||
*/
|
||||
bool commitHead(DynInstPtr &head_inst, unsigned inst_num);
|
||||
|
||||
/** Gets instructions from rename and inserts them into the ROB. */
|
||||
void getInsts();
|
||||
|
||||
/** Insert all instructions from rename into skidBuffer */
|
||||
void skidInsert();
|
||||
|
||||
/** Marks completed instructions using information sent from IEW. */
|
||||
void markCompletedInsts();
|
||||
|
||||
/** Gets the thread to commit, based on the SMT policy. */
|
||||
ThreadID getCommittingThread();
|
||||
|
||||
/** Returns the thread ID to use based on a round robin policy. */
|
||||
ThreadID roundRobin();
|
||||
|
||||
/** Returns the thread ID to use based on an oldest instruction policy. */
|
||||
ThreadID oldestReady();
|
||||
|
||||
public:
|
||||
/** Reads the PC of a specific thread. */
|
||||
TheISA::PCState pcState(ThreadID tid) { return pc[tid]; }
|
||||
|
||||
/** Sets the PC of a specific thread. */
|
||||
void pcState(const TheISA::PCState &val, ThreadID tid)
|
||||
{ pc[tid] = val; }
|
||||
|
||||
/** Returns the PC of a specific thread. */
|
||||
Addr instAddr(ThreadID tid) { return pc[tid].instAddr(); }
|
||||
|
||||
/** Returns the next PC of a specific thread. */
|
||||
Addr nextInstAddr(ThreadID tid) { return pc[tid].nextInstAddr(); }
|
||||
|
||||
/** Reads the micro PC of a specific thread. */
|
||||
Addr microPC(ThreadID tid) { return pc[tid].microPC(); }
|
||||
|
||||
private:
|
||||
/** Time buffer interface. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to write information heading to previous stages. */
|
||||
typename TimeBuffer<TimeStruct>::wire toIEW;
|
||||
|
||||
/** Wire to read information from IEW (for ROB). */
|
||||
typename TimeBuffer<TimeStruct>::wire robInfoFromIEW;
|
||||
|
||||
TimeBuffer<FetchStruct> *fetchQueue;
|
||||
|
||||
typename TimeBuffer<FetchStruct>::wire fromFetch;
|
||||
|
||||
/** IEW instruction queue interface. */
|
||||
TimeBuffer<IEWStruct> *iewQueue;
|
||||
|
||||
/** Wire to read information from IEW queue. */
|
||||
typename TimeBuffer<IEWStruct>::wire fromIEW;
|
||||
|
||||
/** Rename instruction queue interface, for ROB. */
|
||||
TimeBuffer<RenameStruct> *renameQueue;
|
||||
|
||||
/** Wire to read information from rename queue. */
|
||||
typename TimeBuffer<RenameStruct>::wire fromRename;
|
||||
|
||||
public:
|
||||
/** ROB interface. */
|
||||
ROB *rob;
|
||||
|
||||
private:
|
||||
/** Pointer to O3CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Vector of all of the threads. */
|
||||
std::vector<Thread *> thread;
|
||||
|
||||
/** Records that commit has written to the time buffer this cycle. Used for
|
||||
* the CPU to determine if it can deschedule itself if there is no activity.
|
||||
*/
|
||||
bool wroteToTimeBuffer;
|
||||
|
||||
/** Records if the number of ROB entries has changed this cycle. If it has,
|
||||
* then the number of free entries must be re-broadcast.
|
||||
*/
|
||||
bool changedROBNumEntries[Impl::MaxThreads];
|
||||
|
||||
/** A counter of how many threads are currently squashing. */
|
||||
ThreadID squashCounter;
|
||||
|
||||
/** Records if a thread has to squash this cycle due to a trap. */
|
||||
bool trapSquash[Impl::MaxThreads];
|
||||
|
||||
/** Records if a thread has to squash this cycle due to an XC write. */
|
||||
bool tcSquash[Impl::MaxThreads];
|
||||
|
||||
/** Priority List used for Commit Policy */
|
||||
std::list<ThreadID> priority_list;
|
||||
|
||||
/** IEW to Commit delay, in ticks. */
|
||||
unsigned iewToCommitDelay;
|
||||
|
||||
/** Commit to IEW delay, in ticks. */
|
||||
unsigned commitToIEWDelay;
|
||||
|
||||
/** Rename to ROB delay, in ticks. */
|
||||
unsigned renameToROBDelay;
|
||||
|
||||
unsigned fetchToCommitDelay;
|
||||
|
||||
/** Rename width, in instructions. Used so ROB knows how many
|
||||
* instructions to get from the rename instruction queue.
|
||||
*/
|
||||
unsigned renameWidth;
|
||||
|
||||
/** Commit width, in instructions. */
|
||||
unsigned commitWidth;
|
||||
|
||||
/** Number of Reorder Buffers */
|
||||
unsigned numRobs;
|
||||
|
||||
/** Number of Active Threads */
|
||||
ThreadID numThreads;
|
||||
|
||||
/** Is a drain pending. */
|
||||
bool drainPending;
|
||||
|
||||
/** Is commit switched out. */
|
||||
bool switchedOut;
|
||||
|
||||
/** The latency to handle a trap. Used when scheduling trap
|
||||
* squash event.
|
||||
*/
|
||||
Tick trapLatency;
|
||||
|
||||
/** The interrupt fault. */
|
||||
Fault interrupt;
|
||||
|
||||
/** The commit PC state of each thread. Refers to the instruction that
|
||||
* is currently being processed/committed.
|
||||
*/
|
||||
TheISA::PCState pc[Impl::MaxThreads];
|
||||
|
||||
/** The sequence number of the youngest valid instruction in the ROB. */
|
||||
InstSeqNum youngestSeqNum[Impl::MaxThreads];
|
||||
|
||||
/** The sequence number of the last commited instruction. */
|
||||
InstSeqNum lastCommitedSeqNum[Impl::MaxThreads];
|
||||
|
||||
/** Records if there is a trap currently in flight. */
|
||||
bool trapInFlight[Impl::MaxThreads];
|
||||
|
||||
/** Records if there were any stores committed this cycle. */
|
||||
bool committedStores[Impl::MaxThreads];
|
||||
|
||||
/** Records if commit should check if the ROB is truly empty (see
|
||||
commit_impl.hh). */
|
||||
bool checkEmptyROB[Impl::MaxThreads];
|
||||
|
||||
/** Pointer to the list of active threads. */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Rename map interface. */
|
||||
RenameMap *renameMap[Impl::MaxThreads];
|
||||
|
||||
/** True if last committed microop can be followed by an interrupt */
|
||||
bool canHandleInterrupts;
|
||||
|
||||
/** Updates commit stats based on this instruction. */
|
||||
void updateComInstStats(DynInstPtr &inst);
|
||||
|
||||
/** Stat for the total number of committed instructions. */
|
||||
Stats::Scalar commitCommittedInsts;
|
||||
/** Stat for the total number of committed ops. */
|
||||
Stats::Scalar commitCommittedOps;
|
||||
/** Stat for the total number of squashed instructions discarded by commit.
|
||||
*/
|
||||
Stats::Scalar commitSquashedInsts;
|
||||
/** Stat for the total number of times commit is told to squash.
|
||||
* @todo: Actually increment this stat.
|
||||
*/
|
||||
Stats::Scalar commitSquashEvents;
|
||||
/** Stat for the total number of times commit has had to stall due to a non-
|
||||
* speculative instruction reaching the head of the ROB.
|
||||
*/
|
||||
Stats::Scalar commitNonSpecStalls;
|
||||
/** Stat for the total number of branch mispredicts that caused a squash. */
|
||||
Stats::Scalar branchMispredicts;
|
||||
/** Distribution of the number of committed instructions each cycle. */
|
||||
Stats::Distribution numCommittedDist;
|
||||
|
||||
/** Total number of instructions committed. */
|
||||
Stats::Vector instsCommitted;
|
||||
/** Total number of ops (including micro ops) committed. */
|
||||
Stats::Vector opsCommitted;
|
||||
/** Total number of software prefetches committed. */
|
||||
Stats::Vector statComSwp;
|
||||
/** Stat for the total number of committed memory references. */
|
||||
Stats::Vector statComRefs;
|
||||
/** Stat for the total number of committed loads. */
|
||||
Stats::Vector statComLoads;
|
||||
/** Total number of committed memory barriers. */
|
||||
Stats::Vector statComMembars;
|
||||
/** Total number of committed branches. */
|
||||
Stats::Vector statComBranches;
|
||||
/** Total number of floating point instructions */
|
||||
Stats::Vector statComFloating;
|
||||
/** Total number of integer instructions */
|
||||
Stats::Vector statComInteger;
|
||||
/** Total number of function calls */
|
||||
Stats::Vector statComFunctionCalls;
|
||||
|
||||
/** Number of cycles where the commit bandwidth limit is reached. */
|
||||
Stats::Scalar commitEligibleSamples;
|
||||
/** Number of instructions not committed due to bandwidth limits. */
|
||||
Stats::Vector commitEligible;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_COMMIT_HH__
|
||||
1513
simulators/gem5/src/cpu/o3/commit_impl.hh
Normal file
1513
simulators/gem5/src/cpu/o3/commit_impl.hh
Normal file
File diff suppressed because it is too large
Load Diff
1732
simulators/gem5/src/cpu/o3/cpu.cc
Normal file
1732
simulators/gem5/src/cpu/o3/cpu.cc
Normal file
File diff suppressed because it is too large
Load Diff
822
simulators/gem5/src/cpu/o3/cpu.hh
Normal file
822
simulators/gem5/src/cpu/o3/cpu.hh
Normal file
@ -0,0 +1,822 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* Copyright (c) 2011 Regents of the University of California
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
* Rick Strong
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_CPU_HH__
|
||||
#define __CPU_O3_CPU_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <queue>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "cpu/o3/cpu_policy.hh"
|
||||
#include "cpu/o3/scoreboard.hh"
|
||||
#include "cpu/o3/thread_state.hh"
|
||||
#include "cpu/activity.hh"
|
||||
#include "cpu/base.hh"
|
||||
#include "cpu/simple_thread.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
//#include "cpu/o3/thread_context.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
#include "sim/process.hh"
|
||||
|
||||
// Forward declarations: only the names are needed by the declarations below.
template <class>
class Checker;
class ThreadContext;
template <class>
class O3ThreadContext;

class Checkpoint;
class MemObject;
class Process;

struct BaseCPUParams;
|
||||
|
||||
class BaseO3CPU : public BaseCPU
|
||||
{
|
||||
//Stuff that's pretty ISA independent will go here.
|
||||
public:
|
||||
BaseO3CPU(BaseCPUParams *params);
|
||||
|
||||
void regStats();
|
||||
};
|
||||
|
||||
/**
|
||||
* FullO3CPU class, has each of the stages (fetch through commit)
|
||||
* within it, as well as all of the time buffers between stages. The
|
||||
* tick() function for the CPU is defined here.
|
||||
*/
|
||||
template <class Impl>
|
||||
class FullO3CPU : public BaseO3CPU
|
||||
{
|
||||
public:
|
||||
// Typedefs from the Impl here.
|
||||
typedef typename Impl::CPUPol CPUPolicy;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
typedef O3ThreadState<Impl> ImplState;
|
||||
typedef O3ThreadState<Impl> Thread;
|
||||
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
|
||||
friend class O3ThreadContext<Impl>;
|
||||
|
||||
public:
|
||||
enum Status {
|
||||
Running,
|
||||
Idle,
|
||||
Halted,
|
||||
Blocked,
|
||||
SwitchedOut
|
||||
};
|
||||
|
||||
TheISA::TLB * itb;
|
||||
TheISA::TLB * dtb;
|
||||
|
||||
/** Overall CPU status. */
|
||||
Status _status;
|
||||
|
||||
/** Per-thread status in CPU, used for SMT. */
|
||||
Status _threadStatus[Impl::MaxThreads];
|
||||
|
||||
private:
|
||||
|
||||
/**
|
||||
* IcachePort class for instruction fetch.
|
||||
*/
|
||||
class IcachePort : public CpuPort
|
||||
{
|
||||
protected:
|
||||
/** Pointer to fetch. */
|
||||
DefaultFetch<Impl> *fetch;
|
||||
|
||||
public:
|
||||
/** Default constructor. */
|
||||
IcachePort(DefaultFetch<Impl> *_fetch, FullO3CPU<Impl>* _cpu)
|
||||
: CpuPort(_fetch->name() + "-iport", _cpu), fetch(_fetch)
|
||||
{ }
|
||||
|
||||
protected:
|
||||
|
||||
/** Timing version of receive. Handles setting fetch to the
|
||||
* proper status to start fetching. */
|
||||
virtual bool recvTimingResp(PacketPtr pkt);
|
||||
virtual void recvTimingSnoopReq(PacketPtr pkt) { }
|
||||
|
||||
/** Handles doing a retry of a failed fetch. */
|
||||
virtual void recvRetry();
|
||||
};
|
||||
|
||||
/**
|
||||
* DcachePort class for the load/store queue.
|
||||
*/
|
||||
class DcachePort : public CpuPort
|
||||
{
|
||||
protected:
|
||||
|
||||
/** Pointer to LSQ. */
|
||||
LSQ<Impl> *lsq;
|
||||
|
||||
public:
|
||||
/** Default constructor. */
|
||||
DcachePort(LSQ<Impl> *_lsq, FullO3CPU<Impl>* _cpu)
|
||||
: CpuPort(_lsq->name() + "-dport", _cpu), lsq(_lsq)
|
||||
{ }
|
||||
|
||||
protected:
|
||||
|
||||
/** Timing version of receive. Handles writing back and
|
||||
* completing the load or store that has returned from
|
||||
* memory. */
|
||||
virtual bool recvTimingResp(PacketPtr pkt);
|
||||
virtual void recvTimingSnoopReq(PacketPtr pkt);
|
||||
|
||||
/** Handles doing a retry of the previous send. */
|
||||
virtual void recvRetry();
|
||||
|
||||
/**
|
||||
* As this CPU requires snooping to maintain the load store queue
|
||||
* change the behaviour from the base CPU port.
|
||||
*
|
||||
* @return true since we have to snoop
|
||||
*/
|
||||
virtual bool isSnooping() const { return true; }
|
||||
};
|
||||
|
||||
class TickEvent : public Event
|
||||
{
|
||||
private:
|
||||
/** Pointer to the CPU. */
|
||||
FullO3CPU<Impl> *cpu;
|
||||
|
||||
public:
|
||||
/** Constructs a tick event. */
|
||||
TickEvent(FullO3CPU<Impl> *c);
|
||||
|
||||
/** Processes a tick event, calling tick() on the CPU. */
|
||||
void process();
|
||||
/** Returns the description of the tick event. */
|
||||
const char *description() const;
|
||||
};
|
||||
|
||||
/** The tick event used for scheduling CPU ticks. */
|
||||
TickEvent tickEvent;
|
||||
|
||||
/** Schedule tick event, regardless of its current state. */
|
||||
void scheduleTickEvent(int delay)
|
||||
{
|
||||
if (tickEvent.squashed())
|
||||
reschedule(tickEvent, nextCycle(curTick() + ticks(delay)));
|
||||
else if (!tickEvent.scheduled())
|
||||
schedule(tickEvent, nextCycle(curTick() + ticks(delay)));
|
||||
}
|
||||
|
||||
/** Unschedule tick event, regardless of its current state. */
|
||||
void unscheduleTickEvent()
|
||||
{
|
||||
if (tickEvent.scheduled())
|
||||
tickEvent.squash();
|
||||
}
|
||||
|
||||
class ActivateThreadEvent : public Event
|
||||
{
|
||||
private:
|
||||
/** Number of Thread to Activate */
|
||||
ThreadID tid;
|
||||
|
||||
/** Pointer to the CPU. */
|
||||
FullO3CPU<Impl> *cpu;
|
||||
|
||||
public:
|
||||
/** Constructs the event. */
|
||||
ActivateThreadEvent();
|
||||
|
||||
/** Initialize Event */
|
||||
void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
|
||||
|
||||
/** Processes the event, calling activateThread() on the CPU. */
|
||||
void process();
|
||||
|
||||
/** Returns the description of the event. */
|
||||
const char *description() const;
|
||||
};
|
||||
|
||||
/** Schedule thread to activate , regardless of its current state. */
|
||||
void
|
||||
scheduleActivateThreadEvent(ThreadID tid, int delay)
|
||||
{
|
||||
// Schedule thread to activate, regardless of its current state.
|
||||
if (activateThreadEvent[tid].squashed())
|
||||
reschedule(activateThreadEvent[tid],
|
||||
nextCycle(curTick() + ticks(delay)));
|
||||
else if (!activateThreadEvent[tid].scheduled()) {
|
||||
Tick when = nextCycle(curTick() + ticks(delay));
|
||||
|
||||
// Check if the deallocateEvent is also scheduled, and make
|
||||
// sure they do not happen at same time causing a sleep that
|
||||
// is never woken from.
|
||||
if (deallocateContextEvent[tid].scheduled() &&
|
||||
deallocateContextEvent[tid].when() == when) {
|
||||
when++;
|
||||
}
|
||||
|
||||
schedule(activateThreadEvent[tid], when);
|
||||
}
|
||||
}
|
||||
|
||||
/** Unschedule actiavte thread event, regardless of its current state. */
|
||||
void
|
||||
unscheduleActivateThreadEvent(ThreadID tid)
|
||||
{
|
||||
if (activateThreadEvent[tid].scheduled())
|
||||
activateThreadEvent[tid].squash();
|
||||
}
|
||||
|
||||
/** Per-thread events used for scheduling thread activation. */
|
||||
ActivateThreadEvent activateThreadEvent[Impl::MaxThreads];
|
||||
|
||||
class DeallocateContextEvent : public Event
|
||||
{
|
||||
private:
|
||||
/** Number of Thread to deactivate */
|
||||
ThreadID tid;
|
||||
|
||||
/** Should the thread be removed from the CPU? */
|
||||
bool remove;
|
||||
|
||||
/** Pointer to the CPU. */
|
||||
FullO3CPU<Impl> *cpu;
|
||||
|
||||
public:
|
||||
/** Constructs the event. */
|
||||
DeallocateContextEvent();
|
||||
|
||||
/** Initialize Event */
|
||||
void init(int thread_num, FullO3CPU<Impl> *thread_cpu);
|
||||
|
||||
/** Processes the event, calling activateThread() on the CPU. */
|
||||
void process();
|
||||
|
||||
/** Sets whether the thread should also be removed from the CPU. */
|
||||
void setRemove(bool _remove) { remove = _remove; }
|
||||
|
||||
/** Returns the description of the event. */
|
||||
const char *description() const;
|
||||
};
|
||||
|
||||
/** Schedule cpu to deallocate thread context.*/
|
||||
void
|
||||
scheduleDeallocateContextEvent(ThreadID tid, bool remove, int delay)
|
||||
{
|
||||
// Schedule thread to activate, regardless of its current state.
|
||||
if (deallocateContextEvent[tid].squashed())
|
||||
reschedule(deallocateContextEvent[tid],
|
||||
nextCycle(curTick() + ticks(delay)));
|
||||
else if (!deallocateContextEvent[tid].scheduled())
|
||||
schedule(deallocateContextEvent[tid],
|
||||
nextCycle(curTick() + ticks(delay)));
|
||||
}
|
||||
|
||||
/** Unschedule thread deallocation in CPU */
|
||||
void
|
||||
unscheduleDeallocateContextEvent(ThreadID tid)
|
||||
{
|
||||
if (deallocateContextEvent[tid].scheduled())
|
||||
deallocateContextEvent[tid].squash();
|
||||
}
|
||||
|
||||
/** Per-thread events used for scheduling thread context deallocation. */
|
||||
DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads];
|
||||
|
||||
public:
|
||||
/** Constructs a CPU with the given parameters. */
|
||||
FullO3CPU(DerivO3CPUParams *params);
|
||||
/** Destructor. */
|
||||
~FullO3CPU();
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Forwards a page demap request to the instruction TLB and then to the
 *  data TLB. */
void demapPage(Addr vaddr, uint64_t asn)
{
    itb->demapPage(vaddr, asn);
    dtb->demapPage(vaddr, asn);
}
|
||||
|
||||
/** Forwards a page demap request to the instruction TLB only. */
void demapInstPage(Addr vaddr, uint64_t asn)
{
    itb->demapPage(vaddr, asn);
}
|
||||
|
||||
/** Forwards a page demap request to the data TLB only. */
void demapDataPage(Addr vaddr, uint64_t asn)
{
    dtb->demapPage(vaddr, asn);
}
|
||||
|
||||
/** Ticks CPU, calling tick() on each stage, and checking the overall
|
||||
* activity to see if the CPU should deschedule itself.
|
||||
*/
|
||||
void tick();
|
||||
|
||||
/** Initialize the CPU */
|
||||
void init();
|
||||
|
||||
/** Returns the Number of Active Threads in the CPU */
|
||||
int numActiveThreads()
|
||||
{ return activeThreads.size(); }
|
||||
|
||||
/** Add Thread to Active Threads List */
|
||||
void activateThread(ThreadID tid);
|
||||
|
||||
/** Remove Thread from Active Threads List */
|
||||
void deactivateThread(ThreadID tid);
|
||||
|
||||
/** Setup CPU to insert a thread's context */
|
||||
void insertThread(ThreadID tid);
|
||||
|
||||
/** Remove all of a thread's context from CPU */
|
||||
void removeThread(ThreadID tid);
|
||||
|
||||
/** Count the Total Instructions Committed in the CPU. */
|
||||
virtual Counter totalInsts() const;
|
||||
|
||||
/** Count the Total Ops (including micro ops) committed in the CPU. */
|
||||
virtual Counter totalOps() const;
|
||||
|
||||
/** Add Thread to Active Threads List. */
|
||||
void activateContext(ThreadID tid, int delay);
|
||||
|
||||
/** Remove Thread from Active Threads List */
|
||||
void suspendContext(ThreadID tid);
|
||||
|
||||
/** Remove Thread from Active Threads List &&
|
||||
* Possibly Remove Thread Context from CPU.
|
||||
*/
|
||||
bool scheduleDeallocateContext(ThreadID tid, bool remove, int delay = 1);
|
||||
|
||||
/** Remove Thread from Active Threads List &&
|
||||
* Remove Thread Context from CPU.
|
||||
*/
|
||||
void haltContext(ThreadID tid);
|
||||
|
||||
/** Activate a Thread When CPU Resources are Available. */
|
||||
void activateWhenReady(ThreadID tid);
|
||||
|
||||
/** Add or Remove a Thread Context in the CPU. */
|
||||
void doContextSwitch();
|
||||
|
||||
/** Update The Order In Which We Process Threads. */
|
||||
void updateThreadPriority();
|
||||
|
||||
/** Serialize state. */
|
||||
virtual void serialize(std::ostream &os);
|
||||
|
||||
/** Unserialize from a checkpoint. */
|
||||
virtual void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
|
||||
public:
|
||||
/** Executes a syscall.
|
||||
* @todo: Determine if this needs to be virtual.
|
||||
*/
|
||||
void syscall(int64_t callnum, ThreadID tid);
|
||||
|
||||
/** Starts draining the CPU's pipeline of all instructions in
|
||||
* order to stop all memory accesses. */
|
||||
virtual unsigned int drain(Event *drain_event);
|
||||
|
||||
/** Resumes execution after a drain. */
|
||||
virtual void resume();
|
||||
|
||||
/** Signals to this CPU that a stage has completed switching out. */
|
||||
void signalDrained();
|
||||
|
||||
/** Switches out this CPU. */
|
||||
virtual void switchOut();
|
||||
|
||||
/** Takes over from another CPU. */
|
||||
virtual void takeOverFrom(BaseCPU *oldCPU);
|
||||
|
||||
/** Get the current instruction sequence number, and increment it. */
|
||||
InstSeqNum getAndIncrementInstSeq()
|
||||
{ return globalSeqNum++; }
|
||||
|
||||
/** Traps to handle given fault. */
|
||||
void trap(Fault fault, ThreadID tid, StaticInstPtr inst);
|
||||
|
||||
/** HW return from error interrupt. */
|
||||
Fault hwrei(ThreadID tid);
|
||||
|
||||
bool simPalCheck(int palFunc, ThreadID tid);
|
||||
|
||||
/** Returns the Fault for any valid interrupt. */
|
||||
Fault getInterrupts();
|
||||
|
||||
/** Processes an interrupt fault. */
|
||||
void processInterrupts(Fault interrupt);
|
||||
|
||||
/** Halts the CPU. */
|
||||
void halt() { panic("Halt not implemented!\n"); }
|
||||
|
||||
/** Check if this address is a valid instruction address. */
|
||||
bool validInstAddr(Addr addr) { return true; }
|
||||
|
||||
/** Check if this address is a valid data address. */
|
||||
bool validDataAddr(Addr addr) { return true; }
|
||||
|
||||
/** Register accessors. Index refers to the physical register index. */
|
||||
|
||||
/** Reads a miscellaneous register. */
|
||||
TheISA::MiscReg readMiscRegNoEffect(int misc_reg, ThreadID tid);
|
||||
|
||||
/** Reads a misc. register, including any side effects the read
|
||||
* might have as defined by the architecture.
|
||||
*/
|
||||
TheISA::MiscReg readMiscReg(int misc_reg, ThreadID tid);
|
||||
|
||||
/** Sets a miscellaneous register. */
|
||||
void setMiscRegNoEffect(int misc_reg, const TheISA::MiscReg &val,
|
||||
ThreadID tid);
|
||||
|
||||
/** Sets a misc. register, including any side effects the write
|
||||
* might have as defined by the architecture.
|
||||
*/
|
||||
void setMiscReg(int misc_reg, const TheISA::MiscReg &val,
|
||||
ThreadID tid);
|
||||
|
||||
uint64_t readIntReg(int reg_idx);
|
||||
|
||||
TheISA::FloatReg readFloatReg(int reg_idx);
|
||||
|
||||
TheISA::FloatRegBits readFloatRegBits(int reg_idx);
|
||||
|
||||
void setIntReg(int reg_idx, uint64_t val);
|
||||
|
||||
void setFloatReg(int reg_idx, TheISA::FloatReg val);
|
||||
|
||||
void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val);
|
||||
|
||||
uint64_t readArchIntReg(int reg_idx, ThreadID tid);
|
||||
|
||||
float readArchFloatReg(int reg_idx, ThreadID tid);
|
||||
|
||||
uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid);
|
||||
|
||||
/** Architectural register accessors. Looks up in the commit
|
||||
* rename table to obtain the true physical index of the
|
||||
* architected register first, then accesses that physical
|
||||
* register.
|
||||
*/
|
||||
void setArchIntReg(int reg_idx, uint64_t val, ThreadID tid);
|
||||
|
||||
void setArchFloatReg(int reg_idx, float val, ThreadID tid);
|
||||
|
||||
void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid);
|
||||
|
||||
/** Sets the commit PC state of a specific thread. */
|
||||
void pcState(const TheISA::PCState &newPCState, ThreadID tid);
|
||||
|
||||
/** Reads the commit PC state of a specific thread. */
|
||||
TheISA::PCState pcState(ThreadID tid);
|
||||
|
||||
/** Reads the commit PC of a specific thread. */
|
||||
Addr instAddr(ThreadID tid);
|
||||
|
||||
/** Reads the commit micro PC of a specific thread. */
|
||||
MicroPC microPC(ThreadID tid);
|
||||
|
||||
/** Reads the next PC of a specific thread. */
|
||||
Addr nextInstAddr(ThreadID tid);
|
||||
|
||||
/** Initiates a squash of all in-flight instructions for a given
|
||||
* thread. The source of the squash is an external update of
|
||||
* state through the TC.
|
||||
*/
|
||||
void squashFromTC(ThreadID tid);
|
||||
|
||||
/** Function to add instruction onto the head of the list of the
|
||||
* instructions. Used when new instructions are fetched.
|
||||
*/
|
||||
ListIt addInst(DynInstPtr &inst);
|
||||
|
||||
/** Function to tell the CPU that an instruction has completed. */
|
||||
void instDone(ThreadID tid, DynInstPtr &inst);
|
||||
|
||||
/** Remove an instruction from the front end of the list. There's
|
||||
* no restriction on location of the instruction.
|
||||
*/
|
||||
void removeFrontInst(DynInstPtr &inst);
|
||||
|
||||
/** Remove all instructions that are not currently in the ROB.
|
||||
* There's also an option to not squash delay slot instructions.*/
|
||||
void removeInstsNotInROB(ThreadID tid);
|
||||
|
||||
/** Remove all instructions younger than the given sequence number. */
|
||||
void removeInstsUntil(const InstSeqNum &seq_num, ThreadID tid);
|
||||
|
||||
/** Removes the instruction pointed to by the iterator. */
|
||||
inline void squashInstIt(const ListIt &instIt, ThreadID tid);
|
||||
|
||||
/** Cleans up all instructions on the remove list. */
|
||||
void cleanUpRemovedInsts();
|
||||
|
||||
/** Debug function to print all instructions on the list. */
|
||||
void dumpInsts();
|
||||
|
||||
public:
|
||||
#ifndef NDEBUG
|
||||
/** Count of total number of dynamic instructions in flight. */
|
||||
int instcount;
|
||||
#endif
|
||||
|
||||
/** List of all the instructions in flight. */
|
||||
std::list<DynInstPtr> instList;
|
||||
|
||||
/** List of all the instructions that will be removed at the end of this
|
||||
* cycle.
|
||||
*/
|
||||
std::queue<ListIt> removeList;
|
||||
|
||||
#ifdef DEBUG
|
||||
/** Debug structure to keep track of the sequence numbers still in
|
||||
* flight.
|
||||
*/
|
||||
std::set<InstSeqNum> snList;
|
||||
#endif
|
||||
|
||||
/** Records if instructions need to be removed this cycle due to
|
||||
* being retired or squashed.
|
||||
*/
|
||||
bool removeInstsThisCycle;
|
||||
|
||||
protected:
|
||||
/** The fetch stage. */
|
||||
typename CPUPolicy::Fetch fetch;
|
||||
|
||||
/** The decode stage. */
|
||||
typename CPUPolicy::Decode decode;
|
||||
|
||||
/** The rename stage. */
|
||||
typename CPUPolicy::Rename rename;
|
||||
|
||||
/** The issue/execute/writeback stages. */
|
||||
typename CPUPolicy::IEW iew;
|
||||
|
||||
/** The commit stage. */
|
||||
typename CPUPolicy::Commit commit;
|
||||
|
||||
/** The register file. */
|
||||
typename CPUPolicy::RegFile regFile;
|
||||
|
||||
/** The free list. */
|
||||
typename CPUPolicy::FreeList freeList;
|
||||
|
||||
/** The rename map. */
|
||||
typename CPUPolicy::RenameMap renameMap[Impl::MaxThreads];
|
||||
|
||||
/** The commit rename map. */
|
||||
typename CPUPolicy::RenameMap commitRenameMap[Impl::MaxThreads];
|
||||
|
||||
/** The re-order buffer. */
|
||||
typename CPUPolicy::ROB rob;
|
||||
|
||||
/** Active Threads List */
|
||||
std::list<ThreadID> activeThreads;
|
||||
|
||||
/** Integer Register Scoreboard */
|
||||
Scoreboard scoreboard;
|
||||
|
||||
TheISA::ISA isa[Impl::MaxThreads];
|
||||
|
||||
/** Instruction port. Note that it has to appear after the fetch stage. */
|
||||
IcachePort icachePort;
|
||||
|
||||
/** Data port. Note that it has to appear after the iew stages */
|
||||
DcachePort dcachePort;
|
||||
|
||||
public:
|
||||
/**
 * Index assigned to each pipeline stage. Stages pass their own index to
 * activateStage() / deactivateStage() to say which stage is being
 * activated or deactivated. NumStages comes last and therefore equals
 * the number of stage indices.
 */
enum StageIdx
{
    FetchIdx,
    DecodeIdx,
    RenameIdx,
    IEWIdx,
    CommitIdx,
    NumStages
};
|
||||
|
||||
/** Typedefs from the Impl to get the structs that each of the
|
||||
* time buffers should use.
|
||||
*/
|
||||
typedef typename CPUPolicy::TimeStruct TimeStruct;
|
||||
|
||||
typedef typename CPUPolicy::FetchStruct FetchStruct;
|
||||
|
||||
typedef typename CPUPolicy::DecodeStruct DecodeStruct;
|
||||
|
||||
typedef typename CPUPolicy::RenameStruct RenameStruct;
|
||||
|
||||
typedef typename CPUPolicy::IEWStruct IEWStruct;
|
||||
|
||||
/** The main time buffer to do backwards communication. */
|
||||
TimeBuffer<TimeStruct> timeBuffer;
|
||||
|
||||
/** The fetch stage's instruction queue. */
|
||||
TimeBuffer<FetchStruct> fetchQueue;
|
||||
|
||||
/** The decode stage's instruction queue. */
|
||||
TimeBuffer<DecodeStruct> decodeQueue;
|
||||
|
||||
/** The rename stage's instruction queue. */
|
||||
TimeBuffer<RenameStruct> renameQueue;
|
||||
|
||||
/** The IEW stage's instruction queue. */
|
||||
TimeBuffer<IEWStruct> iewQueue;
|
||||
|
||||
private:
|
||||
/** The activity recorder; used to tell if the CPU has any
|
||||
* activity remaining or if it can go to idle and deschedule
|
||||
* itself.
|
||||
*/
|
||||
ActivityRecorder activityRec;
|
||||
|
||||
public:
|
||||
/** Records that there was time buffer activity this cycle. */
|
||||
void activityThisCycle() { activityRec.activity(); }
|
||||
|
||||
/** Changes a stage's status to active within the activity recorder. */
|
||||
void activateStage(const StageIdx idx)
|
||||
{ activityRec.activateStage(idx); }
|
||||
|
||||
/** Changes a stage's status to inactive within the activity recorder. */
|
||||
void deactivateStage(const StageIdx idx)
|
||||
{ activityRec.deactivateStage(idx); }
|
||||
|
||||
/** Wakes the CPU, rescheduling the CPU if it's not already active. */
|
||||
void wakeCPU();
|
||||
|
||||
virtual void wakeup();
|
||||
|
||||
/** Gets a free thread id. Use if thread ids change across system. */
|
||||
ThreadID getFreeTid();
|
||||
|
||||
public:
|
||||
/** Returns a pointer to a thread context. */
|
||||
ThreadContext *
|
||||
tcBase(ThreadID tid)
|
||||
{
|
||||
return thread[tid]->getTC();
|
||||
}
|
||||
|
||||
/** The global sequence number counter. */
|
||||
InstSeqNum globalSeqNum;//[Impl::MaxThreads];
|
||||
|
||||
/** Pointer to the checker, which can dynamically verify
|
||||
* instruction results at run time. This can be set to NULL if it
|
||||
* is not being used.
|
||||
*/
|
||||
Checker<Impl> *checker;
|
||||
|
||||
/** Pointer to the system. */
|
||||
System *system;
|
||||
|
||||
/** Event to call process() on once draining has completed. */
|
||||
Event *drainEvent;
|
||||
|
||||
/** Counter of how many stages have completed draining. */
|
||||
int drainCount;
|
||||
|
||||
/** Pointers to all of the threads in the CPU. */
|
||||
std::vector<Thread *> thread;
|
||||
|
||||
/** Whether or not the CPU should defer its registration. */
|
||||
bool deferRegistration;
|
||||
|
||||
/** Is there a context switch pending? */
|
||||
bool contextSwitch;
|
||||
|
||||
/** Threads Scheduled to Enter CPU */
|
||||
std::list<int> cpuWaitList;
|
||||
|
||||
/** The cycle that the CPU was last running, used for statistics. */
|
||||
Tick lastRunningCycle;
|
||||
|
||||
/** The cycle that the CPU was last activated by a new thread*/
|
||||
Tick lastActivatedCycle;
|
||||
|
||||
/** Mapping for system thread id to cpu id */
|
||||
std::map<ThreadID, unsigned> threadMap;
|
||||
|
||||
/** Available thread ids in the cpu*/
|
||||
std::vector<ThreadID> tids;
|
||||
|
||||
/** CPU read function, forwards read to LSQ. */
|
||||
Fault read(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
|
||||
uint8_t *data, int load_idx)
|
||||
{
|
||||
return this->iew.ldstQueue.read(req, sreqLow, sreqHigh,
|
||||
data, load_idx);
|
||||
}
|
||||
|
||||
/** CPU write function, forwards write to LSQ. */
|
||||
Fault write(RequestPtr &req, RequestPtr &sreqLow, RequestPtr &sreqHigh,
|
||||
uint8_t *data, int store_idx)
|
||||
{
|
||||
return this->iew.ldstQueue.write(req, sreqLow, sreqHigh,
|
||||
data, store_idx);
|
||||
}
|
||||
|
||||
/** Used by the fetch unit to get a hold of the instruction port. */
|
||||
virtual CpuPort &getInstPort() { return icachePort; }
|
||||
|
||||
/** Get the dcache port (used to find block size for translations). */
|
||||
virtual CpuPort &getDataPort() { return dcachePort; }
|
||||
|
||||
Addr lockAddr;
|
||||
|
||||
/** Temporary fix for the lock flag, works in the UP case. */
|
||||
bool lockFlag;
|
||||
|
||||
/** Stat for total number of times the CPU is descheduled. */
|
||||
Stats::Scalar timesIdled;
|
||||
/** Stat for total number of cycles the CPU spends descheduled. */
|
||||
Stats::Scalar idleCycles;
|
||||
/** Stat for total number of cycles the CPU spends descheduled due to a
|
||||
* quiesce operation or waiting for an interrupt. */
|
||||
Stats::Scalar quiesceCycles;
|
||||
/** Stat for the number of committed instructions per thread. */
|
||||
Stats::Vector committedInsts;
|
||||
/** Stat for the number of committed ops (including micro ops) per thread. */
|
||||
Stats::Vector committedOps;
|
||||
/** Stat for the total number of committed instructions. */
|
||||
Stats::Scalar totalCommittedInsts;
|
||||
/** Stat for the CPI per thread. */
|
||||
Stats::Formula cpi;
|
||||
/** Stat for the total CPI. */
|
||||
Stats::Formula totalCpi;
|
||||
/** Stat for the IPC per thread. */
|
||||
Stats::Formula ipc;
|
||||
/** Stat for the total IPC. */
|
||||
Stats::Formula totalIpc;
|
||||
|
||||
//number of integer register file accesses
|
||||
Stats::Scalar intRegfileReads;
|
||||
Stats::Scalar intRegfileWrites;
|
||||
//number of float register file accesses
|
||||
Stats::Scalar fpRegfileReads;
|
||||
Stats::Scalar fpRegfileWrites;
|
||||
//number of misc register file accesses
|
||||
Stats::Scalar miscRegfileReads;
|
||||
Stats::Scalar miscRegfileWrites;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_CPU_HH__
|
||||
80
simulators/gem5/src/cpu/o3/cpu_builder.cc
Normal file
80
simulators/gem5/src/cpu/o3/cpu_builder.cc
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "cpu/o3/cpu.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
|
||||
/**
 * Concrete O3 CPU: FullO3CPU instantiated with O3CPUImpl. It adds
 * nothing beyond forwarding its parameters to the base class.
 */
class DerivO3CPU : public FullO3CPU<O3CPUImpl>
{
  public:
    /** Hands the parameter struct straight to FullO3CPU. */
    DerivO3CPU(DerivO3CPUParams *params)
        : FullO3CPU<O3CPUImpl>(params)
    {
    }
};
|
||||
|
||||
/**
 * Finalizes the parameters and builds the CPU.
 *
 * Fixes up numThreads, switches smtFetchPolicy from "SingleThread" to
 * "RoundRobin" when more than one thread is configured, sets
 * instShiftAmt, and returns a newly allocated DerivO3CPU built from
 * these parameters.
 *
 * @return The newly created CPU.
 */
DerivO3CPU *
DerivO3CPUParams::create()
{
    ThreadID actual_num_threads;
    if (FullSystem) {
        // Full-system only supports a single thread for the moment.
        actual_num_threads = 1;
    } else {
        if (workload.size() > numThreads) {
            fatal("Workload Size (%i) > Max Supported Threads (%i) on This CPU",
                  workload.size(), numThreads);
        } else if (workload.size() == 0) {
            fatal("Must specify at least one workload!");
        }

        // In non-full-system mode the thread count is the larger of
        // numThreads and the number of workloads. Assuming fatal() does
        // not return, the check above means numThreads is never the
        // smaller of the two here.
        actual_num_threads =
            (numThreads >= workload.size()) ? numThreads : workload.size();
    }

    numThreads = actual_num_threads;

    // A "SingleThread" fetch policy makes no sense with several threads:
    // fall back to "RoundRobin". Any other policy is left as configured.
    std::string round_robin_policy = "RoundRobin";
    std::string single_thread = "SingleThread";

    if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0)
        smtFetchPolicy = round_robin_policy;

    instShiftAmt = 2;

    return new DerivO3CPU(this);
}
|
||||
117
simulators/gem5/src/cpu/o3/cpu_policy.hh
Normal file
117
simulators/gem5/src/cpu/o3/cpu_policy.hh
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_CPU_POLICY_HH__
|
||||
#define __CPU_O3_CPU_POLICY_HH__
|
||||
|
||||
#include "cpu/o3/bpred_unit.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "cpu/o3/commit.hh"
|
||||
#include "cpu/o3/decode.hh"
|
||||
#include "cpu/o3/fetch.hh"
|
||||
#include "cpu/o3/free_list.hh"
|
||||
#include "cpu/o3/iew.hh"
|
||||
#include "cpu/o3/inst_queue.hh"
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "cpu/o3/lsq_unit.hh"
|
||||
#include "cpu/o3/mem_dep_unit.hh"
|
||||
#include "cpu/o3/regfile.hh"
|
||||
#include "cpu/o3/rename.hh"
|
||||
#include "cpu/o3/rename_map.hh"
|
||||
#include "cpu/o3/rob.hh"
|
||||
#include "cpu/o3/store_set.hh"
|
||||
|
||||
/**
|
||||
* Struct that defines the key classes to be used by the CPU. All
|
||||
* classes use the typedefs defined here to determine what are the
|
||||
* classes of the other stages and communication buffers. In order to
|
||||
* change a structure such as the IQ, simply change the typedef here
|
||||
* to use the desired class instead, and recompile. In order to
|
||||
* create a different CPU to be used simultaneously with this one, see
|
||||
* the alpha_impl.hh file for instructions.
|
||||
*/
|
||||
template<class Impl>
|
||||
struct SimpleCPUPolicy
|
||||
{
|
||||
/** Typedef for the branch prediction unit (which includes the BP,
|
||||
* RAS, and BTB).
|
||||
*/
|
||||
typedef ::BPredUnit<Impl> BPredUnit;
|
||||
/** Typedef for the register file. Most classes assume a unified
|
||||
* physical register file.
|
||||
*/
|
||||
typedef PhysRegFile<Impl> RegFile;
|
||||
/** Typedef for the freelist of registers. */
|
||||
typedef SimpleFreeList FreeList;
|
||||
/** Typedef for the rename map. */
|
||||
typedef SimpleRenameMap RenameMap;
|
||||
/** Typedef for the ROB. */
|
||||
typedef ::ROB<Impl> ROB;
|
||||
/** Typedef for the instruction queue/scheduler. */
|
||||
typedef InstructionQueue<Impl> IQ;
|
||||
/** Typedef for the memory dependence unit. */
|
||||
typedef ::MemDepUnit<StoreSet, Impl> MemDepUnit;
|
||||
/** Typedef for the LSQ. */
|
||||
typedef ::LSQ<Impl> LSQ;
|
||||
/** Typedef for the thread-specific LSQ units. */
|
||||
typedef ::LSQUnit<Impl> LSQUnit;
|
||||
|
||||
/** Typedef for fetch. */
|
||||
typedef DefaultFetch<Impl> Fetch;
|
||||
/** Typedef for decode. */
|
||||
typedef DefaultDecode<Impl> Decode;
|
||||
/** Typedef for rename. */
|
||||
typedef DefaultRename<Impl> Rename;
|
||||
/** Typedef for Issue/Execute/Writeback. */
|
||||
typedef DefaultIEW<Impl> IEW;
|
||||
/** Typedef for commit. */
|
||||
typedef DefaultCommit<Impl> Commit;
|
||||
|
||||
/** The struct for communication between fetch and decode. */
|
||||
typedef DefaultFetchDefaultDecode<Impl> FetchStruct;
|
||||
|
||||
/** The struct for communication between decode and rename. */
|
||||
typedef DefaultDecodeDefaultRename<Impl> DecodeStruct;
|
||||
|
||||
/** The struct for communication between rename and IEW. */
|
||||
typedef DefaultRenameDefaultIEW<Impl> RenameStruct;
|
||||
|
||||
/** The struct for communication between IEW and commit. */
|
||||
typedef DefaultIEWDefaultCommit<Impl> IEWStruct;
|
||||
|
||||
/** The struct for communication within the IEW stage. */
|
||||
typedef ::IssueStruct<Impl> IssueStruct;
|
||||
|
||||
/** The struct for all backwards communication. */
|
||||
typedef TimeBufStruct<Impl> TimeStruct;
|
||||
|
||||
};
|
||||
|
||||
#endif //__CPU_O3_CPU_POLICY_HH__
|
||||
34
simulators/gem5/src/cpu/o3/decode.cc
Normal file
34
simulators/gem5/src/cpu/o3/decode.cc
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/decode_impl.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
|
||||
// Explicitly instantiate the decode stage template for O3CPUImpl.
template class DefaultDecode<O3CPUImpl>;
|
||||
314
simulators/gem5/src/cpu/o3/decode.hh
Normal file
314
simulators/gem5/src/cpu/o3/decode.hh
Normal file
@ -0,0 +1,314 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_DECODE_HH__
|
||||
#define __CPU_O3_DECODE_HH__
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
|
||||
// Forward declaration of the parameter struct taken (by pointer) by the
// DefaultDecode constructor.
struct DerivO3CPUParams;
|
||||
|
||||
/**
|
||||
* DefaultDecode class handles both single threaded and SMT
|
||||
* decode. Its width is specified by the parameters; each cycles it
|
||||
* tries to decode that many instructions. Because instructions are
|
||||
* actually decoded when the StaticInst is created, this stage does
|
||||
* not do much other than check any PC-relative branches.
|
||||
*/
|
||||
template<class Impl>
|
||||
class DefaultDecode
|
||||
{
|
||||
private:
|
||||
// Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::CPUPol CPUPol;
|
||||
|
||||
// Typedefs from the CPU policy.
|
||||
typedef typename CPUPol::FetchStruct FetchStruct;
|
||||
typedef typename CPUPol::DecodeStruct DecodeStruct;
|
||||
typedef typename CPUPol::TimeStruct TimeStruct;
|
||||
|
||||
public:
|
||||
/** Overall decode stage status. Used to determine if the CPU can
|
||||
* deschedule itself due to a lack of activity.
|
||||
*/
|
||||
enum DecodeStatus {
|
||||
Active,
|
||||
Inactive
|
||||
};
|
||||
|
||||
/** Individual thread status. */
|
||||
enum ThreadStatus {
|
||||
Running,
|
||||
Idle,
|
||||
StartSquash,
|
||||
Squashing,
|
||||
Blocked,
|
||||
Unblocking
|
||||
};
|
||||
|
||||
private:
|
||||
/** Decode status. */
|
||||
DecodeStatus _status;
|
||||
|
||||
/** Per-thread status. */
|
||||
ThreadStatus decodeStatus[Impl::MaxThreads];
|
||||
|
||||
public:
|
||||
/** DefaultDecode constructor. */
|
||||
DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params);
|
||||
|
||||
/** Returns the name of decode. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Sets the main backwards communication time buffer pointer. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||
|
||||
/** Sets pointer to time buffer used to communicate to the next stage. */
|
||||
void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
|
||||
|
||||
/** Sets pointer to time buffer coming from fetch. */
|
||||
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
|
||||
|
||||
/** Sets pointer to list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Drains the decode stage. */
|
||||
bool drain();
|
||||
|
||||
/** Resumes execution after a drain. */
|
||||
void resume() { }
|
||||
|
||||
/** Switches out the decode stage. */
|
||||
void switchOut() { }
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Ticks decode, processing all input signals and decoding as many
|
||||
* instructions as possible.
|
||||
*/
|
||||
void tick();
|
||||
|
||||
/** Determines what to do based on decode's current status.
|
||||
* @param status_change decode() sets this variable if there was a status
|
||||
* change (ie switching from from blocking to unblocking).
|
||||
* @param tid Thread id to decode instructions from.
|
||||
*/
|
||||
void decode(bool &status_change, ThreadID tid);
|
||||
|
||||
/** Processes instructions from fetch and passes them on to rename.
|
||||
* Decoding of instructions actually happens when they are created in
|
||||
* fetch, so this function mostly checks if PC-relative branches are
|
||||
* correct.
|
||||
*/
|
||||
void decodeInsts(ThreadID tid);
|
||||
|
||||
private:
|
||||
/** Inserts a thread's instructions into the skid buffer, to be decoded
|
||||
* once decode unblocks.
|
||||
*/
|
||||
void skidInsert(ThreadID tid);
|
||||
|
||||
/** Returns if all of the skid buffers are empty. */
|
||||
bool skidsEmpty();
|
||||
|
||||
/** Updates overall decode status based on all of the threads' statuses. */
|
||||
void updateStatus();
|
||||
|
||||
/** Separates instructions from fetch into individual lists of instructions
|
||||
* sorted by thread.
|
||||
*/
|
||||
void sortInsts();
|
||||
|
||||
/** Reads all stall signals from the backwards communication timebuffer. */
|
||||
void readStallSignals(ThreadID tid);
|
||||
|
||||
/** Checks all input signals and updates decode's status appropriately. */
|
||||
bool checkSignalsAndUpdate(ThreadID tid);
|
||||
|
||||
/** Checks all stall signals, and returns if any are true. */
|
||||
bool checkStall(ThreadID tid) const;
|
||||
|
||||
/** Returns if there any instructions from fetch on this cycle. */
|
||||
inline bool fetchInstsValid();
|
||||
|
||||
/** Switches decode to blocking, and signals back that decode has
|
||||
* become blocked.
|
||||
* @return Returns true if there is a status change.
|
||||
*/
|
||||
bool block(ThreadID tid);
|
||||
|
||||
/** Switches decode to unblocking if the skid buffer is empty, and
|
||||
* signals back that decode has unblocked.
|
||||
* @return Returns true if there is a status change.
|
||||
*/
|
||||
bool unblock(ThreadID tid);
|
||||
|
||||
/** Squashes if there is a PC-relative branch that was predicted
|
||||
* incorrectly. Sends squash information back to fetch.
|
||||
*/
|
||||
void squash(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
public:
|
||||
/** Squashes due to commit signalling a squash. Changes status to
|
||||
* squashing and clears block/unblock signals as needed.
|
||||
*/
|
||||
unsigned squash(ThreadID tid);
|
||||
|
||||
private:
|
||||
// Interfaces to objects outside of decode.
|
||||
/** CPU interface. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Time buffer interface. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to get rename's output from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromRename;
|
||||
|
||||
/** Wire to get iew's information from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromIEW;
|
||||
|
||||
/** Wire to get commit's information from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
||||
|
||||
/** Wire to write information heading to previous stages. */
|
||||
// Might not be the best name as not only fetch will read it.
|
||||
typename TimeBuffer<TimeStruct>::wire toFetch;
|
||||
|
||||
/** Decode instruction queue. */
|
||||
TimeBuffer<DecodeStruct> *decodeQueue;
|
||||
|
||||
/** Wire used to write any information heading to rename. */
|
||||
typename TimeBuffer<DecodeStruct>::wire toRename;
|
||||
|
||||
/** Fetch instruction queue interface. */
|
||||
TimeBuffer<FetchStruct> *fetchQueue;
|
||||
|
||||
/** Wire to get fetch's output from fetch queue. */
|
||||
typename TimeBuffer<FetchStruct>::wire fromFetch;
|
||||
|
||||
/** Queue of all instructions coming from fetch this cycle. */
|
||||
std::queue<DynInstPtr> insts[Impl::MaxThreads];
|
||||
|
||||
/** Skid buffer between fetch and decode. */
|
||||
std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
|
||||
|
||||
/** Variable that tracks if decode has written to the time buffer this
|
||||
* cycle. Used to tell CPU if there is activity this cycle.
|
||||
*/
|
||||
bool wroteToTimeBuffer;
|
||||
|
||||
/** Source of possible stalls. */
|
||||
struct Stalls {
|
||||
bool rename;
|
||||
bool iew;
|
||||
bool commit;
|
||||
};
|
||||
|
||||
/** Tracks which stages are telling decode to stall. */
|
||||
Stalls stalls[Impl::MaxThreads];
|
||||
|
||||
/** Rename to decode delay, in ticks. */
|
||||
unsigned renameToDecodeDelay;
|
||||
|
||||
/** IEW to decode delay, in ticks. */
|
||||
unsigned iewToDecodeDelay;
|
||||
|
||||
/** Commit to decode delay, in ticks. */
|
||||
unsigned commitToDecodeDelay;
|
||||
|
||||
/** Fetch to decode delay, in ticks. */
|
||||
unsigned fetchToDecodeDelay;
|
||||
|
||||
/** The width of decode, in instructions. */
|
||||
unsigned decodeWidth;
|
||||
|
||||
/** Index of instructions being sent to rename. */
|
||||
unsigned toRenameIndex;
|
||||
|
||||
/** number of Active Threads*/
|
||||
ThreadID numThreads;
|
||||
|
||||
/** List of active thread ids */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Number of branches in flight. */
|
||||
unsigned branchCount[Impl::MaxThreads];
|
||||
|
||||
/** Maximum size of the skid buffer. */
|
||||
unsigned skidBufferMax;
|
||||
|
||||
/** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/
|
||||
Addr bdelayDoneSeqNum[Impl::MaxThreads];
|
||||
|
||||
/** Instruction used for squashing branch (used for MIPS)*/
|
||||
DynInstPtr squashInst[Impl::MaxThreads];
|
||||
|
||||
/** Tells when their is a pending delay slot inst. to send
|
||||
* to rename. If there is, then wait squash after the next
|
||||
* instruction (used for MIPS).
|
||||
*/
|
||||
bool squashAfterDelaySlot[Impl::MaxThreads];
|
||||
|
||||
|
||||
/** Stat for total number of idle cycles. */
|
||||
Stats::Scalar decodeIdleCycles;
|
||||
/** Stat for total number of blocked cycles. */
|
||||
Stats::Scalar decodeBlockedCycles;
|
||||
/** Stat for total number of normal running cycles. */
|
||||
Stats::Scalar decodeRunCycles;
|
||||
/** Stat for total number of unblocking cycles. */
|
||||
Stats::Scalar decodeUnblockCycles;
|
||||
/** Stat for total number of squashing cycles. */
|
||||
Stats::Scalar decodeSquashCycles;
|
||||
/** Stat for number of times a branch is resolved at decode. */
|
||||
Stats::Scalar decodeBranchResolved;
|
||||
/** Stat for number of times a branch mispredict is detected. */
|
||||
Stats::Scalar decodeBranchMispred;
|
||||
/** Stat for number of times decode detected a non-control instruction
|
||||
* incorrectly predicted as a branch.
|
||||
*/
|
||||
Stats::Scalar decodeControlMispred;
|
||||
/** Stat for total number of decoded instructions. */
|
||||
Stats::Scalar decodeDecodedInsts;
|
||||
/** Stat for total number of squashed instructions. */
|
||||
Stats::Scalar decodeSquashedInsts;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_DECODE_HH__
|
||||
761
simulators/gem5/src/cpu/o3/decode_impl.hh
Normal file
761
simulators/gem5/src/cpu/o3/decode_impl.hh
Normal file
@ -0,0 +1,761 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/decode.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "debug/Activity.hh"
|
||||
#include "debug/Decode.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
#include "sim/full_system.hh"
|
||||
|
||||
// clang complains about std::set being overloaded with Packet::set if
|
||||
// we open up the entire namespace std
|
||||
using std::list;
|
||||
|
||||
template<class Impl>
|
||||
DefaultDecode<Impl>::DefaultDecode(O3CPU *_cpu, DerivO3CPUParams *params)
|
||||
: cpu(_cpu),
|
||||
renameToDecodeDelay(params->renameToDecodeDelay),
|
||||
iewToDecodeDelay(params->iewToDecodeDelay),
|
||||
commitToDecodeDelay(params->commitToDecodeDelay),
|
||||
fetchToDecodeDelay(params->fetchToDecodeDelay),
|
||||
decodeWidth(params->decodeWidth),
|
||||
numThreads(params->numThreads)
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
// Setup status, make sure stall signals are clear.
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
decodeStatus[tid] = Idle;
|
||||
|
||||
stalls[tid].rename = false;
|
||||
stalls[tid].iew = false;
|
||||
stalls[tid].commit = false;
|
||||
}
|
||||
|
||||
// @todo: Make into a parameter
|
||||
skidBufferMax = (fetchToDecodeDelay + 1) * params->fetchWidth;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
std::string
|
||||
DefaultDecode<Impl>::name() const
|
||||
{
|
||||
return cpu->name() + ".decode";
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::regStats()
|
||||
{
|
||||
decodeIdleCycles
|
||||
.name(name() + ".IdleCycles")
|
||||
.desc("Number of cycles decode is idle")
|
||||
.prereq(decodeIdleCycles);
|
||||
decodeBlockedCycles
|
||||
.name(name() + ".BlockedCycles")
|
||||
.desc("Number of cycles decode is blocked")
|
||||
.prereq(decodeBlockedCycles);
|
||||
decodeRunCycles
|
||||
.name(name() + ".RunCycles")
|
||||
.desc("Number of cycles decode is running")
|
||||
.prereq(decodeRunCycles);
|
||||
decodeUnblockCycles
|
||||
.name(name() + ".UnblockCycles")
|
||||
.desc("Number of cycles decode is unblocking")
|
||||
.prereq(decodeUnblockCycles);
|
||||
decodeSquashCycles
|
||||
.name(name() + ".SquashCycles")
|
||||
.desc("Number of cycles decode is squashing")
|
||||
.prereq(decodeSquashCycles);
|
||||
decodeBranchResolved
|
||||
.name(name() + ".BranchResolved")
|
||||
.desc("Number of times decode resolved a branch")
|
||||
.prereq(decodeBranchResolved);
|
||||
decodeBranchMispred
|
||||
.name(name() + ".BranchMispred")
|
||||
.desc("Number of times decode detected a branch misprediction")
|
||||
.prereq(decodeBranchMispred);
|
||||
decodeControlMispred
|
||||
.name(name() + ".ControlMispred")
|
||||
.desc("Number of times decode detected an instruction incorrectly"
|
||||
" predicted as a control")
|
||||
.prereq(decodeControlMispred);
|
||||
decodeDecodedInsts
|
||||
.name(name() + ".DecodedInsts")
|
||||
.desc("Number of instructions handled by decode")
|
||||
.prereq(decodeDecodedInsts);
|
||||
decodeSquashedInsts
|
||||
.name(name() + ".SquashedInsts")
|
||||
.desc("Number of squashed instructions handled by decode")
|
||||
.prereq(decodeSquashedInsts);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
|
||||
{
|
||||
timeBuffer = tb_ptr;
|
||||
|
||||
// Setup wire to write information back to fetch.
|
||||
toFetch = timeBuffer->getWire(0);
|
||||
|
||||
// Create wires to get information from proper places in time buffer.
|
||||
fromRename = timeBuffer->getWire(-renameToDecodeDelay);
|
||||
fromIEW = timeBuffer->getWire(-iewToDecodeDelay);
|
||||
fromCommit = timeBuffer->getWire(-commitToDecodeDelay);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr)
|
||||
{
|
||||
decodeQueue = dq_ptr;
|
||||
|
||||
// Setup wire to write information to proper place in decode queue.
|
||||
toRename = decodeQueue->getWire(0);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr)
|
||||
{
|
||||
fetchQueue = fq_ptr;
|
||||
|
||||
// Setup wire to read information from fetch queue.
|
||||
fromFetch = fetchQueue->getWire(-fetchToDecodeDelay);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
|
||||
{
|
||||
activeThreads = at_ptr;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::drain()
|
||||
{
|
||||
// Decode is done draining at any time.
|
||||
cpu->signalDrained();
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::takeOverFrom()
|
||||
{
|
||||
_status = Inactive;
|
||||
|
||||
// Be sure to reset state and clear out any old instructions.
|
||||
for (ThreadID tid = 0; tid < numThreads; ++tid) {
|
||||
decodeStatus[tid] = Idle;
|
||||
|
||||
stalls[tid].rename = false;
|
||||
stalls[tid].iew = false;
|
||||
stalls[tid].commit = false;
|
||||
while (!insts[tid].empty())
|
||||
insts[tid].pop();
|
||||
while (!skidBuffer[tid].empty())
|
||||
skidBuffer[tid].pop();
|
||||
branchCount[tid] = 0;
|
||||
}
|
||||
wroteToTimeBuffer = false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::checkStall(ThreadID tid) const
|
||||
{
|
||||
bool ret_val = false;
|
||||
|
||||
if (stalls[tid].rename) {
|
||||
DPRINTF(Decode,"[tid:%i]: Stall fom Rename stage detected.\n", tid);
|
||||
ret_val = true;
|
||||
} else if (stalls[tid].iew) {
|
||||
DPRINTF(Decode,"[tid:%i]: Stall fom IEW stage detected.\n", tid);
|
||||
ret_val = true;
|
||||
} else if (stalls[tid].commit) {
|
||||
DPRINTF(Decode,"[tid:%i]: Stall fom Commit stage detected.\n", tid);
|
||||
ret_val = true;
|
||||
}
|
||||
|
||||
return ret_val;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
inline bool
|
||||
DefaultDecode<Impl>::fetchInstsValid()
|
||||
{
|
||||
return fromFetch->size > 0;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::block(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%u]: Blocking.\n", tid);
|
||||
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
skidInsert(tid);
|
||||
|
||||
// If the decode status is blocked or unblocking then decode has not yet
|
||||
// signalled fetch to unblock. In that case, there is no need to tell
|
||||
// fetch to block.
|
||||
if (decodeStatus[tid] != Blocked) {
|
||||
// Set the status to Blocked.
|
||||
decodeStatus[tid] = Blocked;
|
||||
|
||||
if (decodeStatus[tid] != Unblocking) {
|
||||
toFetch->decodeBlock[tid] = true;
|
||||
wroteToTimeBuffer = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::unblock(ThreadID tid)
|
||||
{
|
||||
// Decode is done unblocking only if the skid buffer is empty.
|
||||
if (skidBuffer[tid].empty()) {
|
||||
DPRINTF(Decode, "[tid:%u]: Done unblocking.\n", tid);
|
||||
toFetch->decodeUnblock[tid] = true;
|
||||
wroteToTimeBuffer = true;
|
||||
|
||||
decodeStatus[tid] = Running;
|
||||
return true;
|
||||
}
|
||||
|
||||
DPRINTF(Decode, "[tid:%u]: Currently unblocking.\n", tid);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::squash(DynInstPtr &inst, ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i]: [sn:%i] Squashing due to incorrect branch "
|
||||
"prediction detected at decode.\n", tid, inst->seqNum);
|
||||
|
||||
// Send back mispredict information.
|
||||
toFetch->decodeInfo[tid].branchMispredict = true;
|
||||
toFetch->decodeInfo[tid].predIncorrect = true;
|
||||
toFetch->decodeInfo[tid].mispredictInst = inst;
|
||||
toFetch->decodeInfo[tid].squash = true;
|
||||
toFetch->decodeInfo[tid].doneSeqNum = inst->seqNum;
|
||||
toFetch->decodeInfo[tid].nextPC = inst->branchTarget();
|
||||
toFetch->decodeInfo[tid].branchTaken = inst->pcState().branching();
|
||||
toFetch->decodeInfo[tid].squashInst = inst;
|
||||
if (toFetch->decodeInfo[tid].mispredictInst->isUncondCtrl()) {
|
||||
toFetch->decodeInfo[tid].branchTaken = true;
|
||||
}
|
||||
|
||||
InstSeqNum squash_seq_num = inst->seqNum;
|
||||
|
||||
// Might have to tell fetch to unblock.
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
decodeStatus[tid] == Unblocking) {
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
}
|
||||
|
||||
// Set status to squashing.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
for (int i=0; i<fromFetch->size; i++) {
|
||||
if (fromFetch->insts[i]->threadNumber == tid &&
|
||||
fromFetch->insts[i]->seqNum > squash_seq_num) {
|
||||
fromFetch->insts[i]->setSquashed();
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
// Squash instructions up until this one
|
||||
cpu->removeInstsUntil(squash_seq_num, tid);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
unsigned
|
||||
DefaultDecode<Impl>::squash(ThreadID tid)
|
||||
{
|
||||
DPRINTF(Decode, "[tid:%i]: Squashing.\n",tid);
|
||||
|
||||
if (decodeStatus[tid] == Blocked ||
|
||||
decodeStatus[tid] == Unblocking) {
|
||||
if (FullSystem) {
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
} else {
|
||||
// In syscall emulation, we can have both a block and a squash due
|
||||
// to a syscall in the same cycle. This would cause both signals
|
||||
// to be high. This shouldn't happen in full system.
|
||||
// @todo: Determine if this still happens.
|
||||
if (toFetch->decodeBlock[tid])
|
||||
toFetch->decodeBlock[tid] = 0;
|
||||
else
|
||||
toFetch->decodeUnblock[tid] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Set status to squashing.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
// Go through incoming instructions from fetch and squash them.
|
||||
unsigned squash_count = 0;
|
||||
|
||||
for (int i=0; i<fromFetch->size; i++) {
|
||||
if (fromFetch->insts[i]->threadNumber == tid) {
|
||||
fromFetch->insts[i]->setSquashed();
|
||||
squash_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Clear the instruction list and skid buffer in case they have any
|
||||
// insts in them.
|
||||
while (!insts[tid].empty()) {
|
||||
insts[tid].pop();
|
||||
}
|
||||
|
||||
while (!skidBuffer[tid].empty()) {
|
||||
skidBuffer[tid].pop();
|
||||
}
|
||||
|
||||
return squash_count;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::skidInsert(ThreadID tid)
|
||||
{
|
||||
DynInstPtr inst = NULL;
|
||||
|
||||
while (!insts[tid].empty()) {
|
||||
inst = insts[tid].front();
|
||||
|
||||
insts[tid].pop();
|
||||
|
||||
assert(tid == inst->threadNumber);
|
||||
|
||||
DPRINTF(Decode,"Inserting [sn:%lli] PC: %s into decode skidBuffer %i\n",
|
||||
inst->seqNum, inst->pcState(), inst->threadNumber);
|
||||
|
||||
skidBuffer[tid].push(inst);
|
||||
}
|
||||
|
||||
// @todo: Eventually need to enforce this by not letting a thread
|
||||
// fetch past its skidbuffer
|
||||
assert(skidBuffer[tid].size() <= skidBufferMax);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::skidsEmpty()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
if (!skidBuffer[tid].empty())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::updateStatus()
|
||||
{
|
||||
bool any_unblocking = false;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (decodeStatus[tid] == Unblocking) {
|
||||
any_unblocking = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Decode will have activity if it's unblocking.
|
||||
if (any_unblocking) {
|
||||
if (_status == Inactive) {
|
||||
_status = Active;
|
||||
|
||||
DPRINTF(Activity, "Activating stage.\n");
|
||||
|
||||
cpu->activateStage(O3CPU::DecodeIdx);
|
||||
}
|
||||
} else {
|
||||
// If it's not unblocking, then decode will not have any internal
|
||||
// activity. Switch it to inactive.
|
||||
if (_status == Active) {
|
||||
_status = Inactive;
|
||||
DPRINTF(Activity, "Deactivating stage.\n");
|
||||
|
||||
cpu->deactivateStage(O3CPU::DecodeIdx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::sortInsts()
|
||||
{
|
||||
int insts_from_fetch = fromFetch->size;
|
||||
for (int i = 0; i < insts_from_fetch; ++i) {
|
||||
insts[fromFetch->insts[i]->threadNumber].push(fromFetch->insts[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::readStallSignals(ThreadID tid)
|
||||
{
|
||||
if (fromRename->renameBlock[tid]) {
|
||||
stalls[tid].rename = true;
|
||||
}
|
||||
|
||||
if (fromRename->renameUnblock[tid]) {
|
||||
assert(stalls[tid].rename);
|
||||
stalls[tid].rename = false;
|
||||
}
|
||||
|
||||
if (fromIEW->iewBlock[tid]) {
|
||||
stalls[tid].iew = true;
|
||||
}
|
||||
|
||||
if (fromIEW->iewUnblock[tid]) {
|
||||
assert(stalls[tid].iew);
|
||||
stalls[tid].iew = false;
|
||||
}
|
||||
|
||||
if (fromCommit->commitBlock[tid]) {
|
||||
stalls[tid].commit = true;
|
||||
}
|
||||
|
||||
if (fromCommit->commitUnblock[tid]) {
|
||||
assert(stalls[tid].commit);
|
||||
stalls[tid].commit = false;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
DefaultDecode<Impl>::checkSignalsAndUpdate(ThreadID tid)
|
||||
{
|
||||
// Check if there's a squash signal, squash if there is.
|
||||
// Check stall signals, block if necessary.
|
||||
// If status was blocked
|
||||
// Check if stall conditions have passed
|
||||
// if so then go to unblocking
|
||||
// If status was Squashing
|
||||
// check if squashing is not high. Switch to running this cycle.
|
||||
|
||||
// Update the per thread stall statuses.
|
||||
readStallSignals(tid);
|
||||
|
||||
// Check squash signals from commit.
|
||||
if (fromCommit->commitInfo[tid].squash) {
|
||||
|
||||
DPRINTF(Decode, "[tid:%u]: Squashing instructions due to squash "
|
||||
"from commit.\n", tid);
|
||||
|
||||
squash(tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check ROB squash signals from commit.
|
||||
if (fromCommit->commitInfo[tid].robSquashing) {
|
||||
DPRINTF(Decode, "[tid:%u]: ROB is still squashing.\n", tid);
|
||||
|
||||
// Continue to squash.
|
||||
decodeStatus[tid] = Squashing;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (checkStall(tid)) {
|
||||
return block(tid);
|
||||
}
|
||||
|
||||
if (decodeStatus[tid] == Blocked) {
|
||||
DPRINTF(Decode, "[tid:%u]: Done blocking, switching to unblocking.\n",
|
||||
tid);
|
||||
|
||||
decodeStatus[tid] = Unblocking;
|
||||
|
||||
unblock(tid);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (decodeStatus[tid] == Squashing) {
|
||||
// Switch status to running if decode isn't being told to block or
|
||||
// squash this cycle.
|
||||
DPRINTF(Decode, "[tid:%u]: Done squashing, switching to running.\n",
|
||||
tid);
|
||||
|
||||
decodeStatus[tid] = Running;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// If we've reached this point, we have not gotten any signals that
|
||||
// cause decode to change its status. Decode remains the same as before.
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::tick()
|
||||
{
|
||||
wroteToTimeBuffer = false;
|
||||
|
||||
bool status_change = false;
|
||||
|
||||
toRenameIndex = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
sortInsts();
|
||||
|
||||
//Check stall and squash signals.
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
DPRINTF(Decode,"Processing [tid:%i]\n",tid);
|
||||
status_change = checkSignalsAndUpdate(tid) || status_change;
|
||||
|
||||
decode(status_change, tid);
|
||||
}
|
||||
|
||||
if (status_change) {
|
||||
updateStatus();
|
||||
}
|
||||
|
||||
if (wroteToTimeBuffer) {
|
||||
DPRINTF(Activity, "Activity this cycle.\n");
|
||||
|
||||
cpu->activityThisCycle();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::decode(bool &status_change, ThreadID tid)
|
||||
{
|
||||
// If status is Running or idle,
|
||||
// call decodeInsts()
|
||||
// If status is Unblocking,
|
||||
// buffer any instructions coming from fetch
|
||||
// continue trying to empty skid buffer
|
||||
// check if stall conditions have passed
|
||||
|
||||
if (decodeStatus[tid] == Blocked) {
|
||||
++decodeBlockedCycles;
|
||||
} else if (decodeStatus[tid] == Squashing) {
|
||||
++decodeSquashCycles;
|
||||
}
|
||||
|
||||
// Decode should try to decode as many instructions as its bandwidth
|
||||
// will allow, as long as it is not currently blocked.
|
||||
if (decodeStatus[tid] == Running ||
|
||||
decodeStatus[tid] == Idle) {
|
||||
DPRINTF(Decode, "[tid:%u]: Not blocked, so attempting to run "
|
||||
"stage.\n",tid);
|
||||
|
||||
decodeInsts(tid);
|
||||
} else if (decodeStatus[tid] == Unblocking) {
|
||||
// Make sure that the skid buffer has something in it if the
|
||||
// status is unblocking.
|
||||
assert(!skidsEmpty());
|
||||
|
||||
// If the status was unblocking, then instructions from the skid
|
||||
// buffer were used. Remove those instructions and handle
|
||||
// the rest of unblocking.
|
||||
decodeInsts(tid);
|
||||
|
||||
if (fetchInstsValid()) {
|
||||
// Add the current inputs to the skid buffer so they can be
|
||||
// reprocessed when this stage unblocks.
|
||||
skidInsert(tid);
|
||||
}
|
||||
|
||||
status_change = unblock(tid) || status_change;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
DefaultDecode<Impl>::decodeInsts(ThreadID tid)
|
||||
{
|
||||
// Instructions can come either from the skid buffer or the list of
|
||||
// instructions coming from fetch, depending on decode's status.
|
||||
int insts_available = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid].size() : insts[tid].size();
|
||||
|
||||
if (insts_available == 0) {
|
||||
DPRINTF(Decode, "[tid:%u] Nothing to do, breaking out"
|
||||
" early.\n",tid);
|
||||
// Should I change the status to idle?
|
||||
++decodeIdleCycles;
|
||||
return;
|
||||
} else if (decodeStatus[tid] == Unblocking) {
|
||||
DPRINTF(Decode, "[tid:%u] Unblocking, removing insts from skid "
|
||||
"buffer.\n",tid);
|
||||
++decodeUnblockCycles;
|
||||
} else if (decodeStatus[tid] == Running) {
|
||||
++decodeRunCycles;
|
||||
}
|
||||
|
||||
DynInstPtr inst;
|
||||
|
||||
std::queue<DynInstPtr>
|
||||
&insts_to_decode = decodeStatus[tid] == Unblocking ?
|
||||
skidBuffer[tid] : insts[tid];
|
||||
|
||||
DPRINTF(Decode, "[tid:%u]: Sending instruction to rename.\n",tid);
|
||||
|
||||
while (insts_available > 0 && toRenameIndex < decodeWidth) {
|
||||
assert(!insts_to_decode.empty());
|
||||
|
||||
inst = insts_to_decode.front();
|
||||
|
||||
insts_to_decode.pop();
|
||||
|
||||
DPRINTF(Decode, "[tid:%u]: Processing instruction [sn:%lli] with "
|
||||
"PC %s\n", tid, inst->seqNum, inst->pcState());
|
||||
|
||||
if (inst->isSquashed()) {
|
||||
DPRINTF(Decode, "[tid:%u]: Instruction %i with PC %s is "
|
||||
"squashed, skipping.\n",
|
||||
tid, inst->seqNum, inst->pcState());
|
||||
|
||||
++decodeSquashedInsts;
|
||||
|
||||
--insts_available;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
// Also check if instructions have no source registers. Mark
|
||||
// them as ready to issue at any time. Not sure if this check
|
||||
// should exist here or at a later stage; however it doesn't matter
|
||||
// too much for function correctness.
|
||||
if (inst->numSrcRegs() == 0) {
|
||||
inst->setCanIssue();
|
||||
}
|
||||
|
||||
// This current instruction is valid, so add it into the decode
|
||||
// queue. The next instruction may not be valid, so check to
|
||||
// see if branches were predicted correctly.
|
||||
toRename->insts[toRenameIndex] = inst;
|
||||
|
||||
++(toRename->size);
|
||||
++toRenameIndex;
|
||||
++decodeDecodedInsts;
|
||||
--insts_available;
|
||||
|
||||
#if TRACING_ON
|
||||
inst->decodeTick = curTick() - inst->fetchTick;
|
||||
#endif
|
||||
|
||||
// Ensure that if it was predicted as a branch, it really is a
|
||||
// branch.
|
||||
if (inst->readPredTaken() && !inst->isControl()) {
|
||||
panic("Instruction predicted as a branch!");
|
||||
|
||||
++decodeControlMispred;
|
||||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Go ahead and compute any PC-relative branches.
|
||||
if (inst->isDirectCtrl() && inst->isUncondCtrl()) {
|
||||
++decodeBranchResolved;
|
||||
|
||||
if (!(inst->branchTarget() == inst->readPredTarg())) {
|
||||
++decodeBranchMispred;
|
||||
|
||||
// Might want to set some sort of boolean and just do
|
||||
// a check at the end
|
||||
squash(inst, inst->threadNumber);
|
||||
TheISA::PCState target = inst->branchTarget();
|
||||
|
||||
DPRINTF(Decode, "[sn:%i]: Updating predictions: PredPC: %s\n",
|
||||
inst->seqNum, target);
|
||||
//The micro pc after an instruction level branch should be 0
|
||||
inst->setPredTarg(target);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we didn't process all instructions, then we will need to block
|
||||
// and put all those instructions into the skid buffer.
|
||||
if (!insts_to_decode.empty()) {
|
||||
block(tid);
|
||||
}
|
||||
|
||||
// Record that decode has written to the time buffer for activity
|
||||
// tracking.
|
||||
if (toRenameIndex) {
|
||||
wroteToTimeBuffer = true;
|
||||
}
|
||||
}
|
||||
272
simulators/gem5/src/cpu/o3/dep_graph.hh
Normal file
272
simulators/gem5/src/cpu/o3/dep_graph.hh
Normal file
@ -0,0 +1,272 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_DEP_GRAPH_HH__
|
||||
#define __CPU_O3_DEP_GRAPH_HH__
|
||||
|
||||
#include "cpu/o3/comm.hh"
|
||||
|
||||
/**
 * One node of a singly linked dependency chain: an instruction plus a
 * pointer to the following node.
 */
template <class DynInstPtr>
class DependencyEntry
{
  public:
    /** Instruction stored in this node. */
    DynInstPtr inst;

    //Might want to include data about what arch. register the
    //dependence is waiting on.

    /** Following node in the chain; NULL when there is none. */
    DependencyEntry<DynInstPtr> *next;

    /** Creates an unlinked node that holds no instruction. */
    DependencyEntry()
        : inst(NULL),
          next(NULL)
    {
    }
};
|
||||
|
||||
/** Array of linked list that maintains the dependencies between
|
||||
* producing instructions and consuming instructions. Each linked
|
||||
* list represents a single physical register, having the future
|
||||
* producer of the register's value, and all consumers waiting on that
|
||||
* value on the list. The head node of each linked list represents
|
||||
* the producing instruction of that register. Instructions are put
|
||||
* on the list upon reaching the IQ, and are removed from the list
|
||||
* either when the producer completes, or the instruction is squashed.
|
||||
*/
|
||||
template <class DynInstPtr>
|
||||
class DependencyGraph
|
||||
{
|
||||
public:
|
||||
typedef DependencyEntry<DynInstPtr> DepEntry;
|
||||
|
||||
/** Default construction. Must call resize() prior to use. */
|
||||
DependencyGraph()
|
||||
: numEntries(0), memAllocCounter(0), nodesTraversed(0), nodesRemoved(0)
|
||||
{ }
|
||||
|
||||
~DependencyGraph();
|
||||
|
||||
/** Resize the dependency graph to have num_entries registers. */
|
||||
void resize(int num_entries);
|
||||
|
||||
/** Clears all of the linked lists. */
|
||||
void reset();
|
||||
|
||||
/** Inserts an instruction to be dependent on the given index. */
|
||||
void insert(PhysRegIndex idx, DynInstPtr &new_inst);
|
||||
|
||||
/** Sets the producing instruction of a given register. */
|
||||
void setInst(PhysRegIndex idx, DynInstPtr &new_inst)
|
||||
{ dependGraph[idx].inst = new_inst; }
|
||||
|
||||
/** Clears the producing instruction. */
|
||||
void clearInst(PhysRegIndex idx)
|
||||
{ dependGraph[idx].inst = NULL; }
|
||||
|
||||
/** Removes an instruction from a single linked list. */
|
||||
void remove(PhysRegIndex idx, DynInstPtr &inst_to_remove);
|
||||
|
||||
/** Removes and returns the newest dependent of a specific register. */
|
||||
DynInstPtr pop(PhysRegIndex idx);
|
||||
|
||||
/** Checks if there are any dependents on a specific register. */
|
||||
bool empty(PhysRegIndex idx) { return !dependGraph[idx].next; }
|
||||
|
||||
/** Debugging function to dump out the dependency graph.
|
||||
*/
|
||||
void dump();
|
||||
|
||||
private:
|
||||
/** Array of linked lists. Each linked list is a list of all the
|
||||
* instructions that depend upon a given register. The actual
|
||||
* register's index is used to index into the graph; ie all
|
||||
* instructions in flight that are dependent upon r34 will be
|
||||
* in the linked list of dependGraph[34].
|
||||
*/
|
||||
DepEntry *dependGraph;
|
||||
|
||||
/** Number of linked lists; identical to the number of registers. */
|
||||
int numEntries;
|
||||
|
||||
// Debug variable, remove when done testing.
|
||||
unsigned memAllocCounter;
|
||||
|
||||
public:
|
||||
// Debug variable, remove when done testing.
|
||||
uint64_t nodesTraversed;
|
||||
// Debug variable, remove when done testing.
|
||||
uint64_t nodesRemoved;
|
||||
};
|
||||
|
||||
template <class DynInstPtr>
|
||||
DependencyGraph<DynInstPtr>::~DependencyGraph()
|
||||
{
|
||||
delete [] dependGraph;
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
DependencyGraph<DynInstPtr>::resize(int num_entries)
|
||||
{
|
||||
numEntries = num_entries;
|
||||
dependGraph = new DepEntry[numEntries];
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
DependencyGraph<DynInstPtr>::reset()
|
||||
{
|
||||
// Clear the dependency graph
|
||||
DepEntry *curr;
|
||||
DepEntry *prev;
|
||||
|
||||
for (int i = 0; i < numEntries; ++i) {
|
||||
curr = dependGraph[i].next;
|
||||
|
||||
while (curr) {
|
||||
memAllocCounter--;
|
||||
|
||||
prev = curr;
|
||||
curr = prev->next;
|
||||
prev->inst = NULL;
|
||||
|
||||
delete prev;
|
||||
}
|
||||
|
||||
if (dependGraph[i].inst) {
|
||||
dependGraph[i].inst = NULL;
|
||||
}
|
||||
|
||||
dependGraph[i].next = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
DependencyGraph<DynInstPtr>::insert(PhysRegIndex idx, DynInstPtr &new_inst)
|
||||
{
|
||||
//Add this new, dependent instruction at the head of the dependency
|
||||
//chain.
|
||||
|
||||
// First create the entry that will be added to the head of the
|
||||
// dependency chain.
|
||||
DepEntry *new_entry = new DepEntry;
|
||||
new_entry->next = dependGraph[idx].next;
|
||||
new_entry->inst = new_inst;
|
||||
|
||||
// Then actually add it to the chain.
|
||||
dependGraph[idx].next = new_entry;
|
||||
|
||||
++memAllocCounter;
|
||||
}
|
||||
|
||||
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
DependencyGraph<DynInstPtr>::remove(PhysRegIndex idx,
|
||||
DynInstPtr &inst_to_remove)
|
||||
{
|
||||
DepEntry *prev = &dependGraph[idx];
|
||||
DepEntry *curr = dependGraph[idx].next;
|
||||
|
||||
// Make sure curr isn't NULL. Because this instruction is being
|
||||
// removed from a dependency list, it must have been placed there at
|
||||
// an earlier time. The dependency chain should not be empty,
|
||||
// unless the instruction dependent upon it is already ready.
|
||||
if (curr == NULL) {
|
||||
return;
|
||||
}
|
||||
|
||||
nodesRemoved++;
|
||||
|
||||
// Find the instruction to remove within the dependency linked list.
|
||||
while (curr->inst != inst_to_remove) {
|
||||
prev = curr;
|
||||
curr = curr->next;
|
||||
nodesTraversed++;
|
||||
|
||||
assert(curr != NULL);
|
||||
}
|
||||
|
||||
// Now remove this instruction from the list.
|
||||
prev->next = curr->next;
|
||||
|
||||
--memAllocCounter;
|
||||
|
||||
// Could push this off to the destructor of DependencyEntry
|
||||
curr->inst = NULL;
|
||||
|
||||
delete curr;
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
DynInstPtr
|
||||
DependencyGraph<DynInstPtr>::pop(PhysRegIndex idx)
|
||||
{
|
||||
DepEntry *node;
|
||||
node = dependGraph[idx].next;
|
||||
DynInstPtr inst = NULL;
|
||||
if (node) {
|
||||
inst = node->inst;
|
||||
dependGraph[idx].next = node->next;
|
||||
node->inst = NULL;
|
||||
memAllocCounter--;
|
||||
delete node;
|
||||
}
|
||||
return inst;
|
||||
}
|
||||
|
||||
template <class DynInstPtr>
|
||||
void
|
||||
DependencyGraph<DynInstPtr>::dump()
|
||||
{
|
||||
DepEntry *curr;
|
||||
|
||||
for (int i = 0; i < numEntries; ++i)
|
||||
{
|
||||
curr = &dependGraph[i];
|
||||
|
||||
if (curr->inst) {
|
||||
cprintf("dependGraph[%i]: producer: %s [sn:%lli] consumer: ",
|
||||
i, curr->inst->pcState(), curr->inst->seqNum);
|
||||
} else {
|
||||
cprintf("dependGraph[%i]: No producer. consumer: ", i);
|
||||
}
|
||||
|
||||
while (curr->next != NULL) {
|
||||
curr = curr->next;
|
||||
|
||||
cprintf("%s [sn:%lli] ",
|
||||
curr->inst->pcState(), curr->inst->seqNum);
|
||||
}
|
||||
|
||||
cprintf("\n");
|
||||
}
|
||||
cprintf("memAllocCounter: %i\n", memAllocCounter);
|
||||
}
|
||||
|
||||
#endif // __CPU_O3_DEP_GRAPH_HH__
|
||||
36
simulators/gem5/src/cpu/o3/dyn_inst.cc
Normal file
36
simulators/gem5/src/cpu/o3/dyn_inst.cc
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Gabe Black
|
||||
*/
|
||||
|
||||
#include "cpu/o3/dyn_inst_impl.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
|
||||
// Force instantiation of BaseO3DynInst for all the implementations that
|
||||
// are needed.
|
||||
template class BaseO3DynInst<O3CPUImpl>;
|
||||
298
simulators/gem5/src/cpu/o3/dyn_inst.hh
Normal file
298
simulators/gem5/src/cpu/o3/dyn_inst.hh
Normal file
@ -0,0 +1,298 @@
|
||||
/*
|
||||
* Copyright (c) 2010 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_DYN_INST_HH__
|
||||
#define __CPU_O3_DYN_INST_HH__
|
||||
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/cpu.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/base_dyn_inst.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
|
||||
class Packet;
|
||||
|
||||
/**
|
||||
* Mostly implementation & ISA specific AlphaDynInst. As with most
|
||||
* other classes in the new CPU model, it is templated on the Impl to
|
||||
* allow for passing in of all types, such as the CPU type and the ISA
|
||||
* type. The AlphaDynInst serves as the primary interface to the CPU
|
||||
* for instructions that are executing.
|
||||
*/
|
||||
template <class Impl>
|
||||
class BaseO3DynInst : public BaseDynInst<Impl>
|
||||
{
|
||||
public:
|
||||
/** Typedef for the CPU. */
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
/** Binary machine instruction type. */
|
||||
typedef TheISA::MachInst MachInst;
|
||||
/** Extended machine instruction type. */
|
||||
typedef TheISA::ExtMachInst ExtMachInst;
|
||||
/** Logical register index type. */
|
||||
typedef TheISA::RegIndex RegIndex;
|
||||
/** Integer register index type. */
|
||||
typedef TheISA::IntReg IntReg;
|
||||
typedef TheISA::FloatReg FloatReg;
|
||||
typedef TheISA::FloatRegBits FloatRegBits;
|
||||
/** Misc register index type. */
|
||||
typedef TheISA::MiscReg MiscReg;
|
||||
|
||||
enum {
|
||||
MaxInstSrcRegs = TheISA::MaxInstSrcRegs, //< Max source regs
|
||||
MaxInstDestRegs = TheISA::MaxInstDestRegs //< Max dest regs
|
||||
};
|
||||
|
||||
public:
|
||||
/** BaseDynInst constructor given a binary instruction. */
|
||||
BaseO3DynInst(StaticInstPtr staticInst, StaticInstPtr macroop,
|
||||
TheISA::PCState pc, TheISA::PCState predPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu);
|
||||
|
||||
/** BaseDynInst constructor given a static inst pointer. */
|
||||
BaseO3DynInst(StaticInstPtr _staticInst, StaticInstPtr _macroop);
|
||||
|
||||
/** Executes the instruction.*/
|
||||
Fault execute();
|
||||
|
||||
/** Initiates the access. Only valid for memory operations. */
|
||||
Fault initiateAcc();
|
||||
|
||||
/** Completes the access. Only valid for memory operations. */
|
||||
Fault completeAcc(PacketPtr pkt);
|
||||
|
||||
private:
|
||||
/** Initializes variables. */
|
||||
void initVars();
|
||||
|
||||
protected:
|
||||
/** Values to be written to the destination misc. registers. */
|
||||
MiscReg _destMiscRegVal[TheISA::MaxMiscDestRegs];
|
||||
|
||||
/** Indexes of the destination misc. registers. They are needed to defer
|
||||
* the write accesses to the misc. registers until the commit stage, when
|
||||
* the instruction is out of its speculative state.
|
||||
*/
|
||||
short _destMiscRegIdx[TheISA::MaxMiscDestRegs];
|
||||
|
||||
/** Number of destination misc. registers. */
|
||||
uint8_t _numDestMiscRegs;
|
||||
|
||||
|
||||
public:
|
||||
#if TRACING_ON
|
||||
/** Tick records used for the pipeline activity viewer. */
|
||||
Tick fetchTick;
|
||||
uint32_t decodeTick;
|
||||
uint32_t renameTick;
|
||||
uint32_t dispatchTick;
|
||||
uint32_t issueTick;
|
||||
uint32_t completeTick;
|
||||
#endif
|
||||
|
||||
/** Reads a misc. register, including any side-effects the read
|
||||
* might have as defined by the architecture.
|
||||
*/
|
||||
MiscReg readMiscReg(int misc_reg)
|
||||
{
|
||||
return this->cpu->readMiscReg(misc_reg, this->threadNumber);
|
||||
}
|
||||
|
||||
/** Sets a misc. register, including any side-effects the write
|
||||
* might have as defined by the architecture.
|
||||
*/
|
||||
void setMiscReg(int misc_reg, const MiscReg &val)
|
||||
{
|
||||
/** Writes to misc. registers are recorded and deferred until the
|
||||
* commit stage, when updateMiscRegs() is called.
|
||||
*/
|
||||
assert(_numDestMiscRegs < TheISA::MaxMiscDestRegs);
|
||||
_destMiscRegIdx[_numDestMiscRegs] = misc_reg;
|
||||
_destMiscRegVal[_numDestMiscRegs] = val;
|
||||
_numDestMiscRegs++;
|
||||
}
|
||||
|
||||
/** Reads a misc. register, including any side-effects the read
|
||||
* might have as defined by the architecture.
|
||||
*/
|
||||
TheISA::MiscReg readMiscRegOperand(const StaticInst *si, int idx)
|
||||
{
|
||||
return this->cpu->readMiscReg(
|
||||
si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag,
|
||||
this->threadNumber);
|
||||
}
|
||||
|
||||
/** Sets a misc. register, including any side-effects the write
|
||||
* might have as defined by the architecture.
|
||||
*/
|
||||
void setMiscRegOperand(const StaticInst *si, int idx,
|
||||
const MiscReg &val)
|
||||
{
|
||||
int misc_reg = si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag;
|
||||
setMiscReg(misc_reg, val);
|
||||
}
|
||||
|
||||
/** Called at the commit stage to update the misc. registers. */
|
||||
void updateMiscRegs()
|
||||
{
|
||||
// @todo: Pretty convoluted way to avoid squashing from happening when
|
||||
// using the TC during an instruction's execution (specifically for
|
||||
// instructions that have side-effects that use the TC). Fix this.
|
||||
// See cpu/o3/dyn_inst_impl.hh.
|
||||
bool in_syscall = this->thread->inSyscall;
|
||||
this->thread->inSyscall = true;
|
||||
|
||||
for (int i = 0; i < _numDestMiscRegs; i++)
|
||||
this->cpu->setMiscReg(
|
||||
_destMiscRegIdx[i], _destMiscRegVal[i], this->threadNumber);
|
||||
|
||||
this->thread->inSyscall = in_syscall;
|
||||
}
|
||||
|
||||
void forwardOldRegs()
|
||||
{
|
||||
|
||||
for (int idx = 0; idx < this->numDestRegs(); idx++) {
|
||||
PhysRegIndex prev_phys_reg = this->prevDestRegIdx(idx);
|
||||
TheISA::RegIndex original_dest_reg = this->staticInst->destRegIdx(idx);
|
||||
if (original_dest_reg < TheISA::FP_Base_DepTag)
|
||||
this->setIntRegOperand(this->staticInst.get(), idx, this->cpu->readIntReg(prev_phys_reg));
|
||||
else if (original_dest_reg < TheISA::Ctrl_Base_DepTag)
|
||||
this->setFloatRegOperandBits(this->staticInst.get(), idx, this->cpu->readFloatRegBits(prev_phys_reg));
|
||||
}
|
||||
}
|
||||
/** Calls hardware return from error interrupt. */
|
||||
Fault hwrei();
|
||||
/** Traps to handle specified fault. */
|
||||
void trap(Fault fault);
|
||||
bool simPalCheck(int palFunc);
|
||||
|
||||
/** Emulates a syscall. */
|
||||
void syscall(int64_t callnum);
|
||||
|
||||
public:
|
||||
|
||||
// The register accessor methods provide the index of the
|
||||
// instruction's operand (e.g., 0 or 1), not the architectural
|
||||
// register index, to simplify the implementation of register
|
||||
// renaming. We find the architectural register index by indexing
|
||||
// into the instruction's own operand index table. Note that a
|
||||
// raw pointer to the StaticInst is provided instead of a
|
||||
// ref-counted StaticInstPtr to redice overhead. This is fine as
|
||||
// long as these methods don't copy the pointer into any long-term
|
||||
// storage (which is pretty hard to imagine they would have reason
|
||||
// to do).
|
||||
|
||||
uint64_t readIntRegOperand(const StaticInst *si, int idx)
|
||||
{
|
||||
return this->cpu->readIntReg(this->_srcRegIdx[idx]);
|
||||
}
|
||||
|
||||
FloatReg readFloatRegOperand(const StaticInst *si, int idx)
|
||||
{
|
||||
return this->cpu->readFloatReg(this->_srcRegIdx[idx]);
|
||||
}
|
||||
|
||||
FloatRegBits readFloatRegOperandBits(const StaticInst *si, int idx)
|
||||
{
|
||||
return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]);
|
||||
}
|
||||
|
||||
/** @todo: Make results into arrays so they can handle multiple dest
|
||||
* registers.
|
||||
*/
|
||||
void setIntRegOperand(const StaticInst *si, int idx, uint64_t val)
|
||||
{
|
||||
this->cpu->setIntReg(this->_destRegIdx[idx], val);
|
||||
BaseDynInst<Impl>::setIntRegOperand(si, idx, val);
|
||||
}
|
||||
|
||||
void setFloatRegOperand(const StaticInst *si, int idx, FloatReg val)
|
||||
{
|
||||
this->cpu->setFloatReg(this->_destRegIdx[idx], val);
|
||||
BaseDynInst<Impl>::setFloatRegOperand(si, idx, val);
|
||||
}
|
||||
|
||||
void setFloatRegOperandBits(const StaticInst *si, int idx,
|
||||
FloatRegBits val)
|
||||
{
|
||||
this->cpu->setFloatRegBits(this->_destRegIdx[idx], val);
|
||||
BaseDynInst<Impl>::setFloatRegOperandBits(si, idx, val);
|
||||
}
|
||||
|
||||
#if THE_ISA == MIPS_ISA
|
||||
uint64_t readRegOtherThread(int misc_reg)
|
||||
{
|
||||
panic("MIPS MT not defined for O3 CPU.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
void setRegOtherThread(int misc_reg, const TheISA::MiscReg &val)
|
||||
{
|
||||
panic("MIPS MT not defined for O3 CPU.\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
public:
|
||||
/** Calculates EA part of a memory instruction. Currently unused,
|
||||
* though it may be useful in the future if we want to split
|
||||
* memory operations into EA calculation and memory access parts.
|
||||
*/
|
||||
Fault calcEA()
|
||||
{
|
||||
return this->staticInst->eaCompInst()->execute(this, this->traceData);
|
||||
}
|
||||
|
||||
/** Does the memory access part of a memory instruction. Currently unused,
|
||||
* though it may be useful in the future if we want to split
|
||||
* memory operations into EA calculation and memory access parts.
|
||||
*/
|
||||
Fault memAccess()
|
||||
{
|
||||
return this->staticInst->memAccInst()->execute(this, this->traceData);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_DYN_INST_HH__
|
||||
|
||||
216
simulators/gem5/src/cpu/o3/dyn_inst_impl.hh
Normal file
216
simulators/gem5/src/cpu/o3/dyn_inst_impl.hh
Normal file
@ -0,0 +1,216 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "base/cp_annotate.hh"
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "sim/full_system.hh"
|
||||
|
||||
template <class Impl>
|
||||
BaseO3DynInst<Impl>::BaseO3DynInst(StaticInstPtr staticInst,
|
||||
StaticInstPtr macroop,
|
||||
TheISA::PCState pc, TheISA::PCState predPC,
|
||||
InstSeqNum seq_num, O3CPU *cpu)
|
||||
: BaseDynInst<Impl>(staticInst, macroop, pc, predPC, seq_num, cpu)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
BaseO3DynInst<Impl>::BaseO3DynInst(StaticInstPtr _staticInst,
|
||||
StaticInstPtr _macroop)
|
||||
: BaseDynInst<Impl>(_staticInst, _macroop)
|
||||
{
|
||||
initVars();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BaseO3DynInst<Impl>::initVars()
|
||||
{
|
||||
// Make sure to have the renamed register entries set to the same
|
||||
// as the normal register entries. It will allow the IQ to work
|
||||
// without any modifications.
|
||||
for (int i = 0; i < this->staticInst->numDestRegs(); i++) {
|
||||
this->_destRegIdx[i] = this->staticInst->destRegIdx(i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < this->staticInst->numSrcRegs(); i++) {
|
||||
this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i);
|
||||
}
|
||||
|
||||
this->_readySrcRegIdx.reset();
|
||||
|
||||
_numDestMiscRegs = 0;
|
||||
|
||||
#if TRACING_ON
|
||||
fetchTick = 0;
|
||||
decodeTick = 0;
|
||||
renameTick = 0;
|
||||
dispatchTick = 0;
|
||||
issueTick = 0;
|
||||
completeTick = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
BaseO3DynInst<Impl>::execute()
|
||||
{
|
||||
// @todo: Pretty convoluted way to avoid squashing from happening
|
||||
// when using the TC during an instruction's execution
|
||||
// (specifically for instructions that have side-effects that use
|
||||
// the TC). Fix this.
|
||||
bool in_syscall = this->thread->inSyscall;
|
||||
this->thread->inSyscall = true;
|
||||
|
||||
this->fault = this->staticInst->execute(this, this->traceData);
|
||||
|
||||
this->thread->inSyscall = in_syscall;
|
||||
|
||||
return this->fault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
BaseO3DynInst<Impl>::initiateAcc()
|
||||
{
|
||||
// @todo: Pretty convoluted way to avoid squashing from happening
|
||||
// when using the TC during an instruction's execution
|
||||
// (specifically for instructions that have side-effects that use
|
||||
// the TC). Fix this.
|
||||
bool in_syscall = this->thread->inSyscall;
|
||||
this->thread->inSyscall = true;
|
||||
|
||||
this->fault = this->staticInst->initiateAcc(this, this->traceData);
|
||||
|
||||
this->thread->inSyscall = in_syscall;
|
||||
|
||||
return this->fault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
BaseO3DynInst<Impl>::completeAcc(PacketPtr pkt)
|
||||
{
|
||||
// @todo: Pretty convoluted way to avoid squashing from happening
|
||||
// when using the TC during an instruction's execution
|
||||
// (specifically for instructions that have side-effects that use
|
||||
// the TC). Fix this.
|
||||
bool in_syscall = this->thread->inSyscall;
|
||||
this->thread->inSyscall = true;
|
||||
|
||||
if (this->cpu->checker) {
|
||||
if (this->isStoreConditional()) {
|
||||
this->reqToVerify->setExtraData(pkt->req->getExtraData());
|
||||
}
|
||||
}
|
||||
|
||||
this->fault = this->staticInst->completeAcc(pkt, this, this->traceData);
|
||||
|
||||
this->thread->inSyscall = in_syscall;
|
||||
|
||||
return this->fault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
BaseO3DynInst<Impl>::hwrei()
|
||||
{
|
||||
#if THE_ISA == ALPHA_ISA
|
||||
// Can only do a hwrei when in pal mode.
|
||||
if (!(this->instAddr() & 0x3))
|
||||
return new AlphaISA::UnimplementedOpcodeFault;
|
||||
|
||||
// Set the next PC based on the value of the EXC_ADDR IPR.
|
||||
AlphaISA::PCState pc = this->pcState();
|
||||
pc.npc(this->cpu->readMiscRegNoEffect(AlphaISA::IPR_EXC_ADDR,
|
||||
this->threadNumber));
|
||||
this->pcState(pc);
|
||||
if (CPA::available()) {
|
||||
ThreadContext *tc = this->cpu->tcBase(this->threadNumber);
|
||||
CPA::cpa()->swAutoBegin(tc, this->nextInstAddr());
|
||||
}
|
||||
|
||||
// Tell CPU to clear any state it needs to if a hwrei is taken.
|
||||
this->cpu->hwrei(this->threadNumber);
|
||||
#else
|
||||
|
||||
#endif
|
||||
// FIXME: XXX check for interrupts? XXX
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BaseO3DynInst<Impl>::trap(Fault fault)
|
||||
{
|
||||
this->cpu->trap(fault, this->threadNumber, this->staticInst);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
BaseO3DynInst<Impl>::simPalCheck(int palFunc)
|
||||
{
|
||||
#if THE_ISA != ALPHA_ISA
|
||||
panic("simPalCheck called, but PAL only exists in Alpha!\n");
|
||||
#endif
|
||||
return this->cpu->simPalCheck(palFunc, this->threadNumber);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
BaseO3DynInst<Impl>::syscall(int64_t callnum)
|
||||
{
|
||||
if (FullSystem)
|
||||
panic("Syscall emulation isn't available in FS mode.\n");
|
||||
|
||||
// HACK: check CPU's nextPC before and after syscall. If it
|
||||
// changes, update this instruction's nextPC because the syscall
|
||||
// must have changed the nextPC.
|
||||
TheISA::PCState curPC = this->cpu->pcState(this->threadNumber);
|
||||
this->cpu->syscall(callnum, this->threadNumber);
|
||||
TheISA::PCState newPC = this->cpu->pcState(this->threadNumber);
|
||||
if (!(curPC == newPC)) {
|
||||
this->pcState(newPC);
|
||||
}
|
||||
}
|
||||
|
||||
34
simulators/gem5/src/cpu/o3/fetch.cc
Normal file
34
simulators/gem5/src/cpu/o3/fetch.cc
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/fetch_impl.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
|
||||
// Explicit instantiation of the DefaultFetch template for O3CPUImpl.
template class DefaultFetch<O3CPUImpl>;
|
||||
558
simulators/gem5/src/cpu/o3/fetch.hh
Normal file
558
simulators/gem5/src/cpu/o3/fetch.hh
Normal file
@ -0,0 +1,558 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_FETCH_HH__
|
||||
#define __CPU_O3_FETCH_HH__
|
||||
|
||||
#include "arch/decoder.hh"
|
||||
#include "arch/utility.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/pc_event.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "cpu/translation.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/port.hh"
|
||||
#include "sim/eventq.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
|
||||
/**
|
||||
* DefaultFetch class handles both single threaded and SMT fetch. Its
|
||||
* width is specified by the parameters; each cycle it tries to fetch
|
||||
* that many instructions. It supports using a branch predictor to
|
||||
* predict direction and targets.
|
||||
* It supports the idling functionality of the CPU by indicating to
|
||||
* the CPU when it is active and inactive.
|
||||
*/
|
||||
template <class Impl>
|
||||
class DefaultFetch
|
||||
{
|
||||
public:
|
||||
/** Typedefs from Impl. */
|
||||
typedef typename Impl::CPUPol CPUPol;
|
||||
typedef typename Impl::DynInst DynInst;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
/** Typedefs from the CPU policy. */
|
||||
typedef typename CPUPol::BPredUnit BPredUnit;
|
||||
typedef typename CPUPol::FetchStruct FetchStruct;
|
||||
typedef typename CPUPol::TimeStruct TimeStruct;
|
||||
|
||||
/** Typedefs from ISA. */
|
||||
typedef TheISA::MachInst MachInst;
|
||||
typedef TheISA::ExtMachInst ExtMachInst;
|
||||
|
||||
/** BaseTLB::Translation implementation that forwards the result of a
 * translation to the owning fetch stage and then deletes itself. */
class FetchTranslation : public BaseTLB::Translation
|
||||
{
|
||||
protected:
|
||||
DefaultFetch<Impl> *fetch;
|
||||
|
||||
public:
|
||||
FetchTranslation(DefaultFetch<Impl> *_fetch)
|
||||
: fetch(_fetch)
|
||||
{}
|
||||
|
||||
void
|
||||
markDelayed()
|
||||
{}
|
||||
|
||||
void
|
||||
finish(Fault fault, RequestPtr req, ThreadContext *tc,
|
||||
BaseTLB::Mode mode)
|
||||
{
|
||||
assert(mode == BaseTLB::Execute);
|
||||
fetch->finishTranslation(fault, req);
|
||||
// The object destroys itself here; it must not be used after finish().
delete this;
|
||||
}
|
||||
};
|
||||
|
||||
private:
|
||||
/* Event to delay delivery of a fetch translation result in case of
|
||||
* a fault and the nop to carry the fault cannot be generated
|
||||
* immediately */
|
||||
class FinishTranslationEvent : public Event
|
||||
{
|
||||
private:
|
||||
DefaultFetch<Impl> *fetch;
|
||||
Fault fault;
|
||||
RequestPtr req;
|
||||
|
||||
public:
|
||||
FinishTranslationEvent(DefaultFetch<Impl> *_fetch)
|
||||
: fetch(_fetch)
|
||||
{}
|
||||
|
||||
void setFault(Fault _fault)
|
||||
{
|
||||
fault = _fault;
|
||||
}
|
||||
|
||||
void setReq(RequestPtr _req)
|
||||
{
|
||||
req = _req;
|
||||
}
|
||||
|
||||
/** Process the delayed finish translation */
|
||||
void process()
|
||||
{
|
||||
// Asserts that fewer than fetchWidth instructions have been counted
// in numInst before the stored fault and request are delivered.
assert(fetch->numInst < fetch->fetchWidth);
|
||||
fetch->finishTranslation(fault, req);
|
||||
}
|
||||
|
||||
const char *description() const
|
||||
{
|
||||
return "FullO3CPU FetchFinishTranslation";
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
/** Overall fetch status. Used to determine if the CPU can
|
||||
* deschedule itself due to a lack of activity.
|
||||
*/
|
||||
enum FetchStatus {
|
||||
Active,
|
||||
Inactive
|
||||
};
|
||||
|
||||
/** Individual thread status. */
|
||||
enum ThreadStatus {
|
||||
Running,
|
||||
Idle,
|
||||
Squashing,
|
||||
Blocked,
|
||||
Fetching,
|
||||
TrapPending,
|
||||
QuiescePending,
|
||||
SwitchOut,
|
||||
ItlbWait,
|
||||
IcacheWaitResponse,
|
||||
IcacheWaitRetry,
|
||||
IcacheAccessComplete,
|
||||
NoGoodAddr
|
||||
};
|
||||
|
||||
/** Fetching Policy, Add new policies here.*/
|
||||
enum FetchPriority {
|
||||
SingleThread,
|
||||
RoundRobin,
|
||||
Branch,
|
||||
IQ,
|
||||
LSQ
|
||||
};
|
||||
|
||||
private:
|
||||
/** Fetch status. */
|
||||
FetchStatus _status;
|
||||
|
||||
/** Per-thread status. */
|
||||
ThreadStatus fetchStatus[Impl::MaxThreads];
|
||||
|
||||
/** Fetch policy. */
|
||||
FetchPriority fetchPolicy;
|
||||
|
||||
/** List that has the threads organized by priority. */
|
||||
std::list<ThreadID> priorityList;
|
||||
|
||||
public:
|
||||
/** DefaultFetch constructor. */
|
||||
DefaultFetch(O3CPU *_cpu, DerivO3CPUParams *params);
|
||||
|
||||
/** Returns the name of fetch. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Sets the main backwards communication time buffer pointer. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *time_buffer);
|
||||
|
||||
/** Sets pointer to list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Sets pointer to time buffer used to communicate to the next stage. */
|
||||
void setFetchQueue(TimeBuffer<FetchStruct> *fq_ptr);
|
||||
|
||||
/** Initialize stage. */
|
||||
void initStage();
|
||||
|
||||
/** Tells the fetch stage that the Icache is set. */
|
||||
void setIcache();
|
||||
|
||||
/** Handles retrying the fetch access. */
|
||||
void recvRetry();
|
||||
|
||||
/** Processes cache completion event. */
|
||||
void processCacheCompletion(PacketPtr pkt);
|
||||
|
||||
/** Begins the drain of the fetch stage. */
|
||||
bool drain();
|
||||
|
||||
/** Resumes execution after a drain. */
|
||||
void resume();
|
||||
|
||||
/** Tells fetch stage to prepare to be switched out. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Checks if the fetch stage is switched out. */
|
||||
bool isSwitchedOut() { return switchedOut; }
|
||||
|
||||
/** Tells fetch to wake up from a quiesce instruction. */
|
||||
void wakeFromQuiesce();
|
||||
|
||||
private:
|
||||
/** Changes the status of this stage to active, and indicates this
|
||||
* to the CPU.
|
||||
*/
|
||||
inline void switchToActive();
|
||||
|
||||
/** Changes the status of this stage to inactive, and indicates
|
||||
* this to the CPU.
|
||||
*/
|
||||
inline void switchToInactive();
|
||||
|
||||
/**
|
||||
* Looks up in the branch predictor to see if the next PC should be
|
||||
* either next PC+=MachInst or a branch target.
|
||||
* @param inst Dynamic instruction handle, passed by reference
* (presumably the instruction being predicted; TODO confirm against
* fetch_impl.hh).
|
||||
* @param pc PC state passed in by reference. It is
|
||||
* expected to be set to the current PC; it will be updated with what
|
||||
* the next PC will be.
|
||||
* @return Whether or not a branch was predicted as taken.
|
||||
*/
|
||||
bool lookupAndUpdateNextPC(DynInstPtr &inst, TheISA::PCState &pc);
|
||||
|
||||
/**
|
||||
* Fetches the cache line that contains vaddr. Puts the data into
|
||||
* the class variable cacheData.
|
||||
* NOTE(review): this comment used to describe a ret_fault reference
* parameter and a Fault return value; the declaration below has
* neither. Confirm against fetch_impl.hh how faults are reported.
|
||||
* @param vaddr The memory address that is being fetched from.
|
||||
* @param tid Thread id.
|
||||
* @param pc The actual PC of the current instruction.
|
||||
* @return A bool whose meaning is not visible in this header
* (TODO confirm against fetch_impl.hh).
|
||||
*/
|
||||
bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc);
|
||||
/** Receives the fault and request of a fetch translation. Called from
 * FetchTranslation::finish() and FinishTranslationEvent::process(). */
void finishTranslation(Fault fault, RequestPtr mem_req);
|
||||
|
||||
|
||||
/** Returns true when interruptPending is set and, if THE_ISA is
|
||||
* ALPHA_ISA, the low two bits of pc are also clear.
*/
|
||||
bool
|
||||
checkInterrupt(Addr pc)
|
||||
{
|
||||
// The pc test only applies when THE_ISA == ALPHA_ISA; on every other
// ISA the result is just interruptPending.
return (interruptPending && (THE_ISA != ALPHA_ISA || !(pc & 0x3)));
|
||||
}
|
||||
|
||||
/** Squashes a specific thread and resets the PC. */
|
||||
inline void doSquash(const TheISA::PCState &newPC,
|
||||
const DynInstPtr squashInst, ThreadID tid);
|
||||
|
||||
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
||||
* remove any instructions between fetch and decode that should be squashed.
|
||||
*/
|
||||
void squashFromDecode(const TheISA::PCState &newPC,
|
||||
const DynInstPtr squashInst,
|
||||
const InstSeqNum seq_num, ThreadID tid);
|
||||
|
||||
/** Checks if a thread is stalled. */
|
||||
bool checkStall(ThreadID tid) const;
|
||||
|
||||
/** Updates overall fetch stage status; to be called at the end of each
|
||||
* cycle. */
|
||||
FetchStatus updateFetchStatus();
|
||||
|
||||
public:
|
||||
/** Squashes a specific thread and resets the PC. Also tells the CPU to
|
||||
* remove any instructions that are not in the ROB. The source of this
|
||||
* squash should be the commit stage.
|
||||
*/
|
||||
void squash(const TheISA::PCState &newPC, const InstSeqNum seq_num,
|
||||
DynInstPtr squashInst, ThreadID tid);
|
||||
|
||||
/** Ticks the fetch stage, processing all inputs signals and fetching
|
||||
* as many instructions as possible.
|
||||
*/
|
||||
void tick();
|
||||
|
||||
/** Checks all input signals and updates the status as necessary.
|
||||
* @return: Returns if the status has changed due to input signals.
|
||||
*/
|
||||
bool checkSignalsAndUpdate(ThreadID tid);
|
||||
|
||||
/** Does the actual fetching of instructions and passing them on to the
|
||||
* next stage.
|
||||
* @param status_change fetch() sets this variable if there was a status
|
||||
* change. NOTE(review): this comment used to cite IcacheMissStall,
* which is not one of the ThreadStatus enumerators declared above.
|
||||
*/
|
||||
void fetch(bool &status_change);
|
||||
|
||||
/** Align a PC to the start of an I-cache block. */
|
||||
Addr icacheBlockAlignPC(Addr addr)
|
||||
{
|
||||
// Clears every bit of addr that is set in cacheBlkMask.
return (addr & ~(cacheBlkMask));
|
||||
}
|
||||
|
||||
/** The decoder. */
|
||||
TheISA::Decoder *decoder[Impl::MaxThreads];
|
||||
|
||||
private:
|
||||
DynInstPtr buildInst(ThreadID tid, StaticInstPtr staticInst,
|
||||
StaticInstPtr curMacroop, TheISA::PCState thisPC,
|
||||
TheISA::PCState nextPC, bool trace);
|
||||
|
||||
/** Returns the appropriate thread to fetch, given the fetch policy. */
|
||||
ThreadID getFetchingThread(FetchPriority &fetch_priority);
|
||||
|
||||
/** Returns the appropriate thread to fetch using a round robin policy. */
|
||||
ThreadID roundRobin();
|
||||
|
||||
/** Returns the appropriate thread to fetch using the IQ count policy. */
|
||||
ThreadID iqCount();
|
||||
|
||||
/** Returns the appropriate thread to fetch using the LSQ count policy. */
|
||||
ThreadID lsqCount();
|
||||
|
||||
/** Returns the appropriate thread to fetch using the branch count
|
||||
* policy. */
|
||||
ThreadID branchCount();
|
||||
|
||||
/** Pipeline the next I-cache access to the current one. */
|
||||
void pipelineIcacheAccesses(ThreadID tid);
|
||||
|
||||
/** Profile the reasons of fetch stall. */
|
||||
void profileStall(ThreadID tid);
|
||||
|
||||
private:
|
||||
/** Pointer to the O3CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Time buffer interface. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to get decode's information from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromDecode;
|
||||
|
||||
/** Wire to get rename's information from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromRename;
|
||||
|
||||
/** Wire to get iew's information from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromIEW;
|
||||
|
||||
/** Wire to get commit's information from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
||||
|
||||
/** Internal fetch instruction queue. */
|
||||
TimeBuffer<FetchStruct> *fetchQueue;
|
||||
|
||||
//Might be annoying how this name is different than the queue.
|
||||
/** Wire used to write any information heading to decode. */
|
||||
typename TimeBuffer<FetchStruct>::wire toDecode;
|
||||
|
||||
/** BPredUnit. */
|
||||
BPredUnit branchPred;
|
||||
|
||||
/** PC state, one slot per thread (array sized by Impl::MaxThreads). */
TheISA::PCState pc[Impl::MaxThreads];
|
||||
|
||||
/** One Addr offset per thread; NOTE(review): exact meaning is not shown
 * in this header -- confirm against fetch_impl.hh. */
Addr fetchOffset[Impl::MaxThreads];
|
||||
|
||||
/** One static instruction pointer per thread; presumably the macro-op
 * currently being fetched (TODO confirm against fetch_impl.hh). */
StaticInstPtr macroop[Impl::MaxThreads];
|
||||
|
||||
/** Can the fetch stage redirect from an interrupt on this instruction? */
|
||||
bool delayedCommit[Impl::MaxThreads];
|
||||
|
||||
/** Memory request used to access cache. */
|
||||
RequestPtr memReq[Impl::MaxThreads];
|
||||
|
||||
/** Variable that tracks if fetch has written to the time buffer this
|
||||
* cycle. Used to tell CPU if there is activity this cycle.
|
||||
*/
|
||||
bool wroteToTimeBuffer;
|
||||
|
||||
/** Tracks how many instructions have been fetched this cycle. */
|
||||
int numInst;
|
||||
|
||||
/** Source of possible stalls. */
|
||||
struct Stalls {
|
||||
bool decode;
|
||||
bool rename;
|
||||
bool iew;
|
||||
bool commit;
|
||||
};
|
||||
|
||||
/** Tracks which stages are telling fetch to stall. */
|
||||
Stalls stalls[Impl::MaxThreads];
|
||||
|
||||
/** Decode to fetch delay, in ticks. */
|
||||
unsigned decodeToFetchDelay;
|
||||
|
||||
/** Rename to fetch delay, in ticks. */
|
||||
unsigned renameToFetchDelay;
|
||||
|
||||
/** IEW to fetch delay, in ticks. */
|
||||
unsigned iewToFetchDelay;
|
||||
|
||||
/** Commit to fetch delay, in ticks. */
|
||||
unsigned commitToFetchDelay;
|
||||
|
||||
/** The width of fetch in instructions. */
|
||||
unsigned fetchWidth;
|
||||
|
||||
/** Is the cache blocked? If so no threads can access it. */
|
||||
bool cacheBlocked;
|
||||
|
||||
/** The packet that is waiting to be retried. */
|
||||
PacketPtr retryPkt;
|
||||
|
||||
/** The thread that is waiting on the cache to tell fetch to retry. */
|
||||
ThreadID retryTid;
|
||||
|
||||
/** Cache block size. */
|
||||
int cacheBlkSize;
|
||||
|
||||
/** Mask to get a cache block's address. */
|
||||
Addr cacheBlkMask;
|
||||
|
||||
/** The cache line being fetched. */
|
||||
uint8_t *cacheData[Impl::MaxThreads];
|
||||
|
||||
/** The PC of the cacheline that has been loaded. */
|
||||
Addr cacheDataPC[Impl::MaxThreads];
|
||||
|
||||
/** Whether or not the cache data is valid. */
|
||||
bool cacheDataValid[Impl::MaxThreads];
|
||||
|
||||
/** Size of instructions. */
|
||||
int instSize;
|
||||
|
||||
/** Icache stall statistics. */
|
||||
Counter lastIcacheStall[Impl::MaxThreads];
|
||||
|
||||
/** List of Active Threads */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Number of threads. */
|
||||
ThreadID numThreads;
|
||||
|
||||
/** Number of threads that are actively fetching. */
|
||||
ThreadID numFetchingThreads;
|
||||
|
||||
/** Thread ID being fetched. */
|
||||
ThreadID threadFetched;
|
||||
|
||||
/** Checks if there is an interrupt pending. If there is, fetch
|
||||
* must stop once it is not fetching PAL instructions.
|
||||
*/
|
||||
bool interruptPending;
|
||||
|
||||
/** Is there a drain pending. */
|
||||
bool drainPending;
|
||||
|
||||
/** Records if fetch is switched out. */
|
||||
bool switchedOut;
|
||||
|
||||
/** Set to true if a pipelined I-cache request should be issued. */
|
||||
bool issuePipelinedIfetch[Impl::MaxThreads];
|
||||
|
||||
/** Event used to delay fault generation of translation faults */
|
||||
FinishTranslationEvent finishTranslationEvent;
|
||||
|
||||
// @todo: Consider making these vectors and tracking on a per thread basis.
|
||||
/** Stat for total number of cycles stalled due to an icache miss. */
|
||||
Stats::Scalar icacheStallCycles;
|
||||
/** Stat for total number of fetched instructions. */
|
||||
Stats::Scalar fetchedInsts;
|
||||
/** Total number of fetched branches. */
|
||||
Stats::Scalar fetchedBranches;
|
||||
/** Stat for total number of predicted branches. */
|
||||
Stats::Scalar predictedBranches;
|
||||
/** Stat for total number of cycles spent fetching. */
|
||||
Stats::Scalar fetchCycles;
|
||||
/** Stat for total number of cycles spent squashing. */
|
||||
Stats::Scalar fetchSquashCycles;
|
||||
/** Stat for total number of cycles spent waiting for translation */
|
||||
Stats::Scalar fetchTlbCycles;
|
||||
/** Stat for total number of cycles spent blocked due to other stages in
|
||||
* the pipeline.
|
||||
*/
|
||||
Stats::Scalar fetchIdleCycles;
|
||||
/** Total number of cycles spent blocked. */
|
||||
Stats::Scalar fetchBlockedCycles;
|
||||
/** Total number of cycles spent in any other state. */
|
||||
Stats::Scalar fetchMiscStallCycles;
|
||||
/** Total number of cycles spent in waiting for drains. */
|
||||
Stats::Scalar fetchPendingDrainCycles;
|
||||
/** Total number of stall cycles caused by no active threads to run. */
|
||||
Stats::Scalar fetchNoActiveThreadStallCycles;
|
||||
/** Total number of stall cycles caused by pending traps. */
|
||||
Stats::Scalar fetchPendingTrapStallCycles;
|
||||
/** Total number of stall cycles caused by pending quiesce instructions. */
|
||||
Stats::Scalar fetchPendingQuiesceStallCycles;
|
||||
/** Total number of stall cycles caused by I-cache wait retries. */
|
||||
Stats::Scalar fetchIcacheWaitRetryStallCycles;
|
||||
/** Stat for total number of fetched cache lines. */
|
||||
Stats::Scalar fetchedCacheLines;
|
||||
/** Total number of outstanding icache accesses that were dropped
|
||||
* due to a squash.
|
||||
*/
|
||||
Stats::Scalar fetchIcacheSquashes;
|
||||
/** Total number of outstanding tlb accesses that were dropped
|
||||
* due to a squash.
|
||||
*/
|
||||
Stats::Scalar fetchTlbSquashes;
|
||||
/** Distribution of number of instructions fetched each cycle. */
|
||||
Stats::Distribution fetchNisnDist;
|
||||
/** Rate of how often fetch was idle. */
|
||||
Stats::Formula idleRate;
|
||||
/** Number of branch fetches per cycle. */
|
||||
Stats::Formula branchRate;
|
||||
/** Number of instructions fetched per cycle. */
|
||||
Stats::Formula fetchRate;
|
||||
};
|
||||
|
||||
#endif //__CPU_O3_FETCH_HH__
|
||||
1607
simulators/gem5/src/cpu/o3/fetch_impl.hh
Normal file
1607
simulators/gem5/src/cpu/o3/fetch_impl.hh
Normal file
File diff suppressed because it is too large
Load Diff
72
simulators/gem5/src/cpu/o3/free_list.cc
Normal file
72
simulators/gem5/src/cpu/o3/free_list.cc
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/o3/free_list.hh"
|
||||
#include "debug/FreeList.hh"
|
||||
|
||||
/**
 * Records the register counts and fills the integer and float free lists
 * with every physical register index above the ones set aside for the
 * logical registers of the active threads.
 * @param activeThreads Thread count used to size the reserved ranges.
 * @param _numLogicalIntRegs Logical integer registers reserved per thread.
 * @param _numPhysicalIntRegs Total physical integer registers.
 * @param _numLogicalFloatRegs Logical float registers reserved per thread.
 * @param _numPhysicalFloatRegs Total physical float registers.
 */
SimpleFreeList::SimpleFreeList(ThreadID activeThreads,
                               unsigned _numLogicalIntRegs,
                               unsigned _numPhysicalIntRegs,
                               unsigned _numLogicalFloatRegs,
                               unsigned _numPhysicalFloatRegs)
    : numLogicalIntRegs(_numLogicalIntRegs),
      numPhysicalIntRegs(_numPhysicalIntRegs),
      numLogicalFloatRegs(_numLogicalFloatRegs),
      numPhysicalFloatRegs(_numPhysicalFloatRegs),
      numPhysicalRegs(numPhysicalIntRegs + numPhysicalFloatRegs)
{
    DPRINTF(FreeList, "Creating new free list object.\n");

    // Integer side: the first numLogicalIntRegs * activeThreads indices
    // are skipped; everything from there up to numPhysicalIntRegs is free.
    PhysRegIndex intReg = numLogicalIntRegs * activeThreads;
    while (intReg < numPhysicalIntRegs) {
        freeIntRegs.push(intReg);
        ++intReg;
    }

    // Float side: float indices begin where the physical integer registers
    // end, so the first free float index is offset by numPhysicalIntRegs on
    // top of the reserved logical float registers.
    for (PhysRegIndex floatReg =
             numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
         floatReg < numPhysicalRegs; ++floatReg) {
        freeFloatRegs.push(floatReg);
    }
}
|
||||
|
||||
std::string
|
||||
SimpleFreeList::name() const
|
||||
{
|
||||
return "cpu.freelist";
|
||||
}
|
||||
207
simulators/gem5/src/cpu/o3/free_list.hh
Normal file
207
simulators/gem5/src/cpu/o3/free_list.hh
Normal file
@ -0,0 +1,207 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_FREE_LIST_HH__
|
||||
#define __CPU_O3_FREE_LIST_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
|
||||
#include "arch/registers.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "debug/FreeList.hh"
|
||||
|
||||
/**
|
||||
* FreeList class that simply holds the list of free integer and floating
|
||||
* point registers. Can request for a free register of either type, and
|
||||
* also send back free registers of either type. This is a very simple
|
||||
* class, but it should be sufficient for most implementations. Like all
|
||||
* other classes, it assumes that the indices for the floating point
|
||||
* registers starts after the integer registers end. Hence the variable
|
||||
* numPhysicalIntRegs is logically equivalent to the baseFP dependency.
|
||||
* Note that while this most likely should be called FreeList, the name
|
||||
* "FreeList" is used in a typedef within the CPU Policy, and therefore no
|
||||
* class can be named simply "FreeList".
|
||||
* @todo: Give a better name to the base FP dependency.
|
||||
*/
|
||||
class SimpleFreeList
|
||||
{
|
||||
private:
|
||||
/** The list of free integer registers. */
|
||||
std::queue<PhysRegIndex> freeIntRegs;
|
||||
|
||||
/** The list of free floating point registers. */
|
||||
std::queue<PhysRegIndex> freeFloatRegs;
|
||||
|
||||
/** Number of logical integer registers. */
|
||||
int numLogicalIntRegs;
|
||||
|
||||
/** Number of physical integer registers. */
|
||||
int numPhysicalIntRegs;
|
||||
|
||||
/** Number of logical floating point registers. */
|
||||
int numLogicalFloatRegs;
|
||||
|
||||
/** Number of physical floating point registers. */
|
||||
int numPhysicalFloatRegs;
|
||||
|
||||
/** Total number of physical registers. */
|
||||
int numPhysicalRegs;
|
||||
|
||||
public:
|
||||
/** Constructs a free list.
|
||||
* @param activeThreads Number of active threads.
|
||||
* @param _numLogicalIntRegs Number of logical integer registers.
|
||||
* @param _numPhysicalIntRegs Number of physical integer registers.
|
||||
* @param _numLogicalFloatRegs Number of logical fp registers.
|
||||
* @param _numPhysicalFloatRegs Number of physical fp registers.
|
||||
*/
|
||||
SimpleFreeList(ThreadID activeThreads,
|
||||
unsigned _numLogicalIntRegs,
|
||||
unsigned _numPhysicalIntRegs,
|
||||
unsigned _numLogicalFloatRegs,
|
||||
unsigned _numPhysicalFloatRegs);
|
||||
|
||||
/** Gives the name of the freelist. */
|
||||
std::string name() const;
|
||||
|
||||
/** Gets a free integer register. */
|
||||
inline PhysRegIndex getIntReg();
|
||||
|
||||
/** Gets a free fp register. */
|
||||
inline PhysRegIndex getFloatReg();
|
||||
|
||||
/** Adds a register back to the free list. */
|
||||
inline void addReg(PhysRegIndex freed_reg);
|
||||
|
||||
/** Adds an integer register back to the free list. */
|
||||
inline void addIntReg(PhysRegIndex freed_reg);
|
||||
|
||||
/** Adds a fp register back to the free list. */
|
||||
inline void addFloatReg(PhysRegIndex freed_reg);
|
||||
|
||||
/** Checks if there are any free integer registers. */
|
||||
bool hasFreeIntRegs()
|
||||
{ return !freeIntRegs.empty(); }
|
||||
|
||||
/** Checks if there are any free fp registers. */
|
||||
bool hasFreeFloatRegs()
|
||||
{ return !freeFloatRegs.empty(); }
|
||||
|
||||
/** Returns the number of free integer registers. */
|
||||
int numFreeIntRegs()
|
||||
{ return freeIntRegs.size(); }
|
||||
|
||||
/** Returns the number of free fp registers. */
|
||||
int numFreeFloatRegs()
|
||||
{ return freeFloatRegs.size(); }
|
||||
};
|
||||
|
||||
/**
 * Removes the register at the head of the integer free list and returns
 * its index.  Panics if the integer free list is empty.
 */
inline PhysRegIndex
SimpleFreeList::getIntReg()
{
    DPRINTF(FreeList, "Trying to get free integer register.\n");

    if (freeIntRegs.empty()) {
        panic("No free integer registers!");
    }

    // Take the oldest entry: read it first, then drop it from the queue.
    const PhysRegIndex next_reg = freeIntRegs.front();
    freeIntRegs.pop();
    return next_reg;
}
|
||||
|
||||
/**
 * Removes the register at the head of the floating point free list and
 * returns its index.  Panics if the floating point free list is empty.
 */
inline PhysRegIndex
SimpleFreeList::getFloatReg()
{
    DPRINTF(FreeList, "Trying to get free float register.\n");

    if (freeFloatRegs.empty()) {
        // The message names the float pool; it previously said "integer",
        // which pointed at the wrong register file when this fired.
        panic("No free float registers!");
    }

    // Take the oldest entry: read it first, then drop it from the queue.
    const PhysRegIndex free_reg = freeFloatRegs.front();
    freeFloatRegs.pop();
    return free_reg;
}
|
||||
|
||||
inline void
|
||||
SimpleFreeList::addReg(PhysRegIndex freed_reg)
|
||||
{
|
||||
DPRINTF(FreeList,"Freeing register %i.\n", freed_reg);
|
||||
//Might want to add in a check for whether or not this register is
|
||||
//already in there. A bit vector or something similar would be useful.
|
||||
if (freed_reg < numPhysicalIntRegs) {
|
||||
if (freed_reg != TheISA::ZeroReg)
|
||||
freeIntRegs.push(freed_reg);
|
||||
} else if (freed_reg < numPhysicalRegs) {
|
||||
#if THE_ISA == ALPHA_ISA
|
||||
if (freed_reg != (TheISA::ZeroReg + numPhysicalIntRegs))
|
||||
#endif
|
||||
freeFloatRegs.push(freed_reg);
|
||||
}
|
||||
|
||||
// These assert conditions ensure that the number of free
|
||||
// registers are not more than the # of total Physical Registers.
|
||||
// If this were false, it would mean that registers
|
||||
// have been freed twice, overflowing the free register
|
||||
// pool and potentially crashing SMT workloads.
|
||||
// ----
|
||||
// Comment out for now so as to not potentially break
|
||||
// CMP and single-threaded workloads
|
||||
// ----
|
||||
// assert(freeIntRegs.size() <= numPhysicalIntRegs);
|
||||
// assert(freeFloatRegs.size() <= numPhysicalFloatRegs);
|
||||
}
|
||||
|
||||
inline void
|
||||
SimpleFreeList::addIntReg(PhysRegIndex freed_reg)
|
||||
{
|
||||
DPRINTF(FreeList,"Freeing int register %i.\n", freed_reg);
|
||||
|
||||
freeIntRegs.push(freed_reg);
|
||||
}
|
||||
|
||||
inline void
|
||||
SimpleFreeList::addFloatReg(PhysRegIndex freed_reg)
|
||||
{
|
||||
DPRINTF(FreeList,"Freeing float register %i.\n", freed_reg);
|
||||
|
||||
freeFloatRegs.push(freed_reg);
|
||||
}
|
||||
|
||||
#endif // __CPU_O3_FREE_LIST_HH__
|
||||
282
simulators/gem5/src/cpu/o3/fu_pool.cc
Normal file
282
simulators/gem5/src/cpu/o3/fu_pool.cc
Normal file
@ -0,0 +1,282 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "cpu/o3/fu_pool.hh"
|
||||
#include "cpu/func_unit.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// A pool of function units
|
||||
//
|
||||
|
||||
inline void
|
||||
FUPool::FUIdxQueue::addFU(int fu_idx)
|
||||
{
|
||||
funcUnitsIdx.push_back(fu_idx);
|
||||
++size;
|
||||
}
|
||||
|
||||
inline int
|
||||
FUPool::FUIdxQueue::getFU()
|
||||
{
|
||||
int retval = funcUnitsIdx[idx++];
|
||||
|
||||
if (idx == size)
|
||||
idx = 0;
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
// Destructor: deletes every FuncUnit held in funcUnits (they are created
// with new in the constructor).
FUPool::~FUPool()
{
    fuListIterator unit = funcUnits.begin();
    const fuListIterator last = funcUnits.end();

    while (unit != last) {
        delete *unit;
        ++unit;
    }
}
|
||||
|
||||
|
||||
// Constructor
|
||||
FUPool::FUPool(const Params *p)
|
||||
: SimObject(p)
|
||||
{
|
||||
numFU = 0;
|
||||
|
||||
funcUnits.clear();
|
||||
|
||||
for (int i = 0; i < Num_OpClasses; ++i) {
|
||||
maxOpLatencies[i] = 0;
|
||||
maxIssueLatencies[i] = 0;
|
||||
}
|
||||
|
||||
//
|
||||
// Iterate through the list of FUDescData structures
|
||||
//
|
||||
const vector<FUDesc *> ¶mList = p->FUList;
|
||||
for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) {
|
||||
|
||||
//
|
||||
// Don't bother with this if we're not going to create any FU's
|
||||
//
|
||||
if ((*i)->number) {
|
||||
//
|
||||
// Create the FuncUnit object from this structure
|
||||
// - add the capabilities listed in the FU's operation
|
||||
// description
|
||||
//
|
||||
// We create the first unit, then duplicate it as needed
|
||||
//
|
||||
FuncUnit *fu = new FuncUnit;
|
||||
|
||||
OPDDiterator j = (*i)->opDescList.begin();
|
||||
OPDDiterator end = (*i)->opDescList.end();
|
||||
for (; j != end; ++j) {
|
||||
// indicate that this pool has this capability
|
||||
capabilityList.set((*j)->opClass);
|
||||
|
||||
// Add each of the FU's that will have this capability to the
|
||||
// appropriate queue.
|
||||
for (int k = 0; k < (*i)->number; ++k)
|
||||
fuPerCapList[(*j)->opClass].addFU(numFU + k);
|
||||
|
||||
// indicate that this FU has the capability
|
||||
fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
|
||||
|
||||
if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
|
||||
maxOpLatencies[(*j)->opClass] = (*j)->opLat;
|
||||
|
||||
if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
|
||||
maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
|
||||
}
|
||||
|
||||
numFU++;
|
||||
|
||||
// Add the appropriate number of copies of this FU to the list
|
||||
ostringstream s;
|
||||
|
||||
s << (*i)->name() << "(0)";
|
||||
fu->name = s.str();
|
||||
funcUnits.push_back(fu);
|
||||
|
||||
for (int c = 1; c < (*i)->number; ++c) {
|
||||
ostringstream s;
|
||||
numFU++;
|
||||
FuncUnit *fu2 = new FuncUnit(*fu);
|
||||
|
||||
s << (*i)->name() << "(" << c << ")";
|
||||
fu2->name = s.str();
|
||||
funcUnits.push_back(fu2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unitBusy.resize(numFU);
|
||||
|
||||
for (int i = 0; i < numFU; i++) {
|
||||
unitBusy[i] = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FUPool::annotateMemoryUnits(unsigned hit_latency)
|
||||
{
|
||||
maxOpLatencies[MemReadOp] = hit_latency;
|
||||
|
||||
fuListIterator i = funcUnits.begin();
|
||||
fuListIterator iend = funcUnits.end();
|
||||
for (; i != iend; ++i) {
|
||||
if ((*i)->provides(MemReadOp))
|
||||
(*i)->opLatency(MemReadOp) = hit_latency;
|
||||
|
||||
if ((*i)->provides(MemWriteOp))
|
||||
(*i)->opLatency(MemWriteOp) = hit_latency;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
FUPool::getUnit(OpClass capability)
|
||||
{
|
||||
// If this pool doesn't have the specified capability,
|
||||
// return this information to the caller
|
||||
if (!capabilityList[capability])
|
||||
return -2;
|
||||
|
||||
int fu_idx = fuPerCapList[capability].getFU();
|
||||
int start_idx = fu_idx;
|
||||
|
||||
// Iterate through the circular queue if needed, stopping if we've reached
|
||||
// the first element again.
|
||||
while (unitBusy[fu_idx]) {
|
||||
fu_idx = fuPerCapList[capability].getFU();
|
||||
if (fu_idx == start_idx) {
|
||||
// No FU available
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
assert(fu_idx < numFU);
|
||||
|
||||
unitBusy[fu_idx] = true;
|
||||
|
||||
return fu_idx;
|
||||
}
|
||||
|
||||
void
|
||||
FUPool::freeUnitNextCycle(int fu_idx)
|
||||
{
|
||||
assert(unitBusy[fu_idx]);
|
||||
unitsToBeFreed.push_back(fu_idx);
|
||||
}
|
||||
|
||||
void
|
||||
FUPool::processFreeUnits()
|
||||
{
|
||||
while (!unitsToBeFreed.empty()) {
|
||||
int fu_idx = unitsToBeFreed.back();
|
||||
unitsToBeFreed.pop_back();
|
||||
|
||||
assert(unitBusy[fu_idx]);
|
||||
|
||||
unitBusy[fu_idx] = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FUPool::dump()
|
||||
{
|
||||
cout << "Function Unit Pool (" << name() << ")\n";
|
||||
cout << "======================================\n";
|
||||
cout << "Free List:\n";
|
||||
|
||||
for (int i = 0; i < numFU; ++i) {
|
||||
if (unitBusy[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cout << " [" << i << "] : ";
|
||||
|
||||
cout << funcUnits[i]->name << " ";
|
||||
|
||||
cout << "\n";
|
||||
}
|
||||
|
||||
cout << "======================================\n";
|
||||
cout << "Busy List:\n";
|
||||
for (int i = 0; i < numFU; ++i) {
|
||||
if (!unitBusy[i]) {
|
||||
continue;
|
||||
}
|
||||
|
||||
cout << " [" << i << "] : ";
|
||||
|
||||
cout << funcUnits[i]->name << " ";
|
||||
|
||||
cout << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
FUPool::switchOut()
|
||||
{
|
||||
}
|
||||
|
||||
void
|
||||
FUPool::takeOver()
|
||||
{
|
||||
for (int i = 0; i < numFU; i++) {
|
||||
unitBusy[i] = false;
|
||||
}
|
||||
unitsToBeFreed.clear();
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// The SimObjects we use to get the FU information into the simulator
|
||||
//
|
||||
////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
//
|
||||
// FUPool - Contains a list of FUDesc objects to make available
|
||||
//
|
||||
|
||||
//
|
||||
// The FuPool object
|
||||
//
|
||||
// Factory hook on the parameter object: allocates a new FUPool built from
// these parameters and returns it to the caller.
FUPool *
FUPoolParams::create()
{
    FUPool *pool = new FUPool(this);
    return pool;
}
|
||||
167
simulators/gem5/src/cpu/o3/fu_pool.hh
Normal file
167
simulators/gem5/src/cpu/o3/fu_pool.hh
Normal file
@ -0,0 +1,167 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_FU_POOL_HH__
|
||||
#define __CPU_O3_FU_POOL_HH__
|
||||
|
||||
#include <bitset>
|
||||
#include <list>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "cpu/op_class.hh"
|
||||
#include "params/FUPool.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
class FUDesc;
|
||||
class FuncUnit;
|
||||
|
||||
/**
|
||||
* Pool of FU's, specific to the new CPU model. The old FU pool had lists of
|
||||
* free units and busy units, and whenever a FU was needed it would iterate
|
||||
* through the free units to find a FU that provided the capability. This pool
|
||||
* has lists of units specific to each of the capabilities, and whenever a FU
|
||||
* is needed, it iterates through that list to find a free unit. The previous
|
||||
* FU pool would have to be ticked each cycle to update which units became
|
||||
* free. This FU pool lets the IEW stage handle freeing units, which frees
|
||||
* them as their scheduled execution events complete. This limits units in this
|
||||
* model to either have identical issue and op latencies, or 1 cycle issue
|
||||
* latencies.
|
||||
*/
|
||||
class FUPool : public SimObject
|
||||
{
|
||||
private:
|
||||
/** Maximum op execution latencies, per op class. */
|
||||
unsigned maxOpLatencies[Num_OpClasses];
|
||||
/** Maximum issue latencies, per op class. */
|
||||
unsigned maxIssueLatencies[Num_OpClasses];
|
||||
|
||||
/** Bitvector listing capabilities of this FU pool. */
|
||||
std::bitset<Num_OpClasses> capabilityList;
|
||||
|
||||
/** Bitvector listing which FUs are busy. */
|
||||
std::vector<bool> unitBusy;
|
||||
|
||||
/** List of units to be freed at the end of this cycle. */
|
||||
std::vector<int> unitsToBeFreed;
|
||||
|
||||
/**
|
||||
* Class that implements a circular queue to hold FU indices. The hope is
|
||||
* that FUs that have been just used will be moved to the end of the queue
|
||||
* by iterating through it, thus leaving free units at the head of the
|
||||
* queue.
|
||||
*/
|
||||
class FUIdxQueue {
|
||||
public:
|
||||
/** Constructs a circular queue of FU indices. */
|
||||
FUIdxQueue()
|
||||
: idx(0), size(0)
|
||||
{ }
|
||||
|
||||
/** Adds a FU to the queue. */
|
||||
inline void addFU(int fu_idx);
|
||||
|
||||
/** Returns the index of the FU at the head of the queue, and changes
|
||||
* the index to the next element.
|
||||
*/
|
||||
inline int getFU();
|
||||
|
||||
private:
|
||||
/** Circular queue index. */
|
||||
int idx;
|
||||
|
||||
/** Size of the queue. */
|
||||
int size;
|
||||
|
||||
/** Queue of FU indices. */
|
||||
std::vector<int> funcUnitsIdx;
|
||||
};
|
||||
|
||||
/** Per op class queues of FUs that provide that capability. */
|
||||
FUIdxQueue fuPerCapList[Num_OpClasses];
|
||||
|
||||
/** Number of FUs. */
|
||||
int numFU;
|
||||
|
||||
/** Functional units. */
|
||||
std::vector<FuncUnit *> funcUnits;
|
||||
|
||||
typedef std::vector<FuncUnit *>::iterator fuListIterator;
|
||||
|
||||
public:
|
||||
typedef FUPoolParams Params;
|
||||
/** Constructs a FU pool. */
|
||||
FUPool(const Params *p);
|
||||
~FUPool();
|
||||
|
||||
/** Annotates units that provide memory operations. Included only because
|
||||
* old FU pool provided this function.
|
||||
*/
|
||||
void annotateMemoryUnits(unsigned hit_latency);
|
||||
|
||||
/**
|
||||
* Gets a FU providing the requested capability. Will mark the unit as busy,
|
||||
* but leaves the freeing of the unit up to the IEW stage.
|
||||
* @param capability The capability requested.
|
||||
* @return Returns -2 if the FU pool does not have the capability, -1 if
|
||||
* there is no free FU, and the FU's index otherwise.
|
||||
*/
|
||||
int getUnit(OpClass capability);
|
||||
|
||||
/** Frees a FU at the end of this cycle. */
|
||||
void freeUnitNextCycle(int fu_idx);
|
||||
|
||||
/** Frees all FUs on the list. */
|
||||
void processFreeUnits();
|
||||
|
||||
/** Returns the total number of FUs. */
|
||||
int size() { return numFU; }
|
||||
|
||||
/** Debugging function used to dump FU information. */
|
||||
void dump();
|
||||
|
||||
/** Returns the operation execution latency of the given capability. */
|
||||
unsigned getOpLatency(OpClass capability) {
|
||||
return maxOpLatencies[capability];
|
||||
}
|
||||
|
||||
/** Returns the issue latency of the given capability. */
|
||||
unsigned getIssueLatency(OpClass capability) {
|
||||
return maxIssueLatencies[capability];
|
||||
}
|
||||
|
||||
/** Switches out functional unit pool. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOver();
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_FU_POOL_HH__
|
||||
35
simulators/gem5/src/cpu/o3/iew.cc
Normal file
35
simulators/gem5/src/cpu/o3/iew.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/iew_impl.hh"
|
||||
#include "cpu/o3/inst_queue.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
|
||||
// Explicit instantiation of DefaultIEW for O3CPUImpl: forces the compiler
// to generate the template's members for that implementation in this
// translation unit.
template class DefaultIEW<O3CPUImpl>;
|
||||
548
simulators/gem5/src/cpu/o3/iew.hh
Normal file
548
simulators/gem5/src/cpu/o3/iew.hh
Normal file
@ -0,0 +1,548 @@
|
||||
/*
|
||||
* Copyright (c) 2010 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_IEW_HH__
|
||||
#define __CPU_O3_IEW_HH__
|
||||
|
||||
#include <queue>
|
||||
#include <set>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "cpu/o3/scoreboard.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "debug/IEW.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
class FUPool;
|
||||
|
||||
/**
|
||||
* DefaultIEW handles both single threaded and SMT IEW
|
||||
* (issue/execute/writeback). It handles the dispatching of
|
||||
* instructions to the LSQ/IQ as part of the issue stage, and has the
|
||||
* IQ try to issue instructions each cycle. The execute latency is
|
||||
* actually tied into the issue latency to allow the IQ to be able to
|
||||
* do back-to-back scheduling without having to speculatively schedule
|
||||
* instructions. This happens by having the IQ have access to the
|
||||
* functional units, and the IQ gets the execution latencies from the
|
||||
* FUs when it issues instructions. Instructions reach the execute
|
||||
* stage on the last cycle of their execution, which is when the IQ
|
||||
* knows to wake up any dependent instructions, allowing back to back
|
||||
* scheduling. The execute portion of IEW separates memory
|
||||
* instructions from non-memory instructions, either telling the LSQ
|
||||
* to execute the instruction, or executing the instruction directly.
|
||||
* The writeback portion of IEW completes the instructions by waking
|
||||
* up any dependents, and marking the register ready on the
|
||||
* scoreboard.
|
||||
*/
|
||||
template<class Impl>
|
||||
class DefaultIEW
|
||||
{
|
||||
private:
|
||||
//Typedefs from Impl
|
||||
typedef typename Impl::CPUPol CPUPol;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
typedef typename CPUPol::IQ IQ;
|
||||
typedef typename CPUPol::RenameMap RenameMap;
|
||||
typedef typename CPUPol::LSQ LSQ;
|
||||
|
||||
typedef typename CPUPol::TimeStruct TimeStruct;
|
||||
typedef typename CPUPol::IEWStruct IEWStruct;
|
||||
typedef typename CPUPol::RenameStruct RenameStruct;
|
||||
typedef typename CPUPol::IssueStruct IssueStruct;
|
||||
|
||||
public:
|
||||
/** Overall IEW stage status. Used to determine if the CPU can
|
||||
* deschedule itself due to a lack of activity.
|
||||
*/
|
||||
enum Status {
|
||||
Active,
|
||||
Inactive
|
||||
};
|
||||
|
||||
/** Status for Issue, Execute, and Writeback stages. */
|
||||
enum StageStatus {
|
||||
Running,
|
||||
Blocked,
|
||||
Idle,
|
||||
StartSquash,
|
||||
Squashing,
|
||||
Unblocking
|
||||
};
|
||||
|
||||
private:
|
||||
/** Overall stage status. */
|
||||
Status _status;
|
||||
/** Dispatch status. */
|
||||
StageStatus dispatchStatus[Impl::MaxThreads];
|
||||
/** Execute status. */
|
||||
StageStatus exeStatus;
|
||||
/** Writeback status. */
|
||||
StageStatus wbStatus;
|
||||
|
||||
public:
|
||||
/** Constructs a DefaultIEW with the given parameters. */
|
||||
DefaultIEW(O3CPU *_cpu, DerivO3CPUParams *params);
|
||||
|
||||
/** Returns the name of the DefaultIEW stage. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Initializes stage; sends back the number of free IQ and LSQ entries. */
|
||||
void initStage();
|
||||
|
||||
/** Sets main time buffer used for backwards communication. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||
|
||||
/** Sets time buffer for getting instructions coming from rename. */
|
||||
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
|
||||
|
||||
/** Sets time buffer to pass on instructions to commit. */
|
||||
void setIEWQueue(TimeBuffer<IEWStruct> *iq_ptr);
|
||||
|
||||
/** Sets pointer to list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Sets pointer to the scoreboard. */
|
||||
void setScoreboard(Scoreboard *sb_ptr);
|
||||
|
||||
/** Drains IEW stage. */
|
||||
bool drain();
|
||||
|
||||
/** Resumes execution after a drain. */
|
||||
void resume();
|
||||
|
||||
/** Completes switch out of IEW stage. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Returns if IEW is switched out. */
|
||||
bool isSwitchedOut() { return switchedOut; }
|
||||
|
||||
/** Squashes instructions in IEW for a specific thread. */
|
||||
void squash(ThreadID tid);
|
||||
|
||||
/** Wakes all dependents of a completed instruction. */
|
||||
void wakeDependents(DynInstPtr &inst);
|
||||
|
||||
/** Tells memory dependence unit that a memory instruction needs to be
|
||||
* rescheduled. It will re-execute once replayMemInst() is called.
|
||||
*/
|
||||
void rescheduleMemInst(DynInstPtr &inst);
|
||||
|
||||
/** Re-executes all rescheduled memory instructions. */
|
||||
void replayMemInst(DynInstPtr &inst);
|
||||
|
||||
/** Sends an instruction to commit through the time buffer. */
|
||||
void instToCommit(DynInstPtr &inst);
|
||||
|
||||
/** Inserts unused instructions of a thread into the skid buffer. */
|
||||
void skidInsert(ThreadID tid);
|
||||
|
||||
/** Returns the max of the number of entries in all of the skid buffers. */
|
||||
int skidCount();
|
||||
|
||||
/** Returns if all of the skid buffers are empty. */
|
||||
bool skidsEmpty();
|
||||
|
||||
/** Updates overall IEW status based on all of the stages' statuses. */
|
||||
void updateStatus();
|
||||
|
||||
/** Resets entries of the IQ and the LSQ. */
|
||||
void resetEntries();
|
||||
|
||||
/** Tells the CPU to wakeup if it has descheduled itself due to no
|
||||
* activity. Used mainly by the LdWritebackEvent.
|
||||
*/
|
||||
void wakeCPU();
|
||||
|
||||
/** Reports to the CPU that there is activity this cycle. */
|
||||
void activityThisCycle();
|
||||
|
||||
/** Tells CPU that the IEW stage is active and running. */
|
||||
inline void activateStage();
|
||||
|
||||
/** Tells CPU that the IEW stage is inactive and idle. */
|
||||
inline void deactivateStage();
|
||||
|
||||
/** Returns if the LSQ has any stores to writeback. */
|
||||
bool hasStoresToWB()
{
    // Forwarded unchanged to the load/store queue's argument-less query.
    return ldstQueue.hasStoresToWB();
}
|
||||
|
||||
/** Returns if the LSQ has any stores to writeback for the given thread. */
|
||||
bool hasStoresToWB(ThreadID tid)
{
    // Forwarded unchanged to the load/store queue's per-thread query.
    return ldstQueue.hasStoresToWB(tid);
}
|
||||
|
||||
void incrWb(InstSeqNum &sn)
{
    // Account for one more outstanding writeback; once the count reaches
    // wbMax, issue is disabled until decrWb() re-enables it.
    ++wbOutstanding;
    if (wbOutstanding == wbMax) {
        ableToIssue = false;
    }

    DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);

    // The outstanding count must never exceed the configured maximum.
    assert(wbOutstanding <= wbMax);

#ifdef DEBUG
    // Debug builds additionally remember which sequence numbers are pending.
    wbList.insert(sn);
#endif
}
|
||||
|
||||
void decrWb(InstSeqNum &sn)
{
    // If the count was sitting at wbMax before this decrement, issue had
    // been disabled by incrWb(); re-enable it now that a slot is free.
    const bool was_at_limit = (wbOutstanding == wbMax);
    --wbOutstanding;
    if (was_at_limit) {
        ableToIssue = true;
    }

    DPRINTF(IEW, "wbOutstanding: %i [sn:%lli]\n", wbOutstanding, sn);

    // More decrements than increments would drive the count negative.
    assert(wbOutstanding >= 0);

#ifdef DEBUG
    // Debug builds verify the sequence number was recorded by incrWb()
    // before dropping it from the pending set.
    assert(wbList.find(sn) != wbList.end());
    wbList.erase(sn);
#endif
}
|
||||
|
||||
#ifdef DEBUG
|
||||
std::set<InstSeqNum> wbList;
|
||||
|
||||
void dumpWb()
|
||||
{
|
||||
std::set<InstSeqNum>::iterator wb_it = wbList.begin();
|
||||
while (wb_it != wbList.end()) {
|
||||
cprintf("[sn:%lli]\n",
|
||||
(*wb_it));
|
||||
wb_it++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
bool canIssue()
{
    // ableToIssue is cleared by incrWb() and set again by decrWb().
    return ableToIssue;
}
|
||||
|
||||
bool ableToIssue;
|
||||
|
||||
/** Check misprediction */
|
||||
void checkMisprediction(DynInstPtr &inst);
|
||||
|
||||
private:
|
||||
/** Sends commit proper information for a squash due to a branch
|
||||
* mispredict.
|
||||
*/
|
||||
void squashDueToBranch(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Sends commit proper information for a squash due to a memory order
|
||||
* violation.
|
||||
*/
|
||||
void squashDueToMemOrder(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Sends commit proper information for a squash due to memory becoming
|
||||
* blocked (younger issued instructions must be retried).
|
||||
*/
|
||||
void squashDueToMemBlocked(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Sets Dispatch to blocked, and signals back to other stages to block. */
|
||||
void block(ThreadID tid);
|
||||
|
||||
/** Unblocks Dispatch if the skid buffer is empty, and signals back to
|
||||
* other stages to unblock.
|
||||
*/
|
||||
void unblock(ThreadID tid);
|
||||
|
||||
/** Determines proper actions to take given Dispatch's status. */
|
||||
void dispatch(ThreadID tid);
|
||||
|
||||
/** Dispatches instructions to IQ and LSQ. */
|
||||
void dispatchInsts(ThreadID tid);
|
||||
|
||||
/** Executes instructions. In the case of memory operations, it informs the
|
||||
* LSQ to execute the instructions. Also handles any redirects that occur
|
||||
* due to the executed instructions.
|
||||
*/
|
||||
void executeInsts();
|
||||
|
||||
/** Writes back instructions. In our model, the instruction's execute()
|
||||
* function atomically reads registers, executes, and writes registers.
|
||||
* Thus this writeback only wakes up dependent instructions, and informs
|
||||
* the scoreboard of registers becoming ready.
|
||||
*/
|
||||
void writebackInsts();
|
||||
|
||||
/** Returns the number of valid, non-squashed instructions coming from
|
||||
* rename to dispatch.
|
||||
*/
|
||||
unsigned validInstsFromRename();
|
||||
|
||||
/** Reads the stall signals. */
|
||||
void readStallSignals(ThreadID tid);
|
||||
|
||||
/** Checks if any of the stall conditions are currently true. */
|
||||
bool checkStall(ThreadID tid);
|
||||
|
||||
/** Processes inputs and changes state accordingly. */
|
||||
void checkSignalsAndUpdate(ThreadID tid);
|
||||
|
||||
/** Removes instructions from rename from a thread's instruction list. */
|
||||
void emptyRenameInsts(ThreadID tid);
|
||||
|
||||
/** Sorts instructions coming from rename into lists separated by thread. */
|
||||
void sortInsts();
|
||||
|
||||
public:
|
||||
/** Ticks IEW stage, causing Dispatch, the IQ, the LSQ, Execute, and
|
||||
* Writeback to run for one cycle.
|
||||
*/
|
||||
void tick();
|
||||
|
||||
private:
|
||||
/** Updates execution stats based on the instruction. */
|
||||
void updateExeInstStats(DynInstPtr &inst);
|
||||
|
||||
/** Pointer to main time buffer used for backwards communication. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to write information heading to previous stages. */
|
||||
typename TimeBuffer<TimeStruct>::wire toFetch;
|
||||
|
||||
/** Wire to get commit's output from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
||||
|
||||
/** Wire to write information heading to previous stages. */
|
||||
typename TimeBuffer<TimeStruct>::wire toRename;
|
||||
|
||||
/** Rename instruction queue interface. */
|
||||
TimeBuffer<RenameStruct> *renameQueue;
|
||||
|
||||
/** Wire to get rename's output from rename queue. */
|
||||
typename TimeBuffer<RenameStruct>::wire fromRename;
|
||||
|
||||
/** Issue stage queue. */
|
||||
TimeBuffer<IssueStruct> issueToExecQueue;
|
||||
|
||||
/** Wire to read information from the issue stage time queue. */
|
||||
typename TimeBuffer<IssueStruct>::wire fromIssue;
|
||||
|
||||
/**
|
||||
* IEW stage time buffer. Holds ROB indices of instructions that
|
||||
* can be marked as completed.
|
||||
*/
|
||||
TimeBuffer<IEWStruct> *iewQueue;
|
||||
|
||||
/** Wire to write information heading to commit. */
|
||||
typename TimeBuffer<IEWStruct>::wire toCommit;
|
||||
|
||||
/** Queue of all instructions coming from rename this cycle. */
|
||||
std::queue<DynInstPtr> insts[Impl::MaxThreads];
|
||||
|
||||
/** Skid buffer between rename and IEW. */
|
||||
std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
|
||||
|
||||
/** Scoreboard pointer. */
|
||||
Scoreboard* scoreboard;
|
||||
|
||||
private:
|
||||
/** CPU pointer. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Records if IEW has written to the time buffer this cycle, so that the
|
||||
* CPU can deschedule itself if there is no activity.
|
||||
*/
|
||||
bool wroteToTimeBuffer;
|
||||
|
||||
/** Source of possible stalls. */
|
||||
struct Stalls
{
    // Stall flag for the commit stage; one Stalls record is kept per
    // thread (see the stalls[] array that follows this struct).
    bool commit;
};
|
||||
|
||||
/** Stages that are telling IEW to stall. */
|
||||
Stalls stalls[Impl::MaxThreads];
|
||||
|
||||
/** Debug function to print instructions that are issued this cycle. */
|
||||
void printAvailableInsts();
|
||||
|
||||
public:
|
||||
/** Instruction queue. */
|
||||
IQ instQueue;
|
||||
|
||||
/** Load / store queue. */
|
||||
LSQ ldstQueue;
|
||||
|
||||
/** Pointer to the functional unit pool. */
|
||||
FUPool *fuPool;
|
||||
/** Records if the LSQ needs to be updated on the next cycle, so that
|
||||
* IEW knows if there will be activity on the next cycle.
|
||||
*/
|
||||
bool updateLSQNextCycle;
|
||||
|
||||
private:
|
||||
/** Records if there is a fetch redirect on this cycle for each thread. */
|
||||
bool fetchRedirect[Impl::MaxThreads];
|
||||
|
||||
/** Records if the queues have been changed (inserted or issued insts),
|
||||
* so that IEW knows to broadcast the updated amount of free entries.
|
||||
*/
|
||||
bool updatedQueues;
|
||||
|
||||
/** Commit to IEW delay, in ticks. */
|
||||
unsigned commitToIEWDelay;
|
||||
|
||||
/** Rename to IEW delay, in ticks. */
|
||||
unsigned renameToIEWDelay;
|
||||
|
||||
/**
|
||||
* Issue to execute delay, in ticks. What this actually represents is
|
||||
* the amount of time it takes for an instruction to wake up, be
|
||||
* scheduled, and sent to a FU for execution.
|
||||
*/
|
||||
unsigned issueToExecuteDelay;
|
||||
|
||||
/** Width of dispatch, in instructions. */
|
||||
unsigned dispatchWidth;
|
||||
|
||||
/** Width of issue, in instructions. */
|
||||
unsigned issueWidth;
|
||||
|
||||
/** Index into queue of instructions being written back. */
|
||||
unsigned wbNumInst;
|
||||
|
||||
/** Cycle number within the queue of instructions being written back.
|
||||
* Used in case there are too many instructions writing back at the current
|
||||
* cycle and writebacks need to be scheduled for the future. See comments
|
||||
* in instToCommit().
|
||||
*/
|
||||
unsigned wbCycle;
|
||||
|
||||
/** Number of instructions in flight that will writeback. */
|
||||
int wbOutstanding;
|
||||
|
||||
/** Writeback width. */
|
||||
unsigned wbWidth;
|
||||
|
||||
/** Writeback width * writeback depth, where writeback depth is
|
||||
* the number of cycles of writing back instructions that can be
|
||||
* buffered. */
|
||||
unsigned wbMax;
|
||||
|
||||
/** Number of active threads. */
|
||||
ThreadID numThreads;
|
||||
|
||||
/** Pointer to list of active threads. */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Maximum size of the skid buffer. */
|
||||
unsigned skidBufferMax;
|
||||
|
||||
/** Is this stage switched out. */
|
||||
bool switchedOut;
|
||||
|
||||
/** Stat for total number of idle cycles. */
|
||||
Stats::Scalar iewIdleCycles;
|
||||
/** Stat for total number of squashing cycles. */
|
||||
Stats::Scalar iewSquashCycles;
|
||||
/** Stat for total number of blocking cycles. */
|
||||
Stats::Scalar iewBlockCycles;
|
||||
/** Stat for total number of unblocking cycles. */
|
||||
Stats::Scalar iewUnblockCycles;
|
||||
/** Stat for total number of instructions dispatched. */
|
||||
Stats::Scalar iewDispatchedInsts;
|
||||
/** Stat for total number of squashed instructions dispatch skips. */
|
||||
Stats::Scalar iewDispSquashedInsts;
|
||||
/** Stat for total number of dispatched load instructions. */
|
||||
Stats::Scalar iewDispLoadInsts;
|
||||
/** Stat for total number of dispatched store instructions. */
|
||||
Stats::Scalar iewDispStoreInsts;
|
||||
/** Stat for total number of dispatched non speculative instructions. */
|
||||
Stats::Scalar iewDispNonSpecInsts;
|
||||
/** Stat for number of times the IQ becomes full. */
|
||||
Stats::Scalar iewIQFullEvents;
|
||||
/** Stat for number of times the LSQ becomes full. */
|
||||
Stats::Scalar iewLSQFullEvents;
|
||||
/** Stat for total number of memory ordering violation events. */
|
||||
Stats::Scalar memOrderViolationEvents;
|
||||
/** Stat for total number of incorrect predicted taken branches. */
|
||||
Stats::Scalar predictedTakenIncorrect;
|
||||
/** Stat for total number of incorrect predicted not taken branches. */
|
||||
Stats::Scalar predictedNotTakenIncorrect;
|
||||
/** Stat for total number of mispredicted branches detected at execute. */
|
||||
Stats::Formula branchMispredicts;
|
||||
|
||||
/** Stat for total number of executed instructions. */
|
||||
Stats::Scalar iewExecutedInsts;
|
||||
/** Stat for total number of executed load instructions. */
|
||||
Stats::Vector iewExecLoadInsts;
|
||||
/** Stat for total number of executed store instructions. */
|
||||
// Stats::Scalar iewExecStoreInsts;
|
||||
/** Stat for total number of squashed instructions skipped at execute. */
|
||||
Stats::Scalar iewExecSquashedInsts;
|
||||
/** Number of executed software prefetches. */
|
||||
Stats::Vector iewExecutedSwp;
|
||||
/** Number of executed nops. */
|
||||
Stats::Vector iewExecutedNop;
|
||||
/** Number of executed memory references. */
|
||||
Stats::Vector iewExecutedRefs;
|
||||
/** Number of executed branches. */
|
||||
Stats::Vector iewExecutedBranches;
|
||||
/** Number of executed store instructions. */
|
||||
Stats::Formula iewExecStoreInsts;
|
||||
/** Number of instructions executed per cycle. */
|
||||
Stats::Formula iewExecRate;
|
||||
|
||||
/** Number of instructions sent to commit. */
|
||||
Stats::Vector iewInstsToCommit;
|
||||
/** Number of instructions that writeback. */
|
||||
Stats::Vector writebackCount;
|
||||
/** Number of instructions that wake consumers. */
|
||||
Stats::Vector producerInst;
|
||||
/** Number of instructions that wake up from producers. */
|
||||
Stats::Vector consumerInst;
|
||||
/** Number of instructions that were delayed in writing back due
|
||||
* to resource contention.
|
||||
*/
|
||||
Stats::Vector wbPenalized;
|
||||
/** Number of instructions per cycle written back. */
|
||||
Stats::Formula wbRate;
|
||||
/** Average number of woken instructions per writeback. */
|
||||
Stats::Formula wbFanout;
|
||||
/** Number of instructions per cycle delayed in writing back. */
|
||||
Stats::Formula wbPenalizedRate;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_IEW_HH__
|
||||
1670
simulators/gem5/src/cpu/o3/iew_impl.hh
Normal file
1670
simulators/gem5/src/cpu/o3/iew_impl.hh
Normal file
File diff suppressed because it is too large
Load Diff
84
simulators/gem5/src/cpu/o3/impl.hh
Normal file
84
simulators/gem5/src/cpu/o3/impl.hh
Normal file
@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_IMPL_HH__
|
||||
#define __CPU_O3_IMPL_HH__
|
||||
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/cpu_policy.hh"
|
||||
|
||||
// Forward declarations.
|
||||
template <class Impl>
|
||||
class BaseO3DynInst;
|
||||
|
||||
template <class Impl>
|
||||
class FullO3CPU;
|
||||
|
||||
/** Implementation specific struct that defines several key types to the
|
||||
* CPU, the stages within the CPU, the time buffers, and the DynInst.
|
||||
* The struct defines the ISA, the CPU policy, the specific DynInst, the
|
||||
* specific O3CPU, and all of the structs from the time buffers to do
|
||||
* communication.
|
||||
* This is one of the key things that must be defined for each hardware
|
||||
* specific CPU implementation.
|
||||
*/
|
||||
struct O3CPUImpl
|
||||
{
|
||||
/** The type of MachInst. */
|
||||
typedef TheISA::MachInst MachInst;
|
||||
|
||||
/** The CPU policy to be used, which defines all of the CPU stages. */
|
||||
typedef SimpleCPUPolicy<O3CPUImpl> CPUPol;
|
||||
|
||||
/** The DynInst type to be used. */
|
||||
typedef BaseO3DynInst<O3CPUImpl> DynInst;
|
||||
|
||||
/** The refcounted DynInst pointer to be used. In most cases this is
|
||||
* what should be used, and not DynInst *.
|
||||
*/
|
||||
typedef RefCountingPtr<DynInst> DynInstPtr;
|
||||
|
||||
/** The O3CPU type to be used. */
|
||||
typedef FullO3CPU<O3CPUImpl> O3CPU;
|
||||
|
||||
/** Same typedef, but for CPUType. BaseDynInst may not always use
|
||||
* an O3 CPU, so it's clearer to call it CPUType instead in that
|
||||
* case.
|
||||
*/
|
||||
typedef O3CPU CPUType;
|
||||
|
||||
enum {
|
||||
MaxWidth = 8,
|
||||
MaxThreads = 4
|
||||
};
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_IMPL_HH__
|
||||
35
simulators/gem5/src/cpu/o3/inst_queue.cc
Normal file
35
simulators/gem5/src/cpu/o3/inst_queue.cc
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/inst_queue_impl.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
|
||||
// Force instantiation of InstructionQueue.
|
||||
template class InstructionQueue<O3CPUImpl>;
|
||||
538
simulators/gem5/src/cpu/o3/inst_queue.hh
Normal file
538
simulators/gem5/src/cpu/o3/inst_queue.hh
Normal file
@ -0,0 +1,538 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved.
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_INST_QUEUE_HH__
|
||||
#define __CPU_O3_INST_QUEUE_HH__
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
#include <vector>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "base/types.hh"
|
||||
#include "cpu/o3/dep_graph.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/op_class.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "sim/eventq.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
class FUPool;
|
||||
class MemInterface;
|
||||
|
||||
/**
|
||||
* A standard instruction queue class. It holds ready instructions, in
|
||||
* order, in separate priority queues to facilitate the scheduling of
|
||||
* instructions. The IQ uses a separate linked list to track dependencies.
|
||||
* Similar to the rename map and the free list, it expects that
|
||||
* floating point registers have their indices start after the integer
|
||||
* registers (ie with 96 int and 96 fp registers, regs 0-95 are integer
|
||||
* and 96-191 are fp). This remains true even for both logical and
|
||||
* physical register indices. The IQ depends on the memory dependence unit to
|
||||
* track when memory operations are ready in terms of ordering; register
|
||||
* dependencies are tracked normally. Right now the IQ also handles the
|
||||
* execution timing; this is mainly to allow back-to-back scheduling without
|
||||
* requiring IEW to be able to peek into the IQ. At the end of the execution
|
||||
* latency, the instruction is put into the queue to execute, where it will
|
||||
* have the execute() function called on it.
|
||||
* @todo: Make IQ able to handle multiple FU pools.
|
||||
*/
|
||||
template <class Impl>
|
||||
class InstructionQueue
|
||||
{
|
||||
public:
|
||||
//Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
typedef typename Impl::CPUPol::IEW IEW;
|
||||
typedef typename Impl::CPUPol::MemDepUnit MemDepUnit;
|
||||
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
|
||||
typedef typename Impl::CPUPol::TimeStruct TimeStruct;
|
||||
|
||||
// Typedef of iterator through the list of instructions.
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
|
||||
/** FU completion event class. */
|
||||
class FUCompletion : public Event {
|
||||
private:
|
||||
/** Executing instruction. */
|
||||
DynInstPtr inst;
|
||||
|
||||
/** Index of the FU used for executing. */
|
||||
int fuIdx;
|
||||
|
||||
/** Pointer back to the instruction queue. */
|
||||
InstructionQueue<Impl> *iqPtr;
|
||||
|
||||
/** Should the FU be added to the list to be freed upon
|
||||
* completing this event.
|
||||
*/
|
||||
bool freeFU;
|
||||
|
||||
public:
|
||||
/** Construct a FU completion event. */
|
||||
FUCompletion(DynInstPtr &_inst, int fu_idx,
|
||||
InstructionQueue<Impl> *iq_ptr);
|
||||
|
||||
virtual void process();
|
||||
virtual const char *description() const;
|
||||
void setFreeFU() { freeFU = true; }
|
||||
};
|
||||
|
||||
/** Constructs an IQ. */
|
||||
InstructionQueue(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
|
||||
|
||||
/** Destructs the IQ. */
|
||||
~InstructionQueue();
|
||||
|
||||
/** Returns the name of the IQ. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Resets all instruction queue state. */
|
||||
void resetState();
|
||||
|
||||
/** Sets active threads list. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Sets the time buffer between issue and execute. */
|
||||
void setIssueToExecuteQueue(TimeBuffer<IssueStruct> *i2eQueue);
|
||||
|
||||
/** Sets the global time buffer. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||
|
||||
/** Switches out the instruction queue. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over execution from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Returns if the IQ is switched out. */
|
||||
bool isSwitchedOut()
{
    // Simply exposes the queue's switchedOut flag to callers.
    return switchedOut;
}
|
||||
|
||||
/** Number of entries needed for given amount of threads. */
|
||||
int entryAmount(ThreadID num_threads);
|
||||
|
||||
/** Resets max entries for all threads. */
|
||||
void resetEntries();
|
||||
|
||||
/** Returns total number of free entries. */
|
||||
unsigned numFreeEntries();
|
||||
|
||||
/** Returns number of free entries for a thread. */
|
||||
unsigned numFreeEntries(ThreadID tid);
|
||||
|
||||
/** Returns whether or not the IQ is full. */
|
||||
bool isFull();
|
||||
|
||||
/** Returns whether or not the IQ is full for a specific thread. */
|
||||
bool isFull(ThreadID tid);
|
||||
|
||||
/** Returns if there are any ready instructions in the IQ. */
|
||||
bool hasReadyInsts();
|
||||
|
||||
/** Inserts a new instruction into the IQ. */
|
||||
void insert(DynInstPtr &new_inst);
|
||||
|
||||
/** Inserts a new, non-speculative instruction into the IQ. */
|
||||
void insertNonSpec(DynInstPtr &new_inst);
|
||||
|
||||
/** Inserts a memory or write barrier into the IQ to make sure
|
||||
* loads and stores are ordered properly.
|
||||
*/
|
||||
void insertBarrier(DynInstPtr &barr_inst);
|
||||
|
||||
/** Returns the oldest scheduled instruction, and removes it from
|
||||
* the list of instructions waiting to execute.
|
||||
*/
|
||||
DynInstPtr getInstToExecute();
|
||||
|
||||
/** Returns a memory instruction that was deferred due to a delayed DTB
|
||||
* translation if it is now ready to execute.
|
||||
*/
|
||||
DynInstPtr getDeferredMemInstToExecute();
|
||||
|
||||
/**
|
||||
* Records the instruction as the producer of a register without
|
||||
* adding it to the rest of the IQ.
|
||||
*/
|
||||
void recordProducer(DynInstPtr &inst)
{
    // Only the producer bookkeeping is updated; nothing else in the IQ is
    // touched by this call.
    addToProducers(inst);
}
|
||||
|
||||
/** Process FU completion event. */
|
||||
void processFUCompletion(DynInstPtr &inst, int fu_idx);
|
||||
|
||||
/**
|
||||
* Schedules ready instructions, adding the ready ones (oldest first) to
|
||||
* the queue to execute.
|
||||
*/
|
||||
void scheduleReadyInsts();
|
||||
|
||||
/** Schedules a single specific non-speculative instruction. */
|
||||
void scheduleNonSpec(const InstSeqNum &inst);
|
||||
|
||||
/**
|
||||
* Commits all instructions up to and including the given sequence number,
|
||||
* for a specific thread.
|
||||
*/
|
||||
void commit(const InstSeqNum &inst, ThreadID tid = 0);
|
||||
|
||||
/** Wakes all dependents of a completed instruction. */
|
||||
int wakeDependents(DynInstPtr &completed_inst);
|
||||
|
||||
/** Adds a ready memory instruction to the ready list. */
|
||||
void addReadyMemInst(DynInstPtr &ready_inst);
|
||||
|
||||
/**
|
||||
* Reschedules a memory instruction. It will be ready to issue once
|
||||
* replayMemInst() is called.
|
||||
*/
|
||||
void rescheduleMemInst(DynInstPtr &resched_inst);
|
||||
|
||||
/** Replays a memory instruction. It must be rescheduled first. */
|
||||
void replayMemInst(DynInstPtr &replay_inst);
|
||||
|
||||
/** Completes a memory operation. */
|
||||
void completeMemInst(DynInstPtr &completed_inst);
|
||||
|
||||
/**
|
||||
* Defers a memory instruction when its DTB translation incurs a hw
|
||||
* page table walk.
|
||||
*/
|
||||
void deferMemInst(DynInstPtr &deferred_inst);
|
||||
|
||||
/** Indicates an ordering violation between a store and a load. */
|
||||
void violation(DynInstPtr &store, DynInstPtr &faulting_load);
|
||||
|
||||
/**
|
||||
* Squashes instructions for a thread. Squashing information is obtained
|
||||
* from the time buffer.
|
||||
*/
|
||||
void squash(ThreadID tid);
|
||||
|
||||
/** Returns the number of used entries for a thread. */
|
||||
unsigned getCount(ThreadID tid)
{
    // Per-thread entry counter, indexed directly by thread id; no bounds
    // check is performed here.
    // (The stray ';' that followed the original inline body was removed:
    // it was an empty declaration that pedantic builds warn about.)
    return count[tid];
}
|
||||
|
||||
/** Debug function to print all instructions. */
|
||||
void printInsts();
|
||||
|
||||
private:
|
||||
/** Does the actual squashing. */
|
||||
void doSquash(ThreadID tid);
|
||||
|
||||
/////////////////////////
|
||||
// Various pointers
|
||||
/////////////////////////
|
||||
|
||||
/** Pointer to the CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Cache interface. */
|
||||
MemInterface *dcacheInterface;
|
||||
|
||||
/** Pointer to IEW stage. */
|
||||
IEW *iewStage;
|
||||
|
||||
/** The memory dependence unit, which tracks/predicts memory dependences
|
||||
* between instructions.
|
||||
*/
|
||||
MemDepUnit memDepUnit[Impl::MaxThreads];
|
||||
|
||||
/** The queue to the execute stage. Issued instructions will be written
|
||||
* into it.
|
||||
*/
|
||||
TimeBuffer<IssueStruct> *issueToExecuteQueue;
|
||||
|
||||
/** The backwards time buffer. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to read information from timebuffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
||||
|
||||
/** Function unit pool. */
|
||||
FUPool *fuPool;
|
||||
|
||||
//////////////////////////////////////
|
||||
// Instruction lists, ready queues, and ordering
|
||||
//////////////////////////////////////
|
||||
|
||||
/** List of all the instructions in the IQ (some of which may be issued). */
|
||||
std::list<DynInstPtr> instList[Impl::MaxThreads];
|
||||
|
||||
/** List of instructions that are ready to be executed. */
|
||||
std::list<DynInstPtr> instsToExecute;
|
||||
|
||||
/** List of instructions waiting for their DTB translation to
|
||||
* complete (hw page table walk in progress).
|
||||
*/
|
||||
std::list<DynInstPtr> deferredMemInsts;
|
||||
|
||||
/**
|
||||
* Struct for comparing entries to be added to the priority queue.
|
||||
* This gives reverse ordering to the instructions in terms of
|
||||
* sequence numbers: the instructions with smaller sequence
|
||||
* numbers (and hence are older) will be at the top of the
|
||||
* priority queue.
|
||||
*/
|
||||
struct pqCompare {
|
||||
bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const
|
||||
{
|
||||
return lhs->seqNum > rhs->seqNum;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::priority_queue<DynInstPtr, std::vector<DynInstPtr>, pqCompare>
|
||||
ReadyInstQueue;
|
||||
|
||||
/** List of ready instructions, per op class. They are separated by op
|
||||
* class to allow for easy mapping to FUs.
|
||||
*/
|
||||
ReadyInstQueue readyInsts[Num_OpClasses];
|
||||
|
||||
/** List of non-speculative instructions that will be scheduled
|
||||
* once the IQ gets a signal from commit. While it's redundant to
|
||||
* have the key be a part of the value (the sequence number is stored
|
||||
* inside of DynInst), when these instructions are woken up only
|
||||
* the sequence number will be available. Thus it is most efficient to be
|
||||
* able to search by the sequence number alone.
|
||||
*/
|
||||
std::map<InstSeqNum, DynInstPtr> nonSpecInsts;
|
||||
|
||||
typedef typename std::map<InstSeqNum, DynInstPtr>::iterator NonSpecMapIt;
|
||||
|
||||
/** Entry for the list age ordering by op class. */
|
||||
struct ListOrderEntry {
|
||||
OpClass queueType;
|
||||
InstSeqNum oldestInst;
|
||||
};
|
||||
|
||||
/** List that contains the age order of the oldest instruction of each
|
||||
* ready queue. Used to select the oldest instruction available
|
||||
* among op classes.
|
||||
* @todo: Might be better to just move these entries around instead
|
||||
* of creating new ones every time the position changes due to an
|
||||
* instruction issuing. Not sure std::list supports this.
|
||||
*/
|
||||
std::list<ListOrderEntry> listOrder;
|
||||
|
||||
typedef typename std::list<ListOrderEntry>::iterator ListOrderIt;
|
||||
|
||||
/** Tracks if each ready queue is on the age order list. */
|
||||
bool queueOnList[Num_OpClasses];
|
||||
|
||||
/** Iterators of each ready queue. Points to their spot in the age order
|
||||
* list.
|
||||
*/
|
||||
ListOrderIt readyIt[Num_OpClasses];
|
||||
|
||||
/** Add an op class to the age order list. */
|
||||
void addToOrderList(OpClass op_class);
|
||||
|
||||
/**
|
||||
* Called when the oldest instruction has been removed from a ready queue;
|
||||
* this places that ready queue into the proper spot in the age order list.
|
||||
*/
|
||||
void moveToYoungerInst(ListOrderIt age_order_it);
|
||||
|
||||
DependencyGraph<DynInstPtr> dependGraph;
|
||||
|
||||
//////////////////////////////////////
|
||||
// Various parameters
|
||||
//////////////////////////////////////
|
||||
|
||||
/** IQ Resource Sharing Policy */
|
||||
enum IQPolicy {
|
||||
Dynamic,
|
||||
Partitioned,
|
||||
Threshold
|
||||
};
|
||||
|
||||
/** IQ sharing policy for SMT. */
|
||||
IQPolicy iqPolicy;
|
||||
|
||||
/** Total number of threads. */
|
||||
ThreadID numThreads;
|
||||
|
||||
/** Pointer to list of active threads. */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Per Thread IQ count */
|
||||
unsigned count[Impl::MaxThreads];
|
||||
|
||||
/** Max IQ Entries Per Thread */
|
||||
unsigned maxEntries[Impl::MaxThreads];
|
||||
|
||||
/** Number of free IQ entries left. */
|
||||
unsigned freeEntries;
|
||||
|
||||
/** The number of entries in the instruction queue. */
|
||||
unsigned numEntries;
|
||||
|
||||
/** The total number of instructions that can be issued in one cycle. */
|
||||
unsigned totalWidth;
|
||||
|
||||
/** The number of physical registers in the CPU. */
|
||||
unsigned numPhysRegs;
|
||||
|
||||
/** The number of physical integer registers in the CPU. */
|
||||
unsigned numPhysIntRegs;
|
||||
|
||||
/** The number of floating point registers in the CPU. */
|
||||
unsigned numPhysFloatRegs;
|
||||
|
||||
/** Delay between commit stage and the IQ.
|
||||
* @todo: Make there be a distinction between the delays within IEW.
|
||||
*/
|
||||
unsigned commitToIEWDelay;
|
||||
|
||||
/** Is the IQ switched out. */
|
||||
bool switchedOut;
|
||||
|
||||
/** The sequence number of the squashed instruction. */
|
||||
InstSeqNum squashedSeqNum[Impl::MaxThreads];
|
||||
|
||||
/** A cache of the recently woken registers. It is 1 if the register
|
||||
* has been woken up recently, and 0 if the register has been added
|
||||
* to the dependency graph and has not yet received its value. It
|
||||
* is basically a secondary scoreboard, and should pretty much mirror
|
||||
* the scoreboard that exists in the rename map.
|
||||
*/
|
||||
std::vector<bool> regScoreboard;
|
||||
|
||||
/** Adds an instruction to the dependency graph, as a consumer. */
|
||||
bool addToDependents(DynInstPtr &new_inst);
|
||||
|
||||
/** Adds an instruction to the dependency graph, as a producer. */
|
||||
void addToProducers(DynInstPtr &new_inst);
|
||||
|
||||
/** Moves an instruction to the ready queue if it is ready. */
|
||||
void addIfReady(DynInstPtr &inst);
|
||||
|
||||
/** Debugging function to count how many entries are in the IQ. It does
|
||||
* a linear walk through the instructions, so do not call this function
|
||||
* during normal execution.
|
||||
*/
|
||||
int countInsts();
|
||||
|
||||
/** Debugging function to dump all the list sizes, as well as print
|
||||
* out the list of nonspeculative instructions. Should not be used
|
||||
* in any other capacity, but it has no harmful side effects.
|
||||
*/
|
||||
void dumpLists();
|
||||
|
||||
/** Debugging function to dump out all instructions that are in the
|
||||
* IQ.
|
||||
*/
|
||||
void dumpInsts();
|
||||
|
||||
/** Stat for number of instructions added. */
|
||||
Stats::Scalar iqInstsAdded;
|
||||
/** Stat for number of non-speculative instructions added. */
|
||||
Stats::Scalar iqNonSpecInstsAdded;
|
||||
|
||||
Stats::Scalar iqInstsIssued;
|
||||
/** Stat for number of integer instructions issued. */
|
||||
Stats::Scalar iqIntInstsIssued;
|
||||
/** Stat for number of floating point instructions issued. */
|
||||
Stats::Scalar iqFloatInstsIssued;
|
||||
/** Stat for number of branch instructions issued. */
|
||||
Stats::Scalar iqBranchInstsIssued;
|
||||
/** Stat for number of memory instructions issued. */
|
||||
Stats::Scalar iqMemInstsIssued;
|
||||
/** Stat for number of miscellaneous instructions issued. */
|
||||
Stats::Scalar iqMiscInstsIssued;
|
||||
/** Stat for number of squashed instructions that were ready to issue. */
|
||||
Stats::Scalar iqSquashedInstsIssued;
|
||||
/** Stat for number of squashed instructions examined when squashing. */
|
||||
Stats::Scalar iqSquashedInstsExamined;
|
||||
/** Stat for number of squashed instruction operands examined when
|
||||
* squashing.
|
||||
*/
|
||||
Stats::Scalar iqSquashedOperandsExamined;
|
||||
/** Stat for number of non-speculative instructions removed due to a squash.
|
||||
*/
|
||||
Stats::Scalar iqSquashedNonSpecRemoved;
|
||||
// Also include number of instructions rescheduled and replayed.
|
||||
|
||||
/** Distribution of number of instructions in the queue.
|
||||
* @todo: Need to create struct to track the entry time for each
|
||||
* instruction. */
|
||||
// Stats::VectorDistribution queueResDist;
|
||||
/** Distribution of the number of instructions issued. */
|
||||
Stats::Distribution numIssuedDist;
|
||||
/** Distribution of the cycles it takes to issue an instruction.
|
||||
* @todo: Need to create struct to track the ready time for each
|
||||
* instruction. */
|
||||
// Stats::VectorDistribution issueDelayDist;
|
||||
|
||||
/** Number of times an instruction could not be issued because a
|
||||
* FU was busy.
|
||||
*/
|
||||
Stats::Vector statFuBusy;
|
||||
// Stats::Vector dist_unissued;
|
||||
/** Stat for total number issued for each instruction type. */
|
||||
Stats::Vector2d statIssuedInstType;
|
||||
|
||||
/** Number of instructions issued per cycle. */
|
||||
Stats::Formula issueRate;
|
||||
|
||||
/** Number of times the FU was busy. */
|
||||
Stats::Vector fuBusy;
|
||||
/** Number of times the FU was busy per instruction issued. */
|
||||
Stats::Formula fuBusyRate;
|
||||
public:
|
||||
Stats::Scalar intInstQueueReads;
|
||||
Stats::Scalar intInstQueueWrites;
|
||||
Stats::Scalar intInstQueueWakeupAccesses;
|
||||
Stats::Scalar fpInstQueueReads;
|
||||
Stats::Scalar fpInstQueueWrites;
|
||||
Stats::Scalar fpInstQueueWakeupQccesses;
|
||||
|
||||
Stats::Scalar intAluAccesses;
|
||||
Stats::Scalar fpAluAccesses;
|
||||
};
|
||||
|
||||
#endif //__CPU_O3_INST_QUEUE_HH__
|
||||
1526
simulators/gem5/src/cpu/o3/inst_queue_impl.hh
Normal file
1526
simulators/gem5/src/cpu/o3/inst_queue_impl.hh
Normal file
File diff suppressed because it is too large
Load Diff
33
simulators/gem5/src/cpu/o3/isa_specific.hh
Executable file
33
simulators/gem5/src/cpu/o3/isa_specific.hh
Executable file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Korey Sewell
|
||||
*/
|
||||
|
||||
#include "cpu/o3/dyn_inst.hh"
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "cpu/base.hh"
|
||||
36
simulators/gem5/src/cpu/o3/lsq.cc
Normal file
36
simulators/gem5/src/cpu/o3/lsq.cc
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Korey Sewell
|
||||
*/
|
||||
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/lsq_impl.hh"
|
||||
|
||||
// Force the instantiation of LSQ for all the implementations we care about.
|
||||
template class LSQ<O3CPUImpl>;
|
||||
|
||||
359
simulators/gem5/src/cpu/o3/lsq.hh
Normal file
359
simulators/gem5/src/cpu/o3/lsq.hh
Normal file
@ -0,0 +1,359 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Korey Sewell
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_LSQ_HH__
|
||||
#define __CPU_O3_LSQ_HH__
|
||||
|
||||
#include <map>
|
||||
#include <queue>
|
||||
|
||||
#include "cpu/o3/lsq_unit.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "mem/port.hh"
|
||||
#include "sim/sim_object.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
|
||||
template <class Impl>
|
||||
class LSQ {
|
||||
public:
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::CPUPol::IEW IEW;
|
||||
typedef typename Impl::CPUPol::LSQUnit LSQUnit;
|
||||
|
||||
/** SMT policy. */
|
||||
enum LSQPolicy {
|
||||
Dynamic,
|
||||
Partitioned,
|
||||
Threshold
|
||||
};
|
||||
|
||||
/** Constructs an LSQ with the given parameters. */
|
||||
LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params);
|
||||
|
||||
/** Returns the name of the LSQ. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics of each LSQ unit. */
|
||||
void regStats();
|
||||
|
||||
/** Sets the pointer to the list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
/** Switches out the LSQ. */
|
||||
void switchOut();
|
||||
/** Takes over execution from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Number of entries needed for the given amount of threads.*/
|
||||
int entryAmount(ThreadID num_threads);
|
||||
void removeEntries(ThreadID tid);
|
||||
/** Reset the max entries for each thread. */
|
||||
void resetEntries();
|
||||
/** Resize the max entries for a thread. */
|
||||
void resizeEntries(unsigned size, ThreadID tid);
|
||||
|
||||
/** Ticks the LSQ. */
|
||||
void tick();
|
||||
/** Ticks a specific LSQ Unit. */
|
||||
void tick(ThreadID tid)
|
||||
{ thread[tid].tick(); }
|
||||
|
||||
/** Inserts a load into the LSQ. */
|
||||
void insertLoad(DynInstPtr &load_inst);
|
||||
/** Inserts a store into the LSQ. */
|
||||
void insertStore(DynInstPtr &store_inst);
|
||||
|
||||
/** Executes a load. */
|
||||
Fault executeLoad(DynInstPtr &inst);
|
||||
|
||||
/** Executes a store. */
|
||||
Fault executeStore(DynInstPtr &inst);
|
||||
|
||||
/**
|
||||
* Commits loads up until the given sequence number for a specific thread.
|
||||
*/
|
||||
void commitLoads(InstSeqNum &youngest_inst, ThreadID tid)
|
||||
{ thread[tid].commitLoads(youngest_inst); }
|
||||
|
||||
/**
|
||||
* Commits stores up until the given sequence number for a specific thread.
|
||||
*/
|
||||
void commitStores(InstSeqNum &youngest_inst, ThreadID tid)
|
||||
{ thread[tid].commitStores(youngest_inst); }
|
||||
|
||||
/**
|
||||
* Attempts to write back stores until all cache ports are used or the
|
||||
* interface becomes blocked.
|
||||
*/
|
||||
void writebackStores();
|
||||
/** Same as above, but only for one thread. */
|
||||
void writebackStores(ThreadID tid);
|
||||
|
||||
/**
|
||||
* Squash instructions from a thread until the specified sequence number.
|
||||
*/
|
||||
void squash(const InstSeqNum &squashed_num, ThreadID tid)
|
||||
{ thread[tid].squash(squashed_num); }
|
||||
|
||||
/** Returns whether or not there was a memory ordering violation. */
|
||||
bool violation();
|
||||
/**
|
||||
* Returns whether or not there was a memory ordering violation for a
|
||||
* specific thread.
|
||||
*/
|
||||
bool violation(ThreadID tid)
|
||||
{ return thread[tid].violation(); }
|
||||
|
||||
/** Returns if a load is blocked due to the memory system for a specific
|
||||
* thread.
|
||||
*/
|
||||
bool loadBlocked(ThreadID tid)
|
||||
{ return thread[tid].loadBlocked(); }
|
||||
|
||||
bool isLoadBlockedHandled(ThreadID tid)
|
||||
{ return thread[tid].isLoadBlockedHandled(); }
|
||||
|
||||
void setLoadBlockedHandled(ThreadID tid)
|
||||
{ thread[tid].setLoadBlockedHandled(); }
|
||||
|
||||
/** Gets the instruction that caused the memory ordering violation. */
|
||||
DynInstPtr getMemDepViolator(ThreadID tid)
|
||||
{ return thread[tid].getMemDepViolator(); }
|
||||
|
||||
/** Returns the head index of the load queue for a specific thread. */
|
||||
int getLoadHead(ThreadID tid)
|
||||
{ return thread[tid].getLoadHead(); }
|
||||
|
||||
/** Returns the sequence number of the head of the load queue. */
|
||||
InstSeqNum getLoadHeadSeqNum(ThreadID tid)
|
||||
{
|
||||
return thread[tid].getLoadHeadSeqNum();
|
||||
}
|
||||
|
||||
/** Returns the head index of the store queue. */
|
||||
int getStoreHead(ThreadID tid)
|
||||
{ return thread[tid].getStoreHead(); }
|
||||
|
||||
/** Returns the sequence number of the head of the store queue. */
|
||||
InstSeqNum getStoreHeadSeqNum(ThreadID tid)
|
||||
{
|
||||
return thread[tid].getStoreHeadSeqNum();
|
||||
}
|
||||
|
||||
/** Returns the number of instructions in all of the queues. */
|
||||
int getCount();
|
||||
/** Returns the number of instructions in the queues of one thread. */
|
||||
int getCount(ThreadID tid)
|
||||
{ return thread[tid].getCount(); }
|
||||
|
||||
/** Returns the total number of loads in the load queue. */
|
||||
int numLoads();
|
||||
/** Returns the total number of loads for a single thread. */
|
||||
int numLoads(ThreadID tid)
|
||||
{ return thread[tid].numLoads(); }
|
||||
|
||||
/** Returns the total number of stores in the store queue. */
|
||||
int numStores();
|
||||
/** Returns the total number of stores for a single thread. */
|
||||
int numStores(ThreadID tid)
|
||||
{ return thread[tid].numStores(); }
|
||||
|
||||
/** Returns the total number of loads that are ready. */
|
||||
int numLoadsReady();
|
||||
/** Returns the number of loads that are ready for a single thread. */
|
||||
int numLoadsReady(ThreadID tid)
|
||||
{ return thread[tid].numLoadsReady(); }
|
||||
|
||||
/** Returns the number of free entries. */
|
||||
unsigned numFreeEntries();
|
||||
/** Returns the number of free entries for a specific thread. */
|
||||
unsigned numFreeEntries(ThreadID tid);
|
||||
|
||||
/** Returns if the LSQ is full (either LQ or SQ is full). */
|
||||
bool isFull();
|
||||
/**
|
||||
* Returns if the LSQ is full for a specific thread (either LQ or SQ is
|
||||
* full).
|
||||
*/
|
||||
bool isFull(ThreadID tid);
|
||||
|
||||
/** Returns if any of the LQs are full. */
|
||||
bool lqFull();
|
||||
/** Returns if the LQ of a given thread is full. */
|
||||
bool lqFull(ThreadID tid);
|
||||
|
||||
/** Returns if any of the SQs are full. */
|
||||
bool sqFull();
|
||||
/** Returns if the SQ of a given thread is full. */
|
||||
bool sqFull(ThreadID tid);
|
||||
|
||||
/**
|
||||
* Returns if the LSQ is stalled due to a memory operation that must be
|
||||
* replayed.
|
||||
*/
|
||||
bool isStalled();
|
||||
/**
|
||||
* Returns if the LSQ of a specific thread is stalled due to a memory
|
||||
* operation that must be replayed.
|
||||
*/
|
||||
bool isStalled(ThreadID tid);
|
||||
|
||||
/** Returns whether or not there are any stores to write back to memory. */
|
||||
bool hasStoresToWB();
|
||||
|
||||
/** Returns whether or not a specific thread has any stores to write back
|
||||
* to memory.
|
||||
*/
|
||||
bool hasStoresToWB(ThreadID tid)
|
||||
{ return thread[tid].hasStoresToWB(); }
|
||||
|
||||
/** Returns the number of stores a specific thread has to write back. */
|
||||
int numStoresToWB(ThreadID tid)
|
||||
{ return thread[tid].numStoresToWB(); }
|
||||
|
||||
/** Returns if the LSQ will write back to memory this cycle. */
|
||||
bool willWB();
|
||||
/** Returns if the LSQ of a specific thread will write back to memory this
|
||||
* cycle.
|
||||
*/
|
||||
bool willWB(ThreadID tid)
|
||||
{ return thread[tid].willWB(); }
|
||||
|
||||
/** Returns if the cache is currently blocked. */
|
||||
bool cacheBlocked()
|
||||
{ return retryTid != InvalidThreadID; }
|
||||
|
||||
/** Sets the retry thread id, indicating that one of the LSQUnits
|
||||
* tried to access the cache but the cache was blocked. */
|
||||
void setRetryTid(ThreadID tid)
|
||||
{ retryTid = tid; }
|
||||
|
||||
/** Debugging function to print out all instructions. */
|
||||
void dumpInsts();
|
||||
/** Debugging function to print out instructions from a specific thread. */
|
||||
void dumpInsts(ThreadID tid)
|
||||
{ thread[tid].dumpInsts(); }
|
||||
|
||||
/** Executes a read operation, using the load specified at the load
|
||||
* index.
|
||||
*/
|
||||
Fault read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
|
||||
uint8_t *data, int load_idx);
|
||||
|
||||
/** Executes a store operation, using the store specified at the store
|
||||
* index.
|
||||
*/
|
||||
Fault write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
|
||||
uint8_t *data, int store_idx);
|
||||
|
||||
/**
|
||||
* Retry the previous send that failed.
|
||||
*/
|
||||
void recvRetry();
|
||||
|
||||
/**
|
||||
* Handles writing back and completing the load or store that has
|
||||
* returned from memory.
|
||||
*
|
||||
* @param pkt Response packet from the memory sub-system
|
||||
*/
|
||||
bool recvTimingResp(PacketPtr pkt);
|
||||
|
||||
void recvTimingSnoopReq(PacketPtr pkt);
|
||||
|
||||
/** The CPU pointer. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** The IEW stage pointer. */
|
||||
IEW *iewStage;
|
||||
|
||||
protected:
|
||||
/** The LSQ policy for SMT mode. */
|
||||
LSQPolicy lsqPolicy;
|
||||
|
||||
/** The LSQ units for individual threads. */
|
||||
LSQUnit thread[Impl::MaxThreads];
|
||||
|
||||
/** List of Active Threads in System. */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Total Size of LQ Entries. */
|
||||
unsigned LQEntries;
|
||||
/** Total Size of SQ Entries. */
|
||||
unsigned SQEntries;
|
||||
|
||||
/** Max LQ Size - Used to Enforce Sharing Policies. */
|
||||
unsigned maxLQEntries;
|
||||
|
||||
/** Max SQ Size - Used to Enforce Sharing Policies. */
|
||||
unsigned maxSQEntries;
|
||||
|
||||
/** Number of Threads. */
|
||||
ThreadID numThreads;
|
||||
|
||||
/** The thread id of the LSQ Unit that is currently waiting for a
|
||||
* retry. */
|
||||
ThreadID retryTid;
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::read(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
|
||||
uint8_t *data, int load_idx)
|
||||
{
|
||||
ThreadID tid = req->threadId();
|
||||
|
||||
return thread[tid].read(req, sreqLow, sreqHigh, data, load_idx);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::write(RequestPtr req, RequestPtr sreqLow, RequestPtr sreqHigh,
|
||||
uint8_t *data, int store_idx)
|
||||
{
|
||||
ThreadID tid = req->threadId();
|
||||
|
||||
return thread[tid].write(req, sreqLow, sreqHigh, data, store_idx);
|
||||
}
|
||||
|
||||
#endif // __CPU_O3_LSQ_HH__
|
||||
608
simulators/gem5/src/cpu/o3/lsq_impl.hh
Normal file
608
simulators/gem5/src/cpu/o3/lsq_impl.hh
Normal file
@ -0,0 +1,608 @@
|
||||
/*
|
||||
* Copyright (c) 2011-2012 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2005-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Korey Sewell
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <list>
|
||||
#include <string>
|
||||
|
||||
#include "cpu/o3/lsq.hh"
|
||||
#include "debug/Fetch.hh"
|
||||
#include "debug/LSQ.hh"
|
||||
#include "debug/Writeback.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Impl>
|
||||
LSQ<Impl>::LSQ(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params)
|
||||
: cpu(cpu_ptr), iewStage(iew_ptr),
|
||||
LQEntries(params->LQEntries),
|
||||
SQEntries(params->SQEntries),
|
||||
numThreads(params->numThreads),
|
||||
retryTid(-1)
|
||||
{
|
||||
//**********************************************/
|
||||
//************ Handle SMT Parameters ***********/
|
||||
//**********************************************/
|
||||
std::string policy = params->smtLSQPolicy;
|
||||
|
||||
//Convert string to lowercase
|
||||
std::transform(policy.begin(), policy.end(), policy.begin(),
|
||||
(int(*)(int)) tolower);
|
||||
|
||||
//Figure out fetch policy
|
||||
if (policy == "dynamic") {
|
||||
lsqPolicy = Dynamic;
|
||||
|
||||
maxLQEntries = LQEntries;
|
||||
maxSQEntries = SQEntries;
|
||||
|
||||
DPRINTF(LSQ, "LSQ sharing policy set to Dynamic\n");
|
||||
} else if (policy == "partitioned") {
|
||||
lsqPolicy = Partitioned;
|
||||
|
||||
//@todo:make work if part_amt doesnt divide evenly.
|
||||
maxLQEntries = LQEntries / numThreads;
|
||||
maxSQEntries = SQEntries / numThreads;
|
||||
|
||||
DPRINTF(Fetch, "LSQ sharing policy set to Partitioned: "
|
||||
"%i entries per LQ | %i entries per SQ\n",
|
||||
maxLQEntries,maxSQEntries);
|
||||
} else if (policy == "threshold") {
|
||||
lsqPolicy = Threshold;
|
||||
|
||||
assert(params->smtLSQThreshold > LQEntries);
|
||||
assert(params->smtLSQThreshold > SQEntries);
|
||||
|
||||
//Divide up by threshold amount
|
||||
//@todo: Should threads check the max and the total
|
||||
//amount of the LSQ
|
||||
maxLQEntries = params->smtLSQThreshold;
|
||||
maxSQEntries = params->smtLSQThreshold;
|
||||
|
||||
DPRINTF(LSQ, "LSQ sharing policy set to Threshold: "
|
||||
"%i entries per LQ | %i entries per SQ\n",
|
||||
maxLQEntries,maxSQEntries);
|
||||
} else {
|
||||
assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic,"
|
||||
"Partitioned, Threshold}");
|
||||
}
|
||||
|
||||
//Initialize LSQs
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
thread[tid].init(cpu, iew_ptr, params, this,
|
||||
maxLQEntries, maxSQEntries, tid);
|
||||
thread[tid].setDcachePort(&cpu_ptr->getDataPort());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
std::string
|
||||
LSQ<Impl>::name() const
|
||||
{
|
||||
return iewStage->name() + ".lsq";
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::regStats()
|
||||
{
|
||||
//Initialize LSQs
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
thread[tid].regStats();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
|
||||
{
|
||||
activeThreads = at_ptr;
|
||||
assert(activeThreads != 0);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQ<Impl>::switchOut()
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
thread[tid].switchOut();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQ<Impl>::takeOverFrom()
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
thread[tid].takeOverFrom();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
LSQ<Impl>::entryAmount(ThreadID num_threads)
|
||||
{
|
||||
if (lsqPolicy == Partitioned) {
|
||||
return LQEntries / num_threads;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQ<Impl>::resetEntries()
|
||||
{
|
||||
if (lsqPolicy != Dynamic || numThreads > 1) {
|
||||
int active_threads = activeThreads->size();
|
||||
|
||||
int maxEntries;
|
||||
|
||||
if (lsqPolicy == Partitioned) {
|
||||
maxEntries = LQEntries / active_threads;
|
||||
} else if (lsqPolicy == Threshold && active_threads == 1) {
|
||||
maxEntries = LQEntries;
|
||||
} else {
|
||||
maxEntries = LQEntries;
|
||||
}
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
resizeEntries(maxEntries, tid);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::removeEntries(ThreadID tid)
|
||||
{
|
||||
thread[tid].clearLQ();
|
||||
thread[tid].clearSQ();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::resizeEntries(unsigned size, ThreadID tid)
|
||||
{
|
||||
thread[tid].resizeLQ(size);
|
||||
thread[tid].resizeSQ(size);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::tick()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
thread[tid].tick();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::insertLoad(DynInstPtr &load_inst)
|
||||
{
|
||||
ThreadID tid = load_inst->threadNumber;
|
||||
|
||||
thread[tid].insertLoad(load_inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::insertStore(DynInstPtr &store_inst)
|
||||
{
|
||||
ThreadID tid = store_inst->threadNumber;
|
||||
|
||||
thread[tid].insertStore(store_inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::executeLoad(DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
return thread[tid].executeLoad(inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
Fault
|
||||
LSQ<Impl>::executeStore(DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
return thread[tid].executeStore(inst);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::writebackStores()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (numStoresToWB(tid) > 0) {
|
||||
DPRINTF(Writeback,"[tid:%i] Writing back stores. %i stores "
|
||||
"available for Writeback.\n", tid, numStoresToWB(tid));
|
||||
}
|
||||
|
||||
thread[tid].writebackStores();
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::violation()
|
||||
{
|
||||
/* Answers: Does Anybody Have a Violation?*/
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (thread[tid].violation())
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQ<Impl>::recvRetry()
|
||||
{
|
||||
if (retryTid == InvalidThreadID)
|
||||
{
|
||||
//Squashed, so drop it
|
||||
return;
|
||||
}
|
||||
int curr_retry_tid = retryTid;
|
||||
// Speculatively clear the retry Tid. This will get set again if
|
||||
// the LSQUnit was unable to complete its access.
|
||||
retryTid = -1;
|
||||
thread[curr_retry_tid].recvRetry();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
LSQ<Impl>::recvTimingResp(PacketPtr pkt)
|
||||
{
|
||||
if (pkt->isError())
|
||||
DPRINTF(LSQ, "Got error packet back for address: %#X\n",
|
||||
pkt->getAddr());
|
||||
thread[pkt->req->threadId()].completeDataAccess(pkt);
|
||||
return true;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
LSQ<Impl>::recvTimingSnoopReq(PacketPtr pkt)
|
||||
{
|
||||
DPRINTF(LSQ, "received pkt for addr:%#x %s\n", pkt->getAddr(),
|
||||
pkt->cmdString());
|
||||
|
||||
// must be a snoop
|
||||
if (pkt->isInvalidate()) {
|
||||
DPRINTF(LSQ, "received invalidation for addr:%#x\n",
|
||||
pkt->getAddr());
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
thread[tid].checkSnoop(pkt);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
int
|
||||
LSQ<Impl>::getCount()
|
||||
{
|
||||
unsigned total = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
total += getCount(tid);
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
int
|
||||
LSQ<Impl>::numLoads()
|
||||
{
|
||||
unsigned total = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
total += numLoads(tid);
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
int
|
||||
LSQ<Impl>::numStores()
|
||||
{
|
||||
unsigned total = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
total += thread[tid].numStores();
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
int
|
||||
LSQ<Impl>::numLoadsReady()
|
||||
{
|
||||
unsigned total = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
total += thread[tid].numLoadsReady();
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
unsigned
|
||||
LSQ<Impl>::numFreeEntries()
|
||||
{
|
||||
unsigned total = 0;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
total += thread[tid].numFreeEntries();
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
unsigned
|
||||
LSQ<Impl>::numFreeEntries(ThreadID tid)
|
||||
{
|
||||
//if (lsqPolicy == Dynamic)
|
||||
//return numFreeEntries();
|
||||
//else
|
||||
return thread[tid].numFreeEntries();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::isFull()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (!(thread[tid].lqFull() || thread[tid].sqFull()))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::isFull(ThreadID tid)
|
||||
{
|
||||
//@todo: Change to Calculate All Entries for
|
||||
//Dynamic Policy
|
||||
if (lsqPolicy == Dynamic)
|
||||
return isFull();
|
||||
else
|
||||
return thread[tid].lqFull() || thread[tid].sqFull();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::lqFull()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (!thread[tid].lqFull())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::lqFull(ThreadID tid)
|
||||
{
|
||||
//@todo: Change to Calculate All Entries for
|
||||
//Dynamic Policy
|
||||
if (lsqPolicy == Dynamic)
|
||||
return lqFull();
|
||||
else
|
||||
return thread[tid].lqFull();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::sqFull()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (!sqFull(tid))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::sqFull(ThreadID tid)
|
||||
{
|
||||
//@todo: Change to Calculate All Entries for
|
||||
//Dynamic Policy
|
||||
if (lsqPolicy == Dynamic)
|
||||
return sqFull();
|
||||
else
|
||||
return thread[tid].sqFull();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::isStalled()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (!thread[tid].isStalled())
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::isStalled(ThreadID tid)
|
||||
{
|
||||
if (lsqPolicy == Dynamic)
|
||||
return isStalled();
|
||||
else
|
||||
return thread[tid].isStalled();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::hasStoresToWB()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (hasStoresToWB(tid))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
bool
|
||||
LSQ<Impl>::willWB()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (willWB(tid))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void
|
||||
LSQ<Impl>::dumpInsts()
|
||||
{
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
thread[tid].dumpInsts();
|
||||
}
|
||||
}
|
||||
37
simulators/gem5/src/cpu/o3/lsq_unit.cc
Normal file
37
simulators/gem5/src/cpu/o3/lsq_unit.cc
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/lsq_unit_impl.hh"
|
||||
|
||||
// Force the instantiation of LSQUnit for all the implementations we care about.
|
||||
template class LSQUnit<O3CPUImpl>;
|
||||
|
||||
914
simulators/gem5/src/cpu/o3/lsq_unit.hh
Normal file
914
simulators/gem5/src/cpu/o3/lsq_unit.hh
Normal file
@ -0,0 +1,914 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_LSQ_UNIT_HH__
|
||||
#define __CPU_O3_LSQ_UNIT_HH__
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <map>
|
||||
#include <queue>
|
||||
|
||||
#include "arch/generic/debugfaults.hh"
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "arch/locked_mem.hh"
|
||||
#include "arch/mmapped_ipr.hh"
|
||||
#include "base/hashmap.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
#include "debug/LSQUnit.hh"
|
||||
#include "mem/packet.hh"
|
||||
#include "mem/port.hh"
|
||||
#include "sim/fault_fwd.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
|
||||
/**
|
||||
* Class that implements the actual LQ and SQ for each specific
|
||||
* thread. Both are circular queues; load entries are freed upon
|
||||
* committing, while store entries are freed once they writeback. The
|
||||
* LSQUnit tracks if there are memory ordering violations, and also
|
||||
* detects partial load to store forwarding cases (a store only has
|
||||
* part of a load's data) that requires the load to wait until the
|
||||
* store writes back. In the former case it holds onto the instruction
|
||||
* until the dependence unit looks at it, and in the latter it stalls
|
||||
* the LSQ until the store writes back. At that point the load is
|
||||
* replayed.
|
||||
*/
|
||||
template <class Impl>
|
||||
class LSQUnit {
|
||||
public:
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::CPUPol::IEW IEW;
|
||||
typedef typename Impl::CPUPol::LSQ LSQ;
|
||||
typedef typename Impl::CPUPol::IssueStruct IssueStruct;
|
||||
|
||||
public:
|
||||
/** Constructs an LSQ unit. init() must be called prior to use. */
|
||||
LSQUnit();
|
||||
|
||||
/** Initializes the LSQ unit with the specified number of entries. */
|
||||
void init(O3CPU *cpu_ptr, IEW *iew_ptr, DerivO3CPUParams *params,
|
||||
LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
|
||||
unsigned id);
|
||||
|
||||
/** Returns the name of the LSQ unit. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Sets the pointer to the dcache port. */
|
||||
void setDcachePort(MasterPort *dcache_port);
|
||||
|
||||
/** Switches out LSQ unit. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Returns if the LSQ is switched out. */
|
||||
bool isSwitchedOut() { return switchedOut; }
|
||||
|
||||
/** Ticks the LSQ unit, which in this case only resets the number of
|
||||
* used cache ports.
|
||||
* @todo: Move the number of used ports up to the LSQ level so it can
|
||||
* be shared by all LSQ units.
|
||||
*/
|
||||
void tick() { usedPorts = 0; }
|
||||
|
||||
/** Inserts an instruction. */
|
||||
void insert(DynInstPtr &inst);
|
||||
/** Inserts a load instruction. */
|
||||
void insertLoad(DynInstPtr &load_inst);
|
||||
/** Inserts a store instruction. */
|
||||
void insertStore(DynInstPtr &store_inst);
|
||||
|
||||
/** Check for ordering violations in the LSQ. For a store squash if we
|
||||
* ever find a conflicting load. For a load, only squash if we
|
||||
* an external snoop invalidate has been seen for that load address
|
||||
* @param load_idx index to start checking at
|
||||
* @param inst the instruction to check
|
||||
*/
|
||||
Fault checkViolations(int load_idx, DynInstPtr &inst);
|
||||
|
||||
/** Check if an incoming invalidate hits in the lsq on a load
|
||||
* that might have issued out of order wrt another load beacuse
|
||||
* of the intermediate invalidate.
|
||||
*/
|
||||
void checkSnoop(PacketPtr pkt);
|
||||
|
||||
/** Executes a load instruction. */
|
||||
Fault executeLoad(DynInstPtr &inst);
|
||||
|
||||
Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
|
||||
/** Executes a store instruction. */
|
||||
Fault executeStore(DynInstPtr &inst);
|
||||
|
||||
/** Commits the head load. */
|
||||
void commitLoad();
|
||||
/** Commits loads older than a specific sequence number. */
|
||||
void commitLoads(InstSeqNum &youngest_inst);
|
||||
|
||||
/** Commits stores older than a specific sequence number. */
|
||||
void commitStores(InstSeqNum &youngest_inst);
|
||||
|
||||
/** Writes back stores. */
|
||||
void writebackStores();
|
||||
|
||||
/** Completes the data access that has been returned from the
|
||||
* memory system. */
|
||||
void completeDataAccess(PacketPtr pkt);
|
||||
|
||||
/** Clears all the entries in the LQ. */
|
||||
void clearLQ();
|
||||
|
||||
/** Clears all the entries in the SQ. */
|
||||
void clearSQ();
|
||||
|
||||
/** Resizes the LQ to a given size. */
|
||||
void resizeLQ(unsigned size);
|
||||
|
||||
/** Resizes the SQ to a given size. */
|
||||
void resizeSQ(unsigned size);
|
||||
|
||||
/** Squashes all instructions younger than a specific sequence number. */
|
||||
void squash(const InstSeqNum &squashed_num);
|
||||
|
||||
/** Returns if there is a memory ordering violation. Value is reset upon
|
||||
* call to getMemDepViolator().
|
||||
*/
|
||||
bool violation() { return memDepViolator; }
|
||||
|
||||
/** Returns the memory ordering violator. */
|
||||
DynInstPtr getMemDepViolator();
|
||||
|
||||
/** Returns if a load became blocked due to the memory system. */
|
||||
bool loadBlocked()
|
||||
{ return isLoadBlocked; }
|
||||
|
||||
/** Clears the signal that a load became blocked. */
|
||||
void clearLoadBlocked()
|
||||
{ isLoadBlocked = false; }
|
||||
|
||||
/** Returns if the blocked load was handled. */
|
||||
bool isLoadBlockedHandled()
|
||||
{ return loadBlockedHandled; }
|
||||
|
||||
/** Records the blocked load as being handled. */
|
||||
void setLoadBlockedHandled()
|
||||
{ loadBlockedHandled = true; }
|
||||
|
||||
/** Returns the number of free entries (min of free LQ and SQ entries). */
|
||||
unsigned numFreeEntries();
|
||||
|
||||
/** Returns the number of loads ready to execute. */
|
||||
int numLoadsReady();
|
||||
|
||||
/** Returns the number of loads in the LQ. */
|
||||
int numLoads() { return loads; }
|
||||
|
||||
/** Returns the number of stores in the SQ. */
|
||||
int numStores() { return stores; }
|
||||
|
||||
/** Returns if either the LQ or SQ is full. */
|
||||
bool isFull() { return lqFull() || sqFull(); }
|
||||
|
||||
/** Returns if the LQ is full. */
|
||||
bool lqFull() { return loads >= (LQEntries - 1); }
|
||||
|
||||
/** Returns if the SQ is full. */
|
||||
bool sqFull() { return stores >= (SQEntries - 1); }
|
||||
|
||||
/** Returns the number of instructions in the LSQ. */
|
||||
unsigned getCount() { return loads + stores; }
|
||||
|
||||
/** Returns if there are any stores to writeback. */
|
||||
bool hasStoresToWB() { return storesToWB; }
|
||||
|
||||
/** Returns the number of stores to writeback. */
|
||||
int numStoresToWB() { return storesToWB; }
|
||||
|
||||
/** Returns if the LSQ unit will writeback on this cycle. */
|
||||
bool willWB() { return storeQueue[storeWBIdx].canWB &&
|
||||
!storeQueue[storeWBIdx].completed &&
|
||||
!isStoreBlocked; }
|
||||
|
||||
/** Handles doing the retry. */
|
||||
void recvRetry();
|
||||
|
||||
private:
|
||||
/** Writes back the instruction, sending it to IEW. */
|
||||
void writeback(DynInstPtr &inst, PacketPtr pkt);
|
||||
|
||||
/** Writes back a store that couldn't be completed the previous cycle. */
|
||||
void writebackPendingStore();
|
||||
|
||||
/** Handles completing the send of a store to memory. */
|
||||
void storePostSend(PacketPtr pkt);
|
||||
|
||||
/** Completes the store at the specified index. */
|
||||
void completeStore(int store_idx);
|
||||
|
||||
/** Attempts to send a store to the cache. */
|
||||
bool sendStore(PacketPtr data_pkt);
|
||||
|
||||
/** Increments the given store index (circular queue). */
|
||||
inline void incrStIdx(int &store_idx);
|
||||
/** Decrements the given store index (circular queue). */
|
||||
inline void decrStIdx(int &store_idx);
|
||||
/** Increments the given load index (circular queue). */
|
||||
inline void incrLdIdx(int &load_idx);
|
||||
/** Decrements the given load index (circular queue). */
|
||||
inline void decrLdIdx(int &load_idx);
|
||||
|
||||
public:
|
||||
/** Debugging function to dump instructions in the LSQ. */
|
||||
void dumpInsts();
|
||||
|
||||
private:
|
||||
/** Pointer to the CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Pointer to the IEW stage. */
|
||||
IEW *iewStage;
|
||||
|
||||
/** Pointer to the LSQ. */
|
||||
LSQ *lsq;
|
||||
|
||||
/** Pointer to the dcache port. Used only for sending. */
|
||||
MasterPort *dcachePort;
|
||||
|
||||
/** Derived class to hold any sender state the LSQ needs. */
|
||||
class LSQSenderState : public Packet::SenderState
|
||||
{
|
||||
public:
|
||||
/** Default constructor. */
|
||||
LSQSenderState()
|
||||
: mainPkt(NULL), pendingPacket(NULL), outstanding(1),
|
||||
noWB(false), isSplit(false), pktToSend(false)
|
||||
{ }
|
||||
|
||||
/** Instruction who initiated the access to memory. */
|
||||
DynInstPtr inst;
|
||||
/** The main packet from a split load, used during writeback. */
|
||||
PacketPtr mainPkt;
|
||||
/** A second packet from a split store that needs sending. */
|
||||
PacketPtr pendingPacket;
|
||||
/** The LQ/SQ index of the instruction. */
|
||||
uint8_t idx;
|
||||
/** Number of outstanding packets to complete. */
|
||||
uint8_t outstanding;
|
||||
/** Whether or not it is a load. */
|
||||
bool isLoad;
|
||||
/** Whether or not the instruction will need to writeback. */
|
||||
bool noWB;
|
||||
/** Whether or not this access is split in two. */
|
||||
bool isSplit;
|
||||
/** Whether or not there is a packet that needs sending. */
|
||||
bool pktToSend;
|
||||
|
||||
/** Completes a packet and returns whether the access is finished. */
|
||||
inline bool complete() { return --outstanding == 0; }
|
||||
};
|
||||
|
||||
/** Writeback event, specifically for when stores forward data to loads. */
|
||||
class WritebackEvent : public Event {
|
||||
public:
|
||||
/** Constructs a writeback event. */
|
||||
WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
|
||||
|
||||
/** Processes the writeback event. */
|
||||
void process();
|
||||
|
||||
/** Returns the description of this event. */
|
||||
const char *description() const;
|
||||
|
||||
private:
|
||||
/** Instruction whose results are being written back. */
|
||||
DynInstPtr inst;
|
||||
|
||||
/** The packet that would have been sent to memory. */
|
||||
PacketPtr pkt;
|
||||
|
||||
/** The pointer to the LSQ unit that issued the store. */
|
||||
LSQUnit<Impl> *lsqPtr;
|
||||
};
|
||||
|
||||
public:
|
||||
struct SQEntry {
|
||||
/** Constructs an empty store queue entry. */
|
||||
SQEntry()
|
||||
: inst(NULL), req(NULL), size(0),
|
||||
canWB(0), committed(0), completed(0)
|
||||
{
|
||||
std::memset(data, 0, sizeof(data));
|
||||
}
|
||||
|
||||
/** Constructs a store queue entry for a given instruction. */
|
||||
SQEntry(DynInstPtr &_inst)
|
||||
: inst(_inst), req(NULL), sreqLow(NULL), sreqHigh(NULL), size(0),
|
||||
isSplit(0), canWB(0), committed(0), completed(0)
|
||||
{
|
||||
std::memset(data, 0, sizeof(data));
|
||||
}
|
||||
/** The store data. */
|
||||
char data[16];
|
||||
/** The store instruction. */
|
||||
DynInstPtr inst;
|
||||
/** The request for the store. */
|
||||
RequestPtr req;
|
||||
/** The split requests for the store. */
|
||||
RequestPtr sreqLow;
|
||||
RequestPtr sreqHigh;
|
||||
/** The size of the store. */
|
||||
uint8_t size;
|
||||
/** Whether or not the store is split into two requests. */
|
||||
bool isSplit;
|
||||
/** Whether or not the store can writeback. */
|
||||
bool canWB;
|
||||
/** Whether or not the store is committed. */
|
||||
bool committed;
|
||||
/** Whether or not the store is completed. */
|
||||
bool completed;
|
||||
};
|
||||
|
||||
private:
|
||||
/** The LSQUnit thread id. */
|
||||
ThreadID lsqID;
|
||||
|
||||
/** The store queue. */
|
||||
std::vector<SQEntry> storeQueue;
|
||||
|
||||
/** The load queue. */
|
||||
std::vector<DynInstPtr> loadQueue;
|
||||
|
||||
/** The number of LQ entries, plus a sentinel entry (circular queue).
|
||||
* @todo: Consider having var that records the true number of LQ entries.
|
||||
*/
|
||||
unsigned LQEntries;
|
||||
/** The number of SQ entries, plus a sentinel entry (circular queue).
|
||||
* @todo: Consider having var that records the true number of SQ entries.
|
||||
*/
|
||||
unsigned SQEntries;
|
||||
|
||||
/** The number of places to shift addresses in the LSQ before checking
|
||||
* for dependency violations
|
||||
*/
|
||||
unsigned depCheckShift;
|
||||
|
||||
/** Should loads be checked for dependency issues */
|
||||
bool checkLoads;
|
||||
|
||||
/** The number of load instructions in the LQ. */
|
||||
int loads;
|
||||
/** The number of store instructions in the SQ. */
|
||||
int stores;
|
||||
/** The number of store instructions in the SQ waiting to writeback. */
|
||||
int storesToWB;
|
||||
|
||||
/** The index of the head instruction in the LQ. */
|
||||
int loadHead;
|
||||
/** The index of the tail instruction in the LQ. */
|
||||
int loadTail;
|
||||
|
||||
/** The index of the head instruction in the SQ. */
|
||||
int storeHead;
|
||||
/** The index of the first instruction that may be ready to be
|
||||
* written back, and has not yet been written back.
|
||||
*/
|
||||
int storeWBIdx;
|
||||
/** The index of the tail instruction in the SQ. */
|
||||
int storeTail;
|
||||
|
||||
/// @todo Consider moving to a more advanced model with write vs read ports
|
||||
/** The number of cache ports available each cycle. */
|
||||
int cachePorts;
|
||||
|
||||
/** The number of used cache ports in this cycle. */
|
||||
int usedPorts;
|
||||
|
||||
/** Is the LSQ switched out. */
|
||||
bool switchedOut;
|
||||
|
||||
//list<InstSeqNum> mshrSeqNums;
|
||||
|
||||
/** Address Mask for a cache block (e.g. ~(cache_block_size-1)) */
|
||||
Addr cacheBlockMask;
|
||||
|
||||
/** Wire to read information from the issue stage time queue. */
|
||||
typename TimeBuffer<IssueStruct>::wire fromIssue;
|
||||
|
||||
/** Whether or not the LSQ is stalled. */
|
||||
bool stalled;
|
||||
/** The store that causes the stall due to partial store to load
|
||||
* forwarding.
|
||||
*/
|
||||
InstSeqNum stallingStoreIsn;
|
||||
/** The index of the above store. */
|
||||
int stallingLoadIdx;
|
||||
|
||||
/** The packet that needs to be retried. */
|
||||
PacketPtr retryPkt;
|
||||
|
||||
/** Whehter or not a store is blocked due to the memory system. */
|
||||
bool isStoreBlocked;
|
||||
|
||||
/** Whether or not a load is blocked due to the memory system. */
|
||||
bool isLoadBlocked;
|
||||
|
||||
/** Has the blocked load been handled. */
|
||||
bool loadBlockedHandled;
|
||||
|
||||
/** Whether or not a store is in flight. */
|
||||
bool storeInFlight;
|
||||
|
||||
/** The sequence number of the blocked load. */
|
||||
InstSeqNum blockedLoadSeqNum;
|
||||
|
||||
/** The oldest load that caused a memory ordering violation. */
|
||||
DynInstPtr memDepViolator;
|
||||
|
||||
/** Whether or not there is a packet that couldn't be sent because of
|
||||
* a lack of cache ports. */
|
||||
bool hasPendingPkt;
|
||||
|
||||
/** The packet that is pending free cache ports. */
|
||||
PacketPtr pendingPkt;
|
||||
|
||||
/** Flag for memory model. */
|
||||
bool needsTSO;
|
||||
|
||||
// Will also need how many read/write ports the Dcache has. Or keep track
|
||||
// of that in stage that is one level up, and only call executeLoad/Store
|
||||
// the appropriate number of times.
|
||||
/** Total number of loads forwaded from LSQ stores. */
|
||||
Stats::Scalar lsqForwLoads;
|
||||
|
||||
/** Total number of loads ignored due to invalid addresses. */
|
||||
Stats::Scalar invAddrLoads;
|
||||
|
||||
/** Total number of squashed loads. */
|
||||
Stats::Scalar lsqSquashedLoads;
|
||||
|
||||
/** Total number of responses from the memory system that are
|
||||
* ignored due to the instruction already being squashed. */
|
||||
Stats::Scalar lsqIgnoredResponses;
|
||||
|
||||
/** Tota number of memory ordering violations. */
|
||||
Stats::Scalar lsqMemOrderViolation;
|
||||
|
||||
/** Total number of squashed stores. */
|
||||
Stats::Scalar lsqSquashedStores;
|
||||
|
||||
/** Total number of software prefetches ignored due to invalid addresses. */
|
||||
Stats::Scalar invAddrSwpfs;
|
||||
|
||||
/** Ready loads blocked due to partial store-forwarding. */
|
||||
Stats::Scalar lsqBlockedLoads;
|
||||
|
||||
/** Number of loads that were rescheduled. */
|
||||
Stats::Scalar lsqRescheduledLoads;
|
||||
|
||||
/** Number of times the LSQ is blocked due to the cache. */
|
||||
Stats::Scalar lsqCacheBlocked;
|
||||
|
||||
public:
|
||||
/** Executes the load at the given index. */
|
||||
Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||
uint8_t *data, int load_idx);
|
||||
|
||||
/** Executes the store at the given index. */
|
||||
Fault write(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||
uint8_t *data, int store_idx);
|
||||
|
||||
/** Returns the index of the head load instruction. */
|
||||
int getLoadHead() { return loadHead; }
|
||||
/** Returns the sequence number of the head load instruction. */
|
||||
InstSeqNum getLoadHeadSeqNum()
|
||||
{
|
||||
if (loadQueue[loadHead]) {
|
||||
return loadQueue[loadHead]->seqNum;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Returns the index of the head store instruction. */
|
||||
int getStoreHead() { return storeHead; }
|
||||
/** Returns the sequence number of the head store instruction. */
|
||||
InstSeqNum getStoreHeadSeqNum()
|
||||
{
|
||||
if (storeQueue[storeHead].inst) {
|
||||
return storeQueue[storeHead].inst->seqNum;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Returns whether or not the LSQ unit is stalled. */
|
||||
bool isStalled() { return stalled; }
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::read(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||
uint8_t *data, int load_idx)
|
||||
{
|
||||
DynInstPtr load_inst = loadQueue[load_idx];
|
||||
|
||||
assert(load_inst);
|
||||
|
||||
assert(!load_inst->isExecuted());
|
||||
|
||||
// Make sure this isn't an uncacheable access
|
||||
// A bit of a hackish way to get uncached accesses to work only if they're
|
||||
// at the head of the LSQ and are ready to commit (at the head of the ROB
|
||||
// too).
|
||||
if (req->isUncacheable() &&
|
||||
(load_idx != loadHead || !load_inst->isAtCommit())) {
|
||||
iewStage->rescheduleMemInst(load_inst);
|
||||
++lsqRescheduledLoads;
|
||||
DPRINTF(LSQUnit, "Uncachable load [sn:%lli] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
|
||||
// Must delete request now that it wasn't handed off to
|
||||
// memory. This is quite ugly. @todo: Figure out the proper
|
||||
// place to really handle request deletes.
|
||||
delete req;
|
||||
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||
delete sreqLow;
|
||||
delete sreqHigh;
|
||||
}
|
||||
return new GenericISA::M5PanicFault(
|
||||
"Uncachable load [sn:%llx] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
}
|
||||
|
||||
// Check the SQ for any previous stores that might lead to forwarding
|
||||
int store_idx = load_inst->sqIdx;
|
||||
|
||||
int store_size = 0;
|
||||
|
||||
DPRINTF(LSQUnit, "Read called, load idx: %i, store idx: %i, "
|
||||
"storeHead: %i addr: %#x%s\n",
|
||||
load_idx, store_idx, storeHead, req->getPaddr(),
|
||||
sreqLow ? " split" : "");
|
||||
|
||||
if (req->isLLSC()) {
|
||||
assert(!sreqLow);
|
||||
// Disable recording the result temporarily. Writing to misc
|
||||
// regs normally updates the result, but this is not the
|
||||
// desired behavior when handling store conditionals.
|
||||
load_inst->recordResult(false);
|
||||
TheISA::handleLockedRead(load_inst.get(), req);
|
||||
load_inst->recordResult(true);
|
||||
}
|
||||
|
||||
if (req->isMmappedIpr()) {
|
||||
assert(!load_inst->memData);
|
||||
load_inst->memData = new uint8_t[64];
|
||||
|
||||
ThreadContext *thread = cpu->tcBase(lsqID);
|
||||
Tick delay;
|
||||
PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
|
||||
|
||||
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
delay = TheISA::handleIprRead(thread, data_pkt);
|
||||
} else {
|
||||
assert(sreqLow->isMmappedIpr() && sreqHigh->isMmappedIpr());
|
||||
PacketPtr fst_data_pkt = new Packet(sreqLow, MemCmd::ReadReq);
|
||||
PacketPtr snd_data_pkt = new Packet(sreqHigh, MemCmd::ReadReq);
|
||||
|
||||
fst_data_pkt->dataStatic(load_inst->memData);
|
||||
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
|
||||
|
||||
delay = TheISA::handleIprRead(thread, fst_data_pkt);
|
||||
unsigned delay2 = TheISA::handleIprRead(thread, snd_data_pkt);
|
||||
if (delay2 > delay)
|
||||
delay = delay2;
|
||||
|
||||
delete sreqLow;
|
||||
delete sreqHigh;
|
||||
delete fst_data_pkt;
|
||||
delete snd_data_pkt;
|
||||
}
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
|
||||
cpu->schedule(wb, curTick() + delay);
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
while (store_idx != -1) {
|
||||
// End once we've reached the top of the LSQ
|
||||
if (store_idx == storeWBIdx) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Move the index to one younger
|
||||
if (--store_idx < 0)
|
||||
store_idx += SQEntries;
|
||||
|
||||
assert(storeQueue[store_idx].inst);
|
||||
|
||||
store_size = storeQueue[store_idx].size;
|
||||
|
||||
if (store_size == 0)
|
||||
continue;
|
||||
else if (storeQueue[store_idx].inst->uncacheable())
|
||||
continue;
|
||||
|
||||
assert(storeQueue[store_idx].inst->effAddrValid());
|
||||
|
||||
// Check if the store data is within the lower and upper bounds of
|
||||
// addresses that the request needs.
|
||||
bool store_has_lower_limit =
|
||||
req->getVaddr() >= storeQueue[store_idx].inst->effAddr;
|
||||
bool store_has_upper_limit =
|
||||
(req->getVaddr() + req->getSize()) <=
|
||||
(storeQueue[store_idx].inst->effAddr + store_size);
|
||||
bool lower_load_has_store_part =
|
||||
req->getVaddr() < (storeQueue[store_idx].inst->effAddr +
|
||||
store_size);
|
||||
bool upper_load_has_store_part =
|
||||
(req->getVaddr() + req->getSize()) >
|
||||
storeQueue[store_idx].inst->effAddr;
|
||||
|
||||
// If the store's data has all of the data needed, we can forward.
|
||||
if ((store_has_lower_limit && store_has_upper_limit)) {
|
||||
// Get shift amount for offset into the store's data.
|
||||
int shift_amt = req->getVaddr() - storeQueue[store_idx].inst->effAddr;
|
||||
|
||||
memcpy(data, storeQueue[store_idx].data + shift_amt,
|
||||
req->getSize());
|
||||
|
||||
assert(!load_inst->memData);
|
||||
load_inst->memData = new uint8_t[64];
|
||||
|
||||
memcpy(load_inst->memData,
|
||||
storeQueue[store_idx].data + shift_amt, req->getSize());
|
||||
|
||||
DPRINTF(LSQUnit, "Forwarding from store idx %i to load to "
|
||||
"addr %#x, data %#x\n",
|
||||
store_idx, req->getVaddr(), data);
|
||||
|
||||
PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq);
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
|
||||
|
||||
// We'll say this has a 1 cycle load-store forwarding latency
|
||||
// for now.
|
||||
// @todo: Need to make this a parameter.
|
||||
cpu->schedule(wb, curTick());
|
||||
|
||||
// Don't need to do anything special for split loads.
|
||||
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||
delete sreqLow;
|
||||
delete sreqHigh;
|
||||
}
|
||||
|
||||
++lsqForwLoads;
|
||||
return NoFault;
|
||||
} else if ((store_has_lower_limit && lower_load_has_store_part) ||
|
||||
(store_has_upper_limit && upper_load_has_store_part) ||
|
||||
(lower_load_has_store_part && upper_load_has_store_part)) {
|
||||
// This is the partial store-load forwarding case where a store
|
||||
// has only part of the load's data.
|
||||
|
||||
// If it's already been written back, then don't worry about
|
||||
// stalling on it.
|
||||
if (storeQueue[store_idx].completed) {
|
||||
panic("Should not check one of these");
|
||||
continue;
|
||||
}
|
||||
|
||||
// Must stall load and force it to retry, so long as it's the oldest
|
||||
// load that needs to do so.
|
||||
if (!stalled ||
|
||||
(stalled &&
|
||||
load_inst->seqNum <
|
||||
loadQueue[stallingLoadIdx]->seqNum)) {
|
||||
stalled = true;
|
||||
stallingStoreIsn = storeQueue[store_idx].inst->seqNum;
|
||||
stallingLoadIdx = load_idx;
|
||||
}
|
||||
|
||||
// Tell IQ/mem dep unit that this instruction will need to be
|
||||
// rescheduled eventually
|
||||
iewStage->rescheduleMemInst(load_inst);
|
||||
iewStage->decrWb(load_inst->seqNum);
|
||||
load_inst->clearIssued();
|
||||
++lsqRescheduledLoads;
|
||||
|
||||
// Do not generate a writeback event as this instruction is not
|
||||
// complete.
|
||||
DPRINTF(LSQUnit, "Load-store forwarding mis-match. "
|
||||
"Store idx %i to load addr %#x\n",
|
||||
store_idx, req->getVaddr());
|
||||
|
||||
// Must delete request now that it wasn't handed off to
|
||||
// memory. This is quite ugly. @todo: Figure out the
|
||||
// proper place to really handle request deletes.
|
||||
delete req;
|
||||
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||
delete sreqLow;
|
||||
delete sreqHigh;
|
||||
}
|
||||
|
||||
return NoFault;
|
||||
}
|
||||
}
|
||||
|
||||
// If there's no forwarding case, then go access memory
|
||||
DPRINTF(LSQUnit, "Doing memory access for inst [sn:%lli] PC %s\n",
|
||||
load_inst->seqNum, load_inst->pcState());
|
||||
|
||||
assert(!load_inst->memData);
|
||||
load_inst->memData = new uint8_t[64];
|
||||
|
||||
++usedPorts;
|
||||
|
||||
// if we the cache is not blocked, do cache access
|
||||
bool completedFirst = false;
|
||||
if (!lsq->cacheBlocked()) {
|
||||
MemCmd command =
|
||||
req->isLLSC() ? MemCmd::LoadLockedReq : MemCmd::ReadReq;
|
||||
PacketPtr data_pkt = new Packet(req, command);
|
||||
PacketPtr fst_data_pkt = NULL;
|
||||
PacketPtr snd_data_pkt = NULL;
|
||||
|
||||
data_pkt->dataStatic(load_inst->memData);
|
||||
|
||||
LSQSenderState *state = new LSQSenderState;
|
||||
state->isLoad = true;
|
||||
state->idx = load_idx;
|
||||
state->inst = load_inst;
|
||||
data_pkt->senderState = state;
|
||||
|
||||
if (!TheISA::HasUnalignedMemAcc || !sreqLow) {
|
||||
|
||||
// Point the first packet at the main data packet.
|
||||
fst_data_pkt = data_pkt;
|
||||
} else {
|
||||
|
||||
// Create the split packets.
|
||||
fst_data_pkt = new Packet(sreqLow, command);
|
||||
snd_data_pkt = new Packet(sreqHigh, command);
|
||||
|
||||
fst_data_pkt->dataStatic(load_inst->memData);
|
||||
snd_data_pkt->dataStatic(load_inst->memData + sreqLow->getSize());
|
||||
|
||||
fst_data_pkt->senderState = state;
|
||||
snd_data_pkt->senderState = state;
|
||||
|
||||
state->isSplit = true;
|
||||
state->outstanding = 2;
|
||||
state->mainPkt = data_pkt;
|
||||
}
|
||||
|
||||
if (!dcachePort->sendTimingReq(fst_data_pkt)) {
|
||||
// Delete state and data packet because a load retry
|
||||
// initiates a pipeline restart; it does not retry.
|
||||
delete state;
|
||||
delete data_pkt->req;
|
||||
delete data_pkt;
|
||||
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||
delete fst_data_pkt->req;
|
||||
delete fst_data_pkt;
|
||||
delete snd_data_pkt->req;
|
||||
delete snd_data_pkt;
|
||||
sreqLow = NULL;
|
||||
sreqHigh = NULL;
|
||||
}
|
||||
|
||||
req = NULL;
|
||||
|
||||
// If the access didn't succeed, tell the LSQ by setting
|
||||
// the retry thread id.
|
||||
lsq->setRetryTid(lsqID);
|
||||
} else if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||
completedFirst = true;
|
||||
|
||||
// The first packet was sent without problems, so send this one
|
||||
// too. If there is a problem with this packet then the whole
|
||||
// load will be squashed, so indicate this to the state object.
|
||||
// The first packet will return in completeDataAccess and be
|
||||
// handled there.
|
||||
++usedPorts;
|
||||
if (!dcachePort->sendTimingReq(snd_data_pkt)) {
|
||||
|
||||
// The main packet will be deleted in completeDataAccess.
|
||||
delete snd_data_pkt->req;
|
||||
delete snd_data_pkt;
|
||||
|
||||
state->complete();
|
||||
|
||||
req = NULL;
|
||||
sreqHigh = NULL;
|
||||
|
||||
lsq->setRetryTid(lsqID);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If the cache was blocked, or has become blocked due to the access,
|
||||
// handle it.
|
||||
if (lsq->cacheBlocked()) {
|
||||
if (req)
|
||||
delete req;
|
||||
if (TheISA::HasUnalignedMemAcc && sreqLow && !completedFirst) {
|
||||
delete sreqLow;
|
||||
delete sreqHigh;
|
||||
}
|
||||
|
||||
++lsqCacheBlocked;
|
||||
|
||||
// If the first part of a split access succeeds, then let the LSQ
|
||||
// handle the decrWb when completeDataAccess is called upon return
|
||||
// of the requested first part of data
|
||||
if (!completedFirst)
|
||||
iewStage->decrWb(load_inst->seqNum);
|
||||
|
||||
// There's an older load that's already going to squash.
|
||||
if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
|
||||
return NoFault;
|
||||
|
||||
// Record that the load was blocked due to memory. This
|
||||
// load will squash all instructions after it, be
|
||||
// refetched, and re-executed.
|
||||
isLoadBlocked = true;
|
||||
loadBlockedHandled = false;
|
||||
blockedLoadSeqNum = load_inst->seqNum;
|
||||
// No fault occurred, even though the interface is blocked.
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Fault
|
||||
LSQUnit<Impl>::write(Request *req, Request *sreqLow, Request *sreqHigh,
|
||||
uint8_t *data, int store_idx)
|
||||
{
|
||||
assert(storeQueue[store_idx].inst);
|
||||
|
||||
DPRINTF(LSQUnit, "Doing write to store idx %i, addr %#x data %#x"
|
||||
" | storeHead:%i [sn:%i]\n",
|
||||
store_idx, req->getPaddr(), data, storeHead,
|
||||
storeQueue[store_idx].inst->seqNum);
|
||||
|
||||
storeQueue[store_idx].req = req;
|
||||
storeQueue[store_idx].sreqLow = sreqLow;
|
||||
storeQueue[store_idx].sreqHigh = sreqHigh;
|
||||
unsigned size = req->getSize();
|
||||
storeQueue[store_idx].size = size;
|
||||
assert(size <= sizeof(storeQueue[store_idx].data));
|
||||
|
||||
// Split stores can only occur in ISAs with unaligned memory accesses. If
|
||||
// a store request has been split, sreqLow and sreqHigh will be non-null.
|
||||
if (TheISA::HasUnalignedMemAcc && sreqLow) {
|
||||
storeQueue[store_idx].isSplit = true;
|
||||
}
|
||||
|
||||
memcpy(storeQueue[store_idx].data, data, size);
|
||||
|
||||
// This function only writes the data to the store queue, so no fault
|
||||
// can happen here.
|
||||
return NoFault;
|
||||
}
|
||||
|
||||
#endif // __CPU_O3_LSQ_UNIT_HH__
|
||||
1297
simulators/gem5/src/cpu/o3/lsq_unit_impl.hh
Normal file
1297
simulators/gem5/src/cpu/o3/lsq_unit_impl.hh
Normal file
File diff suppressed because it is too large
Load Diff
49
simulators/gem5/src/cpu/o3/mem_dep_unit.cc
Normal file
49
simulators/gem5/src/cpu/o3/mem_dep_unit.cc
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/mem_dep_unit_impl.hh"
|
||||
#include "cpu/o3/store_set.hh"
|
||||
|
||||
#ifdef DEBUG
|
||||
template <>
|
||||
int
|
||||
MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_count = 0;
|
||||
template <>
|
||||
int
|
||||
MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_insert = 0;
|
||||
template <>
|
||||
int
|
||||
MemDepUnit<StoreSet, O3CPUImpl>::MemDepEntry::memdep_erase = 0;
|
||||
#endif
|
||||
|
||||
// Force instantation of memory dependency unit using store sets and
|
||||
// O3CPUImpl.
|
||||
template class MemDepUnit<StoreSet, O3CPUImpl>;
|
||||
270
simulators/gem5/src/cpu/o3/mem_dep_unit.hh
Normal file
270
simulators/gem5/src/cpu/o3/mem_dep_unit.hh
Normal file
@ -0,0 +1,270 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_MEM_DEP_UNIT_HH__
|
||||
#define __CPU_O3_MEM_DEP_UNIT_HH__
|
||||
|
||||
#include <list>
|
||||
#include <set>
|
||||
|
||||
#include "base/hashmap.hh"
|
||||
#include "base/refcnt.hh"
|
||||
#include "base/statistics.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
#include "debug/MemDepUnit.hh"
|
||||
|
||||
struct SNHash {
|
||||
size_t operator() (const InstSeqNum &seq_num) const {
|
||||
unsigned a = (unsigned)seq_num;
|
||||
unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
|
||||
|
||||
return hash;
|
||||
}
|
||||
};
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
|
||||
template <class Impl>
|
||||
class InstructionQueue;
|
||||
|
||||
/**
|
||||
* Memory dependency unit class. This holds the memory dependence predictor.
|
||||
* As memory operations are issued to the IQ, they are also issued to this
|
||||
* unit, which then looks up the prediction as to what they are dependent
|
||||
* upon. This unit must be checked prior to a memory operation being able
|
||||
* to issue. Although this is templated, it's somewhat hard to make a generic
|
||||
* memory dependence unit. This one is mostly for store sets; it will be
|
||||
* quite limited in what other memory dependence predictions it can also
|
||||
* utilize. Thus this class should be most likely be rewritten for other
|
||||
* dependence prediction schemes.
|
||||
*/
|
||||
template <class MemDepPred, class Impl>
|
||||
class MemDepUnit
|
||||
{
|
||||
protected:
|
||||
std::string _name;
|
||||
|
||||
public:
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
/** Empty constructor. Must call init() prior to using in this case. */
|
||||
MemDepUnit();
|
||||
|
||||
/** Constructs a MemDepUnit with given parameters. */
|
||||
MemDepUnit(DerivO3CPUParams *params);
|
||||
|
||||
/** Frees up any memory allocated. */
|
||||
~MemDepUnit();
|
||||
|
||||
/** Returns the name of the memory dependence unit. */
|
||||
std::string name() const { return _name; }
|
||||
|
||||
/** Initializes the unit with parameters and a thread id. */
|
||||
void init(DerivO3CPUParams *params, ThreadID tid);
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Switches out the memory dependence predictor. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Sets the pointer to the IQ. */
|
||||
void setIQ(InstructionQueue<Impl> *iq_ptr);
|
||||
|
||||
/** Inserts a memory instruction. */
|
||||
void insert(DynInstPtr &inst);
|
||||
|
||||
/** Inserts a non-speculative memory instruction. */
|
||||
void insertNonSpec(DynInstPtr &inst);
|
||||
|
||||
/** Inserts a barrier instruction. */
|
||||
void insertBarrier(DynInstPtr &barr_inst);
|
||||
|
||||
/** Indicate that an instruction has its registers ready. */
|
||||
void regsReady(DynInstPtr &inst);
|
||||
|
||||
/** Indicate that a non-speculative instruction is ready. */
|
||||
void nonSpecInstReady(DynInstPtr &inst);
|
||||
|
||||
/** Reschedules an instruction to be re-executed. */
|
||||
void reschedule(DynInstPtr &inst);
|
||||
|
||||
/** Replays all instructions that have been rescheduled by moving them to
|
||||
* the ready list.
|
||||
*/
|
||||
void replay(DynInstPtr &inst);
|
||||
|
||||
/** Completes a memory instruction. */
|
||||
void completed(DynInstPtr &inst);
|
||||
|
||||
/** Completes a barrier instruction. */
|
||||
void completeBarrier(DynInstPtr &inst);
|
||||
|
||||
/** Wakes any dependents of a memory instruction. */
|
||||
void wakeDependents(DynInstPtr &inst);
|
||||
|
||||
/** Squashes all instructions up until a given sequence number for a
|
||||
* specific thread.
|
||||
*/
|
||||
void squash(const InstSeqNum &squashed_num, ThreadID tid);
|
||||
|
||||
/** Indicates an ordering violation between a store and a younger load. */
|
||||
void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
|
||||
|
||||
/** Issues the given instruction */
|
||||
void issue(DynInstPtr &inst);
|
||||
|
||||
/** Debugging function to dump the lists of instructions. */
|
||||
void dumpLists();
|
||||
|
||||
private:
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
|
||||
class MemDepEntry;
|
||||
|
||||
typedef RefCountingPtr<MemDepEntry> MemDepEntryPtr;
|
||||
|
||||
/** Memory dependence entries that track memory operations, marking
|
||||
* when the instruction is ready to execute and what instructions depend
|
||||
* upon it.
|
||||
*/
|
||||
class MemDepEntry : public RefCounted {
|
||||
public:
|
||||
/** Constructs a memory dependence entry. */
|
||||
MemDepEntry(DynInstPtr &new_inst)
|
||||
: inst(new_inst), regsReady(false), memDepReady(false),
|
||||
completed(false), squashed(false)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
++memdep_count;
|
||||
|
||||
DPRINTF(MemDepUnit, "Memory dependency entry created. "
|
||||
"memdep_count=%i %s\n", memdep_count, inst->pcState());
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Frees any pointers. */
|
||||
~MemDepEntry()
|
||||
{
|
||||
for (int i = 0; i < dependInsts.size(); ++i) {
|
||||
dependInsts[i] = NULL;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
--memdep_count;
|
||||
|
||||
DPRINTF(MemDepUnit, "Memory dependency entry deleted. "
|
||||
"memdep_count=%i %s\n", memdep_count, inst->pcState());
|
||||
#endif
|
||||
}
|
||||
|
||||
/** Returns the name of the memory dependence entry. */
|
||||
std::string name() const { return "memdepentry"; }
|
||||
|
||||
/** The instruction being tracked. */
|
||||
DynInstPtr inst;
|
||||
|
||||
/** The iterator to the instruction's location inside the list. */
|
||||
ListIt listIt;
|
||||
|
||||
/** A vector of any dependent instructions. */
|
||||
std::vector<MemDepEntryPtr> dependInsts;
|
||||
|
||||
/** If the registers are ready or not. */
|
||||
bool regsReady;
|
||||
/** If all memory dependencies have been satisfied. */
|
||||
bool memDepReady;
|
||||
/** If the instruction is completed. */
|
||||
bool completed;
|
||||
/** If the instruction is squashed. */
|
||||
bool squashed;
|
||||
|
||||
/** For debugging. */
|
||||
#ifdef DEBUG
|
||||
static int memdep_count;
|
||||
static int memdep_insert;
|
||||
static int memdep_erase;
|
||||
#endif
|
||||
};
|
||||
|
||||
/** Finds the memory dependence entry in the hash map. */
|
||||
inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);
|
||||
|
||||
/** Moves an entry to the ready list. */
|
||||
inline void moveToReady(MemDepEntryPtr &ready_inst_entry);
|
||||
|
||||
typedef m5::hash_map<InstSeqNum, MemDepEntryPtr, SNHash> MemDepHash;
|
||||
|
||||
typedef typename MemDepHash::iterator MemDepHashIt;
|
||||
|
||||
/** A hash map of all memory dependence entries. */
|
||||
MemDepHash memDepHash;
|
||||
|
||||
/** A list of all instructions in the memory dependence unit. */
|
||||
std::list<DynInstPtr> instList[Impl::MaxThreads];
|
||||
|
||||
/** A list of all instructions that are going to be replayed. */
|
||||
std::list<DynInstPtr> instsToReplay;
|
||||
|
||||
/** The memory dependence predictor. It is accessed upon new
|
||||
* instructions being added to the IQ, and responds by telling
|
||||
* this unit what instruction the newly added instruction is dependent
|
||||
* upon.
|
||||
*/
|
||||
MemDepPred depPred;
|
||||
|
||||
/** Is there an outstanding load barrier that loads must wait on. */
|
||||
bool loadBarrier;
|
||||
/** The sequence number of the load barrier. */
|
||||
InstSeqNum loadBarrierSN;
|
||||
/** Is there an outstanding store barrier that loads must wait on. */
|
||||
bool storeBarrier;
|
||||
/** The sequence number of the store barrier. */
|
||||
InstSeqNum storeBarrierSN;
|
||||
|
||||
/** Pointer to the IQ. */
|
||||
InstructionQueue<Impl> *iqPtr;
|
||||
|
||||
/** The thread id of this memory dependence unit. */
|
||||
int id;
|
||||
|
||||
/** Stat for number of inserted loads. */
|
||||
Stats::Scalar insertedLoads;
|
||||
/** Stat for number of inserted stores. */
|
||||
Stats::Scalar insertedStores;
|
||||
/** Stat for number of conflicting loads that had to wait for a store. */
|
||||
Stats::Scalar conflictingLoads;
|
||||
/** Stat for number of conflicting stores that had to wait for a store. */
|
||||
Stats::Scalar conflictingStores;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_MEM_DEP_UNIT_HH__
|
||||
587
simulators/gem5/src/cpu/o3/mem_dep_unit_impl.hh
Normal file
587
simulators/gem5/src/cpu/o3/mem_dep_unit_impl.hh
Normal file
@ -0,0 +1,587 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "cpu/o3/inst_queue.hh"
|
||||
#include "cpu/o3/mem_dep_unit.hh"
|
||||
#include "debug/MemDepUnit.hh"
|
||||
#include "params/DerivO3CPU.hh"
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
MemDepUnit<MemDepPred, Impl>::MemDepUnit()
|
||||
: loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
|
||||
storeBarrierSN(0), iqPtr(NULL)
|
||||
{
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
MemDepUnit<MemDepPred, Impl>::MemDepUnit(DerivO3CPUParams *params)
|
||||
: _name(params->name + ".memdepunit"),
|
||||
depPred(params->store_set_clear_period, params->SSITSize,
|
||||
params->LFSTSize),
|
||||
loadBarrier(false), loadBarrierSN(0), storeBarrier(false),
|
||||
storeBarrierSN(0), iqPtr(NULL)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Creating MemDepUnit object.\n");
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
MemDepUnit<MemDepPred, Impl>::~MemDepUnit()
|
||||
{
|
||||
for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
|
||||
|
||||
ListIt inst_list_it = instList[tid].begin();
|
||||
|
||||
MemDepHashIt hash_it;
|
||||
|
||||
while (!instList[tid].empty()) {
|
||||
hash_it = memDepHash.find((*inst_list_it)->seqNum);
|
||||
|
||||
assert(hash_it != memDepHash.end());
|
||||
|
||||
memDepHash.erase(hash_it);
|
||||
|
||||
instList[tid].erase(inst_list_it++);
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
assert(MemDepEntry::memdep_count == 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::init(DerivO3CPUParams *params, ThreadID tid)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Creating MemDepUnit %i object.\n",tid);
|
||||
|
||||
_name = csprintf("%s.memDep%d", params->name, tid);
|
||||
id = tid;
|
||||
|
||||
depPred.init(params->store_set_clear_period, params->SSITSize,
|
||||
params->LFSTSize);
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::regStats()
|
||||
{
|
||||
insertedLoads
|
||||
.name(name() + ".insertedLoads")
|
||||
.desc("Number of loads inserted to the mem dependence unit.");
|
||||
|
||||
insertedStores
|
||||
.name(name() + ".insertedStores")
|
||||
.desc("Number of stores inserted to the mem dependence unit.");
|
||||
|
||||
conflictingLoads
|
||||
.name(name() + ".conflictingLoads")
|
||||
.desc("Number of conflicting loads.");
|
||||
|
||||
conflictingStores
|
||||
.name(name() + ".conflictingStores")
|
||||
.desc("Number of conflicting stores.");
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::switchOut()
|
||||
{
|
||||
assert(instList[0].empty());
|
||||
assert(instsToReplay.empty());
|
||||
assert(memDepHash.empty());
|
||||
// Clear any state.
|
||||
for (int i = 0; i < Impl::MaxThreads; ++i) {
|
||||
instList[i].clear();
|
||||
}
|
||||
instsToReplay.clear();
|
||||
memDepHash.clear();
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::takeOverFrom()
|
||||
{
|
||||
// Be sure to reset all state.
|
||||
loadBarrier = storeBarrier = false;
|
||||
loadBarrierSN = storeBarrierSN = 0;
|
||||
depPred.clear();
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::setIQ(InstructionQueue<Impl> *iq_ptr)
|
||||
{
|
||||
iqPtr = iq_ptr;
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insert(DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
MemDepEntryPtr inst_entry = new MemDepEntry(inst);
|
||||
|
||||
// Add the MemDepEntry to the hash.
|
||||
memDepHash.insert(
|
||||
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_insert++;
|
||||
#endif
|
||||
|
||||
instList[tid].push_back(inst);
|
||||
|
||||
inst_entry->listIt = --(instList[tid].end());
|
||||
|
||||
// Check any barriers and the dependence predictor for any
|
||||
// producing memrefs/stores.
|
||||
InstSeqNum producing_store;
|
||||
if (inst->isLoad() && loadBarrier) {
|
||||
DPRINTF(MemDepUnit, "Load barrier [sn:%lli] in flight\n",
|
||||
loadBarrierSN);
|
||||
producing_store = loadBarrierSN;
|
||||
} else if (inst->isStore() && storeBarrier) {
|
||||
DPRINTF(MemDepUnit, "Store barrier [sn:%lli] in flight\n",
|
||||
storeBarrierSN);
|
||||
producing_store = storeBarrierSN;
|
||||
} else {
|
||||
producing_store = depPred.checkInst(inst->instAddr());
|
||||
}
|
||||
|
||||
MemDepEntryPtr store_entry = NULL;
|
||||
|
||||
// If there is a producing store, try to find the entry.
|
||||
if (producing_store != 0) {
|
||||
DPRINTF(MemDepUnit, "Searching for producer\n");
|
||||
MemDepHashIt hash_it = memDepHash.find(producing_store);
|
||||
|
||||
if (hash_it != memDepHash.end()) {
|
||||
store_entry = (*hash_it).second;
|
||||
DPRINTF(MemDepUnit, "Proucer found\n");
|
||||
}
|
||||
}
|
||||
|
||||
// If no store entry, then instruction can issue as soon as the registers
|
||||
// are ready.
|
||||
if (!store_entry) {
|
||||
DPRINTF(MemDepUnit, "No dependency for inst PC "
|
||||
"%s [sn:%lli].\n", inst->pcState(), inst->seqNum);
|
||||
|
||||
inst_entry->memDepReady = true;
|
||||
|
||||
if (inst->readyToIssue()) {
|
||||
inst_entry->regsReady = true;
|
||||
|
||||
moveToReady(inst_entry);
|
||||
}
|
||||
} else {
|
||||
// Otherwise make the instruction dependent on the store/barrier.
|
||||
DPRINTF(MemDepUnit, "Adding to dependency list; "
|
||||
"inst PC %s is dependent on [sn:%lli].\n",
|
||||
inst->pcState(), producing_store);
|
||||
|
||||
if (inst->readyToIssue()) {
|
||||
inst_entry->regsReady = true;
|
||||
}
|
||||
|
||||
// Clear the bit saying this instruction can issue.
|
||||
inst->clearCanIssue();
|
||||
|
||||
// Add this instruction to the list of dependents.
|
||||
store_entry->dependInsts.push_back(inst_entry);
|
||||
|
||||
if (inst->isLoad()) {
|
||||
++conflictingLoads;
|
||||
} else {
|
||||
++conflictingStores;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->isStore()) {
|
||||
DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
|
||||
depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
|
||||
|
||||
++insertedStores;
|
||||
} else if (inst->isLoad()) {
|
||||
++insertedLoads;
|
||||
} else {
|
||||
panic("Unknown type! (most likely a barrier).");
|
||||
}
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertNonSpec(DynInstPtr &inst)
|
||||
{
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
MemDepEntryPtr inst_entry = new MemDepEntry(inst);
|
||||
|
||||
// Insert the MemDepEntry into the hash.
|
||||
memDepHash.insert(
|
||||
std::pair<InstSeqNum, MemDepEntryPtr>(inst->seqNum, inst_entry));
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_insert++;
|
||||
#endif
|
||||
|
||||
// Add the instruction to the list.
|
||||
instList[tid].push_back(inst);
|
||||
|
||||
inst_entry->listIt = --(instList[tid].end());
|
||||
|
||||
// Might want to turn this part into an inline function or something.
|
||||
// It's shared between both insert functions.
|
||||
if (inst->isStore()) {
|
||||
DPRINTF(MemDepUnit, "Inserting store PC %s [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
|
||||
depPred.insertStore(inst->instAddr(), inst->seqNum, inst->threadNumber);
|
||||
|
||||
++insertedStores;
|
||||
} else if (inst->isLoad()) {
|
||||
++insertedLoads;
|
||||
} else {
|
||||
panic("Unknown type! (most likely a barrier).");
|
||||
}
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::insertBarrier(DynInstPtr &barr_inst)
|
||||
{
|
||||
InstSeqNum barr_sn = barr_inst->seqNum;
|
||||
// Memory barriers block loads and stores, write barriers only stores.
|
||||
if (barr_inst->isMemBarrier()) {
|
||||
loadBarrier = true;
|
||||
loadBarrierSN = barr_sn;
|
||||
storeBarrier = true;
|
||||
storeBarrierSN = barr_sn;
|
||||
DPRINTF(MemDepUnit, "Inserted a memory barrier %s SN:%lli\n",
|
||||
barr_inst->pcState(),barr_sn);
|
||||
} else if (barr_inst->isWriteBarrier()) {
|
||||
storeBarrier = true;
|
||||
storeBarrierSN = barr_sn;
|
||||
DPRINTF(MemDepUnit, "Inserted a write barrier\n");
|
||||
}
|
||||
|
||||
ThreadID tid = barr_inst->threadNumber;
|
||||
|
||||
MemDepEntryPtr inst_entry = new MemDepEntry(barr_inst);
|
||||
|
||||
// Add the MemDepEntry to the hash.
|
||||
memDepHash.insert(
|
||||
std::pair<InstSeqNum, MemDepEntryPtr>(barr_sn, inst_entry));
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_insert++;
|
||||
#endif
|
||||
|
||||
// Add the instruction to the instruction list.
|
||||
instList[tid].push_back(barr_inst);
|
||||
|
||||
inst_entry->listIt = --(instList[tid].end());
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::regsReady(DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Marking registers as ready for "
|
||||
"instruction PC %s [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
|
||||
MemDepEntryPtr inst_entry = findInHash(inst);
|
||||
|
||||
inst_entry->regsReady = true;
|
||||
|
||||
if (inst_entry->memDepReady) {
|
||||
DPRINTF(MemDepUnit, "Instruction has its memory "
|
||||
"dependencies resolved, adding it to the ready list.\n");
|
||||
|
||||
moveToReady(inst_entry);
|
||||
} else {
|
||||
DPRINTF(MemDepUnit, "Instruction still waiting on "
|
||||
"memory dependency.\n");
|
||||
}
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::nonSpecInstReady(DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Marking non speculative "
|
||||
"instruction PC %s as ready [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
|
||||
MemDepEntryPtr inst_entry = findInHash(inst);
|
||||
|
||||
moveToReady(inst_entry);
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::reschedule(DynInstPtr &inst)
|
||||
{
|
||||
instsToReplay.push_back(inst);
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::replay(DynInstPtr &inst)
|
||||
{
|
||||
DynInstPtr temp_inst;
|
||||
|
||||
// For now this replay function replays all waiting memory ops.
|
||||
while (!instsToReplay.empty()) {
|
||||
temp_inst = instsToReplay.front();
|
||||
|
||||
MemDepEntryPtr inst_entry = findInHash(temp_inst);
|
||||
|
||||
DPRINTF(MemDepUnit, "Replaying mem instruction PC %s [sn:%lli].\n",
|
||||
temp_inst->pcState(), temp_inst->seqNum);
|
||||
|
||||
moveToReady(inst_entry);
|
||||
|
||||
instsToReplay.pop_front();
|
||||
}
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::completed(DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Completed mem instruction PC %s [sn:%lli].\n",
|
||||
inst->pcState(), inst->seqNum);
|
||||
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
// Remove the instruction from the hash and the list.
|
||||
MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
|
||||
|
||||
assert(hash_it != memDepHash.end());
|
||||
|
||||
instList[tid].erase((*hash_it).second->listIt);
|
||||
|
||||
(*hash_it).second = NULL;
|
||||
|
||||
memDepHash.erase(hash_it);
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_erase++;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::completeBarrier(DynInstPtr &inst)
|
||||
{
|
||||
wakeDependents(inst);
|
||||
completed(inst);
|
||||
|
||||
InstSeqNum barr_sn = inst->seqNum;
|
||||
DPRINTF(MemDepUnit, "barrier completed: %s SN:%lli\n", inst->pcState(),
|
||||
inst->seqNum);
|
||||
if (inst->isMemBarrier()) {
|
||||
if (loadBarrierSN == barr_sn)
|
||||
loadBarrier = false;
|
||||
if (storeBarrierSN == barr_sn)
|
||||
storeBarrier = false;
|
||||
} else if (inst->isWriteBarrier()) {
|
||||
if (storeBarrierSN == barr_sn)
|
||||
storeBarrier = false;
|
||||
}
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::wakeDependents(DynInstPtr &inst)
|
||||
{
|
||||
// Only stores and barriers have dependents.
|
||||
if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) {
|
||||
return;
|
||||
}
|
||||
|
||||
MemDepEntryPtr inst_entry = findInHash(inst);
|
||||
|
||||
for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) {
|
||||
MemDepEntryPtr woken_inst = inst_entry->dependInsts[i];
|
||||
|
||||
if (!woken_inst->inst) {
|
||||
// Potentially removed mem dep entries could be on this list
|
||||
continue;
|
||||
}
|
||||
|
||||
DPRINTF(MemDepUnit, "Waking up a dependent inst, "
|
||||
"[sn:%lli].\n",
|
||||
woken_inst->inst->seqNum);
|
||||
|
||||
if (woken_inst->regsReady && !woken_inst->squashed) {
|
||||
moveToReady(woken_inst);
|
||||
} else {
|
||||
woken_inst->memDepReady = true;
|
||||
}
|
||||
}
|
||||
|
||||
inst_entry->dependInsts.clear();
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::squash(const InstSeqNum &squashed_num,
|
||||
ThreadID tid)
|
||||
{
|
||||
if (!instsToReplay.empty()) {
|
||||
ListIt replay_it = instsToReplay.begin();
|
||||
while (replay_it != instsToReplay.end()) {
|
||||
if ((*replay_it)->threadNumber == tid &&
|
||||
(*replay_it)->seqNum > squashed_num) {
|
||||
instsToReplay.erase(replay_it++);
|
||||
} else {
|
||||
++replay_it;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ListIt squash_it = instList[tid].end();
|
||||
--squash_it;
|
||||
|
||||
MemDepHashIt hash_it;
|
||||
|
||||
while (!instList[tid].empty() &&
|
||||
(*squash_it)->seqNum > squashed_num) {
|
||||
|
||||
DPRINTF(MemDepUnit, "Squashing inst [sn:%lli]\n",
|
||||
(*squash_it)->seqNum);
|
||||
|
||||
if ((*squash_it)->seqNum == loadBarrierSN)
|
||||
loadBarrier = false;
|
||||
|
||||
if ((*squash_it)->seqNum == storeBarrierSN)
|
||||
storeBarrier = false;
|
||||
|
||||
hash_it = memDepHash.find((*squash_it)->seqNum);
|
||||
|
||||
assert(hash_it != memDepHash.end());
|
||||
|
||||
(*hash_it).second->squashed = true;
|
||||
|
||||
(*hash_it).second = NULL;
|
||||
|
||||
memDepHash.erase(hash_it);
|
||||
#ifdef DEBUG
|
||||
MemDepEntry::memdep_erase++;
|
||||
#endif
|
||||
|
||||
instList[tid].erase(squash_it--);
|
||||
}
|
||||
|
||||
// Tell the dependency predictor to squash as well.
|
||||
depPred.squash(squashed_num, tid);
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::violation(DynInstPtr &store_inst,
|
||||
DynInstPtr &violating_load)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Passing violating PCs to store sets,"
|
||||
" load: %#x, store: %#x\n", violating_load->instAddr(),
|
||||
store_inst->instAddr());
|
||||
// Tell the memory dependence unit of the violation.
|
||||
depPred.violation(store_inst->instAddr(), violating_load->instAddr());
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::issue(DynInstPtr &inst)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n",
|
||||
inst->instAddr(), inst->seqNum);
|
||||
|
||||
depPred.issued(inst->instAddr(), inst->seqNum, inst->isStore());
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
inline typename MemDepUnit<MemDepPred,Impl>::MemDepEntryPtr &
|
||||
MemDepUnit<MemDepPred, Impl>::findInHash(const DynInstPtr &inst)
|
||||
{
|
||||
MemDepHashIt hash_it = memDepHash.find(inst->seqNum);
|
||||
|
||||
assert(hash_it != memDepHash.end());
|
||||
|
||||
return (*hash_it).second;
|
||||
}
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
inline void
|
||||
MemDepUnit<MemDepPred, Impl>::moveToReady(MemDepEntryPtr &woken_inst_entry)
|
||||
{
|
||||
DPRINTF(MemDepUnit, "Adding instruction [sn:%lli] "
|
||||
"to the ready list.\n", woken_inst_entry->inst->seqNum);
|
||||
|
||||
assert(!woken_inst_entry->squashed);
|
||||
|
||||
iqPtr->addReadyMemInst(woken_inst_entry->inst);
|
||||
}
|
||||
|
||||
|
||||
template <class MemDepPred, class Impl>
|
||||
void
|
||||
MemDepUnit<MemDepPred, Impl>::dumpLists()
|
||||
{
|
||||
for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) {
|
||||
cprintf("Instruction list %i size: %i\n",
|
||||
tid, instList[tid].size());
|
||||
|
||||
ListIt inst_list_it = instList[tid].begin();
|
||||
int num = 0;
|
||||
|
||||
while (inst_list_it != instList[tid].end()) {
|
||||
cprintf("Instruction:%i\nPC: %s\n[sn:%i]\n[tid:%i]\nIssued:%i\n"
|
||||
"Squashed:%i\n\n",
|
||||
num, (*inst_list_it)->pcState(),
|
||||
(*inst_list_it)->seqNum,
|
||||
(*inst_list_it)->threadNumber,
|
||||
(*inst_list_it)->isIssued(),
|
||||
(*inst_list_it)->isSquashed());
|
||||
inst_list_it++;
|
||||
++num;
|
||||
}
|
||||
}
|
||||
|
||||
cprintf("Memory dependence hash size: %i\n", memDepHash.size());
|
||||
|
||||
#ifdef DEBUG
|
||||
cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count);
|
||||
#endif
|
||||
}
|
||||
200
simulators/gem5/src/cpu/o3/regfile.hh
Normal file
200
simulators/gem5/src/cpu/o3/regfile.hh
Normal file
@ -0,0 +1,200 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Gabe Black
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_REGFILE_HH__
|
||||
#define __CPU_O3_REGFILE_HH__
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "arch/isa_traits.hh"
|
||||
#include "arch/kernel_stats.hh"
|
||||
#include "arch/types.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
#include "debug/IEW.hh"
|
||||
|
||||
/**
|
||||
* Simple physical register file class.
|
||||
* Right now this is specific to Alpha until we decide if/how to make things
|
||||
* generic enough to support other ISAs.
|
||||
*/
|
||||
template <class Impl>
|
||||
class PhysRegFile
|
||||
{
|
||||
protected:
|
||||
typedef TheISA::IntReg IntReg;
|
||||
typedef TheISA::FloatReg FloatReg;
|
||||
typedef TheISA::FloatRegBits FloatRegBits;
|
||||
|
||||
typedef union {
|
||||
FloatReg d;
|
||||
FloatRegBits q;
|
||||
} PhysFloatReg;
|
||||
|
||||
// Note that most of the definitions of the IntReg, FloatReg, etc. exist
|
||||
// within the Impl/ISA class and not within this PhysRegFile class.
|
||||
|
||||
// Will make these registers public for now, but they probably should
|
||||
// be private eventually with some accessor functions.
|
||||
public:
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
/**
|
||||
* Constructs a physical register file with the specified amount of
|
||||
* integer and floating point registers.
|
||||
*/
|
||||
PhysRegFile(O3CPU *_cpu, unsigned _numPhysicalIntRegs,
|
||||
unsigned _numPhysicalFloatRegs);
|
||||
|
||||
//Everything below should be pretty well identical to the normal
|
||||
//register file that exists within AlphaISA class.
|
||||
//The duplication is unfortunate but it's better than having
|
||||
//different ways to access certain registers.
|
||||
|
||||
/** Reads an integer register. */
|
||||
uint64_t readIntReg(PhysRegIndex reg_idx)
|
||||
{
|
||||
assert(reg_idx < numPhysicalIntRegs);
|
||||
|
||||
DPRINTF(IEW, "RegFile: Access to int register %i, has data "
|
||||
"%#x\n", int(reg_idx), intRegFile[reg_idx]);
|
||||
return intRegFile[reg_idx];
|
||||
}
|
||||
|
||||
/** Reads a floating point register (double precision). */
|
||||
FloatReg readFloatReg(PhysRegIndex reg_idx)
|
||||
{
|
||||
// Remove the base Float reg dependency.
|
||||
reg_idx = reg_idx - numPhysicalIntRegs;
|
||||
|
||||
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
|
||||
|
||||
FloatReg floatReg = floatRegFile[reg_idx].d;
|
||||
|
||||
DPRINTF(IEW, "RegFile: Access to float register %i, has "
|
||||
"data %#x\n", int(reg_idx), floatRegFile[reg_idx].q);
|
||||
|
||||
return floatReg;
|
||||
}
|
||||
|
||||
FloatRegBits readFloatRegBits(PhysRegIndex reg_idx)
|
||||
{
|
||||
// Remove the base Float reg dependency.
|
||||
reg_idx = reg_idx - numPhysicalIntRegs;
|
||||
|
||||
assert(reg_idx < numPhysicalFloatRegs + numPhysicalIntRegs);
|
||||
|
||||
FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
|
||||
|
||||
DPRINTF(IEW, "RegFile: Access to float register %i as int, "
|
||||
"has data %#x\n", int(reg_idx), (uint64_t)floatRegBits);
|
||||
|
||||
return floatRegBits;
|
||||
}
|
||||
|
||||
/** Sets an integer register to the given value. */
|
||||
void setIntReg(PhysRegIndex reg_idx, uint64_t val)
|
||||
{
|
||||
assert(reg_idx < numPhysicalIntRegs);
|
||||
|
||||
DPRINTF(IEW, "RegFile: Setting int register %i to %#x\n",
|
||||
int(reg_idx), val);
|
||||
|
||||
if (reg_idx != TheISA::ZeroReg)
|
||||
intRegFile[reg_idx] = val;
|
||||
}
|
||||
|
||||
/** Sets a double precision floating point register to the given value. */
|
||||
void setFloatReg(PhysRegIndex reg_idx, FloatReg val)
|
||||
{
|
||||
// Remove the base Float reg dependency.
|
||||
reg_idx = reg_idx - numPhysicalIntRegs;
|
||||
|
||||
assert(reg_idx < numPhysicalFloatRegs);
|
||||
|
||||
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
|
||||
int(reg_idx), (uint64_t)val);
|
||||
|
||||
#if THE_ISA == ALPHA_ISA
|
||||
if (reg_idx != TheISA::ZeroReg)
|
||||
#endif
|
||||
floatRegFile[reg_idx].d = val;
|
||||
}
|
||||
|
||||
void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
|
||||
{
|
||||
// Remove the base Float reg dependency.
|
||||
reg_idx = reg_idx - numPhysicalIntRegs;
|
||||
|
||||
assert(reg_idx < numPhysicalFloatRegs);
|
||||
|
||||
DPRINTF(IEW, "RegFile: Setting float register %i to %#x\n",
|
||||
int(reg_idx), (uint64_t)val);
|
||||
|
||||
floatRegFile[reg_idx].q = val;
|
||||
}
|
||||
|
||||
public:
|
||||
/** (signed) integer register file. */
|
||||
IntReg *intRegFile;
|
||||
|
||||
/** Floating point register file. */
|
||||
PhysFloatReg *floatRegFile;
|
||||
|
||||
private:
|
||||
int intrflag; // interrupt flag
|
||||
|
||||
private:
|
||||
/** CPU pointer. */
|
||||
O3CPU *cpu;
|
||||
|
||||
public:
|
||||
/** Number of physical integer registers. */
|
||||
unsigned numPhysicalIntRegs;
|
||||
/** Number of physical floating point registers. */
|
||||
unsigned numPhysicalFloatRegs;
|
||||
};
|
||||
|
||||
template <class Impl>
|
||||
PhysRegFile<Impl>::PhysRegFile(O3CPU *_cpu, unsigned _numPhysicalIntRegs,
|
||||
unsigned _numPhysicalFloatRegs)
|
||||
: cpu(_cpu), numPhysicalIntRegs(_numPhysicalIntRegs),
|
||||
numPhysicalFloatRegs(_numPhysicalFloatRegs)
|
||||
{
|
||||
intRegFile = new IntReg[numPhysicalIntRegs];
|
||||
floatRegFile = new PhysFloatReg[numPhysicalFloatRegs];
|
||||
|
||||
memset(intRegFile, 0, sizeof(IntReg) * numPhysicalIntRegs);
|
||||
memset(floatRegFile, 0, sizeof(PhysFloatReg) * numPhysicalFloatRegs);
|
||||
}
|
||||
|
||||
#endif
|
||||
34
simulators/gem5/src/cpu/o3/rename.cc
Normal file
34
simulators/gem5/src/cpu/o3/rename.cc
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/rename_impl.hh"
|
||||
|
||||
// Explicit instantiation of the DefaultRename class template for O3CPUImpl.
template class DefaultRename<O3CPUImpl>;
|
||||
487
simulators/gem5/src/cpu/o3/rename.hh
Normal file
487
simulators/gem5/src/cpu/o3/rename.hh
Normal file
@ -0,0 +1,487 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_RENAME_HH__
|
||||
#define __CPU_O3_RENAME_HH__
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "base/statistics.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/timebuf.hh"
|
||||
|
||||
struct DerivO3CPUParams;
|
||||
|
||||
/**
|
||||
* DefaultRename handles both single threaded and SMT rename. Its
|
||||
* width is specified by the parameters; each cycle it tries to rename
|
||||
* that many instructions. It holds onto the rename history of all
|
||||
* instructions with destination registers, storing the
|
||||
* arch. register, the new physical register, and the old physical
|
||||
* register, to allow for undoing of mappings if squashing happens, or
|
||||
* freeing up registers upon commit. Rename handles blocking if the
|
||||
* ROB, IQ, or LSQ is going to be full. Rename also handles barriers,
|
||||
* and does so by stalling on the instruction until the ROB is empty
|
||||
* and there are no instructions in flight to the ROB.
|
||||
*/
|
||||
template<class Impl>
|
||||
class DefaultRename
|
||||
{
|
||||
public:
|
||||
// Typedefs from the Impl.
|
||||
typedef typename Impl::CPUPol CPUPol;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
// Typedefs from the CPUPol
|
||||
typedef typename CPUPol::DecodeStruct DecodeStruct;
|
||||
typedef typename CPUPol::RenameStruct RenameStruct;
|
||||
typedef typename CPUPol::TimeStruct TimeStruct;
|
||||
typedef typename CPUPol::FreeList FreeList;
|
||||
typedef typename CPUPol::RenameMap RenameMap;
|
||||
// These are used only for initialization.
|
||||
typedef typename CPUPol::IEW IEW;
|
||||
typedef typename CPUPol::Commit Commit;
|
||||
|
||||
// Typedefs from the ISA.
|
||||
typedef TheISA::RegIndex RegIndex;
|
||||
|
||||
// A list is used to queue the instructions. Barrier insts must
|
||||
// be added to the front of the list, which is the only reason for
|
||||
// using a list instead of a queue. (Most other stages use a
|
||||
// queue)
|
||||
typedef std::list<DynInstPtr> InstQueue;
|
||||
typedef typename std::list<DynInstPtr>::iterator ListIt;
|
||||
|
||||
public:
|
||||
/** Overall rename status. Used to determine if the CPU can
|
||||
* deschedule itself due to a lack of activity.
|
||||
*/
|
||||
enum RenameStatus {
|
||||
Active,
|
||||
Inactive
|
||||
};
|
||||
|
||||
/** Individual thread status. */
|
||||
enum ThreadStatus {
|
||||
Running,
|
||||
Idle,
|
||||
StartSquash,
|
||||
Squashing,
|
||||
Blocked,
|
||||
Unblocking,
|
||||
SerializeStall
|
||||
};
|
||||
|
||||
private:
|
||||
/** Rename status. */
|
||||
RenameStatus _status;
|
||||
|
||||
/** Per-thread status. */
|
||||
ThreadStatus renameStatus[Impl::MaxThreads];
|
||||
|
||||
public:
|
||||
/** DefaultRename constructor. */
|
||||
DefaultRename(O3CPU *_cpu, DerivO3CPUParams *params);
|
||||
|
||||
/** Returns the name of rename. */
|
||||
std::string name() const;
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
/** Sets the main backwards communication time buffer pointer. */
|
||||
void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
|
||||
|
||||
/** Sets pointer to time buffer used to communicate to the next stage. */
|
||||
void setRenameQueue(TimeBuffer<RenameStruct> *rq_ptr);
|
||||
|
||||
/** Sets pointer to time buffer coming from decode. */
|
||||
void setDecodeQueue(TimeBuffer<DecodeStruct> *dq_ptr);
|
||||
|
||||
/** Sets pointer to IEW stage. Used only for initialization. */
|
||||
void setIEWStage(IEW *iew_stage)
|
||||
{ iew_ptr = iew_stage; }
|
||||
|
||||
/** Sets pointer to commit stage. Used only for initialization. */
|
||||
void setCommitStage(Commit *commit_stage)
|
||||
{ commit_ptr = commit_stage; }
|
||||
|
||||
private:
|
||||
/** Pointer to IEW stage. Used only for initialization. */
|
||||
IEW *iew_ptr;
|
||||
|
||||
/** Pointer to commit stage. Used only for initialization. */
|
||||
Commit *commit_ptr;
|
||||
|
||||
public:
|
||||
/** Initializes variables for the stage. */
|
||||
void initStage();
|
||||
|
||||
/** Sets pointer to list of active threads. */
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Sets pointer to rename maps (per-thread structures). */
|
||||
void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]);
|
||||
|
||||
/** Sets pointer to the free list. */
|
||||
void setFreeList(FreeList *fl_ptr);
|
||||
|
||||
/** Sets pointer to the scoreboard. */
|
||||
void setScoreboard(Scoreboard *_scoreboard);
|
||||
|
||||
/** Drains the rename stage. */
|
||||
bool drain();
|
||||
|
||||
/** Resumes execution after a drain. */
|
||||
void resume() { }
|
||||
|
||||
/** Switches out the rename stage. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over from another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Squashes all instructions in a thread. */
|
||||
void squash(const InstSeqNum &squash_seq_num, ThreadID tid);
|
||||
|
||||
/** Ticks rename, which processes all input signals and attempts to rename
|
||||
* as many instructions as possible.
|
||||
*/
|
||||
void tick();
|
||||
|
||||
/** Debugging function used to dump history buffer of renamings. */
|
||||
void dumpHistory();
|
||||
|
||||
private:
|
||||
/** Determines what to do based on rename's current status.
|
||||
* @param status_change rename() sets this variable if there was a status
|
||||
* change (ie switching from blocking to unblocking).
|
||||
* @param tid Thread id to rename instructions from.
|
||||
*/
|
||||
void rename(bool &status_change, ThreadID tid);
|
||||
|
||||
/** Renames instructions for the given thread. Also handles serializing
|
||||
* instructions.
|
||||
*/
|
||||
void renameInsts(ThreadID tid);
|
||||
|
||||
/** Inserts unused instructions from a given thread into the skid buffer,
|
||||
* to be renamed once rename unblocks.
|
||||
*/
|
||||
void skidInsert(ThreadID tid);
|
||||
|
||||
/** Separates instructions from decode into individual lists of instructions
|
||||
* sorted by thread.
|
||||
*/
|
||||
void sortInsts();
|
||||
|
||||
/** Returns if all of the skid buffers are empty. */
|
||||
bool skidsEmpty();
|
||||
|
||||
/** Updates overall rename status based on all of the threads' statuses. */
|
||||
void updateStatus();
|
||||
|
||||
/** Switches rename to blocking, and signals back that rename has become
|
||||
* blocked.
|
||||
* @return Returns true if there is a status change.
|
||||
*/
|
||||
bool block(ThreadID tid);
|
||||
|
||||
/** Switches rename to unblocking if the skid buffer is empty, and signals
|
||||
* back that rename has unblocked.
|
||||
* @return Returns true if there is a status change.
|
||||
*/
|
||||
bool unblock(ThreadID tid);
|
||||
|
||||
/** Executes actual squash, removing squashed instructions. */
|
||||
void doSquash(const InstSeqNum &squash_seq_num, ThreadID tid);
|
||||
|
||||
/** Removes a committed instruction's rename history. */
|
||||
void removeFromHistory(InstSeqNum inst_seq_num, ThreadID tid);
|
||||
|
||||
/** Renames the source registers of an instruction. */
|
||||
inline void renameSrcRegs(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Renames the destination registers of an instruction. */
|
||||
inline void renameDestRegs(DynInstPtr &inst, ThreadID tid);
|
||||
|
||||
/** Calculates the number of free ROB entries for a specific thread. */
|
||||
inline int calcFreeROBEntries(ThreadID tid);
|
||||
|
||||
/** Calculates the number of free IQ entries for a specific thread. */
|
||||
inline int calcFreeIQEntries(ThreadID tid);
|
||||
|
||||
/** Calculates the number of free LSQ entries for a specific thread. */
|
||||
inline int calcFreeLSQEntries(ThreadID tid);
|
||||
|
||||
/** Returns the number of valid instructions coming from decode. */
|
||||
unsigned validInsts();
|
||||
|
||||
/** Reads signals telling rename to block/unblock. */
|
||||
void readStallSignals(ThreadID tid);
|
||||
|
||||
/** Checks if any stages are telling rename to block. */
|
||||
bool checkStall(ThreadID tid);
|
||||
|
||||
/** Gets the number of free entries for a specific thread. */
|
||||
void readFreeEntries(ThreadID tid);
|
||||
|
||||
/** Checks the signals and updates the status. */
|
||||
bool checkSignalsAndUpdate(ThreadID tid);
|
||||
|
||||
/** Either serializes on the next instruction available in the InstQueue,
|
||||
* or records that it must serialize on the next instruction to enter
|
||||
* rename.
|
||||
* @param inst_list The list of younger, unprocessed instructions for the
|
||||
* thread that has the serializeAfter instruction.
|
||||
* @param tid The thread id.
|
||||
*/
|
||||
void serializeAfter(InstQueue &inst_list, ThreadID tid);
|
||||
|
||||
/** Holds the information for each destination register rename. It holds
|
||||
* the instruction's sequence number, the arch register, the old physical
|
||||
* register for that arch. register, and the new physical register.
|
||||
*/
|
||||
struct RenameHistory {
|
||||
RenameHistory(InstSeqNum _instSeqNum, RegIndex _archReg,
|
||||
PhysRegIndex _newPhysReg, PhysRegIndex _prevPhysReg)
|
||||
: instSeqNum(_instSeqNum), archReg(_archReg),
|
||||
newPhysReg(_newPhysReg), prevPhysReg(_prevPhysReg)
|
||||
{
|
||||
}
|
||||
|
||||
/** The sequence number of the instruction that renamed. */
|
||||
InstSeqNum instSeqNum;
|
||||
/** The architectural register index that was renamed. */
|
||||
RegIndex archReg;
|
||||
/** The new physical register that the arch. register is renamed to. */
|
||||
PhysRegIndex newPhysReg;
|
||||
/** The old physical register that the arch. register was renamed to. */
|
||||
PhysRegIndex prevPhysReg;
|
||||
};
|
||||
|
||||
/** A per-thread list of all destination register renames, used to either
|
||||
* undo rename mappings or free old physical registers.
|
||||
*/
|
||||
std::list<RenameHistory> historyBuffer[Impl::MaxThreads];
|
||||
|
||||
/** Pointer to CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Pointer to main time buffer used for backwards communication. */
|
||||
TimeBuffer<TimeStruct> *timeBuffer;
|
||||
|
||||
/** Wire to get IEW's output from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromIEW;
|
||||
|
||||
/** Wire to get commit's output from backwards time buffer. */
|
||||
typename TimeBuffer<TimeStruct>::wire fromCommit;
|
||||
|
||||
/** Wire to write information heading to previous stages. */
|
||||
typename TimeBuffer<TimeStruct>::wire toDecode;
|
||||
|
||||
/** Rename instruction queue. */
|
||||
TimeBuffer<RenameStruct> *renameQueue;
|
||||
|
||||
/** Wire to write any information heading to IEW. */
|
||||
typename TimeBuffer<RenameStruct>::wire toIEW;
|
||||
|
||||
/** Decode instruction queue interface. */
|
||||
TimeBuffer<DecodeStruct> *decodeQueue;
|
||||
|
||||
/** Wire to get decode's output from decode queue. */
|
||||
typename TimeBuffer<DecodeStruct>::wire fromDecode;
|
||||
|
||||
/** Queue of all instructions coming from decode this cycle. */
|
||||
InstQueue insts[Impl::MaxThreads];
|
||||
|
||||
/** Skid buffer between rename and decode. */
|
||||
InstQueue skidBuffer[Impl::MaxThreads];
|
||||
|
||||
/** Rename map interface. */
|
||||
RenameMap *renameMap[Impl::MaxThreads];
|
||||
|
||||
/** Free list interface. */
|
||||
FreeList *freeList;
|
||||
|
||||
/** Pointer to the list of active threads. */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Pointer to the scoreboard. */
|
||||
Scoreboard *scoreboard;
|
||||
|
||||
/** Count of instructions in progress that have been sent off to the IQ
|
||||
* and ROB, but are not yet included in their occupancy counts.
|
||||
*/
|
||||
int instsInProgress[Impl::MaxThreads];
|
||||
|
||||
/** Variable that tracks if rename has written to the time buffer this
|
||||
* cycle. Used to tell CPU if there is activity this cycle.
|
||||
*/
|
||||
bool wroteToTimeBuffer;
|
||||
|
||||
/** Structures whose free entries impact the amount of instructions that
|
||||
* can be renamed.
|
||||
*/
|
||||
struct FreeEntries {
    /** Free instruction queue (IQ) entries. */
    unsigned iqEntries;

    /** Free load/store queue (LSQ) entries. */
    unsigned lsqEntries;

    /** Free reorder buffer (ROB) entries. */
    unsigned robEntries;
};
|
||||
|
||||
/** Per-thread tracking of the number of free entries of back-end
|
||||
* structures.
|
||||
*/
|
||||
FreeEntries freeEntries[Impl::MaxThreads];
|
||||
|
||||
/** Records if the ROB is empty. In SMT mode the ROB may be dynamically
|
||||
* partitioned between threads, so the ROB must tell rename when it is
|
||||
* empty.
|
||||
*/
|
||||
bool emptyROB[Impl::MaxThreads];
|
||||
|
||||
/** Source of possible stalls. */
|
||||
struct Stalls {
    /** Stall flag associated with the IEW stage. */
    bool iew;

    /** Stall flag associated with the commit stage. */
    bool commit;
};
|
||||
|
||||
/** Tracks which stages are telling rename to stall. */
|
||||
Stalls stalls[Impl::MaxThreads];
|
||||
|
||||
/** The serialize instruction that rename has stalled on. */
|
||||
DynInstPtr serializeInst[Impl::MaxThreads];
|
||||
|
||||
/** Records if rename needs to serialize on the next instruction for any
|
||||
* thread.
|
||||
*/
|
||||
bool serializeOnNextInst[Impl::MaxThreads];
|
||||
|
||||
/** Delay between iew and rename, in ticks. */
|
||||
int iewToRenameDelay;
|
||||
|
||||
/** Delay between decode and rename, in ticks. */
|
||||
int decodeToRenameDelay;
|
||||
|
||||
/** Delay between commit and rename, in ticks. */
|
||||
unsigned commitToRenameDelay;
|
||||
|
||||
/** Rename width, in instructions. */
|
||||
unsigned renameWidth;
|
||||
|
||||
/** Commit width, in instructions. Used so rename knows how many
|
||||
* instructions might have freed registers in the previous cycle.
|
||||
*/
|
||||
unsigned commitWidth;
|
||||
|
||||
/** The index of the instruction in the time buffer to IEW that rename is
|
||||
* currently using.
|
||||
*/
|
||||
unsigned toIEWIndex;
|
||||
|
||||
/** Whether or not rename needs to block this cycle. */
|
||||
bool blockThisCycle;
|
||||
|
||||
/** Whether or not rename needs to resume a serialize instruction
|
||||
* after squashing. */
|
||||
bool resumeSerialize;
|
||||
|
||||
/** Whether or not rename needs to resume clearing out the skidbuffer
|
||||
* after squashing. */
|
||||
bool resumeUnblocking;
|
||||
|
||||
/** The number of threads active in rename. */
|
||||
ThreadID numThreads;
|
||||
|
||||
/** The maximum skid buffer size. */
|
||||
unsigned skidBufferMax;
|
||||
|
||||
/** NOTE(review): undocumented in the original; presumably an upper bound on
 *  the physical register indices rename works with -- confirm against
 *  rename_impl.hh.
 */
PhysRegIndex maxPhysicalRegs;
|
||||
|
||||
/** Enum to record the source of a structure full stall. Can come from
|
||||
* either the ROB, IQ, or LSQ, and it is prioritized in that order.
|
||||
*/
|
||||
enum FullSource {
    ROB,    // stall attributed to the reorder buffer
    IQ,     // stall attributed to the instruction queue
    LSQ,    // stall attributed to the load/store queue
    NONE    // no structure recorded as the source
};
|
||||
|
||||
/** Function used to increment the stat that corresponds to the source of
|
||||
* the stall.
|
||||
*/
|
||||
inline void incrFullStat(const FullSource &source);
|
||||
|
||||
/** Stat for total number of cycles spent squashing. */
|
||||
Stats::Scalar renameSquashCycles;
|
||||
/** Stat for total number of cycles spent idle. */
|
||||
Stats::Scalar renameIdleCycles;
|
||||
/** Stat for total number of cycles spent blocking. */
|
||||
Stats::Scalar renameBlockCycles;
|
||||
/** Stat for total number of cycles spent stalling for a serializing inst. */
|
||||
Stats::Scalar renameSerializeStallCycles;
|
||||
/** Stat for total number of cycles spent running normally. */
|
||||
Stats::Scalar renameRunCycles;
|
||||
/** Stat for total number of cycles spent unblocking. */
|
||||
Stats::Scalar renameUnblockCycles;
|
||||
/** Stat for total number of renamed instructions. */
|
||||
Stats::Scalar renameRenamedInsts;
|
||||
/** Stat for total number of squashed instructions that rename discards. */
|
||||
Stats::Scalar renameSquashedInsts;
|
||||
/** Stat for total number of times that the ROB starts a stall in rename. */
|
||||
Stats::Scalar renameROBFullEvents;
|
||||
/** Stat for total number of times that the IQ starts a stall in rename. */
|
||||
Stats::Scalar renameIQFullEvents;
|
||||
/** Stat for total number of times that the LSQ starts a stall in rename. */
|
||||
Stats::Scalar renameLSQFullEvents;
|
||||
/** Stat for total number of times that rename runs out of free registers
|
||||
* to use to rename. */
|
||||
Stats::Scalar renameFullRegistersEvents;
|
||||
/** Stat for total number of renamed destination registers. */
|
||||
Stats::Scalar renameRenamedOperands;
|
||||
/** Stat for total number of source register rename lookups. */
|
||||
Stats::Scalar renameRenameLookups;
|
||||
/** NOTE(review): undocumented in the original; presumably the integer
 *  register share of the rename lookups -- confirm where the stat is
 *  registered and incremented.
 */
Stats::Scalar intRenameLookups;
|
||||
/** NOTE(review): undocumented in the original; presumably the floating
 *  point register share of the rename lookups -- confirm where the stat is
 *  registered and incremented.
 */
Stats::Scalar fpRenameLookups;
|
||||
/** Stat for total number of committed renaming mappings. */
|
||||
Stats::Scalar renameCommittedMaps;
|
||||
/** Stat for total number of mappings that were undone due to a squash. */
|
||||
Stats::Scalar renameUndoneMaps;
|
||||
/** Number of serialize instructions handled. */
|
||||
Stats::Scalar renamedSerializing;
|
||||
/** Number of instructions marked as temporarily serializing. */
|
||||
Stats::Scalar renamedTempSerializing;
|
||||
/** Number of instructions inserted into skid buffers. */
|
||||
Stats::Scalar renameSkidInsts;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_RENAME_HH__
|
||||
1393
simulators/gem5/src/cpu/o3/rename_impl.hh
Normal file
1393
simulators/gem5/src/cpu/o3/rename_impl.hh
Normal file
File diff suppressed because it is too large
Load Diff
255
simulators/gem5/src/cpu/o3/rename_map.cc
Normal file
255
simulators/gem5/src/cpu/o3/rename_map.cc
Normal file
@ -0,0 +1,255 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "cpu/o3/rename_map.hh"
|
||||
#include "debug/Rename.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// @todo: Consider making inline bool functions that determine if the
|
||||
// register is a logical int, logical fp, physical int, physical fp,
|
||||
// etc.
|
||||
|
||||
// The destructor has no explicit cleanup to perform.
SimpleRenameMap::~SimpleRenameMap() { }
|
||||
|
||||
void
|
||||
SimpleRenameMap::init(unsigned _numLogicalIntRegs,
|
||||
unsigned _numPhysicalIntRegs,
|
||||
PhysRegIndex &ireg_idx,
|
||||
|
||||
unsigned _numLogicalFloatRegs,
|
||||
unsigned _numPhysicalFloatRegs,
|
||||
PhysRegIndex &freg_idx,
|
||||
|
||||
unsigned _numMiscRegs,
|
||||
|
||||
RegIndex _intZeroReg,
|
||||
RegIndex _floatZeroReg,
|
||||
|
||||
int map_id,
|
||||
bool bindRegs)
|
||||
{
|
||||
id = map_id;
|
||||
|
||||
numLogicalIntRegs = _numLogicalIntRegs;
|
||||
|
||||
numLogicalFloatRegs = _numLogicalFloatRegs;
|
||||
|
||||
numPhysicalIntRegs = _numPhysicalIntRegs;
|
||||
|
||||
numPhysicalFloatRegs = _numPhysicalFloatRegs;
|
||||
|
||||
numMiscRegs = _numMiscRegs;
|
||||
|
||||
intZeroReg = _intZeroReg;
|
||||
floatZeroReg = _floatZeroReg;
|
||||
|
||||
DPRINTF(Rename, "Creating rename map %i. Phys: %i / %i, Float: "
|
||||
"%i / %i.\n", id, numLogicalIntRegs, numPhysicalIntRegs,
|
||||
numLogicalFloatRegs, numPhysicalFloatRegs);
|
||||
|
||||
numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
|
||||
|
||||
numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;
|
||||
|
||||
//Create the rename maps
|
||||
intRenameMap.resize(numLogicalIntRegs);
|
||||
floatRenameMap.resize(numLogicalRegs);
|
||||
|
||||
if (bindRegs) {
|
||||
DPRINTF(Rename, "Binding registers into rename map %i\n",id);
|
||||
|
||||
// Initialize the entries in the integer rename map to point to the
|
||||
// physical registers of the same index
|
||||
for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
|
||||
{
|
||||
intRenameMap[index].physical_reg = ireg_idx++;
|
||||
}
|
||||
|
||||
// Initialize the entries in the floating point rename map to point to
|
||||
// the physical registers of the same index
|
||||
// Although the index refers purely to architected registers, because
|
||||
// the floating reg indices come after the integer reg indices, they
|
||||
// may exceed the size of a normal RegIndex (short).
|
||||
for (PhysRegIndex index = numLogicalIntRegs;
|
||||
index < numLogicalRegs; ++index)
|
||||
{
|
||||
floatRenameMap[index].physical_reg = freg_idx++;
|
||||
}
|
||||
} else {
|
||||
DPRINTF(Rename, "Binding registers into rename map %i\n",id);
|
||||
|
||||
PhysRegIndex temp_ireg = ireg_idx;
|
||||
|
||||
for (RegIndex index = 0; index < numLogicalIntRegs; ++index)
|
||||
{
|
||||
intRenameMap[index].physical_reg = temp_ireg++;
|
||||
}
|
||||
|
||||
PhysRegIndex temp_freg = freg_idx;
|
||||
|
||||
for (PhysRegIndex index = numLogicalIntRegs;
|
||||
index < numLogicalRegs; ++index)
|
||||
{
|
||||
floatRenameMap[index].physical_reg = temp_freg++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
SimpleRenameMap::setFreeList(SimpleFreeList *fl_ptr)
|
||||
{
|
||||
freeList = fl_ptr;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Allocates a new physical register for an architectural register.
 *
 * @param arch_reg Architectural register index: integer registers first,
 *                 then floating point, then miscellaneous registers.
 * @return Pair of (new physical register, previous physical register).
 */
SimpleRenameMap::RenameInfo
SimpleRenameMap::rename(RegIndex arch_reg)
{
    PhysRegIndex renamed_reg;
    PhysRegIndex prev_reg;

    if (arch_reg < numLogicalIntRegs) {
        RenameEntry &entry = intRenameMap[arch_reg];

        // Remember the mapping that is about to be replaced.
        prev_reg = entry.physical_reg;

        if (arch_reg == intZeroReg) {
            // The zero register is never renamed; hand back its own index.
            renamed_reg = intZeroReg;
        } else {
            renamed_reg = freeList->getIntReg();
            entry.physical_reg = renamed_reg;

            assert(renamed_reg >= 0 && renamed_reg < numPhysicalIntRegs);
        }
    } else if (arch_reg < numLogicalRegs) {
        RenameEntry &entry = floatRenameMap[arch_reg];

        // Remember the mapping that is about to be replaced.
        prev_reg = entry.physical_reg;

        // Only Alpha treats the float zero register specially.
#if THE_ISA == ALPHA_ISA
        const bool pinned_zero = (arch_reg == floatZeroReg);
#else
        const bool pinned_zero = false;
#endif

        if (pinned_zero) {
            // The zero register is never renamed; hand back its own index.
            renamed_reg = floatZeroReg;
        } else {
            renamed_reg = freeList->getFloatReg();
            entry.physical_reg = renamed_reg;

            assert(renamed_reg < numPhysicalRegs &&
                   renamed_reg >= numPhysicalIntRegs);
        }
    } else {
        // Strip the base offset of the miscellaneous registers.
        arch_reg = arch_reg - numLogicalRegs;

        DPRINTF(Rename, "Renamed misc reg %d\n", arch_reg);

        // Misc. registers are not renamed: they sit directly behind all
        // physical registers.  The previous register is reported as the
        // same out-of-range index so the free list can avoid adding it.
        renamed_reg = arch_reg + numPhysicalRegs;
        prev_reg = renamed_reg;
    }

    DPRINTF(Rename, "Renamed reg %d to physical reg %d old mapping was %d\n",
            arch_reg, renamed_reg, prev_reg);

    return RenameInfo(renamed_reg, prev_reg);
}
|
||||
|
||||
/**
 * Returns the physical register an architectural register currently maps
 * to, without changing any mapping.
 *
 * @param arch_reg Architectural register index (int, then float, then misc).
 */
PhysRegIndex
SimpleRenameMap::lookup(RegIndex arch_reg)
{
    const bool is_int = arch_reg < numLogicalIntRegs;
    if (is_int)
        return intRenameMap[arch_reg].physical_reg;

    const bool is_float = arch_reg < numLogicalRegs;
    if (is_float)
        return floatRenameMap[arch_reg].physical_reg;

    // Misc. registers are not renamed: strip their base offset and place
    // them directly behind the physical registers.
    const RegIndex misc_index = arch_reg - numLogicalRegs;
    return numPhysicalRegs + misc_index;
}
|
||||
|
||||
void
|
||||
SimpleRenameMap::setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg)
|
||||
{
|
||||
// In this implementation the miscellaneous registers do not
|
||||
// actually rename, so this function does not allow you to try to
|
||||
// change their mappings.
|
||||
if (arch_reg < numLogicalIntRegs) {
|
||||
DPRINTF(Rename, "Rename Map: Integer register %i being set to %i.\n",
|
||||
(int)arch_reg, renamed_reg);
|
||||
|
||||
intRenameMap[arch_reg].physical_reg = renamed_reg;
|
||||
} else if (arch_reg < numLogicalIntRegs + numLogicalFloatRegs) {
|
||||
DPRINTF(Rename, "Rename Map: Float register %i being set to %i.\n",
|
||||
(int)arch_reg - numLogicalIntRegs, renamed_reg);
|
||||
|
||||
floatRenameMap[arch_reg].physical_reg = renamed_reg;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
SimpleRenameMap::numFreeEntries()
|
||||
{
|
||||
int free_int_regs = freeList->numFreeIntRegs();
|
||||
int free_float_regs = freeList->numFreeFloatRegs();
|
||||
|
||||
if (free_int_regs < free_float_regs) {
|
||||
return free_int_regs;
|
||||
} else {
|
||||
return free_float_regs;
|
||||
}
|
||||
}
|
||||
168
simulators/gem5/src/cpu/o3/rename_map.hh
Normal file
168
simulators/gem5/src/cpu/o3/rename_map.hh
Normal file
@ -0,0 +1,168 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
// Todo: Create destructor.
|
||||
// Have it so that there's a more meaningful name given to the variable
|
||||
// that marks the beginning of the FP registers.
|
||||
|
||||
#ifndef __CPU_O3_RENAME_MAP_HH__
|
||||
#define __CPU_O3_RENAME_MAP_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/types.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/free_list.hh"
|
||||
|
||||
class SimpleRenameMap
|
||||
{
|
||||
protected:
|
||||
typedef TheISA::RegIndex RegIndex;
|
||||
public:
|
||||
/**
|
||||
* Pair of a logical register and a physical register. Tells the
|
||||
* previous mapping of a logical register to a physical register.
|
||||
* Used to roll back the rename map to a previous state.
|
||||
*/
|
||||
typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
|
||||
|
||||
/**
|
||||
* Pair of a physical register and a physical register. Used to
|
||||
* return the physical register that a logical register has been
|
||||
* renamed to, and the previous physical register that the same
|
||||
* logical register was previously mapped to.
|
||||
*/
|
||||
typedef std::pair<PhysRegIndex, PhysRegIndex> RenameInfo;
|
||||
|
||||
public:
|
||||
/** Default constructor. init() must be called prior to use. */
|
||||
SimpleRenameMap() {};
|
||||
|
||||
/** Destructor. */
|
||||
~SimpleRenameMap();
|
||||
|
||||
/** Initializes rename map with given parameters. */
|
||||
void init(unsigned _numLogicalIntRegs,
|
||||
unsigned _numPhysicalIntRegs,
|
||||
PhysRegIndex &_int_reg_start,
|
||||
|
||||
unsigned _numLogicalFloatRegs,
|
||||
unsigned _numPhysicalFloatRegs,
|
||||
PhysRegIndex &_float_reg_start,
|
||||
|
||||
unsigned _numMiscRegs,
|
||||
|
||||
RegIndex _intZeroReg,
|
||||
RegIndex _floatZeroReg,
|
||||
|
||||
int id,
|
||||
bool bindRegs);
|
||||
|
||||
/** Sets the free list used with this rename map. */
|
||||
void setFreeList(SimpleFreeList *fl_ptr);
|
||||
|
||||
//Tell rename map to get a free physical register for a given
|
||||
//architected register. Not sure it should have a return value,
|
||||
//but perhaps it should have some sort of fault in case there are
|
||||
//no free registers.
|
||||
RenameInfo rename(RegIndex arch_reg);
|
||||
|
||||
PhysRegIndex lookup(RegIndex phys_reg);
|
||||
|
||||
/**
|
||||
* Marks the given register as ready, meaning that its value has been
|
||||
* calculated and written to the register file.
|
||||
* @param ready_reg The index of the physical register that is now ready.
|
||||
*/
|
||||
void setEntry(RegIndex arch_reg, PhysRegIndex renamed_reg);
|
||||
|
||||
int numFreeEntries();
|
||||
|
||||
private:
|
||||
/** Rename Map ID */
|
||||
int id;
|
||||
|
||||
/** Number of logical integer registers. */
|
||||
int numLogicalIntRegs;
|
||||
|
||||
/** Number of physical integer registers. */
|
||||
int numPhysicalIntRegs;
|
||||
|
||||
/** Number of logical floating point registers. */
|
||||
int numLogicalFloatRegs;
|
||||
|
||||
/** Number of physical floating point registers. */
|
||||
int numPhysicalFloatRegs;
|
||||
|
||||
/** Number of miscellaneous registers. */
|
||||
int numMiscRegs;
|
||||
|
||||
/** Number of logical integer + float registers. */
|
||||
int numLogicalRegs;
|
||||
|
||||
/** Number of physical integer + float registers. */
|
||||
int numPhysicalRegs;
|
||||
|
||||
/** The integer zero register. This implementation assumes it is always
|
||||
* zero and never can be anything else.
|
||||
*/
|
||||
RegIndex intZeroReg;
|
||||
|
||||
/** The floating point zero register. This implementation assumes it is
|
||||
* always zero and never can be anything else.
|
||||
*/
|
||||
RegIndex floatZeroReg;
|
||||
|
||||
class RenameEntry
|
||||
{
|
||||
public:
|
||||
PhysRegIndex physical_reg;
|
||||
bool valid;
|
||||
|
||||
RenameEntry()
|
||||
: physical_reg(0), valid(false)
|
||||
{ }
|
||||
};
|
||||
|
||||
private:
|
||||
/** Integer rename map. */
|
||||
std::vector<RenameEntry> intRenameMap;
|
||||
|
||||
/** Floating point rename map. */
|
||||
std::vector<RenameEntry> floatRenameMap;
|
||||
|
||||
private:
|
||||
/** Free list interface. */
|
||||
SimpleFreeList *freeList;
|
||||
};
|
||||
|
||||
#endif //__CPU_O3_RENAME_MAP_HH__
|
||||
36
simulators/gem5/src/cpu/o3/rob.cc
Normal file
36
simulators/gem5/src/cpu/o3/rob.cc
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Nathan Binkert
|
||||
*/
|
||||
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/o3/rob_impl.hh"
|
||||
|
||||
// Force instantiation of ROB.
|
||||
template class ROB<O3CPUImpl>;
|
||||
332
simulators/gem5/src/cpu/o3/rob.hh
Normal file
332
simulators/gem5/src/cpu/o3/rob.hh
Normal file
@ -0,0 +1,332 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_ROB_HH__
|
||||
#define __CPU_O3_ROB_HH__
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "arch/registers.hh"
|
||||
#include "base/types.hh"
|
||||
#include "config/the_isa.hh"
|
||||
|
||||
/**
|
||||
* ROB class. The ROB is largely what drives squashing.
|
||||
*/
|
||||
template <class Impl>
|
||||
class ROB
|
||||
{
|
||||
protected:
|
||||
typedef TheISA::RegIndex RegIndex;
|
||||
public:
|
||||
//Typedefs from the Impl.
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
typedef typename Impl::DynInstPtr DynInstPtr;
|
||||
|
||||
typedef std::pair<RegIndex, PhysRegIndex> UnmapInfo;
|
||||
typedef typename std::list<DynInstPtr>::iterator InstIt;
|
||||
|
||||
/** Possible ROB statuses. */
|
||||
enum Status { Running, Idle, ROBSquashing };
|
||||
|
||||
/** SMT ROB Sharing Policy */
|
||||
enum ROBPolicy { Dynamic, Partitioned, Threshold };
|
||||
|
||||
private:
|
||||
/** Per-thread ROB status. */
|
||||
Status robStatus[Impl::MaxThreads];
|
||||
|
||||
/** ROB resource sharing policy for SMT mode. */
|
||||
ROBPolicy robPolicy;
|
||||
|
||||
public:
|
||||
/** ROB constructor.
|
||||
* @param _numEntries Number of entries in ROB.
|
||||
* @param _squashWidth Number of instructions that can be squashed in a
|
||||
* single cycle.
|
||||
* @param smtROBPolicy ROB Partitioning Scheme for SMT.
|
||||
* @param _smtROBThreshold Max Resources(by %) a thread can have in the ROB.
|
||||
* @param _numThreads The number of active threads.
|
||||
*/
|
||||
ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth,
|
||||
std::string smtROBPolicy, unsigned _smtROBThreshold,
|
||||
ThreadID _numThreads);
|
||||
|
||||
std::string name() const;
|
||||
|
||||
/** Sets pointer to the list of active threads.
|
||||
* @param at_ptr Pointer to the list of active threads.
|
||||
*/
|
||||
void setActiveThreads(std::list<ThreadID> *at_ptr);
|
||||
|
||||
/** Switches out the ROB. */
|
||||
void switchOut();
|
||||
|
||||
/** Takes over another CPU's thread. */
|
||||
void takeOverFrom();
|
||||
|
||||
/** Function to insert an instruction into the ROB. Note that whatever
|
||||
* calls this function must ensure that there is enough space within the
|
||||
* ROB for the new instruction.
|
||||
* @param inst The instruction being inserted into the ROB.
|
||||
*/
|
||||
void insertInst(DynInstPtr &inst);
|
||||
|
||||
/** Returns pointer to the head instruction within the ROB. There is
|
||||
* no guarantee as to the return value if the ROB is empty.
|
||||
* @retval Pointer to the DynInst that is at the head of the ROB.
|
||||
*/
|
||||
// DynInstPtr readHeadInst();
|
||||
|
||||
/** Returns a pointer to the head instruction of a specific thread within
|
||||
* the ROB.
|
||||
* @return Pointer to the DynInst that is at the head of the ROB.
|
||||
*/
|
||||
DynInstPtr readHeadInst(ThreadID tid);
|
||||
|
||||
/** Returns a pointer to the instruction with the given sequence if it is
|
||||
* in the ROB.
|
||||
*/
|
||||
DynInstPtr findInst(ThreadID tid, InstSeqNum squash_inst);
|
||||
|
||||
/** Returns pointer to the tail instruction within the ROB. There is
|
||||
* no guarantee as to the return value if the ROB is empty.
|
||||
* @retval Pointer to the DynInst that is at the tail of the ROB.
|
||||
*/
|
||||
// DynInstPtr readTailInst();
|
||||
|
||||
/** Returns a pointer to the tail instruction of a specific thread within
|
||||
* the ROB.
|
||||
* @return Pointer to the DynInst that is at the tail of the ROB.
|
||||
*/
|
||||
DynInstPtr readTailInst(ThreadID tid);
|
||||
|
||||
/** Retires the head instruction, removing it from the ROB. */
|
||||
// void retireHead();
|
||||
|
||||
/** Retires the head instruction of a specific thread, removing it from the
|
||||
* ROB.
|
||||
*/
|
||||
void retireHead(ThreadID tid);
|
||||
|
||||
/** Is the oldest instruction across all threads ready. */
|
||||
// bool isHeadReady();
|
||||
|
||||
/** Is the oldest instruction across a particular thread ready. */
|
||||
bool isHeadReady(ThreadID tid);
|
||||
|
||||
/** Is there any commitable head instruction across all threads ready. */
|
||||
bool canCommit();
|
||||
|
||||
/** Re-adjust ROB partitioning. */
|
||||
void resetEntries();
|
||||
|
||||
/** Number of entries needed For 'num_threads' amount of threads. */
|
||||
int entryAmount(ThreadID num_threads);
|
||||
|
||||
/** Returns the number of total free entries in the ROB. */
|
||||
unsigned numFreeEntries();
|
||||
|
||||
/** Returns the number of free entries in a specific ROB paritition. */
|
||||
unsigned numFreeEntries(ThreadID tid);
|
||||
|
||||
/** Returns the maximum number of entries for a specific thread. */
|
||||
unsigned getMaxEntries(ThreadID tid)
|
||||
{ return maxEntries[tid]; }
|
||||
|
||||
/** Returns the number of entries being used by a specific thread. */
|
||||
unsigned getThreadEntries(ThreadID tid)
|
||||
{ return threadEntries[tid]; }
|
||||
|
||||
/** Returns if the ROB is full. */
|
||||
bool isFull()
|
||||
{ return numInstsInROB == numEntries; }
|
||||
|
||||
/** Returns if a specific thread's partition is full. */
|
||||
bool isFull(ThreadID tid)
|
||||
{ return threadEntries[tid] == numEntries; }
|
||||
|
||||
/** Returns if the ROB is empty. */
|
||||
bool isEmpty()
|
||||
{ return numInstsInROB == 0; }
|
||||
|
||||
/** Returns if a specific thread's partition is empty. */
|
||||
bool isEmpty(ThreadID tid)
|
||||
{ return threadEntries[tid] == 0; }
|
||||
|
||||
/** Executes the squash, marking squashed instructions. */
|
||||
void doSquash(ThreadID tid);
|
||||
|
||||
/** Squashes all instructions younger than the given sequence number for
|
||||
* the specific thread.
|
||||
*/
|
||||
void squash(InstSeqNum squash_num, ThreadID tid);
|
||||
|
||||
/** Updates the head instruction with the new oldest instruction. */
|
||||
void updateHead();
|
||||
|
||||
/** Updates the tail instruction with the new youngest instruction. */
|
||||
void updateTail();
|
||||
|
||||
/** Reads the PC of the oldest head instruction. */
|
||||
// uint64_t readHeadPC();
|
||||
|
||||
/** Reads the PC of the head instruction of a specific thread. */
|
||||
// uint64_t readHeadPC(ThreadID tid);
|
||||
|
||||
/** Reads the next PC of the oldest head instruction. */
|
||||
// uint64_t readHeadNextPC();
|
||||
|
||||
/** Reads the next PC of the head instruction of a specific thread. */
|
||||
// uint64_t readHeadNextPC(ThreadID tid);
|
||||
|
||||
/** Reads the sequence number of the oldest head instruction. */
|
||||
// InstSeqNum readHeadSeqNum();
|
||||
|
||||
/** Reads the sequence number of the head instruction of a specific thread.
|
||||
*/
|
||||
// InstSeqNum readHeadSeqNum(ThreadID tid);
|
||||
|
||||
/** Reads the PC of the youngest tail instruction. */
|
||||
// uint64_t readTailPC();
|
||||
|
||||
/** Reads the PC of the tail instruction of a specific thread. */
|
||||
// uint64_t readTailPC(ThreadID tid);
|
||||
|
||||
/** Reads the sequence number of the youngest tail instruction. */
|
||||
// InstSeqNum readTailSeqNum();
|
||||
|
||||
/** Reads the sequence number of tail instruction of a specific thread. */
|
||||
// InstSeqNum readTailSeqNum(ThreadID tid);
|
||||
|
||||
/** Checks if the ROB is still in the process of squashing instructions.
|
||||
* @retval Whether or not the ROB is done squashing.
|
||||
*/
|
||||
bool isDoneSquashing(ThreadID tid) const
|
||||
{ return doneSquashing[tid]; }
|
||||
|
||||
/** Checks if the ROB is still in the process of squashing instructions for
|
||||
* any thread.
|
||||
*/
|
||||
bool isDoneSquashing();
|
||||
|
||||
/** This is more of a debugging function than anything. Use
|
||||
* numInstsInROB to get the instructions in the ROB unless you are
|
||||
* double checking that variable.
|
||||
*/
|
||||
int countInsts();
|
||||
|
||||
/** This is more of a debugging function than anything. Use
|
||||
* threadEntries to get the instructions in the ROB unless you are
|
||||
* double checking that variable.
|
||||
*/
|
||||
int countInsts(ThreadID tid);
|
||||
|
||||
/** Registers statistics. */
|
||||
void regStats();
|
||||
|
||||
private:
|
||||
/** Pointer to the CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Active Threads in CPU */
|
||||
std::list<ThreadID> *activeThreads;
|
||||
|
||||
/** Number of instructions in the ROB. */
|
||||
unsigned numEntries;
|
||||
|
||||
/** Entries Per Thread */
|
||||
unsigned threadEntries[Impl::MaxThreads];
|
||||
|
||||
/** Max Insts a Thread Can Have in the ROB */
|
||||
unsigned maxEntries[Impl::MaxThreads];
|
||||
|
||||
/** ROB List of Instructions */
|
||||
std::list<DynInstPtr> instList[Impl::MaxThreads];
|
||||
|
||||
/** Number of instructions that can be squashed in a single cycle. */
|
||||
unsigned squashWidth;
|
||||
|
||||
public:
|
||||
/** Iterator pointing to the instruction which is the last instruction
|
||||
* in the ROB. This may at times be invalid (ie when the ROB is empty),
|
||||
* however it should never be incorrect.
|
||||
*/
|
||||
InstIt tail;
|
||||
|
||||
/** Iterator pointing to the instruction which is the first instruction in
|
||||
* in the ROB*/
|
||||
InstIt head;
|
||||
|
||||
private:
|
||||
/** Iterator used for walking through the list of instructions when
|
||||
* squashing. Used so that there is persistent state between cycles;
|
||||
* when squashing, the instructions are marked as squashed but not
|
||||
* immediately removed, meaning the tail iterator remains the same before
|
||||
* and after a squash.
|
||||
* This will always be set to cpu->instList.end() if it is invalid.
|
||||
*/
|
||||
InstIt squashIt[Impl::MaxThreads];
|
||||
|
||||
public:
|
||||
/** Number of instructions in the ROB. */
|
||||
int numInstsInROB;
|
||||
|
||||
/** Dummy instruction returned if there are no insts left. */
|
||||
DynInstPtr dummyInst;
|
||||
|
||||
private:
|
||||
/** The sequence number of the squashed instruction. */
|
||||
InstSeqNum squashedSeqNum[Impl::MaxThreads];
|
||||
|
||||
/** Is the ROB done squashing. */
|
||||
bool doneSquashing[Impl::MaxThreads];
|
||||
|
||||
/** Number of active threads. */
|
||||
ThreadID numThreads;
|
||||
|
||||
// The number of rob_reads
|
||||
Stats::Scalar robReads;
|
||||
// The number of rob_writes
|
||||
Stats::Scalar robWrites;
|
||||
};
|
||||
|
||||
#endif //__CPU_O3_ROB_HH__
|
||||
557
simulators/gem5/src/cpu/o3/rob_impl.hh
Normal file
557
simulators/gem5/src/cpu/o3/rob_impl.hh
Normal file
@ -0,0 +1,557 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#include <list>
|
||||
|
||||
#include "cpu/o3/rob.hh"
|
||||
#include "debug/Fetch.hh"
|
||||
#include "debug/ROB.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
template <class Impl>
|
||||
ROB<Impl>::ROB(O3CPU *_cpu, unsigned _numEntries, unsigned _squashWidth,
|
||||
std::string _smtROBPolicy, unsigned _smtROBThreshold,
|
||||
ThreadID _numThreads)
|
||||
: cpu(_cpu),
|
||||
numEntries(_numEntries),
|
||||
squashWidth(_squashWidth),
|
||||
numInstsInROB(0),
|
||||
numThreads(_numThreads)
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
squashedSeqNum[tid] = 0;
|
||||
doneSquashing[tid] = true;
|
||||
threadEntries[tid] = 0;
|
||||
}
|
||||
|
||||
std::string policy = _smtROBPolicy;
|
||||
|
||||
//Convert string to lowercase
|
||||
std::transform(policy.begin(), policy.end(), policy.begin(),
|
||||
(int(*)(int)) tolower);
|
||||
|
||||
//Figure out rob policy
|
||||
if (policy == "dynamic") {
|
||||
robPolicy = Dynamic;
|
||||
|
||||
//Set Max Entries to Total ROB Capacity
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
maxEntries[tid] = numEntries;
|
||||
}
|
||||
|
||||
} else if (policy == "partitioned") {
|
||||
robPolicy = Partitioned;
|
||||
DPRINTF(Fetch, "ROB sharing policy set to Partitioned\n");
|
||||
|
||||
//@todo:make work if part_amt doesnt divide evenly.
|
||||
int part_amt = numEntries / numThreads;
|
||||
|
||||
//Divide ROB up evenly
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
maxEntries[tid] = part_amt;
|
||||
}
|
||||
|
||||
} else if (policy == "threshold") {
|
||||
robPolicy = Threshold;
|
||||
DPRINTF(Fetch, "ROB sharing policy set to Threshold\n");
|
||||
|
||||
int threshold = _smtROBThreshold;;
|
||||
|
||||
//Divide up by threshold amount
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
maxEntries[tid] = threshold;
|
||||
}
|
||||
} else {
|
||||
assert(0 && "Invalid ROB Sharing Policy.Options Are:{Dynamic,"
|
||||
"Partitioned, Threshold}");
|
||||
}
|
||||
|
||||
// Set the per-thread iterators to the end of the instruction list.
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
squashIt[tid] = instList[tid].end();
|
||||
}
|
||||
|
||||
// Initialize the "universal" ROB head & tail point to invalid
|
||||
// pointers
|
||||
head = instList[0].end();
|
||||
tail = instList[0].end();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
std::string
|
||||
ROB<Impl>::name() const
|
||||
{
|
||||
return cpu->name() + ".rob";
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::setActiveThreads(list<ThreadID> *at_ptr)
|
||||
{
|
||||
DPRINTF(ROB, "Setting active threads list pointer.\n");
|
||||
activeThreads = at_ptr;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::switchOut()
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
instList[tid].clear();
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::takeOverFrom()
|
||||
{
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++) {
|
||||
doneSquashing[tid] = true;
|
||||
threadEntries[tid] = 0;
|
||||
squashIt[tid] = instList[tid].end();
|
||||
}
|
||||
numInstsInROB = 0;
|
||||
|
||||
// Initialize the "universal" ROB head & tail point to invalid
|
||||
// pointers
|
||||
head = instList[0].end();
|
||||
tail = instList[0].end();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::resetEntries()
|
||||
{
|
||||
if (robPolicy != Dynamic || numThreads > 1) {
|
||||
int active_threads = activeThreads->size();
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (robPolicy == Partitioned) {
|
||||
maxEntries[tid] = numEntries / active_threads;
|
||||
} else if (robPolicy == Threshold && active_threads == 1) {
|
||||
maxEntries[tid] = numEntries;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
ROB<Impl>::entryAmount(ThreadID num_threads)
|
||||
{
|
||||
if (robPolicy == Partitioned) {
|
||||
return numEntries / num_threads;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
ROB<Impl>::countInsts()
|
||||
{
|
||||
int total = 0;
|
||||
|
||||
for (ThreadID tid = 0; tid < numThreads; tid++)
|
||||
total += countInsts(tid);
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
ROB<Impl>::countInsts(ThreadID tid)
|
||||
{
|
||||
return instList[tid].size();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::insertInst(DynInstPtr &inst)
|
||||
{
|
||||
assert(inst);
|
||||
|
||||
robWrites++;
|
||||
|
||||
DPRINTF(ROB, "Adding inst PC %s to the ROB.\n", inst->pcState());
|
||||
|
||||
assert(numInstsInROB != numEntries);
|
||||
|
||||
ThreadID tid = inst->threadNumber;
|
||||
|
||||
instList[tid].push_back(inst);
|
||||
|
||||
//Set Up head iterator if this is the 1st instruction in the ROB
|
||||
if (numInstsInROB == 0) {
|
||||
head = instList[tid].begin();
|
||||
assert((*head) == inst);
|
||||
}
|
||||
|
||||
//Must Decrement for iterator to actually be valid since __.end()
|
||||
//actually points to 1 after the last inst
|
||||
tail = instList[tid].end();
|
||||
tail--;
|
||||
|
||||
inst->setInROB();
|
||||
|
||||
++numInstsInROB;
|
||||
++threadEntries[tid];
|
||||
|
||||
assert((*tail) == inst);
|
||||
|
||||
DPRINTF(ROB, "[tid:%i] Now has %d instructions.\n", tid, threadEntries[tid]);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::retireHead(ThreadID tid)
|
||||
{
|
||||
robWrites++;
|
||||
|
||||
assert(numInstsInROB > 0);
|
||||
|
||||
// Get the head ROB instruction.
|
||||
InstIt head_it = instList[tid].begin();
|
||||
|
||||
DynInstPtr head_inst = (*head_it);
|
||||
|
||||
assert(head_inst->readyToCommit());
|
||||
|
||||
DPRINTF(ROB, "[tid:%u]: Retiring head instruction, "
|
||||
"instruction PC %s, [sn:%lli]\n", tid, head_inst->pcState(),
|
||||
head_inst->seqNum);
|
||||
|
||||
--numInstsInROB;
|
||||
--threadEntries[tid];
|
||||
|
||||
head_inst->clearInROB();
|
||||
head_inst->setCommitted();
|
||||
|
||||
instList[tid].erase(head_it);
|
||||
|
||||
//Update "Global" Head of ROB
|
||||
updateHead();
|
||||
|
||||
// @todo: A special case is needed if the instruction being
|
||||
// retired is the only instruction in the ROB; otherwise the tail
|
||||
// iterator will become invalidated.
|
||||
cpu->removeFrontInst(head_inst);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
ROB<Impl>::isHeadReady(ThreadID tid)
|
||||
{
|
||||
robReads++;
|
||||
if (threadEntries[tid] != 0) {
|
||||
return instList[tid].front()->readyToCommit();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
bool
|
||||
ROB<Impl>::canCommit()
|
||||
{
|
||||
//@todo: set ActiveThreads through ROB or CPU
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (isHeadReady(tid)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
unsigned
|
||||
ROB<Impl>::numFreeEntries()
|
||||
{
|
||||
return numEntries - numInstsInROB;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
unsigned
|
||||
ROB<Impl>::numFreeEntries(ThreadID tid)
|
||||
{
|
||||
return maxEntries[tid] - threadEntries[tid];
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::doSquash(ThreadID tid)
|
||||
{
|
||||
robWrites++;
|
||||
DPRINTF(ROB, "[tid:%u]: Squashing instructions until [sn:%i].\n",
|
||||
tid, squashedSeqNum[tid]);
|
||||
|
||||
assert(squashIt[tid] != instList[tid].end());
|
||||
|
||||
if ((*squashIt[tid])->seqNum < squashedSeqNum[tid]) {
|
||||
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
|
||||
tid);
|
||||
|
||||
squashIt[tid] = instList[tid].end();
|
||||
|
||||
doneSquashing[tid] = true;
|
||||
return;
|
||||
}
|
||||
|
||||
bool robTailUpdate = false;
|
||||
|
||||
for (int numSquashed = 0;
|
||||
numSquashed < squashWidth &&
|
||||
squashIt[tid] != instList[tid].end() &&
|
||||
(*squashIt[tid])->seqNum > squashedSeqNum[tid];
|
||||
++numSquashed)
|
||||
{
|
||||
DPRINTF(ROB, "[tid:%u]: Squashing instruction PC %s, seq num %i.\n",
|
||||
(*squashIt[tid])->threadNumber,
|
||||
(*squashIt[tid])->pcState(),
|
||||
(*squashIt[tid])->seqNum);
|
||||
|
||||
// Mark the instruction as squashed, and ready to commit so that
|
||||
// it can drain out of the pipeline.
|
||||
(*squashIt[tid])->setSquashed();
|
||||
|
||||
(*squashIt[tid])->setCanCommit();
|
||||
|
||||
|
||||
if (squashIt[tid] == instList[tid].begin()) {
|
||||
DPRINTF(ROB, "Reached head of instruction list while "
|
||||
"squashing.\n");
|
||||
|
||||
squashIt[tid] = instList[tid].end();
|
||||
|
||||
doneSquashing[tid] = true;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
InstIt tail_thread = instList[tid].end();
|
||||
tail_thread--;
|
||||
|
||||
if ((*squashIt[tid]) == (*tail_thread))
|
||||
robTailUpdate = true;
|
||||
|
||||
squashIt[tid]--;
|
||||
}
|
||||
|
||||
|
||||
// Check if ROB is done squashing.
|
||||
if ((*squashIt[tid])->seqNum <= squashedSeqNum[tid]) {
|
||||
DPRINTF(ROB, "[tid:%u]: Done squashing instructions.\n",
|
||||
tid);
|
||||
|
||||
squashIt[tid] = instList[tid].end();
|
||||
|
||||
doneSquashing[tid] = true;
|
||||
}
|
||||
|
||||
if (robTailUpdate) {
|
||||
updateTail();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::updateHead()
|
||||
{
|
||||
DynInstPtr head_inst;
|
||||
InstSeqNum lowest_num = 0;
|
||||
bool first_valid = true;
|
||||
|
||||
// @todo: set ActiveThreads through ROB or CPU
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (instList[tid].empty())
|
||||
continue;
|
||||
|
||||
if (first_valid) {
|
||||
head = instList[tid].begin();
|
||||
lowest_num = (*head)->seqNum;
|
||||
first_valid = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
InstIt head_thread = instList[tid].begin();
|
||||
|
||||
DynInstPtr head_inst = (*head_thread);
|
||||
|
||||
assert(head_inst != 0);
|
||||
|
||||
if (head_inst->seqNum < lowest_num) {
|
||||
head = head_thread;
|
||||
lowest_num = head_inst->seqNum;
|
||||
}
|
||||
}
|
||||
|
||||
if (first_valid) {
|
||||
head = instList[0].end();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::updateTail()
|
||||
{
|
||||
tail = instList[0].end();
|
||||
bool first_valid = true;
|
||||
|
||||
list<ThreadID>::iterator threads = activeThreads->begin();
|
||||
list<ThreadID>::iterator end = activeThreads->end();
|
||||
|
||||
while (threads != end) {
|
||||
ThreadID tid = *threads++;
|
||||
|
||||
if (instList[tid].empty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// If this is the first valid then assign w/out
|
||||
// comparison
|
||||
if (first_valid) {
|
||||
tail = instList[tid].end();
|
||||
tail--;
|
||||
first_valid = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Assign new tail if this thread's tail is younger
|
||||
// than our current "tail high"
|
||||
InstIt tail_thread = instList[tid].end();
|
||||
tail_thread--;
|
||||
|
||||
if ((*tail_thread)->seqNum > (*tail)->seqNum) {
|
||||
tail = tail_thread;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::squash(InstSeqNum squash_num, ThreadID tid)
|
||||
{
|
||||
if (isEmpty()) {
|
||||
DPRINTF(ROB, "Does not need to squash due to being empty "
|
||||
"[sn:%i]\n",
|
||||
squash_num);
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
DPRINTF(ROB, "Starting to squash within the ROB.\n");
|
||||
|
||||
robStatus[tid] = ROBSquashing;
|
||||
|
||||
doneSquashing[tid] = false;
|
||||
|
||||
squashedSeqNum[tid] = squash_num;
|
||||
|
||||
if (!instList[tid].empty()) {
|
||||
InstIt tail_thread = instList[tid].end();
|
||||
tail_thread--;
|
||||
|
||||
squashIt[tid] = tail_thread;
|
||||
|
||||
doSquash(tid);
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
ROB<Impl>::readHeadInst(ThreadID tid)
|
||||
{
|
||||
if (threadEntries[tid] != 0) {
|
||||
InstIt head_thread = instList[tid].begin();
|
||||
|
||||
assert((*head_thread)->isInROB()==true);
|
||||
|
||||
return *head_thread;
|
||||
} else {
|
||||
return dummyInst;
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
ROB<Impl>::readTailInst(ThreadID tid)
|
||||
{
|
||||
InstIt tail_thread = instList[tid].end();
|
||||
tail_thread--;
|
||||
|
||||
return *tail_thread;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
ROB<Impl>::regStats()
|
||||
{
|
||||
using namespace Stats;
|
||||
robReads
|
||||
.name(name() + ".rob_reads")
|
||||
.desc("The number of ROB reads");
|
||||
|
||||
robWrites
|
||||
.name(name() + ".rob_writes")
|
||||
.desc("The number of ROB writes");
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
typename Impl::DynInstPtr
|
||||
ROB<Impl>::findInst(ThreadID tid, InstSeqNum squash_inst)
|
||||
{
|
||||
for (InstIt it = instList[tid].begin(); it != instList[tid].end(); it++) {
|
||||
if ((*it)->seqNum == squash_inst) {
|
||||
return *it;
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
57
simulators/gem5/src/cpu/o3/sat_counter.cc
Normal file
57
simulators/gem5/src/cpu/o3/sat_counter.cc
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "base/misc.hh"
|
||||
#include "cpu/o3/sat_counter.hh"
|
||||
|
||||
/**
 * Default constructor: counter and initial value are zero.
 *
 * maxVal is set to zero as well so that it never holds an indeterminate
 * value; increment() therefore has no effect until setBits() is called.
 */
SatCounter::SatCounter()
    : initialVal(0), maxVal(0), counter(0)
{
}
|
||||
|
||||
/**
 * Creates a counter that starts at zero and saturates at (1 << bits) - 1.
 *
 * @param bits How many bits the counter will have.
 */
SatCounter::SatCounter(unsigned bits)
    : initialVal(0),
      maxVal((1 << bits) - 1),
      counter(0)
{
}
|
||||
|
||||
/**
 * Creates a counter that starts at initial_val and saturates at
 * (1 << bits) - 1.  Calls fatal() if the starting value exceeds that
 * maximum.
 *
 * @param bits        How many bits the counter will have.
 * @param initial_val Starting value of the counter.
 */
SatCounter::SatCounter(unsigned bits, uint8_t initial_val)
    // initialVal must be taken from the parameter; initializing it from
    // itself would leave it indeterminate.
    : initialVal(initial_val), maxVal((1 << bits) - 1), counter(initial_val)
{
    // Check to make sure initial value doesn't exceed the max counter value.
    if (initial_val > maxVal) {
        fatal("BP: Initial counter value exceeds max size.");
    }
}
|
||||
|
||||
void
|
||||
SatCounter::setBits(unsigned bits)
|
||||
{
|
||||
maxVal = (1 << bits) - 1;
|
||||
}
|
||||
117
simulators/gem5/src/cpu/o3/sat_counter.hh
Normal file
117
simulators/gem5/src/cpu/o3/sat_counter.hh
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2005-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_SAT_COUNTER_HH__
|
||||
#define __CPU_O3_SAT_COUNTER_HH__
|
||||
|
||||
#include "base/misc.hh"
|
||||
#include "base/types.hh"
|
||||
|
||||
/**
|
||||
* Private counter class for the internal saturating counters.
|
||||
* Implements an n bit saturating counter and provides methods to
|
||||
* increment, decrement, and read it.
|
||||
* @todo Consider making this something that more closely mimics a
|
||||
* built in class so you can use ++ or --.
|
||||
*/
|
||||
class SatCounter
|
||||
{
|
||||
public:
|
||||
/**
|
||||
* Constructor for the counter.
|
||||
*/
|
||||
SatCounter()
|
||||
: initialVal(0), counter(0)
|
||||
{ }
|
||||
|
||||
/**
|
||||
* Constructor for the counter.
|
||||
* @param bits How many bits the counter will have.
|
||||
*/
|
||||
SatCounter(unsigned bits)
|
||||
: initialVal(0), maxVal((1 << bits) - 1), counter(0)
|
||||
{ }
|
||||
|
||||
/**
|
||||
* Constructor for the counter.
|
||||
* @param bits How many bits the counter will have.
|
||||
* @param initial_val Starting value for each counter.
|
||||
*/
|
||||
SatCounter(unsigned bits, uint8_t initial_val)
|
||||
: initialVal(initial_val), maxVal((1 << bits) - 1),
|
||||
counter(initial_val)
|
||||
{
|
||||
// Check to make sure initial value doesn't exceed the max
|
||||
// counter value.
|
||||
if (initial_val > maxVal) {
|
||||
fatal("BP: Initial counter value exceeds max size.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the number of bits.
|
||||
*/
|
||||
void setBits(unsigned bits) { maxVal = (1 << bits) - 1; }
|
||||
|
||||
void reset() { counter = initialVal; }
|
||||
|
||||
/**
|
||||
* Increments the counter's current value.
|
||||
*/
|
||||
void increment()
|
||||
{
|
||||
if (counter < maxVal) {
|
||||
++counter;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decrements the counter's current value.
|
||||
*/
|
||||
void decrement()
|
||||
{
|
||||
if (counter > 0) {
|
||||
--counter;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the counter's value.
|
||||
*/
|
||||
const uint8_t read() const
|
||||
{ return counter; }
|
||||
|
||||
private:
|
||||
uint8_t initialVal;
|
||||
uint8_t maxVal;
|
||||
uint8_t counter;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_SAT_COUNTER_HH__
|
||||
131
simulators/gem5/src/cpu/o3/scoreboard.cc
Normal file
131
simulators/gem5/src/cpu/o3/scoreboard.cc
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2005-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Korey Sewell
|
||||
* Kevin Lim
|
||||
*/
|
||||
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/scoreboard.hh"
|
||||
#include "debug/Scoreboard.hh"
|
||||
|
||||
/**
 * Builds the scoreboard and marks the initially mapped registers as ready.
 *
 * The scoreboard holds one entry per physical integer register, followed by
 * one per physical floating point register, followed by numMiscRegs entries
 * per active thread.
 */
Scoreboard::Scoreboard(unsigned activeThreads,
                       unsigned _numLogicalIntRegs,
                       unsigned _numPhysicalIntRegs,
                       unsigned _numLogicalFloatRegs,
                       unsigned _numPhysicalFloatRegs,
                       unsigned _numMiscRegs,
                       unsigned _zeroRegIdx)
    : numLogicalIntRegs(_numLogicalIntRegs),
      numPhysicalIntRegs(_numPhysicalIntRegs),
      numLogicalFloatRegs(_numLogicalFloatRegs),
      numPhysicalFloatRegs(_numPhysicalFloatRegs),
      numMiscRegs(_numMiscRegs),
      zeroRegIdx(_zeroRegIdx)
{
    // Combined int + fp register counts.
    numLogicalRegs = numLogicalIntRegs + numLogicalFloatRegs;
    numPhysicalRegs = numPhysicalIntRegs + numPhysicalFloatRegs;

    // One entry per physical register plus the per-thread misc registers.
    const unsigned total_entries =
        numPhysicalRegs + (numMiscRegs * activeThreads);
    resize(total_entries);

    // The first (logical int regs * threads) integer entries start ready.
    const unsigned int_ready_end = numLogicalIntRegs * activeThreads;
    for (int reg = 0; reg < int_ready_end; ++reg) {
        assert(indexInBounds(reg));
        regScoreBoard[reg] = true;
    }

    // The fp entries begin right after the physical integer registers.
    const unsigned fp_ready_end =
        numPhysicalIntRegs + (numLogicalFloatRegs * activeThreads);
    for (int reg = numPhysicalIntRegs; reg < fp_ready_end; ++reg) {
        assert(indexInBounds(reg));
        regScoreBoard[reg] = true;
    }

    // Every misc register entry starts ready.
    for (int reg = numPhysicalRegs; reg < total_entries; ++reg) {
        assert(indexInBounds(reg));
        regScoreBoard[reg] = true;
    }
}
|
||||
|
||||
std::string
|
||||
Scoreboard::name() const
|
||||
{
|
||||
return "cpu.scoreboard";
|
||||
}
|
||||
|
||||
bool
|
||||
Scoreboard::getReg(PhysRegIndex phys_reg)
|
||||
{
|
||||
#if THE_ISA == ALPHA_ISA
|
||||
// Always ready if int or fp zero reg.
|
||||
if (phys_reg == zeroRegIdx ||
|
||||
phys_reg == (zeroRegIdx + numPhysicalIntRegs)) {
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
// Always ready if int zero reg.
|
||||
if (phys_reg == zeroRegIdx) {
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(indexInBounds(phys_reg));
|
||||
return regScoreBoard[phys_reg];
|
||||
}
|
||||
|
||||
void
|
||||
Scoreboard::setReg(PhysRegIndex phys_reg)
|
||||
{
|
||||
DPRINTF(Scoreboard, "Setting reg %i as ready\n", phys_reg);
|
||||
|
||||
assert(indexInBounds(phys_reg));
|
||||
regScoreBoard[phys_reg] = 1;
|
||||
}
|
||||
|
||||
void
|
||||
Scoreboard::unsetReg(PhysRegIndex ready_reg)
|
||||
{
|
||||
#if THE_ISA == ALPHA_ISA
|
||||
if (ready_reg == zeroRegIdx ||
|
||||
ready_reg == (zeroRegIdx + numPhysicalIntRegs)) {
|
||||
// Don't do anything if int or fp zero reg.
|
||||
return;
|
||||
}
|
||||
#else
|
||||
if (ready_reg == zeroRegIdx) {
|
||||
// Don't do anything if int zero reg.
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
assert(indexInBounds(ready_reg));
|
||||
regScoreBoard[ready_reg] = 0;
|
||||
}
|
||||
131
simulators/gem5/src/cpu/o3/scoreboard.hh
Normal file
131
simulators/gem5/src/cpu/o3/scoreboard.hh
Normal file
@ -0,0 +1,131 @@
|
||||
/*
|
||||
* Copyright (c) 2005-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Korey Sewell
|
||||
* Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_SCOREBOARD_HH__
|
||||
#define __CPU_O3_SCOREBOARD_HH__
|
||||
|
||||
#include <iostream>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/o3/comm.hh"
|
||||
|
||||
/**
|
||||
* Implements a simple scoreboard to track which registers are ready.
|
||||
* This class assumes that the fp registers start, index wise, right after
|
||||
* the integer registers. The misc. registers start, index wise, right after
|
||||
* the fp registers.
|
||||
* @todo: Fix up handling of the zero register in case the decoder does not
|
||||
* automatically make insts that write the zero register into nops.
|
||||
*/
|
||||
class Scoreboard
|
||||
{
|
||||
public:
|
||||
/** Constructs a scoreboard.
|
||||
* @param activeThreads The number of active threads.
|
||||
* @param _numLogicalIntRegs Number of logical integer registers.
|
||||
* @param _numPhysicalIntRegs Number of physical integer registers.
|
||||
* @param _numLogicalFloatRegs Number of logical fp registers.
|
||||
* @param _numPhysicalFloatRegs Number of physical fp registers.
|
||||
* @param _numMiscRegs Number of miscellaneous registers.
|
||||
* @param _zeroRegIdx Index of the zero register.
|
||||
*/
|
||||
Scoreboard(unsigned activeThreads,
|
||||
unsigned _numLogicalIntRegs,
|
||||
unsigned _numPhysicalIntRegs,
|
||||
unsigned _numLogicalFloatRegs,
|
||||
unsigned _numPhysicalFloatRegs,
|
||||
unsigned _numMiscRegs,
|
||||
unsigned _zeroRegIdx);
|
||||
|
||||
/** Destructor. */
|
||||
~Scoreboard() {}
|
||||
|
||||
/** Returns the name of the scoreboard. */
|
||||
std::string name() const;
|
||||
|
||||
/** Checks if the register is ready. */
|
||||
bool getReg(PhysRegIndex ready_reg);
|
||||
|
||||
/** Sets the register as ready. */
|
||||
void setReg(PhysRegIndex phys_reg);
|
||||
|
||||
/** Sets the register as not ready. */
|
||||
void unsetReg(PhysRegIndex ready_reg);
|
||||
|
||||
private:
|
||||
/** Scoreboard of physical integer registers, saying whether or not they
|
||||
* are ready.
|
||||
*/
|
||||
std::vector<bool> regScoreBoard;
|
||||
|
||||
/** Number of logical integer registers. */
|
||||
int numLogicalIntRegs;
|
||||
|
||||
/** Number of physical integer registers. */
|
||||
int numPhysicalIntRegs;
|
||||
|
||||
/** Number of logical floating point registers. */
|
||||
int numLogicalFloatRegs;
|
||||
|
||||
/** Number of physical floating point registers. */
|
||||
int numPhysicalFloatRegs;
|
||||
|
||||
/** Number of miscellaneous registers. */
|
||||
int numMiscRegs;
|
||||
|
||||
/** Number of logical integer + float registers. */
|
||||
int numLogicalRegs;
|
||||
|
||||
/** Number of physical integer + float registers. */
|
||||
int numPhysicalRegs;
|
||||
|
||||
/** The logical index of the zero register. */
|
||||
int zeroRegIdx;
|
||||
|
||||
int currentSize;
|
||||
|
||||
void
|
||||
resize(int newSize)
|
||||
{
|
||||
currentSize = newSize;
|
||||
regScoreBoard.resize(newSize);
|
||||
}
|
||||
|
||||
bool
|
||||
indexInBounds(int idx)
|
||||
{
|
||||
return idx < currentSize;
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
367
simulators/gem5/src/cpu/o3/store_set.cc
Normal file
367
simulators/gem5/src/cpu/o3/store_set.cc
Normal file
@ -0,0 +1,367 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "base/intmath.hh"
|
||||
#include "base/misc.hh"
|
||||
#include "base/trace.hh"
|
||||
#include "cpu/o3/store_set.hh"
|
||||
#include "debug/StoreSet.hh"
|
||||
|
||||
/**
 * Creates a store set predictor with the given clear period and table
 * sizes. Both table sizes must be powers of two; otherwise the simulator
 * exits via fatal().
 */
StoreSet::StoreSet(uint64_t clear_period, int _SSIT_size, int _LFST_size)
    : clearPeriod(clear_period), SSITSize(_SSIT_size), LFSTSize(_LFST_size)
{
    DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
    DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
            SSITSize, LFSTSize);

    if (!isPowerOf2(SSITSize)) {
        fatal("Invalid SSIT size!\n");
    }

    // Store Set ID Table: every entry starts invalid.
    SSIT.resize(SSITSize);
    validSSIT.assign(SSITSize, false);

    if (!isPowerOf2(LFSTSize)) {
        fatal("Invalid LFST size!\n");
    }

    // Last Fetched Store Table: zeroed and invalid.
    LFST.assign(LFSTSize, InstSeqNum(0));
    validLFST.assign(LFSTSize, false);

    // The SSIT index is taken from the PC bits above the offset bits.
    indexMask = SSITSize - 1;
    offsetBits = 2;

    memOpsPred = 0;
}
|
||||
|
||||
/** Destructor; there is nothing to release explicitly. */
StoreSet::~StoreSet()
{
    // Intentionally empty.
}
|
||||
|
||||
void
|
||||
StoreSet::init(uint64_t clear_period, int _SSIT_size, int _LFST_size)
|
||||
{
|
||||
SSITSize = _SSIT_size;
|
||||
LFSTSize = _LFST_size;
|
||||
clearPeriod = clear_period;
|
||||
|
||||
DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
|
||||
DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
|
||||
SSITSize, LFSTSize);
|
||||
|
||||
SSIT.resize(SSITSize);
|
||||
|
||||
validSSIT.resize(SSITSize);
|
||||
|
||||
for (int i = 0; i < SSITSize; ++i)
|
||||
validSSIT[i] = false;
|
||||
|
||||
LFST.resize(LFSTSize);
|
||||
|
||||
validLFST.resize(LFSTSize);
|
||||
|
||||
for (int i = 0; i < LFSTSize; ++i) {
|
||||
validLFST[i] = false;
|
||||
LFST[i] = 0;
|
||||
}
|
||||
|
||||
indexMask = SSITSize - 1;
|
||||
|
||||
offsetBits = 2;
|
||||
|
||||
memOpsPred = 0;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
StoreSet::violation(Addr store_PC, Addr load_PC)
|
||||
{
|
||||
int load_index = calcIndex(load_PC);
|
||||
int store_index = calcIndex(store_PC);
|
||||
|
||||
assert(load_index < SSITSize && store_index < SSITSize);
|
||||
|
||||
bool valid_load_SSID = validSSIT[load_index];
|
||||
bool valid_store_SSID = validSSIT[store_index];
|
||||
|
||||
if (!valid_load_SSID && !valid_store_SSID) {
|
||||
// Calculate a new SSID here.
|
||||
SSID new_set = calcSSID(load_PC);
|
||||
|
||||
validSSIT[load_index] = true;
|
||||
|
||||
SSIT[load_index] = new_set;
|
||||
|
||||
validSSIT[store_index] = true;
|
||||
|
||||
SSIT[store_index] = new_set;
|
||||
|
||||
assert(new_set < LFSTSize);
|
||||
|
||||
DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid "
|
||||
"storeset, creating a new one: %i for load %#x, store %#x\n",
|
||||
new_set, load_PC, store_PC);
|
||||
} else if (valid_load_SSID && !valid_store_SSID) {
|
||||
SSID load_SSID = SSIT[load_index];
|
||||
|
||||
validSSIT[store_index] = true;
|
||||
|
||||
SSIT[store_index] = load_SSID;
|
||||
|
||||
assert(load_SSID < LFSTSize);
|
||||
|
||||
DPRINTF(StoreSet, "StoreSet: Load had a valid store set. Adding "
|
||||
"store to that set: %i for load %#x, store %#x\n",
|
||||
load_SSID, load_PC, store_PC);
|
||||
} else if (!valid_load_SSID && valid_store_SSID) {
|
||||
SSID store_SSID = SSIT[store_index];
|
||||
|
||||
validSSIT[load_index] = true;
|
||||
|
||||
SSIT[load_index] = store_SSID;
|
||||
|
||||
DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for "
|
||||
"load %#x, store %#x\n",
|
||||
store_SSID, load_PC, store_PC);
|
||||
} else {
|
||||
SSID load_SSID = SSIT[load_index];
|
||||
SSID store_SSID = SSIT[store_index];
|
||||
|
||||
assert(load_SSID < LFSTSize && store_SSID < LFSTSize);
|
||||
|
||||
// The store set with the lower number wins
|
||||
if (store_SSID > load_SSID) {
|
||||
SSIT[store_index] = load_SSID;
|
||||
|
||||
DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; "
|
||||
"for load %#x, store %#x\n",
|
||||
load_SSID, load_PC, store_PC);
|
||||
} else {
|
||||
SSIT[load_index] = store_SSID;
|
||||
|
||||
DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; "
|
||||
"for load %#x, store %#x\n",
|
||||
store_SSID, load_PC, store_PC);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StoreSet::checkClear()
|
||||
{
|
||||
memOpsPred++;
|
||||
if (memOpsPred > clearPeriod) {
|
||||
DPRINTF(StoreSet, "Wiping predictor state beacuse %d ld/st executed\n",
|
||||
clearPeriod);
|
||||
memOpsPred = 0;
|
||||
clear();
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num)
|
||||
{
|
||||
checkClear();
|
||||
// Does nothing.
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid)
|
||||
{
|
||||
int index = calcIndex(store_PC);
|
||||
|
||||
int store_SSID;
|
||||
|
||||
checkClear();
|
||||
assert(index < SSITSize);
|
||||
|
||||
if (!validSSIT[index]) {
|
||||
// Do nothing if there's no valid entry.
|
||||
return;
|
||||
} else {
|
||||
store_SSID = SSIT[index];
|
||||
|
||||
assert(store_SSID < LFSTSize);
|
||||
|
||||
// Update the last store that was fetched with the current one.
|
||||
LFST[store_SSID] = store_seq_num;
|
||||
|
||||
validLFST[store_SSID] = 1;
|
||||
|
||||
storeList[store_seq_num] = store_SSID;
|
||||
|
||||
DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n",
|
||||
store_PC, store_SSID);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Looks up the store this PC is predicted to depend on.
 * @return The sequence number stored in the LFST for the PC's store set,
 *         or 0 if the PC has no valid SSIT entry or the set has no valid
 *         LFST entry.
 */
InstSeqNum
StoreSet::checkInst(Addr PC)
{
    const int index = calcIndex(PC);

    assert(index < SSITSize);

    if (!validSSIT[index]) {
        DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n",
                PC, index);

        // Return 0 if there's no valid entry.
        return 0;
    }

    const int inst_SSID = SSIT[index];

    assert(inst_SSID < LFSTSize);

    if (validLFST[inst_SSID]) {
        DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST "
                "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]);

        return LFST[inst_SSID];
    }

    DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no "
            "dependency\n", PC, index, inst_SSID);

    return 0;
}
|
||||
|
||||
void
|
||||
StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
|
||||
{
|
||||
// This only is updated upon a store being issued.
|
||||
if (!is_store) {
|
||||
return;
|
||||
}
|
||||
|
||||
int index = calcIndex(issued_PC);
|
||||
|
||||
int store_SSID;
|
||||
|
||||
assert(index < SSITSize);
|
||||
|
||||
SeqNumMapIt store_list_it = storeList.find(issued_seq_num);
|
||||
|
||||
if (store_list_it != storeList.end()) {
|
||||
storeList.erase(store_list_it);
|
||||
}
|
||||
|
||||
// Make sure the SSIT still has a valid entry for the issued store.
|
||||
if (!validSSIT[index]) {
|
||||
return;
|
||||
}
|
||||
|
||||
store_SSID = SSIT[index];
|
||||
|
||||
assert(store_SSID < LFSTSize);
|
||||
|
||||
// If the last fetched store in the store set refers to the store that
|
||||
// was just issued, then invalidate the entry.
|
||||
if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) {
|
||||
DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n");
|
||||
validLFST[store_SSID] = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StoreSet::squash(InstSeqNum squashed_num, ThreadID tid)
|
||||
{
|
||||
DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
|
||||
squashed_num);
|
||||
|
||||
int idx;
|
||||
SeqNumMapIt store_list_it = storeList.begin();
|
||||
|
||||
//@todo:Fix to only delete from correct thread
|
||||
while (!storeList.empty()) {
|
||||
idx = (*store_list_it).second;
|
||||
|
||||
if ((*store_list_it).first <= squashed_num) {
|
||||
break;
|
||||
}
|
||||
|
||||
bool younger = LFST[idx] > squashed_num;
|
||||
|
||||
if (validLFST[idx] && younger) {
|
||||
DPRINTF(StoreSet, "Squashed [sn:%lli]\n", LFST[idx]);
|
||||
validLFST[idx] = false;
|
||||
|
||||
storeList.erase(store_list_it++);
|
||||
} else if (!validLFST[idx] && younger) {
|
||||
storeList.erase(store_list_it++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
StoreSet::clear()
|
||||
{
|
||||
for (int i = 0; i < SSITSize; ++i) {
|
||||
validSSIT[i] = false;
|
||||
}
|
||||
|
||||
for (int i = 0; i < LFSTSize; ++i) {
|
||||
validLFST[i] = false;
|
||||
}
|
||||
|
||||
storeList.clear();
|
||||
}
|
||||
|
||||
void
|
||||
StoreSet::dump()
|
||||
{
|
||||
cprintf("storeList.size(): %i\n", storeList.size());
|
||||
SeqNumMapIt store_list_it = storeList.begin();
|
||||
|
||||
int num = 0;
|
||||
|
||||
while (store_list_it != storeList.end()) {
|
||||
cprintf("%i: [sn:%lli] SSID:%i\n",
|
||||
num, (*store_list_it).first, (*store_list_it).second);
|
||||
num++;
|
||||
store_list_it++;
|
||||
}
|
||||
}
|
||||
160
simulators/gem5/src/cpu/o3/store_set.hh
Normal file
160
simulators/gem5/src/cpu/o3/store_set.hh
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2005 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_STORE_SET_HH__
|
||||
#define __CPU_O3_STORE_SET_HH__
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "base/types.hh"
|
||||
#include "cpu/inst_seq.hh"
|
||||
|
||||
struct ltseqnum {
|
||||
bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
|
||||
{
|
||||
return lhs > rhs;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Implements a store set predictor for determining if memory
|
||||
* instructions are dependent upon each other. See paper "Memory
|
||||
* Dependence Prediction using Store Sets" by Chrysos and Emer. SSID
|
||||
* stands for Store Set ID, SSIT stands for Store Set ID Table, and
|
||||
* LFST is Last Fetched Store Table.
|
||||
*/
|
||||
class StoreSet
|
||||
{
|
||||
public:
|
||||
typedef unsigned SSID;
|
||||
|
||||
public:
|
||||
/** Default constructor. init() must be called prior to use. */
|
||||
StoreSet() { };
|
||||
|
||||
/** Creates store set predictor with given table sizes. */
|
||||
StoreSet(uint64_t clear_period, int SSIT_size, int LFST_size);
|
||||
|
||||
/** Default destructor. */
|
||||
~StoreSet();
|
||||
|
||||
/** Initializes the store set predictor with the given table sizes. */
|
||||
void init(uint64_t clear_period, int SSIT_size, int LFST_size);
|
||||
|
||||
/** Records a memory ordering violation between the younger load
|
||||
* and the older store. */
|
||||
void violation(Addr store_PC, Addr load_PC);
|
||||
|
||||
/** Clears the store set predictor every so often so that all the
|
||||
* entries aren't used and stores are constantly predicted as
|
||||
* conflicting.
|
||||
*/
|
||||
void checkClear();
|
||||
|
||||
/** Inserts a load into the store set predictor. This does nothing but
|
||||
* is included in case other predictors require a similar function.
|
||||
*/
|
||||
void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
|
||||
|
||||
/** Inserts a store into the store set predictor. Updates the
|
||||
* LFST if the store has a valid SSID. */
|
||||
void insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid);
|
||||
|
||||
/** Checks if the instruction with the given PC is dependent upon
|
||||
* any store. @return Returns the sequence number of the store
|
||||
* instruction this PC is dependent upon. Returns 0 if none.
|
||||
*/
|
||||
InstSeqNum checkInst(Addr PC);
|
||||
|
||||
/** Records this PC/sequence number as issued. */
|
||||
void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
|
||||
|
||||
/** Squashes for a specific thread until the given sequence number. */
|
||||
void squash(InstSeqNum squashed_num, ThreadID tid);
|
||||
|
||||
/** Resets all tables. */
|
||||
void clear();
|
||||
|
||||
/** Debug function to dump the contents of the store list. */
|
||||
void dump();
|
||||
|
||||
private:
|
||||
/** Calculates the index into the SSIT based on the PC. */
|
||||
inline int calcIndex(Addr PC)
|
||||
{ return (PC >> offsetBits) & indexMask; }
|
||||
|
||||
/** Calculates a Store Set ID based on the PC. */
|
||||
inline SSID calcSSID(Addr PC)
|
||||
{ return ((PC ^ (PC >> 10)) % LFSTSize); }
|
||||
|
||||
/** The Store Set ID Table. */
|
||||
std::vector<SSID> SSIT;
|
||||
|
||||
/** Bit vector to tell if the SSIT has a valid entry. */
|
||||
std::vector<bool> validSSIT;
|
||||
|
||||
/** Last Fetched Store Table. */
|
||||
std::vector<InstSeqNum> LFST;
|
||||
|
||||
/** Bit vector to tell if the LFST has a valid entry. */
|
||||
std::vector<bool> validLFST;
|
||||
|
||||
/** Map of stores that have been inserted into the store set, but
|
||||
* not yet issued or squashed.
|
||||
*/
|
||||
std::map<InstSeqNum, int, ltseqnum> storeList;
|
||||
|
||||
typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
|
||||
|
||||
/** Number of loads/stores to process before wiping predictor so all
|
||||
* entries don't get saturated
|
||||
*/
|
||||
uint64_t clearPeriod;
|
||||
|
||||
/** Store Set ID Table size, in entries. */
|
||||
int SSITSize;
|
||||
|
||||
/** Last Fetched Store Table size, in entries. */
|
||||
int LFSTSize;
|
||||
|
||||
/** Mask to obtain the index. */
|
||||
int indexMask;
|
||||
|
||||
// HACK: Hardcoded for now.
|
||||
int offsetBits;
|
||||
|
||||
/** Number of memory operations predicted since last clear of predictor */
|
||||
int memOpsPred;
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_STORE_SET_HH__
|
||||
36
simulators/gem5/src/cpu/o3/thread_context.cc
Executable file
36
simulators/gem5/src/cpu/o3/thread_context.cc
Executable file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#include "cpu/o3/impl.hh"
|
||||
#include "cpu/o3/thread_context.hh"
|
||||
#include "cpu/o3/thread_context_impl.hh"
|
||||
|
||||
// Explicitly instantiate the O3ThreadContext template for the O3CPUImpl
// implementation type in this translation unit.
template class O3ThreadContext<O3CPUImpl>;
|
||||
|
||||
263
simulators/gem5/src/cpu/o3/thread_context.hh
Executable file
263
simulators/gem5/src/cpu/o3/thread_context.hh
Executable file
@ -0,0 +1,263 @@
|
||||
/*
|
||||
* Copyright (c) 2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_THREAD_CONTEXT_HH__
|
||||
#define __CPU_O3_THREAD_CONTEXT_HH__
|
||||
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/isa_specific.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
|
||||
// Forward declarations of types that are defined elsewhere.
class EndQuiesceEvent;

namespace Kernel { class Statistics; }
|
||||
|
||||
/**
|
||||
* Derived ThreadContext class for use with the O3CPU. It
|
||||
* provides the interface for any external objects to access a
|
||||
* single thread's state and some general CPU state. Any time
|
||||
* external objects try to update state through this interface,
|
||||
* the CPU will create an event to squash all in-flight
|
||||
* instructions in order to ensure state is maintained correctly.
|
||||
* It must be defined specifically for the O3CPU because
|
||||
* not all architectural state is located within the O3ThreadState
|
||||
* (such as the commit PC, and registers), and specific actions
|
||||
* must be taken when using this interface (such as squashing all
|
||||
* in-flight instructions when doing a write to this interface).
|
||||
*/
|
||||
template <class Impl>
|
||||
class O3ThreadContext : public ThreadContext
|
||||
{
|
||||
public:
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
/** Pointer to the CPU. */
|
||||
O3CPU *cpu;
|
||||
|
||||
/** Pointer to the thread state that this TC corrseponds to. */
|
||||
O3ThreadState<Impl> *thread;
|
||||
|
||||
/** Returns a pointer to the ITB. */
|
||||
TheISA::TLB *getITBPtr() { return cpu->itb; }
|
||||
|
||||
/** Returns a pointer to the DTB. */
|
||||
TheISA::TLB *getDTBPtr() { return cpu->dtb; }
|
||||
|
||||
CheckerCPU *getCheckerCpuPtr() { return NULL; }
|
||||
|
||||
TheISA::Decoder *
|
||||
getDecoderPtr()
|
||||
{
|
||||
return cpu->fetch.decoder[thread->threadId()];
|
||||
}
|
||||
|
||||
/** Returns a pointer to this CPU. */
|
||||
virtual BaseCPU *getCpuPtr() { return cpu; }
|
||||
|
||||
/** Reads this CPU's ID. */
|
||||
virtual int cpuId() { return cpu->cpuId(); }
|
||||
|
||||
virtual int contextId() { return thread->contextId(); }
|
||||
|
||||
virtual void setContextId(int id) { thread->setContextId(id); }
|
||||
|
||||
/** Returns this thread's ID number. */
|
||||
virtual int threadId() { return thread->threadId(); }
|
||||
virtual void setThreadId(int id) { return thread->setThreadId(id); }
|
||||
|
||||
/** Returns a pointer to the system. */
|
||||
virtual System *getSystemPtr() { return cpu->system; }
|
||||
|
||||
/** Returns a pointer to this thread's kernel statistics. */
|
||||
virtual TheISA::Kernel::Statistics *getKernelStats()
|
||||
{ return thread->kernelStats; }
|
||||
|
||||
/** Returns a pointer to this thread's process. */
|
||||
virtual Process *getProcessPtr() { return thread->getProcessPtr(); }
|
||||
|
||||
virtual PortProxy &getPhysProxy() { return thread->getPhysProxy(); }
|
||||
|
||||
virtual FSTranslatingPortProxy &getVirtProxy();
|
||||
|
||||
virtual void initMemProxies(ThreadContext *tc)
|
||||
{ thread->initMemProxies(tc); }
|
||||
|
||||
virtual SETranslatingPortProxy &getMemProxy()
|
||||
{ return thread->getMemProxy(); }
|
||||
|
||||
/** Returns this thread's status. */
|
||||
virtual Status status() const { return thread->status(); }
|
||||
|
||||
/** Sets this thread's status. */
|
||||
virtual void setStatus(Status new_status)
|
||||
{ thread->setStatus(new_status); }
|
||||
|
||||
/** Set the status to Active. Optional delay indicates number of
|
||||
* cycles to wait before beginning execution. */
|
||||
virtual void activate(int delay = 1);
|
||||
|
||||
/** Set the status to Suspended. */
|
||||
virtual void suspend(int delay = 0);
|
||||
|
||||
/** Set the status to Halted. */
|
||||
virtual void halt(int delay = 0);
|
||||
|
||||
/** Dumps the function profiling information.
|
||||
* @todo: Implement.
|
||||
*/
|
||||
virtual void dumpFuncProfile();
|
||||
|
||||
/** Takes over execution of a thread from another CPU. */
|
||||
virtual void takeOverFrom(ThreadContext *old_context);
|
||||
|
||||
/** Registers statistics associated with this TC. */
|
||||
virtual void regStats(const std::string &name);
|
||||
|
||||
/** Serializes state. */
|
||||
virtual void serialize(std::ostream &os);
|
||||
/** Unserializes state. */
|
||||
virtual void unserialize(Checkpoint *cp, const std::string §ion);
|
||||
|
||||
/** Reads the last tick that this thread was activated on. */
|
||||
virtual Tick readLastActivate();
|
||||
/** Reads the last tick that this thread was suspended on. */
|
||||
virtual Tick readLastSuspend();
|
||||
|
||||
/** Clears the function profiling information. */
|
||||
virtual void profileClear();
|
||||
/** Samples the function profiling information. */
|
||||
virtual void profileSample();
|
||||
|
||||
/** Copies the architectural registers from another TC into this TC. */
|
||||
virtual void copyArchRegs(ThreadContext *tc);
|
||||
|
||||
/** Resets all architectural registers to 0. */
|
||||
virtual void clearArchRegs();
|
||||
|
||||
/** Reads an integer register. */
|
||||
virtual uint64_t readIntReg(int reg_idx);
|
||||
|
||||
virtual FloatReg readFloatReg(int reg_idx);
|
||||
|
||||
virtual FloatRegBits readFloatRegBits(int reg_idx);
|
||||
|
||||
/** Sets an integer register to a value. */
|
||||
virtual void setIntReg(int reg_idx, uint64_t val);
|
||||
|
||||
virtual void setFloatReg(int reg_idx, FloatReg val);
|
||||
|
||||
virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
|
||||
|
||||
/** Reads this thread's PC state. */
|
||||
virtual TheISA::PCState pcState()
|
||||
{ return cpu->pcState(thread->threadId()); }
|
||||
|
||||
/** Sets this thread's PC state. */
|
||||
virtual void pcState(const TheISA::PCState &val);
|
||||
|
||||
virtual void pcStateNoRecord(const TheISA::PCState &val);
|
||||
|
||||
/** Reads this thread's PC. */
|
||||
virtual Addr instAddr()
|
||||
{ return cpu->instAddr(thread->threadId()); }
|
||||
|
||||
/** Reads this thread's next PC. */
|
||||
virtual Addr nextInstAddr()
|
||||
{ return cpu->nextInstAddr(thread->threadId()); }
|
||||
|
||||
/** Reads this thread's next PC. */
|
||||
virtual MicroPC microPC()
|
||||
{ return cpu->microPC(thread->threadId()); }
|
||||
|
||||
/** Reads a miscellaneous register. */
|
||||
virtual MiscReg readMiscRegNoEffect(int misc_reg)
|
||||
{ return cpu->readMiscRegNoEffect(misc_reg, thread->threadId()); }
|
||||
|
||||
/** Reads a misc. register, including any side-effects the
|
||||
* read might have as defined by the architecture. */
|
||||
virtual MiscReg readMiscReg(int misc_reg)
|
||||
{ return cpu->readMiscReg(misc_reg, thread->threadId()); }
|
||||
|
||||
/** Sets a misc. register. */
|
||||
virtual void setMiscRegNoEffect(int misc_reg, const MiscReg &val);
|
||||
|
||||
/** Sets a misc. register, including any side-effects the
|
||||
* write might have as defined by the architecture. */
|
||||
virtual void setMiscReg(int misc_reg, const MiscReg &val);
|
||||
|
||||
virtual int flattenIntIndex(int reg);
|
||||
virtual int flattenFloatIndex(int reg);
|
||||
|
||||
/** Returns the number of consecutive store conditional failures. */
|
||||
// @todo: Figure out where these store cond failures should go.
|
||||
virtual unsigned readStCondFailures()
|
||||
{ return thread->storeCondFailures; }
|
||||
|
||||
/** Sets the number of consecutive store conditional failures. */
|
||||
virtual void setStCondFailures(unsigned sc_failures)
|
||||
{ thread->storeCondFailures = sc_failures; }
|
||||
|
||||
// Only really makes sense for old CPU model. Lots of code
|
||||
// outside the CPU still checks this function, so it will
|
||||
// always return false to keep everything working.
|
||||
/** Checks if the thread is misspeculating. Because it is
|
||||
* very difficult to determine if the thread is
|
||||
* misspeculating, this is set as false. */
|
||||
virtual bool misspeculating() { return false; }
|
||||
|
||||
/** Executes a syscall in SE mode. */
|
||||
virtual void syscall(int64_t callnum)
|
||||
{ return cpu->syscall(callnum, thread->threadId()); }
|
||||
|
||||
/** Reads the funcExeInst counter. */
|
||||
virtual Counter readFuncExeInst() { return thread->funcExeInst; }
|
||||
|
||||
/** Returns pointer to the quiesce event. */
|
||||
virtual EndQuiesceEvent *getQuiesceEvent()
|
||||
{
|
||||
return this->thread->quiesceEvent;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
||||
348
simulators/gem5/src/cpu/o3/thread_context_impl.hh
Executable file
348
simulators/gem5/src/cpu/o3/thread_context_impl.hh
Executable file
@ -0,0 +1,348 @@
|
||||
/*
|
||||
* Copyright (c) 2010-2011 ARM Limited
|
||||
* All rights reserved
|
||||
*
|
||||
* The license below extends only to copyright in the software and shall
|
||||
* not be construed as granting a license to any other intellectual
|
||||
* property including but not limited to intellectual property relating
|
||||
* to a hardware implementation of the functionality of the software
|
||||
* licensed hereunder. You may use the software subject to the license
|
||||
* terms below provided that you ensure that this notice is replicated
|
||||
* unmodified and in its entirety in all distributions of the software,
|
||||
* modified or unmodified, in source code or in binary form.
|
||||
*
|
||||
* Copyright (c) 2004-2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
* Korey Sewell
|
||||
*/
|
||||
|
||||
#include "arch/kernel_stats.hh"
|
||||
#include "arch/registers.hh"
|
||||
#include "config/the_isa.hh"
|
||||
#include "cpu/o3/thread_context.hh"
|
||||
#include "cpu/quiesce_event.hh"
|
||||
#include "debug/O3CPU.hh"
|
||||
|
||||
template <class Impl>
|
||||
FSTranslatingPortProxy&
|
||||
O3ThreadContext<Impl>::getVirtProxy()
|
||||
{
|
||||
return thread->getVirtProxy();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::dumpFuncProfile()
|
||||
{
|
||||
thread->dumpFuncProfile();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::takeOverFrom(ThreadContext *old_context)
|
||||
{
|
||||
// some things should already be set up
|
||||
assert(getSystemPtr() == old_context->getSystemPtr());
|
||||
assert(getProcessPtr() == old_context->getProcessPtr());
|
||||
|
||||
// copy over functional state
|
||||
setStatus(old_context->status());
|
||||
copyArchRegs(old_context);
|
||||
setContextId(old_context->contextId());
|
||||
setThreadId(old_context->threadId());
|
||||
|
||||
if (FullSystem) {
|
||||
EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
|
||||
if (other_quiesce) {
|
||||
// Point the quiesce event's TC at this TC so that it wakes up
|
||||
// the proper CPU.
|
||||
other_quiesce->tc = this;
|
||||
}
|
||||
if (thread->quiesceEvent) {
|
||||
thread->quiesceEvent->tc = this;
|
||||
}
|
||||
|
||||
// Transfer kernel stats from one CPU to the other.
|
||||
thread->kernelStats = old_context->getKernelStats();
|
||||
cpu->lockFlag = false;
|
||||
} else {
|
||||
thread->funcExeInst = old_context->readFuncExeInst();
|
||||
}
|
||||
|
||||
old_context->setStatus(ThreadContext::Halted);
|
||||
|
||||
thread->inSyscall = false;
|
||||
thread->trapPending = false;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::activate(int delay)
|
||||
{
|
||||
DPRINTF(O3CPU, "Calling activate on Thread Context %d\n",
|
||||
threadId());
|
||||
|
||||
if (thread->status() == ThreadContext::Active)
|
||||
return;
|
||||
|
||||
thread->lastActivate = curTick();
|
||||
thread->setStatus(ThreadContext::Active);
|
||||
|
||||
// status() == Suspended
|
||||
cpu->activateContext(thread->threadId(), delay);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::suspend(int delay)
|
||||
{
|
||||
DPRINTF(O3CPU, "Calling suspend on Thread Context %d\n",
|
||||
threadId());
|
||||
|
||||
if (thread->status() == ThreadContext::Suspended)
|
||||
return;
|
||||
|
||||
thread->lastActivate = curTick();
|
||||
thread->lastSuspend = curTick();
|
||||
|
||||
thread->setStatus(ThreadContext::Suspended);
|
||||
cpu->suspendContext(thread->threadId());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::halt(int delay)
|
||||
{
|
||||
DPRINTF(O3CPU, "Calling halt on Thread Context %d\n",
|
||||
threadId());
|
||||
|
||||
if (thread->status() == ThreadContext::Halted)
|
||||
return;
|
||||
|
||||
thread->setStatus(ThreadContext::Halted);
|
||||
cpu->haltContext(thread->threadId());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::regStats(const std::string &name)
|
||||
{
|
||||
if (FullSystem) {
|
||||
thread->kernelStats = new TheISA::Kernel::Statistics(cpu->system);
|
||||
thread->kernelStats->regStats(name + ".kern");
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::serialize(std::ostream &os)
|
||||
{
|
||||
if (FullSystem && thread->kernelStats)
|
||||
thread->kernelStats->serialize(os);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string §ion)
|
||||
{
|
||||
if (FullSystem && thread->kernelStats)
|
||||
thread->kernelStats->unserialize(cp, section);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Tick
|
||||
O3ThreadContext<Impl>::readLastActivate()
|
||||
{
|
||||
return thread->lastActivate;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
Tick
|
||||
O3ThreadContext<Impl>::readLastSuspend()
|
||||
{
|
||||
return thread->lastSuspend;
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::profileClear()
|
||||
{
|
||||
thread->profileClear();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::profileSample()
|
||||
{
|
||||
thread->profileSample();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::copyArchRegs(ThreadContext *tc)
|
||||
{
|
||||
// Prevent squashing
|
||||
thread->inSyscall = true;
|
||||
TheISA::copyRegs(tc, this);
|
||||
thread->inSyscall = false;
|
||||
|
||||
if (!FullSystem)
|
||||
this->thread->funcExeInst = tc->readFuncExeInst();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::clearArchRegs()
|
||||
{
|
||||
cpu->isa[thread->threadId()].clear();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
uint64_t
|
||||
O3ThreadContext<Impl>::readIntReg(int reg_idx)
|
||||
{
|
||||
reg_idx = cpu->isa[thread->threadId()].flattenIntIndex(reg_idx);
|
||||
return cpu->readArchIntReg(reg_idx, thread->threadId());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
TheISA::FloatReg
|
||||
O3ThreadContext<Impl>::readFloatReg(int reg_idx)
|
||||
{
|
||||
reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
|
||||
return cpu->readArchFloatReg(reg_idx, thread->threadId());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
TheISA::FloatRegBits
|
||||
O3ThreadContext<Impl>::readFloatRegBits(int reg_idx)
|
||||
{
|
||||
reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
|
||||
return cpu->readArchFloatRegInt(reg_idx, thread->threadId());
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val)
|
||||
{
|
||||
reg_idx = cpu->isa[thread->threadId()].flattenIntIndex(reg_idx);
|
||||
cpu->setArchIntReg(reg_idx, val, thread->threadId());
|
||||
|
||||
// Squash if we're not already in a state update mode.
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val)
|
||||
{
|
||||
reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
|
||||
cpu->setArchFloatReg(reg_idx, val, thread->threadId());
|
||||
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
|
||||
{
|
||||
reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
|
||||
cpu->setArchFloatRegInt(reg_idx, val, thread->threadId());
|
||||
|
||||
// Squash if we're not already in a state update mode.
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::pcState(const TheISA::PCState &val)
|
||||
{
|
||||
cpu->pcState(val, thread->threadId());
|
||||
|
||||
// Squash if we're not already in a state update mode.
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::pcStateNoRecord(const TheISA::PCState &val)
|
||||
{
|
||||
cpu->pcState(val, thread->threadId());
|
||||
|
||||
// Squash if we're not already in a state update mode.
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
O3ThreadContext<Impl>::flattenIntIndex(int reg)
|
||||
{
|
||||
return cpu->isa[thread->threadId()].flattenIntIndex(reg);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
int
|
||||
O3ThreadContext<Impl>::flattenFloatIndex(int reg)
|
||||
{
|
||||
return cpu->isa[thread->threadId()].flattenFloatIndex(reg);
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::setMiscRegNoEffect(int misc_reg, const MiscReg &val)
|
||||
{
|
||||
cpu->setMiscRegNoEffect(misc_reg, val, thread->threadId());
|
||||
|
||||
// Squash if we're not already in a state update mode.
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
void
|
||||
O3ThreadContext<Impl>::setMiscReg(int misc_reg, const MiscReg &val)
|
||||
{
|
||||
cpu->setMiscReg(misc_reg, val, thread->threadId());
|
||||
|
||||
// Squash if we're not already in a state update mode.
|
||||
if (!thread->trapPending && !thread->inSyscall) {
|
||||
cpu->squashFromTC(thread->threadId());
|
||||
}
|
||||
}
|
||||
|
||||
113
simulators/gem5/src/cpu/o3/thread_state.hh
Normal file
113
simulators/gem5/src/cpu/o3/thread_state.hh
Normal file
@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Copyright (c) 2006 The Regents of The University of Michigan
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met: redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer;
|
||||
* redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution;
|
||||
* neither the name of the copyright holders nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* Authors: Kevin Lim
|
||||
*/
|
||||
|
||||
#ifndef __CPU_O3_THREAD_STATE_HH__
|
||||
#define __CPU_O3_THREAD_STATE_HH__
|
||||
|
||||
#include "base/callback.hh"
|
||||
#include "base/output.hh"
|
||||
#include "cpu/thread_context.hh"
|
||||
#include "cpu/thread_state.hh"
|
||||
#include "sim/full_system.hh"
|
||||
#include "sim/sim_exit.hh"
|
||||
|
||||
class EndQuiesceEvent;
|
||||
class Event;
|
||||
class FunctionalMemory;
|
||||
class FunctionProfile;
|
||||
class Process;
|
||||
class ProfileNode;
|
||||
|
||||
/**
|
||||
* Class that has various thread state, such as the status, the
|
||||
* current instruction being processed, whether or not the thread has
|
||||
* a trap pending or is being externally updated, the ThreadContext
|
||||
* pointer, etc. It also handles anything related to a specific
|
||||
* thread's process, such as syscalls and checking valid addresses.
|
||||
*/
|
||||
template <class Impl>
|
||||
struct O3ThreadState : public ThreadState {
|
||||
typedef ThreadContext::Status Status;
|
||||
typedef typename Impl::O3CPU O3CPU;
|
||||
|
||||
private:
|
||||
/** Pointer to the CPU. */
|
||||
O3CPU *cpu;
|
||||
public:
|
||||
/** Whether or not the thread is currently in syscall mode, and
|
||||
* thus able to be externally updated without squashing.
|
||||
*/
|
||||
bool inSyscall;
|
||||
|
||||
/** Whether or not the thread is currently waiting on a trap, and
|
||||
* thus able to be externally updated without squashing.
|
||||
*/
|
||||
bool trapPending;
|
||||
|
||||
O3ThreadState(O3CPU *_cpu, int _thread_num, Process *_process)
|
||||
: ThreadState(_cpu, _thread_num, _process),
|
||||
cpu(_cpu), inSyscall(0), trapPending(0)
|
||||
{
|
||||
if (!FullSystem)
|
||||
return;
|
||||
|
||||
if (cpu->params()->profile) {
|
||||
profile = new FunctionProfile(
|
||||
cpu->params()->system->kernelSymtab);
|
||||
Callback *cb =
|
||||
new MakeCallback<O3ThreadState,
|
||||
&O3ThreadState::dumpFuncProfile>(this);
|
||||
registerExitCallback(cb);
|
||||
}
|
||||
|
||||
// let's fill with a dummy node for now so we don't get a segfault
|
||||
// on the first cycle when there's no node available.
|
||||
static ProfileNode dummyNode;
|
||||
profileNode = &dummyNode;
|
||||
profilePC = 3;
|
||||
}
|
||||
|
||||
/** Pointer to the ThreadContext of this thread. */
|
||||
ThreadContext *tc;
|
||||
|
||||
/** Returns a pointer to the TC of this thread. */
|
||||
ThreadContext *getTC() { return tc; }
|
||||
|
||||
/** Handles the syscall. */
|
||||
void syscall(int64_t callnum) { process->syscall(callnum, tc); }
|
||||
|
||||
void dumpFuncProfile()
|
||||
{
|
||||
std::ostream *os = simout.create(csprintf("profile.%s.dat", cpu->name()));
|
||||
profile->dump(tc, *os);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // __CPU_O3_THREAD_STATE_HH__
|
||||
Reference in New Issue
Block a user