//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "sched-instrs"
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
using namespace llvm;

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
    Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
  MFI = mf.getFrameInfo();
  DbgValueVec.clear();
}

/// Run - perform scheduling.
///
void ScheduleDAGInstrs::Run(MachineBasicBlock *bb,
                            MachineBasicBlock::iterator begin,
                            MachineBasicBlock::iterator end,
                            unsigned endcount) {
  BB = bb;
  Begin = begin;
  InsertPosIndex = endcount;

  ScheduleDAG::Run(bb, end);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant or a multiplied value, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
      if (U->getOpcode() != Instruction::Add ||
          (!isa<ConstantInt>(U->getOperand(1)) &&
           Operator::getOpcode(U->getOperand(1)) != Instruction::Mul))
        return V;
      V = U->getOperand(0);
    } else {
      return V;
    }
    assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
  } while (1);
}

/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
  // First just call Value::getUnderlyingObject to let it do what it does.
  do {
    V = V->getUnderlyingObject();
    // If it found an inttoptr, use special code to continue climbing.
    if (Operator::getOpcode(V) != Instruction::IntToPtr)
      break;
    const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
    // If that succeeded in finding a pointer, continue the search.
    if (!O->getType()->isPointerTy())
      break;
    V = O;
  } while (1);
  return V;
}

/// getUnderlyingObjectForInstr - If this machine instr has memory reference
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object. Otherwise return null.
static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
                                                const MachineFrameInfo *MFI,
                                                bool &MayAlias) {
  MayAlias = true;
  if (!MI->hasOneMemOperand() ||
      !(*MI->memoperands_begin())->getValue() ||
      (*MI->memoperands_begin())->isVolatile())
    return 0;

  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return 0;

  V = getUnderlyingObject(V);
  if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
    // For now, ignore PseudoSourceValues which may alias LLVM IR values
    // because the code that uses this function has no way to cope with
    // such aliases.
    if (PSV->isAliased(MFI))
      return 0;

    MayAlias = PSV->mayAlias(MFI);
    return V;
  }

  if (isIdentifiedObject(V))
    return V;

  return 0;
}

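// StartBlock - If this block is a loop latch, record the loop header's
// live-in registers and let LoopRegs scan the loop, so that loop-carried
// register uses can be backscheduled in BuildSchedGraph.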
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
  if (MachineLoop *ML = MLI.getLoopFor(BB))
    if (BB == ML->getLoopLatch()) {
      MachineBasicBlock *Header = ML->getHeader();
      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
           E = Header->livein_end(); I != E; ++I)
        LoopLiveInRegs.insert(*I);
      LoopRegs.VisitLoop(ML);
    }
}

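// BuildSchedGraph - Walk the scheduling region from bottom to top, creating
// one SUnit per MachineInstr and adding register (data/anti/output) and
// memory (chain) dependence edges between them.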
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
  // We'll be allocating one SUnit for each instruction, plus one for
  // the region exit node.
  SUnits.reserve(BB->size());

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.

  // Remember where a generic side-effecting instruction is as we proceed.
  SUnit *BarrierChain = 0, *AliasChain = 0;

  // Memory references to specific known memory locations are tracked
  // so that they can be given more precise dependencies. We track
  // separately the known memory locations that may alias and those
  // that are known not to alias.
  std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
  std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;

  // Keep track of dangling debug references to registers.
  std::vector<std::pair<MachineInstr*, unsigned> >
    DanglingDebugValue(TRI->getNumRegs(),
                       std::make_pair(static_cast<MachineInstr*>(0), 0));

  // Check to see if the scheduler cares about latencies.
  bool UnitLatencies = ForceUnitLatencies();

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
  unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();

  // Remove any stale debug info; sometimes BuildSchedGraph is called again
  // without emitting the info from the previous call.
  DbgValueVec.clear();

  // Walk the list of instructions, from bottom moving up.
  for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
       MII != MIE; --MII) {
    MachineInstr *MI = prior(MII);
    // DBG_VALUE instructions do not have SUnits built, so just remember these
    // for later reinsertion.
    if (MI->isDebugValue()) {
      if (MI->getNumOperands()==3 && MI->getOperand(0).isReg() &&
          MI->getOperand(0).getReg())
        DanglingDebugValue[MI->getOperand(0).getReg()] =
          std::make_pair(MI, DbgValueVec.size());
      DbgValueVec.push_back(MI);
      continue;
    }
    const TargetInstrDesc &TID = MI->getDesc();
    assert(!TID.isTerminator() && !MI->isLabel() &&
           "Cannot schedule terminators or labels!");
    // Create the SUnit for this MI.
    SUnit *SU = NewSUnit(MI);

    // Assign the Latency field of SU using target-provided information.
    if (UnitLatencies)
      SU->Latency = 1;
    else
      ComputeLatency(SU);

    // Add register-based dependencies (data, anti, and output).
    for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) {
      const MachineOperand &MO = MI->getOperand(j);
      if (!MO.isReg()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");

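      // If a DBG_VALUE referring to this register was seen further down the
      // block, attach it to this def's SUnit so EmitSchedule can re-emit it
      // right after the defining instruction.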
      if (MO.isDef() && DanglingDebugValue[Reg].first!=0) {
        SU->DbgInstrList.push_back(DanglingDebugValue[Reg].first);
        DbgValueVec[DanglingDebugValue[Reg].second] = 0;
        DanglingDebugValue[Reg] = std::make_pair((MachineInstr*)0, 0);
      }

      std::vector<SUnit *> &UseList = Uses[Reg];
      std::vector<SUnit *> &DefList = Defs[Reg];
      // Optionally add output and anti dependencies. For anti
      // dependencies we use a latency of 0 because for a multi-issue
      // target we want to allow the defining instruction to issue
      // in the same cycle as the using instruction.
      // TODO: Using a latency of 1 here for output dependencies assumes
      // there's no cost for reusing registers.
      SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
      unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
      for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
        SUnit *DefSU = DefList[i];
        if (DefSU != SU &&
            (Kind != SDep::Output || !MO.isDead() ||
             !DefSU->getInstr()->registerDefIsDead(Reg)))
          DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/Reg));
      }
      for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
        std::vector<SUnit *> &DefList = Defs[*Alias];
        for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
          SUnit *DefSU = DefList[i];
          if (DefSU != SU &&
              (Kind != SDep::Output || !MO.isDead() ||
               !DefSU->getInstr()->registerDefIsDead(*Alias)))
            DefSU->addPred(SDep(SU, Kind, AOLatency, /*Reg=*/ *Alias));
        }
      }

      if (MO.isDef()) {
        // Add any data dependencies.
        unsigned DataLatency = SU->Latency;
        for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
          SUnit *UseSU = UseList[i];
          if (UseSU == SU)
            continue;
          unsigned LDataLatency = DataLatency;
          // Optionally add in a special extra latency for nodes that
          // feed addresses.
          // TODO: Do this for register aliases too.
          // TODO: Perhaps we should get rid of
          // SpecialAddressLatency and just move this into
          // adjustSchedDependency for the targets that care about it.
          if (SpecialAddressLatency != 0 && !UnitLatencies) {
            MachineInstr *UseMI = UseSU->getInstr();
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
            assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
            if ((UseTID.mayLoad() || UseTID.mayStore()) &&
                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
              LDataLatency += SpecialAddressLatency;
          }
          // Adjust the dependence latency using operand def/use
          // information (if any), and then allow the target to
          // perform its own adjustments.
          const SDep& dep = SDep(SU, SDep::Data, LDataLatency, Reg);
          if (!UnitLatencies) {
            ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
            ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
          }
          UseSU->addPred(dep);
        }
        for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
          std::vector<SUnit *> &UseList = Uses[*Alias];
          for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
            SUnit *UseSU = UseList[i];
            if (UseSU == SU)
              continue;
            const SDep& dep = SDep(SU, SDep::Data, DataLatency, *Alias);
            if (!UnitLatencies) {
              ComputeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
              ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
            }
            UseSU->addPred(dep);
          }
        }

        // If a def is going to wrap back around to the top of the loop,
        // backschedule it.
        if (!UnitLatencies && DefList.empty()) {
          LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
          if (I != LoopRegs.Deps.end()) {
            const MachineOperand *UseMO = I->second.first;
            unsigned Count = I->second.second;
            const MachineInstr *UseMI = UseMO->getParent();
            unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
            const TargetInstrDesc &UseTID = UseMI->getDesc();
            // TODO: If we knew the total depth of the region here, we could
            // handle the case where the whole loop is inside the region but
            // is large enough that the isScheduleHigh trick isn't needed.
            if (UseMOIdx < UseTID.getNumOperands()) {
              // Currently, we only support scheduling regions consisting of
              // single basic blocks. Check to see if the instruction is in
              // the same region by checking to see if it has the same parent.
              if (UseMI->getParent() != MI->getParent()) {
                unsigned Latency = SU->Latency;
                if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
                  Latency += SpecialAddressLatency;
                // This is a wild guess as to the portion of the latency which
                // will be overlapped by work done outside the current
                // scheduling region.
                Latency -= std::min(Latency, Count);
                // Add the artificial edge.
                ExitSU.addPred(SDep(SU, SDep::Order, Latency,
                                    /*Reg=*/0, /*isNormalMemory=*/false,
                                    /*isMustAlias=*/false,
                                    /*isArtificial=*/true));
              } else if (SpecialAddressLatency > 0 &&
                         UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
                // The entire loop body is within the current scheduling region
                // and the latency of this operation is assumed to be greater
                // than the latency of the loop.
                // TODO: Recursively mark data-edge predecessors as
                // isScheduleHigh too.
                SU->isScheduleHigh = true;
              }
            }
            LoopRegs.Deps.erase(I);
          }
        }

        UseList.clear();
        if (!MO.isDead())
          DefList.clear();
        DefList.push_back(SU);
      } else {
        UseList.push_back(SU);
      }
    }

    // Add chain dependencies.
    // Chain dependencies used to enforce memory order should have
    // latency of 0 (except for true dependency of Store followed by
    // aliased Load... we estimate that with a single cycle of latency
    // assuming the hardware will bypass)
    // Note that isStoreToStackSlot and isLoadFromStackSlot are not usable
    // after stack slots are lowered to actual addresses.
    // TODO: Use an AliasAnalysis and do real alias-analysis queries, and
    // produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
    unsigned TrueMemOrderLatency = 0;
    if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
        (MI->hasVolatileMemoryRef() &&
         (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
      // Be conservative with these and add dependencies on all memory
      // references, even those that are known to not alias.
      for (std::map<const Value *, SUnit *>::iterator I =
             NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      NonAliasMemDefs.clear();
      NonAliasMemUses.clear();
      // Add SU to the barrier chain.
      if (BarrierChain)
        BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      BarrierChain = SU;

      // fall-through
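      // Reached both by falling through from the barrier case above and via
      // the "goto new_alias_chain" below for stores whose underlying object
      // could not be identified.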
    new_alias_chain:
      // Chain all possibly aliasing memory references through SU.
      if (AliasChain)
        AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      AliasChain = SU;
      for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
        PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
           E = AliasMemDefs.end(); I != E; ++I) {
        I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
      for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
             AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
        for (unsigned i = 0, e = I->second.size(); i != e; ++i)
          I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
      }
      PendingLoads.clear();
      AliasMemDefs.clear();
      AliasMemUses.clear();
    } else if (TID.mayStore()) {
      bool MayAlias = true;
      TrueMemOrderLatency = STORE_LOAD_LATENCY;
      if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
        // A store to a specific PseudoSourceValue. Add precise dependencies.
        // Record the def in MemDefs, first adding a dep if there is
        // an existing def.
        std::map<const Value *, SUnit *>::iterator I =
          ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
        std::map<const Value *, SUnit *>::iterator IE =
          ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
        if (I != IE) {
          I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                  /*isNormalMemory=*/true));
          I->second = SU;
        } else {
          if (MayAlias)
            AliasMemDefs[V] = SU;
          else
            NonAliasMemDefs[V] = SU;
        }
        // Handle the uses in MemUses, if there are any.
        std::map<const Value *, std::vector<SUnit *> >::iterator J =
          ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
        std::map<const Value *, std::vector<SUnit *> >::iterator JE =
          ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
        if (J != JE) {
          for (unsigned i = 0, e = J->second.size(); i != e; ++i)
            J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
                                       /*Reg=*/0, /*isNormalMemory=*/true));
          J->second.clear();
        }
        if (MayAlias) {
          // Add dependencies from all the PendingLoads, i.e. loads
          // with no underlying object.
          for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
            PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
          // Add dependence on alias chain, if needed.
          if (AliasChain)
            AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        }
        // Add dependence on barrier chain, if needed.
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      } else {
        // Treat all other stores conservatively.
        goto new_alias_chain;
      }
    } else if (TID.mayLoad()) {
      bool MayAlias = true;
      TrueMemOrderLatency = 0;
      if (MI->isInvariantLoad(AA)) {
        // Invariant load, no chain dependencies needed!
      } else {
        if (const Value *V =
            getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
          // A load from a specific PseudoSourceValue. Add precise dependencies.
          std::map<const Value *, SUnit *>::iterator I =
            ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
          std::map<const Value *, SUnit *>::iterator IE =
            ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
          if (I != IE)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
                                    /*isNormalMemory=*/true));
          if (MayAlias)
            AliasMemUses[V].push_back(SU);
          else
            NonAliasMemUses[V].push_back(SU);
        } else {
          // A load with no underlying object. Depend on all
          // potentially aliasing stores.
          for (std::map<const Value *, SUnit *>::iterator I =
                 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
            I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));

          PendingLoads.push_back(SU);
          MayAlias = true;
        }

        // Add dependencies on alias and barrier chains, if needed.
        if (MayAlias && AliasChain)
          AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
        if (BarrierChain)
          BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
      }
    }
  }

  for (int i = 0, e = TRI->getNumRegs(); i != e; ++i) {
    Defs[i].clear();
    Uses[i].clear();
  }
  PendingLoads.clear();
}

void ScheduleDAGInstrs::FinishBlock() {
  // Nothing to do.
}

void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();

  // Compute the latency for the node.
  SU->Latency =
    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());

  // Simplistic target-independent heuristic: assume that loads take
  // extra time.
  if (InstrItins.isEmpty())
    if (SU->getInstr()->getDesc().mayLoad())
      SU->Latency += 2;
}

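// ComputeOperandLatency - Refine the latency of the dependence from Def to
// Use with operand read/write cycle information from the itinerary, when
// such information is available.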
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                              SDep& dep) const {
  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
  if (InstrItins.isEmpty())
    return;

  // For a data dependency with a known register...
  if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
    return;

  const unsigned Reg = dep.getReg();

  // ... find the definition of the register in the defining
  // instruction.
  MachineInstr *DefMI = Def->getInstr();
  int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
  if (DefIdx != -1) {
    int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
                                              DefIdx);
    if (DefCycle >= 0) {
      MachineInstr *UseMI = Use->getInstr();
      const unsigned UseClass = UseMI->getDesc().getSchedClass();

      // For all uses of the register, calculate the maximum latency.
      int Latency = -1;
      for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
        const MachineOperand &MO = UseMI->getOperand(i);
        if (!MO.isReg() || !MO.isUse())
          continue;
        unsigned MOReg = MO.getReg();
        if (MOReg != Reg)
          continue;

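        // Compute the latency for this def/use pair as the difference between
        // the cycle the value is produced and the cycle it is read, keeping
        // the maximum over all matching use operands.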
        int UseCycle = InstrItins.getOperandCycle(UseClass, i);
        if (UseCycle >= 0)
          Latency = std::max(Latency, DefCycle - UseCycle + 1);
      }

      // If we found a latency, then replace the existing dependence latency.
      if (Latency >= 0)
        dep.setLatency(Latency);
    }
  }
}

void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
  SU->getInstr()->dump();
}

std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
  std::string s;
  raw_string_ostream oss(s);
  if (SU == &EntrySU)
    oss << "<entry>";
  else if (SU == &ExitSU)
    oss << "<exit>";
  else
    SU->getInstr()->print(oss);
  return oss.str();
}

// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGInstrs::EmitSchedule() {
  // For MachineInstr-based scheduling, we're rescheduling the instructions in
  // the block, so start by removing them from the block.
  while (Begin != InsertPos) {
    MachineBasicBlock::iterator I = Begin;
    ++Begin;
    BB->remove(I);
  }

  // First reinsert any remaining debug_values; these are either constants,
  // or refer to live-in registers. The beginning of the block is the right
  // place for the latter. The former might reasonably be placed elsewhere
  // using some kind of ordering algorithm, but right now it doesn't matter.
  for (int i = DbgValueVec.size()-1; i>=0; --i)
    if (DbgValueVec[i])
      BB->insert(InsertPos, DbgValueVec[i]);

  // Then re-insert them according to the given schedule.
  for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
    SUnit *SU = Sequence[i];
    if (!SU) {
      // Null SUnit* is a noop.
      EmitNoop();
      continue;
    }

    BB->insert(InsertPos, SU->getInstr());
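    // Re-emit any DBG_VALUEs that were attached to this SUnit's defs during
    // BuildSchedGraph, immediately after the defining instruction.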
    for (unsigned i = 0, e = SU->DbgInstrList.size(); i < e; ++i)
      BB->insert(InsertPos, SU->DbgInstrList[i]);
  }

  // Update the Begin iterator, as the first instruction in the block
  // may have been scheduled later.
  if (!DbgValueVec.empty()) {
    for (int i = DbgValueVec.size()-1; i>=0; --i)
      if (DbgValueVec[i]!=0) {
        Begin = DbgValueVec[DbgValueVec.size()-1];
        break;
      }
  } else if (!Sequence.empty())
    Begin = Sequence[0]->getInstr();

  DbgValueVec.clear();
  return BB;
}