source: icGREP/icgrep-devel/llvm-3.6.1.src/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @ 4664

Last change: r4664, checked in by cameron, 4 years ago

Upgrade LLVM to 3.6.1

//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up register pressure reduction list schedulers,
// using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//
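
// An illustrative sketch (editorial, not part of the original source) of the
// loop described above; the real code below adds hazard checks, cycle
// accounting, and physical-register interference handling:
//
//   while (!AvailableQueue->empty()) {
//     SUnit *SU = AvailableQueue->pop();   // highest priority first
//     if (isLegalToSchedule(SU)) {         // hypothetical legality check
//       EmitNode(SU);                      // reserve resources, record order
//       ReleasePredecessors(SU);           // may make new nodes available
//     }
//   }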

#include "llvm/CodeGen/SchedulerRegistry.h"
#include "ScheduleDAGSDNodes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <climits>
using namespace llvm;

#define DEBUG_TYPE "pre-RA-sched"

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds,    "Number of nodes unfolded");
STATISTIC(NumDups,       "Number of duplicated nodes");
STATISTIC(NumPRCopies,   "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live". These nodes must be scheduled before any other node
  /// that modifies the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  // Collect interferences between physical register use/defs.
  // Each interference is an SUnit and set of physical registers.
  SmallVector<SUnit*, 4> Interferences;
  typedef DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMapT;
  LRegsMapT LRegsMap;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits, nullptr) {

    const TargetSubtargetInfo &STI = mf.getSubtarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule() override;

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVectorImpl<SUnit*>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl<unsigned>&);

  void releaseInterferences(unsigned Reg = 0);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const override {
    return !NeedLatency;
  }
};
}  // end anonymous namespace
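
// For orientation (an editorial sketch, not code from this revision): the
// factory functions named in the RegisterScheduler entries above pair this
// scheduler with a matching priority queue, roughly along these lines; the
// actual factories and queue classes appear further down in this file.
//
//   ScheduleDAGSDNodes *
//   llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
//                                    CodeGenOpt::Level OptLevel) {
//     // Pure register-pressure reduction needs no latency information.
//     return new ScheduleDAGRRList(*IS->MF, /*needlatency=*/false,
//                                  /*availqueue=*/..., OptLevel);
//   }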

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost,
                          const MachineFunction &MF) {
  MVT VT = RegDefPos.GetValue();

  // Special handling for untyped values.  These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();

    // Special handling for CopyFromReg of untyped values.
    if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) {
      unsigned Reg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Opcode = Node->getMachineOpcode();
    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI, MF);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}
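
// Usage note (an editorial sketch, not from this function): callers receive
// the two out-parameters and weigh the cost against the tracked pressure for
// that class, along the lines of
//
//   unsigned RegClass, Cost;
//   GetCostForDef(RegDefPos, TLI, TII, TRI, RegClass, Cost, MF);
//   if (RegPressure[RegClass] + Cost >= RegLimit[RegClass])
//     ... // this def would push the class past its pressure limit
//
// where RegPressure/RegLimit are the per-class arrays kept by the
// register-pressure-tracking priority queue later in this file.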

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, nullptr);
  LiveRegGens.resize(TRI->getNumRegs() + 1, nullptr);
  CallSeqEndForStart.clear();
  assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences");

  // Build the scheduling graph.
  BuildSchedGraph(nullptr);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  DEBUG({
      dbgs() << "*** Final schedule ***\n";
      dumpSchedule();
      dbgs() << '\n';
    });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(nullptr);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = nullptr;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return nullptr;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return nullptr;
  }
}
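
// Nesting illustration (editorial, not from the original source): read bottom
// to top, the upward walk from the outer CALLSEQ_END uses NestLevel to skip
// over the complete inner call sequence.
//
//   CALLSEQ_BEGIN (outer)    <- --NestLevel reaches 0: this is the match
//     CALLSEQ_BEGIN (inner)  <- --NestLevel (now 1)
//     CALLSEQ_END   (inner)  <- ++NestLevel (now 2)
//   CALLSEQ_END   (outer)    <- walk starts here, ++NestLevel (now 1)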

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue.  If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue.  If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward to the specified cycle.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::LIFETIME_START:
  case ISD::LIFETIME_END:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << "   Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = nullptr;
      LiveRegGens[I->getReg()] = nullptr;
      releaseInterferences(I->getReg());
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = nullptr;
      LiveRegGens[I->getReg()] = nullptr;
      releaseInterferences(I->getReg());
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = nullptr;
        LiveRegGens[CallResource] = nullptr;
        releaseInterferences(CallResource);
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == nullptr ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling by unscheduling nodes back to
/// BtSU in order to make it possible to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return nullptr;

  if (SU->getNode()->getGluedNode())
    return nullptr;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    MVT VT = N->getSimpleValueType(i);
    if (VT == MVT::Glue)
      return nullptr;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return nullptr;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return nullptr;

    // Unfolding an x86 DEC64m operation results in a store, a dec, and a
    // load, which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return nullptr;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      computeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    SDep D(LoadSU, SDep::Data, 0);
    D.setLatency(LoadSU->Latency);
    AddPred(NewSU, D);

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << "    Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                              const TargetRegisterClass *DestRC,
                                              const TargetRegisterClass *SrcRC,
                                              SmallVectorImpl<SUnit*> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(nullptr);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(nullptr);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  SDep FromDep(SU, SDep::Data, Reg);
  FromDep.setLatency(SU->Latency);
  AddPred(CopyFromSU, FromDep);
  SDep ToDep(CopyFromSU, SDep::Data, 0);
  ToDep.setLatency(CopyFromSU->Latency);
  AddPred(CopyToSU, ToDep);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}
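
// Resulting structure (an editorial sketch): after InsertCopiesAndMoveSuccs,
// the dependence edges look roughly like
//
//   SU --data(Reg)--> CopyFromSU --data--> CopyToSU --> (scheduled succs)
//
// i.e. the physreg value is copied out of Reg into DestRC by CopyFromSU and
// back into SrcRC by CopyToSU, and the already-scheduled successors now
// depend on CopyToSU instead of directly on SU.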

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  unsigned NumRes;
  if (N->getOpcode() == ISD::CopyFromReg) {
    // CopyFromReg has: "chain, Val, glue" so operand 1 gives the type.
    NumRes = 1;
  } else {
    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
    NumRes = MCID.getNumDefs();
    for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
      if (Reg == *ImpDef)
        break;
      ++NumRes;
    }
  }
  return N->getSimpleValueType(NumRes);
}

/// CheckForLiveRegDef - Record (in RegAdded/LRegs) any "live" registers that
/// the specified register def of the specified SUnit would clobber.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVectorImpl<unsigned> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI).second) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     std::vector<SUnit*> &LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVectorImpl<unsigned> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i).second)
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (const RegisterMaskSDNode *Op =
        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
      return Op->getRegMask();
  return nullptr;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) &&
            RegAdded.insert(CallResource).second)
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}
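
// Caller's pattern (an editorial sketch of what PickNodeToScheduleBottomUp
// below does): pop candidates and consult this predicate, deferring any
// candidate that would clobber a live physical register.
//
//   SmallVector<unsigned, 4> LRegs;
//   if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
//     break;                  // CurSU is safe to schedule now
//   // otherwise record CurSU in Interferences/LRegsMap and try another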

void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = Interferences.size(); i > 0; --i) {
    SUnit *SU = Interferences[i-1];
    LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
    if (Reg) {
      SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
      if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
        continue;
    }
    SU->isPending = false;
    // The interfering node may no longer be available due to backtracking.
    // Furthermore, it may have been made available again, in which case it is
    // now already in the AvailableQueue.
    if (SU->isAvailable && !SU->NodeQueueId) {
      DEBUG(dbgs() << "    Repushing SU #" << SU->NodeNum << '\n');
      AvailableQueue->push(SU);
    }
    if (i < Interferences.size())
      Interferences[i-1] = Interferences.back();
    Interferences.pop_back();
    LRegsMap.erase(LRegsPos);
  }
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SUnit *CurSU = AvailableQueue->empty() ? nullptr : AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    DEBUG(dbgs() << "    Interfering reg " <<
          (LRegs[0] == TRI->getNumRegs() ? "CallResource"
           : TRI->getName(LRegs[0]))
           << " SU #" << CurSU->NodeNum << '\n');
    std::pair<LRegsMapT::iterator, bool> LRegsPair =
      LRegsMap.insert(std::make_pair(CurSU, LRegs));
    if (LRegsPair.second) {
      CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
      Interferences.push_back(CurSU);
    }
    else {
      assert(CurSU->isPending && "Interferences are pending");
      // Update the interference with current live regs.
      LRegsPair.first->second = LRegs;
    }
    CurSU = AvailableQueue->pop();
  }
  if (CurSU)
    return CurSU;

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = nullptr;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU))  {
      // BacktrackBottomUp mutates Interferences!
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU("
            << TrySU->NodeNum << ")\n");
      AddPred(TrySU, SDep(BtSU, SDep::Artificial));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available.
      if (!TrySU->isAvailable || !TrySU->NodeQueueId)
        CurSU = AvailableQueue->pop();
      else {
        // Available and in AvailableQueue
        AvailableQueue->remove(TrySU);
        CurSU = TrySU;
      }
      // Interferences has been mutated. We must break.
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVectorImpl<unsigned> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value, but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to copy
    // the value at all.
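    // For illustration only (not target-specific guidance): a plain GPR def
    // can usually be copied directly (DestRC == RC); a value that must
    // round-trip through another register bank has a legal but expensive
    // cross copy class; and something like a condition-flags register may
    // have a null cross copy class, making a copy impossible.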
1461    SUnit *NewDef = nullptr;
1462    if (DestRC != RC) {
1463      NewDef = CopyAndMoveSuccessors(LRDef);
1464      if (!DestRC && !NewDef)
1465        report_fatal_error("Can't handle live physical register dependency!");
1466    }
1467    if (!NewDef) {
1468      // Issue copies, these can be expensive cross register class copies.
1469      SmallVector<SUnit*, 2> Copies;
1470      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
1471      DEBUG(dbgs() << "    Adding an edge from SU #" << TrySU->NodeNum
1472            << " to SU #" << Copies.front()->NodeNum << "\n");
1473      AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
1474      NewDef = Copies.back();
1475    }
1476
1477    DEBUG(dbgs() << "    Adding an edge from SU #" << NewDef->NodeNum
1478          << " to SU #" << TrySU->NodeNum << "\n");
1479    LiveRegDefs[Reg] = NewDef;
1480    AddPred(NewDef, SDep(TrySU, SDep::Artificial));
1481    TrySU->isAvailable = false;
1482    CurSU = NewDef;
1483  }
1484  assert(CurSU && "Unable to resolve live physical register dependencies!");
1485  return CurSU;
1486}
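
// A recap of the fallback order implemented above: first try to pop a
// non-interfering node straight from the queue; failing that, backtrack past
// the generator of an interfering live range when doing so cannot create a
// cycle; failing that, duplicate the expensive def; and only as a last
// resort insert cross register class copies.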
1487
1488/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
1489/// schedulers.
1490void ScheduleDAGRRList::ListScheduleBottomUp() {
1491  // Release any predecessors of the special Exit node.
1492  ReleasePredecessors(&ExitSU);
1493
1494  // Add root to Available queue.
1495  if (!SUnits.empty()) {
1496    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
1497    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
1498    RootSU->isAvailable = true;
1499    AvailableQueue->push(RootSU);
1500  }
1501
  // While the Available queue is not empty, grab the node with the highest
  // priority. If it is not ready, put it back. Otherwise, schedule the node.
1504  Sequence.reserve(SUnits.size());
1505  while (!AvailableQueue->empty() || !Interferences.empty()) {
1506    DEBUG(dbgs() << "\nExamining Available:\n";
1507          AvailableQueue->dump(this));
1508
1509    // Pick the best node to schedule taking all constraints into
1510    // consideration.
1511    SUnit *SU = PickNodeToScheduleBottomUp();
1512
1513    AdvancePastStalls(SU);
1514
1515    ScheduleNodeBottomUp(SU);
1516
1517    while (AvailableQueue->empty() && !PendingQueue.empty()) {
1518      // Advance the cycle to free resources. Skip ahead to the next ready SU.
1519      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
1520      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
1521    }
1522  }
1523
  // Reverse the order since it is bottom up.
1525  std::reverse(Sequence.begin(), Sequence.end());
1526
1527#ifndef NDEBUG
1528  VerifyScheduledSequence(/*isBottomUp=*/true);
1529#endif
1530}
1531
1532//===----------------------------------------------------------------------===//
1533//                RegReductionPriorityQueue Definition
1534//===----------------------------------------------------------------------===//
1535//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
1537// to reduce register pressure.
1538//
1539namespace {
1540class RegReductionPQBase;
1541
1542struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
1543  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
1544};
1545
1546#ifndef NDEBUG
1547template<class SF>
1548struct reverse_sort : public queue_sort {
1549  SF &SortFunc;
1550  reverse_sort(SF &sf) : SortFunc(sf) {}
1551
1552  bool operator()(SUnit* left, SUnit* right) const {
1553    // reverse left/right rather than simply !SortFunc(left, right)
1554    // to expose different paths in the comparison logic.
1555    return SortFunc(right, left);
1556  }
1557};
1558#endif // NDEBUG
1559
/// bu_ls_rr_sort - Priority function for the bottom-up register pressure
/// reduction scheduler.
1562struct bu_ls_rr_sort : public queue_sort {
1563  enum {
1564    IsBottomUp = true,
1565    HasReadyFilter = false
1566  };
1567
1568  RegReductionPQBase *SPQ;
1569  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
1570
1571  bool operator()(SUnit* left, SUnit* right) const;
1572};
1573
1574// src_ls_rr_sort - Priority function for source order scheduler.
1575struct src_ls_rr_sort : public queue_sort {
1576  enum {
1577    IsBottomUp = true,
1578    HasReadyFilter = false
1579  };
1580
1581  RegReductionPQBase *SPQ;
1582  src_ls_rr_sort(RegReductionPQBase *spq)
1583    : SPQ(spq) {}
1584
1585  bool operator()(SUnit* left, SUnit* right) const;
1586};
1587
1588// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
1589struct hybrid_ls_rr_sort : public queue_sort {
1590  enum {
1591    IsBottomUp = true,
1592    HasReadyFilter = false
1593  };
1594
1595  RegReductionPQBase *SPQ;
1596  hybrid_ls_rr_sort(RegReductionPQBase *spq)
1597    : SPQ(spq) {}
1598
1599  bool isReady(SUnit *SU, unsigned CurCycle) const;
1600
1601  bool operator()(SUnit* left, SUnit* right) const;
1602};
1603
1604// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
1605// scheduler.
1606struct ilp_ls_rr_sort : public queue_sort {
1607  enum {
1608    IsBottomUp = true,
1609    HasReadyFilter = false
1610  };
1611
1612  RegReductionPQBase *SPQ;
1613  ilp_ls_rr_sort(RegReductionPQBase *spq)
1614    : SPQ(spq) {}
1615
1616  bool isReady(SUnit *SU, unsigned CurCycle) const;
1617
1618  bool operator()(SUnit* left, SUnit* right) const;
1619};
1620
1621class RegReductionPQBase : public SchedulingPriorityQueue {
1622protected:
1623  std::vector<SUnit*> Queue;
1624  unsigned CurQueueId;
1625  bool TracksRegPressure;
1626  bool SrcOrder;
1627
1628  // SUnits - The SUnits for the current graph.
1629  std::vector<SUnit> *SUnits;
1630
1631  MachineFunction &MF;
1632  const TargetInstrInfo *TII;
1633  const TargetRegisterInfo *TRI;
1634  const TargetLowering *TLI;
1635  ScheduleDAGRRList *scheduleDAG;
1636
1637  // SethiUllmanNumbers - The SethiUllman number for each node.
1638  std::vector<unsigned> SethiUllmanNumbers;
1639
1640  /// RegPressure - Tracking current reg pressure per register class.
1641  ///
1642  std::vector<unsigned> RegPressure;
1643
1644  /// RegLimit - Tracking the number of allocatable registers per register
1645  /// class.
1646  std::vector<unsigned> RegLimit;
1647
1648public:
1649  RegReductionPQBase(MachineFunction &mf,
1650                     bool hasReadyFilter,
1651                     bool tracksrp,
1652                     bool srcorder,
1653                     const TargetInstrInfo *tii,
1654                     const TargetRegisterInfo *tri,
1655                     const TargetLowering *tli)
1656    : SchedulingPriorityQueue(hasReadyFilter),
1657      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
1658      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(nullptr) {
1659    if (TracksRegPressure) {
1660      unsigned NumRC = TRI->getNumRegClasses();
1661      RegLimit.resize(NumRC);
1662      RegPressure.resize(NumRC);
1663      std::fill(RegLimit.begin(), RegLimit.end(), 0);
1664      std::fill(RegPressure.begin(), RegPressure.end(), 0);
1665      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1666             E = TRI->regclass_end(); I != E; ++I)
1667        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
1668    }
1669  }
1670
1671  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
1672    scheduleDAG = scheduleDag;
1673  }
1674
1675  ScheduleHazardRecognizer* getHazardRec() {
1676    return scheduleDAG->getHazardRec();
1677  }
1678
1679  void initNodes(std::vector<SUnit> &sunits) override;
1680
1681  void addNode(const SUnit *SU) override;
1682
1683  void updateNode(const SUnit *SU) override;
1684
1685  void releaseState() override {
1686    SUnits = nullptr;
1687    SethiUllmanNumbers.clear();
1688    std::fill(RegPressure.begin(), RegPressure.end(), 0);
1689  }
1690
1691  unsigned getNodePriority(const SUnit *SU) const;
1692
1693  unsigned getNodeOrdering(const SUnit *SU) const {
1694    if (!SU->getNode()) return 0;
1695
1696    return SU->getNode()->getIROrder();
1697  }
1698
1699  bool empty() const override { return Queue.empty(); }
1700
1701  void push(SUnit *U) override {
1702    assert(!U->NodeQueueId && "Node in the queue already");
1703    U->NodeQueueId = ++CurQueueId;
1704    Queue.push_back(U);
1705  }
1706
1707  void remove(SUnit *SU) override {
1708    assert(!Queue.empty() && "Queue is empty!");
1709    assert(SU->NodeQueueId != 0 && "Not in queue!");
1710    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
1711                                                 SU);
1712    if (I != std::prev(Queue.end()))
1713      std::swap(*I, Queue.back());
1714    Queue.pop_back();
1715    SU->NodeQueueId = 0;
1716  }
1717
1718  bool tracksRegPressure() const override { return TracksRegPressure; }
1719
1720  void dumpRegPressure() const;
1721
1722  bool HighRegPressure(const SUnit *SU) const;
1723
1724  bool MayReduceRegPressure(SUnit *SU) const;
1725
1726  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;
1727
1728  void scheduledNode(SUnit *SU) override;
1729
1730  void unscheduledNode(SUnit *SU) override;
1731
1732protected:
1733  bool canClobber(const SUnit *SU, const SUnit *Op);
1734  void AddPseudoTwoAddrDeps();
1735  void PrescheduleNodesWithMultipleUses();
1736  void CalculateSethiUllmanNumbers();
1737};
1738
1739template<class SF>
1740static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
1741  std::vector<SUnit *>::iterator Best = Q.begin();
1742  for (std::vector<SUnit *>::iterator I = std::next(Q.begin()),
1743         E = Q.end(); I != E; ++I)
1744    if (Picker(*Best, *I))
1745      Best = I;
1746  SUnit *V = *Best;
1747  if (Best != std::prev(Q.end()))
1748    std::swap(*Best, Q.back());
1749  Q.pop_back();
1750  return V;
1751}
1752
1753template<class SF>
1754SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
1755#ifndef NDEBUG
1756  if (DAG->StressSched) {
1757    reverse_sort<SF> RPicker(Picker);
1758    return popFromQueueImpl(Q, RPicker);
1759  }
1760#endif
1761  (void)DAG;
1762  return popFromQueueImpl(Q, Picker);
1763}
1764
1765template<class SF>
1766class RegReductionPriorityQueue : public RegReductionPQBase {
1767  SF Picker;
1768
1769public:
1770  RegReductionPriorityQueue(MachineFunction &mf,
1771                            bool tracksrp,
1772                            bool srcorder,
1773                            const TargetInstrInfo *tii,
1774                            const TargetRegisterInfo *tri,
1775                            const TargetLowering *tli)
1776    : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder,
1777                         tii, tri, tli),
1778      Picker(this) {}
1779
1780  bool isBottomUp() const override { return SF::IsBottomUp; }
1781
1782  bool isReady(SUnit *U) const override {
1783    return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
1784  }
1785
1786  SUnit *pop() override {
1787    if (Queue.empty()) return nullptr;
1788
1789    SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
1790    V->NodeQueueId = 0;
1791    return V;
1792  }
1793
1794#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1795  void dump(ScheduleDAG *DAG) const override {
1796    // Emulate pop() without clobbering NodeQueueIds.
1797    std::vector<SUnit*> DumpQueue = Queue;
1798    SF DumpPicker = Picker;
1799    while (!DumpQueue.empty()) {
1800      SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
1801      dbgs() << "Height " << SU->getHeight() << ": ";
1802      SU->dump(DAG);
1803    }
1804  }
1805#endif
1806};
1807
1808typedef RegReductionPriorityQueue<bu_ls_rr_sort>
1809BURegReductionPriorityQueue;
1810
1811typedef RegReductionPriorityQueue<src_ls_rr_sort>
1812SrcRegReductionPriorityQueue;
1813
1814typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
1815HybridBURRPriorityQueue;
1816
1817typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
1818ILPBURRPriorityQueue;
1819} // end anonymous namespace
1820
1821//===----------------------------------------------------------------------===//
1822//           Static Node Priority for Register Pressure Reduction
1823//===----------------------------------------------------------------------===//
1824
1825// Check for special nodes that bypass scheduling heuristics.
1826// Currently this pushes TokenFactor nodes down, but may be used for other
1827// pseudo-ops as well.
1828//
1829// Return -1 to schedule right above left, 1 for left above right.
1830// Return 0 if no bias exists.
1831static int checkSpecialNodes(const SUnit *left, const SUnit *right) {
1832  bool LSchedLow = left->isScheduleLow;
1833  bool RSchedLow = right->isScheduleLow;
1834  if (LSchedLow != RSchedLow)
1835    return LSchedLow < RSchedLow ? 1 : -1;
1836  return 0;
1837}
1838
1839/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
1840/// Smaller number is the higher priority.
1841static unsigned
1842CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
1843  unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum];
1844  if (SethiUllmanNumber != 0)
1845    return SethiUllmanNumber;
1846
1847  unsigned Extra = 0;
1848  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1849       I != E; ++I) {
1850    if (I->isCtrl()) continue;  // ignore chain preds
1851    SUnit *PredSU = I->getSUnit();
1852    unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers);
1853    if (PredSethiUllman > SethiUllmanNumber) {
1854      SethiUllmanNumber = PredSethiUllman;
1855      Extra = 0;
1856    } else if (PredSethiUllman == SethiUllmanNumber)
1857      ++Extra;
1858  }
1859
1860  SethiUllmanNumber += Extra;
1861
1862  if (SethiUllmanNumber == 0)
1863    SethiUllmanNumber = 1;
1864
1865  return SethiUllmanNumber;
1866}
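
// A small worked example of the recurrence above (illustrative only): a leaf
// SU with no data preds gets the number 1. An SU whose two data preds are
// both numbered 1 keeps the maximum (1) and counts one tie in Extra, giving
// 1 + 1 = 2. An SU with preds numbered 2 and 1 stays at 2, since only ties
// with the maximum add a register, matching the classical Sethi-Ullman
// labeling.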
1867
1868/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
1869/// scheduling units.
1870void RegReductionPQBase::CalculateSethiUllmanNumbers() {
1871  SethiUllmanNumbers.assign(SUnits->size(), 0);
1872
1873  for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
1874    CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
1875}
1876
1877void RegReductionPQBase::addNode(const SUnit *SU) {
1878  unsigned SUSize = SethiUllmanNumbers.size();
1879  if (SUnits->size() > SUSize)
1880    SethiUllmanNumbers.resize(SUSize*2, 0);
1881  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1882}
1883
1884void RegReductionPQBase::updateNode(const SUnit *SU) {
1885  SethiUllmanNumbers[SU->NodeNum] = 0;
1886  CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
1887}
1888
// A lower priority number means schedule further down the block. For
// bottom-up scheduling, SUs with lower priority numbers are popped before
// SUs with higher priority numbers.
1891unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
1892  assert(SU->NodeNum < SethiUllmanNumbers.size());
1893  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
1894  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1895    // CopyToReg should be close to its uses to facilitate coalescing and
1896    // avoid spilling.
1897    return 0;
1898  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1899      Opc == TargetOpcode::SUBREG_TO_REG ||
1900      Opc == TargetOpcode::INSERT_SUBREG)
1901    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1902    // close to their uses to facilitate coalescing.
1903    return 0;
1904  if (SU->NumSuccs == 0 && SU->NumPreds != 0)
    // If SU does not have a register use, i.e. it doesn't produce a value
    // that would be consumed (e.g. store), then it terminates a chain of
    // computation.  Give it a large SethiUllman number so it will be
    // scheduled right before its predecessors, so that it doesn't lengthen
    // their live ranges.
1910    return 0xffff;
1911  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1912    // If SU does not have a register def, schedule it close to its uses
1913    // because it does not lengthen any live ranges.
1914    return 0;
1915#if 1
1916  return SethiUllmanNumbers[SU->NodeNum];
1917#else
1918  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1919  if (SU->isCallOp) {
1920    // FIXME: This assumes all of the defs are used as call operands.
1921    int NP = (int)Priority - SU->getNode()->getNumValues();
1922    return (NP > 0) ? NP : 0;
1923  }
1924  return Priority;
1925#endif
1926}
1927
1928//===----------------------------------------------------------------------===//
1929//                     Register Pressure Tracking
1930//===----------------------------------------------------------------------===//
1931
1932void RegReductionPQBase::dumpRegPressure() const {
1933#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1934  for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1935         E = TRI->regclass_end(); I != E; ++I) {
1936    const TargetRegisterClass *RC = *I;
1937    unsigned Id = RC->getID();
1938    unsigned RP = RegPressure[Id];
1939    if (!RP) continue;
1940    DEBUG(dbgs() << TRI->getRegClassName(RC) << ": " << RP << " / "
1941          << RegLimit[Id] << '\n');
1942  }
1943#endif
1944}
1945
1946bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1947  if (!TLI)
1948    return false;
1949
1950  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
1951       I != E; ++I) {
1952    if (I->isCtrl())
1953      continue;
1954    SUnit *PredSU = I->getSUnit();
1955    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1956    // to cover the number of registers defined (they are all live).
1957    if (PredSU->NumRegDefsLeft == 0) {
1958      continue;
1959    }
1960    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1961         RegDefPos.IsValid(); RegDefPos.Advance()) {
1962      unsigned RCId, Cost;
1963      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
1964
1965      if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1966        return true;
1967    }
1968  }
1969  return false;
1970}
1971
1972bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1973  const SDNode *N = SU->getNode();
1974
1975  if (!N->isMachineOpcode() || !SU->NumSuccs)
1976    return false;
1977
1978  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1979  for (unsigned i = 0; i != NumDefs; ++i) {
1980    MVT VT = N->getSimpleValueType(i);
1981    if (!N->hasAnyUseOfValue(i))
1982      continue;
1983    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1984    if (RegPressure[RCId] >= RegLimit[RCId])
1985      return true;
1986  }
1987  return false;
1988}
1989
// Compute the register pressure contribution of this instruction by counting
// up for uses that are not live and down for defs. Only count register classes
// that are already under high pressure. As a side effect, compute the number
// of uses of registers that are already live.
//
// FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure,
// so it could probably be factored.
1997int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1998  LiveUses = 0;
1999  int PDiff = 0;
2000  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2001       I != E; ++I) {
2002    if (I->isCtrl())
2003      continue;
2004    SUnit *PredSU = I->getSUnit();
2005    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2006    // to cover the number of registers defined (they are all live).
2007    if (PredSU->NumRegDefsLeft == 0) {
2008      if (PredSU->getNode()->isMachineOpcode())
2009        ++LiveUses;
2010      continue;
2011    }
2012    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2013         RegDefPos.IsValid(); RegDefPos.Advance()) {
2014      MVT VT = RegDefPos.GetValue();
2015      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2016      if (RegPressure[RCId] >= RegLimit[RCId])
2017        ++PDiff;
2018    }
2019  }
2020  const SDNode *N = SU->getNode();
2021
2022  if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
2023    return PDiff;
2024
2025  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2026  for (unsigned i = 0; i != NumDefs; ++i) {
2027    MVT VT = N->getSimpleValueType(i);
2028    if (!N->hasAnyUseOfValue(i))
2029      continue;
2030    unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2031    if (RegPressure[RCId] >= RegLimit[RCId])
2032      --PDiff;
2033  }
2034  return PDiff;
2035}
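
// Illustrative example: one not-yet-live pred def in a class already at its
// limit contributes +1 to PDiff, while one used def of SU's own in such a
// class contributes -1, netting 0; LiveUses counts only uses whose defining
// node has no register defs left (i.e. values that are already live).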
2036
2037void RegReductionPQBase::scheduledNode(SUnit *SU) {
2038  if (!TracksRegPressure)
2039    return;
2040
2041  if (!SU->getNode())
2042    return;
2043
2044  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2045       I != E; ++I) {
2046    if (I->isCtrl())
2047      continue;
2048    SUnit *PredSU = I->getSUnit();
2049    // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2050    // to cover the number of registers defined (they are all live).
2051    if (PredSU->NumRegDefsLeft == 0) {
2052      continue;
2053    }
2054    // FIXME: The ScheduleDAG currently loses information about which of a
2055    // node's values is consumed by each dependence. Consequently, if the node
2056    // defines multiple register classes, we don't know which to pressurize
2057    // here. Instead the following loop consumes the register defs in an
2058    // arbitrary order. At least it handles the common case of clustered loads
2059    // to the same class. For precise liveness, each SDep needs to indicate the
2060    // result number. But that tightly couples the ScheduleDAG with the
2061    // SelectionDAG making updates tricky. A simpler hack would be to attach a
2062    // value type or register class to SDep.
2063    //
2064    // The most important aspect of register tracking is balancing the increase
2065    // here with the reduction further below. Note that this SU may use multiple
    // defs in PredSU. They can't be determined here, but we've already
2067    // compensated by reducing NumRegDefsLeft in PredSU during
2068    // ScheduleDAGSDNodes::AddSchedEdges.
2069    --PredSU->NumRegDefsLeft;
2070    unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2071    for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2072         RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2073      if (SkipRegDefs)
2074        continue;
2075
2076      unsigned RCId, Cost;
2077      GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2078      RegPressure[RCId] += Cost;
2079      break;
2080    }
2081  }
2082
2083  // We should have this assert, but there may be dead SDNodes that never
2084  // materialize as SUnits, so they don't appear to generate liveness.
2085  //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2086  int SkipRegDefs = (int)SU->NumRegDefsLeft;
2087  for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2088       RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2089    if (SkipRegDefs > 0)
2090      continue;
2091    unsigned RCId, Cost;
2092    GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost, MF);
2093    if (RegPressure[RCId] < Cost) {
2094      // Register pressure tracking is imprecise. This can happen. But we try
2095      // hard not to let it happen because it likely results in poor scheduling.
2096      DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
2097      RegPressure[RCId] = 0;
    } else {
2100      RegPressure[RCId] -= Cost;
2101    }
2102  }
2103  dumpRegPressure();
2104}
2105
2106void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2107  if (!TracksRegPressure)
2108    return;
2109
2110  const SDNode *N = SU->getNode();
2111  if (!N) return;
2112
2113  if (!N->isMachineOpcode()) {
2114    if (N->getOpcode() != ISD::CopyToReg)
2115      return;
2116  } else {
2117    unsigned Opc = N->getMachineOpcode();
2118    if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2119        Opc == TargetOpcode::INSERT_SUBREG ||
2120        Opc == TargetOpcode::SUBREG_TO_REG ||
2121        Opc == TargetOpcode::REG_SEQUENCE ||
2122        Opc == TargetOpcode::IMPLICIT_DEF)
2123      return;
2124  }
2125
2126  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2127       I != E; ++I) {
2128    if (I->isCtrl())
2129      continue;
2130    SUnit *PredSU = I->getSUnit();
2131    // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2132    // counts data deps.
2133    if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2134      continue;
2135    const SDNode *PN = PredSU->getNode();
2136    if (!PN->isMachineOpcode()) {
2137      if (PN->getOpcode() == ISD::CopyFromReg) {
2138        MVT VT = PN->getSimpleValueType(0);
2139        unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2140        RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2141      }
2142      continue;
2143    }
2144    unsigned POpc = PN->getMachineOpcode();
2145    if (POpc == TargetOpcode::IMPLICIT_DEF)
2146      continue;
2147    if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2148        POpc == TargetOpcode::INSERT_SUBREG ||
2149        POpc == TargetOpcode::SUBREG_TO_REG) {
2150      MVT VT = PN->getSimpleValueType(0);
2151      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2152      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2153      continue;
2154    }
2155    unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2156    for (unsigned i = 0; i != NumDefs; ++i) {
2157      MVT VT = PN->getSimpleValueType(i);
2158      if (!PN->hasAnyUseOfValue(i))
2159        continue;
2160      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2161      if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2162        // Register pressure tracking is imprecise. This can happen.
2163        RegPressure[RCId] = 0;
2164      else
2165        RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2166    }
2167  }
2168
2169  // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2170  // may transfer data dependencies to CopyToReg.
2171  if (SU->NumSuccs && N->isMachineOpcode()) {
2172    unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2173    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2174      MVT VT = N->getSimpleValueType(i);
2175      if (VT == MVT::Glue || VT == MVT::Other)
2176        continue;
2177      if (!N->hasAnyUseOfValue(i))
2178        continue;
2179      unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2180      RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2181    }
2182  }
2183
2184  dumpRegPressure();
2185}
2186
2187//===----------------------------------------------------------------------===//
2188//           Dynamic Node Priority for Register Pressure Reduction
2189//===----------------------------------------------------------------------===//
2190
2191/// closestSucc - Returns the scheduled cycle of the successor which is
2192/// closest to the current cycle.
2193static unsigned closestSucc(const SUnit *SU) {
2194  unsigned MaxHeight = 0;
2195  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2196       I != E; ++I) {
2197    if (I->isCtrl()) continue;  // ignore chain succs
2198    unsigned Height = I->getSUnit()->getHeight();
    // If there is a bunch of CopyToRegs stacked up, they should be considered
    // to be at the same position.
2201    if (I->getSUnit()->getNode() &&
2202        I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2203      Height = closestSucc(I->getSUnit())+1;
2204    if (Height > MaxHeight)
2205      MaxHeight = Height;
2206  }
2207  return MaxHeight;
2208}
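
// Illustrative example: for data successors at heights {3, 5} plus a
// CopyToReg successor whose own closest successor is at height 6, the result
// is max(3, 5, 6 + 1) = 7, since the stacked CopyToReg is treated as sitting
// one above its own closest successor.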
2209
/// calcMaxScratches - Returns a cost estimate of the worst-case requirement
/// for scratch registers, i.e. the number of data dependencies.
2212static unsigned calcMaxScratches(const SUnit *SU) {
2213  unsigned Scratches = 0;
2214  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2215       I != E; ++I) {
2216    if (I->isCtrl()) continue;  // ignore chain preds
2217    Scratches++;
2218  }
2219  return Scratches;
2220}
2221
2222/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2223/// CopyFromReg from a virtual register.
2224static bool hasOnlyLiveInOpers(const SUnit *SU) {
2225  bool RetVal = false;
2226  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2227       I != E; ++I) {
2228    if (I->isCtrl()) continue;
2229    const SUnit *PredSU = I->getSUnit();
2230    if (PredSU->getNode() &&
2231        PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
2232      unsigned Reg =
2233        cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
2234      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2235        RetVal = true;
2236        continue;
2237      }
2238    }
2239    return false;
2240  }
2241  return RetVal;
2242}
2243
2244/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
2245/// CopyToReg to a virtual register. This SU def is probably a liveout and
2246/// it has no other use. It should be scheduled closer to the terminator.
2247static bool hasOnlyLiveOutUses(const SUnit *SU) {
2248  bool RetVal = false;
2249  for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2250       I != E; ++I) {
2251    if (I->isCtrl()) continue;
2252    const SUnit *SuccSU = I->getSUnit();
2253    if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
2254      unsigned Reg =
2255        cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
2256      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
2257        RetVal = true;
2258        continue;
2259      }
2260    }
2261    return false;
2262  }
2263  return RetVal;
2264}
2265
2266// Set isVRegCycle for a node with only live in opers and live out uses. Also
2267// set isVRegCycle for its CopyFromReg operands.
2268//
2269// This is only relevant for single-block loops, in which case the VRegCycle
2270// node is likely an induction variable in which the operand and target virtual
2271// registers should be coalesced (e.g. pre/post increment values). Setting the
2272// isVRegCycle flag helps the scheduler prioritize other uses of the same
2273// CopyFromReg so that this node becomes the virtual register "kill". This
2274// avoids interference between the values live in and out of the block and
2275// eliminates a copy inside the loop.
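//
// Illustrative sketch (hypothetical IR): in a single-block loop computing
//   %next = add %iv, 1    ; CopyFromReg(%iv) -> add -> CopyToReg(%iv)
// the add has only live-in operands and live-out uses, so it and its
// CopyFromReg operand are flagged isVRegCycle; other uses of %iv are then
// favored first, this increment becomes the final use of %iv, and %iv can
// coalesce with %next.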
2276static void initVRegCycle(SUnit *SU) {
2277  if (DisableSchedVRegCycle)
2278    return;
2279
2280  if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
2281    return;
2282
2283  DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
2284
2285  SU->isVRegCycle = true;
2286
2287  for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2288       I != E; ++I) {
2289    if (I->isCtrl()) continue;
2290    I->getSUnit()->isVRegCycle = true;
2291  }
2292}
2293
2294// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
2295// CopyFromReg operands. We should no longer penalize other uses of this VReg.
2296static void resetVRegCycle(SUnit *SU) {
2297  if (!SU->isVRegCycle)
2298    return;
2299
2300  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2301       I != E; ++I) {
2302    if (I->isCtrl()) continue;  // ignore chain preds
2303    SUnit *PredSU = I->getSUnit();
2304    if (PredSU->isVRegCycle) {
2305      assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
2306             "VRegCycle def must be CopyFromReg");
2307      I->getSUnit()->isVRegCycle = 0;
2308    }
2309  }
2310}
2311
2312// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
2313// means a node that defines the VRegCycle has not been scheduled yet.
2314static bool hasVRegCycleUse(const SUnit *SU) {
2315  // If this SU also defines the VReg, don't hoist it as a "use".
2316  if (SU->isVRegCycle)
2317    return false;
2318
2319  for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
2320       I != E; ++I) {
2321    if (I->isCtrl()) continue;  // ignore chain preds
2322    if (I->getSUnit()->isVRegCycle &&
2323        I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2324      DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2325      return true;
2326    }
2327  }
2328  return false;
2329}
2330
2331// Check for either a dependence (latency) or resource (hazard) stall.
2332//
2333// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2334static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2335  if ((int)SPQ->getCurCycle() < Height) return true;
2336  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2337      != ScheduleHazardRecognizer::NoHazard)
2338    return true;
2339  return false;
2340}
2341
2342// Return -1 if left has higher priority, 1 if right has higher priority.
2343// Return 0 if latency-based priority is equivalent.
2344static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2345                            RegReductionPQBase *SPQ) {
2346  // Scheduling an instruction that uses a VReg whose postincrement has not yet
2347  // been scheduled will induce a copy. Model this as an extra cycle of latency.
2348  int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2349  int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2350  int LHeight = (int)left->getHeight() + LPenalty;
2351  int RHeight = (int)right->getHeight() + RPenalty;
2352
2353  bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2354    BUHasStall(left, LHeight, SPQ);
2355  bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2356    BUHasStall(right, RHeight, SPQ);
2357
  // If scheduling one of the nodes will cause a pipeline stall, delay it.
  // If scheduling both of the nodes will cause pipeline stalls, sort them
  // according to their height.
2361  if (LStall) {
2362    if (!RStall)
2363      return 1;
2364    if (LHeight != RHeight)
2365      return LHeight > RHeight ? 1 : -1;
2366  } else if (RStall)
2367    return -1;
2368
2369  // If either node is scheduling for latency, sort them by height/depth
2370  // and latency.
2371  if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2372                     right->SchedulingPref == Sched::ILP)) {
    // If neither instruction stalls (!LStall && !RStall) and the
    // HazardRecognizer is enabled, grouping instructions by cycle, then height
    // is already covered, so only depth matters. We also reach this point if
    // both stall but have the same height.
2377    if (!SPQ->getHazardRec()->isEnabled()) {
2378      if (LHeight != RHeight)
2379        return LHeight > RHeight ? 1 : -1;
2380    }
2381    int LDepth = left->getDepth() - LPenalty;
2382    int RDepth = right->getDepth() - RPenalty;
2383    if (LDepth != RDepth) {
2384      DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2385            << ") depth " << LDepth << " vs SU (" << right->NodeNum
2386            << ") depth " << RDepth << "\n");
2387      return LDepth < RDepth ? 1 : -1;
2388    }
2389    if (left->Latency != right->Latency)
2390      return left->Latency > right->Latency ? 1 : -1;
2391  }
2392  return 0;
2393}
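
// Illustrative reading: with CurCycle = 4, a candidate at height 6 still
// stalls while one at height 3 does not, so the non-stalling one wins. If
// both stall, the smaller height (the shorter remaining stall) is popped
// first; otherwise height (when no hazard recognizer is in use), then depth,
// then Latency break the tie.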
2394
2395static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2396  // Schedule physical register definitions close to their use. This is
2397  // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2398  // long as shortening physreg live ranges is generally good, we can defer
2399  // creating a subtarget hook.
2400  if (!DisableSchedPhysRegJoin) {
2401    bool LHasPhysReg = left->hasPhysRegDefs;
2402    bool RHasPhysReg = right->hasPhysRegDefs;
2403    if (LHasPhysReg != RHasPhysReg) {
2404      #ifndef NDEBUG
2405      static const char *const PhysRegMsg[] = { " has no physreg",
2406                                                " defines a physreg" };
2407      #endif
2408      DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2409            << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2410            << PhysRegMsg[RHasPhysReg] << "\n");
2411      return LHasPhysReg < RHasPhysReg;
2412    }
2413  }
2414
  // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2416  unsigned LPriority = SPQ->getNodePriority(left);
2417  unsigned RPriority = SPQ->getNodePriority(right);
2418
  // Be really careful about hoisting call operands above previous calls.
  // Only allow it if it would reduce register pressure.
2421  if (left->isCall && right->isCallOp) {
2422    unsigned RNumVals = right->getNode()->getNumValues();
2423    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2424  }
2425  if (right->isCall && left->isCallOp) {
2426    unsigned LNumVals = left->getNode()->getNumValues();
2427    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2428  }
2429
2430  if (LPriority != RPriority)
2431    return LPriority > RPriority;
2432
  // If one or both of the nodes are calls and their Sethi-Ullman numbers are
  // the same, then keep source order.
2435  if (left->isCall || right->isCall) {
2436    unsigned LOrder = SPQ->getNodeOrdering(left);
2437    unsigned ROrder = SPQ->getNodeOrdering(right);
2438
    // Prefer the node with the lower non-zero order number; a zero order
    // number always has the lowest preference.
2441    if ((LOrder || ROrder) && LOrder != ROrder)
2442      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2443  }
2444
  // Try to schedule def + use closer together when Sethi-Ullman numbers are
  // the same.
2446  // e.g.
2447  // t1 = op t2, c1
2448  // t3 = op t4, c2
2449  //
2450  // and the following instructions are both ready.
2451  // t2 = op c3
2452  // t4 = op c4
2453  //
2454  // Then schedule t2 = op first.
2455  // i.e.
2456  // t4 = op c4
2457  // t2 = op c3
2458  // t1 = op t2, c1
2459  // t3 = op t4, c2
2460  //
2461  // This creates more short live intervals.
2462  unsigned LDist = closestSucc(left);
2463  unsigned RDist = closestSucc(right);
2464  if (LDist != RDist)
2465    return LDist < RDist;
2466
  // How many registers become live when the node is scheduled.
2468  unsigned LScratch = calcMaxScratches(left);
2469  unsigned RScratch = calcMaxScratches(right);
2470  if (LScratch != RScratch)
2471    return LScratch > RScratch;
2472
2473  // Comparing latency against a call makes little sense unless the node
2474  // is register pressure-neutral.
2475  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2476    return (left->NodeQueueId > right->NodeQueueId);
2477
2478  // Do not compare latencies when one or both of the nodes are calls.
2479  if (!DisableSchedCycles &&
2480      !(left->isCall || right->isCall)) {
2481    int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
2482    if (result != 0)
2483      return result > 0;
  } else {
2486    if (left->getHeight() != right->getHeight())
2487      return left->getHeight() > right->getHeight();
2488
2489    if (left->getDepth() != right->getDepth())
2490      return left->getDepth() < right->getDepth();
2491  }
2492
2493  assert(left->NodeQueueId && right->NodeQueueId &&
2494         "NodeQueueId cannot be zero");
2495  return (left->NodeQueueId > right->NodeQueueId);
2496}
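
// A recap of the cascade above: physreg-defining nodes pop first (unless
// disabled); then the smaller Sethi-Ullman priority wins, damped for call
// operands; source order decides between calls; then the node whose nearest
// use is closest to the scheduling frontier (larger closestSucc) wins; then
// the smaller scratch estimate (fewer registers becoming live); then the
// latency comparison when neither node is a call; and finally the node that
// entered the queue earlier, which keeps the ordering deterministic.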
2497
2498// Bottom up
2499bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2500  if (int res = checkSpecialNodes(left, right))
2501    return res > 0;
2502
2503  return BURRSort(left, right, SPQ);
2504}
2505
2506// Source order, otherwise bottom up.
2507bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2508  if (int res = checkSpecialNodes(left, right))
2509    return res > 0;
2510
2511  unsigned LOrder = SPQ->getNodeOrdering(left);
2512  unsigned ROrder = SPQ->getNodeOrdering(right);
2513
  // Prefer the node with the lower non-zero order number; a zero order
  // number always has the lowest preference.
2516  if ((LOrder || ROrder) && LOrder != ROrder)
2517    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2518
2519  return BURRSort(left, right, SPQ);
2520}
2521
2522// If the time between now and when the instruction will be ready can cover
2523// the spill code, then avoid adding it to the ready queue. This gives long
2524// stalls highest priority and allows hoisting across calls. It should also
2525// speed up processing the available queue.
2526bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2527  static const unsigned ReadyDelay = 3;
2528
2529  if (SPQ->MayReduceRegPressure(SU)) return true;
2530
2531  if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
2532
2533  if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
2534      != ScheduleHazardRecognizer::NoHazard)
2535    return false;
2536
2537  return true;
2538}
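
// Illustrative example: with ReadyDelay = 3 and CurCycle = 2, an SU at
// height 6 is held back (6 > 2 + 3) unless it may reduce register pressure,
// while an SU at height 5 is admitted provided the hazard recognizer sees no
// hazard three cycles ahead.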
2539
2540// Return true if right should be scheduled with higher priority than left.
2541bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2542  if (int res = checkSpecialNodes(left, right))
2543    return res > 0;
2544
2545  if (left->isCall || right->isCall)
2546    // No way to compute latency of calls.
2547    return BURRSort(left, right, SPQ);
2548
2549  bool LHigh = SPQ->HighRegPressure(left);
2550  bool RHigh = SPQ->HighRegPressure(right);
2551  // Avoid causing spills. If register pressure is high, schedule for
2552  // register pressure reduction.
2553  if (LHigh && !RHigh) {
2554    DEBUG(dbgs() << "  pressure SU(" << left->NodeNum << ") > SU("
2555          << right->NodeNum << ")\n");
2556    return true;
  } else if (!LHigh && RHigh) {
2559    DEBUG(dbgs() << "  pressure SU(" << right->NodeNum << ") > SU("
2560          << left->NodeNum << ")\n");
2561    return false;
2562  }
2563  if (!LHigh && !RHigh) {
2564    int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
2565    if (result != 0)
2566      return result > 0;
2567  }
2568  return BURRSort(left, right, SPQ);
2569}
2570
2571// Schedule as many instructions in each cycle as possible. So don't make an
2572// instruction available unless it is ready in the current cycle.
2573bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
2574  if (SU->getHeight() > CurCycle) return false;
2575
2576  if (SPQ->getHazardRec()->getHazardType(SU, 0)
2577      != ScheduleHazardRecognizer::NoHazard)
2578    return false;
2579
2580  return true;
2581}
2582
2583static bool canEnableCoalescing(SUnit *SU) {
2584  unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
2585  if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
2586    // CopyToReg should be close to its uses to facilitate coalescing and
2587    // avoid spilling.
2588    return true;
2589
2590  if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2591      Opc == TargetOpcode::SUBREG_TO_REG ||
2592      Opc == TargetOpcode::INSERT_SUBREG)
2593    // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
2594    // close to their uses to facilitate coalescing.
2595    return true;
2596
2597  if (SU->NumPreds == 0 && SU->NumSuccs != 0)
2598    // If SU does not have a register def, schedule it close to its uses
2599    // because it does not lengthen any live ranges.
2600    return true;
2601
2602  return false;
2603}
2604
2605// list-ilp is currently an experimental scheduler that allows various
2606// heuristics to be enabled prior to the normal register reduction logic.
2607bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
2608  if (int res = checkSpecialNodes(left, right))
2609    return res > 0;
2610
2611  if (left->isCall || right->isCall)
2612    // No way to compute latency of calls.
2613    return BURRSort(left, right, SPQ);
2614
2615  unsigned LLiveUses = 0, RLiveUses = 0;
2616  int LPDiff = 0, RPDiff = 0;
2617  if (!DisableSchedRegPressure || !DisableSchedLiveUses) {
2618    LPDiff = SPQ->RegPressureDiff(left, LLiveUses);
2619    RPDiff = SPQ->RegPressureDiff(right, RLiveUses);
2620  }
2621  if (!DisableSchedRegPressure && LPDiff != RPDiff) {
2622    DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff
2623          << " != SU(" << right->NodeNum << "): " << RPDiff << "\n");
2624    return LPDiff > RPDiff;
2625  }
2626
2627  if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) {
2628    bool LReduce = canEnableCoalescing(left);
2629    bool RReduce = canEnableCoalescing(right);
2630    if (LReduce && !RReduce) return false;
2631    if (RReduce && !LReduce) return true;
2632  }
2633
2634  if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
2635    DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
2636          << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
2637    return LLiveUses < RLiveUses;
2638  }
2639
2640  if (!DisableSchedStalls) {
2641    bool LStall = BUHasStall(left, left->getHeight(), SPQ);
2642    bool RStall = BUHasStall(right, right->getHeight(), SPQ);
2643    if (LStall != RStall)
2644      return left->getHeight() > right->getHeight();
2645  }
2646
2647  if (!DisableSchedCriticalPath) {
2648    int spread = (int)left->getDepth() - (int)right->getDepth();
2649    if (std::abs(spread) > MaxReorderWindow) {
2650      DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): "
2651            << left->getDepth() << " != SU(" << right->NodeNum << "): "
2652            << right->getDepth() << "\n");
2653      return left->getDepth() < right->getDepth();
2654    }
2655  }
2656
2657  if (!DisableSchedHeight && left->getHeight() != right->getHeight()) {
2658    int spread = (int)left->getHeight() - (int)right->getHeight();
2659    if (std::abs(spread) > MaxReorderWindow)
2660      return left->getHeight() > right->getHeight();
2661  }
2662
2663  return BURRSort(left, right, SPQ);
2664}
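
// A recap of the flags above: unless individually disabled, list-ilp
// consults the register pressure difference first, then
// coalescing-friendliness when pressure is positive, then live-use counts,
// stall behavior, critical-path depth and height (each acting only outside
// MaxReorderWindow), and finally falls back to the standard BURRSort order.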
2665
2666void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
2667  SUnits = &sunits;
2668  // Add pseudo dependency edges for two-address nodes.
2669  if (!Disable2AddrHack)
2670    AddPseudoTwoAddrDeps();
2671  // Reroute edges to nodes with multiple uses.
2672  if (!TracksRegPressure && !SrcOrder)
2673    PrescheduleNodesWithMultipleUses();
2674  // Calculate node priorities.
2675  CalculateSethiUllmanNumbers();
2676
2677  // For single block loops, mark nodes that look like canonical IV increments.
2678  if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2679    for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2680      initVRegCycle(&sunits[i]);
2681    }
2682  }
2683}
2684
2685//===----------------------------------------------------------------------===//
2686//                    Preschedule for Register Pressure
2687//===----------------------------------------------------------------------===//
2688
2689bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2690  if (SU->isTwoAddress) {
2691    unsigned Opc = SU->getNode()->getMachineOpcode();
2692    const MCInstrDesc &MCID = TII->get(Opc);
2693    unsigned NumRes = MCID.getNumDefs();
2694    unsigned NumOps = MCID.getNumOperands() - NumRes;
2695    for (unsigned i = 0; i != NumOps; ++i) {
2696      if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2697        SDNode *DU = SU->getNode()->getOperand(i).getNode();
2698        if (DU->getNodeId() != -1 &&
2699            Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2700          return true;
2701      }
2702    }
2703  }
2704  return false;
2705}
2706
/// canClobberReachingPhysRegUse - True if SU would clobber one of its
/// successors' explicit physregs whose definition can reach DepSU,
/// i.e. DepSU should not be scheduled above SU.
2710static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2711                                         ScheduleDAGRRList *scheduleDAG,
2712                                         const TargetInstrInfo *TII,
2713                                         const TargetRegisterInfo *TRI) {
2714  const uint16_t *ImpDefs
2715    = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2716  const uint32_t *RegMask = getNodeRegMask(SU->getNode());
  if (!ImpDefs && !RegMask)
2718    return false;
2719
2720  for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2721       SI != SE; ++SI) {
2722    SUnit *SuccSU = SI->getSUnit();
2723    for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2724           PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2725      if (!PI->isAssignedRegDep())
2726        continue;
2727
2728      if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
2729          scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2730        return true;
2731
2732      if (ImpDefs)
2733        for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2734          // Return true if SU clobbers this physical register use and the
2735          // definition of the register reaches from DepSU. IsReachable queries
2736          // a topological forward sort of the DAG (following the successors).
2737          if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2738              scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2739            return true;
2740    }
2741  }
2742  return false;
2743}
2744
2745/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2746/// physical register defs.
2747static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
2748                                  const TargetInstrInfo *TII,
2749                                  const TargetRegisterInfo *TRI) {
2750  SDNode *N = SuccSU->getNode();
2751  unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2752  const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
2753  assert(ImpDefs && "Caller should check hasPhysRegDefs");
2754  for (const SDNode *SUNode = SU->getNode(); SUNode;
2755       SUNode = SUNode->getGluedNode()) {
2756    if (!SUNode->isMachineOpcode())
2757      continue;
2758    const uint16_t *SUImpDefs =
2759      TII->get(SUNode->getMachineOpcode()).getImplicitDefs();
2760    const uint32_t *SURegMask = getNodeRegMask(SUNode);
2761    if (!SUImpDefs && !SURegMask)
2762      continue;
2763    for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2764      MVT VT = N->getSimpleValueType(i);
2765      if (VT == MVT::Glue || VT == MVT::Other)
2766        continue;
2767      if (!N->hasAnyUseOfValue(i))
2768        continue;
2769      unsigned Reg = ImpDefs[i - NumDefs];
2770      if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg))
2771        return true;
2772      if (!SUImpDefs)
2773        continue;
2774      for (;*SUImpDefs; ++SUImpDefs) {
2775        unsigned SUReg = *SUImpDefs;
2776        if (TRI->regsOverlap(Reg, SUReg))
2777          return true;
2778      }
2779    }
2780  }
2781  return false;
2782}
2783
2784/// PrescheduleNodesWithMultipleUses - Nodes with multiple uses
2785/// are not handled well by the general register pressure reduction
2786/// heuristics. When presented with code like this:
2787///
2788///      N
2789///    / |
2790///   /  |
2791///  U  store
2792///  |
2793/// ...
2794///
2795/// the heuristics tend to push the store up, but since the
2796/// operand of the store has another use (U), this would increase
2797/// the length of that other use (the U->N edge).
2798///
2799/// This function transforms code like the above to route U's
2800/// dependence through the store when possible, like this:
2801///
2802///      N
2803///      ||
2804///      ||
2805///     store
2806///       |
2807///       U
2808///       |
2809///      ...
2810///
2811/// This results in the store being scheduled immediately
2812/// after N, which shortens the U->N live range, reducing
2813/// register pressure.
2814///
2815void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
2816  // Visit all the nodes in topological order, working top-down.
2817  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2818    SUnit *SU = &(*SUnits)[i];
2819    // For now, only look at nodes with no data successors, such as stores.
2820    // These are especially important, due to the heuristics in
2821    // getNodePriority for nodes with no data successors.
2822    if (SU->NumSuccs != 0)
2823      continue;
2824    // For now, only look at nodes with exactly one data predecessor.
2825    if (SU->NumPreds != 1)
2826      continue;
2827    // Avoid prescheduling copies to virtual registers, which don't behave
2828    // like other nodes from the perspective of scheduling heuristics.
2829    if (SDNode *N = SU->getNode())
2830      if (N->getOpcode() == ISD::CopyToReg &&
2831          TargetRegisterInfo::isVirtualRegister
2832            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2833        continue;
2834
2835    // Locate the single data predecessor.
2836    SUnit *PredSU = nullptr;
2837    for (SUnit::const_pred_iterator II = SU->Preds.begin(),
2838         EE = SU->Preds.end(); II != EE; ++II)
2839      if (!II->isCtrl()) {
2840        PredSU = II->getSUnit();
2841        break;
2842      }
2843    assert(PredSU);
2844
2845    // Don't rewrite edges that carry physregs, because that requires additional
2846    // support infrastructure.
2847    if (PredSU->hasPhysRegDefs)
2848      continue;
2849    // Short-circuit the case where SU is PredSU's only data successor.
2850    if (PredSU->NumSuccs == 1)
2851      continue;
2852    // Avoid prescheduling to copies from virtual registers, which don't behave
2853    // like other nodes from the perspective of scheduling heuristics.
2854    if (SDNode *N = SU->getNode())
2855      if (N->getOpcode() == ISD::CopyFromReg &&
2856          TargetRegisterInfo::isVirtualRegister
2857            (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2858        continue;
2859
2860    // Perform checks on the successors of PredSU.
2861    for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2862         EE = PredSU->Succs.end(); II != EE; ++II) {
2863      SUnit *PredSuccSU = II->getSUnit();
2864      if (PredSuccSU == SU) continue;
2865      // If PredSU has another successor with no data successors, for
2866      // now don't attempt to choose either over the other.
2867      if (PredSuccSU->NumSuccs == 0)
2868        goto outer_loop_continue;
2869      // Don't break physical register dependencies.
2870      if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2871        if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2872          goto outer_loop_continue;
2873      // Don't introduce graph cycles.
2874      if (scheduleDAG->IsReachable(SU, PredSuccSU))
2875        goto outer_loop_continue;
2876    }
2877
2878    // Ok, the transformation is safe and the heuristics suggest it is
2879    // profitable. Update the graph.
2880    DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2881                 << " next to PredSU #" << PredSU->NodeNum
2882                 << " to guide scheduling in the presence of multiple uses\n");
2883    for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2884      SDep Edge = PredSU->Succs[i];
2885      assert(!Edge.isAssignedRegDep());
2886      SUnit *SuccSU = Edge.getSUnit();
2887      if (SuccSU != SU) {
2888        Edge.setSUnit(PredSU);
2889        scheduleDAG->RemovePred(SuccSU, Edge);
2890        scheduleDAG->AddPred(SU, Edge);
2891        Edge.setSUnit(SU);
2892        scheduleDAG->AddPred(SuccSU, Edge);
2893        --i;
2894      }
2895    }
2896  outer_loop_continue:;
2897  }
2898}
2899
/// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
/// it as a def&use operand, add a pseudo control edge from it to the other
/// node (if it won't create a cycle) so the two-address one will be scheduled
2903/// first (lower in the schedule). If both nodes are two-address, favor the
2904/// one that has a CopyToReg use (more likely to be a loop induction update).
2905/// If both are two-address, but one is commutable while the other is not
2906/// commutable, favor the one that's not commutable.
void RegReductionPQBase::AddPseudoTwoAddrDeps() {
  for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
    SUnit *SU = &(*SUnits)[i];
    if (!SU->isTwoAddress)
      continue;

    SDNode *Node = SU->getNode();
    if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
      continue;

    bool isLiveOut = hasOnlyLiveOutUses(SU);
    unsigned Opc = Node->getMachineOpcode();
    const MCInstrDesc &MCID = TII->get(Opc);
    unsigned NumRes = MCID.getNumDefs();
    unsigned NumOps = MCID.getNumOperands() - NumRes;
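    // Scan the use operands (which follow the NumRes defs in the operand
    // list) for ones tied to a def, i.e. two-address operands; TIED_TO
    // is -1 for operands with no such constraint.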
    for (unsigned j = 0; j != NumOps; ++j) {
      if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
        continue;
      SDNode *DU = SU->getNode()->getOperand(j).getNode();
      if (DU->getNodeId() == -1)
        continue;
      const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
      if (!DUSU) continue;
      for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
           E = DUSU->Succs.end(); I != E; ++I) {
        if (I->isCtrl()) continue;
        SUnit *SuccSU = I->getSUnit();
        if (SuccSU == SU)
          continue;
        // Be conservative. Ignore if nodes aren't at roughly the same
        // depth and height.
        if (SuccSU->getHeight() < SU->getHeight() &&
            (SU->getHeight() - SuccSU->getHeight()) > 1)
          continue;
        // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
        // constrains whatever is using the copy, instead of the copy
        // itself. In the case that the copy is coalesced, this
        // preserves the intent of the pseudo two-address heuristics.
        while (SuccSU->Succs.size() == 1 &&
               SuccSU->getNode()->isMachineOpcode() &&
               SuccSU->getNode()->getMachineOpcode() ==
                 TargetOpcode::COPY_TO_REGCLASS)
          SuccSU = SuccSU->Succs.front().getSUnit();
        // Don't constrain non-instruction nodes.
        if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
          continue;
        // Don't constrain nodes with physical register defs if the
        // predecessor can clobber them.
        if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
          if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
            continue;
        }
        // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
        // these may be coalesced away. We want them close to their uses.
        unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode();
        if (SuccOpc == TargetOpcode::EXTRACT_SUBREG ||
            SuccOpc == TargetOpcode::INSERT_SUBREG ||
            SuccOpc == TargetOpcode::SUBREG_TO_REG)
          continue;
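        // Add the artificial edge only when it is safe (no reachable physreg
        // clobber, no cycle) and the tie-breakers from the function comment
        // say SU, the two-address node, should be scheduled below SuccSU.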
        if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) &&
            (!canClobber(SuccSU, DUSU) ||
             (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
             (!SU->isCommutable && SuccSU->isCommutable)) &&
            !scheduleDAG->IsReachable(SuccSU, SU)) {
          DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #"
                       << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
          scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Artificial));
        }
      }
    }
  }
}

//===----------------------------------------------------------------------===//
//                         Public Constructor Functions
//===----------------------------------------------------------------------===//

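// Bottom-up register-reduction list scheduler. The queue's two boolean
// arguments (register-pressure tracking, source-order preference) are both
// off, and the ScheduleDAGRRList's 'false' disables latency modeling.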
llvm::ScheduleDAGSDNodes *
llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
                                 CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  BURegReductionPriorityQueue *PQ =
    new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

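// Like list-burr, but the queue's third argument turns on source-order
// preference, so ties are broken in favor of the original IR order.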
llvm::ScheduleDAGSDNodes *
llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();

  SrcRegReductionPriorityQueue *PQ =
    new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

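// Hybrid scheduler: the queue's 'true' enables register-pressure tracking
// and the ScheduleDAGRRList's 'true' enables latency modeling, letting the
// heuristics trade one off against the other.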
llvm::ScheduleDAGSDNodes *
llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
                                   CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  HybridBURRPriorityQueue *PQ =
    new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);

  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}

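// ILP scheduler: configured like list-hybrid (pressure tracking and latency
// both on), differing only in the queue's priority function.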
llvm::ScheduleDAGSDNodes *
llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
                                CodeGenOpt::Level OptLevel) {
  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
  const TargetLowering *TLI = IS->TLI;

  ILPBURRPriorityQueue *PQ =
    new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI);
  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
  PQ->setScheduleDAG(SD);
  return SD;
}