* Integrate Haifa instruction scheduler.

* Integrate regmove pass. See ChangeLog for details. git-svn-id: https://gcc.gnu.org/svn/gcc/trunk@14770 138bc75d-0d04-0410-961f-82ee72b054a4
author: Jeffrey A Law <law@cygnus.com> 1997-08-12 04:07:19 +0000
committer: Jeffrey A Law <law@cygnus.com> 1997-08-12 04:07:19 +0000
commit: e07ae5fd7ffed7230ad5e8c6b51845f5667ec167 (patch)
tree: d4545c8066f57414681646f5d32d3f9b95acbe1b
parent: cc94826bb787bcdac89169d50c2dc09c2558d6b1 (diff)
15 files changed, 10737 insertions, 10 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ff0aa85597e..608c3d73ba2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,76 @@
+Mon Aug 11 14:50:55 1997  Jeffrey A Law  (law@cygnus.com)
+
+	* Integrate Haifa instruction scheduler.
+	* Makefile.in (ALL_CFLAGS): Add SCHED_CFLAGS.  Prefix all references
+	to sched with $(SCHED_PREFIX).
+	* configure.in: Handle --enable-haifa.
+	* configure: Rebuilt.
+	* flags.h: Add new flags for haifa instruction scheduler.
+	* genattrtab.c (expand_units): For haifa, don't subtract one
+	when computing blockage.
+	* toplev.h (flag_schedule_interblock): Haifa scheduler flag.
+	(flag_schedule_speculative): Ditto.
+	(flag_schedule_speculative_load): Ditto.
+	(flag_schedule_speculative_load_dangerous): Ditto.
+	(flag_schedule_reverse_before_reload): Ditto.
+	(flag_schedule_reverse_after_reload): Ditto.
+	(flag_branch_on_count_reg): Ditto.
+	(f_options): Add Haifa switches.
+	(main): Turn off some Haifa options if appropriate macro is
+	defined.  Process Haifa switches.
+	* unroll.c (iteration_info): No longer static, since Haifa
+	scheduler uses it.
+	(unroll_loop): Inform HAIFA scheduler about loop unrolling factor.
+	* unroll.c (unroll_loop): Set loop_unroll_iter, loop_start_value.
+	* loop.h (loop_unroll_factor, loop_number): Add HAIFA decls.
+	* loop.h (loop_initial_value,loop_unroll_iter): New globals.
+	* loop.c (loop_optimize): If HAIFA is defined, allocate additional
+	storage for the Haifa scheduler.
+	(mark_loop_jump): If HAIFA defined, set LABEL_OUTSIDE_LOOP_P and
+	LABEL_NEXTREF.
+	(strength_reduce): If HAIFA and HAVE_decrement_and_branch_on_count
+	are defined, call analyze_loop_iterations and insert_bct to use
+	countdown loops.
+	(record_giv): Refine test for jumps out of loops if HAIFA is
+	defined.
+	(analyze_loop_iterations): New function to identify if we can use
+	a countdown loop.
+	(insert_bct): Insert countdown loop.
+	(instrument_loop_bct): Low level code to insert countdown loop.
+	(loop_number): Calculate UID of loop.
+	(indirect_jump_in_function_p): Return true if an indirect jump is
+	in the function.
+	(is_power_of_2): Return true if value is a power of 2.
+	(is_conditional_branch): Return true if insn is a conditional
+	jump.
+	(fix_bct_param): Process -fbct-{min,max}-N switches.
+	(check_bct_param): Return true if loop should be instrumented.
+	* loop.c (loop_initial_value,loop_unroll_iter): New globals.
+	(loop_optimize): Initialize.
+	(get_condition_for_loop): Ditto.
+	* loop.c (strength_reduce): Inside of code that uses #ifdef
+	HAVE_decrement_and_branch_on_count code, test it to make sure the
+	condition is true.
+	(instrument_loop_bct): Ditto.
+	* haifa-sched.c: New file.
+	
+
+	* Integrate regmove pass.
+	* Makefile.in (OBJS): Add regmove.o
+	(regmove.o): Add dependencies.
+	* flow.c (find_use_as_address): No longer static.
+	* rtl.h (find_use_as_address): Declare.
+	* toplev.c (regmove_dump, flag_regmove): Define.
+	(f_options): Add -fregmove.
+	(regmove_dump_file, regmove_time): Define.
+	(fatal_insn): Close the regmove dump file.
+	(compile_file): Initialize regmove_time; open/close the regmove dump
+	file as needed.  Print regmove time as needed.
+	(rest_of_compilation): Run regmove pass if requested, dump
+	RTL after regmove if requested.
+	(main): If -O2 or more, turn on regmove.  Handle dump switches.
+	* regmove.c: New file.
+	
 Mon Aug 11 14:15:02 1997  Jeffrey A Law  (law@cygnus.com)
 
 	* Integrate tlink patch from jason@cygnus.com
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 76d83967f48..adf5db49806 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -444,7 +444,7 @@ INTERNAL_CFLAGS = $(CROSS) -DIN_GCC @extra_c_flags@
 
 # This is the variable actually used when we compile.
 ALL_CFLAGS = $(INTERNAL_CFLAGS) $(X_CFLAGS) $(T_CFLAGS) $(CFLAGS) $(XCFLAGS) \
-	@DEFS@
+	@DEFS@ $(SCHED_CFLAGS)
 
 # Likewise.
 ALL_CPPFLAGS = $(CPPFLAGS) $(X_CPPFLAGS) $(T_CPPFLAGS)
@@ -548,14 +548,17 @@ BC_OBJS = bc-emit.o bc-optab.o
 # Bytecode header files constructed at build time; vmsconfig.com wants this.
 BC_ALL = bc-arity.h bc-opcode.h bc-opname.h
 
+SCHED_PREFIX = @sched_prefix@
+SCHED_CFLAGS = @sched_cflags@
+
 # Language-independent object files.
 OBJS = toplev.o version.o tree.o print-tree.o stor-layout.o fold-const.o \
  function.o stmt.o except.o expr.o calls.o expmed.o explow.o optabs.o \
- varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o \
- dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o \
+ varasm.o rtl.o print-rtl.o rtlanal.o emit-rtl.o real.o regmove.o \
+ dbxout.o sdbout.o dwarfout.o dwarf2out.o xcoffout.o bitmap.o alias.o \
  integrate.o jump.o cse.o loop.o unroll.o flow.o stupid.o combine.o \
  regclass.o local-alloc.o global.o reload.o reload1.o caller-save.o \
- insn-peep.o reorg.o alias.o sched.o final.o recog.o reg-stack.o \
+ insn-peep.o reorg.o $(SCHED_PREFIX)sched.o final.o recog.o reg-stack.o \
  insn-opinit.o insn-recog.o insn-extract.o insn-output.o insn-emit.o \
  profile.o insn-attrtab.o $(out_object_file) getpwd.o convert.o $(EXTRA_OBJS)
 
@@ -1326,7 +1329,9 @@ reorg.o : reorg.c $(CONFIG_H) $(RTL_H) conditions.h hard-reg-set.h \
    flags.h output.h
 alias.o : alias.c $(CONFIG_H) $(RTL_H) flags.h hard-reg-set.h regs.h \
    insn-codes.h
-sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \
+regmove.o : regmove.c $(CONFIG_H) $(RTL_H) insn-config.h recog.h output.h \
+  reload.h regs.h hard-reg-set.h flags.h expr.h insn-flags.h
+$(SCHED_PREFIX)sched.o : $(SCHED_PREFIX)sched.c $(CONFIG_H) $(RTL_H) $(BASIC_BLOCK_H) regs.h hard-reg-set.h \
    flags.h insn-config.h insn-attr.h
 final.o : final.c $(CONFIG_H) $(RTL_H) $(TREE_H) flags.h regs.h \
    recog.h conditions.h insn-config.h insn-attr.h except.h real.h output.h \
diff --git a/gcc/configure b/gcc/configure
index 64999d034ab..74dd86eeead 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -4361,6 +4361,26 @@ if [ ! -f Makefile.in ]; then
 	echo "source ${srcdir}/.gdbinit" >> .gdbinit
 fi
 
+# Set sched_prefix and sched_cflags to select the Haifa scheduler (--enable-haifa).
+sched_prefix=
+sched_cflags=
+if [ x$enable_haifa = xyes ]; then
+    echo "Using the Haifa scheduler."
+    sched_prefix=haifa-
+    sched_cflags=-DHAIFA
+fi
+
+
+if [ x$enable_haifa != x ]; then
+    # Explicitly remove files that need to be recompiled for the Haifa scheduler.
+    for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do
+	if [ -f $x ]; then
+	    echo "Removing $x"
+	    rm -f $x
+	fi
+    done
+fi
+
 # Process the language and host/target makefile fragments.
 ${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file"
 
@@ -4602,6 +4622,8 @@ s%@CC@%$CC%g
 s%@SET_MAKE@%$SET_MAKE%g
 s%@CPP@%$CPP%g
 s%@manext@%$manext%g
+s%@sched_prefix@%$sched_prefix%g
+s%@sched_cflags@%$sched_cflags%g
 s%@objext@%$objext%g
 s%@subdirs@%$subdirs%g
 s%@all_languages@%$all_languages%g
diff --git a/gcc/configure.in b/gcc/configure.in
index da6bdb630ea..17ac2f41701 100644
--- a/gcc/configure.in
+++ b/gcc/configure.in
@@ -3027,6 +3027,26 @@ if [[ ! -f Makefile.in ]]; then
 	echo "source ${srcdir}/.gdbinit" >> .gdbinit
 fi
 
+# Set sched_prefix and sched_cflags to select the Haifa scheduler (--enable-haifa).
+sched_prefix=
+sched_cflags=
+if [[ x$enable_haifa = xyes ]]; then
+    echo "Using the Haifa scheduler."
+    sched_prefix=haifa-
+    sched_cflags=-DHAIFA
+fi
+AC_SUBST(sched_prefix)
+AC_SUBST(sched_cflags)
+if [[ x$enable_haifa != x ]]; then
+    # Explicitly remove files that need to be recompiled for the Haifa scheduler.
+    for x in genattrtab.o toplev.o loop.o unroll.o *sched.o; do
+	if [[ -f $x ]]; then
+	    echo "Removing $x"
+	    rm -f $x
+	fi
+    done
+fi
+
 # Process the language and host/target makefile fragments.
 ${CONFIG_SHELL-/bin/sh} $srcdir/configure.frag $srcdir "$subdirs" "$dep_host_xmake_file" "$dep_tmake_file"
 
diff --git a/gcc/flags.h b/gcc/flags.h
index b5c6d75446b..58f5bc0af1c 100644
--- a/gcc/flags.h
+++ b/gcc/flags.h
@@ -304,6 +304,34 @@ extern int flag_shared_data;
 extern int flag_schedule_insns;
 extern int flag_schedule_insns_after_reload;
 
+#ifdef HAIFA
+/* The following flags have effect only for scheduling before register
+   allocation:
+
+   flag_schedule_interblock means schedule insns across basic blocks.
+   flag_schedule_speculative means allow speculative motion of non-load insns.
+   flag_schedule_speculative_load means allow speculative motion of some
+   load insns.
+   flag_schedule_speculative_load_dangerous allows speculative motion of more
+   load insns.
+   flag_schedule_reverse_before_reload means try to reverse original order
+   of insns (S).
+   flag_schedule_reverse_after_reload means try to reverse original order
+   of insns (R).  */
+
+extern int flag_schedule_interblock;
+extern int flag_schedule_speculative;
+extern int flag_schedule_speculative_load;
+extern int flag_schedule_speculative_load_dangerous;
+extern int flag_schedule_reverse_before_reload;
+extern int flag_schedule_reverse_after_reload;
+
+/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
+   by a cheaper branch, on a count register. */
+extern int flag_branch_on_count_reg;
+#endif  /* HAIFA */
+
+
 /* Nonzero means put things in delayed-branch slots if supported. */
 
 extern int flag_delayed_branch;
diff --git a/gcc/flow.c b/gcc/flow.c
index 44f1748e6e1..3ca1dcf256f 100644
--- a/gcc/flow.c
+++ b/gcc/flow.c
@@ -268,7 +268,6 @@ static void find_auto_inc		PROTO((regset, rtx, rtx));
 static void mark_used_regs		PROTO((regset, regset, rtx, int, rtx));
 static int try_pre_increment_1		PROTO((rtx));
 static int try_pre_increment		PROTO((rtx, rtx, HOST_WIDE_INT));
-static rtx find_use_as_address		PROTO((rtx, rtx, HOST_WIDE_INT));
 void dump_flow_info			PROTO((FILE *));
 
 /* Find basic blocks of the current function and perform data flow analysis.
@@ -2795,7 +2794,7 @@ try_pre_increment (insn, reg, amount)
    If REG appears more than once, or is used other than in such an address,
    return (rtx)1.  */
 
-static rtx
+rtx
 find_use_as_address (x, reg, plusconst)
      register rtx x;
      rtx reg;
diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c
index 14ecac1f1e5..bde3f5199a2 100644
--- a/gcc/genattrtab.c
+++ b/gcc/genattrtab.c
@@ -2003,6 +2003,9 @@ expand_units ()
 
 	  for (op = unit->ops; op; op = op->next)
 	    {
+#ifdef HAIFA
+	      rtx blockage = op->issue_exp;
+#else
 	      rtx blockage = operate_exp (POS_MINUS_OP, readycost,
 					  make_numeric_value (1));
 
@@ -2018,6 +2021,7 @@ expand_units ()
 				      blockage);
 
 	      blockage = operate_exp (MAX_OP, blockage, op->issue_exp);
+#endif
 	      blockage = simplify_knowing (blockage, unit->condexp);
 
 	      /* Add this op's contribution to MAX (BLOCKAGE (E,*)) and
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
new file mode 100644
index 00000000000..ced081416a6
--- /dev/null
+++ b/gcc/haifa-sched.c
@@ -0,0 +1,8713 @@
+/* Instruction scheduling pass.
+   Copyright (C) 1992, 1993, 1994, 1995, 1997 Free Software Foundation, Inc.
+   Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by,
+   and currently maintained by, Jim Wilson (wilson@cygnus.com)
+
+   This file is part of GNU CC.
+
+   GNU CC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GNU CC is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GNU CC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 59 Temple Place - Suite 330,
+   Boston, MA 02111-1307, USA.  */
+
+
+/* Instruction scheduling pass.
+
+   This pass implements list scheduling within basic blocks.  It is
+   run twice: (1) after flow analysis, but before register allocation,
+   and (2) after register allocation.
+
+   The first run performs interblock scheduling, moving insns between
+   different blocks in the same "region", and the second runs only
+   basic block scheduling.
+
+   Interblock motions performed are useful motions and speculative
+   motions, including speculative loads.  Motions requiring code
+   duplication are not supported.  The identification of motion type
+   and the check for validity of speculative motions requires
+   construction and analysis of the function's control flow graph.
+   The scheduler works as follows:
+
+   We compute insn priorities based on data dependencies.  Flow
+   analysis only creates a fraction of the data-dependencies we must
+   observe: namely, only those dependencies which the combiner can be
+   expected to use.  For this pass, we must therefore create the
+   remaining dependencies we need to observe: register dependencies,
+   memory dependencies, dependencies to keep function calls in order,
+   and the dependence between a conditional branch and the setting of
+   condition codes are all dealt with here.
+
+   The scheduler first traverses the data flow graph, starting with
+   the last instruction, and proceeding to the first, assigning values
+   to insn_priority as it goes.  This sorts the instructions
+   topologically by data dependence.
+
+   Once priorities have been established, we order the insns using
+   list scheduling.  This works as follows: starting with a list of
+   all the ready insns, and sorted according to priority number, we
+   schedule the insn from the end of the list by placing its
+   predecessors in the list according to their priority order.  We
+   consider this insn scheduled by setting the pointer to the "end" of
+   the list to point to the previous insn.  When an insn has no
+   predecessors, we either queue it until sufficient time has elapsed
+   or add it to the ready list.  As the instructions are scheduled or
+   when stalls are introduced, the queue advances and dumps insns into
+   the ready list.  When all insns down to the lowest priority have
+   been scheduled, the critical path of the basic block has been made
+   as short as possible.  The remaining insns are then scheduled in
+   remaining slots.
+
+   Function unit conflicts are resolved during forward list scheduling
+   by tracking the time when each insn is committed to the schedule
+   and from that, the time the function units it uses must be free.
+   As insns on the ready list are considered for scheduling, those
+   that would result in a blockage of the already committed insns are
+   queued until no blockage will result.
+
+   The following list shows the order in which we want to break ties
+   among insns in the ready list:
+
+   1.  choose insn with the longest path to end of bb, ties
+   broken by
+   2.  choose insn with least contribution to register pressure,
+   ties broken by
+   3.  prefer in-block upon interblock motion, ties broken by
+   4.  prefer useful upon speculative motion, ties broken by
+   5.  choose insn with largest control flow probability, ties
+   broken by
+   6.  choose insn with the least dependences upon the previously
+   scheduled insn, or finally
+   7.  choose insn with lowest UID.
+
+   Memory references complicate matters.  Only if we can be certain
+   that memory references are not part of the data dependency graph
+   (via true, anti, or output dependence), can we move operations past
+   memory references.  To first approximation, reads can be done
+   independently, while writes introduce dependencies.  Better
+   approximations will yield fewer dependencies.
+
+   Before reload, an extended analysis of interblock data dependences
+   is required for interblock scheduling.  This is performed in
+   compute_block_backward_dependences ().
+
+   Dependencies set up by memory references are treated in exactly the
+   same way as other dependencies, by using LOG_LINKS backward
+   dependences.  LOG_LINKS are translated into INSN_DEPEND forward
+   dependences for the purpose of forward list scheduling.
+
+   Having optimized the critical path, we may have also unduly
+   extended the lifetimes of some registers.  If an operation requires
+   that constants be loaded into registers, it is certainly desirable
+   to load those constants as early as necessary, but no earlier.
+   I.e., it will not do to load up a bunch of registers at the
+   beginning of a basic block only to use them at the end, if they
+   could be loaded later, since this may result in excessive register
+   utilization.
+
+   Note that since branches are never in basic blocks, but only end
+   basic blocks, this pass will not move branches.  But that is ok,
+   since we can use GNU's delayed branch scheduling pass to take care
+   of this case.
+
+   Also note that no further optimizations based on algebraic
+   identities are performed, so this pass would be a good one to
+   perform instruction splitting, such as breaking up a multiply
+   instruction into shifts and adds where that is profitable.
+
+   Given the memory aliasing analysis that this pass should perform,
+   it should be possible to remove redundant stores to memory, and to
+   load values from registers instead of hitting memory.
+
+   Before reload, speculative insns are moved only if a 'proof' exists
+   that no exception will be caused by this, and if no live registers
+   exist that inhibit the motion (live registers constraints are not
+   represented by data dependence edges).
+
+   This pass must update information that subsequent passes expect to
+   be correct.  Namely: reg_n_refs, reg_n_sets, reg_n_deaths,
+   reg_n_calls_crossed, and reg_live_length.  Also, basic_block_head,
+   basic_block_end.
+
+   The information in the line number notes is carefully retained by
+   this pass.  Notes that refer to the starting and ending of
+   exception regions are also carefully retained by this pass.  All
+   other NOTE insns are grouped in their same relative order at the
+   beginning of basic blocks and regions that have been scheduled.
+
+   The main entry point for this pass is schedule_insns(), called for
+   each function.  The work of the scheduler is organized in three
+   levels: (1) function level: insns are subject to splitting,
+   control-flow-graph is constructed, regions are computed (after
+   reload, each region is of one block), (2) region level: control
+   flow graph attributes required for interblock scheduling are
+   computed (dominators, reachability, etc.), data dependences and
+   priorities are computed, and (3) block level: insns in the block
+   are actually scheduled.  */
+
+#include <stdio.h>
+#include "config.h"
+#include "rtl.h"
+#include "basic-block.h"
+#include "regs.h"
+#include "hard-reg-set.h"
+#include "flags.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "except.h"
+
+extern char *reg_known_equiv_p;
+extern rtx *reg_known_value;
+
+#ifdef INSN_SCHEDULING
+
+/* enable interblock scheduling code */
+
+/* define INTERBLOCK_DEBUG for using the -fsched-max debugging facility */
+/* #define INTERBLOCK_DEBUG */
+
+/* target_units bitmask has 1 for each unit in the cpu.  It should be
+   possible to compute this variable from the machine description.
+   But currently it is computed by examining the insn list.  Since
+   this is only needed for visualization, it seems an acceptable
+   solution.  (For understanding the mapping of bits to units, see
+   definition of function_units[] in "insn-attrtab.c") */
+
+int target_units = 0;
+
+/* issue_rate is the number of insns that can be scheduled in the same
+   machine cycle.  It can be defined in the config/mach/mach.h file,
+   otherwise we set it to 1.  */
+
+static int issue_rate;
+
+#ifndef MACHINE_issue_rate
+#define get_issue_rate() (1)
+#endif
+
+/* sched_debug_count is used for debugging the scheduler by limiting
+   the number of scheduled insns.  It is controlled by the option
+   -fsched-max-N (N is a number).
+
+   sched-verbose controls the amount of debugging output the
+   scheduler prints.  It is controlled by -fsched-verbose-N:
+   N>0 and no -dSR : the output is directed to stderr.
+   N>=10 will direct the printouts to stderr (regardless of -dSR).
+   N=1: same as -dSR.
+   N=2: bb's probabilities, detailed ready list info, unit/insn info.
+   N=3: rtl at abort point, control-flow, regions info.
+   N=5: dependences info.
+
+   max_rgn_blocks and max_region_insns limit region size for
+   interblock scheduling.  They are controlled by
+   -fsched-interblock-max-blocks-N, -fsched-interblock-max-insns-N */
+
+#define MAX_RGN_BLOCKS 10
+#define MAX_RGN_INSNS 100
+
+static int sched_debug_count = -1;
+static int sched_verbose_param = 0;
+static int sched_verbose = 0;
+static int max_rgn_blocks = MAX_RGN_BLOCKS;
+static int max_rgn_insns = MAX_RGN_INSNS;
+
+/* nr_inter/spec counts interblock/speculative motion for the function */
+static int nr_inter, nr_spec;
+
+
+/* debugging file. all printouts are sent to dump, which is always set,
+   either to stderr, or to the dump listing file (-dRS).  */
+static FILE *dump = 0;
+
+/* fix_sched_param() is called from toplev.c upon detection of the
+   -fsched-***-N options.  PARAM names the option; VAL is the text of N.  */
+
+void
+fix_sched_param (param, val)
+     char *param, *val;
+{
+  if (!strcmp (param, "max"))
+    sched_debug_count = ((sched_debug_count == -1) ?	/* -1: not yet set */
+			 atoi (val) : sched_debug_count);
+  else if (!strcmp (param, "verbose"))
+    sched_verbose_param = atoi (val);
+  else if (!strcmp (param, "interblock-max-blocks"))
+    max_rgn_blocks = atoi (val);
+  else if (!strcmp (param, "interblock-max-insns"))
+    max_rgn_insns = atoi (val);
+  else
+    warning ("fix_sched_param: unknown param: %s", param);
+}
+
+
+/* Arrays set up by scheduling for the same respective purposes as
+   similar-named arrays set up by flow analysis.  We work with these
+   arrays during the scheduling pass so we can compare values against
+   unscheduled code.
+
+   Values of these arrays are copied at the end of this pass into the
+   arrays set up by flow analysis.  */
+static int *sched_reg_n_calls_crossed;
+static int *sched_reg_live_length;
+static int *sched_reg_basic_block;
+
+/* We need to know the current block number during the post scheduling
+   update of live register information so that we can also update
+   REG_BASIC_BLOCK if a register changes blocks.  */
+static int current_block_num;
+
+/* Element N is the next insn that sets (hard or pseudo) register
+   N within the current basic block; or zero, if there is no
+   such insn.  Needed for new registers which may be introduced
+   by splitting insns.  */
+static rtx *reg_last_uses;
+static rtx *reg_last_sets;
+static regset reg_pending_sets;
+static int reg_pending_sets_all;
+
+/* Vector indexed by INSN_UID giving the original ordering of the insns.  */
+static int *insn_luid;
+#define INSN_LUID(INSN) (insn_luid[INSN_UID (INSN)])
+
+/* Vector indexed by INSN_UID giving each instruction a priority.  */
+static int *insn_priority;
+#define INSN_PRIORITY(INSN) (insn_priority[INSN_UID (INSN)])
+
+static short *insn_costs;
+#define INSN_COST(INSN)	insn_costs[INSN_UID (INSN)]
+
+/* Vector indexed by INSN_UID giving an encoding of the function units
+   used.  */
+static short *insn_units;
+#define INSN_UNIT(INSN)	insn_units[INSN_UID (INSN)]
+
+/* Vector indexed by INSN_UID giving each instruction a register-weight.
+   This weight is an estimate of the insn's contribution to register pressure.  */
+static int *insn_reg_weight;
+#define INSN_REG_WEIGHT(INSN) (insn_reg_weight[INSN_UID (INSN)])
+
+/* Vector indexed by INSN_UID giving list of insns which
+   depend upon INSN.  Unlike LOG_LINKS, it represents forward dependences.  */
+static rtx *insn_depend;
+#define INSN_DEPEND(INSN) insn_depend[INSN_UID (INSN)]
+
+/* Vector indexed by INSN_UID. Initialized to the number of incoming
+   edges in forward dependence graph (= number of LOG_LINKS).  As
+   scheduling proceeds, dependence counts are decreased.  An
+   instruction moves to the ready list when its counter is zero.  */
+static int *insn_dep_count;
+#define INSN_DEP_COUNT(INSN) (insn_dep_count[INSN_UID (INSN)])
+
+/* Vector indexed by INSN_UID giving an encoding of the blockage range
+   function: the unit, then min and max cost, BLOCKAGE_BITS bits each.  */
+static unsigned int *insn_blockage;
+#define INSN_BLOCKAGE(INSN) (insn_blockage[INSN_UID (INSN)])
+#define UNIT_BITS 5
+#define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1)
+#define ENCODE_BLOCKAGE(U, R)				\
+  ((((((U) << UNIT_BITS) << BLOCKAGE_BITS)		\
+     | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS)		\
+   | MAX_BLOCKAGE_COST (R))
+#define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS))
+#define BLOCKAGE_RANGE(B)                                                \
+  (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \
+   | ((B) & BLOCKAGE_MASK))
+
+/* Encodings of the `<name>_unit_blockage_range' function.  */
+#define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2))
+#define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1))
+
+#define DONE_PRIORITY	-1
+#define MAX_PRIORITY	0x7fffffff
+#define TAIL_PRIORITY	0x7ffffffe
+#define LAUNCH_PRIORITY	0x7f000001
+#define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0)
+#define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0)
+
+/* Vector indexed by INSN_UID giving number of insns referring to this insn.  */
+static int *insn_ref_count;
+#define INSN_REF_COUNT(INSN) (insn_ref_count[INSN_UID (INSN)])
+
+/* Vector indexed by INSN_UID giving line-number note in effect for each
+   insn.  For line-number notes, this indicates whether the note may be
+   reused.  */
+static rtx *line_note;
+#define LINE_NOTE(INSN) (line_note[INSN_UID (INSN)])
+
+/* Vector indexed by basic block number giving the starting line-number
+   for each basic block.  */
+static rtx *line_note_head;
+
+/* List of important notes we must keep around.  This is a pointer to the
+   last element in the list.  */
+static rtx note_list;
+
+/* Regsets telling whether a given register is live or dead before the last
+   scheduled insn.  Must scan the instructions once before scheduling to
+   determine what registers are live or dead at the end of the block.  */
+static regset bb_live_regs;
+
+/* Regset telling whether a given register is live after the insn currently
+   being scheduled.  Before processing an insn, this is equal to bb_live_regs
+   above.  This is used so that we can find registers that are newly born/dead
+   after processing an insn.  */
+static regset old_live_regs;
+
+/* The chain of REG_DEAD notes.  REG_DEAD notes are removed from all insns
+   during the initial scan and reused later.  If there are not exactly as
+   many REG_DEAD notes in the post scheduled code as there were in the
+   prescheduled code then we trigger an abort because this indicates a bug.  */
+static rtx dead_notes;
+
+/* Queues, etc.  */
+
+/* An instruction is ready to be scheduled when all insns preceding it
+   have already been scheduled.  It is important to ensure that all
+   insns which use its result will not be executed until its result
+   has been computed.  An insn is maintained in one of four structures:
+
+   (P) the "Pending" set of insns which cannot be scheduled until
+   their dependencies have been satisfied.
+   (Q) the "Queued" set of insns that can be scheduled when sufficient
+   time has passed.
+   (R) the "Ready" list of unscheduled, uncommitted insns.
+   (S) the "Scheduled" list of insns.
+
+   Initially, all insns are either "Pending" or "Ready" depending on
+   whether their dependencies are satisfied.
+
+   Insns move from the "Ready" list to the "Scheduled" list as they
+   are committed to the schedule.  As this occurs, the insns in the
+   "Pending" list have their dependencies satisfied and move to either
+   the "Ready" list or the "Queued" set depending on whether
+   sufficient time has passed to make them ready.  As time passes,
+   insns move from the "Queued" set to the "Ready" list.  Insns may
+   move from the "Ready" list to the "Queued" set if they are blocked
+   due to a function unit conflict.
+
+   The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled
+   insns, i.e., those that are ready, queued, and pending.
+   The "Queued" set (Q) is implemented by the variable `insn_queue'.
+   The "Ready" list (R) is implemented by the variables `ready' and
+   `n_ready'.
+   The "Scheduled" list (S) is the new insn chain built by this pass.
+
+   The transition (R->S) is implemented in the scheduling loop in
+   `schedule_block' when the best insn to schedule is chosen.
+   The transition (R->Q) is implemented in `queue_insn' when an
+   insn is found to have a function unit conflict with the already
+   committed insns.
+   The transitions (P->R and P->Q) are implemented in `schedule_insn' as
+   insns move from the ready list to the scheduled list.
+   The transition (Q->R) is implemented in 'queue_to_insn' as time
+   passes or stalls are introduced.  */
+
+/* Implement a circular buffer to delay instructions until sufficient
+   time has passed.  INSN_QUEUE_SIZE is a power of two larger than
+   MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c.  This is the
+   longest time an insn may be queued.  */
+static rtx insn_queue[INSN_QUEUE_SIZE];
+static int q_ptr = 0;
+static int q_size = 0;
+#define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1))	/* slot after X, wrapping */
+#define NEXT_Q_AFTER(X, C) (((X)+(C)) & (INSN_QUEUE_SIZE-1)) /* C slots on */
+
+/* Vector indexed by INSN_UID giving the minimum clock tick at which
+   the insn becomes ready.  This is used to note timing constraints for
+   insns in the pending list.  */
+static int *insn_tick;
+#define INSN_TICK(INSN) (insn_tick[INSN_UID (INSN)])
+
+/* Data structure for keeping track of register information
+   during that register's life.  */
+
+struct sometimes
+  {
+    int regno;
+    int live_length;
+    int calls_crossed;
+  };
+
+/* Forward declarations.  */
+static void add_dependence PROTO ((rtx, rtx, enum reg_note));
+static void remove_dependence PROTO ((rtx, rtx));
+static rtx find_insn_list PROTO ((rtx, rtx));
+static int insn_unit PROTO ((rtx));
+static unsigned int blockage_range PROTO ((int, rtx));
+static void clear_units PROTO ((void));
+static int actual_hazard_this_instance PROTO ((int, int, rtx, int, int));
+static void schedule_unit PROTO ((int, rtx, int));
+static int actual_hazard PROTO ((int, rtx, int, int));
+static int potential_hazard PROTO ((int, rtx, int));
+static int insn_cost PROTO ((rtx, rtx, rtx));
+static int priority PROTO ((rtx));
+static void free_pending_lists PROTO ((void));
+static void add_insn_mem_dependence PROTO ((rtx *, rtx *, rtx, rtx));
+static void flush_pending_lists PROTO ((rtx, int));
+static void sched_analyze_1 PROTO ((rtx, rtx));
+static void sched_analyze_2 PROTO ((rtx, rtx));
+static void sched_analyze_insn PROTO ((rtx, rtx, rtx));
+static void sched_analyze PROTO ((rtx, rtx));
+static void sched_note_set PROTO ((int, rtx, int));
+static int rank_for_schedule PROTO ((rtx *, rtx *));
+static void swap_sort PROTO ((rtx *, int));
+static void queue_insn PROTO ((rtx, int));
+static int schedule_insn PROTO ((rtx, rtx *, int, int));
+static void create_reg_dead_note PROTO ((rtx, rtx));
+static void attach_deaths PROTO ((rtx, rtx, int));
+static void attach_deaths_insn PROTO ((rtx));
+static int new_sometimes_live PROTO ((struct sometimes *, int, int));
+static void finish_sometimes_live PROTO ((struct sometimes *, int));
+static int schedule_block PROTO ((int, int, int));
+static rtx regno_use_in PROTO ((int, rtx));
+static void split_hard_reg_notes PROTO ((rtx, rtx, rtx, rtx));
+static void new_insn_dead_notes PROTO ((rtx, rtx, rtx, rtx));
+static void update_n_sets PROTO ((rtx, int));
+static void update_flow_info PROTO ((rtx, rtx, rtx, rtx));
+
+/* Main entry point of this file.  */
+void schedule_insns PROTO ((FILE *));
+
+/* Mapping of insns to their original block prior to scheduling.  */
+static int *insn_orig_block;
+#define INSN_BLOCK(insn) (insn_orig_block[INSN_UID (insn)])
+
+/* Some insns (e.g. call) are not allowed to move across blocks.  */
+static char *cant_move;
+#define CANT_MOVE(insn) (cant_move[INSN_UID (insn)])
+
+/* Control flow graph edges are kept in circular lists.  */
+typedef struct
+  {
+    int from_block;		/* block the edge leaves (new_edge's 'source') */
+    int to_block;		/* block the edge enters (new_edge's 'target') */
+    int next_in;		/* next edge in to_block's circular incoming list */
+    int next_out;		/* next edge in from_block's circular outgoing list */
+  }
+edge;
+static edge *edge_table;
+
+#define NEXT_IN(edge) (edge_table[edge].next_in)
+#define NEXT_OUT(edge) (edge_table[edge].next_out)
+#define FROM_BLOCK(edge) (edge_table[edge].from_block)
+#define TO_BLOCK(edge) (edge_table[edge].to_block)
+
+/* Number of edges in the control flow graph.  (in fact larger than
+   that by 1, since edge 0 is unused.) */
+static int nr_edges;
+
+/* Circular list of incoming/outgoing edges of a block */
+static int *in_edges;
+static int *out_edges;
+
+#define IN_EDGES(block) (in_edges[block])
+#define OUT_EDGES(block) (out_edges[block])
+
+/* List of labels which cannot be deleted, needed for control
+   flow graph construction.  */
+extern rtx forced_labels;
+
+
+static char is_cfg_nonregular PROTO ((void));
+static int uses_reg_or_mem PROTO ((rtx));
+void debug_control_flow PROTO ((void));
+static void build_control_flow PROTO ((void));
+static void build_jmp_edges PROTO ((rtx, int));
+static void new_edge PROTO ((int, int));
+
+
+/* A region is the main entity for interblock scheduling: insns
+   are allowed to move between blocks in the same region, along
+   control flow graph edges, in the 'up' direction.  */
+typedef struct
+  {
+    int rgn_nr_blocks;		/* number of blocks in region */
+    int rgn_blocks;		/* blocks in the region (actually index in rgn_bb_table) */
+  }
+region;
+
+/* Number of regions in the procedure */
+static int nr_regions;
+
+/* Table of region descriptions */
+static region *rgn_table;
+
+/* Array of lists of regions' blocks */
+static int *rgn_bb_table;
+
+/* Topological order of blocks in the region (if b2 is reachable from
+   b1, block_to_bb[b2] > block_to_bb[b1]).
+   Note: A basic block is always referred to by either block or b,
+   while its topological order name (in the region) is referred to by
+   bb.
+ */
+static int *block_to_bb;
+
+/* The number of the region containing a block.  */
+static int *containing_rgn;
+
+#define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks)
+#define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks)
+#define BLOCK_TO_BB(block) (block_to_bb[block])
+#define CONTAINING_RGN(block) (containing_rgn[block])
+
+void debug_regions PROTO ((void));
+static void find_single_block_region PROTO ((void));
+static void find_rgns PROTO ((void));
+static int too_large PROTO ((int, int *, int *));
+
+extern void debug_live PROTO ((int, int));
+
+/* Blocks of the current region being scheduled.  */
+static int current_nr_blocks;
+static int current_blocks;
+
+/* The mapping from bb to block */
+#define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)])
+
+
+/* Bit vectors and bitset operations are needed for computations on
+   the control flow graph.  */
+
+typedef unsigned HOST_WIDE_INT *bitset;
+typedef struct
+  {
+    int *first_member;		/* pointer to the list start in bitlst_table.  */
+    int nr_members;		/* the number of members of the bit list.     */
+  }
+bitlst;
+
+int bitlst_table_last;
+int bitlst_table_size;
+static int *bitlst_table;
+
+static char bitset_member PROTO ((bitset, int, int));
+static void extract_bitlst PROTO ((bitset, int, bitlst *));
+
+/* target info declarations.
+
+   The block currently being scheduled is referred to as the "target" block,
+   while other blocks in the region from which insns can be moved to the
+   target are called "source" blocks.  The candidate structure holds info
+   about such sources: are they valid?  Speculative?  Etc.  */
+typedef bitlst bblst;
+typedef struct
+  {
+    char is_valid;
+    char is_speculative;
+    int src_prob;
+    bblst split_bbs;
+    bblst update_bbs;
+  }
+candidate;
+
+static candidate *candidate_table;
+
+/* A speculative motion requires checking live information on the path
+   from 'source' to 'target'.  The split blocks are those to be checked.
+   After a speculative motion, live information should be modified in
+   the 'update' blocks.
+
+   Lists of split and update blocks for  each candidate of the current
+   target  are  in  array bblst_table */
+int *bblst_table, bblst_size, bblst_last;
+
+#define IS_VALID(src) ( candidate_table[src].is_valid )
+#define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative )
+#define SRC_PROB(src) ( candidate_table[src].src_prob )
+
+/* The bb being currently scheduled.  */
+int target_bb;
+
+/* List of edges.  */
+typedef bitlst edgelst;
+
+/* target info functions */
+static void split_edges PROTO ((int, int, edgelst *));
+static void compute_trg_info PROTO ((int));
+void debug_candidate PROTO ((int));
+void debug_candidates PROTO ((int));
+
+
+/* Bit-set of bbs, where bit 'i' stands for bb 'i'.  */
+typedef bitset bbset;
+
+/* Number of words of the bbset.  */
+int bbset_size;
+
+/* Dominators array: dom[i] contains the bbset of dominators of
+   bb i in the region.  */
+bbset *dom;
+
+/* bb 0 is the only region entry, so a bb is the entry iff it is zero.  */
+#define IS_RGN_ENTRY(bb) (!(bb))
+
+/* Is bb_src dominated by bb_trg.  */
+#define IS_DOMINATED(bb_src, bb_trg)                                 \
+( bitset_member (dom[bb_src], bb_trg, bbset_size) )
+
+/* Probability: Prob[i] is a float in [0, 1] which is the probability
+   of bb i relative to the region entry.  */
+float *prob;
+
+/*  The probability of bb_src, relative to bb_trg.  Note, that while the
+   'prob[bb]' is a float in [0, 1], this macro returns an integer
+   in [0, 100].  */
+#define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \
+						      prob[bb_trg])))
+
+/* Bit-set of edges, where bit i stands for edge i.  */
+typedef bitset edgeset;
+
+/* Number of edges in the region.  */
+int rgn_nr_edges;
+
+/* Array of size rgn_nr_edges.    */
+int *rgn_edges;
+
+/* Number of words in an edgeset.    */
+int edgeset_size;
+
+/* Mapping from each edge in the graph to its number in the rgn.  */
+int *edge_to_bit;
+#define EDGE_TO_BIT(edge) (edge_to_bit[edge])
+
+/* The split edges of a source bb is different for each target
+   bb.  In order to compute this efficiently, the 'potential-split edges'
+   are computed for each bb prior to scheduling a region.  This is actually
+   the split edges of each bb relative to the region entry.
+
+   pot_split[bb] is the set of potential split edges of bb.  */
+edgeset *pot_split;
+
+/* For every bb, a set of its ancestor edges.  */
+edgeset *ancestor_edges;
+
+static void compute_dom_prob_ps PROTO ((int));
+
+#define ABS_VALUE(x) (((x)<0)?(-(x)):(x))
+#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (INSN_BLOCK (INSN))))
+#define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (INSN_BLOCK (INSN))))
+#define INSN_BB(INSN) (BLOCK_TO_BB (INSN_BLOCK (INSN)))
+
+/* parameters affecting the decision of rank_for_schedule() */
+#define MIN_DIFF_PRIORITY 2
+#define MIN_PROBABILITY 40
+#define MIN_PROB_DIFF 10
+
+/* speculative scheduling functions */
+static int check_live_1 PROTO ((int, rtx));
+static void update_live_1 PROTO ((int, rtx));
+static int check_live PROTO ((rtx, int, int));
+static void update_live PROTO ((rtx, int, int));
+static void set_spec_fed PROTO ((rtx));
+static int is_pfree PROTO ((rtx, int, int));
+static int find_conditional_protection PROTO ((rtx, int));
+static int is_conditionally_protected PROTO ((rtx, int, int));
+static int may_trap_exp PROTO ((rtx, int));
+static int classify_insn PROTO ((rtx));
+static int is_exception_free PROTO ((rtx, int, int));
+
+static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx));
+static void compute_block_forward_dependences PROTO ((int));
+static void init_rgn_data_dependences PROTO ((int));
+static void add_branch_dependences PROTO ((rtx, rtx));
+static void compute_block_backward_dependences PROTO ((int));
+void debug_dependencies PROTO ((void));
+
+/* Notes handling mechanism:
+   =========================
+   Generally, NOTES are saved before scheduling and restored after scheduling.
+   The scheduler distinguishes between three types of notes:
+
+   (1) LINE_NUMBER notes, generated and used for debugging.  Here,
+   before scheduling a region, a pointer to the LINE_NUMBER note is
+   added to the insn following it (in save_line_notes()), and the note
+   is removed (in rm_line_notes() and unlink_line_notes()).  After
+   scheduling the region, this pointer is used for regeneration of
+   the LINE_NUMBER note (in restore_line_notes()).
+
+   (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes:
+   Before scheduling a region, a pointer to the note is added to the insn
+   that follows or precedes it.  (This happens as part of the data dependence
+   computation).  After scheduling an insn, the pointer contained in it is
+   used for regenerating the corresponding note (in reemit_notes).
+
+   (3) All other notes (e.g. INSN_DELETED):  Before scheduling a block,
+   these notes are put in a list (in rm_other_notes() and
+   unlink_other_notes ()).  After scheduling the block, these notes are
+   inserted at the beginning of the block (in schedule_block()).  */
+
+static rtx unlink_other_notes PROTO ((rtx, rtx));
+static rtx unlink_line_notes PROTO ((rtx, rtx));
+static void rm_line_notes PROTO ((int));
+static void save_line_notes PROTO ((int));
+static void restore_line_notes PROTO ((int));
+static void rm_redundant_line_notes PROTO ((void));
+static void rm_other_notes PROTO ((rtx, rtx));
+static rtx reemit_notes PROTO ((rtx, rtx));
+
+static void get_block_head_tail PROTO ((int, rtx *, rtx *));
+
+static void find_pre_sched_live PROTO ((int));
+static void find_post_sched_live PROTO ((int));
+static void update_reg_usage PROTO ((void));
+
+void debug_ready_list PROTO ((rtx[], int));
+static void init_target_units PROTO ((void));
+static void insn_print_units PROTO ((rtx));
+static int get_visual_tbl_length PROTO ((void));
+static void init_block_visualization PROTO ((void));
+static void print_block_visualization PROTO ((int, char *));
+static void visualize_scheduled_insns PROTO ((int, int));
+static void visualize_no_unit PROTO ((rtx));
+static void visualize_stall_cycles PROTO ((int, int));
+static void print_exp PROTO ((char *, rtx, int));
+static void print_value PROTO ((char *, rtx, int));
+static void print_pattern PROTO ((char *, rtx, int));
+static void print_insn PROTO ((char *, rtx, int));
+void debug_reg_vector PROTO ((regset));
+
+static rtx move_insn1 PROTO ((rtx, rtx));
+static rtx move_insn PROTO ((rtx, rtx));
+static rtx group_leader PROTO ((rtx));
+static int set_priorities PROTO ((int));
+static void init_rtx_vector PROTO ((rtx **, rtx *, int, int));
+static void schedule_region PROTO ((int));
+static void split_block_insns PROTO ((int));
+
+#endif /* INSN_SCHEDULING */
+
+#define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
+
+/* Helper functions for instruction scheduling.  */
+
+/* Record in LOG_LINKS (INSN) that INSN depends on ELEM, with note kind
+   DEP_TYPE.  Self-dependences are ignored; ELEM is redirected to the last
+   insn of its SCHED_GROUP; an existing link only has its kind tightened.  */
+
+static void
+add_dependence (insn, elem, dep_type)
+     rtx insn;
+     rtx elem;
+     enum reg_note dep_type;
+{
+  rtx link, next;
+
+  /* Don't depend an insn on itself.  */
+  if (insn == elem)
+    return;
+
+  /* If elem is part of a sequence that must be scheduled together, then
+     make the dependence point to the last insn of the sequence.
+     When HAVE_cc0, it is possible for NOTEs to exist between users and
+     setters of the condition codes, so we must skip past notes here.
+     Otherwise, NOTEs are impossible here.  */
+
+  next = NEXT_INSN (elem);
+
+#ifdef HAVE_cc0
+  while (next && GET_CODE (next) == NOTE)
+    next = NEXT_INSN (next);
+#endif
+
+  if (next && SCHED_GROUP_P (next)
+      && GET_CODE (next) != CODE_LABEL)
+    {
+      /* Notes will never intervene here though, so don't bother checking
+         for them.  */
+      /* We must reject CODE_LABELs, so that we don't get confused by one
+         that has LABEL_PRESERVE_P set, which is represented by the same
+         bit in the rtl as SCHED_GROUP_P.  A CODE_LABEL can never be
+         SCHED_GROUP_P.  */
+      while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next))
+	     && GET_CODE (NEXT_INSN (next)) != CODE_LABEL)
+	next = NEXT_INSN (next);
+
+      /* Again, don't depend an insn on itself.  */
+      if (insn == next)
+	return;
+
+      /* Make the dependence to NEXT, the last insn of the group, instead
+         of the original ELEM.  */
+      elem = next;
+    }
+
+#ifdef INSN_SCHEDULING
+  /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.)
+     No need for interblock dependences with calls, since
+     calls are not moved between blocks.   Note: the edge where
+     elem is a CALL is still required.  */
+  if (GET_CODE (insn) == CALL_INSN
+      && (INSN_BB (elem) != INSN_BB (insn)))
+    return;
+
+#endif
+
+  /* Check that we don't already have this dependence.  */
+  for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+    if (XEXP (link, 0) == elem)
+      {
+	/* If this is a more restrictive type of dependence than the existing
+	   one (a numerically smaller kind), change the existing link to it.  */
+	if ((int) dep_type < (int) REG_NOTE_KIND (link))
+	  PUT_REG_NOTE_KIND (link, dep_type);
+	return;
+      }
+  /* Might want to check one level of transitivity to save conses.  */
+
+  link = rtx_alloc (INSN_LIST);
+  /* Fill in the new link and push it on the front of LOG_LINKS (INSN).  */
+  PUT_REG_NOTE_KIND (link, dep_type);
+  XEXP (link, 0) = elem;
+  XEXP (link, 1) = LOG_LINKS (insn);
+  LOG_LINKS (insn) = link;
+}
+
+/* Remove every link to ELEM from the LOG_LINKS of INSN.  Abort if INSN
+   had no such link.  */
+
+static void
+remove_dependence (insn, elem)
+     rtx insn;
+     rtx elem;
+{
+  rtx prev, link;
+  int found = 0;
+
+  for (prev = 0, link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+    {
+      if (XEXP (link, 0) != elem)
+	{
+	  prev = link;		/* PREV only ever names a link that is kept.  */
+	  continue;
+	}
+      if (prev)
+	XEXP (prev, 1) = XEXP (link, 1);
+      else
+	LOG_LINKS (insn) = XEXP (link, 1);
+      found = 1;
+    }
+
+  if (!found)
+    abort ();
+}
+
+#ifndef INSN_SCHEDULING
+void
+schedule_insns (dump_file)
+     FILE *dump_file;		/* not used by this stub */
+{				/* compiled only when INSN_SCHEDULING is undefined */
+}
+#else
+#ifndef __GNUC__
+#define __inline
+#endif
+
+/* Computation of memory dependencies.  */
+
+/* The *_insns and *_mems are paired lists.  Each pending memory operation
+   will have a pointer to the MEM rtx on one list and a pointer to the
+   containing insn on the other list in the same place in the list.  */
+
+/* We can't use add_dependence like the old code did, because a single insn
+   may have multiple memory accesses, and hence needs to be on the list
+   once for each memory access.  Add_dependence won't let you add an insn
+   to a list more than once.  */
+
+/* An INSN_LIST containing all insns with pending read operations.  */
+static rtx pending_read_insns;
+
+/* An EXPR_LIST containing all MEM rtx's which are pending reads.  */
+static rtx pending_read_mems;
+
+/* An INSN_LIST containing all insns with pending write operations.  */
+static rtx pending_write_insns;
+
+/* An EXPR_LIST containing all MEM rtx's which are pending writes.  */
+static rtx pending_write_mems;
+
+/* Indicates the combined length of the two pending lists.  We must prevent
+   these lists from ever growing too large since the number of dependencies
+   produced is at least O(N*N), and execution time is at least O(4*N*N), as
+   a function of the length of these pending lists.  */
+
+static int pending_lists_length;
+
+/* An INSN_LIST containing all INSN_LISTs allocated but currently unused.  */
+
+static rtx unused_insn_list;
+
+/* An EXPR_LIST containing all EXPR_LISTs allocated but currently unused.  */
+
+static rtx unused_expr_list;
+
+/* The last insn upon which all memory references must depend.
+   This is an insn which flushed the pending lists, creating a dependency
+   between it and all previously pending memory references.  This creates
+   a barrier (or a checkpoint) which no memory reference is allowed to cross.
+
+   This includes all non constant CALL_INSNs.  When we do interprocedural
+   alias analysis, this restriction can be relaxed.
+   This may also be an INSN that writes memory if the pending lists grow
+   too large.  */
+
+static rtx last_pending_memory_flush;
+
+/* The last function call we have seen.  All hard regs, and, of course,
+   the last function call, must depend on this.  */
+
+static rtx last_function_call;
+
+/* The LOG_LINKS field of this is a list of insns which use a pseudo register
+   that does not already cross a call.  We create dependencies between each
+   of those insns and the next call insn, to ensure that they won't cross a call
+   after scheduling is done.  */
+
+static rtx sched_before_next_call;
+
+/* Pointer to the last instruction scheduled.  Used by rank_for_schedule,
+   so that insns independent of the last scheduled insn will be preferred
+   over dependent instructions.  */
+
+static rtx last_scheduled_insn;
+
+/* Data structures for the computation of data dependences in a region.  We
+   keep one copy of each of the variables declared above for each bb in the
+   region.  Before analyzing the data dependences for a bb, its variables
+   are initialized as a function of the variables of its predecessors.  When
+   the analysis for a bb completes, we save the contents of each variable X
+   to a corresponding bb_X[bb] variable.  For example, pending_read_insns is
+   copied to bb_pending_read_insns[bb].  Another change is that a few
+   variables are now a list of insns rather than a single insn:
+   last_pending_memory_flush, last_function_call, reg_last_sets.  The
+   manipulation of these variables was changed appropriately.  */
+
+static rtx **bb_reg_last_uses;
+static rtx **bb_reg_last_sets;
+
+static rtx *bb_pending_read_insns;
+static rtx *bb_pending_read_mems;
+static rtx *bb_pending_write_insns;
+static rtx *bb_pending_write_mems;
+static int *bb_pending_lists_length;
+
+static rtx *bb_last_pending_memory_flush;
+static rtx *bb_last_function_call;
+static rtx *bb_sched_before_next_call;
+
+/* functions for construction of the control flow graph.  */
+
+/* Return 1 if the control flow graph should not be constructed, 0 otherwise.
+   When 0 is returned, nr_edges holds an estimate of the number of edges
+   on the graph.  We decide not to build the control flow graph if there is
+   possibly more than one entry to the function, or if computed branches exist.  */
+
+static char
+is_cfg_nonregular ()
+{
+  int b;
+  rtx insn;
+  RTX_CODE code;
+
+  rtx nonlocal_label_list = nonlocal_label_rtx_list ();
+
+  /* check for non local labels */
+  if (nonlocal_label_list)
+    {
+      return 1;
+    }
+
+  /* check for labels which cannot be deleted */
+  if (forced_labels)
+    {
+      return 1;
+    }
+
+  /* check for labels which probably cannot be deleted */
+  if (exception_handler_labels)
+    {
+      return 1;
+    }
+
+  /* check for labels referred to other than by jumps (REG_LABEL notes) */
+  for (b = 0; b < n_basic_blocks; b++)
+    for (insn = basic_block_head[b];; insn = NEXT_INSN (insn))
+      {
+	code = GET_CODE (insn);
+	if (GET_RTX_CLASS (code) == 'i')
+	  {
+	    rtx note;
+
+	    for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+	      if (REG_NOTE_KIND (note) == REG_LABEL)
+		{
+		  return 1;
+		}
+	  }
+
+	if (insn == basic_block_end[b])
+	  break;
+      }
+
+  nr_edges = 0;
+
+  /* check for computed branches, counting jump edges along the way */
+  for (b = 0; b < n_basic_blocks; b++)
+    {
+      for (insn = basic_block_head[b];; insn = NEXT_INSN (insn))
+	{
+
+	  if (GET_CODE (insn) == JUMP_INSN)
+	    {
+	      rtx pat = PATTERN (insn);
+	      int i;
+
+	      if (GET_CODE (pat) == PARALLEL)
+		{
+		  int len = XVECLEN (pat, 0);
+		  int has_use_labelref = 0;
+
+		  for (i = len - 1; i >= 0; i--)
+		    if (GET_CODE (XVECEXP (pat, 0, i)) == USE
+			&& (GET_CODE (XEXP (XVECEXP (pat, 0, i), 0))
+			    == LABEL_REF))
+		      {
+			nr_edges++;	/* one edge per (use (label_ref)) */
+			has_use_labelref = 1;
+		      }
+
+		  if (!has_use_labelref)
+		    for (i = len - 1; i >= 0; i--)
+		      if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+			  && SET_DEST (XVECEXP (pat, 0, i)) == pc_rtx
+			  && uses_reg_or_mem (SET_SRC (XVECEXP (pat, 0, i))))
+			{
+			  return 1;
+			}
+		}
+	      /* check for branch table: one edge per table entry */
+	      else if (GET_CODE (pat) == ADDR_VEC
+		       || GET_CODE (pat) == ADDR_DIFF_VEC)
+		{
+		  int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
+		  int len = XVECLEN (pat, diff_vec_p);
+
+		  nr_edges += len;
+		}
+	      else
+		{
+		  /* computed branch?  (uses_reg_or_mem also counts labels) */
+		  if (GET_CODE (pat) == SET
+		      && SET_DEST (pat) == pc_rtx
+		      && uses_reg_or_mem (SET_SRC (pat)))
+		    {
+		      return 1;
+		    }
+		}
+	    }
+
+	  if (insn == basic_block_end[b])
+	    break;
+	}
+    }
+
+  /* count the fallthrough edges (same tests as build_control_flow) */
+  for (b = 0; b < n_basic_blocks; b++)
+    {
+      for (insn = PREV_INSN (basic_block_head[b]);
+	   insn && GET_CODE (insn) == NOTE; insn = PREV_INSN (insn))
+	;
+
+      if (!insn && b != 0)
+	nr_edges++;
+      else if (insn && GET_CODE (insn) != BARRIER)
+	nr_edges++;
+    }
+
+  nr_edges++;	/* presumably for the unused edge 0 -- TODO confirm */
+
+  return 0;
+}
+
+
+/* Return 1 if x uses a REG, or a MEM other than a constant pool reference
+   (function was taken from flow.c).  x is a target of a jump; used for the
+   detection of computed branches.  Side effect: nr_edges is incremented
+   for each LABEL_REF reached before the walk stops.  */
+
+static int
+uses_reg_or_mem (x)
+     rtx x;
+{
+  enum rtx_code code = GET_CODE (x);
+  int i, j;
+  char *fmt;
+
+  if (code == REG)
+    return 1;
+
+  if (code == MEM
+      && !(GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+	   && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0))))
+    return 1;
+
+  if (code == IF_THEN_ELSE)	/* the condition, operand 0, is not examined */
+    {
+      if (uses_reg_or_mem (XEXP (x, 1))
+	  || uses_reg_or_mem (XEXP (x, 2)))
+	return 1;
+      else
+	return 0;
+    }
+
+  if (code == LABEL_REF)
+    {
+      nr_edges++;		/* each label seen counts as one estimated edge */
+
+      return 0;
+    }
+
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e'
+	  && uses_reg_or_mem (XEXP (x, i)))
+	return 1;
+
+      if (fmt[i] == 'E')
+	for (j = 0; j < XVECLEN (x, i); j++)
+	  if (uses_reg_or_mem (XVECEXP (x, i, j)))
+	    return 1;
+    }
+
+  return 0;
+}
+
+
+/* Print to `dump', for each basic block, the uids of its first and last
+   insn and its predecessor and successor blocks.  For use from a debugger.  */
+
+void
+debug_control_flow ()
+{
+  int i, e, next;
+
+  fprintf (dump, ";;   --------- CONTROL FLOW GRAPH --------- \n\n");
+
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+      fprintf (dump, ";;\tBasic block %d: first insn %d, last %d.\n",
+	       i,
+	       INSN_UID (basic_block_head[i]),
+	       INSN_UID (basic_block_end[i]));
+
+      fprintf (dump, ";;\tPredecessor blocks:");
+      for (e = IN_EDGES (i); e; e = next)
+	{
+	  fprintf (dump, " %d", FROM_BLOCK (e));
+
+	  next = NEXT_IN (e);
+
+	  if (next == IN_EDGES (i))
+	    break;		/* back at the head of the circular list */
+	}
+
+      fprintf (dump, "\n;;\tSuccesor blocks:");
+      for (e = OUT_EDGES (i); e; e = next)
+	{
+	  fprintf (dump, " %d", TO_BLOCK (e));
+
+	  next = NEXT_OUT (e);
+
+	  if (next == OUT_EDGES (i))
+	    break;		/* back at the head of the circular list */
+	}
+
+      fprintf (dump, " \n\n");
+
+    }
+}
+
+
+/* Build the control flow graph; set nr_edges to the edge count plus 1.  */
+
+static void
+build_control_flow ()
+{
+  int i;
+
+  nr_edges = 0;
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+      rtx insn;
+
+      insn = basic_block_end[i];
+      if (GET_CODE (insn) == JUMP_INSN)	/* only the block's last insn is checked */
+	{
+	  build_jmp_edges (PATTERN (insn), i);
+	}
+
+      for (insn = PREV_INSN (basic_block_head[i]);
+	   insn && GET_CODE (insn) == NOTE; insn = PREV_INSN (insn))
+	;			/* skip back over NOTEs before the block */
+
+      /* Fallthrough edges.  NOTE(review): second test lacks an i != 0 guard.  */
+      if (!insn && i != 0)
+	new_edge (i - 1, i);
+      else if (insn && GET_CODE (insn) != BARRIER)
+	new_edge (i - 1, i);
+    }
+
+  /* increment by 1, since edge 0 is unused.  */
+  nr_edges++;
+
+}
+
+
+/* Construct edges in the control flow graph from block 'source' to the block
+   of every label referred to, at any depth, by 'pattern'.  */
+
+static
+void 
+build_jmp_edges (pattern, source)
+     rtx pattern;
+     int source;
+{
+  register RTX_CODE code;
+  register int i;
+  register char *fmt;
+
+  code = GET_CODE (pattern);
+
+  if (code == LABEL_REF)
+    {
+      register rtx label = XEXP (pattern, 0);
+      register int target;
+
+      /* This can happen as a result of a syntax error
+         and a diagnostic has already been printed.  */
+      if (INSN_UID (label) == 0)
+	return;
+
+      target = INSN_BLOCK (label);	/* block recorded for the label's uid */
+      new_edge (source, target);
+
+      return;
+    }
+
+  /* proper handling of ADDR_DIFF_VEC: do not add a non-existing edge
+     from the block containing the branch-on-table, to itself.  */
+  if (code == ADDR_VEC
+      || code == ADDR_DIFF_VEC)
+    {
+      int diff_vec_p = GET_CODE (pattern) == ADDR_DIFF_VEC;
+      int len = XVECLEN (pattern, diff_vec_p);
+      int k;
+
+      for (k = 0; k < len; k++)
+	{
+	  rtx tem = XVECEXP (pattern, diff_vec_p, k);
+
+	  build_jmp_edges (tem, source);
+	}
+    }	/* NOTE(review): no return here, so the generic walk below runs too */
+
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	build_jmp_edges (XEXP (pattern, i), source);
+      if (fmt[i] == 'E')
+	{
+	  register int j;
+	  for (j = 0; j < XVECLEN (pattern, i); j++)
+	    build_jmp_edges (XVECEXP (pattern, i, j), source);
+	}
+    }
+}
+
+
+/* Add edge 'source' -> 'target' to the CFG unless 'source' already has it.  */
+
+static void
+new_edge (source, target)
+     int source, target;
+{
+  int e, next_edge;
+  int curr_edge, fst_edge;
+
+  /* check for duplicates among the outgoing edges of 'source' */
+  fst_edge = curr_edge = OUT_EDGES (source);
+  while (curr_edge)
+    {
+      if (FROM_BLOCK (curr_edge) == source
+	  && TO_BLOCK (curr_edge) == target)
+	{
+	  return;
+	}
+
+      curr_edge = NEXT_OUT (curr_edge);
+
+      if (fst_edge == curr_edge)
+	break;			/* went once around the circular list */
+    }
+
+  e = ++nr_edges;	/* from 1; NOTE(review): edge_table size unchecked */
+
+  FROM_BLOCK (e) = source;
+  TO_BLOCK (e) = target;
+
+  if (OUT_EDGES (source))
+    {
+      /* splice e in right after the head of source's outgoing list */
+      next_edge = NEXT_OUT (OUT_EDGES (source));
+      NEXT_OUT (OUT_EDGES (source)) = e;
+      NEXT_OUT (e) = next_edge;
+    }
+  else
+    {
+      OUT_EDGES (source) = e;
+      NEXT_OUT (e) = e;		/* single-element circular list */
+    }
+
+  if (IN_EDGES (target))
+    {
+      /* splice e in right after the head of target's incoming list */
+      next_edge = NEXT_IN (IN_EDGES (target));
+      NEXT_IN (IN_EDGES (target)) = e;
+      NEXT_IN (e) = next_edge;
+    }
+  else
+    {
+      IN_EDGES (target) = e;
+      NEXT_IN (e) = e;		/* single-element circular list */
+    }
+}
+
+
+/* BITSET macros for operations on the control flow graph.  LEN counts words.  */
+
+/* set1 |= set2: bitwise union of two bitsets, stored into set1.  */
+#define BITSET_UNION(set1, set2, len)                                \
+do { register bitset tp = set1, sp = set2;                           \
+     register int i;                                                 \
+     for (i = 0; i < len; i++)                                       \
+       *(tp++) |= *(sp++); } while (0)
+
+/* set1 &= set2: bitwise intersection of two bitsets, stored into set1.  */
+#define BITSET_INTER(set1, set2, len)                                \
+do { register bitset tp = set1, sp = set2;                           \
+     register int i;                                                 \
+     for (i = 0; i < len; i++)                                       \
+       *(tp++) &= *(sp++); } while (0)
+
+/* set1 &= ~set2: bitwise difference of two bitsets, stored into set1.  */
+#define BITSET_DIFFER(set1, set2, len)                               \
+do { register bitset tp = set1, sp = set2;                           \
+     register int i;                                                 \
+     for (i = 0; i < len; i++)                                       \
+       *(tp++) &= ~*(sp++); } while (0)
+
+/* Inverts every bit of bitset 'set', in place.  */
+#define BITSET_INVERT(set, len)                                      \
+do { register bitset tmpset = set;                                   \
+     register int i;                                                 \
+     for (i = 0; i < len; i++, tmpset++)                             \
+       *tmpset = ~*tmpset; } while (0)
+
+/* Turn on bit INDEX of bitset SET (LEN words long); abort if out of range.  */
+#define BITSET_ADD(set, index, len)                                  \
+{                                                                    \
+  if ((index) >= HOST_BITS_PER_WIDE_INT * (len))                     \
+    abort ();                                                        \
+  else  /* word-wide mask: an int 1 cannot reach the high bits */    \
+    (set)[(index) / HOST_BITS_PER_WIDE_INT] |=                       \
+      (unsigned HOST_WIDE_INT) 1 << ((index) % HOST_BITS_PER_WIDE_INT); \
+}
+
+/* Turn off bit INDEX of bitset SET (LEN words long); abort if out of range.  */
+#define BITSET_REMOVE(set, index, len)                               \
+{                                                                    \
+  if ((index) >= HOST_BITS_PER_WIDE_INT * (len))                     \
+    abort ();                                                        \
+  else  /* word-wide mask: an int 1 cannot reach the high bits */    \
+    (set)[(index) / HOST_BITS_PER_WIDE_INT] &=                       \
+      ~((unsigned HOST_WIDE_INT) 1 << ((index) % HOST_BITS_PER_WIDE_INT)); \
+}
+
+
+/* Return 1 if bit INDEX of bitset SET (LEN words long) is on, else 0.  */
+
+static char
+bitset_member (set, index, len)
+     bitset set;
+     int index, len;
+{
+  if (index >= HOST_BITS_PER_WIDE_INT * len)
+    abort ();
+  return (set[index / HOST_BITS_PER_WIDE_INT]
+	  >> (index % HOST_BITS_PER_WIDE_INT)) & 1;	/* shift the word, not an int 1 */
+}
+
+
+/* Fill BL with the indices of the bits that are on in the LEN-word SET.  */
+
+static void
+extract_bitlst (set, len, bl)
+     bitset set;
+     int len;
+     bitlst *bl;
+{
+  int w, bit;
+  unsigned HOST_WIDE_INT bits;
+
+  /* Every call starts again at the front of bitlst_table, so the list
+     handed out by the previous call is overwritten.  */
+  bitlst_table_last = 0;
+  bl->nr_members = 0;
+  bl->first_member = &bitlst_table[0];
+
+  for (w = 0; w < len; w++)
+    {
+      bit = w * HOST_BITS_PER_WIDE_INT;
+      bits = set[w];
+      while (bits)
+	{
+	  if (bits & 1)
+	    {
+	      bitlst_table[bitlst_table_last] = bit;
+	      bitlst_table_last++;
+	      bl->nr_members += 1;
+	    }
+	  bit++;
+	  bits >>= 1;
+	}
+    }
+}
+
+
+/* functions for the construction of regions */
+
+/* Print all regions to `dump' (debugger aid).  Overwrites current_blocks.  */
+
+void
+debug_regions ()
+{
+  int rgn, bb;
+
+  fprintf (dump, "\n;;   ------------ REGIONS ----------\n\n");
+  for (rgn = 0; rgn < nr_regions; rgn++)
+    {
+      fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn,
+	       rgn_table[rgn].rgn_nr_blocks);
+      fprintf (dump, ";;\tbb/block: ");
+
+      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
+	{
+	  current_blocks = RGN_BLOCKS (rgn);	/* BB_TO_BLOCK reads this */
+
+	  if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb)))
+	    abort ();		/* the two mappings must be inverses */
+
+	  fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb));
+	}
+
+      fprintf (dump, "\n\n");
+    }
+}
+
+
+/* Make every basic block a region of its own (one block per region), so
+   that plain basic block scheduling can run through the same code as
+   interblock scheduling.  */
+
+static void
+find_single_block_region ()
+{
+  int blk;
+
+  nr_regions = n_basic_blocks;
+  for (blk = 0; blk < n_basic_blocks; blk++)
+    {
+      CONTAINING_RGN (blk) = blk;
+      BLOCK_TO_BB (blk) = 0;
+      RGN_BLOCKS (blk) = blk;
+      RGN_NR_BLOCKS (blk) = 1;
+      rgn_bb_table[blk] = blk;
+    }
+}
+
+
+/* Account for BLOCK in the running totals of a region under
+   construction: bump *NUM_BBS by one and add to *NUM_INSNS the LUID
+   distance between the end and the head of BLOCK (an estimate of its
+   number of insns).  Return 1 if the region has become "too large" for
+   interblock scheduling (compile time considerations), i.e. either total
+   exceeds its limit, otherwise return 0.  */
+
+static int
+too_large (block, num_bbs, num_insns)
+     int block, *num_bbs, *num_insns;
+{
+  int block_insns = (INSN_LUID (basic_block_end[block])
+		     - INSN_LUID (basic_block_head[block]));
+
+  *num_bbs += 1;
+  *num_insns += block_insns;
+
+  return (*num_bbs > max_rgn_blocks || *num_insns > max_rgn_insns) ? 1 : 0;
+}
+
+
+/* UPDATE_LOOP_RELATIONS (blk, hdr): Check if the loop headed by
+   max_hdr[blk] is still an inner loop.  Put in max_hdr[blk] the header of
+   the most inner loop containing blk (the candidate with the larger DFS
+   number wins; the loser is marked as not inner).
+
+   The macro relies on the arrays max_hdr, dfs_nr and inner being in
+   scope at the point of use.  It is wrapped in do ... while (0) so that
+   it behaves as a single statement when followed by a semicolon, e.g. as
+   the body of an unbraced `if'.  */
+#define UPDATE_LOOP_RELATIONS(blk, hdr)                              \
+do                                                                   \
+{                                                                    \
+  if (max_hdr[blk] == -1)                                            \
+    max_hdr[blk] = (hdr);                                            \
+  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])                       \
+    inner[hdr] = 0;                                                  \
+  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])                       \
+    {                                                                \
+      inner[max_hdr[blk]] = 0;                                       \
+      max_hdr[blk] = (hdr);                                          \
+    }                                                                \
+}                                                                    \
+while (0)
+
+
+/* Find regions for interblock scheduling: a loop-free procedure, a reducible
+   inner loop, or a basic block not contained in any other region.
+   The procedure's control flow graph is traversed twice.
+   First traversal, a DFS, finds the headers of inner loops in the graph,
+   and verifies that there are no unreachable blocks.
+   Second traversal processes headers of inner loops, checking that the
+   loop is reducible.  The loop blocks that form a region are put into the
+   region's blocks list in topological order.
+
+   If an unreachable block is found, interblock scheduling is given up and
+   every block becomes a region of its own.
+
+   The following variables are changed by the function: nr_regions,
+   rgn_table, rgn_bb_table, block_to_bb and containing_rgn.  */
+
+static void
+find_rgns ()
+{
+  int *max_hdr, *dfs_nr, *stack, *queue, *degree;
+  char *header, *inner, *passed, *in_stack, *in_queue, no_loops = 1;
+  int node, child, loop_head, i, j, fst_edge, head, tail;
+  int count = 0, sp, idx = 0, current_edge = out_edges[0];
+  int num_bbs, num_insns;
+  int too_large_failure;
+  char *reachable;
+
+  /*
+     The following data structures are computed by the first traversal and
+     are used by the second traversal:
+     header[i] - flag set if the block i is the header of a loop.
+     inner[i] - initially set. It is reset if the block i is the header
+     of a non-inner loop.
+     max_hdr[i] - the header of the inner loop containing block i.
+     (for a block i not in an inner loop it may be -1 or the
+     header of the most inner loop containing the block).
+     reachable[i] - flag set if the block i was reached by the DFS.
+
+     These data structures are used by the first traversal only:
+     stack - non-recursive DFS implementation which uses a stack of edges.
+     sp - top of the stack of edges
+     dfs_nr[i] - the DFS ordering of block i.
+     in_stack[i] - flag set if the block i is in the DFS stack.
+     passed[e] - flag set if the edge e was already traversed.
+
+     These data structures are used by the second traversal only:
+     queue - queue containing the blocks of the current region.
+     head and tail - queue boundaries.
+     in_queue[i] - flag set if the block i is in queue */
+
+  /* function's inner arrays allocation and initialization */
+  max_hdr = (int *) alloca (n_basic_blocks * sizeof (int));
+  dfs_nr = (int *) alloca (n_basic_blocks * sizeof (int));
+  bzero ((int *) dfs_nr, n_basic_blocks * sizeof (int));
+  stack = (int *) alloca (nr_edges * sizeof (int));
+  queue = (int *) alloca (n_basic_blocks * sizeof (int));
+
+  inner = (char *) alloca (n_basic_blocks * sizeof (char));
+  header = (char *) alloca (n_basic_blocks * sizeof (char));
+  bzero ((char *) header, n_basic_blocks * sizeof (char));
+  passed = (char *) alloca (nr_edges * sizeof (char));
+  bzero ((char *) passed, nr_edges * sizeof (char));
+  /* in_stack is indexed by basic block, not by edge, so it must have one
+     element per block; sizing it by nr_edges could overflow it when the
+     function has more blocks than edges.  */
+  in_stack = (char *) alloca (n_basic_blocks * sizeof (char));
+  bzero ((char *) in_stack, n_basic_blocks * sizeof (char));
+  reachable = (char *) alloca (n_basic_blocks * sizeof (char));
+  bzero ((char *) reachable, n_basic_blocks * sizeof (char));
+
+  in_queue = (char *) alloca (n_basic_blocks * sizeof (char));
+
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+      inner[i] = 1;
+      max_hdr[i] = -1;
+    }
+
+  /* First traversal: DFS, finds inner loops in control flow graph */
+
+  reachable[0] = 1;
+  sp = -1;
+  while (1)
+    {
+      if (current_edge == 0 || passed[current_edge])
+	{
+	  /*  Here, if current_edge == 0, this is a leaf block.
+	     Otherwise current_edge was already passed.  Note that in
+	     the latter case, not only current_edge but also all its
+	     NEXT_OUT edges are also passed.  We have to "climb up on
+	     edges in the stack", looking for the first (already
+	     passed) edge whose NEXT_OUT was not passed yet.  */
+
+	  while (sp >= 0 && (current_edge == 0 || passed[current_edge]))
+	    {
+	      current_edge = stack[sp--];
+	      node = FROM_BLOCK (current_edge);
+	      child = TO_BLOCK (current_edge);
+	      in_stack[child] = 0;
+	      if (max_hdr[child] >= 0 && in_stack[max_hdr[child]])
+		UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
+	      current_edge = NEXT_OUT (current_edge);
+	    }
+
+	  /* stack empty - the whole graph is traversed.  */
+	  if (sp < 0 && passed[current_edge])
+	    break;
+	  continue;
+	}
+
+      node = FROM_BLOCK (current_edge);
+      dfs_nr[node] = ++count;
+      in_stack[node] = 1;
+      child = TO_BLOCK (current_edge);
+      reachable[child] = 1;
+
+      /* found a loop header */
+      if (in_stack[child])
+	{
+	  no_loops = 0;
+	  header[child] = 1;
+	  max_hdr[child] = child;
+	  UPDATE_LOOP_RELATIONS (node, child);
+	  passed[current_edge] = 1;
+	  current_edge = NEXT_OUT (current_edge);
+	  continue;
+	}
+
+      /* the child was already visited once, no need to go down from
+         it, everything is traversed there.  */
+      if (dfs_nr[child])
+	{
+	  if (max_hdr[child] >= 0 && in_stack[max_hdr[child]])
+	    UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
+	  passed[current_edge] = 1;
+	  current_edge = NEXT_OUT (current_edge);
+	  continue;
+	}
+
+      /* this is a step down in the dfs traversal */
+      stack[++sp] = current_edge;
+      passed[current_edge] = 1;
+      current_edge = OUT_EDGES (child);
+    }				/* while (1); */
+
+  /* if there are unreachable blocks, or more than one entry to
+     the subroutine, give up on interblock scheduling */
+  for (i = 1; i < n_basic_blocks; i++)
+    {
+      if (reachable[i] == 0)
+	{
+	  find_single_block_region ();
+	  if (sched_verbose >= 3)
+	    fprintf (stderr, "sched: warning: found an unreachable block %d \n", i);
+	  return;
+	}
+    }
+
+  /* Second traversal: find reducible inner loops, and sort
+     topologically the blocks of each region */
+  degree = dfs_nr;		/* reuse dfs_nr array - it is not needed anymore */
+  bzero ((char *) in_queue, n_basic_blocks * sizeof (char));
+
+  if (no_loops)
+    header[0] = 1;
+
+  /* compute the in-degree of every block in the graph */
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+      fst_edge = IN_EDGES (i);
+      if (fst_edge > 0)
+	{
+	  degree[i] = 1;
+	  current_edge = NEXT_IN (fst_edge);
+	  while (fst_edge != current_edge)
+	    {
+	      ++degree[i];
+	      current_edge = NEXT_IN (current_edge);
+	    }
+	}
+      else
+	degree[i] = 0;
+    }
+
+  /* pass through all graph blocks, looking for headers of inner loops */
+  for (i = 0; i < n_basic_blocks; i++)
+    {
+
+      if (header[i] && inner[i])
+	{
+
+	  /* i is a header of a potentially reducible inner loop, or
+	     block 0 in a subroutine with no loops at all */
+	  head = tail = -1;
+	  too_large_failure = 0;
+	  loop_head = max_hdr[i];
+
+	  /* decrease in_degree of all i's successors, (this is needed
+	     for the topological ordering) */
+	  fst_edge = current_edge = OUT_EDGES (i);
+	  if (fst_edge > 0)
+	    {
+	      do
+		{
+		  --degree[TO_BLOCK (current_edge)];
+		  current_edge = NEXT_OUT (current_edge);
+		}
+	      while (fst_edge != current_edge);
+	    }
+
+	  /* estimate # insns, and count # blocks in the region.  */
+	  num_bbs = 1;
+	  num_insns = INSN_LUID (basic_block_end[i]) - INSN_LUID (basic_block_head[i]);
+
+
+	  /* find all loop latches, if it is a true loop header, or
+	     all leaves if the graph has no loops at all */
+	  if (no_loops)
+	    {
+	      for (j = 0; j < n_basic_blocks; j++)
+		if (out_edges[j] == 0)	/* a leaf */
+		  {
+		    queue[++tail] = j;
+		    in_queue[j] = 1;
+
+		    if (too_large (j, &num_bbs, &num_insns))
+		      {
+			too_large_failure = 1;
+			break;
+		      }
+		  }
+	    }
+	  else
+	    {
+	      fst_edge = current_edge = IN_EDGES (i);
+	      do
+		{
+		  node = FROM_BLOCK (current_edge);
+		  if (max_hdr[node] == loop_head && node != i)	/* a latch */
+		    {
+		      queue[++tail] = node;
+		      in_queue[node] = 1;
+
+		      if (too_large (node, &num_bbs, &num_insns))
+			{
+			  too_large_failure = 1;
+			  break;
+			}
+		    }
+		  current_edge = NEXT_IN (current_edge);
+		}
+	      while (fst_edge != current_edge);
+	    }
+
+	  /* Put in queue[] all blocks that belong to the loop.  Check
+	     that the loop is reducible, traversing back from the loop
+	     latches up to the loop header.  */
+	  while (head < tail && !too_large_failure)
+	    {
+	      child = queue[++head];
+	      fst_edge = current_edge = IN_EDGES (child);
+	      do
+		{
+		  node = FROM_BLOCK (current_edge);
+
+		  if (max_hdr[node] != loop_head)
+		    {		/* another entry to loop, it is irreducible */
+		      tail = -1;
+		      break;
+		    }
+		  else if (!in_queue[node] && node != i)
+		    {
+		      queue[++tail] = node;
+		      in_queue[node] = 1;
+
+		      if (too_large (node, &num_bbs, &num_insns))
+			{
+			  too_large_failure = 1;
+			  break;
+			}
+		    }
+		  current_edge = NEXT_IN (current_edge);
+		}
+	      while (fst_edge != current_edge);
+	    }
+
+	  if (tail >= 0 && !too_large_failure)
+	    {
+	      /* Place the loop header into list of region blocks */
+	      degree[i] = -1;
+	      rgn_bb_table[idx] = i;
+	      RGN_NR_BLOCKS (nr_regions) = num_bbs;
+	      RGN_BLOCKS (nr_regions) = idx++;
+	      CONTAINING_RGN (i) = nr_regions;
+	      BLOCK_TO_BB (i) = count = 0;
+
+	      /* remove blocks from queue[], (in topological order), when
+	         their in_degree becomes 0.  We scan the queue over and
+	         over again until it is empty.  Note: there may be a more
+	         efficient way to do it.  */
+	      while (tail >= 0)
+		{
+		  if (head < 0)
+		    head = tail;
+		  child = queue[head];
+		  if (degree[child] == 0)
+		    {
+		      degree[child] = -1;
+		      rgn_bb_table[idx++] = child;
+		      BLOCK_TO_BB (child) = ++count;
+		      CONTAINING_RGN (child) = nr_regions;
+		      queue[head] = queue[tail--];
+		      fst_edge = current_edge = OUT_EDGES (child);
+
+		      if (fst_edge > 0)
+			{
+			  do
+			    {
+			      --degree[TO_BLOCK (current_edge)];
+			      current_edge = NEXT_OUT (current_edge);
+			    }
+			  while (fst_edge != current_edge);
+			}
+		    }
+		  else
+		    --head;
+		}
+	      ++nr_regions;
+	    }
+	}
+    }
+
+  /* define each of all other blocks as a region itself (blocks already
+     placed in a region had their degree set to -1 above) */
+  for (i = 0; i < n_basic_blocks; i++)
+    if (degree[i] >= 0)
+      {
+	rgn_bb_table[idx] = i;
+	RGN_NR_BLOCKS (nr_regions) = 1;
+	RGN_BLOCKS (nr_regions) = idx++;
+	CONTAINING_RGN (i) = nr_regions++;
+	BLOCK_TO_BB (i) = 0;
+      }
+
+}				/* find_rgns */
+
+
+/* functions for regions scheduling information */
+
+/* Compute dominators, probability, and potential-split-edges of bb.
+   Assume that these values were already computed for bb's predecessors.
+
+   A region entry dominates only itself (bit 0) and has probability 1.0.
+   For any other bb, dom[bb] is the intersection of its predecessors'
+   dominators plus bb itself; ancestor_edges[bb] collects the predecessors'
+   ancestor edges plus bb's own in-edges; pot_split[bb] collects the
+   predecessors' potential split edges plus all out-edges of the
+   predecessors, minus the ancestor edges.  */
+
+static void
+compute_dom_prob_ps (bb)
+     int bb;
+{
+  int first_in, in_edge;
+
+  prob[bb] = 0.0;
+  if (IS_RGN_ENTRY (bb))
+    {
+      BITSET_ADD (dom[bb], 0, bbset_size);
+      prob[bb] = 1.0;
+      return;
+    }
+
+  /* initialize dom[bb] to '111..1', so that it can be narrowed down by
+     intersecting with each predecessor's dominators */
+  BITSET_INVERT (dom[bb], bbset_size);
+
+  first_in = in_edge = IN_EDGES (BB_TO_BLOCK (bb));
+  do
+    {
+      int pred = FROM_BLOCK (in_edge);
+      int first_out = OUT_EDGES (pred);
+      int out_edge = first_out;
+      int nr_out_edges = 0, nr_rgn_out_edges = 0;
+
+      BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size);
+
+      BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)],
+		    edgeset_size);
+      BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (in_edge), edgeset_size);
+
+      BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)],
+		    edgeset_size);
+
+      /* walk all out-edges of pred: each is a potential split edge, and
+         those whose target lies outside bb's region are region exits */
+      do
+	{
+	  ++nr_out_edges;
+	  BITSET_ADD (pot_split[bb], EDGE_TO_BIT (out_edge), edgeset_size);
+
+	  /* the successor doesn't belong to the region? */
+	  if (CONTAINING_RGN (TO_BLOCK (out_edge)) !=
+	      CONTAINING_RGN (BB_TO_BLOCK (bb)))
+	    ++nr_rgn_out_edges;
+
+	  out_edge = NEXT_OUT (out_edge);
+	}
+      while (out_edge != first_out);
+
+      /* now nr_rgn_out_edges is the number of region-exit edges from pred,
+         and nr_out_edges will be the number of pred out edges not leaving
+         the region.  A pred that can leave the region passes on only 0.9
+         of its probability.  */
+      nr_out_edges -= nr_rgn_out_edges;
+      if (nr_rgn_out_edges > 0)
+	prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges;
+      else
+	prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges;
+
+      in_edge = NEXT_IN (in_edge);
+    }
+  while (in_edge != first_in);
+
+  BITSET_ADD (dom[bb], bb, bbset_size);
+  BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size);
+
+  if (sched_verbose >= 2)
+    fprintf (dump, ";;  bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb]));
+}				/* compute_dom_prob_ps */
+
+/* functions for target info */
+
+/* Compute in BL the list of split-edges of bb_src relatively to bb_trg:
+   the potential split edges of bb_src that are not potential split edges
+   of bb_trg.  Note that bb_trg dominates bb_src.  The list is built by
+   extract_bitlst, so it lives in the table that function reuses.  */
+
+static void
+split_edges (bb_src, bb_trg, bl)
+     int bb_src;
+     int bb_trg;
+     edgelst *bl;
+{
+  int word;
+  edgeset src = (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT));
+
+  /* work on a scratch copy, pot_split[bb_src] itself must stay intact */
+  for (word = edgeset_size - 1; word >= 0; word--)
+    src[word] = pot_split[bb_src][word];
+
+  BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size);
+  extract_bitlst (src, edgeset_size, bl);
+}
+
+
+/* Find the valid candidate-source-blocks for the target block TRG, compute
+   their probability, and check if they are speculative or not.
+   For speculative sources, compute their update-blocks and split-blocks.
+
+   A block after TRG in the region is a valid source if it is dominated by
+   TRG, its probability relative to TRG is at least MIN_PROBABILITY, and,
+   when it has split edges (i.e. it is speculative), speculative
+   scheduling is enabled.  The split-blocks are the targets of the split
+   edges; the update-blocks are the targets of the remaining out-edges of
+   the blocks the split edges leave from.  */
+
+static void
+compute_trg_info (trg)
+     int trg;
+{
+  register candidate *cand;
+  edgelst el;
+  int src, j, k;
+
+  /* define some of the fields for the target bb as well */
+  cand = candidate_table + trg;
+  cand->is_valid = 1;
+  cand->is_speculative = 0;
+  cand->src_prob = 100;
+
+  for (src = trg + 1; src < current_nr_blocks; src++)
+    {
+      int nr_update;
+
+      cand = candidate_table + src;
+
+      cand->is_valid = IS_DOMINATED (src, trg);
+      if (cand->is_valid)
+	{
+	  cand->src_prob = GET_SRC_PROB (src, trg);
+	  cand->is_valid = (cand->src_prob >= MIN_PROBABILITY);
+	}
+
+      if (cand->is_valid)
+	{
+	  split_edges (src, trg, &el);
+	  cand->is_speculative = (el.nr_members) ? 1 : 0;
+	  if (cand->is_speculative && !flag_schedule_speculative)
+	    cand->is_valid = 0;
+	}
+
+      if (!cand->is_valid)
+	{
+	  /* not a candidate: leave a clean, empty entry behind */
+	  cand->split_bbs.nr_members = cand->update_bbs.nr_members = 0;
+
+	  cand->is_speculative = 0;
+	  cand->src_prob = 0;
+	  continue;
+	}
+
+      /* split-blocks: the targets of the split edges */
+      cand->split_bbs.first_member = &bblst_table[bblst_last];
+      cand->split_bbs.nr_members = el.nr_members;
+      for (j = 0; j < el.nr_members; j++)
+	{
+	  bblst_table[bblst_last] = TO_BLOCK (rgn_edges[el.first_member[j]]);
+	  bblst_last++;
+	}
+
+      /* update-blocks: for every block a split edge leaves from, the
+         targets of those of its out-edges that are not split edges */
+      cand->update_bbs.first_member = &bblst_table[bblst_last];
+      nr_update = 0;
+      for (j = 0; j < el.nr_members; j++)
+	{
+	  int from_block = FROM_BLOCK (rgn_edges[el.first_member[j]]);
+	  int first_out = OUT_EDGES (from_block);
+	  int out_edge = first_out;
+
+	  do
+	    {
+	      /* is out_edge one of the split edges? */
+	      k = 0;
+	      while (k < el.nr_members
+		     && EDGE_TO_BIT (out_edge) != el.first_member[k])
+		k++;
+
+	      if (k >= el.nr_members)
+		{
+		  bblst_table[bblst_last++] = TO_BLOCK (out_edge);
+		  nr_update++;
+		}
+
+	      out_edge = NEXT_OUT (out_edge);
+	    }
+	  while (out_edge != first_out);
+	}
+      cand->update_bbs.nr_members = nr_update;
+    }
+}				/* compute_trg_info */
+
+
+/* Print the candidate table entry of source bb I, for debugging purposes.
+   Callable from debugger.  Nothing is printed for an invalid candidate;
+   for a speculative one, its split-blocks and update-blocks are listed.  */
+
+void
+debug_candidate (i)
+     int i;
+{
+  candidate *cand = candidate_table + i;
+  int j;
+
+  if (!cand->is_valid)
+    return;
+
+  if (!cand->is_speculative)
+    {
+      fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i));
+      return;
+    }
+
+  fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);
+
+  fprintf (dump, "split path: ");
+  for (j = 0; j < cand->split_bbs.nr_members; j++)
+    fprintf (dump, " %d ", (int) cand->split_bbs.first_member[j]);
+  fprintf (dump, "\n");
+
+  fprintf (dump, "update path: ");
+  for (j = 0; j < cand->update_bbs.nr_members; j++)
+    fprintf (dump, " %d ", (int) cand->update_bbs.first_member[j]);
+  fprintf (dump, "\n");
+}
+
+
+/* Print the candidate table for target bb TRG, i.e. the entries of all
+   the blocks following TRG in the current region, for debugging purposes.
+   Callable from debugger.  */
+
+void
+debug_candidates (trg)
+     int trg;
+{
+  int src = trg + 1;
+
+  fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n",
+	   BB_TO_BLOCK (trg), trg);
+
+  while (src < current_nr_blocks)
+    {
+      debug_candidate (src);
+      src++;
+    }
+}
+
+
+/* functions for speculative scheduling */
+
+/* Return 0 if x is a set of a register alive in the beginning of one
+   of the split-blocks of src, otherwise return 1.
+
+   X is a SET or CLOBBER; its destination is stripped of SUBREG,
+   ZERO_EXTRACT, SIGN_EXTRACT and STRICT_LOW_PART wrappers first.  A
+   destination that is not a register never blocks the motion (1 is
+   returned), while a global register is always assumed live (0 is
+   returned).  */
+
+static int
+check_live_1 (src, x)
+     int src;
+     rtx x;
+{
+  register int i;
+  register int regno, nregs;
+  register rtx reg = SET_DEST (x);
+
+  if (reg == 0)
+    return 1;
+
+  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
+	 || GET_CODE (reg) == SIGN_EXTRACT
+	 || GET_CODE (reg) == STRICT_LOW_PART)
+    reg = XEXP (reg, 0);
+
+  if (GET_CODE (reg) != REG)
+    return 1;
+
+  regno = REGNO (reg);
+
+  /* Global registers are assumed live */
+  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
+    return 0;
+
+  /* A hard register in this mode may occupy several consecutive hard
+     registers, each of which has to be checked; a pseudo register is
+     checked as a single register.  */
+  if (regno < FIRST_PSEUDO_REGISTER)
+    nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg));
+  else
+    nregs = 1;
+
+  while (--nregs >= 0)
+    {
+      for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
+	{
+	  int b = candidate_table[src].split_bbs.first_member[i];
+
+	  if (REGNO_REG_SET_P (basic_block_live_at_start[b], regno + nregs))
+	    return 0;
+	}
+    }
+
+  return 1;
+}
+
+
+/* If x is a set of a register R, mark that R is alive in the beginning
+   of every update-block of src.
+
+   X is a SET or CLOBBER; its destination is stripped of SUBREG,
+   ZERO_EXTRACT, SIGN_EXTRACT and STRICT_LOW_PART wrappers first.  Nothing
+   is done if the destination is not a register, or if it is a global
+   register.  */
+
+static void
+update_live_1 (src, x)
+     int src;
+     rtx x;
+{
+  register int i;
+  register int regno, nregs;
+  register rtx reg = SET_DEST (x);
+
+  if (reg == 0)
+    return;
+
+  while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT
+	 || GET_CODE (reg) == SIGN_EXTRACT
+	 || GET_CODE (reg) == STRICT_LOW_PART)
+    reg = XEXP (reg, 0);
+
+  if (GET_CODE (reg) != REG)
+    return;
+
+  regno = REGNO (reg);
+
+  /* Global registers are always live, so the code below does not apply
+     to them.  */
+  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
+    return;
+
+  /* A hard register in this mode may occupy several consecutive hard
+     registers, each of which has to be marked; a pseudo register is
+     marked as a single register.  */
+  if (regno < FIRST_PSEUDO_REGISTER)
+    nregs = HARD_REGNO_NREGS (regno, GET_MODE (reg));
+  else
+    nregs = 1;
+
+  while (--nregs >= 0)
+    {
+      for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
+	{
+	  int b = candidate_table[src].update_bbs.first_member[i];
+
+	  SET_REGNO_REG_SET (basic_block_live_at_start[b], regno + nregs);
+	}
+    }
+}
+
+
+/* Return 1 if insn can be speculatively moved from block src to trg,
+   otherwise return 0.  Called before first insertion of insn to
+   ready-list or before the scheduling.
+
+   The motion is rejected if any register set or clobbered by INSN fails
+   check_live_1 for SRC.  TRG is not examined here.  */
+
+static int
+check_live (insn, src, trg)
+     rtx insn;
+     int src;
+     int trg;
+{
+  rtx pat = PATTERN (insn);
+  int j;
+
+  /* find the registers set by instruction */
+  switch (GET_CODE (pat))
+    {
+    case SET:
+    case CLOBBER:
+      return check_live_1 (src, pat);
+
+    case PARALLEL:
+      /* every SET or CLOBBER member has to pass the check */
+      for (j = XVECLEN (pat, 0) - 1; j >= 0; j--)
+	{
+	  rtx elt = XVECEXP (pat, 0, j);
+
+	  if (GET_CODE (elt) != SET && GET_CODE (elt) != CLOBBER)
+	    continue;
+	  if (!check_live_1 (src, elt))
+	    return 0;
+	}
+      return 1;
+
+    default:
+      return 1;
+    }
+}
+
+
+/* Update the live registers info after insn was moved speculatively from
+   block src to trg: every register set or clobbered by INSN is handed to
+   update_live_1 for SRC.  TRG is not examined here.  */
+
+static void
+update_live (insn, src, trg)
+     rtx insn;
+     int src, trg;
+{
+  rtx pat = PATTERN (insn);
+  int j;
+
+  /* find the registers set by instruction */
+  switch (GET_CODE (pat))
+    {
+    case SET:
+    case CLOBBER:
+      update_live_1 (src, pat);
+      break;
+
+    case PARALLEL:
+      for (j = XVECLEN (pat, 0) - 1; j >= 0; j--)
+	{
+	  rtx elt = XVECEXP (pat, 0, j);
+
+	  if (GET_CODE (elt) == SET || GET_CODE (elt) == CLOBBER)
+	    update_live_1 (src, elt);
+	}
+      break;
+
+    default:
+      break;
+    }
+}
+
+/* Exception Free Loads:
+
+   We define five classes of speculative loads: IFREE, IRISKY,
+   PFREE, PRISKY, and MFREE.
+
+   IFREE loads are loads that are proved to be exception-free, just
+   by examining the load insn.  Examples for such loads are loads
+   from TOC and loads of global data.
+
+   IRISKY loads are loads that are proved to be exception-risky,
+   just by examining the load insn.  Examples for such loads are
+   volatile loads and loads from shared memory.
+
+   PFREE loads are loads for which we can prove, by examining other
+   insns, that they are exception-free.  Currently, this class consists
+   of loads for which we are able to find a "similar load", either in
+   the target block, or, if only one split-block exists, in that split
+   block.  Load2 is similar to load1 if both have same single base
+   register.  We identify only part of the similar loads, by finding
+   an insn upon which both load1 and load2 have a DEF-USE dependence.
+
+   PRISKY loads are loads for which we can prove, by examining other
+   insns, that they are exception-risky.  Currently we have two proofs for
+   such loads.  The first proof detects loads that are probably guarded by a
+   test on the memory address.  This proof is based on the
+   backward and forward data dependence information for the region.
+   Let load-insn be the examined load.
+   Load-insn is PRISKY iff ALL the following hold:
+
+   - insn1 is not in the same block as load-insn
+   - there is a DEF-USE dependence chain (insn1, ..., load-insn)
+   - test-insn is either a compare or a branch, not in the same block as load-insn
+   - load-insn is reachable from test-insn
+   - there is a DEF-USE dependence chain (insn1, ..., test-insn)
+
+   This proof might fail when the compare and the load are fed
+   by an insn not in the region.  To solve this, we will add to this
+   group all loads that have no input DEF-USE dependence.
+
+   The second proof detects loads that are directly or indirectly
+   fed by a speculative load.  This proof is affected by the
+   scheduling process.  We will use the flag  fed_by_spec_load.
+   Initially, all insns have this flag reset.  After a speculative
+   motion of an insn, if insn is either a load, or marked as
+   fed_by_spec_load, we will also mark as fed_by_spec_load every
+   insn1 for which a DEF-USE dependence (insn, insn1) exists.  A
+   load which is fed_by_spec_load is also PRISKY.
+
+   MFREE (maybe-free) loads are all the remaining loads. They may be
+   exception-free, but we cannot prove it.
+
+   Now, all loads in IFREE and PFREE classes are considered
+   exception-free, while all loads in IRISKY and PRISKY classes are
+   considered exception-risky.  As for loads in the MFREE class,
+   these are considered either exception-free or exception-risky,
+   depending on whether we are pessimistic or optimistic.  We have
+   to take the pessimistic approach to assure the safety of
+   speculative scheduling, but we can take the optimistic approach
+   by invoking the -fsched-spec-load-dangerous option.  */
+
+/* Trap classes of insns, ordered from the safest to the riskiest, so that
+   the worse of two classes is the numerically larger one.  */
+enum INSN_TRAP_CLASS
+{
+  TRAP_FREE = 0,		/* non-load insn that cannot trap */
+  IFREE = 1,			/* exception-free load */
+  PFREE_CANDIDATE = 2,		/* load with 1 base register, to be checked */
+  PRISKY_CANDIDATE = 3,		/* load with no info, to be checked */
+  IRISKY = 4,			/* volatile load */
+  TRAP_RISKY = 5		/* store, or non-load insn that may trap */
+};
+
+/* The riskier (numerically larger) of two trap classes.  The arguments
+   are parenthesized so that arbitrary expressions can be passed; note
+   that one of them is evaluated twice.  */
+#define WORST_CLASS(class1, class2) \
+(((class1) > (class2)) ? (class1) : (class2))
+
+/* Indexed by INSN_UID, and set if there's DEF-USE dependence between */
+/* some speculatively moved load insn and this one.  */
+char *fed_by_spec_load;
+/* Indexed by INSN_UID (see IS_LOAD_INSN); presumably non-zero for insns
+   that are loads -- confirm where the array is filled in.  */
+char *is_load_insn;
+
+/* Non-zero if block bb_to is equal to, or reachable from block bb_from.
+   Besides the trivial cases (same block, or bb_from is the region entry),
+   bb_to is taken to be reachable when the first in-edge of bb_from is
+   among the ancestor edges of bb_to.  Both arguments are bb indices and
+   may be evaluated more than once.  */
+#define IS_REACHABLE(bb_from, bb_to)					\
+((bb_from) == (bb_to)							\
+   || IS_RGN_ENTRY (bb_from)						\
+   || (bitset_member (ancestor_edges[bb_to],				\
+		      EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))),	\
+		      edgeset_size)))
+
+/* Accessors, by INSN_UID, for the per-insn flags fed_by_spec_load and
+   is_load_insn.  */
+#define FED_BY_SPEC_LOAD(insn) (fed_by_spec_load[INSN_UID (insn)])
+#define IS_LOAD_INSN(insn) (is_load_insn[INSN_UID (insn)])
+
+/* Non-zero iff the address X is comprised from at most 1 register: X is
+   either a REG, or a PLUS, MINUS or LO_SUM one of whose two operands is a
+   CONST_INT.  X is evaluated several times.  */
+#define CONST_BASED_ADDRESS_P(x)				\
+  (GET_CODE (x) == REG						\
+   || ((GET_CODE (x) == PLUS					\
+	|| GET_CODE (x) == MINUS				\
+	|| GET_CODE (x) == LO_SUM)				\
+       && (GET_CODE (XEXP (x, 0)) == CONST_INT			\
+	   || GET_CODE (XEXP (x, 1)) == CONST_INT)))
+
+/* Turns on the fed_by_spec_load flag for insns fed by load_insn, i.e. for
+   every insn on load_insn's INSN_DEPEND list whose link has VOIDmode
+   (the mode this file tests for when looking for DEF-USE dependences).  */
+
+static void
+set_spec_fed (load_insn)
+     rtx load_insn;
+{
+  rtx link = INSN_DEPEND (load_insn);
+
+  while (link)
+    {
+      if (GET_MODE (link) == VOIDmode)
+	FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1;
+
+      link = XEXP (link, 1);
+    }
+}				/* set_spec_fed */
+
+/* On the path from the insn to load_insn_bb, find a conditional branch
+   depending on insn, that guards the speculative load.  Return 1 if such
+   a branch is found, otherwise 0.
+
+   The forward DEF-USE dependences of INSN are followed recursively.  Only
+   dependent insns that lie in the region of load_insn_bb, in a block
+   other than load_insn_bb from which load_insn_bb is reachable, are
+   considered.  */
+
+static int
+find_conditional_protection (insn, load_insn_bb)
+     rtx insn;
+     int load_insn_bb;
+{
+  rtx link;
+
+  /* iterate through DEF-USE forward dependences */
+  for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1))
+    {
+      rtx next = XEXP (link, 0);
+
+      /* next has to be in the same region as the load...  */
+      if (CONTAINING_RGN (INSN_BLOCK (next))
+	  != CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
+	continue;
+
+      /* ...in a different block, from which the load is reachable...  */
+      if (!IS_REACHABLE (INSN_BB (next), load_insn_bb))
+	continue;
+      if (load_insn_bb == INSN_BB (next))
+	continue;
+
+      /* ...and has to depend on insn through a DEF-USE dependence.  */
+      if (GET_MODE (link) != VOIDmode)
+	continue;
+
+      /* either next is the branch itself, or it may feed one */
+      if (GET_CODE (next) == JUMP_INSN
+	  || find_conditional_protection (next, load_insn_bb))
+	return 1;
+    }
+
+  return 0;
+}				/* find_conditional_protection */
+
+/* Returns 1 if the same insn1 that participates in the computation
+   of load_insn's address is feeding a conditional branch that is
+   guarding on load_insn.  This is true if we find the two DEF-USE
+   chains:
+   insn1 -> ... -> conditional-branch
+   insn1 -> ... -> load_insn,
+   and if a flow path exists:
+   insn1 -> ... -> conditional-branch -> ... -> load_insn,
+   and if insn1 is on the path
+   region-entry -> ... -> bb_trg -> ... load_insn.
+
+   Locate insn1 by climbing on LOG_LINKS from load_insn.
+   Locate the branch by following INSN_DEPEND from insn1.
+
+   NOTE(review): only the first suitable insn1 on the LOG_LINKS list is
+   examined; the answer found through it (directly or recursively) is
+   returned without trying the remaining links -- confirm that this is
+   intended.  */
+
+static int
+is_conditionally_protected (load_insn, bb_src, bb_trg)
+     rtx load_insn;
+     int bb_src, bb_trg;
+{
+  rtx link;
+
+  for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1))
+    {
+      rtx insn1 = XEXP (link, 0);
+
+      /* insn1 qualifies if there is a DEF-USE dependence upon it, it is
+         not a branch, it lies in the region of bb_src but not in bb_src
+         itself, and there exists a path:
+         region-entry -> ... -> bb_trg -> ... load_insn */
+      if (GET_MODE (link) == VOIDmode
+	  && GET_CODE (insn1) != JUMP_INSN
+	  && INSN_BB (insn1) != bb_src
+	  && (CONTAINING_RGN (INSN_BLOCK (insn1))
+	      == CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
+	  && (IS_REACHABLE (bb_trg, INSN_BB (insn1))
+	      || IS_REACHABLE (INSN_BB (insn1), bb_trg)))
+	{
+	  /* first search for the conditional-branch fed by insn1; failing
+	     that, take the recursive step: search another insn1, "above"
+	     the current insn1.  */
+	  return (find_conditional_protection (insn1, bb_src)
+		  || is_conditionally_protected (insn1, bb_src, bb_trg));
+	}
+    }
+
+  /* the chain does not exist */
+  return 0;
+}				/* is_conditionally_protected */
+
+/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
+   load_insn can move speculatively from bb_src to bb_trg.  All the
+   following must hold:
+
+   (1) bb_src has exactly one split-block.
+   (2) insn2 is classified as a PFREE_CANDIDATE (a load with 1 base
+   register).
+   (3) load_insn and insn2 have a def-use dependence upon
+   the same insn 'insn1'.
+   (4) insn2 is either in bb_trg, or in the split-block of bb_src.
+
+   From all these we can conclude that the two loads access memory
+   addresses that differ at most by a constant, and hence if moving
+   load_insn would cause an exception, it would have been caused by
+   insn2 anyhow.  */
+
+static int
+is_pfree (load_insn, bb_src, bb_trg)
+     rtx load_insn;
+     int bb_src, bb_trg;
+{
+  register candidate *candp = candidate_table + bb_src;
+  rtx back_link, fore_link;
+
+  /* must have exactly one escape block */
+  if (candp->split_bbs.nr_members != 1)
+    return 0;
+
+  for (back_link = LOG_LINKS (load_insn);
+       back_link; back_link = XEXP (back_link, 1))
+    {
+      rtx insn1;
+
+      /* only DEF-USE dependences (insn1, load_insn) are of interest */
+      if (GET_MODE (back_link) != VOIDmode)
+	continue;
+      insn1 = XEXP (back_link, 0);
+
+      for (fore_link = INSN_DEPEND (insn1);
+	   fore_link; fore_link = XEXP (fore_link, 1))
+	{
+	  rtx insn2;
+
+	  /* only DEF-USE dependences (insn1, insn2) are of interest */
+	  if (GET_MODE (fore_link) != VOIDmode)
+	    continue;
+	  insn2 = XEXP (fore_link, 0);
+
+	  /* insn2 not guaranteed to be a 1 base reg load */
+	  if (classify_insn (insn2) != PFREE_CANDIDATE)
+	    continue;
+
+	  /* insn2 is the similar load if it is in the target block, or
+	     in the split-block */
+	  if (INSN_BB (insn2) == bb_trg
+	      || *(candp->split_bbs.first_member) == INSN_BLOCK (insn2))
+	    return 1;
+	}
+    }
+
+  /* couldn't find a similar load */
+  return 0;
+}				/* is_pfree */
+
+/* Returns the trap class that an insn may belong to, as found by
+   analyzing the expression X of the insn.  IS_STORE is non-zero if X is
+   the destination of the insn: then only a MEM is risky.  Otherwise a
+   MEM is classified as a load, and any other expression gets the worst
+   class found among its sub-expressions, or TRAP_RISKY if it may trap
+   by itself.  */
+
+static int
+may_trap_exp (x, is_store)
+     rtx x;
+     int is_store;
+{
+  enum rtx_code code;
+  char *fmt;
+  int opnd, elt, sub_class, worst;
+
+  if (x == 0)
+    return TRAP_FREE;
+
+  code = GET_CODE (x);
+
+  if (is_store)
+    return (code == MEM) ? TRAP_RISKY : TRAP_FREE;
+
+  if (code == MEM)
+    {
+      /* The insn uses memory */
+      if (MEM_VOLATILE_P (x))
+	return IRISKY;		/* a volatile load */
+      if (!may_trap_p (x))
+	return IFREE;		/* an exception-free load */
+      if (CONST_BASED_ADDRESS_P (XEXP (x, 0)))
+	return PFREE_CANDIDATE;	/* 1 base register, to be further checked */
+      return PRISKY_CANDIDATE;	/* no info, to be further checked */
+    }
+
+  /* neither store nor load, check if it may cause a trap */
+  if (may_trap_p (x))
+    return TRAP_RISKY;
+
+  /* recursive step: walk the operands from last to first, keeping the
+     worst class seen; stop as soon as it is TRAP_RISKY or IRISKY.  */
+  worst = TRAP_FREE;
+  fmt = GET_RTX_FORMAT (code);
+  for (opnd = GET_RTX_LENGTH (code) - 1; opnd >= 0; opnd--)
+    {
+      if (fmt[opnd] == 'e')
+	{
+	  sub_class = may_trap_exp (XEXP (x, opnd), is_store);
+	  if (sub_class > worst)
+	    worst = sub_class;
+	}
+      else if (fmt[opnd] == 'E')
+	{
+	  for (elt = 0; elt < XVECLEN (x, opnd); elt++)
+	    {
+	      sub_class = may_trap_exp (XVECEXP (x, opnd, elt), is_store);
+	      if (sub_class > worst)
+		worst = sub_class;
+	      if (worst == TRAP_RISKY || worst == IRISKY)
+		break;
+	    }
+	}
+
+      if (worst == TRAP_RISKY || worst == IRISKY)
+	break;
+    }
+
+  return worst;
+}				/* may_trap_exp */
+
+
+/* Classifies insn for the purpose of verifying that it can be
+   moved speculatively, by examining its patterns, returning:
+   TRAP_RISKY: store, or risky non-load insn (e.g. division by variable).
+   TRAP_FREE: non-load insn.
+   IFREE: load from a globally safe location.
+   IRISKY: volatile load.
+   PFREE_CANDIDATE, PRISKY_CANDIDATE: load that need to be checked for
+   being either PFREE or PRISKY.
+
+   For a PARALLEL every SET and CLOBBER element is classified and the
+   results are combined with WORST_CLASS; the scan stops as soon as the
+   combined class is TRAP_RISKY or IRISKY.  Elements of other codes are
+   not examined, and a pattern that is neither a SET, a CLOBBER nor a
+   PARALLEL is classified TRAP_FREE.  */
+
+static int
+classify_insn (insn)
+     rtx insn;
+{
+  rtx pat = PATTERN (insn);
+  int tmp_class = TRAP_FREE;
+  int insn_class = TRAP_FREE;
+  enum rtx_code code;
+
+  if (GET_CODE (pat) == PARALLEL)
+    {
+      int i, len = XVECLEN (pat, 0);
+
+      for (i = len - 1; i >= 0; i--)
+	{
+	  code = GET_CODE (XVECEXP (pat, 0, i));
+	  switch (code)
+	    {
+	    case CLOBBER:
+	      /* test if it is a 'store' */
+	      tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1);
+	      break;
+	    case SET:
+	      /* test if it is a store */
+	      tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1);
+	      if (tmp_class == TRAP_RISKY)
+		break;
+	      /* test if it is a load  */
+	      tmp_class =
+		WORST_CLASS (tmp_class,
+			   may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0));
+	    default:;		/* the SET case falls through to here */
+	    }
+	  insn_class = WORST_CLASS (insn_class, tmp_class);
+	  if (insn_class == TRAP_RISKY || insn_class == IRISKY)
+	    break;
+	}
+    }
+  else
+    {
+      code = GET_CODE (pat);
+      switch (code)
+	{
+	case CLOBBER:
+	  /* test if it is a 'store' */
+	  tmp_class = may_trap_exp (XEXP (pat, 0), 1);
+	  break;
+	case SET:
+	  /* test if it is a store */
+	  tmp_class = may_trap_exp (SET_DEST (pat), 1);
+	  if (tmp_class == TRAP_RISKY)
+	    break;
+	  /* test if it is a load  */
+	  tmp_class =
+	    WORST_CLASS (tmp_class,
+			 may_trap_exp (SET_SRC (pat), 0));
+	default:;		/* the SET case falls through to here */
+	}
+      insn_class = tmp_class;
+    }
+
+  return insn_class;
+
+}				/* classify_insn */
+
+/* Return 1 if load_insn must be treated as a risky load, 0 otherwise.
+   That is the case when it is fed by a load that was itself moved
+   speculatively, when it has no backward dependences at all (the
+   dependence may 'hide' out of the region), or when it is protected
+   by a compare on its own address.  */
+
+static int
+is_prisky (load_insn, bb_src, bb_trg)
+     rtx load_insn;
+     int bb_src, bb_trg;
+{
+  /* The tests are made in this order and stop at the first that holds.  */
+  return (FED_BY_SPEC_LOAD (load_insn)
+          || LOG_LINKS (load_insn) == NULL
+          || is_conditionally_protected (load_insn, bb_src, bb_trg));
+}				/* is_prisky */
+
+/* Decide whether INSN, a candidate for speculative motion from bb_src
+   to bb_trg, can be moved without risking a new exception.  Return
+   nonzero if the motion is valid and 0 otherwise.  Loads are only
+   considered when flag_schedule_speculative_load is set, and INSN is
+   then marked with IS_LOAD_INSN.  */
+
+static int
+is_exception_free (insn, bb_src, bb_trg)
+     rtx insn;
+     int bb_src, bb_trg;
+{
+  int kind = classify_insn (insn);
+
+  /* Insns that are not loads are decided by their class alone.  */
+  if (kind == TRAP_FREE)
+    return 1;
+  if (kind == TRAP_RISKY)
+    return 0;
+
+  /* Everything else is a load.  */
+  if (!flag_schedule_speculative_load)
+    return 0;
+  IS_LOAD_INSN (insn) = 1;
+
+  if (kind == IFREE)
+    return (1);
+  if (kind == IRISKY)
+    return 0;
+
+  /* A PFREE candidate that has a similar load is safe ...  */
+  if (kind == PFREE_CANDIDATE && is_pfree (insn, bb_src, bb_trg))
+    return 1;
+
+  /* ... otherwise it is handled like a PRISKY candidate.  */
+  if (kind == PFREE_CANDIDATE || kind == PRISKY_CANDIDATE)
+    {
+      if (!flag_schedule_speculative_load_dangerous
+          || is_prisky (insn, bb_src, bb_trg))
+        return 0;
+    }
+
+  return flag_schedule_speculative_load_dangerous;
+}				/* is_exception_free */
+
+
+/* Process an insn's memory dependencies.  There are four kinds of
+   dependencies:
+
+   (0) read dependence: read follows read
+   (1) true dependence: read follows write
+   (2) anti dependence: write follows read
+   (3) output dependence: write follows write
+
+   We are careful to build only dependencies which actually exist, and
+   use transitivity to avoid building too many links.  */
+
+/* Search LIST for an element whose first operand is INSN.  Return that
+   list node, or 0 when INSN does not occur in LIST.  */
+
+__inline static rtx
+find_insn_list (insn, list)
+     rtx insn;
+     rtx list;
+{
+  rtx node;
+
+  for (node = list; node; node = XEXP (node, 1))
+    if (XEXP (node, 0) == insn)
+      return node;
+
+  return 0;
+}
+
+
+/* LIST and LIST1 are walked in step.  Return 1 if at some position
+   LIST holds INSN while LIST1 holds X, and 0 otherwise.  The walk ends
+   when LIST is exhausted.  */
+
+__inline static char
+find_insn_mem_list (insn, x, list, list1)
+     rtx insn, x;
+     rtx list, list1;
+{
+  for (; list; list = XEXP (list, 1), list1 = XEXP (list1, 1))
+    if (XEXP (list, 0) == insn && XEXP (list1, 0) == x)
+      return 1;
+
+  return 0;
+}
+
+
+/* Return the function units used by INSN, caching the answer of
+   function_units_used in INSN_UNIT.  A non-negative result is a unit
+   number; a negative result is the complement of a mask of units.
+   The cached form stores non-negative values incremented by one, so
+   that a cached 0 can mean "not computed yet".  */
+
+__inline static int
+insn_unit (insn)
+     rtx insn;
+{
+  register int cached = INSN_UNIT (insn);
+
+  /* Already known: undo the increment applied to unit numbers.  */
+  if (cached != 0)
+    return (cached > 0 ? cached - 1 : cached);
+
+  recog_memoized (insn);
+
+  if (INSN_CODE (insn) >= 0)
+    {
+      cached = function_units_used (insn);
+      /* Increment non-negative values so we can cache zero.  */
+      if (cached >= 0)
+        cached++;
+    }
+  else
+    /* A USE insn, or something else we don't need to understand.
+       Such insns must not reach function_units_used, which would
+       trigger a fatal error for unrecognizable insns.  */
+    cached = -1;
+
+  /* Only 16 bits of the result are cached; skip the store when the
+     value would not fit.  */
+  if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT
+      || cached >= 0
+      || (~cached & ((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0)
+    INSN_UNIT (insn) = cached;
+
+  return (cached > 0 ? cached - 1 : cached);
+}
+
+/* Return the blockage range for executing INSN on UNIT, as given by
+   the unit's blockage_range_function.  The range is an int whose upper
+   half is the minimum and whose lower half is the maximum value.  The
+   result for one unit at a time is cached in INSN_BLOCKAGE.  */
+
+__inline static unsigned int
+blockage_range (unit, insn)
+     int unit;
+     rtx insn;
+{
+  unsigned int cached = INSN_BLOCKAGE (insn);
+  unsigned int range;
+
+  /* The cache already holds the range for this very unit.  */
+  if (UNIT_BLOCKED (cached) == unit + 1)
+    return BLOCKAGE_RANGE (cached);
+
+  range = function_units[unit].blockage_range_function (insn);
+
+  /* Remember the range for this unit, but only if the encoding fits
+     in an int.  */
+  if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS)
+    INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range);
+
+  return range;
+}
+
+/* A vector indexed by function unit instance giving the last insn to use
+   the unit.  The value of the function unit instance index for unit U
+   instance I is (U + I * FUNCTION_UNITS_SIZE).  Entries are written by
+   schedule_unit and zeroed by clear_units.  */
+static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
+
+/* A vector indexed by function unit instance giving the minimum time when
+   the unit will unblock based on the maximum blockage cost.  Entries are
+   written by schedule_unit and zeroed by clear_units.  */
+static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY];
+
+/* A vector indexed by function unit number giving the number of insns
+   that remain to use the unit.  Zeroed by clear_units.  */
+static int unit_n_insns[FUNCTION_UNITS_SIZE];
+
+/* Put the function unit bookkeeping back into its initial state by
+   zeroing unit_n_insns, unit_tick and unit_last_insn.  */
+
+static void
+clear_units ()
+{
+  bzero ((char *) unit_n_insns, sizeof (unit_n_insns));
+  bzero ((char *) unit_tick, sizeof (unit_tick));
+  bzero ((char *) unit_last_insn, sizeof (unit_last_insn));
+}
+
+/* Return the issue-delay of INSN: the blockage INSN imposes on a second
+   issue of itself, as reported by blockage_function (INSN, INSN).  When
+   INSN uses several units (negative encoding from insn_unit) the largest
+   such value over those units is returned.  Units lacking either a
+   blockage range function or a blockage function contribute nothing;
+   the result is 0 if no unit contributes.  */
+
+__inline static int
+insn_issue_delay (insn)
+     rtx insn;
+{
+  int i, delay = 0;
+  int unit = insn_unit (insn);
+
+  /* efficiency note: in fact, we are working 'hard' to compute a
+     value that was available in md file, and is not available in
+     function_units[] structure.  It would be nice to have this
+     value there, too.  */
+  if (unit >= 0)
+    {
+      /* INSN uses the single unit UNIT.  */
+      if (function_units[unit].blockage_range_function &&
+	  function_units[unit].blockage_function)
+	delay = function_units[unit].blockage_function (insn, insn);
+    }
+  else
+    /* UNIT is the complement of a mask; bit I set means unit I is used.  */
+    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
+      if ((unit & 1) != 0 && function_units[i].blockage_range_function
+	  && function_units[i].blockage_function)
+	delay = MAX (delay, function_units[i].blockage_function (insn, insn));
+
+  return delay;
+}
+
+/* Compute the actual hazard cost of starting INSN at time CLOCK on
+   instance INSTANCE of function unit UNIT.  COST is the hazard cost
+   found so far; the value returned is never smaller than COST.  */
+
+__inline static int
+actual_hazard_this_instance (unit, instance, insn, clock, cost)
+     int unit, instance, clock, cost;
+     rtx insn;
+{
+  /* Time recorded for this instance when its last insn was scheduled.  */
+  int ready = unit_tick[instance];
+
+  /* Even the pessimistic estimate does not exceed COST.  */
+  if (ready - clock <= cost)
+    return cost;
+
+  /* The scheduler works forward: the unit's last insn is the one
+     executing and INSN is the candidate.  READY was computed from the
+     unit's maximum blockage; when the unit offers something more
+     exact (a blockage function, or failing that a blockage range
+     function) correct READY by the difference.  */
+  if (function_units[unit].blockage_range_function
+      && function_units[unit].blockage_function)
+    ready += (function_units[unit].blockage_function
+              (unit_last_insn[instance], insn)
+              - function_units[unit].max_blockage);
+  else if (function_units[unit].blockage_range_function)
+    ready += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn))
+              - function_units[unit].max_blockage);
+
+  if (ready - clock > cost)
+    cost = ready - clock;
+
+  return cost;
+}
+
+/* Record INSN as having begun execution on the units encoded by UNIT at
+   time CLOCK.  A non-negative UNIT names a single function unit; a
+   negative UNIT is the complement of a bit mask of units, and each unit
+   in the mask is recorded through a recursive call.  */
+
+__inline static void
+schedule_unit (unit, insn, clock)
+     int unit, clock;
+     rtx insn;
+{
+  int i;
+
+  if (unit >= 0)
+    {
+      int instance = unit;
+#if MAX_MULTIPLICITY > 1
+      /* Find the first free instance of the function unit and use that
+         one.  We assume that one is free.  If no earlier instance is
+         free, the last instance is used without being checked.  */
+      for (i = function_units[unit].multiplicity - 1; i > 0; i--)
+	{
+	  if (!actual_hazard_this_instance (unit, instance, insn, clock, 0))
+	    break;
+	  instance += FUNCTION_UNITS_SIZE;
+	}
+#endif
+      unit_last_insn[instance] = insn;
+      unit_tick[instance] = (clock + function_units[unit].max_blockage);
+    }
+  else
+    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
+      if ((unit & 1) != 0)
+	schedule_unit (i, insn, clock);
+}
+
+/* Return the actual hazard cost of executing INSN on the units encoded by
+   UNIT at time CLOCK if the previous actual hazard cost was COST.
+   The result is never less than COST.  For a single unit with several
+   instances the smallest hazard over the instances is used.  A negative
+   UNIT is the complement of a bit mask of units; the hazard of each unit
+   in the mask is folded into COST in turn.  */
+
+__inline static int
+actual_hazard (unit, insn, clock, cost)
+     int unit, clock, cost;
+     rtx insn;
+{
+  int i;
+
+  if (unit >= 0)
+    {
+      /* Find the instance of the function unit with the minimum hazard.  */
+      int instance = unit;
+      int best_cost = actual_hazard_this_instance (unit, instance, insn,
+						   clock, cost);
+      int this_cost;
+
+#if MAX_MULTIPLICITY > 1
+      if (best_cost > cost)
+	{
+	  for (i = function_units[unit].multiplicity - 1; i > 0; i--)
+	    {
+	      instance += FUNCTION_UNITS_SIZE;
+	      this_cost = actual_hazard_this_instance (unit, instance, insn,
+						       clock, cost);
+	      if (this_cost < best_cost)
+		{
+		  best_cost = this_cost;
+		  if (this_cost <= cost)
+		    break;	/* cannot do better than COST */
+		}
+	    }
+	}
+#endif
+      cost = MAX (cost, best_cost);
+    }
+  else
+    for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
+      if ((unit & 1) != 0)
+	cost = actual_hazard (i, insn, clock, cost);
+
+  return cost;
+}
+
+/* Return the potential hazard cost of executing INSN on the units
+   encoded by UNIT, given that the potential hazard cost so far is COST.
+   The result is never smaller than COST.  Insns with a large blockage
+   time are preferred over ones with a smaller time, and insns using a
+   unit that more insns still need are preferred over ones using a less
+   busy unit.  The aim is to minimize a subsequent actual hazard.  */
+
+__inline static int
+potential_hazard (unit, insn, cost)
+     int unit, cost;
+     rtx insn;
+{
+  int bit, weighted;
+  unsigned int lo, hi;
+
+  if (unit < 0)
+    {
+      /* UNIT is the complement of a mask: fold in every unit in it.  */
+      for (bit = 0, unit = ~unit; unit; bit++, unit >>= 1)
+        if ((unit & 1) != 0)
+          cost = potential_hazard (bit, insn, cost);
+      return cost;
+    }
+
+  /* Start from the unit's maximum blockage.  */
+  lo = hi = function_units[unit].max_blockage;
+  if (hi <= 1)
+    return cost;
+
+  /* Narrow it to the range for this particular insn when possible.  */
+  if (function_units[unit].blockage_range_function)
+    {
+      unsigned int range = blockage_range (unit, insn);
+
+      hi = MAX_BLOCKAGE_COST (range);
+      lo = MIN_BLOCKAGE_COST (range);
+    }
+  if (hi <= 1)
+    return cost;
+
+  /* Make the number of instructions left dominate.  Make the minimum
+     delay dominate the maximum delay.  If all these are the same, the
+     unit number adds an arbitrary ordering.  */
+  weighted = lo * 0x40 + hi;
+  weighted *= (unit_n_insns[unit] - 1) * 0x1000 + unit;
+  if (weighted > cost)
+    cost = weighted;
+
+  return cost;
+}
+
+/* Compute cost of executing INSN given the dependence LINK on the insn USED.
+   This is the number of cycles between instruction issue and
+   instruction results.
+
+   The value of result_ready_cost, raised to at least 1, is cached in
+   INSN_COST; a cached 0 means "not computed yet".  An unrecognizable
+   INSN costs 1.  When both LINK and USED are zero the cached cost is
+   returned without looking at any dependence.  */
+
+__inline static int
+insn_cost (insn, link, used)
+     rtx insn, link, used;
+{
+  register int cost = INSN_COST (insn);
+
+  if (cost == 0)
+    {
+      recog_memoized (insn);
+
+      /* A USE insn, or something else we don't need to understand.
+         We can't pass these directly to result_ready_cost because it will
+         trigger a fatal error for unrecognizable insns.  */
+      if (INSN_CODE (insn) < 0)
+	{
+	  INSN_COST (insn) = 1;
+	  return 1;
+	}
+      else
+	{
+	  cost = result_ready_cost (insn);
+
+	  if (cost < 1)
+	    cost = 1;
+
+	  INSN_COST (insn) = cost;
+	}
+    }
+
+  /* in this case estimate cost without caring how insn is used.  */
+  if (link == 0 && used == 0)
+    return cost;
+
+  /* A USE insn should never require the value used to be computed.  This
+     allows the computation of a function's result and parameter values to
+     overlap the return and call.  */
+  recog_memoized (used);
+  if (INSN_CODE (used) < 0)
+    LINK_COST_FREE (link) = 1;
+
+  /* If some dependencies vary the cost, compute the adjustment.  Most
+     commonly, the adjustment is complete: either the cost is ignored
+     (in the case of an output- or anti-dependence), or the cost is
+     unchanged.  These values are cached in the link as LINK_COST_FREE
+     and LINK_COST_ZERO.  */
+
+  if (LINK_COST_FREE (link))
+    cost = 1;
+#ifdef ADJUST_COST
+  else if (!LINK_COST_ZERO (link))
+    {
+      int ncost = cost;
+
+      ADJUST_COST (used, link, insn, ncost);
+      if (ncost <= 1)
+	LINK_COST_FREE (link) = ncost = 1;
+      if (cost == ncost)
+	LINK_COST_ZERO (link) = 1;	/* the adjustment changed nothing */
+      cost = ncost;
+    }
+#endif
+  return cost;
+}
+
+/* Return the priority number of INSN, computing it and caching it in
+   INSN_PRIORITY on first use.  The priority is the length of the
+   longest chain of dependent insns inside INSN's own block, measured
+   with insn_cost.  Anything that is not of rtx class 'i' has
+   priority 0.
+
+   A cached value of 0 is taken to mean "not computed yet".  An insn
+   whose dependents all lie in other blocks ends up with priority 0.  */
+
+static int
+priority (insn)
+     rtx insn;
+{
+  int best;
+  rtx dep;
+
+  if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+    return 0;
+
+  best = INSN_PRIORITY (insn);
+  if (best != 0)
+    return best;
+
+  if (INSN_DEPEND (insn) == 0)
+    /* Nothing depends on INSN: only its own cost counts.  */
+    best = insn_cost (insn, 0, 0);
+  else
+    for (dep = INSN_DEPEND (insn); dep; dep = XEXP (dep, 1))
+      {
+        rtx succ = XEXP (dep, 0);
+        int via_succ;
+
+        /* critical path is meaningful in block boundaries only */
+        if (INSN_BLOCK (succ) != INSN_BLOCK (insn))
+          continue;
+
+        via_succ = insn_cost (insn, dep, succ) + priority (succ);
+        if (via_succ > best)
+          best = via_succ;
+      }
+
+  INSN_PRIORITY (insn) = best;
+  return best;
+}
+
+
+/* Move every node of the pending list *LISTP onto the front of the
+   free list *UNUSED_LISTP, so that the nodes can be reused, and leave
+   *LISTP empty.  Nothing happens when *LISTP is already empty.  */
+
+__inline static void
+free_pnd_lst (listp, unused_listp)
+     rtx *listp, *unused_listp;
+{
+  register rtx tail;
+
+  if (*listp == 0)
+    return;
+
+  /* Find the last node of the pending list.  */
+  for (tail = *listp; XEXP (tail, 1); tail = XEXP (tail, 1))
+    ;
+
+  /* Splice the whole pending list in front of the free list.  */
+  XEXP (tail, 1) = *unused_listp;
+  *unused_listp = *listp;
+  *listp = 0;
+}
+
+static void
+free_pending_lists ()
+{
+  int bb;
+
+  /* With more than one block (interblock scheduling) each block has
+     its own set of pending lists, indexed 0 .. current_nr_blocks - 1.  */
+  if (current_nr_blocks > 1)
+    {
+      for (bb = 0; bb < current_nr_blocks; bb++)
+        {
+          free_pnd_lst (&bb_pending_read_insns[bb], &unused_insn_list);
+          free_pnd_lst (&bb_pending_write_insns[bb], &unused_insn_list);
+          free_pnd_lst (&bb_pending_read_mems[bb], &unused_expr_list);
+          free_pnd_lst (&bb_pending_write_mems[bb], &unused_expr_list);
+        }
+      return;
+    }
+
+  /* Otherwise only the global pending lists are released.  */
+  free_pnd_lst (&pending_read_insns, &unused_insn_list);
+  free_pnd_lst (&pending_write_insns, &unused_insn_list);
+  free_pnd_lst (&pending_read_mems, &unused_expr_list);
+  free_pnd_lst (&pending_write_mems, &unused_expr_list);
+}
+
+/* Push INSN onto the front of *INSN_LIST and MEM onto the front of
+   *MEM_LIST, keeping the two pending lists parallel.  MEM is a memory
+   reference contained within INSN, saved so that memory aliasing can
+   be done on it.  List nodes are taken from unused_insn_list and
+   unused_expr_list when available and allocated otherwise.
+   pending_lists_length is incremented.  */
+
+static void
+add_insn_mem_dependence (insn_list, mem_list, insn, mem)
+     rtx *insn_list, *mem_list, insn, mem;
+{
+  register rtx node;
+
+  /* Node for the insn.  */
+  if (unused_insn_list == 0)
+    node = rtx_alloc (INSN_LIST);
+  else
+    {
+      node = unused_insn_list;
+      unused_insn_list = XEXP (node, 1);
+    }
+  XEXP (node, 0) = insn;
+  XEXP (node, 1) = *insn_list;
+  *insn_list = node;
+
+  /* Node for the memory reference.  */
+  if (unused_expr_list == 0)
+    node = rtx_alloc (EXPR_LIST);
+  else
+    {
+      node = unused_expr_list;
+      unused_expr_list = XEXP (node, 1);
+    }
+  XEXP (node, 0) = mem;
+  XEXP (node, 1) = *mem_list;
+  *mem_list = node;
+
+  pending_lists_length++;
+}
+
+
+/* Make a dependency between every memory reference on the pending lists
+   and INSN, thus flushing the pending lists.  If ONLY_WRITE, don't flush
+   the read list.
+
+   All dependencies added here are of kind REG_DEP_ANTI.  The flushed
+   list nodes are put on unused_insn_list and unused_expr_list for reuse.
+   INSN is also made dependent on every insn in last_pending_memory_flush,
+   which is then replaced by a one-element list holding INSN.
+   pending_lists_length is reset to 0 even when ONLY_WRITE leaves the
+   read lists in place.  */
+
+static void
+flush_pending_lists (insn, only_write)
+     rtx insn;
+     int only_write;
+{
+  rtx u;
+  rtx link;
+
+  while (pending_read_insns && ! only_write)
+    {
+      add_dependence (insn, XEXP (pending_read_insns, 0), REG_DEP_ANTI);
+
+      link = pending_read_insns;
+      pending_read_insns = XEXP (pending_read_insns, 1);
+      XEXP (link, 1) = unused_insn_list;
+      unused_insn_list = link;
+
+      link = pending_read_mems;
+      pending_read_mems = XEXP (pending_read_mems, 1);
+      XEXP (link, 1) = unused_expr_list;
+      unused_expr_list = link;
+    }
+  while (pending_write_insns)
+    {
+      add_dependence (insn, XEXP (pending_write_insns, 0), REG_DEP_ANTI);
+
+      link = pending_write_insns;
+      pending_write_insns = XEXP (pending_write_insns, 1);
+      XEXP (link, 1) = unused_insn_list;
+      unused_insn_list = link;
+
+      link = pending_write_mems;
+      pending_write_mems = XEXP (pending_write_mems, 1);
+      XEXP (link, 1) = unused_expr_list;
+      unused_expr_list = link;
+    }
+  pending_lists_length = 0;
+
+  /* last_pending_memory_flush is now a list of insns */
+  for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
+    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+
+  last_pending_memory_flush =
+    gen_rtx (INSN_LIST, VOIDmode, insn, 0);
+}
+
+/* Analyze a single SET or CLOBBER rtx, X, creating all dependencies generated
+   by the write to the destination of X, and reads of everything mentioned.
+   sched_analyze_2 also passes PRE_DEC, POST_DEC, PRE_INC and POST_INC
+   expressions here so that they are handled as writes.  Nothing is done
+   when X has no destination.  The source of X is analyzed only when X is
+   a SET.  */
+
+static void
+sched_analyze_1 (x, insn)
+     rtx x;
+     rtx insn;
+{
+  register int regno;
+  register rtx dest = SET_DEST (x);
+
+  if (dest == 0)
+    return;
+
+  while (GET_CODE (dest) == STRICT_LOW_PART || GET_CODE (dest) == SUBREG
+      || GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
+    {
+      if (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SIGN_EXTRACT)
+	{
+	  /* The second and third arguments are values read by this insn.  */
+	  sched_analyze_2 (XEXP (dest, 1), insn);
+	  sched_analyze_2 (XEXP (dest, 2), insn);
+	}
+      dest = SUBREG_REG (dest);
+    }
+
+  if (GET_CODE (dest) == REG)
+    {
+      register int i;
+
+      regno = REGNO (dest);
+
+      /* A hard reg in a wide mode may really be multiple registers.
+         If so, mark all of them just like the first.  */
+      if (regno < FIRST_PSEUDO_REGISTER)
+	{
+	  i = HARD_REGNO_NREGS (regno, GET_MODE (dest));
+	  while (--i >= 0)
+	    {
+	      rtx u;
+
+	      for (u = reg_last_uses[regno + i]; u; u = XEXP (u, 1))
+		add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+	      reg_last_uses[regno + i] = 0;
+
+	      for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1))
+		add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
+
+	      SET_REGNO_REG_SET (reg_pending_sets, regno + i);
+
+	      if ((call_used_regs[regno + i] || global_regs[regno + i]))
+		/* Function calls clobber all call_used regs.  */
+		for (u = last_function_call; u; u = XEXP (u, 1))
+		  add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+	    }
+	}
+      else
+	{
+	  rtx u;
+
+	  for (u = reg_last_uses[regno]; u; u = XEXP (u, 1))
+	    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+	  reg_last_uses[regno] = 0;
+
+	  for (u = reg_last_sets[regno]; u; u = XEXP (u, 1))
+	    add_dependence (insn, XEXP (u, 0), REG_DEP_OUTPUT);
+
+	  SET_REGNO_REG_SET (reg_pending_sets, regno);
+
+	  /* Pseudos that are REG_EQUIV to something may be replaced
+	     by that during reloading.  We need only add dependencies for
+	     the address in the REG_EQUIV note.  */
+	  if (!reload_completed
+	      && reg_known_equiv_p[regno]
+	      && GET_CODE (reg_known_value[regno]) == MEM)
+	    sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn);
+
+	  /* Don't let it cross a call after scheduling if it doesn't
+	     already cross one.  */
+
+	  if (REG_N_CALLS_CROSSED (regno) == 0)
+	    for (u = last_function_call; u; u = XEXP (u, 1))
+	      add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+	}
+    }
+  else if (GET_CODE (dest) == MEM)
+    {
+      /* Writing memory.  */
+
+      if (pending_lists_length > 32)
+	{
+	  /* Flush all pending reads and writes to prevent the pending lists
+	     from getting any larger.  Insn scheduling runs too slowly when
+	     these lists get long.  The number 32 was chosen because it
+	     seems like a reasonable number.  When compiling GCC with itself,
+	     this flush occurs 8 times for sparc, and 10 times for m88k using
+	     the number 32.  */
+	  flush_pending_lists (insn, 0);
+	}
+      else
+	{
+	  rtx u;
+	  rtx pending, pending_mem;
+
+	  pending = pending_read_insns;
+	  pending_mem = pending_read_mems;
+	  while (pending)
+	    {
+	      /* If a dependency already exists, don't create a new one.  */
+	      if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn)))
+		if (anti_dependence (XEXP (pending_mem, 0), dest))
+		  add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
+
+	      pending = XEXP (pending, 1);
+	      pending_mem = XEXP (pending_mem, 1);
+	    }
+
+	  pending = pending_write_insns;
+	  pending_mem = pending_write_mems;
+	  while (pending)
+	    {
+	      /* If a dependency already exists, don't create a new one.  */
+	      if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn)))
+		if (output_dependence (XEXP (pending_mem, 0), dest))
+		  add_dependence (insn, XEXP (pending, 0), REG_DEP_OUTPUT);
+
+	      pending = XEXP (pending, 1);
+	      pending_mem = XEXP (pending_mem, 1);
+	    }
+
+	  for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
+	    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+
+	  add_insn_mem_dependence (&pending_write_insns, &pending_write_mems,
+				   insn, dest);
+	}
+      sched_analyze_2 (XEXP (dest, 0), insn);	/* the address is read */
+    }
+
+  /* Analyze reads.  */
+  if (GET_CODE (x) == SET)
+    sched_analyze_2 (SET_SRC (x), insn);
+}
+
+/* Analyze the uses of memory and registers in rtx X in INSN.
+   A null X is ignored and constants need no dependencies.  REG and MEM
+   references record INSN as a user/reader and add dependencies on the
+   earlier sets/writes.  ASM_INPUT, UNSPEC_VOLATILE, TRAP_IF and volatile
+   ASM_OPERANDS make INSN depend on all register uses and sets and flush
+   the pending memory lists.  PRE_DEC, POST_DEC, PRE_INC and POST_INC are
+   analyzed both as a read and as a write.  Any other code is walked
+   recursively through its 'e' and 'E' operands.  */
+
+static void
+sched_analyze_2 (x, insn)
+     rtx x;
+     rtx insn;
+{
+  register int i;
+  register int j;
+  register enum rtx_code code;
+  register char *fmt;
+
+  if (x == 0)
+    return;
+
+  code = GET_CODE (x);
+
+  switch (code)
+    {
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case SYMBOL_REF:
+    case CONST:
+    case LABEL_REF:
+      /* Ignore constants.  Note that we must handle CONST_DOUBLE here
+         because it may have a cc0_rtx in its CONST_DOUBLE_CHAIN field, but
+         this does not mean that this insn is using cc0.  */
+      return;
+
+#ifdef HAVE_cc0
+    case CC0:
+      {
+	rtx link, prev;
+
+	/* User of CC0 depends on immediately preceding insn.  */
+	SCHED_GROUP_P (insn) = 1;
+
+	/* There may be a note before this insn now, but all notes will
+	   be removed before we actually try to schedule the insns, so
+	   it won't cause a problem later.  We must avoid it here though.  */
+	prev = prev_nonnote_insn (insn);
+
+	/* Make a copy of all dependencies on the immediately previous insn,
+	   and add to this insn.  This is so that all the dependencies will
+	   apply to the group.  Remove an explicit dependence on this insn
+	   as SCHED_GROUP_P now represents it.  */
+
+	if (find_insn_list (prev, LOG_LINKS (insn)))
+	  remove_dependence (insn, prev);
+
+	for (link = LOG_LINKS (prev); link; link = XEXP (link, 1))
+	  add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
+
+	return;
+      }
+#endif
+
+    case REG:
+      {
+	rtx u;
+	int regno = REGNO (x);
+	if (regno < FIRST_PSEUDO_REGISTER)
+	  {
+	    int i;		/* shadows the function-level I */
+
+	    i = HARD_REGNO_NREGS (regno, GET_MODE (x));
+	    while (--i >= 0)
+	      {
+		reg_last_uses[regno + i]
+		  = gen_rtx (INSN_LIST, VOIDmode,
+			     insn, reg_last_uses[regno + i]);
+
+		for (u = reg_last_sets[regno + i]; u; u = XEXP (u, 1))
+		  add_dependence (insn, XEXP (u, 0), 0);
+
+		if ((call_used_regs[regno + i] || global_regs[regno + i]))
+		  /* Function calls clobber all call_used regs.  */
+		  for (u = last_function_call; u; u = XEXP (u, 1))
+		    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+	      }
+	  }
+	else
+	  {
+	    reg_last_uses[regno]
+	      = gen_rtx (INSN_LIST, VOIDmode, insn, reg_last_uses[regno]);
+
+	    for (u = reg_last_sets[regno]; u; u = XEXP (u, 1))
+	      add_dependence (insn, XEXP (u, 0), 0);
+
+	    /* Pseudos that are REG_EQUIV to something may be replaced
+	       by that during reloading.  We need only add dependencies for
+	       the address in the REG_EQUIV note.  */
+	    if (!reload_completed
+		&& reg_known_equiv_p[regno]
+		&& GET_CODE (reg_known_value[regno]) == MEM)
+	      sched_analyze_2 (XEXP (reg_known_value[regno], 0), insn);
+
+	    /* If the register does not already cross any calls, then add this
+	       insn to the sched_before_next_call list so that it will still
+	       not cross calls after scheduling.  */
+	    if (REG_N_CALLS_CROSSED (regno) == 0)
+	      add_dependence (sched_before_next_call, insn, REG_DEP_ANTI);
+	  }
+	return;
+      }
+
+    case MEM:
+      {
+	/* Reading memory.  */
+	rtx u;
+	rtx pending, pending_mem;
+
+	pending = pending_read_insns;
+	pending_mem = pending_read_mems;
+	while (pending)
+	  {
+	    /* If a dependency already exists, don't create a new one.  */
+	    if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn)))
+	      if (read_dependence (XEXP (pending_mem, 0), x))
+		add_dependence (insn, XEXP (pending, 0), REG_DEP_ANTI);
+
+	    pending = XEXP (pending, 1);
+	    pending_mem = XEXP (pending_mem, 1);
+	  }
+
+	pending = pending_write_insns;
+	pending_mem = pending_write_mems;
+	while (pending)
+	  {
+	    /* If a dependency already exists, don't create a new one.  */
+	    if (!find_insn_list (XEXP (pending, 0), LOG_LINKS (insn)))
+	      if (true_dependence (XEXP (pending_mem, 0), VOIDmode,
+		  x, rtx_varies_p))
+		add_dependence (insn, XEXP (pending, 0), 0);
+
+	    pending = XEXP (pending, 1);
+	    pending_mem = XEXP (pending_mem, 1);
+	  }
+
+	for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
+	  add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+
+	/* Always add these dependencies to pending_reads, since
+	   this insn may be followed by a write.  */
+	add_insn_mem_dependence (&pending_read_insns, &pending_read_mems,
+				 insn, x);
+
+	/* Take advantage of tail recursion here.  */
+	sched_analyze_2 (XEXP (x, 0), insn);
+	return;
+      }
+
+    case ASM_OPERANDS:
+    case ASM_INPUT:
+    case UNSPEC_VOLATILE:
+    case TRAP_IF:
+      {
+	rtx u;
+
+	/* Traditional and volatile asm instructions must be considered to use
+	   and clobber all hard registers, all pseudo-registers and all of
+	   memory.  So must TRAP_IF and UNSPEC_VOLATILE operations.
+
+	   Consider for instance a volatile asm that changes the fpu rounding
+	   mode.  An insn should not be moved across this even if it only uses
+	   pseudo-regs because it might give an incorrectly rounded result.  */
+	if (code != ASM_OPERANDS || MEM_VOLATILE_P (x))
+	  {
+	    int max_reg = max_reg_num ();
+	    for (i = 0; i < max_reg; i++)
+	      {
+		for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
+		  add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+		reg_last_uses[i] = 0;
+
+		/* reg_last_sets[r] is now a list of insns */
+		for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
+		  add_dependence (insn, XEXP (u, 0), 0);
+	      }
+	    reg_pending_sets_all = 1;
+
+	    flush_pending_lists (insn, 0);
+	  }
+
+	/* For all ASM_OPERANDS, we must traverse the vector of input operands.
+	   We can not just fall through here since then we would be confused
+	   by the ASM_INPUT rtx inside ASM_OPERANDS, which do not indicate
+	   traditional asms unlike their normal usage.  */
+
+	if (code == ASM_OPERANDS)
+	  {
+	    for (j = 0; j < ASM_OPERANDS_INPUT_LENGTH (x); j++)
+	      sched_analyze_2 (ASM_OPERANDS_INPUT (x, j), insn);
+	    return;
+	  }
+	break;			/* the other codes get the generic walk below */
+      }
+
+    case PRE_DEC:
+    case POST_DEC:
+    case PRE_INC:
+    case POST_INC:
+      /* These both read and modify the result.  We must handle them as writes
+         to get proper dependencies for following instructions.  We must handle
+         them as reads to get proper dependencies from this to previous
+         instructions.  Thus we need to pass them to both sched_analyze_1
+         and sched_analyze_2.  We must call sched_analyze_2 first in order
+         to get the proper antecedent for the read.  */
+      sched_analyze_2 (XEXP (x, 0), insn);
+      sched_analyze_1 (x, insn);
+      return;
+    }
+
+  /* Other cases: walk the insn.  */
+  fmt = GET_RTX_FORMAT (code);
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'e')
+	sched_analyze_2 (XEXP (x, i), insn);
+      else if (fmt[i] == 'E')
+	for (j = 0; j < XVECLEN (x, i); j++)
+	  sched_analyze_2 (XVECEXP (x, i, j), insn);
+    }
+}
+
+/* Analyze an INSN with pattern X to find all dependencies.
+
+   X is dispatched on its rtx code: a SET or CLOBBER goes to
+   sched_analyze_1; each element of a PARALLEL goes to sched_analyze_1
+   (SET or CLOBBER) or sched_analyze_2 (anything else); any other pattern
+   goes to sched_analyze_2.  For a CALL_INSN, every entry of
+   CALL_INSN_FUNCTION_USAGE is analyzed by the same rule.
+
+   LOOP_NOTES, if nonzero, is the list of fake REG_DEAD notes that
+   sched_analyze built for the notes preceding INSN.  INSN is then made
+   dependent on every insn recorded in reg_last_uses and reg_last_sets,
+   flush_pending_lists is called, and LOOP_NOTES is spliced in front of
+   REG_NOTES (INSN).
+
+   Before returning, each reg_last_sets entry selected by reg_pending_sets
+   (or every entry, when reg_pending_sets_all is set) is replaced by a
+   one-element list holding INSN and both pending records are cleared.
+   Finally a CALL_INSN or JUMP_INSN is tied to the run of (use (reg))
+   insns directly before it via SCHED_GROUP_P.  */
+
+static void
+sched_analyze_insn (x, insn, loop_notes)
+     rtx x, insn;
+     rtx loop_notes;
+{
+  register RTX_CODE code = GET_CODE (x);
+  rtx link;
+  int maxreg = max_reg_num ();
+  int i;
+
+  if (code == SET || code == CLOBBER)
+    sched_analyze_1 (x, insn);
+  else if (code == PARALLEL)
+    {
+      register int i;	/* Shadows the function-level I.  */
+      for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+	{
+	  code = GET_CODE (XVECEXP (x, 0, i));
+	  if (code == SET || code == CLOBBER)
+	    sched_analyze_1 (XVECEXP (x, 0, i), insn);
+	  else
+	    sched_analyze_2 (XVECEXP (x, 0, i), insn);
+	}
+    }
+  else
+    sched_analyze_2 (x, insn);
+
+  /* Mark registers CLOBBERED or used by called function.  */
+  if (GET_CODE (insn) == CALL_INSN)
+    for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1))
+      {
+	if (GET_CODE (XEXP (link, 0)) == CLOBBER)
+	  sched_analyze_1 (XEXP (link, 0), insn);
+	else
+	  sched_analyze_2 (XEXP (link, 0), insn);
+      }
+
+  /* If there is a {LOOP,EHREGION}_{BEG,END} note in the middle of a basic block, then
+     we must be sure that no instructions are scheduled across it.
+     Otherwise, the reg_n_refs info (which depends on loop_depth) would
+     become incorrect.  */
+
+  if (loop_notes)
+    {
+      int max_reg = max_reg_num ();
+      rtx link;	/* Shadows the function-level LINK.  */
+
+      for (i = 0; i < max_reg; i++)
+	{
+	  rtx u;
+	  for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
+	    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+	  reg_last_uses[i] = 0;
+
+	  /* reg_last_sets[r] is now a list of insns */
+	  for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
+	    add_dependence (insn, XEXP (u, 0), 0);
+	}
+      reg_pending_sets_all = 1;
+
+      flush_pending_lists (insn, 0);
+
+      link = loop_notes;
+      while (XEXP (link, 1))
+	link = XEXP (link, 1);
+      XEXP (link, 1) = REG_NOTES (insn);
+      REG_NOTES (insn) = loop_notes;
+    }
+
+  /* After reload, it is possible for an instruction to have a REG_DEAD note
+     for a register that actually dies a few instructions earlier.  For
+     example, this can happen with SECONDARY_MEMORY_NEEDED reloads.
+     In this case, we must consider the insn to use the register mentioned
+     in the REG_DEAD note.  Otherwise, we may accidentally move this insn
+     after another insn that sets the register, thus getting obviously invalid
+     rtl.  This confuses reorg which believes that REG_DEAD notes are still
+     meaningful.
+
+     ??? We would get better code if we fixed reload to put the REG_DEAD
+     notes in the right places, but that may not be worth the effort.  */
+
+  if (reload_completed)
+    {
+      rtx note;
+
+      for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+	if (REG_NOTE_KIND (note) == REG_DEAD)
+	  sched_analyze_2 (XEXP (note, 0), insn);
+    }
+
+  EXECUTE_IF_SET_IN_REG_SET (reg_pending_sets, 0, i,
+			     {
+			       /* reg_last_sets[r] is now a list of insns */
+			       reg_last_sets[i]
+				 = gen_rtx (INSN_LIST, VOIDmode, insn, 0);
+			     });
+  CLEAR_REG_SET (reg_pending_sets);
+
+  if (reg_pending_sets_all)
+    {
+      for (i = 0; i < maxreg; i++)
+
+	/* reg_last_sets[r] is now a list of insns */
+	reg_last_sets[i]
+	  = gen_rtx (INSN_LIST, VOIDmode, insn, 0);
+
+      reg_pending_sets_all = 0;
+    }
+
+  /* Handle function calls and function returns created by the epilogue
+     threading code.  */
+  if (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN)
+    {
+      rtx dep_insn;
+      rtx prev_dep_insn;
+
+      /* When scheduling instructions, we make sure calls don't lose their
+         accompanying USE insns by depending them one on another in order.
+
+         Also, we must do the same thing for returns created by the epilogue
+         threading code.  Note this code works only in this special case,
+         because other passes make no guarantee that they will never emit
+         an instruction between a USE and a RETURN.  There is such a guarantee
+         for USE instructions immediately before a call.  */
+
+      prev_dep_insn = insn;
+      dep_insn = PREV_INSN (insn);
+      while (GET_CODE (dep_insn) == INSN
+	     && GET_CODE (PATTERN (dep_insn)) == USE
+	     && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == REG)
+	{
+	  SCHED_GROUP_P (prev_dep_insn) = 1;
+
+	  /* Make a copy of all dependencies on dep_insn, and add to insn.
+	     This is so that all of the dependencies will apply to the
+	     group.  */
+
+	  for (link = LOG_LINKS (dep_insn); link; link = XEXP (link, 1))
+	    add_dependence (insn, XEXP (link, 0), REG_NOTE_KIND (link));
+
+	  prev_dep_insn = dep_insn;
+	  dep_insn = PREV_INSN (dep_insn);
+	}
+    }
+}
+
+/* Analyze every insn between HEAD and TAIL inclusive, creating LOG_LINKS
+   for every dependency.
+
+   The chain is walked forward with NEXT_INSN and the function returns
+   once TAIL has been handled.  INSNs and JUMP_INSNs go straight to
+   sched_analyze_insn.  A CALL_INSN is first marked CANT_MOVE and made
+   dependent on earlier register uses and sets (every register when a
+   NOTE_INSN_SETJMP follows the call, otherwise the call-used and global
+   hard registers) and on the insns recorded on sched_before_next_call;
+   it is then analyzed, flush_pending_lists is called, and
+   last_function_call is reset to a list holding just this call.  Loop,
+   EH-region and setjmp notes that do not directly follow a call are
+   recorded as pairs of fake REG_DEAD notes in LOOP_NOTES, which is handed
+   to sched_analyze_insn with the next insn analyzed.  */
+
+static void
+sched_analyze (head, tail)
+     rtx head, tail;
+{
+  register rtx insn;
+  register rtx u;
+  rtx loop_notes = 0;
+
+  for (insn = head;; insn = NEXT_INSN (insn))
+    {
+      if (GET_CODE (insn) == INSN || GET_CODE (insn) == JUMP_INSN)
+	{
+	  sched_analyze_insn (PATTERN (insn), insn, loop_notes);
+	  loop_notes = 0;
+	}
+      else if (GET_CODE (insn) == CALL_INSN)
+	{
+	  rtx x;
+	  register int i;
+
+	  CANT_MOVE (insn) = 1;
+
+	  /* Any instruction using a hard register which may get clobbered
+	     by a call needs to be marked as dependent on this call.
+	     This prevents a use of a hard return reg from being moved
+	     past a void call (i.e. it does not explicitly set the hard
+	     return reg).  */
+
+	  /* If this call is followed by a NOTE_INSN_SETJMP, then assume that
+	     all registers, not just hard registers, may be clobbered by this
+	     call.  */
+
+	  /* Insn, being a CALL_INSN, magically depends on
+	     `last_function_call' already.  */
+
+	  if (NEXT_INSN (insn) && GET_CODE (NEXT_INSN (insn)) == NOTE
+	      && NOTE_LINE_NUMBER (NEXT_INSN (insn)) == NOTE_INSN_SETJMP)
+	    {
+	      int max_reg = max_reg_num ();
+	      for (i = 0; i < max_reg; i++)
+		{
+		  for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
+		    add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+
+		  reg_last_uses[i] = 0;
+
+		  /* reg_last_sets[r] is now a list of insns */
+		  for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
+		    add_dependence (insn, XEXP (u, 0), 0);
+		}
+	      reg_pending_sets_all = 1;
+
+	      /* Add a pair of fake REG_NOTE which we will later
+		 convert back into a NOTE_INSN_SETJMP note.  See
+		 reemit_notes for why we use a pair of NOTEs.  */
+	      REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_DEAD,
+					  GEN_INT (0),
+					  REG_NOTES (insn));
+	      REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_DEAD,
+					  GEN_INT (NOTE_INSN_SETJMP),
+					  REG_NOTES (insn));
+	    }
+	  else
+	    {
+	      for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+		if (call_used_regs[i] || global_regs[i])
+		  {
+		    for (u = reg_last_uses[i]; u; u = XEXP (u, 1))
+		      add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+		    reg_last_uses[i] = 0;
+
+		    /* reg_last_sets[r] is now a list of insns */
+		    for (u = reg_last_sets[i]; u; u = XEXP (u, 1))
+		      add_dependence (insn, XEXP (u, 0), REG_DEP_ANTI);
+
+		    SET_REGNO_REG_SET (reg_pending_sets, i);
+		  }
+	    }
+
+	  /* For each insn which shouldn't cross a call, add a dependence
+	     between that insn and this call insn.  */
+	  x = LOG_LINKS (sched_before_next_call);
+	  while (x)
+	    {
+	      add_dependence (insn, XEXP (x, 0), REG_DEP_ANTI);
+	      x = XEXP (x, 1);
+	    }
+	  LOG_LINKS (sched_before_next_call) = 0;
+
+	  sched_analyze_insn (PATTERN (insn), insn, loop_notes);
+	  loop_notes = 0;
+
+	  /* In the absence of interprocedural alias analysis, we must flush
+	     all pending reads and writes, and start new dependencies starting
+	     from here.  But only flush writes for constant calls (which may
+	     be passed a pointer to something we haven't written yet).  */
+	  flush_pending_lists (insn, CONST_CALL_P (insn));
+
+	  /* Depend this function call (actually, the user of this
+	     function call) on all hard register clobberage.  */
+
+	  /* last_function_call is now a list of insns */
+	  last_function_call
+	    = gen_rtx (INSN_LIST, VOIDmode, insn, 0);
+	}
+
+      /* See comments on reemit_notes as to why we do this.  The block
+	 number is pushed first and the line number second, so the line
+	 number ends up at the head of the pair.  */
+      else if (GET_CODE (insn) == NOTE
+	       && (NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG
+		   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_END
+		   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_BEG
+		   || NOTE_LINE_NUMBER (insn) == NOTE_INSN_EH_REGION_END
+		   || (NOTE_LINE_NUMBER (insn) == NOTE_INSN_SETJMP
+		       && GET_CODE (PREV_INSN (insn)) != CALL_INSN)))
+	{
+	  loop_notes = gen_rtx (EXPR_LIST, REG_DEAD,
+				GEN_INT (NOTE_BLOCK_NUMBER (insn)), loop_notes);
+	  loop_notes = gen_rtx (EXPR_LIST, REG_DEAD,
+				GEN_INT (NOTE_LINE_NUMBER (insn)), loop_notes);
+	  CONST_CALL_P (loop_notes) = CONST_CALL_P (insn);
+	}
+
+      if (insn == tail)
+	return;
+    }
+  abort ();	/* Not reached: the loop above is left only by returning.  */
+}
+
+/* Record in bb_live_regs the effect of X storing into its SET_DEST.
+   When DEATH is nonzero we are scanning backwards, so the destination
+   register becomes unborn; it stays that way unless something else marks
+   it live again.  When DEATH is zero we are scanning forwards and the
+   register is born, i.e. made live.  B is not used.  */
+
+static void
+sched_note_set (b, x, death)
+     int b;
+     rtx x;
+     int death;
+{
+  rtx dest = SET_DEST (x);
+  int partial_p = 0;
+  int regno;
+  int nregs;
+
+  if (dest == 0)
+    return;
+
+  /* Peel off any wrappers around the destination.  As in mark_set_1 in
+     flow.c, storing into just one hard register of a multi-reg value, or
+     into a byte field of a register, does not kill the entire register;
+     only a paradoxical subreg does.  */
+  for (;;)
+    {
+      enum rtx_code wrapper = GET_CODE (dest);
+
+      if (wrapper == SUBREG)
+	{
+	  if (REG_SIZE (SUBREG_REG (dest)) > REG_SIZE (dest))
+	    partial_p = 1;
+	}
+      else if (wrapper == STRICT_LOW_PART
+	       || wrapper == SIGN_EXTRACT
+	       || wrapper == ZERO_EXTRACT)
+	partial_p = 1;
+      else
+	break;
+
+      dest = SUBREG_REG (dest);
+    }
+
+  if (GET_CODE (dest) != REG)
+    return;
+
+  regno = REGNO (dest);
+
+  /* Global registers are always live, so nothing below applies to
+     them.  */
+  if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
+    return;
+
+  /* A set of only part of the register does not kill it.  */
+  if (death && partial_p)
+    return;
+
+  if (regno < FIRST_PSEUDO_REGISTER)
+    {
+      /* A hard register value may span several consecutive registers;
+	 kill or revive every one of them.  */
+      nregs = HARD_REGNO_NREGS (regno, GET_MODE (dest));
+      while (--nregs >= 0)
+	{
+	  if (death)
+	    {
+	      CLEAR_REGNO_REG_SET (bb_live_regs, regno + nregs);
+	    }
+	  else
+	    {
+	      SET_REGNO_REG_SET (bb_live_regs, regno + nregs);
+	    }
+	}
+    }
+  else if (death)
+    {
+      /* Recompute REG_BASIC_BLOCK as we update all the other dataflow
+	 information.  */
+      if (sched_reg_basic_block[regno] == REG_BLOCK_UNKNOWN)
+	sched_reg_basic_block[regno] = current_block_num;
+      else if (sched_reg_basic_block[regno] != current_block_num)
+	sched_reg_basic_block[regno] = REG_BLOCK_GLOBAL;
+
+      CLEAR_REGNO_REG_SET (bb_live_regs, regno);
+    }
+  else
+    {
+      /* Make the pseudo live again.  */
+      SET_REGNO_REG_SET (bb_live_regs, regno);
+    }
+}
+
+/* Macros and functions for keeping the priority queue sorted, and
+   dealing with queueing and dequeueing of instructions.
+
+   SCHED_SORT sorts the N_READY insns in the vector READY: exactly two
+   elements are handled by swap_sort, more than two by qsort with
+   rank_for_schedule as the comparison function, and fewer than two need
+   no work.  Both arguments may be evaluated more than once.  */
+
+#define SCHED_SORT(READY, N_READY)                                   \
+do { if ((N_READY) == 2)				             \
+       swap_sort (READY, N_READY);			             \
+     else if ((N_READY) > 2)                                         \
+         qsort (READY, N_READY, sizeof (rtx), rank_for_schedule); }  \
+while (0)
+
+/* Comparison function used to order insns in the ready list (see
+   SCHED_SORT).  X and Y each point at an insn.  The result is positive
+   if the insn at X is preferred and negative if the insn at Y is
+   preferred.  It should never be 0, since that would make the sort
+   unstable.  */
+
+static int
+rank_for_schedule (x, y)
+     rtx *x, *y;
+{
+  rtx insn_x = *x;
+  rtx insn_y = *y;
+  rtx dep;
+  int class_x, class_y;
+  int diff;
+
+  /* Reverse scheduling is a stress test of scheduler correctness,
+     controlled by the -fsched-reverse option: order purely by LUID.  */
+  if (reload_completed
+      ? flag_schedule_reverse_after_reload
+      : flag_schedule_reverse_before_reload)
+    return INSN_LUID (insn_x) - INSN_LUID (insn_y);
+
+  /* The insn with the higher priority wins.  */
+  diff = INSN_PRIORITY (insn_x) - INSN_PRIORITY (insn_y);
+  if (diff != 0)
+    return diff;
+
+  /* Before reload, the insn contributing less to register pressure
+     wins.  */
+  if (!reload_completed)
+    {
+      diff = INSN_REG_WEIGHT (insn_y) - INSN_REG_WEIGHT (insn_x);
+      if (diff != 0)
+	return diff;
+    }
+
+  /* The next comparisons only make sense for interblock scheduling.  */
+  if (INSN_BB (insn_y) != INSN_BB (insn_x))
+    {
+      /* An in-block motion beats an interblock motion.  The two insns
+	 are in different blocks here, so at most one is in target_bb.  */
+      if (INSN_BB (insn_x) == target_bb)
+	return 1;
+      if (INSN_BB (insn_y) == target_bb)
+	return -1;
+
+      /* A useful motion beats a speculative one.  */
+      diff = IS_SPECULATIVE_INSN (insn_y) - IS_SPECULATIVE_INSN (insn_x);
+      if (diff != 0)
+	return diff;
+
+      /* The more probable (speculative) insn wins.  */
+      diff = INSN_PROBABILITY (insn_x) - INSN_PROBABILITY (insn_y);
+      if (diff != 0)
+	return diff;
+    }
+
+  /* Compare the insns by their relation to the last scheduled insn.  */
+  if (last_scheduled_insn)
+    {
+      /* Each insn falls into one of three classes:
+	 1) Data dependent on the last scheduled insn.
+	 2) Anti/Output dependent on the last scheduled insn.
+	 3) Independent of the last scheduled insn, or has latency of one.
+	 The insn in the higher numbered class wins.  */
+      dep = find_insn_list (insn_y, INSN_DEPEND (last_scheduled_insn));
+      if (dep != 0 && insn_cost (last_scheduled_insn, dep, insn_y) != 1)
+	class_y = (REG_NOTE_KIND (dep) == 0) ? 1 : 2;
+      else
+	class_y = 3;
+
+      dep = find_insn_list (insn_x, INSN_DEPEND (last_scheduled_insn));
+      if (dep != 0 && insn_cost (last_scheduled_insn, dep, insn_x) != 1)
+	class_x = (REG_NOTE_KIND (dep) == 0) ? 1 : 2;
+      else
+	class_x = 3;
+
+      diff = class_x - class_y;
+      if (diff != 0)
+	return diff;
+    }
+
+  /* The insns are equally good: fall back to INSN_LUID (original insn
+     order) so that the sort is stable.  This minimizes instruction
+     movement, and with it sched's effect on debugging and
+     cross-jumping.  */
+  return INSN_LUID (insn_y) - INSN_LUID (insn_x);
+}
+
+/* Re-sort the N-element array A in which only the last element,
+   A[N - 1], may be out of order.  Elements that compare greater than or
+   equal to it under rank_for_schedule are shifted up one slot and it is
+   dropped into the gap.  */
+
+__inline static void
+swap_sort (a, n)
+     rtx *a;
+     int n;
+{
+  rtx moving = a[n - 1];
+  int slot;
+
+  for (slot = n - 1;
+       slot > 0 && rank_for_schedule (&a[slot - 1], &moving) >= 0;
+       slot--)
+    a[slot] = a[slot - 1];
+
+  a[slot] = moving;
+}
+
+static int max_priority;	/* Set by schedule_insn; adjust_priority raises a
+				   birthing insn's priority up to this value.  */
+
+/* Put INSN on the insn queue, in the slot NEXT_Q_AFTER (q_ptr, N_CYCLES),
+   so that it can be executed at least N_CYCLES after the currently
+   executing insn.  The insn chain itself is left alone for debugging
+   purposes.  With sched_verbose >= 2 the move is logged to DUMP.  */
+
+__inline static void
+queue_insn (insn, n_cycles)
+     rtx insn;
+     int n_cycles;
+{
+  int slot = NEXT_Q_AFTER (q_ptr, n_cycles);
+  rtx entry = rtx_alloc (INSN_LIST);
+
+  /* Push a fresh INSN_LIST node for INSN on the front of the slot.  */
+  XEXP (entry, 0) = insn;
+  XEXP (entry, 1) = insn_queue[slot];
+  insn_queue[slot] = entry;
+  q_size++;
+
+  if (sched_verbose < 2)
+    return;
+
+  fprintf (dump, ";;\t\tReady-->Q: insn %d: ", INSN_UID (insn));
+
+  /* Name the source block when the insn comes from outside target_bb.  */
+  if (INSN_BB (insn) != target_bb)
+    fprintf (dump, "(b%d) ", INSN_BLOCK (insn));
+
+  fprintf (dump, "queued for %d cycles.\n", n_cycles);
+}
+
+/* Return nonzero if PAT is the pattern of an insn which makes a register
+   live.  Always zero when reload_completed is 1.  A SET of a REG
+   qualifies when the register is in bb_live_regs and has exactly one
+   set; a PARALLEL qualifies when any of its elements does.  */
+
+__inline static int
+birthing_insn_p (pat)
+     rtx pat;
+{
+  enum rtx_code code;
+  int regno;
+  int k;
+
+  if (reload_completed == 1)
+    return 0;
+
+  code = GET_CODE (pat);
+
+  if (code == PARALLEL)
+    {
+      for (k = 0; k < XVECLEN (pat, 0); k++)
+	if (birthing_insn_p (XVECEXP (pat, 0, k)))
+	  return 1;
+
+      return 0;
+    }
+
+  if (code != SET || GET_CODE (SET_DEST (pat)) != REG)
+    return 0;
+
+  regno = REGNO (SET_DEST (pat));
+
+  /* It would be more accurate to use refers_to_regno_p or
+     reg_mentioned_p to determine when the dest is not live before this
+     insn.  */
+  if (! REGNO_REG_SET_P (bb_live_regs, regno))
+    return 0;
+
+  return REG_N_SETS (regno) == 1;
+}
+
+/* PREV is an insn that is ready to execute.  Adjust INSN_PRIORITY (PREV)
+   if that will help shorten register lifetimes.  Nothing is done once
+   reload has completed.  */
+
+__inline static void
+adjust_priority (prev)
+     rtx prev;
+{
+  rtx note;
+  int dying = 0;
+
+  /* Trying to shorten register lives after reload has completed is
+     useless and wrong.  It gives inaccurate schedules.  */
+  if (reload_completed != 0)
+    return;
+
+  /* ??? This count has no effect, because REG_DEAD notes are removed
+     before we ever get here.  */
+  for (note = REG_NOTES (prev); note != 0; note = XEXP (note, 1))
+    if (REG_NOTE_KIND (note) == REG_DEAD)
+      dying++;
+
+  /* Defer scheduling insns which kill registers, since that shortens
+     register lives.  Prefer scheduling insns which make registers live
+     for the same reason.  */
+  if (dying == 0)
+    {
+      if (birthing_insn_p (PATTERN (prev))
+	  && INSN_PRIORITY (prev) < max_priority)
+	INSN_PRIORITY (prev) = max_priority;
+    }
+  else if (dying <= 2)
+    INSN_PRIORITY (prev) >>= 1;
+  else if (dying == 3)
+    INSN_PRIORITY (prev) >>= 2;
+  else
+    INSN_PRIORITY (prev) >>= 3;
+
+#ifdef ADJUST_PRIORITY
+  ADJUST_PRIORITY (prev);
+#endif
+}
+
+/* INSN is the "currently executing insn".  Launch each insn which was
+   waiting on INSN.  READY is a vector of insns which are ready to fire.
+   N_READY is the number of elements in READY.  CLOCK is the current
+   cycle.
+
+   For every insn NEXT on INSN_DEPEND (INSN), INSN_TICK (NEXT) is raised
+   to at least CLOCK plus the dependence cost and INSN_DEP_COUNT (NEXT) is
+   decremented.  When the count reaches zero, NEXT is appended to READY if
+   its effective cost is at most one cycle and queued with queue_insn
+   otherwise; an insn from outside target_bb that fails the IS_VALID,
+   CANT_MOVE or speculation checks is skipped instead.  Returns the new
+   number of elements in READY.  */
+
+static int
+schedule_insn (insn, ready, n_ready, clock)
+     rtx insn;
+     rtx *ready;
+     int n_ready;
+     int clock;
+{
+  rtx link;
+  int unit;
+
+  unit = insn_unit (insn);
+
+  if (sched_verbose >= 2)
+    {
+      fprintf (dump, ";;\t\t--> scheduling insn <<<%d>>> on unit ", INSN_UID (insn));
+      insn_print_units (insn);
+      fprintf (dump, "\n");
+    }
+
+  if (sched_verbose && unit == -1)
+    visualize_no_unit (insn);
+
+  if (MAX_BLOCKAGE > 1 || issue_rate > 1 || sched_verbose)
+    schedule_unit (unit, insn, clock);
+
+  if (INSN_DEPEND (insn) == 0)
+    return n_ready;
+
+  /* This is used by the function adjust_priority above.  */
+  if (n_ready > 0)
+    max_priority = MAX (INSN_PRIORITY (ready[0]), INSN_PRIORITY (insn));
+  else
+    max_priority = INSN_PRIORITY (insn);
+
+  for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
+    {
+      rtx next = XEXP (link, 0);
+      int cost = insn_cost (insn, link, next);
+
+      INSN_TICK (next) = MAX (INSN_TICK (next), clock + cost);
+
+      if ((INSN_DEP_COUNT (next) -= 1) == 0)
+	{
+	  int effective_cost = INSN_TICK (next) - clock;
+
+	  /* For speculative insns, before inserting to ready/queue,
+	     check live, exception-free, and issue-delay.  An insn that
+	     fails is neither made ready nor queued here.  */
+	  if (INSN_BB (next) != target_bb
+	      && (!IS_VALID (INSN_BB (next))
+		  || CANT_MOVE (next)
+		  || (IS_SPECULATIVE_INSN (next)
+		      && (insn_issue_delay (next) > 3
+			  || !check_live (next, INSN_BB (next), target_bb)
+		 || !is_exception_free (next, INSN_BB (next), target_bb)))))
+	    continue;
+
+	  if (sched_verbose >= 2)
+	    {
+	      fprintf (dump, ";;\t\tdependences resolved: insn %d ", INSN_UID (next));
+
+	      if (current_nr_blocks > 1 && INSN_BB (next) != target_bb)
+		fprintf (dump, "/b%d ", INSN_BLOCK (next));
+
+	      if (effective_cost <= 1)
+		fprintf (dump, "into ready\n");
+	      else
+		fprintf (dump, "into queue with cost=%d\n", effective_cost);
+	    }
+
+	  /* Adjust the priority of NEXT and either put it on the ready
+	     list or queue it.  */
+	  adjust_priority (next);
+	  if (effective_cost <= 1)
+	    ready[n_ready++] = next;
+	  else
+	    queue_insn (next, effective_cost);
+	}
+    }
+
+  return n_ready;
+}
+
+
+/* Add a REG_DEAD note for REG to INSN, reusing a REG_DEAD note from the
+   dead_notes list.
+
+   Enough notes are taken off the front of dead_notes to account for the
+   number of registers REG kills (one for a pseudo, HARD_REGNO_NREGS for
+   a hard register); if that removes too many register kills, placeholder
+   notes are pushed back.  When dead_notes runs out, single-block
+   scheduling (current_nr_blocks <= 1) aborts, while multi-block
+   scheduling allocates a fresh note instead.  */
+
+static void
+create_reg_dead_note (reg, insn)
+     rtx reg, insn;
+{
+  rtx link;
+
+  /* The number of registers killed after scheduling must be the same as the
+     number of registers killed before scheduling.  The number of REG_DEAD
+     notes may not be conserved, i.e. two SImode hard register REG_DEAD notes
+     might become one DImode hard register REG_DEAD note, but the number of
+     registers killed will be conserved.
+
+     We carefully remove REG_DEAD notes from the dead_notes list, so that
+     there will be none left at the end.  If we run out early, then there
+     is a bug somewhere in flow, combine and/or sched.  */
+
+  if (dead_notes == 0)
+    {
+      if (current_nr_blocks <= 1)
+	abort ();
+      else
+	{
+	  link = rtx_alloc (EXPR_LIST);
+	  PUT_REG_NOTE_KIND (link, REG_DEAD);
+	}
+    }
+  else
+    {
+      /* Number of regs killed by REG.  */
+      int regs_killed = (REGNO (reg) >= FIRST_PSEUDO_REGISTER ? 1
+			 : HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg)));
+      /* Number of regs killed by REG_DEAD notes taken off the list.  */
+      int reg_note_regs;
+
+      link = dead_notes;
+      reg_note_regs = (REGNO (XEXP (link, 0)) >= FIRST_PSEUDO_REGISTER ? 1
+		       : HARD_REGNO_NREGS (REGNO (XEXP (link, 0)),
+					   GET_MODE (XEXP (link, 0))));
+      while (reg_note_regs < regs_killed)
+	{
+	  link = XEXP (link, 1);
+
+	  /* The list can run out while we are still collecting register
+	     kills.  Treat that exactly like an empty list above: abort
+	     for single-block scheduling, otherwise continue with a
+	     freshly allocated placeholder note rather than dereferencing
+	     a null pointer.  */
+	  if (link == 0)
+	    {
+	      if (current_nr_blocks <= 1)
+		abort ();
+
+	      link = rtx_alloc (EXPR_LIST);
+	      PUT_REG_NOTE_KIND (link, REG_DEAD);
+	      XEXP (link, 0) = gen_rtx (REG, word_mode, 0);
+	      XEXP (link, 1) = 0;
+	    }
+
+	  reg_note_regs += (REGNO (XEXP (link, 0)) >= FIRST_PSEUDO_REGISTER ? 1
+			    : HARD_REGNO_NREGS (REGNO (XEXP (link, 0)),
+						GET_MODE (XEXP (link, 0))));
+	}
+      dead_notes = XEXP (link, 1);
+
+      /* If we took too many regs kills off, put the extra ones back.  */
+      while (reg_note_regs > regs_killed)
+	{
+	  rtx temp_reg, temp_link;
+
+	  temp_reg = gen_rtx (REG, word_mode, 0);
+	  temp_link = rtx_alloc (EXPR_LIST);
+	  PUT_REG_NOTE_KIND (temp_link, REG_DEAD);
+	  XEXP (temp_link, 0) = temp_reg;
+	  XEXP (temp_link, 1) = dead_notes;
+	  dead_notes = temp_link;
+	  reg_note_regs--;
+	}
+    }
+
+  /* LINK is now ours: point it at REG and push it on INSN's notes.  */
+  XEXP (link, 0) = reg;
+  XEXP (link, 1) = REG_NOTES (insn);
+  REG_NOTES (insn) = link;
+}
+
+/* Subroutine of attach_deaths_insn--handles the recursive search
+   through INSN.  If SET_P is true, then x is being modified by the insn.
+
+   X is the (sub)expression of INSN being examined; a null X is ignored.
+   A REG that is used (SET_P zero) may receive REG_DEAD notes via
+   create_reg_dead_note and is marked live in bb_live_regs, except that
+   global hard registers are left untouched; a REG that is set is left
+   alone.  SET_P is passed down through SUBREG and STRICT_LOW_PART and
+   through the first operand of ZERO_EXTRACT and SIGN_EXTRACT; every
+   other operand is walked with SET_P zero.  */
+
+static void
+attach_deaths (x, insn, set_p)
+     rtx x;
+     rtx insn;
+     int set_p;
+{
+  register int i;
+  register int j;
+  register enum rtx_code code;
+  register char *fmt;
+
+  if (x == 0)
+    return;
+
+  code = GET_CODE (x);
+
+  switch (code)
+    {
+    case CONST_INT:
+    case CONST_DOUBLE:
+    case LABEL_REF:
+    case SYMBOL_REF:
+    case CONST:
+    case CODE_LABEL:
+    case PC:
+    case CC0:
+      /* Get rid of the easy cases first.  */
+      return;
+
+    case REG:
+      {
+	/* If the register dies in this insn, queue that note, and mark
+	   this register as needing to die.  */
+	/* This code is very similar to mark_used_1 (if set_p is false)
+	   and mark_set_1 (if set_p is true) in flow.c.  */
+
+	register int regno;
+	int some_needed;
+	int all_needed;
+
+	if (set_p)
+	  return;
+
+	regno = REGNO (x);
+	all_needed = some_needed = REGNO_REG_SET_P (old_live_regs, regno);
+	if (regno < FIRST_PSEUDO_REGISTER)
+	  {
+	    int n;
+
+	    n = HARD_REGNO_NREGS (regno, GET_MODE (x));
+	    while (--n > 0)
+	      {
+		int needed = (REGNO_REG_SET_P (old_live_regs, regno + n));
+		some_needed |= needed;
+		all_needed &= needed;
+	      }
+	  }
+
+	/* If it wasn't live before we started, then add a REG_DEAD note.
+	   We must check the previous lifetime info not the current info,
+	   because we may have to execute this code several times, e.g.
+	   once for a clobber (which doesn't add a note) and later
+	   for a use (which does add a note).
+
+	   Always make the register live.  We must do this even if it was
+	   live before, because this may be an insn which sets and uses
+	   the same register, in which case the register has already been
+	   killed, so we must make it live again.
+
+	   Global registers are always live, and should never have a REG_DEAD
+	   note added for them, so none of the code below applies to them.  */
+
+	if (regno >= FIRST_PSEUDO_REGISTER || ! global_regs[regno])
+	  {
+	    /* Never add REG_DEAD notes for the FRAME_POINTER_REGNUM or the
+	       STACK_POINTER_REGNUM, since these are always considered to be
+	       live.  Similarly for ARG_POINTER_REGNUM if it is fixed.  */
+	    if (regno != FRAME_POINTER_REGNUM
+#if HARD_FRAME_POINTER_REGNUM != FRAME_POINTER_REGNUM
+		&& ! (regno == HARD_FRAME_POINTER_REGNUM)
+#endif
+#if ARG_POINTER_REGNUM != FRAME_POINTER_REGNUM
+		&& ! (regno == ARG_POINTER_REGNUM && fixed_regs[regno])
+#endif
+		&& regno != STACK_POINTER_REGNUM)
+	      {
+		/* ??? It is perhaps a dead_or_set_p bug that it does
+		   not check for REG_UNUSED notes itself.  This is necessary
+		   for the case where the SET_DEST is a subreg of regno, as
+		   dead_or_set_p handles subregs specially.  */
+		if (! all_needed && ! dead_or_set_p (insn, x)
+		    && ! find_reg_note (insn, REG_UNUSED, x))
+		  {
+		    /* Check for the case where the register dying partially
+		       overlaps the register set by this insn.  */
+		    if (regno < FIRST_PSEUDO_REGISTER
+			&& HARD_REGNO_NREGS (regno, GET_MODE (x)) > 1)
+		      {
+			int n = HARD_REGNO_NREGS (regno, GET_MODE (x));
+			while (--n >= 0)
+			  some_needed |= dead_or_set_regno_p (insn, regno + n);
+		      }
+
+		    /* If none of the words in X is needed, make a REG_DEAD
+		       note.  Otherwise, we must make partial REG_DEAD
+		       notes.  */
+		    if (! some_needed)
+		      create_reg_dead_note (x, insn);
+		    else
+		      {
+			int i;	/* Shadows the function-level I.  */
+
+			/* Don't make a REG_DEAD note for a part of a
+			   register that is set in the insn.  */
+			for (i = HARD_REGNO_NREGS (regno, GET_MODE (x)) - 1;
+			     i >= 0; i--)
+			  if (! REGNO_REG_SET_P (old_live_regs, regno+i)
+			      && ! dead_or_set_regno_p (insn, regno + i))
+			    create_reg_dead_note (gen_rtx (REG,
+							   reg_raw_mode[regno + i],
+							   regno + i),
+						  insn);
+		      }
+		  }
+	      }
+
+	    if (regno < FIRST_PSEUDO_REGISTER)
+	      {
+		int j = HARD_REGNO_NREGS (regno, GET_MODE (x));
+		while (--j >= 0)
+		  {
+		    SET_REGNO_REG_SET (bb_live_regs, regno + j);
+		  }
+	      }
+	    else
+	      {
+		/* Recompute REG_BASIC_BLOCK as we update all the other
+		   dataflow information.  */
+		if (sched_reg_basic_block[regno] == REG_BLOCK_UNKNOWN)
+		  sched_reg_basic_block[regno] = current_block_num;
+		else if (sched_reg_basic_block[regno] != current_block_num)
+		  sched_reg_basic_block[regno] = REG_BLOCK_GLOBAL;
+
+		SET_REGNO_REG_SET (bb_live_regs, regno);
+	      }
+	  }
+	return;
+      }
+
+    case MEM:
+      /* Handle tail-recursive case.  The address is only ever used, so
+	 SET_P is cleared.  */
+      attach_deaths (XEXP (x, 0), insn, 0);
+      return;
+
+    case SUBREG:
+    case STRICT_LOW_PART:
+      /* These two cases preserve the value of SET_P, so handle them
+         separately.  */
+      attach_deaths (XEXP (x, 0), insn, set_p);
+      return;
+
+    case ZERO_EXTRACT:
+    case SIGN_EXTRACT:
+      /* This case preserves the value of SET_P for the first operand, but
+         clears it for the other two.  */
+      attach_deaths (XEXP (x, 0), insn, set_p);
+      attach_deaths (XEXP (x, 1), insn, 0);
+      attach_deaths (XEXP (x, 2), insn, 0);
+      return;
+
+    default:
+      /* Other cases: walk the insn.  */
+      fmt = GET_RTX_FORMAT (code);
+      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	{
+	  if (fmt[i] == 'e')
+	    attach_deaths (XEXP (x, i), insn, 0);
+	  else if (fmt[i] == 'E')
+	    for (j = 0; j < XVECLEN (x, i); j++)
+	      attach_deaths (XVECEXP (x, i, j), insn, 0);
+	}
+    }
+}
+
+/* After INSN has executed, add register death notes for each register
+   that is dead after INSN.  The pattern of INSN, and for a CALL_INSN its
+   CALL_INSN_FUNCTION_USAGE list, are handed piecewise to
+   attach_deaths.  */
+
+static void
+attach_deaths_insn (insn)
+     rtx insn;
+{
+  rtx pat = PATTERN (insn);
+  rtx usage;
+  int k;
+
+  switch (GET_CODE (pat))
+    {
+    case SET:
+      attach_deaths (SET_SRC (pat), insn, 0);
+
+      /* A register might die here even if it is the destination, e.g.
+	 it is the target of a volatile read and is otherwise unused.
+	 Hence attach_deaths is always called for the SET_DEST.  */
+      attach_deaths (SET_DEST (pat), insn, 1);
+      break;
+
+    case PARALLEL:
+      for (k = XVECLEN (pat, 0) - 1; k >= 0; k--)
+	{
+	  rtx elt = XVECEXP (pat, 0, k);
+
+	  if (GET_CODE (elt) == SET)
+	    {
+	      attach_deaths (SET_SRC (elt), insn, 0);
+
+	      attach_deaths (SET_DEST (elt), insn, 1);
+	    }
+	  /* Flow does not add REG_DEAD notes to registers that die in
+	     clobbers, so we can't either.  */
+	  else if (GET_CODE (elt) != CLOBBER)
+	    attach_deaths (elt, insn, 0);
+	}
+      break;
+
+    case CLOBBER:
+      /* Just like flow, only add REG_DEAD notes to registers inside a
+	 MEM being clobbered; anything else being clobbered gets no death
+	 note.  */
+      if (GET_CODE (XEXP (pat, 0)) == MEM)
+	attach_deaths (XEXP (XEXP (pat, 0), 0), insn, 0);
+      break;
+
+    default:
+      attach_deaths (pat, insn, 0);
+      break;
+    }
+
+  /* Make death notes for things used in the called function.  */
+  if (GET_CODE (insn) == CALL_INSN)
+    for (usage = CALL_INSN_FUNCTION_USAGE (insn);
+	 usage != 0;
+	 usage = XEXP (usage, 1))
+      {
+	rtx entry = XEXP (usage, 0);
+
+	attach_deaths (XEXP (entry, 0), insn, GET_CODE (entry) == CLOBBER);
+      }
+}
+
+/* Functions for handling notes.  */
+
+/* Unlink the run of NOTEs that starts at INSN and stops before TAIL,
+   appending them to the chain of notes whose last element is NOTE_LIST.
+   Returns the insn following the notes.  */
+
+static rtx
+unlink_other_notes (insn, tail)
+     rtx insn, tail;
+{
+  rtx before = PREV_INSN (insn);
+  rtx after;
+
+  for (; insn != tail && GET_CODE (insn) == NOTE; insn = after)
+    {
+      int kind;
+      int left_out_p;
+
+      after = NEXT_INSN (insn);
+
+      /* Splice the note out of the insn chain.  */
+      if (before)
+	NEXT_INSN (before) = after;
+      if (after)
+	PREV_INSN (after) = before;
+
+      /* NOTE_INSN_SETJMP notes are not saved away, because they must
+	 remain immediately after the call they follow; a fake REG_DEAD
+	 note is used to remember them.  Likewise with
+	 NOTE_INSN_{LOOP,EHREGION}_{BEG, END}.  */
+      kind = NOTE_LINE_NUMBER (insn);
+      left_out_p = (kind == NOTE_INSN_SETJMP
+		    || kind == NOTE_INSN_LOOP_BEG
+		    || kind == NOTE_INSN_LOOP_END
+		    || kind == NOTE_INSN_EH_REGION_BEG
+		    || kind == NOTE_INSN_EH_REGION_END);
+
+      if (! left_out_p)
+	{
+	  /* Append the note to the end of the notes list.  */
+	  PREV_INSN (insn) = note_list;
+	  if (note_list)
+	    NEXT_INSN (note_list) = insn;
+	  note_list = insn;
+	}
+    }
+
+  return insn;
+}
+
+/* Walk the run of consecutive notes that begins at INSN and ends
+   before TAIL (or at the first non-note insn).  When debug info is
+   requested (write_symbols != NO_DEBUG), every line-number note
+   (NOTE_LINE_NUMBER > 0) in the run is unlinked from the insn chain
+   and recorded in LINE_NOTE so it can be reused; all other notes are
+   left where they are.  Returns the insn following the notes.  */
+
+static rtx
+unlink_line_notes (insn, tail)
+     rtx insn, tail;
+{
+  rtx prev = PREV_INSN (insn);
+
+  while (insn != tail && GET_CODE (insn) == NOTE)
+    {
+      rtx next = NEXT_INSN (insn);
+
+      if (write_symbols != NO_DEBUG && NOTE_LINE_NUMBER (insn) > 0)
+	{
+	  /* Delete the note from its current position.  */
+	  if (prev)
+	    NEXT_INSN (prev) = next;
+	  if (next)
+	    PREV_INSN (next) = prev;
+
+	  /* Record line-number notes so they can be reused.  The note
+	     is stored in its own LINE_NOTE slot.  */
+	  LINE_NOTE (insn) = insn;
+	}
+      else
+	prev = insn;  /* Note stays linked, so it becomes the predecessor.  */
+
+      insn = next;
+    }
+  return insn;
+}
+
+/* Return the head and tail pointers of BB.
+   BB is mapped to a basic block number with BB_TO_BLOCK.  *HEADP and
+   *TAILP are pure outputs: they receive the first and last insn of the
+   block after notes and labels at the start, and notes at the end,
+   have been skipped.  Their values on entry are never read, so callers
+   may pass the addresses of uninitialized variables.  If the block
+   holds nothing but notes and labels, *HEADP == *TAILP on return and
+   that insn may itself be a note or a label.  */
+
+__inline static void
+get_block_head_tail (bb, headp, tailp)
+     int bb;
+     rtx *headp;
+     rtx *tailp;
+{
+
+  rtx head;
+  rtx tail;
+  int b;
+
+  b = BB_TO_BLOCK (bb);
+
+  /* HEAD and TAIL delimit the basic block being scheduled.  */
+  head = basic_block_head[b];
+  tail = basic_block_end[b];
+
+  /* Don't include any notes or labels at the beginning of the
+     basic block, or notes at the ends of basic blocks.  The loop
+     stops as soon as HEAD meets TAIL, so the two never cross.  */
+  while (head != tail)
+    {
+      if (GET_CODE (head) == NOTE)
+	head = NEXT_INSN (head);
+      else if (GET_CODE (tail) == NOTE)
+	tail = PREV_INSN (tail);
+      else if (GET_CODE (head) == CODE_LABEL)
+	head = NEXT_INSN (head);
+      else
+	break;
+    }
+
+  *headp = head;
+  *tailp = tail;
+}
+
+/* Delete line notes from bb. Save them so they can be later restored
+   (in restore_line_notes ()).
+   The range scanned is the one returned by get_block_head_tail.
+   Nothing is done when that range is a single insn that is not of
+   rtx class 'i'.  Aborts if a note is found at the head or the tail of
+   the range, or if a run of notes extends to the end of the range.  */
+
+static void
+rm_line_notes (bb)
+     int bb;
+{
+  rtx next_tail;
+  rtx tail;
+  rtx head;
+  rtx insn;
+
+  get_block_head_tail (bb, &head, &tail);
+
+  if (head == tail
+      && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
+    return;
+
+  next_tail = NEXT_INSN (tail);
+  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    {
+      rtx prev;
+
+      /* Farm out line-number notes, recording them in LINE_NOTE (done
+         by unlink_line_notes).  This is needed to keep the debugger
+         from getting completely deranged.  */
+      if (GET_CODE (insn) == NOTE)
+	{
+	  prev = insn;  /* First note of the run, kept for the checks below.  */
+	  insn = unlink_line_notes (insn, next_tail);
+
+	  if (prev == tail)
+	    abort ();
+	  if (prev == head)
+	    abort ();
+	  if (insn == next_tail)
+	    abort ();
+	}
+    }
+}
+
+/* Save line number notes for each insn in bb.
+   Walks from the first insn of the basic block (basic_block_head, not
+   the trimmed head) up to the trimmed tail returned by
+   get_block_head_tail, and stores in LINE_NOTE of every insn that is
+   not itself a line-number note the line-number note currently in
+   effect.  */
+
+static void
+save_line_notes (bb)
+     int bb;
+{
+  rtx head, tail;
+  rtx next_tail;
+
+  /* We must use the true line number for the first insn in the block
+     that was computed and saved at the start of this pass.  We can't
+     use the current line number, because scheduling of the previous
+     block may have changed the current line number.  */
+
+  rtx line = line_note_head[BB_TO_BLOCK (bb)];
+  rtx insn;
+
+  get_block_head_tail (bb, &head, &tail);  /* Only TAIL is used below.  */
+  next_tail = NEXT_INSN (tail);
+
+  for (insn = basic_block_head[BB_TO_BLOCK (bb)];
+       insn != next_tail;
+       insn = NEXT_INSN (insn))
+    if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
+      line = insn;
+    else
+      LINE_NOTE (insn) = line;
+}
+
+
+/* After bb was scheduled, insert line notes into the insns list.
+   Walks the whole basic block (basic_block_head through
+   basic_block_end).  In front of each non-note insn whose saved
+   LINE_NOTE differs, in line number or source file, from the
+   line-number note currently in effect, a line-number note is placed:
+   either the original note is relinked, or a new one is emitted.
+   When sched_verbose is set, the number of newly emitted notes is
+   written to the dump file.  */
+
+static void
+restore_line_notes (bb)
+     int bb;
+{
+  rtx line, note, prev, new;
+  int added_notes = 0;
+  int b;
+  rtx head, next_tail, insn;
+
+  b = BB_TO_BLOCK (bb);
+
+  head = basic_block_head[b];
+  next_tail = NEXT_INSN (basic_block_end[b]);
+
+  /* Determine the current line-number.  We want to know the current
+     line number of the first insn of the block here, in case it is
+     different from the true line number that was saved earlier.  If
+     different, then we need a line number note before the first insn
+     of this block.  If it happens to be the same, then we don't want to
+     emit another line number note here.  If no line-number note
+     precedes the block, LINE ends up 0.  */
+  for (line = head; line; line = PREV_INSN (line))
+    if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
+      break;
+
+  /* Walk the insns keeping track of the current line-number and inserting
+     the line-number notes as needed.  */
+  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
+      line = insn;
+  /* This used to emit line number notes before every non-deleted note.
+     However, this confuses a debugger, because line notes not separated
+     by real instructions all end up at the same address.  I can find no
+     use for line number notes before other notes, so none are emitted.  */
+    else if (GET_CODE (insn) != NOTE
+	     && (note = LINE_NOTE (insn)) != 0
+	     && note != line
+	     && (line == 0
+		 || NOTE_LINE_NUMBER (note) != NOTE_LINE_NUMBER (line)
+		 || NOTE_SOURCE_FILE (note) != NOTE_SOURCE_FILE (line)))
+      {
+	line = note;
+	prev = PREV_INSN (insn);
+	if (LINE_NOTE (note))
+	  {
+	    /* Re-use the original line-number note.  Its LINE_NOTE slot
+	       is cleared first, so a later insn that refers to the same
+	       note gets a fresh copy instead of relinking it twice.  */
+	    LINE_NOTE (note) = 0;
+	    PREV_INSN (note) = prev;
+	    NEXT_INSN (prev) = note;
+	    PREV_INSN (insn) = note;
+	    NEXT_INSN (note) = insn;
+	  }
+	else
+	  {
+	    added_notes++;
+	    new = emit_note_after (NOTE_LINE_NUMBER (note), prev);
+	    NOTE_SOURCE_FILE (new) = NOTE_SOURCE_FILE (note);
+	    RTX_INTEGRATED_P (new) = RTX_INTEGRATED_P (note);
+	  }
+      }
+  if (sched_verbose && added_notes)
+    fprintf (dump, ";; added %d line-number notes\n", added_notes);
+}
+
+/* After scheduling the function, delete redundant line notes from the
+   insns list.
+   The insns are scanned backwards from the last insn.  A line-number
+   note is "deleted" by turning it into a NOTE_INSN_DELETED note with a
+   null source file; it is not unlinked from the chain.  When
+   sched_verbose is set, the number of deleted notes is written to the
+   dump file.  */
+
+static void
+rm_redundant_line_notes ()
+{
+  rtx line = 0;
+  rtx insn = get_insns ();  /* Overwritten by the loop initializer below.  */
+  int active_insn = 0;
+  int notes = 0;
+
+  /* Walk the insns deleting redundant line-number notes.  Many of these
+     are already present.  The remainder tend to occur at basic
+     block boundaries.  LINE is the most recently kept line-number
+     note, which follows INSN in the chain because the walk is
+     backwards.  ACTIVE_INSN counts the active insns seen since the
+     last line-number note was visited.  */
+  for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
+    if (GET_CODE (insn) == NOTE && NOTE_LINE_NUMBER (insn) > 0)
+      {
+	/* If there are no active insns following, INSN is redundant.  */
+	if (active_insn == 0)
+	  {
+	    notes++;
+	    NOTE_SOURCE_FILE (insn) = 0;
+	    NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+	  }
+	/* If the line number is unchanged, LINE is redundant.  */
+	else if (line
+		 && NOTE_LINE_NUMBER (line) == NOTE_LINE_NUMBER (insn)
+		 && NOTE_SOURCE_FILE (line) == NOTE_SOURCE_FILE (insn))
+	  {
+	    notes++;
+	    NOTE_SOURCE_FILE (line) = 0;
+	    NOTE_LINE_NUMBER (line) = NOTE_INSN_DELETED;
+	    line = insn;
+	  }
+	else
+	  line = insn;
+	active_insn = 0;
+      }
+    /* Everything else counts as active, except deleted notes and
+       insns whose pattern is a bare USE or CLOBBER.  */
+    else if (!((GET_CODE (insn) == NOTE
+		&& NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED)
+	       || (GET_CODE (insn) == INSN
+		   && (GET_CODE (PATTERN (insn)) == USE
+		       || GET_CODE (PATTERN (insn)) == CLOBBER))))
+      active_insn++;
+
+  if (sched_verbose && notes)
+    fprintf (dump, ";; deleted %d line-number notes\n", notes);
+}
+
+/* Delete notes between head and tail and put them in the chain
+   of notes ended by NOTE_LIST.
+   Nothing is done when HEAD == TAIL and that insn is not of rtx
+   class 'i'.  Aborts if a note is found at HEAD or at TAIL, or if a
+   run of notes extends to the insn after TAIL.  */
+
+static void
+rm_other_notes (head, tail)
+     rtx head;
+     rtx tail;
+{
+  rtx next_tail;
+  rtx insn;
+
+  if (head == tail
+      && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
+    return;
+
+  next_tail = NEXT_INSN (tail);
+  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    {
+      rtx prev;
+
+      /* Farm out notes, and maybe save them in NOTE_LIST.
+         This is needed to keep the debugger from
+         getting completely deranged.  */
+      if (GET_CODE (insn) == NOTE)
+	{
+	  prev = insn;  /* First note of the run, kept for the checks below.  */
+
+	  insn = unlink_other_notes (insn, next_tail);
+
+	  if (prev == tail)
+	    abort ();
+	  if (prev == head)
+	    abort ();
+	  if (insn == next_tail)
+	    abort ();
+	}
+    }
+}
+
+/* Constructor for `sometimes' data structure.
+   Initializes entry number SOMETIMES_MAX of REGS_SOMETIMES_LIVE for
+   register REGNO, with zero live length and zero calls crossed, and
+   returns the new entry count, SOMETIMES_MAX + 1.  The caller must
+   provide room for the new entry; the array bound is not checked
+   here.  */
+
+static int
+new_sometimes_live (regs_sometimes_live, regno, sometimes_max)
+     struct sometimes *regs_sometimes_live;
+     int regno;
+     int sometimes_max;
+{
+  register struct sometimes *p;
+
+  /* There should never be a register number greater than or equal to
+     max_regno here.  If there is, it means that a define_split has
+     created a new pseudo reg.  This is not allowed, since there will
+     not be flow info available for any new register, so catch the
+     error here.  */
+  if (regno >= max_regno)
+    abort ();
+
+  p = &regs_sometimes_live[sometimes_max];
+  p->regno = regno;
+  p->live_length = 0;
+  p->calls_crossed = 0;
+  sometimes_max++;
+  return sometimes_max;
+}
+
+/* Flush the counts of all regs we are still tracking: for each of the
+   first SOMETIMES_MAX entries of REGS_SOMETIMES_LIVE, add its
+   live_length to sched_reg_live_length and its calls_crossed to
+   sched_reg_n_calls_crossed for that entry's register.  The entries
+   themselves are not modified.  */
+
+static void
+finish_sometimes_live (regs_sometimes_live, sometimes_max)
+     struct sometimes *regs_sometimes_live;
+     int sometimes_max;
+{
+  int i;
+
+  for (i = 0; i < sometimes_max; i++)
+    {
+      register struct sometimes *p = &regs_sometimes_live[i];
+      int regno = p->regno;
+
+      sched_reg_live_length[regno] += p->live_length;
+      sched_reg_n_calls_crossed[regno] += p->calls_crossed;
+    }
+}
+
+/* functions for computation of registers live/usage info */
+
+/* It is assumed that prior to scheduling basic_block_live_at_start (b)
+   contains the registers that are alive at the entry to b.
+
+   Two passes follow: The first pass is performed before the scheduling
+   of a region.  It scans each block of the region forward, computing
+   the set of registers alive at the end of the basic block and
+   discarding REG_DEAD notes (done by find_pre_sched_live ()).
+
+   The second pass is invoked after scheduling all region blocks.
+   It scans each block of the region backward, a block being traversed
+   only after its successors in the region.  When the set of registers
+   live at the end of a basic block may be changed by the scheduling
+   (this may happen for a multiple-block region), it is computed as
+   the union of the registers live at the start of its successors.
+   The last-use information is updated by inserting REG_DEAD notes
+   (done by find_post_sched_live ()).  */
+
+/* Scan all the insns to be scheduled, removing register death notes.
+   Register death notes end up in DEAD_NOTES.
+   Recreate the register life information for the end of this basic
+   block.
+   The scan starts with bb_live_regs set to a copy of
+   basic_block_live_at_start for the block and runs forward over the
+   range returned by get_block_head_tail.  As a side effect,
+   INSN_REG_WEIGHT of every insn scanned is set to the number of SETs
+   and CLOBBERs in its pattern minus the number of its REG_DEAD and
+   REG_UNUSED notes that name a register (0 for insns that are not of
+   rtx class 'i').  */
+
+static void
+find_pre_sched_live (bb)
+     int bb;
+{
+  rtx insn, next_tail, head, tail;
+  int b = BB_TO_BLOCK (bb);
+
+  get_block_head_tail (bb, &head, &tail);
+  COPY_REG_SET (bb_live_regs, basic_block_live_at_start[b]);
+  next_tail = NEXT_INSN (tail);
+
+  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    {
+      rtx prev, next, link;
+      int reg_weight = 0;
+
+      /* Handle register life information.  */
+      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+	{
+	  /* See if the register gets born here.  */
+	  /* We must check for registers being born before we check for
+	     registers dying.  It is possible for a register to be born and
+	     die in the same insn, e.g. reading from a volatile memory
+	     location into an otherwise unused register.  Such a register
+	     must be marked as dead after this insn.  */
+	  if (GET_CODE (PATTERN (insn)) == SET
+	      || GET_CODE (PATTERN (insn)) == CLOBBER)
+	    {
+	      sched_note_set (b, PATTERN (insn), 0);
+	      reg_weight++;
+	    }
+
+	  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
+	    {
+	      int j;
+	      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
+		if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
+		    || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
+		  {
+		    sched_note_set (b, XVECEXP (PATTERN (insn), 0, j), 0);
+		    reg_weight++;
+		  }
+
+	      /* ??? This code is obsolete and should be deleted.  It
+	         is harmless though, so we will leave it in for now.  */
+	      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
+		if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == USE)
+		  sched_note_set (b, XVECEXP (PATTERN (insn), 0, j), 0);
+	    }
+
+	  /* Each call clobbers (makes live) all call-clobbered regs
+	     that are not global or fixed.  Note that the function-value
+	     reg is a call_clobbered reg.  */
+	  if (GET_CODE (insn) == CALL_INSN)
+	    {
+	      int j;
+	      for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
+		if (call_used_regs[j] && !global_regs[j]
+		    && ! fixed_regs[j])
+		  {
+		    SET_REGNO_REG_SET (bb_live_regs, j);
+#if 0
+		    CLEAR_REGNO_REG_SET (bb_dead_regs, j);
+#endif
+		  }
+	    }
+
+	  /* Need to know what registers this insn kills.  PREV trails
+	     LINK so that a REG_DEAD note can be spliced out of the
+	     insn's note list in place.  */
+	  for (prev = 0, link = REG_NOTES (insn); link; link = next)
+	    {
+	      next = XEXP (link, 1);
+	      if ((REG_NOTE_KIND (link) == REG_DEAD
+		   || REG_NOTE_KIND (link) == REG_UNUSED)
+	      /* Verify that the REG_NOTE has a valid value.  */
+		  && GET_CODE (XEXP (link, 0)) == REG)
+		{
+		  register int regno = REGNO (XEXP (link, 0));
+
+		  reg_weight--;
+
+		  /* Only unlink REG_DEAD notes; leave REG_UNUSED notes
+		     alone.  Unlinked notes are pushed on the front of
+		     the dead_notes list.  */
+		  if (REG_NOTE_KIND (link) == REG_DEAD)
+		    {
+		      if (prev)
+			XEXP (prev, 1) = next;
+		      else
+			REG_NOTES (insn) = next;
+		      XEXP (link, 1) = dead_notes;
+		      dead_notes = link;
+		    }
+		  else
+		    prev = link;
+
+		  /* A hard register may span several consecutive
+		     register numbers in this mode; clear each one.  */
+		  if (regno < FIRST_PSEUDO_REGISTER)
+		    {
+		      int j = HARD_REGNO_NREGS (regno,
+						GET_MODE (XEXP (link, 0)));
+		      while (--j >= 0)
+			{
+			  CLEAR_REGNO_REG_SET (bb_live_regs, regno+j);
+			}
+		    }
+		  else
+		    {
+		      CLEAR_REGNO_REG_SET (bb_live_regs, regno);
+		    }
+		}
+	      else
+		prev = link;
+	    }
+	}
+
+      INSN_REG_WEIGHT (insn) = reg_weight;
+    }
+}
+
+/* Update register life and usage information for block bb
+   after scheduling.  Put register dead notes back in the code.
+   When the region has more than one block (current_nr_blocks > 1),
+   bb_live_regs is first rebuilt as the union of
+   basic_block_live_at_start over the successors of the block, and at
+   the end basic_block_live_at_start for the block is replaced by the
+   set computed here.  Otherwise bb_live_regs is used as found on
+   entry (presumably as left by find_pre_sched_live -- confirm against
+   the caller).  The insns are scanned backwards from the tail; lifetime
+   lengths and call crossings are accumulated into
+   sched_reg_live_length and sched_reg_n_calls_crossed, and pseudos
+   live at the end of the block are marked REG_BLOCK_GLOBAL in
+   sched_reg_basic_block.  */
+
+static void
+find_post_sched_live (bb)
+     int bb;
+{
+  int sometimes_max;
+  int j, i;
+  int b;
+  rtx insn;
+  rtx head, tail, prev_head, next_tail;
+
+  register struct sometimes *regs_sometimes_live;
+
+  b = BB_TO_BLOCK (bb);
+
+  /* Compute live regs at the end of bb as a function of its successors.
+     The out-edges of a block form a circular list, so the walk ends
+     when it comes back to the first edge.  */
+  if (current_nr_blocks > 1)
+    {
+      int e;
+      int first_edge;
+
+      first_edge = e = OUT_EDGES (b);
+      CLEAR_REG_SET (bb_live_regs);
+
+      if (e)
+	do
+	  {
+	    int b_succ;
+
+	    b_succ = TO_BLOCK (e);
+	    IOR_REG_SET (bb_live_regs, basic_block_live_at_start[b_succ]);
+	    e = NEXT_OUT (e);
+	  }
+	while (e != first_edge);
+    }
+
+  get_block_head_tail (bb, &head, &tail);
+  next_tail = NEXT_INSN (tail);
+  prev_head = PREV_INSN (head);
+
+  for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
+    if (REGNO_REG_SET_P (bb_live_regs, i))
+      sched_reg_basic_block[i] = REG_BLOCK_GLOBAL;
+
+  /* If the block is empty, same regs are alive at its end and its start.
+     Since this is not guaranteed after interblock scheduling, make sure they
+     are truly identical.  */
+  if (NEXT_INSN (prev_head) == tail
+      && (GET_RTX_CLASS (GET_CODE (tail)) != 'i'))
+    {
+      if (current_nr_blocks > 1)
+	COPY_REG_SET (basic_block_live_at_start[b], bb_live_regs);
+
+      return;
+    }
+
+  b = BB_TO_BLOCK (bb);  /* Same value as computed at the top.  */
+  current_block_num = b;
+
+  /* Keep track of register lives.  At most one entry per register is
+     tracked at a time, hence max_regno entries.  */
+  old_live_regs = ALLOCA_REG_SET ();
+  regs_sometimes_live
+    = (struct sometimes *) alloca (max_regno * sizeof (struct sometimes));
+  sometimes_max = 0;
+
+  /* Initiate "sometimes" data, starting with registers live at end.  */
+  sometimes_max = 0;
+  COPY_REG_SET (old_live_regs, bb_live_regs);
+  EXECUTE_IF_SET_IN_REG_SET (bb_live_regs, 0, j,
+			     {
+			       sometimes_max
+				 = new_sometimes_live (regs_sometimes_live,
+						       j, sometimes_max);
+			     });
+
+  /* Scan insns back, computing regs live info.  */
+  for (insn = tail; insn != prev_head; insn = PREV_INSN (insn))
+    {
+      /* First we kill registers set by this insn, and then we
+         make registers used by this insn live.  This is the opposite
+         order used above because we are traversing the instructions
+         backwards.  */
+
+      /* Strictly speaking, we should scan REG_UNUSED notes and make
+         every register mentioned there live, however, we will just
+         kill them again immediately below, so there doesn't seem to
+         be any reason why we bother to do this.  */
+
+      /* See if this is the last notice we must take of a register.  */
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	continue;
+
+      if (GET_CODE (PATTERN (insn)) == SET
+	  || GET_CODE (PATTERN (insn)) == CLOBBER)
+	sched_note_set (b, PATTERN (insn), 1);
+      else if (GET_CODE (PATTERN (insn)) == PARALLEL)
+	{
+	  for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
+	    if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
+		|| GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
+	      sched_note_set (b, XVECEXP (PATTERN (insn), 0, j), 1);
+	}
+
+      /* This code keeps life analysis information up to date.  */
+      if (GET_CODE (insn) == CALL_INSN)
+	{
+	  register struct sometimes *p;
+
+	  /* A call kills all call used registers that are not
+	     global or fixed, except for those mentioned in the call
+	     pattern which will be made live again later.  */
+	  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+	    if (call_used_regs[i] && ! global_regs[i]
+		&& ! fixed_regs[i])
+	      {
+		CLEAR_REGNO_REG_SET (bb_live_regs, i);
+#if 0
+		SET_REGNO_REG_SET (bb_dead_regs, i);
+#endif
+	      }
+
+	  /* Regs live at the time of a call instruction must not
+	     go in a register clobbered by calls.  Record this for
+	     all regs now live.  Note that insns which are born or
+	     die in a call do not cross a call, so this must be done
+	     after the killings (above) and before the births
+	     (below).  */
+	  p = regs_sometimes_live;
+	  for (i = 0; i < sometimes_max; i++, p++)
+	    if (REGNO_REG_SET_P (bb_live_regs, p->regno))
+	      p->calls_crossed += 1;
+	}
+
+      /* Make every register used live, and add REG_DEAD notes for
+         registers which were not live before we started.  */
+      attach_deaths_insn (insn);
+
+      /* Find registers now made live by that instruction: those set in
+         bb_live_regs but not yet in old_live_regs.  */
+      EXECUTE_IF_AND_COMPL_IN_REG_SET (bb_live_regs, old_live_regs, 0, j,
+				 {
+				   sometimes_max
+				     = new_sometimes_live (regs_sometimes_live,
+							   j, sometimes_max);
+				 });
+      IOR_REG_SET (old_live_regs, bb_live_regs);
+
+      /* Count lengths of all regs we are worrying about now,
+         and handle registers no longer live.  */
+
+      for (i = 0; i < sometimes_max; i++)
+	{
+	  register struct sometimes *p = &regs_sometimes_live[i];
+	  int regno = p->regno;
+
+	  p->live_length += 1;
+
+	  if (!REGNO_REG_SET_P (bb_live_regs, regno))
+	    {
+	      /* This is the end of one of this register's lifetime
+	         segments.  Save the lifetime info collected so far,
+	         and clear its bit in the old_live_regs entry.  */
+	      sched_reg_live_length[regno] += p->live_length;
+	      sched_reg_n_calls_crossed[regno] += p->calls_crossed;
+	      CLEAR_REGNO_REG_SET (old_live_regs, p->regno);
+
+	      /* Delete the reg_sometimes_live entry for this reg by
+	         copying the last entry over top of it.  */
+	      *p = regs_sometimes_live[--sometimes_max];
+	      /* ...and decrement i so that this newly copied entry
+	         will be processed.  */
+	      i--;
+	    }
+	}
+    }
+
+  finish_sometimes_live (regs_sometimes_live, sometimes_max);
+
+  /* In interblock scheduling, basic_block_live_at_start may have changed.  */
+  if (current_nr_blocks > 1)
+    COPY_REG_SET (basic_block_live_at_start[b], bb_live_regs);
+
+}				/* find_post_sched_live */
+
+/* After scheduling the subroutine, restore information about uses of
+   registers.
+   Pseudos live at the start of basic block 0 are first marked
+   REG_BLOCK_GLOBAL in sched_reg_basic_block.  Then, for every register
+   whose sched_reg_live_length is nonzero, REG_LIVE_LENGTH,
+   REG_BASIC_BLOCK and REG_N_CALLS_CROSSED are updated from the
+   scheduler's values, subject to the restrictions noted below.  When
+   sched_verbose is set, each change is also reported in the dump
+   file.  */
+
+static void
+update_reg_usage ()
+{
+  int regno;
+
+  if (n_basic_blocks > 0)
+    for (regno = FIRST_PSEUDO_REGISTER; regno < max_regno; regno++)
+      if (REGNO_REG_SET_P (basic_block_live_at_start[0], regno))
+	sched_reg_basic_block[regno] = REG_BLOCK_GLOBAL;
+
+  for (regno = 0; regno < max_regno; regno++)
+    if (sched_reg_live_length[regno])
+      {
+	if (sched_verbose)
+	  {
+	    if (REG_LIVE_LENGTH (regno) > sched_reg_live_length[regno])
+	      fprintf (dump,
+		       ";; register %d life shortened from %d to %d\n",
+		       regno, REG_LIVE_LENGTH (regno),
+		       sched_reg_live_length[regno]);
+	    /* Negative values are special; don't overwrite the current
+	       reg_live_length value if it is negative.  */
+	    else if (REG_LIVE_LENGTH (regno) < sched_reg_live_length[regno]
+		     && REG_LIVE_LENGTH (regno) >= 0)
+	      fprintf (dump,
+		       ";; register %d life extended from %d to %d\n",
+		       regno, REG_LIVE_LENGTH (regno),
+		       sched_reg_live_length[regno]);
+
+	    if (!REG_N_CALLS_CROSSED (regno)
+		&& sched_reg_n_calls_crossed[regno])
+	      fprintf (dump,
+		       ";; register %d now crosses calls\n", regno);
+	    else if (REG_N_CALLS_CROSSED (regno)
+		     && !sched_reg_n_calls_crossed[regno]
+		     && REG_BASIC_BLOCK (regno) != REG_BLOCK_GLOBAL)
+	      fprintf (dump,
+		       ";; register %d no longer crosses calls\n", regno);
+
+	    if (REG_BASIC_BLOCK (regno) != sched_reg_basic_block[regno]
+		&& sched_reg_basic_block[regno] != REG_BLOCK_UNKNOWN
+		&& REG_BASIC_BLOCK(regno) != REG_BLOCK_UNKNOWN)
+	      fprintf (dump,
+		       ";; register %d changed basic block from %d to %d\n",
+			regno, REG_BASIC_BLOCK(regno),
+			sched_reg_basic_block[regno]);
+
+	  }
+	/* Negative values are special; don't overwrite the current
+	   reg_live_length value if it is negative.  */
+	if (REG_LIVE_LENGTH (regno) >= 0)
+	  REG_LIVE_LENGTH (regno) = sched_reg_live_length[regno];
+
+	/* The basic block is only updated when both the old and the
+	   new value are known.  */
+	if (sched_reg_basic_block[regno] != REG_BLOCK_UNKNOWN
+	    && REG_BASIC_BLOCK(regno) != REG_BLOCK_UNKNOWN)
+	  REG_BASIC_BLOCK(regno) = sched_reg_basic_block[regno];
+
+	/* We can't change the value of reg_n_calls_crossed to zero for
+	   pseudos which are live in more than one block.
+
+	   This is because combine might have made an optimization which
+	   invalidated basic_block_live_at_start and reg_n_calls_crossed,
+	   but it does not update them.  If we update reg_n_calls_crossed
+	   here, the two variables are now inconsistent, and this might
+	   confuse the caller-save code into saving a register that doesn't
+	   need to be saved.  This is only a problem when we zero calls
+	   crossed for a pseudo live in multiple basic blocks.
+
+	   Alternatively, we could try to correctly update basic block live
+	   at start here in sched, but that seems complicated.
+
+	   Note: it is possible that a global register became local, as result
+	   of interblock motion, but will remain marked as a global register.  */
+	if (sched_reg_n_calls_crossed[regno]
+	    || REG_BASIC_BLOCK (regno) != REG_BLOCK_GLOBAL)
+	  REG_N_CALLS_CROSSED (regno) = sched_reg_n_calls_crossed[regno];
+
+      }
+}
+
+/* Scheduling clock, modified in schedule_block () and queue_to_ready ().  */
+static int clock_var;
+
+/* Move insns that became ready to fire from queue to ready list.
+   Advances q_ptr by one slot and appends every insn queued in that
+   slot to READY, starting at index N_READY.  If the ready list is
+   still empty afterwards, the following slots are searched in order;
+   the insns of the first non-empty slot are appended, and q_ptr and
+   clock_var are advanced by the number of stall cycles skipped.
+   q_size is decremented once per insn moved.  Returns the new number
+   of insns in READY.  The capacity of READY is not checked here.  */
+
+static int
+queue_to_ready (ready, n_ready)
+     rtx ready[];
+     int n_ready;
+{
+  rtx insn;
+  rtx link;
+
+  q_ptr = NEXT_Q (q_ptr);
+
+  /* Add all pending insns that can be scheduled without stalls to the
+     ready list.  */
+  for (link = insn_queue[q_ptr]; link; link = XEXP (link, 1))
+    {
+
+      insn = XEXP (link, 0);
+      q_size -= 1;
+
+      if (sched_verbose >= 2)
+	fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
+
+      if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
+	fprintf (dump, "(b%d) ", INSN_BLOCK (insn));
+
+      ready[n_ready++] = insn;
+      if (sched_verbose >= 2)
+	fprintf (dump, "moving to ready without stalls\n");
+    }
+  insn_queue[q_ptr] = 0;
+
+  /* If there are no ready insns, stall until one is ready and add all
+     of the pending insns at that point to the ready list.  If every
+     slot turns out to be empty, the loop below ends with STALLS equal
+     to INSN_QUEUE_SIZE and the clock is still advanced by that
+     amount.  */
+  if (n_ready == 0)
+    {
+      register int stalls;
+
+      for (stalls = 1; stalls < INSN_QUEUE_SIZE; stalls++)
+	{
+	  if ((link = insn_queue[NEXT_Q_AFTER (q_ptr, stalls)]))
+	    {
+	      for (; link; link = XEXP (link, 1))
+		{
+		  insn = XEXP (link, 0);
+		  q_size -= 1;
+
+		  if (sched_verbose >= 2)
+		    fprintf (dump, ";;\t\tQ-->Ready: insn %d: ", INSN_UID (insn));
+
+		  if (sched_verbose >= 2 && INSN_BB (insn) != target_bb)
+		    fprintf (dump, "(b%d) ", INSN_BLOCK (insn));
+
+		  ready[n_ready++] = insn;
+		  if (sched_verbose >= 2)
+		    fprintf (dump, "moving to ready with %d stalls\n", stalls);
+		}
+	      insn_queue[NEXT_Q_AFTER (q_ptr, stalls)] = 0;
+
+	      if (n_ready)
+		break;
+	    }
+	}
+
+      if (sched_verbose && stalls)  /* STALLS is at least 1 here.  */
+	visualize_stall_cycles (BB_TO_BLOCK (target_bb), stalls);
+      q_ptr = NEXT_Q_AFTER (q_ptr, stalls);
+      clock_var += stalls;
+    }
+  return n_ready;
+}
+
+/* Print the ready list for debugging purposes. Callable from debugger.
+   Writes the INSN_UID of each of the N_READY insns in READY to the
+   dump file on one line.  In a multi-block region, an insn that does
+   not belong to target_bb is followed by "/b" and its block number.  */
+
+extern void
+debug_ready_list (ready, n_ready)
+     rtx ready[];
+     int n_ready;
+{
+  int i;
+
+  for (i = 0; i < n_ready; i++)
+    {
+      fprintf (dump, "  %d", INSN_UID (ready[i]));
+      if (current_nr_blocks > 1 && INSN_BB (ready[i]) != target_bb)
+	fprintf (dump, "/b%d", INSN_BLOCK (ready[i]));
+    }
+  fprintf (dump, "\n");
+}
+
+/* Print names of units on which insn can/should execute, for debugging.
+   The value returned by insn_unit is decoded as follows: -1 prints
+   "none"; a non-negative value is an index into function_units and
+   prints that unit's name; any other negative value is the complement
+   of a bit mask of unit indices, and prints the names of all those
+   units, separated by spaces, inside brackets.  */
+
+static void
+insn_print_units (insn)
+     rtx insn;
+{
+  int i;
+  int unit = insn_unit (insn);
+
+  if (unit == -1)
+    fprintf (dump, "none");
+  else if (unit >= 0)
+    fprintf (dump, "%s", function_units[unit].name);
+  else
+    {
+      fprintf (dump, "[");
+      for (i = 0, unit = ~unit; unit; i++, unit >>= 1)
+	if (unit & 1)
+	  {
+	    fprintf (dump, "%s", function_units[i].name);
+	    if (unit != 1)  /* More units follow, so print a separator.  */
+	      fprintf (dump, " ");
+	  }
+      fprintf (dump, "]");
+    }
+}
+
+/* MAX_VISUAL_LINES is the maximum number of lines in the visualization
+   table of a basic block.  If more lines are needed, the table is split
+   in two.
+   INSN_LEN is used to size the scratch buffer for one table field.
+   n_visual_lines is the number of lines printed so far for a block.
+   visual_tbl contains the block visualization info.
+   vis_no_unit holds insns in a cycle that are not mapped to any unit;
+   n_vis_no_unit is presumably the number of entries in use -- confirm
+   against the code that fills the array.  */
+#define MAX_VISUAL_LINES 100
+#define INSN_LEN 30
+int n_visual_lines;
+char *visual_tbl;
+int n_vis_no_unit;
+rtx vis_no_unit[10];
+
+/* Finds units that are in use in this function.  Required only
+   for visualization.
+   Scans every insn of rtx class 'i', from the last insn backwards, and
+   ORs the units reported by insn_unit into the target_units bit mask.
+   A negative unit value is the complement of a unit mask; a
+   non-negative one is a single unit index.  target_units is only
+   added to here, never cleared.  */
+
+static void
+init_target_units ()
+{
+  rtx insn;
+  int unit;
+
+  for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
+    {
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	continue;
+
+      unit = insn_unit (insn);
+
+      if (unit < 0)
+	target_units |= ~unit;
+      else
+	target_units |= (1 << unit);
+    }
+}
+
+/* Return the length of the visualization table, in bytes.
+   One line consists of the ";; " prefix, one field for each instance
+   (multiplicity) of every function unit present in target_units, two
+   further fields, and a newline plus two bytes of slack.  The result
+   is the length of MAX_VISUAL_LINES such lines.  */
+
+static int
+get_visual_tbl_length ()
+{
+  int unit, i;
+  int n, n1;
+  char *s;
+
+  /* Compute length of one field in line.  The format below produces
+     2 + 33 = 35 characters plus the terminating NUL, so the scratch
+     buffer must hold INSN_LEN + 6 = 36 bytes; INSN_LEN + 5 was one
+     byte too small.  */
+  s = (char *) alloca (INSN_LEN + 6);
+  sprintf (s, "  %33s", "uname");
+  n1 = strlen (s);
+
+  /* Compute length of one line.  */
+  n = strlen (";; ");
+  n += n1;
+  for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
+    if (function_units[unit].bitmask & target_units)
+      for (i = 0; i < function_units[unit].multiplicity; i++)
+	n += n1;
+  n += n1;
+  n += strlen ("\n") + 2;
+
+  /* Compute length of visualization string.  */
+  return (MAX_VISUAL_LINES * n);
+}
+
+/* Init block visualization debugging info: make visual_tbl the empty
+   string and reset the n_visual_lines and n_vis_no_unit counters.
+   visual_tbl must already point to writable storage.  */
+
+static void
+init_block_visualization ()
+{
+  strcpy (visual_tbl, "");
+  n_visual_lines = 0;
+  n_vis_no_unit = 0;
+}
+
+#define BUF_LEN 256
+
+/* This recognizes rtx that I classified as expressions.  These always */
+/* represent some action on values or on results of other expressions, */
+/* which may be stored in objects representing values.  */
+
+static void
+print_exp (buf, x, verbose)
+     char *buf;
+     rtx x;
+     int verbose;
+{
+  char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
+
+  switch (GET_CODE (x))
+    {
+    case PLUS:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s+%s", t1, t2);
+      break;
+    case LO_SUM:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%sl+%s", t1, t2);
+      break;
+    case MINUS:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s-%s", t1, t2);
+      break;
+    case COMPARE:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s??%s", t1, t2);
+      break;
+    case NEG:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "-%s", t1);
+      break;
+    case MULT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s*%s", t1, t2);
+      break;
+    case DIV:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s/%s", t1, t2);
+      break;
+    case UDIV:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%su/%s", t1, t2);
+      break;
+    case MOD:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s%%%s", t1, t2);
+      break;
+    case UMOD:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%su%%%s", t1, t2);
+      break;
+    case SMIN:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "smin (%s, %s)", t1, t2);
+      break;
+    case SMAX:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "smax(%s,%s)", t1, t2);
+      break;
+    case UMIN:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "umin (%s, %s)", t1, t2);
+      break;
+    case UMAX:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "umax(%s,%s)", t1, t2);
+      break;
+    case NOT:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "!%s", t1);
+      break;
+    case AND:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s&%s", t1, t2);
+      break;
+    case IOR:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s|%s", t1, t2);
+      break;
+    case XOR:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s^%s", t1, t2);
+      break;
+    case ASHIFT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s<<%s", t1, t2);
+      break;
+    case LSHIFTRT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s0>%s", t1, t2);
+      break;
+    case ASHIFTRT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s>>%s", t1, t2);
+      break;
+    case ROTATE:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s<-<%s", t1, t2);
+      break;
+    case ROTATERT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s>->%s", t1, t2);
+      break;
+    case ABS:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "abs(%s)", t1);
+      break;
+    case SQRT:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "sqrt(%s)", t1);
+      break;
+    case FFS:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "ffs(%s)", t1);
+      break;
+    case EQ:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s == %s", t1, t2);
+      break;
+    case NE:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s!=%s", t1, t2);
+      break;
+    case GT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s>%s", t1, t2);
+      break;
+    case GTU:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s>u%s", t1, t2);
+      break;
+    case LT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s<%s", t1, t2);
+      break;
+    case LTU:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s<u%s", t1, t2);
+      break;
+    case GE:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s>=%s", t1, t2);
+      break;
+    case GEU:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s>=u%s", t1, t2);
+      break;
+    case LE:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s<=%s", t1, t2);
+      break;
+    case LEU:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      sprintf (buf, "%s<=u%s", t1, t2);
+      break;
+    case SIGN_EXTRACT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      print_value (t3, XEXP (x, 2), verbose);
+      if (verbose)
+	sprintf (buf, "sign_extract(%s,%s,%s)", t1, t2, t3);
+      else
+	sprintf (buf, "sxt(%s,%s,%s)", t1, t2, t3);
+      break;
+    case ZERO_EXTRACT:
+      print_value (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      print_value (t3, XEXP (x, 2), verbose);
+      if (verbose)
+	sprintf (buf, "zero_extract(%s,%s,%s)", t1, t2, t3);
+      else
+	sprintf (buf, "zxt(%s,%s,%s)", t1, t2, t3);
+      break;
+    case SIGN_EXTEND:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "sign_extend(%s)", t1);
+      else
+	sprintf (buf, "sxn(%s)", t1);
+      break;
+    case ZERO_EXTEND:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "zero_extend(%s)", t1);
+      else
+	sprintf (buf, "zxn(%s)", t1);
+      break;
+    case FLOAT_EXTEND:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "float_extend(%s)", t1);
+      else
+	sprintf (buf, "fxn(%s)", t1);
+      break;
+    case TRUNCATE:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "trunc(%s)", t1);
+      else
+	sprintf (buf, "trn(%s)", t1);
+      break;
+    case FLOAT_TRUNCATE:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "float_trunc(%s)", t1);
+      else
+	sprintf (buf, "ftr(%s)", t1);
+      break;
+    case FLOAT:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "float(%s)", t1);
+      else
+	sprintf (buf, "flt(%s)", t1);
+      break;
+    case UNSIGNED_FLOAT:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "uns_float(%s)", t1);
+      else
+	sprintf (buf, "ufl(%s)", t1);
+      break;
+    case FIX:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "fix(%s)", t1);
+      break;
+    case UNSIGNED_FIX:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	sprintf (buf, "uns_fix(%s)", t1);
+      else
+	sprintf (buf, "ufx(%s)", t1);
+      break;
+    case PRE_DEC:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "--%s", t1);
+      break;
+    case PRE_INC:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "++%s", t1);
+      break;
+    case POST_DEC:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "%s--", t1);
+      break;
+    case POST_INC:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "%s++", t1);
+      break;
+    case CALL:
+      print_value (t1, XEXP (x, 0), verbose);
+      if (verbose)
+	{
+	  print_value (t2, XEXP (x, 1), verbose);
+	  sprintf (buf, "call %s argc:%s", t1, t2);
+	}
+      else
+	sprintf (buf, "call %s", t1);
+      break;
+    case IF_THEN_ELSE:
+      print_exp (t1, XEXP (x, 0), verbose);
+      print_value (t2, XEXP (x, 1), verbose);
+      print_value (t3, XEXP (x, 2), verbose);
+      sprintf (buf, "{(%s)?%s:%s}", t1, t2, t3);
+      break;
+    case TRAP_IF:
+      print_value (t1, TRAP_CONDITION (x), verbose);
+      sprintf (buf, "trap_if %s", t1);
+      break;
+    case UNSPEC:
+      {
+	int i;
+
+	sprintf (t1, "unspec{");
+	for (i = 0; i < XVECLEN (x, 0); i++)
+	  {
+	    print_pattern (t2, XVECEXP (x, 0, i), verbose);
+	    sprintf (t3, "%s%s;", t1, t2);
+	    strcpy (t1, t3);
+	  }
+	sprintf (buf, "%s}", t1);
+      }
+      break;
+    case UNSPEC_VOLATILE:
+      {
+	int i;
+
+	sprintf (t1, "unspec/v{");
+	for (i = 0; i < XVECLEN (x, 0); i++)
+	  {
+	    print_pattern (t2, XVECEXP (x, 0, i), verbose);
+	    sprintf (t3, "%s%s;", t1, t2);
+	    strcpy (t1, t3);
+	  }
+	sprintf (buf, "%s}", t1);
+      }
+      break;
+    default:
+/*    if (verbose) debug_rtx (x); else sprintf (buf, "$$$"); */
+      sprintf (buf, "$$$");
+    }
+}				/* print_exp */
+
+/* Print into BUF a compact form of X, an rtx that this file classifies
+   as a "value": a constant, register, label, symbol or memory access.
+   Any other rtx code is handed to print_exp.  VERBOSE is passed through
+   unchanged to the nested printers.  Output is written with sprintf and
+   is not bounds-checked.  */
+
+static void
+print_value (buf, x, verbose)
+     char *buf;
+     rtx x;
+     int verbose;
+{
+  char t[BUF_LEN];
+
+  switch (GET_CODE (x))
+    {
+    case CONST_INT:
+      /* INTVAL is a HOST_WIDE_INT, which may be wider than int; convert it
+	 explicitly so the argument agrees with the conversion.  */
+      sprintf (buf, "%lXh", (unsigned long) INTVAL (x));
+      break;
+    case CONST_DOUBLE:
+      print_value (t, XEXP (x, 0), verbose);
+      sprintf (buf, "<%s>", t);
+      break;
+    case CONST_STRING:
+      /* Operand 0 is a string; read it with XSTR instead of casting the
+	 rtx-typed XEXP.  */
+      sprintf (buf, "\"%s\"", XSTR (x, 0));
+      break;
+    case SYMBOL_REF:
+      sprintf (buf, "`%s'", XSTR (x, 0));
+      break;
+    case LABEL_REF:
+      sprintf (buf, "L%d", INSN_UID (XEXP (x, 0)));
+      break;
+    case CONST:
+    case HIGH:
+      /* Both wrappers are printed as their single operand.  */
+      print_value (buf, XEXP (x, 0), verbose);
+      break;
+    case REG:
+      /* Registers in a floating point mode get the prefix "fr", all
+	 others "r".  */
+      if (GET_MODE (x) == SFmode
+	  || GET_MODE (x) == DFmode
+	  || GET_MODE (x) == XFmode
+	  || GET_MODE (x) == TFmode)
+	strcpy (t, "fr");
+      else
+	strcpy (t, "r");
+      /* The register number is an integer operand; casting the rtx-typed
+	 XEXP to int only works by accident of the host layout.  */
+      sprintf (buf, "%s%d", t, REGNO (x));
+      break;
+    case SUBREG:
+      print_value (t, XEXP (x, 0), verbose);
+      /* Likewise, the subreg word is an integer operand.  */
+      sprintf (buf, "%s#%d", t, SUBREG_WORD (x));
+      break;
+    case SCRATCH:
+      sprintf (buf, "scratch");
+      break;
+    case CC0:
+      sprintf (buf, "cc0");
+      break;
+    case PC:
+      sprintf (buf, "pc");
+      break;
+    case MEM:
+      print_value (t, XEXP (x, 0), verbose);
+      sprintf (buf, "[%s]", t);
+      break;
+    default:
+      print_exp (buf, x, verbose);
+    }
+}				/* print_value */
+
+/* Print into BUF a compact form of X, the pattern of an insn.  SET,
+   RETURN, CALL, CLOBBER, USE, PARALLEL, SEQUENCE, ASM_INPUT, the address
+   vectors, TRAP_IF and the UNSPEC codes are handled here; anything else
+   is handed to print_value.  VERBOSE is passed through unchanged to the
+   nested printers.  Output is written with sprintf and is not
+   bounds-checked.  */
+
+static void
+print_pattern (buf, x, verbose)
+     char *buf;
+     rtx x;
+     int verbose;
+{
+  char t1[BUF_LEN], t2[BUF_LEN], t3[BUF_LEN];
+
+  switch (GET_CODE (x))
+    {
+    case SET:
+      print_value (t1, SET_DEST (x), verbose);
+      print_value (t2, SET_SRC (x), verbose);
+      sprintf (buf, "%s=%s", t1, t2);
+      break;
+    case RETURN:
+      sprintf (buf, "return");
+      break;
+    case CALL:
+      print_exp (buf, x, verbose);
+      break;
+    case CLOBBER:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "clobber %s", t1);
+      break;
+    case USE:
+      print_value (t1, XEXP (x, 0), verbose);
+      sprintf (buf, "use %s", t1);
+      break;
+    case PARALLEL:
+      {
+	int i;
+
+	/* Accumulate "{elt;elt;...}" in T1, using T3 as scratch space.  */
+	sprintf (t1, "{");
+	for (i = 0; i < XVECLEN (x, 0); i++)
+	  {
+	    print_pattern (t2, XVECEXP (x, 0, i), verbose);
+	    sprintf (t3, "%s%s;", t1, t2);
+	    strcpy (t1, t3);
+	  }
+	sprintf (buf, "%s}", t1);
+      }
+      break;
+    case SEQUENCE:
+      {
+	int i;
+
+	/* The elements of a SEQUENCE are whole insns, so they are printed
+	   with print_insn.  */
+	sprintf (t1, "%%{");
+	for (i = 0; i < XVECLEN (x, 0); i++)
+	  {
+	    print_insn (t2, XVECEXP (x, 0, i), verbose);
+	    sprintf (t3, "%s%s;", t1, t2);
+	    strcpy (t1, t3);
+	  }
+	sprintf (buf, "%s%%}", t1);
+      }
+      break;
+    case ASM_INPUT:
+      /* Operand 0 is a string; read it with XSTR so that "%s" receives a
+	 char pointer rather than an rtx.  */
+      sprintf (buf, "asm {%s}", XSTR (x, 0));
+      break;
+    case ADDR_VEC:
+      /* Nothing is printed for an address vector, but BUF must still be
+	 a valid string because callers format it with "%s".  */
+      buf[0] = '\0';
+      break;
+    case ADDR_DIFF_VEC:
+      print_value (buf, XEXP (x, 0), verbose);
+      break;
+    case TRAP_IF:
+      print_value (t1, TRAP_CONDITION (x), verbose);
+      sprintf (buf, "trap_if %s", t1);
+      break;
+    case UNSPEC:
+      {
+	int i;
+
+	sprintf (t1, "unspec{");
+	for (i = 0; i < XVECLEN (x, 0); i++)
+	  {
+	    print_pattern (t2, XVECEXP (x, 0, i), verbose);
+	    sprintf (t3, "%s%s;", t1, t2);
+	    strcpy (t1, t3);
+	  }
+	sprintf (buf, "%s}", t1);
+      }
+      break;
+    case UNSPEC_VOLATILE:
+      {
+	int i;
+
+	sprintf (t1, "unspec/v{");
+	for (i = 0; i < XVECLEN (x, 0); i++)
+	  {
+	    print_pattern (t2, XVECEXP (x, 0, i), verbose);
+	    sprintf (t3, "%s%s;", t1, t2);
+	    strcpy (t1, t3);
+	  }
+	sprintf (buf, "%s}", t1);
+      }
+      break;
+    default:
+      print_value (buf, x, verbose);
+    }
+}				/* print_pattern */
+
+/* Entry point of the rtl visualization code.  Print into BUF a one-line,
+   assembler-like description of X according to its rtx code: INSN,
+   JUMP_INSN, CALL_INSN, CODE_LABEL, BARRIER or NOTE.  With VERBOSE set,
+   insns are prefixed by their block (INSN_BB) and UID; otherwise only the
+   UID is shown.  The block number comes from scheduler data, so the
+   verbose form depends on this file's internal state.  */
+
+static void
+print_insn (buf, x, verbose)
+     char *buf;
+     rtx x;
+     int verbose;
+{
+  char body[BUF_LEN];
+
+  switch (GET_CODE (x))
+    {
+    case INSN:
+      print_pattern (body, PATTERN (x), verbose);
+      if (!verbose)
+	sprintf (buf, "%-4d %s", INSN_UID (x), body);
+      else
+	sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x), INSN_UID (x), body);
+      break;
+
+    case JUMP_INSN:
+      print_pattern (body, PATTERN (x), verbose);
+      if (!verbose)
+	sprintf (buf, "%-4d %s", INSN_UID (x), body);
+      else
+	sprintf (buf, "b%d: i% 4d: jump %s", INSN_BB (x), INSN_UID (x), body);
+      break;
+
+    case CALL_INSN:
+      {
+	rtx pat = PATTERN (x);
+
+	/* Only the first element of a PARALLEL call pattern is shown; any
+	   other call pattern is abbreviated.  */
+	if (GET_CODE (pat) != PARALLEL)
+	  strcpy (body, "call <...>");
+	else
+	  print_pattern (body, XVECEXP (pat, 0, 0), verbose);
+
+	if (!verbose)
+	  sprintf (buf, "%-4d %s", INSN_UID (x), body);
+	else
+	  sprintf (buf, "b%d: i% 4d: %s", INSN_BB (x), INSN_UID (x), body);
+      }
+      break;
+
+    case CODE_LABEL:
+      sprintf (buf, "L%d:", INSN_UID (x));
+      break;
+
+    case BARRIER:
+      sprintf (buf, "i% 4d: barrier", INSN_UID (x));
+      break;
+
+    case NOTE:
+      /* A positive line number is printed with its source file; other
+	 notes are printed by name.  */
+      if (NOTE_LINE_NUMBER (x) <= 0)
+	sprintf (buf, "%4d %s", INSN_UID (x),
+		 GET_NOTE_INSN_NAME (NOTE_LINE_NUMBER (x)));
+      else
+	sprintf (buf, "%4d note \"%s\" %d", INSN_UID (x),
+		 NOTE_SOURCE_FILE (x), NOTE_LINE_NUMBER (x));
+      break;
+
+    default:
+      if (!verbose)
+	sprintf (buf, "i%-4d  <What?>", INSN_UID (x));
+      else
+	{
+	  sprintf (buf, "Not an INSN at all\n");
+	  debug_rtx (x);
+	}
+    }
+}				/* print_insn */
+
+/* Print to stdout, one per line and in non-verbose form, the insn chain
+   that starts at RTX_FIRST.  If RTX_FIRST is not one of the insn-like
+   codes, only RTX_FIRST itself is printed.  Nothing is printed when
+   RTX_FIRST is null.  */
+
+void
+print_insn_chain (rtx_first)
+     rtx rtx_first;
+{
+  char line[BUF_LEN];
+  rtx cur;
+
+  strcpy (line, "(nil)\n");
+  if (rtx_first == 0)
+    return;
+
+  switch (GET_CODE (rtx_first))
+    {
+    case INSN:
+    case JUMP_INSN:
+    case CALL_INSN:
+    case NOTE:
+    case CODE_LABEL:
+    case BARRIER:
+      /* Follow the NEXT_INSN links to the end of the chain.  */
+      cur = rtx_first;
+      while (cur != NULL)
+	{
+	  print_insn (line, cur, 0);
+	  printf ("%s\n", line);
+	  cur = NEXT_INSN (cur);
+	}
+      break;
+
+    default:
+      print_insn (line, rtx_first, 0);
+      printf ("%s\n", line);
+    }
+}				/* print_insn_chain */
+
+/* Print visualization debugging info for block B to the dump file: a
+   header line tagged with the string S, one column heading for every
+   instance of each function unit selected by target_units, a separator
+   line, and then the accumulated table in visual_tbl.
+
+   The heading and separator are written straight to the dump file
+   instead of being assembled in fixed-size buffers, so the number of
+   unit instances cannot overflow anything.  */
+
+static void
+print_block_visualization (b, s)
+     int b;
+     char *s;
+{
+  int unit, i;
+
+  /* print header */
+  fprintf (dump, "\n;;   ==================== scheduling visualization for block %d %s \n", b, s);
+
+  /* Print names of units */
+  fprintf (dump, "\n;;   %-8s", "clock");
+  for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
+    if (function_units[unit].bitmask & target_units)
+      for (i = 0; i < function_units[unit].multiplicity; i++)
+	fprintf (dump, "  %-33s", function_units[unit].name);
+  fprintf (dump, "  %-8s\n", "no-unit");
+
+  /* Print the separation line, with the same column layout.  */
+  fprintf (dump, ";;   %-8s", "=====");
+  for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
+    if (function_units[unit].bitmask & target_units)
+      for (i = 0; i < function_units[unit].multiplicity; i++)
+	fprintf (dump, "  %-33s", "==============================");
+  fprintf (dump, "  %-8s\n", "=======");
+
+  /* Print insns in each cycle */
+  fprintf (dump, "%s\n", visual_tbl);
+}
+
+/* Remember INSN for the 'no_unit' column of the visualization by
+   appending it to vis_no_unit.  */
+
+static void
+visualize_no_unit (insn)
+     rtx insn;
+{
+  vis_no_unit[n_vis_no_unit++] = insn;
+}
+
+/* Append to visual_tbl the visualization line for cycle CLOCK of block
+   B: the clock value, one column per function unit instance, and the
+   UIDs of the insns recorded in vis_no_unit, which is emptied again.  */
+
+static void
+visualize_scheduled_insns (b, clock)
+     int b, clock;
+{
+  int unit, copy, k;
+  char *out;
+
+  /* If the table is full, print what we have and start a new one.  */
+  if (n_visual_lines >= MAX_VISUAL_LINES)
+    {
+      print_block_visualization (b, "(incomplete)");
+      init_block_visualization ();
+    }
+
+  n_visual_lines++;
+
+  /* OUT always points at the terminating null of visual_tbl.  */
+  out = visual_tbl + strlen (visual_tbl);
+  sprintf (out, ";;   %-8d", clock);
+  out += strlen (out);
+
+  for (unit = 0; unit < FUNCTION_UNITS_SIZE; unit++)
+    {
+      if (!(function_units[unit].bitmask & target_units))
+	continue;
+
+      for (copy = 0; copy < function_units[unit].multiplicity; copy++)
+	{
+	  int instance = unit + copy * FUNCTION_UNITS_SIZE;
+	  rtx insn = unit_last_insn[instance];
+
+	  if (insn == 0
+	      || !actual_hazard_this_instance (unit, instance, insn, clock, 0))
+	    sprintf (out, "  %-33s", "------------------------------");
+	  else
+	    {
+	      /* The insn still keeps the unit busy: show it, cut down to
+		 INSN_LEN characters.  */
+	      char str[BUF_LEN];
+
+	      print_insn (str, insn, 0);
+	      str[INSN_LEN] = '\0';
+	      sprintf (out, "  %-33s", str);
+	    }
+	  out += strlen (out);
+	}
+    }
+
+  /* Insns that are not assigned to any unit.  */
+  for (k = 0; k < n_vis_no_unit; k++)
+    {
+      sprintf (out, "  %-8d", INSN_UID (vis_no_unit[k]));
+      out += strlen (out);
+    }
+  n_vis_no_unit = 0;
+
+  sprintf (out, "\n");
+}
+
+/* Append to visual_tbl a line for block B that shows STALLS stalled
+   cycles as a row of dots.  */
+
+static void
+visualize_stall_cycles (b, stalls)
+     int b, stalls;
+{
+  char *out;
+  int remaining;
+
+  /* If the table is full, print what we have and start a new one.  */
+  if (n_visual_lines >= MAX_VISUAL_LINES)
+    {
+      print_block_visualization (b, "(incomplete)");
+      init_block_visualization ();
+    }
+
+  n_visual_lines++;
+
+  /* OUT always points at the terminating null of visual_tbl.  */
+  out = visual_tbl + strlen (visual_tbl);
+  sprintf (out, ";;       ");
+  out += strlen (out);
+
+  for (remaining = stalls; remaining > 0; remaining--)
+    {
+      sprintf (out, ".");
+      out += strlen (out);
+    }
+
+  sprintf (out, "\n");
+}
+
+/* Unlink INSN from its current place in the insn chain and relink it
+   immediately after LAST.  INSN must have both a predecessor and a
+   successor, and LAST must have a successor.  Return INSN.  */
+
+static rtx
+move_insn1 (insn, last)
+     rtx insn, last;
+{
+  rtx before = PREV_INSN (insn);
+  rtx after = NEXT_INSN (insn);
+  rtx follower;
+
+  /* Close the gap that INSN leaves behind.  */
+  NEXT_INSN (before) = after;
+  PREV_INSN (after) = before;
+
+  /* LAST's successor must be read only now: unlinking INSN changes it
+     when LAST was INSN's predecessor.  */
+  follower = NEXT_INSN (last);
+
+  NEXT_INSN (insn) = follower;
+  PREV_INSN (follower) = insn;
+
+  NEXT_INSN (last) = insn;
+  PREV_INSN (insn) = last;
+
+  return insn;
+}
+
+/* Search INSN for fake REG_DEAD note pairs for NOTE_INSN_SETJMP,
+   NOTE_INSN_{LOOP,EHREGION}_{BEG,END}; and convert them back into
+   NOTEs.  A fake note is recognized by its CONST_INT operand.  The
+   REG_DEAD note following the first one contains the saved value for
+   NOTE_BLOCK_NUMBER which is useful for NOTE_INSN_EH_REGION_{BEG,END}
+   NOTEs.  LAST is the last instruction output by the instruction
+   scheduler.  Return the new value of LAST: the note emitted for a
+   NOTE_INSN_SETJMP pair if there was one, otherwise LAST as passed in.  */
+
+static rtx
+reemit_notes (insn, last)
+     rtx insn;
+     rtx last;
+{
+  rtx note, retval;
+
+  retval = last;
+  for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
+    {
+      if (REG_NOTE_KIND (note) == REG_DEAD
+	  && GET_CODE (XEXP (note, 0)) == CONST_INT)
+	{
+	  if (INTVAL (XEXP (note, 0)) == NOTE_INSN_SETJMP)
+	    {
+	      /* The setjmp note goes after INSN and becomes the value
+		 returned to the caller.  */
+	      retval = emit_note_after (INTVAL (XEXP (note, 0)), insn);
+	      CONST_CALL_P (retval) = CONST_CALL_P (note);
+	      remove_note (insn, note);
+	      /* Step to the second note of the pair.  */
+	      note = XEXP (note, 1);
+	    }
+	  else
+	    {
+	      /* Other notes go before LAST.  Only the local LAST is
+		 updated here; RETVAL is left alone.  */
+	      last = emit_note_before (INTVAL (XEXP (note, 0)), last);
+	      remove_note (insn, note);
+	      /* Step to the second note of the pair, which holds the
+		 saved block number.  */
+	      note = XEXP (note, 1);
+	      NOTE_BLOCK_NUMBER (last) = INTVAL (XEXP (note, 0));
+	    }
+	  /* Drop the second note of the pair as well.  */
+	  remove_note (insn, note);
+	}
+    }
+  return retval;
+}
+
+/* Relink INSN after LAST, together with every preceding insn that is
+   tied to it through the SCHED_GROUP_P flag, so that the whole group is
+   issued together.  Then re-emit the notes saved on INSN and return the
+   result of reemit_notes.  */
+
+static rtx
+move_insn (insn, last)
+     rtx insn, last;
+{
+  rtx group_end = insn;
+  rtx cur = insn;
+
+  for (;;)
+    {
+      /* Both values must be sampled before CUR is relinked.  */
+      rtx before = PREV_INSN (cur);
+      int in_group = SCHED_GROUP_P (cur);
+
+      move_insn1 (cur, last);
+      if (!in_group)
+	break;
+      cur = before;
+    }
+
+  return reemit_notes (group_end, group_end);
+}
+
+/* Return the insn that represents the SCHED_GROUP containing INSN, which
+   is the last insn in the group.  Starting at INSN, keep stepping to the
+   next non-note insn for as long as that insn has SCHED_GROUP_P set and
+   is not a CODE_LABEL.  */
+
+static rtx
+group_leader (insn)
+     rtx insn;
+{
+  rtx leader = insn;
+
+  for (;;)
+    {
+      rtx follower = next_nonnote_insn (leader);
+
+      if (follower == 0
+	  || !SCHED_GROUP_P (follower)
+	  || GET_CODE (follower) == CODE_LABEL)
+	break;
+      leader = follower;
+    }
+
+  return leader;
+}
+
+/* Use forward list scheduling to rearrange insns of block BB in region RGN,
+   possibly bringing insns from subsequent blocks in the same region.
+   RGN_N_INSNS is used to size the ready list (RGN_N_INSNS + 1 entries).
+   RGN itself is not referenced in the body of this function.
+
+   Return number of insns scheduled: every insn issued by the main loop,
+   whether it came from BB or from a later block, plus the copies from
+   hard registers that are skipped at the start of the function.  */
+
+static int
+schedule_block (bb, rgn, rgn_n_insns)
+     int bb;
+     int rgn;
+     int rgn_n_insns;
+{
+  /* Local variables.  */
+  rtx insn, last;
+  rtx *ready;
+  int i;
+  int n_ready = 0;
+  int can_issue_more;
+
+  /* flow block of this bb */
+  int b = BB_TO_BLOCK (bb);
+
+  /* target_n_insns == number of insns in b before scheduling starts.
+     sched_target_n_insns == how many of b's insns were scheduled.
+     sched_n_insns == how many insns were scheduled in b */
+  int target_n_insns = 0;
+  int sched_target_n_insns = 0;
+  int sched_n_insns = 0;
+
+  /* Bits of NEW_NEEDS: which of basic_block_head[b] and
+     basic_block_end[b] must be updated once scheduling is done.  */
+#define NEED_NOTHING	0
+#define NEED_HEAD	1
+#define NEED_TAIL	2
+  int new_needs;
+
+  /* head/tail info for this block */
+  rtx prev_head;
+  rtx next_tail;
+  rtx head;
+  rtx tail;
+  int bb_src;
+
+  /* At the start of a function, before reload has run, don't delay getting
+     parameters from hard registers into pseudo registers.  */
+  if (reload_completed == 0 && b == 0)
+    {
+      head = basic_block_head[b];
+      tail = basic_block_end[b];
+
+      /* Skip the notes that precede NOTE_INSN_FUNCTION_BEG.  */
+      while (head != tail
+	     && GET_CODE (head) == NOTE
+	     && NOTE_LINE_NUMBER (head) != NOTE_INSN_FUNCTION_BEG)
+	head = NEXT_INSN (head);
+
+      /* Leave in place each leading SET whose source, looked at through
+	 SUBREG, extension and extraction wrappers, is a hard register.  */
+      while (head != tail
+	     && GET_CODE (head) == INSN
+	     && GET_CODE (PATTERN (head)) == SET)
+	{
+	  rtx link;
+	  rtx src = SET_SRC (PATTERN (head));
+	  while (GET_CODE (src) == SUBREG
+		 || GET_CODE (src) == SIGN_EXTEND
+		 || GET_CODE (src) == ZERO_EXTEND
+		 || GET_CODE (src) == SIGN_EXTRACT
+		 || GET_CODE (src) == ZERO_EXTRACT)
+	    src = XEXP (src, 0);
+	  if (GET_CODE (src) != REG
+	      || REGNO (src) >= FIRST_PSEUDO_REGISTER)
+	    break;
+
+	  /* HEAD stays where it is, so the insns that depend on it have
+	     one dependence less to wait for.  */
+	  for (link = INSN_DEPEND (head); link != 0; link = XEXP (link, 1))
+	    INSN_DEP_COUNT (XEXP (link, 0)) -= 1;
+
+	  if (GET_CODE (head) != NOTE)
+	    sched_n_insns++;
+
+	  head = NEXT_INSN (head);
+	}
+
+      /* Don't include any notes or labels at the beginning of the
+         basic block, or notes at the ends of basic blocks.  */
+      while (head != tail)
+	{
+	  if (GET_CODE (head) == NOTE)
+	    head = NEXT_INSN (head);
+	  else if (GET_CODE (tail) == NOTE)
+	    tail = PREV_INSN (tail);
+	  else if (GET_CODE (head) == CODE_LABEL)
+	    head = NEXT_INSN (head);
+	  else
+	    break;
+	}
+    }
+  else
+    get_block_head_tail (bb, &head, &tail);
+
+  next_tail = NEXT_INSN (tail);
+  prev_head = PREV_INSN (head);
+
+  /* If the only insn left is a NOTE or a CODE_LABEL, then there is no need
+     to schedule this block.  */
+  if (head == tail
+      && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
+    return (sched_n_insns);
+
+  /* debug info */
+  if (sched_verbose)
+    {
+      fprintf (dump, ";;   ======================================================\n");
+      fprintf (dump,
+	       ";;   -- basic block %d from %d to %d -- %s reload\n",
+	       b, INSN_UID (basic_block_head[b]),
+	       INSN_UID (basic_block_end[b]),
+	       (reload_completed ? "after" : "before"));
+      fprintf (dump, ";;   ======================================================\n");
+      if (sched_debug_count >= 0)
+	fprintf (dump, ";;\t -- sched_debug_count=%d\n", sched_debug_count);
+      fprintf (dump, "\n");
+
+      visual_tbl = (char *) alloca (get_visual_tbl_length ());
+      init_block_visualization ();
+    }
+
+  /* remove remaining note insns from the block, save them in
+     note_list.  These notes are restored at the end of
+     schedule_block ().  */
+  note_list = 0;
+  rm_other_notes (head, tail);
+
+  target_bb = bb;
+
+  /* prepare current target block info */
+  if (current_nr_blocks > 1)
+    {
+      candidate_table = (candidate *) alloca (current_nr_blocks * sizeof (candidate));
+
+      bblst_last = 0;
+      /* ??? It is not clear why bblst_size is computed this way.  The original
+	 number was clearly too small as it resulted in compiler failures.
+	 Multiplying by the original number by 2 (to account for update_bbs
+	 members) seems to be a reasonable solution.  */
+      /* ??? Or perhaps there is a bug somewhere else in this file?  */
+      bblst_size = (current_nr_blocks - bb) * rgn_nr_edges * 2;
+      bblst_table = (int *) alloca (bblst_size * sizeof (int));
+
+      bitlst_table_last = 0;
+      bitlst_table_size = rgn_nr_edges;
+      bitlst_table = (int *) alloca (rgn_nr_edges * sizeof (int));
+
+      compute_trg_info (bb);
+    }
+
+  clear_units ();
+
+  /* Allocate the ready list */
+  ready = (rtx *) alloca ((rgn_n_insns + 1) * sizeof (rtx));
+
+  /* Print debugging information.  */
+  if (sched_verbose >= 5)
+    debug_dependencies ();
+
+
+  /* Initialize ready list with all 'ready' insns in target block.
+     Count number of insns in the target block being scheduled.
+     An insn is ready when it has no unresolved dependences and the
+     following insn is not tied to it by SCHED_GROUP_P.  Insns that have
+     SCHED_GROUP_P set themselves are not counted in target_n_insns.  */
+  n_ready = 0;
+  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
+    {
+      rtx next;
+
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	continue;
+      next = NEXT_INSN (insn);
+
+      if (INSN_DEP_COUNT (insn) == 0
+	  && (SCHED_GROUP_P (next) == 0 || GET_RTX_CLASS (GET_CODE (next)) != 'i'))
+	ready[n_ready++] = insn;
+      if (!(SCHED_GROUP_P (insn)))
+	target_n_insns++;
+    }
+
+  /* Add to ready list all 'ready' insns in valid source blocks.
+     For speculative insns, check-live, exception-free, and
+     issue-delay.  */
+  for (bb_src = bb + 1; bb_src < current_nr_blocks; bb_src++)
+    if (IS_VALID (bb_src))
+      {
+	rtx src_head;
+	rtx src_next_tail;
+	rtx tail, head;
+
+	get_block_head_tail (bb_src, &head, &tail);
+	src_next_tail = NEXT_INSN (tail);
+	src_head = head;
+
+	if (head == tail
+	    && (GET_RTX_CLASS (GET_CODE (head)) != 'i'))
+	  continue;
+
+	for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
+	  {
+	    if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	      continue;
+
+	    if (!CANT_MOVE (insn)
+		&& (!IS_SPECULATIVE_INSN (insn)
+		    || (insn_issue_delay (insn) <= 3
+			&& check_live (insn, bb_src, target_bb)
+			&& is_exception_free (insn, bb_src, target_bb))))
+
+	      {
+		rtx next;
+
+		next = NEXT_INSN (insn);
+		if (INSN_DEP_COUNT (insn) == 0
+		    && (SCHED_GROUP_P (next) == 0
+			|| GET_RTX_CLASS (GET_CODE (next)) != 'i'))
+		  ready[n_ready++] = insn;
+	      }
+	  }
+      }
+
+  /* no insns scheduled in this block yet */
+  last_scheduled_insn = 0;
+
+  /* Sort the ready list */
+  SCHED_SORT (ready, n_ready);
+
+  if (sched_verbose >= 2)
+    {
+      fprintf (dump, ";;\t\tReady list initially:  ");
+      debug_ready_list (ready, n_ready);
+    }
+
+  /* Q_SIZE is the total number of insns in the queue.  */
+  q_ptr = 0;
+  q_size = 0;
+  clock_var = 0;
+  bzero ((char *) insn_queue, sizeof (insn_queue));
+
+  /* We start inserting insns after PREV_HEAD.  */
+  last = prev_head;
+
+  /* Initialize INSN_QUEUE, LIST and NEW_NEEDS.  */
+  new_needs = (NEXT_INSN (prev_head) == basic_block_head[b]
+	       ? NEED_HEAD : NEED_NOTHING);
+  if (PREV_INSN (next_tail) == basic_block_end[b])
+    new_needs |= NEED_TAIL;
+
+  /* loop until all the insns in BB are scheduled.  */
+  while (sched_target_n_insns < target_n_insns)
+    {
+      int b1;
+
+#ifdef INTERBLOCK_DEBUG
+      if (sched_debug_count == 0)
+	break;
+#endif
+      clock_var++;
+
+      /* Add to the ready list all pending insns that can be issued now.
+         If there are no ready insns, increment clock until one
+         is ready and add all pending insns at that point to the ready
+         list.  */
+      n_ready = queue_to_ready (ready, n_ready);
+
+      /* Insns of BB remain to be scheduled, so an empty ready list here
+	 is treated as a fatal inconsistency.  */
+      if (n_ready == 0)
+	abort ();
+
+      if (sched_verbose >= 2)
+	{
+	  fprintf (dump, ";;\t\tReady list after queue_to_ready:  ");
+	  debug_ready_list (ready, n_ready);
+	}
+
+      /* Sort the ready list.  */
+      SCHED_SORT (ready, n_ready);
+
+      if (sched_verbose)
+	{
+	  fprintf (dump, ";;\tReady list (t =%3d):  ", clock_var);
+	  debug_ready_list (ready, n_ready);
+	}
+
+      /* Issue insns from ready list.
+         It is important to count down from n_ready, because n_ready may change
+         as insns are issued.  */
+      can_issue_more = issue_rate;
+      for (i = n_ready - 1; i >= 0 && can_issue_more; i--)
+	{
+	  rtx insn = ready[i];
+	  int cost = actual_hazard (insn_unit (insn), insn, clock_var, 0);
+
+	  /* A cost above 1 sends the insn to the queue, a cost of 0 issues
+	     it now; any other cost leaves it on the ready list.  */
+	  if (cost > 1)
+	    {
+	      queue_insn (insn, cost);
+	      ready[i] = ready[--n_ready];	/* remove insn from ready list */
+	    }
+	  else if (cost == 0)
+	    {
+#ifdef INTERBLOCK_DEBUG
+	      if (sched_debug_count == 0)
+		break;
+#endif
+
+	      /* an interblock motion? */
+	      if (INSN_BB (insn) != target_bb)
+		{
+		  if (IS_SPECULATIVE_INSN (insn))
+		    {
+
+		      if (!check_live (insn, INSN_BB (insn), target_bb))
+			{
+			  /* speculative motion, live check failed, remove
+			     insn from ready list */
+			  ready[i] = ready[--n_ready];
+			  continue;
+			}
+		      update_live (insn, INSN_BB (insn), target_bb);
+
+		      /* for speculative load, mark insns fed by it.  */
+		      if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
+			set_spec_fed (insn);
+
+		      nr_spec++;
+		    }
+		  nr_inter++;
+
+		  /* update source block boundaries */
+		  b1 = INSN_BLOCK (insn);
+		  if (insn == basic_block_head[b1]
+		      && insn == basic_block_end[b1])
+		    {
+		      /* INSN was the whole source block; a deleted note
+			 takes its place as both head and end.  */
+		      emit_note_after (NOTE_INSN_DELETED, basic_block_head[b1]);
+		      basic_block_end[b1] = basic_block_head[b1] = NEXT_INSN (insn);
+		    }
+		  else if (insn == basic_block_end[b1])
+		    {
+		      basic_block_end[b1] = PREV_INSN (insn);
+		    }
+		  else if (insn == basic_block_head[b1])
+		    {
+		      basic_block_head[b1] = NEXT_INSN (insn);
+		    }
+		}
+	      else
+		{
+		  /* in block motion */
+		  sched_target_n_insns++;
+		}
+
+	      last_scheduled_insn = insn;
+	      last = move_insn (insn, last);
+	      sched_n_insns++;
+
+	      can_issue_more--;
+
+#ifdef INTERBLOCK_DEBUG
+	      if (sched_debug_count > 0)
+		sched_debug_count--;
+#endif
+
+	      n_ready = schedule_insn (insn, ready, n_ready, clock_var);
+
+	      /* remove insn from ready list */
+	      ready[i] = ready[--n_ready];
+
+	      /* close this block after scheduling its jump */
+	      if (GET_CODE (last_scheduled_insn) == JUMP_INSN)
+		break;
+	    }
+	}
+
+      /* debug info */
+      if (sched_verbose)
+	{
+	  visualize_scheduled_insns (b, clock_var);
+#ifdef INTERBLOCK_DEBUG
+	  if (sched_debug_count == 0)
+	    fprintf (dump, "........   sched_debug_count == 0  .................\n");
+#endif
+	}
+    }
+
+  /* debug info */
+  if (sched_verbose)
+    {
+      fprintf (dump, ";;\tReady list (final):  ");
+      debug_ready_list (ready, n_ready);
+      print_block_visualization (b, "");
+    }
+
+  /* Sanity check -- queue must be empty now.  Meaningless if region has
+     multiple bbs, or if scheduling stopped by sched_debug_count.  */
+  if (current_nr_blocks > 1)
+#ifdef INTERBLOCK_DEBUG
+    if (sched_debug_count != 0)
+#endif
+      if (!flag_schedule_interblock && q_size != 0)
+	abort ();
+
+  /* update head/tail boundaries.  */
+  head = NEXT_INSN (prev_head);
+  tail = last;
+
+#ifdef INTERBLOCK_DEBUG
+  if (sched_debug_count == 0)
+    /* compensate for stopping scheduling prematurely */
+    for (i = sched_target_n_insns; i < target_n_insns; i++)
+      tail = move_insn (group_leader (NEXT_INSN (tail)), tail);
+#endif
+
+  /* Restore-other-notes: NOTE_LIST is the end of a chain of notes
+     previously found among the insns.  Insert them at the beginning
+     of the insns.  */
+  if (note_list != 0)
+    {
+      rtx note_head = note_list;
+
+      /* Walk back to the first note of the chain.  */
+      while (PREV_INSN (note_head))
+	{
+	  note_head = PREV_INSN (note_head);
+	}
+
+      PREV_INSN (note_head) = PREV_INSN (head);
+      NEXT_INSN (PREV_INSN (head)) = note_head;
+      PREV_INSN (head) = note_list;
+      NEXT_INSN (note_list) = head;
+      head = note_head;
+    }
+
+  /* update target block boundaries.  */
+  if (new_needs & NEED_HEAD)
+    basic_block_head[b] = head;
+
+  if (new_needs & NEED_TAIL)
+    basic_block_end[b] = tail;
+
+  /* debugging */
+  if (sched_verbose)
+    {
+      fprintf (dump, ";;   total time = %d\n;;   new basic block head = %d\n",
+	       clock_var, INSN_UID (basic_block_head[b]));
+      fprintf (dump, ";;   new basic block end = %d\n\n",
+	       INSN_UID (basic_block_end[b]));
+    }
+
+  return (sched_n_insns);
+}				/* schedule_block () */
+
+
+/* Print the bit-set of registers, S, to the scheduler dump file: each
+   register number that EXECUTE_IF_SET_IN_REG_SET yields for S, starting
+   the scan at 0, preceded by a space, then a newline.  Callable from a
+   debugger.  */
+
+extern void
+debug_reg_vector (s)
+     regset s;
+{
+  int regno;
+
+  EXECUTE_IF_SET_IN_REG_SET (s, 0, regno,
+			     {
+			       fprintf (dump, " %d", regno);
+			     });
+
+  fprintf (dump, "\n");
+}
+
+/* Build the forward dependence lists (INSN_DEPEND) for block BB from the
+   backward dependences recorded in LOG_LINKS, and count in INSN_DEP_COUNT
+   the forward links that point at each insn.  */
+
+static void
+compute_block_forward_dependences (bb)
+     int bb;
+{
+  rtx head, tail;
+  rtx stop;
+  rtx insn;
+
+  get_block_head_tail (bb, &head, &tail);
+  stop = NEXT_INSN (tail);
+
+  for (insn = head; insn != stop; insn = NEXT_INSN (insn))
+    {
+      rtx link;
+
+      if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+	continue;
+
+      /* Work on the last insn of the SCHED_GROUP; the walk over the block
+	 also resumes from there.  */
+      insn = group_leader (insn);
+
+      for (link = LOG_LINKS (insn); link != 0; link = XEXP (link, 1))
+	{
+	  rtx pred = XEXP (link, 0);
+	  rtx fwd;
+
+	  /* Only a predecessor that is its own group leader gets a
+	     forward link.  */
+	  if (group_leader (pred) != pred)
+	    continue;
+
+	  /* Ignore dependences upon deleted insns, and dependences that
+	     are already on the list.  */
+	  if (GET_CODE (pred) == NOTE
+	      || INSN_DELETED_P (pred)
+	      || find_insn_list (insn, INSN_DEPEND (pred)))
+	    continue;
+
+	  /* Push a new link, with the kind of the backward one, onto the
+	     front of the predecessor's list.  */
+	  fwd = rtx_alloc (INSN_LIST);
+	  PUT_REG_NOTE_KIND (fwd, REG_NOTE_KIND (link));
+	  XEXP (fwd, 0) = insn;
+	  XEXP (fwd, 1) = INSN_DEPEND (pred);
+	  INSN_DEPEND (pred) = fwd;
+
+	  INSN_DEP_COUNT (insn) += 1;
+	}
+    }
+}
+
+/* Initialize variables for region data dependence analysis.
+   N_BBS is the number of region blocks.  Every per-block array is
+   cleared, and each block then gets a fresh placeholder insn in
+   bb_sched_before_next_call[] whose LOG_LINKS are empty.  */
+
+__inline static void
+init_rgn_data_dependences (n_bbs)
+     int n_bbs;
+{
+  int bb;
+
+  /* variables for which one copy exists for each block */
+  bzero ((char *) bb_pending_read_insns, n_bbs * sizeof (rtx));
+  bzero ((char *) bb_pending_read_mems, n_bbs * sizeof (rtx));
+  bzero ((char *) bb_pending_write_insns, n_bbs * sizeof (rtx));
+  bzero ((char *) bb_pending_write_mems, n_bbs * sizeof (rtx));
+  /* bb_pending_lists_length is an array of int (schedule_region allocates
+     it with sizeof (int)), so it must be cleared with sizeof (int);
+     using sizeof (rtx) writes past its end when pointers are wider
+     than int.  */
+  bzero ((char *) bb_pending_lists_length, n_bbs * sizeof (int));
+  bzero ((char *) bb_last_pending_memory_flush, n_bbs * sizeof (rtx));
+  bzero ((char *) bb_last_function_call, n_bbs * sizeof (rtx));
+  bzero ((char *) bb_sched_before_next_call, n_bbs * sizeof (rtx));
+
+  /* Create an insn here so that we can hang dependencies off of it later.  */
+  for (bb = 0; bb < n_bbs; bb++)
+    {
+      bb_sched_before_next_call[bb] =
+	gen_rtx (INSN, VOIDmode, 0, NULL_RTX, NULL_RTX,
+		 NULL_RTX, 0, NULL_RTX, 0);
+      LOG_LINKS (bb_sched_before_next_call[bb]) = 0;
+    }
+}
+
+/* Add dependences so that branches are scheduled to run last in their
+   block.  HEAD and TAIL are the first and last insns of the block.
+
+   The first loop walks backward from TAIL over the trailing calls,
+   jumps, USEs, cc0 setters and notes; each non-note insn found is marked
+   CANT_MOVE and chained to the one found before it with an anti
+   dependence.  The second loop walks backward from the last insn so
+   chained and, for every insn whose INSN_REF_COUNT is still zero, adds
+   an anti dependence between that chained insn and it.  */
+
+static void
+add_branch_dependences (head, tail)
+     rtx head, tail;
+{
+
+  rtx insn, last;
+
+  /* For all branches, calls, uses, and cc0 setters, force them to remain
+     in order at the end of the block by adding dependencies and giving
+     the last a high priority.  There may be notes present, and prev_head
+     may also be a note.
+
+     Branches must obviously remain at the end.  Calls should remain at the
+     end since moving them results in worse register allocation.  Uses remain
+     at the end to ensure proper register allocation.  cc0 setters remain
+     at the end because they can't be moved away from their cc0 user.  */
+  insn = tail;
+  last = 0;
+  while (GET_CODE (insn) == CALL_INSN || GET_CODE (insn) == JUMP_INSN
+	 || (GET_CODE (insn) == INSN
+	     && (GET_CODE (PATTERN (insn)) == USE
+#ifdef HAVE_cc0
+		 || sets_cc0_p (PATTERN (insn))
+#endif
+	     ))
+	 || GET_CODE (insn) == NOTE)
+    {
+      if (GET_CODE (insn) != NOTE)
+	{
+	  if (last != 0
+	      && !find_insn_list (insn, LOG_LINKS (last)))
+	    {
+	      add_dependence (last, insn, REG_DEP_ANTI);
+	      INSN_REF_COUNT (insn)++;
+	    }
+
+	  CANT_MOVE (insn) = 1;
+
+	  last = insn;
+	  /* Skip over insns that are part of a group: step back with
+	     prev_nonnote_insn while SCHED_GROUP_P is set.  */
+	  while (SCHED_GROUP_P (insn))
+	    insn = prev_nonnote_insn (insn);
+	}
+
+      /* Don't overrun the bounds of the basic block.  */
+      if (insn == head)
+	break;
+
+      insn = PREV_INSN (insn);
+    }
+
+  /* make sure these insns are scheduled last in their block.  LAST is 0
+     when the first loop found no non-note insn, in which case there is
+     nothing to do.  */
+  insn = last;
+  if (insn != 0)
+    while (insn != head)
+      {
+	insn = prev_nonnote_insn (insn);
+
+	if (INSN_REF_COUNT (insn) != 0)
+	  continue;
+
+	if (!find_insn_list (last, LOG_LINKS (insn)))
+	  add_dependence (last, insn, REG_DEP_ANTI);
+	INSN_REF_COUNT (insn) = 1;
+
+	/* Skip over insns that are part of a group.  */
+	while (SCHED_GROUP_P (insn))
+	  insn = prev_nonnote_insn (insn);
+      }
+}
+
+/* Compute backward dependences inside BB.  In a multiple blocks region:
+   (1) a bb is analyzed after its predecessors, and (2) the lists in
+   effect at the end of bb (after analyzing for bb) are inherited by
+   bb's successors.
+
+   Specifically for reg-reg data dependences, the block insns are
+   scanned by sched_analyze () top-to-bottom.  Two lists are
+   maintained by sched_analyze (): reg_last_sets[] for register DEFs,
+   and reg_last_uses[] for register USEs.
+
+   When analysis is completed for bb, we update for its successors:
+   ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
+   ;  - USES[succ] = Union (USES [succ], USES [bb])
+
+   The mechanism for computing mem-mem data dependence is very
+   similar, and the result is interblock dependences in the region.
+
+   For a single-block region the working lists are freshly allocated
+   and cleared here; otherwise they are loaded from the per-block
+   arrays (bb_reg_last_uses[bb], bb_pending_read_insns[bb], ...).  */
+
+static void
+compute_block_backward_dependences (bb)
+     int bb;
+{
+  int b;
+  rtx x;
+  rtx head, tail;
+  int max_reg = max_reg_num ();
+
+  b = BB_TO_BLOCK (bb);
+
+  if (current_nr_blocks == 1)
+    {
+      reg_last_uses = (rtx *) alloca (max_reg * sizeof (rtx));
+      reg_last_sets = (rtx *) alloca (max_reg * sizeof (rtx));
+
+      bzero ((char *) reg_last_uses, max_reg * sizeof (rtx));
+      bzero ((char *) reg_last_sets, max_reg * sizeof (rtx));
+
+      pending_read_insns = 0;
+      pending_read_mems = 0;
+      pending_write_insns = 0;
+      pending_write_mems = 0;
+      pending_lists_length = 0;
+      last_function_call = 0;
+      last_pending_memory_flush = 0;
+      sched_before_next_call
+	= gen_rtx (INSN, VOIDmode, 0, NULL_RTX, NULL_RTX,
+		   NULL_RTX, 0, NULL_RTX, 0);
+      LOG_LINKS (sched_before_next_call) = 0;
+    }
+  else
+    {
+      reg_last_uses = bb_reg_last_uses[bb];
+      reg_last_sets = bb_reg_last_sets[bb];
+
+      pending_read_insns = bb_pending_read_insns[bb];
+      pending_read_mems = bb_pending_read_mems[bb];
+      pending_write_insns = bb_pending_write_insns[bb];
+      pending_write_mems = bb_pending_write_mems[bb];
+      pending_lists_length = bb_pending_lists_length[bb];
+      last_function_call = bb_last_function_call[bb];
+      last_pending_memory_flush = bb_last_pending_memory_flush[bb];
+
+      sched_before_next_call = bb_sched_before_next_call[bb];
+    }
+
+  /* do the analysis for this block, then pin branches and the other
+     trailing insns to the end of the block */
+  get_block_head_tail (bb, &head, &tail);
+  sched_analyze (head, tail);
+  add_branch_dependences (head, tail);
+
+  if (current_nr_blocks > 1)
+    {
+      int e, first_edge;
+      int b_succ, bb_succ;
+      int reg;
+      rtx link_insn, link_mem;
+      rtx u;
+
+      /* these lists should point to the right place, for correct freeing later.  */
+      bb_pending_read_insns[bb] = pending_read_insns;
+      bb_pending_read_mems[bb] = pending_read_mems;
+      bb_pending_write_insns[bb] = pending_write_insns;
+      bb_pending_write_mems[bb] = pending_write_mems;
+
+      /* bb's structures are inherited by its successors.  The out-edges
+	 of block B are visited starting at OUT_EDGES (b) and following
+	 NEXT_OUT until the walk returns to the first edge.  */
+      first_edge = e = OUT_EDGES (b);
+      if (e > 0)
+	do
+	  {
+	    b_succ = TO_BLOCK (e);
+	    bb_succ = BLOCK_TO_BB (b_succ);
+
+	    /* only bbs "below" bb, in the same region, are interesting */
+	    if (CONTAINING_RGN (b) != CONTAINING_RGN (b_succ)
+		|| bb_succ <= bb)
+	      {
+		e = NEXT_OUT (e);
+		continue;
+	      }
+
+	    for (reg = 0; reg < max_reg; reg++)
+	      {
+
+		/* reg-last-uses lists are inherited by bb_succ; entries
+		   already on the successor's list are not duplicated */
+		for (u = reg_last_uses[reg]; u; u = XEXP (u, 1))
+		  {
+		    if (find_insn_list (XEXP (u, 0), (bb_reg_last_uses[bb_succ])[reg]))
+		      continue;
+
+		    (bb_reg_last_uses[bb_succ])[reg]
+		      = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0),
+				 (bb_reg_last_uses[bb_succ])[reg]);
+		  }
+
+		/* reg-last-defs lists are inherited by bb_succ */
+		for (u = reg_last_sets[reg]; u; u = XEXP (u, 1))
+		  {
+		    if (find_insn_list (XEXP (u, 0), (bb_reg_last_sets[bb_succ])[reg]))
+		      continue;
+
+		    (bb_reg_last_sets[bb_succ])[reg]
+		      = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0),
+				 (bb_reg_last_sets[bb_succ])[reg]);
+		  }
+	      }
+
+	    /* mem read/write lists are inherited by bb_succ.  The insn
+	       list and the mem list are walked in step, one element of
+	       each per iteration.  */
+	    link_insn = pending_read_insns;
+	    link_mem = pending_read_mems;
+	    while (link_insn)
+	      {
+		if (!(find_insn_mem_list (XEXP (link_insn, 0), XEXP (link_mem, 0),
+					  bb_pending_read_insns[bb_succ],
+					  bb_pending_read_mems[bb_succ])))
+		  add_insn_mem_dependence (&bb_pending_read_insns[bb_succ],
+					   &bb_pending_read_mems[bb_succ],
+				   XEXP (link_insn, 0), XEXP (link_mem, 0));
+		link_insn = XEXP (link_insn, 1);
+		link_mem = XEXP (link_mem, 1);
+	      }
+
+	    link_insn = pending_write_insns;
+	    link_mem = pending_write_mems;
+	    while (link_insn)
+	      {
+		if (!(find_insn_mem_list (XEXP (link_insn, 0), XEXP (link_mem, 0),
+					  bb_pending_write_insns[bb_succ],
+					  bb_pending_write_mems[bb_succ])))
+		  add_insn_mem_dependence (&bb_pending_write_insns[bb_succ],
+					   &bb_pending_write_mems[bb_succ],
+				   XEXP (link_insn, 0), XEXP (link_mem, 0));
+
+		link_insn = XEXP (link_insn, 1);
+		link_mem = XEXP (link_mem, 1);
+	      }
+
+	    /* last_function_call is inherited by bb_succ */
+	    for (u = last_function_call; u; u = XEXP (u, 1))
+	      {
+		if (find_insn_list (XEXP (u, 0), bb_last_function_call[bb_succ]))
+		  continue;
+
+		bb_last_function_call[bb_succ]
+		  = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0),
+			     bb_last_function_call[bb_succ]);
+	      }
+
+	    /* last_pending_memory_flush is inherited by bb_succ */
+	    for (u = last_pending_memory_flush; u; u = XEXP (u, 1))
+	      {
+		if (find_insn_list (XEXP (u, 0), bb_last_pending_memory_flush[bb_succ]))
+		  continue;
+
+		bb_last_pending_memory_flush[bb_succ]
+		  = gen_rtx (INSN_LIST, VOIDmode, XEXP (u, 0),
+			     bb_last_pending_memory_flush[bb_succ]);
+	      }
+
+	    /* sched_before_next_call is inherited by bb_succ */
+	    x = LOG_LINKS (sched_before_next_call);
+	    for (; x; x = XEXP (x, 1))
+	      add_dependence (bb_sched_before_next_call[bb_succ],
+			      XEXP (x, 0), REG_DEP_ANTI);
+
+	    e = NEXT_OUT (e);
+	  }
+	while (e != first_edge);
+    }
+}
+
+/* Dump, for every block of the current region, a table with one row per
+   insn (uid, insn code, bb, dependence count, priority, cost, blockage
+   range, function units) followed by the uids on its INSN_DEPEND list.
+   Rtxs that are not insns are listed by uid only, with the note kind
+   added for NOTEs.  Meant to be called from a debugger.  */
+
+void
+debug_dependencies ()
+{
+  int bb;
+
+  fprintf (dump, ";;   --------------- forward dependences: ------------ \n");
+
+  for (bb = 0; bb < current_nr_blocks; bb++)
+    {
+      rtx first, final, stop;
+      rtx cur;
+
+      get_block_head_tail (bb, &first, &final);
+      stop = NEXT_INSN (final);
+
+      /* Table title and column headings for this block.  */
+      fprintf (dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
+	       BB_TO_BLOCK (bb), bb);
+      fprintf (dump, ";;   %7s%6s%6s%6s%6s%6s%11s%6s\n",
+	       "insn", "code", "bb", "dep", "prio", "cost", "blockage", "units");
+      fprintf (dump, ";;   %7s%6s%6s%6s%6s%6s%11s%6s\n",
+	       "----", "----", "--", "---", "----", "----", "--------", "-----");
+
+      for (cur = first; cur != stop; cur = NEXT_INSN (cur))
+	{
+	  if (GET_RTX_CLASS (GET_CODE (cur)) == 'i')
+	    {
+	      rtx dep;
+	      int unit, range;
+
+	      /* The blockage range is 0 when the insn uses no unit or the
+		 unit has no blockage range function.  */
+	      unit = insn_unit (cur);
+	      if (unit < 0
+		  || function_units[unit].blockage_range_function == 0)
+		range = 0;
+	      else
+		range = function_units[unit].blockage_range_function (cur);
+
+	      fprintf (dump,
+		       ";;   %s%5d%6d%6d%6d%6d%6d  %3d -%3d   ",
+		       (SCHED_GROUP_P (cur) ? "+" : " "),
+		       INSN_UID (cur),
+		       INSN_CODE (cur),
+		       INSN_BB (cur),
+		       INSN_DEP_COUNT (cur),
+		       INSN_PRIORITY (cur),
+		       insn_cost (cur, 0, 0),
+		       (int) MIN_BLOCKAGE_COST (range),
+		       (int) MAX_BLOCKAGE_COST (range));
+	      insn_print_units (cur);
+
+	      fprintf (dump, "\t: ");
+	      for (dep = INSN_DEPEND (cur); dep; dep = XEXP (dep, 1))
+		fprintf (dump, "%d ", INSN_UID (XEXP (dep, 0)));
+	      fprintf (dump, "\n");
+	    }
+	  else
+	    {
+	      /* Not an insn: print the uid and, for a NOTE, its kind.  */
+	      fprintf (dump, ";;   %6d ", INSN_UID (cur));
+
+	      if (GET_CODE (cur) == NOTE)
+		{
+		  int kind = NOTE_LINE_NUMBER (cur);
+
+		  switch (kind)
+		    {
+		    case NOTE_INSN_DELETED:
+		      fprintf (dump, "NOTE_INSN_DELETED");
+		      break;
+		    case NOTE_INSN_BLOCK_BEG:
+		      fprintf (dump, "NOTE_INSN_BLOCK_BEG");
+		      break;
+		    case NOTE_INSN_BLOCK_END:
+		      fprintf (dump, "NOTE_INSN_BLOCK_END");
+		      break;
+		    case NOTE_INSN_LOOP_BEG:
+		      fprintf (dump, "NOTE_INSN_LOOP_BEG");
+		      break;
+		    case NOTE_INSN_LOOP_END:
+		      fprintf (dump, "NOTE_INSN_LOOP_END");
+		      break;
+		    case NOTE_INSN_LOOP_CONT:
+		      fprintf (dump, "NOTE_INSN_LOOP_CONT");
+		      break;
+		    case NOTE_INSN_LOOP_VTOP:
+		      fprintf (dump, "NOTE_INSN_LOOP_VTOP");
+		      break;
+		    case NOTE_INSN_FUNCTION_BEG:
+		      fprintf (dump, "NOTE_INSN_FUNCTION_BEG");
+		      break;
+		    case NOTE_INSN_FUNCTION_END:
+		      fprintf (dump, "NOTE_INSN_FUNCTION_END");
+		      break;
+		    case NOTE_INSN_EH_REGION_BEG:
+		      fprintf (dump, "NOTE_INSN_EH_REGION_BEG");
+		      break;
+		    case NOTE_INSN_EH_REGION_END:
+		      fprintf (dump, "NOTE_INSN_EH_REGION_END");
+		      break;
+		    case NOTE_INSN_SETJMP:
+		      fprintf (dump, "NOTE_INSN_SETJMP");
+		      break;
+		    default:
+		      /* Positive values are source line numbers.  */
+		      if (kind > 0)
+			fprintf (dump, "NOTE_LINE_NUMBER %d", kind);
+		      else
+			fprintf (dump, "??? UNRECOGNIZED NOTE %d", kind);
+		    }
+		}
+
+	      fprintf (dump, "\n");
+	    }
+	}
+    }
+
+  fprintf (dump, "\n");
+}
+
+/* Compute the priority of every non-note rtx in block BB by calling
+   priority () on each, walking from the block's tail back to its head.
+   Return the number of those for which SCHED_GROUP_P is clear.  Return 0
+   without visiting anything when the block consists of a single rtx
+   that is not an insn.  */
+
+static int
+set_priorities (bb)
+     int bb;
+{
+  rtx first, final;
+  rtx stop;
+  rtx cur;
+  int count = 0;
+
+  get_block_head_tail (bb, &first, &final);
+
+  if (first == final && GET_RTX_CLASS (GET_CODE (first)) != 'i')
+    return 0;
+
+  stop = PREV_INSN (first);
+  cur = final;
+  while (cur != stop)
+    {
+      if (GET_CODE (cur) != NOTE)
+	{
+	  if (!SCHED_GROUP_P (cur))
+	    count++;
+	  (void) priority (cur);
+	}
+
+      cur = PREV_INSN (cur);
+    }
+
+  return count;
+}
+
+/* Carve SPACE into NELTS consecutive rtx-vectors of BYTES_PER_ELT bytes
+   each and store the start of the I-th one in VECTOR[I].
+   SPACE is declared as (rtx *) but must be large enough to hold all
+   NELTS vectors.
+   (this is the same as init_regset_vector () in flow.c) */
+
+static void
+init_rtx_vector (vector, space, nelts, bytes_per_elt)
+     rtx **vector;
+     rtx *space;
+     int nelts;
+     int bytes_per_elt;
+{
+  register rtx *next_row = space;
+  register int i = 0;
+
+  while (i < nelts)
+    {
+      vector[i++] = next_row;
+      /* Advance by one vector, measured in rtx slots.  */
+      next_row += bytes_per_elt / sizeof (rtx);
+    }
+}
+
+/* Schedule a region.  A region is either an inner loop, a loop-free
+   subroutine, or a single basic block.  Each bb in the region is
+   scheduled after its flow predecessors.
+
+   RGN is the region number.  The steps, in order: set up the per-block
+   dependence-analysis arrays (multi-block regions only), compute
+   backward then forward dependences, compute live registers / strip
+   line notes / set priorities per block, compute the interblock
+   information (multi-block regions only), schedule every block, check
+   that the number of insns scheduled matches the number counted, and
+   finally update life information and restore line notes.  All
+   temporary storage here comes from alloca.  */
+
+static void
+schedule_region (rgn)
+     int rgn;
+{
+  int bb;
+  int rgn_n_insns = 0;
+  int sched_rgn_n_insns = 0;
+
+  /* set variables for the current region */
+  current_nr_blocks = RGN_NR_BLOCKS (rgn);
+  current_blocks = RGN_BLOCKS (rgn);
+
+  reg_pending_sets = ALLOCA_REG_SET ();
+  reg_pending_sets_all = 0;
+
+  /* initializations for region data dependence analysis: one zeroed
+     row of maxreg rtx slots per block for the last-uses and last-sets
+     tables, plus one slot per block in each of the pending-list arrays */
+  if (current_nr_blocks > 1)
+    {
+      rtx *space;
+      int maxreg = max_reg_num ();
+
+      bb_reg_last_uses = (rtx **) alloca (current_nr_blocks * sizeof (rtx *));
+      space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx));
+      bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx));
+      init_rtx_vector (bb_reg_last_uses, space, current_nr_blocks, maxreg * sizeof (rtx *));
+
+      bb_reg_last_sets = (rtx **) alloca (current_nr_blocks * sizeof (rtx *));
+      space = (rtx *) alloca (current_nr_blocks * maxreg * sizeof (rtx));
+      bzero ((char *) space, current_nr_blocks * maxreg * sizeof (rtx));
+      init_rtx_vector (bb_reg_last_sets, space, current_nr_blocks, maxreg * sizeof (rtx *));
+
+      bb_pending_read_insns = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+      bb_pending_read_mems = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+      bb_pending_write_insns = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+      bb_pending_write_mems = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+      bb_pending_lists_length = (int *) alloca (current_nr_blocks * sizeof (int));
+      bb_last_pending_memory_flush = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+      bb_last_function_call = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+      bb_sched_before_next_call = (rtx *) alloca (current_nr_blocks * sizeof (rtx));
+
+      init_rgn_data_dependences (current_nr_blocks);
+    }
+
+  /* compute LOG_LINKS, visiting the blocks in increasing bb order */
+  for (bb = 0; bb < current_nr_blocks; bb++)
+    compute_block_backward_dependences (bb);
+
+  /* compute INSN_DEPEND, visiting the blocks in decreasing bb order */
+  for (bb = current_nr_blocks - 1; bb >= 0; bb--)
+    compute_block_forward_dependences (bb);
+
+  /* Delete line notes, compute live-regs at block end, and set priorities.
+     Live registers are only computed before reload; line notes are only
+     saved and removed when debug info is being written.  */
+  dead_notes = 0;
+  for (bb = 0; bb < current_nr_blocks; bb++)
+    {
+      if (reload_completed == 0)
+	find_pre_sched_live (bb);
+
+      if (write_symbols != NO_DEBUG)
+	{
+	  save_line_notes (bb);
+	  rm_line_notes (bb);
+	}
+
+      rgn_n_insns += set_priorities (bb);
+    }
+
+  /* compute interblock info: probabilities, split-edges, dominators, etc.  */
+  if (current_nr_blocks > 1)
+    {
+      int i;
+
+      prob = (float *) alloca ((current_nr_blocks) * sizeof (float));
+
+      bbset_size = current_nr_blocks / HOST_BITS_PER_WIDE_INT + 1;
+      dom = (bbset *) alloca (current_nr_blocks * sizeof (bbset));
+      for (i = 0; i < current_nr_blocks; i++)
+	{
+	  dom[i] = (bbset) alloca (bbset_size * sizeof (HOST_WIDE_INT));
+	  bzero ((char *) dom[i], bbset_size * sizeof (HOST_WIDE_INT));
+	}
+
+      /* edge to bit: number the edges leaving blocks of this region
+	 consecutively from 0, then record them in rgn_edges[] in that
+	 same order.  Edge indices start at 1.  */
+      rgn_nr_edges = 0;
+      edge_to_bit = (int *) alloca (nr_edges * sizeof (int));
+      for (i = 1; i < nr_edges; i++)
+	if (CONTAINING_RGN (FROM_BLOCK (i)) == rgn)
+	  EDGE_TO_BIT (i) = rgn_nr_edges++;
+      rgn_edges = (int *) alloca (rgn_nr_edges * sizeof (int));
+
+      rgn_nr_edges = 0;
+      for (i = 1; i < nr_edges; i++)
+	if (CONTAINING_RGN (FROM_BLOCK (i)) == (rgn))
+	  rgn_edges[rgn_nr_edges++] = i;
+
+      /* split edges: one zeroed edge set per block for pot_split and
+	 for ancestor_edges */
+      edgeset_size = rgn_nr_edges / HOST_BITS_PER_WIDE_INT + 1;
+      pot_split = (edgeset *) alloca (current_nr_blocks * sizeof (edgeset));
+      ancestor_edges = (edgeset *) alloca (current_nr_blocks * sizeof (edgeset));
+      for (i = 0; i < current_nr_blocks; i++)
+	{
+	  pot_split[i] =
+	    (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT));
+	  bzero ((char *) pot_split[i],
+		 edgeset_size * sizeof (HOST_WIDE_INT));
+	  ancestor_edges[i] =
+	    (edgeset) alloca (edgeset_size * sizeof (HOST_WIDE_INT));
+	  bzero ((char *) ancestor_edges[i],
+		 edgeset_size * sizeof (HOST_WIDE_INT));
+	}
+
+      /* compute probabilities, dominators, split_edges */
+      for (bb = 0; bb < current_nr_blocks; bb++)
+	compute_dom_prob_ps (bb);
+    }
+
+  /* now we can schedule all blocks */
+  for (bb = 0; bb < current_nr_blocks; bb++)
+    {
+      sched_rgn_n_insns += schedule_block (bb, rgn, rgn_n_insns);
+
+#ifdef USE_C_ALLOCA
+      alloca (0);
+#endif
+    }
+
+#ifdef INTERBLOCK_DEBUG
+  if (sched_debug_count != 0)
+#endif
+    /* sanity check: verify that all region insns were scheduled */
+    if (sched_rgn_n_insns != rgn_n_insns)
+      abort ();
+
+  /* update register life and usage information */
+  if (reload_completed == 0)
+    {
+      for (bb = current_nr_blocks - 1; bb >= 0; bb--)
+	find_post_sched_live (bb);
+
+      if (current_nr_blocks <= 1)
+	/* Sanity check.  There should be no REG_DEAD notes leftover at the end.
+	   In practice, this can occur as the result of bugs in flow, combine.c,
+	   and/or sched.c.  The values of the REG_DEAD notes remaining are
+	   meaningless, because dead_notes is just used as a free list.  */
+	if (dead_notes != 0)
+	  abort ();
+    }
+
+  /* restore line notes.  */
+  if (write_symbols != NO_DEBUG)
+    {
+      for (bb = 0; bb < current_nr_blocks; bb++)
+	restore_line_notes (bb);
+    }
+
+  /* Done with this region */
+  free_pending_lists ();
+}
+
+/* Subroutine of split_hard_reg_notes.  Search X recursively, operands
+   taken from last to first, for a REG rtx whose register number is
+   REGNO.  Return the first such rtx found, or 0 if there is none.  */
+
+static rtx
+regno_use_in (regno, x)
+     int regno;
+     rtx x;
+{
+  register char *format;
+  int op, elt;
+  rtx found;
+
+  if (GET_CODE (x) == REG && REGNO (x) == regno)
+    return x;
+
+  format = GET_RTX_FORMAT (GET_CODE (x));
+  op = GET_RTX_LENGTH (GET_CODE (x));
+  while (--op >= 0)
+    switch (format[op])
+      {
+      case 'e':
+	/* A single sub-expression.  */
+	found = regno_use_in (regno, XEXP (x, op));
+	if (found)
+	  return found;
+	break;
+
+      case 'E':
+	/* A vector of sub-expressions, also searched back to front.  */
+	elt = XVECLEN (x, op);
+	while (--elt >= 0)
+	  {
+	    found = regno_use_in (regno, XVECEXP (x, op, elt));
+	    if (found)
+	      return found;
+	  }
+	break;
+
+      default:
+	break;
+      }
+
+  return 0;
+}
+
+/* Subroutine of update_flow_info.  NOTE is a REG_DEAD note for a hard
+   register that may span several hard registers.  For each of those
+   registers, find the latest insn between FIRST and LAST (inclusive)
+   whose pattern refers to it and attach a new REG_DEAD note for that
+   reference there.  This covers the case where the single reference in
+   the original insn was split into several smaller hard register
+   references in the split insns.  ORIG_INSN is not used.  */
+
+static void
+split_hard_reg_notes (note, first, last, orig_insn)
+     rtx note, first, last, orig_insn;
+{
+  rtx reg, use, dead;
+  rtx scan;
+  int nregs, offset;
+
+  /* Only REG_DEAD notes are handled here.  */
+  if (REG_NOTE_KIND (note) != REG_DEAD)
+    abort ();
+
+  reg = XEXP (note, 0);
+  nregs = HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg));
+
+  offset = 0;
+  while (offset < nregs)
+    {
+      int hard_regno = REGNO (reg) + offset;
+
+      /* Scan the split insns from LAST back to FIRST for a reference to
+	 this hard register.  */
+      scan = last;
+      for (;;)
+	{
+	  use = 0;
+	  if (GET_RTX_CLASS (GET_CODE (scan)) == 'i')
+	    use = regno_use_in (hard_regno, PATTERN (scan));
+
+	  if (use)
+	    {
+	      /* Create a new reg dead note here.  */
+	      dead = rtx_alloc (EXPR_LIST);
+	      PUT_REG_NOTE_KIND (dead, REG_DEAD);
+	      XEXP (dead, 0) = use;
+	      XEXP (dead, 1) = REG_NOTES (scan);
+	      REG_NOTES (scan) = dead;
+
+	      /* The reference found may cover several hard registers;
+		 skip the extra ones.  */
+	      offset += HARD_REGNO_NREGS (REGNO (use), GET_MODE (use)) - 1;
+	      break;
+	    }
+
+	  /* Not mentioned anywhere: no note is needed for this
+	     register.  */
+	  if (scan == first)
+	    break;
+
+	  scan = PREV_INSN (scan);
+	}
+
+      offset++;
+    }
+}
+
+/* Subroutine of update_flow_info.  Determines whether a SET or CLOBBER in an
+   insn created by splitting needs a REG_DEAD or REG_UNUSED note added.
+
+   PAT is the SET or CLOBBER and INSN the insn it belongs to.  The insns
+   after INSN, up to and including LAST, are searched backward from LAST
+   for one that mentions PAT's destination register.  ORIG_INSN is the
+   insn that was split; its pattern is consulted only when that search
+   reaches INSN without stopping earlier.  Nothing is done when the
+   destination, stripped of ZERO_EXTRACT / SUBREG / STRICT_LOW_PART /
+   SIGN_EXTRACT wrappers, is not a REG.  */
+
+static void
+new_insn_dead_notes (pat, insn, last, orig_insn)
+     rtx pat, insn, last, orig_insn;
+{
+  rtx dest, tem, set;
+
+  /* PAT is either a CLOBBER or a SET here.  */
+  dest = XEXP (pat, 0);
+
+  while (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SUBREG
+	 || GET_CODE (dest) == STRICT_LOW_PART
+	 || GET_CODE (dest) == SIGN_EXTRACT)
+    dest = XEXP (dest, 0);
+
+  if (GET_CODE (dest) == REG)
+    {
+      for (tem = last; tem != insn; tem = PREV_INSN (tem))
+	{
+	  if (GET_RTX_CLASS (GET_CODE (tem)) == 'i'
+	      && reg_overlap_mentioned_p (dest, PATTERN (tem))
+	      && (set = single_set (tem)))
+	    {
+	      rtx tem_dest = SET_DEST (set);
+
+	      while (GET_CODE (tem_dest) == ZERO_EXTRACT
+		     || GET_CODE (tem_dest) == SUBREG
+		     || GET_CODE (tem_dest) == STRICT_LOW_PART
+		     || GET_CODE (tem_dest) == SIGN_EXTRACT)
+		tem_dest = XEXP (tem_dest, 0);
+
+	      if (!rtx_equal_p (tem_dest, dest))
+		{
+		  /* Use the same scheme as combine.c, don't put both REG_DEAD
+		     and REG_UNUSED notes on the same insn.  */
+		  if (!find_regno_note (tem, REG_UNUSED, REGNO (dest))
+		      && !find_regno_note (tem, REG_DEAD, REGNO (dest)))
+		    {
+		      rtx note = rtx_alloc (EXPR_LIST);
+		      PUT_REG_NOTE_KIND (note, REG_DEAD);
+		      XEXP (note, 0) = dest;
+		      XEXP (note, 1) = REG_NOTES (tem);
+		      REG_NOTES (tem) = note;
+		    }
+		  /* The reg only dies in one insn, the last one that uses
+		     it.  */
+		  break;
+		}
+	      else if (reg_overlap_mentioned_p (dest, SET_SRC (set)))
+		/* We found an instruction that both uses the register,
+		   and sets it, so no new REG_NOTE is needed for this set.  */
+		break;
+	    }
+	}
+      /* If this is a set, it must die somewhere, unless it is the dest of
+         the original insn, and hence is live after the original insn.  Abort
+         if it isn't supposed to be live after the original insn.
+
+         If this is a clobber, then just add a REG_UNUSED note.
+
+         TEM equals INSN here exactly when the loop above ran to
+         completion without taking either break.  */
+      if (tem == insn)
+	{
+	  int live_after_orig_insn = 0;
+	  rtx pattern = PATTERN (orig_insn);
+	  int i;
+
+	  if (GET_CODE (pat) == CLOBBER)
+	    {
+	      rtx note = rtx_alloc (EXPR_LIST);
+	      PUT_REG_NOTE_KIND (note, REG_UNUSED);
+	      XEXP (note, 0) = dest;
+	      XEXP (note, 1) = REG_NOTES (insn);
+	      REG_NOTES (insn) = note;
+	      return;
+	    }
+
+	  /* The original insn could have multiple sets, so search the
+	     insn for all sets.  */
+	  if (GET_CODE (pattern) == SET)
+	    {
+	      if (reg_overlap_mentioned_p (dest, SET_DEST (pattern)))
+		live_after_orig_insn = 1;
+	    }
+	  else if (GET_CODE (pattern) == PARALLEL)
+	    {
+	      for (i = 0; i < XVECLEN (pattern, 0); i++)
+		if (GET_CODE (XVECEXP (pattern, 0, i)) == SET
+		    && reg_overlap_mentioned_p (dest,
+						SET_DEST (XVECEXP (pattern,
+								   0, i))))
+		  live_after_orig_insn = 1;
+	    }
+
+	  if (!live_after_orig_insn)
+	    abort ();
+	}
+    }
+}
+
+/* Subroutine of update_flow_info.  Adjust REG_N_SETS by INC for every
+   register that the SET or CLOBBER X stores into.  INC is -1 when the
+   insn containing X is being deleted and 1 when it is newly generated.
+   Nothing is changed when the destination is not a register.  */
+
+static void
+update_n_sets (x, inc)
+     rtx x;
+     int inc;
+{
+  rtx target;
+  int first_regno, past_regno;
+  register int r;
+
+  /* Peel STRICT_LOW_PART, SUBREG and extract wrappers off the
+     destination.  */
+  for (target = SET_DEST (x);
+       GET_CODE (target) == STRICT_LOW_PART || GET_CODE (target) == SUBREG
+       || GET_CODE (target) == ZERO_EXTRACT
+       || GET_CODE (target) == SIGN_EXTRACT;
+       target = SUBREG_REG (target))
+    ;
+
+  if (GET_CODE (target) != REG)
+    return;
+
+  /* A hard register may span several consecutive register numbers; a
+     pseudo counts as exactly one.  */
+  first_regno = REGNO (target);
+  if (first_regno < FIRST_PSEUDO_REGISTER)
+    past_regno = first_regno
+		 + HARD_REGNO_NREGS (first_regno, GET_MODE (target));
+  else
+    past_regno = first_regno + 1;
+
+  for (r = first_regno; r < past_regno; r++)
+    REG_N_SETS (r) += inc;
+}
+
+/* Updates all flow-analysis related quantities (including REG_NOTES) for
+   the insns from FIRST to LAST inclusive that were created by splitting
+   ORIG_INSN.  NOTES are the original REG_NOTES.  */
+
+static void
+update_flow_info (notes, first, last, orig_insn)
+     rtx notes;
+     rtx first, last;
+     rtx orig_insn;
+{
+  rtx insn, note;
+  rtx next;
+  rtx orig_dest, temp;
+  rtx set;
+
+  /* Get and save the destination set by the original insn.  */
+
+  orig_dest = single_set (orig_insn);
+  if (orig_dest)
+    orig_dest = SET_DEST (orig_dest);
+
+  /* Move REG_NOTES from the original insn to where they now belong.  */
+
+  for (note = notes; note; note = next)
+    {
+      next = XEXP (note, 1);
+      switch (REG_NOTE_KIND (note))
+	{
+	case REG_DEAD:
+	case REG_UNUSED:
+	  /* Move these notes from the original insn to the last new insn where
+	     the register is now set.  */
+
+	  for (insn = last;; insn = PREV_INSN (insn))
+	    {
+	      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+		  && reg_mentioned_p (XEXP (note, 0), PATTERN (insn)))
+		{
+		  /* If this note refers to a multiple word hard register, it
+		     may have been split into several smaller hard register
+		     references, so handle it specially.  */
+		  temp = XEXP (note, 0);
+		  if (REG_NOTE_KIND (note) == REG_DEAD
+		      && GET_CODE (temp) == REG
+		      && REGNO (temp) < FIRST_PSEUDO_REGISTER
+		      && HARD_REGNO_NREGS (REGNO (temp), GET_MODE (temp)) > 1)
+		    split_hard_reg_notes (note, first, last, orig_insn);
+		  else
+		    {
+		      XEXP (note, 1) = REG_NOTES (insn);
+		      REG_NOTES (insn) = note;
+		    }
+
+		  /* Sometimes need to convert REG_UNUSED notes to REG_DEAD
+		     notes.  */
+		  /* ??? This won't handle multiple word registers correctly,
+		     but should be good enough for now.  */
+		  if (REG_NOTE_KIND (note) == REG_UNUSED
+		      && !dead_or_set_p (insn, XEXP (note, 0)))
+		    PUT_REG_NOTE_KIND (note, REG_DEAD);
+
+		  /* The reg only dies in one insn, the last one that uses
+		     it.  */
+		  break;
+		}
+	      /* It must die somewhere, fail it we couldn't find where it died.
+
+	         If this is a REG_UNUSED note, then it must be a temporary
+	         register that was not needed by this instantiation of the
+	         pattern, so we can safely ignore it.  */
+	      if (insn == first)
+		{
+		  /* After reload, REG_DEAD notes come sometimes an
+		     instruction after the register actually dies.  */
+		  if (reload_completed && REG_NOTE_KIND (note) == REG_DEAD)
+		    {
+		      XEXP (note, 1) = REG_NOTES (insn);
+		      REG_NOTES (insn) = note;
+		      break;
+		    }
+			
+		  if (REG_NOTE_KIND (note) != REG_UNUSED)
+		    abort ();
+
+		  break;
+		}
+	    }
+	  break;
+
+	case REG_WAS_0:
+	  /* This note applies to the dest of the original insn.  Find the
+	     first new insn that now has the same dest, and move the note
+	     there.  */
+
+	  if (!orig_dest)
+	    abort ();
+
+	  for (insn = first;; insn = NEXT_INSN (insn))
+	    {
+	      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+		  && (temp = single_set (insn))
+		  && rtx_equal_p (SET_DEST (temp), orig_dest))
+		{
+		  XEXP (note, 1) = REG_NOTES (insn);
+		  REG_NOTES (insn) = note;
+		  /* The reg is only zero before one insn, the first that
+		     uses it.  */
+		  break;
+		}
+	      /* If this note refers to a multiple word hard
+		 register, it may have been split into several smaller
+		 hard register references.  We could split the notes,
+		 but simply dropping them is good enough.  */
+	      if (GET_CODE (orig_dest) == REG
+		  && REGNO (orig_dest) < FIRST_PSEUDO_REGISTER
+		  && HARD_REGNO_NREGS (REGNO (orig_dest),
+				       GET_MODE (orig_dest)) > 1)
+		    break;
+	      /* It must be set somewhere, fail if we couldn't find where it
+	         was set.  */
+	      if (insn == last)
+		abort ();
+	    }
+	  break;
+
+	case REG_EQUAL:
+	case REG_EQUIV:
+	  /* A REG_EQUIV or REG_EQUAL note on an insn with more than one
+	     set is meaningless.  Just drop the note.  */
+	  if (!orig_dest)
+	    break;
+
+	case REG_NO_CONFLICT:
+	  /* These notes apply to the dest of the original insn.  Find the last
+	     new insn that now has the same dest, and move the note there.  */
+
+	  if (!orig_dest)
+	    abort ();
+
+	  for (insn = last;; insn = PREV_INSN (insn))
+	    {
+	      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+		  && (temp = single_set (insn))
+		  && rtx_equal_p (SET_DEST (temp), orig_dest))
+		{
+		  XEXP (note, 1) = REG_NOTES (insn);
+		  REG_NOTES (insn) = note;
+		  /* Only put this note on one of the new insns.  */
+		  break;
+		}
+
+	      /* The original dest must still be set someplace.  Abort if we
+	         couldn't find it.  */
+	      if (insn == first)
+		{
+		  /* However, if this note refers to a multiple word hard
+		     register, it may have been split into several smaller
+		     hard register references.  We could split the notes,
+		     but simply dropping them is good enough.  */
+		  if (GET_CODE (orig_dest) == REG
+		      && REGNO (orig_dest) < FIRST_PSEUDO_REGISTER
+		      && HARD_REGNO_NREGS (REGNO (orig_dest),
+					   GET_MODE (orig_dest)) > 1)
+		    break;
+		  /* Likewise for multi-word memory references.  */
+		  if (GET_CODE (orig_dest) == MEM
+		      && SIZE_FOR_MODE (orig_dest) > MOVE_MAX)
+		    break;
+		  abort ();
+		}
+	    }
+	  break;
+
+	case REG_LIBCALL:
+	  /* Move a REG_LIBCALL note to the first insn created, and update
+	     the corresponding REG_RETVAL note.  */
+	  XEXP (note, 1) = REG_NOTES (first);
+	  REG_NOTES (first) = note;
+
+	  insn = XEXP (note, 0);
+	  note = find_reg_note (insn, REG_RETVAL, NULL_RTX);
+	  if (note)
+	    XEXP (note, 0) = first;
+	  break;
+
+	case REG_EXEC_COUNT:
+	  /* Move a REG_EXEC_COUNT note to the first insn created.  */
+	  XEXP (note, 1) = REG_NOTES (first);
+	  REG_NOTES (first) = note;
+	  break;
+
+	case REG_RETVAL:
+	  /* Move a REG_RETVAL note to the last insn created, and update
+	     the corresponding REG_LIBCALL note.  */
+	  XEXP (note, 1) = REG_NOTES (last);
+	  REG_NOTES (last) = note;
+
+	  insn = XEXP (note, 0);
+	  note = find_reg_note (insn, REG_LIBCALL, NULL_RTX);
+	  if (note)
+	    XEXP (note, 0) = last;
+	  break;
+
+	case REG_NONNEG:
+	case REG_BR_PROB:
+	  /* This should be moved to whichever instruction is a JUMP_INSN.  */
+
+	  for (insn = last;; insn = PREV_INSN (insn))
+	    {
+	      if (GET_CODE (insn) == JUMP_INSN)
+		{
+		  XEXP (note, 1) = REG_NOTES (insn);
+		  REG_NOTES (insn) = note;
+		  /* Only put this note on one of the new insns.  */
+		  break;
+		}
+	      /* Fail if we couldn't find a JUMP_INSN.  */
+	      if (insn == first)
+		abort ();
+	    }
+	  break;
+
+	case REG_INC:
+	  /* reload sometimes leaves obsolete REG_INC notes around.  */
+	  if (reload_completed)
+	    break;
+	  /* This should be moved to whichever instruction now has the
+	     increment operation.  */
+	  abort ();
+
+	case REG_LABEL:
+	  /* Should be moved to the new insn(s) which use the label.  */
+	  for (insn = first; insn != NEXT_INSN (last); insn = NEXT_INSN (insn))
+	    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+		&& reg_mentioned_p (XEXP (note, 0), PATTERN (insn)))
+	      REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL,
+					  XEXP (note, 0), REG_NOTES (insn));
+	  break;
+
+	case REG_CC_SETTER:
+	case REG_CC_USER:
+	  /* These two notes will never appear until after reorg, so we don't
+	     have to handle them here.  */
+	default:
+	  abort ();
+	}
+    }
+
+  /* Each new insn created, except the last, has a new set.  If the destination
+     is a register, then this reg is now live across several insns, whereas
+     previously the dest reg was born and died within the same insn.  To
+     reflect this, we now need a REG_DEAD note on the insn where this
+     dest reg dies.
+
+     Similarly, the new insns may have clobbers that need REG_UNUSED notes.  */
+
+  for (insn = first; insn != last; insn = NEXT_INSN (insn))
+    {
+      rtx pat;
+      int i;
+
+      pat = PATTERN (insn);
+      if (GET_CODE (pat) == SET || GET_CODE (pat) == CLOBBER)
+	new_insn_dead_notes (pat, insn, last, orig_insn);
+      else if (GET_CODE (pat) == PARALLEL)
+	{
+	  for (i = 0; i < XVECLEN (pat, 0); i++)
+	    if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+		|| GET_CODE (XVECEXP (pat, 0, i)) == CLOBBER)
+	      new_insn_dead_notes (XVECEXP (pat, 0, i), insn, last, orig_insn);
+	}
+    }
+
+  /* If any insn, except the last, uses the register set by the last insn,
+     then we need a new REG_DEAD note on that insn.  In this case, there
+     would not have been a REG_DEAD note for this register in the original
+     insn because it was used and set within one insn.  */
+
+  set = single_set (last);
+  if (set)
+    {
+      rtx dest = SET_DEST (set);
+
+      while (GET_CODE (dest) == ZERO_EXTRACT || GET_CODE (dest) == SUBREG
+	     || GET_CODE (dest) == STRICT_LOW_PART
+	     || GET_CODE (dest) == SIGN_EXTRACT)
+	dest = XEXP (dest, 0);
+
+      if (GET_CODE (dest) == REG
+	  /* Global registers are always live, so the code below does not
+	     apply to them.  */
+	  && (REGNO (dest) >= FIRST_PSEUDO_REGISTER
+	      || ! global_regs[REGNO (dest)]))
+	{
+	  rtx stop_insn = PREV_INSN (first);
+
+	  /* If the last insn uses the register that it is setting, then
+	     we don't want to put a REG_DEAD note there.  Search backwards
+	     to find the first insn that sets but does not use DEST.  */
+
+	  insn = last;
+	  if (reg_overlap_mentioned_p (dest, SET_SRC (set)))
+	    {
+	      for (insn = PREV_INSN (insn); insn != first;
+		   insn = PREV_INSN (insn))
+		{
+		  if ((set = single_set (insn))
+		      && reg_mentioned_p (dest, SET_DEST (set))
+		      && ! reg_overlap_mentioned_p (dest, SET_SRC (set)))
+		    break;
+		}
+	    }
+
+	  /* Now find the first insn that uses but does not set DEST.  */
+
+	  for (insn = PREV_INSN (insn); insn != stop_insn;
+	       insn = PREV_INSN (insn))
+	    {
+	      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+		  && reg_mentioned_p (dest, PATTERN (insn))
+		  && (set = single_set (insn)))
+		{
+		  rtx insn_dest = SET_DEST (set);
+
+		  while (GET_CODE (insn_dest) == ZERO_EXTRACT
+			 || GET_CODE (insn_dest) == SUBREG
+			 || GET_CODE (insn_dest) == STRICT_LOW_PART
+			 || GET_CODE (insn_dest) == SIGN_EXTRACT)
+		    insn_dest = XEXP (insn_dest, 0);
+
+		  if (insn_dest != dest)
+		    {
+		      note = rtx_alloc (EXPR_LIST);
+		      PUT_REG_NOTE_KIND (note, REG_DEAD);
+		      XEXP (note, 0) = dest;
+		      XEXP (note, 1) = REG_NOTES (insn);
+		      REG_NOTES (insn) = note;
+		      /* The reg only dies in one insn, the last one
+			 that uses it.  */
+		      break;
+		    }
+		}
+	    }
+	}
+    }
+
+  /* If the original dest is modifying a multiple register target, and the
+     original instruction was split such that the original dest is now set
+     by two or more SUBREG sets, then the split insns no longer kill the
+     destination of the original insn.
+
+     In this case, if there exists an instruction in the same basic block,
+     before the split insn, which uses the original dest, and this use is
+     killed by the original insn, then we must remove the REG_DEAD note on
+     this insn, because it is now superfluous.
+
+     This does not apply when a hard register gets split, because the code
+     knows how to handle overlapping hard registers properly.  */
+  if (orig_dest && GET_CODE (orig_dest) == REG)
+    {
+      int found_orig_dest = 0;
+      int found_split_dest = 0;
+
+      for (insn = first;; insn = NEXT_INSN (insn))
+	{
+	  set = single_set (insn);
+	  if (set)
+	    {
+	      if (GET_CODE (SET_DEST (set)) == REG
+		  && REGNO (SET_DEST (set)) == REGNO (orig_dest))
+		{
+		  found_orig_dest = 1;
+		  break;
+		}
+	      else if (GET_CODE (SET_DEST (set)) == SUBREG
+		       && SUBREG_REG (SET_DEST (set)) == orig_dest)
+		{
+		  found_split_dest = 1;
+		  break;
+		}
+	    }
+
+	  if (insn == last)
+	    break;
+	}
+
+      if (found_split_dest)
+	{
+	  /* Search backwards from FIRST, looking for the first insn that uses
+	     the original dest.  Stop if we pass a CODE_LABEL or a JUMP_INSN.
+	     If we find an insn, and it has a REG_DEAD note, then delete the
+	     note.  */
+
+	  for (insn = first; insn; insn = PREV_INSN (insn))
+	    {
+	      if (GET_CODE (insn) == CODE_LABEL
+		  || GET_CODE (insn) == JUMP_INSN)
+		break;
+	      else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
+		       && reg_mentioned_p (orig_dest, insn))
+		{
+		  note = find_regno_note (insn, REG_DEAD, REGNO (orig_dest));
+		  if (note)
+		    remove_note (insn, note);
+		}
+	    }
+	}
+      else if (!found_orig_dest)
+	{
+	  /* This should never happen.  */
+	  abort ();
+	}
+    }
+
+  /* Update reg_n_sets.  This is necessary to prevent local alloc from
+     converting REG_EQUAL notes to REG_EQUIV when splitting has modified
+     a reg from set once to set multiple times.  */
+
+  {
+    rtx x = PATTERN (orig_insn);
+    RTX_CODE code = GET_CODE (x);
+
+    if (code == SET || code == CLOBBER)
+      update_n_sets (x, -1);
+    else if (code == PARALLEL)
+      {
+	int i;
+	for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+	  {
+	    code = GET_CODE (XVECEXP (x, 0, i));
+	    if (code == SET || code == CLOBBER)
+	      update_n_sets (XVECEXP (x, 0, i), -1);
+	  }
+      }
+
+    for (insn = first;; insn = NEXT_INSN (insn))
+      {
+	x = PATTERN (insn);
+	code = GET_CODE (x);
+
+	if (code == SET || code == CLOBBER)
+	  update_n_sets (x, 1);
+	else if (code == PARALLEL)
+	  {
+	    int i;
+	    for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+	      {
+		code = GET_CODE (XVECEXP (x, 0, i));
+		if (code == SET || code == CLOBBER)
+		  update_n_sets (XVECEXP (x, 0, i), 1);
+	      }
+	  }
+
+	if (insn == last)
+	  break;
+      }
+  }
+}
+
+/* Do the splitting of insns in the block B.
+
+   Walk the insns from basic_block_head[B] through basic_block_end[B].
+   Anything that is not a plain INSN is skipped.  No-op moves are never
+   split; once reload has completed they are turned into
+   NOTE_INSN_DELETED notes instead (except for one that ends the block,
+   which is left alone).  Every other insn is handed to try_split, but
+   only during the first scheduling pass.  When an insn is actually
+   split, the flow information is updated for the new insns, the
+   original insn is turned into a deleted note, and basic_block_head[B] /
+   basic_block_end[B] are adjusted if the original insn was at either
+   end of the block.  */
+
+static void
+split_block_insns (b)
+     int b;
+{
+  rtx insn, next;
+
+  for (insn = basic_block_head[b];; insn = next)
+    {
+      rtx set;
+
+      /* Can't use `next_real_insn' because that
+         might go across CODE_LABELS and short-out basic blocks.  */
+      next = NEXT_INSN (insn);
+      if (GET_CODE (insn) != INSN)
+	{
+	  if (insn == basic_block_end[b])
+	    break;
+
+	  continue;
+	}
+
+      /* Don't split no-op move insns.  These should silently disappear
+         later in final.  Splitting such insns would break the code
+         that handles REG_NO_CONFLICT blocks.  */
+      set = single_set (insn);
+      if (set && rtx_equal_p (SET_SRC (set), SET_DEST (set)))
+	{
+	  /* A no-op move that ends the block is left untouched.  */
+	  if (insn == basic_block_end[b])
+	    break;
+
+	  /* Nops get in the way while scheduling, so delete them now if
+	     register allocation has already been done.  It is too risky
+	     to try to do this before register allocation, and there are
+	     unlikely to be very many nops then anyways.  */
+	  if (reload_completed)
+	    {
+	      PUT_CODE (insn, NOTE);
+	      NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+	      NOTE_SOURCE_FILE (insn) = 0;
+	    }
+
+	  continue;
+	}
+
+      /* Split insns here to get max fine-grain parallelism.
+
+	 It is probably not worthwhile to try to split again in
+	 the second pass.  However, if flag_schedule_insns is not set,
+	 the first and only (if any) scheduling pass is after reload.  */
+      if (reload_completed == 0 || ! flag_schedule_insns)
+	{
+	  /* Remember the predecessor of INSN and its notes before the
+	     split, so the start of the replacement sequence and the old
+	     notes can be recovered afterwards.  */
+	  rtx last, first = PREV_INSN (insn);
+	  rtx notes = REG_NOTES (insn);
+	  last = try_split (PATTERN (insn), insn, 1);
+	  if (last != insn)
+	    {
+	      /* INSN was split.  The insn that now follows INSN's old
+		 predecessor is passed to update_flow_info as the first
+		 insn of the replacement sequence, and LAST as its end.  */
+	      first = NEXT_INSN (first);
+	      update_flow_info (notes, first, last, insn);
+
+	      /* Turn the original insn into a deleted note.  */
+	      PUT_CODE (insn, NOTE);
+	      NOTE_SOURCE_FILE (insn) = 0;
+	      NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+
+	      /* Keep the basic block boundaries pointing at real insns.  */
+	      if (insn == basic_block_head[b])
+		basic_block_head[b] = first;
+	      if (insn == basic_block_end[b])
+		{
+		  basic_block_end[b] = last;
+		  break;
+		}
+	    }
+	}
+
+      if (insn == basic_block_end[b])
+	break;
+    }
+}
+
+/* The one entry point in this file.  DUMP_FILE is the dump file for
+   this pass; it may be null, in which case any verbose output goes to
+   stderr.
+
+   In order, this function: splits the insns of every basic block,
+   allocates the per-insn and per-register vectors (all with alloca, so
+   nothing has to be freed explicitly), records the block number and
+   LUID of every insn, forms the scheduling regions, schedules each
+   region, and finally fixes up notes and register usage information.
+   It does nothing when the function has no basic blocks.  */
+
+void
+schedule_insns (dump_file)
+     FILE *dump_file;
+{
+
+  int max_uid;
+  int b;
+  rtx insn;
+  int rgn;
+
+  int luid;
+
+  /* Disable speculative loads when the target defines cc0.  */
+#ifdef HAVE_cc0
+  flag_schedule_speculative_load = 0;
+#endif
+
+  /* Taking care of this degenerate case makes the rest of
+     this code simpler.  */
+  if (n_basic_blocks == 0)
+    return;
+
+  /* set dump and sched_verbose for the desired debugging output.  If no
+     dump-file was specified, but -fsched-verbose-N (any N), print to stderr.
+     For -fsched-verbose-N, N>=10, print everything to stderr.  */
+  sched_verbose = sched_verbose_param;
+  if (sched_verbose_param == 0 && dump_file)
+    sched_verbose = 1;
+  dump = ((sched_verbose_param >= 10 || !dump_file) ? stderr : dump_file);
+
+  /* Counters of interblock and speculative motions, reported below.  */
+  nr_inter = 0;
+  nr_spec = 0;
+
+  /* Initialize the unused_*_lists.  We can't use the ones left over from
+     the previous function, because gcc has freed that memory.  We can use
+     the ones left over from the first sched pass in the second pass however,
+     so only clear them on the first sched pass.  The first pass is before
+     reload if flag_schedule_insns is set, otherwise it is afterwards.  */
+
+  if (reload_completed == 0 || !flag_schedule_insns)
+    {
+      unused_insn_list = 0;
+      unused_expr_list = 0;
+    }
+
+  /* initialize issue_rate */
+  issue_rate = get_issue_rate ();
+
+  /* do the splitting first for all blocks */
+  for (b = 0; b < n_basic_blocks; b++)
+    split_block_insns (b);
+
+  /* Splitting may have created new insns, so the uid bound is only
+     taken now.  */
+  max_uid = (get_max_uid () + 1);
+
+  cant_move = (char *) alloca (max_uid * sizeof (char));
+  bzero ((char *) cant_move, max_uid * sizeof (char));
+
+  fed_by_spec_load = (char *) alloca (max_uid * sizeof (char));
+  bzero ((char *) fed_by_spec_load, max_uid * sizeof (char));
+
+  is_load_insn = (char *) alloca (max_uid * sizeof (char));
+  bzero ((char *) is_load_insn, max_uid * sizeof (char));
+
+  insn_orig_block = (int *) alloca (max_uid * sizeof (int));
+  insn_luid = (int *) alloca (max_uid * sizeof (int));
+
+  /* Record, for every insn inside a basic block, the block it belongs to
+     and a LUID that increases in insn-stream order.  */
+  luid = 0;
+  for (b = 0; b < n_basic_blocks; b++)
+    for (insn = basic_block_head[b];; insn = NEXT_INSN (insn))
+      {
+	INSN_BLOCK (insn) = b;
+	INSN_LUID (insn) = luid++;
+
+	if (insn == basic_block_end[b])
+	  break;
+      }
+
+  /* after reload, remove inter-blocks dependences computed before reload.  */
+  if (reload_completed)
+    {
+      for (b = 0; b < n_basic_blocks; b++)
+	for (insn = basic_block_head[b];; insn = NEXT_INSN (insn))
+	  {
+	    rtx link;
+
+	    if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+	      {
+		for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
+		  {
+		    rtx x = XEXP (link, 0);
+
+		    if (INSN_BLOCK (x) != b)
+		      remove_dependence (insn, x);
+		  }
+	      }
+
+	    if (insn == basic_block_end[b])
+	      break;
+	  }
+    }
+
+  nr_regions = 0;
+  rgn_table = (region *) alloca ((n_basic_blocks) * sizeof (region));
+  rgn_bb_table = (int *) alloca ((n_basic_blocks) * sizeof (int));
+  block_to_bb = (int *) alloca ((n_basic_blocks) * sizeof (int));
+  containing_rgn = (int *) alloca ((n_basic_blocks) * sizeof (int));
+
+  /* compute regions for scheduling */
+  if (reload_completed
+      || n_basic_blocks == 1
+      || !flag_schedule_interblock)
+    {
+      find_single_block_region ();
+    }
+  else
+    {
+      /* an estimation for nr_edges is computed in is_cfg_nonregular () */
+      nr_edges = 0;
+
+      /* verify that a 'good' control flow graph can be built */
+      if (is_cfg_nonregular ()
+	  || nr_edges <= 1)
+	{
+	  find_single_block_region ();
+	}
+      else
+	{
+	  /* build control flow graph */
+	  in_edges = (int *) alloca (n_basic_blocks * sizeof (int));
+	  out_edges = (int *) alloca (n_basic_blocks * sizeof (int));
+	  bzero ((char *) in_edges, n_basic_blocks * sizeof (int));
+	  bzero ((char *) out_edges, n_basic_blocks * sizeof (int));
+
+	  edge_table =
+	    (edge *) alloca ((nr_edges) * sizeof (edge));
+	  bzero ((char *) edge_table,
+		 ((nr_edges) * sizeof (edge)));
+	  build_control_flow ();
+
+	  /* identify reducible inner loops and compute regions */
+	  find_rgns ();
+
+	  if (sched_verbose >= 3)
+	    {
+	      debug_control_flow ();
+	      debug_regions ();
+	    }
+
+	}
+    }
+
+  /* Allocate data for this pass.  See comments, above,
+     for what these vectors do.  They are cleared further down.  */
+  insn_priority = (int *) alloca (max_uid * sizeof (int));
+  insn_reg_weight = (int *) alloca (max_uid * sizeof (int));
+  insn_tick = (int *) alloca (max_uid * sizeof (int));
+  insn_costs = (short *) alloca (max_uid * sizeof (short));
+  insn_units = (short *) alloca (max_uid * sizeof (short));
+  insn_blockage = (unsigned int *) alloca (max_uid * sizeof (unsigned int));
+  insn_ref_count = (int *) alloca (max_uid * sizeof (int));
+
+  /* Allocate for forward dependencies */
+  insn_dep_count = (int *) alloca (max_uid * sizeof (int));
+  insn_depend = (rtx *) alloca (max_uid * sizeof (rtx));
+
+  /* The per-register vectors are only set up for the pass that runs
+     before reload; afterwards the pointers are cleared instead.  */
+  if (reload_completed == 0)
+    {
+      int i;
+
+      sched_reg_n_calls_crossed = (int *) alloca (max_regno * sizeof (int));
+      sched_reg_live_length = (int *) alloca (max_regno * sizeof (int));
+      sched_reg_basic_block = (int *) alloca (max_regno * sizeof (int));
+      bb_live_regs = ALLOCA_REG_SET ();
+      bzero ((char *) sched_reg_n_calls_crossed, max_regno * sizeof (int));
+      bzero ((char *) sched_reg_live_length, max_regno * sizeof (int));
+
+      for (i = 0; i < max_regno; i++)
+	sched_reg_basic_block[i] = REG_BLOCK_UNKNOWN;
+    }
+  else
+    {
+      sched_reg_n_calls_crossed = 0;
+      sched_reg_live_length = 0;
+      bb_live_regs = 0;
+    }
+  init_alias_analysis ();
+
+  if (write_symbols != NO_DEBUG)
+    {
+      rtx line;
+
+      line_note = (rtx *) alloca (max_uid * sizeof (rtx));
+      bzero ((char *) line_note, max_uid * sizeof (rtx));
+      line_note_head = (rtx *) alloca (n_basic_blocks * sizeof (rtx));
+      bzero ((char *) line_note_head, n_basic_blocks * sizeof (rtx));
+
+      /* Save-line-note-head:
+         Determine the line-number at the start of each basic block.
+         This must be computed and saved now, because after a basic block's
+         predecessor has been scheduled, it is impossible to accurately
+         determine the correct line number for the first insn of the block.  */
+
+      for (b = 0; b < n_basic_blocks; b++)
+	for (line = basic_block_head[b]; line; line = PREV_INSN (line))
+	  if (GET_CODE (line) == NOTE && NOTE_LINE_NUMBER (line) > 0)
+	    {
+	      line_note_head[b] = line;
+	      break;
+	    }
+    }
+
+  bzero ((char *) insn_priority, max_uid * sizeof (int));
+  bzero ((char *) insn_reg_weight, max_uid * sizeof (int));
+  bzero ((char *) insn_tick, max_uid * sizeof (int));
+  bzero ((char *) insn_costs, max_uid * sizeof (short));
+  bzero ((char *) insn_units, max_uid * sizeof (short));
+  bzero ((char *) insn_blockage, max_uid * sizeof (unsigned int));
+  bzero ((char *) insn_ref_count, max_uid * sizeof (int));
+
+  /* Initialize for forward dependencies */
+  bzero ((char *) insn_depend, max_uid * sizeof (rtx));
+  bzero ((char *) insn_dep_count, max_uid * sizeof (int));
+
+  /* Find units used in this function, for visualization */
+  if (sched_verbose)
+    init_target_units ();
+
+  /* ??? Add a NOTE after the last insn of the last basic block.  It is not
+     known why this is done.  */
+
+  insn = basic_block_end[n_basic_blocks - 1];
+  if (NEXT_INSN (insn) == 0
+      || (GET_CODE (insn) != NOTE
+	  && GET_CODE (insn) != CODE_LABEL
+  /* Don't emit a NOTE if it would end up between an unconditional
+     jump and a BARRIER.  */
+	  && !(GET_CODE (insn) == JUMP_INSN
+	       && GET_CODE (NEXT_INSN (insn)) == BARRIER)))
+    emit_note_after (NOTE_INSN_DELETED, basic_block_end[n_basic_blocks - 1]);
+
+  /* Schedule every region in the subroutine */
+  for (rgn = 0; rgn < nr_regions; rgn++)
+    {
+      schedule_region (rgn);
+
+#ifdef USE_C_ALLOCA
+      alloca (0);
+#endif
+    }
+
+  /* Reposition the prologue and epilogue notes in case we moved the
+     prologue/epilogue insns.  */
+  if (reload_completed)
+    reposition_prologue_and_epilogue_notes (get_insns ());
+
+  /* delete redundant line notes.  */
+  if (write_symbols != NO_DEBUG)
+    rm_redundant_line_notes ();
+
+  /* Update information about uses of registers in the subroutine.  */
+  if (reload_completed == 0)
+    update_reg_usage ();
+
+  if (sched_verbose)
+    {
+      if (reload_completed == 0 && flag_schedule_interblock)
+	{
+	  fprintf (dump, "\n;; Procedure interblock/speculative motions == %d/%d \n",
+		   nr_inter, nr_spec);
+	}
+      else
+	{
+	  /* No interblock motion may have been counted when interblock
+	     scheduling was not in effect.  */
+	  if (nr_inter > 0)
+	    abort ();
+	}
+      fprintf (dump, "\n\n");
+    }
+}
+#endif /* INSN_SCHEDULING */
diff --git a/gcc/invoke.texi b/gcc/invoke.texi
index 7a643d60aa1..2c7ae54b993 100644
--- a/gcc/invoke.texi
+++ b/gcc/invoke.texi
@@ -147,7 +147,7 @@ in the following sections.
 -ffunction-sections -finline-functions
 -fkeep-inline-functions -fno-default-inline
 -fno-defer-pop  -fno-function-cse
--fno-inline  -fno-peephole  -fomit-frame-pointer
+-fno-inline  -fno-peephole  -fomit-frame-pointer -fregmove
 -frerun-cse-after-loop  -fschedule-insns
 -fschedule-insns2  -fstrength-reduce  -fthread-jumps
 -funroll-all-loops  -funroll-loops
@@ -2195,6 +2195,12 @@ used in one place: in @file{reorg.c}, instead of guessing which path a
 branch is mostly to take, the @samp{REG_BR_PROB} values are used to
 exactly determine which path is taken more often.
 @end ifset
+
+@item -fregmove
+Some machines only support 2 operands per instruction.  On such
+machines, GNU CC might have to do extra copies.  The @samp{-fregmove}
+option overrides the default for the machine to do the copy before
+register allocation.
 @end table
 
 @node Preprocessor Options
diff --git a/gcc/loop.c b/gcc/loop.c
index b54e677b928..2c4092d0c85 100644
--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -81,6 +81,42 @@ static rtx *loop_number_loop_starts, *loop_number_loop_ends;
 
 int *loop_outer_loop;
 
+#ifdef HAIFA
+/* The main output of analyze_loop_iterations is placed here.  */
+
+int *loop_can_insert_bct;
+
+/* For each loop, records whether any of its inner loops has used the
+   count register.  */
+
+int *loop_used_count_register;
+
+/* For each loop, remember its unrolling factor (if any).
+   Contents of the array:
+   0/1: not unrolled.
+   -1: completely unrolled - no further instrumentation is needed.
+   >1: holds the exact amount of unrolling.
+
+   NOTE(review): loop_unroll_iter is declared alongside but is not
+   described here; presumably it is filled in by the unroller as well -
+   confirm against unroll.c.  */
+
+int *loop_unroll_factor;
+int *loop_unroll_iter;
+
+/* Loop parameters for arithmetic loops.  These loops have a loop variable
+   which is initialized to loop_start_value, incremented in each iteration
+   by "loop_increment".  At the end of the iteration the loop variable is
+   compared to the loop_comparison_value (using loop_comparison_code).  */
+
+rtx *loop_increment;
+rtx *loop_comparison_value;
+rtx *loop_start_value;
+enum rtx_code *loop_comparison_code;
+
+/* For debugging: selects the sub-range of loops for which the bct
+   optimization is invoked.  The numbering is per compilation unit.  */
+int dbg_bct_min = -1;
+int dbg_bct_max = -1;
+#endif  /* HAIFA */
+
+
 /* Indexed by loop number, contains a nonzero value if the "loop" isn't
    really a loop (an insn outside the loop branches into it).  */
 
@@ -286,6 +322,32 @@ static int maybe_eliminate_biv_1 ();
 static int last_use_this_basic_block ();
 static void record_initial ();
 static void update_reg_last_use ();
+
+#ifdef HAIFA
+/* This function is external; it is defined in unroll.c.  */
+void iteration_info ();
+
+/* The two main functions for implementing bct: the first is to be
+   called before loop unrolling, and the second after it.  */
+static void analyze_loop_iterations ();
+static void insert_bct ();
+
+/* Auxiliary function that inserts the bct pattern into the loop.  */
+static void instrument_loop_bct ();
+
+/* Indirect_jump_in_function is computed once per function.  */
+int indirect_jump_in_function = 0;
+static int indirect_jump_in_function_p ();
+
+int loop_number ();
+static int is_power_of_2();
+static int is_conditional_branch ();
+
+/* Debugging functions.  */
+int fix_bct_param ();
+static int check_bct_param ();
+#endif  /* HAIFA */
+
 
 /* Relative gain of eliminating various kinds of operations.  */
 int add_cost;
@@ -379,6 +441,32 @@ loop_optimize (f, dumpfile)
   loop_number_exit_labels = (rtx *) alloca (max_loop_num * sizeof (rtx));
   loop_number_exit_count = (int *) alloca (max_loop_num * sizeof (int));
 
+#ifdef HAIFA
+  /* Allocate for BCT optimization */
+  loop_can_insert_bct = (int *) alloca (max_loop_num * sizeof (int));
+  bzero ((char *) loop_can_insert_bct, max_loop_num * sizeof (int));
+
+  loop_used_count_register = (int *) alloca (max_loop_num * sizeof (int));
+  bzero ((char *) loop_used_count_register, max_loop_num * sizeof (int));
+
+  loop_unroll_factor = (int *) alloca (max_loop_num *sizeof (int));
+  bzero ((char *) loop_unroll_factor, max_loop_num * sizeof (int));
+
+  loop_unroll_iter = (int *) alloca (max_loop_num *sizeof (int));
+  bzero ((char *) loop_unroll_iter, max_loop_num * sizeof (int));
+
+  loop_increment = (rtx *) alloca (max_loop_num * sizeof (rtx));
+  loop_comparison_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
+  loop_start_value = (rtx *) alloca (max_loop_num * sizeof (rtx));
+  bzero ((char *) loop_increment, max_loop_num * sizeof (rtx));
+  bzero ((char *) loop_comparison_value, max_loop_num * sizeof (rtx));
+  bzero ((char *) loop_start_value, max_loop_num * sizeof (rtx));
+
+  loop_comparison_code 
+    = (enum rtx_code *) alloca (max_loop_num * sizeof (enum rtx_code));
+  bzero ((char *) loop_comparison_code, max_loop_num * sizeof (enum rtx_code));
+#endif  /* HAIFA */
+
   /* Find and process each loop.
      First, find them, and record them in order of their beginnings.  */
   find_and_verify_loops (f);
@@ -430,6 +518,12 @@ loop_optimize (f, dumpfile)
   if (flag_unroll_loops && write_symbols != NO_DEBUG)
     find_loop_tree_blocks ();
 
+#ifdef HAIFA
+  /* determine if the function has indirect jump. If it does,
+     we cannot instrument loops in this function with bct */
+  indirect_jump_in_function = indirect_jump_in_function_p (f);
+#endif  /* HAIFA */
+
   /* Now scan the loops, last ones first, since this means inner ones are done
      before outer ones.  */
   for (i = max_loop_num-1; i >= 0; i--)
@@ -2639,6 +2733,11 @@ mark_loop_jump (x, loop_num)
 
       if (loop_num != -1)
 	{
+#ifdef HAIFA
+	  LABEL_OUTSIDE_LOOP_P (x) = 1;
+	  LABEL_NEXTREF (x) = loop_number_exit_labels[loop_num];
+#endif  /* HAIFA */
+
 	  loop_number_exit_labels[loop_num] = x;
 
 	  for (outer_loop = loop_num; outer_loop != -1;
@@ -3755,6 +3854,16 @@ strength_reduce (scan_start, end, loop_top, insn_count,
      so that "decrement and branch until zero" insn can be used.  */
   check_dbra_loop (loop_end, insn_count, loop_start);
 
+#ifdef HAIFA
+  /* record loop-variables relevant for BCT optimization before unrolling
+     the loop.  Unrolling may update part of this information, and the
+     correct data will be used for generating the BCT.  */
+#ifdef HAVE_decrement_and_branch_on_count
+  if (HAVE_decrement_and_branch_on_count)
+    analyze_loop_iterations (loop_start, loop_end);
+#endif
+#endif  /* HAIFA */
+
   /* Create reg_map to hold substitutions for replaceable giv regs.  */
   reg_map = (rtx *) alloca (max_reg_before_loop * sizeof (rtx));
   bzero ((char *) reg_map, max_reg_before_loop * sizeof (rtx));
@@ -4247,6 +4356,14 @@ strength_reduce (scan_start, end, loop_top, insn_count,
   if (flag_unroll_loops)
     unroll_loop (loop_end, insn_count, loop_start, end_insert_before, 1);
 
+#ifdef HAIFA
+  /* instrument the loop with bct insn */
+#ifdef HAVE_decrement_and_branch_on_count
+  if (HAVE_decrement_and_branch_on_count)
+    insert_bct (loop_start, loop_end);
+#endif
+#endif  /* HAIFA */
+
   if (loop_dump_stream)
     fprintf (loop_dump_stream, "\n");
 }
@@ -6932,3 +7049,638 @@ get_condition_for_loop (x)
   return gen_rtx (swap_condition (GET_CODE (comparison)), VOIDmode,
 		  XEXP (comparison, 1), XEXP (comparison, 0));
 }
+
+#ifdef HAIFA
+/* Decide whether the loop delimited by loop_start and loop_end (its first and
+   last insns) can be instrumented with the use of count register.
+   This function works in cooperation with insert_bct ().
+   loop_can_insert_bct[loop_num] is cleared on entry and set to 1 only when
+   every check below succeeds.  In that case the count register is marked as
+   used for this loop and its enclosing loops, and these are also set:
+    loop_start_value[loop_num]
+    loop_comparison_value[loop_num]
+    loop_increment[loop_num]
+    loop_comparison_code[loop_num] */
+
+static
+void analyze_loop_iterations (loop_start, loop_end)
+  rtx loop_start, loop_end;
+{
+  rtx comparison, comparison_value;
+  rtx iteration_var, initial_value, increment;
+  enum rtx_code comparison_code;
+
+  rtx last_loop_insn;
+  rtx insn;
+  int i;
+
+  /* machine mode of the iteration variable */
+  enum machine_mode original_mode;
+
+  /* find the number of the loop */
+  int loop_num = loop_number (loop_start, loop_end);
+
+  /* assume failure; this is set to 1 only at the end, once all checks passed */
+  loop_can_insert_bct[loop_num] = 0;
+
+  /* debugging: check_bct_param () decides whether this loop is a candidate */
+  if ( !check_bct_param () )
+    return;
+
+  /* give up if the optimization is suppressed.  */
+  if ( !flag_branch_on_count_reg )
+    return;
+
+  /* make sure that count-reg is not in use */
+  if (loop_used_count_register[loop_num]){
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+	      "analyze_loop_iterations %d: BCT instrumentation failed: count register already in use\n",
+	      loop_num);
+    return;
+  }
+
+  /* make sure that the function has no indirect jumps.  */
+  if (indirect_jump_in_function){
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+              "analyze_loop_iterations %d: BCT instrumentation failed: indirect jump in function\n",
+	      loop_num);
+    return;
+  }
+
+  /* make sure that the last loop insn is a conditional jump */
+  last_loop_insn = PREV_INSN (loop_end);
+  if (!is_conditional_branch (last_loop_insn)) {
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+              "analyze_loop_iterations %d: BCT instrumentation failed: invalid jump at loop end\n",
+	      loop_num);
+    return;
+  }
+
+  /* First find the iteration variable.  If the last insn is a conditional
+     branch, and the insn preceding it tests a register value, make that
+     register the iteration variable.  */
+
+  /* We used to use prev_nonnote_insn here, but that fails because it might
+     accidentally get the branch for a contained loop if the branch for this
+     loop was deleted.  We can only trust branches immediately before the
+     loop_end.  */
+
+  comparison = get_condition_for_loop (last_loop_insn);
+  /* ??? Get_condition may switch position of induction variable and
+     invariant register when it canonicalizes the comparison.  */
+
+  if (comparison == 0) {
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+	      "analyze_loop_iterations %d: BCT instrumentation failed: comparison not found\n",
+	      loop_num);
+    return;
+  }
+
+  comparison_code = GET_CODE (comparison);
+  iteration_var = XEXP (comparison, 0);
+  comparison_value = XEXP (comparison, 1);
+
+  original_mode = GET_MODE (iteration_var);
+  if (GET_MODE_CLASS (original_mode) != MODE_INT
+      || GET_MODE_SIZE (original_mode) != UNITS_PER_WORD) {
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+	      "analyze_loop_iterations %d: BCT Instrumentation failed: loop variable not integer\n",
+	      loop_num);
+    return;
+  }
+
+  /* get info about loop bounds and increment */
+  iteration_info (iteration_var, &initial_value, &increment,
+		  loop_start, loop_end);
+
+  /* all required loop data must be present and invariant (the indirect-jump test is repeated) */
+  if (!(initial_value && increment && comparison_value
+	&& invariant_p (comparison_value) && invariant_p (increment)
+	&& ! indirect_jump_in_function))
+    {
+      if (loop_dump_stream) {
+	fprintf (loop_dump_stream,
+                "analyze_loop_iterations %d: BCT instrumentation failed because of wrong loop: ", loop_num);
+	if (!(initial_value && increment && comparison_value)) {
+	  fprintf (loop_dump_stream, "\tbounds not available: ");
+	  if ( ! initial_value )
+	    fprintf (loop_dump_stream, "initial ");
+	  if ( ! increment )
+	    fprintf (loop_dump_stream, "increment ");
+	  if ( ! comparison_value )
+	    fprintf (loop_dump_stream, "comparison ");
+	  fprintf (loop_dump_stream, "\n");
+	}
+	if (!invariant_p (comparison_value) || !invariant_p (increment))
+	  fprintf (loop_dump_stream, "\tloop bounds not invariant\n");
+      }
+      return;
+    }
+
+  /* make sure that the increment is a compile-time constant (CONST_INT) */
+  if (GET_CODE (increment) != CONST_INT) {
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+              "analyze_loop_iterations %d: instrumentation failed: not arithmetic loop\n",
+	      loop_num);
+    return;
+  }
+
+  /* make sure that the loop contains neither function call, nor jump on table.
+     (the count register might be altered by the called function, and might
+     be used for a branch on table).  */
+  for (insn = loop_start; insn && insn != loop_end; insn = NEXT_INSN (insn)) {
+    if (GET_CODE (insn) == CALL_INSN){
+      if (loop_dump_stream)
+	fprintf (loop_dump_stream,
+                "analyze_loop_iterations %d: BCT instrumentation failed: function call in the loop\n",
+		loop_num);
+      return;
+    }
+
+    if (GET_CODE (insn) == JUMP_INSN
+       && (GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+	   || GET_CODE (PATTERN (insn)) == ADDR_VEC)){
+      if (loop_dump_stream)
+	fprintf (loop_dump_stream,
+                "analyze_loop_iterations %d: BCT instrumentation failed: computed branch in the loop\n",
+		loop_num);
+      return;
+    }
+  }
+
+  /* At this point, we are sure that the loop can be instrumented with BCT.
+     Some of the loops, however, will not be instrumented - the final decision
+     is taken by insert_bct () */
+  if (loop_dump_stream)
+    fprintf (loop_dump_stream,
+            "analyze_loop_iterations: loop (luid =%d) can be BCT instrumented.\n",
+	    loop_num);
+
+  /* mark this loop and all enclosing loops as unable to use the count register */
+  /* ???: In fact, since insert_bct may decide not to instrument this loop,
+     marking here may prevent instrumenting an enclosing loop that could
+    actually be instrumented.  But since this is rare, it is safer to mark
+    here in case the order of calling  (analyze/insert)_bct would be changed.  */
+  for (i=loop_num; i != -1; i = loop_outer_loop[i])
+    loop_used_count_register[i] = 1;
+
+  /* Set data structures which will be used by the instrumentation phase */
+  loop_start_value[loop_num] = initial_value;
+  loop_comparison_value[loop_num] = comparison_value;
+  loop_increment[loop_num] = increment;
+  loop_comparison_code[loop_num] = comparison_code;
+  loop_can_insert_bct[loop_num] = 1;
+}
+
+
+/* instrument loop for insertion of bct instruction.  We distinguish between
+ loops with compile-time bounds and those with run-time bounds.  The loop
+ behaviour is analyzed according to the following characteristics/variables:
+ ; Input variables:
+ ;   comparison-value: the value to which the iteration counter is compared.
+ ;   initial-value: iteration-counter initial value.
+ ;   increment: iteration-counter increment.
+ ; Computed variables:
+ ;   increment-direction: the sign of the increment.
+ ;   compare-direction: '1' for LT, LE, '-1' for GT, GE, '0' for NE.
+ ;   range-direction: sign (comparison-value - initial-value)
+ We give up on the following cases:
+ ; comparison code EQ; loop variable overflow.
+ ; run-time loop bounds with comparison code NE.
+ */
+
+static void
+insert_bct (loop_start, loop_end)
+     rtx loop_start, loop_end;
+{
+  rtx initial_value, comparison_value, increment;
+  enum rtx_code comparison_code;
+
+  int increment_direction, compare_direction;
+  int unsigned_p = 0;
+
+  /* if the loop condition is <= or >=, the number of iteration
+      is 1 more than the range of the bounds of the loop */
+  int add_iteration = 0;
+
+  /* the only machine mode we work with; it is hard-coded to SImode here.
+     NOTE(review): presumably assumes the word-sized integer mode is SImode - confirm */
+  enum machine_mode loop_var_mode = SImode;
+
+  int loop_num = loop_number (loop_start, loop_end);
+
+  /* fetch the loop variables recorded by analyze_loop_iterations ().  They are
+     only used after loop_can_insert_bct[loop_num] has been checked below.  */
+  comparison_code = loop_comparison_code[loop_num];
+  initial_value = loop_start_value[loop_num];
+  comparison_value = loop_comparison_value[loop_num];
+  increment = loop_increment[loop_num];
+
+  /* check analyze_loop_iterations decision for this loop.  */
+  if (! loop_can_insert_bct[loop_num]){
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+	      "insert_bct: [%d] - was decided not to instrument by analyze_loop_iterations ()\n",
+	      loop_num);
+    return;
+  }
+
+  /* make sure that the loop was not fully unrolled (unroll factor of -1).  */
+  if (loop_unroll_factor[loop_num] == -1){
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream, "insert_bct %d: was completely unrolled\n", loop_num);
+    return;
+  }
+
+  /* make sure that the last loop insn is a conditional jump .
+     This check is repeated from analyze_loop_iterations (),
+     because unrolling might have changed that.  */
+  if (!is_conditional_branch (PREV_INSN (loop_end))){
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+	      "insert_bct: not instrumenting BCT because of invalid branch\n");
+    return;
+  }
+
+  /* fix increment in case loop was unrolled: scale it by the unroll factor.  */
+  if (loop_unroll_factor[loop_num] > 1)
+    increment = GEN_INT ( INTVAL (increment) * loop_unroll_factor[loop_num] );
+
+  /* determine properties and directions of the loop (unsigned_p is set here but not read later) */
+  increment_direction = (INTVAL (increment) > 0) ? 1:-1;
+  switch ( comparison_code ) {
+  case LEU:
+    unsigned_p = 1;
+    /* fallthrough */
+  case LE:
+    compare_direction = 1;
+    add_iteration = 1;
+    break;
+  case GEU:
+    unsigned_p = 1;
+    /* fallthrough */
+  case GE:
+    compare_direction = -1;
+    add_iteration = 1;
+    break;
+  case EQ:
+    /* in this case we cannot know the number of iterations */
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+              "insert_bct: %d: loop cannot be instrumented: == in condition\n",
+	      loop_num);
+    return;
+  case LTU:
+    unsigned_p = 1;
+    /* fallthrough */
+  case LT:
+    compare_direction = 1;
+    break;
+  case GTU:
+    unsigned_p = 1;
+    /* fallthrough */
+  case GT:
+    compare_direction = -1;
+    break;
+  case NE:
+    compare_direction = 0;
+    break;
+  default:
+    abort ();
+  }
+
+
+  /* make sure that the loop does not end by an overflow */
+  if (compare_direction != increment_direction) {
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+              "insert_bct: %d: loop cannot be instrumented: terminated by overflow\n",
+	      loop_num);
+    return;
+  }
+
+  /* try to instrument the loop.  */
+
+  /* Handle the simpler case, where the bounds are known at compile time.  */
+  if (GET_CODE (initial_value) == CONST_INT && GET_CODE (comparison_value) == CONST_INT)
+    {
+      int n_iterations;
+      int increment_value_abs = INTVAL (increment) * increment_direction;
+
+      /* check the relation between compare-val and initial-val */
+      int difference = INTVAL (comparison_value) - INTVAL (initial_value);
+      int range_direction = (difference > 0) ? 1 : -1;
+
+      /* make sure the loop is worth it: give up when the bounds differ by less than 3 */
+      if (difference > -3 && difference < 3) {
+	if (loop_dump_stream)
+	  fprintf (loop_dump_stream,
+		  "insert_bct: loop %d not BCT instrumented: too small iteration count.\n",
+		  loop_num);
+	return;
+      }
+
+      /* make sure that the loop executes at least once */
+      if ((range_direction ==  1 && compare_direction == -1)
+	  || (range_direction == -1 && compare_direction ==  1))
+	{
+	  if (loop_dump_stream)
+	    fprintf (loop_dump_stream,
+		    "insert_bct: loop %d: does not iterate even once. Not instrumenting.\n",
+		    loop_num);
+	  return;
+	}
+
+      /* make sure that the loop does not end by an overflow (in compile time
+         bounds we must have an additional check for overflow, because here
+         we also support the compare code of 'NE').  */
+      if (comparison_code == NE
+	  && increment_direction != range_direction) {
+	if (loop_dump_stream)
+	  fprintf (loop_dump_stream,
+		  "insert_bct (compile time bounds): %d: loop not instrumented: terminated by overflow\n",
+		  loop_num);
+	return;
+      }
+
+      /* Determine the number of iterations by:
+	 ;
+         ;                  compare-val - initial-val + (increment -1) + additional-iteration
+         ; num_iterations = -----------------------------------------------------------------
+         ;                                           increment
+	 */
+      difference = (range_direction > 0) ? difference : -difference;
+#if 0
+      fprintf (stderr, "difference is: %d\n", difference); /* @*/
+      fprintf (stderr, "increment_value_abs is: %d\n", increment_value_abs); /* @*/
+      fprintf (stderr, "add_iteration is: %d\n", add_iteration); /* @*/
+      fprintf (stderr, "INTVAL (comparison_value) is: %d\n", INTVAL (comparison_value)); /* @*/
+      fprintf (stderr, "INTVAL (initial_value) is: %d\n", INTVAL (initial_value)); /* @*/
+#endif
+
+      if (increment_value_abs == 0) {
+	fprintf (stderr, "insert_bct: error: increment == 0 !!!\n");
+	abort ();
+      }
+      n_iterations = (difference + increment_value_abs - 1 + add_iteration)
+	/ increment_value_abs;
+
+#if 0
+      fprintf (stderr, "number of iterations is: %d\n", n_iterations); /* @*/
+#endif
+      instrument_loop_bct (loop_start, loop_end, GEN_INT (n_iterations));
+
+      /* Done with this loop.  */
+      return;
+    }
+
+  /* Handle the more complex case, that the bounds are NOT known at compile time.  */
+  /* In this case we generate run_time calculation of the number of iterations */
+
+  /* With runtime bounds, if the compare is of the form '!=' we give up */
+  if (comparison_code == NE) {
+    if (loop_dump_stream)
+      fprintf (loop_dump_stream,
+	      "insert_bct: fail for loop %d: runtime bounds with != comparison\n",
+	      loop_num);
+    return;
+  }
+
+  else {
+    /* We rely on the existence of run-time guard to ensure that the
+       loop executes at least once.  */
+    rtx sequence;
+    rtx iterations_num_reg;
+
+    int increment_value_abs = INTVAL (increment) * increment_direction;
+
+    /* make sure that the increment is a power of two, otherwise (an
+       expensive) divide is needed.  */
+    if ( !is_power_of_2(increment_value_abs) )
+      {
+	if (loop_dump_stream)
+	  fprintf (loop_dump_stream,
+		  "insert_bct: not instrumenting BCT because the increment is not power of 2\n");
+	return;
+      }
+
+    /* compute the number of iterations */
+    start_sequence ();
+    {
+      /* CYGNUS LOCAL: HAIFA bug fix */
+      rtx temp_reg;
+
+      /* Again, the number of iterations is calculated by:
+	 ;
+         ;                  compare-val - initial-val + (increment -1) + additional-iteration
+         ; num_iterations = -----------------------------------------------------------------
+         ;                                           increment
+	 */
+      /* ??? Do we have to call copy_rtx here before passing rtx to
+	 expand_binop?  */
+      if (compare_direction > 0) {
+	/* <, <= :the loop variable is increasing */
+	temp_reg = expand_binop (loop_var_mode, sub_optab, comparison_value,
+				 initial_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
+      }
+      else {
+	temp_reg = expand_binop (loop_var_mode, sub_optab, initial_value,
+				 comparison_value, NULL_RTX, 0, OPTAB_LIB_WIDEN);
+      }
+
+      if (increment_value_abs - 1 + add_iteration != 0)
+	temp_reg = expand_binop (loop_var_mode, add_optab, temp_reg,
+				 GEN_INT (increment_value_abs - 1 + add_iteration),
+				 NULL_RTX, 0, OPTAB_LIB_WIDEN);
+
+      if (increment_value_abs != 1)
+	{
+	  /* ??? This will generate an expensive divide instruction for
+	     most targets.  The original authors apparently expected this
+	     to be a shift, since they test for power-of-2 divisors above,
+	     but just naively generating a divide instruction will not give
+	     a shift.  It happens to work for the PowerPC target because
+	     the rs6000.md file has a divide pattern that emits shifts.
+	     It will probably not work for any other target.  */
+	  iterations_num_reg = expand_binop (loop_var_mode, sdiv_optab,
+					     temp_reg,
+					     GEN_INT (increment_value_abs),
+					     NULL_RTX, 0, OPTAB_LIB_WIDEN);
+	}
+      else
+	iterations_num_reg = temp_reg;
+      /* END CYGNUS LOCAL: HAIFA bug fix */
+    }
+    sequence = gen_sequence ();
+    end_sequence ();
+    emit_insn_before (sequence, loop_start);
+    instrument_loop_bct (loop_start, loop_end, iterations_num_reg);
+  }
+}
+
+/* instrument loop by inserting a bct in it. This is done in the following way:
+   1. A REG rtx is created for the hard register COUNT_REGISTER_REGNUM (the count
+    register).
+   2. At the head of the loop it is initialized, through a fresh pseudo register,
+    with the value passed in the loop_num_iterations parameter.
+   3. At the end of the loop, the jump built by gen_decrement_and_branch_on_count
+    is emitted, targeting a new label emitted after loop_start.
+   4. The compare&branch on the old variable is deleted. So, if the loop-variable was
+    not used elsewhere, it will be eliminated by data-flow analysis.
+   Nothing is done unless HAVE_decrement_and_branch_on_count is defined and
+    nonzero.  */
+
+static void
+instrument_loop_bct (loop_start, loop_end, loop_num_iterations)
+     rtx loop_start, loop_end;
+     rtx loop_num_iterations;
+{
+  rtx temp_reg1, temp_reg2;
+  rtx start_label;
+
+  rtx sequence;
+  enum machine_mode loop_var_mode = SImode;
+
+#ifdef HAVE_decrement_and_branch_on_count
+  if (HAVE_decrement_and_branch_on_count)
+    {
+      if (loop_dump_stream)
+	fprintf (loop_dump_stream, "Loop: Inserting BCT\n");
+
+      /* eliminate the check on the old variable: delete the two insns before loop_end (presumably compare and branch) */
+      delete_insn (PREV_INSN (loop_end));
+      delete_insn (PREV_INSN (loop_end));
+
+      /* insert the label which will delimit the start of the loop */
+      start_label = gen_label_rtx ();
+      emit_label_after (start_label, loop_start);
+
+      /* insert initialization of the count register into the loop header */
+      start_sequence ();
+      temp_reg1 = gen_reg_rtx (loop_var_mode);
+      emit_insn (gen_move_insn (temp_reg1, loop_num_iterations));
+
+      /* this will be count register */
+      temp_reg2 = gen_rtx (REG, loop_var_mode, COUNT_REGISTER_REGNUM);
+      /* we have to move the value to the count register from an GPR
+	 because rtx pointed to by loop_num_iterations could contain
+	 expression which cannot be moved into count register */
+      emit_insn (gen_move_insn (temp_reg2, temp_reg1));
+
+      sequence = gen_sequence ();
+      end_sequence ();
+      emit_insn_after (sequence, loop_start);
+
+      /* insert new comparison on the count register instead of the
+	 old one, generating the needed BCT pattern (that will be
+	 later recognized by assembly generation phase).  */
+      emit_jump_insn_before (gen_decrement_and_branch_on_count (temp_reg2, start_label),
+			     loop_end);
+      LABEL_NUSES (start_label)++;
+    }
+
+#endif /* HAVE_decrement_and_branch_on_count */
+}
+
+/* Return the loop number recorded in uid_loop_num[] for the insn LOOP_START;
+   abort if that entry is -1.  LOOP_END is not examined.  */
+int
+loop_number (loop_start, loop_end)
+     rtx loop_start, loop_end;
+{
+  /* assume that this insn contains the LOOP_START
+     note, so it will not be changed by the loop unrolling */
+  int num = uid_loop_num[INSN_UID (loop_start)];
+
+  /* sanity check - should never happen */
+  if (num == -1)
+    abort ();
+
+  return num;
+}
+
+/* Return 1 if the insn chain starting at START contains a JUMP_INSN that is
+   not a SET of a LABEL_REF, nor a SET of an IF_THEN_ELSE whose first arm is
+   pc_rtx or a LABEL_REF (i.e. an indirect/computed jump); else return 0.  */
+static int
+indirect_jump_in_function_p (start)
+     rtx start;
+{
+  rtx insn;
+  int found = 0;
+
+  for (insn = start; insn; insn = NEXT_INSN (insn))
+    {
+      rtx pat;
+
+      if (GET_CODE (insn) != JUMP_INSN)
+	continue;
+      pat = PATTERN (insn);
+      if (GET_CODE (pat) == SET
+	  && (GET_CODE (XEXP (pat, 1)) == LABEL_REF
+	      || (GET_CODE (XEXP (pat, 1)) == IF_THEN_ELSE
+		  && (XEXP (XEXP (pat, 1), 1) == pc_rtx
+		      || GET_CODE (XEXP (XEXP (pat, 1), 1)) == LABEL_REF))))
+	continue;
+      found = 1;
+    }
+
+  return found;
+}
+
+/* Return 1 iff N is a positive power of 2; zero and negative N yield 0.  */
+static int
+is_power_of_2(n)
+     int n;
+{
+  return n > 0 && (n & (n-1)) == 0;
+}
+
+/* Return 1 iff INSN is a conditional jump, i.e. a JUMP_INSN whose pattern is
+   a SET with an IF_THEN_ELSE as its source; return 0 otherwise.  */
+int
+is_conditional_branch (insn)
+     rtx insn;
+{
+  rtx pat;
+
+  if (GET_CODE (insn) != JUMP_INSN)
+    return 0;
+  pat = PATTERN (insn);
+  return (GET_CODE (pat) == SET
+	  && GET_CODE (XEXP (pat, 1)) == IF_THEN_ELSE);
+}
+
+/* debugging: called from toplev.c upon detection of the -fbct-***-N options; always returns 0.  */
+int
+fix_bct_param (param, val)
+     char *param, *val;
+{
+  if ( !strcmp (param, "max") )
+    dbg_bct_max = atoi (val);
+  else if ( !strcmp (param, "min") )
+    dbg_bct_min = atoi (val);
+  return 0;
+}
+
+/* debugging: count this call, and return 1 iff the running count lies in
+   (dbg_bct_min, dbg_bct_max]; a bound equal to -1 is not enforced.  */
+static int
+check_bct_param ()
+{
+  static int calls_seen = 0;
+  int above_min, within_max;
+
+  calls_seen++;
+  above_min = (calls_seen > dbg_bct_min || dbg_bct_min == -1);
+  within_max = (calls_seen <= dbg_bct_max || dbg_bct_max == -1);
+  return above_min && within_max;
+}
+#endif	/* HAIFA */
+/* END CYGNUS LOCAL haifa */
diff --git a/gcc/loop.h b/gcc/loop.h
index 3536fa1e20d..d8c83cdcab6 100644
--- a/gcc/loop.h
+++ b/gcc/loop.h
@@ -181,3 +181,13 @@ rtx final_biv_value PROTO((struct iv_class *, rtx, rtx));
 rtx final_giv_value PROTO((struct induction *, rtx, rtx));
 void emit_unrolled_add PROTO((rtx, rtx, rtx));
 int back_branch_in_range_p PROTO((rtx, rtx, rtx));
+
+#ifdef HAIFA
+/* Data shared between unroll.c and loop.c for the insertion of the
+   branch-on-count instruction; only declared when HAIFA is defined.  */
+extern int *loop_unroll_factor;
+extern rtx *loop_start_value;
+extern int *loop_unroll_iter;
+extern int loop_number();
+#endif  /* HAIFA */
+
diff --git a/gcc/regmove.c b/gcc/regmove.c
new file mode 100644
index 00000000000..639059b3686
--- /dev/null
+++ b/gcc/regmove.c
@@ -0,0 +1,983 @@
+/* Move registers around to reduce number of move instructions needed.
+   Copyright (C) 1987, 88, 89, 92-5, 1996, 1997 Free Software Foundation, Inc.
+
+This file is part of GNU CC.
+
+GNU CC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU CC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU CC; see the file COPYING.  If not, write to
+the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
+
+
+/* This module looks for cases where matching constraints would force
+   an instruction to need a reload, and this reload would be a register
+   to register move.  It then attempts to change the registers used by the
+   instruction to avoid the move instruction.  */
+
+#include "config.h"
+#ifdef __STDC__
+#include <stdarg.h>
+#else
+#include <varargs.h>
+#endif
+
+/* Must precede rtl.h for FFS.  */
+#include <stdio.h>
+
+#include "rtl.h"
+#include "insn-config.h"
+#include "recog.h"
+#include "output.h"
+#include "reload.h"
+#include "regs.h"
+
+static int stable_but_for_p PROTO((rtx, rtx, rtx));
+
+#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) \
+    || defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
+
+/* INC_INSN is an instruction that adds INCREMENT to REG.  Try to fold it into
+   INSN as a post- (PRE == 0) or pre- (PRE == 1) in/decrement of REG's use as
+   an address.  Iff INC_INSN_SET is nonzero, inc_insn has a destination
+   different from src.  Return 1 on success, 0 otherwise.  */
+static int
+try_auto_increment (insn, inc_insn, inc_insn_set, reg, increment, pre)
+     rtx reg, insn, inc_insn ,inc_insn_set;
+     HOST_WIDE_INT increment;
+     int pre;
+{
+  enum rtx_code inc_code;
+
+  rtx pset = single_set (insn);
+  if (pset)
+    {
+      /* Can't use the size of SET_SRC, we might have something like
+	 (sign_extend:SI (mem:QI ...; use the mode of the address use instead.  */
+      rtx use = find_use_as_address (pset, reg, 0);
+      if (use != 0 && use != (rtx) 1)
+	{
+	  int size = GET_MODE_SIZE (GET_MODE (use));
+	  if (0
+#ifdef HAVE_POST_INCREMENT
+	      || (pre == 0 && (inc_code = POST_INC, increment == size))
+#endif
+#ifdef HAVE_PRE_INCREMENT
+	      || (pre == 1 && (inc_code = PRE_INC, increment == size))
+#endif
+#ifdef HAVE_POST_DECREMENT
+	      || (pre == 0 && (inc_code = POST_DEC, increment == -size))
+#endif
+#ifdef HAVE_PRE_DECREMENT
+	      || (pre == 1 && (inc_code = PRE_DEC, increment == -size))
+#endif
+          )
+            {
+              if (inc_insn_set)
+                validate_change
+                  (inc_insn, 
+                   &SET_SRC (inc_insn_set),
+		   XEXP (SET_SRC (inc_insn_set), 0), 1);
+              validate_change (insn, &XEXP (use, 0),
+                               gen_rtx (inc_code,
+                                        Pmode,
+                                        reg), 1);
+              if (apply_change_group ())
+                {
+                  REG_NOTES (insn)
+                    = gen_rtx (EXPR_LIST, REG_INC,
+                               reg, REG_NOTES (insn));
+                  if (! inc_insn_set)
+                    {
+                      PUT_CODE (inc_insn, NOTE);
+                      NOTE_LINE_NUMBER (inc_insn) = NOTE_INSN_DELETED;
+                      NOTE_SOURCE_FILE (inc_insn) = 0;
+                    }
+		  return 1;
+                }
+            }
+        }
+    }
+  return 0;
+}
+#endif  /* defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT) */
+
+void
+regmove_optimize (f, nregs, regmove_dump_file)
+     rtx f;
+     int nregs;
+     FILE *regmove_dump_file;
+{
+#ifdef REGISTER_CONSTRAINTS
+  rtx insn;
+  int matches[MAX_RECOG_OPERANDS][MAX_RECOG_OPERANDS];
+  int modified[MAX_RECOG_OPERANDS];
+  int early_clobber[MAX_RECOG_OPERANDS];
+  int commutative;
+  int pass;
+
+  /* A forward/backward pass.  Replace output operands with input operands.  */
+
+  for (pass = 0; pass < 2; pass++)
+    {
+      if (regmove_dump_file)
+	fprintf (regmove_dump_file, "Starting %s pass...\n",
+		 pass ? "backward" : "forward");
+
+      for (insn = pass ? get_last_insn () : f; insn;
+	   insn = pass ? PREV_INSN (insn) : NEXT_INSN (insn))
+	{
+	  if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+	    {
+	      int insn_code_number = recog_memoized (insn);
+	      int operand_number, match_number;
+	      
+	      if (insn_code_number < 0)
+		continue;
+    
+	      insn_extract (insn);
+	      if (! constrain_operands (insn_code_number, 0))
+		continue;
+	      
+	      commutative = -1;
+    
+	      /* Must initialize this before the loop, because the code for
+		 the commutative case may set matches for operands other than
+		 the current one.  */
+	      bzero (matches, sizeof (matches));
+    
+	      for (operand_number = 0;
+		   operand_number < insn_n_operands[insn_code_number];
+		   operand_number++)
+		{
+		  int output_operand = 0;
+		  int matching_operand = operand_number;
+		  char *p, c;
+		  int i = 0;
+    
+		  modified[operand_number] = 0;
+		  early_clobber[operand_number] = 0;
+    
+		  p = insn_operand_constraint[insn_code_number][operand_number];
+
+		  if (*p == '=')
+		    modified[operand_number] = 2;
+		  else if (*p == '+')
+		    modified[operand_number] = 1;
+
+		  for (;*p && i < which_alternative; p++)
+		    if (*p == ',')
+		      i++;
+    
+		  while ((c = *p++) != '\0' && c != ',')
+		    switch (c)
+		      {
+		      case '=':
+			break;
+		      case '+':
+			break;
+		      case '&':
+			early_clobber[operand_number] = 1;
+			break;
+		      case '%':
+			commutative = operand_number;
+			break;
+		      case '0': case '1': case '2': case '3': case '4':
+		      case '5': case '6': case '7': case '8': case '9':
+			c -= '0';
+			matches[operand_number][c] = 1;
+			if (commutative >= 0)
+			  {
+			    if (c == commutative || c == commutative + 1)
+			      {
+				int other = c + (c == commutative ? 1 : -1);
+				matches[operand_number][other] = 1;
+			      }
+			    if (operand_number == commutative
+				|| operand_number == commutative + 1)
+			      {
+				int other = (operand_number
+					     + (operand_number == commutative
+						? 1 : -1));
+				matches[other][c] = 1;
+			      }
+			  }
+			break;
+		      }
+		}
+    
+	      /* Now scan through the operands looking for a source operand
+		 which is supposed to match the destination operand.
+		 Then scan forward for an instruction which uses the dest
+		 operand.
+		 If it dies there, then replace the dest in both operands with
+		 the source operand.  */
+    
+	      for (operand_number = 0;
+		   operand_number < insn_n_operands[insn_code_number];
+		   operand_number++)
+		{
+		  for (match_number = 0;
+		       match_number < insn_n_operands[insn_code_number];
+		       match_number++)
+		    {
+		      rtx set, p, src, dst, src_subreg;
+		      rtx post_inc = 0, post_inc_set = 0, search_end = 0;
+		      rtx src_note, dst_note;
+		      int success = 0;
+		      int num_calls = 0;
+		      enum rtx_code code = NOTE;
+		      HOST_WIDE_INT insn_const, newconst;
+		      rtx overlap = 0; /* need to move insn ? */
+    
+		      /* Nothing to do if the two operands aren't supposed to
+			 match.  */
+		      if (matches[operand_number][match_number] == 0)
+			continue;
+    
+		      src = recog_operand[operand_number];
+		      dst = recog_operand[match_number];
+    
+		      if (GET_CODE (src) != REG
+			  || REGNO (src) < FIRST_PSEUDO_REGISTER)
+			continue;
+    
+		      src_subreg = src;
+		      if (GET_CODE (dst) == SUBREG
+			  && GET_MODE_SIZE (GET_MODE (dst))
+			     >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (dst))))
+			{
+			  src_subreg
+			    = gen_rtx(SUBREG,  GET_MODE (SUBREG_REG (dst)),
+				      src, SUBREG_WORD (dst));
+			  dst = SUBREG_REG (dst);
+			}
+		      if (GET_CODE (dst) != REG
+			  || REGNO (dst) < FIRST_PSEUDO_REGISTER)
+			continue;
+    
+		      /* If the operands already match, then there is nothing
+			 to do.  */
+		      if (operands_match_p (src, dst))
+			continue;
+    
+		      set = single_set (insn);
+		      if (! set)
+			continue;
+    
+		      /* operand_number/src must be a read-only operand, and
+			 match_operand/dst must be a write-only operand.  */
+		      if (modified[match_number] != 2)
+			continue;
+    
+		      if (early_clobber[match_number] == 1)
+			continue;
+    
+		      if (modified[operand_number] != 0)
+			continue;
+    
+		      /* Make sure match_operand is the destination.  */
+		      if (recog_operand[match_number] != SET_DEST (set))
+			continue;
+		  
+		      src_note = find_reg_note (insn, REG_DEAD, src);
+    
+		      if (! src_note)
+			{
+			  /* Look for (set (regX) (op regA constX))
+				      (set (regY) (op regA constY))
+			     and change that to
+				      (set (regA) (op regA constX)).
+				      (set (regY) (op regA constY-constX)).
+			     This works for add and shift operations, if
+			     regA is dead after or set by the second insn.  */
+
+			  code = GET_CODE (SET_SRC (set));
+			  if ((code == PLUS || code == LSHIFTRT
+			       || code == ASHIFT || code == ASHIFTRT)
+			      && XEXP (SET_SRC (set), 0) == src
+			      && (GET_CODE (XEXP (SET_SRC (set), 1))
+				  == CONST_INT))
+			    insn_const = INTVAL (XEXP (SET_SRC (set), 1));
+			  else if (! stable_but_for_p (SET_SRC (set), src, dst))
+			    continue;
+			  else
+			    /* We might find a src_note while scanning.  */
+			    code = NOTE;
+			}
+
+		      if (regmove_dump_file)
+			fprintf (regmove_dump_file,
+				 "Could fix operand %d of insn %d matching operand %d.\n",
+				 operand_number, INSN_UID (insn), match_number);
+    
+		      /* ??? If src is set once, and is set equal to a
+			 constant, then do not use it for this optimization,
+			 as this would make it no longer equivalent to a
+			 constant?  */
+    
+		      /* Scan forward to find the next instruction that
+			 uses the output operand.  If the operand dies here,
+			 then replace it in both instructions with
+			 operand_number.  */
+    
+		      for (p = NEXT_INSN (insn); p; p = NEXT_INSN (p))
+			{
+			  if (GET_CODE (p) == CODE_LABEL
+			      || GET_CODE (p) == JUMP_INSN
+			      || (GET_CODE (p) == NOTE
+				  && ((NOTE_LINE_NUMBER (p)
+				       == NOTE_INSN_LOOP_BEG)
+				      || (NOTE_LINE_NUMBER (p)
+					  == NOTE_INSN_LOOP_END))))
+			    break;
+    
+			  if (GET_RTX_CLASS (GET_CODE (p)) != 'i')
+			    continue;
+    
+			  if (reg_set_p (src, p) || reg_set_p (dst, p)
+			      || (GET_CODE (PATTERN (p)) == USE
+				  && reg_overlap_mentioned_p (src,
+							      XEXP (PATTERN (p),
+							      0))))
+			    break;
+    
+			  /* See if all of DST dies in P.  This test is
+			     slightly more conservative than it needs to be.  */
+			  if ((dst_note
+				= find_regno_note (p, REG_DEAD, REGNO (dst)))
+			      && (GET_MODE (XEXP (dst_note, 0))
+				  == GET_MODE (dst)))
+			    {
+			      if (! src_note)
+				{
+				  rtx q;
+				  rtx set2;
+    
+				  /* If an optimization is done, the value
+				     of SRC while P is executed will be
+				     changed.  Check that this is OK.  */
+				  if (reg_overlap_mentioned_p (src,
+							       PATTERN (p)))
+				    break;
+				  for (q = p; q; q = NEXT_INSN (q))
+				    {
+				      if (GET_CODE (q) == CODE_LABEL
+					  || GET_CODE (q) == JUMP_INSN
+					  || (GET_CODE (q) == NOTE
+					      && ((NOTE_LINE_NUMBER (q)
+						   == NOTE_INSN_LOOP_BEG)
+						  || (NOTE_LINE_NUMBER (q)
+						      == NOTE_INSN_LOOP_END))))
+					{
+					  q = 0;
+					  break;
+					}
+				      if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
+					continue;
+				      if (reg_overlap_mentioned_p (src,
+								   PATTERN (q))
+					  || reg_set_p (src, q))
+					break;
+				    }
+				  if (q)
+				    set2 = single_set (q);
+				  if (! q || ! set2
+				      || GET_CODE (SET_SRC (set2)) != code
+				      || XEXP (SET_SRC (set2), 0) != src
+				      || (GET_CODE (XEXP (SET_SRC (set2), 1))
+					  != CONST_INT)
+				      || (SET_DEST (set2) != src
+					  && !find_reg_note (q, REG_DEAD, src)))
+				    {
+				      /* If this is a PLUS, we can still save
+					 a register by doing
+					 src += insn_const;
+					 P;
+					 src -= insn_const; .
+					 This also gives opportunities for
+					 subsequent optimizations in the
+					 backward pass, so do it there.  */
+				      if (code == PLUS && pass == 1
+#ifdef HAVE_cc0
+					  /* We may not emit an insn directly
+					     after P if the latter sets CC0.  */
+					  && ! sets_cc0_p (PATTERN (p))
+#endif
+					  )
+
+					{
+					  search_end = q;
+					  q = insn;
+					  set2 = set;
+					  newconst = -insn_const;
+					  code = MINUS;
+					}
+				      else
+					break;
+				    }
+				  else
+				    {
+				      newconst
+					= (INTVAL (XEXP (SET_SRC (set2), 1))
+					   - insn_const);
+				      /* Reject out of range shifts.  */
+				      if (code != PLUS
+					  && (newconst < 0
+					      || (newconst
+						  >= GET_MODE_BITSIZE (GET_MODE (SET_SRC (set2))))))
+					break;
+				      if (code == PLUS)
+					{
+					  post_inc = q;
+					  if (SET_DEST (set2) != src)
+					    post_inc_set = set2;
+					}
+				    }
+				  /* We use 1 as last argument to
+				     validate_change so that all changes
+				     are accepted or rejected together by
+				     apply_change_group when it is called
+				     by validate_replace_rtx .  */
+				  validate_change (q, &XEXP (SET_SRC (set2), 1),
+						   GEN_INT (newconst), 1);
+				}
+			      validate_change (insn,
+					       recog_operand_loc[match_number],
+					       src, 1);
+			      if (validate_replace_rtx (dst, src_subreg, p))
+				success = 1;
+			      break;
+			    }
+    
+			  if (reg_overlap_mentioned_p (dst, PATTERN (p)))
+			    break;
+			  if (! src_note
+			      && reg_overlap_mentioned_p (src, PATTERN (p)))
+			    {
+			      /* INSN was already checked to be movable when
+				 we found no REG_DEAD note for src on it.  */
+			      overlap = p;
+			      src_note = find_reg_note (p, REG_DEAD, src);
+			    }
+    
+			  /* If we have passed a call instruction, and the
+			     pseudo-reg SRC is not already live across a call,
+			     then don't perform the optimization.  */
+			  if (GET_CODE (p) == CALL_INSN)
+			    {
+			      num_calls++;
+    
+			      if (REG_N_CALLS_CROSSED (REGNO (src)) == 0)
+				break;
+			    }
+			}
+    
+		      if (success)
+			{
+			  /* Remove the death note for DST from P.  */
+			  remove_note (p, dst_note);
+			  if (code == MINUS)
+			    {
+			      post_inc
+				= emit_insn_after (copy_rtx (PATTERN (insn)),
+						   p);
+#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
+			      if (search_end
+				  && try_auto_increment (search_end, post_inc,
+							 0, src, newconst, 1))
+				post_inc = 0;
+#endif
+			      validate_change (insn, &XEXP (SET_SRC (set), 1),
+					       GEN_INT (insn_const), 0);
+			      REG_N_SETS (REGNO (src))++;
+			    }
+			  if (overlap)
+			    {
+			      /* The lifetime of src and dest overlap,
+				 but we can change this by moving insn.  */
+			      rtx pat = PATTERN (insn);
+			      if (src_note)
+				remove_note (overlap, src_note);
+#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT)
+			      if (code == PLUS
+				  && try_auto_increment (overlap, insn, 0,
+							 src, insn_const, 0))
+				insn = overlap;
+			      else
+#endif
+				{
+				  emit_insn_after_with_line_notes
+				    (pat, PREV_INSN (p), insn);
+				  PUT_CODE (insn, NOTE);
+				  NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+				  NOTE_SOURCE_FILE (insn) = 0;
+				  /* emit_insn_after_with_line_notes
+				     has no return value, so search
+				     for the new insn.  */
+				  for (insn = p; PATTERN (insn) != pat; )
+				    insn = PREV_INSN (insn);
+				}
+			    }
+			  /* Sometimes we'd generate src = const; src += n;
+			     if so, replace the instruction that set src
+			     in the first place.  */
+			
+			  if (! overlap && (code == PLUS || code == MINUS))
+			    {
+			      rtx note
+				= find_reg_note (insn, REG_EQUAL, NULL_RTX);
+			      rtx q, set2;
+			      int num_calls2 = 0;
+
+			      if (note && CONSTANT_P (XEXP (note, 0)))
+				{
+				  for (q = PREV_INSN (insn); q;
+				       q = PREV_INSN(q))
+				    {
+				      if (GET_CODE (q) == JUMP_INSN)
+					{
+					  q = 0;
+					  break;
+					}
+				      if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
+					continue;
+				      if (reg_set_p (src, q))
+					{
+					  set2 = single_set (q);
+					  break;
+					}
+				      if (reg_overlap_mentioned_p (src,
+					  PATTERN (q)))
+					{
+					  q = 0;
+					  break;
+					}
+				      if (GET_CODE (p) == CALL_INSN)
+					num_calls2++;
+				    }
+				  if (q && set2 && SET_DEST (set2) == src
+				      && CONSTANT_P (SET_SRC (set2))
+				      && validate_change (insn, &SET_SRC (set),
+							  XEXP (note, 0), 0))
+				    {
+				      PUT_CODE (q, NOTE);
+				      NOTE_LINE_NUMBER (q) = NOTE_INSN_DELETED;
+				      NOTE_SOURCE_FILE (q) = 0;
+				      REG_N_SETS (REGNO (src))--;
+				      REG_N_CALLS_CROSSED (REGNO (src))
+					-= num_calls2;
+				      insn_const = 0;
+				    }
+				}
+			    }
+			  if (0) ;
+#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
+			  else if ((code == PLUS || code == MINUS)
+				   && insn_const
+				   && try_auto_increment (p, insn, 0,
+							  src, insn_const, 1))
+			    insn = p;
+#endif
+#if defined (HAVE_POST_INCREMENT) || defined (HAVE_POST_DECREMENT)
+			  else if (post_inc
+				   && try_auto_increment (p, post_inc,
+							  post_inc_set, src,
+							  newconst, 0))
+			    post_inc = 0;
+#endif
+#if defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT)
+			  /* If post_inc still prevails, try to find an
+			     insn where it can be used as a pre-in/decrement.
+			     If code is MINUS, this was already tried.  */
+			  if (post_inc && code == PLUS
+			  /* Check that newconst is likely to be usable
+			     in a pre-in/decrement before starting the
+			     search.  */
+			      && (0
+#if defined (HAVE_PRE_INCREMENT)
+				  || (newconst > 0 && newconst <= MOVE_MAX)
+#endif
+#if defined (HAVE_PRE_DECREMENT)
+				  || (newconst < 0 && newconst >= -MOVE_MAX)
+#endif
+				 ) && exact_log2 (newconst))
+			    {
+			      rtx q, inc_dest;
+
+			      inc_dest
+				= post_inc_set ? SET_DEST (post_inc_set) : src;
+			      for (q = post_inc; q = NEXT_INSN (q); )
+				{
+				  if (GET_CODE (q) == CODE_LABEL
+				      || GET_CODE (q) == JUMP_INSN
+				      || (GET_CODE (q) == NOTE
+					  && ((NOTE_LINE_NUMBER (q)
+					       == NOTE_INSN_LOOP_BEG)
+					      || (NOTE_LINE_NUMBER (q)
+						  == NOTE_INSN_LOOP_END))))
+				    break;
+				  if (GET_RTX_CLASS (GET_CODE (q)) != 'i')
+				    continue;
+				  if (src != inc_dest
+				      && (reg_overlap_mentioned_p (src,
+								   PATTERN (q))
+					  || reg_set_p (src, q)))
+				    break;
+				  if (reg_set_p (inc_dest, q))
+				    break;
+				  if (reg_overlap_mentioned_p (inc_dest,
+							       PATTERN (q)))
+				    {
+				      try_auto_increment (q, post_inc,
+							  post_inc_set,
+							  inc_dest,
+							  newconst, 1);
+				      break;
+				    }
+				}
+			    }
+#endif /* defined (HAVE_PRE_INCREMENT) || defined (HAVE_PRE_DECREMENT) */
+			  /* Move the death note for DST to INSN if it is used
+			     there.  */
+			  if (reg_overlap_mentioned_p (dst, PATTERN (insn)))
+			    {
+			      XEXP (dst_note, 1) = REG_NOTES (insn);
+			      REG_NOTES (insn) = dst_note;
+			    }
+    
+			  if (src_note)
+			    {
+			      /* Move the death note for SRC from INSN to P.  */
+			      if (! overlap)
+				remove_note (insn, src_note);
+			      XEXP (src_note, 1) = REG_NOTES (p);
+			      REG_NOTES (p) = src_note;
+    
+			      REG_N_CALLS_CROSSED (REGNO (src)) += num_calls;
+			    }
+    
+			  REG_N_SETS (REGNO (src))++;
+			  REG_N_SETS (REGNO (dst))--;
+    
+			  REG_N_CALLS_CROSSED (REGNO (dst)) -= num_calls;
+    
+			  /* ??? Must adjust reg_live_length, and reg_n_refs for
+			     both registers.  Must keep track of loop_depth in
+			     order to get reg_n_refs adjustment correct.  */
+    
+			  if (regmove_dump_file)
+			    fprintf (regmove_dump_file,
+				     "Fixed operand %d of insn %d matching operand %d.\n",
+				     operand_number, INSN_UID (insn),
+				     match_number);
+    
+			  goto done_forwards;
+			}
+		    }
+		}
+	    done_forwards:
+	      ;
+	    }
+	}
+    }
+
+  /* A backward pass.  Replace input operands with output operands.  */
+
+  if (regmove_dump_file)
+    fprintf (regmove_dump_file, "Starting backward pass...\n");
+
+  for (insn = get_last_insn (); insn; insn = PREV_INSN (insn))
+    {
+      if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
+	{
+	  int insn_code_number = recog_memoized (insn);
+	  int operand_number, match_number;
+	  
+	  if (insn_code_number < 0)
+	    continue;
+
+	  insn_extract (insn);
+	  if (! constrain_operands (insn_code_number, 0))
+	    continue;
+	  
+	  commutative = -1;
+
+	  /* Must initialize this before the loop, because the code for
+	     the commutative case may set matches for operands other than
+	     the current one.  */
+	  bzero (matches, sizeof (matches));
+
+	  for (operand_number = 0;
+	       operand_number < insn_n_operands[insn_code_number];
+	       operand_number++)
+	    {
+	      int output_operand = 0;
+	      int matching_operand = operand_number;
+	      char *p, c;
+	      int i = 0;
+
+	      modified[operand_number] = 0;
+	      early_clobber[operand_number] = 0;
+
+	      p = insn_operand_constraint[insn_code_number][operand_number];
+
+	      if (*p == '=')
+		modified[operand_number] = 2;
+	      else if (*p == '+')
+		modified[operand_number] = 1;
+
+	      for (; *p && i < which_alternative; p++)
+		if (*p == ',')
+		  i++;
+
+	      while ((c = *p++) != '\0' && c != ',')
+		switch (c)
+		  {
+		  case '=':
+		    break;
+		  case '+':
+		    break;
+		  case '&':
+		    early_clobber[operand_number] = 1;
+		    break;
+		  case '%':
+		    commutative = operand_number;
+		    break;
+		  case '0': case '1': case '2': case '3': case '4':
+		  case '5': case '6': case '7': case '8': case '9':
+		    c -= '0';
+		    matches[c][operand_number] = 1;
+		    if (commutative >= 0)
+		      {
+			if (c == commutative || c == commutative + 1)
+			  {
+			    int other = c + (c == commutative ? 1 : -1);
+			    matches[other][operand_number] = 1;
+			  }
+			if (operand_number == commutative
+			    || operand_number == commutative + 1)
+			  {
+			    int other = (operand_number
+					 + (operand_number == commutative
+					    ? 1 : -1));
+			    matches[c][other] = 1;
+			  }
+		      }
+		    break;
+		  }
+	    }
+
+	  /* Now scan through the operands looking for a destination operand
+	     which is supposed to match a source operand.
+	     Then scan backward for an instruction which sets the source
+	     operand.  If safe, then replace the source operand with the
+	     dest operand in both instructions.  */
+
+	  for (operand_number = 0;
+	       operand_number < insn_n_operands[insn_code_number];
+	       operand_number++)
+	    {
+	      for (match_number = 0;
+		   match_number < insn_n_operands[insn_code_number];
+		   match_number++)
+		{
+		  rtx set, p, src, dst;
+		  rtx src_note, dst_note;
+		  int success = 0;
+		  int num_calls = 0;
+
+		  /* Nothing to do if the two operands aren't supposed to
+		     match.  */
+		  if (matches[operand_number][match_number] == 0)
+		    continue;
+
+		  dst = recog_operand[operand_number];
+		  src = recog_operand[match_number];
+
+		  if (GET_CODE (src) != REG
+		      || REGNO (src) < FIRST_PSEUDO_REGISTER)
+		    continue;
+
+		  if (GET_CODE (dst) != REG
+		      || REGNO (dst) < FIRST_PSEUDO_REGISTER)
+		    continue;
+
+		  /* If the operands already match, then there is nothing
+		     to do.  */
+		  if (operands_match_p (src, dst))
+		    continue;
+
+		  set = single_set (insn);
+		  if (! set)
+		    continue;
+
+		  /* operand_number/dst must be a write-only operand, and
+		     match_operand/src must be a read-only operand.  */
+		  if (modified[match_number] != 0)
+		    continue;
+
+		  if (early_clobber[operand_number] == 1)
+		    continue;
+
+		  if (modified[operand_number] != 2)
+		    continue;
+
+		  /* Make sure operand_number is the destination.  */
+		  if (recog_operand[operand_number] != SET_DEST (set))
+		    continue;
+	      
+		  if (! (src_note = find_reg_note (insn, REG_DEAD, src)))
+		    continue;
+
+		  /* Can not modify an earlier insn to set dst if this insn
+		     uses an old value in the source.  */
+		  if (reg_overlap_mentioned_p (dst, SET_SRC (set)))
+		    continue;
+
+		  if (regmove_dump_file)
+		    fprintf (regmove_dump_file,
+			     "Could fix operand %d of insn %d matching operand %d.\n",
+			     operand_number, INSN_UID (insn), match_number);
+
+		  /* ??? If src is set once, and is set equal to a constant,
+		     then do not use it for this optimization, as this would
+		     make it no longer equivalent to a constant?  */
+
+		  /* Scan backward to find the first instruction that uses
+		     the input operand.  If the operand is set here, then
+		     replace it in both instructions with operand_number.  */
+
+		  for (p = PREV_INSN (insn); p; p = PREV_INSN (p))
+		    {
+		      rtx pset;
+
+		      if (GET_CODE (p) == CODE_LABEL
+			  || GET_CODE (p) == JUMP_INSN
+			  || (GET_CODE (p) == NOTE
+			      && (NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_BEG
+				  || NOTE_LINE_NUMBER (p) == NOTE_INSN_LOOP_END)))
+			break;
+
+		      if (GET_RTX_CLASS (GET_CODE (p)) != 'i')
+			continue;
+
+		      /* ??? See if all of SRC is set in P.  This test is much
+			 more conservative than it needs to be.  */
+		      pset = single_set (p);
+		      if (pset && SET_DEST (pset) == src)
+			{
+			  /* We use validate_replace_rtx, in case there
+			     are multiple identical source operands.  All of
+			     them have to be changed at the same time.  */
+			  if (validate_replace_rtx (src, dst, insn))
+			    {
+			      if (validate_change (p, &SET_DEST (pset),
+						   dst, 0))
+				success = 1;
+			      else
+				{
+				  /* Change all source operands back.
+				     This modifies the dst as a side-effect.  */
+				  validate_replace_rtx (dst, src, insn);
+				  /* Now make sure the dst is right.  */
+				  validate_change (insn,
+						   recog_operand_loc[operand_number],
+						   dst, 0);
+				}
+			    }
+			  break;
+			}
+
+		      if (reg_overlap_mentioned_p (src, PATTERN (p))
+			  || reg_overlap_mentioned_p (dst, PATTERN (p)))
+			break;
+
+		      /* If we have passed a call instruction, and the
+			 pseudo-reg DST is not already live across a call,
+			 then don't perform the optimization.  */
+		      if (GET_CODE (p) == CALL_INSN)
+			{
+			  num_calls++;
+
+			  if (REG_N_CALLS_CROSSED (REGNO (dst)) == 0)
+			    break;
+			}
+		    }
+
+		  if (success)
+		    {
+		      /* Remove the death note for SRC from INSN.  */
+		      remove_note (insn, src_note);
+		      /* Move the death note for SRC to P if it is used
+			 there.  */
+		      if (reg_overlap_mentioned_p (src, PATTERN (p)))
+			{
+			  XEXP (src_note, 1) = REG_NOTES (p);
+			  REG_NOTES (p) = src_note;
+			}
+		      /* If there is a REG_DEAD note for DST on P, then remove
+			 it, because DST is now set there.  */
+		      if (dst_note = find_reg_note (p, REG_DEAD, dst))
+			remove_note (p, dst_note);
+
+		      REG_N_SETS (REGNO (dst))++;
+		      REG_N_SETS (REGNO (src))--;
+
+		      REG_N_CALLS_CROSSED (REGNO (dst)) += num_calls;
+		      REG_N_CALLS_CROSSED (REGNO (src)) -= num_calls;
+
+		      /* ??? Must adjust reg_live_length, and reg_n_refs for
+			 both registers.  Must keep track of loop_depth in
+			 order to get reg_n_refs adjustment correct.  */
+
+		      if (regmove_dump_file)
+			fprintf (regmove_dump_file,
+				 "Fixed operand %d of insn %d matching operand %d.\n",
+				 operand_number, INSN_UID (insn), match_number);
+
+		      goto done_backwards;
+		    }
+		}
+	    }
+	done_backwards:
+	  ;
+	}
+    }
+#endif /* REGISTER_CONSTRAINTS */
+}
+
+/* Return nonzero if X is stable but for mentioning SRC or mentioning /
+   changing DST.  If in doubt, presume it is unstable.  */
+static int
+stable_but_for_p (x, src, dst)
+     rtx x, src, dst;
+{
+  RTX_CODE code = GET_CODE (x);
+  switch (GET_RTX_CLASS (code))
+    {
+    case '<': case '1': case 'c': case '2': case 'b': case '3':
+      { /* Stable only if every operand whose format letter is 'e' is.  */
+	int i;
+	char *fmt = GET_RTX_FORMAT (code);
+	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+	  if (fmt[i] == 'e' && ! stable_but_for_p (XEXP (x, i), src, dst))
+	      return 0;
+	return 1;
+      }
+    case 'o':
+      if (x == src || x == dst) /* Identity (==) test against SRC, DST.  */
+	return 1;
+      /* Any other class 'o' rtx: fall through to the rtx_unstable_p test.  */
+    default:
+      return ! rtx_unstable_p (x);
+    }
+}
diff --git a/gcc/rtl.h b/gcc/rtl.h
index de9386df210..a5bdfca61d5 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -812,6 +812,7 @@ extern void remove_note		PROTO((rtx, rtx));
 extern void note_stores		PROTO((rtx, void (*)()));
 extern int refers_to_regno_p	PROTO((int, int, rtx, rtx *));
 extern int reg_overlap_mentioned_p PROTO((rtx, rtx));
+extern rtx find_use_as_address	PROTO((rtx, rtx, HOST_WIDE_INT));
 
 
 /* Maximum number of parallel sets and clobbers in any insn in this fn.
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 43be973bcbe..790a51b8fd8 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -245,6 +245,7 @@ int cse2_dump = 0;
 int branch_prob_dump = 0;
 int flow_dump = 0;
 int combine_dump = 0;
+int regmove_dump = 0;
 int sched_dump = 0;
 int local_reg_dump = 0;
 int global_reg_dump = 0;
@@ -566,6 +567,35 @@ int flag_pedantic_errors = 0;
 int flag_schedule_insns = 0;
 int flag_schedule_insns_after_reload = 0;
 
+#ifdef HAIFA
+/* The following flags have effect only for scheduling before register
+   allocation:
+
+   flag_schedule_interblock means schedule insns across basic blocks.
+   flag_schedule_speculative means allow speculative motion of non-load insns.
+   flag_schedule_speculative_load means allow speculative motion of some
+   load insns.
+   flag_schedule_speculative_load_dangerous allows speculative motion of more
+   load insns.
+   flag_schedule_reverse_before_reload means try to reverse original order
+   of insns (S).
+   flag_schedule_reverse_after_reload means try to reverse original order
+   of insns (R).  */
+
+int flag_schedule_interblock = 1;
+int flag_schedule_speculative = 1;
+int flag_schedule_speculative_load = 0;
+int flag_schedule_speculative_load_dangerous = 0;
+int flag_schedule_reverse_before_reload = 0;
+int flag_schedule_reverse_after_reload = 0;
+
+
+/* flag_branch_on_count_reg means try to replace add-1,compare,branch tuple
+   by a cheaper branch, on a count register. */
+int flag_branch_on_count_reg;
+#endif  /* HAIFA */
+
+
 /* -finhibit-size-directive inhibits output of .size for ELF.
    This is used only for compiling crtstuff.c, 
    and it may be extended to other effects
@@ -616,6 +646,8 @@ int flag_check_memory_usage = 0;
    -fcheck-memory-usage.  */
 int flag_prefix_function_name = 0;
 
+int flag_regmove = 0;
+
 /* 1 if alias checking is on (by default, when -O).  */
 int flag_alias_check = 0;
 
@@ -666,6 +698,15 @@ struct { char *string; int *variable; int on_value;} f_options[] =
   {"pretend-float", &flag_pretend_float, 1},
   {"schedule-insns", &flag_schedule_insns, 1},
   {"schedule-insns2", &flag_schedule_insns_after_reload, 1},
+#ifdef HAIFA
+  {"sched-interblock",&flag_schedule_interblock, 1},
+  {"sched-spec",&flag_schedule_speculative, 1},
+  {"sched-spec-load",&flag_schedule_speculative_load, 1},
+  {"sched-spec-load-dangerous",&flag_schedule_speculative_load_dangerous, 1},
+  {"sched-reverse-S",&flag_schedule_reverse_before_reload, 1},
+  {"sched-reverse-R",&flag_schedule_reverse_after_reload, 1},
+  {"branch-count-reg",&flag_branch_on_count_reg, 1},
+#endif  /* HAIFA */
   {"pic", &flag_pic, 1},
   {"PIC", &flag_pic, 2},
   {"exceptions", &flag_exceptions, 1},
@@ -680,6 +721,7 @@ struct { char *string; int *variable; int on_value;} f_options[] =
   {"function-sections", &flag_function_sections, 1},
   {"verbose-asm", &flag_verbose_asm, 1},
   {"gnu-linker", &flag_gnu_linker, 1},
+  {"regmove", &flag_regmove, 1},
   {"pack-struct", &flag_pack_struct, 1},
   {"stack-check", &flag_stack_check, 1},
   {"bytecode", &output_bytecode, 1},
@@ -885,6 +927,7 @@ FILE *cse2_dump_file;
 FILE *branch_prob_dump_file;
 FILE *flow_dump_file;
 FILE *combine_dump_file;
+FILE *regmove_dump_file;
 FILE *sched_dump_file;
 FILE *local_reg_dump_file;
 FILE *global_reg_dump_file;
@@ -905,6 +948,7 @@ int cse2_time;
 int branch_prob_time;
 int flow_time;
 int combine_time;
+int regmove_time;
 int sched_time;
 int local_alloc_time;
 int global_alloc_time;
@@ -1053,6 +1097,8 @@ fatal_insn (message, insn)
     fflush (flow_dump_file);
   if (combine_dump_file)
     fflush (combine_dump_file);
+  if (regmove_dump_file)
+    fflush (regmove_dump_file);
   if (sched_dump_file)
     fflush (sched_dump_file);
   if (local_reg_dump_file)
@@ -2131,6 +2177,7 @@ compile_file (name)
   branch_prob_time = 0;
   flow_time = 0;
   combine_time = 0;
+  regmove_time = 0;
   sched_time = 0;
   local_alloc_time = 0;
   global_alloc_time = 0;
@@ -2229,6 +2276,10 @@ compile_file (name)
   if (combine_dump)
     combine_dump_file = open_dump_file (dump_base_name, ".combine");
 
+  /* If regmove dump desired, open the output file.  */
+  if (regmove_dump)
+    regmove_dump_file = open_dump_file (dump_base_name, ".regmove");
+
   /* If scheduling dump desired, open the output file.  */
   if (sched_dump)
     sched_dump_file = open_dump_file (dump_base_name, ".sched");
@@ -2713,6 +2764,9 @@ compile_file (name)
       fclose (combine_dump_file);
     }
 
+  if (regmove_dump)
+    fclose (regmove_dump_file);
+
   if (sched_dump)
     fclose (sched_dump_file);
 
@@ -2765,6 +2819,7 @@ compile_file (name)
 	  print_time ("branch-prob", branch_prob_time);
 	  print_time ("flow", flow_time);
 	  print_time ("combine", combine_time);
+	  print_time ("regmove", regmove_time);
 	  print_time ("sched", sched_time);
 	  print_time ("local-alloc", local_alloc_time);
 	  print_time ("global-alloc", global_alloc_time);
@@ -3304,6 +3359,26 @@ rest_of_compilation (decl)
 	       fflush (combine_dump_file);
 	     });
 
+  if (regmove_dump)
+    TIMEVAR (dump_time,
+	     {
+	       fprintf (regmove_dump_file, "\n;; Function %s\n\n",
+			(*decl_printable_name) (decl, 2));
+	     });
+
+  /* Register allocation pre-pass, to reduce number of moves
+     necessary for two-address machines.  */
+  if (optimize > 0 && flag_regmove)
+    TIMEVAR (regmove_time, regmove_optimize (insns, max_reg_num (),
+					     regmove_dump_file));
+
+  if (regmove_dump)
+    TIMEVAR (dump_time,
+	     {
+	       print_rtl (regmove_dump_file, insns);
+	       fflush (regmove_dump_file);
+	     });
+
   /* Print function header into sched dump now
      because doing the sched analysis makes some of the dump.  */
 
@@ -3703,6 +3778,7 @@ main (argc, argv, envp)
       flag_schedule_insns = 1;
       flag_schedule_insns_after_reload = 1;
 #endif
+      flag_regmove = 1;
     }
 
   if (optimize >= 3)
@@ -3764,6 +3840,7 @@ main (argc, argv, envp)
  		    jump2_opt_dump = 1;
  		    local_reg_dump = 1;
  		    loop_dump = 1;
+		    regmove_dump = 1;
  		    rtl_dump = 1;
  		    cse_dump = 1, cse2_dump = 1;
  		    sched_dump = 1;
@@ -3815,6 +3892,9 @@ main (argc, argv, envp)
 		  case 't':
 		    cse2_dump = 1;
 		    break;
+		  case 'N':
+		    regmove_dump = 1;
+		    break;
 		  case 'S':
 		    sched_dump = 1;
 		    break;
@@ -3862,6 +3942,18 @@ main (argc, argv, envp)
 
 	      if (found)
 		;
+#ifdef HAIFA
+#ifdef INSN_SCHEDULING
+	      else if (!strncmp (p, "sched-verbose-",14))
+		fix_sched_param("verbose",&p[14]);
+	      else if (!strncmp (p, "sched-max-",10))
+		fix_sched_param("max",&p[10]);
+	      else if (!strncmp (p, "sched-inter-max-b-",18))
+		fix_sched_param("interblock-max-blocks",&p[18]);
+	      else if (!strncmp (p, "sched-inter-max-i-",18))
+		fix_sched_param("interblock-max-insns",&p[18]);
+#endif
+#endif  /* HAIFA */
 	      else if (!strncmp (p, "fixed-", 6))
 		fix_register (&p[6], 1, 1);
 	      else if (!strncmp (p, "call-used-", 10))
diff --git a/gcc/unroll.c b/gcc/unroll.c
index 46339988ede..f1864d398b4 100644
--- a/gcc/unroll.c
+++ b/gcc/unroll.c
@@ -202,7 +202,7 @@ static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
 static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
 static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
 				  enum unroll_types, rtx, rtx, rtx, rtx));
-static void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
+void iteration_info PROTO((rtx, rtx *, rtx *, rtx, rtx));
 static rtx approx_final_value PROTO((enum rtx_code, rtx, int *, int *));
 static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
 static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
@@ -1094,6 +1094,16 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
 	  /* Set unroll type to MODULO now.  */
 	  unroll_type = UNROLL_MODULO;
 	  loop_preconditioned = 1;
+#ifdef HAIFA
+	  if (loop_n_iterations > 0)
+	    loop_unroll_iter[ loop_number(loop_start, loop_end) ]
+	      = (loop_n_iterations
+		  - loop_n_iterations % (abs_inc * unroll_number));
+	  else
+	    /* inform loop.c about the new initial value */
+	    loop_start_value[loop_number(loop_start, loop_end)] = initial_value;
+#endif
+
 	}
     }
 
@@ -1108,6 +1118,15 @@ unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
 
   /* At this point, we are guaranteed to unroll the loop.  */
 
+#ifdef HAIFA
+  /* inform loop.c about the factor of unrolling */
+  if (unroll_type == UNROLL_COMPLETELY)
+    loop_unroll_factor[ loop_number(loop_start, loop_end) ] = -1;
+  else
+    loop_unroll_factor[ loop_number(loop_start, loop_end) ] = unroll_number;
+#endif  /* HAIFA */
+
+
   /* For each biv and giv, determine whether it can be safely split into
      a different variable for each unrolled copy of the loop body.
      We precalculate and save this info here, since computing it is
@@ -2263,7 +2282,7 @@ biv_total_increment (bl, loop_start, loop_end)
    Initial_value and/or increment are set to zero if their values could not
    be calculated.  */
 
-static void
+void
 iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
      rtx iteration_var, *initial_value, *increment;
      rtx loop_start, loop_end;
author	Jeffrey A Law <law@cygnus.com>	1997-08-12 04:07:19 +0000
committer	Jeffrey A Law <law@cygnus.com>	1997-08-12 04:07:19 +0000
commit	e07ae5fd7ffed7230ad5e8c6b51845f5667ec167 (patch)
tree	d4545c8066f57414681646f5d32d3f9b95acbe1b
parent	cc94826bb787bcdac89169d50c2dc09c2558d6b1 (diff)