diff options
author | Andrey Belevantsev <abel@ispras.ru> | 2008-04-14 15:00:52 +0000 |
---|---|---|
committer | Andrey Belevantsev <abel@ispras.ru> | 2008-04-14 15:00:52 +0000 |
commit | 78bd88e0b0a631ca40aad8bf3cc3ffe405871d9c (patch) | |
tree | 8dc8fc86239594ade44a1624bf972aab4bb899e9 | |
parent | 097f059d98ec9399d26d1a7c8e4659ab2893ba35 (diff) |
2008-04-14 Andrey Belevantsev <abel@ispras.ru>
Alexander Monakov <amonakov@ispras.ru>
* config/ia64/ia64.c (ia64_h_i_d_extended): Reallocate stops_p
by larger chunks.
* cse.c (hash_rtx_cb): New.
(hash_rtx): Use it.
* haifa-sched.c (haifa_sched_finish): Tidy.
(sched_finish): Call haifa_finish_h_i_d.
(extend_h_i_d): Reallocate h_i_d by larger chunks.
* rtl.c (rtx_equal_p_cb): New.
(rtx_equal_p): Use it.
* rtl.h (rtx_equal_p_cb, hash_rtx_cb): New.
* sched-deps.c (init_deps_data_vector): New. Allocate h_d_i_d
by larger chunks.
(sched_deps_init): Use it.
(deps_extend_d_i_d, deps_finish_d_i_d, sched_deps_local_finish): Kill.
* sched-int.h: Kill glat externs.
(struct _haifa_deps_insn_data): New field cant_move, moved from d_i_d.
(struct _deps_insn_data): Kill with all uses.
* sel-sched-ir.h (struct _expr): New field cant_move.
(_list_alloc): New, split from ...
(_list_add): ... here.
* sel-sched-ir.c (sel_rtx_equal_p, sel_hash_rtx): Kill.
(skip_unspecs_callback, hash_with_unspec_callback): New.
(vinsn_init): Use hash_rtx_cb.
(vinsn_equal_p): Use rtx_equal_p_cb.
(init_expr, merge_expr_data, copy_expr,
init_global_and_expr_for_insn): Update for EXPR_CANT_MOVE.
(update_target_availability, update_speculative_bits): New, split from ...
(merge_expr_data): ... here.
(expr_greater_p, av_set_add_element): New.
(av_set_add, av_set_add_nocopy): Use it.
(join_distinct_sets, av_set_truncate): New.
(av_set_union_and_clear, av_set_union_and_live): Use them.
(extend_insn): Reallocate s_i_d by larger chunks.
(finish_insns): Clear cant_move bit.
(block_valid_for_bookkeeping_p, find_block_for_bookkeeping,
create_block_for_bookkeeping, find_place_for_bookkeeping,
emit_bookkeeping_insn): New, split from ...
(generate_bookkeeping_insn): ... here.
(stall_for_cycles): New.
(update_fence_and_insn): New parameter need_stall.
(fill_insns): Use stall_for_cycles. Pass need_stall
to update_fence_and_insn.
(move_op_at_first_insn): Do not remove jumps that are already scheduled.
(sel_region_init): Tidy.
(find_ebb_boundaries, reset_sched_cycles_in_current_ebb, put_TImodes,
sel_region_target_finish): New, split from ...
(sel_region_finish): ... here.
(sel_global_init): Initialize can_issue_more.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/sel-sched-branch@134272 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- | gcc/ChangeLog.sel-sched | 52 | ||||
-rw-r--r-- | gcc/config/ia64/ia64.c | 2 | ||||
-rw-r--r-- | gcc/cse.c | 94 | ||||
-rw-r--r-- | gcc/haifa-sched.c | 32 | ||||
-rw-r--r-- | gcc/rtl.c | 28 | ||||
-rw-r--r-- | gcc/rtl.h | 12 | ||||
-rw-r--r-- | gcc/sched-deps.c | 44 | ||||
-rw-r--r-- | gcc/sched-int.h | 40 | ||||
-rw-r--r-- | gcc/sel-sched-ir.c | 714 | ||||
-rw-r--r-- | gcc/sel-sched-ir.h | 13 | ||||
-rw-r--r-- | gcc/sel-sched.c | 867 |
11 files changed, 899 insertions, 999 deletions
diff --git a/gcc/ChangeLog.sel-sched b/gcc/ChangeLog.sel-sched index 58d5458a0d9..e2869b03c36 100644 --- a/gcc/ChangeLog.sel-sched +++ b/gcc/ChangeLog.sel-sched @@ -1,4 +1,56 @@ 2008-04-14 Andrey Belevantsev <abel@ispras.ru> + Alexander Monakov <amonakov@ispras.ru> + + * config/ia64/ia64.c (ia64_h_i_d_extended): Reallocate stops_p + by larger chunks. + * cse.c (hash_rtx_cb): New. + (hash_rtx): Use it. + * haifa-sched.c (haifa_sched_finish): Tidy. + (sched_finish): Call haifa_finish_h_i_d. + (extend_h_i_d): Reallocate h_i_d by larget chunks. + * rtl.c (rtx_equal_p_cb): New. + (rtx_equal_p): Use it. + * rtl.h (rtx_equal_p_cb, hash_rtx_cb): New. + * sched-deps.c (init_deps_data_vector): New. Allocate h_d_i_d + by larger chunks. + (sched_deps_init): Use it. + (deps_extend_d_i_d, deps_finish_d_i_d, sched_deps_local_finish): Kill. + * sched-int.h: Kill glat externs. + (struct _haifa_deps_insn_data): New field cant_move, moved from d_i_d. + (struct _deps_insn_data): Kill with all uses. + * sel-sched-ir.h (struct _expr): New field cant_move. + (_list_alloc): New, split from ... + (_list_add): ... here. + * sel-sched-ir.c (sel_rtx_equal_p, sel_hash_rtx): Kill. + (skip_unspecs_callback, hash_with_unspec_callback): New. + (vinsn_init): Use hash_rtx_cb. + (vinsn_equal_p): Use rtx_equal_p_cb. + (init_expr, merge_expr_data, copy_expr, + init_global_and_expr_for_insn): Update for EXPR_CANT_MOVE. + (update_target_availability, update_speculative_bits): New, split from ... + (merge_expr_data): ... here. + (expr_greater_p, av_set_add_element): New. + (av_set_add, av_set_add_nocopy): Use it. + (join_distinct_sets, av_set_truncate): New. + (av_set_union_and_clear, av_set_union_and_live): Use them. + (extend_insn): Reallocate s_i_d by larger chunks. + (finish_insns): Clear cant_move bit. + (block_valid_for_bookkeeping_p, find_block_for_bookkeeping, + create_block_for_bookkeeping, find_place_for_bookkeeping, + emit_bookkeeping_insn): New, split from ... 
+ (generate_bookkeeping_insn): ... here. + (stall_for_cycles): New. + (update_fence_and_insn): New parameter need_stall. + (fill_insns): Use stall_for_cycles. Pass need_stall + to update_fence_and_insn. + (move_op_at_first_insn): Do not remove jumps that are already scheduled. + (sel_region_init): Tidy. + (find_ebb_boundaries, reset_sched_cycles_in_current_ebb, put_TImodes, + sel_region_target_finish): New, split from ... + (sel_region_finish): ... here. + (sel_global_init): Initialize can_issue_more. + +2008-04-14 Andrey Belevantsev <abel@ispras.ru> * cfgloopmanip.c (has_preds_from_loop): New. (create_preheader): Use it. diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c index 0d2f929ff67..4c6c9f78d33 100644 --- a/gcc/config/ia64/ia64.c +++ b/gcc/config/ia64/ia64.c @@ -6931,7 +6931,7 @@ ia64_h_i_d_extended (void) { if (stops_p != NULL) { - int new_clocks_length = get_max_uid () + 1; + int new_clocks_length = get_max_uid () * 3 / 2; stops_p = xrecalloc (stops_p, new_clocks_length, clocks_length, 1); diff --git a/gcc/cse.c b/gcc/cse.c index 1b6a0e84d66..5469455e477 100644 --- a/gcc/cse.c +++ b/gcc/cse.c @@ -2058,27 +2058,20 @@ hash_rtx_string (const char *ps) return hash; } -/* Hash an rtx. We are careful to make sure the value is never negative. - Equivalent registers hash identically. - MODE is used in hashing for CONST_INTs only; - otherwise the mode of X is used. - - Store 1 in DO_NOT_RECORD_P if any subexpression is volatile. - - If HASH_ARG_IN_MEMORY_P is not NULL, store 1 in it if X contains - a MEM rtx which does not have the RTX_UNCHANGING_P bit set. - - Note that cse_insn knows that the hash code of a MEM expression - is just (int) MEM plus the hash code of the address. */ +/* Same as hash_rtx, but call CB on each rtx if it is not NULL. + When the callback returns true, we continue with the new rtx. 
*/ unsigned -hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, - int *hash_arg_in_memory_p, bool have_reg_qty) +hash_rtx_cb (const_rtx x, enum machine_mode mode, + int *do_not_record_p, int *hash_arg_in_memory_p, + bool have_reg_qty, hash_rtx_callback_function cb) { int i, j; unsigned hash = 0; enum rtx_code code; const char *fmt; + enum machine_mode newmode; + rtx newx; /* Used to turn recursion into iteration. We can't rely on GCC's tail-recursion elimination since we need to keep accumulating values @@ -2087,6 +2080,15 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, if (x == 0) return hash; + /* Invoke the callback first. */ + if (cb != NULL + && ((*cb) (x, mode, &newx, &newmode))) + { + hash += hash_rtx_cb (newx, newmode, do_not_record_p, + hash_arg_in_memory_p, have_reg_qty, cb); + return hash; + } + code = GET_CODE (x); switch (code) { @@ -2094,7 +2096,7 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, { unsigned int regno = REGNO (x); - if (!reload_completed) + if (do_not_record_p && !reload_completed) { /* On some machines, we can't record any non-fixed hard register, because extending its life will cause reload problems. We @@ -2188,8 +2190,9 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, for (i = 0; i < units; ++i) { elt = CONST_VECTOR_ELT (x, i); - hash += hash_rtx (elt, GET_MODE (elt), do_not_record_p, - hash_arg_in_memory_p, have_reg_qty); + hash += hash_rtx_cb (elt, GET_MODE (elt), + do_not_record_p, hash_arg_in_memory_p, + have_reg_qty, cb); } return hash; @@ -2223,7 +2226,7 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, case MEM: /* We don't record if marked volatile or if BLKmode since we don't know the size of the move. 
*/ - if (MEM_VOLATILE_P (x) || GET_MODE (x) == BLKmode) + if (do_not_record_p && (MEM_VOLATILE_P (x) || GET_MODE (x) == BLKmode)) { *do_not_record_p = 1; return 0; @@ -2237,7 +2240,7 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, x = XEXP (x, 0); goto repeat; - case USE: + case USE: /* A USE that mentions non-volatile memory needs special handling since the MEM may be BLKmode which normally prevents an entry from being made. Pure calls are @@ -2270,11 +2273,16 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, case CC0: case CALL: case UNSPEC_VOLATILE: - *do_not_record_p = 1; - return 0; + if (do_not_record_p) { + *do_not_record_p = 1; + return 0; + } + else + return hash; + break; case ASM_OPERANDS: - if (MEM_VOLATILE_P (x)) + if (do_not_record_p && MEM_VOLATILE_P (x)) { *do_not_record_p = 1; return 0; @@ -2291,12 +2299,12 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, { for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++) { - hash += (hash_rtx (ASM_OPERANDS_INPUT (x, i), - GET_MODE (ASM_OPERANDS_INPUT (x, i)), - do_not_record_p, hash_arg_in_memory_p, - have_reg_qty) + hash += (hash_rtx_cb (ASM_OPERANDS_INPUT (x, i), + GET_MODE (ASM_OPERANDS_INPUT (x, i)), + do_not_record_p, hash_arg_in_memory_p, + have_reg_qty, cb) + hash_rtx_string - (ASM_OPERANDS_INPUT_CONSTRAINT (x, i))); + (ASM_OPERANDS_INPUT_CONSTRAINT (x, i))); } hash += hash_rtx_string (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0)); @@ -2330,14 +2338,17 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, goto repeat; } - hash += hash_rtx (XEXP (x, i), 0, do_not_record_p, - hash_arg_in_memory_p, have_reg_qty); + hash += hash_rtx_cb (XEXP (x, i), 0, do_not_record_p, + hash_arg_in_memory_p, + have_reg_qty, cb); break; case 'E': for (j = 0; j < XVECLEN (x, i); j++) - hash += hash_rtx (XVECEXP (x, i, j), 0, do_not_record_p, - hash_arg_in_memory_p, have_reg_qty); + hash += hash_rtx_cb (XVECEXP (x, i, j), 0, + do_not_record_p, + 
hash_arg_in_memory_p, + have_reg_qty, cb); break; case 's': @@ -2360,6 +2371,27 @@ hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, return hash; } +/* Hash an rtx. We are careful to make sure the value is never negative. + Equivalent registers hash identically. + MODE is used in hashing for CONST_INTs only; + otherwise the mode of X is used. + + Store 1 in DO_NOT_RECORD_P if any subexpression is volatile. + + If HASH_ARG_IN_MEMORY_P is not NULL, store 1 in it if X contains + a MEM rtx which does not have the RTX_UNCHANGING_P bit set. + + Note that cse_insn knows that the hash code of a MEM expression + is just (int) MEM plus the hash code of the address. */ + +unsigned +hash_rtx (const_rtx x, enum machine_mode mode, int *do_not_record_p, + int *hash_arg_in_memory_p, bool have_reg_qty) +{ + return hash_rtx_cb (x, mode, do_not_record_p, + hash_arg_in_memory_p, have_reg_qty, NULL); +} + /* Hash an rtx X for cse via hash_rtx. Stores 1 in do_not_record if any subexpression is volatile. Stores 1 in hash_arg_in_memory if X contains a mem rtx which diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c index 03f0d797e8e..b18befc342c 100644 --- a/gcc/haifa-sched.c +++ b/gcc/haifa-sched.c @@ -2919,7 +2919,7 @@ haifa_sched_init (void) sched_deps_info->generate_spec_deps = 1; } - /* Initialize glat, luids, dependency caches, target and h_i_d for the + /* Initialize luids, dependency caches, target and h_i_d for the whole function. */ { bb_vec_t bbs = VEC_alloc (basic_block, heap, n_basic_blocks); @@ -2983,18 +2983,12 @@ haifa_sched_finish (void) c, nr_be_in_control); } - /* Finalize h_i_d, dependency caches, luids and glat for the whole + /* Finalize h_i_d, dependency caches, and luids for the whole function. Target will be finalized in md_global_finish (). 
*/ - { - haifa_finish_h_i_d (); - sched_deps_local_finish (); - sched_deps_finish (); - sched_finish_luids (); - sched_finish_bbs (); - } - + sched_deps_finish (); + sched_finish_luids (); + sched_finish_bbs (); current_sched_info = NULL; - sched_finish (); } @@ -3004,6 +2998,7 @@ haifa_sched_finish (void) void sched_finish (void) { + haifa_finish_h_i_d (); free (curr_state); if (targetm.sched.md_finish_global) @@ -4909,7 +4904,6 @@ luids_init_insn (rtx insn) if (i >= 0) { luid = sched_max_luid; - sched_max_luid += i; } else @@ -4940,7 +4934,6 @@ void sched_finish_luids (void) { VEC_free (int, heap, sched_luids); - sched_max_luid = 1; } @@ -4967,10 +4960,15 @@ VEC (haifa_insn_data_def, heap) *h_i_d = NULL; static void extend_h_i_d (void) { - int new_h_i_d_size = get_max_uid () + 1; - - VEC_safe_grow_cleared (haifa_insn_data_def, heap, h_i_d, new_h_i_d_size); - sched_extend_target (); + int reserve = (get_max_uid () + 1 + - VEC_length (haifa_insn_data_def, h_i_d)); + if (reserve > 0 + && ! VEC_space (haifa_insn_data_def, h_i_d, reserve)) + { + VEC_safe_grow_cleared (haifa_insn_data_def, heap, h_i_d, + 3 * get_max_uid () / 2); + sched_extend_target (); + } } /* Initialize h_i_d entry of the INSN with default values. diff --git a/gcc/rtl.c b/gcc/rtl.c index edf393f29d2..58570aae7ae 100644 --- a/gcc/rtl.c +++ b/gcc/rtl.c @@ -333,22 +333,30 @@ int generating_concat_p; int currently_expanding_to_rtl; -/* Return 1 if X and Y are identical-looking rtx's. - This is the Lisp function EQUAL for rtx arguments. */ + +/* Same as rtx_equal_p, but call CB on each pair of rtx if CB is not NULL. + When the callback returns true, we continue with the new pair. */ + int -rtx_equal_p (const_rtx x, const_rtx y) +rtx_equal_p_cb (const_rtx x, const_rtx y, rtx_equal_p_callback_function cb) { int i; int j; enum rtx_code code; const char *fmt; + rtx nx, ny; if (x == y) return 1; if (x == 0 || y == 0) return 0; + /* Invoke the callback first. 
*/ + if (cb != NULL + && ((*cb) (&x, &y, &nx, &ny))) + return rtx_equal_p_cb (nx, ny, cb); + code = GET_CODE (x); /* Rtx's of different codes cannot be equal. */ if (code != GET_CODE (y)) @@ -409,12 +417,13 @@ rtx_equal_p (const_rtx x, const_rtx y) /* And the corresponding elements must match. */ for (j = 0; j < XVECLEN (x, i); j++) - if (rtx_equal_p (XVECEXP (x, i, j), XVECEXP (y, i, j)) == 0) + if (rtx_equal_p_cb (XVECEXP (x, i, j), + XVECEXP (y, i, j), cb) == 0) return 0; break; case 'e': - if (rtx_equal_p (XEXP (x, i), XEXP (y, i)) == 0) + if (rtx_equal_p_cb (XEXP (x, i), XEXP (y, i), cb) == 0) return 0; break; @@ -444,6 +453,15 @@ rtx_equal_p (const_rtx x, const_rtx y) return 1; } +/* Return 1 if X and Y are identical-looking rtx's. + This is the Lisp function EQUAL for rtx arguments. */ + +int +rtx_equal_p (const_rtx x, const_rtx y) +{ + return rtx_equal_p_cb (x, y, NULL); +} + void dump_rtx_statistics (void) { diff --git a/gcc/rtl.h b/gcc/rtl.h index c351846b799..50d974c2915 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -1755,8 +1755,20 @@ extern int replace_label (rtx *, void *); extern int rtx_referenced_p (rtx, rtx); extern bool tablejump_p (const_rtx, rtx *, rtx *); extern int computed_jump_p (const_rtx); + typedef int (*rtx_function) (rtx *, void *); extern int for_each_rtx (rtx *, rtx_function, void *); + +typedef int (*rtx_equal_p_callback_function) (const_rtx *, const_rtx *, + rtx *, rtx *); +extern int rtx_equal_p_cb (const_rtx, const_rtx, + rtx_equal_p_callback_function); + +typedef int (*hash_rtx_callback_function) (const_rtx, enum machine_mode, rtx *, + enum machine_mode *); +extern unsigned hash_rtx_cb (const_rtx, enum machine_mode, int *, int *, + bool, hash_rtx_callback_function); + extern rtx regno_use_in (unsigned int, rtx); extern int auto_inc_p (const_rtx); extern int in_expr_list_p (const_rtx, const_rtx); diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c index 88e69e739f1..2f5f2a97f5e 100644 --- a/gcc/sched-deps.c +++ b/gcc/sched-deps.c @@ 
-2987,20 +2987,16 @@ remove_from_deps (struct deps *deps, rtx insn) remove_from_dependence_list (insn, &deps->sched_before_next_call); } -/* An array indexed by INSN_UID that holds the data related - to insn's dependencies and common to all schedulers. */ -VEC (deps_insn_data_def, heap) *d_i_d = NULL; - +/* Init deps data vector. */ static void -deps_extend_d_i_d (void) -{ - VEC_safe_grow_cleared (deps_insn_data_def, heap, d_i_d, sched_max_luid); -} - -void -deps_finish_d_i_d (void) +init_deps_data_vector (void) { - VEC_free (deps_insn_data_def, heap, d_i_d); + int reserve = (sched_max_luid + 1 + - VEC_length (haifa_deps_insn_data_def, h_d_i_d)); + if (reserve > 0 + && ! VEC_space (haifa_deps_insn_data_def, h_d_i_d, reserve)) + VEC_safe_grow_cleared (haifa_deps_insn_data_def, heap, h_d_i_d, + 3 * sched_max_luid / 2); } /* If it is profitable to use them, initialize or extend (depending on @@ -3008,18 +3004,13 @@ deps_finish_d_i_d (void) void sched_deps_init (bool global_p) { - int new_max_uid; /* Average number of insns in the basic block. '+ 1' is used to make it nonzero. */ int insns_in_block = sched_max_luid / n_basic_blocks + 1; - deps_extend_d_i_d (); - - new_max_uid = get_max_uid () + 1; - VEC_safe_grow_cleared (haifa_deps_insn_data_def, heap, h_d_i_d, - new_max_uid); - - /* FIXME: We need another caching mechanism for selective scheduling, so + init_deps_data_vector (); + + /* We use another caching mechanism for selective scheduling, so we don't use this one. */ if (!SEL_SCHED_P && global_p && insns_in_block > 100 * 5) { @@ -3038,7 +3029,6 @@ sched_deps_init (bool global_p) dl_pool = create_alloc_pool ("deps_list", sizeof (struct _deps_list), /* Allocate lists for one block at a time. */ insns_in_block); - dn_pool = create_alloc_pool ("dep_node", sizeof (struct _dep_node), /* Allocate nodes for one block at a time. 
We assume that average insn has @@ -3081,15 +3071,6 @@ extend_dependency_caches (int n, bool create_p) } } -/* Finalize dependency information for the region. */ -void -sched_deps_local_finish (void) -{ - VEC_free (haifa_deps_insn_data_def, heap, h_d_i_d); - cache_size = 0; - deps_finish_d_i_d (); -} - /* Finalize dependency information for the whole function. */ void sched_deps_finish (void) @@ -3099,6 +3080,9 @@ sched_deps_finish (void) free_alloc_pool_if_empty (&dl_pool); gcc_assert (dn_pool == NULL && dl_pool == NULL); + VEC_free (haifa_deps_insn_data_def, heap, h_d_i_d); + cache_size = 0; + if (true_dependency_cache) { int i; diff --git a/gcc/sched-int.h b/gcc/sched-int.h index b3945b6c5b5..19d639cabb6 100644 --- a/gcc/sched-int.h +++ b/gcc/sched-int.h @@ -121,11 +121,6 @@ extern const struct common_sched_info_def haifa_common_sched_info; scheduler behaviour. */ extern int sched_emulate_haifa_p; -/* Used only if (current_sched_info->flags & USE_GLAT) != 0. - These regsets store global_live_at_{start, end} information - for each basic block. */ -extern regset *glat_start, *glat_end; - /* Mapping from INSN_UID to INSN_LUID. In the end all other per insn data structures should be indexed by luid. */ extern VEC (int, heap) *sched_luids; @@ -676,6 +671,9 @@ struct _haifa_deps_insn_data from 'forw_deps' to 'resolved_forw_deps' while scheduling to fasten the search in 'forw_deps'. */ deps_list_t resolved_forw_deps; + + /* Some insns (e.g. call) are not allowed to move across blocks. 
*/ + unsigned int cant_move : 1; }; struct _haifa_insn_data @@ -769,16 +767,11 @@ extern VEC(haifa_deps_insn_data_def, heap) *h_d_i_d; #define INSN_RESOLVED_FORW_DEPS(INSN) (HDID (INSN)->resolved_forw_deps) #define INSN_HARD_BACK_DEPS(INSN) (HDID (INSN)->hard_back_deps) #define INSN_SPEC_BACK_DEPS(INSN) (HDID (INSN)->spec_back_deps) +#define CANT_MOVE(INSN) (HDID (INSN)->cant_move) +#define CANT_MOVE_BY_LUID(LUID) (VEC_index (haifa_deps_insn_data_def, h_d_i_d, \ + LUID)->cant_move) + -/*#define INSN_HARD_BACK_DEPS(INSN) (HID (INSN)->hard_back_deps) -#define INSN_SPEC_BACK_DEPS(INSN) (HID (INSN)->spec_back_deps) -#define INSN_FORW_DEPS(INSN) (HID (INSN)->forw_deps) -#define INSN_RESOLVED_BACK_DEPS(INSN) \ - (HID (INSN)->resolved_back_deps) -#define INSN_RESOLVED_FORW_DEPS(INSN) \ - (HID (INSN)->resolved_forw_deps) - #define INSN_LUID(INSN) (HID (INSN)->luid) - #define CANT_MOVE(INSN) (HID (INSN)->cant_move) */ #define INSN_PRIORITY(INSN) (HID (INSN)->priority) #define INSN_PRIORITY_STATUS(INSN) (HID (INSN)->priority_status) #define INSN_PRIORITY_KNOWN(INSN) (INSN_PRIORITY_STATUS (INSN) > 0) @@ -1036,23 +1029,6 @@ enum INSN_TRAP_CLASS #define HAIFA_INLINE __inline #endif -struct _deps_insn_data -{ - /* Some insns (e.g. call) are not allowed to move across blocks. */ - unsigned int cant_move : 1; -}; - -typedef struct _deps_insn_data deps_insn_data_def; -typedef deps_insn_data_def *deps_insn_data_t; - -DEF_VEC_O (deps_insn_data_def); -DEF_VEC_ALLOC_O (deps_insn_data_def, heap); - -extern VEC (deps_insn_data_def, heap) *d_i_d; - -#define DID(INSN) (VEC_index (deps_insn_data_def, d_i_d, INSN_LUID (INSN))) -#define CANT_MOVE(INSN) (DID (INSN)->cant_move) - struct sched_deps_info_def { /* Called when computing dependencies for a JUMP_INSN. 
This function @@ -1146,8 +1122,6 @@ extern dw_t ds_weak (ds_t); extern ds_t ds_get_speculation_types (ds_t); extern ds_t ds_get_max_dep_weak (ds_t); -extern void deps_finish_d_i_d (void); - extern void sched_deps_init (bool); extern void sched_deps_finish (void); extern void sched_deps_local_finish (void); diff --git a/gcc/sel-sched-ir.c b/gcc/sel-sched-ir.c index 575c38504ba..1539c45eb8a 100644 --- a/gcc/sel-sched-ir.c +++ b/gcc/sel-sched-ir.c @@ -1040,333 +1040,52 @@ free_nop_pool (void) } -/* Return 1 if X and Y are identical-looking rtx's. - This is the Lisp function EQUAL for rtx arguments. - Copied from rtl.c. The only difference is support for ia64 speculation. */ +/* Skip unspec to support ia64 speculation. Called from rtx_equal_p_cb. */ + static int -sel_rtx_equal_p (rtx x, rtx y) +skip_unspecs_callback (const_rtx *xx, const_rtx *yy, rtx *nx, rtx* ny) { - int i; - int j; - enum rtx_code code; - const char *fmt; - - if (x == y) - return 1; - if (x == 0 || y == 0) - return 0; - - /* Support ia64 speculation. */ + const_rtx x = *xx; + const_rtx y = *yy; + if (GET_CODE (x) == UNSPEC && (targetm.sched.skip_rtx_p == NULL || targetm.sched.skip_rtx_p (x))) - return sel_rtx_equal_p (XVECEXP (x, 0, 0), y); + { + *nx = XVECEXP (x, 0, 0); + *ny = (rtx) y; + return 1; + } if (GET_CODE (y) == UNSPEC && (targetm.sched.skip_rtx_p == NULL || targetm.sched.skip_rtx_p (y))) - return sel_rtx_equal_p (x, XVECEXP (y, 0, 0)); - - code = GET_CODE (x); - /* Rtx's of different codes cannot be equal. */ - if (code != GET_CODE (y)) - return 0; - - /* (MULT:SI x y) and (MULT:HI x y) are NOT equivalent. - (REG:SI x) and (REG:HI x) are NOT equivalent. */ - - if (GET_MODE (x) != GET_MODE (y)) - return 0; - - /* Some RTL can be compared nonrecursively. 
*/ - switch (code) - { - case REG: - return (REGNO (x) == REGNO (y)); - - case LABEL_REF: - return XEXP (x, 0) == XEXP (y, 0); - - case SYMBOL_REF: - return XSTR (x, 0) == XSTR (y, 0); - - case SCRATCH: - case CONST_DOUBLE: - case CONST_INT: - return 0; - - default: - break; - } - - /* Compare the elements. If any pair of corresponding elements - fail to match, return 0 for the whole thing. */ - - fmt = GET_RTX_FORMAT (code); - for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) { - switch (fmt[i]) - { - case 'w': - if (XWINT (x, i) != XWINT (y, i)) - return 0; - break; - - case 'n': - case 'i': - if (XINT (x, i) != XINT (y, i)) - return 0; - break; - - case 'V': - case 'E': - /* Two vectors must have the same length. */ - if (XVECLEN (x, i) != XVECLEN (y, i)) - return 0; - - /* And the corresponding elements must match. */ - for (j = 0; j < XVECLEN (x, i); j++) - if (sel_rtx_equal_p (XVECEXP (x, i, j), XVECEXP (y, i, j)) == 0) - return 0; - break; - - case 'e': - if (sel_rtx_equal_p (XEXP (x, i), XEXP (y, i)) == 0) - return 0; - break; - - case 'S': - case 's': - if ((XSTR (x, i) || XSTR (y, i)) - && (! XSTR (x, i) || ! XSTR (y, i) - || strcmp (XSTR (x, i), XSTR (y, i)))) - return 0; - break; - - case 'u': - /* These are just backpointers, so they don't matter. */ - break; - - case '0': - case 't': - break; - - /* It is believed that rtx's at this level will never - contain anything but integers and other rtx's, - except for within LABEL_REFs and SYMBOL_REFs. */ - default: - gcc_unreachable (); - } + *nx = (rtx) x; + *ny = XVECEXP (y, 0, 0); + return 1; } - return 1; + + return 0; } -/* Hash an rtx X. The difference from hash_rtx is that we try to hash as - much stuff as possible, not skipping volatile mems, calls, etc. */ +/* Callback, called from hash_rtx_cb. + Helps to hash UNSPEC rtx in a correct way to support ia64 speculation. 
*/ -static unsigned -sel_hash_rtx (rtx x, enum machine_mode mode) +static int +hash_with_unspec_callback (const_rtx x, enum machine_mode mode ATTRIBUTE_UNUSED, + rtx *nx, enum machine_mode* nmode) { - int i, j; - unsigned hash = 0; - enum rtx_code code; - const char *fmt; - - /* Used to turn recursion into iteration. */ - repeat: - if (x == 0) - return hash; - - code = GET_CODE (x); - switch (code) + if (GET_CODE (x) == UNSPEC + && targetm.sched.skip_rtx_p + && targetm.sched.skip_rtx_p(x)) { - case REG: - { - unsigned int regno = REGNO (x); - - hash += ((unsigned int) REG << 7); - hash += regno; - return hash; - } - - case SUBREG: - { - if (REG_P (SUBREG_REG (x))) - { - hash += (((unsigned int) SUBREG << 7) - + REGNO (SUBREG_REG (x)) - + (SUBREG_BYTE (x) / UNITS_PER_WORD)); - return hash; - } - break; - } - - case CONST_INT: - hash += (((unsigned int) CONST_INT << 7) + (unsigned int) mode - + (unsigned int) INTVAL (x)); - return hash; - - case CONST_DOUBLE: - hash += (unsigned int) code + (unsigned int) GET_MODE (x); - if (GET_MODE (x) != VOIDmode) - hash += real_hash (CONST_DOUBLE_REAL_VALUE (x)); - else - hash += ((unsigned int) CONST_DOUBLE_LOW (x) - + (unsigned int) CONST_DOUBLE_HIGH (x)); - return hash; - - case CONST_VECTOR: - { - int units; - rtx elt; - - units = CONST_VECTOR_NUNITS (x); - - for (i = 0; i < units; ++i) - { - elt = CONST_VECTOR_ELT (x, i); - hash += sel_hash_rtx (elt, GET_MODE (elt)); - } - - return hash; - } - - /* Assume there is only one rtx object for any given label. */ - case LABEL_REF: - /* We don't hash on the address of the CODE_LABEL to avoid bootstrap - differences and differences between each stage's debugging dumps. */ - hash += (((unsigned int) LABEL_REF << 7) - + CODE_LABEL_NUMBER (XEXP (x, 0))); - return hash; - - case SYMBOL_REF: - { - /* Don't hash on the symbol's address to avoid bootstrap differences. 
- Different hash values may cause expressions to be recorded in - different orders and thus different registers to be used in the - final assembler. This also avoids differences in the dump files - between various stages. */ - unsigned int h = 0; - const unsigned char *p = (const unsigned char *) XSTR (x, 0); - - while (*p) - h += (h << 7) + *p++; /* ??? revisit */ - - hash += ((unsigned int) SYMBOL_REF << 7) + h; - return hash; - } - - case MEM: - hash += (unsigned) MEM; - x = XEXP (x, 0); - goto repeat; - - case USE: - if (MEM_P (XEXP (x, 0)) - && ! MEM_VOLATILE_P (XEXP (x, 0))) - { - hash += (unsigned) USE; - x = XEXP (x, 0); - - hash += (unsigned) MEM; - x = XEXP (x, 0); - goto repeat; - } - break; - - case PRE_DEC: - case PRE_INC: - case POST_DEC: - case POST_INC: - case PRE_MODIFY: - case POST_MODIFY: - case PC: - case CC0: - case CALL: - case UNSPEC_VOLATILE: - return hash; - - case UNSPEC: - /* Skip UNSPECs when we are so told. */ - if (targetm.sched.skip_rtx_p && targetm.sched.skip_rtx_p (x)) - { - hash += sel_hash_rtx (XVECEXP (x, 0, 0), 0); - return hash; - } - break; - - case ASM_OPERANDS: - /* We don't want to take the filename and line into account. 
*/ - hash += (unsigned) code + (unsigned) GET_MODE (x) - + hash_rtx_string (ASM_OPERANDS_TEMPLATE (x)) - + hash_rtx_string (ASM_OPERANDS_OUTPUT_CONSTRAINT (x)) - + (unsigned) ASM_OPERANDS_OUTPUT_IDX (x); - - if (ASM_OPERANDS_INPUT_LENGTH (x)) - { - for (i = 1; i < ASM_OPERANDS_INPUT_LENGTH (x); i++) - { - hash += (sel_hash_rtx (ASM_OPERANDS_INPUT (x, i), - GET_MODE (ASM_OPERANDS_INPUT (x, i))) - + hash_rtx_string - (ASM_OPERANDS_INPUT_CONSTRAINT (x, i))); - } - - hash += hash_rtx_string (ASM_OPERANDS_INPUT_CONSTRAINT (x, 0)); - x = ASM_OPERANDS_INPUT (x, 0); - mode = GET_MODE (x); - goto repeat; - } - - return hash; - - default: - break; + *nx = XVECEXP (x, 0 ,0); + *nmode = 0; + return 1; } - - i = GET_RTX_LENGTH (code) - 1; - hash += (unsigned) code + (unsigned) GET_MODE (x); - fmt = GET_RTX_FORMAT (code); - for (; i >= 0; i--) - { - switch (fmt[i]) - { - case 'e': - /* If we are about to do the last recursive call - needed at this level, change it into iteration. - This function is called enough to be worth it. */ - if (i == 0) - { - x = XEXP (x, i); - goto repeat; - } - - hash += sel_hash_rtx (XEXP (x, i), 0); - break; - - case 'E': - for (j = 0; j < XVECLEN (x, i); j++) - hash += sel_hash_rtx (XVECEXP (x, i, j), 0); - break; - - case 's': - hash += hash_rtx_string (XSTR (x, i)); - break; - - case 'i': - hash += (unsigned int) XINT (x, i); - break; - - case '0': case 't': - /* Unused. */ - break; - - default: - gcc_unreachable (); - } - } - - return hash; + + return 0; } /* Returns LHS and RHS are ok to be scheduled separately. */ @@ -1409,39 +1128,41 @@ lhs_and_rhs_separable_p (rtx lhs, rtx rhs) static void vinsn_init (vinsn_t vi, insn_t insn, bool force_unique_p) { - VINSN_INSN_RTX (vi) = insn; + hash_rtx_callback_function hrcf; + int class; + VINSN_INSN_RTX (vi) = insn; + VINSN_COUNT (vi) = 0; vi->cost = -1; - deps_init_id (VINSN_ID (vi), insn, force_unique_p); /* Hash vinsn depending on whether it is separable or not. */ + hrcf = targetm.sched.skip_rtx_p ? 
hash_with_unspec_callback : NULL; if (VINSN_SEPARABLE_P (vi)) { rtx rhs = VINSN_RHS (vi); - VINSN_HASH (vi) = sel_hash_rtx (rhs, GET_MODE (rhs)); - VINSN_HASH_RTX (vi) = sel_hash_rtx (VINSN_PATTERN (vi), VOIDmode); + VINSN_HASH (vi) = hash_rtx_cb (rhs, GET_MODE (rhs), + NULL, NULL, false, hrcf); + VINSN_HASH_RTX (vi) = hash_rtx_cb (VINSN_PATTERN (vi), + VOIDmode, NULL, NULL, + false, hrcf); } else { - VINSN_HASH (vi) = sel_hash_rtx (VINSN_PATTERN (vi), VOIDmode); + VINSN_HASH (vi) = hash_rtx_cb (VINSN_PATTERN (vi), VOIDmode, + NULL, NULL, false, hrcf); VINSN_HASH_RTX (vi) = VINSN_HASH (vi); } - VINSN_COUNT (vi) = 0; - - { - int class = haifa_classify_insn (insn); - - if (class >= 2 - && (!targetm.sched.get_insn_spec_ds - || ((targetm.sched.get_insn_spec_ds (insn) & BEGIN_CONTROL) - == 0))) - VINSN_MAY_TRAP_P (vi) = true; - else - VINSN_MAY_TRAP_P (vi) = false; - } + class = haifa_classify_insn (insn); + if (class >= 2 + && (!targetm.sched.get_insn_spec_ds + || ((targetm.sched.get_insn_spec_ds (insn) & BEGIN_CONTROL) + == 0))) + VINSN_MAY_TRAP_P (vi) = true; + else + VINSN_MAY_TRAP_P (vi) = false; } /* Indicate that VI has become the part of an rtx object. */ @@ -1757,6 +1478,8 @@ insert_in_history_vect (VEC (expr_history_def, heap) **pvect, bool vinsn_equal_p (vinsn_t x, vinsn_t y) { + rtx_equal_p_callback_function repcf; + if (x == y) return true; @@ -1766,16 +1489,17 @@ vinsn_equal_p (vinsn_t x, vinsn_t y) if (VINSN_HASH (x) != VINSN_HASH (y)) return false; + repcf = targetm.sched.skip_rtx_p ? skip_unspecs_callback : NULL; if (VINSN_SEPARABLE_P (x)) { /* Compare RHSes of VINSNs. */ gcc_assert (VINSN_RHS (x)); gcc_assert (VINSN_RHS (y)); - return sel_rtx_equal_p (VINSN_RHS (x), VINSN_RHS (y)); + return rtx_equal_p_cb (VINSN_RHS (x), VINSN_RHS (y), repcf); } - return sel_rtx_equal_p (VINSN_PATTERN (x), VINSN_PATTERN (y)); + return rtx_equal_p_cb (VINSN_PATTERN (x), VINSN_PATTERN (y), repcf); } /* Initialize EXPR. 
*/ @@ -1784,7 +1508,8 @@ init_expr (expr_t expr, vinsn_t vi, int spec, int use, int priority, int sched_times, int orig_bb_index, ds_t spec_done_ds, ds_t spec_to_check_ds, int orig_sched_cycle, VEC(expr_history_def, heap) *history, bool target_available, - bool was_substituted, bool was_renamed, bool needs_spec_check_p) + bool was_substituted, bool was_renamed, bool needs_spec_check_p, + bool cant_move) { vinsn_attach (vi); @@ -1807,6 +1532,7 @@ init_expr (expr_t expr, vinsn_t vi, int spec, int use, int priority, EXPR_WAS_SUBSTITUTED (expr) = was_substituted; EXPR_WAS_RENAMED (expr) = was_renamed; EXPR_NEEDS_SPEC_CHECK_P (expr) = needs_spec_check_p; + EXPR_CANT_MOVE (expr) = cant_move; } /* Make a copy of the expr FROM into the expr TO. */ @@ -1836,7 +1562,8 @@ copy_expr (expr_t to, expr_t from) EXPR_SPEC_DONE_DS (from), EXPR_SPEC_TO_CHECK_DS (from), EXPR_ORIG_SCHED_CYCLE (from), temp, EXPR_TARGET_AVAILABLE (from), EXPR_WAS_SUBSTITUTED (from), - EXPR_WAS_RENAMED (from), EXPR_NEEDS_SPEC_CHECK_P (from)); + EXPR_WAS_RENAMED (from), EXPR_NEEDS_SPEC_CHECK_P (from), + EXPR_CANT_MOVE (from)); } /* Same, but the final expr will not ever be in av sets, so don't copy @@ -1848,34 +1575,14 @@ copy_expr_onside (expr_t to, expr_t from) EXPR_PRIORITY (from), EXPR_SCHED_TIMES (from), 0, EXPR_SPEC_DONE_DS (from), EXPR_SPEC_TO_CHECK_DS (from), 0, NULL, EXPR_TARGET_AVAILABLE (from), EXPR_WAS_SUBSTITUTED (from), - EXPR_WAS_RENAMED (from), EXPR_NEEDS_SPEC_CHECK_P (from)); + EXPR_WAS_RENAMED (from), EXPR_NEEDS_SPEC_CHECK_P (from), + EXPR_CANT_MOVE (from)); } -/* Merge bits of FROM expr to TO expr. When SPLIT_POINT is not NULL, - this is done along different paths. */ -void -merge_expr_data (expr_t to, expr_t from, insn_t split_point) +/* Update target_available bits when merging exprs TO and FROM. 
*/ +static void +update_target_availability (expr_t to, expr_t from, insn_t split_point) { - int i; - expr_history_def *phist; - - /* For now, we just set the spec of resulting expr to be minimum of the specs - of merged exprs. */ - if (EXPR_SPEC (to) > EXPR_SPEC (from)) - EXPR_SPEC (to) = EXPR_SPEC (from); - - if (split_point) - EXPR_USEFULNESS (to) += EXPR_USEFULNESS (from); - else - EXPR_USEFULNESS (to) = MAX (EXPR_USEFULNESS (to), - EXPR_USEFULNESS (from)); - - if (EXPR_PRIORITY (to) < EXPR_PRIORITY (from)) - EXPR_PRIORITY (to) = EXPR_PRIORITY (from); - - if (EXPR_SCHED_TIMES (to) > EXPR_SCHED_TIMES (from)) - EXPR_SCHED_TIMES (to) = EXPR_SCHED_TIMES (from); - if (EXPR_TARGET_AVAILABLE (to) < 0 || EXPR_TARGET_AVAILABLE (from) < 0) EXPR_TARGET_AVAILABLE (to) = -1; @@ -1901,6 +1608,85 @@ merge_expr_data (expr_t to, expr_t from, insn_t split_point) else EXPR_TARGET_AVAILABLE (to) &= EXPR_TARGET_AVAILABLE (from); } +} + +/* Update speculation bits when merging exprs TO and FROM. */ +static void +update_speculative_bits (expr_t to, expr_t from, insn_t split_point) +{ + ds_t old_to_ds, old_from_ds; + + old_to_ds = EXPR_SPEC_DONE_DS (to); + old_from_ds = EXPR_SPEC_DONE_DS (from); + + EXPR_SPEC_DONE_DS (to) = ds_max_merge (old_to_ds, old_from_ds); + EXPR_SPEC_TO_CHECK_DS (to) |= EXPR_SPEC_TO_CHECK_DS (from); + EXPR_NEEDS_SPEC_CHECK_P (to) |= EXPR_NEEDS_SPEC_CHECK_P (from); + + /* When merging e.g. control & data speculative exprs, or a control + speculative with a control&data speculative one, we really have + to change vinsn too. Also, when speculative status is changed, + we also need to record this as a transformation in expr's history. */ + if ((old_to_ds & SPECULATIVE) || (old_from_ds & SPECULATIVE)) + { + old_to_ds = ds_get_speculation_types (old_to_ds); + old_from_ds = ds_get_speculation_types (old_from_ds); + + if (old_to_ds != old_from_ds) + { + ds_t record_ds; + + /* When both expressions are speculative, we need to change + the vinsn first. 
*/ + if ((old_to_ds & SPECULATIVE) && (old_from_ds & SPECULATIVE)) + { + int res; + + res = speculate_expr (to, EXPR_SPEC_DONE_DS (to)); + gcc_assert (res >= 0); + } + + if (split_point != NULL) + { + /* Record the change with proper status. */ + record_ds = EXPR_SPEC_DONE_DS (to) & SPECULATIVE; + record_ds &= ~(old_to_ds & SPECULATIVE); + record_ds &= ~(old_from_ds & SPECULATIVE); + + insert_in_history_vect (&EXPR_HISTORY_OF_CHANGES (to), + INSN_UID (split_point), TRANS_SPECULATION, + EXPR_VINSN (from), EXPR_VINSN (to), + record_ds); + } + } + } +} + + +/* Merge bits of FROM expr to TO expr. When SPLIT_POINT is not NULL, + this is done along different paths. */ +void +merge_expr_data (expr_t to, expr_t from, insn_t split_point) +{ + int i; + expr_history_def *phist; + + /* For now, we just set the spec of resulting expr to be minimum of the specs + of merged exprs. */ + if (EXPR_SPEC (to) > EXPR_SPEC (from)) + EXPR_SPEC (to) = EXPR_SPEC (from); + + if (split_point) + EXPR_USEFULNESS (to) += EXPR_USEFULNESS (from); + else + EXPR_USEFULNESS (to) = MAX (EXPR_USEFULNESS (to), + EXPR_USEFULNESS (from)); + + if (EXPR_PRIORITY (to) < EXPR_PRIORITY (from)) + EXPR_PRIORITY (to) = EXPR_PRIORITY (from); + + if (EXPR_SCHED_TIMES (to) > EXPR_SCHED_TIMES (from)) + EXPR_SCHED_TIMES (to) = EXPR_SCHED_TIMES (from); if (EXPR_ORIG_BB_INDEX (to) != EXPR_ORIG_BB_INDEX (from)) EXPR_ORIG_BB_INDEX (to) = 0; @@ -1920,55 +1706,10 @@ merge_expr_data (expr_t to, expr_t from, insn_t split_point) EXPR_WAS_SUBSTITUTED (to) |= EXPR_WAS_SUBSTITUTED (from); EXPR_WAS_RENAMED (to) |= EXPR_WAS_RENAMED (to); + EXPR_CANT_MOVE (to) |= EXPR_CANT_MOVE (from); - { - ds_t old_to_ds, old_from_ds; - - old_to_ds = EXPR_SPEC_DONE_DS (to); - old_from_ds = EXPR_SPEC_DONE_DS (from); - - EXPR_SPEC_DONE_DS (to) = ds_max_merge (old_to_ds, old_from_ds); - EXPR_SPEC_TO_CHECK_DS (to) |= EXPR_SPEC_TO_CHECK_DS (from); - EXPR_NEEDS_SPEC_CHECK_P (to) |= EXPR_NEEDS_SPEC_CHECK_P (from); - - /* When merging e.g. 
control & data speculative exprs, or a control - speculative with a control&data speculative one, we really have - to change vinsn too. Also, when speculative status is changed, - we also need to record this as a transformation in expr's history. */ - if ((old_to_ds & SPECULATIVE) || (old_from_ds & SPECULATIVE)) - { - old_to_ds = ds_get_speculation_types (old_to_ds); - old_from_ds = ds_get_speculation_types (old_from_ds); - - if (old_to_ds != old_from_ds) - { - ds_t record_ds; - - /* When both expressions are speculative, we need to change - the vinsn first. */ - if ((old_to_ds & SPECULATIVE) && (old_from_ds & SPECULATIVE)) - { - int res; - - res = speculate_expr (to, EXPR_SPEC_DONE_DS (to)); - gcc_assert (res >= 0); - } - - if (split_point != NULL) - { - /* Record the change with proper status. */ - record_ds = EXPR_SPEC_DONE_DS (to) & SPECULATIVE; - record_ds &= ~(old_to_ds & SPECULATIVE); - record_ds &= ~(old_from_ds & SPECULATIVE); - - insert_in_history_vect (&EXPR_HISTORY_OF_CHANGES (to), - INSN_UID (split_point), TRANS_SPECULATION, - EXPR_VINSN (from), EXPR_VINSN (to), - record_ds); - } - } - } - } + update_target_availability (to, from, split_point); + update_speculative_bits (to, from, split_point); } /* Merge bits of FROM expr to TO expr. Vinsns in the exprs should correlate. */ @@ -2146,21 +1887,68 @@ mark_unavailable_targets (av_set_t join_set, av_set_t av_set, regset lv_set) /* Av set functions. */ +/* Return true if we think that EXPR2 must be before EXPR1 in av set. */ +static inline bool +expr_greater_p (expr_t expr1, expr_t expr2) +{ + int tmp; + + tmp = EXPR_USEFULNESS (expr2) - EXPR_USEFULNESS (expr1); + if (tmp) + return tmp > 0; + tmp = EXPR_PRIORITY (expr2) - EXPR_PRIORITY (expr1); + if (tmp) + return tmp > 0; + return INSN_LUID (EXPR_INSN_RTX (expr2)) < INSN_LUID (EXPR_INSN_RTX (expr1)); +} + +/* Add a new element to av set SETP, having in mind the priority. + Return the element added. 
*/ +static av_set_t +av_set_add_element (av_set_t *setp, expr_t expr) +{ + av_set_t *prevp = NULL, elem; + + while (*setp && ! expr_greater_p (_AV_SET_EXPR (*setp), expr)) + { + prevp = setp; + setp = &_AV_SET_NEXT (*setp); + } + + if (prevp == NULL) + { + /* Insert at the beginning of the list. */ + _list_add (setp); + return *setp; + } + + /* Insert somewhere in the middle. */ + elem = _list_alloc (); + _AV_SET_NEXT (elem) = _AV_SET_NEXT (*prevp); + _AV_SET_NEXT (*prevp) = elem; + + return elem; +} + /* Add EXPR to SETP. */ void av_set_add (av_set_t *setp, expr_t expr) { + av_set_t elem; + gcc_assert (!INSN_NOP_P (EXPR_INSN_RTX (expr))); - _list_add (setp); - copy_expr (_AV_SET_EXPR (*setp), expr); + elem = av_set_add_element (setp, expr); + copy_expr (_AV_SET_EXPR (elem), expr); } /* Same, but do not copy EXPR. */ static void av_set_add_nocopy (av_set_t *setp, expr_t expr) { - _list_add (setp); - *_AV_SET_EXPR (*setp) = *expr; + av_set_t elem; + + elem = av_set_add_element (setp, expr); + *_AV_SET_EXPR (elem) = *expr; } /* Remove expr pointed to by IP from the av_set. */ @@ -2204,27 +1992,19 @@ av_set_lookup_and_remove (av_set_t *setp, vinsn_t sought_vinsn) /* Search for an expr in SET, such that it's equivalent to EXPR in the sense of vinsn_equal_p function of their vinsns, but not EXPR itself. - Returns NULL if no such expr is in SET was found. Store in LATERP true - when other expression was found later than this, and false otherwise. */ + Returns NULL if no such expr is in SET was found. 
*/ static expr_t -av_set_lookup_other_equiv_expr (av_set_t set, expr_t expr, bool *laterp) +av_set_lookup_other_equiv_expr (av_set_t set, expr_t expr) { expr_t cur_expr; av_set_iterator i; - bool temp = false; FOR_EACH_EXPR (cur_expr, i, set) { if (cur_expr == expr) - { - temp = true; - continue; - } + continue; if (vinsn_equal_p (EXPR_VINSN (cur_expr), EXPR_VINSN (expr))) - { - *laterp = temp; - return cur_expr; - } + return cur_expr; } return NULL; @@ -2234,11 +2014,9 @@ av_set_lookup_other_equiv_expr (av_set_t set, expr_t expr, bool *laterp) expr_t merge_with_other_exprs (av_set_t *avp, av_set_iterator *ip, expr_t expr) { - bool later; expr_t expr2; - expr2 = av_set_lookup_other_equiv_expr (*avp, expr, &later); - + expr2 = av_set_lookup_other_equiv_expr (*avp, expr); if (expr2 != NULL) { /* Reset target availability on merge, since taking it only from one @@ -2279,6 +2057,54 @@ av_set_copy (av_set_t set) return res; } +/* Join two av sets that do not have common elements and save the resulting + set in TOP. FROMP will be null after this. */ +static void +join_distinct_sets (av_set_t *top, av_set_t *fromp) +{ + av_set_t *oldp = fromp, from = *fromp; + + while (from) + { + av_set_t prev = NULL, tmp = *top; + av_set_t next = _AV_SET_NEXT (from); + + while (tmp && ! expr_greater_p (_AV_SET_EXPR (tmp), + _AV_SET_EXPR (from))) + { + prev = tmp; + tmp = _AV_SET_NEXT (tmp); + } + + _AV_SET_NEXT (from) = tmp; + if (prev == NULL) + *top = from; + else + _AV_SET_NEXT (prev) = from; + + from = next; + } + + *oldp = NULL; +} + +/* Truncate the set so it doesn't grow too much. */ +void +av_set_truncate (av_set_t set) +{ + int n = 0; + av_set_t prev; + + while (set && ++n <= 9) + { + prev = set; + set = _AV_SET_NEXT (set); + } + if (set) + av_set_clear (&_AV_SET_NEXT (prev)); +} + + /* Makes set pointed to by TO to be the union of TO and FROM. Clear av_set pointed to by FROMP afterwards. 
*/ void @@ -2299,9 +2125,7 @@ av_set_union_and_clear (av_set_t *top, av_set_t *fromp, insn_t insn) } } - /* Connect FROMP to the end of the TOP. */ - *i.lp = *fromp; - *fromp = NULL; + join_distinct_sets (top, fromp); } /* Same as above, but also update availability of target register in @@ -2312,7 +2136,6 @@ av_set_union_and_live (av_set_t *top, av_set_t *fromp, regset to_lv_set, { expr_t expr1; av_set_iterator i; - _list_t *oldlp; av_set_t in_both_set = NULL; /* Delete from TOP all expres, that present in FROMP. */ @@ -2350,19 +2173,13 @@ av_set_union_and_live (av_set_t *top, av_set_t *fromp, regset to_lv_set, set_unavailable_target_for_expr (expr1, from_lv_set); } - /* Save the old pointer to the end of the list. */ - oldlp = i.lp; - /* These expressions are not present in TOP. Check liveness restrictions on TO_LV_SET. */ FOR_EACH_EXPR (expr1, i, *fromp) set_unavailable_target_for_expr (expr1, to_lv_set); - /* Connect FROMP and in_both_set to the end of the TOP. */ - *i.lp = in_both_set; - *oldlp = *fromp; - - *fromp = NULL; + join_distinct_sets (top, &in_both_set); + join_distinct_sets (top, fromp); } /* Clear av_set pointed to by SETP. */ @@ -2833,7 +2650,8 @@ init_global_and_expr_for_insn (insn_t insn) /* Initialize INSN's expr. */ init_expr (INSN_EXPR (insn), vinsn_create (insn, force_unique_p), 0, REG_BR_PROB_BASE, INSN_PRIORITY (insn), 0, BLOCK_NUM (insn), - spec_done_ds, 0, 0, NULL, true, false, false, false); + spec_done_ds, 0, 0, NULL, true, false, false, false, + CANT_MOVE (insn)); } init_first_time_insn_data (insn); @@ -2878,6 +2696,7 @@ finish_global_and_expr_insn (insn_t insn) { free_first_time_insn_data (insn); INSN_WS_LEVEL (insn) = 0; + CANT_MOVE (insn) = 0; /* We can no longer assert this, as vinsns of this insn could be easily live in other insn's caches. 
This should be changed to @@ -3664,12 +3483,18 @@ bool can_add_real_insns_p = true; static void extend_insn (void) { + int reserve; + sched_extend_target (); sched_deps_init (false); /* Extend data structures for insns from current region. */ - VEC_safe_grow_cleared (sel_insn_data_def, heap, s_i_d, - sched_max_luid); + reserve = (sched_max_luid + 1 + - VEC_length (sel_insn_data_def, s_i_d)); + if (reserve > 0 + && ! VEC_space (sel_insn_data_def, s_i_d, reserve)) + VEC_safe_grow_cleared (sel_insn_data_def, heap, s_i_d, + 3 * sched_max_luid / 2); } /* Finalize data structures for insns from current region. */ @@ -3694,11 +3519,16 @@ finish_insns (void) free_deps (&sid_entry->deps_context); } if (EXPR_VINSN (&sid_entry->expr)) - clear_expr (&sid_entry->expr); + { + clear_expr (&sid_entry->expr); + + /* Also, clear CANT_MOVE bit here, because we really don't want it + to be passed to the next region. */ + CANT_MOVE_BY_LUID (i) = 0; + } } VEC_free (sel_insn_data_def, heap, s_i_d); - deps_finish_d_i_d (); } /* An implementation of RTL_HOOKS_INSN_ADDED hook. The hook is used for @@ -3826,7 +3656,7 @@ init_simplejump (insn_t insn) { init_expr (INSN_EXPR (insn), vinsn_create (insn, false), 0, REG_BR_PROB_BASE, 0, 0, 0, 0, 0, 0, NULL, true, false, false, - false); + false, true); INSN_SEQNO (insn) = get_seqno_of_a_pred (insn); init_first_time_insn_data (insn); } diff --git a/gcc/sel-sched-ir.h b/gcc/sel-sched-ir.h index b651e53b0fb..036314c904a 100644 --- a/gcc/sel-sched-ir.h +++ b/gcc/sel-sched-ir.h @@ -167,6 +167,9 @@ struct _expr /* True when the expression was renamed. */ BOOL_BITFIELD was_renamed : 1; + + /* True when expression can't be moved. 
*/ + BOOL_BITFIELD cant_move : 1; }; typedef struct _expr expr_def; @@ -193,6 +196,7 @@ typedef expr_def *expr_t; #define EXPR_NEEDS_SPEC_CHECK_P(EXPR) ((EXPR)->needs_spec_check_p) #define EXPR_WAS_SUBSTITUTED(EXPR) ((EXPR)->was_substituted) #define EXPR_WAS_RENAMED(EXPR) ((EXPR)->was_renamed) +#define EXPR_CANT_MOVE(EXPR) ((EXPR)->cant_move) /* Insn definition for list of original insns in find_used_regs. */ struct _def @@ -359,10 +363,16 @@ struct _list_node we can't move them in sel-sched-ir.c. */ extern alloc_pool sched_lists_pool; +static inline _list_t +_list_alloc (void) +{ + return (_list_t) pool_alloc (sched_lists_pool); +} + static inline void _list_add (_list_t *lp) { - _list_t l = (_list_t) pool_alloc (sched_lists_pool); + _list_t l = _list_alloc (); _LIST_NEXT (l) = *lp; *lp = l; @@ -1516,6 +1526,7 @@ extern bool av_set_is_in_p (av_set_t, vinsn_t); extern av_set_t av_set_copy (av_set_t); extern void av_set_union_and_clear (av_set_t *, av_set_t *, insn_t); extern void av_set_union_and_live (av_set_t *, av_set_t *, regset, regset, insn_t); +extern void av_set_truncate (av_set_t); extern void av_set_clear (av_set_t *); extern void av_set_leave_one_nonspec (av_set_t *); extern expr_t av_set_element (av_set_t, int); diff --git a/gcc/sel-sched.c b/gcc/sel-sched.c index df258b6bf2b..7aba7790c31 100644 --- a/gcc/sel-sched.c +++ b/gcc/sel-sched.c @@ -358,7 +358,7 @@ static int sel_rank_for_schedule (const void *, const void *); static av_set_t find_sequential_best_exprs (bnd_t, expr_t, bool); static rtx get_dest_from_orig_ops (av_set_t); -static basic_block generate_bookkeeping_insn (expr_t, insn_t, edge, edge); +static basic_block generate_bookkeeping_insn (expr_t, edge, edge); static bool find_used_regs (insn_t, av_set_t, regset, struct reg_rename *, def_list_t *); static bool move_op (insn_t, av_set_t, rtx, expr_t); @@ -1882,18 +1882,18 @@ moveup_expr (expr_t expr, insn_t through_insn, bool inside_insn_group, return MOVEUP_EXPR_NULL; } - if (CANT_MOVE (insn) 
+ /* Don't move what we can't move. */ + if (EXPR_CANT_MOVE (expr) && BLOCK_FOR_INSN (through_insn) != BLOCK_FOR_INSN (insn)) - /* Don't move what we can't move. */ return MOVEUP_EXPR_NULL; + /* Don't move SCHED_GROUP instruction through anything. + If we don't force this, then it will be possible to start + scheduling a sched_group before all its dependencies are + resolved. + ??? Haifa deals with this issue by delaying the SCHED_GROUP + as late as possible through rank_for_schedule. */ if (SCHED_GROUP_P (insn)) - /* Don't move SCHED_GROUP instruction through anything. - If we don't force this, then it will be possible to start - scheduling a sched_group before all its dependencies are - resolved. - ??? Haifa deals with this issue by delaying the SCHED_GROUP - as late as possible through rank_for_schedule (). */ return MOVEUP_EXPR_NULL; } else @@ -2506,6 +2506,9 @@ compute_av_set_at_bb_end (insn_t insn, ilist_t p, int ws) { av_set_iterator i; expr_t expr; + + /* Truncate av set first. */ + av_set_truncate (av1); /* Increase the spec attribute of all EXPR'es that didn't come from all successors. */ @@ -3352,6 +3355,13 @@ fill_vec_av_set (av_set_t av, blist_t bnds, fence_t fence, continue; } + /* Do not pass too much stuff to max_issue and tick_check_p. */ + if (n >= 9) + { + VEC_unordered_remove (expr_t, vec_av_set, n); + continue; + } + /* Set number of sched_next insns (just in case there could be several). 
*/ if (FENCE_SCHED_NEXT (fence)) @@ -3956,10 +3966,7 @@ find_best_expr (av_set_t *av_vliw_ptr, blist_t bnds, fence_t fence, can_issue_more = invoke_aftermath_hooks (fence, EXPR_INSN_RTX (best), can_issue_more); if (can_issue_more == 0) - { - best = NULL; - *pneed_stall = 1; - } + *pneed_stall = 1; } if (sched_verbose >= 2) @@ -4017,232 +4024,195 @@ emit_insn_from_expr_after (expr_t expr, vinsn_t vinsn, int seqno, place_to_insert); } -/* Generate a bookkeeping copy of "REG = CUR_EXPR" insn at JOIN_POINT on the - ingoing path(s) to E2->dest, other than from E1->src (there could be some - empty blocks between E1->src and E2->dest). If there is only one such path - and bookkeeping copy can be created in the last block, that is on this path, - bookkeeping instruction is inserted at the end of this block. Otherwise, - the function splits E2->dest bb on the two and emits the bookkeeping copy in - the upper bb, redirecting all other paths to the lower bb and returns the - newly created bb, which is the lower bb. - All scheduler data is initialized for the newly created insn. */ -static basic_block -generate_bookkeeping_insn (expr_t c_expr, insn_t join_point, edge e1, edge e2) -{ - basic_block src, bb = e2->dest; - basic_block new_bb = NULL; - insn_t src_end = NULL_RTX; - insn_t place_to_insert = NULL_RTX; - /* Save the original destination of E1. */ - basic_block empty_bb = e1->dest; - int new_seqno = INSN_SEQNO (join_point); - basic_block other_block = NULL; - bool need_to_exchange_data_sets = false; - insn_t new_insn; +/* Return TRUE if BB can hold bookkeeping code. */ +static bool +block_valid_for_bookkeeping_p (basic_block bb) +{ + insn_t bb_end = BB_END (bb); - if (sched_verbose >= 4) - sel_print ("Generating bookkeeping insn (%d->%d)\n", e1->src->index, - e2->dest->index); + if (!in_current_region_p (bb) || EDGE_COUNT (bb->succs) > 1) + return false; - /* Find a basic block that can hold bookkeeping. 
If it can be found, do not - create new basic block, but insert bookkeeping there. */ - if (e1 == e2) + if (INSN_P (bb_end)) { - other_block = - EDGE_COUNT (e1->dest->preds) > 2 - ? NULL - : EDGE_PRED (e1->dest, 0) == e1 - ? EDGE_PRED (e1->dest, 1)->src - : EDGE_PRED (e1->dest, 0)->src; + if (INSN_SCHED_TIMES (bb_end) > 0) + return false; } else + gcc_assert (NOTE_INSN_BASIC_BLOCK_P (bb_end)); + + return true; +} + +/* Attempt to find a block that can hold bookkeeping code for path(s) incoming + into E2->dest, except from E1->src (there may be a sequence of empty basic + blocks between E1->src and E2->dest). Return found block, or NULL if new + one must be created. */ +static basic_block +find_block_for_bookkeeping (edge e1, edge e2) +{ + basic_block candidate_block = NULL; + edge e; + + /* Loop over edges from E1 to E2, inclusive. */ + for (e = e1; ; e = EDGE_SUCC (e->dest, 0)) { - edge iter_edge = e1; - bool search_more = true; - do - { - search_more = iter_edge != e2; - /* There must be only one edge that enters path from e1 to e2 - from aside to be able to create bookkeeping in existing block. */ - if (EDGE_COUNT (iter_edge->dest->preds) == 2) - { - if (other_block == NULL) - other_block = - EDGE_PRED (iter_edge->dest, 0) == iter_edge - ? EDGE_PRED (iter_edge->dest, 1)->src - : EDGE_PRED (iter_edge->dest, 0)->src; - else - { - /* Found additional edge leading to path from e1 to e2 - from aside. */ - other_block = NULL; - break; - } - } - else if (EDGE_COUNT (iter_edge->dest->preds) > 2) - { - /* Several edges leading to path from e1 to e2 from aside. */ - other_block = NULL; - break; - } - iter_edge = EDGE_SUCC (iter_edge->dest, 0); - } - while (search_more); + if (EDGE_COUNT (e->dest->preds) == 2) + { + if (candidate_block == NULL) + candidate_block = (EDGE_PRED (e->dest, 0) == e + ? EDGE_PRED (e->dest, 1)->src + : EDGE_PRED (e->dest, 0)->src); + else + /* Found additional edge leading to path from e1 to e2 + from aside. 
*/ + return NULL; + } + else if (EDGE_COUNT (e->dest->preds) > 2) + /* Several edges leading to path from e1 to e2 from aside. */ + return NULL; + + if (e == e2) + return (block_valid_for_bookkeeping_p (candidate_block) + ? candidate_block + : NULL); } + gcc_unreachable (); +} - /* sched_split_block () can emit an unnecessary note if the following isn't - true. */ - gcc_assert (bb_note (bb) != BB_END (bb)); +/* Create new basic block for bookkeeping code for path(s) incoming into + E2->dest, except from E1->src. Return created block. */ +static basic_block +create_block_for_bookkeeping (edge e1, edge e2) +{ + basic_block new_bb, bb = e2->dest; - /* Explore, if we can insert bookkeeping into OTHER_BLOCK in case edge - OTHER_BLOCK -> BB is fallthrough, meaning there is no jump there. */ - if (EDGE_COUNT (bb->preds) == 2 - && other_block - && in_current_region_p (other_block)) + /* Check that we don't spoil the loop structure. */ + if (current_loop_nest) { - /* SRC is the block, in which we possibly can insert bookkeeping insn - without creating new basic block. It is the other (than E2->SRC) - predecessor block of BB. */ - src = other_block; + basic_block latch = current_loop_nest->latch; - /* Instruction, after which we would try to insert bookkeeping insn. */ - src_end = BB_END (src); + /* We do not split header. */ + gcc_assert (e2->dest != current_loop_nest->header); - if (INSN_P (src_end)) - { - if (control_flow_insn_p (src_end) - /* It might be scheduled, thus making this illegal. */ - || INSN_SCHED_TIMES (src_end) > 0) - src = NULL; - } - else - gcc_assert (NOTE_INSN_BASIC_BLOCK_P (src_end)); + /* We do not redirect the only edge to the latch block. */ + gcc_assert (e1->dest != latch + || !single_pred_p (latch) + || e1 != single_pred_edge (latch)); } + + can_add_real_insns_p = false; + + /* Split BB to insert BOOK_INSN there. */ + new_bb = sched_split_block (bb, NULL); + + /* Move note_list from the upper bb. 
*/ + gcc_assert (BB_NOTE_LIST (new_bb) == NULL_RTX); + BB_NOTE_LIST (new_bb) = BB_NOTE_LIST (bb); + BB_NOTE_LIST (bb) = NULL_RTX; + + gcc_assert (e2->dest == bb); + + can_add_real_insns_p = true; + insn_init.what = INSN_INIT_WHAT_INSN; + + /* Skip block for bookkeeping copy when leaving E1->src. */ + if (e1->flags & EDGE_FALLTHRU) + sel_redirect_edge_and_branch_force (e1, new_bb); else - src = NULL; - - if (!src) - { - /* Check that we don't spoil the loop structure. */ - if (current_loop_nest) - { - basic_block latch = current_loop_nest->latch; + sel_redirect_edge_and_branch (e1, new_bb); - /* We do not split header. */ - gcc_assert (bb != current_loop_nest->header); + gcc_assert (e1->dest == new_bb); + gcc_assert (sel_bb_empty_p (bb)); - /* We do not redirect the only edge to the latch block. */ - gcc_assert (e1->dest != latch - || !single_pred_p (latch) - || e1 != single_pred_edge (latch)); - } + return bb; +} - /* Explore, if we can insert bookkeeping into OTHER_BLOCK in case edge - OTHER_BLOCK -> BB is not fallthrough, meaning there is jump there. */ - if (other_block - && in_current_region_p (other_block) - && EDGE_COUNT (other_block->succs) == 1 - && (e1->flags & EDGE_FALLTHRU)) - { - insn_t src_begin; +/* Return insn after which we must insert bookkeeping code for path(s) incoming + into E2->dest, except from E1->src. */ +static insn_t +find_place_for_bookkeeping (edge e1, edge e2) +{ + insn_t place_to_insert; + /* Find a basic block that can hold bookkeeping. If it can be found, do not + create new basic block, but insert bookkeeping there. */ + basic_block book_block = find_block_for_bookkeeping (e1, e2); - get_ebb_head_tail (other_block, other_block, &src_begin, &src_end); + if (!book_block) + book_block = create_block_for_bookkeeping (e1, e2); - gcc_assert (control_flow_insn_p (src_end)); + place_to_insert = BB_END (book_block); - if (/* Jump was scheduled. */ - INSN_SCHED_TIMES (src_end) > 0 - /* This is a floating bb header. 
*/ - || (src_end == src_begin - && IN_CURRENT_FENCE_P (src_end))) - new_bb = NULL; - else - { - new_bb = other_block; - place_to_insert = PREV_INSN (src_end); - new_seqno = INSN_SEQNO (src_end); - insn_init.what = INSN_INIT_WHAT_INSN; - } - } - else - new_bb = NULL; + /* If basic block ends with a jump, insert bookkeeping code right before it. */ + if (INSN_P (place_to_insert) && control_flow_insn_p (place_to_insert)) + place_to_insert = PREV_INSN (place_to_insert); - if (!new_bb) - { - /* We need to create a new basic block for bookkeeping insn. */ - can_add_real_insns_p = false; + return place_to_insert; +} - /* Split the head of the BB to insert BOOK_INSN there. */ - new_bb = sched_split_block (bb, NULL); +/* Insert bookkeeping copy of C_EXPS's insn after PLACE_TO_INSERT, assigning + NEW_SEQNO to it. Return created insn. */ +static insn_t +emit_bookkeeping_insn (insn_t place_to_insert, expr_t c_expr, int new_seqno) +{ + rtx new_insn_rtx = create_copy_of_insn_rtx (EXPR_INSN_RTX (c_expr)); - /* Move note_list from the upper bb. */ - gcc_assert (BB_NOTE_LIST (new_bb) == NULL_RTX); - BB_NOTE_LIST (new_bb) = BB_NOTE_LIST (bb); - BB_NOTE_LIST (bb) = NULL_RTX; + vinsn_t new_vinsn + = create_vinsn_from_insn_rtx (new_insn_rtx, + VINSN_UNIQUE_P (EXPR_VINSN (c_expr))); - gcc_assert (e2->dest == bb); + insn_t new_insn = emit_insn_from_expr_after (c_expr, new_vinsn, new_seqno, + place_to_insert); - can_add_real_insns_p = true; - insn_init.what = INSN_INIT_WHAT_INSN; + INSN_SCHED_TIMES (new_insn) = 0; + bitmap_set_bit (current_copies, INSN_UID (new_insn)); - /* Make a jump skipping bookkeeping copy. */ - if (e1->flags & EDGE_FALLTHRU) - sel_redirect_edge_and_branch_force (e1, new_bb); - else - sel_redirect_edge_and_branch (e1, new_bb); + return new_insn; +} + +/* Generate a bookkeeping copy of C_EXPR's insn for path(s) incoming into to + E2->dest, except from E1->src (there may be a sequence of empty blocks + between E1->src and E2->dest). Return block containing the copy. 
+ All scheduler data is initialized for the newly created insn. */ +static basic_block +generate_bookkeeping_insn (expr_t c_expr, edge e1, edge e2) +{ + insn_t join_point, place_to_insert, next, new_insn; + int new_seqno; + bool need_to_exchange_data_sets; - gcc_assert (e1->dest == new_bb); - gcc_assert (sel_bb_empty_p (bb)); + if (sched_verbose >= 4) + sel_print ("Generating bookkeeping insn (%d->%d)\n", e1->src->index, + e2->dest->index); - place_to_insert = BB_END (bb); - } - } - else - place_to_insert = src_end; + join_point = sel_bb_head (e2->dest); + place_to_insert = find_place_for_bookkeeping (e1, e2); + need_to_exchange_data_sets + = sel_bb_empty_p (BLOCK_FOR_INSN (place_to_insert)); - /* Remove unreachable empty blocks. */ - while (EDGE_COUNT (empty_bb->preds) == 0) - { - basic_block next_bb = empty_bb->next_bb; - sel_remove_empty_bb (empty_bb, false, true); - empty_bb = next_bb; - } + /* Check if we are about to insert bookkeeping copy before a jump, and use + jump's seqno for the copy; otherwise, use JOIN_POINT's seqno. */ + next = NEXT_INSN (place_to_insert); - { - rtx new_insn_rtx; - vinsn_t new_vinsn; - - need_to_exchange_data_sets - = sel_bb_empty_p (BLOCK_FOR_INSN (place_to_insert)); - - new_insn_rtx = create_copy_of_insn_rtx (EXPR_INSN_RTX (c_expr)); - new_vinsn = create_vinsn_from_insn_rtx (new_insn_rtx, - VINSN_UNIQUE_P (EXPR_VINSN (c_expr))); - new_insn = emit_insn_from_expr_after (c_expr, new_vinsn, - new_seqno, place_to_insert); - - INSN_SCHED_TIMES (new_insn) = 0; - bitmap_set_bit (current_copies, INSN_UID (new_insn)); - - /* When inserting bookkeeping insn in new block, av sets should be - following: old basic block (that now holds bookkeeping) data sets are - the same as was before generation of bookkeeping, and new basic block - (that now hold all other insns of old basic block) data sets are - invalid. So exchange data sets for these basic blocks as sel_split_block - mistakenly exchanges them in this case. 
Cannot do it earlier because - when single instruction is added to new basic block it should hold NULL - lv_set. */ - if (need_to_exchange_data_sets) - exchange_data_sets (BLOCK_FOR_INSN (new_insn), - BLOCK_FOR_INSN (join_point)); - - gcc_assert ((src == NULL && BB_END (bb) == new_insn - && sel_bb_head_p (new_insn)) - || (src == NULL && control_flow_insn_p (BB_END (other_block)) - && PREV_INSN (BB_END (other_block)) == new_insn - && INSN_SCHED_TIMES (BB_END (other_block)) == 0) - || BB_END (src) == new_insn); - } + if (INSN_P (next) && JUMP_P (next) + && BLOCK_FOR_INSN (next) == BLOCK_FOR_INSN (place_to_insert)) + new_seqno = INSN_SEQNO (next); + else + new_seqno = INSN_SEQNO (join_point); + + new_insn = emit_bookkeeping_insn (place_to_insert, c_expr, new_seqno); + + /* When inserting bookkeeping insn in new block, av sets should be + following: old basic block (that now holds bookkeeping) data sets are + the same as was before generation of bookkeeping, and new basic block + (that now hold all other insns of old basic block) data sets are + invalid. So exchange data sets for these basic blocks as sel_split_block + mistakenly exchanges them in this case. Cannot do it earlier because + when single instruction is added to new basic block it should hold NULL + lv_set. */ + if (need_to_exchange_data_sets) + exchange_data_sets (BLOCK_FOR_INSN (new_insn), + BLOCK_FOR_INSN (join_point)); stat_bookkeeping_copies++; return BLOCK_FOR_INSN (new_insn); @@ -4725,7 +4695,7 @@ advance_state_on_fence (fence_t fence, insn_t insn) /* Update FENCE on which INSN was scheduled and this INSN, too. */ static void -update_fence_and_insn (fence_t fence, insn_t insn) +update_fence_and_insn (fence_t fence, insn_t insn, int need_stall) { bool asm_p; @@ -4761,7 +4731,7 @@ update_fence_and_insn (fence_t fence, insn_t insn) /* Change these fields last, as they're used above. 
*/ FENCE_AFTER_STALL_P (fence) = 0; - if (asm_p) + if (asm_p || need_stall) advance_one_cycle (fence); /* Indicate that we've scheduled something on this fence. */ @@ -4848,6 +4818,20 @@ schedule_expr_on_boundary (bnd_t bnd, expr_t expr_vliw, int seqno) return insn; } +/* Stall for N cycles on FENCE. */ +static void +stall_for_cycles (fence_t fence, int n) +{ + int could_more; + + could_more = FENCE_ISSUED_INSNS (fence) < issue_rate; + while (n--) + advance_one_cycle (fence); + if (could_more) + FENCE_AFTER_STALL_P (fence) = 1; +} + + /* Gather a parallel group of insns at FENCE and assign their seqno to SEQNO. All scheduled insns are gathered in SCHEDULED_INSNS_TAILPP list for later recalculation of seqnos. */ @@ -4895,23 +4879,14 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) do { expr_vliw = find_best_expr (&av_vliw, bnds, fence, &need_stall); - if (need_stall) + if (!expr_vliw && need_stall) { /* All expressions required a stall. Do not recompute av sets as we'll get the same answer (modulo the insns between the fence and its boundary, which will be available for pipelining). */ - int could_more; - - gcc_assert (! expr_vliw && ! stall_iterations); - could_more = FENCE_ISSUED_INSNS (fence) < issue_rate; - while (need_stall--) - advance_one_cycle (fence); - need_stall = 1; - if (could_more) - FENCE_AFTER_STALL_P (fence) = 1; - - stall_iterations++; + gcc_assert (! expr_vliw && stall_iterations < 2); + stall_for_cycles (fence, need_stall); was_stall++; } } @@ -4940,7 +4915,7 @@ fill_insns (fence_t fence, int seqno, ilist_t **scheduled_insns_tailpp) } insn = schedule_expr_on_boundary (bnd, expr_vliw, seqno); - update_fence_and_insn (fence, insn); + update_fence_and_insn (fence, insn, need_stall); bnds_tailp = update_boundaries (bnd, insn, bndsp, bnds_tailp); /* Add insn to the list of scheduled on this cycle instructions. */ @@ -5381,7 +5356,7 @@ move_op_at_first_insn (insn_t insn, cmpd_local_params_p lparams, top level of the move_op. 
*/ if (lparams->e1 && sel_num_cfg_preds_gt_1 (insn)) - book_block = generate_bookkeeping_insn (sparams->c_expr, insn, + book_block = generate_bookkeeping_insn (sparams->c_expr, lparams->e1, lparams->e2); /* Update data sets for the current insn. */ if (lparams->call_update_data_sets_on_nop) @@ -5477,6 +5452,7 @@ move_op_at_first_insn (insn_t insn, cmpd_local_params_p lparams, next basic block of XBB and this jump can be safely removed. */ && in_current_region_p (xbb->prev_bb) && jump_leads_only_to_bb_p (BB_END (xbb->prev_bb), xbb->next_bb) + && INSN_SCHED_TIMES (BB_END (xbb->prev_bb)) == 0 /* Also this jump is not at the scheduling boundary. */ && !IN_CURRENT_FENCE_P (BB_END (xbb->prev_bb))) { @@ -6174,11 +6150,9 @@ sel_region_init (int rgn) sched_deps_init (false); /* Initialize haifa data. */ - { - rgn_setup_sched_infos (); - sel_set_sched_flags (); - haifa_init_h_i_d (bbs, NULL, NULL, NULL); - } + rgn_setup_sched_infos (); + sel_set_sched_flags (); + haifa_init_h_i_d (bbs, NULL, NULL, NULL); sel_compute_priorities (rgn); init_deps_global (); @@ -6189,9 +6163,6 @@ sel_region_init (int rgn) VEC_free (basic_block, heap, bbs); - /* Finalize haifa-specific data. */ - haifa_finish_h_i_d (); - blocks_to_reschedule = BITMAP_ALLOC (NULL); /* Purge meaningless empty blocks in the middle of a region. */ @@ -6275,254 +6246,272 @@ simplify_changed_insns (void) } } -/* Free the scheduling data for the current region. */ +/* Find boundaries of the EBB starting from basic block BB, marking blocks of + this EBB in SCHEDULED_BLOCKS and appropriately filling in HEAD, TAIL, + PREV_HEAD, and NEXT_TAIL fields of CURRENT_SCHED_INFO structure. 
*/ static void -sel_region_finish (void) +find_ebb_boundaries (basic_block bb, bitmap scheduled_blocks) { - int i; + insn_t head, tail; + basic_block bb1 = bb; + if (sched_verbose >= 2) + sel_print ("Finishing schedule in bbs: "); - simplify_changed_insns (); - sel_finish_new_insns (); - sched_finish_ready_list (); - free_nop_pool (); + do + { + bitmap_set_bit (scheduled_blocks, BLOCK_TO_BB (bb1->index)); + if (sched_verbose >= 2) + sel_print ("%d; ", bb1->index); + } + while (!bb_ends_ebb_p (bb1) && (bb1 = bb_next_bb (bb1))); - /* Free the vectors. */ - if (vec_av_set) - VEC_free (expr_t, heap, vec_av_set); - BITMAP_FREE (current_copies); - BITMAP_FREE (current_originators); - BITMAP_FREE (code_motion_visited_blocks); - free_blocked_exprs (); + if (sched_verbose >= 2) + sel_print ("\n"); - /* If LV_SET of the region head should be updated, do it now because - there will be no other chance. */ - { - succ_iterator si; - insn_t insn; + get_ebb_head_tail (bb, bb1, &head, &tail); - FOR_EACH_SUCC_1 (insn, si, bb_note (EBB_FIRST_BB (0)), - SUCCS_NORMAL | SUCCS_SKIP_TO_LOOP_EXITS) - { - basic_block bb = BLOCK_FOR_INSN (insn); + current_sched_info->head = head; + current_sched_info->tail = tail; + current_sched_info->prev_head = PREV_INSN (head); + current_sched_info->next_tail = NEXT_INSN (tail); +} - if (!BB_LV_SET_VALID_P (bb)) - compute_live (insn); - } - } +/* Regenerate INSN_SCHED_CYCLEs for insns of current EBB. */ +static void +reset_sched_cycles_in_current_ebb (void) +{ + int last_clock = 0; + int haifa_last_clock = -1; + int haifa_clock = 0; + insn_t insn; - /* Emulate the Haifa scheduler for bundling. */ - if (reload_completed) + if (targetm.sched.md_init) { - rtx insn, head, tail; - int clock = 0, last_clock = 0; - bitmap scheduled_blocks; + /* None of the arguments are actually used in any target. + NB: We should have md_reset () hook for cases like this. 
*/ + targetm.sched.md_init (sched_dump, sched_verbose, -1); + } - scheduled_blocks = BITMAP_ALLOC (NULL); + state_reset (curr_state); + advance_state (curr_state); - for (i = 0; i < current_nr_blocks; i++) - { - basic_block bb = EBB_FIRST_BB (i), bb1; + for (insn = current_sched_info->head; insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + int cost, haifa_cost; + int sort_p; + bool asm_p; + int clock; - if (bitmap_bit_p (scheduled_blocks, i)) - continue; + if (!INSN_P (insn)) + continue; - /* While pipelining outer loops, skip bundling for loop - preheaders. Those will be rescheduled in the outer loop. */ - if (sel_is_loop_preheader_p (bb)) - continue; + asm_p = false; + clock = INSN_SCHED_CYCLE (insn); - if (sched_verbose >= 2) - sel_print ("Finishing schedule in bbs: "); - - bb1 = bb; - do - { - bitmap_set_bit (scheduled_blocks, BLOCK_TO_BB (bb1->index)); - if (sched_verbose >= 2) - sel_print ("%d; ", bb1->index); - } - while (!bb_ends_ebb_p (bb1) && (bb1 = bb_next_bb (bb1))); - - if (sched_verbose >= 2) - sel_print ("\n"); + cost = clock - last_clock; - get_ebb_head_tail (bb, bb1, &head, &tail); - if (no_real_insns_p (head, tail)) - continue; + /* Initialize HAIFA_COST. */ + if (recog_memoized (insn) < 0) + { + asm_p = INSN_ASM_P (insn); - current_sched_info->prev_head = PREV_INSN (head); - current_sched_info->next_tail = NEXT_INSN (tail); - - if (reset_sched_cycles_p) - { - int last_clock = 0; - int haifa_last_clock = -1; - int haifa_clock = 0; - insn_t next_tail = current_sched_info->next_tail; + if (asm_p) + /* This is asm insn which *had* to be scheduled first + on the cycle. */ + haifa_cost = 1; + else + /* This is a use/clobber insn. It should not change + cost. */ + haifa_cost = 0; + } + else + { + state_t tmp_state = alloca (dfa_state_size); + + memcpy (tmp_state, curr_state, dfa_state_size); + haifa_cost = state_transition (tmp_state, insn); + + /* ??? 
We can't assert anything about cost here yet, + because sometimes our scheduler gets out of sync with + Haifa. + This is to be fixed. */ + if (haifa_cost == 0) + haifa_cost = 1; + else if (haifa_cost < 0) + haifa_cost = 0; + } - if (targetm.sched.md_init) - { - /* None of the arguments are actually used in any target. + /* Stall for whatever cycles we've stalled before. */ + if (INSN_AFTER_STALL_P (insn) && cost > haifa_cost) + haifa_cost = cost; - NB: We should have md_reset () hook for cases like this. */ - targetm.sched.md_init (sched_dump, sched_verbose, -1); - } + if (haifa_cost > 0) + { + int i = haifa_cost; - state_reset (curr_state); + while (i--) + { advance_state (curr_state); + if (sched_verbose >= 2) + sel_print ("advance_state (state_transition)\n"); + } - for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) - { - int cost, haifa_cost; - int sort_p; - bool asm_p; - int clock; + haifa_clock += haifa_cost; + } + else + gcc_assert (haifa_cost == 0); - if (!INSN_P (insn)) - continue; + if (sched_verbose >= 2) + sel_print ("Haifa cost for insn %d: %d\n", INSN_UID (insn), haifa_cost); - asm_p = false; - clock = INSN_SCHED_CYCLE (insn); + if (targetm.sched.dfa_new_cycle) + while (targetm.sched.dfa_new_cycle (sched_dump, sched_verbose, insn, + haifa_last_clock, haifa_clock, + &sort_p)) + { + advance_state (curr_state); + haifa_clock++; + if (sched_verbose >= 2) + sel_print ("advance_state (dfa_new_cycle)\n"); + } - cost = clock - last_clock; + if (recog_memoized (insn) >= 0) + { + cost = state_transition (curr_state, insn); + gcc_assert (cost < 0); + } - /* Initialize HAIFA_COST. */ - if (recog_memoized (insn) < 0) - { - asm_p = INSN_ASM_P (insn); - - if (asm_p) - /* This is asm insn which *had* to be scheduled first - on the cycle. */ - haifa_cost = 1; - else - /* This is a use/clobber insn. It should not change - cost. 
*/ - haifa_cost = 0; - } - else - { - state_t tmp_state = alloca (dfa_state_size); - - memcpy (tmp_state, curr_state, dfa_state_size); - haifa_cost = state_transition (tmp_state, insn); - - /* ??? We can't assert anything about cost here yet, - because sometimes our scheduler gets out of sync with - Haifa. - This is to be fixed. */ - if (haifa_cost == 0) - haifa_cost = 1; - else if (haifa_cost < 0) - haifa_cost = 0; - } + if (targetm.sched.variable_issue) + targetm.sched.variable_issue (sched_dump, sched_verbose, insn, 0); - /* Stall for whatever cycles we've stalled before. */ - if (INSN_AFTER_STALL_P (insn) && cost > haifa_cost) - haifa_cost = cost; + INSN_SCHED_CYCLE (insn) = haifa_clock; - if (haifa_cost > 0) - { - int i = haifa_cost; + last_clock = clock; + haifa_last_clock = haifa_clock; + } +} + +/* Put TImode markers on insns starting a new issue group. */ +static void +put_TImodes (void) +{ + int last_clock = -1; + insn_t insn; - while (i--) - { - advance_state (curr_state); - if (sched_verbose >= 2) - sel_print ("advance_state (state_transition)\n"); - } + for (insn = current_sched_info->head; insn != current_sched_info->next_tail; + insn = NEXT_INSN (insn)) + { + int cost, clock; - haifa_clock += haifa_cost; - } - else - gcc_assert (haifa_cost == 0); - - if (sched_verbose >= 2) - sel_print ("Haifa cost for insn %d: %d\n", - INSN_UID (insn), haifa_cost); - - if (targetm.sched.dfa_new_cycle) - while (targetm.sched.dfa_new_cycle (sched_dump, - sched_verbose, - insn, - haifa_last_clock, - haifa_clock, - &sort_p)) - { - advance_state (curr_state); - haifa_clock++; - if (sched_verbose >= 2) - sel_print ("advance_state (dfa_new_cycle)\n"); - } - - if (recog_memoized (insn) >= 0) - { - cost = state_transition (curr_state, insn); - gcc_assert (cost < 0); - } + if (!INSN_P (insn)) + continue; - if (targetm.sched.variable_issue) - targetm.sched.variable_issue (sched_dump, sched_verbose, - insn, 0); + clock = INSN_SCHED_CYCLE (insn); + cost = (last_clock == -1) ? 
1 : clock - last_clock; - INSN_SCHED_CYCLE (insn) = haifa_clock; + gcc_assert (cost >= 0); - last_clock = clock; - haifa_last_clock = haifa_clock; - } - } + if (issue_rate > 1 + && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER) + { + if (reload_completed && cost > 0) + PUT_MODE (insn, TImode); - if (targetm.sched.md_init) - targetm.sched.md_init (sched_dump, sched_verbose, -1); + last_clock = clock; + } - state_reset (curr_state); - advance_state (curr_state); - last_clock = -1; + if (sched_verbose >= 2) + sel_print ("Cost for insn %d is %d\n", INSN_UID (insn), cost); + } +} - for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn)) - { - int cost; - - if (!INSN_P (insn)) - continue; +/* Perform MD_FINISH on EBBs comprising current region. */ +static void +sel_region_target_finish (void) +{ + int i; + bitmap scheduled_blocks = BITMAP_ALLOC (NULL); - clock = INSN_SCHED_CYCLE (insn); - cost = (last_clock == -1) ? 1 : clock - last_clock; - - gcc_assert (cost >= 0); + for (i = 0; i < current_nr_blocks; i++) + { + if (bitmap_bit_p (scheduled_blocks, i)) + continue; - if (issue_rate > 1 - && GET_CODE (PATTERN (insn)) != USE - && GET_CODE (PATTERN (insn)) != CLOBBER) - { - if (reload_completed && cost > 0) - PUT_MODE (insn, TImode); + /* While pipelining outer loops, skip bundling for loop + preheaders. Those will be rescheduled in the outer loop. */ + if (sel_is_loop_preheader_p (EBB_FIRST_BB (i))) + continue; - last_clock = clock; - } + find_ebb_boundaries (EBB_FIRST_BB (i), scheduled_blocks); - if (sched_verbose >= 2) - sel_print ("Cost for insn %d is %d\n", INSN_UID (insn), cost); - } + if (no_real_insns_p (current_sched_info->head, current_sched_info->tail)) + continue; - if (targetm.sched.md_finish) - { - /* md_finish () can possibly emit new insns. Move LV_SETs to - ones that happen to be emitted on bb header. 
*/ - insn_init.what = INSN_INIT_WHAT_INSN; - targetm.sched.md_finish (sched_dump, sched_verbose); - - /* Extend luids so that insns generated by the target will - get zero luid. */ - sched_init_luids (NULL, NULL, NULL, NULL); - insn_init.todo = 0; - sel_init_new_insns (); - } - } + if (reset_sched_cycles_p) + reset_sched_cycles_in_current_ebb (); + + if (targetm.sched.md_init) + targetm.sched.md_init (sched_dump, sched_verbose, -1); + + put_TImodes (); - BITMAP_FREE (scheduled_blocks); + if (targetm.sched.md_finish) + { + /* md_finish () can possibly emit new insns. Move LV_SETs to + ones that happen to be emitted on bb header. */ + insn_init.what = INSN_INIT_WHAT_INSN; + targetm.sched.md_finish (sched_dump, sched_verbose); + + /* Extend luids so that insns generated by the target will + get zero luid. */ + sched_init_luids (NULL, NULL, NULL, NULL); + insn_init.todo = 0; + sel_init_new_insns (); + } } + BITMAP_FREE (scheduled_blocks); +} + +/* Free the scheduling data for the current region. */ +static void +sel_region_finish (void) +{ + simplify_changed_insns (); + sel_finish_new_insns (); + sched_finish_ready_list (); + free_nop_pool (); + + /* Free the vectors. */ + if (vec_av_set) + VEC_free (expr_t, heap, vec_av_set); + BITMAP_FREE (current_copies); + BITMAP_FREE (current_originators); + BITMAP_FREE (code_motion_visited_blocks); + free_blocked_exprs (); + + /* If LV_SET of the region head should be updated, do it now because + there will be no other chance. */ + { + succ_iterator si; + insn_t insn; + + FOR_EACH_SUCC_1 (insn, si, bb_note (EBB_FIRST_BB (0)), + SUCCS_NORMAL | SUCCS_SKIP_TO_LOOP_EXITS) + { + basic_block bb = BLOCK_FOR_INSN (insn); + + if (!BB_LV_SET_VALID_P (bb)) + compute_live (insn); + } + } + + /* Emulate the Haifa scheduler for bundling. 
*/ + if (reload_completed) + sel_region_target_finish (); + sel_finish_global_and_expr (); bitmap_clear (forced_ebb_heads); @@ -6530,7 +6519,6 @@ sel_region_finish (void) free_nop_vinsn (); finish_deps_global (); - sched_deps_local_finish (); sched_finish_luids (); sel_finish_bbs (); @@ -6992,6 +6980,7 @@ sel_global_init (void) sched_init_bbs (); /* Reset AFTER_RECOVERY if it has been set by the 1st scheduler pass. */ after_recovery = 0; + can_issue_more = issue_rate; sched_extend_target (); sched_deps_init (true); |