154 files changed, 8280 insertions, 3003 deletions
diff --git a/gcc/config/alpha/alpha-protos.h b/gcc/config/alpha/alpha-protos.h
index 71079eff7e8..fe4943bfeb8 100644
--- a/gcc/config/alpha/alpha-protos.h
+++ b/gcc/config/alpha/alpha-protos.h
@@ -142,6 +142,8 @@ extern rtx function_arg PARAMS ((CUMULATIVE_ARGS, enum machine_mode,
 extern void alpha_start_function PARAMS ((FILE *, const char *, tree));
 extern void alpha_end_function PARAMS ((FILE *, const char *, tree));
 
+extern int alpha_find_lo_sum_using_gp PARAMS ((rtx));
+
 #ifdef REAL_VALUE_TYPE
 extern int check_float_value PARAMS ((enum machine_mode,
 				      REAL_VALUE_TYPE *, int));
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index d6a9f6bc661..9657e56bf43 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -1,6 +1,6 @@
 /* Subroutines used for code generation on the DEC Alpha.
    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-   2000, 2001, 2002 Free Software Foundation, Inc. 
+   2000, 2001, 2002, 2003 Free Software Foundation, Inc. 
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
 This file is part of GNU CC.
@@ -148,7 +148,7 @@ static rtx alpha_expand_builtin
   PARAMS ((tree, rtx, rtx, enum machine_mode, int));
 static void alpha_sa_mask
   PARAMS ((unsigned long *imaskP, unsigned long *fmaskP));
-static int find_lo_sum
+static int find_lo_sum_using_gp
   PARAMS ((rtx *, void *));
 static int alpha_does_function_need_gp
   PARAMS ((void));
@@ -1920,18 +1920,22 @@ alpha_encode_section_info (decl, first)
     {
       char *newstr;
       size_t len;
+      char want_prefix = (is_local ? '@' : '%');
+      char other_prefix = (is_local ? '%' : '@');
 
-      if (symbol_str[0] == (is_local ? '@' : '%'))
+      if (symbol_str[0] == want_prefix)
 	{
 	  if (symbol_str[1] == encoding)
 	    return;
 	  symbol_str += 2;
 	}
+      else if (symbol_str[0] == other_prefix)
+	symbol_str += 2;
 
       len = strlen (symbol_str) + 1;
       newstr = alloca (len + 2);
 
-      newstr[0] = (is_local ? '@' : '%');
+      newstr[0] = want_prefix;
       newstr[1] = encoding;
       memcpy (newstr + 2, symbol_str, len);
 	  
@@ -3011,7 +3015,7 @@ alpha_expand_mov (mode, operands)
     }
 
   /* Otherwise we've nothing left but to drop the thing to memory.  */
-  operands[1] = force_const_mem (DImode, operands[1]);
+  operands[1] = force_const_mem (mode, operands[1]);
   if (reload_in_progress)
     {
       emit_move_insn (operands[0], XEXP (operands[1], 0));
@@ -5385,7 +5389,7 @@ alpha_gp_save_rtx ()
 {
   rtx r = get_hard_reg_initial_val (DImode, 29);
   if (GET_CODE (r) != MEM)
-    r = gen_mem_addressof (r, NULL_TREE);
+    r = gen_mem_addressof (r, NULL_TREE, /*rescan=*/true);
   return r;
 }
 
@@ -6241,12 +6245,15 @@ alpha_va_start (valist, nextarg)
 
      If no integer registers need be stored, then we must subtract 48
      in order to account for the integer arg registers which are counted
-     in argsize above, but which are not actually stored on the stack.  */
+     in argsize above, but which are not actually stored on the stack.
+     Must further be careful here about structures straddling the last
+     integer argument register; that futzes with pretend_args_size, 
+     which changes the meaning of AP.  */
 
   if (NUM_ARGS <= 6)
     offset = TARGET_ABI_OPEN_VMS ? UNITS_PER_WORD : 6 * UNITS_PER_WORD;
   else
-    offset = -6 * UNITS_PER_WORD;
+    offset = -6 * UNITS_PER_WORD + current_function_pretend_args_size;
 
   if (TARGET_ABI_OPEN_VMS)
     {
@@ -6890,11 +6897,18 @@ const struct attribute_spec vms_attribute_table[] =
 #endif
 
 static int
-find_lo_sum (px, data)
+find_lo_sum_using_gp (px, data)
      rtx *px;
      void *data ATTRIBUTE_UNUSED;
 {
-  return GET_CODE (*px) == LO_SUM;
+  return GET_CODE (*px) == LO_SUM && XEXP (*px, 0) == pic_offset_table_rtx;
+}
+
+int
+alpha_find_lo_sum_using_gp (insn)
+     rtx insn;
+{
+  return for_each_rtx (&PATTERN (insn), find_lo_sum_using_gp, NULL) > 0;
 }
 
 static int
@@ -6923,15 +6937,9 @@ alpha_does_function_need_gp ()
   for (; insn; insn = NEXT_INSN (insn))
     if (INSN_P (insn)
 	&& GET_CODE (PATTERN (insn)) != USE
-	&& GET_CODE (PATTERN (insn)) != CLOBBER)
-      {
-	enum attr_type type = get_attr_type (insn);
-	if (type == TYPE_LDSYM || type == TYPE_JSR)
-	  return 1;
-	if (TARGET_EXPLICIT_RELOCS
-	    && for_each_rtx (&PATTERN (insn), find_lo_sum, NULL) > 0)
-	  return 1;
-      }
+	&& GET_CODE (PATTERN (insn)) != CLOBBER
+	&& get_attr_usegp (insn))
+      return 1;
 
   return 0;
 }
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
index 9e39a40395b..b933ea31625 100644
--- a/gcc/config/alpha/alpha.h
+++ b/gcc/config/alpha/alpha.h
@@ -859,8 +859,9 @@ enum reg_class {
 
 /* Return the class of registers that cannot change mode from FROM to TO.  */
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO) \
-  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) ? FLOAT_REGS : NO_REGS)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		\
+  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			\
+   ? reg_classes_intersect_p (FLOAT_REGS, CLASS) : 0)
 
 /* Define the cost of moving between registers of various classes.  Moving
    between FLOAT_REGS and anything else except float regs is expensive. 
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index c878366b846..a4c0ae6904d 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -1,6 +1,6 @@
 ;; Machine description for DEC Alpha for GNU C compiler
 ;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-;; 2000, 2001, 2002 Free Software Foundation, Inc.
+;; 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 ;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 ;;
 ;; This file is part of GNU CC.
@@ -142,6 +142,18 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
 
 (define_attr "length" ""
   (const_int 4))
+
+;; The USEGP attribute marks instructions that have relocations that use
+;; the GP.
+
+(define_attr "usegp" "no,yes"
+  (cond [(eq_attr "type" "ldsym,jsr")
+	   (const_string "yes")
+	 (eq_attr "type" "ild,fld,ist,fst")
+	   (symbol_ref "alpha_find_lo_sum_using_gp(insn)")
+	]
+	(const_string "no")))
+
 
 ;; Include scheduling descriptions.
   
@@ -402,7 +414,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
 	(plus:DI (match_operand:DI 1 "register_operand" "r")
 		 (high:DI (match_operand:DI 2 "local_symbolic_operand" ""))))]
   "TARGET_EXPLICIT_RELOCS"
-  "ldah %0,%2(%1)\t\t!gprelhigh")
+  "ldah %0,%2(%1)\t\t!gprelhigh"
+  [(set_attr "usegp" "yes")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
@@ -737,17 +750,31 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
   "mulqv %r1,%2,%0"
   [(set_attr "type" "imul")])
 
-(define_insn "umuldi3_highpart"
+(define_expand "umuldi3_highpart"
+  [(set (match_operand:DI 0 "register_operand" "")
+	(truncate:DI
+	 (lshiftrt:TI
+	  (mult:TI (zero_extend:TI
+		     (match_operand:DI 1 "register_operand" ""))
+		   (match_operand:DI 2 "reg_or_8bit_operand" ""))
+	  (const_int 64))))]
+  ""
+{
+  if (REG_P (operands[2]))
+    operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]);
+})
+
+(define_insn "*umuldi3_highpart_reg"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(truncate:DI
 	 (lshiftrt:TI
 	  (mult:TI (zero_extend:TI
-		     (match_operand:DI 1 "reg_or_0_operand" "%rJ"))
+		     (match_operand:DI 1 "register_operand" "r"))
 		   (zero_extend:TI
-		     (match_operand:DI 2 "reg_or_8bit_operand" "rI")))
+		     (match_operand:DI 2 "register_operand" "r")))
 	  (const_int 64))))]
   ""
-  "umulh %r1,%2,%0"
+  "umulh %1,%2,%0"
   [(set_attr "type" "imul")
    (set_attr "opsize" "udi")])
 
@@ -2849,7 +2876,7 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
   [(set (match_operand:DF 0 "register_operand" "=f")
 	(sqrt:DF (match_operand:DF 1 "reg_or_0_operand" "fG")))]
   "TARGET_FP && TARGET_FIX"
-  "sqrt%-%/ %1,%0"
+  "sqrt%-%/ %R1,%0"
   [(set_attr "type" "fsqrt")
    (set_attr "trap" "yes")
    (set_attr "round_suffix" "normal")
@@ -5305,7 +5332,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
     return "lda %0,%2(%1)\t\t!gprel";
   else
     return "lda %0,%2(%1)\t\t!gprellow";
-})
+}
+  [(set_attr "usegp" "yes")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
@@ -5331,10 +5359,12 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
   [(match_dup 0)]
   "operands[0] = split_small_symbolic_operand (operands[0]);")
 
+;; Accepts any symbolic, not just global, since function calls that
+;; don't go via bsr still use !literal in hopes of linker relaxation.
 (define_insn "movdi_er_high_g"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec:DI [(match_operand:DI 1 "register_operand" "r")
-		    (match_operand:DI 2 "global_symbolic_operand" "")
+		    (match_operand:DI 2 "symbolic_operand" "")
 		    (match_operand 3 "const_int_operand" "")]
 		   UNSPEC_LITERAL))]
   "TARGET_EXPLICIT_RELOCS"
@@ -5425,7 +5455,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
 		   UNSPEC_DTPREL))]
   "HAVE_AS_TLS"
   "ldq %0,%2(%1)\t\t!gotdtprel"
-  [(set_attr "type" "ild")])
+  [(set_attr "type" "ild")
+   (set_attr "usegp" "yes")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
@@ -5446,7 +5477,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
 		   UNSPEC_TPREL))]
   "HAVE_AS_TLS"
   "ldq %0,%2(%1)\t\t!gottprel"
-  [(set_attr "type" "ild")])
+  [(set_attr "type" "ild")
+   (set_attr "usegp" "yes")])
 
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
@@ -5477,7 +5509,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
    fmov %R1,%0
    ldt %0,%1
    stt %R1,%0"
-  [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,ild,ist,fcpys,fld,fst")])
+  [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,ild,ist,fcpys,fld,fst")
+   (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*")])
 
 ;; The 'U' constraint matches symbolic operands on Unicos/Mk. Those should
 ;; have been split up by the rules above but we shouldn't reject the
@@ -5524,7 +5557,8 @@ fadd,fmul,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,multi,none"
    stt %R1,%0
    ftoit %1,%0
    itoft %1,%0"
-  [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof")])
+  [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof")
+   (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*")])
 
 (define_insn "*movdi_fix"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,r,r,m,*f,*f,Q,r,*f")
diff --git a/gcc/config/alpha/freebsd.h b/gcc/config/alpha/freebsd.h
index 0ec96885527..f809c62012e 100644
--- a/gcc/config/alpha/freebsd.h
+++ b/gcc/config/alpha/freebsd.h
@@ -20,11 +20,13 @@ the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
 
-/* Provide a CPP_SPEC appropriate for FreeBSD/alpha.  Besides the dealing with
+/* Provide a FBSD_TARGET_CPU_CPP_BUILTINS and CPP_SPEC appropriate for
+   FreeBSD/alpha.  Besides the dealing with
    the GCC option `-posix', and PIC issues as on all FreeBSD platforms, we must
    deal with the Alpha's FP issues.  */
 
-#define TARGET_OS_CPP_BUILTINS()		\
+#undef FBSD_TARGET_CPU_CPP_BUILTINS
+#define FBSD_TARGET_CPU_CPP_BUILTINS()		\
   do						\
     {						\
       if (flag_pic)				\
diff --git a/gcc/config/alpha/linux.h b/gcc/config/alpha/linux.h
index 28ff3208422..0c533449e8b 100644
--- a/gcc/config/alpha/linux.h
+++ b/gcc/config/alpha/linux.h
@@ -59,6 +59,8 @@ Boston, MA 02111-1307, USA.  */
 /* Define this so that all GNU/Linux targets handle the same pragmas.  */
 #define HANDLE_PRAGMA_PACK_PUSH_POP
 
+#define TARGET_HAS_F_SETLKW
+
 /* Do code reading to identify a signal frame, and set the frame
    state data appropriately.  See unwind-dw2.c for the structs.  */
 
diff --git a/gcc/config/alpha/osf.h b/gcc/config/alpha/osf.h
index 9fbe2b56a07..2be2a424d35 100644
--- a/gcc/config/alpha/osf.h
+++ b/gcc/config/alpha/osf.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler, for DEC Alpha on OSF/1.
-   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2001
+   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2001, 2002, 2003
    Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
@@ -32,21 +32,23 @@ Boston, MA 02111-1307, USA.  */
 
 /* Names to predefine in the preprocessor for this target machine.  */
 
-#define TARGET_OS_CPP_BUILTINS()		\
-    do {					\
-	builtin_define_std ("unix");		\
-	builtin_define_std ("SYSTYPE_BSD");	\
-	builtin_define ("_SYSTYPE_BSD");	\
-	builtin_define ("__osf__");		\
-	builtin_define ("_LONGLONG");		\
-	builtin_define ("__EXTERN_PREFIX");	\
-	builtin_assert ("system=unix");		\
-	builtin_assert ("system=xpg4");		\
-	/* Tru64 UNIX V5 has a 16 byte long	\
-	   double type and requires __X_FLOAT	\
-	   to be defined for <math.h>.  */	\
-        if (LONG_DOUBLE_TYPE_SIZE == 128)	\
-          builtin_define ("__X_FLOAT");		\
+#define TARGET_OS_CPP_BUILTINS()			\
+    do {						\
+	builtin_define_std ("unix");			\
+	builtin_define_std ("SYSTYPE_BSD");		\
+	builtin_define ("_SYSTYPE_BSD");		\
+	builtin_define ("__osf__");			\
+	builtin_define ("__digital__");			\
+	builtin_define ("__arch64__");			\
+	builtin_define ("_LONGLONG");			\
+	builtin_define ("__PRAGMA_EXTERN_PREFIX");	\
+	builtin_assert ("system=unix");			\
+	builtin_assert ("system=xpg4");			\
+	/* Tru64 UNIX V5 has a 16 byte long		\
+	   double type and requires __X_FLOAT		\
+	   to be defined for <math.h>.  */		\
+        if (LONG_DOUBLE_TYPE_SIZE == 128)		\
+          builtin_define ("__X_FLOAT");			\
     } while (0)
 
 /* Accept DEC C flags for multithreaded programs.  We use _PTHREAD_USE_D4
diff --git a/gcc/config/alpha/t-crtfm b/gcc/config/alpha/t-crtfm
index 7076b517861..5ca8c3f747d 100644
--- a/gcc/config/alpha/t-crtfm
+++ b/gcc/config/alpha/t-crtfm
@@ -1,4 +1,5 @@
 EXTRA_PARTS += crtfastmath.o
 
 crtfastmath.o: $(srcdir)/config/alpha/crtfastmath.c $(GCC_PASSES)
-	$(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -c -o crtfastmath.o $(srcdir)/config/alpha/crtfastmath.c
+	$(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -frandom-seed=gcc-crtfastmath -c \
+		-o crtfastmath.o $(srcdir)/config/alpha/crtfastmath.c
diff --git a/gcc/config/alpha/t-osf4 b/gcc/config/alpha/t-osf4
index e9c451b9c04..0525d617662 100644
--- a/gcc/config/alpha/t-osf4
+++ b/gcc/config/alpha/t-osf4
@@ -17,6 +17,6 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 	$(LN_S) $(SHLIB_NAME) $(SHLIB_SONAME)
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
-SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(slibdir)/$(SHLIB_SONAME); \
-	rm -f $$(slibdir)/$(SHLIB_NAME); \
-	$(LN_S) $(SHLIB_SONAME) $$(slibdir)/$(SHLIB_NAME)
+SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \
+	rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \
+	$(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME)
diff --git a/gcc/config/arc/t-arc b/gcc/config/arc/t-arc
index 688603b5d31..bbf4257c7b9 100644
--- a/gcc/config/arc/t-arc
+++ b/gcc/config/arc/t-arc
@@ -62,10 +62,10 @@ stmp-multilib-arc: stmp-multilib
 install-multilib-arc: install-multilib
 	for i in `$(GCC_FOR_TARGET) --print-multi-lib`; do \
 	  dir=`echo $$i | sed -e 's/;.*$$//'`; \
-	  rm -f $(libsubdir)/$${dir}/crtinit.o; \
-	  $(INSTALL_DATA) $${dir}/crtinit.o $(libsubdir)/$${dir}/crtinit.o; \
-	  chmod a-x $(libsubdir)/$${dir}/crtinit.o; \
-	  rm -f $(libsubdir)/$${dir}/crtfini.o; \
-	  $(INSTALL_DATA) $${dir}/crtfini.o $(libsubdir)/$${dir}/crtfini.o; \
-	  chmod a-x $(libsubdir)/$${dir}/crtfini.o; \
+	  rm -f $(DESTDIR)$(libsubdir)/$${dir}/crtinit.o; \
+	  $(INSTALL_DATA) $${dir}/crtinit.o $(DESTDIR)$(libsubdir)/$${dir}/crtinit.o; \
+	  chmod a-x $(DESTDIR)$(libsubdir)/$${dir}/crtinit.o; \
+	  rm -f $(DESTDIR)$(libsubdir)/$${dir}/crtfini.o; \
+	  $(INSTALL_DATA) $${dir}/crtfini.o $(DESTDIR)$(libsubdir)/$${dir}/crtfini.o; \
+	  chmod a-x $(DESTDIR)$(libsubdir)/$${dir}/crtfini.o; \
 	done
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index a6203c3fbe3..7422f1d03af 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -67,7 +67,6 @@ static void      arm_add_gc_roots 		PARAMS ((void));
 static int       arm_gen_constant		PARAMS ((enum rtx_code, Mmode, Hint, rtx, rtx, int, int));
 static unsigned  bit_count 			PARAMS ((Ulong));
 static int       const_ok_for_op 		PARAMS ((Hint, enum rtx_code));
-static int       eliminate_lr2ip		PARAMS ((rtx *));
 static rtx	 emit_multi_reg_push		PARAMS ((int));
 static rtx	 emit_sfm			PARAMS ((int, int));
 #ifndef AOF_ASSEMBLER
@@ -921,6 +920,10 @@ use_return_insn (iscond)
      consideration.  */
   if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
     return 0;
+
+  /* So do interrupt functions that use the frame pointer.  */
+  if (IS_INTERRUPT (func_type) && frame_pointer_needed)
+    return 0;
   
   /* As do variadic functions.  */
   if (current_function_pretend_args_size
@@ -5053,7 +5056,14 @@ arm_reload_in_hi (operands)
 	}
     }
 
-  scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
+  /* Operands[2] may overlap operands[0] (though it won't overlap
+     operands[1]), that's why we asked for a DImode reg -- so we can
+     use the bit that does not overlap.  */
+  if (REGNO (operands[2]) == REGNO (operands[0]))
+    scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
+  else
+    scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
+
   emit_insn (gen_zero_extendqisi2 (scratch,
 				   gen_rtx_MEM (QImode,
 						plus_constant (base,
@@ -6419,57 +6429,27 @@ output_call (operands)
   return "";
 }
 
-static int
-eliminate_lr2ip (x)
-     rtx * x;
-{
-  int something_changed = 0;
-  rtx x0 = * x;
-  int code = GET_CODE (x0);
-  int i, j;
-  const char * fmt;
-  
-  switch (code)
-    {
-    case REG:
-      if (REGNO (x0) == LR_REGNUM)
-        {
-	  *x = gen_rtx_REG (SImode, IP_REGNUM);
-	  return 1;
-        }
-      return 0;
-    default:
-      /* Scan through the sub-elements and change any references there.  */
-      fmt = GET_RTX_FORMAT (code);
-      
-      for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
-	if (fmt[i] == 'e')
-	  something_changed |= eliminate_lr2ip (&XEXP (x0, i));
-	else if (fmt[i] == 'E')
-	  for (j = 0; j < XVECLEN (x0, i); j++)
-	    something_changed |= eliminate_lr2ip (&XVECEXP (x0, i, j));
-      
-      return something_changed;
-    }
-}
-  
 /* Output a 'call' insn that is a reference in memory.  */
 
 const char *
 output_call_mem (operands)
      rtx * operands;
 {
-  operands[0] = copy_rtx (operands[0]); /* Be ultra careful.  */
-  /* Handle calls using lr by using ip (which may be clobbered in subr anyway).  */
-  if (eliminate_lr2ip (&operands[0]))
-    output_asm_insn ("mov%?\t%|ip, %|lr", operands);
-
   if (TARGET_INTERWORK)
     {
       output_asm_insn ("ldr%?\t%|ip, %0", operands);
       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
       output_asm_insn ("bx%?\t%|ip", operands);
     }
+  else if (regno_use_in (LR_REGNUM, operands[0]))
+    {
+      /* LR is used in the memory address.  We load the address in the
+	 first instruction.  It's safe to use IP as the target of the
+	 load since the call will kill it anyway.  */
+      output_asm_insn ("ldr%?\t%|ip, %0", operands);
+      output_asm_insn ("mov%?\t%|lr, %|pc", operands);
+      output_asm_insn ("mov%?\t%|pc, %|ip", operands);
+    }
   else
     {
       output_asm_insn ("mov%?\t%|lr, %|pc", operands);
@@ -6763,7 +6743,7 @@ output_move_double (operands)
                 }
               else
                 {
-		  otherops[1] = adjust_address (operands[1], VOIDmode, 4);
+		  otherops[1] = adjust_address (operands[1], SImode, 4);
 		  /* Take care of overlapping base/data reg.  */
 		  if (reg_mentioned_p (operands[0], operands[1]))
 		    {
@@ -6829,7 +6809,7 @@ output_move_double (operands)
 	  /* Fall through */
 
         default:
-	  otherops[0] = adjust_address (operands[0], VOIDmode, 4);
+	  otherops[0] = adjust_address (operands[0], SImode, 4);
 	  otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
 	  output_asm_insn ("str%?\t%1, %0", operands);
 	  output_asm_insn ("str%?\t%1, %0", otherops);
@@ -7710,7 +7690,7 @@ arm_output_epilogue (really_return)
       if (IS_INTERRUPT (func_type))
 	/* Interrupt handlers will have pushed the
 	   IP onto the stack, so restore it now.  */
-	print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, 1 << IP_REGNUM);
+	print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
     }
   else
     {
@@ -7841,7 +7821,7 @@ arm_output_epilogue (really_return)
 
     default:
       if (frame_pointer_needed)
-	/* If we used the frame pointer then the return adddress
+	/* If we used the frame pointer then the return address
 	   will have been loaded off the stack directly into the
 	   PC, so there is no need to issue a MOV instruction
 	   here.  */
@@ -8149,10 +8129,14 @@ arm_compute_initial_elimination_offset (from, to)
 	  reg_mask = reg_mask & ~ (reg_mask & - reg_mask);
 	}
 
-      if (regs_ever_live[LR_REGNUM]
-	  /* If a stack frame is going to be created, the LR will
-	     be saved as part of that, so we do not need to allow
-	     for it here.  */
+      if ((regs_ever_live[LR_REGNUM]
+	   /* If optimizing for size, then we save the link register if
+	      any other integer register is saved.  This gives a smaller
+	      return sequence.  */
+	   || (optimize_size && call_saved_registers > 0))
+	  /* But if a stack frame is going to be created, the LR will
+	     be saved as part of that, so we do not need to allow for
+	     it here.  */
 	  && ! frame_pointer_needed)
 	call_saved_registers += 4;
 
@@ -8456,18 +8440,19 @@ arm_expand_prologue ()
       RTX_FRAME_RELATED_P (insn) = 1;
     }
 
-  /* If this is an interrupt service routine, and the link register is
-     going to be pushed, subtracting four now will mean that the
-     function return can be done with a single instruction.  */
+  /* If this is an interrupt service routine, and the link register
+     is going to be pushed, and we are not creating a stack frame,
+     (which would involve an extra push of IP and a pop in the epilogue)
+     subtracting four from LR now will mean that the function return
+     can be done with a single instruction.  */
   if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
-      && (live_regs_mask & (1 << LR_REGNUM)) != 0)
-    {
-      emit_insn (gen_rtx_SET (SImode, 
-			      gen_rtx_REG (SImode, LR_REGNUM),
-			      gen_rtx_PLUS (SImode,
-				    gen_rtx_REG (SImode, LR_REGNUM),
-				    GEN_INT (-4))));
-    }
+      && (live_regs_mask & (1 << LR_REGNUM)) != 0
+      && ! frame_pointer_needed)
+    emit_insn (gen_rtx_SET (SImode, 
+			    gen_rtx_REG (SImode, LR_REGNUM),
+			    gen_rtx_PLUS (SImode,
+					  gen_rtx_REG (SImode, LR_REGNUM),
+					  GEN_INT (-4))));
 
   if (live_regs_mask)
     {
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 82fafa3f7fa..0e6071295b9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -59,7 +59,11 @@
    (UNSPEC_PIC_SYM   3) ; A symbol that has been treated properly for pic
 			;   usage, that is, we will add the pic_register
 			;   value to it before trying to dereference it.
-   (UNSPEC_PRLG_STK  4) ; A special barrier that prevents frame accesses 
+   (UNSPEC_PIC_BASE  4)	; Adding the PC value to the offset to the
+			;   GLOBAL_OFFSET_TABLE.  The operation is fully
+			;   described by the RTL but must be wrapped to
+			;   prevent combine from trying to rip it apart.
+   (UNSPEC_PRLG_STK  5) ; A special barrier that prevents frame accesses 
 			;   being scheduled before the stack adjustment insn.
    (UNSPEC_CLZ	     5) ; `clz' instruction, count leading zeros (SImode):
 			;   operand 0 is the result,
@@ -4202,7 +4206,9 @@
 
 (define_insn "pic_add_dot_plus_four"
   [(set (match_operand:SI 0 "register_operand" "+r")
-	(plus:SI (match_dup 0) (const (plus:SI (pc) (const_int 4)))))
+	(unspec:SI [(plus:SI (match_dup 0)
+			     (const (plus:SI (pc) (const_int 4))))]
+		   UNSPEC_PIC_BASE))
    (use (label_ref (match_operand 1 "" "")))]
   "TARGET_THUMB && flag_pic"
   "*
@@ -4215,7 +4221,9 @@
 
 (define_insn "pic_add_dot_plus_eight"
   [(set (match_operand:SI 0 "register_operand" "+r")
-	(plus:SI (match_dup 0) (const (plus:SI (pc) (const_int 8)))))
+	(unspec:SI [(plus:SI (match_dup 0)
+			     (const (plus:SI (pc) (const_int 8))))]
+		   UNSPEC_PIC_BASE))
    (use (label_ref (match_operand 1 "" "")))]
   "TARGET_ARM && flag_pic"
   "*
@@ -8681,8 +8689,14 @@
   "
 )
 
+;; Note - although unspec_volatile's USE all hard registers,
+;; USEs are ignored after relaod has completed.  Thus we need
+;; to add an unspec of the link register to ensure that flow
+;; does not think that it is unused by the sibcall branch that
+;; will replace the standard function epilogue.
 (define_insn "sibcall_epilogue"
-  [(unspec_volatile [(const_int 0)] VUNSPEC_EPILOGUE)]
+  [(parallel [(unspec:SI [(reg:SI LR_REGNUM)] UNSPEC_PROLOGUE_USE)
+              (unspec_volatile [(return)] VUNSPEC_EPILOGUE)])]
   "TARGET_ARM"
   "*
   if (USE_RETURN_INSN (FALSE))
@@ -8691,7 +8705,11 @@
   "
 ;; Length is absolute worst case
   [(set_attr "length" "44")
-   (set_attr "type" "block")]
+   (set_attr "type" "block")
+   ;; We don't clobber the conditions, but the potential length of this
+   ;; operation is sufficient to make conditionalizing the sequence 
+   ;; unlikely to be profitable.
+   (set_attr "conds" "clob")]
 )
 
 (define_insn "*epilogue_insns"
@@ -8705,7 +8723,11 @@
   "
   ; Length is absolute worst case
   [(set_attr "length" "44")
-   (set_attr "type" "block")]
+   (set_attr "type" "block")
+   ;; We don't clobber the conditions, but the potential length of this
+   ;; operation is sufficient to make conditionalizing the sequence 
+   ;; unlikely to be profitable.
+   (set_attr "conds" "clob")]
 )
 
 (define_expand "eh_epilogue"
diff --git a/gcc/config/arm/t-netbsd b/gcc/config/arm/t-netbsd
index 511b0151116..76e431bfb11 100644
--- a/gcc/config/arm/t-netbsd
+++ b/gcc/config/arm/t-netbsd
@@ -16,9 +16,9 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 	$(LN_S) $(SHLIB_NAME) $(SHLIB_SONAME)
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
-SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(slibdir)/$(SHLIB_SONAME); \
-	rm -f $$(slibdir)/$(SHLIB_NAME); \
-	$(LN_S) $(SHLIB_SONAME) $$(slibdir)/$(SHLIB_NAME)
+SHLIB_INSTALL = $(INSTALL_DATA) $(SHLIB_NAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \
+	rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \
+	$(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME)
 
 # Don't build enquire
 ENQUIRE=
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index dd51f0e7240..86ab6c72829 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1637,8 +1637,8 @@
   [(set (cc0)
         (compare (sign_extend:HI
 		  (match_operand:QI 0 "register_operand"  "d"))
-		 (match_operand:HI 1 "immediate_operand" "M")))]
-  ""
+		 (match_operand:HI 1 "const_int_operand" "n")))]
+  "INTVAL (operands[1]) >= -128 && INTVAL (operands[1]) <= 127"
   "cpi %0,lo8(%1)"
   [(set_attr "cc" "compare")
    (set_attr "length" "1")])
diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr
index 9df673e3f17..91a089912c9 100644
--- a/gcc/config/avr/t-avr
+++ b/gcc/config/avr/t-avr
@@ -47,7 +47,7 @@ MULTILIB_DIRNAMES = avr2 avr3 avr4 avr5
 # The many avr2 matches are not listed here - this is the default.
 MULTILIB_MATCHES = \
 	mmcu?avr3=mmcu?atmega103 mmcu?avr3=mmcu?atmega603 \
-	mmcu?avr3=mmcu?at43usb320 mmcu?avr3=at43usb355 \
+	mmcu?avr3=mmcu?at43usb320 mmcu?avr3=mmcu?at43usb355 \
 	mmcu?avr3=mmcu?at76c711 \
 	mmcu?avr4=mmcu?atmega8515 mmcu?avr4=mmcu?atmega8535 \
 	mmcu?avr4=mmcu?atmega8 \
diff --git a/gcc/config/c4x/c4x.c b/gcc/config/c4x/c4x.c
index 354ac76d1fa..b3d53d5e223 100644
--- a/gcc/config/c4x/c4x.c
+++ b/gcc/config/c4x/c4x.c
@@ -5017,7 +5017,7 @@ c4x_expand_builtin (exp, target, subtarget, mode, ignore)
 	break;
       arg0 = TREE_VALUE (arglist);
       if (TREE_CODE (arg0) == VAR_DECL || TREE_CODE (arg0) == PARM_DECL)
-	put_var_into_stack (arg0);
+	put_var_into_stack (arg0, /*rescan=*/true);
       r0 = expand_expr (arg0, NULL_RTX, QFmode, 0);
       r0 = protect_from_queue (r0, 0);
       if (register_operand (r0, QFmode))
diff --git a/gcc/config/c4x/c4x.h b/gcc/config/c4x/c4x.h
index 94cca09b597..fa2f3d7c8c5 100644
--- a/gcc/config/c4x/c4x.h
+++ b/gcc/config/c4x/c4x.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler.  TMS320C[34]x
-   Copyright (C) 1994, 1995, 1996, 1997, 1998,
-   1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
+   2003 Free Software Foundation, Inc.
 
    Contributed by Michael Hayes (m.hayes@elec.canterbury.ac.nz)
               and Herman Ten Brugge (Haj.Ten.Brugge@net.HCC.nl).
@@ -22,10 +22,10 @@
    the Free Software Foundation, 59 Temple Place - Suite 330,
    Boston, MA 02111-1307, USA.  */
 
-#include "hwint.h"
-
 /* RUN-TIME TARGET SPECIFICATION.  */
 
+#include "hwint.h"
+
 #define C4x   1
 
 #define TARGET_CPU_CPP_BUILTINS()		\
@@ -93,23 +93,21 @@
 
 #define ASM_SPEC "\
 %{!mcpu=30:%{!mcpu=31:%{!mcpu=32:%{!mcpu=33:%{!mcpu=40:%{!mcpu=44:\
-%{!m30:%{!m40:-m40}}}}}}}} \
-%{mcpu=30:-m30} \
-%{mcpu=31:-m31} \
-%{mcpu=32:-m32} \
-%{mcpu=33:-m33} \
-%{mcpu=40:-m40} \
-%{mcpu=44:-m44} \
-%{m30:-m30} \
-%{m31:-m31} \
-%{m32:-m32} \
-%{m33:-m33} \
-%{m40:-m40} \
-%{m44:-m44} \
-%{mmemparm:-p} %{mregparm:-r} \
-%{!mmemparm:%{!mregparm:-r}} \
-%{mbig:-b} %{msmall:-s} \
-%{!msmall:%{!mbig:-b}}"
+%{!m30:%{!m31:%{!m32:%{!m33:%{!m40:%{!m44:-m40}}}}}}}}}}}} \
+%{mcpu=30} \
+%{mcpu=31} \
+%{mcpu=32} \
+%{mcpu=33} \
+%{mcpu=40} \
+%{mcpu=44} \
+%{m30} \
+%{m31} \
+%{m32} \
+%{m33} \
+%{m40} \
+%{m44} \
+%{mmemparm} %{mregparm} %{!mmemparm:%{!mregparm:-mregparm}} \
+%{mbig} %{msmall} %{!msmall:%{!mbig:-mbig}}"
 
 /* Define linker options.  */
 
@@ -153,7 +151,7 @@
 #define C30_FLAG            0x0100000 /* Emit C30 code.  */
 #define C31_FLAG            0x0200000 /* Emit C31 code.  */
 #define C32_FLAG            0x0400000 /* Emit C32 code.  */
-#define C33_FLAG            0x0400000 /* Emit C33 code.  */
+#define C33_FLAG            0x0800000 /* Emit C33 code.  */
 #define C40_FLAG            0x1000000 /* Emit C40 code.  */
 #define C44_FLAG            0x2000000 /* Emit C44 code.  */
 
@@ -239,7 +237,7 @@
   { "no-force", -FORCE_FLAG,						\
     N_("Allow RTL generation to emit invalid 3 operand insns") },	\
   { "loop-unsigned", LOOP_UNSIGNED_FLAG,				\
-    N_("Allow unsigned interation counts for RPTB/DB") },		\
+    N_("Allow unsigned iteration counts for RPTB/DB") },		\
   { "no-loop-unsigned", -LOOP_UNSIGNED_FLAG,				\
     N_("Disallow unsigned iteration counts for RPTB/DB") },		\
   { "preserve-float", PRESERVE_FLOAT_FLAG,				\
@@ -1675,6 +1673,7 @@ fini_section ()							\
     if (TARGET_C30) dspversion = 30;				\
     if (TARGET_C31) dspversion = 31;				\
     if (TARGET_C32) dspversion = 32;				\
+    if (TARGET_C33) dspversion = 33;                            \
     if (TARGET_C40) dspversion = 40;				\
     if (TARGET_C44) dspversion = 44;				\
     fprintf (FILE, "\t.version\t%d\n", dspversion);		\
diff --git a/gcc/config/c4x/t-c4x b/gcc/config/c4x/t-c4x
index d1a83fb4231..45bcdeb6aee 100644
--- a/gcc/config/c4x/t-c4x
+++ b/gcc/config/c4x/t-c4x
@@ -13,7 +13,7 @@ c4x-c.o: $(srcdir)/config/c4x/c4x-c.c $(CONFIG_H) $(SYSTEM_H) cpplib.h \
 
 MULTILIB_OPTIONS = m30 msmall mmemparm
 MULTILIB_DIRNAMES = c3x small mem
-MULTILIB_MATCHES = m30=mcpu?30 m30=mcpu?31 m30=mcpu?32 m30=m31 m30=m32
+MULTILIB_MATCHES = m30=mcpu?30 m30=mcpu?31 m30=mcpu?32 m30=mcpu?33 m30=m31 m30=m32 m30=m33
 MULTILIB_EXCEPTIONS =
 MULTILIB_EXTRA_OPTS =
 LIBGCC = stmp-multilib
diff --git a/gcc/config/cris/aout.h b/gcc/config/cris/aout.h
index dabee83a73b..277fd363e1b 100644
--- a/gcc/config/cris/aout.h
+++ b/gcc/config/cris/aout.h
@@ -48,6 +48,9 @@ Boston, MA 02111-1307, USA.  */
     %{!sim:%{pg:gcrt0.o%s}\
      %{!pg:%{p:mcrt0.o%s}%{!p:crt0.o%s}}}}}"
 
+/* Override cris.h define.  */
+#undef ENDFILE_SPEC
+
 /* Which library to get.  The only difference from the default is to get
    libsc.a if -sim is given to the driver.  Repeat -lc -lsysX
    {X=sim,linux}, because libsysX needs (at least) errno from libc, and
@@ -64,8 +67,7 @@ Boston, MA 02111-1307, USA.  */
 
 #undef CRIS_CPP_SUBTARGET_SPEC
 #define CRIS_CPP_SUBTARGET_SPEC \
- "-D__AOUT__\
-  %{melinux:-D__gnu_linux__ -D__linux__ -D__unix__ -D__elinux__ -D__uclinux__\
+ "%{melinux:-D__gnu_linux__ -D__linux__ -D__unix__ -D__elinux__ -D__uclinux__\
     %{!nostdinc:\
       %{!mbest-lib-options:%{isystem*}}\
       -isystem elinux/include%s\
@@ -118,6 +120,19 @@ Boston, MA 02111-1307, USA.  */
 #undef CRIS_SUBTARGET_DEFAULT
 #define CRIS_SUBTARGET_DEFAULT 0
 
+
+/* Node: Run-time Target */
+
+/* For the cris-*-aout subtarget.  */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS()		\
+  do						\
+    {						\
+      builtin_define ("__AOUT__");		\
+    }						\
+  while (0)
+
+
 /* Node: Storage Layout */
 
 /* We can align to 16 bits (only) with CRIS a.out.  */
@@ -343,6 +358,13 @@ Boston, MA 02111-1307, USA.  */
     }								\
   while (0)
 
+/* The configure machinery invokes the assembler without options, which is
+   not how gcc invokes it.  Without options, the multi-target assembler
+   will probably be found, which is ELF by default.  To counter that, we
+   need to override ELF auto-host.h config stuff which we know collides
+   with a.out.  */
+#undef HAVE_GAS_HIDDEN
+
 
 /* Node: Alignment Output */
 
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index 2eb864f065c..e63ad200352 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -328,6 +328,22 @@ cris_operand_extend_operator (x, mode)
 	  && (code == PLUS || code == MINUS || code == UMIN));
 }
 
+/* Check if MODE is same as mode for X, and X is PLUS or MINUS.  */
+
+int
+cris_additive_operand_extend_operator (x, mode)
+     rtx x;
+     enum machine_mode mode;
+{
+  enum rtx_code code = GET_CODE (x);
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (x);
+
+  return (GET_MODE (x) == mode
+	  && (code == PLUS || code == MINUS));
+}
+
 /* Check to see if MODE is same as mode for X, and X is SIGN_EXTEND or
    ZERO_EXTEND.  */
 
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 4aa88466c80..0a23a48f960 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -1,5 +1,5 @@
 /* Definitions for GCC.  Part of the machine description for CRIS.
-   Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Axis Communications.  Written by Hans-Peter Nilsson.
 
 This file is part of GCC.
@@ -115,18 +115,8 @@ extern const char *cris_elinux_stacksize_str;
 /* Also provide canonical vN definitions when user specifies an alias.
    Note that -melf overrides -maout.  */
 
-/* The `-$' is here mostly due to the integrated preprocessor not
-   handling the builtin expansion of "#define __REGISTER_PREFIX__ $"
-   gracefully.  This is slightly redundant although not incorrect.
-   We're quite alone defining REGISTER_PREFIX as "$" so it's unlikely
-   someone will fight for us.  This year in the mountains.
-   Note that for -melinux and -mlinux, command-line -isystem options are
-   emitted both before and after the synthesized one.  We can't remove all
-   of them: a %{<isystem} will only remove the first one and %{<isystem*}
-   will not do TRT.  Those extra occurrences are harmless anyway.  */
 #define CPP_SPEC \
- "-$ -D__CRIS_ABI_version=2\
-  %{mtune=*:-D__tune_%* %{mtune=v*:-D__CRIS_arch_tune=%*}}\
+ "%{mtune=*:-D__tune_%* %{mtune=v*:-D__CRIS_arch_tune=%*}}\
    %{mtune=etrax4:-D__tune_v3 -D__CRIS_arch_tune=3}\
    %{mtune=etrax100:-D__tune_v8 -D__CRIS_arch_tune=8}\
    %{mtune=svinto:-D__tune_v8 -D__CRIS_arch_tune=8}\
@@ -150,8 +140,7 @@ extern const char *cris_elinux_stacksize_str;
 
 /* For the cris-*-elf subtarget.  */
 #define CRIS_CPP_SUBTARGET_SPEC \
- "-D__ELF__\
-  %{mbest-lib-options:\
+ "%{mbest-lib-options:\
    %{!moverride-best-lib-options:\
     %{!march=*:%{!metrax*:%{!mcpu=*:-D__tune_v10 -D__CRIS_arch_tune=10}}}}}"
 
@@ -255,8 +244,23 @@ extern const char *cris_elinux_stacksize_str;
 
 /* Node: Run-time Target */
 
-/* Only keep the non-varying ones here.  */
-#define CPP_PREDEFINES	"-Dcris -DCRIS -DGNU_CRIS"
+#define TARGET_CPU_CPP_BUILTINS()		\
+  do						\
+    {						\
+      builtin_define_std ("cris");		\
+      builtin_define_std ("CRIS");		\
+      builtin_define_std ("GNU_CRIS");		\
+      builtin_define ("__CRIS_ABI_version=2");	\
+    }						\
+  while (0)
+
+#define TARGET_OS_CPP_BUILTINS()		\
+  do						\
+    {						\
+      builtin_define ("__ELF__");		\
+    }						\
+  while (0)
+
 
 /* This needs to be at least 32 bits.  */
 extern int target_flags;
@@ -956,9 +960,8 @@ enum reg_class {NO_REGS, ALL_REGS, LIM_REG_CLASSES};
 struct cum_args {int regs;};
 
 /* The regs member is an integer, the number of arguments got into
-   registers so far, and lib is nonzero if init_cumulative_args was
-   found to generate a call to a library function.  */
-#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT)	  \
+   registers so far.  */
+#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT)	\
  ((CUM).regs = 0)
 
 #define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED)		\
@@ -1246,8 +1249,7 @@ struct cum_args {int regs;};
    FIXME: Check and adjust for gcc-2.9x.  */
 #define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) {}
 
-/* Functionality import from EGCS.
-   Kludge to solve Axis-990219: Work around imperfection in
+/* Kludge to solve Axis-990219: Work around imperfection in
    reload_load_address1:
     (plus (sign_extend (mem:qi (reg))) (reg))
    should be reloaded as (plus (reg) (reg)), not
@@ -1255,9 +1257,8 @@ struct cum_args {int regs;};
    There are no checks that reload_load_address_1 "reloads"
    addresses correctly, so invalidness is not caught or
    corrected.
-    When the right thing happens, the "something_reloaded" kludge can
-   be removed.  The right thing does not appear to happen for
-   EGCS CVS as of this date (above).  */
+    When the right thing happens in reload, the kludge can
+   be removed; still not as of 2003-02-27.  */
 
 #define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
   do									\
@@ -1733,6 +1734,8 @@ call_ ## FUNC (void)						\
   {PLUS, IOR, AND, UMIN}},				\
  {"cris_operand_extend_operator",			\
   {PLUS, MINUS, UMIN}},					\
+ {"cris_additive_operand_extend_operator",		\
+  {PLUS, MINUS}},					\
  {"cris_extend_operator",				\
   {ZERO_EXTEND, SIGN_EXTEND}},				\
  {"cris_plus_or_bound_operator",			\
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index 1683192189c..1b2d504cc22 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -1,5 +1,5 @@
 ;; GCC machine description for CRIS cpu cores.
-;; Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 ;; Contributed by Axis Communications.
 
 ;; This file is part of GCC.
@@ -34,7 +34,7 @@
 ;; gcc-2.7.2 (and problably not on gcc-2.8.1), relating to that when a
 ;; constant is substituted into an operand, the actual mode must be
 ;; deduced from the pattern.  There is reasonable hope that that has been
-;; fixed in egcs post 1.1.1, so FIXME: try again.
+;; fixed, so FIXME: try again.
 
 ;; You will notice that three-operand alternatives ("=r", "r", "!To")
 ;; are marked with a "!" constraint modifier to avoid being reloaded
@@ -496,7 +496,7 @@
   [(set (match_operand:QI 0 "register_operand" "=r,r,r")
 	(mem:QI
 	 (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r")
-		  (match_operand:SI 2 "cris_bdap_operand" "r>Ri,r,>Ri"))))
+		  (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn"))))
    (set (match_operand:SI 3 "register_operand" "=*1,r,r")
 	(plus:SI (match_dup 1)
 		 (match_dup 2)))]
@@ -519,7 +519,7 @@
   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
 	(mem:HI
 	 (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r")
-		  (match_operand:SI 2 "cris_bdap_operand" "r>Ri,r,>Ri"))))
+		  (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn"))))
    (set (match_operand:SI 3 "register_operand" "=*1,r,r")
 	(plus:SI (match_dup 1)
 		 (match_dup 2)))]
@@ -542,7 +542,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r,r,r")
 	(mem:SI
 	 (plus:SI (match_operand:SI 1 "cris_bdap_operand" "%r,r,r")
-		  (match_operand:SI 2 "cris_bdap_operand" "r>Ri,r,>Ri"))))
+		  (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn"))))
    (set (match_operand:SI 3 "register_operand" "=*1,r,r")
 	(plus:SI (match_dup 1)
 		 (match_dup 2)))]
@@ -671,7 +671,7 @@
 (define_insn "*mov_sideqi_mem"
   [(set (mem:QI
 	 (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r")
-		  (match_operand:SI 1 "cris_bdap_operand" "r>Ri,r>Ri,r,>Ri")))
+		  (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn")))
 	(match_operand:QI 2 "register_operand" "r,r,r,r"))
    (set (match_operand:SI 3 "register_operand" "=*0,!2,r,r")
 	(plus:SI (match_dup 0)
@@ -696,7 +696,7 @@
 (define_insn "*mov_sidehi_mem"
   [(set (mem:HI
 	 (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r")
-		  (match_operand:SI 1 "cris_bdap_operand" "r>Ri,r>Ri,r,>Ri")))
+		  (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn")))
 	(match_operand:HI 2 "register_operand" "r,r,r,r"))
    (set (match_operand:SI 3 "register_operand" "=*0,!2,r,r")
 	(plus:SI (match_dup 0)
@@ -721,7 +721,7 @@
 (define_insn "*mov_sidesi_mem"
   [(set (mem:SI
 	 (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r,r")
-		  (match_operand:SI 1 "cris_bdap_operand" "r>Ri,r>Ri,r,>Ri")))
+		  (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r>Rn,r,>Rn")))
 	(match_operand:SI 2 "register_operand" "r,r,r,r"))
    (set (match_operand:SI 3 "register_operand" "=*0,!2,r,r")
 	(plus:SI (match_dup 0)
@@ -786,7 +786,7 @@
 (define_insn "*clear_sidesi"
   [(set (mem:SI
 	 (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r")
-		  (match_operand:SI 1 "cris_bdap_operand" "r>Ri,r,>Ri")))
+		  (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))
 	(const_int 0))
    (set (match_operand:SI 2 "register_operand" "=*0,r,r")
 	(plus:SI (match_dup 0)
@@ -826,7 +826,7 @@
 (define_insn "*clear_sidehi"
   [(set (mem:HI
 	 (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r")
-		  (match_operand:SI 1 "cris_bdap_operand" "r>Ri,r,>Ri")))
+		  (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))
 	(const_int 0))
    (set (match_operand:SI 2 "register_operand" "=*0,r,r")
 	(plus:SI (match_dup 0)
@@ -866,7 +866,7 @@
 (define_insn "*clear_sideqi"
   [(set (mem:QI
 	 (plus:SI (match_operand:SI 0 "cris_bdap_operand" "%r,r,r")
-		  (match_operand:SI 1 "cris_bdap_operand" "r>Ri,r,>Ri")))
+		  (match_operand:SI 1 "cris_bdap_operand" "r>Rn,r,>Rn")))
 	(const_int 0))
    (set (match_operand:SI 2 "register_operand" "=*0,r,r")
 	(plus:SI (match_dup 0)
@@ -1127,7 +1127,7 @@
 	 4 "cris_extend_operator"
 	 [(mem:QI (plus:SI
 		   (match_operand:SI 1 "cris_bdap_operand" "%r,r,r")
-		   (match_operand:SI 2 "cris_bdap_operand" "r>Ri,r,>Ri")))]))
+		   (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))]))
    (set (match_operand:SI 3 "register_operand" "=*1,r,r")
 	(plus:SI (match_dup 1)
 		 (match_dup 2)))]
@@ -1152,7 +1152,7 @@
 	 4 "cris_extend_operator"
 	 [(mem:QI (plus:SI
 		   (match_operand:SI 1 "cris_bdap_operand" "%r,r,r")
-		   (match_operand:SI 2 "cris_bdap_operand" "r>Ri,r,>Ri")))]))
+		   (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))]))
    (set (match_operand:SI 3 "register_operand" "=*1,r,r")
 	(plus:SI (match_dup 1)
 		 (match_dup 2)))]
@@ -1177,7 +1177,7 @@
 	 4 "cris_extend_operator"
 	 [(mem:HI (plus:SI
 		   (match_operand:SI 1 "cris_bdap_operand" "%r,r,r")
-		   (match_operand:SI 2 "cris_bdap_operand" "r>Ri,r,>Ri")))]))
+		   (match_operand:SI 2 "cris_bdap_operand" "r>Rn,r,>Rn")))]))
    (set (match_operand:SI 3 "register_operand" "=*1,r,r")
 	(plus:SI (match_dup 1)
 		 (match_dup 2)))]
@@ -1473,7 +1473,7 @@
 	 [(match_operand:QI 1 "register_operand" "0,0,0")
 	  (mem:QI (plus:SI
 		   (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		   (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))]))
+		   (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
 		 (match_dup 3)))]
@@ -1499,7 +1499,7 @@
 	 [(match_operand:HI 1 "register_operand" "0,0,0")
 	  (mem:HI (plus:SI
 		   (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		   (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))]))
+		   (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
 		 (match_dup 3)))]
@@ -1525,7 +1525,7 @@
 	 [(match_operand:SI 1 "register_operand" "0,0,0")
 	  (mem:SI (plus:SI
 		   (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		   (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))]))
+		   (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
 		 (match_dup 3)))]
@@ -1624,7 +1624,7 @@
 	 5 "cris_commutative_orth_op"
 	 [(mem:QI
 	   (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		    (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))
+		    (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))
 	  (match_operand:QI 1 "register_operand" "0,0,0")]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -1650,7 +1650,7 @@
 	 5 "cris_commutative_orth_op"
 	 [(mem:HI
 	   (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		    (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))
+		    (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))
 	  (match_operand:HI 1 "register_operand" "0,0,0")]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -1676,7 +1676,7 @@
 	 5 "cris_commutative_orth_op"
 	 [(mem:SI
 	   (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		    (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))
+		    (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))
 	  (match_operand:SI 1 "register_operand" "0,0,0")]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -1886,7 +1886,7 @@
 (define_insn "*extopqihi_side_biap"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
 	(match_operator:HI
-	 6 "cris_operand_extend_operator"
+	 6 "cris_additive_operand_extend_operator"
 	 [(match_operand:HI 1 "register_operand" "0,0")
 	  (match_operator:HI
 	   7 "cris_extend_operator"
@@ -1898,8 +1898,7 @@
 	(plus:SI (mult:SI (match_dup 2)
 			  (match_dup 3))
 		 (match_dup 4)))]
-  "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND)
-   && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+  "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
   "@
    #
    %x6%e7.%m7 [%5=%4+%2%T3],%0")
@@ -1921,7 +1920,7 @@
 	(plus:SI (mult:SI (match_dup 2)
 			  (match_dup 3))
 		 (match_dup 4)))]
-  "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND)
+  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND)
    && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
   "@
    #
@@ -1944,7 +1943,7 @@
 	(plus:SI (mult:SI (match_dup 2)
 			  (match_dup 3))
 		 (match_dup 4)))]
-  "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND)
+  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[7]) == ZERO_EXTEND)
    && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
   "@
    #
@@ -1959,19 +1958,18 @@
 (define_insn "*extopqihi_side"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
 	(match_operator:HI
-	 5 "cris_operand_extend_operator"
+	 5 "cris_additive_operand_extend_operator"
 	 [(match_operand:HI 1 "register_operand" "0,0,0")
 	  (match_operator:HI
 	   6 "cris_extend_operator"
 	   [(mem:QI
 	     (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		      (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")
+		      (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")
 		      ))])]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
 		 (match_dup 3)))]
-  "(GET_CODE (operands[5]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
-   && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+  "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
   "*
 {
   if (which_alternative == 0
@@ -1995,7 +1993,7 @@
 	   6 "cris_extend_operator"
 	   [(mem:QI
 	     (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		      (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")
+		      (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")
 		      ))])]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -2026,7 +2024,7 @@
 	   6 "cris_extend_operator"
 	   [(mem:HI
 	     (plus:SI (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		      (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")
+		      (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")
 		      ))])]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -2053,28 +2051,25 @@
 
 ;; QImode to HImode
 ;; FIXME: GCC should widen.
-;; FIXME: These could have anonymous mode for operand 0.
 
 (define_insn "*extopqihi_swap_side_biap"
   [(set (match_operand:HI 0 "register_operand" "=r,r")
-	(match_operator:HI
-	 7 "cris_plus_or_bound_operator"
-	 [(match_operator:HI
-	   6 "cris_extend_operator"
-	   [(mem:QI (plus:SI
-		     (mult:SI (match_operand:SI 2 "register_operand" "r,r")
-			      (match_operand:SI 3 "const_int_operand" "n,n"))
-		     (match_operand:SI 4 "register_operand" "r,r")))])
-	  (match_operand:HI 1 "register_operand" "0,0")]))
+	(plus:HI
+	 (match_operator:HI
+	  6 "cris_extend_operator"
+	  [(mem:QI (plus:SI
+		    (mult:SI (match_operand:SI 2 "register_operand" "r,r")
+			     (match_operand:SI 3 "const_int_operand" "n,n"))
+		    (match_operand:SI 4 "register_operand" "r,r")))])
+	 (match_operand:HI 1 "register_operand" "0,0")))
    (set (match_operand:SI 5 "register_operand" "=*4,r")
 	(plus:SI (mult:SI (match_dup 2)
 			  (match_dup 3))
 		 (match_dup 4)))]
-  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
-   && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
+  "cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
   "@
    #
-   %x7%e6.%m6 [%5=%4+%2%T3],%0")
+   add%e6.b [%5=%4+%2%T3],%0")
 
 ;; QImode to SImode
 
@@ -2093,7 +2088,7 @@
 	(plus:SI (mult:SI (match_dup 2)
 			  (match_dup 3))
 		 (match_dup 4)))]
-  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
+  "(GET_CODE (operands[7]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
    && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
   "@
    #
@@ -2115,7 +2110,7 @@
 	(plus:SI (mult:SI (match_dup 2)
 			  (match_dup 3))
 		 (match_dup 4)))]
-  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
+  "(GET_CODE (operands[7]) != UMIN || GET_CODE (operands[6]) == ZERO_EXTEND)
    && cris_side_effect_mode_ok (MULT, operands, 5, 4, 2, 3, 0)"
   "@
    #
@@ -2129,19 +2124,17 @@
 
 (define_insn "*extopqihi_swap_side"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
-	(match_operator:HI
-	 6 "cris_plus_or_bound_operator"
-	 [(match_operator:HI
-	   5 "cris_extend_operator"
-	   [(mem:QI (plus:SI
-		     (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		     (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))])
-	  (match_operand:HI 1 "register_operand" "0,0,0")]))
+	(plus:HI
+	 (match_operator:HI
+	  5 "cris_extend_operator"
+	  [(mem:QI (plus:SI
+		    (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
+		    (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))])
+	 (match_operand:HI 1 "register_operand" "0,0,0")))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
 		 (match_dup 3)))]
-  "(GET_CODE (operands[6]) != UMIN || GET_CODE (operands[5]) == ZERO_EXTEND)
-   && cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
+  "cris_side_effect_mode_ok (PLUS, operands, 4, 2, 3, -1, 0)"
   "*
 {
   if (which_alternative == 0
@@ -2151,7 +2144,7 @@
 	  || CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'N')
 	  || CONST_OK_FOR_LETTER_P (INTVAL (operands[3]), 'J')))
     return \"#\";
-  return \"%x6%e5.%m5 [%4=%2%S3],%0\";
+  return \"add%e5.b [%4=%2%S3],%0\";
 }")
 
 ;; QImode to SImode
@@ -2164,7 +2157,7 @@
 	   5 "cris_extend_operator"
 	   [(mem:QI (plus:SI
 		     (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		     (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))])
+		     (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))])
 	  (match_operand:SI 1 "register_operand" "0,0,0")]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -2193,7 +2186,7 @@
 	   5 "cris_extend_operator"
 	   [(mem:HI (plus:SI
 		     (match_operand:SI 2 "cris_bdap_operand" "%r,r,r")
-		     (match_operand:SI 3 "cris_bdap_operand" "r>Ri,r,>Ri")))])
+		     (match_operand:SI 3 "cris_bdap_operand" "r>Rn,r,>Rn")))])
 	  (match_operand:SI 1 "register_operand" "0,0,0")]))
    (set (match_operand:SI 4 "register_operand" "=*2,r,r")
 	(plus:SI (match_dup 2)
@@ -2221,13 +2214,12 @@
 (define_insn "*extopqihi"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
 	(match_operator:HI
-	 3 "cris_operand_extend_operator"
+	 3 "cris_additive_operand_extend_operator"
 	 [(match_operand:HI 1 "register_operand" "0,0,0,r")
 	  (match_operator:HI
 	   4 "cris_extend_operator"
 	   [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])]))]
-  "(GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND)
-   && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+  "GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
    && (operands[1] != frame_pointer_rtx || GET_CODE (operands[3]) != PLUS)"
   "@
    %x3%e4.%m4 %2,%0
@@ -2285,20 +2277,17 @@
 
 (define_insn "*extopqihi_swap"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r,r")
-	(match_operator:HI
-	 4 "cris_plus_or_bound_operator"
-	 [(match_operator:HI
-	   3 "cris_extend_operator"
-	   [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])
-	  (match_operand:HI 1 "register_operand" "0,0,0,r")]))]
-  "(GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND)
-   && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
-   && operands[1] != frame_pointer_rtx"
+	(plus:HI
+	 (match_operator:HI
+	  3 "cris_extend_operator"
+	  [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])
+	 (match_operand:HI 1 "register_operand" "0,0,0,r")))]
+  "operands[1] != frame_pointer_rtx"
   "@
-   %x4%e3.%m3 %2,%0
-   %x4%e3.%m3 %2,%0
-   %x4%e3.%m3 %2,%0
-   %x4%e3.%m3 %2,%1,%0"
+   add%e3.b %2,%0
+   add%e3.b %2,%0
+   add%e3.b %2,%0
+   add%e3.b %2,%1,%0"
   [(set_attr "slottable" "yes,yes,no,no")
    (set_attr "cc" "clobber")])
 
@@ -2312,8 +2301,7 @@
 	   3 "cris_extend_operator"
 	   [(match_operand:QI 2 "nonimmediate_operand" "r,Q>,m,!To")])
 	  (match_operand:SI 1 "register_operand" "0,0,0,r")]))]
-  "(GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND)
-   && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+  "(GET_CODE (operands[4]) != UMIN || GET_CODE (operands[3]) == ZERO_EXTEND)
    && operands[1] != frame_pointer_rtx"
   "@
    %x4%e3.%m3 %2,%0
@@ -2332,8 +2320,7 @@
 	   3 "cris_extend_operator"
 	   [(match_operand:HI 2 "nonimmediate_operand" "r,Q>,m,!To")])
 	  (match_operand:SI 1 "register_operand" "0,0,0,r")]))]
-  "(GET_CODE (operands[3]) != UMIN || GET_CODE (operands[4]) == ZERO_EXTEND)
-   && GET_MODE_SIZE (GET_MODE (operands[0])) <= UNITS_PER_WORD
+  "(GET_CODE (operands[4]) != UMIN || GET_CODE (operands[3]) == ZERO_EXTEND)
    && operands[1] != frame_pointer_rtx"
   "@
    %x4%e3.%m3 %2,%0
@@ -4854,10 +4841,10 @@
 
 (define_peephole
   [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
-	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Ri,r,>Ri"))
+	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Rn,r,>Rn"))
    (set (match_dup 0)
-	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Ri,r")
-		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Ri,r,0,0")))
+	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Rn,r")
+		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Rn,r,0,0")))
    (set (match_operand 4 "register_operand" "=r,r,r,r")
 	(mem (match_dup 0)))]
   "(rtx_equal_p (operands[2], operands[0])
@@ -4882,10 +4869,10 @@
 
 (define_peephole
   [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
-	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Ri,r,>Ri"))
+	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Rn,r,>Rn"))
    (set (match_dup 0)
-	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Ri,r")
-		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Ri,r,0,0")))
+	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Rn,r")
+		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Rn,r,0,0")))
    (set (mem (match_dup 0))
 	(match_operand 4 "register_operand" "=r,r,r,r"))]
   "(rtx_equal_p (operands[2], operands[0])
@@ -4912,10 +4899,10 @@
 
 (define_peephole
   [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
-	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Ri,r,>Ri"))
+	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Rn,r,>Rn"))
    (set (match_dup 0)
-	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Ri,r")
-		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Ri,r,0,0")))
+	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Rn,r")
+		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Rn,r,0,0")))
    (set (match_operand 4 "register_operand" "=r,r,r,r")
 	(match_operator 5 "cris_orthogonal_operator"
 			[(match_dup 3)
@@ -4942,10 +4929,10 @@
 
 (define_peephole
   [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
-	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Ri,r,>Ri"))
+	(match_operand:SI 1 "cris_bdap_biap_operand" "r,>Rn,r,>Rn"))
    (set (match_dup 0)
-	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Ri,r")
-		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Ri,r,0,0")))
+	(plus:SI (match_operand:SI 2 "cris_bdap_biap_operand" "0,0,r>Rn,r")
+		 (match_operand:SI 3 "cris_bdap_biap_operand" "r>Rn,r,0,0")))
    (set (match_operand 4 "register_operand" "=r,r,r,r")
 	(match_operator 5 "cris_commutative_orth_op"
 			[(mem (match_dup 0))
@@ -5014,8 +5001,8 @@
 (define_peephole
   [(set (match_operand 0 "register_operand" "=r,r,r,r")
 	(mem (plus:SI
-	      (match_operand:SI 1 "cris_bdap_biap_operand" "r,r>Ri,r,r>Ri")
-	      (match_operand:SI 2 "cris_bdap_biap_operand" "r>Ri,r,r>Ri,r"))))
+	      (match_operand:SI 1 "cris_bdap_biap_operand" "r,r>Rn,r,r>Rn")
+	      (match_operand:SI 2 "cris_bdap_biap_operand" "r>Rn,r,r>Rn,r"))))
    (set (match_dup 0)
 	(match_operator 5 "cris_commutative_orth_op"
 			[(match_operand 3 "register_operand" "0,0,r,r")
diff --git a/gcc/config/cris/linux.h b/gcc/config/cris/linux.h
index 546f38101b9..a3014416e16 100644
--- a/gcc/config/cris/linux.h
+++ b/gcc/config/cris/linux.h
@@ -1,5 +1,5 @@
 /* Definitions for GCC.  Part of the machine description for CRIS.
-   Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Axis Communications.  Written by Hans-Peter Nilsson.
 
 This file is part of GCC.
@@ -47,10 +47,7 @@ Boston, MA 02111-1307, USA.  */
 
 #undef CRIS_CPP_SUBTARGET_SPEC
 #define CRIS_CPP_SUBTARGET_SPEC \
-  "-D__gnu_linux__ -D__linux__ -D__unix__  -D__ELF__\
-   %{pthread:-D_REENTRANT}\
-   %{fPIC|fpic: -D__PIC__ -D__pic__}\
-   %{!fleading-underscore:-fno-leading-underscore -D__NO_UNDERSCORES__}\
+  "%{pthread:-D_REENTRANT}\
    %{!march=*:%{!cpu=*:-D__arch_v10 -D__CRIS_arch_version=10}}\
    %{!ansi:%{!std=*:%{!undef:-Dlinux -Dunix}\
      -Asystem(unix) -Asystem(posix) -Acpu(cris) -Amachine(cris)}}"
@@ -99,6 +96,29 @@ Boston, MA 02111-1307, USA.  */
   %{!r:%{O2|O3: --gc-sections}}"
 
 
+/* Node: Run-time Target */
+
+/* For the cris-*-linux* subtarget.  */
+#undef TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS()		\
+  do						\
+    {						\
+      extern int flag_leading_underscore;	\
+      builtin_define ("__gnu_linux__");		\
+      builtin_define ("__linux__");		\
+      builtin_define ("__unix__");		\
+      builtin_define ("__ELF__");		\
+      if (flag_pic)				\
+	{					\
+	  builtin_define ("__PIC__");		\
+	  builtin_define ("__pic__");		\
+	}					\
+      if (flag_leading_underscore <= 0)		\
+	builtin_define ("__NO_UNDERSCORES__");	\
+    }						\
+  while (0)
+     
+
 /* Node: Sections */
 
 /* GNU/Linux has crti and crtn and does not need the
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 7997a46e7a5..90d959c0f7e 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -100,7 +100,6 @@ Boston, MA 02111-1307, USA.  */
    name, that also takes an argument, needs to be modified so the
    prefix is different, otherwise a '*' after the shorter option will
    match with the longer one.  */
-/* Ignore -dynamic for now */
 #define TARGET_OPTION_TRANSLATE_TABLE \
   { "-all_load", "-Zall_load" },  \
   { "-allowable_client", "-Zallowable_client" },  \
@@ -111,7 +110,7 @@ Boston, MA 02111-1307, USA.  */
   { "-weak_reference_mismatches", "-Zweak_reference_mismatches" },  \
   { "-dependency-file", "-MF" }, \
   { "-dylib_file", "-Zdylib_file" }, \
-  { "-dynamic", " " },  \
+  { "-dynamic", "-Zdynamic" },  \
   { "-dynamiclib", "-Zdynamiclib" },  \
   { "-exported_symbols_list", "-Zexported_symbols_list" },  \
   { "-seg_addr_table_filename", "-Zseg_addr_table_filename" }, \
@@ -174,7 +173,7 @@ Boston, MA 02111-1307, USA.  */
 /* Machine dependent cpp options.  */
 
 #undef	CPP_SPEC
-#define CPP_SPEC "%{static:-D__STATIC__}%{!static:-D__DYNAMIC__}"
+#define CPP_SPEC "%{static:%{!dynamic:-D__STATIC__}}%{!static:-D__DYNAMIC__}"
 
 /* This is mostly a clone of the standard LINK_COMMAND_SPEC, plus
    precomp, libtool, and fat build additions.  Also we
@@ -234,6 +233,7 @@ Boston, MA 02111-1307, USA.  */
    %{Zbind_at_load:-bind_at_load} \
    %{Zarch_errors_fatal:-arch_errors_fatal} \
    %{Zdylib_file*:-dylib_file %*} \
+   %{Zdynamic:-dynamic}\
    %{Zexported_symbols_list*:-exported_symbols_list %*} \
    %{Zflat_namespace:-flat_namespace} \
    %{headerpad_max_install_names*} \
diff --git a/gcc/config/fp-bit.c b/gcc/config/fp-bit.c
index 7ec20ecf9f2..e609760c59c 100644
--- a/gcc/config/fp-bit.c
+++ b/gcc/config/fp-bit.c
@@ -1,6 +1,6 @@
 /* This is a software floating point library which can be used
    for targets without hardware floating point. 
-   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002
+   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
 
 This file is free software; you can redistribute it and/or modify it
@@ -130,6 +130,10 @@ void __lttf2 (void) { abort(); }
 const fp_number_type __thenan_sf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
 #elif defined L_thenan_df
 const fp_number_type __thenan_df = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined L_thenan_tf
+const fp_number_type __thenan_tf = { CLASS_SNAN, 0, 0, {(fractype) 0} };
+#elif defined TFLOAT
+extern const fp_number_type __thenan_tf;
 #elif defined FLOAT
 extern const fp_number_type __thenan_sf;
 #else
@@ -141,7 +145,9 @@ static fp_number_type *
 nan (void)
 {
   /* Discard the const qualifier...  */
-#ifdef FLOAT  
+#ifdef TFLOAT
+  return (fp_number_type *) (& __thenan_tf);
+#elif defined FLOAT  
   return (fp_number_type *) (& __thenan_sf);
 #else
   return (fp_number_type *) (& __thenan_df);
@@ -180,7 +186,7 @@ flip_sign ( fp_number_type *  x)
 
 extern FLO_type pack_d ( fp_number_type * );
 
-#if defined(L_pack_df) || defined(L_pack_sf)
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
 FLO_type
 pack_d ( fp_number_type *  src)
 {
@@ -316,24 +322,92 @@ pack_d ( fp_number_type *  src)
   dst.bits.exp = exp;
   dst.bits.sign = sign;
 #else
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low;
+
+   high = (fraction >> (FRACBITS - HALFFRACBITS));
+   high &= (((fractype)1) << HALFFRACBITS) - 1;
+   high |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << HALFFRACBITS;
+   high |= ((fractype) (sign & 1)) << (HALFFRACBITS | EXPBITS);
+
+   low = (halffractype)fraction &
+     ((((halffractype)1) << (FRACBITS - HALFFRACBITS)) - 1);
+
+   if (exp == EXPMAX || exp == 0 || low == 0)
+     low = 0;
+   else
+     {
+       exp -= HALFFRACBITS + 1;
+
+       while (exp > 0
+	      && low < ((halffractype)1 << HALFFRACBITS))
+	 {
+	   low <<= 1;
+	   exp--;
+	 }
+
+       if (exp <= 0)
+	 {
+	   halffractype roundmsb, round;
+
+	   exp = -exp + 1;
+
+	   roundmsb = (1 << (exp - 1));
+	   round = low & ((roundmsb << 1) - 1);
+
+	   low >>= exp;
+	   exp = 0;
+
+	   if (round > roundmsb || (round == roundmsb && (low & 1)))
+	     {
+	       low++;
+	       if (low >= ((halffractype)1 << HALFFRACBITS))
+		 /* We don't shift left, since it has just become the
+		    smallest normal number, whose implicit 1 bit is
+		    now indicated by the non-zero exponent.  */
+		 exp++;
+	     }
+	 }
+
+       low &= ((halffractype)1 << HALFFRACBITS) - 1;
+       low |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << HALFFRACBITS;
+       low |= ((fractype) (sign & 1)) << (HALFFRACBITS | EXPBITS);
+     }
+
+   dst.value_raw = (((fractype) high) << HALFSHIFT) | low;
+ }
+# else
   dst.value_raw = fraction & ((((fractype)1) << FRACBITS) - (fractype)1);
   dst.value_raw |= ((fractype) (exp & ((1 << EXPBITS) - 1))) << FRACBITS;
   dst.value_raw |= ((fractype) (sign & 1)) << (FRACBITS | EXPBITS);
+# endif
 #endif
 
 #if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
+#ifdef TFLOAT
+  {
+    qrtrfractype tmp1 = dst.words[0];
+    qrtrfractype tmp2 = dst.words[1];
+    dst.words[0] = dst.words[3];
+    dst.words[1] = dst.words[2];
+    dst.words[2] = tmp2;
+    dst.words[3] = tmp1;
+  }
+#else
   {
     halffractype tmp = dst.words[0];
     dst.words[0] = dst.words[1];
     dst.words[1] = tmp;
   }
 #endif
+#endif
 
   return dst.value;
 }
 #endif
 
-#if defined(L_unpack_df) || defined(L_unpack_sf)
+#if defined(L_unpack_df) || defined(L_unpack_sf) || defined(L_unpack_tf)
 void
 unpack_d (FLO_union_type * src, fp_number_type * dst)
 {
@@ -347,8 +421,15 @@ unpack_d (FLO_union_type * src, fp_number_type * dst)
 #if defined(FLOAT_WORD_ORDER_MISMATCH) && !defined(FLOAT)
   FLO_union_type swapped;
 
+#ifdef TFLOAT
+  swapped.words[0] = src->words[3];
+  swapped.words[1] = src->words[2];
+  swapped.words[2] = src->words[1];
+  swapped.words[3] = src->words[0];
+#else
   swapped.words[0] = src->words[1];
   swapped.words[1] = src->words[0];
+#endif
   src = &swapped;
 #endif
   
@@ -357,9 +438,46 @@ unpack_d (FLO_union_type * src, fp_number_type * dst)
   exp = src->bits.exp;
   sign = src->bits.sign;
 #else
-  fraction = src->value_raw & ((((fractype)1) << FRACBITS) - (fractype)1);
+# if defined TFLOAT && defined HALFFRACBITS
+ {
+   halffractype high, low;
+   
+   high = src->value_raw >> HALFSHIFT;
+   low = src->value_raw & (((fractype)1 << HALFSHIFT) - 1);
+
+   fraction = high & ((((fractype)1) << HALFFRACBITS) - 1);
+   fraction <<= FRACBITS - HALFFRACBITS;
+   exp = ((int)(high >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+   sign = ((int)(high >> (((HALFFRACBITS + EXPBITS))))) & 1;
+
+   if (exp != EXPMAX && exp != 0 && low != 0)
+     {
+       int lowexp = ((int)(low >> HALFFRACBITS)) & ((1 << EXPBITS) - 1);
+       int lowsign = ((int)(low >> (((HALFFRACBITS + EXPBITS))))) & 1;
+       int shift;
+       fractype xlow;
+
+       xlow = low & ((((fractype)1) << HALFFRACBITS) - 1);
+       if (lowexp)
+	 xlow |= (((halffractype)1) << HALFFRACBITS);
+       else
+	 lowexp = 1;
+       shift = (FRACBITS - HALFFRACBITS) - (exp - lowexp);
+       if (shift > 0)
+	 xlow <<= shift;
+       else if (shift < 0)
+	 xlow >>= -shift;
+       if (sign == lowsign)
+	 fraction += xlow;
+       else
+	 fraction -= xlow;
+     }
+ }
+# else
+  fraction = src->value_raw & ((((fractype)1) << FRACBITS) - 1);
   exp = ((int)(src->value_raw >> FRACBITS)) & ((1 << EXPBITS) - 1);
   sign = ((int)(src->value_raw >> (FRACBITS + EXPBITS))) & 1;
+# endif
 #endif
 
   dst->sign = sign;
@@ -427,7 +545,7 @@ unpack_d (FLO_union_type * src, fp_number_type * dst)
 }
 #endif /* L_unpack_df || L_unpack_sf */
 
-#if defined(L_addsub_sf) || defined(L_addsub_df)
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
 static fp_number_type *
 _fpadd_parts (fp_number_type * a,
 	      fp_number_type * b,
@@ -611,7 +729,7 @@ sub (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_addsub_sf || L_addsub_df */
 
-#if defined(L_mul_sf) || defined(L_mul_df)
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
 static inline __attribute__ ((__always_inline__)) fp_number_type *
 _fpmul_parts ( fp_number_type *  a,
 	       fp_number_type *  b,
@@ -660,7 +778,7 @@ _fpmul_parts ( fp_number_type *  a,
   /* Calculate the mantissa by multiplying both numbers to get a
      twice-as-wide number.  */
   {
-#if defined(NO_DI_MODE)
+#if defined(NO_DI_MODE) || defined(TFLOAT)
     {
       fractype x = a->fraction.ll;
       fractype ylow = b->fraction.ll;
@@ -723,13 +841,9 @@ _fpmul_parts ( fp_number_type *  a,
 #endif
   }
 
-  tmp->normal_exp = a->normal_exp + b->normal_exp;
+  tmp->normal_exp = a->normal_exp + b->normal_exp
+    + FRAC_NBITS - (FRACBITS + NGARDS);
   tmp->sign = a->sign != b->sign;
-#ifdef FLOAT
-  tmp->normal_exp += 2;		/* ??????????????? */
-#else
-  tmp->normal_exp += 4;		/* ??????????????? */
-#endif
   while (high >= IMPLICIT_2)
     {
       tmp->normal_exp++;
@@ -803,7 +917,7 @@ multiply (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_mul_sf || L_mul_df */
 
-#if defined(L_div_sf) || defined(L_div_df)
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
 static inline __attribute__ ((__always_inline__)) fp_number_type *
 _fpdiv_parts (fp_number_type * a,
 	      fp_number_type * b)
@@ -913,7 +1027,8 @@ divide (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_div_sf || L_div_df */
 
-#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df)
+#if defined(L_fpcmp_parts_sf) || defined(L_fpcmp_parts_df) \
+    || defined(L_fpcmp_parts_tf)
 /* according to the demo, fpcmp returns a comparison with 0... thus
    a<b -> -1
    a==b -> 0
@@ -998,7 +1113,7 @@ __fpcmp_parts (fp_number_type * a, fp_number_type * b)
 }
 #endif
 
-#if defined(L_compare_sf) || defined(L_compare_df)
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compoare_tf)
 CMPtype
 compare (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1020,7 +1135,7 @@ compare (FLO_type arg_a, FLO_type arg_b)
 
 /* These should be optimized for their specific tasks someday.  */
 
-#if defined(L_eq_sf) || defined(L_eq_df)
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
 CMPtype
 _eq_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1041,7 +1156,7 @@ _eq_f2 (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_eq_sf || L_eq_df */
 
-#if defined(L_ne_sf) || defined(L_ne_df)
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
 CMPtype
 _ne_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1062,7 +1177,7 @@ _ne_f2 (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_ne_sf || L_ne_df */
 
-#if defined(L_gt_sf) || defined(L_gt_df)
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
 CMPtype
 _gt_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1083,7 +1198,7 @@ _gt_f2 (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_gt_sf || L_gt_df */
 
-#if defined(L_ge_sf) || defined(L_ge_df)
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
 CMPtype
 _ge_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1103,7 +1218,7 @@ _ge_f2 (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_ge_sf || L_ge_df */
 
-#if defined(L_lt_sf) || defined(L_lt_df)
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
 CMPtype
 _lt_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1124,7 +1239,7 @@ _lt_f2 (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_lt_sf || L_lt_df */
 
-#if defined(L_le_sf) || defined(L_le_df)
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
 CMPtype
 _le_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1147,7 +1262,7 @@ _le_f2 (FLO_type arg_a, FLO_type arg_b)
 
 #endif /* ! US_SOFTWARE_GOFAST */
 
-#if defined(L_unord_sf) || defined(L_unord_df)
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
 CMPtype
 _unord_f2 (FLO_type arg_a, FLO_type arg_b)
 {
@@ -1165,7 +1280,7 @@ _unord_f2 (FLO_type arg_a, FLO_type arg_b)
 }
 #endif /* L_unord_sf || L_unord_df */
 
-#if defined(L_si_to_sf) || defined(L_si_to_df)
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
 FLO_type
 si_to_float (SItype arg_a)
 {
@@ -1193,7 +1308,7 @@ si_to_float (SItype arg_a)
       else
 	in.fraction.ll = arg_a;
 
-      while (in.fraction.ll < (1LL << (FRACBITS + NGARDS)))
+      while (in.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS)))
 	{
 	  in.fraction.ll <<= 1;
 	  in.normal_exp -= 1;
@@ -1203,7 +1318,7 @@ si_to_float (SItype arg_a)
 }
 #endif /* L_si_to_sf || L_si_to_df */
 
-#if defined(L_usi_to_sf) || defined(L_usi_to_df)
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
 FLO_type
 usi_to_float (USItype arg_a)
 {
@@ -1220,12 +1335,12 @@ usi_to_float (USItype arg_a)
       in.normal_exp = FRACBITS + NGARDS;
       in.fraction.ll = arg_a;
 
-      while (in.fraction.ll > (1LL << (FRACBITS + NGARDS)))
+      while (in.fraction.ll > ((fractype)1 << (FRACBITS + NGARDS)))
         {
           in.fraction.ll >>= 1;
           in.normal_exp += 1;
         }
-      while (in.fraction.ll < (1LL << (FRACBITS + NGARDS)))
+      while (in.fraction.ll < ((fractype)1 << (FRACBITS + NGARDS)))
 	{
 	  in.fraction.ll <<= 1;
 	  in.normal_exp -= 1;
@@ -1235,7 +1350,7 @@ usi_to_float (USItype arg_a)
 }
 #endif
 
-#if defined(L_sf_to_si) || defined(L_df_to_si)
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
 SItype
 float_to_si (FLO_type arg_a)
 {
@@ -1263,8 +1378,8 @@ float_to_si (FLO_type arg_a)
 }
 #endif /* L_sf_to_si || L_df_to_si */
 
-#if defined(L_sf_to_usi) || defined(L_df_to_usi)
-#ifdef US_SOFTWARE_GOFAST
+#if defined(L_sf_to_usi) || defined(L_df_to_usi) || defined(L_tf_to_usi)
+#if defined US_SOFTWARE_GOFAST || defined(L_tf_to_usi)
 /* While libgcc2.c defines its own __fixunssfsi and __fixunsdfsi routines,
    we also define them for GOFAST because the ones in libgcc2.c have the
    wrong names and I'd rather define these here and keep GOFAST CYG-LOC's
@@ -1303,7 +1418,7 @@ float_to_usi (FLO_type arg_a)
 #endif /* US_SOFTWARE_GOFAST */
 #endif /* L_sf_to_usi || L_df_to_usi */
 
-#if defined(L_negate_sf) || defined(L_negate_df)
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
 FLO_type
 negate (FLO_type arg_a)
 {
@@ -1359,6 +1474,21 @@ sf_to_df (SFtype arg_a)
 }
 #endif /* L_sf_to_df */
 
+#if defined(L_sf_to_tf) && defined(TMODES)
+TFtype
+sf_to_tf (SFtype arg_a)
+{
+  fp_number_type in;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  return __make_tp (in.class, in.sign, in.normal_exp,
+		    ((UTItype) in.fraction.ll) << F_T_BITOFF);
+}
+#endif /* L_sf_to_df */
+
 #endif /* ! FLOAT_ONLY */
 #endif /* FLOAT */
 
@@ -1402,5 +1532,84 @@ df_to_sf (DFtype arg_a)
 }
 #endif /* L_df_to_sf */
 
+#if defined(L_df_to_tf) && defined(TMODES) \
+    && !defined(FLOAT) && !defined(TFLOAT)
+TFtype
+df_to_tf (DFtype arg_a)
+{
+  fp_number_type in;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  return __make_tp (in.class, in.sign, in.normal_exp,
+		    ((UTItype) in.fraction.ll) << D_T_BITOFF);
+}
+#endif /* L_sf_to_df */
+
+#ifdef TFLOAT
+#if defined(L_make_tf)
+TFtype
+__make_tp(fp_class_type class,
+	     unsigned int sign,
+	     int exp, 
+	     UTItype frac)
+{
+  fp_number_type in;
+
+  in.class = class;
+  in.sign = sign;
+  in.normal_exp = exp;
+  in.fraction.ll = frac;
+  return pack_d (&in);
+}
+#endif /* L_make_tf */
+
+#if defined(L_tf_to_df)
+DFtype
+tf_to_df (TFtype arg_a)
+{
+  fp_number_type in;
+  UDItype sffrac;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  sffrac = in.fraction.ll >> D_T_BITOFF;
+
+  /* We set the lowest guard bit in SFFRAC if we discarded any non
+     zero bits.  */
+  if ((in.fraction.ll & (((UTItype) 1 << D_T_BITOFF) - 1)) != 0)
+    sffrac |= 1;
+
+  return __make_dp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_tf_to_df */
+
+#if defined(L_tf_to_sf)
+SFtype
+tf_to_sf (TFtype arg_a)
+{
+  fp_number_type in;
+  USItype sffrac;
+  FLO_union_type au;
+
+  au.value = arg_a;
+  unpack_d (&au, &in);
+
+  sffrac = in.fraction.ll >> F_T_BITOFF;
+
+  /* We set the lowest guard bit in SFFRAC if we discarded any non
+     zero bits.  */
+  if ((in.fraction.ll & (((UTItype) 1 << F_T_BITOFF) - 1)) != 0)
+    sffrac |= 1;
+
+  return __make_fp (in.class, in.sign, in.normal_exp, sffrac);
+}
+#endif /* L_tf_to_sf */
+#endif /* TFLOAT */
+
 #endif /* ! FLOAT */
 #endif /* !EXTENDED_FLOAT_STUBS */
diff --git a/gcc/config/fp-bit.h b/gcc/config/fp-bit.h
index 03e2ff60993..0e8509eedf9 100644
--- a/gcc/config/fp-bit.h
+++ b/gcc/config/fp-bit.h
@@ -1,5 +1,5 @@
 /* Header file for fp-bit.c.  */
-/* Copyright (C) 2000
+/* Copyright (C) 2000, 2002, 2003
    Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -87,12 +87,22 @@ Boston, MA 02111-1307, USA.  */
 #endif
 #endif /* ! FINE_GRAINED_LIBRARIES */
 
+#if __LDBL_MANT_DIG__ == 113 || __LDBL_MANT_DIG__ == 106
+# define TMODES
+#endif
+
 typedef float SFtype __attribute__ ((mode (SF)));
 typedef float DFtype __attribute__ ((mode (DF)));
+#ifdef TMODES
+typedef float TFtype __attribute__ ((mode (TF)));
+#endif
 
 typedef int HItype __attribute__ ((mode (HI)));
 typedef int SItype __attribute__ ((mode (SI)));
 typedef int DItype __attribute__ ((mode (DI)));
+#ifdef TMODES
+typedef int TItype __attribute__ ((mode (TI)));
+#endif
 
 /* The type of the result of a fp compare */
 #ifndef CMPtype
@@ -102,16 +112,68 @@ typedef int DItype __attribute__ ((mode (DI)));
 typedef unsigned int UHItype __attribute__ ((mode (HI)));
 typedef unsigned int USItype __attribute__ ((mode (SI)));
 typedef unsigned int UDItype __attribute__ ((mode (DI)));
+#ifdef TMODES
+typedef unsigned int UTItype __attribute__ ((mode (TI)));
+#endif
 
 #define MAX_USI_INT  (~(USItype)0)
 #define MAX_SI_INT   ((SItype) (MAX_USI_INT >> 1))
 #define BITS_PER_SI  (4 * BITS_PER_UNIT)
+#ifdef TMODES
+#define MAX_UDI_INT  (~(UDItype)0)
+#define MAX_DI_INT   ((DItype) (MAX_UDI_INT >> 1))
+#define BITS_PER_DI  (8 * BITS_PER_UNIT)
+#endif
 
 #ifdef FLOAT_ONLY
 #define NO_DI_MODE
 #endif
 
-#ifdef FLOAT
+#ifdef TFLOAT
+# ifndef TMODES
+#  error "TFLOAT requires long double to have 113 bits of mantissa"
+# endif
+
+#	define PREFIXFPDP tp
+#	define PREFIXSFDF tf
+#	define NGARDS 10L /* Is this right? */
+#	define GARDROUND 0x1ff
+#	define GARDMASK  0x3ff
+#	define GARDMSB   0x200
+#	define FRAC_NBITS 128
+
+# if __LDBL_MANT_DIG__ == 113 /* IEEE quad */
+#	define EXPBITS 15
+#	define EXPBIAS 16383
+#	define EXPMAX (0x7fff)
+#	define QUIET_NAN ((TItype)0x8 << 108)
+#	define FRACHIGH  ((TItype)0x8 << 124)
+#	define FRACHIGH2 ((TItype)0xc << 124)
+#	define FRACBITS 112
+# endif
+
+# if __LDBL_MANT_DIG__ == 106 /* IBM extended (double+double) */
+#	define EXPBITS 11
+#	define EXPBIAS 1023
+#	define EXPMAX (0x7ff)
+#	define QUIET_NAN ((TItype)0x8 << (48 + 64))
+#	define FRACHIGH  ((TItype)0x8 << 124)
+#	define FRACHIGH2 ((TItype)0xc << 124)
+#	define FRACBITS 105
+#	define HALFFRACBITS 52
+#	define HALFSHIFT 64
+# endif
+
+#	define pack_d __pack_t
+#	define unpack_d __unpack_t
+#	define __fpcmp_parts __fpcmp_parts_t
+	typedef UTItype fractype;
+	typedef UDItype halffractype;
+	typedef USItype qrtrfractype;
+#define qrtrfractype qrtrfractype
+	typedef TFtype FLO_type;
+	typedef TItype intfrac;
+#elif defined FLOAT
 #	define NGARDS    7L
 #	define GARDROUND 0x3f
 #	define GARDMASK  0x7f
@@ -157,7 +219,9 @@ typedef unsigned int UDItype __attribute__ ((mode (DI)));
 #endif /* FLOAT */
 
 #ifdef US_SOFTWARE_GOFAST
-#	ifdef FLOAT
+#	ifdef TFLOAT
+#		error "GOFAST TFmode not supported"
+#	elif defined FLOAT
 #		define add 		fpadd
 #		define sub 		fpsub
 #		define multiply 	fpmul
@@ -170,8 +234,8 @@ typedef unsigned int UDItype __attribute__ ((mode (DI)));
 #		define float_to_usi 	fptoui
 #		define negate 		__negsf2
 #		define sf_to_df		fptodp
-#		define dptofp 		dptofp
-#else
+#		define sf_to_tf		__extendsftf2
+#	else
 #		define add 		dpadd
 #		define sub 		dpsub
 #		define multiply 	dpmul
@@ -184,9 +248,30 @@ typedef unsigned int UDItype __attribute__ ((mode (DI)));
 #		define float_to_usi 	dptoul
 #		define negate 		__negdf2
 #		define df_to_sf 	dptofp
+#		define df_to_tf 	__extenddftf2
 #	endif /* FLOAT */
 #else
-#	ifdef FLOAT
+#	ifdef TFLOAT
+#		define add 		__addtf3
+#		define sub 		__subtf3
+#		define multiply 	__multf3
+#		define divide 		__divtf3
+#		define compare 		__cmptf2
+#		define _eq_f2 		__eqtf2
+#		define _ne_f2 		__netf2
+#		define _gt_f2 		__gttf2
+#		define _ge_f2 		__getf2
+#		define _lt_f2 		__lttf2
+#		define _le_f2 		__letf2
+#		define _unord_f2	__unordtf2
+#		define usi_to_float 	__floatunsitf
+#		define si_to_float 	__floatsitf
+#		define float_to_si 	__fixtfsi
+#		define float_to_usi 	__fixunstfsi
+#		define negate 		__negtf2
+#		define tf_to_sf		__trunctfsf2
+#		define tf_to_df		__trunctfdf2
+#	elif defined FLOAT
 #		define add 		__addsf3
 #		define sub 		__subsf3
 #		define multiply 	__mulsf3
@@ -205,7 +290,8 @@ typedef unsigned int UDItype __attribute__ ((mode (DI)));
 #		define float_to_usi 	__fixunssfsi
 #		define negate 		__negsf2
 #		define sf_to_df		__extendsfdf2
-#else
+#		define sf_to_tf		__extendsftf2
+#	else
 #		define add 		__adddf3
 #		define sub 		__subdf3
 #		define multiply 	__muldf3
@@ -224,6 +310,7 @@ typedef unsigned int UDItype __attribute__ ((mode (DI)));
 #		define float_to_usi 	__fixunsdfsi
 #		define negate 		__negdf2
 #		define df_to_sf		__truncdfsf2
+#		define df_to_tf		__extenddftf2
 #	endif /* FLOAT */
 #endif /* US_SOFTWARE_GOFAST */
 
@@ -241,10 +328,15 @@ typedef unsigned int UDItype __attribute__ ((mode (DI)));
  */
 #define F_D_BITOFF (52+8-(23+7))
 
+#ifdef TMODES
+# define F_T_BITOFF (__LDBL_MANT_DIG__-1+10-(23+7))
+# define D_T_BITOFF (__LDBL_MANT_DIG__-1+10-(52+8))
+#endif
+
 
 #define NORMAL_EXPMIN (-(EXPBIAS)+1)
-#define IMPLICIT_1 (1LL<<(FRACBITS+NGARDS))
-#define IMPLICIT_2 (1LL<<(FRACBITS+1+NGARDS))
+#define IMPLICIT_1 ((fractype)1<<(FRACBITS+NGARDS))
+#define IMPLICIT_2 ((fractype)1<<(FRACBITS+1+NGARDS))
 
 /* common types */
 
@@ -282,7 +374,11 @@ typedef union
   fractype value_raw;
 
 #ifndef FLOAT
+# ifdef qrtrfractype
+  qrtrfractype qwords[4];
+# else
   halffractype words[2];
+# endif
 #endif
 
 #ifdef FLOAT_BIT_ORDER_MISMATCH
@@ -317,82 +413,82 @@ FLO_union_type;
 
 /* Prototypes */
 
-#if defined(L_pack_df) || defined(L_pack_sf)
+#if defined(L_pack_df) || defined(L_pack_sf) || defined(L_pack_tf)
 extern FLO_type pack_d (fp_number_type *);
 #endif
 
 extern void unpack_d (FLO_union_type *, fp_number_type *);
 
-#if defined(L_addsub_sf) || defined(L_addsub_df)
+#if defined(L_addsub_sf) || defined(L_addsub_df) || defined(L_addsub_tf)
 extern FLO_type add (FLO_type, FLO_type);
 extern FLO_type sub (FLO_type, FLO_type);
 #endif
 
-#if defined(L_mul_sf) || defined(L_mul_df)
+#if defined(L_mul_sf) || defined(L_mul_df) || defined(L_mul_tf)
 extern FLO_type multiply (FLO_type, FLO_type);
 #endif
 
-#if defined(L_div_sf) || defined(L_div_df)
+#if defined(L_div_sf) || defined(L_div_df) || defined(L_div_tf)
 extern FLO_type divide (FLO_type, FLO_type);
 #endif
 
 extern int __fpcmp_parts (fp_number_type *, fp_number_type *);
 
-#if defined(L_compare_sf) || defined(L_compare_df)
+#if defined(L_compare_sf) || defined(L_compare_df) || defined(L_compare_tf)
 extern CMPtype compare (FLO_type, FLO_type);
 #endif
 
 #ifndef US_SOFTWARE_GOFAST
 
-#if defined(L_eq_sf) || defined(L_eq_df)
+#if defined(L_eq_sf) || defined(L_eq_df) || defined(L_eq_tf)
 extern CMPtype _eq_f2 (FLO_type, FLO_type);
 #endif
 
-#if defined(L_ne_sf) || defined(L_ne_df)
+#if defined(L_ne_sf) || defined(L_ne_df) || defined(L_ne_tf)
 extern CMPtype _ne_f2 (FLO_type, FLO_type);
 #endif
 
-#if defined(L_gt_sf) || defined(L_gt_df)
+#if defined(L_gt_sf) || defined(L_gt_df) || defined(L_gt_tf)
 extern CMPtype _gt_f2 (FLO_type, FLO_type);
 #endif
 
-#if defined(L_ge_sf) || defined(L_ge_df)
+#if defined(L_ge_sf) || defined(L_ge_df) || defined(L_ge_tf)
 extern CMPtype _ge_f2 (FLO_type, FLO_type);
 #endif
 
-#if defined(L_lt_sf) || defined(L_lt_df)
+#if defined(L_lt_sf) || defined(L_lt_df) || defined(L_lt_tf)
 extern CMPtype _lt_f2 (FLO_type, FLO_type);
 #endif
 
-#if defined(L_le_sf) || defined(L_le_df)
+#if defined(L_le_sf) || defined(L_le_df) || defined(L_le_tf)
 extern CMPtype _le_f2 (FLO_type, FLO_type);
 #endif
 
-#if defined(L_unord_sf) || defined(L_unord_df)
+#if defined(L_unord_sf) || defined(L_unord_df) || defined(L_unord_tf)
 extern CMPtype _unord_f2 (FLO_type, FLO_type);
 #endif
 
 #endif /* ! US_SOFTWARE_GOFAST */
 
-#if defined(L_si_to_sf) || defined(L_si_to_df)
+#if defined(L_si_to_sf) || defined(L_si_to_df) || defined(L_si_to_tf)
 extern FLO_type si_to_float (SItype);
 #endif
 
-#if defined(L_sf_to_si) || defined(L_df_to_si)
+#if defined(L_sf_to_si) || defined(L_df_to_si) || defined(L_tf_to_si)
 extern SItype float_to_si (FLO_type);
 #endif
 
-#if defined(L_sf_to_usi) || defined(L_df_to_usi)
-#ifdef US_SOFTWARE_GOFAST
+#if defined(L_sf_to_usi) || defined(L_df_to_usi) || defined(L_tf_to_usi)
+#if defined(US_SOFTWARE_GOFAST) || defined(L_tf_to_usi)
 extern USItype float_to_usi (FLO_type);
 #endif
 #endif
 
-#if defined(L_usi_to_sf) || defined(L_usi_to_df)
+#if defined(L_usi_to_sf) || defined(L_usi_to_df) || defined(L_usi_to_tf)
 extern FLO_type usi_to_float (USItype);
 #endif
 
-#if defined(L_negate_sf) || defined(L_negate_df)
+#if defined(L_negate_sf) || defined(L_negate_df) || defined(L_negate_tf)
 extern FLO_type negate (FLO_type);
 #endif
 
@@ -405,6 +501,9 @@ extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype);
 #if defined(L_sf_to_df)
 extern DFtype sf_to_df (SFtype);
 #endif
+#if defined(L_sf_to_tf) && defined(TMODES)
+extern TFtype sf_to_tf (SFtype);
+#endif
 #endif /* ! FLOAT_ONLY */
 #endif /* FLOAT */
 
@@ -416,6 +515,25 @@ extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype);
 #if defined(L_df_to_sf)
 extern SFtype df_to_sf (DFtype);
 #endif
+#if defined(L_df_to_tf) && defined(TMODES)
+extern TFtype df_to_tf (DFtype);
+#endif
 #endif /* ! FLOAT */
 
+#ifdef TMODES
+extern DFtype __make_dp (fp_class_type, unsigned int, int, UDItype);
+extern TFtype __make_tp (fp_class_type, unsigned int, int, UTItype);
+#ifdef TFLOAT
+#if defined(L_tf_to_sf)
+extern SFtype tf_to_sf (TFtype);
+#endif
+#if defined(L_tf_to_df)
+extern DFtype tf_to_df (TFtype);
+#endif
+#if defined(L_di_to_tf)
+extern TFtype di_to_df (DItype);
+#endif
+#endif /* TFLOAT */
+#endif /* TMODES */
+
 #endif /* ! GCC_FP_BIT_H */
diff --git a/gcc/config/freebsd-spec.h b/gcc/config/freebsd-spec.h
index 3fc4dcc7a41..4d81d80c443 100644
--- a/gcc/config/freebsd-spec.h
+++ b/gcc/config/freebsd-spec.h
@@ -48,30 +48,31 @@ Boston, MA 02111-1307, USA.  */
    || !strcmp ((STR), "soname") || !strcmp ((STR), "defsym") 		\
    || !strcmp ((STR), "assert") || !strcmp ((STR), "dynamic-linker"))
 
-#if FBSD_MAJOR == 6
-#define FBSD_CPP_PREDEFINES \
-  "-D__FreeBSD__=6 -Dunix -D__ELF__ -D__KPRINTF_ATTRIBUTE__ -Asystem=unix -Asystem=bsd -Asystem=FreeBSD"
-#endif
-
-#if FBSD_MAJOR == 5
-#define FBSD_CPP_PREDEFINES \
-  "-D__FreeBSD__=5 -Dunix -D__ELF__ -D__KPRINTF_ATTRIBUTE__ -Asystem=unix -Asystem=bsd -Asystem=FreeBSD"
-#endif
-
-#if FBSD_MAJOR == 4
-#define FBSD_CPP_PREDEFINES \
-  "-D__FreeBSD__=4 -Dunix -D__ELF__ -D__KPRINTF_ATTRIBUTE__ -Asystem=unix -Asystem=bsd -Asystem=FreeBSD"
-#endif
-
-#if FBSD_MAJOR == 3
-#define FBSD_CPP_PREDEFINES \
-  "-D__FreeBSD__=3 -Dunix -D__ELF__ -D__KPRINTF_ATTRIBUTE__ -Asystem=unix -Asystem=bsd -Asystem=FreeBSD"
-#endif
-
-#ifndef FBSD_CPP_PREDEFINES
-#define FBSD_CPP_PREDEFINES \
-  "-D__FreeBSD__   -Dunix -D__ELF__ -D__KPRINTF_ATTRIBUTE__ -Asystem=unix -Asystem=bsd -Asystem=FreeBSD"
-#endif
+#define FBSD_TARGET_OS_CPP_BUILTINS()					\
+  do									\
+    {									\
+	if (FBSD_MAJOR == 6)						\
+	  builtin_define ("__FreeBSD__=6");			       	\
+	else if (FBSD_MAJOR == 5)	       				\
+	  builtin_define ("__FreeBSD__=5");			       	\
+	else if (FBSD_MAJOR == 4)			       		\
+	  builtin_define ("__FreeBSD__=4");			       	\
+	else if (FBSD_MAJOR == 3)	       				\
+	  builtin_define ("__FreeBSD__=3");			       	\
+	else								\
+	  builtin_define ("__FreeBSD__");			       	\
+	builtin_define_std ("unix");					\
+	builtin_define ("__ELF__");					\
+	builtin_define ("__KPRINTF_ATTRIBUTE__");		       	\
+	builtin_assert ("system=unix");					\
+	builtin_assert ("system=bsd");					\
+	builtin_assert ("system=FreeBSD");				\
+	FBSD_TARGET_CPU_CPP_BUILTINS();					\
+    }									\
+  while (0)
+
+/* Define the default FreeBSD-specific per-CPU hook code. */
+#define FBSD_TARGET_CPU_CPP_BUILTINS() do {} while (0)
 
 /* Provide a CPP_SPEC appropriate for FreeBSD.  We just deal with the GCC 
    option `-posix', and PIC issues.  */
diff --git a/gcc/config/freebsd.h b/gcc/config/freebsd.h
index 285f5bf85f9..f71bd8b42d5 100644
--- a/gcc/config/freebsd.h
+++ b/gcc/config/freebsd.h
@@ -42,7 +42,10 @@ Boston, MA 02111-1307, USA.  */
 #define WORD_SWITCH_TAKES_ARG(STR) (FBSD_WORD_SWITCH_TAKES_ARG(STR))
 
 #undef  CPP_PREDEFINES
-#define CPP_PREDEFINES FBSD_CPP_PREDEFINES
+/* Obsolete, do not define it. */
+
+#undef  TARGET_OS_CPP_BUILTINS
+#define TARGET_OS_CPP_BUILTINS() FBSD_TARGET_OS_CPP_BUILTINS()
 
 #undef  CPP_SPEC
 #define CPP_SPEC FBSD_CPP_SPEC
diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h
index aac56220393..5d77e426236 100644
--- a/gcc/config/h8300/h8300-protos.h
+++ b/gcc/config/h8300/h8300-protos.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler.
    Hitachi H8/300 version generating coff
-   Copyright (C) 2000 Free SoftwareFoundation, Inc.
+   Copyright (C) 2000, 2002, 2003 Free SoftwareFoundation, Inc.
    Contributed by Steve Chamberlain (sac@cygnus.com),
    Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
 
@@ -59,6 +59,9 @@ extern int small_call_insn_operand PARAMS ((rtx, enum machine_mode));
 extern int jump_address_operand PARAMS ((rtx, enum machine_mode));
 extern int bit_operand PARAMS ((rtx, enum machine_mode));
 extern int bit_memory_operand PARAMS ((rtx, enum machine_mode));
+extern int stack_pointer_operand PARAMS ((rtx, enum machine_mode));
+extern int const_int_gt_2_operand PARAMS ((rtx, enum machine_mode));
+extern int const_int_ge_8_operand PARAMS ((rtx, enum machine_mode));
 extern int bit_operator PARAMS ((rtx, enum machine_mode));
 extern int nshift_operator PARAMS ((rtx, enum machine_mode));
 
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index eac88bde469..7f49e5c1078 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -1,6 +1,6 @@
 /* Subroutines for insn-output.c for Hitachi H8/300.
    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-   2001, 2002 Free Software Foundation, Inc.
+   2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Steve Chamberlain (sac@cygnus.com),
    Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
 
@@ -782,7 +782,9 @@ general_operand_src (op, mode)
      rtx op;
      enum machine_mode mode;
 {
-  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
+  if (GET_MODE (op) == mode
+      && GET_CODE (op) == MEM
+      && GET_CODE (XEXP (op, 0)) == POST_INC)
     return 1;
   return general_operand (op, mode);
 }
@@ -795,7 +797,9 @@ general_operand_dst (op, mode)
      rtx op;
      enum machine_mode mode;
 {
-  if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == PRE_DEC)
+  if (GET_MODE (op) == mode
+      && GET_CODE (op) == MEM
+      && GET_CODE (XEXP (op, 0)) == PRE_DEC)
     return 1;
   return general_operand (op, mode);
 }
@@ -1700,9 +1704,11 @@ h8300_initial_elimination_offset (from, to)
      int from, to;
 {
   int offset = 0;
+  /* The number of bytes that the return address takes on the stack.  */
+  int pc_size = POINTER_SIZE / BITS_PER_UNIT;
 
   if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
-    offset = UNITS_PER_WORD + frame_pointer_needed * UNITS_PER_WORD;
+    offset = pc_size + frame_pointer_needed * UNITS_PER_WORD;
   else if (from == RETURN_ADDRESS_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
     offset = frame_pointer_needed * UNITS_PER_WORD;
   else
@@ -1719,12 +1725,10 @@ h8300_initial_elimination_offset (from, to)
       offset += round_frame_size (get_frame_size ());
 
       if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
-	offset += UNITS_PER_WORD;	/* Skip saved PC */
+	/* Skip saved PC.  */
+	offset += pc_size;
     }
 
-  if ((TARGET_H8300H || TARGET_H8300S) && TARGET_NORMAL_MODE)
-    offset -= 2;
-
   return offset;
 }
 
@@ -1755,6 +1759,8 @@ notice_update_cc (body, insn)
      rtx body;
      rtx insn;
 {
+  rtx set;
+
   switch (get_attr_cc (insn))
     {
     case CC_NONE:
@@ -1777,7 +1783,10 @@ notice_update_cc (body, insn)
 	 that's ok because alter_cond will change tests to use EQ/NE.  */
       CC_STATUS_INIT;
       cc_status.flags |= CC_OVERFLOW_UNUSABLE | CC_NO_CARRY;
-      cc_status.value1 = recog_data.operand[0];
+      set = single_set (insn);
+      cc_status.value1 = SET_SRC (set);
+      if (SET_DEST (set) != cc0_rtx)
+	cc_status.value2 = SET_DEST (set);
       break;
 
     case CC_SET_ZNV:
@@ -1786,9 +1795,10 @@ notice_update_cc (body, insn)
 	 alter_cond will change tests to use EQ/NE.  */
       CC_STATUS_INIT;
       cc_status.flags |= CC_NO_CARRY;
-      cc_status.value1 = recog_data.operand[0];
-      if (GET_CODE (body) == SET && REG_P (SET_SRC (body)))
-	cc_status.value2 = SET_SRC (body);
+      set = single_set (insn);
+      cc_status.value1 = SET_SRC (set);
+      if (SET_DEST (set) != cc0_rtx)
+	cc_status.value2 = SET_DEST (set);
       break;
 
     case CC_COMPARE:
@@ -1804,6 +1814,40 @@ notice_update_cc (body, insn)
     }
 }
 
+/* Return nonzero if X is a stack pointer.  */
+
+int
+stack_pointer_operand (x, mode)
+     rtx x;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return x == stack_pointer_rtx;
+}
+
+/* Return nonzero if X is a constant whose absolute value is greater
+   than 2.  */
+
+int
+const_int_gt_2_operand (x, mode)
+     rtx x;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return (GET_CODE (x) == CONST_INT
+	  && abs (INTVAL (x)) > 2);
+}
+
+/* Return nonzero if X is a constant whose absolute value is no
+   smaller than 8.  */
+
+int
+const_int_ge_8_operand (x, mode)
+     rtx x;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return (GET_CODE (x) == CONST_INT
+	  && abs (INTVAL (x)) >= 8);
+}
+
 /* Recognize valid operators for bit instructions.  */
 
 int
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
index 21f3eeb689b..232038df080 100644
--- a/gcc/config/h8300/h8300.h
+++ b/gcc/config/h8300/h8300.h
@@ -1074,7 +1074,7 @@ struct cum_arg
 #define ASM_APP_OFF "; #NO_APP\n"
 
 #define FILE_ASM_OP "\t.file\n"
-#define IDENT_ASM_OP "\t.ident\n"
+#define IDENT_ASM_OP "\t.ident\t"
 
 /* The assembler op to get a word, 2 bytes for the H8/300, 4 for H8/300H.  */
 #define ASM_WORD_OP							\
diff --git a/gcc/config/h8300/h8300.md b/gcc/config/h8300/h8300.md
index de6edc9b4d2..6d702899572 100644
--- a/gcc/config/h8300/h8300.md
+++ b/gcc/config/h8300/h8300.md
@@ -1,6 +1,6 @@
 ;; GCC machine description for Hitachi H8/300
 ;; Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
-;; 2001, 2002 Free Software Foundation, Inc.
+;; 2001, 2002, 2003 Free Software Foundation, Inc.
 
 ;;   Contributed by Steve Chamberlain (sac@cygnus.com),
 ;;   Jim Wilson (wilson@cygnus.com), and Doug Evans (dje@cygnus.com).
@@ -117,7 +117,8 @@
 		   (plus:HI (reg:HI SP_REG) (const_int -2)))
 	      (set (mem:QI (plus:HI (reg:HI SP_REG) (const_int -1)))
 		   (match_operand:QI 0 "register_operand" "r"))])]
-  "TARGET_H8300"
+  "TARGET_H8300
+   && REGNO (operands[0]) != SP_REG"
   "mov.w\\t%T0,@-r7"
   [(set_attr "length" "2")
    (set_attr "cc" "clobber")])
@@ -127,7 +128,8 @@
 		   (plus:SI (reg:SI SP_REG) (const_int -4)))
 	      (set (mem:QI (plus:SI (reg:SI SP_REG) (const_int -3)))
 		   (match_operand:QI 0 "register_operand" "r"))])]
-  "TARGET_H8300H || TARGET_H8300S"
+  "(TARGET_H8300H || TARGET_H8300S)
+   && REGNO (operands[0]) != SP_REG"
   "mov.l\\t%S0,@-er7"
   [(set_attr "length" "4")
    (set_attr "cc" "clobber")])
@@ -209,7 +211,8 @@
 (define_expand "pushhi1_h8300"
   [(set (mem:HI (pre_dec:HI (reg:HI SP_REG)))
 	(match_operand:HI 0 "register_operand" ""))]
-  "TARGET_H8300"
+  "TARGET_H8300
+   && REGNO (operands[0]) != SP_REG"
   "")
 
 (define_insn "pushhi1_h8300hs"
@@ -217,7 +220,8 @@
 		   (plus:SI (reg:SI SP_REG) (const_int -4)))
 	      (set (mem:HI (plus:SI (reg:SI SP_REG) (const_int -2)))
 		   (match_operand:HI 0 "register_operand" "r"))])]
-  "TARGET_H8300H || TARGET_H8300S"
+  "(TARGET_H8300H || TARGET_H8300S)
+   && REGNO (operands[0]) != SP_REG"
   "mov.l\\t%S0,@-er7"
   [(set_attr "length" "4")
    (set_attr "cc" "clobber")])
@@ -776,6 +780,35 @@
   [(set_attr "length" "2,2,2,4,2")
    (set_attr "cc" "none_0hit,none_0hit,clobber,clobber,set_zn")])
 
+;; This splitter is very important to make the stack adjustment
+;; interrupt-safe.  The combination of add.b and addx is unsafe!
+;;
+;; We apply this split after the peephole2 pass so that we won't end
+;; up creating too many adds/subs when a scratch register is
+;; available, which is actually a common case because stack unrolling
+;; tends to happen immediately after a function call.
+
+(define_split
+  [(set (match_operand:HI 0 "stack_pointer_operand" "")
+	(plus:HI (match_dup 0)
+		 (match_operand 1 "const_int_gt_2_operand" "")))]
+  "TARGET_H8300 && flow2_completed"
+  [(const_int 0)]
+  "split_adds_subs (HImode, operands); DONE;")
+
+(define_peephole2
+  [(match_scratch:HI 2 "r")
+   (set (match_operand:HI 0 "stack_pointer_operand" "")
+	(plus:HI (match_dup 0)
+		 (match_operand:HI 1 "const_int_ge_8_operand" "")))]
+  "TARGET_H8300"
+  [(set (match_dup 2)
+	(match_dup 1))
+   (set (match_dup 0)
+	(plus:HI (match_dup 0)
+		 (match_dup 2)))]
+  "")
+
 (define_insn "*addhi3_h8300hs"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r,r,r")
 	(plus:HI (match_operand:HI 1 "register_operand" "%0,0,0,0,0")
@@ -1800,7 +1833,7 @@
   "@
   mov.b	#0,%x0\;sub.w	%e0,%e0
   mov.b	%R1,%w0\;mov.b	#0,%x0\;sub.w	%e0,%e0"
-  [(set_attr "length" "4,6")
+  [(set_attr "length" "4,8")
    (set_attr "cc" "clobber,clobber")])
 
 (define_expand "zero_extendhisi2"
@@ -1810,7 +1843,7 @@
   "")
 
 ;; %e prints the high part of a CONST_INT, not the low part.  Arggh.
-(define_insn ""
+(define_insn "*zero_extendhisi2_h8300"
   [(set (match_operand:SI 0 "register_operand" "=r,r,r")
 	(zero_extend:SI (match_operand:HI 1 "general_operand_src" "0,i,g>")))]
   "TARGET_H8300"
@@ -1818,7 +1851,7 @@
   sub.w	%e0,%e0
   mov.w	%f1,%f0\;sub.w	%e0,%e0
   mov.w	%e1,%f0\;sub.w	%e0,%e0"
-  [(set_attr "length" "2,4,4")
+  [(set_attr "length" "2,4,6")
    (set_attr "cc" "clobber,clobber,clobber")])
 
 (define_insn ""
@@ -1862,7 +1895,7 @@
   "@
   bld	#7,%w0\;subx	%x0,%x0\;subx	%y0,%y0\;subx	%z0,%z0
   mov.b	%R1,%w0\;bld	#7,%w0\;subx	%x0,%x0\;subx	%y0,%y0\;subx	%z0,%z0"
-  [(set_attr "length" "8,10")
+  [(set_attr "length" "8,12")
    (set_attr "cc" "clobber,clobber")])
 
 (define_expand "extendhisi2"
@@ -1871,14 +1904,14 @@
   ""
   "")
 
-(define_insn ""
+(define_insn "*extendhisi2_h8300"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
 	(sign_extend:SI (match_operand:HI 1 "general_operand_src" "0,g>")))]
   "TARGET_H8300"
   "@
   bld	#7,%x0\;subx	%y0,%y0\;subx	%z0,%z0
   mov.w	%T1,%f0\;bld	#7,%x0\;subx	%y0,%y0\;subx	%z0,%z0"
-  [(set_attr "length" "6,8")
+  [(set_attr "length" "6,10")
    (set_attr "cc" "clobber,clobber")])
 
 (define_insn ""
@@ -2112,7 +2145,7 @@
 ;; Normal loads with a 32bit destination.
 ;;
 
-(define_insn ""
+(define_insn "*extzv_1_r_h8300"
   [(set (match_operand:SI 0 "register_operand" "=&r")
 	(zero_extract:SI (match_operand:HI 1 "register_operand" "r")
 			 (const_int 1)
@@ -2121,9 +2154,9 @@
    && INTVAL (operands[2]) < 16"
   "* return output_simode_bld (0, operands);"
   [(set_attr "cc" "clobber")
-   (set_attr "length" "6")])
+   (set_attr "length" "8")])
 
-(define_insn ""
+(define_insn "*extzv_1_r_h8300hs"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(zero_extract:SI (match_operand:SI 1 "register_operand" "r")
 			 (const_int 1)
@@ -2132,13 +2165,13 @@
    && INTVAL (operands[2]) < 16"
   "* return output_simode_bld (0, operands);"
   [(set_attr "cc" "clobber")
-   (set_attr "length" "6")])
+   (set_attr "length" "8")])
 
 ;;
 ;; Inverted loads with a 32bit destination.
 ;;
 
-(define_insn ""
+(define_insn "*extzv_1_r_inv_h8300"
   [(set (match_operand:SI 0 "register_operand" "=&r")
 	(zero_extract:SI (xor:HI (match_operand:HI 1 "register_operand" "r")
 				 (match_operand:HI 3 "const_int_operand" "n"))
@@ -2149,9 +2182,9 @@
    && (1 << INTVAL (operands[2])) == INTVAL (operands[3])"
   "* return output_simode_bld (1, operands);"
   [(set_attr "cc" "clobber")
-   (set_attr "length" "6")])
+   (set_attr "length" "8")])
 
-(define_insn ""
+(define_insn "*extzv_1_r_inv_h8300hs"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(zero_extract:SI (xor:SI (match_operand:SI 1 "register_operand" "r")
 				 (match_operand 3 "const_int_operand" "n"))
@@ -2162,7 +2195,7 @@
    && (1 << INTVAL (operands[2])) == INTVAL (operands[3])"
   "* return output_simode_bld (1, operands);"
   [(set_attr "cc" "clobber")
-   (set_attr "length" "6")])
+   (set_attr "length" "8")])
 
 (define_expand "insv"
   [(set (zero_extract:HI (match_operand:HI 0 "general_operand" "")
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
index f630d1f8450..03e372e04fa 100644
--- a/gcc/config/i386/cygwin.h
+++ b/gcc/config/i386/cygwin.h
@@ -1,6 +1,6 @@
 /* Operating system specific defines to be used when targeting GCC for
    hosting on Windows32, using a Unix style C library and tools.
-   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -134,7 +134,8 @@ Boston, MA 02111-1307, USA.  */
    by calling the init function from the prologue.  */
 
 #undef LIBGCC_SPEC
-#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32} -lgcc %{mno-cygwin:-lmoldname -lmsvcrt}"
+#define LIBGCC_SPEC "%{mno-cygwin: %{mthreads:-lmingwthrd} -lmingw32}	\
+  -lgcc %{mno-cygwin:-lmoldname -lmingwex -lmsvcrt}"
 
 /* This macro defines names of additional specifications to put in the specs
    that can be used in various specifications like CC1_SPEC.  Its definition
@@ -306,11 +307,13 @@ do {							\
 #define CHECK_STACK_LIMIT 4000
 
 /* By default, target has a 80387, uses IEEE compatible arithmetic,
-   and returns float values in the 387 and needs stack probes */
-#undef TARGET_SUBTARGET_DEFAULT
+   returns float values in the 387 and needs stack probes.
+   We also align doubles to 64-bits for MSVC default compatibility. */
 
+#undef TARGET_SUBTARGET_DEFAULT
 #define TARGET_SUBTARGET_DEFAULT \
-   (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE) 
+   (MASK_80387 | MASK_IEEE_FP | MASK_FLOAT_RETURNS | MASK_STACK_PROBE \
+    | MASK_ALIGN_DOUBLE)
 
 /* This is how to output an assembler line
    that says to advance the location counter
@@ -397,6 +400,15 @@ extern void i386_pe_unique_section PARAMS ((TREE, int));
 	const0_rtx));							\
     }
 
+/* Java Native Interface (JNI) methods on Win32 are invoked using the
+   stdcall calling convention.  */
+#undef MODIFY_JNI_METHOD_CALL
+#define MODIFY_JNI_METHOD_CALL(MDECL)					      \
+  build_type_attribute_variant ((MDECL),				      \
+			       build_tree_list (get_identifier ("stdcall"),   \
+						NULL))
+
+
 /* External function declarations.  */
 
 extern void i386_pe_record_external_function PARAMS ((const char *));
diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h
index a271aa47cde..67807804501 100644
--- a/gcc/config/i386/djgpp.h
+++ b/gcc/config/i386/djgpp.h
@@ -136,6 +136,8 @@ Boston, MA 02111-1307, USA.  */
 #undef ASM_FILE_START
 #define ASM_FILE_START(FILE)						\
   do {									\
+	if (ix86_asm_dialect == ASM_INTEL)				\
+	  fputs ("\t.intel_syntax\n", FILE);				\
 	output_file_directive (FILE, main_input_filename);		\
   } while (0)
 
diff --git a/gcc/config/i386/freebsd-aout.h b/gcc/config/i386/freebsd-aout.h
index a2b616e700b..85e2703f42c 100644
--- a/gcc/config/i386/freebsd-aout.h
+++ b/gcc/config/i386/freebsd-aout.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler for Intel 80386
    running FreeBSD.
-   Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002
+   Copyright (C) 1988, 1992, 1994, 1996, 1997, 1999, 2000, 2002, 2003
    Free Software Foundation, Inc.
    Contributed by Poul-Henning Kamp <phk@login.dkuug.dk>
    Continued development by David O'Brien <obrien@NUXI.org>
@@ -94,6 +94,9 @@ Boston, MA 02111-1307, USA.  */
 
 /* Profiling routines, partially copied from i386/osfrose.h.  */
 
+/* Tell final.c that we don't need a label passed to mcount.  */
+#define NO_PROFILE_COUNTERS 1
+
 #undef MCOUNT_NAME
 #define MCOUNT_NAME "mcount"
 #undef PROFILE_COUNT_REGISTER
@@ -112,6 +115,7 @@ Boston, MA 02111-1307, USA.  */
 
 #define TYPE_ASM_OP	"\t.type\t"
 #define SIZE_ASM_OP	"\t.size\t"
+#define SET_ASM_OP	"\t.set\t"
 
 /* The following macro defines the format used to output the second
    operand of the .type assembler directive.  Different svr4 assemblers
@@ -121,6 +125,12 @@ Boston, MA 02111-1307, USA.  */
 
 #define TYPE_OPERAND_FMT	"@%s"
 
+#define HANDLE_SYSV_PRAGMA	1
+
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+	do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
+	fputc ('\n', FILE); } while (0)
+
 /* Write the extra assembler code needed to declare a function's result.
    Most svr4 assemblers don't require any special declaration of the
    result value, but there are exceptions.  */
diff --git a/gcc/config/i386/freebsd64.h b/gcc/config/i386/freebsd64.h
index 699f4c4d344..12ca062301d 100644
--- a/gcc/config/i386/freebsd64.h
+++ b/gcc/config/i386/freebsd64.h
@@ -29,8 +29,7 @@ Boston, MA 02111-1307, USA.  */
 
 #undef	LINK_SPEC
 #define LINK_SPEC "\
-  %{!m32:-m elf_x86_64} \
-  %{m32:-m elf_i386} \
+  %{m32:-m elf_i386_fbsd} \
   %{Wl,*:%*} \
   %{v:-V} \
   %{assert*} %{R*} %{rpath*} %{defsym*} \
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 4afdf668bd8..d3c9d160190 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -190,6 +190,9 @@ extern void x86_function_profiler PARAMS ((FILE *, int));
 #ifdef TREE_CODE
 extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx));
 extern rtx function_arg PARAMS ((CUMULATIVE_ARGS *, enum machine_mode, tree, int));
+extern int function_arg_pass_by_reference PARAMS ((CUMULATIVE_ARGS *,
+						   enum machine_mode,
+						   tree, int));
 extern void function_arg_advance PARAMS ((CUMULATIVE_ARGS *, enum machine_mode,
 					tree, int));
 extern rtx ix86_function_value PARAMS ((tree));
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 42f6d93d3c3..0f8c8e4c3fc 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1,6 +1,6 @@
 /* Subroutines used for code generation on IA-32.
    Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-   2002 Free Software Foundation, Inc.
+   2002, 2003 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -799,6 +799,7 @@ const struct attribute_spec ix86_attribute_table[];
 static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
 static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
 static int ix86_value_regno PARAMS ((enum machine_mode));
+static bool contains_128bit_aligned_vector_p PARAMS ((tree));
 
 #if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
 static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
@@ -911,6 +912,12 @@ static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
 
 struct gcc_target targetm = TARGET_INITIALIZER;
 
+/* The svr4 ABI for the i386 says that records and unions are returned
+   in memory.  */
+#ifndef DEFAULT_PCC_STRUCT_RETURN
+#define DEFAULT_PCC_STRUCT_RETURN 1
+#endif
+
 /* Sometimes certain combinations of command options do not make
    sense on a particular target machine.  You can define a macro
    `OVERRIDE_OPTIONS' to take account of this.  This macro, if
@@ -1021,7 +1028,7 @@ override_options ()
       if (flag_asynchronous_unwind_tables == 2)
 	flag_asynchronous_unwind_tables = 0;
       if (flag_pcc_struct_return == 2)
-	flag_pcc_struct_return = 1;
+	flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
     }
 
 #ifdef SUBTARGET_OVERRIDE_OPTIONS
@@ -2252,6 +2259,9 @@ function_arg (cum, mode, type, named)
 	break;
 
       case BLKmode:
+	if (bytes < 0)
+	  break;
+	/* FALLTHRU */
       case DImode:
       case SImode:
       case HImode:
@@ -2282,6 +2292,90 @@ function_arg (cum, mode, type, named)
   return ret;
 }
 
+/* Return true when TYPE should be 128bit aligned for 32bit argument passing
+   ABI  */
+static bool
+contains_128bit_aligned_vector_p (type)
+     tree type;
+{
+  enum machine_mode mode = TYPE_MODE (type);
+  if (SSE_REG_MODE_P (mode)
+      && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
+    return true;
+  if (TYPE_ALIGN (type) < 128)
+    return false;
+
+  if (AGGREGATE_TYPE_P (type))
+    {
+      /* Walk the agregates recursivly.  */
+      if (TREE_CODE (type) == RECORD_TYPE
+	  || TREE_CODE (type) == UNION_TYPE
+	  || TREE_CODE (type) == QUAL_UNION_TYPE)
+	{
+	  tree field;
+
+	  if (TYPE_BINFO (type) != NULL
+	      && TYPE_BINFO_BASETYPES (type) != NULL)
+	    {
+	      tree bases = TYPE_BINFO_BASETYPES (type);
+	      int n_bases = TREE_VEC_LENGTH (bases);
+	      int i;
+
+	      for (i = 0; i < n_bases; ++i)
+		{
+		  tree binfo = TREE_VEC_ELT (bases, i);
+		  tree type = BINFO_TYPE (binfo);
+
+		  if (contains_128bit_aligned_vector_p (type))
+		    return true;
+		}
+	    }
+	  /* And now merge the fields of structure.   */
+	  for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
+	    {
+	      if (TREE_CODE (field) == FIELD_DECL
+		  && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
+		return true;
+	    }
+	}
+      /* Just for use if some languages passes arrays by value.  */
+      else if (TREE_CODE (type) == ARRAY_TYPE)
+	{
+	  if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
+	    return true;
+	}
+      else
+	abort ();
+    }
+  return false;
+}
+
+/* A C expression that indicates when an argument must be passed by
+   reference.  If nonzero for an argument, a copy of that argument is
+   made in memory and a pointer to the argument is passed instead of
+   the argument itself.  The pointer is passed in whatever way is
+   appropriate for passing a pointer to that type.  */
+
+int
+function_arg_pass_by_reference (cum, mode, type, named)
+     CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+     tree type;
+     int named ATTRIBUTE_UNUSED;
+{
+  if (!TARGET_64BIT)
+    return 0;
+
+  if (type && int_size_in_bytes (type) == -1)
+    {
+      if (TARGET_DEBUG_ARG)
+	fprintf (stderr, "function_arg_pass_by_reference\n");
+      return 1;
+    }
+
+  return 0;
+}
+
 /* Gives the alignment boundary, in bits, of an argument with the specified mode
    and type.   */
 
@@ -2291,14 +2385,34 @@ ix86_function_arg_boundary (mode, type)
      tree type;
 {
   int align;
-  if (!TARGET_64BIT)
-    return PARM_BOUNDARY;
   if (type)
     align = TYPE_ALIGN (type);
   else
     align = GET_MODE_ALIGNMENT (mode);
   if (align < PARM_BOUNDARY)
     align = PARM_BOUNDARY;
+  if (!TARGET_64BIT)
+    {
+      /* i386 ABI defines all arguments to be 4 byte aligned.  We have to
+	 make an exception for SSE modes since these require 128bit
+	 alignment.  
+
+	 The handling here differs from field_alignment.  ICC aligns MMX
+	 arguments to 4 byte boundaries, while structure fields are aligned
+	 to 8 byte boundaries.  */
+      if (!type)
+	{
+	  if (!SSE_REG_MODE_P (mode))
+	    align = PARM_BOUNDARY;
+	}
+      else
+	{
+	  if (!contains_128bit_aligned_vector_p (type))
+	    align = PARM_BOUNDARY;
+	}
+      if (align != PARM_BOUNDARY && !TARGET_SSE)
+	abort();
+    }
   if (align > 128)
     align = 128;
   return align;
@@ -2488,6 +2602,8 @@ ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
   /* Indicate to allocate space on the stack for varargs save area.  */
   ix86_save_varrargs_registers = 1;
 
+  cfun->stack_alignment_needed = 128;
+
   fntype = TREE_TYPE (current_function_decl);
   stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
 	      && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
@@ -2637,6 +2753,7 @@ ix86_va_arg (valist, type)
   rtx lab_false, lab_over = NULL_RTX;
   rtx addr_rtx, r;
   rtx container;
+  int indirect_p = 0;
 
   /* Only 64bit target needs something special.  */
   if (!TARGET_64BIT)
@@ -2656,6 +2773,13 @@ ix86_va_arg (valist, type)
   sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
 
   size = int_size_in_bytes (type);
+  if (size == -1)
+    {
+      /* Passed by reference.  */
+      indirect_p = 1;
+      type = build_pointer_type (type);
+      size = int_size_in_bytes (type);
+    }
   rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
 
   container = construct_container (TYPE_MODE (type), type, 0,
@@ -2760,10 +2884,12 @@ ix86_va_arg (valist, type)
 	{
 	  int i;
 	  rtx mem;
+	  rtx x;
 
 	  /* Never use the memory itself, as it has the alias set.  */
-	  addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
-	  mem = gen_rtx_MEM (BLKmode, addr_rtx);
+	  x = XEXP (assign_temp (type, 0, 1, 0), 0);
+	  mem = gen_rtx_MEM (BLKmode, x);
+	  force_operand (x, addr_rtx);
 	  set_mem_alias_set (mem, get_varargs_alias_set ());
 	  set_mem_align (mem, BITS_PER_UNIT);
 
@@ -2846,6 +2972,13 @@ ix86_va_arg (valist, type)
   if (container)
     emit_label (lab_over);
 
+  if (indirect_p)
+    {
+      r = gen_rtx_MEM (Pmode, addr_rtx);
+      set_mem_alias_set (r, get_varargs_alias_set ());
+      emit_move_insn (addr_rtx, r);
+    }
+
   return addr_rtx;
 }
 
@@ -3401,6 +3534,19 @@ non_q_regs_operand (op, mode)
   return NON_QI_REG_P (op);
 }
 
+/*  Return 1 when OP is operand acceptable for standard SSE move.  */
+int
+vector_move_operand (op, mode)
+     rtx op;
+     enum machine_mode mode;
+{
+  if (nonimmediate_operand (op, mode))
+    return 1;
+  if (GET_MODE (op) != mode && mode != VOIDmode)
+    return 0;
+  return (op == CONST0_RTX (GET_MODE (op)));
+}
+
 /* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
    insns.  */
 int
@@ -4225,7 +4371,8 @@ ix86_save_reg (regno, maybe_eh_return)
       && regno == REAL_PIC_OFFSET_TABLE_REGNUM
       && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
 	  || current_function_profile
-	  || current_function_calls_eh_return))
+	  || current_function_calls_eh_return
+	  || current_function_uses_const_pool))
     {
       if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
 	return 0;
@@ -4516,6 +4663,10 @@ ix86_expand_prologue ()
       CALL_INSN_FUNCTION_USAGE (insn)
 	= gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
 			     CALL_INSN_FUNCTION_USAGE (insn));
+
+      /* Don't allow scheduling pass to move insns across __alloca
+         call.  */
+      emit_insn (gen_blockage (const0_rtx));
     }
   if (use_mov)
     {
@@ -7484,12 +7635,12 @@ output_fp_compare (insn, operands, eflags_p, unordered_p)
 	if (unordered_p)
 	  return "ucomiss\t{%1, %0|%0, %1}";
 	else
-	  return "comiss\t{%1, %0|%0, %y}";
+	  return "comiss\t{%1, %0|%0, %1}";
       else
 	if (unordered_p)
 	  return "ucomisd\t{%1, %0|%0, %1}";
 	else
-	  return "comisd\t{%1, %0|%0, %y}";
+	  return "comisd\t{%1, %0|%0, %1}";
     }
 
   if (! STACK_TOP_P (cmp_op0))
@@ -7777,9 +7928,17 @@ ix86_expand_move (mode, operands)
 
 	  if (strict)
 	    ;
-	  else if (GET_CODE (op1) == CONST_DOUBLE
-		   && register_operand (op0, mode))
-	    op1 = validize_mem (force_const_mem (mode, op1));
+	  else if (GET_CODE (op1) == CONST_DOUBLE)
+	    {
+	      op1 = validize_mem (force_const_mem (mode, op1));
+	      if (!register_operand (op0, mode))
+		{
+		  rtx temp = gen_reg_rtx (mode);
+		  emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
+		  emit_move_insn (op0, temp);
+		  return;
+		}
+	    }
 	}
     }
 
@@ -7799,8 +7958,12 @@ ix86_expand_vector_move (mode, operands)
      to handle some of them more efficiently.  */
   if ((reload_in_progress | reload_completed) == 0
       && register_operand (operands[0], mode)
-      && CONSTANT_P (operands[1]))
-    operands[1] = force_const_mem (mode, operands[1]);
+      && CONSTANT_P (operands[1]) && operands[1] != CONST0_RTX (mode))
+    {
+      operands[1] = force_const_mem (mode, operands[1]);
+      emit_move_insn (operands[0], operands[1]);
+      return;
+    }
 
   /* Make operand1 a register if it isn't already.  */
   if (!no_new_pseudos
@@ -9219,11 +9382,11 @@ ix86_expand_int_movcc (operands)
 	  /* On x86_64 the lea instruction operates on Pmode, so we need
 	     to get arithmetics done in proper mode to match.  */
 	  if (diff == 1)
-	    tmp = out;
+	    tmp = copy_rtx (out);
 	  else
 	    {
 	      rtx out1;
-	      out1 = out;
+	      out1 = copy_rtx (out);
 	      tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
 	      nops++;
 	      if (diff & 1)
@@ -9241,9 +9404,9 @@ ix86_expand_int_movcc (operands)
 	      && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
 	    {
 	      if (nops == 1)
-		out = force_operand (tmp, out);
+		out = force_operand (tmp, copy_rtx (out));
 	      else
-		emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
+		emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
 	    }
 	  if (out != operands[0])
 	    emit_move_insn (operands[0], copy_rtx (out));
@@ -9822,15 +9985,24 @@ ix86_split_long_move (operands)
 	 Do an lea to the last part and use only one colliding move.  */
       else if (collisions > 1)
 	{
+	  rtx base;
+
 	  collisions = 1;
-	  emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
-				  XEXP (part[1][0], 0)));
-	  part[1][0] = change_address (part[1][0],
-				       TARGET_64BIT ? DImode : SImode,
-				       part[0][nparts - 1]);
-	  part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
+
+	  base = part[0][nparts - 1];
+
+	  /* Handle the case when the last part isn't valid for lea.
+	     Happens in 64-bit mode storing the 12-byte XFmode.  */
+	  if (GET_MODE (base) != Pmode)
+	    base = gen_rtx_REG (Pmode, REGNO (base));
+
+	  emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
+	  part[1][0] = replace_equiv_address (part[1][0], base);
+	  part[1][1] = replace_equiv_address (part[1][1],
+				      plus_constant (base, UNITS_PER_WORD));
 	  if (nparts == 3)
-	    part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
+	    part[1][2] = replace_equiv_address (part[1][2],
+				      plus_constant (base, 8));
 	}
     }
 
@@ -10973,7 +11145,8 @@ memory_address_length (addr)
       if (disp)
 	{
 	  if (GET_CODE (disp) == CONST_INT
-	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
+	      && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K')
+	      && base)
 	    len = 1;
 	  else
 	    len = 4;
@@ -11036,6 +11209,26 @@ ix86_attr_length_address_default (insn)
      rtx insn;
 {
   int i;
+
+  if (get_attr_type (insn) == TYPE_LEA)
+    {
+      rtx set = PATTERN (insn);
+      if (GET_CODE (set) == SET)
+	;
+      else if (GET_CODE (set) == PARALLEL
+	       && GET_CODE (XVECEXP (set, 0, 0)) == SET)
+	set = XVECEXP (set, 0, 0);
+      else
+	{
+#ifdef ENABLE_CHECKING
+	  abort ();
+#endif
+	  return 0;
+	}
+
+      return memory_address_length (SET_SRC (set));
+    }
+
   extract_insn_cached (insn);
   for (i = recog_data.n_operands - 1; i >= 0; --i)
     if (GET_CODE (recog_data.operand[i]) == MEM)
@@ -11834,7 +12027,8 @@ x86_initialize_trampoline (tramp, fnaddr, cxt)
 
 #define def_builtin(MASK, NAME, TYPE, CODE)			\
 do {								\
-  if ((MASK) & target_flags)					\
+  if ((MASK) & target_flags					\
+      && (!((MASK) & MASK_64BIT) || TARGET_64BIT))		\
     builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD,	\
 		      NULL, NULL_TREE);				\
 } while (0)
@@ -11851,6 +12045,8 @@ struct builtin_description
 
 /* Used for builtins that are enabled both by -msse and -msse2.  */
 #define MASK_SSE1 (MASK_SSE | MASK_SSE2)
+#define MASK_SSE164 (MASK_SSE | MASK_SSE2 | MASK_64BIT)
+#define MASK_SSE264 (MASK_SSE2 | MASK_64BIT)
 
 static const struct builtin_description bdesc_comi[] =
 {
@@ -11933,9 +12129,11 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
   { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
   { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
+  { MASK_MMX, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
   { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
   { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
   { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
+  { MASK_MMX, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
 
   { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
   { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
@@ -11984,6 +12182,7 @@ static const struct builtin_description bdesc_2arg[] =
 
   { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
   { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
+  { MASK_SSE164, CODE_FOR_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
 
   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
   { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
@@ -12056,11 +12255,11 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
   { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
   { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
-  { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
+  { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
 
   { MASK_MMX, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
   { MASK_MMX, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
@@ -12134,6 +12333,7 @@ static const struct builtin_description bdesc_2arg[] =
   { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
+  { MASK_SSE264, CODE_FOR_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
 };
@@ -12149,8 +12349,10 @@ static const struct builtin_description bdesc_1arg[] =
 
   { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
   { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
+  { MASK_SSE164, CODE_FOR_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
   { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
   { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
+  { MASK_SSE164, CODE_FOR_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
   { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
@@ -12172,6 +12374,8 @@ static const struct builtin_description bdesc_1arg[] =
 
   { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
+  { MASK_SSE264, CODE_FOR_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
+  { MASK_SSE264, CODE_FOR_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
 
   { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
   { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
@@ -12197,7 +12401,11 @@ ix86_init_mmx_sse_builtins ()
   size_t i;
 
   tree pchar_type_node = build_pointer_type (char_type_node);
+  tree pcchar_type_node = build_pointer_type (
+			     build_type_variant (char_type_node, 1, 0));
   tree pfloat_type_node = build_pointer_type (float_type_node);
+  tree pcfloat_type_node = build_pointer_type (
+			     build_type_variant (float_type_node, 1, 0));
   tree pv2si_type_node = build_pointer_type (V2SI_type_node);
   tree pv2di_type_node = build_pointer_type (V2DI_type_node);
   tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
@@ -12213,11 +12421,18 @@ ix86_init_mmx_sse_builtins ()
   tree int_ftype_v4sf
     = build_function_type_list (integer_type_node,
 				V4SF_type_node, NULL_TREE);
+  tree int64_ftype_v4sf
+    = build_function_type_list (long_long_integer_type_node,
+				V4SF_type_node, NULL_TREE);
   tree int_ftype_v8qi
     = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
   tree v4sf_ftype_v4sf_int
     = build_function_type_list (V4SF_type_node,
 				V4SF_type_node, integer_type_node, NULL_TREE);
+  tree v4sf_ftype_v4sf_int64
+    = build_function_type_list (V4SF_type_node,
+				V4SF_type_node, long_long_integer_type_node,
+				NULL_TREE);
   tree v4sf_ftype_v4sf_v2si
     = build_function_type_list (V4SF_type_node,
 				V4SF_type_node, V2SI_type_node, NULL_TREE);
@@ -12270,8 +12485,8 @@ ix86_init_mmx_sse_builtins ()
     = build_function_type_list (void_type_node,
 				V8QI_type_node, V8QI_type_node,
 				pchar_type_node, NULL_TREE);
-  tree v4sf_ftype_pfloat
-    = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
+  tree v4sf_ftype_pcfloat
+    = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
   /* @@@ the type is bogus */
   tree v4sf_ftype_v4sf_pv2si
     = build_function_type_list (V4SF_type_node,
@@ -12326,7 +12541,11 @@ ix86_init_mmx_sse_builtins ()
     = build_function_type_list (V2SI_type_node,
 				V2SF_type_node, V2SF_type_node, NULL_TREE);
   tree pint_type_node    = build_pointer_type (integer_type_node);
+  tree pcint_type_node = build_pointer_type (
+			     build_type_variant (integer_type_node, 1, 0));
   tree pdouble_type_node = build_pointer_type (double_type_node);
+  tree pcdouble_type_node = build_pointer_type (
+				build_type_variant (double_type_node, 1, 0));
   tree int_ftype_v2df_v2df
     = build_function_type_list (integer_type_node,
 				V2DF_type_node, V2DF_type_node, NULL_TREE);
@@ -12338,8 +12557,8 @@ ix86_init_mmx_sse_builtins ()
   tree ti_ftype_ti_ti
     = build_function_type_list (intTI_type_node,
 				intTI_type_node, intTI_type_node, NULL_TREE);
-  tree void_ftype_pvoid
-    = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+  tree void_ftype_pcvoid
+    = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
   tree v2di_ftype_di
     = build_function_type_list (V2DI_type_node,
 				long_long_unsigned_type_node, NULL_TREE);
@@ -12364,9 +12583,16 @@ ix86_init_mmx_sse_builtins ()
     = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
   tree int_ftype_v2df
     = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
+  tree int64_ftype_v2df
+    = build_function_type_list (long_long_integer_type_node,
+		    		V2DF_type_node, NULL_TREE);
   tree v2df_ftype_v2df_int
     = build_function_type_list (V2DF_type_node,
 				V2DF_type_node, integer_type_node, NULL_TREE);
+  tree v2df_ftype_v2df_int64
+    = build_function_type_list (V2DF_type_node,
+				V2DF_type_node, long_long_integer_type_node,
+				NULL_TREE);
   tree v4sf_ftype_v4sf_v2df
     = build_function_type_list (V4SF_type_node,
 				V4SF_type_node, V2DF_type_node, NULL_TREE);
@@ -12394,8 +12620,8 @@ ix86_init_mmx_sse_builtins ()
     = build_function_type_list (void_type_node,
 				V16QI_type_node, V16QI_type_node,
 				pchar_type_node, NULL_TREE);
-  tree v2df_ftype_pdouble
-    = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
+  tree v2df_ftype_pcdouble
+    = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
   tree v2df_ftype_v2df_v2df
     = build_function_type_list (V2DF_type_node,
 				V2DF_type_node, V2DF_type_node, NULL_TREE);
@@ -12454,16 +12680,16 @@ ix86_init_mmx_sse_builtins ()
 				V16QI_type_node, V16QI_type_node, NULL_TREE);
   tree int_ftype_v16qi
     = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
-  tree v16qi_ftype_pchar
-    = build_function_type_list (V16QI_type_node, pchar_type_node, NULL_TREE);
+  tree v16qi_ftype_pcchar
+    = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
   tree void_ftype_pchar_v16qi
     = build_function_type_list (void_type_node,
 			        pchar_type_node, V16QI_type_node, NULL_TREE);
-  tree v4si_ftype_pchar
-    = build_function_type_list (V4SI_type_node, pchar_type_node, NULL_TREE);
-  tree void_ftype_pchar_v4si
+  tree v4si_ftype_pcint
+    = build_function_type_list (V4SI_type_node, pcint_type_node, NULL_TREE);
+  tree void_ftype_pcint_v4si
     = build_function_type_list (void_type_node,
-			        pchar_type_node, V4SI_type_node, NULL_TREE);
+			        pcint_type_node, V4SI_type_node, NULL_TREE);
   tree v2di_ftype_v2di
     = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
 
@@ -12539,8 +12765,6 @@ ix86_init_mmx_sse_builtins ()
   /* Add the remaining MMX insns with somewhat more complicated types.  */
   def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
   def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
-  def_builtin (MASK_MMX, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
-  def_builtin (MASK_MMX, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
   def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
   def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
   def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
@@ -12566,21 +12790,26 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
   def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
 
+  def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
+  def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
+  def_builtin (MASK_SSE164, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
+  def_builtin (MASK_SSE164, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
   def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
+  def_builtin (MASK_SSE164, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
 
   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
 
   def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
 
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
-  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
+  def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADAPS);
+  def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
+  def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADSS);
   def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
   def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
   def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
@@ -12648,9 +12877,9 @@ ix86_init_mmx_sse_builtins ()
   def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
   def_builtin (MASK_SSE2, "__builtin_ia32_movdq2q", di_ftype_v2di, IX86_BUILTIN_MOVDQ2Q);
 
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADAPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADSD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
@@ -12689,33 +12918,36 @@ ix86_init_mmx_sse_builtins ()
 
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
+  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
+  def_builtin (MASK_SSE264, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
+  def_builtin (MASK_SSE264, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
   def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
 
   def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
   def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pcdouble, IX86_BUILTIN_LOADPD1);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADRPD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
   def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
 
-  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
+  def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
   def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
   def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
 
-  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQA);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pchar, IX86_BUILTIN_LOADDQU);
-  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pchar, IX86_BUILTIN_LOADD);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqa", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQA);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
+  def_builtin (MASK_SSE2, "__builtin_ia32_loadd", v4si_ftype_pcint, IX86_BUILTIN_LOADD);
   def_builtin (MASK_SSE2, "__builtin_ia32_storedqa", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQA);
   def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
-  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pchar_v4si, IX86_BUILTIN_STORED);
+  def_builtin (MASK_SSE2, "__builtin_ia32_stored", void_ftype_pcint_v4si, IX86_BUILTIN_STORED);
   def_builtin (MASK_SSE2, "__builtin_ia32_movq", v2di_ftype_v2di, IX86_BUILTIN_MOVQ);
 
   def_builtin (MASK_SSE1, "__builtin_ia32_setzero128", v2di_ftype_void, IX86_BUILTIN_CLRTI);
@@ -12795,6 +13027,13 @@ ix86_expand_binop_builtin (icode, arglist, target)
       || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
     target = gen_reg_rtx (tmode);
 
+  if (GET_MODE (op1) == SImode && mode1 == TImode)
+    {
+      rtx x = gen_reg_rtx (V4SImode);
+      emit_insn (gen_sse2_loadd (x, op1));
+      op1 = gen_lowpart (TImode, x);
+    }
+
   /* In case the insn wants input operands in modes different from
      the result, abort.  */
   if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
@@ -12837,9 +13076,7 @@ ix86_expand_store_builtin (icode, arglist)
     op1 = safe_vector_operand (op1, mode1);
 
   op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
-
-  if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
-    op1 = copy_to_mode_reg (mode1, op1);
+  op1 = copy_to_mode_reg (mode1, op1);
 
   pat = GEN_FCN (icode) (op0, op1);
   if (pat)
@@ -13828,9 +14065,10 @@ ix86_hard_regno_mode_ok (regno, mode)
   if (FP_REGNO_P (regno))
     return VALID_FP_MODE_P (mode);
   if (SSE_REGNO_P (regno))
-    return VALID_SSE_REG_MODE (mode);
+    return (TARGET_SSE ? VALID_SSE_REG_MODE (mode) : 0);
   if (MMX_REGNO_P (regno))
-    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
+    return (TARGET_MMX
+	    ? VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode) : 0);
   /* We handle both integer and floats in the general purpose registers.
      In future we should be able to handle vector modes as well.  */
   if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
@@ -14299,7 +14537,7 @@ x86_function_profiler (file, labelno)
   else
     {
 #ifndef NO_PROFILE_COUNTERS
-      fprintf (file, "\tmovl\t$%sP%d,%%$s\n", LPREFIX, labelno,
+      fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
 	       PROFILE_COUNT_REGISTER);
 #endif
       fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 493a2b5bf9c..ffca44fd57b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -487,9 +487,12 @@ extern int x86_prefetch_sse;
       if (TARGET_64BIT)						\
 	{							\
 	  builtin_assert ("cpu=x86_64");			\
-	  builtin_assert ("machine=x86_64");			\
+	  builtin_define ("__amd64");				\
+	  builtin_define ("__amd64__");				\
 	  builtin_define ("__x86_64");				\
 	  builtin_define ("__x86_64__");			\
+	  builtin_define ("__amd64");				\
+	  builtin_define ("__amd64__");				\
 	}							\
       else							\
 	{							\
@@ -1047,7 +1050,7 @@ do {									\
 	    && (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL))	\
         || ((MODE1) == DImode && TARGET_64BIT))			\
        && ((MODE2) == HImode || (MODE2) == SImode		\
-	   || ((MODE1) == QImode				\
+	   || ((MODE2) == QImode				\
 	       && (TARGET_64BIT || !TARGET_PARTIAL_REG_STALL))	\
 	   || ((MODE2) == DImode && TARGET_64BIT))))
 
@@ -1522,6 +1525,20 @@ enum reg_class
    || ((CLASS) == SIREG)						\
    || ((CLASS) == DIREG))
 
+/* Return a class of registers that cannot change FROM mode to TO mode.
+  
+   x87 registers can't do subreg as all values are reformated to extended
+   precision.  XMM registers does not support with nonzero offsets equal
+   to 4, 8 and 12 otherwise valid for integer registers. Since we can't
+   determine these, prohibit all nonparadoxical subregs changing size.  */
+
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)	\
+  (GET_MODE_SIZE (TO) < GET_MODE_SIZE (FROM)		\
+   ? reg_classes_intersect_p (FLOAT_SSE_REGS, (CLASS))	\
+     || MAYBE_MMX_CLASS_P (CLASS) 			\
+   : GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)		\
+   ? reg_classes_intersect_p (FLOAT_REGS, (CLASS)) : 0)
+
 /* A C statement that adds to CLOBBERS any hard regs the port wishes
    to automatically clobber for all asms.
 
@@ -1716,17 +1733,28 @@ typedef struct ix86_args {
 
 #define FUNCTION_ARG_PARTIAL_NREGS(CUM, MODE, TYPE, NAMED) 0
 
+/* A C expression that indicates when an argument must be passed by
+   reference.  If nonzero for an argument, a copy of that argument is
+   made in memory and a pointer to the argument is passed instead of
+   the argument itself.  The pointer is passed in whatever way is
+   appropriate for passing a pointer to that type.  */
+
+#define FUNCTION_ARG_PASS_BY_REFERENCE(CUM, MODE, TYPE, NAMED) \
+  function_arg_pass_by_reference(&CUM, MODE, TYPE, NAMED)
+
 /* If PIC, we cannot make sibling calls to global functions
    because the PLT requires %ebx live.
-   If we are returning floats on the register stack, we cannot make
-   sibling calls to functions that return floats.  (The stack adjust
-   instruction will wind up after the sibcall jump, and not be executed.) */
+   If we are returning floats on the 80387 register stack, we cannot
+   make a sibcall from a function that doesn't return a float to a
+   function that does or, conversely, from a function that does return
+   a float to a function that doesn't; the necessary stack adjustment
+   would not be executed.  */
 #define FUNCTION_OK_FOR_SIBCALL(DECL)					\
   ((DECL)								\
    && (! flag_pic || ! TREE_PUBLIC (DECL))				\
    && (! TARGET_FLOAT_RETURNS_IN_80387					\
-       || ! FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL))))	\
-       || FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl))))))
+       || (FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (DECL))))	\
+           == FLOAT_MODE_P (TYPE_MODE (TREE_TYPE (TREE_TYPE (cfun->decl)))))))
 
 /* Perform any needed actions needed for a function that is receiving a
    variable number of arguments.
@@ -2068,9 +2096,12 @@ enum ix86_builtins
   IX86_BUILTIN_CVTPI2PS,
   IX86_BUILTIN_CVTPS2PI,
   IX86_BUILTIN_CVTSI2SS,
+  IX86_BUILTIN_CVTSI642SS,
   IX86_BUILTIN_CVTSS2SI,
+  IX86_BUILTIN_CVTSS2SI64,
   IX86_BUILTIN_CVTTPS2PI,
   IX86_BUILTIN_CVTTSS2SI,
+  IX86_BUILTIN_CVTTSS2SI64,
 
   IX86_BUILTIN_MAXPS,
   IX86_BUILTIN_MAXSS,
@@ -2116,6 +2147,7 @@ enum ix86_builtins
   IX86_BUILTIN_PADDB,
   IX86_BUILTIN_PADDW,
   IX86_BUILTIN_PADDD,
+  IX86_BUILTIN_PADDQ,
   IX86_BUILTIN_PADDSB,
   IX86_BUILTIN_PADDSW,
   IX86_BUILTIN_PADDUSB,
@@ -2123,6 +2155,7 @@ enum ix86_builtins
   IX86_BUILTIN_PSUBB,
   IX86_BUILTIN_PSUBW,
   IX86_BUILTIN_PSUBD,
+  IX86_BUILTIN_PSUBQ,
   IX86_BUILTIN_PSUBSB,
   IX86_BUILTIN_PSUBSW,
   IX86_BUILTIN_PSUBUSB,
@@ -2327,11 +2360,14 @@ enum ix86_builtins
 
   IX86_BUILTIN_CVTPI2PD,
   IX86_BUILTIN_CVTSI2SD,
+  IX86_BUILTIN_CVTSI642SD,
 
   IX86_BUILTIN_CVTSD2SI,
+  IX86_BUILTIN_CVTSD2SI64,
   IX86_BUILTIN_CVTSD2SS,
   IX86_BUILTIN_CVTSS2SD,
   IX86_BUILTIN_CVTTSD2SI,
+  IX86_BUILTIN_CVTTSD2SI64,
 
   IX86_BUILTIN_CVTPS2DQ,
   IX86_BUILTIN_CVTPS2PD,
@@ -3286,6 +3322,7 @@ do {						\
   {"register_and_not_any_fp_reg_operand", {REG}},			\
   {"fp_register_operand", {REG}},					\
   {"register_and_not_fp_reg_operand", {REG}},				\
+  {"vector_move_operand", {CONST_VECTOR, SUBREG, REG, MEM}},		\
 
 /* A list of predicates that do special things with modes, and so
    should not elicit warnings for VOIDmode match_operand.  */
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index befbfe49569..edbb7163646 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1,5 +1,6 @@
 ;; GCC machine description for IA-32 and x86-64.
-;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
+;; 2001, 2002, 2003
 ;; Free Software Foundation, Inc.
 ;; Mostly by William Schelter.
 ;; x86_64 support added by Jan Hubicka
@@ -267,6 +268,8 @@
 (define_attr "length" ""
   (cond [(eq_attr "type" "other,multi,fistp")
 	   (const_int 16)
+	 (eq_attr "type" "fcmp")
+	   (const_int 4)
 	 (eq_attr "unit" "i387")
 	   (plus (const_int 2)
 		 (plus (attr "prefix_data16")
@@ -1099,25 +1102,20 @@
    (set_attr "mode" "SI")
    (set_attr "length_immediate" "1")])
 
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
 (define_insn "*movsi_1"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y")
-	(match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=r,m,!*y,!rm,!*y,!*Y,!*Y,!rm")
+	(match_operand:SI 1 "general_operand" "rinm,rin,rm,*y,*y,*Y,rm,*Y"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_SSEMOV:
-      if (get_attr_mode (insn) == TImode)
+      if (get_attr_mode (insn) == MODE_TI)
         return "movdqa\t{%1, %0|%0, %1}";
       return "movd\t{%1, %0|%0, %1}";
 
     case TYPE_MMXMOV:
-      if (get_attr_mode (insn) == DImode)
+      if (get_attr_mode (insn) == MODE_DI)
 	return "movq\t{%1, %0|%0, %1}";
       return "movd\t{%1, %0|%0, %1}";
 
@@ -1131,17 +1129,16 @@
     }
 }
   [(set (attr "type")
-     (cond [(eq_attr "alternative" "4,5,6")
+     (cond [(eq_attr "alternative" "2,3,4")
 	      (const_string "mmxmov")
-	    (eq_attr "alternative" "7,8,9")
+	    (eq_attr "alternative" "5,6,7")
 	      (const_string "ssemov")
 	    (and (ne (symbol_ref "flag_pic") (const_int 0))
 		 (match_operand:SI 1 "symbolic_operand" ""))
 	      (const_string "lea")
 	   ]
 	   (const_string "imov")))
-   (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*")
-   (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")])
+   (set_attr "mode" "SI,SI,SI,SI,DI,TI,SI,SI")])
 
 ;; Stores and loads of ax to arbitary constant address.
 ;; We fake an second form of instruction to force reload to load address
@@ -1214,14 +1211,9 @@
   [(set_attr "type" "push")
    (set_attr "mode" "QI")])
 
-; The first alternative is used only to compute proper length of instruction.
-; Reload's algorithm does not take into account the cost of spill instructions
-; needed to free register in given class, so avoid it from choosing the first
-; alternative when eax is not available.
-
 (define_insn "*movhi_1"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=*?a,r,r,*?a,r,m")
-	(match_operand:HI 1 "general_operand" "i,r,rn,rm,rm,rn"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
+	(match_operand:HI 1 "general_operand" "r,rn,rm,rn"))]
   "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM"
 {
   switch (get_attr_type (insn))
@@ -1238,36 +1230,35 @@
     }
 }
   [(set (attr "type")
-     (cond [(and (eq_attr "alternative" "0,1")
+     (cond [(and (eq_attr "alternative" "0")
 		 (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
 			  (const_int 0))
 		      (eq (symbol_ref "TARGET_HIMODE_MATH")
 			  (const_int 0))))
 	      (const_string "imov")
-	    (and (eq_attr "alternative" "2,3,4")
+	    (and (eq_attr "alternative" "1,2")
 		 (match_operand:HI 1 "aligned_operand" ""))
 	      (const_string "imov")
 	    (and (ne (symbol_ref "TARGET_MOVX")
 		     (const_int 0))
-		 (eq_attr "alternative" "0,1,3,4"))
+		 (eq_attr "alternative" "0,2"))
 	      (const_string "imovx")
 	   ]
 	   (const_string "imov")))
     (set (attr "mode")
       (cond [(eq_attr "type" "imovx")
 	       (const_string "SI")
-	     (and (eq_attr "alternative" "2,3,4")
+	     (and (eq_attr "alternative" "1,2")
 		  (match_operand:HI 1 "aligned_operand" ""))
 	       (const_string "SI")
-	     (and (eq_attr "alternative" "0,1")
+	     (and (eq_attr "alternative" "0")
 		  (ior (eq (symbol_ref "TARGET_PARTIAL_REG_STALL")
 			   (const_int 0))
 		       (eq (symbol_ref "TARGET_HIMODE_MATH")
 			   (const_int 0))))
 	       (const_string "SI")
 	    ]
-	    (const_string "HI")))
-   (set_attr "modrm" "0,*,*,0,*,*")])
+	    (const_string "HI")))])
 
 ;; Stores and loads of ax to arbitary constant address.
 ;; We fake an second form of instruction to force reload to load address
@@ -1488,7 +1479,7 @@
 (define_expand "movstrictqi"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" ""))
 	(match_operand:QI 1 "general_operand" ""))]
-  "! TARGET_PARTIAL_REG_STALL"
+  "! TARGET_PARTIAL_REG_STALL || optimize_size"
 {
   /* Don't generate memory->memory moves, go through a register.  */
   if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
@@ -1498,7 +1489,7 @@
 (define_insn "*movstrictqi_1"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm,q"))
 	(match_operand:QI 1 "general_operand" "*qn,m"))]
-  "! TARGET_PARTIAL_REG_STALL
+  "(! TARGET_PARTIAL_REG_STALL || optimize_size)
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
   "mov{b}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov")
@@ -12839,9 +12830,9 @@
    (set_attr "modrm" "0")
    (set (attr "length")
 	   (if_then_else (and (ge (minus (match_dup 0) (pc))
-				  (const_int -128))
+				  (const_int -126))
 			      (lt (minus (match_dup 0) (pc))
-				  (const_int 124)))
+				  (const_int 128)))
 	     (const_int 2)
 	     (const_int 6)))])
 
@@ -12857,9 +12848,9 @@
    (set_attr "modrm" "0")
    (set (attr "length")
 	   (if_then_else (and (ge (minus (match_dup 0) (pc))
-				  (const_int -128))
+				  (const_int -126))
 			      (lt (minus (match_dup 0) (pc))
-				  (const_int 124)))
+				  (const_int 128)))
 	     (const_int 2)
 	     (const_int 6)))])
 
@@ -13124,9 +13115,9 @@
   [(set_attr "type" "ibr")
    (set (attr "length")
 	   (if_then_else (and (ge (minus (match_dup 0) (pc))
-				  (const_int -128))
+				  (const_int -126))
 			      (lt (minus (match_dup 0) (pc))
-				  (const_int 124)))
+				  (const_int 128)))
 	     (const_int 2)
 	     (const_int 5)))
    (set_attr "modrm" "0")])
@@ -13250,9 +13241,9 @@
    (set (attr "length")
 	(if_then_else (and (eq_attr "alternative" "0")
 			   (and (ge (minus (match_dup 0) (pc))
-			            (const_int -128))
+			            (const_int -126))
 			        (lt (minus (match_dup 0) (pc))
-			            (const_int 124))))
+			            (const_int 128))))
 		      (const_int 2)
 		      (const_int 16)))
    ;; We don't know the type before shorten branches.  Optimistically expect
@@ -13616,11 +13607,10 @@
   "ix86_expand_epilogue (0); DONE;")
 
 (define_expand "eh_return"
-  [(use (match_operand 0 "register_operand" ""))
-   (use (match_operand 1 "register_operand" ""))]
+  [(use (match_operand 0 "register_operand" ""))]
   ""
 {
-  rtx tmp, sa = operands[0], ra = operands[1];
+  rtx tmp, sa = EH_RETURN_STACKADJ_RTX, ra = operands[0];
 
   /* Tricky bit: we write the address of the handler to which we will
      be returning into someone else's stack frame, one word below the
@@ -13682,7 +13672,7 @@
 
 (define_expand "ffssi2"
   [(set (match_operand:SI 0 "nonimmediate_operand" "") 
-	(ffs:SI (match_operand:SI 1 "general_operand" "")))]
+	(ffs:SI (match_operand:SI 1 "nonimmediate_operand" "")))]
   ""
 {
   rtx out = gen_reg_rtx (SImode), tmp = gen_reg_rtx (SImode);
@@ -14823,7 +14813,7 @@
 (define_insn "cosxf2"
   [(set (match_operand:XF 0 "register_operand" "=f")
 	(unspec:XF [(match_operand:XF 1 "register_operand" "0")] UNSPEC_COS))]
-  "! TARGET_NO_FANCY_MATH_387 && TARGET_80387
+  "!TARGET_64BIT && ! TARGET_NO_FANCY_MATH_387 && TARGET_80387
    && flag_unsafe_math_optimizations"
   "fcos"
   [(set_attr "type" "fpspc")
@@ -16734,7 +16724,7 @@
 (define_split
   [(set (match_operand 0 "register_operand" "")
 	(if_then_else (match_operator 1 "comparison_operator"
-			[(match_operand 4 "register_operand" "")
+			[(match_operand 4 "nonimmediate_operand" "")
 			 (match_operand 5 "nonimmediate_operand" "")])
 		      (match_operand 2 "nonmemory_operand" "")
 		      (match_operand 3 "nonmemory_operand" "")))]
@@ -16746,13 +16736,16 @@
 					    (subreg:TI (match_dup 7) 0)))]
 {
   PUT_MODE (operands[1], GET_MODE (operands[0]));
-  if (!sse_comparison_operator (operands[1], VOIDmode))
+  if (!sse_comparison_operator (operands[1], VOIDmode)
+      || !rtx_equal_p (operands[0], operands[4]))
     {
       rtx tmp = operands[5];
       operands[5] = operands[4];
       operands[4] = tmp;
       PUT_CODE (operands[1], swap_condition (GET_CODE (operands[1])));
     }
+  if (!rtx_equal_p (operands[0], operands[4]))
+    abort ();
   if (const0_operand (operands[2], GET_MODE (operands[0])))
     {
       operands[7] = operands[3];
@@ -16853,6 +16846,10 @@
      operands[2] = gen_lowpart (SImode, operands[2]);
    PUT_MODE (operands[3], SImode);")
 
+; Promote the QImode tests, as i386 has encoding of the AND
+; instruction with 32-bit sign-extended immediate and thus the
+; instruction size is unchanged, except in the %eax case for
+; which it is increased by one byte, hence the ! optimize_size.
 (define_split
   [(set (reg 17)
 	(compare (and (match_operand 1 "aligned_operand" "")
@@ -16861,12 +16858,11 @@
    (set (match_operand 0 "register_operand" "")
 	(and (match_dup 1) (match_dup 2)))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
-   && ix86_match_ccmode (insn, CCNOmode)
-   && (GET_MODE (operands[0]) == HImode
-       || (GET_MODE (operands[0]) == QImode 
-	   /* Ensure that the operand will remain sign extended immediate.  */
-	   && INTVAL (operands[2]) >= 0
-	   && (TARGET_PROMOTE_QImode || optimize_size)))"
+   /* Ensure that the operand will remain sign-extended immediate.  */
+   && ix86_match_ccmode (insn, INTVAL (operands[2]) >= 0 ? CCNOmode : CCZmode)
+   && ! optimize_size
+   && ((GET_MODE (operands[0]) == HImode && ! TARGET_FAST_PREFIX)
+       || (GET_MODE (operands[0]) == QImode && TARGET_PROMOTE_QImode))"
   [(parallel [(set (reg:CCNO 17)
 		   (compare:CCNO (and:SI (match_dup 1) (match_dup 2))
 			         (const_int 0)))
@@ -16879,17 +16875,20 @@
    operands[0] = gen_lowpart (SImode, operands[0]);
    operands[1] = gen_lowpart (SImode, operands[1]);")
 
-; Don't promote the QImode tests, as i386 don't have encoding of
-; the test instruction with 32bit sign extended immediate and thus
-; the code grows.
+; Don't promote the QImode tests, as i386 doesn't have encoding of
+; the TEST instruction with 32-bit sign-extended immediate and thus
+; the instruction size would at least double, which is not what we
+; want even with ! optimize_size.
 (define_split
   [(set (reg 17)
 	(compare (and (match_operand:HI 0 "aligned_operand" "")
 		      (match_operand:HI 1 "const_int_operand" ""))
 		 (const_int 0)))]
   "! TARGET_PARTIAL_REG_STALL && reload_completed
-   && ix86_match_ccmode (insn, CCNOmode)
-   && GET_MODE (operands[0]) == HImode"
+   /* Ensure that the operand will remain sign-extended immediate.  */
+   && ix86_match_ccmode (insn, INTVAL (operands[1]) >= 0 ? CCNOmode : CCZmode)
+   && ! TARGET_FAST_PREFIX
+   && ! optimize_size"
   [(set (reg:CCNO 17)
 	(compare:CCNO (and:SI (match_dup 0) (match_dup 1))
 		      (const_int 0)))]
@@ -17848,67 +17847,92 @@
 ;; Moves for SSE/MMX regs.
 
 (define_insn "movv4sf_internal"
-  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V4SF 1 "nonimmediate_operand" "xm,x"))]
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:V4SF 1 "vector_move_operand" "C,xm,x"))]
   "TARGET_SSE"
   ;; @@@ let's try to use movaps here.
-  "movaps\t{%1, %0|%0, %1}"
+  "@
+   xorps\t%0, %0
+   movaps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF")])
 
 (define_insn "movv4si_internal"
-  [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V4SI 1 "nonimmediate_operand" "xm,x"))]
+  [(set (match_operand:V4SI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:V4SI 1 "vector_move_operand" "C,xm,x"))]
   "TARGET_SSE"
   ;; @@@ let's try to use movaps here.
-  "movaps\t{%1, %0|%0, %1}"
+  "@
+   xorps\t%0, %0
+   movaps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF")])
 
 (define_insn "movv2di_internal"
-  [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V2DI 1 "nonimmediate_operand" "xm,x"))]
+  [(set (match_operand:V2DI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:V2DI 1 "vector_move_operand" "C,xm,x"))]
   "TARGET_SSE"
   ;; @@@ let's try to use movaps here.
-  "movdqa\t{%1, %0|%0, %1}"
+  "@
+   pxor\t%0, %0
+   movdqa\t{%1, %0|%0, %1} 
+   movdqa\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF")])
 
 (define_insn "movv8qi_internal"
-  [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V8QI 1 "nonimmediate_operand" "ym,y"))]
-  "TARGET_MMX"
-  "movq\t{%1, %0|%0, %1}"
+  [(set (match_operand:V8QI 0 "nonimmediate_operand" "=y,y,m")
+	(match_operand:V8QI 1 "vector_move_operand" "C,ym,y"))]
+  "TARGET_MMX
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmxmov")
    (set_attr "mode" "DI")])
 
 (define_insn "movv4hi_internal"
-  [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V4HI 1 "nonimmediate_operand" "ym,y"))]
-  "TARGET_MMX"
-  "movq\t{%1, %0|%0, %1}"
+  [(set (match_operand:V4HI 0 "nonimmediate_operand" "=y,y,m")
+	(match_operand:V4HI 1 "vector_move_operand" "C,ym,y"))]
+  "TARGET_MMX
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmxmov")
    (set_attr "mode" "DI")])
 
 (define_insn "movv2si_internal"
-  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,m")
-	(match_operand:V2SI 1 "nonimmediate_operand" "ym,y"))]
-  "TARGET_MMX"
-  "movq\t{%1, %0|%0, %1}"
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "=y,y,m")
+	(match_operand:V2SI 1 "vector_move_operand" "C,ym,y"))]
+  "TARGET_MMX
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
 (define_insn "movv2sf_internal"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,m")
-        (match_operand:V2SF 1 "nonimmediate_operand" "ym,y"))]
-  "TARGET_3DNOW"
-  "movq\\t{%1, %0|%0, %1}"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=y,y,m")
+        (match_operand:V2SF 1 "vector_move_operand" "C,ym,y"))]
+  "TARGET_3DNOW
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}"
   [(set_attr "type" "mmxcvt")
    (set_attr "mode" "DI")])
 
 (define_expand "movti"
-  [(set (match_operand:TI 0 "general_operand" "")
-	(match_operand:TI 1 "general_operand" ""))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "")
+	(match_operand:TI 1 "nonimmediate_operand" ""))]
   "TARGET_SSE || TARGET_64BIT"
 {
   if (TARGET_64BIT)
@@ -17919,35 +17943,44 @@
 })
 
 (define_insn "movv2df_internal"
-  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V2DF 1 "nonimmediate_operand" "xm,x"))]
-  "TARGET_SSE2"
-  ;; @@@ let's try to use movaps here.
-  "movapd\t{%1, %0|%0, %1}"
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:V2DF 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+   xorpd\t%0, %0
+   movapd\t{%1, %0|%0, %1}
+   movapd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V2DF")])
 
 (define_insn "movv8hi_internal"
-  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V8HI 1 "nonimmediate_operand" "xm,x"))]
-  "TARGET_SSE2"
-  ;; @@@ let's try to use movaps here.
-  "movaps\t{%1, %0|%0, %1}"
+  [(set (match_operand:V8HI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:V8HI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+   xorps\t%0, %0
+   movaps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF")])
 
 (define_insn "movv16qi_internal"
-  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-	(match_operand:V16QI 1 "nonimmediate_operand" "xm,x"))]
-  "TARGET_SSE2"
-  ;; @@@ let's try to use movaps here.
-  "movaps\t{%1, %0|%0, %1}"
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,x,m")
+	(match_operand:V16QI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "@
+   xorps\t%0, %0
+   movaps\t{%1, %0|%0, %1}
+   movaps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V4SF")])
 
 (define_expand "movv2df"
-  [(set (match_operand:V2DF 0 "general_operand" "")
-	(match_operand:V2DF 1 "general_operand" ""))]
+  [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
+	(match_operand:V2DF 1 "nonimmediate_operand" ""))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (V2DFmode, operands);
@@ -17955,8 +17988,8 @@
 })
 
 (define_expand "movv8hi"
-  [(set (match_operand:V8HI 0 "general_operand" "")
-	(match_operand:V8HI 1 "general_operand" ""))]
+  [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
+	(match_operand:V8HI 1 "nonimmediate_operand" ""))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (V8HImode, operands);
@@ -17964,8 +17997,8 @@
 })
 
 (define_expand "movv16qi"
-  [(set (match_operand:V16QI 0 "general_operand" "")
-	(match_operand:V16QI 1 "general_operand" ""))]
+  [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
+	(match_operand:V16QI 1 "nonimmediate_operand" ""))]
   "TARGET_SSE2"
 {
   ix86_expand_vector_move (V16QImode, operands);
@@ -17973,8 +18006,8 @@
 })
 
 (define_expand "movv4sf"
-  [(set (match_operand:V4SF 0 "general_operand" "")
-	(match_operand:V4SF 1 "general_operand" ""))]
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+	(match_operand:V4SF 1 "nonimmediate_operand" ""))]
   "TARGET_SSE"
 {
   ix86_expand_vector_move (V4SFmode, operands);
@@ -17982,8 +18015,8 @@
 })
 
 (define_expand "movv4si"
-  [(set (match_operand:V4SI 0 "general_operand" "")
-	(match_operand:V4SI 1 "general_operand" ""))]
+  [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
+	(match_operand:V4SI 1 "nonimmediate_operand" ""))]
   "TARGET_SSE"
 {
   ix86_expand_vector_move (V4SImode, operands);
@@ -17991,8 +18024,8 @@
 })
 
 (define_expand "movv2di"
-  [(set (match_operand:V2DI 0 "general_operand" "")
-	(match_operand:V2DI 1 "general_operand" ""))]
+  [(set (match_operand:V2DI 0 "nonimmediate_operand" "")
+	(match_operand:V2DI 1 "nonimmediate_operand" ""))]
   "TARGET_SSE"
 {
   ix86_expand_vector_move (V2DImode, operands);
@@ -18000,8 +18033,8 @@
 })
 
 (define_expand "movv2si"
-  [(set (match_operand:V2SI 0 "general_operand" "")
-	(match_operand:V2SI 1 "general_operand" ""))]
+  [(set (match_operand:V2SI 0 "nonimmediate_operand" "")
+	(match_operand:V2SI 1 "nonimmediate_operand" ""))]
   "TARGET_MMX"
 {
   ix86_expand_vector_move (V2SImode, operands);
@@ -18009,8 +18042,8 @@
 })
 
 (define_expand "movv4hi"
-  [(set (match_operand:V4HI 0 "general_operand" "")
-	(match_operand:V4HI 1 "general_operand" ""))]
+  [(set (match_operand:V4HI 0 "nonimmediate_operand" "")
+	(match_operand:V4HI 1 "nonimmediate_operand" ""))]
   "TARGET_MMX"
 {
   ix86_expand_vector_move (V4HImode, operands);
@@ -18018,8 +18051,8 @@
 })
 
 (define_expand "movv8qi"
-  [(set (match_operand:V8QI 0 "general_operand" "")
-	(match_operand:V8QI 1 "general_operand" ""))]
+  [(set (match_operand:V8QI 0 "nonimmediate_operand" "")
+	(match_operand:V8QI 1 "nonimmediate_operand" ""))]
   "TARGET_MMX"
 {
   ix86_expand_vector_move (V8QImode, operands);
@@ -18027,14 +18060,97 @@
 })
 
 (define_expand "movv2sf"
-  [(set (match_operand:V2SF 0 "general_operand" "")
-	(match_operand:V2SF 1 "general_operand" ""))]
+  [(set (match_operand:V2SF 0 "nonimmediate_operand" "")
+	(match_operand:V2SF 1 "nonimmediate_operand" ""))]
    "TARGET_3DNOW"
 {
   ix86_expand_vector_move (V2SFmode, operands);
   DONE;
 })
 
+(define_insn "*pushv2df"
+  [(set (match_operand:V2DF 0 "push_operand" "=<")
+	(match_operand:V2DF 1 "register_operand" "x"))]
+  "TARGET_SSE"
+  "#")
+
+(define_insn "*pushv2di"
+  [(set (match_operand:V2DI 0 "push_operand" "=<")
+	(match_operand:V2DI 1 "register_operand" "x"))]
+  "TARGET_SSE2"
+  "#")
+
+(define_insn "*pushv8hi"
+  [(set (match_operand:V8HI 0 "push_operand" "=<")
+	(match_operand:V8HI 1 "register_operand" "x"))]
+  "TARGET_SSE2"
+  "#")
+
+(define_insn "*pushv16qi"
+  [(set (match_operand:V16QI 0 "push_operand" "=<")
+	(match_operand:V16QI 1 "register_operand" "x"))]
+  "TARGET_SSE2"
+  "#")
+
+(define_insn "*pushv4sf"
+  [(set (match_operand:V4SF 0 "push_operand" "=<")
+	(match_operand:V4SF 1 "register_operand" "x"))]
+  "TARGET_SSE"
+  "#")
+
+(define_insn "*pushv4si"
+  [(set (match_operand:V4SI 0 "push_operand" "=<")
+	(match_operand:V4SI 1 "register_operand" "x"))]
+  "TARGET_SSE2"
+  "#")
+
+(define_insn "*pushv2si"
+  [(set (match_operand:V2SI 0 "push_operand" "=<")
+	(match_operand:V2SI 1 "register_operand" "y"))]
+  "TARGET_MMX"
+  "#")
+
+(define_insn "*pushv4hi"
+  [(set (match_operand:V4HI 0 "push_operand" "=<")
+	(match_operand:V4HI 1 "register_operand" "y"))]
+  "TARGET_MMX"
+  "#")
+
+(define_insn "*pushv8qi"
+  [(set (match_operand:V8QI 0 "push_operand" "=<")
+	(match_operand:V8QI 1 "register_operand" "y"))]
+  "TARGET_MMX"
+  "#")
+
+(define_insn "*pushv2sf"
+  [(set (match_operand:V2SF 0 "push_operand" "=<")
+	(match_operand:V2SF 1 "register_operand" "y"))]
+  "TARGET_3DNOW"
+  "#")
+
+(define_split
+  [(set (match_operand 0 "push_operand" "")
+	(match_operand 1 "register_operand" ""))]
+  "!TARGET_64BIT && reload_completed
+   && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+  [(set (reg:SI 7) (plus:SI (reg:SI 7) (match_dup 3)))
+   (set (match_dup 2) (match_dup 1))]
+  "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+				 stack_pointer_rtx);
+   operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+(define_split
+  [(set (match_operand 0 "push_operand" "")
+	(match_operand 1 "register_operand" ""))]
+  "TARGET_64BIT && reload_completed
+   && (SSE_REG_P (operands[1]) || MMX_REG_P (operands[1]))"
+  [(set (reg:DI 7) (plus:DI (reg:DI 7) (match_dup 3)))
+   (set (match_dup 2) (match_dup 1))]
+  "operands[2] = change_address (operands[0], GET_MODE (operands[0]),
+				 stack_pointer_rtx);
+   operands[3] = GEN_INT (-GET_MODE_SIZE (GET_MODE (operands[0])));")
+
+
 (define_insn_and_split "*pushti"
   [(set (match_operand:TI 0 "push_operand" "=<")
 	(match_operand:TI 1 "nonmemory_operand" "x"))]
@@ -18158,8 +18274,9 @@
 
 (define_insn "movti_internal"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=x,x,m")
-	(match_operand:TI 1 "general_operand" "C,xm,x"))]
-  "TARGET_SSE && !TARGET_64BIT"
+	(match_operand:TI 1 "vector_move_operand" "C,xm,x"))]
+  "TARGET_SSE && !TARGET_64BIT
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
   "@
    xorps\t%0, %0
    movaps\t{%1, %0|%0, %1}
@@ -18169,7 +18286,7 @@
 
 (define_insn "*movti_rex64"
   [(set (match_operand:TI 0 "nonimmediate_operand" "=r,o,x,mx,x")
-	(match_operand:TI 1 "general_operand" "riFo,riF,O,x,m"))]
+	(match_operand:TI 1 "general_operand" "riFo,riF,C,x,m"))]
   "TARGET_64BIT
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
   "@
@@ -18191,29 +18308,56 @@
 
 ;; These two patterns are useful for specifying exactly whether to use
 ;; movaps or movups
-(define_insn "sse_movaps"
+(define_expand "sse_movaps"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+		     UNSPEC_MOVA))]
+  "TARGET_SSE"
+{
+  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+    {
+      rtx tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_sse_movaps (tmp, operands[1]));
+      emit_move_insn (operands[0], tmp);
+      DONE;
+    }
+})
+
+(define_insn "*sse_movaps_1"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
 	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
 		     UNSPEC_MOVA))]
-  "TARGET_SSE"
-  "@
-   movaps\t{%1, %0|%0, %1}
-   movaps\t{%1, %0|%0, %1}"
+  "TARGET_SSE
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "movaps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov,ssemov")
    (set_attr "mode" "V4SF")])
 
-(define_insn "sse_movups"
+(define_expand "sse_movups"
+  [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
+	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "")]
+		     UNSPEC_MOVU))]
+  "TARGET_SSE"
+{
+  if (GET_CODE (operands[0]) == MEM && GET_CODE (operands[1]) == MEM)
+    {
+      rtx tmp = gen_reg_rtx (V4SFmode);
+      emit_insn (gen_sse_movups (tmp, operands[1]));
+      emit_move_insn (operands[0], tmp);
+      DONE;
+    }
+})
+
+(define_insn "*sse_movups_1"
   [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,m")
 	(unspec:V4SF [(match_operand:V4SF 1 "nonimmediate_operand" "xm,x")]
 		     UNSPEC_MOVU))]
-  "TARGET_SSE"
-  "@
-   movups\t{%1, %0|%0, %1}
-   movups\t{%1, %0|%0, %1}"
+  "TARGET_SSE
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "movups\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt,ssecvt")
    (set_attr "mode" "V4SF")])
 
-
 ;; SSE Strange Moves.
 
 (define_insn "sse_movmskps"
@@ -18329,11 +18473,21 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V4SF")])
 
-(define_insn "sse_loadss"
+(define_expand "sse_loadss"
+  [(match_operand:V4SF 0 "register_operand" "")
+   (match_operand:SF 1 "memory_operand" "")]
+  "TARGET_SSE"
+{
+  emit_insn (gen_sse_loadss_1 (operands[0], operands[1],
+			       CONST0_RTX (V4SFmode)));
+  DONE;
+})
+
+(define_insn "sse_loadss_1"
   [(set (match_operand:V4SF 0 "register_operand" "=x")
 	(vec_merge:V4SF
-	 (match_operand:V4SF 1 "memory_operand" "m")
-	 (vec_duplicate:V4SF (float:SF (const_int 0)))
+	 (vec_duplicate:V4SF (match_operand:SF 1 "memory_operand" "m"))
+	 (match_operand:V4SF 2 "const0_operand" "X")
 	 (const_int 1)))]
   "TARGET_SSE"
   "movss\t{%1, %0|%0, %1}"
@@ -18804,7 +18958,7 @@
 
 (define_insn "sse2_nandv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "%0"))
+        (and:V2DI (not:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "0"))
 		  (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2
    && (GET_CODE (operands[1]) != MEM || GET_CODE (operands[2]) != MEM)"
@@ -18908,7 +19062,7 @@
 	 (match_operator:V4SI 3 "sse_comparison_operator"
 		[(match_operand:V4SF 1 "register_operand" "0")
 		 (match_operand:V4SF 2 "register_operand" "x")])
-	 (match_dup 1)
+	 (subreg:V4SI (match_dup 1) 0)
 	 (const_int 1)))]
   "TARGET_SSE"
   "cmp%D3ss\t{%2, %0|%0, %2}"
@@ -19093,6 +19247,19 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "SF")])
 
+(define_insn "cvtsi2ssq"
+  [(set (match_operand:V4SF 0 "register_operand" "=x,x")
+	(vec_merge:V4SF
+	 (match_operand:V4SF 1 "register_operand" "0,0")
+	 (vec_duplicate:V4SF
+	  (float:SF (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+	 (const_int 14)))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvtsi2ssq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "athlon_decode" "vector,vector")
+   (set_attr "mode" "SF")])
+
 (define_insn "cvtss2si"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(vec_select:SI
@@ -19103,6 +19270,17 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "SF")])
 
+(define_insn "cvtss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(vec_select:DI
+	 (fix:V4DI (match_operand:V4SF 1 "nonimmediate_operand" "x,m"))
+	 (parallel [(const_int 0)])))]
+  "TARGET_SSE"
+  "cvtss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "athlon_decode" "vector,vector")
+   (set_attr "mode" "SF")])
+
 (define_insn "cvttss2si"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(vec_select:SI
@@ -19114,6 +19292,18 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "SF")])
 
+(define_insn "cvttss2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(vec_select:DI
+	 (unspec:V4DI [(match_operand:V4SF 1 "nonimmediate_operand" "x,xm")]
+		      UNSPEC_FIX)
+	 (parallel [(const_int 0)])))]
+  "TARGET_SSE && TARGET_64BIT"
+  "cvttss2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "SF")
+   (set_attr "athlon_decode" "vector,vector")])
+
 
 ;; MMX insns
 
@@ -19121,7 +19311,7 @@
 
 (define_insn "addv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+        (plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
 	           (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddb\t{%2, %0|%0, %2}"
@@ -19130,7 +19320,7 @@
 
 (define_insn "addv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+        (plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
 	           (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddw\t{%2, %0|%0, %2}"
@@ -19139,16 +19329,27 @@
 
 (define_insn "addv2si3"
   [(set (match_operand:V2SI 0 "register_operand" "=y")
-        (plus:V2SI (match_operand:V2SI 1 "register_operand" "0")
+        (plus:V2SI (match_operand:V2SI 1 "register_operand" "%0")
 	           (match_operand:V2SI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddd\t{%2, %0|%0, %2}"
   [(set_attr "type" "mmxadd")
    (set_attr "mode" "DI")])
 
+(define_insn "mmx_adddi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+	 [(plus:DI (match_operand:DI 1 "register_operand" "%0")
+		   (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+	 UNSPEC_NOP))]
+  "TARGET_MMX"
+  "paddq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
+
 (define_insn "ssaddv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+        (ss_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
 		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddsb\t{%2, %0|%0, %2}"
@@ -19157,7 +19358,7 @@
 
 (define_insn "ssaddv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+        (ss_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
 		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddsw\t{%2, %0|%0, %2}"
@@ -19166,7 +19367,7 @@
 
 (define_insn "usaddv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
-        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "0")
+        (us_plus:V8QI (match_operand:V8QI 1 "register_operand" "%0")
 		      (match_operand:V8QI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddusb\t{%2, %0|%0, %2}"
@@ -19175,7 +19376,7 @@
 
 (define_insn "usaddv4hi3"
   [(set (match_operand:V4HI 0 "register_operand" "=y")
-        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "0")
+        (us_plus:V4HI (match_operand:V4HI 1 "register_operand" "%0")
 		      (match_operand:V4HI 2 "nonimmediate_operand" "ym")))]
   "TARGET_MMX"
   "paddusw\t{%2, %0|%0, %2}"
@@ -19209,6 +19410,17 @@
   [(set_attr "type" "mmxadd")
    (set_attr "mode" "DI")])
 
+(define_insn "mmx_subdi3"
+  [(set (match_operand:DI 0 "register_operand" "=y")
+        (unspec:DI
+	 [(minus:DI (match_operand:DI 1 "register_operand" "0")
+		    (match_operand:DI 2 "nonimmediate_operand" "ym"))]
+	 UNSPEC_NOP))]
+  "TARGET_MMX"
+  "psubq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "mmxadd")
+   (set_attr "mode" "DI")])
+
 (define_insn "sssubv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=y")
         (ss_minus:V8QI (match_operand:V8QI 1 "register_operand" "0")
@@ -19312,7 +19524,7 @@
 (define_insn "mmx_iordi3"
   [(set (match_operand:DI 0 "register_operand" "=y")
         (unspec:DI
-	 [(ior:DI (match_operand:DI 1 "register_operand" "0")
+	 [(ior:DI (match_operand:DI 1 "register_operand" "%0")
 		  (match_operand:DI 2 "nonimmediate_operand" "ym"))]
 	 UNSPEC_NOP))]
   "TARGET_MMX"
@@ -19323,7 +19535,7 @@
 (define_insn "mmx_xordi3"
   [(set (match_operand:DI 0 "register_operand" "=y")
         (unspec:DI
-	 [(xor:DI (match_operand:DI 1 "register_operand" "0")
+	 [(xor:DI (match_operand:DI 1 "register_operand" "%0")
 		  (match_operand:DI 2 "nonimmediate_operand" "ym"))]
 	 UNSPEC_NOP))]
   "TARGET_MMX"
@@ -19346,7 +19558,7 @@
 (define_insn "mmx_anddi3"
   [(set (match_operand:DI 0 "register_operand" "=y")
         (unspec:DI
-	 [(and:DI (match_operand:DI 1 "register_operand" "0")
+	 [(and:DI (match_operand:DI 1 "register_operand" "%0")
 		  (match_operand:DI 2 "nonimmediate_operand" "ym"))]
 	 UNSPEC_NOP))]
   "TARGET_MMX"
@@ -19805,17 +20017,17 @@
 (define_insn "ldmxcsr"
   [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
 		    UNSPECV_LDMXCSR)]
-  "TARGET_MMX"
+  "TARGET_SSE"
   "ldmxcsr\t%0"
-  [(set_attr "type" "mmx")
+  [(set_attr "type" "sse")
    (set_attr "memory" "load")])
 
 (define_insn "stmxcsr"
   [(set (match_operand:SI 0 "memory_operand" "=m")
 	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
-  "TARGET_MMX"
+  "TARGET_SSE"
   "stmxcsr\t%0"
-  [(set_attr "type" "mmx")
+  [(set_attr "type" "sse")
    (set_attr "memory" "store")])
 
 (define_expand "sfence"
@@ -20471,7 +20683,7 @@
 	 (match_operator:V2DI 3 "sse_comparison_operator"
 			      [(match_operand:V2DF 1 "register_operand" "0")
 			       (match_operand:V2DF 2 "nonimmediate_operand" "x")])
-	 (match_dup 1)
+	 (subreg:V2DI (match_dup 1) 0)
 	 (const_int 1)))]
   "TARGET_SSE2"
   "cmp%D3sd\t{%2, %0|%0, %2}"
@@ -20692,6 +20904,15 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "SI")])
 
+(define_insn "cvtsd2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(fix:DI (vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
+			       (parallel [(const_int 0)]))))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvtsd2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "SI")])
+
 (define_insn "cvttsd2si"
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(unspec:SI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "xm")
@@ -20701,6 +20922,16 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "SI")])
 
+(define_insn "cvttsd2siq"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(unspec:DI [(vec_select:DF (match_operand:V2DF 1 "register_operand" "x,xm")
+				   (parallel [(const_int 0)]))] UNSPEC_FIX))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvttsd2siq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector,vector")])
+
 (define_insn "cvtsi2sd"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
 	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0")
@@ -20713,6 +20944,19 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "DF")])
 
+(define_insn "cvtsi2sdq"
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
+	(vec_merge:V2DF (match_operand:V2DF 1 "register_operand" "0,0")
+	 		(vec_duplicate:V2DF
+			  (float:DF
+			    (match_operand:DI 2 "nonimmediate_operand" "r,rm")))
+			(const_int 2)))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "cvtsi2sdq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "DF")
+   (set_attr "athlon_decode" "vector,direct")])
+
 ;; Conversions between SF and DF
 
 (define_insn "cvtsd2ss"
@@ -20770,7 +21014,7 @@
 
 (define_insn "addv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+        (plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
 		    (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddb\t{%2, %0|%0, %2}"
@@ -20779,7 +21023,7 @@
 
 (define_insn "addv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+        (plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
 	           (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddw\t{%2, %0|%0, %2}"
@@ -20788,7 +21032,7 @@
 
 (define_insn "addv4si3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
-        (plus:V4SI (match_operand:V4SI 1 "register_operand" "0")
+        (plus:V4SI (match_operand:V4SI 1 "register_operand" "%0")
 	           (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddd\t{%2, %0|%0, %2}"
@@ -20797,7 +21041,7 @@
 
 (define_insn "addv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
-        (plus:V2DI (match_operand:V2DI 1 "register_operand" "0")
+        (plus:V2DI (match_operand:V2DI 1 "register_operand" "%0")
 	           (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddq\t{%2, %0|%0, %2}"
@@ -20806,7 +21050,7 @@
 
 (define_insn "ssaddv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+        (ss_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddsb\t{%2, %0|%0, %2}"
@@ -20815,7 +21059,7 @@
 
 (define_insn "ssaddv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+        (ss_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
 		      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddsw\t{%2, %0|%0, %2}"
@@ -20824,7 +21068,7 @@
 
 (define_insn "usaddv16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "=x")
-        (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "0")
+        (us_plus:V16QI (match_operand:V16QI 1 "register_operand" "%0")
 		       (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddusb\t{%2, %0|%0, %2}"
@@ -20833,7 +21077,7 @@
 
 (define_insn "usaddv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
-        (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "0")
+        (us_plus:V8HI (match_operand:V8HI 1 "register_operand" "%0")
 		      (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2"
   "paddusw\t{%2, %0|%0, %2}"
@@ -21069,7 +21313,8 @@
   [(set (match_operand:V8HI 0 "register_operand" "=x")
         (vec_merge:V8HI (match_operand:V8HI 1 "register_operand" "0")
 			(vec_duplicate:V8HI
-			 (match_operand:SI 2 "nonimmediate_operand" "rm"))
+			 (truncate:HI
+			   (match_operand:SI 2 "nonimmediate_operand" "rm")))
 			(match_operand:SI 3 "immediate_operand" "i")))]
   "TARGET_SSE2"
   "pinsrw\t{%3, %2, %0|%0, %2, %3}"
@@ -21218,7 +21463,7 @@
 (define_insn "ashrv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
         (ashiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-		       (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psraw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21227,7 +21472,7 @@
 (define_insn "ashrv4si3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
         (ashiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-		       (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psrad\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21236,7 +21481,7 @@
 (define_insn "lshrv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
         (lshiftrt:V8HI (match_operand:V8HI 1 "register_operand" "0")
-		       (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psrlw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21245,7 +21490,7 @@
 (define_insn "lshrv4si3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
         (lshiftrt:V4SI (match_operand:V4SI 1 "register_operand" "0")
-		       (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psrld\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21254,7 +21499,7 @@
 (define_insn "lshrv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
         (lshiftrt:V2DI (match_operand:V2DI 1 "register_operand" "0")
-		       (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		       (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psrlq\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21263,7 +21508,7 @@
 (define_insn "ashlv8hi3"
   [(set (match_operand:V8HI 0 "register_operand" "=x")
         (ashift:V8HI (match_operand:V8HI 1 "register_operand" "0")
-		     (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psllw\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21272,7 +21517,7 @@
 (define_insn "ashlv4si3"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
         (ashift:V4SI (match_operand:V4SI 1 "register_operand" "0")
-		     (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "pslld\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21281,7 +21526,7 @@
 (define_insn "ashlv2di3"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
         (ashift:V2DI (match_operand:V2DI 1 "register_operand" "0")
-		     (match_operand:SI 2 "nonmemory_operand" "xi")))]
+		     (match_operand:TI 2 "nonmemory_operand" "xi")))]
   "TARGET_SSE2"
   "psllq\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseishft")
@@ -21595,45 +21840,41 @@
 
 (define_insn "sse2_movapd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")]
+	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
 		     UNSPEC_MOVA))]
-  "TARGET_SSE2"
-  "@
-   movapd\t{%1, %0|%0, %1}
-   movapd\t{%1, %0|%0, %1}"
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "movapd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "V2DF")])
 
 (define_insn "sse2_movupd"
   [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,m")
-	(unspec:V2DF [(match_operand:V2DF 1 "general_operand" "xm,x")]
+	(unspec:V2DF [(match_operand:V2DF 1 "nonimmediate_operand" "xm,x")]
 		     UNSPEC_MOVU))]
-  "TARGET_SSE2"
-  "@
-   movupd\t{%1, %0|%0, %1}
-   movupd\t{%1, %0|%0, %1}"
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "movupd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V2DF")])
 
 (define_insn "sse2_movdqa"
   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-	(unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
+	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
 		       UNSPEC_MOVA))]
-  "TARGET_SSE2"
-  "@
-   movdqa\t{%1, %0|%0, %1}
-   movdqa\t{%1, %0|%0, %1}"
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "movdqa\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssemov")
    (set_attr "mode" "TI")])
 
 (define_insn "sse2_movdqu"
   [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
-	(unspec:V16QI [(match_operand:V16QI 1 "general_operand" "xm,x")]
+	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
 		       UNSPEC_MOVU))]
-  "TARGET_SSE2"
-  "@
-   movdqu\t{%1, %0|%0, %1}
-   movdqu\t{%1, %0|%0, %1}"
+  "TARGET_SSE2
+   && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
+  "movdqu\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "TI")])
 
@@ -21641,24 +21882,48 @@
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y")
 	(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x")
 		       (parallel [(const_int 0)])))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && !TARGET_64BIT"
   "@
    movq\t{%1, %0|%0, %1}
    movdq2q\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "TI")])
 
+(define_insn "sse2_movdq2q_rex64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,y,r")
+	(vec_select:DI (match_operand:V2DI 1 "register_operand" "x,x,x")
+		       (parallel [(const_int 0)])))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "@
+   movq\t{%1, %0|%0, %1}
+   movdq2q\t{%1, %0|%0, %1}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt")
+   (set_attr "mode" "TI")])
+
 (define_insn "sse2_movq2dq"
   [(set (match_operand:V2DI 0 "register_operand" "=x,?x")
 	(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y")
 			 (const_int 0)))]
-  "TARGET_SSE2"
+  "TARGET_SSE2 && !TARGET_64BIT"
   "@
    movq\t{%1, %0|%0, %1}
    movq2dq\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt,ssemov")
    (set_attr "mode" "TI")])
 
+(define_insn "sse2_movq2dq_rex64"
+  [(set (match_operand:V2DI 0 "register_operand" "=x,?x,?x")
+	(vec_concat:V2DI (match_operand:DI 1 "nonimmediate_operand" "m,y,r")
+			 (const_int 0)))]
+  "TARGET_SSE2 && TARGET_64BIT"
+  "@
+   movq\t{%1, %0|%0, %1}
+   movq2dq\t{%1, %0|%0, %1}
+   movd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssecvt,ssemov,ssecvt")
+   (set_attr "mode" "TI")])
+
 (define_insn "sse2_movq"
   [(set (match_operand:V2DI 0 "register_operand" "=x")
 	(vec_concat:V2DI (vec_select:DI
@@ -21673,7 +21938,7 @@
 (define_insn "sse2_loadd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(vec_merge:V4SI
-	 (vec_duplicate:V4HI (match_operand:SI 1 "nonimmediate_operand" "mr"))
+	 (vec_duplicate:V4SI (match_operand:SI 1 "nonimmediate_operand" "mr"))
 	 (const_vector:V4SI [(const_int 0)
 			     (const_int 0)
 			     (const_int 0)
@@ -21716,11 +21981,21 @@
   [(set_attr "type" "ssecvt")
    (set_attr "mode" "V2DF")])
 
-(define_insn "sse2_loadsd"
+(define_expand "sse2_loadsd"
+  [(match_operand:V2DF 0 "register_operand" "")
+   (match_operand:DF 1 "memory_operand" "")]
+  "TARGET_SSE2"
+{
+  emit_insn (gen_sse2_loadsd_1 (operands[0], operands[1],
+			        CONST0_RTX (V2DFmode)));
+  DONE;
+})
+
+(define_insn "sse2_loadsd_1"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
 	(vec_merge:V2DF
-	 (match_operand:DF 1 "memory_operand" "m")
-	 (vec_duplicate:DF (float:DF (const_int 0)))
+	 (vec_duplicate:V2DF (match_operand:DF 1 "memory_operand" "m"))
+	 (match_operand:V2DF 2 "const0_operand" "X")
 	 (const_int 1)))]
   "TARGET_SSE2"
   "movsd\t{%1, %0|%0, %1}"
diff --git a/gcc/config/i386/linux64.h b/gcc/config/i386/linux64.h
index ae346e6d518..7a9e0ba989f 100644
--- a/gcc/config/i386/linux64.h
+++ b/gcc/config/i386/linux64.h
@@ -1,5 +1,5 @@
 /* Definitions for AMD x86-64 running Linux-based GNU systems with ELF format.
-   Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Jan Hubicka <jh@suse.cz>, based on linux.h.
 
 This file is part of GNU CC.
@@ -36,6 +36,11 @@ Boston, MA 02111-1307, USA.  */
 	    builtin_define ("__PIC__");				\
 	    builtin_define ("__pic__");				\
 	  }							\
+	if (TARGET_64BIT)					\
+	  {							\
+	    builtin_define ("__LP64__");			\
+	    builtin_define ("_LP64");				\
+	  }							\
     }								\
   while (0)
 
@@ -116,17 +121,17 @@ Boston, MA 02111-1307, USA.  */
     (FS)->regs.reg[0].how = REG_SAVED_OFFSET;				\
     (FS)->regs.reg[0].loc.offset = (long)&sc_->rax - new_cfa_;		\
     (FS)->regs.reg[1].how = REG_SAVED_OFFSET;				\
-    (FS)->regs.reg[1].loc.offset = (long)&sc_->rbx - new_cfa_;		\
+    (FS)->regs.reg[1].loc.offset = (long)&sc_->rdx - new_cfa_;		\
     (FS)->regs.reg[2].how = REG_SAVED_OFFSET;				\
     (FS)->regs.reg[2].loc.offset = (long)&sc_->rcx - new_cfa_;		\
     (FS)->regs.reg[3].how = REG_SAVED_OFFSET;				\
-    (FS)->regs.reg[3].loc.offset = (long)&sc_->rdx - new_cfa_;		\
+    (FS)->regs.reg[3].loc.offset = (long)&sc_->rbx - new_cfa_;		\
     (FS)->regs.reg[4].how = REG_SAVED_OFFSET;				\
-    (FS)->regs.reg[4].loc.offset = (long)&sc_->rbp - new_cfa_;		\
+    (FS)->regs.reg[4].loc.offset = (long)&sc_->rsi - new_cfa_;		\
     (FS)->regs.reg[5].how = REG_SAVED_OFFSET;				\
-    (FS)->regs.reg[5].loc.offset = (long)&sc_->rsi - new_cfa_;		\
+    (FS)->regs.reg[5].loc.offset = (long)&sc_->rdi - new_cfa_;		\
     (FS)->regs.reg[6].how = REG_SAVED_OFFSET;				\
-    (FS)->regs.reg[6].loc.offset = (long)&sc_->rdi - new_cfa_;		\
+    (FS)->regs.reg[6].loc.offset = (long)&sc_->rbp - new_cfa_;		\
     (FS)->regs.reg[8].how = REG_SAVED_OFFSET;				\
     (FS)->regs.reg[8].loc.offset = (long)&sc_->r8 - new_cfa_;		\
     (FS)->regs.reg[9].how = REG_SAVED_OFFSET;				\
@@ -143,6 +148,8 @@ Boston, MA 02111-1307, USA.  */
     (FS)->regs.reg[14].loc.offset = (long)&sc_->r14 - new_cfa_;		\
     (FS)->regs.reg[15].how = REG_SAVED_OFFSET;				\
     (FS)->regs.reg[15].loc.offset = (long)&sc_->r15 - new_cfa_;		\
+    (FS)->regs.reg[16].how = REG_SAVED_OFFSET;				\
+    (FS)->regs.reg[16].loc.offset = (long)&sc_->rip - new_cfa_;		\
     (FS)->retaddr_column = 16;						\
     goto SUCCESS;							\
   } while (0)
diff --git a/gcc/config/i386/mingw32.h b/gcc/config/i386/mingw32.h
index e7c5e8b6bcc..7f62fbd5624 100644
--- a/gcc/config/i386/mingw32.h
+++ b/gcc/config/i386/mingw32.h
@@ -1,6 +1,7 @@
 /* Operating system specific defines to be used when targeting GCC for
    hosting on Windows32, using GNU tools and the Windows32 API Library.
-   Copyright (C) 1997, 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
+   Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -89,7 +90,7 @@ Boston, MA 02111-1307, USA.  */
 /* Include in the mingw32 libraries with libgcc */
 #undef LIBGCC_SPEC
 #define LIBGCC_SPEC \
-  "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmsvcrt"
+  "%{mthreads:-lmingwthrd} -lmingw32 -lgcc -lmoldname -lmingwex -lmsvcrt"
 
 #undef STARTFILE_SPEC
 #define STARTFILE_SPEC "%{shared|mdll:dllcrt2%O%s} \
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index 52e5195fbaf..7b4aa014645 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -56,6 +56,22 @@ _mm_cvtsi32_si64 (int __i)
   return (__m64) __tmp;
 }
 
+#ifdef __x86_64__
+/* Convert I to a __m64 object.  */
+static __inline __m64 
+_mm_cvtsi64x_si64 (long long __i)
+{
+  return (__m64) __i;
+}
+
+/* Convert I to a __m64 object.  */
+static __inline __m64 
+_mm_set_pi64x (long long __i)
+{
+  return (__m64) __i;
+}
+#endif
+
 /* Convert the lower 32 bits of the __m64 object into an integer.  */
 static __inline int
 _mm_cvtsi64_si32 (__m64 __i)
@@ -64,6 +80,15 @@ _mm_cvtsi64_si32 (__m64 __i)
   return __tmp;
 }
 
+#ifdef __x86_64__
+/* Convert the lower 32 bits of the __m64 object into an integer.  */
+static __inline long long
+_mm_cvtsi64_si64x (__m64 __i)
+{
+  return (long long)__i;
+}
+#endif
+
 /* Pack the four 16-bit values from M1 into the lower four 8-bit values of
    the result, and the four 16-bit values from M2 into the upper four 8-bit
    values of the result, all with signed saturation.  */
@@ -160,6 +185,13 @@ _mm_add_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
 }
 
+/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
+static __inline __m64
+_mm_add_si64 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
+}
+
 /* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
    saturated arithmetic.  */
 static __inline __m64
@@ -213,6 +245,13 @@ _mm_sub_pi32 (__m64 __m1, __m64 __m2)
   return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
 }
 
+/* Add the 64-bit values in M1 to the 64-bit values in M2.  */
+static __inline __m64
+_mm_sub_si64 (__m64 __m1, __m64 __m2)
+{
+  return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
+}
+
 /* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
    saturating arithmetic.  */
 static __inline __m64
diff --git a/gcc/config/i386/scodbx.h b/gcc/config/i386/scodbx.h
deleted file mode 100644
index 7da93053256..00000000000
--- a/gcc/config/i386/scodbx.h
+++ /dev/null
@@ -1,84 +0,0 @@
-/* Definitions for Intel 386 running SCO Unix System V,
-   using dbx-in-coff encapsulation.
-   Copyright (C) 1992, 1995, 1996, 1999 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#include "i386/svr3dbx.h"
-
-/* Overridden defines for SCO systems from sco.h. */
-
-/* By default, target has a 80387, uses IEEE compatible arithmetic,
-   and returns float values in the 387, ie,
-   (TARGET_80387 | TARGET_FLOAT_RETURNS_IN_80387)
-
-   SCO's software emulation of a 387 fails to handle the `fucomp'
-   opcode.  fucomp is only used when generating IEEE compliant code.
-   So don't make TARGET_IEEE_FP default for SCO. */
-
-#undef TARGET_SUBTARGET_DEFAULT
-#define TARGET_SUBTARGET_DEFAULT (MASK_80387 | MASK_FLOAT_RETURNS)
-
-/* Use crt1.o as a startup file and crtn.o as a closing file.  */
-
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC \
-  "%{!r:%{!z:svr3.ifile%s}%{z:svr3z.ifile%s}}\
-   %{pg:gcrt1.o%s}%{!pg:%{p:mcrt1.o%s}%{!p:crt1.o%s}}"
-
-/* Library spec, including SCO international language support. */
-
-#undef LIB_SPEC
-#define LIB_SPEC \
- "%{p:-L/usr/lib/libp}%{pg:-L/usr/lib/libp} %{scointl:libintl.a%s} -lc"
-
-/* Specify predefined symbols in preprocessor.  */
-
-#undef CPP_PREDEFINES
-#define CPP_PREDEFINES "-Dunix -DM_UNIX -DM_I386 -DM_COFF -DM_WORDSWAP -Asystem=svr3"
-
-#undef CPP_SPEC
-#define CPP_SPEC "%(cpp_cpu) %{scointl:-DM_INTERNAT}"
-
-/* This spec is used for telling cpp whether char is signed or not.  */
-
-#undef SIGNED_CHAR_SPEC
-#if DEFAULT_SIGNED_CHAR
-#define SIGNED_CHAR_SPEC \
- "%{funsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}"
-#else
-#define SIGNED_CHAR_SPEC \
- "%{!fsigned-char:-D__CHAR_UNSIGNED__ -D_CHAR_UNSIGNED}"
-#endif
-
-/* caller has to pop the extra argument passed to functions that return
-   structures. */
-
-#undef RETURN_POPS_ARGS
-#define RETURN_POPS_ARGS(FUNDECL,FUNTYPE,SIZE)   \
-  ((FUNDECL) && TREE_CODE (FUNDECL) == IDENTIFIER_NODE ? 0	\
-   : (TARGET_RTD						\
-      && (TYPE_ARG_TYPES (FUNTYPE) == 0				\
-	  || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (FUNTYPE)))	\
-	      == void_type_node))) ? (SIZE)			\
-   : 0)
-/* On other 386 systems, the last line looks like this:
-   : (aggregate_value_p (TREE_TYPE (FUNTYPE))) ? GET_MODE_SIZE (Pmode) : 0)  */
-
-/* Handle #pragma pack. */
-#define HANDLE_SYSV_PRAGMA
diff --git a/gcc/config/i386/t-sco5gas b/gcc/config/i386/t-sco5gas
index 2d0b48a6292..edeb554eea0 100644
--- a/gcc/config/i386/t-sco5gas
+++ b/gcc/config/i386/t-sco5gas
@@ -1,6 +1,6 @@
 # The pushl in CTOR initialization interferes with frame pointer elimination.
 CRTSTUFF_T_CFLAGS   = -fPIC -fno-omit-frame-pointer
-CRTSTUFF_T_CFLAGS_S = -mcoff -fno-omit-frame-pointer
+CRTSTUFF_T_CFLAGS_S = -fno-omit-frame-pointer
 
 #
 # I am still a little unsure of the multilib architecture. The following
diff --git a/gcc/config/i386/xm-dgux.h b/gcc/config/i386/xm-dgux.h
deleted file mode 100644
index 881c5c7be9d..00000000000
--- a/gcc/config/i386/xm-dgux.h
+++ /dev/null
@@ -1,4 +0,0 @@
-/* Configuration for GCC for Intel i386 running DG/ux */
-
-/* looks just like sysv4 for now */
-#include "xm-svr4.h"
diff --git a/gcc/config/i386/xm-sun.h b/gcc/config/i386/xm-sun.h
deleted file mode 100644
index 6c0f0a25630..00000000000
--- a/gcc/config/i386/xm-sun.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Configuration for GNU C-compiler for Intel 80386 running SunOS 4.0.
-   Copyright (C) 1988, 1997 Free Software Foundation, Inc.
-
-This file is part of GNU CC.
-
-GNU CC is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
-any later version.
-
-GNU CC is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with GNU CC; see the file COPYING.  If not, write to
-the Free Software Foundation, 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA.  */
-
-#define USG
diff --git a/gcc/config/i386/xm-sysv3.h b/gcc/config/i386/xm-sysv3.h
deleted file mode 100644
index 9a655443ff5..00000000000
--- a/gcc/config/i386/xm-sysv3.h
+++ /dev/null
@@ -1,3 +0,0 @@
-/* Configuration for GCC for Intel i386 running System V Release 3.  */
-
-#include "xm-svr3.h"
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 4136e901795..43a05c1a6ee 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -475,6 +475,16 @@ _mm_cvtss_si32 (__m128 __A)
   return __builtin_ia32_cvtss2si ((__v4sf) __A);
 }
 
+#ifdef __x86_64__
+/* Convert the lower SPFP value to a 32-bit integer according to the current
+   rounding mode.  */
+static __inline long long
+_mm_cvtss_si64x (__m128 __A)
+{
+  return __builtin_ia32_cvtss2si64 ((__v4sf) __A);
+}
+#endif
+
 /* Convert the two lower SPFP values to 32-bit integers according to the
    current rounding mode.  Return the integers in packed form.  */
 static __inline __m64
@@ -490,6 +500,15 @@ _mm_cvttss_si32 (__m128 __A)
   return __builtin_ia32_cvttss2si ((__v4sf) __A);
 }
 
+#ifdef __x86_64__
+/* Truncate the lower SPFP value to a 32-bit integer.  */
+static __inline long long
+_mm_cvttss_si64x (__m128 __A)
+{
+  return __builtin_ia32_cvttss2si64 ((__v4sf) __A);
+}
+#endif
+
 /* Truncate the two lower SPFP values to 32-bit integers.  Return the
    integers in packed form.  */
 static __inline __m64
@@ -505,6 +524,15 @@ _mm_cvtsi32_ss (__m128 __A, int __B)
   return (__m128) __builtin_ia32_cvtsi2ss ((__v4sf) __A, __B);
 }
 
+#ifdef __x86_64__
+/* Convert B to a SPFP value and insert it as element zero in A.  */
+static __inline __m128
+_mm_cvtsi64x_ss (__m128 __A, long long __B)
+{
+  return (__m128) __builtin_ia32_cvtsi642ss ((__v4sf) __A, __B);
+}
+#endif
+
 /* Convert the two 32-bit values in B to SPFP form and insert them
    as the two lower elements in A.  */
 static __inline __m128
@@ -1586,13 +1614,13 @@ _mm_ucomineq_sd (__m128d __A, __m128d __B)
 static __inline __m128i
 _mm_load_si128 (__m128i const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqa (__P);
+  return (__m128i) __builtin_ia32_loaddqa ((char const *)__P);
 }
 
 static __inline __m128i
 _mm_loadu_si128 (__m128i const *__P)
 {
-  return (__m128i) __builtin_ia32_loaddqu (__P);
+  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
 }
 
 static __inline __m128i
@@ -1604,13 +1632,13 @@ _mm_loadl_epi64 (__m128i const *__P)
 static __inline void
 _mm_store_si128 (__m128i *__P, __m128i __B)
 {
-  __builtin_ia32_storedqa (__P, (__v16qi)__B);
+  __builtin_ia32_storedqa ((char *)__P, (__v16qi)__B);
 }
 
 static __inline void
 _mm_storeu_si128 (__m128i *__P, __m128i __B)
 {
-  __builtin_ia32_storedqu (__P, (__v16qi)__B);
+  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
 }
 
 static __inline void
@@ -1619,6 +1647,12 @@ _mm_storel_epi64 (__m128i *__P, __m128i __B)
   *(long long *)__P = __builtin_ia32_movdq2q ((__v2di)__B);
 }
 
+static __inline __m64
+_mm_movepi64_pi64 (__m128i __B)
+{
+  return (__m64) __builtin_ia32_movdq2q ((__v2di)__B);
+}
+
 static __inline __m128i
 _mm_move_epi64 (__m128i __A)
 {
@@ -1656,6 +1690,24 @@ _mm_set_epi32 (int __Z, int __Y, int __X, int __W)
 
   return __u.__v;
 }
+
+#ifdef __x86_64__
+/* Create the vector [Z Y].  */
+static __inline __m128i
+_mm_set_epi64x (long long __Z, long long __Y)
+{
+  union {
+    long __a[2];
+    __m128i __v;
+  } __u;
+
+  __u.__a[0] = __Y;
+  __u.__a[1] = __Z;
+
+  return __u.__v;
+}
+#endif
+
 /* Create the vector [S T U V Z Y X W].  */
 static __inline __m128i
 _mm_set_epi16 (short __Z, short __Y, short __X, short __W,
@@ -1724,6 +1776,15 @@ _mm_set1_epi32 (int __A)
   return (__m128i) __builtin_ia32_pshufd ((__v4si)__tmp, _MM_SHUFFLE (0,0,0,0));
 }
 
+#ifdef __x86_64__
+static __inline __m128i
+_mm_set1_epi64x (long long __A)
+{
+  __v2di __tmp = (__v2di)__builtin_ia32_movq2dq ((unsigned long long)__A);
+  return (__m128i) __builtin_ia32_shufpd ((__v2df)__tmp, (__v2df)__tmp, _MM_SHUFFLE2 (0,0));
+}
+#endif
+
 static __inline __m128i
 _mm_set1_epi16 (short __A)
 {
@@ -1893,12 +1954,28 @@ _mm_cvtsd_si32 (__m128d __A)
   return __builtin_ia32_cvtsd2si ((__v2df) __A);
 }
 
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsd_si64x (__m128d __A)
+{
+  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
+}
+#endif
+
 static __inline int
 _mm_cvttsd_si32 (__m128d __A)
 {
   return __builtin_ia32_cvttsd2si ((__v2df) __A);
 }
 
+#ifdef __x86_64__
+static __inline long long
+_mm_cvttsd_si64x (__m128d __A)
+{
+  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
+}
+#endif
+
 static __inline __m128
 _mm_cvtsd_ss (__m128 __A, __m128d __B)
 {
@@ -1911,6 +1988,14 @@ _mm_cvtsi32_sd (__m128d __A, int __B)
   return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
 }
 
+#ifdef __x86_64__
+static __inline __m128d
+_mm_cvtsi64x_sd (__m128d __A, long long __B)
+{
+  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
+}
+#endif
+
 static __inline __m128d
 _mm_cvtss_sd (__m128d __A, __m128 __B)
 {
@@ -2048,7 +2133,7 @@ _mm_add_epi32 (__m128i __A, __m128i __B)
 static __inline __m128i
 _mm_add_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_paddq128 ((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
 }
 
 static __inline __m128i
@@ -2096,7 +2181,7 @@ _mm_sub_epi32 (__m128i __A, __m128i __B)
 static __inline __m128i
 _mm_sub_epi64 (__m128i __A, __m128i __B)
 {
-  return (__m128i)__builtin_ia32_psubq128 ((__v4si)__A, (__v4si)__B);
+  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
 }
 
 static __inline __m128i
@@ -2142,7 +2227,7 @@ _mm_mullo_epi16 (__m128i __A, __m128i __B)
 }
 
 static __inline __m64
-_mm_mul_pu16 (__m64 __A, __m64 __B)
+_mm_mul_su32 (__m64 __A, __m64 __B)
 {
   return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
 }
@@ -2459,6 +2544,14 @@ _mm_cvtsi32_si128 (int __A)
   return (__m128i) __builtin_ia32_loadd (&__A);
 }
 
+#ifdef __x86_64__
+static __inline __m128i
+_mm_cvtsi64x_si128 (long long __A)
+{
+  return (__m128i) __builtin_ia32_movq2dq (__A);
+}
+#endif
+
 static __inline int
 _mm_cvtsi128_si32 (__m128i __A)
 {
@@ -2467,6 +2560,14 @@ _mm_cvtsi128_si32 (__m128i __A)
   return __tmp;
 }
 
+#ifdef __x86_64__
+static __inline long long
+_mm_cvtsi128_si64x (__m128i __A)
+{
+  return __builtin_ia32_movdq2q ((__v2di)__A);
+}
+#endif
+
 #endif /* __SSE2__  */
 
 #endif /* __SSE__ */
diff --git a/gcc/config/ia64/aix.h b/gcc/config/ia64/aix.h
index c9dca9c4f12..5eb2334cb6a 100644
--- a/gcc/config/ia64/aix.h
+++ b/gcc/config/ia64/aix.h
@@ -134,12 +134,14 @@ do {							\
 #define STANDARD_STARTFILE_PREFIX "/usr/lib/ia64l64/"
 #endif
 
+/* It is illegal to have relocations in shared segments on AIX.
+   Pretend flag_pic is always set.  */
 #undef	TARGET_ASM_SELECT_SECTION
-#define TARGET_ASM_SELECT_SECTION  ia64_aix_select_section
+#define TARGET_ASM_SELECT_SECTION  ia64_rwreloc_select_section
 #undef	TARGET_ASM_UNIQUE_SECTION
-#define TARGET_ASM_UNIQUE_SECTION  ia64_aix_unique_section
+#define TARGET_ASM_UNIQUE_SECTION  ia64_rwreloc_unique_section
 #undef	TARGET_ASM_SELECT_RTX_SECTION
-#define TARGET_ASM_SELECT_RTX_SECTION  ia64_aix_select_rtx_section
+#define TARGET_ASM_SELECT_RTX_SECTION  ia64_rwreloc_select_rtx_section
 
 /* Override ia64/sysv4.h setting with that used by AIX5.  */
 #undef WCHAR_TYPE
diff --git a/gcc/config/ia64/crtbegin.asm b/gcc/config/ia64/crtbegin.asm
index 75ffc3d906c..cb49e10bc56 100644
--- a/gcc/config/ia64/crtbegin.asm
+++ b/gcc/config/ia64/crtbegin.asm
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
    Contributed by Jes Sorensen, <Jes.Sorensen@cern.ch>
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -61,6 +61,7 @@ __dso_handle:
 
 .section .init_array,"a","progbits"
 	data8 @fptr(__do_jv_register_classes)
+	data8 @fptr(__do_global_ctors_aux)
 
 #else /* !HAVE_INITFINI_ARRAY */
 /*
diff --git a/gcc/config/ia64/crtend.asm b/gcc/config/ia64/crtend.asm
index 17c3be41d89..303f30cbce8 100644
--- a/gcc/config/ia64/crtend.asm
+++ b/gcc/config/ia64/crtend.asm
@@ -1,4 +1,4 @@
-/* Copyright (C) 2000, 2001 Free Software Foundation, Inc.
+/* Copyright (C) 2000, 2001, 2003 Free Software Foundation, Inc.
    Contributed by Jes Sorensen, <Jes.Sorensen@cern.ch>
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -33,12 +33,7 @@ __DTOR_END__:
 __JCR_END__:
 	data8	0
 
-#ifdef HAVE_INITFINI_ARRAY
-
-.section .init_array,"a","progbits"
-	data8 @fptr(__do_global_ctors_aux)
-
-#else /* !HAVE_INITFINI_ARRAY */
+#ifndef HAVE_INITFINI_ARRAY
 /*
  * Fragment of the ELF _init routine that invokes our dtor cleanup.
  *
@@ -71,6 +66,12 @@ __JCR_END__:
 
 .text
 	.align 16
+#ifdef HAVE_INITFINI_ARRAY
+	/* This is referenced from crtbegin.o.  */
+	.globl __do_global_ctors_aux#
+	.type __do_global_ctors_aux#,@function
+	.hidden __do_global_ctors_aux#
+#endif
 	.proc __do_global_ctors_aux#
 __do_global_ctors_aux:
 	/*
diff --git a/gcc/config/ia64/hpux.h b/gcc/config/ia64/hpux.h
index 1e6f699835b..56c601b4cde 100644
--- a/gcc/config/ia64/hpux.h
+++ b/gcc/config/ia64/hpux.h
@@ -57,12 +57,6 @@ do {							\
 #undef STARTFILE_SPEC
 #define STARTFILE_SPEC "%{!shared:%{static:crt0%O%s}}"
 
-#ifndef CROSS_COMPILE
-#define STARTFILE_PREFIX_SPEC \
-  "%{mlp64: /usr/ccs/lib/hpux64/} \
-   %{!mlp64: /usr/ccs/lib/hpux32/}"
-#endif
-
 #undef LINK_SPEC
 #define LINK_SPEC \
   "+Accept TypeMismatch \
@@ -167,3 +161,14 @@ do {								\
 
 #undef TEXT_SECTION_ASM_OP
 #define TEXT_SECTION_ASM_OP "\t.section\t.text,\t\"ax\",\t\"progbits\""
+
+/* It is illegal to have relocations in shared segments on HPUX.
+   Pretend flag_pic is always set.  */
+#undef  TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION  ia64_rwreloc_select_section
+#undef  TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION  ia64_rwreloc_unique_section
+#undef  TARGET_ASM_SELECT_RTX_SECTION
+#define TARGET_ASM_SELECT_RTX_SECTION  ia64_rwreloc_select_rtx_section
+#undef  TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS  ia64_rwreloc_section_type_flags
diff --git a/gcc/config/ia64/ia64-c.c b/gcc/config/ia64/ia64-c.c
index c19a5860532..e44c80b25a5 100644
--- a/gcc/config/ia64/ia64-c.c
+++ b/gcc/config/ia64/ia64-c.c
@@ -26,6 +26,7 @@ Boston, MA 02111-1307, USA.  */
 #include "c-common.h"
 #include "c-pragma.h"
 #include "toplev.h"
+#include "tm_p.h"
 
 static void ia64_hpux_add_pragma_builtin PARAMS ((tree func));
 
diff --git a/gcc/config/ia64/ia64-protos.h b/gcc/config/ia64/ia64-protos.h
index 36af17aa70a..72c2279cb04 100644
--- a/gcc/config/ia64/ia64-protos.h
+++ b/gcc/config/ia64/ia64-protos.h
@@ -72,12 +72,14 @@ extern int basereg_operand PARAMS((rtx, enum machine_mode));
 
 extern rtx ia64_expand_move PARAMS ((rtx, rtx));
 extern int ia64_move_ok PARAMS((rtx, rtx));
+extern int addp4_optimize_ok PARAMS((rtx, rtx));
 extern int ia64_depz_field_mask PARAMS((rtx, rtx));
-extern rtx ia64_gp_save_reg PARAMS((int));
 extern rtx ia64_split_timode PARAMS((rtx[], rtx, rtx));
 extern rtx spill_tfmode_operand PARAMS((rtx, int));
 extern rtx ia64_expand_compare PARAMS((enum rtx_code, enum machine_mode));
 extern void ia64_expand_call PARAMS((rtx, rtx, rtx, int));
+extern void ia64_split_call PARAMS((rtx, rtx, rtx, rtx, rtx, int, int));
+extern void ia64_reload_gp PARAMS((void));
 
 extern HOST_WIDE_INT ia64_initial_elimination_offset PARAMS((int, int));
 extern void ia64_expand_prologue PARAMS((void));
@@ -131,6 +133,7 @@ extern void emit_safe_across_calls PARAMS((FILE *));
 extern void ia64_init_builtins PARAMS((void));
 extern void ia64_override_options PARAMS((void));
 extern int ia64_dbx_register_number PARAMS((int));
+extern bool ia64_function_ok_for_sibcall PARAMS ((tree));
 
 #ifdef SDATA_SECTION_ASM_OP
 extern void sdata_section PARAMS ((void));
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 10f06665999..a8b13f944f1 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler.
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by James E. Wilson <wilson@cygnus.com> and
    		  David Mosberger <davidm@hpl.hp.com>.
 
@@ -45,6 +45,7 @@ Boston, MA 02111-1307, USA.  */
 #include "target.h"
 #include "target-def.h"
 #include "tm_p.h"
+#include "langhooks.h"
 
 /* This is used for communication between ASM_OUTPUT_LABEL and
    ASM_OUTPUT_LABELREF.  */
@@ -109,6 +110,39 @@ static int ia64_flag_schedule_insns2;
    sections.  */
 
 unsigned int ia64_section_threshold;
+
+/* Structure to be filled in by ia64_compute_frame_size with register
+   save masks and offsets for the current function.  */
+
+struct ia64_frame_info
+{
+  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
+				   the caller's scratch area.  */
+  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
+  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
+  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
+  HARD_REG_SET mask;		/* mask of saved registers.  */
+  unsigned int gr_used_mask;	/* mask of registers in use as gr spill 
+				   registers or long-term scratches.  */
+  int n_spilled;		/* number of spilled registers.  */
+  int reg_fp;			/* register for fp.  */
+  int reg_save_b0;		/* save register for b0.  */
+  int reg_save_pr;		/* save register for prs.  */
+  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
+  int reg_save_ar_unat;		/* save register for ar.unat.  */
+  int reg_save_ar_lc;		/* save register for ar.lc.  */
+  int reg_save_gp;		/* save register for gp.  */
+  int n_input_regs;		/* number of input registers used.  */
+  int n_local_regs;		/* number of local registers used.  */
+  int n_output_regs;		/* number of output registers used.  */
+  int n_rotate_regs;		/* number of rotating registers used.  */
+
+  char need_regstk;		/* true if a .regstk directive needed.  */
+  char initialized;		/* true if the data is finalized.  */
+};
+
+/* Current frame information calculated by ia64_compute_frame_size.  */
+static struct ia64_frame_info current_frame_info;
 
 static rtx gen_tls_get_addr PARAMS ((void));
 static rtx gen_thread_pointer PARAMS ((void));
@@ -166,13 +200,16 @@ static void ia64_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
 
 static void ia64_select_rtx_section PARAMS ((enum machine_mode, rtx,
 					     unsigned HOST_WIDE_INT));
-static void ia64_aix_select_section PARAMS ((tree, int,
-					     unsigned HOST_WIDE_INT))
+static void ia64_rwreloc_select_section PARAMS ((tree, int,
+					         unsigned HOST_WIDE_INT))
      ATTRIBUTE_UNUSED;
-static void ia64_aix_unique_section PARAMS ((tree, int))
+static void ia64_rwreloc_unique_section PARAMS ((tree, int))
      ATTRIBUTE_UNUSED;
-static void ia64_aix_select_rtx_section PARAMS ((enum machine_mode, rtx,
-					         unsigned HOST_WIDE_INT))
+static void ia64_rwreloc_select_rtx_section PARAMS ((enum machine_mode, rtx,
+					             unsigned HOST_WIDE_INT))
+     ATTRIBUTE_UNUSED;
+static unsigned int ia64_rwreloc_section_type_flags
+     PARAMS ((tree, const char *, int))
      ATTRIBUTE_UNUSED;
 
 static void ia64_hpux_add_extern_decl PARAMS ((const char *name))
@@ -974,6 +1011,21 @@ ia64_move_ok (dst, src)
     return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
 }
 
+/* Return 0 if we are doing C++ code.  This optimization fails with
+   C++ because of GNAT c++/6685.  */
+
+int
+addp4_optimize_ok (op1, op2)
+     rtx op1, op2;
+{
+
+  if (!strcmp (lang_hooks.name, "GNU C++"))
+    return 0;
+
+  return (basereg_operand (op1, GET_MODE(op1)) !=
+	  basereg_operand (op2, GET_MODE(op2)));
+}
+
 /* Check if OP is a mask suitible for use with SHIFT in a dep.z instruction.
    Return the length of the field, or <= 0 on failure.  */
 
@@ -1246,46 +1298,6 @@ ia64_expand_move (op0, op1)
   return op1;
 }
 
-rtx
-ia64_gp_save_reg (setjmp_p)
-     int setjmp_p;
-{
-  rtx save = cfun->machine->ia64_gp_save;
-
-  if (save != NULL)
-    {
-      /* We can't save GP in a pseudo if we are calling setjmp, because
-	 pseudos won't be restored by longjmp.  For now, we save it in r4.  */
-      /* ??? It would be more efficient to save this directly into a stack
-	 slot.  Unfortunately, the stack slot address gets cse'd across
-	 the setjmp call because the NOTE_INSN_SETJMP note is in the wrong
-	 place.  */
-
-      /* ??? Get the barf bag, Virginia.  We've got to replace this thing
-         in place, since this rtx is used in exception handling receivers.
-         Moreover, we must get this rtx out of regno_reg_rtx or reload
-         will do the wrong thing.  */
-      unsigned int old_regno = REGNO (save);
-      if (setjmp_p && old_regno != GR_REG (4))
-        {
-          REGNO (save) = GR_REG (4);
-          regno_reg_rtx[old_regno] = gen_rtx_raw_REG (DImode, old_regno);
-        }
-    }
-  else
-    {
-      if (setjmp_p)
-	save = gen_rtx_REG (DImode, GR_REG (4));
-      else if (! optimize)
-	save = gen_rtx_REG (DImode, LOC_REG (0));
-      else
-	save = gen_reg_rtx (DImode);
-      cfun->machine->ia64_gp_save = save;
-    }
-
-  return save;
-}
-
 /* Split a post-reload TImode reference into two DImode components.  */
 
 rtx
@@ -1371,12 +1383,12 @@ spill_tfmode_operand (in, force)
       && GET_MODE (SUBREG_REG (in)) == TImode
       && GET_CODE (SUBREG_REG (in)) == REG)
     {
-      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE);
+      rtx mem = gen_mem_addressof (SUBREG_REG (in), NULL_TREE, true);
       return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
     }
   else if (force && GET_CODE (in) == REG)
     {
-      rtx mem = gen_mem_addressof (in, NULL_TREE);
+      rtx mem = gen_mem_addressof (in, NULL_TREE, true);
       return gen_rtx_MEM (TFmode, copy_to_reg (XEXP (mem, 0)));
     }
   else if (GET_CODE (in) == MEM
@@ -1418,72 +1430,151 @@ ia64_expand_compare (code, mode)
 }
 
 /* Emit the appropriate sequence for a call.  */
-
 void
 ia64_expand_call (retval, addr, nextarg, sibcall_p)
      rtx retval;
      rtx addr;
-     rtx nextarg;
+     rtx nextarg ATTRIBUTE_UNUSED;
      int sibcall_p;
 {
-  rtx insn, b0, pfs, gp_save, narg_rtx, dest;
-  bool indirect_p;
-  int narg;
+  rtx insn, b0;
 
   addr = XEXP (addr, 0);
   b0 = gen_rtx_REG (DImode, R_BR (0));
-  pfs = gen_rtx_REG (DImode, AR_PFS_REGNUM);
-
-  if (! nextarg)
-    narg = 0;
-  else if (IN_REGNO_P (REGNO (nextarg)))
-    narg = REGNO (nextarg) - IN_REG (0);
-  else
-    narg = REGNO (nextarg) - OUT_REG (0);
-  narg_rtx = GEN_INT (narg);
 
+  /* ??? Should do this for functions known to bind local too.  */
   if (TARGET_NO_PIC || TARGET_AUTO_PIC)
     {
       if (sibcall_p)
-	insn = gen_sibcall_nopic (addr, narg_rtx, b0, pfs);
+	insn = gen_sibcall_nogp (addr);
       else if (! retval)
-	insn = gen_call_nopic (addr, narg_rtx, b0);
+	insn = gen_call_nogp (addr, b0);
       else
-	insn = gen_call_value_nopic (retval, addr, narg_rtx, b0);
-      emit_call_insn (insn);
-      return;
+	insn = gen_call_value_nogp (retval, addr, b0);
+      insn = emit_call_insn (insn);
     }
-
-  indirect_p = ! symbolic_operand (addr, VOIDmode);
-
-  if (sibcall_p || (TARGET_CONST_GP && !indirect_p))
-    gp_save = NULL_RTX;
   else
-    gp_save = ia64_gp_save_reg (setjmp_operand (addr, VOIDmode));
+    {
+      if (sibcall_p)
+	insn = gen_sibcall_gp (addr);
+      else if (! retval)
+	insn = gen_call_gp (addr, b0);
+      else
+	insn = gen_call_value_gp (retval, addr, b0);
+      insn = emit_call_insn (insn);
 
-  if (gp_save)
-    emit_move_insn (gp_save, pic_offset_table_rtx);
+      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+    }
 
-  /* If this is an indirect call, then we have the address of a descriptor.  */
-  if (indirect_p)
+  if (sibcall_p)
     {
-      dest = force_reg (DImode, gen_rtx_MEM (DImode, addr));
-      emit_move_insn (pic_offset_table_rtx,
-		      gen_rtx_MEM (DImode, plus_constant (addr, 8)));
+      use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);
+      use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
+	       gen_rtx_REG (DImode, AR_PFS_REGNUM));
     }
+}
+void
+ia64_reload_gp ()
+{
+  rtx tmp;
+
+  if (current_frame_info.reg_save_gp)
+    tmp = gen_rtx_REG (DImode, current_frame_info.reg_save_gp);
   else
-    dest = addr;
+    {
+      HOST_WIDE_INT offset;
+
+      offset = (current_frame_info.spill_cfa_off
+	        + current_frame_info.spill_size);
+      if (frame_pointer_needed)
+        {
+          tmp = hard_frame_pointer_rtx;
+          offset = -offset;
+        }
+      else
+        {
+          tmp = stack_pointer_rtx;
+          offset = current_frame_info.total_size - offset;
+        }
+
+      if (CONST_OK_FOR_I (offset))
+        emit_insn (gen_adddi3 (pic_offset_table_rtx,
+			       tmp, GEN_INT (offset)));
+      else
+        {
+          emit_move_insn (pic_offset_table_rtx, GEN_INT (offset));
+          emit_insn (gen_adddi3 (pic_offset_table_rtx,
+			         pic_offset_table_rtx, tmp));
+        }
+
+      tmp = gen_rtx_MEM (DImode, pic_offset_table_rtx);
+    }
+
+  emit_move_insn (pic_offset_table_rtx, tmp);
+}
+
+void
+ia64_split_call (retval, addr, retaddr, scratch_r, scratch_b,
+		 noreturn_p, sibcall_p)
+     rtx retval, addr, retaddr, scratch_r, scratch_b;
+     int noreturn_p, sibcall_p;
+{
+  rtx insn;
+  bool is_desc = false;
+
+  /* If we find we're calling through a register, then we're actually
+     calling through a descriptor, so load up the values.  */
+  if (REG_P (addr))
+    {
+      rtx tmp;
+      bool addr_dead_p;
+
+      /* ??? We are currently constrained to *not* use peep2, because
+	 we can legitimiately change the global lifetime of the GP
+	 (in the form of killing where previously live).  This is 
+	 because a call through a descriptor doesn't use the previous
+	 value of the GP, while a direct call does, and we do not
+	 commit to either form until the split here.
+
+	 That said, this means that we lack precise life info for
+	 whether ADDR is dead after this call.  This is not terribly
+	 important, since we can fix things up essentially for free
+	 with the POST_DEC below, but it's nice to not use it when we
+	 can immediately tell it's not necessary.  */
+      addr_dead_p = ((noreturn_p || sibcall_p
+		      || TEST_HARD_REG_BIT (regs_invalidated_by_call,
+					    REGNO (addr)))
+		     && !FUNCTION_ARG_REGNO_P (REGNO (addr)));
+
+      /* Load the code address into scratch_b.  */
+      tmp = gen_rtx_POST_INC (Pmode, addr);
+      tmp = gen_rtx_MEM (Pmode, tmp);
+      emit_move_insn (scratch_r, tmp);
+      emit_move_insn (scratch_b, scratch_r);
+
+      /* Load the GP address.  If ADDR is not dead here, then we must
+	 revert the change made above via the POST_INCREMENT.  */
+      if (!addr_dead_p)
+	tmp = gen_rtx_POST_DEC (Pmode, addr);
+      else
+	tmp = addr;
+      tmp = gen_rtx_MEM (Pmode, tmp);
+      emit_move_insn (pic_offset_table_rtx, tmp);
+
+      is_desc = true;
+      addr = scratch_b;
+    }
 
   if (sibcall_p)
-    insn = gen_sibcall_pic (dest, narg_rtx, b0, pfs);
-  else if (! retval)
-    insn = gen_call_pic (dest, narg_rtx, b0);
+    insn = gen_sibcall_nogp (addr);
+  else if (retval)
+    insn = gen_call_value_nogp (retval, addr, retaddr);
   else
-    insn = gen_call_value_pic (retval, dest, narg_rtx, b0);
+    insn = gen_call_nogp (addr, retaddr);
   emit_call_insn (insn);
 
-  if (gp_save)
-    emit_move_insn (pic_offset_table_rtx, gp_save);
+  if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
+    ia64_reload_gp ();
 }
 
 /* Begin the assembly file.  */
@@ -1522,39 +1613,6 @@ emit_safe_across_calls (f)
     fputc ('\n', f);
 }
 
-
-/* Structure to be filled in by ia64_compute_frame_size with register
-   save masks and offsets for the current function.  */
-
-struct ia64_frame_info
-{
-  HOST_WIDE_INT total_size;	/* size of the stack frame, not including
-				   the caller's scratch area.  */
-  HOST_WIDE_INT spill_cfa_off;	/* top of the reg spill area from the cfa.  */
-  HOST_WIDE_INT spill_size;	/* size of the gr/br/fr spill area.  */
-  HOST_WIDE_INT extra_spill_size;  /* size of spill area for others.  */
-  HARD_REG_SET mask;		/* mask of saved registers.  */
-  unsigned int gr_used_mask;	/* mask of registers in use as gr spill 
-				   registers or long-term scratches.  */
-  int n_spilled;		/* number of spilled registers.  */
-  int reg_fp;			/* register for fp.  */
-  int reg_save_b0;		/* save register for b0.  */
-  int reg_save_pr;		/* save register for prs.  */
-  int reg_save_ar_pfs;		/* save register for ar.pfs.  */
-  int reg_save_ar_unat;		/* save register for ar.unat.  */
-  int reg_save_ar_lc;		/* save register for ar.lc.  */
-  int n_input_regs;		/* number of input registers used.  */
-  int n_local_regs;		/* number of local registers used.  */
-  int n_output_regs;		/* number of output registers used.  */
-  int n_rotate_regs;		/* number of rotating registers used.  */
-
-  char need_regstk;		/* true if a .regstk directive needed.  */
-  char initialized;		/* true if the data is finalized.  */
-};
-
-/* Current frame information calculated by ia64_compute_frame_size.  */
-static struct ia64_frame_info current_frame_info;
-
 /* Helper function for ia64_compute_frame_size: find an appropriate general
    register to spill some special register to.  SPECIAL_SPILL_MASK contains
    bits in GR0 to GR31 that have already been allocated by this routine.
@@ -1796,6 +1854,17 @@ ia64_compute_frame_size (size)
 	  extra_spill_size += 8;
 	  n_spilled += 1;
 	}
+
+      /* Similarly for gp.  Note that if we're calling setjmp, the stacked
+	 registers are clobbered, so we fall back to the stack.  */
+      current_frame_info.reg_save_gp
+	= (current_function_calls_setjmp ? 0 : find_gr_spill (1));
+      if (current_frame_info.reg_save_gp == 0)
+	{
+	  SET_HARD_REG_BIT (mask, GR_REG (1));
+	  spill_size += 8;
+	  n_spilled += 1;
+	}
     }
   else
     {
@@ -1805,6 +1874,17 @@ ia64_compute_frame_size (size)
 	  spill_size += 8;
 	  n_spilled += 1;
 	}
+
+      if (regs_ever_live[AR_PFS_REGNUM])
+	{
+	  SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
+	  current_frame_info.reg_save_ar_pfs = find_gr_spill (1);
+	  if (current_frame_info.reg_save_ar_pfs == 0)
+	    {
+	      extra_spill_size += 8;
+	      n_spilled += 1;
+	    }
+	}
     }
 
   /* Unwind descriptor hackery: things are most efficient if we allocate
@@ -1843,8 +1923,10 @@ ia64_compute_frame_size (size)
     }
 
   /* If we're forced to use st8.spill, we're forced to save and restore
-     ar.unat as well.  */
-  if (spilled_gr_p || cfun->machine->n_varargs)
+     ar.unat as well.  The check for existing liveness allows inline asm
+     to touch ar.unat.  */
+  if (spilled_gr_p || cfun->machine->n_varargs
+      || regs_ever_live[AR_UNAT_REGNUM])
     {
       regs_ever_live[AR_UNAT_REGNUM] = 1;
       SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
@@ -2305,7 +2387,8 @@ ia64_expand_prologue ()
   /* We don't need an alloc instruction if we've used no outputs or locals.  */
   if (current_frame_info.n_local_regs == 0
       && current_frame_info.n_output_regs == 0
-      && current_frame_info.n_input_regs <= current_function_args_info.int_regs)
+      && current_frame_info.n_input_regs <= current_function_args_info.int_regs
+      && !TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM))
     {
       /* If there is no alloc, but there are input registers used, then we
 	 need a .regstk directive.  */
@@ -2467,8 +2550,8 @@ ia64_expand_prologue ()
   /* The alloc insn already copied ar.pfs into a general register.  The
      only thing we have to do now is copy that register to a stack slot
      if we'd not allocated a local register for the job.  */
-  if (current_frame_info.reg_save_ar_pfs == 0
-      && ! current_function_is_leaf)
+  if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_PFS_REGNUM)
+      && current_frame_info.reg_save_ar_pfs == 0)
     {
       reg = gen_rtx_REG (DImode, AR_PFS_REGNUM);
       do_spill (gen_movdi_x, ar_pfs_save_reg, cfa_off, reg);
@@ -2499,6 +2582,19 @@ ia64_expand_prologue ()
 	}
     }
 
+  if (current_frame_info.reg_save_gp)
+    {
+      insn = emit_move_insn (gen_rtx_REG (DImode,
+					  current_frame_info.reg_save_gp),
+			     pic_offset_table_rtx);
+      /* We don't know for sure yet if this is actually needed, since
+	 we've not split the PIC call patterns.  If all of the calls
+	 are indirect, and not followed by any uses of the gp, then
+	 this save is dead.  Allow it to go away.  */
+      REG_NOTES (insn)
+	= gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, REG_NOTES (insn));
+    }
+
   /* We should now be at the base of the gr/br/fr spill area.  */
   if (cfa_off != (current_frame_info.spill_cfa_off
 		  + current_frame_info.spill_size))
@@ -2680,8 +2776,13 @@ ia64_expand_epilogue (sibcall_p)
 		  + current_frame_info.spill_size))
     abort ();
 
+  /* The GP may be stored on the stack in the prologue, but it's
+     never restored in the epilogue.  Skip the stack slot.  */
+  if (TEST_HARD_REG_BIT (current_frame_info.mask, GR_REG (1)))
+    cfa_off -= 8;
+
   /* Restore all general registers.  */
-  for (regno = GR_REG (1); regno <= GR_REG (31); ++regno)
+  for (regno = GR_REG (2); regno <= GR_REG (31); ++regno)
     if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
       {
 	reg = gen_rtx_REG (DImode, regno);
@@ -2869,10 +2970,6 @@ ia64_hard_regno_rename_ok (from, to)
   if (PR_REGNO_P (from) && PR_REGNO_P (to))
     return (from & 1) == (to & 1);
 
-  /* Reg 4 contains the saved gp; we can't reliably rename this.  */
-  if (from == GR_REG (4) && current_function_calls_setjmp)
-    return 0;
-
   return 1;
 }
 
@@ -4082,7 +4179,6 @@ ia64_secondary_reload_class (class, mode, x)
 
   return NO_REGS;
 }
-
 
 /* Emit text to declare externally defined variables and functions, because
    the Intel assembler does not support undefined externals.  */
@@ -5621,7 +5717,9 @@ ia64_adjust_cost (insn, link, dep_insn, cost)
 	    addr = XVECEXP (addr, 0, 0);
 	  while (GET_CODE (addr) == SUBREG || GET_CODE (addr) == ZERO_EXTEND)
 	    addr = XEXP (addr, 0);
-	  if (GET_CODE (addr) == MEM)
+
+	  /* Note that LO_SUM is used for GOT loads.  */
+	  if (GET_CODE (addr) == MEM || GET_CODE (addr) == LO_SUM)
 	    addr = XEXP (addr, 0);
 	  else
 	    addr = 0;
@@ -7243,6 +7341,23 @@ ia64_strip_name_encoding (str)
     str++;
   return str;
 }
+
+/* True if it is OK to do sibling call optimization for the specified
+   call expression EXP.  DECL will be the called function, or NULL if
+   this is an indirect call.  */
+bool
+ia64_function_ok_for_sibcall (decl)
+     tree decl;
+{
+  /* Direct calls are always ok.  */
+  if (decl)
+    return true;
+
+  /* If TARGET_CONST_GP is in effect, then our caller expects us to
+     return with our current GP.  This means that we'll always have
+     a GP reload after an indirect call.  */
+  return !ia64_epilogue_uses (R_GR (1));
+}
 
 /* Output assembly directives for prologue regions.  */
 
@@ -7764,7 +7879,7 @@ ia64_expand_fetch_and_op (binoptab, mode, arglist, target)
      do {
        old = tmp;
        ar.ccv = tmp;
-       ret = tmp + value;
+       ret = tmp <op> value;
        cmpxchgsz.acq tmp = [ptr], ret
      } while (tmp != old)
 */
@@ -7867,8 +7982,15 @@ ia64_expand_compare_and_swap (mode, boolp, arglist, target)
   else
     tmp = gen_reg_rtx (mode);
 
-  ccv = gen_rtx_REG (mode, AR_CCV_REGNUM);
-  emit_move_insn (ccv, old);
+  ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
+  if (mode == DImode)
+    emit_move_insn (ccv, old);
+  else
+    {
+      rtx ccvtmp = gen_reg_rtx (DImode);
+      emit_insn (gen_zero_extendsidi2 (ccvtmp, old));
+      emit_move_insn (ccv, ccvtmp);
+    }
   emit_insn (gen_mf ());
   if (mode == SImode)
     insn = gen_cmpxchg_acq_si (tmp, mem, new, ccv);
@@ -8180,34 +8302,28 @@ ia64_select_rtx_section (mode, x, align)
     default_elf_select_rtx_section (mode, x, align);
 }
 
-/* It is illegal to have relocations in shared segments on AIX.
+/* It is illegal to have relocations in shared segments on AIX and HPUX.
    Pretend flag_pic is always set.  */
 
 static void
-ia64_aix_select_section (exp, reloc, align)
+ia64_rwreloc_select_section (exp, reloc, align)
      tree exp;
      int reloc;
      unsigned HOST_WIDE_INT align;
 {
-  int save_pic = flag_pic;
-  flag_pic = 1;
-  default_elf_select_section (exp, reloc, align);
-  flag_pic = save_pic;
+  default_elf_select_section_1 (exp, reloc, align, true);
 }
 
 static void
-ia64_aix_unique_section (decl, reloc)
+ia64_rwreloc_unique_section (decl, reloc)
      tree decl;
      int reloc;
 {
-  int save_pic = flag_pic;
-  flag_pic = 1;
-  default_unique_section (decl, reloc);
-  flag_pic = save_pic;
+  default_unique_section_1 (decl, reloc, true);
 }
 
 static void
-ia64_aix_select_rtx_section (mode, x, align)
+ia64_rwreloc_select_rtx_section (mode, x, align)
      enum machine_mode mode;
      rtx x;
      unsigned HOST_WIDE_INT align;
@@ -8218,6 +8334,16 @@ ia64_aix_select_rtx_section (mode, x, align)
   flag_pic = save_pic;
 }
 
+static unsigned int
+ia64_rwreloc_section_type_flags (decl, name, reloc)
+     tree decl;
+     const char *name;
+     int reloc;
+{
+  return default_section_type_flags_1 (decl, name, reloc, true);
+}
+
+
 /* Output the assembler code for a thunk function.  THUNK_DECL is the
    declaration for the thunk function itself, FUNCTION is the decl for
    the target function.  DELTA is an immediate constant offset to be
@@ -8234,6 +8360,9 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
 {
   rtx this, insn, funexp;
 
+  reload_completed = 1;
+  no_new_pseudos = 1;
+
   /* Set things up as ia64_expand_prologue might.  */
   last_scratch_gr_reg = 15;
 
@@ -8296,18 +8425,27 @@ ia64_output_mi_thunk (file, thunk, delta, vcall_offset, function)
   ia64_expand_call (NULL_RTX, funexp, NULL_RTX, 1);
   insn = get_last_insn ();
   SIBLING_CALL_P (insn) = 1;
+
+  /* Code generation for calls relies on splitting.  */
+  reload_completed = 1;
+  try_split (PATTERN (insn), insn, 0);
+
   emit_barrier ();
 
   /* Run just enough of rest_of_compilation to get the insns emitted.
      There's not really enough bulk here to make other passes such as
      instruction scheduling worth while.  Note that use_thunk calls
      assemble_start_function and assemble_end_function.  */
+
   insn = get_insns ();
   emit_all_insn_group_barriers (NULL, insn);
   shorten_branches (insn);
   final_start_function (insn, file, 1);
   final (insn, file, 1, 0);
   final_end_function ();
+
+  reload_completed = 0;
+  no_new_pseudos = 0;
 }
 
 #include "gt-ia64.h"
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index 59f60d05bfa..724405a8c5f 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -140,6 +140,10 @@ extern int ia64_tls_size;
 
 #define TARGET_HPUX_LD		0
 
+#ifndef HAVE_AS_LTOFFX_LDXMOV_RELOCS
+#define HAVE_AS_LTOFFX_LDXMOV_RELOCS 0
+#endif
+
 /* This macro defines names of command options to set and clear bits in
    `target_flags'.  Its definition is an initializer with a subgrouping for
    each command option.  */
@@ -1011,8 +1015,9 @@ enum reg_class
 /* In FP regs, we can't change FP values to integer values and vice
    versa, but we can change e.g. DImode to SImode.  */
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO) 	\
-  (GET_MODE_CLASS (FROM) != GET_MODE_CLASS (TO) ? FR_REGS : NO_REGS)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) 	\
+  (GET_MODE_CLASS (FROM) != GET_MODE_CLASS (TO)		\
+   ? reg_classes_intersect_p (CLASS, FR_REGS) : 0)
 
 /* A C expression that defines the machine-dependent operand constraint
    letters (`I', `J', `K', .. 'P') that specify particular ranges of
@@ -1808,61 +1813,6 @@ do {									\
 #define ASM_APP_OFF "#NO_APP\n"
 
 
-/* Output of Data.  */
-
-/* This is how to output an assembler line defining a `char' constant
-   to an xdata segment.  */
-
-#define ASM_OUTPUT_XDATA_CHAR(FILE, SECTION, VALUE)			\
-do {									\
-  fprintf (FILE, "\t.xdata1\t\"%s\", ", SECTION);			\
-  output_addr_const (FILE, (VALUE));					\
-  fprintf (FILE, "\n");							\
-} while (0)
-
-/* This is how to output an assembler line defining a `short' constant
-   to an xdata segment.  */
-
-#define ASM_OUTPUT_XDATA_SHORT(FILE, SECTION, VALUE)			\
-do {									\
-  fprintf (FILE, "\t.xdata2\t\"%s\", ", SECTION);			\
-  output_addr_const (FILE, (VALUE));					\
-  fprintf (FILE, "\n");							\
-} while (0)
-
-/* This is how to output an assembler line defining an `int' constant
-   to an xdata segment.  We also handle symbol output here.  */
-
-/* ??? For ILP32, also need to handle function addresses here.  */
-
-#define ASM_OUTPUT_XDATA_INT(FILE, SECTION, VALUE)			\
-do {									\
-  fprintf (FILE, "\t.xdata4\t\"%s\", ", SECTION);			\
-  output_addr_const (FILE, (VALUE));					\
-  fprintf (FILE, "\n");							\
-} while (0)
-
-/* This is how to output an assembler line defining a `long' constant
-   to an xdata segment.  We also handle symbol output here.  */
-
-#define ASM_OUTPUT_XDATA_DOUBLE_INT(FILE, SECTION, VALUE)		\
-do {									\
-  int need_closing_paren = 0;						\
-  fprintf (FILE, "\t.xdata8\t\"%s\", ", SECTION);			\
-  if (!(TARGET_NO_PIC || TARGET_AUTO_PIC)				\
-      && GET_CODE (VALUE) == SYMBOL_REF)				\
-    {									\
-      fprintf (FILE, SYMBOL_REF_FLAG (VALUE) ? "@fptr(" : "@segrel(");	\
-      need_closing_paren = 1;						\
-    }									\
-  output_addr_const (FILE, VALUE);					\
-  if (need_closing_paren)						\
-    fprintf (FILE, ")");						\
-  fprintf (FILE, "\n");							\
-} while (0)
-
-
-
 /* Output of Uninitialized Variables.  */
 
 /* This is all handled by svr4.h.  */
@@ -2132,8 +2082,13 @@ do {									\
 
 /* ??? Depends on the pointer size.  */
 
-#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL) \
-  fprintf (STREAM, "\tdata8 @pcrel(.L%d)\n", VALUE)
+#define ASM_OUTPUT_ADDR_DIFF_ELT(STREAM, BODY, VALUE, REL)	\
+  do {								\
+  if (TARGET_ILP32)						\
+    fprintf (STREAM, "\tdata4 @pcrel(.L%d)\n", VALUE);		\
+  else								\
+    fprintf (STREAM, "\tdata8 @pcrel(.L%d)\n", VALUE);		\
+  } while (0)
 
 /* This is how to output an element of a case-vector that is absolute.
    (Ia64 does not use such vectors, but we must define this macro anyway.)  */
@@ -2152,7 +2107,8 @@ do {									\
    true if the symbol may be affected by dynamic relocations.  */
 #define ASM_PREFERRED_EH_DATA_FORMAT(CODE,GLOBAL)	\
   (((CODE) == 1 ? DW_EH_PE_textrel : DW_EH_PE_datarel)	\
-   | ((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_udata8)
+   | ((GLOBAL) ? DW_EH_PE_indirect : 0)			\
+   | (TARGET_ILP32 ? DW_EH_PE_udata4 : DW_EH_PE_udata8))
 
 /* Handle special EH pointer encodings.  Absolute, pc-relative, and
    indirect are handled automatically.  */
@@ -2322,7 +2278,7 @@ do {									\
 /* An alias for a machine mode name.  This is the machine mode that elements of
    a jump-table should have.  */
 
-#define CASE_VECTOR_MODE Pmode
+#define CASE_VECTOR_MODE ptr_mode
 
 /* Define as C expression which evaluates to nonzero if the tablejump
    instruction expects the table to contain offsets from the address of the
@@ -2494,4 +2450,5 @@ enum fetchop_code {
 #undef  PROFILE_BEFORE_PROLOGUE
 #define PROFILE_BEFORE_PROLOGUE 1
 
+#define FUNCTION_OK_FOR_SIBCALL(DECL) ia64_function_ok_for_sibcall (DECL)
 /* End of ia64.h */
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index c2275494c25..4d177c21aed 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -83,6 +83,7 @@
    (UNSPECV_SET_BSP		4)
    (UNSPECV_PSAC_ALL		5)	; pred.safe_across_calls
    (UNSPECV_PSAC_NORMAL		6)
+   (UNSPECV_SETJMP_RECEIVER	7)
   ])
 
 ;; ::::::::::::::::::::
@@ -506,22 +507,41 @@
 
 (define_expand "load_symptr"
   [(set (match_operand:DI 2 "register_operand" "")
-	(plus:DI (match_dup 4) (match_operand:DI 1 "got_symbolic_operand" "")))
-   (set (match_operand:DI 0 "register_operand" "") (match_dup 3))]
+	(plus:DI (high:DI (match_operand:DI 1 "got_symbolic_operand" ""))
+		 (match_dup 3)))
+   (set (match_operand:DI 0 "register_operand" "")
+	(lo_sum:DI (match_dup 2) (match_dup 1)))]
   ""
 {
-  operands[3] = gen_rtx_MEM (DImode, operands[2]);
-  operands[4] = pic_offset_table_rtx;
-  RTX_UNCHANGING_P (operands[3]) = 1;
+  operands[3] = pic_offset_table_rtx;
 })
 
-(define_insn "*load_symptr_internal1"
+(define_insn "*load_symptr_high"
   [(set (match_operand:DI 0 "register_operand" "=r")
-	(plus:DI (reg:DI 1) (match_operand 1 "got_symbolic_operand" "s")))]
+	(plus:DI (high:DI (match_operand 1 "got_symbolic_operand" "s"))
+		 (match_operand:DI 2 "register_operand" "a")))]
   ""
-  "addl %0 = @ltoff(%1), gp"
+{
+  if (HAVE_AS_LTOFFX_LDXMOV_RELOCS)
+    return "%,addl %0 = @ltoffx(%1), %2";
+  else
+    return "%,addl %0 = @ltoff(%1), %2";
+}
   [(set_attr "itanium_class" "ialu")])
 
+(define_insn "*load_symptr_low"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(lo_sum:DI (match_operand:DI 1 "register_operand" "r")
+		   (match_operand 2 "got_symbolic_operand" "s")))]
+  ""
+{
+  if (HAVE_AS_LTOFFX_LDXMOV_RELOCS)
+    return "%,ld8.mov %0 = [%1], %2";
+  else
+    return "%,ld8 %0 = [%1]";
+}
+  [(set_attr "itanium_class" "ld")])
+
 (define_insn "load_ltoff_dtpmod"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(plus:DI (reg:DI 1)
@@ -1034,7 +1054,7 @@
   [(set (match_operand:DF 0 "register_operand" "=f")
         (float:DF (match_operand:DI 1 "register_operand" "f")))]
   "!INTEL_EXTENDED_IEEE_FORMAT"
-  "fcvt.xf %0 = %1\;;;\;fnorm.d %0 = %0"
+  "fcvt.xf %0 = %1\;;;\;%,fnorm.d %0 = %0"
   [(set_attr "itanium_class" "fcvtfx")])
 
 ;; ??? Suboptimal.  This should be split somehow.
@@ -1042,7 +1062,7 @@
   [(set (match_operand:SF 0 "register_operand" "=f")
         (float:SF (match_operand:DI 1 "register_operand" "f")))]
   "!INTEL_EXTENDED_IEEE_FORMAT"
-  "fcvt.xf %0 = %1\;;;\;fnorm.s %0 = %0"
+  "fcvt.xf %0 = %1\;;;\;%,fnorm.s %0 = %0"
   [(set_attr "itanium_class" "fcvtfx")])
 
 (define_insn "fix_truncsfdi2"
@@ -4424,9 +4444,9 @@
 		"c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")
 	     (const_int 0)])
 	  (match_operand:DI 2 "move_operand"
-	   "rim, *f, *b,*d*e,rim,rim, rim,*f,*b,*d*e,rO,*f,rOQ,rO,  rK")
+	   "rnm, *f, *b,*d*e,rnm,rnm, rnm,*f,*b,*d*e,rO,*f,rOQ,rO,  rK")
 	  (match_operand:DI 3 "move_operand"
-	   "rim,rim,rim, rim, *f, *b,*d*e,*f,*b,*d*e,rO,*f,rOQ,rO,  rK")))]
+	   "rnm,rnm,rnm, rnm, *f, *b,*d*e,*f,*b,*d*e,rO,*f,rOQ,rO,  rK")))]
   "ia64_move_ok (operands[0], operands[2])
    && ia64_move_ok (operands[0], operands[3])"
   { abort (); }
@@ -4530,9 +4550,9 @@
 	    [(match_operand:BI 1 "register_operand" "c,c,c,c,c,c,c,c,c")
 	     (const_int 0)])
 	  (match_operand:SI 2 "move_operand"
-		    "0,0,0,rim*f,rO,rO,rim*f,rO,rO")
+		    "0,0,0,rnm*f,rO,rO,rnm*f,rO,rO")
 	  (match_operand:SI 3 "move_operand"
-		    "rim*f,rO,rO,0,0,0,rim*f,rO,rO")))]
+		    "rnm*f,rO,rO,0,0,0,rnm*f,rO,rO")))]
   "ia64_move_ok (operands[0], operands[2])
    && ia64_move_ok (operands[0], operands[3])"
   { abort (); }
@@ -4663,7 +4683,7 @@
    (use (match_operand 3 "" ""))]
   ""
 {
-  ia64_expand_call (NULL_RTX, operands[0], operands[2], 0);
+  ia64_expand_call (NULL_RTX, operands[0], operands[2], false);
   DONE;
 })
 
@@ -4674,7 +4694,7 @@
    (use (match_operand 3 "" ""))]
   ""
 {
-  ia64_expand_call (NULL_RTX, operands[0], operands[2], 1);
+  ia64_expand_call (NULL_RTX, operands[0], operands[2], true);
   DONE;
 })
 
@@ -4693,7 +4713,7 @@
    (use (match_operand 4 "" ""))]
   ""
 {
-  ia64_expand_call (operands[0], operands[1], operands[3], 0);
+  ia64_expand_call (operands[0], operands[1], operands[3], false);
   DONE;
 })
 
@@ -4705,7 +4725,7 @@
    (use (match_operand 4 "" ""))]
   ""
 {
-  ia64_expand_call (operands[0], operands[1], operands[3], 1);
+  ia64_expand_call (operands[0], operands[1], operands[3], true);
   DONE;
 })
 
@@ -4737,59 +4757,125 @@
   DONE;
 })
 
-(define_insn "call_nopic"
-  [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i"))
-	 (match_operand 1 "" ""))
-   (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
+(define_insn "call_nogp"
+  [(call (mem:DI (match_operand:DI 0 "call_operand" "?b,i"))
+	 (const_int 0))
+   (clobber (match_operand:DI 1 "register_operand" "=b,b"))]
   ""
-  "br.call%+.many %2 = %0"
+  "br.call%+.many %1 = %0"
   [(set_attr "itanium_class" "br,scall")])
 
-(define_insn "call_value_nopic"
+(define_insn "call_value_nogp"
   [(set (match_operand 0 "" "")
-	(call (mem:DI (match_operand:DI 1 "call_operand" "b,i"))
-	      (match_operand 2 "" "")))
-   (clobber (match_operand:DI 3 "register_operand" "=b,b"))]
+	(call (mem:DI (match_operand:DI 1 "call_operand" "?b,i"))
+	      (const_int 0)))
+   (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
   ""
-  "br.call%+.many %3 = %1"
+  "br.call%+.many %2 = %1"
   [(set_attr "itanium_class" "br,scall")])
 
-(define_insn "sibcall_nopic"
-  [(call (mem:DI (match_operand:DI 0 "call_operand" "b,i"))
-	 (match_operand 1 "" ""))
-   (use (match_operand:DI 2 "register_operand" "=b,b"))
-   (use (match_operand:DI 3 "ar_pfs_reg_operand" ""))]
+(define_insn "sibcall_nogp"
+  [(call (mem:DI (match_operand:DI 0 "call_operand" "?b,i"))
+	 (const_int 0))]
   ""
   "br%+.many %0"
   [(set_attr "itanium_class" "br,scall")])
 
-(define_insn "call_pic"
-  [(call (mem (match_operand 0 "call_operand" "b,i"))
-	 (match_operand 1 "" ""))
-   (use (unspec [(reg:DI 1)] UNSPEC_PIC_CALL))
-   (clobber (match_operand:DI 2 "register_operand" "=b,b"))]
+(define_insn "call_gp"
+  [(call (mem (match_operand 0 "call_operand" "?r,i"))
+	 (const_int 1))
+   (clobber (match_operand:DI 1 "register_operand" "=b,b"))
+   (clobber (match_scratch:DI 2 "=&r,X"))
+   (clobber (match_scratch:DI 3 "=b,X"))]
   ""
-  "br.call%+.many %2 = %0"
+  "#"
   [(set_attr "itanium_class" "br,scall")])
 
-(define_insn "call_value_pic"
+;; Irritatingly, we don't have access to INSN within the split body.
+;; See commentary in ia64_split_call as to why these aren't peep2.
+(define_split
+  [(call (mem (match_operand 0 "call_operand" ""))
+	 (const_int 1))
+   (clobber (match_operand:DI 1 "register_operand" ""))
+   (clobber (match_scratch:DI 2 ""))
+   (clobber (match_scratch:DI 3 ""))]
+  "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(const_int 0)]
+{
+  ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
+		   operands[3], true, false);
+  DONE;
+})
+
+(define_split
+  [(call (mem (match_operand 0 "call_operand" ""))
+	 (const_int 1))
+   (clobber (match_operand:DI 1 "register_operand" ""))
+   (clobber (match_scratch:DI 2 ""))
+   (clobber (match_scratch:DI 3 ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_call (NULL_RTX, operands[0], operands[1], operands[2],
+		   operands[3], false, false);
+  DONE;
+})
+
+(define_insn "call_value_gp"
   [(set (match_operand 0 "" "")
-	(call (mem:DI (match_operand:DI 1 "call_operand" "b,i"))
-	      (match_operand 2 "" "")))
-   (use (unspec [(reg:DI 1)] UNSPEC_PIC_CALL))
-   (clobber (match_operand:DI 3 "register_operand" "=b,b"))]
+	(call (mem:DI (match_operand:DI 1 "call_operand" "?r,i"))
+	      (const_int 1)))
+   (clobber (match_operand:DI 2 "register_operand" "=b,b"))
+   (clobber (match_scratch:DI 3 "=&r,X"))
+   (clobber (match_scratch:DI 4 "=b,X"))]
   ""
-  "br.call%+.many %3 = %1"
+  "#"
   [(set_attr "itanium_class" "br,scall")])
 
-(define_insn "sibcall_pic"
-  [(call (mem:DI (match_operand:DI 0 "call_operand" "bi"))
-	 (match_operand 1 "" ""))
-   (use (unspec [(reg:DI 1)] UNSPEC_PIC_CALL))
-   (use (match_operand:DI 2 "register_operand" "=b"))
-   (use (match_operand:DI 3 "ar_pfs_reg_operand" ""))]
+(define_split
+  [(set (match_operand 0 "" "")
+	(call (mem:DI (match_operand:DI 1 "call_operand" ""))
+	      (const_int 1)))
+   (clobber (match_operand:DI 2 "register_operand" ""))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (match_scratch:DI 4 ""))]
+  "reload_completed && find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(const_int 0)]
+{
+  ia64_split_call (operands[0], operands[1], operands[2], operands[3],
+		   operands[4], true, false);
+  DONE;
+})
+
+(define_split
+  [(set (match_operand 0 "" "")
+	(call (mem:DI (match_operand:DI 1 "call_operand" ""))
+	      (const_int 1)))
+   (clobber (match_operand:DI 2 "register_operand" ""))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (match_scratch:DI 4 ""))]
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_call (operands[0], operands[1], operands[2], operands[3],
+		   operands[4], false, false);
+  DONE;
+})
+
+(define_insn_and_split "sibcall_gp"
+  [(call (mem:DI (match_operand:DI 0 "call_operand" "?r,i"))
+	 (const_int 1))
+   (clobber (match_scratch:DI 1 "=&r,X"))
+   (clobber (match_scratch:DI 2 "=b,X"))]
   ""
-  "br%+.many %0"
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  ia64_split_call (NULL_RTX, operands[0], NULL_RTX, operands[1],
+		   operands[2], true, true);
+  DONE;
+}
   [(set_attr "itanium_class" "br")])
 
 (define_insn "return_internal"
@@ -5040,7 +5126,10 @@
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(unspec:DI [(const_int 0)] UNSPEC_BSP_VALUE))]
   ""
-  ";;\;mov %0 = ar.bsp"
+  "*
+{
+  return \";;\;%,mov %0 = ar.bsp\";
+}"
   [(set_attr "itanium_class" "frar_i")])
 
 (define_insn "set_bsp"
@@ -5073,7 +5162,8 @@
   [(unspec [(const_int 0)] UNSPEC_FLUSHRS)]
   ""
   ";;\;flushrs\;;;"
-  [(set_attr "itanium_class" "rse_m")])
+  [(set_attr "itanium_class" "rse_m")
+   (set_attr "predicable" "no")])
 
 ;; ::::::::::::::::::::
 ;; ::
@@ -5243,21 +5333,14 @@
   DONE;
 })
 
-;; The rest of the setjmp processing happens with the nonlocal_goto expander.
-;; ??? This is not tested.
-(define_expand "builtin_setjmp_setup"
-  [(use (match_operand:DI 0 "" ""))]
-  ""
-{
-  emit_move_insn (ia64_gp_save_reg (0), gen_rtx_REG (DImode, GR_REG (1)));
-  DONE;
-})
-
-(define_expand "builtin_setjmp_receiver"
-  [(use (match_operand:DI 0 "" ""))]
+(define_insn_and_split "builtin_setjmp_receiver"
+  [(unspec_volatile [(match_operand:DI 0 "" "")] UNSPECV_SETJMP_RECEIVER)]
   ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
 {
-  emit_move_insn (gen_rtx_REG (DImode, GR_REG (1)), ia64_gp_save_reg (0));
+  ia64_reload_gp ();
   DONE;
 })
 
@@ -5348,7 +5431,7 @@
    (set (match_operand:SI 1 "not_postinc_memory_operand" "+S")
         (unspec:SI [(match_dup 1)
                     (match_operand:SI 2 "gr_register_operand" "r")
-		    (match_operand:SI 3 "ar_ccv_reg_operand" "")]
+		    (match_operand 3 "ar_ccv_reg_operand" "")]
 		   UNSPEC_CMPXCHG_ACQ))]
   ""
   "cmpxchg4.acq %0 = %1, %2, %3"
@@ -5438,7 +5521,7 @@
          [(plus:SI (match_operand:SI 1 "basereg_operand" "r")
                    (match_operand:SI 2 "gr_reg_or_14bit_operand" "rI"))]
          UNSPEC_ADDP4))]
-  ""
+  "addp4_optimize_ok (operands[1], operands[2])"
   "addp4 %0 = %2, %1"
   [(set_attr "itanium_class" "ialu")])
 
@@ -5448,6 +5531,6 @@
          [(plus:SI (match_operand:SI 1 "gr_register_operand" "r")
                    (match_operand:SI 2 "basereg_operand" "r"))]
          UNSPEC_ADDP4))]
-  ""
+  "addp4_optimize_ok (operands[1], operands[2])"
   "addp4 %0 = %1, %2"
   [(set_attr "itanium_class" "ialu")])
diff --git a/gcc/config/ia64/t-hpux b/gcc/config/ia64/t-hpux
index d8201f9d5e1..7b42fe519a2 100644
--- a/gcc/config/ia64/t-hpux
+++ b/gcc/config/ia64/t-hpux
@@ -40,9 +40,9 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 	$(LN_S) @shlib_base_name@.so @shlib_base_name@.so.0
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
-SHLIB_INSTALL = $(INSTALL_DATA) @shlib_base_name@.so $$(slibdir)/@shlib_base_name@.so.0; \
-	rm -f $$(slibdir)/@shlib_base_name@.so; \
-	$(LN_S) @shlib_base_name@.so.0 $$(slibdir)/@shlib_base_name@.so; \
-	chmod +x $$(slibdir)/@shlib_base_name@.so
+SHLIB_INSTALL = $(INSTALL_DATA) @shlib_base_name@.so $$(DESTDIR)$$(slibdir)/@shlib_base_name@.so.0; \
+	rm -f $$(DESTDIR)$$(slibdir)/@shlib_base_name@.so; \
+	$(LN_S) @shlib_base_name@.so.0 $$(DESTDIR)$$(slibdir)/@shlib_base_name@.so; \
+	chmod +x $$(DESTDIR)$$(slibdir)/@shlib_base_name@.so
 
 SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64
index 0cfd9483879..7c63b31e628 100644
--- a/gcc/config/ia64/t-ia64
+++ b/gcc/config/ia64/t-ia64
@@ -40,7 +40,8 @@ crtfastmath.o: $(srcdir)/config/ia64/crtfastmath.c $(GCC_PASSES)
 	$(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -c -o crtfastmath.o \
 		$(srcdir)/config/ia64/crtfastmath.c
 
-LIB2ADDEH = $(srcdir)/config/ia64/unwind-ia64.c $(srcdir)/unwind-sjlj.c
+LIB2ADDEH = $(srcdir)/config/ia64/unwind-ia64.c $(srcdir)/unwind-sjlj.c \
+  $(srcdir)/unwind-c.c
 
 ia64-c.o: $(srcdir)/config/ia64/ia64-c.c $(CONFIG_H) $(SYSTEM_H) \
     $(TREE_H) $(CPPLIB_H) $(C_COMMON_H) c-pragma.h toplev.h
diff --git a/gcc/config/ia64/unwind-ia64.c b/gcc/config/ia64/unwind-ia64.c
index b3ba9333bcc..12e46ae2427 100644
--- a/gcc/config/ia64/unwind-ia64.c
+++ b/gcc/config/ia64/unwind-ia64.c
@@ -1645,7 +1645,24 @@ _Unwind_GetRegionStart (struct _Unwind_Context *context)
 void *
 _Unwind_FindEnclosingFunction (void *pc)
 {
-  return NULL;
+  struct unw_table_entry *ent;
+  unsigned long segment_base, gp;
+
+  ent = _Unwind_FindTableEntry (pc, &segment_base, &gp);
+  if (ent == NULL)
+    return NULL;
+  else
+    return (void *)(segment_base + ent->start_offset);
+}
+
+/* Get the value of the CFA as saved in CONTEXT.  In GCC/Dwarf2 parlance,
+   the CFA is the value of the stack pointer on entry; In IA-64 unwind
+   parlance, this is the PSP.  */
+
+_Unwind_Word
+_Unwind_GetCFA (struct _Unwind_Context *context)
+{
+  return (_Unwind_Ptr) context->psp;
 }
 
 
diff --git a/gcc/config/ip2k/ip2k.c b/gcc/config/ip2k/ip2k.c
index c0a643ae1d1..fa543205fc9 100644
--- a/gcc/config/ip2k/ip2k.c
+++ b/gcc/config/ip2k/ip2k.c
@@ -1,6 +1,6 @@
 /* Subroutines used for code generation on Ubicom IP2022
    Communications Controller.
-   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Red Hat, Inc and Ubicom, Inc.
 
    This file is part of GNU CC.
@@ -72,6 +72,11 @@ static void mdr_try_wreg_elim PARAMS ((rtx));
 static int ip2k_check_can_adjust_stack_ref PARAMS ((rtx, int));
 static void ip2k_adjust_stack_ref PARAMS ((rtx *, int));
 static int ip2k_xexp_not_uses_reg_for_mem PARAMS ((rtx, unsigned int));
+static tree ip2k_handle_progmem_attribute PARAMS ((tree *, tree, tree, int,
+						   bool *));
+static tree ip2k_handle_fndecl_attribute PARAMS ((tree *, tree, tree, int,
+						  bool *));
+const struct attribute_spec ip2k_attribute_table[];
 
 
 /* Initialize the GCC target structure.  */
@@ -90,6 +95,9 @@ static int ip2k_xexp_not_uses_reg_for_mem PARAMS ((rtx, unsigned int));
 #undef TARGET_ENCODE_SECTION_INFO
 #define TARGET_ENCODE_SECTION_INFO encode_section_info
 
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE ip2k_attribute_table
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 /* Commands in the functions prologues in the compiled file.  */
@@ -3143,51 +3151,82 @@ class_likely_spilled_p(c)
 	  || c == PTR_REGS);
 }
 
-/* Only `progmem' attribute valid for type.  */
+/* Valid attributes:
+   progmem - put data to program memory;
+   naked     - don't generate function prologue/epilogue and `ret' command.
 
-int
-valid_machine_type_attribute(type, attributes, identifier, args)
-     tree type ATTRIBUTE_UNUSED;
-     tree attributes ATTRIBUTE_UNUSED;
-     tree identifier;
-     tree args ATTRIBUTE_UNUSED;
-{
-  return is_attribute_p ("progmem", identifier);
-}
+   Only `progmem' attribute valid for type.  */
 
-/* If IDENTIFIER with arguments ARGS is a valid machine specific
-   attribute for DECL return 1.
-   Valid attributes:
-   progmem - put data to program memory;
-   naked   - don't generate function prologue/epilogue and `ret' command.  */
+const struct attribute_spec ip2k_attribute_table[] =
+{
+  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+  { "progmem",   0, 0, false, false, false,  ip2k_handle_progmem_attribute },
+  { "naked",     0, 0, true,  false, false,  ip2k_handle_fndecl_attribute },
+  { NULL,        0, 0, false, false, false, NULL }
+};
 
-int
-valid_machine_decl_attribute (decl, attributes, attr, args)
-     tree decl;
-     tree attributes ATTRIBUTE_UNUSED;
-     tree attr;
+/* Handle a "progmem" attribute; arguments as in
+   struct attribute_spec.handler.  */
+static tree
+ip2k_handle_progmem_attribute (node, name, args, flags, no_add_attrs)
+     tree *node;
+     tree name;
      tree args ATTRIBUTE_UNUSED;
+     int flags ATTRIBUTE_UNUSED;
+     bool *no_add_attrs;
 {
-  if (is_attribute_p ("naked", attr))
-    return TREE_CODE (decl) == FUNCTION_DECL;
-
-  if (is_attribute_p ("progmem", attr)
-      && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)))
+  if (DECL_P (*node))
     {
-      /* Data stored in program RAM or FLASH must be word aligned or
-         it won't be directly addressable.  */
-      if (DECL_ALIGN (decl) < FUNCTION_BOUNDARY)
-	DECL_ALIGN (decl) = FUNCTION_BOUNDARY;
+      if (TREE_CODE (*node) == TYPE_DECL)
+	{
+	  /* This is really a decl attribute, not a type attribute,
+	     but try to handle it for GCC 3.0 backwards compatibility.  */
+
+	  tree type = TREE_TYPE (*node);
+	  tree attr = tree_cons (name, args, TYPE_ATTRIBUTES (type));
+	  tree newtype = build_type_attribute_variant (type, attr);
 
-      if (DECL_INITIAL (decl) == NULL_TREE)
+	  TYPE_MAIN_VARIANT (newtype) = TYPE_MAIN_VARIANT (type);
+	  TREE_TYPE (*node) = newtype;
+	  *no_add_attrs = true;
+	}
+      else if (TREE_STATIC (*node) || DECL_EXTERNAL (*node))
 	{
-	  warning ("Only initialized variables can be placed into "
-		   "program memory area.");
-	  return 0;
+	  if (DECL_INITIAL (*node) == NULL_TREE && !DECL_EXTERNAL (*node))
+	    {
+	      warning ("only initialized variables can be placed into "
+		       "program memory area");
+	      *no_add_attrs = true;
+	    }
+	}
+      else
+	{
+	  warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
+	  *no_add_attrs = true;
 	}
-      return 1;
     }
-  return 0;
+
+  return NULL_TREE;
+}
+
+/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
+   struct attribute_spec.handler.  */
+static tree
+ip2k_handle_fndecl_attribute (node, name, args, flags, no_add_attrs)
+     tree *node;
+     tree name;
+     tree args ATTRIBUTE_UNUSED;
+     int flags ATTRIBUTE_UNUSED;
+     bool *no_add_attrs;
+{
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+    {
+      warning ("`%s' attribute only applies to functions",
+	       IDENTIFIER_POINTER (name));
+      *no_add_attrs = true;
+    }
+
+  return NULL_TREE;
 }
 
 /* Encode section information about tree DECL.  */
diff --git a/gcc/config/ip2k/ip2k.h b/gcc/config/ip2k/ip2k.h
index 00be9c4408d..af148904dd9 100644
--- a/gcc/config/ip2k/ip2k.h
+++ b/gcc/config/ip2k/ip2k.h
@@ -1,7 +1,7 @@
 /* Definitions of target machine for GNU compiler,
    For Ubicom IP2022 Communications Controller
 
-   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Red Hat, Inc and Ubicom, Inc.
 
 This file is part of GNU CC.
@@ -2349,18 +2349,6 @@ do {							\
    of arguments that the function accepts.  Some people think a larger
    threshold should be used on RISC machines.  */
 
-#define VALID_MACHINE_DECL_ATTRIBUTE(DECL, ATTRIBUTES, IDENTIFIER, ARGS) \
-  valid_machine_decl_attribute (DECL, ATTRIBUTES, IDENTIFIER, ARGS)
-/* If defined, a C expression whose value is nonzero if IDENTIFIER
-   with arguments ARGS is a valid machine specific attribute for DECL.
-   The attributes in ATTRIBUTES have previously been assigned to DECL.  */
-
-#define VALID_MACHINE_TYPE_ATTRIBUTE(TYPE, ATTRIBUTES, IDENTIFIER, ARGS) \
-  valid_machine_type_attribute(TYPE, ATTRIBUTES, IDENTIFIER, ARGS)
-/* If defined, a C expression whose value is nonzero if IDENTIFIER
-   with arguments ARGS is a valid machine specific attribute for TYPE.
-   The attributes in ATTRIBUTES have previously been assigned to TYPE.  */
-
 #define DOLLARS_IN_IDENTIFIERS 0
 /* Define this macro to control use of the character `$' in identifier
    names.  0 means `$' is not allowed by default; 1 means it is
diff --git a/gcc/config/linux.h b/gcc/config/linux.h
index 2b1e50f3bde..2ddb9c765a1 100644
--- a/gcc/config/linux.h
+++ b/gcc/config/linux.h
@@ -110,3 +110,5 @@ Boston, MA 02111-1307, USA.  */
 
 /* Define this so we can compile MS code for use with WINE.  */
 #define HANDLE_PRAGMA_PACK_PUSH_POP
+
+#define TARGET_HAS_F_SETLKW
diff --git a/gcc/config/m68hc11/larith.asm b/gcc/config/m68hc11/larith.asm
index 80b88174737..78b5885dac2 100644
--- a/gcc/config/m68hc11/larith.asm
+++ b/gcc/config/m68hc11/larith.asm
@@ -1,5 +1,5 @@
 /* libgcc routines for M68HC11 & M68HC12.
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -41,11 +41,22 @@ Boston, MA 02111-1307, USA.  */
 	.mode mlong
 #endif
 
-#if defined(__USE_RTC__) && defined(mc68hc12)
+	.macro declare_near name
+	.globl \name
+	.type  \name,@function
+	.size  \name,.Lend-\name
+\name:
+	.endm
+
+#if defined(__USE_RTC__)
 # define ARG(N) N+1
 
 	.macro ret
+#if defined(mc68hc12)
 	rtc
+#else
+	jmp __return_32
+#endif
 	.endm
 
 	.macro declare name
@@ -181,10 +192,10 @@ REG(_.d32)
 ;; Specific initialization for 68hc11 before the main.
 ;; Nothing special for a generic routine; Just enable interrupts.
 ;;
-	declare	__premain
+	declare_near	__premain
 	clra
 	tap	; Clear both I and X.
-	ret
+	rts
 #endif
 
 #ifdef L__exit
@@ -278,8 +289,8 @@ Done:
 #else
 	xgdy
 	tsx
-	ldd	4,x
-	ldx	2,x		; SRC = X, DST = Y
+	ldd	ARG(4),x
+	ldx	ARG(2),x	; SRC = X, DST = Y
 	cpd	#0
 	beq	End
 	pshy
@@ -299,7 +310,7 @@ L1:
 	puly			; Restore Y to return the DST
 End:
 	xgdy
-	rts
+	ret
 #endif
 #endif
 
@@ -349,99 +360,97 @@ L0:
 	pulx			; Restore X to return the DST
 End:
 	xgdx
-	rts
+	ret
 #endif
 #endif
 
 #ifdef L_adddi3
-       declare	___adddi3
+	declare	___adddi3
 
 	tsx
 	xgdy
 	ldd	ARG(8),x		; Add LSB
 	addd	ARG(16),x
-	std	ARG(6),y		; Save (carry preserved)
+	std	6,y		; Save (carry preserved)
 
 	ldd	ARG(6),x
 	adcb	ARG(15),x
 	adca	ARG(14),x
-	std	ARG(4),y
+	std	4,y
 
 	ldd	ARG(4),x
 	adcb	ARG(13),x
 	adca	ARG(12),x
-	std	ARG(2),y
+	std	2,y
 	
 	ldd	ARG(2),x
 	adcb	ARG(11),x		; Add MSB
 	adca	ARG(10),x
-	std	ARG(0),y
+	std	0,y
 
 	xgdy
 	ret
 #endif
 
 #ifdef L_subdi3
-       declare	___subdi3
+	declare	___subdi3
 
 	tsx
 	xgdy
 	ldd	ARG(8),x		; Subtract LSB
 	subd	ARG(16),x
-	std	ARG(6),y		; Save, borrow preserved
+	std	6,y			; Save, borrow preserved
 
 	ldd	ARG(6),x
 	sbcb	ARG(15),x
 	sbca	ARG(14),x
-	std	ARG(4),y
+	std	4,y
 
 	ldd	ARG(4),x
 	sbcb	ARG(13),x
 	sbca	ARG(12),x
-	std	ARG(2),y
+	std	2,y
 	
 	ldd	ARG(2),x		; Subtract MSB
 	sbcb	ARG(11),x
 	sbca	ARG(10),x
-	std	ARG(0),y
+	std	0,y
 
 	xgdy			;
 	ret
 #endif
 	
 #ifdef L_notdi2
-        declare	___notdi2
+	declare	___notdi2
 
 	tsy
 	xgdx
 	ldd	ARG(8),y
 	coma
 	comb
-	std	ARG(6),x
+	std	6,x
 	
 	ldd	ARG(6),y
 	coma
 	comb
-	std	ARG(4),x
+	std	4,x
 
 	ldd	ARG(4),y
 	coma
 	comb
-	std	ARG(2),x
+	std	2,x
 
 	ldd	ARG(2),y
 	coma
 	comb
-	std	ARG(0),x
+	std	0,x
 	xgdx
 	ret
 #endif
 	
 #ifdef L_negsi2
-	.sect .text
-	.globl ___negsi2
+	declare_near ___negsi2
 
-___negsi2:
 	comb
 	coma
 	xgdx
@@ -456,10 +465,8 @@ done:
 #endif
 
 #ifdef L_one_cmplsi2
-	.sect .text
-	.globl ___one_cmplsi2
+	declare_near ___one_cmplsi2
 
-___one_cmplsi2:
 	comb
 	coma
 	xgdx
@@ -470,10 +477,8 @@ ___one_cmplsi2:
 #endif
 	
 #ifdef L_ashlsi3
-	.sect .text
-	.globl ___ashlsi3
+	declare_near ___ashlsi3
 
-___ashlsi3:
 	xgdy
 	clra
 	andb	#0x1f
@@ -492,10 +497,8 @@ Return:
 #endif
 
 #ifdef L_ashrsi3
-	.sect .text
-	.globl ___ashrsi3
+	declare_near ___ashrsi3
 
-___ashrsi3:
 	xgdy
 	clra
 	andb	#0x1f
@@ -515,10 +518,8 @@ Return:
 #endif
 
 #ifdef L_lshrsi3
-	.sect .text
-	.globl ___lshrsi3
+	declare_near ___lshrsi3
 
-___lshrsi3:
 	xgdy
 	clra
 	andb	#0x1f
@@ -537,10 +538,8 @@ Return:
 #endif
 
 #ifdef L_lshrhi3
-	.sect .text
-	.globl ___lshrhi3
+	declare_near ___lshrhi3
 
-___lshrhi3:
 	cpx	#16
 	bge	Return_zero
 	cpx	#0
@@ -558,10 +557,8 @@ Return_zero:
 #endif
 	
 #ifdef L_lshlhi3
-	.sect .text
-	.globl ___lshlhi3
+	declare_near ___lshlhi3
 
-___lshlhi3:
 	cpx	#16
 	bge	Return_zero
 	cpx	#0
@@ -578,11 +575,48 @@ Return_zero:
 	rts
 #endif
 
+#ifdef L_rotrhi3
+	declare_near ___rotrhi3
+
+___rotrhi3:
+	xgdx
+	clra
+	andb	#0x0f
+	xgdx
+	beq	Return
+Loop:
+	tap
+	rorb
+	rora
+	dex
+	bne	Loop
+Return:
+	rts
+#endif
+
+#ifdef L_rotlhi3
+	declare_near ___rotlhi3
+
+___rotlhi3:
+	xgdx
+	clra
+	andb	#0x0f
+	xgdx
+	beq	Return
+Loop:
+	asrb
+	rolb
+	rola
+	rolb
+	dex
+	bne	Loop
+Return:
+	rts
+#endif
+
 #ifdef L_ashrhi3
-	.sect .text
-	.globl ___ashrhi3
+	declare_near ___ashrhi3
 
-___ashrhi3:
 	cpx	#16
 	bge	Return_minus_1_or_zero
 	cpx	#0
@@ -605,10 +639,8 @@ Return_zero:
 #endif
 	
 #ifdef L_ashrqi3
-	.sect .text
-	.globl ___ashrqi3
+	declare_near ___ashrqi3
 
-___ashrqi3:
 	cmpa	#8
 	bge	Return_minus_1_or_zero
 	tsta
@@ -630,10 +662,8 @@ Return_zero:
 #endif
 
 #ifdef L_lshlqi3
-	.sect .text
-	.globl ___lshlqi3
+	declare_near ___lshlqi3
 
-___lshlqi3:
 	cmpa	#8
 	bge	Return_zero
 	tsta
@@ -653,8 +683,7 @@ Return_zero:
 #ifndef mc68hc12
 /* 68HC12 signed divisions are generated inline (idivs).  */
 
-	.sect .text
-	.globl __divmodhi4
+	declare_near __divmodhi4
 
 ;
 ;; D = numerator
@@ -663,7 +692,6 @@ Return_zero:
 ;; Result:	D = D / X
 ;;		X = D % X
 ;; 
-__divmodhi4:
 	tsta
 	bpl	Numerator_pos
 	comb			; D = -D <=> D = (~D) + 1
@@ -721,8 +749,7 @@ Numerator_neg_denominator_pos:
 #endif
 
 #ifdef L_mulqi3
-       .sect .text
-       .globl __mulqi3
+	declare_near ___mulqi3
 
 ;
 ; short __mulqi3(signed char a, signed char b);
@@ -732,7 +759,6 @@ Numerator_neg_denominator_pos:
 ;
 ; returns the signed result of A * B in register D.
 ;
-__mulqi3:
 	tsta
 	bmi	A_neg
 	tstb
@@ -759,8 +785,7 @@ AB_neg:
 #endif
 	
 #ifdef L_mulhi3
-	.sect .text
-	.globl ___mulhi3
+	declare_near ___mulhi3
 
 ;
 ;
@@ -769,7 +794,6 @@ AB_neg:
 ;	a = register D
 ;	b = register X
 ;
-___mulhi3:
 #ifdef mc68hc12
 	pshx			; Preserve X
 	exg	x,y
@@ -805,24 +829,6 @@ ___mulhi3:
 				; ---
 				; 91 cycles
 #else
-	stx	_.tmp		; (4/5)
-	pshb			; (3)
-	ldab	_.tmp+1		; (3/4)
-	mul			; (10) B.high * A.low
-	xgdx			; (3)
-	pulb			; (4)
-	stab	_.tmp		; (3/4)
-	mul			; (10) B.low * A.high
-	abx			; (3)
-	ldd	_.tmp		; (4/5)
-	mul			; (10) B.low * A.low
-	stx	_.tmp		; (4) 
-	adda	_.tmp+1		; (4/5)
-	rts			; (5) 20/26 bytes
-				; ---
-				; 70/76 cycles
-
-#ifdef OLD_MUL
 	stx	*_.tmp		; (4)
 	pshb			; (3)
 	ldab	*_.tmp+1	; (3)
@@ -843,7 +849,6 @@ ___mulhi3:
 #endif
 #endif
 #endif
-#endif
 
 #ifdef L_mulhi32
 
@@ -879,13 +884,13 @@ ___mulhi3:
 ;      <A-low>    1,x
 ;      <A-high>   0,x
 ;
-	declare	__mulhi32
+	declare_near	__mulhi32
 
 #ifdef mc68hc12
-	ldy	ARG(2),sp
+	ldy	2,sp
 	emul
 	exg	x,y
-	ret
+	rts
 #else
 	pshx			; Room for temp value
 	pshb
@@ -961,8 +966,8 @@ Ret:
 	exg	d,y
 	ret
 #else
-B_low	=	8
-B_high	=	6
+B_low	=	ARG(8)
+B_high	=	ARG(6)
 A_low	=	0
 A_high	=	2
 	pshx
@@ -1010,7 +1015,7 @@ Return:
 	ins
 	ins
 	ins
-	rts
+	ret
 ;
 ; 
 ; A_low_zero_non_optimized:
@@ -1187,7 +1192,7 @@ dtors_done:
 
 #ifdef L_far_tramp
 #ifdef mc68hc12
-	.sect	.text
+	.sect	.tramp,"ax",@progbits
 	.globl	__far_trampoline
 
 ;; This is a trampoline used by the linker to invoke a function
@@ -1216,6 +1221,123 @@ __far_trampoline:
 				; (whose memory bank is mapped due to the
 				; call to the trampoline).
 #endif
+
+#ifdef mc68hc11
+	.sect	.tramp,"ax",@progbits
+	.globl __far_trampoline
+
+;; Trampoline generated by gcc for 68HC11:
+;;
+;;	pshb
+;;	ldab	#%page(func)
+;;	ldy	#%addr(func)
+;;	jmp	__far_trampoline
+;;
+__far_trampoline:
+	psha				; (2) Save function parameter (high)
+	;; <Read current page in A>
+	psha				; (2)
+	;; <Set currenge page from B>
+	pshx				; (4)
+	tsx				; (3)
+	ldab	4,x			; (4) Restore function parameter (low)
+	ldaa	2,x			; (4) Get saved page number
+	staa	4,x			; (4) Save it below return PC
+	pulx				; (5)
+	pula				; (3)
+	pula				; (3) Restore function parameter (high)
+	jmp	0,y			; (4)
+#endif
+#endif
+
+#ifdef L_call_far
+#ifdef mc68hc11
+	.sect	.tramp,"ax",@progbits
+	.globl __call_a16
+	.globl __call_a32
+;;
+;; The call methods are used for 68HC11 to support memory bank switching.
+;; Every far call is redirected to these call methods.  Its purpose is to:
+;;
+;;  1/ Save the current page on the stack (1 byte to follow 68HC12 call frame)
+;;  2/ Install the new page
+;;  3/ Jump to the real function
+;;
+;; The page switching (get/save) is board dependent.  The default provided
+;; here does nothing (just create the appropriate call frame).
+;;
+;; Call sequence (10 bytes, 13 cycles):
+;;
+;;	ldx #page			; (3)
+;;	ldy #func			; (4)
+;;	jsr __call_a16			; (6)
+;;
+;; Call trampoline (11 bytes, 19 cycles):
+;;
+__call_a16:
+	;; xgdx				; (3)
+	;; <Read current page in A>	; (3) ldaa _current_page
+	psha				; (2)
+	;; <Set current page from B>	; (4) staa _current_page
+	;; xgdx				; (3)
+	jmp 0,y				; (4)
+
+;;
+;; Call sequence (10 bytes, 14 cycles):
+;;
+;;	pshb				; (2)
+;;	ldab #page			; (2)
+;;	ldy  #func			; (4)
+;;	jsr __call_a32			; (6)
+;;
+;; Call trampoline (87 bytes, 57 cycles):
+;;
+__call_a32:
+	pshx				; (4)
+	psha				; (2)
+	;; <Read current page in A>	; (3) ldaa _current_page
+	psha				; (2)
+	;; <Set current page from B>	; (4) staa _current_page
+	tsx				; (3)
+	ldab	6,x			; (4) Restore function parameter
+	ldaa	5,x			; (4) Move PC return at good place
+	staa	6,x			; (4)
+	ldaa	4,x			; (4)
+	staa	5,x			; (4)
+	pula				; (3)
+	staa	4,x			; (4)
+	pula				; (3)
+	pulx				; (5)
+	jmp	0,y			; (4)
+#endif
+#endif
+
+#ifdef L_return_far
+#ifdef mc68hc11
+	.sect	.tramp,"ax",@progbits
+       .globl __return_void
+       .globl __return_16
+       .globl __return_32
+
+__return_void:
+	;; pulb
+	;; <Set current page from B> (Board specific)
+	;; rts
+__return_16:
+	;; xgdx
+	;; pulb
+	;; <Set current page from B> (Board specific)
+	;; xgdx
+	;; rts
+__return_32:
+	;; xgdy
+	;; pulb
+	;; <Set current page from B> (Board specific)
+	;; xgdy
+	;; rts
+	ins
+	rts
+#endif
 #endif
 .Lend:
 ;-----------------------------------------
diff --git a/gcc/config/m68hc11/m68hc11-protos.h b/gcc/config/m68hc11/m68hc11-protos.h
index c4a3fef0429..f10c3d08658 100644
--- a/gcc/config/m68hc11/m68hc11-protos.h
+++ b/gcc/config/m68hc11/m68hc11-protos.h
@@ -1,5 +1,5 @@
 /* Prototypes for exported functions defined in m68hc11.c
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Stephane Carrez (stcarrez@nerim.fr)
 
 This file is part of GNU CC.
@@ -24,6 +24,7 @@ extern int m68hc11_override_options PARAMS((void));
 extern int m68hc11_optimization_options PARAMS((int,int));
 extern void m68hc11_conditional_register_usage PARAMS((void));
 extern int hard_regno_mode_ok PARAMS((int, enum machine_mode));
+extern int m68hc11_hard_regno_rename_ok PARAMS((int, int));
 
 extern int m68hc11_total_frame_size PARAMS((void));
 extern int m68hc11_initial_frame_pointer_offset PARAMS((void));
@@ -42,18 +43,6 @@ extern void m68hc11_function_arg_advance PARAMS((CUMULATIVE_ARGS*,
 #endif
 
 #ifdef RTX_CODE
-extern rtx m68hc11_compare_op0;
-extern rtx m68hc11_compare_op1;
-extern GTY(()) rtx m68hc11_soft_tmp_reg;
-extern GTY(()) rtx ix_reg;
-extern GTY(()) rtx iy_reg;
-extern GTY(()) rtx d_reg;
-extern GTY(()) rtx da_reg;
-extern GTY(()) rtx stack_push_word;
-extern GTY(()) rtx stack_pop_word;
-extern GTY(()) rtx z_reg;
-extern GTY(()) rtx z_reg_qi;
-
 extern void m68hc11_initialize_trampoline PARAMS((rtx, rtx, rtx));
 
 extern rtx m68hc11_expand_compare_and_branch PARAMS((enum rtx_code,
@@ -132,7 +121,9 @@ extern int arith_src_operand PARAMS((rtx, enum machine_mode));
 extern int m68hc11_logical_operator PARAMS((rtx, enum machine_mode));
 extern int m68hc11_arith_operator PARAMS((rtx, enum machine_mode));
 extern int m68hc11_non_shift_operator PARAMS((rtx, enum machine_mode));
+extern int m68hc11_shift_operator PARAMS((rtx, enum machine_mode));
 extern int m68hc11_unary_operator PARAMS((rtx, enum machine_mode));
+extern int m68hc11_eq_compare_operator PARAMS((rtx, enum machine_mode));
 extern int non_push_operand PARAMS((rtx, enum machine_mode));
 extern int hard_reg_operand PARAMS((rtx, enum machine_mode));
 extern int soft_reg_operand PARAMS((rtx, enum machine_mode));
@@ -152,10 +143,11 @@ extern int m68hc11_function_arg_pass_by_reference PARAMS((const CUMULATIVE_ARGS*
                                                           int));
 extern int m68hc11_function_arg_padding PARAMS((enum machine_mode, tree));
 
-extern rtx m68hc11_va_arg PARAMS((tree,tree));
-
 extern void m68hc11_function_epilogue PARAMS((FILE*,int));
 
+extern int m68hc11_is_far_symbol PARAMS((rtx));
+extern int m68hc11_is_trap_symbol PARAMS((rtx));
+
 #endif /* TREE_CODE */
 
 extern HOST_WIDE_INT m68hc11_min_offset;
diff --git a/gcc/config/m68hc11/m68hc11.c b/gcc/config/m68hc11/m68hc11.c
index ac16b527a97..eae5cc3091a 100644
--- a/gcc/config/m68hc11/m68hc11.c
+++ b/gcc/config/m68hc11/m68hc11.c
@@ -1,5 +1,5 @@
 /* Subroutines for code generation on Motorola 68HC11 and 68HC12.
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Stephane Carrez (stcarrez@nerim.fr)
 
 This file is part of GNU CC.
@@ -64,9 +64,10 @@ static int go_if_legitimate_address_internal PARAMS((rtx, enum machine_mode,
                                                      int));
 static int register_indirect_p PARAMS((rtx, enum machine_mode, int));
 static rtx m68hc11_expand_compare PARAMS((enum rtx_code, rtx, rtx));
-static int m68hc11_autoinc_compatible_p PARAMS ((rtx, rtx));
 static int must_parenthesize PARAMS ((rtx));
 static int m68hc11_shift_cost PARAMS ((enum machine_mode, rtx, int));
+static int autoinc_mode PARAMS ((rtx));
+static int m68hc11_make_autoinc_notes PARAMS ((rtx*, void*));
 static int m68hc11_auto_inc_p PARAMS ((rtx));
 static tree m68hc11_handle_fntype_attribute PARAMS ((tree *, tree, tree, int, bool *));
 const struct attribute_spec m68hc11_attribute_table[];
@@ -79,8 +80,6 @@ static void m68hc11_asm_out_constructor PARAMS ((rtx, int));
 static void m68hc11_asm_out_destructor PARAMS ((rtx, int));
 static void m68hc11_encode_section_info PARAMS((tree, int));
 
-rtx m68hc11_soft_tmp_reg;
-
 /* Must be set to 1 to produce debug messages.  */
 int debug_m6811 = 0;
 
@@ -89,11 +88,12 @@ extern FILE *asm_out_file;
 rtx ix_reg;
 rtx iy_reg;
 rtx d_reg;
-rtx da_reg;
-rtx stack_push_word;
-rtx stack_pop_word;
+rtx m68hc11_soft_tmp_reg;
+static GTY(()) rtx stack_push_word;
+static GTY(()) rtx stack_pop_word;
+static GTY(()) rtx z_reg;
+static GTY(()) rtx z_reg_qi;
 static int regs_inited = 0;
-rtx z_reg;
 
 /* Set to 1 by expand_prologue() when the function is an interrupt handler.  */
 int current_function_interrupt;
@@ -284,7 +284,7 @@ m68hc11_override_options ()
       m68hc11_sp_correction = 0;
       m68hc11_tmp_regs_class = TMP_REGS;
       target_flags &= ~MASK_M6811;
-      target_flags |= MASK_NO_DIRECT_MODE | MASK_MIN_MAX;
+      target_flags |= MASK_NO_DIRECT_MODE;
       if (m68hc11_soft_reg_count == 0)
 	m68hc11_soft_reg_count = "0";
 
@@ -335,7 +335,6 @@ create_regs_rtx ()
   ix_reg = gen_rtx (REG, HImode, HARD_X_REGNUM);
   iy_reg = gen_rtx (REG, HImode, HARD_Y_REGNUM);
   d_reg = gen_rtx (REG, HImode, HARD_D_REGNUM);
-  da_reg = gen_rtx (REG, QImode, HARD_A_REGNUM);
   m68hc11_soft_tmp_reg = gen_rtx (REG, HImode, SOFT_TMP_REGNUM);
 
   stack_push_word = gen_rtx (MEM, HImode,
@@ -385,6 +384,23 @@ hard_regno_mode_ok (regno, mode)
     }
 }
 
+int
+m68hc11_hard_regno_rename_ok (reg1, reg2)
+     int reg1, reg2;
+{
+  /* Don't accept renaming to Z register.  We will replace it to
+     X,Y or D during machine reorg pass.  */
+  if (reg2 == HARD_Z_REGNUM)
+    return 0;
+
+  /* Don't accept renaming D,X to Y register as the code will be bigger.  */
+  if (TARGET_M6811 && reg2 == HARD_Y_REGNUM
+      && (D_REGNO_P (reg1) || X_REGNO_P (reg1)))
+    return 0;
+
+  return 1;
+}
+
 enum reg_class
 preferred_reload_class (operand, class)
      rtx operand;
@@ -1000,6 +1016,9 @@ d_register_operand (operand, mode)
      rtx operand;
      enum machine_mode mode ATTRIBUTE_UNUSED;
 {
+  if (GET_MODE (operand) != mode && mode != VOIDmode)
+    return 0;
+
   if (GET_CODE (operand) == SUBREG)
     operand = XEXP (operand, 0);
 
@@ -1014,6 +1033,9 @@ hard_addr_reg_operand (operand, mode)
      rtx operand;
      enum machine_mode mode ATTRIBUTE_UNUSED;
 {
+  if (GET_MODE (operand) != mode && mode != VOIDmode)
+    return 0;
+
   if (GET_CODE (operand) == SUBREG)
     operand = XEXP (operand, 0);
 
@@ -1026,8 +1048,11 @@ hard_addr_reg_operand (operand, mode)
 int
 hard_reg_operand (operand, mode)
      rtx operand;
-     enum machine_mode mode ATTRIBUTE_UNUSED;
+     enum machine_mode mode;
 {
+  if (GET_MODE (operand) != mode && mode != VOIDmode)
+    return 0;
+
   if (GET_CODE (operand) == SUBREG)
     operand = XEXP (operand, 0);
 
@@ -1103,6 +1128,14 @@ symbolic_memory_operand (op, mode)
 }
 
 int
+m68hc11_eq_compare_operator (op, mode)
+     register rtx op;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return GET_CODE (op) == EQ || GET_CODE (op) == NE;
+}
+
+int
 m68hc11_logical_operator (op, mode)
      register rtx op;
      enum machine_mode mode ATTRIBUTE_UNUSED;
@@ -1131,6 +1164,16 @@ m68hc11_non_shift_operator (op, mode)
     || GET_CODE (op) == PLUS || GET_CODE (op) == MINUS;
 }
 
+/* Return true if op is a shift operator.  */
+int
+m68hc11_shift_operator (op, mode)
+     register rtx op;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+  return GET_CODE (op) == ROTATE || GET_CODE (op) == ROTATERT
+    || GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFT
+    || GET_CODE (op) == ASHIFTRT;
+}
 
 int
 m68hc11_unary_operator (op, mode)
@@ -1196,9 +1239,16 @@ const struct attribute_spec m68hc11_attribute_table[] =
   /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
   { "interrupt", 0, 0, false, true,  true,  m68hc11_handle_fntype_attribute },
   { "trap",      0, 0, false, true,  true,  m68hc11_handle_fntype_attribute },
+  { "far",       0, 0, false, true,  true,  m68hc11_handle_fntype_attribute },
+  { "near",      0, 0, false, true,  true,  m68hc11_handle_fntype_attribute },
   { NULL,        0, 0, false, false, false, NULL }
 };
 
+/* Keep track of the symbol which has a `trap' attribute and which uses
+   the `swi' calling convention.  Since there is only one trap, we only
+   record one such symbol.  If there are several, a warning is reported.  */
+static rtx trap_handler_symbol = 0;
+
 /* Handle an attribute requiring a FUNCTION_TYPE, FIELD_DECL or TYPE_DECL;
    arguments as in struct attribute_spec.handler.  */
 static tree
@@ -1210,6 +1260,7 @@ m68hc11_handle_fntype_attribute (node, name, args, flags, no_add_attrs)
      bool *no_add_attrs;
 {
   if (TREE_CODE (*node) != FUNCTION_TYPE
+      && TREE_CODE (*node) != METHOD_TYPE
       && TREE_CODE (*node) != FIELD_DECL
       && TREE_CODE (*node) != TYPE_DECL)
     {
@@ -1232,16 +1283,56 @@ m68hc11_encode_section_info (decl, first)
 {
   tree func_attr;
   int trap_handler;
+  int is_far = 0;
   rtx rtl;
-
+  
   if (TREE_CODE (decl) != FUNCTION_DECL)
     return;
 
   rtl = DECL_RTL (decl);
 
   func_attr = TYPE_ATTRIBUTES (TREE_TYPE (decl));
+
+
+  if (lookup_attribute ("far", func_attr) != NULL_TREE)
+    is_far = 1;
+  else if (lookup_attribute ("near", func_attr) == NULL_TREE)
+    is_far = TARGET_LONG_CALLS != 0;
+
   trap_handler = lookup_attribute ("trap", func_attr) != NULL_TREE;
-  SYMBOL_REF_FLAG (XEXP (rtl, 0)) = trap_handler;
+  if (trap_handler && is_far)
+    {
+      warning ("`trap' and `far' attributes are not compatible, ignoring `far'");
+      trap_handler = 0;
+    }
+  if (trap_handler)
+    {
+      if (trap_handler_symbol != 0)
+        warning ("`trap' attribute is already used");
+      else
+        trap_handler_symbol = XEXP (rtl, 0);
+    }
+  SYMBOL_REF_FLAG (XEXP (rtl, 0)) = is_far;
+}
+
+int
+m68hc11_is_far_symbol (sym)
+     rtx sym;
+{
+  if (GET_CODE (sym) == MEM)
+    sym = XEXP (sym, 0);
+
+  return SYMBOL_REF_FLAG (sym);
+}
+
+int
+m68hc11_is_trap_symbol (sym)
+     rtx sym;
+{
+  if (GET_CODE (sym) == MEM)
+    sym = XEXP (sym, 0);
+
+  return trap_handler_symbol != 0 && rtx_equal_p (trap_handler_symbol, sym);
 }
 
 
@@ -1281,6 +1372,14 @@ m68hc11_initial_elimination_offset (from, to)
   /* For a trap handler, we must take into account the registers which
      are pushed on the stack during the trap (except the PC).  */
   func_attr = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
+
+  if (lookup_attribute ("far", func_attr) != 0)
+    current_function_far = 1;
+  else if (lookup_attribute ("near", func_attr) != 0)
+    current_function_far = 0;
+  else
+    current_function_far = TARGET_LONG_CALLS != 0;
+
   trap_handler = lookup_attribute ("trap", func_attr) != NULL_TREE;
   if (trap_handler && from == ARG_POINTER_REGNUM)
     size = 7;
@@ -1452,51 +1551,6 @@ m68hc11_function_arg (cum, mode, type, named)
   return NULL_RTX;
 }
 
-rtx
-m68hc11_va_arg (valist, type)
-     tree valist;
-     tree type;
-{
-  tree addr_tree, t;
-  HOST_WIDE_INT align;
-  HOST_WIDE_INT rounded_size;
-  rtx addr;
-  int pad_direction;
-
-  /* Compute the rounded size of the type.  */
-  align = PARM_BOUNDARY / BITS_PER_UNIT;
-  rounded_size = (((int_size_in_bytes (type) + align - 1) / align) * align);
-
-  /* Get AP.  */
-  addr_tree = valist;
-  pad_direction = m68hc11_function_arg_padding (TYPE_MODE (type), type);
-
-  if (pad_direction == downward)
-    {
-      /* Small args are padded downward.  */
-
-      HOST_WIDE_INT adj;
-      adj = TREE_INT_CST_LOW (TYPE_SIZE (type)) / BITS_PER_UNIT;
-      if (rounded_size > align)
-	adj = rounded_size;
-
-      addr_tree = build (PLUS_EXPR, TREE_TYPE (addr_tree), addr_tree,
-			 build_int_2 (rounded_size - adj, 0));
-    }
-
-  addr = expand_expr (addr_tree, NULL_RTX, Pmode, EXPAND_NORMAL);
-  addr = copy_to_reg (addr);
-
-  /* Compute new value for AP.  */
-  t = build (MODIFY_EXPR, TREE_TYPE (valist), valist,
-	     build (PLUS_EXPR, TREE_TYPE (valist), valist,
-		    build_int_2 (rounded_size, 0)));
-  TREE_SIDE_EFFECTS (t) = 1;
-  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
-
-  return addr;
-}
-
 /* If defined, a C expression which determines whether, and in which direction,
    to pad out an argument with extra space.  The value should be of type
    `enum direction': either `upward' to pad above the argument,
@@ -1620,6 +1674,12 @@ expand_prologue ()
   current_function_interrupt = lookup_attribute ("interrupt",
 						 func_attr) != NULL_TREE;
   current_function_trap = lookup_attribute ("trap", func_attr) != NULL_TREE;
+  if (lookup_attribute ("far", func_attr) != NULL_TREE)
+    current_function_far = 1;
+  else if (lookup_attribute ("near", func_attr) != NULL_TREE)
+    current_function_far = 0;
+  else
+    current_function_far = TARGET_LONG_CALLS != 0;
 
   /* Get the scratch register to build the frame and push registers.
      If the first argument is a 32-bit quantity, the D+X registers
@@ -1630,6 +1690,10 @@ expand_prologue ()
   else
     scratch = ix_reg;
 
+  /* Save current stack frame.  */
+  if (frame_pointer_needed)
+    emit_move_after_reload (stack_push_word, hard_frame_pointer_rtx, scratch);
+
   /* For an interrupt handler, we must preserve _.tmp, _.z and _.xy.
      Other soft registers in page0 need not to be saved because they
      will be restored by C functions.  For a trap handler, we don't
@@ -1644,17 +1708,13 @@ expand_prologue ()
 			      scratch);
     }
 
-  /* Save current stack frame.  */
-  if (frame_pointer_needed)
-    emit_move_after_reload (stack_push_word, hard_frame_pointer_rtx, scratch);
-
   /* Allocate local variables.  */
   if (TARGET_M6812 && (size > 4 || size == 3))
     {
       emit_insn (gen_addhi3 (stack_pointer_rtx,
 			     stack_pointer_rtx, GEN_INT (-size)));
     }
-  else if (size > 8)
+  else if ((!optimize_size && size > 8) || (optimize_size && size > 10))
     {
       rtx insn;
 
@@ -1721,7 +1781,7 @@ expand_epilogue ()
   else
     return_size = GET_MODE_SIZE (GET_MODE (current_function_return_rtx));
 
-  if (return_size > HARD_REG_SIZE)
+  if (return_size > HARD_REG_SIZE && return_size <= 2 * HARD_REG_SIZE)
     scratch = iy_reg;
   else
     scratch = ix_reg;
@@ -1742,7 +1802,7 @@ expand_epilogue ()
       emit_insn (gen_addhi3 (stack_pointer_rtx,
 			     stack_pointer_rtx, GEN_INT (size)));
     }
-  else if (size > 8)
+  else if ((!optimize_size && size > 8) || (optimize_size && size > 10))
     {
       rtx insn;
 
@@ -1768,10 +1828,6 @@ expand_epilogue ()
 			       stack_pointer_rtx, GEN_INT (1)));
     }
 
-  /* Restore previous frame pointer.  */
-  if (frame_pointer_needed)
-    emit_move_after_reload (hard_frame_pointer_rtx, stack_pop_word, scratch);
-
   /* For an interrupt handler, restore ZTMP, ZREG and XYREG.  */
   if (current_function_interrupt)
     {
@@ -1782,6 +1838,10 @@ expand_epilogue ()
       emit_move_after_reload (m68hc11_soft_tmp_reg, stack_pop_word, scratch);
     }
 
+  /* Restore previous frame pointer.  */
+  if (frame_pointer_needed)
+    emit_move_after_reload (hard_frame_pointer_rtx, stack_pop_word, scratch);
+
   /* If the trap handler returns some value, copy the value
      in D, X onto the stack so that the rti will pop the return value
      correctly.  */
@@ -2163,6 +2223,10 @@ print_operand (file, op, letter)
 	  asm_print_register (file, REGNO (op));
 	  fprintf (file, "+1");
 	}
+      else if (letter == 'b' && D_REG_P (op))
+	{
+	  asm_print_register (file, HARD_B_REGNUM);
+	}
       else
 	{
 	  asm_print_register (file, REGNO (op));
@@ -3214,6 +3278,17 @@ m68hc11_gen_movhi (insn, operands)
 	{
 	  if (SP_REG_P (operands[0]))
 	    output_asm_insn ("lds\t%1", operands);
+	  else if (0 /* REG_WAS_0 note is boggus;  don't rely on it.  */
+                   && !D_REG_P (operands[0])
+                   && GET_CODE (operands[1]) == CONST_INT
+                   && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)
+                   && find_reg_note (insn, REG_WAS_0, 0))
+            {
+              if (INTVAL (operands[1]) == 1)
+                output_asm_insn ("in%0", operands);
+              else
+                output_asm_insn ("de%0", operands);
+            }
 	  else
 	    output_asm_insn ("ld%0\t%1", operands);
 	}
@@ -3379,11 +3454,17 @@ m68hc11_gen_movhi (insn, operands)
                   output_asm_insn ("xgdx", operands);
                   CC_STATUS_INIT;
                 }
-              else
+              else if (!optimize_size)
                 {
                   output_asm_insn ("sty\t%t1", operands);
                   output_asm_insn ("ldx\t%t1", operands);
                 }
+              else
+                {
+                  CC_STATUS_INIT;
+                  output_asm_insn ("pshy", operands);
+                  output_asm_insn ("pulx", operands);
+                }
 	    }
 	  else if (SP_REG_P (operands[1]))
 	    {
@@ -3391,6 +3472,16 @@ m68hc11_gen_movhi (insn, operands)
 	      cc_status = cc_prev_status;
 	      output_asm_insn ("tsx", operands);
 	    }
+	  else if (0 /* REG_WAS_0 note is boggus;  don't rely on it.  */
+                   && GET_CODE (operands[1]) == CONST_INT
+                   && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)
+                   && find_reg_note (insn, REG_WAS_0, 0))
+            {
+              if (INTVAL (operands[1]) == 1)
+                output_asm_insn ("in%0", operands);
+              else
+                output_asm_insn ("de%0", operands);
+            }
 	  else
 	    {
 	      output_asm_insn ("ldx\t%1", operands);
@@ -3421,11 +3512,17 @@ m68hc11_gen_movhi (insn, operands)
                   output_asm_insn ("xgdy", operands);
                   CC_STATUS_INIT;
                 }
-              else
+              else if (!optimize_size)
                 {
                   output_asm_insn ("stx\t%t1", operands);
                   output_asm_insn ("ldy\t%t1", operands);
                 }
+              else
+                {
+                  CC_STATUS_INIT;
+                  output_asm_insn ("pshx", operands);
+                  output_asm_insn ("puly", operands);
+                }
 	    }
 	  else if (SP_REG_P (operands[1]))
 	    {
@@ -3433,7 +3530,17 @@ m68hc11_gen_movhi (insn, operands)
 	      cc_status = cc_prev_status;
 	      output_asm_insn ("tsy", operands);
 	    }
-	  else
+	  else if (0 /* REG_WAS_0 note is boggus;  don't rely on it.  */
+                   && GET_CODE (operands[1]) == CONST_INT
+                   && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)
+                   && find_reg_note (insn, REG_WAS_0, 0))
+            {
+              if (INTVAL (operands[1]) == 1)
+                output_asm_insn ("in%0", operands);
+              else
+                output_asm_insn ("de%0", operands);
+            }
+          else
 	    {
 	      output_asm_insn ("ldy\t%1", operands);
 	    }
@@ -3673,6 +3780,16 @@ m68hc11_gen_movqi (insn, operands)
 		  output_asm_insn ("ldab\t%T0", operands);
 		}
 	    }
+	  else if (0 /* REG_WAS_0 note is boggus;  don't rely on it.  */
+                   && GET_CODE (operands[1]) == CONST_INT
+                   && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)
+                   && find_reg_note (insn, REG_WAS_0, 0))
+            {
+              if (INTVAL (operands[1]) == 1)
+                output_asm_insn ("inc%b0", operands);
+              else
+                output_asm_insn ("dec%b0", operands);
+            }          
 	  else if (!DB_REG_P (operands[1]) && !D_REG_P (operands[1])
 		   && !DA_REG_P (operands[1]))
 	    {
@@ -3881,15 +3998,15 @@ m68hc11_gen_rotate (code, insn, operands)
 
   if (val > 0)
     {
-      /* Set the carry to bit-15, but don't change D yet.  */
-      if (GET_MODE (operands[0]) != QImode)
-        {
-          output_asm_insn ("asra", operands);
-          output_asm_insn ("rola", operands);
-        }
-
       while (--val >= 0)
         {
+          /* Set the carry to bit-15, but don't change D yet.  */
+          if (GET_MODE (operands[0]) != QImode)
+            {
+              output_asm_insn ("asra", operands);
+              output_asm_insn ("rola", operands);
+            }
+
           /* Rotate B first to move the carry to bit-0.  */
           if (D_REG_P (operands[0]))
             output_asm_insn ("rolb", operands);
@@ -3900,14 +4017,12 @@ m68hc11_gen_rotate (code, insn, operands)
     }
   else
     {
-      /* Set the carry to bit-8 of D.  */
-      if (val != 0 && GET_MODE (operands[0]) != QImode)
-        {
-          output_asm_insn ("tap", operands);
-        }
-      
       while (++val <= 0)
         {
+          /* Set the carry to bit-8 of D.  */
+          if (GET_MODE (operands[0]) != QImode)
+            output_asm_insn ("tap", operands);
+
           /* Rotate B first to move the carry to bit-7.  */
           if (D_REG_P (operands[0]))
             output_asm_insn ("rorb", operands);
@@ -4103,8 +4218,6 @@ struct replace_info
   int z_loaded_with_sp;
 };
 
-rtx z_reg_qi;
-
 static int m68hc11_check_z_replacement PARAMS ((rtx, struct replace_info *));
 static void m68hc11_find_z_replacement PARAMS ((rtx, struct replace_info *));
 static void m68hc11_z_replacement PARAMS ((rtx));
@@ -4204,7 +4317,7 @@ m68hc11_check_z_replacement (insn, info)
 		  info->need_save_z = 0;
 		  info->found_call = 1;
 		  info->regno = SOFT_Z_REGNUM;
-		  info->last = insn;
+		  info->last = NEXT_INSN (insn);
 		}
 	      return 0;
 	    }
@@ -4338,7 +4451,13 @@ m68hc11_check_z_replacement (insn, info)
 		      info->z_died = 1;
 		      info->need_save_z = 0;
 		    }
-		  else
+		  else if (TARGET_M6812 && side_effects_p (src))
+                    {
+                      info->last = 0;
+                      info->must_restore_reg = 0;
+                      return 0;
+                    }
+                  else
 		    {
 		      info->save_before_last = 1;
 		    }
@@ -4415,7 +4534,13 @@ m68hc11_check_z_replacement (insn, info)
 		      info->z_died = 1;
 		      info->need_save_z = 0;
 		    }
-		  else
+		  else if (TARGET_M6812 && side_effects_p (src))
+                    {
+                      info->last = 0;
+                      info->must_restore_reg = 0;
+                      return 0;
+                    }
+                  else
 		    {
 		      info->save_before_last = 1;
 		    }
@@ -5058,6 +5183,12 @@ m68hc11_reorg (first)
   int split_done = 0;
   rtx insn;
 
+  compute_bb_for_insn ();
+
+  /* ??? update_life_info_in_dirty_blocks fails to terminate during
+     non-optimizing bootstrap.
+
+     update_life_info (NULL, UPDATE_LIFE_GLOBAL_RM_NOTES, PROP_DEATH_NOTES); */
   z_replacement_completed = 0;
   z_reg = gen_rtx (REG, HImode, HARD_Z_REGNUM);
 
@@ -5073,9 +5204,6 @@ m68hc11_reorg (first)
   z_replacement_completed = 1;
   m68hc11_reassign_regs (first);
 
-  if (optimize)
-    compute_bb_for_insn ();
-
   /* After some splitting, there are some oportunities for CSE pass.
      This happens quite often when 32-bit or above patterns are split.  */
   if (optimize > 0 && split_done)
@@ -5174,7 +5302,7 @@ m68hc11_memory_move_cost (mode, class, in)
   else
     {
       if (GET_MODE_SIZE (mode) <= 2)
-	return COSTS_N_INSNS (2);
+	return COSTS_N_INSNS (3);
       else
 	return COSTS_N_INSNS (4);
     }
@@ -5506,7 +5634,10 @@ m68hc11_asm_file_start (out, main_file)
      const char *main_file;
 {
   fprintf (out, ";;;-----------------------------------------\n");
-  fprintf (out, ";;; Start MC68HC11 gcc assembly output\n");
+  fprintf (out, ";;; Start %s gcc assembly output\n",
+           TARGET_M6811
+           ? "MC68HC11"
+           : TARGET_M68S12 ? "MC68HCS12" : "MC68HC12");
   fprintf (out, ";;; gcc compiler %s\n", version_string);
   print_options (out);
   fprintf (out, ";;;-----------------------------------------\n");
@@ -5536,3 +5667,5 @@ m68hc11_asm_out_destructor (symbol, priority)
   default_dtor_section_asm_out_destructor (symbol, priority);
   fprintf (asm_out_file, "\t.globl\t__do_global_dtors\n");
 }
+
+#include "gt-m68hc11.h"
diff --git a/gcc/config/m68hc11/m68hc11.h b/gcc/config/m68hc11/m68hc11.h
index bdd3fe198f9..0f21e20a985 100644
--- a/gcc/config/m68hc11/m68hc11.h
+++ b/gcc/config/m68hc11/m68hc11.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler.
    Motorola 68HC11 and 68HC12.
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Stephane Carrez (stcarrez@nerim.fr)
 
 This file is part of GNU CC.
@@ -43,13 +43,22 @@ Note:
 
 /* Compile and assemble for a 68hc11 unless there is a -m68hc12 option.  */
 #ifndef ASM_SPEC
-#define ASM_SPEC       "%{m68hc12:-m68hc12}%{!m68hc12:-m68hc11}"
+#define ASM_SPEC                                                \
+"%{m68hc12:-m68hc12}"                                           \
+"%{m68hcs12:-m68hcs12}"                                         \
+"%{!m68hc12:%{!m68hcs12:-m68hc11}} "                            \
+"%{mshort:-mshort}%{!mshort:-mlong} "                           \
+"%{fshort-double:-mshort-double}%{!fshort-double:-mlong-double}"
 #endif
 
 /* We need to tell the linker the target elf format.  Just pass an
    emulation option.  This can be overriden by -Wl option of gcc.  */
 #ifndef LINK_SPEC
-#define LINK_SPEC      "%{m68hc12:-m m68hc12elf}%{!m68hc12:-m m68hc11elf} %{mrelax:-relax}"
+#define LINK_SPEC                                               \
+"%{m68hc12:-m m68hc12elf}"                                      \
+"%{m68hcs12:-m m68hc12elf}"                                     \
+"%{!m68hc12:%{!m68hcs12:-m m68hc11elf}} "                       \
+"%{!mnorelax:%{!m68hc12:%{!m68hcs12:-relax}}}"
 #endif
 
 #ifndef LIB_SPEC
@@ -65,7 +74,8 @@ Note:
 "%{mshort:-D__HAVE_SHORT_INT__ -D__INT__=16}\
  %{!mshort:-D__INT__=32}\
  %{m68hc12:-Dmc6812 -DMC6812 -Dmc68hc12}\
- %{!m68hc12:-Dmc6811 -DMC6811 -Dmc68hc11}\
+ %{m68hcs12:-Dmc6812 -DMC6812 -Dmc68hcs12}\
+ %{!m68hc12:%{!m68hcs12:-Dmc6811 -DMC6811 -Dmc68hc11}}\
  %{fshort-double:-D__HAVE_SHORT_DOUBLE__}\
  %{mlong-calls:-D__USE_RTC__}"
 #endif
@@ -119,14 +129,16 @@ extern short *reg_renumber;	/* def in local_alloc.c */
 #define MASK_AUTO_INC_DEC       0004
 #define MASK_M6811              0010
 #define MASK_M6812              0020
-#define MASK_NO_DIRECT_MODE     0040
-#define MASK_MIN_MAX            0100
-#define MASK_LONG_CALLS         0200
+#define MASK_M68S12             0040
+#define MASK_NO_DIRECT_MODE     0100
+#define MASK_MIN_MAX            0200
+#define MASK_LONG_CALLS         0400
 
 #define TARGET_OP_TIME		(optimize && optimize_size == 0)
 #define TARGET_SHORT            (target_flags & MASK_SHORT)
 #define TARGET_M6811            (target_flags & MASK_M6811)
 #define TARGET_M6812            (target_flags & MASK_M6812)
+#define TARGET_M68S12           (target_flags & MASK_M68S12)
 #define TARGET_AUTO_INC_DEC     (target_flags & MASK_AUTO_INC_DEC)
 #define TARGET_MIN_MAX          (target_flags & MASK_MIN_MAX)
 #define TARGET_NO_DIRECT_MODE   (target_flags & MASK_NO_DIRECT_MODE)
@@ -164,9 +176,9 @@ extern short *reg_renumber;	/* def in local_alloc.c */
     N_("Auto pre/post decrement increment allowed")},		\
   { "noauto-incdec", - MASK_AUTO_INC_DEC,			\
     N_("Auto pre/post decrement increment not allowed")},	\
-  { "inmax", MASK_MIN_MAX,                                     \
+  { "inmax", MASK_MIN_MAX,                                      \
     N_("Min/max instructions allowed")},                        \
-  { "nominmax", MASK_MIN_MAX,                                   \
+  { "nominmax", - MASK_MIN_MAX,                                 \
     N_("Min/max instructions not allowed")},                    \
   { "long-calls", MASK_LONG_CALLS,				\
     N_("Use call and rtc for function calls and returns")},	\
@@ -174,14 +186,20 @@ extern short *reg_renumber;	/* def in local_alloc.c */
     N_("Use jsr and rts for function calls and returns")},	\
   { "relax", MASK_NO_DIRECT_MODE,                               \
     N_("Do not use direct addressing mode for soft registers")},\
+  { "norelax", -MASK_NO_DIRECT_MODE,                            \
+    N_("Use direct addressing mode for soft registers")},       \
   { "68hc11", MASK_M6811,					\
     N_("Compile for a 68HC11")},				\
   { "68hc12", MASK_M6812,					\
     N_("Compile for a 68HC12")},				\
+  { "68hcs12", MASK_M6812 | MASK_M68S12,			\
+    N_("Compile for a 68HCS12")},				\
   { "6811",   MASK_M6811,					\
     N_("Compile for a 68HC11")},				\
   { "6812",   MASK_M6812,					\
     N_("Compile for a 68HC12")},				\
+  { "68S12",  MASK_M6812 | MASK_M68S12,				\
+    N_("Compile for a 68HCS12")},				\
   { "", TARGET_DEFAULT, 0 }}
 
 /* This macro is similar to `TARGET_SWITCHES' but defines names of
@@ -214,7 +232,7 @@ extern const char *m68hc11_soft_reg_count;
 #endif
 
 /* Print subsidiary information on the compiler version in use.  */
-#define TARGET_VERSION		fprintf (stderr, " (MC68HC11/MC68HC12)")
+#define TARGET_VERSION	fprintf (stderr, " (MC68HC11/MC68HC12/MC68HCS12)")
 
 /* Sometimes certain combinations of command options do not make
    sense on a particular target machine.  You can define a macro
@@ -555,6 +573,7 @@ enum reg_class
   D_OR_S_REGS,			/* 16-bit soft register or D register */
   X_OR_S_REGS,			/* 16-bit soft register or X register */
   Y_OR_S_REGS,			/* 16-bit soft register or Y register */
+  Z_OR_S_REGS,			/* 16-bit soft register or Z register */
   SP_OR_S_REGS,			/* 16-bit soft register or SP register */
   D_OR_X_OR_S_REGS,		/* 16-bit soft register or D or X register */
   D_OR_Y_OR_S_REGS,		/* 16-bit soft register or D or Y register */
@@ -601,6 +620,7 @@ enum reg_class
       "D_OR_S_REGS",                            \
       "X_OR_S_REGS",                            \
       "Y_OR_S_REGS",                            \
+      "Z_OR_S_REGS",                            \
       "SP_OR_S_REGS",                           \
       "D_OR_X_OR_S_REGS",                       \
       "D_OR_Y_OR_S_REGS",                       \
@@ -669,6 +689,7 @@ enum reg_class
 /* D_OR_S_REGS */	 { 0xFFFFDE02, 0x00007FFF }, /* D _.D */        \
 /* X_OR_S_REGS */	 { 0xFFFFDE01, 0x00007FFF }, /* X _.D */        \
 /* Y_OR_S_REGS */	 { 0xFFFFDE04, 0x00007FFF }, /* Y _.D */        \
+/* Z_OR_S_REGS */	 { 0xFFFFDF00, 0x00007FFF }, /* Z _.D */        \
 /* SP_OR_S_REGS */	 { 0xFFFFDE08, 0x00007FFF }, /* SP _.D */	\
 /* D_OR_X_OR_S_REGS */	 { 0xFFFFDE03, 0x00007FFF }, /* D X _.D */      \
 /* D_OR_Y_OR_S_REGS */	 { 0xFFFFDE06, 0x00007FFF }, /* D Y _.D */      \
@@ -780,6 +801,12 @@ extern enum reg_class m68hc11_tmp_regs_class;
 
 #define SMALL_REGISTER_CLASSES 1
 
+/* A C expression that is nonzero if hard register number REGNO2 can be
+   considered for use as a rename register for REGNO1 */
+
+#define HARD_REGNO_RENAME_OK(REGNO1,REGNO2) \
+  m68hc11_hard_regno_rename_ok ((REGNO1), (REGNO2))
+
 /* A C expression whose value is nonzero if pseudos that have been
    assigned to registers of class CLASS would likely be spilled
    because registers of CLASS are needed for spill registers.
@@ -835,6 +862,7 @@ extern enum reg_class m68hc11_tmp_regs_class;
    (C) == 'L' ? ((VALUE) >= -65536 && (VALUE) <= 65535) : \
    (C) == 'M' ? ((VALUE) & 0x0ffffL) == 0 : \
    (C) == 'N' ? ((VALUE) == 1 || (VALUE) == -1) : \
+   (C) == 'I' ? ((VALUE) >= -2 && (VALUE) <= 2) : \
    (C) == 'O' ? (VALUE) == 16 : \
    (C) == 'P' ? ((VALUE) <= 2 && (VALUE) >= -8) : 0)
 
@@ -1031,6 +1059,10 @@ typedef struct m68hc11_args
 #define FUNCTION_ARG_PADDING(MODE, TYPE) \
   m68hc11_function_arg_padding ((MODE), (TYPE))
 
+#undef PAD_VARARGS_DOWN
+#define PAD_VARARGS_DOWN \
+  (m68hc11_function_arg_padding (TYPE_MODE (type), type) == downward)
+
 /* A C expression that indicates when it is the called function's
    responsibility to make a copy of arguments passed by invisible
    reference.  Normally, the caller makes a copy and passes the
@@ -1077,10 +1109,6 @@ typedef struct m68hc11_args
    caller saving results in spill failure.  */
 #define CALLER_SAVE_PROFITABLE(REFS,CALLS) 0
 
-/* Implement `va_arg'.  */
-#define EXPAND_BUILTIN_VA_ARG(valist, type) \
-  m68hc11_va_arg (valist, type)
-
 /* For an arg passed partly in registers and partly in memory,
    this is the number of registers used.
    For args passed entirely in registers or entirely in memory, zero.
@@ -1578,7 +1606,7 @@ do {                                                                    \
       fprintf (FILE, TYPE_OPERAND_FMT, "function");	\
       putc ('\n', FILE);				\
       							\
-      if (TARGET_M6812 && current_function_far)		\
+      if (current_function_far)                         \
         {						\
           fprintf (FILE, "\t.far\t");			\
 	  assemble_name (FILE, NAME);			\
@@ -1589,7 +1617,7 @@ do {                                                                    \
         {						\
 	  fprintf (FILE, "\t.interrupt\t");		\
 	  assemble_name (FILE, NAME);			\
-	  putc ('\b', FILE);				\
+	  putc ('\n', FILE);				\
 	}						\
       ASM_DECLARE_RESULT (FILE, DECL_RESULT (DECL));	\
       ASM_OUTPUT_LABEL(FILE, NAME);			\
@@ -1668,6 +1696,10 @@ do {                                                                    \
 #undef PREFERRED_DEBUGGING_TYPE
 #define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
 
+/* For the support of memory banks we need addresses that indicate
+   the page number.  */
+#define DWARF2_ADDR_SIZE 4
+
 /* The prefix for local labels.  You should be able to define this as
    an empty string, or any arbitrary string (such as ".", ".L%", etc)
    without having to make any other changes to account for the specific
@@ -1694,6 +1726,8 @@ do {                                                                    \
 			      ROTATE, ROTATERT }},			\
 {"m68hc11_non_shift_operator", {AND, IOR, XOR, PLUS, MINUS}},		\
 {"m68hc11_unary_operator",   {NEG, NOT, SIGN_EXTEND, ZERO_EXTEND}},	\
+{"m68hc11_shift_operator",   {ASHIFT, ASHIFTRT, LSHIFTRT, ROTATE, ROTATERT}},\
+{"m68hc11_eq_compare_operator", {EQ, NE}},                              \
 {"non_push_operand",         {SUBREG, REG, MEM}},			\
 {"reg_or_some_mem_operand",  {SUBREG, REG, MEM}},			\
 {"tst_operand",              {SUBREG, REG, MEM}},			\
@@ -1751,3 +1785,10 @@ extern int z_replacement_completed;
 extern int current_function_interrupt;
 extern int current_function_trap;
 extern int current_function_far;
+
+extern GTY(()) rtx m68hc11_compare_op0;
+extern GTY(()) rtx m68hc11_compare_op1;
+extern GTY(()) rtx m68hc11_soft_tmp_reg;
+extern GTY(()) rtx ix_reg;
+extern GTY(()) rtx iy_reg;
+extern GTY(()) rtx d_reg;
diff --git a/gcc/config/m68hc11/m68hc11.md b/gcc/config/m68hc11/m68hc11.md
index a71e4f3f0f5..b986e6503e2 100644
--- a/gcc/config/m68hc11/m68hc11.md
+++ b/gcc/config/m68hc11/m68hc11.md
@@ -1,5 +1,5 @@
 ;;- Machine description file for Motorola 68HC11 and 68HC12.
-;;- Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+;;- Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 ;;- Contributed by Stephane Carrez (stcarrez@nerim.fr)
 
 ;; This file is part of GNU CC.
@@ -143,6 +143,7 @@
    (A_REGNUM        5)		; A (high part of D)
    (B_REGNUM        6)		; B (low part of D)
    (CC_REGNUM       7)		; Condition code register
+   (SOFT_Z_REGNUM  11)          ; Z soft register
 ])
 
 ;;--------------------------------------------------------------------
@@ -258,7 +259,7 @@
   [(set (cc0)
 	(match_operand:QI 0 "tst_operand" ""))
    (use (match_operand:HI 1 "hard_reg_operand" ""))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   "z_replacement_completed == 2"
   [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 1))
    (set (match_dup 1) (match_dup 2))
@@ -365,7 +366,7 @@
 	(compare (match_operand:HI 0 "tst_operand" "dxy,m")
 		 (match_operand:HI 1 "cmp_operand" "m,dxy")))
    (use (match_operand:HI 2 "hard_reg_operand" "dxy,dxy"))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   ""
   "#")
   
@@ -374,7 +375,7 @@
 	(compare (match_operand:HI 0 "tst_operand" "")
 		 (match_operand:HI 1 "cmp_operand" "")))
    (use (match_operand:HI 2 "hard_reg_operand" ""))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   "z_replacement_completed == 2"
   [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))
    (set (match_dup 2) (match_dup 3))
@@ -444,7 +445,7 @@
   [(set (cc0)
 	(and:QI (match_operand:QI 0 "tst_operand" "")
 		(match_operand:QI 1 "hard_addr_reg_operand" "")))]
-  "z_replacement_completed == 2 && GET_MODE (operands[0]) == QImode"
+  "z_replacement_completed == 2"
   [(set (match_dup 3) (match_dup 2))
    (set (cc0) (and:QI (match_dup 0) (match_dup 4)))]
   "operands[2] = gen_rtx (REG, HImode, REGNO (operands[1]));
@@ -456,7 +457,7 @@
 	(and:QI (match_operand:QI 0 "tst_operand" "d,m")
 		(match_operand:QI 1 "cmp_operand" "m,d")))
    (use (match_operand:HI 2 "hard_reg_operand" "xy,xy"))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   ""
   "#")
   
@@ -465,12 +466,12 @@
 	(and:QI (match_operand:QI 0 "tst_operand" "")
 		(match_operand:QI 1 "cmp_operand" "")))
    (use (match_operand:HI 2 "hard_reg_operand" ""))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   "z_replacement_completed == 2"
-  [(set (mem:HI (pre_dec:HI (reg:HI 3))) (match_dup 2))
+  [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))
    (set (match_dup 2) (match_dup 3))
    (set (cc0) (and:QI (match_dup 0) (match_dup 1)))
-   (set (match_dup 2) (mem:HI (post_inc:HI (reg:HI 3))))]
+   (set (match_dup 2) (mem:HI (post_inc:HI (reg:HI SP_REGNUM))))]
   "operands[3] = gen_rtx (REG, HImode, SOFT_Z_REGNUM);")
 
 (define_insn "bitcmphi"
@@ -546,7 +547,7 @@
 	(compare (match_operand:QI 0 "tst_operand" "dxy,m")
 		 (match_operand:QI 1 "cmp_operand" "m,dxy")))
    (use (match_operand:HI 2 "hard_reg_operand" "dxy,dxy"))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   ""
   "#")
   
@@ -555,7 +556,7 @@
 	(compare (match_operand:QI 0 "tst_operand" "")
 		 (match_operand:QI 1 "cmp_operand" "")))
    (use (match_operand:HI 2 "hard_reg_operand" ""))
-   (use (reg:HI 11))]
+   (use (reg:HI SOFT_Z_REGNUM))]
   "z_replacement_completed == 2"
   [(set (mem:HI (pre_dec:HI (reg:HI SP_REGNUM))) (match_dup 2))
    (set (match_dup 2) (match_dup 3))
@@ -620,7 +621,7 @@
 ;; because there is no memory->memory moves.  It must be defined with
 ;; earlyclobber (&) so that it does not appear in the source or destination 
 ;; address.  Providing patterns for movdi/movdf allows GCC to generate
-;; better code.  [Until now, the scratch register is limited to D becuse
+;; better code.  [Until now, the scratch register is limited to D because
 ;; otherwise we can run out of registers in the A_REGS class for reload].
 ;;
 ;; For 68HC12, the scratch register is not necessary.  To use the same
@@ -2089,9 +2090,9 @@
 }")
 
 (define_insn "*addhi3_68hc12"
-  [(set (match_operand:HI 0 "register_operand" "=xy,d,xy*z*w,xy*z*w,xy*z")
+  [(set (match_operand:HI 0 "register_operand" "=xyd,d,xy*z*w,xy*z*w,xy*z")
         (plus:HI (match_operand:HI 1 "register_operand" "%0,0,0,xy*zw,0")
-                 (match_operand:HI 2 "general_operand" "N,im*A*wu,id,id,!mu*A")))]
+                 (match_operand:HI 2 "general_operand" "i,m*A*wu,id,id,!mu*A")))]
   "TARGET_M6812"
   "*
 {
@@ -2278,9 +2279,9 @@
 }")
 
 (define_insn "*addhi3"
-  [(set (match_operand:HI 0 "hard_reg_operand" "=A,d,!A,d*A,!d*A")
-	(plus:HI (match_operand:HI 1 "general_operand" "%0,0,0,0,0")
-		 (match_operand:HI 2 "general_operand" "N,i,I,mi*A*d,!u*d*w")))]
+  [(set (match_operand:HI 0 "hard_reg_operand" "=A,dA,d,!A,d*A,!d*A")
+	(plus:HI (match_operand:HI 1 "general_operand" "%0,0,0,0,0,0")
+		 (match_operand:HI 2 "general_operand" "N,I,i,I,mi*A*d,!u*d*w")))]
   "TARGET_M6811"
   "*
 {
@@ -2894,12 +2895,15 @@
 }")
 
 (define_insn "mulqi3"
-  [(set (match_operand:QI 0 "register_operand" "=d")
-        (mult:QI (match_operand:QI 1 "nonimmediate_operand" "dum")
-		 (match_operand:QI 2 "nonimmediate_operand" "dum")))]
+  [(set (match_operand:QI 0 "register_operand" "=d,*x,*y")
+        (mult:QI (match_operand:QI 1 "nonimmediate_operand" "%dum,0,0")
+		 (match_operand:QI 2 "general_operand" "dium,*xium,*yium")))]
   ""
   "*
 {
+  if (A_REG_P (operands[0]))
+    return \"#\";
+
   if (D_REG_P (operands[1]) && D_REG_P (operands[2]))
     {
       output_asm_insn (\"tba\", operands);
@@ -2925,6 +2929,28 @@
   return \"mul\";
 }")
 
+(define_split
+  [(set (match_operand:QI 0 "hard_addr_reg_operand" "")
+        (mult:QI (match_operand:QI 1 "general_operand" "")
+		 (match_operand:QI 2 "general_operand" "")))]
+  "z_replacement_completed == 2"
+  [(parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+	      (set (match_dup 3) (reg:HI D_REGNUM))])
+   (set (reg:QI D_REGNUM) (mult:QI (match_dup 5) (match_dup 6)))
+   (parallel [(set (reg:HI D_REGNUM) (match_dup 3))
+              (set (match_dup 3) (reg:HI D_REGNUM))])]
+  "
+   operands[3] = gen_rtx (REG, HImode, REGNO (operands[0]));
+   if (A_REG_P (operands[1]))
+     operands[5] = gen_rtx (REG, QImode, HARD_D_REGNUM);
+   else
+     operands[5] = operands[1];
+   if (A_REG_P (operands[2]))
+     operands[6] = gen_rtx (REG, QImode, HARD_D_REGNUM);
+   else
+     operands[6] = operands[2];
+  ")
+
 (define_insn "mulqihi3"
   [(set (match_operand:HI 0 "register_operand" "=d,d")
         (mult:HI (sign_extend:HI
@@ -4446,15 +4472,21 @@
     (set (match_dup 4) (match_dup 2))
 
     (set (match_dup 2) (match_dup 5))
-    (set (match_dup 2) (rotate:HI (match_dup 2) (reg:HI CC_REGNUM)))
+    (parallel [(set (match_dup 2)
+		       (rotate:HI (match_dup 2) (const_int 1)))
+	       (clobber (reg:HI CC_REGNUM))])
     (set (match_dup 6) (match_dup 2))
 
     (set (match_dup 2) (match_dup 7))
-    (set (match_dup 2) (rotate:HI (match_dup 2) (reg:HI CC_REGNUM)))
+    (parallel [(set (match_dup 2)
+		       (rotate:HI (match_dup 2) (const_int 1)))
+	       (clobber (reg:HI CC_REGNUM))])
     (set (match_dup 8) (match_dup 2))
 
     (set (match_dup 2) (match_dup 9))
-    (set (match_dup 2) (rotate:HI (match_dup 2) (reg:HI CC_REGNUM)))
+    (parallel [(set (match_dup 2)
+		       (rotate:HI (match_dup 2) (const_int 1)))
+	       (clobber (reg:HI CC_REGNUM))])
     (set (match_dup 10) (match_dup 2))]
    "operands[3] = m68hc11_gen_lowpart (SImode, operands[1]);
     operands[5] = m68hc11_gen_highpart (HImode, operands[3]);
@@ -4757,13 +4789,16 @@
 
 
 (define_insn "*ashlhi3_2"
-  [(set (match_operand:HI 0 "register_operand" "=d")
-	(ashift:HI (match_operand:HI 1 "register_operand" "0")
-                   (match_operand:HI 2 "register_operand" "+x")))
+  [(set (match_operand:HI 0 "register_operand" "=d,*x")
+	(ashift:HI (match_operand:HI 1 "register_operand" "0,0")
+                   (match_operand:HI 2 "register_operand" "+x,+d")))
    (clobber (match_dup 2))]
   ""
   "*
 {
+  if (A_REG_P (operands[0]))
+    return \"#\";
+
   CC_STATUS_INIT;
   return \"bsr\\t___lshlhi3\";
 }")
@@ -5046,21 +5081,17 @@
 }")
 
 (define_insn "*ashrhi3"
-  [(set (match_operand:HI 0 "register_operand" "=d,x")
+  [(set (match_operand:HI 0 "register_operand" "=d,*x")
 	(ashiftrt:HI (match_operand:HI 1 "register_operand" "0,0")
 	             (match_operand:HI 2 "register_operand" "+x,+d")))
    (clobber (match_dup 2))]
   ""
   "*
 {
-  CC_STATUS_INIT;
-  if (D_REG_P (operands[2]))
-    output_asm_insn (\"xgd%0\", operands);
+  if (A_REG_P (operands[0]))
+    return \"#\";
 
   output_asm_insn (\"bsr\\t___ashrhi3\", operands);
-  if (D_REG_P (operands[2]))
-    output_asm_insn (\"xgd%0\", operands);
-
   return \"\"; 
 }")
 
@@ -5310,15 +5341,18 @@
     (set (match_dup 4) (match_dup 2))
 
     (set (match_dup 2) (match_dup 5))
-    (set (match_dup 2) (rotatert:HI (match_dup 2) (reg:HI CC_REGNUM)))
+    (parallel [(set (match_dup 2) (rotatert:HI (match_dup 2) (const_int 1)))
+               (clobber (reg:HI CC_REGNUM))])
     (set (match_dup 6) (match_dup 2))
 
     (set (match_dup 2) (match_dup 7))
-    (set (match_dup 2) (rotatert:HI (match_dup 2) (reg:HI CC_REGNUM)))
+    (parallel [(set (match_dup 2) (rotatert:HI (match_dup 2) (const_int 1)))
+               (clobber (reg:HI CC_REGNUM))])
     (set (match_dup 8) (match_dup 2))
 
     (set (match_dup 2) (match_dup 9))
-    (set (match_dup 2) (rotatert:HI (match_dup 2) (reg:HI CC_REGNUM)))
+    (parallel [(set (match_dup 2) (rotatert:HI (match_dup 2) (const_int 1)))
+               (clobber (reg:HI CC_REGNUM))])
     (set (match_dup 10) (match_dup 2))]
    "operands[3] = m68hc11_gen_highpart (SImode, operands[1]);
     operands[5] = m68hc11_gen_lowpart (HImode, operands[3]);
@@ -5594,22 +5628,17 @@
 }")
 
 (define_insn "*lshrhi3"
-  [(set (match_operand:HI 0 "register_operand" "=d,x")
+  [(set (match_operand:HI 0 "register_operand" "=d,*x")
 	(lshiftrt:HI (match_operand:HI 1 "register_operand" "0,0")
 		     (match_operand:HI 2 "register_operand" "+x,+d")))
    (clobber (match_dup 2))]
   ""
   "*
 {
-  CC_STATUS_INIT;
-  if (D_REG_P (operands[2]))
-    output_asm_insn (\"xgd%0\", operands);
-
-  output_asm_insn (\"bsr\\t___lshrhi3\", operands);
-  if (D_REG_P (operands[2]))
-    output_asm_insn (\"xgd%0\", operands);
+  if (A_REG_P (operands[0]))
+    return \"#\";
 
-  return \"\"; 
+  return \"bsr\\t___lshrhi3\";
 }")
 
 (define_expand "lshrqi3"
@@ -5750,7 +5779,8 @@
 (define_insn "*rotlhi3_with_carry"
   [(set (match_operand:HI 0 "register_operand" "=d")
 	(rotate:HI (match_operand:HI 1 "register_operand" "0")
-		   (reg:HI CC_REGNUM)))]
+		   (const_int 1)))
+   (clobber (reg:HI CC_REGNUM))]
   ""
   "*
 {
@@ -5761,7 +5791,8 @@
 (define_insn "*rotrhi3_with_carry"
   [(set (match_operand:HI 0 "register_operand" "=d")
 	(rotatert:HI (match_operand:HI 1 "register_operand" "0")
-		     (reg:HI CC_REGNUM)))]
+		     (const_int 1)))
+   (clobber (reg:HI CC_REGNUM))]
   ""
   "*
 {
@@ -5780,7 +5811,41 @@
   return \"\";
 }")
 
-(define_insn "rotlhi3"
+(define_insn "rotrqi3"
+  [(set (match_operand:QI 0 "register_operand" "=d,!q")
+	(rotatert:QI (match_operand:QI 1 "register_operand" "0,0")
+		     (match_operand:QI 2 "const_int_operand" "i,i")))]
+  ""
+  "*
+{
+  m68hc11_gen_rotate (ROTATERT, insn, operands);
+  return \"\";
+}")
+
+(define_expand "rotlhi3"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(rotate:HI (match_operand:HI 1 "register_operand" "")
+	           (match_operand:HI 2 "general_operand" "")))]
+   ""
+   "
+{
+  if (GET_CODE (operands[2]) != CONST_INT)
+    {
+      rtx scratch = gen_reg_rtx (HImode);
+      operand1 = force_reg (HImode, operand1);
+
+      emit_move_insn (scratch, operands[2]);
+      emit_insn (gen_rtx (PARALLEL, VOIDmode,
+		 gen_rtvec (2, gen_rtx (SET, VOIDmode,
+					operand0,
+					gen_rtx_ROTATE (HImode,
+						operand1, scratch)),
+			      gen_rtx (CLOBBER, VOIDmode, scratch))));
+      DONE;
+    }
+}")
+
+(define_insn "rotlhi3_const"
   [(set (match_operand:HI 0 "register_operand" "=d")
 	(rotate:HI (match_operand:HI 1 "register_operand" "0")
 		   (match_operand:HI 2 "const_int_operand" "i")))]
@@ -5791,18 +5856,44 @@
   return \"\";
 }")
 
-(define_insn "rotrqi3"
-  [(set (match_operand:QI 0 "register_operand" "=d,!q")
-	(rotatert:QI (match_operand:QI 1 "register_operand" "0,0")
-		     (match_operand:QI 2 "const_int_operand" "i,i")))]
+(define_insn "*rotlhi3"
+  [(set (match_operand:HI 0 "register_operand" "=d,*x")
+	(rotate:HI (match_operand:HI 1 "register_operand" "0,0")
+		   (match_operand:HI 2 "general_operand" "+x,+d")))
+   (clobber (match_dup 2))]
   ""
   "*
 {
-  m68hc11_gen_rotate (ROTATERT, insn, operands);
-  return \"\";
+  if (A_REG_P (operands[0]))
+    return \"#\";
+
+  return \"bsr\\t___rotlhi3\";
 }")
 
-(define_insn "rotrhi3"
+(define_expand "rotrhi3"
+  [(set (match_operand:HI 0 "register_operand" "")
+	(rotatert:HI (match_operand:HI 1 "general_operand" "")
+	             (match_operand:HI 2 "general_operand" "")))]
+   ""
+   "
+{
+  if (GET_CODE (operands[2]) != CONST_INT)
+    {
+      rtx scratch = gen_reg_rtx (HImode);
+      operand1 = force_reg (HImode, operand1);
+
+      emit_move_insn (scratch, operands[2]);
+      emit_insn (gen_rtx (PARALLEL, VOIDmode,
+		 gen_rtvec (2, gen_rtx (SET, VOIDmode,
+					operand0,
+					gen_rtx_ROTATERT (HImode,
+						operand1, scratch)),
+			      gen_rtx (CLOBBER, VOIDmode, scratch))));
+      DONE;
+    }
+}")
+
+(define_insn "rotrhi3_const"
   [(set (match_operand:HI 0 "register_operand" "=d")
 	(rotatert:HI (match_operand:HI 1 "register_operand" "0")
 		     (match_operand:HI 2 "const_int_operand" "i")))]
@@ -5813,6 +5904,242 @@
   return \"\";
 }")
 
+(define_insn "*rotrhi3"
+  [(set (match_operand:HI 0 "register_operand" "=d,*x")
+	(rotatert:HI (match_operand:HI 1 "register_operand" "0,0")
+		     (match_operand:HI 2 "general_operand" "+x,+d")))
+   (clobber (match_dup 2))]
+  ""
+  "*
+{
+  if (A_REG_P (operands[0]))
+    return \"#\";
+
+  return \"bsr\\t___rotrhi3\";
+}")
+
+;; Split a shift operation on an address register in a shift
+;; on D_REGNUM.
+(define_split /* "*rotrhi3_addr" */
+  [(set (match_operand:HI 0 "hard_addr_reg_operand" "")
+	(match_operator:HI 3 "m68hc11_shift_operator"
+	    [(match_operand:HI 1 "register_operand" "")
+	     (match_operand:HI 2 "register_operand" "")]))
+   (clobber (match_dup 2))]
+  "z_replacement_completed == 2"
+  [(parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+              (set (match_dup 0) (reg:HI D_REGNUM))])
+   (parallel [(set (reg:HI D_REGNUM) 
+		   (match_op_dup 3 [(reg:HI D_REGNUM) (match_dup 0)]))
+	      (clobber (match_dup 0))])
+   (parallel [(set (reg:HI D_REGNUM) (match_dup 0))
+              (set (match_dup 0) (reg:HI D_REGNUM))])]
+  "")
+
+;;--------------------------------------------------------------------
+;;-  68HC12 Decrement/Increment and branch
+;;--------------------------------------------------------------------
+;; These patterns are used by loop optimization as well as peephole2
+;; They must handle reloading themselves and the scratch register
+;; is used for that.  Even if we accept memory operand, we must not
+;; accept them on the predicate because it might create too many reloads.
+;; (specially on HC12 due to its auto-incdec addressing modes).
+;;
+(define_expand "decrement_and_branch_until_zero"
+  [(parallel [(set (pc)
+		   (if_then_else
+		    (ne (plus:HI (match_operand:HI 0 "register_operand" "")
+				 (const_int 0))
+			(const_int 1))
+		    (label_ref (match_operand 1 "" ""))
+		    (pc)))
+	      (set (match_dup 0)
+		   (plus:HI (match_dup 0)
+			    (const_int -1)))
+	      (clobber (match_scratch:HI 2 ""))])]
+  "TARGET_M6812"
+  "")
+
+(define_expand "doloop_end"
+  [(use (match_operand 0 "" ""))	; loop pseudo
+   (use (match_operand 1 "" ""))	; iterations; zero if unknown
+   (use (match_operand 2 "" ""))	; max iterations
+   (use (match_operand 3 "" ""))	; loop level
+   (use (match_operand 4 "" ""))]	; label
+  "TARGET_M6812"
+  "
+{
+  /* Reject non-constant loops as it generates bigger code due to
+     the handling of the loop register.  We can do better by using
+     the peephole2 dbcc/ibcc patterns.  */
+  if (INTVAL (operands[1]) == 0)
+    {
+      FAIL;
+    }
+  if (GET_MODE (operands[0]) == HImode)
+    {
+      emit_jump_insn (gen_m68hc12_dbcc_dec_hi (operands[0],
+					       gen_rtx (NE, HImode),
+					       operands[4]));
+      DONE;
+    }
+  if (GET_MODE (operands[0]) == QImode)
+    {
+      emit_jump_insn (gen_m68hc12_dbcc_dec_qi (operands[0],
+					       gen_rtx (NE, QImode),
+					       operands[4]));
+      DONE;
+    }
+
+  FAIL;
+}")
+
+;; Decrement-and-branch insns.
+(define_insn "m68hc12_dbcc_dec_hi"
+  [(set (pc)
+	(if_then_else
+	  (match_operator 1 "m68hc11_eq_compare_operator"
+	     [(match_operand:HI 0 "register_operand" "+dxy,m*u*z")
+	      (const_int 1)])
+	 (label_ref (match_operand 2 "" ""))
+	 (pc)))
+   (set (match_dup 0)
+	(plus:HI (match_dup 0) (const_int -1)))
+   (clobber (match_scratch:HI 3 "=X,dxy"))]
+  "TARGET_M6812"
+  "*
+{
+  if (!H_REG_P (operands[0]))
+    return \"#\";
+
+  CC_STATUS_INIT;
+  if (GET_CODE (operands[1]) == EQ)
+    return \"dbeq\\t%0,%l2\";
+  else
+    return \"dbne\\t%0,%l2\";
+}")
+
+;; Decrement-and-branch insns.
+(define_insn "m68hc12_dbcc_inc_hi"
+  [(set (pc)
+	(if_then_else
+	  (match_operator 1 "m68hc11_eq_compare_operator"
+	     [(match_operand:HI 0 "register_operand" "+dxy,m*u*z")
+	      (const_int -1)])
+	 (label_ref (match_operand 2 "" ""))
+	 (pc)))
+   (set (match_dup 0)
+	(plus:HI (match_dup 0) (const_int 1)))
+   (clobber (match_scratch:HI 3 "=X,dxy"))]
+  "TARGET_M6812"
+  "*
+{
+  if (!H_REG_P (operands[0]))
+    return \"#\";
+
+  CC_STATUS_INIT;
+  if (GET_CODE (operands[1]) == EQ)
+    return \"ibeq\\t%0,%l2\";
+  else
+    return \"ibeq\\t%0,%l2\";
+}")
+
+;; Decrement-and-branch (QImode).
+(define_insn "m68hc12_dbcc_dec_qi"
+  [(set (pc)
+	(if_then_else
+	  (match_operator 1 "m68hc11_eq_compare_operator"
+	     [(match_operand:QI 0 "register_operand" "+d,m*u*A")
+	      (const_int 1)])
+	 (label_ref (match_operand 2 "" ""))
+	 (pc)))
+   (set (match_dup 0)
+	(plus:QI (match_dup 0) (const_int -1)))
+   (clobber (match_scratch:QI 3 "=X,d"))]
+  "TARGET_M6812"
+  "*
+{
+  if (!D_REG_P (operands[0]))
+    return \"#\";
+
+  CC_STATUS_INIT;
+  if (GET_CODE (operands[1]) == EQ)
+    return \"dbeq\\tb,%l2\";
+  else
+    return \"dbne\\tb,%l2\";
+}")
+
+;; Increment-and-branch (QImode).
+(define_insn "m68hc12_dbcc_inc_qi"
+  [(set (pc)
+	(if_then_else
+	  (match_operator 1 "m68hc11_eq_compare_operator"
+	     [(match_operand:QI 0 "register_operand" "+d,m*u*A")
+	      (const_int -1)])
+	 (label_ref (match_operand 2 "" ""))
+	 (pc)))
+   (set (match_dup 0)
+	(plus:QI (match_dup 0) (const_int 1)))
+   (clobber (match_scratch:QI 3 "=X,d"))]
+  "TARGET_M6812"
+  "*
+{
+  if (!D_REG_P (operands[0]))
+    return \"#\";
+
+  CC_STATUS_INIT;
+  if (GET_CODE (operands[1]) == EQ)
+    return \"ibeq\\tb,%l2\";
+  else
+    return \"ibeq\\tb,%l2\";
+}")
+
+;; Split the above to handle the case where operand 0 is in memory
+;; (a register that couldn't get a hard register)
+(define_split
+  [(set (pc)
+	(if_then_else
+	  (match_operator 3 "m68hc11_eq_compare_operator"
+	     [(match_operand:HI 0 "general_operand" "")
+	      (match_operand:HI 1 "const_int_operand" "")])
+	 (label_ref (match_operand 4 "" ""))
+	 (pc)))
+   (set (match_dup 0)
+	(plus:HI (match_dup 0) (match_operand 2 "const_int_operand" "")))
+   (clobber (match_operand:HI 5 "hard_reg_operand" ""))]
+  "TARGET_M6812 && reload_completed"
+  [(set (match_dup 5) (match_dup 0))
+   (set (match_dup 5) (plus:HI (match_dup 5) (match_dup 2)))
+   (set (match_dup 0) (match_dup 5))
+   (set (pc)
+	(if_then_else (match_op_dup 3
+			    [(match_dup 5) (const_int 0)])
+		      (label_ref (match_dup 4)) (pc)))]
+  "")
+
+;; Split the above to handle the case where operand 0 is in memory
+;; (a register that couldn't get a hard register)
+(define_split
+  [(set (pc)
+	(if_then_else
+	  (match_operator 3 "m68hc11_eq_compare_operator"
+	     [(match_operand:QI 0 "general_operand" "")
+	      (match_operand:QI 1 "const_int_operand" "")])
+	 (label_ref (match_operand 4 "" ""))
+	 (pc)))
+   (set (match_dup 0)
+	(plus:QI (match_dup 0) (match_operand 2 "const_int_operand" "")))
+   (clobber (match_operand:QI 5 "hard_reg_operand" ""))]
+  "TARGET_M6812 && reload_completed"
+  [(set (match_dup 5) (match_dup 0))
+   (set (match_dup 5) (plus:QI (match_dup 5) (match_dup 2)))
+   (set (match_dup 0) (match_dup 5))
+   (set (pc)
+	(if_then_else (match_op_dup 3
+			    [(match_dup 5) (const_int 0)])
+		      (label_ref (match_dup 4)) (pc)))]
+  "")
+
 ;;--------------------------------------------------------------------
 ;;-  Jumps and transfers
 ;;--------------------------------------------------------------------
@@ -6279,14 +6606,27 @@
   ""
  "*
 {
-  int far_call = current_function_far;
-
   if (GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF)
     {
-      if (SYMBOL_REF_FLAG (XEXP (operands[0], 0)) == 1)
+      if (m68hc11_is_far_symbol (operands[0]))
+        {
+          if (TARGET_M6812)
+            {
+	      output_asm_insn (\"call\\t%0\", operands);
+	      return \"\";
+	    }
+          else
+	    {
+	      output_asm_insn (\"pshb\", operands);
+	      output_asm_insn (\"ldab\\t#%%page(%0)\", operands);
+	      output_asm_insn (\"ldy\\t#%%addr(%0)\", operands);
+	      return \"jsr\\t__call_a32\";
+	    }
+	}
+      if (m68hc11_is_trap_symbol (operands[0]))
         return \"swi\";
       else
-        return far_call ? \"call\\t%0\" : \"bsr\\t%0\";
+        return \"bsr\\t%0\";
     }
   else
     {
@@ -6301,14 +6641,27 @@
   ""
  "*
 {
-  int far_call = current_function_far;
-
   if (GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF)
     {
-      if (SYMBOL_REF_FLAG (XEXP (operands[1], 0)) == 1)
+      if (m68hc11_is_far_symbol (operands[1]))
+        {
+          if (TARGET_M6812)
+            {
+	      output_asm_insn (\"call\\t%1\", operands);
+	      return \"\";
+	    }
+          else
+	    {
+	      output_asm_insn (\"pshb\", operands);
+	      output_asm_insn (\"ldab\\t#%%page(%1)\", operands);
+	      output_asm_insn (\"ldy\\t#%%addr(%1)\", operands);
+	      return \"jsr\\t__call_a32\";
+	    }
+	}
+      if (m68hc11_is_trap_symbol (operands[1]))
         return \"swi\";
       else
-        return far_call ? \"call\\t%1\" : \"bsr\\t%1\";
+        return \"bsr\\t%1\";
     }
   else
     {
@@ -6393,18 +6746,18 @@
 
   if (ret_size && ret_size <= 2)
     {
-      emit_insn (gen_rtx (PARALLEL, VOIDmode,
-		 gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
-			       gen_rtx_USE (VOIDmode,
-					gen_rtx_REG (HImode, 1)))));
+      emit_jump_insn (gen_rtx (PARALLEL, VOIDmode,
+		      gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
+				 gen_rtx_USE (VOIDmode,
+				       	      gen_rtx_REG (HImode, 1)))));
       DONE;
     }
   if (ret_size)
     {
-      emit_insn (gen_rtx (PARALLEL, VOIDmode,
-		 gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
-			       gen_rtx_USE (VOIDmode,
-					gen_rtx_REG (SImode, 0)))));
+      emit_jump_insn (gen_rtx (PARALLEL, VOIDmode,
+		      gen_rtvec (2, gen_rtx_RETURN (VOIDmode),
+			         gen_rtx_USE (VOIDmode,
+					      gen_rtx_REG (SImode, 0)))));
       DONE;
     }
 }")
@@ -6422,7 +6775,25 @@
     return \"\";
   if (current_function_interrupt || current_function_trap)
     return \"rti\";
-  return current_function_far ? \"rtc\" : \"rts\";
+  else if (!current_function_far)
+    return \"rts\";
+  else if (TARGET_M6812)
+    return \"rtc\";
+  else
+    {
+      int ret_size = 0;
+
+      if (current_function_return_rtx)
+        ret_size = GET_MODE_SIZE (GET_MODE (current_function_return_rtx));
+
+      if (ret_size == 0)
+        return \"jmp\\t__return_void\";
+      if (ret_size <= 2)
+        return \"jmp\\t__return_16\";
+      if (ret_size <= 4)
+        return \"jmp\\t__return_32\";
+      return \"jmp\\t__return_16\";
+    }
 }")
 
 (define_insn "*return_16bit"
@@ -6439,7 +6810,12 @@
     return \"\";
   if (current_function_interrupt || current_function_trap)
     return \"rti\";
-  return current_function_far ? \"rtc\" : \"rts\";
+  else if (!current_function_far)
+    return \"rts\";
+  else if (TARGET_M6812)
+    return \"rtc\";
+  else
+    return \"jmp\\t__return_16\";
 }")
 
 (define_insn "*return_32bit"
@@ -6456,7 +6832,12 @@
     return \"\";
   if (current_function_interrupt || current_function_trap)
     return \"rti\";
-  return current_function_far ? \"rtc\" : \"rts\";
+  else if (!current_function_far)
+    return \"rts\";
+  else if (TARGET_M6812)
+    return \"rtc\";
+  else
+    return \"jmp\\t__return_32\";
 }")
 
 (define_insn "indirect_jump"
@@ -6488,6 +6869,62 @@
 ;;- Peepholes
 ;;--------------------------------------------------------------------
 
+;;--------------------------------------------------------------------
+;;- 68HC12 dbcc/ibcc peepholes
+;;--------------------------------------------------------------------
+;;
+;; Replace: "addd #-1; bne L1" into "dbne d,L1"
+;;          "addd #-1; beq L1" into "dbeq d,L1"
+;;          "addd #1; bne L1" into "ibne d,L1"
+;;          "addd #1; beq L1" into "ibeq d,L1"
+;;
+(define_peephole2
+  [(set (match_operand:HI 0 "hard_reg_operand" "")
+	(plus:HI (match_dup 0)
+	         (match_operand:HI 1 "const_int_operand" "")))
+   (set (pc)
+        (if_then_else (match_operator 2 "m68hc11_eq_compare_operator"
+		         [(match_dup 0)
+			  (const_int 0)])
+		      (label_ref (match_operand 3 "" "")) (pc)))]
+  "TARGET_M6812 && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)"
+  [(parallel [
+      (set (pc) (if_then_else (match_op_dup 2 [(match_dup 0) (match_dup 5)])
+			      (label_ref (match_dup 3)) (pc)))
+      (set (match_dup 0) (plus:HI (match_dup 0) (match_dup 1)))
+      (clobber (match_dup 4))])]
+  "operands[4] = gen_rtx_SCRATCH(HImode);
+   operands[5] = GEN_INT (-INTVAL (operands[1]));")
+
+
+;;
+;; Replace: "addb #-1; bne L1" into "dbne b,L1"
+;;          "addb #-1; beq L1" into "dbeq b,L1"
+;;
+(define_peephole2
+  [(set (match_operand:QI 0 "hard_reg_operand" "")
+	(plus:QI (match_dup 0)
+	         (match_operand:QI 1 "const_int_operand" "")))
+   (set (pc)
+        (if_then_else (match_operator 2 "m68hc11_eq_compare_operator"
+		         [(match_dup 0)
+			  (const_int 0)])
+		      (label_ref (match_operand 3 "" "")) (pc)))]
+  "TARGET_M6812 && D_REG_P (operands[0])
+   && (INTVAL (operands[1]) == 1 || INTVAL (operands[1]) == -1)"
+  [(parallel [
+      (set (pc) (if_then_else (match_op_dup 2 [(match_dup 0) (match_dup 5)])
+			      (label_ref (match_dup 3)) (pc)))
+      (set (match_dup 0) (plus:QI (match_dup 0) (match_dup 1)))
+      (clobber (match_dup 4))])]
+  "operands[4] = gen_rtx_SCRATCH(QImode);
+   operands[5] = GEN_INT (-INTVAL (operands[1]));")
+
+
+;;--------------------------------------------------------------------
+;;- Move peephole2
+;;--------------------------------------------------------------------
+
 ;;
 ;; Replace "leas 2,sp" with a "pulx" or a "puly".
 ;; On 68HC12, this is one cycle slower but one byte smaller.
@@ -6547,8 +6984,7 @@
    (set (match_operand:HI 2 "hard_reg_operand" "")
         (match_dup 1))]
   "(D_REG_P (operands[2]) || X_REG_P (operands[2]) || Y_REG_P (operands[2]))
-   && !reg_mentioned_p (operands[2], operands[0])
-   && GET_MODE (operands[2]) == HImode"
+   && !reg_mentioned_p (operands[2], operands[0])"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))]
   "")
@@ -6673,6 +7109,29 @@
   "")
 
 ;;
+;; Replace a "ldd <mem>; addd #N; std <mem>" into a
+;; "ldx <mem>; leax; stx <mem>" if we have a free X/Y register
+;; and the constant is small.
+;;
+(define_peephole2
+  [(set (match_operand:HI 0 "hard_reg_operand" "")
+	(match_operand:HI 1 "general_operand" ""))
+   (set (match_dup 0) (plus:HI (match_dup 0)
+			       (match_operand:HI 2 "const_int_operand" "")))
+   (set (match_operand:HI 3 "nonimmediate_operand" "")
+        (match_dup 0))
+   (match_scratch:HI 4 "xy")]
+  "D_REG_P (operands[0])
+   && (TARGET_M6812 
+       || (INTVAL (operands[2]) >= -2 && INTVAL (operands[2]) <= 2))
+   && peep2_reg_dead_p (3, operands[0])"
+  [(set (match_dup 4) (match_dup 1))
+   (set (match_dup 4) (plus:HI (match_dup 4) (match_dup 2)))
+   (set (match_dup 3) (match_dup 4))]
+  "if (reg_mentioned_p (operands[4], operands[1])) FAIL;
+   if (reg_mentioned_p (operands[4], operands[3])) FAIL;")
+
+;;
 ;; This peephole catches the address computations generated by the reload
 ;; pass. 
 (define_peephole
diff --git a/gcc/config/m68hc11/m68hc12.h b/gcc/config/m68hc11/m68hc12.h
index ba56c172993..f98a0d0761f 100644
--- a/gcc/config/m68hc11/m68hc12.h
+++ b/gcc/config/m68hc11/m68hc12.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler, for m68hc12.
-   Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
-   Contributed by Stephane Carrez (stcarrez@worldnet.fr).
+   Copyright (C) 1999, 2000, 2001, 2003 Free Software Foundation, Inc.
+   Contributed by Stephane Carrez (stcarrez@nerim.fr).
 
 This file is part of GNU CC.
 
@@ -20,18 +20,26 @@ the Free Software Foundation, 59 Temple Place - Suite 330,
 Boston, MA 02111-1307, USA.  */
 
 /* Compile and assemble for a 68hc12 unless there is a -m68hc11 option.  */
-#define ASM_SPEC       "%{m68hc11:-m68hc11}%{!m68hc11:-m68hc12}"
+#define ASM_SPEC                                                \
+"%{m68hc11:-m68hc11}"                                           \
+"%{m68hcs12:-m68hcs12}"                                         \
+"%{!m68hc11:%{!m68hcs12:-m68hc12}}"
 #define LIB_SPEC       ""
 #define CC1_SPEC       ""
 
 /* We need to tell the linker the target elf format.  Just pass an
    emulation option.  This can be overriden by -Wl option of gcc.  */
-#define LINK_SPEC      "%{m68hc11:-m m68hc11elf}%{!m68hc11:-m m68hc12elf}"
+#define LINK_SPEC                                               \
+"%{m68hc11:-m m68hc11elf}"                                      \
+"%{m68hcs12:-m m68hc12elf}"                                     \
+"%{!m68hc11:%{!m68hcs12:-m m68hc11elf}} %{mrelax:-relax}"
 
 #define CPP_SPEC  \
 "%{mshort:-D__HAVE_SHORT_INT__ -D__INT__=16}\
  %{!mshort:-D__INT__=32}\
  %{m68hc11:-Dmc6811 -DMC6811 -Dmc68hc11}\
+ %{!m68hc11:%{!m68hc12:-Dmc6812 -DMC6812 -Dmc68hc12}}\
+ %{m68hcs12:-Dmc6812 -DMC6812 -Dmc68hcs12}\
  %{!m68hc11:-Dmc6812 -DMC6812 -Dmc68hc12}\
  %{fshort-double:-D__HAVE_SHORT_DOUBLE__}"
 
diff --git a/gcc/config/m68hc11/t-m68hc11-gas b/gcc/config/m68hc11/t-m68hc11-gas
index 1833fd56dc3..34153489441 100644
--- a/gcc/config/m68hc11/t-m68hc11-gas
+++ b/gcc/config/m68hc11/t-m68hc11-gas
@@ -23,9 +23,9 @@ LIB1ASMFUNCS = _mulsi3 \
 	_regs_min _regs_frame _regs_d1_2 \
 	_regs_d3_4 _regs_d5_6 _regs_d7_8 _regs_d9_16 _regs_d17_32 \
 	_premain __exit _abort _cleanup \
-	_adddi3 _subdi3 _notdi2 \
+	_adddi3 _subdi3 _notdi2 _rotlhi3 _rotrhi3 \
 	_ashrhi3 _lshrhi3 _lshlhi3 _ashrqi3 _lshlqi3 _map_data _init_bss \
-	_ctor _dtor __far_trampoline
+	_ctor _dtor _far_tramp _call_far _return_far
 
 TARGET_LIBGCC2_CFLAGS = -DUSE_GAS -DIN_GCC
 
@@ -37,10 +37,10 @@ LIB2FUNCS_EXTRA = $(srcdir)/config/udivmodsi4.c \
 LIBGCC2_DEBUG_CFLAGS =-g
 LIBGCC2_CFLAGS = -Os -mrelax $(LIBGCC2_INCLUDES) $(TARGET_LIBGCC2_CFLAGS) $(LIBGCC2_DEBUG_CFLAGS) $(GTHREAD_FLAGS) -DIN_LIBGCC2
 
-MULTILIB_OPTIONS  = m68hc11/m68hc12 mshort fshort-double mlong-calls
+MULTILIB_OPTIONS  = m68hc11/m68hc12 mshort fshort-double
 MULTILIB_DIRNAMES =
 MULTILIB_MATCHES  = m68hc11=m6811 m68hc12=m6812
-MULTILIB_EXCEPTIONS = -mnoshort -mno68hc11 -mnolong-calls
+MULTILIB_EXCEPTIONS = -mnoshort -mno68hc11
 
 LIBGCC = stmp-multilib
 INSTALL_LIBGCC = install-multilib
diff --git a/gcc/config/m68k/coff.h b/gcc/config/m68k/coff.h
index 88720d78157..13cb4ae5c55 100644
--- a/gcc/config/m68k/coff.h
+++ b/gcc/config/m68k/coff.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler.
    m68k series COFF object files and debugging, version.
-   Copyright (C) 1994, 1996, 1997, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1994, 1996, 1997, 2000, 2002 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -55,12 +55,17 @@ Boston, MA 02111-1307, USA.  */
 /* config/m68k.md has an explicit reference to the program counter,
    prefix this by the register prefix.  */
 
-#define ASM_RETURN_CASE_JUMP 			\
-  do {						\
-    if (TARGET_5200)				\
-      return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
-    else					\
-      return "jmp %%pc@(2,%0:w)";		\
+#define ASM_RETURN_CASE_JUMP				\
+  do {							\
+    if (TARGET_5200)					\
+      {							\
+	if (ADDRESS_REG_P (operands[0]))		\
+	  return "jmp %%pc@(2,%0:l)";			\
+	else						\
+	  return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
+      }							\
+    else						\
+      return "jmp %%pc@(2,%0:w)";			\
   } while (0)
 
 /* Here are the new register names.  */
diff --git a/gcc/config/m68k/linux.h b/gcc/config/m68k/linux.h
index bf80cea9f9c..7693af1815a 100644
--- a/gcc/config/m68k/linux.h
+++ b/gcc/config/m68k/linux.h
@@ -194,12 +194,17 @@ Boston, MA 02111-1307, USA.  */
 
 /* Use the default action for outputting the case label.  */
 #undef ASM_OUTPUT_CASE_LABEL
-#define ASM_RETURN_CASE_JUMP			\
-  do {						\
-    if (TARGET_5200)				\
-      return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
-    else					\
-      return "jmp %%pc@(2,%0:w)";		\
+#define ASM_RETURN_CASE_JUMP				\
+  do {							\
+    if (TARGET_5200)					\
+      {							\
+	if (ADDRESS_REG_P (operands[0]))		\
+	  return "jmp %%pc@(2,%0:l)";			\
+	else						\
+	  return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
+      }							\
+    else						\
+      return "jmp %%pc@(2,%0:w)";			\
   } while (0)
 
 /* This is how to output an assembler line that says to advance the
diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c
index 0407dd6b8db..a565eb8a083 100644
--- a/gcc/config/m68k/m68k.c
+++ b/gcc/config/m68k/m68k.c
@@ -1,5 +1,5 @@
 /* Subroutines for insn-output.c for Motorola 68000 family.
-   Copyright (C) 1987, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001
+   Copyright (C) 1987, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003
    Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -461,7 +461,7 @@ m68k_output_function_prologue (stream, size)
 	}
       if (dwarf2out_do_frame ())
 	{
-	  cfa_store_offset += fsize;
+	  cfa_store_offset += fsize + 4;
 	  cfa_offset = cfa_store_offset;
 	  dwarf2out_def_cfa ("", STACK_POINTER_REGNUM, cfa_offset);
 	}
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index 969e5dc3099..3451b9ec8bd 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -1,5 +1,5 @@
 ;;- Machine description for GNU compiler, Motorola 68000 Version
-;;  Copyright (C) 1987, 1988, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001
+;;  Copyright (C) 1987, 1988, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001, 2002, 2003
 ;;  Free Software Foundation, Inc.
 
 ;; This file is part of GNU CC.
@@ -1588,7 +1588,7 @@
   "
 {
   operands[1] = make_safe_from (operands[1], operands[0]);
-  operands[2] = gen_lowpart (HImode, operands[0]);
+  operands[2] = gen_lowpart_SUBREG (HImode, operands[0]);
 }")
 
 (define_expand "zero_extendqihi2"
@@ -1600,7 +1600,7 @@
   "
 {
   operands[1] = make_safe_from (operands[1], operands[0]);
-  operands[2] = gen_lowpart (QImode, operands[0]);
+  operands[2] = gen_lowpart_SUBREG (QImode, operands[0]);
 }")
 
 (define_expand "zero_extendqisi2"
@@ -1612,7 +1612,7 @@
   "
 {
   operands[1] = make_safe_from (operands[1], operands[0]);
-  operands[2] = gen_lowpart (QImode, operands[0]);
+  operands[2] = gen_lowpart_SUBREG (QImode, operands[0]);
 }")
 
 ;; Patterns to recognize zero-extend insns produced by the combiner.
@@ -3122,7 +3122,7 @@
   [(parallel
     [(set (subreg:SI (match_operand:DI 0 "register_operand" "") 4)
 	  (mult:SI (match_operand:SI 1 "register_operand" "")
-		   (match_operand:SI 2 "nonimmediate_operand" "")))
+		   (match_operand:SI 2 "register_operand" "")))
      (set (subreg:SI (match_dup 0) 0)
 	  (truncate:SI (lshiftrt:DI (mult:DI (zero_extend:DI (match_dup 1))
 					     (zero_extend:DI (match_dup 2)))
@@ -3161,7 +3161,7 @@
   [(parallel
     [(set (subreg:SI (match_operand:DI 0 "register_operand" "") 4)
 	  (mult:SI (match_operand:SI 1 "register_operand" "")
-		   (match_operand:SI 2 "nonimmediate_operand" "")))
+		   (match_operand:SI 2 "register_operand" "")))
      (set (subreg:SI (match_dup 0) 0)
 	  (truncate:SI (lshiftrt:DI (mult:DI (sign_extend:DI (match_dup 1))
 					     (sign_extend:DI (match_dup 2)))
@@ -3862,7 +3862,7 @@
 	  case -1 :
 	    /* FIXME : a scratch register would be welcome here if operand[0]
 	       is not a register */
-	    output_asm_insn (\"move%.l %#-1,%R0\", operands);
+	    output_asm_insn (\"move%.l %#-1,%0\", operands);
 	    break;
 	  default :
 	    {
@@ -6973,17 +6973,33 @@
 #ifdef SGS
 #ifdef ASM_OUTPUT_CASE_LABEL
   if (TARGET_5200) 
-    return \"ext%.l %0\;jmp 6(%%pc,%0.l)\";
+    {
+      if (ADDRESS_REG_P (operands[0]))
+	return \"jmp 6(%%pc,%0.l)\";
+      else
+	return \"ext%.l %0\;jmp 6(%%pc,%0.l)\";
+    }
   else
     return \"jmp 6(%%pc,%0.w)\";
 #else
   if (TARGET_5200)
     {
+      if (ADDRESS_REG_P (operands[0]))
+	{
 #ifdef CRDS
-      return \"ext%.l %0\;jmp 2(pc,%0.l)\";
+	  return \"jmp 2(pc,%0.l)\";
 #else
-      return \"extl %0\;jmp 2(%%pc,%0.l)\";
+	  return \"jmp 2(%%pc,%0.l)\";
 #endif  /* end !CRDS */
+	}
+      else
+	{
+#ifdef CRDS
+	  return \"ext%.l %0\;jmp 2(pc,%0.l)\";
+#else
+	  return \"extl %0\;jmp 2(%%pc,%0.l)\";
+#endif  /* end !CRDS */
+	}
     }
   else
     {
@@ -6997,11 +7013,22 @@
 #else /* not SGS */
   if (TARGET_5200)
     {
+      if (ADDRESS_REG_P (operands[0]))
+	{
 #ifdef MOTOROLA
-      return \"ext%.l %0\;jmp (2,pc,%0.l)\";
+	  return \"jmp (2,pc,%0.l)\";
 #else
-      return \"extl %0\;jmp pc@(2,%0:l)\";
+	  return \"jmp pc@(2,%0:l)\";
 #endif
+	}
+      else
+	{
+#ifdef MOTOROLA
+	  return \"ext%.l %0\;jmp (2,pc,%0.l)\";
+#else
+	  return \"extl %0\;jmp pc@(2,%0:l)\";
+#endif
+	}
     }
   else
     {
diff --git a/gcc/config/m68k/m68kelf.h b/gcc/config/m68k/m68kelf.h
index 7866a239209..e5145c9d70c 100644
--- a/gcc/config/m68k/m68kelf.h
+++ b/gcc/config/m68k/m68kelf.h
@@ -1,7 +1,7 @@
 /* m68kelf support, derived from m68kv4.h */
 
 /* Target definitions for GNU compiler for mc680x0 running System V.4
-   Copyright (C) 1991, 1993, 2000 Free Software Foundation, Inc.
+   Copyright (C) 1991, 1993, 2000, 2002 Free Software Foundation, Inc.
 
    Written by Ron Guilmette (rfg@netcom.com) and Fred Fish (fnf@cygnus.com).
 
@@ -73,12 +73,17 @@ Boston, MA 02111-1307, USA.  */
 /* config/m68k.md has an explicit reference to the program counter,
    prefix this by the register prefix.  */
 
-#define ASM_RETURN_CASE_JUMP 			\
-  do {						\
-    if (TARGET_5200)				\
-      return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
-    else					\
-      return "jmp %%pc@(2,%0:w)";		\
+#define ASM_RETURN_CASE_JUMP				\
+  do {							\
+    if (TARGET_5200)					\
+      {							\
+	if (ADDRESS_REG_P (operands[0]))		\
+	  return "jmp %%pc@(2,%0:l)";			\
+	else						\
+	  return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
+      }							\
+    else						\
+      return "jmp %%pc@(2,%0:w)";			\
   } while (0)
 
 /* How to refer to registers in assembler output.
diff --git a/gcc/config/m68k/mot3300.h b/gcc/config/m68k/mot3300.h
index fb7dffa8fc7..e035148ebdc 100644
--- a/gcc/config/m68k/mot3300.h
+++ b/gcc/config/m68k/mot3300.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler,
    SysV68 Motorola 3300 Delta Series.
-   Copyright (C) 1987, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000
+   Copyright (C) 1987, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002
    Free Software Foundation, Inc.
    Contributed by Abramo and Roberto Bagnara (bagnara@dipisa.di.unipi.it)
    based on Alex Crain's 3B1 definitions.
@@ -437,12 +437,17 @@ Boston, MA 02111-1307, USA.  */
 #define ASM_OUTPUT_CASE_FETCH(file, labelno, regname)\
 	asm_fprintf (file, "12(%Rpc,%s.", regname)
 
-#define ASM_RETURN_CASE_JUMP \
-  do {						\
-    if (TARGET_5200)				\
-      return "ext%.l %0\n\tjmp 8(%%pc,%0.l)";	\
-    else					\
-      return "jmp 8(%%pc,%0.w)";		\
+#define ASM_RETURN_CASE_JUMP				\
+  do {							\
+    if (TARGET_5200)					\
+      {							\
+	if (ADDRESS_REG_P (operands[0]))		\
+	  return "jmp 8(%%pc,%0.l)";			\
+	else						\
+	  return "ext%.l %0\n\tjmp 8(%%pc,%0.l)";	\
+      }							\
+    else						\
+      return "jmp 8(%%pc,%0.w)";			\
   } while (0)
 	     
 #else /* USE_GAS */
diff --git a/gcc/config/m68k/netbsd-elf.h b/gcc/config/m68k/netbsd-elf.h
index f9576da2905..342de6e4d91 100644
--- a/gcc/config/m68k/netbsd-elf.h
+++ b/gcc/config/m68k/netbsd-elf.h
@@ -11,7 +11,7 @@ This file is part of GNU CC.
 
 GNU CC is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 1, or (at your option)
+the Free Software Foundation; either version 2, or (at your option)
 any later version.
 
 GNU CC is distributed in the hope that it will be useful,
@@ -215,15 +215,18 @@ while (0)
 
 /* Use the default action for outputting the case label.  */
 #undef ASM_OUTPUT_CASE_LABEL
-#define ASM_RETURN_CASE_JUMP						\
-do									\
-  {									\
-    if (TARGET_5200)							\
-      return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";				\
-    else								\
-      return "jmp %%pc@(2,%0:w)";					\
-  }									\
-while (0)
+#define ASM_RETURN_CASE_JUMP				\
+  do {							\
+    if (TARGET_5200)					\
+      {							\
+	if (ADDRESS_REG_P (operands[0]))		\
+	  return "jmp %%pc@(2,%0:l)";			\
+	else						\
+	  return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
+      }							\
+    else						\
+      return "jmp %%pc@(2,%0:w)";			\
+  } while (0)
 
 
 /* This is how to output an assembler line that says to advance the
diff --git a/gcc/config/m68k/pbb.h b/gcc/config/m68k/pbb.h
index 6eef2acaf47..68c8d09ead5 100644
--- a/gcc/config/m68k/pbb.h
+++ b/gcc/config/m68k/pbb.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler.
    Citicorp/TTI Unicom PBB version (using GAS with a %-register prefix)
-   Copyright (C) 1987, 1988, 1990, 1996, 1997 Free Software Foundation, Inc.
+   Copyright (C) 1987, 1988, 1990, 1996, 1997, 2002 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -109,12 +109,17 @@ Boston, MA 02111-1307, USA.  */
       && ! find_equiv_reg (0, get_last_insn (), 0, 0, 0, 8, Pmode))	\
       asm_fprintf (FILE, "\tmovl %Rd0,%Ra0\n"); } 
 
-#define ASM_RETURN_CASE_JUMP \
-  do {						\
-    if (TARGET_5200)				\
-      return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
-    else					\
-      return "jmp %%pc@(2,%0:w)";		\
+#define ASM_RETURN_CASE_JUMP				\
+  do {							\
+    if (TARGET_5200)					\
+      {							\
+	if (ADDRESS_REG_P (operands[0]))		\
+	  return "jmp %%pc@(2,%0:l)";			\
+	else						\
+	  return "ext%.l %0\n\tjmp %%pc@(2,%0:l)";	\
+      }							\
+    else						\
+      return "jmp %%pc@(2,%0:w)";			\
   } while (0)
 
 /* Although the gas we use can create .ctor and .dtor sections from N_SETT
diff --git a/gcc/config/m68k/t-crtstuff b/gcc/config/m68k/t-crtstuff
index b05764de726..a8bdb502d66 100644
--- a/gcc/config/m68k/t-crtstuff
+++ b/gcc/config/m68k/t-crtstuff
@@ -1,8 +1,10 @@
+EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crti.o crtn.o
+
 # Add flags here as required.
 CRTSTUFF_T_CFLAGS =
 
 # Assemble startup files.
 $(T)crti.o: $(srcdir)/config/m68k/crti.s $(GCC_PASSES)
-       $(GCC_FOR_TARGET) -c -o $(T)crti.o $(srcdir)/config/m68k/crti.s
+	$(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crti.o $(srcdir)/config/m68k/crti.s
 $(T)crtn.o: $(srcdir)/config/m68k/crtn.s $(GCC_PASSES)
-       $(GCC_FOR_TARGET) -c -o $(T)crtn.o $(srcdir)/config/m68k/crtn.s
+	$(GCC_FOR_TARGET) $(MULTILIB_CFLAGS) -c -o $(T)crtn.o $(srcdir)/config/m68k/crtn.s
diff --git a/gcc/config/mcore/mcore.h b/gcc/config/mcore/mcore.h
index 7bce37cd227..cf808b4baaa 100644
--- a/gcc/config/mcore/mcore.h
+++ b/gcc/config/mcore/mcore.h
@@ -137,7 +137,7 @@ extern int target_flags;
   {"relax-immediates",      RELAX_IMM_BIT,				\
      "" },								\
   {"no-relax-immediates", - RELAX_IMM_BIT,				\
-     N_("Do not arbitary sized immediates in bit operations") },	\
+     N_("Do not arbitrary sized immediates in bit operations") },	\
   {"wide-bitfields",        W_FIELD_BIT,				\
      N_("Always treat bit-field as int-sized") },			\
   {"no-wide-bitfields",   - W_FIELD_BIT,				\
diff --git a/gcc/config/mcore/mcore.md b/gcc/config/mcore/mcore.md
index 7e74a98e061..895e7aa216a 100644
--- a/gcc/config/mcore/mcore.md
+++ b/gcc/config/mcore/mcore.md
@@ -150,7 +150,7 @@
 ;; ;    This is done to allow bit field masks to fold together in combine.
 ;; ;    The reload phase will force the immediate into a register at the
 ;; ;    very end.  This helps in some cases, but hurts in others: we'd
-;; ;    really like to cse these immediates.  However, there is an phase
+;; ;    really like to cse these immediates.  However, there is a phase
 ;; ;    ordering problem here.  cse picks up individual masks and cse's
 ;; ;    those, but not folded masks (cse happens before combine).  It's
 ;; ;    not clear what the best solution is because we really want cse
@@ -1896,7 +1896,7 @@
  ""
  "
 {
-  emit_insn (gen_jump_real (operand0));
+  emit_jump_insn (gen_jump_real (operand0));
   DONE;
 }
 ")
@@ -3319,7 +3319,7 @@
   ""
 "*
 {
-  int op0 = REGNO (operands[0]);
+  unsigned int op0 = REGNO (operands[0]);
 
   if (GET_CODE (operands[3]) == REG)
     {
diff --git a/gcc/config/mips/_tilib.c b/gcc/config/mips/_tilib.c
new file mode 100644
index 00000000000..7c13560b1f0
--- /dev/null
+++ b/gcc/config/mips/_tilib.c
@@ -0,0 +1,156 @@
+/* A few TImode functions needed for TFmode emulated arithmetic.
+   Copyright 2002, 2003 Free Software Foundation, Inc.
+   Contributed by Alexandre Oliva <aoliva@redhat.com>
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA.  */
+
+
+#include "tconfig.h"
+
+#if _MIPS_SIM == 2 /* N32 */ || _MIPS_SIM == 3 /* 64 */
+
+typedef int TItype __attribute__ ((mode (TI)));
+typedef int DItype __attribute__ ((mode (DI)));
+typedef int SItype __attribute__ ((mode (SI)));
+
+typedef unsigned int UDItype __attribute__ ((mode (DI)));
+
+typedef union
+{
+  struct TIstruct {
+#if LIBGCC2_WORDS_BIG_ENDIAN
+    DItype high, low;
+#else
+    DItype low, high;
+#endif
+  } s;
+  TItype ll;
+} TIunion;
+
+TItype __negti2 (TItype);
+TItype __ashlti3 (TItype, int);
+#if 0
+TItype __ashrti3 (TItype, int);
+#endif
+TItype __lshrti3 (TItype, int);
+
+TItype
+__negti2 (TItype u)
+{
+  TIunion w;
+  TIunion uu;
+
+  uu.ll = u;
+
+  w.s.low = -uu.s.low;
+  w.s.high = -uu.s.high - ((UDItype) w.s.low > 0);
+
+  return w.ll;
+}
+
+TItype
+__ashlti3 (TItype u, int b)
+{
+  TIunion w;
+  int bm;
+  TIunion uu;
+
+  if (b == 0)
+    return u;
+
+  uu.ll = u;
+
+  bm = (sizeof (DItype) * BITS_PER_UNIT) - b;
+  if (bm <= 0)
+    {
+      w.s.low = 0;
+      w.s.high = (UDItype) uu.s.low << -bm;
+    }
+  else
+    {
+      UDItype carries = (UDItype) uu.s.low >> bm;
+
+      w.s.low = (UDItype) uu.s.low << b;
+      w.s.high = ((UDItype) uu.s.high << b) | carries;
+    }
+
+  return w.ll;
+}
+
+#if 0
+TItype
+__ashrti3 (TItype u, int b)
+{
+  TIunion w;
+  int bm;
+  TIunion uu;
+
+  if (b == 0)
+    return u;
+
+  uu.ll = u;
+
+  bm = (sizeof (DItype) * BITS_PER_UNIT) - b;
+  if (bm <= 0)
+    {
+      /* w.s.high = 1..1 or 0..0 */
+      w.s.high = uu.s.high >> (sizeof (DItype) * BITS_PER_UNIT - 1);
+      w.s.low = uu.s.high >> -bm;
+    }
+  else
+    {
+      UDItype carries = (UDItype) uu.s.high << bm;
+
+      w.s.high = uu.s.high >> b;
+      w.s.low = ((UDItype) uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}
+#endif
+
+TItype
+__lshrti3 (TItype u, int b)
+{
+  TIunion w;
+  int bm;
+  TIunion uu;
+
+  if (b == 0)
+    return u;
+
+  uu.ll = u;
+
+  bm = (sizeof (DItype) * BITS_PER_UNIT) - b;
+  if (bm <= 0)
+    {
+      w.s.high = 0;
+      w.s.low = (UDItype) uu.s.high >> -bm;
+    }
+  else
+    {
+      UDItype carries = (UDItype) uu.s.high << bm;
+
+      w.s.high = (UDItype) uu.s.high >> b;
+      w.s.low = ((UDItype) uu.s.low >> b) | carries;
+    }
+
+  return w.ll;
+}
+
+#endif /* N32 or N64 */
diff --git a/gcc/config/mips/iris6.h b/gcc/config/mips/iris6.h
index ddfea8f1473..7336f6c5eeb 100644
--- a/gcc/config/mips/iris6.h
+++ b/gcc/config/mips/iris6.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler.  Iris version 6.
-   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002
+   Copyright (C) 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -125,6 +125,13 @@ Boston, MA 02111-1307, USA.  */
      if (!ISA_MIPS1 && !ISA_MIPS2)			\
 	builtin_define ("_COMPILER_VERSION=601");	\
 							\
+     /* IRIX 6.5.18 and above provide many ISO C99	\
+	features protected by the __c99 macro.		\
+	libstdc++ v3 needs them as well.  */		\
+     if ((c_language == clk_c && flag_isoc99)		\
+	 || c_language == clk_cplusplus)		\
+	builtin_define ("__c99");			\
+							\
      if (c_language == clk_cplusplus)			\
       {							\
 	builtin_define ("__EXTENSIONS__");		\
@@ -509,3 +516,5 @@ do {									 \
 %{shared:-hidden_symbol __do_global_ctors,__do_global_ctors_1,__do_global_dtors} \
 -_SYSTYPE_SVR4 -woff 131 \
 %{mabi=32: -32}%{mabi=n32: -n32}%{mabi=64: -64}%{!mabi*: -n32}"
+
+#define MIPS_TFMODE_FORMAT ibm_extended_format
diff --git a/gcc/config/mips/linux.h b/gcc/config/mips/linux.h
index d25cd51ddcc..354512018e7 100644
--- a/gcc/config/mips/linux.h
+++ b/gcc/config/mips/linux.h
@@ -127,7 +127,7 @@ void FN ()							\
 	builtin_define_std ("linux");				\
 	builtin_assert ("system=linux");			\
 	/* The GNU C++ standard library requires this.  */	\
-	if (c_language = clk_cplusplus)				\
+	if (c_language == clk_cplusplus)			\
 	  builtin_define ("_GNU_SOURCE");			\
 								\
       if (mips_abi == ABI_N32)					\
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index d92f7e573b3..f7ec8183ed0 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -122,8 +122,9 @@ extern int              mips_adjust_insn_length PARAMS ((rtx, int));
 extern enum reg_class	mips_secondary_reload_class PARAMS ((enum reg_class,
 							     enum machine_mode,
 							     rtx, int));
-extern enum reg_class	mips_cannot_change_mode_class 
-			  PARAMS ((enum machine_mode, enum machine_mode));
+extern bool		mips_cannot_change_mode_class 
+			  PARAMS ((enum machine_mode, enum machine_mode,
+				   enum reg_class));
 extern int              mips_class_max_nregs PARAMS ((enum reg_class,
 						      enum machine_mode));
 extern int              mips_register_move_cost PARAMS ((enum machine_mode,
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 2dcf69d3f43..d73e52b4cf8 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -4321,9 +4321,11 @@ mips_arg_info (cum, mode, type, named, info)
 	 is a double, but $f14 if it is a single.  Otherwise, on a
 	 32-bit double-float machine, each FP argument must start in a
 	 new register pair.  */
-      even_reg_p = ((mips_abi == ABI_O64 && mode == SFmode) || FP_INC > 1);
+      even_reg_p = (GET_MODE_SIZE (mode) > UNITS_PER_HWFPVALUE
+		    || (mips_abi == ABI_O64 && mode == SFmode)
+		    || FP_INC > 1);
     }
-  else if (!TARGET_64BIT)
+  else if (!TARGET_64BIT || LONG_DOUBLE_TYPE_SIZE == 128)
     {
       if (GET_MODE_CLASS (mode) == MODE_INT
 	  || GET_MODE_CLASS (mode) == MODE_FLOAT)
@@ -4635,7 +4637,7 @@ mips_setup_incoming_varargs (cum, mode, type, no_rtl)
 	      rtx ptr = plus_constant (virtual_incoming_args_rtx, off);
 	      emit_move_insn (gen_rtx_MEM (mode, ptr),
 			      gen_rtx_REG (mode, FP_ARG_FIRST + i));
-	      off += UNITS_PER_FPVALUE;
+	      off += UNITS_PER_HWFPVALUE;
 	    }
 	}
     }
@@ -4712,6 +4714,15 @@ mips_va_start (valist, nextarg)
 {
   const CUMULATIVE_ARGS *cum = &current_function_args_info;
 
+  /* ARG_POINTER_REGNUM is initialized to STACK_POINTER_BOUNDARY, but
+     since the stack is aligned for a pair of argument-passing slots,
+     and the beginning of a variable argument list may be an odd slot,
+     we have to decrease its alignment.  */
+  if (cfun && cfun->emit->regno_pointer_align)
+    while (((current_function_pretend_args_size * BITS_PER_UNIT)
+	    & (REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) - 1)) != 0)
+      REGNO_POINTER_ALIGN (ARG_POINTER_REGNUM) /= 2;
+
   if (mips_abi == ABI_EABI)
     {
       int gpr_save_area_size;
@@ -4909,9 +4920,9 @@ mips_va_arg (valist, type)
 	      off = build (COMPONENT_REF, TREE_TYPE (f_foff), valist, f_foff);
 
 	      /* When floating-point registers are saved to the stack,
-		 each one will take up UNITS_PER_FPVALUE bytes, regardless
+		 each one will take up UNITS_PER_HWFPVALUE bytes, regardless
 		 of the float's precision.  */
-	      rsize = UNITS_PER_FPVALUE;
+	      rsize = UNITS_PER_HWFPVALUE;
 	    }
 	  else
 	    {
@@ -4930,7 +4941,7 @@ mips_va_arg (valist, type)
 	     bytes (= PARM_BOUNDARY bits).  RSIZE can sometimes be smaller
 	     than that, such as in the combination -mgp64 -msingle-float
 	     -fshort-double.  Doubles passed in registers will then take
-	     up UNITS_PER_FPVALUE bytes, but those passed on the stack
+	     up UNITS_PER_HWFPVALUE bytes, but those passed on the stack
 	     take up UNITS_PER_WORD bytes.  */
 	  osize = MAX (rsize, UNITS_PER_WORD);
 
@@ -5007,7 +5018,10 @@ mips_va_arg (valist, type)
 	 that alignments <= UNITS_PER_WORD are preserved by the va_arg
 	 increment mechanism.  */
 
-      if (TARGET_64BIT)
+      if ((mips_abi == ABI_N32 || mips_abi == ABI_64)
+	  && TYPE_ALIGN (type) > 64)
+	align = 16;
+      else if (TARGET_64BIT)
 	align = 8;
       else if (TYPE_ALIGN (type) > 32)
 	align = 8;
@@ -5365,6 +5379,10 @@ override_options ()
   else
     mips16 = 0;
 
+#ifdef MIPS_TFMODE_FORMAT
+  real_format_for_mode[TFmode - QFmode] = &MIPS_TFMODE_FORMAT;
+#endif
+  
   mips_print_operand_punct['?'] = 1;
   mips_print_operand_punct['#'] = 1;
   mips_print_operand_punct['&'] = 1;
@@ -7085,7 +7103,7 @@ save_restore_insns (store_p, large_reg, large_offset)
       /* Pick which pointer to use as a base register.  */
       fp_offset = cfun->machine->frame.fp_sp_offset;
       end_offset = fp_offset - (cfun->machine->frame.fp_reg_size
-				- UNITS_PER_FPVALUE);
+				- UNITS_PER_HWFPVALUE);
 
       if (fp_offset < 0 || end_offset < 0)
 	internal_error
@@ -7140,7 +7158,7 @@ save_restore_insns (store_p, large_reg, large_offset)
 	    else
 	      emit_move_insn (reg_rtx, mem_rtx);
 
-	    fp_offset -= UNITS_PER_FPVALUE;
+	    fp_offset -= UNITS_PER_HWFPVALUE;
 	  }
     }
 }
@@ -8271,11 +8289,26 @@ mips_function_value (valtype, func, mode)
     }
   mclass = GET_MODE_CLASS (mode);
 
-  if (mclass == MODE_FLOAT && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE)
+  if (mclass == MODE_FLOAT && GET_MODE_SIZE (mode) <= UNITS_PER_HWFPVALUE)
     reg = FP_RETURN;
 
+  else if (mclass == MODE_FLOAT && mode == TFmode)
+    /* long doubles are really split between f0 and f2, not f1.  Eek.
+       Use DImode for each component, since GCC wants integer modes
+       for subregs.  */
+    return gen_rtx_PARALLEL
+      (VOIDmode,
+       gen_rtvec (2,
+		  gen_rtx_EXPR_LIST (VOIDmode,
+				     gen_rtx_REG (DImode, FP_RETURN),
+				     GEN_INT (0)),
+		  gen_rtx_EXPR_LIST (VOIDmode,
+				     gen_rtx_REG (DImode, FP_RETURN + 2),
+				     GEN_INT (GET_MODE_SIZE (mode) / 2))));
+       
+
   else if (mclass == MODE_COMPLEX_FLOAT
-	   && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE * 2)
+	   && GET_MODE_SIZE (mode) <= UNITS_PER_HWFPVALUE * 2)
     {
       enum machine_mode cmode = GET_MODE_INNER (mode);
 
@@ -8416,19 +8449,20 @@ function_arg_pass_by_reference (cum, mode, type, named)
    We can't allow 64-bit float registers to change from a 32-bit
    mode to a 64-bit mode.  */
 
-enum reg_class
-mips_cannot_change_mode_class (from, to)
+bool
+mips_cannot_change_mode_class (from, to, class)
      enum machine_mode from, to;
+     enum reg_class class;
 {
   if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
     {
       if (TARGET_BIG_ENDIAN)
-        return FP_REGS;
+	return reg_classes_intersect_p (FP_REGS, class);
       if (TARGET_FLOAT64)
-        return HI_AND_FP_REGS;
-      return HI_REG;
+	return reg_classes_intersect_p (HI_AND_FP_REGS, class);
+      return reg_classes_intersect_p (HI_REG, class);
     }
-  return NO_REGS;
+  return false;
 }
 
 /* This function returns the register class required for a secondary
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 3b784c8883d..0de8bbf703a 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler.  MIPS version.
    Copyright (C) 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998
-   1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by A. Lichnewsky (lich@inria.inria.fr).
    Changed by Michael Meissner	(meissner@osf.org).
    64 bit r4000 support by Ian Lance Taylor (ian@cygnus.com) and
@@ -1048,9 +1048,18 @@ extern int mips_abi;
 %{gstabs:-g} %{gstabs0:-g0} %{gstabs1:-g1} %{gstabs2:-g2} %{gstabs3:-g3} \
 %{gstabs+:-g} %{gstabs+0:-g0} %{gstabs+1:-g1} %{gstabs+2:-g2} %{gstabs+3:-g3} \
 %{gcoff:-g} %{gcoff0:-g0} %{gcoff1:-g1} %{gcoff2:-g2} %{gcoff3:-g3} \
-%{!gdwarf*:-mdebug} %{gdwarf*:-no-mdebug}"
+%(mdebug_asm_spec)"
 #endif
 
+/* Beginning with gas 2.13, -mdebug must be passed to correctly handle COFF
+   and stabs debugging info.  */
+#if ((TARGET_CPU_DEFAULT | TARGET_DEFAULT) & MASK_GAS) != 0
+/* GAS */
+#define MDEBUG_ASM_SPEC "%{!gdwarf*:-mdebug} %{gdwarf*:-no-mdebug}"
+#else /* not GAS */
+#define MDEBUG_ASM_SPEC ""
+#endif /* not GAS */
+
 /* SUBTARGET_ASM_SPEC is always passed to the assembler.  It may be
    overridden by subtargets.  */
 
@@ -1178,6 +1187,7 @@ extern int mips_abi;
   { "subtarget_mips_as_asm_spec", SUBTARGET_MIPS_AS_ASM_SPEC }, 	\
   { "subtarget_asm_optimizing_spec", SUBTARGET_ASM_OPTIMIZING_SPEC },	\
   { "subtarget_asm_debugging_spec", SUBTARGET_ASM_DEBUGGING_SPEC },	\
+  { "mdebug_asm_spec", MDEBUG_ASM_SPEC },				\
   { "subtarget_asm_spec", SUBTARGET_ASM_SPEC },				\
   { "asm_abi_default_spec", ASM_ABI_DEFAULT_SPEC },			\
   { "endian_spec", ENDIAN_SPEC },					\
@@ -1487,8 +1497,14 @@ do {							\
    the next available register.  */
 #define FP_INC (TARGET_FLOAT64 || TARGET_SINGLE_FLOAT ? 1 : 2)
 
-/* The largest size of value that can be held in floating-point registers.  */
-#define UNITS_PER_FPVALUE (TARGET_SOFT_FLOAT ? 0 : FP_INC * UNITS_PER_FPREG)
+/* The largest size of value that can be held in floating-point
+   registers and moved with a single instruction.  */
+#define UNITS_PER_HWFPVALUE (TARGET_SOFT_FLOAT ? 0 : FP_INC * UNITS_PER_FPREG)
+
+/* The largest size of value that can be held in floating-point
+   registers.  */
+#define UNITS_PER_FPVALUE \
+  (TARGET_SOFT_FLOAT ? 0 : (LONG_DOUBLE_TYPE_SIZE / BITS_PER_UNIT))
 
 /* The number of bytes in a double.  */
 #define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
@@ -1535,7 +1551,21 @@ do {							\
 /* A C expression for the size in bits of the type `long double' on
    the target machine.  If you don't define this, the default is two
    words.  */
-#define LONG_DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE \
+  (mips_abi == ABI_N32 || mips_abi == ABI_64 ? 128 : 64)
+
+/* long double is not a fixed mode, but the idea is that, if we
+   support long double, we also want a 128-bit integer type.  */
+#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE
+
+#ifdef IN_LIBGCC2
+#if  (defined _ABIN32 && _MIPS_SIM == _ABIN32) \
+  || (defined _ABI64 && _MIPS_SIM == _ABI64)
+#  define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
+# else
+#  define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+# endif
+#endif
 
 /* Width in bits of a pointer.
    See also the macro `Pmode' defined below.  */
@@ -1562,7 +1592,7 @@ do {							\
 #define STRUCTURE_SIZE_BOUNDARY 8
 
 /* There is no point aligning anything to a rounder boundary than this.  */
-#define BIGGEST_ALIGNMENT 64
+#define BIGGEST_ALIGNMENT LONG_DOUBLE_TYPE_SIZE
 
 /* Set this nonzero if move instructions will actually fail to work
    when given unaligned data.  */
@@ -2326,8 +2356,8 @@ extern enum reg_class mips_char_to_class[256];
 
 #define CLASS_MAX_NREGS(CLASS, MODE) mips_class_max_nregs (CLASS, MODE)
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO) \
-  mips_cannot_change_mode_class (FROM, TO)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) \
+  mips_cannot_change_mode_class (FROM, TO, CLASS)
 
 /* Stack layout; function entry, exit and calling.  */
 
@@ -2624,7 +2654,9 @@ extern enum reg_class mips_char_to_class[256];
    On the MIPS, R2 R3 and F0 F2 are the only register thus used.
    Currently, R2 and F0 are only implemented  here (C has no complex type)  */
 
-#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN)
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == GP_RETURN || (N) == FP_RETURN \
+  || (LONG_DOUBLE_TYPE_SIZE == 128 && FP_RETURN != GP_RETURN \
+      && (N) == FP_RETURN + 2))
 
 /* 1 if N is a possible register number for function argument passing.
    We have no FP argument registers when soft-float.  When FP registers
@@ -3072,17 +3104,15 @@ typedef struct mips_args {
    assembler would use $at as a temp to load in the large offset.  In this
    case $at is already in use.  We convert such problem addresses to
    `la $5,s;sw $4,70000($5)' via LEGITIMIZE_ADDRESS.  */
-/* ??? SGI Irix 6 assembler fails for CONST address, so reject them
-   when !TARGET_GAS.  */
+/* ??? SGI IRIX 6 N32/N64 assembler fails for CONST address, so reject them
+   when !TARGET_GAS or ABI_32.  */
 /* We should be rejecting everything but const addresses.  */
 #define CONSTANT_ADDRESS_P(X)						\
   (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF		\
     || GET_CODE (X) == CONST_INT || GET_CODE (X) == HIGH		\
     || (GET_CODE (X) == CONST						\
 	&& ! (flag_pic && pic_address_needs_scratch (X))		\
-	&& (TARGET_GAS)							\
-	&& (mips_abi != ABI_N32 					\
-	    && mips_abi != ABI_64)))
+	&& (TARGET_GAS || mips_abi == ABI_32)))
 
 
 /* Define this, so that when PIC, reload won't try to reload invalid
@@ -4663,3 +4693,34 @@ while (0)
 
 /* Generate calls to memcpy, etc., not bcopy, etc.  */
 #define TARGET_MEM_FUNCTIONS
+
+#ifndef __mips16
+/* Since the bits of the _init and _fini function is spread across
+   many object files, each potentially with its own GP, we must assume
+   we need to load our GP.  We don't preserve $gp or $ra, since each
+   init/fini chunk is supposed to initialize $gp, and crti/crtn
+   already take care of preserving $ra and, when appropriate, $gp.  */
+#if _MIPS_SIM == _MIPS_SIM_ABI32
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)	\
+   asm (SECTION_OP "\n\
+	.set noreorder\n\
+	bal 1f\n\
+	nop\n\
+1:	.cpload $31\n\
+	.set reorder\n\
+	jal " USER_LABEL_PREFIX #FUNC "\n\
+	" TEXT_SECTION_ASM_OP);
+#endif /* Switch to #elif when we're no longer limited by K&R C.  */
+#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \
+   || (defined _ABI64 && _MIPS_SIM == _ABI64)
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)	\
+   asm (SECTION_OP "\n\
+	.set noreorder\n\
+	bal 1f\n\
+	nop\n\
+1:	.set reorder\n\
+	.cpsetup $31, $2, 1b\n\
+	jal " USER_LABEL_PREFIX #FUNC "\n\
+	" TEXT_SECTION_ASM_OP);
+#endif
+#endif
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index cc6cda53dba..4b35d9b5928 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -6413,16 +6413,16 @@ move\\t%0,%z4\\n\\
 }")
 
 (define_insn "movdf_internal1"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,f,R,To,*f,*d,*d,*d,*d,*R,*T")
-	(match_operand:DF 1 "general_operand" "f,R,To,fG,fG,*d,*f,*d*G,*R,*T,*d,*d"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand"  "=f,f,f,f,R,To,*f,*d,*d,*d,*d,*R,*T")
+       (match_operand:DF 1 "general_operand"  "f,G,R,To,fG,fG,*d,*f,*d*G,*R,*T,*d,*d"))]
   "TARGET_HARD_FLOAT && !(TARGET_FLOAT64 && !TARGET_64BIT)
    && TARGET_DOUBLE_FLOAT
    && (register_operand (operands[0], DFmode)
        || nonmemory_operand (operands[1], DFmode))"
   "* return mips_move_2words (operands, insn); "
-  [(set_attr "type"	"move,load,load,store,store,xfer,xfer,move,load,load,store,store")
-   (set_attr "mode"	"DF")
-   (set_attr "length"	"4,8,16,8,16,8,8,8,8,16,8,16")])
+  [(set_attr "type"    "move,move,load,load,store,store,xfer,xfer,move,load,load,store,store")
+   (set_attr "mode"    "DF")
+   (set_attr "length"  "4,8,8,16,8,16,8,8,8,8,16,8,16")])
 
 (define_insn "movdf_internal1a"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,R,R,To,To,*d,*d,*To,*R,*d")
@@ -6550,6 +6550,12 @@ move\\t%0,%z4\\n\\
 ;; fill a delay slot.  This also prevents a bug in delayed branches
 ;; from showing up, which reuses one of the registers in our clobbers.
 
+;; ??? Disabled because it doesn't preserve alias information for
+;; operands 0 and 1.  Also, the rtl for the second insn doesn't mention
+;; that it uses the registers clobbered by the first.
+;;
+;; It would probably be better to split the block into individual
+;; instructions instead.
 (define_split
   [(set (mem:BLK (match_operand:SI 0 "register_operand" ""))
 	(mem:BLK (match_operand:SI 1 "register_operand" "")))
@@ -6561,7 +6567,7 @@ move\\t%0,%z4\\n\\
    (use (match_operand:SI 3 "small_int" ""))
    (use (const_int 0))]
 
-  "reload_completed && !TARGET_DEBUG_D_MODE && INTVAL (operands[2]) > 0"
+  "reload_completed && 0 && INTVAL (operands[2]) > 0"
 
   ;; All but the last move
   [(parallel [(set (mem:BLK (match_dup 0))
@@ -10063,21 +10069,19 @@ ld\\t%2,%1-%S1(%2)\;daddu\\t%2,%2,$31\\n\\t%*j\\t%2"
 
 ;; This is used in compiling the unwind routines.
 (define_expand "eh_return"
-  [(use (match_operand 0 "general_operand" ""))
-   (use (match_operand 1 "general_operand" ""))]
+  [(use (match_operand 0 "general_operand" ""))]
   ""
   "
 {
   enum machine_mode gpr_mode = TARGET_64BIT ? DImode : SImode;
 
-  if (GET_MODE (operands[1]) != gpr_mode)
-    operands[1] = convert_to_mode (gpr_mode, operands[1], 0);
+  if (GET_MODE (operands[0]) != gpr_mode)
+    operands[0] = convert_to_mode (gpr_mode, operands[0], 0);
   if (TARGET_64BIT)
-    emit_insn (gen_eh_set_lr_di (operands[1]));
+    emit_insn (gen_eh_set_lr_di (operands[0]));
   else
-    emit_insn (gen_eh_set_lr_si (operands[1]));
+    emit_insn (gen_eh_set_lr_si (operands[0]));
 
-  emit_move_insn (EH_RETURN_STACKADJ_RTX, operands[0]);
   DONE;
 }")
 
diff --git a/gcc/config/mips/rtems.h b/gcc/config/mips/rtems.h
index ae27293e8ad..5573bcd5c53 100644
--- a/gcc/config/mips/rtems.h
+++ b/gcc/config/mips/rtems.h
@@ -1,5 +1,5 @@
 /* Definitions for rtems targeting a MIPS using ELF.
-   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Joel Sherrill (joel@OARcorp.com).
 
 This file is part of GNU CC.
@@ -24,5 +24,6 @@ Boston, MA 02111-1307, USA.  */
 #define TARGET_OS_CPP_BUILTINS()	\
 do {					\
   builtin_define ("__rtems__");		\
+  builtin_define ("__USE_INIT_FINI__");	\
   builtin_assert ("system=rtems");	\
 } while (0)
diff --git a/gcc/config/mips/t-iris5-6 b/gcc/config/mips/t-iris5-6
index d0440bc5ca1..4321b526c41 100644
--- a/gcc/config/mips/t-iris5-6
+++ b/gcc/config/mips/t-iris5-6
@@ -20,11 +20,11 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
 SHLIB_INSTALL = \
-	$$(SHELL) $$(srcdir)/mkinstalldirs $$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+	$$(SHELL) $$(srcdir)/mkinstalldirs $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
 	$(INSTALL_DATA) $(SHLIB_NAME) \
-	  $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
-	rm -f $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+	  $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+	rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
 	$(LN_S) $(SHLIB_SONAME) \
-	  $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+	  $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
 SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
 SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/mips/t-iris6 b/gcc/config/mips/t-iris6
index 1d61c244bf1..a1be0b9c9ed 100644
--- a/gcc/config/mips/t-iris6
+++ b/gcc/config/mips/t-iris6
@@ -18,3 +18,16 @@ CRTSTUFF_T_CFLAGS=-g1
 # This is only needed in the static libgcc as a band-aid until gcc correctly
 # implements the N32/N64 ABI structure passing conventions
 LIB2FUNCS_STATIC_EXTRA = $(srcdir)/config/mips/irix6-libc-compat.c
+
+LIB2FUNCS_EXTRA = $(srcdir)/config/mips/_tilib.c
+
+TPBIT = tp-bit.c
+
+tp-bit.c: $(srcdir)/config/fp-bit.c
+	echo '#ifdef __MIPSEL__' > tp-bit.c
+	echo '# define FLOAT_BIT_ORDER_MISMATCH' >> tp-bit.c
+	echo '#endif' >> tp-bit.c
+	echo '#if __LDBL_MANT_DIG__ == 106' >> tp-bit.c
+	echo '# define TFLOAT' >> tp-bit.c
+	cat $(srcdir)/config/fp-bit.c >> tp-bit.c
+	echo '#endif' >> tp-bit.c
diff --git a/gcc/config/mn10300/mn10300-protos.h b/gcc/config/mn10300/mn10300-protos.h
index d5a1c4426c6..537a22d707d 100644
--- a/gcc/config/mn10300/mn10300-protos.h
+++ b/gcc/config/mn10300/mn10300-protos.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler. Matsushita MN10300 series
-   Copyright (C) 2000 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2003 Free Software Foundation, Inc.
    Contributed by Jeff Law (law@cygnus.com).
 
 This file is part of GNU CC.
@@ -42,6 +42,8 @@ extern int impossible_plus_operand PARAMS ((rtx, enum machine_mode));
 extern int const_8bit_operand PARAMS ((rtx, enum machine_mode));
 
 extern int mn10300_address_cost PARAMS ((rtx, int *));
+
+extern bool mn10300_wide_const_load_uses_clr PARAMS ((rtx operands[2]));
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
@@ -59,4 +61,3 @@ extern void expand_epilogue PARAMS ((void));
 extern int initial_offset PARAMS ((int, int));
 extern int can_use_return_insn PARAMS ((void));
 extern int mask_ok_for_mem_btst PARAMS ((int, int));
-
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index 3bad60b7475..c580457a2b5 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -1,5 +1,5 @@
 /* Subroutines for insn-output.c for Matsushita MN10300 series
-   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
    Contributed by Jeff Law (law@cygnus.com).
 
@@ -1322,3 +1322,51 @@ mn10300_address_cost (x, unsig)
 
     }
 }
+
+/* Check whether a constant used to initialize a DImode or DFmode can
+   use a clr instruction.  The code here must be kept in sync with
+   movdf and movdi.  */
+
+bool
+mn10300_wide_const_load_uses_clr (operands)
+     rtx operands[2];
+{
+  long val[2];
+
+  if (GET_CODE (operands[0]) != REG
+      || REGNO_REG_CLASS (REGNO (operands[0])) != DATA_REGS)
+    return false;
+
+  switch (GET_CODE (operands[1]))
+    {
+    case CONST_INT:
+      {
+	rtx low, high;
+	split_double (operands[1], &low, &high);
+	val[0] = INTVAL (low);
+	val[1] = INTVAL (high);
+      }
+      break;
+      
+    case CONST_DOUBLE:
+      if (GET_MODE (operands[1]) == DFmode)
+	{
+	  REAL_VALUE_TYPE rv;
+
+	  REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
+	  REAL_VALUE_TO_TARGET_DOUBLE (rv, val);
+	}
+      else if (GET_MODE (operands[1]) == VOIDmode
+	       || GET_MODE (operands[1]) == DImode)
+	{
+	  val[0] = CONST_DOUBLE_LOW (operands[1]);
+	  val[1] = CONST_DOUBLE_HIGH (operands[1]);
+	}
+      break;
+      
+    default:
+      return false;
+    }
+
+  return val[0] == 0 || val[1] == 0;
+}
diff --git a/gcc/config/mn10300/mn10300.md b/gcc/config/mn10300/mn10300.md
index 770f7c5428e..f889aa61cdd 100644
--- a/gcc/config/mn10300/mn10300.md
+++ b/gcc/config/mn10300/mn10300.md
@@ -1,5 +1,5 @@
 ;; GCC machine description for Matsushita MN10300
-;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002
+;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
 ;; Free Software Foundation, Inc.
 ;; Contributed by Jeff Law (law@cygnus.com).
 
@@ -520,7 +520,26 @@
       abort ();
     }
 }"
-  [(set_attr "cc" "none,none,clobber,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")])
+  [(set (attr "cc")
+	(cond
+	 [
+	 (lt (symbol_ref "which_alternative") (const_int 2)
+	     ) (const_string "none")
+	 (eq (symbol_ref "which_alternative") (const_int 2)
+	     ) (const_string "clobber")
+	 (eq (symbol_ref "which_alternative") (const_int 3)
+	     ) (if_then_else
+		(ne (symbol_ref "rtx_equal_p (operands[0], operands[1])")
+		    (const_int 0)) (const_string "clobber")
+		    (const_string "none_0hit"))
+	 (ior (eq (symbol_ref "which_alternative") (const_int 8))
+	      (eq (symbol_ref "which_alternative") (const_int 9))
+	      ) (if_then_else
+		 (ne (symbol_ref "mn10300_wide_const_load_uses_clr
+				  (operands)")
+		     (const_int 0)) (const_string "clobber")
+		     (const_string "none_0hit"))
+	 ] (const_string "none_0hit")))])
 
 (define_expand "movdf"
   [(set (match_operand:DF 0 "general_operand" "")
@@ -670,7 +689,26 @@
       abort ();
     }
 }"
-  [(set_attr "cc" "none,none,clobber,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit,none_0hit")])
+  [(set (attr "cc")
+	(cond
+	 [
+	 (lt (symbol_ref "which_alternative") (const_int 2)
+	     ) (const_string "none")
+	 (eq (symbol_ref "which_alternative") (const_int 2)
+	     ) (const_string "clobber")
+	 (eq (symbol_ref "which_alternative") (const_int 3)
+	     ) (if_then_else
+		(ne (symbol_ref "rtx_equal_p (operands[0], operands[1])")
+		    (const_int 0)) (const_string "clobber")
+		    (const_string "none_0hit"))
+	 (ior (eq (symbol_ref "which_alternative") (const_int 8))
+	      (eq (symbol_ref "which_alternative") (const_int 9))
+	      ) (if_then_else
+		 (ne (symbol_ref "mn10300_wide_const_load_uses_clr
+				  (operands)")
+		     (const_int 0)) (const_string "clobber")
+		     (const_string "none_0hit"))
+	 ] (const_string "none_0hit")))])
 
 
 
diff --git a/gcc/config/netbsd.h b/gcc/config/netbsd.h
index 43ff80ae686..04413899621 100644
--- a/gcc/config/netbsd.h
+++ b/gcc/config/netbsd.h
@@ -1,5 +1,5 @@
 /* Base configuration file for all NetBSD targets.
-   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002
+   Copyright (C) 1997, 1998, 1999, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
 
 This file is part of GNU CC.
@@ -38,7 +38,9 @@ Boston, MA 02111-1307, USA.  */
   while (0)
 
 /* CPP_SPEC parts common to all NetBSD targets.  */
-#define NETBSD_CPP_SPEC "%{posix:-D_POSIX_SOURCE}"
+#define NETBSD_CPP_SPEC				\
+  "%{posix:-D_POSIX_SOURCE} \
+   %{pthread:-D_REENTRANT -D_PTHREADS}"
 
 /* NETBSD_NATIVE is defined when gcc is integrated into the NetBSD
    source tree so it can be configured appropriately without using
diff --git a/gcc/config/pa/milli64.S b/gcc/config/pa/milli64.S
index 2190a36e2c9..99d2d2e12a3 100644
--- a/gcc/config/pa/milli64.S
+++ b/gcc/config/pa/milli64.S
@@ -1776,7 +1776,7 @@ DISCUSSION:
 	.align 16
 	.proc
 	.callinfo millicode
-	.export $$mulI, millicode
+	.export $$mulI,millicode
 GSYM($$mulI)	
 	combt,<<=	a1,a0,LREF(l4)	/* swap args if unsigned a1>a0 */
 	copy		0,r		/* zero out the result */
diff --git a/gcc/config/pa/pa-hpux.h b/gcc/config/pa/pa-hpux.h
index b9d563e09be..d6e15164013 100644
--- a/gcc/config/pa/pa-hpux.h
+++ b/gcc/config/pa/pa-hpux.h
@@ -96,3 +96,12 @@ Boston, MA 02111-1307, USA.  */
 /* hpux8 and later have C++ compatible include files, so do not
    pretend they are `extern "C"'.  */
 #define NO_IMPLICIT_EXTERN_C
+
+/* hpux11 and earlier don't have fputc_unlocked, so we must inhibit the
+   transformation of fputs_unlocked and fprintf_unlocked to fputc_unlocked.  */
+#define DONT_HAVE_FPUTC_UNLOCKED
+
+/* We want the entry value of SP saved in the frame marker for
+   compatibility with the HP-UX unwind library.  */
+#undef TARGET_HPUX_UNWIND_LIBRARY
+#define TARGET_HPUX_UNWIND_LIBRARY 1
diff --git a/gcc/config/pa/pa-hpux10.h b/gcc/config/pa/pa-hpux10.h
index 0354543e3d0..9cd7c1b9de7 100644
--- a/gcc/config/pa/pa-hpux10.h
+++ b/gcc/config/pa/pa-hpux10.h
@@ -82,10 +82,12 @@ Boston, MA 02111-1307, USA.  */
 #define LIB_SPEC \
   "%{!shared:\
      %{!p:%{!pg:\
-       %{!threads:-lc}\
+       %{!threads:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\
        %{threads:-lcma -lc_r}}}\
-     %{p: -L/lib/libp/ -lc}\
-     %{pg: -L/lib/libp/ -lc}}"
+     %{p: -L/lib/libp/ -lc\
+       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\
+     %{pg: -L/lib/libp/ -lc\
+       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}"
 
 #undef THREAD_MODEL_SPEC
 #define THREAD_MODEL_SPEC "%{!threads:single}%{threads:dce}"
diff --git a/gcc/config/pa/pa-hpux11.h b/gcc/config/pa/pa-hpux11.h
index 0bc7f1010a5..23ec45ba822 100644
--- a/gcc/config/pa/pa-hpux11.h
+++ b/gcc/config/pa/pa-hpux11.h
@@ -72,10 +72,13 @@ Boston, MA 02111-1307, USA.  */
 #undef LINK_SPEC
 #if ((TARGET_DEFAULT | TARGET_CPU_DEFAULT) & 1)
 #define LINK_SPEC \
-  "%{!mpa-risc-1-0:%{!shared:-L/lib/pa1.1 -L/usr/lib/pa1.1 }} -z %{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{shared:-b}"
+  "%{!mpa-risc-1-0:%{!shared:-L/lib/pa1.1 -L/usr/lib/pa1.1 }} -z\
+   %{mlinker-opt:-O} %{!shared:-u main -u __gcc_plt_call}\
+   %{static:-a archive} %{shared:-b}"
 #else
 #define LINK_SPEC \
-  "-z %{mlinker-opt:-O} %{!shared:-u main} %{static:-a archive} %{shared:-b}"
+  "-z %{mlinker-opt:-O} %{!shared:-u main -u __gcc_plt_call}\
+   %{static:-a archive} %{shared:-b}"
 #endif
 
 /* Like the default, except no -lg.  */
@@ -83,10 +86,12 @@ Boston, MA 02111-1307, USA.  */
 #define LIB_SPEC \
   "%{!shared:\
      %{!p:%{!pg:\
-       %{!threads:-lc}\
+       %{!threads:-lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\
        %{threads:-lcma -lc_r}}}\
-     %{p: -L/lib/libp/ -lc}\
-     %{pg: -L/lib/libp/ -lc}}"
+     %{p: -L/lib/libp/ -lc\
+       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\
+     %{pg: -L/lib/libp/ -lc\
+       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}"
 
 /* Under hpux11, the normal location of the `ld' and `as' programs is the
    /usr/ccs/bin directory.  */
@@ -117,3 +122,21 @@ Boston, MA 02111-1307, USA.  */
 
 #define SIZE_TYPE "long unsigned int"
 #define PTRDIFF_TYPE "long int"
+
+/* HP-UX 11.0 and above provides initialization and finalization function
+   support from linker command line.  We don't need to invoke __main to run
+   constructors.  We also don't need chatr to determine the dependencies of
+   dynamically linked executables and shared libraries.  */
+#undef LDD_SUFFIX
+#undef PARSE_LDD_OUTPUT
+#undef HAS_INIT_SECTION
+#define HAS_INIT_SECTION 1
+#undef LD_INIT_SWITCH
+#define LD_INIT_SWITCH "+init"
+#undef LD_FINI_SWITCH
+#define LD_FINI_SWITCH "+fini"
+
+/* The HP-UX 11.X SOM linker (ld32) can successfully link shared libraries
+   with secondary definition (weak) symbols.  */
+#undef TARGET_SOM_SDEF
+#define TARGET_SOM_SDEF 1
diff --git a/gcc/config/pa/pa-protos.h b/gcc/config/pa/pa-protos.h
index 8ea36f3a44e..626ac1a7682 100644
--- a/gcc/config/pa/pa-protos.h
+++ b/gcc/config/pa/pa-protos.h
@@ -1,5 +1,5 @@
 /* Prototypes for pa.c functions used in the md file & elsewhere.
-   Copyright (C) 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of GNU CC.
 
@@ -44,6 +44,7 @@ extern const char *output_move_double PARAMS ((rtx *));
 extern const char *output_fp_move_double PARAMS ((rtx *));
 extern const char *output_block_move PARAMS ((rtx *, int));
 extern const char *output_cbranch PARAMS ((rtx *, int, int, int, rtx));
+extern const char *output_lbranch PARAMS ((rtx, rtx));
 extern const char *output_bb PARAMS ((rtx *, int, int, int, rtx, int));
 extern const char *output_bvb PARAMS ((rtx *, int, int, int, rtx, int));
 extern const char *output_dbra PARAMS ((rtx *, rtx, int));
@@ -51,6 +52,7 @@ extern const char *output_movb PARAMS ((rtx *, rtx, int, int));
 extern const char *output_parallel_movb PARAMS ((rtx *, int));
 extern const char *output_parallel_addb PARAMS ((rtx *, int));
 extern const char *output_call PARAMS ((rtx, rtx, int));
+extern const char *output_indirect_call PARAMS ((rtx, rtx));
 extern const char *output_millicode_call PARAMS ((rtx, rtx));
 extern const char *output_mul_insn PARAMS ((int, rtx));
 extern const char *output_div_insn PARAMS ((rtx *, int, rtx));
@@ -105,8 +107,10 @@ extern int jump_in_call_delay PARAMS ((rtx));
 extern enum reg_class secondary_reload_class PARAMS ((enum reg_class,
 						      enum machine_mode, rtx));
 extern int hppa_fpstore_bypass_p PARAMS ((rtx, rtx));
-extern int attr_length_millicode_call PARAMS ((rtx, int));
+extern int attr_length_millicode_call PARAMS ((rtx));
 extern int attr_length_call PARAMS ((rtx, int));
+extern int attr_length_indirect_call PARAMS ((rtx));
+extern int attr_length_save_restore_dltp PARAMS ((rtx));
 
 /* Declare functions defined in pa.c and used in templates.  */
 
@@ -157,7 +161,7 @@ extern int cmpib_comparison_operator PARAMS ((rtx, enum machine_mode));
 extern int reloc_needed PARAMS ((tree));
 #ifdef RTX_CODE
 extern rtx function_arg PARAMS ((CUMULATIVE_ARGS *, enum machine_mode,
-				 tree, int, int));
+				 tree, int));
 extern rtx function_value PARAMS ((tree, tree));
 #endif
 extern int function_arg_partial_nregs PARAMS ((CUMULATIVE_ARGS *,
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 0d5f111a41a..a81d06e31c6 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -1,6 +1,6 @@
 /* Subroutines for insn-output.c for HPPA.
    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-   2002 Free Software Foundation, Inc.
+   2002, 2003 Free Software Foundation, Inc.
    Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
 
 This file is part of GNU CC.
@@ -121,6 +121,11 @@ static void pa_globalize_label PARAMS ((FILE *, const char *))
      ATTRIBUTE_UNUSED;
 static void pa_asm_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
 					    HOST_WIDE_INT, tree));
+#if !defined(USE_COLLECT2)
+static void pa_asm_out_constructor PARAMS ((rtx, int));
+static void pa_asm_out_destructor PARAMS ((rtx, int));
+#endif
+static void pa_init_builtins PARAMS ((void));
 static void copy_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
 static int length_fp_args PARAMS ((rtx)) ATTRIBUTE_UNUSED;
 static struct deferred_plabel *get_plabel PARAMS ((const char *))
@@ -203,6 +208,16 @@ static size_t n_deferred_plabels = 0;
 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
 
+#if !defined(USE_COLLECT2)
+#undef TARGET_ASM_CONSTRUCTOR
+#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
+#undef TARGET_ASM_DESTRUCTOR
+#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
+#endif
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS pa_init_builtins
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 void
@@ -314,6 +329,14 @@ override_options ()
     }
 }
 
+static void
+pa_init_builtins ()
+{
+#ifdef DONT_HAVE_FPUTC_UNLOCKED
+  built_in_decls[(int) BUILT_IN_FPUTC_UNLOCKED] = NULL_TREE;
+#endif
+}
+
 /* Return nonzero only if OP is a register of mode MODE,
    or CONST0_RTX.  */
 int
@@ -3132,12 +3155,13 @@ compute_frame_size (size, fregs_live)
   fsize += current_function_outgoing_args_size;
 
   /* Allocate space for the fixed frame marker.  This space must be
-     allocated for any function that makes calls or otherwise allocates
+     allocated for any function that makes calls or allocates
      stack space.  */
   if (!current_function_is_leaf || fsize)
-    fsize += TARGET_64BIT ? 16 : 32;
+    fsize += TARGET_64BIT ? 48 : 32;
 
-  return (fsize + STACK_BOUNDARY - 1) & ~(STACK_BOUNDARY - 1);
+  return ((fsize + PREFERRED_STACK_BOUNDARY / 8 - 1)
+	  & ~(PREFERRED_STACK_BOUNDARY / 8 - 1));
 }
 
 /* Generate the assembly code for function entry.  FILE is a stdio
@@ -3177,6 +3201,15 @@ pa_output_function_prologue (file, size)
   else
     fputs (",NO_CALLS", file);
 
+  /* The SAVE_SP flag is used to indicate that register %r3 is stored
+     at the beginning of the frame and that it is used as the frame
+     pointer for the frame.  We do this because our current frame
+     layout doesn't conform to that specified in the the HP runtime
+     documentation and we need a way to indicate to programs such as
+     GDB where %r3 is saved.  The SAVE_SP flag was chosen because it
+     isn't used by HP compilers but is supported by the assembler.
+     However, SAVE_SP is supposed to indicate that the previous stack
+     pointer has been saved in the frame marker.  */
   if (frame_pointer_needed)
     fputs (",SAVE_SP", file);
 
@@ -3196,26 +3229,6 @@ pa_output_function_prologue (file, size)
 
   fputs ("\n\t.ENTRY\n", file);
 
-  /* If we're using GAS and SOM, and not using the portable runtime model,
-     or function sections, then we don't need to accumulate the total number
-     of code bytes.  */
-  if ((TARGET_GAS && TARGET_SOM && ! TARGET_PORTABLE_RUNTIME)
-      || flag_function_sections)
-    total_code_bytes = 0;
-  else if (INSN_ADDRESSES_SET_P ())
-    {
-      unsigned long old_total = total_code_bytes;
-
-      total_code_bytes += INSN_ADDRESSES (INSN_UID (get_last_nonnote_insn ()));
-      total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
-
-      /* Be prepared to handle overflows.  */
-      if (old_total > total_code_bytes)
-	total_code_bytes = -1;
-    }
-  else
-    total_code_bytes = -1;
-
   remove_useless_addtr_insns (get_insns (), 0);
 }
 
@@ -3297,11 +3310,32 @@ hppa_expand_prologue ()
 			      adjust2, 1);
 	    }
 
-	  /* Prevent register spills from being scheduled before the
-	     stack pointer is raised.  Necessary as we will be storing
-	     registers using the frame pointer as a base register, and
-	     we happen to set fp before raising sp.  */
-	  emit_insn (gen_blockage ());
+	  /* We set SAVE_SP in frames that need a frame pointer.  Thus,
+	     we need to store the previous stack pointer (frame pointer)
+	     into the frame marker on targets that use the HP unwind
+	     library.  This allows the HP unwind library to be used to
+	     unwind GCC frames.  However, we are not fully compatible
+	     with the HP library because our frame layout differs from
+	     that specified in the HP runtime specification.
+
+	     We don't want a frame note on this instruction as the frame
+	     marker moves during dynamic stack allocation.
+
+	     This instruction also serves as a blockage to prevent
+	     register spills from being scheduled before the stack
+	     pointer is raised.  This is necessary as we store
+	     registers using the frame pointer as a base register,
+	     and the frame pointer is set before sp is raised.  */
+	  if (TARGET_HPUX_UNWIND_LIBRARY)
+	    {
+	      rtx addr = gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+				       GEN_INT (TARGET_64BIT ? -8 : -4));
+
+	      emit_move_insn (gen_rtx_MEM (word_mode, addr),
+			      frame_pointer_rtx);
+	    }
+	  else
+	    emit_insn (gen_blockage ());
 	}
       /* no frame pointer needed.  */
       else
@@ -3537,6 +3571,7 @@ pa_output_function_epilogue (file, size)
      FILE *file;
      HOST_WIDE_INT size ATTRIBUTE_UNUSED;
 {
+  int last_address = 0;
   rtx insn = get_last_insn ();
 
   /* hppa_expand_epilogue does the dirty work now.  We just need
@@ -3559,9 +3594,36 @@ pa_output_function_epilogue (file, size)
   /* If insn is a CALL_INSN, then it must be a call to a volatile
      function (otherwise there would be epilogue insns).  */
   if (insn && GET_CODE (insn) == CALL_INSN)
-    fputs ("\tnop\n", file);
+    {
+      fputs ("\tnop\n", file);
+      last_address += 4;
+    }
 
   fputs ("\t.EXIT\n\t.PROCEND\n", file);
+
+  /* Finally, update the total number of code bytes output so far.  */
+  if ((TARGET_PORTABLE_RUNTIME || !TARGET_GAS || !TARGET_SOM)
+      && !flag_function_sections)
+    {
+      if (INSN_ADDRESSES_SET_P ())
+	{
+	  unsigned long old_total = total_code_bytes;
+
+	  insn = get_last_nonnote_insn ();
+	  last_address += INSN_ADDRESSES (INSN_UID (insn));
+	  if (INSN_P (insn))
+	    last_address += insn_default_length (insn);
+
+	  total_code_bytes += last_address;
+	  total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
+
+	  /* Be prepared to handle overflows.  */
+	  if (old_total > total_code_bytes)
+	    total_code_bytes = -1;
+	}
+      else
+	total_code_bytes = -1;
+    }
 }
 
 void
@@ -4784,7 +4846,9 @@ output_deferred_plabels (file)
   /* Now output the deferred plabels.  */
   for (i = 0; i < n_deferred_plabels; i++)
     {
-      ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label));
+      rtx label = deferred_plabels[i].internal_label;
+
+      ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (label));
       assemble_integer (gen_rtx_SYMBOL_REF (Pmode, deferred_plabels[i].name),
 			TARGET_64BIT ? 8 : 4, TARGET_64BIT ? 64 : 32, 1);
     }
@@ -5370,14 +5434,19 @@ output_cbranch (operands, nullify, length, negated, insn)
 {
   static char buf[100];
   int useskip = 0;
+  rtx xoperands[5];
 
-  /* A conditional branch to the following instruction (eg the delay slot) is
-     asking for a disaster.  This can happen when not optimizing.
-
-     In such cases it is safe to emit nothing.  */
+  /* A conditional branch to the following instruction (eg the delay slot)
+     is asking for a disaster.  This can happen when not optimizing and
+     when jump optimization fails.
 
-  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
-    return "";
+     While it is usually safe to emit nothing, this can fail if the
+     preceding instruction is a nullified branch with an empty delay
+     slot and the same branch target as this branch.  We could check
+     for this but jump optimization should eliminate nop jumps.  It
+     is always safe to emit a nop.  */
+  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
+    return "nop";
 
   /* If this is a long branch with its delay slot unfilled, set `nullify'
      as it can nullify the delay slot and save a nop.  */
@@ -5475,98 +5544,182 @@ output_cbranch (operands, nullify, length, negated, insn)
 	break;
 
       case 20:
-	/* Very long branch.  Right now we only handle these when not
-	   optimizing.  See "jump" pattern in pa.md for details.  */
-	if (optimize)
-	  abort ();
+      case 28:
+	xoperands[0] = operands[0];
+	xoperands[1] = operands[1];
+	xoperands[2] = operands[2];
+	xoperands[3] = operands[3];
+
+	/* The reversed conditional branch must branch over one additional
+	   instruction if the delay slot is filled.  If the delay slot
+	   is empty, the instruction after the reversed condition branch
+	   must be nullified.  */
+	nullify = dbr_sequence_length () == 0;
+	xoperands[4] = nullify ? GEN_INT (length) : GEN_INT (length + 4);
 
 	/* Create a reversed conditional branch which branches around
 	   the following insns.  */
-	if (negated)
-	  strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+20|cmp%I2b,%S3,n %2,%r1,.+20}");
+	if (GET_MODE (operands[1]) != DImode)
+	  {
+	    if (nullify)
+	      {
+		if (negated)
+		  strcpy (buf,
+		    "{com%I2b,%S3,n %2,%r1,.+%4|cmp%I2b,%S3,n %2,%r1,.+%4}");
+		else
+		  strcpy (buf,
+		    "{com%I2b,%B3,n %2,%r1,.+%4|cmp%I2b,%B3,n %2,%r1,.+%4}");
+	      }
+	    else
+	      {
+		if (negated)
+		  strcpy (buf,
+		    "{com%I2b,%S3 %2,%r1,.+%4|cmp%I2b,%S3 %2,%r1,.+%4}");
+		else
+		  strcpy (buf,
+		    "{com%I2b,%B3 %2,%r1,.+%4|cmp%I2b,%B3 %2,%r1,.+%4}");
+	      }
+	  }
 	else
-	  strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+20|cmp%I2b,%B3,n %2,%r1,.+20}");
-	if (GET_MODE (operands[1]) == DImode)
 	  {
-	    if (negated)
-	      strcpy (buf,
-		      "{com%I2b,*%S3,n %2,%r1,.+20|cmp%I2b,*%S3,n %2,%r1,.+20}");
+	    if (nullify)
+	      {
+		if (negated)
+		  strcpy (buf,
+		    "{com%I2b,*%S3,n %2,%r1,.+%4|cmp%I2b,*%S3,n %2,%r1,.+%4}");
+		else
+		  strcpy (buf,
+		    "{com%I2b,*%B3,n %2,%r1,.+%4|cmp%I2b,*%B3,n %2,%r1,.+%4}");
+	      }
 	    else
-	      strcpy (buf,
-		      "{com%I2b,*%B3,n %2,%r1,.+20|cmp%I2b,*%B3,n %2,%r1,.+20}");
+	      {
+		if (negated)
+		  strcpy (buf,
+		    "{com%I2b,*%S3 %2,%r1,.+%4|cmp%I2b,*%S3 %2,%r1,.+%4}");
+		else
+		  strcpy (buf,
+		    "{com%I2b,*%B3 %2,%r1,.+%4|cmp%I2b,*%B3 %2,%r1,.+%4}");
+	      }
 	  }
-	output_asm_insn (buf, operands);
 
-	/* Output an insn to save %r1.  */
-	output_asm_insn ("stw %%r1,-16(%%r30)", operands);
+	output_asm_insn (buf, xoperands);
+	return output_lbranch (operands[0], insn);
 
-	/* Now output a very long branch to the original target.  */
-	output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands);
+      default:
+	abort ();
+    }
+  return buf;
+}
 
-	/* Now restore the value of %r1 in the delay slot.  We're not
-	   optimizing so we know nothing else can be in the delay slot.  */
-	return "ldw -16(%%r30),%%r1";
+/* This routine handles long unconditional branches that exceed the
+   maximum range of a simple branch instruction.  */
 
-      case 28:
-	/* Very long branch when generating PIC code.  Right now we only
-	   handle these when not optimizing.  See "jump" pattern in pa.md
-	   for details.  */
-	if (optimize)
-	  abort ();
+const char *
+output_lbranch (dest, insn)
+     rtx dest, insn;
+{
+  rtx xoperands[2];
+ 
+  xoperands[0] = dest;
 
-	/* Create a reversed conditional branch which branches around
-	   the following insns.  */
-	if (negated)
-	  strcpy (buf, "{com%I2b,%S3,n %2,%r1,.+28|cmp%I2b,%S3,n %2,%r1,.+28}");
-	else
-	  strcpy (buf, "{com%I2b,%B3,n %2,%r1,.+28|cmp%I2b,%B3,n %2,%r1,.+28}");
-	if (GET_MODE (operands[1]) == DImode)
-	  {
-	    if (negated)
-	      strcpy (buf, "{com%I2b,*%S3,n %2,%r1,.+28|cmp%I2b,*%S3,n %2,%r1,.+28}");
-	    else
-	      strcpy (buf, "{com%I2b,*%B3,n %2,%r1,.+28|cmp%I2b,*%B3,n %2,%r1,.+28}");
-	  }
-	output_asm_insn (buf, operands);
+  /* First, free up the delay slot.  */
+  if (dbr_sequence_length () != 0)
+    {
+      /* We can't handle a jump in the delay slot.  */
+      if (GET_CODE (NEXT_INSN (insn)) == JUMP_INSN)
+	abort ();
 
-	/* Output an insn to save %r1.  */
-	output_asm_insn ("stw %%r1,-16(%%r30)", operands);
+      final_scan_insn (NEXT_INSN (insn), asm_out_file,
+		       optimize, 0, 0);
+
+      /* Now delete the delay insn.  */
+      PUT_CODE (NEXT_INSN (insn), NOTE);
+      NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED;
+      NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0;
+    }
+
+  /* Output an insn to save %r1.  The runtime documentation doesn't
+     specify whether the "Clean Up" slot in the callers frame can
+     be clobbered by the callee.  It isn't copied by HP's builtin
+     alloca, so this suggests that it can be clobbered if necessary.
+     The "Static Link" location is copied by HP builtin alloca, so
+     we avoid using it.  Using the cleanup slot might be a problem
+     if we have to interoperate with languages that pass cleanup
+     information.  However, it should be possible to handle these
+     situations with GCC's asm feature.
+
+     The "Current RP" slot is reserved for the called procedure, so
+     we try to use it when we don't have a frame of our own.  It's
+     rather unlikely that we won't have a frame when we need to emit
+     a very long branch.
+
+     Really the way to go long term is a register scavenger; goto
+     the target of the jump and find a register which we can use
+     as a scratch to hold the value in %r1.  Then, we wouldn't have
+     to free up the delay slot or clobber a slot that may be needed
+     for other purposes.  */
+  if (TARGET_64BIT)
+    {
+      if (actual_fsize == 0 && !regs_ever_live[2])
+	/* Use the return pointer slot in the frame marker.  */
+	output_asm_insn ("std %%r1,-16(%%r30)", xoperands);
+      else
+	/* Use the slot at -40 in the frame marker since HP builtin
+	   alloca doesn't copy it.  */
+	output_asm_insn ("std %%r1,-40(%%r30)", xoperands);
+    }
+  else
+    {
+      if (actual_fsize == 0 && !regs_ever_live[2])
+	/* Use the return pointer slot in the frame marker.  */
+	output_asm_insn ("stw %%r1,-20(%%r30)", xoperands);
+      else
+	/* Use the "Clean Up" slot in the frame marker.  In GCC,
+	   the only other use of this location is for copying a
+	   floating point double argument from a floating-point
+	   register to two general registers.  The copy is done
+	   as an "atomic" operation when outputing a call, so it
+	   won't interfere with our using the location here.  */
+	output_asm_insn ("stw %%r1,-12(%%r30)", xoperands);
+    }
 
-	/* Now output a very long PIC branch to the original target.  */
+  if (flag_pic)
+    {
+      output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+      if (TARGET_SOM || !TARGET_GAS)
 	{
-	  rtx xoperands[5];
-
-	  xoperands[0] = operands[0];
-	  xoperands[1] = operands[1];
-	  xoperands[2] = operands[2];
-	  xoperands[3] = operands[3];
-
-	  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
-	  if (TARGET_SOM || !TARGET_GAS)
-	    {
-	      xoperands[4] = gen_label_rtx ();
-	      output_asm_insn ("addil L'%l0-%l4,%%r1", xoperands);
-	      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
-					 CODE_LABEL_NUMBER (xoperands[4]));
-	      output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1", xoperands);
-	    }
-	  else
-	    {
-	      output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
-	      output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1",
-			       xoperands);
-	    }
-	  output_asm_insn ("bv %%r0(%%r1)", xoperands);
+	  xoperands[1] = gen_label_rtx ();
+	  output_asm_insn ("addil L'%l0-%l1,%%r1", xoperands);
+	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+				     CODE_LABEL_NUMBER (xoperands[1]));
+	  output_asm_insn ("ldo R'%l0-%l1(%%r1),%%r1", xoperands);
 	}
+      else
+	{
+	  output_asm_insn ("addil L'%l0-$PIC_pcrel$0+4,%%r1", xoperands);
+	  output_asm_insn ("ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1", xoperands);
+	}
+      output_asm_insn ("bv %%r0(%%r1)", xoperands);
+    }
+  else
+    /* Now output a very long branch to the original target.  */
+    output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", xoperands);
 
-	/* Now restore the value of %r1 in the delay slot.  We're not
-	   optimizing so we know nothing else can be in the delay slot.  */
-	return "ldw -16(%%r30),%%r1";
-
-      default:
-	abort ();
+  /* Now restore the value of %r1 in the delay slot.  */
+  if (TARGET_64BIT)
+    {
+      if (actual_fsize == 0 && !regs_ever_live[2])
+	return "ldd -16(%%r30),%%r1";
+      else
+	return "ldd -40(%%r30),%%r1";
+    }
+  else
+    {
+      if (actual_fsize == 0 && !regs_ever_live[2])
+	return "ldw -20(%%r30),%%r1";
+      else
+	return "ldw -12(%%r30),%%r1";
     }
-  return buf;
 }
 
 /* This routine handles all the branch-on-bit conditional branch sequences we
@@ -5589,8 +5742,8 @@ output_bb (operands, nullify, length, negated, insn, which)
      is only used when optimizing; jump optimization should eliminate the
      jump.  But be prepared just in case.  */
 
-  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
-    return "";
+  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
+    return "nop";
 
   /* If this is a long branch with its delay slot unfilled, set `nullify'
      as it can nullify the delay slot and save a nop.  */
@@ -5737,8 +5890,8 @@ output_bvb (operands, nullify, length, negated, insn, which)
      is only used when optimizing; jump optimization should eliminate the
      jump.  But be prepared just in case.  */
 
-  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
-    return "";
+  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
+    return "nop";
 
   /* If this is a long branch with its delay slot unfilled, set `nullify'
      as it can nullify the delay slot and save a nop.  */
@@ -5878,7 +6031,7 @@ output_dbra (operands, insn, which_alternative)
   /* A conditional branch to the following instruction (eg the delay slot) is
      asking for a disaster.  Be prepared!  */
 
-  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
     {
       if (which_alternative == 0)
 	return "ldo %1(%0),%0";
@@ -5985,7 +6138,7 @@ output_movb (operands, insn, which_alternative, reverse_comparison)
   /* A conditional branch to the following instruction (eg the delay slot) is
      asking for a disaster.  Be prepared!  */
 
-  if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn))
+  if (next_real_insn (JUMP_LABEL (insn)) == next_real_insn (insn))
     {
       if (which_alternative == 0)
 	return "copy %1,%0";
@@ -6161,37 +6314,42 @@ length_fp_args (insn)
   return length;
 }
 
-/* We include the delay slot in the returned length as it is better to
+/* Return the attribute length for the millicode call instruction INSN.
+   The length must match the code generated by output_millicode_call.
+   We include the delay slot in the returned length as it is better to
    over estimate the length than to under estimate it.  */
 
 int
-attr_length_millicode_call (insn, length)
+attr_length_millicode_call (insn)
      rtx insn;
-     int length;
 {
-  unsigned long distance = total_code_bytes + INSN_ADDRESSES (INSN_UID (insn));
+  unsigned long distance = -1;
 
-  if (distance < total_code_bytes)
-    distance = -1;
+  if (INSN_ADDRESSES_SET_P ())
+    {
+      distance = (total_code_bytes + insn_current_reference_address (insn));
+      if (distance < total_code_bytes)
+	distance = -1;
+    }
 
   if (TARGET_64BIT)
     {
       if (!TARGET_LONG_CALLS && distance < 7600000)
-	return length + 8;
+	return 8;
 
-      return length + 20;
+      return 20;
     }
   else if (TARGET_PORTABLE_RUNTIME)
-    return length + 24;
+    return 24;
   else
     {
       if (!TARGET_LONG_CALLS && distance < 240000)
-	return length + 8;
+	return 8;
 
       if (TARGET_LONG_ABS_CALL && !flag_pic)
-	return length + 12;
+	return 12;
 
-      return length + 24;
+      return 24;
     }
 }
 
@@ -6321,16 +6479,22 @@ output_millicode_call (insn, call_dest)
 
   /* See if the return address can be adjusted.  Use the containing
      sequence insn's address.  */
-  seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
-  distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
-	      - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
-
-  if (VAL_14_BITS_P (distance))
+  if (INSN_ADDRESSES_SET_P ())
     {
-      xoperands[1] = gen_label_rtx ();
-      output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
-      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
-				 CODE_LABEL_NUMBER (xoperands[3]));
+      seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0)));
+      distance = (INSN_ADDRESSES (INSN_UID (JUMP_LABEL (NEXT_INSN (insn))))
+		  - INSN_ADDRESSES (INSN_UID (seq_insn)) - 8);
+
+      if (VAL_14_BITS_P (distance))
+	{
+	  xoperands[1] = gen_label_rtx ();
+	  output_asm_insn ("ldo %0-%1(%2),%2", xoperands);
+	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+				     CODE_LABEL_NUMBER (xoperands[1]));
+	}
+      else
+	/* ??? This branch may not reach its target.  */
+	output_asm_insn ("nop\n\tb,n %0", xoperands);
     }
   else
     /* ??? This branch may not reach its target.  */
@@ -6344,18 +6508,25 @@ output_millicode_call (insn, call_dest)
   return "";
 }
 
-/* We include the delay slot in the returned length as it is better to
-   over estimate the length than to under estimate it.  */
+/* Return the attribute length of the call instruction INSN.  The SIBCALL
+   flag indicates whether INSN is a regular call or a sibling call.  The
+   length must match the code generated by output_call.  We include the delay
+   slot in the returned length as it is better to over estimate the length
+   than to under estimate it.  */
 
 int
 attr_length_call (insn, sibcall)
      rtx insn;
      int sibcall;
 {
-  unsigned long distance = total_code_bytes + INSN_ADDRESSES (INSN_UID (insn));
+  unsigned long distance = -1;
 
-  if (distance < total_code_bytes)
-    distance = -1;
+  if (INSN_ADDRESSES_SET_P ())
+    {
+      distance = (total_code_bytes + insn_current_reference_address (insn));
+      if (distance < total_code_bytes)
+	distance = -1;
+    }
 
   if (TARGET_64BIT)
     {
@@ -6396,10 +6567,13 @@ attr_length_call (insn, sibcall)
 	  if (TARGET_PA_20)
 	    return (length + 32);
 
+	  if (!TARGET_NO_SPACE_REGS)
+	    length += 8;
+
 	  if (!sibcall)
 	    length += 8;
 
-	  return (length + 40);
+	  return (length + 32);
 	}
     }
 }
@@ -6417,7 +6591,6 @@ output_call (insn, call_dest, sibcall)
 {
   int delay_insn_deleted = 0;
   int delay_slot_filled = 0;
-  int attr_length = get_attr_length (insn);
   int seq_length = dbr_sequence_length ();
   rtx xoperands[2];
 
@@ -6425,9 +6598,7 @@ output_call (insn, call_dest, sibcall)
 
   /* Handle the common case where we're sure that the branch will reach
      the beginning of the $CODE$ subspace.  */
-  if (!TARGET_LONG_CALLS
-      && ((seq_length == 0 && attr_length == 12)
-	  || (seq_length != 0 && attr_length == 8)))
+  if (!TARGET_LONG_CALLS && attr_length_call (insn, sibcall) == 8)
     {
       xoperands[1] = gen_rtx_REG (word_mode, sibcall ? 0 : 2);
       output_asm_insn ("{bl|b,l} %0,%1", xoperands);
@@ -6597,7 +6768,10 @@ output_call (insn, call_dest, sibcall)
 		  if (!sibcall && !TARGET_PA_20)
 		    {
 		      output_asm_insn ("{bl|b,l} .+8,%%r2", xoperands);
-		      output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
+		      if (TARGET_NO_SPACE_REGS)
+			output_asm_insn ("addi 8,%%r2,%%r2", xoperands);
+		      else
+			output_asm_insn ("addi 16,%%r2,%%r2", xoperands);
 		    }
 		}
 
@@ -6619,14 +6793,23 @@ output_call (insn, call_dest, sibcall)
 		}
 	      else
 		{
-	          output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
-				   xoperands);
+		  if (!TARGET_NO_SPACE_REGS)
+		    output_asm_insn ("ldsid (%%r1),%%r31\n\tmtsp %%r31,%%sr0",
+				     xoperands);
 
 		  if (sibcall)
-		    output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
+		    {
+		      if (TARGET_NO_SPACE_REGS)
+			output_asm_insn ("be 0(%%sr4,%%r1)", xoperands);
+		      else
+			output_asm_insn ("be 0(%%sr0,%%r1)", xoperands);
+		    }
 		  else
 		    {
-		      output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
+		      if (TARGET_NO_SPACE_REGS)
+			output_asm_insn ("ble 0(%%sr4,%%r1)", xoperands);
+		      else
+			output_asm_insn ("ble 0(%%sr0,%%r1)", xoperands);
 
 		      if (indirect_call)
 			output_asm_insn ("stw %%r31,-24(%%sp)", xoperands);
@@ -6655,7 +6838,7 @@ output_call (insn, call_dest, sibcall)
   /* This call has an unconditional jump in its delay slot.  */
   xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1);
 
-  if (!delay_slot_filled)
+  if (!delay_slot_filled && INSN_ADDRESSES_SET_P ())
     {
       /* See if the return address can be adjusted.  Use the containing
          sequence insn's address.  */
@@ -6668,7 +6851,7 @@ output_call (insn, call_dest, sibcall)
 	  xoperands[1] = gen_label_rtx ();
 	  output_asm_insn ("ldo %0-%1(%%r2),%%r2", xoperands);
 	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
-				     CODE_LABEL_NUMBER (xoperands[3]));
+				     CODE_LABEL_NUMBER (xoperands[1]));
 	}
       else
 	/* ??? This branch may not reach its target.  */
@@ -6686,6 +6869,114 @@ output_call (insn, call_dest, sibcall)
   return "";
 }
 
+/* Return the attribute length of the indirect call instruction INSN.
+   The length must match the code generated by output_indirect call.
+   The returned length includes the delay slot.  Currently, the delay
+   slot of an indirect call sequence is not exposed and it is used by
+   the sequence itself.  */
+
+int
+attr_length_indirect_call (insn)
+     rtx insn;
+{
+  unsigned long distance = -1;
+
+  if (INSN_ADDRESSES_SET_P ())
+    {
+      distance = (total_code_bytes + insn_current_reference_address (insn));
+      if (distance < total_code_bytes)
+	distance = -1;
+    }
+
+  if (TARGET_64BIT)
+    return 12;
+
+  if (TARGET_FAST_INDIRECT_CALLS
+      || (!TARGET_PORTABLE_RUNTIME
+	  && ((TARGET_PA_20 && distance < 7600000) || distance < 240000)))
+    return 8;
+
+  if (flag_pic)
+    return 24;
+
+  if (TARGET_PORTABLE_RUNTIME)
+    return 20;
+
+  /* Out of reach, can use ble.  */
+  return 12;
+}
+
+const char *
+output_indirect_call (insn, call_dest)
+     rtx insn;
+     rtx call_dest;
+{
+  rtx xoperands[1];
+
+  if (TARGET_64BIT)
+    {
+      xoperands[0] = call_dest;
+      output_asm_insn ("ldd 16(%0),%%r2", xoperands);
+      output_asm_insn ("bve,l (%%r2),%%r2\n\tldd 24(%0),%%r27", xoperands);
+      return "";
+    }
+
+  /* First the special case for kernels, level 0 systems, etc.  */
+  if (TARGET_FAST_INDIRECT_CALLS)
+    return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2"; 
+
+  /* Now the normal case -- we can reach $$dyncall directly or
+     we're sure that we can get there via a long-branch stub. 
+
+     No need to check target flags as the length uniquely identifies
+     the remaining cases.  */
+  if (attr_length_indirect_call (insn) == 8)
+    return ".CALL\tARGW0=GR\n\t{bl|b,l} $$dyncall,%%r31\n\tcopy %%r31,%%r2";
+
+  /* Long millicode call, but we are not generating PIC or portable runtime
+     code.  */
+  if (attr_length_indirect_call (insn) == 12)
+    return ".CALL\tARGW0=GR\n\tldil L'$$dyncall,%%r2\n\tble R'$$dyncall(%%sr4,%%r2)\n\tcopy %%r31,%%r2";
+
+  /* Long millicode call for portable runtime.  */
+  if (attr_length_indirect_call (insn) == 20)
+    return "ldil L'$$dyncall,%%r31\n\tldo R'$$dyncall(%%r31),%%r31\n\tblr %%r0,%%r2\n\tbv,n %%r0(%%r31)\n\tnop";
+
+  /* We need a long PIC call to $$dyncall.  */
+  xoperands[0] = NULL_RTX;
+  output_asm_insn ("{bl|b,l} .+8,%%r1", xoperands);
+  if (TARGET_SOM || !TARGET_GAS)
+    {
+      xoperands[0] = gen_label_rtx ();
+      output_asm_insn ("addil L'$$dyncall-%0,%%r1", xoperands);
+      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+				 CODE_LABEL_NUMBER (xoperands[0]));
+      output_asm_insn ("ldo R'$$dyncall-%0(%%r1),%%r1", xoperands);
+    }
+  else
+    {
+      output_asm_insn ("addil L'$$dyncall-$PIC_pcrel$0+4,%%r1", xoperands);
+      output_asm_insn ("ldo R'$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1",
+		       xoperands);
+    }
+  output_asm_insn ("blr %%r0,%%r2", xoperands);
+  output_asm_insn ("bv,n %%r0(%%r1)\n\tnop", xoperands);
+  return "";
+}
+
+/* Return the total length of the save and restore instructions needed for
+   the data linkage table pointer (i.e., the PIC register) across the call         instruction INSN.  No-return calls do not require a save and restore.           In addition, we may be able to avoid the save and restore for calls             within the same translation unit.  */
+
+int
+attr_length_save_restore_dltp (insn)
+     rtx insn;
+{
+  if (find_reg_note (insn, REG_NORETURN, NULL_RTX))
+    return 0;
+
+  return 8;
+}
+
 /* In HPUX 8.0's shared library scheme, special relocations are needed
    for function labels if they might be passed to a function
    in a shared library (because shared libraries don't live in code
@@ -6782,7 +7073,7 @@ pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
   pa_output_function_prologue (file, 0);
   if (VAL_14_BITS_P (delta))
     {
-      if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
+      if (!TARGET_64BIT && !TARGET_PORTABLE_RUNTIME && flag_pic)
 	{
 	  fprintf (file, "\taddil LT'%s,%%r19\n", lab);
 	  fprintf (file, "\tldw RT'%s(%%r1),%%r22\n", lab);
@@ -6791,8 +7082,14 @@ pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
 	  fprintf (file, "\tdepi 0,31,2,%%r22\n");
 	  fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
 	  fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
-	  fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n\tmtsp %%r1,%%sr0\n");
-	  fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
+	  if (TARGET_NO_SPACE_REGS)
+	    fprintf (file, "\tbe 0(%%sr4,%%r22)\n\tldo ");
+	  else
+	    {
+	      fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n");
+	      fprintf (file, "\tmtsp %%r1,%%sr0\n");
+	      fprintf (file, "\tbe 0(%%sr0,%%r22)\n\tldo ");
+	    }
 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
 	  fprintf (file, "(%%r26),%%r26\n");
 	}
@@ -6805,7 +7102,7 @@ pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
     }
   else
     {
-      if (! TARGET_64BIT && ! TARGET_PORTABLE_RUNTIME && flag_pic)
+      if (!TARGET_64BIT && !TARGET_PORTABLE_RUNTIME && flag_pic)
 	{
 	  fprintf (file, "\taddil L'");
 	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, delta);
@@ -6819,8 +7116,14 @@ pa_asm_output_mi_thunk (file, thunk_fndecl, delta, vcall_offset, function)
 	  fprintf (file, "\tdepi 0,31,2,%%r22\n");
 	  fprintf (file, "\tldw 4(%%sr0,%%r22),%%r19\n");
 	  fprintf (file, "\tldw 0(%%sr0,%%r22),%%r22\n");
-	  fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n\tmtsp %%r1,%%sr0\n");
-	  fprintf (file, "\tbe,n 0(%%sr0,%%r22)\n");
+	  if (TARGET_NO_SPACE_REGS)
+	    fprintf (file, "\tbe 0(%%sr4,%%r22)");
+	  else
+	    {
+	      fprintf (file, "\tldsid (%%sr0,%%r22),%%r1\n");
+	      fprintf (file, "\tmtsp %%r1,%%sr0\n");
+	      fprintf (file, "\tbe,n 0(%%sr0,%%r22)\n");
+	    }
 	}
       else
 	{
@@ -6903,6 +7206,46 @@ fmpyaddoperands (operands)
   return 1;
 }
 
+#if !defined(USE_COLLECT2)
+static void
+pa_asm_out_constructor (symbol, priority)
+     rtx symbol;
+     int priority;
+{
+  if (!function_label_operand (symbol, VOIDmode))
+    hppa_encode_label (symbol);
+
+#ifdef CTORS_SECTION_ASM_OP
+  default_ctor_section_asm_out_constructor (symbol, priority);
+#else
+# ifdef TARGET_ASM_NAMED_SECTION
+  default_named_section_asm_out_constructor (symbol, priority);
+# else
+  default_stabs_asm_out_constructor (symbol, priority);
+# endif
+#endif
+}
+
+static void
+pa_asm_out_destructor (symbol, priority)
+     rtx symbol;
+     int priority;
+{
+  if (!function_label_operand (symbol, VOIDmode))
+    hppa_encode_label (symbol);
+
+#ifdef DTORS_SECTION_ASM_OP
+  default_dtor_section_asm_out_destructor (symbol, priority);
+#else
+# ifdef TARGET_ASM_NAMED_SECTION
+  default_named_section_asm_out_destructor (symbol, priority);
+# else
+  default_stabs_asm_out_destructor (symbol, priority);
+# endif
+#endif
+}
+#endif
+
 /* Returns 1 if the 6 operands specified in OPERANDS are suitable for
    use in fmpysub instructions.  */
 int
@@ -7080,9 +7423,9 @@ jump_in_call_delay (insn)
 
   if (PREV_INSN (insn)
       && PREV_INSN (PREV_INSN (insn))
-      && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
+      && GET_CODE (next_real_insn (PREV_INSN (PREV_INSN (insn)))) == INSN)
     {
-      rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn)));
+      rtx test_insn = next_real_insn (PREV_INSN (PREV_INSN (insn)));
 
       return (GET_CODE (PATTERN (test_insn)) == SEQUENCE
 	      && XVECEXP (PATTERN (test_insn), 0, 1) == insn);
@@ -7737,12 +8080,11 @@ function_value (valtype, func)
    ??? We might want to restructure this so that it looks more like other
    ports.  */
 rtx
-function_arg (cum, mode, type, named, incoming)
+function_arg (cum, mode, type, named)
      CUMULATIVE_ARGS *cum;
      enum machine_mode mode;
      tree type;
      int named ATTRIBUTE_UNUSED;
-     int incoming;
 {
   int max_arg_words = (TARGET_64BIT ? 8 : 4);
   int alignment = 0;
@@ -7884,7 +8226,7 @@ function_arg (cum, mode, type, named, incoming)
   if (((TARGET_PORTABLE_RUNTIME || TARGET_64BIT || TARGET_ELF32)
        /* If we are doing soft-float with portable runtime, then there
 	  is no need to worry about FP regs.  */
-       && ! TARGET_SOFT_FLOAT
+       && !TARGET_SOFT_FLOAT
        /* The parameter must be some kind of float, else we can just
 	  pass it in integer registers.  */
        && FLOAT_MODE_P (mode)
@@ -7893,14 +8235,15 @@ function_arg (cum, mode, type, named, incoming)
        /* libcalls do not need to pass items in both FP and general
 	  registers.  */
        && type != NULL_TREE
-       /* All this hair applies to outgoing args only.  */
-       && ! incoming)
+       /* All this hair applies to "outgoing" args only.  This includes
+	  sibcall arguments setup with FUNCTION_INCOMING_ARG.  */
+       && !cum->incoming)
       /* Also pass outgoing floating arguments in both registers in indirect
 	 calls with the 32 bit ABI and the HP assembler since there is no
 	 way to the specify argument locations in static functions.  */
-      || (! TARGET_64BIT
-	  && ! TARGET_GAS
-	  && ! incoming
+      || (!TARGET_64BIT
+	  && !TARGET_GAS
+	  && !cum->incoming
 	  && cum->indirect
 	  && FLOAT_MODE_P (mode)))
     {
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 860b7590d98..02791ce466d 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -206,6 +206,20 @@ extern int target_flags;
    not for external calls.  */
 #define TARGET_LONG_PIC_PCREL_CALL 0
 
+/* Define to a C expression evaluating to true to use SOM secondary
+   definition symbols for weak support.  Linker support for secondary
+   definition symbols is buggy prior to HP-UX 11.X.  */
+#define TARGET_SOM_SDEF 0
+
+/* Define to a C expression evaluating to true to save the entry value
+   of SP in the current frame marker.  This is normally unnecessary.
+   However, the HP-UX unwind library looks at the SAVE_SP callinfo flag.
+   HP compilers don't use this flag but it is supported by the assembler.
+   We set this flag to indicate that register %r3 has been saved at the
+   start of the frame.  Thus, when the HP unwind library is used, we
+   need to generate additional code to save SP into the frame marker.  */
+#define TARGET_HPUX_UNWIND_LIBRARY 0
+
 /* Macro to define tables used to set the flags.  This is a
    list in braces of target switches with each switch being
    { "NAME", VALUE, "HELP_STRING" }.  VALUE is the bits to set,
@@ -456,11 +470,12 @@ do {								\
 /* Boundary (in *bits*) on which stack pointer is always aligned;
    certain optimizations in combine depend on this.
 
-   GCC for the PA always rounds its stacks to a 8 * STACK_BOUNDARY
-   boundary, but that happens late in the compilation process.  */
+   The HP-UX runtime documents mandate 64-byte and 16-byte alignment for
+   the stack on the 32 and 64-bit ports, respectively.  However, we
+   are only guaranteed that the stack is aligned to BIGGEST_ALIGNMENT
+   in main.  Thus, we treat the former as the preferred alignment.  */
 #define STACK_BOUNDARY BIGGEST_ALIGNMENT
-
-#define PREFERRED_STACK_BOUNDARY (8 * STACK_BOUNDARY)
+#define PREFERRED_STACK_BOUNDARY (TARGET_64BIT ? 128 : 512)
 
 /* Allocation boundary (in *bits*) for the code of a function.  */
 #define FUNCTION_BOUNDARY BITS_PER_WORD
@@ -706,9 +721,13 @@ extern struct rtx_def *hppa_pic_save_rtx PARAMS ((void));
 /* The weird HPPA calling conventions require a minimum of 48 bytes on
    the stack: 16 bytes for register saves, and 32 bytes for magic.
    This is the difference between the logical top of stack and the
-   actual sp.  */
+   actual sp.
+
+   On the 64-bit port, the HP C compiler allocates a 48-byte frame
+   marker, although the runtime documentation only describes a 16
+   byte marker.  For compatibility, we allocate 48 bytes.  */
 #define STACK_POINTER_OFFSET \
-  (TARGET_64BIT ? -(current_function_outgoing_args_size + 16): -32)
+  (TARGET_64BIT ? -(current_function_outgoing_args_size + 48): -32)
 
 #define STACK_DYNAMIC_OFFSET(FNDECL)	\
   (TARGET_64BIT				\
@@ -751,12 +770,21 @@ extern struct rtx_def *hppa_pic_save_rtx PARAMS ((void));
    and about the args processed so far, enough to enable macros
    such as FUNCTION_ARG to determine where the next arg should go.
 
-   On the HP-PA, this is a single integer, which is a number of words
+   On the HP-PA, the WORDS field holds the number of words
    of arguments scanned so far (including the invisible argument,
-   if any, which holds the structure-value-address).
-   Thus 4 or more means all following args should go on the stack.  */
+   if any, which holds the structure-value-address).  Thus, 4 or
+   more means all following args should go on the stack.
+   
+   The INCOMING field tracks whether this is an "incoming" or
+   "outgoing" argument.
+   
+   The INDIRECT field indicates whether this is is an indirect
+   call or not.
+   
+   The NARGS_PROTOTYPE field indicates that an argument does not
+   have a prototype when it less than or equal to 0.  */
 
-struct hppa_args {int words, nargs_prototype, indirect; };
+struct hppa_args {int words, nargs_prototype, incoming, indirect; };
 
 #define CUMULATIVE_ARGS struct hppa_args
 
@@ -766,6 +794,7 @@ struct hppa_args {int words, nargs_prototype, indirect; };
 
 #define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT) \
   (CUM).words = 0, 							\
+  (CUM).incoming = 0,							\
   (CUM).indirect = INDIRECT,						\
   (CUM).nargs_prototype = (FNTYPE && TYPE_ARG_TYPES (FNTYPE)		\
 			   ? (list_length (TYPE_ARG_TYPES (FNTYPE)) - 1	\
@@ -780,6 +809,7 @@ struct hppa_args {int words, nargs_prototype, indirect; };
 
 #define INIT_CUMULATIVE_INCOMING_ARGS(CUM,FNTYPE,IGNORE) \
   (CUM).words = 0,				\
+  (CUM).incoming = 1,				\
   (CUM).indirect = 0,				\
   (CUM).nargs_prototype = 1000
 
@@ -855,7 +885,7 @@ struct hppa_args {int words, nargs_prototype, indirect; };
    tempted to try and simply it, but I worry about breaking something.  */
 
 #define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
-  function_arg (&CUM, MODE, TYPE, NAMED, 0)
+  function_arg (&CUM, MODE, TYPE, NAMED)
 
 /* Nonzero if we do not know how to pass TYPE solely in registers.  */
 #define MUST_PASS_IN_STACK(MODE,TYPE) \
@@ -863,9 +893,6 @@ struct hppa_args {int words, nargs_prototype, indirect; };
    && (TREE_CODE (TYPE_SIZE (TYPE)) != INTEGER_CST		\
        || TREE_ADDRESSABLE (TYPE)))
 
-#define FUNCTION_INCOMING_ARG(CUM, MODE, TYPE, NAMED) \
-  function_arg (&CUM, MODE, TYPE, NAMED, 1)
-
 /* For an arg passed partly in registers and partly in memory,
    this is the number of registers used.
    For args passed entirely in registers or entirely in memory, zero.  */
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 640196b067d..d69e329dc6a 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -1,6 +1,6 @@
 ;;- Machine description for HP PA-RISC architecture for GNU C compiler
 ;;   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-;;   2002 Free Software Foundation, Inc.
+;;   2002, 2003 Free Software Foundation, Inc.
 ;;   Contributed by the Center for Software Science at the University
 ;;   of Utah.
 
@@ -625,23 +625,37 @@
   [(set_attr "length" "4")
    (set_attr "type" "fpcc")])
 
-;; The following two patterns are optimization placeholders.  In almost
+;; Provide a means to emit the movccfp0 and movccfp1 optimization
+;; placeholders.  This is necessary in rare situations when a
+;; placeholder is re-emitted (see PR 8705).
+
+(define_expand "movccfp"
+  [(set (reg:CCFP 0)
+	(match_operand 0 "const_int_operand" ""))]
+  "! TARGET_SOFT_FLOAT"
+  "
+{
+  if ((unsigned HOST_WIDE_INT) INTVAL (operands[0]) > 1)
+    FAIL;
+}")
+
+;; The following patterns are optimization placeholders.  In almost
 ;; all cases, the user of the condition code will be simplified and the
 ;; original condition code setting insn should be eliminated.
 
-(define_insn "*setccfp0"
+(define_insn "*movccfp0"
   [(set (reg:CCFP 0)
 	(const_int 0))]
   "! TARGET_SOFT_FLOAT"
-  "fcmp,dbl,!= %%fr0,%%fr0"
+  "fcmp,dbl,= %%fr0,%%fr0"
   [(set_attr "length" "4")
    (set_attr "type" "fpcc")])
 
-(define_insn "*setccfp1"
+(define_insn "*movccfp1"
   [(set (reg:CCFP 0)
 	(const_int 1))]
   "! TARGET_SOFT_FLOAT"
-  "fcmp,dbl,= %%fr0,%%fr0"
+  "fcmp,dbl,!= %%fr0,%%fr0"
   [(set_attr "length" "4")
    (set_attr "type" "fpcc")])
 
@@ -4086,7 +4100,7 @@
   "!TARGET_64BIT"
   "* return output_mul_insn (0, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_insn ""
   [(set (reg:SI 29) (mult:SI (reg:SI 26) (reg:SI 25)))
@@ -4097,7 +4111,7 @@
   "TARGET_64BIT"
   "* return output_mul_insn (0, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_expand "muldi3"
   [(set (match_operand:DI 0 "register_operand" "")
@@ -4135,7 +4149,7 @@
   emit_insn (gen_umulsidi3 (cross_product2, op2l, op1r));
 
   /* Emit a multiply for the low sub-word.  */
-  emit_insn (gen_umulsidi3 (low_product, op2r, op1r));
+  emit_insn (gen_umulsidi3 (low_product, copy_rtx (op2r), copy_rtx (op1r)));
 
   /* Sum the cross products and shift them into proper position.  */
   emit_insn (gen_adddi3 (cross_scratch, cross_product1, cross_product2));
@@ -4188,7 +4202,7 @@
   "*
    return output_div_insn (operands, 0, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_insn ""
   [(set (reg:SI 29)
@@ -4202,7 +4216,7 @@
   "*
    return output_div_insn (operands, 0, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_expand "udivsi3"
   [(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -4245,7 +4259,7 @@
   "*
    return output_div_insn (operands, 1, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_insn ""
   [(set (reg:SI 29)
@@ -4259,7 +4273,7 @@
   "*
    return output_div_insn (operands, 1, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_expand "modsi3"
   [(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -4298,7 +4312,7 @@
   "*
   return output_mod_insn (0, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_insn ""
   [(set (reg:SI 29) (mod:SI (reg:SI 26) (reg:SI 25)))
@@ -4311,7 +4325,7 @@
   "*
   return output_mod_insn (0, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_expand "umodsi3"
   [(set (reg:SI 26) (match_operand:SI 1 "move_operand" ""))
@@ -4350,7 +4364,7 @@
   "*
   return output_mod_insn (1, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 (define_insn ""
   [(set (reg:SI 29) (umod:SI (reg:SI 26) (reg:SI 25)))
@@ -4363,7 +4377,7 @@
   "*
   return output_mod_insn (1, insn);"
   [(set_attr "type" "milli")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 0)"))])
+   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn)"))])
 
 ;;- and instructions
 ;; We define DImode `and` so with DImode `not` we can get
@@ -5614,23 +5628,7 @@
   [(return)
    (use (reg:SI 2))
    (const_int 1)]
-  "! flag_pic"
-  "*
-{
-  if (TARGET_PA_20)
-    return \"bve%* (%%r2)\";
-  return \"bv%* %%r0(%%r2)\";
-}"
-  [(set_attr "type" "branch")
-   (set_attr "length" "4")])
-
-;; Use the PIC register to ensure it's restored after a
-;; call in PIC mode.
-(define_insn "return_internal_pic"
-  [(return)
-   (use (match_operand 0 "register_operand" "r"))
-   (use (reg:SI 2))]
-  "flag_pic && true_regnum (operands[0]) == PIC_OFFSET_TABLE_REGNUM"
+  ""
   "*
 {
   if (TARGET_PA_20)
@@ -5640,17 +5638,14 @@
   [(set_attr "type" "branch")
    (set_attr "length" "4")])
 
-;; Use the PIC register to ensure it's restored after a
-;; call in PIC mode.  This is used for eh returns which
-;; bypass the return stub.
+;; This is used for eh returns which bypass the return stub.
 (define_insn "return_external_pic"
   [(return)
-   (use (match_operand 0 "register_operand" "r"))
-   (use (reg:SI 2))
-   (clobber (reg:SI 1))]
-  "flag_pic
-   && current_function_calls_eh_return
-   && true_regnum (operands[0]) == PIC_OFFSET_TABLE_REGNUM"
+   (clobber (reg:SI 1))
+   (use (reg:SI 2))]
+  "!TARGET_NO_SPACE_REGS
+   && !TARGET_PA_20
+   && flag_pic && current_function_calls_eh_return"
   "ldsid (%%sr0,%%r2),%%r1\;mtsp %%r1,%%sr0\;be%* 0(%%sr0,%%r2)"
   [(set_attr "type" "branch")
    (set_attr "length" "12")])
@@ -5683,20 +5678,18 @@
       rtx x;
 
       hppa_expand_epilogue ();
-      if (flag_pic)
-	{
-	  rtx pic = gen_rtx_REG (word_mode, PIC_OFFSET_TABLE_REGNUM);
 
-	  /* EH returns bypass the normal return stub.  Thus, we must do an
-	     interspace branch to return from functions that call eh_return.
-	     This is only a problem for returns from shared code.  */
-	  if (current_function_calls_eh_return)
-	    x = gen_return_external_pic (pic);
-	  else
-	    x = gen_return_internal_pic (pic);
-	}
+      /* EH returns bypass the normal return stub.  Thus, we must do an
+	 interspace branch to return from functions that call eh_return.
+	 This is only a problem for returns from shared code on ports
+	 using space registers.  */
+      if (!TARGET_NO_SPACE_REGS
+	  && !TARGET_PA_20
+	  && flag_pic && current_function_calls_eh_return)
+	x = gen_return_external_pic ();
       else
 	x = gen_return_internal ();
+
       emit_jump_insn (x);
     }
   DONE;
@@ -5742,8 +5735,6 @@
   ""
   "*
 {
-  extern int optimize;
-
   if (GET_MODE (insn) == SImode)
     return \"b %l0%#\";
 
@@ -5752,61 +5743,7 @@
       && get_attr_length (insn) != 16)
     return \"b%* %l0\";
 
-  /* An unconditional branch which can not reach its target.
-
-     We need to be able to use %r1 as a scratch register; however,
-     we can never be sure whether or not it's got a live value in
-     it.  Therefore, we must restore its original value after the
-     jump.
-
-     To make matters worse, we don't have a stack slot which we
-     can always clobber.  sp-12/sp-16 shouldn't ever have a live
-     value during a non-optimizing compilation, so we use those
-     slots for now.  We don't support very long branches when
-     optimizing -- they should be quite rare when optimizing.
-
-     Really the way to go long term is a register scavenger; goto
-     the target of the jump and find a register which we can use
-     as a scratch to hold the value in %r1.  */
-
-  /* We don't know how to register scavenge yet.  */
-  if (optimize)
-    abort ();
-
-  /* First store %r1 into the stack.  */
-  output_asm_insn (\"stw %%r1,-16(%%r30)\", operands);
-
-  /* Now load the target address into %r1 and do an indirect jump
-     to the value specified in %r1.  Be careful to generate PIC
-     code as needed.  */
-  if (flag_pic)
-    {
-      rtx xoperands[2];
-      xoperands[0] = operands[0];
-      if (TARGET_SOM || ! TARGET_GAS)
-	{
-	  xoperands[1] = gen_label_rtx ();
-
-	  output_asm_insn (\"{bl|b,l} .+8,%%r1\\n\\taddil L'%l0-%l1,%%r1\",
-			   xoperands);
-	  ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
-				     CODE_LABEL_NUMBER (xoperands[1]));
-	  output_asm_insn (\"ldo R'%l0-%l1(%%r1),%%r1\", xoperands);
-	}
-      else
-	{
-	  output_asm_insn (\"{bl|b,l} .+8,%%r1\", xoperands);
-	  output_asm_insn (\"addil L'%l0-$PIC_pcrel$0+4,%%r1\", xoperands);
-	  output_asm_insn (\"ldo R'%l0-$PIC_pcrel$0+8(%%r1),%%r1\", xoperands);
-	}
-      output_asm_insn (\"bv %%r0(%%r1)\", xoperands);
-    }
-  else
-    output_asm_insn (\"ldil L'%l0,%%r1\\n\\tbe R'%l0(%%sr4,%%r1)\", operands);;
-
-  /* And restore the value of %r1 in the delay slot.  We're not optimizing,
-     so we know nothing else can be in the delay slot.  */
-  return \"ldw -16(%%r30),%%r1\";
+  return output_lbranch (operands[0], insn);
 }"
   [(set_attr "type" "uncond_branch")
    (set_attr "pa_combine_type" "uncond_branch")
@@ -5901,8 +5838,8 @@
   ""
   "
 {
-  rtx op;
-  rtx call_insn;
+  rtx op, call_insn;
+  rtx nb = operands[1];
 
   if (TARGET_PORTABLE_RUNTIME)
     op = force_reg (SImode, XEXP (operands[0], 0));
@@ -5910,51 +5847,127 @@
     op = XEXP (operands[0], 0);
 
   if (TARGET_64BIT)
-    emit_move_insn (arg_pointer_rtx,
-		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
-				  GEN_INT (64)));
+    {
+      if (!virtuals_instantiated)
+	emit_move_insn (arg_pointer_rtx,
+			gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+				      GEN_INT (64)));
+      else
+	{
+	  /* The loop pass can generate new libcalls after the virtual
+	     registers are instantiated when fpregs are disabled because
+	     the only method that we have for doing DImode multiplication
+	     is with a libcall.  This could be trouble if we haven't
+	     allocated enough space for the outgoing arguments.  */
+	  if (INTVAL (nb) > current_function_outgoing_args_size)
+	    abort ();
+
+	  emit_move_insn (arg_pointer_rtx,
+			  gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+					GEN_INT (STACK_POINTER_OFFSET + 64)));
+	}
+    }
 
   /* Use two different patterns for calls to explicitly named functions
      and calls through function pointers.  This is necessary as these two
      types of calls use different calling conventions, and CSE might try
      to change the named call into an indirect call in some cases (using
-     two patterns keeps CSE from performing this optimization).  */
-  if (GET_CODE (op) == SYMBOL_REF)
-    call_insn = emit_call_insn (gen_call_internal_symref (op, operands[1]));
-  else if (TARGET_64BIT)
+     two patterns keeps CSE from performing this optimization).
+
+     We now use even more call patterns as there was a subtle bug in
+     attempting to restore the pic register after a call using a simple
+     move insn.  During reload, a instruction involving a pseudo register
+     with no explicit dependence on the PIC register can be converted
+     to an equivalent load from memory using the PIC register.  If we
+     emit a simple move to restore the PIC register in the initial rtl
+     generation, then it can potentially be repositioned during scheduling.
+     and an instruction that eventually uses the PIC register may end up
+     between the call and the PIC register restore.
+
+     This only worked because there is a post call group of instructions
+     that are scheduled with the call.  These instructions are included
+     in the same basic block as the call.  However, calls can throw in
+     C++ code and a basic block has to terminate at the call if the call
+     can throw.  This results in the PIC register restore being scheduled
+     independently from the call.  So, we now hide the save and restore
+     of the PIC register in the call pattern until after reload.  Then,
+     we split the moves out.  A small side benefit is that we now don't
+     need to have a use of the PIC register in the return pattern and
+     the final save/restore operation is not needed.
+
+     I elected to just clobber %r4 in the PIC patterns and use it instead
+     of trying to force hppa_pic_save_rtx () to a callee saved register.
+     This might have required a new register class and constraint.  It
+     was also simpler to just handle the restore from a register than a
+     generic pseudo.  */
+  if (TARGET_64BIT)
     {
-      rtx tmpreg = force_reg (word_mode, op);
-      call_insn = emit_call_insn (gen_call_internal_reg_64bit (tmpreg,
-							       operands[1]));
+      if (GET_CODE (op) == SYMBOL_REF)
+	call_insn = emit_call_insn (gen_call_symref_64bit (op, nb));
+      else
+	{
+	  op = force_reg (word_mode, op);
+	  call_insn = emit_call_insn (gen_call_reg_64bit (op, nb));
+	}
     }
   else
     {
-      rtx tmpreg = gen_rtx_REG (word_mode, 22);
-      emit_move_insn (tmpreg, force_reg (word_mode, op));
-      call_insn = emit_call_insn (gen_call_internal_reg (operands[1]));
-    }
-
-  if (TARGET_64BIT)
-    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
-
-  if (flag_pic)
-    {
-      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+      if (GET_CODE (op) == SYMBOL_REF)
+	{
+	  if (flag_pic)
+	    call_insn = emit_call_insn (gen_call_symref_pic (op, nb));
+	  else
+	    call_insn = emit_call_insn (gen_call_symref (op, nb));
+	}
+      else
+	{
+	  rtx tmpreg = gen_rtx_REG (word_mode, 22);
 
-      /* After each call we must restore the PIC register, even if it
-	 doesn't appear to be used.  */
-      emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+	  emit_move_insn (tmpreg, force_reg (word_mode, op));
+	  if (flag_pic)
+	    call_insn = emit_call_insn (gen_call_reg_pic (nb));
+	  else
+	    call_insn = emit_call_insn (gen_call_reg (nb));
+	}
     }
+
   DONE;
 }")
 
-(define_insn "call_internal_symref"
+;; We use function calls to set the attribute length of calls and millicode
+;; calls.  This is necessary because of the large variety of call sequences.
+;; Implementing the calculation in rtl is difficult as well as ugly.  As
+;; we need the same calculation in several places, maintenance becomes a
+;; nightmare.
+;;
+;; However, this has a subtle impact on branch shortening.  When the
+;; expression used to set the length attribute of an instruction depends
+;; on a relative address (e.g., pc or a branch address), genattrtab
+;; notes that the insn's length is variable, and attempts to determine a
+;; worst-case default length and code to compute an insn's current length.
+
+;; The use of a function call hides the variable dependence of our calls
+;; and millicode calls.  The result is genattrtab doesn't treat the operation
+;; as variable and it only generates code for the default case using our
+;; function call.  Because of this, calls and millicode calls have a fixed
+;; length in the branch shortening pass, and some branches will use a longer
+;; code sequence than necessary.  However, the length of any given call
+;; will still reflect its final code location and it may be shorter than
+;; the initial length estimate.
+
+;; It's possible to trick genattrtab by adding an expression involving `pc'
+;; in the set.  However, when genattrtab hits a function call in its attempt
+;; to compute the default length, it marks the result as unknown and sets
+;; the default result to MAX_INT ;-(  One possible fix that would allow
+;; calls to participate in branch shortening would be to make the call to       ;; insn_default_length a target option.  Then, we could massage unknown         ;; results.  Another fix might be to change genattrtab so that it just does     ;; the call in the variable case as it already does for the fixed case.
+
+(define_insn "call_symref"
   [(call (mem:SI (match_operand 0 "call_operand_address" ""))
 	 (match_operand 1 "" "i"))
    (clobber (reg:SI 1))
    (clobber (reg:SI 2))
    (use (const_int 0))]
-  "! TARGET_PORTABLE_RUNTIME"
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
   "*
 {
   output_arg_descriptor (insn);
@@ -5963,102 +5976,356 @@
   [(set_attr "type" "call")
    (set (attr "length") (symbol_ref "attr_length_call (insn, 0)"))])
 
-(define_insn "call_internal_reg_64bit"
-  [(call (mem:SI (match_operand:DI 0 "register_operand" "r"))
+(define_insn "call_symref_pic"
+  [(call (mem:SI (match_operand 0 "call_operand_address" ""))
 	 (match_operand 1 "" "i"))
+   (clobber (reg:SI 1))
    (clobber (reg:SI 2))
-   (use (const_int 1))]
+   (clobber (reg:SI 4))
+   (use (reg:SI 19))
+   (use (const_int 0))]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+  "*
+{
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[0], 0);
+}"
+  [(set_attr "type" "call")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_call (insn, 0)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:SI 4) (reg:SI 19))
+   (parallel [(call (mem:SI (match_dup 0))
+		    (match_dup 1))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])
+   (set (reg:SI 19) (reg:SI 4))]
+  "")
+
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed"
+  [(parallel [(call (mem:SI (match_dup 0))
+		    (match_dup 1))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])]
+  "")
+
+(define_insn "*call_symref_pic_post_reload"
+  [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+	 (match_operand 1 "" "i"))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (use (reg:SI 19))
+   (use (const_int 0))]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+  "*
+{
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[0], 0);
+}"
+  [(set_attr "type" "call")
+   (set (attr "length") (symbol_ref "attr_length_call (insn, 0)"))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_symref_64bit"
+  [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+	 (match_operand 1 "" "i"))
+   (clobber (reg:DI 1))
+   (clobber (reg:DI 2))
+   (clobber (reg:DI 4))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 0))]
   "TARGET_64BIT"
   "*
 {
-  /* ??? Needs more work.  Length computation, split into multiple insns,
-     expose delay slot.  */
-  return \"ldd 16(%0),%%r2\;bve,l (%%r2),%%r2\;ldd 24(%0),%%r27\";
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[0], 0);
 }"
-  [(set_attr "type" "dyncall")
-   (set (attr "length") (const_int 12))])
+  [(set_attr "type" "call")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_call (insn, 0)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])]
+  "TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:DI 4) (reg:DI 27))
+   (parallel [(call (mem:SI (match_dup 0))
+		    (match_dup 1))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])
+   (set (reg:DI 27) (reg:DI 4))]
+  "")
 
-(define_insn "call_internal_reg"
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(call (mem:SI (match_dup 0))
+		    (match_dup 1))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])]
+  "")
+
+(define_insn "*call_symref_64bit_post_reload"
+  [(call (mem:SI (match_operand 0 "call_operand_address" ""))
+	 (match_operand 1 "" "i"))
+   (clobber (reg:DI 1))
+   (clobber (reg:DI 2))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 0))]
+  "TARGET_64BIT"
+  "*
+{
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[0], 0);
+}"
+  [(set_attr "type" "call")
+   (set (attr "length") (symbol_ref "attr_length_call (insn, 0)"))])
+
+(define_insn "call_reg"
   [(call (mem:SI (reg:SI 22))
 	 (match_operand 0 "" "i"))
    (clobber (reg:SI 1))
    (clobber (reg:SI 2))
    (use (const_int 1))]
-  ""
+  "!TARGET_64BIT"
   "*
 {
-  rtx xoperands[2];
+  return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length") (symbol_ref "attr_length_indirect_call (insn)"))])
 
-  /* First the special case for kernels, level 0 systems, etc.  */
-  if (TARGET_FAST_INDIRECT_CALLS)
-    return \"ble 0(%%sr4,%%r22)\;copy %%r31,%%r2\";
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_reg_pic"
+  [(call (mem:SI (reg:SI 22))
+	 (match_operand 0 "" "i"))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (clobber (reg:SI 4))
+   (use (reg:SI 19))
+   (use (const_int 1))]
+  "!TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_indirect_call (insn)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(call (mem:SI (reg:SI 22))
+		    (match_operand 0 "" ""))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])]
+  "!TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:SI 4) (reg:SI 19))
+   (parallel [(call (mem:SI (reg:SI 22))
+		    (match_dup 0))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])
+   (set (reg:SI 19) (reg:SI 4))]
+  "")
 
-  /* Now the normal case -- we can reach $$dyncall directly or
-     we're sure that we can get there via a long-branch stub. 
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(call (mem:SI (reg:SI 22))
+		    (match_operand 0 "" ""))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])]
+  "!TARGET_64BIT && reload_completed"
+  [(parallel [(call (mem:SI (reg:SI 22))
+		    (match_dup 0))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])]
+  "")
 
-     No need to check target flags as the length uniquely identifies
-     the remaining cases.  */
-  if (get_attr_length (insn) == 8)
-    return \".CALL\\tARGW0=GR\;{bl|b,l} $$dyncall,%%r31\;copy %%r31,%%r2\";
+(define_insn "*call_reg_pic_post_reload"
+  [(call (mem:SI (reg:SI 22))
+	 (match_operand 0 "" "i"))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (use (reg:SI 19))
+   (use (const_int 1))]
+  "!TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length") (symbol_ref "attr_length_indirect_call (insn)"))])
 
-  /* Long millicode call, but we are not generating PIC or portable runtime
-     code.  */
-  if (get_attr_length (insn) == 12)
-    return \".CALL\\tARGW0=GR\;ldil L%%$$dyncall,%%r2\;ble R%%$$dyncall(%%sr4,%%r2)\;copy %%r31,%%r2\";
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_reg_64bit"
+  [(call (mem:SI (match_operand:DI 0 "register_operand" "r"))
+	 (match_operand 1 "" "i"))
+   (clobber (reg:DI 2))
+   (clobber (reg:DI 4))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 1))]
+  "TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, operands[0]);
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_indirect_call (insn)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(call (mem:SI (match_operand 0 "register_operand" ""))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])]
+  "TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:DI 4) (reg:DI 27))
+   (parallel [(call (mem:SI (match_dup 0))
+		    (match_dup 1))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])
+   (set (reg:DI 27) (reg:DI 4))]
+  "")
 
-  /* Long millicode call for portable runtime.  */
-  if (get_attr_length (insn) == 20)
-    return \"ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr %%r0,%%r2\;bv,n %%r0(%%r31)\;nop\";
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(call (mem:SI (match_operand 0 "register_operand" ""))
+		    (match_operand 1 "" ""))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(call (mem:SI (match_dup 0))
+		    (match_dup 1))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])]
+  "")
 
-  /* If we're generating PIC code.  */
-  xoperands[0] = operands[0];
-  if (TARGET_SOM || ! TARGET_GAS)
-    xoperands[1] = gen_label_rtx ();
-  output_asm_insn (\"{bl|b,l} .+8,%%r1\", xoperands);
-  if (TARGET_SOM || ! TARGET_GAS)
-    {
-      output_asm_insn (\"addil L%%$$dyncall-%1,%%r1\", xoperands);
-      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
-				 CODE_LABEL_NUMBER (xoperands[1]));
-      output_asm_insn (\"ldo R%%$$dyncall-%1(%%r1),%%r1\", xoperands);
-    }
-  else
-    {
-      output_asm_insn (\"addil L%%$$dyncall-$PIC_pcrel$0+4,%%r1\", xoperands);
-      output_asm_insn (\"ldo R%%$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1\",
-      		       xoperands);
-    }
-  output_asm_insn (\"blr %%r0,%%r2\", xoperands);
-  output_asm_insn (\"bv,n %%r0(%%r1)\\n\\tnop\", xoperands);
-  return \"\";
+(define_insn "*call_reg_64bit_post_reload"
+  [(call (mem:SI (match_operand:DI 0 "register_operand" "r"))
+	 (match_operand 1 "" "i"))
+   (clobber (reg:DI 2))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 1))]
+  "TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, operands[0]);
 }"
   [(set_attr "type" "dyncall")
-   (set (attr "length")
-     (cond [
-;; First FAST_INDIRECT_CALLS
-	    (ne (symbol_ref "TARGET_FAST_INDIRECT_CALLS")
-		(const_int 0))
-	    (const_int 8)
-
-;; Target (or stub) within reach
-	    (and (lt (plus (symbol_ref "total_code_bytes") (pc))
-		     (const_int 240000))
-		 (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-		     (const_int 0)))
-	    (const_int 8)
-
-;; Out of reach PIC
-	    (ne (symbol_ref "flag_pic")
-		(const_int 0))
-	    (const_int 24)
-
-;; Out of reach PORTABLE_RUNTIME
-	    (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
-		(const_int 0))
-	    (const_int 20)]
-
-;; Out of reach, can use ble
-	  (const_int 12)))])
+   (set (attr "length") (symbol_ref "attr_length_indirect_call (insn)"))])
 
 (define_expand "call_value"
   [(parallel [(set (match_operand 0 "" "")
@@ -6068,67 +6335,111 @@
   ""
   "
 {
-  rtx op;
-  rtx call_insn;
+  rtx op, call_insn;
+  rtx dst = operands[0];
+  rtx nb = operands[2];
 
   if (TARGET_PORTABLE_RUNTIME)
-    op = force_reg (word_mode, XEXP (operands[1], 0));
+    op = force_reg (SImode, XEXP (operands[1], 0));
   else
     op = XEXP (operands[1], 0);
 
   if (TARGET_64BIT)
-    emit_move_insn (arg_pointer_rtx,
-		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
-				  GEN_INT (64)));
+    {
+      if (!virtuals_instantiated)
+	emit_move_insn (arg_pointer_rtx,
+			gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+				      GEN_INT (64)));
+      else
+	{
+	  /* The loop pass can generate new libcalls after the virtual
+	     registers are instantiated when fpregs are disabled because
+	     the only method that we have for doing DImode multiplication
+	     is with a libcall.  This could be trouble if we haven't
+	     allocated enough space for the outgoing arguments.  */
+	  if (INTVAL (nb) > current_function_outgoing_args_size)
+	    abort ();
+
+	  emit_move_insn (arg_pointer_rtx,
+			  gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+					GEN_INT (STACK_POINTER_OFFSET + 64)));
+	}
+    }
 
   /* Use two different patterns for calls to explicitly named functions
      and calls through function pointers.  This is necessary as these two
      types of calls use different calling conventions, and CSE might try
      to change the named call into an indirect call in some cases (using
-     two patterns keeps CSE from performing this optimization).  */
-  if (GET_CODE (op) == SYMBOL_REF)
-    call_insn = emit_call_insn (gen_call_value_internal_symref (operands[0],
-								op,
-								operands[2]));
-  else if (TARGET_64BIT)
+     two patterns keeps CSE from performing this optimization).
+
+     We now use even more call patterns as there was a subtle bug in
+     attempting to restore the pic register after a call using a simple
+     move insn.  During reload, a instruction involving a pseudo register
+     with no explicit dependence on the PIC register can be converted
+     to an equivalent load from memory using the PIC register.  If we
+     emit a simple move to restore the PIC register in the initial rtl
+     generation, then it can potentially be repositioned during scheduling.
+     and an instruction that eventually uses the PIC register may end up
+     between the call and the PIC register restore.
+
+     This only worked because there is a post call group of instructions
+     that are scheduled with the call.  These instructions are included
+     in the same basic block as the call.  However, calls can throw in
+     C++ code and a basic block has to terminate at the call if the call
+     can throw.  This results in the PIC register restore being scheduled
+     independently from the call.  So, we now hide the save and restore
+     of the PIC register in the call pattern until after reload.  Then,
+     we split the moves out.  A small side benefit is that we now don't
+     need to have a use of the PIC register in the return pattern and
+     the final save/restore operation is not needed.
+
+     I elected to just clobber %r4 in the PIC patterns and use it instead
+     of trying to force hppa_pic_save_rtx () to a callee saved register.
+     This might have required a new register class and constraint.  It
+     was also simpler to just handle the restore from a register than a
+     generic pseudo.  */
+  if (TARGET_64BIT)
     {
-      rtx tmpreg = force_reg (word_mode, op);
-      call_insn
-	= emit_call_insn (gen_call_value_internal_reg_64bit (operands[0],
-							     tmpreg,
-							     operands[2]));
+      if (GET_CODE (op) == SYMBOL_REF)
+	call_insn = emit_call_insn (gen_call_val_symref_64bit (dst, op, nb));
+      else
+	{
+	  op = force_reg (word_mode, op);
+	  call_insn = emit_call_insn (gen_call_val_reg_64bit (dst, op, nb));
+	}
     }
   else
     {
-      rtx tmpreg = gen_rtx_REG (word_mode, 22);
-      emit_move_insn (tmpreg, force_reg (word_mode, op));
-      call_insn = emit_call_insn (gen_call_value_internal_reg (operands[0],
-							       operands[2]));
-    }
-
-  if (TARGET_64BIT)
-    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
-
-  if (flag_pic)
-    {
-      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+      if (GET_CODE (op) == SYMBOL_REF)
+	{
+	  if (flag_pic)
+	    call_insn = emit_call_insn (gen_call_val_symref_pic (dst, op, nb));
+	  else
+	    call_insn = emit_call_insn (gen_call_val_symref (dst, op, nb));
+	}
+      else
+	{
+	  rtx tmpreg = gen_rtx_REG (word_mode, 22);
 
-      /* After each call we must restore the PIC register, even if it
-	 doesn't appear to be used.  */
-      emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+	  emit_move_insn (tmpreg, force_reg (word_mode, op));
+	  if (flag_pic)
+	    call_insn = emit_call_insn (gen_call_val_reg_pic (dst, nb));
+	  else
+	    call_insn = emit_call_insn (gen_call_val_reg (dst, nb));
+	}
     }
+
   DONE;
 }")
 
-(define_insn "call_value_internal_symref"
+(define_insn "call_val_symref"
   [(set (match_operand 0 "" "")
 	(call (mem:SI (match_operand 1 "call_operand_address" ""))
 	      (match_operand 2 "" "i")))
    (clobber (reg:SI 1))
    (clobber (reg:SI 2))
    (use (const_int 0))]
-  ;;- Don't use operand 1 for most machines.
-  "! TARGET_PORTABLE_RUNTIME"
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
   "*
 {
   output_arg_descriptor (insn);
@@ -6137,104 +6448,380 @@
   [(set_attr "type" "call")
    (set (attr "length") (symbol_ref "attr_length_call (insn, 0)"))])
 
-(define_insn "call_value_internal_reg_64bit"
+(define_insn "call_val_symref_pic"
   [(set (match_operand 0 "" "")
-         (call (mem:SI (match_operand:DI 1 "register_operand" "r"))
-	       (match_operand 2 "" "i")))
+	(call (mem:SI (match_operand 1 "call_operand_address" ""))
+	      (match_operand 2 "" "i")))
+   (clobber (reg:SI 1))
    (clobber (reg:SI 2))
-   (use (const_int 1))]
+   (clobber (reg:SI 4))
+   (use (reg:SI 19))
+   (use (const_int 0))]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+  "*
+{
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[1], 0);
+}"
+  [(set_attr "type" "call")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_call (insn, 0)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(set (match_operand 0 "" "")
+	      (call (mem:SI (match_operand 1 "call_operand_address" ""))
+		    (match_operand 2 "" "")))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:SI 4) (reg:SI 19))
+   (parallel [(set (match_dup 0)
+	      (call (mem:SI (match_dup 1))
+		    (match_dup 2)))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])
+   (set (reg:SI 19) (reg:SI 4))]
+  "")
+
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(set (match_operand 0 "" "")
+	      (call (mem:SI (match_operand 1 "call_operand_address" ""))
+		    (match_operand 2 "" "")))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT && reload_completed"
+  [(parallel [(set (match_dup 0)
+	      (call (mem:SI (match_dup 1))
+		    (match_dup 2)))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 0))])]
+  "")
+
+(define_insn "*call_val_symref_pic_post_reload"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand 1 "call_operand_address" ""))
+	      (match_operand 2 "" "i")))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (use (reg:SI 19))
+   (use (const_int 0))]
+  "!TARGET_PORTABLE_RUNTIME && !TARGET_64BIT"
+  "*
+{
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[1], 0);
+}"
+  [(set_attr "type" "call")
+   (set (attr "length") (symbol_ref "attr_length_call (insn, 0)"))])
+
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_val_symref_64bit"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand 1 "call_operand_address" ""))
+	      (match_operand 2 "" "i")))
+   (clobber (reg:DI 1))
+   (clobber (reg:DI 2))
+   (clobber (reg:DI 4))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 0))]
   "TARGET_64BIT"
   "*
 {
-  /* ??? Needs more work.  Length computation, split into multiple insns,
-     expose delay slot.  */
-  return \"ldd 16(%1),%%r2\;bve,l (%%r2),%%r2\;ldd 24(%1),%%r27\";
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[1], 0);
 }"
-  [(set_attr "type" "dyncall")
-   (set (attr "length") (const_int 12))])
+  [(set_attr "type" "call")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_call (insn, 0)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(set (match_operand 0 "" "")
+	      (call (mem:SI (match_operand 1 "call_operand_address" ""))
+		    (match_operand 2 "" "")))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])]
+  "TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:DI 4) (reg:DI 27))
+   (parallel [(set (match_dup 0)
+	      (call (mem:SI (match_dup 1))
+		    (match_dup 2)))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])
+   (set (reg:DI 27) (reg:DI 4))]
+  "")
+
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(set (match_operand 0 "" "")
+	      (call (mem:SI (match_operand 1 "call_operand_address" ""))
+		    (match_operand 2 "" "")))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (match_dup 0)
+	      (call (mem:SI (match_dup 1))
+		    (match_dup 2)))
+	      (clobber (reg:DI 1))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 0))])]
+  "")
 
-(define_insn "call_value_internal_reg"
+(define_insn "*call_val_symref_64bit_post_reload"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand 1 "call_operand_address" ""))
+	      (match_operand 2 "" "i")))
+   (clobber (reg:DI 1))
+   (clobber (reg:DI 2))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 0))]
+  "TARGET_64BIT"
+  "*
+{
+  output_arg_descriptor (insn);
+  return output_call (insn, operands[1], 0);
+}"
+  [(set_attr "type" "call")
+   (set (attr "length") (symbol_ref "attr_length_call (insn, 0)"))])
+
+(define_insn "call_val_reg"
   [(set (match_operand 0 "" "")
 	(call (mem:SI (reg:SI 22))
 	      (match_operand 1 "" "i")))
    (clobber (reg:SI 1))
    (clobber (reg:SI 2))
    (use (const_int 1))]
-  ""
+  "!TARGET_64BIT"
   "*
 {
-  rtx xoperands[2];
+  return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length") (symbol_ref "attr_length_indirect_call (insn)"))])
 
-  /* First the special case for kernels, level 0 systems, etc.  */
-  if (TARGET_FAST_INDIRECT_CALLS)
-    return \"ble 0(%%sr4,%%r22)\;copy %%r31,%%r2\";
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_val_reg_pic"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (reg:SI 22))
+	      (match_operand 1 "" "i")))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (clobber (reg:SI 4))
+   (use (reg:SI 19))
+   (use (const_int 1))]
+  "!TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_indirect_call (insn)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem:SI (reg:SI 22))
+			 (match_operand 1 "" "")))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])]
+  "!TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:SI 4) (reg:SI 19))
+   (parallel [(set (match_dup 0)
+		   (call (mem:SI (reg:SI 22))
+			 (match_dup 1)))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])
+   (set (reg:SI 19) (reg:SI 4))]
+  "")
 
-  /* Now the normal case -- we can reach $$dyncall directly or
-     we're sure that we can get there via a long-branch stub. 
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem:SI (reg:SI 22))
+			 (match_operand 1 "" "")))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (clobber (reg:SI 4))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])]
+  "!TARGET_64BIT && reload_completed"
+  [(parallel [(set (match_dup 0)
+		   (call (mem:SI (reg:SI 22))
+			 (match_dup 1)))
+	      (clobber (reg:SI 1))
+	      (clobber (reg:SI 2))
+	      (use (reg:SI 19))
+	      (use (const_int 1))])]
+  "")
 
-     No need to check target flags as the length uniquely identifies
-     the remaining cases.  */
-  if (get_attr_length (insn) == 8)
-    return \".CALL\\tARGW0=GR\;{bl|b,l} $$dyncall,%%r31\;copy %%r31,%%r2\";
+(define_insn "*call_val_reg_pic_post_reload"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (reg:SI 22))
+	      (match_operand 1 "" "i")))
+   (clobber (reg:SI 1))
+   (clobber (reg:SI 2))
+   (use (reg:SI 19))
+   (use (const_int 1))]
+  "!TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, gen_rtx_REG (word_mode, 22));
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length") (symbol_ref "attr_length_indirect_call (insn)"))])
 
-  /* Long millicode call, but we are not generating PIC or portable runtime
-     code.  */
-  if (get_attr_length (insn) == 12)
-    return \".CALL\\tARGW0=GR\;ldil L%%$$dyncall,%%r2\;ble R%%$$dyncall(%%sr4,%%r2)\;copy %%r31,%%r2\";
+;; This pattern is split if it is necessary to save and restore the
+;; PIC register.
+(define_insn "call_val_reg_64bit"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand:DI 1 "register_operand" "r"))
+	      (match_operand 2 "" "i")))
+   (clobber (reg:DI 2))
+   (clobber (reg:DI 4))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 1))]
+  "TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, operands[1]);
+}"
+  [(set_attr "type" "dyncall")
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_indirect_call (insn)")
+	      (symbol_ref "attr_length_save_restore_dltp (insn)")))])
+
+;; Split out the PIC register save and restore after reload.  This is
+;; done only if the function returns.  As the split is done after reload,
+;; there are some situations in which we unnecessarily save and restore
+;; %r4.  This happens when there is a single call and the PIC register
+;; is "dead" after the call.  This isn't easy to fix as the usage of
+;; the PIC register isn't completely determined until the reload pass.
+(define_split
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem:SI (match_operand:DI 1 "register_operand" ""))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])]
+  "TARGET_64BIT
+   && reload_completed
+   && !find_reg_note (insn, REG_NORETURN, NULL_RTX)"
+  [(set (reg:DI 4) (reg:DI 27))
+   (parallel [(set (match_dup 0)
+		   (call (mem:SI (match_dup 1))
+			 (match_dup 2)))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])
+   (set (reg:DI 27) (reg:DI 4))]
+  "")
 
-  /* Long millicode call for portable runtime.  */
-  if (get_attr_length (insn) == 20)
-    return \"ldil L%%$$dyncall,%%r31\;ldo R%%$$dyncall(%%r31),%%r31\;blr %%r0,%%r2\;bv,n %%r0(%%r31)\;nop\";
+;; Remove the clobber of register 4 when optimizing.  This has to be
+;; done with a peephole optimization rather than a split because the
+;; split sequence for a call must be longer than one instruction.
+(define_peephole2
+  [(parallel [(set (match_operand 0 "" "")
+		   (call (mem:SI (match_operand:DI 1 "register_operand" ""))
+			 (match_operand 2 "" "")))
+	      (clobber (reg:DI 2))
+	      (clobber (reg:DI 4))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])]
+  "TARGET_64BIT && reload_completed"
+  [(parallel [(set (match_dup 0)
+		   (call (mem:SI (match_dup 1))
+			 (match_dup 2)))
+	      (clobber (reg:DI 2))
+	      (use (reg:DI 27))
+	      (use (reg:DI 29))
+	      (use (const_int 1))])]
+  "")
 
-  /* If we're generating PIC code.  */
-  xoperands[0] = operands[1];
-  if (TARGET_SOM || ! TARGET_GAS)
-    xoperands[1] = gen_label_rtx ();
-  output_asm_insn (\"{bl|b,l} .+8,%%r1\", xoperands);
-  if (TARGET_SOM || ! TARGET_GAS)
-    {
-      output_asm_insn (\"addil L%%$$dyncall-%1,%%r1\", xoperands);
-      ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, \"L\",
-				 CODE_LABEL_NUMBER (xoperands[1]));
-      output_asm_insn (\"ldo R%%$$dyncall-%1(%%r1),%%r1\", xoperands);
-    }
-  else
-    {
-      output_asm_insn (\"addil L%%$$dyncall-$PIC_pcrel$0+4,%%r1\", xoperands);
-      output_asm_insn (\"ldo R%%$$dyncall-$PIC_pcrel$0+8(%%r1),%%r1\",
-      		       xoperands);
-    }
-  output_asm_insn (\"blr %%r0,%%r2\", xoperands);
-  output_asm_insn (\"bv,n %%r0(%%r1)\\n\\tnop\", xoperands);
-  return \"\";
+(define_insn "*call_val_reg_64bit_post_reload"
+  [(set (match_operand 0 "" "")
+	(call (mem:SI (match_operand:DI 1 "register_operand" "r"))
+	      (match_operand 2 "" "i")))
+   (clobber (reg:DI 2))
+   (use (reg:DI 27))
+   (use (reg:DI 29))
+   (use (const_int 1))]
+  "TARGET_64BIT"
+  "*
+{
+  return output_indirect_call (insn, operands[1]);
 }"
   [(set_attr "type" "dyncall")
-   (set (attr "length")
-     (cond [
-;; First FAST_INDIRECT_CALLS
-	    (ne (symbol_ref "TARGET_FAST_INDIRECT_CALLS")
-		(const_int 0))
-	    (const_int 8)
-
-;; Target (or stub) within reach
-	    (and (lt (plus (symbol_ref "total_code_bytes") (pc))
-		     (const_int 240000))
-		 (eq (symbol_ref "TARGET_PORTABLE_RUNTIME")
-		     (const_int 0)))
-	    (const_int 8)
-
-;; Out of reach PIC
-	    (ne (symbol_ref "flag_pic")
-		(const_int 0))
-	    (const_int 24)
-
-;; Out of reach PORTABLE_RUNTIME
-	    (ne (symbol_ref "TARGET_PORTABLE_RUNTIME")
-		(const_int 0))
-	    (const_int 20)]
-
-;; Out of reach, can use ble
-	  (const_int 12)))])
+   (set (attr "length") (symbol_ref "attr_length_indirect_call (insn)"))])
 
 ;; Call subroutine returning any type.
 
@@ -6271,15 +6858,32 @@
   "!TARGET_PORTABLE_RUNTIME"
   "
 {
-  rtx op;
-  rtx call_insn;
+  rtx op, call_insn;
+  rtx nb = operands[1];
 
   op = XEXP (operands[0], 0);
 
   if (TARGET_64BIT)
-    emit_move_insn (arg_pointer_rtx,
-		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
-				  GEN_INT (64)));
+    {
+      if (!virtuals_instantiated)
+	emit_move_insn (arg_pointer_rtx,
+			gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+				      GEN_INT (64)));
+      else
+	{
+	  /* The loop pass can generate new libcalls after the virtual
+	     registers are instantiated when fpregs are disabled because
+	     the only method that we have for doing DImode multiplication
+	     is with a libcall.  This could be trouble if we haven't
+	     allocated enough space for the outgoing arguments.  */
+	  if (INTVAL (nb) > current_function_outgoing_args_size)
+	    abort ();
+
+	  emit_move_insn (arg_pointer_rtx,
+			  gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+					GEN_INT (STACK_POINTER_OFFSET + 64)));
+	}
+    }
 
   /* Indirect sibling calls are not allowed.  */
   if (TARGET_64BIT)
@@ -6292,14 +6896,10 @@
   if (TARGET_64BIT)
     use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
 
+  /* We don't have to restore the PIC register.  */
   if (flag_pic)
-    {
-      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
 
-      /* After each call we must restore the PIC register, even if it
-	 doesn't appear to be used.  */
-      emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
-    }
   DONE;
 }")
 
@@ -6321,9 +6921,8 @@
 (define_insn "sibcall_internal_symref_64bit"
   [(call (mem:SI (match_operand 0 "call_operand_address" ""))
 	 (match_operand 1 "" "i"))
-   (clobber (reg:SI 1))
-   (clobber (reg:SI 27))
-   (use (reg:SI 2))
+   (clobber (reg:DI 1))
+   (use (reg:DI 2))
    (use (const_int 0))]
   "TARGET_64BIT"
   "*
@@ -6341,15 +6940,32 @@
   "!TARGET_PORTABLE_RUNTIME"
   "
 {
-  rtx op;
-  rtx call_insn;
+  rtx op, call_insn;
+  rtx nb = operands[1];
 
   op = XEXP (operands[1], 0);
 
   if (TARGET_64BIT)
-    emit_move_insn (arg_pointer_rtx,
-		    gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
-				  GEN_INT (64)));
+    {
+      if (!virtuals_instantiated)
+	emit_move_insn (arg_pointer_rtx,
+			gen_rtx_PLUS (word_mode, virtual_outgoing_args_rtx,
+				      GEN_INT (64)));
+      else
+	{
+	  /* The loop pass can generate new libcalls after the virtual
+	     registers are instantiated when fpregs are disabled because
+	     the only method that we have for doing DImode multiplication
+	     is with a libcall.  This could be trouble if we haven't
+	     allocated enough space for the outgoing arguments.  */
+	  if (INTVAL (nb) > current_function_outgoing_args_size)
+	    abort ();
+
+	  emit_move_insn (arg_pointer_rtx,
+			  gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+					GEN_INT (STACK_POINTER_OFFSET + 64)));
+	}
+    }
 
   /* Indirect sibling calls are not allowed.  */
   if (TARGET_64BIT)
@@ -6364,14 +6980,10 @@
   if (TARGET_64BIT)
     use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), arg_pointer_rtx);
 
+  /* We don't have to restore the PIC register.  */
   if (flag_pic)
-    {
-      use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
+    use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn), pic_offset_table_rtx);
 
-      /* After each call we must restore the PIC register, even if it
-	 doesn't appear to be used.  */
-      emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
-    }
   DONE;
 }")
 
@@ -6395,9 +7007,8 @@
   [(set (match_operand 0 "" "")
 	(call (mem:SI (match_operand 1 "call_operand_address" ""))
 	      (match_operand 2 "" "i")))
-   (clobber (reg:SI 1))
-   (clobber (reg:SI 27))
-   (use (reg:SI 2))
+   (clobber (reg:DI 1))
+   (use (reg:DI 2))
    (use (const_int 0))]
   "TARGET_64BIT"
   "*
@@ -6447,7 +7058,8 @@
 
 ;;; EH does longjmp's from and within the data section.  Thus,
 ;;; an interspace branch is required for the longjmp implementation.
-;;; Registers r1 and r2 are used as scratch registers for the jump.
+;;; Registers r1 and r2 are used as scratch registers for the jump
+;;; when necessary.
 (define_expand "interspace_jump"
   [(parallel
      [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
@@ -6461,6 +7073,22 @@
 (define_insn ""
   [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
   (clobber (reg:SI 2))]
+  "TARGET_PA_20 && !TARGET_64BIT"
+  "bve%* (%0)"
+   [(set_attr "type" "branch")
+    (set_attr "length" "4")])
+
+(define_insn ""
+  [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+  (clobber (reg:SI 2))]
+  "TARGET_NO_SPACE_REGS && !TARGET_64BIT"
+  "be%* 0(%%sr4,%0)"
+   [(set_attr "type" "branch")
+    (set_attr "length" "4")])
+
+(define_insn ""
+  [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
+  (clobber (reg:SI 2))]
   "!TARGET_64BIT"
   "ldsid (%%sr0,%0),%%r2\; mtsp %%r2,%%sr0\; be%* 0(%%sr0,%0)"
    [(set_attr "type" "branch")
@@ -6470,9 +7098,9 @@
   [(set (pc) (match_operand 0 "pmode_register_operand" "a"))
   (clobber (reg:DI 2))]
   "TARGET_64BIT"
-  "ldsid (%%sr0,%0),%%r2\; mtsp %%r2,%%sr0\; be%* 0(%%sr0,%0)"
+  "bve%* (%0)"
    [(set_attr "type" "branch")
-    (set_attr "length" "12")])
+    (set_attr "length" "4")])
 
 (define_expand "builtin_longjmp"
   [(unspec_volatile [(match_operand 0 "register_operand" "r")] 3)]
@@ -6523,6 +7151,10 @@
   ""
   "
 {
+  /* PA extraction insns don't support zero length bitfields.  */
+  if (INTVAL (operands[2]) == 0)
+    FAIL;
+
   if (TARGET_64BIT)
     emit_insn (gen_extzv_64 (operands[0], operands[1],
 			     operands[2], operands[3]));
@@ -6585,6 +7217,10 @@
   ""
   "
 {
+  /* PA extraction insns don't support zero length bitfields.  */
+  if (INTVAL (operands[2]) == 0)
+    FAIL;
+
   if (TARGET_64BIT)
     emit_insn (gen_extv_64 (operands[0], operands[1],
 			    operands[2], operands[3]));
@@ -7340,7 +7976,9 @@
 						    \"$$sh_func_adrs\"));
 }"
   [(set_attr "type" "multi")
-   (set (attr "length") (symbol_ref "attr_length_millicode_call (insn, 20)"))])
+   (set (attr "length")
+	(plus (symbol_ref "attr_length_millicode_call (insn)")
+	      (const_int 20)))])
 
 ;; On the PA, the PIC register is call clobbered, so it must
 ;; be saved & restored around calls by the caller.  If the call
@@ -7361,6 +7999,7 @@
   /* Restore the PIC register using hppa_pic_save_rtx ().  The
      PIC register is not saved in the frame in 64-bit ABI.  */
   emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+  emit_insn (gen_blockage ());
   DONE;
 }")
 
@@ -7375,5 +8014,41 @@
      a stack slot.  The only registers that are valid after a
      builtin_longjmp are the stack and frame pointers.  */
   emit_move_insn (pic_offset_table_rtx, hppa_pic_save_rtx ());
+  emit_insn (gen_blockage ());
+  DONE;
+}")
+
+;; Allocate new stack space and update the saved stack pointer in the
+;; frame marker.  The HP C compilers also copy additional words in the
+;; frame marker.  The 64-bit compiler copies words at -48, -32 and -24.
+;; The 32-bit compiler copies the word at -16 (Static Link).  We
+;; currently don't copy these values.
+;;
+;; Since the copy of the frame marker can't be done atomically, I
+;; suspect that using it for unwind purposes may be somewhat unreliable.
+;; The HP compilers appear to raise the stack and copy the frame
+;; marker in a strict instruction sequence.  This suggests that the
+;; unwind library may check for an alloca sequence when ALLOCA_FRAME
+;; is set in the callinfo data.  We currently don't set ALLOCA_FRAME
+;; as GAS doesn't support it, or try to keep the instructions emitted
+;; here in strict sequence.
+(define_expand "allocate_stack"
+  [(match_operand 0 "" "")
+   (match_operand 1 "" "")]
+  ""
+  "
+{
+  /* Since the stack grows upward, we need to store virtual_stack_dynamic_rtx
+     in operand 0 before adjusting the stack.  */
+  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
+  anti_adjust_stack (operands[1]);
+  if (TARGET_HPUX_UNWIND_LIBRARY)
+    {
+      rtx dst = gen_rtx_MEM (word_mode,
+  			     gen_rtx_PLUS (word_mode, stack_pointer_rtx,
+			  		   GEN_INT (TARGET_64BIT ? -8 : -4)));
+
+      emit_move_insn (dst, frame_pointer_rtx);
+    }
   DONE;
 }")
diff --git a/gcc/config/pa/pa32-regs.h b/gcc/config/pa/pa32-regs.h
index f5c6aaa615c..ac23a69173a 100644
--- a/gcc/config/pa/pa32-regs.h
+++ b/gcc/config/pa/pa32-regs.h
@@ -259,7 +259,6 @@ enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
    : (REGNO) < 32 ? GENERAL_REGS					\
    : (REGNO) < 56 ? FP_REGS						\
    : (REGNO) < 88 ? FPUPPER_REGS					\
-   : (REGNO) < 88 ? FPUPPER_REGS					\
    : SHIFT_REGS)
 
 /* Get reg_class from a letter such as appears in the machine description.  */
@@ -267,7 +266,6 @@ enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
 #define REG_CLASS_FROM_LETTER(C) \
   ((C) == 'f' ? FP_REGS :					\
    (C) == 'y' ? FPUPPER_REGS :					\
-   (C) == 'y' ? FPUPPER_REGS :					\
    (C) == 'x' ? FP_REGS :					\
    (C) == 'q' ? SHIFT_REGS :					\
    (C) == 'a' ? R1_REGS :					\
diff --git a/gcc/config/pa/pa64-hpux.h b/gcc/config/pa/pa64-hpux.h
index aa7dac75382..78fb4cf5112 100644
--- a/gcc/config/pa/pa64-hpux.h
+++ b/gcc/config/pa/pa64-hpux.h
@@ -47,10 +47,12 @@ Boston, MA 02111-1307, USA.  */
 #undef LIB_SPEC
 #define LIB_SPEC \
   "%{!shared:\
-     %{!p:\
-       %{!pg: %{!threads:-lc} %{threads:-lcma -lc_r}}\
-       %{pg: -L/usr/lib/pa20_64/libp/ -lgprof -lc}}\
-     %{p: -L/usr/lib/pa20_64/libp/ -lprof -lc}} /usr/lib/pa20_64/milli.a"
+     %{!p:%{!pg: -lc %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
+     %{pg: -L/usr/lib/pa20_64/libp/ -lgprof -lc\
+       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}\
+     %{p: -L/usr/lib/pa20_64/libp/ -lprof -lc\
+       %{static:%{!nolibdld:-a shared -ldld -a archive -lc}}}}\
+   /usr/lib/pa20_64/milli.a"
 
 /* Under hpux11, the normal location of the `ld' and `as' programs is the
    /usr/ccs/bin directory.  */
@@ -112,6 +114,13 @@ do {								\
 #define DATA_SECTION_ASM_OP	"\t.data"
 #define BSS_SECTION_ASM_OP	"\t.section\t.bss"
 
+#define JCR_SECTION_NAME	".jcr"
+
+#define HP_INIT_ARRAY_SECTION_ASM_OP	"\t.section\t.init"
+#define GNU_INIT_ARRAY_SECTION_ASM_OP	"\t.section\t.init_array"
+#define HP_FINI_ARRAY_SECTION_ASM_OP	"\t.section\t.fini"
+#define GNU_FINI_ARRAY_SECTION_ASM_OP	"\t.section\t.fini_array"
+
 #undef ASM_OUTPUT_ALIGNED_COMMON
 #define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN)		\
 do {									\
@@ -224,55 +233,175 @@ do {								\
 } while (0)
 
 #undef TEXT_SECTION_ASM_OP
-#define TEXT_SECTION_ASM_OP "\t.SUBSPA $CODE$\n"
+#define TEXT_SECTION_ASM_OP		"\t.SUBSPA $CODE$\n"
 #undef READONLY_DATA_SECTION_ASM_OP
-#define READONLY_DATA_SECTION_ASM_OP "\t.SUBSPA $LIT$\n"
+#define READONLY_DATA_SECTION_ASM_OP	"\t.SUBSPA $LIT$\n"
 #undef DATA_SECTION_ASM_OP
-#define DATA_SECTION_ASM_OP "\t.SUBSPA $DATA$\n"
+#define DATA_SECTION_ASM_OP		"\t.SUBSPA $DATA$\n"
 #undef BSS_SECTION_ASM_OP
-#define BSS_SECTION_ASM_OP "\t.SUBSPA $BSS$\n"
+#define BSS_SECTION_ASM_OP		"\t.SUBSPA $BSS$\n"
+
+/* We provide explicit defines for CTORS_SECTION_ASM_OP and
+   DTORS_SECTION_ASM_OP since we don't yet have support for
+   named sections with the HP assembler.  */
+#undef CTORS_SECTION_ASM_OP
+#define CTORS_SECTION_ASM_OP "\t.SUBSPA \\.ctors,QUAD=1,ALIGN=8,ACCESS=31"
+#undef DTORS_SECTION_ASM_OP
+#define DTORS_SECTION_ASM_OP "\t.SUBSPA \\.dtors,QUAD=1,ALIGN=8,ACCESS=31"
+
+#define HP_INIT_ARRAY_SECTION_ASM_OP \
+  "\t.SUBSPA \\.init,QUAD=1,ALIGN=8,ACCESS=31"
+#define GNU_INIT_ARRAY_SECTION_ASM_OP \
+  "\t.SUBSPA \\.init_array,QUAD=1,ALIGN=8,ACCESS=31"
+#define HP_FINI_ARRAY_SECTION_ASM_OP \
+  "\t.SUBSPA \\.fini,QUAD=1,ALIGN=8,ACCESS=31"
+#define GNU_FINI_ARRAY_SECTION_ASM_OP \
+  "\t.SUBSPA \\.fini_array,QUAD=1,ALIGN=8,ACCESS=31"
 
 #endif /* USING_ELFOS_H */
 
-/* For the time being, we aren't using init sections.  `P' relocations
-   are currently used for function references.  However, P relocations are
-   treated as data references and data references are bound by dld.sl
-   immediately at program startup.  This causes an abort due to undefined
-   weak symbols in crtbegin.o (e.g., __register_frame_info).  Possibly
-   Q relocations might avoid this problem but the GNU assembler doesn't
-   support them.  */
+/* The following defines, used to run constructors and destructors with
+   the SOM linker under HP-UX 11, are not needed.  */
+#undef HAS_INIT_SECTION
+#undef LD_INIT_SWITCH
+#undef LD_FINI_SWITCH
+
+/* The following STARTFILE_SPEC and ENDFILE_SPEC defines provide the
+   magic needed to run initializers and finalizers.  */
+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+  "%{!shared: %{!symbolic: crt0.o%s}} %{static:crtbeginT.o%s} \
+   %{!static:%{!shared:crtbegin.o%s} %{shared:crtbeginS.o%s}}"
+#undef ENDFILE_SPEC
+#define ENDFILE_SPEC "%{!shared:crtend.o%s} %{shared:crtendS.o%s}"
+
+/* Since HP uses the .init and .fini sections for array initializers
+   and finalizers, we need different defines for INIT_SECTION_ASM_OP
+   and FINI_SECTION_ASM_OP.  With the implementation adopted below,
+   the sections are not actually used.  However, we still must provide
+   defines to select the proper code path.  */
 #undef INIT_SECTION_ASM_OP
+#define INIT_SECTION_ASM_OP
 #undef FINI_SECTION_ASM_OP
+#define FINI_SECTION_ASM_OP
+
+/* We are using array initializers and don't want calls in the INIT
+   and FINI sections.  */
+#undef CRT_CALL_STATIC_FUNCTION
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)
+
+/* The init_priority attribute is not supported with HP ld.  This could be
+   supported if collect2 was used with LD_INIT_SWITCH.  Unfortunately, this
+   approach doesn't work with GNU ld since HP-UX doesn't support DT_INIT,
+   and therefore the -init and -fini GNU ld switches.  */
+#undef SUPPORTS_INIT_PRIORITY
+#define SUPPORTS_INIT_PRIORITY (TARGET_GNU_LD ? 1 : 0)
+
+/* We use DTOR_LIST_BEGIN to carry a bunch of hacks to allow us to use
+   the init and fini array sections with both the HP and GNU linkers.
+   The linkers setup the required dynamic entries in the dynamic segment
+   and the dynamic linker does the calls.  This approach avoids using
+   collect2.
+
+   The first hack is to implement __do_global_ctors_aux in crtbegin as
+   it needs to be the first entry in the init array so that it is called
+   last.  HP got the order of the init array backwards.  The DT_INIT_ARRAY
+   is supposed to be executed in the same order as the addresses appear in
+   the array.  DT_FINI_ARRAY is supposed to be executed in the opposite
+   order.
+
+   The second hack is stubs for __cxa_finalize and _Jv_RegisterClasses.
+   The HP implementation of undefined weak symbols is broken.  The linker
+   and dynamic loader both search for undefined weak symbols contrary the
+   generic System V ABI.  An undefined weak symbol should resolve to a
+   value of 0 rather than causing an error.  The prototypes for
+   __cxa_finalize and _Jv_RegisterClasses in crtstuff.c are weak when
+   weak is supported (GNU as), so in theory a strong define should override
+   the stub functions provided here.
+
+   The final hack is a set of plabels to implement the effect of
+   CRT_CALL_STATIC_FUNCTION.  HP-UX 11 only supports DI_INIT_ARRAY and
+   DT_FINI_ARRAY and they put the arrays in .init and .fini, rather than
+   in .init_array and .fini_array.  The standard defines for .init and
+   .fini have the execute flag set.  So, the assembler has to be hacked
+   to munge the standard flags for these sections to make them agree
+   with what the HP linker expects.  With the GNU linker, we need to
+   used the .init_array and .fini_array sections.  So, we set up for
+   both just in case.  Once we have built the table, the linker does
+   the rest of the work.
+
+   The order is significant.  Placing __do_global_ctors_aux first in
+   the list, results in it being called last.  User specified initializers,
+   either using the linker +init command or a plabel, run before the
+   initializers specified here.  */
+
+/* We need a __cxa_finalize stub if CRTSTUFFS_O is defined.  */
+#ifdef CRTSTUFFS_O
+#define PA_CXA_FINALIZE_STUB \
+extern void __cxa_finalize (void *) TARGET_ATTRIBUTE_WEAK;		\
+void									\
+__cxa_finalize (void *p __attribute__((unused))) {}
+#else
+#define PA_CXA_FINALIZE_STUB
+#endif
 
-#define EH_FRAME_IN_DATA_SECTION 1
+/* We need a _Jv_RegisterClasses stub if JCR_SECTION_NAME is defined.  */
+#ifdef JCR_SECTION_NAME
+#define PA_JV_REGISTERCLASSES_STUB \
+void									\
+_Jv_RegisterClasses (void *p __attribute__((unused))) {}
+#else
+#define PA_JV_REGISTERCLASSES_STUB
+#endif
 
-#undef ENDFILE_SPEC
-#define ENDFILE_SPEC ""
+/* We need to add frame_dummy to the initializer list if EH_FRAME_SECTION_NAME
+   or JCR_SECTION_NAME is defined.  */
+#if defined(EH_FRAME_SECTION_NAME) || defined(JCR_SECTION_NAME)
+#define PA_INIT_FRAME_DUMMY_ASM_OP ".dword P%frame_dummy"
+#else
+#define PA_INIT_FRAME_DUMMY_ASM_OP ""
+#endif
 
-#undef STARTFILE_SPEC
-#define STARTFILE_SPEC "%{!shared: %{!symbolic: crt0.o%s}}"
-
-/* Since we are not yet using .init and .fini sections, we need to
-   explicitly arrange to run the global constructors and destructors.
-   We could use ldd for this but it depends on LD_LIBRARY_PATH being
-   correctly set.  So, we use the ld init and fini switches. However,
-   we need to support different switches for the GNU and HP linkers.
-   We can't check TARGET_GNU_LD in collect2, so we need a different
-   test.  The +Accept switch is always the first switch when we are
-   using the HP linker (see define for LINK_SPEC).  Checking for it
-   is a somewhat fragile as it depends on internal details of the
-   collect2 program but it is better than testing ld_file_name.
-
-   FIXME: The GNU linker is broken.  The -init/-fini switches don't
-   work and ldd can't determine the dynamic dependences of executables
-   linked with GNU ld.  The init and fini routines are not executed
-   although DT_INIT and DT_FINI appear ok.  As a result, defining
-   LD_INIT_SWITCH and LD_FINI_SWITCH causes more harm than good when
-   using GNU ld.  However, the definitions appear to work fine with
-   the HP linker.  */
-#if 0
-#define LD_INIT_SWITCH (strcmp ("+Accept", ld2_argv[1]) ? "-init" : "+init")
-#define LD_FINI_SWITCH (strcmp ("+Accept", ld2_argv[1]) ? "-fini" : "+fini")
+#define PA_INIT_FINI_HACK \
+static void __attribute__((used))					\
+__do_global_ctors_aux (void)						\
+{									\
+  func_ptr *p = __CTOR_LIST__;						\
+  while (*(p + 1))							\
+    p++;								\
+  for (; *p != (func_ptr) -1; p--)					\
+    (*p) ();								\
+}									\
+									\
+PA_CXA_FINALIZE_STUB							\
+PA_JV_REGISTERCLASSES_STUB						\
+									\
+asm (HP_INIT_ARRAY_SECTION_ASM_OP);					\
+asm (".dword P%__do_global_ctors_aux");					\
+asm (PA_INIT_FRAME_DUMMY_ASM_OP);					\
+asm (GNU_INIT_ARRAY_SECTION_ASM_OP);					\
+asm (".dword P%__do_global_ctors_aux");					\
+asm (PA_INIT_FRAME_DUMMY_ASM_OP);					\
+asm (HP_FINI_ARRAY_SECTION_ASM_OP);					\
+asm (".dword P%__do_global_dtors_aux");					\
+asm (GNU_FINI_ARRAY_SECTION_ASM_OP);					\
+asm (".dword P%__do_global_dtors_aux")
+
+/* The following two variants of DTOR_LIST_BEGIN are identical to those
+   in crtstuff.c except for the addition of the above init-fini hack.  */
+#ifdef DTORS_SECTION_ASM_OP
+#define DTOR_LIST_BEGIN \
+asm (DTORS_SECTION_ASM_OP);						\
+STATIC func_ptr __DTOR_LIST__[1]					\
+  __attribute__ ((aligned(sizeof(func_ptr))))				\
+  = { (func_ptr) (-1) };						\
+PA_INIT_FINI_HACK
+#else
+#define DTOR_LIST_BEGIN \
+STATIC func_ptr __DTOR_LIST__[1]					\
+  __attribute__ ((section(".dtors"), aligned(sizeof(func_ptr))))	\
+  = { (func_ptr) (-1) };						\
+PA_INIT_FINI_HACK
 #endif
 
 /* If using HP ld do not call pxdb.  Use size as a program that does nothing
diff --git a/gcc/config/pa/pa64-regs.h b/gcc/config/pa/pa64-regs.h
index 2d0ebe3d172..ced7df737e7 100644
--- a/gcc/config/pa/pa64-regs.h
+++ b/gcc/config/pa/pa64-regs.h
@@ -240,9 +240,9 @@ enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
    we inhibit changes from SImode unless they are to a mode that is
    identical in size.  */
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO)			\
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		\
   ((FROM) == SImode && GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)       \
-   ? FP_REGS : NO_REGS)
+   ? reg_classes_intersect_p (CLASS, FP_REGS) : 0)
 
 /* Return the class number of the smallest class containing
    reg number REGNO.  This could be a conditional expression
diff --git a/gcc/config/pa/rtems.h b/gcc/config/pa/rtems.h
index b03b5c802c3..c71ac6e8441 100644
--- a/gcc/config/pa/rtems.h
+++ b/gcc/config/pa/rtems.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler, for PRO.
-   Copyright (C) 1997, 2000, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1997, 2000, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Joel Sherrill (joel@OARcorp.com).
 
 This file is part of GNU CC.
@@ -35,3 +35,6 @@ Boston, MA 02111-1307, USA.  */
 	builtin_assert ("system=rtems");	\
     }						\
   while (0)
+
+#undef LIB_SPEC
+#define LIB_SPEC "%{!p:%{!pg:-lc}}%{p: -lc}%{pg: -lc} -N"
diff --git a/gcc/config/pa/som.h b/gcc/config/pa/som.h
index 98c66fbfe37..7609fdbe176 100644
--- a/gcc/config/pa/som.h
+++ b/gcc/config/pa/som.h
@@ -362,10 +362,6 @@ do {						\
 /* The .align directive in the HP assembler allows up to a 32 alignment.  */
 #define MAX_OFILE_ALIGNMENT 32768
 
-/* SOM does not support the init_priority C++ attribute.  */
-#undef SUPPORTS_INIT_PRIORITY
-#define SUPPORTS_INIT_PRIORITY 0
-
 /* The SOM linker hardcodes paths into binaries.  As a result, dotdots
    must be removed from library prefixes to prevent binaries from depending
    on the location of the GCC tool directory.  The downside is GCC
@@ -375,3 +371,52 @@ do {						\
 /* Aggregates with a single float or double field should be passed and
    returned in the general registers.  */
 #define MEMBER_TYPE_FORCES_BLK(FIELD, MODE) (MODE==SFmode || MODE==DFmode)
+
+/* If GAS supports weak, we can support weak when we have working linker
+   support for secondary definitions and are generating code for GAS.  */
+#ifdef HAVE_GAS_WEAK
+#define SUPPORTS_WEAK (TARGET_SOM_SDEF && TARGET_GAS)
+#else
+#define SUPPORTS_WEAK 0
+#endif
+
+/* We can support one only if we support weak.  */
+#define SUPPORTS_ONE_ONLY SUPPORTS_WEAK
+
+/* Use weak (secondary definitions) to make one only declarations.  */
+#define MAKE_DECL_ONE_ONLY(DECL) (DECL_WEAK (DECL) = 1)
+
+/* This is how we tell the assembler that a symbol is weak.  The SOM
+   weak implementation uses the secondary definition (sdef) flag.
+
+   The behavior of sdef symbols is similar to ELF weak symbols in that
+   multiple definitions can occur without incurring a link error.
+   However, they differ in the following ways:
+     1) Undefined sdef symbols are not allowed.
+     2) The linker searches for undefined sdef symbols and will load an
+	archive library member to resolve an undefined sdef symbol.
+     3) The exported symbol from a shared library is a primary symbol
+        rather than a sdef symbol.  Thus, more care is needed in the
+	ordering of libraries.
+
+   It appears that the linker discards extra copies of "weak" functions
+   when linking shared libraries, independent of whether or not they
+   are in their own section.  In linking final executables, -Wl,-O can
+   be used to remove dead procedures.  Thus, support for named sections
+   is not needed and in previous testing caused problems with various
+   HP tools.  */
+#define ASM_WEAKEN_LABEL(FILE,NAME) \
+  do { fputs ("\t.weak\t", FILE);				\
+       assemble_name (FILE, NAME);				\
+       fputc ('\n', FILE);					\
+       if (! FUNCTION_NAME_P (NAME))				\
+	 {							\
+	   fputs ("\t.EXPORT ", FILE);				\
+	   assemble_name (FILE, NAME);				\
+	   fputs (",DATA\n", FILE);				\
+	 }							\
+  } while (0)
+
+/* We can't handle weak aliases, and therefore can't support pragma weak.
+   Suppress the use of pragma weak in gthr-dce.h and gthr-posix.h.  */
+#define GTHREAD_USE_WEAK 0
diff --git a/gcc/config/pa/t-hpux-shlib b/gcc/config/pa/t-hpux-shlib
index 52c3fd90c4f..1c924497c0b 100644
--- a/gcc/config/pa/t-hpux-shlib
+++ b/gcc/config/pa/t-hpux-shlib
@@ -13,7 +13,7 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared  -nodefaultlibs \
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
 SHLIB_INSTALL = $(INSTALL_DATA) -m 555 $(SHLIB_NAME) \
-	$$(slibdir)/$(SHLIB_SONAME); \
-        rm -f $$(slibdir)/$(SHLIB_NAME); \
-        $(LN_S) $(SHLIB_SONAME) $$(slibdir)/$(SHLIB_NAME)
+	$$(DESTDIR)$$(slibdir)/$(SHLIB_SONAME); \
+        rm -f $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME); \
+        $(LN_S) $(SHLIB_SONAME) $$(DESTDIR)$$(slibdir)/$(SHLIB_NAME)
 
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index f1387c0c898..1e2d8c8d408 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -1102,43 +1102,43 @@ vec_cmple (vector float a1, vector float a2)
 inline vector signed char
 vec_cmplt (vector unsigned char a1, vector unsigned char a2)
 {
-  return (vector signed char) __builtin_altivec_vcmpgtub ((vector signed char) a1, (vector signed char) a2);
+  return (vector signed char) __builtin_altivec_vcmpgtub ((vector signed char) a2, (vector signed char) a1);
 }
 
 inline vector signed char
 vec_cmplt (vector signed char a1, vector signed char a2)
 {
-  return (vector signed char) __builtin_altivec_vcmpgtsb ((vector signed char) a1, (vector signed char) a2);
+  return (vector signed char) __builtin_altivec_vcmpgtsb ((vector signed char) a2, (vector signed char) a1);
 }
 
 inline vector signed short
 vec_cmplt (vector unsigned short a1, vector unsigned short a2)
 {
-  return (vector signed short) __builtin_altivec_vcmpgtuh ((vector signed short) a1, (vector signed short) a2);
+  return (vector signed short) __builtin_altivec_vcmpgtuh ((vector signed short) a2, (vector signed short) a1);
 }
 
 inline vector signed short
 vec_cmplt (vector signed short a1, vector signed short a2)
 {
-  return (vector signed short) __builtin_altivec_vcmpgtsh ((vector signed short) a1, (vector signed short) a2);
+  return (vector signed short) __builtin_altivec_vcmpgtsh ((vector signed short) a2, (vector signed short) a1);
 }
 
 inline vector signed int
 vec_cmplt (vector unsigned int a1, vector unsigned int a2)
 {
-  return (vector signed int) __builtin_altivec_vcmpgtuw ((vector signed int) a1, (vector signed int) a2);
+  return (vector signed int) __builtin_altivec_vcmpgtuw ((vector signed int) a2, (vector signed int) a1);
 }
 
 inline vector signed int
 vec_cmplt (vector signed int a1, vector signed int a2)
 {
-  return (vector signed int) __builtin_altivec_vcmpgtsw ((vector signed int) a1, (vector signed int) a2);
+  return (vector signed int) __builtin_altivec_vcmpgtsw ((vector signed int) a2, (vector signed int) a1);
 }
 
 inline vector signed int
 vec_cmplt (vector float a1, vector float a2)
 {
-  return (vector signed int) __builtin_altivec_vcmpgtfp ((vector float) a1, (vector float) a2);
+  return (vector signed int) __builtin_altivec_vcmpgtfp ((vector float) a2, (vector float) a1);
 }
 
 /* vec_ctf */
@@ -6523,7 +6523,7 @@ __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
 
 #define vec_cmple(a1, a2) __builtin_altivec_vcmpgefp ((a1), (a2))
 
-#define vec_cmplt(a1, a2) \
+#define vec_cmplt(a2, a1) \
 __ch (__bin_args_eq (vector unsigned char, (a1), vector unsigned char, (a2)), \
       ((vector signed char) __builtin_altivec_vcmpgtub ((vector signed char) (a1), (vector signed char) (a2))), \
 __ch (__bin_args_eq (vector signed char, (a1), vector signed char, (a2)), \
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 3d20ff48e23..25e4b084433 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -490,18 +490,27 @@
   "vmaddfp %0,%1,%2,%3"
   [(set_attr "type" "vecfloat")])
 
-;; The unspec here is a vec splat of 0. We do multiply as a fused
-;; multiply-add with an add of a 0 vector. 
+;; We do multiply as a fused multiply-add with an add of a -0.0 vector.
 
 (define_expand "mulv4sf3"
-  [(set (match_dup 3) (unspec:V4SF [(const_int 0)] 142))
-   (set (match_operand:V4SF 0 "register_operand" "=v")
-        (plus:V4SF (mult:V4SF (match_operand:V4SF 1 "register_operand" "v")
-	                      (match_operand:V4SF 2 "register_operand" "v"))
-		   (match_dup 3)))]
+  [(use (match_operand:V4SF 0 "register_operand" ""))
+   (use (match_operand:V4SF 1 "register_operand" ""))
+   (use (match_operand:V4SF 2 "register_operand" ""))]
   "TARGET_ALTIVEC && TARGET_FUSED_MADD"
   "
-{ operands[3] = gen_reg_rtx (V4SFmode); }")
+{
+  rtx neg0;
+
+  /* Generate [-0.0, -0.0, -0.0, -0.0].  */
+  neg0 = gen_reg_rtx (V4SFmode);
+  emit_insn (gen_altivec_vspltisw_v4sf (neg0, GEN_INT (-1)));
+  emit_insn (gen_altivec_vslw_v4sf (neg0, neg0, neg0));
+
+  /* Use the multiply-add.  */
+  emit_insn (gen_altivec_vmaddfp (operands[0], operands[1], operands[2],
+				  neg0));
+  DONE;
+}")
 
 ;; Fused multiply subtract 
 (define_insn "altivec_vnmsubfp"
@@ -1043,6 +1052,14 @@
   "vslw %0,%1,%2"
   [(set_attr "type" "vecsimple")])
 
+(define_insn "altivec_vslw_v4sf"
+  [(set (match_operand:V4SF 0 "register_operand" "=v")
+        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")
+                      (match_operand:V4SF 2 "register_operand" "v")] 109))]
+  "TARGET_ALTIVEC"
+  "vslw %0,%1,%2"
+  [(set_attr "type" "vecsimple")])
+
 (define_insn "altivec_vsl"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
@@ -1316,7 +1333,7 @@
   "vspltisw %0, %1"
   [(set_attr "type" "vecsimple")])
 
-(define_insn ""
+(define_insn "altivec_vspltisw_v4sf"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
         (unspec:V4SF [(match_operand:QI 1 "immediate_operand" "i")] 142))]
   "TARGET_ALTIVEC"
@@ -1830,8 +1847,8 @@
 (define_insn "absv16qi2"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
 	(abs:V16QI (match_operand:V16QI 1 "register_operand" "v")))
-   (clobber (match_scratch:V16QI 2 "=v"))
-   (clobber (match_scratch:V16QI 3 "=v"))]
+   (clobber (match_scratch:V16QI 2 "=&v"))
+   (clobber (match_scratch:V16QI 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisb %2,0\;vsububm %3,%2,%1\;vmaxsb %0,%1,%3"
   [(set_attr "type" "altivec")
@@ -1840,8 +1857,8 @@
 (define_insn "absv8hi2"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (abs:V8HI (match_operand:V8HI 1 "register_operand" "v")))
-   (clobber (match_scratch:V8HI 2 "=v"))
-   (clobber (match_scratch:V8HI 3 "=v"))]
+   (clobber (match_scratch:V8HI 2 "=&v"))
+   (clobber (match_scratch:V8HI 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisb %2,0\;vsubuhm %3,%2,%1\;vmaxsh %0,%1,%3"
   [(set_attr "type" "altivec")
@@ -1850,8 +1867,8 @@
 (define_insn "absv4si2"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (abs:V4SI (match_operand:V4SI 1 "register_operand" "v")))
-   (clobber (match_scratch:V4SI 2 "=v"))
-   (clobber (match_scratch:V4SI 3 "=v"))]
+   (clobber (match_scratch:V4SI 2 "=&v"))
+   (clobber (match_scratch:V4SI 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisb %2,0\;vsubuwm %3,%2,%1\;vmaxsw %0,%1,%3"
   [(set_attr "type" "altivec")
@@ -1860,8 +1877,8 @@
 (define_insn "absv4sf2"
   [(set (match_operand:V4SF 0 "register_operand" "=v")
         (abs:V4SF (match_operand:V4SF 1 "register_operand" "v")))
-   (clobber (match_scratch:V4SF 2 "=v"))
-   (clobber (match_scratch:V4SF 3 "=v"))]
+   (clobber (match_scratch:V4SF 2 "=&v"))
+   (clobber (match_scratch:V4SF 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisw %2, -1\;vslw %3,%2,%2\;vandc %0,%1,%3"
   [(set_attr "type" "altivec")
@@ -1870,8 +1887,8 @@
 (define_insn "altivec_abss_v16qi"
   [(set (match_operand:V16QI 0 "register_operand" "=v")
         (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] 210))
-   (clobber (match_scratch:V16QI 2 "=v"))
-   (clobber (match_scratch:V16QI 3 "=v"))]
+   (clobber (match_scratch:V16QI 2 "=&v"))
+   (clobber (match_scratch:V16QI 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisb %2,0\;vsubsbs %3,%2,%1\;vmaxsb %0,%1,%3"
   [(set_attr "type" "altivec")
@@ -1880,8 +1897,8 @@
 (define_insn "altivec_abss_v8hi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V8HI 1 "register_operand" "v")] 211))
-   (clobber (match_scratch:V8HI 2 "=v"))
-   (clobber (match_scratch:V8HI 3 "=v"))]
+   (clobber (match_scratch:V8HI 2 "=&v"))
+   (clobber (match_scratch:V8HI 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisb %2,0\;vsubshs %3,%2,%1\;vmaxsh %0,%1,%3"
   [(set_attr "type" "altivec")
@@ -1890,8 +1907,8 @@
 (define_insn "altivec_abss_v4si"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 212))
-   (clobber (match_scratch:V4SI 2 "=v"))
-   (clobber (match_scratch:V4SI 3 "=v"))]
+   (clobber (match_scratch:V4SI 2 "=&v"))
+   (clobber (match_scratch:V4SI 3 "=&v"))]
   "TARGET_ALTIVEC"
   "vspltisb %2,0\;vsubsws %3,%2,%1\;vmaxsw %0,%1,%3"
   [(set_attr "type" "altivec")
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index 73041a28775..b3468f2b66b 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -58,7 +58,9 @@ Boston, MA 02111-1307, USA.  */
 /* We want -fPIC by default, unless we're using -static to compile for
    the kernel or some such.  */
 
-#define CC1_SPEC "%{!static:-fPIC}"
+#define CC1_SPEC "\
+%{static: %{Zdynamic: %e conflicting code gen style switches are used}}\
+%{!static:-fPIC}"
 
 /* Make both r2 and r3 available for allocation.  */
 #define FIXED_R2 0
diff --git a/gcc/config/rs6000/freebsd.h b/gcc/config/rs6000/freebsd.h
index e4b6482798e..699d2adbda8 100644
--- a/gcc/config/rs6000/freebsd.h
+++ b/gcc/config/rs6000/freebsd.h
@@ -38,6 +38,11 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
 #undef	LINK_OS_DEFAULT_SPEC
 #define	LINK_OS_DEFAULT_SPEC "%(link_os_freebsd)"
 
+/* XXX: This is wrong for many platforms in sysv4.h.
+   We should work on getting that defination fixed.  */
+#undef  LINK_SHLIB_SPEC
+#define LINK_SHLIB_SPEC "%{shared:-shared} %{!shared: %{static:-static}}"
+
 
 /************************[  Target stuff  ]***********************************/
 
@@ -45,6 +50,9 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
    Needs to agree with <machine/ansi.h>.  GCC defaults come from c-decl.c,
    c-common.c, and config/<arch>/<arch>.h.  */
 
+#undef  SIZE_TYPE
+#define SIZE_TYPE "unsigned int"
+
 /* rs6000.h gets this wrong for FreeBSD.  We use the GCC defaults instead.  */
 #undef WCHAR_TYPE
 
diff --git a/gcc/config/rs6000/linux.h b/gcc/config/rs6000/linux.h
index 1711354da76..593e961f743 100644
--- a/gcc/config/rs6000/linux.h
+++ b/gcc/config/rs6000/linux.h
@@ -1,7 +1,7 @@
 /* Definitions of target machine for GNU compiler,
    for PowerPC machines running Linux.
-   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 Free Software Foundation, 
-   Inc.
+   Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
+   Free Software Foundation, Inc.
    Contributed by Michael Meissner (meissner@cygnus.com).
 
 This file is part of GNU CC.
@@ -84,7 +84,18 @@ Boston, MA 02111-1307, USA.  */
 
 #ifdef IN_LIBGCC2
 #include <signal.h>
-#include <sys/ucontext.h>
+
+/* During the 2.5 kernel series the kernel ucontext was changed, but
+   the new layout is compatible with the old one, so we just define
+   and use the old one here for simplicity and compatibility.  */
+
+struct kernel_old_ucontext {
+  unsigned long     uc_flags;
+  struct ucontext  *uc_link;
+  stack_t           uc_stack;
+  struct sigcontext_struct uc_mcontext;
+  sigset_t          uc_sigmask;
+};
 
 enum { SIGNAL_FRAMESIZE = 64 };
 #endif
@@ -96,12 +107,34 @@ enum { SIGNAL_FRAMESIZE = 64 };
     long new_cfa_;							\
     int i_;								\
 									\
-    /* li r0, 0x7777; sc  (rt_sigreturn)  */				\
-    /* li r0, 0x6666; sc  (sigreturn)  */				\
-    if (((*(unsigned int *) (pc_+0) == 0x38007777)			\
-	 || (*(unsigned int *) (pc_+0) == 0x38006666))			\
-	&& (*(unsigned int *) (pc_+4)  == 0x44000002))			\
-	sc_ = (CONTEXT)->cfa + SIGNAL_FRAMESIZE;			\
+    /* li r0, 0x7777; sc  (sigreturn old)  */				\
+    /* li r0, 0x0077; sc  (sigreturn new)  */				\
+    /* li r0, 0x6666; sc  (rt_sigreturn old)  */			\
+    /* li r0, 0x00AC; sc  (rt_sigreturn new)  */			\
+    if (*(unsigned int *) (pc_+4) != 0x44000002)			\
+      break;								\
+    if (*(unsigned int *) (pc_+0) == 0x38007777				\
+	|| *(unsigned int *) (pc_+0) == 0x38000077)			\
+      {									\
+	struct sigframe {						\
+	  char gap[SIGNAL_FRAMESIZE];					\
+	  struct sigcontext sigctx;					\
+	} *rt_ = (CONTEXT)->cfa;					\
+	sc_ = &rt_->sigctx;						\
+      }									\
+    else if (*(unsigned int *) (pc_+0) == 0x38006666			\
+	     || *(unsigned int *) (pc_+0) == 0x380000AC)		\
+      {									\
+	struct rt_sigframe {						\
+	  char gap[SIGNAL_FRAMESIZE];					\
+	  unsigned long _unused[2];					\
+	  struct siginfo *pinfo;					\
+	  void *puc;							\
+	  struct siginfo info;						\
+	  struct kernel_old_ucontext uc;				\
+	} *rt_ = (CONTEXT)->cfa;					\
+	sc_ = &rt_->uc.uc_mcontext;					\
+      }									\
     else								\
       break;								\
     									\
@@ -122,10 +155,6 @@ enum { SIGNAL_FRAMESIZE = 64 };
     (FS)->regs.reg[LINK_REGISTER_REGNUM].loc.offset 			\
       = (long)&(sc_->regs->link) - new_cfa_;				\
 									\
-    /* The unwinder expects the IP to point to the following insn,	\
-       whereas the kernel returns the address of the actual		\
-       faulting insn.  */						\
-    sc_->regs->nip += 4;  						\
     (FS)->regs.reg[CR0_REGNO].how = REG_SAVED_OFFSET;			\
     (FS)->regs.reg[CR0_REGNO].loc.offset 				\
       = (long)&(sc_->regs->nip) - new_cfa_;				\
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index 73fd5beed3d..55065c6a31d 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -207,6 +207,18 @@ Boston, MA 02111-1307, USA.  */
 #undef  RS6000_MCOUNT
 #define RS6000_MCOUNT "_mcount"
 
+#ifdef __powerpc64__
+/* _init and _fini functions are built from bits spread across many
+   object files, each potentially with a different TOC pointer.  For
+   that reason, place a nop after the call so that the linker can
+   restore the TOC pointer if a TOC adjusting call stub is needed.  */
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC)	\
+  asm (SECTION_OP "\n"					\
+"	bl ." #FUNC "\n"				\
+"	nop\n"						\
+"	.previous");
+#endif
+
 /* FP save and restore routines.  */
 #undef  SAVE_FP_PREFIX
 #define SAVE_FP_PREFIX "._savef"
diff --git a/gcc/config/rs6000/ppc64-fp.c b/gcc/config/rs6000/ppc64-fp.c
new file mode 100644
index 00000000000..3f6d7cd1bb0
--- /dev/null
+++ b/gcc/config/rs6000/ppc64-fp.c
@@ -0,0 +1,146 @@
+/* Functions needed for soft-float on powerpc64-linux, copied from
+   libgcc2.c with macros expanded to force the use of specific types.
+ 
+   Copyright (C) 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
+   2000, 2001, 2002, 2003  Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file.  (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING.  If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.  */
+
+#if defined(__powerpc64__)
+#include "fp-bit.h"
+
+extern DItype __fixdfdi (DFtype);
+extern DItype __fixsfdi (SFtype);
+extern USItype __fixunsdfsi (DFtype);
+extern USItype __fixunssfsi (SFtype);
+extern DFtype __floatdidf (DItype);
+extern SFtype __floatdisf (DItype);
+
+static DItype local_fixunssfdi (SFtype);
+static DItype local_fixunsdfdi (DFtype);
+
+DItype
+__fixdfdi (DFtype a)
+{
+  if (a < 0)
+    return - local_fixunsdfdi (-a);
+  return local_fixunsdfdi (a);
+}
+
+DItype
+__fixsfdi (SFtype a)
+{
+  if (a < 0)
+    return - local_fixunssfdi (-a);
+  return local_fixunssfdi (a);
+}
+
+USItype
+__fixunsdfsi (DFtype a)
+{
+  if (a >= - (DFtype) (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+    return (SItype) (a + (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+                       - (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1);
+  return (SItype) a;
+}
+
+USItype
+__fixunssfsi (SFtype a)
+{
+  if (a >= - (SFtype) (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+    return (SItype) (a + (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1))
+                       - (- ((SItype)(((USItype)1 << ((4 * 8) - 1)) - 1)) - 1);
+  return (SItype) a;
+}
+
+DFtype
+__floatdidf (DItype u)
+{
+  DFtype d;
+
+  d = (SItype) (u >> (sizeof (SItype) * 8));
+  d *= (((UDItype) 1) << ((sizeof (SItype) * 8) / 2));
+  d *= (((UDItype) 1) << ((sizeof (SItype) * 8) / 2));
+  d += (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+  return d;
+}
+
+SFtype
+__floatdisf (DItype u)
+{
+  DFtype f;
+
+  if (53 < (sizeof (DItype) * 8)
+      && 53 > ((sizeof (DItype) * 8) - 53 + 24))
+    {
+      if (! (- ((DItype) 1 << 53) < u
+             && u < ((DItype) 1 << 53)))
+        {
+          if ((UDItype) u & (((UDItype) 1 << ((sizeof (DItype) * 8) - 53)) - 1))
+            {
+              u &= ~ (((UDItype) 1 << ((sizeof (DItype) * 8) - 53)) - 1);
+              u |= ((UDItype) 1 << ((sizeof (DItype) * 8) - 53));
+            }
+        }
+    }
+  f = (SItype) (u >> (sizeof (SItype) * 8));
+  f *= (((UDItype) 1) << ((sizeof (SItype) * 8) / 2));
+  f *= (((UDItype) 1) << ((sizeof (SItype) * 8) / 2));
+  f += (USItype) (u & ((((UDItype) 1) << (sizeof (SItype) * 8)) - 1));
+
+  return (SFtype) f;
+}
+
+/* This version is needed to prevent recursion; fixunsdfdi in libgcc
+   calls fixdfdi, which in turn calls calls fixunsdfdi.  */
+
+static DItype
+local_fixunsdfdi (DFtype a)
+{
+  USItype hi, lo;
+
+  hi = a / (((UDItype) 1) << (sizeof (SItype) * 8));
+  lo = (a - ((DFtype) hi) * (((UDItype) 1) << (sizeof (SItype) * 8)));
+  return ((UDItype) hi << (sizeof (SItype) * 8)) | lo;
+}
+
+/* This version is needed to prevent recursion; fixunssfdi in libgcc
+   calls fixsfdi, which in turn calls calls fixunssfdi.  */
+
+static DItype
+local_fixunssfdi (SFtype original_a)
+{
+  DFtype a = original_a;
+  USItype hi, lo;
+
+  hi = a / (((UDItype) 1) << (sizeof (SItype) * 8));
+  lo = (a - ((DFtype) hi) * (((UDItype) 1) << (sizeof (SItype) * 8)));
+  return ((UDItype) hi << (sizeof (SItype) * 8)) | lo;
+}
+
+#endif /* __powerpc64__ */
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index a7ec02b03e2..4d6daa533e7 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -27,7 +27,7 @@ Boston, MA 02111-1307, USA.  */
 #ifdef RTX_CODE
 
 #ifdef TREE_CODE
-extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx, int));
+extern void init_cumulative_args PARAMS ((CUMULATIVE_ARGS *, tree, rtx, int, int));
 extern void rs6000_va_start PARAMS ((tree, rtx));
 #endif /* TREE_CODE */
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 5c0ef2f6f30..591f77dd46e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1,6 +1,6 @@
 /* Subroutines used for code generation on IBM RS/6000.
    Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 
-   2000, 2001, 2002 Free Software Foundation, Inc.
+   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
 This file is part of GNU CC.
@@ -284,7 +284,9 @@ char rs6000_reg_names[][8] =
       "8",  "9",  "10", "11", "12", "13", "14", "15",
       "16", "17", "18", "19", "20", "21", "22", "23",
       "24", "25", "26", "27", "28", "29", "30", "31",
-      "vrsave"
+      "vrsave", "vscr",
+      /* SPE registers.  */
+      "spe_acc", "spefscr"
 };
 
 #ifdef TARGET_REGNAMES
@@ -301,12 +303,14 @@ static const char alt_reg_names[][8] =
     "mq",    "lr",  "ctr",   "ap",
   "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
    "xer",
-   /* AltiVec registers.  */
+  /* AltiVec registers.  */
    "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
-   "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
-   "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
-   "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
-   "vrsave"
+   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
+  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
+  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
+  "vrsave", "vscr",
+  /* SPE registers.  */
+  "spe_acc", "spefscr"
 };
 #endif
 
@@ -2636,16 +2640,15 @@ rs6000_emit_move (dest, source, mode)
 	}
     }
 
-  /* Handle the case where reload calls us with an invalid address;
-     and the case of CONSTANT_P_RTX.  */
-  if (!ALTIVEC_VECTOR_MODE (mode)
+  /* Handle the case where reload calls us with an invalid address.  */
+  if (reload_in_progress && mode == Pmode
       && (! general_operand (operands[1], mode)
-	  || ! nonimmediate_operand (operands[0], mode)
-	  || GET_CODE (operands[1]) == CONSTANT_P_RTX))
-    {
-      emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
-      return;
-    }
+	  || ! nonimmediate_operand (operands[0], mode)))
+    goto emit_set;
+
+  /* Handle the case of CONSTANT_P_RTX.  */
+  if (GET_CODE (operands[1]) == CONSTANT_P_RTX)
+    goto emit_set;
   
   /* FIXME:  In the long term, this switch statement should go away
      and be replaced by a sequence of tests based on things like
@@ -2841,6 +2844,16 @@ rs6000_emit_move (dest, source, mode)
 	operands[1]
 	  = replace_equiv_address (operands[1],
 				   copy_addr_to_reg (XEXP (operands[1], 0)));
+      if (TARGET_POWER)
+        {
+	  emit_insn (gen_rtx_PARALLEL (VOIDmode,
+		       gen_rtvec (2,
+				  gen_rtx_SET (VOIDmode,
+					       operands[0], operands[1]),
+				  gen_rtx_CLOBBER (VOIDmode,
+						   gen_rtx_SCRATCH (SImode)))));
+	  return;
+	}
       break;
 
     default:
@@ -2850,13 +2863,11 @@ rs6000_emit_move (dest, source, mode)
   /* Above, we may have called force_const_mem which may have returned
      an invalid address.  If we can, fix this up; otherwise, reload will
      have to deal with it.  */
-  if (GET_CODE (operands[1]) == MEM
-      && ! memory_address_p (mode, XEXP (operands[1], 0))
-      && ! reload_in_progress)
-    operands[1] = adjust_address (operands[1], mode, 0);
+  if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
+    operands[1] = validize_mem (operands[1]);
 
+ emit_set:
   emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
-  return;
 }
 
 /* Initialize a variable CUM of type CUMULATIVE_ARGS
@@ -2867,11 +2878,12 @@ rs6000_emit_move (dest, source, mode)
    so we never return a PARALLEL.  */
 
 void
-init_cumulative_args (cum, fntype, libname, incoming)
+init_cumulative_args (cum, fntype, libname, incoming, libcall)
      CUMULATIVE_ARGS *cum;
      tree fntype;
      rtx libname ATTRIBUTE_UNUSED;
      int incoming;
+     int libcall;
 {
   static CUMULATIVE_ARGS zero_cumulative;
 
@@ -2880,7 +2892,8 @@ init_cumulative_args (cum, fntype, libname, incoming)
   cum->fregno = FP_ARG_MIN_REG;
   cum->vregno = ALTIVEC_ARG_MIN_REG;
   cum->prototype = (fntype && TYPE_ARG_TYPES (fntype));
-  cum->call_cookie = CALL_NORMAL;
+  cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
+		      ? CALL_LIBCALL : CALL_NORMAL);
   cum->sysv_gregno = GP_ARG_MIN_REG;
 
   if (incoming)
@@ -3088,7 +3101,7 @@ function_arg_advance (cum, mode, type, named)
 
    If this is floating-point and no prototype is specified, we use
    both an FP and integer register (or possibly FP reg and stack).  Library
-   functions (when TYPE is zero) always have the proper types for args,
+   functions (when CALL_LIBCALL is set) always have the proper types for args,
    so we can pass the FP value just in one register.  emit_library_function
    doesn't support PARALLEL anyway.  */
 
@@ -3109,7 +3122,8 @@ function_arg (cum, mode, type, named)
     {
       if (abi == ABI_V4
 	  && cum->nargs_prototype < 0
-	  && type && (cum->prototype || TARGET_NO_PROTOTYPE))
+	  && (cum->call_cookie & CALL_LIBCALL) == 0
+	  && (cum->prototype || TARGET_NO_PROTOTYPE))
 	{
 	  /* For the SPE, we need to crxor CR6 always.  */
 	  if (TARGET_SPE_ABI)
@@ -3275,7 +3289,10 @@ function_arg_partial_nregs (cum, mode, type, named)
    the argument itself.  The pointer is passed in whatever way is
    appropriate for passing a pointer to that type.
 
-   Under V.4, structures and unions are passed by reference.  */
+   Under V.4, structures and unions are passed by reference.
+
+   As an extension to all ABIs, variable sized types are passed by
+   reference.  */
 
 int
 function_arg_pass_by_reference (cum, mode, type, named)
@@ -3293,8 +3310,7 @@ function_arg_pass_by_reference (cum, mode, type, named)
 
       return 1;
     }
-
-  return 0;
+  return type && int_size_in_bytes (type) <= 0;
 }
 
 /* Perform any needed actions needed for a function that is receiving a
@@ -3533,7 +3549,28 @@ rs6000_va_arg (valist, type)
   rtx lab_false, lab_over, addr_rtx, r;
 
   if (DEFAULT_ABI != ABI_V4)
-    return std_expand_builtin_va_arg (valist, type);
+    {
+      /* Variable sized types are passed by reference.  */
+      if (int_size_in_bytes (type) <= 0)
+	{
+	  u = build_pointer_type (type);
+
+	  /* Args grow upward.  */
+	  t = build (POSTINCREMENT_EXPR, TREE_TYPE (valist), valist,
+		     build_int_2 (POINTER_SIZE / BITS_PER_UNIT, 0));
+	  TREE_SIDE_EFFECTS (t) = 1;
+
+	  t = build1 (NOP_EXPR, build_pointer_type (u), t);
+	  TREE_SIDE_EFFECTS (t) = 1;
+
+	  t = build1 (INDIRECT_REF, u, t);
+	  TREE_SIDE_EFFECTS (t) = 1;
+
+	  return expand_expr (t, NULL_RTX, VOIDmode, EXPAND_NORMAL);
+	}
+      else
+	return std_expand_builtin_va_arg (valist, type);
+    }
 
   f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
   f_fpr = TREE_CHAIN (f_gpr);
@@ -8837,7 +8874,7 @@ first_reg_to_save ()
     if (regs_ever_live[first_reg] 
 	&& (! call_used_regs[first_reg]
 	    || (first_reg == RS6000_PIC_OFFSET_TABLE_REGNUM
-		&& ((DEFAULT_ABI == ABI_V4 && flag_pic == 1)
+		&& ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
 		    || (DEFAULT_ABI == ABI_DARWIN && flag_pic)))))
       break;
 
@@ -9265,7 +9302,6 @@ rs6000_stack_info ()
 					 + ehrd_size
 					 + info_ptr->cr_size
 					 + info_ptr->lr_size
-					 + info_ptr->vrsave_size
 					 + info_ptr->toc_size,
 					 (TARGET_ALTIVEC_ABI || ABI_DARWIN)
 					 ? 16 : 8);
@@ -9609,7 +9645,7 @@ void
 rs6000_emit_load_toc_table (fromprolog)
      int fromprolog;
 {
-  rtx dest;
+  rtx dest, insn;
   dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
 
   if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
@@ -9617,8 +9653,12 @@ rs6000_emit_load_toc_table (fromprolog)
       rtx temp = (fromprolog
 		  ? gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM)
 		  : gen_reg_rtx (Pmode));
-      rs6000_maybe_dead (emit_insn (gen_load_toc_v4_pic_si (temp)));
-      rs6000_maybe_dead (emit_move_insn (dest, temp));
+      insn = emit_insn (gen_load_toc_v4_pic_si (temp));
+      if (fromprolog)
+	rs6000_maybe_dead (insn);
+      insn = emit_move_insn (dest, temp);
+      if (fromprolog)
+	rs6000_maybe_dead (insn);
     }
   else if (TARGET_ELF && DEFAULT_ABI != ABI_AIX && flag_pic == 2)
     {
@@ -9665,14 +9705,13 @@ rs6000_emit_load_toc_table (fromprolog)
 	  ASM_GENERATE_INTERNAL_LABEL (buf, "LCG", reload_toc_labelno++);
 	  symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
 
-	  rs6000_maybe_dead (emit_insn (gen_load_toc_v4_PIC_1b (tempLR,
-								symF,
-								tocsym)));
-	  rs6000_maybe_dead (emit_move_insn (dest, tempLR));
-	  rs6000_maybe_dead (emit_move_insn (temp0,
-					     gen_rtx_MEM (Pmode, dest)));
+	  emit_insn (gen_load_toc_v4_PIC_1b (tempLR, symF, tocsym));
+	  emit_move_insn (dest, tempLR);
+	  emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
 	}
-      rs6000_maybe_dead (emit_insn (gen_addsi3 (dest, temp0, dest)));
+      insn = emit_insn (gen_addsi3 (dest, temp0, dest));
+      if (fromprolog)
+	rs6000_maybe_dead (insn);
     }
   else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
     {
@@ -9682,15 +9721,21 @@ rs6000_emit_load_toc_table (fromprolog)
       ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
       realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
 
-      rs6000_maybe_dead (emit_insn (gen_elf_high (dest, realsym)));
-      rs6000_maybe_dead (emit_insn (gen_elf_low (dest, dest, realsym)));
+      insn = emit_insn (gen_elf_high (dest, realsym));
+      if (fromprolog)
+	rs6000_maybe_dead (insn);
+      insn = emit_insn (gen_elf_low (dest, dest, realsym));
+      if (fromprolog)
+	rs6000_maybe_dead (insn);
     }
   else if (DEFAULT_ABI == ABI_AIX)
     {
       if (TARGET_32BIT)
-	rs6000_maybe_dead (emit_insn (gen_load_toc_aix_si (dest)));
+	insn = emit_insn (gen_load_toc_aix_si (dest));
       else
-	rs6000_maybe_dead (emit_insn (gen_load_toc_aix_di (dest)));
+	insn = emit_insn (gen_load_toc_aix_di (dest));
+      if (fromprolog)
+	rs6000_maybe_dead (insn);
     }
   else
     abort ();
@@ -10423,7 +10468,7 @@ rs6000_emit_prologue ()
 	if ((regs_ever_live[info->first_gp_reg_save+i] 
 	     && ! call_used_regs[info->first_gp_reg_save+i])
 	    || (i+info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM
-		&& ((DEFAULT_ABI == ABI_V4 && flag_pic == 1)
+		&& ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
 		    || (DEFAULT_ABI == ABI_DARWIN && flag_pic))))
 	  {
 	    rtx addr, reg, mem;
@@ -10839,7 +10884,7 @@ rs6000_emit_epilogue (sibcall)
       if ((regs_ever_live[info->first_gp_reg_save+i] 
 	   && ! call_used_regs[info->first_gp_reg_save+i])
 	  || (i+info->first_gp_reg_save == RS6000_PIC_OFFSET_TABLE_REGNUM
-	      && ((DEFAULT_ABI == ABI_V4 && flag_pic == 1)
+	      && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
 		  || (DEFAULT_ABI == ABI_DARWIN && flag_pic))))
 	{
 	  rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, 
@@ -12617,11 +12662,15 @@ rs6000_elf_encode_section_info (decl, first)
 	   && DEFAULT_ABI == ABI_V4
 	   && TREE_CODE (decl) == VAR_DECL)
     {
+      rtx sym_ref = XEXP (DECL_RTL (decl), 0);
       int size = int_size_in_bytes (TREE_TYPE (decl));
       tree section_name = DECL_SECTION_NAME (decl);
       const char *name = (char *)0;
       int len = 0;
 
+      if ((*targetm.binds_local_p) (decl))
+	SYMBOL_REF_FLAG (sym_ref) = 1;
+
       if (section_name)
 	{
 	  if (TREE_CODE (section_name) == STRING_CST)
@@ -12648,7 +12697,6 @@ rs6000_elf_encode_section_info (decl, first)
 		  || (len == sizeof (".PPC.EMB.sbss0") - 1
 		      && strcmp (name, ".PPC.EMB.sbss0") == 0))))
 	{
-	  rtx sym_ref = XEXP (DECL_RTL (decl), 0);
 	  size_t len = strlen (XSTR (sym_ref, 0));
 	  char *str = alloca (len + 2);
 
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index e5e9e27cce9..957fcecfe1e 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -1,6 +1,6 @@
 /* Definitions of target machine for GNU compiler, for IBM RS/6000.
    Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
-   2000, 2001, 2002 Free Software Foundation, Inc.
+   2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
 This file is part of GNU CC.
@@ -611,6 +611,9 @@ extern int rs6000_default_long_calls;
 #define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
 #endif
 
+/* Work around rs6000_long_double_type_size dependency in ada/targtyps.c.  */
+#define WIDEST_HARDWARE_FP_SIZE 64
+
 /* Width in bits of a pointer.
    See also the macro `Pmode' defined below.  */
 #define POINTER_SIZE (TARGET_32BIT ? 32 : 64)
@@ -997,6 +1000,10 @@ extern int rs6000_default_long_calls;
         = call_really_used_regs[i] = 1;					\
   if (DEFAULT_ABI == ABI_V4						\
       && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM			\
+      && flag_pic == 2)							\
+    fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;			\
+  if (DEFAULT_ABI == ABI_V4						\
+      && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM			\
       && flag_pic == 1)							\
     fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]				\
       = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]			\
@@ -1337,10 +1344,12 @@ enum reg_class
 
 /* Return a class of registers that cannot change FROM mode to TO mode.  */
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO)				 \
-  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) ? FLOAT_REGS		 \
-   : (SPE_VECTOR_MODE (FROM) + SPE_VECTOR_MODE (TO)) == 1 ? GENERAL_REGS \
-   : NO_REGS)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)			\
+  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)				\
+   ? reg_classes_intersect_p (FLOAT_REGS, CLASS)			\
+   : (SPE_VECTOR_MODE (FROM) + SPE_VECTOR_MODE (TO)) == 1		\
+   ? reg_classes_intersect_p (GENERAL_REGS, CLASS) 			\
+   : 0)
 
 /* Stack layout; function entry, exit and calling.  */
 
@@ -1607,25 +1616,26 @@ typedef struct rs6000_stack {
 #define CALL_V4_CLEAR_FP_ARGS	0x00000002	/* V.4, no FP args passed */
 #define CALL_V4_SET_FP_ARGS	0x00000004	/* V.4, FP args were passed */
 #define CALL_LONG		0x00000008	/* always call indirect */
+#define CALL_LIBCALL		0x00000010	/* libcall */
 
 /* 1 if N is a possible register number for a function value
    as seen by the caller.
 
    On RS/6000, this is r3, fp1, and v2 (for AltiVec).  */
-#define FUNCTION_VALUE_REGNO_P(N)  ((N) == GP_ARG_RETURN	\
-				    || ((N) == FP_ARG_RETURN)	\
-				    || (TARGET_ALTIVEC &&	\
-					(N) == ALTIVEC_ARG_RETURN))
+#define FUNCTION_VALUE_REGNO_P(N)					\
+  ((N) == GP_ARG_RETURN							\
+   || ((N) == FP_ARG_RETURN && TARGET_HARD_FLOAT)			\
+   || ((N) == ALTIVEC_ARG_RETURN && TARGET_ALTIVEC))
 
 /* 1 if N is a possible register number for function argument passing.
    On RS/6000, these are r3-r10 and fp1-fp13.
    On AltiVec, v2 - v13 are used for passing vectors.  */
 #define FUNCTION_ARG_REGNO_P(N)						\
-  (((unsigned)((N) - GP_ARG_MIN_REG) < (unsigned)(GP_ARG_NUM_REG))	\
-   || (TARGET_ALTIVEC &&						\
-       (unsigned)((N) - ALTIVEC_ARG_MIN_REG) < (unsigned)(ALTIVEC_ARG_NUM_REG)) \
-   || ((unsigned)((N) - FP_ARG_MIN_REG) < (unsigned)(FP_ARG_NUM_REG)))
-
+  ((unsigned) (N) - GP_ARG_MIN_REG < GP_ARG_NUM_REG			\
+   || ((unsigned) (N) - ALTIVEC_ARG_MIN_REG < ALTIVEC_ARG_NUM_REG	\
+       && TARGET_ALTIVEC)						\
+   || ((unsigned) (N) - FP_ARG_MIN_REG < FP_ARG_NUM_REG			\
+       && TARGET_HARD_FLOAT))
 
 /* A C structure for machine-specific, per-function data.
    This is added to the cfun structure.  */
@@ -1680,13 +1690,18 @@ typedef struct rs6000_args
    For a library call, FNTYPE is 0.  */
 
 #define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT) \
-  init_cumulative_args (&CUM, FNTYPE, LIBNAME, FALSE)
+  init_cumulative_args (&CUM, FNTYPE, LIBNAME, FALSE, FALSE)
 
 /* Similar, but when scanning the definition of a procedure.  We always
    set NARGS_PROTOTYPE large so we never return an EXPR_LIST.  */
 
 #define INIT_CUMULATIVE_INCOMING_ARGS(CUM,FNTYPE,LIBNAME) \
-  init_cumulative_args (&CUM, FNTYPE, LIBNAME, TRUE)
+  init_cumulative_args (&CUM, FNTYPE, LIBNAME, TRUE, FALSE)
+
+/* Like INIT_CUMULATIVE_ARGS' but only used for outgoing libcalls.  */
+
+#define INIT_CUMULATIVE_LIBCALL_ARGS(CUM, MODE, LIBNAME) \
+  init_cumulative_args (&CUM, NULL_TREE, LIBNAME, FALSE, TRUE)
 
 /* Update the data in CUM to advance over an argument
    of mode MODE and data type TYPE.
@@ -1832,7 +1847,7 @@ typedef struct rs6000_args
    || (TARGET_ALTIVEC && (REGNO) == VRSAVE_REGNO)		\
    || (current_function_calls_eh_return				\
        && TARGET_AIX						\
-       && (REGNO) == TOC_REGISTER))
+       && (REGNO) == 2))
 
 
 /* TRAMPOLINE_TEMPLATE deleted */
@@ -2382,12 +2397,16 @@ do {									     \
         return COSTS_N_INSNS (4);					\
       case PROCESSOR_PPC620:						\
       case PROCESSOR_PPC630:						\
-      case PROCESSOR_POWER4:						\
         return (GET_CODE (XEXP (X, 1)) != CONST_INT			\
 		? GET_MODE (XEXP (X, 1)) != DImode			\
 		? COSTS_N_INSNS (5) : COSTS_N_INSNS (7)			\
 		: INTVAL (XEXP (X, 1)) >= -256 && INTVAL (XEXP (X, 1)) <= 255 \
 		? COSTS_N_INSNS (3) : COSTS_N_INSNS (4));		\
+      case PROCESSOR_POWER4:						\
+        return (GET_CODE (XEXP (X, 1)) != CONST_INT			\
+		? GET_MODE (XEXP (X, 1)) != DImode			\
+		? COSTS_N_INSNS (3) : COSTS_N_INSNS (4)			\
+		: COSTS_N_INSNS (2));					\
       }									\
   case DIV:								\
   case MOD:								\
@@ -2723,8 +2742,8 @@ extern char rs6000_reg_names[][8];	/* register names (0 vs. %r0).  */
 
 #define DEBUG_REGISTER_NAMES						\
 {									\
-     "r0", "r1",   "r2",  "r3",  "r4",  "r5",  "r6",  "r7",		\
-     "r8", "r9",  "r10", "r11", "r12", "r13", "r14", "r15",		\
+     "r0",  "r1",  "r2",  "r3",  "r4",  "r5",  "r6",  "r7",		\
+     "r8",  "r9", "r10", "r11", "r12", "r13", "r14", "r15",		\
     "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",		\
     "r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31",		\
      "f0",  "f1",  "f2",  "f3",  "f4",  "f5",  "f6",  "f7",		\
@@ -2733,13 +2752,13 @@ extern char rs6000_reg_names[][8];	/* register names (0 vs. %r0).  */
     "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31",		\
      "mq",  "lr", "ctr",  "ap",						\
     "cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7",		\
-  "xer",								\
+    "xer",								\
      "v0",  "v1",  "v2",  "v3",  "v4",  "v5",  "v6",  "v7",             \
      "v8",  "v9", "v10", "v11", "v12", "v13", "v14", "v15",             \
     "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",             \
     "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",             \
-    "vrsave", "vscr"							\
-    , "spe_acc", "spefscr"                                              \
+    "vrsave", "vscr",							\
+    "spe_acc", "spefscr"                                                \
 }
 
 /* Table of additional register names to use in user input.  */
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index a031a295e9b..4c950312871 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -1,6 +1,6 @@
 ;; Machine description for IBM RISC System 6000 (POWER) for GNU C compiler
 ;; Copyright (C) 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 
-;; 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+;; 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 ;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
 
 ;; This file is part of GNU CC.
@@ -772,17 +772,17 @@
   2 1)
 
 (define_function_unit "iu2" 2 0
-  (and (eq_attr "type" "imul,lmul")
+  (and (eq_attr "type" "lmul")
        (eq_attr "cpu" "power4"))
   7 6)
 
 (define_function_unit "iu2" 2 0
-  (and (eq_attr "type" "imul2")
+  (and (eq_attr "type" "imul")
        (eq_attr "cpu" "power4"))
   5 4)
 
 (define_function_unit "iu2" 2 0
-  (and (eq_attr "type" "imul3")
+  (and (eq_attr "type" "imul2,imul3")
        (eq_attr "cpu" "power4"))
   4 3)
 
@@ -9133,6 +9133,32 @@
   operands[5] = GEN_INT (CONST_DOUBLE_LOW  (operands[1]));
 }")
 
+(define_split
+  [(set (match_operand:TI 0 "gpc_reg_operand" "")
+	(match_operand:TI 1 "const_double_operand" ""))]
+  "TARGET_POWERPC64"
+  [(set (match_dup 2) (match_dup 4))
+   (set (match_dup 3) (match_dup 5))]
+  "
+{
+  operands[2] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN == 0,
+				       TImode);
+  operands[3] = operand_subword_force (operands[0], WORDS_BIG_ENDIAN != 0,
+				       TImode);
+  if (GET_CODE (operands[1]) == CONST_DOUBLE)
+    {
+      operands[4] = GEN_INT (CONST_DOUBLE_HIGH (operands[1]));
+      operands[5] = GEN_INT (CONST_DOUBLE_LOW (operands[1]));
+    }
+  else if (GET_CODE (operands[1]) == CONST_INT)
+    {
+      operands[4] = GEN_INT (- (INTVAL (operands[1]) < 0));
+      operands[5] = operands[1];
+    }
+  else
+    FAIL;
+}")
+
 (define_insn "*movdi_internal64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,m,r,r,r,r,?f,f,m,r,*h,*h")
 	(match_operand:DI 1 "input_operand" "r,m,r,I,L,nF,R,f,m,f,*h,r,0"))]
@@ -9313,8 +9339,7 @@
 
 (define_insn "*movti_string"
   [(set (match_operand:TI 0 "reg_or_mem_operand" "=Q,m,????r,????r,????r")
-	(match_operand:TI 1 "reg_or_mem_operand" "r,r,r,Q,m"))
-   (clobber (match_scratch:SI 2 "X,X,X,X,X"))]
+	(match_operand:TI 1 "reg_or_mem_operand" "r,r,r,Q,m"))]
   "TARGET_STRING && ! TARGET_POWER && ! TARGET_POWERPC64
    && (gpc_reg_operand (operands[0], TImode) || gpc_reg_operand (operands[1], TImode))"
   "*
@@ -14850,19 +14875,17 @@
 
 ; This is used in compiling the unwind routines.
 (define_expand "eh_return"
-  [(use (match_operand 0 "general_operand" ""))
-   (use (match_operand 1 "general_operand" ""))]
+  [(use (match_operand 0 "general_operand" ""))]
   ""
   "
 {
 #if TARGET_AIX
-    rs6000_emit_eh_toc_restore (operands[0]);
+  rs6000_emit_eh_toc_restore (EH_RETURN_STACKADJ_RTX);
 #endif
   if (TARGET_32BIT)
-    emit_insn (gen_eh_set_lr_si (operands[1]));
+    emit_insn (gen_eh_set_lr_si (operands[0]));
   else
-    emit_insn (gen_eh_set_lr_di (operands[1]));
-  emit_move_insn (EH_RETURN_STACKADJ_RTX, operands[0]);
+    emit_insn (gen_eh_set_lr_di (operands[0]));
   DONE;
 }")
 
diff --git a/gcc/config/rs6000/rtems.h b/gcc/config/rs6000/rtems.h
index 072d86c4ca2..7e7584604e9 100644
--- a/gcc/config/rs6000/rtems.h
+++ b/gcc/config/rs6000/rtems.h
@@ -1,5 +1,5 @@
 /* Definitions for rtems targeting a PowerPC using elf.
-   Copyright (C) 1996, 1997, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Joel Sherrill (joel@OARcorp.com).
 
 This file is part of GNU CC.
@@ -32,3 +32,6 @@ Boston, MA 02111-1307, USA.  */
       builtin_assert ("machine=powerpc"); \
     }                                     \
   while (0)
+
+#undef CPP_OS_DEFAULT_SPEC
+#define CPP_OS_DEFAULT_SPEC "%(cpp_os_rtems)"
diff --git a/gcc/config/rs6000/spe.h b/gcc/config/rs6000/spe.h
index 48d5b28b6b9..b15dac5a38b 100644
--- a/gcc/config/rs6000/spe.h
+++ b/gcc/config/rs6000/spe.h
@@ -1,5 +1,5 @@
 /* PowerPC E500 user include file.
-   Copyright (C) 2002 Free Software Foundation, Inc.
+   Copyright (C) 2002, 2003 Free Software Foundation, Inc.
    Contributed by Aldy Hernandez (aldyh@redhat.com).
 
 This file is part of GNU CC.
@@ -42,10 +42,9 @@ typedef short 			__vector __ev64_s16__;
 typedef unsigned short  	__vector __ev64_u16__;
 typedef int 			__vector __ev64_s32__;
 typedef unsigned 		__vector __ev64_u32__;
-typedef long long 		__ev64_s64__;
-typedef unsigned long long 	__ev64_u64__;
+typedef long long 		__vector __ev64_s64__;
+typedef unsigned long long 	__vector __ev64_u64__;
 typedef float 			__vector __ev64_fs__;
-
 typedef int 			__vector __ev64_opaque__;
 
 #define __v2si __ev64_opaque__
@@ -54,7 +53,11 @@ typedef int 			__vector __ev64_opaque__;
 #define __ev_addw(a,b) __builtin_spe_evaddw((__v2si) (a), (__v2si) (b))
 #define __ev_addiw(a,b) __builtin_spe_evaddiw ((__v2si) (a), (b))
 #define __ev_subfw(a,b) __builtin_spe_evsubfw ((__v2si) (a), (__v2si) (b))
-#define __ev_subifw(a,b) __builtin_spe_evsubifw ((__v2si) (a), (b))
+#define __ev_subw(a,b) __builtin_spe_evsubfw ((__v2si) (b), (__v2si) (a))
+/* ??? The spe_evsubifw pattern accepts operands reversed, so we need to also
+   reverse them here between the intrinsic and the builtin function.  */
+#define __ev_subifw(a,b) __builtin_spe_evsubifw ((__v2si) (b), (a))
+#define __ev_subiw(a,b) __builtin_spe_evsubifw ((__v2si) (a), (b))
 #define __ev_abs(a) __builtin_spe_evabs ((__v2si) (a))
 #define __ev_neg(a) __builtin_spe_evneg ((__v2si) (a))
 #define __ev_extsb(a) __builtin_spe_evextsb ((__v2si) (a))
@@ -124,9 +127,9 @@ typedef int 			__vector __ev64_opaque__;
 #define __ev_stwwox(a,b,c) __builtin_spe_evstwwox ((__v2si)(a), (b), (c))
 #define __ev_stwhex(a,b,c) __builtin_spe_evstwhex ((__v2si)(a), (b), (c))
 #define __ev_stwhox(a,b,c) __builtin_spe_evstwhox ((__v2si)(a), (b), (c))
-#define __ev_stdd(a,b,c) __builtin_spe_evstdd ((__v2si)(a), (b), (c))
-#define __ev_stdw(a,b,c) __builtin_spe_evstdw ((__v2si)(a), (b), (c))
-#define __ev_stdh(a,b,c) __builtin_spe_evstdh ((__v2si)(a), (b), (c))
+#define __ev_stdd(a,b,c) __builtin_spe_evstdd ((__v2si)(a), (void *)(b), (c))
+#define __ev_stdw(a,b,c) __builtin_spe_evstdw ((__v2si)(a), (void *)(b), (c))
+#define __ev_stdh(a,b,c) __builtin_spe_evstdh ((__v2si)(a), (void *)(b), (c))
 #define __ev_stwwe(a,b,c) __builtin_spe_evstwwe ((__v2si)(a), (b), (c))
 #define __ev_stwwo(a,b,c) __builtin_spe_evstwwo ((__v2si)(a), (b), (c))
 #define __ev_stwhe(a,b,c) __builtin_spe_evstwhe ((__v2si)(a), (b), (c))
@@ -224,24 +227,16 @@ typedef int 			__vector __ev64_opaque__;
 #define __ev_mwhumf __ev_mwhumi
 #define __ev_mwhumfa __ev_mwhumia
 
-#define __ev_mwlssf(a, b) __builtin_spe_evmwlssf ((__v2si) (a), (__v2si) (b))
-#define __ev_mwlsmf(a, b) __builtin_spe_evmwlsmf ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlumi(a, b) __builtin_spe_evmwlumi ((__v2si) (a), (__v2si) (b))
-#define __ev_mwlssfa(a, b) __builtin_spe_evmwlssfa ((__v2si) (a), (__v2si) (b))
-#define __ev_mwlsmfa(a, b) __builtin_spe_evmwlsmfa ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlumia(a, b) __builtin_spe_evmwlumia ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlumiaaw(a, b) __builtin_spe_evmwlumiaaw ((__v2si) (a), (__v2si) (b))
 
-#define __ev_mwlssfaaw(a, b) __builtin_spe_evmwlssfaaw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlssiaaw(a, b) __builtin_spe_evmwlssiaaw ((__v2si) (a), (__v2si) (b))
-#define __ev_mwlsmfaaw(a, b) __builtin_spe_evmwlsmfaaw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlsmiaaw(a, b) __builtin_spe_evmwlsmiaaw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlusiaaw(a, b) __builtin_spe_evmwlusiaaw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlusiaaw(a, b) __builtin_spe_evmwlusiaaw ((__v2si) (a), (__v2si) (b))
 
-#define __ev_mwlssfanw(a, b) __builtin_spe_evmwlssfanw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlssianw(a, b) __builtin_spe_evmwlssianw ((__v2si) (a), (__v2si) (b))
-#define __ev_mwlsmfanw(a, b) __builtin_spe_evmwlsmfanw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlsmianw(a, b) __builtin_spe_evmwlsmianw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlusianw(a, b) __builtin_spe_evmwlusianw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwlumianw(a, b) __builtin_spe_evmwlumianw ((__v2si) (a), (__v2si) (b))
@@ -294,26 +289,26 @@ typedef int 			__vector __ev64_opaque__;
 
 /* Floating Point SIMD Instructions  */
 
-/* These all return V2SF, but we need to cast them to V2SI because the SPE
-   expect all functions to be __ev64_opaque__.  */
-
-#define __ev_fsabs(a) ((__v2si) __builtin_spe_evfsabs ((__v2sf) a))
-#define __ev_fsnabs(a) ((__v2si) __builtin_spe_evfsnabs ((__v2sf) a))
-#define __ev_fsneg(a) ((__v2si) __builtin_spe_evfsneg ((__v2sf) a))
-#define __ev_fsadd(a, b) ((__v2si) __builtin_spe_evfsadd ((__v2sf) a, (__v2sf) b))
-#define __ev_fssub(a, b) ((__v2si) __builtin_spe_evfssub ((__v2sf) a, (__v2sf) b))
-#define __ev_fsmul(a, b) ((__v2si) __builtin_spe_evfsmul ((__v2sf) a, (__v2sf) b))
-#define __ev_fsdiv(a, b) ((__v2si) __builtin_spe_evfsdiv ((__v2sf) a, (__v2sf) b))
-#define __ev_fscfui(a) ((__v2si) __builtin_spe_evfscfui ((__v2si) a))
-#define __ev_fscfsi(a) ((__v2si) __builtin_spe_evfscfsi ((__v2sf) a))
-#define __ev_fscfuf(a) ((__v2si) __builtin_spe_evfscfuf ((__v2sf) a))
-#define __ev_fscfsf(a) ((__v2si) __builtin_spe_evfscfsf ((__v2sf) a))
-#define __ev_fsctui(a) ((__v2si) __builtin_spe_evfsctui ((__v2sf) a))
-#define __ev_fsctsi(a) ((__v2si) __builtin_spe_evfsctsi ((__v2sf) a))
-#define __ev_fsctuf(a) ((__v2si) __builtin_spe_evfsctuf ((__v2sf) a))
-#define __ev_fsctsf(a) ((__v2si) __builtin_spe_evfsctsf ((__v2sf) a))
-#define __ev_fsctuiz(a) ((__v2si) __builtin_spe_evfsctuiz ((__v2sf) a))
-#define __ev_fsctsiz(a) ((__v2si) __builtin_spe_evfsctsiz ((__v2sf) a))
+/* These all return V2SF, but we need to cast them to V2SI
+   because the SPE expect all functions to be __ev64_opaque__.  */
+
+#define __ev_fsabs(a) ((__v2si) __builtin_spe_evfsabs ((__v2sf) (a)))
+#define __ev_fsnabs(a) ((__v2si) __builtin_spe_evfsnabs ((__v2sf) (a)))
+#define __ev_fsneg(a) ((__v2si) __builtin_spe_evfsneg ((__v2sf) (a)))
+#define __ev_fsadd(a, b) ((__v2si) __builtin_spe_evfsadd ((__v2sf) (a), (__v2sf) (b)))
+#define __ev_fssub(a, b) ((__v2si) __builtin_spe_evfssub ((__v2sf) (a), (__v2sf) (b)))
+#define __ev_fsmul(a, b) ((__v2si) __builtin_spe_evfsmul ((__v2sf) (a), (__v2sf) b))
+#define __ev_fsdiv(a, b) ((__v2si) __builtin_spe_evfsdiv ((__v2sf) (a), (__v2sf) b))
+#define __ev_fscfui(a) ((__v2si) __builtin_spe_evfscfui ((__v2si) (a)))
+#define __ev_fscfsi(a) ((__v2si) __builtin_spe_evfscfsi ((__v2sf) (a)))
+#define __ev_fscfuf(a) ((__v2si) __builtin_spe_evfscfuf ((__v2sf) (a)))
+#define __ev_fscfsf(a) ((__v2si) __builtin_spe_evfscfsf ((__v2sf) (a)))
+#define __ev_fsctui(a) ((__v2si) __builtin_spe_evfsctui ((__v2sf) (a)))
+#define __ev_fsctsi(a) ((__v2si) __builtin_spe_evfsctsi ((__v2sf) (a)))
+#define __ev_fsctuf(a) ((__v2si) __builtin_spe_evfsctuf ((__v2sf) (a)))
+#define __ev_fsctsf(a) ((__v2si) __builtin_spe_evfsctsf ((__v2sf) (a)))
+#define __ev_fsctuiz(a) ((__v2si) __builtin_spe_evfsctuiz ((__v2sf) (a)))
+#define __ev_fsctsiz(a) ((__v2si) __builtin_spe_evfsctsiz ((__v2sf) (a)))
 
 /* NOT SUPPORTED IN FIRST e500, support via two instructions:  */
 
@@ -332,7 +327,6 @@ typedef int 			__vector __ev64_opaque__;
 #define __ev_mwhgsmfan(a, b) __internal_ev_mwhgsmfan ((__v2si) (a), (__v2si) (b))
 #define __ev_mwhgsmian(a, b) __internal_ev_mwhgsmian ((__v2si) (a), (__v2si) (b))
 #define __ev_mwhgumian(a, b) __internal_ev_mwhgumian ((__v2si) (a), (__v2si) (b))
-
 #define __ev_mwhssiaaw(a, b) __internal_ev_mwhssiaaw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwhssfaaw(a, b) __internal_ev_mwhssfaaw ((__v2si) (a), (__v2si) (b))
 #define __ev_mwhsmfaaw(a, b) __internal_ev_mwhsmfaaw ((__v2si) (a), (__v2si) (b))
@@ -352,7 +346,7 @@ __internal_ev_mwhssfaaw (__ev64_opaque__ a, __ev64_opaque__ b)
   __ev64_opaque__ t;
 
   t = __ev_mwhssf (a, b);
-  return __ev_addssiaaw(t);
+  return __ev_addssiaaw (t);
 }
 
 static inline __ev64_opaque__
@@ -360,7 +354,7 @@ __internal_ev_mwhssiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
   
-  t = __ev_mwhsmi (a,b);
+  t = __ev_mwhsmi (a, b);
   return __ev_addssiaaw (t);
 }
 
@@ -369,7 +363,7 @@ __internal_ev_mwhsmfaaw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhsmf (a,b);
+  t = __ev_mwhsmf (a, b);
   return __ev_addsmiaaw (t);
 }
  
@@ -378,7 +372,7 @@ __internal_ev_mwhsmiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhsmi (a,b);
+  t = __ev_mwhsmi (a, b);
   return __ev_addsmiaaw (t);
 }
  
@@ -387,7 +381,7 @@ __internal_ev_mwhusiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhumi (a,b);
+  t = __ev_mwhumi (a, b);
   return __ev_addusiaaw (t);
 }
  
@@ -396,7 +390,7 @@ __internal_ev_mwhumiaaw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhumi (a,b);
+  t = __ev_mwhumi (a, b);
   return __ev_addumiaaw (t);
 }
  
@@ -405,7 +399,7 @@ __internal_ev_mwhssfanw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhssf (a,b);
+  t = __ev_mwhssf (a, b);
   return __ev_subfssiaaw (t);
 }
 
@@ -414,7 +408,7 @@ __internal_ev_mwhssianw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhsmi (a,b);
+  t = __ev_mwhsmi (a, b);
   return __ev_subfssiaaw (t);
 }
  
@@ -423,7 +417,7 @@ __internal_ev_mwhsmfanw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhsmf (a,b);
+  t = __ev_mwhsmf (a, b);
   return __ev_subfsmiaaw (t);
 }
  
@@ -432,7 +426,7 @@ __internal_ev_mwhsmianw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhsmi (a,b);
+  t = __ev_mwhsmi (a, b);
   return __ev_subfsmiaaw (t);
 }
  
@@ -441,7 +435,7 @@ __internal_ev_mwhusianw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhumi (a,b);
+  t = __ev_mwhumi (a, b);
   return __ev_subfusiaaw (t);
 }
  
@@ -450,7 +444,7 @@ __internal_ev_mwhumianw (__ev64_opaque__ a, __ev64_opaque__ b)
 {
   __ev64_opaque__ t;
 
-  t = __ev_mwhumi (a,b);
+  t = __ev_mwhumi (a, b);
   return __ev_subfumiaaw (t);
 }
 
@@ -619,7 +613,7 @@ __ev_create_sfix32_fs (float a, float b)
   __ev64_opaque__ ev;
 
   ev = (__ev64_opaque__) __ev_create_fs (a, b);
-  return (__ev64_opaque__) __builtin_spe_evfsctsf (ev);
+  return (__ev64_opaque__) __builtin_spe_evfsctsf ((__v2sf) ev);
 }
 
 static inline __ev64_opaque__
@@ -628,7 +622,7 @@ __ev_create_ufix32_fs (float a, float b)
   __ev64_opaque__ ev;
 
   ev = (__ev64_opaque__) __ev_create_fs (a, b);
-  return (__ev64_opaque__) __builtin_spe_evfsctuf (ev);
+  return (__ev64_opaque__) __builtin_spe_evfsctuf ((__v2sf) ev);
 }
 
 static inline __ev64_opaque__
@@ -657,17 +651,26 @@ __ev_create_u64 (uint64_t a)
   return u.v;
 }
 
-#define __ev_convert_u64(a) ((uint64_t) (a))
-#define __ev_convert_s64(a) ((int64_t) (a))
+static inline uint64_t
+__ev_convert_u64 (__ev64_opaque__ a)
+{
+  return (uint64_t) a;
+}
+
+static inline int64_t
+__ev_convert_s64 (__ev64_opaque__ a)
+{
+  return (int64_t) a;
+}
 
 /* __ev_get_* functions.  */
 
-#define __ev_get_upper_u32(a) __ev_get_u32_internal ((__ev64_opaque__) a, 0)
-#define __ev_get_lower_u32(a) __ev_get_u32_internal ((__ev64_opaque__) a, 1)
-#define __ev_get_upper_s32(a) __ev_get_s32_internal ((__ev64_opaque__) a, 0)
-#define __ev_get_lower_s32(a) __ev_get_s32_internal ((__ev64_opaque__) a, 1)
-#define __ev_get_upper_fs(a) __ev_get_fs_internal ((__ev64_opaque__) a, 0)
-#define __ev_get_lower_fs(a) __ev_get_fs_internal ((__ev64_opaque__) a, 1)
+#define __ev_get_upper_u32(a) __ev_get_u32_internal ((__ev64_opaque__) (a), 0)
+#define __ev_get_lower_u32(a) __ev_get_u32_internal ((__ev64_opaque__) (a), 1)
+#define __ev_get_upper_s32(a) __ev_get_s32_internal ((__ev64_opaque__) (a), 0)
+#define __ev_get_lower_s32(a) __ev_get_s32_internal ((__ev64_opaque__) (a), 1)
+#define __ev_get_upper_fs(a) __ev_get_fs_internal ((__ev64_opaque__) (a), 0)
+#define __ev_get_lower_fs(a) __ev_get_fs_internal ((__ev64_opaque__) (a), 1)
 #define __ev_get_upper_ufix32_u32(a) __ev_get_upper_u32(a)
 #define __ev_get_lower_ufix32_u32(a) __ev_get_lower_u32(a)
 #define __ev_get_upper_sfix32_s32(a) __ev_get_upper_s32(a)
@@ -677,11 +680,11 @@ __ev_create_u64 (uint64_t a)
 #define __ev_get_upper_ufix32_fs(a)  __ev_get_ufix32_fs (a, 0)
 #define __ev_get_lower_ufix32_fs(a)  __ev_get_ufix32_fs (a, 1)
 
-#define __ev_get_u32(a, b) __ev_get_u32_internal ((__ev64_opaque__) a, b)
-#define __ev_get_s32(a, b) __ev_get_s32_internal ((__ev64_opaque__) a, b)
-#define __ev_get_fs(a, b) __ev_get_fs_internal ((__ev64_opaque__) a, b)
-#define __ev_get_u16(a, b) __ev_get_u16_internal ((__ev64_opaque__) a, b)
-#define __ev_get_s16(a, b) __ev_get_s16_internal ((__ev64_opaque__) a, b)
+#define __ev_get_u32(a, b) __ev_get_u32_internal ((__ev64_opaque__) (a), b)
+#define __ev_get_s32(a, b) __ev_get_s32_internal ((__ev64_opaque__) (a), b)
+#define __ev_get_fs(a, b) __ev_get_fs_internal ((__ev64_opaque__) (a), b)
+#define __ev_get_u16(a, b) __ev_get_u16_internal ((__ev64_opaque__) (a), b)
+#define __ev_get_s16(a, b) __ev_get_s16_internal ((__ev64_opaque__) (a), b)
 
 #define __ev_get_ufix32_u32(a, b) __ev_get_u32 (a, b)
 #define __ev_get_sfix32_s32(a, b) __ev_get_s32 (a, b)
@@ -732,8 +735,8 @@ __ev_get_sfix32_fs_internal (__ev64_opaque__ a, uint32_t pos)
 {
   __ev64_fs__ v;
 
-  v = __builtin_spe_evfscfsf (a);
-  return __ev_get_fs_internal (v, pos);
+  v = __builtin_spe_evfscfsf ((__v2sf) a);
+  return __ev_get_fs_internal ((__ev64_opaque__) v, pos);
 }
 
 static inline float
@@ -741,8 +744,8 @@ __ev_get_ufix32_fs_internal (__ev64_opaque__ a, uint32_t pos)
 {
   __ev64_fs__ v;
 
-  v = __builtin_spe_evfscfuf (a);
-  return __ev_get_fs_internal (v, pos);
+  v = __builtin_spe_evfscfuf ((__v2sf) a);
+  return __ev_get_fs_internal ((__ev64_opaque__) v, pos);
 }
 
 static inline uint16_t
@@ -785,7 +788,7 @@ __ev_get_s16_internal (__ev64_opaque__ a, uint32_t pos)
 #define __ev_set_sfix32_fs(a, b, c)  __ev_set_sfix32_fs_internal ((__ev64_opaque__) (a), b, c)
 #define __ev_set_ufix32_fs(a, b, c)  __ev_set_ufix32_fs_internal ((__ev64_opaque__) (a), b, c)
 
-#define __ev_set_upper_u32(a, b) __ev_set_u32(a, b, 0)
+#define __ev_set_upper_u32(a, b) __ev_set_u32 (a, b, 0)
 #define __ev_set_lower_u32(a, b) __ev_set_u32 (a, b, 1)
 #define __ev_set_upper_s32(a, b) __ev_set_s32 (a, b, 0)
 #define __ev_set_lower_s32(a, b) __ev_set_s32 (a, b, 1)
@@ -821,7 +824,7 @@ __ev_set_acc_s64 (int64_t a)
 }
 
 static inline __ev64_opaque__
-__ev_set_u32_internal (__ev64_opaque__ a, uint32_t b, uint32_t pos )
+__ev_set_u32_internal (__ev64_opaque__ a, uint32_t b, uint32_t pos)
 {
   union
   {
@@ -835,7 +838,7 @@ __ev_set_u32_internal (__ev64_opaque__ a, uint32_t b, uint32_t pos )
 }
 
 static inline __ev64_opaque__
-__ev_set_s32_internal (__ev64_opaque__ a, int32_t b, uint32_t pos )
+__ev_set_s32_internal (__ev64_opaque__ a, int32_t b, uint32_t pos)
 {
   union
   {
@@ -849,7 +852,7 @@ __ev_set_s32_internal (__ev64_opaque__ a, int32_t b, uint32_t pos )
 }
 
 static inline __ev64_opaque__
-__ev_set_fs_internal (__ev64_opaque__ a, float b, uint32_t pos )
+__ev_set_fs_internal (__ev64_opaque__ a, float b, uint32_t pos)
 {
   union
   {
@@ -895,7 +898,7 @@ __ev_set_ufix32_fs_internal (__ev64_opaque__ a, float b, uint32_t pos)
 }
 
 static inline __ev64_opaque__
-__ev_set_u16_internal (__ev64_opaque__ a, uint16_t b, uint32_t pos )
+__ev_set_u16_internal (__ev64_opaque__ a, uint16_t b, uint32_t pos)
 {
   union
   {
@@ -909,7 +912,7 @@ __ev_set_u16_internal (__ev64_opaque__ a, uint16_t b, uint32_t pos )
 }
 
 static inline __ev64_opaque__
-__ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos )
+__ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos)
 {
   union
   {
@@ -929,136 +932,70 @@ __ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos )
 #define __pred_upper	2
 #define __pred_lower	3
 
-#define __ev_any_gts(a, b) \
-	__builtin_spe_evcmpgts (__pred_any, (__v2si)(a), (__v2si)(b))
-#define __ev_all_gts(a, b) \
-	__builtin_spe_evcmpgts (__pred_all, (__v2si)(a), (__v2si)(b))
-#define __ev_upper_gts(a, b) \
-	__builtin_spe_evcmpgts (__pred_upper, (__v2si)(a), (__v2si)(b))
-#define __ev_lower_gts(a, b) \
-	__builtin_spe_evcmpgts (__pred_lower, (__v2si)(a), (__v2si)(b))
-#define __ev_select_gts(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_gts ((__v2si)(a), (__v2si)(b), \
-					   (__v2si)(c), (__v2si)(d)))
-
-#define __ev_any_gtu(a, b) \
-	__builtin_spe_evcmpgtu (__pred_any, (__v2si)(a), (__v2si)(b))
-#define __ev_all_gtu(a, b) \
-	__builtin_spe_evcmpgtu (__pred_all, (__v2si)(a), (__v2si)(b))
-#define __ev_upper_gtu(a, b) \
-	__builtin_spe_evcmpgtu (__pred_upper, (__v2si)(a), (__v2si)(b))
-#define __ev_lower_gtu(a, b) \
-	__builtin_spe_evcmpgtu (__pred_lower, (__v2si)(a), (__v2si)(b))
-#define __ev_select_gtu(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_gtu ((__v2si)(a), (__v2si)(b), \
-					   (__v2si)(c), (__v2si)(d)))
-
-#define __ev_any_lts(a, b) \
-	__builtin_spe_evcmplts (__pred_any, (__v2si)(a), (__v2si)(b))
-#define __ev_all_lts(a, b) \
-	__builtin_spe_evcmplts (__pred_all, (__v2si)(a), (__v2si)(b))
-#define __ev_upper_lts(a, b) \
-	__builtin_spe_evcmplts (__pred_upper, (__v2si)(a), (__v2si)(b))
-#define __ev_lower_lts(a, b) \
-	__builtin_spe_evcmplts (__pred_lower, (__v2si)(a), (__v2si)(b))
-#define __ev_select_lts(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_lts ((__v2si)(a), (__v2si)(b), \
-					   (__v2si)(c), (__v2si)(d)))
-
-#define __ev_any_ltu(a, b) \
-	__builtin_spe_evcmpltu (__pred_any, (__v2si)(a), (__v2si)(b))
-#define __ev_all_ltu(a, b) \
-	__builtin_spe_evcmpltu (__pred_all, (__v2si)(a), (__v2si)(b))
-#define __ev_upper_ltu(a, b) \
-	__builtin_spe_evcmpltu (__pred_upper, (__v2si)(a), (__v2si)(b))
-#define __ev_lower_ltu(a, b) \
-	__builtin_spe_evcmpltu (__pred_lower, (__v2si)(a), (__v2si)(b))
-#define __ev_select_ltu(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_ltu ((__v2si)(a), (__v2si)(b), \
-					   (__v2si)(c), (__v2si)(d)))
-#define __ev_any_eq(a, b) \
-	__builtin_spe_evcmpeq (__pred_any, (__v2si)(a), (__v2si)(b))
-#define __ev_all_eq(a, b) \
-	__builtin_spe_evcmpeq (__pred_all, (__v2si)(a), (__v2si)(b))
-#define __ev_upper_eq(a, b) \
-	__builtin_spe_evcmpeq (__pred_upper, (__v2si)(a), (__v2si)(b))
-#define __ev_lower_eq(a, b) \
-	__builtin_spe_evcmpeq (__pred_lower, (__v2si)(a), (__v2si)(b))
-#define __ev_select_eq(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_eq ((__v2si)(a), (__v2si)(b), \
-					  (__v2si)(c), (__v2si)(d)))
-
-#define __ev_any_fs_gt(a, b) \
-	__builtin_spe_evfscmpgt (__pred_any, (__v2sf)(a), (__v2sf)(b))
-#define __ev_all_fs_gt(a, b) \
-	__builtin_spe_evfscmpgt (__pred_all, (__v2sf)(a), (__v2sf)(b))
-#define __ev_upper_fs_gt(a, b) \
-	__builtin_spe_evfscmpgt (__pred_upper, (__v2sf)(a), (__v2sf)(b))
-#define __ev_lower_fs_gt(a, b) \
-	__builtin_spe_evfscmpgt (__pred_lower, (__v2sf)(a), (__v2sf)(b))
-#define __ev_select_fs_gt(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_fsgt ((__v2sf)(a), (__v2sf)(b), \
-					    (__v2sf)(c), (__v2sf)(d)))
-
-#define __ev_any_fs_lt(a, b) \
-	__builtin_spe_evfscmplt (__pred_any, (__v2sf)(a), (__v2sf)(b))
-#define __ev_all_fs_lt(a, b) \
-	__builtin_spe_evfscmplt (__pred_all, (__v2sf)(a), (__v2sf)(b))
-#define __ev_upper_fs_lt(a, b) \
-	__builtin_spe_evfscmplt (__pred_upper, (__v2sf)(a), (__v2sf)(b))
-#define __ev_lower_fs_lt(a, b) \
-	__builtin_spe_evfscmplt (__pred_lower, (__v2sf)(a), (__v2sf)(b))
-#define __ev_select_fs_lt(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_fslt ((__v2sf)(a), (__v2sf)(b), \
-					    (__v2sf)(c), (__v2sf)(d)))
-
-#define __ev_any_fs_eq(a, b) \
-	__builtin_spe_evfscmpeq (__pred_any, (__v2sf)(a), (__v2sf)(b))
-#define __ev_all_fs_eq(a, b) \
-	__builtin_spe_evfscmpeq (__pred_all, (__v2sf)(a), (__v2sf)(b))
-#define __ev_upper_fs_eq(a, b) \
-	__builtin_spe_evfscmpeq (__pred_upper, (__v2sf)(a), (__v2sf)(b))
-#define __ev_lower_fs_eq(a, b) \
-	__builtin_spe_evfscmpeq (__pred_lower, (__v2sf)(a), (__v2sf)(b))
-#define __ev_select_fs_eq(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_fseq ((__v2sf)(a), (__v2sf)(b), \
-					    (__v2sf)(c), (__v2sf)(d)))
-
-#define __ev_any_fs_tst_gt(a, b) \
-	__builtin_spe_evfststgt (__pred_any, (__v2sf)(a), (__v2sf)(b))
-#define __ev_all_fs_tst_gt(a, b) \
-	__builtin_spe_evfststgt (__pred_all, (__v2sf)(a), (__v2sf)(b))
-#define __ev_upper_fs_tst_gt(a, b) \
-	__builtin_spe_evfststgt (__pred_upper, (__v2sf)(a), (__v2sf)(b))
-#define __ev_lower_fs_tst_gt(a, b) \
-	__builtin_spe_evfststgt (__pred_lower, (__v2sf)(a), (__v2sf)(b))
-#define __ev_select_fs_tst_gt(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_fststgt ((__v2sf)(a), (__v2sf)(b), \
-					       (__v2sf)(c), (__v2sf)(d)))
-
-#define __ev_any_fs_tst_lt(a, b) \
-	__builtin_spe_evfststlt (__pred_any, (__v2sf)(a), (__v2sf)(b))
-#define __ev_all_fs_tst_lt(a, b) \
-	__builtin_spe_evfststlt (__pred_all, (__v2sf)(a), (__v2sf)(b))
-#define __ev_upper_fs_tst_lt(a, b) \
-	__builtin_spe_evfststlt (__pred_upper, (__v2sf)(a), (__v2sf)(b))
-#define __ev_lower_fs_tst_lt(a, b) \
-	__builtin_spe_evfststlt (__pred_lower, (__v2sf)(a), (__v2sf)(b))
-#define __ev_select_fs_tst_lt(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_fststlt ((__v2sf)(a), (__v2sf)(b), \
-					       (__v2sf)(c), (__v2sf)(d)))
-
-#define __ev_any_fs_tst_eq(a, b) \
-	__builtin_spe_evfststeq (__pred_any, (__v2sf)(a), (__v2sf)(b))
-#define __ev_all_fs_tst_eq(a, b) \
-	__builtin_spe_evfststeq (__pred_all, (__v2sf)(a), (__v2sf)(b))
-#define __ev_upper_fs_tst_eq(a, b) \
-	__builtin_spe_evfststeq (__pred_upper, (__v2sf)(a), (__v2sf)(b))
-#define __ev_lower_fs_tst_eq(a, b) \
-	__builtin_spe_evfststeq (__pred_lower, (__v2sf)(a), (__v2sf)(b))
-#define __ev_select_fs_tst_eq(a, b, c, d) \
-	((__v2si) __builtin_spe_evsel_fststeq ((__v2sf)(a), (__v2sf)(b), \
-					       (__v2sf)(c), (__v2sf)(d)))
+#define __ev_any_gts(a, b)		__builtin_spe_evcmpgts (__pred_any, (__v2si) (a), (__v2si) (b))
+#define __ev_all_gts(a, b)		__builtin_spe_evcmpgts (__pred_all, (__v2si) (a), (__v2si) (b))
+#define __ev_upper_gts(a, b)		__builtin_spe_evcmpgts (__pred_upper, (__v2si) (a), (__v2si) (b))
+#define __ev_lower_gts(a, b)		__builtin_spe_evcmpgts (__pred_lower, (__v2si) (a), (__v2si) (b))
+#define __ev_select_gts(a, b, c, d) 	((__v2si) __builtin_spe_evsel_gts ((__v2si) (a), (__v2si) (b), (__v2si) (c), (__v2si) (d)))
+
+#define __ev_any_gtu(a, b)		__builtin_spe_evcmpgtu (__pred_any, (__v2si) (a), (__v2si) (b))
+#define __ev_all_gtu(a, b)		__builtin_spe_evcmpgtu (__pred_all, (__v2si) (a), (__v2si) (b))
+#define __ev_upper_gtu(a, b)		__builtin_spe_evcmpgtu (__pred_upper, (__v2si) (a), (__v2si) (b))
+#define __ev_lower_gtu(a, b)		__builtin_spe_evcmpgtu (__pred_lower, (__v2si) (a), (__v2si) (b))
+#define __ev_select_gtu(a, b, c, d) 	((__v2si) __builtin_spe_evsel_gtu ((__v2si) (a), (__v2si) (b), (__v2si) (c), (__v2si) (d)))
+
+#define __ev_any_lts(a, b)		__builtin_spe_evcmplts (__pred_any, (__v2si) (a), (__v2si) (b))
+#define __ev_all_lts(a, b)		__builtin_spe_evcmplts (__pred_all, (__v2si) (a), (__v2si) (b))
+#define __ev_upper_lts(a, b)		__builtin_spe_evcmplts (__pred_upper, (__v2si) (a), (__v2si) (b))
+#define __ev_lower_lts(a, b)		__builtin_spe_evcmplts (__pred_lower, (__v2si) (a), (__v2si) (b))
+#define __ev_select_lts(a, b, c, d) 	((__v2si) __builtin_spe_evsel_lts ((__v2si) (a), (__v2si) (b), (__v2si) (c), (__v2si) (d)))
+
+#define __ev_any_ltu(a, b)		__builtin_spe_evcmpltu (__pred_any, (__v2si) (a), (__v2si) (b))
+#define __ev_all_ltu(a, b)		__builtin_spe_evcmpltu (__pred_all, (__v2si) (a), (__v2si) (b))
+#define __ev_upper_ltu(a, b)		__builtin_spe_evcmpltu (__pred_upper, (__v2si) (a), (__v2si) (b))
+#define __ev_lower_ltu(a, b)		__builtin_spe_evcmpltu (__pred_lower, (__v2si) (a), (__v2si) (b))
+#define __ev_select_ltu(a, b, c, d) 	((__v2si) __builtin_spe_evsel_ltu ((__v2si) (a), (__v2si) (b), (__v2si) (c), (__v2si) (d)))
+#define __ev_any_eq(a, b)		__builtin_spe_evcmpeq (__pred_any, (__v2si) (a), (__v2si) (b))
+#define __ev_all_eq(a, b)		__builtin_spe_evcmpeq (__pred_all, (__v2si) (a), (__v2si) (b))
+#define __ev_upper_eq(a, b)		__builtin_spe_evcmpeq (__pred_upper, (__v2si) (a), (__v2si) (b))
+#define __ev_lower_eq(a, b)		__builtin_spe_evcmpeq (__pred_lower, (__v2si) (a), (__v2si) (b))
+#define __ev_select_eq(a, b, c, d) 	((__v2si) __builtin_spe_evsel_eq ((__v2si) (a), (__v2si) (b), (__v2si) (c), (__v2si) (d)))
+
+#define __ev_any_fs_gt(a, b)		__builtin_spe_evfscmpgt (__pred_any, (__v2sf) (a), (__v2sf) (b))
+#define __ev_all_fs_gt(a, b)		__builtin_spe_evfscmpgt (__pred_all, (__v2sf) (a), (__v2sf) (b))
+#define __ev_upper_fs_gt(a, b)		__builtin_spe_evfscmpgt (__pred_upper, (__v2sf) (a), (__v2sf) (b))
+#define __ev_lower_fs_gt(a, b)		__builtin_spe_evfscmpgt (__pred_lower, (__v2sf) (a), (__v2sf) (b))
+#define __ev_select_fs_gt(a, b, c, d)	((__v2si) __builtin_spe_evsel_fsgt ((__v2sf) (a), (__v2sf) (b), (__v2sf) (c), (__v2sf) (d)))
+
+#define __ev_any_fs_lt(a, b)		__builtin_spe_evfscmplt (__pred_any, (__v2sf) (a), (__v2sf) (b))
+#define __ev_all_fs_lt(a, b)		__builtin_spe_evfscmplt (__pred_all, (__v2sf) (a), (__v2sf) (b))
+#define __ev_upper_fs_lt(a, b)		__builtin_spe_evfscmplt (__pred_upper, (__v2sf) (a), (__v2sf) (b))
+#define __ev_lower_fs_lt(a, b)		__builtin_spe_evfscmplt (__pred_lower, (__v2sf) (a), (__v2sf) (b))
+#define __ev_select_fs_lt(a, b, c, d)	((__v2si) __builtin_spe_evsel_fslt ((__v2sf) (a), (__v2sf) (b), (__v2sf) (c), (__v2sf) (d)))
+
+#define __ev_any_fs_eq(a, b)		__builtin_spe_evfscmpeq (__pred_any, (__v2sf) (a), (__v2sf) (b))
+#define __ev_all_fs_eq(a, b)		__builtin_spe_evfscmpeq (__pred_all, (__v2sf) (a), (__v2sf) (b))
+#define __ev_upper_fs_eq(a, b)		__builtin_spe_evfscmpeq (__pred_upper, (__v2sf) (a), (__v2sf) (b))
+#define __ev_lower_fs_eq(a, b)		__builtin_spe_evfscmpeq (__pred_lower, (__v2sf) (a), (__v2sf) (b))
+#define __ev_select_fs_eq(a, b, c, d)	((__v2si) __builtin_spe_evsel_fseq ((__v2sf) (a), (__v2sf) (b), (__v2sf) (c), (__v2sf) (d)))
+
+#define __ev_any_fs_tst_gt(a, b)	__builtin_spe_evfststgt (__pred_any, (__v2sf) (a), (__v2sf) (b))
+#define __ev_all_fs_tst_gt(a, b)	__builtin_spe_evfststgt (__pred_all, (__v2sf) (a), (__v2sf) (b))
+#define __ev_upper_fs_tst_gt(a, b)	__builtin_spe_evfststgt (__pred_upper, (__v2sf) (a), (__v2sf) (b))
+#define __ev_lower_fs_tst_gt(a, b)	__builtin_spe_evfststgt (__pred_lower, (__v2sf) (a), (__v2sf) (b))
+#define __ev_select_fs_tst_gt(a, b, c, d)	((__v2si) __builtin_spe_evsel_fststgt ((__v2sf) (a), (__v2sf) (b), (__v2sf) (c), (__v2sf) (d)))
+
+#define __ev_any_fs_tst_lt(a, b)	__builtin_spe_evfststlt (__pred_any, (__v2sf) (a), (__v2sf) (b))
+#define __ev_all_fs_tst_lt(a, b)	__builtin_spe_evfststlt (__pred_all, (__v2sf) (a), (__v2sf) (b))
+#define __ev_upper_fs_tst_lt(a, b)	__builtin_spe_evfststlt (__pred_upper, (__v2sf) (a), (__v2sf) (b))
+#define __ev_lower_fs_tst_lt(a, b)	__builtin_spe_evfststlt (__pred_lower, (__v2sf) (a), (__v2sf) (b))
+#define __ev_select_fs_tst_lt(a, b, c, d)	((__v2si) __builtin_spe_evsel_fststlt ((__v2sf) (a), (__v2sf) (b), (__v2sf) (c), (__v2sf) (d)))
+
+#define __ev_any_fs_tst_eq(a, b)	__builtin_spe_evfststeq (__pred_any, (__v2sf) (a), (__v2sf) (b))
+#define __ev_all_fs_tst_eq(a, b)	__builtin_spe_evfststeq (__pred_all, (__v2sf) (a), (__v2sf) (b))
+#define __ev_upper_fs_tst_eq(a, b)	__builtin_spe_evfststeq (__pred_upper, (__v2sf) (a), (__v2sf) (b))
+#define __ev_lower_fs_tst_eq(a, b)	__builtin_spe_evfststeq (__pred_lower, (__v2sf) (a), (__v2sf) (b))
+#define __ev_select_fs_tst_eq(a, b, c, d)	((__v2si) __builtin_spe_evsel_fststeq ((__v2sf) (a), (__v2sf) (b), (__v2sf) (c), (__v2sf) (d)))
 
 /* SPEFSCR accesor functions.  */
 
@@ -1070,7 +1007,7 @@ __ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos )
 #define __SPEFSCR_FDBZH		0x04000000
 #define __SPEFSCR_FUNFH		0x02000000
 #define __SPEFSCR_FOVFH		0x01000000
-/* 2 unused bits */
+/* 2 unused bits.  */
 #define __SPEFSCR_FINXS		0x00200000
 #define __SPEFSCR_FINVS		0x00100000
 #define __SPEFSCR_FDBZS		0x00080000
@@ -1085,7 +1022,7 @@ __ev_set_s16_internal (__ev64_opaque__ a, int16_t b, uint32_t pos )
 #define __SPEFSCR_FDBZ		0x00000400
 #define __SPEFSCR_FUNF		0x00000200
 #define __SPEFSCR_FOVF		0x00000100
-/* 1 unused bit */
+/* 1 unused bit.  */
 #define __SPEFSCR_FINXE		0x00000040
 #define __SPEFSCR_FINVE		0x00000020
 #define __SPEFSCR_FDBZE		0x00000010
@@ -1144,8 +1081,8 @@ __ev_clr_spefscr_field (int mask)
      rnd = 0 (nearest)
      rnd = 1 (zero)
      rnd = 2 (+inf)
-     rnd = 3 (-inf)
-*/
+     rnd = 3 (-inf).  */
+
 static inline void
 __ev_set_spefscr_frmc (int rnd)
 {
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index 68fa73f7997..3da996fbd54 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -1,5 +1,5 @@
 /* Target definitions for GNU compiler for PowerPC running System V.4
-   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
    Free Software Foundation, Inc.
    Contributed by Cygnus Support.
 
@@ -257,7 +257,8 @@ do {									\
 	     rs6000_sdata_name);					\
     }									\
 									\
-  if (rs6000_sdata != SDATA_NONE && DEFAULT_ABI != ABI_V4)		\
+  if ((rs6000_sdata != SDATA_NONE && DEFAULT_ABI != ABI_V4)		\
+      || (rs6000_sdata == SDATA_EABI && !TARGET_EABI))			\
     {									\
       rs6000_sdata = SDATA_NONE;					\
       error ("-msdata=%s and -mcall-%s are incompatible",		\
@@ -328,7 +329,7 @@ do {									\
 
 /* Define this to set the endianness to use in libgcc2.c, which can
    not depend on target_flags.  */
-#if !defined(_LITTLE_ENDIAN) && !defined(__sun__)
+#if !defined(__LITTLE_ENDIAN__) && !defined(__sun__)
 #define LIBGCC2_WORDS_BIG_ENDIAN 1
 #else
 #define LIBGCC2_WORDS_BIG_ENDIAN 0
@@ -683,12 +684,12 @@ do {									\
       assemble_name ((FILE), (NAME));					\
       fprintf ((FILE), ",%u,%u\n", (SIZE), (ALIGN) / BITS_PER_UNIT);	\
     }									\
+  ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object");			\
 } while (0)
 
 /* Describe how to emit uninitialized external linkage items.  */
 #define	ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN)		\
 do {									\
-  (*targetm.asm_out.globalize_label) (FILE, NAME);			\
   ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN);			\
 } while (0)
 
@@ -1094,7 +1095,7 @@ do {						\
 /* FreeBSD support.  */
 
 #define CPP_OS_FREEBSD_SPEC	"\
-  -D__PPC__ -D__ppc__ -D__PowerPC__ -D__powerpc__ \
+  -D__ELF__ -D__PPC__ -D__ppc__ -D__PowerPC__ -D__powerpc__ \
   -Acpu=powerpc -Amachine=powerpc"
 
 #define	STARTFILE_FREEBSD_SPEC	FBSD_STARTFILE_SPEC
@@ -1103,7 +1104,17 @@ do {						\
 #define LINK_START_FREEBSD_SPEC	""
 
 #define LINK_OS_FREEBSD_SPEC "\
-  %{symbolic:-Bsymbolic}"
+  %{p:%e`-p' not supported; use `-pg' and gprof(1)} \
+    %{Wl,*:%*} \
+    %{v:-V} \
+    %{assert*} %{R*} %{rpath*} %{defsym*} \
+    %{shared:-Bshareable %{h*} %{soname*}} \
+    %{!shared: \
+      %{!static: \
+	%{rdynamic: -export-dynamic} \
+	%{!dynamic-linker: -dynamic-linker /usr/libexec/ld-elf.so.1}} \
+      %{static:-Bstatic}} \
+    %{symbolic:-Bsymbolic}"
 
 /* GNU/Linux support.  */
 #ifdef USE_GNULIBC_1
@@ -1205,6 +1216,21 @@ ncrtn.o%s"
 #define CPP_OS_NETBSD_SPEC "\
 -D__powerpc__ -D__NetBSD__ -D__ELF__ -D__KPRINTF_ATTRIBUTE__"
 
+/* RTEMS support.  */  
+
+#define CPP_OS_RTEMS_SPEC "\
+%{!mcpu*:  %{!Dppc*: %{!Dmpc*: -Dmpc750} } }\
+%{mcpu=403:  %{!Dppc*: %{!Dmpc*: -Dppc403}  } } \
+%{mcpu=505:  %{!Dppc*: %{!Dmpc*: -Dmpc505}  } } \
+%{mcpu=601:  %{!Dppc*: %{!Dmpc*: -Dppc601}  } } \
+%{mcpu=602:  %{!Dppc*: %{!Dmpc*: -Dppc602}  } } \
+%{mcpu=603:  %{!Dppc*: %{!Dmpc*: -Dppc603}  } } \
+%{mcpu=603e: %{!Dppc*: %{!Dmpc*: -Dppc603e} } } \
+%{mcpu=604:  %{!Dppc*: %{!Dmpc*: -Dmpc604}  } } \
+%{mcpu=750:  %{!Dppc*: %{!Dmpc*: -Dmpc750}  } } \
+%{mcpu=821:  %{!Dppc*: %{!Dmpc*: -Dmpc821}  } } \
+%{mcpu=860:  %{!Dppc*: %{!Dmpc*: -Dmpc860}  } }" 
+
 /* VxWorks support.  */
 /* VxWorks does all the library stuff itself.  */
 #define LIB_VXWORKS_SPEC ""
@@ -1343,6 +1369,7 @@ ncrtn.o%s"
   { "cpp_os_gnu",		CPP_OS_GNU_SPEC },			\
   { "cpp_os_linux",		CPP_OS_LINUX_SPEC },			\
   { "cpp_os_netbsd",		CPP_OS_NETBSD_SPEC },			\
+  { "cpp_os_rtems",		CPP_OS_RTEMS_SPEC },			\
   { "cpp_os_vxworks",		CPP_OS_VXWORKS_SPEC },			\
   { "cpp_os_windiss",           CPP_OS_WINDISS_SPEC },                  \
   { "cpp_os_default",		CPP_OS_DEFAULT_SPEC },
diff --git a/gcc/config/rs6000/t-aix43 b/gcc/config/rs6000/t-aix43
index 1acccf64d10..a716209e9bc 100644
--- a/gcc/config/rs6000/t-aix43
+++ b/gcc/config/rs6000/t-aix43
@@ -55,7 +55,7 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 	rm -f @multilib_dir@/shr.o
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
-SHLIB_INSTALL = $(INSTALL_DATA) @shlib_base_name@.a $$(slibdir)/
+SHLIB_INSTALL = $(INSTALL_DATA) @shlib_base_name@.a $$(DESTDIR)$$(slibdir)/
 SHLIB_LIBS = -lc `case @shlib_base_name@ in *pthread*) echo -lpthread ;; esac`
 SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
 SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/rs6000/t-aix52 b/gcc/config/rs6000/t-aix52
index 97e1e079508..bddcdb11490 100644
--- a/gcc/config/rs6000/t-aix52
+++ b/gcc/config/rs6000/t-aix52
@@ -36,7 +36,7 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 	rm -f @multilib_dir@/shr.o
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
-SHLIB_INSTALL = $(INSTALL_DATA) @shlib_base_name@.a $$(slibdir)/
+SHLIB_INSTALL = $(INSTALL_DATA) @shlib_base_name@.a $$(DESTDIR)$$(slibdir)/
 SHLIB_LIBS = -lc `case @shlib_base_name@ in *pthread*) echo -lpthread ;; esac`
 SHLIB_MKMAP = $(srcdir)/mkmap-flat.awk
 SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/rs6000/t-linux64 b/gcc/config/rs6000/t-linux64
index 48b61505bf1..af7b44bd70b 100644
--- a/gcc/config/rs6000/t-linux64
+++ b/gcc/config/rs6000/t-linux64
@@ -5,6 +5,9 @@ TARGET_LIBGCC2_CFLAGS =
 EXTRA_MULTILIB_PARTS=crtbegin.o crtend.o crtbeginS.o crtendS.o crtbeginT.o \
 			crtsavres.o
 
+# These functions are needed for soft-float on powerpc64-linux.
+LIB2FUNCS_EXTRA = $(srcdir)/config/rs6000/ppc64-fp.c
+
 # ld provides these functions as needed.
 crtsavres.S:
 	echo >crtsavres.S
diff --git a/gcc/config/rs6000/t-rtems b/gcc/config/rs6000/t-rtems
new file mode 100644
index 00000000000..364a22d2278
--- /dev/null
+++ b/gcc/config/rs6000/t-rtems
@@ -0,0 +1,86 @@
+# Multilibs for powerpc RTEMS targets.
+
+MULTILIB_OPTIONS	= \
+mcpu=403/mcpu=505/mcpu=601/mcpu=602/mcpu=603/mcpu=603e/mcpu=604/mcpu=750/mcpu=821/mcpu=860 \
+Dmpc509/Dmpc8260 \
+D_OLD_EXCEPTIONS \
+msoft-float
+
+MULTILIB_DIRNAMES	= \
+m403 m505 m601 m602 m603 m603e m604 m750 m821 m860 \
+mpc509 \
+mpc8260 \
+roe \
+nof
+
+MULTILIB_EXTRA_OPTS	= mrelocatable-lib mno-eabi mstrict-align
+
+# MULTILIB_MATCHES	= ${MULTILIB_MATCHES_FLOAT}
+MULTILIB_MATCHES  	= ${MULTILIB_MATCHES_ENDIAN} \
+			  ${MULTILIB_MATCHES_SYSV} \
+			  mcpu?505/Dmpc505=mcpu?505/Dmpc509
+
+#
+# RTEMS old/new-exceptions handling
+#
+# old-exception processing is depredicated, therefore
+#
+# * Cpu-variants supporting new exception processing are build 
+# with new exception processing only
+# * Cpu-variants not having been ported to new exception processing are
+# build with old and new exception processing
+#
+
+# Cpu-variants supporting new exception processing only
+MULTILIB_NEW_EXCEPTIONS_ONLY = \
+*mcpu=604*/*D_OLD_EXCEPTIONS* \
+*mcpu=750*/*D_OLD_EXCEPTIONS* \
+*mcpu=821*/*D_OLD_EXCEPTIONS* \
+*Dmpc8260*/*D_OLD_EXCEPTIONS* \
+*mcpu=860*/*D_OLD_EXCEPTIONS*
+
+# Soft-float only, default implies msoft-float
+# NOTE: Must match with MULTILIB_MATCHES_FLOAT and MULTILIB_MATCHES
+MULTILIB_SOFTFLOAT_ONLY = \
+mcpu=403/*msoft-float* \
+mcpu=821/*msoft-float* \
+mcpu=860/*msoft-float*
+
+# Hard-float only, take out msoft-float
+MULTILIB_HARDFLOAT_ONLY = \
+mcpu=505/*msoft-float*
+
+MULTILIB_EXCEPTIONS =
+
+# Disallow -D_OLD_EXCEPTIONS without other options
+MULTILIB_EXCEPTIONS	+= D_OLD_EXCEPTIONS*
+
+# Disallow -Dppc and -Dmpc without other options
+MULTILIB_EXCEPTIONS 	+= Dppc* Dmpc*
+
+MULTILIB_EXCEPTIONS	+= \
+${MULTILIB_NEW_EXCEPTIONS_ONLY} \
+${MULTILIB_SOFTFLOAT_ONLY} \
+${MULTILIB_HARDFLOAT_ONLY}
+
+# Special rules
+# Take out all variants we don't want
+MULTILIB_EXCEPTIONS += mcpu=403/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=403/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=505/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=505/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=601/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=601/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=602/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=602/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=603/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=603/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=603e/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=604/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=604/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=750/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=750/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=821/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=821/Dmpc8260*
+MULTILIB_EXCEPTIONS += mcpu=860/Dmpc509*
+MULTILIB_EXCEPTIONS += mcpu=860/Dmpc8260*
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index deecef6b969..7efacb4bc84 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler, for IBM S/390.
-   Copyright (C) 2000, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2000, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Hartmut Penner (hpenner@de.ibm.com)
 
 This file is part of GNU CC.
@@ -43,13 +43,16 @@ extern int s390_single_hi PARAMS ((rtx, enum machine_mode, int));
 extern int s390_extract_hi PARAMS ((rtx, enum machine_mode, int));
 extern int s390_single_qi PARAMS ((rtx, enum machine_mode, int));
 extern int s390_extract_qi PARAMS ((rtx, enum machine_mode, int));
+extern int tls_symbolic_operand PARAMS ((rtx));
 
 extern int s390_match_ccmode PARAMS ((rtx, enum machine_mode));
 extern enum machine_mode s390_tm_ccmode PARAMS ((rtx, rtx, int));
 extern enum machine_mode s390_select_ccmode PARAMS ((enum rtx_code, rtx, rtx));
 extern int symbolic_reference_mentioned_p PARAMS ((rtx));
+extern int tls_symbolic_reference_mentioned_p PARAMS ((rtx));
+extern rtx s390_tls_get_offset PARAMS ((void));
 extern int legitimate_la_operand_p PARAMS ((rtx));
-extern int preferred_la_operand_p PARAMS ((rtx, int));
+extern int preferred_la_operand_p PARAMS ((rtx));
 extern int legitimate_pic_operand_p PARAMS ((rtx));
 extern int legitimate_constant_p PARAMS ((rtx));
 extern int legitimate_reload_constant_p PARAMS ((rtx));
@@ -60,7 +63,7 @@ extern enum reg_class s390_preferred_reload_class PARAMS ((rtx, enum reg_class))
 extern enum reg_class s390_secondary_input_reload_class PARAMS ((enum reg_class, enum machine_mode, rtx));
 extern int s390_plus_operand PARAMS ((rtx, enum machine_mode));
 extern void s390_expand_plus_operand PARAMS ((rtx, rtx, rtx));
-extern void emit_pic_move PARAMS ((rtx *, enum machine_mode));
+extern void emit_symbolic_move PARAMS ((rtx *));
 extern void s390_load_address PARAMS ((rtx, rtx));
 extern void s390_expand_movstr PARAMS ((rtx, rtx, rtx));
 extern void s390_expand_clrstr PARAMS ((rtx, rtx));
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 3f6ec452b1c..5f4559051d2 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -1,5 +1,5 @@
 /* Subroutines used for code generation on IBM S/390 and zSeries
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                   Ulrich Weigand (uweigand@de.ibm.com).
 
@@ -54,6 +54,11 @@ static int s390_adjust_priority PARAMS ((rtx, int));
 static void s390_select_rtx_section PARAMS ((enum machine_mode, rtx, 
 					     unsigned HOST_WIDE_INT));
 static void s390_encode_section_info PARAMS ((tree, int));
+static const char *s390_strip_name_encoding PARAMS ((const char *));
+static bool s390_cannot_force_const_mem PARAMS ((rtx));
+static void s390_init_builtins PARAMS ((void));
+static rtx s390_expand_builtin PARAMS ((tree, rtx, rtx, 
+					enum machine_mode, int));
 static void s390_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
 					  HOST_WIDE_INT, tree));
 
@@ -81,6 +86,20 @@ static void s390_output_mi_thunk PARAMS ((FILE *, tree, HOST_WIDE_INT,
 
 #undef	TARGET_ENCODE_SECTION_INFO
 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
+#undef  TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING s390_strip_name_encoding
+
+#ifdef HAVE_AS_TLS
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS true
+#endif
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
+
+#undef  TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS s390_init_builtins
+#undef  TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN s390_expand_builtin
 
 #undef TARGET_ASM_OUTPUT_MI_THUNK
 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
@@ -98,6 +117,9 @@ static int s390_sr_alias_set = 0;
    emitted.  */
 rtx s390_compare_op0, s390_compare_op1;
 
+/* The encoding characters for the four TLS models present in ELF.  */
+static char const tls_model_chars[] = " GLil";
+
 /* Structure used to hold the components of a S/390 memory
    address.  A legitimate address on S/390 is of the general
    form
@@ -132,6 +154,9 @@ struct machine_function GTY(())
 
   /* Size of stack frame.  */
   HOST_WIDE_INT frame_size;
+
+  /* Some local-dynamic TLS symbol name.  */
+  const char *some_ld_name;
 };
 
 static int s390_match_ccmode_set PARAMS ((rtx, enum machine_mode));
@@ -140,6 +165,10 @@ static const char *s390_branch_condition_mnemonic PARAMS ((rtx, int));
 static int check_mode PARAMS ((rtx, enum machine_mode *));
 static int general_s_operand PARAMS ((rtx, enum machine_mode, int));
 static int s390_decompose_address PARAMS ((rtx, struct s390_address *));
+static rtx get_thread_pointer PARAMS ((void));
+static rtx legitimize_tls_address PARAMS ((rtx, rtx));
+static const char *get_some_local_dynamic_name PARAMS ((void));
+static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
 static int reg_used_in_mem_p PARAMS ((int, rtx));
 static int addr_generation_dependency_p PARAMS ((rtx, rtx));
 static int s390_split_branches PARAMS ((rtx, bool *));
@@ -910,6 +939,8 @@ larl_operand (op, mode)
   if (GET_CODE (op) == LABEL_REF)
     return 1;
   if (GET_CODE (op) == SYMBOL_REF
+      && XSTR (op, 0)[0] != '@'
+      && !tls_symbolic_operand (op)
       && (!flag_pic || SYMBOL_REF_FLAG (op) 
           || CONSTANT_POOL_ADDRESS_P (op)))
     return 1;
@@ -932,17 +963,23 @@ larl_operand (op, mode)
   if (GET_CODE (op) == LABEL_REF)
     return 1;
   if (GET_CODE (op) == SYMBOL_REF
+      && XSTR (op, 0)[0] != '@'
+      && !tls_symbolic_operand (op)
       && (!flag_pic || SYMBOL_REF_FLAG (op)
           || CONSTANT_POOL_ADDRESS_P (op)))
     return 1;
 
-  /* Now we must have a @GOTENT offset or @PLT stub.  */
+  /* Now we must have a @GOTENT offset or @PLT stub
+     or an @INDNTPOFF TLS offset.  */
   if (GET_CODE (op) == UNSPEC
       && XINT (op, 1) == 111)
     return 1;
   if (GET_CODE (op) == UNSPEC
       && XINT (op, 1) == 113)
     return 1;
+  if (GET_CODE (op) == UNSPEC
+      && XINT (op, 1) == UNSPEC_INDNTPOFF)
+    return 1;
 
   return 0;
 }
@@ -1091,6 +1128,23 @@ bras_sym_operand (op, mode)
   return 0;
 }
 
+/* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
+   otherwise return 0.  */
+
+int
+tls_symbolic_operand (op)
+     register rtx op;
+{
+  const char *symbol_str;
+
+  if (GET_CODE (op) != SYMBOL_REF)
+    return 0;
+  symbol_str = XSTR (op, 0);
+
+  if (symbol_str[0] != '%')
+    return 0;
+  return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
+}
 
 /* Return true if OP is a load multiple operation.  It is known to be a
    PARALLEL and the first section will be tested. 
@@ -1250,6 +1304,37 @@ symbolic_reference_mentioned_p (op)
   return 0;
 }
 
+/* Return true if OP contains a reference to a thread-local symbol.  */
+
+int
+tls_symbolic_reference_mentioned_p (op)
+     rtx op;
+{
+  register const char *fmt;
+  register int i;
+
+  if (GET_CODE (op) == SYMBOL_REF)
+    return tls_symbolic_operand (op);
+
+  fmt = GET_RTX_FORMAT (GET_CODE (op));
+  for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'E')
+	{
+	  register int j;
+
+	  for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+	    if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+	      return 1;
+	}
+
+      else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
+	return 1;
+    }
+
+  return 0;
+}
+
 
 /* Return true if OP is a legitimate general operand when 
    generating PIC code.  It is given that flag_pic is on 
@@ -1264,7 +1349,7 @@ legitimate_pic_operand_p (op)
     return 1;
 
   /* Reject everything else; must be handled 
-     via emit_pic_move.  */
+     via emit_symbolic_move.  */
   return 0;
 }
 
@@ -1279,22 +1364,87 @@ legitimate_constant_p (op)
   if (!SYMBOLIC_CONST (op))
     return 1;
 
+  /* Accept immediate LARL operands.  */
+  if (TARGET_64BIT && larl_operand (op, VOIDmode))
+    return 1;
+
+  /* Thread-local symbols are never legal constants.  This is
+     so that emit_call knows that computing such addresses
+     might require a function call.  */
+  if (TLS_SYMBOLIC_CONST (op))
+    return 0;
+
   /* In the PIC case, symbolic constants must *not* be
      forced into the literal pool.  We accept them here,
-     so that they will be handled by emit_pic_move.  */
+     so that they will be handled by emit_symbolic_move.  */
   if (flag_pic)
     return 1;
 
-  /* Even in the non-PIC case, we can accept immediate
-     LARL operands here.  */
-  if (TARGET_64BIT)
-    return larl_operand (op, VOIDmode);
-
   /* All remaining non-PIC symbolic constants are
      forced into the literal pool.  */
   return 0;
 }
 
+/* Determine if it's legal to put X into the constant pool.  This
+   is not possible if X contains the address of a symbol that is
+   not constant (TLS) or not known at final link time (PIC).  */
+
+static bool
+s390_cannot_force_const_mem (x)
+     rtx x;
+{
+  switch (GET_CODE (x))
+    {
+    case CONST_INT:
+    case CONST_DOUBLE:
+      /* Accept all non-symbolic constants.  */
+      return false;
+
+    case LABEL_REF:
+      /* Labels are OK iff we are non-PIC.  */
+      return flag_pic != 0;
+
+    case SYMBOL_REF:
+      /* 'Naked' TLS symbol references are never OK,
+         non-TLS symbols are OK iff we are non-PIC.  */
+      if (tls_symbolic_operand (x))
+	return true;
+      else
+	return flag_pic != 0;
+
+    case CONST:
+      return s390_cannot_force_const_mem (XEXP (x, 0));
+    case PLUS:
+    case MINUS:
+      return s390_cannot_force_const_mem (XEXP (x, 0))
+	     || s390_cannot_force_const_mem (XEXP (x, 1));
+
+    case UNSPEC:
+      switch (XINT (x, 1))
+	{
+	/* Only lt-relative or GOT-relative UNSPECs are OK.  */
+	case 100:
+	case 104:
+	case 112:
+	case 114:
+	case UNSPEC_TLSGD:
+	case UNSPEC_TLSLDM:
+	case UNSPEC_NTPOFF:
+	case UNSPEC_DTPOFF:
+	case UNSPEC_GOTNTPOFF:
+	case UNSPEC_INDNTPOFF:
+	  return false;
+
+	default:
+	  return true;
+	}
+      break;
+
+    default:
+      abort ();
+    }
+}
+
 /* Returns true if the constant value OP is a legitimate general
    operand during and after reload.  The difference to 
    legitimate_constant_p is that this function will not accept
@@ -1341,23 +1491,14 @@ s390_preferred_reload_class (op, class)
   switch (GET_CODE (op))
     {
       /* Constants we cannot reload must be forced into the
-	 literal pool.  For constants we *could* handle directly,
-	 it might still be preferable to put them in the pool and
-	 use a memory-to-memory instruction.
-
-	 However, try to avoid needlessly allocating a literal
-	 pool in a routine that wouldn't otherwise need any.
-	 Heuristically, we assume that 64-bit leaf functions
-	 typically don't need a literal pool, all others do.  */
+	 literal pool.  */
+
       case CONST_DOUBLE:
       case CONST_INT:
-	if (!legitimate_reload_constant_p (op))
-	  return NO_REGS;
-
-	if (TARGET_64BIT && current_function_is_leaf)
+	if (legitimate_reload_constant_p (op))
 	  return class;
-
-	return NO_REGS;
+	else
+	  return NO_REGS;
 
       /* If a symbolic constant or a PLUS is reloaded,
 	 it is most likely being used as an address, so
@@ -1546,6 +1687,22 @@ s390_decompose_address (addr, out)
     disp = addr;		/* displacement */
 
 
+  /* Prefer to use pointer as base, not index.  */
+  if (base && indx)
+    {
+      int base_ptr = GET_CODE (base) == UNSPEC
+		     || (REG_P (base) && REG_POINTER (base));
+      int indx_ptr = GET_CODE (indx) == UNSPEC
+		     || (REG_P (indx) && REG_POINTER (indx));
+
+      if (!base_ptr && indx_ptr)
+	{
+	  rtx tmp = base;
+	  base = indx;
+	  indx = tmp;
+	}
+    }
+
   /* Validate base register.  */
   if (base)
     {
@@ -1624,10 +1781,11 @@ s390_decompose_address (addr, out)
         }
 
       /* In the small-PIC case, the linker converts @GOT12 
-         offsets to possible displacements.  */
+         and @GOTNTPOFF offsets to possible displacements.  */
       else if (GET_CODE (disp) == CONST
                && GET_CODE (XEXP (disp, 0)) == UNSPEC
-               && XINT (XEXP (disp, 0), 1) == 110)
+               && (XINT (XEXP (disp, 0), 1) == 110
+		   || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
         {
           if (flag_pic != 1)
             return FALSE;
@@ -1677,12 +1835,6 @@ s390_decompose_address (addr, out)
               || !CONSTANT_POOL_ADDRESS_P (disp))
             return FALSE;
 
-          /* In 64-bit PIC mode we cannot accept symbolic 
-             constants in the constant pool.  */
-          if (TARGET_64BIT && flag_pic
-              && SYMBOLIC_CONST (get_pool_constant (disp)))
-            return FALSE;
-
           /* If we have an offset, make sure it does not
              exceed the size of the constant pool entry.  */
           if (offset && offset >= GET_MODE_SIZE (get_pool_mode (disp)))
@@ -1773,14 +1925,11 @@ legitimate_la_operand_p (op)
 }
 
 /* Return 1 if OP is a valid operand for the LA instruction,
-   and we prefer to use LA over addition to compute it.
-   If STRICT is true, only accept operands that will never
-   change to something we cannot recognize as preferred.  */
+   and we prefer to use LA over addition to compute it.  */
    
 int
-preferred_la_operand_p (op, strict)
+preferred_la_operand_p (op)
      register rtx op;
-     int strict;
 {
   struct s390_address addr;
   if (!s390_decompose_address (op, &addr))
@@ -1792,10 +1941,9 @@ preferred_la_operand_p (op, strict)
   if (addr.pointer)
     return TRUE;
 
-  if (!strict) 
-    if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
-        || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
-      return TRUE;
+  if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
+      || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
+    return TRUE;
 
   return FALSE;
 }
@@ -1848,7 +1996,7 @@ legitimize_pic_address (orig, reg)
               || CONSTANT_POOL_ADDRESS_P (addr))))
     {
       /* This is a local symbol.  */
-      if (TARGET_64BIT)
+      if (TARGET_64BIT && larl_operand (addr, VOIDmode))
         {
           /* Access local symbols PC-relative via LARL.  
              This is the same as in the non-PIC case, so it is 
@@ -1860,9 +2008,9 @@ legitimize_pic_address (orig, reg)
 
           rtx temp = reg? reg : gen_reg_rtx (Pmode);
 
-          addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), 100);
-          addr = gen_rtx_CONST (SImode, addr);
-          addr = force_const_mem (SImode, addr);
+          addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 100);
+          addr = gen_rtx_CONST (Pmode, addr);
+          addr = force_const_mem (Pmode, addr);
 	  emit_move_insn (temp, addr);
 
           base = gen_rtx_REG (Pmode, BASE_REGISTER);
@@ -1923,9 +2071,9 @@ legitimize_pic_address (orig, reg)
 	  if (reload_in_progress || reload_completed)
 	    regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
 
-          addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), 112);
-          addr = gen_rtx_CONST (SImode, addr);
-          addr = force_const_mem (SImode, addr);
+          addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 112);
+          addr = gen_rtx_CONST (Pmode, addr);
+          addr = force_const_mem (Pmode, addr);
           emit_move_insn (temp, addr);
 
           new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
@@ -1951,7 +2099,7 @@ legitimize_pic_address (orig, reg)
                   case 100:
                   case 112:
                   case 114:
-                    new = force_const_mem (SImode, orig);
+                    new = force_const_mem (Pmode, orig);
                     break;
 
                   /* @GOTENT is OK as is.  */
@@ -1966,9 +2114,9 @@ legitimize_pic_address (orig, reg)
                         rtx temp = reg? reg : gen_reg_rtx (Pmode);
 
                         addr = XVECEXP (addr, 0, 0);
-                        addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, addr), 114);
-                        addr = gen_rtx_CONST (SImode, addr);
-                        addr = force_const_mem (SImode, addr);
+                        addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), 114);
+                        addr = gen_rtx_CONST (Pmode, addr);
+                        addr = force_const_mem (Pmode, addr);
 	                emit_move_insn (temp, addr);
 
                         base = gen_rtx_REG (Pmode, BASE_REGISTER);
@@ -2002,7 +2150,7 @@ legitimize_pic_address (orig, reg)
                         || CONSTANT_POOL_ADDRESS_P (op0))))
 	      && GET_CODE (op1) == CONST_INT)
 	    {
-              if (TARGET_64BIT)
+              if (TARGET_64BIT && larl_operand (op0, VOIDmode))
                 {
                   if (INTVAL (op1) & 1)
                     {
@@ -2039,10 +2187,10 @@ legitimize_pic_address (orig, reg)
 
                   rtx temp = reg? reg : gen_reg_rtx (Pmode);
 
-                  addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), 100);
-                  addr = gen_rtx_PLUS (SImode, addr, op1);
-                  addr = gen_rtx_CONST (SImode, addr);
-                  addr = force_const_mem (SImode, addr);
+                  addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 100);
+                  addr = gen_rtx_PLUS (Pmode, addr, op1);
+                  addr = gen_rtx_CONST (Pmode, addr);
+                  addr = force_const_mem (Pmode, addr);
         	  emit_move_insn (temp, addr);
 
                   base = gen_rtx_REG (Pmode, BASE_REGISTER);
@@ -2068,7 +2216,7 @@ legitimize_pic_address (orig, reg)
               if (XINT (op0, 1) != 100)
                 abort ();
 
-              new = force_const_mem (SImode, orig);
+              new = force_const_mem (Pmode, orig);
             }
 
           /* Otherwise, compute the sum.  */
@@ -2098,18 +2246,237 @@ legitimize_pic_address (orig, reg)
   return new;
 }
 
+/* Load the thread pointer into a register.  */
+
+static rtx
+get_thread_pointer ()
+{
+  rtx tp;
+
+  tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
+  tp = force_reg (Pmode, tp);
+  mark_reg_pointer (tp, BITS_PER_WORD);
+
+  return tp;
+}
+
+/* Construct the SYMBOL_REF for the tls_get_offset function.  */
+
+static GTY(()) rtx s390_tls_symbol;
+rtx
+s390_tls_get_offset ()
+{
+  if (!s390_tls_symbol)
+    s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
+
+  return s390_tls_symbol;
+}
+
+/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
+   this (thread-local) address.  REG may be used as temporary.  */
+
+static rtx
+legitimize_tls_address (addr, reg)
+     rtx addr;
+     rtx reg;
+{
+  rtx new, tls_call, temp, base, r2, insn;
+
+  if (GET_CODE (addr) == SYMBOL_REF)
+    switch (tls_symbolic_operand (addr))
+      {
+      case TLS_MODEL_GLOBAL_DYNAMIC:
+	start_sequence ();
+	r2 = gen_rtx_REG (Pmode, 2);
+	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
+	new = gen_rtx_CONST (Pmode, tls_call);
+	new = force_const_mem (Pmode, new);
+	emit_move_insn (r2, new);
+	emit_call_insn (gen_call_value_tls (r2, tls_call));
+	insn = get_insns ();
+	end_sequence ();
+
+	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
+	temp = gen_reg_rtx (Pmode);
+	emit_libcall_block (insn, temp, r2, new);
+
+	new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp);
+	if (reg != 0)
+	  {
+	    s390_load_address (reg, new);
+	    new = reg;
+	  }
+	break;
+
+      case TLS_MODEL_LOCAL_DYNAMIC:
+	start_sequence ();
+	r2 = gen_rtx_REG (Pmode, 2);
+	tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
+	new = gen_rtx_CONST (Pmode, tls_call);
+	new = force_const_mem (Pmode, new);
+	emit_move_insn (r2, new);
+	emit_call_insn (gen_call_value_tls (r2, tls_call));
+	insn = get_insns ();
+	end_sequence ();
+
+	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
+	temp = gen_reg_rtx (Pmode);
+	emit_libcall_block (insn, temp, r2, new);
+
+	new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp);
+	base = gen_reg_rtx (Pmode);
+	s390_load_address (base, new);
+
+	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
+	new = gen_rtx_CONST (Pmode, new);
+	new = force_const_mem (Pmode, new);
+	temp = gen_reg_rtx (Pmode);
+	emit_move_insn (temp, new);
+
+	new = gen_rtx_PLUS (Pmode, base, temp);
+	if (reg != 0)
+	  {
+	    s390_load_address (reg, new);
+	    new = reg;
+	  }
+	break;
+
+      case TLS_MODEL_INITIAL_EXEC:
+	if (flag_pic == 1)
+	  {
+	    /* Assume GOT offset < 4k.  This is handled the same way
+	       in both 31- and 64-bit code.  */
+
+	    if (reload_in_progress || reload_completed)
+	      regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+
+	    new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
+	    new = gen_rtx_CONST (Pmode, new);
+	    new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+	    new = gen_rtx_MEM (Pmode, new);
+	    RTX_UNCHANGING_P (new) = 1;
+	    temp = gen_reg_rtx (Pmode);
+	    emit_move_insn (temp, new);
+	  }
+	else if (TARGET_64BIT)
+	  {
+	    /* If the GOT offset might be >= 4k, we determine the position
+	       of the GOT entry via a PC-relative LARL.  */
+
+	    new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
+	    new = gen_rtx_CONST (Pmode, new);
+	    temp = gen_reg_rtx (Pmode);
+	    emit_move_insn (temp, new);
+
+	    new = gen_rtx_MEM (Pmode, temp);
+	    RTX_UNCHANGING_P (new) = 1;
+	    temp = gen_reg_rtx (Pmode);
+	    emit_move_insn (temp, new);
+	  }
+	else if (flag_pic)
+	  {
+	    /* If the GOT offset might be >= 4k, we have to load it 
+	       from the literal pool.  */
+
+	    if (reload_in_progress || reload_completed)
+	      regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
+
+	    new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
+	    new = gen_rtx_CONST (Pmode, new);
+	    new = force_const_mem (Pmode, new);
+	    temp = gen_reg_rtx (Pmode);
+	    emit_move_insn (temp, new);
+
+            new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
+	    new = gen_rtx_MEM (Pmode, new);
+	    RTX_UNCHANGING_P (new) = 1;
+
+	    new = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new, addr), UNSPEC_TLS_LOAD);
+	    temp = gen_reg_rtx (Pmode);
+	    emit_insn (gen_rtx_SET (Pmode, temp, new));
+	  }
+	else
+	  {
+	    /* In position-dependent code, load the absolute address of
+	       the GOT entry from the literal pool.  */
+
+	    new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
+	    new = gen_rtx_CONST (Pmode, new);
+	    new = force_const_mem (Pmode, new);
+	    temp = gen_reg_rtx (Pmode);
+	    emit_move_insn (temp, new);
+
+	    new = temp;
+	    new = gen_rtx_MEM (Pmode, new);
+	    RTX_UNCHANGING_P (new) = 1;
+
+	    new = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new, addr), UNSPEC_TLS_LOAD);
+	    temp = gen_reg_rtx (Pmode);
+	    emit_insn (gen_rtx_SET (Pmode, temp, new));
+	  }
+
+	new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp);
+	if (reg != 0)
+	  {
+	    s390_load_address (reg, new);
+	    new = reg;
+	  }
+	break;
+
+      case TLS_MODEL_LOCAL_EXEC:
+	new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
+	new = gen_rtx_CONST (Pmode, new);
+	new = force_const_mem (Pmode, new);
+        temp = gen_reg_rtx (Pmode);
+	emit_move_insn (temp, new);
+
+	new = gen_rtx_PLUS (Pmode, get_thread_pointer (), temp);
+	if (reg != 0)
+	  {
+	    s390_load_address (reg, new);
+	    new = reg;
+	  }
+	break;
+
+      default:
+	abort ();
+      }
+
+  else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
+    {
+      switch (XINT (XEXP (addr, 0), 1))
+	{
+	case UNSPEC_INDNTPOFF:
+	  if (TARGET_64BIT)
+	    new = addr;
+	  else
+	    abort ();
+	  break;
+
+	default:
+	  abort ();
+	}
+    }
+
+  else
+    abort ();  /* for now ... */
+
+  return new;
+}
+
 /* Emit insns to move operands[1] into operands[0].  */
 
 void
-emit_pic_move (operands, mode)
+emit_symbolic_move (operands)
      rtx *operands;
-     enum machine_mode mode ATTRIBUTE_UNUSED;
 {
   rtx temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
 
-  if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
+  if (GET_CODE (operands[0]) == MEM)
     operands[1] = force_reg (Pmode, operands[1]);
-  else
+  else if (TLS_SYMBOLIC_CONST (operands[1]))
+    operands[1] = legitimize_tls_address (operands[1], temp);
+  else if (flag_pic)
     operands[1] = legitimize_pic_address (operands[1], temp);
 }
 
@@ -2132,7 +2499,14 @@ legitimize_address (x, oldx, mode)
 {
   rtx constant_term = const0_rtx;
 
-  if (flag_pic)
+  if (TLS_SYMBOLIC_CONST (x))
+    {
+      x = legitimize_tls_address (x, 0);
+
+      if (legitimate_address_p (mode, x, FALSE))
+	return x;
+    }
+  else if (flag_pic)
     {
       if (SYMBOLIC_CONST (x)
           || (GET_CODE (x) == PLUS 
@@ -2420,7 +2794,7 @@ s390_expand_cmpstr (target, op0, op1, len)
         emit_move_insn (target, const0_rtx);
     }
 
-  else if (TARGET_MVCLE)
+  else /* if (TARGET_MVCLE) */
     {
       enum machine_mode double_mode = TARGET_64BIT ? TImode : DImode;
       enum machine_mode single_mode = TARGET_64BIT ? DImode : SImode;
@@ -2439,6 +2813,9 @@ s390_expand_cmpstr (target, op0, op1, len)
       emit_insn ((*gen_result) (target));
     }
 
+#if 0
+  /* Deactivate for now as profile code cannot cope with
+     CC being live across basic block boundaries.  */
   else
     {
       rtx addr0, addr1, count, blocks, temp;
@@ -2504,6 +2881,7 @@ s390_expand_cmpstr (target, op0, op1, len)
 
       emit_insn ((*gen_result) (target));
     }
+#endif
 }
 
 /* In the name of slightly smaller debug output, and to cater to
@@ -2544,6 +2922,48 @@ s390_simplify_dwarf_addr (orig_x)
   return orig_x;      
 }
 
+/* Locate some local-dynamic symbol still in use by this function
+   so that we can print its name in local-dynamic base patterns.  */
+
+static const char *
+get_some_local_dynamic_name ()
+{
+  rtx insn;
+
+  if (cfun->machine->some_ld_name)
+    return cfun->machine->some_ld_name;
+
+  for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
+    if (INSN_P (insn)
+        && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
+      return cfun->machine->some_ld_name;
+
+  abort ();
+}
+
+static int
+get_some_local_dynamic_name_1 (px, data)
+     rtx *px;
+     void *data ATTRIBUTE_UNUSED;
+{
+  rtx x = *px;
+
+  if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
+    {
+      x = get_pool_constant (x);
+      return for_each_rtx (&x, get_some_local_dynamic_name_1, 0);
+    }
+
+  if (GET_CODE (x) == SYMBOL_REF
+      && tls_symbolic_operand (x) == TLS_MODEL_LOCAL_DYNAMIC)
+    {
+      cfun->machine->some_ld_name = XSTR (x, 0);
+      return 1;
+    }
+
+  return 0;
+}
+
 /* Output symbolic constant X in assembler syntax to 
    stdio stream FILE.  */
 
@@ -2616,6 +3036,30 @@ s390_output_symbolic_const (file, x)
           fprintf (file, "@PLT-");
 	  s390_output_symbolic_const (file, cfun->machine->literal_pool_label);
 	  break;
+	case UNSPEC_TLSGD:
+	  s390_output_symbolic_const (file, XVECEXP (x, 0, 0));
+	  fprintf (file, "@TLSGD");
+	  break;
+	case UNSPEC_TLSLDM:
+	  assemble_name (file, get_some_local_dynamic_name ());
+	  fprintf (file, "@TLSLDM");
+	  break;
+	case UNSPEC_DTPOFF:
+	  s390_output_symbolic_const (file, XVECEXP (x, 0, 0));
+	  fprintf (file, "@DTPOFF");
+	  break;
+	case UNSPEC_NTPOFF:
+	  s390_output_symbolic_const (file, XVECEXP (x, 0, 0));
+	  fprintf (file, "@NTPOFF");
+	  break;
+	case UNSPEC_GOTNTPOFF:
+	  s390_output_symbolic_const (file, XVECEXP (x, 0, 0));
+	  fprintf (file, "@GOTNTPOFF");
+	  break;
+	case UNSPEC_INDNTPOFF:
+	  s390_output_symbolic_const (file, XVECEXP (x, 0, 0));
+	  fprintf (file, "@INDNTPOFF");
+	  break;
 	default:
 	  output_operand_lossage ("invalid UNSPEC as operand (2)");
 	  break;
@@ -2661,6 +3105,7 @@ print_operand_address (file, addr)
 
     'C': print opcode suffix for branch condition.
     'D': print opcode suffix for inverse branch condition.
+    'J': print tls_load/tls_gdcall/tls_ldcall suffix
     'O': print only the displacement of a memory reference.
     'R': print only the base register of a memory reference.
     'N': print the second word of a DImode operand.
@@ -2686,6 +3131,26 @@ print_operand (file, x, code)
       fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
       return;
 
+    case 'J':
+      if (GET_CODE (x) == SYMBOL_REF)
+	{
+	  fprintf (file, "%s", ":tls_load:");
+	  output_addr_const (file, x);
+	}
+      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
+	{
+	  fprintf (file, "%s", ":tls_gdcall:");
+	  output_addr_const (file, XVECEXP (x, 0, 0));
+	}
+      else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
+	{
+	  fprintf (file, "%s", ":tls_ldcall:");
+	  assemble_name (file, get_some_local_dynamic_name ());
+	}
+      else
+	abort ();
+      return;
+
     case 'O':
       {
         struct s390_address ad;
@@ -4126,11 +4591,21 @@ s390_optimize_prolog (temp_regno)
   
   for (i = 6; i < 16; i++)
     if (regs_ever_live[i])
-      break;
+      if (!global_regs[i]
+	  || i == STACK_POINTER_REGNUM 
+          || i == RETURN_REGNUM
+          || i == BASE_REGISTER 
+          || (flag_pic && i == (int)PIC_OFFSET_TABLE_REGNUM))
+	break;
 
   for (j = 15; j > i; j--)
     if (regs_ever_live[j])
-      break;
+      if (!global_regs[j]
+	  || j == STACK_POINTER_REGNUM 
+          || j == RETURN_REGNUM
+          || j == BASE_REGISTER 
+          || (flag_pic && j == (int)PIC_OFFSET_TABLE_REGNUM))
+	break;
 
   if (i == 16)
     {
@@ -4242,6 +4717,12 @@ s390_fixup_clobbered_return_reg (return_reg)
   bool replacement_done = 0;
   rtx insn;
 
+  /* If we never called __builtin_return_address, register 14
+     might have been used as temp during the prolog; we do
+     not want to touch those uses.  */
+  if (!has_hard_reg_initial_val (Pmode, REGNO (return_reg)))
+    return false;
+
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     {
       rtx reg, off, new_insn;
@@ -4424,7 +4905,7 @@ s390_frame_info ()
   cfun->machine->save_fprs_p = 0;
   if (TARGET_64BIT)
     for (i = 24; i < 32; i++) 
-      if (regs_ever_live[i])
+      if (regs_ever_live[i] && !global_regs[i])
 	{
           cfun->machine->save_fprs_p = 1;
 	  break;
@@ -4448,8 +4929,11 @@ s390_frame_info ()
      prolog/epilog code is modified again.  */
 
   for (i = 0; i < 16; i++)
-    gprs_ever_live[i] = regs_ever_live[i];
+    gprs_ever_live[i] = regs_ever_live[i] && !global_regs[i];
 
+  if (flag_pic)
+    gprs_ever_live[PIC_OFFSET_TABLE_REGNUM] =
+    regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
   gprs_ever_live[BASE_REGISTER] = 1;
   gprs_ever_live[RETURN_REGNUM] = 1;
   gprs_ever_live[STACK_POINTER_REGNUM] = cfun->machine->frame_size > 0;
@@ -4486,7 +4970,7 @@ s390_arg_frame_offset ()
   save_fprs_p = 0;
   if (TARGET_64BIT)
     for (i = 24; i < 32; i++) 
-      if (regs_ever_live[i])
+      if (regs_ever_live[i] && !global_regs[i])
 	{
           save_fprs_p = 1;
 	  break;
@@ -4715,12 +5199,12 @@ s390_emit_prologue ()
   if (!TARGET_64BIT)
     {
       /* Save fpr 4 and 6.  */
-      if (regs_ever_live[18])
+      if (regs_ever_live[18] && !global_regs[18])
 	{
 	  insn = save_fpr (stack_pointer_rtx, STACK_POINTER_OFFSET - 16, 18);
 	  RTX_FRAME_RELATED_P (insn) = 1;
 	}
-      if (regs_ever_live[19]) 
+      if (regs_ever_live[19] && !global_regs[19])
 	{
 	  insn = save_fpr (stack_pointer_rtx, STACK_POINTER_OFFSET - 8, 19); 
 	  RTX_FRAME_RELATED_P (insn) = 1;
@@ -4763,6 +5247,16 @@ s390_emit_prologue ()
 	  set_mem_alias_set (addr, s390_sr_alias_set);
 	  insn = emit_insn (gen_move_insn (addr, temp_reg));
 	}
+
+      /* If we support asynchronous exceptions (e.g. for Java),
+	 we need to make sure the backchain pointer is set up
+	 before any possibly trapping memory access.  */
+
+      if (TARGET_BACKCHAIN && flag_non_call_exceptions)
+	{
+	  addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
+	  emit_insn (gen_rtx_CLOBBER (VOIDmode, addr));
+	}
     }
 
   /* Save fprs 8 - 15 (64 bit ABI).  */
@@ -4772,7 +5266,7 @@ s390_emit_prologue ()
       insn = emit_insn (gen_add2_insn (temp_reg, GEN_INT(-64)));
 
       for (i = 24; i < 32; i++)
-	if (regs_ever_live[i])
+	if (regs_ever_live[i] && !global_regs[i])
 	  {
 	    rtx addr = plus_constant (stack_pointer_rtx, 
 				      cfun->machine->frame_size - 64 + (i-24)*8);
@@ -4823,8 +5317,10 @@ s390_emit_prologue ()
           REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, NULL_RTX,
                                                REG_NOTES (insn));
 
-	  insn = emit_insn (gen_add2_insn (pic_offset_table_rtx,
-					   gen_rtx_REG (Pmode, BASE_REGISTER)));
+          got_symbol = gen_rtx_REG (Pmode, BASE_REGISTER);
+          got_symbol = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, got_symbol), 101);
+          got_symbol = gen_rtx_PLUS (Pmode, got_symbol, pic_offset_table_rtx);
+	  insn = emit_move_insn (pic_offset_table_rtx, got_symbol);
           REG_NOTES(insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, NULL_RTX,
                                                REG_NOTES (insn));
 	}
@@ -4870,14 +5366,14 @@ s390_emit_epilogue ()
     }
   else
     {
-      if (regs_ever_live[18])
+      if (regs_ever_live[18] && !global_regs[18])
 	{
 	  if (area_bottom > STACK_POINTER_OFFSET - 16)
 	    area_bottom = STACK_POINTER_OFFSET - 16;
 	  if (area_top < STACK_POINTER_OFFSET - 8)
 	    area_top = STACK_POINTER_OFFSET - 8;
 	}
-      if (regs_ever_live[19])
+      if (regs_ever_live[19] && !global_regs[19])
 	{
 	  if (area_bottom > STACK_POINTER_OFFSET - 8)
 	    area_bottom = STACK_POINTER_OFFSET - 8;
@@ -5447,6 +5943,134 @@ s390_va_arg (valist, type)
 }
 
 
+/* Builtins.  */
+
+enum s390_builtin
+{
+  S390_BUILTIN_THREAD_POINTER,
+  S390_BUILTIN_SET_THREAD_POINTER,
+
+  S390_BUILTIN_max
+};
+
+static unsigned int const code_for_builtin_64[S390_BUILTIN_max] = {
+  CODE_FOR_get_tp_64,
+  CODE_FOR_set_tp_64
+};
+
+static unsigned int const code_for_builtin_31[S390_BUILTIN_max] = {
+  CODE_FOR_get_tp_31,
+  CODE_FOR_set_tp_31
+};
+
+static void
+s390_init_builtins ()
+{
+  tree ftype;
+
+  ftype = build_function_type (ptr_type_node, void_list_node);
+  builtin_function ("__builtin_thread_pointer", ftype,
+		    S390_BUILTIN_THREAD_POINTER, BUILT_IN_MD,
+		    NULL, NULL_TREE);
+
+  ftype = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
+  builtin_function ("__builtin_set_thread_pointer", ftype,
+		    S390_BUILTIN_SET_THREAD_POINTER, BUILT_IN_MD,
+		    NULL, NULL_TREE);
+}
+
+/* Expand an expression EXP that calls a built-in function,
+   with result going to TARGET if that's convenient
+   (and in mode MODE if that's convenient).
+   SUBTARGET may be used as the target for computing one of EXP's operands.
+   IGNORE is nonzero if the value is to be ignored.  */
+
+static rtx
+s390_expand_builtin (exp, target, subtarget, mode, ignore)
+     tree exp;
+     rtx target;
+     rtx subtarget ATTRIBUTE_UNUSED;
+     enum machine_mode mode ATTRIBUTE_UNUSED;
+     int ignore ATTRIBUTE_UNUSED;
+{
+#define MAX_ARGS 2
+
+  unsigned int const *code_for_builtin = 
+    TARGET_64BIT ? code_for_builtin_64 : code_for_builtin_31;
+
+  tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
+  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+  tree arglist = TREE_OPERAND (exp, 1);
+  enum insn_code icode;
+  rtx op[MAX_ARGS], pat;
+  int arity;
+  bool nonvoid;
+
+  if (fcode >= S390_BUILTIN_max)
+    internal_error ("bad builtin fcode");
+  icode = code_for_builtin[fcode];
+  if (icode == 0)
+    internal_error ("bad builtin fcode");
+
+  nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+
+  for (arglist = TREE_OPERAND (exp, 1), arity = 0;
+       arglist;
+       arglist = TREE_CHAIN (arglist), arity++)
+    {
+      const struct insn_operand_data *insn_op;
+
+      tree arg = TREE_VALUE (arglist);
+      if (arg == error_mark_node)
+	return NULL_RTX;
+      if (arity > MAX_ARGS)
+	return NULL_RTX;
+
+      insn_op = &insn_data[icode].operand[arity + nonvoid];
+
+      op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, 0);
+
+      if (!(*insn_op->predicate) (op[arity], insn_op->mode))
+	op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+    }
+
+  if (nonvoid)
+    {
+      enum machine_mode tmode = insn_data[icode].operand[0].mode;
+      if (!target
+	  || GET_MODE (target) != tmode
+	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+	target = gen_reg_rtx (tmode);
+    }
+
+  switch (arity)
+    {
+    case 0:
+      pat = GEN_FCN (icode) (target);
+      break;
+    case 1:
+      if (nonvoid)
+        pat = GEN_FCN (icode) (target, op[0]);
+      else
+	pat = GEN_FCN (icode) (op[0]);
+      break;
+    case 2:
+      pat = GEN_FCN (icode) (target, op[0], op[1]);
+      break;
+    default:
+      abort ();
+    }
+  if (!pat)
+    return NULL_RTX;
+  emit_insn (pat);
+
+  if (nonvoid)
+    return target;
+  else
+    return const0_rtx;
+}
+
+
 /* Output assembly code for the trampoline template to
    stdio stream FILE.
 
@@ -5606,28 +6230,104 @@ s390_select_rtx_section (mode, x, align)
     function_section (current_function_decl);
 }
 
-/* If using PIC, mark a SYMBOL_REF for a non-global symbol so that we
-   may access it directly in the GOT.  */
+/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
+   into its name and SYMBOL_REF_FLAG.  */
 
 static void
 s390_encode_section_info (decl, first)
      tree decl;
      int first ATTRIBUTE_UNUSED;
 {
+  bool local_p = (*targetm.binds_local_p) (decl);
+  rtx rtl, symbol;
+
+  rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
+  if (GET_CODE (rtl) != MEM)
+    return;
+  symbol = XEXP (rtl, 0);
+  if (GET_CODE (symbol) != SYMBOL_REF)
+    return;
+
+  /* When using PIC, SYMBOL_REF_FLAG marks non-global symbols
+     that can be accessed directly.  */
   if (flag_pic)
+    SYMBOL_REF_FLAG (symbol) = local_p;
+
+  /* Encode thread-local data with %[GLil] for "global dynamic",
+     "local dynamic", "initial exec" or "local exec" TLS models,
+     respectively.  */
+
+  if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
     {
-      rtx rtl = (TREE_CODE_CLASS (TREE_CODE (decl)) != 'd'
-		 ? TREE_CST_RTL (decl) : DECL_RTL (decl));
+      const char *symbol_str = XSTR (symbol, 0);
+      char *newstr;
+      size_t len;
+      enum tls_model kind = decl_tls_model (decl);
+
+      if (!flag_pic)
+	{
+	  /* We don't allow non-pic code for shared libraries,
+	     so don't generate GD/LD TLS models for non-pic code.  */
+	  switch (kind)
+	    {
+	    case TLS_MODEL_GLOBAL_DYNAMIC:
+	      kind = TLS_MODEL_INITIAL_EXEC; break;
+	    case TLS_MODEL_LOCAL_DYNAMIC:
+	      kind = TLS_MODEL_LOCAL_EXEC; break;
+	    default:
+	      break;
+	    }
+	}
 
-      if (GET_CODE (rtl) == MEM)
+      if (symbol_str[0] == '%')
 	{
-	  SYMBOL_REF_FLAG (XEXP (rtl, 0))
-	    = (TREE_CODE_CLASS (TREE_CODE (decl)) != 'd'
-	       || ! TREE_PUBLIC (decl));
+	  if (symbol_str[1] == tls_model_chars[kind])
+	    return;
+	  symbol_str += 2;
 	}
+      len = strlen (symbol_str) + 1;
+      newstr = alloca (len + 2);
+
+      newstr[0] = '%';
+      newstr[1] = tls_model_chars[kind];
+      memcpy (newstr + 2, symbol_str, len);
+
+      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
+    }
+
+  /* If a variable has a forced alignment to < 2 bytes, mark it
+     with '@' to prevent it from being used as LARL operand.  */
+
+  else if (TREE_CODE (decl) == VAR_DECL 
+	   && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 16
+	   && XSTR (symbol, 0)[0] != '@')
+    {
+      const char *symbol_str = XSTR (symbol, 0);
+      size_t len = strlen (symbol_str) + 1;
+      char *newstr = alloca (len + 1);
+
+      newstr[0] = '@';
+      memcpy (newstr + 1, symbol_str, len);
+
+      XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 1 - 1);
     }
 }
 
+/* Undo the above when printing symbol names.  */
+
+static const char *
+s390_strip_name_encoding (str)
+     const char *str;
+{
+  if (str[0] == '%')
+    str += 2;
+  if (str[0] == '@')
+    str += 1;
+  if (str[0] == '*')
+    str += 1;
+  return str;
+}
+
 /* Output thunk to FILE that implements a C++ virtual function call (with
    multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer 
    by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
@@ -5642,13 +6342,16 @@ s390_output_mi_thunk (file, thunk, delta, vcall_offset, function)
      HOST_WIDE_INT vcall_offset;
      tree function;
 {
-  rtx op[9];
+  rtx op[10];
+  int nonlocal = 0;
 
   /* Operand 0 is the target function.  */
   op[0] = XEXP (DECL_RTL (function), 0);
   if (flag_pic && !SYMBOL_REF_FLAG (op[0]))
     {
-      op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]), 113);
+      nonlocal = 1;
+      op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
+			      TARGET_64BIT ? 113 : flag_pic == 2 ? 112 : 110);
       op[0] = gen_rtx_CONST (Pmode, op[0]);
     }
 
@@ -5673,6 +6376,9 @@ s390_output_mi_thunk (file, thunk, delta, vcall_offset, function)
   op[7] = NULL_RTX;
   op[8] = NULL_RTX;
 
+  /* Operand 9 can be used for temporary register.  */
+  op[9] = NULL_RTX;
+
   /* Generate code.  */
   if (TARGET_64BIT)
     {
@@ -5798,14 +6504,39 @@ s390_output_mi_thunk (file, thunk, delta, vcall_offset, function)
 
       /* Jump to target.  */
       op[8] = gen_label_rtx ();
+
       if (!flag_pic)
 	output_asm_insn ("l\t%4,%8-%5(%4)", op);
-      else
+      else if (!nonlocal)
 	output_asm_insn ("a\t%4,%8-%5(%4)", op);
+      /* We cannot call through .plt, since .plt requires %r12 loaded.  */
+      else if (flag_pic == 1)
+	{
+	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
+	  output_asm_insn ("l\t%4,%0(%4)", op);
+	}
+      else if (flag_pic == 2)
+	{
+	  op[9] = gen_rtx_REG (Pmode, 0);
+	  output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
+	  output_asm_insn ("a\t%4,%8-%5(%4)", op);
+	  output_asm_insn ("ar\t%4,%9", op);
+	  output_asm_insn ("l\t%4,0(%4)", op);
+	}
+
       output_asm_insn ("br\t%4", op);
 
       /* Output literal pool.  */
       output_asm_insn (".align\t4", op);
+
+      if (nonlocal && flag_pic == 2)
+	output_asm_insn (".long\t%0", op);
+      if (nonlocal)
+	{
+	  op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
+	  SYMBOL_REF_FLAG (op[0]) = 1;
+	}
+
       ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (op[8]));
       if (!flag_pic)
 	output_asm_insn (".long\t%0", op);
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 23721588c0f..566cd8241dc 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -1,5 +1,5 @@
 /* Definitions of target machine for GNU compiler, for IBM S/390
-   Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                   Ulrich Weigand (uweigand@de.ibm.com).
 This file is part of GNU CC.
@@ -24,7 +24,7 @@ Boston, MA 02111-1307, USA.  */
 
 /* Override the __fixdfdi etc. routines when building libgcc2.
    ??? This should be done in a cleaner way ...  */
-#ifdef IN_LIBGCC2
+#if defined (IN_LIBGCC2) && !defined (__s390x__)
 #include <s390/fixdfdi.h>
 #endif
 
@@ -69,7 +69,7 @@ extern int target_flags;
   { "soft-float",   -1, N_("Don't use hardware fp")},	      	       \
   { "backchain",     2, N_("Set backchain")},           		       \
   { "no-backchain", -2, N_("Don't set backchain (faster, but debug harder")}, \
-  { "small-exec",    4, N_("Use bras for execucable < 64k")},           \
+  { "small-exec",    4, N_("Use bras for executable < 64k")},           \
   { "no-small-exec",-4, N_("Don't use bras")},            	       \
   { "debug",         8, N_("Additional debug prints")},        	       \
   { "no-debug",     -8, N_("Don't print additional debug prints")},     \
@@ -94,6 +94,17 @@ extern int target_flags;
 #define CAN_DEBUG_WITHOUT_FP
 
 
+/* In libgcc2, determine target settings as compile-time constants.  */
+#ifdef IN_LIBGCC2
+#undef TARGET_64BIT
+#ifdef __s390x__
+#define TARGET_64BIT 1
+#else
+#define TARGET_64BIT 0
+#endif
+#endif
+
+
 /* Target machine storage layout.  */
 
 /* Everything is big-endian.  */
@@ -103,7 +114,9 @@ extern int target_flags;
 
 /* Width of a word, in units (bytes).  */
 #define UNITS_PER_WORD (TARGET_64BIT ? 8 : 4)
+#ifndef IN_LIBGCC2
 #define MIN_UNITS_PER_WORD 4
+#endif
 #define MAX_BITS_PER_WORD 64
 
 /* Function arguments and return values are promoted to word size.  */
@@ -195,11 +208,13 @@ if (INTEGRAL_MODE_P (MODE) &&	        	    	\
 #define ADDR_REGNO_P(N)		((N) >= 1 && (N) < 16)
 #define FP_REGNO_P(N)		((N) >= 16 && (N) < (TARGET_IEEE_FLOAT? 32 : 20))
 #define CC_REGNO_P(N)		((N) == 33)
+#define FRAME_REGNO_P(N)	((N) == 32 || (N) == 34)
 
 #define GENERAL_REG_P(X)	(REG_P (X) && GENERAL_REGNO_P (REGNO (X)))
 #define ADDR_REG_P(X)		(REG_P (X) && ADDR_REGNO_P (REGNO (X)))
 #define FP_REG_P(X)		(REG_P (X) && FP_REGNO_P (REGNO (X)))
 #define CC_REG_P(X)		(REG_P (X) && CC_REGNO_P (REGNO (X)))
+#define FRAME_REG_P(X)		(REG_P (X) && FRAME_REGNO_P (REGNO (X)))
 
 #define BASE_REGISTER 13
 #define RETURN_REGNUM 14
@@ -314,6 +329,8 @@ do								\
     (HARD_REGNO_NREGS(REGNO, MODE) == 1 || !((REGNO) & 1)) :        \
    CC_REGNO_P(REGNO)?                                               \
      GET_MODE_CLASS (MODE) == MODE_CC :                             \
+   FRAME_REGNO_P(REGNO)?                                            \
+     (enum machine_mode) (MODE) == Pmode :                          \
    0)
 
 #define MODES_TIEABLE_P(MODE1, MODE2)		\
@@ -330,8 +347,9 @@ do								\
 /* If a 4-byte value is loaded into a FPR, it is placed into the
    *upper* half of the register, not the lower.  Therefore, we
    cannot use SUBREGs to switch between modes in FP registers.  */
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO)		\
-  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO) ? FP_REGS : NO_REGS)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS)		\
+  (GET_MODE_SIZE (FROM) != GET_MODE_SIZE (TO)			\
+   ? reg_classes_intersect_p (FP_REGS, CLASS) : 0)
 
 /* Register classes.  */
  
@@ -492,7 +510,6 @@ extern int current_function_outgoing_args_size;
 
 /* Describe how we implement __builtin_eh_return.  */
 #define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 6 : INVALID_REGNUM)
-#define EH_RETURN_STACKADJ_RTX  gen_rtx_REG (Pmode, 10)
 #define EH_RETURN_HANDLER_RTX \
   gen_rtx_MEM (Pmode, plus_constant (arg_pointer_rtx, \
                                      TARGET_64BIT? -48 : -40))
@@ -749,6 +766,10 @@ CUMULATIVE_ARGS;
  || GET_CODE (X) == LABEL_REF                                           \
  || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
 
+#define TLS_SYMBOLIC_CONST(X)	\
+((GET_CODE (X) == SYMBOL_REF && tls_symbolic_operand (X))	\
+ || (GET_CODE (X) == CONST && tls_symbolic_reference_mentioned_p (X)))
+
 
 /* Condition codes.  */
 
@@ -926,6 +947,10 @@ extern int flag_pic;
 #define ASM_OUTPUT_SKIP(FILE, SIZE) \
   fprintf ((FILE), "\t.set\t.,.+%u\n", (SIZE))
 
+/* Output a reference to a user-level label named NAME.  */
+#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+  asm_fprintf ((FILE), "%U%s", (*targetm.strip_name_encoding) (NAME))
+
 /* Store in OUTPUT a string (made with alloca) containing
    an assembler-name for a local static variable named NAME.
    LABELNO is an integer which is different for each call.  */
@@ -1014,10 +1039,9 @@ extern int s390_nr_constants;
 									    \
     case MODE_INT:							    \
     case MODE_PARTIAL_INT:						    \
-      if (flag_pic							    \
-	  && (GET_CODE (EXP) == CONST					    \
-	      || GET_CODE (EXP) == SYMBOL_REF				    \
-	      || GET_CODE (EXP) == LABEL_REF ))				    \
+      if (GET_CODE (EXP) == CONST					    \
+	  || GET_CODE (EXP) == SYMBOL_REF				    \
+	  || GET_CODE (EXP) == LABEL_REF)				    \
         {								    \
 	  fputs (integer_asm_op (UNITS_PER_WORD, TRUE), FILE);		    \
           s390_output_symbolic_const (FILE, EXP);			    \
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 197aa67ebcb..8f7df95652a 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -1,5 +1,5 @@
 ;;- Machine description for GNU compiler -- S/390 / zSeries version.
-;;  Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
+;;  Copyright (C) 1999, 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
 ;;  Contributed by Hartmut Penner (hpenner@de.ibm.com) and
 ;;                 Ulrich Weigand (uweigand@de.ibm.com).
 ;; This file is part of GNU CC.
@@ -44,6 +44,34 @@
 ;;   s_operand -- Matches a valid S operand in a RS, SI or SS type instruction.
 ;;
 
+;;
+;; UNSPEC usage
+;;
+
+(define_constants
+  [; TLS relocation specifiers
+   (UNSPEC_TLSGD		500)
+   (UNSPEC_TLSLDM		501)
+   (UNSPEC_NTPOFF               502)
+   (UNSPEC_DTPOFF               503)
+   (UNSPEC_GOTNTPOFF            504)
+   (UNSPEC_INDNTPOFF            505)
+
+   ; TLS support
+   (UNSPEC_TP			510)
+   (UNSPEC_TLSLDM_NTPOFF	511)
+   (UNSPEC_TLS_LOAD		512)
+ ])
+
+;;
+;; UNSPEC_VOLATILE usage
+;;
+
+(define_constants
+  [; TLS support
+   (UNSPECV_SET_TP		500)
+  ])
+
 
 ;; Define an insn type attribute.  This is used in function unit delay
 ;; computations.
@@ -500,6 +528,31 @@
   "ltr\\t%0,%0"
   [(set_attr "op_type" "RR")])
 
+(define_insn "*tsthiCCT"
+  [(set (reg 33)
+        (compare (match_operand:HI 0 "nonimmediate_operand" "?Q,d")
+                 (match_operand:HI 1 "const0_operand" "")))
+   (set (match_operand:HI 2 "register_operand" "=d,0")
+        (match_dup 0))]
+  "s390_match_ccmode(insn, CCTmode)"
+  "@
+   icm\\t%2,3,%0
+   tml\\t%0,65535"
+  [(set_attr "op_type" "RS,RI")
+   (set_attr "atype"   "mem,reg")])
+
+(define_insn "*tsthiCCT_cconly"
+  [(set (reg 33)
+        (compare (match_operand:HI 0 "nonimmediate_operand" "Q,d")
+                 (match_operand:HI 1 "const0_operand" "")))
+   (clobber (match_scratch:HI 2 "=d,X"))]
+  "s390_match_ccmode(insn, CCTmode)"
+  "@
+   icm\\t%2,3,%0
+   tml\\t%0,65535"
+  [(set_attr "op_type" "RS,RI")
+   (set_attr "atype"   "mem,reg")])
+
 (define_insn "*tsthi"
   [(set (reg 33)
         (compare (match_operand:HI 0 "s_operand" "Q")
@@ -521,6 +574,30 @@
   [(set_attr "op_type" "RS")
    (set_attr "atype"   "mem")])
 
+(define_insn "*tstqiCCT"
+  [(set (reg 33)
+        (compare (match_operand:QI 0 "nonimmediate_operand" "?Q,d")
+                 (match_operand:QI 1 "const0_operand" "")))
+   (set (match_operand:QI 2 "register_operand" "=d,0")
+        (match_dup 0))]
+  "s390_match_ccmode(insn, CCTmode)"
+  "@
+   icm\\t%2,1,%0
+   tml\\t%0,255"
+  [(set_attr "op_type" "RS,RI")
+   (set_attr "atype"   "mem,reg")])
+
+(define_insn "*tstqiCCT_cconly"
+  [(set (reg 33)
+        (compare (match_operand:QI 0 "nonimmediate_operand" "?Q,d")
+                 (match_operand:QI 1 "const0_operand" "")))]
+  "s390_match_ccmode(insn, CCTmode)"
+  "@
+   cli\\t%0,0
+   tml\\t%0,255"
+  [(set_attr "op_type" "SI,RI")
+   (set_attr "atype"   "mem,reg")])
+
 (define_insn "*tstqi"
   [(set (reg 33)
         (compare (match_operand:QI 0 "s_operand" "Q")
@@ -780,8 +857,8 @@
 ;
 
 (define_insn "movti"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=d,Q,d,m,Q")
-        (match_operand:TI 1 "general_operand" "Q,d,dKm,d,Q"))]
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=d,Q,d,o,Q")
+        (match_operand:TI 1 "general_operand" "Q,d,dKo,d,Q"))]
   "TARGET_64BIT"
   "@
    lmg\\t%0,%N0,%1
@@ -844,18 +921,15 @@
 ; movdi instruction pattern(s).
 ;
 
-;; If generating PIC code and operands[1] is a symbolic CONST, emit a
-;; move to get the address of the symbolic object from the GOT.
-
 (define_expand "movdi"
   [(set (match_operand:DI 0 "general_operand" "")
         (match_operand:DI 1 "general_operand" ""))]
   ""
   "
 {
-  /* Handle PIC symbolic constants.  */
-  if (TARGET_64BIT && flag_pic && SYMBOLIC_CONST (operands[1]))
-    emit_pic_move (operands, DImode);
+  /* Handle symbolic constants.  */
+  if (TARGET_64BIT && SYMBOLIC_CONST (operands[1]))
+    emit_symbolic_move (operands);
 
   /* During and after reload, we need to force constants
      to the literal pool ourselves, if necessary.  */
@@ -910,8 +984,8 @@
     (set_attr "type"    "la")])
 
 (define_insn "*movdi_64"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,m,!*f,!*f,!m,Q")
-        (match_operand:DI 1 "general_operand" "d,m,d,*f,m,*f,Q"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=d,d,m,!*f,!*f,!m,?Q")
+        (match_operand:DI 1 "general_operand" "d,m,d,*f,m,*f,?Q"))]
   "TARGET_64BIT"
   "@
    lgr\\t%0,%1
@@ -925,8 +999,8 @@
    (set_attr "atype"   "reg,mem,mem,reg,mem,mem,mem")])
 
 (define_insn "*movdi_31"
-  [(set (match_operand:DI 0 "nonimmediate_operand" "=d,Q,d,m,!*f,!*f,!m,Q")
-        (match_operand:DI 1 "general_operand" "Q,d,dKm,d,*f,m,*f,Q"))]
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=d,Q,d,o,!*f,!*f,!m,Q")
+        (match_operand:DI 1 "general_operand" "Q,d,dKo,d,*f,m,*f,Q"))]
   "!TARGET_64BIT"
   "@
    lm\\t%0,%N0,%1
@@ -1008,18 +1082,15 @@
 ; movsi instruction pattern(s).
 ;
 
-;; If generating PIC code and operands[1] is a symbolic CONST, emit a
-;; move to get the address of the symbolic object from the GOT.
-
 (define_expand "movsi"
   [(set (match_operand:SI 0 "general_operand" "")
         (match_operand:SI 1 "general_operand" ""))]
   ""
   "
 {
-  /* Handle PIC symbolic constants.  */
-  if (!TARGET_64BIT && flag_pic && SYMBOLIC_CONST (operands[1]))
-    emit_pic_move (operands, SImode);
+  /* Handle symbolic constants.  */
+  if (!TARGET_64BIT && SYMBOLIC_CONST (operands[1]))
+    emit_symbolic_move (operands);
 
   /* expr.c tries to load an effective address using 
      force_reg.  This fails because we don't have a 
@@ -1072,8 +1143,8 @@
   [(set_attr "op_type" "RI")])
 
 (define_insn "*movsi"
-  [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,m,!*f,!*f,!m,Q")
-        (match_operand:SI 1 "general_operand" "d,m,d,*f,m,*f,Q"))]
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=d,d,m,!*f,!*f,!m,?Q")
+        (match_operand:SI 1 "general_operand" "d,m,d,*f,m,*f,?Q"))]
   ""
   "@
    lr\\t%0,%1
@@ -1102,8 +1173,8 @@
 ;
 
 (define_insn "movhi"
-  [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,m,Q")
-        (match_operand:HI 1 "general_operand" "d,n,m,d,Q"))]
+  [(set (match_operand:HI 0 "nonimmediate_operand" "=d,d,d,m,?Q")
+        (match_operand:HI 1 "general_operand" "d,n,m,d,?Q"))]
   ""
   "@
    lr\\t%0,%1
@@ -1129,8 +1200,8 @@
 ;
 
 (define_insn "movqi_64"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m,Q,Q")
-        (match_operand:QI 1 "general_operand" "d,n,m,d,n,Q"))]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m,Q,?Q")
+        (match_operand:QI 1 "general_operand" "d,n,m,d,n,?Q"))]
   "TARGET_64BIT"
   "@
    lr\\t%0,%1
@@ -1144,8 +1215,8 @@
 
 
 (define_insn "movqi"
-  [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m,Q,Q")
-        (match_operand:QI 1 "general_operand" "d,n,m,d,n,Q"))]
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=d,d,d,m,Q,?Q")
+        (match_operand:QI 1 "general_operand" "d,n,m,d,n,?Q"))]
   ""
   "@
    lr\\t%0,%1
@@ -1226,8 +1297,8 @@
 }")
 
 (define_insn "*movdf_64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,m,d,d,m,Q")
-        (match_operand:DF 1 "general_operand" "f,m,f,d,m,d,Q"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,m,d,d,m,?Q")
+        (match_operand:DF 1 "general_operand" "f,m,f,d,m,d,?Q"))]
   "TARGET_64BIT"
   "@
    ldr\\t%0,%1
@@ -1241,8 +1312,8 @@
    (set_attr "atype"   "reg,mem,mem,reg,mem,mem,mem")])
 
 (define_insn "*movdf_31"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,m,d,Q,d,m,Q")
-        (match_operand:DF 1 "general_operand" "f,m,f,Q,d,dKm,d,Q"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,m,d,Q,d,o,Q")
+        (match_operand:DF 1 "general_operand" "f,m,f,Q,d,dKo,d,Q"))]
   "!TARGET_64BIT"
   "@
    ldr\\t%0,%1
@@ -1326,8 +1397,8 @@
 }")
 
 (define_insn "*movsf"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,m,d,d,m,Q")
-        (match_operand:SF 1 "general_operand" "f,m,f,d,m,d,Q"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,m,d,d,m,?Q")
+        (match_operand:SF 1 "general_operand" "f,m,f,d,m,d,?Q"))]
   ""
   "@
    ler\\t%0,%1
@@ -1990,8 +2061,7 @@
   operands[2] = GEN_INT (32 - INTVAL (operands[2]));
   operands[1] = change_address (operands[1], QImode, 0);
 }"
-  [(set_attr "type"    "o2")
-   (set_attr "atype"   "mem")])
+  [(set_attr "atype"   "mem")])
 
 (define_insn_and_split "*extracthi"
   [(set (match_operand:SI 0 "register_operand" "=d")
@@ -2012,8 +2082,7 @@
   operands[2] = GEN_INT (32 - INTVAL (operands[2]));
   operands[1] = change_address (operands[1], HImode, 0);
 }"
-  [(set_attr "type"    "o2")
-   (set_attr "atype"   "mem")])
+  [(set_attr "atype"   "mem")])
 
 ;
 ; extendsidi2 instruction pattern(s).
@@ -2315,8 +2384,7 @@
     [(set (strict_low_part (match_dup 2)) (match_dup 1))
      (clobber (reg:CC 33))])]
   "operands[2] = gen_lowpart (HImode, operands[0]);"
-  [(set_attr "type" "o2")
-   (set_attr "atype" "mem")])
+  [(set_attr "atype" "mem")])
  
 ;
 ; zero_extendqisi2 instruction pattern(s).
@@ -2351,8 +2419,7 @@
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
   "operands[2] = gen_lowpart (QImode, operands[0]);"
-  [(set_attr "type" "o2")
-   (set_attr "atype" "mem")])
+  [(set_attr "atype" "mem")])
  
 ;
 ; zero_extendqihi2 instruction pattern(s).
@@ -2387,8 +2454,7 @@
   [(set (match_dup 0) (const_int 0))
    (set (strict_low_part (match_dup 2)) (match_dup 1))]
   "operands[2] = gen_lowpart (QImode, operands[0]);"
-  [(set_attr "type" "o2")
-   (set_attr "atype" "mem")])
+  [(set_attr "atype" "mem")])
 
 
 ;
@@ -2837,25 +2903,6 @@
 ; adddi3 instruction pattern(s).
 ;
 
-(define_insn "*la_64_cc"
-  [(set (match_operand:DI 0 "register_operand" "=d")
-        (match_operand:QI 1 "address_operand"  "p"))
-   (clobber (reg:CC 33))]
-  "TARGET_64BIT
-   && preferred_la_operand_p (operands[1], 1)"
-  "#"
-  [(set_attr "op_type"  "RX")
-   (set_attr "atype"    "mem")
-   (set_attr "type"     "la")])
-
-(define_split
-  [(set (match_operand:DI 0 "register_operand" "")
-        (match_operand:QI 1 "address_operand" ""))
-   (clobber (reg:CC 33))]
-  "TARGET_64BIT && reload_completed
-   && preferred_la_operand_p (operands[1], 0)"
-  [(set (match_dup 0) (match_dup 1))])
-
 (define_insn "*adddi3_sign"
   [(set (match_operand:DI 0 "register_operand" "=d,d")
         (plus:DI (sign_extend:DI (match_operand:SI 2 "general_operand" "d,m"))
@@ -2976,7 +3023,7 @@
 (define_insn_and_split "*adddi3_31"
   [(set (match_operand:DI 0 "register_operand" "=&d")
         (plus:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
-                 (match_operand:DI 2 "general_operand" "dm") ) )
+                 (match_operand:DI 2 "general_operand" "do") ) )
    (clobber (reg:CC 33))]
   "!TARGET_64BIT"
   "#"
@@ -2997,15 +3044,14 @@
     [(set (match_dup 3) (plus:SI (match_dup 3) (const_int 1)))
      (clobber (reg:CC 33))])
    (match_dup 9)]
-  "operands[3] = operand_subword (operands[0], 0, 1, DImode);
-   operands[4] = operand_subword (operands[1], 0, 1, DImode);
-   operands[5] = operand_subword (operands[2], 0, 1, DImode);
-   operands[6] = operand_subword (operands[0], 1, 1, DImode);
-   operands[7] = operand_subword (operands[1], 1, 1, DImode);
-   operands[8] = operand_subword (operands[2], 1, 1, DImode);
+  "operands[3] = operand_subword (operands[0], 0, 0, DImode);
+   operands[4] = operand_subword (operands[1], 0, 0, DImode);
+   operands[5] = operand_subword (operands[2], 0, 0, DImode);
+   operands[6] = operand_subword (operands[0], 1, 0, DImode);
+   operands[7] = operand_subword (operands[1], 1, 0, DImode);
+   operands[8] = operand_subword (operands[2], 1, 0, DImode);
    operands[9] = gen_label_rtx ();"
-  [(set_attr "op_type"  "NN")
-   (set_attr "type"     "o3")])
+  [(set_attr "op_type"  "NN")])
 
 (define_expand "adddi3"
   [(parallel
@@ -3025,6 +3071,32 @@
    (set_attr "atype"   "mem")
    (set_attr "type"    "la")])
 
+(define_peephole2
+  [(parallel
+    [(set (match_operand:DI 0 "register_operand" "")
+          (match_operand:QI 1 "address_operand" ""))
+     (clobber (reg:CC 33))])]
+  "TARGET_64BIT
+   && strict_memory_address_p (VOIDmode, operands[1])
+   && preferred_la_operand_p (operands[1])"
+  [(set (match_dup 0) (match_dup 1))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:DI 0 "register_operand" "")
+        (match_operand:DI 1 "register_operand" ""))
+   (parallel
+    [(set (match_dup 0)
+          (plus:DI (match_dup 0)
+                   (match_operand:DI 2 "nonmemory_operand" "")))
+     (clobber (reg:CC 33))])]
+  "TARGET_64BIT
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && strict_memory_address_p (VOIDmode, gen_rtx_PLUS (DImode, operands[1], operands[2]))
+   && preferred_la_operand_p (gen_rtx_PLUS (DImode, operands[1], operands[2]))"
+  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+  "")
+
 (define_expand "reload_indi"
   [(parallel [(match_operand:DI 0 "register_operand" "=a")
               (match_operand:DI 1 "s390_plus_operand" "")
@@ -3041,25 +3113,6 @@
 ; addsi3 instruction pattern(s).
 ;
 
-(define_insn "*la_31_cc"
-  [(set (match_operand:SI 0 "register_operand" "=d")
-        (match_operand:QI 1 "address_operand"  "p"))
-   (clobber (reg:CC 33))]
-  "!TARGET_64BIT
-   && preferred_la_operand_p (operands[1], 1)"
-  "#"
-  [(set_attr "op_type"  "RX")
-   (set_attr "atype"    "mem")
-   (set_attr "type"     "la")])
-
-(define_split
-  [(set (match_operand:SI 0 "register_operand" "")
-        (match_operand:QI 1 "address_operand" ""))
-   (clobber (reg:CC 33))]
-  "!TARGET_64BIT && reload_completed
-   && preferred_la_operand_p (operands[1], 0)"
-  [(set (match_dup 0) (match_dup 1))])
-
 (define_insn "*addsi3_imm_cc"
   [(set (reg 33) 
         (compare (plus:SI (match_operand:SI 1 "nonimmediate_operand" "0")
@@ -3208,6 +3261,32 @@
    (set_attr "atype"    "mem")
    (set_attr "type"     "la")])
 
+(define_peephole2
+  [(parallel
+    [(set (match_operand:SI 0 "register_operand" "")
+          (match_operand:QI 1 "address_operand" ""))
+     (clobber (reg:CC 33))])]
+  "!TARGET_64BIT
+   && strict_memory_address_p (VOIDmode, operands[1])
+   && preferred_la_operand_p (operands[1])"
+  [(set (match_dup 0) (match_dup 1))]
+  "")
+
+(define_peephole2
+  [(set (match_operand:SI 0 "register_operand" "")
+        (match_operand:SI 1 "register_operand" ""))
+   (parallel
+    [(set (match_dup 0)
+          (plus:SI (match_dup 0)
+                   (match_operand:SI 2 "nonmemory_operand" "")))
+     (clobber (reg:CC 33))])]
+  "!TARGET_64BIT
+   && !reg_overlap_mentioned_p (operands[0], operands[2])
+   && strict_memory_address_p (VOIDmode, gen_rtx_PLUS (SImode, operands[1], operands[2]))
+   && preferred_la_operand_p (gen_rtx_PLUS (SImode, operands[1], operands[2]))"
+  [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 2)))]
+  "")
+
 (define_insn "*la_31_and"
   [(set (match_operand:SI 0 "register_operand" "=d")
         (and:SI (match_operand:QI 1 "address_operand" "p")
@@ -3431,7 +3510,7 @@
 (define_insn_and_split "*subdi3_31"
   [(set (match_operand:DI 0 "register_operand" "=&d")
         (minus:DI (match_operand:DI 1 "register_operand" "0")
-                  (match_operand:DI 2 "general_operand" "dm") ) )
+                  (match_operand:DI 2 "general_operand" "do") ) )
    (clobber (reg:CC 33))]
   "!TARGET_64BIT"
   "#"
@@ -3452,15 +3531,14 @@
     [(set (match_dup 3) (plus:SI (match_dup 3) (const_int -1)))
      (clobber (reg:CC 33))])
    (match_dup 9)]
-  "operands[3] = operand_subword (operands[0], 0, 1, DImode);
-   operands[4] = operand_subword (operands[1], 0, 1, DImode);
-   operands[5] = operand_subword (operands[2], 0, 1, DImode);
-   operands[6] = operand_subword (operands[0], 1, 1, DImode);
-   operands[7] = operand_subword (operands[1], 1, 1, DImode);
-   operands[8] = operand_subword (operands[2], 1, 1, DImode);
+  "operands[3] = operand_subword (operands[0], 0, 0, DImode);
+   operands[4] = operand_subword (operands[1], 0, 0, DImode);
+   operands[5] = operand_subword (operands[2], 0, 0, DImode);
+   operands[6] = operand_subword (operands[0], 1, 0, DImode);
+   operands[7] = operand_subword (operands[1], 1, 0, DImode);
+   operands[8] = operand_subword (operands[2], 1, 0, DImode);
    operands[9] = gen_label_rtx ();"
-  [(set_attr "op_type"  "NN")
-   (set_attr "type"     "o3")])
+  [(set_attr "op_type"  "NN")])
 
 (define_expand "subdi3"
   [(parallel
@@ -4591,7 +4669,7 @@
 
 (define_insn "*iordi3_oi"
   [(set (match_operand:DI 0 "register_operand" "=d")
-        (ior:DI (match_operand:DI 1 "nonimmediate_operand" "%0")
+        (ior:DI (match_operand:DI 1 "nonimmediate_operand" "0")
                 (match_operand:DI 2 "immediate_operand" "n")))
    (clobber (reg:CC 33))]
   "TARGET_64BIT && s390_single_hi (operands[2], DImode, 0) >= 0"
@@ -4677,7 +4755,7 @@
 
 (define_insn "*iorsi3_oi"
   [(set (match_operand:SI 0 "register_operand" "=d")
-        (ior:SI (match_operand:SI 1 "nonimmediate_operand" "%0")
+        (ior:SI (match_operand:SI 1 "nonimmediate_operand" "0")
                 (match_operand:SI 2 "immediate_operand" "n")))
    (clobber (reg:CC 33))]
   "TARGET_64BIT && s390_single_hi (operands[2], SImode, 0) >= 0"
@@ -6452,6 +6530,192 @@
     (set_attr "type"    "jsr")
     (set_attr "atype"   "mem")])
 
+;;
+;;- Thread-local storage support.
+;;
+
+(define_insn "get_tp_64"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=??d,Q")
+        (unspec:DI [(const_int 0)] UNSPEC_TP))]
+  "TARGET_64BIT"
+  "@
+   ear\\t%0,%%a0\;sllg\\t%0,%0,32\;ear\\t%0,%%a1
+   stam\\t%%a0,%%a1,%0"
+  [(set_attr "op_type" "NN,RS")
+   (set_attr "atype"   "reg,mem")
+   (set_attr "type"    "o3,*")
+   (set_attr "length"  "14,*")])
+
+(define_insn "get_tp_31"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "=d,Q")
+        (unspec:SI [(const_int 0)] UNSPEC_TP))]
+  "!TARGET_64BIT"
+  "@
+   ear\\t%0,%%a0
+   stam\\t%%a0,%%a0,%0"
+  [(set_attr "op_type" "RRE,RS")
+   (set_attr "atype"   "reg,mem")])
+
+(define_insn "set_tp_64"
+  [(unspec_volatile [(match_operand:DI 0 "general_operand" "??d,Q")] UNSPECV_SET_TP)
+   (clobber (match_scratch:SI 1 "=d,X"))]
+  "TARGET_64BIT"
+  "@
+   sar\\t%%a1,%0\;srlg\\t%1,%0,32\;sar\\t%%a0,%1
+   lam\\t%%a0,%%a1,%0"
+  [(set_attr "op_type" "NN,RS")
+   (set_attr "atype"   "reg,mem")
+   (set_attr "type"    "o3,*")
+   (set_attr "length"  "14,*")])
+
+(define_insn "set_tp_31"
+  [(unspec_volatile [(match_operand:SI 0 "general_operand" "d,Q")] UNSPECV_SET_TP)]
+  "!TARGET_64BIT"
+  "@
+   sar\\t%%a0,%0
+   lam\\t%%a0,%%a0,%0"
+  [(set_attr "op_type" "RRE,RS")
+   (set_attr "atype"   "reg,mem")])
+  
+(define_insn "*tls_load_64"
+  [(set (match_operand:DI 0 "register_operand" "=d")
+        (unspec:DI [(match_operand:DI 1 "memory_operand" "m")
+                    (match_operand:DI 2 "" "")]
+		   UNSPEC_TLS_LOAD))]
+  "TARGET_64BIT"
+  "lg\\t%0,%1%J2"
+  [(set_attr "op_type" "RXE")
+   (set_attr "atype"   "mem")])
+
+(define_insn "*tls_load_31"
+  [(set (match_operand:SI 0 "register_operand" "=d")
+        (unspec:SI [(match_operand:SI 1 "memory_operand" "m")
+                    (match_operand:SI 2 "" "")]
+		   UNSPEC_TLS_LOAD))]
+  "!TARGET_64BIT"
+  "l\\t%0,%1%J2"
+  [(set_attr "op_type" "RX")
+   (set_attr "atype"   "mem")])
+
+(define_expand "call_value_tls"
+  [(set (match_operand 0 "" "")
+        (call (const_int 0) (const_int 0)))
+   (use (match_operand 1 "" ""))]
+  ""
+  "
+{
+  rtx insn, sym;
+
+  if (!flag_pic)
+    abort ();
+
+  sym = s390_tls_get_offset ();
+  sym = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, sym), 113);
+  sym = gen_rtx_CONST (Pmode, sym);
+
+  /* Unless we can use the bras(l) insn, force the 
+     routine address into a register.  */
+  if (!TARGET_SMALL_EXEC && !TARGET_64BIT)
+    {
+      rtx target = gen_reg_rtx (Pmode);
+      emit_move_insn (target, sym);
+      sym = target;
+    }
+
+  sym = gen_rtx_MEM (QImode, sym);
+
+  /* Emit insn.  */
+  insn = emit_call_insn (
+	    gen_call_value_tls_exp (operands[0], sym, const0_rtx,
+  				    gen_rtx_REG (Pmode, RETURN_REGNUM),
+				    operands[1]));
+
+  /* The calling convention of __tls_get_offset uses the
+     GOT register implicitly.  */
+  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
+  use_reg (&CALL_INSN_FUNCTION_USAGE (insn), operands[0]);
+  CONST_OR_PURE_CALL_P (insn) = 1;
+
+  DONE;
+}")
+
+(define_expand "call_value_tls_exp"
+  [(parallel [(set (match_operand 0 "" "")
+                   (call (match_operand 1 "" "")
+                         (match_operand 2 "" "")))
+              (clobber (match_operand 3 "" ""))
+	      (use (match_operand 4 "" ""))])]
+  ""
+  "")
+
+(define_insn "brasl_tls"
+  [(set (match_operand 0 "register_operand" "=df")
+        (call (mem:QI (match_operand:DI 1 "bras_sym_operand" "X"))
+              (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_operand:DI 3 "register_operand" "=r"))
+   (use (match_operand:DI 4 "" ""))]
+  "TARGET_64BIT"
+  "brasl\\t%3,%1%J4"
+  [(set_attr "op_type" "RIL")
+   (set_attr "type"    "jsr")])
+
+(define_insn "bras_tls"
+  [(set (match_operand 0 "register_operand" "=df")
+        (call (mem:QI (match_operand:SI 1 "bras_sym_operand" "X"))
+              (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_operand:SI 3 "register_operand" "=r"))
+   (use (match_operand:SI 4 "" ""))]
+  "TARGET_SMALL_EXEC"
+  "bras\\t%3,%1%J4"
+  [(set_attr "op_type" "RI")
+   (set_attr "type"    "jsr")])
+
+(define_insn "basr_tls_64"
+  [(set (match_operand 0 "register_operand" "=df")
+        (call (mem:QI (match_operand:DI 1 "register_operand" "a"))
+              (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_operand:DI 3 "register_operand" "=r"))
+   (use (match_operand:DI 4 "" ""))]
+  "TARGET_64BIT"
+  "basr\\t%3,%1%J4"
+  [(set_attr "op_type" "RR")
+   (set_attr "type"    "jsr")])
+
+(define_insn "basr_tls_31"
+  [(set (match_operand 0 "register_operand" "=df")
+        (call (mem:QI (match_operand:SI 1 "register_operand" "a"))
+              (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_operand:SI 3 "register_operand" "=r"))
+   (use (match_operand:SI 4 "" ""))]
+  "!TARGET_64BIT"
+  "basr\\t%3,%1%J4"
+  [(set_attr "op_type" "RR")
+   (set_attr "type"    "jsr")
+   (set_attr "atype"   "mem")])
+
+(define_insn "bas_tls_64"
+  [(set (match_operand 0 "register_operand" "=df")
+        (call (mem:QI (match_operand:QI 1 "address_operand" "p"))
+              (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_operand:DI 3 "register_operand" "=r"))
+   (use (match_operand:DI 4 "" ""))]
+  "TARGET_64BIT"
+  "bas\\t%3,%a1%J4"
+  [(set_attr "op_type" "RX")
+   (set_attr "type"    "jsr")
+   (set_attr "atype"   "mem")])
+
+(define_insn "bas_tls_31"
+  [(set (match_operand 0 "register_operand" "=df")
+        (call (mem:QI (match_operand:QI 1 "address_operand" "p"))
+              (match_operand:SI 2 "const_int_operand" "n")))
+   (clobber (match_operand:SI 3 "register_operand" "=r"))
+   (use (match_operand:SI 4 "" ""))]
+  "!TARGET_64BIT"
+  "bas\\t%3,%a1%J4"
+   [(set_attr "op_type" "RX")
+    (set_attr "type"    "jsr")
+    (set_attr "atype"   "mem")])
 
 ;;
 ;;- Miscellaneous instructions.
@@ -6820,14 +7084,21 @@
         (label_ref (match_operand 1 "" "")))	
    (use (label_ref (match_operand 2 "" "")))]
   ""
-  "*
 {
-   if (s390_nr_constants) {
-     output_asm_insn (\"bras\\t%0,%2\", operands);
-     s390_output_constant_pool (operands[1], operands[2]);
-   }
-   return \"\";	
-}"
+   if (s390_nr_constants) 
+     {
+       output_asm_insn ("bras\\t%0,%2", operands);
+       s390_output_constant_pool (operands[1], operands[2]);
+     } 
+   else if (flag_pic)
+     {
+        /* We need the anchor label in any case.  */
+        ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
+                                   CODE_LABEL_NUMBER (operands[1]));
+     }
+
+   return "";	
+}
   [(set_attr "op_type" "NN")
    (set_attr "type"    "la")])
 
@@ -6837,13 +7108,14 @@
         (label_ref (match_operand 1 "" "")))	
    (use (label_ref (match_operand 2 "" "")))]
   ""
-  "*
 {
-   if (s390_nr_constants) {
-     output_asm_insn (\"larl\\t%0,%1\", operands);
-     s390_output_constant_pool (operands[1], operands[2]);
-   }
-   return \"\";	
-}"
+   if (s390_nr_constants) 
+     {
+       output_asm_insn ("larl\\t%0,%1", operands);
+       s390_output_constant_pool (operands[1], operands[2]);
+     }
+
+   return "";	
+}
   [(set_attr "op_type" "NN")
    (set_attr "type"    "la")])
diff --git a/gcc/config/sh/linux.h b/gcc/config/sh/linux.h
index 1a3948386fc..82106ed0d8e 100644
--- a/gcc/config/sh/linux.h
+++ b/gcc/config/sh/linux.h
@@ -52,6 +52,10 @@ do { \
      %{!rpath:-rpath /lib}} \
    %{static:-static}"
 
+/* The GNU C++ standard library requires that these macros be defined.  */
+#undef CPLUSPLUS_CPP_SPEC
+#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)"
+
 #undef LIB_SPEC
 #define LIB_SPEC \
   "%{shared: -lc} \
diff --git a/gcc/config/sh/sh-protos.h b/gcc/config/sh/sh-protos.h
index 4135b334af1..70d0ade0787 100644
--- a/gcc/config/sh/sh-protos.h
+++ b/gcc/config/sh/sh-protos.h
@@ -126,8 +126,8 @@ extern int sh_pr_n_sets PARAMS ((void));
 extern int sh_hard_regno_rename_ok PARAMS ((unsigned int, unsigned int));
 extern int sh_cfun_interrupt_handler_p PARAMS ((void));
 extern void sh_initialize_trampoline PARAMS ((rtx, rtx, rtx));
-extern enum reg_class sh_cannot_change_mode_class
-	      PARAMS ((enum machine_mode, enum machine_mode));
+extern bool sh_cannot_change_mode_class
+	      PARAMS ((enum machine_mode, enum machine_mode, enum reg_class));
 extern void sh_mark_label PARAMS ((rtx, int));
 extern int sh_register_move_cost
   PARAMS ((enum machine_mode mode, enum reg_class, enum reg_class));
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 75c9d263bfa..6f0f1464629 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -953,6 +953,7 @@ output_far_jump (insn, op)
   const char *jump;
   int far;
   int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+  rtx prev;
 
   this.lab = gen_label_rtx ();
 
@@ -977,10 +978,10 @@ output_far_jump (insn, op)
 	jump = "mov.l	%O0,%1; jmp	@%1";
     }
   /* If we have a scratch register available, use it.  */
-  if (GET_CODE (PREV_INSN (insn)) == INSN
-      && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
+  if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
+      && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
     {
-      this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
+      this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
       if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
 	jump = "mov.l	r1,@-r15; mova	%O0,r0; mov.l	@r0,r1; add	r1,r0; mov.l	@r15+,r1; jmp	@%1";
       output_asm_insn (jump, &this.lab);
@@ -2728,7 +2729,7 @@ find_barrier (num_mova, mova, from)
 	{
 	  if (num_mova)
 	    num_mova--;
-	  if (barrier_align (next_real_insn (from)) == CACHE_LOG)
+	  if (barrier_align (next_real_insn (from)) == align_jumps_log)
 	    {
 	      /* We have just passed the barrier in front of the
 		 ADDR_DIFF_VEC, which is stored in found_barrier.  Since
@@ -3142,7 +3143,7 @@ gen_block_redirect (jump, addr, need_block)
       rtx next = next_active_insn (next_active_insn (dest));
       if (next && GET_CODE (next) == JUMP_INSN
 	  && GET_CODE (PATTERN (next)) == SET
-	  && recog_memoized (next) == CODE_FOR_jump)
+	  && recog_memoized (next) == CODE_FOR_jump_compact)
 	{
 	  dest = JUMP_LABEL (next);
 	  if (dest
@@ -3166,6 +3167,13 @@ gen_block_redirect (jump, addr, need_block)
       rtx insn = emit_insn_before (gen_indirect_jump_scratch
 				   (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
 				   , jump);
+      /* ??? We would like this to have the scope of the jump, but that
+	 scope will change when a delay slot insn of an inner scope is added.
+	 Hence, after delay slot scheduling, we'll have to expect
+	 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
+	 the jump.  */
+	 
+      INSN_SCOPE (insn) = INSN_SCOPE (jump);
       INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
       return insn;
     }
@@ -3308,14 +3316,14 @@ barrier_align (barrier_or_label)
       return ((TARGET_SMALLCODE
 	       || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
 		   <= (unsigned)1 << (CACHE_LOG - 2)))
-	      ? 1 << TARGET_SHMEDIA : CACHE_LOG);
+	      ? 1 << TARGET_SHMEDIA : align_jumps_log);
     }
 
   if (TARGET_SMALLCODE)
     return 0;
 
   if (! TARGET_SH2 || ! optimize)
-    return CACHE_LOG;
+    return align_jumps_log;
 
   /* When fixing up pcloads, a constant table might be inserted just before
      the basic block that ends with the barrier.  Thus, we can't trust the
@@ -3380,7 +3388,8 @@ barrier_align (barrier_or_label)
 	      || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),	    
 		  (INSN_P (x) 
 		   && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
-		       || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch))))
+		       || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
+		       || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
 	    {
 	      rtx pat = PATTERN (prev);
 	      if (GET_CODE (pat) == PARALLEL)
@@ -3391,7 +3400,7 @@ barrier_align (barrier_or_label)
 	}
     }
   
-  return CACHE_LOG;
+  return align_jumps_log;
 }
 
 /* If we are inside a phony loop, almost any kind of label can turn up as the
@@ -3416,10 +3425,7 @@ sh_loop_align (label)
       || recog_memoized (next) == CODE_FOR_consttable_2)
     return 0;
 
-  if (TARGET_SH5)
-    return 3;
-
-  return 2;
+  return align_loops_log;
 }
 
 /* Exported to toplev.c.
@@ -4027,7 +4033,7 @@ split_branches (first)
 			|| ((beyond = next_active_insn (beyond))
 			    && GET_CODE (beyond) == JUMP_INSN))
 		    && GET_CODE (PATTERN (beyond)) == SET
-		    && recog_memoized (beyond) == CODE_FOR_jump
+		    && recog_memoized (beyond) == CODE_FOR_jump_compact
 		    && ((INSN_ADDRESSES
 			 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
 			 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
@@ -4041,7 +4047,7 @@ split_branches (first)
 	    if ((GET_CODE (next) == JUMP_INSN
 		 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
 		&& GET_CODE (PATTERN (next)) == SET
-		&& recog_memoized (next) == CODE_FOR_jump
+		&& recog_memoized (next) == CODE_FOR_jump_compact
 		&& ((INSN_ADDRESSES
 		     (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
 		     - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
@@ -4130,9 +4136,6 @@ split_branches (first)
    If relaxing, output the label and pseudo-ops used to link together
    calls and the instruction which set the registers.  */
 
-/* ??? This is unnecessary, and probably should be deleted.  This makes
-   the insn_addresses declaration above unnecessary.  */
-
 /* ??? The addresses printed by this routine for insns are nonsense for
    insns which are inside of a sequence where none of the inner insns have
    variable length.  This is because the second pass of shorten_branches
@@ -7355,14 +7358,14 @@ sh_initialize_trampoline (tramp, fnaddr, cxt)
       emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
 				 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
 				 movishori));
-      emit_insn (gen_rotldi3_mextr (quad0, quad0,
+      emit_insn (gen_rotrdi3_mextr (quad0, quad0,
 				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
       emit_insn (gen_ashldi3_media (quad0, quad0, GEN_INT (2)));
       emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
       emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
 				 gen_rtx_SUBREG (V2HImode, cxt, 0),
 				 movishori));
-      emit_insn (gen_rotldi3_mextr (cxtload, cxtload,
+      emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
 				    GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
       emit_insn (gen_ashldi3_media (cxtload, cxtload, GEN_INT (2)));
       if (TARGET_LITTLE_ENDIAN)
@@ -7740,24 +7743,25 @@ sh_expand_binop_v2sf (code, op0, op1, op2)
 
 /* Return the class of registers for which a mode change from FROM to TO
    is invalid.  */
-enum reg_class 
-sh_cannot_change_mode_class (from, to)
+bool
+sh_cannot_change_mode_class (from, to, class)
      enum machine_mode from, to;
+     enum reg_class class;
 {
   if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
     {
        if (TARGET_LITTLE_ENDIAN)
          {
 	   if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
-	     return DF_REGS;
+	     return reg_classes_intersect_p (DF_REGS, class);
 	 }
        else
 	 {
 	   if (GET_MODE_SIZE (from) < 8)
-	     return DF_HI_REGS;
+	     return reg_classes_intersect_p (DF_HI_REGS, class);
 	 }
     }
-  return NO_REGS;
+  return 0;
 }
 
 
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 57ebbb311af..14667794e20 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -475,6 +475,13 @@ do {									\
      to the pressure on R0.  */						\
   flag_schedule_insns = 0;						\
 									\
+  if (align_loops == 0)							\
+    align_loops =  1 << (TARGET_SH5 ? 3 : 2);				\
+  if (align_jumps == 0)							\
+    align_jumps = 1 << CACHE_LOG;					\
+  else if (align_jumps < (TARGET_SHMEDIA ? 4 : 2))			\
+    align_jumps = TARGET_SHMEDIA ? 4 : 2;				\
+									\
   /* Allocation boundary (in *bytes*) for the code of a function.	\
      SH1: 32 bit alignment is faster, because instructions are always	\
      fetched as a pair from a longword boundary.			\
@@ -482,6 +489,20 @@ do {									\
   if (align_functions == 0)						\
     align_functions							\
       = TARGET_SMALLCODE ? FUNCTION_BOUNDARY/8 : (1 << CACHE_LOG);	\
+  /* The linker relaxation code breaks when a function contains		\
+     alignments that are larger than that at the start of a		\
+     compilation unit.  */						\
+  if (TARGET_RELAX)							\
+    {									\
+      int min_align							\
+	= align_loops > align_jumps ? align_loops : align_jumps;	\
+									\
+      /* Also take possible .long constants / mova tables int account.	*/\
+      if (min_align < 4)						\
+	min_align = 4;							\
+      if (align_functions < min_align)					\
+	align_functions = min_align;					\
+    }									\
 } while (0)
 
 /* Target machine storage layout.  */
@@ -1346,8 +1367,9 @@ extern enum reg_class reg_class_from_letter[];
    ? R0_REGS								\
    : (CLASS == FPUL_REGS						\
       && ((GET_CODE (X) == REG						\
-          && (REGNO (X) == MACL_REG || REGNO (X) == MACH_REG		\
-	      || REGNO (X) == T_REG))))					\
+	   && (REGNO (X) == MACL_REG || REGNO (X) == MACH_REG		\
+	       || REGNO (X) == T_REG))					\
+	  || GET_CODE (X) == PLUS))					\
    ? GENERAL_REGS							\
    : CLASS == FPUL_REGS && immediate_operand ((X), (MODE))		\
    ? (GET_CODE (X) == CONST_INT && CONST_OK_FOR_I (INTVAL (X))		\
@@ -1377,8 +1399,8 @@ extern enum reg_class reg_class_from_letter[];
 /* ??? We need to renumber the internal numbers for the frnn registers
    when in little endian in order to allow mode size changes.  */
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO) 			    \
-  sh_cannot_change_mode_class (FROM, TO)
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) 			    \
+  sh_cannot_change_mode_class (FROM, TO, CLASS)
 
 /* Stack layout; function entry, exit and calling.  */
 
@@ -2574,7 +2596,7 @@ while (0)
 
 /* Specify the machine mode that this machine uses
    for the index in the tablejump instruction.  */
-#define CASE_VECTOR_MODE (TARGET_BIGTABLE ? SImode : HImode)
+#define CASE_VECTOR_MODE ((! optimize || TARGET_BIGTABLE) ? SImode : HImode)
 
 #define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
 ((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 127 \
@@ -3047,7 +3069,7 @@ while (0)
 /* Output an absolute table element.  */
 
 #define ASM_OUTPUT_ADDR_VEC_ELT(STREAM,VALUE)  				\
-  if (TARGET_BIGTABLE) 							\
+  if (! optimize || TARGET_BIGTABLE)					\
     asm_fprintf ((STREAM), "\t.long\t%LL%d\n", (VALUE)); 		\
   else									\
     asm_fprintf ((STREAM), "\t.word\t%LL%d\n", (VALUE));
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 4dbda27e4b9..56f93cc14c8 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -383,9 +383,9 @@
 	 (eq_attr "type" "jump")
 	 (cond [(eq_attr "med_branch_p" "yes")
 		(const_int 2)
-		(and (eq (symbol_ref "GET_CODE (PREV_INSN (insn))")
+		(and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))")
 			 (symbol_ref "INSN"))
-		     (eq (symbol_ref "INSN_CODE (PREV_INSN (insn))")
+		     (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
 			 (symbol_ref "code_for_indirect_jump_scratch")))
 		(if_then_else (eq_attr "braf_branch_p" "yes")
 			      (const_int 6)
@@ -5024,9 +5024,14 @@
 
 ;; This one has the additional purpose to record a possible scratch register
 ;; for the following branch.
+;; ??? Unfortunately, just setting the scratch register is not good enough,
+;; because the insn then might be deemed dead and deleted.  And we can't
+;; make the use in the jump insn explicit because that would disable
+;; delay slot scheduling from the target.
 (define_insn "indirect_jump_scratch"
   [(set (match_operand:SI 0 "register_operand" "=r")
-	(unspec:SI [(match_operand 1 "const_int_operand" "")] UNSPEC_BBR))]
+	(unspec:SI [(match_operand 1 "const_int_operand" "")] UNSPEC_BBR)) 
+   (set (pc) (unspec [(const_int 0)] UNSPEC_BBR))]
   "TARGET_SH1"
   ""
   [(set_attr "length" "0")])
@@ -5464,6 +5469,19 @@
   [(set_attr "type" "jump")
    (set_attr "needs_delay_slot" "yes")])
 
+;; ??? It would be much saner to explicitly use the scratch register
+;; in the jump insn, and have indirect_jump_scratch only set it,
+;; but fill_simple_delay_slots would refuse to do delay slot filling
+;; from the target then, as it uses simplejump_p.
+;;(define_insn "jump_compact_far"
+;;  [(set (pc)
+;;	(label_ref (match_operand 0 "" "")))
+;;   (use (match_operand 1 "register_operand" "r")]
+;;  "TARGET_SH1"
+;;  "* return output_far_jump(insn, operands[0], operands[1]);"
+;;  [(set_attr "type" "jump")
+;;   (set_attr "needs_delay_slot" "yes")])
+
 (define_insn "jump_media"
   [(set (pc)
 	(match_operand:DI 0 "target_operand" "b"))]
diff --git a/gcc/config/sh/t-linux b/gcc/config/sh/t-linux
index 2fc932cdc7f..60199dcb262 100644
--- a/gcc/config/sh/t-linux
+++ b/gcc/config/sh/t-linux
@@ -1,3 +1,6 @@
+# Don't run fixproto
+STMP_FIXPROTO =
+
 TARGET_LIBGCC2_CFLAGS = -fpic
 LIB1ASMFUNCS_CACHE = _ic_invalidate
 
diff --git a/gcc/config/sparc/freebsd.h b/gcc/config/sparc/freebsd.h
index 02cc7a1dfae..b2809ddb7cf 100644
--- a/gcc/config/sparc/freebsd.h
+++ b/gcc/config/sparc/freebsd.h
@@ -22,11 +22,12 @@ the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.  */
    Emacs needs to know if the arch is 64 or 32-bits.  */
 
 #undef  CPP_CPU64_DEFAULT_SPEC
-#define CPP_CPU64_DEFAULT_SPEC "-D__sparc64__ -D__sparc_v9__ -D__arch64__"
+#define CPP_CPU64_DEFAULT_SPEC \
+  "-D__sparc64__ -D__sparc_v9__ -D__sparcv9 -D__sparc__ -D__arch64__"
 
 /* Because we include sparc/sysv4.h.  */
 #undef  CPP_PREDEFINES
-#define CPP_PREDEFINES FBSD_CPP_PREDEFINES
+/* Do not define it here, we now use TARGET_OS_CPP_BUILTINS.  */
 
 #define LINK_SPEC "%(link_arch)						\
   %{!mno-relax:%{!r:-relax}}						\
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 9f608c352eb..7c25bc55a9a 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -3393,7 +3393,7 @@ mem_min_alignment (mem, desired)
 
 
 /* Vectors to keep interesting information about registers where it can easily
-   be got.  We use to use the actual mode value as the bit number, but there
+   be got.  We used to use the actual mode value as the bit number, but there
    are more than 32 modes now.  Instead we use two tables: one indexed by
    hard register number, and one indexed by mode.  */
 
@@ -4522,10 +4522,13 @@ function_arg_slotno (cum, mode, type, named, incoming_p, pregno, ppadding)
 
 struct function_arg_record_value_parms
 {
-  rtx ret;
-  int slotno, named, regbase;
-  unsigned int nregs;
-  int intoffset;
+  rtx ret;		/* return expression being built.  */
+  int slotno;		/* slot number of the argument.  */
+  int named;		/* whether the argument is named.  */
+  int regbase;		/* regno of the base register.  */
+  int stack;		/* 1 if part of the argument is on the stack.  */
+  int intoffset;	/* offset of the pending integer field.  */
+  unsigned int nregs;	/* number of words passed in registers.  */
 };
 
 static void function_arg_record_value_3
@@ -4600,8 +4603,13 @@ function_arg_record_value_1 (type, startbitpos, parms)
 		  this_slotno = parms->slotno + parms->intoffset
 		    / BITS_PER_WORD;
 
-		  intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
-		  intslots = MAX (intslots, 0);
+		  if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
+		    {
+		      intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
+		      /* We need to pass this field on the stack.  */
+		      parms->stack = 1;
+		    }
+
 		  parms->nregs += intslots;
 		  parms->intoffset = -1;
 		}
@@ -4666,7 +4674,7 @@ function_arg_record_value_3 (bitpos, parms)
     {
       regno = parms->regbase + this_slotno;
       reg = gen_rtx_REG (mode, regno);
-      XVECEXP (parms->ret, 0, parms->nregs)
+      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
 	= gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
 
       this_slotno += 1;
@@ -4739,7 +4747,7 @@ function_arg_record_value_2 (type, startbitpos, parms)
 		default: break;
 		}
 	      reg = gen_rtx_REG (mode, regno);
-	      XVECEXP (parms->ret, 0, parms->nregs)
+	      XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
 		= gen_rtx_EXPR_LIST (VOIDmode, reg,
 			   GEN_INT (bitpos / BITS_PER_UNIT));
 	      parms->nregs += 1;
@@ -4747,7 +4755,7 @@ function_arg_record_value_2 (type, startbitpos, parms)
 		{
 		  regno += GET_MODE_SIZE (mode) / 4;
 	  	  reg = gen_rtx_REG (mode, regno);
-		  XVECEXP (parms->ret, 0, parms->nregs)
+		  XVECEXP (parms->ret, 0, parms->stack + parms->nregs)
 		    = gen_rtx_EXPR_LIST (VOIDmode, reg,
 			GEN_INT ((bitpos + GET_MODE_BITSIZE (mode))
 				 / BITS_PER_UNIT));
@@ -4764,8 +4772,19 @@ function_arg_record_value_2 (type, startbitpos, parms)
 }
 
 /* Used by function_arg and function_value to implement the complex
-   SPARC64 structure calling conventions.  */
+   conventions of the 64-bit ABI for passing and returning structures.
+   Return an expression valid as a return value for the two macros
+   FUNCTION_ARG and FUNCTION_VALUE.
 
+   TYPE is the data type of the argument (as a tree).
+    This is null for libcalls where that information may
+    not be available.
+   MODE is the argument's machine mode.
+   SLOTNO is the index number of the argument's slot in the parameter array.
+   NAMED is nonzero if this argument is a named parameter
+    (otherwise it is an extra parameter matching an ellipsis).
+   REGBASE is the regno of the base register for the parameter array.  */
+   
 static rtx
 function_arg_record_value (type, mode, slotno, named, regbase)
      tree type;
@@ -4780,6 +4799,7 @@ function_arg_record_value (type, mode, slotno, named, regbase)
   parms.slotno = slotno;
   parms.named = named;
   parms.regbase = regbase;
+  parms.stack = 0;
 
   /* Compute how many registers we need.  */
   parms.nregs = 0;
@@ -4796,8 +4816,12 @@ function_arg_record_value (type, mode, slotno, named, regbase)
       intslots = (endbit - startbit) / BITS_PER_WORD;
       this_slotno = slotno + parms.intoffset / BITS_PER_WORD;
 
-      intslots = MIN (intslots, SPARC_INT_ARG_MAX - this_slotno);
-      intslots = MAX (intslots, 0);
+      if (intslots > 0 && intslots > SPARC_INT_ARG_MAX - this_slotno)
+        {
+	  intslots = MAX (0, SPARC_INT_ARG_MAX - this_slotno);
+	  /* We need to pass this field on the stack.  */
+	  parms.stack = 1;
+        }
 
       parms.nregs += intslots;
     }
@@ -4827,7 +4851,17 @@ function_arg_record_value (type, mode, slotno, named, regbase)
   if (nregs == 0)
     abort ();
 
-  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
+  parms.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (parms.stack + nregs));
+
+  /* If at least one field must be passed on the stack, generate
+     (parallel [(expr_list (nil) ...) ...]) so that all fields will
+     also be passed on the stack.  We can't do much better because the
+     semantics of FUNCTION_ARG_PARTIAL_NREGS doesn't handle the case
+     of structures for which the fields passed exclusively in registers
+     are not at the beginning of the structure.  */
+  if (parms.stack)
+    XVECEXP (parms.ret, 0, 0)
+      = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
 
   /* Fill in the entries.  */
   parms.nregs = 0;
@@ -6464,6 +6498,24 @@ print_operand (file, x, code)
       output_address (XEXP (x, 0));
       return;
 
+    case 's':
+      {
+	/* Print a sign-extended 32-bit value.  */
+	HOST_WIDE_INT i;
+	if (GET_CODE(x) == CONST_INT)
+	  i = INTVAL (x);
+	else if (GET_CODE(x) == CONST_DOUBLE)
+	  i = CONST_DOUBLE_LOW (x);
+	else
+	  {
+	    output_operand_lossage ("invalid %%s operand");
+	    return;
+	  }
+	i = trunc_int_for_mode (i, SImode);
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
+	return;
+      }
+
     case 0:
       /* Do nothing special.  */
       break;
@@ -7953,6 +8005,8 @@ sparc_check_64 (x, insn)
   return 0;
 }
 
+/* Returns assembly code to perform a DImode shift using
+   a 64-bit global or out register on SPARC-V8+.  */
 char *
 sparc_v8plus_shift (operands, insn, opcode)
      rtx *operands;
@@ -7961,8 +8015,11 @@ sparc_v8plus_shift (operands, insn, opcode)
 {
   static char asm_code[60];
 
-  if (GET_CODE (operands[3]) == SCRATCH)
+  /* The scratch register is only required when the destination
+     register is not a 64-bit global or out register.  */
+  if (which_alternative != 2)
     operands[3] = operands[0];
+
   if (GET_CODE (operands[1]) == CONST_INT)
     {
       output_asm_insn ("mov\t%1, %3", operands);
@@ -7976,6 +8033,7 @@ sparc_v8plus_shift (operands, insn, opcode)
     }
 
   strcpy(asm_code, opcode);
+
   if (which_alternative != 2)
     return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
   else
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 4dcff9105f0..7c6a7fd1d72 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -1366,7 +1366,8 @@ extern char leaf_reg_remap[];
    `K' is used for constants which can be loaded with a single sethi insn.
    `L' is used for the range of constants supported by the movcc insns.
    `M' is used for the range of constants supported by the movrcc insns.
-   `N' is like K, but for constants wider than 32 bits.  */
+   `N' is like K, but for constants wider than 32 bits.
+   `O' is used for the range which is just 4096.  */
 
 #define SPARC_SIMM10_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x200 < 0x400)
 #define SPARC_SIMM11_P(X) ((unsigned HOST_WIDE_INT) (X) + 0x400 < 0x800)
@@ -1390,6 +1391,7 @@ extern char leaf_reg_remap[];
    : (C) == 'L' ? SPARC_SIMM11_P (VALUE)		\
    : (C) == 'M' ? SPARC_SIMM10_P (VALUE)		\
    : (C) == 'N' ? SPARC_SETHI_P (VALUE)			\
+   : (C) == 'O' ? (VALUE) == 4096			\
    : 0)
 
 /* Similar, but for floating constants, and defining letters G and H.
@@ -1398,6 +1400,7 @@ extern char leaf_reg_remap[];
 #define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C)	\
   ((C) == 'G' ? fp_zero_operand (VALUE, GET_MODE (VALUE))	\
    : (C) == 'H' ? arith_double_operand (VALUE, DImode)		\
+   : (C) == 'O' ? arith_double_4096_operand (VALUE, DImode)	\
    : 0)
 
 /* Given an rtx X being reloaded into a reg required to be
@@ -2205,6 +2208,8 @@ do {									\
 
    If you change this, execute "rm explow.o recog.o reload.o".  */
 
+#define SYMBOLIC_CONST(X) symbolic_operand (X, VOIDmode)
+
 #define RTX_OK_FOR_BASE_P(X)						\
   ((GET_CODE (X) == REG && REG_OK_FOR_BASE_P (X))			\
   || (GET_CODE (X) == SUBREG						\
@@ -2238,6 +2243,8 @@ do {									\
 		   && GET_CODE (op1) != REG		\
 		   && GET_CODE (op1) != LO_SUM		\
 		   && GET_CODE (op1) != MEM		\
+		   && (! SYMBOLIC_CONST (op1)		\
+		       || MODE == Pmode)		\
 		   && (GET_CODE (op1) != CONST_INT	\
 		       || SMALL_INT (op1)))		\
 	    goto ADDR;					\
@@ -2325,6 +2332,34 @@ do {									\
   else if (GET_CODE (X) == CONST_INT && SMALL_INT (X))	\
     goto ADDR;						\
 }
+
+/* Go to LABEL if ADDR (a legitimate address expression)
+   has an effect that depends on the machine mode it is used for.
+
+   In PIC mode,
+
+      (mem:HI [%l7+a])
+
+   is not equivalent to
+   
+      (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
+
+   because [%l7+a+1] is interpreted as the address of (a+1).  */
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL)	\
+{							\
+  if (flag_pic == 1)					\
+    {							\
+      if (GET_CODE (ADDR) == PLUS)			\
+	{						\
+	  rtx op0 = XEXP (ADDR, 0);			\
+	  rtx op1 = XEXP (ADDR, 1);			\
+	  if (op0 == pic_offset_table_rtx		\
+	      && SYMBOLIC_CONST (op1))			\
+	    goto LABEL;					\
+	}						\
+    }							\
+}
 
 /* Try machine-dependent ways of modifying an illegitimate address
    to be legitimate.  If we find one, return the new, valid address.
@@ -2402,12 +2437,6 @@ do {                                                                    \
     }									\
   /* ??? 64-bit reloads.  */						\
 } while (0)
-
-/* Go to LABEL if ADDR (a legitimate address expression)
-   has an effect that depends on the machine mode it is used for.
-   On the SPARC this is never true.  */
-
-#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR,LABEL)
 
 /* Specify the machine mode that this machine uses
    for the index in the tablejump instruction.  */
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 4024290d436..ebe9d2b3d50 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -2062,9 +2062,9 @@
 
 (define_insn "*movdi_insn_sp32_v9"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-					"=T,o,T,U,o,r,r,r,?T,?f,?f,?o,?f,?e,?e,?W")
+					"=T,o,T,U,o,r,r,r,?T,?f,?f,?o,?e,?e,?W")
         (match_operand:DI 1 "input_operand"
-					" J,J,U,T,r,o,i,r, f, T, o, f, f, e, W, e"))]
+					" J,J,U,T,r,o,i,r, f, T, o, f, e, W, e"))]
   "! TARGET_ARCH64 && TARGET_V9
    && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)"
   "@
@@ -2080,13 +2080,12 @@
    ldd\t%1, %0
    #
    #
-   #
    fmovd\\t%1, %0
    ldd\\t%1, %0
    std\\t%1, %0"
-  [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,*,fpmove,fpload,fpstore")
-   (set_attr "length" "*,2,*,*,2,2,2,2,*,*,2,2,2,*,*,*")
-   (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,*,double,*,*")])
+  [(set_attr "type" "store,store,store,load,*,*,*,*,fpstore,fpload,*,*,fpmove,fpload,fpstore")
+   (set_attr "length" "*,2,*,*,2,2,2,2,*,*,2,2,*,*,*")
+   (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*")])
 
 (define_insn "*movdi_insn_sp32"
   [(set (match_operand:DI 0 "nonimmediate_operand"
@@ -2425,7 +2424,14 @@
 (define_split
   [(set (match_operand:DI 0 "register_operand" "")
         (match_operand:DI 1 "register_operand" ""))]
-  "! TARGET_ARCH64 && reload_completed"
+  "reload_completed
+   && (! TARGET_V9
+       || (! TARGET_ARCH64
+           && ((GET_CODE (operands[0]) == REG
+                && REGNO (operands[0]) < 32)
+               || (GET_CODE (operands[0]) == SUBREG
+                   && GET_CODE (SUBREG_REG (operands[0])) == REG
+                   && REGNO (SUBREG_REG (operands[0])) < 32))))"
   [(clobber (const_int 0))]
 {
   rtx set_dest = operands[0];
@@ -4742,13 +4748,11 @@
 ;;- arithmetic instructions
 
 (define_expand "adddi3"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
-		 (match_operand:DI 2 "arith_double_add_operand" "rHI")))]
+  [(set (match_operand:DI 0 "register_operand" "")
+	(plus:DI (match_operand:DI 1 "register_operand" "")
+		 (match_operand:DI 2 "arith_double_add_operand" "")))]
   ""
 {
-  HOST_WIDE_INT i;
-
   if (! TARGET_ARCH64)
     {
       emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
@@ -4759,21 +4763,6 @@
 				   gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
       DONE;
     }
-  if (arith_double_4096_operand(operands[2], DImode))
-    {
-      switch (GET_CODE (operands[1]))
-	{
-	case CONST_INT: i = INTVAL (operands[1]); break;
-	case CONST_DOUBLE: i = CONST_DOUBLE_LOW (operands[1]); break;
-	default:
-	  emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-				  gen_rtx_MINUS (DImode, operands[1],
-						 GEN_INT(-4096))));
-	  DONE;
-	}
-      emit_insn (gen_movdi (operands[0], GEN_INT (i + 4096)));
-      DONE;
-    }
 })
 
 (define_insn_and_split "adddi3_insn_sp32"
@@ -4939,40 +4928,24 @@
   [(set_attr "length" "2")])
 
 (define_insn "*adddi3_sp64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(plus:DI (match_operand:DI 1 "arith_double_operand" "%r")
-		 (match_operand:DI 2 "arith_double_operand" "rHI")))]
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(plus:DI (match_operand:DI 1 "register_operand" "%r,r")
+		 (match_operand:DI 2 "arith_double_add_operand" "rHI,O")))]
   "TARGET_ARCH64"
-  "add\t%1, %2, %0")
-
-(define_expand "addsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,d")
-	(plus:SI (match_operand:SI 1 "arith_operand" "%r,d")
-		 (match_operand:SI 2 "arith_add_operand" "rI,d")))]
-  ""
-{
-  if (arith_4096_operand(operands[2], SImode))
-    {
-      if (GET_CODE (operands[1]) == CONST_INT)
-	emit_insn (gen_movsi (operands[0],
-			      GEN_INT (INTVAL (operands[1]) + 4096)));
-      else
-	emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-				gen_rtx_MINUS (SImode, operands[1],
-					       GEN_INT(-4096))));
-      DONE;
-    }
-})
+  "@
+   add\t%1, %2, %0
+   sub\t%1, -%2, %0")
 
-(define_insn "*addsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,d")
-	(plus:SI (match_operand:SI 1 "arith_operand" "%r,d")
-		 (match_operand:SI 2 "arith_operand" "rI,d")))]
+(define_insn "addsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,d")
+	(plus:SI (match_operand:SI 1 "register_operand" "%r,r,d")
+		 (match_operand:SI 2 "arith_add_operand" "rI,O,d")))]
   ""
   "@
    add\t%1, %2, %0
+   sub\t%1, -%2, %0
    fpadd32s\t%1, %2, %0"
-  [(set_attr "type" "*,fp")])
+  [(set_attr "type" "*,*,fp")])
 
 (define_insn "*cmp_cc_plus"
   [(set (reg:CC_NOOV 100)
@@ -5015,9 +4988,9 @@
   [(set_attr "type" "compare")])
 
 (define_expand "subdi3"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(minus:DI (match_operand:DI 1 "register_operand" "r")
-		  (match_operand:DI 2 "arith_double_add_operand" "rHI")))]
+  [(set (match_operand:DI 0 "register_operand" "")
+	(minus:DI (match_operand:DI 1 "register_operand" "")
+		  (match_operand:DI 2 "arith_double_add_operand" "")))]
   ""
 {
   if (! TARGET_ARCH64)
@@ -5030,13 +5003,6 @@
 				   gen_rtx_REG (CCmode, SPARC_ICC_REG)))));
       DONE;
     }
-  if (arith_double_4096_operand(operands[2], DImode))
-    {
-      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			      gen_rtx_PLUS (DImode, operands[1],
-					    GEN_INT(-4096))));
-      DONE;
-    }
 })
 
 (define_insn_and_split "*subdi3_sp32"
@@ -5118,36 +5084,24 @@
   [(set_attr "length" "2")])
 
 (define_insn "*subdi3_sp64"
-  [(set (match_operand:DI 0 "register_operand" "=r")
-	(minus:DI (match_operand:DI 1 "register_operand" "r")
-		  (match_operand:DI 2 "arith_double_operand" "rHI")))]
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(minus:DI (match_operand:DI 1 "register_operand" "r,r")
+		  (match_operand:DI 2 "arith_double_add_operand" "rHI,O")))]
   "TARGET_ARCH64"
-  "sub\t%1, %2, %0")
-
-(define_expand "subsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,d")
-	(minus:SI (match_operand:SI 1 "register_operand" "r,d")
-		  (match_operand:SI 2 "arith_add_operand" "rI,d")))]
-  ""
-{
-  if (arith_4096_operand(operands[2], SImode))
-    {
-      emit_insn (gen_rtx_SET (VOIDmode, operands[0],
-			      gen_rtx_PLUS (SImode, operands[1],
-					    GEN_INT(-4096))));
-      DONE;
-    }
-})
+  "@
+   sub\t%1, %2, %0
+   add\t%1, -%2, %0")
 
-(define_insn "*subsi3"
-  [(set (match_operand:SI 0 "register_operand" "=r,d")
-	(minus:SI (match_operand:SI 1 "register_operand" "r,d")
-		  (match_operand:SI 2 "arith_operand" "rI,d")))]
+(define_insn "subsi3"
+  [(set (match_operand:SI 0 "register_operand" "=r,r,d")
+	(minus:SI (match_operand:SI 1 "register_operand" "r,r,d")
+		  (match_operand:SI 2 "arith_add_operand" "rI,O,d")))]
   ""
   "@
    sub\t%1, %2, %0
+   add\t%1, -%2, %0
    fpsub32s\t%1, %2, %0"
-  [(set_attr "type" "*,fp")])
+  [(set_attr "type" "*,*,fp")])
 
 (define_insn "*cmp_minus_cc"
   [(set (reg:CC_NOOV 100)
@@ -5283,9 +5237,12 @@
       if (TARGET_V8PLUS)
 	emit_insn (gen_const_mulsidi3_v8plus (operands[0], operands[1],
 					      operands[2]));
-      else
+      else if (TARGET_ARCH32)
 	emit_insn (gen_const_mulsidi3_sp32 (operands[0], operands[1],
 					    operands[2]));
+      else 
+	emit_insn (gen_const_mulsidi3_sp64 (operands[0], operands[1],
+					    operands[2]));
       DONE;
     }
   if (TARGET_V8PLUS)
@@ -5314,7 +5271,7 @@
 (define_insn "const_mulsidi3_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=h,r")
 	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
-		 (match_operand:SI 2 "small_int" "I,I")))
+		 (match_operand:DI 2 "small_int" "I,I")))
    (clobber (match_scratch:SI 3 "=X,&h"))]
   "TARGET_V8PLUS"
   "@
@@ -5355,7 +5312,7 @@
 (define_insn "const_mulsidi3_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
-		 (match_operand:SI 2 "small_int" "I")))]
+		 (match_operand:DI 2 "small_int" "I")))]
   "TARGET_HARD_MUL32"
 {
   return TARGET_SPARCLET
@@ -5372,7 +5329,7 @@
 (define_insn "const_mulsidi3_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
-		 (match_operand:SI 2 "small_int" "I")))]
+		 (match_operand:DI 2 "small_int" "I")))]
   "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
   "smul\t%1, %2, %0"
   [(set_attr "type" "imul")])
@@ -5444,7 +5401,7 @@
   [(set (match_operand:SI 0 "register_operand" "=h,r")
 	(truncate:SI
 	 (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
-			       (match_operand 2 "small_int" "i,i"))
+			       (match_operand:DI 2 "small_int" "i,i"))
 		      (match_operand:SI 3 "const_int_operand" "i,i"))))
    (clobber (match_scratch:SI 4 "=X,&h"))]
   "TARGET_V8PLUS"
@@ -5471,7 +5428,7 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(truncate:SI
 	 (lshiftrt:DI (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "r"))
-			       (match_operand:SI 2 "register_operand" "r"))
+			       (match_operand:DI 2 "small_int" "i"))
 		      (const_int 32))))]
   "TARGET_HARD_MUL32"
   "smul\t%1, %2, %%g0\n\trd\t%%y, %0"
@@ -5489,9 +5446,12 @@
       if (TARGET_V8PLUS)
 	emit_insn (gen_const_umulsidi3_v8plus (operands[0], operands[1],
 					       operands[2]));
-      else
+      else if (TARGET_ARCH32)
 	emit_insn (gen_const_umulsidi3_sp32 (operands[0], operands[1],
 					     operands[2]));
+      else 
+	emit_insn (gen_const_umulsidi3_sp64 (operands[0], operands[1],
+					     operands[2]));
       DONE;
     }
   if (TARGET_V8PLUS)
@@ -5546,12 +5506,12 @@
 (define_insn "const_umulsidi3_sp32"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
-		 (match_operand:SI 2 "uns_small_int" "")))]
+		 (match_operand:DI 2 "uns_small_int" "")))]
   "TARGET_HARD_MUL32"
 {
   return TARGET_SPARCLET
-         ? "umuld\t%1, %2, %L0"
-         : "umul\t%1, %2, %L0\n\trd\t%%y, %H0";
+         ? "umuld\t%1, %s2, %L0"
+         : "umul\t%1, %s2, %L0\n\trd\t%%y, %H0";
 }
   [(set (attr "type")
 	(if_then_else (eq_attr "isa" "sparclet")
@@ -5563,21 +5523,21 @@
 (define_insn "const_umulsidi3_sp64"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
-		 (match_operand:SI 2 "uns_small_int" "")))]
+		 (match_operand:DI 2 "uns_small_int" "")))]
   "TARGET_DEPRECATED_V8_INSNS && TARGET_ARCH64"
-  "umul\t%1, %2, %0"
+  "umul\t%1, %s2, %0"
   [(set_attr "type" "imul")])
 
 ;; XXX
 (define_insn "const_umulsidi3_v8plus"
   [(set (match_operand:DI 0 "register_operand" "=h,r")
 	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
-		 (match_operand:SI 2 "uns_small_int" "")))
+		 (match_operand:DI 2 "uns_small_int" "")))
    (clobber (match_scratch:SI 3 "=X,h"))]
   "TARGET_V8PLUS"
   "@
-   umul\t%1, %2, %L0\n\tsrlx\t%L0, 32, %H0
-   umul\t%1, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
+   umul\t%1, %s2, %L0\n\tsrlx\t%L0, 32, %H0
+   umul\t%1, %s2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0"
   [(set_attr "type" "multi")
    (set_attr "length" "2,3")])
 
@@ -5630,13 +5590,13 @@
   [(set (match_operand:SI 0 "register_operand" "=h,r")
 	(truncate:SI
 	 (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r,r"))
-			       (match_operand:SI 2 "uns_small_int" ""))
+			       (match_operand:DI 2 "uns_small_int" ""))
 		      (match_operand:SI 3 "const_int_operand" "i,i"))))
    (clobber (match_scratch:SI 4 "=X,h"))]
   "TARGET_V8PLUS"
   "@
-   umul\t%1, %2, %0\n\tsrlx\t%0, %3, %0
-   umul\t%1, %2, %4\n\tsrlx\t%4, %3, %0"
+   umul\t%1, %s2, %0\n\tsrlx\t%0, %3, %0
+   umul\t%1, %s2, %4\n\tsrlx\t%4, %3, %0"
   [(set_attr "type" "multi")
    (set_attr "length" "2")])
 
@@ -5657,10 +5617,10 @@
   [(set (match_operand:SI 0 "register_operand" "=r")
 	(truncate:SI
 	 (lshiftrt:DI (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "r"))
-			       (match_operand:SI 2 "uns_small_int" ""))
+			       (match_operand:DI 2 "uns_small_int" ""))
 		      (const_int 32))))]
   "TARGET_HARD_MUL32"
-  "umul\t%1, %2, %%g0\n\trd\t%%y, %0"
+  "umul\t%1, %s2, %%g0\n\trd\t%%y, %0"
   [(set_attr "type" "multi")
    (set_attr "length" "2")])
 
@@ -7246,7 +7206,7 @@
 	  == INSN_ADDRESSES (INSN_UID (insn))))
     return "b\t%l0%#";
   else
-    return TARGET_V9 ? "ba,pt%*\t%%xcc, %l0%(" : "b%*\t%l0%(";
+    return TARGET_V9 ? "ba%*,pt\t%%xcc, %l0%(" : "b%*\t%l0%(";
 }
   [(set_attr "type" "uncond_branch")])
 
@@ -7949,7 +7909,7 @@
    && mems_ok_for_ldd_peep (operands[0], operands[1], NULL_RTX)"
   [(set (match_dup 0)
        (const_int 0))]
-  "operands[0] = change_address (operands[0], DImode, NULL);")
+  "operands[0] = widen_memory_access (operands[0], DImode, 0);")
 
 (define_peephole2
   [(set (match_operand:SI 0 "memory_operand" "")
@@ -7960,7 +7920,7 @@
    && mems_ok_for_ldd_peep (operands[1], operands[0], NULL_RTX)"
   [(set (match_dup 1)
        (const_int 0))]
-  "operands[1] = change_address (operands[1], DImode, NULL);")
+  "operands[1] = widen_memory_access (operands[1], DImode, 0);")
 
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand" "")
@@ -7971,7 +7931,7 @@
    && mems_ok_for_ldd_peep (operands[1], operands[3], operands[0])" 
   [(set (match_dup 0)
 	(match_dup 1))]
-  "operands[1] = change_address (operands[1], DImode, NULL);
+  "operands[1] = widen_memory_access (operands[1], DImode, 0);
    operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));")
 
 (define_peephole2
@@ -7983,7 +7943,7 @@
    && mems_ok_for_ldd_peep (operands[0], operands[2], NULL_RTX)"
   [(set (match_dup 0)
 	(match_dup 1))]
-  "operands[0] = change_address (operands[0], DImode, NULL);
+  "operands[0] = widen_memory_access (operands[0], DImode, 0);
    operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));")
 
 (define_peephole2
@@ -7995,7 +7955,7 @@
    && mems_ok_for_ldd_peep (operands[1], operands[3], operands[0])"
   [(set (match_dup 0)
 	(match_dup 1))]
-  "operands[1] = change_address (operands[1], DFmode, NULL);
+  "operands[1] = widen_memory_access (operands[1], DFmode, 0);
    operands[0] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
 
 (define_peephole2
@@ -8007,7 +7967,7 @@
   && mems_ok_for_ldd_peep (operands[0], operands[2], NULL_RTX)"
   [(set (match_dup 0)
 	(match_dup 1))]
-  "operands[0] = change_address (operands[0], DFmode, NULL);
+  "operands[0] = widen_memory_access (operands[0], DFmode, 0);
    operands[1] = gen_rtx_REG (DFmode, REGNO (operands[1]));")
 
 (define_peephole2
@@ -8019,7 +7979,7 @@
   && mems_ok_for_ldd_peep (operands[3], operands[1], operands[0])"
   [(set (match_dup 2)
 	(match_dup 3))]
-   "operands[3] = change_address (operands[3], DImode, NULL);
+   "operands[3] = widen_memory_access (operands[3], DImode, 0);
     operands[2] = gen_rtx_REG (DImode, REGNO (operands[2]));")
 
 (define_peephole2
@@ -8031,7 +7991,7 @@
   && mems_ok_for_ldd_peep (operands[2], operands[0], NULL_RTX)" 
   [(set (match_dup 2)
 	(match_dup 3))]
-  "operands[2] = change_address (operands[2], DImode, NULL);
+  "operands[2] = widen_memory_access (operands[2], DImode, 0);
    operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
    ")
  
@@ -8044,7 +8004,7 @@
   && mems_ok_for_ldd_peep (operands[3], operands[1], operands[0])"
   [(set (match_dup 2)
 	(match_dup 3))]
-  "operands[3] = change_address (operands[3], DFmode, NULL);
+  "operands[3] = widen_memory_access (operands[3], DFmode, 0);
    operands[2] = gen_rtx_REG (DFmode, REGNO (operands[2]));")
 
 (define_peephole2
@@ -8056,7 +8016,7 @@
   && mems_ok_for_ldd_peep (operands[2], operands[0], NULL_RTX)"
   [(set (match_dup 2)
 	(match_dup 3))]
-  "operands[2] = change_address (operands[2], DFmode, NULL);
+  "operands[2] = widen_memory_access (operands[2], DFmode, 0);
    operands[3] = gen_rtx_REG (DFmode, REGNO (operands[3]));")
  
 ;; Optimize the case of following a reg-reg move with a test
diff --git a/gcc/config/t-darwin b/gcc/config/t-darwin
index aca59ba4f30..7fe5e9397e2 100644
--- a/gcc/config/t-darwin
+++ b/gcc/config/t-darwin
@@ -18,5 +18,5 @@ $(T)crt2$(objext): $(srcdir)/config/darwin-crt2.c $(GCC_PASSES) \
 
 # Use unwind-dw2-fde-darwin
 LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-darwin.c \
-  $(srcdir)/unwind-sjlj.c
+  $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
 LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
diff --git a/gcc/config/t-linux b/gcc/config/t-linux
index 3c73561fd26..61bce29c245 100644
--- a/gcc/config/t-linux
+++ b/gcc/config/t-linux
@@ -12,5 +12,5 @@ SHLIB_MAPFILES += $(srcdir)/config/libgcc-glibc.ver
 
 # Use unwind-dw2-fde-glibc
 LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde-glibc.c \
-  $(srcdir)/unwind-sjlj.c
+  $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
 LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h unwind-dw2-fde.c
diff --git a/gcc/config/t-linux-gnulibc1 b/gcc/config/t-linux-gnulibc1
index 56a471579ed..52effd5ca2f 100644
--- a/gcc/config/t-linux-gnulibc1
+++ b/gcc/config/t-linux-gnulibc1
@@ -3,5 +3,5 @@ T_CFLAGS = -DUSE_GNULIBC_1
 
 # Use unwind-dw2-fde
 LIB2ADDEH = $(srcdir)/unwind-dw2.c $(srcdir)/unwind-dw2-fde.c \
-  $(srcdir)/unwind-sjlj.c
+  $(srcdir)/unwind-sjlj.c $(srcdir)/unwind-c.c
 LIB2ADDEHDEP = unwind.inc unwind-dw2-fde.h
diff --git a/gcc/config/t-netbsd b/gcc/config/t-netbsd
index 919543ad427..fa2a4886320 100644
--- a/gcc/config/t-netbsd
+++ b/gcc/config/t-netbsd
@@ -1,9 +1,5 @@
 # Don't run fixproto
 STMP_FIXPROTO =
 
-# We don't want the base GCC user headers, but we do want
-# any extras a target might specify.
-USER_H = $(EXTRA_HEADERS)
-
 # Always build crtstuff with PIC.
 CRTSTUFF_T_CFLAGS = -fPIC
diff --git a/gcc/config/t-slibgcc-elf-ver b/gcc/config/t-slibgcc-elf-ver
index c02ff9d6b7c..a176b10fa0a 100644
--- a/gcc/config/t-slibgcc-elf-ver
+++ b/gcc/config/t-slibgcc-elf-ver
@@ -19,11 +19,11 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
 SHLIB_INSTALL = \
-	$$(SHELL) $$(srcdir)/mkinstalldirs $$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+	$$(SHELL) $$(srcdir)/mkinstalldirs $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
 	$(INSTALL_DATA) $(SHLIB_NAME) \
-	  $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
-	rm -f $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+	  $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+	rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
 	$(LN_S) $(SHLIB_SONAME) \
-	  $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+	  $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
 SHLIB_MKMAP = $(srcdir)/mkmap-symver.awk
 SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/t-slibgcc-sld b/gcc/config/t-slibgcc-sld
index c11a5721966..6bdd521da1b 100644
--- a/gcc/config/t-slibgcc-sld
+++ b/gcc/config/t-slibgcc-sld
@@ -17,11 +17,11 @@ SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
 # $(slibdir) double quoted to protect it from expansion while building
 # libgcc.mk.  We want this delayed until actual install time.
 SHLIB_INSTALL = \
-	$$(SHELL) $$(srcdir)/mkinstalldirs $$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+	$$(SHELL) $$(srcdir)/mkinstalldirs $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
 	$(INSTALL_DATA) $(SHLIB_NAME) \
-	  $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
-	rm -f $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+	  $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+	rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
 	$(LN_S) $(SHLIB_SONAME) \
-	  $$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
+	  $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
 SHLIB_MKMAP = $(srcdir)/mkmap-symver.awk
 SHLIB_MAPFILES = $(srcdir)/libgcc-std.ver
diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h
index b04b59ef690..551c3843af1 100644
--- a/gcc/config/v850/v850.h
+++ b/gcc/config/v850/v850.h
@@ -186,7 +186,7 @@ extern int target_flags;
    { "no-app-regs",              MASK_NO_APP_REGS, 			\
        				N_("Do not use registers r2 and r5") }, \
    { "strict-align",             MASK_STRICT_ALIGN,			\
-				N_("Enfore strict alignment") },        \
+				N_("Enforce strict alignment") },       \
    { "no-strict-align",         -MASK_STRICT_ALIGN, "" },		\
    { "big-switch",		 MASK_BIG_SWITCH, 			\
        				N_("Use 4 byte entries in switch tables") },\
diff --git a/gcc/config/xtensa/linux.h b/gcc/config/xtensa/linux.h
index 8f10577312e..1930bac0b53 100644
--- a/gcc/config/xtensa/linux.h
+++ b/gcc/config/xtensa/linux.h
@@ -1,6 +1,6 @@
 /* Xtensa Linux configuration.
    Derived from the configuration for GCC for Intel i386 running Linux.
-   Copyright (C) 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -26,11 +26,11 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
     builtin_define ("__ELF__");					\
     builtin_define ("__gnu_linux__");				\
     builtin_assert ("system=posix");				\
-    /* The GNU C++ standard library requires this.  */		\
-    if (c_language == clk_cplusplus)				\
-      builtin_define ("_GNU_SOURCE");				\
   } while (0)
 
+#undef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}"
+
 #undef TARGET_VERSION
 #define TARGET_VERSION fputs (" (Xtensa GNU/Linux with ELF)", stderr);
 
@@ -45,12 +45,6 @@ Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 
 #undef ASM_FINAL_SPEC
 
-#undef LIB_SPEC
-#define LIB_SPEC \
-  "%{shared: -lc} \
-   %{!shared: %{pthread:-lpthread} \
-   %{profile:-lc_p} %{!profile: -lc}}"
-
 #undef LINK_SPEC
 #define LINK_SPEC \
  "%{shared:-shared} \
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 195d71da642..47befc04be9 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -1,5 +1,5 @@
 /* Prototypes of target machine for GNU compiler for Xtensa.
-   Copyright 2001,2002 Free Software Foundation, Inc.
+   Copyright 2001,2002,2003 Free Software Foundation, Inc.
    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
 
 This file is part of GCC.
@@ -109,8 +109,6 @@ extern int xtensa_mem_offset PARAMS ((unsigned, enum machine_mode));
 extern void xtensa_setup_frame_addresses PARAMS ((void));
 extern int xtensa_dbx_register_number PARAMS ((int));
 extern void override_options PARAMS ((void));
-extern void xtensa_declare_object
-  PARAMS ((FILE *, char *, char *, char *, int));
 extern long compute_frame_size PARAMS ((int));
 extern int xtensa_frame_pointer_required PARAMS ((void));
 extern void xtensa_function_prologue PARAMS ((FILE *, int));
diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
index babb5b06a51..d72a3e94cc2 100644
--- a/gcc/config/xtensa/xtensa.c
+++ b/gcc/config/xtensa/xtensa.c
@@ -1,5 +1,5 @@
 /* Subroutines for insn-output.c for Tensilica's Xtensa architecture.
-   Copyright 2001,2002 Free Software Foundation, Inc.
+   Copyright 2001,2002,2003 Free Software Foundation, Inc.
    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
 
 This file is part of GCC.
@@ -2082,22 +2082,6 @@ print_operand_address (file, addr)
 }
 
 
-/* Emit either a label, .comm, or .lcomm directive. */
-
-void
-xtensa_declare_object (file, name, init_string, final_string, size)
-     FILE *file;
-     char *name;
-     char *init_string;
-     char *final_string;
-     int size;
-{
-  fputs (init_string, file);		/* "", "\t.comm\t", or "\t.lcomm\t" */
-  assemble_name (file, name);
-  fprintf (file, final_string, size);	/* ":\n", ",%u\n", ",%u\n" */
-}
-
-
 void
 xtensa_output_literal (file, x, mode, labelno)
      FILE *file;
@@ -2234,8 +2218,9 @@ xtensa_reorg (first)
 	continue;
 
       pat = PATTERN (insn);
-      if (GET_CODE (pat) == UNSPEC_VOLATILE
-	  && (XINT (pat, 1) == UNSPECV_SET_FP))
+      if (GET_CODE (pat) == SET
+	  && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
+	  && (XINT (SET_SRC (pat), 1) == UNSPECV_SET_FP))
 	{
 	  set_frame_ptr_insn = insn;
 	  break;
@@ -2718,6 +2703,10 @@ order_regs_for_local_alloc ()
       for (i = 0; i < num_arg_regs; i++)
 	reg_alloc_order[nxt++] = GP_ARG_FIRST + i;
 
+      /* list the coprocessor registers in order */
+      for (i = 0; i < BR_REG_NUM; i++)
+	reg_alloc_order[nxt++] = BR_REG_FIRST + i;
+
       /* list the FP registers in order for now */
       for (i = 0; i < 16; i++)
 	reg_alloc_order[nxt++] = FP_REG_FIRST + i;
@@ -2728,10 +2717,6 @@ order_regs_for_local_alloc ()
       reg_alloc_order[nxt++] = 16;	/* pseudo frame pointer */
       reg_alloc_order[nxt++] = 17;	/* pseudo arg pointer */
 
-      /* list the coprocessor registers in order */
-      for (i = 0; i < BR_REG_NUM; i++)
-	reg_alloc_order[nxt++] = BR_REG_FIRST + i;
-
       reg_alloc_order[nxt++] = ACC_REG_FIRST;	/* MAC16 accumulator */
     }
 }
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index c495ef11af2..3a5a7343a01 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -1,5 +1,5 @@
 /* Definitions of Tensilica's Xtensa target machine for GNU compiler.
-   Copyright 2001,2002 Free Software Foundation, Inc.
+   Copyright 2001,2002,2003 Free Software Foundation, Inc.
    Contributed by Bob Wilson (bwilson@tensilica.com) at Tensilica.
 
 This file is part of GCC.
@@ -208,6 +208,15 @@ extern unsigned xtensa_current_frame_size;
       }									\
   } while (0)
 
+#define CPP_SPEC " %(subtarget_cpp_spec) "
+
+#ifndef SUBTARGET_CPP_SPEC
+#define SUBTARGET_CPP_SPEC ""
+#endif
+
+#define EXTRA_SPECS							\
+  { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC },
+
 /* Define this to set the endianness to use in libgcc2.c, which can
    not depend on target_flags.  */
 #define LIBGCC2_WORDS_BIG_ENDIAN XCHAL_HAVE_BE
@@ -345,7 +354,6 @@ extern unsigned xtensa_current_frame_size;
    0 - 15	AR[0] - AR[15]
    16		FRAME_POINTER (fake = initial sp)
    17		ARG_POINTER (fake = initial sp + framesize)
-   18           LOOP_COUNT (loop count special register)
    18		BR[0] for floating-point CC
    19 - 34	FR[0] - FR[15]
    35		MAC16 accumulator */
@@ -394,10 +402,11 @@ extern unsigned xtensa_current_frame_size;
    have been exhausted.  */
 
 #define REG_ALLOC_ORDER \
-{  8,  9, 10, 11, 12, 13, 14, 15,  7,  6,  5,  4,  3,  2, 19, \
-  20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, \
+{  8,  9, 10, 11, 12, 13, 14, 15,  7,  6,  5,  4,  3,  2, \
+  18, \
+  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, \
    0,  1, 16, 17, \
-  36, \
+  35, \
 }
 
 #define ORDER_REGS_FOR_LOCAL_ALLOC order_regs_for_local_alloc ()
@@ -424,11 +433,6 @@ extern int leaf_function;
 #define GP_REG_LAST  17
 #define GP_REG_NUM   (GP_REG_LAST - GP_REG_FIRST + 1)
 
-/* Special registers */
-#define SPEC_REG_FIRST 18
-#define SPEC_REG_LAST  18
-#define SPEC_REG_NUM   (SPEC_REG_LAST - SPEC_REG_FIRST + 1)
-
 /* Coprocessor registers */
 #define BR_REG_FIRST 18
 #define BR_REG_LAST  18 
@@ -473,9 +477,6 @@ extern char xtensa_hard_regno_mode_ok[][FIRST_PSEUDO_REGISTER];
    == (GET_MODE_CLASS (MODE2) == MODE_FLOAT ||				\
        GET_MODE_CLASS (MODE2) == MODE_COMPLEX_FLOAT))
 
-/* Register to use for LCOUNT special register.  */
-#define COUNT_REGISTER_REGNUM (SPEC_REG_FIRST + 0)
-
 /* Register to use for pushing function arguments.  */
 #define STACK_POINTER_REGNUM (GP_REG_FIRST + 1)
 
@@ -1539,14 +1540,9 @@ typedef struct xtensa_args {
 /* Globalizing directive for a label.  */
 #define GLOBAL_ASM_OP "\t.global\t"
 
-/* This says how to define a global common symbol.  */
-#define ASM_OUTPUT_COMMON(STREAM, NAME, SIZE, ROUNDED)			\
-  xtensa_declare_object (STREAM, NAME, "\n\t.comm\t", ",%u\n", (SIZE))
-
-/* This says how to define a local common symbol (ie, not visible to
-   linker).  */
-#define ASM_OUTPUT_LOCAL(STREAM, NAME, SIZE, ROUNDED)			\
-  xtensa_declare_object (STREAM, NAME, "\n\t.lcomm\t", ",%u\n", (SIZE))
+/* Declare an uninitialized external linkage data object.  */
+#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
+  asm_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
 
 /* This is how to output an element of a case-vector that is absolute.  */
 #define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE)				\
@@ -1592,8 +1588,9 @@ typedef struct xtensa_args {
 
 
 /* Define the strings to put out for each section in the object file.  */
-#define TEXT_SECTION_ASM_OP	"\t.text"  	/* instructions */
-#define DATA_SECTION_ASM_OP	"\t.data" 	/* large data */
+#define TEXT_SECTION_ASM_OP	"\t.text"
+#define DATA_SECTION_ASM_OP	"\t.data"
+#define BSS_SECTION_ASM_OP	"\t.section\t.bss"
 
 
 /* Define output to appear before the constant pool.  If the function
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index 5db5c0ca487..cc722b932a9 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -97,6 +97,7 @@
   ""
   "
 {
+  rtx srclo;
   rtx dstlo = gen_lowpart (SImode, operands[0]);
   rtx src1lo = gen_lowpart (SImode, operands[1]);
   rtx src2lo = gen_lowpart (SImode, operands[2]);
@@ -105,9 +106,21 @@
   rtx src1hi = gen_highpart (SImode, operands[1]);
   rtx src2hi = gen_highpart (SImode, operands[2]);
 
+  /* Either source can be used for overflow checking, as long as it's
+     not clobbered by the first addition.  */
+  if (!rtx_equal_p (dstlo, src1lo))
+    srclo = src1lo;
+  else if (!rtx_equal_p (dstlo, src2lo))
+    srclo = src2lo;
+  else
+    {
+      srclo = gen_reg_rtx (SImode);
+      emit_move_insn (srclo, src1lo);
+    }
+
   emit_insn (gen_addsi3 (dstlo, src1lo, src2lo));
   emit_insn (gen_addsi3 (dsthi, src1hi, src2hi));
-  emit_insn (gen_adddi_carry (dsthi, dstlo, src2lo));
+  emit_insn (gen_adddi_carry (dsthi, dstlo, srclo));
   DONE;
 }")
 
@@ -209,9 +222,9 @@
   rtx src1hi = gen_highpart (SImode, operands[1]);
   rtx src2hi = gen_highpart (SImode, operands[2]);
 
-  emit_insn (gen_subsi3 (dstlo, src1lo, src2lo));
   emit_insn (gen_subsi3 (dsthi, src1hi, src2hi));
   emit_insn (gen_subdi_carry (dsthi, src1lo, src2lo));
+  emit_insn (gen_subsi3 (dstlo, src1lo, src2lo));
   DONE;
 }")
 
@@ -2387,7 +2400,7 @@
 ;; to set up the frame pointer.
 
 (define_insn "set_frame_ptr"
-  [(unspec_volatile [(const_int 0)] UNSPECV_SET_FP)]
+  [(set (reg:SI A7_REG) (unspec_volatile [(const_int 0)] UNSPECV_SET_FP))]
   ""
   "*
 {
@@ -2401,7 +2414,7 @@
 
 ;; Post-reload splitter to remove fp assignment when it's not needed.
 (define_split
-  [(unspec_volatile [(const_int 0)] UNSPECV_SET_FP)]
+  [(set (reg:SI A7_REG) (unspec_volatile [(const_int 0)] UNSPECV_SET_FP))]
   "reload_completed && !frame_pointer_needed"
   [(unspec [(const_int 0)] UNSPEC_NOP)]
   "")