113 files changed, 4367 insertions, 370 deletions
diff --git a/boehm-gc/ChangeLog b/boehm-gc/ChangeLog
index bcf4a42ce78..a4e404842a7 100644
--- a/boehm-gc/ChangeLog
+++ b/boehm-gc/ChangeLog
@@ -1,3 +1,8 @@
+2007-06-22  Jakub Jelinek  <jakub@redhat.com>
+
+	* pthread_support.c (GC_get_thread_stack_base): Handle
+	pthread_getattr_np failures.
+
 2007-05-18  Keith Seitz  <keiths@redhat.com>
 
 	Merged from trunk:
diff --git a/boehm-gc/pthread_support.c b/boehm-gc/pthread_support.c
index bbda8522c03..f0a58c886a3 100644
--- a/boehm-gc/pthread_support.c
+++ b/boehm-gc/pthread_support.c
@@ -1135,7 +1135,13 @@ GC_PTR GC_get_thread_stack_base()
   size_t stack_size;
   
   my_pthread = pthread_self();  
-  pthread_getattr_np (my_pthread, &attr);
+  if (pthread_getattr_np (my_pthread, &attr) != 0)
+    {
+#   ifdef DEBUG_THREADS
+      GC_printf1("Can not determine stack base for attached thread");
+#   endif
+      return 0;
+    }
   pthread_attr_getstack (&attr, (void **) &stack_addr, &stack_size);
   pthread_attr_destroy (&attr);
   
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c4770506756..54c2b9b4526 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,333 @@
+2006-10-18  Jan Hubicka  <jh@suse.cz>
+
+	PR middle-end/29299
+	* cfgexpand.c (expand_used_vars_for_block): Vars marked used by user
+	are used.
+
+2006-10-15  Jan Hubicka  <jh@suse.cz>
+            Richard Guenther  <rguenther@suse.de>
+
+	PR middle-end/29299
+	* cgraphunit.c (cgraph_finalize_compilation_unit): Call
+	process_function_and_variable_attributes on all variables, including
+	those discovered during cgraph construction phase.
+
+2006-10-23  Jakub Jelinek  <jakub@redhat.com>
+
+	* builtins.c (expand_builtin, maybe_emit_chk_warning): Handle
+	BUILT_IN_STRNCAT_CHK.
+
+2006-09-16  Andrew Pinski  <pinskia@physics.uc.edu>
+
+	PR tree-opt/29059
+	* tree-ssa-propagate.c (set_rhs): Reject MODIFY_EXPR right
+	away for the expr argument.
+
+2006-10-10  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/29272
+	* builtins.c (fold_builtin_memset, fold_builtin_memory_op): Restrict
+	single entry optimization to variables and components thereof.
+
+2006-09-02  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/27567
+	* builtins.c (fold_builtin_memcpy, fold_builtin_memmove): Remove.
+	(fold_builtin_mempcpy): Rename to...
+	(fold_builtin_memory_op): ... this.  Optimize one element copy
+	into an assignment.
+	(fold_builtin_memset, fold_builtin_bzero, fold_builtin_bcopy): New
+	functions.
+	(expand_builtin_strcpy, expand_builtin_strncpy): Handle COMPOUND_EXPR.
+	(expand_builtin_memcpy, expand_builtin_mempcpy,
+	expand_builtin_memmove): Likewise.  Use fold_builtin_memory_op.
+	(fold_builtin_1): Handle BUILT_IN_MEMSET, BUILT_IN_BZERO and
+	BUILT_IN_BCOPY.  Use fold_builtin_memory_op for
+	BUILT_IN_MEM{CPY,PCPY,MOVE}.
+
+2007-02-08  Harsha Jagasia  <harsha.jagasia@amd.com>
+
+	* config/i386/xmmintrin.h: Make inclusion of emmintrin.h
+	conditional to __SSE2__.
+	* config/i386/emmintrin.h: Generate #error if __SSE2__ is not
+	defined.
+	* config/i386/pmmintrin.h: Generate #error if __SSE3__ is not
+	defined.
+	* config/i386/tmmintrin.h: Generate #error if __SSSE3__ is not
+	defined.
+
+2007-02-05  Harsha Jagasia  <harsha.jagasia@amd.com>
+
+	* config/i386/athlon.md (athlon_fldxf_k8, athlon_fld_k8,
+	athlon_fstxf_k8, athlon_fst_k8, athlon_fist, athlon_fmov,
+	athlon_fadd_load, athlon_fadd_load_k8, athlon_fadd, athlon_fmul,
+	athlon_fmul_load, athlon_fmul_load_k8, athlon_fsgn,
+	athlon_fdiv_load, athlon_fdiv_load_k8, athlon_fdiv_k8,
+	athlon_fpspc_load, athlon_fpspc, athlon_fcmov_load,
+	athlon_fcmov_load_k8, athlon_fcmov_k8, athlon_fcomi_load_k8,
+	athlon_fcomi, athlon_fcom_load_k8, athlon_fcom): Added amdfam10.
+
+	* config/i386/i386.md (x86_sahf_1, cmpfp_i_mixed, cmpfp_i_sse,
+	cmpfp_i_i387, cmpfp_iu_mixed, cmpfp_iu_sse, cmpfp_iu_387,
+	swapsi, swaphi_1, swapqi_1, swapdi_rex64, fix_truncsfdi_sse,
+	fix_truncdfdi_sse, fix_truncsfsi_sse, fix_truncdfsi_sse,
+	x86_fldcw_1, floatsisf2_mixed, floatsisf2_sse, floatdisf2_mixed,
+	floatdisf2_sse, floatsidf2_mixed, floatsidf2_sse,
+	floatdidf2_mixed, floatdidf2_sse, muldi3_1_rex64, mulsi3_1,
+	mulsi3_1_zext, mulhi3_1, mulqi3_1, umulqihi3_1, mulqihi3_insn,
+	umulditi3_insn, umulsidi3_insn, mulditi3_insn, mulsidi3_insn,
+	umuldi3_highpart_rex64, umulsi3_highpart_insn,
+	umulsi3_highpart_zext, smuldi3_highpart_rex64,
+	smulsi3_highpart_insn, smulsi3_highpart_zext, x86_64_shld,
+	x86_shld_1, x86_64_shrd, sqrtsf2_mixed, sqrtsf2_sse,
+	sqrtsf2_i387, sqrtdf2_mixed, sqrtdf2_sse, sqrtdf2_i387,
+	sqrtextendsfdf2_i387, sqrtxf2, sqrtextendsfxf2_i387,
+	sqrtextenddfxf2_i387): Added amdfam10_decode.
+	
+	* config/i386/athlon.md (athlon_idirect_amdfam10,
+	athlon_ivector_amdfam10, athlon_idirect_load_amdfam10,
+	athlon_ivector_load_amdfam10, athlon_idirect_both_amdfam10,
+	athlon_ivector_both_amdfam10, athlon_idirect_store_amdfam10,
+	athlon_ivector_store_amdfam10): New define_insn_reservation.
+	(athlon_idirect_loadmov, athlon_idirect_movstore): Added
+	amdfam10.
+
+	* config/i386/athlon.md (athlon_call_amdfam10,
+	athlon_pop_amdfam10, athlon_lea_amdfam10): New
+	define_insn_reservation.
+	(athlon_branch, athlon_push, athlon_leave_k8, athlon_imul_k8,
+	athlon_imul_k8_DI, athlon_imul_mem_k8, athlon_imul_mem_k8_DI,
+	athlon_idiv, athlon_idiv_mem, athlon_str): Added amdfam10.
+
+	* config/i386/athlon.md (athlon_sseld_amdfam10,
+	athlon_mmxld_amdfam10, athlon_ssest_amdfam10,
+	athlon_mmxssest_short_amdfam10): New define_insn_reservation.
+
+	* config/i386/athlon.md (athlon_sseins_amdfam10): New
+	define_insn_reservation.
+	* config/i386/i386.md (sseins): Added sseins to define_attr type
+	and define_attr unit.
+	* config/i386/sse.md: Set type attribute to sseins for insertq
+	and insertqi.
+
+	* config/i386/athlon.md (sselog_load_amdfam10, sselog_amdfam10,
+	ssecmpvector_load_amdfam10, ssecmpvector_amdfam10,
+	ssecomi_load_amdfam10, ssecomi_amdfam10,
+	sseaddvector_load_amdfam10, sseaddvector_amdfam10): New
+	define_insn_reservation.
+	(ssecmp_load_k8, ssecmp, sseadd_load_k8, sseadd): Added amdfam10.
+
+	* config/i386/athlon.md (cvtss2sd_load_amdfam10,
+	cvtss2sd_amdfam10, cvtps2pd_load_amdfam10, cvtps2pd_amdfam10,
+	cvtsi2sd_load_amdfam10, cvtsi2ss_load_amdfam10,
+	cvtsi2sd_amdfam10, cvtsi2ss_amdfam10, cvtsd2ss_load_amdfam10,
+	cvtsd2ss_amdfam10, cvtpd2ps_load_amdfam10, cvtpd2ps_amdfam10,
+	cvtsX2si_load_amdfam10, cvtsX2si_amdfam10): New 
+	define_insn_reservation.
+
+	* config/i386/sse.md (cvtsi2ss, cvtsi2ssq, cvtss2si,
+	cvtss2siq, cvttss2si, cvttss2siq, cvtsi2sd, cvtsi2sdq,
+	cvtsd2si, cvtsd2siq, cvttsd2si, cvttsd2siq,
+	cvtpd2dq, cvttpd2dq, cvtsd2ss, cvtss2sd,
+	cvtpd2ps, cvtps2pd): Added amdfam10_decode attribute.
+
+	* config/i386/athlon.md (athlon_ssedivvector_amdfam10,
+	athlon_ssedivvector_load_amdfam10, athlon_ssemulvector_amdfam10,
+	athlon_ssemulvector_load_amdfam10): New define_insn_reservation.
+	(athlon_ssediv, athlon_ssediv_load_k8, athlon_ssemul,
+	athlon_ssemul_load_k8): Added amdfam10.
+
+	* config/i386/i386.h (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL): New macro.
+	(x86_sse_unaligned_move_optimal): New variable.
+	
+	* config/i386/i386.c (x86_sse_unaligned_move_optimal): Enable for  
+	m_AMDFAM10.
+	(ix86_expand_vector_move_misalign): Add code to generate movupd/movups
+	for unaligned vector SSE double/single precision loads for AMDFAM10.
+
+	* config/i386/i386.h (TARGET_AMDFAM10): New macro.
+	(TARGET_CPU_CPP_BUILTINS): Add code for amdfam10.
+	Define TARGET_CPU_DEFAULT_amdfam10.
+	(TARGET_CPU_DEFAULT_NAMES): Add amdfam10.
+	(processor_type): Add PROCESSOR_AMDFAM10.	
+	
+	* config/i386/i386.md: Add amdfam10 as a new cpu attribute to match
+	processor_type in config/i386/i386.h.
+	Enable imul peepholes for TARGET_AMDFAM10.
+	
+	* config.gcc: Add support for --with-cpu option for amdfam10.
+	
+	* config/i386/i386.c (amdfam10_cost): New variable.
+	(m_AMDFAM10): New macro.
+	(m_ATHLON_K8_AMDFAM10): New macro.
+	(x86_use_leave, x86_push_memory, x86_movx, x86_unroll_strlen,
+	x86_cmove, x86_3dnow_a, x86_deep_branch, x86_use_simode_fiop,
+	x86_promote_QImode, x86_integer_DFmode_moves,
+	x86_partial_reg_dependency, x86_memory_mismatch_stall, 
+	x86_accumulate_outgoing_args, x86_arch_always_fancy_math_387,
+	x86_sse_partial_reg_dependency, x86_sse_typeless_stores,
+	x86_use_ffreep, x86_use_incdec, x86_four_jump_limit,
+	x86_schedule, x86_use_bt, x86_cmpxchg16b, x86_pad_returns):
+	Enable/disable for amdfam10.
+	(override_options): Add amdfam10_cost to processor_target_table.
+	Set up PROCESSOR_AMDFAM10 for amdfam10 entry in 
+	processor_alias_table.
+	(ix86_issue_rate): Add PROCESSOR_AMDFAM10.
+	(ix86_adjust_cost): Add code for amdfam10.
+
+	* config/i386/i386.opt: Add new Advanced Bit Manipulation (-mabm)
+	instruction set feature flag. Add new (-mpopcnt) flag for popcnt 
+	instruction. Add new SSE4A (-msse4a) instruction set feature flag.
+	* config/i386/i386.h: Add builtin definition for SSE4A.
+	* config/i386/i386.md: Add support for ABM instructions 
+	(popcnt and lzcnt).
+	* config/i386/sse.md: Add support for SSE4A instructions
+	(movntss, movntsd, extrq, insertq).
+	* config/i386/i386.c: Add support for ABM and SSE4A builtins.
+	Add -march=amdfam10 flag.
+	* config/i386/ammintrin.h: Add support for SSE4A intrinsics.
+	* doc/invoke.texi: Add documentation on flags for sse4a, abm, popcnt
+	and amdfam10.
+	* doc/extend.texi: Add documentation for SSE4A builtins.
+
+2007-06-21  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/32362
+	* omp-low.c (lookup_decl_in_outer_ctx): Don't ICE if t is NULL,
+	but decl is a global var, instead return decl.
+	* gimplify.c (gimplify_adjust_omp_clauses_1): Add shared clauses
+	even for is_global_var decls, if they are private in some outer
+	context.
+
+2007-06-21  Uros Bizjak  <ubizjak@gmail.com>
+
+	PR target/32389
+	* config/i386/i386.h (enum ix86_stack_slot): Add SLOT_VIRTUAL.
+	* config/i386/i386.c (assign_386_stack_local): Assert that
+	SLOT_VIRTUAL is valid only before virtual regs are instantiated.
+	(ix86_expand_builtin) [IX86_BUILTIN_LDMXCSR, IX86_BUILTIN_STMXCSR]:
+	Use SLOT_VIRTUAL stack slot instead of SLOT_TEMP.
+	* config/i386/i386.md (truncdfsf2, truncxfsf2, truncxfdf2): Ditto.
+
+2007-06-20  Jakub Jelinek  <jakub@redhat.com>
+
+	PR inline-asm/32109
+	* gimplify.c (gimplify_asm_expr): Issue error if type is addressable
+	and !allows_mem.
+
+	PR middle-end/32285
+	* calls.c (precompute_arguments): Also precompute CALL_EXPR arguments
+	if ACCUMULATE_OUTGOING_ARGS.
+
+2007-06-20  Kaz Kojima  <kkojima@gcc.gnu.org>
+
+	PR rtl-optimization/28011
+	Backport from mainline.
+	* reload.c (push_reload): Set dont_share if IN appears in OUT
+	also when IN is a PLUS rtx.
+	(reg_overlap_mentioned_for_reload_p): Return true if X and IN
+	are same PLUS rtx.
+
+2007-06-19  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/32353
+	* tree-ssa-structalias.c (set_uids_in_ptset): Also handle RESULT_DECL.
+
+2007-05-04  Ulrich Drepper  <drepper@redhat.com>
+	    Jakub Jelinek  <jakub@redhat.com>
+
+	* crtstuff.c (HIDDEN_DTOR_LIST_END): New macro.
+	(__do_global_dtors_aux): Use more paranoid loop to run
+	destructors if HIDDEN_DTOR_LIST_END.
+	(__DTOR_END__): Export as a hidden symbol when HIDDEN_DTOR_LIST_END.
+
+2007-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* varasm.c (align_variable): Don't increase alignment for
+	DECL_THREAD_LOCAL_P variables above BITS_PER_WORD through
+	DATA_ALIGNMENT or CONSTANT_ALIGNMENT.
+
+2007-06-13  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	* config/sparc/sparc.c (sparc_override_options): Initialize
+	fpu mask correctly.
+
+2007-06-08  Kaz Kojima  <kkojima@gcc.gnu.org>
+
+	PR target/32163
+	Backport from mainline.
+	* config/sh/sh.md (symGOT_load): Don't schedule insns when
+	the symbol is generated with the stack protector.
+
+2007-05-31  John David Anglin  <dave.anglin@nrc-cnrc.gc.ca>
+
+	Backport from mainline:
+	2007-05-05  Aurelien Jarno  <aurelien@aurel32.net>
+
+	* config/pa/pa.md: Split tgd_load, tld_load and tie_load
+	into pic and non-pic versions. Mark r19 as used for 
+	tgd_load_pic, tld_load_pic and tie_load_pic. Mark r27 as used 
+	for tgd_load, tld_load and tie_load.
+	* config/pa/pa.c (legitimize_tls_address): Emit pic or non-pic
+	version of tgd_load, tld_load and tie_load depending on the 
+	value of flag_pic.
+
+2007-05-20  Kaz Kojima  <kkojima@gcc.gnu.org>
+
+	PR target/31701
+	Backport from mainline.
+	* config/sh/sh.c (output_stack_adjust): Avoid using the frame
+	register itself to hold the offset constant.  Tell flow the use
+	of r4 and r5 when they are used.
+
+2007-05-20  Kaz Kojima  <kkojima@gcc.gnu.org>
+
+	PR target/31480
+	Backport from mainline.
+	* config/sh/sh.md (length): Check if prev_nonnote_insn (insn)
+	is null.
+
+2007-05-20  Kaz Kojima  <kkojima@gcc.gnu.org>
+
+	PR target/31022
+	Backport from mainline.
+	* config/sh/sh.c (sh_adjust_cost): Use the result of single_set
+	instead of PATTERN.
+
+2007-05-17  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	PR rtl-optimization/31691
+	* combine.c (simplify_set): Build a new src pattern instead of
+	substituting its operands in the COMPARE case.
+
+2007-05-16  Richard Guenther  <rguenther@suse.de>
+
+	Backport from mainline:
+	2006-06-09  Richard Guenther  <rguenther@suse.de>
+
+	PR tree-optimization/26998
+	* tree-vrp.c (extract_range_from_unary_expr): For NEGATE_EXPR
+	of signed types, only TYPE_MIN_VALUE is special, but for both,
+	minimum and maximum value.  Likewise VR_ANTI_RANGE is special
+	in this case, as is -fwrapv.
+
+2007-05-11  Kaz Kojima  <kkojima@gcc.gnu.org>
+
+	PR target/31876
+	* config/sh/sh.md (andsi3): Avoid calling gen_lowpart on
+        a SImode SUBREG of a floating point register.
+
+2007-05-08  Andreas Krebbel  <krebbel1@de.ibm.com>
+
+	* loop.c (move_movables): Remove REG_EQUAL notes for conditional
+	invariants as well.
+
+2007-05-30  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/31769
+	* except.c (duplicate_eh_regions): Clear prev_try if
+	ERT_MUST_NOT_THROW region is inside of ERT_TRY region.
+
 2007-04-29  Bernd Schmidt  <bernd.schmidt@analog.com>
 
 	* reload.c (combine_reloads): When trying to use a dying register,
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index dbd2157a6dc..77bb8ae07a0 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20070502
+20070626
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 2a9f6de7b02..614eba1a036 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -160,9 +160,7 @@ static tree fold_builtin_ceil (tree, tree);
 static tree fold_builtin_round (tree, tree);
 static tree fold_builtin_int_roundingfn (tree, tree);
 static tree fold_builtin_bitop (tree, tree);
-static tree fold_builtin_memcpy (tree, tree);
-static tree fold_builtin_mempcpy (tree, tree, int);
-static tree fold_builtin_memmove (tree, tree);
+static tree fold_builtin_memory_op (tree, tree, bool, int);
 static tree fold_builtin_strchr (tree, tree);
 static tree fold_builtin_memcmp (tree);
 static tree fold_builtin_strcmp (tree);
@@ -2932,10 +2930,19 @@ expand_builtin_memcpy (tree exp, rtx target, enum machine_mode mode)
       unsigned int dest_align
 	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
       rtx dest_mem, src_mem, dest_addr, len_rtx;
-      tree result = fold_builtin_memcpy (fndecl, arglist);
+      tree result = fold_builtin_memory_op (arglist, TREE_TYPE (TREE_TYPE (fndecl)),
+					    false, /*endp=*/0);
 
       if (result)
-	return expand_expr (result, target, mode, EXPAND_NORMAL);
+	{
+	  while (TREE_CODE (result) == COMPOUND_EXPR)
+	    {
+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+			   EXPAND_NORMAL);
+	      result = TREE_OPERAND (result, 1);
+	    }
+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
+	}
 
       /* If DEST is not a pointer type, call the normal function.  */
       if (dest_align == 0)
@@ -3021,11 +3028,19 @@ expand_builtin_mempcpy (tree arglist, tree type, rtx target, enum machine_mode m
       unsigned int dest_align
 	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
       rtx dest_mem, src_mem, len_rtx;
-      tree result = fold_builtin_mempcpy (arglist, type, endp);
+      tree result = fold_builtin_memory_op (arglist, type, false, endp);
 
       if (result)
-	return expand_expr (result, target, mode, EXPAND_NORMAL);
-      
+	{
+	  while (TREE_CODE (result) == COMPOUND_EXPR)
+	    {
+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+			   EXPAND_NORMAL);
+	      result = TREE_OPERAND (result, 1);
+	    }
+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
+	}
+
       /* If either SRC or DEST is not a pointer type, don't do this
          operation in-line.  */
       if (dest_align == 0 || src_align == 0)
@@ -3095,10 +3110,18 @@ expand_builtin_memmove (tree arglist, tree type, rtx target,
       unsigned int src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
       unsigned int dest_align
 	= get_pointer_alignment (dest, BIGGEST_ALIGNMENT);
-      tree result = fold_builtin_memmove (arglist, type);
+      tree result = fold_builtin_memory_op (arglist, type, false, /*endp=*/3);
 
       if (result)
-	return expand_expr (result, target, mode, EXPAND_NORMAL);
+	{
+	  while (TREE_CODE (result) == COMPOUND_EXPR)
+	    {
+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+			   EXPAND_NORMAL);
+	      result = TREE_OPERAND (result, 1);
+	    }
+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
+	}
 
       /* If DEST is not a pointer type, call the normal function.  */
       if (dest_align == 0)
@@ -3246,7 +3269,15 @@ expand_builtin_strcpy (tree fndecl, tree arglist, rtx target, enum machine_mode
     {
       tree result = fold_builtin_strcpy (fndecl, arglist, 0);
       if (result)
-	return expand_expr (result, target, mode, EXPAND_NORMAL);
+	{
+	  while (TREE_CODE (result) == COMPOUND_EXPR)
+	    {
+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+			   EXPAND_NORMAL);
+	      result = TREE_OPERAND (result, 1);
+	    }
+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
+	}
 
       return expand_movstr (TREE_VALUE (arglist),
 			    TREE_VALUE (TREE_CHAIN (arglist)),
@@ -3372,7 +3403,15 @@ expand_builtin_strncpy (tree exp, rtx target, enum machine_mode mode)
       tree result = fold_builtin_strncpy (fndecl, arglist, slen);
       
       if (result)
-	return expand_expr (result, target, mode, EXPAND_NORMAL);
+	{
+	  while (TREE_CODE (result) == COMPOUND_EXPR)
+	    {
+	      expand_expr (TREE_OPERAND (result, 0), const0_rtx, VOIDmode,
+			   EXPAND_NORMAL);
+	      result = TREE_OPERAND (result, 1);
+	    }
+	  return expand_expr (result, target, mode, EXPAND_NORMAL);
+	}
 
       /* We must be passed a constant len and src parameter.  */
       if (!host_integerp (len, 1) || !slen || !host_integerp (slen, 1))
@@ -6534,6 +6573,7 @@ expand_builtin (tree exp, rtx target, rtx subtarget, enum machine_mode mode,
     case BUILT_IN_STPCPY_CHK:
     case BUILT_IN_STRNCPY_CHK:
     case BUILT_IN_STRCAT_CHK:
+    case BUILT_IN_STRNCAT_CHK:
     case BUILT_IN_SNPRINTF_CHK:
     case BUILT_IN_VSNPRINTF_CHK:
       maybe_emit_chk_warning (exp, fcode);
@@ -8032,78 +8072,130 @@ fold_builtin_exponent (tree fndecl, tree arglist,
   return 0;
 }
 
-/* Fold function call to builtin memcpy.  Return
+/* Fold function call to builtin memset.  Return
    NULL_TREE if no simplification can be made.  */
 
 static tree
-fold_builtin_memcpy (tree fndecl, tree arglist)
+fold_builtin_memset (tree arglist, tree type, bool ignore)
 {
-  tree dest, src, len;
+  tree dest, c, len, var, ret, inner;
+  unsigned HOST_WIDE_INT length, cval;
 
   if (!validate_arglist (arglist,
-			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+			 POINTER_TYPE, INTEGER_TYPE, INTEGER_TYPE, VOID_TYPE))
     return 0;
 
   dest = TREE_VALUE (arglist);
-  src = TREE_VALUE (TREE_CHAIN (arglist));
+  c = TREE_VALUE (TREE_CHAIN (arglist));
   len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
 
+  if (! host_integerp (len, 1))
+    return 0;
+
   /* If the LEN parameter is zero, return DEST.  */
   if (integer_zerop (len))
-    return omit_one_operand (TREE_TYPE (TREE_TYPE (fndecl)), dest, src);
+    return omit_one_operand (type, dest, c);
 
-  /* If SRC and DEST are the same (and not volatile), return DEST.  */
-  if (operand_equal_p (src, dest, 0))
-    return omit_one_operand (TREE_TYPE (TREE_TYPE (fndecl)), dest, len);
+  if (! host_integerp (c, 1) || TREE_SIDE_EFFECTS (dest))
+    return 0;
 
-  return 0;
+  var = dest;
+  STRIP_NOPS (var);
+  if (TREE_CODE (var) != ADDR_EXPR)
+    return 0;
+
+  var = TREE_OPERAND (var, 0);
+  if (TREE_THIS_VOLATILE (var))
+    return 0;
+
+  if (!INTEGRAL_TYPE_P (TREE_TYPE (var))
+      && !POINTER_TYPE_P (TREE_TYPE (var)))
+    return 0;
+
+  /* If var is a VAR_DECL or a component thereof,
+     we can use its alias set, otherwise we'd need to make
+     sure we go through alias set 0.  */
+  inner = var;
+  while (handled_component_p (inner))
+    inner = TREE_OPERAND (inner, 0);
+  if (! SSA_VAR_P (inner))
+    return 0;
+
+  length = tree_low_cst (len, 1);
+  if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (var))) != length
+      || get_pointer_alignment (dest, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
+	 < (int) length)
+    return 0;
+
+  if (length > HOST_BITS_PER_WIDE_INT / BITS_PER_UNIT)
+    return 0;
+
+  if (integer_zerop (c))
+    cval = 0;
+  else
+    {
+      if (CHAR_BIT != 8 || BITS_PER_UNIT != 8 || HOST_BITS_PER_WIDE_INT > 64)
+	return 0;
+
+      cval = tree_low_cst (c, 1);
+      cval &= 0xff;
+      cval |= cval << 8;
+      cval |= cval << 16;
+      cval |= (cval << 31) << 1;
+    }
+
+  ret = build_int_cst_type (TREE_TYPE (var), cval);
+  ret = build2 (MODIFY_EXPR, TREE_TYPE (var), var, ret);
+  if (ignore)
+    return ret;
+
+  return omit_one_operand (type, dest, ret);
 }
 
-/* Fold function call to builtin mempcpy.  Return
+/* Fold function call to builtin bzero.  Return
    NULL_TREE if no simplification can be made.  */
 
 static tree
-fold_builtin_mempcpy (tree arglist, tree type, int endp)
+fold_builtin_bzero (tree arglist, bool ignore)
 {
-  if (validate_arglist (arglist,
-			POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
-    {
-      tree dest = TREE_VALUE (arglist);
-      tree src = TREE_VALUE (TREE_CHAIN (arglist));
-      tree len = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+  tree dest, size, newarglist;
 
-      /* If the LEN parameter is zero, return DEST.  */
-      if (integer_zerop (len))
-	return omit_one_operand (type, dest, src);
+  if (!validate_arglist (arglist, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+    return 0;
 
-      /* If SRC and DEST are the same (and not volatile), return DEST+LEN.  */
-      if (operand_equal_p (src, dest, 0))
-        {
-	  if (endp == 0)
-	    return omit_one_operand (type, dest, len);
+  if (!ignore)
+    return 0;
 
-	  if (endp == 2)
-	    len = fold_build2 (MINUS_EXPR, TREE_TYPE (len), len,
-			       ssize_int (1));
-      
-	  len = fold_convert (TREE_TYPE (dest), len);
-	  len = fold_build2 (PLUS_EXPR, TREE_TYPE (dest), dest, len);
-	  return fold_convert (type, len);
-	}
-    }
-  return 0;
+  dest = TREE_VALUE (arglist);
+  size = TREE_VALUE (TREE_CHAIN (arglist));
+
+  /* New argument list transforming bzero(ptr x, int y) to
+     memset(ptr x, int 0, size_t y).   This is done this way
+     so that if it isn't expanded inline, we fall back to
+     calling bzero instead of memset.  */
+
+  newarglist = build_tree_list (NULL_TREE, fold_convert (sizetype, size));
+  newarglist = tree_cons (NULL_TREE, integer_zero_node, newarglist);
+  newarglist = tree_cons (NULL_TREE, dest, newarglist);
+  return fold_builtin_memset (newarglist, void_type_node, ignore);
 }
 
-/* Fold function call to builtin memmove.  Return
-   NULL_TREE if no simplification can be made.  */
+/* Fold function call to builtin mem{{,p}cpy,move}.  Return
+   NULL_TREE if no simplification can be made.
+   If ENDP is 0, return DEST (like memcpy).
+   If ENDP is 1, return DEST+LEN (like mempcpy).
+   If ENDP is 2, return DEST+LEN-1 (like stpcpy).
+   If ENDP is 3, return DEST, additionally *SRC and *DEST may overlap
+   (memmove).   */
 
 static tree
-fold_builtin_memmove (tree arglist, tree type)
+fold_builtin_memory_op (tree arglist, tree type, bool ignore, int endp)
 {
-  tree dest, src, len;
+  tree dest, src, len, destvar, srcvar, expr, inner;
+  unsigned HOST_WIDE_INT length;
 
-  if (!validate_arglist (arglist,
-			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+  if (! validate_arglist (arglist,
+			  POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
     return 0;
 
   dest = TREE_VALUE (arglist);
@@ -8114,11 +8206,133 @@ fold_builtin_memmove (tree arglist, tree type)
   if (integer_zerop (len))
     return omit_one_operand (type, dest, src);
 
-  /* If SRC and DEST are the same (and not volatile), return DEST.  */
+  /* If SRC and DEST are the same (and not volatile), return
+     DEST{,+LEN,+LEN-1}.  */
   if (operand_equal_p (src, dest, 0))
-    return omit_one_operand (type, dest, len);
+    expr = len;
+  else
+    {
+      if (! host_integerp (len, 1))
+	return 0;
 
-  return 0;
+      if (TREE_SIDE_EFFECTS (dest) || TREE_SIDE_EFFECTS (src))
+	return 0;
+
+      destvar = dest;
+      STRIP_NOPS (destvar);
+      if (TREE_CODE (destvar) != ADDR_EXPR)
+	return 0;
+
+      destvar = TREE_OPERAND (destvar, 0);
+      if (TREE_THIS_VOLATILE (destvar))
+	return 0;
+
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (destvar))
+	  && !POINTER_TYPE_P (TREE_TYPE (destvar))
+	  && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (destvar)))
+	return 0;
+
+      /* If destvar is a VAR_DECL or a component thereof,
+	 we can use its alias set, otherwise we'd need to make
+	 sure we go through alias set 0.  */
+      inner = destvar;
+      while (handled_component_p (inner))
+	inner = TREE_OPERAND (inner, 0);
+      if (! SSA_VAR_P (inner))
+	return 0;
+
+      srcvar = src;
+      STRIP_NOPS (srcvar);
+      if (TREE_CODE (srcvar) != ADDR_EXPR)
+	return 0;
+
+      srcvar = TREE_OPERAND (srcvar, 0);
+      if (TREE_THIS_VOLATILE (srcvar))
+	return 0;
+
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (srcvar))
+	  && !POINTER_TYPE_P (TREE_TYPE (srcvar))
+	  && !SCALAR_FLOAT_TYPE_P (TREE_TYPE (srcvar)))
+	return 0;
+
+      /* If srcvar is a VAR_DECL or a component thereof,
+	 we can use its alias set, otherwise we'd need to make
+	 sure we go through alias set 0.  */
+      inner = srcvar;
+      while (handled_component_p (inner))
+	inner = TREE_OPERAND (inner, 0);
+      if (! SSA_VAR_P (inner))
+	return 0;
+
+      length = tree_low_cst (len, 1);
+      if (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (destvar))) != length
+	  || get_pointer_alignment (dest, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
+	     < (int) length
+	  || GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (srcvar))) != length
+	  || get_pointer_alignment (src, BIGGEST_ALIGNMENT) / BITS_PER_UNIT
+	     < (int) length)
+	return 0;
+
+      if ((INTEGRAL_TYPE_P (TREE_TYPE (srcvar))
+	   || POINTER_TYPE_P (TREE_TYPE (srcvar)))
+	  && (INTEGRAL_TYPE_P (TREE_TYPE (destvar))
+	      || POINTER_TYPE_P (TREE_TYPE (destvar))))
+	expr = fold_convert (TREE_TYPE (destvar), srcvar);
+      else
+	expr = fold_build1 (VIEW_CONVERT_EXPR, TREE_TYPE (destvar), srcvar);
+      expr = build2 (MODIFY_EXPR, TREE_TYPE (destvar), destvar, expr);
+    }
+
+  if (ignore)
+    return expr;
+
+  if (endp == 0 || endp == 3)
+    return omit_one_operand (type, dest, expr);
+
+  if (expr == len)
+    expr = 0;
+
+  if (endp == 2)
+    len = fold_build2 (MINUS_EXPR, TREE_TYPE (len), len,
+		       ssize_int (1));
+
+  len = fold_convert (TREE_TYPE (dest), len);
+  dest = fold_build2 (PLUS_EXPR, TREE_TYPE (dest), dest, len);
+  dest = fold_convert (type, dest);
+  if (expr)
+    dest = omit_one_operand (type, dest, expr);
+  return dest;
+}
+
+/* Fold function call to builtin bcopy.  Return NULL_TREE if no
+   simplification can be made.  */
+
+static tree
+fold_builtin_bcopy (tree arglist, bool ignore)
+{
+  tree src, dest, size, newarglist;
+
+  if (!validate_arglist (arglist,
+			 POINTER_TYPE, POINTER_TYPE, INTEGER_TYPE, VOID_TYPE))
+    return 0;
+
+  if (! ignore)
+    return 0;
+
+  src = TREE_VALUE (arglist);
+  dest = TREE_VALUE (TREE_CHAIN (arglist));
+  size = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+
+  /* New argument list transforming bcopy(ptr x, ptr y, int z) to
+     memmove(ptr y, ptr x, size_t z).   This is done this way
+     so that if it isn't expanded inline, we fall back to
+     calling bcopy instead of memmove.  */
+
+  newarglist = build_tree_list (NULL_TREE, fold_convert (sizetype, size));
+  newarglist = tree_cons (NULL_TREE, src, newarglist);
+  newarglist = tree_cons (NULL_TREE, dest, newarglist);
+
+  return fold_builtin_memory_op (newarglist, void_type_node, true, /*endp=*/3);
 }
 
 /* Fold function call to builtin strcpy.  If LEN is not NULL, it represents
@@ -9089,14 +9303,23 @@ fold_builtin_1 (tree fndecl, tree arglist, bool ignore)
     case BUILT_IN_PARITYLL:
       return fold_builtin_bitop (fndecl, arglist);
 
+    case BUILT_IN_MEMSET:
+      return fold_builtin_memset (arglist, type, ignore);
+
     case BUILT_IN_MEMCPY:
-      return fold_builtin_memcpy (fndecl, arglist);
+      return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/0);
 
     case BUILT_IN_MEMPCPY:
-      return fold_builtin_mempcpy (arglist, type, /*endp=*/1);
+      return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/1);
 
     case BUILT_IN_MEMMOVE:
-      return fold_builtin_memmove (arglist, type);
+      return fold_builtin_memory_op (arglist, type, ignore, /*endp=*/3);
+
+    case BUILT_IN_BZERO:
+      return fold_builtin_bzero (arglist, ignore);
+
+    case BUILT_IN_BCOPY:
+      return fold_builtin_bcopy (arglist, ignore);
 
     case BUILT_IN_SIGNBIT:
     case BUILT_IN_SIGNBITF:
@@ -10206,6 +10429,11 @@ maybe_emit_chk_warning (tree exp, enum built_in_function fcode)
       arg_mask = 6;
       is_strlen = 1;
       break;
+    case BUILT_IN_STRNCAT_CHK:
+    /* For __strncat_chk the warning will be emitted only if overflowing
+       by at least strlen (dest) + 1 bytes.  */
+      arg_mask = 12;
+      break;
     case BUILT_IN_STRNCPY_CHK:
       arg_mask = 12;
       break;
@@ -10243,6 +10471,22 @@ maybe_emit_chk_warning (tree exp, enum built_in_function fcode)
       if (! len || ! host_integerp (len, 1) || tree_int_cst_lt (len, size))
 	return;
     }
+  else if (fcode == BUILT_IN_STRNCAT_CHK)
+    {
+      tree src = TREE_VALUE (TREE_CHAIN (arglist));
+      if (! src || ! host_integerp (len, 1) || tree_int_cst_lt (len, size))
+	return;
+      src = c_strlen (src, 1);
+      if (! src || ! host_integerp (src, 1))
+	{
+	  locus = EXPR_LOCATION (exp);
+	  warning (0, "%Hcall to %D might overflow destination buffer",
+		   &locus, get_callee_fndecl (exp));
+	  return;
+	}
+      else if (tree_int_cst_lt (src, size))
+	return;
+    }
   else if (! host_integerp (len, 1) || ! tree_int_cst_lt (size, len))
     return;
 
diff --git a/gcc/calls.c b/gcc/calls.c
index df6699a29bd..45001c08ee7 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1239,13 +1239,25 @@ precompute_arguments (int flags, int num_actuals, struct arg_data *args)
 
   /* If this is a libcall, then precompute all arguments so that we do not
      get extraneous instructions emitted as part of the libcall sequence.  */
-  if ((flags & ECF_LIBCALL_BLOCK) == 0)
+
+  /* If we preallocated the stack space, and some arguments must be passed
+     on the stack, then we must precompute any parameter which contains a
+     function call which will store arguments on the stack.
+     Otherwise, evaluating the parameter may clobber previous parameters
+     which have already been stored into the stack.  (We have code to avoid
+     such a case by saving the outgoing stack arguments, but it results in
+     worse code.)  */
+  if ((flags & ECF_LIBCALL_BLOCK) == 0 && !ACCUMULATE_OUTGOING_ARGS)
     return;
 
   for (i = 0; i < num_actuals; i++)
     {
       enum machine_mode mode;
 
+      if ((flags & ECF_LIBCALL_BLOCK) == 0
+	  && TREE_CODE (args[i].tree_value) != CALL_EXPR)
+	continue;
+
       /* If this is an addressable type, we cannot pre-evaluate it.  */
       gcc_assert (!TREE_ADDRESSABLE (TREE_TYPE (args[i].tree_value)));
 
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 711906a239d..d4ed731f9a2 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -753,7 +753,12 @@ expand_used_vars_for_block (tree block, bool toplevel)
 
   /* Expand all variables at this level.  */
   for (t = BLOCK_VARS (block); t ; t = TREE_CHAIN (t))
-    if (TREE_USED (t))
+    if (TREE_USED (t)
+	/* Force local static variables to be output when marked with
+	   the used attribute.  For unit-at-a-time, cgraph code already takes
+	   care of this.  */
+	|| (!flag_unit_at_a_time && TREE_STATIC (t)
+	    && DECL_PRESERVE_P (t)))
       expand_one_var (t, toplevel);
 
   this_sv_num = stack_vars_num;
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index b086aead980..7f1c79f7384 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -993,6 +993,7 @@ cgraph_finalize_compilation_unit (void)
   /* Keep track of already processed nodes when called multiple times for
      intermodule optimization.  */
   static struct cgraph_node *first_analyzed;
+  struct cgraph_node *first_processed = first_analyzed;
   static struct cgraph_varpool_node *first_analyzed_var;
 
   if (errorcount || sorrycount)
@@ -1013,7 +1014,10 @@ cgraph_finalize_compilation_unit (void)
     }
 
   timevar_push (TV_CGRAPH);
-  process_function_and_variable_attributes (first_analyzed, first_analyzed_var);
+  process_function_and_variable_attributes (first_processed,
+					    first_analyzed_var);
+  first_processed = cgraph_nodes;
+  first_analyzed_var = cgraph_varpool_nodes;
   cgraph_varpool_analyze_pending_decls ();
   if (cgraph_dump_file)
     {
@@ -1055,11 +1059,16 @@ cgraph_finalize_compilation_unit (void)
 	if (!edge->callee->reachable)
 	  cgraph_mark_reachable_node (edge->callee);
 
+      /* We finalize local static variables while constructing callgraph
+         edges.  Process their attributes too.  */
+      process_function_and_variable_attributes (first_processed,
+						first_analyzed_var);
+      first_processed = cgraph_nodes;
+      first_analyzed_var = cgraph_varpool_nodes;
       cgraph_varpool_analyze_pending_decls ();
     }
 
   /* Collect entry points to the unit.  */
-
   if (cgraph_dump_file)
     {
       fprintf (cgraph_dump_file, "Unit entry points:");
@@ -1098,7 +1107,6 @@ cgraph_finalize_compilation_unit (void)
       dump_cgraph (cgraph_dump_file);
     }
   first_analyzed = cgraph_nodes;
-  first_analyzed_var = cgraph_varpool_nodes;
   ggc_collect ();
   timevar_pop (TV_CGRAPH);
 }
diff --git a/gcc/combine.c b/gcc/combine.c
index bf446083656..653ffb7b22c 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5503,14 +5503,14 @@ simplify_set (rtx x)
 	}
       else if (GET_MODE (op0) == compare_mode && op1 == const0_rtx)
 	{
-	  SUBST(SET_SRC (x), op0);
+	  SUBST (SET_SRC (x), op0);
 	  src = SET_SRC (x);
-        }
-      else
+	}
+      /* Otherwise, update the COMPARE if needed.  */
+      else if (XEXP (src, 0) != op0 || XEXP (src, 1) != op1)
 	{
-	  /* Otherwise, update the COMPARE if needed.  */
-	  SUBST (XEXP (src, 0), op0);
-	  SUBST (XEXP (src, 1), op1);
+	  SUBST (SET_SRC (x), gen_rtx_COMPARE (compare_mode, op0, op1));
+	  src = SET_SRC (x);
 	}
     }
   else
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 366b550466f..86da78362b3 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -264,12 +264,12 @@ xscale-*-*)
 i[34567]86-*-*)
 	cpu_type=i386
 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
-		       pmmintrin.h tmmintrin.h"
+		       pmmintrin.h tmmintrin.h ammintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
 	extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
-		       pmmintrin.h tmmintrin.h"
+		       pmmintrin.h tmmintrin.h ammintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
@@ -2396,6 +2396,9 @@ if test x$with_cpu = x ; then
       ;;
     i686-*-* | i786-*-*)
       case ${target_noncanonical} in
+        amdfam10-*)
+          with_cpu=amdfam10
+          ;;
         k8-*|opteron-*|athlon_64-*)
           with_cpu=k8
           ;;
@@ -2436,6 +2439,9 @@ if test x$with_cpu = x ; then
       ;;
     x86_64-*-*)
       case ${target_noncanonical} in
+        amdfam10-*)
+          with_cpu=amdfam10
+          ;;
         k8-*|opteron-*|athlon_64-*)
           with_cpu=k8
           ;;
@@ -2668,7 +2674,7 @@ case "${target}" in
 				esac
 				# OK
 				;;
-			"" | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
+			"" | amdfam10 | k8 | opteron | athlon64 | athlon-fx | nocona | core2 | generic)
 				# OK
 				;;
 			*)
diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h
new file mode 100644
index 00000000000..869c2880e25
--- /dev/null
+++ b/gcc/config/i386/ammintrin.h
@@ -0,0 +1,73 @@
+/* Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING.  If not, write to
+   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+/* As a special exception, if you include this header file into source
+   files compiled by GCC, this header file does not by itself cause
+   the resulting executable to be covered by the GNU General Public
+   License.  This exception does not however invalidate any other
+   reasons why the executable file might be covered by the GNU General
+   Public License.  */
+
+/* Implemented from the specification included in the AMD Programmers
+   Manual Update, version 2.x */
+
+#ifndef _AMMINTRIN_H_INCLUDED
+#define _AMMINTRIN_H_INCLUDED
+
+#ifndef __SSE4A__
+# error "SSE4A instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
+#include <pmmintrin.h>
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_sd (double * __P, __m128d __Y)
+{
+  __builtin_ia32_movntsd (__P, (__v2df) __Y);
+}
+
+static __inline void __attribute__((__always_inline__))
+_mm_stream_ss (float * __P, __m128 __Y)
+{
+  __builtin_ia32_movntss (__P, (__v4sf) __Y);
+}
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_extract_si64 (__m128i __X, __m128i __Y)
+{
+  return (__m128i) __builtin_ia32_extrq ((__v2di) __X, (__v16qi) __Y);
+}
+
+#define _mm_extracti_si64(X, I, L) \
+((__m128i) __builtin_ia32_extrqi ((__v2di)(X), I, L))
+
+static __inline __m128i __attribute__((__always_inline__))
+_mm_insert_si64 (__m128i __X,__m128i __Y)
+{
+  return (__m128i) __builtin_ia32_insertq ((__v2di)__X, (__v2di)__Y);
+}
+
+#define _mm_inserti_si64(X, Y, I, L) \
+((__m128i) __builtin_ia32_insertqi ((__v2di)(X), (__v2di)(Y), I, L))
+
+
+#endif /* __SSE4A__ */
+
+#endif /* _AMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/athlon.md b/gcc/config/i386/athlon.md
index 6d92b948b4d..a52f9bc28b4 100644
--- a/gcc/config/i386/athlon.md
+++ b/gcc/config/i386/athlon.md
@@ -29,6 +29,8 @@
 	   (const_string "vector")]
 	(const_string "direct")))
 
+(define_attr "amdfam10_decode" "direct,vector,double"
+  (const_string "direct"))
 ;;
 ;;           decode0 decode1 decode2
 ;;                 \    |   /
@@ -131,18 +133,22 @@
 
 ;; Jump instructions are executed in the branch unit completely transparent to us
 (define_insn_reservation "athlon_branch" 0
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "ibr"))
 			 "athlon-direct,athlon-ieu")
 (define_insn_reservation "athlon_call" 0
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "call,callv"))
 			 "athlon-vector,athlon-ieu")
+(define_insn_reservation "athlon_call_amdfam10" 0
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "call,callv"))
+			 "athlon-double,athlon-ieu")
 
 ;; Latency of push operation is 3 cycles, but ESP value is available
 ;; earlier
 (define_insn_reservation "athlon_push" 2
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "push"))
 			 "athlon-direct,athlon-agu,athlon-store")
 (define_insn_reservation "athlon_pop" 4
@@ -153,12 +159,16 @@
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "pop"))
 			 "athlon-double,(athlon-ieu+athlon-load)")
+(define_insn_reservation "athlon_pop_amdfam10" 3
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "pop"))
+			 "athlon-direct,(athlon-ieu+athlon-load)")
 (define_insn_reservation "athlon_leave" 3
 			 (and (eq_attr "cpu" "athlon")
 			      (eq_attr "type" "leave"))
 			 "athlon-vector,(athlon-ieu+athlon-load)")
 (define_insn_reservation "athlon_leave_k8" 3
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (eq_attr "type" "leave"))
 			 "athlon-double,(athlon-ieu+athlon-load)")
 
@@ -167,6 +177,11 @@
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "lea"))
 			 "athlon-direct,athlon-agu,nothing")
+;; Lea executes in AGU unit with 1 cycle latency on AMDFAM10
+(define_insn_reservation "athlon_lea_amdfam10" 1
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "lea"))
+			 "athlon-direct,athlon-agu,nothing")
 
 ;; Mul executes in special multiplier unit attached to IEU0
 (define_insn_reservation "athlon_imul" 5
@@ -176,29 +191,35 @@
 			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,nothing,athlon-ieu0")
 ;; ??? Widening multiply is vector or double.
 (define_insn_reservation "athlon_imul_k8_DI" 4
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "imul")
 				   (and (eq_attr "mode" "DI")
 					(eq_attr "memory" "none,unknown"))))
 			 "athlon-direct0,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")
 (define_insn_reservation "athlon_imul_k8" 3
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "imul")
 				   (eq_attr "memory" "none,unknown")))
 			 "athlon-direct0,athlon-ieu0,athlon-mult,athlon-ieu0")
+(define_insn_reservation "athlon_imul_amdfam10_HI" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "imul")
+				   (and (eq_attr "mode" "HI")
+					(eq_attr "memory" "none,unknown"))))
+			 "athlon-vector,athlon-ieu0,athlon-mult,nothing,athlon-ieu0")			 
 (define_insn_reservation "athlon_imul_mem" 8
 			 (and (eq_attr "cpu" "athlon")
 			      (and (eq_attr "type" "imul")
 				   (eq_attr "memory" "load,both")))
 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,nothing,athlon-ieu")
 (define_insn_reservation "athlon_imul_mem_k8_DI" 7
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "imul")
 				   (and (eq_attr "mode" "DI")
 					(eq_attr "memory" "load,both"))))
 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,nothing,athlon-ieu")
 (define_insn_reservation "athlon_imul_mem_k8" 6
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "imul")
 				   (eq_attr "memory" "load,both")))
 			 "athlon-vector,athlon-load,athlon-ieu,athlon-mult,athlon-ieu")
@@ -209,21 +230,23 @@
 ;; other instructions.
 ;; ??? Experiments show that the idiv can overlap with roughly 6 cycles
 ;; of the other code
+;; Using the same heuristics for amdfam10 as K8 with idiv
 
 (define_insn_reservation "athlon_idiv" 6
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "idiv")
 				   (eq_attr "memory" "none,unknown")))
 			 "athlon-vector,(athlon-ieu0*6+(athlon-fpsched,athlon-fvector))")
 (define_insn_reservation "athlon_idiv_mem" 9
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "idiv")
 				   (eq_attr "memory" "load,both")))
 			 "athlon-vector,((athlon-load,athlon-ieu0*6)+(athlon-fpsched,athlon-fvector))")
 ;; The parallelism of string instructions is not documented.  Model it same way
 ;; as idiv to create smaller automata.  This probably does not matter much.
+;; Using the same heuristics for amdfam10 as K8 with idiv
 (define_insn_reservation "athlon_str" 6
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "str")
 				   (eq_attr "memory" "load,both,store")))
 			 "athlon-vector,athlon-load,athlon-ieu0*6")
@@ -234,34 +257,62 @@
 				   (and (eq_attr "unit" "integer,unknown")
 					(eq_attr "memory" "none,unknown"))))
 			 "athlon-direct,athlon-ieu")
+(define_insn_reservation "athlon_idirect_amdfam10" 1
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "none,unknown"))))
+			 "athlon-direct,athlon-ieu")
 (define_insn_reservation "athlon_ivector" 2
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "vector")
 				   (and (eq_attr "unit" "integer,unknown")
 					(eq_attr "memory" "none,unknown"))))
 			 "athlon-vector,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "none,unknown"))))
+			 "athlon-vector,athlon-ieu,athlon-ieu")
+
 (define_insn_reservation "athlon_idirect_loadmov" 3
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "imov")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-load")
+
 (define_insn_reservation "athlon_idirect_load" 4
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "direct")
 				   (and (eq_attr "unit" "integer,unknown")
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-load,athlon-ieu")
+(define_insn_reservation "athlon_idirect_load_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "load"))))
+			 "athlon-direct,athlon-load,athlon-ieu")
 (define_insn_reservation "athlon_ivector_load" 6
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "vector")
 				   (and (eq_attr "unit" "integer,unknown")
 					(eq_attr "memory" "load"))))
 			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+(define_insn_reservation "athlon_ivector_load_amdfam10" 6
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "load"))))
+			 "athlon-vector,athlon-load,athlon-ieu,athlon-ieu")
+
 (define_insn_reservation "athlon_idirect_movstore" 1
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "imov")
 				   (eq_attr "memory" "store")))
 			 "athlon-direct,athlon-agu,athlon-store")
+
 (define_insn_reservation "athlon_idirect_both" 4
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "direct")
@@ -270,6 +321,15 @@
 			 "athlon-direct,athlon-load,
 			  athlon-ieu,athlon-store,
 			  athlon-store")
+(define_insn_reservation "athlon_idirect_both_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "both"))))
+			 "athlon-direct,athlon-load,
+			  athlon-ieu,athlon-store,
+			  athlon-store")			  
+
 (define_insn_reservation "athlon_ivector_both" 6
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "vector")
@@ -279,6 +339,16 @@
 			  athlon-ieu,
 			  athlon-ieu,
 			  athlon-store")
+(define_insn_reservation "athlon_ivector_both_amdfam10" 6
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "both"))))
+			 "athlon-vector,athlon-load,
+			  athlon-ieu,
+			  athlon-ieu,
+			  athlon-store")
+
 (define_insn_reservation "athlon_idirect_store" 1
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "direct")
@@ -286,6 +356,14 @@
 					(eq_attr "memory" "store"))))
 			 "athlon-direct,(athlon-ieu+athlon-agu),
 			  athlon-store")
+(define_insn_reservation "athlon_idirect_store_amdfam10" 1
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "direct")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "store"))))
+			 "athlon-direct,(athlon-ieu+athlon-agu),
+			  athlon-store")
+
 (define_insn_reservation "athlon_ivector_store" 2
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "athlon_decode" "vector")
@@ -293,6 +371,13 @@
 					(eq_attr "memory" "store"))))
 			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
 			  athlon-store")
+(define_insn_reservation "athlon_ivector_store_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "amdfam10_decode" "vector")
+				   (and (eq_attr "unit" "integer,unknown")
+					(eq_attr "memory" "store"))))
+			 "athlon-vector,(athlon-ieu+athlon-agu),athlon-ieu,
+			  athlon-store")
 
 ;; Athlon floatin point unit
 (define_insn_reservation "athlon_fldxf" 12
@@ -302,7 +387,7 @@
 					(eq_attr "mode" "XF"))))
 			 "athlon-vector,athlon-fpload2,athlon-fvector*9")
 (define_insn_reservation "athlon_fldxf_k8" 13
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fmov")
 				   (and (eq_attr "memory" "load")
 					(eq_attr "mode" "XF"))))
@@ -314,7 +399,7 @@
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fpload,athlon-fany")
 (define_insn_reservation "athlon_fld_k8" 2
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fmov")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
@@ -326,7 +411,7 @@
 					(eq_attr "mode" "XF"))))
 			 "athlon-vector,(athlon-fpsched+athlon-agu),(athlon-store2+(athlon-fvector*7))")
 (define_insn_reservation "athlon_fstxf_k8" 8
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fmov")
 				   (and (eq_attr "memory" "store,both")
 					(eq_attr "mode" "XF"))))
@@ -337,16 +422,16 @@
 				   (eq_attr "memory" "store,both")))
 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
 (define_insn_reservation "athlon_fst_k8" 2
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fmov")
 				   (eq_attr "memory" "store,both")))
 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
 (define_insn_reservation "athlon_fist" 4
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fistp,fisttp"))
 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
 (define_insn_reservation "athlon_fmov" 2
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fmov"))
 			 "athlon-direct,athlon-fpsched,athlon-faddmul")
 (define_insn_reservation "athlon_fadd_load" 4
@@ -355,12 +440,12 @@
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fpload,athlon-fadd")
 (define_insn_reservation "athlon_fadd_load_k8" 6
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fop")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_fadd" 4
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fop"))
 			 "athlon-direct,athlon-fpsched,athlon-fadd")
 (define_insn_reservation "athlon_fmul_load" 4
@@ -369,16 +454,16 @@
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fpload,athlon-fmul")
 (define_insn_reservation "athlon_fmul_load_k8" 6
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fmul")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
 (define_insn_reservation "athlon_fmul" 4
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fmul"))
 			 "athlon-direct,athlon-fpsched,athlon-fmul")
 (define_insn_reservation "athlon_fsgn" 2
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fsgn"))
 			 "athlon-direct,athlon-fpsched,athlon-fmul")
 (define_insn_reservation "athlon_fdiv_load" 24
@@ -387,7 +472,7 @@
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fpload,athlon-fmul")
 (define_insn_reservation "athlon_fdiv_load_k8" 13
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fdiv")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
@@ -396,16 +481,16 @@
 			      (eq_attr "type" "fdiv"))
 			 "athlon-direct,athlon-fpsched,athlon-fmul")
 (define_insn_reservation "athlon_fdiv_k8" 11
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (eq_attr "type" "fdiv"))
 			 "athlon-direct,athlon-fpsched,athlon-fmul")
 (define_insn_reservation "athlon_fpspc_load" 103
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fpspc")
 				   (eq_attr "memory" "load")))
 			 "athlon-vector,athlon-fpload,athlon-fvector")
 (define_insn_reservation "athlon_fpspc" 100
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fpspc"))
 			 "athlon-vector,athlon-fpsched,athlon-fvector")
 (define_insn_reservation "athlon_fcmov_load" 7
@@ -418,12 +503,12 @@
 			      (eq_attr "type" "fcmov"))
 			 "athlon-vector,athlon-fpsched,athlon-fvector")
 (define_insn_reservation "athlon_fcmov_load_k8" 17
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fcmov")
 				   (eq_attr "memory" "load")))
 			 "athlon-vector,athlon-fploadk8,athlon-fvector")
 (define_insn_reservation "athlon_fcmov_k8" 15
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (eq_attr "type" "fcmov"))
 			 "athlon-vector,athlon-fpsched,athlon-fvector")
 ;; fcomi is vector decoded by uses only one pipe.
@@ -434,13 +519,13 @@
 				        (eq_attr "memory" "load"))))
 			 "athlon-vector,athlon-fpload,athlon-fadd")
 (define_insn_reservation "athlon_fcomi_load_k8" 5
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fcmp")
 				   (and (eq_attr "athlon_decode" "vector")
 				        (eq_attr "memory" "load"))))
 			 "athlon-vector,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_fcomi" 3
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "athlon_decode" "vector")
 				   (eq_attr "type" "fcmp")))
 			 "athlon-vector,athlon-fpsched,athlon-fadd")
@@ -450,18 +535,18 @@
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fpload,athlon-fadd")
 (define_insn_reservation "athlon_fcom_load_k8" 4
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "fcmp")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_fcom" 2
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (eq_attr "type" "fcmp"))
 			 "athlon-direct,athlon-fpsched,athlon-fadd")
 ;; Never seen by the scheduler because we still don't do post reg-stack
 ;; scheduling.
 ;(define_insn_reservation "athlon_fxch" 2
-;			 (and (eq_attr "cpu" "athlon,k8,generic64")
+;			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 ;			      (eq_attr "type" "fxch"))
 ;			 "athlon-direct,athlon-fpsched,athlon-fany")
 
@@ -516,6 +601,23 @@
 			      (and (eq_attr "type" "mmxmov,ssemov")
 				   (eq_attr "memory" "load")))
 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+;; On AMDFAM10 all double, single and integer packed and scalar SSEx data
+;; loads generated are direct path, latency of 2 and do not use any FP
+;; execution units.  No separate entries for movlpx/movhpx loads, which
+;; are direct path, latency of 4 and use the FADD/FMUL FP execution units,
+;; as they will not be generated.
+(define_insn_reservation "athlon_sseld_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssemov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8")
+;; On AMDFAM10 MMX data loads generated are direct path, latency of 4
+;; and can use any FP execution unit.
+(define_insn_reservation "athlon_mmxld_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "mmxmov")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8, athlon-fany")			 
 (define_insn_reservation "athlon_mmxssest" 3
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (and (eq_attr "type" "mmxmov,ssemov")
@@ -533,6 +635,25 @@
 			      (and (eq_attr "type" "mmxmov,ssemov")
 				   (eq_attr "memory" "store,both")))
 			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
+;; On AMDFAM10 all double, single and integer packed SSEx data stores
+;; generated are all double path, latency of 2 and use the FSTORE FP
+;; execution unit.  No separate entries for movupx/movdqu, which are
+;; vector path, latency of 3 and use the FSTORE*2 FP execution unit,
+;; as they will not be generated.
+(define_insn_reservation "athlon_ssest_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssemov")
+				   (and (eq_attr "mode" "V4SF,V2DF,TI")
+					(eq_attr "memory" "store,both"))))
+			 "athlon-double,(athlon-fpsched+athlon-agu),((athlon-fstore+athlon-store)*2)")
+;; On AMDFAM10 all double, single and integer scalar SSEx and MMX
+;; data stores generated are all direct path, latency of 2 and use
+;; the FSTORE FP execution unit
+(define_insn_reservation "athlon_mmxssest_short_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "mmxmov,ssemov")
+				   (eq_attr "memory" "store,both")))
+			 "athlon-direct,(athlon-fpsched+athlon-agu),(athlon-fstore+athlon-store)")
 (define_insn_reservation "athlon_movaps_k8" 2
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (and (eq_attr "type" "ssemov")
@@ -578,6 +699,11 @@
 			      (and (eq_attr "type" "sselog,sselog1")
 				   (eq_attr "memory" "load")))
 			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_sselog_load_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sselog,sselog1")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,(athlon-fadd|athlon-fmul)")
 (define_insn_reservation "athlon_sselog" 3
 			 (and (eq_attr "cpu" "athlon")
 			      (eq_attr "type" "sselog,sselog1"))
@@ -586,6 +712,11 @@
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "sselog,sselog1"))
 			 "athlon-double,athlon-fpsched,athlon-fmul")
+(define_insn_reservation "athlon_sselog_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "sselog,sselog1"))
+			 "athlon-direct,athlon-fpsched,(athlon-fadd|athlon-fmul)")
+
 ;; ??? pcmp executes in addmul, probably not worthwhile to bother about that.
 (define_insn_reservation "athlon_ssecmp_load" 2
 			 (and (eq_attr "cpu" "athlon")
@@ -594,13 +725,13 @@
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fpload,athlon-fadd")
 (define_insn_reservation "athlon_ssecmp_load_k8" 4
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "ssecmp")
 				   (and (eq_attr "mode" "SF,DF,DI,TI")
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_ssecmp" 2
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "ssecmp")
 				   (eq_attr "mode" "SF,DF,DI,TI")))
 			 "athlon-direct,athlon-fpsched,athlon-fadd")
@@ -614,6 +745,11 @@
 			      (and (eq_attr "type" "ssecmp")
 				   (eq_attr "memory" "load")))
 			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_load_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecmp")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_ssecmpvector" 3
 			 (and (eq_attr "cpu" "athlon")
 			      (eq_attr "type" "ssecmp"))
@@ -622,6 +758,10 @@
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "ssecmp"))
 			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_ssecmpvector_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "ssecmp"))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
 (define_insn_reservation "athlon_ssecomi_load" 4
 			 (and (eq_attr "cpu" "athlon")
 			      (and (eq_attr "type" "ssecomi")
@@ -632,10 +772,20 @@
 			      (and (eq_attr "type" "ssecomi")
 				   (eq_attr "memory" "load")))
 			 "athlon-vector,athlon-fploadk8,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_load_amdfam10" 5
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecomi")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_ssecomi" 4
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (eq_attr "type" "ssecmp"))
 			 "athlon-vector,athlon-fpsched,athlon-fadd")
+(define_insn_reservation "athlon_ssecomi_amdfam10" 3
+			 (and (eq_attr "cpu" "amdfam10")
+;; athlon_ssecomi tests type "ssecmp", apparently a bug; use "ssecomi" here.
+			      (eq_attr "type" "ssecomi"))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
 (define_insn_reservation "athlon_sseadd_load" 4
 			 (and (eq_attr "cpu" "athlon")
 			      (and (eq_attr "type" "sseadd")
@@ -643,13 +793,13 @@
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fpload,athlon-fadd")
 (define_insn_reservation "athlon_sseadd_load_k8" 6
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "sseadd")
 				   (and (eq_attr "mode" "SF,DF,DI")
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_sseadd" 4
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "sseadd")
 				   (eq_attr "mode" "SF,DF,DI")))
 			 "athlon-direct,athlon-fpsched,athlon-fadd")
@@ -663,6 +813,11 @@
 			      (and (eq_attr "type" "sseadd")
 				   (eq_attr "memory" "load")))
 			 "athlon-double,athlon-fpload2k8,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_load_amdfam10" 6
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseadd")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fadd")
 (define_insn_reservation "athlon_sseaddvector" 5
 			 (and (eq_attr "cpu" "athlon")
 			      (eq_attr "type" "sseadd"))
@@ -671,6 +826,10 @@
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "sseadd"))
 			 "athlon-double,athlon-fpsched,(athlon-fadd*2)")
+(define_insn_reservation "athlon_sseaddvector_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "sseadd"))
+			 "athlon-direct,athlon-fpsched,athlon-fadd")
 
 ;; Conversions behaves very irregularly and the scheduling is critical here.
 ;; Take each instruction separately.  Assume that the mode is always set to the
@@ -684,12 +843,25 @@
 					(and (eq_attr "mode" "DF")
 					     (eq_attr "memory" "load")))))
 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_load_amdfam10" 7
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
 (define_insn_reservation "athlon_ssecvt_cvtss2sd" 2
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "type" "ssecvt")
 				   (and (eq_attr "athlon_decode" "direct")
 					(eq_attr "mode" "DF"))))
 			 "athlon-direct,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtss2sd_amdfam10" 7
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "vector")
+					(eq_attr "mode" "DF"))))
+			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
 ;; cvtps2pd.  Model same way the other double decoded FP conversions.
 (define_insn_reservation "athlon_ssecvt_cvtps2pd_load_k8" 5
 			 (and (eq_attr "cpu" "k8,athlon,generic64")
@@ -698,12 +870,25 @@
 					(and (eq_attr "mode" "V2DF,V4SF,TI")
 					     (eq_attr "memory" "load")))))
 			 "athlon-double,athlon-fpload2k8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_load_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "direct")
+					(and (eq_attr "mode" "V2DF,V4SF,TI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-direct,athlon-fploadk8,athlon-fstore")
 (define_insn_reservation "athlon_ssecvt_cvtps2pd_k8" 3
 			 (and (eq_attr "cpu" "k8,athlon,generic64")
 			      (and (eq_attr "type" "ssecvt")
 				   (and (eq_attr "athlon_decode" "double")
 					(eq_attr "mode" "V2DF,V4SF,TI"))))
 			 "athlon-double,athlon-fpsched,athlon-fstore,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtps2pd_amdfam10" 2
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "direct")
+					(eq_attr "mode" "V2DF,V4SF,TI"))))
+			 "athlon-direct,athlon-fpsched,athlon-fstore")
 ;; cvtsi2sd mem,reg is directpath path  (cvtsi2sd reg,reg is doublepath)
 ;; cvtsi2sd has troughput 1 and is executed in store unit with latency of 6
 (define_insn_reservation "athlon_sseicvt_cvtsi2sd_load" 6
@@ -713,6 +898,13 @@
 					(and (eq_attr "mode" "SF,DF")
 					     (eq_attr "memory" "load")))))
 			 "athlon-direct,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_load_amdfam10" 9
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
 ;; cvtsi2ss mem, reg is doublepath
 (define_insn_reservation "athlon_sseicvt_cvtsi2ss_load" 9
 			 (and (eq_attr "cpu" "athlon")
@@ -728,6 +920,13 @@
 					(and (eq_attr "mode" "SF,DF")
 					     (eq_attr "memory" "load")))))
 			 "athlon-double,athlon-fploadk8,(athlon-fstore*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_load_amdfam10" 9
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")			 
 ;; cvtsi2sd reg,reg is double decoded (vector on Athlon)
 (define_insn_reservation "athlon_sseicvt_cvtsi2sd_k8" 11
 			 (and (eq_attr "cpu" "k8,athlon,generic64")
@@ -736,6 +935,13 @@
 					(and (eq_attr "mode" "SF,DF")
 					     (eq_attr "memory" "none")))))
 			 "athlon-double,athlon-fploadk8,athlon-fstore")
+(define_insn_reservation "athlon_sseicvt_cvtsi2sd_amdfam10" 14
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "vector")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
 ;; cvtsi2ss reg, reg is doublepath
 (define_insn_reservation "athlon_sseicvt_cvtsi2ss" 14
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
@@ -744,6 +950,13 @@
 					(and (eq_attr "mode" "SF,DF")
 					     (eq_attr "memory" "none")))))
 			 "athlon-vector,athlon-fploadk8,(athlon-fvector*2)")
+(define_insn_reservation "athlon_sseicvt_cvtsi2ss_amdfam10" 14
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "vector")
+					(and (eq_attr "mode" "SF,DF")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
 ;; cvtsd2ss mem,reg is doublepath, troughput unknown, latency 9
 (define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_k8" 9
 			 (and (eq_attr "cpu" "k8,athlon,generic64")
@@ -752,6 +965,13 @@
 					(and (eq_attr "mode" "SF")
 					     (eq_attr "memory" "load")))))
 			 "athlon-double,athlon-fploadk8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_load_amdfam10" 9
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "SF")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
 ;; cvtsd2ss reg,reg is vectorpath, troughput unknown, latency 12
 (define_insn_reservation "athlon_ssecvt_cvtsd2ss" 12
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
@@ -760,6 +980,13 @@
 					(and (eq_attr "mode" "SF")
 					     (eq_attr "memory" "none")))))
 			 "athlon-vector,athlon-fpsched,(athlon-fvector*3)")
+(define_insn_reservation "athlon_ssecvt_cvtsd2ss_amdfam10" 8
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "vector")
+					(and (eq_attr "mode" "SF")
+					     (eq_attr "memory" "none")))))
+			 "athlon-vector,athlon-fpsched,athlon-faddmul,(athlon-fstore*2)")
 (define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_k8" 8
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
 			      (and (eq_attr "type" "ssecvt")
@@ -767,6 +994,13 @@
 					(and (eq_attr "mode" "V4SF,V2DF,TI")
 					     (eq_attr "memory" "load")))))
 			 "athlon-double,athlon-fpload2k8,(athlon-fstore*3)")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_load_amdfam10" 9
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "V4SF,V2DF,TI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
 ;; cvtpd2ps mem,reg is vectorpath, troughput unknown, latency 10
 ;; ??? Why it is fater than cvtsd2ss?
 (define_insn_reservation "athlon_ssecvt_cvtpd2ps" 8
@@ -776,6 +1010,13 @@
 					(and (eq_attr "mode" "V4SF,V2DF,TI")
 					     (eq_attr "memory" "none")))))
 			 "athlon-vector,athlon-fpsched,athlon-fvector*2")
+(define_insn_reservation "athlon_ssecvt_cvtpd2ps_amdfam10" 7
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssecvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "V4SF,V2DF,TI")
+					     (eq_attr "memory" "none")))))
+			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
 ;; cvtsd2si mem,reg is doublepath, troughput 1, latency 9
 (define_insn_reservation "athlon_secvt_cvtsX2si_load" 9
 			 (and (eq_attr "cpu" "athlon,k8,generic64")
@@ -784,6 +1025,13 @@
 					(and (eq_attr "mode" "SI,DI")
 					     (eq_attr "memory" "load")))))
 			 "athlon-vector,athlon-fploadk8,athlon-fvector")
+(define_insn_reservation "athlon_secvt_cvtsX2si_load_amdfam10" 10
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "SI,DI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-fadd+athlon-fstore)")
 ;; cvtsd2si reg,reg is doublepath, troughput 1, latency 9
 (define_insn_reservation "athlon_ssecvt_cvtsX2si" 9
 			 (and (eq_attr "cpu" "athlon")
@@ -799,6 +1047,29 @@
 					(and (eq_attr "mode" "SI,DI")
 					     (eq_attr "memory" "none")))))
 			 "athlon-double,athlon-fpsched,athlon-fstore")
+(define_insn_reservation "athlon_ssecvt_cvtsX2si_amdfam10" 8
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "SI,DI")
+					     (eq_attr "memory" "none")))))
+			 "athlon-double,athlon-fpsched,(athlon-fadd+athlon-fstore)")
+;; cvtpd2dq mem,reg is doublepath, throughput 1, latency 9 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_load_amdfam10" 9
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "TI")
+					     (eq_attr "memory" "load")))))
+			 "athlon-double,athlon-fploadk8,(athlon-faddmul+athlon-fstore)")
+;; cvtpd2dq reg,reg is doublepath, throughput 1, latency 7 on amdfam10
+(define_insn_reservation "athlon_sseicvt_cvtpd2dq_amdfam10" 7
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "sseicvt")
+				   (and (eq_attr "amdfam10_decode" "double")
+					(and (eq_attr "mode" "TI")
+					     (eq_attr "memory" "none")))))
+			 "athlon-double,athlon-fpsched,(athlon-faddmul+athlon-fstore)")
 
 
 (define_insn_reservation "athlon_ssemul_load" 4
@@ -808,13 +1079,13 @@
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fpload,athlon-fmul")
 (define_insn_reservation "athlon_ssemul_load_k8" 6
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "ssemul")
 				   (and (eq_attr "mode" "SF,DF")
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fploadk8,athlon-fmul")
 (define_insn_reservation "athlon_ssemul" 4
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "ssemul")
 				   (eq_attr "mode" "SF,DF")))
 			 "athlon-direct,athlon-fpsched,athlon-fmul")
@@ -828,6 +1099,11 @@
 			      (and (eq_attr "type" "ssemul")
 				   (eq_attr "memory" "load")))
 			 "athlon-double,athlon-fpload2k8,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_load_amdfam10" 6
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssemul")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fmul")
 (define_insn_reservation "athlon_ssemulvector" 5
 			 (and (eq_attr "cpu" "athlon")
 			      (eq_attr "type" "ssemul"))
@@ -836,6 +1112,10 @@
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "ssemul"))
 			 "athlon-double,athlon-fpsched,(athlon-fmul*2)")
+(define_insn_reservation "athlon_ssemulvector_amdfam10" 4
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "ssemul"))
+			 "athlon-direct,athlon-fpsched,athlon-fmul")			 
 ;; divsd timings.  divss is faster
 (define_insn_reservation "athlon_ssediv_load" 20
 			 (and (eq_attr "cpu" "athlon")
@@ -844,13 +1124,13 @@
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fpload,athlon-fmul*17")
 (define_insn_reservation "athlon_ssediv_load_k8" 22
-			 (and (eq_attr "cpu" "k8,generic64")
+			 (and (eq_attr "cpu" "k8,generic64,amdfam10")
 			      (and (eq_attr "type" "ssediv")
 				   (and (eq_attr "mode" "SF,DF")
 					(eq_attr "memory" "load"))))
 			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")
 (define_insn_reservation "athlon_ssediv" 20
-			 (and (eq_attr "cpu" "athlon,k8,generic64")
+			 (and (eq_attr "cpu" "athlon,k8,generic64,amdfam10")
 			      (and (eq_attr "type" "ssediv")
 				   (eq_attr "mode" "SF,DF")))
 			 "athlon-direct,athlon-fpsched,athlon-fmul*17")
@@ -864,6 +1144,11 @@
 			      (and (eq_attr "type" "ssediv")
 				   (eq_attr "memory" "load")))
 			 "athlon-double,athlon-fpload2k8,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_load_amdfam10" 22
+			 (and (eq_attr "cpu" "amdfam10")
+			      (and (eq_attr "type" "ssediv")
+				   (eq_attr "memory" "load")))
+			 "athlon-direct,athlon-fploadk8,athlon-fmul*17")			 
 (define_insn_reservation "athlon_ssedivvector" 39
 			 (and (eq_attr "cpu" "athlon")
 			      (eq_attr "type" "ssediv"))
@@ -872,3 +1157,12 @@
 			 (and (eq_attr "cpu" "k8,generic64")
 			      (eq_attr "type" "ssediv"))
 			 "athlon-double,athlon-fmul*34")
+(define_insn_reservation "athlon_ssedivvector_amdfam10" 20
+			 (and (eq_attr "cpu" "amdfam10")
+			      (eq_attr "type" "ssediv"))
+			 "athlon-direct,athlon-fmul*17")
+(define_insn_reservation "athlon_sseins_amdfam10" 5
+                         (and (eq_attr "cpu" "amdfam10")
+                              (and (eq_attr "type" "sseins")
+                                   (eq_attr "mode" "TI")))
+                         "athlon-vector,athlon-fpsched,athlon-faddmul")
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 7257431b1fd..54ed0fffd45 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -30,7 +30,11 @@
 #ifndef _EMMINTRIN_H_INCLUDED
 #define _EMMINTRIN_H_INCLUDED
 
-#ifdef __SSE2__
+#ifndef __SSE2__
+# error "SSE2 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE header files.  */
 #include <xmmintrin.h>
 
 /* SSE2 */
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 94d2f11fefc..243a804ac8f 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -534,6 +534,71 @@ struct processor_costs k8_cost = {
   COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
 };
 
+static const struct processor_costs amdfam10_cost = {
+  COSTS_N_INSNS (1),                    /* cost of an add instruction */
+  COSTS_N_INSNS (2),                    /* cost of a lea instruction */
+  COSTS_N_INSNS (1),                    /* variable shift costs */
+  COSTS_N_INSNS (1),                    /* constant shift costs */
+  {COSTS_N_INSNS (3),                   /* cost of starting multiply for QI */
+   COSTS_N_INSNS (4),                   /*                               HI */
+   COSTS_N_INSNS (3),                   /*                               SI */
+   COSTS_N_INSNS (4),                   /*                               DI */
+   COSTS_N_INSNS (5)},                  /*                               other */
+  0,                                    /* cost of multiply per each bit set */
+  {COSTS_N_INSNS (19),                  /* cost of a divide/mod for QI */
+   COSTS_N_INSNS (35),                  /*                          HI */
+   COSTS_N_INSNS (51),                  /*                          SI */
+   COSTS_N_INSNS (83),                  /*                          DI */
+   COSTS_N_INSNS (83)},                 /*                          other */
+  COSTS_N_INSNS (1),			/* cost of movsx */
+  COSTS_N_INSNS (1),			/* cost of movzx */
+  8,					/* "large" insn */
+  9,					/* MOVE_RATIO */
+  4,					/* cost for loading QImode using movzbl */
+  {3, 4, 3},				/* cost of loading integer registers
+					   in QImode, HImode and SImode.
+					   Relative to reg-reg move (2).  */
+  {3, 4, 3},				/* cost of storing integer registers */
+  4,					/* cost of reg,reg fld/fst */
+  {4, 4, 12},				/* cost of loading fp registers
+		   			   in SFmode, DFmode and XFmode */
+  {6, 6, 8},				/* cost of storing fp registers
+ 		   			   in SFmode, DFmode and XFmode */
+  2,					/* cost of moving MMX register */
+  {3, 3},				/* cost of loading MMX registers
+					   in SImode and DImode */
+  {4, 4},				/* cost of storing MMX registers
+					   in SImode and DImode */
+  2,					/* cost of moving SSE register */
+  {4, 4, 3},				/* cost of loading SSE registers
+					   in SImode, DImode and TImode */
+  {4, 4, 5},				/* cost of storing SSE registers
+					   in SImode, DImode and TImode */
+  3,					/* MMX or SSE register to integer */
+  					/* On K8
+  					    MOVD reg64, xmmreg 	Double	FSTORE 4
+					    MOVD reg32, xmmreg 	Double	FSTORE 4
+					   On AMDFAM10
+					    MOVD reg64, xmmreg 	Double	FADD 3
+                                                                1/1  1/1
+					    MOVD reg32, xmmreg 	Double	FADD 3
+                                                                1/1  1/1 */
+  64,					/* size of prefetch block */
+  /* New AMD processors never drop prefetches; if they cannot be performed
+     immediately, they are queued.  We set number of simultaneous prefetches
+     to a large constant to reflect this (it probably is not a good idea not
+     to limit number of prefetches at all, as their execution also takes some
+     time).  */
+  100,					/* number of parallel prefetches */
+  5,					/* Branch cost */
+  COSTS_N_INSNS (4),			/* cost of FADD and FSUB insns.  */
+  COSTS_N_INSNS (4),			/* cost of FMUL instruction.  */
+  COSTS_N_INSNS (19),			/* cost of FDIV instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FABS instruction.  */
+  COSTS_N_INSNS (2),			/* cost of FCHS instruction.  */
+  COSTS_N_INSNS (35),			/* cost of FSQRT instruction.  */
+};
+
 static const
 struct processor_costs pentium4_cost = {
   COSTS_N_INSNS (1),			/* cost of an add instruction */
@@ -816,11 +881,13 @@ const struct processor_costs *ix86_cost = &pentium_cost;
 #define m_PENT4  (1<<PROCESSOR_PENTIUM4)
 #define m_K8  (1<<PROCESSOR_K8)
 #define m_ATHLON_K8  (m_K8 | m_ATHLON)
+#define m_AMDFAM10  (1<<PROCESSOR_AMDFAM10)
 #define m_NOCONA  (1<<PROCESSOR_NOCONA)
 #define m_CORE2  (1<<PROCESSOR_CORE2)
 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
+#define m_ATHLON_K8_AMDFAM10  (m_K8 | m_ATHLON | m_AMDFAM10)
 
 /* Generic instruction choice should be common subset of supported CPUs
    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
@@ -828,23 +895,31 @@ const struct processor_costs *ix86_cost = &pentium_cost;
 /* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
    Generic64 seems like good code size tradeoff.  We can't enable it for 32bit
    generic because it is not working well with PPro base chips.  */
-const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
-const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
+                          | m_GENERIC64;
+const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
+                            | m_NOCONA | m_CORE2 | m_GENERIC;
 const int x86_zero_extend_with_and = m_486 | m_PENT;
-const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
+/* Enable to zero extend integer registers to avoid partial dependencies */
+const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
+                     | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
 const int x86_double_with_add = ~m_386;
 const int x86_use_bit_test = m_386;
-const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
-const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA; 
+const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
+                              | m_K6 | m_CORE2 | m_GENERIC;
+const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
+                      | m_NOCONA;
 const int x86_fisttp = m_NOCONA;
-const int x86_3dnow_a = m_ATHLON_K8;
-const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
+const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
+                            | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 /* Branch hints were put in P4 based on simulation result. But
    after P4 was made, no performance benefit was observed with
    branch hints. It also increases the code size. As the result,
    icc never generates branch hints.  */
 const int x86_branch_hints = 0;
-const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
+const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
+                         /*m_GENERIC | m_ATHLON_K8 ? */
 /* We probably ought to watch for partial register stalls on Generic32
    compilation setting as well.  However in current implementation the
    partial register stalls are not eliminated very well - they can
@@ -856,13 +931,16 @@ const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
 const int x86_partial_reg_stall = m_PPRO;
 const int x86_partial_flag_reg_stall =  m_CORE2 | m_GENERIC;
 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
-const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
+const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
+                                  | m_CORE2 | m_GENERIC);
 const int x86_use_mov0 = m_K6;
 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
 const int x86_read_modify_write = ~m_PENT;
 const int x86_read_modify = ~(m_PENT | m_PPRO);
 const int x86_split_long_moves = m_PPRO;
-const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
+const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
+                               | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
+                               /* m_PENT4 ? */
 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
 const int x86_qimode_math = ~(0);
@@ -872,18 +950,37 @@ const int x86_promote_qi_regs = 0;
    if our scheme for avoiding partial stalls was more effective.  */
 const int x86_himode_math = ~(m_PPRO);
 const int x86_promote_hi_regs = m_PPRO;
-const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
-const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
+const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
+                          | m_CORE2 | m_GENERIC;
+const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
+                          | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
+                          | m_CORE2 | m_GENERIC;
+const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
+                          | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+/* Enable if integer moves are preferred for DFmode copies */
+const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
+                                       | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
+const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
+                                       | m_CORE2 | m_GENERIC;
+const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
+                                      | m_CORE2 | m_GENERIC;
+/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
+   for outgoing arguments will be computed and placed into the variable
+   `current_function_outgoing_args_size'. No space will be pushed onto the stack
+   for each call; instead, the function prologue should increase the stack frame
+   size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
+   not proper. */
+const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
+                                         | m_NOCONA | m_PPRO | m_CORE2
+                                         | m_GENERIC;
 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
 const int x86_shift1 = ~m_486;
-const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
+                                           | m_ATHLON_K8_AMDFAM10 | m_PENT4
+                                           | m_NOCONA | m_CORE2 | m_GENERIC;
 /* In Generic model we have an confict here in between PPro/Pentium4 based chips
    that thread 128bit SSE registers as single units versus K8 based chips that
    divide SSE registers to two 64bit halves.
@@ -893,15 +990,66 @@ const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PEN
    this option on P4 brings over 20% SPECfp regression, while enabling it on
    K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
    of moves.  */
-const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
+const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
+                                           | m_GENERIC | m_AMDFAM10;
 /* Set for machines where the type and dependencies are resolved on SSE
    register parts instead of whole registers, so we may maintain just
    lower part of scalar values in proper format leaving the upper part
    undefined.  */
 const int x86_sse_split_regs = m_ATHLON_K8;
-const int x86_sse_typeless_stores = m_ATHLON_K8;
+/* Code generation for scalar reg-reg moves of single and double precision data:
+     if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
+       movaps reg, reg
+     else
+       movss reg, reg
+     if (x86_sse_partial_reg_dependency == true)
+       movapd reg, reg
+     else
+       movsd reg, reg
+
+   Code generation for scalar loads of double precision data:
+     if (x86_sse_split_regs == true)
+       movlpd mem, reg      (gas syntax)
+     else
+       movsd mem, reg
+
+   Code generation for unaligned packed loads of single precision data
+   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
+     if (x86_sse_unaligned_move_optimal)
+       movups mem, reg
+
+     else if (x86_sse_partial_reg_dependency == true)
+       {
+         xorps  reg, reg
+         movlps mem, reg
+         movhps mem+8, reg
+       }
+     else
+       {
+         movlps mem, reg
+         movhps mem+8, reg
+       }
+
+   Code generation for unaligned packed loads of double precision data
+   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
+     if (x86_sse_unaligned_move_optimal)
+       movupd mem, reg
+
+     else if (x86_sse_split_regs == true)
+       {
+         movlpd mem, reg
+         movhpd mem+8, reg
+       }
+     else
+       {
+         movsd  mem, reg
+         movhpd mem+8, reg
+       }
+ */
+const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
+const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
-const int x86_use_ffreep = m_ATHLON_K8;
+const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
 const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6_GEODE | m_CORE2;
 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
 
@@ -909,19 +1057,22 @@ const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
    integer data in xmm registers.  Which results in pretty abysmal code.  */
 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
 
-const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4 | m_NOCONA | m_PPRO | m_GENERIC32;
+const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON | m_PENT4
+                                    | m_NOCONA | m_PPRO | m_GENERIC32;
 /* Some CPU cores are not able to predict more than 4 branch instructions in
    the 16 byte window.  */
-const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
-const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
-const int x86_use_bt = m_ATHLON_K8;
+const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
+                                | m_NOCONA | m_CORE2 | m_GENERIC;
+const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
+                         | m_CORE2 | m_GENERIC;
+const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
 /* Compare and exchange was added for 80486.  */
 const int x86_cmpxchg = ~m_386;
 /* Compare and exchange 8 bytes was added for pentium.  */
 const int x86_cmpxchg8b = ~(m_386 | m_486);
 /* Exchange and add was added for 80486.  */
 const int x86_xadd = ~m_386;
-const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
+const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
 
 /* In case the average insn count for single function invocation is
    lower than this constant, emit fast (but longer) prologue and
@@ -1485,16 +1636,24 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
     case OPT_msse:
       if (!value)
 	{
-	  target_flags &= ~(MASK_SSE2 | MASK_SSE3);
-	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
+	  target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
+	  target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
 	}
       return true;
 
     case OPT_msse2:
       if (!value)
 	{
-	  target_flags &= ~MASK_SSE3;
-	  target_flags_explicit |= MASK_SSE3;
+	  target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
+	  target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
+	}
+      return true;
+
+    case OPT_msse3:
+      if (!value)
+	{
+	  target_flags &= ~MASK_SSE4A;
+	  target_flags_explicit |= MASK_SSE4A;
 	}
       return true;
 
@@ -1546,7 +1705,8 @@ override_options (void)
       {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
       {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
       {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
-      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
+      {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
+      {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
     };
 
   static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
@@ -1565,7 +1725,10 @@ override_options (void)
 	  PTA_3DNOW_A = 64,
 	  PTA_64BIT = 128,
 	  PTA_SSSE3 = 256,
-	  PTA_CX16 = 512
+	  PTA_CX16 = 512,
+	  PTA_POPCNT = 1024,
+	  PTA_ABM = 2048,
+ 	  PTA_SSE4A = 4096
 	} flags;
     }
   const processor_alias_table[] =
@@ -1621,6 +1784,10 @@ override_options (void)
 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
       {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
 				      | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
+      {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
+                                       | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
+                                       | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
+                                       | PTA_ABM | PTA_SSE4A | PTA_CX16},
       {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch.  */ },
       {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch.  */ },
     };
@@ -1772,6 +1939,15 @@ override_options (void)
 	  x86_prefetch_sse = true;
 	if (processor_alias_table[i].flags & PTA_CX16)
 	  x86_cmpxchg16b = true;
+	if (processor_alias_table[i].flags & PTA_POPCNT
+	    && !(target_flags_explicit & MASK_POPCNT))
+	  target_flags |= MASK_POPCNT;
+	if (processor_alias_table[i].flags & PTA_ABM
+	    && !(target_flags_explicit & MASK_ABM))
+	  target_flags |= MASK_ABM;
+	if (processor_alias_table[i].flags & PTA_SSE4A
+	    && !(target_flags_explicit & MASK_SSE4A))
+	  target_flags |= MASK_SSE4A;
 	if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
 	  error ("CPU you selected does not support x86-64 "
 		 "instruction set");
@@ -1963,6 +2139,10 @@ override_options (void)
   if (TARGET_SSSE3)
     target_flags |= MASK_SSE3;
 
+  /* Turn on SSE3 builtins for -msse4a.  */
+  if (TARGET_SSE4A)
+    target_flags |= MASK_SSE3;
+
   /* Turn on SSE2 builtins for -msse3.  */
   if (TARGET_SSE3)
     target_flags |= MASK_SSE2;
@@ -1982,6 +2162,10 @@ override_options (void)
   if (TARGET_3DNOW)
     target_flags |= MASK_MMX;
 
+  /* Turn on POPCNT builtins for -mabm.  */
+  if (TARGET_ABM)
+    target_flags |= MASK_POPCNT;
+
   if (TARGET_64BIT)
     {
       if (TARGET_ALIGN_DOUBLE)
@@ -8900,8 +9084,16 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	}
 
       if (TARGET_SSE2 && mode == V2DFmode)
-	{
-	  rtx zero;
+        {
+          rtx zero;
+
+          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+            {
+              op0 = gen_lowpart (V2DFmode, op0);
+              op1 = gen_lowpart (V2DFmode, op1);
+              emit_insn (gen_sse2_movupd (op0, op1));
+              return;
+            }
 
 	  /* When SSE registers are split into halves, we can avoid
 	     writing to the top half twice.  */
@@ -8929,7 +9121,15 @@ ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
 	  emit_insn (gen_sse2_loadhpd (op0, op0, m));
 	}
       else
-	{
+        {
+          if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
+            {
+              op0 = gen_lowpart (V4SFmode, op0);
+              op1 = gen_lowpart (V4SFmode, op1);
+              emit_insn (gen_sse_movups (op0, op1));
+              return;
+            }
+
 	  if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
 	    emit_move_insn (op0, CONST0_RTX (mode));
 	  else
@@ -13267,6 +13467,9 @@ assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
 
   gcc_assert (n < MAX_386_STACK_LOCALS);
 
+  /* Virtual slot is valid only before vregs are instantiated.  */
+  gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
+
   for (s = ix86_stack_locals; s; s = s->next)
     if (s->mode == mode && s->n == n)
       return s->rtl;
@@ -13461,6 +13664,7 @@ ix86_issue_rate (void)
     case PROCESSOR_PENTIUM4:
     case PROCESSOR_ATHLON:
     case PROCESSOR_K8:
+    case PROCESSOR_AMDFAM10:
     case PROCESSOR_NOCONA:
     case PROCESSOR_GENERIC32:
     case PROCESSOR_GENERIC64:
@@ -13659,6 +13863,7 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
 
     case PROCESSOR_ATHLON:
     case PROCESSOR_K8:
+    case PROCESSOR_AMDFAM10:
     case PROCESSOR_GENERIC32:
     case PROCESSOR_GENERIC64:
       memory = get_attr_memory (insn);
@@ -14370,6 +14575,14 @@ enum ix86_builtins
   IX86_BUILTIN_PABSW128,
   IX86_BUILTIN_PABSD128,
 
+  /* AMDFAM10 - SSE4A New Instructions.  */
+  IX86_BUILTIN_MOVNTSD,
+  IX86_BUILTIN_MOVNTSS,
+  IX86_BUILTIN_EXTRQI,
+  IX86_BUILTIN_EXTRQ,
+  IX86_BUILTIN_INSERTQI,
+  IX86_BUILTIN_INSERTQ,
+
   IX86_BUILTIN_VEC_INIT_V2SI,
   IX86_BUILTIN_VEC_INIT_V4HI,
   IX86_BUILTIN_VEC_INIT_V8QI,
@@ -15096,6 +15309,18 @@ ix86_init_mmx_sse_builtins (void)
     = build_function_type_list (void_type_node,
 			        pchar_type_node, V16QI_type_node, NULL_TREE);
 
+  tree v2di_ftype_v2di_unsigned_unsigned
+    = build_function_type_list (V2DI_type_node, V2DI_type_node,
+                                unsigned_type_node, unsigned_type_node,
+                                NULL_TREE);
+  tree v2di_ftype_v2di_v2di_unsigned_unsigned
+    = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
+                                unsigned_type_node, unsigned_type_node,
+                                NULL_TREE);
+  tree v2di_ftype_v2di_v16qi
+    = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
+                                NULL_TREE);
+
   tree float80_type;
   tree float128_type;
   tree ftype;
@@ -15429,6 +15654,20 @@ ix86_init_mmx_sse_builtins (void)
   def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
 	       IX86_BUILTIN_PALIGNR);
 
+  /* AMDFAM10 SSE4A new built-ins.  */
+  def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd", 
+               void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_movntss", 
+               void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi", 
+               v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
+               v2di_ftype_v2di_v16qi,  IX86_BUILTIN_EXTRQ);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
+               v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
+  def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
+               v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
+
   /* Access to the vec_init patterns.  */
   ftype = build_function_type_list (V2SI_type_node, integer_type_node,
 				    integer_type_node, NULL_TREE);
@@ -15921,9 +16160,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
   enum insn_code icode;
   tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
   tree arglist = TREE_OPERAND (exp, 1);
-  tree arg0, arg1, arg2;
-  rtx op0, op1, op2, pat;
-  enum machine_mode tmode, mode0, mode1, mode2, mode3;
+  tree arg0, arg1, arg2, arg3;
+  rtx op0, op1, op2, op3, pat;
+  enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
   unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
 
   switch (fcode)
@@ -16035,13 +16274,13 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
 
     case IX86_BUILTIN_LDMXCSR:
       op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
-      target = assign_386_stack_local (SImode, SLOT_TEMP);
+      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
       emit_move_insn (target, op0);
       emit_insn (gen_sse_ldmxcsr (target));
       return 0;
 
     case IX86_BUILTIN_STMXCSR:
-      target = assign_386_stack_local (SImode, SLOT_TEMP);
+      target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
       emit_insn (gen_sse_stmxcsr (target));
       return copy_to_mode_reg (SImode, target);
 
@@ -16436,6 +16675,114 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
       emit_insn (pat);
       return target;
 
+    case IX86_BUILTIN_MOVNTSD:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
+
+    case IX86_BUILTIN_MOVNTSS:
+      return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
+
+    case IX86_BUILTIN_INSERTQ:
+    case IX86_BUILTIN_EXTRQ:
+      icode = (fcode == IX86_BUILTIN_EXTRQ
+               ? CODE_FOR_sse4a_extrq
+               : CODE_FOR_sse4a_insertq);
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        op1 = copy_to_mode_reg (mode2, op1);
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1);
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_EXTRQI:
+      icode = CODE_FOR_sse4a_extrqi;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      mode3 = insn_data[icode].operand[3].mode;
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        {
+          error ("index mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+        {
+          error ("length mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2);
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return target;
+
+    case IX86_BUILTIN_INSERTQI:
+      icode = CODE_FOR_sse4a_insertqi;
+      arg0 = TREE_VALUE (arglist);
+      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+      arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
+      op3 = expand_expr (arg3, NULL_RTX, VOIDmode, 0);
+      tmode = insn_data[icode].operand[0].mode;
+      mode1 = insn_data[icode].operand[1].mode;
+      mode2 = insn_data[icode].operand[2].mode;
+      mode3 = insn_data[icode].operand[3].mode;
+      mode4 = insn_data[icode].operand[4].mode;
+
+      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
+        op0 = copy_to_mode_reg (mode1, op0);
+
+      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
+        op1 = copy_to_mode_reg (mode2, op1);
+
+      if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
+        {
+          error ("index mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
+        {
+          error ("length mask must be an immediate");
+          return gen_reg_rtx (tmode);
+        }
+      if (optimize || target == 0
+          || GET_MODE (target) != tmode
+          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+        target = gen_reg_rtx (tmode);
+      pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
+      if (! pat)
+        return NULL_RTX;
+      emit_insn (pat);
+      return target;
+
     case IX86_BUILTIN_VEC_INIT_V2SI:
     case IX86_BUILTIN_VEC_INIT_V4HI:
     case IX86_BUILTIN_VEC_INIT_V8QI:
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index a7ed29ee88f..778eb9f4d68 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -141,6 +141,7 @@ extern const struct processor_costs *ix86_cost;
 #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32)
 #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64)
 #define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
+#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)
 
 #define TUNEMASK (1 << ix86_tune)
 extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
@@ -159,6 +160,7 @@ extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
 extern const int x86_epilogue_using_move, x86_decompose_lea;
 extern const int x86_arch_always_fancy_math_387, x86_shift1;
 extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
+extern const int x86_sse_unaligned_move_optimal;
 extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
 extern const int x86_use_ffreep;
 extern const int x86_inter_unit_moves, x86_schedule;
@@ -208,6 +210,8 @@ extern int x86_prefetch_sse, x86_cmpxchg16b;
 #define TARGET_PARTIAL_REG_DEPENDENCY (x86_partial_reg_dependency & TUNEMASK)
 #define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
 				      (x86_sse_partial_reg_dependency & TUNEMASK)
+#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
+				      (x86_sse_unaligned_move_optimal & TUNEMASK)
 #define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & TUNEMASK)
 #define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & TUNEMASK)
 #define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & TUNEMASK)
@@ -376,6 +380,8 @@ extern int x86_prefetch_sse, x86_cmpxchg16b;
 	}							\
       else if (TARGET_K8)					\
 	builtin_define ("__tune_k8__");				\
+      else if (TARGET_AMDFAM10)					\
+	builtin_define ("__tune_amdfam10__");			\
       else if (TARGET_PENTIUM4)					\
 	builtin_define ("__tune_pentium4__");			\
       else if (TARGET_NOCONA)					\
@@ -400,6 +406,8 @@ extern int x86_prefetch_sse, x86_cmpxchg16b;
 	  builtin_define ("__SSSE3__");				\
 	  builtin_define ("__MNI__");				\
 	}							\
+      if (TARGET_SSE4A)						\
+	builtin_define ("__SSE4A__");		                \
       if (TARGET_SSE_MATH && TARGET_SSE)			\
 	builtin_define ("__SSE_MATH__");			\
       if (TARGET_SSE_MATH && TARGET_SSE2)			\
@@ -455,6 +463,11 @@ extern int x86_prefetch_sse, x86_cmpxchg16b;
 	  builtin_define ("__k8");				\
 	  builtin_define ("__k8__");				\
 	}							\
+      else if (ix86_arch == PROCESSOR_AMDFAM10)			\
+	{							\
+	  builtin_define ("__amdfam10");			\
+	  builtin_define ("__amdfam10__");			\
+	}							\
       else if (ix86_arch == PROCESSOR_PENTIUM4)			\
 	{							\
 	  builtin_define ("__pentium4");			\
@@ -493,13 +506,14 @@ extern int x86_prefetch_sse, x86_cmpxchg16b;
 #define TARGET_CPU_DEFAULT_nocona 17
 #define TARGET_CPU_DEFAULT_core2 18
 #define TARGET_CPU_DEFAULT_generic 19
+#define TARGET_CPU_DEFAULT_amdfam10 20
 
 #define TARGET_CPU_DEFAULT_NAMES {"i386", "i486", "pentium", "pentium-mmx",\
 				  "pentiumpro", "pentium2", "pentium3", \
 				  "pentium4", "geode", "k6", "k6-2", "k6-3", \
 				  "athlon", "athlon-4", "k8", \
 				  "pentium-m", "prescott", "nocona", \
-				  "core2", "generic"}
+				  "core2", "generic", "amdfam10"}
 
 #ifndef CC1_SPEC
 #define CC1_SPEC "%(cc1_cpu) "
@@ -2162,6 +2176,7 @@ enum processor_type
   PROCESSOR_CORE2,
   PROCESSOR_GENERIC32,
   PROCESSOR_GENERIC64,
+  PROCESSOR_AMDFAM10,
   PROCESSOR_max
 };
 
@@ -2243,7 +2258,8 @@ enum ix86_entity
 
 enum ix86_stack_slot 
 {
-  SLOT_TEMP = 0,
+  SLOT_VIRTUAL = 0,
+  SLOT_TEMP,
   SLOT_CW_STORED,
   SLOT_CW_TRUNC,
   SLOT_CW_FLOOR,
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 78f74ee344c..e8b72969710 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -151,6 +151,12 @@
    (UNSPEC_PSHUFB		120)
    (UNSPEC_PSIGN		121)
    (UNSPEC_PALIGNR		122)
+
+   ; For SSE4A support
+   (UNSPEC_EXTRQI               130)
+   (UNSPEC_EXTRQ                131)   
+   (UNSPEC_INSERTQI             132)
+   (UNSPEC_INSERTQ              133)
   ])
 
 (define_constants
@@ -190,7 +196,8 @@
 
 ;; Processor type.  This attribute must exactly match the processor_type
 ;; enumeration in i386.h.
-(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,nocona,core2,generic32,generic64"
+(define_attr "cpu" "i386,i486,pentium,pentiumpro,geode,k6,athlon,pentium4,k8,
+                    nocona,core2,generic32,generic64,amdfam10"
   (const (symbol_ref "ix86_tune")))
 
 ;; A basic instruction type.  Refinements due to arguments to be
@@ -201,10 +208,10 @@
    incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
-   str,cld,
+   str,bitmanip,cld,
    fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint,
    sselog,sselog1,sseiadd,sseishft,sseimul,
-   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,
+   sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins,
    mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
   (const_string "other"))
 
@@ -218,7 +225,7 @@
   (cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,fxch,fistp,fisttp,frndint")
 	   (const_string "i387")
 	 (eq_attr "type" "sselog,sselog1,sseiadd,sseishft,sseimul,
-			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv")
+			  sse,ssemov,sseadd,ssemul,ssecmp,ssecomi,ssecvt,sseicvt,ssediv,sseins")
 	   (const_string "sse")
 	 (eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
 	   (const_string "mmx")
@@ -228,7 +235,8 @@
 
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
-  (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave")
+  (cond [(eq_attr "type" "incdec,setcc,icmov,str,cld,lea,other,multi,idiv,leave,
+			  bitmanip")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
@@ -282,7 +290,7 @@
 ;; Set when 0f opcode prefix is used.
 (define_attr "prefix_0f" ""
   (if_then_else 
-    (ior (eq_attr "type" "imovx,setcc,icmov")
+    (ior (eq_attr "type" "imovx,setcc,icmov,bitmanip")
 	 (eq_attr "unit" "sse,mmx"))
     (const_int 1)
     (const_int 0)))
@@ -407,7 +415,7 @@
 	   (const_string "load")
 	 (and (eq_attr "type"
 		 "!alu1,negnot,ishift1,
-		   imov,imovx,icmp,test,
+		   imov,imovx,icmp,test,bitmanip,
 		   fmov,fcmp,fsgn,
 		   sse,ssemov,ssecmp,ssecomi,ssecvt,sseicvt,sselog1,
 		   mmx,mmxmov,mmxcmp,mmxcvt")
@@ -961,10 +969,11 @@
   "sahf"
   [(set_attr "length" "1")
    (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")
    (set_attr "mode" "SI")])
 
 ;; Pentium Pro can do steps 1 through 3 in one go.
-
+;; comi*, ucomi*, fcomi*, ficomi*, fucomi* (i387 instructions set condition codes).
 (define_insn "*cmpfp_i_mixed"
   [(set (reg:CCFP FLAGS_REG)
 	(compare:CCFP (match_operand 0 "register_operand" "f#x,x#f")
@@ -978,7 +987,8 @@
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
         (const_string "DF")))
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_insn "*cmpfp_i_sse"
   [(set (reg:CCFP FLAGS_REG)
@@ -993,7 +1003,8 @@
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
         (const_string "DF")))
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_insn "*cmpfp_i_i387"
   [(set (reg:CCFP FLAGS_REG)
@@ -1012,7 +1023,8 @@
 	      (const_string "DF")
 	   ]
 	   (const_string "XF")))
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_insn "*cmpfp_iu_mixed"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1027,7 +1039,8 @@
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
         (const_string "DF")))
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_insn "*cmpfp_iu_sse"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1042,7 +1055,8 @@
      (if_then_else (match_operand:SF 1 "" "")
         (const_string "SF")
         (const_string "DF")))
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_insn "*cmpfp_iu_387"
   [(set (reg:CCFPU FLAGS_REG)
@@ -1061,7 +1075,8 @@
 	      (const_string "DF")
 	   ]
 	   (const_string "XF")))
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "direct")])
 
 ;; Move instructions.
 
@@ -1267,7 +1282,8 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "SI")
    (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")])   
 
 (define_expand "movhi"
   [(set (match_operand:HI 0 "nonimmediate_operand" "")
@@ -1384,8 +1400,10 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "SI")
    (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")])   
 
+;; No amdfam10_decode attribute is added here, since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10.
 (define_insn "*swaphi_2"
   [(set (match_operand:HI 0 "register_operand" "+r")
 	(match_operand:HI 1 "register_operand" "+r"))
@@ -1558,8 +1576,10 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "SI")
    (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])   
 
+;; No amdfam10_decode attribute is added here, since TARGET_PARTIAL_REG_STALL is disabled for AMDFAM10.
 (define_insn "*swapqi_2"
   [(set (match_operand:QI 0 "register_operand" "+q")
 	(match_operand:QI 1 "register_operand" "+q"))
@@ -2113,7 +2133,8 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "DI")
    (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "double")])   
 
 (define_expand "movti"
   [(set (match_operand:TI 0 "nonimmediate_operand" "")
@@ -3694,7 +3715,7 @@
     ;
   else
     {
-      rtx temp = assign_386_stack_local (SFmode, SLOT_TEMP);
+      rtx temp = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
       emit_insn (gen_truncdfsf2_with_temp (operands[0], operands[1], temp));
       DONE;
     }
@@ -3846,7 +3867,7 @@
       DONE;
     }
   else
-    operands[2] = assign_386_stack_local (SFmode, SLOT_TEMP);
+    operands[2] = assign_386_stack_local (SFmode, SLOT_VIRTUAL);
 })
 
 (define_insn "*truncxfsf2_mixed"
@@ -3944,7 +3965,7 @@
       DONE;
     }
   else
-    operands[2] = assign_386_stack_local (DFmode, SLOT_TEMP);
+    operands[2] = assign_386_stack_local (DFmode, SLOT_VIRTUAL);
 })
 
 (define_insn "*truncxfdf2_mixed"
@@ -4122,7 +4143,8 @@
   "cvttss2si{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "double,vector")])
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
 
 (define_insn "fix_truncdfdi_sse"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -4131,7 +4153,8 @@
   "cvttsd2si{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")])
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
 
 (define_insn "fix_truncsfsi_sse"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4140,7 +4163,8 @@
   "cvttss2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")])
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
 
 (define_insn "fix_truncdfsi_sse"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -4149,7 +4173,8 @@
   "cvttsd2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,vector")])
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
 
 ;; Avoid vector decoded forms of the instruction.
 (define_peephole2
@@ -4410,7 +4435,8 @@
   [(set_attr "length" "2")
    (set_attr "mode" "HI")
    (set_attr "unit" "i387")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])   
 
 ;; Conversion between fixed point and floating point.
 
@@ -4461,6 +4487,7 @@
    (set_attr "mode" "SF")
    (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,vector,double")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsisf2_sse"
@@ -4471,6 +4498,7 @@
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
    (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsisf2_i387"
@@ -4504,6 +4532,7 @@
    (set_attr "mode" "SF")
    (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,vector,double")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatdisf2_sse"
@@ -4514,6 +4543,7 @@
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SF")
    (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatdisf2_i387"
@@ -4572,6 +4602,7 @@
    (set_attr "mode" "DF")
    (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,double,direct")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsidf2_sse"
@@ -4582,6 +4613,7 @@
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
    (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatsidf2_i387"
@@ -4615,6 +4647,7 @@
    (set_attr "mode" "DF")
    (set_attr "unit" "*,i387,*,*")
    (set_attr "athlon_decode" "*,*,double,direct")
+   (set_attr "amdfam10_decode" "*,*,vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatdidf2_sse"
@@ -4625,6 +4658,7 @@
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
    (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "fp_int_src" "true")])
 
 (define_insn "*floatdidf2_i387"
@@ -6832,6 +6866,14 @@
   "TARGET_64BIT"
   "")
 
+;; On AMDFAM10 
+;; IMUL reg64, reg64, imm8 	Direct
+;; IMUL reg64, mem64, imm8 	VectorPath
+;; IMUL reg64, reg64, imm32 	Direct
+;; IMUL reg64, mem64, imm32 	VectorPath 
+;; IMUL reg64, reg64 		Direct
+;; IMUL reg64, mem64 		Direct
+
 (define_insn "*muldi3_1_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(mult:DI (match_operand:DI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6854,6 +6896,11 @@
 		    (match_operand 1 "memory_operand" ""))
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))	      
    (set_attr "mode" "DI")])
 
 (define_expand "mulsi3"
@@ -6864,6 +6911,14 @@
   ""
   "")
 
+;; On AMDFAM10 
+;; IMUL reg32, reg32, imm8 	Direct
+;; IMUL reg32, mem32, imm8 	VectorPath
+;; IMUL reg32, reg32, imm32 	Direct
+;; IMUL reg32, mem32, imm32 	VectorPath
+;; IMUL reg32, reg32 		Direct
+;; IMUL reg32, mem32 		Direct
+
 (define_insn "*mulsi3_1"
   [(set (match_operand:SI 0 "register_operand" "=r,r,r")
 	(mult:SI (match_operand:SI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6885,6 +6940,11 @@
 		    (match_operand 1 "memory_operand" ""))
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))	      
    (set_attr "mode" "SI")])
 
 (define_insn "*mulsi3_1_zext"
@@ -6910,6 +6970,11 @@
 		    (match_operand 1 "memory_operand" ""))
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(and (eq_attr "alternative" "0,1")
+		    (match_operand 1 "memory_operand" ""))
+		  (const_string "vector")]
+	      (const_string "direct")))	      
    (set_attr "mode" "SI")])
 
 (define_expand "mulhi3"
@@ -6920,6 +6985,13 @@
   "TARGET_HIMODE_MATH"
   "")
 
+;; On AMDFAM10
+;; IMUL reg16, reg16, imm8 	VectorPath
+;; IMUL reg16, mem16, imm8 	VectorPath
+;; IMUL reg16, reg16, imm16 	VectorPath
+;; IMUL reg16, mem16, imm16 	VectorPath
+;; IMUL reg16, reg16 		Direct
+;; IMUL reg16, mem16 		Direct
 (define_insn "*mulhi3_1"
   [(set (match_operand:HI 0 "register_operand" "=r,r,r")
 	(mult:HI (match_operand:HI 1 "nonimmediate_operand" "%rm,rm,0")
@@ -6938,6 +7010,10 @@
 	       (eq_attr "alternative" "1,2")
 		  (const_string "vector")]
 	      (const_string "direct")))
+   (set (attr "amdfam10_decode")
+	(cond [(eq_attr "alternative" "0,1")
+		  (const_string "vector")]
+	      (const_string "direct")))
    (set_attr "mode" "HI")])
 
 (define_expand "mulqi3"
@@ -6948,6 +7024,10 @@
   "TARGET_QIMODE_MATH"
   "")
 
+;; On AMDFAM10
+;; MUL reg8 	Direct
+;; MUL mem8 	Direct
+
 (define_insn "*mulqi3_1"
   [(set (match_operand:QI 0 "register_operand" "=a")
 	(mult:QI (match_operand:QI 1 "nonimmediate_operand" "%0")
@@ -6962,6 +7042,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")        
    (set_attr "mode" "QI")])
 
 (define_expand "umulqihi3"
@@ -6988,6 +7069,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")        
    (set_attr "mode" "QI")])
 
 (define_expand "mulqihi3"
@@ -7012,6 +7094,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "direct")))
+   (set_attr "amdfam10_decode" "direct")        
    (set_attr "mode" "QI")])
 
 (define_expand "umulditi3"
@@ -7038,6 +7121,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")        
    (set_attr "mode" "DI")])
 
 ;; We can't use this pattern in 64bit mode, since it results in two separate 32bit registers
@@ -7065,6 +7149,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")        
    (set_attr "mode" "SI")])
 
 (define_expand "mulditi3"
@@ -7091,6 +7176,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
    (set_attr "mode" "DI")])
 
 (define_expand "mulsidi3"
@@ -7117,6 +7203,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")        
    (set_attr "mode" "SI")])
 
 (define_expand "umuldi3_highpart"
@@ -7153,6 +7240,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")        
    (set_attr "mode" "DI")])
 
 (define_expand "umulsi3_highpart"
@@ -7188,6 +7276,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
    (set_attr "mode" "SI")])
 
 (define_insn "*umulsi3_highpart_zext"
@@ -7210,6 +7299,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
    (set_attr "mode" "SI")])
 
 (define_expand "smuldi3_highpart"
@@ -7245,6 +7335,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
    (set_attr "mode" "DI")])
 
 (define_expand "smulsi3_highpart"
@@ -7279,6 +7370,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
    (set_attr "mode" "SI")])
 
 (define_insn "*smulsi3_highpart_zext"
@@ -7300,6 +7392,7 @@
      (if_then_else (eq_attr "cpu" "athlon")
         (const_string "vector")
         (const_string "double")))
+   (set_attr "amdfam10_decode" "double")
    (set_attr "mode" "SI")])
 
 ;; The patterns that match these are at the end of this file.
@@ -10281,7 +10374,8 @@
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])   
 
 (define_expand "x86_64_shift_adj"
   [(set (reg:CCZ FLAGS_REG)
@@ -10496,7 +10590,8 @@
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "SI")
    (set_attr "pent_pair" "np")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])   
 
 (define_expand "x86_shift_adj_1"
   [(set (reg:CCZ FLAGS_REG)
@@ -11256,7 +11351,8 @@
   [(set_attr "type" "ishift")
    (set_attr "prefix_0f" "1")
    (set_attr "mode" "DI")
-   (set_attr "athlon_decode" "vector")])
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")])   
 
 (define_expand "ashrdi3"
   [(set (match_operand:DI 0 "shiftdi_operand" "")
@@ -14520,7 +14616,23 @@
      [(set (match_dup 0) (xor:SI (match_dup 0) (const_int 31)))
       (clobber (reg:CC FLAGS_REG))])]
   ""
-  "")
+{
+  if (TARGET_ABM)
+    {
+      emit_insn (gen_clzsi2_abm (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "clzsi2_abm"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM"
+  "lzcnt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
 
 (define_insn "*bsr"
   [(set (match_operand:SI 0 "register_operand" "=r")
@@ -14529,7 +14641,44 @@
    (clobber (reg:CC FLAGS_REG))]
   ""
   "bsr{l}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_0f" "1")])
+  [(set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")])
+
+(define_insn "popcountsi2"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT"
+  "popcnt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
+(define_insn "*popcountsi2_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(popcount:SI (match_dup 1)))]
+  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+  "popcnt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
+(define_insn "*popcountsi2_cmp_zext"
+  [(set (reg FLAGS_REG)
+        (compare
+          (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))
+          (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+        (zero_extend:DI(popcount:SI (match_dup 1))))]
+  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+  "popcnt{l}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
 
 (define_expand "clzdi2"
   [(parallel
@@ -14541,7 +14690,23 @@
      [(set (match_dup 0) (xor:DI (match_dup 0) (const_int 63)))
       (clobber (reg:CC FLAGS_REG))])]
   "TARGET_64BIT"
-  "")
+{
+  if (TARGET_ABM)
+    {
+      emit_insn (gen_clzdi2_abm (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "clzdi2_abm"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(clz:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_ABM"
+  "lzcnt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "DI")])
 
 (define_insn "*bsr_rex64"
   [(set (match_operand:DI 0 "register_operand" "=r")
@@ -14550,7 +14715,92 @@
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "bsr{q}\t{%1, %0|%0, %1}"
-  [(set_attr "prefix_0f" "1")])
+  [(set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")])
+
+(define_insn "popcountdi2"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_POPCNT"
+  "popcnt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "DI")])
+
+(define_insn "*popcountdi2_cmp"
+  [(set (reg FLAGS_REG)
+	(compare
+	  (popcount:DI (match_operand:DI 1 "nonimmediate_operand" "rm"))
+	  (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(popcount:DI (match_dup 1)))]
+  "TARGET_64BIT && TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+  "popcnt{q}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "DI")])
+
+(define_expand "clzhi2"
+  [(parallel
+     [(set (match_operand:HI 0 "register_operand" "")
+	   (minus:HI (const_int 15)
+		     (clz:HI (match_operand:HI 1 "nonimmediate_operand" ""))))
+      (clobber (reg:CC FLAGS_REG))])
+   (parallel
+     [(set (match_dup 0) (xor:HI (match_dup 0) (const_int 15)))
+      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (TARGET_ABM)
+    {
+      emit_insn (gen_clzhi2_abm (operands[0], operands[1]));
+      DONE;
+    }
+})
+
+(define_insn "clzhi2_abm"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_ABM"
+  "lzcnt{w}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
+
+(define_insn "*bsrhi"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(minus:HI (const_int 15)
+		  (clz:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  ""
+  "bsr{w}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_0f" "1")
+   (set_attr "mode" "HI")])
+
+(define_insn "popcounthi2"
+  [(set (match_operand:HI 0 "register_operand" "=r")
+	(popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT"
+  "popcnt{w}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
+
+(define_insn "*popcounthi2_cmp"
+  [(set (reg FLAGS_REG)
+        (compare
+          (popcount:HI (match_operand:HI 1 "nonimmediate_operand" "rm"))
+          (const_int 0)))
+   (set (match_operand:HI 0 "register_operand" "=r")
+        (popcount:HI (match_dup 1)))]
+  "TARGET_POPCNT && ix86_match_ccmode (insn, CCZmode)"
+  "popcnt{w}\t{%1, %0|%0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "HI")])
 
 ;; Thread-local storage patterns for ELF.
 ;;
@@ -15302,7 +15552,8 @@
    sqrtss\t{%1, %0|%0, %1}"
   [(set_attr "type" "fpspc,sse")
    (set_attr "mode" "SF,SF")
-   (set_attr "athlon_decode" "direct,*")])
+   (set_attr "athlon_decode" "direct,*")
+   (set_attr "amdfam10_decode" "direct,*")])
 
 (define_insn "*sqrtsf2_sse"
   [(set (match_operand:SF 0 "register_operand" "=x")
@@ -15311,7 +15562,8 @@
   "sqrtss\t{%1, %0|%0, %1}"
   [(set_attr "type" "sse")
    (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "*")])
+   (set_attr "athlon_decode" "*")
+   (set_attr "amdfam10_decode" "*")])
 
 (define_insn "*sqrtsf2_i387"
   [(set (match_operand:SF 0 "register_operand" "=f")
@@ -15320,7 +15572,8 @@
   "fsqrt"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "SF")
-   (set_attr "athlon_decode" "direct")])
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_expand "sqrtdf2"
   [(set (match_operand:DF 0 "register_operand" "")
@@ -15399,7 +15652,8 @@
   "fsqrt"
   [(set_attr "type" "fpspc")
    (set_attr "mode" "XF")
-   (set_attr "athlon_decode" "direct")])
+   (set_attr "athlon_decode" "direct")
+   (set_attr "amdfam10_decode" "direct")])
 
 (define_insn "fpremxf4"
   [(set (match_operand:XF 0 "register_operand" "=f")
@@ -20186,7 +20440,7 @@
 		   (mult:DI (match_operand:DI 1 "memory_operand" "")
 			    (match_operand:DI 2 "immediate_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
    && (GET_CODE (operands[2]) != CONST_INT
        || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
   [(set (match_dup 3) (match_dup 1))
@@ -20200,7 +20454,7 @@
 		   (mult:SI (match_operand:SI 1 "memory_operand" "")
 			    (match_operand:SI 2 "immediate_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
    && (GET_CODE (operands[2]) != CONST_INT
        || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
   [(set (match_dup 3) (match_dup 1))
@@ -20215,7 +20469,7 @@
 		     (mult:SI (match_operand:SI 1 "memory_operand" "")
 			      (match_operand:SI 2 "immediate_operand" ""))))
 	      (clobber (reg:CC FLAGS_REG))])]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
    && (GET_CODE (operands[2]) != CONST_INT
        || !CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K'))"
   [(set (match_dup 3) (match_dup 1))
@@ -20233,7 +20487,7 @@
 			    (match_operand:DI 2 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])
    (match_scratch:DI 3 "r")]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
    && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:DI (match_dup 0) (match_dup 3)))
@@ -20249,7 +20503,7 @@
 			    (match_operand:SI 2 "const_int_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])
    (match_scratch:SI 3 "r")]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size
+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size
    && CONST_OK_FOR_LETTER_P (INTVAL (operands[2]), 'K')"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:SI (match_dup 0) (match_dup 3)))
@@ -20265,7 +20519,7 @@
 			    (match_operand:HI 2 "immediate_operand" "")))
 	      (clobber (reg:CC FLAGS_REG))])
    (match_scratch:HI 3 "r")]
-  "(TARGET_K8 || TARGET_GENERIC64) && !optimize_size"
+  "(TARGET_K8 || TARGET_GENERIC64 || TARGET_AMDFAM10) && !optimize_size"
   [(set (match_dup 3) (match_dup 2))
    (parallel [(set (match_dup 0) (mult:HI (match_dup 0) (match_dup 3)))
 	      (clobber (reg:CC FLAGS_REG))])]
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 366e9c1fe36..ade70771fb5 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -205,6 +205,22 @@ mmni
 Target Undocumented Mask(SSSE3) MaskExists
 Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
 
+msse4a
+Target Report Mask(SSE4A)
+Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
+
+mpopcnt
+Target Report Mask(POPCNT)
+Support code generation of popcount instruction for popcount built-ins 
+namely __builtin_popcount, __builtin_popcountl and __builtin_popcountll
+
+mabm
+Target Report Mask(ABM)
+Support code generation of Advanced Bit Manipulation (ABM) instructions,
+which include popcnt and lzcnt instructions, for popcount and clz built-ins
+namely __builtin_popcount, __builtin_popcountl, __builtin_popcountll and
+__builtin_clz, __builtin_clzl, __builtin_clzll
+
 msseregparm
 Target RejectNegative Mask(SSEREGPARM)
 Use SSE register passing conventions for SF and DF mode
diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h
index 7dbf03043fb..318ee73913b 100644
--- a/gcc/config/i386/pmmintrin.h
+++ b/gcc/config/i386/pmmintrin.h
@@ -30,7 +30,11 @@
 #ifndef _PMMINTRIN_H_INCLUDED
 #define _PMMINTRIN_H_INCLUDED
 
-#ifdef __SSE3__
+#ifndef __SSE3__
+# error "SSE3 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE2 and SSE header files.  */
 #include <xmmintrin.h>
 #include <emmintrin.h>
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 511d2dc1459..5f143133f91 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -963,6 +963,7 @@
   "cvtsi2ss\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "mode" "SF")])
 
 (define_insn "sse_cvtsi2ssq"
@@ -976,6 +977,7 @@
   "cvtsi2ssq\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "mode" "SF")])
 
 (define_insn "sse_cvtss2si"
@@ -989,6 +991,7 @@
   "cvtss2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
    (set_attr "mode" "SI")])
 
 (define_insn "sse_cvtss2siq"
@@ -1002,6 +1005,7 @@
   "cvtss2siq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
    (set_attr "mode" "DI")])
 
 (define_insn "sse_cvttss2si"
@@ -1014,6 +1018,7 @@
   "cvttss2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
    (set_attr "mode" "SI")])
 
 (define_insn "sse_cvttss2siq"
@@ -1026,6 +1031,7 @@
   "cvttss2siq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
    (set_attr "mode" "DI")])
 
 (define_insn "sse2_cvtdq2ps"
@@ -1921,7 +1927,8 @@
   "cvtsi2sd\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")])
+   (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")])
 
 (define_insn "sse2_cvtsi2sdq"
   [(set (match_operand:V2DF 0 "register_operand" "=x,x")
@@ -1934,7 +1941,8 @@
   "cvtsi2sdq\t{%2, %0|%0, %2}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DF")
-   (set_attr "athlon_decode" "double,direct")])
+   (set_attr "athlon_decode" "double,direct")
+   (set_attr "amdfam10_decode" "vector,double")])
 
 (define_insn "sse2_cvtsd2si"
   [(set (match_operand:SI 0 "register_operand" "=r,r")
@@ -1947,6 +1955,7 @@
   "cvtsd2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
    (set_attr "mode" "SI")])
 
 (define_insn "sse2_cvtsd2siq"
@@ -1960,6 +1969,7 @@
   "cvtsd2siq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")
    (set_attr "mode" "DI")])
 
 (define_insn "sse2_cvttsd2si"
@@ -1972,7 +1982,8 @@
   "cvttsd2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "SI")
-   (set_attr "athlon_decode" "double,vector")])
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
 
 (define_insn "sse2_cvttsd2siq"
   [(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -1984,7 +1995,8 @@
   "cvttsd2siq\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "mode" "DI")
-   (set_attr "athlon_decode" "double,vector")])
+   (set_attr "athlon_decode" "double,vector")
+   (set_attr "amdfam10_decode" "double,double")])
 
 (define_insn "sse2_cvtdq2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -2015,7 +2027,8 @@
   "TARGET_SSE2"
   "cvtpd2dq\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set_attr "amdfam10_decode" "double")])
 
 (define_expand "sse2_cvttpd2dq"
   [(set (match_operand:V4SI 0 "register_operand" "")
@@ -2033,7 +2046,8 @@
   "TARGET_SSE2"
   "cvttpd2dq\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "mode" "TI")])
+   (set_attr "mode" "TI")
+   (set_attr "amdfam10_decode" "double")])
 
 (define_insn "sse2_cvtsd2ss"
   [(set (match_operand:V4SF 0 "register_operand" "=x,x")
@@ -2047,20 +2061,22 @@
   "cvtsd2ss\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "athlon_decode" "vector,double")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "mode" "SF")])
 
 (define_insn "sse2_cvtss2sd"
-  [(set (match_operand:V2DF 0 "register_operand" "=x")
+  [(set (match_operand:V2DF 0 "register_operand" "=x,x")
 	(vec_merge:V2DF
 	  (float_extend:V2DF
 	    (vec_select:V2SF
-	      (match_operand:V4SF 2 "nonimmediate_operand" "xm")
+	      (match_operand:V4SF 2 "nonimmediate_operand" "x,m")
 	      (parallel [(const_int 0) (const_int 1)])))
-	  (match_operand:V2DF 1 "register_operand" "0")
+	  (match_operand:V2DF 1 "register_operand" "0,0")
 	  (const_int 1)))]
   "TARGET_SSE2"
   "cvtss2sd\t{%2, %0|%0, %2}"
   [(set_attr "type" "ssecvt")
+   (set_attr "amdfam10_decode" "vector,double")
    (set_attr "mode" "DF")])
 
 (define_expand "sse2_cvtpd2ps"
@@ -2081,7 +2097,8 @@
   "TARGET_SSE2"
   "cvtpd2ps\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V4SF")])
+   (set_attr "mode" "V4SF")
+   (set_attr "amdfam10_decode" "double")])
 
 (define_insn "sse2_cvtps2pd"
   [(set (match_operand:V2DF 0 "register_operand" "=x")
@@ -2092,7 +2109,8 @@
   "TARGET_SSE2"
   "cvtps2pd\t{%1, %0|%0, %1}"
   [(set_attr "type" "ssecvt")
-   (set_attr "mode" "V2DF")])
+   (set_attr "mode" "V2DF")
+   (set_attr "amdfam10_decode" "direct")])
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
@@ -4550,3 +4568,92 @@
   "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
   [(set_attr "type" "sselog1")
    (set_attr "mode" "DI")])
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; AMD SSE4A instructions
+;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "sse4a_vmmovntv2df"
+  [(set (match_operand:DF 0 "memory_operand" "=m")
+        (unspec:DF [(vec_select:DF 
+                      (match_operand:V2DF 1 "register_operand" "x")
+                      (parallel [(const_int 0)]))]
+                   UNSPEC_MOVNT))]
+  "TARGET_SSE4A"
+  "movntsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DF")])
+
+(define_insn "sse4a_movntdf"
+  [(set (match_operand:DF 0 "memory_operand" "=m")
+        (unspec:DF [(match_operand:DF 1 "register_operand" "x")]
+                   UNSPEC_MOVNT))]
+  "TARGET_SSE4A"
+  "movntsd\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "DF")])
+
+(define_insn "sse4a_vmmovntv4sf"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+	(unspec:SF [(vec_select:SF 
+	              (match_operand:V4SF 1 "register_operand" "x")
+		      (parallel [(const_int 0)]))]
+		   UNSPEC_MOVNT))]
+  "TARGET_SSE4A"
+  "movntss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+(define_insn "sse4a_movntsf"
+  [(set (match_operand:SF 0 "memory_operand" "=m")
+	(unspec:SF [(match_operand:SF 1 "register_operand" "x")]
+		   UNSPEC_MOVNT))]
+  "TARGET_SSE4A"
+  "movntss\t{%1, %0|%0, %1}"
+  [(set_attr "type" "ssemov")
+   (set_attr "mode" "SF")])
+
+(define_insn "sse4a_extrqi"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+                      (match_operand 2 "const_int_operand" "")
+                      (match_operand 3 "const_int_operand" "")]
+                     UNSPEC_EXTRQI))]
+  "TARGET_SSE4A"
+  "extrq\t{%3, %2, %0|%0, %2, %3}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4a_extrq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+                      (match_operand:V16QI 2 "register_operand" "x")]
+                     UNSPEC_EXTRQ))]
+  "TARGET_SSE4A"
+  "extrq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertqi"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+        	      (match_operand:V2DI 2 "register_operand" "x")
+                      (match_operand 3 "const_int_operand" "")
+                      (match_operand 4 "const_int_operand" "")]
+                     UNSPEC_INSERTQI))]
+  "TARGET_SSE4A"
+  "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+  [(set_attr "type" "sseins")
+   (set_attr "mode" "TI")])
+
+(define_insn "sse4a_insertq"
+  [(set (match_operand:V2DI 0 "register_operand" "=x")
+        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
+        	      (match_operand:V2DI 2 "register_operand" "x")]
+        	     UNSPEC_INSERTQ))]
+  "TARGET_SSE4A"
+  "insertq\t{%2, %0|%0, %2}"
+  [(set_attr "type" "sseins")
+   (set_attr "mode" "TI")])
diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h
index 1b4ae036735..17fcecb41fe 100644
--- a/gcc/config/i386/tmmintrin.h
+++ b/gcc/config/i386/tmmintrin.h
@@ -30,7 +30,11 @@
 #ifndef _TMMINTRIN_H_INCLUDED
 #define _TMMINTRIN_H_INCLUDED
 
-#ifdef __SSSE3__
+#ifndef __SSSE3__
+# error "SSSE3 instruction set not enabled"
+#else
+
+/* We need definitions from the SSE3, SSE2 and SSE header files.  */
 #include <pmmintrin.h>
 
 static __inline __m128i
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 3b623f0d7b0..848b837496e 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -1242,7 +1242,9 @@ do {									\
 } while (0)
 
 /* For backward source compatibility.  */
-#include <emmintrin.h>
+#ifdef __SSE2__
+# include <emmintrin.h>
+#endif
 
 #endif /* __SSE__ */
 #endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index 450e19182b8..37b051315e8 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -712,7 +712,10 @@ legitimize_tls_address (rtx addr)
     {
       case TLS_MODEL_GLOBAL_DYNAMIC:
 	tmp = gen_reg_rtx (Pmode);
-	emit_insn (gen_tgd_load (tmp, addr));
+	if (flag_pic)
+	  emit_insn (gen_tgd_load_pic (tmp, addr));
+	else
+	  emit_insn (gen_tgd_load (tmp, addr));
 	ret = hppa_tls_call (tmp);
 	break;
 
@@ -720,7 +723,10 @@ legitimize_tls_address (rtx addr)
 	ret = gen_reg_rtx (Pmode);
 	tmp = gen_reg_rtx (Pmode);
 	start_sequence ();
-	emit_insn (gen_tld_load (tmp, addr));
+	if (flag_pic)
+	  emit_insn (gen_tld_load_pic (tmp, addr));
+	else
+	  emit_insn (gen_tld_load (tmp, addr));
 	t1 = hppa_tls_call (tmp);
 	insn = get_insns ();
 	end_sequence ();
@@ -736,7 +742,10 @@ legitimize_tls_address (rtx addr)
 	tmp = gen_reg_rtx (Pmode);
 	ret = gen_reg_rtx (Pmode);
 	emit_insn (gen_tp_load (tp));
-	emit_insn (gen_tie_load (tmp, addr));
+	if (flag_pic)
+	  emit_insn (gen_tie_load_pic (tmp, addr));
+	else
+	  emit_insn (gen_tie_load (tmp, addr));
 	emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
 	break;
 
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 28f398acc1b..9a4bc47d67b 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -39,6 +39,9 @@
    (UNSPEC_TLSLDBASE	7)
    (UNSPEC_TLSIE	8)
    (UNSPEC_TLSLE 	9)
+   (UNSPEC_TLSGD_PIC   10)
+   (UNSPEC_TLSLDM_PIC  11)
+   (UNSPEC_TLSIE_PIC   12)
   ])
 
 ;; UNSPEC_VOLATILE:
@@ -9548,14 +9551,25 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 (define_insn "tgd_load"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] UNSPEC_TLSGD))
-  (clobber (reg:SI 1))]
+  (clobber (reg:SI 1))
+  (use (reg:SI 27))]
   ""
   "*
 {
-  if (flag_pic)
-    return \"addil LT'%1-$tls_gdidx$,%%r19\;ldo RT'%1-$tls_gdidx$(%%r1),%0\";
-  else
-    return \"addil LR'%1-$tls_gdidx$,%%r27\;ldo RR'%1-$tls_gdidx$(%%r1),%0\";
+  return \"addil LR'%1-$tls_gdidx$,%%r27\;ldo RR'%1-$tls_gdidx$(%%r1),%0\";
+}"
+  [(set_attr "type" "multi")
+   (set_attr "length" "8")])
+
+(define_insn "tgd_load_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand 1 "tgd_symbolic_operand" "")] UNSPEC_TLSGD_PIC))
+  (clobber (reg:SI 1))
+  (use (reg:SI 19))]
+  ""
+  "*
+{
+  return \"addil LT'%1-$tls_gdidx$,%%r19\;ldo RT'%1-$tls_gdidx$(%%r1),%0\";
 }"
   [(set_attr "type" "multi")
    (set_attr "length" "8")])
@@ -9563,14 +9577,25 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 (define_insn "tld_load"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] UNSPEC_TLSLDM))
-  (clobber (reg:SI 1))]
+  (clobber (reg:SI 1))
+  (use (reg:SI 27))]
   ""
   "*
 {
-  if (flag_pic)
-    return \"addil LT'%1-$tls_ldidx$,%%r19\;ldo RT'%1-$tls_ldidx$(%%r1),%0\";
-  else
-    return \"addil LR'%1-$tls_ldidx$,%%r27\;ldo RR'%1-$tls_ldidx$(%%r1),%0\";
+  return \"addil LR'%1-$tls_ldidx$,%%r27\;ldo RR'%1-$tls_ldidx$(%%r1),%0\";
+}"
+  [(set_attr "type" "multi")
+   (set_attr "length" "8")])
+
+(define_insn "tld_load_pic"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+       (unspec:SI [(match_operand 1 "tld_symbolic_operand" "")] UNSPEC_TLSLDM_PIC))
+  (clobber (reg:SI 1))
+  (use (reg:SI 19))]
+  ""
+  "*
+{
+  return \"addil LT'%1-$tls_ldidx$,%%r19\;ldo RT'%1-$tls_ldidx$(%%r1),%0\";
 }"
   [(set_attr "type" "multi")
    (set_attr "length" "8")])
@@ -9600,14 +9625,25 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
 (define_insn "tie_load"
   [(set (match_operand:SI 0 "register_operand" "=r")
         (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")] UNSPEC_TLSIE))
-   (clobber (reg:SI 1))]
+   (clobber (reg:SI 1))
+   (use (reg:SI 27))]
   ""
   "*
 {
-  if (flag_pic)
-    return \"addil LT'%1-$tls_ieoff$,%%r19\;ldw RT'%1-$tls_ieoff$(%%r1),%0\";
-  else
-    return \"addil LR'%1-$tls_ieoff$,%%r27\;ldw RR'%1-$tls_ieoff$(%%r1),%0\";
+  return \"addil LR'%1-$tls_ieoff$,%%r27\;ldw RR'%1-$tls_ieoff$(%%r1),%0\";
+}"
+  [(set_attr "type" "multi")
+   (set_attr "length" "8")])
+
+(define_insn "tie_load_pic"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+        (unspec:SI [(match_operand 1 "tie_symbolic_operand" "")] UNSPEC_TLSIE_PIC))
+   (clobber (reg:SI 1))
+   (use (reg:SI 19))]
+  ""
+  "*
+{
+  return \"addil LT'%1-$tls_ieoff$,%%r19\;ldw RT'%1-$tls_ieoff$(%%r1),%0\";
 }"
   [(set_attr "type" "multi")
    (set_attr "length" "8")])
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 216bbab447d..df9e146ba7f 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -5154,7 +5154,13 @@ output_stack_adjust (int size, rtx reg, int epilogue_p,
 	      temp = scavenge_reg (&temps);
 	    }
 	  if (temp < 0 && live_regs_mask)
-	    temp = scavenge_reg (live_regs_mask);
+	    {
+	      HARD_REG_SET temps;
+
+	      COPY_HARD_REG_SET (temps, *live_regs_mask);
+	      CLEAR_HARD_REG_BIT (temps, REGNO (reg));
+	      temp = scavenge_reg (&temps);
+	    }
 	  if (temp < 0)
 	    {
 	      rtx adj_reg, tmp_reg, mem;
@@ -5203,6 +5209,9 @@ output_stack_adjust (int size, rtx reg, int epilogue_p,
 	      emit_move_insn (adj_reg, mem);
 	      mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
 	      emit_move_insn (tmp_reg, mem);
+	      /* Tell flow the insns that pop r4/r5 aren't dead.  */
+	      emit_insn (gen_rtx_USE (VOIDmode, tmp_reg));
+	      emit_insn (gen_rtx_USE (VOIDmode, adj_reg));
 	      return;
 	    }
 	  const_reg = gen_rtx_REG (GET_MODE (reg), temp);
@@ -8477,7 +8486,7 @@ sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
       else if (TARGET_SH4
 	       && get_attr_type (insn) == TYPE_DYN_SHIFT
 	       && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
-	       && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
+	       && reg_overlap_mentioned_p (SET_DEST (single_set (dep_insn)),
 					   XEXP (SET_SRC (single_set (insn)),
 						 1)))
 	cost++;
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index f4e13c34115..e136761ae89 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -412,10 +412,12 @@
 	 (eq_attr "type" "jump")
 	 (cond [(eq_attr "med_branch_p" "yes")
 		(const_int 2)
-		(and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))")
-                         (symbol_ref "INSN"))
-                     (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
-                         (symbol_ref "code_for_indirect_jump_scratch")))
+		(and (ne (symbol_ref "prev_nonnote_insn (insn)")
+			 (const_int 0))
+		     (and (eq (symbol_ref "GET_CODE (prev_nonnote_insn (insn))")
+			      (symbol_ref "INSN"))
+			  (eq (symbol_ref "INSN_CODE (prev_nonnote_insn (insn))")
+			      (symbol_ref "code_for_indirect_jump_scratch"))))
                 (cond [(eq_attr "braf_branch_p" "yes")
                        (const_int 6)
                        (eq (symbol_ref "flag_pic") (const_int 0))
@@ -3018,7 +3020,9 @@ label:
   "
 {
   if (TARGET_SH1
-      && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 255)
+      && GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) == 255
+      && (GET_CODE (operands[1]) != SUBREG
+	  || SCALAR_INT_MODE_P (GET_MODE (XEXP (operands[1], 0)))))
     {
       emit_insn (gen_zero_extendqisi2 (operands[0],
 				       gen_lowpart (QImode, operands[1])));
@@ -8283,6 +8287,20 @@ label:
 					     operands[2],
 					     gen_rtx_REG (Pmode, PIC_REG)));
 
+  /* When the stack protector inserts code after the result is set to
+     R0, @(rX, r12) will cause a spill failure for R0.  Emit a blockage
+     to avoid combining (set A (plus rX r12)) and (set op0 (mem A))
+     when rX is a GOT address for the guard symbol.  Ugly, but it doesn't
+     matter because this is a rare situation.  */
+  if (!TARGET_SHMEDIA
+      && flag_stack_protect
+      && GET_CODE (operands[1]) == CONST
+      && GET_CODE (XEXP (operands[1], 0)) == UNSPEC
+      && GET_CODE (XVECEXP (XEXP (operands[1], 0), 0, 0)) == SYMBOL_REF
+      && strcmp (XSTR (XVECEXP (XEXP (operands[1], 0), 0, 0), 0),
+		 \"__stack_chk_guard\") == 0)
+    emit_insn (gen_blockage ());
+
   /* N.B. This is not constant for a GOTPLT relocation.  */
   mem = gen_rtx_MEM (Pmode, operands[3]);
   MEM_NOTRAP_P (mem) = 1;
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 46681d38bb9..18582bd45c9 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -675,7 +675,7 @@ sparc_override_options (void)
 	error ("-mcmodel= is not supported on 32 bit systems");
     }
 
-  fpu = TARGET_FPU; /* save current -mfpu status */
+  fpu = target_flags & MASK_FPU; /* save current -mfpu status */
 
   /* Set the default CPU.  */
   for (def = &cpu_default[0]; def->name; ++def)
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 936b60109e1..37ae16319c0 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,32 @@
+2007-03-30  Jason Merrill  <jason@redhat.com>
+
+	PR c++/31187
+	* typeck.c (cp_type_readonly): New fn.
+	* cp-tree.h: Declare it.
+	* decl.c (start_decl): Set implicit DECL_THIS_STATIC here.
+	(cp_finish_decl): Not here.
+
+2007-06-20  Dirk Mueller  <dmueller@suse.de>
+
+	PR c++/31809
+	PR c++/31806
+	Backport from mainline:
+	2007-05-31  Jakub Jelinek  <jakub@redhat.com>
+
+	* decl.c (cp_finish_decl): Also clear was_readonly if a static var
+	needs runtime initialization.
+
+	2007-05-30  Jakub Jelinek  <jakub@redhat.com>
+
+	* decl.c (cp_finish_decl): Clear TREE_READONLY flag on TREE_STATIC
+	variables that need runtime initialization.
+
+2007-06-08  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/32177
+	* semantics.c (finish_omp_for): Call fold_build_cleanup_point_expr
+	on init, the non-decl cond operand and increment value.
+
 2007-04-28  Andrew Pinski  <andrew_pinski@playstation.sony.com>
 
 	PR C++/30221
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 04d2632d8b0..3a5c43d6e53 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -4469,6 +4469,7 @@ extern bool comp_ptr_ttypes_const               (tree, tree);
 extern int ptr_reasonably_similar		(tree, tree);
 extern tree build_ptrmemfunc			(tree, tree, int, bool);
 extern int cp_type_quals			(tree);
+extern bool cp_type_readonly			(tree);
 extern bool cp_has_mutable_p			(tree);
 extern bool at_least_as_qualified_p		(tree, tree);
 extern void cp_apply_type_quals_to_decl		(int, tree);
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 0c8826f6f63..cb3a44e6493 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -3817,6 +3817,7 @@ start_decl (const cp_declarator *declarator,
   tree decl;
   tree type, tem;
   tree context;
+  bool was_public;
 
   *pushed_scope_p = NULL_TREE;
 
@@ -3969,6 +3970,8 @@ start_decl (const cp_declarator *declarator,
 		 decl);
     }
 
+  was_public = TREE_PUBLIC (decl);
+
   /* Enter this declaration into the symbol table.  */
   tem = maybe_push_decl (decl);
 
@@ -3988,6 +3991,17 @@ start_decl (const cp_declarator *declarator,
 		       && (flag_conserve_space || ! TREE_PUBLIC (tem)));
 #endif
 
+  if (TREE_CODE (tem) == VAR_DECL
+      && DECL_NAMESPACE_SCOPE_P (tem) && !TREE_PUBLIC (tem) && !was_public
+      && !DECL_THIS_STATIC (tem) && !DECL_ARTIFICIAL (tem))
+    {
+      /* This is a const variable with implicit 'static'.  Set
+	 DECL_THIS_STATIC so we can tell it from variables that are
+	 !TREE_PUBLIC because of the anonymous namespace.  */
+      gcc_assert (cp_type_readonly (TREE_TYPE (tem)));
+      DECL_THIS_STATIC (tem) = 1;
+    }
+
   if (! processing_template_decl)
     start_decl_1 (tem);
 
@@ -5325,14 +5339,6 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p,
 	{
 	  layout_var_decl (decl);
 	  maybe_commonize_var (decl);
-	  if (DECL_NAMESPACE_SCOPE_P (decl) && !TREE_PUBLIC (decl)
-	      && !DECL_THIS_STATIC (decl) && !DECL_ARTIFICIAL (decl))
-	    {
-	      /* This is a const variable with implicit 'static'.  Set
-		 DECL_THIS_STATIC so we can tell it from variables that are
-		 !TREE_PUBLIC because of the anonymous namespace.  */
-	      DECL_THIS_STATIC (decl) = 1;
-	    }
 	}
 
       make_rtl_for_nonlocal_decl (decl, init, asmspec);
@@ -5384,7 +5390,15 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p,
 	     initializer.  It is not legal to redeclare a static data
 	     member, so this issue does not arise in that case.  */
 	  if (var_definition_p && TREE_STATIC (decl))
-	    expand_static_init (decl, init);
+            {
+              if (init)
+                {
+                  if (TREE_READONLY (decl))
+                      TREE_READONLY (decl) = 0;
+                  was_readonly = 0;
+                }
+	      expand_static_init (decl, init);
+            }
 	}
     }
 
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index c4fd2a0ab16..3700cbe4e0d 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -3754,6 +3754,8 @@ tree
 finish_omp_for (location_t locus, tree decl, tree init, tree cond,
 		tree incr, tree body, tree pre_body)
 {
+  tree omp_for;
+
   if (decl == NULL)
     {
       if (init != NULL)
@@ -3831,8 +3833,31 @@ finish_omp_for (location_t locus, tree decl, tree init, tree cond,
       add_stmt (pre_body);
       pre_body = NULL;
     }
+
+  init = fold_build_cleanup_point_expr (TREE_TYPE (init), init);
   init = build_modify_expr (decl, NOP_EXPR, init);
-  return c_finish_omp_for (locus, decl, init, cond, incr, body, pre_body);
+  if (cond && TREE_SIDE_EFFECTS (cond) && COMPARISON_CLASS_P (cond))
+    {
+      int n = TREE_SIDE_EFFECTS (TREE_OPERAND (cond, 1)) != 0;
+      tree t = TREE_OPERAND (cond, n);
+
+      TREE_OPERAND (cond, n)
+	= fold_build_cleanup_point_expr (TREE_TYPE (t), t);
+    }
+  omp_for = c_finish_omp_for (locus, decl, init, cond, incr, body, pre_body);
+  if (omp_for != NULL
+      && TREE_CODE (OMP_FOR_INCR (omp_for)) == MODIFY_EXPR
+      && TREE_SIDE_EFFECTS (TREE_OPERAND (OMP_FOR_INCR (omp_for), 1))
+      && BINARY_CLASS_P (TREE_OPERAND (OMP_FOR_INCR (omp_for), 1)))
+    {
+      tree t = TREE_OPERAND (OMP_FOR_INCR (omp_for), 1);
+      int n = TREE_SIDE_EFFECTS (TREE_OPERAND (t, 1)) != 0;
+
+      TREE_OPERAND (t, n)
+	= fold_build_cleanup_point_expr (TREE_TYPE (TREE_OPERAND (t, n)),
+					 TREE_OPERAND (t, n));
+    }
+  return omp_for;
 }
 
 void
diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index ef2ae83aa68..29677b97bea 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -6641,6 +6641,16 @@ cp_type_quals (tree type)
   return TYPE_QUALS (type);
 }
 
+/* Returns nonzero if the TYPE is const from a C++ perspective: look inside
+   arrays.  */
+
+bool
+cp_type_readonly (tree type)
+{
+  type = strip_array_types (type);
+  return TYPE_READONLY (type);
+}
+
 /* Returns nonzero if the TYPE contains a mutable member.  */
 
 bool
diff --git a/gcc/crtstuff.c b/gcc/crtstuff.c
index cde75573132..cf36fdfb8d6 100644
--- a/gcc/crtstuff.c
+++ b/gcc/crtstuff.c
@@ -104,6 +104,11 @@ call_ ## FUNC (void)					\
 # define EH_FRAME_SECTION_CONST
 #endif
 
+#if !defined(DTOR_LIST_END) && defined(OBJECT_FORMAT_ELF) \
+    && defined(HAVE_GAS_HIDDEN) && !defined(FINI_ARRAY_SECTION_ASM_OP)
+# define HIDDEN_DTOR_LIST_END
+#endif
+
 /* We do not want to add the weak attribute to the declarations of these
    routines in unwind-dw2-fde.h because that will cause the definition of
    these symbols to be weak as well.
@@ -260,10 +265,6 @@ extern void __cxa_finalize (void *) TARGET_ATTRIBUTE_WEAK;
 static void __attribute__((used))
 __do_global_dtors_aux (void)
 {
-#ifndef FINI_ARRAY_SECTION_ASM_OP
-  static func_ptr *p = __DTOR_LIST__ + 1;
-  func_ptr f;
-#endif /* !defined(FINI_ARRAY_SECTION_ASM_OP)  */
   static _Bool completed;
 
   if (__builtin_expect (completed, 0))
@@ -277,12 +278,32 @@ __do_global_dtors_aux (void)
 #ifdef FINI_ARRAY_SECTION_ASM_OP
   /* If we are using .fini_array then destructors will be run via that
      mechanism.  */
+#elif defined(HIDDEN_DTOR_LIST_END)
+  {
+    /* Safer version that makes sure only .dtors function pointers are
+       called even if the static variable is maliciously changed.  */
+    extern func_ptr __DTOR_END__[] __attribute__((visibility ("hidden")));
+    static size_t dtor_idx;
+    const size_t max_idx = __DTOR_END__ - __DTOR_LIST__ - 1;
+    func_ptr f;
+
+    while (dtor_idx < max_idx)
+      {
+	f = __DTOR_LIST__[++dtor_idx];
+	f ();
+      }
+  }
 #else /* !defined (FINI_ARRAY_SECTION_ASM_OP) */
-  while ((f = *p))
-    {
-      p++;
-      f ();
-    }
+  {
+    static func_ptr *p = __DTOR_LIST__ + 1;
+    func_ptr f;
+
+    while ((f = *p))
+      {
+	p++;
+	f ();
+      }
+  }
 #endif /* !defined(FINI_ARRAY_SECTION_ASM_OP) */
 
 #ifdef USE_EH_FRAME_REGISTRY
@@ -466,6 +487,17 @@ STATIC func_ptr __CTOR_END__[1]
 
 #ifdef DTOR_LIST_END
 DTOR_LIST_END;
+#elif defined(HIDDEN_DTOR_LIST_END)
+#ifdef DTORS_SECTION_ASM_OP
+asm (DTORS_SECTION_ASM_OP);
+#endif
+func_ptr __DTOR_END__[1]
+  __attribute__ ((unused,
+#ifndef DTORS_SECTION_ASM_OP
+		  section(".dtors"),
+#endif
+		  aligned(sizeof(func_ptr)), visibility ("hidden")))
+  = { (func_ptr) 0 };
 #elif defined(DTORS_SECTION_ASM_OP)
 asm (DTORS_SECTION_ASM_OP);
 STATIC func_ptr __DTOR_END__[1]
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 7376185d14e..d319ba90a57 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -6982,6 +6982,23 @@ v4si __builtin_ia32_pabsd128 (v4si)
 v8hi __builtin_ia32_pabsw128 (v8hi)
 @end smallexample
 
+The following built-in functions are available when @option{-msse4a} is used.
+
+@smallexample
+void             _mm_stream_sd (double*,__m128d);
+Generates the @code{movntsd} machine instruction.
+void             _mm_stream_ss (float*,__m128);
+Generates the @code{movntss} machine instruction.
+__m128i          _mm_extract_si64 (__m128i, __m128i);
+Generates the @code{extrq} machine instruction with only SSE register operands.
+__m128i          _mm_extracti_si64 (__m128i, int, int);
+Generates the @code{extrq} machine instruction with SSE register and immediate operands.
+__m128i          _mm_insert_si64 (__m128i, __m128i);
+Generates the @code{insertq} machine instruction with only SSE register operands.
+__m128i          _mm_inserti_si64 (__m128i, __m128i, int, int);
+Generates the @code{insertq} machine instruction with SSE register and immediate operands.
+@end smallexample
+
 The following built-in functions are available when @option{-m3dnow} is used.
 All of them generate the machine instruction that is part of the name.
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index e47aeda0047..dc4750abae4 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -523,7 +523,7 @@ Objective-C and Objective-C++ Dialects}.
 -mno-fp-ret-in-387  -msoft-float  -msvr3-shlib @gol
 -mno-wide-multiply  -mrtd  -malign-double @gol
 -mpreferred-stack-boundary=@var{num} @gol
--mmmx  -msse  -msse2 -msse3 -mssse3 -m3dnow @gol
+-mmmx  -msse  -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
 -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
 -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
@@ -9091,6 +9091,10 @@ instruction set support.
 @item k8, opteron, athlon64, athlon-fx
 AMD K8 core based CPUs with x86-64 instruction set support.  (This supersets
 MMX, SSE, SSE2, 3dNOW!, enhanced 3dNOW! and 64-bit instruction set extensions.)
+@item amdfam10
+AMD Family 10 core based CPUs with x86-64 instruction set support.  (This
+supersets MMX, SSE, SSE2, SSE3, SSE4A, 3dNOW!, enhanced 3dNOW!, ABM and 64-bit
+instruction set extensions.)
 @item winchip-c6
 IDT Winchip C6 CPU, dealt in same way as i486 with additional MMX instruction
 set support.
@@ -9368,8 +9372,14 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mno-sse3
 @item -mssse3
 @itemx -mno-ssse3
+@item -msse4a
+@itemx -mno-sse4a
 @item -m3dnow
 @itemx -mno-3dnow
+@item -mpopcnt
+@itemx -mno-popcnt
+@item -mabm
+@itemx -mno-abm
 @opindex mmmx
 @opindex mno-mmx
 @opindex msse
diff --git a/gcc/except.c b/gcc/except.c
index f8ccac9dfd1..9cd2a286f5f 100644
--- a/gcc/except.c
+++ b/gcc/except.c
@@ -1006,7 +1006,11 @@ duplicate_eh_regions (struct function *ifun, duplicate_eh_regions_map map,
     for (prev_try = VEC_index (eh_region, cfun->eh->region_array, outer_region);
          prev_try && prev_try->type != ERT_TRY;
 	 prev_try = prev_try->outer)
-      ;
+      if (prev_try->type == ERT_MUST_NOT_THROW)
+	{
+	  prev_try = NULL;
+	  break;
+	}
 
   /* Remap all of the internal catch and cleanup linkages.  Since we 
      duplicate entire subtrees, all of the referenced regions will have
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 61d5770dcdd..76494ec006a 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -3901,6 +3901,19 @@ gimplify_asm_expr (tree *expr_p, tree *pre_p, tree *post_p)
       parse_input_constraint (&constraint, 0, 0, noutputs, 0,
 			      oconstraints, &allows_mem, &allows_reg);
 
+      /* If we can't make copies, we can only accept memory.  */
+      if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link))))
+	{
+	  if (allows_mem)
+	    allows_reg = 0;
+	  else
+	    {
+	      error ("impossible constraint in %<asm%>");
+	      error ("non-memory input %d must stay in memory", i);
+	      return GS_ERROR;
+	    }
+	}
+
       /* If the operand is a memory input, it should be an lvalue.  */
       if (!allows_reg && allows_mem)
 	{
@@ -4654,7 +4667,20 @@ gimplify_adjust_omp_clauses_1 (splay_tree_node n, void *data)
   else if (flags & GOVD_SHARED)
     {
       if (is_global_var (decl))
-	return 0;
+	{
+	  struct gimplify_omp_ctx *ctx = gimplify_omp_ctxp->outer_context;
+	  while (ctx != NULL)
+	    {
+	      splay_tree_node on
+		= splay_tree_lookup (ctx->variables, (splay_tree_key) decl);
+	      if (on && (on->value & (GOVD_FIRSTPRIVATE | GOVD_LASTPRIVATE
+				      | GOVD_PRIVATE | GOVD_REDUCTION)) != 0)
+		break;
+	      ctx = ctx->outer_context;
+	    }
+	  if (ctx == NULL)
+	    return 0;
+	}
       code = OMP_CLAUSE_SHARED;
     }
   else if (flags & GOVD_PRIVATE)
diff --git a/gcc/loop.c b/gcc/loop.c
index 8974972bd9f..43c3d92d2e4 100644
--- a/gcc/loop.c
+++ b/gcc/loop.c
@@ -2561,7 +2561,7 @@ move_movables (struct loop *loop, struct loop_movables *movables,
 			     like this as a result of record_jump_cond.  */
 
 			  if ((temp = find_reg_note (i1, REG_EQUAL, NULL_RTX))
-			      && ! loop_invariant_p (loop, XEXP (temp, 0)))
+			      && loop_invariant_p (loop, XEXP (temp, 0)) != 1)
 			    remove_note (i1, temp);
 			}
 
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index d48e0721c50..29e63e3fb2c 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -1505,9 +1505,9 @@ lookup_decl_in_outer_ctx (tree decl, omp_context *ctx)
   for (up = ctx->outer, t = NULL; up && t == NULL; up = up->outer)
     t = maybe_lookup_decl (decl, up);
 
-  gcc_assert (t);
+  gcc_assert (t || is_global_var (decl));
 
-  return t;
+  return t ? t : decl;
 }
 
 
diff --git a/gcc/reload.c b/gcc/reload.c
index d775259d6c1..eea9cc738a1 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -1210,7 +1210,7 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
 
   /* If IN appears in OUT, we can't share any input-only reload for IN.  */
   if (in != 0 && out != 0 && MEM_P (out)
-      && (REG_P (in) || MEM_P (in))
+      && (REG_P (in) || MEM_P (in) || GET_CODE (in) == PLUS)
       && reg_overlap_mentioned_for_reload_p (in, XEXP (out, 0)))
     dont_share = 1;
 
@@ -6464,7 +6464,8 @@ reg_overlap_mentioned_for_reload_p (rtx x, rtx in)
       if (REG_P (in))
 	return 0;
       else if (GET_CODE (in) == PLUS)
-	return (reg_overlap_mentioned_for_reload_p (x, XEXP (in, 0))
+	return (rtx_equal_p (x, in)
+		|| reg_overlap_mentioned_for_reload_p (x, XEXP (in, 0))
 		|| reg_overlap_mentioned_for_reload_p (x, XEXP (in, 1)));
       else return (reg_overlap_mentioned_for_reload_p (XEXP (x, 0), in)
 		   || reg_overlap_mentioned_for_reload_p (XEXP (x, 1), in));
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 2665ef8d5fc..b2acaaea2cb 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,172 @@
+2006-12-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* g++.dg/debug/vartrack1.C: New test.
+
+	* g++.dg/opt/ifcvt1.C: New test.
+
+	* gcc.dg/tls/opt-13.c: New test.
+
+	* gcc.dg/20060425-2.c: New test.
+
+	* g++.dg/opt/pr15054-2.C: New test.
+
+	* gcc.c-torture/execute/20060420-1.c: New test.
+
+	* gcc.c-torture/execute/20060412-1.c: New test.
+
+2006-12-08  Alexandre Oliva  <aoliva@redhat.com>
+
+	* g++.dg/template/array17.C: New test.
+
+2006-02-25  Alexandre Oliva  <aoliva@redhat.com>
+
+	* gcc.target/powerpc/altivec-23.c: New test.
+
+2007-04-02  Jason Merrill  <jason@redhat.com>
+
+	PR c++/31187
+	* g++.dg/ext/visibility/anon3.C: New test.
+
+2006-10-15  Jan Hubicka  <jh@suse.cz>
+            Richard Guenther  <rguenther@suse.de>
+
+	PR middle-end/29299
+	* gcc.dg/pr29299.c: New testcase.
+
+2006-10-23  Jakub Jelinek  <jakub@redhat.com>
+
+	* gcc.dg/builtin-strncat-chk-1.c: New test.
+
+2006-09-16  Andrew Pinski  <pinskia@physics.uc.edu>
+
+	PR tree-opt/29059
+	* gcc.c-torture/compile/strcpy-1.c: New test.
+	* gcc.c-torture/compile/strcpy-2.c: New test.
+	* gcc.c-torture/compile/memcpy-1.c: New test.
+	* gcc.c-torture/compile/memcpy-2.c: New test.
+
+2006-10-10  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/29272
+	* gcc.c-torture/execute/20060930-2.c: New test.
+
+2006-10-12  Jakub Jelinek  <jakub@redhat.com>
+
+	PR preprocessor/28709
+	* gcc.dg/cpp/paste14.c: New test.
+
+2006-12-13  Jakub Jelinek  <jakub@redhat.com>
+
+	* objc/compile/20060406-1.m: New test.
+
+2007-02-10  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gcc.target/i386/sse4a-extract.c: Add "LL" to 64bit constants.
+	* gcc.target/i386/sse4a-insert.c: Likewise.
+
+2007-02-05  Dwarakanath Rajagopal  <dwarak.rajagopal@amd.com>
+
+	* gcc.dg/i386-cpuid.h: Test whether SSE4A is supported
+	for running tests.
+	* gcc.target/i386/sse4a-extract.c: New test.
+	* gcc.target/i386/sse4a-insert.c: New test.
+	* gcc.target/i386/sse4a-montsd.c: New test.
+	* gcc.target/i386/sse4a-montss.c: New test.
+
+2006-12-15  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gcc.dg/i386-cpuid.h (bit_SSSE3): New.
+
+2006-11-30  H.J. Lu  <hongjiu.lu@intel.com>
+
+	* gcc.dg/i386-cpuid.h (bit_SSE3): New.
+	(i386_get_cpuid): New function.
+	(i386_cpuid_ecx): Likewise.
+	(i386_cpuid_edx): Likewise.
+	(i386_cpuid): Updated to call i386_cpuid_edx.
+
+2007-06-21  Uros Bizjak  <ubizjak@gmail.com>
+ 
+	PR target/32389
+	* gcc.target/i386/pr32389.c: New test.
+
+2007-06-20  Dirk Mueller  <dmueller@suse.de>
+
+	PR c++/31806
+	PR c++/31809
+	* g++.dg/opt/static5.C: New test.
+	* g++.dg/opt/static6.C: New test.
+
+2007-06-20  Jakub Jelinek  <jakub@redhat.com>
+
+	PR inline-asm/32109
+	* g++.dg/ext/asm10.C: New test.
+
+	PR middle-end/32285
+	* gcc.c-torture/execute/20070614-1.c: New test.
+
+2007-06-19  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/32353
+	* g++.dg/opt/nrv13.C: New test.
+
+2007-06-17  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	* gcc.target/sparc/align.c: Use 'unsigned char' as element type.
+	* gcc.target/sparc/combined-2.c: Likewise.
+	* gcc.target/sparc/fexpand.c: Likewise.
+	* gcc.target/sparc/fnot.c: Likewise.  Fix a couple of prototypes.
+	* gcc.target/sparc/fpack16.c: Likewise.
+	* gcc.target/sparc/fpmerge.c: Likewise.
+	* gcc.target/sparc/fpmul.c: Likewise.
+	* gcc.target/sparc/noresult.c: Likewise.
+	* gcc.target/sparc/pdist.c: Likewise.
+
+2007-06-08  Jakub Jelinek  <jakub@redhat.com>
+
+	PR c++/32177
+	* g++.dg/gomp/pr32177.C: New test.
+
+2007-06-13  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	* gcc.target/sparc/mfpu.c: New test.
+
+2007-05-23  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR libfortran/31964
+	* gfortran.fortran-torture/execute/intrinsic_bitops.f90: Update.
+
+2007-05-17  Eric Botcazou  <ebotcazou@libertysurf.fr>
+
+	* gcc.c-torture/execute/20070517-1.c: New test.
+
+2007-05-16  Richard Guenther  <rguenther@suse.de>
+
+	Backport from mainline:
+	2006-06-09  Richard Guenther  <rguenther@suse.de>
+
+	PR tree-optimization/26998
+	* gcc.dg/torture/pr26998.c: New testcase.
+	* gcc.dg/tree-ssa/vrp29.c: New testcase.
+
+2007-05-10  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR libfortran/31880
+	* gfortran.dg/unf_read_corrupted_2.f90: New test.
+
+2007-05-10  Andreas Krebbel  <krebbel1@de.ibm.com>
+
+	* gcc.dg/20070507-1.c: Disable for non-pic targets.
+
+2007-05-08  Andreas Krebbel  <krebbel1@de.ibm.com>
+
+	* gcc.dg/20070507-1.c: New testcase.
+
+2007-05-30  Jakub Jelinek  <jakub@redhat.com>
+
+	PR tree-optimization/31769
+	* g++.dg/gomp/pr31769.C: New test.
+
 2007-04-28  Andrew Pinski  <andrew_pinski@playstation.sony.com>
 
 	PR C++/30221
diff --git a/gcc/testsuite/g++.dg/debug/vartrack1.C b/gcc/testsuite/g++.dg/debug/vartrack1.C
new file mode 100644
index 00000000000..d72cb6f3572
--- /dev/null
+++ b/gcc/testsuite/g++.dg/debug/vartrack1.C
@@ -0,0 +1,99 @@
+// This testcase used to hang the compiler in vt_find_locations.
+// { dg-do compile }
+// { dg-options "-O2 -g" }
+
+struct S
+{
+  int a;
+  S *b, *c, *d;
+};
+
+struct T
+{
+  void f1 (S *x);
+  void f2 (S *x);
+  void f3 (S *x, S *y);
+  S *e;
+};
+
+void
+T::f3 (S *x, S *y)
+{
+  while (x != this->e && (!x || x->a == 1))
+    {
+      if (x == y->c)
+	{
+	  S *w = y->d;
+	  if (w && w->a == 0)
+	    {
+	      w->a = 1;
+	      y->a = 0;
+	      f2 (y);
+	      w = y->d;
+	    }
+	  if (w && (!w->c || w->c->a == 1) && (!w->d || w->d->a == 1))
+	    {
+	      w->a = 0;
+	      x = y;
+	      y = x->b;
+	    }
+	  else
+	    {
+	      if (w && (!w->d || w->d->a == 1))
+		{
+		  if (w->c)
+		    w->c->a = 1;
+		  w->a = 0;
+		  f1 (w);
+		  w = y->d;
+		}
+	      if (w)
+		{
+		  w->a = y->a;
+		  if (w->d)
+		    w->d->a = 1;
+		}
+	      y->a = 1;
+	      f2 (y);
+	      x = e;
+	    }
+	}
+      else
+	{
+	  S *w = y->c;
+	  if (w && w->a == 0)
+	    {
+	      w->a = 1;
+	      y->a = 0;
+	      f1 (y);
+	      w = y->c;
+	    }
+	  if (w && (!w->c || w->c->a == 1) && (!w->d || w->d->a == 1))
+	    {
+	      w->a = 0;
+	      x = y;
+	      y = x->b;
+	    }
+	  else
+	    {
+	      if (w && (!w->c || w->c->a == 1))
+		{
+		  w->a = 0;
+		  if (w->d)
+		    w->d->a = 1;
+		  f2 (w);
+		  w = y->c;
+		}
+	      if (w)
+		{
+		  w->a = y->a;
+		  if (w->c)
+		    w->c->a = 1;
+		}
+	      y->a = 1;
+	      f1 (y);
+	      x = e;
+	    }
+	}
+    }
+}
diff --git a/gcc/testsuite/g++.dg/ext/asm10.C b/gcc/testsuite/g++.dg/ext/asm10.C
new file mode 100644
index 00000000000..b95027c8cac
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/asm10.C
@@ -0,0 +1,14 @@
+// PR inline-asm/32109
+// { dg-do compile }
+// { dg-options "-O2" }
+
+struct A { int i[3]; ~A (); };
+struct A a;
+struct B { struct A c; int i; B (); } b;
+
+B::B ()
+{
+  __asm ("" : : "r" (a));	// { dg-error "impossible constraint|non-memory input" }
+  __asm ("" : : "r" (b.c));	// { dg-error "impossible constraint|non-memory input" }
+  __asm ("" : : "r" (c));	// { dg-error "impossible constraint|non-memory input" }
+}
diff --git a/gcc/testsuite/g++.dg/ext/visibility/anon3.C b/gcc/testsuite/g++.dg/ext/visibility/anon3.C
new file mode 100644
index 00000000000..9def559d253
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ext/visibility/anon3.C
@@ -0,0 +1,16 @@
+// PR c++/31187
+// Bug: the repeated declaration was confusing the compiler into
+// thinking that foo1 had language internal linkage.
+
+class foo { };
+
+namespace
+{
+  extern foo foo1;
+  foo foo1;
+}
+
+template< foo * >
+class bar { };
+
+bar< &foo1 > bar1;
diff --git a/gcc/testsuite/g++.dg/gomp/pr31769.C b/gcc/testsuite/g++.dg/gomp/pr31769.C
new file mode 100644
index 00000000000..54945f94efa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/pr31769.C
@@ -0,0 +1,61 @@
+// PR tree-optimization/31769
+// { dg-options "-O2 -fopenmp" }
+// { dg-do compile }
+
+struct B
+{
+  B () {}
+  virtual ~B () {}
+};
+struct C
+{
+  C (int x, int y) {}
+};
+template<typename T, int U>
+struct D
+{
+  D () {}
+  ~D () {}
+};
+struct E
+{
+  E () {}
+  ~E () {}
+  D<int, 1> e;
+};
+struct A
+{
+  B *b;
+  A () { b = __null; }
+  ~A () { if (b != __null) delete b; }
+};
+struct F : public A
+{
+  explicit F (int x) { foo (0); }
+  F (const F &x) {}
+  F (F &x, C y) {}
+  F operator () (C x) const
+  {
+    return F (const_cast<F &>(*this), x);
+  }
+  template <typename U> F & operator+= (const U &);
+  void foo (int);
+  E f;
+};
+
+int
+main ()
+{
+  try
+  {
+    F f (10);
+    F g (10);
+    C h (0, 9);
+#pragma omp parallel for
+    for (int i = 0; i < 2; ++i)
+      g += f (h);
+  }
+  catch (int &e)
+  {
+  }
+}
diff --git a/gcc/testsuite/g++.dg/gomp/pr32177.C b/gcc/testsuite/g++.dg/gomp/pr32177.C
new file mode 100644
index 00000000000..55c8483be27
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/pr32177.C
@@ -0,0 +1,46 @@
+// PR c++/32177
+// { dg-do compile }
+// { dg-options "-fopenmp" }
+//
+// Copyright (C) 2007 Free Software Foundation, Inc.
+// Contributed by Theodore.Papadopoulo 1 Jun 2007 <Theodore.Papadopoulo@sophia.inria.fr>
+
+struct A
+{
+  A () {}
+  ~A () {}
+  int s () const { return 1; }
+};
+
+void
+f1 ()
+{
+  #pragma omp parallel for
+    for (int i = 1; i <= A ().s (); ++i)
+      ;
+}
+
+void
+f2 ()
+{
+  #pragma omp parallel for
+    for (int i = A ().s (); i <= 20; ++i)
+      ;
+}
+
+void
+f3 ()
+{
+  #pragma omp parallel for
+    for (int i = 1; i <= 20; i += A ().s ())
+      ;
+}
+
+void
+f4 ()
+{
+  int i;
+  #pragma omp parallel for
+    for (i = A ().s (); i <= 20; i++)
+      ;
+}
diff --git a/gcc/testsuite/g++.dg/opt/ifcvt1.C b/gcc/testsuite/g++.dg/opt/ifcvt1.C
new file mode 100644
index 00000000000..8fcbf461907
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/ifcvt1.C
@@ -0,0 +1,17 @@
+// { dg-do compile }
+// { dg-options "-O2 -fnon-call-exceptions" }
+
+struct S { ~S () throw () {} };
+double bar ();
+
+int
+foo ()
+{
+  S a;
+  int i = 0;
+  double c = bar ();
+  c = c < 0 ? -c : c;
+  if (c <= 1.e-8)
+    i += 24;
+  return i;
+}
diff --git a/gcc/testsuite/g++.dg/opt/nrv13.C b/gcc/testsuite/g++.dg/opt/nrv13.C
new file mode 100644
index 00000000000..bb49a3a6e4f
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/nrv13.C
@@ -0,0 +1,42 @@
+// PR tree-optimization/32353
+// { dg-do run }
+// { dg-options "-O2" }
+
+extern "C" void abort ();
+
+struct A
+{
+  int f;
+  A (int x) : f (x) {}
+};
+
+A
+foo (const A &x, const A &y)
+{
+  A r (0);
+  r = x.f == -111 ? y : (y.f == -111 || x.f > y.f) ? x : y;
+  A s (0);
+  r = r.f == -111 ? s : (r.f > s.f) ? r : s;
+  return r;
+}
+
+int
+main ()
+{
+  if (foo (A (0), A (1)).f != 1)
+    abort ();
+  if (foo (A (1), A (9)).f != 9)
+    abort ();
+  if (foo (A (9), A (1)).f != 9)
+    abort ();
+  if (foo (A (-4), A (-5)).f != 0)
+    abort ();
+  if (foo (A (-111), A (-111)).f != 0)
+    abort ();
+  if (foo (A (2), A (-111)).f != 2)
+    abort ();
+  if (foo (A (-111), A (6)).f != 6)
+    abort ();
+  if (foo (A (-111), A (-4)).f != 0)
+    abort ();
+}
diff --git a/gcc/testsuite/g++.dg/opt/pr15054-2.C b/gcc/testsuite/g++.dg/opt/pr15054-2.C
new file mode 100644
index 00000000000..156e945d011
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr15054-2.C
@@ -0,0 +1,39 @@
+// PR middle-end/15054
+
+// { dg-do run }
+// { dg-options "-O2" }
+
+extern "C" void abort (void);
+
+void
+__attribute__((noinline))
+check (long x, long y)
+{
+  if (x != y)
+    abort ();
+}
+
+struct A
+{
+  A() : a(2) { check (a, 2); }
+  ~A() { check (a, 2); }
+private:
+  long a;
+};
+
+class B {
+  long b;
+  B& operator =(const B& );
+public:
+  B (long p) : b(p) { check (b, 6); }
+  B (const B& p) : b(p.b) { check (b, 6); }
+  ~B () { check (b, 6); A obj; check (b, 6); }
+  B foo() { return B(*this); }
+};
+
+int main ()
+{
+  B o(6);
+  o.foo().foo();
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/opt/static5.C b/gcc/testsuite/g++.dg/opt/static5.C
new file mode 100644
index 00000000000..1daca6d7194
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/static5.C
@@ -0,0 +1,29 @@
+// PR c++/31809
+// { dg-do run }
+// { dg-options "-O2" }
+
+struct S
+{
+  unsigned v;
+  static inline S f (unsigned a);
+};
+
+inline S
+S::f (unsigned a)
+{
+  static S t = { a };
+  return t;
+}
+
+const static S s = S::f (26);
+
+extern "C" void abort (void);
+
+int
+main ()
+{
+  S t = s;
+  if (t.v != 26)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/opt/static6.C b/gcc/testsuite/g++.dg/opt/static6.C
new file mode 100644
index 00000000000..00e76fb7350
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/static6.C
@@ -0,0 +1,35 @@
+// PR c++/31806
+// { dg-do run }
+// { dg-options "-O2 -fno-inline -fno-threadsafe-statics" }
+
+extern "C" void abort(void);
+
+struct A
+{
+    void *d;
+};
+
+static const A& staticA()
+{
+    static A s_static;
+    return s_static;
+}
+
+void assert_failed()
+{
+    abort();
+}
+
+A testMethod()
+{
+    static const A& s = staticA( );
+    if (&s == 0)
+        assert_failed();
+    return s;
+}
+
+int main()
+{
+    testMethod();
+    return 0;
+}
diff --git a/gcc/testsuite/g++.dg/template/array17.C b/gcc/testsuite/g++.dg/template/array17.C
new file mode 100644
index 00000000000..12a5c472095
--- /dev/null
+++ b/gcc/testsuite/g++.dg/template/array17.C
@@ -0,0 +1,23 @@
+// { dg-do compile }
+
+template <typename T>
+struct V {
+  T& operator[](int);
+};
+
+struct S {
+  S operator +(int);
+  template <typename T> T value();
+};
+
+template <typename T>
+void R (T v)
+{
+  v[(S() + 0).template value<int>()][0] = 0;
+}
+
+int
+main ()
+{
+  R(V<V<int> >());
+}
diff --git a/gcc/testsuite/gcc.c-torture/compile/memcpy-1.c b/gcc/testsuite/gcc.c-torture/compile/memcpy-1.c
new file mode 100644
index 00000000000..4a4e7509654
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/memcpy-1.c
@@ -0,0 +1,9 @@
+static const char OggFLAC__MAPPING_VERSION_MAJOR = 1;
+void f(void)
+{
+  char synthetic_first_packet_body[10];
+  char *b = &synthetic_first_packet_body[4];
+  __builtin_memcpy (b, &OggFLAC__MAPPING_VERSION_MAJOR, (1u));
+}
+
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/memcpy-2.c b/gcc/testsuite/gcc.c-torture/compile/memcpy-2.c
new file mode 100644
index 00000000000..4645ced597e
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/memcpy-2.c
@@ -0,0 +1,10 @@
+static const char OggFLAC__MAPPING_VERSION_MAJOR = 1;
+void f(void)
+{
+  char synthetic_first_packet_body[10];
+  char *b = synthetic_first_packet_body;
+  b+=4u;
+  __builtin_memcpy (b, &OggFLAC__MAPPING_VERSION_MAJOR, (1u));
+}
+
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/strcpy-1.c b/gcc/testsuite/gcc.c-torture/compile/strcpy-1.c
new file mode 100644
index 00000000000..2c7b16a08d8
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/strcpy-1.c
@@ -0,0 +1,15 @@
+
+
+typedef struct
+{
+  char str[20];
+}STACK;
+STACK stack[15];
+int level;
+rezero ()
+{
+  level = 0;
+  __builtin_strcpy (stack[level].str, "");
+}
+
+
diff --git a/gcc/testsuite/gcc.c-torture/compile/strcpy-2.c b/gcc/testsuite/gcc.c-torture/compile/strcpy-2.c
new file mode 100644
index 00000000000..075cdd1f6ab
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/compile/strcpy-2.c
@@ -0,0 +1,7 @@
+char wrkstr_un[270];
+extern void
+LoadUserAlph (char *s)
+{
+  s = &wrkstr_un[0];
+  __builtin_strcpy (s, "");
+};
diff --git a/gcc/testsuite/gcc.c-torture/execute/20060412-1.c b/gcc/testsuite/gcc.c-torture/execute/20060412-1.c
new file mode 100644
index 00000000000..a4cc4d45039
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/20060412-1.c
@@ -0,0 +1,33 @@
+extern void abort (void);
+
+struct S
+{
+  long o;
+};
+
+struct T
+{
+  long o;
+  struct S m[82];
+};
+
+struct T t;
+
+int
+main ()
+{
+  struct S *p, *q;
+
+  p = (struct S *) &t;
+  p = &((struct T *) p)->m[0];
+  q = p + 82;
+  while (--q > p)
+    q->o = -1;
+  q->o = 0;
+
+  if (q > p)
+    abort ();
+  if (q - p > 0)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/20060420-1.c b/gcc/testsuite/gcc.c-torture/execute/20060420-1.c
new file mode 100644
index 00000000000..fe62f6bf0ee
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/20060420-1.c
@@ -0,0 +1,71 @@
+extern void abort (void);
+
+typedef float v4flt __attribute__ ((vector_size (16)));
+
+void __attribute__ ((noinline)) foo (float *dst, float **src, int a, int n)
+{
+  int i, j;
+  int z = sizeof (v4flt) / sizeof (float);
+  unsigned m = sizeof (v4flt) - 1;
+
+  for (j = 0; j < n && (((unsigned long) dst + j) & m); ++j)
+    {
+      float t = src[0][j];
+      for (i = 1; i < a; ++i)
+	t += src[i][j];
+      dst[j] = t;
+    }
+
+  for (; j < (n - (4 * z - 1)); j += 4 * z)
+    {
+      v4flt t0 = *(v4flt *) (src[0] + j + 0 * z);
+      v4flt t1 = *(v4flt *) (src[0] + j + 1 * z);
+      v4flt t2 = *(v4flt *) (src[0] + j + 2 * z);
+      v4flt t3 = *(v4flt *) (src[0] + j + 3 * z);
+      for (i = 1; i < a; ++i)
+	{
+	  t0 += *(v4flt *) (src[i] + j + 0 * z);
+	  t1 += *(v4flt *) (src[i] + j + 1 * z);
+	  t2 += *(v4flt *) (src[i] + j + 2 * z);
+	  t3 += *(v4flt *) (src[i] + j + 3 * z);
+	}
+      *(v4flt *) (dst + j + 0 * z) = t0;
+      *(v4flt *) (dst + j + 1 * z) = t1;
+      *(v4flt *) (dst + j + 2 * z) = t2;
+      *(v4flt *) (dst + j + 3 * z) = t3;
+    }
+  for (; j < n; ++j)
+    {
+      float t = src[0][j];
+      for (i = 1; i < a; ++i)
+	t += src[i][j];
+      dst[j] = t;
+    }
+}
+
+float buffer[64];
+
+int
+main (void)
+{
+  int i;
+  float *dst, *src[2];
+
+  dst = buffer;
+  dst += (-(long int) buffer & (16 * sizeof (float) - 1)) / sizeof (float);
+  src[0] = dst + 16;
+  src[1] = dst + 32;
+  for (i = 0; i < 16; ++i)
+    {
+      src[0][i] = (float) i + 11 * (float) i;
+      src[1][i] = (float) i + 12 * (float) i;
+    }
+  foo (dst, src, 2, 16);
+  for (i = 0; i < 16; ++i)
+    {
+      float e = (float) i + 11 * (float) i + (float) i + 12 * (float) i;
+      if (dst[i] != e)
+	abort ();
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/20060930-2.c b/gcc/testsuite/gcc.c-torture/execute/20060930-2.c
new file mode 100644
index 00000000000..498f7811d1c
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/20060930-2.c
@@ -0,0 +1,31 @@
+/* PR middle-end/29272 */
+
+extern void abort (void);
+
+struct S { struct S *s; } s;
+struct T { struct T *t; } t;
+
+static inline void
+foo (void *s)
+{
+  struct T *p = s;
+  __builtin_memcpy (&p->t, &t.t, sizeof (t.t));
+}
+
+void *
+__attribute__((noinline))
+bar (void *p, struct S *q)
+{
+  q->s = &s;
+  foo (p);
+  return q->s;
+}
+
+int
+main (void)
+{
+  t.t = &t;
+  if (bar (&s, &s) != (void *) &t)
+    abort ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/20070517-1.c b/gcc/testsuite/gcc.c-torture/execute/20070517-1.c
new file mode 100644
index 00000000000..c81cbc639be
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/20070517-1.c
@@ -0,0 +1,41 @@
+/* PR rtl-optimization/31691 */
+/* Origin: Chi-Hua Chen <stephaniechc-gccbug@yahoo.com> */
+
+extern void abort (void);
+
+static int get_kind(int) __attribute__ ((noinline));
+
+static int get_kind(int v)
+{
+  volatile int k = v;
+  return k;
+}
+
+static int some_call(void) __attribute__ ((noinline));
+
+static int some_call(void)
+{
+  return 0;
+}
+
+static void example (int arg)
+{
+  int tmp, kind = get_kind (arg);
+
+  if (kind == 9 || kind == 10 || kind == 5)
+    {
+      if (some_call() == 0)
+        {
+          if (kind == 9 || kind == 10)
+            tmp = arg;
+          else
+            abort();
+        }
+    }
+} 
+
+int main(void)
+{
+  example(10);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/20070614-1.c b/gcc/testsuite/gcc.c-torture/execute/20070614-1.c
new file mode 100644
index 00000000000..fa44f7fa3ec
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/20070614-1.c
@@ -0,0 +1,33 @@
+extern void abort (void);
+
+_Complex v = 3.0 + 1.0iF;
+
+void
+foo (_Complex z, int *x)
+{
+  if (z != v)
+    abort ();
+}
+
+_Complex bar (_Complex z) __attribute__ ((pure));
+_Complex
+bar (_Complex z)
+{
+  return v;
+}
+
+int
+baz (void)
+{
+  int a, i;
+  for (i = 0; i < 6; i++)
+    foo (bar (1.0iF * i), &a);
+  return 0;
+}
+
+int
+main ()
+{
+  baz ();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/20060425-2.c b/gcc/testsuite/gcc.dg/20060425-2.c
new file mode 100644
index 00000000000..2a5e131a100
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/20060425-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+double
+crashme (double v, double *p)
+{
+  if (v < 0. && *p == 1.)
+    v = 0.;
+  return v;
+}
diff --git a/gcc/testsuite/gcc.dg/20070507-1.c b/gcc/testsuite/gcc.dg/20070507-1.c
new file mode 100644
index 00000000000..2884d1aa8d8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/20070507-1.c
@@ -0,0 +1,103 @@
+/* This failed on s390x due to a bug in loop.c.
+   loop.c failed to remove a REG_EQUAL note when
+   hoisting an insn from a loop body.  */
+
+/* { dg-options "-O3 -fPIC" } */
+/* { dg-do run { target fpic } } */
+
+typedef __SIZE_TYPE__ size_t;
+int memcmp(const void *s1, const void *s2, size_t n);
+
+typedef struct
+{
+  char name[30];
+  int a;
+} LOCAL;
+
+int global = 0;
+int sy = 1;
+int subroutine_offset;
+
+LOCAL local = { "local", 0 };
+LOCAL keywords = { "keywords", 1 };
+int local_table = 0;
+int keywords_table = 0;
+
+void __attribute__((noinline)) bar (char *p_buffer)
+{
+  p_buffer[255] = 1;
+}
+
+int __attribute__((noinline)) foo (char *p_str1)
+{
+  global = 1;
+  return 1;
+}
+
+int __attribute__((noinline)) loop_next (int *p_table, char *p_table_head)
+{
+  static loop_next = 0;
+
+  if (loop_next == 1)
+    return 1;
+
+  loop_next = 1;
+  return 0;
+}
+
+int
+main ()
+{
+  char buffer[256];
+  int ende = 0;
+  int index;
+  int local_base = 2;
+
+  keywords.a = 1;
+  for (sy = 0;; sy++)
+    {
+      for (index = 1;;)
+	{
+	  bar (buffer);
+	  if (buffer[sy] != 0)
+	    {
+	      ende = 1;
+	      break;
+	    };
+	  if (foo (buffer))
+	    {
+	      keywords.a += index - 1;
+	      break;
+	    }
+	  index++;
+	}
+      if (ende)
+	break;
+    }
+
+  subroutine_offset = 0;
+
+  for (;;)
+    {
+      if (loop_next (&keywords_table, (char*)&keywords))
+	break;
+
+      if ((!memcmp (keywords.name, "+++", 3)))
+	local_base = 100;
+      else
+	local_base = 0;
+
+      if ((!memcmp (keywords.name, "+++", 3)))
+	subroutine_offset += local_table;
+
+      for (;;)
+	{
+	  if (loop_next (&local_table, (char*)&local))
+	    break;;
+	  if ((local.a == 0))
+	    continue;;
+	  foo (local.name);
+	}
+    }
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c b/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c
new file mode 100644
index 00000000000..80d7b9d6ebf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/builtin-strncat-chk-1.c
@@ -0,0 +1,38 @@
+/* Test whether buffer overflow warnings for __strncat_chk builtin
+   are emitted properly.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=gnu99" } */
+
+extern void abort (void);
+
+#include "../gcc.c-torture/execute/builtins/chk.h"
+
+char buf1[20];
+char *q;
+
+void
+test (int arg, ...)
+{
+  char *p = &buf1[10];
+
+  *p = 0;
+  strncat (p, "abcdefg", 9);
+  *p = 0;
+  strncat (p, "abcdefghi", 9);
+  *p = 0;
+  strncat (p, "abcdefghij", 9);
+  *p = 0;
+  strncat (p, "abcdefghi", 10);
+  *p = 0;
+  strncat (p, "abcdefghij", 10); /* { dg-warning "will always overflow" } */
+  *p = 0;
+  strncat (p, "abcdefgh", 11);
+  *p = 0;
+  strncat (p, "abcdefghijkl", 11); /* { dg-warning "will always overflow" } */
+  *p = 0;
+  strncat (p, q, 9);
+  *p = 0;
+  strncat (p, q, 10); /* { dg-warning "might overflow" } */
+  *p = 0;
+  strncat (p, q, 11); /* { dg-warning "might overflow" } */
+}
diff --git a/gcc/testsuite/gcc.dg/cpp/paste14.c b/gcc/testsuite/gcc.dg/cpp/paste14.c
new file mode 100644
index 00000000000..ec243c2326f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/cpp/paste14.c
@@ -0,0 +1,7 @@
+/* PR preprocessor/28709 */
+/* { dg-do preprocess } */
+
+#define foo - ## >>
+foo		/* { dg-error "pasting \"-\" and \">>\"" } */
+#define bar = ## ==
+bar		/* { dg-error "pasting \"=\" and \"==\"" } */
diff --git a/gcc/testsuite/gcc.dg/i386-cpuid.h b/gcc/testsuite/gcc.dg/i386-cpuid.h
index dc300e4482b..c7b999c7fdf 100644
--- a/gcc/testsuite/gcc.dg/i386-cpuid.h
+++ b/gcc/testsuite/gcc.dg/i386-cpuid.h
@@ -2,23 +2,32 @@
    Used by 20020523-2.c and i386-sse-6.c, and possibly others.  */
 /* Plagarized from 20020523-2.c.  */
 
+/* %ecx */
+#define bit_SSE3 (1 << 0)
+#define bit_SSSE3 (1 << 9)
+
+/* %edx */
 #define bit_CMOV (1 << 15)
 #define bit_MMX (1 << 23)
 #define bit_SSE (1 << 25)
 #define bit_SSE2 (1 << 26)
 
+/* Extended Features */
+/* %ecx */
+#define bit_SSE4a (1 << 6)
+
 #ifndef NOINLINE
 #define NOINLINE __attribute__ ((noinline))
 #endif
 
-unsigned int i386_cpuid (void) NOINLINE;
-
-unsigned int NOINLINE
-i386_cpuid (void)
+static inline unsigned int
+i386_get_cpuid (unsigned int *ecx, unsigned int *edx)
 {
-  int fl1, fl2;
+  int fl1;
 
 #ifndef __x86_64__
+  int fl2;
+
   /* See if we can use cpuid.  On AMD64 we always can.  */
   __asm__ ("pushfl; pushfl; popl %0; movl %0,%1; xorl %2,%0;"
 	   "pushl %0; popfl; pushfl; popl %0; popfl"
@@ -42,15 +51,99 @@ i386_cpuid (void)
   if (fl1 == 0)
     return (0);
 
-  /* Invoke CPUID(1), return %edx; caller can examine bits to
+  /* Invoke CPUID(1), return %ecx and %edx; caller can examine bits to
      determine what's supported.  */
 #ifdef __x86_64__
-  __asm__ ("pushq %%rcx; pushq %%rbx; cpuid; popq %%rbx; popq %%rcx"
-	   : "=d" (fl2), "=a" (fl1) : "1" (1) : "cc");
+  __asm__ ("pushq %%rbx; cpuid; popq %%rbx"
+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (1) : "cc");
 #else
-  __asm__ ("pushl %%ecx; pushl %%ebx; cpuid; popl %%ebx; popl %%ecx"
-	   : "=d" (fl2), "=a" (fl1) : "1" (1) : "cc");
+  __asm__ ("pushl %%ebx; cpuid; popl %%ebx"
+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (1) : "cc");
+#endif
+
+  return 1;
+}
+
+static inline unsigned int
+i386_get_extended_cpuid (unsigned int *ecx, unsigned int *edx)
+{
+  int fl1;
+  if (!(i386_get_cpuid (ecx, edx)))
+    return 0;
+
+  /* Invoke CPUID(0x80000000) to get the highest supported extended function
+     number.  */
+#ifdef __x86_64__
+  __asm__ ("cpuid"
+	   : "=a" (fl1) : "0" (0x80000000) : "edx", "ecx", "ebx");
+#else
+  __asm__ ("pushl %%ebx; cpuid; popl %%ebx"
+	   : "=a" (fl1) : "0" (0x80000000) : "edx", "ecx");
+#endif
+  /* Check that the extended function used below (0x80000001) is supported.  */
+  if (fl1 < 0x80000001)
+    return 0;  
+
+  /* Invoke CPUID(0x80000001), return %ecx and %edx; caller can examine bits to
+     determine what's supported.  */
+#ifdef __x86_64__
+  __asm__ ("cpuid"
+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (0x80000001) : "ebx");
+#else
+  __asm__ ("pushl %%ebx; cpuid; popl %%ebx"
+	   : "=c" (*ecx), "=d" (*edx), "=a" (fl1) : "2" (0x80000001));
 #endif
+  return 1;
+}
+
+
+unsigned int i386_cpuid_ecx (void) NOINLINE;
+unsigned int i386_cpuid_edx (void) NOINLINE;
+unsigned int i386_extended_cpuid_ecx (void) NOINLINE;
+unsigned int i386_extended_cpuid_edx (void) NOINLINE;
+
+unsigned int NOINLINE
+i386_cpuid_ecx (void)
+{
+  unsigned int ecx, edx;
+  if (i386_get_cpuid (&ecx, &edx))
+    return ecx;
+  else
+    return 0;
+}
+
+unsigned int NOINLINE
+i386_cpuid_edx (void)
+{
+  unsigned int ecx, edx;
+  if (i386_get_cpuid (&ecx, &edx))
+    return edx;
+  else
+    return 0;
+}
 
-  return fl2;
+unsigned int NOINLINE
+i386_extended_cpuid_ecx (void)
+{
+  unsigned int ecx, edx;
+  if (i386_get_extended_cpuid (&ecx, &edx))
+    return ecx;
+  else
+    return 0;
+}
+
+unsigned int NOINLINE
+i386_extended_cpuid_edx (void)
+{
+  unsigned int ecx, edx;
+  if (i386_get_extended_cpuid (&ecx, &edx))
+    return edx;
+  else
+    return 0;
+}
+
+static inline unsigned int
+i386_cpuid (void)
+{
+  return i386_cpuid_edx ();
 }
diff --git a/gcc/testsuite/gcc.dg/pr29299.c b/gcc/testsuite/gcc.dg/pr29299.c
new file mode 100644
index 00000000000..9049060fae4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr29299.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+static int bof __attribute__((used));
+int foo()
+{
+	static int barbarbarbar __attribute__((used));
+};
+
+/* { dg-final { scan-assembler "barbarbarbar" } } */
diff --git a/gcc/testsuite/gcc.dg/tls/opt-13.c b/gcc/testsuite/gcc.dg/tls/opt-13.c
new file mode 100644
index 00000000000..8eea76b68ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tls/opt-13.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target tls } */
+
+__thread struct
+{
+  int a;
+  char b[32];
+} thr;
+
+int
+main ()
+{
+  __builtin_strcpy (thr.b, "abcd");
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/torture/pr26998.c b/gcc/testsuite/gcc.dg/torture/pr26998.c
new file mode 100644
index 00000000000..d50c344734e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr26998.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+
+int decCompareOp (int result)
+{
+    if (result != (int)0x80000000)
+    {
+        result = -result;
+        return (result > 0);
+    }
+    return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/vrp29.c b/gcc/testsuite/gcc.dg/tree-ssa/vrp29.c
new file mode 100644
index 00000000000..bace4ffcadf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/vrp29.c
@@ -0,0 +1,20 @@
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+extern void abort(void);
+
+void decCompareOp (int result)
+{
+  if (result != (int)0x80000000)
+    {
+      result = -result;
+      if (result != (int)0x80000001)
+        abort ();
+    }
+}
+
+int main()
+{
+  decCompareOp (0x7fffffff);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr32389.c b/gcc/testsuite/gcc.target/i386/pr32389.c
new file mode 100644
index 00000000000..3f4cb3e6228
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr32389.c
@@ -0,0 +1,10 @@
+/* Testcase by Mike Frysinger <vapier@gentoo.org>  */
+
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */
+/* { dg-options "-msse" } */
+
+double f1();
+int f2() {
+  __builtin_ia32_stmxcsr();
+  return f1();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4a-extract.c b/gcc/testsuite/gcc.target/i386/sse4a-extract.c
new file mode 100644
index 00000000000..20817a9cf52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4a-extract.c
@@ -0,0 +1,100 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse4a" } */
+#include <ammintrin.h>
+#include <stdlib.h>
+#include "../../gcc.dg/i386-cpuid.h"
+
+static void sse4a_test (void);
+
+typedef union
+{
+  long long i[2];
+  __m128i vec;
+} LI;
+
+int
+main ()
+{  
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_extended_cpuid_ecx ();
+
+  /* Run SSE4a test only if host has SSE4a support.  */
+  if ((cpu_facilities & bit_SSE4a))
+    sse4a_test ();
+
+  exit (0);
+}
+
+static long long 
+sse4a_test_extrq (long long in)
+{
+  __m128i v1, v2;
+  long long index_length, pad;
+  LI v_out;
+  index_length = 0x0000000000000810LL; 
+  pad = 0x0;
+  v1 = _mm_set_epi64x (pad, in);
+  v2 = _mm_set_epi64x (pad, index_length); 
+  v_out.vec = _mm_extract_si64 (v1, v2);
+  return (v_out.i[0]); 
+}
+
+static long long 
+sse4a_test_extrqi (long long in)
+{
+  __m128i v1;
+  long long pad =0x0;
+  LI v_out;
+  v1 = _mm_set_epi64x (pad, in);
+  v_out.vec = _mm_extracti_si64 (v1, (unsigned int) 0x10,(unsigned int) 0x08);
+  return (v_out.i[0]);
+}
+
+static chk (long long i1, long long i2)
+{
+  int n_fails =0;
+  if (i1 != i2) 
+    n_fails +=1;
+  return n_fails;
+}
+
+long long vals_in[5] =
+  {
+    0x1234567887654321LL,
+    0x1456782093002490LL,
+    0x2340909123990390LL,
+    0x9595959599595999LL,
+    0x9099038798000029LL
+  };
+
+long long vals_out[5] =
+  {
+    0x0000000000006543LL,
+    0x0000000000000024LL,
+    0x0000000000009903LL,
+    0x0000000000005959LL,
+    0x0000000000000000LL
+  };
+
+static void
+sse4a_test (void)
+{
+  int i;
+  int fail = 0;
+  long long out;
+
+  for (i = 0; i < 5; i += 1)
+    {
+      out = sse4a_test_extrq (vals_in[i]);
+      fail += chk(out, vals_out[i]);
+
+      out = sse4a_test_extrqi (vals_in[i]);
+      fail += chk(out, vals_out[i]);
+    }
+
+  if (fail != 0)
+    abort ();
+
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4a-insert.c b/gcc/testsuite/gcc.target/i386/sse4a-insert.c
new file mode 100644
index 00000000000..3e44bba141b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4a-insert.c
@@ -0,0 +1,110 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse4a" } */
+#include <ammintrin.h>
+#include <stdlib.h>
+#include "../../gcc.dg/i386-cpuid.h"
+
+static void sse4a_test (void);
+
+typedef union
+{
+  long long i[2];
+  __m128i vec;
+} LI;
+
+int
+main ()
+{  
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_extended_cpuid_ecx ();
+
+  /* Run SSE4a test only if host has SSE4a support.  */
+  if ((cpu_facilities & bit_SSE4a))
+    sse4a_test ();
+
+  exit (0);
+}
+
+static long long
+sse4a_test_insert (long long in1, long long in2)
+{
+  __m128i v1,v2;
+  long long index_length, pad;
+  LI v_out;
+  index_length = 0x0000000000000810LL;
+  pad = 0x0;
+  v1 = _mm_set_epi64x (pad, in1);
+  v2 = _mm_set_epi64x (index_length, in2); 
+  v_out.vec = _mm_insert_si64 (v1, v2);
+  return (v_out.i[0]);
+}
+
+static long long
+sse4a_test_inserti (long long in1, long long in2)
+{
+  __m128i v1,v2;
+  long long pad = 0x0;
+  LI v_out;
+  v1 = _mm_set_epi64x (pad, in1);
+  v2 = _mm_set_epi64x (pad, in2); 
+  v_out.vec = _mm_inserti_si64 (v1, v2, (unsigned int) 0x10, (unsigned int) 0x08);
+  return (v_out.i[0]);  
+}
+
+static chk (long long i1, long long i2)
+{
+  int n_fails =0;
+  if (i1 != i2) 
+    n_fails +=1;
+  return n_fails;
+}
+
+long long vals_in1[5] =
+  {
+    0x1234567887654321LL,
+    0x1456782093002490LL,
+    0x2340909123990390LL,
+    0x9595959599595999LL,
+    0x9099038798000029LL
+  };
+
+long long vals_in2[5] =
+  {
+    0x9ABCDEF00FEDCBA9LL,
+    0x234567097289672ALL,
+    0x45476453097BD342LL,
+    0x23569012AE586FF0LL,
+    0x432567ABCDEF765DLL
+  };
+
+long long vals_out[5] =
+  {
+    0x1234567887CBA921LL,
+    0x1456782093672A90LL,
+    0x2340909123D34290LL,
+    0x95959595996FF099LL,
+    0x9099038798765D29LL
+  };
+
+static void
+sse4a_test (void)
+{
+  int i;
+  int fail = 0;
+  long long out;
+
+  for (i = 0; i < 5; i += 1)
+    {
+      out = sse4a_test_insert (vals_in1[i], vals_in2[i]);
+      fail += chk(out, vals_out[i]);
+
+      out = sse4a_test_inserti (vals_in1[i], vals_in2[i]);
+      fail += chk(out, vals_out[i]);
+    }
+
+  if (fail != 0)
+    abort ();
+
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4a-montsd.c b/gcc/testsuite/gcc.target/i386/sse4a-montsd.c
new file mode 100644
index 00000000000..e9be98e3bb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4a-montsd.c
@@ -0,0 +1,64 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse4a" } */
+#include <ammintrin.h>
+#include <stdlib.h>
+#include "../../gcc.dg/i386-cpuid.h"
+
+static void sse4a_test (void);
+
+int
+main ()
+{  
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_extended_cpuid_ecx ();
+
+  /* Run SSE4a test only if host has SSE4a support.  */
+  if ((cpu_facilities & bit_SSE4a))
+    sse4a_test ();
+
+  exit (0);
+}
+  
+static void 
+sse4a_test_movntsd (double *out, double *in)
+{
+  __m128d in_v2df = _mm_load_sd (in);
+  _mm_stream_sd (out, in_v2df);
+}
+
+static int 
+chk_sd (double *v1, double *v2)
+{
+  int n_fails = 0;
+  if (v1[0] != v2[0])
+    n_fails += 1;
+  return n_fails;
+}
+
+double vals[10] =
+  {
+    100.0,  200.0, 300.0, 400.0, 5.0, 
+    -1.0, .345, -21.5, 9.32,  8.41
+  };
+
+static void
+sse4a_test (void)
+{
+  int i;
+  int fail = 0;
+  double *out;
+
+  out = (double *) malloc (sizeof (double));
+  for (i = 0; i < 10; i += 1)
+    {
+      sse4a_test_movntsd (out, &vals[i]);
+      
+      fail += chk_sd (out, &vals[i]);
+    }
+
+  if (fail != 0)
+    abort ();
+
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse4a-montss.c b/gcc/testsuite/gcc.target/i386/sse4a-montss.c
new file mode 100644
index 00000000000..28ecb1cf3c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/sse4a-montss.c
@@ -0,0 +1,64 @@
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -msse4a" } */
+#include <ammintrin.h>
+#include <stdlib.h>
+#include "../../gcc.dg/i386-cpuid.h"
+
+static void sse4a_test (void);
+
+int
+main ()
+{  
+  unsigned long cpu_facilities;
+
+  cpu_facilities = i386_extended_cpuid_ecx ();
+
+  /* Run SSE4a test only if host has SSE4a support.  */
+  if ((cpu_facilities & bit_SSE4a))
+    sse4a_test ();
+
+  exit (0);
+}
+
+static void 
+sse4a_test_movntss (float *out, float *in)
+{
+  __m128 in_v4sf = _mm_load_ss (in);
+  _mm_stream_ss (out, in_v4sf);
+}
+
+static int 
+chk_ss (float *v1, float *v2)
+{
+  int n_fails = 0;
+  if (v1[0] != v2[0])
+    n_fails += 1;
+  return n_fails;
+}
+
+float vals[10] =
+  {
+    100.0,  200.0, 300.0, 400.0, 5.0, 
+    -1.0, .345, -21.5, 9.32,  8.41
+  };
+
+static void
+sse4a_test (void)
+{
+  int i;
+  int fail = 0;
+  float *out;
+
+  out = (float *) malloc (sizeof (float));
+  for (i = 0; i < 10; i += 1)
+    {
+      sse4a_test_movntss (out, &vals[i]);
+      
+      fail += chk_ss (out, &vals[i]);
+    }
+
+  if (fail != 0)
+    abort ();
+
+  exit (0);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-23.c b/gcc/testsuite/gcc.target/powerpc/altivec-23.c
new file mode 100644
index 00000000000..59de3e5a06d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/altivec-23.c
@@ -0,0 +1,25 @@
+/* Verify that it is possible to define variables of composite types
+   containing vector types.  We used to crash handling the
+   initializer of automatic ones.  */
+
+/* { dg-do compile } */
+/* { dg-xfail-if "" { "powerpc-ibm-aix*" } { "-maltivec" } { "" } } */
+/* { dg-options "-maltivec -mabi=altivec" } */
+
+#include <altivec.h>
+
+typedef int bt;
+typedef vector bt vt;
+typedef struct { vt x; bt y[sizeof(vt) / sizeof (bt)]; } st;
+#define INIT { 1, 2, 3, 4 }
+
+void f ()
+{
+  vt x = INIT;
+  vt y[1] = { INIT };
+  st s = { INIT, INIT };
+}
+
+vt x = INIT;
+vt y[1] = { INIT };
+st s = { INIT, INIT };
diff --git a/gcc/testsuite/gcc.target/sparc/align.c b/gcc/testsuite/gcc.target/sparc/align.c
index d9cc162e828..804ca9397f3 100644
--- a/gcc/testsuite/gcc.target/sparc/align.c
+++ b/gcc/testsuite/gcc.target/sparc/align.c
@@ -1,10 +1,9 @@
 /* { dg-do compile } */
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
-
 typedef long long int64_t;
 typedef int vec32 __attribute__((vector_size(8)));
 typedef short vec16 __attribute__((vector_size(8)));
-typedef char vec8 __attribute__((vector_size(8)));
+typedef unsigned char vec8 __attribute__((vector_size(8)));
 
 vec16 foo1 (vec16 a, vec16 b) {
   return __builtin_vis_faligndatav4hi (a, b);
diff --git a/gcc/testsuite/gcc.target/sparc/combined-2.c b/gcc/testsuite/gcc.target/sparc/combined-2.c
index 016e4fa6a6c..c4b70a55a2d 100644
--- a/gcc/testsuite/gcc.target/sparc/combined-2.c
+++ b/gcc/testsuite/gcc.target/sparc/combined-2.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mcpu=ultrasparc -mvis" } */
-typedef char pixel __attribute__((vector_size(4)));
-typedef char vec8 __attribute__((vector_size(8)));
+typedef unsigned char pixel __attribute__((vector_size(4)));
+typedef unsigned char vec8 __attribute__((vector_size(8)));
 typedef short vec16 __attribute__((vector_size(8)));
 
 vec16 foo (pixel a, pixel b) {
diff --git a/gcc/testsuite/gcc.target/sparc/fexpand.c b/gcc/testsuite/gcc.target/sparc/fexpand.c
index 2483f4f7235..21aeafff0b1 100644
--- a/gcc/testsuite/gcc.target/sparc/fexpand.c
+++ b/gcc/testsuite/gcc.target/sparc/fexpand.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
 typedef short vec16 __attribute__((vector_size(8)));
-typedef char vec8 __attribute__((vector_size(4)));
+typedef unsigned char vec8 __attribute__((vector_size(4)));
 
 vec16 foo (vec8 a) {
   return __builtin_vis_fexpand (a);
diff --git a/gcc/testsuite/gcc.target/sparc/fnot.c b/gcc/testsuite/gcc.target/sparc/fnot.c
index e6f98d412a6..dceee52f7da 100644
--- a/gcc/testsuite/gcc.target/sparc/fnot.c
+++ b/gcc/testsuite/gcc.target/sparc/fnot.c
@@ -1,8 +1,8 @@
 /* { dg-do compile } */
 /* { dg-options "-O -mcpu=ultrasparc -mvis" } */
-typedef char  vec8 __attribute__((vector_size(8)));
+typedef unsigned char vec8 __attribute__((vector_size(8)));
 typedef short vec16 __attribute__((vector_size(8)));
-typedef int   vec32 __attribute__((vector_size(8)));
+typedef int vec32 __attribute__((vector_size(8)));
 
 extern vec8 foo1_8(void);
 extern void foo2_8(vec8);
@@ -21,7 +21,7 @@ vec8 fun8_2(vec8 a)
 #endif
 
 extern vec16 foo1_16(void);
-extern void foo2_16(vec8);
+extern void foo2_16(vec16);
 
 
 vec16 fun16(void)
@@ -38,7 +38,7 @@ vec16 fun16_2(vec16 a)
 #endif
 
 extern vec32 foo1_32(void);
-extern void foo2_32(vec8);
+extern void foo2_32(vec32);
 
 vec32 fun32(void)
 {
diff --git a/gcc/testsuite/gcc.target/sparc/fpack16.c b/gcc/testsuite/gcc.target/sparc/fpack16.c
index 2f7aac56171..79e0c4c1577 100644
--- a/gcc/testsuite/gcc.target/sparc/fpack16.c
+++ b/gcc/testsuite/gcc.target/sparc/fpack16.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
 typedef short vec16 __attribute__((vector_size(8)));
-typedef char vec8 __attribute__((vector_size(4)));
+typedef unsigned char vec8 __attribute__((vector_size(4)));
 
 vec8 foo (vec16 a) {
   return __builtin_vis_fpack16 (a);
diff --git a/gcc/testsuite/gcc.target/sparc/fpmerge.c b/gcc/testsuite/gcc.target/sparc/fpmerge.c
index 013795cc6d7..4d6a9c02382 100644
--- a/gcc/testsuite/gcc.target/sparc/fpmerge.c
+++ b/gcc/testsuite/gcc.target/sparc/fpmerge.c
@@ -1,7 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
-typedef char pixel __attribute__((vector_size(8)));
-typedef char vec8 __attribute__((vector_size(4)));
+typedef unsigned char pixel __attribute__((vector_size(8)));
+typedef unsigned char vec8 __attribute__((vector_size(4)));
 
 pixel foo (vec8 a, vec8 b) {
   return __builtin_vis_fpmerge (a, b);
diff --git a/gcc/testsuite/gcc.target/sparc/fpmul.c b/gcc/testsuite/gcc.target/sparc/fpmul.c
index 991cc75d661..71b3b17ac57 100644
--- a/gcc/testsuite/gcc.target/sparc/fpmul.c
+++ b/gcc/testsuite/gcc.target/sparc/fpmul.c
@@ -2,9 +2,9 @@
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
 typedef int vec32 __attribute__((vector_size(8)));
 typedef short vec16 __attribute__((vector_size(8)));
-typedef char pixel __attribute__((vector_size(4)));
+typedef unsigned char pixel __attribute__((vector_size(4)));
 typedef short pixel16 __attribute__((vector_size(4)));
-typedef char vec8 __attribute__((vector_size(8)));
+typedef unsigned char vec8 __attribute__((vector_size(8)));
 
 vec16 foo1 (pixel a, vec16 b) {
   return __builtin_vis_fmul8x16 (a, b);
diff --git a/gcc/testsuite/gcc.target/sparc/mfpu.c b/gcc/testsuite/gcc.target/sparc/mfpu.c
new file mode 100644
index 00000000000..e95754c5d6b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/sparc/mfpu.c
@@ -0,0 +1,11 @@
+/* Reported by Peter A. Krauss <peter.a.krauss@web.de> */
+
+/* { dg-do compile } */
+/* { dg-options "-mfpu" } */
+
+float square(float x)
+{
+  return x * x;
+}
+
+/* { dg-final { scan-assembler "fmuls" } } */
diff --git a/gcc/testsuite/gcc.target/sparc/noresult.c b/gcc/testsuite/gcc.target/sparc/noresult.c
index f32805d3547..1be7458d2be 100644
--- a/gcc/testsuite/gcc.target/sparc/noresult.c
+++ b/gcc/testsuite/gcc.target/sparc/noresult.c
@@ -1,7 +1,6 @@
 /* { dg-do compile } */
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
 typedef short vec16 __attribute__((vector_size(8)));
-typedef char vec8 __attribute__((vector_size(4)));
 
 void foo (vec16 a) {
   __builtin_vis_fpack16 (a);
diff --git a/gcc/testsuite/gcc.target/sparc/pdist.c b/gcc/testsuite/gcc.target/sparc/pdist.c
index 48ca0dbf139..6ecc20aa178 100644
--- a/gcc/testsuite/gcc.target/sparc/pdist.c
+++ b/gcc/testsuite/gcc.target/sparc/pdist.c
@@ -1,8 +1,7 @@
 /* { dg-do compile } */
 /* { dg-options "-mcpu=ultrasparc -mvis" } */
-
 typedef long long int64_t;
-typedef char vec8 __attribute__((vector_size(8)));
+typedef unsigned char vec8 __attribute__((vector_size(8)));
 
 int64_t foo (vec8 a, vec8 b) {
   int64_t d = 0;
diff --git a/gcc/testsuite/gfortran.dg/unf_read_corrupted_2.f90 b/gcc/testsuite/gfortran.dg/unf_read_corrupted_2.f90
new file mode 100644
index 00000000000..1788b457d19
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/unf_read_corrupted_2.f90
@@ -0,0 +1,27 @@
+! { dg-do run }
+! PR31880 silent data corruption in gfortran read statement
+! Test from PR.
+      program r3
+
+      integer(kind=4) :: a(1025),b(1025),c(1025),d(2048),e(1022)
+      
+      a = 5
+      b = 6
+      c = 7
+      e = 8
+
+      do i=1,2048
+         d(i)=i
+      end do
+
+      open  (3,form='unformatted', status="scratch")
+      write (3) a,b,c,d,e
+      rewind 3
+      d = 0
+      read  (3) a,b,c,d
+      close (3)
+
+      if (d(1).ne.1) call abort
+      if (d(2048).ne.2048) call abort
+
+      end
diff --git a/gcc/testsuite/gfortran.fortran-torture/execute/intrinsic_bitops.f90 b/gcc/testsuite/gfortran.fortran-torture/execute/intrinsic_bitops.f90
index 95ff44c999e..7dcda255b96 100644
--- a/gcc/testsuite/gfortran.fortran-torture/execute/intrinsic_bitops.f90
+++ b/gcc/testsuite/gfortran.fortran-torture/execute/intrinsic_bitops.f90
@@ -8,7 +8,8 @@ program intrinsic_bitops
    i = 2
    j = 3
    k = 12
-
+   a = 5
+   
    if (.not. btest (i, o+1)) call abort
    if (btest (i, o+2)) call abort
    if (iand (i, j) .ne. 2) call abort
@@ -26,4 +27,6 @@ program intrinsic_bitops
    if (ishftc (k, o-30) .ne. 48) call abort
    if (ishftc (k, o+1, o+3) .ne. 9) call abort
    if (not (i) .ne. -3) call abort
+   if (ishftc (a, 1, bit_size(a)) .ne. 10) call abort
+   if (ishftc (1, 1, 32) .ne. 2) call abort
 end program
diff --git a/gcc/testsuite/objc/compile/20060406-1.m b/gcc/testsuite/objc/compile/20060406-1.m
new file mode 100644
index 00000000000..e4496bf6ede
--- /dev/null
+++ b/gcc/testsuite/objc/compile/20060406-1.m
@@ -0,0 +1,21 @@
+typedef struct
+{
+  void *p;
+} *S;
+
+@protocol O
+- (unsigned)j;
+@end
+
+@interface I
++ (unsigned char)T:(S<O>[2])p v:(S<O>)h;
+@end
+
+@implementation I
++ (unsigned char)T:(S<O>[2])p v:(S<O>)h
+{
+  p[0] = (S) 0;
+  p[1] = (S) 0;
+  return 0;
+}
+@end
diff --git a/gcc/tree-ssa-propagate.c b/gcc/tree-ssa-propagate.c
index 3781d359eb3..6b1741fef1b 100644
--- a/gcc/tree-ssa-propagate.c
+++ b/gcc/tree-ssa-propagate.c
@@ -581,7 +581,8 @@ set_rhs (tree *stmt_p, tree expr)
 	  && !is_gimple_val (TREE_OPERAND (TREE_OPERAND (expr, 0), 1)))
 	return false;
     }
-  else if (code == COMPOUND_EXPR)
+  else if (code == COMPOUND_EXPR
+	   || code == MODIFY_EXPR)
     return false;
 
   if (EXPR_HAS_LOCATION (stmt)
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index 0bc5ecaabc6..bcc1dac3e0e 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -3405,7 +3405,8 @@ set_uids_in_ptset (bitmap into, bitmap from)
 	    bitmap_set_bit (into, DECL_UID (sv->var));
 	}
       else if (TREE_CODE (vi->decl) == VAR_DECL 
-	       || TREE_CODE (vi->decl) == PARM_DECL)
+	       || TREE_CODE (vi->decl) == PARM_DECL
+	       || TREE_CODE (vi->decl) == RESULT_DECL)
 	{
 	  if (found_anyoffset
 	      && var_can_have_subvars (vi->decl)
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 0f62c372b44..cd46d1d8c43 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1515,14 +1515,21 @@ extract_range_from_unary_expr (value_range_t *vr, tree expr)
   if (code == NEGATE_EXPR
       && !TYPE_UNSIGNED (TREE_TYPE (expr)))
     {
-      /* NEGATE_EXPR flips the range around.  */
-      min = (vr0.max == TYPE_MAX_VALUE (TREE_TYPE (expr)) && !flag_wrapv)
-	     ? TYPE_MIN_VALUE (TREE_TYPE (expr))
-	     : fold_unary_to_constant (code, TREE_TYPE (expr), vr0.max);
-
-      max = (vr0.min == TYPE_MIN_VALUE (TREE_TYPE (expr)) && !flag_wrapv)
-	     ? TYPE_MAX_VALUE (TREE_TYPE (expr))
-	     : fold_unary_to_constant (code, TREE_TYPE (expr), vr0.min);
+      /* NEGATE_EXPR flips the range around.  TYPE_MIN_VALUE needs
+	 special treatment depending on wrapping, on the range type, and
+	 on whether it was used as the minimum or the maximum value:
+	  -~[MIN, MIN] == ~[MIN, MIN]
+	  -[MIN, 0] == [0, MAX]  for -fno-wrapv
+	  -[MIN, 0] == [0, MIN]  for -fwrapv (will be set to varying later)  */
+      min = vr0.max == TYPE_MIN_VALUE (TREE_TYPE (expr))
+	    ? TYPE_MIN_VALUE (TREE_TYPE (expr))
+	    : fold_unary_to_constant (code, TREE_TYPE (expr), vr0.max);
+
+      max = vr0.min == TYPE_MIN_VALUE (TREE_TYPE (expr))
+	    ? (vr0.type == VR_ANTI_RANGE || flag_wrapv
+	       ? TYPE_MIN_VALUE (TREE_TYPE (expr))
+	       : TYPE_MAX_VALUE (TREE_TYPE (expr)))
+	    : fold_unary_to_constant (code, TREE_TYPE (expr), vr0.min);
     }
   else if (code == ABS_EXPR
            && !TYPE_UNSIGNED (TREE_TYPE (expr)))
diff --git a/gcc/varasm.c b/gcc/varasm.c
index cd604d24a26..2e560149ed0 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -1707,11 +1707,22 @@ assemble_variable (tree decl, int top_level ATTRIBUTE_UNUSED,
   if (! DECL_USER_ALIGN (decl))
     {
 #ifdef DATA_ALIGNMENT
-      align = DATA_ALIGNMENT (TREE_TYPE (decl), align);
+      unsigned int data_align = DATA_ALIGNMENT (TREE_TYPE (decl), align);
+      /* Don't increase alignment too much for TLS variables - TLS space
+	 is too precious.  */
+      if (! DECL_THREAD_LOCAL_P (decl) || data_align <= BITS_PER_WORD)
+	align = data_align;
 #endif
 #ifdef CONSTANT_ALIGNMENT
       if (DECL_INITIAL (decl) != 0 && DECL_INITIAL (decl) != error_mark_node)
-	align = CONSTANT_ALIGNMENT (DECL_INITIAL (decl), align);
+	{
+	  unsigned int const_align = CONSTANT_ALIGNMENT (DECL_INITIAL (decl),
+							 align);
+	  /* Don't increase alignment too much for TLS variables - TLS space
+	     is too precious.  */
+	  if (! DECL_THREAD_LOCAL_P (decl) || const_align <= BITS_PER_WORD)
+	    align = const_align;
+	}
 #endif
     }
 
diff --git a/gcc/version.c b/gcc/version.c
index c1be84ce53d..726d7fc3f9a 100644
--- a/gcc/version.c
+++ b/gcc/version.c
@@ -8,7 +8,7 @@
    in parentheses.  You may also wish to include a number indicating
    the revision of your modified compiler.  */
 
-#define VERSUFFIX " (Red Hat 4.1.2-12)"
+#define VERSUFFIX " (Red Hat 4.1.2-14)"
 
 /* This is the location of the online document giving instructions for
    reporting bugs.  If you distribute a modified version of GCC,
diff --git a/include/ChangeLog b/include/ChangeLog
index 9e6fd95f140..2ad102c3d19 100644
--- a/include/ChangeLog
+++ b/include/ChangeLog
@@ -1,3 +1,7 @@
+2007-05-04  Mark Mitchell  <mark@codesourcery.com>
+
+	* demangle.h: Change license to LGPL + exception.
+
 2007-02-13  Release Manager
 
 	* GCC 4.1.2 released.
diff --git a/include/demangle.h b/include/demangle.h
index af294672736..c23faaf1d68 100644
--- a/include/demangle.h
+++ b/include/demangle.h
@@ -1,21 +1,30 @@
 /* Defs for interface to demanglers.
    Copyright 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001, 2002,
-   2003, 2004 Free Software Foundation, Inc.
+   2003, 2004, 2005, 2007 Free Software Foundation, Inc.
    
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; either version 2, or (at your option)
-   any later version.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 51 Franklin Street - Fifth Floor,
-   Boston, MA 02110-1301, USA.  */
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License
+   as published by the Free Software Foundation; either version 2, or
+   (at your option) any later version.
+
+   In addition to the permissions in the GNU Library General Public
+   License, the Free Software Foundation gives you unlimited
+   permission to link the compiled version of this file into
+   combinations with other programs, and to distribute those
+   combinations without any restriction coming from the use of this
+   file.  (The Library Public License restrictions do apply in other
+   respects; for example, they cover modification of the file, and
+   distribution when not linked into a combined executable.)
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
+   02110-1301, USA.  */
 
 
 #if !defined (DEMANGLE_H)
diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog
index c72f26f93c4..1cc6ba30baf 100644
--- a/libcpp/ChangeLog
+++ b/libcpp/ChangeLog
@@ -1,3 +1,10 @@
+2006-10-12  Jakub Jelinek  <jakub@redhat.com>
+
+	PR preprocessor/28709
+	* macro.c (paste_tokens): Do error reporting here, use BUF with the
+	spelled LHS token as opposed to spelling it again.
+	(paste_all_tokens): Don't report errors here, just break on failure.
+
 2007-04-23  Tom Tromey  <tromey@redhat.com>
 
 	PR preprocessor/30468:
diff --git a/libcpp/macro.c b/libcpp/macro.c
index 572e06329e5..6c3a0789fba 100644
--- a/libcpp/macro.c
+++ b/libcpp/macro.c
@@ -392,15 +392,14 @@ stringify_arg (cpp_reader *pfile, macro_arg *arg)
 static bool
 paste_tokens (cpp_reader *pfile, const cpp_token **plhs, const cpp_token *rhs)
 {
-  unsigned char *buf, *end;
+  unsigned char *buf, *end, *lhsend;
   const cpp_token *lhs;
   unsigned int len;
-  bool valid;
 
   lhs = *plhs;
   len = cpp_token_len (lhs) + cpp_token_len (rhs) + 1;
   buf = (unsigned char *) alloca (len);
-  end = cpp_spell_token (pfile, lhs, buf, false);
+  end = lhsend = cpp_spell_token (pfile, lhs, buf, false);
 
   /* Avoid comment headers, since they are still processed in stage 3.
      It is simpler to insert a space here, rather than modifying the
@@ -417,10 +416,22 @@ paste_tokens (cpp_reader *pfile, const cpp_token **plhs, const cpp_token *rhs)
   /* Set pfile->cur_token as required by _cpp_lex_direct.  */
   pfile->cur_token = _cpp_temp_token (pfile);
   *plhs = _cpp_lex_direct (pfile);
-  valid = pfile->buffer->cur == pfile->buffer->rlimit;
-  _cpp_pop_buffer (pfile);
+  if (pfile->buffer->cur != pfile->buffer->rlimit)
+    {
+      _cpp_pop_buffer (pfile);
+      _cpp_backup_tokens (pfile, 1);
+      *lhsend = '\0';
+
+      /* Mandatory error for all apart from assembler.  */
+      if (CPP_OPTION (pfile, lang) != CLK_ASM)
+	cpp_error (pfile, CPP_DL_ERROR,
+	 "pasting \"%s\" and \"%s\" does not give a valid preprocessing token",
+		   buf, cpp_token_as_text (pfile, rhs));
+      return false;
+    }
 
-  return valid;
+  _cpp_pop_buffer (pfile);
+  return true;
 }
 
 /* Handles an arbitrarily long sequence of ## operators, with initial
@@ -452,17 +463,7 @@ paste_all_tokens (cpp_reader *pfile, const cpp_token *lhs)
 	abort ();
 
       if (!paste_tokens (pfile, &lhs, rhs))
-	{
-	  _cpp_backup_tokens (pfile, 1);
-
-	  /* Mandatory error for all apart from assembler.  */
-	  if (CPP_OPTION (pfile, lang) != CLK_ASM)
-	    cpp_error (pfile, CPP_DL_ERROR,
-	 "pasting \"%s\" and \"%s\" does not give a valid preprocessing token",
-		       cpp_token_as_text (pfile, lhs),
-		       cpp_token_as_text (pfile, rhs));
-	  break;
-	}
+	break;
     }
   while (rhs->flags & PASTE_LEFT);
 
diff --git a/libgfortran/ChangeLog b/libgfortran/ChangeLog
index efcc1185a78..db224125e95 100644
--- a/libgfortran/ChangeLog
+++ b/libgfortran/ChangeLog
@@ -1,3 +1,20 @@
+2007-05-23  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR libfortran/31964
+	Backport from trunk.
+	* intrinsics/ishftc.c (ishftc4, ishftc8, ishftc16): Fix mask to handle
+	shift of bit-size number of bits.
+
+2007-05-10  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR libfortran/31409
+	* io/transfer.c (read_block_direct): Backport from 4.3 trunk.
+
+2007-05-10  Jerry DeLisle  <jvdelisle@gcc.gnu.org>
+
+	PR libfortran/31880
+	* io/unix.c (fd_alloc_r_at): Fix calculation of physical offset.
+
 2007-03-14  Jakub Jelinek  <jakub@redhat.com>
 
 	* io/unix.c (regular_file): For ACTION_UNSPECIFIED retry with
diff --git a/libgfortran/intrinsics/ishftc.c b/libgfortran/intrinsics/ishftc.c
index a147b968389..91e0db2048d 100644
--- a/libgfortran/intrinsics/ishftc.c
+++ b/libgfortran/intrinsics/ishftc.c
@@ -36,8 +36,7 @@ export_proto(ishftc4);
 GFC_INTEGER_4
 ishftc4 (GFC_INTEGER_4 i, GFC_INTEGER_4 shift, GFC_INTEGER_4 size)
 {
-  GFC_INTEGER_4 mask;
-  GFC_UINTEGER_4 bits;
+  GFC_UINTEGER_4 mask, bits;
 
   if (shift < 0)
     shift = shift + size;
@@ -45,9 +44,14 @@ ishftc4 (GFC_INTEGER_4 i, GFC_INTEGER_4 shift, GFC_INTEGER_4 size)
   if (shift == 0 || shift == size)
     return i;
 
-  mask = (~(GFC_INTEGER_4)0) << size;
-  bits = i & ~mask;
-  return (i & mask) | (bits >> (size - shift)) | ((i << shift) & ~mask);
+  /* In C, shifting by a count greater than or equal to the width of the
+     (promoted) left operand is undefined, so SIZE == 32 is special-cased.
+     The all-ones constant is unsigned so no negative value is shifted.  */
+  mask = ~((size == 32) ? (GFC_UINTEGER_4) 0
+			: (~(GFC_UINTEGER_4) 0 << size));
+
+  bits = i & mask;
+  return (i & ~mask) | ((bits << shift) & mask) | (bits >> (size - shift));
 }
 
 extern GFC_INTEGER_8 ishftc8 (GFC_INTEGER_8, GFC_INTEGER_4, GFC_INTEGER_4);
@@ -56,8 +60,7 @@ export_proto(ishftc8);
 GFC_INTEGER_8
 ishftc8 (GFC_INTEGER_8 i, GFC_INTEGER_4 shift, GFC_INTEGER_4 size)
 {
-  GFC_INTEGER_8 mask;
-  GFC_UINTEGER_8 bits;
+  GFC_UINTEGER_8 mask, bits;
 
   if (shift < 0)
     shift = shift + size;
@@ -65,9 +68,14 @@ ishftc8 (GFC_INTEGER_8 i, GFC_INTEGER_4 shift, GFC_INTEGER_4 size)
   if (shift == 0 || shift == size)
     return i;
 
-  mask = (~(GFC_INTEGER_8)0) << size;
-  bits = i & ~mask;
-  return (i & mask) | (bits >> (size - shift)) | ((i << shift) & ~mask);
+  /* In C, shifting by a count greater than or equal to the width of the
+     (promoted) left operand is undefined.  Special-case SIZE == 64 and use
+     a 64-bit all-ones constant so that sizes 32..63 are well defined.  */
+  mask = ~((size == 64) ? (GFC_UINTEGER_8) 0
+			: (~(GFC_UINTEGER_8) 0 << size));
+
+  bits = i & mask;
+  return (i & ~mask) | ((bits << shift) & mask) | (bits >> (size - shift));
 }
 
 #ifdef HAVE_GFC_INTEGER_16
@@ -77,8 +85,7 @@ export_proto(ishftc16);
 GFC_INTEGER_16
 ishftc16 (GFC_INTEGER_16 i, GFC_INTEGER_4 shift, GFC_INTEGER_4 size)
 {
-  GFC_INTEGER_16 mask;
-  GFC_UINTEGER_16 bits;
+  GFC_UINTEGER_16 mask, bits;
 
   if (shift < 0)
     shift = shift + size;
@@ -86,8 +93,13 @@ ishftc16 (GFC_INTEGER_16 i, GFC_INTEGER_4 shift, GFC_INTEGER_4 size)
   if (shift == 0 || shift == size)
     return i;
 
-  mask = (~(GFC_INTEGER_16)0) << size;
-  bits = i & ~mask;
-  return (i & mask) | (bits >> (size - shift)) | ((i << shift) & ~mask);
+  /* In C, shifting by a count greater than or equal to the width of the
+     (promoted) left operand is undefined.  Special-case SIZE == 128 and use
+     a 128-bit all-ones constant so that sizes 32..127 are well defined.  */
+  mask = ~((size == 128) ? (GFC_UINTEGER_16) 0
+			 : (~(GFC_UINTEGER_16) 0 << size));
+
+  bits = i & mask;
+  return (i & ~mask) | ((bits << shift) & mask) | (bits >> (size - shift));
 }
 #endif
diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
index a3b337c789f..124c33dfc00 100644
--- a/libgfortran/io/transfer.c
+++ b/libgfortran/io/transfer.c
@@ -347,6 +347,8 @@ read_block_direct (st_parameter_dt *dtp, void *buf, size_t *nbytes)
 
   if (short_record)
     {
+      dtp->u.p.current_unit->current_record = 0;
+      next_record (dtp, 0);
       generate_error (&dtp->common, ERROR_SHORT_RECORD, NULL);
       return;
     }
diff --git a/libgfortran/io/unix.c b/libgfortran/io/unix.c
index 4d67e07f8f5..7c51caa2e83 100644
--- a/libgfortran/io/unix.c
+++ b/libgfortran/io/unix.c
@@ -465,7 +465,7 @@ fd_alloc_r_at (unix_stream * s, int *len, gfc_offset where)
       if (n < 0)
 	return NULL;
 
-      s->physical_offset = where + n;
+      s->physical_offset = m + n;
       s->active += n;
     }
   else
@@ -476,7 +476,7 @@ fd_alloc_r_at (unix_stream * s, int *len, gfc_offset where)
       if (do_read (s, s->buffer + s->active, &n) != 0)
 	return NULL;
 
-      s->physical_offset = where + n;
+      s->physical_offset = m + n;
       s->active += n;
     }
 
diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 653d5f12719..7dc10f92ab7 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,22 @@
+2007-06-21  Jakub Jelinek  <jakub@redhat.com>
+
+	PR middle-end/32362
+	* testsuite/libgomp.c/pr32362-1.c: New test.
+	* testsuite/libgomp.c/pr32362-2.c: New test.
+	* testsuite/libgomp.c/pr32362-3.c: New test.
+
+2007-06-07  Jakub Jelinek  <jakub@redhat.com>
+
+	* team.c (gomp_team_start): Fix setting up thread_attr
+	stack size.
+
+2007-05-04  Jakub Jelinek  <jakub@redhat.com>
+
+	* config/linux/proc.c: New file.
+
+	PR libgomp/28482
+	* configure.tgt: Don't link with -Wl,-z,nodlopen even on Linux.
+
 2007-04-04  Jakub Jelinek  <jakub@redhat.com>
 
 	* libgomp.h (gomp_cpu_affinity, gomp_cpu_affinity_len): New extern
diff --git a/libgomp/config/linux/proc.c b/libgomp/config/linux/proc.c
new file mode 100644
index 00000000000..2267cfbd2d1
--- /dev/null
+++ b/libgomp/config/linux/proc.c
@@ -0,0 +1,179 @@
+/* Copyright (C) 2005, 2006, 2007 Free Software Foundation, Inc.
+   Contributed by Jakub Jelinek <jakub@redhat.com>.
+
+   This file is part of the GNU OpenMP Library (libgomp).
+
+   Libgomp is free software; you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published by
+   the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+   FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+   more details.
+
+   You should have received a copy of the GNU Lesser General Public License 
+   along with libgomp; see the file COPYING.LIB.  If not, write to the
+   Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.  */
+
+/* As a special exception, if you link this library with other files, some
+   of which are compiled with GCC, to produce an executable, this library
+   does not by itself cause the resulting executable to be covered by the
+   GNU General Public License.  This exception does not however invalidate
+   any other reasons why the executable file might be covered by the GNU
+   General Public License.  */
+
+/* This file contains system specific routines related to counting
+   online processors and dynamic load balancing.  */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE 1
+#endif
+#include "libgomp.h"
+#include <sched.h>
+#include <stdlib.h>
+#include <unistd.h>
+#ifdef HAVE_GETLOADAVG
+# ifdef HAVE_SYS_LOADAVG_H
+#  include <sys/loadavg.h>
+# endif
+#endif
+
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+static unsigned long
+cpuset_popcount (cpu_set_t *cpusetp)
+{
+#ifdef CPU_COUNT
+  /* glibc 2.6 and above provide a macro for this.  */
+  return CPU_COUNT (cpusetp);
+#else
+  size_t i;
+  unsigned long ret = 0;
+  extern int check[sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int)];
+
+  (void) check;
+  for (i = 0; i < sizeof (*cpusetp) / sizeof (cpusetp->__bits[0]); i++)
+    {
+      unsigned long int mask = cpusetp->__bits[i];
+      if (mask == 0)
+	continue;
+      ret += __builtin_popcountl (mask);
+    }
+  return ret;
+#endif
+}
+#endif
+
+/* At startup, determine the default number of threads.  It would seem
+   this should be related to the number of cpus online.  */
+
+void
+gomp_init_num_threads (void)
+{
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+  cpu_set_t cpuset;
+
+  if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset), &cpuset) == 0)
+    {
+      /* Count only the CPUs this process can use.  */
+      gomp_nthreads_var = cpuset_popcount (&cpuset);
+      if (gomp_nthreads_var == 0)
+	gomp_nthreads_var = 1;
+      return;
+    }
+#endif
+#ifdef _SC_NPROCESSORS_ONLN
+  gomp_nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
+#endif
+}
+
+static int
+get_num_procs (void)
+{
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+  cpu_set_t cpuset;
+
+  if (gomp_cpu_affinity == NULL)
+    {
+      /* Count only the CPUs this process can use.  */
+      if (pthread_getaffinity_np (pthread_self (), sizeof (cpuset),
+				  &cpuset) == 0)
+	{
+	  int ret = cpuset_popcount (&cpuset);
+	  return ret != 0 ? ret : 1;
+	}
+    }
+  else
+    {
+      size_t idx;
+      static int affinity_cpus;
+
+      /* We can't use pthread_getaffinity_np in this case
+	 (we have changed it ourselves, it binds to just one CPU).
+	 Count instead the number of different CPUs we are
+	 using.  */
+      CPU_ZERO (&cpuset);
+      if (affinity_cpus == 0)
+	{
+	  int cpus = 0;
+	  for (idx = 0; idx < gomp_cpu_affinity_len; idx++)
+	    if (! CPU_ISSET (gomp_cpu_affinity[idx], &cpuset))
+	      {
+		cpus++;
+		CPU_SET (gomp_cpu_affinity[idx], &cpuset);
+	      }
+	  affinity_cpus = cpus;
+	}
+      return affinity_cpus;
+    }
+#endif
+#ifdef _SC_NPROCESSORS_ONLN
+  return sysconf (_SC_NPROCESSORS_ONLN);
+#else
+  return gomp_nthreads_var;
+#endif
+}
+
+/* When OMP_DYNAMIC is set, at thread launch determine the number of
+   threads we should spawn for this team.  */
+/* ??? I have no idea what best practice for this is.  Surely some
+   function of the number of processors that are *still* online and
+   the load average.  Here I use the number of processors online
+   minus the 15 minute load average.  */
+
+unsigned
+gomp_dynamic_max_threads (void)
+{
+  unsigned n_onln, loadavg;
+
+  n_onln = get_num_procs ();
+  if (n_onln > gomp_nthreads_var)
+    n_onln = gomp_nthreads_var;
+
+  loadavg = 0;
+#ifdef HAVE_GETLOADAVG
+  {
+    double dloadavg[3];
+    if (getloadavg (dloadavg, 3) == 3)
+      {
+	/* Add 0.1 to get a kind of biased rounding.  */
+	loadavg = dloadavg[2] + 0.1;
+      }
+  }
+#endif
+
+  if (loadavg >= n_onln)
+    return 1;
+  else
+    return n_onln - loadavg;
+}
+
+int
+omp_get_num_procs (void)
+{
+  return get_num_procs ();
+}
+
+ialias (omp_get_num_procs)
diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt
index dedafd350aa..7d85e00c4c7 100644
--- a/libgomp/configure.tgt
+++ b/libgomp/configure.tgt
@@ -11,11 +11,8 @@
 #  XLDFLAGS		Add extra link flags to use.
 
 # Optimize TLS usage by avoiding the overhead of dynamic allocation.
-# This does require that the library be present during process 
-# startup, so mark the library as not to be dlopened.
 if test $have_tls = yes && test "$with_gnu_ld" = "yes"; then
 	XCFLAGS="${XCFLAGS} -ftls-model=initial-exec"
-	XLDFLAGS="${XLDFLAGS} -Wl,-z,nodlopen"
 fi
 
 # Since we require POSIX threads, assume a POSIX system by default.
diff --git a/libgomp/team.c b/libgomp/team.c
index d114bb5de85..7d50bfc29af 100644
--- a/libgomp/team.c
+++ b/libgomp/team.c
@@ -272,7 +272,7 @@ gomp_team_start (void (*fn) (void *), void *data, unsigned nthreads,
       size_t stacksize;
       pthread_attr_init (&thread_attr);
       pthread_attr_setdetachstate (&thread_attr, PTHREAD_CREATE_DETACHED);
-      if (! pthread_attr_getstacksize (&thread_attr, &stacksize))
+      if (! pthread_attr_getstacksize (&gomp_thread_attr, &stacksize))
 	pthread_attr_setstacksize (&thread_attr, stacksize);
       attr = &thread_attr;
     }
diff --git a/libgomp/testsuite/libgomp.c/pr32362-1.c b/libgomp/testsuite/libgomp.c/pr32362-1.c
new file mode 100644
index 00000000000..3c62d4bdb26
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr32362-1.c
@@ -0,0 +1,32 @@
+/* PR middle-end/32362 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int
+main ()
+{
+  int n[4] = { -1, -1, -1, -1 };
+  static int a = 2, b = 4;
+  omp_set_num_threads (4);
+  omp_set_dynamic (0);
+  omp_set_nested (1);
+#pragma omp parallel private(b)
+  {
+    b = omp_get_thread_num ();
+#pragma omp parallel firstprivate(a)
+    {
+      a = (omp_get_thread_num () + a) + 1;
+      if (b == omp_get_thread_num ())
+	n[omp_get_thread_num ()] = a + (b << 4);
+    }
+  }
+  if (n[0] != 3)
+    abort ();
+  if (n[3] != -1
+      && (n[1] != 0x14 || n[2] != 0x25 || n[3] != 0x36))
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/pr32362-2.c b/libgomp/testsuite/libgomp.c/pr32362-2.c
new file mode 100644
index 00000000000..43f36e0e98b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr32362-2.c
@@ -0,0 +1,33 @@
+/* PR middle-end/32362 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int a = 2, b = 4;
+
+int
+main ()
+{
+  int n[4] = { -1, -1, -1, -1 };
+  omp_set_num_threads (4);
+  omp_set_dynamic (0);
+  omp_set_nested (1);
+#pragma omp parallel private(b)
+  {
+    b = omp_get_thread_num ();
+#pragma omp parallel firstprivate(a)
+    {
+      a = (omp_get_thread_num () + a) + 1;
+      if (b == omp_get_thread_num ())
+	n[omp_get_thread_num ()] = a + (b << 4);
+    }
+  }
+  if (n[0] != 3)
+    abort ();
+  if (n[3] != -1
+      && (n[1] != 0x14 || n[2] != 0x25 || n[3] != 0x36))
+    abort ();
+  return 0;
+}
diff --git a/libgomp/testsuite/libgomp.c/pr32362-3.c b/libgomp/testsuite/libgomp.c/pr32362-3.c
new file mode 100644
index 00000000000..09a88f52a3b
--- /dev/null
+++ b/libgomp/testsuite/libgomp.c/pr32362-3.c
@@ -0,0 +1,34 @@
+/* PR middle-end/32362 */
+/* { dg-do run } */
+/* { dg-options "-O2" } */
+
+#include <omp.h>
+#include <stdlib.h>
+
+int a = 2;
+
+int
+main ()
+{
+  int n[4] = { -1, -1, -1, -1 };
+  int b = 4;
+  omp_set_num_threads (4);
+  omp_set_dynamic (0);
+  omp_set_nested (1);
+#pragma omp parallel private(b)
+  {
+    b = omp_get_thread_num ();
+#pragma omp parallel firstprivate(a)
+    {
+      a = (omp_get_thread_num () + a) + 1;
+      if (b == omp_get_thread_num ())
+	n[omp_get_thread_num ()] = a + (b << 4);
+    }
+  }
+  if (n[0] != 3)
+    abort ();
+  if (n[3] != -1
+      && (n[1] != 0x14 || n[2] != 0x25 || n[3] != 0x36))
+    abort ();
+  return 0;
+}
diff --git a/libjava/ChangeLog b/libjava/ChangeLog
index 6a59c0e499c..b9da6118142 100644
--- a/libjava/ChangeLog
+++ b/libjava/ChangeLog
@@ -1,3 +1,8 @@
+2007-05-31  Andrew Haley  <aph@redhat.com>
+
+	* java/lang/natClassLoader.cc (_Jv_NewClassFromInitializer): Clear
+	INTERPRETED access modifier.
+
 2007-05-18  Keith Seitz  <keiths@redhat.com>
 
 	Merged from trunk:
@@ -267,7 +272,7 @@
 	(jdwpThreadEndCB): Likewise.
 	(jdwpThreadStartCB): Likewise.
 	(jdwpVMInitCB): Likewise.
- 
+
 	2007-01-27  Keith Seitz  <keiths@redhat.com>
 	* gnu/classpath/jdwp/natVMVirtualMachine.cc
 	(getAllClassMethods): Move error handling to ...
diff --git a/libjava/java/lang/natClassLoader.cc b/libjava/java/lang/natClassLoader.cc
index e62c6d39281..9a687ff9105 100644
--- a/libjava/java/lang/natClassLoader.cc
+++ b/libjava/java/lang/natClassLoader.cc
@@ -281,7 +281,16 @@ _Jv_NewClassFromInitializer (const char *class_initializer)
   memcpy (dst, src, len);
   
   new_class->engine = &_Jv_soleIndirectCompiledEngine;
-  
+
+  /* FIXME:  Way back before the dawn of time, we overloaded the
+     SYNTHETIC class access modifier to mean INTERPRETED.  This was a
+     Bad Thing, but it didn't matter then because classes were never
+     marked synthetic.  However, it is possible to redeem the
+     situation: _Jv_NewClassFromInitializer is only called from
+     compiled classes, so we clear the INTERPRETED flag.  This is a
+     kludge!  */
+  new_class->accflags &= ~java::lang::reflect::Modifier::INTERPRETED;
+
   (*_Jv_RegisterClassHook) (new_class);
   
   return new_class;
diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog
index f371608b515..2682ee6ef07 100644
--- a/libstdc++-v3/ChangeLog
+++ b/libstdc++-v3/ChangeLog
@@ -1,3 +1,10 @@
+2007-01-24   Steve LoBasso <slobasso@yahoo.com>
+	     Paolo Carlini  <pcarlini@suse.de>
+
+	* include/bits/deque.tcc (deque<>::erase(iterator, iterator)):
+	Fix condition.
+	* testsuite/23_containers/deque/modifiers/erase/3.cc: New.
+
 2007-04-23  Matthias Klose  <doko@debian.org>
 
 	Backport from mainline:
diff --git a/libstdc++-v3/include/bits/deque.tcc b/libstdc++-v3/include/bits/deque.tcc
index 71722238964..c85c7240e7b 100644
--- a/libstdc++-v3/include/bits/deque.tcc
+++ b/libstdc++-v3/include/bits/deque.tcc
@@ -142,7 +142,7 @@ namespace _GLIBCXX_STD
 	  const difference_type __n = __last - __first;
 	  const difference_type __elems_before = (__first
 						  - this->_M_impl._M_start);
-	  if (static_cast<size_type>(__elems_before) < (size() - __n) / 2)
+	  if (static_cast<size_type>(__elems_before) <= (size() - __n) / 2)
 	    {
 	      std::copy_backward(this->_M_impl._M_start, __first, __last);
 	      iterator __new_start = this->_M_impl._M_start + __n;
diff --git a/libstdc++-v3/testsuite/23_containers/deque/modifiers/erase/3.cc b/libstdc++-v3/testsuite/23_containers/deque/modifiers/erase/3.cc
new file mode 100644
index 00000000000..c975f287dca
--- /dev/null
+++ b/libstdc++-v3/testsuite/23_containers/deque/modifiers/erase/3.cc
@@ -0,0 +1,52 @@
+// Copyright (C) 2007 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 2, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING.  If not, write to the Free
+// Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
+// USA.
+
+// 23.2.1.3 deque modifiers
+
+#include <deque>
+#include <testsuite_hooks.h>
+
+void erase(size_t num_elm, size_t elm_strt, size_t elm_end)
+{
+  bool test __attribute__((unused)) = true;
+  using __gnu_test::copy_tracker;
+  using __gnu_test::assignment_operator;
+
+  std::deque<copy_tracker> x(num_elm);
+  copy_tracker::reset();
+  
+  x.erase(x.begin() + elm_strt, x.begin() + elm_end);
+  
+  const size_t min_num_cpy = std::min(elm_strt, num_elm - elm_end);
+  VERIFY( assignment_operator::count() == min_num_cpy );
+}
+
+// http://gcc.gnu.org/ml/libstdc++/2007-01/msg00098.html
+void test01()
+{
+  for (size_t num_elm = 0; num_elm <= 10; ++num_elm)
+    for (size_t elm_strt = 0; elm_strt <= num_elm; ++elm_strt)
+      for (size_t elm_end = elm_strt; elm_end <= num_elm; ++elm_end)
+	erase(num_elm, elm_strt, elm_end);
+}
+
+int main()
+{
+  test01();
+  return 0;
+}