author     Joern Rennecke <joern.rennecke@st.com>  2009-03-05 17:40:15 +0000
committer  Joern Rennecke <joern.rennecke@st.com>  2009-03-05 17:40:15 +0000
commit     6000d164b04d1ff76b01aa08a13c6022a5b929eb (patch)
tree       337865b53e42eced79d3723e441de1ebd665309f
parent     3eaed088966a6e06d925191184da47647237756d (diff)
Check ARC patches into arc-20081210-branch.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/arc-20081210-branch@144652 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--  ChangeLog.ARC | 13
-rw-r--r--  MANIFEST | 159
-rw-r--r--  config-ml.in | 9
-rwxr-xr-x  config.sub | 2
-rwxr-xr-x  configure | 2
-rw-r--r--  configure.ac | 2
-rw-r--r--  gcc/ChangeLog.ARC | 2467
-rw-r--r--  gcc/Makefile.in | 16
-rw-r--r--  gcc/builtins.c | 25
-rw-r--r--  gcc/c-pch.c | 3
-rw-r--r--  gcc/cfgloop.h | 35
-rw-r--r--  gcc/combine.c | 81
-rw-r--r--  gcc/common.opt | 17
-rw-r--r--  gcc/config.gcc | 49
-rw-r--r--  gcc/config/arc/arc-modes.def | 17
-rw-r--r--  gcc/config/arc/arc-protos.h | 116
-rw-r--r--  gcc/config/arc/arc-simd.h | 185
-rw-r--r--  gcc/config/arc/arc.c | 8637
-rw-r--r--  gcc/config/arc/arc.h | 1583
-rw-r--r--  gcc/config/arc/arc.md | 6196
-rw-r--r--  gcc/config/arc/arc.opt | 242
-rw-r--r--  gcc/config/arc/arc600.md | 61
-rw-r--r--  gcc/config/arc/arc700.md | 170
-rw-r--r--  gcc/config/arc/asm.h | 2
-rw-r--r--  gcc/config/arc/constraints.md | 396
-rw-r--r--  gcc/config/arc/crtg.asm | 28
-rw-r--r--  gcc/config/arc/crtgend.asm | 10
-rw-r--r--  gcc/config/arc/crti.asm | 14
-rw-r--r--  gcc/config/arc/crtn.asm | 11
-rw-r--r--  gcc/config/arc/divtab-arc700.c | 68
-rw-r--r--  gcc/config/arc/dp-hack.h | 65
-rw-r--r--  gcc/config/arc/fp-hack.h | 55
-rw-r--r--  gcc/config/arc/fpx.md | 604
-rw-r--r--  gcc/config/arc/gmon/auxreg.h | 10
-rw-r--r--  gcc/config/arc/gmon/dcache_linesz.S | 30
-rw-r--r--  gcc/config/arc/gmon/gmon.c | 450
-rw-r--r--  gcc/config/arc/gmon/machine-gmon.h | 40
-rw-r--r--  gcc/config/arc/gmon/mcount.c | 204
-rw-r--r--  gcc/config/arc/gmon/prof-freq-stub.S | 16
-rw-r--r--  gcc/config/arc/gmon/prof-freq.c | 58
-rw-r--r--  gcc/config/arc/gmon/profil.S | 127
-rw-r--r--  gcc/config/arc/gmon/sys/gmon.h | 217
-rw-r--r--  gcc/config/arc/gmon/sys/gmon_out.h | 30
-rw-r--r--  gcc/config/arc/ieee-754/adddf3.S | 518
-rw-r--r--  gcc/config/arc/ieee-754/addsf3.S | 344
-rw-r--r--  gcc/config/arc/ieee-754/arc-ieee-754.h | 31
-rw-r--r--  gcc/config/arc/ieee-754/arc600-dsp/divdf3.S | 424
-rw-r--r--  gcc/config/arc/ieee-754/arc600-dsp/divsf3.S | 275
-rw-r--r--  gcc/config/arc/ieee-754/arc600-dsp/muldf3.S | 232
-rw-r--r--  gcc/config/arc/ieee-754/arc600-dsp/mulsf3.S | 179
-rw-r--r--  gcc/config/arc/ieee-754/arc600/divdf3.S | 413
-rw-r--r--  gcc/config/arc/ieee-754/arc600/divsf3.S | 275
-rw-r--r--  gcc/config/arc/ieee-754/arc600/muldf3.S | 235
-rw-r--r--  gcc/config/arc/ieee-754/arc600/mulsf3.S | 177
-rw-r--r--  gcc/config/arc/ieee-754/divdf3.S | 419
-rw-r--r--  gcc/config/arc/ieee-754/divsf3-stdmul.S | 281
-rw-r--r--  gcc/config/arc/ieee-754/divsf3.S | 222
-rw-r--r--  gcc/config/arc/ieee-754/divtab-arc-df.c | 164
-rw-r--r--  gcc/config/arc/ieee-754/divtab-arc-sf.c | 130
-rw-r--r--  gcc/config/arc/ieee-754/eqdf2.S | 79
-rw-r--r--  gcc/config/arc/ieee-754/eqsf2.S | 72
-rw-r--r--  gcc/config/arc/ieee-754/extendsfdf2.S | 125
-rw-r--r--  gcc/config/arc/ieee-754/fixdfsi.S | 88
-rw-r--r--  gcc/config/arc/ieee-754/fixsfsi.S | 74
-rw-r--r--  gcc/config/arc/ieee-754/fixunsdfsi.S | 83
-rw-r--r--  gcc/config/arc/ieee-754/floatsidf.S | 80
-rw-r--r--  gcc/config/arc/ieee-754/floatsisf.S | 102
-rw-r--r--  gcc/config/arc/ieee-754/floatunsidf.S | 78
-rw-r--r--  gcc/config/arc/ieee-754/gedf2.S | 89
-rw-r--r--  gcc/config/arc/ieee-754/gesf2.S | 78
-rw-r--r--  gcc/config/arc/ieee-754/gtdf2.S | 89
-rw-r--r--  gcc/config/arc/ieee-754/gtsf2.S | 78
-rw-r--r--  gcc/config/arc/ieee-754/muldf3.S | 238
-rw-r--r--  gcc/config/arc/ieee-754/mulsf3.S | 179
-rw-r--r--  gcc/config/arc/ieee-754/orddf2.S | 66
-rw-r--r--  gcc/config/arc/ieee-754/ordsf2.S | 66
-rw-r--r--  gcc/config/arc/ieee-754/truncdfsf2.S | 131
-rw-r--r--  gcc/config/arc/ieee-754/uneqdf2.S | 76
-rw-r--r--  gcc/config/arc/ieee-754/uneqsf2.S | 72
-rw-r--r--  gcc/config/arc/initfini.c | 7
-rw-r--r--  gcc/config/arc/lib1funcs.asm | 1304
-rw-r--r--  gcc/config/arc/libgcc-excl.ver | 16
-rw-r--r--  gcc/config/arc/predicates.md | 760
-rw-r--r--  gcc/config/arc/simdext.md | 1313
-rw-r--r--  gcc/config/arc/t-arc | 88
-rw-r--r--  gcc/config/arc/t-arc-newlib | 6
-rw-r--r--  gcc/config/arc/t-arc600-uClibc | 9
-rw-r--r--  gcc/config/arc/t-arc700-uClibc | 44
-rw-r--r--  gcc/config/fp-bit.c | 27
-rw-r--r--  gcc/config/mxp/constraints.md | 91
-rw-r--r--  gcc/config/mxp/demo/Makefile | 78
-rw-r--r--  gcc/config/mxp/demo/array.c | 9
-rw-r--r--  gcc/config/mxp/demo/crtroend.S | 3
-rw-r--r--  gcc/config/mxp/demo/crtrostart.S | 3
-rw-r--r--  gcc/config/mxp/demo/crtvend.S | 3
-rw-r--r--  gcc/config/mxp/demo/demo.c | 35
-rw-r--r--  gcc/config/mxp/demo/demo.h | 12
-rw-r--r--  gcc/config/mxp/demo/dma-gen.c | 79
-rw-r--r--  gcc/config/mxp/demo/max.c | 25
-rw-r--r--  gcc/config/mxp/demo/mxp-start.S | 13
-rw-r--r--  gcc/config/mxp/demo/mxp-wrap.S | 39
-rw-r--r--  gcc/config/mxp/demo/mxp.x | 254
-rw-r--r--  gcc/config/mxp/mxp-genregset.c | 432
-rw-r--r--  gcc/config/mxp/mxp-modes.def | 15
-rw-r--r--  gcc/config/mxp/mxp-protos.h | 20
-rw-r--r--  gcc/config/mxp/mxp.c | 685
-rw-r--r--  gcc/config/mxp/mxp.h | 293
-rw-r--r--  gcc/config/mxp/mxp.md | 910
-rw-r--r--  gcc/config/mxp/mxp.opt | 15
-rw-r--r--  gcc/config/mxp/predicates.md | 19
-rw-r--r--  gcc/config/mxp/regset-config.h | 96
-rw-r--r--  gcc/config/mxp/t-mxp | 31
-rw-r--r--  gcc/config/sh/sh.c | 2
-rw-r--r--  gcc/config/sh/sh.md | 5
-rw-r--r--  gcc/cse.c | 3
-rw-r--r--  gcc/doc/extend.texi | 282
-rw-r--r--  gcc/doc/invoke.texi | 198
-rw-r--r--  gcc/doc/mxp.texi | 106
-rw-r--r--  gcc/doc/tm.texi | 13
-rw-r--r--  gcc/dwarf2out.c | 6
-rw-r--r--  gcc/explow.c | 11
-rw-r--r--  gcc/expr.c | 20
-rw-r--r--  gcc/expr.h | 4
-rw-r--r--  gcc/final.c | 117
-rw-r--r--  gcc/gcse.c | 41
-rw-r--r--  gcc/genattr.c | 6
-rw-r--r--  gcc/genattrtab.c | 24
-rw-r--r--  gcc/genmodes.c | 22
-rw-r--r--  gcc/genoutput.c | 77
-rw-r--r--  gcc/genpreds.c | 40
-rw-r--r--  gcc/genrecog.c | 2
-rw-r--r--  gcc/gimplify.c | 2
-rw-r--r--  gcc/global.c | 11
-rw-r--r--  gcc/hooks.c | 8
-rw-r--r--  gcc/hooks.h | 1
-rw-r--r--  gcc/libgcc2.c | 7
-rw-r--r--  gcc/longlong.h | 29
-rw-r--r--  gcc/loop-doloop.c | 168
-rw-r--r--  gcc/loop-iv.c | 4
-rw-r--r--  gcc/mode-classes.def | 4
-rw-r--r--  gcc/opts.c | 10
-rw-r--r--  gcc/output.h | 42
-rw-r--r--  gcc/params.def | 9
-rw-r--r--  gcc/postreload.c | 36
-rw-r--r--  gcc/read-rtl.c | 2
-rw-r--r--  gcc/regclass.c | 4
-rw-r--r--  gcc/regmove.c | 19
-rw-r--r--  gcc/reload.c | 2
-rw-r--r--  gcc/reorg.c | 43
-rw-r--r--  gcc/resource.c | 43
-rw-r--r--  gcc/rtl-factoring.c | 17
-rw-r--r--  gcc/rtl.h | 1
-rw-r--r--  gcc/rtlanal.c | 49
-rw-r--r--  gcc/target-def.h | 7
-rw-r--r--  gcc/target.h | 6
-rw-r--r--  gcc/testsuite/ChangeLog.ARC | 72
-rw-r--r--  gcc/testsuite/g++.dg/cpp/_Pragma1.C | 2
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/20000804-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/20001226-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/20010327-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/20010518-2.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/20020604-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/20050622-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/920501-12.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/920501-4.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/930506-2.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/961203-1.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/limits-blockid.c | 1
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/limits-fnargs.c | 6
-rw-r--r--  gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c | 6
-rw-r--r--  gcc/testsuite/gcc.c-torture/execute/ieee/denorm-rand.c | 199
-rw-r--r--  gcc/testsuite/gcc.dg/builtin-apply2.c | 1
-rw-r--r--  gcc/testsuite/gcc.dg/cpp/_Pragma6.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/func-ptr-prof.c | 18
-rw-r--r--  gcc/testsuite/gcc.dg/pr28243.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/pr35044.c | 235
-rw-r--r--  gcc/testsuite/gcc.dg/sibcall-3.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/sibcall-4.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/torture/fp-int-convert.h | 4
-rw-r--r--  gcc/testsuite/gcc.dg/torture/pr37868.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ssa-store-ccp-2.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/arc/adc.c | 29
-rw-r--r--  gcc/testsuite/gcc.target/arc/add_f.c | 29
-rw-r--r--  gcc/testsuite/gcc.target/arc/arc.exp | 41
-rw-r--r--  gcc/testsuite/gcc.target/arc/sbc.c | 15
-rw-r--r--  gcc/testsuite/lib/scanasm.exp | 11
-rw-r--r--  gcc/tree-flow.h | 1
-rw-r--r--  gcc/tree-ssa-loop-ch.c | 38
-rw-r--r--  gcc/tree-ssa-loop-ivcanon.c | 112
-rw-r--r--  gcc/tree-ssa-loop-ivopts.c | 235
-rw-r--r--  gcc/tree-ssa-loop-manip.c | 15
-rw-r--r--  gcc/tree-ssa-pre.c | 127
-rw-r--r--  gcc/value-prof.c | 2
-rw-r--r--  gcc/varasm.c | 61
-rw-r--r--  libgcc/ChangeLog.ARC | 4
-rw-r--r--  libgcc/config.host | 2
-rw-r--r--  libstdc++-v3/ChangeLog.ARC | 20
-rwxr-xr-x  libstdc++-v3/scripts/testsuite_flags.in | 5
-rw-r--r--  libstdc++-v3/testsuite/27_io/basic_stringbuf/overflow/char/1.cc | 2
-rw-r--r--  libstdc++-v3/testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc | 5
-rw-r--r--  libstdc++-v3/testsuite/lib/libstdc++.exp | 6
201 files changed, 35652 insertions, 2695 deletions
diff --git a/ChangeLog.ARC b/ChangeLog.ARC
new file mode 100644
index 00000000000..0a895b43a58
--- /dev/null
+++ b/ChangeLog.ARC
@@ -0,0 +1,13 @@
+2008-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.sub: Add mxp support.
+
+2007-09-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * configure.in (FLAGS_FOR_TARGET):
+ Add -isystem $$s/newlib/libc/sys/'$target_cpu'/sys .
+ * configure: Regenerate.
+
+2007-04-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config-ml.in: Merge codito patches.
diff --git a/MANIFEST b/MANIFEST
new file mode 100644
index 00000000000..2f09c07ebea
--- /dev/null
+++ b/MANIFEST
@@ -0,0 +1,159 @@
+From codito-20070401 / codito-20070406:
+ added files:
+ gcc/gcc/config/arc/crti.asm
+ gcc/gcc/config/arc/predicates.md
+ gcc/gcc/config/arc/crtn.asm
+ gcc/gcc/config/arc/arc-simd.h
+ gcc/gcc/config/arc/fpx.md
+ gcc/gcc/config/arc/t-arc700-uClibc
+ gcc/gcc/config/arc/constraints.md
+ gcc/gcc/config/arc/simdext.md
+ gcc/gcc/config/arc/xm-arc.h
+ gcc/gcc/testsuite/gcc.dg/pr35044.c
+ changed files:
+ gcc/config-ml.in (merge from codito's gcc/newlib/config-ml.in)
+ gcc/configure.in
+ gcc/configure
+ gcc/doc/invoke.texi
+ gcc/gcc/doc/extend.texi
+ gcc/gcc/reload.c
+ gcc/gcc/version.c
+ gcc/gcc/config.gcc
+ gcc/gcc/config/arc/arc-protos.h
+ gcc/gcc/config/arc/arc.c
+ gcc/gcc/config/arc/arc.h
+ gcc/gcc/config/arc/arc.md
+ gcc/gcc/config/arc/lib1funcs.asm
+ gcc/gcc/config/arc/t-arc
+ gcc/gcc/config/arc/arc.opt
+ gcc/gcc/config/arc/arc-modes.def
+
+By ARC employees:
+ added files:
+ gcc/gcc/config/arc/divtab-arc700.c
+ gcc/gcc/config/arc/gmon
+ gcc/gcc/config/arc/asm.h
+ gcc/gcc/config/arc/crtg.asm
+ gcc/gcc/config/arc/crtgend.asm
+ gcc/gcc/config/arc/t-arc-newlib
+ gcc/gcc/config/arc/arc600.md
+ gcc/gcc/config/arc/arc700.md
+ gcc/gcc/config/arc/gmon/auxreg.h
+ gcc/gcc/config/arc/gmon/machine-gmon.h
+ gcc/gcc/config/arc/gmon/profil.S
+ gcc/gcc/config/arc/gmon/dcache_linesz.S
+ gcc/gcc/config/arc/gmon/prof-freq-stub.S
+ gcc/gcc/config/arc/t-arc700-uClibc
+ gcc/gcc/config/arc/dp-hack.h
+ gcc/gcc/config/arc/fp-hack.h
+ gcc/gcc/config/arc/ieee-754/floatunsidf.S
+ gcc/gcc/config/arc/ieee-754/divdf3.S
+ gcc/gcc/config/arc/ieee-754/orddf2.S
+ gcc/gcc/config/arc/ieee-754/eqsf2.S
+ gcc/gcc/config/arc/ieee-754/truncdfsf2.S
+ gcc/gcc/config/arc/ieee-754/fixunsdfsi.S
+ gcc/gcc/config/arc/ieee-754/divtab-arc-df.c
+ gcc/gcc/config/arc/ieee-754/uneqsf2.S
+ gcc/gcc/config/arc/ieee-754/adddf3.S
+ gcc/gcc/config/arc/ieee-754/gtsf2.S
+ gcc/gcc/config/arc/ieee-754/gedf2.S
+ gcc/gcc/config/arc/ieee-754/floatsisf.S
+ gcc/gcc/config/arc/ieee-754/muldf3.S
+ gcc/gcc/config/arc/ieee-754/fixdfsi.S
+ gcc/gcc/config/arc/ieee-754/divsf3.S
+ gcc/gcc/config/arc/ieee-754/ordsf2.S
+ gcc/gcc/config/arc/ieee-754/eqdf2.S
+ gcc/gcc/config/arc/ieee-754/divtab-arc-sf.c
+ gcc/gcc/config/arc/ieee-754/divsf3-stdmul.S
+ gcc/gcc/config/arc/ieee-754/addsf3.S
+ gcc/gcc/config/arc/ieee-754/uneqdf2.S
+ gcc/gcc/config/arc/ieee-754/gesf2.S
+ gcc/gcc/config/arc/ieee-754/gtdf2.S
+ gcc/gcc/config/arc/ieee-754/mulsf3.S
+ gcc/gcc/config/arc/ieee-754/floatsidf.S
+ gcc/gcc/config/arc/ieee-754/fixsfsi.S
+ gcc/gcc/config/arc/ieee-754/arc-ieee-754.h
+ gcc/gcc/config/arc/ieee-754/extendsfdf2.S
+ gcc/gcc/config/arc/ieee-754/arc600/divdf3.S
+ gcc/gcc/config/arc/ieee-754/arc600/divsf3.S
+ gcc/gcc/config/arc/ieee-754/arc600/muldf3.S
+ gcc/gcc/config/arc/ieee-754/arc600/mulsf3.S
+ gcc/gcc/config/arc/ieee-754/arc600-dsp/mulsf3.S
+ gcc/gcc/config/arc/ieee-754/arc600-dsp/muldf3.S
+ gcc/gcc/config/arc/ieee-754/arc600-dsp/divdf3.S
+ gcc/gcc/config/arc/ieee-754/arc600-dsp/divsf3.S
+ gcc/gcc/config/mxp
+ gcc/gcc/ChangeLog.ARC
+ gcc/testsuite/ChangeLog.ARC
+ gcc/testsuite/gcc.c-torture/execute/ieee/denorm-rand.c
+ gcc/testsuite/gcc.dg/func-ptr-prof.c
+ gcc/testsuite/gcc.target/arc/arc.exp
+ gcc/testsuite/gcc.target/arc/adc.c
+ gcc/testsuite/gcc.target/arc/sbc.c
+ gcc/testsuite/gcc.target/arc/add_f.c
+ gcc/libstdc++-v3/ChangeLog.ARC
+ gcc/gcc/doc/mxp.texi
+ changed files:
+ gcc/config.sub
+ gcc/gcc/output.h
+ gcc/gcc/target-def.h
+ gcc/gcc/varasm.c
+ gcc/gcc/c-pch.c
+ gcc/gcc/longlong.h
+ gcc/gcc/libgcc2.c
+ gcc/gcc/builtins.c
+ gcc/gcc/combine.c
+ gcc/gcc/expr.h
+ gcc/gcc/expr.c
+ gcc/gcc/gimplify.c
+ gcc/gcc/cse.c
+ gcc/gcc/regmove.c
+ gcc/gcc/genmodes.c
+ gcc/gcc/mode-classes.def
+ gcc/gcc/loop-doloop.c
+ gcc/gcc/doc/tm.texi
+ gcc/gcc/config.gcc
+ gcc/gcc/tree-flow.h
+ gcc/gcc/tree-ssa-loop-ch.c
+ gcc/gcc/tree-ssa-loop-ivopts.c
+ gcc/gcc/tree-ssa-loop-ivcanon.c
+ gcc/gcc/tree-ssa-loop-manip.c
+ gcc/gcc/tree-ssa-pre.c
+ gcc/gcc/common.opt
+ gcc/gcc/cfgloop.h
+ gcc/gcc/params.def
+ gcc/gcc/Makefile.in
+ gcc/gcc/config/arc/t-arc
+ gcc/gcc/config/arc/arc-protos.h
+ gcc/gcc/config/arc/arc.h
+ gcc/gcc/config/arc/arc.c
+ gcc/gcc/config/arc/arc.md
+ gcc/gcc/config/arc/crti.asm
+ gcc/gcc/config/arc/crtn.asm
+ gcc/gcc/config/arc/predicates.md
+ gcc/gcc/config/arc/lib1funcs.asm
+ gcc/gcc/config/arc/t-arc700-uClibc
+ gcc/gcc/config/arc/constraints.md
+ gcc/gcc/config/arc/libgcc-excl.ver
+ gcc/libstdc++-v3/testsuite/27_io/basic_stringbuf/overflow/char/1.cc
+ gcc/libstdc++-v3/testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc
+ gcc/libstdc++-v3/testsuite/lib/libstdc++.exp
+ gcc/libstdc++-v3/scripts/testsuite_flags.in
+ gcc/testsuite/gcc.dg/builtin-apply2.c
+ gcc/testsuite/gcc.dg/sibcall-3.c
+ gcc/testsuite/gcc.dg/sibcall-4.c
+ gcc/testsuite/gcc.dg/cpp/_Pragma6.c
+ gcc/testsuite/g++.dg/cpp/_Pragma1.C
+ gcc/testsuite/gcc.dg/torture/pr37868.c
+ gcc/testsuite/gcc.dg/pr28243.c
+ gcc/testsuite/lib/scanasm.exp
+
+Others:
+ added files:
+ gcc/config/arc/gmon/sys/gmon.h (from BSD)
+ gcc/config/arc/gmon/sys/gmon_out.h (from BSD)
+ gcc/config/arc/gmon/gmon.c (from BSD)
+ gcc/config/arc/gmon/mcount.c (from BSD)
+ gcc/config/arc/gmon/prof-freq.c
diff --git a/config-ml.in b/config-ml.in
index f2497ada4f8..9cbbc74a818 100644
--- a/config-ml.in
+++ b/config-ml.in
@@ -225,13 +225,14 @@ done
case "${host}" in
arc-*-elf*)
- if [ x$enable_biendian != xyes ]
+ if [ x$enable_biendian = xno ]
then
- old_multidirs=${multidirs}
+ old_multidirs="${multidirs}"
multidirs=""
for x in ${old_multidirs}; do
- case "${x}" in
- *be*) : ;;
+ case "$x" in
+ *le* ) : ;;
+ *be* ) : ;;
*) multidirs="${multidirs} ${x}" ;;
esac
done
diff --git a/config.sub b/config.sub
index 053e7381fa0..f54697639cf 100755
--- a/config.sub
+++ b/config.sub
@@ -272,6 +272,7 @@ case $basic_machine in
| mn10200 | mn10300 \
| mt \
| msp430 \
+ | mxp \
| nios | nios2 \
| ns16k | ns32k \
| or32 \
@@ -355,6 +356,7 @@ case $basic_machine in
| mmix-* \
| mt-* \
| msp430-* \
+ | mxp-* \
| nios-* | nios2-* \
| none-* | np1-* | ns16k-* | ns32k-* \
| orion-* \
diff --git a/configure b/configure
index 403982c69fb..4a4709de3fd 100755
--- a/configure
+++ b/configure
@@ -6260,7 +6260,7 @@ case " $target_configdirs " in
# If we're building newlib, use its generic headers last, but search
# for any libc-related directories first (so make it the last -B
# switch).
- FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$$r/$(TARGET_SUBDIR)/newlib/ -isystem $$r/$(TARGET_SUBDIR)/newlib/targ-include -isystem $$s/newlib/libc/include'
+ FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$$r/$(TARGET_SUBDIR)/newlib/ -isystem $$r/$(TARGET_SUBDIR)/newlib/targ-include -isystem $$s/newlib/libc/sys/'$target_cpu'/sys -isystem $$s/newlib/libc/include'
# If we're building libgloss, find the startup file, simulator library
# and linker script.
diff --git a/configure.ac b/configure.ac
index 21fcf78ed49..23b6eab43b2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2563,7 +2563,7 @@ case " $target_configdirs " in
# If we're building newlib, use its generic headers last, but search
# for any libc-related directories first (so make it the last -B
# switch).
- FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$$r/$(TARGET_SUBDIR)/newlib/ -isystem $$r/$(TARGET_SUBDIR)/newlib/targ-include -isystem $$s/newlib/libc/include'
+ FLAGS_FOR_TARGET=$FLAGS_FOR_TARGET' -B$$r/$(TARGET_SUBDIR)/newlib/ -isystem $$r/$(TARGET_SUBDIR)/newlib/targ-include -isystem $$s/newlib/libc/sys/'$target_cpu'/sys -isystem $$s/newlib/libc/include'
# If we're building libgloss, find the startup file, simulator library
# and linker script.
diff --git a/gcc/ChangeLog.ARC b/gcc/ChangeLog.ARC
new file mode 100644
index 00000000000..56442b91ee3
--- /dev/null
+++ b/gcc/ChangeLog.ARC
@@ -0,0 +1,2467 @@
+2009-02-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * loop-doloop.c (doloop_valid_p): Rename to:
+ (validize_doloop).  Try to fix up loops with conditions for infinite
+ looping by enclosing them in an outer loop.
+ Changed caller.
+ (add_test): Add new parameter edgep. Changed caller.
+
+ * loop-iv.c (get_simple_loop_desc): Use XCNEW.
+
+2009-02-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * postreload.c (reload_combine): Also seek to combine a constant set
+ into a single mem-ref use.
+ * global.c (find_reg): Don't make allocations that are denied by
+ DONT_REALLOC.
+ * gcse.c (constprop_register): Don't propagate constants into
+ memory accesses, or into binary operations where the rtx_cost is
+ non-negligible.
+ (one_cprop_pass): Make function calls to provide REG_N_REFS.
+ * explow.c (memory_address): Call LEGITIMIZE_ADDRESS also in the
+ cse-driven case.
+ * config/arc/arc-protos.h (arc_legitimize_address): Declare.
+ * config/arc/arc.c (TARGET_MIN_ANCHOR_OFFSET): Redefine.
+ (TARGET_MAX_ANCHOR_OFFSET): Likewise.
+ (prepare_move_operands): Call arc_legitimize_address on memory
+ addresses.
+ (arc_legitimize_address): New function.
+ * config/arc/arc.h (LEGITIMIZE_ADDRESS): Call arc_legitimize_address.
+ (LEGITIMIZE_RELOAD_ADDRESS): Decompose reg+offset into
+ anchor + small offset, and including non-allocated registers.
+ (DONT_REALLOC): Define.
+ * config/arc/arc.md (movsi): Replace VUsc/Cal with VUsc/C32 alternative.
+ * config/arc/constraints.md (C32): New constraint.
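
The reg+offset decomposition described above is easier to see with
concrete numbers.  A minimal C sketch, purely illustrative; the real
logic lives in the LEGITIMIZE_RELOAD_ADDRESS macro, and the 8-bit
small-offset field assumed here is made up:

    #include <stdio.h>

    /* Split base+offset so the final access uses a small offset that
       fits a short load/store encoding; the anchor add is reusable by
       neighbouring accesses.  */
    static void split_address(unsigned base, unsigned offset)
    {
        unsigned anchor = base + (offset & ~0xffu); /* one shared add */
        unsigned small  = offset & 0xffu;           /* fits insn field */
        printf("%#x + %#x == %#x + %#x\n", base, offset, anchor, small);
    }

    int main(void)
    {
        split_address(0x10000000, 0x1234);  /* 0x10001200 + 0x34 */
        return 0;
    }
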
+
+ * config/arc/arc.c (arc_save_restore): Fix handling of extra registers
+ for epilogue_p == 2.
+
+2009-02-09 J"orn Rennecke <joern.rennecke@arc.com>
+
+ PR 39141:
+ * tree-ssa-loop-manip.c (gimple_can_duplicate_loop_to_header_edge):
+ New function.
+ * tree-ssa-loop-ivcanon.c (enum unroll_level): New value
+ UL_ESTIMATE_GROWTH.
+ (try_unroll_loop_completely): Handle UL_ESTIMATE_GROWTH.
+ (canonicalize_loop_induction_variables): Likewise.
+ (tree_unroll_loops_completely): Don't completely unroll loops where
+ the outer loop/function is larger than
+ PARAM_MAX_COMPLETELY_PEELED_OUTER_INSNS, or would become so due to
+ unrolling.
+ * cfgloop.h (enum li_flags): New value LI_REALLY_FROM_INNERMOST.
+ (fel_init): Handle LI_REALLY_FROM_INNERMOST.
+ * tree-flow.h (gimple_can_duplicate_loop_to_header_edge): Declare.
+ * params.def (PARAM_MAX_COMPLETELY_PEELED_OUTER_INSNS): New parameter.
+
+2009-02-02 J"orn Rennecke <joern.rennecke@arc.com>
+
+ PR 38785:
+ * tree-ssa-pre.c (ppre_n_insert_for_speed_p): New function.
+ (do_partial_partial_insertion): Use it to throttle
+ insert_into_preds_of_block calls.
+ * common.opt (-ftree-pre-partial-partial-obliviously): New option.
+
+2009-01-15 Steven Bosscher <steven@gcc.gnu.org>
+
+ http://gcc.gnu.org/ml/gcc-patches/2008-12/msg00199.html
+ * opts.c (decode_options): Fix initialization of
+ flag_tree_switch_conversion. Don't set optimize_size in block
+ that is conditional on optimize_size (sic). Explicitly disable
+ PRE when optimizing for size (and add comment for rationale).
+ * tree-ssa-pre.c: Update outline of the algorithm.
+ (bitmap_set_and): Prototype.
+ (insert_into_preds_of_block): Don't report discovery of partial
+ redundancies here, do so from the callers instead (see below).
+ (do_regular_insertion): Add counter for an estimate for the number
+ of inserts required to eliminate a partial redundancy. If the
+ current function is optimized for size, only perform the partial
+ redundancy elimination if this requires inserting in only one
+ predecessor. Report all found partial redundancies from here.
+ (do_partial_partial_insertion): Report them from here too.
+ (insert_aux): Do not insert for partial-partial redundancies when
+ optimizing for size.
+ (execute_pre): Remove bogus ATTRIBUTE_UNUSED.
+ (do_pre): Run FRE at least, if PRE is disabled.
+ (gate_pre): Return true if flag_tree_pre or flag_tree_fre is set.
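
For orientation, here is the single-insertion case described above in
miniature; this is an editorial illustration in C, not code from the
patch or the testsuite:

    /* 'a + b' is already computed on the if-path, so the occurrence in
       the return is partially redundant.  PRE can remove it by
       inserting one copy of 'a + b' on the else-edge; at -Os the patch
       still allows this single-predecessor insertion.  */
    int f(int a, int b, int p)
    {
        int t = 0;
        if (p)
            t = a + b;          /* expression available on this path */
        /* PRE inserts t2 = a + b on the implicit else-edge */
        return t + (a + b);     /* reuse instead of recomputation */
    }
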
+
+2009-01-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * common.opt (ftree-pre-partial-partial): New option.
+ * opts.c (decode_options): Initialize flag_tree_pre_partial_partial.
+ * tree-ssa-pre.c (execute_pre): Use flag_tree_pre_partial_partial.
+
+2009-01-07 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/constraints.md (Rcw): Remove ARC700 conditional.
+ (Rcr): New constraint.
+ * config/arc/arc.md (mulsi3_700, mulsi3_highpart, umulsi3_highpart_i):
+ Use "r" and "Rcr" constraints for destination.
+
+2008-12-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_verify_short): For out-of-range
+ brcc / bbit with short delay insn, prefer short compare.
+
+2008-12-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/adddf3.S (Ladd_same_exp): Fix corner case with
+ round-to-even obscuring or generating a carry.
+ (Lpast_denorm_large_cancel_sub): Fix handling of shift by 32.
+ * config/arc/ieee-754/arc600/muldf3.S (Linf_nan): Avoid clobbering
+ DBL1L before we have checked its value.
+ * config/arc/ieee-754/arc600-dsp/muldf3.S (Linf_nan): Likewise.
+ * config/arc/ieee-754/muldf3.S (Linf_nan): Likewise.
+
+2008-12-16 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_dead_or_set_postreload_p): Constify parameters.
+ Don't clear reg while processing a sequence. Use find_reg_fusage.
+ * config/arc/arc-protos.h (arc_dead_or_set_postreload_p): Update.
+ * config/arc/arc.md (flush_icache+3): Add comment on found use.
+ Remove TARGET_DROS condition.
+
+2008-12-16 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_dead_or_set_postreload_1): New function.
+ (arc_dead_or_set_postreload_p): Likewise.
+ * config/arc/arc-protos.h (arc_dead_or_set_postreload_p): Declare.
+ * config/arc/arc.md (flush_icache+2, flush_icache+3): Use it.
+ (flush_icache+4, flush_icache+7): Likewise.
+
+2008-12-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): When checking size of loop,
+ verify that end belongs to the loop being examined.
+
+2008-12-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_encode_section_info): Don't try to get
+ TYPE_ATTRIBUTES from error_mark_node.
+
+2008-12-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_end_i): Use %? to output nop of right size.
+ Length is not zero when outputting a nop.
+
+2008-12-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (movsicc_insn+1): Generate plus.
+ Use REVERSE_CONDITION.
+
+2008-12-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (adddi3_i): Fix case where we are adding
+ a zero lowpart and the destination lowpart is the same as source
+ highpart.
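
The corner case is easier to see spelled out.  An illustrative C model
of the split (not the actual RTL): with a zero low word there is no
carry, so the add reduces to a high-word add plus a low-word move, and
the ordering matters when the registers overlap:

    typedef unsigned int u32;

    /* Model dst as a register pair where dst.lo (pair[0]) is the same
       hard register as src.hi; the constant k has a zero low word.  */
    static void adddi_zero_low(u32 pair[2], u32 src_lo, u32 k_hi)
    {
        /* Doing "pair[0] = src_lo;" first would clobber src.hi (the
           old pair[0]) before the high-word add could read it.  */
        pair[1] = pair[0] + k_hi;   /* high word: src.hi + k.hi */
        pair[0] = src_lo;           /* low word: plain move */
    }
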
+
+2008-12-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * reorg.c (fill_slots_from_thread): Initialize crossing.
+
+2008-12-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * tree-ssa-loop-ivopts.c (get_computation_cost_at): Guard code using
+ int_cst_value call with cst_and_fits_in_hwi check.
+
+2008-12-10 Zdenek Dvorak <ook@ucw.cz>
+ J"orn Rennecke <joern.rennecke@arc.com>
+
+ PR38440 / PR31849:
+ * tree-ssa-loop-ivopts.c (CP_AUTOINC_OFFSET): Define.
+ (struct iv_ca): New member cand_autoinc_distance.
+ (get_address_cost): New parameter may_autoinc. Changed all callers.
+ (autoinc_distance, cand_autoincrement_p): New functions.
+ (recompute_autoinc_bonus): Likewise.
+ (get_computation_cost_at): New parameter autoinc_distance.
+ Changed all callers.
+ (get_computation_cost): Likewise.
+ (iv_ca_set_no_cp <USE_ADDRESS>): Call recompute_autoinc_bonus.
+ (iv_ca_set_cp): Likewise.
+ (iv_ca_new): Allocate nw->cand_autoinc_distance.
+ (iv_ca_dump): Indicate autoincrement.
+ (gt-tree-ssa-loop-ivopts.h): Include.
+ * common.opt: Add new options fivopts-post-inc and fivopts-post-modify.
+ * Makefile.in (GTFILES): Add $(srcdir)/tree-ssa-loop-ivopts.c.
+ (tree-ssa-loop-ivopts.o): Depend on gt-tree-ssa-loop-ivopts.h.
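
The shape of loop this change targets, as an illustrative C fragment
(the PRs cited above carry the real testcases):

    /* With the autoinc bonus, ivopts can keep the two pointer IVs so
       that a post-increment addressing mode absorbs the step, instead
       of rewriting both accesses onto a shared index variable.  */
    void copy(char *dst, const char *src, int n)
    {
        while (n-- > 0)
            *dst++ = *src++;    /* both accesses can auto-increment */
    }
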
+
+2008-12-09 J"orn Rennecke <joern.rennecke@arc.com>
+
+ cr95949:
+ * config/arc/arc.md (movqi_insn): Split w/cI alternative into
+ w/cL and w/I.
+
+2008-12-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * hooks.c (hook_bool_const_rtx_const_rtx_true): New function.
+ * hooks.h (hook_bool_const_rtx_const_rtx_true): Declare.
+ * target.h (can_follow_jump): New hook.
+ * target-def.h (TARGET_CAN_FOLLOW_JUMP): Define.
+ (TARGET_INITIALIZER): Include it.
+ * reorg.c (follow_jumps): New parameters jump and cp.
+ Changed all callers.
+ * config/arc/arc.c (arc_can_follow_jump): New function.
+ (TARGET_CAN_FOLLOW_JUMP): Override.
+ * config/arc/arc.md (jump_i): If a REG_CROSSING_JUMP is present,
+ length is 4.
+
+ * config/arc/arc.h (arc_compute_function_type): Declare.
+
+2008-12-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_reorg): When giving up on a zero-overhead
+ loop containing only a zero-size asm, remove the loop insn.
+
+2008-12-07 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * tree-ssa-loop-ch.c (copy_loop_headers): Fixed merge problems
+ (tentatively).
+ * config/arc/arc.h (OPTIMIZATION_OPTIONS, OVERRIDE_OPTIONS):
+ Fixed merge problem.
+
+2008-12-05 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_need_delay): Don't attempt to get type of
+ a SEQUENCE.
+
+2008-12-05 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc700.md (umulti_SI): Split into:
+ (umulti_xmac, umulti_std).
+ (define_bypass): Remove bogus multiply bypasses.
+ * config/arc/arc.md (mulsi3_700): Use mpyu instruction.
+ Change type to umulti.
+ (mulsidi3_700, umulsidi3_700): Now define_insn_and_split.
+ (mulsi3_highpart): New pattern.
+ (umulsi3_highpart_i, umulsi3_highpart_int): Now type multi.
+ * config/arc/ieee-754/divdf3.S: Take reduced mpyu latency for
+ standard multiplier block into account.
+ * config/arc/ieee-754/muldf3.S: Likewise.
+ * config/arc/ieee-754/divsf3-stdmul.S: Likewise.
+
+2008-12-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_save_restore): Set RTX_FRAME_RELATED_P on
+ the *emitted* sibthunk_insn.
+
+2008-12-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * varasm.c (get_unnamed_section): Cast return value of xmalloc.
+ (get_noswitch_section): Likewise.
+ (pickle_in_section): Use GGC_NEW.
+ (unpickle_in_section): Make definition a prototype.
+ * config/arc/arc.c (arc_verify_short): Restore recog_data before
+ returning early.
+
+2008-12-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (define_delay [return]): Remove duplicate.
+ (adddi3): Put parts into parallel.
+
+2008-12-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * genrecog.c (validate_pattern): Accept combinations of VOIDmode with
+ MODE_CC operands like ones with CC0.
+ * config/arc/predicates.md (zn_compare_operator): Now special predicate.
+ * config/arc/arc.md (unary_comparison): Add mode to operand 1.
+ (noncommutative_binary_comparison): Add mode to operands 1 and 2.
+
+ * config/sh/sh.md (doloop_end): Accept extra operand.
+
+ * config/arc/arc.c (arc_compute_frame_size): Don't try to use
+ REG_N_SETS.
+ (arc_expand_epilogue): Update call to gen_return_i.
+ * config/arc/arc.h (EPILOGUE_USES): Define.
+ * config/arc/arc.md (in_ret_delay_slot): Reject insns that
+ set / clobber the return address register.
+ (sibcall, sibcall_insn): Remove explicit return address register use.
+ (sibcall_value_insn, return_i): Likewise.
+
+ * config/arc/arc.md (define_delay): Don't put an sfunc in a delay slot.
+
+ * config/arc/arc.md (millicode_sibthunk_ld): Set is_SIBCALL.
+
+ * config/arc/t-arc (gt-arc.h): Remove rule.
+
+2008-12-03 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.opt (mexpand-adddi): New option.
+ * config/arc/arc.md (adddi3, subdi3): Make expansion dependent
+ on TARGET_EXPAND_ADDDI.
+
+2008-12-03 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_init): Tweak mult_cost defaults.
+
+2008-12-02 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * combine.c (try_combine): Revert hack to suppress losing
+ 'simplifications'.
+ (undo_since): Fix loop.
+ (combine_simplify_bittest): New function.
+ (combine_simplify_rtx, simplify_if_then_else): Use it.
+ * config/arc/arc.c (arc_rtx_costs): Check for bbit test.
+
+2008-12-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/t-arc-newlib (MULTILIB_EXCEPTIONS): Allow mARC700*/mnorm*.
+ Disallow mnorm*.
+ (MULTILIB_EXCLUSIONS): Allow mARC700*/mnorm*. Exclude mARC700/!mnorm.
+ * config/arc/arc.h (DRIVER_SELF_SPECS): For -mA7 / -mARC700, provide
+ -mnorm.
+
+2008-11-28 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/lib1funcs.asm (__divsi3 [__ARC700__,MULDIV]):
+ Avoid references to lp_count.
+
+2008-11-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_print_operand): Clarify distinction between
+ '#' and '*'.
+ * config/arc/arc.md (define_delay <brcc>): Don't say we can annul
+ the delay slot if TARGET_AT_DBR_CONDEXEC is set.
+ (cbranchsi4_scratch): Change delay slot suffix output directive for
+ fallback template to '*'.
+
+2008-11-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_text_label): Tolerate NOTE_INSN_DELETED_LABEL.
+
+2008-11-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_text_label): New function.
+ * config/arc/arc-protos.h (arc_text_label): Declare.
+ * config/arc/constraints.md (Clb): Require a text label.
+
+2008-11-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (casesi_compact_jump): Fix offset calculation
+ for unaligned add_s / ld / add_s / j_s cases.
+
+2008-11-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (mixed_code_enabled): Delete.
+ (arc_conditional_register_usage): Use TARGET_Q_CLASS.
+ * config/arc/arc.h (mixed_code_enabled): Don't declare.
+ (OPTIMIZATION_OPTIONS): Initialize TARGET_Q_CLASS.
+ (OVERRIDE_OPTIONS): TARGET_MIXED_CODE implies TARGET_Q_CLASS.
+ * config/arc/arc.opt (mq-class): New option.
+
+ * config/arc/arc.c (arc_get_unalign, arc_toggle_unalign): New functions.
+ * config/arc/arc.h (OPTIMIZATION_OPTIONS): Initialize
+ TARGET_COMPACT_CASESI.
+ (OVERRIDE_OPTIONS): TARGET_COMPACT_CASESI requires TARGET_Q_CLASS.
+ TARGET_COMPACT_CASESI implies TARGET_CASE_VECTOR_PC_RELATIVE .
+ (ASM_OUTPUT_ADDR_DIFF_ELT): Add 4 or 6, respectively, for
+ TARGET_COMPACT_CASESI.
+ (CASE_VECTOR_SHORTEN_MODE_1): New macro, broken out of:
+ CASE_VECTOR_SHORTEN_MODE). For TARGET_COMPACT_CASESI, add six to
+ MAX_OFFSET before passing it on to CASE_VECTOR_SHORTEN_MODE_1.
+ (cmpsi_cc_insn_mixed, casesi_jump, casesi_load): Generate expander.
+ (casesi): Emit casesi_compact_jump for TARGET_COMPACT_CASESI.
+ (casesi_compact_jump): New pattern.
+ * config/arc/arc-protos.h (arc_get_unalign): Declare.
+ (arc_toggle_unalign): Likewise.
+ * config/arc/arc.opt (mcompact-casesi): New option.
+
+ * config/arc/arc.c (arc_legitimate_pic_addr_p): LABEL_REF is ok.
+ (arc_raw_symbolic_reference_mentioned_p): Don't flag LABEL_REF.
+ (arc_legitimize_pic_address): Leave LABEL_REF alone.
+ * config/arc/arc.md (movsi_insn): Add w/Clb alternative.
+ * config/arc/constraints.md (Clb): New constraint.
+
+2008-11-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_verify_short): Fix typo in test for next insn
+ being compact.
+ * config/arc/arc.md (ashlsi3_insn_mixed): Fix typo in insn predicate.
+ (ashrsi3_insn_mixed, lshrsi3_insn_mixed): Likewise.
+
+2008-11-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_next_active_insn): Don't use label_to_alignment
+ before arc_reorg.
+ (arc_verify_short): Allow NEXT_INSN (insn) to be NULL before arc_reorg.
+ * config/arc/arc.md (return_i): Make length 2 dependent on
+ arc_verify_short.
+
+2008-11-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_label_align): Don't call recog_memoized on
+ non-insns.
+
+2008-11-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * final.c (uid_lock_length): Now file-scope variable.
+ (get_attr_lock_length): Use it.
+ (shorten_branches): Clear uid_lock_length after freeing it.
+ * config/arc/arc.c (arc_adjust_insn_length): Use get_attr_lock_length
+ to find length of first part of a SEQUENCE.
+
+ * config/arc/arc.c (arc_setup_incoming_varargs): Remove unused variable.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (delay_slot_length): New attribute.
+ (branch_insn, rev_branch_insn, jump, cbranchsi4_scratch, bbit): Use it.
+
+ * config/arc/arc.md (call_i): Refine cond attribute for Cbr alterative.
+ (call_value_i): Likewise.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc-protos.h (arc_split_dilogic): Move declaration inside
+ RTX_CODE conditional.
+ * config/arc/ieee-754/arc600-dsp/mulsf3.S: Turn off debug code.
+ * config/arc/ieee-754/arc600-dsp/muldf3.S: Likewise.
+ * config/arc/ieee-754/arc600-dsp/divsf3.S: Likewise.
+ * config/arc/ieee-754/arc600-dsp/divdf3.S: Likewise.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_setup_incoming_varargs): Don't increase
+ alignment.  Always set pretend_size.
+ (arc_va_start): Delete.
+ (arc_va_arg): TYPE_ALIGN should be no larger than BITS_PER_WORD.
+ * config/arc/arc-protos.h (arc_va_start): Don't declare.
+ * config/arc/arc.h (FUNCTION_ARG_BOUNDARY): Delete.
+ (EXPAND_BUILTIN_VA_START): Delete.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (CASE_VECTOR_MODE): When not optimizing, use SImode.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_print_operand <!>): Print condition before _s.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_verify_short): Fix calculation of this_sequence.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_need_delay): Don't use num_delay_slots
+ on non-insns.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (cc_use_register): New predicate.
+ * config/arc/arc.c (arc_need_delay): New function.
+ * config/arc/arc-protos.h (arc_need_delay): Declare.
+ * config/arc/arc.md (adddi3): Check first if l0 is const0_rtx.
+ Fix length attribute.
+ (add_f): Remove constant alternative constraint parts from operand1.
+ Add Rcw/0/I alternative.
+ (adc): Likewise.  Don't split if we expect this insn to go into
+ a delay slot. Simplify SET_SRC of first splitter output insn.
+ (add_f_2): New pattern.
+ (adc+1): Remove constraints.
+ (subdi3): New define_expand.
+ (subdi3_i, sbc_0, sbc, sub_f, sub_f+1, sub_f+2): New patterns.
+ (negdi2): Delete.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_split_dilogic): New function.
+ * config/arc/arc-protos.h (arc_split_dilogic): Declare.
+ * config/arc/arc.md (adddi3): Avoid non-canonical rtl when low part
+ of operands[2] is const0_rtx.
+ (add_f): Call extract_insn_cached at end of predicate test.
+ (anddi3, iordi3, xordi3): New define_insn_and_split. New alternatives.
+ (negdi2): New alternatives.
+
+2008-11-22 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (LABEL_ALIGN): Define.
+ * config/arc/arc.c (arc_label_align): New function.
+ * config/arc/arc-protos.h (arc_label_align): Declare.
+
+ * config/arc/arc.md (adc): Restore operands before emitting split.
+
+2008-11-22 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (add_f): Fix pattern.
+ (adc+2): Adjust generated pattern to add_f fix.
+ * config/arc/arc.c (arc_sets_cc_p): When being passed a sequence,
+ look at the delay slot insn.
+
+2008-11-21 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (call_i, call_value_i): Fix length for potentially
+ compact alternative.
+
+2008-11-21 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (adddi3): Don't use w/c/i alternative for
+ reloading.
+ Try to change to subdi3 first.
+ Use conditional add of 1 rather than add.f 0 / add.
+ (subdi3): Allow partial overlap when adding a constant.
+
+2008-11-21 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (addsi_compare_2): Change predicate for operand 2
+ to nonmemory_operand.
+
+2008-11-21 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_select_cc_mode): Use CC_ZNmode for test of
+ (a+b) == 0.
+ (arc_rtx_costs): Add some add.f / adc special cases.
+ (arc_attr_type, arc_sets_cc_p): New functions.
+ (arc_scheduling_not_expected): Likewise.
+ * config/arc/arc-protos.h (arc_attr_type, arc_sets_cc_p): Declare.
+ (arc_scheduling_not_expected): Likewise.
+ * config/arc/arc700.md (core_insn): Split out:
+ (cmove).
+ (cc_arith): New reservation.
+ (define_bypass): Mention more insns for which compare has latency 1.
+ Add compare / cmove bypass with latency 2.
+ * config/arc/arc.md (attribute type): Add value cc_arith.
+ (addsi_compare): Change CC_REG mode to CC_ZN.
+ (addsi_compare_2, addsi_compare_3): New patterns.
+ (adddi3): Now define_insn_and_split. Add more alternatives.
+ (add_f, adc, adc+1,adc+2): New patterns.
+ (adddi3): Add more alternatives.
+ (add_cond_exec, commutative_cond_exec, sub_cond_exec): Now type cmove.
+ (noncommutative_cond_exec): Likewise.
+
+ * config/arc/arc.c (arc_save_restore): Emit sibcall thunk insn as
+ jump insn.
+
+ * combine.c (combine_validate_cost): Revert to old behaviour when
+ combining two insns to one.
+
+2008-11-21 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (immediate_usidi_operand): New predicate.
+ * config/arc/arc.md (umulsi3_highpart_int): Use it.
+ (umulsidi3_highpart): Avoid using negative CONST_INTs.
+
+2008-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (casesi_jump): New pattern.
+ (casesi): Use it.
+
+2008-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_output_casesi_insn): Delete.
+ * config/arc/arc-protos.h (arc_output_casesi_insn): Don't declare.
+ * config/arc/arc.h (OPTIMIZATION_OPTIONS):
+ Set TARGET_CASE_VECTOR_PC_RELATIVE to (SIZE !=0).
+ (JUMP_TABLES_IN_TEXT_SECTION): Also true for
+ TARGET_CASE_VECTOR_PC_RELATIVE.
+ (ASM_OUTPUT_ADDR_DIFF_ELT): Use proper directives for QImode / SImode.
+ (LABEL_ALIGN_AFTER_BARRIER): Always align at least to 2^1 after an
+ ADDR_DIFF_VEC.
+ (CASE_VECTOR_PC_RELATIVE, CASE_VECTOR_SHORTEN_MODE): Define.
+ (ADDR_VEC_ALIGN): Likewise.
+ (ASM_OUTPUT_BEFORE_CASE_LABEL): Override.
+ * config/arc/arc.md (UNSPEC_CASESI): New constant.
+ (indirect_jump): Expand q alternative to Rcqq.
+ (casesi_insn): Deleted.
+ (casesi_load): New pattern.
+ (casesi): Rewrite to use casesi_load.
+ * config/arc/arc.opt (mcase-vector-pcrel): New option.
+
+ * config/arc/arc.md (VUNSPEC_EPILOGUE): Deleted.
+ (ARC_UNSPEC_PROLOGUE_USE): Likewise.
+
+2008-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_expand_epilogue): Don't use a sibcall thunk
+ in a sibcall epilogue.
+
+ * config/arc/arc.c (arc_regno_use_in): New function.
+ * arc-protos.h (arc_regno_use_in): Declare.
+ * config/arc/arc.md (in_call_delay_slot): New predicate.
+ (in_sfunc_delay_slot): Use in_call_delay_slot and arc_regno_use_in.
+ (define_delay): Use separate form for call, using in_call_delay_slot.
+
+ * config/arc/arc.md (subsi3_insn): Fix output template for w/c/Cal
+ alternative.
+
+2008-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_compute_frame_size):
+ Don't zero the frame size even if it's the same as extra_size.
+
+2008-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_save_restore): Load offset for blink into r12.
+ (arc_expand_epilogue): No sibthunk if there are pretend_args.
+
+2008-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (millicode_load_operation):
+ Pass 2 as second argument to arc_check_millicode.
+ (millicode_load_clob_operation): New predicate.
+ * config/arc/arc.c (arc_compute_millicode_save_restore_regs):
+ Lower the required number of registers to save.
+ (arc_save_restore): If epilogue_p is 2, emit a thunk sibcall.
+ (arc_expand_epilogue): Check if we can emit a thunk sibcall.
+ (arc_next_active_insn): Check for NULL / BARRIER immediately
+ after advancing to next insn.
+ (arc_check_millicode): For load_p == {0,1}, require a final clobber.
+ Interpret load_p == 2 as meaning that no final clobber is required
+ and that the minimum count is reduced to 2.
+ * config/arc/arc.md (millicode_thunk_st): Take final clobber into
+ account.
+ * config/arc/arc.md (millicode_thunk_ld): Use the
+ millicode_load_clob_operation predicate.  Take final clobber into
+ account.
+ (millicode_sibthunk_ld): New pattern.
+ * lib1funcs.asm (L_millicode_thunk): Split into:
+ (L_millicode_thunk_ld, L_millicode_thunk_st).  Bracket functions in
+ HIDDEN_FUNC / END_FUNC. Remove 1 / 2 register save / restore,
+ and optimize scheduling of function end.
+ (L_millicode_thunk_ld_ret): New part.
+ * t-arc (LIB1ASMFUNCS): Replace _millicodethunk with
+ _millicodethunk_st, _millicodethunk_ld and _millicodethunk_ret.
+
+2008-11-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_save_restore): Allow larger values
+ of *first_offset for millicode generation if generating an epilogue.
+ (arc_expand_epilogue): Use cfun->machine->frame_info.millicode_end_reg
+ to decide if using a millicode call.
+ Don't make frame pointer use dependent on millicode generation.
+ Refine decision on how to adjust stack.
+
+2008-11-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (IS_POWEROF2_P): Reject 0.
+ * config/arc/arc.md (attribute "length"):
+ Shift of immediate has length 8.
+ * config/arc/arc.md (C1p): Reject 0.
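
For reference, the usual power-of-two bit test wrongly accepts 0 unless
it is guarded; a plausible sketch of such a macro, not necessarily the
exact definition in arc.h:

    /* x & (x - 1) clears the lowest set bit, so the result is zero for
       every power of two, but also for x == 0, hence the extra test.  */
    #define IS_POWEROF2_P(x)  ((x) != 0 && ((x) & ((x) - 1)) == 0)
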
+
+2008-11-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (andsi3): Call arc_rewrite_small_data on
+ operands[1] when indicated.
+
+2008-11-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ Wrap up of changes going back to r59971
+ * final.c (get_attr_length_1): Use direct recursion rather than
+ calling get_attr_length.
+ (get_attr_lock_length): New function.
+ (INSN_VARIABLE_LENGTH_P): Define.
+ (shorten_branches): Take HAVE_ATTR_lock_length into account.
+ Don't overwrite non-delay slot insn lengths with the lengths of
+ delay slot insns with same uid.
+ * genattrtab.c (lock_length_str): New variable.
+ (make_length_attrs): New parameter base.
+ (main): Initialize lock_length_str.
+ Generate lock_lengths attributes.
+ * genattr.c (gen_attr): Emit declarations for lock_length attributes.
+
+ * genoutput.c (process_template): Process '*' in '@' alternatives.
+
+ * read-rtl.c (read_rtx_1): Terminate when reading EOF.
+
+ Attempt at avoiding zero_extract->lshiftrt which didn't go far enough:
+ * combine.c (undo_since): New function, broken out of:
+ (undo_all).
+ (try_combine): Don't do pure simplifications if they increase the cost.
+
+ * combine.c (combine_validate_cost): Be more strict in !i1 case.
+ (try_combine): Allow to combine-split two expensive insns into two less
+ expensive insns.
+
+ * config/arc/predicates.md (extend_operand): New predicate.
+ (equality_comparison_operator, millicode_store_operation): Likewise.
+ (millicode_load_operation): Likewise.
+ (cc_register): Use CC_REG.
+ * config/arc/arc.c (struct arc_ccfsm): New struct.
+ (arc_ccfsm_state, arc_ccfsm_current_cc): Deleted.
+ (arc_ccfsm_target_insn, arc_ccfsm_target_label): Likewise.
+ (cfa_offset, cfa_store_offset, doing_dwarf): Likewise.
+ (last_insn_set_cc_p): Likewise.
+ (TARGET_ASM_FUNCTION_PROLOGUE): Likewise.
+ (arc_ccfsm_current, ARC_CCFSM_BRANCH_DELETED_P): Define.
+ (ARC_CCFSM_RECORD_BRANCH_DELETED, ARC_CCFSM_COND_EXEC_P): Likewise.
+ (CCFSM_ISCOMPACT, CCFSM_DBR_ISCOMPACT): Likewise.
+ (arc_output_function_prologue): Renamed to:
+ (arc_expand_prologue). No parameters. Now output rtl.
+ No longer static.
+ (arc_init): Set arc_punct_chars['&'].
+ (enum arc_cc_code_index): New enum.
+ (get_arc_condition_code): Use it.
+ (arc_select_cc_mode): Use CC_Zmode if we want to emit add / bmsk.f .
+ (frame_insn, frame_move, frame_move_inc, frame_add): New functions.
+ (frame_stack_add): Likewise.
+ (struct arc_frame_info): Add GTY marker.
+ New members millicode_start_reg and millicode_end_reg.
+ (current_frame_info): Deleted. Changed all users to use
+ cfun->machine->frame_info instead.
+ (machine_function): New struct / typedef.
+ (arc_compute_function_type): Take a struct function * parameter.
+ Initialize fun->machine->fn_type.
+ Changed all callers.
+ (ILINK1_REGNUM, ILINK2_REGNUM, RETURN_ADDR_REGNUM): Delete.
+ (MUST_SAVE_RETURN_ADDR): Only require save if register is set.
+ (arc_compute_millicode_save_restore_regs): Rewrite.
+ Take struct arc_frame_info * parameter. Changed all callers.
+ (arc_save_restore): No longer take file, op, parity parameters.
+ Add epilogue_p parameter. Changed all callers.
+ Emit rtl.
+ (arc_return_address_regs): New global scope array.
+ (arc_output_function_epilogue): Renamed to:
+ (arc_expand_epilogue). Take only sibcall_p parameter.
+ Now output rtl. Changed all callers.
+ (arc_finalize_pic): Use Pmode for addresses.
+ Don't add PROGRAM_COUNTER_REGNO. Use gen_rtx_SET.
+ Emit using emit_insn. Don't emit a USE. Return emitted insn.
+ (arc_cond_exec_p): Rename to:
+ (arc_ccfsm_cond_exec_p). Changed all callers.
+ Use cfun->machine->prescan_initialized / arc_ccfsm_current.
+ (arc_print_operand): Don't emit delay suffix if delay insn has
+ been deleted. Take TARGET_AT_DBR_CONDEXEC into account.
+ Use ARC_CCFSM_COND_EXEC_P.
+ For '?' / '!', call output_short_suffix if the insn can be short.
+ Add '&' case for TARGET_ANNOTATE_ALIGN.
+ (record_cc_ref): Deleted. Changed all callers.
+ (arc_ccfsm_advance): New function, broken out of:
+ (arc_final_prescan_insn).
+ (arc_ccfsm_at_label): Add state parameter. Changed all callers.
+ Now static.
+ (arc_ccfsm_record_condition, arc_ccfsm_post_advance): New functions.
+ (arc_ccfsm_branch_deleted_p): Use ARC_CCFSM_RECORD_BRANCH_DELETED.
+ (arc_ccfsm_advance_to, arc_next_active_insn): New functions.
+ (arc_verify_short, output_short_suffix): Likewise.
+ (arc_final_prescan_insn): Clear arc_ccfsm_current if this is the
+ first call for the current function.
+ Clear cfun->machine->size_reason.
+ (branch_dest): Cope with PARALLEL.
+ (arc_rtx_costs): Handle more cases of cheap constants.
+ Reflect higher cost of shifting a constant.
+ Add cases for btst / bit / bmsk.f .
+ (arc_is_longcall_p): Remove call_symbol parameter.
+ Changed all callers.
+ (arc_reorg_in_progress): New variable.
+ (arc_reorg): Set cfun->machine->arc_reorg_started and
+ arc_reorg_in_progress. Clear cfun->machine->ccfsm_current_insn after
+ each shorten_branches call.
+ Generate bbit as well as brcc insns.  Express suitability for brcc_s
+ with the mode of the clobber.  If brcc insns are present from before
+ arc_reorg, fix up the clobber mode.
+ Clear arc_reorg_in_progress at the end.
+ (valid_brcc_with_delay_p): Update for changed cbranchsi4_scratch (brcc)
+ pattern. Use brcc_nolimm_operator.
+ (arc_output_addsi): Don't check arc_size_opt_level.
+ Use %? directive. Add logic to exploit commutativity for
+ matching constraint in reg/reg/reg case.
+ (arc_output_libcall): Use %! directive. Take TARGET_MEDIUM_CALLS into
+ account.
+ (arc_insn_length_adjustment): Rename to:
+ (arc_adjust_insn_length). Take length parameter. Changed all callers.
+ Add SEQUENCE case.
+ Add code for TARGET_PAD_RETURN.
+ Add code to take ccfsm actions into account.
+ (arc_unalign_branch_p, arc_branch_size_unknown_p): New functions.
+ (arc_pad_return, arc_init_machine_status): Likewise.
+ (arc_init_expanders, arc_check_millicode, arc_clear_unalign): Likewise.
+ (split_addsi, split_subsi, arc_split_move, arc_short_long): Likewise.
+ (gt-arc.h): Include.
+ * config/arc/arc.h (TARGET_UNALIGN_BRANCH, TARGET_UPSIZE_DBR): Define.
+ (TARGET_PAD_RETURN, TARGET_AT_DBR_CONDEXEC): Likewise.
+ (OPTIMIZATION_OPTIONS): Initialize TARGET_Rcq, TARGET_Rcw,
+ TARGET_ALIGN_CALL, TARGET_EARLY_CBRANCHSI and TARGET_BBIT_PEEPHOLE.
+ (ARC_STACK_ALIGN): Use STACK_BOUNDARY.
+ (REG_CLASS_CONTENTS): Include ap / pcl in WRITABLE_CORE_REGS.
+ (ASM_OUTPUT_ALIGN): Call arc_clear_unalign.
+ (arc_return_address_regs): Declare.
+ (CAN_DEBUG_WITHOUT_FP, CALL_ATTR, INIT_EXPANDERS): Define.
+ (TARGET_DROSS, DROSS): Likewise.
+ * config/arc/arc.md (SP_REG, ILINK1_REGNUM): New constants.
+ (ILINK2_REGNUM, RETURN_ADDR_REGNUM, CC_REG): Likewise.
+ (is_sfunc, is_CALL, is_SIBCALL, is_NON_SIBCALL): New attributes.
+ (attribute type): New value jump.  Use special default logic if the
+ is_sfunc attribute is "yes".
+ (attribute iscompact): New values maybe, true_limm, maybe_limm.
+ Default to "maybe" if type is "sfunc".
+ (attribute cond): New values canuse_limm and canuse_limm_add.
+ (verify_short, delay_slot_filled, delay_slot_present): New attributes.
+ (lock_length): Likewise.
+ (attribute length): Add no-op clause to mark compact insns as
+ varying length, and make their length dependent on verify_short.
+ Set length of stores that store an immediate to 8.
+ Remove (eq_attr "cond" "set,set_zn,clob") clause.
+ Insns of type call_no_delay_slot have length 8.
+ Make use of new iscompact values.
+ (attribute in_delay_slot): Check for type "jump".
+ (cond_delay_insn, in_ret_delay_slot): New attributes.
+ (cond_ret_delay_insn): Likewise.
+ (define_delay forms): Check new insns types; add define_delay for
+ type "return" insns.
+ Separate branch/uncond_branch/jump define_delay and use
+ cond_delay_insn where indicated.
+ (all ARCompact patterns): Use %? / %! with %& instead of _s. Add new
+ attributes / use new attribute values. Rearrange alternatives
+ to make sure that short insns and conditional execution can be used.
+ Remove dependency of short insns emitting on arc_size_opt_level.
+ (movsi_pre_mod, tst, bset, bxor, bclr, btst): New patterns.
+ (movdi_insn, movdf_insn, addsi3_mixed): New define_insn_and_split.
+ (subsi3_insn): Likewise.
+ (movsicc_insn+1, btst+1): New peephole2 patterns.
+ (shift_and_add_insn_mixed, shift_and_add_insn): Delete.
+ (shift_and_sub_insn, bset_insn_mixed, bclr_insn_mixed): Likewise.
+ (bmsk_insn_mixed): Likewise.
+ (andsi3): Renamed to:
+ (andsi3_i). Add memory / register zero_extend alternatives.
+ (andsi3): New expander.
+ (andsi3_i+1): New splitter.
+ (call_via_reg_mixed, call_via_label, call_via_imm): Merge to:
+ (call_i).
+ (call_value_via_reg_mixed, call_value_via_label, call_value_via_imm):
+ Merge to:
+ (call_value_i).
+ (flush_icache+1, flush_icache+2, flush_icache+5): Mark as DROSS.
+ (flush_icache+6, flush_icache+7, flush_icache+8): Likewise.
+ (flush_icache+9, flush_icache+10, flush_icache+11): Likewise.
+ (flush_icache+12): Likewise.
+ (sibcall, sibcall_value, sibcall_insn): Add use of RETURN_ADDR_REGNUM.
+ (sibcall_value_insn): Likewise.
+ (prologue, return_i, bbit, millicode_thunk_st): New patterns.
+ (millicode_thunk_ld): Likewise.
+ (epilogue_i): Deleted.
+ (cbranchsi4_scratch): Include a clobber of CC_REG. The mode of the
+ clobber indicates if brcc_s is possible.
+ * config/arc/arc-protos.h (arc_finalize_pic): Update prototype.
+ (arc_compute_function_type, arc_is_longcall_p): Likewise.
+ (arc_ccfsm_at_label, arc_cond_exec_p): Remove prototype.
+ (arc_insn_length_adjustment): Likewise.
+ (arc_ccfsm_cond_exec_p, arc_adjust_insn_length): Add prototype.
+ (arc_unalign_branch_p, arc_branch_size_unknown_p): Likewise.
+ (arc_ccfsm_record_condition, arc_expand_prologue): Likewise.
+ (arc_expand_epilogue, arc_init_expanders): Likewise.
+ (arc_check_millicode, arc_clear_unalign, split_addsi): Likewise.
+ (split_subsi, arc_pad_return, arc_split_move): Likewise.
+ (arc_shorten_align, arc_ccfsm_advance_to, arc_verify_short): Likewise.
+ (arc_short_long): Likewise.
+ * config/arc/t-arc (gt-arc.h): New rule.
+ ($(out_object_file)): Depends on gt-arc.h.
+ * config/arc/constraints.md (Rcw): Now also register constraint.
+ (CnL, CmL, CL2, CM4, Csp, C0p, C1p, Ccp, Cux, Crr): New constraints.
+ (Us<, Us>, Rcw, Rcb, Rck): Likewise.
+ (Cbr): Use arc_is_longcall_p.
+ (Rcq): Don't check arc_size_opt_level, but rather TARGET_Rcq.
+ * config/arc/arc.opt (m2addr, munalign-prob-threshold=): New options.
+ (mmedium-calls, mannotate-align, malign-call, mRcq, mRcw): Likewise.
+ (mearly-cbranchsi, mbbit-peephole): Likewise.
+
+ * config/arc/lib1funcs.asm (__clzsi2): Shorten the !__ARC_NORM__ case.
+
+2008-11-18 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (one_cmpldi2): Change to a define_insn_and_split.
+
+2008-11-18 J"orn Rennecke <joern.rennecke@arc.com>
+
+ cr95728:
+ * config/arc/arc.md (sibcall, sibcall_value): If call address
+ is for a longcall, load it into a register.
+
+2008-11-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ Fix debug info problem with pretend_args:
+ * config/arc/arc.h (CFA_FRAME_BASE_OFFSET, ARG_POINTER_CFA_OFFSET):
+ Define.
+
+2008-11-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * genoutput.c (note_constraint): Ignore redefinition.
+ * genpreds.c (struct constraint_data): New member is_overloaded.
+ (add_constraint): Allow constraint overloading.
+ (write_enum_constraint_num): Don't emit duplicates for overloaded
+ constraints.
+ (write_lookup_constraint, write_insn_constraint_len): Likewise.
+
+ * dwarf2out.c (dwarf2out_frame_debug_expr): Don't abort on
+ expressions from function epilogue.
+
+FIXME: Document changes since rev 59970
+
+2008-10-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (*add_n): Fix length of c/c/Cal alternative.
+
+2008-10-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_output_function_epilogue): Remove
+ noepilogue logic.
+
+ * config/arc/arc.md: (type): Add return.
+ (toplevel): Add define_delay for "return" type patterns.
+ (epilogue_i): Remove !optimize_size condition. Set type to return.
+
+2008-10-03 Khurram Riaz <Khurram.Riaz@arc.com>
+ J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (BITS_BIG_ENDIAN): Always 0.
+
+2008-09-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (TARGET_ASM_FUNCTION_EPILOGUE): Don't redefine.
+ (arc_output_function_epilogue): No longer static.
+ Take new parameter sibcall_epilogue.
+ * config/arc/arc-protos.h (arc_output_function_epilogue): Declare.
+ * config/arc/arc.md (sibcall_epilogue): Now an expander.
+ (epilogue_i, epilogue): New patterns.
+
+2008-09-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * target-def.h (TARGET_PRESERVE_RELOAD_P): Fix spelling.
+
+2008-09-24 Muhammad Khurram Riaz <khurram.riaz@arc.com>
+
+ * config/arc/predicates.md (move_dest_operand): For MUL64 targets,
+ r57-r59 cannot be a destination; the manual says they are read-only.
+
+2008-09-23 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.opt (mno-vld-label): New option.
+ * config/mxp/mxp.md (*mov<mode>_i): Support TARGET_NO_VLD_LABEL.
+
+ * config/mxp/mxp.opt (mno-vim-label): New option.
+ * config/mxp/mxp.md (*mov<mode>_i): Use vmovw for HImode.
+ (addhi3): Support TARGET_NO_VIM_LABEL.
+
+2008-09-19 Muhammad Khurram Riaz <khurram.riaz@arc.com>
+
+ * config/arc/arc.md (mulsi3): For MULMAC32x16, an unsigned constant
+ can be up to 65535, and mululw (along with a mov from acc2) is enough
+ to get the result; no mac instruction is needed.  A single multiply
+ instruction cannot be used with immediates less than 0, because
+ mululw 0,reg,limm (and also mullw) treats the lower 16 bits of the
+ immediate operand as unsigned, so in that case a mac instruction is
+ also required.
+ (umul_600): A limm can be the last operand in DSP multiply
+ instructions.
+ (mac_600, mul64_600, mac64_600, umul64_600, umac64_600): Likewise.
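
The point about 16-bit immediates can be checked with plain C
arithmetic.  This is an editorial sketch, not the generated code:
mul_32x16 merely stands in for what a mululw-style step computes, and
all names are made up:

    #include <stdint.h>

    /* A 32x16 multiply step sees its 16-bit operand as unsigned.  */
    static uint32_t mul_32x16(uint32_t a, uint16_t b)
    {
        return a * (uint32_t)b;
    }

    /* Low 32 bits of a * c.  For 0 <= c <= 65535 one step suffices;
       for negative c the step reads (c & 0xffff) as unsigned, so the
       high half must be accumulated too (the extra mac step).  */
    static int32_t mul32(int32_t a, int32_t c)
    {
        uint32_t lo = mul_32x16((uint32_t)a, (uint16_t)c);
        if (c >= 0 && c <= 65535)
            return (int32_t)lo;
        /* c >> 16 assumes arithmetic shift for negative c, as GCC does. */
        return (int32_t)(lo + (mul_32x16((uint32_t)a,
                                         (uint16_t)(c >> 16)) << 16));
    }
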
+
+2008-09-18 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.md (store_scalars, load_scalars): Add type attribute.
+ (mov<mode>_i): Fix type attribute.
+
+2008-09-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.h (ASM_OUTPUT_SYMBOL_REF): Define.
+ (ASM_OUTPUT_LABEL_REF): Likewise.
+
+2008-09-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ Enable use of symbol / label references.
+ * config/mxp/mxp.c (BINUTILS_FIXED): Define.
+
+ * config/mxp/mxp.md (attribute type): Add load and store.
+ (*mov<mode>_i): Set type of load/store instructions.
+ (branch_true, branch_false): Set type. Emit delay slot nops.
+ (decrement_and_branch_until_zero, doloop_end): New patterns.
+ (mxp): New scheduling automaton.
+ * config/mxp/mxp.c (mxp_print_operand): When emitting just one
+ delay slot nop, check if it should be between the two delay slot
+ insns.
+
+2008-09-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.md (smax<mode>3, smin<mode>3): New patterns.
+ * config/mxp/mxp.opt (mint16): New option.
+ * config/mxp/mxp.h (INT_TYPE_SIZE): Take TARGET_INT16 into account.
+
+2008-09-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/arc600-dsp/mulsf3.S (.Ldenorm_dbl1):
+ Fix register number of multiplicand.
+
+2008-09-15 Muhammad Khurram Riaz <khurram.riaz@arc.com>
+
+ * config/arc/arc.md: Fix bug in mul/mac instructions.
+ * config/arc/arc.c: Likewise.
+
+2008-09-13 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/arc600-dsp/mulsf3.S: Fix ld.as offsets.
+
+ * config/arc/ieee-754/arc600/divsf3-stdmul.S: Rename to:
+ * config/arc/ieee-754/arc600/divsf3.S.
+ * config/arc/ieee-754/arc600-dsp/divsf3-stdmul.S: Rename to:
+ * config/arc/ieee-754/arc600-dsp/divsf3.S.
+ * config/arc/lib1funcs.asm (L_divsf3): Update.
+
+2008-09-12 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/arc600-dsp/muldf3.S: Fix offset for loading
+ 0x7ff00000.
+
+ * config/arc/dp-hack.h: Fix #ifdef condition for L_make_df etc.
+ * config/arc/ieee-754/arc600-dsp/divdf3.S: Fix issue where acc1 was
+ used as input to mululw/machulw sequence.
+ * config/arc/ieee-754/arc600-dsp/divsf3-stdmul.S: Likewise.
+ Re-schedule denorm_fp1 handling to reduce code duplication.
+
+ * config/arc/ieee-754/arc600/divsf3-stdmul.S (.Ldenorm_fp0):
+ Move upwards to execute mulu64.
+
+2008-09-12 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): If
+ TARGET_MULMAC_32BY16_SET, define __ARC_MUL32BY16__ .
+ * config/arc/lib1funcs.asm (L_muldf3, L_mulsf3, L_divdf3, L_divsf3):
+ Add __ARC_MUL32BY16__ alternative.
+ * config/arc/ieee-754/arc600-dsp/divdf3.S: New file.
+ * config/arc/ieee-754/arc600-dsp/divsf3-stdmul.S: Likewise.
+
+ * config/arc/ieee-754/divdf3.S: Move constant data to end of file,
+ change code to address it to save limm.
+ * config/arc/ieee-754/divsf3-stdmul.S: Hard-code sub3 argument to
+ avoid limm.
+ * config/arc/ieee-754/arc600/divsf3-stdmul.S: Likewise.
+ * config/arc/ieee-754/arc600/divdf3.S: Likewise. Fix flag setting
+ problem when divisor is denormal.
+
+ * config/arc/arc.c (arc_compute_millicode_save_restore_regs): Only
+ consider save sequences starting with r13.
+
+2008-09-05 Muhammad Khurram Riaz <khurram.riaz@arc.com>
+
+ * config/arc/arc.h: Set WCHAR_T to the int type.
+
+2008-09-05 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_fallback): Operand 0 is "+r,!w".
+ (doloop_fallback_m): Operand 0 is "+&r".
+
+2008-09-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * Makefile.in (targhooks.o): Depend on OPTABS_H.
+ * config/arc/arc.md (mulsidi3): Remove some junk.
+ (mulsidi3_700, umulsidi3_700): Remove 'J' Constraint.
+ * config/arc/ieee-754/arc600-dsp: New directory.
+
+2008-09-02 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/t-arc-newlib (MULTILIB_EXCLUSIONS): Re-enable multilibs
+ with -mnorm.
+ * config/arc/arc.h (ASM_SPEC): For -mmul32x16, pass -mdsp.
+
+ * config/arc/lib1funcs.asm (__mulsi3): Fix invalid insn issues.
+
+ * config/arc/arc.md (*call_via_label): Replace %^%Pn with %Pn.
+ (call_prof, *call_value_via_label, call_value_prof): Likewise.
+ (*sibcall_insn, *sibcall_value_insn, sibcall_prof): Likewise.
+ (sibcall_value_prof): Likewise.
+
+ * config/arc/arc.md (doloop_begin_i): When n_insns is 0, output
+ three nops for ARC600.
+
+ * config/arc/predicates.md (arc_double_register_operand): Fix
+ indentation.
+ (vector_register_operand, vector_register_or_memory_operand): Likewise.
+ (arc_dpfp_operator): Likewise.
+ (acc1_operand, acc2_operand, mlo_operand, mhi_operand): New predicates.
+ * config/arc/arc.c (arc_init): For ARC600 with TARGET_MUL64_SET,
+ default multcost to 4.
+ (rname56, rname57, rname58, rname59): New character arrays.
+ (arc_init_regno_reg_class): Rename to:
+ (arc_conditional_register_usage). Add code from
+ arc.h:CONDITIONAL_REGISTER_USAGE. Establish special multiply result
+ register names, and use register numbers that work well with
+ the target endianness.
+ For ARC700, disable use of lp_count for SFmode.
+ (gen_acc1, gen_acc2, gen_mlo, gen_mhi): New functions.
+ * config/arc/arc-protos.h (arc_init_regno_reg_class): Don't declare.
+ (arc_conditional_register_usage): Declare.
+ (gen_acc1, gen_acc2, gen_mlo, gen_mhi): Likewise.
+ * config/arc/arc.h (CONDITIONAL_REGISTER_USAGE): Use
+ arc_conditional_register_usage.
+ (rname56, rname57, rname58, rname59): Declare.
+ (REGISTER_NAMES): Use them.
+ (DBX_REGISTER_NUMBER): Translate internal numbers for multiply
+ result registers into true hardware register numbers.
+ * config/arc/arc.md
+
+ * testsuite/gcc.c-torture/execute/ieee/denorm-rand.c (main):
+ Reduce iteration counts to 1000.
+ * varasm.c (pickle_in_section): Make definition a declaration.
+ * config/arc/arc.c (arc_print_operand): Add 'o' to output
+ symbol without '@'.
+ (write_profile_sections): Use it.
+
+ * config/arc/arc.c (arc_final_prescan_insn): Call
+ extract_constrain_insn_cached after calling arc_hazard.
+ (arc_reorg): Check second parameter of compare before feeding it
+ to cbranchsi4_scratch.
+ * config/arc/arc.md (*unary_comparison_result_used): Use higher
+ operand numbers for match_operator than for ordinary operands.
+ Use match_dup for duplicated expression in match_operator.
+ (*commutative_binary_comparison_result_used): Likewise.
+ (*noncommutative_binary_comparison_result_used): Likewise.
+ (mulsi3): Generate expansion for TARGET_MUL64_SET.
+ (mulsi3): Refer to multiply result registers with special generator
+ / recognizer functions to account for the endian-dependent numbers.
+ (umul_600, smul_600, mac_600, mulsi_600, mulsidi3): Likewise.
+ (umulsidi3): Likewise.
+ (mulsi_600): Type multi.
+ (*split_mulsi3_600): Delete.
+ (mulsidi_600, umulsidi_600): New patterns.
+ (mulsidi3, umulsidi3): Result is nonimmediate_operand.
+ Add TARGET_MUL64_SET code.
+ (mac64_600): Fix semantics description to be consistent with
+ PARALLEL semantics.
+ (umulsidi3_700): Operand 2 is register_operand.
+ * config/arc/arc600.md (mul64_600): New cpu_unit.
+ (load_DI_600, load_600): Fix attribute test.
+ (mul_600_fast, mul_600_slow): New reservations.
+ * config/arc/arc.h (TARGET_OPTFPE): True for TARGET_ARC600
+ && TARGET_NORM_SET && TARGET_MULMAC_32BY16_SET.
+
+ * config/arc/arc.md (doloop_begin_i): Look inside SEQUENCEs.
+ (doloop_end_i): Change alternative 2 of operand 2 to C_0.
+ * config/arc/arc.h (HARD_REGNO_RENAME_OK): Delete.
+
+2008-09-02 Muhammad Khurram Riaz <khurram.riaz@arc.com>
+
+ * config/arc/arc.c (arc600_corereg_hazard): mul/mac and mul64 set
+ the accumulator registers and are not a hazard.
+ * config/arc/arc.md (*unary_comparison_result_used): New pattern
+ for flag-setting instructions (unary) that also use the result.
+ (*addsi_compare): New pattern for add.f; needed by the combiner.
+ (*commutative_binary_comparison_result_used): New pattern for
+ flag-setting instructions (binary, commutative) that also use
+ the result.
+ (*noncommutative_binary_comparison_result_used): New pattern for
+ flag-setting instructions (binary, noncommutative) that also use
+ the result.
+
+2008-09-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/fixdfsi.S: Fix negative case.
+
+2008-08-30 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c: Give up on zero overhead loop if loop setup
+ precedes loop end with no label in-between.
+
+2008-08-30 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_final_prescan_insn): Don't try to eliminate
+ 'insns' of TYPE_LOOP_END.
+ * config/arc/arc.md (attr type): Replace loop with loop_start and
+ loop_end.
+ (attr in_delay_slot): Update.
+ (movqi_insn, movhi_insn): Add alternatives to read ALL_CORE_REGS.
+ (doloop_begin_i): Better estimate insn fetches with n_insns.
+ Allow zero n_insns if there is a code_label.
+ Don't emit nops for ARC600 with loop_top if n_insns is at least 3.
+ Change type to loop_setup.
+ (doloop_end_i): Set type to loop_end.
+
+2008-08-30 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): Start insn counting with the
+ label rather than the label_ref. Check that the jump successor
+ insn exists before calling recog_memoized on it.
+
+2008-08-30 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): Do separate calculation of
+ minimum insns from loop setup to loop end.
+ * config/arc/arc.c (arc_reorg): Fix setting of insn after deletion.
+
+2008-08-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_reorg): Use the loop top label to check for
+ empty loops. When deleting an empty loop, put the set of the loop
+ count before the loop end.
+
+2008-08-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (HARD_REGNO_RENAME_OK): Prohibit renaming from
+ LP_COUNT.
+ * config/arc/arc.md (doloop_begin_i): Change alternative 2 of
+ operand 1 to C_0.
+
+2008-08-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (dest_reg_operand): Use ALL_CORE_REGS.
+ * config/arc/arc-protos.h (arc_init_regno_reg_class): Declare.
+ * config/arc/arc.h: Fix merge error.
+ (CONDITIONAL_REGISTER_USAGE): Call arc_init_regno_reg_class.
+ * config/arc/arc.c (arc_init_reg_tables): Break out arc_regno_reg_class
+ initializing code into:
+ (arc_init_regno_reg_class): New function. Take new *CORE_REGS classes
+ into account.
+ * config/arc/arc.md (*umulsi3_highpart_int): Fix typo.
+
+2008-08-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/crtg.asm: Use _init / _fini instead of __init / __fini.
+ * config/arc/crti.asm: Likewise.
+
+ * config/arc/arc.c (arc_register_move_cost): Express costs of
+ writing / reading LP_COUNT register.
+ * config/arc/arc.h (CONDITIONAL_REGISTER_USAGE): Prune CHEAP_CORE_REGS.
+ (enum reg_class, REG_CLASS_NAMES, REG_CLASS_CONTENTS): Split CORE_REGS
+ into CHEAP_CORE_REGS and ALL_CORE_REGS.
+ (PREFERRED_RELOAD_CLASS): Check for CHEAP_CORE_REGS.
+ (REGISTER_MOVE_COST): Use arc_register_move_cost.
+ * config/arc/arc.md (movsi_insn): Add alternatives to read
+ ALL_CORE_REGS.
+ * config/arc/constraints.md ("c"): Now for CHEAP_CORE_REGS.
+ ("Rac"): New constraint.
+
+ * loop-doloop.c (doloop_modify): Pass doloop_end pattern to
+ gen_doloop_begin.
+ * config/c4x/c4x.md (doloop_begin): Operand 4 is doloop_end pattern.
+ * loop-doloop.c (doloop_optimize): Pass flag to indicate if loop is
+ entered at top to gen_doloop_end.
+ * config/arc/arc.c (arc_reorg): If we can't find the loop entry at
+ the loop top, search entire function for a matching loop begin,
+ and record information about it if found.
+ * config/arc/arc.md (doloop_begin): Now takes five operands.
+ (doloop_begin_i): Likewise.
+ (doloop_end): Now takes 6 operands.
+
+ * config/arc/arc.md (umulsi3_highpart_i): New pattern.
+ (*umulsi3_highpart_int, umulsi3_highpart): Likewise.
+
+ * target-def.h (TARGET_PRESERVE_RELOAD_P): Define.
+ (TARGET_INITIALIZER): Add TARGET_PRESERVE_RELOAD_P.
+ * target.h (struct gcc_target): New member preserve_reload_p.
+ * reload.c (push_reload): Mark values flagged by
+ targetm.preserve_reload_p as RELOAD_OTHER.
+ * config/arc/arc.c (arc_preserve_reload_p): New function.
+ (TARGET_PRESERVE_RELOAD_P): Redefine.
+ * doc/tm.texi (TARGET_PRESERVE_RELOAD_P): Document.
+
+ * config/arc/arc.h (ASM_OUTPUT_SYMBOL_REF): Define.
+ (ASM_OUTPUT_LABEL_REF): Likewise.
+ * config/arc/arc.c (arc_assemble_integer, arc_print_operand):
+ Don't emit '@' before labels / symbols.
+ (arc_print_operand_address): Likewise.
+
+ * config/arc/arc.h (LINK_COMMAND_SPEC): When creating a shared
+ library with -nostdlib, add -lgcc_s.
+
+ * config/arc/fp-hack.h: Disable debug functions for ARC700.
+ * config/arc/dp-hack.h: Likewise.
+
+ * config/arc/arc.h (STATIC_LINK_SPEC): Make default to
+ use dynamic libraries where available.
+ * config.gcc (arc*-*-linux-uclibc*): Retain OS specific files in
+ tmake_file.
+ * config/arc/t-arc700-uClibc (SHLIB_MAPFILES): Fix pasto.
+ (SHLIB_LINK): Override to use a linker script for libgcc_s.so.
+ (SHLIB_INSTALL): Likewise.
+ * config/arc/lib1funcs.asm (__mulsi3): Use (HIDDEN_)FUNC / ENDFUNC.
+ (__udivmodsi4, __umodsi3, __modsi3, __clzsi2): Likewise.
+ (__umulsidi3): Likewise. Add ARC700 optimized code.
+ * config/arc/ieee-754/adddf3.S: Use FUNC / ENDFUNC.
+ * config/arc/ieee-754/muldf3.S: Likewise.
+
+2008-08-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/fixdfsi.S: Fix shift by zero case.
+
+2008-08-08 Muhammad Khurram Riaz <khurram.riaz@arc.com>
+
+ * config/arc/predicates.md (move_dest_operand): r56 and r57 cannot
+ be the destination operand of a move.
+ * config/arc/arc.c (arc_init): Error out if -mmul32x16 is used
+ for !ARC600.
+ * config/arc/arc.h (ASM_SPEC): Pass -mmul32x16.
+ * config/arc/arc.md (mulsi3): Add ARC600 DSP mul/mac; also modify
+ for mul64.
+ (mulsidi3): Add ARC600 mul/mac DSP instructions.
+ * config/arc/arc600.md: Scheduling info for the new DSP mul/mac
+ instructions.
+ * config/arc/arc.opt: New option -mmul32x16.
+ * config/arc/t-arc-newlib: Use -mmul32x16 for multilib.
+
+2008-08-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): Count asm insns only as a
+ single insn.
+
+2008-07-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * dwarf2out.c (loc_descriptor): Pass entire MEM expressions
+ through targetm.delegitimize_address.
+ * config/arc/arc.c (arc_delegitimize_address): New function.
+ (TARGET_DELEGITIMIZE_ADDRESS): Redefine.
+ (arc_output_addr_const_extra): Remove.
+ * config/arc/arc.h (OUTPUT_ADDR_CONST_EXTRA): Remove.
+ * config/arc/arc-protos.h (arc_output_addr_const_extra): Don't declare.
+
+2008-07-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/eqdf2.S: Add some #ifdefed code for hardware
+ floating point compatibility.
+
+2008-07-23 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_output_libcall): Emit special sequence for
+ -fpic -mlong-calls.
+ * config/arc/arc.h (INSN_SETS_ARE_DELAYED): Also return true for
+ TYPE_SFUNC.
+ * config/arc/t-arc700-uClibc (SHLIB_MAPFILES): Define.
+ * config/arc/arc.md (attribute type): Add sfunc.
+ (attribute in_delay_slot): False for sfunc.
+ (attribute in_sfunc_delay_slot): New.
+ (toplevel): Add define_delay for sfunc.
+ (cmpsf_eq, cmpdf_eq): Type sfunc for -fpic -mlong-calls.
+ (cmpsf_gt, cmpdf_gt, cmpsf_ge, cmpdf_ge): Likewise.
+ (cmpsf_uneq, cmpdf_uneq, cmpsf_ord, cmpdf_ord): Likewise.
+ * config/arc/lib1funcs.asm (HIDDEN_FUNC): Define.
+ * config/arc/ieee-754/eqsf2.S, config/arc/ieee-754/orddf2.S: Use it.
+ * config/arc/ieee-754/uneqsf2.S, config/arc/ieee-754/gtsf2.S: Likewise.
+ * config/arc/ieee-754/gedf2.S, config/arc/ieee-754/ordsf2.S: Likewise.
+ * config/arc/ieee-754/eqdf2.S, config/arc/ieee-754/uneqdf2.S: Likewise.
+ * config/arc/ieee-754/gesf2.S, config/arc/ieee-754/gtdf2.S: Likewise.
+ * config/arc/libgcc-excl.ver: New file.
+
+ * config/arc/arc.c (arc_legitimize_pic_address): Use gen_const_mem.
+ (arc_output_addr_const_extra): New function.
+ * config/arc/arc-protos.h (arc_output_addr_const_extra): Declare.
+ * config/arc/arc.h (LEGITIMIZE_ADDRESS): Don't pass OLDX.
+ (OUTPUT_ADDR_CONST_EXTRA): Define.
+
+2008-07-23 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (xorsi3): Use "w" constraint for output operand.
+ (call_value): Don't write past end of operands.
+
+ * config/arc/arc.c (arc_legitimate_pc_offset_p): New function.
+ (arc_legitimize_pic_address): Don't make addition of pcl explicit.
+ (arc_output_pic_addr_const): Also emit '+' for negative leading integer.
+ Emit leading "pcl," for ARC_UNSPEC_GOT.
+ * config/arc/arc-protos.h (arc_legitimate_pc_offset_p): Declare.
+ * config/arc/constraints.md ("Cpc", "Cal"): New constraints.
+ * config/arc/arc.md (entire file): Replace ARCompact "i", "J", "Ji" and
+ "iJ" constraints with "Cal".
+ (movsi_insn): Add missing 'S' output modifier.
+ Add "?w"/"Cpc" alternative.
+ (mulsi3_600, mulsi3_700, andsi3): Add missing 'S' output modifier.
+ (xorsi3, indirect_jump, normw, mul64, divaw, flag, sr): Likewise.
+
+2008-07-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.md (lshr<mode>3_imm): Rename to:
+ (vec_shr_<mode>).
+ (vec_unpacks_lo_v8hi, vec_unpacku_lo_v8hi): New patterns.
+ (vec_unpacks_hi_v8hi, vec_unpacku_hi_v8hi): Likewise.
+ (vec_pack_trunc_v4si): Likewise.
+
+2008-07-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.md (or<mode>3): Rename to:
+ (ior<mode>3).
+
+2008-07-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/sh/sh.c (expand_block_move): Update call to
+ can_move_by_pieces.
+
+ * config/mxp/mxp.h (FUNCTION_VALUE_REGNO_P): Use FIRST_PARM_REG.
+
+2008-07-16 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_final_prescan_insn): Fix bug in last change:
+ check for JUMP_INSN.
+
+2008-07-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_final_prescan_insn): Don't emit a nop
+ in front of a delay slot insn.
+ (arc_reorg): Use fall-back pattern for non-empty zero length loops.
+ (arc600_corereg_hazard_1): Fix extension register range.
+ (write_ext_corereg_1): Likewise.
+
+ * config/arc/arc.c (disi_highpart): Return result.
+
+2008-07-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * rtl-factoring.c (compute_dummy_rtx_cost): New function.
+ (compute_init_costs): Use it.
+
+ * config/arc/arc.h (IS_ASM_LOGICAL_LINE_SEPARATOR): Add STR parameter.
+
+2008-07-14 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/ieee-754/divdf3.S (.Lret0_NaN): Remove dead insn.
+ (.Linf_nan_dbl1): Reduce size (and cycle count).
+ Adjust affected offsets.
+ * config/arc/ieee-754/mulsf3.S (.Linf_nan_dbl0): Shorten NaN check.
+ Adjust affected offsets.
+ * config/arc/lib1funcs.asm (__mulsi3): Remove __base conditional.
+
+ * config/arc/ieee-754/divdf3.S (.Linf_denorm): Fix handling of
+ 0x7fe / 0x7ff exponents.
+ * config/arc/ieee-754/mulsf3.S (.Ldenorm_dbl1): Avoid clobbering r4.
+
+ * config/arc/arc.md (umulqihi3, mulqihi3): Delete.
+
+ * config/arc/t-arc-newlib (MULTILIB_OPTIONS): Add mmul64 and mnorm.
+ (MULTILIB_DIRNAMES): Add mul64 and norm.
+ (MULTILIB_EXCEPTIONS): Exclude combinations of mARC700 with mmul64
+ and mnorm.
+ (MULTILIB_EXCLUSIONS): Don't build multilibs with only mmul64 or
+ only mnorm, but just one with both combined.
+ * config/arc/predicates.md (dest_reg_operand): New predicate.
+ (move_dest_operand): Use it.
+ * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define __ARC_NORM__ and
+ __ARC_MUL64__ when the eponymous instructions are available.
+ (TARGET_OPTFPE): Define.
+ * config/arc/arc.md (loadqi_update): Use dest_reg_operand.
+ (load_zeroextendqisi_update, load_signextendqisi_update): Likewise.
+ (load_zeroextendhisi_update, movsicc, movdicc, movsfcc): Likewise.
+ (movdfcc, movsicc_insn, movdicc_insn, movsfcc_insn): Likewise.
+ (movdfcc_insn, zero_extendqihi2_a4, zero_extendqihi2_i): Likewise.
+ (zero_extendqihi2, zero_extendqisi2_a4, zero_extendqisi2_ac): Likewise.
+ (zero_extendqisi2, zero_extendhisi2_a4, zero_extendhisi2_i): Likewise.
+ (zero_extendhisi2, extendqihi2_a4): Likewise.
+ (extendqihi2_i, extendqihi2, extendqisi2_a4, extendqisi2_ac): Likewise.
+ (extendqisi2): Likewise.
+ (extendhisi2_a4, extendhisi2_i, extendhisi2, abssi2): Likewise.
+ (smaxsi3, addsi3_mixed, mulsi3i, mulsi3_700): Likewise.
+ (umulsidi3, mulsidi3, addsi3, addsi3_insn_a4, subsi3): Likewise.
+ (subsi3_insn, add_n, sub_n, shift_and_add_insn): Likewise.
+ (shift_and_sub_insn, bset_insn, bxor_insn, bclr_insn): Likewise.
+ (bmsk_insn, andsi3_insn_a4, bicsi3_insn, iorsi3_a4, xorsi3): Likewise.
+ (negsi2_a4, negsi2i, one_cmplsi2_a4, one_cmplsi2): Likewise.
+ (one_cmpldi2_a4, ashlsi3, ashrsi3, lshrsi3, shift_si3): Likewise.
+ (rotrsi3, seq, sne, sgt, sle, sge, slt, sgtu, sleu, sgeu): Likewise.
+ (sltu, scc_insn, neg_scc_insn, not_scc_insn, movsi_ne): Likewise.
+ (movsi_cond_exec, add_cond_exec, commutative_cond_exec): Likewise.
+ (sub_cond_exec, noncommutative_cond_exec, call_prof): Likewise.
+ (call_value_via_reg_mixed, call_value_via_label): Likewise.
+ (call_value_via_imm, call_value_prof, flush_icache+1): Likewise.
+ (flush_icache+2, flush_icache+3, flush_icache+4): Likewise.
+ (flush_icache+5, flush_icache+6, flush_icache+7): Likewise.
+ (flush_icache+8, flush_icache+9, flush_icache+10): Likewise.
+ (flush_icache+11, flush_icache+12, norm, normw, swap): Likewise.
+ (core_read, lr, sibcall_value, sibcall_value_insn): Likewise.
+ (sibcall_value_prof, abssf2, negsf2): Likewise.
+ (storeqi_update): Likewise. Use "w" / "c" constraints.
+ (loadhi_update, load_signextendhisi_update, storehi_update): Likewise.
+ (loadsi_update, storesi_update, loadsf_update): Likewise.
+ (storesf_update, sminsi3, mulsi3_600, adddi3, subdi3): Likewise.
+ (anddi3, iordi3, xordi3, negdi2, one_cmpldi2): Likewise.
+ (ashlsi3_insn_mixed, ashrsi3_insn_mixed, lshrsi3_insn_mixed): Likewise.
+ (divaw): Likewise.
+ (andsi3, iorsi3): Likewise. Mark commutative.
+ (cmpsf, cmpdf, cmpsf_eq, cmpdf_eq): Make conditional on TARGET_OPTFPE.
+ (cmpsf_gt, cmpdf_gt, cmpsf_ge, cmpdf_ge, cmpsf_uneq): Likewise.
+ (cmpdf_uneq, cmpsf_ord, cmpdf_ord): Likewise.
+ * config/arc/dp-hack.h: Make optimized floating-point support
+ dependent on __ARC_NORM__ instead of __ARC700__.
+ * config/arc/fp-hack.h: Likewise.
+ * longlong.h (count_leading_zeros): Make dependent on __ARC_NORM__
+ instead of __ARC700__.
+ * config/arc/lib1funcs.asm (__clzsi2, L_adddf3, L_addsf3): Likewise.
+ (L_extendsfdf2, L_truncdfsf2, L_floatsidf, L_floatsisf): Likewise.
+ (L_floatunsidf, L_fixdfsi, L_fixsfsi, L_fixunsdfsi, L_eqdf2): Likewise.
+ (L_eqsf2, L_gtdf2, L_gtsf2, L_gedf2, L_gesf2, L_uneqdf2): Likewise.
+ (L_uneqsf2, L_orddf2, L_ordsf2): Likewise.
+ (L_muldf3): Provide variant for __ARC_NORM__ && __ARC_MUL64__.
+ (L_mulsf3, L_divdf3, L_divsf3): Likewise.
+ (__mulsi3): Provide new variants for __ARC_MUL64__,
+ __ARC_NORM__ and !__OPTIMIZE_SIZE__.
+ (__udivmodsi4): Provide new variant for __ARC_NORM__. Fix variant for
+ plain ARC600 to conform to lp instruction description of
+ document 5115-31.
+ * config/arc/ieee-754/arc600/divdf3.S: New file.
+ * config/arc/ieee-754/arc600/divsf3-stdmul.S: Likewise.
+ * config/arc/ieee-754/arc600/mulsf3.S: Likewise.
+ * config/arc/ieee-754/arc600/muldf3.S: Likewise.
+ * config/arc/arc.c (arc_select_cc_mode): Use TARGET_OPTFPE.
+ (gen_compare_reg): Likewise.
+
+ * config/arc/predicates.md (symbolic_memory_operand): Remove predicate.
+ * config/arc/arc-protos.h (symbolic_memory_operand): Don't declare.
+
+2008-07-07 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.gcc (tm_defines): Set TARGET_CPU_DEFAULT_OPT to mA6.
+ * config/arc/t-arc-newlib (MULTILIB_OPTIONS): Replace mA5 with mA6.
+ (MULTILIB_MATCHES): Likewise.
+ * config/arc/lib1funcs.asm (__divnorm): Only provide for A4.
+ Changed all callers.
+ (__udivmodsi4): Add optimized ARC600 version.
+
+2008-07-02 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * final.c (asm_insn_count): Make empty asm templates have zero length.
+ * config/arc/arc.c (arc_final_prescan_insn): Avoid calling arc_hazard
+ on epilogue delay slots.
+ (arc_reorg): When zero overhead loop is empty, replace it with a set
+ of lp_count to zero.
+ (arc600_corereg_hazard): Look into SEQUENCEs.
+ (arc_hazard): Don't look into SEQUENCEs.
+ * config/arc/arc.h (IS_ASM_LOGICAL_LINE_SEPARATOR): Define.
+ * config/arc/arc.md (attr "in_delay_slot"): Use prev/next_active_insn.
+ (doloop_begin_i): Assert minimum number of insns, not size.
+
+2008-07-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc-protos.h (arc_insn_length_adjustment): Declare.
+ (arc_corereg_hazard, arc_hazard, arc_write_ext_corereg): Likewise.
+ * config/arc/arc.c (arc_final_prescan_insn): Output a nop where
+ necessary to avoid hazards on ARC600.
+ (arc600_corereg_hazard_1, arc600_corereg_hazard): New functions.
+ (arc_hazard, arc_insn_length_adjustment): Likewise.
+ (write_ext_corereg_1, arc_write_ext_corereg): Likewise.
+ * config/arc/arc.md (in_delay_slot, doloop_begin_i, doloop_end_i):
+ Handle ARC600 idiosyncrasies concerning zero overhead loops.
+ (doloop_end): Enable for ARC600.
+
+ * config/arc/arc.c (arc_secondary_reload): Also require reload
+ for WRITABLE_CORE_REGS, but restrict LPCOUNT_REG / WRITABLE_CORE_REGS
+ secondary reloads to reloads from memory.
+
+ * config/arc/arc.c (arc_final_prescan_insn): Put TARGET_DUMPISIZE
+ code after declarations.
+
+ * config/arc/arc-protos.h (arc_output_mi_thunk): Unless long call
+ semantics apply, use branch.
+
+2008-07-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/mxp/mxp.md (mov<mode>_clob): Now define_insn_and_split.
+ Remove stray comma.
+ * config/mxp/mxp.c (TARGET_PROMOTE_FUNCTION_RETURN): Redefine.
+ * config/mxp/mxp.h (PREFERRED_STACK_BOUNDARY): Define.
+ (PREFERRED_RELOAD_CLASS): Change SffVff_REGS to Sff_REGS.
+ (STATIC_CHAIN): Define.
+ (HARD_REGNO_RENAME_OK): Define.
+
+2008-06-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.gcc (mxp-arc-elf): Change to:
+ (mxp-*-elf).
+ * regclass.c (struct reg_pref): Use short rather than char for members.
+ * config/mxp/constraints.md: Rename constraints to satisfy the new
+ rule on same length for same leading character.
+ * config/mxp/mxp.md: Update to reflect constraint renaming.
+ Reflect rename of respectively define_code_macro and
+ define_mode_macro to define_code_iterator and define_mode_iterator.
+ * UNITS_PER_SIMD_WORD: Add MODE parameter.
+
+2008-06-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_expand_builtin): Use new CALL_EXPR accessors.
+ (arc_expand_simd_builtin): Likewise.
+
+ * config/arc/arc.c (arc_reorg): Don't try to create cbranchsi4_scratch
+ if a REG_CROSSING_JUMP is seen.
+ * config/arc/arc.md (jump): Rename to:
+ (jump_i). Reject for TARGET_LONG_CALLS_SET if REG_CROSSING_JUMP
+ note is found.
+ (jump): New define_expand.
+ (cbranchsi4_scratch): Re-indent; clean up length calculation.
+
+2008-06-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (subsi3_insn): Alternative 4 is "nocond".
+
+ * config/arc/arc.md (cbranchsi4_scratch): Show clobber of CC register
+ in pattern.
+
+ * config/arc/arc.h (LINK_SPEC): s/shared/shared:-shared/ .
+
+ * config/arc/arc.h (IS_POWEROF2_P): Check for zero.
+ * config/arc/arc.md (andsi3, iorsi3, xorsi3):
+ Use 'w' instead of 'c' for output constraint. Drop J in Ji.
+ Use arc_size_opt_level. Fix formatting.
+
+ * config/arc/arc.md (anddi3): Fix result constraints to use 'w'.
+ (iordi3, xordi3, negdi2, divaw): Likewise.
+
+ * config/arc/arc-protos.h (disi_highpart): Declare.
+ * config/arc/arc.h (disi_highpart): New function.
+ * config/arc/arc.md (addf, adc, subf, sbc): New patterns.
+ (anddi3, iordi3, xordi3): Make conditional on TARGET_OLD_DI_PATTERNS.
+ (negdi2): Likewise.
+ (*movsi_set_cc_insn, unary_comparison): Use cc_set_register.
+ (*commutative_binary_comparison): Likewise.
+ (*noncommutative_binary_comparison): Likewise.
+ (adddi3, subdi3): Now define_expand.
+ (*not_scc_insn): Use cc_use_register.
+ * config/arc/arc.opt (mold-di-patterns): New option.
+ * config/arc/predicates.md (cc_set_register): New predicate.
+ (cc_use_register): Likewise.
+
+ * config/arc/arc.md (subsi3): Allow constant operand 1.
+ (subsi3_insn): Likewise.
+
+2008-06-12 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (RTX_OK_FOR_OFFSET_P): Fix typo.
+
+2008-06-09 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * genattr.c (gen_attr): Include vec.h and statistics.h.
+ * Makefile.in (INSN_ATTR_H): Add vecprim.h, vec.h and statistics.h.
+
+ * expr.c (can_move_by_pieces): Fix logic.
+
+ * rtlanal.c (walk_stores): New function, unconstified copy
+ of note_stores.
+ * rtl.h (walk_stores): Declare.
+
+2008-06-09 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * doc/tm.texi (CAN_MOVE_BY_PIECES): Document.
+ * expr.c (emit_block_move_via_movmem): No longer static.
+ (can_move_by_pieces): New argument consider_movmem.
+ Changed all callers.
+ (expand_constructor): Use can_move_by_pieces.
+ * expr.h (emit_block_move_via_movmem): Declare.
+ (can_move_by_pieces): Adjust declaration.
+ * builtins.c (expand_builtin_mempcpy): Use target.
+ Use emit_block_move_via_movmem.
+ * config/arc/arc-protos.h (arc_expand_movmem): Declare.
+ * config/arc/arc.c (force_offsettable): New function.
+ (arc_expand_movmem): Likewise.
+ * config/arc/arc.h (MOVE_MAX_PIECES): Don't define.
+ (MOVE_BY_PIECES_P, CAN_MOVE_BY_PIECES, MOVE_RATIO): Define.
+ * config/arc/arc.md (movmemsi): New pattern.
+
+2008-06-06 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_init_reg_tables): Set proper
+ arc_regno_reg_class for core regs / writable core regs.
+ * config/arc/arc.h (CONDITIONAL_REGISTER_USAGE): If any of the
+ registers in the range 32..59 has been made non-fixed, include
+ them in WRITABLE_CORE_REGS.
+ (enum reg_class, REG_CLASS_NAMES): Add WRITABLE_CORE_REGS.
+ (REG_CLASS_CONTENTS): Likewise.
+ (PREFERRED_RELOAD_CLASS): Check for WRITABLE_CORE_REGS.
+ * config/arc/arc.md (*movqi_insn): Use 'w' constraint for
+ writable core regs.
+ (*movhi_insn, movsi, *movsi_set_cc_insn, *movdi_insn): Likewise.
+ (*movsf_insn, *movdf_insn, *movsicc_insn, *movdicc_insn): Likewise.
+ (*movdicc_insn, *movsfcc_insn, *movdfcc_insn): Likewise.
+ (*zero_extendqihi2_i, *zero_extendqisi2_ac): Likewise.
+ (*zero_extendhisi2_i, *extendqisi2_ac, *extendhisi2_i): Likewise.
+ (abssi2, smaxsi3, sminsi3, subsi3_insn, *add_n, *sub_n): Likewise.
+ (*shift_and_add_insn_mixed, *shift_and_add_insn): Likewise.
+ (*shift_and_sub_insn, *bset_insn_mixed, *bset_insn): Likewise.
+ (*bxor_insn, *bclr_insn, *bmsk_insn, *bicsi3_insn, iorsi3): Likewise.
+ (negsi2, one_cmplsi2, rotrsi3, *scc_insn, *neg_scc_insn): Likewise.
+ (*not_scc_insn, *movsi_ne, *movsi_cond_exec, *add_cond_exec): Likewise.
+ (*commutative_cond_exec, *sub_cond_exec): Likewise.
+ (*noncommutative_cond_exec, flush_icache+[23479], norm): Likewise.
+ (normw, swap, *doloop_fallback, abssf2, negsf2, negsf2): Likewise.
+ (mulsi3): Rename to:
+ (*mulsi3_700).
+ (mulsi3_600): New pattern.
+ (mulsi3): New define_expand.
+ * config/arc/arc.opt (mdynamic): New option.
+ * config/arc/constraints.md ("w"): New constraint.
+
+ * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Fix ARC600 / ARC700
+ definitions.
+
+2008-05-22 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (flush_icache+5): Fix tests.
+
+2008-05-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h [UCLIBC_DEFAULT] (STATIC_LINK_SPEC): Define;
+ default to static linking.
+ (LINK_SPEC) [UCLIBC_DEFAULT]: Use STATIC_LINK_SPEC.
+
+2008-05-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (LINK_SPEC) [UCLIBC_DEFAULT]: Pass '-shared'
+ through.
+
+2008-05-09 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.gcc (arc-*-elf*): Add TARGET_CPU_DEFAULT_OPT to tm_defines.
+ (arc600-*-linux-uclibc): Likewise; remove USE_UCLIBC.
+ (arc*-*-linux-uclibc): Likewise.
+ (with-cpu): Recognize arc600.
+ * config/arc/arc.c: Use UCLIBC_DEFAULT instead of USE_UCLIBC.
+ * config/arc/arc.h (UCLIBC_DEFAULT): Define if not already defined.
+ Use instead of USE_UCLIBC.
+ (CPP_SPEC): Move everything from here...
+ (TARGET_CPU_CPP_BUILTINS): ... to here.
+ (CPP_SPEC): Unify.
+ (DRIVER_SELF_SPECS): Always define. Use TARGET_CPU_DEFAULT_OPT.
+ (MULTILIB_DEFAULTS): Use TARGET_CPU_DEFAULT_OPT.
+
+2008-04-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * mode-classes.def (MODE_VECTOR_CC): New mode class.
+ * genmodes.c (vector_class): Support MODE_VECTOR_CC.
+ (complete_mode): Allow bytesize to have been set for MODE_CC.
+ Support MODE_VECTOR_CC.
+ (SIZED_CC_MODE): New macro.
+ (make_special_mode): Return mode_data struct.
+ * config.gcc (mxp-arc-elf*): Set extra_objs.
+
+2008-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.gcc (mxp-arc-elf*): New configuration.
+ * doc/mxp.texi: New file.
+ * config/mxp: New directory. FIXME: details.
+ * mode-classes.def (MODE_CLASSES): Add MODE_VECTOR_PARTIAL_INT.
+ * cse.c (cse_insn): Fix loop to stop at VOIDmode.
+ * regmove.c (discover_flags_reg): Cope with failure of gen_add3_insn
+ for word_mode.
+ * genmodes.c (vector_class): Support MODE_VECTOR_PARTIAL_INT.
+ (complete_mode, emit_mode_adjustments): Likewise.
+ * target-def.h (TARGET_PRESERVE_RELOAD): Fix spelling.
+
+2008-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (LINK_SPEC) [USE_UCLIBC]: Add options
+ -z max-page-size=0x1000 -z common-page-size=0x1000 .
+
+2008-04-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_profile_call): Pass pointer as argument to
+ _mcount_call.
+
+2008-04-16 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (CONDITIONAL_REGISTER_USAGE): Adjust SIBCALL_REGS
+ according to call_used_regs.
+ * config/arc/gmon/mcount.c (_MCOUNT_DECL): Remove stray
+ catomic_compare_and_exchange_bool_acq call.
+ * testsuite/gcc.dg/func-ptr-prof.c: New file.
+
+2008-04-09 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (LINK_SPEC) [USE_UCLIBC]: When profiling, use
+ arclinux_prof emulation; else, use arclinux emulation.
+ (LINK_SPEC) [!USE_UCLIBC]: When profiling, use arcelf_prof emulation.
+
+2008-04-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.gcc (arc600-*-linux-uclibc*): New configuration.
+ * config/arc/t-arc600-uClibc: New file.
+
+2008-04-05 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (gen_compare_reg): Swap operands for fpx
+ CC_FP_GEmode comparisons.
+ * config/arc/fpx.md (cmpsfpx_gt): Rename to:
+ (cmpfpx_gt). Also clear n flag if z flag is set.
+ (cmpsfpx_ge): Rename to:
+ (cmpfpx_ge). Assume swapped comparison operands. Don't set
+ the c flag if the z flag is set.
+
+2008-04-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/dp-hack.h (df_to_usi): Write as
+ (tf_to_usi).
+
+2008-04-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/fp-bit.c (_fpdiv_parts): Avoid double rounding.
+ * config/arc/arc-protos.h (prepare_extend_operands): Move into part
+ guarded by RTX_CODE.
+ (arc_output_addsi, arc_expand_movmem, prepare_move_operands): Likewise.
+ * config/arc/predicates.md (proper_comparison_operator):
+ Fix CC_Cmode case.
+ * config/arc/arc.c (branch_dest): Check for reversed branch.
+ * config/arc/arc.h (LEGITIMATE_SCALED_ADDRESS_P): Add missing
+ parentheses.
+ * config/arc/arc.md (*movsi_scaled_insn): Remove pattern.
+ (movdi_insn): Add %? output specifier.
+ (movdf_insn): Likewise. Add cond attribute.
+
+ * config/arc/arc-protos.h (gen_compare_reg): Update prototype.
+ (arc_output_libcall): Declare.
+ * config/arc/predicates.md (proper_comparison_operator):
+ Add cases for floating point CCmodes, SFmode and DFmode.
+ * config/arc/ieee-754/floatunsidf.S: New file.
+ * config/arc/ieee-754/divdf3.S: Likewise.
+ * config/arc/ieee-754/orddf2.S: Likewise.
+ * config/arc/ieee-754/eqsf2.S: Likewise.
+ * config/arc/ieee-754/truncdfsf2.S: Likewise.
+ * config/arc/ieee-754/fixunsdfsi.S: Likewise.
+ * config/arc/ieee-754/divtab-arc-df.c: Likewise.
+ * config/arc/ieee-754/uneqsf2.S: Likewise.
+ * config/arc/ieee-754/adddf3.S: Likewise.
+ * config/arc/ieee-754/gtsf2.S: Likewise.
+ * config/arc/ieee-754/gedf2.S: Likewise.
+ * config/arc/ieee-754/floatsisf.S: Likewise.
+ * config/arc/ieee-754/muldf3.S: Likewise.
+ * config/arc/ieee-754/fixdfsi.S: Likewise.
+ * config/arc/ieee-754/divsf3.S: Likewise.
+ * config/arc/ieee-754/ordsf2.S: Likewise.
+ * config/arc/ieee-754/eqdf2.S: Likewise.
+ * config/arc/ieee-754/divtab-arc-sf.c: Likewise.
+ * config/arc/ieee-754/divsf3-stdmul.S: Likewise.
+ * config/arc/ieee-754/addsf3.S: Likewise.
+ * config/arc/ieee-754/uneqdf2.S: Likewise.
+ * config/arc/ieee-754/gesf2.S: Likewise.
+ * config/arc/ieee-754/gtdf2.S: Likewise.
+ * config/arc/ieee-754/mulsf3.S: Likewise.
+ * config/arc/ieee-754/floatsidf.S: Likewise.
+ * config/arc/ieee-754/fixsfsi.S: Likewise.
+ * config/arc/ieee-754/arc-ieee-754.h: Likewise.
+ * config/arc/ieee-754/extendsfdf2.S: Likewise.
+ * config/arc/arc.c (get_arc_condition_code): Add floating point cases.
+ (arc_select_cc_mode, arc_init_reg_tables): Likewise.
+ (gen_compare_reg): Likewise. Don't take comparison operands, but
+ the result mode as parameter. Return appropriate comparison of
+ flags register against 0. Changed all callers.
+ (arc_output_libcall): New function.
+ * config/arc/arc.h (INSN_SETS_ARE_DELAYED): Define.
+ (INSN_REFERENCES_ARE_DELAYED, REVERSE_CONDITION): Likewise.
+ * config/arc/fpx.md (cmpsfpx_raw, cmpdfpx_raw): New patterns.
+ (cmpsfpx_gt, cmpsfpx_ge): Likewise.
+ * config/arc/dp-hack.h: New file.
+ * config/arc/fp-hack.h: New file.
+ * config/arc/arc.md (movsicc_insn): Use proper_comparison_operator.
+ (movdicc_insn, movsfcc_insn, movdfcc_insn, neg_scc_insn): Likewise.
+ (not_scc_insn, movsi_cond_exec, add_cond_exec): Likewise.
+ (commutative_cond_exec, sub_cond_exec): Likewise.
+ (noncommutative_cond_exec): Likewise.
+ (scc_insn): Use REVERSE_CONDITION.
+ (bunge, bungt, bunle, bunlt, buneq, bltgt, bordered): New patterns.
+ (bunordered, cmpsf, cmpdf, cmp_float, cmpsf_eq, cmpdf_eq): Likewise.
+ (cmpsf_gt, cmpdf_gt, cmpsf_ge, cmpdf_ge, cmpsf_uneq): Likewise.
+ (cmpdf_uneq, cmpsf_ord, cmpdf_ord, abssf2, negsf2): Likewise.
+ * config/arc/lib1funcs.asm (FUNC, ENDFUNC0, ENDFUNC): New macros.
+ (__udivsi3, __divsi3): Use FUNC / ENDFUNC.
+ Toplevel: Add code to include files from ieee-754 subdirectory.
+ * config/arc/t-arc (LIB1ASMFUNCS): Add floating-point functions.
+ (dp-bit.c): cat config/arc/dp-hack.h.
+ (fp-bit.c): cat config/arc/fp-hack.h.
+ * config/arc/arc-modes.def (CC_FP_GT, CC_FP_GE, CC_FP_ORD): New modes.
+ (CC_FP_UNEQ, CC_FPX): Likewise.
+
+2008-03-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/t-arc700-uClibc (TARGET_LIBGCC2_CFLAGS): Add -fPIC.
+
+2008-03-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (lr, sr): Enable r,r alternative.
+
+2008-03-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (DRIVER_SELF_SPECS): Change condition to !mA*.
+
+2008-03-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (DRIVER_SELF_SPECS): Change condition to !-mA.
+
+2008-03-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (movqi_insn_mixed, loadqi_insn_mixed): Remove.
+ (storeqi_insn_mixed, movhi_insn_mixed, loadhi_insn_mixed): Likewise.
+ (storehi_insn_mixed, movsi_insn_mixed, loadsi_insn_mixed): Likewise.
+ (storesi_insn_mixed): Likewise.
+ (movhi_insn): Rewrite after model of movsi pattern.
+
+2008-03-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_init): Fix default setting of processor
+ type in target_flags.
+
+2008-03-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * reload.c (push_secondary_reload): Add missing break.
+ * config/arc/arc.h (LEGITIMIZE_RELOAD_ADDRESS): Define.
+
+2008-03-18 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (commutative_binary_comparison): Fix position
+ of '%'.
+
+2008-03-17 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): Fix offset for loop end.
+
+2008-03-06 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_encode_section_info): Use
+ targetm.binds_local_p instead of TREE_PUBLIC.
+
+2008-03-05 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/lib1funcs.asm (clzsi2) [!__ARC700__,!__A4__]:
+ Re-implement to avoid undefined behaviour and increase performance.
+
+ * config/arc/arc.h (CONDITIONAL_REGISTER_USAGE) <TARGET_ARC700>:
+ Set fixed_regs[LP_COUNT].
+
+2008-03-05 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (OVERRIDE_OPTIONS): Disable small data for -fPIC.
+ (LEGITIMATE_SCALED_ADDRESS_P): For -fPIC, the only allowed constant
+ bases are CONST_INT.
+
+2008-02-27 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/constraints.md ("Cca", C2a): Check absolute value against
+ maximum scaled constant.
+
+2008-02-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/crti.asm, config/arc/crtn.asm: Fix stack layout.
+
+2008-02-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (long_immediate_loadstore_operand):
+ Also recognize that non-sdata SYMBOL_REFS require long immediates.
+
+2008-02-07 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_print_operand_address): Always add @sda
+ for small data symbols. Use recursion.
+ (prepare_move_operands): Also do small data address checks for
+ floating point point modes.
+ (prepare_extend_operands): New function.
+ * config/arc/arc-protos.h (prepare_extend_operands): Declare.
+ * config/arc/arc.md (zero_extendqihi2, zero_extendhisi2): Rename to:
+ (*zero_extendqihi2_i, *zero_extendhisi2_i).
+ (zero_extendqihi2, zero_extendhisi2): New expanders.
+ (zero_extendqisi2, extendqisi2): Use prepare_extend_operands.
+ (extendqihi2, extendhisi2): Rename to:
+ (*extendqihi2_i, *extendhisi2_i).
+ (extendqihi2, extendhisi2): New expanders.
+
+2008-02-07 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): If loop is too large for lp
+ instruction, emit code sequence using sr to set LOOP_START / LOOP_END
+ appropriately.
+
+2008-01-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (RTX_OK_FOR_OFFSET_P): Before applying scaling,
+ check that offset is aligned.
+
+2008-01-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * resource.c (find_dead_or_set_registers): Don't consider sets inside
+ a COND_EXEC to kill the value in a register.
+
+2008-01-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/constraints.md ("Usc"): Reject small data.
+
+ * config/arc/arc.md (movsi_insn): Fix predicate test for valid constant
+ store address.
+
+2008-01-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * version.c (VERSUFFIX): Bump date.
+
+2007-12-06 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc-protos.h (arc_asm_output_aligned_decl_common):
+ Change to:
+ (arc_asm_output_aligned_decl_local).
+ * config/arc/arc.c: Likewise.
+ * config/arc/arc.h (OPTIMIZATION_OPTIONS): Initialize flag_no_common
+ as -1.
+ (OVERRIDE_OPTIONS): If flag_no_common is still -1, initialize according
+ to TARGET_NO_SDATA_SET.
+ (ASM_OUTPUT_ALIGNED_DECL_COMMON): Don't define.
+ (ASM_OUTPUT_ALIGNED_DECL_LOCAL): Use arc_asm_output_aligned_decl_local.
+
+2007-11-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (unspec_prof_hash): Only hash s1 string for
+ SYMBOL_REF.
+ (unspec_prof_htab_eq): Use rtx_equal_p.
+ * config/arc/arc.md (call): Use emit_call_insn for profiling call.
+ (call_value): Likewise.
+
+2007-11-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ target/27758:
+ * config/arc/arc.c (arc_output_pic_addr_const) <case SYMBOL_REF>:
+ Use output_addr_const instead of assemble_name.
+
+2007-11-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (movsi_insn): Add 'S' modifier for operand 1 of
+ alternative 10.
+
+2007-11-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc-protos.h (prepare_move_operands): Declare.
+ * config/arc/predicates.md (move_dest_operand): Reject scaled index.
+ (shouldbe_register_operand): New predicate.
+ * config/arc/arc.c (arc_address_cost): Scaled index and plain index
+ have low cost.
+ (arc_print_operand): Print ".as" for scaled index.
+ (arc_print_operand_address): Decompose scaled index.
+ The base can be CONST_INT.
+ (arc_rewrite_small_data_1): If expression would get too complex,
+ force small data address into register.
+ (arc_rewrite_small_data): Pass &op as data.
+ (prepare_move_operands): New function.
+ * config/arc/arc.h (RTX_OK_FOR_INDEX_P): Enable.
+ (LEGITIMATE_OFFSET_ADDRESS_P): Add INDEX parameter. Changed all users.
+ Require MODE size to be no larger than 4.
+ (LEGITIMATE_SCALED_ADDRESS_P): Define.
+ (GO_IF_LEGITIMATE_ADDRESS): Use LEGITIMATE_SCALED_ADDRESS_P.
+ (GO_IF_MODE_DEPENDENT_ADDRESS): Only say REG-REG PRE/POST_MODIFY
+ is mode dependent.
+ Indexed addresses are mode dependent.
+ * config/arc/arc.md (movqi, movhi, movsi, movdf): Use
+ prepare_move_operands.
+ (movdi): Use move_dest_operand.
+ (doloop_begin_i): Don't align if no lp instruction is used.
+ (doloop_end_i): Now define_insn_and_split.
+ * config/arc/arc.opt (mindexed-load, mauto-modify-reg): New options.
+
+2007-10-30 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * Makefile.in (options.c): Depend on $(INSN_ATTR_H). Include it.
+ * config/arc/arc.h: Don't include insn-attr.h.
+
+2007-10-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_fallback_m): Avoid unaligning code.
+
+2007-10-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (doloop_begin_i): Add ????X alternative.
+ (doloop_end_i): Add m alternative.
+ (doloop_fallback_m): New pattern.
+ * config/arc/arc.c (arc_reorg): Fix up doloop_end_i instructions
+ that ended up with memory. When a doloop_begin_i doesn't have the
+ right register, try to find it in a preceding move.
+
+2007-10-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_init): Provide default for arc_tune and
+ arc_multcost.
+ (arc_rtx_costs): Use arc_multcost.
+ * config/arc/arc.md: Move scheduling description to...
+ * config/arc/arc700.md: ... here. New file.
+ * config/arc/arc600.md: New file. Tie ARC700 scheduling description
+ to tune_arc700 attribute.
+ (mov_set_cc_insn, compare_insn): Combine to:
+ (compare_700).
+ (branch_700): New reservation. Create new bypass.
+ (multi_SI): Replace with:
+ (multi_xmac, multi_std): New reservations. Adjust bypasses.
+ * config/arc/arc.md: Update Copyright blurb.
+ Include new files.
+ (tune, tune_arc700): New attributes.
+ (movsi_set_cc_insn): Set type to "compare".
+ * config/arc/arc.h: Protect against multiple inclusion.
+ Include "insn-attr.h".
+ * config/arc/arc.opt (multcost, mtune): New options.
+
+2007-10-23 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (TRAMPOLINE_ADJUST_ADDRESS): Properly define.
+ * config/arc/arc.md (sub_cond_exec): Add mode to set source.
+ (doloop_begin_i): Add mode to operand 0.
+
+ * config/arc/divtab-arc700.c (main): Emit reverse ordered table.
+ * config/arc/lib1funcs.asm (__divsi3): Use pcl as base addressing
+ register with a negative index.
+
+ * config/arc/predicates.md (cc_register): Now a special_predicate.
+ (_2_4_8_operand): New predicate.
+ * config/arc/arc.c (arc_rtx_costs): Add description for add_n and
+ sub_n costs.
+ * config/arc/arc.md (add_n, sub_n): New patterns.
+ (shift_and_add_insn_mixed): Type shift.
+
+ * combine.c (gen_lowpart_for_combine): Use omode when generating
+ clobber.
+
+2007-10-18 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc-protos.h (arc_save_restore): Don't declare.
+ * config/arc/predicates.md (p2_immediate_operand): New predicate.
+ (zn_compare_operator, commutative_operator): Likewise.
+ (noncommutative_operator, unary_operator): Likewise.
+ (move_dest_operand): Reject PRE_MODIFY / POST_MODIFY with
+ register-register increment.
+ (proper_comparison_operator): Replace.
+ * config/arc/arc.c ("tm-constrs.h", "reload.h"): #include.
+ (void arc_init): Don't set arc_punct_chars['~'].
+ (get_arc_condition_code): Take mode into account.
+ (arc_select_cc_mode): Replace.
+ (arc_init_reg_tables): Update CC*mode handling.
+ (arc_address_cost): Replace costs.
+ (arc_save_restore): New arguments parity and first_offset.
+ (arc_output_function_prologue): Emit short instructions.
+ Fold stack adjustments into pre-modify addressing.
+ (arc_output_function_epilogue): Likewise.
+ Don't emit a nop when delay slot remains unfilled.
+ If the delay insn is short, make jump long.
+ (output_scaled): New variable.
+ (arc_print_operand): Remove '~' case.
+ Add support for scaled address offsets.
+ Add 'B' and 'O' cases.
+ (record_cc_ref): Remove COND_SET_ZNC handling.
+ (arc_final_prescan_insn): Support -misize.
+ (arc_reorg): Handle case when end of zero-overhead loop has been
+ duplicated.
+ When generating brcc insn, check that first comparison operand is
+ a register.
+ (arc_output_addsi): New function.
+ * config/arc/arc.h (OPTIMIZATION_OPTIONS): Set default for
+ arc_size_opt_level.
+ (OVERRIDE_OPTIONS): When (arc_size_opt_level == 3), set optimize_size.
+ (DATA_ALIGNMENT): Don't add extra alignment when arc_size_opt_level
+ is 3.
+ (SMALL_INT_RANGE): New macro.
+ (HAVE_PRE_MODIFY_REG, HAVE_POST_MODIFY_REG): Define.
+ (RTX_OK_FOR_OFFSET_P): Allow scaled indices.
+ (LEGITIMATE_OFFSET_ADDRESS_P): Allow register-register
+ PRE_MODIFY / POST_MODIFY.
+ (JUMP_ALIGN, MOVE_MAX_PIECES): Define.
+ (LABEL_ALIGN_AFTER_BARRIER, LOOP_ALIGN): Set to JUMP_ALIGN.
+ * config/arc/arc.md (mov_set_cc_insn, core_insn, compare_insn):
+ Align write_port allocation with other insn_reservation definitions.
+ (attribute cond): Remove set_znc value. Changed all users.
+ (movqi_insn, movsi_insn, zero_extendqihi2): Rebalance alternatives.
+ (zero_extendqisi2_ac, addsi3_mixed, subsi3_insn): Likewise.
+ (cmpsi_cc_insn_mixed): Likewise.
+ (movsi_set_cc_insn): Match both CC_ZNmode and CC_Zmode.
+ (*movsi_insn_mixed): Changed another 'r' to 'c'.
+ (unary_comparison, commutative_binary_comparison): New patterns.
+ (noncommutative_binary_comparison, cmpsi_cc_zn_insn): Likewise.
+ (cmpsi_cc_z_insn, cmpsi_cc_c_insn, movsi_ne): Likewise.
+ (movsi_cond_exec, add_cond_exec, commutative_cond_exec): Likewise.
+ (sub_cond_exec, noncommutative_cond_exec): Likewise.
+ (movdi_insn, movdf_insn): Add support for scaled offsets.
+ (zero_extendhisi2, extendhisi2): Likewise.
+ (movsicc_insn, cbranchsi4_scratch): Tweak alternatives.
+ (zero_extendhisi2): Add support for scaled offsets.
+ (addsi3_mixed): Use arc_output_addsi.
+ (cmpsi_ccznc_insn_a4, cmpsi_cczn_insn_mixed): Deleted.
+ (cmpsi_cczn_insn): Likewise.
+ (scc_insn): Now a define_split.
+ (branch_insn, rev_branch_insn, call_via_label): Don't output nops.
+ (call_prof, call_value_via_label, call_value_prof): Likewise.
+ (sibcall_value_insn, sibcall_prof, sibcall_value_prof): Likewise.
+ (flush_icache+3): Use CC_ZNmode.
+ (flush_icache+4): Likewise. Check that operand[4] dies.
+ (sibcall_insn): Add short alternative.
+ (doloop_begin_i): Align loop insn.
+ * config/arc/constraints.md (C_0, Usc, Rcq, Rs5): New constraints.
+ (U): Rename to:
+ (Usd).
+ * config/arc/arc.opt (-malign-loops, -mno-align-loops): Remove.
+ (msize-level=): Add.
+ * config/arc/arc-modes.def (CCZNC, CCZN): Remove.
+ (CC_ZN, CC_Z, CC_C): Add.
+
+2007-10-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * tree-ssa-loop-ch.c (copy_loop_headers): Suppress one case of
+ counter-productive copying.
+ * config/arc/arc-protos.h (arc_cond_exec_p): Declare.
+ (arc_secondary_reload, arc_register_move_cost): Likewise.
+ * config/arc/predicates.md (move_double_src_operand): Allow
+ auto inc/dec.
+ * config/arc/arc.c (arc_invalid_within_doloop): New function.
+ (arc_cond_exec_p, arc_secondary_reload): Likewise.
+ (arc_register_move_cost): Likewise.
+ (TARGET_INVALID_WITHIN_DOLOOP): Define.
+ (arc_print_operand): Handle POST_INC / POST_DEC / PRE_MODIFY /
+ POST_MODIFY.
+ (arc_reorg): Fix up zero-overhead loops.
+ * config/arc/arc.h (FIRST_PSEUDO_REGISTER): Now 146.
+ (FIXED_REGS): Make LP_COUNT non-fixed.
+ (FIXED_REGS, CALL_USED_REGS, REG_CLASS_CONTENTS): Add LP_START & LP_END.
+ Make LP_COUNT non-fixed.
+ (PREFERRED_RELOAD_CLASS): Substitute GENERAL_REGS for CORE_REGS.
+ (HAVE_POST_INCREMENT, HAVE_POST_DECREMENT): Define.
+ (HAVE_PRE_MODIFY_DISP, HAVE_POST_MODIFY_DISP): Likewise.
+ (SECONDARY_RELOAD_CLASS): Likewise.
+ (GO_IF_LEGITIMATE_ADDRESS, GO_IF_MODE_DEPENDENT_ADDRESS): Add handling
+ for POST_DEC / POST_INC / PRE_MODIFY / POST_MODIFY.
+ (REGISTER_NAMES): Add lp_start and lp_end.
+ * config/arc/arc.md (UNSPEC_LP, LP_COUNT, LP_START, LP_END): New
+ constants.
+ (type): Add loop.
+ (in_delay_slot): No loops.
+ (entire file): Use "c" constraint.
+ (movdi_insn): Handle POST_INC / POST_DEC / PRE_MODIFY / POST_MODIFY.
+ (movdf_insn): Likewise.
+ (addsi3_mixed): Make conditional execution take precedence over
+ short insns. Improve instruction selection.
+ (subsi3_insn): Add rsub support.
+ (doloop_begin, doloop_begin_i, doloop_end, doloop_end_i): New patterns.
+ (doloop_fallback): Likewise.
+ * config/arc/constraints.md (Cca, C2a): New constraints.
+ * config/arc/arc.opt (mexperimental-mask): New option.
+
+2007-10-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/t-arc-newlib (MULTILIB_OPTIONS): Remove mA4.
+ (MULTILIB_DIRNAMES): Remove arc.
+ * config/arc/arc.c (arc_init <!USE_UCLIBC>): Default is A5.
+ * config/arc/arc.h (CPP_SPEC): Default to __A5__.
+ (ASM_SPEC): Default is -mA5.
+ (MULTILIB_DEFAULTS): Set to { "mA5", "EL" } .
+
+2007-10-03 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/gmon/gmon.c: #include <stddef.h> .
+
+2007-10-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (LOCAL_ALIGNMENT): Define.
+
+2007-10-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.gcc (arc-*-elf*): Set tmake_file.
+ (arc*-*-linux-uclibc*): Add arc/t-arc to tmake_file.
+ * config/arc/t-arc: Use $@ / $<.
+ (CRTSTUFF_T_CFLAGS_S): Remove.
+ ($(T)prof-freq-stub.o, $(T)dcache_linesz.o): New rules.
+ ($(T)profil-uclibc.o): Likewise.
+ ($(T)libgmon.a): Use $(PROFILE_OSDEP).
+ (EXTRA_MULTILIB_PARTS): Remove mcount.o, gmon.o and prof-freq.o.
+ (MULTILIB_OPTIONS, MULTILIB_DIRNAMES, MULTILIB_MATCHES): Move to:
+ * config/arc/t-arc-newlib: New file.
+ (PROFILE_OSDEP): Define.
+ * config/arc/t-arc700-uClibc: Remove pieces redundant with t-arc.
+ (PROFILE_OSDEP): Define.
+ * config/arc/arc.h (STARTFILE_SPEC) <USE_UCLIBC>: Remove references
+ to gcrt1.o.
+ (ENDFILE_SPEC) <USE_UCLIBC> : Use crtgend.o for profiling.
+ (LIB_SPEC) <USE_UCLIBC>: Use -lgmon -u profil --defsym __profil=profil
+ for profiling.
+ * config/arc/gmon/gmon.c (__profile_frequency): Remove.
+ * config/arc/gmon/machine-gmon.h (_mcount_tailcall): Don't declare.
+ * config/arc/gmon/profil.S (__profile_timer_cycles): New weak symbol.
+ (__profil): Use it. Fix FUNC usage. Streamline ARC700 cache
+ initialization a bit.
+ (__dcache_linesz): Move to:
+ * config/arc/gmon/dcache_linesz.S: New file.
+ * config/arc/gmon/prof-freq-stub.S: New file.
+
+2007-10-01 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (call_address_operand): Check CONSTANT_P
+ before using LEGITIMATE_CONSTANT_P.
+
+2007-09-28 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (DEFAULT_NO_SDATA): Define based on USE_UCLIBC.
+ (TARGET_DEFAULT_TARGET_FLAGS): Use it.
+
+2007-09-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.md (sibcall_insn): Use call_address_operand.
+ (sibcall_value_insn, sibcall_prof, sibcall_value_prof): Likewise.
+
+2007-09-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c: Back out patches for callee-based profiling.
+ * config/arc/arc.h: Likewise.
+
+ * config/arc/arc-protos.h (arc_profile_call): Declare.
+ * config/arc/arc.c (arc_initialize_trampoline): Generate oddly aligned
+ trampoline.
+ (write_profile_sections, unspec_prof_hash): New functions.
+ (unspec_prof_htab_eq, arc_profile_call): Likewise.
+ (arc_legitimate_constant_p): Allow UNSPEC_PROF.
+ (arc_function_ok_for_sibcall): Allow long & indirect calls.
+ Fix interrupt function check.
+ (arc_reorg): Fix up UNSPEC_PROF and emit special profile sections.
+ * config/arc/arc.h (ENDFILE_SPEC) <!USE_UCLIBC>: Add crtgend.
+ (TRAMPOLINE_SIZE): Bump up to 20.
+ (TRAMPOLINE_ADJUST_ADDRESS): Define.
+ * config/arc/gmon/gmon.c: Adjust for caller-based profiling.
+ * config/arc/gmon/mcount.c, gmon/machine-gmon.h: Likewise.
+ * config/arc/gmon/profil.S, config/arc/gmon/sys/gmon.h: Likewise.
+ * config/arc/arc.md (UNSPEC_PROF): Define.
+ (call, call_value, sibcall, sibcall_value): Add profiling code.
+ (call_prof, call_value_prof, sibcall_prof): New patterns.
+ (sibcall_value_prof): Likewise.
+ (sibcall_insn, sibcall_value_insn): Enable long / indirect sibcalls.
+ * config/arc/crtgend.asm: New file.
+ * config/arc/lib1funcs.asm (__udivsi3, __umodsi3):
+ Add .__arc_profile_forward entries.
+ * config/arc/t-arc (crtgend.o): New target.
+ (mcount.o): Remove -fcall-saved-r11.
+ (gmon.o): Add -mno-sdata.
+ (EXTRA_MULTILIB_PARTS): Add crtgend.o.
+ * config/arc/constraints.md (Cbr): New constraint.
+ * config/arc/crtg.asm: Add Start labels and alignment for
+ profile sections.
+
+ * config/arc/arc.h (enum reg_class): Add SIBCALL_REGS and CORE_REGS.
+ (REG_CLASS_NAMES, REG_CLASS_CONTENTS): Likewise.
+ * config/arc/constraints.md (c): Change to:
+ (Rgp).
+ (c, Rsc): New constraints.
+ * config/arc/arc.md (iorsi3): Use 'c' constraint.
+
+2007-09-07 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (TARGET_VERSION): Fix spelling.
+ * config/arc/t-arc700-uClibc (LIB1ASMFUNCS): Remove excess space.
+
+ * config/arc/lib1funcs.asm (__modsi3): Make Ldivstart a local label.
+
+ * config/arc/arc.h (STARTFILE_SPEC): Add -mkernel option.
+
+ * config/arc/gmon/gmon.c (__profile_frequency): Define nonzero.
+ (__monstartup): On error, clear p->tolimit.
+ * config/arc/gmon/mcount.c (_MCOUNT_DECL): Don't use
+ catomic_compare_and_exchange_bool_acq.
+ * config/arc/gmon/machine-gmon.h: Don't use __builtin_frame_address.
+ * config/arc/gmon/profil.S (__profil): Fix vector initialization.
+ Enable interrupts.
+ (__profil_irq): Fix register assignment. Make j.f explicit.
+
+2007-08-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/t-arc700-uClibc (CRTSTUFF_T_CFLAGS): Add -mno-sdata .
+ (MULTILIB_EXTRA_OPTS): Add mno-sdata .
+
+2007-08-23 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (LINK_SPEC) [!USE_UCLIBC]: Use -marcelf.
+
+ * config/arc/arc.h (STARTFILE_SPEC) [!USE_UCLIBC] [pg|p]: Add crtg.o.
+ (LIB_SPEC) [!USE_UCLIBC] [pg|p]: Add -lgmon.
+ (REG_CLASS_CONTENTS): Add blink to GENERAL_REGS.
+ (LARGE_INT): Fix cast type.
+ (FUNCTION_PROFILER): Provide expansion.
+ (PROFILE_BEFORE_PROLOGUE): Define.
+ (NO_PROFILE_COUNTERS): Define.
+ * config/arc/arc.c (MUST_SAVE_RETURN_ADDR): Take
+ current_function_profile into account.
+ (arc_compute_frame_size): Don't use MUST_SAVE_RETURN_ADDR as lvalue.
+ (arc_output_function_prologue): Don't push blink if it has already
+ been pushed by the profiling code.
+ * config/arc/gmon: New directory, containing atomic.h (empty),
+ auxreg.h, gmon.c, machine-gmon.h, mcount.c, prof-freq.c, profil.S,
+ sys/gmon.h, and sys/gmon_out.h .
+ * config/arc/t-arc (crtg.o, mcount.o, gmon.o, prof-freq.o): New rules.
+ (profil.o, libgmon.a): Likewise.
+ (EXTRA_MULTILIB_PARTS): Add mcount.o, gmon.o, prof-freq.o, libgmon.a
+ and crtg.o .
+ * config/arc/asm.h, config/arc/crtg.asm: New files.
+
+2007-05-29 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_decl_anon_ns_mem_p): New function, copied from
+ cp/tree.c .
+ (arc_in_small_data_p): Use default_binds_local_p_1 and
+ arc_decl_anon_ns_mem_p to determine if a symbol binds locally.
+
+2007-05-18 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/predicates.md (move_src_operand): Uncomment handling of
+ CONST.
+
+ * config/arc/arc.h (ASM_SPEC): Supply -mEA also for -mA7.
+
+2007-05-14 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * version.c: Add leading space. Add suffix to denote ARC changes.
+
+2007-05-14 J"orn Rennecke <joern.rennecke@arc.com>
+
+ Revert this patch:
+ 2007-04-19 J"orn Rennecke <joern.rennecke@arc.com>
+ * output.h (get_unnamed_section_ggc): New macro.
+ * varasm.c (init_varasm_once): Use it.
+
+ Instead, make sections xmalloced:
+ * c-pch.c (c_common_write_pch): Call pickle_in_section and
+ unpickle_in_section.
+ (c_common_read_pch): Call unpickle_in_section.
+ * varasm.c (unnamed_sections): Remove GTY marker.
+ (get_unnamed_section, get_noswitch_section): xmalloc section.
+ (pickled_in_section): New static variable.
+ (pickle_in_section, unpickle_in_section): New functions.
+ * output.h (struct unnamed_section): Mark as GTY((skip)).
+ (union section): Mark members unnamed_section and noswitch_section
+ as GTY((skip)).
+ (text_section, data_section, readonly_data_section): Remove GTY marker.
+ (sdata_section, ctors_section, dtors_section, bss_section): Likewise.
+ (sbss_section, tls_comm_section, comm_section): Likewise.
+ (lcomm_section, bss_noswitch_section, in_section): Likewise.
+ (pickle_in_section, unpickle_in_section): Declare.
+
+ * config/arc/arc.c (arc_function_value): Mark unsignedp with
+ ATTRIBUTE_UNUSED.
+
+2007-04-26 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc-protos.h (arc_function_value): Declare.
+ * config/arc/arc.h (FUNCTION_VALUE): Don't define.
+ * config/arc/arc.c (TARGET_FUNCTION_VALUE): Define.
+ (arc_function_value): New function.
+
+2007-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ PR 20375
+ * config/arc/arc.c (arc_setup_incoming_varargs): Don't abort on
+ BLKmode. Use arc_function_arg_advance.
+
+2007-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (TARGET_MUST_PASS_IN_STACK):
+ Set to must_pass_in_stack_var_size.
+
+2007-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Add __big_endian__ .
+ (CPP_SPEC): Remove __big_endian__ .
+ (ASM_SPEC, LINK_SPEC): For -mbig-endian, provide -EB.
+ * config/arc/t-arc (MULTILIB_MATCHES): Add EB=mbig-endian .
+
+2007-04-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config/arc/arc.c (arc_function_arg_advance): Only use
+ ROUND_ADVANCE_CUM if the argument has a non-zero size.
+
+2007-04-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ Fix profile-based feedback:
+ * config.gcc (arc-*-elf*): Don't use config/svr4.h.
+ * config/arc/arc.h: Don't include config/svr4.h.
+
+2007-04-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * longlong.h (__arc__): For __ARC700__, define umul_ppmm, UMUL_TIME,
+ __umulsidi3, count_leading_zeros, COUNT_LEADING_ZEROS_0 .
+ (COUNT_TRAILING_ZEROS_0): Define this according to
+ COUNT_LEADING_ZEROS_0 when defining count_trailing_zeros using
+ count_leading_zeros.
+ * libgcc2.c (__ffsSI2): Use COUNT_TRAILING_ZEROS_0.
+
+ * config/arc/divtab-arc700.c: New file.
+
+ Fix nested functions:
+ * gcc/config/arc/arc.c (emit_store_direct): New function.
+ (arc_initialize_trampoline): Initialize trampoline.
+ * gcc/config/arc/arc.h (TRAMPOLINE_TEMPLATE): Disable definition
+ which uses the wrong architecture and ABI.
+ (TRAMPOLINE_ALIGNMENT): Define.
+ (INITIALIZE_TRAMPOLINE): Use arc_initialize_trampoline.
+
+ Fix stdarg argument passing:
+ * gcc/config/arc/arc.c (arc_va_arg, arc_pass_by_reference): Pass
+ variable size and addressable type by reference.
+ (arc_arg_partial_bytes): Adjust CUM according to alignment of argument.
+ (arc_function_arg, arc_function_arg_advance): Likewise.
+ * gcc/config/arc/arc.h (ROUND_ADVANCE_CUM): Round.
+
+ * gcc/config/arc/arc.h (TARGET_CPU_CPP_BUILTINS): Define
+ __BIG_ENDIAN__ or __LITTLE_ENDIAN__, as the case might be.
+
+ * gcc/config/arc/arc.h (ASM_SPEC): For mARC700, pass -mEA.
+
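On the longlong.h part of the entry above: when count_trailing_zeros is synthesized from count_leading_zeros, COUNT_TRAILING_ZEROS_0 has to be defined in terms of COUNT_LEADING_ZEROS_0, because the usual derivation first isolates the lowest set bit and then counts from the top. A hedged C sketch of that standard identity (illustrative; not the macro body used on the branch):

    /* Trailing-zero count from a leading-zero count: x & -x isolates
       the lowest set bit, so for x != 0 its clz gives that bit's
       distance from the top.  For x == 0 the result degenerates to
       whatever the leading-zero primitive returns for 0 -- hence the
       paired COUNT_*_ZEROS_0 constants.  */
    static inline int
    ctz32 (unsigned int x)
    {
      return 31 - __builtin_clz (x & -x);  /* undefined for x == 0 */
    }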
+2007-04-19 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * output.h (get_unnamed_section_ggc): New macro.
+ * varasm.c (init_varasm_once): Use it.
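For the 2007-05-14 PCH entry above: once the unnamed and noswitch sections are plain xmalloc storage marked GTY((skip)), the global in_section pointer can no longer be written into (or read back from) a precompiled header as-is, so it is parked around gt_pch_save/gt_pch_restore. A much-simplified sketch of the pickle/unpickle idea (only the function and variable names come from the ChangeLog; the bodies here are illustrative guesses):

    /* in_section points at heap storage whose address cannot survive
       a PCH round trip, so it must not be live while the GC heap is
       saved or restored.  */
    static section *pickled_in_section;

    static void
    pickle_in_section (void)
    {
      pickled_in_section = in_section;
      in_section = NULL;  /* keep the raw pointer out of the PCH image */
    }

    static void
    unpickle_in_section (void)
    {
      in_section = pickled_in_section;
    }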
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 097159c6457..1415d95c4eb 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -849,8 +849,11 @@ DDG_H = ddg.h sbitmap.h $(DF_H)
GCC_H = gcc.h version.h
GGC_H = ggc.h gtype-desc.h statistics.h
TIMEVAR_H = timevar.h timevar.def
+# insn-attr.h uses insn-addr.h, which uses vecprim.h, which requires vec.h
+# and statistics.h.
INSN_ATTR_H = insn-attr.h $(INSN_ADDR_H) $(srcdir)/varray.h
-INSN_ADDR_H = $(srcdir)/insn-addr.h vecprim.h
+INSN_ADDR_H = $(srcdir)/insn-addr.h $(srcdir)/vecprim.h \
+ $(srcdir)/vec.h $(srcdir)/statistics.h
C_COMMON_H = c-common.h $(SPLAY_TREE_H) $(CPPLIB_H) $(GGC_H)
C_PRAGMA_H = c-pragma.h $(CPPLIB_H)
C_TREE_H = c-tree.h $(C_COMMON_H) $(TOPLEV_H) $(DIAGNOSTIC_H)
@@ -1995,9 +1998,10 @@ s-options: $(ALL_OPT_FILES) Makefile $(srcdir)/opt-gather.awk
$(SHELL) $(srcdir)/../move-if-change tmp-optionlist optionlist
$(STAMP) s-options
-options.c: optionlist $(srcdir)/opt-functions.awk $(srcdir)/optc-gen.awk
- $(AWK) -f $(srcdir)/opt-functions.awk -f $(srcdir)/optc-gen.awk \
- -v header_name="config.h system.h coretypes.h tm.h" < $< > $@
+options.c: optionlist $(srcdir)/opt-functions.awk $(srcdir)/optc-gen.awk \
+ $(INSN_ATTR_H)
+ $(AWK) -f $(srcdir)/opt-functions.awk -f $(srcdir)/optc-gen.awk -v \
+ header_name="config.h system.h coretypes.h tm.h insn-attr.h" < $< > $@
options.h: s-options-h ; @true
s-options-h: optionlist $(srcdir)/opt-functions.awk $(srcdir)/opth-gen.awk
@@ -2290,7 +2294,8 @@ tree-ssa-loop-ivopts.o : tree-ssa-loop-ivopts.c $(TREE_FLOW_H) $(CONFIG_H) \
output.h $(DIAGNOSTIC_H) $(TIMEVAR_H) $(TM_H) coretypes.h $(TREE_DUMP_H) \
tree-pass.h $(GGC_H) $(RECOG_H) insn-config.h $(HASHTAB_H) $(SCEV_H) \
$(CFGLOOP_H) $(PARAMS_H) langhooks.h $(BASIC_BLOCK_H) hard-reg-set.h \
- tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H)
+ tree-chrec.h $(VARRAY_H) tree-affine.h pointer-set.h $(TARGET_H) \
+ gt-tree-ssa-loop-ivopts.h
tree-affine.o : tree-affine.c tree-affine.h $(CONFIG_H) pointer-set.h \
$(SYSTEM_H) $(RTL_H) $(TREE_H) $(TM_P_H) hard-reg-set.h $(GIMPLE_H) \
output.h $(DIAGNOSTIC_H) $(TM_H) coretypes.h $(TREE_DUMP_H) $(FLAGS_H)
@@ -3319,6 +3324,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
$(srcdir)/tree-iterator.c $(srcdir)/gimplify.c \
$(srcdir)/tree-chrec.h \
$(srcdir)/tree-scalar-evolution.c \
+ $(srcdir)/tree-ssa-loop-ivopts.c \
$(srcdir)/tree-ssa-operands.h \
$(srcdir)/tree-profile.c $(srcdir)/tree-nested.c \
$(srcdir)/varpool.c \
diff --git a/gcc/builtins.c b/gcc/builtins.c
index 745a125fe70..0d222cd80c5 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -3507,7 +3507,8 @@ expand_builtin_mempcpy_args (tree dest, tree src, tree len, tree type,
if (GET_CODE (len_rtx) == CONST_INT
&& can_move_by_pieces (INTVAL (len_rtx),
- MIN (dest_align, src_align)))
+ MIN (dest_align, src_align),
+ 0))
{
dest_mem = get_memory_rtx (dest, len);
set_mem_align (dest_mem, dest_align);
@@ -3515,12 +3516,30 @@ expand_builtin_mempcpy_args (tree dest, tree src, tree len, tree type,
set_mem_align (src_mem, src_align);
dest_mem = move_by_pieces (dest_mem, src_mem, INTVAL (len_rtx),
MIN (dest_align, src_align), endp);
- dest_mem = force_operand (XEXP (dest_mem, 0), NULL_RTX);
+ dest_mem = force_operand (XEXP (dest_mem, 0), target);
dest_mem = convert_memory_address (ptr_mode, dest_mem);
return dest_mem;
}
+ else
+ {
+ unsigned int align = MIN (dest_align, src_align);
- return NULL_RTX;
+ dest_mem = get_memory_rtx (dest, len);
+ set_mem_align (dest_mem, dest_align);
+ src_mem = get_memory_rtx (src, len);
+ set_mem_align (src_mem, src_align);
+ if (!emit_block_move_via_movmem (dest_mem, src_mem, len_rtx, align,
+ 0, -1))
+ return NULL_RTX;
+ dest_mem = XEXP (dest_mem, 0);
+ if (endp)
+ dest_mem = gen_rtx_PLUS (GET_MODE (dest_mem), dest_mem,
+ (endp == 2
+ ? plus_constant (len_rtx, -1) : len_rtx));
+ dest_mem = convert_memory_address (ptr_mode,
+ force_operand (dest_mem, target));
+ return dest_mem;
+ }
}
}
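The builtins.c change gives expand_builtin_mempcpy_args a second chance through emit_block_move_via_movmem instead of giving up when move_by_pieces does not apply; the endp adjustment mirrors what the builtin must return. In plain C, the contract being expanded is (a reference model, not the expander itself):

    #include <string.h>

    /* mempcpy copies like memcpy but returns the end of the
       destination; endp == 1 in the expander corresponds to this
       dest + n result, endp == 2 to the stpcpy-style dest + n - 1.  */
    void *
    reference_mempcpy (void *dest, const void *src, size_t n)
    {
      return (char *) memcpy (dest, src, n) + n;
    }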
diff --git a/gcc/c-pch.c b/gcc/c-pch.c
index b4f70506e40..f79d4736721 100644
--- a/gcc/c-pch.c
+++ b/gcc/c-pch.c
@@ -208,8 +208,10 @@ c_common_write_pch (void)
if (fseek (asm_out_file, 0, SEEK_END) != 0)
fatal_error ("can%'t seek in %s: %m", asm_file_name);
+ pickle_in_section ();
gt_pch_save (pch_outfile);
cpp_write_pch_state (parse_in, pch_outfile);
+ unpickle_in_section ();
if (fseek (pch_outfile, 0, SEEK_SET) != 0
|| fwrite (get_ident (), IDENT_LENGTH, 1, pch_outfile) != 1)
@@ -418,6 +420,7 @@ c_common_read_pch (cpp_reader *pfile, const char *name,
cpp_prepare_state (pfile, &smd);
gt_pch_restore (f);
+ unpickle_in_section ();
if (cpp_read_state (pfile, name, f, smd) != 0)
{
diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index 37bf9937bfa..359aa25464f 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -496,7 +496,11 @@ enum li_flags
LI_INCLUDE_ROOT = 1, /* Include the fake root of the loop tree. */
LI_FROM_INNERMOST = 2, /* Iterate over the loops in the reverse order,
starting from innermost ones. */
- LI_ONLY_INNERMOST = 4 /* Iterate only over innermost loops. */
+ LI_ONLY_INNERMOST = 4, /* Iterate only over innermost loops. */
+ LI_REALLY_FROM_INNERMOST = 8 /* Iterate over the loops such that all child
+ and nephew loops are visited first, i.e.
+ the size of the loop father can be estimated
+ by looking at its child loops. */
};
/* The iterator for loops. */
@@ -530,9 +534,10 @@ fel_next (loop_iterator *li, loop_p *loop)
static inline void
fel_init (loop_iterator *li, loop_p *loop, unsigned flags)
{
- struct loop *aloop;
- unsigned i;
+ struct loop *aloop, *floop;
+ unsigned i, j;
int mn;
+ int visit_lim;
li->idx = 0;
if (!current_loops)
@@ -542,8 +547,9 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags)
return;
}
- li->to_visit = VEC_alloc (int, heap, number_of_loops ());
mn = (flags & LI_INCLUDE_ROOT) ? 0 : 1;
+ visit_lim = number_of_loops () - mn;
+ li->to_visit = VEC_alloc (int, heap, visit_lim);
if (flags & LI_ONLY_INNERMOST)
{
@@ -553,6 +559,27 @@ fel_init (loop_iterator *li, loop_p *loop, unsigned flags)
&& aloop->num >= mn)
VEC_quick_push (int, li->to_visit, aloop->num);
}
+ else if (flags & LI_REALLY_FROM_INNERMOST)
+ {
+ VEC_safe_grow_cleared (int, heap, li->to_visit, visit_lim);
+ floop = current_loops->tree_root;
+ if (!mn)
+ VEC_replace (int, li->to_visit, --visit_lim, floop->num);
+ for (i = visit_lim;;)
+ {
+ for (aloop = floop->inner; aloop; aloop = aloop->next)
+ i--;
+ for (aloop = floop->inner, j = i; aloop; aloop = aloop->next)
+ VEC_replace (int, li->to_visit, j++, aloop->num);
+
+ if (--visit_lim >= (int) i)
+ floop = get_loop (VEC_index (int, li->to_visit, visit_lim));
+ else
+ break;
+ }
+ if (i)
+ VEC_block_remove (int, li->to_visit, 0, i);
+ }
else if (flags & LI_FROM_INNERMOST)
{
/* Push the loops to LI->TO_VISIT in postorder. */
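LI_REALLY_FROM_INNERMOST strengthens LI_FROM_INNERMOST: every child (and nephew) loop is guaranteed to be visited before its father, so a per-loop size estimate can be accumulated bottom-up, as the enum comment says. A sketch of the intended use with the existing FOR_EACH_LOOP iterator (estimate_loop_size is a hypothetical accumulator, not a function on the branch):

    loop_iterator li;
    struct loop *loop;

    /* By the time LOOP is visited, all loops nested inside it have
       already been processed, so their estimates can be summed.  */
    FOR_EACH_LOOP (li, loop, LI_REALLY_FROM_INNERMOST)
      {
        estimate_loop_size (loop);
      }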
diff --git a/gcc/combine.c b/gcc/combine.c
index d6f7480223a..f57cef5c5ce 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -388,6 +388,7 @@ static int combinable_i3pat (rtx, rtx *, rtx, rtx, int, rtx *);
static int contains_muldiv (rtx);
static rtx try_combine (rtx, rtx, rtx, int *);
static void undo_all (void);
+static void undo_since (struct undo *);
static void undo_commit (void);
static rtx *find_split_point (rtx *, rtx);
static rtx subst (rtx, rtx, rtx, int, int);
@@ -818,14 +819,15 @@ combine_validate_cost (rtx i1, rtx i2, rtx i3, rtx newpat, rtx newi2pat,
old_cost += old_other_cost;
new_cost += new_other_cost;
}
- else
+ else if (i1)
old_cost = 0;
}
/* Disallow this recombination if both new_cost and old_cost are
greater than zero, and new_cost is greater than old cost. */
- if (old_cost > 0
- && new_cost > old_cost)
+ if ((old_cost > 0 && new_cost > old_cost)
+ /* Also disallow combine-splits that don't reduce insn count or cost. */
+ || (!i1 && newi2pat && new_cost >= old_cost))
{
if (dump_file)
{
@@ -2912,7 +2914,12 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
machine-specific method (like when you have an addition of a large
constant) or by combine in the function find_split_point. */
- if (i1 && insn_code_number < 0 && GET_CODE (newpat) == SET
+ if ((i1
+ /* Also do this if we have two expensive insns. */
+ || ((INSN_UID (i2) <= max_uid_known ? INSN_COST (i2) : 0)
+ + (INSN_UID (i3) <= max_uid_known ? INSN_COST (i3) : 0)
+ >= COSTS_N_INSNS (3)))
+ && insn_code_number < 0 && GET_CODE (newpat) == SET
&& asm_noperands (newpat) < 0)
{
rtx parallel, m_split, *split;
@@ -3573,7 +3580,7 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
patterns, move from I1 to I2 then I2 to I3 so that we get the
proper movement on registers that I2 modifies. */
- if (newi2pat)
+ if (newi2pat && i1)
{
move_deaths (newi2pat, NULL_RTX, DF_INSN_LUID (i1), i2, &midnotes);
move_deaths (newpat, newi2pat, DF_INSN_LUID (i1), i3, &midnotes);
@@ -3795,14 +3802,15 @@ try_combine (rtx i3, rtx i2, rtx i1, int *new_direct_jump_p)
return newi2pat ? i2 : i3;
}
-/* Undo all the modifications recorded in undobuf. */
+/* Undo all the modifications recorded in undobuf that were made after
+ the point when SINCE was the latest change. */
static void
-undo_all (void)
+undo_since (struct undo *since)
{
struct undo *undo, *next;
- for (undo = undobuf.undos; undo; undo = next)
+ for (undo = undobuf.undos; undo != since; undo = next)
{
next = undo->next;
switch (undo->kind)
@@ -3824,7 +3832,15 @@ undo_all (void)
undobuf.frees = undo;
}
- undobuf.undos = 0;
+ undobuf.undos = undo;
+}
+
+/* Undo all the modifications recorded in undobuf. */
+
+static void
+undo_all (void)
+{
+ undo_since (0);
}
/* We've committed to accepting the changes we made. Move all
@@ -4550,6 +4566,38 @@ subst (rtx x, rtx from, rtx to, int in_dest, int unique_copy)
return x;
}
+/* If we are testing a single bit, and the upper bits are known to
+ be zero, we generally change ZERO_EXTRACT into LSHIFTRT. However,
+ in the context of a compare that might not be the right thing to
+ do if the target has bit test instructions.
+ An example is unwind-dw2-fde.c:linear_search_fdes for ARC,
+ where we want to combine btst / b{eq,ne} into bbit.
+ OUTER is an expression that tests if *XP is zero.
+ If OUTER can be made cheaper by changing *XP from an implicit
+ bit test to an explicit bit test, substitute into *XP accordingly, and
+ return the substituted value. Otherwise, return NULL_RTX. */
+static rtx
+combine_simplify_bittest (rtx outer, rtx *xp)
+{
+ rtx x = *xp;
+
+ if (GET_CODE (x) == LSHIFTRT
+ && CONST_INT_P (XEXP (x, 1))
+ && nonzero_bits (x, GET_MODE (x)) == 1)
+ {
+ struct undo *latest = undobuf.undos;
+ int old_cost = rtx_cost (outer, SET, optimize_this_for_speed_p);
+ rtx y = gen_rtx_ZERO_EXTRACT (GET_MODE (x), XEXP (x, 0),
+ const1_rtx, XEXP (x, 1));
+
+ SUBST (*xp, y);
+ if (rtx_cost (outer, SET, optimize_this_for_speed_p) < old_cost)
+ return y;
+ undo_since (latest);
+ }
+ return NULL_RTX;
+}
+
/* Simplify X, a piece of RTL. We just operate on the expression at the
outer level; call `subst' to simplify recursively. Return the new
expression.
@@ -4884,8 +4932,8 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
return gen_lowpart (mode, XEXP (x, 0));
break;
-#ifdef HAVE_cc0
case COMPARE:
+#ifdef HAVE_cc0
/* Convert (compare FOO (const_int 0)) to FOO unless we aren't
using cc0, in which case we want to leave it as a COMPARE
so we can distinguish it from a register-register-copy. */
@@ -4899,9 +4947,13 @@ combine_simplify_rtx (rtx x, enum machine_mode op0_mode, int in_dest)
&& HONOR_SIGN_DEPENDENT_ROUNDING (GET_MODE (XEXP (x, 0))))
&& XEXP (x, 1) == CONST0_RTX (GET_MODE (XEXP (x, 0))))
return XEXP (x, 0);
- break;
#endif
-
+ if (XEXP (x, 1) == const0_rtx
+ && GET_MODE (x) != CCmode && GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
+ combine_simplify_bittest (x, &XEXP (x, 0));
+ break;
+ /* As above, check if we have a shift that can be done more cheaply
+ as a bit test. */
case CONST:
/* (const (const X)) can become (const X). Do it this way rather than
returning the inner CONST since CONST can be shared with a
@@ -5571,6 +5623,9 @@ simplify_if_then_else (rtx x)
&& (i = exact_log2 (INTVAL (true_rtx) & GET_MODE_MASK (mode))) >= 0)
return XEXP (cond, 0);
+ if (comparison_p && XEXP (XEXP (x, 0), 1) == const0_rtx)
+ combine_simplify_bittest (x, &XEXP (XEXP (x, 0), 0));
+
return x;
}
@@ -9967,7 +10022,7 @@ gen_lowpart_for_combine (enum machine_mode omode, rtx x)
}
fail:
- return gen_rtx_CLOBBER (imode, const0_rtx);
+ return gen_rtx_CLOBBER (omode, const0_rtx);
}
/* Simplify a comparison between *POP0 and *POP1 where CODE is the
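combine_simplify_bittest only fires when rewriting the shift as a ZERO_EXTRACT makes the enclosing compare cheaper by rtx_cost, which is what lets a target like ARC match btst / b{eq,ne} and fuse them into bbit. Source of roughly this shape is what the transformation targets (whether bbit is actually emitted still depends on the target's patterns and costs):

    extern void f (void);

    /* (x >> 5) & 1 reaches combine as an implicit single-bit test;
       with the patch the compare operand can become
       (zero_extract x 1 5) when that form is cheaper.  */
    void
    g (unsigned int x)
    {
      if ((x >> 5) & 1)
        f ();
    }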
diff --git a/gcc/common.opt b/gcc/common.opt
index 4e680676c5b..121a5856c1e 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -705,6 +705,14 @@ fivopts
Common Report Var(flag_ivopts) Init(1) Optimization
Optimize induction variables on trees
+fivopts-post-inc
+Common Report Var(flag_ivopts_post_inc) Init(1) Optimization
+When optimizing induction variables on trees, assume post_increment will be used.
+
+fivopts-post-modify
+Common Report Var(flag_ivopts_post_modify) Init(1) Optimization
+When optimizing induction variables on trees, assume post_modify will be used.
+
fjump-tables
Common Var(flag_jump_tables) Init(1) Optimization
Use jump tables for sufficiently large switch statements
@@ -1229,6 +1237,15 @@ ftree-pre
Common Report Var(flag_tree_pre) Optimization
Enable SSA-PRE optimization on trees
+ftree-pre-partial-partial
+Common Report Var(flag_tree_pre_partial_partial) Optimization
+In SSA-PRE optimization on trees, enable partial-partial redundancy elimination.
+
+ftree-pre-partial-partial-obliviously
+Common Report Var(flag_tree_pre_partial_partial_obliviously) Optimization
+In SSA-PRE optimization on trees, enable partial-partial redundancy
+elimination without regard for the cost of the inserted phi nodes.
+
ftree-reassoc
Common Report Var(flag_tree_reassoc) Init(1) Optimization
Enable reassociation on tree level
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 5e9e89d9808..121584ad499 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -675,8 +675,40 @@ alpha*-dec-*vms*)
local_prefix=/gnu
;;
arc-*-elf*)
- tm_file="dbxelf.h elfos.h svr4.h ${tm_file}"
- extra_parts="crtinit.o crtfini.o"
+ extra_headers="arc-simd.h"
+ tm_file="dbxelf.h elfos.h ${tm_file}"
+ tmake_file="arc/t-arc-newlib arc/t-arc"
+ tm_defines="${tm_defines} TARGET_CPU_DEFAULT_OPT=\\\"mA6\\\""
+ extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o"
+ ;;
+arc600-*-linux-uclibc*)
+ extra_headers="arc-simd.h"
+ tm_file="dbxelf.h elfos.h svr4.h linux.h ${tm_file}"
+ tmake_file="arc/t-arc600-uClibc arc/t-arc"
+ tm_defines="${tm_defines} TARGET_CPU_DEFAULT_OPT=\\\"mARC600\\\""
+ extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o"
+ use_fixproto=yes
+ ;;
+arc*-*-linux-uclibc*)
+ extra_headers="arc-simd.h"
+ tm_file="dbxelf.h elfos.h svr4.h linux.h ${tm_file}"
+ tmake_file="${tmake_file} arc/t-arc700-uClibc arc/t-arc"
+ case x"${with_cpu}" in
+ xarc600-*)
+ tm_defines="${tm_defines} TARGET_CPU_DEFAULT_OPT=\\\"mARC600\\\""
+ ;;
+ *)
+ tm_defines="${tm_defines} TARGET_CPU_DEFAULT_OPT=\\\"mARC700\\\""
+ ;;
+ esac
+ extra_parts="crti.o crtn.o crtend.o crtbegin.o crtendS.o crtbeginS.o"
+ use_fixproto=yes
+ ;;
+# ARC mxp
+mxp-*-elf*)
+ tm_file="dbxelf.h elfos.h mxp/regset-config.h ${tm_file}"
+ extra_parts="crti.o crtn.o crtend.o crtbegin.o"
+ extra_objs=mxp-regset.o
;;
arm-*-coff* | armel-*-coff*)
tm_file="arm/semi.h arm/aout.h arm/arm.h arm/coff.h dbxcoff.h"
@@ -2596,6 +2628,19 @@ case "${target}" in
done
;;
+ arc*-*-linux-uclibc)
+ supported_defaults="cpu"
+ case $with_cpu in
+ "") echo "Unknown Cpu type"
+ exit 1
+ ;;
+ arc600) with_cpu="arc600"
+ ;;
+ arc700) with_cpu="arc700"
+ ;;
+ esac
+ ;;
+
arm*-*-*)
supported_defaults="arch cpu float tune fpu abi mode"
for which in cpu tune; do
diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def
index c2d2ceaf82f..03bb1515211 100644
--- a/gcc/config/arc/arc-modes.def
+++ b/gcc/config/arc/arc-modes.def
@@ -1,5 +1,5 @@
/* Definitions of target machine for GNU compiler, Argonaut ARC cpu.
- Copyright (C) 2002, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2007, 2008 Free Software Foundation, Inc.
This file is part of GCC.
@@ -20,5 +20,16 @@ along with GCC; see the file COPYING3. If not see
/* Some insns set all condition code flags, some only set the ZNC flags, and
some only set the ZN flags. */
-CC_MODE (CCZNC);
-CC_MODE (CCZN);
+CC_MODE (CC_ZN);
+CC_MODE (CC_Z);
+CC_MODE (CC_C);
+CC_MODE (CC_FP_GT);
+CC_MODE (CC_FP_GE);
+CC_MODE (CC_FP_ORD);
+CC_MODE (CC_FP_UNEQ);
+CC_MODE (CC_FPX);
+
+/* Vector modes. */
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h
index c9fc51ad2ac..82d9bbf83db 100644
--- a/gcc/config/arc/arc-protos.h
+++ b/gcc/config/arc/arc-protos.h
@@ -1,5 +1,5 @@
-/* Definitions of target machine for GNU compiler, Argonaut ARC cpu.
- Copyright (C) 2000, 2004, 2007 Free Software Foundation, Inc.
+/* Definitions of target machine for GNU compiler, ARC ARCompact cpu.
+ Copyright (C) 2000, 2007, 2008, 2009 Free Software Foundation, Inc.
This file is part of GCC.
@@ -18,15 +18,24 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#ifdef RTX_CODE
+#ifdef TREE_CODE
+extern rtx arc_va_arg (tree, tree);
+#endif /* TREE_CODE */
+
extern enum machine_mode arc_select_cc_mode (enum rtx_code, rtx, rtx);
/* Define the function that build the compare insn for scc and bcc. */
-extern struct rtx_def *gen_compare_reg (enum rtx_code, rtx, rtx);
-#endif
+extern struct rtx_def *gen_compare_reg (enum rtx_code, enum machine_mode);
/* Declarations for various fns used in the .md file. */
+extern void arc_output_function_epilogue (FILE *, HOST_WIDE_INT, int);
extern const char *output_shift (rtx *);
-
+extern int compact_load_memory_operand (rtx op, enum machine_mode mode);
+extern int compact_sda_memory_operand (rtx op, enum machine_mode mode);
+extern int arc_valid_machine_decl_attribute (tree type, tree attributes, tree identifier, tree args);
+extern int compact_store_memory_operand (rtx op, enum machine_mode mode);
+extern int u6_immediate_operand (rtx op, enum machine_mode mode);
+extern int arc_double_register_operand (rtx op, enum machine_mode mode);
extern int symbolic_operand (rtx, enum machine_mode);
extern int arc_double_limm_p (rtx);
extern int arc_eligible_for_epilogue_delay (rtx, int);
@@ -34,9 +43,11 @@ extern void arc_initialize_trampoline (rtx, rtx, rtx);
extern void arc_print_operand (FILE *, rtx, int);
extern void arc_print_operand_address (FILE *, rtx);
extern void arc_final_prescan_insn (rtx, rtx *, int);
+extern void arc_set_default_type_attributes (tree type);
extern int call_address_operand (rtx, enum machine_mode);
extern int call_operand (rtx, enum machine_mode);
-extern int symbolic_memory_operand (rtx, enum machine_mode);
+extern int compact_register_operand (rtx op, enum machine_mode mode);
+extern int cc_register (rtx x, enum machine_mode mode);
extern int short_immediate_operand (rtx, enum machine_mode);
extern int long_immediate_operand (rtx, enum machine_mode);
extern int long_immediate_loadstore_operand (rtx, enum machine_mode);
@@ -50,15 +61,100 @@ extern int const_sint32_operand (rtx, enum machine_mode);
extern int const_uint32_operand (rtx, enum machine_mode);
extern int proper_comparison_operator (rtx, enum machine_mode);
extern int shift_operator (rtx, enum machine_mode);
+extern int arc_dpfp_operator (rtx, enum machine_mode);
+extern int arc_emit_vector_const (FILE *, rtx);
+extern const char *arc_output_libcall (const char *);
+extern int prepare_extend_operands (rtx *operands, enum rtx_code code,
+ enum machine_mode omode);
+extern const char *arc_output_addsi (rtx *operands, const char *);
+extern int arc_expand_movmem (rtx *operands);
+extern int prepare_move_operands (rtx *operands, enum machine_mode mode);
+extern void arc_split_dilogic (rtx *, enum rtx_code);
+#endif /* RTX_CODE */
-extern enum arc_function_type arc_compute_function_type (tree);
+#ifdef TREE_CODE
+extern enum arc_function_type arc_compute_function_type (struct function *);
+#endif /* TREE_CODE */
extern void arc_init (void);
extern unsigned int arc_compute_frame_size (int);
-extern void arc_save_restore (FILE *, const char *, unsigned int,
- unsigned int, const char *);
extern int arc_delay_slots_for_epilogue (void);
-extern void arc_ccfsm_at_label (const char *, int);
+extern rtx arc_finalize_pic (void);
extern int arc_ccfsm_branch_deleted_p (void);
extern void arc_ccfsm_record_branch_deleted (void);
+
+extern rtx arc_legitimize_pic_address (rtx, rtx);
+extern int arc_function_arg_partial_nregs (CUMULATIVE_ARGS *, enum machine_mode, tree,int);
+extern rtx arc_function_arg (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
+extern void arc_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, tree, int);
+void arc_asm_output_aligned_decl_local (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT);
+extern rtx arc_return_addr_rtx (int, rtx);
+extern int check_if_valid_regno_const (rtx *, int);
+extern int check_if_valid_sleep_operand (rtx *, int);
+extern bool arc_legitimate_constant_p (rtx);
+extern int arc_legitimate_pc_offset_p (rtx);
+extern int arc_legitimate_pic_addr_p (rtx);
+extern void arc_assemble_name (FILE *, const char*);
+extern int symbolic_reference_mentioned_p (rtx);
+extern void emit_pic_move (rtx *, enum machine_mode);
+extern int arc_raw_symbolic_reference_mentioned_p (rtx);
+extern bool arc_legitimate_pic_operand_p (rtx);
+extern const char *gen_bbit_insns (rtx *) ATTRIBUTE_UNUSED;
+extern const char *gen_bbit_bic_insns (rtx *) ATTRIBUTE_UNUSED;
+extern int valid_bbit_pattern_p (rtx *, rtx) ATTRIBUTE_UNUSED;
+extern int arc_is_longcall_p (rtx);
+extern int arc_profile_call (rtx callee);
+extern int valid_brcc_with_delay_p (rtx *);
+extern int small_data_pattern (rtx, enum machine_mode ATTRIBUTE_UNUSED);
+extern rtx arc_rewrite_small_data (rtx);
+extern int arc_ccfsm_cond_exec_p (void);
+struct secondary_reload_info;
+extern enum reg_class arc_secondary_reload (bool, rtx, enum reg_class,
+ enum machine_mode,
+ struct secondary_reload_info *);
+extern int arc_register_move_cost (enum machine_mode, enum reg_class,
+ enum reg_class);
+extern rtx disi_highpart (rtx);
+extern int arc_adjust_insn_length (rtx, int);
+extern int arc_corereg_hazard (rtx, rtx);
+extern int arc_hazard (rtx, rtx);
+extern int arc_write_ext_corereg (rtx);
+extern void arc_conditional_register_usage (void);
+extern rtx gen_acc1 (void);
+extern rtx gen_acc2 (void);
+extern rtx gen_mlo (void);
+extern rtx gen_mhi (void);
+extern int arc_unalign_branch_p (rtx);
+extern int arc_branch_size_unknown_p (void);
+struct arc_ccfsm;
+extern void arc_ccfsm_record_condition (rtx, int, rtx, struct arc_ccfsm *);
+extern void arc_expand_prologue (void);
+extern void arc_expand_epilogue (int);
+extern void arc_init_expanders (void);
+extern int arc_check_millicode (rtx op, int offset, int load_p);
+extern int arc_get_unalign (void);
+extern void arc_clear_unalign (void);
+extern void arc_toggle_unalign (void);
+extern void split_addsi (rtx *);
+extern void split_subsi (rtx *);
+extern void arc_pad_return (void);
+extern void arc_split_move (rtx *);
+extern char *arc_shorten_align (void);
+extern void arc_ccfsm_advance_to (rtx);
+extern int arc_verify_short (rtx insn, int unalign, int);
+extern const char *arc_short_long (rtx insn, const char *, const char *);
+extern rtx arc_regno_use_in (unsigned int, rtx);
+extern int arc_attr_type (rtx);
+extern int arc_scheduling_not_expected (void);
+extern int arc_sets_cc_p (rtx insn);
+extern int arc_label_align (rtx label);
+extern int arc_need_delay (rtx insn);
+extern int arc_text_label (rtx);
+extern int arc_decl_pretend_args (tree decl);
+extern int arc_dead_or_set_postreload_p (const_rtx insn, const_rtx reg);
+extern rtx arc_legitimize_address (rtx x, rtx oldx, int mode);
diff --git a/gcc/config/arc/arc-simd.h b/gcc/config/arc/arc-simd.h
new file mode 100644
index 00000000000..bb8d15ca24c
--- /dev/null
+++ b/gcc/config/arc/arc-simd.h
@@ -0,0 +1,185 @@
+/* ARC SIMD include file.
+ Copyright (C) 2007 Celunite, Inc.
+ Written by Saurabh Verma (saurabh.verma@celunite.com).
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* As a special exception, if you include this header file into source
+ files compiled by GCC, this header file does not by itself cause
+ the resulting executable to be covered by the GNU General Public
+ License. This exception does not however invalidate any other
+ reasons why the executable file might be covered by the GNU General
+ Public License. */
+
+#ifndef _ARC_SIMD_H
+#define _ARC_SIMD_H 1
+
+#ifndef __ARC_SIMD__
+#error Use the "-msimd" flag to enable ARC SIMD support
+#endif
+
+/* I0-I7 registers */
+#define _IREG_I0 0
+#define _IREG_I1 1
+#define _IREG_I2 2
+#define _IREG_I3 3
+#define _IREG_I4 4
+#define _IREG_I5 5
+#define _IREG_I6 6
+#define _IREG_I7 7
+
+/* DMA configuration registers */
+#define _DMA_REG_DR0 0
+#define _DMA_SDM_SRC_ADR_REG _DMA_REG_DR0
+#define _DMA_SDM_DEST_ADR_REG _DMA_REG_DR0
+
+#define _DMA_REG_DR1 1
+#define _DMA_SDM_STRIDE_REG _DMA_REG_DR1
+
+#define _DMA_REG_DR2 2
+#define _DMA_BLK_REG _DMA_REG_DR2
+
+#define _DMA_REG_DR3 3
+#define _DMA_LOC_REG _DMA_REG_DR3
+
+#define _DMA_REG_DR4 4
+#define _DMA_SYS_SRC_ADR_REG _DMA_REG_DR4
+#define _DMA_SYS_DEST_ADR_REG _DMA_REG_DR4
+
+#define _DMA_REG_DR5 5
+#define _DMA_SYS_STRIDE_REG _DMA_REG_DR5
+
+#define _DMA_REG_DR6 6
+#define _DMA_CFG_REG _DMA_REG_DR6
+
+#define _DMA_REG_DR7 7
+#define _DMA_FT_BASE_ADR_REG _DMA_REG_DR7
+
+/* Predefined types used in vector instructions */
+typedef int __v4si __attribute__((vector_size(16)));
+typedef short __v8hi __attribute__((vector_size(16)));
+
+/* Synonyms */
+#define _vaddaw __builtin_arc_vaddaw
+#define _vaddw __builtin_arc_vaddw
+#define _vavb __builtin_arc_vavb
+#define _vavrb __builtin_arc_vavrb
+#define _vdifaw __builtin_arc_vdifaw
+#define _vdifw __builtin_arc_vdifw
+#define _vmaxaw __builtin_arc_vmaxaw
+#define _vmaxw __builtin_arc_vmaxw
+#define _vminaw __builtin_arc_vminaw
+#define _vminw __builtin_arc_vminw
+#define _vmulaw __builtin_arc_vmulaw
+#define _vmulfaw __builtin_arc_vmulfaw
+#define _vmulfw __builtin_arc_vmulfw
+#define _vmulw __builtin_arc_vmulw
+#define _vsubaw __builtin_arc_vsubaw
+#define _vsubw __builtin_arc_vsubw
+#define _vsummw __builtin_arc_vsummw
+#define _vand __builtin_arc_vand
+#define _vandaw __builtin_arc_vandaw
+#define _vbic __builtin_arc_vbic
+#define _vbicaw __builtin_arc_vbicaw
+#define _vor __builtin_arc_vor
+#define _vxor __builtin_arc_vxor
+#define _vxoraw __builtin_arc_vxoraw
+#define _veqw __builtin_arc_veqw
+#define _vlew __builtin_arc_vlew
+#define _vltw __builtin_arc_vltw
+#define _vnew __builtin_arc_vnew
+#define _vmr1aw __builtin_arc_vmr1aw
+#define _vmr1w __builtin_arc_vmr1w
+#define _vmr2aw __builtin_arc_vmr2aw
+#define _vmr2w __builtin_arc_vmr2w
+#define _vmr3aw __builtin_arc_vmr3aw
+#define _vmr3w __builtin_arc_vmr3w
+#define _vmr4aw __builtin_arc_vmr4aw
+#define _vmr4w __builtin_arc_vmr4w
+#define _vmr5aw __builtin_arc_vmr5aw
+#define _vmr5w __builtin_arc_vmr5w
+#define _vmr6aw __builtin_arc_vmr6aw
+#define _vmr6w __builtin_arc_vmr6w
+#define _vmr7aw __builtin_arc_vmr7aw
+#define _vmr7w __builtin_arc_vmr7w
+#define _vmrb __builtin_arc_vmrb
+#define _vh264f __builtin_arc_vh264f
+#define _vh264ft __builtin_arc_vh264ft
+#define _vh264fw __builtin_arc_vh264fw
+#define _vvc1f __builtin_arc_vvc1f
+#define _vvc1ft __builtin_arc_vvc1ft
+#define _vbaddw __builtin_arc_vbaddw
+#define _vbmaxw __builtin_arc_vbmaxw
+#define _vbminw __builtin_arc_vbminw
+#define _vbmulaw __builtin_arc_vbmulaw
+#define _vbmulfw __builtin_arc_vbmulfw
+#define _vbmulw __builtin_arc_vbmulw
+#define _vbrsubw __builtin_arc_vbrsubw
+#define _vbsubw __builtin_arc_vbsubw
+#define _vasrw __builtin_arc_vasrw
+#define _vsr8 __builtin_arc_vsr8
+#define _vsr8aw __builtin_arc_vsr8aw
+#define _vasrrwi __builtin_arc_vasrrwi
+#define _vasrsrwi __builtin_arc_vasrsrwi
+#define _vasrwi __builtin_arc_vasrwi
+#define _vasrpwbi __builtin_arc_vasrpwbi
+#define _vasrrpwbi __builtin_arc_vasrrpwbi
+#define _vsr8awi __builtin_arc_vsr8awi
+#define _vsr8i __builtin_arc_vsr8i
+#define _vmvaw __builtin_arc_vmvaw
+#define _vmvw __builtin_arc_vmvw
+#define _vmvzw __builtin_arc_vmvzw
+#define _vd6tapf __builtin_arc_vd6tapf
+#define _vmovaw __builtin_arc_vmovaw
+#define _vmovw __builtin_arc_vmovw
+#define _vmovzw __builtin_arc_vmovzw
+#define _vabsaw __builtin_arc_vabsaw
+#define _vabsw __builtin_arc_vabsw
+#define _vaddsuw __builtin_arc_vaddsuw
+#define _vsignw __builtin_arc_vsignw
+#define _vexch1 __builtin_arc_vexch1
+#define _vexch2 __builtin_arc_vexch2
+#define _vexch4 __builtin_arc_vexch4
+#define _vupbaw __builtin_arc_vupbaw
+#define _vupbw __builtin_arc_vupbw
+#define _vupsbaw __builtin_arc_vupsbaw
+#define _vupsbw __builtin_arc_vupsbw
+#define _vdirun __builtin_arc_vdirun
+#define _vdorun __builtin_arc_vdorun
+#define _vdiwr __builtin_arc_vdiwr
+#define _vdowr __builtin_arc_vdowr
+#define _vrec __builtin_arc_vrec
+#define _vrun __builtin_arc_vrun
+#define _vrecrun __builtin_arc_vrecrun
+#define _vendrec __builtin_arc_vendrec
+#define _vld32wh __builtin_arc_vld32wh
+#define _vld32wl __builtin_arc_vld32wl
+#define _vld64 __builtin_arc_vld64
+#define _vld32 __builtin_arc_vld32
+#define _vld64w __builtin_arc_vld64w
+#define _vld128 __builtin_arc_vld128
+#define _vst128 __builtin_arc_vst128
+#define _vst64 __builtin_arc_vst64
+#define _vst16_n __builtin_arc_vst16_n
+#define _vst32_n __builtin_arc_vst32_n
+#define _vinti __builtin_arc_vinti
+
+/* Additional synonyms to ease programming */
+#define _setup_dma_in_channel_reg _vdiwr
+#define _setup_dma_out_channel_reg _vdowr
+
+#endif /* _ARC_SIMD_H */
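Each _v* short name above is a plain macro for the corresponding __builtin_arc_* builtin, with __v4si/__v8hi as the vector carrier types. A minimal usage sketch, assuming the VADDW builtin takes and returns __v8hi and that the file is compiled with -msimd (otherwise the #error above triggers):

    #include <arc-simd.h>

    /* Element-wise add of two 8 x 16-bit vectors.  */
    __v8hi
    add8h (__v8hi a, __v8hi b)
    {
      return _vaddw (a, b);
    }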
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index 891b8efefba..e6137d0d212 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -1,6 +1,13 @@
-/* Subroutines used for code generation on the Argonaut ARC cpu.
- Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2003,
- 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+/* Subroutines used for code generation on the ARC ARCompact cpu.
+ Copyright (C) 1994, 1995, 1997, 2004, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ Sources derived from work done by Sankhya Technologies (www.sankhya.com)
+
+ Position Independent Code support added, code cleaned up,
+ comments and support for ARC700 instructions added by
+ Saurabh Verma (saurabh.verma@codito.com) and
+ Ramana Radhakrishnan (ramana.radhakrishnan@codito.com).
This file is part of GCC.
@@ -18,9 +25,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* ??? This is an old port, and is undoubtedly suffering from bit rot. */
-
#include "config.h"
+#include <stdio.h>
#include "system.h"
#include "coretypes.h"
#include "tm.h"
@@ -31,23 +37,31 @@ along with GCC; see the file COPYING3. If not see
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
-#include "output.h"
-#include "insn-attr.h"
-#include "flags.h"
+#include "insn-flags.h"
#include "function.h"
-#include "expr.h"
-#include "recog.h"
#include "toplev.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
-
-/* Which cpu we're compiling for. */
-int arc_cpu_type;
-
-/* Name of mangle string to add to symbols to separate code compiled for each
- cpu (or NULL). */
-const char *arc_mangle_cpu;
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "expr.h"
+#include "recog.h"
+#include "debug.h"
+#include "diagnostic.h"
+#include "insn-codes.h"
+#include "integrate.h"
+#include "c-tree.h"
+#include "langhooks.h"
+#include "optabs.h"
+#include "tm-constrs.h"
+#include "reload.h" /* For operands_match_p */
+#include "df.h"
+
+/* Which cpu we're compiling for (NULL (= A4), A4, A5, ARC600, ARC700). */
+const char *arc_cpu_string;
+enum processor_type arc_cpu;
/* Save the operands last given to a compare for use when we
generate a scc or bcc insn. */
@@ -61,77 +75,388 @@ const char *arc_rodata_section;
/* Array of valid operand punctuation characters. */
char arc_punct_chars[256];
-/* Variables used by arc_final_prescan_insn to implement conditional
- execution. */
-static int arc_ccfsm_state;
-static int arc_ccfsm_current_cc;
-static rtx arc_ccfsm_target_insn;
-static int arc_ccfsm_target_label;
+/* State used by arc_ccfsm_advance to implement conditional execution. */
+struct arc_ccfsm GTY (())
+{
+ int state;
+ int cc;
+ rtx target_insn;
+ int target_label;
+};
+
+#define arc_ccfsm_current cfun->machine->ccfsm_current
+
+#define ARC_CCFSM_BRANCH_DELETED_P(STATE) \
+ ((STATE)->state == 1 || (STATE)->state == 2)
+
+/* Indicate we're conditionalizing insns now. */
+#define ARC_CCFSM_RECORD_BRANCH_DELETED(STATE) \
+ ((STATE)->state += 2)
+
+#define ARC_CCFSM_COND_EXEC_P(STATE) \
+ ((STATE)->state == 3 || (STATE)->state == 4 || (STATE)->state == 5)
+
+/* Check if INSN has a 16 bit opcode considering struct arc_ccfsm *STATE. */
+#define CCFSM_ISCOMPACT(INSN,STATE) \
+ (ARC_CCFSM_COND_EXEC_P (STATE) \
+ ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
+ || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
+ : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
+
+/* Likewise, but also consider that INSN might be in a delay slot of JUMP. */
+#define CCFSM_DBR_ISCOMPACT(INSN,JUMP,STATE) \
+ ((ARC_CCFSM_COND_EXEC_P (STATE) \
+ || (INSN_ANNULLED_BRANCH_P (JUMP) \
+ && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (INSN)))) \
+ ? (get_attr_iscompact (INSN) == ISCOMPACT_TRUE \
+ || get_attr_iscompact (INSN) == ISCOMPACT_TRUE_LIMM) \
+ : get_attr_iscompact (INSN) != ISCOMPACT_FALSE)
+
+/* Local obstack. */
+static struct obstack arc_local_obstack;
+
+/* The following definition was shifted to arc.h, since #defines from arc.h
+ can be freely used in predicates.md */
+/* #define PROGRAM_COUNTER_REGNO 63 */
/* The maximum number of insns skipped which will be conditionalised if
possible. */
+/* When optimizing for speed:
+ Let p be the probability that the potentially skipped insns need to
+ be executed, pn the cost of a correctly predicted non-taken branch,
+ mt the cost of a mis/non-predicted taken branch,
+ mn mispredicted non-taken, pt correctly predicted taken;
+ costs expressed in numbers of instructions like the ones considered
+ skipping.
+ Unfortunately we don't have a measure of predictability - this
+ is linked to probability only in that in the no-eviction scenario
+ there is a lower bound 1 - 2 * min (p, 1-p), and a somewhat larger
+ value that can be assumed *if* the distribution is perfectly random.
+ A predictability of 1 is perfectly plausible no matter what p is,
+ because the decision could be dependent on an invocation parameter
+ of the program.
+ For large p, we want MAX_INSNS_SKIPPED == pn/(1-p) + mt - pn
+ For small p, we want MAX_INSNS_SKIPPED == pt
+
+ When optimizing for size:
+ We want to skip insns unless we could use 16 bit opcodes for the
+ non-conditionalized insns to balance the branch length or more.
+ Performance can be a tie-breaker. */
+/* If the potentially-skipped insns are likely to be executed, we'll
+ generally save one non-taken branch; we want MAX_INSNS_SKIPPED
+ to be no less than 1/p. */
#define MAX_INSNS_SKIPPED 3
+/* The values of unspec's first field. */
+enum {
+ ARC_UNSPEC_PLT = 3,
+ ARC_UNSPEC_GOT,
+ ARC_UNSPEC_GOTOFF
+};
+
+
+enum arc_builtins {
+ ARC_BUILTIN_NOP = 2,
+ ARC_BUILTIN_NORM = 3,
+ ARC_BUILTIN_NORMW = 4,
+ ARC_BUILTIN_SWAP = 5,
+ ARC_BUILTIN_BRK = 6,
+ ARC_BUILTIN_DIVAW = 7,
+ ARC_BUILTIN_EX = 8,
+ ARC_BUILTIN_MUL64 = 9,
+ ARC_BUILTIN_MULU64 = 10,
+ ARC_BUILTIN_RTIE = 11,
+ ARC_BUILTIN_SYNC = 12,
+ ARC_BUILTIN_CORE_READ = 13,
+ ARC_BUILTIN_CORE_WRITE = 14,
+ ARC_BUILTIN_FLAG = 15,
+ ARC_BUILTIN_LR = 16,
+ ARC_BUILTIN_SR = 17,
+ ARC_BUILTIN_SLEEP = 18,
+ ARC_BUILTIN_SWI = 19,
+ ARC_BUILTIN_TRAP_S = 20,
+ ARC_BUILTIN_UNIMP_S = 21,
+
+ /* Sentinel to mark start of simd builtins */
+ ARC_SIMD_BUILTIN_BEGIN = 1000,
+
+ ARC_SIMD_BUILTIN_VADDAW = 1001,
+ ARC_SIMD_BUILTIN_VADDW = 1002,
+ ARC_SIMD_BUILTIN_VAVB = 1003,
+ ARC_SIMD_BUILTIN_VAVRB = 1004,
+ ARC_SIMD_BUILTIN_VDIFAW = 1005,
+ ARC_SIMD_BUILTIN_VDIFW = 1006,
+ ARC_SIMD_BUILTIN_VMAXAW = 1007,
+ ARC_SIMD_BUILTIN_VMAXW = 1008,
+ ARC_SIMD_BUILTIN_VMINAW = 1009,
+ ARC_SIMD_BUILTIN_VMINW = 1010,
+ ARC_SIMD_BUILTIN_VMULAW = 1011,
+ ARC_SIMD_BUILTIN_VMULFAW = 1012,
+ ARC_SIMD_BUILTIN_VMULFW = 1013,
+ ARC_SIMD_BUILTIN_VMULW = 1014,
+ ARC_SIMD_BUILTIN_VSUBAW = 1015,
+ ARC_SIMD_BUILTIN_VSUBW = 1016,
+ ARC_SIMD_BUILTIN_VSUMMW = 1017,
+ ARC_SIMD_BUILTIN_VAND = 1018,
+ ARC_SIMD_BUILTIN_VANDAW = 1019,
+ ARC_SIMD_BUILTIN_VBIC = 1020,
+ ARC_SIMD_BUILTIN_VBICAW = 1021,
+ ARC_SIMD_BUILTIN_VOR = 1022,
+ ARC_SIMD_BUILTIN_VXOR = 1023,
+ ARC_SIMD_BUILTIN_VXORAW = 1024,
+ ARC_SIMD_BUILTIN_VEQW = 1025,
+ ARC_SIMD_BUILTIN_VLEW = 1026,
+ ARC_SIMD_BUILTIN_VLTW = 1027,
+ ARC_SIMD_BUILTIN_VNEW = 1028,
+ ARC_SIMD_BUILTIN_VMR1AW = 1029,
+ ARC_SIMD_BUILTIN_VMR1W = 1030,
+ ARC_SIMD_BUILTIN_VMR2AW = 1031,
+ ARC_SIMD_BUILTIN_VMR2W = 1032,
+ ARC_SIMD_BUILTIN_VMR3AW = 1033,
+ ARC_SIMD_BUILTIN_VMR3W = 1034,
+ ARC_SIMD_BUILTIN_VMR4AW = 1035,
+ ARC_SIMD_BUILTIN_VMR4W = 1036,
+ ARC_SIMD_BUILTIN_VMR5AW = 1037,
+ ARC_SIMD_BUILTIN_VMR5W = 1038,
+ ARC_SIMD_BUILTIN_VMR6AW = 1039,
+ ARC_SIMD_BUILTIN_VMR6W = 1040,
+ ARC_SIMD_BUILTIN_VMR7AW = 1041,
+ ARC_SIMD_BUILTIN_VMR7W = 1042,
+ ARC_SIMD_BUILTIN_VMRB = 1043,
+ ARC_SIMD_BUILTIN_VH264F = 1044,
+ ARC_SIMD_BUILTIN_VH264FT = 1045,
+ ARC_SIMD_BUILTIN_VH264FW = 1046,
+ ARC_SIMD_BUILTIN_VVC1F = 1047,
+ ARC_SIMD_BUILTIN_VVC1FT = 1048,
+
+ /* Va, Vb, rlimm instructions */
+ ARC_SIMD_BUILTIN_VBADDW = 1050,
+ ARC_SIMD_BUILTIN_VBMAXW = 1051,
+ ARC_SIMD_BUILTIN_VBMINW = 1052,
+ ARC_SIMD_BUILTIN_VBMULAW = 1053,
+ ARC_SIMD_BUILTIN_VBMULFW = 1054,
+ ARC_SIMD_BUILTIN_VBMULW = 1055,
+ ARC_SIMD_BUILTIN_VBRSUBW = 1056,
+ ARC_SIMD_BUILTIN_VBSUBW = 1057,
+
+ /* Va, Vb, Ic instructions */
+ ARC_SIMD_BUILTIN_VASRW = 1060,
+ ARC_SIMD_BUILTIN_VSR8 = 1061,
+ ARC_SIMD_BUILTIN_VSR8AW = 1062,
+
+ /* Va, Vb, u6 instructions */
+ ARC_SIMD_BUILTIN_VASRRWi = 1065,
+ ARC_SIMD_BUILTIN_VASRSRWi = 1066,
+ ARC_SIMD_BUILTIN_VASRWi = 1067,
+ ARC_SIMD_BUILTIN_VASRPWBi = 1068,
+ ARC_SIMD_BUILTIN_VASRRPWBi = 1069,
+ ARC_SIMD_BUILTIN_VSR8AWi = 1070,
+ ARC_SIMD_BUILTIN_VSR8i = 1071,
+
+ /* Va, Vb, u8 (simm) instructions */
+ ARC_SIMD_BUILTIN_VMVAW = 1075,
+ ARC_SIMD_BUILTIN_VMVW = 1076,
+ ARC_SIMD_BUILTIN_VMVZW = 1077,
+ ARC_SIMD_BUILTIN_VD6TAPF = 1078,
+
+ /* Va, rlimm, u8 (simm) instructions */
+ ARC_SIMD_BUILTIN_VMOVAW = 1080,
+ ARC_SIMD_BUILTIN_VMOVW = 1081,
+ ARC_SIMD_BUILTIN_VMOVZW = 1082,
+
+ /* Va, Vb instructions */
+ ARC_SIMD_BUILTIN_VABSAW = 1085,
+ ARC_SIMD_BUILTIN_VABSW = 1086,
+ ARC_SIMD_BUILTIN_VADDSUW = 1087,
+ ARC_SIMD_BUILTIN_VSIGNW = 1088,
+ ARC_SIMD_BUILTIN_VEXCH1 = 1089,
+ ARC_SIMD_BUILTIN_VEXCH2 = 1090,
+ ARC_SIMD_BUILTIN_VEXCH4 = 1091,
+ ARC_SIMD_BUILTIN_VUPBAW = 1092,
+ ARC_SIMD_BUILTIN_VUPBW = 1093,
+ ARC_SIMD_BUILTIN_VUPSBAW = 1094,
+ ARC_SIMD_BUILTIN_VUPSBW = 1095,
+
+ ARC_SIMD_BUILTIN_VDIRUN = 1100,
+ ARC_SIMD_BUILTIN_VDORUN = 1101,
+ ARC_SIMD_BUILTIN_VDIWR = 1102,
+ ARC_SIMD_BUILTIN_VDOWR = 1103,
+
+ ARC_SIMD_BUILTIN_VREC = 1105,
+ ARC_SIMD_BUILTIN_VRUN = 1106,
+ ARC_SIMD_BUILTIN_VRECRUN = 1107,
+ ARC_SIMD_BUILTIN_VENDREC = 1108,
+
+ ARC_SIMD_BUILTIN_VLD32WH = 1110,
+ ARC_SIMD_BUILTIN_VLD32WL = 1111,
+ ARC_SIMD_BUILTIN_VLD64 = 1112,
+ ARC_SIMD_BUILTIN_VLD32 = 1113,
+ ARC_SIMD_BUILTIN_VLD64W = 1114,
+ ARC_SIMD_BUILTIN_VLD128 = 1115,
+ ARC_SIMD_BUILTIN_VST128 = 1116,
+ ARC_SIMD_BUILTIN_VST64 = 1117,
+
+ ARC_SIMD_BUILTIN_VST16_N = 1120,
+ ARC_SIMD_BUILTIN_VST32_N = 1121,
+
+ ARC_SIMD_BUILTIN_VINTI = 1201,
+
+ ARC_SIMD_BUILTIN_END
+};
+
/* A nop is needed between a 4 byte insn that sets the condition codes and
a branch that uses them (the same isn't true for an 8 byte insn that sets
- the condition codes). Set by arc_final_prescan_insn. Used by
+ the condition codes). Set by arc_ccfsm_advance. Used by
arc_print_operand. */
-static int last_insn_set_cc_p;
-static int current_insn_set_cc_p;
-static bool arc_handle_option (size_t, const char *, int);
-static void record_cc_ref (rtx);
-static void arc_init_reg_tables (void);
+
static int get_arc_condition_code (rtx);
-const struct attribute_spec arc_attribute_table[];
-static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);
+/* Machine-specific attributes supported by the ARC back end; see the
+ comments on each entry below. */
+const struct attribute_spec arc_attribute_table[] =
+{
+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
+ /* Function calls made to this symbol must be done indirectly, because
+ it may lie outside of the 21/25 bit addressing range of a normal function
+ call. */
+ { "long_call", 0, 0, false, true, true, NULL },
+ /* Whereas these functions are always known to reside within the 21/25 bit
+ addressing range. */
+ { "short_call", 0, 0, false, true, true, NULL },
+ { NULL, 0, 0, false, false, false, NULL }
+};
static bool arc_assemble_integer (rtx, unsigned int, int);
-static void arc_output_function_prologue (FILE *, HOST_WIDE_INT);
-static void arc_output_function_epilogue (FILE *, HOST_WIDE_INT);
+static int arc_comp_type_attributes (const_tree, const_tree);
static void arc_file_start (void);
+static void arc_asm_file_start (FILE *) ATTRIBUTE_UNUSED;
+static void arc_asm_file_end (void);
static void arc_internal_label (FILE *, const char *, unsigned long);
-static void arc_va_start (tree, rtx);
static void arc_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
tree, int *, int);
+static void arc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
+ tree);
static bool arc_rtx_costs (rtx, int, int, int *, bool);
static int arc_address_cost (rtx, bool);
-static void arc_external_libcall (rtx);
+static void arc_encode_section_info (tree decl, rtx rtl, int first);
+static const char *arc_strip_name_encoding (const char *name);
+static bool arc_cannot_force_const_mem (rtx);
+
+static void arc_init_builtins (void);
+static rtx arc_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
+
+static int branch_dest (rtx);
+static void arc_encode_symbol (tree, const char);
+
+static void arc_output_pic_addr_const (FILE *, rtx, int);
+int symbolic_reference_mentioned_p (rtx);
+void arc_assemble_name (FILE *, const char*);
+int arc_raw_symbolic_reference_mentioned_p (rtx);
+int arc_legitimate_pic_addr_p (rtx) ATTRIBUTE_UNUSED;
+void emit_pic_move (rtx *, enum machine_mode) ATTRIBUTE_UNUSED;
+bool arc_legitimate_pic_operand_p (rtx);
+bool arc_legitimate_constant_p (rtx);
+static bool arc_function_ok_for_sibcall (tree, tree);
+static rtx arc_function_value (const_tree, const_tree, bool);
+const char * output_shift (rtx *);
+static void arc_reorg (void);
+static bool arc_in_small_data_p (const_tree);
+
+static void arc_init_reg_tables (void);
static bool arc_return_in_memory (const_tree, const_tree);
static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
const_tree, bool);
-
-/* Initialize the GCC target structure. */
+static int arc_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
+ tree, bool);
+
+static void arc_init_simd_builtins (void);
+static bool arc_vector_mode_supported_p (enum machine_mode);
+
+static const char *arc_invalid_within_doloop (const_rtx);
+
+static void output_short_suffix (FILE *file);
+
+/* Implements target hook vector_mode_supported_p. */
+static bool
+arc_vector_mode_supported_p (enum machine_mode mode)
+{
+ if (!TARGET_SIMD_SET)
+ return false;
+
+ if ((mode == V4SImode)
+ || (mode == V8HImode))
+ return true;
+
+ return false;
+}
+
+/* To be defined for interrupt attribute addition. */
+/*static tree arc_handle_interrupt_attribute (tree *, tree, tree, int, bool *);*/
+
+
+static bool arc_preserve_reload_p (rtx in);
+static rtx arc_delegitimize_address (rtx);
+static bool arc_can_follow_jump (const_rtx follower, const_rtx followee);
+
+static rtx frame_insn (rtx);
+
+/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arc_assemble_integer
-
-#undef TARGET_ASM_FUNCTION_PROLOGUE
-#define TARGET_ASM_FUNCTION_PROLOGUE arc_output_function_prologue
-#undef TARGET_ASM_FUNCTION_EPILOGUE
-#define TARGET_ASM_FUNCTION_EPILOGUE arc_output_function_epilogue
+#undef TARGET_COMP_TYPE_ATTRIBUTES
+#define TARGET_COMP_TYPE_ATTRIBUTES arc_comp_type_attributes
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arc_file_start
+#undef TARGET_ASM_FILE_END
+#define TARGET_ASM_FILE_END arc_asm_file_end
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arc_attribute_table
#undef TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arc_internal_label
-#undef TARGET_ASM_EXTERNAL_LIBCALL
-#define TARGET_ASM_EXTERNAL_LIBCALL arc_external_libcall
-
-#undef TARGET_HANDLE_OPTION
-#define TARGET_HANDLE_OPTION arc_handle_option
-
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arc_address_cost
+#undef TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO arc_encode_section_info
+
+#undef TARGET_STRIP_NAME_ENCODING
+#define TARGET_STRIP_NAME_ENCODING arc_strip_name_encoding
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM arc_cannot_force_const_mem
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS arc_init_builtins
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN arc_expand_builtin
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK arc_output_mi_thunk
+
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL arc_function_ok_for_sibcall
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG arc_reorg
+
+#undef TARGET_IN_SMALL_DATA_P
+#define TARGET_IN_SMALL_DATA_P arc_in_small_data_p
+
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_const_tree_true
+
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
+
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
@@ -139,39 +464,202 @@ static bool arc_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
#define TARGET_RETURN_IN_MEMORY arc_return_in_memory
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arc_pass_by_reference
-#undef TARGET_CALLEE_COPIES
-#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arc_setup_incoming_varargs
-#undef TARGET_EXPAND_BUILTIN_VA_START
-#define TARGET_EXPAND_BUILTIN_VA_START arc_va_start
+#undef TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES arc_arg_partial_bytes
-struct gcc_target targetm = TARGET_INITIALIZER;
-
-/* Implement TARGET_HANDLE_OPTION. */
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
-static bool
-arc_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE arc_function_value
+
+#if UCLIBC_DEFAULT
+#define DEFAULT_NO_SDATA MASK_NO_SDATA_SET
+#else
+#define DEFAULT_NO_SDATA 0
+#endif
+#undef TARGET_DEFAULT_TARGET_FLAGS
+#define TARGET_DEFAULT_TARGET_FLAGS (MASK_VOLATILE_CACHE_SET|DEFAULT_NO_SDATA)
+
+#undef TARGET_SCHED_ADJUST_PRIORITY
+#define TARGET_SCHED_ADJUST_PRIORITY arc_sched_adjust_priority
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P arc_vector_mode_supported_p
+
+#undef TARGET_INVALID_WITHIN_DOLOOP
+#define TARGET_INVALID_WITHIN_DOLOOP arc_invalid_within_doloop
+
+#undef TARGET_PRESERVE_RELOAD_P
+#define TARGET_PRESERVE_RELOAD_P arc_preserve_reload_p
+
+#undef TARGET_CAN_FOLLOW_JUMP
+#define TARGET_CAN_FOLLOW_JUMP arc_can_follow_jump
+
+#undef TARGET_DELEGITIMIZE_ADDRESS
+#define TARGET_DELEGITIMIZE_ADDRESS arc_delegitimize_address
+
+/* Usually, we will be able to scale anchor offsets.
+ When this fails, we want LEGITIMIZE_ADDRESS to kick in. */
+#undef TARGET_MIN_ANCHOR_OFFSET
+#define TARGET_MIN_ANCHOR_OFFSET (-1024)
+#undef TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET (1020)
+
+/* Try to keep the (mov:DF _, reg) as early as possible so
+ that the d<add/sub/mul>h-lr insns appear together and can
+ use the peephole2 pattern. */
+static int
+arc_sched_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
{
- switch (code)
+ rtx set = single_set (insn);
+ if (set
+ && GET_MODE (SET_SRC(set)) == DFmode
+ && GET_CODE (SET_SRC(set)) == REG)
{
- case OPT_mcpu_:
- return strcmp (arg, "base") == 0 || ARC_EXTENSION_CPU (arg);
-
- default:
- return true;
+ /* Incrementing priority by 20 (empirically derived). */
+ return priority + 20;
}
+
+ return priority;
}
-/* Called by OVERRIDE_OPTIONS to initialize various things. */
+struct gcc_target targetm = TARGET_INITIALIZER;
-void
-arc_init (void)
+/* Called by OVERRIDE_OPTIONS to initialize various things. */
+void
+arc_init (void)
{
char *tmp;
+ int target_found = 0;
+ enum attr_tune tune_dflt = TUNE_NONE;
+
+ if (TARGET_A4)
+ {
+ arc_cpu_string = "A4";
+ arc_cpu = PROCESSOR_A4;
+ target_found = 1;
+ }
+ else if (TARGET_A5)
+ {
+ arc_cpu_string = "A5";
+ arc_cpu = PROCESSOR_A5;
+ target_found = 1;
+ }
+ else if (TARGET_ARC600)
+ {
+ arc_cpu_string = "ARC600";
+ arc_cpu = PROCESSOR_ARC600;
+ tune_dflt = TUNE_ARC600;
+ target_found = 1;
+ }
+ else if (TARGET_ARC700)
+ {
+ arc_cpu_string = "ARC700";
+ arc_cpu = PROCESSOR_ARC700;
+ tune_dflt = TUNE_ARC700_4_2_STD;
+ target_found = 1;
+ }
+ if (arc_tune == TUNE_NONE)
+ arc_tune = tune_dflt;
+ /* Note: arc_multcost is only used in rtx_cost if speed is true. */
+ if (arc_multcost < 0)
+ switch (arc_tune)
+ {
+ case TUNE_ARC700_4_2_STD:
+ /* latency 7;
+ max throughput (1 multiply + 4 other insns) / 5 cycles. */
+ arc_multcost = COSTS_N_INSNS (4);
+ break;
+ case TUNE_ARC700_4_2_XMAC:
+ /* latency 5;
+ max throughput (1 multiply + 2 other insns) / 3 cycles. */
+ arc_multcost = COSTS_N_INSNS (3);
+ break;
+ case TUNE_ARC600:
+ if (TARGET_MUL64_SET)
+ {
+ arc_multcost = COSTS_N_INSNS (4);
+ break;
+ }
+ /* Fall through. */
+ default:
+ arc_multcost = COSTS_N_INSNS (30);
+ break;
+ }
+ if (TARGET_MIXED_CODE_SET)
+ {
+ /* -mmixed-code cannot be used with the option -mA4. */
+ if (TARGET_A4)
+ {
+ error ("-mmixed-code can't be used with the option -mA4");
+ }
+
+ /* If the -mmixed-code option is given but no target option is,
+ then ARC700 is selected automatically. */
+ if (!target_found)
+ {
+ target_flags |= MASK_ARC700;
+ arc_cpu_string = "ARC700";
+ arc_cpu = PROCESSOR_ARC700;
+ target_found = 1;
+ }
+ }
+ /* If none of the target options (-mA4, -mA5, -mARC600, -mARC700) is
+ given, select -mA5 as the default. */
+ if (!target_found)
+ {
+#if !UCLIBC_DEFAULT
+ target_flags |= MASK_A5;
+ arc_cpu_string = "A5";
+ arc_cpu = PROCESSOR_A5;
+#else
+ target_flags |= MASK_ARC700;
+ arc_cpu_string = "ARC700";
+ arc_cpu = PROCESSOR_ARC700;
+
+#endif
+ }
+
+ /* Support mul64 generation only for A4, A5 and ARC600. */
+ if (TARGET_MUL64_SET && TARGET_ARC700)
+ error ("-mmul64 not supported for ARC700");
+
+ /* MPY instructions are valid only for ARC700. */
+ if (TARGET_NOMPY_SET && !TARGET_ARC700)
+ error ("-mno-mpy supported only for ARC700");
+
+ /* mul/mac instructions are valid only for ARC600. */
+ if (TARGET_MULMAC_32BY16_SET && !TARGET_ARC600)
+ error ("-mmul32x16 supported only for ARC600");
+
+ /* Sanity checks for usage of the FPX switches. */
+ /* FPX-1. No fast and compact together. */
+ if ((TARGET_DPFP_FAST_SET && TARGET_DPFP_COMPACT_SET)
+ || (TARGET_SPFP_FAST_SET && TARGET_SPFP_COMPACT_SET))
+ error ("FPX fast and compact options cannot be specified together");
+
+ /* FPX-2. No fast-spfp for ARC600. */
+ if (TARGET_SPFP_FAST_SET && TARGET_ARC600)
+ error ("-mspfp_fast not available on ARC600");
+
+ /* FPX-3. No FPX extensions on pre-ARC600 cores. */
+ if ((TARGET_DPFP || TARGET_SPFP)
+ && !(TARGET_ARC600 || TARGET_ARC700))
+ error ("FPX extensions not available on pre-ARC600 cores");
+
+ /* Warn about unimplemented PIC on pre-ARC700 cores, and disable flag_pic. */
+ if (flag_pic && !TARGET_ARC700)
+ {
+ warning (DK_WARNING, "PIC is not supported for %s. Generating non-PIC code only.", arc_cpu_string);
+ flag_pic = 0;
+ }
+
/* Set the pseudo-ops for the various standard sections. */
arc_text_section = tmp = XNEWVEC (char, strlen (arc_text_string) + sizeof (ARC_SECTION_FORMAT) + 1);
sprintf (tmp, ARC_SECTION_FORMAT, arc_text_string);
@@ -188,16 +676,28 @@ arc_init (void)
arc_punct_chars['*'] = 1;
arc_punct_chars['?'] = 1;
arc_punct_chars['!'] = 1;
- arc_punct_chars['~'] = 1;
+ arc_punct_chars['^'] = 1;
+ arc_punct_chars['&'] = 1;
+ gcc_obstack_init (&arc_local_obstack);
}
-
+
/* The condition codes of the ARC, and the inverse function. */
-static const char *const arc_condition_codes[] =
+/* For short branches, the "c" / "nc" names are not defined in the ARC
+ Programmer's manual, so we have to use "lo" / "hs" instead. */
+static const char *arc_condition_codes[] =
{
- "al", 0, "eq", "ne", "p", "n", "c", "nc", "v", "nv",
+ "al", 0, "eq", "ne", "p", "n", "lo", "hs", "v", "nv",
"gt", "le", "ge", "lt", "hi", "ls", "pnz", 0
};
+enum arc_cc_code_index
+{
+ ARC_CC_AL, ARC_CC_EQ = ARC_CC_AL+2, ARC_CC_NE, ARC_CC_P, ARC_CC_N,
+ ARC_CC_C, ARC_CC_NC, ARC_CC_V, ARC_CC_NV,
+ ARC_CC_GT, ARC_CC_LE, ARC_CC_GE, ARC_CC_LT, ARC_CC_HI, ARC_CC_LS, ARC_CC_PNZ,
+ ARC_CC_LO = ARC_CC_C, ARC_CC_HS = ARC_CC_NC
+};
+
#define ARC_INVERSE_CONDITION_CODE(X) ((X) ^ 1)
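+/* The table pairs each condition with its inverse, so flipping the low
+   bit inverts a condition, e.g.
+   ARC_INVERSE_CONDITION_CODE (ARC_CC_EQ) == ARC_CC_NE and
+   ARC_INVERSE_CONDITION_CODE (ARC_CC_LO) == ARC_CC_HS. */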
/* Returns the index of the ARC condition code string in
@@ -207,18 +707,96 @@ static const char *const arc_condition_codes[] =
static int
get_arc_condition_code (rtx comparison)
{
- switch (GET_CODE (comparison))
- {
- case EQ : return 2;
- case NE : return 3;
- case GT : return 10;
- case LE : return 11;
- case GE : return 12;
- case LT : return 13;
- case GTU : return 14;
- case LEU : return 15;
- case LTU : return 6;
- case GEU : return 7;
+ switch (GET_MODE (XEXP (comparison, 0)))
+ {
+ case CCmode:
+ case SImode: /* For BRcc. */
+ switch (GET_CODE (comparison))
+ {
+ case EQ : return ARC_CC_EQ;
+ case NE : return ARC_CC_NE;
+ case GT : return ARC_CC_GT;
+ case LE : return ARC_CC_LE;
+ case GE : return ARC_CC_GE;
+ case LT : return ARC_CC_LT;
+ case GTU : return ARC_CC_HI;
+ case LEU : return ARC_CC_LS;
+ case LTU : return ARC_CC_LO;
+ case GEU : return ARC_CC_HS;
+ default : gcc_unreachable ();
+ }
+ case CC_ZNmode:
+ switch (GET_CODE (comparison))
+ {
+ case EQ : return ARC_CC_EQ;
+ case NE : return ARC_CC_NE;
+ case GE: return ARC_CC_P;
+ case LT: return ARC_CC_N;
+ case GT : return ARC_CC_PNZ;
+ default : gcc_unreachable ();
+ }
+ case CC_Zmode:
+ switch (GET_CODE (comparison))
+ {
+ case EQ : return ARC_CC_EQ;
+ case NE : return ARC_CC_NE;
+ default : gcc_unreachable ();
+ }
+ case CC_Cmode:
+ switch (GET_CODE (comparison))
+ {
+ case LTU : return ARC_CC_C;
+ case GEU : return ARC_CC_NC;
+ default : gcc_unreachable ();
+ }
+ case CC_FP_GTmode:
+ if (TARGET_SPFP)
+ switch (GET_CODE (comparison))
+ {
+ case GT : return ARC_CC_N;
+ case UNLE: return ARC_CC_P;
+ default : gcc_unreachable ();
+ }
+ else
+ switch (GET_CODE (comparison))
+ {
+ case GT : return ARC_CC_HI;
+ case UNLE : return ARC_CC_LS;
+ default : gcc_unreachable ();
+ }
+ case CC_FP_GEmode:
+ /* Same for FPX and non-FPX. */
+ switch (GET_CODE (comparison))
+ {
+ case GE : return ARC_CC_HS;
+ case UNLT : return ARC_CC_LO;
+ default : gcc_unreachable ();
+ }
+ case CC_FP_UNEQmode:
+ switch (GET_CODE (comparison))
+ {
+ case UNEQ : return ARC_CC_EQ;
+ case LTGT : return ARC_CC_NE;
+ default : gcc_unreachable ();
+ }
+ case CC_FP_ORDmode:
+ switch (GET_CODE (comparison))
+ {
+ case UNORDERED : return ARC_CC_C;
+ case ORDERED : return ARC_CC_NC;
+ default : gcc_unreachable ();
+ }
+ case CC_FPXmode:
+ switch (GET_CODE (comparison))
+ {
+ case EQ : return ARC_CC_EQ;
+ case NE : return ARC_CC_NE;
+ case UNORDERED : return ARC_CC_C;
+ case ORDERED : return ARC_CC_NC;
+ case LTGT : return ARC_CC_HI;
+ case UNEQ : return ARC_CC_LS;
+ default : gcc_unreachable ();
+ }
default : gcc_unreachable ();
}
/*NOTREACHED*/
@@ -229,35 +807,77 @@ get_arc_condition_code (rtx comparison)
return the mode to be used for the comparison. */
enum machine_mode
-arc_select_cc_mode (enum rtx_code op,
- rtx x ATTRIBUTE_UNUSED,
- rtx y ATTRIBUTE_UNUSED)
+arc_select_cc_mode (enum rtx_code op,
+ rtx x ATTRIBUTE_UNUSED,
+ rtx y ATTRIBUTE_UNUSED)
{
- switch (op)
- {
- case EQ :
- case NE :
- return CCZNmode;
- default :
- switch (GET_CODE (x))
- {
- case AND :
- case IOR :
- case XOR :
- case SIGN_EXTEND :
- case ZERO_EXTEND :
- return CCZNmode;
- case ASHIFT :
- case ASHIFTRT :
- case LSHIFTRT :
- return CCZNCmode;
- default:
- break;
- }
- }
+ enum machine_mode mode = GET_MODE (x);
+ rtx x1;
+
+ /* For an operation that sets the condition codes as a side-effect, the
+ C and V flags are not set as they are for cmp, so we can only use comparisons where
+ this doesn't matter. (For LT and GE we can use "mi" and "pl"
+ instead.) */
+ /* ??? We could use "pnz" for greater than zero, however, we could then
+ get into trouble because the comparison could not be reversed. */
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && y == const0_rtx
+ && (op == EQ || op == NE
+ || ((op == LT || op == GE) && GET_MODE_SIZE (GET_MODE (x)) <= 4)))
+ return CC_ZNmode;
+
+ /* Comparing X with a negated value tests a sum against zero, as in
+ "if (a + b)"; this maps to add.f. */
+ if (mode == SImode
+ && GET_CODE (y) == NEG
+ && (op == EQ || op == NE))
+ return CC_ZNmode;
+
+ /* Check if this is a test suitable for bxor.f . */
+ if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
+ && ((INTVAL (y) - 1) & INTVAL (y)) == 0
+ && INTVAL (y))
+ return CC_Zmode;
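+ /* E.g. "if (a == 0x10)": Y is a nonzero power of two, so the test can
+    presumably be emitted as "bxor.f 0,a,4" -- toggling bit 4 yields
+    zero exactly when a was 0x10 -- leaving only the Z flag
+    meaningful. */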
+
+ /* Check if this is a test suitable for add / bmsk.f . */
+ if (mode == SImode && (op == EQ || op == NE) && CONST_INT_P (y)
+ && GET_CODE (x) == AND && CONST_INT_P ((x1 = XEXP (x, 1)))
+ && ((INTVAL (x1) + 1) & INTVAL (x1)) == 0
+ && (~INTVAL (x1) | INTVAL (y)) < 0
+ && (~INTVAL (x1) | INTVAL (y)) > -0x800)
+ return CC_Zmode;
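+ /* A sketch of the presumed lowering (not spelled out in this patch):
+    X1 is a low-bit mask such as 0xff, so a test like
+    "if ((a & 0xff) == 0x12)" can become "add tmp,a,-0x12" followed by
+    "bmsk.f 0,tmp,7"; the low eight bits of a-0x12 are zero exactly
+    when (a & 0xff) == 0x12, so again only Z is needed. */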
+
+ if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
+ && GET_CODE (x) == PLUS
+ && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
+ return CC_Cmode;
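+ /* This matches unsigned-overflow idioms such as "if (a + b < a)",
+    where the result is just the carry out of the addition; add.f sets
+    the carry flag directly, so the "c"/"nc" conditions of CC_Cmode
+    suffice. */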
+
+ if ((mode == SFmode && TARGET_SPFP) || (mode == DFmode && TARGET_DPFP))
+ switch (op)
+ {
+ case EQ: case NE: case UNEQ: case LTGT: case ORDERED: case UNORDERED:
+ return CC_FPXmode;
+ case LT: case UNGE: case GT: case UNLE:
+ return CC_FP_GTmode;
+ case LE: case UNGT: case GE: case UNLT:
+ return CC_FP_GEmode;
+ default: gcc_unreachable ();
+ }
+ else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_OPTFPE)
+ switch (op)
+ {
+ case EQ: case NE: return CC_Zmode;
+ case LT: case UNGE:
+ case GT: case UNLE: return CC_FP_GTmode;
+ case LE: case UNGT:
+ case GE: case UNLT: return CC_FP_GEmode;
+ case UNEQ: case LTGT: return CC_FP_UNEQmode;
+ case ORDERED: case UNORDERED: return CC_FP_ORDmode;
+ default: gcc_unreachable ();
+ }
+
return CCmode;
}
-
+
/* Vectors to keep interesting information about registers where it can easily
be got. We use to use the actual mode value as the bit number, but there
is (or may be) more than 32 modes now. Instead we use two tables: one
@@ -270,7 +890,8 @@ arc_select_cc_mode (enum rtx_code op,
enum arc_mode_class {
C_MODE,
S_MODE, D_MODE, T_MODE, O_MODE,
- SF_MODE, DF_MODE, TF_MODE, OF_MODE
+ SF_MODE, DF_MODE, TF_MODE, OF_MODE,
+ V_MODE
};
/* Modes for condition codes. */
@@ -282,12 +903,18 @@ enum arc_mode_class {
/* Modes for double-word and smaller quantities. */
#define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << DF_MODE))
+/* Mode for 8-byte DF values only. */
+#define DF_MODES (1 << DF_MODE)
+
/* Modes for quad-word and smaller quantities. */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
+/* Modes for 128-bit vectors. */
+#define V_MODES (1 << (int) V_MODE)
+
/* Value is 1 if register/mode pair is acceptable on arc. */
-const unsigned int arc_hard_regno_mode_ok[] = {
+unsigned int arc_hard_regno_mode_ok[] = {
T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES,
T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, T_MODES, D_MODES,
@@ -295,9 +922,22 @@ const unsigned int arc_hard_regno_mode_ok[] = {
/* ??? Leave these as S_MODES for now. */
S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
+ DF_MODES, 0, DF_MODES, 0, S_MODES, S_MODES, S_MODES, S_MODES,
S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
+ S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES, S_MODES,
+
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+ V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES, V_MODES,
+
S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES,
- S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, C_MODES
+ S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES, S_MODES
};
unsigned int arc_mode_class [NUM_MACHINE_MODES];
@@ -340,410 +980,346 @@ arc_init_reg_tables (void)
else
arc_mode_class[i] = 0;
break;
- case MODE_CC:
- arc_mode_class[i] = 1 << (int) C_MODE;
+ case MODE_VECTOR_INT:
+ arc_mode_class[i] = 1 << (int) V_MODE;
break;
+ case MODE_CC:
default:
- arc_mode_class[i] = 0;
+ /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
+ we must explicitly check for them here. */
+ if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
+ || i == (int) CC_Cmode
+ || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode)
+ arc_mode_class[i] = 1 << (int) C_MODE;
+ else
+ arc_mode_class[i] = 0;
break;
}
}
-
- for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
- {
- if (i < 60)
- arc_regno_reg_class[i] = GENERAL_REGS;
- else if (i == 60)
- arc_regno_reg_class[i] = LPCOUNT_REG;
- else if (i == 61)
- arc_regno_reg_class[i] = NO_REGS /* CC_REG: must be NO_REGS */;
- else
- arc_regno_reg_class[i] = NO_REGS;
- }
}
-
-/* ARC specific attribute support.
- The ARC has these attributes:
- interrupt - for interrupt functions
-*/
-
-const struct attribute_spec arc_attribute_table[] =
-{
- /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
- { "interrupt", 1, 1, true, false, false, arc_handle_interrupt_attribute },
- { NULL, 0, 0, false, false, false, NULL }
-};
+/* Core registers 56..59 are used for multiply extension options.
+ The dsp option uses r56 and r57, these are then named acc1 and acc2.
+ acc1 is the highpart, and acc2 the lowpart, so which register gets which
+ number depends on endianness.
+ The mul64 multiplier options use r57 for mlo, r58 for mmid and r59 for mhi.
+ Because mlo / mhi form a 64 bit value, we use different gcc internal
+ register numbers to make them form a register pair as the gcc internals
+ know it. mmid gets number 57, if still available, and mlo / mhi get
+ number 58 and 59, depending on endianness. We use DBX_REGISTER_NUMBER
+ to map this back. */
+ char rname56[5] = "r56";
+ char rname57[5] = "r57";
+ char rname58[5] = "r58";
+ char rname59[5] = "r59";
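+/* E.g. on a little-endian mul64 target this yields gcc regno 57 = mmid
+   (hardware r58) and 58 = mlo (hardware r57), so DBX_REGISTER_NUMBER
+   presumably swaps 57 and 58 back when emitting debug info. */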
-/* Handle an "interrupt" attribute; arguments as in
- struct attribute_spec.handler. */
-static tree
-arc_handle_interrupt_attribute (tree *node ATTRIBUTE_UNUSED,
- tree name,
- tree args,
- int flags ATTRIBUTE_UNUSED,
- bool *no_add_attrs)
+void
+arc_conditional_register_usage (void)
{
- tree value = TREE_VALUE (args);
+ int regno;
+ int i;
+ int fix_start = 60, fix_end = 55;
- if (TREE_CODE (value) != STRING_CST)
+ if (TARGET_MUL64_SET)
+ {
+ fix_start = 57;
+ fix_end = 59;
+
+ /* We don't provide a name for mmid. In rtl / assembly resource lists,
+ you are supposed to refer to it as mlo & mhi, e.g.
+ (zero_extract:SI (reg:DI 58) (const_int 32) (const_int 16)).
+ In an actual asm instruction, you of course use mmid.
+ The point of avoiding a separate register for mmid is that
+ this way, we don't have to carry clobbers of that reg around in every
+ instruction that modifies mlo and/or mhi. */
+ strcpy (rname57, "");
+ strcpy (rname58, TARGET_BIG_ENDIAN ? "mhi" : "mlo");
+ strcpy (rname59, TARGET_BIG_ENDIAN ? "mlo" : "mhi");
+ }
+ if (TARGET_MULMAC_32BY16_SET)
+ {
+ fix_start = 56;
+ fix_end = fix_end > 57 ? fix_end : 57;
+ strcpy (rname56, TARGET_BIG_ENDIAN ? "acc1" : "acc2");
+ strcpy (rname57, TARGET_BIG_ENDIAN ? "acc2" : "acc1");
+ }
+ for (regno = fix_start; regno <= fix_end; regno++)
{
- warning (OPT_Wattributes,
- "argument of %qs attribute is not a string constant",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
+ if (!fixed_regs[regno])
+ warning (0, "multiply option implies r%d is fixed", regno);
+ fixed_regs [regno] = call_used_regs[regno] = 1;
}
- else if (strcmp (TREE_STRING_POINTER (value), "ilink1")
- && strcmp (TREE_STRING_POINTER (value), "ilink2"))
+ if (TARGET_Q_CLASS)
{
- warning (OPT_Wattributes,
- "argument of %qs attribute is not \"ilink1\" or \"ilink2\"",
- IDENTIFIER_POINTER (name));
- *no_add_attrs = true;
+ reg_alloc_order[2] = 12;
+ reg_alloc_order[3] = 13;
+ reg_alloc_order[4] = 14;
+ reg_alloc_order[5] = 15;
+ reg_alloc_order[6] = 1;
+ reg_alloc_order[7] = 0;
+ reg_alloc_order[8] = 4;
+ reg_alloc_order[9] = 5;
+ reg_alloc_order[10] = 6;
+ reg_alloc_order[11] = 7;
+ reg_alloc_order[12] = 8;
+ reg_alloc_order[13] = 9;
+ reg_alloc_order[14] = 10;
+ reg_alloc_order[15] = 11;
+ }
+ if (TARGET_SIMD_SET)
+ {
+ int i;
+ for (i=64; i<88; i++)
+ reg_alloc_order [i] = i;
+ }
+ /* For ARCtangent-A5 / ARC600, lp_count may not be read in an instruction
+ immediately following one that sets it to a new value.
+ There was some discussion on how to enforce scheduling constraints for
+ processors with missing interlocks on the gcc mailing list:
+ http://gcc.gnu.org/ml/gcc/2008-05/msg00021.html .
+ However, we can't actually use this approach, because for ARC the
+ delay slot scheduling pass is active, which runs after
+ machine_dependent_reorg. */
+ if (TARGET_ARC600)
+ CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], LP_COUNT);
+ else if (!TARGET_ARC700)
+ fixed_regs[LP_COUNT] = 1;
+ for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
+ if (!call_used_regs[regno])
+ CLEAR_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
+ for (regno = 32; regno < 60; regno++)
+ if (!fixed_regs[regno])
+ SET_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS], regno);
+ if (TARGET_ARC700)
+ {
+ for (regno = 32; regno <= 60; regno++)
+ CLEAR_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], regno);
+ arc_hard_regno_mode_ok[60] = 1 << (int) S_MODE;
}
- return NULL_TREE;
-}
+ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
+ {
+ if (i < 29)
+ {
+ if (TARGET_Q_CLASS && ((i <= 3) || ((i >= 12) && (i <= 15))))
+ arc_regno_reg_class[i] = ARCOMPACT16_REGS;
+ else
+ arc_regno_reg_class[i] = GENERAL_REGS;
+ }
+ else if (i < 60)
+ arc_regno_reg_class[i]
+ = (fixed_regs[i]
+ ? (TEST_HARD_REG_BIT (reg_class_contents[CHEAP_CORE_REGS], i)
+ ? CHEAP_CORE_REGS : ALL_CORE_REGS)
+ : WRITABLE_CORE_REGS);
+ else
+ {
+ arc_regno_reg_class[i] = NO_REGS;
+ } /* if */
+ }
-
-/* Acceptable arguments to the call insn. */
+ /* ARCOMPACT16_REGS is empty if TARGET_Q_CLASS has not been activated. */
+ if (!TARGET_Q_CLASS)
+ {
+ CLEAR_HARD_REG_SET (reg_class_contents[ARCOMPACT16_REGS]);
+ CLEAR_HARD_REG_SET (reg_class_contents[AC16_BASE_REGS]);
+ }
-int
-call_address_operand (rtx op, enum machine_mode mode)
-{
- return (symbolic_operand (op, mode)
- || (GET_CODE (op) == CONST_INT && LEGITIMATE_CONSTANT_P (op))
- || (GET_CODE (op) == REG));
-}
+ gcc_assert (FIRST_PSEUDO_REGISTER >= 144);
-int
-call_operand (rtx op, enum machine_mode mode)
-{
- if (GET_CODE (op) != MEM)
- return 0;
- op = XEXP (op, 0);
- return call_address_operand (op, mode);
-}
+ /* Handle Special Registers */
+ arc_regno_reg_class[29] = LINK_REGS; /* ilink1 register */
+ arc_regno_reg_class[30] = LINK_REGS; /* ilink2 register */
+ arc_regno_reg_class[31] = LINK_REGS; /* blink register */
+ arc_regno_reg_class[60] = LPCOUNT_REG;
+ arc_regno_reg_class[61] = NO_REGS; /* CC_REG: must be NO_REGS */
+ arc_regno_reg_class[62] = GENERAL_REGS;
-/* Returns 1 if OP is a symbol reference. */
+ if (TARGET_DPFP)
+ {
+ arc_regno_reg_class[40] = DOUBLE_REGS;
+ arc_regno_reg_class[41] = DOUBLE_REGS;
+ arc_regno_reg_class[42] = DOUBLE_REGS;
+ arc_regno_reg_class[43] = DOUBLE_REGS;
+ }
+ else
+ {
+ /* Disable all DOUBLE_REGS settings
+ if not generating DPFP code. */
+ arc_regno_reg_class[40] = ALL_REGS;
+ arc_regno_reg_class[41] = ALL_REGS;
+ arc_regno_reg_class[42] = ALL_REGS;
+ arc_regno_reg_class[43] = ALL_REGS;
-int
-symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- switch (GET_CODE (op))
- {
- case SYMBOL_REF:
- case LABEL_REF:
- case CONST :
- return 1;
- default:
- return 0;
- }
-}
+ arc_hard_regno_mode_ok[40] = 0;
+ arc_hard_regno_mode_ok[42] = 0;
-/* Return truth value of statement that OP is a symbolic memory
- operand of mode MODE. */
+ CLEAR_HARD_REG_SET (reg_class_contents[DOUBLE_REGS]);
+ }
-int
-symbolic_memory_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- if (GET_CODE (op) == SUBREG)
- op = SUBREG_REG (op);
- if (GET_CODE (op) != MEM)
- return 0;
- op = XEXP (op, 0);
- return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
- || GET_CODE (op) == LABEL_REF);
-}
+ if (TARGET_SIMD_SET)
+ {
+ gcc_assert (ARC_FIRST_SIMD_VR_REG == 64);
+ gcc_assert (ARC_LAST_SIMD_VR_REG == 127);
-/* Return true if OP is a short immediate (shimm) value. */
+ for (i = ARC_FIRST_SIMD_VR_REG; i <= ARC_LAST_SIMD_VR_REG; i++)
+ arc_regno_reg_class [i] = SIMD_VR_REGS;
-int
-short_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- if (GET_CODE (op) != CONST_INT)
- return 0;
- return SMALL_INT (INTVAL (op));
-}
+ gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_REG == 128);
+ gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_IN_REG == 128);
+ gcc_assert (ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG == 136);
+ gcc_assert (ARC_LAST_SIMD_DMA_CONFIG_REG == 143);
-/* Return true if OP will require a long immediate (limm) value.
- This is currently only used when calculating length attributes. */
+ for (i = ARC_FIRST_SIMD_DMA_CONFIG_REG; i <= ARC_LAST_SIMD_DMA_CONFIG_REG; i++)
+ arc_regno_reg_class [i] = SIMD_DMA_CONFIG_REGS;
+ }
-int
-long_immediate_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- switch (GET_CODE (op))
- {
- case SYMBOL_REF :
- case LABEL_REF :
- case CONST :
- return 1;
- case CONST_INT :
- return !SMALL_INT (INTVAL (op));
- case CONST_DOUBLE :
- /* These can happen because large unsigned 32-bit constants are
- represented this way (the multiplication patterns can cause these
- to be generated). They also occur for SFmode values. */
- return 1;
- default:
- break;
- }
- return 0;
+ /* pc : r63 */
+ arc_regno_reg_class[PROGRAM_COUNTER_REGNO] = GENERAL_REGS;
}
-/* Return true if OP is a MEM that when used as a load or store address will
- require an 8 byte insn.
- Load and store instructions don't allow the same possibilities but they're
- similar enough that this one function will do.
- This is currently only used when calculating length attributes. */
+/* ARC-specific attribute support.
+
+ The ARC has these attributes:
+ interrupt - for interrupt functions
+*/
+
+/* Return nonzero if IDENTIFIER is a valid decl attribute. */
int
-long_immediate_loadstore_operand (rtx op,
- enum machine_mode mode ATTRIBUTE_UNUSED)
+arc_valid_machine_decl_attribute (tree type ATTRIBUTE_UNUSED,
+ tree attributes ATTRIBUTE_UNUSED,
+ tree identifier ATTRIBUTE_UNUSED,
+ tree args ATTRIBUTE_UNUSED)
{
- if (GET_CODE (op) != MEM)
- return 0;
-
- op = XEXP (op, 0);
- switch (GET_CODE (op))
+ if (identifier == get_identifier ("__interrupt__")
+ && list_length (args) == 1
+ && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
{
- case SYMBOL_REF :
- case LABEL_REF :
- case CONST :
- return 1;
- case CONST_INT :
- /* This must be handled as "st c,[limm]". Ditto for load.
- Technically, the assembler could translate some possibilities to
- "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't
- assume that it does. */
- return 1;
- case CONST_DOUBLE :
- /* These can happen because large unsigned 32-bit constants are
- represented this way (the multiplication patterns can cause these
- to be generated). They also occur for SFmode values. */
- return 1;
- case REG :
- return 0;
- case PLUS :
- if (GET_CODE (XEXP (op, 1)) == CONST_INT
- && !SMALL_INT (INTVAL (XEXP (op, 1))))
+ tree value = TREE_VALUE (args);
+
+ if (!strcmp (TREE_STRING_POINTER (value), "ilink1")
+ || !strcmp (TREE_STRING_POINTER (value), "ilink2"))
return 1;
- return 0;
- default:
- break;
}
return 0;
}
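+/* Typical accepted usage (the double-underscore spelling is the
+   canonical form of the same attribute):
+     void handler (void) __attribute__ ((interrupt ("ilink1")));
+   with "ilink2" as the only other valid argument. */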
-/* Return true if OP is an acceptable argument for a single word
- move source. */
+/* Return zero if TYPE1 and TYPE2 are incompatible, one if they are compatible,
+ and two if they are nearly compatible (which causes a warning to be
+ generated). */
-int
-move_src_operand (rtx op, enum machine_mode mode)
+static int
+arc_comp_type_attributes (const_tree type1,
+ const_tree type2)
{
- switch (GET_CODE (op))
- {
- case SYMBOL_REF :
- case LABEL_REF :
- case CONST :
- return 1;
- case CONST_INT :
- return (LARGE_INT (INTVAL (op)));
- case CONST_DOUBLE :
- /* We can handle DImode integer constants in SImode if the value
- (signed or unsigned) will fit in 32 bits. This is needed because
- large unsigned 32-bit constants are represented as CONST_DOUBLEs. */
- if (mode == SImode)
- return arc_double_limm_p (op);
- /* We can handle 32-bit floating point constants. */
- if (mode == SFmode)
- return GET_MODE (op) == SFmode;
- return 0;
- case REG :
- return register_operand (op, mode);
- case SUBREG :
- /* (subreg (mem ...) ...) can occur here if the inner part was once a
- pseudo-reg and is now a stack slot. */
- if (GET_CODE (SUBREG_REG (op)) == MEM)
- return address_operand (XEXP (SUBREG_REG (op), 0), mode);
- else
- return register_operand (op, mode);
- case MEM :
- return address_operand (XEXP (op, 0), mode);
- default :
- return 0;
- }
-}
+ int l1, l2, s1, s2;
+
+ /* Check for mismatch of non-default calling convention. */
+ if (TREE_CODE (type1) != FUNCTION_TYPE)
+ return 1;
-/* Return true if OP is an acceptable argument for a double word
- move source. */
+ /* Check for mismatched call attributes. */
+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
-int
-move_double_src_operand (rtx op, enum machine_mode mode)
-{
- switch (GET_CODE (op))
+ /* Only bother to check if an attribute is defined. */
+ if (l1 | l2 | s1 | s2)
{
- case REG :
- return register_operand (op, mode);
- case SUBREG :
- /* (subreg (mem ...) ...) can occur here if the inner part was once a
- pseudo-reg and is now a stack slot. */
- if (GET_CODE (SUBREG_REG (op)) == MEM)
- return move_double_src_operand (SUBREG_REG (op), mode);
- else
- return register_operand (op, mode);
- case MEM :
- /* Disallow auto inc/dec for now. */
- if (GET_CODE (XEXP (op, 0)) == PRE_DEC
- || GET_CODE (XEXP (op, 0)) == PRE_INC)
+ /* If one type has an attribute, the other must have the same attribute. */
+ if ((l1 != l2) || (s1 != s2))
return 0;
- return address_operand (XEXP (op, 0), mode);
- case CONST_INT :
- case CONST_DOUBLE :
- return 1;
- default :
- return 0;
- }
-}
-/* Return true if OP is an acceptable argument for a move destination. */
-
-int
-move_dest_operand (rtx op, enum machine_mode mode)
-{
- switch (GET_CODE (op))
- {
- case REG :
- return register_operand (op, mode);
- case SUBREG :
- /* (subreg (mem ...) ...) can occur here if the inner part was once a
- pseudo-reg and is now a stack slot. */
- if (GET_CODE (SUBREG_REG (op)) == MEM)
- return address_operand (XEXP (SUBREG_REG (op), 0), mode);
- else
- return register_operand (op, mode);
- case MEM :
- return address_operand (XEXP (op, 0), mode);
- default :
- return 0;
+ /* Disallow mixed attributes. */
+ if ((l1 & s2) || (l2 & s1))
+ return 0;
}
-}
-
-/* Return true if OP is valid load with update operand. */
+
-int
-load_update_operand (rtx op, enum machine_mode mode)
-{
- if (GET_CODE (op) != MEM
- || GET_MODE (op) != mode)
- return 0;
- op = XEXP (op, 0);
- if (GET_CODE (op) != PLUS
- || GET_MODE (op) != Pmode
- || !register_operand (XEXP (op, 0), Pmode)
- || !nonmemory_operand (XEXP (op, 1), Pmode))
- return 0;
return 1;
}
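+/* Example: two function types carrying mismatched call-type attributes,
+     void f (void) __attribute__ ((long_call));
+     void f (void) __attribute__ ((short_call));
+   compare as incompatible (0); identical or absent attributes give 1. */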
-/* Return true if OP is valid store with update operand. */
-
-int
-store_update_operand (rtx op, enum machine_mode mode)
-{
- if (GET_CODE (op) != MEM
- || GET_MODE (op) != mode)
- return 0;
- op = XEXP (op, 0);
- if (GET_CODE (op) != PLUS
- || GET_MODE (op) != Pmode
- || !register_operand (XEXP (op, 0), Pmode)
- || !(GET_CODE (XEXP (op, 1)) == CONST_INT
- && SMALL_INT (INTVAL (XEXP (op, 1)))))
- return 0;
- return 1;
-}
+/* Set the default attributes for TYPE. */
-/* Return true if OP is a non-volatile non-immediate operand.
- Volatile memory refs require a special "cache-bypass" instruction
- and only the standard movXX patterns are set up to handle them. */
-
-int
-nonvol_nonimm_operand (rtx op, enum machine_mode mode)
-{
- if (GET_CODE (op) == MEM && MEM_VOLATILE_P (op))
- return 0;
- return nonimmediate_operand (op, mode);
-}
-
-/* Accept integer operands in the range -0x80000000..0x7fffffff. We have
- to check the range carefully since this predicate is used in DImode
- contexts. */
-
-int
-const_sint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- /* All allowed constants will fit a CONST_INT. */
- return (GET_CODE (op) == CONST_INT
- && (INTVAL (op) >= (-0x7fffffff - 1) && INTVAL (op) <= 0x7fffffff));
-}
-
-/* Accept integer operands in the range 0..0xffffffff. We have to check the
- range carefully since this predicate is used in DImode contexts. Also, we
- need some extra crud to make it work when hosted on 64-bit machines. */
-
-int
-const_uint32_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+void
+arc_set_default_type_attributes (tree type ATTRIBUTE_UNUSED)
{
-#if HOST_BITS_PER_WIDE_INT > 32
- /* All allowed constants will fit a CONST_INT. */
- return (GET_CODE (op) == CONST_INT
- && (INTVAL (op) >= 0 && INTVAL (op) <= 0xffffffffL));
-#else
- return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0)
- || (GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_HIGH (op) == 0));
-#endif
+ gcc_unreachable ();
}
-/* Return 1 if OP is a comparison operator valid for the mode of CC.
- This allows the use of MATCH_OPERATOR to recognize all the branch insns.
-
- Some insns only set a few bits in the condition code. So only allow those
- comparisons that use the bits that are valid. */
-
-int
-proper_comparison_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
-{
- enum rtx_code code;
- if (!COMPARISON_P (op))
- return 0;
-
- code = GET_CODE (op);
- if (GET_MODE (XEXP (op, 0)) == CCZNmode)
- return (code == EQ || code == NE);
- if (GET_MODE (XEXP (op, 0)) == CCZNCmode)
- return (code == EQ || code == NE
- || code == LTU || code == GEU || code == GTU || code == LEU);
- return 1;
-}
-
/* Misc. utilities. */
/* X and Y are two things to compare using CODE. Emit the compare insn and
return the rtx for the cc reg in the proper mode. */
rtx
-gen_compare_reg (enum rtx_code code, rtx x, rtx y)
+gen_compare_reg (enum rtx_code code, enum machine_mode omode)
{
+ rtx x = arc_compare_op0, y = arc_compare_op1;
enum machine_mode mode = SELECT_CC_MODE (code, x, y);
+ enum machine_mode cmode = GET_MODE (x);
rtx cc_reg;
cc_reg = gen_rtx_REG (mode, 61);
- emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
- gen_rtx_COMPARE (mode, x, y)));
+ if ((cmode == SFmode && TARGET_SPFP) || (cmode == DFmode && TARGET_DPFP))
+ {
+ switch (code)
+ {
+ case NE: case EQ: case LT: case UNGE: case LE: case UNGT:
+ case UNEQ: case LTGT: case ORDERED: case UNORDERED:
+ break;
+ case GT: case UNLE: case GE: case UNLT:
+ code = swap_condition (code);
+ x = arc_compare_op1;
+ y = arc_compare_op0;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ emit_insn ((cmode == SFmode ? gen_cmpsfpx_raw : gen_cmpdfpx_raw) (x, y));
+ if (mode != CC_FPXmode)
+ emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
+ gen_rtx_COMPARE (mode,
+ gen_rtx_REG (CC_FPXmode, 61),
+ const0_rtx)));
+ }
+ else if (GET_MODE_CLASS (cmode) == MODE_FLOAT && TARGET_OPTFPE)
+ {
+ rtx op0 = gen_rtx_REG (cmode, 0);
+ rtx op1 = gen_rtx_REG (cmode, GET_MODE_SIZE (cmode) / UNITS_PER_WORD);
- return cc_reg;
+ switch (code)
+ {
+ case NE: case EQ: case GT: case UNLE: case GE: case UNLT:
+ case UNEQ: case LTGT: case ORDERED: case UNORDERED:
+ break;
+ case LT: case UNGE: case LE: case UNGT:
+ code = swap_condition (code);
+ x = arc_compare_op1;
+ y = arc_compare_op0;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (currently_expanding_to_rtl)
+ {
+ emit_move_insn (op0, x);
+ emit_move_insn (op1, y);
+ }
+ else
+ {
+ gcc_assert (rtx_equal_p (op0, x));
+ gcc_assert (rtx_equal_p (op1, y));
+ }
+ emit_insn (gen_cmp_float (cc_reg, gen_rtx_COMPARE (mode, op0, op1)));
+ }
+ else
+ emit_insn (gen_rtx_SET (omode, cc_reg,
+ gen_rtx_COMPARE (mode, x, y)));
+ return gen_rtx_fmt_ee (code, omode, cc_reg, const0_rtx);
}
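+/* E.g. with single-precision FPX (TARGET_SPFP), an SFmode "a > b" has
+   no direct GT compare above, so it is emitted as "b < a":
+   swap_condition turns GT into LT and the operands are exchanged
+   before the raw compare insn. */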
/* Return 1 if VALUE, a const_double, will fit in a limm (4 byte number).
@@ -756,6 +1332,9 @@ arc_double_limm_p (rtx value)
gcc_assert (GET_CODE (value) == CONST_DOUBLE);
+ if (TARGET_DPFP)
+ return 1;
+
low = CONST_DOUBLE_LOW (value);
high = CONST_DOUBLE_HIGH (value);
@@ -771,148 +1350,106 @@ arc_double_limm_p (rtx value)
return (unsigned HOST_WIDE_INT) low <= 0x7fffffff && high == 0;
}
}
-
+
/* Do any needed setup for a variadic function. For the ARC, we must
create a register parameter block, and then copy any anonymous arguments
in registers to memory.
CUM has not been updated for the last named argument which has type TYPE
- and mode MODE, and we rely on this fact.
-
- We do things a little weird here. We're supposed to only allocate space
- for the anonymous arguments. However we need to keep the stack eight byte
- aligned. So we round the space up if necessary, and leave it to va_start
- to compensate. */
-
-static void
-arc_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
- enum machine_mode mode,
- tree type ATTRIBUTE_UNUSED,
- int *pretend_size,
- int no_rtl)
+ and mode MODE, and we rely on this fact. */
+void
+arc_setup_incoming_varargs (CUMULATIVE_ARGS *args_so_far,
+ enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED,
+ int *pretend_size,
+ int no_rtl)
{
int first_anon_arg;
+ CUMULATIVE_ARGS next_cum;
- /* All BLKmode values are passed by reference. */
- gcc_assert (mode != BLKmode);
-
- first_anon_arg = *cum + ((GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1)
- / UNITS_PER_WORD);
+ /* We must treat `__builtin_va_alist' as an anonymous arg. */
+
+ next_cum = *args_so_far;
+ arc_function_arg_advance (&next_cum, mode, type, 1);
+ first_anon_arg = next_cum;
- if (first_anon_arg < MAX_ARC_PARM_REGS && !no_rtl)
+ if (first_anon_arg < MAX_ARC_PARM_REGS)
{
+ /* First anonymous (unnamed) argument is in a reg. */
+
/* Note that first_reg_offset < MAX_ARC_PARM_REGS. */
int first_reg_offset = first_anon_arg;
- /* Size in words to "pretend" allocate. */
- int size = MAX_ARC_PARM_REGS - first_reg_offset;
- /* Extra slop to keep stack eight byte aligned. */
- int align_slop = size & 1;
- rtx regblock;
-
- regblock = gen_rtx_MEM (BLKmode,
- plus_constant (arg_pointer_rtx,
- FIRST_PARM_OFFSET (0)
- + align_slop * UNITS_PER_WORD));
- set_mem_alias_set (regblock, get_varargs_alias_set ());
- set_mem_align (regblock, BITS_PER_WORD);
- move_block_from_reg (first_reg_offset, regblock,
- MAX_ARC_PARM_REGS - first_reg_offset);
-
- *pretend_size = ((MAX_ARC_PARM_REGS - first_reg_offset + align_slop)
- * UNITS_PER_WORD);
- }
-}
-
-/* Cost functions. */
-/* Compute a (partial) cost for rtx X. Return true if the complete
- cost has been computed, and false if subexpressions should be
- scanned. In either case, *TOTAL contains the cost result. */
-
-static bool
-arc_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
- bool speed ATTRIBUTE_UNUSED)
-{
- switch (code)
- {
- /* Small integers are as cheap as registers. 4 byte values can
- be fetched as immediate constants - let's give that the cost
- of an extra insn. */
- case CONST_INT:
- if (SMALL_INT (INTVAL (x)))
+ if (!no_rtl)
{
- *total = 0;
- return true;
+ rtx regblock
+ = gen_rtx_MEM (BLKmode, plus_constant (arg_pointer_rtx,
+ FIRST_PARM_OFFSET (0)));
+ move_block_from_reg (first_reg_offset, regblock,
+ MAX_ARC_PARM_REGS - first_reg_offset);
}
- /* FALLTHRU */
- case CONST:
- case LABEL_REF:
- case SYMBOL_REF:
- *total = COSTS_N_INSNS (1);
- return true;
-
- case CONST_DOUBLE:
- {
- rtx high, low;
- split_double (x, &high, &low);
- *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
- + !SMALL_INT (INTVAL (low)));
- return true;
- }
-
- /* Encourage synth_mult to find a synthetic multiply when reasonable.
- If we need more than 12 insns to do a multiply, then go out-of-line,
- since the call overhead will be < 10% of the cost of the multiply. */
- case ASHIFT:
- case ASHIFTRT:
- case LSHIFTRT:
- if (TARGET_SHIFTER)
- *total = COSTS_N_INSNS (1);
- else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- *total = COSTS_N_INSNS (16);
- else
- *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
- return false;
-
- default:
- return false;
+ *pretend_size
+ = ((MAX_ARC_PARM_REGS - first_reg_offset) * UNITS_PER_WORD);
}
}
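+/* Worked example (assuming MAX_ARC_PARM_REGS is 8): for
+     int f (int a, ...);
+   the last named argument lands in r0, so first_anon_arg is 1; r1-r7
+   are dumped to the register parameter block and *pretend_size becomes
+   7 * UNITS_PER_WORD = 28 bytes. */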
+/* Cost functions. */
/* Provide the costs of an addressing mode that contains ADDR.
If ADDR is not a valid address, its cost is irrelevant. */
-static int
-arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
+int
+arc_address_cost (rtx addr, bool speed)
{
switch (GET_CODE (addr))
{
case REG :
- return 1;
+ return speed || satisfies_constraint_Rcq (addr) ? 0 : 1;
+ case PRE_INC: case PRE_DEC: case POST_INC: case POST_DEC:
+ case PRE_MODIFY: case POST_MODIFY:
+ return !speed;
case LABEL_REF :
case SYMBOL_REF :
case CONST :
- return 2;
+ /* Most likely needs a LIMM. */
+ return COSTS_N_INSNS (1);
case PLUS :
{
register rtx plus0 = XEXP (addr, 0);
register rtx plus1 = XEXP (addr, 1);
- if (GET_CODE (plus0) != REG)
+ if (GET_CODE (plus0) != REG
+ && (GET_CODE (plus0) != MULT
+ || !CONST_INT_P (XEXP (plus0, 1))
+ || (INTVAL (XEXP (plus0, 1)) != 2
+ && INTVAL (XEXP (plus0, 1)) != 4)))
break;
switch (GET_CODE (plus1))
{
case CONST_INT :
- return SMALL_INT (plus1) ? 1 : 2;
+ return (TARGET_A4
+ ? (SMALL_INT (plus1) ? 1 : 2)
+ : !RTX_OK_FOR_OFFSET_P (SImode, plus1)
+ ? COSTS_N_INSNS (1)
+ : speed
+ ? 0
+ : (satisfies_constraint_Rcq (plus0)
+ && satisfies_constraint_O (plus1))
+ ? 0
+ : 1);
+ case REG:
+ return (speed < 1 ? 0
+ : (satisfies_constraint_Rcq (plus0)
+ && satisfies_constraint_Rcq (plus1))
+ ? 0 : 1);
case CONST :
case SYMBOL_REF :
case LABEL_REF :
- return 2;
+ return COSTS_N_INSNS (1);
default:
break;
}
@@ -924,10 +1461,90 @@ arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
return 4;
}
-
+
+/* Emit instruction X with the frame related bit set. */
+static rtx
+frame_insn (rtx x)
+{
+ x = emit_insn (x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Emit a frame insn to move SRC to DST. */
+static rtx
+frame_move (rtx dst, rtx src)
+{
+ return frame_insn (gen_rtx_SET (VOIDmode, dst, src));
+}
+
+/* Like frame_move, but add a REG_INC note for REG if ADDR contains an
+ auto increment address, or is zero. */
+static rtx
+frame_move_inc (rtx dst, rtx src, rtx reg, rtx addr)
+{
+ rtx insn = frame_move (dst, src);
+
+ if (!addr
+ || GET_CODE (addr) == PRE_DEC || GET_CODE (addr) == POST_INC
+ || GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
+ REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, reg, 0);
+ return insn;
+}
+
+/* Emit a frame insn which adjusts a frame address register REG by OFFSET. */
+static rtx
+frame_add (rtx reg, HOST_WIDE_INT offset)
+{
+ gcc_assert ((offset & 0x3) == 0);
+ if (!offset)
+ return NULL_RTX;
+ return frame_move (reg, plus_constant (reg, offset));
+}
+
+/* Emit a frame insn which adjusts stack pointer by OFFSET. */
+static rtx
+frame_stack_add (HOST_WIDE_INT offset)
+{
+ return frame_add (stack_pointer_rtx, offset);
+}
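+/* Usage sketch: frame_stack_add (-16) emits the equivalent of
+   "sub sp,sp,16" as an RTX_FRAME_RELATED_P insn when allocating the
+   frame, and frame_stack_add (16) releases it again; a zero offset
+   emits nothing and returns NULL_RTX. */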
+
+/* Traditionally, we push saved registers first in the prologue,
+ then we allocate the rest of the frame - and reverse in the epilogue.
+ This has still its merits for ease of debugging, or saving code size
+ or even execution time if the stack frame is so large that some accesses
+ can't be encoded anymore with offsets in the instruction code when using
+ a different scheme.
+ Also, it would be a good starting point if we got instructions to help
+ with register save/restore.
+
+ However, often stack frames are small, and the pushing / popping has
+ some costs:
+ - the stack modification prevents a lot of scheduling.
+ - frame allocation / deallocation needs extra instructions.
+ - unless we know that we compile ARC700 user code, we need to put
+ a memory barrier after frame allocation / before deallocation to
+ prevent interrupts clobbering our data in the frame.
+ In particular, we don't have any such guarantees for library functions,
+ which, on the other hand, tend to have small frames.
+
+ Thus, for small frames, we'd like to use a different scheme:
+ - The frame is allocated in full with the first prologue instruction,
+ and deallocated in full with the last epilogue instruction.
+ Thus, the instructions in between can be freely scheduled.
+ - If the function has no outgoing arguments on the stack, we can allocate
+ one register save slot at the top of the stack. This register can then
+ be saved simultaneously with frame allocation, and restored with
+ frame deallocation.
+ This register can be picked depending on scheduling considerations,
+ although some thought should go into having some set of registers
+ potentially lingering after a call, and others available
+ immediately - i.e. in the absence of interprocedural optimization, we
+ can use an ABI-like convention for register allocation to reduce
+ stalls after function return. */
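+/* Illustrative shape of the small-frame scheme (register choice and
+   frame size are made up for the example):
+     st.a r13,[sp,-32]  ; allocate 32 bytes and save r13 in one insn
+     ...                ; body, freely schedulable
+     ld.ab r13,[sp,32]  ; restore r13 and free the frame in one insn
+*/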
/* Function prologue/epilogue handlers. */
-/* ARC stack frames look like:
+/* ARCtangent-A4 stack frames look like:
Before call After call
+-----------------------+ +-----------------------+
@@ -968,6 +1585,55 @@ arc_address_cost (rtx addr, bool speed ATTRIBUTE_UNUSED)
memory | return addr, prev %fp |
SP+0->+-----------------------+
+ARCompact stack frames look like:
+
+ Before call After call
+ high +-----------------------+ +-----------------------+
+ mem | reg parm save area | | reg parm save area |
+ | only created for | | only created for |
+ | variable arg fns | | variable arg fns |
+ AP +-----------------------+ +-----------------------+
+ | return addr register | | return addr register |
+ | (if required) | | (if required) |
+ +-----------------------+ +-----------------------+
+ | | | |
+ | reg save area | | reg save area |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | frame pointer | | frame pointer |
+ | (if required) | | (if required) |
+ FP +-----------------------+ +-----------------------+
+ | | | |
+ | local/temp variables | | local/temp variables |
+ | | | |
+ +-----------------------+ +-----------------------+
+ | | | |
+ | arguments on stack | | arguments on stack |
+ | | | |
+ SP +-----------------------+ +-----------------------+
+ | reg parm save area |
+ | only created for |
+ | variable arg fns |
+ AP +-----------------------+
+ | return addr register |
+ | (if required) |
+ +-----------------------+
+ | |
+ | reg save area |
+ | |
+ +-----------------------+
+ | frame pointer |
+ | (if required) |
+ FP +-----------------------+
+ | |
+ | local/temp variables |
+ | |
+ +-----------------------+
+ | |
+ | arguments on stack |
+ low | |
+ mem SP +-----------------------+
+
Notes:
1) The "reg parm save area" does not exist for non variable argument fns.
The "reg parm save area" can be eliminated completely if we created our
@@ -975,7 +1641,7 @@ Notes:
/* Structure to be filled in by arc_compute_frame_size with register
save masks, and offsets for the current function. */
-struct arc_frame_info
+struct arc_frame_info GTY (())
{
unsigned int total_size; /* # bytes that the entire frame takes up. */
unsigned int extra_size; /* # bytes of extra stuff. */
@@ -986,13 +1652,27 @@ struct arc_frame_info
unsigned int reg_offset; /* Offset from new sp to store regs. */
unsigned int gmask; /* Mask of saved gp registers. */
int initialized; /* Nonzero if frame size already calculated. */
+ short millicode_start_reg;
+ short millicode_end_reg;
+ bool save_return_addr;
};
-/* Current frame information calculated by arc_compute_frame_size. */
-static struct arc_frame_info current_frame_info;
+/* Data structures for per-function information. */
-/* Zero structure to initialize current_frame_info. */
-static struct arc_frame_info zero_frame_info;
+typedef struct machine_function GTY (())
+{
+ enum arc_function_type fn_type;
+ struct arc_frame_info frame_info;
+ /* To keep track of unalignment caused by short insns. */
+ int unalign;
+ int force_short_suffix; /* Used when disgorging return delay slot insns. */
+ const char *size_reason;
+ struct arc_ccfsm ccfsm_current;
+ /* Map from uid to ccfsm state during branch shortening. */
+ rtx ccfsm_current_insn;
+ char arc_reorg_started;
+ char prescan_initialized;
+} machine_function;
/* Type of function DECL.
@@ -1000,30 +1680,20 @@ static struct arc_frame_info zero_frame_info;
call with DECL = NULL_TREE. */
enum arc_function_type
-arc_compute_function_type (tree decl)
+arc_compute_function_type (struct function *fun)
{
+ tree decl = fun->decl;
tree a;
- /* Cached value. */
- static enum arc_function_type fn_type = ARC_FUNCTION_UNKNOWN;
- /* Last function we were called for. */
- static tree last_fn = NULL_TREE;
-
- /* Resetting the cached value? */
- if (decl == NULL_TREE)
- {
- fn_type = ARC_FUNCTION_UNKNOWN;
- last_fn = NULL_TREE;
- return fn_type;
- }
+ enum arc_function_type fn_type = fun->machine->fn_type;
- if (decl == last_fn && fn_type != ARC_FUNCTION_UNKNOWN)
+ if (fn_type != ARC_FUNCTION_UNKNOWN)
return fn_type;
/* Assume we have a normal function (not an interrupt handler). */
fn_type = ARC_FUNCTION_NORMAL;
/* Now see if this is an interrupt handler. */
- for (a = DECL_ATTRIBUTES (current_function_decl);
+ for (a = DECL_ATTRIBUTES (decl);
a;
a = TREE_CHAIN (a))
{
@@ -1045,24 +1715,57 @@ arc_compute_function_type (tree decl)
}
}
- last_fn = decl;
- return fn_type;
+ return fun->machine->fn_type = fn_type;
}
-#define ILINK1_REGNUM 29
-#define ILINK2_REGNUM 30
-#define RETURN_ADDR_REGNUM 31
#define FRAME_POINTER_MASK (1 << (FRAME_POINTER_REGNUM))
#define RETURN_ADDR_MASK (1 << (RETURN_ADDR_REGNUM))
/* Tell prologue and epilogue if register REGNO should be saved / restored.
The return address and frame pointer are treated separately.
- Don't consider them here. */
+ Don't consider them here.
+ Addition for pic: The gp register needs to be saved if the current
+ function changes it to access gotoff variables.
+ FIXME: This will not be needed if we used some arbitrary register
+ instead of r26.
+*/
#define MUST_SAVE_REGISTER(regno, interrupt_p) \
-((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
- && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p)))
+(((regno) != RETURN_ADDR_REGNUM && (regno) != FRAME_POINTER_REGNUM \
+ && (df_regs_ever_live_p (regno) && (!call_used_regs[regno] || interrupt_p))) \
+ || (flag_pic && crtl->uses_pic_offset_table \
+ && regno == PIC_OFFSET_TABLE_REGNUM) )
+
+#define MUST_SAVE_RETURN_ADDR \
+ (cfun->machine->frame_info.save_return_addr)
+
+/* Return non-zero if there are registers to be saved or loaded using
+ millicode thunks. We can only use consecutive sequences starting
+ with r13, and not going beyond r25.
+ GMASK is a bitmask of registers to save. This function sets
+ FRAME->millicode_start_reg .. FRAME->millicode_end_reg to the range
+ of registers to be saved / restored with a millicode call. */
+static int
+arc_compute_millicode_save_restore_regs (unsigned int gmask,
+ struct arc_frame_info *frame)
+{
+ int regno;
-#define MUST_SAVE_RETURN_ADDR (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
+ int start_reg = 13, end_reg = 25;
+
+ for (regno = start_reg; regno <= end_reg && (gmask & (1L << regno));)
+ regno++;
+ end_reg = regno - 1;
+ /* There is no point in using millicode thunks if we don't save/restore
+ at least three registers. For non-leaf functions we also have the
+ blink restore. */
+ if (regno - start_reg >= 3 - (current_function_is_leaf == 0))
+ {
+ frame->millicode_start_reg = 13;
+ frame->millicode_end_reg = regno - 1;
+ return 1;
+ }
+ return 0;
+}
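+/* Example: a GMASK covering r13-r16 yields millicode_start_reg = 13,
+   millicode_end_reg = 16 and a return value of 1; r13-r14 alone in a
+   leaf function is below the three-register threshold, so the function
+   returns 0. */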
/* Return the bytes needed to compute the frame pointer from the current
stack pointer.
@@ -1070,7 +1773,7 @@ arc_compute_function_type (tree decl)
SIZE is the size needed for local variables. */
unsigned int
-arc_compute_frame_size (int size /* # of var. bytes allocated. */)
+arc_compute_frame_size (int size) /* size = # of var. bytes allocated. */
{
int regno;
unsigned int total_size, var_size, args_size, pretend_size, extra_size;
@@ -1078,23 +1781,26 @@ arc_compute_frame_size (int size /* # of var. bytes allocated. */)
unsigned int gmask;
enum arc_function_type fn_type;
int interrupt_p;
+ struct arc_frame_info *frame_info = &cfun->machine->frame_info;
+
+ size = ARC_STACK_ALIGN (size);
+ /* 1) Size of locals and temporaries. */
var_size = size;
+
+ /* 2) Size of outgoing arguments. */
args_size = crtl->outgoing_args_size;
- pretend_size = crtl->args.pretend_args_size;
- extra_size = FIRST_PARM_OFFSET (0);
- total_size = extra_size + pretend_size + args_size + var_size;
- reg_offset = FIRST_PARM_OFFSET(0) + crtl->outgoing_args_size;
- reg_size = 0;
- gmask = 0;
+
+ /* 3) Calculate space needed for saved registers.
+ ??? We ignore the extension registers for now. */
/* See if this is an interrupt handler. Call used registers must be saved
for them too. */
- fn_type = arc_compute_function_type (current_function_decl);
- interrupt_p = ARC_INTERRUPT_P (fn_type);
- /* Calculate space needed for registers.
- ??? We ignore the extension registers for now. */
+ reg_size = 0;
+ gmask = 0;
+ fn_type = arc_compute_function_type (cfun);
+ interrupt_p = ARC_INTERRUPT_P (fn_type);
for (regno = 0; regno <= 31; regno++)
{
@@ -1105,127 +1811,337 @@ arc_compute_frame_size (int size /* # of var. bytes allocated. */)
}
}
- total_size += reg_size;
+ /* 4) Space for back trace data structure.
+
+ For ARCtangent-A4:
+ <return addr reg size> + <fp size> + <static link reg size> +
+ <reserved-word>
- /* If the only space to allocate is the fp/blink save area this is an
- empty frame. However, if we'll be making a function call we need to
- allocate a stack frame for our callee's fp/blink save area. */
- if (total_size == extra_size
- && !MUST_SAVE_RETURN_ADDR)
- total_size = extra_size = 0;
+ For ARCompact:
+ <return addr reg size> (if required) + <fp size> (if required)
+ */
+ frame_info->save_return_addr
+ = (!current_function_is_leaf || df_regs_ever_live_p (RETURN_ADDR_REGNUM));
+ /* Save the blink reg in a leaf function too, since millicode thunk calls clobber it. */
+ if (optimize_size && !TARGET_NO_MILLICODE_THUNK_SET)
+ {
+ if (arc_compute_millicode_save_restore_regs (gmask, frame_info))
+ frame_info->save_return_addr = true;
+ }
+
+ if (TARGET_A4)
+ {
+ extra_size = 16;
+ }
+ else
+ {
+ extra_size = 0;
+ if (MUST_SAVE_RETURN_ADDR)
+ extra_size = 4;
+ if (frame_pointer_needed)
+ extra_size += 4;
+ }
+
+ /* 5) Space for variable arguments passed in registers. */
+ pretend_size = crtl->args.pretend_args_size;
+
+ /* Ensure everything before the locals is aligned appropriately. */
+ if (TARGET_ARCOMPACT)
+ {
+ unsigned int extra_plus_reg_size;
+ unsigned int extra_plus_reg_size_aligned;
+
+ extra_plus_reg_size = extra_size + reg_size;
+ extra_plus_reg_size_aligned = ARC_STACK_ALIGN(extra_plus_reg_size);
+ reg_size = extra_plus_reg_size_aligned - extra_size;
+ } /* if */
+
+ /* Compute total frame size. */
+ total_size = var_size + args_size + extra_size + pretend_size + reg_size;
total_size = ARC_STACK_ALIGN (total_size);
+ /* Compute offset of register save area from stack pointer:
+ A4 Frame: pretend_size var_size reg_size args_size extra_size <--sp
+ A5 Frame: pretend_size <blink> reg_size <fp> var_size args_size <--sp
+ */
+ if (TARGET_A4)
+ reg_offset = total_size - (pretend_size + var_size + reg_size);
+ else
+ reg_offset = total_size - (pretend_size + reg_size + extra_size) +
+ (frame_pointer_needed ? 4 : 0);
+
/* Save computed information. */
- current_frame_info.total_size = total_size;
- current_frame_info.extra_size = extra_size;
- current_frame_info.pretend_size = pretend_size;
- current_frame_info.var_size = var_size;
- current_frame_info.args_size = args_size;
- current_frame_info.reg_size = reg_size;
- current_frame_info.reg_offset = reg_offset;
- current_frame_info.gmask = gmask;
- current_frame_info.initialized = reload_completed;
+ frame_info->total_size = total_size;
+ frame_info->extra_size = extra_size;
+ frame_info->pretend_size = pretend_size;
+ frame_info->var_size = var_size;
+ frame_info->args_size = args_size;
+ frame_info->reg_size = reg_size;
+ frame_info->reg_offset = reg_offset;
+ frame_info->gmask = gmask;
+ frame_info->initialized = reload_completed;
/* Ok, we're done. */
return total_size;
}
-
-/* Common code to save/restore registers. */
-void
-arc_save_restore (FILE *file,
- const char *base_reg,
- unsigned int offset,
- unsigned int gmask,
- const char *op)
+/* Common code to save/restore registers. */
+/* epilogue_p 0: prologue 1:epilogue 2:epilogue, sibling thunk */
+static void
+arc_save_restore (rtx base_reg, unsigned int offset,
+ unsigned int gmask, int epilogue_p, int *first_offset)
{
int regno;
+ struct arc_frame_info *frame = &cfun->machine->frame_info;
+ rtx sibthunk_insn = NULL_RTX;
+ rtx extra_pop = NULL_RTX;
+
+ if (gmask)
+ {
+ /* Millicode thunks implementation:
+ Generates calls to millicode routines for registers from r13 to r25.
+ Present limitations:
+ > Only one range is supported. The remaining regs get ordinary
+ st and ld instructions for stores and loads. Hence a gmask asking
+ to store r13-r14 and r16-r25 will only generate millicode calls
+ covering r13-r14, while individual store and load insns are
+ generated for r16-r25 in the prologue and epilogue respectively.
+
+ > Presently the library only supports register ranges starting from
+ r13.
+ */
+ if (epilogue_p == 2 || frame->millicode_end_reg > 14)
+ {
+ int start_call = frame->millicode_start_reg;
+ int end_call = frame->millicode_end_reg;
+ int n_regs = end_call - start_call + 1;
+ int i = 0, r, off = 0;
+ rtx insn;
+ rtx ret_addr = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+
+ if (*first_offset)
+ {
+ /* "reg_size" won't be more than 127 */
+ gcc_assert (epilogue_p || abs (*first_offset <= 127));
+ frame_add (base_reg, *first_offset);
+ *first_offset = 0;
+ }
+ insn = gen_rtx_PARALLEL
+ (VOIDmode, rtvec_alloc ((epilogue_p == 2) + n_regs + 1));
+ if (epilogue_p == 2)
+ {
+ int adj = n_regs * 4;
+ rtx r12 = gen_rtx_REG (Pmode, 12);
+
+ frame_insn (gen_rtx_SET (VOIDmode, r12, GEN_INT (adj)));
+ XVECEXP (insn, 0, 0) = gen_rtx_RETURN (VOIDmode);
+ XVECEXP (insn, 0, 1)
+ = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
+ gen_rtx_PLUS (Pmode, stack_pointer_rtx, r12));
+ i += 2;
+ }
+ else
+ XVECEXP (insn, 0, n_regs) = gen_rtx_CLOBBER (VOIDmode, ret_addr);
+ for (r = start_call; r <= end_call; r++, off += UNITS_PER_WORD, i++)
+ {
+ rtx reg = gen_rtx_REG (SImode, r);
+ rtx mem = gen_frame_mem (SImode, plus_constant (base_reg, off));
- if (gmask == 0)
- return;
+ if (epilogue_p)
+ XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, reg, mem);
+ else
+ XVECEXP (insn, 0, i) = gen_rtx_SET (VOIDmode, mem, reg);
+ gmask = gmask & ~(1L << r);
+ }
+ if (epilogue_p == 2)
+ sibthunk_insn = insn;
+ else
+ {
+ frame_insn (insn);
+ offset += off;
+ }
+ }
- for (regno = 0; regno <= 31; regno++)
- {
- if ((gmask & (1L << regno)) != 0)
+ for (regno = 0; regno <= 31; regno++)
+ {
+ if ((gmask & (1L << regno)) != 0)
+ {
+ rtx reg = gen_rtx_REG (SImode, regno);
+ rtx addr, mem, insn;
+
+ if (epilogue_p == 2 && !extra_pop)
+ {
+ extra_pop = reg;
+ offset += UNITS_PER_WORD;
+ continue;
+ }
+ if (*first_offset)
+ {
+ gcc_assert (!offset);
+ addr = plus_constant (base_reg, *first_offset);
+ addr = gen_rtx_PRE_MODIFY (Pmode, base_reg, addr);
+ *first_offset = 0;
+ }
+ else
+ {
+ gcc_assert (SMALL_INT (offset));
+ addr = plus_constant (base_reg, offset);
+ }
+ mem = gen_frame_mem (SImode, addr);
+ if (epilogue_p)
+ insn = frame_move_inc (reg, mem, base_reg, addr);
+ else
+ insn = frame_move_inc (mem, reg, base_reg, addr);
+ offset += UNITS_PER_WORD;
+ } /* if */
+ } /* for */
+ if (extra_pop)
{
- fprintf (file, "\t%s %s,[%s,%d]\n",
- op, reg_names[regno], base_reg, offset);
- offset += UNITS_PER_WORD;
+ rtx addr = gen_rtx_POST_MODIFY (Pmode, base_reg,
+ plus_constant (base_reg, offset));
+ rtx mem = gen_frame_mem (SImode, addr);
+ frame_move_inc (extra_pop, mem, base_reg, addr);
}
+ }/* if */
+ if (sibthunk_insn)
+ {
+ sibthunk_insn = emit_jump_insn (sibthunk_insn);
+ RTX_FRAME_RELATED_P (sibthunk_insn) = 1;
}
-}
-
+} /* arc_save_restore */
+
+
/* Target hook to assemble an integer object. The ARC version needs to
emit a special directive for references to labels and function
- symbols. */
+ symbols. */
static bool
arc_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
- if (size == UNITS_PER_WORD && aligned_p
- && ((GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (x))
- || GET_CODE (x) == LABEL_REF))
+ if (size == UNITS_PER_WORD && aligned_p
+ && ((GET_CODE (x) == SYMBOL_REF && ARC_FUNCTION_NAME_PREFIX_P (*XSTR (x, 0)))
+ || GET_CODE (x) == LABEL_REF))
{
- fputs ("\t.word\t%st(", asm_out_file);
- output_addr_const (asm_out_file, x);
- fputs (")\n", asm_out_file);
- return true;
+ fputs ("\t.word\t", asm_out_file);
+ /* %st is to be generated only for A4. */
+ if (TARGET_A4)
+ fputs ("%st(", asm_out_file);
+ output_addr_const (asm_out_file, x);
+ if (TARGET_A4)
+ fputs (")", asm_out_file);
+ fputs ("\n", asm_out_file);
+ return true;
}
return default_assemble_integer (x, size, aligned_p);
}
-
-/* Set up the stack and frame pointer (if desired) for the function. */
-static void
-arc_output_function_prologue (FILE *file, HOST_WIDE_INT size)
-{
- const char *sp_str = reg_names[STACK_POINTER_REGNUM];
- const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
- unsigned int gmask = current_frame_info.gmask;
- enum arc_function_type fn_type = arc_compute_function_type (current_function_decl);
+int arc_return_address_regs[4]
+ = {0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM};
- /* If this is an interrupt handler, set up our stack frame.
- ??? Optimize later. */
- if (ARC_INTERRUPT_P (fn_type))
- {
- fprintf (file, "\t%s interrupt handler\n",
- ASM_COMMENT_START);
- fprintf (file, "\tsub %s,%s,16\n", sp_str, sp_str);
- }
-
- /* This is only for the human reader. */
- fprintf (file, "\t%s BEGIN PROLOGUE %s vars= %d, regs= %d, args= %d, extra= %d\n",
- ASM_COMMENT_START, ASM_COMMENT_START,
- current_frame_info.var_size,
- current_frame_info.reg_size / 4,
- current_frame_info.args_size,
- current_frame_info.extra_size);
+/* Set up the stack and frame pointer (if desired) for the function. */
+void
+arc_expand_prologue (void)
+{
+ int size = get_frame_size ();
+ unsigned int gmask = cfun->machine->frame_info.gmask;
+ /* unsigned int frame_pointer_offset;*/
+ unsigned int frame_size_to_allocate;
+ /* (FIXME: The first store will use a PRE_MODIFY; this will usually be r13.
+ Change the stack layout so that we rather store a high register with the
+ PRE_MODIFY, thus enabling more short insn generation.) */
+ int first_offset = 0;
size = ARC_STACK_ALIGN (size);
- size = (! current_frame_info.initialized
+
+ /* Compute/get total frame size.  */
+ size = (!cfun->machine->frame_info.initialized
? arc_compute_frame_size (size)
- : current_frame_info.total_size);
+ : cfun->machine->frame_info.total_size);
+
+ /* Keep track of frame size to be allocated.  */
+ frame_size_to_allocate = size;
/* These cases shouldn't happen. Catch them now. */
- gcc_assert (size || !gmask);
+ gcc_assert (!(size == 0 && gmask));
/* Allocate space for register arguments if this is a variadic function. */
- if (current_frame_info.pretend_size != 0)
- fprintf (file, "\tsub %s,%s,%d\n",
- sp_str, sp_str, current_frame_info.pretend_size);
+ if (cfun->machine->frame_info.pretend_size != 0)
+ {
+ /* Ensure pretend_size is at most 8 words (8 * UNITS_PER_WORD = 32 bytes).  */
+ gcc_assert (cfun->machine->frame_info.pretend_size <= 32);
- /* The home-grown ABI says link register is saved first. */
+ frame_stack_add (-cfun->machine->frame_info.pretend_size);
+ frame_size_to_allocate -= cfun->machine->frame_info.pretend_size;
+ }
+
+ /* The home-grown ABI says link register is saved first. */
if (MUST_SAVE_RETURN_ADDR)
- fprintf (file, "\tst %s,[%s,%d]\n",
- reg_names[RETURN_ADDR_REGNUM], sp_str, UNITS_PER_WORD);
+ {
+ if (TARGET_A4)
+ {
+#if 0
+ /* Save return address register in the space allocated by caller for
+ backtrace data structure */
+ fprintf (file, "\tst %s,[%s,%d]\n",
+ reg_names[RETURN_ADDR_REGNUM], sp_str, UNITS_PER_WORD);
+ if(doing_dwarf)
+ {
+ dwarf2out_reg_save ("", RETURN_ADDR_REGNUM, -cfa_offset + UNITS_PER_WORD);
+ }
+#endif
+
+ }
+ else /* TARGET_ARCOMPACT */
+ {
+ rtx ra = gen_rtx_REG (SImode, RETURN_ADDR_REGNUM);
+ rtx mem
+ = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx));
- /* Set up the previous frame pointer next (if we need to). */
+ frame_move_inc (mem, ra, stack_pointer_rtx, 0);
+ frame_size_to_allocate -= UNITS_PER_WORD;
+
+ } /* if */
+ } /* MUST_SAVE_RETURN_ADDR */
+
+ /* Save any needed call-saved regs (and call-used if this is an
+ interrupt handler) for ARCompact ISA. */
+ if (TARGET_ARCOMPACT && cfun->machine->frame_info.reg_size)
+ {
+ first_offset = -cfun->machine->frame_info.reg_size;
+ /* N.B. FRAME_POINTER_MASK and RETURN_ADDR_MASK are cleared in gmask. */
+ arc_save_restore (stack_pointer_rtx, 0, gmask, 0, &first_offset);
+ frame_size_to_allocate -= cfun->machine->frame_info.reg_size;
+ } /* if */
+
+
+ /* Save frame pointer if needed.  */
if (frame_pointer_needed)
{
- fprintf (file, "\tst %s,[%s]\n", fp_str, sp_str);
- fprintf (file, "\tmov %s,%s\n", fp_str, sp_str);
- }
+ if (TARGET_A4)
+ {
+#if 0
+ fprintf (file, "\tst %s,[%s]\n", fp_str, sp_str);
+ if(doing_dwarf)
+ {
+ dwarf2out_reg_save ("", FRAME_POINTER_REGNUM, -cfa_offset);
+ }
+#endif
+ }
+ else /* TARGET_ARCOMPACT */
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, stack_pointer_rtx,
+ GEN_INT (-UNITS_PER_WORD + first_offset));
+ rtx mem
+ = gen_frame_mem (Pmode,
+ gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
+ addr));
+ frame_move_inc (mem, frame_pointer_rtx, stack_pointer_rtx, 0);
+ frame_size_to_allocate -= UNITS_PER_WORD;
+ first_offset = 0;
+ } /* if */
+ frame_move (frame_pointer_rtx, stack_pointer_rtx);
+ } /* if */
/* ??? We don't handle the case where the saved regs are more than 252
bytes away from sp. This can be handled by decrementing sp once, saving
@@ -1233,60 +2149,60 @@ arc_output_function_prologue (FILE *file, HOST_WIDE_INT size)
problem as the `ld' insn takes reg+limm values (though it would be more
efficient to avoid reg+limm). */
+ frame_size_to_allocate -= first_offset;
/* Allocate the stack frame. */
- if (size - current_frame_info.pretend_size > 0)
- fprintf (file, "\tsub %s,%s," HOST_WIDE_INT_PRINT_DEC "\n",
- sp_str, sp_str, size - current_frame_info.pretend_size);
-
- /* Save any needed call-saved regs (and call-used if this is an
- interrupt handler). */
- arc_save_restore (file, sp_str, current_frame_info.reg_offset,
- /* The zeroing of these two bits is unnecessary,
- but leave this in for clarity. */
- gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
- "st");
-
- fprintf (file, "\t%s END PROLOGUE\n", ASM_COMMENT_START);
+ if (frame_size_to_allocate > 0)
+ frame_stack_add (-frame_size_to_allocate);
+
+ /* For ARCtangent-A4, save any needed call-saved regs (and call-used
+ if this is an interrupt handler).
+ This has already been taken care of for ARCompact architectures.  */
+
+ if (TARGET_A4)
+ {
+ arc_save_restore (stack_pointer_rtx, cfun->machine->frame_info.reg_offset,
+ /* The zeroing of these two bits is unnecessary,
+ but leave this in for clarity. */
+ gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 0, 0);
+ } /* if */
+
+ /* Set up the gp register, if needed.  */
+ if (crtl->uses_pic_offset_table)
+ arc_finalize_pic ();
}
-
+
/* Do any necessary cleanup after a function to restore stack, frame,
- and regs. */
+ and regs. */
-static void
-arc_output_function_epilogue (FILE *file, HOST_WIDE_INT size)
+void
+arc_expand_epilogue (int sibcall_p)
{
- rtx epilogue_delay = crtl->epilogue_delay_list;
- int noepilogue = FALSE;
- enum arc_function_type fn_type = arc_compute_function_type (current_function_decl);
-
- /* This is only for the human reader. */
- fprintf (file, "\t%s EPILOGUE\n", ASM_COMMENT_START);
+ int size = get_frame_size ();
+ enum arc_function_type fn_type = arc_compute_function_type (cfun);
size = ARC_STACK_ALIGN (size);
- size = (!current_frame_info.initialized
+ size = (!cfun->machine->frame_info.initialized
? arc_compute_frame_size (size)
- : current_frame_info.total_size);
+ : cfun->machine->frame_info.total_size);
- if (size == 0 && epilogue_delay == 0)
+ if (1)
{
- rtx insn = get_last_insn ();
-
- /* If the last insn was a BARRIER, we don't have to write any code
- because a jump (aka return) was put there. */
- if (GET_CODE (insn) == NOTE)
- insn = prev_nonnote_insn (insn);
- if (insn && GET_CODE (insn) == BARRIER)
- noepilogue = TRUE;
- }
-
- if (!noepilogue)
- {
- unsigned int pretend_size = current_frame_info.pretend_size;
- unsigned int frame_size = size - pretend_size;
+ unsigned int pretend_size = cfun->machine->frame_info.pretend_size;
+ unsigned int frame_size;
+ unsigned int size_to_deallocate;
int restored, fp_restored_p;
int can_trust_sp_p = !cfun->calls_alloca;
- const char *sp_str = reg_names[STACK_POINTER_REGNUM];
- const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
+ int first_offset = 0;
+ int millicode_p = cfun->machine->frame_info.millicode_end_reg > 0;
+
+ size_to_deallocate = size;
+
+ if (TARGET_A4)
+ frame_size = size - pretend_size;
+ else
+ frame_size = size - (pretend_size +
+ cfun->machine->frame_info.reg_size +
+ cfun->machine->frame_info.extra_size);
/* ??? There are lots of optimizations that can be done here.
EG: Use fp to restore regs if it's closer.
@@ -1294,119 +2210,210 @@ arc_output_function_epilogue (FILE *file, HOST_WIDE_INT size)
sp, but don't restore sp if we don't have to. */
if (!can_trust_sp_p)
+ gcc_assert (frame_pointer_needed);
+
+ /* Restore stack pointer to the beginning of saved register area for
+ ARCompact ISA.  */
+ if (TARGET_ARCOMPACT && frame_size)
{
- gcc_assert (frame_pointer_needed);
- fprintf (file,"\tsub %s,%s,%d\t\t%s sp not trusted here\n",
- sp_str, fp_str, frame_size, ASM_COMMENT_START);
- }
+ if (frame_pointer_needed)
+ frame_move (stack_pointer_rtx, frame_pointer_rtx);
+ else
+ first_offset = frame_size;
+ size_to_deallocate -= frame_size;
+ } /* if */
+ else if (!can_trust_sp_p)
+ frame_stack_add (-frame_size);
- /* Restore any saved registers. */
- arc_save_restore (file, sp_str, current_frame_info.reg_offset,
- /* The zeroing of these two bits is unnecessary,
- but leave this in for clarity. */
- current_frame_info.gmask & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK),
- "ld");
- if (MUST_SAVE_RETURN_ADDR)
- fprintf (file, "\tld %s,[%s,%d]\n",
- reg_names[RETURN_ADDR_REGNUM],
- frame_pointer_needed ? fp_str : sp_str,
- UNITS_PER_WORD + (frame_pointer_needed ? 0 : frame_size));
+ /* Restore any saved registers. */
+ if (TARGET_A4)
+ {
+ gcc_assert (0); /* Bitrot. */
+#if 0
+ if (cfun->machine->frame_info.reg_size)
+ arc_save_restore (stack_pointer_rtx,
+ cfun->machine->frame_info.reg_offset,
+ /* The zeroing of these two bits is unnecessary,
+ but leave this in for clarity. */
+ cfun->machine->frame_info.gmask
+ & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, 0);
+ if (MUST_SAVE_RETURN_ADDR)
+ fprintf (file, "\tld %s,[%s,%d]\n", reg_names[RETURN_ADDR_REGNUM],
+ (frame_pointer_needed ? fp_str : sp_str),
+ UNITS_PER_WORD + (frame_pointer_needed ? 0 : frame_size));
+#endif
+ }
+ else /* TARGET_ARCOMPACT */
+ {
- /* Keep track of how much of the stack pointer we've restored.
- It makes the following a lot more readable. */
- restored = 0;
- fp_restored_p = 0;
+ if (frame_pointer_needed)
+ {
+ rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
- /* We try to emit the epilogue delay slot insn right after the load
- of the return address register so that it can execute with the
- stack intact. Secondly, loads are delayed. */
- /* ??? If stack intactness is important, always emit now. */
- if (MUST_SAVE_RETURN_ADDR && epilogue_delay != NULL_RTX)
- {
- final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL);
- epilogue_delay = NULL_RTX;
- }
+ frame_move_inc (frame_pointer_rtx, gen_frame_mem (Pmode, addr),
+ stack_pointer_rtx, 0);
+ size_to_deallocate -= UNITS_PER_WORD;
+ } /* if */
- if (frame_pointer_needed)
- {
- /* Try to restore the frame pointer in the delay slot. We can't,
- however, if any of these is true. */
- if (epilogue_delay != NULL_RTX
- || !SMALL_INT (frame_size)
- || pretend_size
- || ARC_INTERRUPT_P (fn_type))
+ /* When optimizing for size, the register restores go through millicode
+ thunk calls, which themselves clobber blink; so load blink only after
+ those calls.  */
+ if (millicode_p)
{
- /* Note that we restore fp and sp here! */
- fprintf (file, "\tld.a %s,[%s,%d]\n", fp_str, sp_str, frame_size);
- restored += frame_size;
- fp_restored_p = 1;
+ int sibthunk_p = (!sibcall_p
+ && fn_type == ARC_FUNCTION_NORMAL
+ && !cfun->machine->frame_info.pretend_size);
+
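+ /* A sibthunk folds the register loads, the final stack adjustment
+ and the return into the single PARALLEL built by arc_save_restore,
+ so nothing is left to do here afterwards.  */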
+ gcc_assert (!(cfun->machine->frame_info.gmask
+ & (FRAME_POINTER_MASK | RETURN_ADDR_MASK)));
+ arc_save_restore (stack_pointer_rtx, 0,
+ cfun->machine->frame_info.gmask,
+ 1 + sibthunk_p, &first_offset);
+ if (sibthunk_p)
+ return;
}
- }
- else if (!SMALL_INT (size /* frame_size + pretend_size */)
- || ARC_INTERRUPT_P (fn_type))
- {
- fprintf (file, "\tadd %s,%s,%d\n", sp_str, sp_str, frame_size);
- restored += frame_size;
- }
-
- /* These must be done before the return insn because the delay slot
- does the final stack restore. */
- if (ARC_INTERRUPT_P (fn_type))
- {
- if (epilogue_delay)
+ /* If we are to restore registers, and first_offset would require
+ a limm to be encoded in a PRE_MODIFY, yet we can add it with a
+ fast add to the stack pointer, do this now. */
+ if ((!SMALL_INT (first_offset)
+ && cfun->machine->frame_info.gmask
+ && ((TARGET_ARC700 && !optimize_size)
+ ? first_offset <= 0x800
+ : satisfies_constraint_C2a (GEN_INT (first_offset))))
+ /* Also do this if we have both gprs and return
+ address to restore, and they both would need a LIMM. */
+ || (MUST_SAVE_RETURN_ADDR
+ && !SMALL_INT
+ ((cfun->machine->frame_info.reg_size + first_offset)
+ >> 2)
+ && cfun->machine->frame_info.gmask))
{
- final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL);
+ frame_stack_add (first_offset);
+ first_offset = 0;
+ }
+ if (MUST_SAVE_RETURN_ADDR)
+ {
+ rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
+ int ra_offs = cfun->machine->frame_info.reg_size + first_offset;
+ rtx addr = plus_constant (stack_pointer_rtx, ra_offs);
+
+ /* If the load of blink would need a LIMM, but we can add
+ the offset quickly to sp, do the latter. */
+ if (!SMALL_INT (ra_offs >> 2)
+ && !cfun->machine->frame_info.gmask
+ && ((TARGET_ARC700 && !optimize_size)
+ ? ra_offs <= 0x800
+ : satisfies_constraint_C2a (GEN_INT (ra_offs))))
+ {
+ size_to_deallocate -= ra_offs - first_offset;
+ first_offset = 0;
+ frame_stack_add (ra_offs);
+ ra_offs = 0;
+ addr = stack_pointer_rtx;
+ }
+ /* See if we can combine the load of the return address with the
+ final stack adjustment.
+ We need a separate load if there are still registers to
+ restore. We also want a separate load if the combined insn
+ would need a limm, but a separate load doesn't. */
+ if (ra_offs
+ && !cfun->machine->frame_info.gmask
+ && (SMALL_INT (ra_offs) || !SMALL_INT (ra_offs >> 2)))
+ {
+ addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, addr);
+ first_offset = 0;
+ size_to_deallocate -= cfun->machine->frame_info.reg_size;
+ }
+ else if (!ra_offs && size_to_deallocate == UNITS_PER_WORD)
+ {
+ addr = gen_rtx_POST_INC (Pmode, addr);
+ size_to_deallocate = 0;
+ }
+ frame_move_inc (ra, gen_frame_mem (Pmode, addr),
+ stack_pointer_rtx, addr);
}
- }
- /* Emit the return instruction. */
- {
- static const int regs[4] = {
- 0, RETURN_ADDR_REGNUM, ILINK1_REGNUM, ILINK2_REGNUM
- };
+ if (!millicode_p)
+ {
+ if (cfun->machine->frame_info.reg_size)
+ arc_save_restore (stack_pointer_rtx, 0,
+ /* The zeroing of these two bits is unnecessary,
+ but leave this in for clarity. */
+ cfun->machine->frame_info.gmask
+ & ~(FRAME_POINTER_MASK | RETURN_ADDR_MASK), 1, &first_offset);
+ }
- /* Update the flags, if returning from an interrupt handler. */
- if (ARC_INTERRUPT_P (fn_type))
- fprintf (file, "\tj.d.f %s\n", reg_names[regs[fn_type]]);
- else
- fprintf (file, "\tj.d %s\n", reg_names[regs[fn_type]]);
- }
+ } /* ARCOMPACT */
- /* If the only register saved is the return address, we need a
- nop, unless we have an instruction to put into it. Otherwise
- we don't since reloading multiple registers doesn't reference
- the register being loaded. */
+ /* The rest of this function does the following:
+ ARCtangent-A4: handle epilogue_delay, restore fp, sp, return
+ ARCompact : handle epilogue_delay, restore sp (phase-2), return.  */
- if (ARC_INTERRUPT_P (fn_type))
- fprintf (file, "\tadd %s,%s,16\n", sp_str, sp_str);
- else if (epilogue_delay != NULL_RTX)
- {
- gcc_assert (!frame_pointer_needed || fp_restored_p);
- gcc_assert (restored >= size);
- final_scan_insn (XEXP (epilogue_delay, 0), file, 1, 1, NULL);
- }
- else if (frame_pointer_needed && !fp_restored_p)
- {
- gcc_assert (SMALL_INT (frame_size));
- /* Note that we restore fp and sp here! */
- fprintf (file, "\tld.a %s,[%s,%d]\n", fp_str, sp_str, frame_size);
- }
- else if (restored < size)
- {
- gcc_assert (SMALL_INT (size - restored));
- fprintf (file, "\tadd %s,%s," HOST_WIDE_INT_PRINT_DEC "\n",
- sp_str, sp_str, size - restored);
- }
+ /* Keep track of how much of the stack pointer we've restored.
+ It makes the following a lot more readable. */
+ if (TARGET_A4)
+ {
+ restored = 0;
+ fp_restored_p = 0;
+ }
else
- fprintf (file, "\tnop\n");
- }
+ {
+ size_to_deallocate += first_offset;
+ restored = size - size_to_deallocate;
+ fp_restored_p = 1;
+ } /* if */
+
+
+ if (TARGET_A4)
+ {
+ if (frame_pointer_needed)
+ {
+ gcc_assert (0); /* Bitrot. */
+#if 0
+ /* Try to restore the frame pointer in the delay slot. We can't,
+ however, if any of these is true. */
+ if (epilogue_delay != NULL_RTX
+ || !SMALL_INT (frame_size)
+ || pretend_size
+ || ARC_INTERRUPT_P (fn_type))
+ {
+ fprintf (file, "\tld.a %s,[%s,%d]\n",
+ fp_str, sp_str, frame_size);
+ restored += frame_size;
+ fp_restored_p = 1;
+ if(doing_dwarf)
+ {
+ if (cfun->calls_alloca || frame_pointer_needed)
+ dwarf2out_def_cfa("",FRAME_POINTER_REGNUM,cfa_offset);
+ else
+ {
+ cfa_offset-=frame_size;
+ dwarf2out_def_cfa("",STACK_POINTER_REGNUM,cfa_offset);
+ }
+ }
+ }
+#endif
+ }
+ else if (!SMALL_INT (size /* frame_size + pretend_size */)
+ || ARC_INTERRUPT_P (fn_type))
+ {
+ frame_stack_add (frame_size);
+ restored += frame_size;
+ }
+ } /* TARGET_A4 */
- /* Reset state info for each function. */
- current_frame_info = zero_frame_info;
- arc_compute_function_type (NULL_TREE);
+ if (size > restored)
+ frame_stack_add (size - restored);
+ /* Emit the return instruction. */
+ if (sibcall_p == FALSE)
+ emit_jump_insn (gen_return_i ());
+ }
}
-
+
/* Define the number of delay slots needed for the function epilogue.
Interrupt handlers can't have any epilogue delay slots (it's always needed
@@ -1418,11 +2425,11 @@ arc_output_function_epilogue (FILE *file, HOST_WIDE_INT size)
int
arc_delay_slots_for_epilogue (void)
{
- if (arc_compute_function_type (current_function_decl) != ARC_FUNCTION_NORMAL)
+ if (arc_compute_function_type (cfun) != ARC_FUNCTION_NORMAL)
return 0;
- if (!current_frame_info.initialized)
+ if (!cfun->machine->frame_info.initialized)
(void) arc_compute_frame_size (get_frame_size ());
- if (current_frame_info.total_size == 0)
+ if (cfun->machine->frame_info.total_size == 0)
return 1;
return 0;
}
@@ -1432,11 +2439,13 @@ arc_delay_slots_for_epilogue (void)
pointer or any call-saved register is OK. SLOT will always be 0. */
int
-arc_eligible_for_epilogue_delay (rtx trial, int slot)
+arc_eligible_for_epilogue_delay (rtx trial, int slot)
{
- gcc_assert (!slot);
+ int trial_length = get_attr_length (trial);
+
+ gcc_assert (slot == 0);
- if (get_attr_length (trial) == 1
+ if ((trial_length == 4 || trial_length == 2)
/* If registers where saved, presumably there's more than enough
possibilities for the delay slot. The alternative is something
more complicated (of course, if we expanded the epilogue as rtl
@@ -1444,27 +2453,49 @@ arc_eligible_for_epilogue_delay (rtx trial, int slot)
/* ??? Note that this will always be true since only functions with
empty frames have epilogue delay slots. See
arc_delay_slots_for_epilogue. */
- && current_frame_info.gmask == 0
+ && cfun->machine->frame_info.gmask == 0
&& ! reg_mentioned_p (stack_pointer_rtx, PATTERN (trial))
&& ! reg_mentioned_p (frame_pointer_rtx, PATTERN (trial)))
return 1;
return 0;
}
-
-/* Return true if OP is a shift operator. */
-int
-shift_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+
+/* PIC */
+
+/* Emit special PIC prologues and epilogues.  */
+/* If the function has any GOTOFF relocations, then the GOTBASE
+   register has to be set up in the prologue.
+   The instruction needed at the function start for setting up the
+   GOTBASE register is
+      add rdest, pc,
+   ----------------------------------------------------------
+   The rtl to be emitted for this should be:
+      (set (reg basereg)
+           (plus (reg pc)
+                 (const (unspec (symref _DYNAMIC) 3))))
+   ---------------------------------------------------------- */
+/* Can be used when rtl pro/epilog comes in.
+   Unused till then.  */
+rtx
+arc_finalize_pic (void)
{
- switch (GET_CODE (op))
- {
- case ASHIFTRT:
- case LSHIFTRT:
- case ASHIFT:
- return 1;
- default:
- return 0;
- }
+ rtx new;
+ rtx baseptr_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
+
+ if (crtl->uses_pic_offset_table == 0)
+ return NULL_RTX;
+
+ gcc_assert (flag_pic != 0);
+
+ new = gen_rtx_SYMBOL_REF (Pmode, "_DYNAMIC");
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, new), ARC_UNSPEC_GOT);
+ new = gen_rtx_CONST (Pmode, new);
+
+ new = gen_rtx_SET (VOIDmode, baseptr_rtx, new);
+
+ return emit_insn (new);
}
/* Output the assembler code for doing a shift.
@@ -1484,6 +2515,7 @@ shift_operator (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
const char *
output_shift (rtx *operands)
{
+ /* static int loopend_lab;*/
rtx shift = operands[3];
enum machine_mode mode = GET_MODE (shift);
enum rtx_code code = GET_CODE (shift);
@@ -1501,23 +2533,24 @@ output_shift (rtx *operands)
  if (GET_CODE (operands[2]) != CONST_INT)
{
- if (optimize)
- {
- output_asm_insn ("sub.f 0,%2,0", operands);
- output_asm_insn ("mov lp_count,%2", operands);
- output_asm_insn ("bz 2f", operands);
- }
- else
- output_asm_insn ("mov %4,%2", operands);
+ output_asm_insn ("and.f %2, %2, 0x1f", operands);
+ output_asm_insn ("mov lp_count,%2", operands);
+ output_asm_insn ("bz 2f", operands);
+
goto shiftloop;
}
else
{
int n = INTVAL (operands[2]);
- /* If the count is negative, make it 0. */
+ /* Only consider the lower 5 bits of the shift count.  */
+ n = n & 0x1f;
+
+ /* If the count is negative, take only the lower 5 bits.  */
+ /* FIXME: dead code - after the masking above, n cannot be negative.  */
if (n < 0)
- n = 0;
+ n = n & 0x1f;
+
/* If the count is too big, truncate it.
ANSI says shifts of GET_MODE_BITSIZE are undefined - we choose to
do the intuitive thing. */
@@ -1546,8 +2579,8 @@ output_shift (rtx *operands)
/* The ARC doesn't have a rol insn. Use something else. */
output_asm_insn ("asl.f 0,%0\n\tadc %0,0,0", operands);
break;
- default:
- break;
+ default:
+ break;
}
}
/* Must loop. */
@@ -1555,12 +2588,10 @@ output_shift (rtx *operands)
{
char buf[100];
- if (optimize)
- output_asm_insn ("mov lp_count,%c2", operands);
- else
- output_asm_insn ("mov %4,%c2", operands);
+ sprintf (buf, "mov lp_count,%ld", INTVAL (operands[2]) & 0x1f );
+ output_asm_insn (buf, operands);
+
shiftloop:
- if (optimize)
{
if (flag_pic)
sprintf (buf, "lr %%4,[status]\n\tadd %%4,%%4,6\t%s single insn loop start",
@@ -1574,80 +2605,179 @@ output_shift (rtx *operands)
output_asm_insn ("sr %4,[lp_end]", operands);
output_asm_insn ("nop\n\tnop", operands);
if (flag_pic)
- fprintf (asm_out_file, "\t%s single insn loop\n",
- ASM_COMMENT_START);
+ asm_fprintf (asm_out_file, "\t%s single insn loop\n",
+ ASM_COMMENT_START);
else
- fprintf (asm_out_file, "1:\t%s single insn loop\n",
- ASM_COMMENT_START);
+ asm_fprintf (asm_out_file, "1:\t%s single insn loop\n",
+ ASM_COMMENT_START);
output_asm_insn (shift_one, operands);
fprintf (asm_out_file, "2:\t%s end single insn loop\n",
ASM_COMMENT_START);
}
- else
- {
- fprintf (asm_out_file, "1:\t%s begin shift loop\n",
- ASM_COMMENT_START);
- output_asm_insn ("sub.f %4,%4,1", operands);
- output_asm_insn ("nop", operands);
- output_asm_insn ("bn.nd 2f", operands);
- output_asm_insn (shift_one, operands);
- output_asm_insn ("b.nd 1b", operands);
- fprintf (asm_out_file, "2:\t%s end shift loop\n",
- ASM_COMMENT_START);
- }
}
}
return "";
}
-
+
/* Nested function support. */
+/* Directly store VALUE at BASE plus OFFSET. */
+static void
+emit_store_direct (rtx base, int offset, int value)
+{
+ emit_insn (gen_store_direct (gen_rtx_MEM (SImode,
+ plus_constant (base, offset)),
+ force_reg (SImode,
+ gen_int_mode (value, SImode))));
+}
+
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function. */
+/* With potentially multiple shared objects loaded, and multiple stacks
+ present for multiple threads where trampolines might reside, a simple
+ range check will likely not suffice for the profiler to tell if a callee
+ is a trampoline.  We get a speedier check by making the trampoline start
+ at an address that is not 4-byte aligned.
+ A trampoline looks like this:
+
+ nop_s 0x78e0
+entry:
+ ld_s r12,[pcl,12] 0xd403
+ ld r11,[pcl,12] 0x170c 700b
+ j_s [r12] 0x7c00
+ nop_s 0x78e0
+
+ The fastest trampoline to execute for trampolines within +-8KB of CTX
+ would be:
+ add2 r11,pcl,s12
+ j [limm] 0x20200f80 limm
+ and that would also be faster to write to the stack by computing the offset
+ from CTX to TRAMP at compile time. However, it would really be better to
+ get rid of the high cost of cache invalidation when generating trampolines,
+ which requires that the code part of trampolines stays constant, and
+ additionally either
+ - making sure that no executable code but trampolines is on the stack,
+ no icache entries linger for the area of the stack from when before the
+ stack was allocated, and allocating trampolines in trampoline-only
+ cache lines
+ or
+ - allocate trampolines from a special pool of pre-allocated trampolines. */
+
void
arc_initialize_trampoline (rtx tramp ATTRIBUTE_UNUSED,
- rtx fnaddr ATTRIBUTE_UNUSED,
- rtx cxt ATTRIBUTE_UNUSED)
+ rtx fnaddr ATTRIBUTE_UNUSED,
+ rtx cxt ATTRIBUTE_UNUSED)
{
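+ /* The first three words are the fixed code sequence from the layout
+ comment above, with the two 16-bit insns per word swapped for big
+ endian; the words at offsets 12 and 16 hold FNADDR and CXT, which
+ that code loads pc-relative.  */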
+ emit_store_direct (tramp, 0, TARGET_BIG_ENDIAN ? 0x78e0d403 : 0xd40378e0);
+ emit_store_direct (tramp, 4, TARGET_BIG_ENDIAN ? 0x170c700b : 0x700b170c);
+ emit_store_direct (tramp, 8, TARGET_BIG_ENDIAN ? 0x7c0078e0 : 0x78e07c00);
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)), fnaddr);
+ emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)), cxt);
+ emit_insn (gen_flush_icache (validize_mem (gen_rtx_MEM (SImode, tramp))));
}
-
+
/* Set the cpu type and print out other fancy things,
at the top of the file. */
-static void
-arc_file_start (void)
+void
+arc_asm_file_start (FILE *file)
{
- default_file_start ();
- fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
+ fprintf (file, "\t.cpu %s\n", arc_cpu_string);
}
-
+
+/* This is set briefly to 1 when we output a ".as" address modifier, and then
+ reset when we output the scaled address. */
+static int output_scaled = 0;
+
/* Print operand X (an rtx) in assembler syntax to file FILE.
CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
For `%' followed by punctuation, CODE is the punctuation and X is null. */
+/* In final.c:output_asm_insn:
+ 'l' : label
+ 'a' : address
+ 'c' : constant address if CONSTANT_ADDRESS_P
+ 'n' : negative
+ Here:
+ 'Z': log2(x+1)-1
+ 'z': log2
+ 'M': log2(~x)
+ '#': condbranch delay slot suffix
+ '*': jump delay slot suffix
+ '?' : nonjump-insn suffix for conditional execution or short instruction
+ '!' : jump / call suffix for conditional execution or short instruction
+ '`': fold constant inside unary operator, re-recognize, and emit.
+ 'd'
+ 'D'
+ 'R': Second word
+ 'S'
+ 'B': Branch comparison operand - suppress sda reference
+ 'H': Most significant word
+ 'L': Least significant word
+ 'A': ASCII decimal representation of floating point value
+ 'U': Load/store update or scaling indicator
+ 'V': cache bypass indicator for volatile
+ 'P'
+ 'F'
+ '^'
+ 'O': Operator
+ 'o': original symbol - no @ prepending.
+ '&': size reason / unalign annotation (TARGET_ANNOTATE_ALIGN).  */
void
-arc_print_operand (FILE *file, rtx x, int code)
+arc_print_operand (FILE *file, rtx x, int code)
{
switch (code)
{
+ case 'Z':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%d",exact_log2(INTVAL (x) + 1) - 1 );
+ else
+ output_operand_lossage ("invalid operand to %%Z code");
+
+ return;
+
+ case 'z':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%d",exact_log2(INTVAL (x)) );
+ else
+ output_operand_lossage ("invalid operand to %%z code");
+
+ return;
+
+ case 'M':
+ if (GET_CODE (x) == CONST_INT)
+ fprintf (file, "%d",exact_log2(~INTVAL (x)) );
+ else
+ output_operand_lossage ("invalid operand to %%M code");
+
+ return;
+
case '#' :
- /* Conditional branches. For now these are equivalent. */
+ /* Conditional branches depending on condition codes.
+ Note that this is only for branches that were known to depend on
+ condition codes before delay slot scheduling;
+ out-of-range brcc / bbit expansions should use '*'.
+ This distinction is important because of the different
+ allowable delay slot insns and the output of the delay suffix
+ for TARGET_AT_DBR_CONDEXEC.  */
case '*' :
- /* Unconditional branches. Output the appropriate delay slot suffix. */
- if (!final_sequence || XVECLEN (final_sequence, 0) == 1)
- {
- /* There's nothing in the delay slot. */
- fputs (".nd", file);
- }
- else
+ /* Unconditional branches / branches not depending on condition codes.
+ Output the appropriate delay slot suffix. */
+ if (final_sequence && XVECLEN (final_sequence, 0) != 1)
{
rtx jump = XVECEXP (final_sequence, 0, 0);
rtx delay = XVECEXP (final_sequence, 0, 1);
+
+ /* For TARGET_PAD_RETURN we might have grabbed the delay insn. */
+ if (INSN_DELETED_P (delay))
+ return;
if (INSN_ANNULLED_BRANCH_P (jump))
- fputs (INSN_FROM_TARGET_P (delay) ? ".jd" : ".nd", file);
+ fputs (INSN_FROM_TARGET_P (delay)
+ ? ((arc_cpu == PROCESSOR_A4) ? ".jd" : ".d")
+ : (TARGET_AT_DBR_CONDEXEC && code == '#' ? ".d" : ".nd"),
+ file);
else
fputs (".d", file);
}
@@ -1655,49 +2785,63 @@ arc_print_operand (FILE *file, rtx x, int code)
case '?' : /* with leading "." */
case '!' : /* without leading "." */
/* This insn can be conditionally executed. See if the ccfsm machinery
- says it should be conditionalized. */
- if (arc_ccfsm_state == 3 || arc_ccfsm_state == 4)
+ says it should be conditionalized.
+ If it shouldn't, we'll check the compact attribute if this insn
+ has a short variant, which may be used depending on code size and
+ alignment considerations. */
+ if (ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current))
{
- /* Is this insn in a delay slot? */
- if (final_sequence && XVECLEN (final_sequence, 0) == 2)
+ /* Is this insn in a delay slot sequence? */
+ if (!final_sequence || XVECLEN (final_sequence, 0) < 2)
+ {
+ /* This insn isn't in a delay slot sequence. */
+ fprintf (file, "%s%s",
+ code == '?' ? "." : "",
+ arc_condition_codes[arc_ccfsm_current.cc]);
+ /* If this is a jump, there are still short variants. However,
+ only beq_s / bne_s have the same offset range as b_s,
+ and the only short conditional returns are jeq_s and jne_s. */
+ if (code == '!'
+ && (arc_ccfsm_current.cc == ARC_CC_EQ
+ || arc_ccfsm_current.cc == ARC_CC_NE
+ || 0 /* FIXME: check if branch in 7 bit range. */))
+ output_short_suffix (file);
+ }
+ else if (code == '!') /* Jump with delay slot. */
+ fputs (arc_condition_codes[arc_ccfsm_current.cc], file);
+ else /* An instruction in a delay slot.  */
{
+ rtx jump = XVECEXP (final_sequence, 0, 0);
rtx insn = XVECEXP (final_sequence, 0, 1);
/* If the insn is annulled and is from the target path, we need
to inverse the condition test. */
- if (INSN_ANNULLED_BRANCH_P (insn))
+ if (INSN_ANNULLED_BRANCH_P (jump))
{
if (INSN_FROM_TARGET_P (insn))
fprintf (file, "%s%s",
code == '?' ? "." : "",
- arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current_cc)]);
+ arc_condition_codes[ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current.cc)]);
else
fprintf (file, "%s%s",
code == '?' ? "." : "",
- arc_condition_codes[arc_ccfsm_current_cc]);
+ arc_condition_codes[arc_ccfsm_current.cc]);
+ if (arc_ccfsm_current.state == 5)
+ arc_ccfsm_current.state = 0;
}
else
- {
- /* This insn is executed for either path, so don't
- conditionalize it at all. */
- ; /* nothing to do */
- }
- }
- else
- {
- /* This insn isn't in a delay slot. */
- fprintf (file, "%s%s",
- code == '?' ? "." : "",
- arc_condition_codes[arc_ccfsm_current_cc]);
+ /* This insn is executed for either path, so don't
+ conditionalize it at all. */
+ output_short_suffix (file);
+ }
}
+ else
+ output_short_suffix (file);
return;
- case '~' :
- /* Output a nop if we're between a set of the condition codes,
- and a conditional branch. */
- if (last_insn_set_cc_p)
- fputs ("nop\n\t", file);
- return;
+ case '`' :
+ /* FIXME: fold constant inside unary operator, re-recognize, and emit. */
+ gcc_unreachable ();
case 'd' :
fputs (arc_condition_codes[get_arc_condition_code (x)], file);
return;
@@ -1714,13 +2858,30 @@ arc_print_operand (FILE *file, rtx x, int code)
else if (GET_CODE (x) == MEM)
{
fputc ('[', file);
- /* Handle possible auto-increment. Since it is pre-increment and
- we have already done it, we can just use an offset of four. */
- /* ??? This is taken from rs6000.c I think. I don't think it is
- currently necessary, but keep it around. */
+
+ /* Handle possible auto-increment. For PRE_INC / PRE_DEC /
+ PRE_MODIFY, we will have handled the first word already;
+ For POST_INC / POST_DEC / POST_MODIFY, the access to the
+ first word will be done later. In either case, the access
+ to the first word will do the modify, and we only have
+ to add an offset of four here. */
if (GET_CODE (XEXP (x, 0)) == PRE_INC
- || GET_CODE (XEXP (x, 0)) == PRE_DEC)
+ || GET_CODE (XEXP (x, 0)) == PRE_DEC
+ || GET_CODE (XEXP (x, 0)) == PRE_MODIFY
+ || GET_CODE (XEXP (x, 0)) == POST_INC
+ || GET_CODE (XEXP (x, 0)) == POST_DEC
+ || GET_CODE (XEXP (x, 0)) == POST_MODIFY)
output_address (plus_constant (XEXP (XEXP (x, 0), 0), 4));
+ else if (output_scaled)
+ {
+ rtx addr = XEXP (x, 0);
+ int size = GET_MODE_SIZE (GET_MODE (x));
+
+ output_address (plus_constant (XEXP (addr, 0),
+ ((INTVAL (XEXP (addr, 1)) + 4)
+ >> (size == 2 ? 1 : 2))));
+ output_scaled = 0;
+ }
else
output_address (plus_constant (XEXP (x, 0), 4));
fputc (']', file);
@@ -1729,21 +2890,60 @@ arc_print_operand (FILE *file, rtx x, int code)
output_operand_lossage ("invalid operand to %%R code");
return;
case 'S' :
- if ((GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (x))
- || GET_CODE (x) == LABEL_REF)
+ if (GET_CODE (x) == CONST
+ && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
{
- fprintf (file, "%%st(");
- output_addr_const (file, x);
- fprintf (file, ")");
- return;
+ if (TARGET_A4 && ARC_FUNCTION_NAME_PREFIX_P (* (XSTR (XEXP (XEXP (x, 0), 0), 0))))
+ {
+ error ("Function address arithmetic is not supported.\n");
+ return;
+ }
}
+
+ else if (symbolic_reference_mentioned_p (x))
+ {
+ if (TARGET_A4 && ARC_FUNCTION_NAME_PREFIX_P (* (XSTR (x, 0))))
+ {
+ fprintf (file, "%%st(");
+ output_addr_const (file, x);
+ fprintf (file, ")");
+ return;
+ }
+ else if (TARGET_A4 && GET_CODE (x) == LABEL_REF)
+ {
+ fprintf (file, "%%st(");
+ output_addr_const (file, x);
+ fprintf (file, ")");
+ return;
+ }
+ }
+
+ else if (GET_CODE (x) == LABEL_REF)
+ {
+ if (TARGET_A4)
+ {
+ fprintf (file, "%%st(");
+ output_addr_const (file, x);
+ fprintf (file, ")");
+ return;
+ }
+ }
+ break;
+ case 'B' /* Branch or other LIMM ref - must not use sda references. */ :
+ if (CONSTANT_P (x))
+ {
+ output_addr_const (file, x);
+ return;
+ }
break;
case 'H' :
case 'L' :
if (GET_CODE (x) == REG)
{
/* L = least significant word, H = most significant word */
- if ((TARGET_BIG_ENDIAN != 0) ^ (code == 'L'))
+ if ((WORDS_BIG_ENDIAN != 0) ^ (code == 'L'))
fputs (reg_names[REGNO (x)], file);
else
fputs (reg_names[REGNO (x)+1], file);
@@ -1754,9 +2954,16 @@ arc_print_operand (FILE *file, rtx x, int code)
rtx first, second;
split_double (x, &first, &second);
- fprintf (file, "0x%08lx",
- (long)(code == 'L' ? INTVAL (first) : INTVAL (second)));
- }
+
+ if (WORDS_BIG_ENDIAN == 0)
+ fprintf (file, "0x%08lx",
+ code == 'L' ? INTVAL (first) : INTVAL (second));
+ else
+ fprintf (file, "0x%08lx",
+ code == 'L' ? INTVAL (second) : INTVAL (first));
+ }
else
output_operand_lossage ("invalid operand to %%H/%%L code");
return;
@@ -1775,9 +2982,35 @@ arc_print_operand (FILE *file, rtx x, int code)
/* Output a load/store with update indicator if appropriate. */
if (GET_CODE (x) == MEM)
{
- if (GET_CODE (XEXP (x, 0)) == PRE_INC
- || GET_CODE (XEXP (x, 0)) == PRE_DEC)
- fputs (".a", file);
+ rtx addr = XEXP (x, 0);
+ switch (GET_CODE (addr))
+ {
+ case PRE_INC: case PRE_DEC: case PRE_MODIFY:
+ fputs (".a", file); break;
+ case POST_INC: case POST_DEC: case POST_MODIFY:
+ fputs (".ab", file); break;
+ case PLUS:
+ /* Can we use a scaled offset? */
+ if (CONST_INT_P (XEXP (addr, 1))
+ && GET_MODE_SIZE (GET_MODE (x)) > 1
+ && (!(INTVAL (XEXP (addr, 1))
+ & (GET_MODE_SIZE (GET_MODE (x)) - 1) & 3))
+ /* Does it make a difference? */
+ && !SMALL_INT_RANGE (INTVAL (XEXP (addr, 1)),
+ GET_MODE_SIZE (GET_MODE (x)) - 2, 0))
+ {
+ fputs (".as", file);
+ output_scaled = 1;
+ }
+ /* Are we using a scaled index? */
+ else if (GET_CODE (XEXP (addr, 0)) == MULT)
+ fputs (".as", file);
+ break;
+ case REG:
+ break;
+ default:
+ gcc_assert (CONSTANT_P (addr)); break;
+ }
}
else
output_operand_lossage ("invalid operand to %%U code");
@@ -1787,15 +3020,80 @@ arc_print_operand (FILE *file, rtx x, int code)
refs are defined to use the cache bypass mechanism. */
if (GET_CODE (x) == MEM)
{
- if (MEM_VOLATILE_P (x))
+ if (MEM_VOLATILE_P (x) && !TARGET_VOLATILE_CACHE_SET)
fputs (".di", file);
}
else
output_operand_lossage ("invalid operand to %%V code");
return;
+ /* plt code */
+ case 'P':
case 0 :
/* Do nothing special. */
break;
+ case 'F':
+ fputs (reg_names[REGNO (x)] + 1, file);
+ return;
+ case '^':
+ /* This punctuation character is needed because label references are
+ printed in the output template using %l. This is a front end
+ character, and when we want to emit a '@' before it, we have to use
+ this '^'. */
+
+ fputc ('@', file);
+ return;
+ case 'O':
+ /* Output an operator. */
+ switch (GET_CODE (x))
+ {
+ case PLUS: fputs ("add", file); return;
+ case SS_PLUS: fputs ("adds", file); return;
+ case AND: fputs ("and", file); return;
+ case IOR: fputs ("or", file); return;
+ case XOR: fputs ("xor", file); return;
+ case MINUS: fputs ("sub", file); return;
+ case SS_MINUS: fputs ("subs", file); return;
+ case ASHIFT: fputs ("asl", file); return;
+ case ASHIFTRT: fputs ("asr", file); return;
+ case LSHIFTRT: fputs ("lsr", file); return;
+ case ROTATERT: fputs ("ror", file); return;
+ case MULT: fputs ("mpy", file); return;
+ case ABS: fputs ("abs", file); return; /* unconditional */
+ case NEG: fputs ("neg", file); return;
+ case SS_NEG: fputs ("negs", file); return;
+ case NOT: fputs ("not", file); return; /* unconditional */
+ case ZERO_EXTEND:
+ fputs ("ext", file); /* bmsk allows predication. */
+ goto size_suffix;
+ case SIGN_EXTEND: /* unconditional */
+ fputs ("sex", file);
+ size_suffix:
+ switch (GET_MODE (XEXP (x, 0)))
+ {
+ case QImode: fputs ("b", file); return;
+ case HImode: fputs ("w", file); return;
+ default: break;
+ }
+ break;
+ case SS_TRUNCATE:
+ if (GET_MODE (x) != HImode)
+ break;
+ fputs ("sat16", file);
+ default: break;
+ }
+ output_operand_lossage ("invalid operand to %%O code"); return;
+ case 'o':
+ if (GET_CODE (x) == SYMBOL_REF)
+ {
+ assemble_name (file, XSTR (x, 0));
+ return;
+ }
+ break;
+ case '&':
+ if (TARGET_ANNOTATE_ALIGN && cfun->machine->size_reason)
+ fprintf (file, "; %s. unalign: %d", cfun->machine->size_reason,
+ cfun->machine->unalign);
+ return;
default :
/* Unknown flag. */
output_operand_lossage ("invalid operand output code");
@@ -1807,17 +3105,41 @@ arc_print_operand (FILE *file, rtx x, int code)
fputs (reg_names[REGNO (x)], file);
break;
case MEM :
- fputc ('[', file);
- if (GET_CODE (XEXP (x, 0)) == PRE_INC)
- output_address (plus_constant (XEXP (XEXP (x, 0), 0),
- GET_MODE_SIZE (GET_MODE (x))));
- else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
- output_address (plus_constant (XEXP (XEXP (x, 0), 0),
- - GET_MODE_SIZE (GET_MODE (x))));
- else
- output_address (XEXP (x, 0));
- fputc (']', file);
- break;
+ {
+ rtx addr = XEXP (x, 0);
+ int size = GET_MODE_SIZE (GET_MODE (x));
+
+ fputc ('[', file);
+
+ switch (GET_CODE (addr))
+ {
+ case PRE_INC: case POST_INC:
+ output_address (plus_constant (XEXP (addr, 0), size)); break;
+ case PRE_DEC: case POST_DEC:
+ output_address (plus_constant (XEXP (addr, 0), -size)); break;
+ case PRE_MODIFY: case POST_MODIFY:
+ output_address (XEXP (addr, 1)); break;
+ case PLUS:
+ if (output_scaled)
+ {
+ output_address (plus_constant (XEXP (addr, 0),
+ (INTVAL (XEXP (addr, 1))
+ >> (size == 2 ? 1 : 2))));
+ output_scaled = 0;
+ }
+ else
+ output_address (addr);
+ break;
+ default:
+ if (flag_pic && CONSTANT_ADDRESS_P (addr))
+ arc_output_pic_addr_const (file, addr, code);
+ else
+ output_address (addr);
+ break;
+ }
+ fputc (']', file);
+ break;
+ }
case CONST_DOUBLE :
/* We handle SFmode constants here as output_addr_const doesn't. */
if (GET_MODE (x) == SFmode)
@@ -1832,7 +3154,29 @@ arc_print_operand (FILE *file, rtx x, int code)
}
/* Fall through. Let output_addr_const deal with it. */
default :
- output_addr_const (file, x);
+ if (flag_pic)
+ arc_output_pic_addr_const (file, x, code);
+ else
+ {
+ /* FIXME: Dirty way to handle @var@sda+const.  Should be handled
+ with asm_output_symbol_ref.  */
+ if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
+ {
+ x = XEXP (x, 0);
+ output_addr_const (file, XEXP (x, 0));
+ if (GET_CODE (XEXP (x, 0)) == SYMBOL_REF && SYMBOL_REF_SMALL_P (XEXP (x, 0)))
+ fprintf (file, "@sda");
+
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT
+ || INTVAL (XEXP (x, 1)) >= 0)
+ fprintf (file, "+");
+ output_addr_const (file, XEXP (x, 1));
+ }
+ else
+ output_addr_const (file, x);
+ }
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
+ fprintf (file, "@sda");
break;
}
}
@@ -1840,10 +3184,9 @@ arc_print_operand (FILE *file, rtx x, int code)
/* Print a memory address as an operand to reference that memory location. */
void
-arc_print_operand_address (FILE *file, rtx addr)
+arc_print_operand_address (FILE *file, rtx addr)
{
register rtx base, index = 0;
- int offset = 0;
switch (GET_CODE (addr))
{
@@ -1851,44 +3194,47 @@ arc_print_operand_address (FILE *file, rtx addr)
fputs (reg_names[REGNO (addr)], file);
break;
case SYMBOL_REF :
- if (/*???*/ 0 && SYMBOL_REF_FUNCTION_P (addr))
+ if (TARGET_A4 && ARC_FUNCTION_NAME_PREFIX_P (* (XSTR (addr, 0))))
{
fprintf (file, "%%st(");
output_addr_const (file, addr);
fprintf (file, ")");
}
else
- output_addr_const (file, addr);
+ {
+ output_addr_const (file, addr);
+ if (SYMBOL_REF_SMALL_P (addr))
+ fprintf (file, "@sda");
+ }
break;
case PLUS :
- if (GET_CODE (XEXP (addr, 0)) == CONST_INT)
- offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
- else if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
- offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
+ if (GET_CODE (XEXP (addr, 0)) == MULT)
+ index = XEXP (XEXP (addr, 0), 0), base = XEXP (addr, 1);
+ else if (CONST_INT_P (XEXP (addr, 0)))
+ index = XEXP (addr, 0), base = XEXP (addr, 1);
else
base = XEXP (addr, 0), index = XEXP (addr, 1);
- gcc_assert (GET_CODE (base) == REG);
- fputs (reg_names[REGNO (base)], file);
- if (index == 0)
- {
- if (offset != 0)
- fprintf (file, ",%d", offset);
- }
+
+ gcc_assert (OBJECT_P (base));
+ arc_print_operand_address (file, base);
+ if (CONSTANT_P (base) && CONST_INT_P (index))
+ fputc ('+', file);
else
- {
- switch (GET_CODE (index))
- {
- case REG:
- fprintf (file, ",%s", reg_names[REGNO (index)]);
- break;
- case SYMBOL_REF:
- fputc (',', file), output_addr_const (file, index);
- break;
- default:
- gcc_unreachable ();
- }
- }
+ fputc (',', file);
+ gcc_assert (OBJECT_P (index));
+ arc_print_operand_address (file, index);
break;
+ case CONST:
+ {
+ rtx c = XEXP (addr, 0);
+
+ gcc_assert (GET_CODE (XEXP (c, 0)) == SYMBOL_REF);
+ gcc_assert (GET_CODE (XEXP (c, 1)) == CONST_INT);
+
+ output_address (XEXP (addr, 0));
+
+ break;
+ }
case PRE_INC :
case PRE_DEC :
/* We shouldn't get here as we've lost the mode of the memory object
@@ -1896,34 +3242,90 @@ arc_print_operand_address (FILE *file, rtx addr)
gcc_unreachable ();
break;
default :
- output_addr_const (file, addr);
+ if (flag_pic)
+ arc_output_pic_addr_const (file, addr, 0);
+ else
+ output_addr_const (file, addr);
break;
}
}
-/* Update compare/branch separation marker. */
-
+/* Called via note_stores. */
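+/* X is a SET found in INSN; DATA is a hash table mapping each UNSPEC_PROF
+   seen so far to its counter symbol.  On first sight of a call site, emit
+   its descriptor entry and a zero-initialized counter; in all cases,
+   redirect the reference to the counter symbol.  */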
static void
-record_cc_ref (rtx insn)
+write_profile_sections (rtx dest ATTRIBUTE_UNUSED, rtx x, void *data)
{
- last_insn_set_cc_p = current_insn_set_cc_p;
+ rtx *srcp, src;
+ htab_t htab = (htab_t) data;
+ rtx *slot;
- switch (get_attr_cond (insn))
+ if (GET_CODE (x) != SET)
+ return;
+ srcp = &SET_SRC (x);
+ if (MEM_P (*srcp))
+ srcp = &XEXP (*srcp, 0);
+ else if (MEM_P (SET_DEST (x)))
+ srcp = &XEXP (SET_DEST (x), 0);
+ src = *srcp;
+ if (GET_CODE (src) != CONST)
+ return;
+ src = XEXP (src, 0);
+ if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_PROF)
+ return;
+
+ gcc_assert (XVECLEN (src, 0) == 3);
+ if (!htab_elements (htab))
{
- case COND_SET :
- case COND_SET_ZN :
- case COND_SET_ZNC :
- if (get_attr_length (insn) == 1)
- current_insn_set_cc_p = 1;
- else
- current_insn_set_cc_p = 0;
- break;
- default :
- current_insn_set_cc_p = 0;
- break;
+ output_asm_insn (".section .__arc_profile_desc, \"a\"\n"
+ "\t.long %0 + 1\n",
+ &XVECEXP (src, 0, 0));
}
+ slot = (rtx *) htab_find_slot (htab, src, INSERT);
+ if (*slot == HTAB_EMPTY_ENTRY)
+ {
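+ /* First occurrence of this call site: create a fresh counter symbol,
+ record it as operand 2 of the unspec, and emit its descriptor entry
+ plus a zero-initialized counter word.  */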
+ static int count_nr;
+ char buf[24];
+ rtx count;
+
+ *slot = src;
+ sprintf (buf, "__prof_count%d", count_nr++);
+ count = gen_rtx_SYMBOL_REF (Pmode, xstrdup (buf));
+ XVECEXP (src, 0, 2) = count;
+ output_asm_insn (".section\t.__arc_profile_desc, \"a\"\n"
+ "\t.long\t%1\n"
+ "\t.section\t.__arc_profile_counters, \"aw\"\n"
+ "\t.type\t%o2, @object\n"
+ "\t.size\t%o2, 4\n"
+ "%o2:\t.zero 4",
+ &XVECEXP (src, 0, 0));
+ *srcp = count;
+ }
+ else
+ *srcp = XVECEXP (*slot, 0, 2);
+}
+
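+/* Hash an UNSPEC_PROF by its first two operands, so that all profile
+   references to the same pair of symbols share one counter.  */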
+static hashval_t
+unspec_prof_hash (const void *x)
+{
+ const_rtx u = (const_rtx) x;
+ const_rtx s1 = XVECEXP (u, 0, 1);
+
+ return (htab_hash_string (XSTR (XVECEXP (u, 0, 0), 0))
+ ^ (s1->code == SYMBOL_REF ? htab_hash_string (XSTR (s1, 0)) : 0));
}
-
+
+static int
+unspec_prof_htab_eq (const void *x, const void *y)
+{
+ const_rtx u0 = (const_rtx) x;
+ const_rtx u1 = (const_rtx) y;
+ const_rtx s01 = XVECEXP (u0, 0, 1);
+ const_rtx s11 = XVECEXP (u1, 0, 1);
+
+ return (!strcmp (XSTR (XVECEXP (u0, 0, 0), 0),
+ XSTR (XVECEXP (u1, 0, 0), 0))
+ && rtx_equal_p (s01, s11));
+}
+
/* Conditional execution support.
This is based on the ARM port but for now is much simpler.
@@ -1931,8 +3333,9 @@ record_cc_ref (rtx insn)
A finite state machine takes care of noticing whether or not instructions
can be conditionally executed, and thus decrease execution time and code
size by deleting branch instructions. The fsm is controlled by
- final_prescan_insn, and controls the actions of PRINT_OPERAND. The patterns
- in the .md file for the branch insns also have a hand in this. */
+ arc_ccfsm_advance (called by arc_final_prescan_insn), and controls the
+ actions of PRINT_OPERAND. The patterns in the .md file for the branch
+ insns also have a hand in this. */
/* The state of the fsm controlling condition codes are:
0: normal, do nothing special
@@ -1940,16 +3343,26 @@ record_cc_ref (rtx insn)
2: don't output this insn
3: make insns conditional
4: make insns conditional
+ 5: make insn conditional (only for outputting annulled delay slot insns)
+
+ special value for cfun->machine->uid_ccfsm_state:
+ 6: return with but one insn before it since function start / call
State transitions (state->state by whom, under what condition):
- 0 -> 1 final_prescan_insn, if insn is conditional branch
- 0 -> 2 final_prescan_insn, if the `target' is an unconditional branch
+ 0 -> 1 arc_ccfsm_advance, if insn is a conditional branch skipping over
+ some instructions.
+ 0 -> 2 arc_ccfsm_advance, if insn is a conditional branch followed
+ by zero or more non-jump insns and an unconditional branch with
+ the same target label as the condbranch.
1 -> 3 branch patterns, after having not output the conditional branch
2 -> 4 branch patterns, after having not output the conditional branch
- 3 -> 0 (*targetm.asm_out.internal_label), if the `target' label is reached
+ 0 -> 5 branch patterns, for annulled delay slot insn.
+ 3 -> 0 ASM_OUTPUT_INTERNAL_LABEL, if the `target' label is reached
(the target label has CODE_LABEL_NUMBER equal to
arc_ccfsm_target_label).
- 4 -> 0 final_prescan_insn, if `target' unconditional branch is reached
+ 4 -> 0 arc_ccfsm_advance, if `target' unconditional branch is reached
+ 3 -> 1 arc_ccfsm_advance, finding an 'else' jump skipping over some insns.
+ 5 -> 0 when outputting the delay slot insn
If the jump clobbers the conditions then we use states 2 and 4.
@@ -1959,27 +3372,28 @@ record_cc_ref (rtx insn)
This is done here because knowledge of the ccfsm state is required,
we may not be outputting the branch. */
-void
-arc_final_prescan_insn (rtx insn,
- rtx *opvec ATTRIBUTE_UNUSED,
- int noperands ATTRIBUTE_UNUSED)
+/* arc_final_prescan_insn calls arc_ccfsm_advance to adjust arc_ccfsm_current,
+ before letting final output INSN. */
+static void
+arc_ccfsm_advance (rtx insn, struct arc_ccfsm *state)
{
/* BODY will hold the body of INSN. */
- register rtx body = PATTERN (insn);
+ register rtx body;
- /* This will be 1 if trying to repeat the trick (i.e.: do the `else' part of
+ /* This will be 1 if trying to repeat the trick (i.e.: do the `else' part of
an if/then/else), and things need to be reversed. */
int reverse = 0;
- /* If we start with a return insn, we only succeed if we find another one. */
+ /* If we start with a return insn, we only succeed if we find another one. */
int seeking_return = 0;
/* START_INSN will hold the insn from where we start looking. This is the
first insn after the following code_label if REVERSE is true. */
rtx start_insn = insn;
- /* Update compare/branch separation marker. */
- record_cc_ref (insn);
+ /* Type of the jump_insn. Brcc insns don't affect ccfsm changes,
+ since they don't rely on a cmp preceding them.  */
+ enum attr_type jump_insn_type;
/* Allow -mdebug-ccfsm to turn this off so we can see how well it does.
We can't do this in macro FINAL_PRESCAN_INSN because its called from
@@ -1987,14 +3401,18 @@ arc_final_prescan_insn (rtx insn,
if (optimize < 2 || TARGET_NO_COND_EXEC)
return;
+ /* Ignore notes and labels. */
+ if (!INSN_P (insn))
+ return;
+ body = PATTERN (insn);
/* If in state 4, check if the target branch is reached, in order to
change back to state 0. */
- if (arc_ccfsm_state == 4)
+ if (state->state == 4)
{
- if (insn == arc_ccfsm_target_insn)
+ if (insn == state->target_insn)
{
- arc_ccfsm_target_insn = NULL;
- arc_ccfsm_state = 0;
+ state->target_insn = NULL;
+ state->state = 0;
}
return;
}
@@ -2004,7 +3422,7 @@ arc_final_prescan_insn (rtx insn,
is the previous target label which is only used once, and the label this
branch jumps to is not too far off. Or in other words "we've done the
`then' part, see if we can do the `else' part." */
- if (arc_ccfsm_state == 3)
+ if (state->state == 3)
{
if (simplejump_p (insn))
{
@@ -2015,7 +3433,7 @@ arc_final_prescan_insn (rtx insn,
start_insn = next_nonnote_insn (start_insn);
}
if (GET_CODE (start_insn) == CODE_LABEL
- && CODE_LABEL_NUMBER (start_insn) == arc_ccfsm_target_label
+ && CODE_LABEL_NUMBER (start_insn) == state->target_label
&& LABEL_NUSES (start_insn) == 1)
reverse = TRUE;
else
@@ -2027,7 +3445,7 @@ arc_final_prescan_insn (rtx insn,
if (GET_CODE (start_insn) == BARRIER)
start_insn = next_nonnote_insn (start_insn);
if (GET_CODE (start_insn) == CODE_LABEL
- && CODE_LABEL_NUMBER (start_insn) == arc_ccfsm_target_label
+ && CODE_LABEL_NUMBER (start_insn) == state->target_label
&& LABEL_NUSES (start_insn) == 1)
{
reverse = TRUE;
@@ -2040,7 +3458,15 @@ arc_final_prescan_insn (rtx insn,
return;
}
- if (GET_CODE (insn) != JUMP_INSN)
+ if (GET_CODE (insn) != JUMP_INSN
+ || GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+ return;
+
+ jump_insn_type = get_attr_type (insn);
+ if (jump_insn_type == TYPE_BRCC
+ || jump_insn_type == TYPE_BRCC_NO_DELAY_SLOT
+ || jump_insn_type == TYPE_LOOP_END)
return;
/* This jump might be paralleled with a clobber of the condition codes,
@@ -2102,7 +3528,7 @@ arc_final_prescan_insn (rtx insn,
if (GET_CODE (this_insn) == CODE_LABEL
&& this_insn == label)
{
- arc_ccfsm_state = 1;
+ state->state = 1;
succeed = TRUE;
}
else
@@ -2119,7 +3545,7 @@ arc_final_prescan_insn (rtx insn,
control falls in from somewhere else. */
if (this_insn == label)
{
- arc_ccfsm_state = 1;
+ state->state = 1;
succeed = TRUE;
}
else
@@ -2130,7 +3556,7 @@ arc_final_prescan_insn (rtx insn,
/* Succeed if the following insn is the target label.
Otherwise fail.
If return insns are used then the last insn in a function
- will be a barrier. */
+ will be a barrier. */
next_must_be_target_label_p = TRUE;
break;
@@ -2149,7 +3575,7 @@ arc_final_prescan_insn (rtx insn,
/* If this is an unconditional branch to the same label, succeed.
If it is to another label, do nothing. If it is conditional,
fail. */
- /* ??? Probably, the test for the SET and the PC are unnecessary. */
+ /* ??? Probably, the test for the SET and the PC are unnecessary. */
if (GET_CODE (scanbody) == SET
&& GET_CODE (SET_DEST (scanbody)) == PC)
@@ -2157,16 +3583,18 @@ arc_final_prescan_insn (rtx insn,
if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
&& XEXP (SET_SRC (scanbody), 0) == label && !reverse)
{
- arc_ccfsm_state = 2;
+ state->state = 2;
succeed = TRUE;
}
else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
fail = TRUE;
+ else if (get_attr_cond (this_insn) != COND_CANUSE)
+ fail = TRUE;
}
else if (GET_CODE (scanbody) == RETURN
&& seeking_return)
{
- arc_ccfsm_state = 2;
+ state->state = 2;
succeed = TRUE;
}
else if (GET_CODE (scanbody) == PARALLEL)
@@ -2197,42 +3625,44 @@ arc_final_prescan_insn (rtx insn,
if (succeed)
{
- if ((!seeking_return) && (arc_ccfsm_state == 1 || reverse))
- arc_ccfsm_target_label = CODE_LABEL_NUMBER (label);
- else
+ if ((!seeking_return) && (state->state == 1 || reverse))
+ state->target_label = CODE_LABEL_NUMBER (label);
+ else if (seeking_return || state->state == 2)
{
- gcc_assert (seeking_return || arc_ccfsm_state == 2);
while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
{
this_insn = next_nonnote_insn (this_insn);
- gcc_assert (!this_insn
- || (GET_CODE (this_insn) != BARRIER
- && GET_CODE (this_insn) != CODE_LABEL));
+
+ gcc_assert (!this_insn
+ || (GET_CODE (this_insn) != BARRIER
+ && GET_CODE (this_insn) != CODE_LABEL));
}
if (!this_insn)
{
/* Oh dear! we ran off the end, give up. */
extract_insn_cached (insn);
- arc_ccfsm_state = 0;
- arc_ccfsm_target_insn = NULL;
+ state->state = 0;
+ state->target_insn = NULL;
return;
}
- arc_ccfsm_target_insn = this_insn;
+ state->target_insn = this_insn;
}
+ else
+ gcc_unreachable ();
      /* If REVERSE is true, the current condition code needs to be inverted from
what it was. */
if (!reverse)
- arc_ccfsm_current_cc = get_arc_condition_code (XEXP (SET_SRC (body),
- 0));
+ state->cc = get_arc_condition_code (XEXP (SET_SRC (body), 0));
if (reverse || then_not_else)
- arc_ccfsm_current_cc = ARC_INVERSE_CONDITION_CODE (arc_ccfsm_current_cc);
+ state->cc = ARC_INVERSE_CONDITION_CODE (state->cc);
}
- /* Restore recog_data. Getting the attributes of other insns can
+ /* Restore recog_operand. Getting the attributes of other insns can
destroy this array, but final.c assumes that it remains intact
- across this call. */
+ across this call; since the insn has been recognized already we
+ call insn_extract directly.  */
extract_insn_cached (insn);
}
}
@@ -2240,28 +3670,78 @@ arc_final_prescan_insn (rtx insn,
/* Record that we are currently outputting label NUM with prefix PREFIX.
If it's the label we're looking for, reset the ccfsm machinery.
- Called from (*targetm.asm_out.internal_label). */
+ Called from ASM_OUTPUT_INTERNAL_LABEL. */
-void
-arc_ccfsm_at_label (const char *prefix, int num)
+static void
+arc_ccfsm_at_label (const char *prefix, int num, struct arc_ccfsm *state)
{
- if (arc_ccfsm_state == 3 && arc_ccfsm_target_label == num
+ if (state->state == 3 && state->target_label == num
&& !strcmp (prefix, "L"))
{
- arc_ccfsm_state = 0;
- arc_ccfsm_target_insn = NULL_RTX;
+ state->state = 0;
+ state->target_insn = NULL_RTX;
+ }
+}
+
+/* We are considering a conditional branch with the condition COND.
+ Check if we want to conditionalize a delay slot insn, and if so modify
+ the ccfsm state accordingly.
+ REVERSE says branch will branch when the condition is false. */
+void
+arc_ccfsm_record_condition (rtx cond, int reverse, rtx jump,
+ struct arc_ccfsm *state)
+{
+ rtx seq_insn = NEXT_INSN (PREV_INSN (jump));
+ if (!state)
+ state = &arc_ccfsm_current;
+
+ gcc_assert (state->state == 0);
+ if (seq_insn != jump)
+ {
+ rtx insn = XVECEXP (PATTERN (seq_insn), 0, 1);
+
+ if (INSN_ANNULLED_BRANCH_P (jump)
+ && (TARGET_AT_DBR_CONDEXEC || INSN_FROM_TARGET_P (insn)))
+ {
+ state->cc = get_arc_condition_code (cond);
+ if (!reverse)
+ arc_ccfsm_current.cc
+ = ARC_INVERSE_CONDITION_CODE (state->cc);
+ arc_ccfsm_current.state = 5;
+ }
+ }
+}
+
+/* Update *STATE as we would when we emit INSN. */
+static void
+arc_ccfsm_post_advance (rtx insn, struct arc_ccfsm *state)
+{
+ if (LABEL_P (insn))
+ arc_ccfsm_at_label ("L", CODE_LABEL_NUMBER (insn), state);
+ else if (JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
+ && get_attr_type (insn) == TYPE_BRANCH)
+ {
+ if (ARC_CCFSM_BRANCH_DELETED_P (state))
+ ARC_CCFSM_RECORD_BRANCH_DELETED (state);
+ else
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ arc_ccfsm_record_condition (XEXP (src, 0), XEXP (src, 1) == pc_rtx,
+ insn, state);
+ }
}
+ else if (arc_ccfsm_current.state == 5)
+ arc_ccfsm_current.state = 0;
}
/* See if the current insn, which is a conditional branch, is to be
deleted. */
-
int
arc_ccfsm_branch_deleted_p (void)
{
- if (arc_ccfsm_state == 1 || arc_ccfsm_state == 2)
- return 1;
- return 0;
+ return ARC_CCFSM_BRANCH_DELETED_P (&arc_ccfsm_current);
}
/* Record a branch isn't output because subsequent insns can be
@@ -2270,61 +3750,2293 @@ arc_ccfsm_branch_deleted_p (void)
void
arc_ccfsm_record_branch_deleted (void)
{
- /* Indicate we're conditionalizing insns now. */
- arc_ccfsm_state += 2;
+ ARC_CCFSM_RECORD_BRANCH_DELETED (&arc_ccfsm_current);
+}
+
+int
+arc_ccfsm_cond_exec_p (void)
+{
+ return (cfun->machine->prescan_initialized
+ && ARC_CCFSM_COND_EXEC_P (&arc_ccfsm_current));
+}
+
+void
+arc_ccfsm_advance_to (rtx insn)
+{
+ struct machine_function *machine = cfun->machine;
+ rtx scan = machine->ccfsm_current_insn;
+ int restarted = 0;
+ struct arc_ccfsm *statep = &arc_ccfsm_current;
+
+ /* Rtl changes too much before arc_reorg to keep ccfsm state.
+ But we are not required to calculate exact lengths then. */
+ if (!machine->arc_reorg_started)
+ return;
+ while (scan != insn)
+ {
+ if (scan)
+ {
+ arc_ccfsm_post_advance (scan, statep);
+ scan = next_insn (scan);
+ }
+ else
+ {
+ gcc_assert (!restarted);
+ scan = get_insns ();
+ memset (statep, 0, sizeof *statep);
+ restarted = 1;
+ }
+ if (scan)
+ arc_ccfsm_advance (scan, statep);
+ }
+ machine->ccfsm_current_insn = scan;
+}
+
+/* Like next_active_insn, but return NULL if we find an ADDR_(DIFF_)VEC,
+ and look inside SEQUENCEs. */
+static rtx
+arc_next_active_insn (rtx insn, struct arc_ccfsm *statep)
+{
+ rtx pat;
+
+ do
+ {
+ if (statep)
+ arc_ccfsm_post_advance (insn, statep);
+ insn = NEXT_INSN (insn);
+ if (!insn || BARRIER_P (insn))
+ return NULL_RTX;
+ if (statep)
+ arc_ccfsm_advance (insn, statep);
+ }
+ while (NOTE_P (insn)
+ || (cfun->machine->arc_reorg_started
+ && LABEL_P (insn) && !label_to_alignment (insn))
+ || (NONJUMP_INSN_P (insn)
+ && (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)));
+ if (!LABEL_P (insn))
+ {
+ gcc_assert (INSN_P (insn));
+ pat = PATTERN (insn);
+ if (GET_CODE (pat) == ADDR_VEC || GET_CODE (pat) == ADDR_DIFF_VEC)
+ return NULL_RTX;
+ if (GET_CODE (pat) == SEQUENCE)
+ return XVECEXP (pat, 0, 0);
+ }
+ return insn;
+}
+
+/* When deciding if an insn should be output short, we want to know
+   something about the following insns:
+   - if another insn follows which we know we can output as a short insn
+     before an alignment-sensitive point, we can output this insn short:
+     the decision about the eventual alignment can be postponed.
+   - if a to-be-aligned label comes next, we should output this insn so
+     as to get / preserve 4-byte alignment.
+   - if a likely branch without a delay slot insn, or a call with an
+     immediately following short insn, comes next, we should output this
+     insn so as to get / preserve 2 mod 4 unalignment.
+   - do the same for a not completely unlikely branch with a short insn
+     following before any other branch / label.
+   - in order to decide if we are actually looking at a branch, we need
+     to call arc_ccfsm_advance.
+   - in order to decide if we are looking at a short insn, we should know
+     if it is conditionalized.  To a first order of approximation this is
+     the case if the state from arc_ccfsm_advance from before this insn
+     indicates the insn is conditionalized.  However, a further refinement
+     could be to not conditionalize an insn if the destination register(s)
+     is/are dead in the non-executed case.  */
+int
+arc_verify_short (rtx insn, int unalign, int check_attr)
+{
+ rtx scan, next, later, prev;
+ struct arc_ccfsm *statep, old_state, save_state;
+ int odd = 3; /* 0/2: (mis)alignment specified; 3: keep short. */
+ enum attr_iscompact iscompact;
+ struct machine_function *machine;
+ const char **rp = &cfun->machine->size_reason;
+ int jump_p;
+ rtx this_sequence = NULL_RTX;
+ rtx recog_insn = recog_data.insn;
+
+ if (check_attr > 0)
+ {
+ iscompact = get_attr_iscompact (insn);
+ if (iscompact == ISCOMPACT_FALSE)
+ return 0;
+ }
+ machine = cfun->machine;
+
+ if (machine->force_short_suffix >= 0)
+ return machine->force_short_suffix;
+
+ /* Now we know that the insn may be output with a "_s" suffix. But even
+ when optimizing for size, we still want to look ahead, because if we
+ find a mandatory alignment, we might find that keeping the insn long
+ doesn't increase size, but gains speed. */
+
+ /* The iscompact attribute depends on arc_ccfsm_current, thus, in order to
+ read the attributes relevant to our forward scan, we must modify
+ arc_ccfsm_current while scanning. */
+ if (check_attr == 0)
+ arc_ccfsm_advance_to (insn);
+ statep = &arc_ccfsm_current;
+ old_state = *statep;
+ jump_p = (TARGET_ALIGN_CALL
+ ? (JUMP_P (insn) || CALL_ATTR (insn, CALL))
+ : (JUMP_P (insn) && get_attr_type (insn) != TYPE_RETURN));
+
+ /* Check if this is an out-of-range brcc / bbit which is expanded with
+ a short cmp / btst. */
+ if (JUMP_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
+ {
+ enum attr_type type = get_attr_type (insn);
+ int len = get_attr_lock_length (insn);
+
+ /* Since both the length and lock_length attributes use insn_lengths,
+ which has ADJUST_INSN_LENGTH applied, we can't rely on equality
+ with 6 / 10 here. */
+ if ((type == TYPE_BRCC && len > 4)
+ || (type == TYPE_BRCC_NO_DELAY_SLOT && len > 8))
+ {
+ rtx operator = XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 0);
+ rtx op0 = XEXP (operator, 0);
+
+ if (GET_CODE (op0) == ZERO_EXTRACT)
+ op0 = XEXP (op0, 0);
+ if (satisfies_constraint_Rcq (op0))
+ {
+ /* Check if the branch should be unaligned. */
+ if (arc_unalign_branch_p (insn))
+ {
+ odd = 2;
+ *rp = "Long unaligned jump avoids non-delay slot penalty";
+ goto found_align;
+ }
+ /* If we have a short delay slot insn, make this insn 'short'
+ (actually, short compare & long jump) and defer the alignment
+ decision to processing of the delay insn.  Without this test
+ here, we'd reason that a short jump with a short delay insn
+ should be lengthened to avoid a stall if it's aligned - that
+ is not just suboptimal, but can lead to infinite loops as
+ the delay insn is assumed to be long the next time, since we
+ don't have independent delay slot size information. */
+ else if ((get_attr_delay_slot_filled (insn)
+ == DELAY_SLOT_FILLED_YES)
+ && (get_attr_iscompact (NEXT_INSN (insn))
+ != ISCOMPACT_FALSE))
+ {
+ *rp = "Small is beautiful";
+ goto found_align;
+ }
+ }
+ }
+ }
+
+ /* If INSN is at an unaligned return address of a preceding call,
+ make INSN short. */
+ if (TARGET_ALIGN_CALL
+ && unalign
+ && (prev = prev_active_insn (insn)) != NULL_RTX
+ && arc_next_active_insn (prev, 0) == insn
+ && ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
+ ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL)
+ : (CALL_ATTR (prev, NON_SIBCALL)
+ && NEXT_INSN (PREV_INSN (prev)) == prev)))
+ {
+ *rp = "Call return to unaligned long insn would stall";
+ goto found_align;
+ }
+
+ prev = PREV_INSN (insn);
+ next = NEXT_INSN (insn);
+ gcc_assert (prev);
+ /* Basic block reordering calculates insn lengths while it has the insns
+ at the end of a basic block detached from the remainder of the insn
+ chain. */
+ gcc_assert (next || !cfun->machine->arc_reorg_started);
+ if (NEXT_INSN (prev) != insn)
+ this_sequence = PATTERN (NEXT_INSN (prev));
+ else if (next && PREV_INSN (next) != insn)
+ this_sequence = PATTERN (PREV_INSN (next));
+ if (this_sequence)
+ {
+ gcc_assert (GET_CODE (this_sequence) == SEQUENCE);
+ gcc_assert (XVECLEN (this_sequence, 0) == 2);
+ gcc_assert (insn == XVECEXP (this_sequence, 0, 0)
+ || insn == XVECEXP (this_sequence, 0, 1));
+ }
+
+ /* If this is a jump without a delay slot, keep it long if we have
+ unalignment.  Don't do this for non-sibling calls returning to a long
+ insn, because what we'd gain when calling, we'd lose when returning. */
+ if (jump_p && unalign
+ && arc_unalign_branch_p (insn)
+ && (!CALL_ATTR (insn, NON_SIBCALL)
+ || (((next = arc_next_active_insn (insn, statep))
+ && INSN_P (next)
+ && CCFSM_ISCOMPACT (next, statep)
+ && arc_verify_short (next, 2, 1))
+ ? (*statep = old_state, 1)
+ : (*statep = old_state, 0))))
+ {
+ *rp = "Long unaligned jump avoids non-delay slot penalty";
+ if (recog_insn)
+ extract_insn_cached (recog_insn);
+ return 0;
+ }
+
+ /* ARC700 stalls if an aligned short branch has a short delay insn. */
+ if (TARGET_UPSIZE_DBR && this_sequence && !unalign && jump_p
+ && !INSN_DELETED_P (next = XVECEXP (this_sequence, 0, 1))
+ && CCFSM_DBR_ISCOMPACT (next, insn, statep))
+ {
+ *rp = "Aligned short jumps with short delay insn stall when taken";
+ if (recog_insn)
+ extract_insn_cached (recog_insn);
+ return 0;
+ }
+
+ /* If this is a call with a long delay insn, or the delay slot insn to
+ a call, we want to choose INSN's length so that the return address
+ will be aligned, unless the following insn is short. */
+ if (TARGET_ALIGN_CALL && this_sequence
+ && CALL_ATTR (XVECEXP (this_sequence, 0, 0), NON_SIBCALL)
+ && ((next = XVECEXP (this_sequence, 0, 1)) == insn
+ || !CCFSM_ISCOMPACT (next, statep)))
+ {
+ /* If we currently have unalignment, getting alignment by using a
+ short insn now is the smart choice, and we don't want to prejudice
+ the short/long decision for the following insn. */
+ *rp = "Function return stalls if the return address is unaligned";
+ if (unalign)
+ goto found_align;
+ scan = XVECEXP (this_sequence, 0, 1);
+ arc_ccfsm_advance (scan, statep);
+ scan = arc_next_active_insn (scan, statep);
+ if (!scan)
+ goto found_align;
+ if (LABEL_P (scan))
+ {
+ odd = 0;
+ goto found_align;
+ }
+ if (!CCFSM_ISCOMPACT (scan, statep) || !arc_verify_short (scan, 2, 1))
+ odd = 0;
+ else
+ *rp = "Small is beautiful";
+ goto found_align;
+ }
+ /* Likewise, if this is a call without a delay slot, except that here
+ we want to unalign the call. */
+ if (jump_p && !this_sequence && CALL_ATTR (insn, NON_SIBCALL))
+ {
+ *rp = "Function return stalls if the return address is unaligned";
+ if (!TARGET_UNALIGN_BRANCH && unalign)
+ goto found_align;
+ scan = arc_next_active_insn (insn, statep);
+ if (!scan)
+ {
+ /* Apparently a non-return call. */
+ *rp = (unalign
+ ? "Long unaligned jump avoids non-delay slot penalty"
+ : "Small is beautiful");
+ odd = 2;
+ goto found_align;
+ }
+ if (LABEL_P (scan))
+ {
+ *rp = "Avoid nop insertion before label";
+ odd = 0;
+ goto found_align;
+ }
+ if (!CCFSM_ISCOMPACT (scan, statep) || !arc_verify_short (scan, 2, 1))
+ odd = 0;
+ else
+ {
+ odd = 2;
+ *rp = (unalign
+ ? "Long unaligned jump avoids non-delay slot penalty"
+ : "Small is beautiful");
+ }
+ goto found_align;
+ }
+
+ scan = arc_next_active_insn (insn, statep);
+
+ /* If this and the previous insn are the only ones between function start
+ or an outgoing function call, and a return insn, avoid having them
+ both be short.
+ N.B. we check that the next insn is a return, and this implies that
+ INSN can't be a CALL / SFUNC or in the delay slot of one, because
+ there has to be a restore of blink before the return. */
+ if (TARGET_PAD_RETURN
+ && scan && JUMP_P (scan) && get_attr_type (scan) == TYPE_RETURN
+ && (prev = prev_active_insn (insn))
+ && arc_next_active_insn (prev, 0) == insn
+ && (INSN_ADDRESSES (INSN_UID (insn)) - INSN_ADDRESSES (INSN_UID (prev))
+ == 2)
+ && ((prev = prev_active_insn (prev)) == NULL_RTX
+ || CALL_ATTR (GET_CODE (PATTERN (prev)) == SEQUENCE
+ ? XVECEXP (PATTERN (prev), 0, 0) : prev, CALL)))
+ {
+ *rp = "call/return and return/return must be 6 bytes apart to avoid mispredict";
+ *statep = old_state;
+ if (recog_insn)
+ extract_insn_cached (recog_insn);
+ return 0;
+ }
+
+ *rp = "Small is beautiful";
+ if (scan) for (;;)
+ {
+ if (JUMP_P (scan) && GET_CODE (PATTERN (scan)) == PARALLEL
+ && arc_unalign_branch_p (scan))
+ {
+ /* If this is an out-of-range brcc / bbit which is expanded with
+ a compact cmp / btst, emit the current insn short. */
+
+ enum attr_type type = get_attr_type (scan);
+ int len = get_attr_lock_length (scan);
+
+ /* Since both the length and lock_length attributes use insn_lengths,
+ which has ADJUST_INSN_LENGTH applied, we can't rely on equality
+ with 6 / 10 here. */
+ if ((type == TYPE_BRCC && len > 4)
+ || (type == TYPE_BRCC_NO_DELAY_SLOT && len > 8))
+ {
+ rtx operator = XEXP (SET_SRC (XVECEXP (PATTERN (scan), 0, 0)), 0);
+ rtx op0 = XEXP (operator, 0);
+
+ if (GET_CODE (op0) == ZERO_EXTRACT)
+ op0 = XEXP (op0, 0);
+ if (satisfies_constraint_Rcq (op0))
+ break;
+ }
+ }
+
+ if ((JUMP_P (scan) || CALL_P (scan))
+ && arc_unalign_branch_p (scan)
+ && (TARGET_ALIGN_CALL
+ ? (JUMP_P (scan) || CALL_ATTR (scan, SIBCALL))
+ : (JUMP_P (scan) && get_attr_type (scan) != TYPE_RETURN))
+ && !ARC_CCFSM_BRANCH_DELETED_P (statep))
+ {
+ /* Assume for now that the branch is sufficiently likely to
+ warrant unaligning. */
+ *rp = "Long unaligned jump avoids non-delay slot penalty";
+ odd = 2;
+ break;
+ }
+ /* A call without a delay slot insn with a short insn following
+ should be unaligned. */
+ if (TARGET_UNALIGN_BRANCH && TARGET_ALIGN_CALL
+ && CALL_ATTR (scan, CALL)
+ && NEXT_INSN (PREV_INSN (scan)) == scan /* No delay insn. */
+ && (((save_state = *statep,
+ next = arc_next_active_insn (scan, statep)) == NULL_RTX
+ || (!LABEL_P (next) && CCFSM_ISCOMPACT (next, statep)))
+ ? 1 : (*statep = save_state, 0)))
+ {
+ *rp = "Long unaligned jump avoids non-delay slot penalty";
+ odd = 2;
+ break;
+ }
+ /* A long call with a long delay slot insn should be aligned,
+ unless a short insn follows. */
+ if (TARGET_ALIGN_CALL
+ && CALL_ATTR (scan, CALL)
+ && NEXT_INSN (PREV_INSN (scan)) != scan
+ && !CCFSM_ISCOMPACT (scan, statep)
+ && !CCFSM_ISCOMPACT ((next = NEXT_INSN (scan)) , statep)
+ && (((save_state = *statep,
+ later = arc_next_active_insn (next, statep))
+ && (LABEL_P (later)
+ || !CCFSM_ISCOMPACT (later, statep)
+ || !arc_verify_short (later, 2, 1)))
+ ? 1 : (*statep = save_state, 0)))
+ {
+ *rp = "Function return stalls if the return address is unaligned";
+ odd = 0;
+ break;
+ }
+ if (LABEL_P (scan) && label_to_alignment (scan) > 1)
+ {
+ *rp = "Avoid nop insertion before label";
+ odd = 0;
+ break;
+ }
+ if (INSN_P (scan)
+ && GET_CODE (PATTERN (scan)) != USE
+ && GET_CODE (PATTERN (scan)) != CLOBBER
+ && CCFSM_ISCOMPACT (scan, statep))
+ {
+ /* Go ahead making INSN short, we decide about SCAN later. */
+ break;
+ }
+ if (GET_CODE (scan) == BARRIER)
+ break;
+ arc_ccfsm_post_advance (scan, statep);
+ scan = NEXT_INSN (scan);
+ if (!scan)
+ break;
+ if (GET_CODE (scan) == INSN && GET_CODE (PATTERN (scan)) == SEQUENCE)
+ scan = XVECEXP (PATTERN (scan), 0, 0);
+ if (JUMP_P (scan)
+ && (GET_CODE (PATTERN (scan)) == ADDR_VEC
+ || GET_CODE (PATTERN (scan)) == ADDR_DIFF_VEC))
+ {
+ break;
+ }
+ arc_ccfsm_advance (scan, statep);
+ }
+ found_align:
+ *statep = old_state;
+ if (recog_insn)
+ extract_insn_cached (recog_insn);
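+ /* A short (2-byte) insn toggles the 2 mod 4 alignment, so emit INSN
+ short exactly when that moves us toward the desired parity ODD;
+ ODD == 3 means "keep short" and always differs from UNALIGN, which
+ is 0 or 2. */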
+ if (odd != unalign)
+ return 1;
+ return 0;
+}
+
+static void
+output_short_suffix (FILE *file)
+{
+ rtx insn = current_output_insn;
+
+ if (arc_verify_short (insn, cfun->machine->unalign, 1))
+ {
+ fprintf (file, "_s");
+ cfun->machine->unalign ^= 2;
+ }
+ /* Restore recog_operand. */
+ extract_insn_cached (insn);
+}
+
+void
+arc_final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
+ int noperands ATTRIBUTE_UNUSED)
+{
+ if (TARGET_DUMPISIZE)
+ fprintf (asm_out_file, "\n; at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
+
+ /* Output a nop if necessary to prevent a hazard.
+ Don't do this for delay slots: inserting a nop would
+ alter semantics, and the only time we would find a hazard is for a
+ call function result - and in that case, the hazard is spurious to
+ start with. */
+ if (PREV_INSN (insn)
+ && PREV_INSN (NEXT_INSN (insn)) == insn
+ && arc_hazard (prev_real_insn (insn), insn))
+ {
+ current_output_insn =
+ emit_insn_before (gen_nop (), NEXT_INSN (PREV_INSN (insn)));
+ final_scan_insn (current_output_insn, asm_out_file, optimize, 1, NULL);
+ current_output_insn = insn;
+ }
+ /* Restore extraction data which might have been clobbered by arc_hazard. */
+ extract_constrain_insn_cached (insn);
+
+ if (!cfun->machine->prescan_initialized)
+ {
+ /* Clear lingering state from branch shortening. */
+ memset (&arc_ccfsm_current, 0, sizeof arc_ccfsm_current);
+ cfun->machine->prescan_initialized = 1;
+ }
+ arc_ccfsm_advance (insn, &arc_ccfsm_current);
+
+ cfun->machine->size_reason = 0;
+}
+
+/* Define the offset between two registers, one to be eliminated, and
+ the other its replacement, at the start of a routine. */
+
+int
+arc_initial_elimination_offset (int from, int to)
+{
+ if (! cfun->machine->frame_info.initialized)
+ arc_compute_frame_size (get_frame_size ());
+
+ if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
+ {
+ if (TARGET_A4)
+ return 0;
+ else
+ return (cfun->machine->frame_info.extra_size
+ + cfun->machine->frame_info.reg_size);
+ }
+
+ if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
+ {
+ return (cfun->machine->frame_info.total_size
+ - cfun->machine->frame_info.pretend_size);
+ }
- /* If the next insn is a subroutine call, we still need a nop between the
- cc setter and user. We need to undo the effect of calling record_cc_ref
- for the just deleted branch. */
- current_insn_set_cc_p = last_insn_set_cc_p;
+ if ((from == FRAME_POINTER_REGNUM) && (to == STACK_POINTER_REGNUM))
+ {
+ if (TARGET_A4)
+ return (cfun->machine->frame_info.total_size
+ - cfun->machine->frame_info.pretend_size);
+ else
+ return (cfun->machine->frame_info.total_size
+ - (cfun->machine->frame_info.pretend_size
+ + cfun->machine->frame_info.extra_size
+ + cfun->machine->frame_info.reg_size));
+ }
+
+ gcc_unreachable ();
}
-
+
+
+/* Generate a bbit{0,1} insn for the current pattern.
+ bbit instructions are used as an optimized alternative to
+ a sequence of bic, cmp and branch instructions.
+ Similar to gen_bbit_insns (), with conditions reversed. */
+const char *
+gen_bbit_bic_insns (rtx *operands)
+{
+
+ switch (INTVAL(operands[3]))
+ {
+ /* bic r%0,r%1,imm%2
+ cmp r%0,0 <- the value we have switched on
+ b{eq,ne} label%5
+ ||
+ \/
+ bbit{0,1} r%1,log2(imm%2),label%5 */
+ case 0:
+ if (GET_CODE (operands[4]) == EQ)
+ return "bbit1%# %1,%z2,%^%l5";
+ else if (GET_CODE (operands[4]) == NE)
+ return "bbit0%# %1,%z2,%^%l5";
+ else
+ gcc_unreachable ();
+
+ /* bic r%0,r%1,imm%2
+ cmp r%0,1 <- the value we have switched on
+ beq label%5
+ ||
+ \/
+ bbit0 r%1,log2(imm%2),label%5
+ The bne case does not make sense here as it gives too little
+ information for us to generate an insn.
+ Such a case is therefore disallowed in the condition itself
+ (ref: valid_bbit_pattern_p). */
+ case 1:
+ if (GET_CODE (operands[4]) == EQ)
+ return "bbit0%# %1,%z2,%l5";
+ else
+ gcc_unreachable ();
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
+
+
+/* Generate a bbit{0,1} insn for the current pattern.
+ bbit instructions are used as an optimized alternative to
+ a sequence of and, cmp and branch instructions. */
+const char *
+gen_bbit_insns (rtx *operands)
+{
+
+ switch (INTVAL(operands[3]))
+ {
+ /* and r%0,r%1,imm%2
+ cmp r%0,0 <- the value we have switched on
+ b{eq,ne} label%5
+ ||
+ \/
+ bbit{0,1} r%1,log2(imm%2),label%5 */
+ case 0:
+ if (GET_CODE (operands[4]) == EQ)
+ return "bbit0%# %1,%z2,%^%l5";
+ else if (GET_CODE (operands[4]) == NE)
+ return "bbit1%# %1,%z2,%^%l5";
+ else
+ gcc_unreachable ();
+
+ /* and r%0,r%1,imm%2
+ cmp r%0,1 <- the value we have switched on
+ beq label%5
+ ||
+ \/
+ bbit1 r%1,log2(imm%2),label%5
+ The bne case does not make sense here as it gives too little
+ information for us to generate an insn.
+ Such a case is therefore disallowed in the condition itself
+ (ref: valid_bbit_pattern_p). */
+ case 1:
+ if (GET_CODE (operands[4]) == EQ)
+ return "bbit1%# %1,%z2,%l5";
+ else
+ gcc_unreachable ();
+
+ default:
+ gcc_unreachable ();
+ }
+}
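+
+/* Illustration (a sketch, not generated code): the source-level idiom
+ the two bbit generators above replace.  For a power-of-two mask,
+ if (x & 0x10) goto L;
+ would otherwise expand to an and/cmp/branch sequence; with bbit it
+ collapses into a single insn,
+ bbit1 r_x,4,L  ; 4 == log2 (0x10)
+ and the inverted test (x & 0x10) == 0 becomes bbit0. */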
+
+
+/* Return the destination address of a branch. */
+int
+branch_dest (rtx branch)
+{
+ rtx pat = PATTERN (branch);
+ rtx dest = (GET_CODE (pat) == PARALLEL
+ ? SET_SRC (XVECEXP (pat, 0, 0)) : SET_SRC (pat));
+ int dest_uid;
+
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, XEXP (dest, 1) == pc_rtx ? 2 : 1);
+
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+
+ return INSN_ADDRESSES (dest_uid);
+}
+
+
+/* Predicate for judging if a pattern is valid for bbit generation.
+ The rtl pattern is:
+ and r%0,r%1,imm%2
+ cmp r%0,imm%3
+ pc = (cmp cc, 0) ? label%5 : pc
+ The conditions required are:
+ 1. imm%2 should be an exact power of 2
+ 2. imm%3 should be 0 or 1
+ 3. the comparison operator should be either EQ or NE
+ NOTE: imm%3 == 1 with comparison NE is not valid. */
+int
+valid_bbit_pattern_p (rtx *operands, rtx insn)
+{
+ int retval;
+
+ /* retval = 1 iff imm%2 is a power of 2. */
+ retval = !( (INTVAL (operands[2]) & (INTVAL (operands[2]) - 1)) );
+
+ /* Now check for the right combinations
+ (ref: comments in gen_bbit_insns above). */
+ retval = retval &&
+ (
+ ( INTVAL(operands[3]) == 1 && GET_CODE (operands[4]) == EQ )
+ || ( ( INTVAL(operands[3]) == 0)
+ && ( GET_CODE (operands[4]) == EQ || GET_CODE (operands[4]) == NE))
+ );
+
+ retval = retval
+ && SMALL_INT (branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)));
+
+ return retval;
+
+}
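+
+/* Note on the power-of-two check above: it is the usual bit trick,
+ e.g. (16 & 15) == 0 while (12 & 11) != 0.  0 also satisfies
+ (x & (x - 1)) == 0; the pattern presumably never matches a zero AND
+ mask, since no single bit could be named for bbit. */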
+
+/* Symbols in the text segment can be accessed without indirecting via the
+ constant pool; it may take an extra binary operation, but this is still
+ faster than indirecting via memory. Don't do this when not optimizing,
+ since we won't be calculating all of the offsets necessary to do this
+ simplification. */
+
+/* On the ARC, function addresses are not the same as normal addresses.
+ Branch to absolute address insns take an address that is right-shifted
+ by 2. We encode the fact that we have a function here, and then emit a
+ special assembler op when outputting the address.
+ The encoding involves adding an *_CALL_FLAG_CHAR to the symbol name
+ (depending on whether any of short_call/long_call attributes were specified
+ in the function's declaration) and unmangling the name at the time of
+ printing the symbol name.
+
+ Also, if the symbol is local, then the machine specific
+ SYMBOL_REF_FLAG is set in the rtx.  This flag is later used to print
+ the reference to local symbols as @GOTOFF references instead of
+ @GOT references so that the symbol does not get a GOT entry unlike
+ the global symbols.
+ Also calls to local functions are relative and not through the
+ Procedure Linkage Table.
+*/
+
static void
-arc_va_start (tree valist, rtx nextarg)
+arc_encode_section_info (tree decl, rtx rtl, int first)
{
- /* See arc_setup_incoming_varargs for reasons for this oddity. */
- if (crtl->args.info < 8
- && (crtl->args.info & 1))
- nextarg = plus_constant (nextarg, UNITS_PER_WORD);
+ /* Check if it is a function, and whether it has the [long/short]_call
+ attribute specified */
+ if (TREE_CODE (decl) == FUNCTION_DECL)
+ {
+ tree attr = (TREE_TYPE (decl) != error_mark_node
+ ? TYPE_ATTRIBUTES (TREE_TYPE (decl)) : NULL_TREE);
+ tree long_call_attr = lookup_attribute ("long_call", attr);
+ tree short_call_attr = lookup_attribute ("short_call", attr);
+
+ if (long_call_attr != NULL_TREE)
+ arc_encode_symbol (decl, LONG_CALL_FLAG_CHAR);
+ else if (short_call_attr != NULL_TREE)
+ arc_encode_symbol (decl, SHORT_CALL_FLAG_CHAR);
+ else
+ arc_encode_symbol (decl, SIMPLE_CALL_FLAG_CHAR);
+ }
- std_expand_builtin_va_start (valist, nextarg);
+ if (flag_pic)
+ {
+ if (!DECL_P (decl) || targetm.binds_local_p (decl))
+ SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
+ }
+
+ /* for sdata and SYMBOL_FLAG_FUNCTION */
+ default_encode_section_info (decl, rtl, first);
}
/* This is how to output a definition of an internal numbered label where
PREFIX is the class of label and NUM is the number within the class. */
-static void
-arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
+static void
+arc_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
{
- arc_ccfsm_at_label (prefix, labelno);
+ if (cfun)
+ arc_ccfsm_at_label (prefix, labelno, &arc_ccfsm_current);
default_internal_label (stream, prefix, labelno);
}
-/* Worker function for TARGET_ASM_EXTERNAL_LIBCALL. */
+/* Set the cpu type and print out other fancy things,
+ at the top of the file. */
-static void
-arc_external_libcall (rtx fun ATTRIBUTE_UNUSED)
+static void
+arc_file_start (void)
{
-#if 0
-/* On the ARC we want to have libgcc's for multiple cpus in one binary.
- We can't use `assemble_name' here as that will call ASM_OUTPUT_LABELREF
- and we'll get another suffix added on if -mmangle-cpu. */
- if (TARGET_MANGLE_CPU_LIBGCC)
+ default_file_start ();
+ fprintf (asm_out_file, "\t.cpu %s\n", arc_cpu_string);
+}
+
+static void
+arc_asm_file_end (void)
+{
+ /* Free the obstack */
+ /* obstack_free (&arc_local_obstack, NULL);*/
+
+}
+/* Cost functions. */
+
+/* Compute a (partial) cost for rtx X. Return true if the complete
+ cost has been computed, and false if subexpressions should be
+ scanned. In either case, *TOTAL contains the cost result. */
+
+static bool
+arc_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
+{
+ switch (code)
{
- fprintf (FILE, "\t.rename\t_%s, _%s%s\n",
- XSTR (SYMREF, 0), XSTR (SYMREF, 0),
- arc_mangle_suffix);
+ /* Small integers are as cheap as registers. */
+ case CONST_INT:
+ {
+ bool nolimm = false; /* Can we do without long immediate? */
+ bool fast = false; /* Is the result available immediately? */
+ bool condexec = false; /* Does this allow conditional execution? */
+ bool compact = false; /* Is a 16 bit opcode available? */
+ /* CONDEXEC also implies that we can have an unconditional
+ 3-address operation. */
+
+ nolimm = compact = condexec = false;
+ if (UNSIGNED_INT6 (INTVAL (x)))
+ nolimm = condexec = compact = true;
+ else
+ {
+ if (SMALL_INT (INTVAL (x)))
+ nolimm = fast = true;
+ switch (outer_code)
+ {
+ case AND: /* bclr, bmsk, ext[bw] */
+ if (satisfies_constraint_Ccp (x) /* bclr */
+ || satisfies_constraint_C1p (x) /* bmsk */)
+ nolimm = fast = condexec = compact = true;
+ break;
+ case IOR: /* bset */
+ if (satisfies_constraint_C0p (x)) /* bset */
+ nolimm = fast = condexec = compact = true;
+ break;
+ case XOR:
+ if (satisfies_constraint_C0p (x)) /* bxor */
+ nolimm = fast = condexec = true;
+ break;
+ case SET:
+ if (satisfies_constraint_Crr (x)) /* ror b,u6 */
+ nolimm = true;
+ default:
+ break;
+ }
+ }
+ /* FIXME: Add target options to attach a small cost if
+ condexec / compact is not true. */
+ if (nolimm)
+ {
+ *total = 0;
+ return true;
+ }
+ }
+ /* FALLTHRU */
+
+ /* 4 byte values can be fetched as immediate constants -
+ let's give that the cost of an extra insn. */
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ *total = COSTS_N_INSNS (1);
+ return true;
+
+ case CONST_DOUBLE:
+ {
+ rtx high, low;
+
+ if (TARGET_DPFP)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ /* FIXME: correct the order of high,low */
+ split_double (x, &high, &low);
+ *total = COSTS_N_INSNS (!SMALL_INT (INTVAL (high))
+ + !SMALL_INT (INTVAL (low)));
+ return true;
+ }
+
+ /* Encourage synth_mult to find a synthetic multiply when reasonable.
+ If we need more than 12 insns to do a multiply, then go out-of-line,
+ since the call overhead will be < 10% of the cost of the multiply. */
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ if (TARGET_SHIFTER)
+ {
+ /* If we want to shift a constant, we need a LIMM. */
+ /* ??? when the optimizers want to know if a constant should be
+ hoisted, they ask for the cost of the constant. OUTER_CODE is
+ insufficient context for shifts since we don't know which operand
+ we are looking at. */
+ if (CONSTANT_P (XEXP (x, 0)))
+ {
+ *total += COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 1), code, speed);
+ return true;
+ }
+ *total = COSTS_N_INSNS (1);
+ }
+ else if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ *total = COSTS_N_INSNS (16);
+ else
+ *total = COSTS_N_INSNS (INTVAL (XEXP ((x), 1)));
+ return false;
+
+ case DIV:
+ case UDIV:
+ if (speed)
+ *total = COSTS_N_INSNS (30);
+ else
+ *total = COSTS_N_INSNS (1);
+ return false;
+
+ case MULT:
+ if ((TARGET_DPFP && GET_MODE (x) == DFmode))
+ *total = COSTS_N_INSNS (1);
+ else if (speed)
+ *total = arc_multcost;
+ /* We do not want synth_mult sequences when optimizing
+ for size. */
+ else if (TARGET_MUL64_SET || TARGET_ARC700)
+ *total = COSTS_N_INSNS (1);
+ else
+ *total = COSTS_N_INSNS (2);
+ return false;
+ case PLUS:
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ && _2_4_8_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
+ {
+ *total += (rtx_cost (XEXP (x, 1), PLUS, speed)
+ + rtx_cost (XEXP (XEXP (x, 0), 0), PLUS, speed));
+ return true;
+ }
+ return false;
+ case MINUS:
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ && _2_4_8_operand (XEXP (XEXP (x, 1), 1), VOIDmode))
+ {
+ *total += (rtx_cost (XEXP (x, 0), PLUS, speed)
+ + rtx_cost (XEXP (XEXP (x, 1), 0), PLUS, speed));
+ return true;
+ }
+ return false;
+ case COMPARE:
+ {
+ rtx op0 = XEXP (x, 0);
+ rtx op1 = XEXP (x, 1);
+
+ if (GET_CODE (op0) == ZERO_EXTRACT && op1 == const0_rtx
+ && XEXP (op0, 1) == const1_rtx)
+ {
+ /* btst / bbit0 / bbit1:
+ Small integers and registers are free; everything else can
+ be put in a register. */
+ *total = (rtx_cost (XEXP (op0, 0), SET, speed)
+ + rtx_cost (XEXP (op0, 2), SET, speed));
+ return true;
+ }
+ if (GET_CODE (op0) == AND && op1 == const0_rtx
+ && satisfies_constraint_C1p (XEXP (op0, 1)))
+ {
+ /* bmsk.f */
+ *total = rtx_cost (XEXP (op0, 0), SET, speed);
+ return true;
+ }
+ /* add.f */
+ if (GET_CODE (op1) == NEG)
+ {
+ *total = (rtx_cost (op0, PLUS,speed)
+ + rtx_cost (XEXP (op1, 0), PLUS, speed));
+ }
+ return false;
+ }
+ case EQ: case NE:
+ if (outer_code == IF_THEN_ELSE
+ && GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
+ && XEXP (x, 1) == const0_rtx
+ && XEXP (XEXP (x, 0), 1) == const1_rtx)
+ {
+ /* btst / bbit0 / bbit1:
+ Small integers and registers are free; everything else can
+ be put in a register. */
+ rtx op0 = XEXP (x, 0);
+
+ *total = (rtx_cost (XEXP (op0, 0), SET, speed)
+ + rtx_cost (XEXP (op0, 2), SET, speed));
+ return true;
+ }
+ /* Fall through. */
+ /* scc_insn expands into two insns. */
+ case GTU: case GEU: case LEU:
+ if (GET_MODE (x) == SImode)
+ *total += COSTS_N_INSNS (1);
+ return false;
+ case LTU: /* might use adc. */
+ if (GET_MODE (x) == SImode)
+ *total += COSTS_N_INSNS (1) - 1;
+ return false;
+ default:
+ return false;
}
+}
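+
+/* Sketch of how the cost hook above is consulted (values are
+ illustrative): a u6 immediate such as (const_int 42) reports cost 0,
+ i.e. as cheap as a register; a (symbol_ref "x") needs a long immediate
+ and reports COSTS_N_INSNS (1); returning false instead of true asks
+ the caller to sum the costs of the subexpressions itself. */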
+
+rtx
+arc_va_arg (tree valist, tree type)
+{
+ rtx addr_rtx;
+ tree addr, incr;
+ tree type_ptr = build_pointer_type (type);
+
+#if 0
+ /* All aggregates are passed by reference. All scalar types larger
+ than 8 bytes are passed by reference. */
+ /* FIXME: delete this */
+ if (0 && (AGGREGATE_TYPE_P (type) || int_size_in_bytes (type) > 8))
+#else
+ if (type != 0
+ && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+ || TREE_ADDRESSABLE (type)))
#endif
+ {
+ tree type_ptr_ptr = build_pointer_type (type_ptr);
+
+ addr = build1 (INDIRECT_REF, type_ptr,
+ build1 (NOP_EXPR, type_ptr_ptr, valist));
+
+ incr = build2 (PLUS_EXPR, TREE_TYPE (valist),
+ valist, build_int_cst (NULL_TREE, UNITS_PER_WORD));
+ }
+ else
+ {
+ HOST_WIDE_INT align, rounded_size;
+
+ /* Compute the rounded size of the type. */
+ align = PARM_BOUNDARY / BITS_PER_UNIT;
+ rounded_size
+ = (((TREE_INT_CST_LOW (TYPE_SIZE (type)) / BITS_PER_UNIT + align - 1)
+ / align)
+ * align);
+
+ /* Align 8 byte operands. */
+ addr = valist;
+ gcc_assert (TYPE_ALIGN (type) <= BITS_PER_WORD);
+ if (TYPE_ALIGN (type) > BITS_PER_WORD)
+ {
+ abort ();
+ /* AP = (TYPE *)(((int)AP + 7) & -8) */
+
+ addr = build1 (NOP_EXPR, integer_type_node, valist);
+ addr = fold (build2 (PLUS_EXPR, integer_type_node, addr,
+ build_int_cst (NULL_TREE, 7)));
+ addr = fold (build2 (BIT_AND_EXPR, integer_type_node, addr,
+ build_int_cst (NULL_TREE, -8)));
+ addr = fold (build1 (NOP_EXPR, TREE_TYPE (valist), addr));
+ }
+
+ /* The increment is always rounded_size past the aligned pointer. */
+ incr = fold (build2 (PLUS_EXPR, TREE_TYPE (addr), addr,
+ build_int_cst (NULL_TREE, rounded_size)));
+
+ /* Adjust the pointer in big-endian mode. */
+ if (BYTES_BIG_ENDIAN)
+ {
+ HOST_WIDE_INT adj;
+ adj = TREE_INT_CST_LOW (TYPE_SIZE (type)) / BITS_PER_UNIT;
+ if (rounded_size > align)
+ adj = rounded_size;
+
+ addr = fold (build2 (PLUS_EXPR, TREE_TYPE (addr), addr,
+ build_int_cst (NULL_TREE, rounded_size - adj)));
+ }
+ }
+
+ /* Evaluate the data address. */
+ addr_rtx = expand_expr (addr, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr_rtx = copy_to_reg (addr_rtx);
+
+ /* Compute new value for AP. */
+ incr = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, incr);
+ TREE_SIDE_EFFECTS (incr) = 1;
+ expand_expr (incr, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ return addr_rtx;
+}
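+
+/* Worked example for the rounding above, assuming PARM_BOUNDARY == 32
+ (so align == 4): a 6-byte type gives
+ rounded_size == ((6 + 4 - 1) / 4) * 4 == 8,
+ so AP advances by two words; on a big-endian target adj becomes
+ rounded_size (8 > 4) and the data address is adjusted by
+ rounded_size - adj == 0. */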
+
+/* Return a pointer to a function's name with any
+ and all prefix encodings stripped from it. */
+const char *
+arc_strip_name_encoding (const char *name)
+{
+ switch (*name)
+ {
+ case SIMPLE_CALL_FLAG_CHAR:
+ case LONG_CALL_FLAG_CHAR:
+ case SHORT_CALL_FLAG_CHAR:
+ name++;
+ }
+ return (name) + ((name)[0] == '*') ;
+}
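+
+/* Illustration, using a hypothetical flag character '#' for one of the
+ *_CALL_FLAG_CHARs: "#foo" and "#*foo" both strip to "foo", while a
+ plain "*bar" loses only the '*'. */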
+
+
+
+/* An address that needs to be expressed as an explicit sum of pcl + offset. */
+int
+arc_legitimate_pc_offset_p (rtx addr)
+{
+ if (GET_CODE (addr) != CONST)
+ return 0;
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
+ return 0;
+ addr = XEXP (addr, 0);
+ }
+ return (GET_CODE (addr) == UNSPEC
+ && XVECLEN (addr, 0) == 1
+ && XINT (addr, 1) == ARC_UNSPEC_GOT
+ && GET_CODE (XVECEXP (addr, 0, 0)) == SYMBOL_REF);
+}
+
+/* Check whether ADDR is a valid PIC address.
+ A valid PIC address on the ARC should look like
+ const (unspec (SYMBOL_REF/LABEL_REF) ARC_UNSPEC_GOTOFF/ARC_UNSPEC_GOT). */
+int
+arc_legitimate_pic_addr_p (rtx addr)
+{
+ if (GET_CODE (addr) == LABEL_REF)
+ return 1;
+ if (GET_CODE (addr) != CONST)
+ return 0;
+
+ addr = XEXP (addr, 0);
+
+
+ if (GET_CODE (addr) == PLUS)
+ {
+ if (GET_CODE (XEXP (addr, 1)) != CONST_INT)
+ return 0;
+ addr = XEXP (addr, 0);
+ }
+
+ if (GET_CODE (addr) != UNSPEC
+ || XVECLEN (addr, 0) != 1)
+ return 0;
+
+ /* Must be @GOT or @GOTOFF. */
+ if (XINT (addr, 1) != ARC_UNSPEC_GOT
+ && XINT (addr, 1) != ARC_UNSPEC_GOTOFF)
+ return 0;
+
+ if (GET_CODE (XVECEXP (addr, 0, 0)) != SYMBOL_REF
+ && GET_CODE (XVECEXP (addr, 0, 0)) != LABEL_REF)
+ return 0;
+
+ return 1;
+}
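+
+/* Sketch of the shapes the predicate above accepts:
+ (label_ref L)
+ (const (unspec [(symbol_ref "x")] ARC_UNSPEC_GOT))
+ (const (plus (unspec [(label_ref L)] ARC_UNSPEC_GOTOFF) (const_int 4)))
+ Anything else, in particular a bare symbol_ref, is rejected. */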
+
+
+
+/* Return 1 if OP contains a symbol reference.  */
+
+int
+symbolic_reference_mentioned_p (rtx op)
+{
+ register const char *fmt;
+ register int i;
+
+ if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+arc_raw_symbolic_reference_mentioned_p (rtx op)
+{
+ register const char *fmt;
+ register int i;
+
+ if (GET_CODE(op) == UNSPEC)
+ return 0;
+
+ if (GET_CODE (op) == SYMBOL_REF)
+ return 1;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (op));
+ for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+
+ for (j = XVECLEN (op, i) - 1; j >= 0; j--)
+ if (arc_raw_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
+ return 1;
+ }
+
+ else if (fmt[i] == 'e' && arc_raw_symbolic_reference_mentioned_p (XEXP (op, i)))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Legitimize a PIC address reference:
+ ORIG = src;
+ OLDX = target if reload_in_progress, src otherwise. */
+rtx
+arc_legitimize_pic_address (rtx orig, rtx oldx)
+{
+ rtx addr = orig;
+ rtx new = orig;
+ rtx base;
+
+ if (oldx == orig)
+ oldx = NULL;
+
+ if (GET_CODE (addr) == LABEL_REF)
+ ; /* Do nothing. */
+ else if (GET_CODE (addr) == SYMBOL_REF
+ && (CONSTANT_POOL_ADDRESS_P (addr)
+ || SYMBOL_REF_FLAG (addr)))
+ {
+ /* This symbol may be referenced via a displacement from the PIC
+ base address (@GOTOFF). */
+
+ /* FIXME: if we had a way to emit pc-relative adds that don't
+ create a GOT entry, we could do without the use of the gp register. */
+ crtl->uses_pic_offset_table = 1;
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOTOFF);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+
+ if (oldx == NULL)
+ oldx = gen_reg_rtx (Pmode);
+
+ if (oldx != 0)
+ {
+ emit_move_insn (oldx, new);
+ new = oldx;
+ }
+
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF)
+ {
+ /* This symbol must be referenced via a load from the
+ Global Offset Table (@GOTPC). */
+
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), ARC_UNSPEC_GOT);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_const_mem (Pmode, new);
+
+ if (oldx == 0)
+ oldx = gen_reg_rtx (Pmode);
+
+ emit_move_insn (oldx, new);
+ new = oldx;
+ }
+ else
+ {
+ if (GET_CODE (addr) == CONST)
+ {
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == UNSPEC)
+ {
+ /* Check that the unspec is one of the ones we generate? */
+ }
+ else
+ gcc_assert (GET_CODE (addr) == PLUS);
+ }
+
+ if (GET_CODE (addr) == PLUS)
+ {
+ rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
+
+ /* Check first to see if this is a constant offset from a @GOTOFF
+ symbol reference. */
+ if ((GET_CODE (op0) == LABEL_REF
+ || (GET_CODE (op0) == SYMBOL_REF
+ && (CONSTANT_POOL_ADDRESS_P (op0)
+ || SYMBOL_REF_FLAG (op0))))
+ && GET_CODE (op1) == CONST_INT)
+ {
+ /* FIXME: like above, could do without gp reference. */
+ crtl->uses_pic_offset_table = 1;
+ new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), ARC_UNSPEC_GOTOFF);
+ new = gen_rtx_PLUS (Pmode, new, op1);
+ new = gen_rtx_CONST (Pmode, new);
+ new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
+
+ if (oldx != 0)
+ {
+ emit_move_insn (oldx, new);
+ new = oldx;
+ }
+ }
+ else
+ {
+ base = arc_legitimize_pic_address (XEXP (addr, 0), oldx);
+ new = arc_legitimize_pic_address (XEXP (addr, 1),
+ base == oldx ? NULL_RTX : oldx);
+
+ if (GET_CODE (new) == CONST_INT)
+ new = plus_constant (base, INTVAL (new));
+ else
+ {
+ if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
+ {
+ base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
+ new = XEXP (new, 1);
+ }
+ new = gen_rtx_PLUS (Pmode, base, new);
+ }
+ }
+ }
+ }
+
+ return new;
+}
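+
+/* Sketch of the code generated above (mnemonics are illustrative,
+ inferred from the @gotoff/@gotpc suffixes printed by
+ arc_output_pic_addr_const below): a local symbol x becomes an add off
+ the PIC base register, e.g.
+ add rN,gp,@x@gotoff
+ while a global y is loaded through the GOT, e.g.
+ ld rN,[pcl,@y@gotpc]  */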
+
+void
+arc_output_pic_addr_const (FILE * file, rtx x, int code)
+{
+ char buf[256];
+
+ restart:
+ switch (GET_CODE (x))
+ {
+ case PC:
+ if (flag_pic)
+ putc ('.', file);
+ else
+ gcc_unreachable ();
+ break;
+
+ case SYMBOL_REF:
+ output_addr_const (file, x);
+
+ /* Local functions do not get references through the PLT */
+ if (code == 'P' && ! SYMBOL_REF_FLAG (x))
+ fputs ("@plt", file);
+ break;
+
+ case LABEL_REF:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (XEXP (x, 0)));
+ arc_assemble_name (file, buf);
+ break;
+
+ case CODE_LABEL:
+ ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
+ arc_assemble_name (file, buf);
+ break;
+
+ case CONST_INT:
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+ break;
+
+ case CONST:
+ arc_output_pic_addr_const (file, XEXP (x, 0), code);
+ break;
+
+ case CONST_DOUBLE:
+ if (GET_MODE (x) == VOIDmode)
+ {
+ /* We can use %d if the number is one word and positive. */
+ if (CONST_DOUBLE_HIGH (x))
+ fprintf (file, HOST_WIDE_INT_PRINT_DOUBLE_HEX,
+ CONST_DOUBLE_HIGH (x), CONST_DOUBLE_LOW (x));
+ else if (CONST_DOUBLE_LOW (x) < 0)
+ fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
+ else
+ fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
+ }
+ else
+ /* We can't handle floating point constants;
+ PRINT_OPERAND must handle them. */
+ output_operand_lossage ("floating constant misused");
+ break;
+
+ case PLUS:
+ /* FIXME: Not needed here */
+ /* Some assemblers need integer constants to appear last (eg masm). */
+ if (GET_CODE (XEXP (x, 0)) == CONST_INT)
+ {
+ arc_output_pic_addr_const (file, XEXP (x, 1), code);
+ fprintf (file, "+");
+ arc_output_pic_addr_const (file, XEXP (x, 0), code);
+ }
+ else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ {
+ arc_output_pic_addr_const (file, XEXP (x, 0), code);
+ if (INTVAL (XEXP (x, 1)) >= 0)
+ fprintf (file, "+");
+ arc_output_pic_addr_const (file, XEXP (x, 1), code);
+ }
+ else
+ gcc_unreachable();
+ break;
+
+ case MINUS:
+ /* Avoid outputting things like x-x or x+5-x,
+ since some assemblers can't handle that. */
+ x = simplify_subtraction (x);
+ if (GET_CODE (x) != MINUS)
+ goto restart;
+
+ arc_output_pic_addr_const (file, XEXP (x, 0), code);
+ fprintf (file, "-");
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT
+ && INTVAL (XEXP (x, 1)) < 0)
+ {
+ fprintf (file, "(");
+ arc_output_pic_addr_const (file, XEXP (x, 1), code);
+ fprintf (file, ")");
+ }
+ else
+ arc_output_pic_addr_const (file, XEXP (x, 1), code);
+ break;
+
+ case ZERO_EXTEND:
+ case SIGN_EXTEND:
+ arc_output_pic_addr_const (file, XEXP (x, 0), code);
+ break;
+
+
+ case UNSPEC:
+ gcc_assert (XVECLEN (x, 0) == 1);
+ if (XINT (x, 1) == ARC_UNSPEC_GOT)
+ fputs ("pcl,", file);
+ arc_output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
+ switch (XINT (x, 1))
+ {
+ case ARC_UNSPEC_GOT:
+ fputs ("@gotpc", file);
+ break;
+ case ARC_UNSPEC_GOTOFF:
+ fputs ("@gotoff", file);
+ break;
+ case ARC_UNSPEC_PLT:
+ fputs ("@plt", file);
+ break;
+ default:
+ fprintf (stderr, "%d seen\n", XINT (x, 1));
+ output_operand_lossage ("invalid UNSPEC as operand");
+
+ break;
+ }
+ break;
+
+ default:
+ output_operand_lossage ("invalid expression as operand");
+ }
+}
+
+/* Emit insns to move operands[1] into operands[0]. */
+
+void
+emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
+
+ if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
+ operands[1] = force_reg (Pmode, operands[1]);
+ else
+ operands[1] = arc_legitimize_pic_address (operands[1], temp);
+}
+
+
+/* Prepend the character PREFIX to DECL's assembler name. */
+static void
+arc_encode_symbol (tree decl, const char prefix)
+{
+ const char *str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+ int len = strlen (str);
+ char *newstr;
+
+ if (*str == prefix)
+ return;
+ newstr = (char*) obstack_alloc (&arc_local_obstack, len + 2);
+
+ strcpy (newstr + 1, str);
+ *newstr = prefix;
+ XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
+
+ return;
+
+}
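+
+/* Illustration, with a hypothetical prefix character '#': a decl whose
+ assembler name is "foo" is rewritten to "#foo"; encoding it again with
+ the same prefix is a no-op thanks to the *str == prefix check above. */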
+
+/* Output to FILE a reference to the assembler name of a C-level name NAME.
+ If NAME starts with a *, the rest of NAME is output verbatim.
+ Otherwise NAME is transformed in an implementation-defined way
+ (usually by the addition of an underscore).
+ Many macros in the tm file are defined to call this function. */
+/* FIXME: This can be deleted */
+void
+arc_assemble_name (FILE *file, const char *name)
+{
+ const char *real_name = name;
+
+ /* real_name = arc_strip_name_encoding (name); */
+ assemble_name (file, real_name);
+}
+
+/* Return the number of words, at the beginning of an argument, that
+ must be put in registers.  The returned value must be
+ zero for arguments that are passed entirely in registers or that
+ are entirely pushed on the stack.
+
+ On some machines, certain arguments must be passed partially in
+ registers and partially in memory. On these machines, typically
+ the first N words of arguments are passed in registers, and the
+ rest on the stack. If a multi-word argument (a `double' or a
+ structure) crosses that boundary, its first few words must be
+ passed in registers and the rest must be pushed. This function
+ tells the compiler when this occurs, and how many of the words
+ should go in registers.
+
+ `FUNCTION_ARG' for these arguments should return the first register
+ to be used by the caller for this argument; likewise
+ `FUNCTION_INCOMING_ARG', for the called function.
+
+ The function is used to implement macro FUNCTION_ARG_PARTIAL_NREGS. */
+
+/* If REGNO is the least arg reg available, return the number of arg
+ regs still available. */
+#define GPR_REST_ARG_REGS(REGNO) ( ((REGNO) <= (MAX_ARC_PARM_REGS)) \
+ ? ((MAX_ARC_PARM_REGS) - (REGNO)) \
+ : 0 )
+
+/* Since ARC parm regs are contiguous. */
+#define ARC_NEXT_ARG_REG(REGNO) ( (REGNO) + 1 )
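+
+/* Worked example, assuming MAX_ARC_PARM_REGS == 8: if the next free
+ arg reg is r3, GPR_REST_ARG_REGS (3) == 5 registers remain; once
+ REGNO goes past MAX_ARC_PARM_REGS the macro yields 0 and further
+ arguments go on the stack. */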
+
+/* Implement TARGET_ARG_PARTIAL_BYTES. */
+
+static int
+arc_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, bool named ATTRIBUTE_UNUSED)
+{
+ int bytes = (mode == BLKmode
+ ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ int arg_num = *cum;
+ int ret;
+
+ arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
+ ret = GPR_REST_ARG_REGS (arg_num);
+
+ /* ICEd at function.c:2361, and ret is copied to data->partial */
+ ret = (ret >= words ? 0 : ret * UNITS_PER_WORD);
+
+ return ret;
+}
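+
+/* Worked example, again assuming 8 argument registers and
+ UNITS_PER_WORD == 4: a 16-byte BLKmode argument arriving with *cum
+ rounded to 7 has words == 4 but only GPR_REST_ARG_REGS (7) == 1 reg
+ left, so the function returns 1 * UNITS_PER_WORD == 4 bytes passed in
+ the last reg, with the remaining 12 bytes on the stack. */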
+
+
+
+/* This function controls whether a function argument is passed in a
+ register, and if so, which register.
+
+ The arguments are CUM, of type CUMULATIVE_ARGS, which summarizes
+ (in a way defined by INIT_CUMULATIVE_ARGS and FUNCTION_ARG_ADVANCE)
+ all of the previous arguments so far passed in registers; MODE, the
+ machine mode of the argument; TYPE, the data type of the argument
+ as a tree node or 0 if that is not known (which happens for C
+ support library functions); and NAMED, which is 1 for an ordinary
+ argument and 0 for nameless arguments that correspond to `...' in
+ the called function's prototype.
+
+ The returned value should either be a `reg' RTX for the hard
+ register in which to pass the argument, or zero to pass the
+ argument on the stack.
+
+ For machines like the Vax and 68000, where normally all arguments
+ are pushed, zero suffices as a definition.
+
+ The usual way to make the ANSI library `stdarg.h' work on a machine
+ where some arguments are usually passed in registers, is to cause
+ nameless arguments to be passed on the stack instead. This is done
+ by making the function return 0 whenever NAMED is 0.
+
+ You may use the macro `MUST_PASS_IN_STACK (MODE, TYPE)' in the
+ definition of this function to determine if this argument is of a
+ type that must be passed in the stack. If `REG_PARM_STACK_SPACE'
+ is not defined and the function returns non-zero for such an
+ argument, the compiler will abort. If `REG_PARM_STACK_SPACE' is
+ defined, the argument will be computed in the stack and then loaded
+ into a register.
+
+ The function is used to implement macro FUNCTION_ARG. */
+
+rtx
+arc_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type ATTRIBUTE_UNUSED, int named ATTRIBUTE_UNUSED)
+{
+ int arg_num = *cum;
+ rtx ret;
+ const char *debstr;
+
+ arg_num = ROUND_ADVANCE_CUM (arg_num, mode, type);
+ /* Return a marker for use in the call instruction. */
+ if (mode == VOIDmode)
+ {
+ ret = const0_rtx;
+ debstr = "<0>";
+ }
+ else if (GPR_REST_ARG_REGS (arg_num) > 0)
+ {
+ ret = gen_rtx_REG (mode, arg_num);
+ debstr = reg_names [arg_num];
+ }
+ else
+ {
+ ret = NULL_RTX;
+ debstr = "memory";
+ }
+ return ret;
+}
+
+/* The function to update the summarizer variable *CUM to advance past
+ an argument in the argument list. The values MODE, TYPE and NAMED
+ describe that argument. Once this is done, the variable *CUM is
+ suitable for analyzing the *following* argument with
+ `FUNCTION_ARG', etc.
+
+ This function need not do anything if the argument in question was
+ passed on the stack. The compiler knows how to track the amount of
+ stack space used for arguments without any special help.
+
+ The function is used to implement macro FUNCTION_ARG_ADVANCE. */
+/* For the ARC: the cum set here is passed on to function_arg, where we
+ look at its value and say which reg to use.  Strategy: advance the
+ reg number here till we run out of arg regs, then set *cum to the last
+ reg.  In function_arg, since *cum > last arg reg, we would return 0
+ and thus the arg will end up on the stack.  For straddling args, of
+ course, function_arg_partial_nregs will come into play. */
+void
+arc_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
+ tree type, int named ATTRIBUTE_UNUSED)
+{
+ int bytes = (mode == BLKmode
+ ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode));
+ int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+ int i;
+
+ if (words)
+ *cum = ROUND_ADVANCE_CUM (*cum, mode, type);
+ for (i = 0; i < words; i++)
+ *cum = ARC_NEXT_ARG_REG (*cum);
+
+}
+
+/* Define how to find the value returned by a function.
+ VALTYPE is the data type of the value (as a tree).
+ If the precise function being called is known, FN_DECL_OR_TYPE is its
+ FUNCTION_DECL; otherwise, FN_DECL_OR_TYPE is its type. */
+static rtx
+arc_function_value (const_tree valtype,
+ const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
+ bool outgoing ATTRIBUTE_UNUSED)
+{
+ enum machine_mode mode = TYPE_MODE (valtype);
+ int unsignedp ATTRIBUTE_UNUSED;
+
+ unsignedp = TYPE_UNSIGNED (valtype);
+ if (INTEGRAL_TYPE_P (valtype) || TREE_CODE (valtype) == OFFSET_TYPE)
+ PROMOTE_MODE(mode, unsignedp, valtype);
+ return gen_rtx_REG (mode, 0);
+}
+
+/* Return the return address that is used by __builtin_return_address.  */
+rtx
+arc_return_addr_rtx (int count, ATTRIBUTE_UNUSED rtx frame)
+{
+ if (count != 0)
+ return const0_rtx;
+
+ if (TARGET_A4)
+ {
+ /* Only the lower 24 bits of blink are valid. */
+ rtx temp = get_hard_reg_initial_val (Pmode, RETURN_ADDR_REGNUM);
+ emit_insn (gen_andsi3 (temp, temp, GEN_INT (0x00ffffff)));
+ return temp;
+ }
+
+ return get_hard_reg_initial_val (Pmode , RETURN_ADDR_REGNUM);
+}
+
+/* Nonzero if the constant value X is a legitimate general operand
+ when generating PIC code. It is given that flag_pic is on and
+ that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
+/* TODO: This should not be a separate function */
+bool
+arc_legitimate_pic_operand_p (rtx x)
+{
+ return !arc_raw_symbolic_reference_mentioned_p (x);
+}
+
+/* Determine if a given RTX is a valid constant. We already know this
+ satisfies CONSTANT_P. */
+bool
+arc_legitimate_constant_p (rtx x)
+{
+ if (!flag_pic)
+ return true;
+
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ x = XEXP (x, 0);
+
+ if (GET_CODE (x) == PLUS)
+ {
+ if (GET_CODE (XEXP (x, 1)) != CONST_INT)
+ return false;
+ x = XEXP (x, 0);
+ }
+
+ /* Only some unspecs are valid as "constants". */
+ if (GET_CODE (x) == UNSPEC)
+ switch (XINT (x, 1))
+ {
+ case ARC_UNSPEC_PLT:
+ case ARC_UNSPEC_GOTOFF:
+ case ARC_UNSPEC_GOT:
+ case UNSPEC_PROF:
+ return true;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* We must have drilled down to a symbol. */
+ if ( arc_raw_symbolic_reference_mentioned_p (x))
+ return false;
+
+ /* Fall through to return true. */
+ break;
+
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return false;
+
+ default:
+ break;
+ }
+
+ /* Otherwise we handle everything else in the move patterns. */
+ return true;
+}
+
+/* Determine if it's legal to put X into the constant pool. */
+static bool
+arc_cannot_force_const_mem (rtx x)
+{
+ return !arc_legitimate_constant_p (x);
+}
+
+
+/* Generic macro to define a builtin. */
+#define def_mbuiltin(MASK, NAME, TYPE, CODE) \
+ do \
+ { \
+ if (MASK) \
+ add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, NULL_TREE); \
+ } \
+ while (0)
+
+
+static void
+arc_init_builtins (void)
+{
+ tree endlink = void_list_node;
+
+ tree void_ftype_void
+ = build_function_type (void_type_node,
+ endlink);
+
+ tree int_ftype_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink));
+
+ tree int_ftype_short_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, short_integer_type_node, endlink));
+
+ tree void_ftype_int_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink)));
+ tree void_ftype_usint_usint
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, long_unsigned_type_node,
+ tree_cons (NULL_TREE, long_unsigned_type_node, endlink)));
+
+ tree int_ftype_int_int
+ = build_function_type (integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink)));
+
+ tree usint_ftype_usint
+ = build_function_type (long_unsigned_type_node,
+ tree_cons (NULL_TREE, long_unsigned_type_node, endlink));
+
+ tree void_ftype_usint
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, long_unsigned_type_node, endlink));
+
+ /* Add the builtins */
+ def_mbuiltin (1,"__builtin_arc_nop", void_ftype_void, ARC_BUILTIN_NOP);
+ def_mbuiltin (TARGET_NORM, "__builtin_arc_norm", int_ftype_int, ARC_BUILTIN_NORM);
+ def_mbuiltin (TARGET_NORM, "__builtin_arc_normw", int_ftype_short_int, ARC_BUILTIN_NORMW);
+ def_mbuiltin (TARGET_SWAP, "__builtin_arc_swap", int_ftype_int, ARC_BUILTIN_SWAP);
+ def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mul64", void_ftype_int_int, ARC_BUILTIN_MUL64);
+ def_mbuiltin (TARGET_MUL64_SET,"__builtin_arc_mulu64", void_ftype_usint_usint, ARC_BUILTIN_MULU64);
+ def_mbuiltin (1,"__builtin_arc_rtie", void_ftype_void, ARC_BUILTIN_RTIE);
+ def_mbuiltin (TARGET_ARC700,"__builtin_arc_sync", void_ftype_void, ARC_BUILTIN_SYNC);
+ def_mbuiltin ((TARGET_EA_SET && TARGET_ARCOMPACT),"__builtin_arc_divaw", int_ftype_int_int, ARC_BUILTIN_DIVAW);
+ def_mbuiltin (1,"__builtin_arc_brk", void_ftype_void, ARC_BUILTIN_BRK);
+ def_mbuiltin (1,"__builtin_arc_flag", void_ftype_usint, ARC_BUILTIN_FLAG);
+ def_mbuiltin (1,"__builtin_arc_sleep", void_ftype_usint, ARC_BUILTIN_SLEEP);
+ def_mbuiltin (1,"__builtin_arc_swi", void_ftype_void, ARC_BUILTIN_SWI);
+ def_mbuiltin (1,"__builtin_arc_core_read", usint_ftype_usint, ARC_BUILTIN_CORE_READ);
+ def_mbuiltin (1,"__builtin_arc_core_write", void_ftype_usint_usint, ARC_BUILTIN_CORE_WRITE);
+ def_mbuiltin (1,"__builtin_arc_lr", usint_ftype_usint, ARC_BUILTIN_LR);
+ def_mbuiltin (1,"__builtin_arc_sr", void_ftype_usint_usint, ARC_BUILTIN_SR);
+ def_mbuiltin (TARGET_ARC700,"__builtin_arc_trap_s", void_ftype_usint, ARC_BUILTIN_TRAP_S);
+ def_mbuiltin (TARGET_ARC700,"__builtin_arc_unimp_s", void_ftype_void, ARC_BUILTIN_UNIMP_S);
+
+ if (TARGET_SIMD_SET)
+ arc_init_simd_builtins ();
+}
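+
+/* Sketch of user-level usage of the builtins registered above
+ (illustrative, for a configuration where -mnorm and -mswap apply):
+
+ int nbits (int x) { return __builtin_arc_norm (x); }
+ int wswap (int x) { return __builtin_arc_swap (x); }
+ void stop (void)  { __builtin_arc_brk (); }
+
+ Exact insn semantics are as documented for the ARC ISA. */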
+
+static rtx arc_expand_simd_builtin (tree, rtx, rtx, enum machine_mode, int);
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+arc_expand_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0;
+ tree arg1;
+ rtx op0;
+ rtx op1;
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+ int icode;
+ enum machine_mode mode0;
+ enum machine_mode mode1;
+
+ if (fcode > ARC_SIMD_BUILTIN_BEGIN && fcode < ARC_SIMD_BUILTIN_END)
+ return arc_expand_simd_builtin (exp, target, subtarget, mode, ignore);
+
+ switch (fcode)
+ {
+ case ARC_BUILTIN_NOP:
+ emit_insn (gen_nop ());
+ return NULL_RTX;
+
+ case ARC_BUILTIN_NORM:
+ icode = CODE_FOR_norm;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+ target = gen_reg_rtx (SImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ emit_insn (gen_norm (target,op0));
+ return target;
+
+ case ARC_BUILTIN_NORMW:
+
+ /* FIXME: This should all be HImode, not SImode. */
+ icode = CODE_FOR_normw;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+ target = gen_reg_rtx (SImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, convert_to_mode (mode0, op0,0));
+
+ emit_insn (gen_normw (target, op0));
+ return target;
+
+ case ARC_BUILTIN_MUL64:
+ icode = CODE_FOR_mul64;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+      emit_insn (gen_mul64 (op0, op1));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_MULU64:
+ icode = CODE_FOR_mulu64;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+      emit_insn (gen_mulu64 (op0, op1));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_RTIE:
+ icode = CODE_FOR_rtie;
+ emit_insn (gen_rtie (const1_rtx));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_SYNC:
+ icode = CODE_FOR_sync;
+ emit_insn (gen_sync (const1_rtx));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_SWAP:
+ icode = CODE_FOR_swap;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+ target = gen_reg_rtx (SImode);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+      emit_insn (gen_swap (target, op0));
+ return target;
+
+ case ARC_BUILTIN_DIVAW:
+ icode = CODE_FOR_divaw;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+ target = gen_reg_rtx (SImode);
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+      emit_insn (gen_divaw (target, op0, op1));
+ return target;
+
+ case ARC_BUILTIN_BRK:
+ icode = CODE_FOR_brk;
+ emit_insn (gen_brk (const1_rtx));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_SLEEP:
+ icode = CODE_FOR_sleep;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+      arg0 = fold (arg0);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+
+ emit_insn (gen_sleep (op0));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_SWI:
+ icode = CODE_FOR_swi;
+ emit_insn (gen_swi (const1_rtx));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_FLAG:
+ icode = CODE_FOR_flag;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[0].mode;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ emit_insn (gen_flag (op0));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_CORE_READ:
+ icode = CODE_FOR_core_read;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ target = gen_reg_rtx (SImode);
+
+      arg0 = fold (arg0);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+
+ emit_insn (gen_core_read (target, op0));
+ return target;
+
+ case ARC_BUILTIN_CORE_WRITE:
+ icode = CODE_FOR_core_write;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+      arg1 = fold (arg1);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ emit_insn (gen_core_write (op0, op1));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_LR:
+ icode = CODE_FOR_lr;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ target = gen_reg_rtx (SImode);
+
+      arg0 = fold (arg0);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+
+ emit_insn (gen_lr (target, op0));
+ return target;
+
+ case ARC_BUILTIN_SR:
+ icode = CODE_FOR_sr;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+
+      arg1 = fold (arg1);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ emit_insn (gen_sr (op0, op1));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_TRAP_S:
+ icode = CODE_FOR_trap_s;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+      arg0 = fold (arg0);
+
+ op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
+ mode0 = insn_data[icode].operand[1].mode;
+
+ emit_insn (gen_trap_s (op0));
+ return NULL_RTX;
+
+ case ARC_BUILTIN_UNIMP_S:
+ icode = CODE_FOR_unimp_s;
+ emit_insn (gen_unimp_s (const1_rtx));
+ return NULL_RTX;
+
+ default:
+ break;
+ }
+
+ /* @@@ Should really do something sensible here. */
+ return NULL_RTX;
+}
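+
+/* Illustration (editor's sketch, not part of the original patch): for a
+   source-level call such as
+       int z = __builtin_arc_swap (x);
+   the ARC_BUILTIN_SWAP case above expands X, copies it into a register if
+   the insn predicate rejects it, emits the "swap" insn into a fresh SImode
+   pseudo, and returns that pseudo as the value of the call.  */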
+
+/* Return nonzero if operands[OPNO] is a valid compile-time constant to be
+   used as a register number in builtin code.  Otherwise flag an error.  */
+
+int
+check_if_valid_regno_const (rtx *operands, int opno)
+{
+
+ switch (GET_CODE (operands[opno]))
+ {
+ case SYMBOL_REF :
+ case CONST :
+ case CONST_INT :
+ return 1;
+ default:
+ error("register number must be a compile-time constant. Try giving higher optimization levels");
+ break;
+ }
+ return 0;
+}
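+
+/* Illustration (editor's note, hypothetical usage): a call like
+       __builtin_arc_core_read (12)
+   passes this check because 12 expands to a CONST_INT, whereas a register
+   number held in a variable passes only if the optimizers fold it to a
+   compile-time constant -- hence the hint about optimization levels.  */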
+
+/* Check whether, after all constant folding, the operand to
+   __builtin_arc_sleep is an unsigned 6-bit integer.  If not, flag an
+   error.  */
+int
+check_if_valid_sleep_operand (rtx *operands, int opno)
+{
+ switch (GET_CODE (operands[opno]))
+ {
+ case CONST :
+ case CONST_INT :
+      if (UNSIGNED_INT6 (INTVAL (operands[opno])))
+	return 1;
+      /* Fall through to report the error.  */
+    default:
+      fatal_error ("operand for sleep instruction must be an unsigned 6-bit compile-time constant");
+ break;
+ }
+ return 0;
+}
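+
+/* Illustration (editor's note): __builtin_arc_sleep (10) is accepted, as 10
+   fits the unsigned 6-bit range 0..63; an argument of 64, or one that does
+   not fold to a constant, hits the fatal_error above.  */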
+
+/* Return nonzero if it is ok to make a tail-call to DECL. */
+static bool
+arc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
+{
+ const char * fname;
+
+ if (!TARGET_ARCOMPACT)
+ {
+ /* Never tailcall something for which we have no decl. */
+ if (decl == NULL)
+ return false;
+
+      /* Extract the function name from the decl node.  */
+ fname = XSTR (XEXP (DECL_RTL (decl), 0), 0);
+
+      /* ARC does not have a "branch [reg]" insn, so no sibcalls are
+	 possible with -mlong-calls unless the called function has the
+	 short_call attribute set.  */
+ if (TARGET_LONG_CALLS_SET && !ARC_ENCODED_SHORT_CALL_ATTR_P(fname))
+ return false;
+
+      /* Is this a function with the long_call attribute?  If so, return false.  */
+ if (ARC_ENCODED_LONG_CALL_ATTR_P(fname))
+ return false;
+ }
+
+ /* Never tailcall from an ISR routine - it needs a special exit sequence. */
+ if (ARC_INTERRUPT_P (arc_compute_function_type (cfun)))
+ return false;
+
+ /* Everything else is ok. */
+ return true;
+}
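+
+/* Illustration (editor's sketch, assuming a pre-ARCompact core built with
+   -mlong-calls):
+       extern void f (void) __attribute__ ((short_call));
+   may still be tail-called, while a plain extern function is rejected by
+   the TARGET_LONG_CALLS_SET test above.  */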
+
+/* Output code to add DELTA to the first argument, and then jump
+ to FUNCTION. Used for C++ multiple inheritance. */
+static void
+arc_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+ HOST_WIDE_INT delta,
+ HOST_WIDE_INT vcall_offset,
+ tree function)
+{
+ int mi_delta = delta;
+ const char *const mi_op = mi_delta < 0 ? "sub" : "add";
+ int shift = 0;
+ int this_regno
+ = aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function) ? 1 : 0;
+ const char *fname;
+
+ if (mi_delta < 0)
+ mi_delta = - mi_delta;
+
+ /* Add DELTA. When possible use a plain add, otherwise load it into
+ a register first. */
+
+ while (mi_delta != 0)
+ {
+ if ((mi_delta & (3 << shift)) == 0)
+ shift += 2;
+ else
+ {
+ asm_fprintf (file, "\t%s\t%s, %s, %d\n",
+ mi_op, reg_names[this_regno], reg_names[this_regno],
+ mi_delta & (0xff << shift));
+ mi_delta &= ~(0xff << shift);
+ shift += 8;
+ }
+ }
+
+ /* If needed, add *(*THIS + VCALL_OFFSET) to THIS. */
+ if (vcall_offset != 0)
+ {
+ /* ld r12,[this] --> temp = *this
+ add r12,r12,vcall_offset --> temp = *(*this + vcall_offset)
+ ld r12,[r12]
+ add this,this,r12 --> this+ = *(*this + vcall_offset) */
+ asm_fprintf (file, "\tld\t%s, [%s]\n",
+ ARC_TEMP_SCRATCH_REG, reg_names[this_regno]);
+ asm_fprintf (file, "\tadd\t%s, %s, %ld\n",
+ ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG, vcall_offset);
+ asm_fprintf (file, "\tld\t%s, [%s]\n",
+ ARC_TEMP_SCRATCH_REG, ARC_TEMP_SCRATCH_REG);
+ asm_fprintf (file, "\tadd\t%s, %s, %s\n", reg_names[this_regno],
+ reg_names[this_regno], ARC_TEMP_SCRATCH_REG);
+ }
+
+ fname = XSTR (XEXP (DECL_RTL (function), 0), 0);
+ if (TARGET_LONG_CALLS_SET
+ ? !ARC_ENCODED_SHORT_CALL_ATTR_P (fname)
+ : ARC_ENCODED_LONG_CALL_ATTR_P (fname))
+ fputs ("\tj\t", file);
+ else
+ fputs ("\tb\t", file);
+ assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
+ fputc ('\n', file);
+}
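+
+/* Illustration (editor's sketch; register r0 assumed, i.e. no aggregate
+   return): for DELTA == 0x401 the loop above splits the addition into
+   shifted 8-bit chunks, emitting
+       add r0, r0, 1
+       add r0, r0, 1024
+   before the final branch or jump to FUNCTION.  */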
+
+/* Return nonzero if a 32 bit "long_call" should be generated for
+ this call. We generate a long_call if the function:
+
+   a. has an __attribute__ ((long_call))
+ or b. the -mlong-calls command line switch has been specified
+
+ However we do not generate a long call if the function has an
+ __attribute__ ((short_call))
+
+ This function will be called by C fragments contained in the machine
+ description file. */
+int
+arc_is_longcall_p (rtx sym_ref)
+{
+ if (GET_CODE (sym_ref) != SYMBOL_REF)
+ return 0;
+
+ return ARC_ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
+ || ( TARGET_LONG_CALLS_SET && !ARC_ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref,0)));
+
+}
+
+/* Emit profiling code for calling CALLEE. Return nonzero if a special
+ call pattern needs to be generated. */
+int
+arc_profile_call (rtx callee)
+{
+ rtx from = XEXP (DECL_RTL (current_function_decl), 0);
+
+ if (CONSTANT_P (callee))
+ {
+ rtx count_ptr
+ = gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (3, from, callee,
+ CONST0_RTX (Pmode)),
+ UNSPEC_PROF));
+ rtx counter = gen_rtx_MEM (SImode, count_ptr);
+ /* ??? The increment would better be done atomically, but as there is
+ no proper hardware support, that would be too expensive. */
+ emit_move_insn (counter, force_reg (SImode, plus_constant (counter, 1)));
+ return 0;
+ }
+ else
+ {
+ rtx count_list_ptr
+ = gen_rtx_CONST (Pmode,
+ gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (3, from, CONST0_RTX (Pmode),
+ CONST0_RTX (Pmode)),
+ UNSPEC_PROF));
+ emit_move_insn (gen_rtx_REG (Pmode, 8), count_list_ptr);
+ emit_move_insn (gen_rtx_REG (Pmode, 9), callee);
+ return 1;
+ }
}
/* Worker function for TARGET_RETURN_IN_MEMORY. */
static bool
arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
-{
- if (AGGREGATE_TYPE_P (type))
- return true;
+ {
+ if (AGGREGATE_TYPE_P (type) || TREE_ADDRESSABLE (type))
+ return true;
else
{
HOST_WIDE_INT size = int_size_in_bytes (type);
@@ -2332,24 +6044,2803 @@ arc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
}
}
+/* ashwin : taken from gcc-4.2-FSF clean sources */
/* For ARC, All aggregates and arguments greater than 8 bytes are
passed by reference. */
-
static bool
arc_pass_by_reference (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED,
- enum machine_mode mode, const_tree type,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ const_tree type ATTRIBUTE_UNUSED,
bool named ATTRIBUTE_UNUSED)
{
- unsigned HOST_WIDE_INT size;
+ return (type != 0
+ && (TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
+ || TREE_ADDRESSABLE (type)));
- if (type)
+/* ashwin: We always pass arguments by value.  */
+ return 0;
+
+ /* unsigned HOST_WIDE_INT size; */
+
+/* if (type) */
+/* { */
+/* if (AGGREGATE_TYPE_P (type)) */
+/* return true; */
+/* size = int_size_in_bytes (type); */
+/* } */
+/* else */
+/* size = GET_MODE_SIZE (mode); */
+
+/* return size > 8; */
+}
+/* ~ashwin */
+
+
+/* Return NULL if INSN is valid within a low-overhead loop.
+   Otherwise return a reason why doloop cannot be applied.  */
+
+static const char *
+arc_invalid_within_doloop (const_rtx insn)
+{
+ if (CALL_P (insn))
+ return "Function call in the loop.";
+ return NULL;
+}
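+
+/* Illustration (editor's note): a counted loop whose body performs a call,
+   e.g.
+       for (i = 0; i < n; i++) f (i);
+   is rejected here, so no zero-overhead (lp) loop is generated for it.  */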
+
+static int arc_reorg_in_progress = 0;
+
+/* ARC's machine-specific reorg function.  */
+static void
+arc_reorg (void)
+{
+ rtx insn, pattern;
+ rtx pc_target;
+ long offset;
+ int changed;
+
+ cfun->machine->arc_reorg_started = 1;
+ arc_reorg_in_progress = 1;
+
+ /* Emit special sections for profiling. */
+ if (crtl->profile)
{
- if (AGGREGATE_TYPE_P (type))
- return true;
- size = int_size_in_bytes (type);
+ section *save_text_section;
+ rtx insn;
+ int size = get_max_uid () >> 4;
+ htab_t htab = htab_create (size, unspec_prof_hash, unspec_prof_htab_eq,
+ NULL);
+
+ save_text_section = in_section;
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (NONJUMP_INSN_P (insn))
+ walk_stores (PATTERN (insn), write_profile_sections, htab);
+ if (htab_elements (htab))
+ in_section = 0;
+ switch_to_section (save_text_section);
+ htab_delete (htab);
+ }
+
+ /* Link up loop ends with their loop start. */
+ {
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ if (GET_CODE (insn) == JUMP_INSN
+ && recog_memoized (insn) == CODE_FOR_doloop_end_i)
+ {
+ rtx top_label
+ = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (insn), 0, 0)), 1), 0);
+ rtx num = GEN_INT (CODE_LABEL_NUMBER (top_label));
+ rtx lp, prev = prev_nonnote_insn (top_label);
+ rtx next = NULL_RTX;
+ rtx op0 = XEXP (XVECEXP (PATTERN (insn), 0, 1), 0);
+ int seen_label = 0;
+
+ for (lp = prev;
+ (lp && NONJUMP_INSN_P (lp)
+ && recog_memoized (lp) != CODE_FOR_doloop_begin_i);
+ lp = prev_nonnote_insn (lp))
+ ;
+ if (!lp || !NONJUMP_INSN_P (lp)
+ || dead_or_set_regno_p (lp, LP_COUNT))
+ {
+ for (prev = next = insn, lp = NULL_RTX ; prev || next;)
+ {
+ if (prev)
+ {
+ if (NONJUMP_INSN_P (prev)
+ && recog_memoized (prev) == CODE_FOR_doloop_begin_i
+ && (INTVAL (XEXP (XVECEXP (PATTERN (prev), 0, 5), 0))
+ == INSN_UID (insn)))
+ {
+ lp = prev;
+ break;
+ }
+ else if (LABEL_P (prev))
+ seen_label = 1;
+ prev = prev_nonnote_insn (prev);
+ }
+ if (next)
+ {
+ if (NONJUMP_INSN_P (next)
+ && recog_memoized (next) == CODE_FOR_doloop_begin_i
+ && (INTVAL (XEXP (XVECEXP (PATTERN (next), 0, 5), 0))
+ == INSN_UID (insn)))
+ {
+ lp = next;
+ break;
+ }
+ next = next_nonnote_insn (next);
+ }
+ }
+ prev = NULL_RTX;
+ }
+ if (lp && !dead_or_set_regno_p (lp, LP_COUNT))
+ {
+	      rtx begin_cnt = XEXP (XVECEXP (PATTERN (lp), 0, 3), 0);
+ if (INTVAL (XEXP (XVECEXP (PATTERN (lp), 0, 4), 0)))
+ /* The loop end insn has been duplicated. That can happen
+ when there is a conditional block at the very end of
+ the loop. */
+ goto failure;
+	    /* If register allocation failed to allocate to the right
+	       register, there is no point in teaching reload to
+	       fix this up with reloads, as that would cost more
+	       than using an ordinary core register with the
+	       doloop_fallback pattern.  */
+ if ((true_regnum (op0) != LP_COUNT || !REG_P (begin_cnt))
+ /* Likewise, if the loop setup is evidently inside the loop,
+	       we lose.  */
+ || (!prev && lp != next && !seen_label))
+ {
+ remove_insn (lp);
+ goto failure;
+ }
+ /* It is common that the optimizers copy the loop count from
+ another register, and doloop_begin_i is stuck with the
+ source of the move. Making doloop_begin_i only accept "l"
+	     is nonsensical, as this then makes reload evict the pseudo
+ used for the loop end. The underlying cause is that the
+ optimizers don't understand that the register allocation for
+ doloop_begin_i should be treated as part of the loop.
+ Try to work around this problem by verifying the previous
+ move exists. */
+ if (true_regnum (begin_cnt) != LP_COUNT)
+ {
+ rtx mov, set, note;
+
+ for (mov = prev_nonnote_insn (lp); mov;
+ mov = prev_nonnote_insn (mov))
+ {
+ if (!NONJUMP_INSN_P (mov))
+ mov = 0;
+ else if ((set = single_set (mov))
+ && rtx_equal_p (SET_SRC (set), begin_cnt)
+ && rtx_equal_p (SET_DEST (set), op0))
+ break;
+ }
+ if (mov)
+ {
+		  XEXP (XVECEXP (PATTERN (lp), 0, 3), 0) = op0;
+ note = find_regno_note (lp, REG_DEAD, REGNO (begin_cnt));
+ if (note)
+ remove_note (lp, note);
+ }
+ else
+ {
+ remove_insn (lp);
+ goto failure;
+ }
+ }
+ XEXP (XVECEXP (PATTERN (insn), 0, 4), 0) = num;
+ XEXP (XVECEXP (PATTERN (lp), 0, 4), 0) = num;
+ if (next == lp)
+ XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const2_rtx;
+ else if (!prev)
+ XEXP (XVECEXP (PATTERN (lp), 0, 6), 0) = const1_rtx;
+ else if (prev != lp)
+ {
+ remove_insn (lp);
+ add_insn_after (lp, prev, NULL);
+ }
+ if (!prev)
+ {
+ XEXP (XVECEXP (PATTERN (lp), 0, 7), 0)
+ = gen_rtx_LABEL_REF (Pmode, top_label);
+ REG_NOTES (lp)
+ = gen_rtx_INSN_LIST (REG_LABEL_OPERAND, top_label,
+ REG_NOTES (lp));
+ LABEL_NUSES (top_label)++;
+ }
+ /* We can avoid tedious loop start / end setting for empty loops
+	     by merely setting the loop count to its final value.  */
+ if (next_active_insn (top_label) == insn)
+ {
+ rtx lc_set
+ = gen_rtx_SET (VOIDmode,
+ XEXP (XVECEXP (PATTERN (lp), 0, 3), 0),
+ const0_rtx);
+
+ lc_set = emit_insn_before (lc_set, insn);
+ delete_insn (lp);
+ delete_insn (insn);
+ insn = lc_set;
+ }
+ /* If the loop is non-empty with zero length, we can't make it
+ a zero-overhead loop. That can happen for empty asms. */
+ else
+ {
+ rtx scan;
+
+ for (scan = top_label;
+ (scan && scan != insn
+ && (!NONJUMP_INSN_P (scan) || !get_attr_length (scan)));
+ scan = NEXT_INSN (scan));
+ if (scan == insn)
+ {
+ remove_insn (lp);
+ goto failure;
+ }
+ }
+ }
+ else
+ {
+ /* Sometimes the loop optimizer makes a complete hash of the
+ loop. If it were only that the loop is not entered at the
+	     top, we could fix this up by setting LP_START with SR.
+	     However, if we can't find the loop begin where it should be,
+ chances are that it does not even dominate the loop, but is
+ inside the loop instead. Using SR there would kill
+ performance.
+ We use the doloop_fallback pattern here, which executes
+ in two cycles on the ARC700 when predicted correctly. */
+ failure:
+ if (!REG_P (op0))
+ {
+ rtx op3 = XEXP (XVECEXP (PATTERN (insn), 0, 5), 0);
+
+ emit_insn_before (gen_move_insn (op3, op0), insn);
+ PATTERN (insn)
+ = gen_doloop_fallback_m (op3, JUMP_LABEL (insn), op0);
+ }
+ else
+ XVEC (PATTERN (insn), 0)
+ = gen_rtvec (2, XVECEXP (PATTERN (insn), 0, 0),
+ XVECEXP (PATTERN (insn), 0, 1));
+ INSN_CODE (insn) = -1;
+ }
+ }
+ }
+
+/*
+FIXME: should anticipate ccfsm action, generate special patterns for
+ to-be-deleted branches that have no delay slot and have at least the
+ length of the size increase forced on other insns that are conditionalized.
+ This can also have an insn_list inside that enumerates insns which are
+ not actually conditionalized because the destinations are dead in the
+ not-execute case.
+ Could also tag branches that we want to be unaligned if they get no delay
+  slot, or even ones that we don't want to do delay slot scheduling for
+ because we can unalign them.
+However, there are cases when conditional execution is only possible after
+delay slot scheduling:
+
+- If a delay slot is filled with a nocond/set insn from above, the previous
+  basic block can become eligible for conditional execution.
+- If a delay slot is filled with a nocond insn from the fall-through path,
+  the branch with that delay slot can become eligible for conditional execution
+ (however, with the same sort of data flow analysis that dbr does, we could
+ have figured out before that we don't need to conditionalize this insn.)
+- If a delay slot insn is filled with an insn from the target, the
+ target label gets its uses decremented (even deleted if falling to zero),
+ thus possibly creating more condexec opportunities there.
+Therefore, we should still be prepared to apply condexec optimization on
+non-prepared branches if the size increase of conditionalized insns is no
+more than the size saved from eliminating the branch. An invocation option
+could also be used to reserve a bit of extra size for condbranches so that
+this'll work more often (could also test in arc_reorg if the block is
+'close enough' to be eligible for condexec to make this likely, and
+estimate required size increase).
+ */
+  /* Generate BRcc insns by combining cmp and Bcc insns wherever possible.  */
+  /* BRcc is only available in the ARCompact ISA.  */
+ if (!TARGET_ARCOMPACT || TARGET_NO_BRCC_SET)
+ return;
+
+/* /\* Compute LOG_LINKS. *\/ */
+/* for (bb = 0; bb < current_nr_blocks; bb++) */
+/* compute_block_backward_dependences (bb); */
+
+ do
+ {
+      init_insn_lengths ();
+      changed = 0;
+
+      /* Call shorten_branches to calculate the insn lengths.  */
+      shorten_branches (get_insns ());
+      cfun->machine->ccfsm_current_insn = NULL_RTX;
+
+      if (!INSN_ADDRESSES_SET_P ())
+	fatal_error ("Insn addresses not set after shorten_branches");
+
+ for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ rtx label;
+ enum attr_type insn_type;
+
+	  /* If this is a non-jump insn, or a casesi jump table, continue.  */
+ if (GET_CODE (insn) != JUMP_INSN ||
+ GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+ continue;
+
+ /* If we already have a brcc, note if it is suitable for brcc_s.
+ Be a bit generous with the brcc_s range so that we can take
+ advantage of any code shortening from delay slot scheduling. */
+ if (recog_memoized (insn) == CODE_FOR_cbranchsi4_scratch)
+ {
+ rtx pat = PATTERN (insn);
+ rtx operator = XEXP (SET_SRC (XVECEXP (pat, 0, 0)), 0);
+ rtx *ccp = &XEXP (XVECEXP (pat, 0, 1), 0);
+
+ offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+ if ((offset >= -140 && offset < 140)
+ && rtx_equal_p (XEXP (operator, 1), const0_rtx)
+ && compact_register_operand (XEXP (operator, 0), VOIDmode)
+ && equality_comparison_operator (operator, VOIDmode))
+ PUT_MODE (*ccp, CC_Zmode);
+ else if (GET_MODE (*ccp) == CC_Zmode)
+ PUT_MODE (*ccp, CC_ZNmode);
+ continue;
+ }
+ if ((insn_type = get_attr_type (insn)) == TYPE_BRCC
+ || insn_type == TYPE_BRCC_NO_DELAY_SLOT)
+ continue;
+
+	  /* OK, so we have a jump insn.  */
+	  /* We need to check that it is a Bcc:
+	     Bcc => set (pc) (if_then_else ...).  */
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != SET ||
+ GET_CODE (SET_SRC(pattern)) != IF_THEN_ELSE)
+ continue;
+
+ /* Now check if the jump is beyond the s9 range */
+ if (find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX))
+ continue;
+ offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
+
+	  if (offset > 253 || offset < -254)
+ continue;
+
+ pc_target = SET_SRC (pattern);
+
+ /* Now go back and search for the set cc insn */
+
+ label = XEXP (pc_target, 1);
+
+ {
+ rtx pat, scan, link_insn = NULL;
+
+ for (scan = PREV_INSN (insn);
+ scan && GET_CODE (scan) != CODE_LABEL;
+ scan = PREV_INSN (scan))
+ {
+ if (! INSN_P (scan))
+ continue;
+ pat = PATTERN (scan);
+ if (GET_CODE (pat) == SET
+ && cc_register (SET_DEST (pat), VOIDmode))
+ {
+ link_insn = scan;
+ break;
+ }
+ }
+ if (! link_insn)
+ continue;
+ else
+ /* Check if this is a data dependency */
+ {
+ rtx operator, cc_clob_rtx, op0, op1, brcc_insn, note;
+ rtx cmp0, cmp1;
+
+	      /* OK, this is the set-cc insn; copy the args here.  */
+ operator = XEXP (pc_target, 0);
+
+ op0 = cmp0 = XEXP (SET_SRC (pat), 0);
+ op1 = cmp1 = XEXP (SET_SRC (pat), 1);
+ if (GET_CODE (op0) == ZERO_EXTRACT
+ && XEXP (op0, 1) == const1_rtx
+ && (GET_CODE (operator) == EQ
+ || GET_CODE (operator) == NE))
+ {
+ /* btst / b{eq,ne} -> bbit{0,1} */
+ op0 = XEXP (cmp0, 0);
+ op1 = XEXP (cmp0, 2);
+ }
+ else if (!register_operand (op0, VOIDmode)
+ || !general_operand (op1, VOIDmode))
+ continue;
+	      /* Neither of the two cmp operands should be set between the
+		 cmp and the branch.  */
+ if (reg_set_between_p (op0, link_insn, insn))
+ continue;
+
+ if (reg_set_between_p (op1, link_insn, insn))
+ continue;
+
+	      /* Since the MODE check does not work, check that this is
+		 the CC reg's last set location before INSN, and that no
+		 instruction between the cmp and the branch uses the
+		 condition codes.  */
+ if ((reg_set_between_p (SET_DEST (pat), link_insn, insn))
+ || (reg_used_between_p (SET_DEST (pat), link_insn, insn)))
+ continue;
+
+ /* CC reg should be dead after insn */
+ if (!find_regno_note (insn, REG_DEAD, CC_REG))
+ continue;
+
+ operator = gen_rtx_fmt_ee (GET_CODE (operator),
+ GET_MODE (operator), cmp0, cmp1);
+ /* If we create a LIMM where there was none before,
+ we only benefit if we can avoid a scheduling bubble
+ for the ARC600. Otherwise, we'd only forgo chances
+ at short insn generation, and risk out-of-range
+ branches. */
+ if (!brcc_nolimm_operator (operator, VOIDmode)
+ && !long_immediate_operand (op1, VOIDmode)
+ && (TARGET_ARC700
+ || next_active_insn (link_insn) != insn))
+ continue;
+
+ /* Emit bbit / brcc (or brcc_s if possible).
+ CC_Zmode indicates that brcc_s is possible. */
+
+ if (op0 != cmp0)
+ cc_clob_rtx = gen_rtx_REG (CC_ZNmode, CC_REG);
+ else if ((offset >= -140 && offset < 140)
+ && rtx_equal_p (op1, const0_rtx)
+ && compact_register_operand (op0, VOIDmode)
+ && (GET_CODE (operator) == EQ
+ || GET_CODE (operator) == NE))
+ cc_clob_rtx = gen_rtx_REG (CC_Zmode, CC_REG);
+ else
+ cc_clob_rtx = gen_rtx_REG (CCmode, CC_REG);
+
+ brcc_insn
+ = gen_rtx_IF_THEN_ELSE (VOIDmode, operator, label, pc_rtx);
+ brcc_insn = gen_rtx_SET (VOIDmode, pc_rtx, brcc_insn);
+ cc_clob_rtx = gen_rtx_CLOBBER (VOIDmode, cc_clob_rtx);
+ brcc_insn
+ = gen_rtx_PARALLEL
+ (VOIDmode, gen_rtvec (2, brcc_insn, cc_clob_rtx));
+ brcc_insn = emit_jump_insn_before (brcc_insn, insn);
+
+ JUMP_LABEL (brcc_insn) = JUMP_LABEL (insn);
+ note = find_reg_note (insn, REG_BR_PROB, 0);
+ if (note)
+ {
+ XEXP (note, 1) = REG_NOTES (brcc_insn);
+ REG_NOTES (brcc_insn) = note;
+ }
+ note = find_reg_note (link_insn, REG_DEAD, op0);
+ if (note)
+ {
+ remove_note (link_insn, note);
+ XEXP (note, 1) = REG_NOTES (brcc_insn);
+ REG_NOTES (brcc_insn) = note;
+ }
+ note = find_reg_note (link_insn, REG_DEAD, op1);
+ if (note)
+ {
+ XEXP (note, 1) = REG_NOTES (brcc_insn);
+ REG_NOTES (brcc_insn) = note;
+ }
+
+ changed = 1;
+
+ /* Delete the bcc insn */
+ set_insn_deleted (insn);
+
+ /* Delete the cmp insn */
+ set_insn_deleted (link_insn);
+
+ }
+ }
+ }
+ /* Clear out insn_addresses */
+ INSN_ADDRESSES_FREE ();
+
+ } while (changed);
+
+  if (INSN_ADDRESSES_SET_P ())
+    fatal_error ("insn addresses not freed");
+
+ arc_reorg_in_progress = 0;
+}
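+
+/* Illustration (editor's sketch) of the BRcc combination done in arc_reorg:
+   an in-range compare/branch pair such as
+       cmp  r0, r1
+       beq  @.L1
+   is fused into the single compare-and-branch insn
+       breq r0, r1, @.L1
+   provided neither operand is set between the two insns and the condition
+   codes are dead after the branch.  */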
+
+ /* Check if the operands are valid for BRcc.d generation
+ Valid Brcc.d patterns are
+ Brcc.d b, c, s9
+ Brcc.d b, u6, s9
+
+    For cc={GT, LE, GTU, LEU}, u6=63 cannot be allowed,
+    since it is encoded by the assembler as {GE, LT, HS, LS} 64, which
+    does not have a delay slot.
+
+ Assumed precondition: Second operand is either a register or a u6 value. */
+int
+valid_brcc_with_delay_p (rtx *operands)
+{
+ if (optimize_size && GET_MODE (operands[4]) == CC_Zmode)
+ return 0;
+ return brcc_nolimm_operator (operands[0], VOIDmode);
+}
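+
+/* Illustration (editor's note), restating the precondition above: a form
+   like "brgt.d r0, 63, @target" is not allowed, since the assembler would
+   encode it as "brge r0, 64, @target", which has no delay slot.  */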
+
+/* ??? Hack.  This should not really be here.  See PR32143.  */
+static bool
+arc_decl_anon_ns_mem_p (const_tree decl)
+{
+ while (1)
+ {
+ if (decl == NULL_TREE || decl == error_mark_node)
+ return false;
+ if (TREE_CODE (decl) == NAMESPACE_DECL
+ && DECL_NAME (decl) == NULL_TREE)
+ return true;
+ /* Classes and namespaces inside anonymous namespaces have
+ TREE_PUBLIC == 0, so we can shortcut the search. */
+ else if (TYPE_P (decl))
+ return (TREE_PUBLIC (TYPE_NAME (decl)) == 0);
+ else if (TREE_CODE (decl) == NAMESPACE_DECL)
+ return (TREE_PUBLIC (decl) == 0);
+ else
+ decl = DECL_CONTEXT (decl);
+ }
+}
+
+/* Implement TARGET_IN_SMALL_DATA_P.  Return true if it would be safe to
+   access DECL using gp-relative addressing.  */
+
+static bool
+arc_in_small_data_p (const_tree decl)
+{
+ HOST_WIDE_INT size;
+
+ if (TARGET_A4)
+ return false;
+
+ if (TREE_CODE (decl) == STRING_CST || TREE_CODE (decl) == FUNCTION_DECL)
+ return false;
+
+
+ /* We don't yet generate small-data references for -mabicalls. See related
+ -G handling in override_options. */
+ if (TARGET_NO_SDATA_SET)
+ return false;
+
+ if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl) != 0)
+ {
+ const char *name;
+
+ /* Reject anything that isn't in a known small-data section. */
+ name = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
+ if (strcmp (name, ".sdata") != 0 && strcmp (name, ".sbss") != 0)
+ return false;
+
+ /* If a symbol is defined externally, the assembler will use the
+ usual -G rules when deciding how to implement macros. */
+ if (!DECL_EXTERNAL (decl))
+ return true;
+ }
+  /* Only global variables go into the sdata section for now.  */
+ else if (1)
+ {
+ /* Don't put constants into the small data section: we want them
+ to be in ROM rather than RAM. */
+ if (TREE_CODE (decl) != VAR_DECL)
+ return false;
+
+ if (TREE_READONLY (decl)
+ && !TREE_SIDE_EFFECTS (decl)
+ && (!DECL_INITIAL (decl) || TREE_CONSTANT (DECL_INITIAL (decl))))
+ return false;
+
+ /* TREE_PUBLIC might change after the first call, because of the patch
+ for PR19238. */
+ if (default_binds_local_p_1 (decl, 1)
+ || arc_decl_anon_ns_mem_p (decl))
+ return false;
+
+      /* To ensure that -mvolatile-cache works, reject volatile decls:
+	 ld.di does not have a gp-relative variant.  */
+ if (TREE_THIS_VOLATILE (decl))
+ return false;
+ }
+
+ /* Disable sdata references to weak variables */
+ if (DECL_WEAK (decl))
+ return false;
+
+ size = int_size_in_bytes (TREE_TYPE (decl));
+
+/* if (AGGREGATE_TYPE_P (TREE_TYPE (decl))) */
+/* return false; */
+
+  /* Allow only data of at most 4 bytes into sdata.  */
+ return (size > 0 && size <= 4);
+}
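+
+/* Illustration (editor's note): under these rules a plain global
+       int counter;
+   is placed in small data and addressed gp-relative, while
+       char buf[16];
+   fails the final size check (at most 4 bytes); weak, volatile,
+   locally-bound, and constant-initialized read-only objects are all
+   rejected earlier.  */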
+
+/* Return true if X is a small data address that can be rewritten
+ as a gp+symref. */
+
+static bool
+arc_rewrite_small_data_p (rtx x)
+{
+ if (GET_CODE (x) == CONST)
+ x = XEXP (x, 0);
+
+  if (GET_CODE (x) == PLUS)
+ {
+ if (GET_CODE (XEXP (x, 1)) == CONST_INT)
+ x = XEXP (x, 0);
}
+
+ return (GET_CODE (x) == SYMBOL_REF
+ && SYMBOL_REF_SMALL_P(x));
+}
+
+/* A for_each_rtx callback, used by arc_rewrite_small_data. */
+
+static int
+arc_rewrite_small_data_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+ if (arc_rewrite_small_data_p (*loc))
+ {
+ rtx top;
+
+ *loc = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, *loc);
+ if (loc == data)
+ return -1;
+ top = *(rtx*) data;
+ if (GET_CODE (top) == MEM && &XEXP (top, 0) == loc)
+ ; /* OK. */
+ else if (GET_CODE (top) == MEM
+ && GET_CODE (XEXP (top, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (top, 0), 0)) == MULT)
+ *loc = force_reg (Pmode, *loc);
+ else
+ gcc_unreachable ();
+ return -1;
+ }
+
+ if (GET_CODE (*loc) == PLUS
+ && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
+ return -1;
+
+ return 0;
+}
+
+/* If possible, rewrite OP so that it refers to small data using
+ explicit relocations. */
+
+rtx
+arc_rewrite_small_data (rtx op)
+{
+ op = copy_insn (op);
+ for_each_rtx (&op, arc_rewrite_small_data_1, &op);
+ return op;
+}
+
+/* A for_each_rtx callback for small_data_pattern. */
+
+static int
+small_data_pattern_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
+{
+ if (GET_CODE (*loc) == PLUS
+ && rtx_equal_p (XEXP (*loc, 0), pic_offset_table_rtx))
+ return -1;
+
+ return arc_rewrite_small_data_p (*loc);
+}
+
+/* Return true if OP refers to small data symbols directly, not through
+ a PLUS. */
+
+int
+small_data_pattern (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
+{
+ return (GET_CODE (op) != SEQUENCE
+ && for_each_rtx (&op, small_data_pattern_1, 0));
+}
+
+/* Return true if OP is an acceptable memory operand for ARCompact
+   16-bit gp-relative load instructions.
+   OP should look like:  [r26, symref@sda]
+   i.e. (mem (plus (reg 26) (symref with the small-data flag set))).  */
+/* FIXME: The volatile cache option is still to be handled.  */
+
+int
+compact_sda_memory_operand (rtx op, enum machine_mode mode)
+{
+ rtx addr;
+ int size;
+
+ /* Eliminate non-memory operations */
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ size = GET_MODE_SIZE (mode);
+
+ /* dword operations really put out 2 instructions, so eliminate them. */
+ if (size > UNITS_PER_WORD)
+ return 0;
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+
+ return LEGITIMATE_SMALL_DATA_ADDRESS_P (addr);
+}
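+
+/* Illustration (editor's sketch, assuming a small-data symbol VAR): this
+   predicate accepts an operand of the form
+       (mem:SI (plus:SI (reg:SI 26) (symbol_ref:SI ("VAR"))))
+   allowing a 16-bit gp-relative load such as "ld_s r0,[gp,@VAR@sda]".  */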
+
+void
+arc_asm_output_aligned_decl_local (FILE * stream, tree decl, const char * name,
+ unsigned HOST_WIDE_INT size,
+ unsigned HOST_WIDE_INT align,
+ unsigned HOST_WIDE_INT globalize_p)
+{
+ int in_small_data = arc_in_small_data_p (decl);
+
+ if (in_small_data)
+ switch_to_section (get_named_section (NULL, ".sbss", 0));
+ /* named_section (0,".sbss",0); */
else
- size = GET_MODE_SIZE (mode);
+ switch_to_section (bss_section);
+
+ if (globalize_p)
+ (*targetm.asm_out.globalize_label) (stream, name);
+
+ ASM_OUTPUT_ALIGN (stream, floor_log2 ((align) / BITS_PER_UNIT));
+ ASM_OUTPUT_TYPE_DIRECTIVE (stream, name, "object");
+ ASM_OUTPUT_SIZE_DIRECTIVE (stream, name, size);
+ ASM_OUTPUT_LABEL (stream, name);
+
+ if (size != 0)
+ ASM_OUTPUT_SKIP (stream, size);
+}
+
+
- return size > 8;
+
+/* SIMD builtins support */
+enum simd_insn_args_type {
+ Va_Vb_Vc,
+ Va_Vb_rlimm,
+ Va_Vb_Ic,
+ Va_Vb_u6,
+ Va_Vb_u8,
+ Va_rlimm_u8,
+
+ Va_Vb,
+
+ void_rlimm,
+ void_u6,
+
+ Da_u3_rlimm,
+ Da_rlimm_rlimm,
+
+ Va_Ib_u8,
+ void_Va_Ib_u8,
+
+ Va_Vb_Ic_u8,
+ void_Va_u3_Ib_u8
+};
+
+struct builtin_description
+{
+ enum simd_insn_args_type args_type;
+ const enum insn_code icode;
+ const char * const name;
+ const enum arc_builtins code;
+ const enum rtx_code comparison;
+ const unsigned int flag;
+};
+
+static const struct builtin_description arc_simd_builtin_desc_list[] =
+{
+ /* VVV builtins go first */
+#define SIMD_BUILTIN(type,code, string, builtin) \
+ { type,CODE_FOR_##code, "__builtin_arc_" string, \
+ ARC_SIMD_BUILTIN_##builtin, 0, 0 },
+
+ SIMD_BUILTIN (Va_Vb_Vc, vaddaw_insn, "vaddaw", VADDAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vaddw_insn, "vaddw", VADDW)
+ SIMD_BUILTIN (Va_Vb_Vc, vavb_insn, "vavb", VAVB)
+ SIMD_BUILTIN (Va_Vb_Vc, vavrb_insn, "vavrb", VAVRB)
+ SIMD_BUILTIN (Va_Vb_Vc, vdifaw_insn, "vdifaw", VDIFAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vdifw_insn, "vdifw", VDIFW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmaxaw_insn, "vmaxaw", VMAXAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmaxw_insn, "vmaxw", VMAXW)
+ SIMD_BUILTIN (Va_Vb_Vc, vminaw_insn, "vminaw", VMINAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vminw_insn, "vminw", VMINW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmulaw_insn, "vmulaw", VMULAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmulfaw_insn, "vmulfaw", VMULFAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmulfw_insn, "vmulfw", VMULFW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmulw_insn, "vmulw", VMULW)
+ SIMD_BUILTIN (Va_Vb_Vc, vsubaw_insn, "vsubaw", VSUBAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vsubw_insn, "vsubw", VSUBW)
+ SIMD_BUILTIN (Va_Vb_Vc, vsummw_insn, "vsummw", VSUMMW)
+ SIMD_BUILTIN (Va_Vb_Vc, vand_insn, "vand", VAND)
+ SIMD_BUILTIN (Va_Vb_Vc, vandaw_insn, "vandaw", VANDAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vbic_insn, "vbic", VBIC)
+ SIMD_BUILTIN (Va_Vb_Vc, vbicaw_insn, "vbicaw", VBICAW)
+ SIMD_BUILTIN (Va_Vb_Vc, vor_insn, "vor", VOR)
+ SIMD_BUILTIN (Va_Vb_Vc, vxor_insn, "vxor", VXOR)
+ SIMD_BUILTIN (Va_Vb_Vc, vxoraw_insn, "vxoraw", VXORAW)
+ SIMD_BUILTIN (Va_Vb_Vc, veqw_insn, "veqw", VEQW)
+ SIMD_BUILTIN (Va_Vb_Vc, vlew_insn, "vlew", VLEW)
+ SIMD_BUILTIN (Va_Vb_Vc, vltw_insn, "vltw", VLTW)
+ SIMD_BUILTIN (Va_Vb_Vc, vnew_insn, "vnew", VNEW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr1aw_insn, "vmr1aw", VMR1AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr1w_insn, "vmr1w", VMR1W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr2aw_insn, "vmr2aw", VMR2AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr2w_insn, "vmr2w", VMR2W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr3aw_insn, "vmr3aw", VMR3AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr3w_insn, "vmr3w", VMR3W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr4aw_insn, "vmr4aw", VMR4AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr4w_insn, "vmr4w", VMR4W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr5aw_insn, "vmr5aw", VMR5AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr5w_insn, "vmr5w", VMR5W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr6aw_insn, "vmr6aw", VMR6AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr6w_insn, "vmr6w", VMR6W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr7aw_insn, "vmr7aw", VMR7AW)
+ SIMD_BUILTIN (Va_Vb_Vc, vmr7w_insn, "vmr7w", VMR7W)
+ SIMD_BUILTIN (Va_Vb_Vc, vmrb_insn, "vmrb", VMRB)
+ SIMD_BUILTIN (Va_Vb_Vc, vh264f_insn, "vh264f", VH264F)
+ SIMD_BUILTIN (Va_Vb_Vc, vh264ft_insn, "vh264ft", VH264FT)
+ SIMD_BUILTIN (Va_Vb_Vc, vh264fw_insn, "vh264fw", VH264FW)
+ SIMD_BUILTIN (Va_Vb_Vc, vvc1f_insn, "vvc1f", VVC1F)
+ SIMD_BUILTIN (Va_Vb_Vc, vvc1ft_insn, "vvc1ft", VVC1FT)
+
+ SIMD_BUILTIN (Va_Vb_rlimm, vbaddw_insn, "vbaddw", VBADDW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbmaxw_insn, "vbmaxw", VBMAXW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbminw_insn, "vbminw", VBMINW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbmulaw_insn, "vbmulaw", VBMULAW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbmulfw_insn, "vbmulfw", VBMULFW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbmulw_insn, "vbmulw", VBMULW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbrsubw_insn, "vbrsubw", VBRSUBW)
+ SIMD_BUILTIN (Va_Vb_rlimm, vbsubw_insn, "vbsubw", VBSUBW)
+
+ /* Va, Vb, Ic instructions */
+ SIMD_BUILTIN (Va_Vb_Ic, vasrw_insn, "vasrw", VASRW)
+ SIMD_BUILTIN (Va_Vb_Ic, vsr8_insn, "vsr8", VSR8)
+ SIMD_BUILTIN (Va_Vb_Ic, vsr8aw_insn, "vsr8aw", VSR8AW)
+
+ /* Va, Vb, u6 instructions */
+ SIMD_BUILTIN (Va_Vb_u6, vasrrwi_insn, "vasrrwi", VASRRWi)
+ SIMD_BUILTIN (Va_Vb_u6, vasrsrwi_insn, "vasrsrwi", VASRSRWi)
+ SIMD_BUILTIN (Va_Vb_u6, vasrwi_insn, "vasrwi", VASRWi)
+ SIMD_BUILTIN (Va_Vb_u6, vasrpwbi_insn, "vasrpwbi", VASRPWBi)
+ SIMD_BUILTIN (Va_Vb_u6, vasrrpwbi_insn,"vasrrpwbi", VASRRPWBi)
+ SIMD_BUILTIN (Va_Vb_u6, vsr8awi_insn, "vsr8awi", VSR8AWi)
+ SIMD_BUILTIN (Va_Vb_u6, vsr8i_insn, "vsr8i", VSR8i)
+
+ /* Va, Vb, u8 (simm) instructions */
+ SIMD_BUILTIN (Va_Vb_u8, vmvaw_insn, "vmvaw", VMVAW)
+ SIMD_BUILTIN (Va_Vb_u8, vmvw_insn, "vmvw", VMVW)
+ SIMD_BUILTIN (Va_Vb_u8, vmvzw_insn, "vmvzw", VMVZW)
+ SIMD_BUILTIN (Va_Vb_u8, vd6tapf_insn, "vd6tapf", VD6TAPF)
+
+ /* Va, rlimm, u8 (simm) instructions */
+ SIMD_BUILTIN (Va_rlimm_u8, vmovaw_insn, "vmovaw", VMOVAW)
+ SIMD_BUILTIN (Va_rlimm_u8, vmovw_insn, "vmovw", VMOVW)
+ SIMD_BUILTIN (Va_rlimm_u8, vmovzw_insn, "vmovzw", VMOVZW)
+
+ /* Va, Vb instructions */
+ SIMD_BUILTIN (Va_Vb, vabsaw_insn, "vabsaw", VABSAW)
+ SIMD_BUILTIN (Va_Vb, vabsw_insn, "vabsw", VABSW)
+ SIMD_BUILTIN (Va_Vb, vaddsuw_insn, "vaddsuw", VADDSUW)
+ SIMD_BUILTIN (Va_Vb, vsignw_insn, "vsignw", VSIGNW)
+ SIMD_BUILTIN (Va_Vb, vexch1_insn, "vexch1", VEXCH1)
+ SIMD_BUILTIN (Va_Vb, vexch2_insn, "vexch2", VEXCH2)
+ SIMD_BUILTIN (Va_Vb, vexch4_insn, "vexch4", VEXCH4)
+ SIMD_BUILTIN (Va_Vb, vupbaw_insn, "vupbaw", VUPBAW)
+ SIMD_BUILTIN (Va_Vb, vupbw_insn, "vupbw", VUPBW)
+ SIMD_BUILTIN (Va_Vb, vupsbaw_insn, "vupsbaw", VUPSBAW)
+ SIMD_BUILTIN (Va_Vb, vupsbw_insn, "vupsbw", VUPSBW)
+
+ /* DIb, rlimm, rlimm instructions */
+ SIMD_BUILTIN (Da_rlimm_rlimm, vdirun_insn, "vdirun", VDIRUN)
+ SIMD_BUILTIN (Da_rlimm_rlimm, vdorun_insn, "vdorun", VDORUN)
+
+ /* DIb, limm, rlimm instructions */
+ SIMD_BUILTIN (Da_u3_rlimm, vdiwr_insn, "vdiwr", VDIWR)
+ SIMD_BUILTIN (Da_u3_rlimm, vdowr_insn, "vdowr", VDOWR)
+
+ /* rlimm instructions */
+ SIMD_BUILTIN (void_rlimm, vrec_insn, "vrec", VREC)
+ SIMD_BUILTIN (void_rlimm, vrun_insn, "vrun", VRUN)
+ SIMD_BUILTIN (void_rlimm, vrecrun_insn, "vrecrun", VRECRUN)
+ SIMD_BUILTIN (void_rlimm, vendrec_insn, "vendrec", VENDREC)
+
+ /* Va, [Ib,u8] instructions */
+ SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wh_insn, "vld32wh", VLD32WH)
+ SIMD_BUILTIN (Va_Vb_Ic_u8, vld32wl_insn, "vld32wl", VLD32WL)
+ SIMD_BUILTIN (Va_Vb_Ic_u8, vld64_insn, "vld64", VLD64)
+ SIMD_BUILTIN (Va_Vb_Ic_u8, vld32_insn, "vld32", VLD32)
+
+ SIMD_BUILTIN (Va_Ib_u8, vld64w_insn, "vld64w", VLD64W)
+ SIMD_BUILTIN (Va_Ib_u8, vld128_insn, "vld128", VLD128)
+ SIMD_BUILTIN (void_Va_Ib_u8, vst128_insn, "vst128", VST128)
+ SIMD_BUILTIN (void_Va_Ib_u8, vst64_insn, "vst64", VST64)
+
+ /* Va, [Ib, u8] instructions */
+ SIMD_BUILTIN (void_Va_u3_Ib_u8, vst16_n_insn, "vst16_n", VST16_N)
+ SIMD_BUILTIN (void_Va_u3_Ib_u8, vst32_n_insn, "vst32_n", VST32_N)
+
+ SIMD_BUILTIN (void_u6, vinti_insn, "vinti", VINTI)
+};
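+
+/* Illustration (editor's sketch; the vector typedef is assumed, not taken
+   from this patch): with -msimd these descriptors surface as builtins on
+   V8HImode values, e.g.
+       typedef short __v8hi __attribute__ ((vector_size (16)));
+       __v8hi a, b, c;
+       c = __builtin_arc_vaddw (a, b);
+   which is expanded through the Va_Vb_Vc case of arc_expand_simd_builtin
+   below.  */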
+
+static void
+arc_init_simd_builtins (void)
+{
+ int i;
+ tree endlink = void_list_node;
+ tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
+
+ tree v8hi_ftype_v8hi_v8hi
+ = build_function_type (V8HI_type_node,
+ tree_cons (NULL_TREE, V8HI_type_node,
+ tree_cons (NULL_TREE, V8HI_type_node, endlink)));
+ tree v8hi_ftype_v8hi_int
+ = build_function_type (V8HI_type_node,
+ tree_cons (NULL_TREE, V8HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink)));
+
+ tree v8hi_ftype_v8hi_int_int
+ = build_function_type (V8HI_type_node,
+ tree_cons (NULL_TREE, V8HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink))));
+
+ tree void_ftype_v8hi_int_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, V8HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink))));
+
+ tree void_ftype_v8hi_int_int_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, V8HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink)))));
+
+ tree v8hi_ftype_int_int
+ = build_function_type (V8HI_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink)));
+
+ tree void_ftype_int_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink)));
+
+ tree void_ftype_int
+ = build_function_type (void_type_node,
+ tree_cons (NULL_TREE, integer_type_node, endlink));
+
+ tree v8hi_ftype_v8hi
+ = build_function_type (V8HI_type_node, tree_cons (NULL_TREE, V8HI_type_node,endlink));
+
+  /* These asserts ensure that the order of the builtin descriptors matches
+     the order assumed below; otherwise the initialization goes wrong.  */
+ gcc_assert (arc_simd_builtin_desc_list [0].args_type == Va_Vb_Vc);
+ for (i=0; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Vc; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi_v8hi, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_rlimm; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u6; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_u8; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_rlimm_u8; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm);
+ for (; arc_simd_builtin_desc_list [i].args_type == Da_rlimm_rlimm; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, void_ftype_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm);
+ for (; arc_simd_builtin_desc_list [i].args_type == Da_u3_rlimm; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, void_ftype_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_rlimm);
+ for (; arc_simd_builtin_desc_list [i].args_type == void_rlimm; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, void_ftype_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Vb_Ic_u8; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_v8hi_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8);
+ for (; arc_simd_builtin_desc_list [i].args_type == Va_Ib_u8; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, v8hi_ftype_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8);
+ for (; arc_simd_builtin_desc_list [i].args_type == void_Va_Ib_u8; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, void_ftype_v8hi_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8);
+ for (; arc_simd_builtin_desc_list [i].args_type == void_Va_u3_Ib_u8; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, void_ftype_v8hi_int_int_int, arc_simd_builtin_desc_list [i].code);
+
+ gcc_assert (arc_simd_builtin_desc_list [i].args_type == void_u6);
+ for (; arc_simd_builtin_desc_list [i].args_type == void_u6; i++)
+ def_mbuiltin (TARGET_SIMD_SET, arc_simd_builtin_desc_list [i].name, void_ftype_int, arc_simd_builtin_desc_list [i].code);
+
+  gcc_assert (i == ARRAY_SIZE (arc_simd_builtin_desc_list));
}
+
+static rtx
+arc_expand_simd_builtin (tree exp,
+ rtx target,
+ rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0;
+ tree arg1;
+ tree arg2;
+ tree arg3;
+ rtx op0;
+ rtx op1;
+ rtx op2;
+ rtx op3;
+ rtx op4;
+ rtx pat;
+ unsigned int i;
+ int fcode = DECL_FUNCTION_CODE (fndecl);
+ int icode;
+ enum machine_mode mode0;
+ enum machine_mode mode1;
+ enum machine_mode mode2;
+ enum machine_mode mode3;
+ enum machine_mode mode4;
+ const struct builtin_description * d;
+
+ for (i = 0, d = arc_simd_builtin_desc_list; i < ARRAY_SIZE (arc_simd_builtin_desc_list); i++, d++)
+ if (d->code == (const enum arc_builtins) fcode)
+ break;
+
+  /* We must get an entry here.  */
+ gcc_assert (i < ARRAY_SIZE (arc_simd_builtin_desc_list));
+
+ switch (d->args_type) {
+ case Va_Vb_rlimm:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, V8HImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0);
+
+ target = gen_reg_rtx (V8HImode);
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case Va_Vb_u6:
+ case Va_Vb_u8:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, V8HImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0);
+
+ target = gen_reg_rtx (V8HImode);
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if ((! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ || (d->args_type == Va_Vb_u6 && !(UNSIGNED_INT6 (INTVAL (op1))))
+ || (d->args_type == Va_Vb_u8 && !(UNSIGNED_INT8 (INTVAL (op1))))
+ )
+ error ("Operand 2 of %s instruction should be an unsigned %d-bit value.",
+ d->name,
+ (d->args_type == Va_Vb_u6)? 6: 8);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case Va_rlimm_u8:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, SImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0);
+
+ target = gen_reg_rtx (V8HImode);
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ || !(UNSIGNED_INT8 (INTVAL (op1))))
+ error ("Operand 2 of %s instruction should be an unsigned 8-bit value.",
+ d->name);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case Va_Vb_Ic:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, V8HImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0);
+ op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG);
+
+ target = gen_reg_rtx (V8HImode);
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ || !(UNSIGNED_INT3 (INTVAL (op1))))
+ error ("Operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7).",
+ d->name);
+
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case Va_Vb_Vc:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, V8HImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, V8HImode, 0);
+
+ target = gen_reg_rtx (V8HImode);
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case Va_Vb:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ op0 = expand_expr (arg0, NULL_RTX, V8HImode, 0);
+
+ target = gen_reg_rtx (V8HImode);
+ mode0 = insn_data[icode].operand[1].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (target, op0);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case Da_rlimm_rlimm:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, SImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0);
+
+
+ if (icode == CODE_FOR_vdirun_insn)
+ target = gen_rtx_REG (SImode, 131);
+ else if (icode == CODE_FOR_vdorun_insn)
+ target = gen_rtx_REG (SImode, 139);
+ else
+ gcc_unreachable ();
+
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+
+ if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+
+ pat = GEN_FCN (icode) (target, op0, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case Da_u3_rlimm:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ op0 = expand_expr (arg0, NULL_RTX, SImode, 0);
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0);
+
+
+      if (GET_CODE (op0) != CONST_INT
+	  || !(UNSIGNED_INT3 (INTVAL (op0))))
+ error ("Operand 1 of %s instruction should be an unsigned 3-bit value (DR0-DR7).",
+ d->name);
+
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if (icode == CODE_FOR_vdiwr_insn)
+ target = gen_rtx_REG (SImode, ARC_FIRST_SIMD_DMA_CONFIG_IN_REG + INTVAL (op0));
+ else if (icode == CODE_FOR_vdowr_insn)
+ target = gen_rtx_REG (SImode, ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG + INTVAL (op0));
+ else
+ gcc_unreachable ();
+
+ if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
+ op1 = copy_to_mode_reg (mode1, op1);
+
+ pat = GEN_FCN (icode) (target, op1);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case void_u6:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+      arg0 = fold (arg0);
+
+ op0 = expand_expr (arg0, NULL_RTX, SImode, 0);
+ mode0 = insn_data[icode].operand[0].mode;
+
+ /* op0 should be u6 */
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)
+ || !(UNSIGNED_INT6 (INTVAL (op0))))
+ error ("Operand of %s instruction should be an unsigned 6-bit value.",
+ d->name);
+
+ pat = GEN_FCN (icode) (op0);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case void_rlimm:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0);
+
+      arg0 = fold (arg0);
+
+ op0 = expand_expr (arg0, NULL_RTX, SImode, 0);
+ mode0 = insn_data[icode].operand[0].mode;
+
+ if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
+ op0 = copy_to_mode_reg (mode0, op0);
+
+ pat = GEN_FCN (icode) (op0);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case Va_Vb_Ic_u8:
+ {
+ rtx src_vreg;
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */
+ arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
+ arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */
+
+ src_vreg = expand_expr (arg0, NULL_RTX, V8HImode, 0);
+ op0 = expand_expr (arg1, NULL_RTX, SImode, 0); /* [I]0-7 */
+ op1 = expand_expr (arg2, NULL_RTX, SImode, 0); /* u8 */
+ op2 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */
+
+ /* target <- src vreg */
+ emit_insn (gen_move_insn (target, src_vreg));
+
+ /* target <- vec_concat: target, mem(Ib, u8) */
+ mode0 = insn_data[icode].operand[3].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+
+ if ( (!(*insn_data[icode].operand[3].predicate) (op0, mode0))
+ || !(UNSIGNED_INT3 (INTVAL (op0))))
+ error ("Operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7).",
+ d->name);
+
+ if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
+ || !(UNSIGNED_INT8 (INTVAL (op1))))
+ error ("Operand 2 of %s instruction should be an unsigned 8-bit value.",
+ d->name);
+
+ pat = GEN_FCN (icode) (target, op1, op2, op0);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+ }
+
+ case void_Va_Ib_u8:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0); /* src vreg */
+ arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
+ arg2 = CALL_EXPR_ARG (exp, 2); /* u8 */
+
+ op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */
+ op1 = expand_expr (arg1, NULL_RTX, SImode, 0); /* I[0-7] */
+ op2 = expand_expr (arg2, NULL_RTX, SImode, 0); /* u8 */
+ op3 = expand_expr (arg0, NULL_RTX, V8HImode, 0); /* Vdest */
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode1 = insn_data[icode].operand[1].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+
+ if ( (!(*insn_data[icode].operand[1].predicate) (op1, mode1))
+ || !(UNSIGNED_INT3 (INTVAL (op1))))
+ error ("Operand 2 of %s instruction should be an unsigned 3-bit value (I0-I7).",
+ d->name);
+
+ if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2))
+ || !(UNSIGNED_INT8 (INTVAL (op2))))
+ error ("Operand 3 of %s instruction should be an unsigned 8-bit value.",
+ d->name);
+
+ if (!(*insn_data[icode].operand[3].predicate) (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+
+ pat = GEN_FCN (icode) (op0, op1, op2, op3);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return NULL_RTX;
+
+ case Va_Ib_u8:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0); /* dest vreg */
+ arg1 = CALL_EXPR_ARG (exp, 1); /* [I]0-7 */
+
+ op0 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR0 */
+ op1 = expand_expr (arg0, NULL_RTX, SImode, 0); /* I[0-7] */
+ op2 = expand_expr (arg1, NULL_RTX, SImode, 0); /* u8 */
+
+      /* Allocate a fresh target vreg.  */
+ target = gen_reg_rtx (V8HImode);
+
+ /* target <- vec_concat: target, mem(Ib, u8) */
+ mode0 = insn_data[icode].operand[1].mode;
+ mode1 = insn_data[icode].operand[2].mode;
+ mode2 = insn_data[icode].operand[3].mode;
+
+ if ( (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
+ || !(UNSIGNED_INT3 (INTVAL (op1))))
+ error ("Operand 1 of %s instruction should be an unsigned 3-bit value (I0-I7).",
+ d->name);
+
+ if ( (!(*insn_data[icode].operand[3].predicate) (op2, mode2))
+ || !(UNSIGNED_INT8 (INTVAL (op2))))
+ error ("Operand 2 of %s instruction should be an unsigned 8-bit value.",
+ d->name);
+
+ pat = GEN_FCN (icode) (target, op0, op1, op2);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return target;
+
+ case void_Va_u3_Ib_u8:
+ icode = d->icode;
+ arg0 = CALL_EXPR_ARG (exp, 0); /* source vreg */
+ arg1 = CALL_EXPR_ARG (exp, 1); /* u3 */
+ arg2 = CALL_EXPR_ARG (exp, 2); /* [I]0-7 */
+ arg3 = CALL_EXPR_ARG (exp, 3); /* u8 */
+
+ op0 = expand_expr (arg3, NULL_RTX, SImode, 0); /* u8 */
+ op1 = gen_rtx_REG (V8HImode, ARC_FIRST_SIMD_VR_REG); /* VR */
+ op2 = expand_expr (arg2, NULL_RTX, SImode, 0); /* [I]0-7 */
+ op3 = expand_expr (arg0, NULL_RTX, V8HImode, 0); /* vreg to be stored */
+ op4 = expand_expr (arg1, NULL_RTX, SImode, 0); /* vreg 0-7 subreg no. */
+
+ mode0 = insn_data[icode].operand[0].mode;
+ mode2 = insn_data[icode].operand[2].mode;
+ mode3 = insn_data[icode].operand[3].mode;
+ mode4 = insn_data[icode].operand[4].mode;
+
+ /* correctness checks for the operands */
+ if ( (!(*insn_data[icode].operand[0].predicate) (op0, mode0))
+ || !(UNSIGNED_INT8 (INTVAL (op0))))
+ error ("Operand 4 of %s instruction should be an unsigned 8-bit value (0-255).",
+ d->name);
+
+ if ( (!(*insn_data[icode].operand[2].predicate) (op2, mode2))
+ || !(UNSIGNED_INT3 (INTVAL (op2))))
+ error ("Operand 3 of %s instruction should be an unsigned 3-bit value (I0-I7).",
+ d->name);
+
+ if (!(*insn_data[icode].operand[3].predicate) (op3, mode3))
+ op3 = copy_to_mode_reg (mode3, op3);
+
+ if ( (!(*insn_data[icode].operand[4].predicate) (op4, mode4))
+ || !(UNSIGNED_INT3 (INTVAL (op4))))
+ error ("Operand 2 of %s instruction should be an unsigned 3-bit value (subreg 0-7).",
+ d->name);
+ else if (icode == CODE_FOR_vst32_n_insn
+ && ((INTVAL(op4) % 2 ) != 0))
+ error ("Operand 2 of %s instruction should be an even 3-bit value (subreg 0,2,4,6).",
+ d->name);
+
+ pat = GEN_FCN (icode) (op0, op1, op2, op3, op4);
+ if (! pat)
+ return 0;
+
+ emit_insn (pat);
+ return NULL_RTX;
+
+ default:
+ gcc_unreachable ();
+ }
+ return NULL_RTX;
+}
+
+enum reg_class
+arc_secondary_reload (bool in_p, rtx x, enum reg_class class,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+ /* We can't load/store the D-registers directly */
+ if (class == DOUBLE_REGS && (GET_CODE (x) == MEM))
+ return GENERAL_REGS;
+ /* The loop counter register can be stored, but not loaded directly. */
+ if ((class == LPCOUNT_REG || class == WRITABLE_CORE_REGS)
+ && in_p && GET_CODE (x) == MEM)
+ return GENERAL_REGS;
+ return NO_REGS;
+}
+
+static bool
+arc_preserve_reload_p (rtx in)
+{
+ return (GET_CODE (in) == PLUS
+ && RTX_OK_FOR_BASE_P (XEXP (in, 0))
+ && CONST_INT_P (XEXP (in, 1))
+ && !((INTVAL (XEXP (in, 1)) & 511)));
+}
+
+int
+arc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
+ enum reg_class from_class,
+ enum reg_class to_class)
+{
+  /* The ARC600 has no bypass for extension registers, hence a nop might
+     need to be inserted after a write so that reads are safe.  */
+ if (TARGET_ARC600
+ && (to_class == LPCOUNT_REG || to_class == WRITABLE_CORE_REGS))
+ return 3;
+ /* The ARC700 stalls for 3 cycles when *reading* from lp_count. */
+ if (TARGET_ARC700
+ && (from_class == LPCOUNT_REG || from_class == ALL_CORE_REGS
+ || from_class == WRITABLE_CORE_REGS))
+ return 8;
+ return 2;
+
+}
+
+/* Emit code and return a template suitable for outputting an addsi
+ instruction with OPERANDS and the conditional execution specifier
+ COND. If COND is zero, don't output anything, just return an
+ empty string for instructions with 32 bit opcode, and a non-empty one
+ for insns with a 16 bit opcode. */
+const char*
+arc_output_addsi (rtx *operands, const char *cond)
+{
+ char format[32];
+
+ int cond_p = cond ? *cond : 0;
+ int match = operands_match_p (operands[0], operands[1]);
+ int match2 = operands_match_p (operands[0], operands[2]);
+ int intval = (REG_P (operands[2]) ? 1
+ : CONST_INT_P (operands[2]) ? INTVAL (operands[2]) : 0xbadc057);
+ int neg_intval = -intval;
+ int shift = 0;
+ int short_p = 0;
+
+ /* First try to emit a 16 bit insn. */
+ if (1)
+ {
+ int short_0 = satisfies_constraint_Rcq (operands[0]);
+
+ short_p = (!cond_p && short_0 && satisfies_constraint_Rcq (operands[1]));
+ if (short_p
+ && (REG_P (operands[2])
+ ? (match || satisfies_constraint_Rcq (operands[2]))
+ : (unsigned) intval <= (match ? 127 : 7)))
+ return "add%? %0,%1,%2%&";
+ if (!cond_p && short_0 && satisfies_constraint_Rcq (operands[2])
+ && REG_P (operands[1]) && match2)
+ return "add%? %0,%2,%1%&";
+ if (!cond_p && (short_0 || REGNO (operands[0]) == STACK_POINTER_REGNUM)
+ && REGNO (operands[1]) == STACK_POINTER_REGNUM && !(intval & ~124))
+ return "add%? %0,%1,%2%&";
+
+ if ((short_p && (unsigned) neg_intval <= (match ? 31 : 7))
+ || (!cond_p && REGNO (operands[0]) == STACK_POINTER_REGNUM
+ && match && !(neg_intval & ~124)))
+ return "sub%? %0,%1,%n2%&";
+ }
+
+#define ADDSI_OUTPUT(LIST) do {\
+ if (cond) \
+ sprintf LIST, output_asm_insn (format, operands);\
+ return ""; \
+} while (0)
+#define ADDSI_OUTPUT1(FORMAT) ADDSI_OUTPUT ((format, FORMAT, cond))
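+
+/* For example, ADDSI_OUTPUT1 ("add%s %%0,%%1,%%2") with COND ".eq" first
+   expands the %s into FORMAT, giving "add.eq %0,%1,%2", and then prints it
+   with output_asm_insn; when COND is zero, nothing is printed and "" is
+   returned, as described in the comment above arc_output_addsi.  */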
+
+ /* Now try to emit a 32 bit insn without long immediate. */
+ if (!match && match2 && REG_P (operands[1]))
+ ADDSI_OUTPUT1 ("add%s %%0,%%2,%%1");
+ if (match || !cond_p)
+ {
+ int limit = (match && !cond_p) ? 0x7ff : 0x3f;
+ int range_factor = neg_intval & intval;
+
+ if (intval == -1 << 31)
+ ADDSI_OUTPUT1 ("bxor%s %%0,%%1,31");
+
+      /* If we can use a straight add / sub instead of a {add,sub}[123] of
+	 the same size, do so - the insn latency is lower.  */
+ /* -0x800 is a 12-bit constant for add /add3 / sub / sub3, but
+ 0x800 is not. */
+ if ((intval >= 0 && intval <= limit)
+ || (intval == -0x800 && limit == 0x7ff))
+ ADDSI_OUTPUT1 ("add%s %%0,%%1,%%2");
+ else if ((intval < 0 && neg_intval <= limit)
+ || (intval == 0x800 && limit == 0x7ff))
+ ADDSI_OUTPUT1 ("sub%s %%0,%%1,%%n2");
+ shift = range_factor >= 8 ? 3 : (range_factor >> 1 & 3);
+ if (((intval < 0 && intval != -0x4000)
+ /* sub[123] is slower than add_s / sub, only use it if it
+ avoids a long immediate. */
+ && neg_intval <= limit << shift)
+ || (intval == 0x4000 && limit == 0x7ff))
+ ADDSI_OUTPUT ((format, "sub%d%s %%0,%%1,%d",
+ shift, cond, neg_intval >> shift));
+ else if ((intval >= 0 && intval <= limit << shift)
+ || (intval == -0x4000 && limit == 0x7ff))
+ ADDSI_OUTPUT ((format, "add%d%s %%0,%%1,%d", shift, cond,
+ intval >> shift));
+ }
+ /* Try to emit a 16 bit opcode with long immediate. */
+ if (short_p && match)
+ return "add%? %0,%1,%S2%&";
+
+ /* We have to use a 32 bit opcode, possibly with a long immediate.
+ (We also get here for add a,b,u6) */
+ ADDSI_OUTPUT ((format,
+ intval < 0 ? "sub%s %%0,%%1,%%n2" : "add%s %%0,%%1,%%S2",
+ cond));
+}
+
+static rtx
+force_offsettable (rtx addr, HOST_WIDE_INT size, int reuse)
+{
+ rtx base = addr;
+ rtx offs = const0_rtx;
+
+ if (GET_CODE (base) == PLUS)
+ {
+ offs = XEXP (base, 1);
+ base = XEXP (base, 0);
+ }
+ if (!REG_P (base)
+ || (REGNO (base) != STACK_POINTER_REGNUM
+	  && REGNO_PTR_FRAME_P (REGNO (base)))
+ || !CONST_INT_P (offs) || !SMALL_INT (INTVAL (offs))
+ || !SMALL_INT (INTVAL (offs) + size))
+ {
+ if (reuse)
+ emit_insn (gen_add2_insn (addr, offs));
+ else
+ addr = copy_to_mode_reg (Pmode, addr);
+ }
+ return addr;
+}
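+
+/* For example, an address like (plus sp 20000) with SIZE 4 fails the
+   SMALL_INT checks above, so it is not directly usable for offsetting;
+   with REUSE zero it is simply copied into a fresh pseudo register.  */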
+
+/* Like move_by_pieces, but take account of load latency,
+ and actual offset ranges.
+ Return nonzero on success. */
+int
+arc_expand_movmem (rtx *operands)
+{
+ rtx dst = operands[0];
+ rtx src = operands[1];
+ rtx dst_addr, src_addr;
+ HOST_WIDE_INT size;
+ int align = INTVAL (operands[3]);
+ unsigned n_pieces;
+ int piece = align;
+ rtx store[2];
+ rtx tmpx[2];
+ int i;
+
+ if (!CONST_INT_P (operands[2]))
+ return 0;
+ size = INTVAL (operands[2]);
+ /* move_by_pieces_ninsns is static, so we can't use it. */
+ if (align >= 4)
+ n_pieces = (size + 2) / 4U + (size & 1);
+ else if (align == 2)
+ n_pieces = (size + 1) / 2U;
+ else
+ n_pieces = size;
+ if (n_pieces >= (unsigned int) (optimize_size ? 3 : 15))
+ return 0;
+ if (piece > 4)
+ piece = 4;
+ dst_addr = force_offsettable (XEXP (operands[0], 0), size, 0);
+ src_addr = force_offsettable (XEXP (operands[1], 0), size, 0);
+ store[0] = store[1] = NULL_RTX;
+ tmpx[0] = tmpx[1] = NULL_RTX;
+ for (i = 0; size > 0; i ^= 1, size -= piece)
+ {
+ rtx tmp;
+ enum machine_mode mode;
+
+ if (piece > size)
+ piece = size & -size;
+ mode = smallest_mode_for_size (piece * BITS_PER_UNIT, MODE_INT);
+ /* If we don't re-use temporaries, the scheduler gets carried away,
+ and the register pressure gets unnecessarily high. */
+ if (0 && tmpx[i] && GET_MODE (tmpx[i]) == mode)
+ tmp = tmpx[i];
+ else
+ tmpx[i] = tmp = gen_reg_rtx (mode);
+ dst_addr = force_offsettable (dst_addr, piece, 1);
+ src_addr = force_offsettable (src_addr, piece, 1);
+ if (store[i])
+ emit_insn (store[i]);
+ emit_move_insn (tmp, change_address (src, mode, src_addr));
+ store[i] = gen_move_insn (change_address (dst, mode, dst_addr), tmp);
+ dst_addr = plus_constant (dst_addr, piece);
+ src_addr = plus_constant (src_addr, piece);
+ }
+ if (store[i])
+ emit_insn (store[i]);
+ if (store[i^1])
+ emit_insn (store[i^1]);
+ return 1;
+}
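+
+/* For example, a word-aligned copy of 8 bytes gives n_pieces == 2 and
+   PIECE == 4: two SImode loads into alternating temporaries, each store
+   being held back one iteration so that it does not land in the shadow
+   of its load.  */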
+
+/* Prepare operands for move in MODE. Return nonzero iff the move has
+ been emitted. */
+int
+prepare_move_operands (rtx *operands, enum machine_mode mode)
+{
+  /* We used to do this only for MODE_INT modes, but addresses of floating
+     point variables may well be in the small data section. */
+ if (1)
+ {
+ if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[0], Pmode))
+ operands[0] = arc_rewrite_small_data (operands[0]);
+ else if (mode == SImode && flag_pic && SYMBOLIC_CONST (operands[1]))
+ {
+ emit_pic_move (operands, SImode);
+
+ /* Disable any REG_EQUALs associated with the symref
+ otherwise the optimization pass undoes the work done
+ here and references the variable directly. */
+ }
+ else if (GET_CODE (operands[0]) != MEM
+ && !TARGET_NO_SDATA_SET
+ && small_data_pattern (operands[1], Pmode))
+ {
+ /* This is to take care of address calculations involving sdata
+ variables. */
+ operands[1] = arc_rewrite_small_data (operands[1]);
+
+ emit_insn (gen_rtx_SET (mode, operands[0],operands[1]));
+ /* ??? This note is useless, since it only restates the set itself.
+ We should rather use the original SYMBOL_REF. However, there is
+ the problem that we are lying to the compiler about these
+ SYMBOL_REFs to start with. symbol@sda should be encoded specially
+ so that we can tell it apart from an actual symbol. */
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
+
+ /* Take care of the REG_EQUAL note that will be attached to mark the
+ output reg equal to the initial symbol_ref after this code is
+ executed. */
+ emit_move_insn (operands[0], operands[0]);
+ return 1;
+ }
+ }
+
+ if (MEM_P (operands[0])
+ && !(reload_in_progress || reload_completed))
+ {
+ operands[1] = force_reg (mode, operands[1]);
+ if (!move_dest_operand (operands[0], mode))
+ {
+ rtx addr = copy_to_mode_reg (Pmode, XEXP (operands[0], 0));
+	  /* This is like change_address_1 (operands[0], mode, 0, 1),
+ except that we can't use that function because it is static. */
+ rtx new = change_address (operands[0], mode, addr);
+ MEM_COPY_ATTRIBUTES (new, operands[0]);
+ operands[0] = new;
+ }
+ if (!cse_not_expected)
+ {
+ rtx new = XEXP (operands[0], 0);
+
+ new = arc_legitimize_address (new, new, mode);
+ if (new)
+ {
+ new = change_address (operands[0], mode, new);
+ MEM_COPY_ATTRIBUTES (new, operands[0]);
+ operands[0] = new;
+ }
+ }
+ }
+ if (MEM_P (operands[1]) && !cse_not_expected)
+ {
+ rtx new = XEXP (operands[1], 0);
+
+ new = arc_legitimize_address (new, new, mode);
+ if (new)
+ {
+ new = change_address (operands[1], mode, new);
+ MEM_COPY_ATTRIBUTES (new, operands[1]);
+ operands[1] = new;
+ }
+ }
+ return 0;
+}
+
+/* Prepare OPERANDS for an extension using CODE to OMODE.
+ Return nonzero iff the move has been emitted. */
+int
+prepare_extend_operands (rtx *operands, enum rtx_code code,
+ enum machine_mode omode)
+{
+ if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
+ {
+ /* This is to take care of address calculations involving sdata
+ variables. */
+ operands[1]
+ = gen_rtx_fmt_e (code, omode, arc_rewrite_small_data (operands[1]));
+ emit_insn (gen_rtx_SET (omode, operands[0], operands[1]));
+ set_unique_reg_note (get_last_insn (), REG_EQUAL, operands[1]);
+
+ /* Take care of the REG_EQUAL note that will be attached to mark the
+ output reg equal to the initial extension after this code is
+ executed. */
+ emit_move_insn (operands[0], operands[0]);
+ return 1;
+ }
+ return 0;
+}
+
+/* Output a library call to a function called FNAME that has been arranged
+   to be local to any DSO.  */
+const char *
+arc_output_libcall (const char *fname)
+{
+ unsigned len = strlen (fname);
+ static char buf[64];
+
+ gcc_assert (len < sizeof buf - 35);
+ if (TARGET_LONG_CALLS_SET
+ || (TARGET_MEDIUM_CALLS && arc_ccfsm_cond_exec_p ()))
+ {
+ if (flag_pic)
+ sprintf (buf, "add r12,pcl,@%s-(.&2)\n\tjl%%!%%* [r12]", fname);
+ else
+ sprintf (buf, "jl%%! @%s", fname);
+ }
+ else
+ sprintf (buf, "bl%%!%%* @%s", fname);
+ return buf;
+}
+
+rtx
+disi_highpart (rtx in)
+{
+ return simplify_gen_subreg (SImode, in, DImode, TARGET_BIG_ENDIAN ? 0 : 4);
+}
+
+/* Called by arc600_corereg_hazard via for_each_rtx.
+ If a hazard is found, return a conservative estimate of the required
+   length adjustment to accommodate a nop.  */
+static int
+arc600_corereg_hazard_1 (rtx *xp, void *data)
+{
+ rtx x = *xp;
+ rtx dest;
+ rtx pat = (rtx) data;
+
+ switch (GET_CODE (x))
+ {
+ case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
+ break;
+ default:
+ /* This is also fine for PRE/POST_MODIFY, because they contain a SET. */
+ return 0;
+ }
+ dest = XEXP (x, 0);
+  /* Check if this sets an extension register.  N.B. we use 61 for the
+ condition codes, which is definitely not an extension register. */
+ if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61
+ /* Check if the same register is used by the PAT. */
+ && (refers_to_regno_p
+ (REGNO (dest),
+ REGNO (dest) + (GET_MODE_SIZE (GET_MODE (dest)) + 3) / 4U, pat, 0)))
+ return 4;
+
+ return 0;
+}
+
+/* Return length adjustment needed between PRED and SUCC.
+   For ARC600:
+   A write to a core reg greater than or equal to 32 must not be immediately
+ followed by a use. Anticipate the length requirement to insert a nop
+ between PRED and SUCC to prevent a hazard. */
+static int
+arc600_corereg_hazard (rtx pred, rtx succ)
+{
+ if (!TARGET_ARC600)
+ return 0;
+ /* If SUCC is a doloop_end_i with a preceding label, we must output a nop
+ in front of SUCC anyway, so there will be separation between PRED and
+ SUCC. */
+ if (recog_memoized (succ) == CODE_FOR_doloop_end_i
+ && LABEL_P (prev_nonnote_insn (succ)))
+ return 0;
+ if (recog_memoized (succ) == CODE_FOR_doloop_begin_i)
+ return 0;
+ if (GET_CODE (PATTERN (pred)) == SEQUENCE)
+ pred = XVECEXP (PATTERN (pred), 0, 1);
+ if (GET_CODE (PATTERN (succ)) == SEQUENCE)
+ succ = XVECEXP (PATTERN (succ), 0, 0);
+ if (recog_memoized (pred) == CODE_FOR_mulsi_600
+ || recog_memoized (pred) == CODE_FOR_umul_600
+ || recog_memoized (pred) == CODE_FOR_mac_600
+ || recog_memoized (pred) == CODE_FOR_mul64_600
+ || recog_memoized (pred) == CODE_FOR_mac64_600
+ || recog_memoized (pred) == CODE_FOR_umul64_600
+ || recog_memoized (pred) == CODE_FOR_umac64_600)
+ return 0;
+ return for_each_rtx (&PATTERN (pred), arc600_corereg_hazard_1,
+ PATTERN (succ));
+}
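+
+/* For example, on ARC600 a write to extension core reg r40 in PRED that is
+   immediately followed by a read of r40 in SUCC makes the walk above
+   return 4, i.e. enough length for one nop.  */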
+
+/* For ARC600:
+   A write to a core reg greater than or equal to 32 must not be immediately
+ followed by a use. Anticipate the length requirement to insert a nop
+ between PRED and SUCC to prevent a hazard. */
+int
+arc_hazard (rtx pred, rtx succ)
+{
+ if (!TARGET_ARC600)
+ return 0;
+ if (!pred || !INSN_P (pred) || !succ || !INSN_P (succ))
+ return 0;
+ if (recog_memoized (succ) == CODE_FOR_doloop_end_i
+ && (JUMP_P (pred) || GET_CODE (PATTERN (pred)) == SEQUENCE))
+ return 4;
+ return arc600_corereg_hazard (pred, succ);
+}
+
+/* Return length adjustment for INSN. */
+int
+arc_adjust_insn_length (rtx insn, int len)
+{
+ int adj = 0;
+
+ if (!INSN_P (insn))
+ return 0;
+ if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ int adj0, adj1, len1;
+ rtx pat = PATTERN (insn);
+ rtx i0 = XVECEXP (pat, 0, 0);
+ rtx i1 = XVECEXP (pat, 0, 1);
+
+ len1 = get_attr_lock_length (i1);
+ gcc_assert (!len || len >= 4 || (len == 2 && get_attr_iscompact (i1)));
+ if (!len1)
+ len1 = get_attr_iscompact (i1) != ISCOMPACT_FALSE ? 2 : 4;
+ adj0 = arc_adjust_insn_length (i0, len - len1);
+ adj1 = arc_adjust_insn_length (i1, len1);
+ return adj0 + adj1;
+ }
+ if (recog_memoized (insn) == CODE_FOR_doloop_end_i)
+ {
+ rtx prev = prev_nonnote_insn (insn);
+
+ return ((LABEL_P (prev)
+ || (TARGET_ARC600
+ && (JUMP_P (prev) || GET_CODE (PATTERN (prev)) == SEQUENCE)))
+ ? 4 : 0);
+ }
+
+  /* Check for a return with only one preceding insn since function
+     start / call.  */
+ if (TARGET_PAD_RETURN
+ && JUMP_P (insn)
+ && GET_CODE (PATTERN (insn)) != ADDR_VEC
+ && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
+ && get_attr_type (insn) == TYPE_RETURN)
+ {
+ rtx prev = prev_active_insn (insn);
+
+ if (!prev || !(prev = prev_active_insn (prev))
+ || ((NONJUMP_INSN_P (prev)
+ && GET_CODE (PATTERN (prev)) == SEQUENCE)
+ ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL)
+ : CALL_ATTR (prev, NON_SIBCALL)))
+ return 4;
+ }
+ /* Rtl changes too much before arc_reorg to keep ccfsm state.
+ But we are not required to give exact answers then. */
+ if (cfun->machine->arc_reorg_started
+ && (JUMP_P (insn) || (len & 2)))
+ {
+ struct arc_ccfsm *statep = &cfun->machine->ccfsm_current;
+
+ arc_ccfsm_advance_to (insn);
+ switch (statep->state)
+ {
+ case 0:
+ break;
+ case 1: case 2:
+ /* Deleted branch. */
+ return -len;
+ case 3: case 4: case 5:
+ /* Conditionalized insn. */
+ if ((!JUMP_P (insn)
+ || (get_attr_type (insn) != TYPE_BRANCH
+ && get_attr_type (insn) != TYPE_UNCOND_BRANCH
+ && (get_attr_type (insn) != TYPE_RETURN
+ || (statep->cc != ARC_CC_EQ && statep->cc != ARC_CC_NE)
+ || NEXT_INSN (PREV_INSN (insn)) != insn)))
+ && (len & 2))
+ adj = 2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
+ /* Restore extracted operands - otherwise splitters like the addsi3_mixed one
+ can go awry. */
+ extract_constrain_insn_cached (insn);
+
+ if (TARGET_ARC600)
+ {
+ rtx succ = next_real_insn (insn);
+
+ if (!succ || !INSN_P (succ))
+ return adj;
+ return adj + arc600_corereg_hazard (insn, succ);
+ }
+ return adj;
+}
+/* For ARC600: If a write to a core reg >=32 appears in a delay slot
+ (other than of a forward brcc), it creates a hazard when there is a read
+ of the same register at the branch target. We can't know what is at the
+ branch target of calls, and for branches, we don't really know before the
+   end of delay slot scheduling, either.  Not only can an individual instruction
+   be hoisted out into a delay slot, a basic block can also be emptied this
+ way, and branch and/or fall through targets be redirected. Hence we don't
+ want such writes in a delay slot. */
+/* Called by arc_write_ext_corereg via for_each_rtx. */
+static int
+write_ext_corereg_1 (rtx *xp, void *data ATTRIBUTE_UNUSED)
+{
+ rtx x = *xp;
+ rtx dest;
+
+ switch (GET_CODE (x))
+ {
+ case SET: case POST_INC: case POST_DEC: case PRE_INC: case PRE_DEC:
+ break;
+ default:
+ /* This is also fine for PRE/POST_MODIFY, because they contain a SET. */
+ return 0;
+ }
+ dest = XEXP (x, 0);
+ if (REG_P (dest) && REGNO (dest) >= 32 && REGNO (dest) < 61)
+ return 1;
+ return 0;
+}
+
+/* Return nonzero iff INSN writes to an extension core register.  */
+int
+arc_write_ext_corereg (rtx insn)
+{
+ return for_each_rtx (&PATTERN (insn), write_ext_corereg_1, 0);
+}
+
+rtx
+arc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
+ int /* enum machine_mode */ mode)
+{
+ rtx addr, inner;
+
+ if (flag_pic && SYMBOLIC_CONST (x))
+ (x) = arc_legitimize_pic_address (x, 0);
+ addr = x;
+ if (GET_CODE (addr) == CONST)
+ addr = XEXP (addr, 0);
+ if (GET_CODE (addr) == PLUS
+ && CONST_INT_P (XEXP (addr, 1))
+ && ((GET_CODE (XEXP (addr, 0)) == SYMBOL_REF
+ && !SYMBOL_REF_FUNCTION_P (XEXP (addr, 0)))
+ || (REG_P (XEXP (addr, 0))
+ && (INTVAL (XEXP (addr, 1)) & 252))))
+ {
+ HOST_WIDE_INT offs, upper;
+ int size = GET_MODE_SIZE (mode);
+
+ offs = INTVAL (XEXP (addr, 1));
+ upper = (offs + 256 * size) & ~511 * size;
+ inner = plus_constant (XEXP (addr, 0), upper);
+#if 0 /* ??? this produces worse code for EEMBC idctrn01 */
+ if (GET_CODE (x) == CONST)
+ inner = gen_rtx_CONST (Pmode, inner);
+#endif
+ addr = plus_constant (force_reg (Pmode, inner), offs - upper);
+ x = addr;
+ }
+ else if (GET_CODE (addr) == SYMBOL_REF && !SYMBOL_REF_FUNCTION_P (addr))
+ x = force_reg (Pmode, x);
+ if (memory_address_p (mode, x))
+ return x;
+ return NULL_RTX;
+}
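+
+/* A worked example of the offset splitting above: for SImode (size 4) and
+   an address (plus reg 2000), upper = (2000 + 1024) & -2048 = 2048, so
+   reg + 2048 is forced into a register and the address becomes
+   (plus tmp -48), keeping the residual offset small.  */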
+
+static rtx
+arc_delegitimize_address (rtx x)
+{
+ if (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == CONST
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == UNSPEC
+ && XINT (XEXP (XEXP (x, 0), 0), 1) == ARC_UNSPEC_GOT)
+ return XVECEXP (XEXP (XEXP (x, 0), 0), 0, 0);
+ return x;
+}
+
+rtx
+gen_acc1 (void)
+{
+ return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
+}
+
+rtx
+gen_acc2 (void)
+{
+ return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
+}
+
+rtx
+gen_mlo (void)
+{
+ return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 59: 58);
+}
+
+rtx
+gen_mhi (void)
+{
+ return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 58: 59);
+}
+
+/* Return nonzero iff BRANCH should be unaligned if possible by upsizing
+ a previous instruction. */
+int
+arc_unalign_branch_p (rtx branch)
+{
+ rtx note;
+
+ if (!TARGET_UNALIGN_BRANCH)
+ return 0;
+ /* Do not do this if we have a filled delay slot. */
+ if (get_attr_delay_slot_filled (branch) == DELAY_SLOT_FILLED_YES
+ && !INSN_DELETED_P (NEXT_INSN (branch)))
+ return 0;
+ note = find_reg_note (branch, REG_BR_PROB, 0);
+ return (!note
+ || (arc_unalign_prob_threshold && !br_prob_note_reliable_p (note))
+ || INTVAL (XEXP (note, 0)) < arc_unalign_prob_threshold);
+}
+
+/* When estimating sizes during arc_reorg, when optimizing for speed, there
+ are three reasons why we need to consider branches to be length 6:
+   - annul-false delay slot insns are implemented using conditional execution,
+ thus preventing short insn formation where used.
+ - for ARC600: annul-true delay slot insns are implemented where possible
+ using conditional execution, preventing short insn formation where used.
+ - for ARC700: likely or somewhat likely taken branches are made long and
+ unaligned if possible to avoid branch penalty. */
+int
+arc_branch_size_unknown_p (void)
+{
+ return !optimize_size && arc_reorg_in_progress;
+}
+
+/* We are about to output a return insn. Add padding if necessary to avoid
+ a mispredict. A return could happen immediately after the function
+ start, but after a call we know that there will be at least a blink
+ restore. */
+void
+arc_pad_return (void)
+{
+ rtx insn = current_output_insn;
+ rtx prev = prev_active_insn (insn);
+ int want_long;
+
+ if (!prev)
+ {
+ fputs ("\tnop_s\n", asm_out_file);
+ cfun->machine->unalign ^= 2;
+ want_long = 1;
+ }
+ /* If PREV is a sequence, we know it must be a branch / jump or a tailcall,
+ because after a call, we'd have to restore blink first. */
+ else if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+ return;
+ else
+ {
+ want_long = (get_attr_length (prev) == 2);
+ prev = prev_active_insn (prev);
+ }
+ if (!prev
+ || ((NONJUMP_INSN_P (prev) && GET_CODE (PATTERN (prev)) == SEQUENCE)
+ ? CALL_ATTR (XVECEXP (PATTERN (prev), 0, 0), NON_SIBCALL)
+ : CALL_ATTR (prev, NON_SIBCALL)))
+ {
+ if (want_long)
+ cfun->machine->size_reason
+ = "call/return and return/return must be 6 bytes apart to avoid mispredict";
+ else if (TARGET_UNALIGN_BRANCH && cfun->machine->unalign)
+ {
+ cfun->machine->size_reason
+ = "Long unaligned jump avoids non-delay slot penalty";
+ want_long = 1;
+ }
+ /* Disgorge delay insn, if there is any. */
+ if (final_sequence)
+ {
+ prev = XVECEXP (final_sequence, 0, 1);
+ gcc_assert (!prev_real_insn (insn)
+ || !arc_hazard (prev_real_insn (insn), prev));
+ cfun->machine->force_short_suffix = !want_long;
+ final_scan_insn (prev, asm_out_file, optimize, 1, NULL);
+ cfun->machine->force_short_suffix = -1;
+ INSN_DELETED_P (prev) = 1;
+ current_output_insn = insn;
+ }
+ else if (want_long)
+ fputs ("\tnop\n", asm_out_file);
+ else
+ {
+ fputs ("\tnop_s\n", asm_out_file);
+ cfun->machine->unalign ^= 2;
+ }
+ }
+ return;
+}
+
+/* The usual; we set up our machine_function data. */
+static struct machine_function *
+arc_init_machine_status (void)
+{
+ struct machine_function *machine;
+ machine =
+ (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
+ machine->fn_type = ARC_FUNCTION_UNKNOWN;
+ machine->force_short_suffix = -1;
+
+ return machine;
+}
+
+/* Implements INIT_EXPANDERS. We just set up to call the above
+ function. */
+void
+arc_init_expanders (void)
+{
+ init_machine_status = arc_init_machine_status;
+}
+
+/* Check if OP is a proper parallel of a millicode call pattern. OFFSET
+   indicates a number of elements to ignore - that allows us to have a
+   sibcall pattern that starts with (return).  LOAD_P is zero for store
+   multiples (for prologues), one for load multiples (for epilogues),
+ and two for load multiples where no final clobber of blink is required.
+ We also skip the first load / store element since this is supposed to
+ be checked in the instruction pattern. */
+int
+arc_check_millicode (rtx op, int offset, int load_p)
+{
+ int len = XVECLEN (op, 0) - offset;
+ int i;
+
+ if (load_p == 2)
+ {
+ if (len < 2 || len > 13)
+ return 0;
+ load_p = 1;
+ }
+ else
+ {
+ rtx elt = XVECEXP (op, 0, --len);
+
+ if (GET_CODE (elt) != CLOBBER
+ || !REG_P (XEXP (elt, 0))
+ || REGNO (XEXP (elt, 0)) != RETURN_ADDR_REGNUM
+ || len < 3 || len > 13)
+ return 0;
+ }
+ for (i = 1; i < len; i++)
+ {
+ rtx elt = XVECEXP (op, 0, i + offset);
+ rtx reg, mem, addr;
+
+ if (GET_CODE (elt) != SET)
+ return 0;
+ mem = XEXP (elt, load_p);
+ reg = XEXP (elt, 1-load_p);
+ if (!REG_P (reg) || REGNO (reg) != 13+i || !MEM_P (mem))
+ return 0;
+ addr = XEXP (mem, 0);
+ if (GET_CODE (addr) != PLUS
+ || !rtx_equal_p (stack_pointer_rtx, XEXP (addr, 0))
+ || !CONST_INT_P (XEXP (addr, 1)) || INTVAL (XEXP (addr, 1)) != i*4)
+ return 0;
+ }
+ return 1;
+}
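+
+/* For example, with OFFSET 0 and LOAD_P 0, a store-multiple pattern like
+     (parallel [(set (mem (plus sp 0)) (reg 13))
+		(set (mem (plus sp 4)) (reg 14))
+		(set (mem (plus sp 8)) (reg 15))
+		(clobber (reg blink))])
+   is accepted: the trailing clobber of RETURN_ADDR_REGNUM is peeled off,
+   and elements 1..2 must store r14, r15 at sp+4, sp+8.  */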
+
+int
+arc_get_unalign (void)
+{
+ return cfun->machine->unalign;
+}
+
+void
+arc_clear_unalign (void)
+{
+ if (cfun)
+ cfun->machine->unalign = 0;
+}
+
+void
+arc_toggle_unalign (void)
+{
+ cfun->machine->unalign ^= 2;
+}
+
+/* Operands 0..2 are the operands of an addsi which uses a 12 bit
+   constant in operand 2, but which would require a LIMM because of
+   operand mismatch.
+   Operands 3 and 4 are new SET_SRCs for operand 0.  */
+void
+split_addsi (rtx *operands)
+{
+ int val = INTVAL (operands[2]);
+
+ /* Try for two short insns first. Lengths being equal, we prefer
+ expansions with shorter register lifetimes. */
+ if (val > 127 && val <= 255
+ && satisfies_constraint_Rcq (operands[0]))
+ {
+ operands[3] = operands[2];
+ operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
+ }
+ else
+ {
+ operands[3] = operands[1];
+ operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[2]);
+ }
+}
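+
+/* For example, with a short-insn capable operand 0 and operand 2 == 200,
+   the first branch above yields operand 3 == 200 and operand 4 ==
+   (plus dest src): in effect a short move of the constant followed by a
+   short register-register add, instead of an add with a long immediate.  */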
+
+/* Operands 0..2 are the operands of a subsi which uses a 12 bit
+ constant in operand 1, but which would require a LIMM because of
+ operand mismatch.
+   Operands 3 and 4 are new SET_SRCs for operand 0.  */
+void
+split_subsi (rtx *operands)
+{
+ int val = INTVAL (operands[1]);
+
+ /* Try for two short insns first. Lengths being equal, we prefer
+ expansions with shorter register lifetimes. */
+ if (satisfies_constraint_Rcq (operands[0])
+ && satisfies_constraint_Rcq (operands[2]))
+ {
+ if (val >= -31 && val <= 127)
+ {
+ operands[3] = gen_rtx_NEG (SImode, operands[2]);
+ operands[4] = gen_rtx_PLUS (SImode, operands[0], operands[1]);
+ return;
+ }
+ else if (val >= 0 && val < 255)
+ {
+ operands[3] = operands[1];
+ operands[4] = gen_rtx_MINUS (SImode, operands[0], operands[2]);
+ return;
+ }
+ }
+ /* If the destination is not an ARCompact16 register, we might
+ still have a chance to make a short insn if the source is;
+ we need to start with a reg-reg move for this. */
+ operands[3] = operands[2];
+ operands[4] = gen_rtx_MINUS (SImode, operands[1], operands[0]);
+}
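+
+/* For example, for dest = 100 - src with both registers short-insn
+   capable, the first branch above gives operand 3 == (neg src) and
+   operand 4 == (plus dest 100): a short negate followed by a short add.  */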
+
+/* Operands 0..1 are the operands of a 64 bit move instruction.
+   Split it into two moves with operands 2/3 and 4/5.  */
+void
+arc_split_move (rtx *operands)
+{
+ enum machine_mode mode = GET_MODE (operands[0]);
+ int i;
+ int swap = 0;
+ rtx xop[4];
+
+ for (i = 0; i < 2; i++)
+ {
+ if (MEM_P (operands[i]) && auto_inc_p (XEXP (operands[i], 0)))
+ {
+ rtx addr = XEXP (operands[i], 0);
+ rtx r, o;
+ enum rtx_code code;
+
+ gcc_assert (!reg_overlap_mentioned_p (operands[0], addr));
+ switch (GET_CODE (addr))
+ {
+ case PRE_DEC: o = GEN_INT (-8); goto pre_modify;
+ case PRE_INC: o = GEN_INT (8); goto pre_modify;
+ case PRE_MODIFY: o = XEXP (XEXP (addr, 1), 1);
+ pre_modify:
+ code = PRE_MODIFY;
+ break;
+ case POST_DEC: o = GEN_INT (-8); goto post_modify;
+ case POST_INC: o = GEN_INT (8); goto post_modify;
+ case POST_MODIFY: o = XEXP (XEXP (addr, 1), 1);
+ post_modify:
+ code = POST_MODIFY;
+ swap = 2;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ r = XEXP (addr, 0);
+ xop[0+i] = adjust_automodify_address_nv
+ (operands[i], SImode,
+ gen_rtx_fmt_ee (code, Pmode, r,
+ gen_rtx_PLUS (Pmode, r, o)),
+ 0);
+ xop[2+i] = adjust_automodify_address_nv
+ (operands[i], SImode, plus_constant (r, 4), 4);
+ }
+ else
+ {
+ xop[0+i] = operand_subword (operands[i], 0, 0, mode);
+ xop[2+i] = operand_subword (operands[i], 1, 0, mode);
+ }
+ }
+ if (reg_overlap_mentioned_p (xop[0], xop[3]))
+ {
+ swap = 2;
+ gcc_assert (!reg_overlap_mentioned_p (xop[2], xop[1]));
+ }
+ operands[2+swap] = xop[0];
+ operands[3+swap] = xop[1];
+ operands[4-swap] = xop[2];
+ operands[5-swap] = xop[3];
+}
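+
+/* For example, in a little-endian DImode register move where the low word
+   of the destination is the same register as the high word of the source,
+   the overlap check above sets SWAP, so the high-word move is emitted
+   first and the overlapping register is read before it is clobbered.  */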
+
+void
+arc_split_dilogic (rtx *operands, enum rtx_code code)
+{
+ int word, i;
+
+ for (word = 0; word < 2; word++)
+ for (i = 0; i < 3; i++)
+ operands[3+word*3+i] = operand_subword (operands[i], word, 0, DImode);
+ if (reg_overlap_mentioned_p (operands[3], operands[7])
+ || reg_overlap_mentioned_p (operands[3], operands[8]))
+ {
+ rtx tmp;
+
+ for (i = 0; i < 3; i++)
+ {
+ tmp = operands[3+i];
+ operands[3+i] = operands[6+i];
+ operands[6+i] = tmp;
+ }
+ gcc_assert (!reg_overlap_mentioned_p (operands[3], operands[7]));
+ gcc_assert (!reg_overlap_mentioned_p (operands[3], operands[8]));
+ }
+ for (word = 0, i = 0; word < 2; word++)
+ {
+ rtx src = simplify_gen_binary (code, SImode, operands[3+word*3+1],
+ operands[3+word*3+2]);
+ rtx dst = operands[3+word*3];
+
+ if (!rtx_equal_p (src, dst) || !optimize)
+ emit_insn (gen_rtx_SET (VOIDmode, dst, src));
+ }
+ if (!get_insns ())
+ emit_note (NOTE_INSN_DELETED);
+}
+
+const char *
+arc_short_long (rtx insn, const char *s_tmpl, const char *l_tmpl)
+{
+ int is_short = arc_verify_short (insn, cfun->machine->unalign, -1);
+
+ extract_constrain_insn_cached (insn);
+ return is_short ? s_tmpl : l_tmpl;
+}
+
+/* Searches X for any reference to REGNO, returning the rtx of the
+ reference found if any. Otherwise, returns NULL_RTX. */
+rtx
+arc_regno_use_in (unsigned int regno, rtx x)
+{
+ const char *fmt;
+ int i, j;
+ rtx tem;
+
+ if (REG_P (x) && refers_to_regno_p (regno, regno+1, x, (rtx *) 0))
+ return x;
+
+ fmt = GET_RTX_FORMAT (GET_CODE (x));
+ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ {
+ if ((tem = regno_use_in (regno, XEXP (x, i))))
+ return tem;
+ }
+ else if (fmt[i] == 'E')
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	  if ((tem = regno_use_in (regno, XVECEXP (x, i, j))))
+ return tem;
+ }
+
+ return NULL_RTX;
+}
+
+int
+arc_attr_type (rtx insn)
+{
+ if (NONJUMP_INSN_P (insn)
+ ? (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ : JUMP_P (insn)
+ ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+ : !CALL_P (insn))
+ return -1;
+ return get_attr_type (insn);
+}
+
+int
+arc_sets_cc_p (rtx insn)
+{
+ if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ insn = XVECEXP (PATTERN (insn), 0, XVECLEN (PATTERN (insn), 0) - 1);
+ return arc_attr_type (insn) == TYPE_COMPARE;
+}
+
+/* Return nonzero if INSN is an instruction with a delay slot we may want
+ to fill. */
+int
+arc_need_delay (rtx insn)
+{
+ rtx next;
+
+ if (!flag_delayed_branch)
+ return 0;
+ /* The return at the end of a function needs a delay slot. */
+ if (NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == USE
+ && (!(next = next_active_insn (insn))
+ || ((!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) != SEQUENCE)
+ && arc_attr_type (next) == TYPE_RETURN))
+ && (!TARGET_PAD_RETURN
+ || (prev_active_insn (insn)
+ && prev_active_insn (prev_active_insn (insn))
+ && prev_active_insn (prev_active_insn (prev_active_insn (insn))))))
+ return 1;
+ if (NONJUMP_INSN_P (insn)
+ ? (GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER
+ || GET_CODE (PATTERN (insn)) == SEQUENCE)
+ : JUMP_P (insn)
+ ? (GET_CODE (PATTERN (insn)) == ADDR_VEC
+ || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
+ : !CALL_P (insn))
+ return 0;
+ return num_delay_slots (insn);
+}
+
+int
+arc_scheduling_not_expected (void)
+{
+ return cfun->machine->arc_reorg_started;
+}
+
+/* Oddly enough, sometimes we get a zero overhead loop that branch
+   shortening doesn't think is a loop - observed with compile/pr24883.c
+   -O3 -fomit-frame-pointer -funroll-loops.  Make sure the alignment is
+   visible for branch shortening (we actually align the loop
+   insn before it, but that is equivalent since the loop insn is 4 bytes
+   long).  */
+int
+arc_label_align (rtx label)
+{
+  int loop_align = LOOP_ALIGN (label);
+
+ if (loop_align > align_labels_log)
+ {
+ rtx prev = prev_nonnote_insn (label);
+
+ if (prev && NONJUMP_INSN_P (prev)
+ && GET_CODE (PATTERN (prev)) == PARALLEL
+ && recog_memoized (prev) == CODE_FOR_doloop_begin_i)
+ return loop_align;
+ }
+ return align_labels_log;
+}
+
+int
+arc_text_label (rtx label)
+{
+ rtx next;
+
+  /* ??? We use deleted labels as if they were still there, see
+     gcc.c-torture/compile/20000326-2.c.  */
+ gcc_assert (GET_CODE (label) == CODE_LABEL
+ || (GET_CODE (label) == NOTE
+ && NOTE_KIND (label) == NOTE_INSN_DELETED_LABEL));
+ next = next_nonnote_insn (label);
+ if (next)
+ return (GET_CODE (next) != JUMP_INSN
+ || GET_CODE (PATTERN (next)) != ADDR_VEC);
+ return 0;
+}
+
+int
+arc_decl_pretend_args (tree decl)
+{
+ /* struct function is in DECL_STRUCT_FUNCTION (decl), but no
+ pretend_args there... See PR38391. */
+ gcc_assert (decl == current_function_decl);
+ return crtl->args.pretend_args_size;
+}
+
+/* Without this, gcc.dg/tree-prof/bb-reorg.c fails to assemble
+ when compiling with -O2 -freorder-blocks-and-partition -fprofile-use
+ -D_PROFILE_USE; delay branch scheduling then follows a REG_CROSSING_JUMP
+ to redirect two breqs. */
+static bool
+arc_can_follow_jump (const_rtx follower, const_rtx followee)
+{
+ /* ??? get_attr_type is declared to take an rtx. */
+ union { const_rtx c; rtx r; } u;
+
+ u.c = follower;
+ if (find_reg_note (followee, REG_CROSSING_JUMP, NULL_RTX))
+ switch (get_attr_type (u.r))
+ {
+ case TYPE_BRCC:
+ case TYPE_BRCC_NO_DELAY_SLOT:
+ return false;
+ default:
+ return true;
+ }
+ return true;
+}
+
+/* Called via note_stores. */
+static void
+arc_dead_or_set_postreload_1 (rtx dest, const_rtx x ATTRIBUTE_UNUSED,
+ void *data)
+{
+ rtx reg = *(rtx *)data;
+
+ if (REG_P (dest) && reg
+ && REGNO (reg) >= REGNO (dest)
+ && (REGNO (reg) + HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg))
+ <= REGNO (dest) + HARD_REGNO_NREGS (REGNO (dest), GET_MODE (dest))))
+ *(rtx *)data = NULL_RTX;
+}
+
+/* Return nonzero if REG is set in or not used after INSN.
+   After reload, REG_DEAD notes may precede the actual death of a register
+   in the same basic block.  Additional labels may be added by reorg, so
+ we only know we can trust a REG_DEAD note when we find a jump. */
+int
+arc_dead_or_set_postreload_p (const_rtx insn, const_rtx reg)
+{
+ enum rtx_code code;
+ rtx dead;
+
+ /* If the reg is set by this instruction, then it is safe for our case. */
+ note_stores (PATTERN (insn), arc_dead_or_set_postreload_1, &reg);
+ if (!reg)
+ return 1;
+
+ dead = find_regno_note (insn, REG_DEAD, REGNO (reg));
+ if (dead
+ && (HARD_REGNO_NREGS (REGNO (reg), GET_MODE (reg))
+ > HARD_REGNO_NREGS (REGNO (XEXP (dead, 0)),
+ GET_MODE (XEXP (dead, 0)))))
+ dead = NULL_RTX;
+ while ((insn = NEXT_INSN (insn)))
+ {
+ if (!INSN_P (insn))
+ continue;
+
+ code = GET_CODE (insn);
+
+ /* If this is a sequence, we must handle them all at once.
+ We could have for instance a call that sets the target register,
+ and an insn in a delay slot that uses the register. In this case,
+ we must return 0. */
+ if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ int i;
+ int retval = 0;
+ int annull = 0;
+
+ for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
+ {
+ rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
+
+ if (reg_referenced_p (reg, PATTERN (this_insn)))
+ return 0;
+ if (!annull)
+ {
+ const_rtx tmp = reg;
+
+ note_stores (PATTERN (this_insn),
+ arc_dead_or_set_postreload_1, &tmp);
+ if (!tmp)
+ retval = 1;
+ }
+ if (GET_CODE (this_insn) == CALL_INSN)
+ {
+ if (find_reg_fusage (this_insn, USE, reg))
+ return 0;
+ code = CALL_INSN;
+ }
+ else if (GET_CODE (this_insn) == JUMP_INSN)
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ annull = 1;
+ code = JUMP_INSN;
+ }
+ }
+ if (retval == 1)
+ return 1;
+ }
+ else
+ {
+ if (reg_referenced_p (reg, PATTERN (insn)))
+ return 0;
+ if (GET_CODE (insn) == CALL_INSN
+ && find_reg_fusage (insn, USE, reg))
+ return 0;
+	  note_stores (PATTERN (insn), arc_dead_or_set_postreload_1, &reg);
+ if (!reg)
+ return 1;
+ }
+
+ if (code == JUMP_INSN)
+ return dead != NULL_RTX;
+
+ if (code == CALL_INSN && call_used_regs[REGNO (reg)])
+ return 1;
+ }
+ return 1;
+}
+
+#include "gt-arc.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index ff473c3f73e..ac58737ae05 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -1,6 +1,13 @@
/* Definitions of target machine for GNU compiler, Argonaut ARC cpu.
- Copyright (C) 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, 2004, 2005,
- 2007 Free Software Foundation, Inc.
+ Copyright (C) 1994, 1995, 1997, 1998, 2007, 2008, 2009
+ Free Software Foundation, Inc.
+
+ Sources derived from work done by Sankhya Technologies (www.sankhya.com)
+
+   Position Independent Code support added, code cleaned up,
+   comments and support for ARC700 instructions added by
+   Saurabh Verma (saurabh.verma@codito.com)
+   Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
This file is part of GCC.
@@ -18,14 +25,40 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* ??? This is an old port, and is undoubtedly suffering from bit rot. */
+#ifndef GCC_ARC_H
+#define GCC_ARC_H
/* Things to do:
- incscc, decscc?
- - print active compiler options in assembler output
+
*/
+/* *************************************************************************
+ * Role of the SYMBOL_REF_FLAG in the rtx:
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This is to document the change in the role of the SYMBOL_REF_FLAG
+ * from the PIC enabled version of the toolchain onwards.
+ * Before the PIC support was added to the compiler, the machine
+ * specific SYMBOL_REF_FLAG was used to mark a function's symbol so
+ * that a symbol reference to a function ( as in obtaining a pointer to
+ * that function ) is printed as %st(<<functionname>>).
+ *
+ * The PIC version of the compiler uses this flag to mark a locally
+ * defined symbol, for which references in the code have to be made as
+ * pc+ symbolname@GOTOFF instead of symbolname@GOT.Also references to
+ * local functions are made relative instead of going through the PLT.
+ * The earlier work of the flag is accomplished by mangling the
+ * name of the symbol(adding an *_CALL_FLAG_CHAR at the start) and modifying
+ * the print_operand routine to unmangle it and print the reference as
+ * %st(symbol_name_unmangled) instead. The convention used for mangling
+ * accomodates the long_call and short_call function attributes by using one
+ * of (LONG_/SHORT_/SIMPLE_)CALL_FLAG_CHAR characters as the prefix.
+ * ************************************************************************/
+
+
+/* ashwin : include options.h */
+/* #include "options.h" */
#undef ASM_SPEC
#undef LINK_SPEC
@@ -35,56 +68,236 @@ along with GCC; see the file COPYING3. If not see
#undef PTRDIFF_TYPE
#undef WCHAR_TYPE
#undef WCHAR_TYPE_SIZE
-#undef ASM_OUTPUT_LABELREF
-
+#undef ASM_APP_ON
+#undef ASM_APP_OFF
+#undef CC1_SPEC
+
+#ifndef UCLIBC_DEFAULT
+#define UCLIBC_DEFAULT 0
+#endif
+
/* Print subsidiary information on the compiler version in use. */
+#if !UCLIBC_DEFAULT
#define TARGET_VERSION fprintf (stderr, " (arc)")
+#else
+#define TARGET_VERSION fprintf (stderr, " (ARC GNU Linux / uClibc with ELF)")
+#endif
+
+
/* Names to predefine in the preprocessor for this target machine. */
-#define TARGET_CPU_CPP_BUILTINS() \
- do \
- { \
- builtin_define ("__arc__"); \
- if (TARGET_BIG_ENDIAN) \
- builtin_define ("__big_endian__"); \
- if (arc_cpu_type == 0) \
- builtin_define ("__base__"); \
- builtin_assert ("cpu=arc"); \
- builtin_assert ("machine=arc"); \
- } while (0)
-
-/* Pass -mmangle-cpu if we get -mcpu=*.
- Doing it this way lets one have it on as default with -mcpu=*,
- but also lets one turn it off with -mno-mangle-cpu. */
+/*
+ ??? check whether __base__ definition can be removed. If it can be
+ removed, "#ifdef __base__" has to be removed from lib1funcs.asm.
+*/
+#define TARGET_CPU_CPP_BUILTINS() \
+ do { \
+ builtin_define ("__arc__"); \
+ if (TARGET_A4) \
+ builtin_define ("__A4__"); \
+ else if (TARGET_A5) \
+ builtin_define ("__A5__"); \
+ else if (TARGET_ARC600) \
+ { \
+ builtin_define ("__A6__"); \
+ builtin_define ("__ARC600__"); \
+ } \
+ else if (TARGET_ARC700) \
+ { \
+ builtin_define ("__A7__"); \
+ builtin_define ("__ARC700__"); \
+ } \
+ if (TARGET_NORM) \
+ builtin_define ("__ARC_NORM__");\
+ if (TARGET_MUL64_SET) \
+ builtin_define ("__ARC_MUL64__");\
+ if (TARGET_MULMAC_32BY16_SET) \
+ builtin_define ("__ARC_MUL32BY16__");\
+ builtin_define ("__base__"); \
+ if (TARGET_SIMD_SET) \
+ builtin_define ("__ARC_SIMD__"); \
+ builtin_assert ("cpu=arc"); \
+ builtin_assert ("machine=arc"); \
+ builtin_define (TARGET_BIG_ENDIAN \
+ ? "__BIG_ENDIAN__" : "__LITTLE_ENDIAN__"); \
+ if (TARGET_BIG_ENDIAN) \
+ builtin_define ("__big_endian__"); \
+} while(0)
+
#define CC1_SPEC "\
-%{mcpu=*:-mmangle-cpu} \
%{EB:%{EL:%emay not use both -EB and -EL}} \
%{EB:-mbig-endian} %{EL:-mlittle-endian} \
"
+#define ASM_SPEC "\
+%{v} %{mbig-endian|EB:-EB} %{EL} %{mA4} %{mA5} %{mA6} %{mARC600} \
+%{mA7} %{mARC700} \
+%{mbarrel_shifter} %{mno-mpy} %{mmul64} %{mmul32x16:-mdsp} %{mnorm} %{mswap} %{mARC700|mA7:-mEA} %{mEA} %{mmin_max} %{mspfp*} %{mdpfp*} \
+%{msimd}"
+
+#if UCLIBC_DEFAULT
+#if 1
+/* Note that the default is to link against dynamic libraries, if they are
+ available. While it is a bit simpler to get started with static linking,
+ it is much easier to comply with the LGPL when you use dynamic linking, and
+ thus get a product that you can legally ship. */
+#define STATIC_LINK_SPEC "%{static:-Bstatic}"
+#else /* Make ease of use of producing something the main concern. */
+#define STATIC_LINK_SPEC "%{!mdynamic:%{!shared:-Bstatic}}"
+#endif
+#define LINK_SPEC "%{h*} %{version:-v} \
+ %{b} %{Wl,*:%*} \
+ "STATIC_LINK_SPEC" \
+ %{symbolic:-Bsymbolic} \
+ %{rdynamic:-export-dynamic}\
+ %{!dynamic-linker:-dynamic-linker /lib/ld-uClibc.so.0}\
+ -X %{mbig-endian:-EB} \
+ %{EB} %{EL} \
+ %{pg|p|profile:-marclinux_prof;: -marclinux} \
+ %{!z:-z max-page-size=0x1000 -z common-page-size=0x1000} \
+ %{shared:-shared}"
+/* Like the standard LINK_COMMAND_SPEC, but add -lgcc_s when building
+ a shared library with -nostdlib, so that the hidden functions of libgcc
+ will be incorporated. */
+#define LINK_COMMAND_SPEC "\
+%{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\
+ %(linker) %l " LINK_PIE_SPEC "%X %{o*} %{A} %{d} %{e*} %{m} %{N} %{n} %{r}\
+ %{s} %{t} %{u*} %{x} %{z} %{Z} %{!A:%{!nostdlib:%{!nostartfiles:%S}}}\
+ %{static:} %{L*} %(mfwrap) %(link_libgcc) %o\
+ %{fopenmp:%:include(libgomp.spec)%(link_gomp)} %(mflib)\
+ %{fprofile-arcs|fprofile-generate|coverage:-lgcov}\
+ %{!nostdlib:%{!nodefaultlibs:%(link_ssp) %(link_gcc_c_sequence)}}\
+ %{shared:%{nostdlib:%{!really-nostdlib: -lgcc_s }}} \
+ %{!A:%{!nostdlib:%{!nostartfiles:%E}}} %{T*} }}}}}}"
-#define ASM_SPEC "%{v} %{EB} %{EL}"
+#else
+#define LINK_SPEC "%{v} %{mbig-endian:-EB} %{EB} %{EL}\
+ %{pg|p:-marcelf_prof;mA7|mARC700: -marcelf}"
+#endif
-#define LINK_SPEC "%{v} %{EB} %{EL}"
+#if !UCLIBC_DEFAULT
+#define STARTFILE_SPEC "%{!shared:crt0.o%s} crti%O%s %{pg|p:crtg.o%s} crtbegin.o%s"
+#else
+#define STARTFILE_SPEC "%{!shared:%{!mkernel:crt1.o%s}} crti.o%s \
+ %{!shared:%{pg|p|profile:crtg.o%s} crtbegin.o%s} %{shared:crtbeginS.o%s}"
-#define STARTFILE_SPEC "%{!shared:crt0.o%s} crtinit.o%s"
+#endif
-#define ENDFILE_SPEC "crtfini.o%s"
-
-/* Instruction set characteristics.
- These are internal macros, set by the appropriate -mcpu= option. */
+#if !UCLIBC_DEFAULT
+#define ENDFILE_SPEC "%{pg|p:crtgend.o%s} crtend.o%s crtn%O%s"
+#else
+#define ENDFILE_SPEC "%{!shared:%{pg|p|profile:crtgend.o%s} crtend.o%s} \
+ %{shared:crtendS.o%s} crtn.o%s"
+
+#endif
+
+#if UCLIBC_DEFAULT
+#undef LIB_SPEC
+#define LIB_SPEC \
+ "%{pthread:-lpthread} \
+ %{shared:-lc} \
+ %{!shared:%{pg|p|profile:-lgmon -u profil --defsym __profil=profil} -lc}"
+#else
+#undef LIB_SPEC
+/* -lc_p not present for arc-elf32-* : ashwin */
+#define LIB_SPEC "%{!shared:%{g*:-lg} %{pg|p:-lgmon} -lc}"
+#endif
+
+
+/* -mA7 is invalid for TARGET_CPU_OPT, it would have to be -mA7 -mnorm. */
+#define DRIVER_SELF_SPECS \
+ "%{!mA*:-" TARGET_CPU_DEFAULT_OPT "}%{mA7|mARC700:-mnorm}"
-/* Nonzero means the cpu has a barrel shifter. */
-#define TARGET_SHIFTER 0
+/* Run-time compilation parameters selecting different hardware subsets. */
-/* Which cpu we're compiling for. */
-extern int arc_cpu_type;
+extern int target_flags;
-/* Check if CPU is an extension and set `arc_cpu_type' and `arc_mangle_cpu'
- appropriately. The result should be nonzero if the cpu is recognized,
+#define TARGET_ARCOMPACT (TARGET_A5 || TARGET_ARC600 || TARGET_ARC700)
+#define TARGET_MIXED_CODE (TARGET_ARCOMPACT && TARGET_MIXED_CODE_SET)
+
+#define TARGET_SPFP (TARGET_SPFP_FAST_SET || TARGET_SPFP_COMPACT_SET)
+#define TARGET_DPFP (TARGET_DPFP_FAST_SET || TARGET_DPFP_COMPACT_SET)
+
+#define SUBTARGET_SWITCHES
+
+/* Instruction set characteristics.
+ These are internal macros, set by the appropriate -m option. */
+
+/* Non-zero means the cpu has a barrel shifter. This flag is set by default
+ * for post A4 cores, and only for A4 when -mbarrel_shifter is given. */
+#define TARGET_SHIFTER (TARGET_ARCOMPACT || TARGET_BARREL_SHIFTER_SET)
+
+/* Non-zero means the cpu supports norm instruction. This flag is set by
+ * default for A7, and only for pre A7 cores when -mnorm is given. */
+#define TARGET_NORM (TARGET_ARC700 || TARGET_NORM_SET)
+/* Indicate if an optimized floating point emulation library is available. */
+#define TARGET_OPTFPE \
+ (TARGET_ARC700 \
+ || (TARGET_ARC600 && TARGET_NORM_SET \
+ && (TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET)))
+
+/* Non-zero means the cpu supports swap instruction. This flag is set by
+ * default for A7, and only for pre A7 cores when -mswap is given. */
+#define TARGET_SWAP (TARGET_ARC700 || TARGET_SWAP_SET)
+
+/* Non-zero means the cpu supports min and max instructions. This flag is set by
+ * default for post A4 cores, and only for A4 when -mmin_max is given. */
+#define TARGET_MINMAX (TARGET_ARCOMPACT || TARGET_MINMAX_SET)
+
+/* Provide some macros for size / scheduling features of the ARC700, so
+ that we can pick & choose features if we get a new cpu family member. */
+
+/* Should we try to unalign likely taken branches without a delay slot. */
+#define TARGET_UNALIGN_BRANCH (TARGET_ARC700 && !optimize_size)
+
+/* Should we upsize short delayed branches with a short delay insn? */
+#define TARGET_UPSIZE_DBR (TARGET_ARC700 && !optimize_size)
+
+/* Should we add padding before a return insn to avoid mispredict? */
+#define TARGET_PAD_RETURN (TARGET_ARC700 && !optimize_size)
+
+/* For an annulled-true delay slot insn for a delayed branch, should we only
+ use conditional execution? */
+#define TARGET_AT_DBR_CONDEXEC (!TARGET_ARC700)
+
+enum processor_type {
+ PROCESSOR_A4,
+ PROCESSOR_A5,
+ PROCESSOR_ARC600,
+ PROCESSOR_ARC700
+};
+
+extern enum processor_type arc_cpu; /* which cpu we are compiling for */
+extern const char *arc_cpu_string; /* A4/A5/ARC600/ARC700 */
+
+/* ashwin : since TARGET_OPTIONS have moved to arc.opt, these are not needed here. */
+/* extern const char *arc_text_string,*arc_data_string,*arc_rodata_string; */
+
+/* Recast the cpu class to be the cpu attribute. */
+#define arc_cpu_attr ((enum attr_cpu)arc_cpu)
+
+/* Check if CPU is an extension and set `arc_mangle_cpu' appropriately.
+ The result should be non-zero if the cpu is recognized,
otherwise zero. This is intended to be redefined in a cover file.
This is used by arc_init. */
#define ARC_EXTENSION_CPU(cpu) 0
+#ifndef MULTILIB_DEFAULTS
+#define MULTILIB_DEFAULTS { TARGET_CPU_DEFAULT_OPT, "EL" }
+#endif
+
+#define OPTIMIZATION_OPTIONS(LEVEL,SIZE) \
+do { \
+ TARGET_Rcq = TARGET_Rcw = 1; \
+ arc_size_opt_level = (SIZE) ? 3 : (LEVEL >= 3) ? 0 : 1; \
+ flag_no_common = 255; /* Mark as not user-initialized. */ \
+ TARGET_ALIGN_CALL = (!(SIZE) && ((LEVEL) >= 3)); \
+ TARGET_EARLY_CBRANCHSI = 1; \
+ TARGET_BBIT_PEEPHOLE = 1; \
+ TARGET_Q_CLASS = (SIZE != 0); \
+ TARGET_CASE_VECTOR_PC_RELATIVE = (SIZE != 0); \
+ TARGET_COMPACT_CASESI = (SIZE != 0); \
+} while (0)
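+
+/* I.e. arc_size_opt_level is 3 for -Os, 0 at -O3 and above, and 1
+   otherwise; call alignment is only requested at -O3 and above, and the
+   'q' register class and compact case vectors are only enabled when
+   optimizing for size.  */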
+
/* Sometimes certain combinations of command options do not make
sense on a particular target machine. You can define a macro
`OVERRIDE_OPTIONS' to take account of this. This macro, if
@@ -94,18 +307,34 @@ extern int arc_cpu_type;
Don't use this macro to turn on various extra optimizations for
`-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
-
#define OVERRIDE_OPTIONS \
do { \
+ if (arc_size_opt_level == 3) \
+ optimize_size = 1; \
+ if (flag_pic) \
+ target_flags |= MASK_NO_SDATA_SET; \
+ if (flag_no_common == 255) \
+ flag_no_common = !TARGET_NO_SDATA_SET; \
+ /* TARGET_COMPACT_CASESI needs the "q" register class. */ \
+ if (TARGET_MIXED_CODE) \
+ TARGET_Q_CLASS = 1; \
+ if (!TARGET_Q_CLASS) \
+ TARGET_COMPACT_CASESI = 0; \
+ if (TARGET_COMPACT_CASESI) \
+ TARGET_CASE_VECTOR_PC_RELATIVE = 1; \
/* These need to be done at start up. It's convenient to do them here. */ \
arc_init (); \
} while (0)
-
+
/* Target machine storage layout. */
-/* Define this if most significant bit is lowest numbered
- in instructions that operate on numbered bit-fields. */
-#define BITS_BIG_ENDIAN 1
+/* Define to use software floating point emulator for REAL_ARITHMETIC and
+ decimal <-> binary conversion. */
+/*#define REAL_ARITHMETIC*/
+
+/* We want zero_extract to mean the same
+ no matter what the byte endianness is. */
+#define BITS_BIG_ENDIAN 0
/* Define this if most significant byte of a word is the lowest numbered. */
#define BYTES_BIG_ENDIAN (TARGET_BIG_ENDIAN)
@@ -122,6 +351,15 @@ do { \
#define LIBGCC2_WORDS_BIG_ENDIAN 0
#endif
+/* Number of bits in an addressable storage unit. */
+#define BITS_PER_UNIT 8
+
+/* Width in bits of a "word", which is the contents of a machine register.
+ Note that this is not necessarily the width of data type `int';
+ if using 16-bit ints on a 68000, this would still be 32.
+ But on a machine with 16-bit registers, this would be 16. */
+#define BITS_PER_WORD 32
+
/* Width of a word, in units (bytes). */
#define UNITS_PER_WORD 4
@@ -137,14 +375,20 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
(MODE) = SImode; \
}
+/* Width in bits of a pointer.
+ See also the macro `Pmode' defined below. */
+#define POINTER_SIZE 32
+
/* Allocation boundary (in *bits*) for storing arguments in argument list. */
#define PARM_BOUNDARY 32
/* Boundary (in *bits*) on which stack pointer should be aligned. */
-#define STACK_BOUNDARY 64
+/* TOCHECK: Changed from 64 to 32 */
+#define STACK_BOUNDARY 32
/* ALIGN FRAMES on word boundaries */
-#define ARC_STACK_ALIGN(LOC) (((LOC)+7) & ~7)
+#define ARC_STACK_ALIGN(LOC) \
+ (((LOC) + STACK_BOUNDARY / BITS_PER_UNIT - 1) & -STACK_BOUNDARY/BITS_PER_UNIT)
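+/* With STACK_BOUNDARY 32 this rounds up to a multiple of 4 bytes;
+   e.g. ARC_STACK_ALIGN (13) == 16.  */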
/* Allocation boundary (in *bits*) for the code of a function. */
#define FUNCTION_BOUNDARY 32
@@ -155,15 +399,30 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
/* Every structure's size must be a multiple of this. */
#define STRUCTURE_SIZE_BOUNDARY 8
-/* A bit-field declared as `int' forces `int' alignment for the struct. */
+/* A bitfield declared as `int' forces `int' alignment for the struct. */
#define PCC_BITFIELD_TYPE_MATTERS 1
+/* An expression for the alignment of a structure field FIELD if the
+ alignment computed in the usual way (including applying of
+ `BIGGEST_ALIGNMENT' and `BIGGEST_FIELD_ALIGNMENT' to the
+ alignment) is COMPUTED. It overrides alignment only if the field
+ alignment has not been set by the `__attribute__ ((aligned (N)))'
+ construct.
+*/
+
+#define ADJUST_FIELD_ALIGN(FIELD, COMPUTED) \
+(TYPE_MODE (strip_array_types (TREE_TYPE (FIELD))) == DFmode \
+ ? MIN ((COMPUTED), 32) : (COMPUTED))
+
+
+
/* No data type wants to be aligned rounder than this. */
/* This is bigger than currently necessary for the ARC. If 8 byte floats are
ever added it's not clear whether they'll need such alignment or not. For
now we assume they will. We can always relax it if necessary but the
reverse isn't true. */
-#define BIGGEST_ALIGNMENT 64
+/* TOCHECK: Changed from 64 to 32 */
+#define BIGGEST_ALIGNMENT 32
/* The best alignment to use in cases where we have a choice. */
#define FASTEST_ALIGNMENT 32
@@ -174,10 +433,17 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
&& (ALIGN) < FASTEST_ALIGNMENT) \
? FASTEST_ALIGNMENT : (ALIGN))
+
/* Make arrays of chars word-aligned for the same reasons. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ (TREE_CODE (TYPE) == ARRAY_TYPE \
+ && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
+
#define DATA_ALIGNMENT(TYPE, ALIGN) \
(TREE_CODE (TYPE) == ARRAY_TYPE \
&& TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
+ && arc_size_opt_level < 3 \
&& (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
/* Set this nonzero if move instructions will actually fail to work
@@ -186,7 +452,7 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
won't croak when given an unaligned address, but the insn will still fail
to produce the correct result. */
#define STRICT_ALIGNMENT 1
-
+
/* Layout of source language data types. */
#define SHORT_TYPE_SIZE 16
@@ -198,23 +464,33 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
#define LONG_DOUBLE_TYPE_SIZE 64
/* Define this as 1 if `char' should by default be signed; else as 0. */
-#define DEFAULT_SIGNED_CHAR 1
+#define DEFAULT_SIGNED_CHAR 0
#define SIZE_TYPE "long unsigned int"
#define PTRDIFF_TYPE "long int"
-#define WCHAR_TYPE "short unsigned int"
-#define WCHAR_TYPE_SIZE 16
-
+#define WCHAR_TYPE "int"
+#define WCHAR_TYPE_SIZE 32
+
+
+/* ashwin : shifted from arc.c:102 */
+#define PROGRAM_COUNTER_REGNO 63
+
/* Standard register usage. */
/* Number of actual hardware registers.
The hardware registers are assigned numbers for the compiler
from 0 to just below FIRST_PSEUDO_REGISTER.
All registers that the compiler knows about must be given numbers,
- even those that are not normally considered general registers. */
-/* Registers 61, 62, and 63 are not really registers and we needn't treat
- them as such. We still need a register for the condition code. */
-#define FIRST_PSEUDO_REGISTER 62
+ even those that are not normally considered general registers.
+
+ Registers 61, 62, and 63 are not really registers and we needn't treat
+   them as such.  We still need a register for the condition code and
+   the argument pointer.  */
+
+/* r63 is pc, r64-r127 = simd vregs, r128-r143 = simd dma config regs,
+   r144, r145 = lp_start, lp_end,
+   and therefore the pseudo registers start from r146.  */
+#define FIRST_PSEUDO_REGISTER 146
/* 1 for registers that have pervasive standard uses
and are not available for the register allocator.
@@ -226,6 +502,8 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
32-59 - reserved for extensions
60 - LP_COUNT
61 - condition code
+ 62 - argument pointer
+ 63 - program counter
For doc purposes:
61 - short immediate data indicator (setting flags)
@@ -233,27 +511,47 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
63 - short immediate data indicator (not setting flags).
The general purpose registers are further broken down into:
+
0-7 - arguments/results
- 8-15 - call used
- 16-23 - call saved
- 24 - call used, static chain pointer
- 25 - call used, gptmp
+ 8-12 - call used (r11 - static chain pointer)
+ 13-25 - call saved
26 - global pointer
27 - frame pointer
28 - stack pointer
+ 29 - ilink1
+ 30 - ilink2
+ 31 - return address register
By default, the extension registers are not available. */
-
+/* The size of this array and the value of FIRST_PSEUDO_REGISTER must be
+   equal, hence the extra '1' entries at the end.
+   Reference: init_reg_sets() in regclass.c:290.  */
+/* Present implementations only have VR0-VR23 only */
+/* ??? FIXME: r27 and r31 should not be fixed registers. */
#define FIXED_REGISTERS \
{ 0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, \
- 0, 0, 0, 1, 1, 1, 1, 0, \
+ 0, 0, 1, 1, 1, 1, 1, 1, \
\
1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 0, 0, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1 }
+ 1, 1, 1, 1, 0, 1, 1, 1, \
+ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1}
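Since the initializer above must supply exactly FIRST_PSEUDO_REGISTER (146) entries, a compile-time size check along these lines would catch a mismatch (a sketch with invented names, not in the patch; the same idea applies to CALL_USED_REGISTERS below):

  static const char arc_fixed_regs_check[] = FIXED_REGISTERS;
  /* Compilation fails (negative array size) on an entry-count mismatch.  */
  typedef char arc_fixed_regs_size_ok
    [sizeof (arc_fixed_regs_check) == FIRST_PSEUDO_REGISTER ? 1 : -1];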
/* 1 for registers not available across function calls.
These must include the FIXED_REGISTERS and also any
@@ -262,46 +560,59 @@ if (GET_MODE_CLASS (MODE) == MODE_INT \
and the register where structure-value addresses are passed.
Aside from that, you can include as many other registers as you like. */
-#define CALL_USED_REGISTERS \
-{ 1, 1, 1, 1, 1, 1, 1, 1, \
+/* The number of entries in this initializer must equal
+ FIRST_PSEUDO_REGISTER; hence the extra entries at the end.
+ Reference: init_reg_sets () in regclass.c:290. */
+#define CALL_USED_REGISTERS \
+{ \
1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 0, 0, 0, \
0, 0, 0, 0, 0, 0, 0, 0, \
- 1, 1, 1, 1, 1, 1, 1, 1, \
+ 0, 0, 1, 1, 1, 1, 1, 1, \
\
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
1, 1, 1, 1, 1, 1, 1, 1, \
- 1, 1, 1, 1, 1, 1 }
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ 1, 1, 1, 1, 1, 1, 1, 1, \
+ \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 0, 0, 0, 0, 0, 0, 0, 0, \
+ 1, 1}
+
+/* Macro to conditionally modify fixed_regs/call_used_regs. */
+
+#define CONDITIONAL_REGISTER_USAGE arc_conditional_register_usage ();
/* If defined, an initializer for a vector of integers, containing the
numbers of hard registers in the order in which GCC should
prefer to use them (from most preferred to least). */
#define REG_ALLOC_ORDER \
{ 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, \
- 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 31, \
+ 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, \
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, \
- 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, \
- 27, 28, 29, 30 }
-
-/* Macro to conditionally modify fixed_regs/call_used_regs. */
-#define CONDITIONAL_REGISTER_USAGE \
-do { \
- if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) \
- { \
- fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
- call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; \
- } \
-} while (0)
+ 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, \
+ 27, 28, 29, 30, 31, 63}
/* Return number of consecutive hard regs needed starting at reg REGNO
to hold something of mode MODE.
This is ordinarily the length in words of a value of mode MODE
but can be less for certain modes in special long registers. */
#define HARD_REGNO_NREGS(REGNO, MODE) \
-((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+((GET_MODE_SIZE (MODE) == 16 && (REGNO) >= 64 && (REGNO) < 88) ? 1 : \
+ (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
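Read as plain arithmetic (UNITS_PER_WORD is 4 here): a 16-byte vector occupies a single SIMD register VR0-VR23 (regnos 64-87), while everything else takes one word register per 4 bytes. A standalone sketch with invented names:

  #include <assert.h>

  static int hard_regno_nregs (int regno, int mode_size)
  {
    return (mode_size == 16 && regno >= 64 && regno < 88)
           ? 1 : (mode_size + 3) / 4;
  }

  int main (void)
  {
    assert (hard_regno_nregs (64, 16) == 1);  /* vector value in vr0 */
    assert (hard_regno_nregs (0, 16) == 4);   /* same size in core regs */
    assert (hard_regno_nregs (0, 8) == 2);    /* DFmode: a register pair */
    return 0;
  }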
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
-extern const unsigned int arc_hard_regno_mode_ok[];
+extern unsigned int arc_hard_regno_mode_ok[];
extern unsigned int arc_mode_class[];
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
((arc_hard_regno_mode_ok[REGNO] & arc_mode_class[MODE]) != 0)
@@ -320,7 +631,17 @@ extern unsigned int arc_mode_class[];
&& GET_MODE_CLASS (MODE2) == MODE_INT \
&& GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \
&& GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)
-
+
+/* Internal macros to classify a register number as to whether it's a
+ general purpose register for compact insns (r0-r3,r12-r15), or
+ stack pointer (r28). */
+
+#define COMPACT_GP_REG_P(REGNO) \
+ (((signed)(REGNO) >= 0 && (REGNO) <= 3) || ((REGNO) >= 12 && (REGNO) <= 15))
+#define SP_REG_P(REGNO) ((REGNO) == 28)
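The compact (16-bit) encodings can only name r0-r3 and r12-r15, which is all this predicate tests. A standalone rendering (sketch only):

  #include <assert.h>

  static int compact_gp_reg_p (int r)
  {
    return (r >= 0 && r <= 3) || (r >= 12 && r <= 15);
  }

  int main (void)
  {
    assert (compact_gp_reg_p (0) && compact_gp_reg_p (3));
    assert (compact_gp_reg_p (12) && compact_gp_reg_p (15));
    assert (!compact_gp_reg_p (4) && !compact_gp_reg_p (11));
    assert (!compact_gp_reg_p (28));   /* sp has its own predicate */
    return 0;
  }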
+
+
+
/* Register classes and constants. */
/* Define the classes of registers for register constraints in the
@@ -346,40 +667,113 @@ extern unsigned int arc_mode_class[];
It is important that any condition codes have class NO_REGS.
See `register_operand'. */
-enum reg_class {
- NO_REGS, LPCOUNT_REG, GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES
+enum reg_class
+{
+ NO_REGS,
+ R0_REG, /* 'x' */
+ GP_REG, /* 'Rgp' */
+ FP_REG, /* 'f' */
+ SP_REG, /* 'b' */
+ LPCOUNT_REG, /* 'l' */
+ LINK_REGS, /* 'k' */
+ DOUBLE_REGS, /* D1, D2 */
+ SIMD_VR_REGS, /* VR00-VR63 */
+ SIMD_DMA_CONFIG_REGS, /* DI0-DI7,DO0-DO7 */
+ ARCOMPACT16_REGS, /* 'q' */
+ AC16_BASE_REGS, /* 'e' */
+ SIBCALL_REGS, /* "Rsc" */
+ GENERAL_REGS, /* 'r' */
+ WRITABLE_CORE_REGS, /* 'w' */
+ CHEAP_CORE_REGS, /* 'c' */
+ ALL_CORE_REGS, /* 'Rac' */
+ ALL_REGS,
+ LIM_REG_CLASSES
};
#define N_REG_CLASSES (int) LIM_REG_CLASSES
-/* Give names of register classes as strings for dump file. */
-#define REG_CLASS_NAMES \
-{ "NO_REGS", "LPCOUNT_REG", "GENERAL_REGS", "ALL_REGS" }
+/* Give names of register classes as strings for dump file. */
+#define REG_CLASS_NAMES \
+{ \
+ "NO_REGS", \
+ "R0_REG", \
+ "GP_REG", \
+ "FP_REG", \
+ "SP_REG", \
+ "LPCOUNT_REG", \
+ "LINK_REGS", \
+ "DOUBLE_REGS", \
+ "SIMD_VR_REGS", \
+ "SIMD_DMA_CONFIG_REGS", \
+ "ARCOMPACT16_REGS", \
+ "AC16_BASE_REGS", \
+ "SIBCALL_REGS", \
+ "GENERAL_REGS", \
+ "WRITABLE_CORE_REGS", \
+ "CHEAP_CORE_REGS", \
+ "ALL_CORE_REGS", \
+ "ALL_REGS" \
+}
/* Define which registers fit in which classes.
This is an initializer for a vector of HARD_REG_SET
of length N_REG_CLASSES. */
#define REG_CLASS_CONTENTS \
-{ {0, 0}, {0, 0x10000000}, {0xffffffff, 0xfffffff}, \
- {0xffffffff, 0x1fffffff} }
+{ \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* No Registers */ \
+ {0x00000001, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'x', r0 register , r0 */ \
+ {0x04000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'Rgp', Global Pointer, r26 */ \
+ {0x08000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'f', Frame Pointer, r27 */ \
+ {0x10000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'b', Stack Pointer, r28 */ \
+ {0x00000000, 0x10000000, 0x00000000, 0x00000000, 0x00000000}, /* 'l', LPCOUNT Register, r60 */ \
+ {0xe0000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'k', LINK Registers, r29-r31 */ \
+ {0x00000000, 0x00000f00, 0x00000000, 0x00000000, 0x00000000}, /* 'D', D1, D2 Registers */ \
+ {0x00000000, 0x00000000, 0xffffffff, 0xffffffff, 0x00000000}, /* 'V', VR00-VR63 Registers */ \
+ {0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ffff}, /* DI0-DI7, DO0-DO7 Registers */ \
+ {0x0000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'q', r0-r3, r12-r15 */ \
+ {0x1000f00f, 0x00000000, 0x00000000, 0x00000000, 0x00000000}, /* 'e', r0-r3, r12-r15, sp */ \
+ {0x1c001fff, 0x10000000, 0x00000000, 0x00000000, 0x00000000}, /* "Rsc", r0-r12, gp, fp, sp and lp_count */ \
+ {0x9fffffff, 0xc0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'r', r0-r28, blink, ap and pcl */ \
+ /* Include ap / pcl in WRITABLE_CORE_REGS for sake of symmetry. As these \
+ registers are fixed, it does not affect the literal meaning of the \
+ constraints, but it makes it a superset of GENERAL_REGS, thus \
+ enabling some operations that would otherwise not be possible. */ \
+ {0xffffffff, 0xd0000000, 0x00000000, 0x00000000, 0x00000000}, /* 'w', r0-r31, r60, ap, pcl */ \
+ {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'c', r0-r60, ap, pcl */ \
+ {0xffffffff, 0xdfffffff, 0x00000000, 0x00000000, 0x00000000}, /* 'Rac', r0-r60, ap, pcl */ \
+ {0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0003ffff} /* All Registers */ \
+}
+
+/* Local macros marking the first and last registers of particular classes. */
+#define ARC_FIRST_SIMD_VR_REG 64
+#define ARC_LAST_SIMD_VR_REG 127
+
+#define ARC_FIRST_SIMD_DMA_CONFIG_REG 128
+#define ARC_FIRST_SIMD_DMA_CONFIG_IN_REG 128
+#define ARC_FIRST_SIMD_DMA_CONFIG_OUT_REG 136
+#define ARC_LAST_SIMD_DMA_CONFIG_REG 143
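Each row of REG_CLASS_CONTENTS above is a HARD_REG_SET written as five 32-bit words: register N lives in word N/32, bit N%32. A sketch of that mapping (invented helper, not in the patch), checked against entries in the table:

  #include <assert.h>

  static void reg_to_mask (int regno, int *word, unsigned int *mask)
  {
    *word = regno / 32;
    *mask = 1u << (regno % 32);
  }

  int main (void)
  {
    int w; unsigned int m;
    reg_to_mask (26, &w, &m);   /* gp: word 0, 0x04000000 (GP_REG row) */
    assert (w == 0 && m == 0x04000000);
    reg_to_mask (60, &w, &m);   /* lp_count: word 1, 0x10000000 */
    assert (w == 1 && m == 0x10000000);
    reg_to_mask (64, &w, &m);   /* first SIMD vreg: word 2, bit 0 */
    assert (w == 2 && m == 0x00000001);
    return 0;
  }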
/* The same information, inverted:
Return the class number of the smallest class containing
reg number REGNO. This could be a conditional expression
or could index an array. */
-extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
-#define REGNO_REG_CLASS(REGNO) \
-(arc_regno_reg_class[REGNO])
-/* The class value for index registers, and the one for base regs. */
-#define INDEX_REG_CLASS GENERAL_REGS
-#define BASE_REG_CLASS GENERAL_REGS
+extern enum reg_class arc_regno_reg_class[];
+
+#define REGNO_REG_CLASS(REGNO) (arc_regno_reg_class[REGNO])
+
+/* The class value for valid index registers. An index register is
+ one used in an address where its value is either multiplied by
+ a scale factor or added to another register (as well as added to a
+ displacement). */
+
+#define INDEX_REG_CLASS (TARGET_MIXED_CODE ? ARCOMPACT16_REGS : GENERAL_REGS)
-/* Get reg_class from a letter such as appears in the machine description. */
-#define REG_CLASS_FROM_LETTER(C) \
-((C) == 'l' ? LPCOUNT_REG /* ??? needed? */ \
- : NO_REGS)
+/* The class value for valid base registers. A base register is one used in
+ an address which is the register value plus a displacement. */
+
+#define BASE_REG_CLASS (TARGET_MIXED_CODE ? AC16_BASE_REGS : GENERAL_REGS)
/* These assume that REGNO is a hard or pseudo reg number.
They give nonzero only if REGNO is a hard reg of the suitable class
@@ -387,77 +781,57 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
Since they use reg_renumber, they are safe only once reg_renumber
has been allocated, which happens in local-alloc.c. */
#define REGNO_OK_FOR_BASE_P(REGNO) \
-((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32)
-#define REGNO_OK_FOR_INDEX_P(REGNO) \
-((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32)
+((REGNO) < 29 || ((REGNO) == ARG_POINTER_REGNUM) || ((REGNO) == 63) ||\
+ (unsigned) reg_renumber[REGNO] < 29)
+
+#define REGNO_OK_FOR_INDEX_P(REGNO) REGNO_OK_FOR_BASE_P(REGNO)
/* Given an rtx X being reloaded into a reg required to be
in class CLASS, return the class of reg to actually use.
In general this is just CLASS; but on some machines
in some cases it is preferable to use a more restrictive class. */
-#define PREFERRED_RELOAD_CLASS(X,CLASS) \
-(CLASS)
+
+#define PREFERRED_RELOAD_CLASS(X, CLASS) \
+ (((CLASS) == CHEAP_CORE_REGS || (CLASS) == WRITABLE_CORE_REGS) \
+ ? GENERAL_REGS \
+ : (CLASS))
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS. */
-#define CLASS_MAX_NREGS(CLASS, MODE) \
-((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
-/* The letters I, J, K, L, M, N, O, P in a register constraint string
- can be used to stand for particular ranges of immediate operands.
- This macro defines what the ranges are.
- C is the letter, and VALUE is a constant value.
- Return 1 if VALUE is in the range specified by C. */
-/* 'I' is used for short immediates (always signed).
- 'J' is used for long immediates.
- 'K' is used for any constant up to 64 bits (for 64x32 situations?). */
+#define CLASS_MAX_NREGS(CLASS, MODE) \
+((GET_MODE_SIZE (MODE) == 16 && (CLASS) == SIMD_VR_REGS) ? 1 : \
+((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
/* local to this file */
#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200)
+#define SMALL_INT_RANGE(X, OFFSET, SHIFT) \
+ ((unsigned) (((X) >> (SHIFT)) + 0x100) \
+ < 0x200 - ((unsigned) (OFFSET) >> (SHIFT)))
+#define SIGNED_INT12(X) ((unsigned) ((X) + 0x800) < 0x1000)
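The boundary behavior of the two simple range tests, reimplemented standalone as a quick check (not part of the patch): SMALL_INT accepts exactly -256..255 and SIGNED_INT12 exactly -2048..2047.

  #include <assert.h>

  static int small_int (long x)    { return (unsigned long) (x + 0x100) < 0x200; }
  static int signed_int12 (long x) { return (unsigned long) (x + 0x800) < 0x1000; }

  int main (void)
  {
    assert (small_int (-256) && small_int (255));
    assert (!small_int (-257) && !small_int (256));
    assert (signed_int12 (-2048) && signed_int12 (2047));
    assert (!signed_int12 (-2049) && !signed_int12 (2048));
    return 0;
  }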
/* local to this file */
#define LARGE_INT(X) \
((X) >= (-(HOST_WIDE_INT) 0x7fffffff - 1) \
- && (unsigned HOST_WIDE_INT)(X) <= (unsigned HOST_WIDE_INT) 0xffffffff)
-
-#define CONST_OK_FOR_LETTER_P(VALUE, C) \
-((C) == 'I' ? SMALL_INT (VALUE) \
- : (C) == 'J' ? LARGE_INT (VALUE) \
- : (C) == 'K' ? 1 \
- : 0)
-
-/* Similar, but for floating constants, and defining letters G and H.
- Here VALUE is the CONST_DOUBLE rtx itself. */
-/* 'G' is used for integer values for the multiplication insns where the
- operands are extended from 4 bytes to 8 bytes.
- 'H' is used when any 64-bit constant is allowed. */
-#define CONST_DOUBLE_OK_FOR_LETTER_P(VALUE, C) \
-((C) == 'G' ? arc_double_limm_p (VALUE) \
- : (C) == 'H' ? 1 \
- : 0)
-
-/* A C expression that defines the optional machine-dependent constraint
- letters that can be used to segregate specific types of operands,
- usually memory references, for the target machine. It should return 1 if
- VALUE corresponds to the operand type represented by the constraint letter
- C. If C is not defined as an extra constraint, the value returned should
- be 0 regardless of VALUE. */
-/* ??? This currently isn't used. Waiting for PIC. */
-#if 0
-#define EXTRA_CONSTRAINT(VALUE, C) \
-((C) == 'R' ? (SYMBOL_REF_FUNCTION_P (VALUE) || GET_CODE (VALUE) == LABEL_REF) \
- : 0)
-#endif
-
+ && (unsigned HOST_WIDE_INT) (X) <= (unsigned HOST_WIDE_INT) 0xffffffff)
+#define UNSIGNED_INT3(X) ((unsigned) (X) < 0x8)
+#define UNSIGNED_INT5(X) ((unsigned) (X) < 0x20)
+#define UNSIGNED_INT6(X) ((unsigned) (X) < 0x40)
+#define UNSIGNED_INT7(X) ((unsigned) (X) < 0x80)
+#define UNSIGNED_INT8(X) ((unsigned) (X) < 0x100)
+#define IS_ONE(X) ((X) == 1)
+#define IS_ZERO(X) ((X) == 0)
+
/* Stack layout and stack pointer usage. */
/* Define this macro if pushing a word onto the stack moves the stack
pointer to a smaller address. */
#define STACK_GROWS_DOWNWARD
-/* Define this to nonzero if the nominal address of the stack frame
+/* Define this if the nominal address of the stack frame
is at the high-address end of the local variables;
that is, each additional local variable allocated
goes at a more negative offset in the frame. */
+/* ??? The value chosen for this macro needs review. */
#define FRAME_GROWS_DOWNWARD 1
/* Offset within stack frame to start allocating local variables at.
@@ -468,12 +842,17 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. */
-#define STACK_POINTER_OFFSET FIRST_PARM_OFFSET (0)
+#define STACK_POINTER_OFFSET (TARGET_A4 ? 16 : 0)
/* Offset of first parameter from the argument pointer register value. */
-/* 4 bytes for each of previous fp, return address, and previous gp.
- 4 byte reserved area for future considerations. */
-#define FIRST_PARM_OFFSET(FNDECL) 16
+/* For ARCtangent-A4:
+ first_parm_offset = fp_size (4 bytes) + return_addr_size (4 bytes) +
+ static_link_reg_size (4 bytes) +
+ reserved_area_size (4 bytes);
+ For ARCompact:
+ first_parm_offset = 0;
+*/
+#define FIRST_PARM_OFFSET(FNDECL) (TARGET_A4 ? 16 : 0)
/* A C expression whose value is RTL representing the address in a
stack frame where the pointer to the caller's frame is stored.
@@ -493,15 +872,9 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
is defined. */
/* The current return address is in r31. The return address of anything
farther back is at [%fp,4]. */
-#if 0 /* The default value should work. */
+
#define RETURN_ADDR_RTX(COUNT, FRAME) \
-(((COUNT) == -1) \
- ? gen_rtx_REG (Pmode, 31) \
- : copy_to_reg (gen_rtx_MEM (Pmode, \
- memory_address (Pmode, \
- plus_constant ((FRAME), \
- UNITS_PER_WORD)))))
-#endif
+arc_return_addr_rtx (COUNT, FRAME)
/* Register to use for pushing function arguments. */
#define STACK_POINTER_REGNUM 28
@@ -509,12 +882,16 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* Base register for access to local variables of the function. */
#define FRAME_POINTER_REGNUM 27
-/* Base register for access to arguments of the function. */
-#define ARG_POINTER_REGNUM FRAME_POINTER_REGNUM
+/* Base register for access to arguments of the function. This register
+ will be eliminated into either fp or sp. */
+#define ARG_POINTER_REGNUM 62
+
+#define RETURN_ADDR_REGNUM 31
+/* TODO - check usage of STATIC_CHAIN_REGNUM with a testcase */
/* Register in which static-chain is passed to a function. This must
not be a register used by the prologue. */
-#define STATIC_CHAIN_REGNUM 24
+#define STATIC_CHAIN_REGNUM 11
/* A C expression which is nonzero if a function must have and use a
frame pointer. This expression is evaluated in the reload pass.
@@ -522,11 +899,7 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
#define FRAME_POINTER_REQUIRED \
(cfun->calls_alloca)
-/* C statement to store the difference between the frame pointer
- and the stack pointer values immediately after the function prologue. */
-#define INITIAL_FRAME_POINTER_OFFSET(VAR) \
-((VAR) = arc_compute_frame_size (get_frame_size ()))
-
+
/* Function argument passing. */
/* If defined, the maximum amount of space required for outgoing
@@ -554,7 +927,7 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
-#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
((CUM) = 0)
/* The number of registers used for parameter passing. Local to this file. */
@@ -576,22 +949,45 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
: ROUND_ADVANCE (GET_MODE_SIZE (MODE)))
/* Round CUM up to the necessary point for argument MODE/TYPE. */
+/* N.B. Vectors have alignment exceeding BIGGEST_ALIGNMENT.
+ FUNCTION_ARG_BOUNDARY reduces this to no more than 32 bits. */
#define ROUND_ADVANCE_CUM(CUM, MODE, TYPE) \
-((((MODE) == BLKmode ? TYPE_ALIGN (TYPE) : GET_MODE_BITSIZE (MODE)) \
- > BITS_PER_WORD) \
- ? (((CUM) + 1) & ~1) \
- : (CUM))
+ ((((CUM) - 1) | (FUNCTION_ARG_BOUNDARY ((MODE), (TYPE)) - 1)/BITS_PER_WORD)\
+ + 1)
+
+/* Special characters prefixed to function names
+ in order to encode attribute like information. */
+#define SIMPLE_CALL_FLAG_CHAR '&'
+#define SHORT_CALL_FLAG_CHAR '!'
+#define LONG_CALL_FLAG_CHAR '#'
+
+/* Local macros to identify the function name symbols, prefixed with one of
+ the (LONG_CALL/SHORT_CALL/)FLAG_CHAR flags */
+#define ARC_FUNCTION_NAME_PREFIX_P(ch) (((ch) == SIMPLE_CALL_FLAG_CHAR) || \
+ ((ch) == LONG_CALL_FLAG_CHAR) || \
+ ((ch) == SHORT_CALL_FLAG_CHAR))
+
+/* Check if this symbol has a long_call attribute in its declaration */
+#define ARC_ENCODED_LONG_CALL_ATTR_P(SYMBOL_NAME) \
+ (*(SYMBOL_NAME) == LONG_CALL_FLAG_CHAR)
+
+/* Check if this symbol has a short_call attribute in its declaration */
+#define ARC_ENCODED_SHORT_CALL_ATTR_P(SYMBOL_NAME) \
+ (*(SYMBOL_NAME) == SHORT_CALL_FLAG_CHAR)
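The attribute is recovered later by looking only at the first character of the assembler name; a standalone sketch of the decoding side (function and symbol names invented for illustration):

  #include <assert.h>

  static int long_call_p (const char *name)  { return name[0] == '#'; }
  static int short_call_p (const char *name) { return name[0] == '!'; }

  int main (void)
  {
    assert (long_call_p ("#far_handler"));    /* long_call attribute */
    assert (short_call_p ("!near_helper"));   /* short_call attribute */
    assert (!long_call_p ("plain") && !short_call_p ("plain"));
    return 0;
  }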
/* Return boolean indicating arg of type TYPE and mode MODE will be passed in
a reg. This includes arguments that have to be passed by reference as the
pointer to them is passed in a reg if one is available (and that is what
we're given).
+ When passing arguments NAMED is always 1. When receiving arguments NAMED
+ is 1 for each argument except the last in a stdarg/varargs function. In
+ a stdarg function we want to treat the last named arg as named. In a
+ varargs function we want to treat the last named arg (which is
+ `__builtin_va_alist') as unnamed.
This macro is only used in this file. */
#define PASS_IN_REG_P(CUM, MODE, TYPE) \
-((CUM) < MAX_ARC_PARM_REGS \
- && ((ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) \
- + ROUND_ADVANCE_ARG ((MODE), (TYPE)) \
- <= MAX_ARC_PARM_REGS)))
+((CUM) < MAX_ARC_PARM_REGS)
+
/* Determine where to put an argument to a function.
Value is zero to push the argument on the stack,
@@ -608,32 +1004,15 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* On the ARC the first MAX_ARC_PARM_REGS args are normally in registers
and the rest are pushed. */
#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
-(PASS_IN_REG_P ((CUM), (MODE), (TYPE)) \
- ? gen_rtx_REG ((MODE), ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE))) \
- : 0)
+ arc_function_arg (&CUM, MODE, TYPE, NAMED)
/* Update the data in CUM to advance over an argument
of mode MODE and data type TYPE.
(TYPE is null for libcalls where that information may not be available.) */
#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
-((CUM) = (ROUND_ADVANCE_CUM ((CUM), (MODE), (TYPE)) \
- + ROUND_ADVANCE_ARG ((MODE), (TYPE))))
-
-/* If defined, a C expression that gives the alignment boundary, in bits,
- of an argument with the specified mode and type. If it is not defined,
- PARM_BOUNDARY is used for all arguments. */
-#define FUNCTION_ARG_BOUNDARY(MODE, TYPE) \
-(((TYPE) ? TYPE_ALIGN (TYPE) : GET_MODE_BITSIZE (MODE)) <= PARM_BOUNDARY \
- ? PARM_BOUNDARY \
- : 2 * PARM_BOUNDARY)
-
-/* Function results. */
+ arc_function_arg_advance(&CUM, MODE, TYPE, NAMED)
-/* Define how to find the value returned by a function.
- VALTYPE is the data type of the value (as a tree).
- If the precise function being called is known, FUNC is its FUNCTION_DECL;
- otherwise, FUNC is 0. */
-#define FUNCTION_VALUE(VALTYPE, FUNC) gen_rtx_REG (TYPE_MODE (VALTYPE), 0)
+/* Function results. */
/* Define how to find the value returned by a library function
assuming the value has mode MODE. */
@@ -644,59 +1023,91 @@ extern enum reg_class arc_regno_reg_class[FIRST_PSEUDO_REGISTER];
/* ??? What about r1 in DI/DF values. */
#define FUNCTION_VALUE_REGNO_P(N) ((N) == 0)
-/* Tell GCC to use TARGET_RETURN_IN_MEMORY. */
+/* Tell GCC to use RETURN_IN_MEMORY. */
#define DEFAULT_PCC_STRUCT_RETURN 0
-
+
+/* Register in which address to store a structure value
+ is passed to a function, or 0 to use `invisible' first argument. */
+#define STRUCT_VALUE 0
+
/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,
the stack pointer does not matter. The value is tested only in
functions that have frame pointers.
No definition is equivalent to always zero. */
#define EXIT_IGNORE_STACK 0
+#define EPILOGUE_USES(REGNO) \
+ ((REGNO) == arc_return_address_regs[arc_compute_function_type (cfun)])
+
/* Epilogue delay slots. */
#define DELAY_SLOTS_FOR_EPILOGUE arc_delay_slots_for_epilogue ()
#define ELIGIBLE_FOR_EPILOGUE_DELAY(TRIAL, SLOTS_FILLED) \
arc_eligible_for_epilogue_delay (TRIAL, SLOTS_FILLED)
-/* Output assembler code to FILE to increment profiler label # LABELNO
- for profiling a function entry. */
-#define FUNCTION_PROFILER(FILE, LABELNO)
-
-/* Trampolines. */
-/* ??? This doesn't work yet because GCC will use as the address of a nested
- function the address of the trampoline. We need to use that address
- right shifted by 2. It looks like we'll need PSImode after all. :-( */
+/* Definitions for register eliminations.
-/* Output assembler code for a block containing the constant parts
- of a trampoline, leaving space for the variable parts. */
-/* On the ARC, the trampoline is quite simple as we have 32-bit immediate
- constants.
+ This is an array of structures. Each structure initializes one pair
+ of eliminable registers. The "from" register number is given first,
+ followed by "to". Eliminations of the same "from" register are listed
+ in order of preference.
- mov r24,STATIC
- j.nd FUNCTION
+ We have two registers that can be eliminated on the ARC. First, the
+ argument pointer register can always be eliminated in favor of the stack
+ pointer register or frame pointer register. Secondly, the frame pointer
+ register can often be eliminated in favor of the stack pointer register.
*/
-#define TRAMPOLINE_TEMPLATE(FILE) \
-do { \
- assemble_aligned_integer (UNITS_PER_WORD, GEN_INT (0x631f7c00)); \
- assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); \
- assemble_aligned_integer (UNITS_PER_WORD, GEN_INT (0x381f0000)); \
- assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); \
-} while (0)
+
+#define ELIMINABLE_REGS \
+{{ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ {ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM}, \
+ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}}
+
+/* Given FROM and TO register numbers, say whether this elimination is allowed.
+ Frame pointer elimination is automatically handled.
+
+ All eliminations are permissible. If we need a frame
+ pointer, we must eliminate ARG_POINTER_REGNUM into
+ FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM. */
+#define CAN_ELIMINATE(FROM, TO) \
+ (!((FROM) == FRAME_POINTER_REGNUM && FRAME_POINTER_REQUIRED))
+
+/* Define the offset between two registers, one to be eliminated, and the other
+ its replacement, at the start of a routine. */
+extern int arc_initial_elimination_offset (int from, int to);
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ (OFFSET) = arc_initial_elimination_offset ((FROM), (TO))
+
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry.
+ We actually emit the profiler code at the call site, so leave this one
+ empty. */
+#define FUNCTION_PROFILER(FILE, LABELNO)
+#define NO_PROFILE_COUNTERS 1
+
+/* Trampolines. */
/* Length in units of the trampoline for entering a nested function. */
-#define TRAMPOLINE_SIZE 16
+#define TRAMPOLINE_SIZE 20
+
+/* Alignment required for a trampoline, in bits. */
+/* For actual data alignment we just need 32, no more than the stack;
+ however, to reduce cache coherency issues, we want to make sure that
+ trampoline instructions always appear the same in any given cache line. */
+#define TRAMPOLINE_ALIGNMENT 256
/* Emit RTL insns to initialize the variable parts of a trampoline.
FNADDR is an RTX for the address of the function's pure code.
CXT is an RTX for the static chain value for the function. */
#define INITIALIZE_TRAMPOLINE(TRAMP, FNADDR, CXT) \
-do { \
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 4)), CXT); \
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (TRAMP, 12)), FNADDR); \
- emit_insn (gen_flush_icache (validize_mem (gen_rtx_MEM (SImode, TRAMP)))); \
-} while (0)
-
+ arc_initialize_trampoline (TRAMP, FNADDR, CXT)
+
+/* Allow the profiler to easily distinguish trampolines from normal
+ functions. */
+#define TRAMPOLINE_ADJUST_ADDRESS(addr) ((addr) = plus_constant ((addr), 2))
+
+/* Library calls. */
+
/* Addressing modes, and classification of registers for them. */
/* Maximum number of registers that can appear in a valid memory address. */
@@ -706,17 +1117,29 @@ do { \
/* We have pre inc/dec (load/store with update). */
#define HAVE_PRE_INCREMENT 1
#define HAVE_PRE_DECREMENT 1
+#define HAVE_POST_INCREMENT 1
+#define HAVE_POST_DECREMENT 1
+#define HAVE_PRE_MODIFY_DISP 1
+#define HAVE_POST_MODIFY_DISP 1
+#define HAVE_PRE_MODIFY_REG 1
+#define HAVE_POST_MODIFY_REG 1
+/* ??? should also do PRE_MODIFY_REG / POST_MODIFY_REG, but that requires
+ a special predicate for the memory operand of stores, like for the SH. */
/* Recognize any constant value that is a valid address. */
#define CONSTANT_ADDRESS_P(X) \
+(flag_pic ? arc_legitimate_pic_addr_p (X) : \
(GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
- || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST)
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST))
/* Nonzero if the constant value X is a legitimate general operand.
We can handle any 32- or 64-bit constant. */
/* "1" should work since the largest constant should be a 64 bit critter. */
/* ??? Not sure what to do for 64x32 compiler. */
-#define LEGITIMATE_CONSTANT_P(X) 1
+#define LEGITIMATE_CONSTANT_P(X) (arc_legitimate_constant_p (X))
+
+/* Is the argument a const_int rtx containing an exact power of 2? */
+#define IS_POWEROF2_P(X) (!((X) & ((X) - 1)) && (X))
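This is the usual bit trick: x & (x - 1) clears the lowest set bit, so the conjunction is true exactly when one bit is set and the value is nonzero. Checked standalone (sketch, not part of the patch):

  #include <assert.h>

  static int powerof2_p (long x) { return !(x & (x - 1)) && x != 0; }

  int main (void)
  {
    assert (powerof2_p (1) && powerof2_p (2) && powerof2_p (1024));
    assert (!powerof2_p (0) && !powerof2_p (3) && !powerof2_p (12));
    return 0;
  }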
/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx
and check its validity for a certain class.
@@ -736,14 +1159,19 @@ do { \
/* Nonzero if X is a hard reg that can be used as an index
or if it is a pseudo reg. */
#define REG_OK_FOR_INDEX_P(X) \
-((unsigned) REGNO (X) - 32 >= FIRST_PSEUDO_REGISTER - 32)
+((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \
+ (unsigned) REGNO (X) < 29 || \
+ (unsigned) REGNO (X) == 63 || \
+ (unsigned) REGNO (X) == ARG_POINTER_REGNUM)
/* Nonzero if X is a hard reg that can be used as a base reg
or if it is a pseudo reg. */
#define REG_OK_FOR_BASE_P(X) \
-((unsigned) REGNO (X) - 32 >= FIRST_PSEUDO_REGISTER - 32)
+((unsigned) REGNO (X) >= FIRST_PSEUDO_REGISTER || \
+ (unsigned) REGNO (X) < 29 || \
+ (unsigned) REGNO (X) == 63 || \
+ (unsigned) REGNO (X) == ARG_POINTER_REGNUM)
#else
-
/* Nonzero if X is a hard reg that can be used as an index. */
#define REG_OK_FOR_INDEX_P(X) REGNO_OK_FOR_INDEX_P (REGNO (X))
/* Nonzero if X is a hard reg that can be used as a base reg. */
@@ -766,54 +1194,216 @@ do { \
/* local to this file */
#define RTX_OK_FOR_INDEX_P(X) \
-(0 && /*???*/ REG_P (X) && REG_OK_FOR_INDEX_P (X))
+(REG_P (X) && REG_OK_FOR_INDEX_P (X))
/* local to this file */
/* ??? Loads can handle any constant, stores can only handle small ones. */
-#define RTX_OK_FOR_OFFSET_P(X) \
-(GET_CODE (X) == CONST_INT && SMALL_INT (INTVAL (X)))
-
-#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X) \
-(GET_CODE (X) == PLUS \
- && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \
- && (RTX_OK_FOR_INDEX_P (XEXP (X, 1)) \
- || RTX_OK_FOR_OFFSET_P (XEXP (X, 1))))
-
-#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
-{ if (RTX_OK_FOR_BASE_P (X)) \
- goto ADDR; \
- if (LEGITIMATE_OFFSET_ADDRESS_P ((MODE), (X))) \
- goto ADDR; \
- if (GET_CODE (X) == CONST_INT && LARGE_INT (INTVAL (X))) \
- goto ADDR; \
- if (GET_CODE (X) == SYMBOL_REF \
- || GET_CODE (X) == LABEL_REF \
- || GET_CODE (X) == CONST) \
- goto ADDR; \
- if ((GET_CODE (X) == PRE_DEC || GET_CODE (X) == PRE_INC) \
- /* We're restricted here by the `st' insn. */ \
- && RTX_OK_FOR_BASE_P (XEXP ((X), 0))) \
- goto ADDR; \
+/* OTOH, LIMMs cost extra, so their usefulness is limited. */
+#define RTX_OK_FOR_OFFSET_P(MODE, X) \
+(GET_CODE (X) == CONST_INT \
+ && SMALL_INT_RANGE (INTVAL (X), (GET_MODE_SIZE (MODE) - 1) & -4, \
+ (INTVAL (X) & (GET_MODE_SIZE (MODE) - 1) & 3 \
+ ? 0 \
+ : -(-GET_MODE_SIZE (MODE) | -4) >> 1)))
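For a word-aligned SImode access this reduces to SMALL_INT_RANGE (X, 0, 2), i.e. word offsets in -1024..1020, while a misaligned offset gets SHIFT 0 and falls back to the plain 9-bit signed range. A sketch of the arithmetic only (my reading of the macro, reimplemented standalone):

  #include <assert.h>

  static int small_int_range (long x, long offset, int shift)
  {
    return (unsigned long) ((x >> shift) + 0x100)
           < 0x200 - ((unsigned long) offset >> shift);
  }

  int main (void)
  {
    /* Word-aligned SImode: OFFSET 0, SHIFT 2.  */
    assert (small_int_range (-1024, 0, 2) && small_int_range (1020, 0, 2));
    assert (!small_int_range (1024, 0, 2));
    /* Misaligned: SHIFT 0, plain 9-bit signed range.  */
    assert (small_int_range (255, 0, 0) && !small_int_range (256, 0, 0));
    return 0;
  }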
+
+#define LEGITIMATE_OFFSET_ADDRESS_P(MODE, X, INDEX) \
+(GET_CODE (X) == PLUS \
+ && RTX_OK_FOR_BASE_P (XEXP (X, 0)) \
+ && ((INDEX && RTX_OK_FOR_INDEX_P (XEXP (X, 1)) \
+ && GET_MODE_SIZE ((MODE)) <= 4) \
+ || RTX_OK_FOR_OFFSET_P (MODE, XEXP (X, 1))))
+
+#define LEGITIMATE_SCALED_ADDRESS_P(MODE, X) \
+(GET_CODE (X) == PLUS \
+ && GET_CODE (XEXP (X, 0)) == MULT \
+ && RTX_OK_FOR_INDEX_P (XEXP (XEXP (X, 0), 0)) \
+ && GET_CODE (XEXP (XEXP (X, 0), 1)) == CONST_INT \
+ && ((GET_MODE_SIZE (MODE) == 2 && INTVAL (XEXP (XEXP (X, 0), 1)) == 2) \
+ || (GET_MODE_SIZE (MODE) == 4 && INTVAL (XEXP (XEXP (X, 0), 1)) == 4)) \
+ && (RTX_OK_FOR_BASE_P (XEXP (X, 1)) \
+ || (flag_pic ? CONST_INT_P (XEXP (X, 1)) : CONSTANT_P (XEXP (X, 1)))))
+
+#define LEGITIMATE_SMALL_DATA_ADDRESS_P(X) \
+(GET_CODE (X) == PLUS \
+ && REG_P (XEXP (X, 0)) && REGNO (XEXP (X, 0)) == 26 \
+ && ((GET_CODE (XEXP (X, 1)) == SYMBOL_REF \
+ && SYMBOL_REF_SMALL_P (XEXP (X, 1))) \
+ || (GET_CODE (XEXP (X, 1)) == CONST \
+ && GET_CODE (XEXP (XEXP (X, 1), 0)) == PLUS \
+ && GET_CODE (XEXP (XEXP (XEXP (X, 1), 0), 0)) == SYMBOL_REF \
+ && SYMBOL_REF_SMALL_P (XEXP (XEXP (XEXP (X, 1), 0), 0)) \
+ && GET_CODE (XEXP (XEXP (XEXP (X, 1), 0), 1)) == CONST_INT)))
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, ADDR) \
+{ if (RTX_OK_FOR_BASE_P (X)) \
+ goto ADDR; \
+ if (LEGITIMATE_OFFSET_ADDRESS_P ((MODE), (X), TARGET_INDEXED_LOADS)) \
+ goto ADDR; \
+ if (LEGITIMATE_SCALED_ADDRESS_P ((MODE), (X))) \
+ goto ADDR; \
+ if (LEGITIMATE_SMALL_DATA_ADDRESS_P (X)) \
+ goto ADDR; \
+ if (GET_CODE (X) == CONST_INT && LARGE_INT (INTVAL (X))) \
+ goto ADDR; \
+ if ((GET_MODE_SIZE (MODE) != 16) \
+ && (GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || GET_CODE (X) == CONST)) \
+ { \
+ if (!flag_pic || arc_legitimate_pic_addr_p (X)) \
+ goto ADDR; \
+ } \
+ if ((GET_CODE (X) == PRE_DEC || GET_CODE (X) == PRE_INC \
+ || GET_CODE (X) == POST_DEC || GET_CODE (X) == POST_INC) \
+ && RTX_OK_FOR_BASE_P (XEXP ((X), 0))) \
+ goto ADDR; \
+ /* We're restricted here by the `st' insn. */ \
+ if ((GET_CODE (X) == PRE_MODIFY || GET_CODE (X) == POST_MODIFY) \
+ && GET_CODE (XEXP ((X), 1)) == PLUS \
+ && rtx_equal_p (XEXP ((X), 0), XEXP (XEXP ((X), 1), 0)) \
+ && LEGITIMATE_OFFSET_ADDRESS_P (QImode, XEXP ((X), 1), \
+ TARGET_AUTO_MODIFY_REG)) \
+ goto ADDR; \
+}
+
+#define SYMBOLIC_CONST(X) \
+(GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == LABEL_REF \
+ || (GET_CODE (X) == CONST && symbolic_reference_mentioned_p (X)))
+
+/* Try machine-dependent ways of modifying an illegitimate address
+ to be legitimate. If we find one, return the new, valid address.
+ This macro is used in only one place: `memory_address' in explow.c.
+
+ OLDX is the address as it was before break_out_memory_refs was called.
+ In some cases it is useful to look at this to decide what needs to be done.
+
+ MODE and WIN are passed so that this macro can use
+ GO_IF_LEGITIMATE_ADDRESS.
+
+ It is always safe for this macro to do nothing. It exists to recognize
+ opportunities to optimize the output. */
+#define LEGITIMIZE_ADDRESS(X, OLDX, MODE, WIN) \
+{ \
+ rtx arc_la_new = arc_legitimize_address ((X), (OLDX), (MODE)); \
+ \
+ if (arc_la_new) \
+ { \
+ (X) = arc_la_new; \
+ goto WIN; \
+ } \
+}
+
+/* A C compound statement that attempts to replace X, which is an address
+ that needs reloading, with a valid memory address for an operand of
+ mode MODE. WIN is a C statement label elsewhere in the code.
+
+ We try to get a normal form
+ of the address. That will allow inheritance of the address reloads. */
+
+#define LEGITIMIZE_RELOAD_ADDRESS(X,MODE,OPNUM,TYPE,IND_LEVELS,WIN) \
+{ \
+ if (GET_CODE (X) == PLUS \
+ && CONST_INT_P (XEXP (X, 1)) \
+ && (RTX_OK_FOR_BASE_P (XEXP (X, 0)) \
+ || (REG_P (XEXP (X, 0)) \
+ && reg_equiv_constant[REGNO (XEXP (X, 0))]))) \
+ { \
+ int scale = GET_MODE_SIZE (MODE); \
+ int shift; \
+ rtx index_rtx = XEXP (X, 1); \
+ HOST_WIDE_INT offset = INTVAL (index_rtx), offset_base; \
+ int regno; \
+ rtx reg, sum, sum2; \
+ \
+ if (scale > 4) \
+ scale = 4; \
+ if ((scale-1) & offset) \
+ scale = 1; \
+ shift = scale >> 1; \
+ offset_base = (offset + (256 << shift)) & (-512 << shift); \
+ /* Sometimes the normal form does not suit DImode. We \
+ could avoid that by using smaller ranges, but that \
+ would give less optimized code when SImode is \
+ prevalent. */ \
+ if (GET_MODE_SIZE (MODE) + offset - offset_base <= (256 << shift))\
+ { \
+ reg = XEXP (X, 0); \
+ regno = REGNO (reg); \
+ sum2 = sum = plus_constant (reg, offset_base); \
+ \
+ if (reg_equiv_constant[regno]) \
+ { \
+ sum2 = plus_constant (reg_equiv_constant[regno], \
+ offset_base); \
+ if (GET_CODE (sum2) == PLUS) \
+ sum2 = gen_rtx_CONST (Pmode, sum2); \
+ } \
+ X = gen_rtx_PLUS (Pmode, sum, GEN_INT (offset - offset_base));\
+ push_reload (sum2, NULL_RTX, &XEXP (X, 0), NULL, \
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), \
+ (TYPE)); \
+ goto WIN; \
+ } \
+ } \
+ /* We must re-recognize what we created before. */ \
+ else if (GET_CODE (X) == PLUS \
+ && GET_CODE (XEXP (X, 0)) == PLUS \
+ && CONST_INT_P (XEXP (XEXP (X, 0), 1)) \
+ && REG_P (XEXP (XEXP (X, 0), 0)) \
+ && CONST_INT_P (XEXP (X, 1))) \
+ { \
+ /* Because this address is so complex, we know it must have \
+ been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, \
+ it is already unshared, and needs no further unsharing. */ \
+ push_reload (XEXP ((X), 0), NULL_RTX, &XEXP ((X), 0), NULL, \
+ BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, (OPNUM), (TYPE));\
+ goto WIN; \
+ } \
}
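The normal form computed above splits a large constant offset into a window base aligned to 512<<shift plus a small residual, so nearby accesses can inherit one reloaded base register. Worked through for an SImode access (scale 4, hence shift 2) at offset 2000, as a sketch of the arithmetic only:

  #include <assert.h>

  int main (void)
  {
    long offset = 2000;
    int shift = 2;
    long offset_base = (offset + (256L << shift)) & -(512L << shift);
    assert (offset_base == 2048);          /* (2000 + 1024) & -2048 */
    assert (offset - offset_base == -48);  /* small residual offset */
    /* Reload materializes reg+2048 once; the access uses [tmp, -48].  */
    return 0;
  }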
+/* Reading lp_count for anything but the lp instruction is very slow on the
+ ARC700. */
+#define DONT_REALLOC(REGNO,MODE) \
+ (TARGET_ARC700 && (REGNO) == 60)
+
/* Go to LABEL if ADDR (a legitimate address expression)
has an effect that depends on the machine mode it is used for. */
-#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL)
-
+#define GO_IF_MODE_DEPENDENT_ADDRESS(ADDR, LABEL) \
+{ \
+ /* ??? Was this needed for some reload issue? Try without, in time remove\
+ or re-enable with comment stating why. */\
+ /*if ((GET_CODE (ADDR) == PRE_MODIFY || GET_CODE (ADDR) == POST_MODIFY) \
+ && !CONST_INT_P (XEXP (XEXP ((ADDR), 1), 1))) \
+ goto LABEL; */\
+ /* SYMBOL_REF is not mode dependent: it is either a small data reference, \
+ which is valid for loads and stores, or a limm offset, which is valid for \
+ loads. */ \
+ /* Scaled indices are scaled by the access mode; likewise for scaled \
+ offsets, which are needed for maximum offset stores. */ \
+ if (GET_CODE (ADDR) == PLUS \
+ && (GET_CODE (XEXP ((ADDR), 0)) == MULT \
+ || (CONST_INT_P (XEXP ((ADDR), 1)) \
+ && !SMALL_INT (INTVAL (XEXP ((ADDR), 1)))))) \
+ goto LABEL; \
+}
+
+
/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
return the mode to be used for the comparison. */
+/*extern enum machine_mode arc_select_cc_mode ();*/
#define SELECT_CC_MODE(OP, X, Y) \
arc_select_cc_mode (OP, X, Y)
-/* Return nonzero if SELECT_CC_MODE will never return MODE for a
+/* Return non-zero if SELECT_CC_MODE will never return MODE for a
floating point inequality comparison. */
#define REVERSIBLE_CC_MODE(MODE) 1 /*???*/
-
+
/* Costs. */
/* Compute extra cost of moving data between one register class
and another. */
-#define REGISTER_MOVE_COST(MODE, CLASS1, CLASS2) 2
+#define REGISTER_MOVE_COST(MODE, CLASS, TO_CLASS) \
+ arc_register_move_cost ((MODE), (CLASS), (TO_CLASS))
/* Compute the cost of moving data between registers and memory. */
/* Memory is 3 times as expensive as registers.
@@ -830,13 +1420,13 @@ arc_select_cc_mode (OP, X, Y)
For RISC chips, it means that access to memory by bytes is no
better than access by words when possible, so grab a whole word
and maybe make use of that. */
-#define SLOW_BYTE_ACCESS 1
+#define SLOW_BYTE_ACCESS 0
/* Define this macro if it is as good or better to call a constant
function address than to call an address kept in a register. */
/* On the ARC, calling through registers is slow. */
#define NO_FUNCTION_CSE
-
+
/* Section selection. */
/* WARNING: These section names also appear in dwarfout.c. */
@@ -848,34 +1438,42 @@ arc_select_cc_mode (OP, X, Y)
#define ARC_DEFAULT_DATA_SECTION ".data"
#define ARC_DEFAULT_RODATA_SECTION ".rodata"
-extern const char *arc_text_section, *arc_data_section, *arc_rodata_section;
+extern const char *arc_text_section,*arc_data_section,*arc_rodata_section;
/* initfini.c uses this in an asm. */
-#if defined (CRT_INIT) || defined (CRT_FINI)
+#if defined (CRT_INIT) || defined (CRT_FINI) || defined (CRT_BEGIN) || defined (CRT_END)
#define TEXT_SECTION_ASM_OP "\t.section .text"
#else
-#define TEXT_SECTION_ASM_OP arc_text_section
+#define TEXT_SECTION_ASM_OP arc_text_section /*"\t.section .text"*/
#endif
-#define DATA_SECTION_ASM_OP arc_data_section
+#define DATA_SECTION_ASM_OP arc_data_section /*"\t.section .data"*/
#undef READONLY_DATA_SECTION_ASM_OP
-#define READONLY_DATA_SECTION_ASM_OP arc_rodata_section
+#define READONLY_DATA_SECTION_ASM_OP arc_rodata_section /*"\t.section .rodata"*/
#define BSS_SECTION_ASM_OP "\t.section .bss"
+#define SDATA_SECTION_ASM_OP "\t.section .sdata"
+#define SBSS_SECTION_ASM_OP "\t.section .sbss"
+
+/* Expression whose value is a string, including spacing, containing the
+ assembler operation to identify the following data as initialization/termination
+ code. If not defined, GCC will assume such a section does not exist. */
+#define INIT_SECTION_ASM_OP "\t.section\t.init"
+#define FINI_SECTION_ASM_OP "\t.section\t.fini"
/* Define this macro if jump tables (for tablejump insns) should be
output in the text section, along with the assembler instructions.
Otherwise, the readonly data section is used.
This macro is irrelevant if there is no separate readonly data section. */
-/*#define JUMP_TABLES_IN_TEXT_SECTION*/
+#define JUMP_TABLES_IN_TEXT_SECTION (flag_pic || CASE_VECTOR_PC_RELATIVE)
/* For DWARF. Marginally different than default so output is "prettier"
(and consistent with above). */
-#define PUSHSECTION_ASM_OP "\t.section "
+#define PUSHSECTION_FORMAT "\t%s %s\n"
/* Tell crtstuff.c we're using ELF. */
#define OBJECT_FORMAT_ELF
-
+
/* PIC */
/* The register number of the register used to address a table of static
@@ -885,7 +1483,7 @@ extern const char *arc_text_section, *arc_data_section, *arc_rodata_section;
pointer and frame pointer registers. If this macro is not defined, it
is up to the machine-dependent files to allocate such a register (if
necessary). */
-#define PIC_OFFSET_TABLE_REGNUM (flag_pic ? 26 : INVALID_REGNUM)
+#define PIC_OFFSET_TABLE_REGNUM 26
/* Define this macro if the register defined by PIC_OFFSET_TABLE_REGNUM is
clobbered by calls. Do not define this macro if PIC_OFFSET_TABLE_REGNUM
@@ -900,10 +1498,14 @@ extern const char *arc_text_section, *arc_data_section, *arc_rodata_section;
check it either. You need not define this macro if all constants
(including SYMBOL_REF) can be immediate operands when generating
position independent code. */
-/*#define LEGITIMATE_PIC_OPERAND_P(X)*/
-
+#define LEGITIMATE_PIC_OPERAND_P(X) (arc_legitimate_pic_operand_p(X))
+
/* Control the assembler format that we output. */
+/* Output at beginning of assembler file. */
+#undef TARGET_ASM_FILE_START
+#define TARGET_ASM_FILE_START(FILE) arc_asm_file_start (FILE)
+
/* A C string constant describing how to begin a comment in the target
assembler language. The compiler assumes that the comment will
end at the end of the line. */
@@ -920,44 +1522,162 @@ extern const char *arc_text_section, *arc_data_section, *arc_rodata_section;
/* Globalizing directive for a label. */
#define GLOBAL_ASM_OP "\t.global\t"
+/* This is how to output an assembler line defining a `char' constant. */
+#define ASM_OUTPUT_CHAR(FILE, VALUE) \
+( fprintf (FILE, "\t.byte\t"), \
+ output_addr_const (FILE, (VALUE)), \
+ fprintf (FILE, "\n"))
+
+/* This is how to output an assembler line defining a `short' constant. */
+#define ASM_OUTPUT_SHORT(FILE, VALUE) \
+( fprintf (FILE, "\t.hword\t"), \
+ output_addr_const (FILE, (VALUE)), \
+ fprintf (FILE, "\n"))
+
+/* This is how to output an assembler line defining an `int' constant.
+ We also handle symbol output here. Code addresses must be right shifted
+ by 2 because that's how the jump instruction wants them.
+ We take care not to generate %st for post-A4 cores. */
+#define ASM_OUTPUT_INT(FILE, VALUE) \
+do { \
+ fprintf (FILE, "\t.word\t"); \
+ if (TARGET_A4 \
+ && ((GET_CODE (VALUE) == SYMBOL_REF \
+ && ARC_FUNCTION_NAME_PREFIX_P (*(XSTR (VALUE, 0)))) \
+ || GET_CODE (VALUE) == LABEL_REF)) \
+ { \
+ fprintf (FILE, "%%st(@"); \
+ output_addr_const (FILE, (VALUE)); \
+ fprintf (FILE, ")"); \
+ } \
+ else \
+ output_addr_const (FILE, (VALUE)); \
+ fprintf (FILE, "\n"); \
+} while (0)
+
+/* This is how to output an assembler line defining a `float' constant. */
+#define ASM_OUTPUT_FLOAT(FILE, VALUE) \
+{ \
+ long t; \
+ char str[30]; \
+ REAL_VALUE_TO_TARGET_SINGLE ((VALUE), t); \
+ REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \
+ fprintf (FILE, "\t.word\t0x%lx %s %s\n", \
+ t, ASM_COMMENT_START, str); \
+}
+
+/* This is how to output an assembler line defining a `double' constant. */
+#define ASM_OUTPUT_DOUBLE(FILE, VALUE) \
+{ \
+ long t[2]; \
+ char str[30]; \
+ REAL_VALUE_TO_TARGET_DOUBLE ((VALUE), t); \
+ REAL_VALUE_TO_DECIMAL ((VALUE), "%.20e", str); \
+ fprintf (FILE, "\t.word\t0x%lx %s %s\n\t.word\t0x%lx\n", \
+ t[0], ASM_COMMENT_START, str, t[1]); \
+}
+
+/* This is how to output an assembler line for a numeric constant byte. */
+#define ASM_BYTE_OP ".byte"
+#define ASM_OUTPUT_BYTE(FILE, VALUE) \
+ fprintf (FILE, "\t%s\t0x%x\n", ASM_BYTE_OP, (VALUE))
+
+/* This is how to output the definition of a user-level label named NAME,
+ such as the label on a static function or variable NAME. */
+#define ASM_OUTPUT_LABEL(FILE, NAME) \
+do { assemble_name (FILE, NAME); fputs (":\n", FILE); } while (0)
+
+#define ASM_NAME_P(NAME) ((NAME)[ARC_FUNCTION_NAME_PREFIX_P ((NAME)[0]) ? 1 : 0] == '*')
+
/* This is how to output a reference to a user-level label named NAME.
`assemble_name' uses this. */
/* We mangle all user labels to provide protection from linking code
compiled for different cpus. */
/* We work around a dwarfout.c deficiency by watching for labels from it and
not adding the '_' prefix nor the cpu suffix. There is a comment in
- dwarfout.c that says it should be using (*targetm.asm_out.internal_label). */
-extern const char *arc_mangle_cpu;
-#define ASM_OUTPUT_LABELREF(FILE, NAME) \
+ dwarfout.c that says it should be using ASM_OUTPUT_INTERNAL_LABEL. */
+#define ASM_OUTPUT_LABELREF(FILE, NAME1) \
do { \
+ const char *NAME; \
+ NAME = (*targetm.strip_name_encoding) (NAME1); \
if ((NAME)[0] == '.' && (NAME)[1] == 'L') \
- fprintf (FILE, "%s", NAME); \
+ fprintf (FILE, "%s", NAME); \
else \
{ \
- fputc ('_', FILE); \
- if (TARGET_MANGLE_CPU && arc_mangle_cpu != NULL) \
- fprintf (FILE, "%s_", arc_mangle_cpu); \
+ if (!ASM_NAME_P (NAME1)) \
+ fprintf (FILE, "%s", user_label_prefix); \
fprintf (FILE, "%s", NAME); \
} \
} while (0)
+/* This is how to output a reference to a symbol_ref / label_ref as
+ (part of) an operand. To disambiguate from register names like
+ a1 / a2 / status etc, symbols are preceded by '@'. */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+ ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0))
+#define ASM_OUTPUT_LABEL_REF(FILE,STR) \
+ do \
+ { \
+ fputc ('@', (FILE)); \
+ assemble_name ((FILE), (STR)); \
+ } \
+ while (0)
+
+/* Store in OUTPUT a string (made with alloca) containing
+ an assembler-name for a local static variable named NAME.
+ LABELNO is an integer which is different for each call. */
+#define ASM_FORMAT_PRIVATE_NAME(OUTPUT, NAME, LABELNO) \
+( (OUTPUT) = (char *) alloca (strlen ((NAME)) + 10), \
+ sprintf ((OUTPUT), "%s.%d", (NAME), (LABELNO)))
+
+/* The following macro defines the format used to output the second
+ operand of the .type assembler directive. Different svr4 assemblers
+ expect various different forms for this operand. The one given here
+ is just a default. You may need to override it in your machine-
+ specific tm.h file (depending upon the particulars of your assembler). */
+
+#undef TYPE_OPERAND_FMT
+#define TYPE_OPERAND_FMT "@%s"
+
+/* A C string containing the appropriate assembler directive to
+ specify the size of a symbol, without any arguments. On systems
+ that use ELF, the default (in `config/elfos.h') is `"\t.size\t"';
+ on other systems, the default is not to define this macro. */
+#undef SIZE_ASM_OP
+#define SIZE_ASM_OP "\t.size\t"
+
/* Assembler pseudo-op to equate one value with another. */
/* ??? This is needed because dwarfout.c provides a default definition too
- late for defaults.h (which contains the default definition of ASM_OUTPUT_DEF
+ late for defaults.h (which contains the default definition of ASM_OUTPUT_DEF
that we use). */
+#ifdef SET_ASM_OP
+#undef SET_ASM_OP
+#endif
#define SET_ASM_OP "\t.set\t"
+extern char rname56[], rname57[], rname58[], rname59[];
/* How to refer to registers in assembler output.
This sequence is indexed by compiler's hard-register-number (see above). */
-#define REGISTER_NAMES \
-{"r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
- "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
- "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
- "r24", "r25", "r26", "fp", "sp", "ilink1", "ilink2", "blink", \
- "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
- "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47", \
- "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
- "r56", "r57", "r58", "r59", "lp_count", "cc"}
+#define REGISTER_NAMES \
+{ "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", \
+ "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", \
+ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", \
+ "r24", "r25", "gp", "fp", "sp", "ilink1", "ilink2", "blink", \
+ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39", \
+ "d1", "d1", "d2", "d2", "r44", "r45", "r46", "r47", \
+ "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55", \
+ rname56,rname57,rname58,rname59,"lp_count", "cc", "ap", "pcl", \
+ "vr0", "vr1", "vr2", "vr3", "vr4", "vr5", "vr6", "vr7", \
+ "vr8", "vr9", "vr10", "vr11", "vr12", "vr13", "vr14", "vr15", \
+ "vr16", "vr17", "vr18", "vr19", "vr20", "vr21", "vr22", "vr23", \
+ "vr24", "vr25", "vr26", "vr27", "vr28", "vr29", "vr30", "vr31", \
+ "vr32", "vr33", "vr34", "vr35", "vr36", "vr37", "vr38", "vr39", \
+ "vr40", "vr41", "vr42", "vr43", "vr44", "vr45", "vr46", "vr47", \
+ "vr48", "vr49", "vr50", "vr51", "vr52", "vr53", "vr54", "vr55", \
+ "vr56", "vr57", "vr58", "vr59", "vr60", "vr61", "vr62", "vr63", \
+ "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \
+ "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", \
+ "lp_start", "lp_end" \
+}
/* Entry to the insn conditionalizer. */
#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \
@@ -965,7 +1685,7 @@ arc_final_prescan_insn (INSN, OPVEC, NOPERANDS)
/* A C expression which evaluates to true if CODE is a valid
punctuation character for use in the `PRINT_OPERAND' macro. */
-extern char arc_punct_chars[256];
+extern char arc_punct_chars[];
#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \
arc_punct_chars[(unsigned char) (CHAR)]
@@ -977,62 +1697,189 @@ arc_print_operand (FILE, X, CODE)
/* A C compound statement to output to stdio stream STREAM the
assembler syntax for an instruction operand that is a memory
- reference whose address is ADDR. ADDR is an RTL expression. */
+ reference whose address is ADDR. ADDR is an RTL expression.
+
+ On some machines, the syntax for a symbolic address depends on
+ the section that the address refers to. On these machines,
+ define the macro `ENCODE_SECTION_INFO' to store the information
+ into the `symbol_ref', and then check for it here. */
#define PRINT_OPERAND_ADDRESS(FILE, ADDR) \
arc_print_operand_address (FILE, ADDR)
-/* This is how to output an element of a case-vector that is absolute. */
+/* This is how to output an element of a case-vector that is absolute.
+ We take care not to generate %st for post-A4 cores. */
#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
do { \
char label[30]; \
ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
- fprintf (FILE, "\t.word %%st("); \
- assemble_name (FILE, label); \
- fprintf (FILE, ")\n"); \
+ fprintf (FILE, "\t.word "); \
+ if (TARGET_A4) \
+ fprintf (FILE, "%%st(@"); \
+ arc_assemble_name (FILE, label); \
+ if (TARGET_A4) \
+ fprintf (FILE, ")"); \
+ fprintf (FILE, "\n"); \
} while (0)
-/* This is how to output an element of a case-vector that is relative. */
+/* This is how to output an element of a case-vector that is relative.
+ We take care not to generate %st for post-A4 cores. */
#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
do { \
char label[30]; \
ASM_GENERATE_INTERNAL_LABEL (label, "L", VALUE); \
- fprintf (FILE, "\t.word %%st("); \
- assemble_name (FILE, label); \
+ switch (GET_MODE (BODY)) \
+ { \
+ case QImode: fprintf (FILE, "\t.byte "); break; \
+ case HImode: fprintf (FILE, "\t.hword "); break; \
+ case SImode: fprintf (FILE, "\t.word "); break; \
+ default: gcc_unreachable (); \
+ } \
+ if (TARGET_A4) \
+ fprintf (FILE, "%%st("); \
+ arc_assemble_name (FILE, label); \
fprintf (FILE, "-"); \
ASM_GENERATE_INTERNAL_LABEL (label, "L", REL); \
- assemble_name (FILE, label); \
- fprintf (FILE, ")\n"); \
+ arc_assemble_name (FILE, label); \
+ if (TARGET_COMPACT_CASESI) \
+ fprintf (FILE, " + %d", 4 + arc_get_unalign ()); \
+ if (TARGET_A4) \
+ fprintf (FILE, ")"); \
+ fprintf (FILE, "\n"); \
} while (0)
+#define JUMP_ALIGN(LABEL) (arc_size_opt_level < 2 ? 2 : 0)
+#define LABEL_ALIGN_AFTER_BARRIER(LABEL) \
+ (JUMP_ALIGN(LABEL) \
+ ? JUMP_ALIGN(LABEL) \
+ : GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \
+ ? 1 : 0)
/* The desired alignment for the location counter at the beginning
of a loop. */
-/* On the ARC, align loops to 32 byte boundaries (cache line size)
- if -malign-loops. */
-#define LOOP_ALIGN(LABEL) (TARGET_ALIGN_LOOPS ? 5 : 0)
+/* On the ARC, align loops to 4 byte boundaries unless doing all-out size
+ optimization. */
+#define LOOP_ALIGN JUMP_ALIGN
+
+#define LABEL_ALIGN(LABEL) (arc_label_align (LABEL))
/* This is how to output an assembler line
that says to advance the location counter
to a multiple of 2**LOG bytes. */
#define ASM_OUTPUT_ALIGN(FILE,LOG) \
-do { if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); } while (0)
-
+do { \
+ if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); \
+ if ((LOG) > 1) \
+ arc_clear_unalign (); \
+} while (0)
+
+/* ASM_OUTPUT_ALIGNED_DECL_LOCAL (STREAM, DECL, NAME, SIZE, ALIGNMENT)
+ Define this macro when you need to see the variable's decl in order to
+ choose what to output. */
+#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(STREAM, DECL, NAME, SIZE, ALIGNMENT) \
+arc_asm_output_aligned_decl_local (STREAM, DECL, NAME, SIZE, ALIGNMENT, 0)
+
+/* To translate the return value of arc_function_type into a register number
+ to jump through for function return. */
+extern int arc_return_address_regs[4];
+
/* Debugging information. */
/* Generate DBX and DWARF debugging information. */
-#define DBX_DEBUGGING_INFO 1
+#ifdef DBX_DEBUGGING_INFO
+#undef DBX_DEBUGGING_INFO
+#endif
+#define DBX_DEBUGGING_INFO
+
+#ifdef DWARF2_DEBUGGING_INFO
+#undef DWARF2_DEBUGGING_INFO
+#endif
+#define DWARF2_DEBUGGING_INFO
/* Prefer STABS (for now). */
#undef PREFERRED_DEBUGGING_TYPE
-#define PREFERRED_DEBUGGING_TYPE DBX_DEBUG
+#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
+
+#define CAN_DEBUG_WITHOUT_FP
+
+/* How to renumber registers for dbx and gdb. */
+#define DBX_REGISTER_NUMBER(REGNO) \
+ ((TARGET_MULMAC_32BY16_SET && (REGNO) >= 56 && (REGNO) <= 57) \
+ ? ((REGNO) ^ !TARGET_BIG_ENDIAN) \
+ : (TARGET_MUL64_SET && (REGNO) >= 57 && (REGNO) <= 59) \
+ ? ((REGNO) == 57 \
+ ? 58 /* MMED */ \
+ : ((REGNO) & 1) ^ TARGET_BIG_ENDIAN \
+ ? 59 /* MHI */ \
+ : 57 + !!TARGET_MULMAC_32BY16_SET) /* MLO */ \
+ : (REGNO))
+
+#define DWARF_FRAME_REGNUM(REG) (REG)
+
+#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (31)
+
+#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 31)
+
+/* Frame info */
+/* Force the generation of dwarf .debug_frame sections even if not
+ compiling -g. This guarantees that we can unwind the stack. */
+
+#define DWARF2_FRAME_INFO 1
+
+/* Define this macro to 0 if your target supports DWARF 2 frame unwind
+ information, but it does not yet work with exception handling. */
+#define DWARF2_UNWIND_INFO 0
+
/* Turn off splitting of long stabs. */
#define DBX_CONTIN_LENGTH 0
-
+
+/* A C statement to output DBX debugging information at the end of
+ compilation of the main source file NAME. If you don't define this macro,
+ nothing special is output at the end of compilation, which is correct for
+ most machines. */
+
+/* ??? Disabled; kept for reference. */
+/* #undef DBX_OUTPUT_MAIN_SOURCE_FILE_END */
+/* #define DBX_OUTPUT_MAIN_SOURCE_FILE_END(FILE, FILENAME) \ */
+/* do { \ */
+/* text_section (); \ */
+/* fprintf ((FILE), "\t.stabs \"\",%d,0,0,.Letext\n.Letext:\n", N_SO); \ */
+/* } while (0) */
+
/* Miscellaneous. */
/* Specify the machine mode that this machine uses
- for the index in the tablejump instruction. */
-#define CASE_VECTOR_MODE Pmode
+ for the index in the tablejump instruction.
+ If we have pc relative case vectors, we start the case vector shortening
+ with QImode. */
+#define CASE_VECTOR_MODE \
+ ((optimize && (CASE_VECTOR_PC_RELATIVE || flag_pic)) ? QImode : Pmode)
+
+/* Define as C expression which evaluates to nonzero if the tablejump
+ instruction expects the table to contain offsets from the address of the
+ table.
+ Do not define this if the table should contain absolute addresses. */
+#define CASE_VECTOR_PC_RELATIVE TARGET_CASE_VECTOR_PC_RELATIVE
+
+#define CASE_VECTOR_SHORTEN_MODE(MIN_OFFSET, MAX_OFFSET, BODY) \
+ CASE_VECTOR_SHORTEN_MODE_1 \
+ (MIN_OFFSET, TARGET_COMPACT_CASESI ? MAX_OFFSET + 6 : MAX_OFFSET, BODY)
+
+#define CASE_VECTOR_SHORTEN_MODE_1(MIN_OFFSET, MAX_OFFSET, BODY) \
+((MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 255 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, QImode) \
+ : (MIN_OFFSET) >= -128 && (MAX_OFFSET) <= 127 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, QImode) \
+ : (MIN_OFFSET) >= 0 && (MAX_OFFSET) <= 65535 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 1, HImode) \
+ : (MIN_OFFSET) >= -32768 && (MAX_OFFSET) <= 32767 \
+ ? (ADDR_DIFF_VEC_FLAGS (BODY).offset_unsigned = 0, HImode) \
+ : SImode)
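Read top to bottom, this picks the narrowest element mode whose range covers every offset in the dispatch table; a standalone rendering of just the selection logic (invented names, sketch only):

  #include <assert.h>
  #include <string.h>

  static const char *vec_mode (long min, long max)
  {
    return (min >= 0 && max <= 255)          ? "QImode/unsigned"
         : (min >= -128 && max <= 127)       ? "QImode/signed"
         : (min >= 0 && max <= 65535)        ? "HImode/unsigned"
         : (min >= -32768 && max <= 32767)   ? "HImode/signed"
         : "SImode";
  }

  int main (void)
  {
    assert (!strcmp (vec_mode (0, 200), "QImode/unsigned"));
    assert (!strcmp (vec_mode (-100, 120), "QImode/signed"));
    assert (!strcmp (vec_mode (0, 300), "HImode/unsigned"));
    assert (!strcmp (vec_mode (-40000, 40000), "SImode"));
    return 0;
  }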
+
+#define ADDR_VEC_ALIGN(VEC_INSN) \
+ (exact_log2 (GET_MODE_SIZE (GET_MODE (PATTERN (VEC_INSN)))))
+#undef ASM_OUTPUT_BEFORE_CASE_LABEL
+#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \
+ ASM_OUTPUT_ALIGN ((FILE), ADDR_VEC_ALIGN (TABLE));
/* Define if operations between registers always perform the operation
on the full register even if a narrower mode is specified. */
@@ -1041,27 +1888,43 @@ do { if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); } while (0)
/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD
will either zero-extend or sign-extend. The value of this macro should
be the code that says which one of the two operations is implicitly
- done, UNKNOWN if none. */
+ done, UNKNOWN if none. */
#define LOAD_EXTEND_OP(MODE) ZERO_EXTEND
+
/* Max number of bytes we can move from memory to memory
in one reasonably fast instruction. */
#define MOVE_MAX 4
+/* Let the movmem expander handle small block moves. */
+#define MOVE_BY_PIECES_P(LEN, ALIGN) 0
+#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \
+ (move_by_pieces_ninsns (SIZE, ALIGN, MOVE_MAX_PIECES + 1) \
+ < (unsigned int) MOVE_RATIO (!optimize_size))
+
+/* Undo the effects of the movmem pattern's presence on STORE_BY_PIECES_P. */
+#define MOVE_RATIO(SPEED) ((SPEED) ? 15 : 3)
+
/* Define this to be nonzero if shift instructions ignore all but the low-order
- few bits. */
-#define SHIFT_COUNT_TRUNCATED 1
+ few bits. Changed from 1 to 0 to fix rotate pattern testcases
+ (e.g. 20020226-1.c): with truncation, the upper 27 bits of a shift
+ count are discarded, which lets a combine phase optimization treat
+ a << (32-b) as equivalent to a << (-b). */
+#define SHIFT_COUNT_TRUNCATED 0
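
For readers unfamiliar with the interaction the comment describes, here is a minimal sketch of the rotate idiom involved (the values are chosen for illustration; the combine transformation itself happens on RTL, not on C source):

    #include <stdio.h>

    /* Rotate left by b, for 1 <= b <= 31.  With SHIFT_COUNT_TRUNCATED == 1,
       combine may rewrite the `32 - b' below as `-b', which is only valid
       if the hardware keeps just the low 5 bits of a shift count.  */
    static unsigned rotl (unsigned x, unsigned b)
    {
      return (x << b) | (x >> (32 - b));
    }

    int main (void)
    {
      printf ("%08x\n", rotl (0x80000001u, 1));   /* prints 00000003 */
      return 0;
    }
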
/* Value is 1 if truncating an integer of INPREC bits to OUTPREC bits
is done just by pretending it is already truncated. */
#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+/* We assume that the store-condition-codes instructions store 0 for false
+ and some other value for true. This is the value stored for true. */
+#define STORE_FLAG_VALUE 1
+
/* Specify the machine mode that pointers have.
After generation of rtl, the compiler makes no further distinction
between pointers and any other objects of this machine mode. */
-/* ??? The arc doesn't have full 32-bit pointers, but making this PSImode has
- its own problems (you have to add extendpsisi2 and trucnsipsi2 but how does
- one do it without getting excess code?). Try to avoid it. */
+/* ARCompact has full 32-bit pointers. */
#define Pmode SImode
/* A function address in a call instruction. */
@@ -1070,13 +1933,17 @@ do { if ((LOG) != 0) fprintf (FILE, "\t.align %d\n", 1 << (LOG)); } while (0)
/* alloca should avoid clobbering the old register save area. */
/* ??? Not defined in tm.texi. */
#define SETJMP_VIA_SAVE_AREA
-
+
/* Define the information needed to generate branch and scc insns. This is
stored from the compare operation. Note that we can't use "rtx" here
since it hasn't been defined! */
extern struct rtx_def *arc_compare_op0, *arc_compare_op1;
-/* ARC function types. */
+/* Define the function that builds the compare insn for scc and bcc. */
+/*extern struct rtx_def *gen_compare_reg ();*/
+
+
+/* ARC function types. */
enum arc_function_type {
ARC_FUNCTION_UNKNOWN, ARC_FUNCTION_NORMAL,
/* These are interrupt handlers. The name corresponds to the register
@@ -1085,4 +1952,82 @@ enum arc_function_type {
};
#define ARC_INTERRUPT_P(TYPE) \
((TYPE) == ARC_FUNCTION_ILINK1 || (TYPE) == ARC_FUNCTION_ILINK2)
-/* Compute the type of a function from its DECL. */
+
+/* Compute the type of a function from its DECL. Needed for EPILOGUE_USES. */
+struct function;
+extern enum arc_function_type arc_compute_function_type (struct function *);
+
+/* Called by crtstuff.c to emit, in the section given by SECTION_OP, a call
+ to the function FUNC, and then to switch back to the text section. */
+#undef CRT_CALL_STATIC_FUNCTION
+#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
+ asm (SECTION_OP "\n\t" \
+ "bl @" USER_LABEL_PREFIX #FUNC "\n" \
+ TEXT_SECTION_ASM_OP);
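
To make the stringizing concrete: for a hypothetical FUNC of frob, and assuming the usual ELF values ".section .init" for SECTION_OP, "\t.text" for TEXT_SECTION_ASM_OP, and an empty USER_LABEL_PREFIX (none of which this hunk shows), the macro would expand to roughly:

    asm (".section .init" "\n\t"
         "bl @" "frob" "\n"
         "\t.text");
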
+
+/* This macro expands to the name of the scratch register r12, used for
+ temporary calculations according to the ABI. */
+#define ARC_TEMP_SCRATCH_REG "r12"
+
+/* The C++ compiler must use one bit to indicate whether the function
+ that will be called through a pointer-to-member-function is
+ virtual. Normally, we assume that the low-order bit of a function
+ pointer must always be zero. Then, by ensuring that the
+ vtable_index is odd, we can distinguish which variant of the union
+ is in use. But, on some platforms function pointers can be odd,
+ and so this doesn't work. In that case, we use the low-order bit
+ of the `delta' field, and shift the remainder of the `delta' field
+ to the left. We need to do this for A4 because the address is always
+ shifted and thus can be odd. */
+#define TARGET_PTRMEMFUNC_VBIT_LOCATION \
+ (TARGET_A4 ? ptrmemfunc_vbit_in_delta : ptrmemfunc_vbit_in_pfn)
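
A compact sketch of the two encodings the comment contrasts (the struct and field names are hypothetical; the real representation lives in the C++ front end):

    #include <stdio.h>
    #include <stdint.h>

    struct pmf { uintptr_t pfn; intptr_t delta; };

    /* vbit-in-pfn: the call is virtual iff pfn is odd (pfn then holds a
       vtable index rather than a code address).  */
    static int virtual_in_pfn (struct pmf p) { return p.pfn & 1; }

    /* vbit-in-delta: the call is virtual iff delta is odd; the real
       delta is recovered by shifting right one bit.  */
    static int virtual_in_delta (struct pmf p) { return p.delta & 1; }
    static intptr_t real_delta (struct pmf p)  { return p.delta >> 1; }

    int main (void)
    {
      struct pmf a = { 0x1001, 8 };        /* vbit-in-pfn encoding */
      struct pmf b = { 0x1000, 8 * 2 + 1 };/* vbit-in-delta encoding */
      printf ("%d %d %ld\n", virtual_in_pfn (a),
              virtual_in_delta (b), (long) real_delta (b));
      return 0;
    }
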
+
+#define INSN_SETS_ARE_DELAYED(X) \
+ (GET_CODE (X) == INSN \
+ && GET_CODE (PATTERN (X)) != SEQUENCE \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && (get_attr_type (X) == TYPE_CALL || get_attr_type (X) == TYPE_SFUNC))
+
+#define INSN_REFERENCES_ARE_DELAYED(insn) INSN_SETS_ARE_DELAYED (insn)
+
+#define CALL_ATTR(X, NAME) \
+ ((CALL_P (X) || NONJUMP_INSN_P (X)) \
+ && GET_CODE (PATTERN (X)) != USE \
+ && GET_CODE (PATTERN (X)) != CLOBBER \
+ && get_attr_is_##NAME (X) == IS_##NAME##_YES)
+
+#define REVERSE_CONDITION(CODE,MODE) \
+ (((MODE) == CC_FP_GTmode || (MODE) == CC_FP_GEmode \
+ || (MODE) == CC_FP_UNEQmode || (MODE) == CC_FP_ORDmode \
+ || (MODE) == CC_FPXmode) \
+ ? reverse_condition_maybe_unordered ((CODE)) \
+ : reverse_condition ((CODE)))
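
The reason the FP modes take the maybe_unordered path: with a NaN operand, an ordered comparison and its naive reverse are both false, so the reverse of GT must be "unordered or LE" rather than LE. A quick demonstration:

    #include <stdio.h>
    #include <math.h>

    int main (void)
    {
      double a = NAN, b = 1.0;
      printf ("a >  b  : %d\n", a > b);      /* 0 */
      printf ("a <= b  : %d\n", a <= b);     /* 0 too - not the inverse */
      printf ("!(a > b): %d\n", !(a > b));   /* 1: unordered-or-LE */
      return 0;
    }
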
+
+#define ADJUST_INSN_LENGTH(X, LENGTH) \
+ (LENGTH) += arc_adjust_insn_length ((X), (LENGTH))
+
+#define IS_ASM_LOGICAL_LINE_SEPARATOR(C,STR) ((C) == '`')
+
+#define INIT_EXPANDERS arc_init_expanders ()
+
+#define CFA_FRAME_BASE_OFFSET(FUNDECL) (-arc_decl_pretend_args ((FUNDECL)))
+
+#define ARG_POINTER_CFA_OFFSET(FNDECL) \
+ (FIRST_PARM_OFFSET (FNDECL) + arc_decl_pretend_args ((FNDECL)))
+
+/* There are a number of peepholes which are most likely the result of
+ the original authors being ignorant of the combine pass and/or how
+ to drive it properly with matching instruction and splitter patterns.
+ At some point we need to investigate whether we can remove these
+ patterns without degradation in the quality of the generated code. */
+/* Should we perform the questionable peepholes? */
+#define TARGET_DROSS 1
+/* Emit some warning / error when any of the questionable optimizations
+ trigger. IDENT is a string constant identifying the peephole. */
+#define DROSS(ident) \
+do { \
+/* FIXME. */ \
+} while (0)
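
The body above is an acknowledged stub. One possible fleshing-out, assuming GCC's internal warning () diagnostic is available at the point of use (a sketch only, not what the branch does):

    #define DROSS(ident) \
    do { \
      static int dross_reported_p; \
      if (!dross_reported_p) \
        { \
          dross_reported_p = 1; \
          warning (0, "questionable peephole %s triggered", (ident)); \
        } \
    } while (0)
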
+
+#endif /* GCC_ARC_H */
diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md
index da435941427..53b6dbea4a8 100644
--- a/gcc/config/arc/arc.md
+++ b/gcc/config/arc/arc.md
@@ -1,7 +1,22 @@
-;; Machine description of the Argonaut ARC cpu for GNU C compiler
-;; Copyright (C) 1994, 1997, 1998, 1999, 2000, 2004, 2005, 2007
+;; Machine description of the ARC ARCompact cpu for GNU C compiler
+;; Copyright (C) 1994, 1997, 1999, 2006, 2007, 2008, 2009
;; Free Software Foundation, Inc.
+;; Sources derived from work done by Sankhya Technologies (www.sankhya.com)
+
+;; Position Independent Code support added, code cleaned up,
+;; Comments and Support For ARC700 instructions added by
+;; Saurabh Verma (saurabh.verma@codito.com)
+;; Ramana Radhakrishnan (ramana.radhakrishnan@codito.com)
+;;
+;; Profiling support and performance improvements by
+;; Joern Rennecke (joern.rennecke@arc.com)
+;;
+;; Support for DSP multiply instructions and mul64
+;; instructions for ARC600; and improvements in flag setting
+;; instructions by
+;; Muhammad Khurram Riaz (Khurram.Riaz@arc.com)
+
;; This file is part of GCC.
;; GCC is free software; you can redistribute it and/or modify
@@ -20,55 +35,174 @@
;; See file "rtl.def" for documentation on define_insn, match_*, et. al.
-;; ??? This is an old port, and is undoubtedly suffering from bit rot.
-
-;; Insn type. Used to default other attribute values.
-
-(define_attr "type"
- "move,load,store,cmove,unary,binary,compare,shift,mul,uncond_branch,branch,call,call_no_delay_slot,multi,misc"
- (const_string "binary"))
-
-;; Length (in # of insns, long immediate constants counted too).
-;; ??? There's a nasty interaction between the conditional execution fsm
-;; and insn lengths: insns with shimm values cannot be conditionally executed.
-(define_attr "length" ""
- (cond [(eq_attr "type" "load")
- (if_then_else (match_operand 1 "long_immediate_loadstore_operand" "")
- (const_int 2) (const_int 1))
+;; <op> dest, src Two-operand instruction syntax
+;; <op> dest, src1, src2 Three-operand instruction syntax
- (eq_attr "type" "store")
- (if_then_else (match_operand 0 "long_immediate_loadstore_operand" "")
- (const_int 2) (const_int 1))
+;; ARC and ARCompact PREDICATES:
+;;
+;; comparison_operator LT, GT, LE, GE, LTU, GTU, LEU, GEU, EQ, NE
+;; memory_operand memory [m]
+;; immediate_operand immediate constant [IJKLMNOP]
+;; register_operand register [rq]
+;; general_operand register, memory, constant [rqmIJKLMNOP]
- (eq_attr "type" "move,unary,compare")
- (if_then_else (match_operand 1 "long_immediate_operand" "")
- (const_int 2) (const_int 1))
+;; Note that the predicates are only used when selecting a pattern
+;; to determine if an operand is valid.
- (eq_attr "type" "binary,mul")
- (if_then_else (match_operand 2 "long_immediate_operand" "")
- (const_int 2) (const_int 1))
+;; The constraints then select which of the possible valid operands
+;; is present (and guide register selection). The actual assembly
+;; instruction is then selected on the basis of the constraints.
- (eq_attr "type" "cmove")
- (if_then_else (match_operand 2 "register_operand" "")
- (const_int 1) (const_int 2))
+;; ARC and ARCompact CONSTRAINTS:
+;;
+;; b stack pointer r28
+;; f frame pointer r27
+;; Rgp global pointer r26
+;; g general reg, memory, constant
+;; m memory
+;; p memory address
+;; q registers commonly used in
+;; 16-bit insns r0-r3, r12-r15
+;; c core registers r0-r60, ap, pcl
+;; r general registers r0-r28, blink, ap, pcl
+;;
+;; H fp 16-bit constant
+;; I signed 12-bit immediate (for ARCompact)
+;; signed 9-bit immediate (for ARCtangent-A4)
+;; J long immediate (signed 32-bit immediate)
+;; K unsigned 3-bit immediate (for ARCompact)
+;; L unsigned 6-bit immediate (for ARCompact)
+;; M unsigned 5-bit immediate (for ARCompact)
+;; O unsigned 7-bit immediate (for ARCompact)
+;; P unsigned 8-bit immediate (for ARCompact)
+;; N constant '1' (for ARCompact)
+
+
+;; ashwin : include options.h from build dir
+;; (include "arc.c")
+
+
+;; TODO:
+;; -> Supporting arith/logic insns which update the status flag based on the
+;; operation result (i.e <op>.f type insns).
+;; -> conditional jump Jcc
+;; -> prefetch instruction
+;; -> rsub insn
+
+;; -----------------------------------------------------------------------------
+
+;; Include DFA schedulers
+(include ("arc600.md"))
+(include ("arc700.md"))
+
+;; Predicates
+
+(include ("predicates.md"))
+(include ("constraints.md"))
+;; -----------------------------------------------------------------------------
+
+;; UNSPEC Usage:
+;; ~~~~~~~~~~~~
+;; -----------------------------------------------------------------------------
+;; Symbolic name Value Desc.
+;; -----------------------------------------------------------------------------
+;; UNSPEC_PLT 3 symbol to be referenced through the PLT
+;; UNSPEC_GOT 4 symbol to be referenced through the GOT
+;; UNSPEC_GOTOFF 5 Local symbol. To be referenced relative to the
+;; GOTBASE. (Referenced as @GOTOFF)
+;; ----------------------------------------------------------------------------
+
+
+(define_constants
+ [(UNSPEC_NORM 11) ; norm generation through builtins. candidate for scheduling
+ (UNSPEC_NORMW 12) ; normw generation through builtins. candidate for scheduling
+ (UNSPEC_SWAP 13) ; swap generation through builtins. candidate for scheduling
+ (UNSPEC_MUL64 14) ; mul64 generation through builtins. candidate for scheduling
+ (UNSPEC_MULU64 15) ; mulu64 generation through builtins. candidate for scheduling
+ (UNSPEC_DIVAW 16) ; divaw generation through builtins. candidate for scheduling
+ (UNSPEC_DIRECT 17)
+ (UNSPEC_PROF 18) ; profile callgraph counter
+ (UNSPEC_LP 19) ; to set LP_END
+ (UNSPEC_CASESI 20)
+ (VUNSPEC_RTIE 17) ; blockage insn for rtie generation
+ (VUNSPEC_SYNC 18) ; blockage insn for sync generation
+ (VUNSPEC_BRK 19) ; blockage insn for brk generation
+ (VUNSPEC_FLAG 20) ; blockage insn for flag generation
+ (VUNSPEC_SLEEP 21) ; blockage insn for sleep generation
+ (VUNSPEC_SWI 22) ; blockage insn for swi generation
+ (VUNSPEC_CORE_READ 23) ; blockage insn for reading a core register
+ (VUNSPEC_CORE_WRITE 24) ; blockage insn for writing to a core register
+ (VUNSPEC_LR 25) ; blockage insn for reading an auxiliary register
+ (VUNSPEC_SR 26) ; blockage insn for writing to an auxiliary register
+ (VUNSPEC_TRAP_S 27) ; blockage insn for trap_s generation
+ (VUNSPEC_UNIMP_S 28) ; blockage insn for unimp_s generation
+
+ (SP_REG 28)
+ (ILINK1_REGNUM 29)
+ (ILINK2_REGNUM 30)
+ (RETURN_ADDR_REGNUM 31)
+ (LP_COUNT 60)
+ (CC_REG 61)
+ (LP_START 144)
+ (LP_END 145)
+ ]
+)
+
+(define_attr "is_sfunc" "no,yes" (const_string "no"))
- (eq_attr "type" "multi") (const_int 2)
- ]
+;; Insn type. Used to default other attribute values.
- (const_int 1)))
+(define_attr "type"
+ "move,load,store,cmove,unary,binary,compare,shift,uncond_branch,jump,branch,
+ brcc,brcc_no_delay_slot,call,sfunc,call_no_delay_slot,
+ multi,umulti, two_cycle_core,lr,sr,divaw,loop_setup,loop_end,return,
+ misc,spfp,dpfp_mult,dpfp_addsub,mulmac_600,cc_arith,
+ simd_vload, simd_vload128, simd_vstore, simd_vmove, simd_vmove_else_zero,
+ simd_vmove_with_acc, simd_varith_1cycle, simd_varith_2cycle,
+ simd_varith_with_acc, simd_vlogic, simd_vlogic_with_acc,
+ simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
+ simd_valign, simd_valign_with_acc, simd_vcontrol,
+ simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma"
+ (cond [(eq_attr "is_sfunc" "yes")
+ (cond [(eq (symbol_ref "TARGET_LONG_CALLS_SET") (const_int 0))
+ (const_string "call")
+ (ne (symbol_ref "flag_pic") (const_int 0))
+ (const_string "sfunc")]
+ (const_string "call_no_delay_slot"))]
+ (const_string "binary")))
+
+;; The following three attributes are mixed case so that they can be
+;; used conveniently with the CALL_ATTR macro.
+(define_attr "is_CALL" "no,yes"
+ (cond [(eq_attr "is_sfunc" "yes") (const_string "yes")
+ (eq_attr "type" "call,call_no_delay_slot") (const_string "yes")]
+ (const_string "no")))
+
+(define_attr "is_SIBCALL" "no,yes" (const_string "no"))
+
+(define_attr "is_NON_SIBCALL" "no,yes"
+ (cond [(eq_attr "is_SIBCALL" "yes") (const_string "no")
+ (eq_attr "is_CALL" "yes") (const_string "yes")]
+ (const_string "no")))
+
+
+;; Attribute describing the processor
+(define_attr "cpu" "A4,A5,ARC600,ARC700"
+ (const (symbol_ref "arc_cpu_attr")))
+
+;; true for compact instructions (those with _s suffix)
+;; "maybe" means compact unless we conditionalize the insn.
+(define_attr "iscompact" "true,maybe,true_limm,maybe_limm,false"
+ (cond [(eq_attr "type" "sfunc")
+ (const_string "maybe")]
+ (const_string "false")))
-;; The length here is the length of a single asm. Unfortunately it might be
-;; 1 or 2 so we must allow for 2. That's ok though. How often will users
-;; lament asm's not being put in delay slots?
-(define_asm_attributes
- [(set_attr "length" "2")
- (set_attr "type" "multi")])
;; Condition codes: this one is used by final_prescan_insn to speed up
;; conditionalizing instructions. It saves having to scan the rtl to see if
;; it uses or alters the condition codes.
-;; USE: This insn uses the condition codes (e.g.: a conditional branch).
+;; USE: This insn uses the condition codes (eg: a conditional branch).
;; CANUSE: This insn can use the condition codes (for conditional execution).
;; SET: All condition codes are set by this insn.
;; SET_ZN: the Z and N flags are set by this insn.
@@ -76,186 +210,743 @@
;; CLOB: The condition codes are set to unknown values by this insn.
;; NOCOND: This insn can't use and doesn't affect the condition codes.
-(define_attr "cond" "use,canuse,set,set_zn,set_znc,clob,nocond"
- (cond [(and (eq_attr "type" "unary,binary,move")
- (eq_attr "length" "1"))
- (const_string "canuse")
-
- (eq_attr "type" "compare")
- (const_string "set")
-
- (eq_attr "type" "cmove,branch")
- (const_string "use")
+(define_attr "cond" "use,canuse,canuse_limm,canuse_limm_add,set,set_zn,clob,nocond"
+ (if_then_else (eq_attr "cpu" "A4")
+ (cond [(and (eq_attr "type" "unary,move")
+ (match_operand 1 "register_operand" ""))
+ (const_string "canuse")
+
+ (and (eq_attr "type" "binary")
+ (match_operand 2 "register_operand" ""))
+ (const_string "canuse")
+
+ (eq_attr "type" "compare")
+ (const_string "set")
+
+ (eq_attr "type" "cmove,branch")
+ (const_string "use")
+
+ (eq_attr "type" "multi,misc,shift")
+ (const_string "clob")
+ ]
+
+ (const_string "nocond"))
+
+ (if_then_else (eq_attr "iscompact" "maybe,false")
+ (cond [(eq_attr "type" "unary,move")
+ (if_then_else
+ (ior (match_operand 1 "u6_immediate_operand" "")
+ (match_operand 1 "long_immediate_operand" ""))
+ (const_string "canuse")
+ (const_string "nocond"))
+
+ (eq_attr "type" "binary")
+ (cond [(ne (symbol_ref "REGNO (operands[0])")
+ (symbol_ref "REGNO (operands[1])"))
+ (const_string "nocond")
+ (match_operand 2 "register_operand" "")
+ (const_string "canuse")
+ (match_operand 2 "u6_immediate_operand" "")
+ (const_string "canuse")
+ (match_operand 2 "long_immediate_operand" "")
+ (const_string "canuse")
+ (match_operand 2 "const_int_operand" "")
+ (const_string "canuse_limm")]
+ (const_string "nocond"))
+
+ (eq_attr "type" "compare")
+ (const_string "set")
+
+ (eq_attr "type" "cmove,branch")
+ (const_string "use")
+
+ (eq_attr "is_sfunc" "yes")
+ (cond [(ne (symbol_ref "(TARGET_MEDIUM_CALLS
+ && !TARGET_LONG_CALLS_SET
+ && flag_pic)")
+ (const_int 0))
+ (const_string "canuse_limm_add")
+ (ne (symbol_ref "(TARGET_MEDIUM_CALLS
+ && !TARGET_LONG_CALLS_SET)")
+ (const_int 0))
+ (const_string "canuse_limm")]
+ (const_string "canuse"))
+
+ ]
+
+ (const_string "nocond"))
+
+ (cond [(eq_attr "type" "compare")
+ (const_string "set")
+
+ (eq_attr "type" "cmove,branch")
+ (const_string "use")
+
+ ]
+
+ (const_string "nocond")))))
+
+(define_attr "verify_short" "no,yes"
+ (if_then_else
+ (ne (symbol_ref "arc_verify_short (insn, insn_current_address & 2, 0)")
+ (const_int 0))
+ (const_string "yes") (const_string "no")))
+
+; Is there an instruction that we are actually putting into the delay slot?
+(define_attr "delay_slot_filled" "no,yes"
+ (cond [(ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) == insn") (const_int 0))
+ (const_string "no")
+ (ne (symbol_ref "!TARGET_AT_DBR_CONDEXEC
+ && INSN_ANNULLED_BRANCH_P (insn)
+ && !INSN_FROM_TARGET_P (NEXT_INSN (insn))")
+ (const_int 0))
+ (const_string "no")]
+ (const_string "yes")))
+
+; Is a delay slot present for purposes of shorten_branches?
+; We have to take the length of this insn into account for forward branches
+; even if we don't put the insn actually into a delay slot.
+(define_attr "delay_slot_present" "no,yes"
+ (cond [(ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) == insn") (const_int 0))
+ (const_string "no")]
+ (const_string "yes")))
+
+; We can't use get_attr_length (NEXT_INSN (insn)) because this gives the
+; length of a different insn with the same uid.
+(define_attr "delay_slot_length" ""
+ (cond [(ne (symbol_ref "NEXT_INSN (PREV_INSN (insn)) == insn") (const_int 0))
+ (const_int 0)]
+ (symbol_ref "get_attr_length (NEXT_INSN (PREV_INSN (insn)))
+ - get_attr_length (insn)")))
+
+
+;; The length variations of ARCompact can't be expressed with the traditional
+;; mechanisms in a way that avoids infinite looping in shorten_branches.
+;; Given a set of potentially short insns, we could express the effect of
+;; arc_verify_short returning zero as an alignment of the current or the
+;; following insn, depending on alignment.
+;; However, which branch / jump insns are potentially short in turn depends
+;; on instruction length.
+(define_attr "lock_length" "" (const_int 0))
+
+;; Length (in # of bytes, long immediate constants counted too).
+;; ??? There's a nasty interaction between the conditional execution fsm
+;; and insn lengths: insns with shimm values cannot be conditionally executed.
+(define_attr "length" ""
+ (cond
+ [(eq_attr "lock_length" "!0") (const_int 2)
+ (eq_attr "cpu" "A4")
+ (cond [(eq_attr "type" "load")
+ (if_then_else
+ (match_operand 1 "long_immediate_loadstore_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "store")
+ (if_then_else
+ (match_operand 0 "long_immediate_loadstore_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "move,unary,compare")
+ (if_then_else (match_operand 1 "long_immediate_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "binary")
+ (if_then_else (match_operand 2 "long_immediate_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "cmove")
+ (if_then_else (match_operand 1 "register_operand" "")
+ (const_int 4) (const_int 8))
+
+ (eq_attr "type" "multi") (const_int 8)
+ (eq_attr "cond" "set,set_zn,clob") (const_int 8)
+ ]
+
+ (const_int 4))
+
+ (eq_attr "iscompact" "true,maybe")
+ ; The length can vary because of ADJUST_INSN_LENGTH.
+ ; Make sure that variable_length_p will be true.
+ (cond
+ [(and (ne (symbol_ref "0") (const_int 0)) (eq (match_dup 0) (pc)))
+ (const_int 4)
+ (eq_attr "verify_short" "yes")
+ (const_int 2)]
+ (const_int 4))
+
+ (eq_attr "iscompact" "true_limm,maybe_limm")
+ (cond
+ [(and (ne (symbol_ref "0") (const_int 0)) (eq (match_dup 0) (pc)))
+ (const_int 8)
+ (eq_attr "verify_short" "yes")
+ (const_int 6)]
+ (const_int 8))
+]
+
+ (cond [(eq_attr "type" "load")
+ (if_then_else
+ (match_operand 1 "long_immediate_loadstore_operand" "")
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "store")
+ (if_then_else
+ (ior (match_operand 0 "long_immediate_loadstore_operand" "")
+ (match_operand 1 "immediate_operand" ""))
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "move,unary")
+ (if_then_else (match_operand 1 "long_immediate_operand" "")
+ (const_int 8) (const_int 4))
+
+;; Added this to adjust the length of nops, so that the bbit offset calculation is correct
+
+ (eq_attr "type" "compare")
+ (if_then_else (match_operand 1 "long_immediate_operand" "")
+ (if_then_else (eq_attr "cpu" "ARC700")
+ (const_int 8) (const_int 12))
+; (const_int 8)
+ (if_then_else (eq_attr "cpu" "ARC700")
+ (const_int 4) (const_int 8)))
+
+
+ (and (eq_attr "type" "shift")
+ (match_operand 1 "immediate_operand"))
+ (const_int 8)
+ (eq_attr "type" "binary,shift")
+ (if_then_else
+ (ior (match_operand 2 "long_immediate_operand" "")
+ (and (ne (symbol_ref "REGNO(operands[0])")
+ (symbol_ref "REGNO(operands[1])"))
+ (eq (match_operand 2 "u6_immediate_operand" "")
+ (const_int 0))))
+
+ (const_int 8) (const_int 4))
+
+ (eq_attr "type" "cmove")
+ (if_then_else (match_operand 1 "register_operand" "")
+ (const_int 4) (const_int 8))
+
+ (eq_attr "type" "call_no_delay_slot") (const_int 8)
+ ]
+
+ (const_int 4))
+))
- (eq_attr "type" "multi,misc")
- (const_string "clob")
- ]
+;; The length here is the length of a single asm. Unfortunately it might be
+;; 4 or 8 so we must allow for 8. That's ok though. How often will users
+;; lament asm's not being put in delay slots?
+;;
+(define_asm_attributes
+ [(set_attr "length" "8")
+ (set_attr "type" "multi")
+ (set_attr "cond" "clob") ])
- (const_string "nocond")))
-
;; Delay slots.
+;; The first two cond clauses and the default are necessary for correctness;
+;; the remaining cond clause is mainly an optimization, as otherwise nops
+;; would be inserted; however, if we didn't do this optimization, we would
+;; have to be more conservative in our length calculations.
(define_attr "in_delay_slot" "false,true"
- (cond [(eq_attr "type" "uncond_branch,branch,call,call_no_delay_slot,multi")
+ (cond [(eq_attr "type" "uncond_branch,jump,branch,
+ call,sfunc,call_no_delay_slot,
+ brcc, brcc_no_delay_slot,loop_setup,loop_end")
+ (const_string "false")
+ (ne (symbol_ref "arc_write_ext_corereg (insn)") (const_int 0))
+ (const_string "false")
+ (gt (symbol_ref "arc_hazard (prev_active_insn (insn),
+ next_active_insn (insn))")
+ (symbol_ref "(arc_hazard (prev_active_insn (insn), insn)
+ + arc_hazard (insn, next_active_insn (insn)))"))
(const_string "false")
]
- (if_then_else (eq_attr "length" "1")
+ (if_then_else (eq_attr "length" "2,4")
(const_string "true")
(const_string "false"))))
-(define_delay (eq_attr "type" "call")
+; We must not put an insn that refers to blink into a call delay slot.
+(define_attr "in_call_delay_slot" "false,true"
+ (cond [(eq_attr "in_delay_slot" "false")
+ (const_string "false")
+ (ne (symbol_ref "arc_regno_use_in (RETURN_ADDR_REGNUM,
+ PATTERN (insn))")
+ (const_int 0))
+ (const_string "false")]
+ (const_string "true")))
+
+(define_attr "in_sfunc_delay_slot" "false,true"
+ (cond [(eq_attr "in_call_delay_slot" "false")
+ (const_string "false")
+ (ne (symbol_ref "arc_regno_use_in (12, PATTERN (insn))")
+ (const_int 0))
+ (const_string "false")]
+ (const_string "true")))
+
+;; Instructions that we can put into a delay slot and conditionalize.
+(define_attr "cond_delay_insn" "no,yes"
+ (cond [(eq_attr "cond" "!canuse") (const_string "no")
+ (eq_attr "type" "call,branch,uncond_branch,jump,brcc")
+ (const_string "no")
+ (eq_attr "length" "2,4") (const_string "yes")]
+ (const_string "no")))
+
+(define_attr "in_ret_delay_slot" "no,yes"
+ (cond [(eq_attr "in_delay_slot" "false")
+ (const_string "no")
+ (ne (symbol_ref "regno_clobbered_p
+ (arc_return_address_regs
+ [arc_compute_function_type (cfun)],
+ insn, SImode, 1)")
+ (const_int 0))
+ (const_string "no")]
+ (const_string "yes")))
+
+(define_attr "cond_ret_delay_insn" "no,yes"
+ (cond [(eq_attr "in_ret_delay_slot" "no") (const_string "no")
+ (eq_attr "cond_delay_insn" "no") (const_string "no")]
+ (const_string "yes")))
+
+
+(define_delay (and (eq_attr "type" "call,branch,uncond_branch,jump")
+ (ne (symbol_ref "TARGET_A4") (const_int 0)))
[(eq_attr "in_delay_slot" "true")
(eq_attr "in_delay_slot" "true")
(eq_attr "in_delay_slot" "true")])
-(define_delay (eq_attr "type" "branch,uncond_branch")
+;; Delay slot definition for ARCompact ISA
+;; ??? FIXME:
+;; When outputting an annul-true insn eligible for cond-exec
+;; in a cbranch delay slot, unless optimizing for size, we use cond-exec
+;; for ARC600; we could also use this for ARC700 if the branch can't be
+;; unaligned and is at least somewhat likely (add parameter for this).
+
+;; (define_delay (and (eq_attr "type" "call,branch,uncond_branch,jump,brcc")
+;; (ne (symbol_ref "TARGET_ARCOMPACT") (const_int 0)))
+;; [(eq_attr "in_delay_slot" "true")
+;; (eq_attr "in_delay_slot" "true")
+;; (nil)])
+(define_delay (and (ne (symbol_ref "TARGET_ARCOMPACT") (const_int 0))
+ (eq_attr "type" "call"))
+ [(eq_attr "in_call_delay_slot" "true")
+ (eq_attr "in_call_delay_slot" "true")
+ (nil)])
+
+(define_delay (and (ne (symbol_ref "TARGET_ARCOMPACT
+ && !TARGET_AT_DBR_CONDEXEC")
+ (const_int 0))
+ (eq_attr "type" "brcc"))
[(eq_attr "in_delay_slot" "true")
(eq_attr "in_delay_slot" "true")
- (eq_attr "in_delay_slot" "true")])
-
-;; Scheduling description for the ARC
+ (nil)])
-(define_cpu_unit "branch")
-
-(define_insn_reservation "any_insn" 1 (eq_attr "type" "!load,compare,branch")
- "nothing")
-
-;; 1) A conditional jump cannot immediately follow the insn setting the flags.
-;; This isn't a complete solution as it doesn't come with guarantees. That
-;; is done in the branch patterns and in arc_print_operand. This exists to
-;; avoid inserting a nop when we can.
-
-(define_insn_reservation "compare" 1 (eq_attr "type" "compare")
- "nothing,branch")
-
-(define_insn_reservation "branch" 1 (eq_attr "type" "branch")
- "branch")
+(define_delay (and (ne (symbol_ref "TARGET_ARCOMPACT && TARGET_AT_DBR_CONDEXEC")
+ (const_int 0))
+ (eq_attr "type" "brcc"))
+ [(eq_attr "in_delay_slot" "true")
+ (nil)
+ (nil)])
+
+(define_delay
+ (and (ne (symbol_ref "TARGET_ARCOMPACT")
+ (const_int 0))
+ (eq_attr "type" "return"))
+ [(eq_attr "in_ret_delay_slot" "yes")
+ (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc")
+ (eq_attr "cond_ret_delay_insn" "yes")])
+
+;; For ARC600, unexposing the delay slot incurs a penalty also in the
+;; non-taken case, so the only meaningful way to have an annul-true
+;; filled delay slot is to conditionalize the delay slot insn.
+(define_delay (and (ne (symbol_ref "TARGET_AT_DBR_CONDEXEC") (const_int 0))
+ (eq_attr "type" "branch,uncond_branch,jump")
+ (eq (symbol_ref "optimize_size") (const_int 0)))
+ [(eq_attr "in_delay_slot" "true")
+ (eq_attr "cond_delay_insn" "yes")
+ (eq_attr "cond_delay_insn" "yes")])
+
+;; For ARC700, anything goes for annulled-true insns, since there is no
+;; penalty for the unexposed delay slot when the branch is not taken,
+;; however, we must avoid things that have a delay slot themselvese to
+;; avoid confucing gcc.
+(define_delay (and (ne (symbol_ref "!TARGET_AT_DBR_CONDEXEC") (const_int 0))
+ (eq_attr "type" "branch,uncond_branch,jump")
+ (eq (symbol_ref "optimize_size") (const_int 0)))
+ [(eq_attr "in_delay_slot" "true")
+ (eq_attr "type" "!call,branch,uncond_branch,jump,brcc,return,sfunc")
+ (eq_attr "cond_delay_insn" "yes")])
+
+;; -mlongcall -fpic sfuncs use r12 to load the function address
+(define_delay (and (ne (symbol_ref "TARGET_ARCOMPACT") (const_int 0))
+ (eq_attr "type" "sfunc"))
+ [(eq_attr "in_sfunc_delay_slot" "true")
+ (eq_attr "in_sfunc_delay_slot" "true")
+ (nil)])
+;; ??? need to use a working strategy for canuse_limm:
+;; - either canuse_limm is not eligible for delay slots, and has no
+;; delay slots, or arc_reorg has to treat them as nocond, or it has to
+;; somehow modify them to become ineligible for delay slots if a decision
+;; is made that makes conditional execution required.
+
+(define_attr "tune" "none,arc600,arc700_4_2_std,arc700_4_2_xmac"
+ (const
+ (cond [(symbol_ref "arc_tune == TUNE_ARC600")
+ (const_string "arc600")
+ (symbol_ref "arc_tune == TUNE_ARC700_4_2_STD")
+ (const_string "arc700_4_2_std")
+ (symbol_ref "arc_tune == TUNE_ARC700_4_2_XMAC")
+ (const_string "arc700_4_2_xmac")]
+ (const_string "none"))))
+
+(define_attr "tune_arc700" "false,true"
+ (if_then_else (eq_attr "tune" "arc700_4_2_std, arc700_4_2_xmac")
+ (const_string "true")
+ (const_string "false")))
+
+;; Function units of the ARC
+
+;; (define_function_unit {name} {num-units} {n-users} {test}
+;; {ready-delay} {issue-delay} [{conflict-list}])
+
+;; 1) A conditional jump cannot immediately follow the insn setting the flags in pre-ARC700 processors.
+;; The ARC700 has no problems with consecutive instructions setting and
+;; using flags.
+;; (define_function_unit "compare" 1 0 (and (eq_attr "type" "compare") (not (eq_attr "cpu" "ARC700"))) 2 2 [(eq_attr "type" "branch")])
+;; (define_function_unit "compare" 1 0 (and (eq_attr "type" "compare") (eq_attr "cpu" "ARC700") ) 1 1 [(eq_attr "type" "branch")])
;; 2) References to loaded registers should wait a cycle.
;; Memory with load-delay of 1 (i.e., 2 cycle load).
+;; (define_function_unit "memory" 1 1 (eq_attr "type" "load") 2 0)
-(define_insn_reservation "memory" 2 (eq_attr "type" "load")
- "nothing")
-
-;; Move instructions.
+;; Units that take one cycle do not need to be specified.
+;; Move instructions.
(define_expand "movqi"
- [(set (match_operand:QI 0 "general_operand" "")
+ [(set (match_operand:QI 0 "move_dest_operand" "")
(match_operand:QI 1 "general_operand" ""))]
""
- "
-{
- /* Everything except mem = const or mem = mem can be done easily. */
-
- if (GET_CODE (operands[0]) == MEM)
- operands[1] = force_reg (QImode, operands[1]);
-}")
-
+ "if (prepare_move_operands (operands, QImode)) DONE;")
+
+; In order to allow the ccfsm machinery to do its work, the leading compact
+; alternatives say 'canuse' - there is another alternative that will match
+; when the condition codes are used.
+; Rcq won't match if the condition is actually used; to avoid a spurious match
+; via q, q is inactivated as constraint there.
+; Likewise, the length of an alternative that might be shifted to conditional
+; execution must reflect this, lest out-of-range branches are created.
+; The iscompact attribute allows the epilogue expander to know for which
+; insns it should lengthen the return insn.
(define_insn "*movqi_insn"
- [(set (match_operand:QI 0 "move_dest_operand" "=r,r,r,m")
- (match_operand:QI 1 "move_src_operand" "rI,Ji,m,r"))]
-;; ??? Needed?
+ [(set (match_operand:QI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w, w,Rcq,S,!*x,r,m,???m")
+ (match_operand:QI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac,?i,T,Rcq,Usd,m,c,?Rac"))]
"register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode)"
"@
+ mov%? %0,%1%&
+ mov%? %0,%1%&
+ mov%? %0,%1%&
+ mov%? %0,%1
mov%? %0,%1
mov%? %0,%1
+ mov%? %0,%S1
+ ldb%? %0,%1%&
+ stb%? %1,%0%&
+ ldb%? %0,%1%&
ldb%U1%V1 %0,%1
+ stb%U0%V0 %1,%0
stb%U0%V0 %1,%0"
- [(set_attr "type" "move,move,load,store")])
-
-;; ??? This may never match since there's no cmpqi insn.
-
-(define_insn "*movqi_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (sign_extend:SI (match_operand:QI 1 "move_src_operand" "rIJi"))
- (const_int 0)))
- (set (match_operand:QI 0 "move_dest_operand" "=r")
- (match_dup 1))]
- ""
- "mov%?.f %0,%1"
- [(set_attr "type" "move")
- (set_attr "cond" "set_zn")])
+ [(set_attr "type" "move,move,move,move,move,move,move,load,store,load,load,store,store")
+ (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,true,true,true,false,false,false")
+ (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse_limm,canuse,canuse,nocond,nocond,nocond,nocond,nocond,nocond")])
(define_expand "movhi"
- [(set (match_operand:HI 0 "general_operand" "")
+ [(set (match_operand:HI 0 "move_dest_operand" "")
(match_operand:HI 1 "general_operand" ""))]
""
- "
-{
- /* Everything except mem = const or mem = mem can be done easily. */
-
- if (GET_CODE (operands[0]) == MEM)
- operands[1] = force_reg (HImode, operands[1]);
-}")
+ "if (prepare_move_operands (operands, HImode)) DONE;")
(define_insn "*movhi_insn"
- [(set (match_operand:HI 0 "move_dest_operand" "=r,r,r,m")
- (match_operand:HI 1 "move_src_operand" "rI,Ji,m,r"))]
+ [(set (match_operand:HI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w,???w,Rcq#q,w,Rcq,S,r,m,???m,VUsc")
+ (match_operand:HI 1 "move_src_operand" "cL,cP,Rcq#q,cL,I,?Rac, ?i,?i,T,Rcq,m,c,?Rac,i"))]
"register_operand (operands[0], HImode)
- || register_operand (operands[1], HImode)"
+ || register_operand (operands[1], HImode)
+ || (CONSTANT_P (operands[1])
+ /* Don't use a LIMM that we could load with a single insn - we lose
+ delay-slot filling opportunities. */
+ && !satisfies_constraint_I (operands[1])
+ && satisfies_constraint_Usc (operands[0]))"
"@
+ mov%? %0,%1%&
+ mov%? %0,%1%&
+ mov%? %0,%1%&
+ mov%? %0,%1
mov%? %0,%1
mov%? %0,%1
+ mov%? %0,%S1%&
+ mov%? %0,%S1
+ ldw%? %0,%1%&
+ stw%? %1,%0%&
ldw%U1%V1 %0,%1
- stw%U0%V0 %1,%0"
- [(set_attr "type" "move,move,load,store")])
-
-;; ??? Will this ever match?
-
-(define_insn "*movhi_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (sign_extend:SI (match_operand:HI 1 "move_src_operand" "rIJi"))
- (const_int 0)))
- (set (match_operand:HI 0 "move_dest_operand" "=r")
- (match_dup 1))]
-;; ??? Needed?
- "register_operand (operands[0], HImode)
- || register_operand (operands[1], HImode)"
- "mov%?.f %0,%1"
- [(set_attr "type" "move")
- (set_attr "cond" "set_zn")])
+ stw%U0%V0 %1,%0
+ stw%U0%V0 %1,%0
+ stw%U0%V0 %S1,%0"
+ [(set_attr "type" "move,move,move,move,move,move,move,move,load,store,load,store,store,store")
+ (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,maybe_limm,false,true,true,false,false,false,false")
+ (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse_limm,canuse,canuse,canuse,nocond,nocond,nocond,nocond,nocond,nocond")])
(define_expand "movsi"
- [(set (match_operand:SI 0 "general_operand" "")
+ [(set (match_operand:SI 0 "move_dest_operand" "")
(match_operand:SI 1 "general_operand" ""))]
""
- "
-{
- /* Everything except mem = const or mem = mem can be done easily. */
-
- if (GET_CODE (operands[0]) == MEM)
- operands[1] = force_reg (SImode, operands[1]);
-}")
-
+ "if (prepare_move_operands (operands, SImode)) DONE;")
+
+
+; In order to allow the ccfsm machinery to do its work, the leading compact
+; alternatives say 'canuse' - there is another alternative that will match
+; when the condition codes are used.
+; Rcq won't match if the condition is actually used; to avoid a spurious match
+; via q, q is inactivated as constraint there.
+; Likewise, the length of an alternative that might be shifted to conditional
+; execution must reflect this, lest out-of-range branches are created.
+; The iscompact attribute allows the epilogue expander to know for which
+; insns it should lengthen the return insn.
+; N.B. operand 1 of alternative 7 expands into pcl,symbol@gotpc.
(define_insn "*movsi_insn"
- [(set (match_operand:SI 0 "move_dest_operand" "=r,r,r,m")
- (match_operand:SI 1 "move_src_operand" "rI,GJi,m,r"))]
+ [(set (match_operand:SI 0 "move_dest_operand" "=Rcq,Rcq#q,w, w,w, w,???w, ?w, w,Rcq#q, w,Rcq, S,Us<,RcqRck,!*x,r,m,???m,VUsc")
+ (match_operand:SI 1 "move_src_operand" " cL,cP,Rcq#q,cL,I,Crr,?Rac,Cpc,Clb,?Cal,?Cal,T,Rcq,RcqRck,Us>,Usd,m,c,?Rac,C32"))]
"register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode)"
+ || register_operand (operands[1], SImode)
+ || (CONSTANT_P (operands[1])
+ /* Don't use a LIMM that we could load with a single insn - we lose
+ delay-slot filling opportunities. */
+ && !satisfies_constraint_I (operands[1])
+ && satisfies_constraint_Usc (operands[0]))"
"@
+ mov%? %0,%1%&
+ mov%? %0,%1%&
+ mov%? %0,%1%&
mov%? %0,%1
+ mov%? %0,%1
+ ror %0,((%1*2+1) & 0x3f)
+ mov%? %0,%1
+ add %0,%S1
+ * return arc_get_unalign () ? \"add %0,pcl,%1-.+2\" : \"add %0,pcl,%1-.\";
+ mov%? %0,%S1%&
mov%? %0,%S1
+ ld%? %0,%1%&
+ st%? %1,%0%&
+ * return arc_short_long (insn, \"push%? %1%&\", \"st%U0 %1,%0%&\");
+ * return arc_short_long (insn, \"pop%? %0%&\", \"ld%U1 %0,%1%&\");
+ ld%? %0,%1%&
ld%U1%V1 %0,%1
- st%U0%V0 %1,%0"
- [(set_attr "type" "move,move,load,store")])
+ st%U0%V0 %1,%0
+ st%U0%V0 %1,%0
+ st%U0%V0 %S1,%0"
+ [(set_attr "type" "move,move,move,move,move,two_cycle_core,move,binary,binary,move,move,load,store,store,load,load,load,store,store,store")
+ (set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,maybe_limm,false,true,true,true,true,true,false,false,false,false")
+ ; Use default length for iscompact to mark length varying. But set length
+ ; of Crr to 4.
+ (set_attr "length" "*,*,*,4,4,4,4,8,8,*,8,*,*,*,*,*,*,*,*,8")
+ (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse_limm,canuse_limm,canuse,nocond,nocond,canuse,canuse,nocond,nocond,nocond,nocond,nocond,nocond,nocond,nocond,nocond")])
+
+;; This matches a pre-modify stack address sometimes generated by the
+;; epilogue code. We don't want to recognize these addresses in general,
+;; because the limm is costly, and we can't use them for stores.
+(define_insn "*movsi_pre_mod"
+ [(set (match_operand:SI 0 "register_operand" "=w")
+ (mem:SI (pre_modify
+ (reg:SI SP_REG)
+ (plus:SI (reg:SI SP_REG)
+ (match_operand 1 "immediate_operand" "Cal")))))]
+ "reload_completed"
+ "ld.a %0,[sp,%1]"
+ [(set_attr "type" "load")
+ (set_attr "length" "8")])
+
+/* Store a value directly to memory. The location might also be cached.
+ Since the cached copy can cause a write-back at unpredictable times,
+ we first write cached, then we write uncached. */
+(define_insn "store_direct"
+ [(set (match_operand:SI 0 "move_dest_operand" "=m")
+ (unspec:SI [(match_operand:SI 1 "register_operand" "c")]
+ UNSPEC_DIRECT))]
+ ""
+ "st%U0 %1,%0\;st%U0.di %1,%0"
+ [(set_attr "type" "store")])
(define_insn "*movsi_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (match_operand:SI 1 "move_src_operand" "rIJi")
- (const_int 0)))
- (set (match_operand:SI 0 "move_dest_operand" "=r")
+ [(set (match_operand:CC_ZN 2 "cc_set_register" "")
+ (match_operator 3 "zn_compare_operator"
+ [(match_operand:SI 1 "nonmemory_operand" "cI,Cal") (const_int 0)]))
+ (set (match_operand:SI 0 "register_operand" "=w,w")
(match_dup 1))]
- "register_operand (operands[0], SImode)
- || register_operand (operands[1], SImode)"
+ ""
"mov%?.f %0,%S1"
- [(set_attr "type" "move")
+ [(set_attr "type" "compare")
+ (set_attr "cond" "set_zn")
+ (set_attr "length" "4,8")])
+
+(define_insn "unary_comparison"
+ [(set (match_operand:CC_ZN 0 "cc_set_register" "")
+ (match_operator 3 "zn_compare_operator"
+ [(match_operator:SI 2 "unary_operator"
+ [(match_operand:SI 1 "register_operand" "c")])
+ (const_int 0)]))]
+ ""
+ "%O2.f 0,%1"
+ [(set_attr "type" "compare")
(set_attr "cond" "set_zn")])
+
+; This pattern is needed by the combiner for cases like if (c=(~b)) { ... }
+(define_insn "*unary_comparison_result_used"
+ [(set (match_operand 2 "cc_register" "")
+ (match_operator 4 "zn_compare_operator"
+ [(match_operator:SI 3 "unary_operator"
+ [(match_operand:SI 1 "register_operand" "c")])
+ (const_int 0)]))
+ (set (match_operand:SI 0 "register_operand" "=w")
+ (match_dup 3))]
+ ""
+ "%O3.f %0,%1"
+ [(set_attr "type" "compare")
+ (set_attr "cond" "set_zn")
+ (set_attr "length" "4")])
+
+(define_insn "*tst"
+ [(set
+ (match_operand 0 "cc_register" "")
+ (match_operator 3 "zn_compare_operator"
+ [(and:SI
+ (match_operand:SI 1 "register_operand" "%Rcq,Rcq, c, c, c, c, c")
+ (match_operand:SI 2 "nonmemory_operand" "Rcq,C0p,cI,C1p,Ccp,CnL,Cal"))
+ (const_int 0)]))]
+ "TARGET_ARCOMPACT
+ && ((register_operand (operands[1], SImode)
+ && nonmemory_operand (operands[2], SImode))
+ || (memory_operand (operands[1], SImode)
+ && satisfies_constraint_Cux (operands[2])))"
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 2: case 6:
+ return \"tst%? %1,%2\";
+ case 1:
+ return \"btst%? %1,%z2\";
+ case 3:
+ return \"bmsk%?.f 0,%1,%Z2%&\";
+ case 4:
+ return \"bclr%?.f 0,%1,%M2%&\";
+ case 5:
+ return \"bic%?.f 0,%1,%n2-1\";
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false")
+ (set_attr "type" "compare")
+ (set_attr "length" "*,*,4,4,4,4,8")
+ (set_attr "cond" "set_zn")])
+
+(define_insn "*commutative_binary_comparison"
+ [(set (match_operand:CC_ZN 0 "cc_set_register" "")
+ (match_operator 5 "zn_compare_operator"
+ [(match_operator:SI 4 "commutative_operator"
+ [(match_operand:SI 1 "register_operand" "%c,c,c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")])
+ (const_int 0)]))
+ (clobber (match_scratch:SI 3 "=X,1,X"))]
+ ""
+ "%O4.f 0,%1,%2"
+ [(set_attr "type" "compare")
+ (set_attr "cond" "set_zn")
+ (set_attr "length" "4,4,8")])
+
+; For flag-setting 'add' instructions like if (a+b) { ... },
+; the combiner needs this pattern.
+(define_insn "*addsi_compare"
+ [(set (reg:CC_ZN CC_REG)
+ (compare:CC_ZN (match_operand:SI 0 "register_operand" "c")
+ (neg:SI (match_operand:SI 1 "register_operand" "c"))))]
+ ""
+ "add.f 0,%0,%1"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4")])
+
+; For flag-setting 'add' instructions like if (a+b < a) { ... },
+; the combiner needs this pattern.
+(define_insn "addsi_compare_2"
+ [(set (reg:CC_C CC_REG)
+ (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c,c")
+ (match_operand:SI 1 "nonmemory_operand" "cL,Cal"))
+ (match_dup 0)))]
+ ""
+ "add.f 0,%0,%1"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,8")])
+
+(define_insn "*addsi_compare_3"
+ [(set (reg:CC_C CC_REG)
+ (compare:CC_C (plus:SI (match_operand:SI 0 "register_operand" "c")
+ (match_operand:SI 1 "register_operand" "c"))
+ (match_dup 1)))]
+ ""
+ "add.f 0,%0,%1"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4")])
+
+; This pattern is needed by the combiner for cases like if (c=a+b) { ... }
+(define_insn "*commutative_binary_comparison_result_used"
+ [(set (match_operand 3 "cc_register" "")
+ (match_operator 5 "zn_compare_operator"
+ [(match_operator:SI 4 "commutative_operator"
+ [(match_operand:SI 1 "register_operand" "c,0,c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")])
+ (const_int 0)]))
+ (set (match_operand:SI 0 "register_operand" "=w,w,w")
+ (match_dup 4))]
+ ""
+ "%O4.f %0,%1,%2"
+ [(set_attr "type" "compare,compare,compare")
+ (set_attr "cond" "set_zn,set_zn,set_zn")
+ (set_attr "length" "4,4,8")])
+
+; This pattern is needed by the combiner for cases like if (c=a<<b) { ... }
+(define_insn "*noncommutative_binary_comparison_result_used"
+ [(set (match_operand 3 "cc_register" "")
+ (match_operator 5 "zn_compare_operator"
+ [(match_operator:SI 4 "noncommutative_operator"
+ [(match_operand:SI 1 "register_operand" "c,0,c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")])
+ (const_int 0)]))
+ (set (match_operand:SI 0 "register_operand" "=w,w,w")
+ (match_dup 4 ))]
+ ""
+ "%O4.f %0,%1,%2"
+ [(set_attr "type" "compare,compare,compare")
+ (set_attr "cond" "set_zn,set_zn,set_zn")
+ (set_attr "length" "4,4,8")])
+
+(define_insn "*noncommutative_binary_comparison"
+ [(set (match_operand:CC_ZN 0 "cc_set_register" "")
+ (match_operator 5 "zn_compare_operator"
+ [(match_operator:SI 4 "noncommutative_operator"
+ [(match_operand:SI 1 "register_operand" "c,c,c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,I,?Cal")])
+ (const_int 0)]))
+ (clobber (match_scratch:SI 3 "=X,1,X"))]
+ ""
+ "%O4.f 0,%1,%2"
+ [(set_attr "type" "compare")
+ (set_attr "cond" "set_zn")
+ (set_attr "length" "4,4,8")])
+
(define_expand "movdi"
- [(set (match_operand:DI 0 "general_operand" "")
+ [(set (match_operand:DI 0 "move_dest_operand" "")
(match_operand:DI 1 "general_operand" ""))]
""
"
@@ -266,25 +957,26 @@
operands[1] = force_reg (DImode, operands[1]);
}")
-(define_insn "*movdi_insn"
- [(set (match_operand:DI 0 "move_dest_operand" "=r,r,r,m")
- (match_operand:DI 1 "move_double_src_operand" "r,HK,m,r"))]
+(define_insn_and_split "*movdi_insn"
+ [(set (match_operand:DI 0 "move_dest_operand" "=w,w,r,m")
+ (match_operand:DI 1 "move_double_src_operand" "c,HJi,m,c"))]
"register_operand (operands[0], DImode)
|| register_operand (operands[1], DImode)"
"*
{
switch (which_alternative)
{
+ default:
case 0 :
/* We normally copy the low-numbered register first. However, if
the first register operand 0 is the same as the second register of
operand 1, we must copy in the opposite order. */
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
- return \"mov %R0,%R1\;mov %0,%1\";
+ return \"mov%? %R0,%R1\;mov%? %0,%1\";
else
- return \"mov %0,%1\;mov %R0,%R1\";
+ return \"mov%? %0,%1\;mov%? %R0,%R1\";
case 1 :
- return \"mov %0,%L1\;mov %R0,%H1\";
+ return \"mov%? %L0,%L1\;mov%? %H0,%H1\";
case 2 :
/* If the low-address word is used in the address, we must load it
last. Otherwise, load it first. Note that we cannot have
@@ -292,53 +984,47 @@
dead. */
if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
operands [1], 0))
- return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
- else
- return \"ld%V1 %0,%1\;ld%V1 %R0,%R1\";
+ return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
+ else switch (GET_CODE (XEXP(operands[1], 0)))
+ {
+ case POST_MODIFY: case POST_INC: case POST_DEC:
+ return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
+ case PRE_MODIFY: case PRE_INC: case PRE_DEC:
+ return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
+ default:
+ return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
+ }
case 3 :
- return \"st%V0 %1,%0\;st%V0 %R1,%R0\";
- default:
- gcc_unreachable ();
+ switch (GET_CODE (XEXP(operands[0], 0)))
+ {
+ case POST_MODIFY: case POST_INC: case POST_DEC:
+ return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
+ case PRE_MODIFY: case PRE_INC: case PRE_DEC:
+ return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
+ default:
+ return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
+ }
}
}"
+ "&& reload_completed && optimize"
+ [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
+ "arc_split_move (operands);"
[(set_attr "type" "move,move,load,store")
;; ??? The ld/st values could be 4 if it's [reg,bignum].
- (set_attr "length" "2,4,2,2")])
+ (set_attr "length" "8,16,16,16")])
+
-;(define_expand "movdi"
-; [(set (match_operand:DI 0 "general_operand" "")
-; (match_operand:DI 1 "general_operand" ""))]
-; ""
-; "
-;{
-; /* Flow doesn't understand that this is effectively a DFmode move.
-; It doesn't know that all of `operands[0]' is set. */
-; emit_clobber (operands[0]);
-;
-; /* Emit insns that movsi_insn can handle. */
-; emit_insn (gen_movsi (operand_subword (operands[0], 0, 0, DImode),
-; operand_subword (operands[1], 0, 0, DImode)));
-; emit_insn (gen_movsi (operand_subword (operands[0], 1, 0, DImode),
-; operand_subword (operands[1], 1, 0, DImode)));
-; DONE;
-;}")
-
;; Floating point move insns.
(define_expand "movsf"
[(set (match_operand:SF 0 "general_operand" "")
(match_operand:SF 1 "general_operand" ""))]
""
- "
-{
- /* Everything except mem = const or mem = mem can be done easily. */
- if (GET_CODE (operands[0]) == MEM)
- operands[1] = force_reg (SFmode, operands[1]);
-}")
+ "if (prepare_move_operands (operands, SFmode)) DONE;")
(define_insn "*movsf_insn"
- [(set (match_operand:SF 0 "move_dest_operand" "=r,r,r,m")
- (match_operand:SF 1 "move_src_operand" "r,E,m,r"))]
+ [(set (match_operand:SF 0 "move_dest_operand" "=w,w,r,m")
+ (match_operand:SF 1 "move_src_operand" "c,E,m,c"))]
"register_operand (operands[0], SFmode)
|| register_operand (operands[1], SFmode)"
"@
@@ -352,22 +1038,18 @@
[(set (match_operand:DF 0 "general_operand" "")
(match_operand:DF 1 "general_operand" ""))]
""
- "
-{
- /* Everything except mem = const or mem = mem can be done easily. */
- if (GET_CODE (operands[0]) == MEM)
- operands[1] = force_reg (DFmode, operands[1]);
-}")
+ "if (prepare_move_operands (operands, DFmode)) DONE;")
-(define_insn "*movdf_insn"
- [(set (match_operand:DF 0 "move_dest_operand" "=r,r,r,m")
- (match_operand:DF 1 "move_double_src_operand" "r,E,m,r"))]
+(define_insn_and_split "*movdf_insn"
+ [(set (match_operand:DF 0 "move_dest_operand" "=w,w,r,m,D,r")
+ (match_operand:DF 1 "move_double_src_operand" "c,E,m,c,r,D"))]
"register_operand (operands[0], DFmode)
|| register_operand (operands[1], DFmode)"
"*
{
switch (which_alternative)
{
+ default:
case 0 :
/* We normally copy the low-numbered register first. However, if
the first register operand 0 is the same as the second register of
@@ -375,9 +1057,9 @@
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
return \"mov %R0,%R1\;mov %0,%1\";
else
- return \"mov %0,%1\;mov %R0,%R1\";
+ return \"mov%? %0,%1\;mov%? %R0,%R1\";
case 1 :
- return \"mov %0,%L1\;mov %R0,%H1 ; %A1\";
+ return \"mov%? %L0,%L1\;mov%? %H0,%H1 ; %A1\";
case 2 :
/* If the low-address word is used in the address, we must load it
last. Otherwise, load it first. Note that we cannot have
@@ -386,36 +1068,47 @@
if (refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
operands [1], 0))
return \"ld%V1 %R0,%R1\;ld%V1 %0,%1\";
- else
- return \"ld%V1 %0,%1\;ld%V1 %R0,%R1\";
+ else switch (GET_CODE (XEXP(operands[1], 0)))
+ {
+ case POST_MODIFY: case POST_INC: case POST_DEC:
+ return \"ld%V1 %R0,%R1\;ld%U1%V1 %0,%1\";
+ case PRE_MODIFY: case PRE_INC: case PRE_DEC:
+ return \"ld%U1%V1 %0,%1\;ld%V1 %R0,%R1\";
+ default:
+ return \"ld%U1%V1 %0,%1\;ld%U1%V1 %R0,%R1\";
+ }
case 3 :
- return \"st%V0 %1,%0\;st%V0 %R1,%R0\";
- default:
- gcc_unreachable ();
+ switch (GET_CODE (XEXP(operands[0], 0)))
+ {
+ case POST_MODIFY: case POST_INC: case POST_DEC:
+ return \"st%V0 %R1,%R0\;st%U0%V0 %1,%0\";
+ case PRE_MODIFY: case PRE_INC: case PRE_DEC:
+ return \"st%U0%V0 %1,%0\;st%V0 %R1,%R0\";
+ default:
+ return \"st%U0%V0 %1,%0\;st%U0%V0 %R1,%R0\";
+ }
+ case 4:
+ if (!TARGET_DPFP)
+ {
+ fatal_error (\"DPFP register allocated without -mdpfp\\n\");
+ }
+ return \"dexcl%F0 0, %H1, %L1\";
+ case 5:
+ return \"lr %H0,[%H1h]\;lr %L0,[%H1l] ; double reg moves\";
+
}
}"
- [(set_attr "type" "move,move,load,store")
- ;; ??? The ld/st values could be 4 if it's [reg,bignum].
- (set_attr "length" "2,4,2,2")])
+ "&& reload_completed && optimize
+ /* Don't touch the DOUBLE_REGS alternatives. */
+ && !refers_to_regno_p (40, 44, operands[0], 0)
+ && !refers_to_regno_p (40, 44, operands[1], 0)"
+ [(set (match_dup 2) (match_dup 3)) (set (match_dup 4) (match_dup 5))]
+ "arc_split_move (operands);"
+ [(set_attr "type" "move,move,load,store, move,lr")
+ (set_attr "cond" "canuse,canuse,nocond,nocond,nocond,nocond")
+ ;; ??? The ld/st values could be 16 if it's [reg,bignum].
+ (set_attr "length" "8,16,16,16,4,16")])
-;(define_expand "movdf"
-; [(set (match_operand:DF 0 "general_operand" "")
-; (match_operand:DF 1 "general_operand" ""))]
-; ""
-; "
-;{
-; /* Flow doesn't understand that this is effectively a DFmode move.
-; It doesn't know that all of `operands[0]' is set. */
-; emit_clobber (operands[0]);
-;
-; /* Emit insns that movsi_insn can handle. */
-; emit_insn (gen_movsi (operand_subword (operands[0], 0, 0, DFmode),
-; operand_subword (operands[1], 0, 0, DFmode)));
-; emit_insn (gen_movsi (operand_subword (operands[0], 1, 0, DFmode),
-; operand_subword (operands[1], 1, 0, DFmode)));
-; DONE;
-;}")
-
;; Load/Store with update instructions.
;;
;; Some of these we can get by using pre-decrement or pre-increment, but the
@@ -430,733 +1123,2338 @@
;; We use match_operator here because we need to know whether the memory
;; object is volatile or not.
+
+;; Note: loadqi_update has no 16-bit variant
(define_insn "*loadqi_update"
- [(set (match_operand:QI 3 "register_operand" "=r,r")
+ [(set (match_operand:QI 3 "dest_reg_operand" "=r,r")
(match_operator:QI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ldb.a%V4 %3,[%0,%2]"
+ "ldb.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*load_zeroextendqisi_update"
- [(set (match_operand:SI 3 "register_operand" "=r,r")
+ [(set (match_operand:SI 3 "dest_reg_operand" "=r,r")
(zero_extend:SI (match_operator:QI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ldb.a%V4 %3,[%0,%2]"
+ "ldb.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*load_signextendqisi_update"
- [(set (match_operand:SI 3 "register_operand" "=r,r")
+ [(set (match_operand:SI 3 "dest_reg_operand" "=r,r")
(sign_extend:SI (match_operator:QI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ldb.x.a%V4 %3,[%0,%2]"
+ "ldb.x.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*storeqi_update"
[(set (match_operator:QI 4 "store_update_operand"
[(match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "short_immediate_operand" "I")])
- (match_operand:QI 3 "register_operand" "r"))
- (set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:QI 3 "register_operand" "c"))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
"stb.a%V4 %3,[%0,%2]"
[(set_attr "type" "store")
- (set_attr "length" "1")])
+ (set_attr "length" "4")])
+;; ??? pattern may have to be re-written
+;; Note: no 16-bit variant for this pattern
(define_insn "*loadhi_update"
- [(set (match_operand:HI 3 "register_operand" "=r,r")
+ [(set (match_operand:HI 3 "dest_reg_operand" "=r,r")
(match_operator:HI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ldw.a%V4 %3,[%0,%2]"
+ "ldw.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*load_zeroextendhisi_update"
- [(set (match_operand:SI 3 "register_operand" "=r,r")
+ [(set (match_operand:SI 3 "dest_reg_operand" "=r,r")
(zero_extend:SI (match_operator:HI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])))
+ (set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ldw.a%V4 %3,[%0,%2]"
+ "ldw.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
+;; Note: no 16-bit variant for this instruction
(define_insn "*load_signextendhisi_update"
- [(set (match_operand:SI 3 "register_operand" "=r,r")
+ [(set (match_operand:SI 3 "dest_reg_operand" "=r,r")
(sign_extend:SI (match_operator:HI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")])))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")])))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ldw.x.a%V4 %3,[%0,%2]"
+ "ldw.x.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*storehi_update"
[(set (match_operator:HI 4 "store_update_operand"
[(match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "short_immediate_operand" "I")])
- (match_operand:HI 3 "register_operand" "r"))
- (set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:HI 3 "register_operand" "c"))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
"stw.a%V4 %3,[%0,%2]"
[(set_attr "type" "store")
- (set_attr "length" "1")])
+ (set_attr "length" "4")])
+;; No 16-bit variant for this instruction pattern
(define_insn "*loadsi_update"
- [(set (match_operand:SI 3 "register_operand" "=r,r")
+ [(set (match_operand:SI 3 "dest_reg_operand" "=r,r")
(match_operator:SI 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ld.a%V4 %3,[%0,%2]"
+ "ld.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*storesi_update"
[(set (match_operator:SI 4 "store_update_operand"
[(match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "short_immediate_operand" "I")])
- (match_operand:SI 3 "register_operand" "r"))
- (set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SI 3 "register_operand" "c"))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
"st.a%V4 %3,[%0,%2]"
[(set_attr "type" "store")
- (set_attr "length" "1")])
+ (set_attr "length" "4")])
(define_insn "*loadsf_update"
- [(set (match_operand:SF 3 "register_operand" "=r,r")
+ [(set (match_operand:SF 3 "dest_reg_operand" "=r,r")
(match_operator:SF 4 "load_update_operand"
[(match_operand:SI 1 "register_operand" "0,0")
- (match_operand:SI 2 "nonmemory_operand" "rI,J")]))
- (set (match_operand:SI 0 "register_operand" "=r,r")
+ (match_operand:SI 2 "nonmemory_operand" "rI,Cal")]))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
- "ld.a%V4 %3,[%0,%2]"
+ "ld.a%V4 %3,[%0,%S2]"
[(set_attr "type" "load,load")
- (set_attr "length" "1,2")])
+ (set_attr "length" "4,8")])
(define_insn "*storesf_update"
[(set (match_operator:SF 4 "store_update_operand"
[(match_operand:SI 1 "register_operand" "0")
(match_operand:SI 2 "short_immediate_operand" "I")])
- (match_operand:SF 3 "register_operand" "r"))
- (set (match_operand:SI 0 "register_operand" "=r")
+ (match_operand:SF 3 "register_operand" "c"))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w")
(plus:SI (match_dup 1) (match_dup 2)))]
""
"st.a%V4 %3,[%0,%2]"
[(set_attr "type" "store")
- (set_attr "length" "1")])
-
+ (set_attr "length" "4")])
+
;; Conditional move instructions.
(define_expand "movsicc"
- [(set (match_operand:SI 0 "register_operand" "")
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
(if_then_else:SI (match_operand 1 "comparison_operator" "")
- (match_operand:SI 2 "nonmemory_operand" "")
- (match_operand:SI 3 "register_operand" "")))]
+ (match_operand:SI 2 "nonmemory_operand" "")
+ (match_operand:SI 3 "register_operand" "")))]
""
"
{
enum rtx_code code = GET_CODE (operands[1]);
- rtx ccreg
- = gen_rtx_REG (SELECT_CC_MODE (code, arc_compare_op0, arc_compare_op1),
- 61);
- operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ operands[1] = gen_compare_reg (code, VOIDmode);
+}")
+
+
+(define_expand "movdicc"
+ [(set (match_operand:DI 0 "dest_reg_operand" "")
+	(if_then_else:DI (match_operand 1 "comparison_operator" "")
+ (match_operand:DI 2 "nonmemory_operand" "")
+ (match_operand:DI 3 "register_operand" "")))]
+ ""
+ "
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ operands[1] = gen_compare_reg (code, VOIDmode);
}")
-;(define_expand "movdicc"
-; [(set (match_operand:DI 0 "register_operand" "")
-; (if_then_else:DI (match_operand 1 "comparison_operator" "")
-; (match_operand:DI 2 "nonmemory_operand" "")
-; (match_operand:DI 3 "register_operand" "")))]
-; "0 /* ??? this would work better if we had cmpdi */"
-; "
-;{
-; enum rtx_code code = GET_CODE (operands[1]);
-; rtx ccreg
-; = gen_rtx_REG (SELECT_CC_MODE (code, arc_compare_op0, arc_compare_op1),
-; 61);
-;
-; operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
-;}")
(define_expand "movsfcc"
- [(set (match_operand:SF 0 "register_operand" "")
+ [(set (match_operand:SF 0 "dest_reg_operand" "")
(if_then_else:SF (match_operand 1 "comparison_operator" "")
- (match_operand:SF 2 "nonmemory_operand" "")
- (match_operand:SF 3 "register_operand" "")))]
+ (match_operand:SF 2 "nonmemory_operand" "")
+ (match_operand:SF 3 "register_operand" "")))]
""
"
{
enum rtx_code code = GET_CODE (operands[1]);
- rtx ccreg
- = gen_rtx_REG (SELECT_CC_MODE (code, arc_compare_op0, arc_compare_op1),
- 61);
- operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+ operands[1] = gen_compare_reg (code, VOIDmode);
}")
-;(define_expand "movdfcc"
-; [(set (match_operand:DF 0 "register_operand" "")
-; (if_then_else:DF (match_operand 1 "comparison_operator" "")
-; (match_operand:DF 2 "nonmemory_operand" "")
-; (match_operand:DF 3 "register_operand" "")))]
-; "0 /* ??? can generate less efficient code if constants involved */"
-; "
-;{
-; enum rtx_code code = GET_CODE (operands[1]);
-; rtx ccreg
-; = gen_rtx_REG (SELECT_CC_MODE (code, arc_compare_op0, arc_compare_op1),
-; 61);
-;
-; operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
-;}")
+(define_expand "movdfcc"
+ [(set (match_operand:DF 0 "dest_reg_operand" "")
+ (if_then_else:DF (match_operand 1 "comparison_operator" "")
+ (match_operand:DF 2 "nonmemory_operand" "")
+ (match_operand:DF 3 "register_operand" "")))]
+ ""
+ "
+{
+ enum rtx_code code = GET_CODE (operands[1]);
+
+ operands[1] = gen_compare_reg (code, VOIDmode);
+}")
(define_insn "*movsicc_insn"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (if_then_else:SI (match_operand 1 "comparison_operator" "")
- (match_operand:SI 2 "nonmemory_operand" "rJi")
- (match_operand:SI 3 "register_operand" "0")))]
- ""
- "mov.%d1 %0,%S2"
- [(set_attr "type" "cmove")])
-
-; ??? This doesn't properly handle constants.
-;(define_insn "*movdicc_insn"
-; [(set (match_operand:DI 0 "register_operand" "=r,r")
-; (if_then_else:DI (match_operand 1 "comparison_operator" "")
-; (match_operand:DI 2 "nonmemory_operand" "r,Ji")
-; (match_operand:DI 3 "register_operand" "0,0")))]
-; "0"
-; "*
-;{
-; switch (which_alternative)
-; {
-; case 0 :
-; /* We normally copy the low-numbered register first. However, if
-; the first register operand 0 is the same as the second register of
-; operand 1, we must copy in the opposite order. */
-; if (REGNO (operands[0]) == REGNO (operands[2]) + 1)
-; return \"mov.%d1 %R0,%R2\;mov.%d1 %0,%2\";
-; else
-; return \"mov.%d1 %0,%2\;mov.%d1 %R0,%R2\";
-; case 1 :
-; return \"mov.%d1 %0,%2\;mov.%d1 %R0,%R2\";
-; }
-;}"
-; [(set_attr "type" "cmove,cmove")
-; (set_attr "length" "2,4")])
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (if_then_else:SI (match_operator 3 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SI 1 "nonmemory_operand" "cL,Cal")
+ (match_operand:SI 2 "register_operand" "0,0")))]
+ ""
+{
+ if (rtx_equal_p (operands[1], const0_rtx) && GET_CODE (operands[3]) == NE
+ && satisfies_constraint_Rcq (operands[0]))
+ return "sub%?.ne %0,%0,%0";
+  /* ??? Might be good for speed on ARC600 too, *if* properly scheduled.  */
+ if ((TARGET_ARC700 || optimize_size)
+ && rtx_equal_p (operands[1], constm1_rtx)
+ && GET_CODE (operands[3]) == LTU)
+ return "sbc.cs %0,%0,%0";
+ return "mov.%d3 %0,%S1";
+}
+ [(set_attr "type" "cmove,cmove")
+ (set_attr "length" "4,8")])
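+
+; Illustrative expansions (register names assumed): with the condition
+; already in the cc register,
+;   "r0 = cc ? 0 : r0"            ->  sub.ne r0,r0,r0
+;   "r0 = carry ? -1 : r0" (LTU)  ->  sbc.cs r0,r0,r0  (ARC700 or -Os)
+; and the general case            ->  mov.<cc> r0,<src>.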
+
+; Try to generate more short moves, and/or fewer limms, by replacing a
+; conditional move with a conditional sub.
+(define_peephole2
+ [(set (match_operand:SI 0 "compact_register_operand")
+ (match_operand:SI 1 "const_int_operand"))
+ (set (match_dup 0)
+ (if_then_else:SI (match_operator 3 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SI 2 "const_int_operand" "")
+ (match_dup 0)))]
+ "!satisfies_constraint_P (operands[1])
+ && satisfies_constraint_P (operands[2])
+ && UNSIGNED_INT6 (INTVAL (operands[2]) - INTVAL (operands[1]))"
+ [(set (match_dup 0) (match_dup 2))
+ (cond_exec
+ (match_dup 3)
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_dup 1))))]
+ "operands[3] = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[3]),
+ GET_MODE (operands[4])),
+ VOIDmode, operands[4], const0_rtx);
+ operands[1] = GEN_INT (INTVAL (operands[1]) - INTVAL (operands[2]));")
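+
+; Schematically (constants C1, C2 assumed):  "x = C1; if (cc) x = C2;"
+; becomes  "x = C2; if (!cc) x -= (C2 - C1);"  where C2 - C1 fits in u6,
+; so the conditionalized insn needs no long immediate.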
+
+(define_insn "*movdicc_insn"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w")
+ (if_then_else:DI (match_operator 3 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DI 1 "nonmemory_operand" "c,Ji")
+ (match_operand:DI 2 "register_operand" "0,0")))]
+ ""
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ case 0 :
+ /* We normally copy the low-numbered register first. However, if
+ the first register operand 0 is the same as the second register of
+ operand 1, we must copy in the opposite order. */
+ if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
+ return \"mov.%d3 %R0,%R1\;mov.%d3 %0,%1\";
+ else
+ return \"mov.%d3 %0,%1\;mov.%d3 %R0,%R1\";
+ case 1 :
+ return \"mov.%d3 %L0,%L1\;mov.%d3 %H0,%H1\";
+ }
+}"
+ [(set_attr "type" "cmove,cmove")
+ (set_attr "length" "8,16")])
+
(define_insn "*movsfcc_insn"
- [(set (match_operand:SF 0 "register_operand" "=r,r")
- (if_then_else:SF (match_operand 1 "comparison_operator" "")
- (match_operand:SF 2 "nonmemory_operand" "r,E")
- (match_operand:SF 3 "register_operand" "0,0")))]
+ [(set (match_operand:SF 0 "dest_reg_operand" "=w,w")
+ (if_then_else:SF (match_operator 3 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:SF 1 "nonmemory_operand" "c,E")
+ (match_operand:SF 2 "register_operand" "0,0")))]
""
"@
- mov.%d1 %0,%2
- mov.%d1 %0,%2 ; %A2"
+ mov.%d3 %0,%1
+ mov.%d3 %0,%1 ; %A1"
[(set_attr "type" "cmove,cmove")])
-;(define_insn "*movdfcc_insn"
-; [(set (match_operand:DF 0 "register_operand" "=r,r")
-; (if_then_else:DF (match_operand 1 "comparison_operator" "")
-; (match_operand:DF 2 "nonmemory_operand" "r,E")
-; (match_operand:DF 3 "register_operand" "0,0")))]
-; "0"
-; "*
-;{
-; switch (which_alternative)
-; {
-; case 0 :
-; /* We normally copy the low-numbered register first. However, if
-; the first register operand 0 is the same as the second register of
-; operand 1, we must copy in the opposite order. */
-; if (REGNO (operands[0]) == REGNO (operands[2]) + 1)
-; return \"mov.%d1 %R0,%R2\;mov.%d1 %0,%2\";
-; else
-; return \"mov.%d1 %0,%2\;mov.%d1 %R0,%R2\";
-; case 1 :
-; return \"mov.%d1 %0,%L2\;mov.%d1 %R0,%H2 ; %A2\";
-; }
-;}"
-; [(set_attr "type" "cmove,cmove")
-; (set_attr "length" "2,4")])
-
+(define_insn "*movdfcc_insn"
+ [(set (match_operand:DF 0 "dest_reg_operand" "=w,w")
+ (if_then_else:DF (match_operator 1 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (match_operand:DF 2 "nonmemory_operand" "c,E")
+ (match_operand:DF 3 "register_operand" "0,0")))]
+ ""
+ "*
+{
+ switch (which_alternative)
+ {
+ default:
+ case 0 :
+      /* We normally copy the low-numbered register first.  However, if
+	 the first register operand 0 is the same as the second register of
+	 operand 2, we must copy in the opposite order.  */
+ if (REGNO (operands[0]) == REGNO (operands[2]) + 1)
+ return \"mov.%d1 %R0,%R2\;mov.%d1 %0,%2\";
+ else
+ return \"mov.%d1 %0,%2\;mov.%d1 %R0,%R2\";
+ case 1 :
+      return \"mov.%d1 %L0,%L2\;mov.%d1 %H0,%H2 ; %A2\";
+ }
+}"
+ [(set_attr "type" "cmove,cmove")
+ (set_attr "length" "8,16")])
+
+
+;; TODO - Support push_s and pop_s insns
+;; PUSH/POP instruction
+;(define_insn "*pushsi"
+; [(set (mem:SI (pre_dec:SI (reg:SI 28)))
+; (match_operand:SI 0 "register_operand" "q"))]
+; "TARGET_MIXED_CODE"
+; "push_s %0"
+; [(set_attr "type" "push")
+; (set_attr "iscompact" "true")
+; (set_attr "length" "2")])
+;
+;(define_insn "*popsi"
+; [(set (match_operand:SI 0 "register_operand" "=q")
+; (mem:SI (post_inc:SI (reg:SI 28))))]
+; "TARGET_MIXED_CODE"
+; "pop_s %0"
+; [(set_attr "type" "pop")
+; (set_attr "iscompact" "true")
+; (set_attr "length" "2")])
+
;; Zero extension instructions.
;; ??? We don't support volatile memrefs here, but I'm not sure why.
-(define_insn "zero_extendqihi2"
- [(set (match_operand:HI 0 "register_operand" "=r,r")
- (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+(define_insn "*zero_extendqihi2_a4"
+ [(set (match_operand:HI 0 "dest_reg_operand" "=r")
+ (zero_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_A4"
+ "extb%? %0,%1"
+ [(set_attr "type" "unary")])
+
+(define_insn "*zero_extendqihi2_i"
+ [(set (match_operand:HI 0 "dest_reg_operand" "=Rcq,Rcq#q,Rcw,w,r")
+ (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "0,Rcq#q,0,c,m")))]
""
"@
- extb%? %0,%1
+ extb%? %0,%1%&
+ extb%? %0,%1%&
+ bmsk%? %0,%1,7
+ extb %0,%1
ldb%U1 %0,%1"
- [(set_attr "type" "unary,load")])
+ [(set_attr "type" "unary,unary,unary,unary,load")
+ (set_attr "iscompact" "maybe,true,false,false,false")
+ (set_attr "cond" "canuse,nocond,canuse,nocond,nocond")])
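+
+; Note on the bmsk alternative above: bmsk %0,%1,7 clears all bits above
+; bit 7, i.e. it performs the QImode zero-extension in place.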
-(define_insn "*zero_extendqihi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:HI 0 "register_operand" "=r")
- (zero_extend:HI (match_dup 1)))]
+(define_expand "zero_extendqihi2"
+ [(set (match_operand:HI 0 "dest_reg_operand" "")
+ (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "")))]
""
- "extb%?.f %0,%1"
- [(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
-
-(define_insn "zero_extendqisi2"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
+ "if (prepare_extend_operands (operands, ZERO_EXTEND, HImode)) DONE;"
+)
+
+;; (define_insn "zero_extendqihi2"
+;; [(set (match_operand:HI 0 "register_operand" "=r,r")
+;; (zero_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+;; ""
+;; "@
+;; extb %0,%1
+;; ldb%U1 %0,%1"
+;; [(set_attr "type" "unary,load")
+;; (set_attr "cond" "nocond,nocond")])
+
+(define_insn "*zero_extendqisi2_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
(zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
- ""
+ "TARGET_A4"
"@
extb%? %0,%1
ldb%U1 %0,%1"
[(set_attr "type" "unary,load")])
-(define_insn "*zero_extendqisi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (zero_extend:SI (match_operand:QI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (match_dup 1)))]
+(define_insn "*zero_extendqisi2_ac"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcq#q,Rcw,w,qRcq,!*x,r")
+ (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "0,Rcq#q,0,c,T,Usd,m")))]
+ "TARGET_ARCOMPACT"
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 1:
+ return \"extb%? %0,%1%&\";
+ case 2:
+ return \"bmsk%? %0,%1,7\";
+ case 3:
+ return \"extb %0,%1\";
+ case 4: case 5:
+ return \"ldb%? %0,%1%&\";
+ case 6:
+ return \"ldb%U1 %0,%1\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "type" "unary,unary,unary,unary,load,load,load")
+ (set_attr "iscompact" "maybe,true,false,false,true,true,false")
+ (set_attr "cond" "canuse,nocond,canuse,nocond,nocond,nocond,nocond")])
+
+(define_expand "zero_extendqisi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "")))]
""
- "extb%?.f %0,%1"
- [(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
+ "if (prepare_extend_operands (operands, ZERO_EXTEND, SImode)) DONE;"
+)
+
+;; (define_insn "zero_extendqisi2"
+;; [(set (match_operand:SI 0 "register_operand" "=r,r")
+;; (zero_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+;; ""
+;; "@
+;; extb %0,%1
+;; ldb%U1 %0,%1"
+;; [(set_attr "type" "unary,load")
+;; (set_attr "cond" "nocond,nocond")])
+
+(define_insn "*zero_extendhisi2_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (zero_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_A4"
+ "extw%? %0,%1"
+ [(set_attr "type" "unary")])
-(define_insn "zero_extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))]
+(define_insn "*zero_extendhisi2_i"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,q,Rcw,w,!x,Rcqq,r")
+ (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "0,q,0,c,Usd,Usd,m")))]
""
- "@
- extw%? %0,%1
- ldw%U1 %0,%1"
- [(set_attr "type" "unary,load")])
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 1:
+ return \"extw%? %0,%1%&\";
+ case 2:
+ return \"bmsk%? %0,%1,15\";
+ case 3:
+ return \"extw %0,%1\";
+ case 4:
+ return \"ldw%? %0,%1%&\";
+ case 5:
+ return \"ldw%U1 %0,%1\";
+ case 6:
+ return \"ldw%U1%V1 %0,%1\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "type" "unary,unary,unary,unary,load,load,load")
+ (set_attr "iscompact" "maybe,true,false,false,true,false,false")
+ (set_attr "cond" "canuse,nocond,canuse,nocond,nocond,nocond,nocond")])
+
-(define_insn "*zero_extendhisi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (zero_extend:SI (match_operand:HI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (zero_extend:SI (match_dup 1)))]
+(define_expand "zero_extendhisi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "")))]
""
- "extw%?.f %0,%1"
- [(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
-
+ "if (prepare_extend_operands (operands, ZERO_EXTEND, SImode)) DONE;"
+)
+
+;; (define_insn "zero_extendhisi2"
+;; [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
+;; (zero_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))]
+;; ""
+;; "@
+;; extw %0,%1
+;; ldw%U1 %0,%1"
+;; [(set_attr "type" "unary,load")
+;; (set_attr "cond" "nocond,nocond")])
+
;; Sign extension instructions.
-(define_insn "extendqihi2"
- [(set (match_operand:HI 0 "register_operand" "=r,r")
- (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+(define_insn "*extendqihi2_a4"
+ [(set (match_operand:HI 0 "dest_reg_operand" "=r")
+ (sign_extend:HI (match_operand:QI 1 "register_operand" "r")))]
+ "TARGET_A4"
+ "sexb%? %0,%1"
+ [(set_attr "type" "unary")])
+
+(define_insn "*extendqihi2_i"
+ [(set (match_operand:HI 0 "dest_reg_operand" "=Rcqq,r,r")
+ (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "Rcqq,r,m")))]
""
"@
- sexb%? %0,%1
+ sexb%? %0,%1%&
+ sexb %0,%1
ldb.x%U1 %0,%1"
- [(set_attr "type" "unary,load")])
+ [(set_attr "type" "unary,unary,load")
+ (set_attr "iscompact" "true,false,false")
+ (set_attr "cond" "nocond,nocond,nocond")])
-(define_insn "*extendqihi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (sign_extend:SI (match_operand:QI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:HI 0 "register_operand" "=r")
- (sign_extend:HI (match_dup 1)))]
- ""
- "sexb%?.f %0,%1"
- [(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
-(define_insn "extendqisi2"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+(define_expand "extendqihi2"
+ [(set (match_operand:HI 0 "dest_reg_operand" "")
+ (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "")))]
""
+ "if (prepare_extend_operands (operands, SIGN_EXTEND, HImode)) DONE;"
+)
+
+;; (define_insn "extendqihi2"
+;; [(set (match_operand:HI 0 "register_operand" "=r,r")
+;; (sign_extend:HI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+;; ""
+;; "@
+;; sexb %0,%1
+;; ldb.x%U1 %0,%1"
+;; [(set_attr "type" "unary,load")
+;; (set_attr "cond" "nocond,nocond")])
+
+(define_insn "*extendqisi2_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
+ (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "r,m")))]
+ "TARGET_A4"
"@
- sexb%? %0,%1
- ldb.x%U1 %0,%1"
+ sexb%? %0,%1
+ ldb.x%U1 %0,%1"
[(set_attr "type" "unary,load")])
-(define_insn "*extendqisi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (sign_extend:SI (match_operand:QI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (sign_extend:SI (match_dup 1)))]
+;; (define_insn "*extendqisi2_mixed"
+;; [(set (match_operand:SI 0 "compact_register_operand" "=q")
+;; (sign_extend:SI (match_operand:QI 1 "compact_register_operand" "q")))]
+;; "TARGET_MIXED_CODE"
+;; "sexb_s %0,%1"
+;; [(set_attr "type" "unary")
+;; (set_attr "iscompact" "true")])
+
+(define_insn "*extendqisi2_ac"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w,r")
+ (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "Rcqq,c,m")))]
+ "TARGET_ARCOMPACT"
+ "@
+ sexb%? %0,%1%&
+ sexb %0,%1
+ ldb.x%U1 %0,%1"
+ [(set_attr "type" "unary,unary,load")
+ (set_attr "iscompact" "true,false,false")
+ (set_attr "cond" "nocond,nocond,nocond")])
+
+(define_expand "extendqisi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (sign_extend:SI (match_operand:QI 1 "nonvol_nonimm_operand" "")))]
""
- "sexb%?.f %0,%1"
- [(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
+ "if (prepare_extend_operands (operands, SIGN_EXTEND, SImode)) DONE;"
+)
+
+(define_insn "*extendhisi2_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))]
+ "TARGET_A4"
+ "sexw%? %0,%1"
+ [(set_attr "type" "unary")])
+
+;; (define_insn "*extendhisi2_mixed"
+;; [(set (match_operand:SI 0 "compact_register_operand" "=q")
+;; (sign_extend:SI (match_operand:HI 1 "compact_register_operand" "q")))]
+;; "TARGET_MIXED_CODE"
+;; "sexw_s %0,%1"
+;; [(set_attr "type" "unary")
+;; (set_attr "iscompact" "true")])
+
-(define_insn "extendhisi2"
- [(set (match_operand:SI 0 "register_operand" "=r,r")
- (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "r,m")))]
+(define_insn "*extendhisi2_i"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w,r")
+ (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "Rcqq,c,m")))]
""
"@
- sexw%? %0,%1
- ldw.x%U1 %0,%1"
- [(set_attr "type" "unary,load")])
-
-(define_insn "*extendhisi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (sign_extend:SI (match_operand:HI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (sign_extend:SI (match_dup 1)))]
+ sexw%? %0,%1%&
+ sexw %0,%1
+ ldw.x%U1%V1 %0,%1"
+ [(set_attr "type" "unary,unary,load")
+ (set_attr "iscompact" "true,false,false")
+ (set_attr "cond" "nocond,nocond,nocond")])
+
+(define_expand "extendhisi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (sign_extend:SI (match_operand:HI 1 "nonvol_nonimm_operand" "")))]
""
- "sexw%?.f %0,%1"
- [(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
-
+ "if (prepare_extend_operands (operands, SIGN_EXTEND, SImode)) DONE;"
+)
+
+;; Unary arithmetic insns
+
+;; We allow constant operands to enable late constant propagation, but it is
+;; not worthwhile to have more than one dedicated alternative to output them -
+;; if we really want to get the maximum benefit out of all the available
+;; alternatives, we should add an extra pass to fold such operations into
+;; movsi.
+
+;; Absolute instructions
+
+(define_insn "*abssi2_mixed"
+ [(set (match_operand:SI 0 "compact_register_operand" "=q")
+ (abs:SI (match_operand:SI 1 "compact_register_operand" "q")))]
+ "TARGET_MIXED_CODE"
+ "abs%? %0,%1%&"
+ [(set_attr "type" "two_cycle_core")
+ (set_attr "iscompact" "true")])
+
+(define_insn "abssi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w")
+ (abs:SI (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,Cal")))]
+ "TARGET_ARCOMPACT"
+ "abs%? %0,%1%&"
+ [(set_attr "type" "two_cycle_core")
+ (set_attr "length" "*,4,8")
+ (set_attr "iscompact" "true,false,false")
+ (set_attr "cond" "nocond,nocond,nocond")])
+
+;; Maximum and minimum insns
+
+(define_insn "smaxsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w")
+ (smax:SI (match_operand:SI 1 "register_operand" "%0, c, c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))]
+ "TARGET_MINMAX"
+ "max%? %0,%1,%2"
+ [(set_attr "type" "two_cycle_core")
+ (set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
+(define_insn "sminsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w")
+ (smin:SI (match_operand:SI 1 "register_operand" "%0, c, c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,cL,Cal")))]
+ "TARGET_MINMAX"
+ "min%? %0,%1,%2"
+ [(set_attr "type" "two_cycle_core")
+ (set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
;; Arithmetic instructions.
+;; (define_insn "*addsi3_mixed"
+;; [(set (match_operand:SI 0 "compact_register_operand" "=q,q,q,r")
+;; (plus:SI (match_operand:SI 1 "compact_register_operand" "%q,0,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "qK,rO,Ji,rJi")))]
+;; "TARGET_MIXED_CODE"
+;; "*
+;; {
+;; switch (which_alternative)
+;; {
+;; case 0:
+;; return \"add_s %0,%1,%2\";
+;; case 1:
+;; return \"add_s %0,%1,%2\";
+;; case 12:
+;; if (INTVAL (operands[2]) < 0)
+;; return \"sub%? %0,%1,%n2\";
+;; else
+;; return \"add%? %0,%1,%2\";
+;; case 2:
+;; return \"add_s %0,%1,%S2\";
+;; case 3:
+;; return \"add%? %0,%1,%S2\";
+;; default:
+;; abort ();
+;; }
+;; }"
+;; [(set_attr "iscompact" "true,true,true,false")
+;; (set_attr "length" "*,*,6,8")
+;; (set_attr "cond" "nocond,nocond,nocond,canuse")])
+
+; We say an insn can be conditionalized if this doesn't introduce a long
+; immediate. We set the type such that we still have good scheduling if the
+; insn is conditionalized.
+; ??? It would make sense to allow introduction of long immediates, but
+; we'd need to communicate to the ccfsm machinery the extra cost.
+; The alternatives in the constraints still serve three purposes:
+; - estimate insn size assuming conditional execution;
+; - guide reload to reorder the second and third operands for a better fit;
+; - give a tentative insn type to guide scheduling.
+; N.B. "%" for commutativity doesn't help when there is another matching
+; (but longer) alternative.
+(define_insn_and_split "*addsi3_mixed"
+ ;; 0 1 2 3 4 5 6 7 8 9 a b c d e f 10
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,Rcq,Rcw,Rcw,Rcq,Rcb,Rcq, Rcw, Rcqq,Rcqq, w, w,w, w,Rcqq,Rcw, w")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0, c, 0, c, 0, 0,Rcb, 0, Rcqq, 0, c, c,0, 0, 0, 0, c")
+ (match_operand:SI 2 "nonmemory_operand" "cL, 0, cL, 0,CL2,Csp,CM4,cCca,RcqqK, cO,cLCmL,Cca,I,C2a, Cal,Cal,Cal")))]
+ "TARGET_ARCOMPACT"
+ "*if (which_alternative == 6)
+ return arc_short_long (insn, \"add%? %0,%1,%2\", \"add1 %0,%1,%2/2\");
+ return arc_output_addsi (operands,
+ arc_ccfsm_cond_exec_p () ? \"%?\" : \"\");"
+ "&& reload_completed && get_attr_length (insn) == 8
+ && satisfies_constraint_I (operands[2])"
+ [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))]
+ "split_addsi (operands);"
+ [(set_attr "type" "*,*,*,*,two_cycle_core,two_cycle_core,*,two_cycle_core,*,*,*,two_cycle_core,*,two_cycle_core,*,*,*")
+ (set (attr "iscompact")
+ (cond [(eq (symbol_ref "*arc_output_addsi (operands, 0)") (const_int 0))
+ (const_string "false")
+ (match_operand 2 "long_immediate_operand" "")
+ (const_string "maybe_limm")]
+ (const_string "maybe")))
+ (set_attr "length" "*,*,4,4,*,*,*,4,*,*,4,4,4,4,*,8,8")
+ (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,nocond,canuse,nocond,nocond,nocond,nocond,canuse_limm,canuse_limm,canuse,canuse,nocond")])
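+
+; I.e. an add that ended up needing 8 bytes but whose constant satisfies
+; the 'I' constraint is re-split after reload into two shorter adds
+; (via split_addsi, defined elsewhere in the port).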
+
+;; (define_insn "*addsi3_mixed"
+;; [(set (match_operand:SI 0 "dest_reg_operand" "=q,q,q,q,r,r,r,r,r,r,r,r,r")
+;; (plus:SI (match_operand:SI 1 "register_operand" "%q,0,0,0,0,r,0,r,0,0,r,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "qK,rO,J,i,r,r,L,L,I,J,J,i,i")))]
+;; ""
+;; "*
+;; {
+;; switch (which_alternative)
+;; {
+;; case 0:
+;; return \"add_s %0,%1,%2\";
+;; case 1:
+;; return \"add_s %0,%1,%2\";
+;; case 4:
+;; return \"add%? %0,%1,%2\";
+;; case 5:
+;; return \"add %0,%1,%2\";
+;; case 6:
+;; return \"add%? %0,%1,%2\";
+;; case 7:
+;; return \"add %0,%1,%2\";
+;; case 8:
+;; return \"add %0,%1,%2\";
+;; case 2:
+;; {
+;; int intval = INTVAL (operands[2]);
+;; if (intval < 0)
+;; {
+;; if (-intval< 0x20)
+;; return \"sub_s %0,%1,%n2\";
+;; else
+;; return \"sub %0,%1,%n2\";
+;; }
+;; else
+;; return \"add_s %0,%1,%2\";
+;; }
+;; case 3:
+;; return \"add_s %0,%1,%S2\";
+;; case 9:
+;; if (INTVAL (operands[2]) < 0)
+;; return \"sub%? %0,%1,%n2\";
+;; else
+;; return \"add%? %0,%1,%2\";
+;; case 10:
+;; if (INTVAL (operands[2]) < 0)
+;; return \"sub %0,%1,%n2\";
+;; else
+;; return \"add %0,%1,%2\";
+;; case 11:
+;; return \"add%? %0,%1,%S2\";
+;; case 12:
+;; return \"add %0,%1,%S2\";
+;; default:
+;; abort ();
+;; }
+;; }"
+;; [(set_attr "iscompact" "true,true,false,true,false,false,false,false,false,false,false,false,false")
+;; (set_attr "length" "2,2,8,6,4,4,4,4,4,8,8,8,8")
+;; (set_attr "cond" "nocond,nocond,nocond,nocond,canuse,nocond,canuse,nocond,nocond,canuse,nocond,canuse,nocond")])
+
+;; ARC700/ARC600 multiply
+;; SI <- SI * SI
+
+(define_expand "mulsi3"
+ [(set (match_operand:SI 0 "nonimmediate_operand" "")
+ (mult:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ "(TARGET_ARC700 && !TARGET_NOMPY_SET)
+ || TARGET_MUL64_SET || TARGET_MULMAC_32BY16_SET"
+ "
+{
+  if ((TARGET_ARC700 && !TARGET_NOMPY_SET)
+      && !register_operand (operands[0], SImode))
+ {
+ rtx result = gen_reg_rtx (SImode);
-(define_insn "addsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (plus:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
- ""
- "add%? %0,%1,%2")
-
-(define_insn "*addsi3_set_cc_insn"
- [(set (reg:CC 61) (compare:CC
- (plus:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (plus:SI (match_dup 1)
- (match_dup 2)))]
- ""
- "add%?.f %0,%1,%2"
- [(set_attr "cond" "set")])
-
-(define_insn "adddi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (plus:DI (match_operand:DI 1 "nonmemory_operand" "%r")
- (match_operand:DI 2 "nonmemory_operand" "ri")))
- (clobber (reg:CC 61))]
- ""
- "*
+ emit_insn (gen_mulsi3 (result, operands[1], operands[2]));
+ emit_move_insn (operands[0], result);
+ DONE;
+ }
+ else if (TARGET_MUL64_SET)
+ {
+ emit_insn (gen_mulsi_600 (operands[1], operands[2],
+ gen_mlo (), gen_mhi ()));
+ emit_move_insn (operands[0], gen_mlo ());
+ DONE;
+ }
+ else if (TARGET_MULMAC_32BY16_SET)
+ {
+ if (immediate_operand (operands[2], SImode)
+ && INTVAL (operands[2]) >= 0
+ && INTVAL (operands[2]) <= 65535)
+ {
+ emit_insn (gen_umul_600 (operands[1], operands[2],
+ gen_acc2 (), gen_acc1 ()));
+ emit_move_insn (operands[0], gen_acc2 ());
+ DONE;
+ }
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_umul_600 (operands[1], operands[2],
+ gen_acc2 (), gen_acc1 ()));
+ emit_insn (gen_mac_600 (operands[1], operands[2],
+ gen_acc2 (), gen_acc1 ()));
+ emit_move_insn (operands[0], gen_acc2 ());
+ DONE;
+ }
+}")
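+
+; Illustrative note: with only the 32x16 MAC unit, the full 32x32 low-part
+; product is computed as  a*b = a*lo16(b) + ((a*hi16(b)) << 16),
+; i.e. mululw into the accumulator followed by machlw (umul_600 / mac_600).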
+
+; mululw conditional execution without a LIMM clobbers an input register;
+; we'd need a different pattern to describe this.
+(define_insn "umul_600"
+ [(set (match_operand:SI 2 "acc2_operand" "")
+ (mult:SI (match_operand:SI 0 "register_operand" "c,c,c")
+ (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" "c,L,J")
+ (const_int 16)
+ (const_int 0))))
+ (clobber (match_operand:SI 3 "acc1_operand" ""))]
+ "TARGET_MULMAC_32BY16_SET"
+ "mululw%? 0, %0, %1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "mulmac_600, mulmac_600, mulmac_600")
+ (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+(define_insn "mac_600"
+ [(set (match_operand:SI 2 "acc2_operand" "")
+ (plus:SI
+ (mult:SI (match_operand:SI 0 "register_operand" "c,c,c")
+ (ashift:SI
+ (zero_extract:SI (match_operand:SI 1 "nonmemory_operand" "c,L,J")
+ (const_int 16)
+ (const_int 16))
+ (const_int 16)))
+ (match_dup 2)))
+ (clobber (match_operand:SI 3 "acc1_operand" ""))]
+ "TARGET_MULMAC_32BY16_SET"
+ "machlw%? 0, %0, %1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "mulmac_600, mulmac_600, mulmac_600")
+ (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+(define_insn "mulsi_600"
+ [(set (match_operand:SI 2 "mlo_operand" "")
+ (mult:SI (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c")
+ (match_operand:SI 1 "nonmemory_operand" "Rcq#q,cL,I,J")))
+ (clobber (match_operand:SI 3 "mhi_operand" ""))]
+ "TARGET_MUL64_SET"
+ "mul64%? \t0, %0, %1%&"
+ [(set_attr "length" "*,4,4,8")
+ (set_attr "iscompact" "maybe,false,false,false")
+ (set_attr "type" "multi,multi,multi,multi")
+ (set_attr "cond" "canuse,canuse,canuse_limm,canuse")])
+
+(define_insn "mulsidi_600"
+ [(set (reg:DI 58)
+ (mult:DI (sign_extend:DI
+ (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c"))
+ (sign_extend:DI
+ (match_operand:SI 1 "register_operand" "Rcq#q,cL,I,J"))))]
+ "TARGET_MUL64_SET"
+ "mul64%? \t0, %0, %1%&"
+ [(set_attr "length" "*,4,4,8")
+ (set_attr "iscompact" "maybe,false,false,false")
+ (set_attr "type" "multi,multi,multi,multi")
+ (set_attr "cond" "canuse,canuse,canuse_limm,canuse")])
+
+(define_insn "umulsidi_600"
+ [(set (reg:DI 58)
+ (mult:DI (zero_extend:DI
+ (match_operand:SI 0 "register_operand" "Rcq#q,c,c,%c"))
+		 (zero_extend:DI
+ (match_operand:SI 1 "register_operand" "Rcq#q,cL,I,J"))))]
+ "TARGET_MUL64_SET"
+ "mulu64%? \t0, %0, %1%&"
+ [(set_attr "length" "*,4,4,8")
+ (set_attr "iscompact" "maybe,false,false,false")
+ (set_attr "type" "umulti")
+ (set_attr "cond" "canuse,canuse,canuse_limm,canuse")])
+
+; ARC700 mpy* instructions: This is a multi-cycle extension, and thus 'w'
+; may not be used as a destination constraint.
+
+; The result of mpy and mpyu is the same except for flag setting (if enabled),
+; but mpyu is faster for the standard multiplier.
+(define_insn "mulsi3_700"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcr, r,r,Rcr, r")
+ (mult:SI (match_operand:SI 1 "register_operand" " 0, c,0, 0, c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,cL,I,Cal,Cal")))]
+"TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "mpyu%? %0,%1,%2"
+ [(set_attr "length" "4,4,4,8,8")
+ (set_attr "type" "umulti")
+ (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")])
+
+
+(define_expand "mulsidi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" ""))
+		 (sign_extend:DI (match_operand:SI 2 "nonmemory_operand" ""))))]
+ "(TARGET_ARC700 && !TARGET_NOMPY_SET)
+ || TARGET_MULMAC_32BY16_SET"
+"
{
- rtx op2 = operands[2];
+ if ((TARGET_ARC700 && !TARGET_NOMPY_SET))
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ if (!register_operand (operands[0], DImode))
+ {
+ rtx result = gen_reg_rtx (DImode);
+
+ emit_insn (gen_mulsidi3 (result, operands[1], operands[2]));
+ emit_move_insn (operands[0], result);
+ DONE;
+ }
+ }
+ if (TARGET_MUL64_SET)
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+ emit_insn (gen_mulsidi_600 (operands[1], operands[2]));
+ emit_move_insn (operands[0], gen_rtx_REG (DImode, 58));
+      DONE;
+    }
+ else if (TARGET_MULMAC_32BY16_SET)
+ {
+      rtx result_hi = gen_highpart (SImode, operands[0]);
+      rtx result_low = gen_lowpart (SImode, operands[0]);
- if (GET_CODE (op2) == CONST_INT)
+ emit_insn (gen_mul64_600 (operands[1], operands[2]));
+ emit_insn (gen_mac64_600 (result_hi, operands[1], operands[2]));
+ emit_move_insn (result_low, gen_acc2 ());
+ DONE;
+ }
+}")
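+
+; For the 32x16 unit the widening product is built up as (schematically)
+;   a * lo16(b)  +  ((a * hi16(b)) << 16)
+; in the 64-bit accumulator: mullw, then machlw (which also copies out the
+; high word), then the low word is moved from acc2.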
+
+(define_insn "mul64_600"
+ [(set (reg:DI 56)
+ (mult:DI (sign_extend:DI (match_operand:SI 0 "register_operand" "c,c,c"))
+ (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" "c,L,J")
+ (const_int 16)
+ (const_int 0))))
+ ]
+ "TARGET_MULMAC_32BY16_SET"
+ "mullw%? 0, %0, %1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "mulmac_600")
+ (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+
+;; ??? Check if this is canonical RTL.
+(define_insn "mac64_600"
+ [(set (reg:DI 56)
+ (plus:DI
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "c,c,c"))
+ (ashift:DI
+ (sign_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,J")
+ (const_int 16) (const_int 16))
+ (const_int 16)))
+ (reg:DI 56)))
+ (set (match_operand:SI 0 "register_operand" "=w,w,w")
+ (zero_extract:SI
+ (plus:DI
+ (mult:DI (sign_extend:DI (match_dup 1))
+ (ashift:DI
+ (sign_extract:DI (match_dup 2)
+ (const_int 16) (const_int 16))
+ (const_int 16)))
+ (reg:DI 56))
+ (const_int 32) (const_int 32)))]
+ "TARGET_MULMAC_32BY16_SET"
+ "machlw%? %0, %1, %2"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "mulmac_600")
+ (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+
+;; DI <- DI(signed SI) * DI(signed SI)
+(define_insn_and_split "mulsidi3_700"
+ [(set (match_operand:DI 0 "register_operand" "=&r")
+ (mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "%c"))
+ (sign_extend:DI (match_operand:SI 2 "register_operand" "cL"))))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int hi = !TARGET_BIG_ENDIAN;
+ int lo = !hi;
+ rtx l0 = operand_subword (operands[0], lo, 0, DImode);
+ rtx h0 = operand_subword (operands[0], hi, 0, DImode);
+ emit_insn (gen_mulsi3_highpart (h0, operands[1], operands[2]));
+ emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2]));
+ DONE;
+}
+ [(set_attr "type" "multi")
+ (set_attr "length" "8")])
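+
+; I.e. after reload the signed widening multiply is emitted as the pair
+;   mpyh  hi,a,b
+;   mpyu  lo,a,b
+; (hi/lo denote the two halves of the destination).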
+
+(define_insn "mulsi3_highpart"
+ [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c"))
+ (sign_extend:DI (match_operand:SI 2 "extend_operand" "c,c, s,s")))
+ (const_int 32))))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "mpyh%? %0,%1,%2"
+ [(set_attr "length" "4,4,8,8")
+ (set_attr "type" "multi")
+ (set_attr "cond" "canuse,nocond,canuse,nocond")])
+
+; Note that mpyhu has the same latency as mpy / mpyh,
+; thus we use the type multi.
+(define_insn "*umulsi3_highpart_i"
+ [(set (match_operand:SI 0 "register_operand" "=Rcr,r,Rcr,r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" "%0,c, 0,c"))
+ (zero_extend:DI (match_operand:SI 2 "extend_operand" "c,c, s,s")))
+ (const_int 32))))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "mpyhu%? %0,%1,%2"
+ [(set_attr "length" "4,4,8,8")
+ (set_attr "type" "multi")
+ (set_attr "cond" "canuse,nocond,canuse,nocond")])
+
+;; (zero_extend:DI (const_int)) leads to internal errors in combine, so we
+;; need a separate pattern for immediates.
+;; ??? This is fine for combine, but not for reload.
+(define_insn "umulsi3_highpart_int"
+ [(set (match_operand:SI 0 "register_operand" "=Rcr, r, r,Rcr, r")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" " 0, c, 0, 0, c"))
+ (match_operand:DI 2 "immediate_usidi_operand" "L, L, I, Cal, Cal"))
+ (const_int 32))))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "mpyhu%? %0,%1,%2"
+ [(set_attr "length" "4,4,4,8,8")
+ (set_attr "type" "multi")
+ (set_attr "cond" "canuse,nocond,canuse_limm,canuse,nocond")])
+
+(define_expand "umulsi3_highpart"
+ [(set (match_operand:SI 0 "general_operand" "")
+ (truncate:SI
+ (lshiftrt:DI
+ (mult:DI
+ (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+ (zero_extend:DI (match_operand:SI 2 "nonmemory_operand" "")))
+ (const_int 32))))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "
+{
+ rtx target = operands[0];
+
+ if (!register_operand (target, SImode))
+ target = gen_reg_rtx (SImode);
+
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0)
+ operands[2] = simplify_const_unary_operation (ZERO_EXTEND, DImode,
+ operands[2], SImode);
+ if (!immediate_operand (operands[2], SImode))
+ operands[2] = gen_rtx_ZERO_EXTEND (DImode, operands[2]);
+ emit_insn (gen_umulsi3_highpart_int (target, operands[1], operands[2]));
+ if (target != operands[0])
+ emit_move_insn (operands[0], target);
+ DONE;
+}")
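+
+; I.e. a negative CONST_INT is folded to its zero-extended DImode value up
+; front, because wrapping a const_int in (zero_extend ...) is not valid RTL
+; (see the comment above umulsi3_highpart_int).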
+
+(define_expand "umulsidi3"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+	(mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" ""))
+		 (zero_extend:DI (match_operand:SI 2 "nonmemory_operand" ""))))]
+ "(TARGET_ARC700 && !TARGET_NOMPY_SET)
+ || TARGET_MULMAC_32BY16_SET"
+"
+{
+ if ((TARGET_ARC700 && !TARGET_NOMPY_SET))
{
- int sign = INTVAL (op2);
- if (sign < 0)
- return \"add.f %L0,%L1,%2\;adc %H0,%H1,-1\";
- else
- return \"add.f %L0,%L1,%2\;adc %H0,%H1,0\";
+ operands[2] = force_reg (SImode, operands[2]);
+ if (!register_operand (operands[0], DImode))
+ {
+ rtx result = gen_reg_rtx (DImode);
+
+ emit_insn (gen_umulsidi3 (result, operands[1], operands[2]));
+ emit_move_insn (operands[0], result);
+ DONE;
+ }
}
- else
- return \"add.f %L0,%L1,%L2\;adc %H0,%H1,%H2\";
-}"
- [(set_attr "length" "2")])
-
-(define_insn "subsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (match_operand:SI 1 "register_operand" "r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
- ""
- "sub%? %0,%1,%2")
-
-(define_insn "*subsi3_set_cc_insn"
- [(set (reg:CC 61) (compare:CC
- (minus:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (minus:SI (match_dup 1)
- (match_dup 2)))]
- ""
- "sub%?.f %0,%1,%2"
- [(set_attr "cond" "set")])
-
-(define_insn "subdi3"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (minus:DI (match_operand:DI 1 "nonmemory_operand" "r")
- (match_operand:DI 2 "nonmemory_operand" "ri")))
- (clobber (reg:CC 61))]
+ else if (TARGET_MUL64_SET)
+ {
+ operands[2] = force_reg (SImode, operands[2]);
+      emit_insn (gen_umulsidi_600 (operands[1], operands[2]));
+      emit_move_insn (operands[0], gen_rtx_REG (DImode, 58));
+      DONE;
+    }
+ else if (TARGET_MULMAC_32BY16_SET)
+ {
+      rtx result_hi = gen_highpart (SImode, operands[0]);
+      rtx result_low = gen_lowpart (SImode, operands[0]);
+
+ emit_insn (gen_umul64_600 (operands[1], operands[2]));
+ emit_insn (gen_umac64_600 (result_hi, operands[1], operands[2]));
+ emit_move_insn (result_low, gen_acc2 ());
+ DONE;
+ }
+}")
+
+(define_insn "umul64_600"
+ [(set (reg:DI 56)
+ (mult:DI (zero_extend:DI (match_operand:SI 0 "register_operand" "c,c,c"))
+ (zero_extract:DI (match_operand:SI 1 "nonmemory_operand" "c,L,Cal")
+ (const_int 16)
+ (const_int 0))))
+ ]
+ "TARGET_MULMAC_32BY16_SET"
+ "mululw%? 0, %0, %1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "mulmac_600")
+ (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+
+(define_insn "umac64_600"
+ [(set (reg:DI 56)
+ (plus:DI
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "c,c,c"))
+ (ashift:DI
+ (zero_extract:DI (match_operand:SI 2 "nonmemory_operand" "c,L,Cal")
+ (const_int 16) (const_int 16))
+ (const_int 16)))
+ (reg:DI 56)))
+ (set (match_operand:SI 0 "register_operand" "=w,w,w")
+ (zero_extract:SI
+ (plus:DI
+ (mult:DI (zero_extend:DI (match_dup 1))
+ (ashift:DI
+ (zero_extract:DI (match_dup 2)
+ (const_int 16) (const_int 16))
+ (const_int 16)))
+ (reg:DI 56))
+ (const_int 32) (const_int 32)))]
+ "TARGET_MULMAC_32BY16_SET"
+ "machulw%? %0, %1, %2"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "mulmac_600")
+ (set_attr "cond" "nocond, canuse_limm, canuse")])
+
+;; DI <- DI(unsigned SI) * DI(unsigned SI)
+(define_insn_and_split "umulsidi3_700"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&r")
+ (mult:DI (zero_extend:DI (match_operand:SI 1 "register_operand" "%c"))
+ (zero_extend:DI (match_operand:SI 2 "register_operand" "c"))))]
+ "TARGET_ARC700 && !TARGET_NOMPY_SET"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int hi = !TARGET_BIG_ENDIAN;
+ int lo = !hi;
+ rtx l0 = operand_subword (operands[0], lo, 0, DImode);
+ rtx h0 = operand_subword (operands[0], hi, 0, DImode);
+ emit_insn (gen_umulsi3_highpart (h0, operands[1], operands[2]));
+ emit_insn (gen_mulsi3_700 (l0, operands[1], operands[2]));
+ DONE;
+}
+ [(set_attr "type" "umulti")
+ (set_attr "length" "8")])
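+
+; Likewise for the unsigned case:  mpyhu hi,a,b ; mpyu lo,a,b
+; (hi/lo again denote the destination halves).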
+
+(define_expand "addsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
""
+  "if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[2]))
+     operands[2] = force_reg (SImode, operands[2]);
+   else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[2], Pmode))
+     operands[2] = force_reg (SImode, arc_rewrite_small_data (operands[2]));
+  ")
+
+(define_insn "*addsi3_insn_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r,r,r,r,r,r")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,r,0,r,0,0,r,0,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,r,L,L,I,J,J,i,i")))]
+ "TARGET_A4"
"*
+  {
+    switch (which_alternative)
+      {
+      case 0:
+      case 2:
+	return \"add%? %0,%1,%2\";
+      case 1:
+      case 3:
+      case 4:
+	return \"add %0,%1,%2\";
+      case 5:
+	if (INTVAL (operands[2]) < 0)
+	  return \"sub%? %0,%1,%n2\";
+	else
+	  return \"add%? %0,%1,%2\";
+      case 6:
+	if (INTVAL (operands[2]) < 0)
+	  return \"sub %0,%1,%n2\";
+	else
+	  return \"add %0,%1,%2\";
+      case 7:
+	return \"add%? %0,%1,%S2\";
+      case 8:
+	return \"add %0,%1,%S2\";
+      default:
+	gcc_unreachable ();
+      }
+  }"
+ [(set_attr "length" "4,4,4,4,4,8,8,8,8")
+ (set_attr "cond" "canuse,nocond,canuse,nocond,nocond,canuse,nocond,canuse,nocond")]
+)
+
+(define_expand "adddi3"
+ [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "")
+ (plus:DI (match_operand:DI 1 "register_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))
+ (clobber (reg:CC CC_REG))])]
+ ""
{
- rtx op2 = operands[2];
+ if (TARGET_EXPAND_ADDDI)
+ {
+ rtx l0 = gen_lowpart (SImode, operands[0]);
+ rtx h0 = disi_highpart (operands[0]);
+ rtx l1 = gen_lowpart (SImode, operands[1]);
+ rtx h1 = disi_highpart (operands[1]);
+ rtx l2 = gen_lowpart (SImode, operands[2]);
+ rtx h2 = disi_highpart (operands[2]);
+      rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG);
+
+ if (CONST_INT_P (h2) && INTVAL (h2) < 0 && SIGNED_INT12 (INTVAL (h2)))
+ {
+ emit_insn (gen_sub_f (l0, l1, gen_int_mode (-INTVAL (l2), SImode)));
+ emit_insn (gen_sbc (h0, h1,
+			      gen_int_mode (-INTVAL (h2) - (INTVAL (l2) != 0),
+					    SImode),
+ cc_c));
+ DONE;
+ }
+ emit_insn (gen_add_f (l0, l1, l2));
+ emit_insn (gen_adc (h0, h1, h2));
+ DONE;
+ }
+})
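+
+; E.g. with TARGET_EXPAND_ADDDI a DImode add is expanded here as
+;   add.f l0,l1,l2
+;   adc   h0,h1,h2
+; while adding a small negative constant uses sub.f / sbc instead.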
+
+; This assumes that there can be no strictly partial overlap between
+; operands[1] and operands[2].
+(define_insn_and_split "*adddi3_i"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w")
+ (plus:DI (match_operand:DI 1 "register_operand" "%c,0,c")
+ (match_operand:DI 2 "nonmemory_operand" "ci,ci,!i")))
+ (clobber (reg:CC CC_REG))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int hi = !TARGET_BIG_ENDIAN;
+ int lo = !hi;
+ rtx l0 = operand_subword (operands[0], lo, 0, DImode);
+ rtx h0 = operand_subword (operands[0], hi, 0, DImode);
+ rtx l1 = operand_subword (operands[1], lo, 0, DImode);
+ rtx h1 = operand_subword (operands[1], hi, 0, DImode);
+ rtx l2 = operand_subword (operands[2], lo, 0, DImode);
+ rtx h2 = operand_subword (operands[2], hi, 0, DImode);
+
+ if (l2 == const0_rtx)
+ {
+ if (!rtx_equal_p (l0, l1) && !rtx_equal_p (l0, h1))
+ emit_move_insn (l0, l1);
+ emit_insn (gen_addsi3 (h0, h1, h2));
+ if (!rtx_equal_p (l0, l1) && rtx_equal_p (l0, h1))
+ emit_move_insn (l0, l1);
+ DONE;
+ }
+ if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) < 0
+ && INTVAL (operands[2]) >= -0x7fffffff)
+ {
+ emit_insn (gen_subdi3_i (operands[0], operands[1],
+ GEN_INT (-INTVAL (operands[2]))));
+ DONE;
+ }
+ if (rtx_equal_p (l0, h1))
+ {
+ if (h2 != const0_rtx)
+ emit_insn (gen_addsi3 (h0, h1, h2));
+ else if (!rtx_equal_p (h0, h1))
+ emit_move_insn (h0, h1);
+ emit_insn (gen_add_f (l0, l1, l2));
+ emit_insn
+ (gen_rtx_COND_EXEC
+ (VOIDmode,
+ gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)),
+ gen_rtx_SET (VOIDmode, h0, plus_constant (h0, 1))));
+ DONE;
+ }
+ emit_insn (gen_add_f (l0, l1, l2));
+ emit_insn (gen_adc (h0, h1, h2));
+ DONE;
+}
+ [(set_attr "cond" "clob")
+ (set_attr "type" "binary")
+ (set_attr "length" "16,16,20")])
+
+(define_insn "add_f"
+ [(set (reg:CC_C CC_REG)
+ (compare:CC_C
+ (plus:SI (match_operand:SI 1 "register_operand" "c,0,c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal"))
+ (match_dup 1)))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "add.f %0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,4,8")])
+
+(define_insn "*add_f_2"
+ [(set (reg:CC_C CC_REG)
+ (compare:CC_C
+ (plus:SI (match_operand:SI 1 "register_operand" "c,0,c")
+ (match_operand:SI 2 "nonmemory_operand" "cL,I,cCal"))
+ (match_dup 2)))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w")
+ (plus:SI (match_dup 1) (match_dup 2)))]
+ ""
+ "add.f %0,%1,%2"
+ [(set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "4,4,8")])
+
+; w/c/c comes first (rather than Rcw/0/C_0) to prevent the middle-end
+; needlessly prioritizing the matching constraint.
+; Rcw/0/C_0 comes before w/c/L so that the lower latency conditional
+; execution is used where possible.
+(define_insn_and_split "adc"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w")
+ (plus:SI (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+ (match_operand:SI 1 "nonmemory_operand"
+ "%c,0,c,0,cCal"))
+ (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "@
+ adc %0,%1,%2
+ add.cs %0,%1,1
+ adc %0,%1,%2
+ adc %0,%1,%2
+ adc %0,%1,%2"
+ ; if we have a bad schedule after sched2, split.
+ "reload_completed
+ && !optimize_size && TARGET_ARC700
+ && arc_scheduling_not_expected ()
+ && arc_sets_cc_p (prev_nonnote_insn (insn))
+   /* If a return or other insn that needs a delay slot comes next,
+      expect the adc to end up in that delay slot.  */
+ && next_nonnote_insn (insn)
+ && !arc_need_delay (next_nonnote_insn (insn))
+ /* Restore operands before emitting. */
+ && (extract_insn_cached (insn), 1)"
+ [(set (match_dup 0) (match_dup 3))
+ (cond_exec
+ (ltu (reg:CC_C CC_REG) (const_int 0))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int 1))))]
+ "operands[3] = simplify_gen_binary (PLUS, SImode, operands[1], operands[2]);"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cc_arith")
+ (set_attr "length" "4,4,4,4,8")])
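+
+; I.e. when this split fires, the adc is replaced by a plain add followed
+; by an add of 1 that is conditionalized on the carry flag.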
+
+; combiner-splitter cmp / scc -> cmp / adc
+(define_split
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (gtu:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "register_operand" "")))
+ (clobber (reg CC_REG))]
+ ""
+ [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1)))
+ (set (match_dup 0) (ltu:SI (reg:CC_C CC_REG) (const_int 0)))])
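+
+; E.g.  x = (a >u b)  becomes  "cmp b,a" followed by an scc insn that
+; reads the carry flag (hence "cmp / adc" above).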
+
+; Combine won't work when an intermediate result is used later...
+;   add %0,%1,%2 ; cmp %0,%[12]  ->  add.f %0,%1,%2
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+   (set (reg:CC_C CC_REG)
+ (compare:CC_C (match_dup 0)
+ (match_operand:SI 3 "nonmemory_operand" "")))]
+ "rtx_equal_p (operands[1], operands[3])
+ || rtx_equal_p (operands[2], operands[3])"
+ [(parallel
+ [(set (reg:CC_C CC_REG)
+ (compare:CC_C (plus:SI (match_dup 1) (match_dup 2)) (match_dup 1)))
+ (set (match_dup 0)
+ (plus:SI (match_dup 1) (match_dup 2)))])])
+
+;(define_insn "*adc_0"
+; [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+; (match_operand:SI 1 "register_operand" "c")))]
+; ""
+; "adc %0,%1,0"
+; [(set_attr "cond" "use")
+; (set_attr "type" "cc_arith")
+; (set_attr "length" "4")])
+;
+;(define_split
+; [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+; (plus:SI (gtu:SI (match_operand:SI 1 "register_operand" "c")
+; (match_operand:SI 2 "register_operand" "c"))
+; (match_operand:SI 3 "register_operand" "c")))
+; (clobber (reg CC_REG))]
+; ""
+; [(set (reg:CC_C CC_REG) (compare:CC_C (match_dup 2) (match_dup 1)))
+; (set (match_dup 0)
+; (plus:SI (ltu:SI (reg:CC_C CC_REG) (const_int 0))
+; (match_dup 3)))])
+
+;; (define_insn "*subsi3_mixed"
+;; [(set (match_operand:SI 0 "register_operand" "=q,q,r")
+;; (minus:SI (match_operand:SI 1 "register_operand" "q,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "K,qM,rJ")))]
+;; "TARGET_MIXED_CODE"
+;; "@
+;; sub_s %0,%1,%2
+;; sub_s %0,%1,%2
+;; sub%? %0,%1,%S2"
+;; [(set_attr "iscompact" "true,true,false")
+;; (set_attr "length" "2,2,*")])
+
+(define_expand "subsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (minus:SI (match_operand:SI 1 "nonmemory_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ int c = 1;
- if (GET_CODE (op2) == CONST_INT)
+ if (!register_operand (operands[2], SImode))
{
- int sign = INTVAL (op2);
- if (sign < 0)
- return \"sub.f %L0,%L1,%2\;sbc %H0,%H1,-1\";
- else
- return \"sub.f %L0,%L1,%2\;sbc %H0,%H1,0\";
+ operands[1] = force_reg (SImode, operands[1]);
+ c = 2;
}
- else
- return \"sub.f %L0,%L1,%L2\;sbc %H0,%H1,%H2\";
-}"
- [(set_attr "length" "2")])
-
-;; Boolean instructions.
-;;
-;; We don't define the DImode versions as expand_binop does a good enough job.
+ if (flag_pic && arc_raw_symbolic_reference_mentioned_p (operands[c]) )
+ operands[c] = force_reg (SImode, operands[c]);
+ else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[c], Pmode))
+ operands[c] = force_reg (SImode, arc_rewrite_small_data (operands[c]));
+}")
-(define_insn "andsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (and:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+; The casesi expander might generate a sub of zero, so we have to recognize
+; it.  Combine should make such an insn go away.
+(define_insn_and_split "subsi3_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,w,w,w, w, w, w")
+ (minus:SI (match_operand:SI 1 "nonmemory_operand" "0, 0, cL,c,L,I,Cal,Cal, c")
+ (match_operand:SI 2 "nonmemory_operand" "Rcqq, c, 0,c,c,0, 0, c,Cal")))]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "@
+ sub%? %0,%1,%2%&
+ sub%? %0,%1,%2
+ rsub%? %0,%2,%1
+ sub %0,%1,%2
+ rsub %0,%2,%1
+ rsub %0,%2,%1
+ rsub%? %0,%2,%1
+ rsub %0,%2,%1
+ sub %0,%1,%2"
+ "reload_completed && get_attr_length (insn) == 8
+ && satisfies_constraint_I (operands[1])"
+ [(set (match_dup 0) (match_dup 3)) (set (match_dup 0) (match_dup 4))]
+ "split_subsi (operands);"
+  (set_attr "iscompact" "maybe,false,false,false,false,false,false,false,false")
+ (set_attr "length" "*,4,4,4,4,4,8,8,8")
+ (set_attr "cond" "canuse,canuse,canuse,nocond,nocond,canuse_limm,canuse,nocond,nocond")])
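+
+; The reversed-operand alternatives keep a constant minuend as an
+; immediate, e.g. (values assumed)  x = 10 - y  ->  rsub x,y,10.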
+
+(define_expand "subdi3"
+ [(parallel [(set (match_operand:DI 0 "dest_reg_operand" "")
+ (minus:DI (match_operand:DI 1 "nonmemory_operand" "")
+ (match_operand:DI 2 "nonmemory_operand" "")))
+	      (clobber (reg:CC CC_REG))])]
""
- "and%? %0,%1,%2")
+{
+ if (!register_operand (operands[2], DImode))
+ operands[1] = force_reg (DImode, operands[1]);
+ if (TARGET_EXPAND_ADDDI)
+ {
+ rtx l0 = gen_lowpart (SImode, operands[0]);
+ rtx h0 = disi_highpart (operands[0]);
+ rtx l1 = gen_lowpart (SImode, operands[1]);
+ rtx h1 = disi_highpart (operands[1]);
+ rtx l2 = gen_lowpart (SImode, operands[2]);
+ rtx h2 = disi_highpart (operands[2]);
+      rtx cc_c = gen_rtx_REG (CC_Cmode, CC_REG);
+
+ emit_insn (gen_sub_f (l0, l1, l2));
+ emit_insn (gen_sbc (h0, h1, h2, cc_c));
+ DONE;
+ }
+})
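+; Illustrative sketch (not part of the original patch): with
+; TARGET_EXPAND_ADDDI, the expander above turns a DImode subtract into an
+; explicit borrow chain.  Assuming hypothetical registers, with the pair
+; r1:r0 receiving r3:r2 - r5:r4, the emitted sequence would be roughly:
+;
+;	sub.f r0,r2,r4	; low word; .f sets the carry (borrow) flag
+;	sbc   r1,r3,r5	; high word, minus the borrow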
-(define_insn "*andsi3_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (and:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (and:SI (match_dup 1)
- (match_dup 2)))]
+(define_insn_and_split "subdi3_i"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,w,w,w")
+ (minus:DI (match_operand:DI 1 "nonmemory_operand" "ci,0,ci,c,!i")
+ (match_operand:DI 2 "nonmemory_operand" "ci,ci,0,!i,c")))
+ (clobber (reg:CC 61))]
+ "register_operand (operands[1], DImode)
+ || register_operand (operands[2], DImode)"
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+{
+ int hi = !TARGET_BIG_ENDIAN;
+ int lo = !hi;
+ rtx l0 = operand_subword (operands[0], lo, 0, DImode);
+ rtx h0 = operand_subword (operands[0], hi, 0, DImode);
+ rtx l1 = operand_subword (operands[1], lo, 0, DImode);
+ rtx h1 = operand_subword (operands[1], hi, 0, DImode);
+ rtx l2 = operand_subword (operands[2], lo, 0, DImode);
+ rtx h2 = operand_subword (operands[2], hi, 0, DImode);
+
+ if (rtx_equal_p (l0, h1) || rtx_equal_p (l0, h2))
+ {
+ h1 = simplify_gen_binary (MINUS, SImode, h1, h2);
+ if (!rtx_equal_p (h0, h1))
+ emit_insn (gen_rtx_SET (VOIDmode, h0, h1));
+ emit_insn (gen_sub_f (l0, l1, l2));
+ emit_insn
+ (gen_rtx_COND_EXEC
+ (VOIDmode,
+ gen_rtx_LTU (VOIDmode, gen_rtx_REG (CC_Cmode, CC_REG), GEN_INT (0)),
+ gen_rtx_SET (VOIDmode, h0, plus_constant (h0, -1))));
+ DONE;
+ }
+ emit_insn (gen_sub_f (l0, l1, l2));
+ emit_insn (gen_sbc (h0, h1, h2, gen_rtx_REG (CCmode, CC_REG)));
+ DONE;
+}
+ [(set_attr "cond" "clob")
+ (set_attr "length" "16,16,16,20,20")])
+
+(define_insn "*sbc_0"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (minus:SI (match_operand:SI 1 "register_operand" "c")
+ (ltu:SI (match_operand:CC_C 2 "cc_use_register")
+ (const_int 0))))]
""
- "and%?.f %0,%1,%2"
- [(set_attr "cond" "set_zn")])
-
-(define_insn "*bicsi3_insn"
- [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
- (and:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
- (not:SI (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r"))))]
+ "sbc %0,%1,0"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cc_arith")
+ (set_attr "length" "4")])
+
+; w/c/c comes first (rather than Rcw/0/C_0) to prevent the middle-end from
+; needlessly prioritizing the matching constraint.
+; Rcw/0/C_0 comes before w/c/L so that the lower-latency conditional
+; execution is used where possible.
+(define_insn_and_split "sbc"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,Rcw,w,Rcw,w")
+ (minus:SI (minus:SI (match_operand:SI 1 "nonmemory_operand"
+ "c,0,c,0,cCal")
+ (ltu:SI (match_operand:CC_C 3 "cc_use_register")
+ (const_int 0)))
+ (match_operand:SI 2 "nonmemory_operand" "c,C_0,L,I,cCal")))]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "@
+ sbc %0,%1,%2
+ sub.cs %0,%1,1
+ sbc %0,%1,%2
+ sbc %0,%1,%2
+ sbc %0,%1,%2"
+ ; if we have a bad schedule after sched2, split.
+ "reload_completed
+ && !optimize_size && TARGET_ARC700
+ && arc_scheduling_not_expected ()
+ && arc_sets_cc_p (prev_nonnote_insn (insn))
+   /* If next comes a return or other insn that needs a delay slot,
+      expect the sbc to get into the delay slot.  */
+ && next_nonnote_insn (insn)
+ && !arc_need_delay (next_nonnote_insn (insn))
+ /* Restore operands before emitting. */
+ && (extract_insn_cached (insn), 1)"
+ [(set (match_dup 0) (match_dup 4))
+ (cond_exec
+ (ltu (reg:CC_C CC_REG) (const_int 0))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1))))]
+ "operands[4] = simplify_gen_binary (MINUS, SImode, operands[1], operands[2]);"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cc_arith")
+ (set_attr "length" "4,4,4,4,8")])
+
+(define_insn "sub_f"
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 1 "nonmemory_operand" " c,L,0,I,c,Cal")
+ (match_operand:SI 2 "nonmemory_operand" "cL,c,I,0,Cal,c")))
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w,Rcw,Rcw,w,w")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode)"
+ "@
+ sub.f %0,%1,%2
+ rsub.f %0,%2,%1
+ sub.f %0,%1,%2
+ rsub.f %0,%2,%1
+ sub.f %0,%1,%2
+ sub.f %0,%1,%2"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4,4,4,4,8,8")])
+
+; combine won't work when an intermediate result is used later...
+; cmp %1,%2 / sub %0,%1,%2 -> sub.f %0,%1,%2
+(define_peephole2
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (set (match_operand:SI 0 "dest_reg_operand" "")
+ (minus:SI (match_dup 1) (match_dup 2)))]
""
- "bic%? %0,%1,%2"
- [(set_attr "length" "1,2,1,2")])
+ [(parallel
+ [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])])
-(define_insn "*bicsi3_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (and:SI (match_operand:SI 1 "register_operand" "%r")
- (not:SI (match_operand:SI 2 "nonmemory_operand" "rIJ")))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (and:SI (match_dup 1)
- (not:SI (match_dup 2))))]
+(define_peephole2
+ [(set (reg:CC CC_REG)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))
+ (set (match_operand 3 "" "") (match_operand 4 "" ""))
+ (set (match_operand:SI 0 "dest_reg_operand" "")
+ (minus:SI (match_dup 1) (match_dup 2)))]
+ "!reg_overlap_mentioned_p (operands[3], operands[1])
+ && !reg_overlap_mentioned_p (operands[3], operands[2])
+ && !reg_overlap_mentioned_p (operands[0], operands[4])
+ && !reg_overlap_mentioned_p (operands[0], operands[3])"
+ [(parallel
+ [(set (reg:CC CC_REG) (compare:CC (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))])
+ (set (match_dup 3) (match_dup 4))])
+
+(define_insn "*add_n"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,w,w")
+ (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "Rcqq,c,c,c")
+ (match_operand:SI 2 "_2_4_8_operand" ""))
+ (match_operand:SI 3 "nonmemory_operand" "0,0,c,?Cal")))]
""
- "bic%?.f %0,%1,%2"
- [(set_attr "cond" "set_zn")])
-
-(define_insn "iorsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (ior:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
+ "add%z2%? %0,%3,%1%&"
+ [(set_attr "type" "shift")
+ (set_attr "length" "*,4,4,8")
+ (set_attr "cond" "canuse,canuse,nocond,nocond")
+ (set_attr "iscompact" "maybe,false,false,false")])
+
+;; N.B. sub[123] has the operands of the MINUS in the opposite order from
+;; what synth_mult likes.
+(define_insn "*sub_n"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (minus:SI (match_operand:SI 1 "nonmemory_operand" "0,c,?Cal")
+ (mult:SI (match_operand:SI 2 "register_operand" "c,c,c")
+ (match_operand:SI 3 "_2_4_8_operand" ""))))]
""
- "or%? %0,%1,%2")
+ "sub%z3%? %0,%1,%2"
+ [(set_attr "type" "shift")
+ (set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")
+ (set_attr "iscompact" "false")])
+
+; ??? check if combine matches this.
+(define_insn "*bset"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (ior:SI (ashift:SI (const_int 1)
+ (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))
+ (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))]
+ "TARGET_ARCOMPACT"
+ "bset%? %0,%2,%1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
+; ??? check if combine matches this.
+(define_insn "*bxor"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (xor:SI (ashift:SI (const_int 1)
+ (match_operand:SI 1 "nonmemory_operand" "cL,cL,c"))
+ (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))]
+ "TARGET_ARCOMPACT"
+ "bxor%? %0,%2,%1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
+; ??? check if combine matches this.
+(define_insn "*bclr"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (and:SI (not:SI (ashift:SI (const_int 1)
+ (match_operand:SI 1 "nonmemory_operand" "cL,cL,c")))
+ (match_operand:SI 2 "nonmemory_operand" "0,c,Cal")))]
+ "TARGET_ARCOMPACT"
+ "bclr%? %0,%2,%1"
+ [(set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
+; ??? FIXME: find combine patterns for bmsk.
+
+;; The following define_insns were added for use by peephole2s.
+
+; see also iorsi3 for use with constant bit number.
+(define_insn "*bset_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (ior:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")
+ (ashift:SI (const_int 1)
+			(match_operand:SI 2 "nonmemory_operand" "cL,cL,c")))]
+ "TARGET_ARCOMPACT"
+ "@
+ bset%? %0,%1,%2 ;;peep2, constr 1
+ bset %0,%1,%2 ;;peep2, constr 2
+ bset %0,%S1,%2 ;;peep2, constr 3"
+ [(set_attr "length" "4,4,8")]
+)
+
+; see also xorsi3 for use with constant bit number.
+(define_insn "*bxor_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (xor:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")
+ (ashift:SI (const_int 1)
+			(match_operand:SI 2 "nonmemory_operand" "cL,cL,c")))]
+ "TARGET_ARCOMPACT"
+ "@
+ bxor%? %0,%1,%2
+ bxor %0,%1,%2
+ bxor %0,%S1,%2"
+ [(set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
+; see also andsi3 for use with constant bit number.
+(define_insn "*bclr_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (and:SI (not:SI (ashift:SI (const_int 1)
+ (match_operand:SI 2 "nonmemory_operand" "cL,rL,r")))
+ (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")))]
+ "TARGET_ARCOMPACT"
+ "@
+ bclr%? %0,%1,%2
+ bclr %0,%1,%2
+ bclr %0,%S1,%2"
+ [(set_attr "length" "4,4,8")
+ (set_attr "cond" "canuse,nocond,nocond")]
+)
+
+; see also andsi3 for use with constant bit number.
+(define_insn "*bmsk_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw,w,w")
+ (and:SI (match_operand:SI 1 "nonmemory_operand" "0,c,Cal")
+ (plus:SI (ashift:SI (const_int 1)
+ (plus:SI (match_operand:SI 2 "nonmemory_operand" "rL,rL,r")
+ (const_int 1)))
+ (const_int -1))))]
+ "TARGET_ARCOMPACT"
+ "@
+ bmsk%? %0,%S1,%2
+ bmsk %0,%1,%2
+ bmsk %0,%S1,%2"
+ [(set_attr "length" "4,4,8")]
+)
-(define_insn "*iorsi3_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (ior:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (ior:SI (match_dup 1)
- (match_dup 2)))]
- ""
- "or%?.f %0,%1,%2"
- [(set_attr "cond" "set_zn")])
+;; End of define_insns added for peephole2s.
-(define_insn "xorsi3"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (xor:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ")))]
- ""
- "xor%? %0,%1,%2")
+;; Boolean instructions.
-(define_insn "*xorsi3_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (xor:SI (match_operand:SI 1 "register_operand" "%r")
- (match_operand:SI 2 "nonmemory_operand" "rIJ"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (xor:SI (match_dup 1)
- (match_dup 2)))]
+;; (define_insn "*andsi3_mixed"
+;; [(set (match_operand:SI 0 "compact_register_operand" "=q")
+;; (and:SI (match_operand:SI 1 "compact_register_operand" "0")
+;; (match_operand:SI 2 "compact_register_operand" "q")))]
+;; "TARGET_MIXED_CODE"
+;; "and_s %0,%1,%2"
+;; [(set_attr "iscompact" "true")
+;; (set_attr "type" "binary")
+;; (set_attr "length" "2")])
+
+(define_insn "*andsi3_insn_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
+ (and:SI (match_operand:SI 1 "register_operand" "%r,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,Ji")))]
+ "TARGET_A4"
+ "@
+ and%? %0,%1,%2
+ and%? %0,%1,%S2"
+ [(set_attr "type" "binary, binary")
+ (set_attr "length" "4, 8")])
+
+(define_expand "andsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "")
+ (match_operand:SI 2 "nonmemory_operand" "")))]
+ "TARGET_ARCOMPACT"
+ "if (!satisfies_constraint_Cux (operands[2]))
+ operands[1] = force_reg (SImode, operands[1]);
+ else if (!TARGET_NO_SDATA_SET && small_data_pattern (operands[1], Pmode))
+ operands[1] = arc_rewrite_small_data (operands[1]);")
+
+(define_insn "andsi3_i"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcqq,Rcqq,Rcw,Rcw,Rcw,Rcw,Rcw,Rcw, w, w, w, w,w,Rcw, w, w")
+ (and:SI (match_operand:SI 1 "nonimmediate_operand" "%0,Rcq, 0, 0,Rcqq, 0, c, 0, 0, 0, 0, c, c, c, c,0, 0, c, o")
+ (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, C1p, Ccp, Cux, cL, 0,C1p,Ccp,CnL, I, Lc,C1p,Ccp,CnL,I,Cal,Cal,Cux")))]
+ "TARGET_ARCOMPACT
+ && ((register_operand (operands[1], SImode)
+ && nonmemory_operand (operands[2], SImode))
+ || (memory_operand (operands[1], SImode)
+ && satisfies_constraint_Cux (operands[2])))"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: case 5: case 10: case 11: case 15: case 16: case 17:
+ return \"and%? %0,%1,%2%&\";
+ case 1: case 6:
+ return \"and%? %0,%2,%1%&\";
+ case 2: case 7: case 12:
+ return \"bmsk%? %0,%1,%Z2%&\";
+ case 3: case 8: case 13:
+ return \"bclr%? %0,%1,%M2%&\";
+ case 4:
+ return (INTVAL (operands[2]) == 0xff
+ ? \"extb%? %0,%1%&\" : \"extw%? %0,%1%&\");
+ case 9: case 14: return \"bic%? %0,%1,%n2-1\";
+ case 18:
+ if (TARGET_BIG_ENDIAN)
+ {
+ rtx xop[2];
+
+ xop[0] = operands[0];
+ xop[1] = adjust_address (operands[1], QImode,
+ INTVAL (operands[2]) == 0xff ? 3 : 2);
+ output_asm_insn (INTVAL (operands[2]) == 0xff
+ ? \"ldb %0,%1\" : \"ldw %0,%1\",
+ xop);
+ return \"\";
+ }
+ return INTVAL (operands[2]) == 0xff ? \"ldb %0,%1\" : \"ldw %0,%1\";
+ default:
+ gcc_unreachable ();
+ }
+}"
+ [(set_attr "iscompact" "maybe,maybe,maybe,maybe,true,false,false,false,false,false,false,false,false,false,false,false,false,false,false")
+ (set_attr "type" "binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,binary,load")
+ (set_attr "length" "*,*,*,*,*,4,4,4,4,4,4,4,4,4,4,4,8,8,*")
+ (set_attr "cond" "canuse,canuse,canuse,canuse,nocond,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,nocond,nocond,canuse_limm,canuse,nocond,nocond")])
+
+; combiner splitter, pattern found in ldtoa.c.
+; and op3,op0,op1 / cmp op3,op2 -> add op3,op0,op4 / bmsk.f 0,op3,op1
+(define_split
+ [(set (reg:CC_Z CC_REG)
+ (compare:CC_Z (and:SI (match_operand:SI 0 "register_operand" "")
+ (match_operand 1 "const_int_operand" ""))
+ (match_operand 2 "const_int_operand" "")))
+ (clobber (match_operand:SI 3 "register_operand" ""))]
+ "((INTVAL (operands[1]) + 1) & INTVAL (operands[1])) == 0"
+ [(set (match_dup 3)
+ (plus:SI (match_dup 0) (match_dup 4)))
+ (set (reg:CC_Z CC_REG)
+ (compare:CC_Z (and:SI (match_dup 3) (match_dup 1))
+ (const_int 0)))]
+ "operands[4] = GEN_INT ( -(~INTVAL (operands[1]) | INTVAL (operands[2])));")
+
+;; (define_insn "andsi3"
+;; [(set (match_operand:SI 0 "register_operand" "=r,r")
+;; (and:SI (match_operand:SI 1 "register_operand" "%r,r")
+;; (match_operand:SI 2 "nonmemory_operand" "r,Ji")))]
+;; ""
+;; "@
+;; and%? %0,%1,%2
+;; and%? %0,%1,%S2"
+;; [(set_attr "type" "binary, binary")
+;; (set_attr "length" "4, 8")])
+
+(define_insn_and_split "anddi3"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,&w,w")
+ (and:DI (match_operand:DI 1 "register_operand" "%c,0,c,0")
+ (match_operand:DI 2 "nonmemory_operand" "c,c,H,H")))]
+ "TARGET_OLD_DI_PATTERNS"
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (and:SI (match_dup 4) (match_dup 5)))
+ (set (match_dup 6) (and:SI (match_dup 7) (match_dup 8)))]
+ "arc_split_dilogic (operands, AND); DONE;"
+ [(set_attr "length" "8,8,16,16")])
+
+;; (define_insn "*bicsi3_insn_mixed"
+;; [(set (match_operand:SI 0 "compact_register_operand" "=q")
+;; (and:SI (match_operand:SI 1 "compact_register_operand" "0")
+;; (not:SI (match_operand:SI 2 "compact_register_operand" "q"))))]
+;; "TARGET_MIXED_CODE"
+;; "bic_s %0,%1,%2"
+;; [(set_attr "iscompact" "true")])
+
+
+;;bic define_insn that allows limm to be the first operand
+(define_insn "*bicsi3_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcw,Rcw,Rcw,w,w,w")
+ (and:SI (not:SI (match_operand:SI 1 "nonmemory_operand" "Rcqq,Lc,I,Cal,Lc,Cal,c"))
+ (match_operand:SI 2 "nonmemory_operand" "0,0,0,0,c,c,Cal")))]
""
- "xor%?.f %0,%1,%2"
- [(set_attr "cond" "set_zn")])
+ "@
+ bic%? %0, %2, %1%& ;;constraint 0
+ bic%? %0,%2,%1 ;;constraint 1
+ bic %0,%2,%1 ;;constraint 2, FIXME: will it ever get generated ???
+ bic%? %0,%2,%S1 ;;constraint 3, FIXME: will it ever get generated ???
+ bic %0,%2,%1 ;;constraint 4
+ bic %0,%2,%S1 ;;constraint 5, FIXME: will it ever get generated ???
+ bic %0,%S2,%1 ;;constraint 6"
+ [(set_attr "length" "*,4,4,8,4,8,8")
+ (set_attr "iscompact" "maybe, false, false, false, false, false, false")
+ (set_attr "cond" "canuse,canuse,canuse_limm,canuse,nocond,nocond,nocond")])
+
+
+(define_insn "*iorsi3_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
+ (ior:SI (match_operand:SI 1 "register_operand" "%r,r")
+ (match_operand:SI 2 "nonmemory_operand" "r,Ji")))]
+ "TARGET_A4"
+ "@
+ or%? %0,%1,%2
+ or%? %0,%1,%S2"
+ [(set_attr "type" "binary, binary")
+ (set_attr "length" "4, 8")])
-(define_insn "negsi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+(define_insn "iorsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcqq,Rcw,Rcw,Rcw,Rcw,w, w,w,Rcw, w")
+ (ior:SI (match_operand:SI 1 "nonmemory_operand" "% 0,Rcq, 0, 0, c, 0, 0, c, c,0, 0, c")
+ (match_operand:SI 2 "nonmemory_operand" "Rcqq, 0, C0p, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))]
+ "TARGET_ARCOMPACT"
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 3: case 6: case 7: case 9: case 10: case 11:
+ return \"or%? %0,%1,%2%&\";
+ case 1: case 4:
+ return \"or%? %0,%2,%1%&\";
+ case 2: case 5: case 8:
+ return \"bset%? %0,%1,%z2%&\";
+ default:
+ gcc_unreachable ();
+ }"
+ [(set_attr "iscompact" "maybe,maybe,maybe,false,false,false,false,false,false,false,false,false")
+ (set_attr "length" "*,*,*,4,4,4,4,4,4,4,8,8")
+ (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")])
+
+;; (define_insn "iorsi3"
+;; [(set (match_operand:SI 0 "register_operand" "=r,r")
+;; (ior:SI (match_operand:SI 1 "register_operand" "%r,r")
+;; (match_operand:SI 2 "nonmemory_operand" "r,Ji")))]
+;; ""
+;; "@
+;; or%? %0,%1,%2
+;; or%? %0,%1,%S2"
+;; [(set_attr "type" "binary, binary")
+;; (set_attr "length" "4, 8")])
+
+(define_insn_and_split "iordi3"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,&w,w")
+ (ior:DI (match_operand:DI 1 "register_operand" "%c,0,c,0")
+ (match_operand:DI 2 "nonmemory_operand" "c,c,H,H")))]
+ "TARGET_OLD_DI_PATTERNS"
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (ior:SI (match_dup 4) (match_dup 5)))
+ (set (match_dup 6) (ior:SI (match_dup 7) (match_dup 8)))]
+ "arc_split_dilogic (operands, IOR); DONE;"
+ [(set_attr "length" "8,8,16,16")])
+
+;; (define_insn "*xorsi3_mixed"
+;; [(set (match_operand:SI 0 "compact_register_operand" "=q")
+;; (xor:SI (match_operand:SI 1 "compact_register_operand" "0")
+;; (match_operand:SI 2 "compact_register_operand" "q")))]
+;; "TARGET_MIXED_CODE"
+;; "xor_s %0,%1,%2"
+;; [(set_attr "iscompact" "true")
+;; (set_attr "length" "2")])
+
+(define_insn "xorsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcq,Rcw,Rcw,Rcw,Rcw, w, w,w, w, w")
+ (xor:SI (match_operand:SI 1 "register_operand" "%0, Rcq, 0, c, 0, 0, c, c,0, 0, c")
+ (match_operand:SI 2 "nonmemory_operand" " Rcqq, 0, cL, 0,C0p, I,cL,C0p,I,Cal,Cal")))]
""
+ "*
+ switch (which_alternative)
+ {
+ case 0: case 2: case 5: case 6: case 8: case 9: case 10:
+ return \"xor%? %0,%1,%2%&\";
+ case 1: case 3:
+ return \"xor%? %0,%2,%1%&\";
+ case 4: case 7:
+ return \"bxor%? %0,%1,%z2\";
+ default:
+ gcc_unreachable ();
+ }
+ "
+ [(set_attr "iscompact" "maybe,maybe,false,false,false,false,false,false,false,false,false")
+ (set_attr "type" "binary")
+ (set_attr "length" "*,*,4,4,4,4,4,4,4,8,8")
+ (set_attr "cond" "canuse,canuse,canuse,canuse,canuse,canuse_limm,nocond,nocond,canuse_limm,canuse,nocond")])
+
+(define_insn_and_split "xordi3"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w,w,&w,w")
+ (xor:DI (match_operand:DI 1 "register_operand" "%c,0,c,0")
+ (match_operand:DI 2 "nonmemory_operand" "c,c,H,H")))]
+ "TARGET_OLD_DI_PATTERNS"
+ "#"
+ "reload_completed"
+ [(set (match_dup 3) (xor:SI (match_dup 4) (match_dup 5)))
+ (set (match_dup 6) (xor:SI (match_dup 7) (match_dup 8)))]
+ "arc_split_dilogic (operands, XOR); DONE;"
+ [(set_attr "length" "8,8,16,16")])
+
+(define_insn "*negsi2_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
+ (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+ "TARGET_A4"
"sub%? %0,0,%1"
- [(set_attr "type" "unary")])
+ [(set_attr "type" "unary")
+ (set_attr "length" "4")])
-(define_insn "*negsi2_set_cc_insn"
- [(set (reg:CC 61) (compare:CC
- (neg:SI (match_operand:SI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (neg:SI (match_dup 1)))]
+(define_insn "negsi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,Rcqq,Rcw,w")
+ (neg:SI (match_operand:SI 1 "register_operand" "0,Rcqq,0,c")))]
""
- "sub%?.f %0,0,%1"
+ "neg%? %0,%1%&"
[(set_attr "type" "unary")
- (set_attr "cond" "set")])
-
-(define_insn "negdi2"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (neg:DI (match_operand:DI 1 "register_operand" "r")))
+ (set_attr "iscompact" "maybe,true,false,false")
+ (set_attr "cond" "canuse,nocond,canuse,nocond")])
+
+;; (define_insn "negsi2"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (neg:SI (match_operand:SI 1 "register_operand" "r")))]
+;; ""
+;; "sub%? %0,0,%1"
+;; [(set_attr "type" "unary")
+;; (set_attr "length" "8")])
+
+(define_insn_and_split "negdi2"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&w")
+ (neg:DI (match_operand:DI 1 "register_operand" "c")))
(clobber (reg:SI 61))]
+ "TARGET_OLD_DI_PATTERNS"
+ "#"
+ ""
+ [(set (match_dup 0) (minus:DI (const_int 0) (match_dup 1)))]
""
- "sub.f %L0,0,%L1\;sbc %H0,0,%H1"
[(set_attr "type" "unary")
- (set_attr "length" "2")])
+ (set_attr "cond" "clob")
+ (set_attr "length" "16")])
-(define_insn "one_cmplsi2"
- [(set (match_operand:SI 0 "register_operand" "=r")
+(define_insn "*one_cmplsi2_a4"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
(not:SI (match_operand:SI 1 "register_operand" "r")))]
- ""
- "xor%? %0,%1,-1"
- [(set_attr "type" "unary")])
+ "TARGET_A4"
+ "xor %0,%1,-1"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "nocond")])
-(define_insn "*one_cmplsi2_set_cc_insn"
- [(set (reg:CCZN 61) (compare:CCZN
- (not:SI (match_operand:SI 1 "register_operand" "r"))
- (const_int 0)))
- (set (match_operand:SI 0 "register_operand" "=r")
- (not:SI (match_dup 1)))]
+(define_insn "one_cmplsi2"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcqq,w")
+ (not:SI (match_operand:SI 1 "register_operand" "Rcqq,c")))]
""
- "xor%?.f %0,%1,-1"
+ "not%? %0,%1%&"
+ [(set_attr "type" "unary,unary")
+ (set_attr "iscompact" "true,false")])
+
+;; (define_insn "*one_cmplsi2_ac32"
+;; [(set (match_operand:SI 0 "register_operand" "=r")
+;; (not:SI (match_operand:SI 1 "register_operand" "r")))]
+;; "TARGET_ARCOMPACT"
+;; "not %0,%1"
+;; [(set_attr "type" "unary")
+;; (set_attr "cond" "nocond")])
+
+(define_insn "*one_cmpldi2_a4"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=&r")
+ (not:DI (match_operand:DI 1 "register_operand" "r")))]
+ "TARGET_A4"
+ "xor %H0,%H1,-1\;xor %L0,%L1,-1"
[(set_attr "type" "unary")
- (set_attr "cond" "set_zn")])
-
+ (set_attr "cond" "nocond")
+ (set_attr "length" "16")])
+
+(define_insn_and_split "one_cmpldi2"
+ [(set (match_operand:DI 0 "dest_reg_operand" "=q,w")
+ (not:DI (match_operand:DI 1 "register_operand" "q,c")))]
+ "TARGET_ARCOMPACT"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 2) (not:SI (match_dup 3)))
+ (set (match_dup 4) (not:SI (match_dup 5)))]
+{
+ int swap = (true_regnum (operands[0]) == true_regnum (operands[1]) + 1);
+
+ operands[2] = operand_subword (operands[0], 0+swap, 0, DImode);
+ operands[3] = operand_subword (operands[1], 0+swap, 0, DImode);
+ operands[4] = operand_subword (operands[0], 1-swap, 0, DImode);
+ operands[5] = operand_subword (operands[1], 1-swap, 0, DImode);
+}
+ [(set_attr "type" "unary,unary")
+ (set_attr "cond" "nocond,nocond")
+ (set_attr "length" "4,8")])
+
;; Shift instructions.
(define_expand "ashlsi3"
- [(set (match_operand:SI 0 "register_operand" "")
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
(ashift:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
""
"
{
- if (! TARGET_SHIFTER)
+ if (!TARGET_SHIFTER)
{
+/* All gen_rtx (CODE, ...) calls have been converted to gen_rtx_CODE (...).  */
emit_insn (gen_rtx_PARALLEL
(VOIDmode,
- gen_rtvec (2,
+ gen_rtvec (3,
gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_ASHIFT (SImode, operands[1],
- operands[2])),
- gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_SCRATCH (SImode)))));
+ gen_rtx_ASHIFT (SImode, operands[1], operands[2])),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 61))
+ )));
DONE;
}
}")
(define_expand "ashrsi3"
- [(set (match_operand:SI 0 "register_operand" "")
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
(ashiftrt:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
""
"
{
- if (! TARGET_SHIFTER)
+ if (!TARGET_SHIFTER)
{
emit_insn (gen_rtx_PARALLEL
(VOIDmode,
- gen_rtvec (2,
+ gen_rtvec (3,
gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_ASHIFTRT (SImode,
- operands[1],
- operands[2])),
- gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_SCRATCH (SImode)))));
+ gen_rtx_ASHIFTRT (SImode, operands[1], operands[2])),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 61)))));
DONE;
}
}")
(define_expand "lshrsi3"
- [(set (match_operand:SI 0 "register_operand" "")
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
(lshiftrt:SI (match_operand:SI 1 "register_operand" "")
(match_operand:SI 2 "nonmemory_operand" "")))]
""
"
{
- if (! TARGET_SHIFTER)
+ if (!TARGET_SHIFTER)
{
emit_insn (gen_rtx_PARALLEL
(VOIDmode,
- gen_rtvec (2,
+ gen_rtvec (3,
gen_rtx_SET (VOIDmode, operands[0],
- gen_rtx_LSHIFTRT (SImode,
- operands[1],
- operands[2])),
- gen_rtx_CLOBBER (VOIDmode,
- gen_rtx_SCRATCH (SImode)))));
- DONE;
+ gen_rtx_LSHIFTRT (SImode, operands[1], operands[2])),
+ gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)),
+			 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 61)))));
+ DONE;
}
}")
-(define_insn "*ashlsi3_insn"
- [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
- (ashift:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
- (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))]
- "TARGET_SHIFTER"
- "asl%? %0,%1,%2"
- [(set_attr "type" "shift")
- (set_attr "length" "1,2,1,2")])
-
-(define_insn "*ashrsi3_insn"
- [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
- (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
- (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))]
- "TARGET_SHIFTER"
- "asr%? %0,%1,%2"
- [(set_attr "type" "shift")
- (set_attr "length" "1,2,1,2")])
-
-(define_insn "*lshrsi3_insn"
- [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
- (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "r,r,I,J")
- (match_operand:SI 2 "nonmemory_operand" "rI,J,r,r")))]
- "TARGET_SHIFTER"
- "lsr%? %0,%1,%2"
- [(set_attr "type" "shift")
- (set_attr "length" "1,2,1,2")])
-
(define_insn "*shift_si3"
- [(set (match_operand:SI 0 "register_operand" "=r")
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r")
(match_operator:SI 3 "shift_operator"
[(match_operand:SI 1 "register_operand" "0")
- (match_operand:SI 2 "nonmemory_operand" "rIJ")]))
- (clobber (match_scratch:SI 4 "=&r"))]
- "! TARGET_SHIFTER"
+ (match_operand:SI 2 "nonmemory_operand" "rJ")]))
+ (clobber (match_scratch:SI 4 "=&r"))
+ (clobber (reg:CC 61))
+ ]
+ "!TARGET_SHIFTER"
"* return output_shift (operands);"
[(set_attr "type" "shift")
- (set_attr "length" "8")])
-
+ (set_attr "length" "32")])
+
+; asl, asr, lsr patterns:
+; There is no point in including an 'I' alternative since only the lowest 5
+; bits are used for the shift.  OTOH Cal can be useful if the shift amount
+; is defined in an external symbol, as we don't have special relocations
+; to truncate a symbol in a u6 immediate; but that's rather exotic, so we
+; only provide one alternative for this, without condexec support.
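+; For example (illustrative): since only the low 5 bits of the count are
+; used, "asl r0,r1,33" behaves like "asl r0,r1,1", so every count an 'I'
+; alternative could encode is already reachable through 'L'.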
+(define_insn "*ashlsi3_insn_mixed"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w")
+ (ashift:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal")
+ (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))]
+ "TARGET_SHIFTER
+ && (register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode))"
+ "asl%? %0,%1,%2%&"
+ [(set_attr "type" "shift")
+ (set_attr "iscompact" "maybe,maybe,maybe,false,false,false")
+ (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")])
+
+;; (define_insn "*ashlsi3_insn"
+;; [(set (match_operand:SI 0 "register_operand" "=r,r,r,r")
+;; (ashift:SI (match_operand:SI 1 "register_operand" "r,r,r,0")
+;; (match_operand:SI 2 "nonmemory_operand" "N,r,Cal,rJ")))]
+;; "TARGET_SHIFTER"
+;; "@
+;; asl %0,%1
+;; asl %0, %1, %2
+;; asl %0, %1, %2
+;; asl%? %0,%1,%S2"
+;; [(set_attr "type" "shift,shift,shift,shift")
+;; (set_attr "length" "4, 4, 8, 8")
+;; (set_attr "cond" "nocond,nocond,nocond,canuse")])
+
+;; (define_insn "*ashrsi3_insn_mixed"
+;; [(set (match_operand:SI 0 "register_operand" "=q,q,q,r,r")
+;; (ashiftrt:SI (match_operand:SI 1 "register_operand" "q,q,0,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "N,K,qM,rJ,rJ")))]
+;; "TARGET_MIXED_CODE"
+;; "@
+;; asr_s %0,%1
+;; asr_s %0,%1,%2
+;; asr_s %0,%1,%2
+;; asr%? %0,%1,%S2
+;; asr %0,%1,%S2"
+;; [(set_attr "type" "shift,shift,shift,shift,shift")
+;; (set_attr "iscompact" "true,true,true,false,false")])
+
+;; (define_insn "*ashrsi3_insn"
+;; [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+;; (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "N,rJ,rJ")))]
+;; "TARGET_SHIFTER"
+;; "@
+;; asr %0,%1
+;; asr%? %0,%1,%S2
+;; asr %0,%1,%S2"
+;; [(set_attr "type" "shift,shift,shift")
+;; (set_attr "cond" "nocond,canuse,nocond")])
+(define_insn "*ashrsi3_insn_mixed"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w")
+ (ashiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal")
+ (match_operand:SI 2 "nonmemory_operand" "K, K,RcqqM, cL,cL,cCal")))]
+ "TARGET_SHIFTER
+ && (register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode))"
+ "asr%? %0,%1,%2%&"
+ [(set_attr "type" "shift")
+ (set_attr "iscompact" "maybe,maybe,maybe,false,false,false")
+ (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")])
+
+;; (define_insn "*lshrsi3_insn_mixed"
+;; [(set (match_operand:SI 0 "register_operand" "=q,q,r")
+;; (lshiftrt:SI (match_operand:SI 1 "register_operand" "q,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "N,qM,rJ")))]
+;; "TARGET_MIXED_CODE"
+;; "@
+;; lsr_s %0,%1
+;; lsr_s %0,%1,%2
+;; lsr%? %0,%1,%S2"
+;; [(set_attr "type" "shift,shift,shift")
+;; (set_attr "iscompact" "true,true,false")])
+
+;; (define_insn "*lshrsi3_insn"
+;; [(set (match_operand:SI 0 "register_operand" "=r,r,r")
+;; (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,0,r")
+;; (match_operand:SI 2 "nonmemory_operand" "N,rJ,rJ")))]
+;; "TARGET_SHIFTER"
+;; "@
+;; lsr %0,%1
+;; lsr%? %0,%1,%S2
+;; lsr %0,%1,%S2"
+;; [(set_attr "type" "shift,shift,shift")
+;; (set_attr "cond" "nocond,canuse,nocond")])
+(define_insn "*lshrsi3_insn_mixed"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcq,Rcqq,Rcqq,Rcw, w, w")
+ (lshiftrt:SI (match_operand:SI 1 "nonmemory_operand" "!0,Rcqq, 0, 0, c,cCal")
+ (match_operand:SI 2 "nonmemory_operand" "N, N,RcqqM, cL,cL,cCal")))]
+ "TARGET_SHIFTER
+ && (register_operand (operands[1], SImode)
+ || register_operand (operands[2], SImode))"
+ "*return (which_alternative <= 1 && !arc_ccfsm_cond_exec_p ()
+ ? \"lsr%? %0,%1%&\" : \"lsr%? %0,%1,%2%&\");"
+ [(set_attr "type" "shift")
+ (set_attr "iscompact" "maybe,maybe,maybe,false,false,false")
+ (set_attr "cond" "canuse,nocond,canuse,canuse,nocond,nocond")])
+
+(define_insn "rotrsi3"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=Rcw, w, w")
+ (rotatert:SI (match_operand:SI 1 "register_operand" " 0,cL,cCal")
+ (match_operand:SI 2 "nonmemory_operand" "cL,cL,cCal")))]
+ "TARGET_SHIFTER"
+ "ror%? %0,%1,%2"
+ [(set_attr "type" "shift,shift,shift")
+ (set_attr "cond" "canuse,nocond,nocond")
+ (set_attr "length" "4,4,8")])
+
;; Compare instructions.
;; This controls RTL generation and register allocation.
-;; We generate RTL for comparisons and branches by having the cmpxx
+;; We generate RTL for comparisons and branches by having the cmpxx
;; patterns store away the operands. Then, the scc and bcc patterns
;; emit RTL for both the compare and the branch.
@@ -1168,282 +3466,596 @@
"
{
arc_compare_op0 = operands[0];
- arc_compare_op1 = operands[1];
+ if (GET_CODE(operands[1]) == SYMBOL_REF && flag_pic)
+ arc_compare_op1 = force_reg (SImode, operands[1]);
+ else
+ arc_compare_op1 = operands[1];
DONE;
}")
-;; ??? We may be able to relax this a bit by adding a new constant 'K' for 0.
+;; ??? We may be able to relax this a bit by adding a new constraint for 0.
;; This assumes sub.f 0,symbol,0 is a valid insn.
;; Note that "sub.f 0,r0,1" is an 8 byte insn. To avoid unnecessarily
;; creating 8 byte insns we duplicate %1 in the destination reg of the insn
;; if it's a small constant.
-(define_insn "*cmpsi_cc_insn"
+(define_insn "*cmpsi_cc_insn_a4"
[(set (reg:CC 61)
(compare:CC (match_operand:SI 0 "register_operand" "r,r,r")
- (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
- ""
+ (match_operand:SI 1 "nonmemory_operand" "r,I,Cal")))]
+ "TARGET_A4"
"@
sub.f 0,%0,%1
sub.f %1,%0,%1
- sub.f 0,%0,%1"
- [(set_attr "type" "compare,compare,compare")])
-
-(define_insn "*cmpsi_cczn_insn"
- [(set (reg:CCZN 61)
- (compare:CCZN (match_operand:SI 0 "register_operand" "r,r,r")
- (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
- ""
+ sub.f 0,%0,%S1"
+ [(set_attr "type" "compare,compare,compare")
+ ])
+
+(define_insn "*cmpsi_cczn_insn_a4"
+ [(set (reg:CC_ZN 61)
+ (compare:CC_ZN (match_operand:SI 0 "register_operand" "r,r,r")
+ (match_operand:SI 1 "nonmemory_operand" "r,I,Cal")))]
+ "TARGET_A4"
"@
sub.f 0,%0,%1
sub.f %1,%0,%1
- sub.f 0,%0,%1"
- [(set_attr "type" "compare,compare,compare")])
-
-(define_insn "*cmpsi_ccznc_insn"
- [(set (reg:CCZNC 61)
- (compare:CCZNC (match_operand:SI 0 "register_operand" "r,r,r")
- (match_operand:SI 1 "nonmemory_operand" "r,I,J")))]
+ sub.f 0,%0,%S1"
+ [(set_attr "type" "compare,compare,compare")
+ ])
+
+;; (define_insn "*cmpsi_cc_insn_mixed"
+;; [(set (reg:CC 61)
+;; (compare:CC (match_operand:SI 0 "compact_register_operand" "q,r")
+;; (match_operand:SI 1 "nonmemory_operand" "rO,rJ")))]
+;; "TARGET_MIXED_CODE"
+;; "@
+;; cmp_s %0,%1
+;; cmp%? %0,%S1"
+;; [(set_attr "type" "compare,compare")
+;; (set_attr "iscompact" "true,false")])
+
+;; (define_insn "*cmpsi_cc_insn"
+;; [(set (reg:CC 61)
+;; (compare:CC (match_operand:SI 0 "register_operand" "r")
+;; (match_operand:SI 1 "nonmemory_operand" "rJ")))]
+;; "TARGET_ARCOMPACT"
+;; "cmp%? %0,%S1"
+;; [(set_attr "type" "compare")
+;; ])
+
+;; ??? Could add a peephole to generate a compare with swapped operands and a
+;; modified cc user if the second, but not the first, operand is a compact
+;; register.
+(define_insn "cmpsi_cc_insn_mixed"
+ [(set (reg:CC 61)
+ (compare:CC (match_operand:SI 0 "register_operand" "Rcq#q, c, qRcq, c")
+ (match_operand:SI 1 "nonmemory_operand" "cO,cI, Cal, Cal")))]
+ "TARGET_ARCOMPACT"
+ "cmp%? %0,%B1%&"
+ [(set_attr "type" "compare")
+ (set_attr "iscompact" "true,false,true_limm,false")
+ (set_attr "cond" "set")
+ (set_attr "length" "*,4,*,8")])
+
+(define_insn "*cmpsi_cc_zn_insn"
+ [(set (reg:CC_ZN 61)
+ (compare:CC_ZN (match_operand:SI 0 "register_operand" "qRcq,c")
+ (const_int 0)))]
+ "TARGET_ARCOMPACT"
+ "tst%? %0,%0%&"
+ [(set_attr "type" "compare,compare")
+ (set_attr "iscompact" "true,false")
+ (set_attr "cond" "set_zn")
+ (set_attr "length" "*,4")])
+
+; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes.
+(define_insn "*btst"
+ [(set (reg:CC_ZN CC_REG)
+ (compare:CC_ZN
+ (zero_extract:SI (match_operand:SI 0 "register_operand" "Rcqq,c")
+ (const_int 1)
+ (match_operand:SI 1 "nonmemory_operand" "L,Lc"))
+ (const_int 0)))]
""
+ "btst%? %0,%1"
+ [(set_attr "iscompact" "true,false")
+ (set_attr "cond" "set")
+ (set_attr "type" "compare")
+ (set_attr "length" "*,4")])
+
+; combine suffers from 'simplifications' that replace a one-bit zero
+; extract with a shift if it can prove that the upper bits are zero.
+; arc_reorg sees the code after sched2, by which point our inputs may have
+; been clobbered even if they were not clobbered before.
+; Therefore, add a third way to convert btst / b{eq,ne} to bbit{0,1}.
+; OTOH, this is somewhat marginal, and can lead to out-of-range
+; bbit (i.e. bad scheduling) and missed conditional execution,
+; so make this an option.
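+; Illustrative effect of this peephole (sketch, not from the original
+; sources):
+;	btst r0,5 / beq .L1   ->   bbit0 r0,5,.L1
+;	btst r0,5 / bne .L1   ->   bbit1 r0,5,.L1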
+(define_peephole2
+ [(set (reg:CC_ZN CC_REG)
+ (compare:CC_ZN
+ (zero_extract:SI (match_operand:SI 0 "register_operand" "")
+ (const_int 1)
+ (match_operand:SI 1 "nonmemory_operand" ""))
+ (const_int 0)))
+ (set (pc)
+ (if_then_else (match_operator 3 "equality_comparison_operator"
+ [(reg:CC_ZN CC_REG) (const_int 0)])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))]
+ "TARGET_BBIT_PEEPHOLE && peep2_regno_dead_p (2, CC_REG)"
+ [(parallel [(set (pc)
+ (if_then_else
+ (match_op_dup 3
+ [(zero_extract:SI (match_dup 0)
+ (const_int 1) (match_dup 1))
+ (const_int 0)])
+ (label_ref (match_dup 2))
+ (pc)))
+ (clobber (reg:CC_ZN CC_REG))])])
+
+(define_insn "*cmpsi_cc_z_insn"
+ [(set (reg:CC_Z 61)
+ (compare:CC_Z (match_operand:SI 0 "register_operand" "qRcq,c")
+ (match_operand:SI 1 "p2_immediate_operand" "O,n")))]
+ "TARGET_ARCOMPACT"
"@
- sub.f 0,%0,%1
- sub.f %1,%0,%1
- sub.f 0,%0,%1"
- [(set_attr "type" "compare,compare,compare")])
+ cmp%? %0,%1%&
+ bxor.f 0,%0,%z1"
+ [(set_attr "type" "compare,compare")
+ (set_attr "iscompact" "true,false")
+ (set_attr "cond" "set,set_zn")
+ (set_attr "length" "*,4")])
+
+(define_insn "*cmpsi_cc_c_insn"
+ [(set (reg:CC_C 61)
+ (compare:CC_C (match_operand:SI 0 "register_operand" "Rcqq, c,Rcqq, c")
+ (match_operand:SI 1 "nonmemory_operand" "cO, cI, Cal,Cal")))]
+ "TARGET_ARCOMPACT"
+ "cmp%? %0,%S1%&"
+ [(set_attr "type" "compare")
+ (set_attr "iscompact" "true,false,true_limm,false")
+ (set_attr "cond" "set")
+ (set_attr "length" "*,4,*,8")])
;; Next come the scc insns.
(define_expand "seq"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (eq:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (EQ, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (EQ, SImode);
}")
(define_expand "sne"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (ne:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (NE, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (NE, SImode);
}")
(define_expand "sgt"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (gt:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (GT, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GT, SImode);
}")
(define_expand "sle"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (le:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (LE, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LE, SImode);
}")
(define_expand "sge"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (ge:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (GE, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GE, SImode);
}")
(define_expand "slt"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (lt:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (LT, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LT, SImode);
}")
(define_expand "sgtu"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (gtu:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (GTU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GTU, SImode);
}")
(define_expand "sleu"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (leu:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (LEU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LEU, SImode);
}")
(define_expand "sgeu"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (geu:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (GEU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GEU, SImode);
}")
(define_expand "sltu"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (ltu:SI (match_dup 1) (const_int 0)))]
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r") (match_dup 1))]
""
"
{
- operands[1] = gen_compare_reg (LTU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LTU, SImode);
}")
-(define_insn "*scc_insn"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (match_operator:SI 1 "comparison_operator" [(reg 61) (const_int 0)]))]
+(define_insn_and_split "*scc_insn"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (match_operator:SI 1 "proper_comparison_operator" [(reg 61) (const_int 0)]))]
""
- "mov %0,1\;sub.%D1 %0,%0,%0"
- [(set_attr "type" "unary")
- (set_attr "length" "2")])
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (const_int 1))
+ (cond_exec
+ (match_dup 1)
+ (set (match_dup 0) (const_int 0)))]
+{
+ operands[1]
+ = gen_rtx_fmt_ee (REVERSE_CONDITION (GET_CODE (operands[1]),
+ GET_MODE (XEXP (operands[1], 0))),
+ VOIDmode,
+ XEXP (operands[1], 0), XEXP (operands[1], 1));
+}
+ [(set_attr "type" "unary")])
+
+;; ??? At least for ARC600, we should use sbc b,b,s12 if we want a value
+;; that is one lower if the carry flag is set.
;; ??? Look up negscc insn. See pa.md for example.
(define_insn "*neg_scc_insn"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (neg:SI (match_operator:SI 1 "comparison_operator"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (neg:SI (match_operator:SI 1 "proper_comparison_operator"
[(reg 61) (const_int 0)])))]
""
"mov %0,-1\;sub.%D1 %0,%0,%0"
[(set_attr "type" "unary")
- (set_attr "length" "2")])
+ (set_attr "length" "8")])
(define_insn "*not_scc_insn"
- [(set (match_operand:SI 0 "register_operand" "=r")
- (not:SI (match_operator:SI 1 "comparison_operator"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (not:SI (match_operator:SI 1 "proper_comparison_operator"
[(reg 61) (const_int 0)])))]
""
"mov %0,1\;sub.%d1 %0,%0,%0"
[(set_attr "type" "unary")
- (set_attr "length" "2")])
-
+ (set_attr "length" "8")])
+
+; cond_exec patterns
+(define_insn "*movsi_ne"
+ [(cond_exec
+ (ne (match_operand:CC_Z 2 "cc_use_register" "") (const_int 0))
+ (set (match_operand:SI 0 "dest_reg_operand" "=Rcq#q,w,w")
+ (match_operand:SI 1 "nonmemory_operand" "%C_0,Lc,?Cal")))]
+ ""
+ "@
+ sub%?.ne %0,%0,%0%&
+ mov.ne %0,%1
+ mov.ne %0,%S1"
+ [(set_attr "type" "cmove,cmove,cmove")
+ (set_attr "iscompact" "true,false,false")
+ (set_attr "length" "*,4,8")])
+
+(define_insn "*movsi_cond_exec"
+ [(cond_exec
+ (match_operator 3 "proper_comparison_operator"
+ [(match_operand 2 "cc_register" "") (const_int 0)])
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (match_operand:SI 1 "nonmemory_operand" "%Lc,?Cal")))]
+ ""
+ "mov.%d3 %0,%S1"
+ [(set_attr "type" "cmove")
+ (set_attr "length" "4,8")])
+
+(define_insn "*add_cond_exec"
+ [(cond_exec
+ (match_operator 4 "proper_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (plus:SI (match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "nonmemory_operand" "cCca,?Cal"))))]
+ ""
+ "*return arc_output_addsi (operands, \".%d4\");"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cmove")
+ (set_attr "length" "4,8")])
+
+; ??? and could use bclr,bmsk
+; ??? or / xor could use bset / bxor
+(define_insn "*commutative_cond_exec"
+ [(cond_exec
+ (match_operator 5 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (match_operator:SI 3 "commutative_operator"
+ [(match_operand:SI 1 "register_operand" "%0,0")
+ (match_operand:SI 2 "nonmemory_operand" "cL,?Cal")])))]
+ ""
+ "%O3.%d5 %0,%1,%2"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cmove")
+ (set_attr "length" "4,8")])
+
+(define_insn "*sub_cond_exec"
+ [(cond_exec
+ (match_operator 4 "proper_comparison_operator"
+ [(match_operand 3 "cc_register" "") (const_int 0)])
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w,w")
+ (minus:SI (match_operand:SI 1 "register_operand" "0,cL,Cal")
+ (match_operand:SI 2 "nonmemory_operand" "cL,0,0"))))]
+ ""
+ "@
+ sub.%d4 %0,%1,%2
+ rsub.%d4 %0,%2,%1
+ rsub.%d4 %0,%2,%1"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cmove")
+ (set_attr "length" "4,4,8")])
+
+(define_insn "*noncommutative_cond_exec"
+ [(cond_exec
+ (match_operator 5 "proper_comparison_operator"
+ [(match_operand 4 "cc_register" "") (const_int 0)])
+ (set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (match_operator:SI 3 "noncommutative_operator"
+ [(match_operand:SI 1 "register_operand" "0,0")
+ (match_operand:SI 2 "nonmemory_operand" "cL,Cal")])))]
+ ""
+ "%O3.%d5 %0,%1,%2"
+ [(set_attr "cond" "use")
+ (set_attr "type" "cmove")
+ (set_attr "length" "4,8")])
+
;; These control RTL generation for conditional jump insns
(define_expand "beq"
[(set (pc)
- (if_then_else (eq (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (EQ, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (EQ, VOIDmode);
}")
(define_expand "bne"
[(set (pc)
- (if_then_else (ne (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (NE, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (NE, VOIDmode);
}")
(define_expand "bgt"
[(set (pc)
- (if_then_else (gt (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (GT, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GT, VOIDmode);
}")
(define_expand "ble"
[(set (pc)
- (if_then_else (le (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (LE, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LE, VOIDmode);
}")
(define_expand "bge"
[(set (pc)
- (if_then_else (ge (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (GE, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GE, VOIDmode);
}")
(define_expand "blt"
[(set (pc)
- (if_then_else (lt (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (LT, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LT, VOIDmode);
}")
(define_expand "bgtu"
[(set (pc)
- (if_then_else (gtu (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (GTU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GTU, VOIDmode);
}")
(define_expand "bleu"
[(set (pc)
- (if_then_else (leu (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (LEU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (LEU, VOIDmode);
}")
(define_expand "bgeu"
[(set (pc)
- (if_then_else (geu (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (GEU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (GEU, VOIDmode);
}")
(define_expand "bltu"
[(set (pc)
- (if_then_else (ltu (match_dup 1) (const_int 0))
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (LTU, VOIDmode);
+}")
+
+(define_expand "bunge"
+ [(set (pc)
+ (if_then_else (match_dup 1)
(label_ref (match_operand 0 "" ""))
(pc)))]
""
"
{
- operands[1] = gen_compare_reg (LTU, arc_compare_op0, arc_compare_op1);
+ operands[1] = gen_compare_reg (UNGE, VOIDmode);
+}")
+
+(define_expand "bungt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (UNGT, VOIDmode);
+}")
+
+(define_expand "bunle"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (UNLE, VOIDmode);
+}")
+
+(define_expand "bunlt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (UNLT, VOIDmode);
+}")
+
+(define_expand "buneq"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (UNEQ, VOIDmode);
+}")
+
+(define_expand "bltgt"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (LTGT, VOIDmode);
+}")
+
+(define_expand "bordered"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (ORDERED, VOIDmode);
+}")
+
+(define_expand "bunordered"
+ [(set (pc)
+ (if_then_else (match_dup 1)
+ (label_ref (match_operand 0 "" ""))
+ (pc)))]
+ ""
+ "
+{
+ operands[1] = gen_compare_reg (UNORDERED, VOIDmode);
}")
;; Now match both normal and inverted jump.
+;; TODO - support 16-bit conditional short branch insns if needed.
+
+; (define_insn "*branch_insn_mixed"
+; [(set (pc)
+; (if_then_else (match_operator 1 "comparison_operator"
+; [(reg 61) (const_int 0)])
+; (label_ref (match_operand 0 "" ""))
+; (pc)))]
+; "TARGET_MIXED_CODE"
+; "*
+; {
+; if (arc_ccfsm_branch_deleted_p ())
+; {
+; arc_ccfsm_record_branch_deleted ();
+; return \"; branch deleted, next insns conditionalized\";
+; }
+; else
+; return \"b%d1_s %^%l0\";
+; }"
+; [(set_attr "type" "branch")])
+
+; When estimating sizes during arc_reorg while optimizing for speed, there
+; are three reasons why we need to consider branches to be length 6:
+; - unnull-false delay slot insns are implemented using conditional execution,
+;   thus preventing short insn formation where used.
+; - for ARC600: unnull-true delay slot insns are implemented where possible
+;   using conditional execution, preventing short insn formation where used.
+; - for ARC700: likely or somewhat likely taken branches are made long and
+; unaligned if possible to avoid branch penalty.
(define_insn "*branch_insn"
[(set (pc)
(if_then_else (match_operator 1 "proper_comparison_operator"
@@ -1459,9 +4071,45 @@
return \"; branch deleted, next insns conditionalized\";
}
else
- return \"%~b%d1%# %l0\";
+ {
+ arc_ccfsm_record_condition (operands[1], 0, insn, 0);
+ if (get_attr_length (insn) == 2)
+ return \"b%d1%? %^%l0%&\";
+ else
+ return \"b%d1%# %^%l0\";
+ }
}"
- [(set_attr "type" "branch")])
+ [(set_attr "type" "branch")
+ (set
+ (attr "lock_length")
+ (cond [
+ ; In arc_reorg we just guesstimate; might be more or less.
+ (ne (symbol_ref "arc_branch_size_unknown_p ()") (const_int 0))
+ (const_int 4)
+
+ (eq_attr "delay_slot_filled" "yes")
+ (const_int 4)
+
+ (ne
+ (if_then_else
+ (match_operand 1 "equality_comparison_operator" "")
+ (ior (lt (minus (match_dup 0) (pc)) (const_int -512))
+ (gt (minus (match_dup 0) (pc))
+ (minus (const_int 506)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (ior (lt (minus (match_dup 0) (pc)) (const_int -64))
+ (gt (minus (match_dup 0) (pc))
+ (minus (const_int 58)
+ (symbol_ref "get_attr_delay_slot_length (insn)")))))
+ (const_int 0))
+ (const_int 4)
+
+ (eq_attr "verify_short" "yes")
+ (const_int 2)]
+ (const_int 4)))
+ (set (attr "iscompact")
+ (cond [(eq_attr "lock_length" "2") (const_string "true")]
+ (const_string "false")))])
(define_insn "*rev_branch_insn"
[(set (pc)
@@ -1478,25 +4126,120 @@
return \"; branch deleted, next insns conditionalized\";
}
else
- return \"%~b%D1%# %l0\";
+ {
+ arc_ccfsm_record_condition (operands[1], 1, insn, 0);
+ if (get_attr_length (insn) == 2)
+ return \"b%D1%? %^%l0\";
+ else
+ return \"b%D1%# %^%l0\";
+ }
}"
- [(set_attr "type" "branch")])
-
+ [(set_attr "type" "branch")
+ (set
+ (attr "lock_length")
+ (cond [
+ ; In arc_reorg we just guesstimate; might be more or less.
+ (ne (symbol_ref "arc_branch_size_unknown_p ()") (const_int 0))
+ (const_int 4)
+
+ (eq_attr "delay_slot_filled" "yes")
+ (const_int 4)
+
+ (ne
+ (if_then_else
+ (match_operand 1 "equality_comparison_operator" "")
+ (ior (lt (minus (match_dup 0) (pc)) (const_int -512))
+ (gt (minus (match_dup 0) (pc))
+ (minus (const_int 506)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (ior (lt (minus (match_dup 0) (pc)) (const_int -64))
+ (gt (minus (match_dup 0) (pc))
+ (minus (const_int 58)
+ (symbol_ref "get_attr_delay_slot_length (insn)")))))
+ (const_int 0))
+ (const_int 4)
+
+ (eq_attr "verify_short" "yes")
+ (const_int 2)]
+ (const_int 4)))
+ (set (attr "iscompact")
+ (cond [(eq_attr "lock_length" "2") (const_string "true")]
+ (const_string "false")))])
+
;; Unconditional and other jump instructions.
-(define_insn "jump"
+;; TODO - support 16-bit short branch insns if needed.
+;(define_insn "*jump_mixed"
+; [(set (pc) (label_ref (match_operand 0 "" "")))]
+; "TARGET_MIXED_CODE"
+; "b_s %^%l0"
+; [(set_attr "type" "uncond_branch")])
+
+(define_expand "jump"
[(set (pc) (label_ref (match_operand 0 "" "")))]
""
- "b%* %l0"
- [(set_attr "type" "uncond_branch")])
+ "")
+
+(define_insn "jump_i"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ "!TARGET_LONG_CALLS_SET || !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
+ "b%!%* %^%l0%&"
+ [(set_attr "type" "uncond_branch")
+ (set (attr "iscompact")
+ (if_then_else (eq_attr "lock_length" "2")
+ (const_string "true") (const_string "false")))
+ (set_attr "cond" "canuse")
+ (set (attr "lock_length")
+ (cond [
+ ; In arc_reorg we just guesstimate; might be more or less.
+ (ne (symbol_ref "arc_branch_size_unknown_p ()") (const_int 0))
+ (const_int 4)
+
+ (eq_attr "delay_slot_filled" "yes")
+ (const_int 4)
+
+ (ne (symbol_ref "find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)")
+ (const_int 0))
+ (const_int 4)
+
+ (ior (lt (minus (match_dup 0) (pc)) (const_int -512))
+ (gt (minus (match_dup 0) (pc))
+ (minus (const_int 506)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+
+ (and (ne (symbol_ref "arc_ccfsm_advance_to (insn),
+ arc_ccfsm_cond_exec_p ()")
+ (const_int 0))
+ (ior (lt (minus (match_dup 0) (pc)) (const_int -64))
+ (gt (minus (match_dup 0) (pc))
+ (minus (const_int 58)
+ (symbol_ref
+ "get_attr_delay_slot_length (insn)")))))
+ (const_int 4)
+
+ (eq_attr "verify_short" "yes")
+ (const_int 2)]
+
+ (const_int 4)))])
(define_insn "indirect_jump"
- [(set (pc) (match_operand:SI 0 "address_operand" "p"))]
+ [(set (pc) (match_operand:SI 0 "nonmemory_operand" "L,I,Cal,Rcqq,r"))]
""
- "j%* %a0"
- [(set_attr "type" "uncond_branch")])
-
+ "j%!%* [%0]%&"
+ [(set_attr "type" "jump")
+ (set_attr "iscompact" "false,false,false,maybe,false")
+ (set_attr "cond" "canuse,canuse_limm,canuse,canuse,canuse")])
+
+;; (define_insn "indirect_jump"
+;; [(set (pc) (match_operand:SI 0 "register_operand" "r"))]
+;; ""
+;; "j%* [%0]"
+;; [(set_attr "type" "jump")])
+
;; Implement a switch statement.
+; ??? the following comment shows ignorance of gcc internals
+; - or possibly code so old that it predates the current facilities.
;; This wouldn't be necessary in the non-pic case if we could distinguish
;; label refs of the jump table from other label refs. The problem is that
;; label refs are output as "%st(.LL42)" but we don't want the %st - we want
@@ -1506,7 +4249,7 @@
[(set (match_dup 5)
(minus:SI (match_operand:SI 0 "register_operand" "")
(match_operand:SI 1 "nonmemory_operand" "")))
- (set (reg:CC 61)
+ (set (reg:CC CC_REG)
(compare:CC (match_dup 5)
(match_operand:SI 2 "nonmemory_operand" "")))
(set (pc)
@@ -1514,47 +4257,219 @@
(const_int 0))
(label_ref (match_operand 4 "" ""))
(pc)))
- (parallel
- [(set (pc)
- (mem:SI (plus:SI (mult:SI (match_dup 5)
- (const_int 4))
- (label_ref (match_operand 3 "" "")))))
- (clobber (match_scratch:SI 6 ""))
- (clobber (match_scratch:SI 7 ""))])]
+ (set (match_dup 6)
+ (unspec:SI [(match_operand 3 "" "")
+ (match_dup 5) (match_dup 7)] UNSPEC_CASESI))
+ (parallel [(set (pc) (match_dup 6)) (use (match_dup 7))])]
""
"
{
+ rtx x;
+
operands[5] = gen_reg_rtx (SImode);
+ operands[6] = gen_reg_rtx (SImode);
+ operands[7] = operands[3];
+ emit_insn (gen_subsi3 (operands[5], operands[0], operands[1]));
+ emit_insn (gen_cmpsi_cc_insn_mixed (operands[5], operands[2]));
+ x = gen_rtx_GTU (VOIDmode, gen_rtx_REG (CCmode, CC_REG), const0_rtx);
+ x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+ gen_rtx_LABEL_REF (VOIDmode, operands[4]), pc_rtx);
+ emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
+ if (TARGET_COMPACT_CASESI)
+ {
+ emit_jump_insn (gen_casesi_compact_jump (operands[5], operands[7]));
+ }
+ else
+ {
+ operands[3] = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
+ if (flag_pic || !cse_not_expected)
+ operands[3] = force_reg (Pmode, operands[3]);
+ emit_insn (gen_casesi_load (operands[6],
+ operands[3], operands[5], operands[7]));
+ if (CASE_VECTOR_PC_RELATIVE || flag_pic)
+ emit_insn (gen_addsi3 (operands[6], operands[6], operands[3]));
+ emit_jump_insn (gen_casesi_jump (operands[6], operands[7]));
+ }
+ DONE;
}")
-(define_insn "*casesi_insn"
- [(set (pc)
- (mem:SI (plus:SI (mult:SI (match_operand:SI 0 "register_operand" "r")
- (const_int 4))
- (label_ref (match_operand 1 "" "")))))
- (clobber (match_scratch:SI 2 "=r"))
- (clobber (match_scratch:SI 3 "=r"))]
+(define_insn "casesi_load"
+ [(set (match_operand:SI 0 "register_operand" "=Rcq,r,r")
+ (unspec:SI [(match_operand:SI 1 "nonmemory_operand" "Rcq,c,Cal")
+ (match_operand:SI 2 "register_operand" "Rcq,c,c")
+ (label_ref (match_operand 3 "" ""))] UNSPEC_CASESI))]
""
"*
{
- output_asm_insn (\"mov %2,%1\", operands);
- if (TARGET_SHIFTER)
- output_asm_insn (\"asl %3,%0,2\", operands);
- else
- output_asm_insn (\"asl %3,%0\;asl %3,%3\", operands);
- output_asm_insn (\"ld %2,[%2,%3]\", operands);
- output_asm_insn (\"j.nd %a2\", operands);
- return \"\";
+ rtx diff_vec = PATTERN (next_real_insn (operands[3]));
+
+ if (GET_CODE (diff_vec) != ADDR_DIFF_VEC)
+ {
+ gcc_assert (GET_CODE (diff_vec) == ADDR_VEC);
+ gcc_assert (GET_MODE (diff_vec) == SImode);
+ gcc_assert (!CASE_VECTOR_PC_RELATIVE && !flag_pic);
+ }
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ return \"ld.as %0,[%1,%2]%&\";
+ case HImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return \"ldw.as %0,[%1,%2]\";
+ return \"ldw.x.as %0,[%1,%2]\";
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ return \"ldb%? %0,[%1,%2]%&\";
+ return \"ldb.x %0,[%1,%2]\";
+ default:
+ gcc_unreachable ();
+ }
}"
- [(set_attr "type" "uncond_branch")
- (set_attr "length" "6")])
+ [(set_attr "type" "load")
+ (set_attr_alternative "iscompact"
+ [(cond
+ [(ne (symbol_ref "GET_MODE (PATTERN (next_real_insn (operands[3])))")
+ (symbol_ref "QImode"))
+ (const_string "false")
+ (eq (symbol_ref "ADDR_DIFF_VEC_FLAGS (PATTERN (next_real_insn (operands[3]))).offset_unsigned") (const_int 0))
+ (const_string "false")]
+ (const_string "true"))
+ (const_string "false")
+ (const_string "false")])
+ (set_attr_alternative "length"
+ [(cond
+ [(eq_attr "iscompact" "false")
+ (const_int 4)
+ (and (ne (symbol_ref "0") (const_int 0)) (eq (match_dup 0) (pc)))
+ (const_int 4)
+ (eq_attr "verify_short" "yes")
+ (const_int 2)]
+ (const_int 4))
+ (const_int 4)
+ (const_int 8)])])
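+
+;; Note: the .as (address scaling) load variants above scale the index by
+;; the access size, so for an SImode table the effective address is
+;; %1 + %2*4, and for a HImode table %1 + %2*2.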
+
+; Unlike the canonical tablejump, this pattern always uses a jump address,
+; even for CASE_VECTOR_PC_RELATIVE.
+(define_insn "casesi_jump"
+ [(set (pc) (match_operand:SI 0 "register_operand" "Cal,Rcqq,c"))
+ (use (label_ref (match_operand 1 "" "")))]
+ ""
+ "j%!%* [%0]%&"
+ [(set_attr "type" "jump")
+ (set_attr "iscompact" "false,maybe,false")
+ (set_attr "cond" "canuse")])
+(define_insn "casesi_compact_jump"
+ [(set (pc)
+ (unspec:SI [(match_operand:SI 0 "register_operand" "c,q")]
+ UNSPEC_CASESI))
+ (use (label_ref (match_operand 1 "" "")))
+ (clobber (match_scratch:SI 2 "=q,0"))]
+ "TARGET_COMPACT_CASESI"
+ "*
+{
+ rtx diff_vec = PATTERN (next_real_insn (operands[1]));
+ int unalign = arc_get_unalign ();
+ rtx xop[3];
+ const char *s;
+
+ xop[0] = operands[0];
+ xop[2] = operands[2];
+ gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+ switch (GET_MODE (diff_vec))
+ {
+ case SImode:
+ /* Max length can be 12 in this case, but this is OK because
+ 2 of these are for alignment, and are anticipated in the length
+ of the ADDR_DIFF_VEC. */
+ if (unalign && !satisfies_constraint_Rcq (xop[0]))
+ s = \"add2 %2,pcl,%0\n\tld_s%2,[%2,12]\";
+ else if (unalign)
+ s = \"add_s %2,%0,2\n\tld.as %2,[pcl,%2]\";
+ else
+ s = \"add %2,%0,2\n\tld.as %2,[pcl,%2]\";
+ arc_clear_unalign ();
+ break;
+ case HImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ {
+ if (satisfies_constraint_Rcq (xop[0]))
+ {
+ s = \"add_s %2,%0,%1\n\tldw.as %2,[pcl,%2]\";
+ xop[1] = GEN_INT ((10 - unalign) / 2U);
+ }
+ else
+ {
+ s = \"add1 %2,pcl,%0\n\tldw_s %2,[%2,%1]\";
+ xop[1] = GEN_INT (10 + unalign);
+ }
+ }
+ else
+ {
+ if (satisfies_constraint_Rcq (xop[0]))
+ {
+ s = \"add_s %2,%0,%1\n\tldw.x.as %2,[pcl,%2]\";
+ xop[1] = GEN_INT ((10 - unalign) / 2U);
+ }
+ else
+ {
+ s = \"add1 %2,pcl,%0\n\tldw_s.x %2,[%2,%1]\";
+ xop[1] = GEN_INT (10 + unalign);
+ }
+ }
+ arc_toggle_unalign ();
+ break;
+ case QImode:
+ if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned)
+ {
+ if (rtx_equal_p (xop[2], xop[0])
+ || find_reg_note (insn, REG_DEAD, xop[0]))
+ {
+ s = \"add_s %0,%0,pcl\n\tldb_s %2,[%0,%1]\";
+ xop[1] = GEN_INT (8 + unalign);
+ }
+ else
+ {
+ s = \"add %2,%0,pcl\n\tldb_s %2,[%2,%1]\";
+ xop[1] = GEN_INT (10 + unalign);
+ arc_toggle_unalign ();
+ }
+ }
+ else if (rtx_equal_p (xop[0], xop[2])
+ || find_reg_note (insn, REG_DEAD, xop[0]))
+ {
+ s = \"add_s %0,%0,%1\n\tldb.x %2,[pcl,%0]\";
+ xop[1] = GEN_INT (10 - unalign);
+ arc_toggle_unalign ();
+ }
+ else
+ {
+ /* ??? Length is 12. */
+ s = \"add %2,%0,%1\n\tldb.x %2,[pcl,%2]\";
+ xop[1] = GEN_INT (8 + unalign);
+ }
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ output_asm_insn (s, xop);
+ return \"add_s %2,%2,pcl\n\tj_s%* [%2]\";
+}"
+ [(set_attr "length" "10")
+ (set_attr "type" "jump")
+ (set_attr "iscompact" "true")
+ (set_attr "cond" "nocond")])
+
+;; TODO: Split this into separate patterns (when enabling this pattern) for
+;; TARGET_MIXED_CODE so that the length can be set correctly.
(define_insn "tablejump"
[(set (pc) (match_operand:SI 0 "address_operand" "p"))
(use (label_ref (match_operand 1 "" "")))]
"0 /* disabled -> using casesi now */"
"j%* %a0"
- [(set_attr "type" "uncond_branch")])
+ [(set_attr "type" "jump")])
(define_expand "call"
;; operands[1] is stack_size_rtx
@@ -1563,75 +4478,1590 @@
(match_operand 1 "" ""))
(clobber (reg:SI 31))])]
""
- "")
-
-(define_insn "*call_via_reg"
- [(call (mem:SI (match_operand:SI 0 "register_operand" "r"))
+ "{
+ rtx callee;
+
+ gcc_assert (MEM_P (operands[0]));
+ callee = XEXP (operands[0], 0);
+ if (crtl->profile && arc_profile_call (callee))
+ {
+ emit_call_insn (gen_call_prof (gen_rtx_SYMBOL_REF (Pmode,
+ \"_mcount_call\"),
+ operands[1]));
+ DONE;
+ }
+  /* Decide whether to generate indirect calls by loading the
+     32-bit address of the callee into a register before performing the
+     branch and link - this exposes cse opportunities.
+     Also, in weird cases like compile/20010107-1.c, we may get a PLUS.  */
+ if (GET_CODE (callee) != REG
+ && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee)))
+ XEXP (operands[0], 0) = force_reg (Pmode, callee);
+ }
+")
+
+
+(define_insn "*call_i"
+ [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Rcqq,c,Cbr,L,I,Cal"))
(match_operand 1 "" ""))
(clobber (reg:SI 31))]
""
- "lr blink,[status]\;j.d %0\;add blink,blink,2"
- [(set_attr "type" "call_no_delay_slot")
- (set_attr "length" "3")])
-
-(define_insn "*call_via_label"
- [(call (mem:SI (match_operand:SI 0 "call_address_operand" ""))
+ "@
+ jl%!%* [%0]%&
+ jl%!%* [%0]
+ bl%!%* %P0
+ jl%!%* %S0
+ jl%* %S0
+ jl%! %S0"
+ [(set_attr "type" "call,call,call,call,call,call_no_delay_slot")
+ (set_attr "iscompact" "maybe,*,*,*,*,*")
+ (set_attr_alternative "cond"
+ [(const_string "canuse")
+ (const_string "canuse")
+ (cond [(eq (symbol_ref "TARGET_MEDIUM_CALLS") (const_int 0))
+ (const_string "canuse")
+ (eq_attr "delay_slot_filled" "yes")
+ (const_string "nocond")
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_string "canuse_limm")]
+ (const_string "nocond"))
+ (const_string "canuse")
+ (if_then_else (eq_attr "delay_slot_filled" "yes")
+ (const_string "nocond")
+ (const_string "canuse_limm"))
+ (const_string "canuse")])
+ (set_attr "length" "*,4,4,4,4,8")])
+
+(define_insn "call_prof"
+ [(call (mem:SI (match_operand:SI 0 "symbolic_operand" "Cbr,Cal"))
(match_operand 1 "" ""))
- (clobber (reg:SI 31))]
- ""
- ; The %~ is necessary in case this insn gets conditionalized and the previous
- ; insn is the cc setter.
- "%~bl%!%* %0"
- [(set_attr "type" "call")
- (set_attr "cond" "canuse")])
+ (clobber (reg:SI 31))
+ (use (reg:SI 8))
+ (use (reg:SI 9))]
+ ""
+ "@
+ bl%!%* %P0;2
+ jl%! %^%S0"
+ [(set_attr "type" "call,call_no_delay_slot")
+ (set_attr "cond" "canuse,canuse")
+ (set_attr "length" "4,8")])
(define_expand "call_value"
;; operand 2 is stack_size_rtx
;; operand 3 is next_arg_register
- [(parallel [(set (match_operand 0 "register_operand" "=r")
+ [(parallel [(set (match_operand 0 "dest_reg_operand" "=r")
(call (match_operand:SI 1 "call_operand" "")
(match_operand 2 "" "")))
(clobber (reg:SI 31))])]
""
- "")
-
-(define_insn "*call_value_via_reg"
- [(set (match_operand 0 "register_operand" "=r")
- (call (mem:SI (match_operand:SI 1 "register_operand" "r"))
+ "
+ {
+ rtx callee;
+
+ gcc_assert (MEM_P (operands[1]));
+ callee = XEXP (operands[1], 0);
+ if (crtl->profile && arc_profile_call (callee))
+ {
+ emit_call_insn (gen_call_value_prof (operands[0],
+ gen_rtx_SYMBOL_REF (Pmode,
+ \"_mcount_call\"),
+ operands[2]));
+ DONE;
+ }
+ /* See the comment in define_expand \"call\". */
+ if (GET_CODE (callee) != REG
+ && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee)))
+ XEXP (operands[1], 0) = force_reg (Pmode, callee);
+ }")
+
+
+(define_insn "*call_value_i"
+ [(set (match_operand 0 "dest_reg_operand" "=Rcqq,w, w,w,w, w")
+ (call (mem:SI (match_operand:SI 1
+ "call_address_operand" "Rcqq,c,Cbr,L,I,Cal"))
(match_operand 2 "" "")))
(clobber (reg:SI 31))]
""
- "lr blink,[status]\;j.d %1\;add blink,blink,2"
- [(set_attr "type" "call_no_delay_slot")
- (set_attr "length" "3")])
-
-(define_insn "*call_value_via_label"
- [(set (match_operand 0 "register_operand" "=r")
- (call (mem:SI (match_operand:SI 1 "call_address_operand" ""))
+ "@
+ jl%!%* [%1]%&
+ jl%!%* [%1]
+ bl%!%* %P1;1
+ jl%!%* %S1
+ jl%* %S1
+ jl%! %S1"
+ [(set_attr "type" "call,call,call,call,call,call_no_delay_slot")
+ (set_attr "iscompact" "maybe,*,*,*,*,*")
+ (set_attr_alternative "cond"
+ [(const_string "canuse")
+ (const_string "canuse")
+ (cond [(eq (symbol_ref "TARGET_MEDIUM_CALLS") (const_int 0))
+ (const_string "canuse")
+ (eq_attr "delay_slot_filled" "yes")
+ (const_string "nocond")
+ (eq (symbol_ref "flag_pic") (const_int 0))
+ (const_string "canuse_limm")]
+ (const_string "nocond"))
+ (const_string "canuse")
+ (const_string "canuse_limm")
+ (const_string "canuse")])
+ (set_attr "length" "*,4,4,4,4,8")])
+
+
+;; TODO - support 16-bit short "branch and link" insns if required.
+;(define_insn "*call_value_via_label_mixed"
+; [(set (match_operand 0 "register_operand" "=r")
+; (call (mem:SI (match_operand:SI 1 "call_address_operand" ""))
+; (match_operand 2 "" "")))
+; (clobber (reg:SI 31))]
+; "TARGET_MIXED_CODE"
+; "bl_s %1"
+; [(set_attr "type" "call")])
+
+(define_insn "call_value_prof"
+ [(set (match_operand 0 "dest_reg_operand" "=r,r")
+ (call (mem:SI (match_operand:SI 1 "symbolic_operand" "Cbr,Cal"))
(match_operand 2 "" "")))
- (clobber (reg:SI 31))]
- ""
- ; The %~ is necessary in case this insn gets conditionalized and the previous
- ; insn is the cc setter.
- "%~bl%!%* %1"
- [(set_attr "type" "call")
- (set_attr "cond" "canuse")])
-
+ (clobber (reg:SI 31))
+ (use (reg:SI 8))
+ (use (reg:SI 9))]
+ ""
+ "@
+ bl%!%* %P1;1
+ jl%! %^%S1"
+ [(set_attr "type" "call,call_no_delay_slot")
+ (set_attr "cond" "canuse,canuse")
+ (set_attr "length" "4,8")])
+
(define_insn "nop"
[(const_int 0)]
""
- "nop"
- [(set_attr "type" "misc")])
+ "nop%?"
+ [(set_attr "type" "misc")
+ (set_attr "iscompact" "true")
+ (set_attr "cond" "canuse")
+ (set_attr "length" "2")])
;; Special pattern to flush the icache.
;; ??? Not sure what to do here. Some ARC's are known to support this.
(define_insn "flush_icache"
- [(unspec_volatile [(match_operand 0 "memory_operand" "m")] 0)]
+ [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 0)]
""
"* return \"\";"
[(set_attr "type" "misc")])
-
+
;; Split up troublesome insns for better scheduling.
-
+
;; Peepholes go at the end.
+;;asl followed by add can be replaced by an add{1,2,3}
+;; Three define_peepholes have been added for this optimization
+;; ??? This used to target non-canonical rtl. Now we use add_n, which
+;; can be generated by combine. Check if these peepholes still provide
+;; any benefit.
+
+;; -------------------------------------------------------------
+;; Pattern 1 : r0 = r1 << {i}
+;; r3 = r4/INT + r0 ;;and commutative
+;; ||
+;; \/
+;; add{i} r3,r4/INT,r1
+;; -------------------------------------------------------------
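+;; For instance (hypothetical registers), with a shift count of 2:
+;; asl r0,r1,2 ; r0 = r1 << 2
+;; add r3,r4,r0 ; r3 = r4 + r0
+;; becomes: add2 r3,r4,r1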
+
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (plus:SI (match_operand:SI 4 "nonmemory_operand" "")
+ (match_operand:SI 5 "nonmemory_operand" "")))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && (INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && (true_regnum (operands[4]) == true_regnum (operands[0])
+ || true_regnum (operands[5]) == true_regnum (operands[0]))
+ && (peep2_reg_dead_p (2, operands[0]) || (true_regnum (operands[3]) == true_regnum (operands[0])))"
+  ;; The preparation statements swap operands so that operands[4] always
+  ;; holds the addend that is not the shift result; this satisfies
+  ;; commutativity.
+ [(set (match_dup 3)
+ (plus:SI (mult:SI (match_dup 1)
+ (match_dup 2))
+ (match_dup 4)))]
+ "DROSS (\"addn peephole2\");
+ if (true_regnum (operands[4]) == true_regnum (operands[0]))
+ operands[4] = operands[5];
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
+)
+
+;; triggered by -g -O2 -mARC600 -mmul64 -mnorm
+;; libstdc++-v3/testsuite/23_containers/deque/check_construct_destroy.cc
+(define_peephole
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (ashift:SI (match_operand:SI 1 "register_operand" "c,c")
+ (match_operand:SI 2 "const_int_operand" "n,n")))
+ (set (match_operand:SI 3 "dest_reg_operand" "=w,w")
+ (plus:SI (match_operand:SI 4 "nonmemory_operand" "ci,0")
+ (match_operand:SI 5 "nonmemory_operand" "0,ci")))]
+ "TARGET_ARCOMPACT
+ && (INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && (true_regnum (operands[4]) == true_regnum (operands[0])
+ || true_regnum (operands[5]) == true_regnum (operands[0]))
+ && arc_dead_or_set_postreload_p (insn, operands[0])"
+ "*{
+ DROSS (\"addn_peephole\");
+  /* This handles commutativity.  */
+  if (which_alternative == 1)
+    operands[4] = operands[5];
+  switch (INTVAL (operands[2]))
+    {
+    case 1: return \"add1 %3,%S4,%1;;addi peephole - pattern 1\";
+    case 2: return \"add2 %3,%S4,%1;;addi peephole - pattern 2\";
+    case 3: return \"add3 %3,%S4,%1;;addi peephole - pattern 3\";
+    default: gcc_unreachable ();
+    }
+ }"
+ [(set_attr "length" "8")]
+)
+
+; ??? For ARC700, bbit peepholes should be replaced with a combiner pattern
+; combining a CC_Z btst with a bne/beq, and then properly branch shortened.
+; The combined pattern should be grokked by the ccfsm machinery.
+; For ARC600, the peephole should be adjusted to use btst as input.
+;
+; bbit0, bbit1 peephole that incorporates bic
+(define_peephole
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (and:SI (not:SI (match_operand:SI 1 "register_operand" "c"))
+ (match_operand:SI 2 "immediate_operand" "Cal")))
+ (set (reg:CC_ZN 61)
+ (compare:CC_ZN (match_dup 0)
+ (match_operand:SI 3 "immediate_operand" "Cal")))
+ (set (pc)
+ (if_then_else (match_operator 4 "proper_comparison_operator"
+ [(reg 61) (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ ]
+ "TARGET_ARCOMPACT
+   && valid_bbit_pattern_p (operands, insn)
+ && arc_dead_or_set_postreload_p (prev_nonnote_insn (insn), operands[0])
+ && arc_dead_or_set_postreload_p (insn, XEXP (operands[4], 0))"
+ "* return gen_bbit_bic_insns (operands);"
+ [(set_attr "type" "branch")
+ (set_attr "length" "4")]
+)
+
+
+; bbit0,bbit1 peephole optimization
+
+(define_peephole
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (and:SI (match_operand:SI 1 "register_operand" "%c")
+ (match_operand:SI 2 "immediate_operand" "Cal")))
+ (set (reg:CC_ZN 61)
+ (compare:CC_ZN (match_dup 0)
+ (match_operand:SI 3 "immediate_operand" "Cal")))
+ (set (pc)
+ (if_then_else (match_operator 4 "proper_comparison_operator"
+ [(reg 61) (const_int 0)])
+ (label_ref (match_operand 5 "" ""))
+ (pc)))
+ ]
+ "TARGET_ARCOMPACT
+   && valid_bbit_pattern_p (operands, insn)
+   && arc_dead_or_set_postreload_p (prev_nonnote_insn (insn), operands[0])"
+  "* return gen_bbit_insns (operands);"
+  [(set_attr "type" "branch")
+ (set_attr "length" "4")])
+
+
+;; bset peephole2 optimization
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (const_int 1))
+ (set (match_operand:SI 1 "dest_reg_operand" "")
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (ior:SI (match_operand:SI 4 "nonmemory_operand" "")
+ (match_operand:SI 5 "nonmemory_operand" "")))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && (peep2_reg_dead_p (2, operands[0]) || (true_regnum (operands[1]) == true_regnum (operands[0])))
+ && (peep2_reg_dead_p (3, operands[1]) || (true_regnum (operands[3]) == true_regnum (operands[1])))
+ && (true_regnum (operands[4]) == true_regnum (operands[1])
+ || true_regnum (operands[5]) == true_regnum (operands[1]))"
+ [(set (match_dup 3)
+ (ior:SI (match_dup 4)
+ (ashift:SI (const_int 1) (match_dup 2))))]
+ "DROSS (\"bset peephole2\");
+ if (true_regnum (operands[4]) == true_regnum (operands[1]))
+ operands[4] = operands[5];"
+)
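+
+;; For instance (hypothetical registers), the sequence
+;; mov r0,1
+;; asl r1,r0,r2
+;; or r3,r4,r1
+;; collapses to r3 = r4 | (1 << r2), which a bset insn can implement.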
+
+
+;; -------------------------------------------------------------
+;; Pattern 1 : r0 = r1 << {i}
+;; r3 = r4 - r0
+;; ||
+;; \/
+;; sub{i} r3,r4,r1
+;; -------------------------------------------------------------
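+;; For instance (hypothetical registers), with a shift count of 3:
+;; asl r0,r1,3 ; r0 = r1 << 3
+;; sub r3,r4,r0 ; r3 = r4 - r0
+;; becomes: sub3 r3,r4,r1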
+
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (minus:SI (match_operand:SI 4 "nonmemory_operand" "")
+ (match_dup 0)))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && (INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && (peep2_reg_dead_p (2, operands[0]) || (true_regnum (operands[3]) == true_regnum (operands[0])))"
+ [(set (match_dup 3)
+ (minus:SI (match_dup 4)
+ (mult:SI (match_dup 1)
+ (match_dup 2))))]
+ "DROSS (\"subn peephole2 1/2\");
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
+)
+
+(define_peephole
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (ashift:SI (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "const_int_operand" "i")))
+ (set (match_operand:SI 3 "dest_reg_operand" "=r")
+ (minus:SI (match_operand:SI 4 "nonmemory_operand" "cCal")
+ (match_dup 0)))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && (INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && arc_dead_or_set_postreload_p (insn, operands[0])"
+ "*
+ {
+ DROSS (\"subn peephole 1/2\");
+ switch (INTVAL (operands[2]))
+ {
+ case 1: return \";;sub1 peephole - pattern 1\;sub1%? %3,%S4,%1\";
+ case 2: return \";;sub2 peephole - pattern 1\;sub2%? %3,%S4,%1\";
+ case 3: return \";;sub3 peephole - pattern 1\;sub3%? %3,%S4,%1\";
+ default: gcc_unreachable ();
+ }
+ }"
+ [(set_attr "length" "4")]
+)
+
+
+
+;; -------------------------------------------------------------
+;; Pattern 2 : r0 = r1 << {i}
+;; r3 = INT
+;; r5 = r3 - r0
+;; ||
+;; \/
+;; sub{i} r5,INT,r1
+;; -------------------------------------------------------------
+
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (ashift:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (match_operand:SI 4 "immediate_operand" ""))
+ (set (match_operand:SI 5 "dest_reg_operand" "")
+ (minus:SI (match_dup 3)
+ (match_dup 0)))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && (peep2_reg_dead_p (3, operands[0]) || (true_regnum (operands[5]) == true_regnum (operands[0])))
+ && (peep2_reg_dead_p (3, operands[3]) || (true_regnum (operands[5]) == true_regnum (operands[3])))"
+ [(set (match_dup 5)
+ (minus:SI (match_dup 4)
+ (mult:SI (match_dup 1)
+ (match_dup 2))))]
+ "DROSS (\"subn peephole2 2/2\");
+ operands[2] = GEN_INT (1 << INTVAL (operands[2]));"
+)
+
+(define_peephole
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w")
+ (ashift:SI (match_operand:SI 1 "register_operand" "c")
+ (match_operand:SI 2 "immediate_operand" "Cal")))
+ (set (match_operand:SI 3 "dest_reg_operand" "=w")
+ (match_operand:SI 4 "immediate_operand" "Cal"))
+ (set (match_operand:SI 5 "dest_reg_operand" "=w")
+ (minus:SI (match_dup 3)
+ (match_dup 0)
+ ))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && GET_CODE (operands[2]) == CONST_INT
+ && (INTVAL (operands[2]) == 1
+ || INTVAL (operands[2]) == 2
+ || INTVAL (operands[2]) == 3)
+ && arc_dead_or_set_postreload_p (insn, operands[3])
+ && arc_dead_or_set_postreload_p (insn, operands[0])"
+ "*
+ {
+ DROSS (\"subn peephole 2/2\");
+ switch (INTVAL (operands[2]))
+ {
+ case 1: return \";;sub1 peephole - pattern 2\;sub1%? %5,%S4,%1\";
+ case 2: return \";;sub2 peephole - pattern 2\;sub2%? %5,%S4,%1\";
+ case 3: return \";;sub3 peephole - pattern 2\;sub3%? %5,%S4,%1\";
+ default: gcc_unreachable ();
+ }
+ }"
+ [(set_attr "length" "4")]
+)
+
+;;bxor peephole2
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (const_int 1))
+ (set (match_operand:SI 1 "dest_reg_operand" "")
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (xor:SI (match_operand:SI 4 "nonmemory_operand" "")
+ (match_operand:SI 5 "nonmemory_operand" "")))
+]
+ "
+ TARGET_ARCOMPACT
+ && TARGET_DROSS
+ && ( peep2_reg_dead_p (3, operands[1]) || ( true_regnum(operands[3])==true_regnum(operands[1]) ) )
+ && ( peep2_reg_dead_p (2, operands[0]) || ( true_regnum(operands[1])==true_regnum(operands[0]) ) )
+ && ( true_regnum(operands[4])==true_regnum(operands[1]) || true_regnum(operands[5])==true_regnum(operands[1]) )"
+
+ ;; the preparation statements take care to put proper operand in operands[4]
+ ;; operands[4] will always contain the correct operand
+ [(set (match_dup 3)
+ (xor:SI (match_dup 4)
+ (ashift:SI (const_int 1)
+ (match_dup 2) ) ) ) ]
+
+ "DROSS (\"bxor peephole2\");
+ if ( true_regnum(operands[4])==true_regnum(operands[1]) )
+ operands[4] = operands[5];
+ "
+)
+
+;; bclr peephole2 optimization
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (const_int 1))
+ (set (match_operand:SI 1 "dest_reg_operand" "")
+ (ashift:SI (match_dup 0)
+ (match_operand:SI 2 "register_operand" "")))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (and:SI (not:SI (match_dup 1))
+ (match_operand:SI 4 "nonmemory_operand" "")))]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+  && (peep2_reg_dead_p (2, operands[0])
+      || true_regnum (operands[1]) == true_regnum (operands[0]))
+  && (peep2_reg_dead_p (3, operands[1])
+      || true_regnum (operands[3]) == true_regnum (operands[1]))"
+ [(set (match_dup 3)
+ (and:SI (not:SI (ashift:SI (const_int 1)
+ (match_dup 2)))
+ (match_dup 4)))]
+ "DROSS (\"bclr peephole2\");"
+)
+
+;; bmsk peephole2 optimization
+(define_peephole2
+ [(set (match_operand:SI 0 "dest_reg_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (const_int 1)))
+ (set (match_operand:SI 2 "dest_reg_operand" "")
+ (const_int 1))
+ (set (match_operand:SI 3 "dest_reg_operand" "")
+ (ashift:SI (match_dup 2)
+ (match_dup 0)))
+ (set (match_operand:SI 4 "dest_reg_operand" "")
+ (plus:SI (match_dup 3)
+ (const_int -1) ))
+ (set (match_operand:SI 5 "dest_reg_operand" "")
+ (and:SI (match_operand:SI 6 "nonmemory_operand" "")
+ (match_operand:SI 7 "nonmemory_operand" "")))
+ ]
+ "TARGET_ARCOMPACT
+ && TARGET_DROSS
+  && (peep2_reg_dead_p (3, operands[0])
+      || true_regnum (operands[3]) == true_regnum (operands[0]))
+  && (peep2_reg_dead_p (3, operands[2])
+      || true_regnum (operands[3]) == true_regnum (operands[2]))
+  && (peep2_reg_dead_p (4, operands[3])
+      || true_regnum (operands[4]) == true_regnum (operands[3]))
+  && (peep2_reg_dead_p (5, operands[4])
+      || true_regnum (operands[5]) == true_regnum (operands[4]))
+  && (true_regnum (operands[6]) == true_regnum (operands[4])
+      || true_regnum (operands[7]) == true_regnum (operands[4]))"
+ [(set (match_dup 5)
+ (and:SI (match_dup 6)
+ (plus:SI (ashift:SI (const_int 1)
+ (plus:SI (match_dup 1)
+ (const_int 1)))
+ (const_int -1))))]
+ "DROSS (\"bmsk peephole2\");
+ if ( true_regnum(operands[6])==true_regnum(operands[4]) )
+ operands[6]=operands[7];
+ "
+)
+
+;; Instructions generated through builtins
+
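+;; A minimal usage sketch from C (assuming the port registers these under
+;; the usual __builtin_arc_* names in arc.c - the names here are
+;; illustrative, not verified against this branch):
+;; int n = __builtin_arc_norm (x); /* "norm" pattern below */
+;; int s = __builtin_arc_swap (x); /* "swap" pattern below */
+;; __builtin_arc_sleep (0); /* "sleep" pattern below */
+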
+(define_insn "norm"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (unspec:SI [(match_operand:SI 1 "general_operand" "cL,Cal")]
+ UNSPEC_NORM))]
+ "TARGET_NORM"
+ "@
+ norm \t%0, %1
+ norm \t%0, %S1"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "two_cycle_core,two_cycle_core")])
+
+(define_insn "normw"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,w")
+ (unspec:SI [(match_operand:HI 1 "general_operand" "cL,Cal")]
+ UNSPEC_NORMW))]
+ "TARGET_NORM"
+ "@
+ normw \t%0, %1
+ normw \t%0, %S1"
+ [(set_attr "length" "4,8")
+ (set_attr "type" "two_cycle_core,two_cycle_core")])
+
+
+(define_insn "swap"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=w,w,w")
+ (unspec:SI [(match_operand:SI 1 "general_operand" "L,Cal,c")]
+ UNSPEC_SWAP))]
+ "TARGET_SWAP"
+ "@
+ swap \t%0, %1
+ swap \t%0, %S1
+ swap \t%0, %1"
+ [(set_attr "length" "4,8,4")
+ (set_attr "type" "two_cycle_core,two_cycle_core,two_cycle_core")])
+
+;; FIXME: an intrinsic for multiply is daft. Can we remove this?
+(define_insn "mul64"
+ [(unspec [(match_operand:SI 0 "general_operand" "q,r,r,%r")
+ (match_operand:SI 1 "general_operand" "q,rL,I,Cal")]
+ UNSPEC_MUL64)]
+ "TARGET_MUL64_SET"
+ "@
+ mul64%? \t0, %0, %1%&
+ mul64%? \t0, %0, %1
+ mul64 \t0, %0, %1
+ mul64%? \t0, %0, %S1"
+ [(set_attr "length" "2,4,4,8")
+ (set_attr "iscompact" "true,false,false,false")
+ (set_attr "type" "binary,binary,binary,binary")
+ (set_attr "cond" "canuse,canuse, nocond, canuse")])
+
+(define_insn "mulu64"
+ [(unspec [(match_operand:SI 0 "general_operand" "%r,r,r,r")
+ (match_operand:SI 1 "general_operand" "rL,I,r,Cal")]
+ UNSPEC_MULU64)]
+ "TARGET_MUL64_SET"
+ "@
+ mulu64%? \t0, %0, %1
+ mulu64 \t0, %0, %1
+ mulu64 \t0, %0, %1
+ mulu64%? \t0, %0, %S1"
+ [(set_attr "length" "4,4,4,8")
+ (set_attr "type" "binary,binary,binary,binary")
+ (set_attr "cond" "canuse,nocond,nocond,canuse")])
+
+(define_insn "divaw"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=&w,&w,&w")
+ (unspec:SI [(div:SI (match_operand:SI 1 "general_operand" "r,Cal,r")
+ (match_operand:SI 2 "general_operand" "r,r,Cal"))]
+ UNSPEC_DIVAW))]
+ "TARGET_EA_SET && TARGET_ARCOMPACT"
+ "@
+ divaw \t%0, %1, %2
+ divaw \t%0, %S1, %2
+ divaw \t%0, %1, %S2"
+ [(set_attr "length" "4,8,8")
+ (set_attr "type" "divaw,divaw,divaw")])
+
+; FIXME: The %? is of no use here, since cond is not canuse
+(define_insn "flag"
+ [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "rL,I,Cal")]
+ VUNSPEC_FLAG)]
+ ""
+ "@
+ flag%? %0
+ flag %0
+ flag%? %S0"
+ [(set_attr "length" "4,4,8")
+ (set_attr "type" "misc,misc,misc")
+ (set_attr "cond" "clob,clob,clob")])
+
+(define_insn "brk"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")]
+ VUNSPEC_BRK)]
+ ""
+ "brk"
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")])
+
+(define_insn "rtie"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")]
+ VUNSPEC_RTIE)]
+ ""
+ "rtie"
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")
+ (set_attr "cond" "clob")])
+
+(define_insn "sync"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")]
+ VUNSPEC_SYNC)]
+ ""
+ "sync"
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")])
+
+(define_insn "swi"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")]
+ VUNSPEC_SWI)]
+ ""
+ "*
+{
+  if (TARGET_ARC700)
+ return \"trap0\";
+ else
+ return \"swi\";
+}"
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")])
+
+
+(define_insn "sleep"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L")]
+ VUNSPEC_SLEEP)]
+ "(TARGET_A4 || check_if_valid_sleep_operand(operands,0))"
+ "*
+ if (TARGET_A4)
+ return \"sleep\";
+ else
+ return \"sleep %0\";
+ "
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")])
+
+(define_insn "core_read"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r")
+ (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "HJ,!r")]
+ VUNSPEC_CORE_READ))]
+ ""
+ "*
+  if (check_if_valid_regno_const (operands, 1))
+ return \"mov \t%0, r%1\";
+ return \"mov \t%0, r%1\";
+ "
+ [(set_attr "length" "4")
+ (set_attr "type" "unary")])
+
+(define_insn "core_write"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "r,r")
+ (match_operand:SI 1 "general_operand" "HJ,!r")]
+ VUNSPEC_CORE_WRITE)]
+ ""
+ "*
+  if (check_if_valid_regno_const (operands, 1))
+ return \"mov \tr%1, %0\";
+ return \"mov \tr%1, %0\";
+ "
+ [(set_attr "length" "4")
+ (set_attr "type" "unary")])
+
+(define_insn "lr"
+ [(set (match_operand:SI 0 "dest_reg_operand" "=r,r,r,r")
+ (unspec_volatile:SI [(match_operand:SI 1 "general_operand" "I,HJ,r,D")]
+ VUNSPEC_LR))]
+ ""
+ "lr\t%0, [%1]"
+ [(set_attr "length" "4,8,4,8")
+ (set_attr "type" "lr,lr,lr,lr")])
+
+(define_insn "sr"
+ [(unspec_volatile [(match_operand:SI 0 "general_operand" "Cal,r,r,r")
+ (match_operand:SI 1 "general_operand" "Ir,I,HJ,r")]
+ VUNSPEC_SR)]
+ ""
+ "sr\t%S0, [%1]"
+ [(set_attr "length" "8,4,8,4")
+ (set_attr "type" "sr,sr,sr,sr")])
+
+(define_insn "trap_s"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "L,Cal")]
+ VUNSPEC_TRAP_S)]
+ "TARGET_ARC700"
+ "*
+ if (which_alternative == 0)
+ return \"trap_s %0\";
+
+ fatal_error (\"Operand to trap_s should be an unsigned 6-bit value.\");
+ "
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")])
+
+(define_insn "unimp_s"
+ [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "N")]
+ VUNSPEC_UNIMP_S)]
+ "TARGET_ARC700"
+ "unimp_s"
+ [(set_attr "length" "4")
+ (set_attr "type" "misc")])
+
+;; End of instructions generated through builtins
+
+; Since the demise of REG_N_SETS, it is no longer possible to find out
+; in the prologue / epilogue expanders how many times blink is set.
+; Using df_regs_ever_live_p to decide if blink needs saving means that
+; any explicit use of blink will cause it to be saved; hence we cannot
+; represent the blink use in return / sibcall instructions themselves, and
+; instead have to show it in EPILOGUE_USES and must explicitly
+; forbid instructions that change blink in the return / sibcall delay slot.
+(define_expand "sibcall"
+ [(parallel [(call (match_operand 0 "memory_operand" "")
+ (match_operand 1 "general_operand" ""))
+ (return)
+ (use (match_operand 2 "" ""))])]
+ ""
+ "
+ {
+ rtx callee = XEXP (operands[0], 0);
+
+ if (operands[2] == NULL_RTX)
+ operands[2] = const0_rtx;
+ if (crtl->profile && arc_profile_call (callee))
+ {
+ emit_insn (gen_sibcall_prof
+ (gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"),
+ operands[1], operands[2]));
+ DONE;
+ }
+ if (GET_CODE (callee) != REG
+ && (GET_CODE (callee) == PLUS || arc_is_longcall_p (callee)))
+ XEXP (operands[0], 0) = force_reg (Pmode, callee);
+ }"
+)
+
+(define_expand "sibcall_value"
+ [(parallel [(set (match_operand 0 "dest_reg_operand" "")
+ (call (match_operand 1 "memory_operand" "")
+ (match_operand 2 "general_operand" "")))
+ (return)
+ (use (match_operand 3 "" ""))])]
+ ""
+ "
+ {
+ rtx callee = XEXP (operands[1], 0);
+
+ if (operands[3] == NULL_RTX)
+ operands[3] = const0_rtx;
+ if (crtl->profile && arc_profile_call (XEXP (operands[1], 0)))
+ {
+ emit_insn (gen_sibcall_value_prof
+ (operands[0], gen_rtx_SYMBOL_REF (Pmode, \"_mcount_call\"),
+ operands[2], operands[3]));
+ DONE;
+ }
+ if (GET_CODE (callee) != REG && arc_is_longcall_p (callee))
+ XEXP (operands[1], 0) = force_reg (Pmode, callee);
+ }"
+)
+
+(define_insn "*sibcall_insn"
+ [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Cbr,Rs5,Rsc,Cal"))
+ (match_operand 1 "" ""))
+ (return)
+ (use (match_operand 2 "" ""))]
+ ""
+ "@
+ b%!%* %P0
+ j%!%* [%0]%&
+ j%!%* [%0]
+ j%! %P0"
+ [(set_attr "type" "call,call,call,call_no_delay_slot")
+ (set_attr "iscompact" "false,maybe,false,false")
+ (set_attr "is_SIBCALL" "yes")]
+)
+
+(define_insn "*sibcall_value_insn"
+ [(set (match_operand 0 "dest_reg_operand" "")
+ (call (mem:SI (match_operand:SI 1 "call_address_operand" "Cbr,Rs5,Rsc,Cal"))
+ (match_operand 2 "" "")))
+ (return)
+ (use (match_operand 3 "" ""))]
+ ""
+ "@
+ b%!%* %P1
+ j%!%* [%1]%&
+ j%!%* [%1]
+ j%! %P1"
+ [(set_attr "type" "call,call,call,call_no_delay_slot")
+ (set_attr "iscompact" "false,maybe,false,false")
+ (set_attr "is_SIBCALL" "yes")]
+)
+
+(define_insn "sibcall_prof"
+ [(call (mem:SI (match_operand:SI 0 "call_address_operand" "Cbr,Cal"))
+ (match_operand 1 "" ""))
+ (return)
+ (use (match_operand 2 "" ""))
+ (use (reg:SI 8))
+ (use (reg:SI 9))]
+ ""
+ "@
+ b%!%* %P0;2
+ j%! %^%S0;2"
+ [(set_attr "type" "call,call_no_delay_slot")
+ (set_attr "is_SIBCALL" "yes")]
+)
+
+(define_insn "sibcall_value_prof"
+ [(set (match_operand 0 "dest_reg_operand" "")
+ (call (mem:SI (match_operand:SI 1 "call_address_operand" "Cbr,Cal"))
+ (match_operand 2 "" "")))
+ (return)
+ (use (match_operand 3 "" ""))
+ (use (reg:SI 8))
+ (use (reg:SI 9))]
+ ""
+ "@
+ b%!%* %P1;1
+ j%! %^%S1;1"
+ [(set_attr "type" "call,call_no_delay_slot")
+ (set_attr "is_SIBCALL" "yes")]
+)
+
+(define_expand "prologue"
+ [(pc)]
+ ""
+{
+ arc_expand_prologue ();
+ DONE;
+})
+
+(define_expand "epilogue"
+ [(pc)]
+ ""
+{
+ arc_expand_epilogue (0);
+ DONE;
+})
+
+(define_expand "sibcall_epilogue"
+ [(pc)]
+ "TARGET_ARCOMPACT"
+{
+ arc_expand_epilogue (1);
+ DONE;
+})
+
+; Since the demise of REG_N_SETS, it is no longer possible to find out
+; in the prologue / epilogue expanders how many times blink is set.
+; Using df_regs_ever_live_p to decide if blink needs saving means that
+; any explicit use of blink will cause it to be saved; hence we cannot
+; represent the blink use in return / sibcall instructions themselves, and
+; instead have to show it in EPILOGUE_USES and must explicitly
+; forbid instructions that change blink in the return / sibcall delay slot.
+(define_insn "return_i"
+ [(return)]
+ "reload_completed"
+{
+ rtx reg
+ = gen_rtx_REG (Pmode,
+ arc_return_address_regs[arc_compute_function_type (cfun)]);
+
+ if (TARGET_PAD_RETURN)
+ arc_pad_return ();
+ output_asm_insn (\"j%!%* [%0]%&\", &reg);
+ return \"\";
+}
+ [(set_attr "type" "return")
+ (set_attr "cond" "canuse")
+ (set (attr "iscompact")
+ (cond [(eq (symbol_ref "arc_compute_function_type (cfun)")
+ (symbol_ref "ARC_FUNCTION_NORMAL"))
+ (const_string "maybe")]
+ (const_string "false")))
+ (set (attr "length")
+ (cond [(eq (symbol_ref "arc_compute_function_type (cfun)")
+ (symbol_ref "ARC_FUNCTION_NORMAL"))
+ (const_int 4)
+ (and (ne (symbol_ref "0") (const_int 0))
+ (ne (minus (match_dup 0) (pc)) (const_int 0)))
+ (const_int 4)
+ (eq_attr "verify_short" "no")
+ (const_int 4)]
+ (const_int 2)))])
+
+;; The comment in final.c (insn_current_reference_address) says that
+;; forward branch addresses are calculated from the next insn after the
+;; branch, and backward branch addresses from the start of the branch insn.
+;; The shortening logic here is tuned to accommodate this behaviour.
+;; ??? This should be grokked by the ccfsm machinery.
+(define_insn "cbranchsi4_scratch"
+ [(set (pc)
+ (if_then_else (match_operator 0 "proper_comparison_operator"
+ [(match_operand:SI 1 "register_operand" "c,c, c")
+ (match_operand:SI 2 "nonmemory_operand" "L,c,?Cal")])
+ (label_ref (match_operand 3 "" ""))
+ (pc)))
+ (clobber (match_operand 4 "cc_register" ""))]
+ "TARGET_ARCOMPACT
+ && (reload_completed
+ || (TARGET_EARLY_CBRANCHSI
+ && brcc_nolimm_operator (operands[0], VOIDmode)))
+ && !find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
+ "*
+ switch (get_attr_length (insn))
+ {
+ case 2: return \"br%d0%? %1, %2, %^%l3%&\";
+ case 4: return \"br%d0%* %1, %B2, %^%l3\";
+ case 8: if (!brcc_nolimm_operator (operands[0], VOIDmode))
+ return \"br%d0%* %1, %B2, %^%l3\";
+ case 6: case 10:
+    case 12: return \"cmp%? %1, %B2\\n\\tb%d0%* %^%l3%&;br%d0 out of range\";
+ default: fprintf (stderr, \"unexpected length %d\\n\", get_attr_length (insn)); fflush (stderr); gcc_unreachable ();
+ }
+ "
+ [(set_attr "cond" "clob, clob, clob")
+ (set (attr "iscompact")
+ (cond [(eq_attr "lock_length" "2,6,10") (const_string "true")]
+ (const_string "false")))
+ (set (attr "type")
+ (if_then_else
+ (ne (symbol_ref "valid_brcc_with_delay_p (operands)") (const_int 0))
+ (const_string "brcc")
+ (const_string "brcc_no_delay_slot")))
+ ; For forward branches, we need to account not only for the distance to
+ ; the target, but also the difference between pcl and pc, the instruction
+ ; length, and any delay insn, if present.
+ (set
+ (attr "lock_length")
+ (cond ; the outer cond does a test independent of branch shortening.
+ [(match_operand 0 "brcc_nolimm_operator" "")
+ (cond
+ [(and (match_operand:CC_Z 4 "cc_register")
+ (eq_attr "delay_slot_filled" "no")
+ (ge (minus (match_dup 3) (pc)) (const_int -128))
+ (le (minus (match_dup 3) (pc))
+ (minus (const_int 122)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 2)
+ (and (ge (minus (match_dup 3) (pc)) (const_int -256))
+ (le (minus (match_dup 3) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (match_operand:SI 1 "compact_register_operand" "")
+ (const_int 6)]
+ (const_int 8))]
+ (cond [(and (ge (minus (match_dup 3) (pc)) (const_int -256))
+ (le (minus (match_dup 3) (pc)) (const_int 244)))
+ (const_int 8)
+ (match_operand:SI 1 "compact_register_operand" "")
+ (const_int 10)]
+ (const_int 12))))
+ (set
+ (attr "length")
+ (cond ; the outer cond does a test independent of branch shortening.
+ [(match_operand 0 "brcc_nolimm_operator" "")
+ (cond
+ [(and (match_operand:CC_Z 4 "cc_register")
+ (eq_attr "delay_slot_filled" "no")
+ (ge (minus (match_dup 3) (pc)) (const_int -128))
+ (le (minus (match_dup 3) (pc))
+ (minus (const_int 122)
+ (symbol_ref "get_attr_delay_slot_length (insn)")))
+ (eq_attr "verify_short" "yes"))
+ (const_int 2)
+ (and (ge (minus (match_dup 3) (pc)) (const_int -256))
+ (le (minus (match_dup 3) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (and (match_operand:SI 1 "compact_register_operand" "")
+ (eq_attr "verify_short" "yes"))
+ (const_int 6)]
+ (const_int 8))]
+ (cond [(and (ge (minus (match_dup 3) (pc)) (const_int -256))
+ (le (minus (match_dup 3) (pc)) (const_int 244)))
+ (const_int 8)
+ (and (match_operand:SI 1 "compact_register_operand" "")
+ (eq_attr "verify_short" "yes"))
+ (const_int 10)]
+ (const_int 12))))])
+
+; combiner pattern observed for unwind-dw2-fde.c:linear_search_fdes.
+(define_insn "*bbit"
+ [(set (pc)
+ (if_then_else
+ (match_operator 3 "equality_comparison_operator"
+ [(zero_extract:SI (match_operand:SI 1 "register_operand" "Rcqq,c")
+ (const_int 1)
+ (match_operand:SI 2 "nonmemory_operand" "L,Lc"))
+ (const_int 0)])
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (reg:CC_ZN CC_REG))]
+ "!find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX)"
+{
+ switch (get_attr_length (insn))
+ {
+ case 4: return (GET_CODE (operands[3]) == EQ
+ ? \"bbit0%* %1,%2,%0\" : \"bbit1%* %1,%2,%0\");
+ case 6:
+ case 8: return \"btst%? %1,%2\n\tb%d3%* %0; bbit out of range\";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "type" "brcc")
+ (set_attr "cond" "clob")
+ (set (attr "lock_length")
+ (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (eq (symbol_ref "which_alternative") (const_int 0))
+ (const_int 6)]
+ (const_int 8)))
+ (set (attr "length")
+ (cond [(and (ge (minus (match_dup 0) (pc)) (const_int -254))
+ (le (minus (match_dup 0) (pc))
+ (minus (const_int 248)
+ (symbol_ref "get_attr_delay_slot_length (insn)"))))
+ (const_int 4)
+ (and (eq (symbol_ref "which_alternative") (const_int 0))
+ (eq_attr "verify_short" "yes"))
+ (const_int 6)]
+ (const_int 8)))
+ (set (attr "iscompact")
+ (if_then_else (eq_attr "lock_length" "6")
+ (const_string "true") (const_string "false")))])
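+
+; For illustration (hypothetical source): a test such as
+; if (x & (1 << n)) goto lab;
+; can be combined into this pattern, emitting bbit1 %1,%2,%0 when the
+; target is in range and so avoiding a separate btst/branch pair.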
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the loop end pattern
+(define_expand "doloop_begin"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand:QI 1 "const_int_operand" ""))
+ (use (match_operand:QI 2 "const_int_operand" ""))
+ (use (match_operand:QI 3 "const_int_operand" ""))
+ (use (match_operand 4 "" ""))]
+ ""
+{
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+ emit_insn (gen_doloop_begin_i (operands[0], const0_rtx,
+ GEN_INT (INSN_UID (operands[4])),
+ const0_rtx, const0_rtx));
+ DONE;
+})
+
+; ??? can't describe the insn properly as then the optimizers try to
+; hoist the SETs.
+;(define_insn "doloop_begin_i"
+; [(set (reg:SI LP_START) (pc))
+; (set (reg:SI LP_END) (unspec:SI [(pc)] UNSPEC_LP))
+; (use (match_operand 0 "const_int_operand" "n"))]
+; ""
+; "lp .L__GCC__LP%0"
+;)
+
+; If operand1 is still zero after arc_reorg, this is an orphaned loop
+; instruction that was not at the start of the loop.
+; There is no point in reloading this insn - lp_count would still not
+; be available for the loop end.
+(define_insn "doloop_begin_i"
+ [(unspec:SI [(pc)] UNSPEC_LP)
+ (clobber (reg:SI LP_START))
+ (clobber (reg:SI LP_END))
+ (use (match_operand:SI 0 "register_operand" "l,l,????*X"))
+ (use (match_operand 1 "const_int_operand" "n,n,C_0"))
+ (use (match_operand 2 "const_int_operand" "n,n,X"))
+ (use (match_operand 3 "const_int_operand" "C_0,n,X"))
+ (use (match_operand 4 "const_int_operand" "C_0,X,X"))]
+ ""
+{
+ rtx scan;
+ int len, size = 0;
+ int n_insns = 0;
+ rtx loop_start = operands[4];
+
+ if (CONST_INT_P (loop_start))
+ loop_start = NULL_RTX;
+  /* Size implications of the alignment will be taken care of by the
+     alignment inserted at the loop start.  */
+ if (LOOP_ALIGN (0) && INTVAL (operands[1]))
+ asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0));
+ if (!INTVAL (operands[1]))
+ return "; LITTLE LOST LOOP";
+ if (loop_start && flag_pic)
+ /* ??? Can do better for when a scratch register
+ is known. But that would require extra testing. */
+ return ".p2align 2\;push_s r0\;add r0,pcl,%4-.+2\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0";
+ /* Check if the loop end is in range to be set by the lp instruction. */
+  size = INTVAL (operands[3]) < 2 ? 0 : 2048;
+  for (scan = insn; scan && size < 2048; scan = NEXT_INSN (scan))
+ {
+ if (!INSN_P (scan))
+ continue;
+ if (recog_memoized (scan) == CODE_FOR_doloop_end_i
+ && (XEXP (XVECEXP (PATTERN (scan), 0, 4), 0)
+ == XEXP (XVECEXP (PATTERN (insn), 0, 4), 0)))
+ break;
+ len = get_attr_length (scan);
+ size += len;
+ }
+ /* Try to verify that there are at least three instruction fetches
+ between the loop setup and the first encounter of the loop end. */
+ for (scan = NEXT_INSN (insn); scan && n_insns < 3; scan = NEXT_INSN (scan))
+ {
+ if (!INSN_P (scan))
+ continue;
+ if (GET_CODE (PATTERN (scan)) == SEQUENCE)
+ scan = XVECEXP (PATTERN (scan), 0, 0);
+ if (JUMP_P (scan))
+ {
+ if (recog_memoized (scan) != CODE_FOR_doloop_end_i)
+ {
+ n_insns += 2;
+ if (simplejump_p (scan))
+ {
+ insn = XEXP (SET_SRC (PATTERN (scan)), 0);
+ continue;
+ }
+ if (JUMP_LABEL (scan)
+ && (!next_active_insn (JUMP_LABEL (scan))
+ || (recog_memoized (next_active_insn (JUMP_LABEL (scan)))
+ != CODE_FOR_doloop_begin_i))
+ && (!next_active_insn (NEXT_INSN (PREV_INSN (scan)))
+ || (recog_memoized
+ (next_active_insn (NEXT_INSN (PREV_INSN (scan))))
+ != CODE_FOR_doloop_begin_i)))
+ n_insns++;
+ }
+ break;
+ }
+ len = get_attr_length (scan);
+ /* Size estimation of asms assumes that each line which is nonempty
+ codes an insn, and that each has a long immediate. For minimum insn
+ count, assume merely that a nonempty asm has at least one insn. */
+ if (GET_CODE (PATTERN (scan)) == ASM_INPUT
+ || asm_noperands (PATTERN (scan)) >= 0)
+ n_insns += (len != 0);
+ else
+ n_insns += (len > 4 ? 2 : (len ? 1 : 0));
+ }
+ if (LOOP_ALIGN (0))
+ asm_fprintf (asm_out_file, "\t.p2align %d\\n", LOOP_ALIGN (0));
+ gcc_assert (n_insns || GET_CODE (next_nonnote_insn (insn)) == CODE_LABEL);
+ if (size >= 2048 || (TARGET_ARC600 && n_insns == 1) || loop_start)
+ {
+ if (flag_pic)
+ /* ??? Can do better for when a scratch register
+ is known. But that would require extra testing. */
+ return ".p2align 2\;push_s r0\;add r0,pcl,24\;sr r0,[2]; LP_START\;add r0,pcl,.L__GCC__LP%1-.+2\;sr r0,[3]; LP_END\;pop_s r0";
+ output_asm_insn ((size < 2048
+ ? "lp .L__GCC__LP%1" : "sr .L__GCC__LP%1,[3]; LP_END"),
+ operands);
+ output_asm_insn (loop_start
+ ? "sr %4,[2]; LP_START" : "sr 0f,[2]; LP_START",
+ operands);
+ if (TARGET_ARC600 && n_insns < 1)
+ output_asm_insn ("nop", operands);
+ return (TARGET_ARC600 && n_insns < 3) ? "nop_s\;nop_s\;0:" : "0:";
+ }
+ else if (TARGET_ARC600 && n_insns < 3)
+ {
+ /* At least four instructions are needed between the setting of LP_COUNT
+ and the loop end - but the lp instruction qualifies as one. */
+ rtx prev = prev_nonnote_insn (insn);
+
+ if (!INSN_P (prev) || dead_or_set_regno_p (prev, LP_COUNT))
+ output_asm_insn ("nop", operands);
+ }
+ return "lp .L__GCC__LP%1";
+}
+ [(set_attr "type" "loop_setup")
+ (set_attr_alternative "length"
+ [(if_then_else (ne (symbol_ref "TARGET_ARC600") (const_int 0))
+ (const_int 16) (const_int 4))
+ (if_then_else (ne (symbol_ref "flag_pic") (const_int 0))
+ (const_int 28) (const_int 16))
+ (const_int 0)])]
+ ;; ??? strictly speaking, we should branch shorten this insn, but then
+ ;; we'd need a proper label first. We could say it is always 24 bytes in
+ ;; length, but that would be very pessimistic; also, when the loop insn
+ ;; goes out of range, it is very likely that the same insns that have
+ ;; done so will already have made all other small offset branches go out
+ ;; of range, making the need for exact length information here mostly
+ ;; academic.
+)
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the label to jump to at the top of the loop
+; operand 5 is nonzero if the loop is entered at its top.
+; Use this for the ARC600 and ARC700. For ARCtangent-A5, this is unsafe
+; without further checking for nearby branches etc., and without proper
+; annotation of shift patterns that clobber lp_count
+; ??? ARC600 might want to check if the loop has few iterations and only a
+; single insn - loop setup is expensive then.
+(define_expand "doloop_end"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand:QI 1 "const_int_operand" ""))
+ (use (match_operand:QI 2 "const_int_operand" ""))
+ (use (match_operand:QI 3 "const_int_operand" ""))
+ (use (label_ref (match_operand 4 "" "")))
+ (use (match_operand:QI 5 "const_int_operand" ""))]
+ "(TARGET_ARC600 || TARGET_ARC700) && arc_experimental_mask & 1"
+{
+ if (INTVAL (operands[3]) > 1)
+ FAIL;
+  /* Setting up the loop with two sr instructions costs 6 cycles.  */
+ if (TARGET_ARC700 && !INTVAL (operands[5])
+ && INTVAL (operands[1]) && INTVAL (operands[1]) <= (flag_pic ? 6 : 3))
+ FAIL;
+ /* We could do smaller bivs with biv widening, and wider bivs by having
+ a high-word counter in an outer loop - but punt on this for now. */
+ if (GET_MODE (operands[0]) != SImode)
+ FAIL;
+ emit_jump_insn (gen_doloop_end_i (operands[0], operands[4], const0_rtx));
+ DONE;
+})
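+
+; A rough sketch (hypothetical label) of the zero-overhead loop being set
+; up for a counted loop such as: for (i = 0; i < n; i++) body;
+; mov lp_count,n ; load the loop count register
+; lp .L__GCC__LP1 ; doloop_begin_i: set LP_START/LP_END
+; ... ; loop body
+; .L__GCC__LP1: ; doloop_end_i: hardware decrements lp_count
+; ; and loops back while it is nonzero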
+
+(define_insn_and_split "doloop_end_i"
+ [(set (pc)
+ (if_then_else (ne (match_operand:SI 0 "shouldbe_register_operand" "+l,*c,*m")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))
+ (use (reg:SI LP_START))
+ (use (reg:SI LP_END))
+ (use (match_operand 2 "const_int_operand" "n,???C_0,???X"))
+ (clobber (match_scratch:SI 3 "=X,X,&????r"))]
+ ""
+ "*
+{
+ rtx prev = prev_nonnote_insn (insn);
+
+ /* If there is an immediately preceding label, we must output a nop,
+ lest a branch to that label will fall out of the loop.
+ ??? We could try to avoid this by claiming to have a delay slot if there
+ is a preceding label, and outputting the delay slot insn instead, if
+ present.
+ Or we could have some optimization that changes the source edge to update
+ the loop count and jump to the loop start instead. */
+ /* For ARC600, we must also prevent jumps inside the loop and jumps where
+ the loop counter value is live at the target from being directly at the
+ loop end. Being sure that the loop counter is dead at the target is
+ too much hair - we can't rely on data flow information at this point -
+ so insert a nop for all branches.
+ The ARC600 also can't read the loop counter in the last insn of a loop. */
+ if (LABEL_P (prev))
+ output_asm_insn (\"nop%?\", operands);
+ return \"\\n.L__GCC__LP%2: ; loop end, start is %1\";
+}"
+ "&& memory_operand (operands[0], SImode)"
+ [(pc)]
+{
+ emit_move_insn (operands[3], operands[0]);
+ emit_jump_insn (gen_doloop_fallback_m (operands[3], operands[1], operands[0]));
+ DONE;
+}
+ [(set_attr "type" "loop_end")
+ (set (attr "length")
+ (if_then_else (ne (symbol_ref "LABEL_P (prev_nonnote_insn (insn))")
+ (const_int 0))
+ (const_int 4) (const_int 0)))]
+)
+
+; This pattern is generated by arc_reorg when there is no recognizable
+; loop start.
+(define_insn "*doloop_fallback"
+ [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+r,!w")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))]
+ ; avoid fooling the loop optimizer into assuming this is a special insn.
+ "reload_completed"
+ "*return get_attr_length (insn) == 8
+ ? \"brne.d %0,1,%1\;sub %0,%0,1\"
+ : \"breq %0,1,0f\;b.d %1\;sub %0,%0,1\\n0:\";"
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -256))
+ (le (minus (match_dup 1) (pc)) (const_int 244)))
+ (const_int 8) (const_int 12)))
+ (set_attr "type" "brcc_no_delay_slot")
+ (set_attr "cond" "nocond")]
+)
+
+; reload can't make output reloads for jump insns, so we have to do this by hand.
+(define_insn "doloop_fallback_m"
+ [(set (pc) (if_then_else (ne (match_operand:SI 0 "register_operand" "+&r")
+ (const_int 1))
+ (label_ref (match_operand 1 "" ""))
+ (pc)))
+ (set (match_dup 0) (plus:SI (match_dup 0) (const_int -1)))
+ (set (match_operand:SI 2 "memory_operand" "=m")
+ (plus:SI (match_dup 0) (const_int -1)))]
+ ; avoid fooling the loop optimizer into assuming this is a special insn.
+ "reload_completed"
+ "*return get_attr_length (insn) == 12
+ ? \"sub %0,%0,1\;brne.d %0,0,%1\;st%U2%V2 %0,%2\"
+ : \"sub %0,%0,1\;breq %0,0,0f\;b.d %1\\n0:\tst%U2%V2 %0,%2\";"
+ [(set (attr "length")
+ (if_then_else (and (ge (minus (match_dup 1) (pc)) (const_int -252))
+ (le (minus (match_dup 1) (pc)) (const_int 244)))
+ (const_int 12) (const_int 16)))
+ (set_attr "type" "brcc_no_delay_slot")
+ (set_attr "cond" "nocond")]
+)
+
+(define_expand "movmemsi"
+ [(match_operand:BLK 0 "" "")
+ (match_operand:BLK 1 "" "")
+ (match_operand:SI 2 "nonmemory_operand" "")
+ (match_operand 3 "immediate_operand" "")]
+ ""
+ "if (arc_expand_movmem (operands)) DONE; else FAIL;")
+
+;; See http://gcc.gnu.org/bugzilla/show_bug.cgi?id=35803 for why we can't
+;; get rid of this bogosity.
+(define_expand "cmpsf"
+ [(set (reg:CC 61)
+ (compare:CC (match_operand:SF 0 "general_operand" "")
+ (match_operand:SF 1 "general_operand" "")))]
+ "TARGET_OPTFPE"
+ "
+{
+ arc_compare_op0 = operands[0];
+ arc_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_expand "cmpdf"
+ [(set (reg:CC 61)
+ (compare:CC (match_operand:DF 0 "general_operand" "")
+ (match_operand:DF 1 "general_operand" "")))]
+ "TARGET_OPTFPE && !TARGET_DPFP"
+ "
+{
+ arc_compare_op0 = operands[0];
+ arc_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_expand "cmp_float"
+ [(parallel [(set (match_operand 0 "") (match_operand 1 ""))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))])]
+ ""
+ "")
+
+(define_insn "*cmpsf_eq"
+ [(set (reg:CC_Z 61) (compare:CC_Z (reg:SF 0) (reg:SF 1)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_SPFP"
+ "*return arc_output_libcall (\"__eqsf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpdf_eq"
+ [(set (reg:CC_Z 61) (compare:CC_Z (reg:DF 0) (reg:DF 2)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_DPFP"
+ "*return arc_output_libcall (\"__eqdf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpsf_gt"
+ [(set (reg:CC_FP_GT 61) (compare:CC_FP_GT (reg:SF 0) (reg:SF 1)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_SPFP"
+ "*return arc_output_libcall (\"__gtsf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpdf_gt"
+ [(set (reg:CC_FP_GT 61) (compare:CC_FP_GT (reg:DF 0) (reg:DF 2)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_DPFP"
+ "*return arc_output_libcall (\"__gtdf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpsf_ge"
+ [(set (reg:CC_FP_GE 61) (compare:CC_FP_GE (reg:SF 0) (reg:SF 1)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_SPFP"
+ "*return arc_output_libcall (\"__gesf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpdf_ge"
+ [(set (reg:CC_FP_GE 61) (compare:CC_FP_GE (reg:DF 0) (reg:DF 2)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_DPFP"
+ "*return arc_output_libcall (\"__gedf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpsf_uneq"
+ [(set (reg:CC_FP_UNEQ 61) (compare:CC_FP_UNEQ (reg:SF 0) (reg:SF 1)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_SPFP"
+ "*return arc_output_libcall (\"__uneqsf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpdf_uneq"
+ [(set (reg:CC_FP_UNEQ 61) (compare:CC_FP_UNEQ (reg:DF 0) (reg:DF 2)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_DPFP"
+ "*return arc_output_libcall (\"__uneqdf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "*cmpsf_ord"
+ [(set (reg:CC_FP_ORD 61) (compare:CC_FP_ORD (reg:SF 0) (reg:SF 1)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_SPFP"
+ "*return arc_output_libcall (\"__ordsf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+;; N.B. double precision fpx sets bit 31 for NaNs. We need bit 51 set
+;; for the floating point emulation to recognize the NaN.
+(define_insn "*cmpdf_ord"
+ [(set (reg:CC_FP_ORD 61) (compare:CC_FP_ORD (reg:DF 0) (reg:DF 2)))
+ (clobber (reg:SI 31))
+ (clobber (reg:SI 12))]
+ "TARGET_OPTFPE && !TARGET_DPFP"
+ "*return arc_output_libcall (\"__orddf2\");"
+ [(set_attr "is_sfunc" "yes")]
+)
+
+(define_insn "abssf2"
+ [(set (match_operand:SF 0 "dest_reg_operand" "=Rcq#q,Rcw,w")
+ (abs:SF (match_operand:SF 1 "register_operand" "0, 0,c")))]
+ ""
+ "bclr%? %0,%1,31%&"
+ [(set_attr "type" "unary")
+ (set_attr "iscompact" "maybe,false,false")
+ (set_attr "length" "2,4,4")
+ (set_attr "cond" "canuse,canuse,nocond")])
+
+(define_insn "negsf2"
+ [(set (match_operand:SF 0 "dest_reg_operand" "=Rcw,w")
+ (neg:SF (match_operand:SF 1 "register_operand" "0,c")))]
+ ""
+ "bxor%? %0,%1,31"
+ [(set_attr "type" "unary")
+ (set_attr "cond" "canuse,nocond")])
+
+;; ??? Should this use arc_output_libcall and set is_sfunc?
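+;; %0 below is taken from the next-to-last element of the parallel -
+;; presumably the highest register saved - so a thunk storing r13-r15,
+;; for example, would emit "bl __st_r13_to_r15" (illustrative range).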
+(define_insn "*millicode_thunk_st"
+ [(match_parallel 0 "millicode_store_operation"
+ [(set (mem:SI (reg:SI SP_REG)) (reg:SI 13))])]
+ ""
+{
+ output_asm_insn ("bl%* __st_r13_to_%0",
+ &SET_SRC (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2)));
+ return "";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*millicode_thunk_ld"
+ [(match_parallel 0 "millicode_load_clob_operation"
+ [(set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])]
+ ""
+{
+ output_asm_insn ("bl%* __ld_r13_to_%0",
+ &SET_DEST (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 2)));
+ return "";
+}
+ [(set_attr "type" "call")])
+
+(define_insn "*millicode_sibthunk_ld"
+ [(match_parallel 0 "millicode_load_operation"
+ [(return)
+ (set (reg:SI SP_REG) (plus:SI (reg:SI SP_REG) (reg:SI 12)))
+ (set (reg:SI 13) (mem:SI (reg:SI SP_REG)))])]
+ ""
+{
+ output_asm_insn ("b%* __ld_r13_to_%0_ret",
+ &SET_DEST (XVECEXP (operands[0], 0,
+ XVECLEN (operands[0], 0) - 1)));
+ return "";
+}
+ [(set_attr "type" "call")
+ (set_attr "is_SIBCALL" "yes")])
+
+;; If hardware floating point is available, don't define a negdf pattern;
+;; it would be something like:
+;;(define_insn "negdf2"
+;; [(set (match_operand:DF 0 "register_operand" "=w,w,D,?r")
+;; (neg:DF (match_operand:DF 1 "register_operand" "0,c,D,D")))
+;; (clobber (match_scratch:DF 2 "=X,X,X,X,D1"))]
+;; ""
+;; "@
+;; bxor%? %H0,%H1,31
+;; bxor %H0,%H1,31 ` mov %L0,%L1
+;; drsubh%F0%F1 0,0,0
+;; drsubh%F2%F1 %H0,0,0 ` dexcl%F2 %L0,%H0,%L0"
+;; [(set_attr "type" "unary,unary,dpfp_addsub,dpfp_addsub")
+;; (set_attr "iscompact" "false,false,false,false")
+;; (set_attr "length" "4,4,8,12")
+;; (set_attr "cond" "canuse,nocond,nocond,nocond")])
+;; and this suffers from always requiring a long immediate when using
+;; the floating point hardware.
+;; We then want the sub[sd]f patterns to be used, so that we can load the
+;; constant zero efficiently into a register when we want to do the
+;; computation using the floating point hardware. There should be a special
+;; subdf alternative that matches a zero operand 1, which would then allow
+;; us to use bxor to flip the high bit of an integer register.
+;; ??? We actually can't use the floating point hardware for neg, because
+;; this would not work right for -0. OTOH optabs.c already has code
+;; to synthesize negate by flipping the sign bit.
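+;; Concretely: synthesizing negation as 0.0 - x fails for x = +0.0,
+;; since 0.0 - (+0.0) rounds to +0.0 while -(+0.0) must be -0.0;
+;; flipping the sign bit is exact for every input, including NaNs.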
+
+
+;; include the arc-FPX instructions
+(include "fpx.md")
+
+(include "simdext.md")
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index dc47fc8f890..f746aa88491 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -1,6 +1,6 @@
; Options for the Argonaut ARC port of the compiler
;
-; Copyright (C) 2005, 2007 Free Software Foundation, Inc.
+; Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
;
; This file is part of GCC.
;
@@ -18,14 +18,13 @@
; along with GCC; see the file COPYING3. If not see
; <http://www.gnu.org/licenses/>.
-malign-loops
-Target Undocumented Report Mask(ALIGN_LOOPS)
-
mbig-endian
-Target Undocumented Report RejectNegative Mask(BIG_ENDIAN)
+Target Report RejectNegative Mask(BIG_ENDIAN)
+Compile code for big-endian mode
mlittle-endian
-Target Undocumented Report RejectNegative InverseMask(BIG_ENDIAN)
+Target Report RejectNegative InverseMask(BIG_ENDIAN)
+Compile code for little-endian mode. This is the default
mmangle-cpu
Target Report Mask(MANGLE_CPU)
@@ -35,11 +34,124 @@ Prepend the name of the cpu to all public symbol names
; Target Undocumented Mask(MANGLE_CPU_LIBGC)
mno-cond-exec
-Target Undocumented Report RejectNegative Mask(NO_COND_EXEC)
+Target Report RejectNegative Mask(NO_COND_EXEC)
+Do not generate conditional execution instructions
+
+mA4
+Target Report Mask(A4)
+Generate code for ARCtangent-A4 processor. This is the default
+
+mA5
+Target Report Mask(A5)
+Generate ARCompact 32-bit code for ARCtangent-A5 processor
+
+mA6
+Target Report Mask(ARC600)
+Generate ARCompact 32-bit code for ARCtangent-ARC600 processor
+
+mARC600
+Target Report Mask(ARC600) MaskExists
+Same as -mA6
+
+mA7
+Target Report Mask(ARC700)
+Generate ARCompact 32-bit code for ARCtangent-ARC700 processor
+
+mARC700
+Target Report Mask(ARC700) MaskExists
+Same as -mA7
+
+mmixed-code
+Target Report Mask(MIXED_CODE_SET)
+Generate ARCompact 16-bit instructions intermixed with 32-bit instructions for ARCtangent-A5 and higher processors
+
+mvolatile-cache
+Target Report Mask(VOLATILE_CACHE_SET)
+Enable cache bypass for volatile references
+
+mno-volatile-cache
+Target Report InverseMask(VOLATILE_CACHE_SET)
+Disable cache bypass for volatile references
+
+mbarrel_shifter
+Target Report Mask(BARREL_SHIFTER_SET)
+Generate instructions supported by barrel shifter
+
+mnorm
+Target Report Mask(NORM_SET)
+Generate norm instruction
+
+mswap
+Target Report Mask(SWAP_SET)
+Generate swap instruction
+
+mmul64
+Target Report Mask(MUL64_SET)
+Generate mul64 and mulu64 instructions
+
+mno-mpy
+Target Report Mask(NOMPY_SET)
+Do not generate mpy instructions for ARC700
+
+mEA
+Target Report Mask(EA_SET)
+Generate extended arithmetic instructions. Currently only divaw is supported
+
+mmin_max
+Target Report Mask(MINMAX_SET)
+Generate min and max instructions
+
+msoft-float
+Target Report Mask(0)
+Dummy flag. This is the default unless FPX switches are provided explicitly
+
+mlong-calls
+Target Report Mask(LONG_CALLS_SET)
+Generate call insns as register indirect calls
+
+mno-brcc
+Target Report Mask(NO_BRCC_SET)
+Do not generate BRcc instructions.
+
+mno-sdata
+Target Report Mask(NO_SDATA_SET)
+Do not generate sdata references
+
+mno-millicode
+Target Report Mask(NO_MILLICODE_THUNK_SET)
+Do not generate millicode thunks (needed only with -Os)
+
+mspfp
+Target Report Mask(SPFP_COMPACT_SET)
+FPX: Generate Single Precision FPX (compact) instructions.
+
+mspfp_compact
+Target Report Mask(SPFP_COMPACT_SET) MaskExists
+FPX: Generate Single Precision FPX (compact) instructions.
+
+mspfp_fast
+Target Report Mask(SPFP_FAST_SET)
+FPX: Generate Single Precision FPX (fast) instructions.
+
+mdpfp
+Target Report Mask(DPFP_COMPACT_SET)
+FPX: Generate Double Precision FPX (compact) instructions.
+
+mdpfp_compact
+Target Report Mask(DPFP_COMPACT_SET) MaskExists
+FPX: Generate Double Precision FPX (compact) instructions.
+
+mdpfp_fast
+Target Report Mask(DPFP_FAST_SET)
+FPX: Generate Double Precision FPX (fast) instructions.
-mcpu=
-Target RejectNegative Joined Var(arc_cpu_string) Init("base")
--mcpu=CPU Compile code for ARC variant CPU
+msimd
+Target Report Mask(SIMD_SET)
+Enable generation of ARC SIMD instructions via target-specific builtins.
+
+;mcpu=
+;Target RejectNegative Joined Var(arc_cpu_string) Init("base")
+;-mcpu=CPU Compile code for ARC variant CPU
mtext=
Target RejectNegative Joined Var(arc_text_string) Init(ARC_DEFAULT_TEXT_SECTION)
@@ -52,3 +164,113 @@ Target RejectNegative Joined Var(arc_data_string) Init(ARC_DEFAULT_DATA_SECTION)
mrodata=
Target RejectNegative Joined Var(arc_rodata_string) Init(ARC_DEFAULT_RODATA_SECTION)
-mrodata=SECTION Put read-only data in SECTION
+
+mexperimental-mask=
+Target RejectNegative Joined UInteger Var(arc_experimental_mask) Init(1)
+For experimental options
+
+msize-level=
+Target RejectNegative Joined UInteger Var(arc_size_opt_level) Init(-1)
+Size optimization level: 0:none, 1:opportunistic, 2:regalloc, 3:drop align, -Os
+
+misize
+Target Report Var(TARGET_DUMPISIZE)
+Annotate assembler instructions with estimated addresses
+
+multcost=
+Target RejectNegative Joined UInteger Var(arc_multcost) Init(-1)
+Cost to assume for a multiply instruction, with 4 being equal to a normal insn.
+
+mtune=arc600
+Target RejectNegative Var(arc_tune, TUNE_ARC600)
+Tune for ARC600 CPU.
+
+mtune=arc700
+Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_STD)
+Tune for ARC700 R4.2 CPU with standard multiplier block.
+
+mtune=arc700-xmac
+Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC)
+Tune for ARC700 R4.2 CPU with XMAC block.
+
+mtune=ARC725D
+Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC)
+Tune for ARC700 R4.2 CPU with XMAC block.
+
+mtune=ARC750D
+Target RejectNegative Var(arc_tune, TUNE_ARC700_4_2_XMAC)
+Tune for ARC700 R4.2 CPU with XMAC block.
+
+mindexed-loads
+Target Var(TARGET_INDEXED_LOADS)
+Enable the use of indexed loads
+
+mauto-modify-reg
+Target Var(TARGET_AUTO_MODIFY_REG)
+Enable the use of pre/post modify with register displacement.
+
+mold-di-patterns
+Target Var(TARGET_OLD_DI_PATTERNS)
+Enable use of old DI patterns that have presumably been obsoleted by subreg lowering.
+
+mdynamic
+Target RejectNegative
+Use dynamic libraries for linking
+
+mmul32x16
+Target Report Mask(MULMAC_32BY16_SET)
+Generate 32x16 multiply and mac instructions
+
+m2addr
+Target Var(TARGET_2ADDR)
+Make two-address instruction alternatives visible to reload.
+
+; The initializer is supposed to be Init(REG_BR_PROB_BASE/2); alas,
+; basic-block.h is not included in options.c, so the value is spelled out
+; (REG_BR_PROB_BASE is 10000).
+munalign-prob-threshold=
+Target RejectNegative Joined UInteger Var(arc_unalign_prob_threshold) Init(10000/2)
+Set probability threshold for unaligning branches
+
+mmedium-calls
+Target Var(TARGET_MEDIUM_CALLS)
+Don't use less than a 25-bit addressing range for calls.
+
+mannotate-align
+Target Var(TARGET_ANNOTATE_ALIGN)
+Explain what alignment considerations lead to the decision to make an insn short or long.
+
+malign-call
+Target Var(TARGET_ALIGN_CALL)
+Do alignment optimizations for call instructions.
+
+mRcq
+Target Var(TARGET_Rcq)
+Enable Rcq constraint handling - most short code generation depends on this.
+
+mRcw
+Target Var(TARGET_Rcw)
+Enable Rcw constraint handling - ccfsm condexec mostly depends on this.
+
+mearly-cbranchsi
+Target Var(TARGET_EARLY_CBRANCHSI)
+Enable pre-reload use of cbranchsi pattern
+
+mbbit-peephole
+Target Var(TARGET_BBIT_PEEPHOLE)
+Enable bbit peephole2
+
+mcase-vector-pcrel
+Target Var(TARGET_CASE_VECTOR_PC_RELATIVE)
+Use pc-relative switch case tables - this enables case table shortening.
+
+mcompact-casesi
+Target Var(TARGET_COMPACT_CASESI)
+Enable compact casesi pattern
+
+mq-class
+Target Var(TARGET_Q_CLASS)
+Enable 'q' instruction alternatives.
+
+mexpand-adddi
+Target Var(TARGET_EXPAND_ADDDI)
+Expand adddi3 and subdi3 at rtl generation time into add.f / adc etc.
diff --git a/gcc/config/arc/arc600.md b/gcc/config/arc/arc600.md
new file mode 100644
index 00000000000..74dde244aa4
--- /dev/null
+++ b/gcc/config/arc/arc600.md
@@ -0,0 +1,61 @@
+;; DFA scheduling description of the ARC600 cpu for GNU C compiler
+;; Written by Joern Rennecke (joern.rennecke@arc.com)
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ARC600")
+
+(define_cpu_unit "issue_600" "ARC600")
+(define_cpu_unit "mul64_600" "ARC600")
+
+; latency from flag-setting insns to branches is 3.
+(define_insn_reservation "compare_600" 3
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "compare"))
+ "issue_600")
+
+(define_insn_reservation "load_DI_600" 4
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "load")
+ (match_operand:DI 0 "" ""))
+ "issue_600")
+
+(define_insn_reservation "load_600" 3
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "load")
+ (not (match_operand:DI 0 "" "")))
+ "issue_600")
+
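+; arc_multcost is measured in COSTS_N_INSNS units (COSTS_N_INSNS (1) == 4),
+; so the cutoff below separates -multcost settings under 28 from the rest.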
+(define_insn_reservation "mul_600_fast" 3
+ (and (eq_attr "tune" "arc600")
+ (ne (symbol_ref "arc_multcost < COSTS_N_INSNS (7)") (const_int 0))
+ (eq_attr "type" "multi,umulti"))
+ "mul64_600*3")
+
+(define_insn_reservation "mul_600_slow" 8
+ (and (eq_attr "tune" "arc600")
+ (ne (symbol_ref "arc_multcost >= COSTS_N_INSNS (7)") (const_int 0))
+ (eq_attr "type" "multi,umulti"))
+ "mul64_600*8")
+
+(define_insn_reservation "mul_mac_600" 3
+ (and (eq_attr "tune" "arc600")
+ (eq_attr "type" "mulmac_600"))
+ "nothing*3")
+
+(define_bypass 1 "mul_mac_600" "mul_mac_600")
diff --git a/gcc/config/arc/arc700.md b/gcc/config/arc/arc700.md
new file mode 100644
index 00000000000..8c8302126e1
--- /dev/null
+++ b/gcc/config/arc/arc700.md
@@ -0,0 +1,170 @@
+;; DFA scheduling description of the ARC700 cpu for GNU C compiler
+;; Comments and Support For ARC700 instructions added by
+;; Saurabh Verma (saurabh.verma@codito.com)
+;; Ramana Radhakrishnan(ramana.radhakrishnan@codito.com)
+;; Copyright (C) 2006 Free Software Foundation, Inc.
+;; Factoring out and improvement of ARC700 Scheduling by
+;; Joern Rennecke (joern.rennecke@arc.com)
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "ARC700")
+
+;; aux to be added here
+(define_cpu_unit "core, dmp, write_port, dmp_write_port, multiplier, issue, blockage, simd_unit" "ARC700")
+
+(define_insn_reservation "core_insn_DI" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "unary, move, cmove, binary")
+ (match_operand:DI 0 "" ""))
+ "issue+core, issue+core+write_port, write_port")
+
+(define_insn_reservation "lr" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "lr"))
+ "issue+blockage, blockage*2, write_port")
+
+(define_insn_reservation "sr" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "sr"))
+ "issue+dmp_write_port+blockage, blockage*9")
+
+(define_insn_reservation "core_insn" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "unary, move, binary"))
+ "issue+core, nothing, write_port")
+
+(define_insn_reservation "cmove" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "cmove"))
+ "issue+core, nothing, write_port")
+
+(define_insn_reservation "cc_arith" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "cc_arith"))
+ "issue+core, nothing, write_port")
+
+(define_insn_reservation "two_cycle_core_insn" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "two_cycle_core"))
+ "issue+core, nothing, write_port")
+
+(define_insn_reservation "divaw_insn" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "divaw"))
+ "issue+core, nothing, write_port")
+
+(define_insn_reservation "shift_insn" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "shift"))
+ "issue+core, nothing, write_port")
+
+; Latency from flag setters to arithmetic with carry is 3.
+(define_insn_reservation "compare_700" 3
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "compare"))
+ "issue+core, nothing, write_port")
+
+; Assume here the branch is predicted correctly and has a delay slot insn
+; or is properly unaligned.
+(define_insn_reservation "branch_700" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "compare"))
+ "issue+core, nothing, write_port")
+
+; TODO: is this correct?
+(define_insn_reservation "multi_DI" 10
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "multi")
+ (match_operand:DI 0 "" ""))
+ "issue+multiplier, multiplier*2,issue+multiplier, multiplier*2,
+ nothing,write_port,nothing*2, write_port")
+
+(define_insn_reservation "umulti_DI" 9
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "umulti")
+ (match_operand:DI 0 "" ""))
+ "issue+multiplier, multiplier,issue+multiplier, multiplier*2,
+ write_port,nothing*3, write_port")
+
+(define_insn_reservation "umulti_xmac" 5
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "umulti"))
+ "issue+multiplier, multiplier, nothing*3, write_port")
+
+; latency of mpyu is lower than mpy / mpyh / mpyhu
+(define_insn_reservation "umulti_std" 6
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "umulti"))
+ "issue+multiplier, multiplier*3, nothing*2, write_port")
+
+;; arc700 xmac multiplier
+(define_insn_reservation "multi_xmac" 5
+ (and (eq_attr "tune" "arc700_4_2_xmac")
+ (eq_attr "type" "multi"))
+ "issue+multiplier,multiplier,nothing*3,write_port")
+
+; arc700 standard multiplier
+(define_insn_reservation "multi_std" 7
+ (and (eq_attr "tune" "arc700_4_2_std")
+ (eq_attr "type" "multi"))
+ "issue+multiplier,multiplier*4,nothing*2,write_port")
+
+;(define_insn_reservation "multi_SI" 7
+; (eq_attr "type" "multi")
+; "issue+multiplier, multiplier*2, nothing*4, write_port")
+
+; There is no multiplier -> multiplier bypass except for the
+; mac -> mac dependency on the accumulator.
+
+; divaw -> divaw latency is 1 cycle
+(define_bypass 1 "divaw_insn" "divaw_insn")
+
+(define_bypass 1 "compare_700" "branch_700,core_insn,data_store,data_load")
+
+; we could schedule the cmove immediately after the compare, but then
+; the cmove would have higher latency... so just keep the cmove apart
+; from the compare.
+(define_bypass 2 "compare_700" "cmove")
+
+; no functional unit runs when blockage is reserved
+(exclusion_set "blockage" "core, multiplier")
+
+(define_insn_reservation "data_load_DI" 4
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "load")
+ (match_operand:DI 0 "" ""))
+ "issue+dmp, issue+dmp, dmp_write_port, dmp_write_port")
+
+(define_insn_reservation "data_load" 3
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "load")
+ (not (match_operand:DI 0 "" "")))
+ "issue+dmp, nothing, dmp_write_port")
+
+(define_insn_reservation "data_store_DI" 2
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "store")
+ (match_operand:DI 0 "" ""))
+ "issue+dmp_write_port, issue+dmp_write_port")
+
+(define_insn_reservation "data_store" 1
+ (and (eq_attr "tune_arc700" "true")
+ (eq_attr "type" "store")
+ (not (match_operand:DI 0 "" "")))
+ "issue+dmp_write_port")
diff --git a/gcc/config/arc/asm.h b/gcc/config/arc/asm.h
new file mode 100644
index 00000000000..ffcc7cf0abe
--- /dev/null
+++ b/gcc/config/arc/asm.h
@@ -0,0 +1,2 @@
+#define FUNC(X) .type X,@function
+#define ENDFUNC(X) .size X, .-X
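+/* Illustrative use in the accompanying .S files:
+     FUNC(__addsf3)
+   __addsf3:
+     ...
+     ENDFUNC(__addsf3)
+   telling the assembler the symbol's type and size.  */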
diff --git a/gcc/config/arc/constraints.md b/gcc/config/arc/constraints.md
new file mode 100644
index 00000000000..f4ca7081d34
--- /dev/null
+++ b/gcc/config/arc/constraints.md
@@ -0,0 +1,396 @@
+;; Constraint definitions for ARC.
+;; Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Register constraints
+
+; Most instructions accept arbitrary core registers for their inputs, even
+; if the core register in question cannot be written to, like the multiply
+; result registers of the ARCtangent-A5 and ARC600.
+; First, define a class for core registers that can be read cheaply. This
+; is most or all core registers for ARC600, but only r0-r31 for ARC700.
+(define_register_constraint "c" "CHEAP_CORE_REGS"
+ "core register @code{r0}-@code{r31}, @code{ap},@code{pcl}")
+
+; All core regs - e.g. for when we must have a way to reload a register.
+(define_register_constraint "Rac" "ALL_CORE_REGS"
+ "core register @code{r0}-@code{r60}, @code{ap},@code{pcl}")
+
+; Some core registers (e.g. lp_count) aren't general registers because they
+; can't be used as the destination of a multi-cycle operation like
+; load and/or multiply, yet they are still writable in the sense that
+; register-register moves and single-cycle arithmetic (e.g "add", "and",
+; but not "mpy") can write to them.
+(define_register_constraint "w" "WRITABLE_CORE_REGS"
+ "writable core register: @code{r0}-@code{r31}, @code{r60}, nonfixed core register")
+
+(define_register_constraint "Rcw"
+ "(TARGET_2ADDR ? WRITABLE_CORE_REGS : NO_REGS)"
+ "@internal
+ 'w' constraint variant for use in early alternatives with matching constraint")
+
+(define_register_constraint "Rcr"
+ "(TARGET_2ADDR ? GENERAL_REGS : NO_REGS)"
+ "@internal
+ 'r' constraint variant for use in early alternatives with matching constraint")
+
+(define_register_constraint "l" "LPCOUNT_REG"
+ "@internal
+ Loop count register @code{r60}")
+
+(define_register_constraint "x" "R0_REG"
+ "@code{R0} register.")
+
+(define_register_constraint "Rgp" "GP_REG"
+ "@internal
+ Global Pointer register @code{r26}")
+
+(define_register_constraint "f" "FP_REG"
+ "@internal
+ Frame Pointer register @code{r27}")
+
+(define_register_constraint "b" "SP_REG"
+ "@internal
+ Stack Pointer register @code{r28}")
+
+(define_register_constraint "k" "LINK_REGS"
+ "@internal
+ Link Registers @code{ilink1}:@code{r29}, @code{ilink2}:@code{r30},
+   @code{blink}:@code{r31}"
+
+(define_register_constraint "q" "ARCOMPACT16_REGS"
+ "Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
+ @code{r12}-@code{r15}")
+
+(define_register_constraint "e" "AC16_BASE_REGS"
+ "Registers usable as base-regs of memory addresses in ARCompact 16-bit memory
+ instructions: @code{r0}-@code{r3}, @code{r12}-@code{r15}, @code{sp}")
+
+(define_register_constraint "D" "DOUBLE_REGS"
+ "ARC FPX (dpfp) 64-bit registers. @code{D0}, @code{D1}")
+
+(define_register_constraint "d" "SIMD_DMA_CONFIG_REGS"
+ "@internal
+ ARC SIMD DMA configuration registers @code{di0}-@code{di7},
+ @code{do0}-@code{do7}")
+
+(define_register_constraint "v" "SIMD_VR_REGS"
+ "ARC SIMD 128-bit registers @code{VR0}-@code{VR23}")
+
+; We could allow call-saved registers for sibling calls if we restored them
+; in the delay slot of the call. However, that would not allow us to adjust
+; the stack pointer afterwards, so the call-saved register would have to be
+; restored from a call-used register that had been loaded with the value
+; beforehand. So sticking to call-used registers for sibcalls will likely
+; generate better code overall.
+(define_register_constraint "Rsc" "SIBCALL_REGS"
+ "@internal
+ Sibling call register")
+
+;; Integer constraints
+
+(define_constraint "I"
+ "@internal
+ For ARCtangent-A4, an integer constant in the range -256 to 255. For other
+ ARC cores a signed 12-bit integer constant."
+ (and (match_code "const_int")
+ (match_test "(TARGET_A4 ? SMALL_INT (ival) : SIGNED_INT12 (ival))")))
+
+(define_constraint "J"
+ "@internal
+ A 32-bit signed integer constant"
+ (and (match_code "const_int")
+ (match_test "LARGE_INT (ival)")))
+
+(define_constraint "K"
+ "@internal
+ A 3-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT3 (ival)")))
+
+(define_constraint "L"
+ "@internal
+ A 6-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT6 (ival)")))
+
+(define_constraint "CnL"
+ "@internal
+ One's complement of a 6-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT6 (~ival)")))
+
+(define_constraint "CmL"
+ "@internal
+ Two's complement of a 6-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT6 (-ival)")))
+
+(define_constraint "M"
+ "@internal
+ A 5-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT5 (ival)")))
+
+(define_constraint "N"
+ "@internal
+ Integer constant 1"
+ (and (match_code "const_int")
+ (match_test "IS_ONE (ival)")))
+
+(define_constraint "O"
+ "@internal
+ A 7-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT7 (ival)")))
+
+(define_constraint "P"
+ "@internal
+   An 8-bit unsigned integer constant"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT8 (ival)")))
+
+(define_constraint "C_0"
+ "@internal
+ Zero"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
+
+(define_constraint "Cca"
+ "@internal
+ Conditional or three-address add / sub constant"
+ (and (match_code "const_int")
+ (match_test "ival == -1 << 31
+ || (ival >= -0x1f8 && ival <= 0x1f8
+ && ((ival >= 0 ? ival : -ival)
+ <= 0x3f * (ival & -ival)))")))
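+; For example, 100 (lowest set bit 4) satisfies the test, as 100 <= 0x3f * 4,
+; whereas 65 (lowest set bit 1) does not, since 65 > 0x3f.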
+
+; intersection of "O" and "Cca".
+(define_constraint "CL2"
+ "@internal
+ A 6-bit unsigned integer constant times 2"
+ (and (match_code "const_int")
+ (match_test "!(ival & ~126)")))
+
+(define_constraint "CM4"
+ "@internal
+ A 5-bit unsigned integer constant times 4"
+ (and (match_code "const_int")
+ (match_test "!(ival & ~124)")))
+
+(define_constraint "Csp"
+ "@internal
+ A valid stack pointer offset for a short add"
+ (and (match_code "const_int")
+ (match_test "!(ival & ~124) || !(-ival & ~124)")))
+
+(define_constraint "C2a"
+ "@internal
+ Unconditional two-address add / sub constant"
+ (and (match_code "const_int")
+ (match_test "ival == -1 << 31
+ || (ival >= -0x4000 && ival <= 0x4000
+ && ((ival >= 0 ? ival : -ival)
+ <= 0x7ff * (ival & -ival)))")))
+
+(define_constraint "C0p"
+ "@internal
+ power of two"
+ (and (match_code "const_int")
+ (match_test "IS_POWEROF2_P (ival)")))
+
+(define_constraint "C1p"
+ "@internal
+ constant such that x+1 is a power of two, and x != 0"
+ (and (match_code "const_int")
+ (match_test "ival && IS_POWEROF2_P (ival + 1)")))
+
+(define_constraint "Ccp"
+ "@internal
+   constant such that ~x (one's complement) is a power of two"
+ (and (match_code "const_int")
+ (match_test "IS_POWEROF2_P (~ival)")))
+
+(define_constraint "Cux"
+ "@internal
+ constant such that AND gives an unsigned extension"
+ (and (match_code "const_int")
+ (match_test "ival == 0xff || ival == 0xffff")))
+
+(define_constraint "Crr"
+ "@internal
+ constant that can be loaded with ror b,u6"
+ (and (match_code "const_int")
+ (match_test "(ival & ~0x8000001f) == 0 && !arc_ccfsm_cond_exec_p ()")))
+
+;; Floating-point constraints
+
+(define_constraint "G"
+ "@internal
+ A 32-bit constant double value"
+ (and (match_code "const_double")
+ (match_test "arc_double_limm_p (op)")))
+
+(define_constraint "H"
+ "@internal
+ All const_double values (including 64-bit values)"
+ (and (match_code "const_double")
+ (match_test "1")))
+
+;; Memory constraints
+(define_memory_constraint "T"
+ "@internal
+ A valid memory operand for ARCompact load instructions"
+ (and (match_code "mem")
+ (match_test "compact_load_memory_operand (op, VOIDmode)")))
+
+(define_memory_constraint "S"
+ "@internal
+ A valid memory operand for ARCompact store instructions"
+ (and (match_code "mem")
+ (match_test "compact_store_memory_operand (op, VOIDmode)")))
+
+(define_memory_constraint "Usd"
+ "@internal
+ A valid _small-data_ memory operand for ARCompact instructions"
+ (and (match_code "mem")
+ (match_test "compact_sda_memory_operand (op, VOIDmode)")))
+
+(define_memory_constraint "Usc"
+ "@internal
+ A valid memory operand for storing constants"
+ (and (match_code "mem")
+ (match_test "!CONSTANT_P (XEXP (op,0))")
+;; ??? the assembler rejects stores of immediates to small data.
+ (match_test "!compact_sda_memory_operand (op, VOIDmode)")))
+
+(define_memory_constraint "Us<"
+ "@internal
+ Stack pre-decrement"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == PRE_DEC")
+ (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+ (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG")))
+
+(define_memory_constraint "Us>"
+ "@internal
+ Stack post-increment"
+ (and (match_code "mem")
+ (match_test "GET_CODE (XEXP (op, 0)) == POST_INC")
+ (match_test "REG_P (XEXP (XEXP (op, 0), 0))")
+ (match_test "REGNO (XEXP (XEXP (op, 0), 0)) == SP_REG")))
+
+;; General constraints
+
+(define_constraint "Cbr"
+ "Branch destination"
+ (ior (and (match_code "symbol_ref")
+ (match_test "!arc_is_longcall_p (op)"))
+ (match_code "label_ref")))
+
+(define_constraint "Cpc"
+ "pc-relative constant"
+ (match_test "arc_legitimate_pc_offset_p (op)"))
+
+(define_constraint "Clb"
+ "label"
+ (and (match_code "label_ref")
+ (match_test "arc_text_label (XEXP (op, 0))")))
+
+(define_constraint "Cal"
+ "constant for arithmetic/logical operations"
+ (match_test "immediate_operand (op, VOIDmode) && !arc_legitimate_pc_offset_p (op)"))
+
+(define_constraint "C32"
+ "32 bit constant for arithmetic/logical operations"
+ (match_test "immediate_operand (op, VOIDmode)
+ && !arc_legitimate_pc_offset_p (op)
+ && !satisfies_constraint_I (op)"))
+
+; Note that the 'cryptic' register constraints will not make reload use the
+; associated class to reload into, but this will not penalize reloading of any
+; other operands, or using an alternate part of the same alternative.
+
+; Rcq is different in three important ways from a register class constraint:
+; - It does not imply a register class, hence reload will not use it to drive
+; reloads.
+; - It matches even when there is no register class to describe its accepted
+; set; not having such a set again lessens the impact on register allocation.
+; - It won't match when the instruction is conditionalized by the ccfsm.
+(define_constraint "Rcq"
+ "@internal
+ Cryptic q - for short insn generation while not affecting register allocation
+ Registers usable in ARCompact 16-bit instructions: @code{r0}-@code{r3},
+ @code{r12}-@code{r15}"
+  (and (match_code "reg")
+ (match_test "TARGET_Rcq
+ && !arc_ccfsm_cond_exec_p ()
+ && ((((REGNO (op) & 7) ^ 4) - 4) & 15) == REGNO (op)")))
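+; The REGNO test selects exactly r0-r3 and r12-r15; e.g. for r2,
+; (((2 & 7) ^ 4) - 4) & 15 == 2, and for r12, (((12 & 7) ^ 4) - 4) & 15
+; == 12, both matching, but for r5 it yields 13 != 5, so r5 is rejected.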
+
+; If we need a reload, we generally want to steer reload to use three-address
+; alternatives in preference of two-address alternatives, unless the
+; three-address alternative introduces a LIMM that is unnecessary for the
+; two-address alternative.
+(define_constraint "Rcw"
+ "@internal
+ Cryptic w - for use in early alternatives with matching constraint"
+  (and (match_code "reg")
+ (match_test
+ "TARGET_Rcw
+ && REGNO (op) < FIRST_PSEUDO_REGISTER
+ && TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS],
+ REGNO (op))")))
+
+(define_constraint "Rcr"
+ "@internal
+ Cryptic r - for use in early alternatives with matching constraint"
+  (and (match_code "reg")
+ (match_test
+ "TARGET_Rcw
+ && REGNO (op) < FIRST_PSEUDO_REGISTER
+ && TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS],
+ REGNO (op))")))
+
+(define_constraint "Rcb"
+ "@internal
+ Stack Pointer register @code{r28} - do not reload into its class"
+  (and (match_code "reg")
+ (match_test "REGNO (op) == 28")))
+
+(define_constraint "Rck"
+ "@internal
+   blink (useful for push_s / pop_s)"
+  (and (match_code "reg")
+ (match_test "REGNO (op) == 31")))
+
+(define_constraint "Rs5"
+ "@internal
+   sibcall register - only allow one of the five registers available to
+   16-bit insns. Registers usable in ARCompact 16-bit instructions:
+   @code{r0}-@code{r3}, @code{r12}"
+  (and (match_code "reg")
+ (match_test "!arc_ccfsm_cond_exec_p ()")
+ (ior (match_test "(unsigned) REGNO (op) <= 3")
+ (match_test "REGNO (op) == 12"))))
+
+
+(define_constraint "Q"
+ "@internal
+ Integer constant zero"
+ (and (match_code "const_int")
+ (match_test "IS_ZERO (ival)")))
diff --git a/gcc/config/arc/crtg.asm b/gcc/config/arc/crtg.asm
new file mode 100644
index 00000000000..0ffc492ef63
--- /dev/null
+++ b/gcc/config/arc/crtg.asm
@@ -0,0 +1,28 @@
+# This file contains code to start and stop profiling.
+#ifndef __A4__
+
+ .section .init
+ .global _init
+ .global _fini
+ .global __monstartup
+ mov_s r0,_init
+ mov_s r1,_fini
+ jl __monstartup
+
+ .section .__arc_profile_desc, "a"
+ .global __arc_profile_desc_secstart
+ .balign 4
+__arc_profile_desc_secstart:
+ .section .__arc_profile_forward, "a"
+ .global __arc_profile_forward_secstart
+ .balign 4
+__arc_profile_forward_secstart:
+ .section .__arc_profile_counters, "aw"
+ .global __arc_profile_counters_secstart
+ .balign 4
+__arc_profile_counters_secstart:
+
+ .section .fini
+ .global _mcleanup
+ jl _mcleanup
+#endif /* !A4 */
diff --git a/gcc/config/arc/crtgend.asm b/gcc/config/arc/crtgend.asm
new file mode 100644
index 00000000000..730fcae4833
--- /dev/null
+++ b/gcc/config/arc/crtgend.asm
@@ -0,0 +1,10 @@
+# This file contains code to start and stop profiling.
+#ifndef __A4__
+
+ .section .__arc_profile_desc, "a"
+ .global __arc_profile_desc_secend
+__arc_profile_desc_secend:
+ .section .__arc_profile_forward, "a"
+ .global __arc_profile_forward_secend
+__arc_profile_forward_secend:
+#endif /* !A4 */
diff --git a/gcc/config/arc/crti.asm b/gcc/config/arc/crti.asm
new file mode 100644
index 00000000000..0fc09c583ee
--- /dev/null
+++ b/gcc/config/arc/crti.asm
@@ -0,0 +1,14 @@
+# This file contains the stack frame setup for contents of the .fini and
+# .init sections.
+
+ .section .init
+ .global _init
+ .word 0
+_init:
+ push_s blink
+
+ .section .fini
+ .global _fini
+ .word 0
+_fini:
+ push_s blink
diff --git a/gcc/config/arc/crtn.asm b/gcc/config/arc/crtn.asm
new file mode 100644
index 00000000000..bf8cb0d3430
--- /dev/null
+++ b/gcc/config/arc/crtn.asm
@@ -0,0 +1,11 @@
+# This file just makes sure that the .fini and .init sections do in
+# fact return. This file is the last thing linked into any executable.
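+# Paired with the prologue pushed by crti.asm, each section thus runs:
+#   push_s blink ; <code gathered from other objects> ; pop_s blink
+#   j_s [blink]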
+
+ .section .init
+ pop_s blink
+ j_s [blink]
+
+
+ .section .fini
+ pop_s blink
+ j_s [blink]
diff --git a/gcc/config/arc/divtab-arc700.c b/gcc/config/arc/divtab-arc700.c
new file mode 100644
index 00000000000..ec659a59b60
--- /dev/null
+++ b/gcc/config/arc/divtab-arc700.c
@@ -0,0 +1,68 @@
+/* Copyright (C) 2004, 2006, 2007 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Calculate division table for ARC700 integer division
+ Contributed by Joern Rennecke
+ joern.rennecke@arc.com */
+
+#include <stdio.h>
+#include <math.h>
+
+int
+main ()
+{
+ int i, j;
+ unsigned x;
+ double q, r, err, max_err = -1;
+
+ puts("/* This table has been generated by divtab-arc700.c. */");
+ puts("\
+/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.\n\
+ For powers of two, we list unnormalized numbers instead. The values\n\
+ for powers of 2 are loaded, but not used. The value for 1 is actually\n\
+ the first instruction after .Lmuldiv. */\n\
+ .balign 4");
+ puts (".Ldivtab:\n");
+ for (i = 256; i >= 2; --i)
+ {
+ j = i < 0 ? -i : i;
+ if (j & (j-1))
+ while (j < 128)
+ j += j;
+ else
+ /* Power of two. */
+ j *= 128;
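+      /* Entry is ceil (2^39 / j): 4.*(1<<30)*128 == 2^39, and the
+         normalized j lies in (128,255] for non-powers of two, so the
+         value fits in 32 bits with its leading one in bit 31.  */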
+ q = 4.*(1<<30)*128/j;
+ r = ceil (q);
+ printf ("\t.long\t0x%X\n", (unsigned) r);
+ err = r - q;
+ if (err > max_err)
+ max_err = err;
+ }
+#if 0
+ printf ("\t/* maximum error: %f */\n", max_err);
+#endif
+  return 0;
+}
diff --git a/gcc/config/arc/dp-hack.h b/gcc/config/arc/dp-hack.h
new file mode 100644
index 00000000000..f1ce55220bb
--- /dev/null
+++ b/gcc/config/arc/dp-hack.h
@@ -0,0 +1,65 @@
+#define FINE_GRAINED_LIBRARIES
+#define ARC_DP_DEBUG 1
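+/* With ARC_DP_DEBUG set, the generic C routines are also built, but under
+   _c-suffixed names, so they can be compared against the hand-written
+   IEEE-754 assembly implementations.  */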
+#if !defined (__ARC_NORM__) || ARC_DP_DEBUG
+#define L_pack_df
+#define L_unpack_df
+#define L_make_df
+#define L_thenan_df
+#define L_sf_to_df
+#endif
+#ifndef __ARC_NORM__
+#define L_addsub_df
+#elif ARC_DP_DEBUG
+#define L_addsub_df
+#define __adddf3 __adddf3_c
+#define __subdf3 __subdf3_c
+#endif
+#ifndef __ARC_NORM__
+#define L_mul_df
+#elif ARC_DP_DEBUG
+#define L_mul_df
+#define __muldf3 __muldf3_c
+#endif
+#ifndef __ARC_NORM__
+#define L_div_df
+#define L_df_to_sf
+#define L_si_to_df
+#define L_df_to_si
+#define L_tf_to_usi /* need to define this instead of df_to_usi */
+#define L_usi_to_df
+#elif ARC_DP_DEBUG
+#define L_div_df
+#define __divdf3 __divdf3_c
+#define L_df_to_sf
+#define __truncdfsf2 __truncdfsf2_c
+#define L_si_to_df
+#define __floatsidf __floatsidf_c
+#define L_df_to_si
+#define __fixdfsi __fixdfsi_c
+#define L_tf_to_usi
+#define __fixunsdfsi __fixunsdfsi_c
+#define L_usi_to_df
+#define __floatunsidf __floatunsidf_c
+#endif
+#ifndef __ARC_NORM__
+#define L_fpcmp_parts_df
+#define L_compare_df
+#define L_eq_df
+#define L_ne_df
+#define L_gt_df
+#define L_ge_df
+#define L_lt_df
+#define L_le_df
+#define L_unord_df
+#define L_negate_df
+#elif ARC_DP_DEBUG
+#define L_fpcmp_parts_df
+#define L_eq_df
+#define __eqdf2 __eqdf2_c
+#define L_gt_df
+#define __gtdf2 __gtdf2_c
+#define L_ge_df
+#define __gedf2 __gedf2_c
+#define L_unord_df
+#define __unorddf2 __unorddf2_c
+#endif
diff --git a/gcc/config/arc/fp-hack.h b/gcc/config/arc/fp-hack.h
new file mode 100644
index 00000000000..c6434b5501f
--- /dev/null
+++ b/gcc/config/arc/fp-hack.h
@@ -0,0 +1,55 @@
+#define ARC_FP_DEBUG 1
+#define FINE_GRAINED_LIBRARIES
+#if !defined (__ARC_NORM__) || ARC_FP_DEBUG
+#define L_pack_sf
+#define L_unpack_sf
+#define L_make_sf
+#define L_thenan_sf
+#endif
+#ifndef __ARC_NORM__
+#define L_addsub_sf
+#define L_mul_sf
+#define L_div_sf
+#define L_sf_to_df
+#define L_si_to_sf
+#define L_sf_to_si
+#define L_usi_to_sf
+#elif ARC_FP_DEBUG
+#define L_addsub_sf
+#define __addsf3 __addsf3_c
+#define __subsf3 __subsf3_c
+#define L_mul_sf
+#define __mulsf3 __mulsf3_c
+#define L_div_sf
+#define __divsf3 __divsf3_c
+#define L_sf_to_df
+#define __extendsfdf2 __extendsfdf2_c
+#define L_si_to_sf
+#define __floatsisf __floatsisf_c
+#define L_sf_to_si
+#define __fixsfsi __fixsfsi_c
+#define L_usi_to_sf
+#define __floatunsisf __floatunsisf_c
+#endif
+#ifndef __ARC_NORM__
+#define L_fpcmp_parts_sf
+#define L_compare_sf
+#define L_eq_sf
+#define L_ne_sf
+#define L_gt_sf
+#define L_ge_sf
+#define L_lt_sf
+#define L_le_sf
+#define L_unord_sf
+#define L_negate_sf
+#elif ARC_FP_DEBUG
+#define L_fpcmp_parts_sf
+#define L_eq_sf
+#define __eqsf2 __eqsf2_c
+#define L_gt_sf
+#define __gtsf2 __gtsf2_c
+#define L_ge_sf
+#define __gesf2 __gesf2_c
+#define L_unord_sf
+#define __unordsf2 __unordsf2_c
+#endif
diff --git a/gcc/config/arc/fpx.md b/gcc/config/arc/fpx.md
new file mode 100644
index 00000000000..39f311a4a67
--- /dev/null
+++ b/gcc/config/arc/fpx.md
@@ -0,0 +1,604 @@
+;; Machine description of the Argonaut ARC cpu Floating Point extensions for GNU C compiler
+;; Copyright (C) 2007 Celunite, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; TODOs:
+;; dpfp blocks?
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Scheduler descriptions for the fpx instructions
+(define_insn_reservation "spfp_compact" 3
+ (eq_attr "type" "spfp")
+ "issue+core, nothing*2, write_port")
+
+(define_insn_reservation "spfp_fast" 6
+ (eq_attr "type" "spfp")
+ "issue+core, nothing*5, write_port")
+
+(define_insn_reservation "dpfp_compact_mult" 7
+ (and (ne (symbol_ref "TARGET_DPFP_COMPACT_SET") (const_int 0))
+ (eq_attr "type" "dpfp_mult"))
+ "issue+core, nothing*6, write_port")
+
+(define_insn_reservation "dpfp_compact_addsub" 5
+ (and (ne (symbol_ref "TARGET_DPFP_COMPACT_SET") (const_int 0))
+ (eq_attr "type" "dpfp_addsub"))
+ "issue+core, nothing*4, write_port")
+
+(define_insn_reservation "dpfp_fast" 5
+ (and (ne (symbol_ref "TARGET_DPFP_FAST_SET") (const_int 0))
+ (eq_attr "type" "dpfp_mult,dpfp_addsub"))
+ "issue+core, nothing*4, write_port")
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(define_insn "addsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ")
+ (plus:SF (match_operand:SF 1 "nonmemory_operand" "0,r,GJ,r,0")
+ (match_operand:SF 2 "nonmemory_operand" "I,rL,r,GJ,LrJ")))]
+; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float
+ "TARGET_SPFP"
+ "@
+ fadd %0,%1,%2
+ fadd %0,%1,%2
+ fadd %0,%S1,%2
+ fadd %0,%1,%S2
+ fadd%? %0,%1,%S2"
+ [(set_attr "type" "spfp")
+ (set_attr "length" "4,4,8,8,8")])
+
+(define_insn "subsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ")
+ (minus:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GJ,r,0")
+ (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GJ,LrJ")))]
+ ;"(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET";Add flag for float
+ "TARGET_SPFP"
+ "@
+ fsub %0,%1,%2
+ fsub %0,%1,%2
+ fsub %0,%S1,%2
+ fsub %0,%1,%S2
+ fsub%? %0,%1,%S2"
+ [(set_attr "type" "spfp")
+ (set_attr "length" "4,4,8,8,8")])
+
+(define_insn "mulsf3"
+ [(set (match_operand:SF 0 "register_operand" "=r,r,r,r,r ")
+ (mult:SF (match_operand:SF 1 "nonmemory_operand" "r,0,GJ,r,0")
+ (match_operand:SF 2 "nonmemory_operand" "rL,I,r,GJ,LrJ")))]
+; "(TARGET_ARC700 || TARGET_ARC600) && TARGET_SPFP_SET" ;Add flag for float
+ "TARGET_SPFP"
+ "@
+ fmul %0,%1,%2
+ fmul %0,%1,%2
+ fmul %0,%S1,%2
+ fmul %0,%1,%S2
+ fmul%? %0,%1,%S2"
+ [(set_attr "type" "spfp")
+ (set_attr "length" "4,4,8,8,8")])
+
+(define_insn "cmpsfpx_raw"
+ [(set (reg:CC_FPX 61)
+ (compare:CC_FPX (match_operand:SF 0 "register_operand" "r")
+ (match_operand:SF 1 "register_operand" "r")))]
+ "TARGET_SPFP"
+ "fsub.f 0,%0,%1"
+ [(set_attr "type" "spfp")
+ (set_attr "length" "4")])
+
+(define_insn "cmpdfpx_raw"
+ [(set (reg:CC_FPX 61)
+ (compare:CC_FPX (match_operand:DF 0 "nonmemory_operand" "D,r")
+ (match_operand:DF 1 "nonmemory_operand" "r,D")))
+ (clobber (match_scratch:DF 2 "=D,D"))]
+ "TARGET_DPFP"
+ "@
+ dsubh%F0%F1.f 0,%H2,%L2
+ drsubh%F0%F2.f 0,%H1,%L1"
+ [(set_attr "type" "dpfp_addsub")
+ (set_attr "length" "4")])
+
+(define_insn "*cmpfpx_gt"
+ [(set (reg:CC_FP_GT 61) (compare:CC_FP_GT (reg:CC_FPX 61) (const_int 0)))]
+ ""
+ "cmp.ls pcl,pcl"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4")])
+
+(define_insn "*cmpfpx_ge"
+ [(set (reg:CC_FP_GE 61) (compare:CC_FP_GE (reg:CC_FPX 61) (const_int 0)))]
+ ""
+ "rcmp.pnz pcl,0"
+ [(set_attr "type" "compare")
+ (set_attr "length" "4")])
+
+;; DPFP instructions begin...
+
+;; op0_reg = D1_reg.low
+(define_insn "*lr_double_lower"
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:DF 1 "register_operand" "D")] VUNSPEC_LR ))]
+ "TARGET_DPFP"
+"lr %0, [%1l]"
+[(set_attr "length" "8")
+(set_attr "type" "lr")]
+)
+
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; doubles support for ARC
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; D0 = D1+{reg_pair}2
+;; (define_expand "adddf3"
+;; [(set (match_operand:DF 0 "arc_double_register_operand" "")
+;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "")
+;; (match_operand:DF 2 "nonmemory_operand" "")))]
+;; "TARGET_DPFP"
+;; " "
+;; )
+;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo
+;; OR
+;; daddh{0}{1} 0, reg3, limm2.lo
+(define_expand "adddf3"
+ [(set (match_operand:DF 0 "arc_double_register_operand" "")
+ (plus:DF (match_operand:DF 1 "arc_double_register_operand" "")
+ (match_operand:DF 2 "nonmemory_operand" "")))
+ ]
+ "TARGET_DPFP"
+  " if (GET_CODE (operands[2]) == CONST_DOUBLE)
+      {
+        rtx high, low, tmp;
+        split_double (operands[2], &low, &high);
+        tmp = force_reg (SImode, high);
+        emit_insn (gen_adddf3_insn (operands[0], operands[1], operands[2],
+                                    tmp, const0_rtx));
+      }
+    else
+      emit_insn (gen_adddf3_insn (operands[0], operands[1], operands[2],
+                                  const1_rtx, const1_rtx));
+    DONE;
+  "
+)
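+;; For example (a sketch): adding the double constant 2.0, whose image is
+;; high word 0x40000000 and low word 0x0, forces 0x40000000 into a register
+;; (operand 3) while the low word travels as the limm %L2.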
+
+;; daddh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/
+;; OR
+;; daddh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0 */
+;;
+(define_insn "adddf3_insn"
+ [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D")
+ (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D")
+ (match_operand:DF 2 "nonmemory_operand" "!r,G")))
+ (use (match_operand:SI 3 "" "N,r"))
+ (use (match_operand:SI 4 "" "N,Q"))
+ ]
+ "TARGET_DPFP &&
+ !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)"
+ "@
+ daddh%F0%F1 0,%H2,%L2
+ daddh%F0%F1 0,%3,%L2"
+ [(set_attr "type" "dpfp_addsub")
+ (set_attr "length" "4,8")])
+
+;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo
+;; OR
+;; dmulh{0}{1} 0, reg3, limm2.lo
+(define_expand "muldf3"
+ [(set (match_operand:DF 0 "arc_double_register_operand" "")
+ (mult:DF (match_operand:DF 1 "arc_double_register_operand" "")
+ (match_operand:DF 2 "nonmemory_operand" "")))]
+"TARGET_DPFP"
+" if (GET_CODE (operands[2]) == CONST_DOUBLE)
+ {
+ rtx high, low, tmp;
+ split_double (operands[2], &low, &high);
+ tmp = force_reg (SImode, high);
+ emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx));
+ }
+ else
+ emit_insn(gen_muldf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx));
+
+ DONE;
+ ")
+
+
+;; dmulh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1*/
+;; OR
+;; dmulh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/
+(define_insn "muldf3_insn"
+ [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D")
+ (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D")
+ (match_operand:DF 2 "nonmemory_operand" "!r,G")))
+ (use (match_operand:SI 3 "" "N,!r"))
+ (use (match_operand:SI 4 "" "N,Q"))
+ ]
+ "TARGET_DPFP &&
+ !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)"
+ "@
+ dmulh%F0%F1 0,%H2,%L2
+ dmulh%F0%F1 0,%3, %L2"
+ [(set_attr "type" "dpfp_mult")
+ (set_attr "length" "4,8")])
+
+;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo
+;; OR
+;; dsubh{0}{1} 0, reg3, limm2.lo
+;; OR
+;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo
+;; OR
+;; drsubh{0}{2} 0, reg3, limm1.lo
+(define_expand "subdf3"
+ [(set (match_operand:DF 0 "arc_double_register_operand" "")
+ (minus:DF (match_operand:DF 1 "nonmemory_operand" "")
+ (match_operand:DF 2 "nonmemory_operand" "")))]
+"TARGET_DPFP"
+" if (GET_CODE (operands[1]) == CONST_DOUBLE || GET_CODE (operands[2]) == CONST_DOUBLE)
+ {
+ rtx high, low, tmp;
+ int const_index = ((GET_CODE (operands[1]) == CONST_DOUBLE) ? 1: 2);
+ split_double (operands[const_index], &low, &high);
+ tmp = force_reg (SImode, high);
+ emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],tmp,const0_rtx));
+ }
+ else
+ emit_insn(gen_subdf3_insn(operands[0], operands[1], operands[2],const1_rtx,const1_rtx));
+
+ DONE;
+ "
+)
+
+;; dsubh{0}{1} 0, {reg_pair}2.hi, {reg_pair}2.lo /* operand 4 = 1 */
+;; OR
+;; dsubh{0}{1} 0, reg3, limm2.lo /* operand 4 = 0*/
+;; OR
+;; drsubh{0}{2} 0, {reg_pair}1.hi, {reg_pair}1.lo /* operand 4 = 1 */
+;; OR
+;; drsubh{0}{2} 0, reg3, limm1.lo /* operand 4 = 0*/
+(define_insn "subdf3_insn"
+ [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D")
+ (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,!r,G")
+ (match_operand:DF 2 "nonmemory_operand" "!r,G,D,D")))
+ (use (match_operand:SI 3 "" "N,r,N,r"))
+ (use (match_operand:SI 4 "" "N,Q,N,Q"))
+]
+ "TARGET_DPFP &&
+ !(GET_CODE(operands[2]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT) &&
+ !(GET_CODE(operands[1]) == CONST_DOUBLE && GET_CODE(operands[3]) == CONST_INT)"
+ "@
+ dsubh%F0%F1 0,%H2,%L2
+ dsubh%F0%F1 0,%3,%L2
+ drsubh%F0%F2 0,%H1,%L1
+ drsubh%F0%F2 0,%3,%L1"
+ [(set_attr "type" "dpfp_addsub")
+ (set_attr "length" "4,8,4,8")])
+
+;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ;; Peephole for following conversion
+;; ;; D0 = D2<op>{reg_pair}3
+;; ;; {reg_pair}5 = D0
+;; ;; D0 = {reg_pair}6
+;; ;; |
+;; ;; V
+;; ;; _________________________________________________________
+;; ;; / D0 = D2 <op> {regpair3_or_limmreg34}
+;; ;; ---- + {reg_pair}5.hi = ( D2<op>{regpair3_or_limmreg34} ).hi
+;; ;; | \_________________________________________________________
+;; ;; |
+;; ;; | ________________________________________________________
+;; ;; | / {reg_pair}5.lo = ( D2<op>{regpair3_or_limmreg34} ).lo
+;; ;; +-----+ D0 = {reg_pair}6
+;; ;; \ _________________________________________________________
+;; ;; ||
+;; ;; ||
+;; ;; \/
+;; ;; d<op>{0}{2}h {reg_pair}5.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi
+;; ;; dexcl{0} {reg_pair}5.lo, {reg_pair}6.lo, {reg_pair}6.hi
+;; ;; -----------------------------------------------------------------------------------------
+;; ;; where <op> is one of {+,*,-}
+;; ;; <opname> is {add,mult,sub}
+;; ;;
+;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as
+;; ;; {regpair2_or_limmreg24} and D3
+;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; (define_peephole2
+;; [(parallel [(set (match_operand:DF 0 "register_operand" "")
+;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "")
+;; (match_operand:DF 3 "nonmemory_operand" "")]))
+;; (use (match_operand:SI 4 "" ""))])
+;; (set (match_operand:DF 5 "register_operand" "")
+;; (match_dup 0))
+;; (set (match_dup 0)
+;; (match_operand:DF 6 "register_operand" ""))
+;; ]
+;; "TARGET_DPFP"
+;; [
+;; (parallel [(set (match_dup 0)
+;; (match_op_dup:DF 1 [(match_dup 2)
+;; (match_dup 3)]))
+;; (use (match_dup 4))
+;; (set (match_dup 5)
+;; (match_op_dup:DF 1 [(match_dup 2)
+;; (match_dup 3)]))])
+;; (parallel [
+;; ;; (set (subreg:SI (match_dup 5) 0)
+;; (set (match_dup 7)
+;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR ))
+;; (set (match_dup 0) (match_dup 6))]
+;; )
+;; ]
+;; "operands[7] = simplify_gen_subreg(SImode,operands[5],DFmode,0);"
+;; )
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Peephole for following conversion
+;; D0 = D2<op>{reg_pair}3
+;; {reg_pair}6 = D0
+;; D0 = {reg_pair}7
+;; |
+;; V
+;; _________________________________________________________
+;; / D0 = D2 <op> {regpair3_or_limmreg34}
+;; ---- + {reg_pair}6.hi = ( D2<op>{regpair3_or_limmreg34} ).hi
+;; | \_________________________________________________________
+;; |
+;; | ________________________________________________________
+;; | / {reg_pair}6.lo = ( D2<op>{regpair3_or_limmreg34} ).lo
+;; +-----+ D0 = {reg_pair}7
+;; \ _________________________________________________________
+;; ||
+;; ||
+;; \/
+;; d<op>{0}{2}h {reg_pair}6.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi
+;; dexcl{0} {reg_pair}6.lo, {reg_pair}7.lo, {reg_pair}7.hi
+;; -----------------------------------------------------------------------------------------
+;; where <op> is one of {+,*,-}
+;; <opname> is {add,mult,sub}
+;;
+;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as
+;; {regpair2_or_limmreg24} and D3
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_peephole2
+ [(parallel [(set (match_operand:DF 0 "register_operand" "")
+ (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "")
+ (match_operand:DF 3 "nonmemory_operand" "")]))
+ (use (match_operand:SI 4 "" ""))
+ (use (match_operand:SI 5 "" ""))])
+ (set (match_operand:DF 6 "register_operand" "")
+ (match_dup 0))
+ (set (match_dup 0)
+ (match_operand:DF 7 "register_operand" ""))
+ ]
+ "TARGET_DPFP"
+ [
+ (parallel [(set (match_dup 0)
+ (match_op_dup:DF 1 [(match_dup 2)
+ (match_dup 3)]))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (set (match_dup 6)
+ (match_op_dup:DF 1 [(match_dup 2)
+ (match_dup 3)]))])
+ (parallel [
+;; (set (subreg:SI (match_dup 6) 0)
+ (set (match_dup 8)
+ (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR ))
+ (set (match_dup 0) (match_dup 7))]
+ )
+ ]
+ "operands[8] = simplify_gen_subreg(SImode,operands[6],DFmode,0);"
+ )
+
+;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; ;; Peephole to generate d<opname>{ij}h a,b,c instructions
+;; ;; D0 = D2<op>{reg_pair}3
+;; ;; {reg_pair}5 = D0
+;; ;; |
+;; ;; V
+;; ;; __________________________________________
+;; ;; / D0 = D2 <op> {regpair3_or_limmreg34}
+;; ;; ---- + {reg_pair}5.hi = ( D2<op>{regpair3_or_limmreg34} ).hi
+;; ;; | \__________________________________________
+;; ;; |
+;; ;; + --- {reg_pair}5.lo = ( D2<op>{regpair3_or_limmreg34} ).lo
+;; ;; ||
+;; ;; ||
+;; ;; \/
+;; ;; d<op>{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi
+;; ;; lr {reg_pair}4.lo, {D2l}
+;; ;; ----------------------------------------------------------------------------------------
+;; ;; where <op> is one of {+,*,-}
+;; ;; <opname> is {add,mult,sub}
+;; ;;
+;; ;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as
+;; ;; {regpair2_or_limmreg24} and D3
+;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; (define_peephole2
+;; [(parallel [(set (match_operand:DF 0 "register_operand" "")
+;; (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "")
+;; (match_operand:DF 3 "nonmemory_operand" "")]))
+;; (use (match_operand:SI 4 "" ""))])
+;; (set (match_operand:DF 5 "register_operand" "")
+;; (match_dup 0))
+;; ]
+;; "TARGET_DPFP"
+;; [
+;; (parallel [(set (match_dup 0)
+;; (match_op_dup:DF 1 [(match_dup 2)
+;; (match_dup 3)]))
+;; (use (match_dup 4))
+;; (set (match_dup 5)
+;; (match_op_dup:DF 1 [(match_dup 2)
+;; (match_dup 3)]))])
+;; ; (set (subreg:SI (match_dup 5) 0)
+;; (set (match_dup 6)
+;; (unspec_volatile [(match_dup 0)] VUNSPEC_LR ))
+;; ]
+;; "operands[6] = simplify_gen_subreg(SImode,operands[5],DFmode,0);"
+;; )
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Peephole to generate d<opname>{ij}h a,b,c instructions
+;; D0 = D2<op>{reg_pair}3
+;; {reg_pair}6 = D0
+;; |
+;; V
+;; __________________________________________
+;; / D0 = D2 <op> {regpair3_or_limmreg34}
+;; ---- + {reg_pair}6.hi = ( D2<op>{regpair3_or_limmreg34} ).hi
+;; | \__________________________________________
+;; |
+;; + --- {reg_pair}6.lo = ( D2<op>{regpair3_or_limmreg34} ).lo
+;; ||
+;; ||
+;; \/
+;; d<op>{0}{2}h {reg_pair}4.hi, {regpair3_or_limmreg34}.lo, {regpair3_or_limmreg34}.hi
+;; lr {reg_pair}4.lo, {D2l}
+;; ----------------------------------------------------------------------------------------
+;; where <op> is one of {+,*,-}
+;; <opname> is {add,mult,sub}
+;;
+;; NOTE: For rsub insns D2 and {regpair3_or_limmreg34} get interchanged as
+;; {regpair2_or_limmreg24} and D3
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_peephole2
+ [(parallel [(set (match_operand:DF 0 "register_operand" "")
+ (match_operator:DF 1 "arc_dpfp_operator" [(match_operand:DF 2 "nonmemory_operand" "")
+ (match_operand:DF 3 "nonmemory_operand" "")]))
+ (use (match_operand:SI 4 "" ""))
+ (use (match_operand:SI 5 "" ""))])
+ (set (match_operand:DF 6 "register_operand" "")
+ (match_dup 0))
+ ]
+ "TARGET_DPFP"
+ [
+ (parallel [(set (match_dup 0)
+ (match_op_dup:DF 1 [(match_dup 2)
+ (match_dup 3)]))
+ (use (match_dup 4))
+ (use (match_dup 5))
+ (set (match_dup 6)
+ (match_op_dup:DF 1 [(match_dup 2)
+ (match_dup 3)]))])
+; (set (subreg:SI (match_dup 6) 0)
+ (set (match_dup 7)
+ (unspec_volatile:SI [(match_dup 0)] VUNSPEC_LR ))
+ ]
+ "operands[7] = simplify_gen_subreg(SImode,operands[6],DFmode,0);"
+ )
+
+;; ;; _______________________________________________________
+;; ;; / D0 = D1 + {regpair2_or_limmreg23}
+;; ;; + {reg_pair}4.hi = ( D1 + {regpair2_or_limmreg23} ).hi
+;; ;; \_______________________________________________________
+;; (define_insn "*daddh_peep2_insn"
+;; [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D")
+;; (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D")
+;; (match_operand:DF 2 "nonmemory_operand" "r,G")))
+;; (use (match_operand:SI 3 "" "N,r"))
+;; (set (match_operand:DF 4 "register_operand" "=r,r")
+;; (plus:DF (match_dup 1)
+;; (match_dup 2)))])]
+;; "TARGET_DPFP"
+;; "@
+;; daddh%F0%F1 %H4, %H2, %L2
+;; daddh%F0%F1 %H4, %3, %L2"
+;; [(set_attr "type" "dpfp_addsub")
+;; (set_attr "length" "4,8")]
+;; )
+;; _______________________________________________________
+;; / D0 = D1 + {regpair2_or_limmreg23}
+;; + {reg_pair}5.hi = ( D1 + {regpair2_or_limmreg23} ).hi
+;; \_______________________________________________________
+(define_insn "*daddh_peep2_insn"
+ [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D")
+ (plus:DF (match_operand:DF 1 "arc_double_register_operand" "D,D")
+ (match_operand:DF 2 "nonmemory_operand" "r,G")))
+ (use (match_operand:SI 3 "" "N,r"))
+ (use (match_operand:SI 4 "" "N,Q"))
+ (set (match_operand:DF 5 "register_operand" "=r,r")
+ (plus:DF (match_dup 1)
+ (match_dup 2)))])]
+ "TARGET_DPFP &&
+   !(GET_CODE (operands[2]) == CONST_DOUBLE && GET_CODE (operands[3]) == CONST_INT)"
+ "@
+ daddh%F0%F1 %H5, %H2, %L2
+ daddh%F0%F1 %H5, %3, %L2"
+ [(set_attr "type" "dpfp_addsub")
+ (set_attr "length" "4,8")]
+)
+
+;; _______________________________________________________
+;; / D0 = D1 * {regpair2_or_limmreg23}
+;; + {reg_pair}5.hi = ( D1 * {regpair2_or_limmreg23} ).hi
+;; \_______________________________________________________
+(define_insn "*dmulh_peep2_insn"
+ [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D")
+ (mult:DF (match_operand:DF 1 "arc_double_register_operand" "D,D")
+ (match_operand:DF 2 "nonmemory_operand" "r,G")))
+ (use (match_operand:SI 3 "" "N,r"))
+ (use (match_operand:SI 4 "" "N,Q"))
+ (set (match_operand:DF 5 "register_operand" "=r,r")
+ (mult:DF (match_dup 1)
+ (match_dup 2)))])]
+ "TARGET_DPFP &&
+   !(GET_CODE (operands[2]) == CONST_DOUBLE && GET_CODE (operands[3]) == CONST_INT)"
+ "@
+ dmulh%F0%F1 %H5, %H2, %L2
+ dmulh%F0%F1 %H5, %3, %L2"
+ [(set_attr "type" "dpfp_mult")
+ (set_attr "length" "4,8")]
+)
+
+;; _______________________________________________________
+;; / D0 = D1 - {regpair2_or_limmreg23}
+;; + {reg_pair}5.hi = ( D1 - {regpair2_or_limmreg23} ).hi
+;; \_______________________________________________________
+;; OR
+;; _______________________________________________________
+;; / D0 = {regpair1_or_limmreg13} - D2
+;; + {reg_pair}5.hi = ( {regpair1_or_limmreg13} ).hi - D2
+;; \_______________________________________________________
+(define_insn "*dsubh_peep2_insn"
+ [(parallel [(set (match_operand:DF 0 "arc_double_register_operand" "=D,D,D,D")
+ (minus:DF (match_operand:DF 1 "nonmemory_operand" "D,D,r,G")
+ (match_operand:DF 2 "nonmemory_operand" "r,G,D,D")))
+ (use (match_operand:SI 3 "" "N,r,N,r"))
+ (use (match_operand:SI 4 "" "N,Q,N,Q"))
+ (set (match_operand:DF 5 "register_operand" "=r,r,r,r")
+ (minus:DF (match_dup 1)
+ (match_dup 2)))])]
+ "TARGET_DPFP &&
+   !(GET_CODE (operands[2]) == CONST_DOUBLE && GET_CODE (operands[3]) == CONST_INT) &&
+   !(GET_CODE (operands[1]) == CONST_DOUBLE && GET_CODE (operands[3]) == CONST_INT)"
+ "@
+ dsubh%F0%F1 %H5, %H2, %L2
+ dsubh%F0%F1 %H5, %3, %L2
+ drsubh%F0%F2 %H5, %H1, %L1
+ drsubh%F0%F2 %H5, %3, %L1"
+ [(set_attr "type" "dpfp_addsub")
+ (set_attr "length" "4,8,4,8")]
+)
+
+;; dexcl a,b,c pattern generated by the peephole2 above
+(define_insn "*dexcl_3op_peep2_insn"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
+ (unspec_volatile:SI [(match_operand:DF 1 "arc_double_register_operand" "=D")] VUNSPEC_LR ))
+ (set (match_dup 1) (match_operand:DF 2 "register_operand" "r"))]
+ )
+ ]
+"TARGET_DPFP"
+"dexcl%F1 %0,%H2,%L2"
+ [(set_attr "type" "move")
+ (set_attr "length" "4")]
+)
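
The peepholes and the dexcl pattern above fire when a double-precision FPX result is wanted both in a D register and in a core register pair. A hypothetical C illustration of that shape; whether the transformation actually triggers depends on register allocation:

    /* Hypothetical example only: the DF result is kept (here in a
       global that may live in a DPFP D register) and also needed in
       a core register pair for the return value.  */
    double keep;

    double
    add_and_return (double a, double b)
    {
      keep = a + b;   /* computed as dadd on the DPFP unit */
      return keep;    /* returned in core regs r0:r1; peephole2 can fuse
                         the copy into daddh plus an lr of the low word */
    }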
diff --git a/gcc/config/arc/gmon/auxreg.h b/gcc/config/arc/gmon/auxreg.h
new file mode 100644
index 00000000000..bcaf571853b
--- /dev/null
+++ b/gcc/config/arc/gmon/auxreg.h
@@ -0,0 +1,10 @@
+#define LP_START 0x02
+#define LP_END 0x03
+#define IDENTITY 0x04
+#define STATUS32 0x0a
+#define COUNT0 0x21 /* Timer 0 count */
+#define CONTROL0 0x22 /* Timer 0 control */
+#define LIMIT0 0x23 /* Timer 0 limit */
+#define INT_VECTOR_BASE 0x25
+#define D_CACHE_BUILD 0x72
+#define DC_FLDL 0x4c
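
These constants are ARC auxiliary-register addresses; they are read and written with the lr and sr instructions rather than ordinary loads and stores. A minimal sketch of C wrappers, assuming GCC inline assembly on an ARC target (the helper names are illustrative, not part of this patch):

    /* Illustrative wrappers for the ARC lr/sr instructions, which
       move data between core and auxiliary registers.  */
    static inline unsigned long
    read_aux (unsigned long regno)
    {
      unsigned long v;
      __asm__ volatile ("lr %0,[%1]" : "=r" (v) : "r" (regno));
      return v;
    }

    static inline void
    write_aux (unsigned long v, unsigned long regno)
    {
      __asm__ volatile ("sr %0,[%1]" : : "r" (v), "r" (regno) : "memory");
    }

For example, write_aux (200, LIMIT0) followed by write_aux (3, CONTROL0) programs timer 0 the way profil.S below does.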
diff --git a/gcc/config/arc/gmon/dcache_linesz.S b/gcc/config/arc/gmon/dcache_linesz.S
new file mode 100644
index 00000000000..d71383943c6
--- /dev/null
+++ b/gcc/config/arc/gmon/dcache_linesz.S
@@ -0,0 +1,30 @@
+#include "../asm.h"
+#include "auxreg.h"
+/* This file contains code to do profiling. */
+#ifndef __A4__
+ .weak __profile_timer_cycles
+ .global __profile_timer_cycles
+ .set __profile_timer_cycles, 200
+ .text
+ ; For Arctangent-A5, if no data cache is present, a read of the
+ ; cache build register returns the ID register. For ARC600 and
+ ; later, the version field will be zero.
+ .global __dcache_linesz
+ .balign 4
+__dcache_linesz:
+ lr r12,[D_CACHE_BUILD]
+ extb_s r0,r12
+ breq_s r0,0,.Lsz_nocache
+ brge r0,0x20,.Lsz_havecache
+ lr r0,[IDENTITY]
+ breq r12,r0,.Lsz_nocache
+.Lsz_havecache:
+ lsr_s r12,r12,16
+ mov_s r0,16
+ bmsk_s r12,r12,3
+ asl_s r0,r0,r12
+ j_s [blink]
+.Lsz_nocache:
+ mov_s r0,1
+ j_s [blink]
+#endif /* !A4 */
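
In C terms, the routine returns 1 when no data cache is present and otherwise 16 bytes shifted left by the BSize field of D_CACHE_BUILD (bits 19:16). A sketch under those assumptions, reusing the hypothetical read_aux wrapper from the auxreg.h note above:

    #include "auxreg.h"

    extern unsigned long read_aux (unsigned long regno);  /* models lr */

    int
    dcache_linesz_model (void)
    {
      unsigned long build = read_aux (D_CACHE_BUILD);
      unsigned long version = build & 0xff;

      if (version == 0)                     /* ARC600 and later: no D-cache. */
        return 1;
      if (version < 0x20
          && build == read_aux (IDENTITY))  /* A5 without D-cache: the read
                                               returned the ID register.  */
        return 1;
      return 16 << ((build >> 16) & 0xf);   /* BSize field selects the size. */
    }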
diff --git a/gcc/config/arc/gmon/gmon.c b/gcc/config/arc/gmon/gmon.c
new file mode 100644
index 00000000000..5a26205bb45
--- /dev/null
+++ b/gcc/config/arc/gmon/gmon.c
@@ -0,0 +1,450 @@
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2007 Free Software Foundation, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#if 0
+#include <sys/param.h>
+#include <sys/time.h>
+#endif
+#include <sys/gmon.h>
+#include <sys/gmon_out.h>
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#if 0
+#include <libc-internal.h>
+#include <not-cancel.h>
+
+#ifdef USE_IN_LIBIO
+# include <wchar.h>
+#endif
+#endif
+#define internal_function
+#define weak_alias(fun,aliasid) extern __typeof(fun) aliasid __attribute__ ((weak, alias (#fun)));
+#define __libc_enable_secure 0
+
+/* Head of basic-block list or NULL. */
+struct __bb *__bb_head attribute_hidden;
+
+struct gmonparam _gmonparam attribute_hidden = { GMON_PROF_OFF };
+
+/*
+ * See profil(2) where this is described:
+ */
+static int s_scale;
+#define SCALE_1_TO_1 0x10000L
+
+#define ERR(s) write (STDERR_FILENO, s, sizeof (s) - 1)
+
+void moncontrol (int mode);
+void __moncontrol (int mode);
+static void write_hist (int fd) internal_function;
+static void write_call_graph (int fd) internal_function;
+static void write_bb_counts (int fd) internal_function;
+
+/*
+ * Control profiling
+ * profiling is what mcount checks to see if
+ * all the data structures are ready.
+ */
+void
+__moncontrol (int mode)
+{
+ struct gmonparam *p = &_gmonparam;
+
+ /* Don't change the state if we ran into an error. */
+ if (p->state == GMON_PROF_ERROR)
+ return;
+
+ if (mode)
+ {
+ /* start */
+ __profil((void *) p->kcount, p->kcountsize, p->lowpc, s_scale);
+ p->state = GMON_PROF_ON;
+ }
+ else
+ {
+ /* stop */
+ __profil(NULL, 0, 0, 0);
+ p->state = GMON_PROF_OFF;
+ }
+}
+weak_alias (__moncontrol, moncontrol)
+
+
+void
+__monstartup (u_long lowpc, u_long highpc)
+{
+ register int o;
+ char *cp;
+ struct gmonparam *p = &_gmonparam;
+ int linesz;
+
+ /*
+ * round lowpc and highpc to multiples of the density we're using
+ * so the rest of the scaling (here and in gprof) stays in ints.
+ */
+ p->lowpc = ROUNDDOWN(lowpc, HISTFRACTION * sizeof(HISTCOUNTER));
+ if (sizeof *p->froms % sizeof(HISTCOUNTER) != 0)
+ {
+ p->highpc = ROUNDUP(highpc, HISTFRACTION * sizeof(HISTCOUNTER));
+ p->textsize = p->highpc - p->lowpc;
+ p->kcountsize = ROUNDUP((p->textsize + HISTFRACTION - 1) / HISTFRACTION,
+ sizeof (*p->froms));
+ }
+ else
+ {
+ /* Avoid odd scales by rounding up highpc to get kcountsize rounded. */
+ p->textsize = ROUNDUP (highpc - p->lowpc,
+ HISTFRACTION * sizeof (*p->froms));
+ p->highpc = p->lowpc + p->textsize;
+ p->kcountsize = p->textsize / HISTFRACTION;
+ }
+ p->hashfraction = HASHFRACTION;
+ p->log_hashfraction = -1;
+ /* The following test must be kept in sync with the corresponding
+ test in mcount.c. */
+ if ((HASHFRACTION & (HASHFRACTION - 1)) == 0) {
+ /* if HASHFRACTION is a power of two, mcount can use shifting
+ instead of integer division. Precompute shift amount. */
+ p->log_hashfraction = ffs(p->hashfraction * sizeof(*p->froms)) - 1;
+ }
+ p->tolimit = p->textsize * ARCDENSITY / 100;
+ if (p->tolimit < MINARCS)
+ p->tolimit = MINARCS;
+ else if (p->tolimit > MAXARCS)
+ p->tolimit = MAXARCS;
+ p->tossize = p->tolimit * sizeof(struct tostruct);
+
+ /* p->kcount must not share cache lines with the adjacent data, because
+ we use uncached accesses while profiling. */
+ linesz = __dcache_linesz ();
+ cp = calloc (ROUNDUP (p->kcountsize, linesz) + p->tossize
+ + (linesz - 1), 1);
+ if (! cp)
+ {
+ ERR("monstartup: out of memory\n");
+ p->tos = NULL;
+ p->state = GMON_PROF_ERROR;
+      /* In case we lose the error state due to a race,
+	 also prevent invalid writes by clearing tolimit.  */
+ p->tolimit = 0;
+ return;
+ }
+ p->tos = (struct tostruct *)cp;
+ cp += p->tossize;
+ cp = (char *) ROUNDUP ((ptrdiff_t) cp, linesz);
+ p->kcount = (HISTCOUNTER *)cp;
+ cp += ROUNDUP (p->kcountsize, linesz);
+
+ p->tos[0].link = 0;
+
+ o = p->highpc - p->lowpc;
+ if (p->kcountsize < (u_long) o)
+ {
+#ifndef hp300
+ s_scale = ((float)p->kcountsize / o ) * SCALE_1_TO_1;
+#else
+ /* avoid floating point operations */
+ int quot = o / p->kcountsize;
+
+ if (quot >= 0x10000)
+ s_scale = 1;
+ else if (quot >= 0x100)
+ s_scale = 0x10000 / quot;
+ else if (o >= 0x800000)
+ s_scale = 0x1000000 / (o / (p->kcountsize >> 8));
+ else
+ s_scale = 0x1000000 / ((o << 8) / p->kcountsize);
+#endif
+ } else
+ s_scale = SCALE_1_TO_1;
+
+ __moncontrol(1);
+}
+weak_alias (__monstartup, monstartup)
+
+
+static void
+internal_function
+write_hist (int fd)
+{
+ u_char tag = GMON_TAG_TIME_HIST;
+ struct arc_gmon_hist_hdr thdr __attribute__ ((aligned (__alignof__ (char *))));
+ int r;
+
+ if (_gmonparam.kcountsize > 0)
+ {
+ *(char **) thdr.low_pc = (char *) _gmonparam.lowpc;
+ *(char **) thdr.high_pc = (char *) _gmonparam.highpc;
+ *(int32_t *) thdr.hist_size = (_gmonparam.kcountsize
+ / sizeof (HISTCOUNTER));
+ *(int32_t *) thdr.prof_rate = __profile_frequency ();
+ strncpy (thdr.dimen, "seconds", sizeof (thdr.dimen));
+ thdr.dimen_abbrev = 's';
+
+ r = write (fd, &tag, sizeof tag);
+ if (r != sizeof tag)
+ return;
+ r = write (fd, &thdr, sizeof thdr);
+ if (r != sizeof thdr)
+ return;
+      r = write (fd, _gmonparam.kcount, _gmonparam.kcountsize);
+ if ((unsigned) r != _gmonparam.kcountsize)
+ return;
+ }
+}
+
+
+static void
+internal_function
+write_call_graph (int fd)
+{
+#define NARCS_PER_WRITE 64
+#define BYTES_PER_ARC (1 + sizeof (struct gmon_cg_arc_record))
+#define BYTES_PER_WRITE (BYTES_PER_ARC * NARCS_PER_WRITE)
+ ARCINDEX to_index;
+ u_long frompc, selfpc, count;
+ char buffer[BYTES_PER_WRITE], *p;
+ u_long *prof_desc = __arc_profile_desc_secstart;
+ u_long *prof_count = __arc_profile_counters_secstart;
+ u_long *prof_desc_end = __arc_profile_desc_secend;
+ u_long *prof_forward = __arc_profile_forward_secstart;
+
+ for (p = buffer; p < buffer + BYTES_PER_WRITE; p += BYTES_PER_ARC)
+ *p = GMON_TAG_CG_ARC;
+ p = buffer;
+ frompc = *prof_desc++ & -2;
+ while (prof_desc < prof_desc_end)
+ {
+ selfpc = *prof_desc++;
+ if (selfpc & 1)
+ {
+ frompc = selfpc & -2;
+ selfpc = *prof_desc++;
+ }
+ count = *prof_count++;
+ if (selfpc)
+ {
+ struct arc
+ {
+ char *frompc;
+ char *selfpc;
+ int32_t count;
+ }
+ arc;
+
+ if (!count)
+ continue;
+ arc.frompc = (char *) frompc;
+ arc.selfpc = (char *) selfpc;
+ arc.count = count;
+ memcpy (p + 1, &arc, sizeof arc);
+ p += 1 + sizeof arc;
+
+ if (p == buffer + BYTES_PER_WRITE)
+ {
+ write (fd, buffer, BYTES_PER_WRITE);
+ p = buffer;
+ }
+ }
+ else
+ {
+ for (to_index = count;
+ to_index != 0;
+ to_index = _gmonparam.tos[to_index].link)
+ {
+ struct arc
+ {
+ char *frompc;
+ char *selfpc;
+ int32_t count;
+ }
+ arc;
+
+ arc.frompc = (char *) frompc;
+ arc.selfpc = (char *) _gmonparam.tos[to_index].selfpc;
+ arc.count = _gmonparam.tos[to_index].count;
+ memcpy (p + 1, &arc, sizeof arc);
+ p += 1 + sizeof arc;
+
+ if (p == buffer + BYTES_PER_WRITE)
+ {
+ write (fd, buffer, BYTES_PER_WRITE);
+ p = buffer;
+ }
+ }
+ }
+ }
+ while (prof_forward < __arc_profile_forward_secend)
+ {
+ /* ??? The 'call count' is actually supposed to be a fixed point
+ factor, with 16 bits each before and after the point.
+ It would be much nicer if we figured out the actual number
+ of calls to the caller, and multiplied that with the fixed point
+ factor to arrive at the estimated calls for the callee. */
+ memcpy (p + 1, prof_forward, 3 * sizeof *prof_forward);
+ prof_forward += 3;
+ p += 1 + 3 * sizeof *prof_forward;
+ if (p == buffer + BYTES_PER_WRITE)
+ {
+ write (fd, buffer, BYTES_PER_WRITE);
+ p = buffer;
+ }
+ }
+ if (p != buffer)
+ write (fd, buffer, p - buffer);
+}
+
+
+static void
+internal_function
+write_bb_counts (int fd)
+{
+ struct __bb *grp;
+ u_char tag = GMON_TAG_BB_COUNT;
+ size_t ncounts;
+ size_t i;
+
+ struct { unsigned long address; long count; } bbbody[8];
+ size_t nfilled;
+
+ /* Write each group of basic-block info (all basic-blocks in a
+ compilation unit form a single group). */
+
+ for (grp = __bb_head; grp; grp = grp->next)
+ {
+ ncounts = grp->ncounts;
+ write (fd, &tag, 1);
+ write (fd, &ncounts, sizeof ncounts);
+ for (nfilled = i = 0; i < ncounts; ++i)
+ {
+ if (nfilled == sizeof (bbbody) / sizeof (bbbody[0]))
+ {
+ write (fd, bbbody, sizeof bbbody);
+ nfilled = 0;
+ }
+
+ bbbody[nfilled].address = grp->addresses[i];
+ bbbody[nfilled++].count = grp->counts[i];
+ }
+ if (nfilled > 0)
+ write (fd, bbbody, nfilled * sizeof bbbody[0]);
+ }
+}
+
+
+static void
+write_gmon (void)
+{
+ struct gmon_hdr ghdr __attribute__ ((aligned (__alignof__ (int))));
+ int fd = -1;
+ char *env;
+
+#ifndef O_NOFOLLOW
+# define O_NOFOLLOW 0
+#endif
+
+ env = getenv ("GMON_OUT_PREFIX");
+ if (env != NULL && !__libc_enable_secure)
+ {
+ size_t len = strlen (env);
+ char buf[len + 20];
+ snprintf (buf, sizeof (buf), "%s.%u", env, getpid ());
+ fd = open (buf, O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW, 0666);
+ }
+
+ if (fd == -1)
+ {
+ fd = open ("gmon.out", O_CREAT|O_TRUNC|O_WRONLY|O_NOFOLLOW,
+ 0666);
+ if (fd < 0)
+ {
+ perror ("_mcleanup: gmon.out");
+ return;
+ }
+ }
+
+ /* write gmon.out header: */
+ memset (&ghdr, '\0', sizeof (struct gmon_hdr));
+ memcpy (&ghdr.cookie[0], GMON_MAGIC, sizeof (ghdr.cookie));
+ *(int32_t *) ghdr.version = GMON_VERSION;
+ write (fd, &ghdr, sizeof (struct gmon_hdr));
+
+ /* write PC histogram: */
+ write_hist (fd);
+
+ /* write call-graph: */
+ write_call_graph (fd);
+
+ /* write basic-block execution counts: */
+ write_bb_counts (fd);
+
+ close (fd);
+}
+
+
+void
+__write_profiling (void)
+{
+ int save = _gmonparam.state;
+ _gmonparam.state = GMON_PROF_OFF;
+ if (save == GMON_PROF_ON)
+ write_gmon ();
+ _gmonparam.state = save;
+}
+#ifndef SHARED
+/* This symbol isn't used anywhere in the DSO and it is not exported.
+ This would normally mean it should be removed to get the same API
+ in static libraries. But since profiling is special in static libs
+ anyway we keep it. But not when building the DSO since some
+ quality assurance tests will otherwise trigger. */
+weak_alias (__write_profiling, write_profiling)
+#endif
+
+
+void
+_mcleanup (void)
+{
+ __moncontrol (0);
+
+ if (_gmonparam.state != GMON_PROF_ERROR)
+ write_gmon ();
+
+ /* free the memory. */
+ if (_gmonparam.tos != NULL)
+ free (_gmonparam.tos);
+}
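
The buffer carving in __monstartup is the subtle part: one calloc'd block holds the tostruct array followed by kcount, with kcount bumped to the next cache-line boundary so that the uncached histogram accesses never touch a line shared with tos. A standalone sketch of just that arithmetic (names are illustrative):

    #include <stdlib.h>
    #include <stdint.h>

    #define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))

    /* Carve tos and kcount out of a single zeroed block, mirroring
       the layout code in __monstartup above.  Returns the block or
       NULL on allocation failure.  */
    static void *
    carve_buffers (size_t kcountsize, size_t tossize, size_t linesz,
                   void **tos, void **kcount)
    {
      char *cp = calloc (ROUNDUP (kcountsize, linesz) + tossize
                         + (linesz - 1), 1);
      if (cp == NULL)
        return NULL;
      *tos = cp;
      cp += tossize;
      cp = (char *) ROUNDUP ((uintptr_t) cp, linesz);  /* align kcount */
      *kcount = cp;
      return *tos;
    }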
diff --git a/gcc/config/arc/gmon/machine-gmon.h b/gcc/config/arc/gmon/machine-gmon.h
new file mode 100644
index 00000000000..2ed449c38ec
--- /dev/null
+++ b/gcc/config/arc/gmon/machine-gmon.h
@@ -0,0 +1,40 @@
+#ifndef MACHINE_GMON_H
+#define MACHINE_GMON_H
+
+/* We can't fake our own <sys/types.h> header because the newlib / uclibc
+ headers in GCC_FOR_TARGET take precedence. */
+
+#define __BEGIN_DECLS
+#define __END_DECLS
+
+#define __THROW
+
+extern int __dcache_linesz (void);
+
+#define _MCOUNT_DECL(countp, selfpc) \
+ static inline void _mcount_internal (void *countp, u_long selfpc)
+
+extern void _mcount (void);
+extern void _mcount_call (void);
+
+/* N.B.: the calling point might be a sibcall, thus blink does not necessarily
+ hold the caller's address. r8 doesn't hold the caller's address, either,
+ but rather a pointer to the counter data structure associated with the
+ caller.
+ This function must be compiled with optimization turned on in order to
+ enable a sibcall for the final call to selfpc; this is important when trying
+ to profile a program with deep tail-recursion that would get a stack
+ overflow otherwise. */
+#define MCOUNT \
+void \
+_mcount_call (void) \
+{ \
+ register void *countp __asm("r8"); \
+ register u_long selfpc __asm("r9"); \
+ _mcount_internal (countp, selfpc); \
+ ((void (*)(void)) selfpc) (); \
+}
+
+extern int __profil (u_short *, size_t, size_t, u_int);
+
+#endif /* MACHINE_GMON_H */
diff --git a/gcc/config/arc/gmon/mcount.c b/gcc/config/arc/gmon/mcount.c
new file mode 100644
index 00000000000..4210d1cc9d2
--- /dev/null
+++ b/gcc/config/arc/gmon/mcount.c
@@ -0,0 +1,204 @@
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(lint) && !defined(KERNEL) && defined(LIBC_SCCS)
+static char sccsid[] = "@(#)mcount.c 8.1 (Berkeley) 6/4/93";
+#endif
+
+#if 0
+#include <unistd.h>
+#include <sys/param.h>
+#endif
+#include <sys/gmon.h>
+
+/* This file provides the machine-dependent definitions of the _MCOUNT_DECL
+ and MCOUNT macros. */
+#include <machine-gmon.h>
+
+#include <atomic.h>
+
+/*
+ * mcount is called on entry to each function compiled with the profiling
+ * switch set. _mcount(), which is declared in a machine-dependent way
+ * with _MCOUNT_DECL, does the actual work and is either inlined into a
+ * C routine or called by an assembly stub. In any case, this magic is
+ * taken care of by the MCOUNT definition in <machine/profile.h>.
+ *
+ * _mcount updates data structures that represent traversals of the
+ * program's call graph edges. frompc and selfpc are the return
+ * address and function address that represents the given call graph edge.
+ *
+ * Note: the original BSD code used the same variable (frompcindex) for
+ * both frompcindex and frompc. Any reasonable, modern compiler will
+ * perform this optimization.
+ */
+_MCOUNT_DECL(count_ptr, selfpc) /* _mcount; may be static, inline, etc */
+{
+ register ARCINDEX *frompcindex;
+ register struct tostruct *top, *prevtop;
+ register struct gmonparam *p;
+ register ARCINDEX toindex;
+
+ /* Check for nested function trampoline. */
+ if (selfpc & 2)
+ selfpc = *(u_long *) (selfpc + 10);
+
+ p = &_gmonparam;
+ /*
+ * check that we are profiling
+ * and that we aren't recursively invoked.
+ */
+#if 0
+ if (catomic_compare_and_exchange_bool_acq (&p->state, GMON_PROF_BUSY,
+ GMON_PROF_ON))
+ return;
+#elif defined (__ARC700__)
+/* ??? This could temporarily lose the ERROR / OFF condition in a race,
+ but doing an actual compare_and_exchange would be too costly. It would
+ be better if we had a semaphore independent of the 'sticky' state, but
+ then we could run into ABI compatibility problems with the size of struct
+ gmonparam. */
+ {
+ u_long old_state;
+
+ __asm ("ex %0,%1": "=r" (old_state), "+m" (p->state)
+ : "0" (GMON_PROF_BUSY));
+ if (old_state != GMON_PROF_ON)
+ {
+ switch (old_state)
+ {
+ case GMON_PROF_OFF:
+ __asm ("ex %0,%1": "+r" (old_state), "+m" (p->state));
+ if (old_state == GMON_PROF_BUSY
+	      /* Switching off while we claim to be busy, when profiling
+		 was actually already switched off, is all right.  */
+ || old_state == GMON_PROF_OFF)
+ break;
+ /* It is not clear if we should allow switching on
+ profiling at this point, and how to handle further races.
+ For now, record an error in this case. */
+ /* Fall through. */
+ default: /* We expect here only GMON_PROF_ERROR. */
+ p->state = GMON_PROF_ERROR;
+ break;
+ case GMON_PROF_BUSY: break;
+ }
+ return;
+ }
+ }
+#else /* ??? No semaphore primitives available. */
+ if (p->state != GMON_PROF_ON)
+ return;
+ p->state = GMON_PROF_BUSY;
+#endif
+
+ frompcindex = count_ptr;
+ toindex = *frompcindex;
+ if (toindex == 0) {
+ /*
+ * first time traversing this arc
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= (ARCINDEX) p->tolimit)
+ /* halt further profiling */
+ goto overflow;
+
+ *frompcindex = toindex;
+ top = &p->tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = 0;
+ goto done;
+ }
+ top = &p->tos[toindex];
+ if (top->selfpc == selfpc) {
+ /*
+ * arc at front of chain; usual case.
+ */
+ top->count++;
+ goto done;
+ }
+ /*
+ * have to go looking down chain for it.
+ * top points to what we are looking at,
+ * prevtop points to previous top.
+ * we know it is not at the head of the chain.
+ */
+ for (; /* goto done */; ) {
+ if (top->link == 0) {
+ /*
+ * top is end of the chain and none of the chain
+ * had top->selfpc == selfpc.
+ * so we allocate a new tostruct
+ * and link it to the head of the chain.
+ */
+ toindex = ++p->tos[0].link;
+ if (toindex >= (ARCINDEX) p->tolimit)
+ goto overflow;
+
+ top = &p->tos[toindex];
+ top->selfpc = selfpc;
+ top->count = 1;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+ /*
+ * otherwise, check the next arc on the chain.
+ */
+ prevtop = top;
+ top = &p->tos[top->link];
+ if (top->selfpc == selfpc) {
+ /*
+ * there it is.
+ * increment its count
+ * move it to the head of the chain.
+ */
+ top->count++;
+ toindex = prevtop->link;
+ prevtop->link = top->link;
+ top->link = *frompcindex;
+ *frompcindex = toindex;
+ goto done;
+ }
+
+ }
+done:
+ p->state = GMON_PROF_ON;
+ return;
+overflow:
+ p->state = GMON_PROF_ERROR;
+ return;
+}
+
+/*
+ * Actual definition of mcount function. Defined in <machine/profile.h>,
+ * which is included by <sys/gmon.h>.
+ */
+MCOUNT
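
Stripped of the state machinery, the chain walk in _mcount_internal is a move-to-front list keyed by call site: *frompcindex heads a chain of tostruct entries, a hit bumps the count and rotates the entry to the front, and a miss allocates from tos[0].link. A condensed C model of that bookkeeping (overflow simply drops the arc here, whereas the real code latches GMON_PROF_ERROR):

    #include <sys/gmon.h>

    static void
    record_arc (struct gmonparam *p, ARCINDEX *slot, u_long selfpc)
    {
      ARCINDEX i, prev = 0;

      for (i = *slot; i != 0; prev = i, i = p->tos[i].link)
        if (p->tos[i].selfpc == selfpc)
          {
            p->tos[i].count++;
            if (prev != 0)              /* move hit entry to the front */
              {
                p->tos[prev].link = p->tos[i].link;
                p->tos[i].link = *slot;
                *slot = i;
              }
            return;
          }
      i = ++p->tos[0].link;             /* allocate a new entry */
      if (i >= (ARCINDEX) p->tolimit)
        return;                         /* table full: drop this arc */
      p->tos[i].selfpc = selfpc;
      p->tos[i].count = 1;
      p->tos[i].link = *slot;
      *slot = i;
    }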
diff --git a/gcc/config/arc/gmon/prof-freq-stub.S b/gcc/config/arc/gmon/prof-freq-stub.S
new file mode 100644
index 00000000000..f9ec1a2e530
--- /dev/null
+++ b/gcc/config/arc/gmon/prof-freq-stub.S
@@ -0,0 +1,16 @@
+#include "../asm.h"
+/* This file contains code to do profiling. */
+#ifndef __A4__
+ .weak __profile_frequency_value
+ .global __profile_frequency_value
+ .set __profile_frequency_value, 1000
+ .text
+ .balign 4
+ .global __profile_frequency
+ FUNC(__profile_frequency)
+__profile_frequency:
+ mov_s r0,__profile_frequency_value
+ j_s [blink]
+ ENDFUNC(__profile_frequency)
+
+#endif /* !A4 */
diff --git a/gcc/config/arc/gmon/prof-freq.c b/gcc/config/arc/gmon/prof-freq.c
new file mode 100644
index 00000000000..b9d517688eb
--- /dev/null
+++ b/gcc/config/arc/gmon/prof-freq.c
@@ -0,0 +1,58 @@
+/* Return frequency of ticks reported by profil. Generic version. */
+/*-
+ * Copyright (c) 1983, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#include <sys/types.h>
+#include <sys/time.h>
+#if 0
+#include <libc-internal.h>
+#else
+#include "sys/gmon.h"
+#endif
+
+int
+__profile_frequency (void)
+{
+ /*
+   * Discover the tick frequency of the machine.  If something goes
+   * wrong, we return 0, an impossible hertz.
+ */
+ struct itimerval tim;
+
+ tim.it_interval.tv_sec = 0;
+ tim.it_interval.tv_usec = 1;
+ tim.it_value.tv_sec = 0;
+ tim.it_value.tv_usec = 0;
+ setitimer(ITIMER_REAL, &tim, 0);
+ setitimer(ITIMER_REAL, 0, &tim);
+ if (tim.it_interval.tv_usec < 2)
+ return 0;
+ return (1000000 / tim.it_interval.tv_usec);
+}
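
The returned rate is what write_hist records in prof_rate; dividing a bucket's tick count by it yields seconds. A small usage sketch:

    #include <sys/gmon.h>

    /* Convert the ticks accumulated in one histogram bucket into
       seconds, guarding against the 0 "impossible hertz" value.  */
    double
    bucket_seconds (unsigned short ticks)
    {
      int hz = __profile_frequency ();
      return hz > 0 ? (double) ticks / hz : 0.0;
    }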
diff --git a/gcc/config/arc/gmon/profil.S b/gcc/config/arc/gmon/profil.S
new file mode 100644
index 00000000000..0ab34543119
--- /dev/null
+++ b/gcc/config/arc/gmon/profil.S
@@ -0,0 +1,127 @@
+#include "../asm.h"
+#include "auxreg.h"
+/* This file contains code to do profiling. */
+#ifndef __A4__
+ .weak __profile_timer_cycles
+ .global __profile_timer_cycles
+ .set __profile_timer_cycles, 200
+
+ .section .bss
+ .global __profil_offset
+ .align 4
+ .type __profil_offset, @object
+ .size __profil_offset, 4
+__profil_offset:
+ .zero 4
+
+ .text
+ .global __dcache_linesz
+ .global __profil
+ FUNC(__profil)
+.Lstop_profiling:
+ sr r0,[CONTROL0]
+ j_s [blink]
+ .balign 4
+__profil:
+.Lprofil:
+ breq_s r0,0,.Lstop_profiling
+ ; r0: buf r1: bufsiz r2: offset r3: scale
+ bxor.f r3,r3,15; scale must be 0x8000, i.e. 1/2; generate 0.
+ push_s blink
+ lsr_s r2,r2,1
+ mov_s r8,r0
+ flag.ne 1 ; halt if wrong scale
+ sub_s r0,r0,r2
+ st r0,[__profil_offset]
+ bl __dcache_linesz
+ pop_s blink
+ bbit1.d r0,0,nocache
+ mov_s r0,r8
+#ifdef __ARC700__
+ add_s r1,r1,31
+ lsr.f lp_count,r1,5
+ lpne 2f
+ sr r0,[DC_FLDL]
+ add_s r0,r0,32
+#else /* !__ARC700__ */
+; FIXME: set up loop according to cache line size
+ lr r12,[D_CACHE_BUILD]
+ sub_s r0,r0,16
+ sub_s r1,r1,1
+ lsr_s r12,r12,16
+ asr_s r1,r1,4
+ bmsk_s r12,r12,3
+ asr_s r1,r1,r12
+ add.f lp_count,r1,1
+ mov_s r1,16
+ asl_s r1,r1,r12
+ lpne 2f
+ add r0,r0,r1
+ sr r0,[DC_FLDL]
+#endif /* __ARC700__ */
+2: b_s .Lcounters_cleared
+nocache:
+.Lcounters_cleared:
+ lr r1,[INT_VECTOR_BASE] ; disable timer0 interrupts
+ sr r3,[CONTROL0]
+ sr r3,[COUNT0]
+0: ld_s r0,[pcl,1f-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF
+0: ld_s r12,[pcl,1f+4-0b+((0b-.Lprofil) & 2)] ; 1f@GOTOFF + 4
+ st_s r0,[r1,24]; timer0 uses vector3
+ st_s r12,[r1,24+4]; timer0 uses vector3
+ ;sr 10000,[LIMIT0]
+ sr __profile_timer_cycles,[LIMIT0]
+ mov_s r12,3 ; enable timer interrupts; count only when not halted.
+ sr r12,[CONTROL0]
+ lr r12,[STATUS32]
+ bset_s r12,r12,1 ; allow level 1 interrupts
+ flag r12
+ mov_s r0,0
+ j_s [blink]
+ .balign 4
+1: j __profil_irq
+ ENDFUNC(__profil)
+
+ FUNC(__profil_irq)
+ .balign 4 ; make final jump unaligned to avoid delay penalty
+	.balign 32,0,12 ; make sure the code spans no more than two cache lines
+ nop_s
+__profil_irq:
+ push_s r0
+ ld r0,[__profil_offset]
+ push_s r1
+ lsr r1,ilink1,2
+ push_s r2
+ ldw.as.di r2,[r0,r1]
+ add1 r0,r0,r1
+ ld_s r1,[sp,4]
+ add_s r2,r2,1
+ bbit1 r2,16,nostore
+ stw.di r2,[r0]
+nostore:ld.ab r2,[sp,8]
+ pop_s r0
+ j.f [ilink1]
+ ENDFUNC(__profil_irq)
+
+; could save one cycle if the counters were allocated at link time and
+; the contents of __profil_offset were pre-computed at link time, like this:
+#if 0
+; __profil_offset needs to be PROVIDEd as __profile_base-text/4
+ .global __profil_offset
+ .balign 4
+__profil_irq:
+ push_s r0
+ lsr r0,ilink1,2
+ add1 r0,__profil_offset,r0
+ push_s r1
+ ldw.di r1,[r0]
+
+
+ add_s r1,r1,1
+ bbit1 r1,16,nostore
+ stw.di r1,[r0]
+nostore:pop_s r1
+ pop_s r0
+ j [ilink1]
+#endif /* 0 */
+#endif /* !A4 */
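
In C terms, the interrupt handler maintains a saturating 16-bit histogram at the fixed 0x8000 (1/2) scale that __profil insists on: one halfword counter for every four bytes of text. A model of one timer tick, assuming profil_base holds the byte address buf - offset/2 that __profil stores in __profil_offset:

    /* Model of __profil_irq.  pc / 4 selects the halfword counter
       ((pc - offset) / 2 bytes from buf); the counter saturates at
       0xffff instead of wrapping, matching the bbit1 r2,16 test.  */
    static unsigned short *profil_base;   /* buf - offset/2, as shorts */

    static void
    profil_tick (unsigned long pc)
    {
      unsigned short *counter = profil_base + pc / 4;
      unsigned int c = *counter + 1;
      if (c <= 0xffff)
        *counter = c;
    }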
diff --git a/gcc/config/arc/gmon/sys/gmon.h b/gcc/config/arc/gmon/sys/gmon.h
new file mode 100644
index 00000000000..b2cecdc8e71
--- /dev/null
+++ b/gcc/config/arc/gmon/sys/gmon.h
@@ -0,0 +1,217 @@
+/*-
+ * Copyright (c) 1982, 1986, 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2007 Arc International (UK) Ltd
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)gmon.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_GMON_H
+#define _SYS_GMON_H 1
+
+#if 0
+#include <features.h>
+#include <sys/types.h>
+#else
+#include <sys/types.h>
+#include "machine-gmon.h"
+#define attribute_hidden __attribute__ ((visibility("hidden")))
+#endif
+
+#include <stdint.h>
+
+/*
+ * See gmon_out.h for gmon.out format.
+ */
+
+/* structure emitted by "gcc -a". This must match struct bb in
+ gcc/libgcc2.c. It is OK for gcc to declare a longer structure as
+ long as the members below are present. */
+struct __bb
+{
+ long zero_word;
+ const char *filename;
+ long *counts;
+ long ncounts;
+ struct __bb *next;
+ const unsigned long *addresses;
+};
+
+extern struct __bb *__bb_head;
+
+/*
+ * histogram counters are unsigned shorts (according to the kernel).
+ */
+#define HISTCOUNTER unsigned short
+
+/*
+ * Fraction of text space to allocate for histogram counters; here, 1/2.
+ */
+#define HISTFRACTION 2
+
+/*
+ * Fraction of text space to allocate for from hash buckets.
+ * The value of HASHFRACTION is based on the minimum number of bytes
+ * of separation between two subroutine call points in the object code.
+ * Given MIN_SUBR_SEPARATION bytes of separation the value of
+ * HASHFRACTION is calculated as:
+ *
+ * HASHFRACTION = MIN_SUBR_SEPARATION / (2 * sizeof(short) - 1);
+ *
+ * For example, on the VAX, the shortest two call sequence is:
+ *
+ * calls $0,(r0)
+ * calls $0,(r0)
+ *
+ * which is separated by only three bytes, thus HASHFRACTION is
+ * calculated as:
+ *
+ * HASHFRACTION = 3 / (2 * 2 - 1) = 1
+ *
+ * Note that the division above rounds down, thus if MIN_SUBR_SEPARATION
+ * is less than three, this algorithm will not work!
+ *
+ * In practice, however, call instructions are rarely at a minimal
+ * distance. Hence, we will define HASHFRACTION to be 2 across all
+ * architectures. This saves a reasonable amount of space for
+ * profiling data structures without (in practice) sacrificing
+ * any granularity.
+ */
+#define HASHFRACTION 2
+
+/*
+ * Percent of text space to allocate for tostructs.
+ * This is a heuristic; we will fail with a warning when profiling programs
+ * with a very large number of very small functions, but that's
+ * normally OK.
+ * 2 is probably still a good value for normal programs.
+ * Profiling a test case with 64000 small functions will work if
+ * you raise this value to 3 and link statically (which bloats the
+ * text size, thus raising the number of arcs expected by the heuristic).
+ */
+#define ARCDENSITY 3
+
+/*
+ * Always allocate at least this many tostructs. This
+ * hides the inadequacy of the ARCDENSITY heuristic, at least
+ * for small programs.
+ */
+#define MINARCS 50
+
+/*
+ * The type used to represent indices into gmonparam.tos[].
+ */
+#define ARCINDEX u_long
+
+/*
+ * Maximum number of arcs we want to allow.
+ * Used to be max representable value of ARCINDEX minus 2, but now
+ * that ARCINDEX is a long, that's too large; we don't really want
+ * to allow a 48 gigabyte table.
+ * The old value of 1<<16 wasn't high enough in practice for large C++
+ * programs; will 1<<20 be adequate in the long run?  FIXME
+ */
+#define MAXARCS (1 << 20)
+
+struct tostruct {
+ u_long selfpc;
+ long count;
+ ARCINDEX link;
+};
+
+/*
+ * a raw arc, with pointers to the calling site and
+ * the called site and a count.
+ */
+struct rawarc {
+ u_long raw_frompc;
+ u_long raw_selfpc;
+ long raw_count;
+};
+
+/*
+ * general rounding functions.
+ */
+#define ROUNDDOWN(x,y) (((x)/(y))*(y))
+#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))
+
+/*
+ * The profiling data structures are housed in this structure.
+ */
+struct gmonparam {
+ long int state;
+ u_short *kcount;
+ u_long kcountsize;
+ ARCINDEX *froms;
+ u_long fromssize;
+ struct tostruct *tos;
+ u_long tossize;
+ long tolimit;
+ u_long lowpc;
+ u_long highpc;
+ u_long textsize;
+ u_long hashfraction;
+ long log_hashfraction;
+};
+extern struct gmonparam _gmonparam;
+
+/*
+ * Possible states of profiling.
+ */
+#define GMON_PROF_ON 0
+#define GMON_PROF_BUSY 1
+#define GMON_PROF_ERROR 2
+#define GMON_PROF_OFF 3
+
+/*
+ * Sysctl definitions for extracting profiling information from the kernel.
+ */
+#define GPROF_STATE 0 /* int: profiling enabling variable */
+#define GPROF_COUNT 1 /* struct: profile tick count buffer */
+#define GPROF_FROMS 2 /* struct: from location hash bucket */
+#define GPROF_TOS 3 /* struct: destination/count structure */
+#define GPROF_GMONPARAM 4 /* struct: profiling parameters (see above) */
+
+__BEGIN_DECLS
+
+/* Set up data structures and start profiling. */
+extern void __monstartup (u_long __lowpc, u_long __highpc) __THROW;
+extern void monstartup (u_long __lowpc, u_long __highpc) __THROW;
+
+/* Clean up profiling and write out gmon.out. */
+extern void _mcleanup (void) __THROW;
+
+extern void __write_profiling (void);
+extern int attribute_hidden __profile_frequency (void);
+
+extern u_long __arc_profile_desc_secstart[], __arc_profile_desc_secend[];
+extern u_long __arc_profile_forward_secstart[], __arc_profile_forward_secend[];
+extern u_long __arc_profile_counters_secstart[];
+
+__END_DECLS
+
+#endif /* sys/gmon.h */
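
For reference, the intended call sequence of this API; a usage sketch in which the text-segment bounds come from hypothetical linker symbols:

    #include <sys/gmon.h>

    extern char __text_start[], _etext[];  /* hypothetical linker symbols */

    void
    start_profiling (void)
    {
      __monstartup ((u_long) __text_start, (u_long) _etext);
    }

    /* ... and at program exit, _mcleanup () stops profiling and
       writes gmon.out.  */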
diff --git a/gcc/config/arc/gmon/sys/gmon_out.h b/gcc/config/arc/gmon/sys/gmon_out.h
new file mode 100644
index 00000000000..180eb12bb89
--- /dev/null
+++ b/gcc/config/arc/gmon/sys/gmon_out.h
@@ -0,0 +1,30 @@
+#define GMON_TAG_TIME_HIST 0
+#define GMON_TAG_CG_ARC 1
+#define GMON_TAG_BB_COUNT 2
+
+#define GMON_MAGIC "gmon"
+#define GMON_VERSION 1
+
+struct arc_gmon_hist_hdr
+{
+ char low_pc[4];
+ char high_pc[4];
+ char hist_size[4];
+ char prof_rate[4];
+ char dimen[15];
+ char dimen_abbrev;
+};
+
+struct gmon_cg_arc_record
+{
+ char afrompc[4];
+ char selfpc[4];
+ char count[4];
+};
+
+struct gmon_hdr
+{
+ char cookie[4];
+ char version[4];
+ char c[12];
+};
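
A gmon.out file is a gmon_hdr followed by records, each preceded by a one-byte tag (see write_gmon above). A partial reader sketch that decodes only the fixed-size call-graph records and stops at anything else; it assumes the 32-bit layout used here and skips version checking for brevity:

    #include <stdio.h>
    #include <string.h>
    #include <sys/gmon_out.h>

    /* Count GMON_TAG_CG_ARC records at the start of a gmon.out
       stream; histogram and basic-block records are variable-size
       and are not handled in this sketch.  */
    long
    count_cg_arcs (FILE *f)
    {
      struct gmon_hdr h;
      struct gmon_cg_arc_record arc;
      unsigned char tag;
      long n = 0;

      if (fread (&h, sizeof h, 1, f) != 1
          || memcmp (h.cookie, GMON_MAGIC, sizeof h.cookie) != 0)
        return -1;
      while (fread (&tag, 1, 1, f) == 1)
        {
          if (tag != GMON_TAG_CG_ARC)
            break;
          if (fread (&arc, sizeof arc, 1, f) != 1)
            return -1;
          n++;
        }
      return n;
    }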
diff --git a/gcc/config/arc/ieee-754/adddf3.S b/gcc/config/arc/ieee-754/adddf3.S
new file mode 100644
index 00000000000..4abbb5f9312
--- /dev/null
+++ b/gcc/config/arc/ieee-754/adddf3.S
@@ -0,0 +1,518 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+#if 0 /* DEBUG */
+ .global __adddf3
+ .balign 4
+__adddf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __adddf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __adddf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+ .global __subdf3
+ .balign 4
+__subdf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __subdf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __subdf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+#define __adddf3 __adddf3_asm
+#define __subdf3 __subdf3_asm
+#endif /* DEBUG */
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+
+/* inputs: DBL0, DBL1 (r0-r3)
+ output: DBL0 (r0, r1)
+ clobber: r2-r10, r12, flags
+ All NaN highword bits must be 1. NaN low word is random. */
+
+ .balign 4
+ .global __adddf3
+ .global __subdf3
+ .long 0x7ff00000 ; exponent mask
+ FUNC(__adddf3)
+ FUNC(__subdf3)
+__subdf3:
+ bxor_l DBL1H,DBL1H,31
+__adddf3:
+ ld r9,[pcl,-8]
+ bmsk r4,DBL0H,30
+ xor r10,DBL0H,DBL1H
+ and r6,DBL1H,r9
+ sub.f r12,r4,r6
+ asr_s r12,r12,20
+ blo .Ldbl1_gt
+ brhs r4,r9,.Linf_nan
+ brhs r12,32,.Large_shift
+ brne r12,0,.Lsmall_shift
+ brge r10,0,.Ladd_same_exp ; r12 == 0
+/* After subtracting, we need to normalize; when shifting to place the
+ leading 1 into position for the implicit 1 and adding that to DBL0H,
+ we increment the exponent. Thus, we have to subtract one more than
+   the shift count from the exponent beforehand.  Iff the exponent thus drops
+   below zero (before adding in the fraction with the leading one), we have
+   generated a denormal number.  Denormal handling is basically reducing the
+ shift count so that we produce a zero exponent instead; however, this way
+ the shift count can become zero (if we started out with exponent 1).
+ Therefore, a simple min operation is not good enough, since we don't
+ want to handle a zero normalizing shift in the main path.
+   On the plus side, we don't need to check for denorm input; the result
+   of subtracting these looks just the same as denormals generated during
+ subtraction. */
+ bmsk r7,DBL1H,30
+ cmp r4,r7
+ cmp.eq DBL0L,DBL1L
+ blo .L_rsub_same_exp
+ sub.f DBL0L,DBL0L,DBL1L
+ bmsk r12,DBL0H,19
+ bic DBL1H,DBL0H,r12
+ sbc.f r4,r4,r7
+ beq_l .Large_cancel
+ norm DBL1L,r4
+ b.d .Lsub_done_same_exp
+ sub r12,DBL1L,9
+.Linf_nan:
+ j_s.d [blink]
+ or.eq DBL0H,DBL0H,DBL1H
+ .balign 4
+.L_rsub_same_exp:
+ rsub.f DBL0L,DBL0L,DBL1L
+ bmsk r12,DBL1H,19
+ bic_s DBL1H,DBL1H,r12
+ sbc.f r4,r7,r4
+ beq_l .Large_cancel
+ norm DBL1L,r4
+
+ sub r12,DBL1L,9
+.Lsub_done_same_exp:
+ asl_s r12,r12,20
+ sub_s DBL1L,DBL1L,10
+ sub DBL0H,DBL1H,r12
+ xor.f 0,DBL0H,DBL1H
+ bmi .Ldenorm
+.Lpast_denorm:
+ neg_s r12,DBL1L
+ lsr r7,DBL0L,r12
+ asl r12,r4,DBL1L
+ asl_s DBL0L,DBL0L,DBL1L
+ add_s r12,r12,r7
+ j_s.d [blink]
+ add_l DBL0H,DBL0H,r12
+ .balign 4
+.Ladd_same_exp:
+ /* This is a special case because we can't test for need to shift
+ down by checking if bit 20 of DBL0H changes. OTOH, here we know
+ that we always need to shift down. */
+ ; The implicit 1 of DBL0 is not shifted together with the
+ ; fraction, thus effectively doubled, compensating for not setting
+ ; implicit1 for DBL1
+ add_s r12,DBL0L,DBL1L
+ lsr.f 0,r12,2 ; round to even
+ breq r6,0,.Ldenorm_add
+ adc.f DBL0L,DBL0L,DBL1L
+ sub r7,DBL1H,DBL0H
+ sub1 r7,r7,r9 ; boost exponent by 2/2
+ rrc DBL0L,DBL0L
+ asr.f r7,r7 ; DBL1.fraction/2 - DBL0.fraction/2 ; exp++
+ add.cs.f DBL0L,DBL0L,0x80000000
+ add_l DBL0H,DBL0H,r7 ; DBL0.implicit1 not shifted for DBL1.implicit1
+ add.cs DBL0H,DBL0H,1
+ bic.f 0,r9,DBL0H ; check for overflow -> infinity.
+ jne_l [blink]
+ and DBL0H,DBL0H,0xfff00000
+ j_s.d [blink]
+ mov_s DBL0L,0
+ .balign 4
+.Large_shift:
+ brhs r12,55,.Lret_dbl0
+ bmsk_s DBL1H,DBL1H,19
+ brne r6,0,.Lno_denorm_large_shift
+ brhi.d r12,33,.Lfixed_denorm_large_shift
+ sub_s r12,r12,1
+ breq r12,31, .Lfixed_denorm_small_shift
+.Lshift32:
+ mov_s r12,DBL1L
+ mov_s DBL1L,DBL1H
+ brlt.d r10,0,.Lsub
+ mov_s DBL1H,0
+ b_s .Ladd
+.Ldenorm_add:
+ cmp_s r12,DBL1L
+ mov_s DBL0L,r12
+ j_s.d [blink]
+ adc DBL0H,r4,DBL1H
+
+.Lret_dbl0:
+ j_s [blink]
+ .balign 4
+.Lsmall_shift:
+ breq.d r6,0,.Ldenorm_small_shift
+ bmsk_s DBL1H,DBL1H,19
+ bset_s DBL1H,DBL1H,20
+.Lfixed_denorm_small_shift:
+ neg r8,r12
+ asl r4,DBL1H,r8
+ lsr_l DBL1H,DBL1H,r12
+ lsr r5,DBL1L,r12
+ asl r12,DBL1L,r8
+ brge.d r10,0,.Ladd
+ or DBL1L,r4,r5
+/* subtract, abs(DBL0) > abs(DBL1) */
+/* DBL0H, DBL0L: original values
+ DBL1H, DBL1L: fraction with explicit leading 1, shifted into place
+ r4: orig. DBL0H & 0x7fffffff
+ r6: orig. DBL1H & 0x7ff00000
+ r9: 0x7ff00000
+ r10: orig. DBL0H ^ DBL1H
+ r12: guard bits */
+ .balign 4
+.Lsub:
+ neg.f r12,r12
+ mov_s r7,DBL1H
+ bmsk r5,DBL0H,19
+ sbc.f DBL0L,DBL0L,DBL1L
+ bic DBL1H,DBL0H,r5
+ bset r5,r5,20
+ sbc.f r4,r5,r7
+ beq_l .Large_cancel_sub
+ norm DBL1L,r4
+ bmsk r6,DBL1H,30
+.Lsub_done:
+ sub_s DBL1L,DBL1L,9
+ breq DBL1L,1,.Lsub_done_noshift
+ asl r5,DBL1L,20
+ sub_s DBL1L,DBL1L,1
+ brlo r6,r5,.Ldenorm_sub
+ sub DBL0H,DBL1H,r5
+.Lpast_denorm_sub:
+ neg_s DBL1H,DBL1L
+ lsr r6,r12,DBL1H
+ asl_s r12,r12,DBL1L
+ and r8,r6,1
+ add1.f 0,r8,r12
+ add.ne.f r12,r12,r12
+ asl r8,DBL0L,DBL1L
+ lsr r12,DBL0L,DBL1H
+ adc.f DBL0L,r8,r6
+ asl r5,r4,DBL1L
+ add_s DBL0H,DBL0H,r12
+ j_s.d [blink]
+ adc DBL0H,DBL0H,r5
+
+ .balign 4
+.Lno_denorm_large_shift:
+ breq.d r12,32,.Lshift32
+ bset_l DBL1H,DBL1H,20
+.Lfixed_denorm_large_shift:
+ neg r8,r12
+ asl r4,DBL1H,r8
+ lsr r5,DBL1L,r12
+ asl.f 0,DBL1L,r8
+ lsr DBL1L,DBL1H,r12
+ or r12,r4,r5
+ tst.eq r12,1
+ or.ne r12,r12,2
+ brlt.d r10,0,.Lsub
+ mov_s DBL1H,0
+ b_l .Ladd
+
+ ; If a denorm is produced without shifting, we have an exact result -
+ ; no need for rounding.
+ .balign 4
+.Ldenorm_sub:
+ lsr DBL1L,r6,20
+ xor DBL0H,r6,DBL1H
+ brne.d DBL1L,1,.Lpast_denorm_sub
+ sub_s DBL1L,DBL1L,1
+.Lsub_done_noshift:
+ add.f 0,r12,r12
+ btst.eq DBL0L,0
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ bclr r4,r4,20
+ j_s.d [blink]
+ adc DBL0H,DBL1H,r4
+
+ .balign 4
+.Ldenorm_small_shift:
+ brne.d r12,1,.Lfixed_denorm_small_shift
+ sub_l r12,r12,1
+ brlt r10,0,.Lsub
+.Ladd: ; bit 20 of DBL1H is clear and bit 0 of r12 does not matter
+ add.f DBL0L,DBL0L,DBL1L
+ add_s DBL1H,DBL1H,DBL0H
+ add.cs DBL1H,DBL1H,1
+ xor_l DBL0H,DBL0H,DBL1H
+ bbit0 DBL0H,20,.Lno_shiftdown
+ lsr.f DBL0H,DBL1H
+ and r4,DBL0L,2
+ bmsk DBL0H,DBL0H,18
+ sbc DBL0H,DBL1H,DBL0H
+ rrc.f DBL0L,DBL0L
+ or.f r12,r12,r4
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
+ jne.d [blink] ; ... non-zero fraction
+ add.cs DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ bmsk DBL1H,DBL0H,19
+ j_s.d [blink]
+ bic_s DBL0H,DBL0H,DBL1H
+.Lno_shiftdown:
+ mov_s DBL0H,DBL1H
+ add.f 0,r12,r12
+ btst.eq DBL0L,0
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+ .balign 4
+.Ldenorm:
+ bmsk DBL0H,DBL1H,30
+ lsr r12,DBL0H,20
+ xor_s DBL0H,DBL0H,DBL1H
+ sub_l DBL1L,r12,1
+ bne .Lpast_denorm
+ j_s.d [blink]
+ add_l DBL0H,DBL0H,r4
+ .balign 4
+.Large_cancel:
+ norm.f DBL1L,DBL0L
+ bmsk DBL0H,DBL1H,30
+ add_s DBL1L,DBL1L,22
+ mov.mi DBL1L,21
+ add_s r12,DBL1L,1
+ asl_s r12,r12,20
+ beq_s .Lret0
+ brhs.d DBL0H,r12,.Lpast_denorm_large_cancel
+ sub DBL0H,DBL1H,r12
+ bmsk DBL0H,DBL1H,30
+ lsr r12,DBL0H,20
+ xor_s DBL0H,DBL0H,DBL1H
+ sub.f DBL1L,r12,1
+ jeq_l [blink]
+.Lpast_denorm_large_cancel:
+ rsub.f r7,DBL1L,32
+ lsr r7,DBL0L,r7
+ asl DBL0L,DBL0L,DBL1L
+ mov.ls r7,DBL0L
+ add_s DBL0H,DBL0H,r7
+ j_s.d [blink]
+ mov.ls DBL0L,0
+.Lret0:
+ j_s.d [blink]
+ mov_l DBL0H,0
+
+/* r4:DBL0L:r12 : unnormalized result fraction
+ DBL1H: result sign and exponent */
+/* When seeing large cancellation, only the topmost guard bit might be set. */
+ .balign 4
+.Large_cancel_sub:
+ norm.f DBL1L,DBL0L
+ bpnz.d 0f
+ bmsk DBL0H,DBL1H,30
+ mov r5,22<<20
+ bne.d 1f
+ mov_s DBL1L,21
+ bset r5,r5,5+20
+ add_s DBL1L,DBL1L,32
+ brne r12,0,1f
+ j_s.d [blink]
+ mov_l DBL0H,0
+ .balign 4
+0: add r5,DBL1L,23
+ asl r5,r5,20
+ add_s DBL1L,DBL1L,22
+1: brlo DBL0H,r5,.Ldenorm_large_cancel_sub
+ sub DBL0H,DBL1H,r5
+.Lpast_denorm_large_cancel_sub:
+ rsub.f r7,DBL1L,32
+ lsr r12,r12,r7
+ lsr r7,DBL0L,r7
+ asl_s DBL0L,DBL0L,DBL1L
+ add.ge DBL0H,DBL0H,r7
+ add_s DBL0L,DBL0L,r12
+ add.lt DBL0H,DBL0H,DBL0L
+ mov.eq DBL0L,r12
+ j_s.d [blink]
+ mov.lt DBL0L,0
+ .balign 4
+.Ldenorm_large_cancel_sub:
+ lsr r5,DBL0H,20
+ xor_s DBL0H,DBL0H,DBL1H
+ brne.d r5,1,.Lpast_denorm_large_cancel_sub
+ sub DBL1L,r5,1
+ j_l [blink] ; denorm, no shift -> no rounding needed.
+
+/* r4: DBL0H & 0x7fffffff
+ r6: DBL1H & 0x7ff00000
+ r9: 0x7ff00000
+ r10: sign difference
+ r12: shift count (negative) */
+ .balign 4
+.Ldbl1_gt:
+ brhs r6,r9,.Lret_dbl1 ; inf or NaN
+ neg r8,r12
+ brhs r8,32,.Large_shift_dbl0
+.Lsmall_shift_dbl0:
+ breq.d r6,0,.Ldenorm_small_shift_dbl0
+ bmsk_s DBL0H,DBL0H,19
+ bset_s DBL0H,DBL0H,20
+.Lfixed_denorm_small_shift_dbl0:
+ asl r4,DBL0H,r12
+ lsr DBL0H,DBL0H,r8
+ lsr r5,DBL0L,r8
+ asl r12,DBL0L,r12
+ brge.d r10,0,.Ladd_dbl1_gt
+ or DBL0L,r4,r5
+/* subtract, abs(DBL0) < abs(DBL1) */
+/* DBL0H, DBL0L: fraction with explicit leading 1, shifted into place
+ DBL1H, DBL1L: original values
+ r6: orig. DBL1H & 0x7ff00000
+ r9: 0x7ff00000
+ r12: guard bits */
+ .balign 4
+.Lrsub:
+ neg.f r12,r12
+ bmsk r7,DBL1H,19
+ mov_s r5,DBL0H
+ sbc.f DBL0L,DBL1L,DBL0L
+ bic DBL1H,DBL1H,r7
+ bset r7,r7,20
+ sbc.f r4,r7,r5
+ beq_l .Large_cancel_sub
+ norm DBL1L,r4
+ b_l .Lsub_done ; note: r6 is already set up.
+
+.Lret_dbl1:
+ mov_s DBL0H,DBL1H
+ j_s.d [blink]
+ mov_l DBL0L,DBL1L
+ .balign 4
+.Ldenorm_small_shift_dbl0:
+ sub.f r8,r8,1
+ bne.d .Lfixed_denorm_small_shift_dbl0
+ add_s r12,r12,1
+ brlt r10,0,.Lrsub
+.Ladd_dbl1_gt: ; bit 20 of DBL0H is clear and bit 0 of r12 does not matter
+ add.f DBL0L,DBL0L,DBL1L
+ add_s DBL0H,DBL0H,DBL1H
+ add.cs DBL0H,DBL0H,1
+ xor DBL1H,DBL0H,DBL1H
+ bbit0 DBL1H,20,.Lno_shiftdown_dbl1_gt
+ lsr.f DBL1H,DBL0H
+ and r4,DBL0L,2
+ bmsk DBL1H,DBL1H,18
+ sbc DBL0H,DBL0H,DBL1H
+ rrc.f DBL0L,DBL0L
+ or.f r12,r12,r4
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ bic.f 0,r9,DBL0H ; check for generating infinity with possible ...
+ jne.d [blink] ; ... non-zero fraction
+ add.cs DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ bmsk DBL1H,DBL0H,19
+ j_s.d [blink]
+ bic_s DBL0H,DBL0H,DBL1H
+.Lno_shiftdown_dbl1_gt:
+ add.f 0,r12,r12
+ btst.eq DBL0L,0
+ cmp.eq r12,r12
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+
+ .balign 4
+.Large_shift_dbl0:
+ brhs r8,55,.Lret_dbl1
+ bmsk_s DBL0H,DBL0H,19
+ brne r6,0,.Lno_denorm_large_shift_dbl0
+ add_s r12,r12,1
+ brne.d r8,33,.Lfixed_denorm_large_shift_dbl0
+ sub r8,r8,1
+ bset_s DBL0H,DBL0H,20
+.Lshift32_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL0H
+ brlt.d r10,0,.Lrsub
+ mov_s DBL0H,0
+ b_s .Ladd_dbl1_gt
+
+ .balign 4
+.Lno_denorm_large_shift_dbl0:
+ breq.d r8,32,.Lshift32_dbl0
+ bset_l DBL0H,DBL0H,20
+.Lfixed_denorm_large_shift_dbl0:
+ asl r4,DBL0H,r12
+ lsr r5,DBL0L,r8
+ asl.f 0,DBL0L,r12
+ lsr DBL0L,DBL0H,r8
+ or r12,r4,r5
+ tst.eq r12,1
+ or.ne r12,r12,2
+ brlt.d r10,0,.Lrsub
+ mov_s DBL0H,0
+ b_l .Ladd_dbl1_gt
+ ENDFUNC(__adddf3)
+ ENDFUNC(__subdf3)
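
The flag play around the guard word (e.g. at .Lsub_done_noshift: add.f 0,r12,r12 / btst.eq DBL0L,0 / cmp.eq r12,r12 / add.cs.f DBL0L,DBL0L,1) implements round-to-nearest-even. A C model of the decision, where the guard word's MSB is the guard bit and its remaining bits collectively form the sticky bit:

    #include <stdint.h>

    /* Round-to-nearest-even: round up when the guard bit is set and
       either some sticky bit or the fraction's LSB is set.  */
    static uint64_t
    round_rne (uint64_t frac, uint32_t guard)
    {
      if ((guard & 0x80000000u) != 0
          && ((guard & 0x7fffffffu) != 0 || (frac & 1) != 0))
        frac++;
      return frac;
    }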
diff --git a/gcc/config/arc/ieee-754/addsf3.S b/gcc/config/arc/ieee-754/addsf3.S
new file mode 100644
index 00000000000..623b998ad1f
--- /dev/null
+++ b/gcc/config/arc/ieee-754/addsf3.S
@@ -0,0 +1,344 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+#if 0 /* DEBUG */
+ .global __addsf3
+ FUNC(__addsf3)
+ .balign 4
+__addsf3:
+ push_s blink
+ push_s r1
+ bl.d __addsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __addsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__addsf3)
+ .global __subsf3
+ FUNC(__subsf3)
+ .balign 4
+__subsf3:
+ push_s blink
+ push_s r1
+ bl.d __subsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __subsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__subsf3)
+#define __addsf3 __addsf3_asm
+#define __subsf3 __subsf3_asm
+#endif /* DEBUG */
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+
+/* inputs: DBL0, DBL1 (r0-r3)
+ output: DBL0 (r0, r1)
+ clobber: r2-r10, r12, flags
+ All NaN highword bits must be 1. NaN low word is random. */
+
+ .balign 4
+ .global __addsf3
+ .global __subsf3
+ FUNC(__addsf3)
+ FUNC(__subsf3)
+ .long 0x7f800000 ; exponent mask
+__subsf3:
+ bxor_l r1,r1,31
+__addsf3:
+ ld r9,[pcl,-8]
+ bmsk r4,r0,30
+ xor r10,r0,r1
+ and r6,r1,r9
+ sub.f r12,r4,r6
+ asr_s r12,r12,23
+ blo .Ldbl1_gt
+ brhs r4,r9,.Linf_nan
+ brne r12,0,.Lsmall_shift
+ brge r10,0,.Ladd_same_exp ; r12 == 0
+/* After subtracting, we need to normalize; when shifting to place the
+ leading 1 into position for the implicit 1 and adding that to DBL0,
+ we increment the exponent. Thus, we have to subtract one more than
+   the shift count from the exponent beforehand.  Iff the exponent thus drops
+   below zero (before adding in the fraction with the leading one), we have
+   generated a denormal number.  Denormal handling is basically reducing the
+ shift count so that we produce a zero exponent instead; FWIW, this way
+ the shift count can become zero (if we started out with exponent 1).
+   On the plus side, we don't need to check for denorm input; the result
+   of subtracting these looks just the same as denormals generated during
+ subtraction. */
+ bmsk r7,r1,30
+ breq r4,r7,.Lret0
+ sub.f r5,r4,r7
+ lsr r12,r4,23
+ neg.cs r5,r5
+ norm r3,r5
+ bmsk r2,r0,22
+ sub_s r3,r3,6
+ min r12,r12,r3
+ bic r1,r0,r2
+ sub_s r3,r12,1
+ asl_s r12,r12,23
+ asl r2,r5,r3
+ sub_s r1,r1,r12
+ add_s r0,r1,r2
+ j_s.d [blink]
+ bxor.cs r0,r0,31
+.Linf_nan:
+ j_s.d [blink]
+ or.eq r0,r0,r1
+ .balign 4
+.Ladd_same_exp:
+ /* This is a special case because we can't test for need to shift
+ down by checking if bit 23 of DBL0 changes. OTOH, here we know
+ that we always need to shift down. */
+ ; Adding the two floating point numbers together makes the sign
+ ; cancel out and appear as carry; the exponent is doubled, and the
+ ; fraction likewise ends up shifted left by one.  The two implicit
+ ; ones of the sources make an implicit 1 of the result, again
+ ; non-existent in a place shifted by one.
+ add.f r0,r0,r1
+ btst_s r0,1
+ breq r6,0,.Ldenorm_add
+ add.ne r0,r0,1 ; round to even.
+ rrc r0,r0
+ bmsk r1,r9,23
+ add r0,r0,r1 ; increment exponent
+ bic.f 0,r9,r0; check for overflow -> infinity.
+ jne_l [blink]
+ mov_s r0,r9
+ j_s.d [blink]
+ bset.cs r0,r0,31
+
+.Ldenorm_add:
+ j_s.d [blink]
+ add r0,r4,r1
+
+.Lret_dbl0:
+ j_s [blink]
+
+ .balign 4
+.Lsmall_shift:
+ brhi r12,25,.Lret_dbl0
+ breq.d r6,0,.Ldenorm_small_shift
+ bmsk_s r1,r1,22
+ bset_s r1,r1,23
+.Lfixed_denorm_small_shift:
+ neg r8,r12
+ asl r5,r1,r8
+ brge.d r10,0,.Ladd
+ lsr_l r1,r1,r12
+/* subtract, abs(DBL0) > abs(DBL1) */
+/* DBL0: original values
+ DBL1: fraction with explicit leading 1, shifted into place
+ r4: orig. DBL0 & 0x7fffffff
+ r6: orig. DBL1 & 0x7f800000
+ r9: 0x7f800000
+ r10: orig. DBL0H ^ DBL1H
+ r5 : guard bits */
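+/* The guard bits collected in r5 later drive IEEE round-to-nearest-
+   even; in C terms, with `frac' the kept fraction and `guard' the
+   32 bit word of shifted-out bits (MSB = half-ulp bit), the decision
+   sketched is:
+
+     if (guard > 0x80000000u                      // above the halfway point
+         || (guard == 0x80000000u && (frac & 1))) // tie: round to even
+       frac++;
+ */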
+ .balign 4
+.Lsub:
+ neg.f r12,r5
+ bmsk r3,r0,22
+ bset r5,r3,23
+ sbc.f r4,r5,r1
+ beq.d .Large_cancel_sub
+ bic r7,r0,r3
+ norm r3,r4
+ bmsk r6,r7,30
+.Lsub_done:
+ sub_s r3,r3,6
+ breq r3,1,.Lsub_done_noshift
+ asl r5,r3,23
+ sub_l r3,r3,1
+ brlo r6,r5,.Ldenorm_sub
+ sub r0,r7,r5
+ neg_s r1,r3
+ lsr.f r2,r12,r1
+ asl_s r12,r12,r3
+ btst_s r2,0
+ bmsk.eq.f r12,r12,30
+ asl r5,r4,r3
+ add_s r0,r0,r2
+ adc.ne r0,r0,0
+ j_s.d [blink]
+ add_l r0,r0,r5
+
+.Lret0:
+ j_s.d [blink]
+ mov_l r0,0
+
+ .balign 4
+.Ldenorm_small_shift:
+ brne.d r12,1,.Lfixed_denorm_small_shift
+ sub_s r12,r12,1
+ brlt.d r10,0,.Lsub
+ mov_s r5,r12 ; zero r5, and align following code
+.Ladd: ; Both bit 23 of DBL1 and bit 0 of r5 are clear.
+ bmsk r2,r0,22
+ add_s r2,r2,r1
+ bbit0.d r2,23,.Lno_shiftdown
+ add_s r0,r0,r1
+ bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
+ bmsk r1,r2,22
+ lsr.ne.f r2,r2,2; cc: even ; hi: might round down
+ lsr.ne r1,r1,1
+ rcmp.hi r5,1; hi : round down
+ bclr.hi r0,r0,0
+ j_l.d [blink]
+ sub_s r0,r0,r1
+
+/* r4: DBL0H & 0x7fffffff
+ r6: DBL1H & 0x7f800000
+ r9: 0x7f800000
+ r10: sign difference
+ r12: shift count (negative) */
+ .balign 4
+.Ldbl1_gt:
+ brhs r6,r9,.Lret_dbl1 ; inf or NaN
+ neg r8,r12
+ brhi r8,25,.Lret_dbl1
+.Lsmall_shift_dbl0:
+ breq.d r6,0,.Ldenorm_small_shift_dbl0
+ bmsk_s r0,r0,22
+ bset_s r0,r0,23
+.Lfixed_denorm_small_shift_dbl0:
+ asl r5,r0,r12
+ brge.d r10,0,.Ladd_dbl1_gt
+ lsr r0,r0,r8
+/* subtract, abs(DBL0) < abs(DBL1) */
+/* DBL0: fraction with explicit leading 1, shifted into place
+ DBL1: original value
+ r6: orig. DBL1 & 0x7f800000
+ r9: 0x7f800000
+ r5: guard bits */
+ .balign 4
+.Lrsub:
+ neg.f r12,r5
+ bmsk r5,r1,22
+ bic r7,r1,r5
+ bset r5,r5,23
+ sbc.f r4,r5,r0
+ bne.d .Lsub_done ; note: r6 is already set up.
+ norm r3,r4
+ /* Fall through */
+
+/* r4:r12 : unnormalized result fraction
+ r7: result sign and exponent */
+/* When seeing large cancellation, only the topmost guard bit might be set. */
+ .balign 4
+.Large_cancel_sub:
+ breq_s r12,0,.Lret0
+ sub r0,r7,24<<23
+ xor.f 0,r0,r7 ; test if exponent is negative
+ tst.pl r9,r0 ; test if exponent is zero
+ jpnz [blink] ; return if non-denormal result
+ bmsk r6,r7,30
+ lsr r3,r6,23
+ xor r0,r6,r7
+ sub_s r3,r3,24-22
+ j_s.d [blink]
+ bset r0,r0,r3
+
+ ; If a denorm is produced, we have an exact result -
+ ; no need for rounding.
+ .balign 4
+.Ldenorm_sub:
+ sub r3,r6,1
+ lsr.f r3,r3,23
+ xor r0,r6,r7
+ neg_s r1,r3
+ asl.ne r4,r4,r3
+ lsr_s r12,r12,r1
+ add_s r0,r0,r4
+ j_s.d [blink]
+ add.ne r0,r0,r12
+
+ .balign 4
+.Lsub_done_noshift:
+ add.f 0,r12,r12
+ btst.eq r4,0
+ bclr r4,r4,23
+ add r0,r7,r4
+ j_s.d [blink]
+ adc.ne r0,r0,0
+
+ .balign 4
+.Lno_shiftdown:
+ add.f 0,r5,r5
+ btst.eq r0,0
+ cmp.eq r5,r5
+ j_s.d [blink]
+ add.cs r0,r0,1
+
+.Lret_dbl1:
+ j_s.d [blink]
+ mov_l r0,r1
+ .balign 4
+.Ldenorm_small_shift_dbl0:
+ sub.f r8,r8,1
+ bne.d .Lfixed_denorm_small_shift_dbl0
+ add_s r12,r12,1
+ brlt.d r10,0,.Lrsub
+ mov r5,0
+.Ladd_dbl1_gt: ; both bit 23 of DBL0 and bit 0 of r5 are clear.
+ bmsk r2,r1,22
+ add_s r2,r2,r0
+ bbit0.d r2,23,.Lno_shiftdown_dbl1_gt
+ add_s r0,r1,r0
+ bic.f 0,r9,r0; check for overflow -> infinity; eq : infinity
+ bmsk r1,r2,22
+ lsr.ne.f r2,r2,2; cc: even ; hi: might round down
+ lsr.ne r1,r1,1
+ rcmp.hi r5,1; hi : round down
+ bclr.hi r0,r0,0
+ j_l.d [blink]
+ sub_s r0,r0,r1
+
+ .balign 4
+.Lno_shiftdown_dbl1_gt:
+ add.f 0,r5,r5
+ btst.eq r0,0
+ cmp.eq r5,r5
+ j_s.d [blink]
+ add.cs r0,r0,1
+ ENDFUNC(__addsf3)
+ ENDFUNC(__subsf3)
diff --git a/gcc/config/arc/ieee-754/arc-ieee-754.h b/gcc/config/arc/ieee-754/arc-ieee-754.h
new file mode 100644
index 00000000000..cc17d18d9d4
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc-ieee-754.h
@@ -0,0 +1,31 @@
+#ifdef __LITTLE_ENDIAN__
+#define DBL0L r0
+#define DBL0H r1
+#define DBL1L r2
+#define DBL1H r3
+#else
+#define DBL0L r1
+#define DBL0H r0
+#define DBL1L r3
+#define DBL1H r2
+#endif
+#define add_l add
+#define asr_l asr
+#define j_l j
+#define jne_l jne
+#define jeq_l jeq
+#define or_l or
+#define mov_l mov
+#define b_l b
+#define beq_l beq
+#define bne_l bne
+#define brne_l brne
+#define bset_l bset
+#define sub_l sub
+#define sub1_l sub1
+#define lsr_l lsr
+#define xor_l xor
+#define bic_l bic
+#define bmsk_l bmsk
+#define bxor_l bxor
+#define bcs_s blo_s
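+/* The DBL* names select the register halves of a double argument; in
+   C terms the little-endian mapping corresponds to (a sketch):
+
+     union { double d; struct { unsigned lo, hi; } w; } u;
+     // u.w.lo arrives in r0 (DBL0L), u.w.hi in r1 (DBL0H);
+     // the second argument uses r2/r3, and big-endian swaps each pair.
+ */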
diff --git a/gcc/config/arc/ieee-754/arc600-dsp/divdf3.S b/gcc/config/arc/ieee-754/arc600-dsp/divdf3.S
new file mode 100644
index 00000000000..38fc2525823
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600-dsp/divdf3.S
@@ -0,0 +1,424 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/*
+ to calculate a := b/x as b*y, with y := 1/x:
+ - x is in the range [1..2)
+ - calculate 15..18 bit inverse y0 using a table of approximating
+   polynomials.  Precision is higher for polynomials used to evaluate
+   input with larger value.
+ - Do one Newton-Raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ The truncation error for either is less than 1 + x/2 ulp.
+ A 31 bit inverse can be simply calculated by using x with implicit 1
+ and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
+ with the bare fraction part of x, then add in y0^2 for the implicit
+ 1 of x.
+ - If calculating a 31 bit inverse, the systematic error is less than
+ -1 ulp; likewise, for 32 bit, it is less than -2 ulp.
+ - If we calculate our seed with a 32 bit fraction, we can achieve a
+   tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
+   only need to take the step to calculate the 2nd stage rest and
+   rounding adjust 1/32nd of the time.  However, if we use a 20 bit
+   fraction for the seed, the negative error can exceed -2 ulp/128, (2)
+   thus for a simple add / tst check, we need to do the 2nd stage
+   rest calculation / rounding adjust 1/16th of the time.
+ (1): The inexactness of the 32 bit inverse contributes an error in the
+ range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
+ rest contributes an error < +1/x ulp/128 . In the interval [1,2),
+ x/2 + 1/x <= 1.5 .
+ (2): Unless proven otherwise. I have not actually looked for an
+ example where -2 ulp/128 is exceeded, and my calculations indicate
+ that the excess, if existent, is less than -1/512 ulp.
+ ??? The algorithm is still based on the ARC700 optimized code.
+ Maybe we could make better use of 32x16 bit multiply, or 64 bit multiply
+ results.
+ */
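+/* A minimal C model of the refinement described above, with `x' the
+   divisor fraction as 1.31 fixed point in [1,2) and `y0' the 15..18
+   bit table seed (fixed-point scaling glossed over):
+
+     y1 = y0 * (2 - x * y0);  // one Newton-Raphson step: the error
+                              // roughly squares, ~15 bits -> ~30 bits
+
+   The mulu64 / sub pairs in the code below correspond to this update,
+   with the `2 -' folded into the fixed-point operand scaling.  */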
+#include "../arc-ieee-754.h"
+#define mlo acc2
+#define mhi acc1
+#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
+#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
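+/* The backtick in these macros is the assembler's statement separator:
+   each macro expands to a low/high multiply pair whose 64 bit product
+   in effect lands in the accumulator acc1:acc2, aliased to mhi:mlo
+   above.  */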
+
+/* N.B. fp-bit.c does double rounding on denormal numbers. */
+#if 0 /* DEBUG */
+ .global __divdf3
+ FUNC(__divdf3)
+ .balign 4
+__divdf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __divdf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __divdf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ and r12,DBL0H,DBL1H
+ bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__divdf3)
+#define __divdf3 __divdf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divdf3)
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
+.Ldivtab:
+ .long 0xfc0fffe1
+ .long 0xf46ffdfb
+ .long 0xed1ffa54
+ .long 0xe61ff515
+ .long 0xdf7fee75
+ .long 0xd91fe680
+ .long 0xd2ffdd52
+ .long 0xcd1fd30c
+ .long 0xc77fc7cd
+ .long 0xc21fbbb6
+ .long 0xbcefaec0
+ .long 0xb7efa100
+ .long 0xb32f92bf
+ .long 0xae8f83b7
+ .long 0xaa2f7467
+ .long 0xa5ef6479
+ .long 0xa1cf53fa
+ .long 0x9ddf433e
+ .long 0x9a0f3216
+ .long 0x965f2091
+ .long 0x92df0f11
+ .long 0x8f6efd05
+ .long 0x8c1eeacc
+ .long 0x88eed876
+ .long 0x85dec615
+ .long 0x82eeb3b9
+ .long 0x800ea10b
+ .long 0x7d3e8e0f
+ .long 0x7a8e7b3f
+ .long 0x77ee6836
+ .long 0x756e5576
+ .long 0x72fe4293
+ .long 0x709e2f93
+ .long 0x6e4e1c7f
+ .long 0x6c0e095e
+ .long 0x69edf6c5
+ .long 0x67cde3a5
+ .long 0x65cdd125
+ .long 0x63cdbe25
+ .long 0x61ddab3f
+ .long 0x600d991f
+ .long 0x5e3d868c
+ .long 0x5c6d7384
+ .long 0x5abd615f
+ .long 0x590d4ecd
+ .long 0x576d3c83
+ .long 0x55dd2a89
+ .long 0x545d18e9
+ .long 0x52dd06e9
+ .long 0x516cf54e
+ .long 0x4ffce356
+ .long 0x4e9cd1ce
+ .long 0x4d3cbfec
+ .long 0x4becae86
+ .long 0x4aac9da4
+ .long 0x496c8c73
+ .long 0x483c7bd3
+ .long 0x470c6ae8
+ .long 0x45dc59af
+ .long 0x44bc4915
+ .long 0x43ac3924
+ .long 0x428c27fb
+ .long 0x418c187a
+ .long 0x407c07bd
+
+__divdf3_support: /* This label makes debugger output saner. */
+ .balign 4
+.Ldenorm_dbl1:
+ brge r6, \
+ 0x43500000,.Linf_NaN ; large number / denorm -> Inf
+ bmsk.f r12,DBL1H,19
+ mov.eq r12,DBL1L
+ mov.eq DBL1L,0
+ sub.eq r7,r7,32
+ norm.f r11,r12 ; flag for x/0 -> Inf check
+ beq_s .Linf_NaN
+ mov.mi r11,0
+ add.pl r11,r11,1
+ add_s r12,r12,r12
+ asl r8,r12,r11
+ rsub r12,r11,31
+ lsr r12,DBL1L,r12
+ tst_s DBL1H,DBL1H
+ or r8,r8,r12
+ lsr r4,r8,26
+ lsr DBL1H,r8,12
+ ld.as r4,[r10,r4]
+ bxor.mi DBL1H,DBL1H,31
+ sub r11,r11,11
+ asl DBL1L,DBL1L,r11
+ sub r11,r11,1
+ mulu64 (r4,r8)
+ sub r7,r7,r11
+ b.d .Lpast_denorm_dbl1
+ asl r7,r7,20
+
+.Linf_NaN:
+ tst_s DBL0L,DBL0L ; 0/0 -> NaN
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr.eq.f DBL0H,DBL0H,31
+ bmsk DBL0H,DBL1H,30
+ xor_s DBL0H,DBL0H,DBL1H
+ sub.eq DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ j_s.d [blink]
+ or DBL0H,DBL0H,r9
+ .balign 4
+.Lret0_2:
+ xor_s DBL1H,DBL1H,DBL0H
+ mov_s DBL0L,0
+ bmsk DBL0H,DBL1H,30
+ j_s.d [blink]
+ xor_s DBL0H,DBL0H,DBL1H
+ .balign 4
+ .global __divdf3
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
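+/* sub3 subtracts its last operand shifted left by 3, so the
+   `sub3 r10,pcl,51' below computes r10 = pcl - 51*8, i.e. the address
+   of .Ldivtab, given that the distance is the required multiple of 8
+   (pcl is the program counter rounded down to a 4-byte boundary).  */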
+__divdf3:
+ asl r8,DBL1H,12
+ lsr r4,r8,26
+ sub3 r10,pcl,51;(.-.Ldivtab) >> 3
+ ld.as r9,[pcl,-104]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+ ld.as r4,[r10,r4]
+ lsr r12,DBL1L,20
+ and.f r7,DBL1H,r9
+ or r8,r8,r12
+ mulu64 (r4,r8)
+ beq.d .Ldenorm_dbl1
+.Lpast_denorm_dbl1:
+ and.f r6,DBL0H,r9
+ breq.d r7,r9,.Linf_nan_dbl1
+ asl r4,r4,12
+ sub r4,r4,mhi
+ mululw 0,r4,r4
+ machulw r5,r4,r4
+ bne.d .Lnormal_dbl0
+ lsr r8,r8,1
+
+ .balign 4
+.Ldenorm_dbl0:
+ bmsk.f r12,DBL0H,19
+ ; wb stall
+ mov.eq r12,DBL0L
+ sub.eq r6,r6,32
+ norm.f r11,r12 ; flag for 0/x -> 0 check
+ brge r7, \
+ 0x43500000, .Lret0_2 ; denorm/large number -> 0
+ beq_s .Lret0_2
+ mov.mi r11,0
+ add.pl r11,r11,1
+ asl r12,r12,r11
+ sub r6,r6,r11
+ add.f 0,r6,31
+ lsr r10,DBL0L,r6
+ mov.mi r10,0
+ add r6,r6,11+32
+ neg.f r11,r6
+ asl DBL0L,DBL0L,r11
+ mov.pl DBL0L,0
+ sub r6,r6,32-1
+ b.d .Lpast_denorm_dbl0
+ asl r6,r6,20
+
+ .balign 4
+.Linf_nan_dbl1: ; 0/Inf -> NaN  Inf/Inf -> NaN  x/Inf -> 0  x/NaN -> NaN
+ or.f 0,r6,DBL0L
+ cmp.ne r6,r9
+ not_s DBL0L,DBL1H
+ sub_s.ne DBL0L,DBL0L,DBL0L
+ tst_s DBL0H,DBL0H
+ add_s DBL0H,DBL1H,DBL0L
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+
+ .balign 4
+.Lnormal_dbl0:
+ breq.d r6,r9,.Linf_nan_dbl0
+ asl r12,DBL0H,11
+ lsr r10,DBL0L,21
+.Lpast_denorm_dbl0:
+ bset r8,r8,31
+ mulu64 (r5,r8)
+ add_s r12,r12,r10
+ bset r5,r12,31
+ cmp r5,r8
+ cmp.eq DBL0L,DBL1L
+ lsr.cc r5,r5,1
+ sub r4,r4,mhi ; u1.31 inverse, about 30 bit
+ mululw 0,r5,r4
+ machulw r11,r5,r4 ; result fraction highpart
+ lsr r8,r8,2 ; u3.29
+ add r5,r6, /* wait for immediate */ \
+ 0x3fe00000
+ mulu64 (r11,r8) ; u-28.31
+ asl_s DBL1L,DBL1L,9 ; u-29.23:9
+ sbc r6,r5,r7
+ mov r12,mlo ; u-28.31
+ mulu64 (r11,DBL1L) ; mhi: u-28.23:9
+ add.cs DBL0L,DBL0L,DBL0L
+ asl_s DBL0L,DBL0L,6 ; u-26.25:7
+ asl r10,r11,23
+ sub_l DBL0L,DBL0L,r12
+ lsr r7,r11,9
+ sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
+ mul64 (r5,r4) ; mhi: result fraction lowpart
+ xor.f 0,DBL0H,DBL1H
+ and DBL0H,r6,r9
+ add_s DBL0H,DBL0H,r7
+ bclr r12,r9,20 ; 0x7fe00000
+ brhs.d r6,r12,.Linf_denorm
+ bxor.mi DBL0H,DBL0H,31
+ add.f r12,mhi,0x11
+ asr r9,r12,5
+ sub.mi DBL0H,DBL0H,1
+ add.f DBL0L,r9,r10
+ tst r12,0x1c
+ jne.d [blink]
+ add.cs DBL0H,DBL0H,1
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in double
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. Since we want to know
+ only the sign bit, it is sufficient to calculate only the
+ highpart of the lower 64 bits. */
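+/* In C terms, the midpoint check sketched above is (illustrative,
+   using a notional wide type instead of the staged 32 bit multiplies):
+
+     // q_down/q_up: the two representable candidates; mid = halfway point
+     if (mid * divisor_frac > dividend_frac)  // exact quotient < mid
+       result = q_down;
+     else
+       result = q_up;
+
+   Only the sign of mid * divisor - dividend matters, which is why just
+   the high part of the lower 64 bits is computed.  */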
+ mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo
+ sub.f DBL0L,DBL0L,1
+ asl r12,r9,2 ; u-22.30:2
+ sub.cs DBL0H,DBL0H,1
+ sub.f r12,r12,2
+ mov r10,mlo ; rest before considering r12 in r5 : -r10
+ mululw 0,r12,DBL1L
+ machulw r7,r12,DBL1L ; mhi: u-51.32
+ asl r5,r5,25 ; s-51.7:25
+ lsr r10,r10,7 ; u-51.30:2
+ mulu64 (r12,r8) ; mlo: u-51.31:1
+ sub r5,r5,r10
+ add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
+ bset r7,r7,0 ; make sure that the result is not zero, and that
+ sub r5,r5,r7 ; a highpart zero appears negative
+ sub.f r5,r5,mlo ; rest msw
+ add.pl.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.eq DBL0H,DBL0H,1
+
+.Linf_nan_dbl0:
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ .balign 4
+.Linf_denorm:
+ lsr r12,r6,28
+ brlo.d r12,0xc,.Linf
+.Ldenorm:
+ asr r6,r6,20
+ neg r9,r6
+ mov_s DBL0H,0
+ brhs.d r9,54,.Lret0
+ bxor.mi DBL0H,DBL0H,31
+ add r12,mhi,1
+ and r12,r12,-4
+ rsub r7,r6,5
+ asr r10,r12,28
+ bmsk r4,r12,27
+ min r7,r7,31
+ asr DBL0L,r4,r7
+ add DBL1H,r11,r10
+ abs.f r10,r4
+ sub.mi r10,r10,1
+ add.f r7,r6,32-5
+ asl r4,r4,r7
+ mov.mi r4,r10
+ add.f r10,r6,23
+ rsub r7,r6,9
+ lsr r7,DBL1H,r7
+ asl r10,DBL1H,r10
+ or.pnz DBL0H,DBL0H,r7
+ or.mi r4,r4,r10
+ mov.mi r10,r7
+ add.f DBL0L,r10,DBL0L
+ add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
+ bxor.f 0,r4,31
+ add.pnz.f DBL0L,DBL0L,1
+ add.cs.f DBL0H,DBL0H,1
+ jne_s [blink]
+ /* Calculation so far was not conclusive; calculate further rest. */
+ mulu64 (r11,DBL1L) ; rest before considering r12 in r5 : -mlo
+ asr.f r12,r12,3
+ asl r5,r5,25 ; s-51.7:25
+ mov r11,mlo ; rest before considering r12 in r5 : -r11
+ mulu64 (r12,r8) ; u-51.31:1
+ and r9,DBL0L,1 ; tie-breaker: round to even
+ lsr r11,r11,7 ; u-51.30:2
+ mov DBL1H,mlo ; u-51.31:1
+ mulu64 (r12,DBL1L) ; u-51.62:2
+ sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
+ add_s DBL1H,DBL1H,r11
+ sub DBL1H,DBL1H,r5 ; -rest msw
+ add_s DBL1H,DBL1H,mhi ; -rest msw
+ add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
+ tst_s DBL1H,DBL1H
+ cmp.eq mlo,r9
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+
+.Lret0:
+ /* return +- 0 */
+ j_s.d [blink]
+ mov_s DBL0L,0
+.Linf:
+ mov_s DBL0H,r9
+ mov_s DBL0L,0
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ ENDFUNC(__divdf3)
diff --git a/gcc/config/arc/ieee-754/arc600-dsp/divsf3.S b/gcc/config/arc/ieee-754/arc600-dsp/divsf3.S
new file mode 100644
index 00000000000..8f2f35527b9
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600-dsp/divsf3.S
@@ -0,0 +1,275 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/*
+ - calculate 15..18 bit inverse using a table of approximating
+   polynomials.  Precision is higher for polynomials used to evaluate
+   input with larger value.
+ - do one Newton-Raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ */
+#include "../arc-ieee-754.h"
+#define mlo acc2
+#define mhi acc1
+#define mul64(b,c) mullw 0,b,c` machlw 0,b,c
+#define mulu64(b,c) mululw 0,b,c` machulw 0,b,c
+
+#if 0 /* DEBUG */
+ .global __divsf3
+ FUNC(__divsf3)
+ .balign 4
+__divsf3:
+ push_s blink
+ push_s r1
+ bl.d __divsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __divsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+#if 1
+ bne abort
+ jeq_s [blink]
+ b abort
+#else
+ bne abort
+ j_s [blink]
+#endif
+ ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divsf3)
+ .balign 4
+.Ldivtab:
+ .long 0xfc0ffff0
+ .long 0xf46ffefd
+ .long 0xed1ffd2a
+ .long 0xe627fa8e
+ .long 0xdf7ff73b
+ .long 0xd917f33b
+ .long 0xd2f7eea3
+ .long 0xcd1fe986
+ .long 0xc77fe3e7
+ .long 0xc21fdddb
+ .long 0xbcefd760
+ .long 0xb7f7d08c
+ .long 0xb32fc960
+ .long 0xae97c1ea
+ .long 0xaa27ba26
+ .long 0xa5e7b22e
+ .long 0xa1cfa9fe
+ .long 0x9ddfa1a0
+ .long 0x9a0f990c
+ .long 0x9667905d
+ .long 0x92df878a
+ .long 0x8f6f7e84
+ .long 0x8c27757e
+ .long 0x88f76c54
+ .long 0x85df630c
+ .long 0x82e759c5
+ .long 0x8007506d
+ .long 0x7d3f470a
+ .long 0x7a8f3da2
+ .long 0x77ef341e
+ .long 0x756f2abe
+ .long 0x72f7212d
+ .long 0x709717ad
+ .long 0x6e4f0e44
+ .long 0x6c1704d6
+ .long 0x69e6fb44
+ .long 0x67cef1d7
+ .long 0x65c6e872
+ .long 0x63cedf18
+ .long 0x61e6d5cd
+ .long 0x6006cc6d
+ .long 0x5e36c323
+ .long 0x5c76b9f3
+ .long 0x5abeb0b7
+ .long 0x5916a79b
+ .long 0x57769e77
+ .long 0x55de954d
+ .long 0x54568c4e
+ .long 0x52d6834d
+ .long 0x51667a7f
+ .long 0x4ffe71b5
+ .long 0x4e9e68f1
+ .long 0x4d466035
+ .long 0x4bf65784
+ .long 0x4aae4ede
+ .long 0x496e4646
+ .long 0x48363dbd
+ .long 0x47063547
+ .long 0x45de2ce5
+ .long 0x44be2498
+ .long 0x43a61c64
+ .long 0x4296144a
+ .long 0x41860c0e
+ .long 0x407e03ee
+.L7f800000:
+ .long 0x7f800000
+ .balign 4
+ .global __divsf3_support
+__divsf3_support:
+.Linf_NaN:
+ bclr.f 0,r0,31 ; 0/0 -> NaN
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ bic_s r0,r0,r1
+ sub.eq r0,r0,1
+ j_s.d [blink]
+ or r0,r0,r9
+.Lret0:
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divsf3:
+ ld.as r9,[pcl,-9]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
+ sub3 r3,pcl,37;(.-.Ldivtab) >> 3
+ lsr r2,r1,17
+ and.f r11,r1,r9
+ bmsk r5,r2,5
+ beq.d .Ldenorm_fp1
+ asl r6,r1,8
+ and.f r2,r0,r9
+ ld.as r5,[r3,r5]
+ asl r4,r1,9
+ bset r6,r6,31
+ breq.d r11,r9,.Linf_nan_fp1
+.Lpast_denorm_fp1:
+ mululw 0,r5,r4
+ machulw r8,r5,r4
+ breq.d r2,r9,.Linf_nan_fp0
+ asl r5,r5,13
+ sub r7,r5,r8
+ mululw 0,r7,r6
+ machulw r8,r7,r6
+ beq.d .Ldenorm_fp0
+ asl r12,r0,8
+ mulu64 (r8,r7)
+ bset r3,r12,31
+.Lpast_denorm_fp0:
+ cmp_s r3,r6
+ lsr.cc r3,r3,1
+ add_s r2,r2, /* wait for immediate */ \
+ 0x3f000000
+ sub r7,r7,mhi ; u1.31 inverse, about 30 bit
+ mulu64 (r3,r7)
+ sbc r2,r2,r11
+ xor.f 0,r0,r1
+ and r0,r2,r9
+ bclr r3,r9,23 ; 0x7f000000
+ brhs.d r2,r3,.Linf_denorm
+ bxor.mi r0,r0,31
+.Lpast_denorm:
+ add r3,mhi,0x22 ; round to nearest or higher
+ tst r3,0x3c ; check if rounding was unsafe
+ lsr r3,r3,6
+ jne.d [blink] ; return if rounding was safe.
+ add_s r0,r0,r3
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in single
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. */
+ add_s r3,r3,r3
+ sub_s r3,r3,1
+ mulu64 (r3,r6)
+ asr.f 0,r0,1 ; for round-to-even in case this is a denorm
+ rsub r2,r9,25
+ asl_s r12,r12,r2
+ sub.f 0,r12,mlo
+ j_s.d [blink]
+ sub.mi r0,r0,1
+.Linf_nan_fp1:
+ cmp_s r2,r9
+ mov.eq r1,-1
+ tst_s r0,r0
+ mov_s r0,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+.Linf_nan_fp0:
+ tst_s r1,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+ .balign 4
+ .global __divsf3
+/* For denormal results, it is possible that an exact result needs
+ rounding, and thus the round-to-even rule has to come into play. */
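+/* Round-to-even in one line of C (illustrative): with `g' the guard
+   bit and `s' the sticky bit (the OR of all lower discarded bits),
+
+     frac += g & (s | (frac & 1));  // up if above half; on a tie, to even
+ */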
+.Linf_denorm:
+ brlo r2,0xc0000000,.Linf
+.Ldenorm:
+ asr_s r2,r2,23
+ bic r0,r0,r9
+ neg r9,r2
+ brlo.d r9,25,.Lpast_denorm
+ lsr r3,mlo,r9
+ /* Fall through: return +- 0 */
+ j_s [blink]
+.Linf:
+ j_s.d [blink]
+ or r0,r0,r9
+ .balign 4
+.Ldenorm_fp1:
+ norm.f r12,r6 ; flag for x/0 -> Inf check
+ add r6,r6,r6
+ rsub r5,r12,16
+ ror r5,r1,r5
+ bmsk r5,r5,5
+ bic.ne.f 0, \
+ 0x60000000,r0 ; large number / denorm -> Inf
+ ld.as r5,[r3,r5]
+ asl r6,r6,r12
+ beq.d .Linf_NaN
+ and.f r2,r0,r9
+ add r4,r6,r6
+ asl_s r12,r12,23
+ bne.d .Lpast_denorm_fp1
+ add_s r2,r2,r12
+.Ldenorm_fp0:
+ mulu64 (r8,r7)
+ bclr r12,r12,31
+ norm.f r3,r12 ; flag for 0/x -> 0 check
+ bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+ beq_s .Lret0
+ asl_s r12,r12,r3
+ asl_s r3,r3,23
+ add_s r12,r12,r12
+ add r11,r11,r3
+ b.d .Lpast_denorm_fp0
+ mov_s r3,r12
+ ENDFUNC(__divsf3)
diff --git a/gcc/config/arc/ieee-754/arc600-dsp/muldf3.S b/gcc/config/arc/ieee-754/arc600-dsp/muldf3.S
new file mode 100644
index 00000000000..2ac9e16257e
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600-dsp/muldf3.S
@@ -0,0 +1,232 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __muldf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __muldf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ b abort
+#define __muldf3 __muldf3_asm
+#endif /* DEBUG */
+
+__muldf3_support: /* This label makes debugger output saner. */
+ .balign 4
+.Ldenorm_2:
+ breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
+ norm.f r12,DBL1L
+ mov.mi r12,21
+ add.pl r12,r12,22
+ neg r11,r12
+ asl_s r12,r12,20
+ lsr.f DBL1H,DBL1L,r11
+ ror DBL1L,DBL1L,r11
+ sub_s DBL0H,DBL0H,r12
+ mov.eq DBL1H,DBL1L
+ sub_l DBL1L,DBL1L,DBL1H
+ /* Fall through. */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ mululw 0,DBL0L,DBL1L
+ machulw r4,DBL0L,DBL1L
+ ld.as r9,[pcl,0x67] ; [pcl,((.L7ff00000-.+2)/4)]
+ bmsk r6,DBL0H,19
+ bset r6,r6,20
+ mov r8,acc2
+ mululw 0,r4,1
+ and r11,DBL0H,r9
+ breq.d r11,0,.Ldenorm_dbl0
+ and r12,DBL1H,r9
+ breq.d r12,0,.Ldenorm_dbl1
+ maclw 0,r6,DBL1L
+ machulw 0,r6,DBL1L
+ breq.d r11,r9,.Linf_nan
+ bmsk r10,DBL1H,19
+ breq.d r12,r9,.Linf_nan
+ bset r10,r10,20
+ maclw 0,r10,DBL0L
+ machulw r5,r10,DBL0L
+ add_s r12,r12,r11 ; add exponents
+ mov r4,acc2
+ mululw 0,r5,1
+ maclw 0,r6,r10
+ machulw r7,r6,r10 ; fraction product in r7:acc2:r4:r8
+ tst r8,r8
+ bclr r8,r9,30 ; 0x3ff00000
+ bset.ne r4,r4,0 ; put least significant word into sticky bit
+ bclr r6,r9,20 ; 0x7fe00000
+ lsr.f r10,r7,9
+ rsub.eq r8,r8,r9 ; 0x40000000
+ sub r12,r12,r8 ; subtract bias + implicit 1
+ brhs.d r12,r6,.Linf_denorm
+ rsub r10,r10,12
+.Lshift_frac:
+ neg r8,r10
+ asl r6,r4,r10
+ lsr DBL0L,r4,r8
+ add.f 0,r6,r6
+ btst.eq DBL0L,0
+ cmp.eq r4,r4 ; round to nearest / round to even
+ asl r4,acc2,r10
+ lsr r5,acc2,r8
+ adc.f DBL0L,DBL0L,r4
+ xor.f 0,DBL0H,DBL1H
+ asl r7,r7,r10
+ add_s r12,r12,r5
+ adc DBL0H,r12,r7
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+
+/* If one number is denormal, subtract its normalization shift count
+   from the exponent of the other one (if the other exponent is too
+   small, return 0), and normalize the denormal.  Then re-run the
+   computation. */
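+/* A sketch of that strategy in C (helper names hypothetical):
+
+     if (is_denormal (b)) {
+       int n = normalize_count (b);  // shift making the implicit 1 explicit
+       frac_b <<= n;
+       exp_a  -= n;                  // charge the shift to the other operand
+       if (exp_a <= 0)
+         return signed_zero (a, b);
+       goto retry;                   // re-run the normal multiply path
+     }
+ */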
+.Lret0_2:
+ lsr_s DBL0H,DBL0H,31
+ asl_s DBL0H,DBL0H,31
+ j_s.d [blink]
+ mov_s DBL0L,0
+ .balign 4
+.Ldenorm_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL1L
+ mov_s DBL1L,r12
+ mov_s r12,DBL0H
+ mov_s DBL0H,DBL1H
+ mov_s DBL1H,r12
+ and r11,DBL0H,r9
+.Ldenorm_dbl1:
+ brhs r11,r9,.Linf_nan
+ brhs 0x3ca00001,r11,.Lret0
+ sub_s DBL0H,DBL0H,DBL1H
+ bmsk.f DBL1H,DBL1H,30
+ add_s DBL0H,DBL0H,DBL1H
+ beq.d .Ldenorm_2
+ norm r12,DBL1H
+ sub_s r12,r12,10
+ asl r5,r12,20
+ asl_s DBL1H,DBL1H,r12
+ sub DBL0H,DBL0H,r5
+ neg r5,r12
+ lsr r6,DBL1L,r5
+ asl_s DBL1L,DBL1L,r12
+ b.d __muldf3
+ add_s DBL1H,DBL1H,r6
+
+.Lret0: xor_s DBL0H,DBL0H,DBL1H
+ bclr DBL1H,DBL0H,31
+ xor_s DBL0H,DBL0H,DBL1H
+ j_s.d [blink]
+ mov_s DBL0L,0
+
+ .balign 4
+.Linf_nan:
+ bclr r12,DBL1H,31
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr_s DBL0H,DBL0H,31
+ max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
+ or.f 0,DBL0H,DBL0L
+ mov_s DBL0L,0
+ or.ne.f DBL1L,DBL1L,r12
+ not_s DBL0H,DBL0L ; inf * 0 -> NaN
+ mov.ne DBL0H,r8
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* We have checked for infinity / NaN input before, and transformed
+   denormalized inputs into normalized inputs.  Thus, the worst case
+   exponent overflows are:
+   1 + 1 - 0x400 == 0xc02 : maximum underflow
+   0x7fe + 0x7fe - 0x3ff == 0xbfd : maximum overflow
+ N.B. 0x7e and 0x7f are also values for overflow.
+
+ If (r12 <= -54), we have an underflow to zero. */
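+/* In decimal: 1 + 1 - 0x400 == 2 - 1024 == -1022, i.e. 0xc02 in 12-bit
+   two's complement, and 0x7fe + 0x7fe - 0x3ff == 2046 + 2046 - 1023
+   == 3069 == 0xbfd.  A sketch of the resulting range test (names
+   illustrative):
+
+     int e = biased_exponent_sum;          // may lie far outside [1,0x7fe]
+     if (e <= -54)   return signed_zero;   // total underflow
+     if (e >= 0x7ff) return signed_inf;    // overflow
+ */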
+ .balign 4
+.Linf_denorm:
+ lsr r6,r12,28
+ brlo.d r6,0xc,.Linf
+ asr r6,r12,20
+ add.f r10,r10,r6
+ brgt.d r10,0,.Lshift_frac
+ mov_s r12,0
+ beq.d .Lround_frac
+ add r10,r10,32
+.Lshift32_frac:
+ tst r4,r4
+ mov r4,acc2
+ bset.ne r4,r4,1
+ mululw 0,r7,1
+ brge.d r10,1,.Lshift_frac
+ mov r7,0
+ breq.d r10,0,.Lround_frac
+ add r10,r10,32
+ brgt r10,21,.Lshift32_frac
+ b_s .Lret0
+
+.Lround_frac:
+ add.f 0,r4,r4
+ btst.eq acc2,0
+ mov_s DBL0L,acc2
+ mov_s DBL0H,r7
+ adc.eq.f DBL0L,DBL0L,0
+ j_s.d [blink]
+ adc.eq DBL0H,DBL0H,0
+
+.Linf: mov_s DBL0L,0
+ xor.f DBL1H,DBL1H,DBL0H
+ mov_s DBL0H,r9
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
diff --git a/gcc/config/arc/ieee-754/arc600-dsp/mulsf3.S b/gcc/config/arc/ieee-754/arc600-dsp/mulsf3.S
new file mode 100644
index 00000000000..a8dda21e7f1
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600-dsp/mulsf3.S
@@ -0,0 +1,179 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __mulsf3
+ FUNC(__mulsf3)
+ .balign 4
+__mulsf3:
+ push_s blink
+ push_s r1
+ bl.d __mulsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __mulsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ and r12,r0,r1
+ bic.f 0,0x7f800000,r12
+ bne 0f
+ bmsk.f 0,r0,22
+ bmsk.ne.f r1,r1,22
+ jne_s [blink] ; both NaN -> OK
+0: bl abort
+ ENDFUNC(__mulsf3)
+#define __mulsf3 __mulsf3_asm
+#endif /* DEBUG */
+
+ .balign 4
+ .global __mulsf3
+ FUNC(__mulsf3)
+__mulsf3:
+ ld.as r9,[pcl,80]; [pcl,((.L7f800000-.+2)/4)]
+ bmsk r4,r1,22
+ bset r2,r0,23
+ asl_s r2,r2,8
+ bset r3,r4,23
+ and r11,r0,r9
+ breq.d r11,0,.Ldenorm_dbl0
+ and r12,r1,r9
+ breq.d r12,0,.Ldenorm_dbl1
+ xor_s r0,r0,r1
+ mululw 0,r2,r3
+ machulw r6,r2,r3
+ breq.d r11,r9,.Linf_nan_dbl0
+ ld.as r4,[pcl,69]; [pcl,((.L7fffffff-.+2)/4)]
+ breq.d r12,r9,.Linf_nan_dbl1
+.Lpast_denorm:
+ asl.f 0,r6,8
+ mov r7,acc2
+ add.pl r6,r6,r6
+ bclr.pl r6,r6,23
+ add.pl.f r7,r7,r7
+ add.cs r6,r6,1
+ lsr.f 0,r6,1
+ add_s r12,r12,r11
+ adc.f 0,r7,r4
+ add_s r12,r12, \
+ -0x3f800000
+ adc.f r8,r6,r12
+ tst.pl r8,r9
+ bic r0,r0,r4
+ min r3,r8,r9
+ jpnz.d [blink]
+ add.pnz r0,r0,r3
+; infinity or denormal number
+ add.ne.f r3,r3,r3
+ asr_s r3,r3,23+1
+ bset r6,r6,23
+ bpnz.d .Linfinity
+ sub_s r3,r3,1
+ neg_s r2,r3
+ brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0
+ lsr r2,r6,r2
+ asl r9,r6,r3
+ lsr.f 0,r2,1
+ tst r7,r7
+ add_s r0,r0,r2
+ bset.ne r9,r9,0
+ adc.f 0,r9,r4
+ j_s.d [blink]
+ add.cs r0,r0,1
+.Linfinity:
+ j_s.d [blink]
+ add_s r0,r0,r9
+
+.Lret_r0: j_s [blink]
+
+ .balign 4
+.Ldenorm_dbl0:
+ bclr_s r2,r2,31
+ norm.f r4,r2
+ add_s r2,r2,r2
+ asl r2,r2,r4
+ breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
+ asl r4,r4,23
+ mululw 0,r2,r3
+ machulw r6,r2,r3
+ sub.ne.f r12,r12,r4
+ ld.as r4,[pcl,28]; [pcl,((.L7fffffff-.+2)/4)]
+ bhi.d .Lpast_denorm
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl0_inf_nan_dbl1:
+ bmsk.f 0,r0,30
+ mov.eq r1,-1
+.Linf_nan_dbl1:
+ xor_s r1,r1,r0
+.Linf_nan_dbl0:
+ bclr_s r1,r1,31
+ j_s.d [blink]
+ xor_s r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl1:
+ breq.d r11,r9,.Linf_nan_dbl0_2
+ norm.f r3,r4
+ sub_s r3,r3,7
+ asl r4,r4,r3
+ mululw 0,r2,r4
+ machulw r6,r2,r4
+ sub_s r3,r3,1
+ asl_s r3,r3,23
+ sub.ne.f r11,r11,r3
+ ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
+ bhi.d .Lpast_denorm
+ bmsk r8,r0,30
+ j_s.d [blink]
+ bic r0,r0,r8
+
+ .balign 4
+.Linf_nan_dbl0_2:
+ bclr_s r1,r1,31
+ xor_s r0,r0,r1
+ sub.eq r1,r1,1 ; inf/nan * 0 -> nan
+ bic.f 0,r9,r1
+ j_s.d [blink]
+ or.eq r0,r0,r1 ; r1 nan -> result nan
+
+ .balign 4
+.L7f800000:
+ .long 0x7f800000
+.L7fffffff:
+ .long 0x7fffffff
+ ENDFUNC(__mulsf3)
diff --git a/gcc/config/arc/ieee-754/arc600/divdf3.S b/gcc/config/arc/ieee-754/arc600/divdf3.S
new file mode 100644
index 00000000000..950e509dd19
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600/divdf3.S
@@ -0,0 +1,413 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/*
+ to calculate a := b/x as b*y, with y := 1/x:
+ - x is in the range [1..2)
+ - calculate 15..18 bit inverse y0 using a table of approximating
+   polynomials.  Precision is higher for polynomials used to evaluate
+   input with larger value.
+ - Do one Newton-Raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ The truncation error for either is less than 1 + x/2 ulp.
+ A 31 bit inverse can be simply calculated by using x with implicit 1
+ and chaining the multiplies. For a 32 bit inverse, we multiply y0^2
+ with the bare fraction part of x, then add in y0^2 for the implicit
+ 1 of x.
+ - If calculating a 31 bit inverse, the systematic error is less than
+ -1 ulp; likewise, for 32 bit, it is less than -2 ulp.
+ - If we calculate our seed with a 32 bit fraction, we can achieve a
+   tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
+   only need to take the step to calculate the 2nd stage rest and
+   rounding adjust 1/32nd of the time.  However, if we use a 20 bit
+   fraction for the seed, the negative error can exceed -2 ulp/128, (2)
+   thus for a simple add / tst check, we need to do the 2nd stage
+   rest calculation / rounding adjust 1/16th of the time.
+ (1): The inexactness of the 32 bit inverse contributes an error in the
+ range of (-1 .. +(1+x/2) ) ulp/128. Leaving out the low word of the
+ rest contributes an error < +1/x ulp/128 . In the interval [1,2),
+ x/2 + 1/x <= 1.5 .
+ (2): Unless proven otherwise. I have not actually looked for an
+ example where -2 ulp/128 is exceeded, and my calculations indicate
+ that the excess, if existent, is less than -1/512 ulp.
+ ??? The algorithm is still based on the ARC700 optimized code.
+ Maybe we could make better use of 64 bit multiply results and/or mmed.
+ */
+#include "../arc-ieee-754.h"
+
+/* N.B. fp-bit.c does double rounding on denormal numbers. */
+#if 0 /* DEBUG */
+ .global __divdf3
+ FUNC(__divdf3)
+ .balign 4
+__divdf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __divdf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __divdf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ and r12,DBL0H,DBL1H
+ bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__divdf3)
+#define __divdf3 __divdf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divdf3)
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
+.Ldivtab:
+ .long 0xfc0fffe1
+ .long 0xf46ffdfb
+ .long 0xed1ffa54
+ .long 0xe61ff515
+ .long 0xdf7fee75
+ .long 0xd91fe680
+ .long 0xd2ffdd52
+ .long 0xcd1fd30c
+ .long 0xc77fc7cd
+ .long 0xc21fbbb6
+ .long 0xbcefaec0
+ .long 0xb7efa100
+ .long 0xb32f92bf
+ .long 0xae8f83b7
+ .long 0xaa2f7467
+ .long 0xa5ef6479
+ .long 0xa1cf53fa
+ .long 0x9ddf433e
+ .long 0x9a0f3216
+ .long 0x965f2091
+ .long 0x92df0f11
+ .long 0x8f6efd05
+ .long 0x8c1eeacc
+ .long 0x88eed876
+ .long 0x85dec615
+ .long 0x82eeb3b9
+ .long 0x800ea10b
+ .long 0x7d3e8e0f
+ .long 0x7a8e7b3f
+ .long 0x77ee6836
+ .long 0x756e5576
+ .long 0x72fe4293
+ .long 0x709e2f93
+ .long 0x6e4e1c7f
+ .long 0x6c0e095e
+ .long 0x69edf6c5
+ .long 0x67cde3a5
+ .long 0x65cdd125
+ .long 0x63cdbe25
+ .long 0x61ddab3f
+ .long 0x600d991f
+ .long 0x5e3d868c
+ .long 0x5c6d7384
+ .long 0x5abd615f
+ .long 0x590d4ecd
+ .long 0x576d3c83
+ .long 0x55dd2a89
+ .long 0x545d18e9
+ .long 0x52dd06e9
+ .long 0x516cf54e
+ .long 0x4ffce356
+ .long 0x4e9cd1ce
+ .long 0x4d3cbfec
+ .long 0x4becae86
+ .long 0x4aac9da4
+ .long 0x496c8c73
+ .long 0x483c7bd3
+ .long 0x470c6ae8
+ .long 0x45dc59af
+ .long 0x44bc4915
+ .long 0x43ac3924
+ .long 0x428c27fb
+ .long 0x418c187a
+ .long 0x407c07bd
+
+__divdf3_support: /* This label makes debugger output saner. */
+ .balign 4
+.Ldenorm_dbl1:
+ brge r6, \
+ 0x43500000,.Linf_NaN ; large number / denorm -> Inf
+ bmsk.f r12,DBL1H,19
+ mov.eq r12,DBL1L
+ mov.eq DBL1L,0
+ sub.eq r7,r7,32
+ norm.f r11,r12 ; flag for x/0 -> Inf check
+ beq_s .Linf_NaN
+ mov.mi r11,0
+ add.pl r11,r11,1
+ add_s r12,r12,r12
+ asl r8,r12,r11
+ rsub r12,r11,31
+ lsr r12,DBL1L,r12
+ tst_s DBL1H,DBL1H
+ or r8,r8,r12
+ lsr r4,r8,26
+ lsr DBL1H,r8,12
+ ld.as r4,[r10,r4]
+ bxor.mi DBL1H,DBL1H,31
+ sub r11,r11,11
+ asl DBL1L,DBL1L,r11
+ sub r11,r11,1
+ mulu64 r4,r8
+ sub r7,r7,r11
+ b.d .Lpast_denorm_dbl1
+ asl r7,r7,20
+
+ .balign 4
+.Ldenorm_dbl0:
+ bmsk.f r12,DBL0H,19
+ ; wb stall
+ mov.eq r12,DBL0L
+ sub.eq r6,r6,32
+ norm.f r11,r12 ; flag for 0/x -> 0 check
+ brge r7, \
+ 0x43500000, .Lret0_2 ; denorm/large number -> 0
+ beq_s .Lret0_2
+ mov.mi r11,0
+ add.pl r11,r11,1
+ asl r12,r12,r11
+ sub r6,r6,r11
+ add.f 0,r6,31
+ lsr r10,DBL0L,r6
+ mov.mi r10,0
+ add r6,r6,11+32
+ neg.f r11,r6
+ asl DBL0L,DBL0L,r11
+ mov.pl DBL0L,0
+ sub r6,r6,32-1
+ b.d .Lpast_denorm_dbl0
+ asl r6,r6,20
+
+.Linf_NaN:
+ tst_s DBL0L,DBL0L ; 0/0 -> NaN
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr.eq.f DBL0H,DBL0H,31
+ bmsk DBL0H,DBL1H,30
+ xor_s DBL0H,DBL0H,DBL1H
+ sub.eq DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ j_s.d [blink]
+ or DBL0H,DBL0H,r9
+ .balign 4
+.Lret0_2:
+ xor_s DBL1H,DBL1H,DBL0H
+ mov_s DBL0L,0
+ bmsk DBL0H,DBL1H,30
+ j_s.d [blink]
+ xor_s DBL0H,DBL0H,DBL1H
+ .balign 4
+ .global __divdf3
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divdf3:
+ asl r8,DBL1H,12
+ lsr r4,r8,26
+ sub3 r10,pcl,61; (.-.Ldivtab) >> 3
+ ld.as r9,[pcl,-124]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+ ld.as r4,[r10,r4]
+ lsr r12,DBL1L,20
+ and.f r7,DBL1H,r9
+ or r8,r8,r12
+ mulu64 r4,r8
+ beq.d .Ldenorm_dbl1
+.Lpast_denorm_dbl1:
+ and.f r6,DBL0H,r9
+ breq.d r7,r9,.Linf_nan_dbl1
+ asl r4,r4,12
+ sub r4,r4,mhi
+ mulu64 r4,r4
+ beq.d .Ldenorm_dbl0
+ lsr r8,r8,1
+ breq.d r6,r9,.Linf_nan_dbl0
+ asl r12,DBL0H,11
+ lsr r10,DBL0L,21
+.Lpast_denorm_dbl0:
+ bset r8,r8,31
+ mulu64 mhi,r8
+ add_s r12,r12,r10
+ bset r5,r12,31
+ cmp r5,r8
+ cmp.eq DBL0L,DBL1L
+ lsr.cc r5,r5,1
+ sub r4,r4,mhi ; u1.31 inverse, about 30 bit
+ mulu64 r5,r4 ; result fraction highpart
+ lsr r8,r8,2 ; u3.29
+ add r5,r6, /* wait for immediate */ \
+ 0x3fe00000
+ mov r11,mhi ; result fraction highpart
+ mulu64 r11,r8 ; u-28.31
+ asl_s DBL1L,DBL1L,9 ; u-29.23:9
+ sbc r6,r5,r7
+ mov r12,mlo ; u-28.31
+ mulu64 r11,DBL1L ; mhi: u-28.23:9
+ add.cs DBL0L,DBL0L,DBL0L
+ asl_s DBL0L,DBL0L,6 ; u-26.25:7
+ asl r10,r11,23
+ sub_l DBL0L,DBL0L,r12
+ lsr r7,r11,9
+ sub r5,DBL0L,mhi ; rest msw ; u-26.31:0
+ mul64 r5,r4 ; mhi: result fraction lowpart
+ xor.f 0,DBL0H,DBL1H
+ and DBL0H,r6,r9
+ add_s DBL0H,DBL0H,r7
+ bclr r12,r9,20 ; 0x7fe00000
+ brhs.d r6,r12,.Linf_denorm
+ bxor.mi DBL0H,DBL0H,31
+ add.f r12,mhi,0x11
+ asr r9,r12,5
+ sub.mi DBL0H,DBL0H,1
+ add.f DBL0L,r9,r10
+ tst r12,0x1c
+ jne.d [blink]
+ add.cs DBL0H,DBL0H,1
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in double
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. Since we want to know
+ only the sign bit, it is sufficient to calculate only the
+ highpart of the lower 64 bits. */
+ mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
+ sub.f DBL0L,DBL0L,1
+ asl r12,r9,2 ; u-22.30:2
+ sub.cs DBL0H,DBL0H,1
+ sub.f r12,r12,2
+ mov r10,mlo ; rest before considering r12 in r5 : -r10
+ mulu64 r12,DBL1L ; mhi: u-51.32
+ asl r5,r5,25 ; s-51.7:25
+ lsr r10,r10,7 ; u-51.30:2
+ mov r7,mhi ; u-51.32
+ mulu64 r12,r8 ; mlo: u-51.31:1
+ sub r5,r5,r10
+ add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
+ bset r7,r7,0 ; make sure that the result is not zero, and that
+ sub r5,r5,r7 ; a highpart zero appears negative
+ sub.f r5,r5,mlo ; rest msw
+ add.pl.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.eq DBL0H,DBL0H,1
+
+.Linf_nan_dbl1: ; 0/Inf -> NaN  Inf/Inf -> NaN  x/Inf -> 0  x/NaN -> NaN
+ or.f 0,r6,DBL0L
+ cmp.ne r6,r9
+ not_s DBL0L,DBL1H
+ sub_s.ne DBL0L,DBL0L,DBL0L
+ tst_s DBL0H,DBL0H
+ add_s DBL0H,DBL1H,DBL0L
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+.Linf_nan_dbl0:
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ .balign 4
+.Linf_denorm:
+ lsr r12,r6,28
+ brlo.d r12,0xc,.Linf
+.Ldenorm:
+ asr r6,r6,20
+ neg r9,r6
+ mov_s DBL0H,0
+ brhs.d r9,54,.Lret0
+ bxor.mi DBL0H,DBL0H,31
+ add r12,mhi,1
+ and r12,r12,-4
+ rsub r7,r6,5
+ asr r10,r12,28
+ bmsk r4,r12,27
+ min r7,r7,31
+ asr DBL0L,r4,r7
+ add DBL1H,r11,r10
+ abs.f r10,r4
+ sub.mi r10,r10,1
+ add.f r7,r6,32-5
+ asl r4,r4,r7
+ mov.mi r4,r10
+ add.f r10,r6,23
+ rsub r7,r6,9
+ lsr r7,DBL1H,r7
+ asl r10,DBL1H,r10
+ or.pnz DBL0H,DBL0H,r7
+ or.mi r4,r4,r10
+ mov.mi r10,r7
+ add.f DBL0L,r10,DBL0L
+ add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
+ bxor.f 0,r4,31
+ add.pnz.f DBL0L,DBL0L,1
+ add.cs.f DBL0H,DBL0H,1
+ jne_s [blink]
+ /* Calculation so far was not conclusive; calculate further rest. */
+ mulu64 r11,DBL1L ; rest before considering r12 in r5 : -mlo
+ asr.f r12,r12,3
+ asl r5,r5,25 ; s-51.7:25
+ mov r11,mlo ; rest before considering r12 in r5 : -r11
+ mulu64 r12,r8 ; u-51.31:1
+ and r9,DBL0L,1 ; tie-breaker: round to even
+ lsr r11,r11,7 ; u-51.30:2
+ mov DBL1H,mlo ; u-51.31:1
+ mulu64 r12,DBL1L ; u-51.62:2
+ sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
+ add_s DBL1H,DBL1H,r11
+ sub DBL1H,DBL1H,r5 ; -rest msw
+ add_s DBL1H,DBL1H,mhi ; -rest msw
+ add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
+ tst_s DBL1H,DBL1H
+ cmp.eq mlo,r9
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+
+.Lret0:
+ /* return +- 0 */
+ j_s.d [blink]
+ mov_s DBL0L,0
+.Linf:
+ mov_s DBL0H,r9
+ mov_s DBL0L,0
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ ENDFUNC(__divdf3)
diff --git a/gcc/config/arc/ieee-754/arc600/divsf3.S b/gcc/config/arc/ieee-754/arc600/divsf3.S
new file mode 100644
index 00000000000..cb226fe1be0
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600/divsf3.S
@@ -0,0 +1,275 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/*
+ - calculate 15..18 bit inverse using a table of approximating
+   polynomials.  Precision is higher for polynomials used to evaluate
+   input with larger value.
+ - do one Newton-Raphson iteration step to double the precision,
+ then multiply this with the divisor
+ -> more time to decide if dividend is subnormal
+ - the worst error propagation is on the side of the value range
+ with the least initial defect, thus giving us about 30 bits precision.
+ */
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __divsf3
+ FUNC(__divsf3)
+ .balign 4
+__divsf3:
+ push_s blink
+ push_s r1
+ bl.d __divsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __divsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+#if 1
+ bne abort
+ jeq_s [blink]
+ b abort
+#else
+ bne abort
+ j_s [blink]
+#endif
+ ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divsf3)
+ .balign 4
+.Ldivtab:
+ .long 0xfc0ffff0
+ .long 0xf46ffefd
+ .long 0xed1ffd2a
+ .long 0xe627fa8e
+ .long 0xdf7ff73b
+ .long 0xd917f33b
+ .long 0xd2f7eea3
+ .long 0xcd1fe986
+ .long 0xc77fe3e7
+ .long 0xc21fdddb
+ .long 0xbcefd760
+ .long 0xb7f7d08c
+ .long 0xb32fc960
+ .long 0xae97c1ea
+ .long 0xaa27ba26
+ .long 0xa5e7b22e
+ .long 0xa1cfa9fe
+ .long 0x9ddfa1a0
+ .long 0x9a0f990c
+ .long 0x9667905d
+ .long 0x92df878a
+ .long 0x8f6f7e84
+ .long 0x8c27757e
+ .long 0x88f76c54
+ .long 0x85df630c
+ .long 0x82e759c5
+ .long 0x8007506d
+ .long 0x7d3f470a
+ .long 0x7a8f3da2
+ .long 0x77ef341e
+ .long 0x756f2abe
+ .long 0x72f7212d
+ .long 0x709717ad
+ .long 0x6e4f0e44
+ .long 0x6c1704d6
+ .long 0x69e6fb44
+ .long 0x67cef1d7
+ .long 0x65c6e872
+ .long 0x63cedf18
+ .long 0x61e6d5cd
+ .long 0x6006cc6d
+ .long 0x5e36c323
+ .long 0x5c76b9f3
+ .long 0x5abeb0b7
+ .long 0x5916a79b
+ .long 0x57769e77
+ .long 0x55de954d
+ .long 0x54568c4e
+ .long 0x52d6834d
+ .long 0x51667a7f
+ .long 0x4ffe71b5
+ .long 0x4e9e68f1
+ .long 0x4d466035
+ .long 0x4bf65784
+ .long 0x4aae4ede
+ .long 0x496e4646
+ .long 0x48363dbd
+ .long 0x47063547
+ .long 0x45de2ce5
+ .long 0x44be2498
+ .long 0x43a61c64
+ .long 0x4296144a
+ .long 0x41860c0e
+ .long 0x407e03ee
+.L7f800000:
+ .long 0x7f800000
+ .balign 4
+ .global __divsf3_support
+__divsf3_support:
+.Linf_NaN:
+ bclr.f 0,r0,31 ; 0/0 -> NaN
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ bic_s r0,r0,r1
+ sub.eq r0,r0,1
+ j_s.d [blink]
+ or r0,r0,r9
+.Lret0:
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divsf3:
+ lsr r2,r1,17
+ sub3 r3,pcl,37 ; (.-.Ldivtab) >> 3
+ bmsk_s r2,r2,5
+ ld.as r5,[r3,r2]
+ asl r4,r1,9
+ ld.as r9,[pcl,-13]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
+ mulu64 r5,r4
+ and.f r11,r1,r9
+ asl r6,r1,8
+ bset r6,r6,31
+ beq.d .Ldenorm_fp1
+ asl r5,r5,13
+ breq.d r11,r9,.Linf_nan_fp1
+ and.f r2,r0,r9
+ sub r7,r5,mhi
+ mulu64 r7,r6
+ beq.d .Ldenorm_fp0
+ asl r12,r0,8
+ breq.d r2,r9,.Linf_nan_fp0
+ mulu64 mhi,r7
+.Lpast_denorm_fp1:
+ bset r3,r12,31
+.Lpast_denorm_fp0:
+ cmp_s r3,r6
+ lsr.cc r3,r3,1
+ add_s r2,r2, /* wait for immediate */ \
+ 0x3f000000
+ sub r7,r7,mhi ; u1.31 inverse, about 30 bit
+ mulu64 r3,r7
+ sbc r2,r2,r11
+ xor.f 0,r0,r1
+ and r0,r2,r9
+ bclr r3,r9,23 ; 0x7f000000
+ brhs.d r2,r3,.Linf_denorm
+ bxor.mi r0,r0,31
+.Lpast_denorm:
+ add r3,mhi,0x22 ; round to nearest or higher
+ tst r3,0x3c ; check if rounding was unsafe
+ lsr r3,r3,6
+ jne.d [blink] ; return if rounding was safe.
+ add_s r0,r0,r3
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in single
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. */
+ add_s r3,r3,r3
+ sub_s r3,r3,1
+ mulu64 r3,r6
+ asr.f 0,r0,1 ; for round-to-even in case this is a denorm
+ rsub r2,r9,25
+ asl_s r12,r12,r2
+ sub.f 0,r12,mlo
+ j_s.d [blink]
+ sub.mi r0,r0,1
+.Linf_nan_fp1:
+ cmp_s r2,r9
+ mov.eq r1,-1
+ tst_s r0,r0
+ mov_s r0,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+.Linf_nan_fp0:
+ tst_s r1,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+ .balign 4
+ .global __divsf3
+/* For denormal results, it is possible that an exact result needs
+ rounding, and thus the round-to-even rule has to come into play. */
+.Linf_denorm:
+ brlo r2,0xc0000000,.Linf
+.Ldenorm:
+ asr_s r2,r2,23
+ bic r0,r0,r9
+ neg r9,r2
+ brlo.d r9,25,.Lpast_denorm
+ lsr r3,mlo,r9
+ /* Fall through: return +- 0 */
+ j_s [blink]
+.Linf:
+ j_s.d [blink]
+ or r0,r0,r9
+ .balign 4
+.Ldenorm_fp1:
+ bclr r6,r6,31
+ norm.f r12,r6 ; flag for x/0 -> Inf check
+ add r6,r6,r6
+ rsub r5,r12,16
+ ror r5,r1,r5
+ asl r6,r6,r12
+ bmsk r5,r5,5
+ ld.as r5,[r3,r5]
+ add r4,r6,r6
+ ; load latency
+ mulu64 r5,r4
+ bic.ne.f 0, \
+ 0x60000000,r0 ; large number / denorm -> Inf
+ asl r5,r5,13
+ sub r7,r5,mhi
+ beq.d .Linf_NaN
+ mulu64 r7,r6
+ asl_s r12,r12,23
+ and.f r2,r0,r9
+ add_s r2,r2,r12
+ asl r12,r0,8
+ bne.d .Lpast_denorm_fp1
+.Ldenorm_fp0: mulu64 mhi,r7
+ bclr r12,r12,31
+ norm.f r3,r12 ; flag for 0/x -> 0 check
+ bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+ beq_s .Lret0
+ asl_s r12,r12,r3
+ asl_s r3,r3,23
+ add_s r12,r12,r12
+ add r11,r11,r3
+ b.d .Lpast_denorm_fp0
+ mov_s r3,r12
+ ENDFUNC(__divsf3)
diff --git a/gcc/config/arc/ieee-754/arc600/muldf3.S b/gcc/config/arc/ieee-754/arc600/muldf3.S
new file mode 100644
index 00000000000..0ff29735faa
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600/muldf3.S
@@ -0,0 +1,235 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combined
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __muldf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __muldf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ and r12,DBL0H,DBL1H
+ bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+ jeq_s [blink]
+ b abort
+#define __muldf3 __muldf3_asm
+#endif /* DEBUG */
+
+__muldf3_support: /* This label makes debugger output saner. */
+ .balign 4
+.Ldenorm_2:
+ breq.d DBL1L,0,.Lret0_2 ; 0 input -> 0 output
+ norm.f r12,DBL1L
+ mov.mi r12,21
+ add.pl r12,r12,22
+ neg r11,r12
+ asl_s r12,r12,20
+ lsr.f DBL1H,DBL1L,r11
+ ror DBL1L,DBL1L,r11
+ sub_s DBL0H,DBL0H,r12
+ mov.eq DBL1H,DBL1L
+ sub_l DBL1L,DBL1L,DBL1H
+ /* Fall through. */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ mulu64 DBL0L,DBL1L
+ ld.as r9,[pcl,0x68] ; [pcl,((.L7ff00000-.+2)/4)]
+ bmsk r6,DBL0H,19
+ bset r6,r6,20
+ and r11,DBL0H,r9
+ breq.d r11,0,.Ldenorm_dbl0
+ and r12,DBL1H,r9
+ breq.d r12,0,.Ldenorm_dbl1
+ mov r8,mlo
+ mov r4,mhi
+ mulu64 r6,DBL1L
+ breq.d r11,r9,.Linf_nan
+ bmsk r10,DBL1H,19
+ breq.d r12,r9,.Linf_nan
+ bset r10,r10,20
+ add.f r4,r4,mlo
+ adc r5,mhi,0
+ mulu64 r10,DBL0L
+ add_s r12,r12,r11 ; add exponents
+ add.f r4,r4,mlo
+ adc r5,r5,mhi
+ mulu64 r6,r10
+ tst r8,r8
+ bclr r8,r9,30 ; 0x3ff00000
+ bset.ne r4,r4,0 ; put least significant word into sticky bit
+ bclr r6,r9,20 ; 0x7fe00000
+ add.f r5,r5,mlo
+ adc r7,mhi,0 ; fraction product in r7:r5:r4
+ lsr.f r10,r7,9
+ rsub.eq r8,r8,r9 ; 0x40000000
+ sub r12,r12,r8 ; subtract bias + implicit 1
+ brhs.d r12,r6,.Linf_denorm
+ rsub r10,r10,12
+.Lshift_frac:
+ neg r8,r10
+ asl r6,r4,r10
+ lsr DBL0L,r4,r8
+ add.f 0,r6,r6
+ btst.eq DBL0L,0
+ cmp.eq r4,r4 ; round to nearest / round to even
+ asl r4,r5,r10
+ lsr r5,r5,r8
+ adc.f DBL0L,DBL0L,r4
+ xor.f 0,DBL0H,DBL1H
+ asl r7,r7,r10
+ add_s r12,r12,r5
+ adc DBL0H,r12,r7
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+
+/* If one number is denormal, subtract its normalization shift count
+   from the exponent of the other one (if the other exponent is too
+   small, return 0), and normalize the denormal.  Then re-run the
+   computation. */
+.Lret0_2:
+ lsr_s DBL0H,DBL0H,31
+ asl_s DBL0H,DBL0H,31
+ j_s.d [blink]
+ mov_s DBL0L,0
+ .balign 4
+.Ldenorm_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL1L
+ mov_s DBL1L,r12
+ mov_s r12,DBL0H
+ mov_s DBL0H,DBL1H
+ mov_s DBL1H,r12
+ and r11,DBL0H,r9
+.Ldenorm_dbl1:
+ brhs r11,r9,.Linf_nan
+ brhs 0x3ca00001,r11,.Lret0
+ sub_s DBL0H,DBL0H,DBL1H
+ bmsk.f DBL1H,DBL1H,30
+ add_s DBL0H,DBL0H,DBL1H
+ beq.d .Ldenorm_2
+ norm r12,DBL1H
+ sub_s r12,r12,10
+ asl r5,r12,20
+ asl_s DBL1H,DBL1H,r12
+ sub DBL0H,DBL0H,r5
+ neg r5,r12
+ lsr r6,DBL1L,r5
+ asl_s DBL1L,DBL1L,r12
+ b.d __muldf3
+ add_s DBL1H,DBL1H,r6
+
+.Lret0: xor_s DBL0H,DBL0H,DBL1H
+ bclr DBL1H,DBL0H,31
+ xor_s DBL0H,DBL0H,DBL1H
+ j_s.d [blink]
+ mov_s DBL0L,0
+
+ .balign 4
+.Linf_nan:
+ bclr r12,DBL1H,31
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr_s DBL0H,DBL0H,31
+ max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
+ or.f 0,DBL0H,DBL0L
+ mov_s DBL0L,0
+ or.ne.f DBL1L,DBL1L,r12
+ not_s DBL0H,DBL0L ; inf * 0 -> NaN
+ mov.ne DBL0H,r8
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* We have checked for infinity / NaN input before, and transformed
+   denormalized inputs into normalized inputs.  Thus, the worst case
+   exponent overflows are:
+   1 + 1 - 0x400 == 0xc02 : maximum underflow
+   0x7fe + 0x7fe - 0x3ff == 0xbfd : maximum overflow
+ N.B. 0x7e and 0x7f are also values for overflow.
+
+ If (r12 <= -54), we have an underflow to zero. */
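+/* The sums above are taken modulo 2^12 in the packed exponent field:
+   1 + 1 - 0x400 = -0x3fe, which appears as 0xc02.  As read from the code
+   below, this is what lets .Linf_denorm classify with `lsr r6,r12,28' and
+   `brlo r6,0xc,.Linf': a top nibble below 0xc (at most 0xbfd) means
+   overflow, 0xc or above means underflow or a denormal result. */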
+ .balign 4
+.Linf_denorm:
+ lsr r6,r12,28
+ brlo.d r6,0xc,.Linf
+ asr r6,r12,20
+ add.f r10,r10,r6
+ brgt.d r10,0,.Lshift_frac
+ mov_s r12,0
+ beq.d .Lround_frac
+ add r10,r10,32
+.Lshift32_frac:
+ tst r4,r4
+ mov r4,r5
+ bset.ne r4,r4,1
+ mov r5,r7
+ brge.d r10,1,.Lshift_frac
+ mov r7,0
+ breq.d r10,0,.Lround_frac
+ add r10,r10,32
+ brgt r10,21,.Lshift32_frac
+ b_s .Lret0
+
+.Lround_frac:
+ add.f 0,r4,r4
+ btst.eq r5,0
+ mov_s DBL0L,r5
+ mov_s DBL0H,r7
+ adc.eq.f DBL0L,DBL0L,0
+ j_s.d [blink]
+ adc.eq DBL0H,DBL0H,0
+
+.Linf: mov_s DBL0L,0
+ xor.f DBL1H,DBL1H,DBL0H
+ mov_s DBL0H,r9
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
diff --git a/gcc/config/arc/ieee-754/arc600/mulsf3.S b/gcc/config/arc/ieee-754/arc600/mulsf3.S
new file mode 100644
index 00000000000..f77c6d97e6d
--- /dev/null
+++ b/gcc/config/arc/ieee-754/arc600/mulsf3.S
@@ -0,0 +1,177 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "../arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __mulsf3
+ FUNC(__mulsf3)
+ .balign 4
+__mulsf3:
+ push_s blink
+ push_s r1
+ bl.d __mulsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __mulsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ and r12,r0,r1
+ bic.f 0,0x7f800000,r12
+ bne 0f
+ bmsk.f 0,r0,22
+ bmsk.ne.f r1,r1,22
+ jne_s [blink] ; both NaN -> OK
+0: bl abort
+ ENDFUNC(__mulsf3)
+#define __mulsf3 __mulsf3_asm
+#endif /* DEBUG */
+
+ .balign 4
+ .global __mulsf3
+ FUNC(__mulsf3)
+__mulsf3:
+ ld.as r9,[pcl,78]; [pcl,((.L7f800000-.+2)/4)]
+ bmsk r4,r1,22
+ bset r2,r0,23
+ asl_s r2,r2,8
+ bset r3,r4,23
+ mulu64 r2,r3
+ and r11,r0,r9
+ breq.d r11,0,.Ldenorm_dbl0
+ and r12,r1,r9
+ breq.d r12,0,.Ldenorm_dbl1
+ xor_s r0,r0,r1
+ breq.d r11,r9,.Linf_nan_dbl0
+ ld.as r4,[pcl,68]; [pcl,((.L7fffffff-.+2)/4)]
+ breq.d r12,r9,.Linf_nan_dbl1
+.Lpast_denorm:
+ asl.f 0,mhi,8
+ mov r6,mhi
+ mov r7,mlo
+ add.pl r6,r6,r6
+ bclr.pl r6,r6,23
+ add.pl.f r7,r7,r7
+ add.cs r6,r6,1
+ lsr.f 0,r6,1
+ add_s r12,r12,r11
+ adc.f 0,r7,r4
+ add_s r12,r12, \
+ -0x3f800000
+ adc.f r8,r6,r12
+ tst.pl r8,r9
+ bic r0,r0,r4
+ min r3,r8,r9
+ jpnz.d [blink]
+ add.pnz r0,r0,r3
+; infinity or denormal number
+ add.ne.f r3,r3,r3
+ asr_s r3,r3,23+1
+ bset r6,r6,23
+ bpnz.d .Linfinity
+ sub_s r3,r3,1
+ neg_s r2,r3
+	brhi.d r2,24,.Lret_r0 ; right shift > 24 -> return +-0
+ lsr r2,r6,r2
+ asl r9,r6,r3
+ lsr.f 0,r2,1
+ tst r7,r7
+ add_s r0,r0,r2
+ bset.ne r9,r9,0
+ adc.f 0,r9,r4
+ j_s.d [blink]
+ add.cs r0,r0,1
+.Linfinity:
+ j_s.d [blink]
+ add_s r0,r0,r9
+
+.Lret_r0: j_s [blink]
+
+ .balign 4
+.Ldenorm_dbl0:
+ bclr_s r2,r2,31
+ norm.f r4,r2
+ add_s r2,r2,r2
+ asl r2,r2,r4
+ mulu64 r2,r3
+ breq.d r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
+ asl r4,r4,23
+ sub.ne.f r12,r12,r4
+ ld.as r4,[pcl,27]; [pcl,((.L7fffffff-.+2)/4)]
+ bhi.d .Lpast_denorm
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl0_inf_nan_dbl1:
+ bmsk.f 0,r0,30
+ mov.eq r1,-1
+.Linf_nan_dbl1:
+ xor_s r1,r1,r0
+.Linf_nan_dbl0:
+ bclr_s r1,r1,31
+ j_s.d [blink]
+ xor_s r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl1:
+ breq.d r11,r9,.Linf_nan_dbl0_2
+ norm.f r3,r4
+ sub_s r3,r3,7
+ asl r4,r4,r3
+ mulu64 r2,r4
+ sub_s r3,r3,1
+ asl_s r3,r3,23
+ sub.ne.f r11,r11,r3
+ ld.as r4,[pcl,11]; [pcl,((.L7fffffff-.+2)/4)]
+ bhi.d .Lpast_denorm
+ bmsk r8,r0,30
+ j_s.d [blink]
+ bic r0,r0,r8
+
+ .balign 4
+.Linf_nan_dbl0_2:
+ bclr_s r1,r1,31
+ xor_s r0,r0,r1
+ sub.eq r1,r1,1 ; inf/nan * 0 -> nan
+ bic.f 0,r9,r1
+ j_s.d [blink]
+ or.eq r0,r0,r1 ; r1 nan -> result nan
+
+ .balign 4
+.L7f800000:
+ .long 0x7f800000
+.L7fffffff:
+ .long 0x7fffffff
+ ENDFUNC(__mulsf3)
diff --git a/gcc/config/arc/ieee-754/divdf3.S b/gcc/config/arc/ieee-754/divdf3.S
new file mode 100644
index 00000000000..fcfb1e7812d
--- /dev/null
+++ b/gcc/config/arc/ieee-754/divdf3.S
@@ -0,0 +1,419 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/*
+ To calculate a := b/x as b*y, with y := 1/x:
+ - x is in the range [1..2)
+ - calculate a 15..18 bit inverse y0 using a table of approximating
+   polynomials.  Precision is higher for the polynomials used to evaluate
+   input with larger value.
+ - Do one Newton-Raphson iteration step to double the precision,
+   then multiply this with the divisor
+   -> more time to decide if the dividend is subnormal
+ - the worst error propagation is on the side of the value range
+   with the least initial defect, thus giving us about 30 bits precision.
+ The truncation error for either is less than 1 + x/2 ulp.
+ A 31 bit inverse can be calculated simply by using x with implicit 1
+ and chaining the multiplies.  For a 32 bit inverse, we multiply y0^2
+ with the bare fraction part of x, then add in y0^2 for the implicit
+ 1 of x.
+ - If calculating a 31 bit inverse, the systematic error is less than
+   -1 ulp; likewise, for 32 bit, it is less than -2 ulp.
+ - If we calculate our seed with a 32 bit fraction, we can achieve a
+   tentative result strictly better than -2 / +2.5 (1) ulp/128, i.e. we
+   only need to take the step to calculate the 2nd stage rest and
+   rounding adjust 1/32nd of the time.  However, if we use a 20 bit
+   fraction for the seed, the negative error can exceed -2 ulp/128, (2)
+   thus for a simple add / tst check, we need to do the 2nd stage
+   rest calculation / rounding adjust 1/16th of the time.
+ (1): The inexactness of the 32 bit inverse contributes an error in the
+ range of (-1 .. +(1+x/2) ) ulp/128.  Leaving out the low word of the
+ rest contributes an error < +1/x ulp/128 .  In the interval [1,2),
+ x/2 + 1/x <= 1.5 .
+ (2): Unless proven otherwise.  I have not actually looked for an
+ example where -2 ulp/128 is exceeded, and my calculations indicate
+ that the excess, if existent, is less than -1/512 ulp.
+ */
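+/* The Newton-Raphson step referred to above, written in real arithmetic
+   (a sketch, not the exact fixed-point code):
+       y1 = y0 * (2 - x*y0)   so that   1/x - y1 = x * (1/x - y0)^2
+   i.e. each step squares the error of the seed.  The code below evaluates
+   the same identity in fixed point with mpyhu (unsigned high-part
+   multiplies), feeding the refined inverse straight into the fraction
+   product instead of forming y1 as a separate rounded value. */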
+#include "arc-ieee-754.h"
+
+/* N.B. fp-bit.c does double rounding on denormal numbers. */
+#if 0 /* DEBUG */
+ .global __divdf3
+ FUNC(__divdf3)
+ .balign 4
+__divdf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __divdf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __divdf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ and r12,DBL0H,DBL1H
+ bic.f 0,0x7ff80000,r12 ; both NaN -> OK
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__divdf3)
+#define __divdf3 __divdf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divdf3)
+__divdf3_support: /* This label makes debugger output saner. */
+ .balign 4
+.Ldenorm_dbl1:
+ brge r6, \
+ 0x43500000,.Linf_NaN ; large number / denorm -> Inf
+ bmsk.f r12,DBL1H,19
+ mov.eq r12,DBL1L
+ mov.eq DBL1L,0
+ sub.eq r7,r7,32
+ norm.f r11,r12 ; flag for x/0 -> Inf check
+ beq_s .Linf_NaN
+ mov.mi r11,0
+ add.pl r11,r11,1
+ add_s r12,r12,r12
+ asl r8,r12,r11
+ rsub r12,r11,31
+ lsr r12,DBL1L,r12
+ tst_s DBL1H,DBL1H
+ or r8,r8,r12
+ lsr r4,r8,26
+ lsr DBL1H,r8,12
+ ld.as r4,[r10,r4]
+ bxor.mi DBL1H,DBL1H,31
+ sub r11,r11,11
+ asl DBL1L,DBL1L,r11
+ sub r11,r11,1
+ mpyhu r5,r4,r8
+ sub r7,r7,r11
+ asl r4,r4,12
+ b.d .Lpast_denorm_dbl1
+ asl r7,r7,20
+ ; wb stall
+
+ .balign 4
+.Ldenorm_dbl0:
+ bmsk.f r12,DBL0H,19
+ ; wb stall
+ mov.eq r12,DBL0L
+ sub.eq r6,r6,32
+ norm.f r11,r12 ; flag for 0/x -> 0 check
+ brge r7, \
+ 0x43500000, .Lret0_NaN ; denorm/large number -> 0
+ beq_s .Lret0_NaN
+ mov.mi r11,0
+ add.pl r11,r11,1
+ asl r12,r12,r11
+ sub r6,r6,r11
+ add.f 0,r6,31
+ lsr r10,DBL0L,r6
+ mov.mi r10,0
+ add r6,r6,11+32
+ neg.f r11,r6
+ asl DBL0L,DBL0L,r11
+ mov.pl DBL0L,0
+ sub r6,r6,32-1
+ b.d .Lpast_denorm_dbl0
+ asl r6,r6,20
+
+.Linf_NaN:
+ tst_s DBL0L,DBL0L ; 0/0 -> NaN
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr.eq.f DBL0H,DBL0H,31
+ bmsk DBL0H,DBL1H,30
+ xor_s DBL0H,DBL0H,DBL1H
+ sub.eq DBL0H,DBL0H,1
+ mov_s DBL0L,0
+ j_s.d [blink]
+ or DBL0H,DBL0H,r9
+ .balign 4
+.Lret0_NaN:
+ xor_s DBL1H,DBL1H,DBL0H
+ cmp_s r12,r9
+ mov_s DBL0L,0
+ bmsk DBL0H,DBL1H,30
+ xor_s DBL0H,DBL0H,DBL1H
+ j_s.d [blink]
+ sub.hi DBL0H,DBL0H,1
+.Linf_nan_dbl1: ; Inf/Inf -> NaN x/Inf-> 0 x/NaN -> NaN
+ not_s DBL0L,DBL1H
+ cmp r6,r9
+ sub_s.ne DBL0L,DBL0L,DBL0L
+ tst_s DBL0H,DBL0H
+ add_s DBL0H,DBL1H,DBL0L
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+.Linf_nan_dbl0:
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+ .balign 4
+ .global __divdf3
+/* N.B. the spacing between divtab and the add3 to get its address must
+ be a multiple of 8. */
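+/* Here `add3 r10,pcl,59' computes r10 = pcl + (59 << 3), so the operand
+   is the byte distance to .Ldivtab divided by 8 -- hence the alignment
+   requirement above.  The subsequent `ld.as r4,[r10,r4]' scales the
+   6-bit table index by the access size (4 bytes). */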
+__divdf3:
+ asl r8,DBL1H,12
+ lsr r12,DBL1L,20
+ lsr r4,r8,26
+ add3 r10,pcl,59 ; (.Ldivtab-.) >> 3
+ ld.as r4,[r10,r4]
+ ld.as r9,[pcl,180]; [pcl,(-((.-.L7ff00000) >> 2))] ; 0x7ff00000
+ or r8,r8,r12
+ mpyhu r5,r4,r8
+ and.f r7,DBL1H,r9
+ asl r4,r4,12 ; having the asl here is a concession to the XMAC pipeline.
+ beq.d .Ldenorm_dbl1
+ and r6,DBL0H,r9
+.Lpast_denorm_dbl1: ; wb stall
+ sub r4,r4,r5
+ mpyhu r5,r4,r4
+ breq.d r6,0,.Ldenorm_dbl0
+ lsr r8,r8,1
+ asl r12,DBL0H,11
+ lsr r10,DBL0L,21
+.Lpast_denorm_dbl0: ; wb stall
+ bset r8,r8,31
+ mpyhu r11,r5,r8
+ add_s r12,r12,r10
+ bset r5,r12,31
+ cmp r5,r8
+ cmp.eq DBL0L,DBL1L
+ ; wb stall
+ lsr.cc r5,r5,1
+ sub r4,r4,r11 ; u1.31 inverse, about 30 bit
+ mpyhu r11,r5,r4 ; result fraction highpart
+ breq r7,r9,.Linf_nan_dbl1
+ lsr r8,r8,2 ; u3.29
+ add r5,r6, /* wait for immediate / XMAC wb stall */ \
+ 0x3fe00000
+ ; wb stall (not for XMAC)
+ breq r6,r9,.Linf_nan_dbl0
+ mpyu r12,r11,r8 ; u-28.31
+ asl_s DBL1L,DBL1L,9 ; u-29.23:9
+ sbc r6,r5,r7
+ ; resource conflict (not for XMAC)
+ mpyhu r5,r11,DBL1L ; u-28.23:9
+ add.cs DBL0L,DBL0L,DBL0L
+ asl_s DBL0L,DBL0L,6 ; u-26.25:7
+ asl r10,r11,23
+ sub_l DBL0L,DBL0L,r12
+ ; wb stall (before 'and' for XMAC)
+ lsr r7,r11,9
+ sub r5,DBL0L,r5 ; rest msw ; u-26.31:0
+ mpyh r12,r5,r4 ; result fraction lowpart
+ xor.f 0,DBL0H,DBL1H
+ and DBL0H,r6,r9
+ add_s DBL0H,DBL0H,r7 ; (XMAC wb stall)
+ bxor.mi DBL0H,DBL0H,31
+ brhs r6, /* wb stall / wait for immediate */ \
+ 0x7fe00000,.Linf_denorm
+ add.f r12,r12,0x11
+ asr r9,r12,5
+ sub.mi DBL0H,DBL0H,1
+ add.f DBL0L,r9,r10
+ tst r12,0x1c
+ jne.d [blink]
+ add.cs DBL0H,DBL0H,1
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in double
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. Since we want to know
+ only the sign bit, it is sufficient to calculate only the
+ highpart of the lower 64 bits. */
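+	/* In C-like pseudocode (names are hypothetical), the check is:
+	       mid  = value halfway between the two candidate results;
+	       rest = b - mid * x;     // only the sign is needed
+	       round up if rest > 0, down if rest < 0;
+	   rest cannot be 0: a quotient of two normal doubles never falls
+	   exactly on such a midpoint, and `bset r7,r7,0' below keeps the
+	   comparison value nonzero. */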
+ sub.f DBL0L,DBL0L,1
+ asl r12,r9,2 ; u-22.30:2
+ mpyu r10,r11,DBL1L ; rest before considering r12 in r5 : -r10
+ sub.cs DBL0H,DBL0H,1
+ sub.f r12,r12,2
+ ; resource conflict (not for XMAC)
+ mpyhu r7,r12,DBL1L ; u-51.32
+ asl r5,r5,25 ; s-51.7:25
+ lsr r10,r10,7 ; u-51.30:2
+ ; resource conflict (not for XMAC)
+ ; resource conflict (not for XMAC)
+ mpyu r9,r12,r8 ; u-51.31:1
+ sub r5,r5,r10
+ add.mi r5,r5,DBL1L ; signed multiply adjust for r12*DBL1L
+ bset r7,r7,0 ; make sure that the result is not zero, and that
+ ; wb stall (one earlier for XMAC)
+ sub r5,r5,r7 ; a highpart zero appears negative
+ sub.f r5,r5,r9 ; rest msw
+ add.pl.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.eq DBL0H,DBL0H,1
+
+ .balign 4
+.Linf_denorm:
+ brlo r6,0xc0000000,.Linf
+.Ldenorm:
+ asr r6,r6,20
+ neg r9,r6
+ mov_s DBL0H,0
+ brhs.d r9,54,.Lret0
+ bxor.mi DBL0H,DBL0H,31
+ add_l r12,r12,1
+ and r12,r12,-4
+ rsub r7,r6,5
+ asr r10,r12,28
+ bmsk r4,r12,27
+ asrs DBL0L,r4,r7
+ add DBL1H,r11,r10
+ add.f r7,r6,32-5
+ abss r10,r4
+ asl r4,r4,r7
+ mov.mi r4,r10
+ add.f r10,r6,23
+ rsub r7,r6,9
+ lsr r7,DBL1H,r7
+ asl r10,DBL1H,r10
+ or.pnz DBL0H,DBL0H,r7
+ or.mi r4,r4,r10
+ mov.mi r10,r7
+ add.f DBL0L,r10,DBL0L
+ add.cs.f DBL0H,DBL0H,1 ; carry clear after this point
+ bxor.f 0,r4,31
+ add.pnz.f DBL0L,DBL0L,1
+ add.cs.f DBL0H,DBL0H,1
+ jne_l [blink]
+ /* Calculation so far was not conclusive; calculate further rest. */
+ mpyu r11,r11,DBL1L ; rest before considering r12 in r5 : -r11
+ asr.f r12,r12,3
+ asl r5,r5,25 ; s-51.7:25
+ ; resource conflict (not for XMAC)
+ mpyu DBL1H,r12,r8 ; u-51.31:1
+ and r9,DBL0L,1 ; tie-breaker: round to even
+ lsr r11,r11,7 ; u-51.30:2
+ ; resource conflict (not for XMAC)
+ mpyhu r8,r12,DBL1L ; u-51.32
+ sub.mi r11,r11,DBL1L ; signed multiply adjust for r12*DBL1L
+ add_s DBL1H,DBL1H,r11
+ ; resource conflict (not for XMAC)
+ ; resource conflict (not for XMAC)
+ mpyu r12,r12,DBL1L ; u-83.30:2
+ sub DBL1H,DBL1H,r5 ; -rest msw
+ add_s DBL1H,DBL1H,r8 ; -rest msw
+ add.f 0,DBL1H,DBL1H ; can't ror.f by 32 :-(
+ ; wb stall (XMAC: Before add.f)
+ tst_s DBL1H,DBL1H
+ cmp.eq r12,r9
+ add.cs.f DBL0L,DBL0L,1
+ j_s.d [blink]
+ add.cs DBL0H,DBL0H,1
+
+.Lret0:
+ /* return +- 0 */
+ j_s.d [blink]
+ mov_s DBL0L,0
+.Linf:
+ mov_s DBL0H,r9
+ mov_s DBL0L,0
+ j_s.d [blink]
+ bxor.mi DBL0H,DBL0H,31
+
+ .balign 4
+.Ldivtab:
+ .long 0xfc0fffe1
+ .long 0xf46ffdfb
+ .long 0xed1ffa54
+ .long 0xe61ff515
+ .long 0xdf7fee75
+ .long 0xd91fe680
+ .long 0xd2ffdd52
+ .long 0xcd1fd30c
+ .long 0xc77fc7cd
+ .long 0xc21fbbb6
+ .long 0xbcefaec0
+ .long 0xb7efa100
+ .long 0xb32f92bf
+ .long 0xae8f83b7
+ .long 0xaa2f7467
+ .long 0xa5ef6479
+ .long 0xa1cf53fa
+ .long 0x9ddf433e
+ .long 0x9a0f3216
+ .long 0x965f2091
+ .long 0x92df0f11
+ .long 0x8f6efd05
+ .long 0x8c1eeacc
+ .long 0x88eed876
+ .long 0x85dec615
+ .long 0x82eeb3b9
+ .long 0x800ea10b
+ .long 0x7d3e8e0f
+ .long 0x7a8e7b3f
+ .long 0x77ee6836
+ .long 0x756e5576
+ .long 0x72fe4293
+ .long 0x709e2f93
+ .long 0x6e4e1c7f
+ .long 0x6c0e095e
+ .long 0x69edf6c5
+ .long 0x67cde3a5
+ .long 0x65cdd125
+ .long 0x63cdbe25
+ .long 0x61ddab3f
+ .long 0x600d991f
+ .long 0x5e3d868c
+ .long 0x5c6d7384
+ .long 0x5abd615f
+ .long 0x590d4ecd
+ .long 0x576d3c83
+ .long 0x55dd2a89
+ .long 0x545d18e9
+ .long 0x52dd06e9
+ .long 0x516cf54e
+ .long 0x4ffce356
+ .long 0x4e9cd1ce
+ .long 0x4d3cbfec
+ .long 0x4becae86
+ .long 0x4aac9da4
+ .long 0x496c8c73
+ .long 0x483c7bd3
+ .long 0x470c6ae8
+ .long 0x45dc59af
+ .long 0x44bc4915
+ .long 0x43ac3924
+ .long 0x428c27fb
+ .long 0x418c187a
+ .long 0x407c07bd
+.L7ff00000:
+ .long 0x7ff00000
+ ENDFUNC(__divdf3)
diff --git a/gcc/config/arc/ieee-754/divsf3-stdmul.S b/gcc/config/arc/ieee-754/divsf3-stdmul.S
new file mode 100644
index 00000000000..3e83e3e66e8
--- /dev/null
+++ b/gcc/config/arc/ieee-754/divsf3-stdmul.S
@@ -0,0 +1,281 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/*
+ - calculate a 15..18 bit inverse using a table of approximating
+   polynomials.  Precision is higher for the polynomials used to evaluate
+   input with larger value.
+ - do one Newton-Raphson iteration step to double the precision,
+   then multiply this with the divisor
+   -> more time to decide if the dividend is subnormal
+ - the worst error propagation is on the side of the value range
+   with the least initial defect, thus giving us about 30 bits precision.
+ */
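+/* Concretely, the seed lookup below (`lsr r2,r1,17' / `bmsk_s r2,r2,5' /
+   `ld.as r5,[r3,r2]') uses the top 6 fraction bits to index the 64-entry
+   .Ldivtab; each 32-bit entry packs the two coefficients of a linear
+   approximation to 1/x on one of 64 subintervals of [1,2), as emitted by
+   divtab-arc-sf.c. */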
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __divsf3
+ FUNC(__divsf3)
+ .balign 4
+__divsf3:
+ push_s blink
+ push_s r1
+ bl.d __divsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __divsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+#if 1
+ bne abort
+ jeq_s [blink]
+ b abort
+#else
+ bne abort
+ j_s [blink]
+#endif
+ ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+ FUNC(__divsf3)
+ .balign 4
+.L7f800000:
+ .long 0x7f800000
+.Ldivtab:
+ .long 0xfc0ffff0
+ .long 0xf46ffefd
+ .long 0xed1ffd2a
+ .long 0xe627fa8e
+ .long 0xdf7ff73b
+ .long 0xd917f33b
+ .long 0xd2f7eea3
+ .long 0xcd1fe986
+ .long 0xc77fe3e7
+ .long 0xc21fdddb
+ .long 0xbcefd760
+ .long 0xb7f7d08c
+ .long 0xb32fc960
+ .long 0xae97c1ea
+ .long 0xaa27ba26
+ .long 0xa5e7b22e
+ .long 0xa1cfa9fe
+ .long 0x9ddfa1a0
+ .long 0x9a0f990c
+ .long 0x9667905d
+ .long 0x92df878a
+ .long 0x8f6f7e84
+ .long 0x8c27757e
+ .long 0x88f76c54
+ .long 0x85df630c
+ .long 0x82e759c5
+ .long 0x8007506d
+ .long 0x7d3f470a
+ .long 0x7a8f3da2
+ .long 0x77ef341e
+ .long 0x756f2abe
+ .long 0x72f7212d
+ .long 0x709717ad
+ .long 0x6e4f0e44
+ .long 0x6c1704d6
+ .long 0x69e6fb44
+ .long 0x67cef1d7
+ .long 0x65c6e872
+ .long 0x63cedf18
+ .long 0x61e6d5cd
+ .long 0x6006cc6d
+ .long 0x5e36c323
+ .long 0x5c76b9f3
+ .long 0x5abeb0b7
+ .long 0x5916a79b
+ .long 0x57769e77
+ .long 0x55de954d
+ .long 0x54568c4e
+ .long 0x52d6834d
+ .long 0x51667a7f
+ .long 0x4ffe71b5
+ .long 0x4e9e68f1
+ .long 0x4d466035
+ .long 0x4bf65784
+ .long 0x4aae4ede
+ .long 0x496e4646
+ .long 0x48363dbd
+ .long 0x47063547
+ .long 0x45de2ce5
+ .long 0x44be2498
+ .long 0x43a61c64
+ .long 0x4296144a
+ .long 0x41860c0e
+ .long 0x407e03ee
+.Ldenorm_fp1:
+ bclr r6,r6,31
+ norm.f r12,r6 ; flag for x/0 -> Inf check
+ add r6,r6,r6
+ rsub r5,r12,16
+ ror r5,r1,r5
+ asl r6,r6,r12
+ bmsk r5,r5,5
+ ld.as r5,[r3,r5]
+ add r4,r6,r6
+ ; load latency
+ mpyhu r7,r5,r4
+ bic.ne.f 0, \
+ 0x60000000,r0 ; large number / denorm -> Inf
+ beq_s .Linf_NaN
+ asl r5,r5,13
+ ; wb stall
+ ; slow track
+ sub r7,r5,r7
+ mpyhu r8,r7,r6
+ asl_s r12,r12,23
+ and.f r2,r0,r9
+ add r2,r2,r12
+ asl r12,r0,8
+ ; wb stall
+ bne.d .Lpast_denorm_fp1
+.Ldenorm_fp0:
+ mpyhu r8,r8,r7
+ bclr r12,r12,31
+ norm.f r3,r12 ; flag for 0/x -> 0 check
+ bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+ beq_s .Lret0
+ asl_s r12,r12,r3
+ asl_s r3,r3,23
+ add_s r12,r12,r12
+ add r11,r11,r3
+ b.d .Lpast_denorm_fp0
+ mov_s r3,r12
+ .balign 4
+.Linf_NaN:
+ bclr.f 0,r0,31 ; 0/0 -> NaN
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ bic_s r0,r0,r1
+ sub.eq r0,r0,1
+ j_s.d [blink]
+ or r0,r0,r9
+.Lret0:
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_s r0,r0,r1
+.Linf_nan_fp1:
+ cmp_s r2,r9
+ mov.eq r1,-1
+ tst_s r0,r0
+ mov_s r0,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+.Linf_nan_fp0:
+ tst_s r1,r1
+ j_s.d [blink]
+ bxor.mi r0,r0,31
+ .balign 4
+ .global __divsf3
+/* N.B. the spacing between divtab and the sub3 to get its address must
+ be a multiple of 8. */
+__divsf3:
+ lsr r2,r1,17
+ sub3 r3,pcl,55;(.-.Ldivtab) >> 3
+ bmsk_s r2,r2,5
+ ld.as r5,[r3,r2]
+ asl r4,r1,9
+ ld.as r9,[pcl,-114]; [pcl,(-((.-.L7f800000) >> 2))] ; 0x7f800000
+ mpyhu r7,r5,r4
+ asl r6,r1,8
+ and.f r11,r1,r9
+ bset r6,r6,31
+ asl r5,r5,13
+ ; wb stall
+ beq .Ldenorm_fp1
+ sub r7,r5,r7
+ mpyhu r8,r7,r6
+ breq.d r11,r9,.Linf_nan_fp1
+ and.f r2,r0,r9
+ beq.d .Ldenorm_fp0
+ asl r12,r0,8
+ ; wb stall
+ breq r2,r9,.Linf_nan_fp0
+ mpyhu r8,r8,r7
+.Lpast_denorm_fp1:
+ bset r3,r12,31
+.Lpast_denorm_fp0:
+ cmp_s r3,r6
+ lsr.cc r3,r3,1
+ add_s r2,r2, /* wait for immediate */ \
+ /* wb stall */ \
+ 0x3f000000
+ sub r7,r7,r8 ; u1.31 inverse, about 30 bit
+ mpyhu r3,r3,r7
+ sbc r2,r2,r11
+ xor.f 0,r0,r1
+ and r0,r2,r9
+ bxor.mi r0,r0,31
+ brhs r2, /* wb stall / wait for immediate */ \
+ 0x7f000000,.Linf_denorm
+.Lpast_denorm:
+ add_s r3,r3,0x22 ; round to nearest or higher
+ tst r3,0x3c ; check if rounding was unsafe
+ lsr r3,r3,6
+ jne.d [blink] ; return if rounding was safe.
+ add_s r0,r0,r3
+ /* work out exact rounding if we fall through here. */
+ /* We know that the exact result cannot be represented in single
+ precision. Find the mid-point between the two nearest
+ representable values, multiply with the divisor, and check if
+ the result is larger than the dividend. */
+ add_s r3,r3,r3
+ sub_s r3,r3,1
+ mpyu r3,r3,r6
+ asr.f 0,r0,1 ; for round-to-even in case this is a denorm
+ rsub r2,r9,25
+ asl_s r12,r12,r2
+ ; wb stall
+ ; slow track
+ sub.f 0,r12,r3
+ j_s.d [blink]
+ sub.mi r0,r0,1
+/* For denormal results, it is possible that an exact result needs
+ rounding, and thus the round-to-even rule has to come into play. */
+.Linf_denorm:
+ brlo r2,0xc0000000,.Linf
+.Ldenorm:
+ asr_s r2,r2,23
+ bic r0,r0,r9
+ neg r9,r2
+ brlo.d r9,25,.Lpast_denorm
+ lsr r3,r3,r9
+ /* Fall through: return +- 0 */
+ j_s [blink]
+.Linf:
+ j_s.d [blink]
+ or r0,r0,r9
+ ENDFUNC(__divsf3)
diff --git a/gcc/config/arc/ieee-754/divsf3.S b/gcc/config/arc/ieee-754/divsf3.S
new file mode 100644
index 00000000000..857626b8a8c
--- /dev/null
+++ b/gcc/config/arc/ieee-754/divsf3.S
@@ -0,0 +1,222 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __divsf3
+ FUNC(__divsf3)
+ .balign 4
+__divsf3:
+ push_s blink
+ push_s r1
+ bl.d __divsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __divsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+#if 1
+ bne abort
+ jeq_s [blink]
+ b abort
+#else
+ bne abort
+ j_s [blink]
+#endif
+ ENDFUNC(__divsf3)
+#define __divsf3 __divsf3_asm
+#endif /* DEBUG */
+
+ .balign 4
+__divsf3_support: /* This label makes debugger output saner. */
+ FUNC(__divsf3)
+.Ldenorm_fp0:
+ norm.f r12,r2 ; flag for 0/x -> 0 check
+ bic.ne.f 0,0x60000000,r1 ; denorm/large number -> 0
+ beq_s .Lret0_NaN
+ tst r1,r9
+ add_s r2,r2,r2
+ sub_s r12,r12,8
+ asl_s r2,r2,r12
+ asl_l r12,r12,23
+ bne.d .Lpast_denorm_fp0
+ add r5,r5,r12
+/* r0 is subnormal, r1 is subnormal or 0. */
+
+ .balign 4
+.Ldenorm_fp1:
+ norm.f r12,r3 ; flag for x/0 -> Inf check
+ bic.ne.f 0,0x60000000,r0 ; large number/denorm -> Inf
+ beq_s .Linf
+ add_s r3,r3,r3
+ sub_s r12,r12,8
+ asl_s r3,r3,r12
+ asl_s r12,r12,23
+ b.d .Lpast_denorm_fp1
+ add r4,r4,r12
+
+.Lret0_NaN:
+ bclr.f 0,r1,31 ; 0/0 -> NaN
+ bic r0,r10,r9
+ j_s.d [blink]
+ sub.eq r0,r0,1
+
+ .global __divsf3
+ .balign 4
+ .long 0x7f800000 ; exponent mask
+__divsf3:
+ ld r9,[pcl,-4]
+ bmsk r2,r0,22
+ xor r4,r0,r2
+ bmsk r3,r1,22
+ xor r5,r1,r3
+ and r11,r0,r9
+ breq.d r11,0,.Ldenorm_fp0
+ xor r10,r4,r5
+ breq r11,r9,.Linf_nan_fp0
+ bset_s r2,r2,23
+ and r11,r1,r9
+ breq r11,0,.Ldenorm_fp1
+ breq r11,r9,.Linf_nan_fp1
+.Lpast_denorm_fp0:
+ bset_s r3,r3,23
+.Lpast_denorm_fp1:
+ cmp r2,r3
+ asl_s r2,r2,6+1
+ asl_s r3,r3,7
+ add.lo r2,r2,r2
+ bclr r8,r9,30 ; exponent bias
+ bclr.lo r8,r8,23 ; reduce exp by one if fraction is shifted
+ sub r4,r4,r5
+ add r4,r4,r8
+ xor.f 0,r10,r4
+ bmi .Linf_denorm
+ and r12,r4,r9
+ breq r12,0,.Ldenorm
+ sub_s r2,r2,r3 ; discard implicit 1
+.Ldiv_23bit:
+ .rep 6
+ divaw r2,r2,r3
+ .endr
+ breq r12,r9,.Linf
+ bmsk r0,r2,6
+ xor_s r2,r2,r0
+.Ldiv_17bit:
+ .rep 7
+ divaw r2,r2,r3
+ .endr
+ asl_s r0,r0,7
+ bmsk r1,r2,6
+ xor_s r2,r2,r1
+ or_s r0,r0,r1
+.Ldiv_10bit:
+ .rep 7
+ divaw r2,r2,r3
+ .endr
+ asl_s r0,r0,7
+ bmsk r1,r2,6
+ xor_s r2,r2,r1
+ or_s r0,r0,r1
+.Ldiv_3bit:
+ .rep 3
+ divaw r2,r2,r3
+ .endr
+ asl_s r0,r0,3
+.Ldiv_0bit:
+ divaw r1,r2,r3
+ bmsk_s r2,r2,2
+ tst r1,-0x7e ; 0xffffff82, test for rest or odd
+ bmsk_s r1,r1,0
+ add_s r0,r0,r2 ; assemble fraction
+ add_s r0,r0,r4 ; add in sign & exponent
+ j_s.d [blink]
+ add.ne r0,r0,r1 ; round to nearest / even
+
+ .balign 4
+.Linf_nan_fp0:
+ bic.f 0,r9,r1 ; fp1 Inf -> result NaN
+ bic r1,r5,r9 ; fp1 sign
+ sub.eq r1,r1,1
+ j_s.d [blink]
+ xor_s r0,r0,r1
+.Linf_nan_fp1:
+ bic r0,r4,r9 ; fp0 sign
+ j_s.d [blink]
+ xor_s r0,r0,r1
+.Linf:
+ j_s.d [blink]
+ or r0,r10,r9
+
+.Lret_r4:
+ j_s.d [blink]
+ mov_s r0,r4
+ .balign 4
+.Linf_denorm:
+ add.f r12,r4,r4
+ asr_l r12,r12,24
+ bpl .Linf
+ max r12,r12,-24
+.Ldenorm:
+ add r1,pcl,42; .Ldenorm_tab-.
+ ldb_s r12,[r12,r1]
+ mov_s r0,0
+ lsr_s r2,r2
+ sub_s r1,r1,r12
+ j_s.d [r1]
+ bic r4,r10,r9
+ .byte .Ldenorm_tab-.Lret_r4
+ .byte .Ldenorm_tab-.Ldiv_0bit
+ .byte .Ldenorm_tab-.Ldiv_3bit-8
+ .byte .Ldenorm_tab-.Ldiv_3bit-4
+ .byte .Ldenorm_tab-.Ldiv_3bit
+ .byte .Ldenorm_tab-.Ldiv_10bit-24
+ .byte .Ldenorm_tab-.Ldiv_10bit-20
+ .byte .Ldenorm_tab-.Ldiv_10bit-16
+ .byte .Ldenorm_tab-.Ldiv_10bit-12
+ .byte .Ldenorm_tab-.Ldiv_10bit-8
+ .byte .Ldenorm_tab-.Ldiv_10bit-4
+ .byte .Ldenorm_tab-.Ldiv_10bit
+ .byte .Ldenorm_tab-.Ldiv_17bit-24
+ .byte .Ldenorm_tab-.Ldiv_17bit-20
+ .byte .Ldenorm_tab-.Ldiv_17bit-16
+ .byte .Ldenorm_tab-.Ldiv_17bit-12
+ .byte .Ldenorm_tab-.Ldiv_17bit-8
+ .byte .Ldenorm_tab-.Ldiv_17bit-4
+ .byte .Ldenorm_tab-.Ldiv_17bit
+ .byte .Ldenorm_tab-.Ldiv_23bit-20
+ .byte .Ldenorm_tab-.Ldiv_23bit-16
+ .byte .Ldenorm_tab-.Ldiv_23bit-12
+ .byte .Ldenorm_tab-.Ldiv_23bit-8
+ .byte .Ldenorm_tab-.Ldiv_23bit-4
+.Ldenorm_tab:
+ .byte .Ldenorm_tab-.Ldiv_23bit
+ ENDFUNC(__divsf3)
diff --git a/gcc/config/arc/ieee-754/divtab-arc-df.c b/gcc/config/arc/ieee-754/divtab-arc-df.c
new file mode 100644
index 00000000000..74c00ae3b4e
--- /dev/null
+++ b/gcc/config/arc/ieee-754/divtab-arc-df.c
@@ -0,0 +1,164 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/* We use a polynomial similar to a Tchebycheff polynomial to get an initial
+   seed, and then use a Newton-Raphson iteration step to get an
+   approximate result.
+   If this result can't be rounded to the exact result with confidence, we
+   round to the value between the two closest representable values, and
+   test if the correctly rounded value is above or below this value.
+
+   Because of the Newton-Raphson iteration step, an error in the seed at X
+   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
+   or a polynomial that is close to optimal according to the maximum norm
+   on the error of the seed value; we want one that is close to optimal
+   according to the maximum norm on the error of the result, i.e. we
+   want the maxima of the polynomial to increase linearly.
+   Given an interval [X0,X2) over which to approximate,
+   with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
+   as for Tchebycheff polynomials:
+   P(0) := 1
+   but then we have:
+   P(1) := X + S*D
+   P(2) := 2 * X^2 + S*D * X - D^2
+   Then again:
+   P(n+1) := 2 * X * P(n) - D^2 * P(n-1)
+ */
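+/* A numeric illustration of the amplification claim (values chosen for
+   illustration only): with y0 = 1/x + e, one Newton-Raphson step, as in
+   err() below, gives
+       y1 = 2*y0 - y0*x*y0,   so   1/x - y1 = x * e^2.
+   For x = 1.5 and e = 1e-5 the residual error is 1.5e-10 -- the squared
+   seed error scaled by x, which is why the approximation error is allowed
+   to grow linearly with X. */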
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+static long double merr = 42.;
+
+double
+err (long double a0, long double a1, long double x)
+{
+ long double y0 = a0 + (x-1)*a1;
+
+ long double approx = 2. * y0 - y0 * x * y0;
+  long double exact = 1./x;
+  long double err = approx - exact;
+
+  if (err <= -1./65536./16384.)
+    printf ("ERROR EXCEEDS 1 ULP %.15f %.15f %.15f\n",
+	    (double)x, (double)approx, (double)exact);
+ if (merr > err)
+ merr = err;
+ return err;
+}
+
+int
+main (void)
+{
+  long double T[5]; /* Taylor polynomial */
+ long double P[5][5];
+ int i, j;
+ long double X0, X1, X2, S;
+ long double inc = 1./64;
+ long double D = inc*0.5;
+ long i0, i1, i2, io;
+
+ memset (P, 0, sizeof (P));
+ P[0][0] = 1.;
+ for (i = 1; i < 5; i++)
+ P[i][i] = 1 << i-1;
+ P[2][0] = -D*D;
+ for (X0 = 1.; X0 < 2.; X0 += inc)
+ {
+ X1 = X0 + inc * 0.5;
+ X2 = X0 + inc;
+ S = D / X1;
+ T[0] = 1./X1;
+ for (i = 1; i < 5; i++)
+ T[i] = T[i-1] * -T[0];
+#if 0
+ printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
+(double)T[3], (double)T[4]);
+#endif
+ P[1][0] = S*D;
+ P[2][1] = S*D;
+ for (i = 3; i < 5; i++)
+ {
+ P[i][0] = -D*D*P[i-2][0];
+ for (j = 1; j < i; j++)
+ P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
+ }
+#if 0
+ printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
+(double)P[3][3], (double)P[3][4]);
+ printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
+(double)P[4][3], (double)P[4][4]);
+#endif
+ for (i = 4; i > 1; i--)
+ {
+ long double a = T[i]/P[i][i];
+
+ for (j = 0; j < i; j++)
+ T[j] -= a * P[i][j];
+ }
+#if 0
+ printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
+#endif
+#if 0
+ i2 = T[2]*1024;
+ long double a = (T[2]-i/1024.)/P[2][2];
+ for (j = 0; j < 2; j++)
+ T[j] -= a * P[2][j];
+#else
+ i2 = 0;
+#endif
+ long double T0, Ti1;
+ for (i = 0, i0 = 0; i < 4; i++)
+ {
+
+ i1 = T[1]*4096. + i0 / (long double)(1 << 20) - 0.5;
+ i1 = - (-i1 & 0x0fff);
+ Ti1 = ((unsigned)(-i1 << 20) | i0) /-(long double)(1LL<<32LL);
+ T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
+ i0 = T0 * 1024 * 1024 + 0.5;
+ i0 &= 0xfffff;
+ }
+#if 0
+ printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
+#endif
+ io = (unsigned)(-i1 << 20) | i0;
+ long double A1 = (unsigned)io/-65536./65536.;
+ long double A0 = (unsigned)(io << 12)/65536./65536.;
+ long double Xm0 = 1./sqrt (-A1);
+ long double Xm1 = 0.5+0.5*-A0/A1;
+#if 0
+ printf ("%f %f %f %f\n", (double)A0, (double)A1, (double) Ti1, (double)X0);
+ printf ("%.12f %.12f %.12f\n",
+ err (A0, A1, X0), err (A0, A1, X1), err (A0, A1, X2));
+ printf ("%.12f %.12f\n", (double)Xm0, (double)Xm1);
+ printf ("%.12f %.12f\n", err (A0, A1, Xm0), err (A0, A1, Xm1));
+#endif
+ printf ("\t.long 0x%x\n", io);
+ }
+#if 0
+ printf ("maximum error: %.15f %x %f\n", (double)merr, (unsigned)(long long)(-merr * 65536 * 65536), (double)log(-merr)/log(2));
+#endif
+ return 0;
+}
diff --git a/gcc/config/arc/ieee-754/divtab-arc-sf.c b/gcc/config/arc/ieee-754/divtab-arc-sf.c
new file mode 100644
index 00000000000..482b4db19fb
--- /dev/null
+++ b/gcc/config/arc/ieee-754/divtab-arc-sf.c
@@ -0,0 +1,130 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/* We use a polynomial similar to a Tchebycheff polynomial to get an initial
+   seed, and then use a Newton-Raphson iteration step to get an
+   approximate result.
+   If this result can't be rounded to the exact result with confidence, we
+   round to the value between the two closest representable values, and
+   test if the correctly rounded value is above or below this value.
+
+   Because of the Newton-Raphson iteration step, an error in the seed at X
+   is amplified by X.  Therefore, we don't want a Tchebycheff polynomial
+   or a polynomial that is close to optimal according to the maximum norm
+   on the error of the seed value; we want one that is close to optimal
+   according to the maximum norm on the error of the result, i.e. we
+   want the maxima of the polynomial to increase linearly.
+   Given an interval [X0,X2) over which to approximate,
+   with X1 := (X0+X2)/2, D := X1-X0, F := 1/D, and S := D/X1 we have,
+   as for Tchebycheff polynomials:
+   P(0) := 1
+   but then we have:
+   P(1) := X + S*D
+   P(2) := 2 * X^2 + S*D * X - D^2
+   Then again:
+   P(n+1) := 2 * X * P(n) - D^2 * P(n-1)
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int
+main (void)
+{
+  long double T[5]; /* Taylor polynomial */
+ long double P[5][5];
+ int i, j;
+ long double X0, X1, X2, S;
+ long double inc = 1./64;
+ long double D = inc*0.5;
+ long i0, i1, i2;
+
+ memset (P, 0, sizeof (P));
+ P[0][0] = 1.;
+ for (i = 1; i < 5; i++)
+ P[i][i] = 1 << i-1;
+ P[2][0] = -D*D;
+ for (X0 = 1.; X0 < 2.; X0 += inc)
+ {
+ X1 = X0 + inc * 0.5;
+      X2 = X0 + inc;
+ S = D / X1;
+ T[0] = 1./X1;
+ for (i = 1; i < 5; i++)
+ T[i] = T[i-1] * -T[0];
+#if 0
+ printf ("T %1.8f %f %f %f %f\n", (double)T[0], (double)T[1], (double)T[2],
+(double)T[3], (double)T[4]);
+#endif
+ P[1][0] = S*D;
+ P[2][1] = S*D;
+ for (i = 3; i < 5; i++)
+ {
+ P[i][0] = -D*D*P[i-2][0];
+ for (j = 1; j < i; j++)
+ P[i][j] = 2*P[i-1][j-1]-D*D*P[i-2][j];
+ }
+#if 0
+ printf ("P3 %1.8f %f %f %f %f\n", (double)P[3][0], (double)P[3][1], (double)P[3][2],
+(double)P[3][3], (double)P[3][4]);
+ printf ("P4 %1.8f %f %f %f %f\n", (double)P[4][0], (double)P[4][1], (double)P[4][2],
+(double)P[4][3], (double)P[4][4]);
+#endif
+ for (i = 4; i > 1; i--)
+ {
+ long double a = T[i]/P[i][i];
+
+ for (j = 0; j < i; j++)
+ T[j] -= a * P[i][j];
+ }
+#if 0
+ printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
+#endif
+#if 0
+ i2 = T[2]*512;
+ long double a = (T[2]-i/512.)/P[2][2];
+ for (j = 0; j < 2; j++)
+ T[j] -= a * P[2][j];
+#else
+ i2 = 0;
+#endif
+ for (i = 0, i0 = 0; i < 4; i++)
+ {
+ long double T0, Ti1;
+
+ i1 = T[1]*8192. + i0 / (long double)(1 << 19) - 0.5;
+ i1 = - (-i1 & 0x1fff);
+ Ti1 = ((unsigned)(-i1 << 19) | i0) /-(long double)(1LL<<32LL);
+ T0 = T[0] - (T[1]-Ti1)/P[1][1] * P[1][0] - (X1 - 1) * Ti1;
+ i0 = T0 * 512 * 1024 + 0.5;
+ i0 &= 0x7ffff;
+ }
+#if 0
+ printf ("A %1.8f %f %f\n", (double)T[0], (double)T[1], (double)T[2]);
+#endif
+ printf ("\t.long 0x%x\n", (-i1 << 19) | i0);
+ }
+ return 0;
+}
diff --git a/gcc/config/arc/ieee-754/eqdf2.S b/gcc/config/arc/ieee-754/eqdf2.S
new file mode 100644
index 00000000000..fe7d684f5f8
--- /dev/null
+++ b/gcc/config/arc/ieee-754/eqdf2.S
@@ -0,0 +1,79 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: DBL0, DBL1
+ output: z flag
+ clobber: r12, flags
+   For NaNs, bits 19..30 of the high word must be set.  */
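+/* A hypothetical call site, to illustrate the convention (a sketch, not
+   actual compiler output): an `a == b' comparison would lower to
+       bl __eqdf2       ; DBL0 = a, DBL1 = b
+       beq .Ltaken      ; z is set iff a == b (clear for any NaN operand)
+ */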
+#if 0 /* DEBUG */
+ .global __eqdf2
+ .balign 4
+ FUNC(__eqdf2)
+__eqdf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __eqdf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __eqdf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ breq.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jne_s [blink]
+ bl abort
+0: jeq_s [blink]
+ bl abort
+ ENDFUNC(__eqdf2)
+#define __eqdf2 __eqdf2_asm
+#endif /* DEBUG */
+ .global __eqdf2
+ .balign 4
+ HIDDEN_FUNC(__eqdf2)
+ /* Good performance as long as the difference in high word is
+ well predictable (as seen from the branch predictor). */
+__eqdf2:
+ brne.d DBL0H,DBL1H,.Lhighdiff
+ bmsk r12,DBL0H,20
+#ifdef DPFP_COMPAT
+ or.f 0,DBL0L,DBL1L
+ bset.ne r12,r12,21
+#endif /* DPFP_COMPAT */
+ add1.f r12,r12,DBL0H /* set c iff NaN; also, clear z if NaN. */
+ j_s.d [blink]
+ cmp.cc DBL0L,DBL1L
+ .balign 4
+.Lhighdiff:
+ or r12,DBL0H,DBL1H
+ or.f 0,DBL0L,DBL1L
+ j_s.d [blink]
+ bmsk.eq.f r12,r12,30
+ ENDFUNC(__eqdf2)
+/* ??? could we do better by speeding up some 'common' case of inequality? */
diff --git a/gcc/config/arc/ieee-754/eqsf2.S b/gcc/config/arc/ieee-754/eqsf2.S
new file mode 100644
index 00000000000..e1be9c225e8
--- /dev/null
+++ b/gcc/config/arc/ieee-754/eqsf2.S
@@ -0,0 +1,72 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: r0, r1
+ output: z flag
+ clobber: r12, flags
+   For NaNs, bits 22..30 must be set.  */
+#if 0 /* DEBUG */
+ .global __eqsf2
+ .balign 4
+ FUNC(__eqsf2)
+__eqsf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __eqsf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __eqsf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ breq.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jne_s [blink]
+ bl abort
+0: jeq_s [blink]
+ bl abort
+ ENDFUNC(__eqsf2)
+#define __eqsf2 __eqsf2_asm
+#endif /* DEBUG */
+ /* Good performance as long as the binary difference is
+ well predictable (as seen from the branch predictor). */
+ .global __eqsf2
+ .balign 4
+ HIDDEN_FUNC(__eqsf2)
+__eqsf2:
+ breq r0, r1,.Lno_bdiff
+ or r12,r0,r1
+ j_s.d [blink]
+ bmsk.f 0,r12,30
+.Lno_bdiff:
+ bmsk r12,r0,23
+ add1.f r12,r12,r0 /* set c iff NaN; also, clear z if NaN. */
+ j_s.d [blink]
+ cmp.cc r0,r1
+ ENDFUNC(__eqsf2)
diff --git a/gcc/config/arc/ieee-754/extendsfdf2.S b/gcc/config/arc/ieee-754/extendsfdf2.S
new file mode 100644
index 00000000000..b634012205a
--- /dev/null
+++ b/gcc/config/arc/ieee-754/extendsfdf2.S
@@ -0,0 +1,125 @@
+/* Copyright 2006, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __extendsfdf2
+ .balign 4
+ FUNC(__extendsfdf2)
+__extendsfdf2:
+ push_s blink
+ bl.d __extendsfdf2_c
+ push_s r0
+ ld_s r2,[sp]
+ st_s r1,[sp]
+ push_s r0
+ bl.d __extendsfdf2_asm
+ mov_s r0,r2
+ pop_s r2
+ pop_s r3
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__extendsfdf2)
+#define __extendsfdf2 __extendsfdf2_asm
+#endif /* DEBUG */
+#if 0 /* ARC600 */
+__extendsfdf2:
+ lsr r2,r0,23
+ tst r2,0xff
+ bic.ne.f r2,0xff
+ beq_s .Linf_nan_denorm_0
+..
+.Linf_nan_denorm:
+ bbit1 r0,30,.Linf_nan
+#endif
+ .global __extendsfdf2
+ .balign 4
+ FUNC(__extendsfdf2)
+__extendsfdf2:
+ add.f r1,r0,r0
+ norm r3,r1
+#ifdef __LITTLE_ENDIAN__
+ lsr_s DBL0H,r1,4
+ brhs r3,7,.Linf_nan_denorm_0
+ asl_s DBL0L,r0,29
+ add_s DBL0H,DBL0H, \
+ 0x38000000
+#else
+ lsr r2,r1,4
+ brhs r3,7,.Linf_nan_denorm_0
+ asl_s DBL0L,r1,28
+ add DBL0H,r2, \
+ 0x38000000
+#endif
+ j_s.d [blink]
+ bxor.cs DBL0H,DBL0H,31
+ .balign 4
+.Linf_nan_denorm_0:
+#ifdef __LITTLE_ENDIAN__
+ mov_s DBL0H,r0
+ jeq.d [blink]
+ mov.eq DBL0L,0
+#else
+ jeq_s [blink]
+#endif
+ bmi .Linf_nan
+ asl_s r0,r0,r3
+ rsub r3,r3,0x380+6
+#ifdef __LITTLE_ENDIAN__
+ asl_s r3,r3,20
+ lsr DBL0H,r0,9
+ asl_s DBL0L,r0,23
+ add_s DBL0H,DBL0H,r3
+ j_s.d [blink]
+ bxor.cs DBL0H,DBL0H,31
+#else
+ asl DBL0L,r0,23
+ lsr_s DBL0H,r0,9
+ asl_s r3,r3,20
+ bxor.cs DBL0H,DBL0H,31
+ j_s.d [blink]
+ add_l DBL0H,DBL0H,r3
+#endif
+.Linf_nan:
+#ifdef __LITTLE_ENDIAN__
+ lsr DBL0H,r0,3
+
+ or_s DBL0H,DBL0H,r0
+ j_s.d [blink]
+ mov_l DBL0L,0
+#else
+ lsr r3,r0,3
+ mov_s DBL0L,0
+ j_s.d [blink]
+ or_l DBL0H,r0,r3
+#endif
+ ENDFUNC(__extendsfdf2)
diff --git a/gcc/config/arc/ieee-754/fixdfsi.S b/gcc/config/arc/ieee-754/fixdfsi.S
new file mode 100644
index 00000000000..0132688639d
--- /dev/null
+++ b/gcc/config/arc/ieee-754/fixdfsi.S
@@ -0,0 +1,88 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ FUNC(__fixdfsi)
+ .global __fixdfsi
+ .balign 4
+__fixdfsi:
+ push_s blink
+ push_s r0
+ bl.d __fixdfsi_c
+ push_s r1
+ mov_s r2,r0
+ pop_s r1
+ ld r0,[sp]
+ bl.d __fixdfsi_asm
+ st r2,[sp]
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__fixdfsi)
+#define __fixdfsi __fixdfsi_asm
+#endif /* DEBUG */
+
+/* If the fraction has to be shifted left by a positive non-zero amount,
+ we have to combine bits from DBL0L and DBL0H. If we shift right,
+ or shift by zero, we only want to have the bits from DBL0H in r0. */
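+/* In terms of the encoding: for |x| >= 2^21 (biased exponent above 0x413,
+   i.e. 0x3ff + 20) the 21 significant high bits, implicit 1 included,
+   move left while the top of DBL0L is shifted in from the right; for
+   2 <= |x| < 2^21 a plain right shift of the high part by (0x413 - exp)
+   suffices and DBL0L is dropped. */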
+
+ .global __fixdfsi
+ FUNC(__fixdfsi)
+ .balign 4
+__fixdfsi:
+ bbit0 DBL0H,30,.Lret0or1
+ asr r2,DBL0H,20
+ bmsk_s DBL0H,DBL0H,19
+ sub_s r2,r2,19; 0x3ff+20-0x400
+ neg_s r3,r2
+ asr.f 0,r3,11
+ bset_s DBL0H,DBL0H,20
+#ifdef __LITTLE_ENDIAN__
+ mov.cs DBL0L,DBL0H
+ asl DBL0H,DBL0H,r2
+#else
+ asl.cc DBL0H,DBL0H,r2
+ lsr.cs DBL0H,DBL0H,r3
+#endif
+ lsr_s DBL0L,DBL0L,r3
+
+ add.cc r0,r0,r1
+ j_s.d [blink]
+ neg.pl r0,r0
+.Lret0or1:
+ add.f r0,DBL0H,0x100000
+ lsr_s r0,r0,30
+
+ bmsk_s r0,r0,0
+ j_s.d [blink]
+ neg.mi r0,r0
+ ENDFUNC(__fixdfsi)
diff --git a/gcc/config/arc/ieee-754/fixsfsi.S b/gcc/config/arc/ieee-754/fixsfsi.S
new file mode 100644
index 00000000000..9704be1f916
--- /dev/null
+++ b/gcc/config/arc/ieee-754/fixsfsi.S
@@ -0,0 +1,74 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __fixsfsi
+ FUNC(__fixsfsi)
+ .balign 4
+__fixsfsi:
+ push_s blink
+ bl.d __fixsfsi_c
+ push_s r0
+ ld_s r1,[sp]
+ st_s r0,[sp]
+ bl.d __fixsfsi_asm
+ mov_s r0,r1
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__fixsfsi)
+#define __fixsfsi __fixsfsi_asm
+#endif /* DEBUG */
+
+ .global __fixsfsi
+ FUNC(__fixsfsi)
+ .balign 4
+__fixsfsi:
+ bbit0 r0,30,.Lret0or1
+ lsr r2,r0,23
+ bmsk_s r0,r0,22
+ bset_s r0,r0,23
+ sub_s r2,r2,22;0x7f+23-0x80
+ asl.f 0,r2,24
+ neg r3,r2
+ asl.mi r0,r0,r2
+ lsr.pl r0,r0,r3
+ j_s.d [blink]
+ neg.cs r0,r0
+.Lret0or1:
+ add.f r0,r0,0x800000
+ lsr_s r0,r0,30
+
+ bmsk_s r0,r0,0
+ j_s.d [blink]
+ neg.mi r0,r0
+ ENDFUNC(__fixsfsi)
diff --git a/gcc/config/arc/ieee-754/fixunsdfsi.S b/gcc/config/arc/ieee-754/fixunsdfsi.S
new file mode 100644
index 00000000000..4b9f7e8342f
--- /dev/null
+++ b/gcc/config/arc/ieee-754/fixunsdfsi.S
@@ -0,0 +1,83 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ FUNC(__fixunsdfsi)
+ .global __fixunsdfsi
+ .balign 4
+__fixunsdfsi:
+ push_s blink
+ push_s r0
+ bl.d __fixunsdfsi_c
+ push_s r1
+ mov_s r2,r0
+ pop_s r1
+ ld r0,[sp]
+ bl.d __fixunsdfsi_asm
+ st r2,[sp]
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__fixunsdfsi)
+#define __fixunsdfsi __fixunsdfsi_asm
+#endif /* DEBUG */
+
+ .global __fixunsdfsi
+ FUNC(__fixunsdfsi)
+ .balign 4
+__fixunsdfsi:
+ bbit0 DBL0H,30,.Lret0or1
+ lsr r2,DBL0H,20
+ bmsk_s DBL0H,DBL0H,19
+ sub_s r2,r2,19; 0x3ff+20-0x400
+ neg_s r3,r2
+ btst_s r3,10
+ bset_s DBL0H,DBL0H,20
+#ifdef __LITTLE_ENDIAN__
+ mov.ne DBL0L,DBL0H
+ asl DBL0H,DBL0H,r2
+#else
+ asl.eq DBL0H,DBL0H,r2
+ lsr.ne DBL0H,DBL0H,r3
+#endif
+ lsr DBL0L,DBL0L,r3
+ j_s.d [blink]
+ add.eq r0,r0,r1
+.Lret0:
+ j_s.d [blink]
+ mov_l r0,0
+.Lret0or1:
+ add_s DBL0H,DBL0H,0x100000
+ lsr_s DBL0H,DBL0H,30
+ j_s.d [blink]
+ bmsk_l r0,DBL0H,0
+ ENDFUNC(__fixunsdfsi)
diff --git a/gcc/config/arc/ieee-754/floatsidf.S b/gcc/config/arc/ieee-754/floatsidf.S
new file mode 100644
index 00000000000..994965fe404
--- /dev/null
+++ b/gcc/config/arc/ieee-754/floatsidf.S
@@ -0,0 +1,80 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __floatsidf
+ .balign 4
+ FUNC(__floatsidf)
+__floatsidf:
+ push_s blink
+ bl.d __floatsidf_c
+ push_s r0
+ ld_s r2,[sp]
+ st_s r1,[sp]
+ push_s r0
+ bl.d __floatsidf_asm
+ mov_s r0,r2
+ pop_s r2
+ pop_s r3
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__floatsidf)
+#define __floatsidf __floatsidf_asm
+#endif /* DEBUG */
+
+ .global __floatsidf
+ .balign 4
+ FUNC(__floatsidf)
+__floatsidf:
+ abs.f r1,r0
+ jeq_s [blink]
+ lsr r2,r1
+ mov r12,-0x41d ; -(0x3ff+31-1)
+ norm r2,r2
+ bclr.cs r12,r12,11
+ rsub.f r3,r2,11
+ add_s r12,r2,r12
+ add_s r2,r2,21
+#ifdef __LITTLE_ENDIAN__
+ asl DBL0L,r1,r2
+ lsr_s DBL0H,r1,r3
+#else
+ lsr DBL0H,r1,r3
+ asl_s DBL0L,r1,r2
+#endif
+ asl_s r12,r12,20
+ mov.lo DBL0H,DBL0L
+ sub_s DBL0H,DBL0H,r12
+ j_s.d [blink]
+ mov.ls DBL0L,0
+ ENDFUNC(__floatsidf)
diff --git a/gcc/config/arc/ieee-754/floatsisf.S b/gcc/config/arc/ieee-754/floatsisf.S
new file mode 100644
index 00000000000..d8b32f2ad60
--- /dev/null
+++ b/gcc/config/arc/ieee-754/floatsisf.S
@@ -0,0 +1,102 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __floatsisf
+ FUNC(__floatsisf)
+ .balign 4
+__floatsisf:
+ push_s blink
+ bl.d __floatsisf_c
+ push_s r0
+ ld_s r1,[sp]
+ st_s r0,[sp]
+ bl.d __floatsisf_asm
+ mov_s r0,r1
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__floatsisf)
+ .global __floatunsisf
+ FUNC(__floatunsisf)
+ .balign 4
+__floatunsisf:
+ push_s blink
+ bl.d __floatunsisf_c
+ push_s r0
+ ld_s r1,[sp]
+ st_s r0,[sp]
+ bl.d __floatunsisf_asm
+ mov_s r0,r1
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__floatunsisf)
+#define __floatsisf __floatsisf_asm
+#define __floatunsisf __floatunsisf_asm
+#endif /* DEBUG */
+
+ .global __floatunsisf
+ .global __floatsisf
+ FUNC(__floatsisf)
+ FUNC(__floatunsisf)
+ .balign 4
+__floatunsisf:
+ lsr_s r2,r0
+ mov_l r12,0x9d ; 0x7f + 31 - 1
+ norm r2,r2
+ brne_l r0,0,0f
+ j_s [blink]
+ .balign 4
+__floatsisf:
+ abs.f r0,r0
+ jeq_s [blink]
+ lsr_s r2,r0
+ mov_s r12,0x9d ; 0x7f + 31 - 1
+ norm r2,r2
+ bset.cs r12,r12,8
+0: rsub.f r3,r2,8
+ bmsk r1,r0,r3
+ ror r1,r1,r3
+ lsr.pl r0,r0,r3
+ neg_s r3,r3
+ asl.mi r0,r0,r3
+ sub_s r12,r12,r2
+ asl_s r12,r12,23
+ bxor.pl.f r1,r1,31
+ add_s r0,r0,r12
+ j_s.d [blink]
+ add.pnz r0,r0,1
+ ENDFUNC(__floatunsisf)
+ ENDFUNC(__floatsisf)
diff --git a/gcc/config/arc/ieee-754/floatunsidf.S b/gcc/config/arc/ieee-754/floatunsidf.S
new file mode 100644
index 00000000000..17cdf7699ad
--- /dev/null
+++ b/gcc/config/arc/ieee-754/floatunsidf.S
@@ -0,0 +1,78 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __floatunsidf
+ .balign 4
+ FUNC(__floatunsidf)
+__floatunsidf:
+ push_s blink
+ bl.d __floatunsidf_c
+ push_s r0
+ ld_s r2,[sp]
+ st_s r1,[sp]
+ push_s r0
+ bl.d __floatunsidf_asm
+ mov_s r0,r2
+ pop_s r2
+ pop_s r3
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ bl abort
+ ENDFUNC(__floatunsidf)
+#define __floatunsidf __floatunsidf_asm
+#endif /* DEBUG */
+
+ .global __floatunsidf
+ .balign 4
+ FUNC(__floatunsidf)
+__floatunsidf:
+ lsr_s r1,r0
+ breq_s r0,0,.Lret0
+ norm r2,r1
+ mov r12,-0x41d ; -(0x3ff+31-1)
+ rsub.f r3,r2,11
+ add_s r12,r2,r12
+ add_s r2,r2,21
+#ifdef __LITTLE_ENDIAN__
+ lsr DBL0H,r0,r3
+ asl_s DBL0L,r0,r2
+#else
+ asl DBL0L,r0,r2
+ lsr_s DBL0H,r0,r3
+#endif
+ asl_s r12,r12,20
+ mov.lo DBL0H,DBL0L
+ sub_s DBL0H,DBL0H,r12
+.Lret0: j_s.d [blink]
+ mov.ls DBL0L,0
+ ENDFUNC(__floatunsidf)
diff --git a/gcc/config/arc/ieee-754/gedf2.S b/gcc/config/arc/ieee-754/gedf2.S
new file mode 100644
index 00000000000..3640d74a549
--- /dev/null
+++ b/gcc/config/arc/ieee-754/gedf2.S
@@ -0,0 +1,89 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: DBL0, DBL1
+ output: c flags to be used for 'hs' condition
+ clobber: r12, flags */
+/* For NaNs, bit 19 .. bit 30 of the high word must be set. */
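
As a cross-check of the contract above, here is a rough C model of the
ordering __gedf2 computes on raw IEEE-754 bit patterns; this is a sketch
only (the function name is illustrative, and the asm reports the result
in the carry flag for the 'hs' condition instead of returning a value):

#include <stdint.h>

/* Sketch: nonzero iff a >= b, with a, b the raw bits of two doubles.
   NaNs are assumed to follow the bit 19 .. bit 30 contract above. */
static int
gedf2_sketch (uint64_t a, uint64_t b)
{
  uint64_t nan = 0x7ff8000000000000ull; /* exponent plus fraction bit 51 */
  if ((a & nan) == nan || (b & nan) == nan)
    return 0;                           /* unordered compares false */
  if (((a | b) & ~(1ull << 63)) == 0)
    return 1;                           /* -0 and +0 are equal */
  /* Map the sign-magnitude encoding onto one monotone unsigned key. */
  uint64_t ka = (a >> 63) ? ~a : (a | (1ull << 63));
  uint64_t kb = (b >> 63) ? ~b : (b | (1ull << 63));
  return ka >= kb;
}
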
+#if 0 /* DEBUG */
+ .global __gedf2
+ .balign 4
+ FUNC(__gedf2)
+__gedf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __gedf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __gedf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ brge.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jlo [blink]
+ bl abort
+0: jhs [blink]
+ bl abort
+ ENDFUNC(__gedf2)
+#define __gedf2 __gedf2_asm
+#endif /* DEBUG */
+ .global __gedf2
+ .balign 4
+ HIDDEN_FUNC(__gedf2)
+__gedf2:
+ or.f r12,DBL0H,DBL1H
+ bmi.d .Lneg
+ bmsk_s r12,r12,20
+ add1.f 0,r12,DBL0H ; clear z; set c iff NaN
+ add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN
+ bbit1 DBL0H,31,.Lneg
+ cmp.cc DBL0H,DBL1H
+ j_s.d [blink]
+ cmp.eq DBL0L,DBL1L
+ .balign 4
+.Lneg: breq.d DBL1H,0,.L0
+ add1.f 0,r12,DBL0H
+ add1.cc.f r12,r12,DBL1H
+ cmp.cc DBL1H,DBL0H
+ j_s.d [blink]
+ cmp.eq DBL1L,DBL0L
+ .balign 4
+.L0:
+ bxor.f 0,DBL0H,31 ; check for high word of -0.
+ beq_s .Lcheck_0
+ cmp.cc DBL1H,DBL0H
+ j_s.d [blink]
+ cmp.eq DBL1L,DBL0L
+.Lcheck_0:
+ ; high words suggest DBL0 may be -0, DBL1 +0; check low words.
+ cmp_s DBL1H,DBL0L
+ j_s.d [blink]
+ cmp.cc DBL1H,DBL1L
+ ENDFUNC(__gedf2)
diff --git a/gcc/config/arc/ieee-754/gesf2.S b/gcc/config/arc/ieee-754/gesf2.S
new file mode 100644
index 00000000000..560039ae48e
--- /dev/null
+++ b/gcc/config/arc/ieee-754/gesf2.S
@@ -0,0 +1,78 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: r0, r1
+ output: c flag to be used for 'hs' condition
+ clobber: r12,flags */
+/* For NaNs, bit 22 .. bit 30 must be set. */
+#if 0 /* DEBUG */
+ .global __gesf2
+ .balign 4
+ FUNC(__gesf2)
+__gesf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __gesf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __gesf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ brge.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jlo [blink]
+ bl abort
+0: jhs [blink]
+ bl abort
+ ENDFUNC(__gesf2)
+#define __gesf2 __gesf2_asm
+#endif /* DEBUG */
+ .global __gesf2
+ .balign 4
+ HIDDEN_FUNC(__gesf2)
+__gesf2:
+ or.f r12,r0,r1
+ bmi.d .Lneg
+ bmsk_s r12,r12,23
+ add1.f 0,r12,r0 ; check for NaN
+ add1.cc.f r12,r12,r1
+ j_s.d [blink]
+ cmp.cc r0,r1
+ .balign 4
+.Lneg: breq.d r1,0,.L0
+ add1.f 0,r12,r0 ; check for NaN
+ add1.cc.f r12,r12,r1
+ j_s.d [blink]
+ cmp.cc r1,r0
+ .balign 4
+.L0: bxor.f 0,r0,31 ; check for -0
+ j_s.d [blink]
+ cmp.hi r1,r0
+ ENDFUNC(__gesf2)
diff --git a/gcc/config/arc/ieee-754/gtdf2.S b/gcc/config/arc/ieee-754/gtdf2.S
new file mode 100644
index 00000000000..1f161076703
--- /dev/null
+++ b/gcc/config/arc/ieee-754/gtdf2.S
@@ -0,0 +1,89 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: DBL0, DBL1
+ output: c,z flags to be used for 'hi' condition
+ clobber: r12, flags */
+/* For NaNs, bit 19 .. bit 30 of the high word must be set. */
+#if 0 /* DEBUG */
+ .global __gtdf2
+ .balign 4
+ FUNC(__gtdf2)
+__gtdf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __gtdf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __gtdf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ brgt.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jls [blink]
+ bl abort
+0: jhi [blink]
+ bl abort
+ ENDFUNC(__gtdf2)
+#define __gtdf2 __gtdf2_asm
+#endif /* DEBUG */
+ .global __gtdf2
+ .balign 4
+ HIDDEN_FUNC(__gtdf2)
+__gtdf2:
+ or.f r12,DBL0H,DBL1H
+ bmi.d .Lneg
+ bmsk_s r12,r12,20
+ add1.f 0,r12,DBL0H ; clear z; set c iff NaN
+ add1.cc.f r12,r12,DBL1H ; clear z; set c iff NaN
+ bbit1 DBL0H,31,.Lneg
+ cmp.cc DBL0H,DBL1H
+ j_s.d [blink]
+ cmp.eq DBL0L,DBL1L
+ .balign 4
+.Lneg: breq.d DBL0H,0,.L0
+ add1.f 0,r12,DBL1H
+ add1.cc.f r12,r12,DBL0H
+ cmp.cc DBL1H,DBL0H
+ j_s.d [blink]
+ cmp.eq DBL1L,DBL0L
+ .balign 4
+.L0:
+ bxor.f 0,DBL1H,31
+ beq_s .Lcheck_0
+ cmp.cc DBL1H,DBL0H
+ j_s.d [blink]
+ cmp.eq DBL1L,DBL0L
+ .balign 4
+.Lcheck_0:
+ ; high words suggest DBL0 may be +0, DBL1 -0; check low words.
+ j_s.d [blink]
+ or.f 0,DBL0L,DBL1L
+ ENDFUNC(__gtdf2)
diff --git a/gcc/config/arc/ieee-754/gtsf2.S b/gcc/config/arc/ieee-754/gtsf2.S
new file mode 100644
index 00000000000..c3d65f3220a
--- /dev/null
+++ b/gcc/config/arc/ieee-754/gtsf2.S
@@ -0,0 +1,78 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: r0, r1
+ output: c, z flags to be used for 'hi' condition
+ clobber: r12,flags */
+/* For NaNs, bit 22 .. bit 30 must be set. */
+#if 0 /* DEBUG */
+ .global __gtsf2
+ .balign 4
+ FUNC(__gtsf2)
+__gtsf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __gtsf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __gtsf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ brgt.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jls [blink]
+ bl abort
+0: jhi [blink]
+ bl abort
+ ENDFUNC(__gtsf2)
+#define __gtsf2 __gtsf2_asm
+#endif /* DEBUG */
+ .global __gtsf2
+ .balign 4
+ HIDDEN_FUNC(__gtsf2)
+__gtsf2:
+ or.f r12,r0,r1
+ bmi.d .Lneg
+ bmsk_s r12,r12,23
+ add1.f 0,r12,r0 ; check for NaN
+ add1.cc.f r12,r12,r1
+ j_s.d [blink]
+ cmp.cc r0,r1
+ .balign 4
+.Lneg: breq.d r0,0,.L0
+ add1.f 0,r12,r0 ; check for NaN
+ add1.cc.f r12,r12,r1
+ j_s.d [blink]
+ cmp.cc r1,r0
+ .balign 4
+.L0: bxor.f 0,r1,31 ; check for -0
+ j_s.d [blink]
+ cmp.hi r1,r0
+ ENDFUNC(__gtsf2)
diff --git a/gcc/config/arc/ieee-754/muldf3.S b/gcc/config/arc/ieee-754/muldf3.S
new file mode 100644
index 00000000000..ff1310f0876
--- /dev/null
+++ b/gcc/config/arc/ieee-754/muldf3.S
@@ -0,0 +1,238 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/* XMAC schedule: directly back-to-back multiplies stall; the third
+ instruction after a multiply stalls unless it is also a multiply. */
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ push_s blink
+ push_s r2
+ push_s r3
+ push_s r0
+ bl.d __muldf3_c
+ push_s r1
+ ld_s r2,[sp,12]
+ ld_s r3,[sp,8]
+ st_s r0,[sp,12]
+ st_s r1,[sp,8]
+ pop_s r1
+ bl.d __muldf3_asm
+ pop_s r0
+ pop_s r3
+ pop_s r2
+ pop_s blink
+ cmp r0,r2
+ cmp.eq r1,r3
+ jeq_s [blink]
+ b abort
+#define __muldf3 __muldf3_asm
+#endif /* DEBUG */
+/* N.B. This is optimized for ARC700.
+ ARC600 has very different scheduling / instruction selection criteria. */
+/* For the standard multiplier, instead of mpyu rx,DBL0L,DBL1L; tst rx,rx ,
+ we can do:
+ sub rx,DBL0L,1; bic rx,DBL0L,rx; lsr rx,rx; norm rx,rx; asl.f 0,DBL1L,rx */
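
The sequence above relies on an identity rather than on computing the
product: the low 32 bits of a 32x32 multiply are nonzero exactly when
the operands' trailing-zero counts sum to less than 32. A minimal C
rendering of that test (illustrative only; the asm derives the counts
through the lowest-set-bit / norm trick quoted above):

#include <stdint.h>

/* Sketch: is the low 32-bit half of a * b nonzero?  a * b equals an
   odd number times 2**(ctz(a) + ctz(b)), so the low half is nonzero
   iff that exponent stays below 32. */
static int
low_product_nonzero (uint32_t a, uint32_t b)
{
  if (a == 0 || b == 0)
    return 0;                   /* the product is exactly zero */
  return __builtin_ctz (a) + __builtin_ctz (b) < 32;
}
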
+
+__muldf3_support: /* This label makes debugger output saner. */
+/* If one number is denormal, subtract some from the exponent of the other
+ one (if the other exponent is too small, return 0), and normalize the
+ denormal. Then re-run the computation. */
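
A sketch of that pre-pass in C, on unpacked fields; the struct and the
helper name are illustrative and do not mirror the asm's register
assignments:

/* Sketch: normalize the denormal's significand by shifting, paying
   for each shift by lowering the other factor's exponent; returns 0
   when the product must underflow to zero. */
struct unpacked { int exp; unsigned long long frac; };

static int
prescale (struct unpacked *denorm, struct unpacked *other)
{
  if (denorm->frac == 0)
    return 0;                     /* zero operand: product is zero */
  while (!(denorm->frac >> 52))   /* up to the implicit-1 position */
    {
      denorm->frac <<= 1;
      other->exp--;
    }
  return other->exp > 0;          /* else too small: result is zero */
}

On success the main multiply path is simply re-entered, as the
b.d __muldf3 below does.
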
+ .balign 4
+ FUNC(__muldf3)
+.Ldenorm_dbl0:
+ mov_s r12,DBL0L
+ mov_s DBL0L,DBL1L
+ mov_s DBL1L,r12
+ mov_s r12,DBL0H
+ mov_s DBL0H,DBL1H
+ mov_s DBL1H,r12
+ and r11,DBL0H,r9
+.Ldenorm_dbl1:
+ brhs r11,r9,.Linf_nan
+ brhs 0x3ca00001,r11,.Lret0
+ sub_s DBL0H,DBL0H,DBL1H
+ bmsk_s DBL1H,DBL1H,30
+ add_s DBL0H,DBL0H,DBL1H
+ breq_s DBL1H,0,.Ldenorm_2
+ norm r12,DBL1H
+
+ sub_s r12,r12,10
+ asl r5,r12,20
+ asl_s DBL1H,DBL1H,r12
+ sub DBL0H,DBL0H,r5
+ neg r5,r12
+ lsr r6,DBL1L,r5
+ asl_s DBL1L,DBL1L,r12
+ b.d __muldf3
+ add_s DBL1H,DBL1H,r6
+
+ .balign 4
+.Linf_nan:
+ bclr r12,DBL1H,31
+ xor_s DBL1H,DBL1H,DBL0H
+ bclr_s DBL0H,DBL0H,31
+ max r8,DBL0H,r12 ; either NaN -> NaN ; otherwise inf
+ or.f 0,DBL0H,DBL0L
+ mov_s DBL0L,0
+ or.ne.f DBL1L,DBL1L,r12
+ not_s DBL0H,DBL0L ; inf * 0 -> NaN
+ mov.ne DBL0H,r8
+ tst_s DBL1H,DBL1H
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+.Lret0: xor_s DBL0H,DBL0H,DBL1H
+ bclr DBL1H,DBL0H,31
+ xor_s DBL0H,DBL0H,DBL1H
+ j_s.d [blink]
+ mov_l DBL0L,0
+
+ .balign 4
+.Ldenorm_2:
+ breq_s DBL1L,0,.Lret0 ; 0 input -> 0 output
+ norm.f r12,DBL1L
+
+ mov.mi r12,21
+ add.pl r12,r12,22
+ neg r11,r12
+ asl_s r12,r12,20
+ lsr.f DBL1H,DBL1L,r11
+ ror DBL1L,DBL1L,r11
+ sub_s DBL0H,DBL0H,r12
+ mov.eq DBL1H,DBL1L
+ sub_s DBL1L,DBL1L,DBL1H
+ /* Fall through. */
+ .global __muldf3
+ .balign 4
+__muldf3:
+ ld.as r9,[pcl,0x4b] ; [pcl,((.L7ff00000-.+2)/4)]
+ mpyhu r4,DBL0L,DBL1L
+ bmsk r6,DBL0H,19
+ bset r6,r6,20
+ mpyu r7,r6,DBL1L
+ and r11,DBL0H,r9
+ breq r11,0,.Ldenorm_dbl0
+ mpyhu r8,r6,DBL1L
+ bmsk r10,DBL1H,19
+ bset r10,r10,20
+ mpyhu r5,r10,DBL0L
+ add.f r4,r4,r7
+ and r12,DBL1H,r9
+ mpyhu r7,r6,r10
+ breq r12,0,.Ldenorm_dbl1
+ adc.f r5,r5,r8
+ mpyu r8,r10,DBL0L
+ breq r11,r9,.Linf_nan
+ breq r12,r9,.Linf_nan
+ mpyu r6,r6,r10
+ add.cs r7,r7,1
+ add.f r4,r4,r8
+ mpyu r10,DBL1L,DBL0L
+ bclr r8,r9,30 ; 0x3ff00000
+ adc.f r5,r5,r6
+ ; XMAC write-back stall / std. mult stall is one cycle later
+ bclr r6,r9,20 ; 0x7fe00000
+ add.cs r7,r7,1 ; fraction product in r7:r5:r4
+ tst r10,r10
+ bset.ne r4,r4,0 ; put least significant word into sticky bit
+ lsr.f r10,r7,9
+ add_l r12,r12,r11 ; add exponents
+ rsub.eq r8,r8,r9 ; 0x40000000
+ sub r12,r12,r8 ; subtract bias + implicit 1
+ brhs.d r12,r6,.Linf_denorm
+ rsub r10,r10,12
+.Lshift_frac:
+ neg r8,r10
+ asl r6,r4,r10
+ lsr DBL0L,r4,r8
+ add.f 0,r6,r6
+ btst.eq DBL0L,0
+ cmp.eq r4,r4 ; round to nearest / round to even
+ asl r4,r5,r10
+ lsr r5,r5,r8
+ adc.f DBL0L,DBL0L,r4
+ xor.f 0,DBL0H,DBL1H
+ asl r7,r7,r10
+ add_s r12,r12,r5
+ adc DBL0H,r12,r7
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+
+/* We have checked for infinity / NaN input before, and transformed
+ denormalized inputs into normalized inputs. Thus, the worst case
+ exponent overflows are:
+ 1 + 1 - 0x400 == 0xc02 : maximum underflow
+ 0x7fe + 0x7fe - 0x3ff == 0xbfd ; maximum overflow
+ N.B. 0x7fe and 0x7ff are also values for overflow.
+
+ If (r12 <= -54), we have an underflow to zero. */
+ .balign 4
+.Linf_denorm:
+ brlo r12,0xc0000000,.Linf
+ asr r6,r12,20
+ mov_s r12,0
+ add.f r10,r10,r6
+ brgt r10,0,.Lshift_frac
+ beq_s .Lround_frac
+ add.f r10,r10,32
+.Lshift32_frac:
+ tst r4,r4
+ mov r4,r5
+ bset.ne r4,r4,1
+ mov r5,r7
+ mov r7,0
+ brge r10,1,.Lshift_frac
+ breq r10,0,.Lround_frac
+ add.f r10,r10,32
+ brgt r10,21,.Lshift32_frac
+ b_s .Lret0
+
+.Lround_frac:
+ add.f 0,r4,r4
+ btst.eq r5,0
+ mov_s DBL0L,r5
+ mov_s DBL0H,r7
+ adc.eq.f DBL0L,DBL0L,0
+ j_s.d [blink]
+ adc.eq DBL0H,DBL0H,0
+
+.Linf: xor.f DBL1H,DBL1H,DBL0H
+ mov_s DBL0L,0
+ mov_s DBL0H,r9
+ j_s.d [blink]
+ bset.mi DBL0H,DBL0H,31
+ ENDFUNC(__muldf3)
+
+ .balign 4
+.L7ff00000:
+ .long 0x7ff00000
diff --git a/gcc/config/arc/ieee-754/mulsf3.S b/gcc/config/arc/ieee-754/mulsf3.S
new file mode 100644
index 00000000000..f51934a9288
--- /dev/null
+++ b/gcc/config/arc/ieee-754/mulsf3.S
@@ -0,0 +1,179 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+/* XMAC schedule: directly back-to-back multiplies stall; the third
+ instruction after a multiply stalls unless it is also a multiply. */
+#include "arc-ieee-754.h"
+
+#if 0 /* DEBUG */
+ .global __mulsf3
+ FUNC(__mulsf3)
+ .balign 4
+__mulsf3:
+ push_s blink
+ push_s r1
+ bl.d __mulsf3_c
+ push_s r0
+ ld_s r1,[sp,4]
+ st_s r0,[sp,4]
+ bl.d __mulsf3_asm
+ pop_s r0
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ and r12,r0,r1
+ bic.f 0,0x7f800000,r12
+ bne 0f
+ bmsk.f 0,r0,22
+ bmsk.ne.f r1,r1,22
+ jne_s [blink] ; both NaN -> OK
+0: bl abort
+ ENDFUNC(__mulsf3)
+#define __mulsf3 __mulsf3_asm
+#endif /* DEBUG */
+
+ .balign 4
+ .global __mulsf3
+ FUNC(__mulsf3)
+__mulsf3:
+ ld.as r9,[pcl,81]; [pcl,((.L7f800000-.+2)/4)]
+ bmsk r4,r1,22
+ bset r2,r0,23
+ asl_s r2,r2,8
+ bset r3,r4,23
+ mpyhu r6,r2,r3
+ and r11,r0,r9
+ breq r11,0,.Ldenorm_dbl0
+ mpyu r7,r2,r3
+ breq r11,r9,.Linf_nan_dbl0
+ and r12,r1,r9
+ asl.f 0,r6,8
+ breq r12,0,.Ldenorm_dbl1
+.Lpast_denorm:
+ xor_s r0,r0,r1
+.Lpast_denorm_dbl1:
+ add.pl r6,r6,r6
+ bclr.pl r6,r6,23
+ add.pl.f r7,r7,r7
+ ld.as r4,[pcl,66]; [pcl,((.L7fffffff-.+2)/4)]
+ add.cs r6,r6,1
+ lsr.f 0,r6,1
+ breq r12,r9,.Linf_nan_dbl1
+ add_s r12,r12,r11
+ adc.f 0,r7,r4
+ add_s r12,r12, \
+ -0x3f800000
+ adc.f r8,r6,r12
+ bic r0,r0,r4
+ tst.pl r8,r9
+ min r3,r8,r9
+ jpnz.d [blink]
+ add.pnz r0,r0,r3
+; infinity or denormal number
+ add.ne.f r3,r3,r3
+ bpnz .Linfinity
+ asr_s r3,r3,23+1
+ bset r6,r6,23
+ sub_s r3,r3,1
+ neg_s r2,r3
+ brhi r2,24,.Lret_r0 ; right shift > 24 -> return +-0
+ lsr r2,r6,r2
+ asl r9,r6,r3
+ lsr.f 0,r2,1
+ tst r7,r7
+ add_s r0,r0,r2
+ bset.ne r9,r9,0
+ adc.f 0,r9,r4
+ j_s.d [blink]
+ add.cs r0,r0,1
+.Linfinity:
+ j_s.d [blink]
+ add_s r0,r0,r9
+
+.Lret_r0: j_s [blink]
+
+ .balign 4
+.Linf_nan_dbl0:
+ add.f 0,r1,r1 ; set c to sign; set z if +-zero
+ sub.eq r1,r1,1 ; inf/nan * 0 -> nan
+ bxor.cs r0,r0,31 ; calculate result sign
+ bic.f 0,r9,r1
+ j_s.d [blink]
+ or.eq r0,r0,r1 ; r1 nan -> result nan
+.Ldenorm_dbl0_inf_nan_dbl1:
+ bmsk.f 0,r0,30
+ mov.eq r1,-1
+.Linf_nan_dbl1:
+ xor_s r1,r1,r0
+ bclr_s r1,r1,31
+ j_s.d [blink]
+ xor_l r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl0:
+ bclr_s r2,r2,31
+ norm.f r4,r2
+ and r12,r1,r9
+ add_s r2,r2,r2
+ asl r2,r2,r4
+ asl r4,r4,23
+ mpyhu r6,r2,r3
+ breq r12,r9,.Ldenorm_dbl0_inf_nan_dbl1
+ sub.ne.f r12,r12,r4
+ mpyu r7,r2,r3
+ bhi.d .Lpast_denorm
+ asl.f 0,r6,8
+ xor_s r0,r0,r1
+ bmsk r1,r0,30
+ j_s.d [blink]
+ bic_l r0,r0,r1
+
+ .balign 4
+.Ldenorm_dbl1:
+ norm.f r3,r4
+ xor_s r0,r0,r1
+ sub_s r3,r3,7
+ asl r4,r4,r3
+ sub_s r3,r3,1
+ asl_s r3,r3,23
+ mpyhu r6,r2,r4
+ sub.ne.f r11,r11,r3
+ bmsk r8,r0,30
+ mpyu r7,r2,r4
+ bhi.d .Lpast_denorm_dbl1
+ asl.f 0,r6,8
+ j_s.d [blink]
+ bic r0,r0,r8
+
+ .balign 4
+.L7f800000:
+ .long 0x7f800000
+.L7fffffff:
+ .long 0x7fffffff
+ ENDFUNC(__mulsf3)
diff --git a/gcc/config/arc/ieee-754/orddf2.S b/gcc/config/arc/ieee-754/orddf2.S
new file mode 100644
index 00000000000..fb2f7d5dcb9
--- /dev/null
+++ b/gcc/config/arc/ieee-754/orddf2.S
@@ -0,0 +1,66 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: DBL0, DBL1
+ output: c flag
+ clobber: r12, flags
+ For NaNs, bit 19 .. bit 30 of the high word must be set. */
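
In C terms, the test below amounts to the following sketch (the mask
follows from the contract above; the asm encodes the answer in the
carry flag, set iff the operands are unordered):

#include <stdint.h>

/* Sketch: ordered iff neither high word has all of bits 19..30 set
   (exponent all ones plus fraction bit 19). */
static int
ordered_sketch (uint32_t hi0, uint32_t hi1)
{
  uint32_t nan = 0x7ff80000;
  return (hi0 & nan) != nan && (hi1 & nan) != nan;
}
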
+#if 0 /* DEBUG */
+ .global __orddf2
+ .balign 4
+ FUNC(__orddf2)
+__orddf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __unorddf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __orddf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ brne.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jcc [blink]
+ bl abort
+0: jcs [blink]
+ bl abort
+ ENDFUNC(__orddf2)
+#define __orddf2 __orddf2_asm
+#endif /* DEBUG */
+ .global __orddf2
+ .balign 4
+ HIDDEN_FUNC(__orddf2)
+__orddf2:
+ bmsk r12,DBL0H,20
+ add1.f r12,r12,DBL0H /* clear z; set c if NaN. */
+ bmsk r12,DBL1H,20
+ j_s.d [blink]
+ add1.cc.f r12,r12,DBL1H /* clear z; set c if NaN. */
+ ENDFUNC(__orddf2)
diff --git a/gcc/config/arc/ieee-754/ordsf2.S b/gcc/config/arc/ieee-754/ordsf2.S
new file mode 100644
index 00000000000..776eb3f4624
--- /dev/null
+++ b/gcc/config/arc/ieee-754/ordsf2.S
@@ -0,0 +1,66 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: r0, r1
+ output: c flag
+ clobber: r12, flags
+ For NaNs, bit 22 .. bit 30 must be set. */
+#if 0 /* DEBUG */
+ .global __ordsf2
+ .balign 4
+ FUNC(__ordsf2)
+__ordsf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __unordsf2_c` push_s r0
+ mov r11,r0` pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __ordsf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ brne.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jcc [blink]
+ bl abort
+0: jcs [blink]
+ bl abort
+ ENDFUNC(__ordsf2)
+#define __ordsf2 __ordsf2_asm
+#endif /* DEBUG */
+ .global __ordsf2
+ .balign 4
+ HIDDEN_FUNC(__ordsf2)
+__ordsf2:
+ bmsk r12,r0,23
+ add1.f r12,r12,r0 /* clear z; set c if NaN. */
+ bmsk r12,r1,23
+ j_s.d [blink]
+ add1.cc.f r12,r12,r1 /* clear z; set c if NaN. */
+ ENDFUNC(__ordsf2)
diff --git a/gcc/config/arc/ieee-754/truncdfsf2.S b/gcc/config/arc/ieee-754/truncdfsf2.S
new file mode 100644
index 00000000000..286680ecd61
--- /dev/null
+++ b/gcc/config/arc/ieee-754/truncdfsf2.S
@@ -0,0 +1,131 @@
+/* Copyright 2006, 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+
+/* NaNs must have a non-zero high fraction part. */
+
+#if 0 /* DEBUG */
+ FUNC(__truncdfsf2)
+ .global __truncdfsf2
+ .balign 4
+__truncdfsf2:
+ push_s blink
+ push_s r0
+ bl.d __truncdfsf2_c
+ push_s r1
+ mov_s r2,r0
+ pop_s r1
+ ld r0,[sp]
+ bl.d __truncdfsf2_asm
+ st r2,[sp]
+ pop_s r1
+ pop_s blink
+ cmp r0,r1
+ jeq_s [blink]
+ and r12,r0,r1
+ bic.f 0,0x7f800000,r12
+ bne 0f
+ bmsk.f 0,r0,22
+ bmsk.ne.f r1,r1,22
+ jne_s [blink] ; both NaN -> OK
+0: bl abort
+ ENDFUNC(__truncdfsf2)
+#define __truncdfsf2 __truncdfsf2_asm
+#endif /* DEBUG */
+
+ .global __truncdfsf2
+ .balign 4
+ FUNC(__truncdfsf2)
+__truncdfsf2:
+ lsr r2,DBL0H,20
+ asl_s DBL0H,DBL0H,12
+ sub r12,r2,0x380
+ tst r12,0x700
+ bne .Lill_exp
+ tst r12,0xff
+ beq_l .Ldenorm0
+ asl_s r12,r12,23
+ tst DBL0L, \
+ 0x2fffffff /* Check if msb guard bit wants rounding up. */
+ lsr_s DBL0L,DBL0L,28
+ lsr_s DBL0H,DBL0H,8
+ add.ne DBL0L,DBL0L,1
+ add_s DBL0H,DBL0H,DBL0L
+ lsr_s DBL0H,DBL0H
+ btst_s r2,11
+ add_s r0,DBL0H,r12
+ j_s.d [blink]
+ bxor.ne r0,r0,31
+ .balign 4
+.Lill_exp:
+ bbit1 r2,10,.Linf_nan
+ bmsk_s r12,r12,9
+ rsub.f r12,r12,8+0x400-32 ; Go from 9 to 1 guard bit in MSW.
+ bhs_s .Lzero
+ lsr r3,DBL0L,21
+ rrc DBL0H,DBL0H ; insert leading 1
+ asl.f 0,DBL0L,8 ; check lower 24 guard bits
+ add_s r3,DBL0H,r3
+ add.pnz r3,r3,1 ; assemble fraction with compressed guard bits.
+ lsr r0,r3,r12
+ neg_s r12,r12
+ btst_s r0,1
+ asl.eq.f r3,r3,r12
+ add.ne r0,r0,1
+ btst_s r2,11
+ lsr_s r0,r0
+ j_s.d [blink]
+ bxor.ne r0,r0,31
+.Lzero:
+ lsr_s r2,r2,11
+ j_s.d [blink]
+ asl r0,r2,31
+.Ldenorm0:
+ asl_s r12,r12,20
+ tst DBL0L, \
+ 0x5fffffff /* Check if msb guard bit wants rounding up. */
+ lsr_s DBL0L,DBL0L,29
+ lsr_s DBL0H,DBL0H,9
+ add.ne DBL0L,DBL0L,1
+ bset_s DBL0H,DBL0H,23
+ add_s DBL0H,DBL0H,DBL0L
+ lsr_s DBL0H,DBL0H
+ j_s.d [blink]
+ add_l r0,DBL0H,r12
+.Linf_nan:
+ tst_s DBL0H,DBL0H
+ mov_s r0,1
+ add.ne r2,r2,1
+ tst r2,0x7ff
+ asl.ne r0,r0,23
+ btst_s r12,11
+ neg r0,r0
+ j_s.d [blink]
+ bxor.eq r0,r0,31
+ ENDFUNC(__truncdfsf2)
diff --git a/gcc/config/arc/ieee-754/uneqdf2.S b/gcc/config/arc/ieee-754/uneqdf2.S
new file mode 100644
index 00000000000..86b045a28b7
--- /dev/null
+++ b/gcc/config/arc/ieee-754/uneqdf2.S
@@ -0,0 +1,76 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: DBL0, DBL1
+ output: z flag
+ clobber: r12, flags
+ For NaNs, bit 19 .. bit 30 of the high word must be set. */
+#if 0 /* DEBUG */
+ .global __uneqdf2
+ .balign 4
+ FUNC(__uneqdf2)
+__uneqdf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __eqdf2_c` push_s r0
+ push_s r0` ld_s r0, [sp,4]` ld_s r1, [sp,8]` ld_s r2,[sp,12]
+ bl.d __unorddf2_c` ld_s r3,[sp,16]
+ ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0
+ pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __uneqdf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ breq.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jne_s [blink]
+ bl abort
+0: jeq_s [blink]
+ bl abort
+ ENDFUNC(__uneqdf2)
+#define __uneqdf2 __uneqdf2_asm
+#endif /* DEBUG */
+ .global __uneqdf2
+ .balign 4
+ HIDDEN_FUNC(__uneqdf2)
+__uneqdf2:
+ cmp_s DBL0H,DBL1H
+ cmp.eq DBL0L,DBL1L
+ jeq_s [blink]
+ or r12,DBL0H,DBL1H
+ or.f 0,DBL0L,DBL1L
+ bclr.eq.f r12,r12,31
+ jeq_s [blink]
+ mov_s r12, \
+ 0x7ff80000
+ bic.f 0,r12,DBL0H
+ j_s.d [blink]
+ bic.ne.f r12,r12,DBL1H
+ ENDFUNC(__uneqdf2)
diff --git a/gcc/config/arc/ieee-754/uneqsf2.S b/gcc/config/arc/ieee-754/uneqsf2.S
new file mode 100644
index 00000000000..cfee9d82ecf
--- /dev/null
+++ b/gcc/config/arc/ieee-754/uneqsf2.S
@@ -0,0 +1,72 @@
+/* Copyright 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file into combinations with other programs,
+and to distribute those combinations without any restriction coming
+from the use of this file. (The General Public License restrictions
+do apply in other respects; for example, they cover modification of
+the file, and distribution when not linked into a combine
+executable.)
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "arc-ieee-754.h"
+/* inputs: r0, r1
+ output: z flag
+ clobber: r12, flags
+ For NaNs, bit 22 .. bit 30 must be set. */
+#if 0 /* DEBUG */
+ .global __uneqsf2
+ .balign 4
+ FUNC(__uneqsf2)
+__uneqsf2:
+ st.a r11,[sp,-4]` push_s blink` st.a r10,[sp,-4]` st.a r9,[sp,-4]
+ st.a r8,[sp,-4]` st.a r7,[sp,-4]` st.a r6,[sp,-4]` st.a r5,[sp,-4]
+ st.a r4,[sp,-4]` push_s r3` push_s r2` push_s r1`
+ bl.d __eqsf2_c` push_s r0
+ push_s r0` ld_s r0, [sp,4]
+ bl.d __unordsf2_c` ld_s r1,[sp,8]
+ ld.ab r11,[sp,4]` tst r0,r0` mov.ne r11,0
+ pop_s r0` pop_s r1` pop_s r2` pop_s r3
+ ld.ab r4,[sp,4]` ld.ab r5,[sp,4]` ld.ab r6,[sp,4]`
+ ld.ab r7,[sp,4]` ld.ab r8,[sp,4]` ld.ab r9,[sp,4]
+ bl.d __uneqsf2_asm` ld.ab r10,[sp,4]
+ pop_s blink
+ breq.d r11,0,0f
+ ld.ab r11,[sp,4]
+ jne_s [blink]
+ bl abort
+0: jeq_s [blink]
+ bl abort
+ ENDFUNC(__uneqsf2)
+#define __uneqsf2 __uneqsf2_asm
+#endif /* DEBUG */
+ .global __uneqsf2
+ .balign 4
+ HIDDEN_FUNC(__uneqsf2)
+__uneqsf2:
+ mov_s r12, \
+ 0x7fc00000
+ bic.f 0,r12,r0
+ bic.ne.f r12,r12,r1
+ or r12,r0,r1
+ bmsk.ne.f r12,r12,30
+ j_s.d [blink]
+ cmp.ne r0,r1
+ ENDFUNC(__uneqsf2)
diff --git a/gcc/config/arc/initfini.c b/gcc/config/arc/initfini.c
index fac3991e76e..fd117b0a461 100644
--- a/gcc/config/arc/initfini.c
+++ b/gcc/config/arc/initfini.c
@@ -7,7 +7,7 @@ This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
In addition to the permissions in the GNU General Public License, the
@@ -25,9 +25,8 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
/* Declare a pointer to void function type. */
typedef void (*func_ptr) (void);
diff --git a/gcc/config/arc/lib1funcs.asm b/gcc/config/arc/lib1funcs.asm
index 2d74cbee5bd..51fd605a536 100644
--- a/gcc/config/arc/lib1funcs.asm
+++ b/gcc/config/arc/lib1funcs.asm
@@ -1,20 +1,19 @@
-; libgcc routines for ARC cpu.
+; libgcc1 routines for ARC cpu.
-/* Copyright (C) 1995, 1997,2004 Free Software Foundation, Inc.
+/* Copyright (C) 1995, 1997, 2007, 2008 Free Software Foundation, Inc.
This file is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 2, or (at your option) any
+Free Software Foundation; either version 3, or (at your option) any
later version.
In addition to the permissions in the GNU General Public License, the
Free Software Foundation gives you unlimited permission to link the
-compiled version of this file into combinations with other programs,
-and to distribute those combinations without any restriction coming
-from the use of this file. (The General Public License restrictions
-do apply in other respects; for example, they cover modification of
-the file, and distribution when not linked into a combine
-executable.)
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -22,18 +21,45 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING. If not, write to
-the Free Software Foundation, 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+/* As a special exception, if you link this library with other files,
+ some of which are compiled with GCC, to produce an executable,
+ this library does not by itself cause the resulting executable
+ to be covered by the GNU General Public License.
+ This exception does not however invalidate any other reasons why
+ the executable file might be covered by the GNU General Public License. */
+
+
+ /* ANSI concatenation macros. */
+
+ #define CONCAT1(a, b) CONCAT2(a, b)
+ #define CONCAT2(a, b) a ## b
+
+ /* Use the right prefix for global labels. */
+
+ #define SYM(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+
+#ifndef WORKING_ASSEMBLER
+#define abs_l abs
+#define asl_l asl
+#define mov_l mov
+#endif
+
+#define FUNC(X) .type SYM(X),@function
+#define HIDDEN_FUNC(X) FUNC(X)` .hidden X
+#define ENDFUNC0(X) .Lfe_##X: .size X,.Lfe_##X-X
+#define ENDFUNC(X) ENDFUNC0(X)
+
+
+
#ifdef L_mulsi3
.section .text
.align 4
-#ifdef __base__
- .cpu base
- .global ___mulsi3
-___mulsi3:
+ .global SYM(__mulsi3)
+SYM(__mulsi3):
/* This is the simple version.
@@ -45,21 +71,88 @@ ___mulsi3:
b <<= 1;
}
*/
+
+#ifdef __A4__
+ FUNC(__mulsi3)
mov r2,0 ; Accumulate result here.
.Lloop:
sub.f 0,r0,0 ; while (a)
nop
- beq.nd .Ldone
+ beq.nd @.Ldone
and.f 0,r0,1 ; if (a & 1)
+ nop
add.nz r2,r2,r1 ; r += b
lsr r0,r0 ; a >>= 1
- b.d .Lloop
- lsl r1,r1 ; b <<= 1
+ b.d @.Lloop
+ asl r1,r1 ; b <<= 1
.Ldone:
- j.d blink
+ j.d [blink]
mov r0,r2
+ ENDFUNC(__mulsi3)
+#elif defined (__ARC_MUL64__)
+ FUNC(__mulsi3)
+ mulu64 r0,r1
+ j_s.d [blink]
+ mov_s r0,r57
+ ENDFUNC(__mulsi3)
+#elif defined (__ARC700__)
+ HIDDEN_FUNC(__mulsi3)
+ mpyu r0,r0,r1
+ nop_s
+ j_s [blink]
+ ENDFUNC(__mulsi3)
+#elif defined (__ARC_NORM__)
+ FUNC(__mulsi3)
+ norm.f r2,r0
+ rsub lp_count,r2,31
+ mov.mi lp_count,32
+ mov_s r2,r0
+ mov_s r0,0
+ lpnz @.Lend ; loop is aligned
+ lsr.f r2,r2
+ add.cs r0,r0,r1
+ add_s r1,r1,r1
+.Lend: j_s [blink]
+ ENDFUNC(__mulsi3)
+#elif !defined (__OPTIMIZE_SIZE__)
+ /* Up to 3.5 times faster than the simpler code below, but larger. */
+ FUNC(__mulsi3)
+ ror.f r2,r0,4
+ mov_s r0,0
+ add3.mi r0,r0,r1
+ asl.f r2,r2,2
+ add2.cs r0,r0,r1
+ jeq_s [blink]
+.Loop:
+ add1.mi r0,r0,r1
+ asl.f r2,r2,2
+ add.cs r0,r0,r1
+ asl_s r1,r1,4
+ ror.f r2,r2,8
+ add3.mi r0,r0,r1
+ asl.f r2,r2,2
+ bne.d .Loop
+ add2.cs r0,r0,r1
+ j_s [blink]
+ ENDFUNC(__mulsi3)
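
The loop above retires two multiplier bits per iteration by using the
shifted-add forms (add1/add2/add3) under flag control. A rough C
equivalent of the idea, without modeling the rotate-and-flags encoding:

/* Sketch: shift-and-add multiply, two bits of 'a' per step. */
static unsigned
mul_sketch (unsigned a, unsigned b)
{
  unsigned r = 0;
  while (a)
    {
      if (a & 1)
        r += b;
      if (a & 2)
        r += b << 1;      /* the kind of shifted add addN provides */
      a >>= 2;
      b <<= 2;
    }
  return r;
}
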
+#else
+/********************************************************/
+ FUNC(__mulsi3)
+ mov_s r2,0 ; Accumulate result here.
+.Lloop:
+ bbit0 r0,0,@.Ly
+ add_s r2,r2,r1 ; r += b
+.Ly:
+ lsr_s r0,r0 ; a >>= 1
+ asl_s r1,r1 ; b <<= 1
+ brne_s r0,0,@.Lloop
+.Ldone:
+ j_s.d [blink]
+ mov_s r0,r2
+ ENDFUNC(__mulsi3)
+/********************************************************/
#endif
-
+
#endif /* L_mulsi3 */
#ifdef L_umulsidi3
@@ -67,9 +160,8 @@ ___mulsi3:
.align 4
#ifdef __base__
- .cpu base
- .global ___umulsidi3
-___umulsidi3:
+ .global SYM(__umulsidi3)
+SYM(__umulsidi3):
/* This is the simple version.
@@ -81,33 +173,44 @@ ___umulsidi3:
b <<= 1;
}
*/
+#ifdef __ARC700__
+#include "ieee-754/arc-ieee-754.h"
+ HIDDEN_FUNC(__umulsidi3)
+ mov_s r12,DBL0L
+ mpyu DBL0L,r12,DBL0H
+ j_s.d [blink]
+ mpyhu DBL0H,r12,DBL0H
+ ENDFUNC(__umulsidi3)
+#else /* !__ARC700__*/
+ FUNC(__umulsidi3)
mov r2,0 ; Top part of b.
mov r3,0 ; Accumulate result here.
mov r4,0
.Lloop:
sub.f 0,r0,0 ; while (a)
nop
- beq.nd .Ldone
+ beq.nd @.Ldone
and.f 0,r0,1 ; if (a & 1)
- sub.f 0,r0,0
nop
- beq .Ldontadd
- add.f r4,r4,r1 ; r += b
+ bz.nd @.Lnoadd
+ add.f r4,r4,r1 ; r += b
adc r3,r3,r2
-.Ldontadd:
+.Lnoadd:
lsr r0,r0 ; a >>= 1
- lsl.f r1,r1 ; b <<= 1
- b.d .Lloop
+ asl.f r1,r1 ; b <<= 1
+ b.d @.Lloop
rlc r2,r2
.Ldone:
#ifdef __big_endian__
mov r1,r4
- j.d blink
+ j.d [blink]
mov r0,r3
#else
mov r0,r4
- j.d blink
+ j.d [blink]
mov r1,r3
+ ENDFUNC(__umulsidi3)
+#endif /* !__ARC700__*/
#endif
#endif
@@ -124,8 +227,9 @@ ___umulsidi3:
; outputs: positive r0/r1,
; r6.bit1 = sign of numerator, r6.bit0 = sign of result
- .global ___divnorm
-___divnorm:
+#ifdef __A4__
+ .global SYM(__divnorm)
+SYM(__divnorm):
mov r6,0 ; keep sign in r6
sub.f 0,r0,0 ; is numerator -ve?
sub.lt r0,0,r0 ; negate numerator
@@ -133,7 +237,8 @@ ___divnorm:
sub.f 0,r1,0 ; is denominator -ve?
sub.lt r1,0,r1 ; negate denominator
xor.lt r6,r6,1 ; toggle sign
- j.nd blink
+ j.nd [blink]
+#endif
/*
unsigned long
@@ -165,40 +270,203 @@ udivmodsi4(int modwanted, unsigned long num, unsigned long den)
; inputs: r0 = numerator, r1 = denominator
; outputs: r0 = quotient, r1 = remainder, r2/r3 trashed
- .global ___udivmodsi4
-___udivmodsi4:
+ .balign 4
+ .global SYM(__udivmodsi4)
+ FUNC(__udivmodsi4)
+SYM(__udivmodsi4):
+
+#ifdef __A4__
mov r2,1 ; bit = 1
mov r3,0 ; res = 0
.Lloop1:
sub.f 0,r1,r0 ; while (den < num
nop
- bnc.nd .Lloop2
+ bnc.nd @.Lloop2
sub.f 0,r2,0 ; && bit
nop
- bz.nd .Lloop2
- lsl.f 0,r1 ; && !(den & (1<<31))
+ bz.nd @.Lloop2
+ asl.f 0,r1 ; && !(den & (1<<31))
nop
- bc.nd .Lloop2
- lsl r1,r1 ; den <<= 1
- b.d .Lloop1
- lsl r2,r2 ; bit <<= 1
+ bc.nd @.Lloop2
+ asl r1,r1 ; den <<= 1
+ b.d @.Lloop1
+ asl r2,r2 ; bit <<= 1
.Lloop2:
sub.f 0,r2,0 ; while (bit)
nop
- bz.nd .Ldivmodend
+ bz.nd @.Ldivmodend
sub.f 0,r0,r1 ; if (num >= den)
nop
- bc.nd .Lshiftdown
+ bc.nd @.Lshiftdown
sub r0,r0,r1 ; num -= den
or r3,r3,r2 ; res |= bit
.Lshiftdown:
lsr r2,r2 ; bit >>= 1
- b.d .Lloop2
+ b.d @.Lloop2
lsr r1,r1 ; den >>= 1
.Ldivmodend:
mov r1,r0 ; r1 = mod
- j.d blink
+ j.d [blink]
mov r0,r3 ; r0 = res
+#elif defined (__ARC700__)
+/* Normalize divisor and dividend, and then use the appropriate number of
+ divaw (the number of result bits, or one more) to produce the result.
+ There are some special conditions that need to be tested:
+ - We can only directly normalize unsigned numbers that fit in 31 bit. For
+ the divisor, we test early on that it is not 'negative'.
+ - divaw can't correctly process a dividend that is larger than the divisor.
+ We handle this by checking that the dividend prior to normalization is
+ not larger than the normalized divisor. As we then already know
+ that the divisor fits 31 bit, this check also makes sure that the
+ dividend fits.
+ - ordinary normalization of the dividend could make it larger than the
+ normalized divisor, which again would be unsuitable for divaw.
+ Thus, we want to shift the dividend left by one less, except that we
+ want to leave it alone if it is already 31 bit. To this end, we
+ double the input to norm with adds.
+ - If the dividend has fewer bits than the divisor, that would leave us
+ with a negative number of divaw to execute. Although we could use a
+ conditional loop to avoid excess divaw, and then the quotient could
+ be extracted correctly as there'd be more than enough zero bits, the
+ remainder would be shifted left too far, requiring a conditional shift
+ right. The cost of that shift and the possible mispredict on the
+ conditional loop cost as much as putting in an early check for a zero
+ result. */
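
A hedged C sketch of the restoring division this path performs; it is
not a cycle-accurate model of divaw's accumulator layout, and it
assumes den != 0 and that the special cases above are handled as
described (k plays the role of lp_count):

/* Sketch: classic restoring division, one quotient bit per step. */
static unsigned
udivmod_sketch (unsigned num, unsigned den, unsigned *rem)
{
  unsigned quot = 0;
  int k;
  if (num < den)                 /* the early zero-quotient check */
    {
      *rem = num;
      return 0;
    }
  k = __builtin_clz (den) - __builtin_clz (num);
  den <<= k;                     /* normalize divisor under dividend */
  for (; k >= 0; k--)
    {
      quot <<= 1;
      if (num >= den)
        {
          num -= den;
          quot |= 1;
        }
      den >>= 1;
    }
  *rem = num;
  return quot;
}
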
+ bmsk r3,r0,29
+ brne.d r3,r0,.Large_dividend
+ norm.f r2,r1
+ brlo r0,r1,.Lret0
+ norm r3,r0
+ asl_s r1,r1,r2
+ sub_s r3,r3,1
+ asl_l r0,r0,r3 ; not short to keep loop aligned
+ sub lp_count,r2,r3
+ lp .Ldiv_end
+ divaw r0,r0,r1
+.Ldiv_end:sub_s r3,r2,1
+ lsr r1,r0,r2
+ j_s.d [blink]
+ bmsk r0,r0,r3
+
+ .balign 4
+.Large_dividend:
+ bmi .Ltrivial
+ asl_s r1,r1,r2
+ mov_s r3,0
+ sub1.f r4,r0,r1
+ mov.lo r4,r0
+ mov.hs r3,2
+ cmp r4,r1
+ sub.hs r4,r4,r1
+ add.hs r3,r3,1
+ mov.f lp_count,r2
+ lpne .Ldiv_end2
+ divaw r4,r4,r1
+.Ldiv_end2:asl r0,r3,r2
+ lsr r1,r4,r2
+ sub_s r2,r2,1
+ bmsk r4,r4,r2
+ j_s.d [blink]
+ or.ne r0,r0,r4
+
+.Lret0:
+ mov_s r1,r0
+ j_s.d [blink]
+ mov_l r0,0
+ .balign 4
+.Ltrivial:
+ sub.f r1,r0,r1
+ mov.c r1,r0
+ mov_s r0,1
+ j_s.d [blink]
+ mov.c r0,0
+#elif !defined (__OPTIMIZE_SIZE__)
+#ifdef __ARC_NORM__
+ lsr_s r2,r0
+ brhs.d r1,r2,.Lret0_3
+ norm r2,r2
+ norm r3,r1
+ sub_s r3,r3,r2
+ asl_s r1,r1,r3
+ sub1.f 0,r0,r1
+ lsr.cs r1,r1,1
+ sbc r2,r3,0
+ sub1 r0,r0,r1
+ cmp_s r0,r1
+ mov.f lp_count,r2
+#else
+ lsr_s r2,r0
+ brhs.d r1,r2,.Lret0_3
+ mov lp_count,32
+.Lloop1:
+ asl_s r1,r1 ; den <<= 1
+ brls.d r1,r2,@.Lloop1
+ sub lp_count,lp_count,1
+ sub_s r0,r0,r1
+ lsr_s r1,r1
+ cmp_s r0,r1
+ xor.f r2,lp_count,31
+ mov_s lp_count,r2
+#endif
+ sub.cc r0,r0,r1
+ mov_s r3,3
+ sbc r3,r3,0
+ asl_s r3,r3,r2
+ rsub r1,r1,1
+ lpne @.Lloop2_end
+ add1.f r0,r1,r0
+ sub.cc r0,r0,r1
+.Lloop2_end:
+ lsr r1,r0,r2
+ bmsk r0,r0,r2
+ bclr r0,r0,r2
+ j_s.d [blink]
+ or_s r0,r0,r3
+.Lret0_3:
+#if 0 /* Slightly shorter, but slower. */
+ lp .Loop3_end
+ brhi.d r1,r0,.Loop3_end
+ sub_s r0,r0,r1
+.Loop3_end:
+ add_s r1,r1,r0
+ j_s.d [blink]
+ rsub r0,lp_count,32-1
+#else
+ mov_s r4,r1
+ sub.f r1,r0,r1
+ sbc r0,r0,r0
+ sub.cc.f r1,r1,r4
+ sbc r0,r0,0
+ sub.cc.f r1,r1,r4
+ sbc r0,r0,-3
+ j_s.d [blink]
+ add.cs r1,r1,r4
+#endif
+#else /* Arctangent-A5 */
+ breq_s r1,0,@.Ldivmodend
+ mov_s r2,1 ; bit = 1
+ mov_s r3,0 ; res = 0
+.Lloop1:
+ brhs r1,r0,@.Lloop2
+ bbit1 r1,31,@.Lloop2
+ asl_s r1,r1 ; den <<= 1
+ b.d @.Lloop1
+ asl_s r2,r2 ; bit <<= 1
+.Lloop2:
+ brlo r0,r1,@.Lshiftdown
+ sub_s r0,r0,r1 ; num -= den
+ or_s r3,r3,r2 ; res |= bit
+.Lshiftdown:
+ lsr_s r2,r2 ; bit >>= 1
+ lsr_s r1,r1 ; den >>= 1
+ brne_s r2,0,@.Lloop2
+.Ldivmodend:
+ mov_s r1,r0 ; r1 = mod
+ j.d [blink]
+ mov_s r0,r3 ; r0 = res
+/******************************************************/
+#endif
+ ENDFUNC(__udivmodsi4)
#endif
@@ -207,12 +475,15 @@ ___udivmodsi4:
.align 4
#ifdef __base__
- .cpu base
- .global ___udivsi3
-___udivsi3:
- mov r7,blink
- bl.nd ___udivmodsi4
- j.nd r7
+ .global SYM(__udivsi3)
+ FUNC(__udivsi3)
+SYM(__udivsi3):
+ b @SYM(__udivmodsi4)
+ ENDFUNC(__udivsi3)
+ .section .__arc_profile_forward, "a"
+ .long SYM(__udivsi3)
+ .long SYM(__udivmodsi4)
+ .long 65536
#endif
#endif /* L_udivsi3 */
@@ -222,17 +493,393 @@ ___udivsi3:
.align 4
#ifdef __base__
- .cpu base
- .global ___divsi3
-___divsi3:
+ .global SYM(__divsi3)
+ FUNC(__divsi3)
+
+#ifndef __ARC700__
+SYM(__divsi3):
+#ifdef __A4__
mov r7,blink
- bl.nd ___divnorm
- bl.nd ___udivmodsi4
+ bl.nd @SYM(__divnorm)
+ bl.nd @SYM(__udivmodsi4)
and.f 0,r6,1
sub.nz r0,0,r0 ; cannot go in delay slot, has limm value
- j.nd r7
-#endif
+ j.nd [r7]
+#else /* !__A4__, i.e. A5 / ARC600 */
+ mov r7,blink
+ xor r6,r0,r1
+ abs_s r0,r0
+ bl.d @SYM(__udivmodsi4)
+ abs_s r1,r1
+ tst r6,r6
+ j.d [r7]
+ neg.mi r0,r0
+#endif /* __A4__ */
+#else /* !ifndef __ARC700__ */
+ ;; We can use the abs, norm, divaw and mpy instructions for ARC700
+#define MULDIV
+#ifdef MULDIV
+/* This table has been generated by divtab-arc700.c. */
+/* 1/512 .. 1/256, normalized. There is a leading 1 in bit 31.
+ For powers of two, we list unnormalized numbers instead. The values
+ for powers of 2 are loaded, but not used. The value for 1 is actually
+ the first instruction after __muldiv. */
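
How such an entry is used: for a divisor d that is not a power of two,
with e = floor(log2(d)), storing ceil(2**(32+e) / d) (which has bit 31
set) turns the division into one high multiply plus a shift, which is
what __muldiv's mpyhu / lsr pair does. A hedged C sketch that computes
the entry instead of indexing the table; it assumes 2 <= d, d not a
power of two, and n < 2**31 (the magnitude of a signed dividend):

#include <stdint.h>

/* Sketch of reciprocal-multiply division as used by __muldiv. */
static uint32_t
div_by_recip (uint32_t n, uint32_t d)
{
  int e = 31 - __builtin_clz (d);               /* floor(log2(d)) */
  uint32_t recip = (uint32_t) (((1ull << (32 + e)) + d - 1) / d);
  /* mpyhu takes the high 32 bits of the product; then shift by e. */
  return (uint32_t) (((uint64_t) n * recip) >> 32) >> e;
}

For d == 3 this computes recip == 0xAAAAAAAB, matching the entries for
3 in the table below.
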
+ .balign 4
+.Ldivtab:
+
+ .long 0x1000000
+ .long 0x80808081
+ .long 0x81020409
+ .long 0x81848DA9
+ .long 0x82082083
+ .long 0x828CBFBF
+ .long 0x83126E98
+ .long 0x83993053
+ .long 0x84210843
+ .long 0x84A9F9C9
+ .long 0x85340854
+ .long 0x85BF3762
+ .long 0x864B8A7E
+ .long 0x86D90545
+ .long 0x8767AB60
+ .long 0x87F78088
+ .long 0x88888889
+ .long 0x891AC73B
+ .long 0x89AE408A
+ .long 0x8A42F871
+ .long 0x8AD8F2FC
+ .long 0x8B70344B
+ .long 0x8C08C08D
+ .long 0x8CA29C05
+ .long 0x8D3DCB09
+ .long 0x8DDA5203
+ .long 0x8E78356E
+ .long 0x8F1779DA
+ .long 0x8FB823EF
+ .long 0x905A3864
+ .long 0x90FDBC0A
+ .long 0x91A2B3C5
+ .long 0x92492493
+ .long 0x92F11385
+ .long 0x939A85C5
+ .long 0x94458095
+ .long 0x94F20950
+ .long 0x95A02569
+ .long 0x964FDA6D
+ .long 0x97012E03
+ .long 0x97B425EE
+ .long 0x9868C80A
+ .long 0x991F1A52
+ .long 0x99D722DB
+ .long 0x9A90E7DA
+ .long 0x9B4C6F9F
+ .long 0x9C09C09D
+ .long 0x9CC8E161
+ .long 0x9D89D89E
+ .long 0x9E4CAD24
+ .long 0x9F1165E8
+ .long 0x9FD809FE
+ .long 0xA0A0A0A1
+ .long 0xA16B312F
+ .long 0xA237C32C
+ .long 0xA3065E40
+ .long 0xA3D70A3E
+ .long 0xA4A9CF1E
+ .long 0xA57EB503
+ .long 0xA655C43A
+ .long 0xA72F053A
+ .long 0xA80A80A9
+ .long 0xA8E83F58
+ .long 0xA9C84A48
+ .long 0xAAAAAAAB
+ .long 0xAB8F69E3
+ .long 0xAC769185
+ .long 0xAD602B59
+ .long 0xAE4C415D
+ .long 0xAF3ADDC7
+ .long 0xB02C0B03
+ .long 0xB11FD3B9
+ .long 0xB21642C9
+ .long 0xB30F6353
+ .long 0xB40B40B5
+ .long 0xB509E68B
+ .long 0xB60B60B7
+ .long 0xB70FBB5B
+ .long 0xB81702E1
+ .long 0xB92143FB
+ .long 0xBA2E8BA3
+ .long 0xBB3EE722
+ .long 0xBC52640C
+ .long 0xBD691048
+ .long 0xBE82FA0C
+ .long 0xBFA02FE9
+ .long 0xC0C0C0C1
+ .long 0xC1E4BBD6
+ .long 0xC30C30C4
+ .long 0xC4372F86
+ .long 0xC565C87C
+ .long 0xC6980C6A
+ .long 0xC7CE0C7D
+ .long 0xC907DA4F
+ .long 0xCA4587E7
+ .long 0xCB8727C1
+ .long 0xCCCCCCCD
+ .long 0xCE168A78
+ .long 0xCF6474A9
+ .long 0xD0B69FCC
+ .long 0xD20D20D3
+ .long 0xD3680D37
+ .long 0xD4C77B04
+ .long 0xD62B80D7
+ .long 0xD79435E6
+ .long 0xD901B204
+ .long 0xDA740DA8
+ .long 0xDBEB61EF
+ .long 0xDD67C8A7
+ .long 0xDEE95C4D
+ .long 0xE070381D
+ .long 0xE1FC780F
+ .long 0xE38E38E4
+ .long 0xE525982B
+ .long 0xE6C2B449
+ .long 0xE865AC7C
+ .long 0xEA0EA0EB
+ .long 0xEBBDB2A6
+ .long 0xED7303B6
+ .long 0xEF2EB720
+ .long 0xF0F0F0F1
+ .long 0xF2B9D649
+ .long 0xF4898D60
+ .long 0xF6603D99
+ .long 0xF83E0F84
+ .long 0xFA232CF3
+ .long 0xFC0FC0FD
+ .long 0xFE03F810
+ .long 0x2000000
+ .long 0x81020409
+ .long 0x82082083
+ .long 0x83126E98
+ .long 0x84210843
+ .long 0x85340854
+ .long 0x864B8A7E
+ .long 0x8767AB60
+ .long 0x88888889
+ .long 0x89AE408A
+ .long 0x8AD8F2FC
+ .long 0x8C08C08D
+ .long 0x8D3DCB09
+ .long 0x8E78356E
+ .long 0x8FB823EF
+ .long 0x90FDBC0A
+ .long 0x92492493
+ .long 0x939A85C5
+ .long 0x94F20950
+ .long 0x964FDA6D
+ .long 0x97B425EE
+ .long 0x991F1A52
+ .long 0x9A90E7DA
+ .long 0x9C09C09D
+ .long 0x9D89D89E
+ .long 0x9F1165E8
+ .long 0xA0A0A0A1
+ .long 0xA237C32C
+ .long 0xA3D70A3E
+ .long 0xA57EB503
+ .long 0xA72F053A
+ .long 0xA8E83F58
+ .long 0xAAAAAAAB
+ .long 0xAC769185
+ .long 0xAE4C415D
+ .long 0xB02C0B03
+ .long 0xB21642C9
+ .long 0xB40B40B5
+ .long 0xB60B60B7
+ .long 0xB81702E1
+ .long 0xBA2E8BA3
+ .long 0xBC52640C
+ .long 0xBE82FA0C
+ .long 0xC0C0C0C1
+ .long 0xC30C30C4
+ .long 0xC565C87C
+ .long 0xC7CE0C7D
+ .long 0xCA4587E7
+ .long 0xCCCCCCCD
+ .long 0xCF6474A9
+ .long 0xD20D20D3
+ .long 0xD4C77B04
+ .long 0xD79435E6
+ .long 0xDA740DA8
+ .long 0xDD67C8A7
+ .long 0xE070381D
+ .long 0xE38E38E4
+ .long 0xE6C2B449
+ .long 0xEA0EA0EB
+ .long 0xED7303B6
+ .long 0xF0F0F0F1
+ .long 0xF4898D60
+ .long 0xF83E0F84
+ .long 0xFC0FC0FD
+ .long 0x4000000
+ .long 0x82082083
+ .long 0x84210843
+ .long 0x864B8A7E
+ .long 0x88888889
+ .long 0x8AD8F2FC
+ .long 0x8D3DCB09
+ .long 0x8FB823EF
+ .long 0x92492493
+ .long 0x94F20950
+ .long 0x97B425EE
+ .long 0x9A90E7DA
+ .long 0x9D89D89E
+ .long 0xA0A0A0A1
+ .long 0xA3D70A3E
+ .long 0xA72F053A
+ .long 0xAAAAAAAB
+ .long 0xAE4C415D
+ .long 0xB21642C9
+ .long 0xB60B60B7
+ .long 0xBA2E8BA3
+ .long 0xBE82FA0C
+ .long 0xC30C30C4
+ .long 0xC7CE0C7D
+ .long 0xCCCCCCCD
+ .long 0xD20D20D3
+ .long 0xD79435E6
+ .long 0xDD67C8A7
+ .long 0xE38E38E4
+ .long 0xEA0EA0EB
+ .long 0xF0F0F0F1
+ .long 0xF83E0F84
+ .long 0x8000000
+ .long 0x84210843
+ .long 0x88888889
+ .long 0x8D3DCB09
+ .long 0x92492493
+ .long 0x97B425EE
+ .long 0x9D89D89E
+ .long 0xA3D70A3E
+ .long 0xAAAAAAAB
+ .long 0xB21642C9
+ .long 0xBA2E8BA3
+ .long 0xC30C30C4
+ .long 0xCCCCCCCD
+ .long 0xD79435E6
+ .long 0xE38E38E4
+ .long 0xF0F0F0F1
+ .long 0x10000000
+ .long 0x88888889
+ .long 0x92492493
+ .long 0x9D89D89E
+ .long 0xAAAAAAAB
+ .long 0xBA2E8BA3
+ .long 0xCCCCCCCD
+ .long 0xE38E38E4
+ .long 0x20000000
+ .long 0x92492493
+ .long 0xAAAAAAAB
+ .long 0xCCCCCCCD
+ .long 0x40000000
+ .long 0xAAAAAAAB
+ .long 0x80000000
+__muldiv:
+ neg r4,r2
+ ld.as r5,[pcl,r4]
+ abs_s r12,r0
+ bic.f 0,r2,r4
+ mpyhu.ne r12,r12,r5
+ norm r3,r2
+ xor.f 0,r0,r1
+ ; write port allocation stall
+ rsub r3,r3,30
+ lsr r0,r12,r3
+ j_s.d [blink]
+ neg.mi r0,r0
+
+ .balign 4
+SYM(__divsi3):
+ norm r3,r1
+ abs_s r2,r1
+ brhs r3,23,__muldiv
+ norm r4,r0
+ abs_l r12,r0
+ brhs r4,r3,.Lonebit
+ asl_s r2,r2,r3
+ asl r12,r12,r4
+ sub lp_count,r3,r4
+ sub.f r12,r12,r2
+ brge.d r12,r2,.Lsbit
+ sub r4,r3,r4
+ add.lo r12,r12,r2
+ lp .Ldivend
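+	; lp_count = norm(divisor) - norm(dividend); roughly one quotient
+	; bit is produced per divaw iteration.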
+.Ldivstart:divaw r12,r12,r2
+.Ldivend:xor_s r1,r1,r0
+ sub r0,r4,1
+ bmsk r0,r12,r0
+ bset.hs r0,r0,r4
+ tst_s r1,r1
+ j_s.d [blink]
+ neg.mi r0,r0
+.Lonebit:
+ xor_s r1,r1,r0
+ asr_s r1,r1,31
+ sub1.f 0,r12,r2 ; special case: -2**(n+1) / 2**n
+ or r0,r1,1
+ add.eq r0,r0,r0
+ cmp_s r12,r2
+ j_s.d [blink]
+ mov.lo r0,0
+.Lsbit:
+ ; Need to handle special cases involving negative powers of two:
+ ; r12,r2 are normalized dividend / divisor;
+ ; divide anything by 0x80000000, or divide 0x80000000 by 0x40000000
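+	; (e.g. 0x80000000 / 0x40000000 must yield -2, and x / 0x80000000
+	; must yield 0 for |x| < 2^31 but 1 for x == 0x80000000.)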
+ add_s r12,r12,r2
+ xor_s r1,r1,r0
+ rsub r4,r4,-1
+ ror r0,r12,r4
+ tst_s r2,r2
+ bmsk r0,r0,r3
+ add.pl r0,r0,r0
+ tst_s r1,r1
+ j_s.d [blink]
+ neg.mi r0,r0
+#else /* !MULDIV */
+/* This version requires that divaw works with a divisor of 0x80000000U */
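+/* Roughly: each divaw step doubles the partial remainder and, if the
+   result is at least the shifted divisor, subtracts it and sets the
+   quotient bit being shifted in; i.e., one restoring division step per
+   instruction.  */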
+ abs_s r2,r1
+ norm r4,r0
+ neg_s r3,r2
+ norm r3,r3
+ abs_s r12,r0
+ brhs r4,r3,.Lonebit
+ asl_s r2,r2,r3
+ asl r12,r12,r4
+ sub lp_count,r3,r4
+ cmp_s r12,r2
+ sub.hs r12,r12,r2
+ lp .Ldivend
+.Ldivstart:divaw r12,r12,r2
+.Ldivend:xor_s r1,r1,r0
+ sub_s r0,r3,1
+ bmsk r0,r12,r0
+ bset.hs r0,r0,r3
+ tst_s r1,r1
+ j_s.d [blink]
+	neg.mi r0,r0
+.Lonebit:
+ xor_s r1,r1,r0
+ asr_s r1,r1,31
+ cmp_s r12,r2
+ mov_s r0,0
+ j_s.d [blink]
+	or.hs r0,r1,1
+#endif /* MULDIV */
+#endif /* ifndef __ARC700__ */
+ ENDFUNC(__divsi3)
+#endif /* __base__ */
+
+
+
#endif /* L_divsi3 */
#ifdef L_umodsi3
@@ -240,13 +887,18 @@ ___divsi3:
.align 4
#ifdef __base__
- .cpu base
- .global ___umodsi3
-___umodsi3:
+ .global SYM(__umodsi3)
+ FUNC(__umodsi3)
+SYM(__umodsi3):
mov r7,blink
- bl.nd ___udivmodsi4
- j.d r7
+ bl.nd @SYM(__udivmodsi4)
+ j.d [r7]
mov r0,r1
+ ENDFUNC(__umodsi3)
+ .section .__arc_profile_forward, "a"
+ .long SYM(__umodsi3)
+ .long SYM(__udivmodsi4)
+ .long 65536
#endif
#endif /* L_umodsi3 */
@@ -256,16 +908,522 @@ ___umodsi3:
.align 4
#ifdef __base__
- .cpu base
- .global ___modsi3
-___modsi3:
+	.global SYM(__modsi3)
+ FUNC(__modsi3)
+SYM(__modsi3):
+#ifndef __ARC700__
+#ifdef __A4__
mov r7,blink
- bl.nd ___divnorm
- bl.nd ___udivmodsi4
+ bl.nd @SYM(__divnorm)
+ bl.nd @SYM(__udivmodsi4)
and.f 0,r6,2
sub.nz r1,0,r1
- j.d r7
+ j.d [r7]
mov r0,r1
+#else /* !__A4__, i.e. A5 / ARC600 */
+ mov_s r12,blink
+ mov_s r6,r0
+ abs_s r0,r0
+ bl.d @SYM(__udivmodsi4)
+ abs_s r1,r1
+ tst r6,r6
+ neg_s r0,r1
+ j_s.d [r12]
+ mov.pl r0,r1
+#endif /* __A4__ */
+#else /* __ARC700__ */
+ abs_s r2,r1
+ norm.f r4,r0
+ neg r5,r2
+ norm r3,r5
+ abs_l r12,r0
+ brhs r4,r3,.Lonebit
+ asl_s r2,r2,r3
+ asl r12,r12,r4
+ sub lp_count,r3,r4
+ cmp_s r12,r2
+ sub.hs r12,r12,r2
+ tst_s r0,r0
+ lp .Ldivend
+.Ldivstart:divaw r12,r12,r2
+.Ldivend:
+ lsr r0,r12,r3
+ j_s.d [blink]
+ neg.mi r0,r0
+ .balign 4
+.Lonebit:neg.pl r5,r5
+ cmp_s r12,r2
+ j_s.d [blink]
+ sub.hs r0,r0,r5
+#endif /* __ARC700__ */
+ ENDFUNC(__modsi3)
#endif
#endif /* L_modsi3 */
+
+#ifdef L_clzsi2
+ .section .text
+ .align 4
+	.global SYM(__clzsi2)
+SYM(__clzsi2):
+#ifdef __ARC_NORM__
+ HIDDEN_FUNC(__clzsi2)
+ norm.f r0,r0
+ mov.n r0,0
+ j_s.d [blink]
+ add.pl r0,r0,1
+ ENDFUNC(__clzsi2)
+#elif !defined (__A4__)
+ FUNC(__clzsi2)
+/* N.B. zero-overhead loops have some restrictions for these targets
+   which make them tricky to use correctly for small loops.  */
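+/* For illustration: __clzsi2 returns the number of leading zero bits of
+   its argument, 32 for an argument of 0.  The loop below consumes up to
+   three bits per iteration, testing bit 31 with bbit1 and picking up the
+   bits shifted out by asl.f from the carry and sign flags.  */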
+ asl.f 0,r0,2
+ mov r1,-1
+.Lcheck:
+ bbit1.d r0,31,.Ldone
+ asl.pl r0,r0,3
+ bcs.d .Ldone_1
+ add_s r1,r1,3
+ bpnz.d .Lcheck
+ asl.f 0,r0,2
+ mov_s r0,32
+ j_s.d [blink]
+ mov.ne r0,r1
+.Ldone:
+ j_s.d [blink]
+ add_s r0,r1,1
+.Ldone_1:
+ j_s.d [blink]
+ sub_s r0,r1,1
+ ENDFUNC(__clzsi2)
+#else
+ mov lp_count,32
+ lp @SYM(.Ldone)
+ and.f %r1,%r0,0x80000000
+ bnz @SYM(.Ldone)
+ asl %r0,%r0
+.Ldone:
+ sub %r0,32,lp_count
+ j [%blink]
+#endif
+#endif
+
+
+;;; MILLICODE THUNK LIB ;***************
+
+;;; .macro push_regs from, to, offset
+;;; st_s "\from", [sp, \offset]
+;;; .if \to-\from
+;;; push_regs "(\from+1)", \to, "(\offset+4)"
+;;; .endif
+;;; .endm
+;;; push_regs 13, 18, 0
+;;;
+
+;;;; .macro sum from, to, three
+;;;; .long \from
+;;;; .long \three
+;;;; .local regno
+;;;; .set regno, \from+1
+;;;; .set shift, 32
+;;;; .set shift, shift - 1
+;;;; # st_s %shift @3 lsl #shift
+;;;; .if \to-\from
+;;;; sum "(\from+1)", \to, "(\three)"
+;;;; .endif
+;;;; .endm
+;;;;
+;;;; SUM 0,5, 9
+;;;;
+; .altmacro
+;; .macro push_regs from=0, to=3, offset
+;; st_s r\from, [sp, \offset]
+;; .if \to-\from
+;; push_regs "\from+1 ",\to,"(\offset+4)"
+;; .endif
+;; .endm
+;;
+;; .macro expand_to_push from=13, to
+;; ; .section .text
+;; ; .align 4
+;; ; .global st_
+;; ; .type foo,
+;; st_13_to_25:
+;; ; push_regs \from, \to, 0
+;; push_regs 0,3 ;
+;; .endm
+;;
+;; expand_to_push 13,18
+;;
+;#endif
+
+#ifndef __A4__
+#ifdef L_millicodethunk_st
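+; Millicode thunks: shared out-of-line register save sequences used to
+; shrink function prologues.  A prologue that must save, say, r13..r18
+; allocates the stack slots itself and then emits "bl __st_r13_to_r18";
+; the entry points below fall through so that one instruction sequence
+; serves every r13..rN range.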
+ .section .text
+ .align 4
+ .global SYM(__st_r13_to_r15)
+ .global SYM(__st_r13_to_r16)
+ .global SYM(__st_r13_to_r17)
+ .global SYM(__st_r13_to_r18)
+ .global SYM(__st_r13_to_r19)
+ .global SYM(__st_r13_to_r20)
+ .global SYM(__st_r13_to_r21)
+ .global SYM(__st_r13_to_r22)
+ .global SYM(__st_r13_to_r23)
+ .global SYM(__st_r13_to_r24)
+ .global SYM(__st_r13_to_r25)
+ HIDDEN_FUNC(__st_r13_to_r15)
+ HIDDEN_FUNC(__st_r13_to_r16)
+ HIDDEN_FUNC(__st_r13_to_r17)
+ HIDDEN_FUNC(__st_r13_to_r18)
+ HIDDEN_FUNC(__st_r13_to_r19)
+ HIDDEN_FUNC(__st_r13_to_r20)
+ HIDDEN_FUNC(__st_r13_to_r21)
+ HIDDEN_FUNC(__st_r13_to_r22)
+ HIDDEN_FUNC(__st_r13_to_r23)
+ HIDDEN_FUNC(__st_r13_to_r24)
+ HIDDEN_FUNC(__st_r13_to_r25)
+ .align 4
+SYM(__st_r13_to_r25):
+ st r25, [sp,48]
+SYM(__st_r13_to_r24):
+ st r24, [sp,44]
+SYM(__st_r13_to_r23):
+ st r23, [sp,40]
+SYM(__st_r13_to_r22):
+ st r22, [sp,36]
+SYM(__st_r13_to_r21):
+ st r21, [sp,32]
+SYM(__st_r13_to_r20):
+ st r20, [sp,28]
+SYM(__st_r13_to_r19):
+ st r19, [sp,24]
+SYM(__st_r13_to_r18):
+ st r18, [sp,20]
+SYM(__st_r13_to_r17):
+ st r17, [sp,16]
+SYM(__st_r13_to_r16):
+ st r16, [sp,12]
+SYM(__st_r13_to_r15):
+#ifdef __ARC700__
+ st r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
+#else
+ st_s r15, [sp,8]
+#endif
+ st_s r14, [sp,4]
+ j_s.d [%blink]
+ st_s r13, [sp,0]
+ ENDFUNC(__st_r13_to_r15)
+ ENDFUNC(__st_r13_to_r16)
+ ENDFUNC(__st_r13_to_r17)
+ ENDFUNC(__st_r13_to_r18)
+ ENDFUNC(__st_r13_to_r19)
+ ENDFUNC(__st_r13_to_r20)
+ ENDFUNC(__st_r13_to_r21)
+ ENDFUNC(__st_r13_to_r22)
+ ENDFUNC(__st_r13_to_r23)
+ ENDFUNC(__st_r13_to_r24)
+ ENDFUNC(__st_r13_to_r25)
+#endif /* L_millicodethunk_st */
+
+
+#ifdef L_millicodethunk_ld
+ .section .text
+ .align 4
+; ==================================
+; the loads
+
+ .global SYM(__ld_r13_to_r15)
+ .global SYM(__ld_r13_to_r16)
+ .global SYM(__ld_r13_to_r17)
+ .global SYM(__ld_r13_to_r18)
+ .global SYM(__ld_r13_to_r19)
+ .global SYM(__ld_r13_to_r20)
+ .global SYM(__ld_r13_to_r21)
+ .global SYM(__ld_r13_to_r22)
+ .global SYM(__ld_r13_to_r23)
+ .global SYM(__ld_r13_to_r24)
+ .global SYM(__ld_r13_to_r25)
+ HIDDEN_FUNC(__ld_r13_to_r15)
+ HIDDEN_FUNC(__ld_r13_to_r16)
+ HIDDEN_FUNC(__ld_r13_to_r17)
+ HIDDEN_FUNC(__ld_r13_to_r18)
+ HIDDEN_FUNC(__ld_r13_to_r19)
+ HIDDEN_FUNC(__ld_r13_to_r20)
+ HIDDEN_FUNC(__ld_r13_to_r21)
+ HIDDEN_FUNC(__ld_r13_to_r22)
+ HIDDEN_FUNC(__ld_r13_to_r23)
+ HIDDEN_FUNC(__ld_r13_to_r24)
+ HIDDEN_FUNC(__ld_r13_to_r25)
+SYM(__ld_r13_to_r25):
+ ld r25, [sp,48]
+SYM(__ld_r13_to_r24):
+ ld r24, [sp,44]
+SYM(__ld_r13_to_r23):
+ ld r23, [sp,40]
+SYM(__ld_r13_to_r22):
+ ld r22, [sp,36]
+SYM(__ld_r13_to_r21):
+ ld r21, [sp,32]
+SYM(__ld_r13_to_r20):
+ ld r20, [sp,28]
+SYM(__ld_r13_to_r19):
+ ld r19, [sp,24]
+SYM(__ld_r13_to_r18):
+ ld r18, [sp,20]
+SYM(__ld_r13_to_r17):
+ ld r17, [sp,16]
+SYM(__ld_r13_to_r16):
+ ld r16, [sp,12]
+SYM(__ld_r13_to_r15):
+#ifdef __ARC700__
+ ld r15, [sp,8] ; minimum function size to avoid stall: 6 bytes.
+#else
+ ld_s r15, [sp,8]
+#endif
+ ld_s r14, [sp,4]
+ j_s.d [%blink]
+ ld_s r13, [sp,0]
+ ENDFUNC(__ld_r13_to_r15)
+ ENDFUNC(__ld_r13_to_r16)
+ ENDFUNC(__ld_r13_to_r17)
+ ENDFUNC(__ld_r13_to_r18)
+ ENDFUNC(__ld_r13_to_r19)
+ ENDFUNC(__ld_r13_to_r20)
+ ENDFUNC(__ld_r13_to_r21)
+ ENDFUNC(__ld_r13_to_r22)
+ ENDFUNC(__ld_r13_to_r23)
+ ENDFUNC(__ld_r13_to_r24)
+ ENDFUNC(__ld_r13_to_r25)
+
+#endif /* L_millicodethunk_ld */
+#ifdef L_millicodethunk_ret
+ .global SYM(__ld_r13_to_r14_ret)
+ .global SYM(__ld_r13_to_r15_ret)
+ .global SYM(__ld_r13_to_r16_ret)
+ .global SYM(__ld_r13_to_r17_ret)
+ .global SYM(__ld_r13_to_r18_ret)
+ .global SYM(__ld_r13_to_r19_ret)
+ .global SYM(__ld_r13_to_r20_ret)
+ .global SYM(__ld_r13_to_r21_ret)
+ .global SYM(__ld_r13_to_r22_ret)
+ .global SYM(__ld_r13_to_r23_ret)
+ .global SYM(__ld_r13_to_r24_ret)
+ .global SYM(__ld_r13_to_r25_ret)
+ HIDDEN_FUNC(__ld_r13_to_r14_ret)
+ HIDDEN_FUNC(__ld_r13_to_r15_ret)
+ HIDDEN_FUNC(__ld_r13_to_r16_ret)
+ HIDDEN_FUNC(__ld_r13_to_r17_ret)
+ HIDDEN_FUNC(__ld_r13_to_r18_ret)
+ HIDDEN_FUNC(__ld_r13_to_r19_ret)
+ HIDDEN_FUNC(__ld_r13_to_r20_ret)
+ HIDDEN_FUNC(__ld_r13_to_r21_ret)
+ HIDDEN_FUNC(__ld_r13_to_r22_ret)
+ HIDDEN_FUNC(__ld_r13_to_r23_ret)
+ HIDDEN_FUNC(__ld_r13_to_r24_ret)
+ HIDDEN_FUNC(__ld_r13_to_r25_ret)
+ .section .text
+ .align 4
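+; The _ret variants restore the registers, reload blink, deallocate the
+; save area and return in one go; from the code below, r12 is expected
+; to hold the offset of the saved blink (save-area size minus 4), and
+; ld.ab reuses the same value to pop the area before the final add of 4.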
+SYM(__ld_r13_to_r25_ret):
+ ld r25, [sp,48]
+SYM(__ld_r13_to_r24_ret):
+ ld r24, [sp,44]
+SYM(__ld_r13_to_r23_ret):
+ ld r23, [sp,40]
+SYM(__ld_r13_to_r22_ret):
+ ld r22, [sp,36]
+SYM(__ld_r13_to_r21_ret):
+ ld r21, [sp,32]
+SYM(__ld_r13_to_r20_ret):
+ ld r20, [sp,28]
+SYM(__ld_r13_to_r19_ret):
+ ld r19, [sp,24]
+SYM(__ld_r13_to_r18_ret):
+ ld r18, [sp,20]
+SYM(__ld_r13_to_r17_ret):
+ ld r17, [sp,16]
+SYM(__ld_r13_to_r16_ret):
+ ld r16, [sp,12]
+SYM(__ld_r13_to_r15_ret):
+ ld r15, [sp,8]
+SYM(__ld_r13_to_r14_ret):
+ ld blink,[sp,r12]
+ ld_s r14, [sp,4]
+ ld.ab r13, [sp,r12]
+ j_s.d [%blink]
+ add_s sp,sp,4
+ ENDFUNC(__ld_r13_to_r14_ret)
+ ENDFUNC(__ld_r13_to_r15_ret)
+ ENDFUNC(__ld_r13_to_r16_ret)
+ ENDFUNC(__ld_r13_to_r17_ret)
+ ENDFUNC(__ld_r13_to_r18_ret)
+ ENDFUNC(__ld_r13_to_r19_ret)
+ ENDFUNC(__ld_r13_to_r20_ret)
+ ENDFUNC(__ld_r13_to_r21_ret)
+ ENDFUNC(__ld_r13_to_r22_ret)
+ ENDFUNC(__ld_r13_to_r23_ret)
+ ENDFUNC(__ld_r13_to_r24_ret)
+ ENDFUNC(__ld_r13_to_r25_ret)
+
+#endif /* L_millicodethunk_ret */
+#endif /*__A4__*/
+
+#ifdef L_adddf3
+#ifdef __ARC_NORM__
+#include "ieee-754/adddf3.S"
+#endif
+#endif
+
+#ifdef L_muldf3
+#ifdef __ARC700__
+#include "ieee-754/muldf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
+#include "ieee-754/arc600/muldf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
+#include "ieee-754/arc600-dsp/muldf3.S"
+#endif
+#endif
+
+#ifdef L_addsf3
+#ifdef __ARC_NORM__
+#include "ieee-754/addsf3.S"
+#endif
+#endif
+
+#ifdef L_mulsf3
+#ifdef __ARC700__
+#include "ieee-754/mulsf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
+#include "ieee-754/arc600/mulsf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
+#include "ieee-754/arc600-dsp/mulsf3.S"
+#endif
+#endif
+
+#ifdef L_divdf3
+#ifdef __ARC700__
+#include "ieee-754/divdf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
+#include "ieee-754/arc600/divdf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
+#include "ieee-754/arc600-dsp/divdf3.S"
+#endif
+#endif
+
+#ifdef L_divsf3
+#ifdef __ARC700__
+#include "ieee-754/divsf3-stdmul.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL64__)
+#include "ieee-754/arc600/divsf3.S"
+#elif defined (__ARC_NORM__) && defined(__ARC_MUL32BY16__)
+#include "ieee-754/arc600-dsp/divsf3.S"
+#endif
+#endif
+
+#ifdef L_extendsfdf2
+#ifdef __ARC_NORM__
+#include "ieee-754/extendsfdf2.S"
+#endif
+#endif
+
+#ifdef L_truncdfsf2
+#ifdef __ARC_NORM__
+#include "ieee-754/truncdfsf2.S"
+#endif
+#endif
+
+#ifdef L_floatsidf
+#ifdef __ARC_NORM__
+#include "ieee-754/floatsidf.S"
+#endif
+#endif
+
+#ifdef L_floatsisf
+#ifdef __ARC_NORM__
+#include "ieee-754/floatsisf.S"
+#endif
+#endif
+
+#ifdef L_floatunsidf
+#ifdef __ARC_NORM__
+#include "ieee-754/floatunsidf.S"
+#endif
+#endif
+
+#ifdef L_fixdfsi
+#ifdef __ARC_NORM__
+#include "ieee-754/fixdfsi.S"
+#endif
+#endif
+
+#ifdef L_fixsfsi
+#ifdef __ARC_NORM__
+#include "ieee-754/fixsfsi.S"
+#endif
+#endif
+
+#ifdef L_fixunsdfsi
+#ifdef __ARC_NORM__
+#include "ieee-754/fixunsdfsi.S"
+#endif
+#endif
+
+#ifdef L_eqdf2
+#ifdef __ARC_NORM__
+#include "ieee-754/eqdf2.S"
+#endif
+#endif
+
+#ifdef L_eqsf2
+#ifdef __ARC_NORM__
+#include "ieee-754/eqsf2.S"
+#endif
+#endif
+
+#ifdef L_gtdf2
+#ifdef __ARC_NORM__
+#include "ieee-754/gtdf2.S"
+#endif
+#endif
+
+#ifdef L_gtsf2
+#ifdef __ARC_NORM__
+#include "ieee-754/gtsf2.S"
+#endif
+#endif
+
+#ifdef L_gedf2
+#ifdef __ARC_NORM__
+#include "ieee-754/gedf2.S"
+#endif
+#endif
+
+#ifdef L_gesf2
+#ifdef __ARC_NORM__
+#include "ieee-754/gesf2.S"
+#endif
+#endif
+
+#ifdef L_uneqdf2
+#ifdef __ARC_NORM__
+#include "ieee-754/uneqdf2.S"
+#endif
+#endif
+
+#ifdef L_uneqsf2
+#ifdef __ARC_NORM__
+#include "ieee-754/uneqsf2.S"
+#endif
+#endif
+
+#ifdef L_orddf2
+#ifdef __ARC_NORM__
+#include "ieee-754/orddf2.S"
+#endif
+#endif
+
+#ifdef L_ordsf2
+#ifdef __ARC_NORM__
+#include "ieee-754/ordsf2.S"
+#endif
+#endif
diff --git a/gcc/config/arc/libgcc-excl.ver b/gcc/config/arc/libgcc-excl.ver
new file mode 100644
index 00000000000..2eaa87a87b0
--- /dev/null
+++ b/gcc/config/arc/libgcc-excl.ver
@@ -0,0 +1,16 @@
+# Exclude various symbols which should not be visible in libgcc.so for ARC.
+# Floating point comparisons use a special lightweight ABI which is not
+# compatible with calls via a plt. Moreover, the code is so compact that
+# it is better to include a separate copy in each dso.
+%exclude {
+ __eqsf2
+ __eqdf2
+ __gtsf2
+ __gtdf2
+ __gesf2
+ __gedf2
+ __uneqsf2
+ __uneqdf2
+ __ordsf2
+ __orddf2
+}
diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md
new file mode 100644
index 00000000000..36c90e18f3a
--- /dev/null
+++ b/gcc/config/arc/predicates.md
@@ -0,0 +1,760 @@
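+;; Return true if OP is a register that may be written: hard registers
+;; that are in ALL_CORE_REGS but not in WRITABLE_CORE_REGS are rejected;
+;; everything else defers to register_operand.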
+(define_predicate "dest_reg_operand"
+ (match_code "reg,subreg")
+{
+ rtx op0 = op;
+
+ if (GET_CODE (op0) == SUBREG)
+ op0 = SUBREG_REG (op0);
+ if (REG_P (op0) && REGNO (op0) < FIRST_PSEUDO_REGISTER
+ && TEST_HARD_REG_BIT (reg_class_contents[ALL_CORE_REGS],
+ REGNO (op0))
+ && !TEST_HARD_REG_BIT (reg_class_contents[WRITABLE_CORE_REGS],
+ REGNO (op0)))
+ return 0;
+ return register_operand (op, mode);
+})
+
+;; Returns 1 if OP is a symbol reference.
+(define_predicate "symbolic_operand"
+ (match_code "symbol_ref, label_ref, const")
+)
+
+;; Acceptable arguments to the call insn.
+(define_predicate "call_address_operand"
+ (ior (match_code "const_int, reg")
+ (match_operand 0 "symbolic_operand")
+ (match_test "CONSTANT_P (op) && LEGITIMATE_CONSTANT_P (op)"))
+)
+
+(define_predicate "call_operand"
+ (and (match_code "mem")
+ (match_test "call_address_operand (XEXP (op, 0), mode)"))
+)
+
+;; Return true if OP is an unsigned 6-bit immediate (u6) value.
+(define_predicate "u6_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "UNSIGNED_INT6 (INTVAL (op))"))
+)
+
+;; Return true if OP is a short immediate (shimm) value.
+(define_predicate "short_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "SMALL_INT (INTVAL (op))"))
+)
+
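+;; Return true if OP is a nonzero power-of-2 constant.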
+(define_predicate "p2_immediate_operand"
+ (and (match_code "const_int")
+ (match_test "((INTVAL (op) - 1) & INTVAL (op)) == 0")
+ (match_test "INTVAL (op)"))
+)
+
+;; Return true if OP will require a long immediate (limm) value.
+;; This is currently only used when calculating length attributes.
+(define_predicate "long_immediate_operand"
+ (match_code "symbol_ref, label_ref, const, const_double, const_int")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ case LABEL_REF :
+ case CONST :
+ return 1;
+ case CONST_INT :
+ return (TARGET_A4) ? !SMALL_INT (INTVAL (op))
+ : !SIGNED_INT12 (INTVAL (op));
+ case CONST_DOUBLE :
+ /* These can happen because large unsigned 32 bit constants are
+ represented this way (the multiplication patterns can cause these
+ to be generated). They also occur for SFmode values. */
+ return 1;
+ default:
+ break;
+ }
+ return 0;
+}
+)
+
+;; Return true if OP is a MEM that when used as a load or store address will
+;; require an 8 byte insn.
+;; Load and store instructions don't allow the same possibilities but they're
+;; similar enough that this one function will do.
+;; This is currently only used when calculating length attributes.
+(define_predicate "long_immediate_loadstore_operand"
+ (match_code "mem")
+{
+ op = XEXP (op, 0);
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ case LABEL_REF :
+ case CONST :
+ return 1;
+ case CONST_INT :
+ /* This must be handled as "st c,[limm]". Ditto for load.
+ Technically, the assembler could translate some possibilities to
+ "st c,[limm/2 + limm/2]" if limm/2 will fit in a shimm, but we don't
+ assume that it does. */
+ return 1;
+ case CONST_DOUBLE :
+ /* These can happen because large unsigned 32 bit constants are
+ represented this way (the multiplication patterns can cause these
+ to be generated). They also occur for SFmode values. */
+ return 1;
+ case REG :
+ return 0;
+ case PLUS :
+ {
+ rtx x = XEXP (op, 1);
+
+ if (GET_CODE (x) == CONST)
+ {
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == PLUS)
+ x = XEXP (x, 0);
+ }
+ if (CONST_INT_P (x))
+ return !SMALL_INT (INTVAL (x));
+ else if (GET_CODE (x) == SYMBOL_REF)
+ return TARGET_NO_SDATA_SET || !SYMBOL_REF_SMALL_P (x);
+ return 0;
+ }
+ default:
+ break;
+ }
+ return 0;
+}
+)
+
+;; Return true if OP is any of R0-R3,R12-R15 for ARCompact 16-bit
+;; instructions
+(define_predicate "compact_register_operand"
+ (match_code "reg, subreg")
+ {
+ if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+ return 0;
+
+ return (GET_CODE (op) == REG)
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || COMPACT_GP_REG_P (REGNO (op))) ;
+ }
+)
+
+;; Return true if OP is an acceptable memory operand for ARCompact
+;; 16-bit load instructions.
+(define_predicate "compact_load_memory_operand"
+ (match_code "mem")
+{
+ rtx addr, plus0, plus1;
+ int size, off;
+
+ /* Eliminate non-memory operations */
+ if (GET_CODE (op) != MEM)
+ return 0;
+
+ /* .di instructions have no 16-bit form */
+ if (!TARGET_VOLATILE_CACHE_SET)
+ return 0;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ size = GET_MODE_SIZE (mode);
+
+ /* dword operations really put out 2 instructions, so eliminate them. */
+ if (size > UNITS_PER_WORD)
+ return 0;
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
+ || COMPACT_GP_REG_P (REGNO (addr))
+ || (SP_REG_P (REGNO (addr)) && (size != 2)));
+ /* Reverting for the moment since ldw_s does not have sp as a valid
+ parameter */
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+
+ if ((GET_CODE (plus0) == REG)
+ && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+ || COMPACT_GP_REG_P (REGNO (plus0)))
+ && ((GET_CODE (plus1) == REG)
+ && ((REGNO (plus1) >= FIRST_PSEUDO_REGISTER)
+ || COMPACT_GP_REG_P (REGNO (plus1)))))
+ {
+ return 1;
+ }
+
+ if ((GET_CODE (plus0) == REG)
+ && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+ || COMPACT_GP_REG_P (REGNO (plus0)))
+ && (GET_CODE (plus1) == CONST_INT))
+ {
+ off = INTVAL (plus1);
+
+	    /* Negative offsets are not supported in 16-bit load/store insns.  */
+ if (off < 0)
+ return 0;
+
+ switch (size)
+ {
+ case 1:
+ return (off < 32);
+ case 2:
+ return ((off < 64) && (off % 2 == 0));
+ case 4:
+ return ((off < 128) && (off % 4 == 0));
+ }
+ }
+
+ if ((GET_CODE (plus0) == REG)
+ && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+ || SP_REG_P (REGNO (plus0)))
+ && (GET_CODE (plus1) == CONST_INT))
+ {
+ off = INTVAL (plus1);
+ return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
+ }
+ default:
+      break;
+      /* TODO: 'gp' and 'pcl' are yet to be supported as base address
+	 operands for 16-bit load instructions.  */
+ }
+ return 0;
+
+}
+)
+
+;; Return true if OP is an acceptable memory operand for ARCompact
+;; 16-bit store instructions
+(define_predicate "compact_store_memory_operand"
+ (match_code "mem")
+{
+ rtx addr, plus0, plus1;
+ int size, off;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ /* .di instructions have no 16-bit form */
+ if (!TARGET_VOLATILE_CACHE_SET)
+ return 0;
+
+ size = GET_MODE_SIZE (mode);
+
+ /* dword operations really put out 2 instructions, so eliminate them. */
+ if (size > UNITS_PER_WORD)
+ return 0;
+
+ /* Decode the address now. */
+ addr = XEXP (op, 0);
+ switch (GET_CODE (addr))
+ {
+ case REG:
+ return (REGNO (addr) >= FIRST_PSEUDO_REGISTER
+ || COMPACT_GP_REG_P (REGNO (addr))
+ || (SP_REG_P (REGNO (addr)) && (size != 2)));
+ /* stw_s does not support SP as a parameter */
+ case PLUS:
+ plus0 = XEXP (addr, 0);
+ plus1 = XEXP (addr, 1);
+
+ if ((GET_CODE (plus0) == REG)
+ && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+ || COMPACT_GP_REG_P (REGNO (plus0)))
+ && (GET_CODE (plus1) == CONST_INT))
+ {
+ off = INTVAL (plus1);
+
+	    /* Negative offsets are not supported in 16-bit load/store insns.  */
+ if (off < 0)
+ return 0;
+
+ switch (size)
+ {
+ case 1:
+ return (off < 32);
+ case 2:
+ return ((off < 64) && (off % 2 == 0));
+ case 4:
+ return ((off < 128) && (off % 4 == 0));
+ }
+ }
+
+ if ((GET_CODE (plus0) == REG)
+ && ((REGNO (plus0) >= FIRST_PSEUDO_REGISTER)
+ || SP_REG_P (REGNO (plus0)))
+ && (GET_CODE (plus1) == CONST_INT))
+ {
+ off = INTVAL (plus1);
+
+ return ((size != 2) && (off >= 0 && off < 128) && (off % 4 == 0));
+ }
+ default:
+ break;
+ }
+ return 0;
+ }
+)
+
+;; Return true if OP is an acceptable argument for a single word
+;; move source.
+(define_predicate "move_src_operand"
+ (match_code "symbol_ref, label_ref, const, const_int, const_double, reg, subreg, mem")
+{
+ switch (GET_CODE (op))
+ {
+ case SYMBOL_REF :
+ case LABEL_REF :
+ case CONST :
+/* The use of the CONSTANT_P_RTX macro is discontinued.  */
+/* case CONSTANT_P_RTX: */
+ return (!flag_pic || arc_legitimate_pic_operand_p(op));
+ case CONST_INT :
+ return (LARGE_INT (INTVAL (op)));
+ case CONST_DOUBLE :
+ /* We can handle DImode integer constants in SImode if the value
+ (signed or unsigned) will fit in 32 bits. This is needed because
+ large unsigned 32 bit constants are represented as CONST_DOUBLEs. */
+ if (mode == SImode)
+ return arc_double_limm_p (op);
+ /* We can handle 32 bit floating point constants. */
+ if (mode == SFmode)
+ return GET_MODE (op) == SFmode;
+ return 0;
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (GET_CODE (SUBREG_REG (op)) == MEM)
+ return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ return address_operand (XEXP (op, 0), mode);
+ default :
+ return 0;
+ }
+}
+)
+
+;; Return true if OP is an acceptable argument for a double word
+;; move source.
+(define_predicate "move_double_src_operand"
+ (match_code "reg, subreg, mem, const_int, const_double")
+{
+ switch (GET_CODE (op))
+ {
+ case REG :
+ return register_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (GET_CODE (SUBREG_REG (op)) == MEM)
+ return move_double_src_operand (SUBREG_REG (op), mode);
+ else
+ return register_operand (op, mode);
+ case MEM :
+ return address_operand (XEXP (op, 0), mode);
+ case CONST_INT :
+ case CONST_DOUBLE :
+ return 1;
+ default :
+ return 0;
+ }
+}
+)
+
+;; Return true if OP is an acceptable argument for a move destination.
+(define_predicate "move_dest_operand"
+ (match_code "reg, subreg, mem")
+{
+ switch (GET_CODE (op))
+ {
+ case REG :
+      /* The program counter register cannot be the target of a move; it
+	 is a read-only register.  */
+ if (REGNO (op) == PROGRAM_COUNTER_REGNO)
+ return 0;
+ else if (TARGET_MULMAC_32BY16_SET
+ && (REGNO (op) == 56 || REGNO(op) == 57))
+ return 0;
+ else if (TARGET_MUL64_SET
+ && (REGNO (op) == 57 || REGNO(op) == 58 || REGNO(op) == 59 ))
+ return 0;
+ else
+ return dest_reg_operand (op, mode);
+ case SUBREG :
+ /* (subreg (mem ...) ...) can occur here if the inner part was once a
+ pseudo-reg and is now a stack slot. */
+ if (GET_CODE (SUBREG_REG (op)) == MEM)
+ return address_operand (XEXP (SUBREG_REG (op), 0), mode);
+ else
+ return dest_reg_operand (op, mode);
+ case MEM :
+ {
+ rtx addr = XEXP (op, 0);
+
+ if (GET_CODE (addr) == PLUS
+ && (GET_CODE (XEXP (addr, 0)) == MULT
+ || (!CONST_INT_P (XEXP (addr, 1))
+ && (TARGET_NO_SDATA_SET
+ || GET_CODE (XEXP (addr, 1)) != SYMBOL_REF
+ || !SYMBOL_REF_SMALL_P (XEXP (addr, 1))))))
+ return 0;
+ if ((GET_CODE (addr) == PRE_MODIFY || GET_CODE (addr) == POST_MODIFY)
+ && (GET_CODE (XEXP (addr, 1)) != PLUS
+ || !CONST_INT_P (XEXP (XEXP (addr, 1), 1))))
+ return 0;
+ return address_operand (addr, mode);
+ }
+ default :
+ return 0;
+ }
+
+}
+)
+
+;; Return true if OP is valid load with update operand.
+(define_predicate "load_update_operand"
+ (match_code "mem")
+{
+ if (GET_CODE (op) != MEM
+ || GET_MODE (op) != mode)
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_MODE (op) != Pmode
+ || !register_operand (XEXP (op, 0), Pmode)
+ || !nonmemory_operand (XEXP (op, 1), Pmode))
+ return 0;
+ return 1;
+
+}
+)
+
+;; Return true if OP is valid store with update operand.
+(define_predicate "store_update_operand"
+ (match_code "mem")
+{
+ if (GET_CODE (op) != MEM
+ || GET_MODE (op) != mode)
+ return 0;
+ op = XEXP (op, 0);
+ if (GET_CODE (op) != PLUS
+ || GET_MODE (op) != Pmode
+ || !register_operand (XEXP (op, 0), Pmode)
+ || !(GET_CODE (XEXP (op, 1)) == CONST_INT
+ && SMALL_INT (INTVAL (XEXP (op, 1)))))
+ return 0;
+ return 1;
+}
+)
+
+;; Return true if OP is a non-volatile non-immediate operand.
+;; Volatile memory refs require a special "cache-bypass" instruction
+;; and only the standard movXX patterns are set up to handle them.
+(define_predicate "nonvol_nonimm_operand"
+ (and (match_code "subreg, reg, mem")
+ (match_test "(GET_CODE (op) != MEM || !MEM_VOLATILE_P (op)) && nonimmediate_operand (op, mode)"))
+)
+
+;; Accept integer operands in the range -0x80000000..0x7fffffff. We have
+;; to check the range carefully since this predicate is used in DImode
+;; contexts.
+(define_predicate "const_sint32_operand"
+ (match_code "const_int")
+{
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= (-0x7fffffff - 1) && INTVAL (op) <= 0x7fffffff));
+}
+)
+
+;; Accept integer operands in the range 0..0xffffffff. We have to check the
+;; range carefully since this predicate is used in DImode contexts. Also, we
+;; need some extra crud to make it work when hosted on 64-bit machines.
+(define_predicate "const_uint32_operand"
+ (match_code "const_int, const_double")
+{
+#if HOST_BITS_PER_WIDE_INT > 32
+ /* All allowed constants will fit a CONST_INT. */
+ return (GET_CODE (op) == CONST_INT
+ && (INTVAL (op) >= 0 && INTVAL (op) <= 0xffffffffL));
+#else
+ return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0)
+ || (GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_HIGH (op) == 0));
+#endif
+}
+)
+
+;; Return 1 if OP is a comparison operator valid for the mode of CC.
+;; This allows the use of MATCH_OPERATOR to recognize all the branch insns.
+
+(define_predicate "proper_comparison_operator"
+ (match_code "eq, ne, le, lt, ge, gt, leu, ltu, geu, gtu, unordered, ordered, uneq, unge, ungt, unle, unlt, ltgt")
+{
+ enum rtx_code code = GET_CODE (op);
+
+ if (!COMPARISON_P (op))
+ return 0;
+
+ /* After generic flag-setting insns, we can use eq / ne / pl / mi / pnz .
+ There are some creative uses for hi / ls after shifts, but these are
+ hard to understand for the compiler and could be at best the target of
+ a peephole. */
+ switch (GET_MODE (XEXP (op, 0)))
+ {
+ case CC_ZNmode:
+ return (code == EQ || code == NE || code == GE || code == LT
+ || code == GT);
+ case CC_Zmode:
+ return code == EQ || code == NE;
+ case CC_Cmode:
+ return code == LTU || code == GEU;
+ case CC_FP_GTmode:
+ return code == GT || code == UNLE;
+ case CC_FP_GEmode:
+ return code == GE || code == UNLT;
+ case CC_FP_ORDmode:
+ return code == ORDERED || code == UNORDERED;
+ case CC_FP_UNEQmode:
+ return code == UNEQ || code == LTGT;
+ case CC_FPXmode:
+ return (code == EQ || code == NE || code == UNEQ || code == LTGT
+ || code == ORDERED || code == UNORDERED);
+ case CCmode:
+ case SImode: /* Used for BRcc. */
+ return 1;
+ /* From combiner. */
+ case QImode: case HImode: case SFmode: case DFmode:
+ return 0;
+ default:
+ gcc_unreachable ();
+ }
+})
+
+(define_predicate "equality_comparison_operator"
+ (match_code "eq, ne"))
+
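+;; Return true if comparison OP can be emitted as BRcc without a long
+;; immediate: its second operand is a register, a u6 constant for the
+;; directly supported conditions, or a constant whose value plus one
+;; fits in u6 for le/gt/leu/gtu (implemented by adjusting the constant).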
+(define_predicate "brcc_nolimm_operator"
+ (ior (match_test "REG_P (XEXP (op, 1))")
+ (and (match_code "eq, ne, lt, ge, ltu, geu")
+ (match_test "u6_immediate_operand (XEXP (op, 1), SImode)"))
+ (and (match_code "le, gt, leu, gtu")
+ (match_test "UNSIGNED_INT6 (INTVAL (XEXP (op, 1)) + 1)"))))
+
+;; Return TRUE if this is the condition code register; if we aren't given
+;; a mode, accept any CCmode register.
+(define_special_predicate "cc_register"
+ (match_code "reg")
+{
+ if (mode == VOIDmode)
+ {
+ mode = GET_MODE (op);
+ if (GET_MODE_CLASS (mode) != MODE_CC)
+ return FALSE;
+ }
+
+ if (mode == GET_MODE (op) && GET_CODE (op) == REG && REGNO (op) == CC_REG)
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Return TRUE if this is the condition code register; if we aren't given
+;; a mode, accept any CCmode register. If we are given a mode, accept
+;; modes that set a subset of flags.
+(define_special_predicate "cc_set_register"
+ (match_code "reg")
+{
+ enum machine_mode rmode = GET_MODE (op);
+
+ if (mode == VOIDmode)
+ {
+ mode = rmode;
+ if (GET_MODE_CLASS (mode) != MODE_CC)
+ return FALSE;
+ }
+
+ if (REGNO (op) != 61)
+ return FALSE;
+ if (mode == rmode
+ || (mode == CC_ZNmode && rmode == CC_Zmode)
+ || (mode == CCmode && rmode == CC_Zmode)
+ || (mode == CCmode && rmode == CC_ZNmode)
+ || (mode == CCmode && rmode == CC_Cmode))
+ return TRUE;
+
+ return FALSE;
+})
+
+;; Accept CC_REG in modes which provide the flags needed for MODE.
+(define_special_predicate "cc_use_register"
+ (match_code "reg")
+{
+ if (REGNO (op) != CC_REG)
+ return 0;
+ if (GET_MODE (op) == mode)
+ return 1;
+ switch (mode)
+ {
+ case CC_Zmode:
+ if (GET_MODE (op) == CC_ZNmode)
+ return 1;
+ /* Fall through. */
+ case CC_ZNmode: case CC_Cmode:
+ return GET_MODE (op) == CCmode;
+ default:
+ gcc_unreachable ();
+ }
+})
+
+(define_special_predicate "zn_compare_operator"
+ (match_code "compare")
+{
+ return GET_MODE (op) == CC_ZNmode || GET_MODE (op) == CC_Zmode;
+})
+
+;; Return true if OP is a shift operator.
+(define_predicate "shift_operator"
+ (match_code "ashiftrt, lshiftrt, ashift")
+)
+
+(define_predicate "commutative_operator"
+ (ior (match_code "plus,ior,xor,and")
+ (and (match_code "mult") (match_test "TARGET_ARC700"))
+ (and (match_code "ss_plus")
+ (match_test "TARGET_ARC700 || TARGET_EA_SET")))
+)
+
+(define_predicate "noncommutative_operator"
+ (ior (match_code "minus,ashift,ashiftrt,lshiftrt,rotatert")
+ (and (match_code "ss_minus")
+ (match_test "TARGET_ARC700 || TARGET_EA_SET")))
+)
+
+(define_predicate "unary_operator"
+ (ior (match_code "abs,neg,not,sign_extend,zero_extend")
+ (and (ior (match_code "ss_neg")
+ (and (match_code "ss_truncate")
+ (match_test "GET_MODE (XEXP (op, 0)) == HImode")))
+ (match_test "TARGET_ARC700 || TARGET_EA_SET")))
+)
+
+(define_predicate "_2_4_8_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8"))
+)
+
+(define_predicate "arc_double_register_operand"
+ (match_code "reg")
+{
+ if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+ return 0;
+
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == DOUBLE_REGS));
+})
+
+(define_predicate "shouldbe_register_operand"
+ (match_code "reg,subreg,mem")
+{
+ return ((reload_in_progress || reload_completed)
+ ? general_operand : register_operand) (op, mode);
+})
+
+(define_predicate "vector_register_operand"
+ (match_code "reg")
+{
+ if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+ return 0;
+
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS));
+})
+
+(define_predicate "vector_register_or_memory_operand"
+ ( ior (match_code "reg")
+ (match_code "mem"))
+{
+ if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+ return 0;
+
+ if ((GET_CODE (op) == MEM)
+ && (mode == V8HImode)
+ && GET_CODE (XEXP (op,0)) == REG)
+ return 1;
+
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == SIMD_VR_REGS));
+})
+
+(define_predicate "arc_dpfp_operator"
+ (match_code "plus, mult,minus")
+)
+
+(define_predicate "arc_simd_dma_register_operand"
+ (match_code "reg")
+{
+ if ((GET_MODE (op) != mode) && (mode != VOIDmode))
+ return 0;
+
+ return (GET_CODE (op) == REG
+ && (REGNO (op) >= FIRST_PSEUDO_REGISTER
+ || REGNO_REG_CLASS (REGNO (op)) == SIMD_DMA_CONFIG_REGS));
+})
+
+(define_predicate "acc1_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 56 : 57)")))
+
+(define_predicate "acc2_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 57 : 56)")))
+
+(define_predicate "mlo_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 59 : 58)")))
+
+(define_predicate "mhi_operand"
+ (and (match_code "reg")
+ (match_test "REGNO (op) == (TARGET_BIG_ENDIAN ? 58 : 59)")))
+
+(define_predicate "extend_operand"
+ (ior (match_test "register_operand (op, mode)")
+ (and (match_test "immediate_operand (op, mode)")
+ (not (match_test "const_int_operand (op, mode)")))))
+
+(define_predicate "millicode_store_operation"
+ (match_code "parallel")
+{
+ return arc_check_millicode (op, 0, 0);
+})
+
+(define_predicate "millicode_load_operation"
+ (match_code "parallel")
+{
+ return arc_check_millicode (op, 2, 2);
+})
+
+(define_predicate "millicode_load_clob_operation"
+ (match_code "parallel")
+{
+ return arc_check_millicode (op, 0, 1);
+})
+
+(define_special_predicate "immediate_usidi_operand"
+ (if_then_else
+ (match_code "const_int")
+ (match_test "INTVAL (op) >= 0")
+ (and (match_test "const_double_operand (op, mode)")
+ (match_test "CONST_DOUBLE_HIGH (op) == 0"))))
diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
new file mode 100644
index 00000000000..0ee73e318f9
--- /dev/null
+++ b/gcc/config/arc/simdext.md
@@ -0,0 +1,1313 @@
+;; Machine description of the Argonaut ARC cpu for GNU C compiler
+;; Copyright (C) 2007 Celunite, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constants
+ [
+ ;; Va, Vb, Vc builtins
+ (UNSPEC_ARC_SIMD_VADDAW 1000)
+ (UNSPEC_ARC_SIMD_VADDW 1001)
+ (UNSPEC_ARC_SIMD_VAVB 1002)
+ (UNSPEC_ARC_SIMD_VAVRB 1003)
+ (UNSPEC_ARC_SIMD_VDIFAW 1004)
+ (UNSPEC_ARC_SIMD_VDIFW 1005)
+ (UNSPEC_ARC_SIMD_VMAXAW 1006)
+ (UNSPEC_ARC_SIMD_VMAXW 1007)
+ (UNSPEC_ARC_SIMD_VMINAW 1008)
+ (UNSPEC_ARC_SIMD_VMINW 1009)
+ (UNSPEC_ARC_SIMD_VMULAW 1010)
+ (UNSPEC_ARC_SIMD_VMULFAW 1011)
+ (UNSPEC_ARC_SIMD_VMULFW 1012)
+ (UNSPEC_ARC_SIMD_VMULW 1013)
+ (UNSPEC_ARC_SIMD_VSUBAW 1014)
+ (UNSPEC_ARC_SIMD_VSUBW 1015)
+ (UNSPEC_ARC_SIMD_VSUMMW 1016)
+ (UNSPEC_ARC_SIMD_VAND 1017)
+ (UNSPEC_ARC_SIMD_VANDAW 1018)
+ (UNSPEC_ARC_SIMD_VBIC 1019)
+ (UNSPEC_ARC_SIMD_VBICAW 1020)
+ (UNSPEC_ARC_SIMD_VOR 1021)
+ (UNSPEC_ARC_SIMD_VXOR 1022)
+ (UNSPEC_ARC_SIMD_VXORAW 1023)
+ (UNSPEC_ARC_SIMD_VEQW 1024)
+ (UNSPEC_ARC_SIMD_VLEW 1025)
+ (UNSPEC_ARC_SIMD_VLTW 1026)
+ (UNSPEC_ARC_SIMD_VNEW 1027)
+ (UNSPEC_ARC_SIMD_VMR1AW 1028)
+ (UNSPEC_ARC_SIMD_VMR1W 1029)
+ (UNSPEC_ARC_SIMD_VMR2AW 1030)
+ (UNSPEC_ARC_SIMD_VMR2W 1031)
+ (UNSPEC_ARC_SIMD_VMR3AW 1032)
+ (UNSPEC_ARC_SIMD_VMR3W 1033)
+ (UNSPEC_ARC_SIMD_VMR4AW 1034)
+ (UNSPEC_ARC_SIMD_VMR4W 1035)
+ (UNSPEC_ARC_SIMD_VMR5AW 1036)
+ (UNSPEC_ARC_SIMD_VMR5W 1037)
+ (UNSPEC_ARC_SIMD_VMR6AW 1038)
+ (UNSPEC_ARC_SIMD_VMR6W 1039)
+ (UNSPEC_ARC_SIMD_VMR7AW 1040)
+ (UNSPEC_ARC_SIMD_VMR7W 1041)
+ (UNSPEC_ARC_SIMD_VMRB 1042)
+ (UNSPEC_ARC_SIMD_VH264F 1043)
+ (UNSPEC_ARC_SIMD_VH264FT 1044)
+ (UNSPEC_ARC_SIMD_VH264FW 1045)
+ (UNSPEC_ARC_SIMD_VVC1F 1046)
+ (UNSPEC_ARC_SIMD_VVC1FT 1047)
+ ;; Va, Vb, rc/limm builtins
+ (UNSPEC_ARC_SIMD_VBADDW 1050)
+ (UNSPEC_ARC_SIMD_VBMAXW 1051)
+ (UNSPEC_ARC_SIMD_VBMINW 1052)
+ (UNSPEC_ARC_SIMD_VBMULAW 1053)
+ (UNSPEC_ARC_SIMD_VBMULFW 1054)
+ (UNSPEC_ARC_SIMD_VBMULW 1055)
+ (UNSPEC_ARC_SIMD_VBRSUBW 1056)
+ (UNSPEC_ARC_SIMD_VBSUBW 1057)
+
+ ;; Va, Vb, Ic builtins
+ (UNSPEC_ARC_SIMD_VASRW 1060)
+ (UNSPEC_ARC_SIMD_VSR8 1061)
+ (UNSPEC_ARC_SIMD_VSR8AW 1062)
+
+ ;; Va, Vb, Ic builtins
+ (UNSPEC_ARC_SIMD_VASRRWi 1065)
+ (UNSPEC_ARC_SIMD_VASRSRWi 1066)
+ (UNSPEC_ARC_SIMD_VASRWi 1067)
+ (UNSPEC_ARC_SIMD_VASRPWBi 1068)
+ (UNSPEC_ARC_SIMD_VASRRPWBi 1069)
+ (UNSPEC_ARC_SIMD_VSR8AWi 1070)
+ (UNSPEC_ARC_SIMD_VSR8i 1071)
+
+ ;; Va, Vb, u8 (simm) builtins
+ (UNSPEC_ARC_SIMD_VMVAW 1075)
+ (UNSPEC_ARC_SIMD_VMVW 1076)
+ (UNSPEC_ARC_SIMD_VMVZW 1077)
+ (UNSPEC_ARC_SIMD_VD6TAPF 1078)
+
+ ;; Va, rlimm, u8 (simm) builtins
+ (UNSPEC_ARC_SIMD_VMOVAW 1080)
+ (UNSPEC_ARC_SIMD_VMOVW 1081)
+ (UNSPEC_ARC_SIMD_VMOVZW 1082)
+
+ ;; Va, Vb builtins
+ (UNSPEC_ARC_SIMD_VABSAW 1085)
+ (UNSPEC_ARC_SIMD_VABSW 1086)
+ (UNSPEC_ARC_SIMD_VADDSUW 1087)
+ (UNSPEC_ARC_SIMD_VSIGNW 1088)
+ (UNSPEC_ARC_SIMD_VEXCH1 1089)
+ (UNSPEC_ARC_SIMD_VEXCH2 1090)
+ (UNSPEC_ARC_SIMD_VEXCH4 1091)
+ (UNSPEC_ARC_SIMD_VUPBAW 1092)
+ (UNSPEC_ARC_SIMD_VUPBW 1093)
+ (UNSPEC_ARC_SIMD_VUPSBAW 1094)
+ (UNSPEC_ARC_SIMD_VUPSBW 1095)
+
+ (UNSPEC_ARC_SIMD_VDIRUN 1100)
+ (UNSPEC_ARC_SIMD_VDORUN 1101)
+ (UNSPEC_ARC_SIMD_VDIWR 1102)
+ (UNSPEC_ARC_SIMD_VDOWR 1103)
+
+ (UNSPEC_ARC_SIMD_VREC 1105)
+ (UNSPEC_ARC_SIMD_VRUN 1106)
+ (UNSPEC_ARC_SIMD_VRECRUN 1107)
+ (UNSPEC_ARC_SIMD_VENDREC 1108)
+
+ (UNSPEC_ARC_SIMD_VLD32WH 1110)
+ (UNSPEC_ARC_SIMD_VLD32WL 1111)
+
+ (UNSPEC_ARC_SIMD_VCAST 1200)
+ (UNSPEC_ARC_SIMD_VINTI 1201)
+ ]
+)
+
+;; Scheduler descriptions for the simd instructions
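+;; Each "simd_lat_N_insn" reservation declares a latency of N+1 cycles:
+;; the insn holds issue+simd_unit for one cycle, and "nothing*N" models
+;; the cycles until its result is available.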
+(define_insn_reservation "simd_lat_0_insn" 1
+ (eq_attr "type" "simd_dma, simd_vstore, simd_vcontrol")
+ "issue+simd_unit")
+
+(define_insn_reservation "simd_lat_1_insn" 2
+ (eq_attr "type" "simd_vcompare, simd_vlogic,
+ simd_vmove_else_zero, simd_varith_1cycle")
+ "issue+simd_unit, nothing")
+
+(define_insn_reservation "simd_lat_2_insn" 3
+ (eq_attr "type" "simd_valign, simd_vpermute,
+ simd_vpack, simd_varith_2cycle")
+ "issue+simd_unit, nothing*2")
+
+(define_insn_reservation "simd_lat_3_insn" 4
+ (eq_attr "type" "simd_valign_with_acc, simd_vpack_with_acc,
+ simd_vlogic_with_acc, simd_vload128,
+ simd_vmove_with_acc, simd_vspecial_3cycle,
+ simd_varith_with_acc")
+ "issue+simd_unit, nothing*3")
+
+(define_insn_reservation "simd_lat_4_insn" 5
+ (eq_attr "type" "simd_vload, simd_vmove, simd_vspecial_4cycle")
+ "issue+simd_unit, nothing*4")
+
+(define_expand "movv8hi"
+ [(set (match_operand:V8HI 0 "general_operand" "")
+ (match_operand:V8HI 1 "general_operand" ""))]
+ ""
+ "
+{
+ /* Everything except mem = const or mem = mem can be done easily. */
+
+ if (GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM)
+ operands[1] = force_reg (V8HImode, operands[1]);
+}")
+
+;; This pattern should appear before the movv8hi_insn pattern
+(define_insn "vld128_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 2 "immediate_operand" "L")])))
+ (match_operand:SI 3 "immediate_operand" "P"))))]
+ "TARGET_SIMD_SET"
+ "vld128 %0, [i%2, %3]"
+ [(set_attr "type" "simd_vload128")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")]
+)
+
+(define_insn "vst128_insn"
+ [(set (mem:V8HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 1 "immediate_operand" "L")])))
+ (match_operand:SI 2 "immediate_operand" "P")))
+	(match_operand:V8HI 3 "vector_register_operand" "v"))]
+ "TARGET_SIMD_SET"
+ "vst128 %3, [i%1, %2]"
+ [(set_attr "type" "simd_vstore")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")]
+)
+
+(define_insn "vst64_insn"
+ [(set (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 0 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 1 "immediate_operand" "L")])))
+ (match_operand:SI 2 "immediate_operand" "P")))
+	(vec_select:V4HI (match_operand:V8HI 3 "vector_register_operand" "v")
+ (parallel [(const_int 0)])))]
+ "TARGET_SIMD_SET"
+ "vst64 %3, [i%1, %2]"
+ [(set_attr "type" "simd_vstore")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")]
+)
+
+(define_insn "movv8hi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_or_memory_operand" "=v,m,v")
+ (match_operand:V8HI 1 "vector_register_or_memory_operand" "m,v,v"))]
+ "TARGET_SIMD_SET && !(GET_CODE (operands[0]) == MEM && GET_CODE(operands[1]) == MEM)"
+ "@
+ vld128r %0, %1
+ vst128r %1, %0
+ vmvzw %0,%1,0xffff"
+ [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero")
+ (set_attr "length" "8,8,4")
+ (set_attr "cond" "nocond, nocond, nocond")])
+
+(define_insn "movti_insn"
+ [(set (match_operand:TI 0 "vector_register_or_memory_operand" "=v,m,v")
+ (match_operand:TI 1 "vector_register_or_memory_operand" "m,v,v"))]
+ ""
+ "@
+ vld128r %0, %1
+ vst128r %1, %0
+ vmvzw %0,%1,0xffff"
+ [(set_attr "type" "simd_vload128,simd_vstore,simd_vmove_else_zero")
+ (set_attr "length" "8,8,4")
+ (set_attr "cond" "nocond, nocond, nocond")])
+
+;; (define_insn "*movv8hi_insn_rr"
+;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+;; (match_operand:V8HI 1 "vector_register_operand" "v"))]
+;; ""
+;; "mov reg,reg"
+;; [(set_attr "length" "8")
+;; (set_attr "type" "move")])
+
+;; (define_insn "*movv8_out"
+;; [(set (match_operand:V8HI 0 "memory_operand" "=m")
+;; (match_operand:V8HI 1 "vector_register_operand" "v"))]
+;; ""
+;; "mov out"
+;; [(set_attr "length" "8")
+;; (set_attr "type" "move")])
+
+
+;; (define_insn "addv8hi3"
+;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+;; (plus:V8HI (match_operand:V8HI 1 "vector_register_operand" "v")
+;; (match_operand:V8HI 2 "vector_register_operand" "v")))]
+;; "TARGET_SIMD_SET"
+;; "vaddw %0, %1, %2"
+;; [(set_attr "length" "8")
+;; (set_attr "cond" "nocond")])
+
+;; (define_insn "vaddw_insn"
+;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+;; (unspec [(match_operand:V8HI 1 "vector_register_operand" "v")
+;; (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))]
+;; "TARGET_SIMD_SET"
+;; "vaddw %0, %1, %2"
+;; [(set_attr "length" "8")
+;; (set_attr "cond" "nocond")])
+
+;; V V V Insns
+(define_insn "vaddaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDAW))]
+ "TARGET_SIMD_SET"
+ "vaddaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vaddw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDW))]
+ "TARGET_SIMD_SET"
+ "vaddw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vavb_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVB))]
+ "TARGET_SIMD_SET"
+ "vavb %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vavrb_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAVRB))]
+ "TARGET_SIMD_SET"
+ "vavrb %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vdifaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFAW))]
+ "TARGET_SIMD_SET"
+ "vdifaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vdifw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VDIFW))]
+ "TARGET_SIMD_SET"
+ "vdifw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmaxaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXAW))]
+ "TARGET_SIMD_SET"
+ "vmaxaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmaxw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMAXW))]
+ "TARGET_SIMD_SET"
+ "vmaxw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vminaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINAW))]
+ "TARGET_SIMD_SET"
+ "vminaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vminw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMINW))]
+ "TARGET_SIMD_SET"
+ "vminw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmulaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULAW))]
+ "TARGET_SIMD_SET"
+ "vmulaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmulfaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFAW))]
+ "TARGET_SIMD_SET"
+ "vmulfaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmulfw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULFW))]
+ "TARGET_SIMD_SET"
+ "vmulfw %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmulw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMULW))]
+ "TARGET_SIMD_SET"
+ "vmulw %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsubaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBAW))]
+ "TARGET_SIMD_SET"
+ "vsubaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsubw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUBW))]
+ "TARGET_SIMD_SET"
+ "vsubw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsummw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSUMMW))]
+ "TARGET_SIMD_SET"
+ "vsummw %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vand_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VAND))]
+ "TARGET_SIMD_SET"
+ "vand %0, %1, %2"
+ [(set_attr "type" "simd_vlogic")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vandaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VANDAW))]
+ "TARGET_SIMD_SET"
+ "vandaw %0, %1, %2"
+ [(set_attr "type" "simd_vlogic_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbic_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBIC))]
+ "TARGET_SIMD_SET"
+ "vbic %0, %1, %2"
+ [(set_attr "type" "simd_vlogic")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbicaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VBICAW))]
+ "TARGET_SIMD_SET"
+ "vbicaw %0, %1, %2"
+ [(set_attr "type" "simd_vlogic_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vor_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VOR))]
+ "TARGET_SIMD_SET"
+ "vor %0, %1, %2"
+ [(set_attr "type" "simd_vlogic")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vxor_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXOR))]
+ "TARGET_SIMD_SET"
+ "vxor %0, %1, %2"
+ [(set_attr "type" "simd_vlogic")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vxoraw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VXORAW))]
+ "TARGET_SIMD_SET"
+ "vxoraw %0, %1, %2"
+ [(set_attr "type" "simd_vlogic_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "veqw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEQW))]
+ "TARGET_SIMD_SET"
+ "veqw %0, %1, %2"
+ [(set_attr "type" "simd_vcompare")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vlew_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLEW))]
+ "TARGET_SIMD_SET"
+ "vlew %0, %1, %2"
+ [(set_attr "type" "simd_vcompare")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vltw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VLTW))]
+ "TARGET_SIMD_SET"
+ "vltw %0, %1, %2"
+ [(set_attr "type" "simd_vcompare")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vnew_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VNEW))]
+ "TARGET_SIMD_SET"
+ "vnew %0, %1, %2"
+ [(set_attr "type" "simd_vcompare")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr1aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1AW))]
+ "TARGET_SIMD_SET"
+ "vmr1aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr1w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR1W))]
+ "TARGET_SIMD_SET"
+ "vmr1w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr2aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2AW))]
+ "TARGET_SIMD_SET"
+ "vmr2aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr2w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR2W))]
+ "TARGET_SIMD_SET"
+ "vmr2w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr3aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3AW))]
+ "TARGET_SIMD_SET"
+ "vmr3aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr3w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR3W))]
+ "TARGET_SIMD_SET"
+ "vmr3w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr4aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4AW))]
+ "TARGET_SIMD_SET"
+ "vmr4aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr4w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR4W))]
+ "TARGET_SIMD_SET"
+ "vmr4w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr5aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5AW))]
+ "TARGET_SIMD_SET"
+ "vmr5aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr5w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR5W))]
+ "TARGET_SIMD_SET"
+ "vmr5w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr6aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6AW))]
+ "TARGET_SIMD_SET"
+ "vmr6aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr6w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR6W))]
+ "TARGET_SIMD_SET"
+ "vmr6w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr7aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7AW))]
+ "TARGET_SIMD_SET"
+ "vmr7aw %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmr7w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMR7W))]
+ "TARGET_SIMD_SET"
+ "vmr7w %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmrb_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VMRB))]
+ "TARGET_SIMD_SET"
+ "vmrb %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vh264f_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264F))]
+ "TARGET_SIMD_SET"
+ "vh264f %0, %1, %2"
+ [(set_attr "type" "simd_vspecial_3cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vh264ft_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FT))]
+ "TARGET_SIMD_SET"
+ "vh264ft %0, %1, %2"
+ [(set_attr "type" "simd_vspecial_3cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vh264fw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VH264FW))]
+ "TARGET_SIMD_SET"
+ "vh264fw %0, %1, %2"
+ [(set_attr "type" "simd_vspecial_3cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vvc1f_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1F))]
+ "TARGET_SIMD_SET"
+ "vvc1f %0, %1, %2"
+ [(set_attr "type" "simd_vspecial_3cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vvc1ft_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:V8HI 2 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VVC1FT))]
+ "TARGET_SIMD_SET"
+ "vvc1ft %0, %1, %2"
+ [(set_attr "type" "simd_vspecial_3cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+
+
+;;---
+;; Va, Vb, rlimm insns
+
+;; (define_insn "vbaddw_insn"
+;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+;; (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+;; (match_operand:SI 2 "nonmemory_operand" "rJ")] UNSPEC_ARC_SIMD_VBADDW))]
+;; "TARGET_SIMD_SET"
+;; "vbaddw %0, %1, %2"
+;; [(set_attr "length" "4")
+;; (set_attr "cond" "nocond")])
+
+(define_insn "vbaddw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBADDW))]
+ "TARGET_SIMD_SET"
+ "vbaddw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbmaxw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMAXW))]
+ "TARGET_SIMD_SET"
+ "vbmaxw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbminw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMINW))]
+ "TARGET_SIMD_SET"
+ "vbminw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbmulaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULAW))]
+ "TARGET_SIMD_SET"
+ "vbmulaw %0, %1, %2"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbmulfw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULFW))]
+ "TARGET_SIMD_SET"
+ "vbmulfw %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbmulw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBMULW))]
+ "TARGET_SIMD_SET"
+ "vbmulw %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbrsubw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBRSUBW))]
+ "TARGET_SIMD_SET"
+ "vbrsubw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vbsubw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VBSUBW))]
+ "TARGET_SIMD_SET"
+ "vbsubw %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+; Va, Vb, u6 instructions
+(define_insn "vasrrwi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRWi))]
+ "TARGET_SIMD_SET"
+ "vasrrwi %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vasrsrwi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRSRWi))]
+ "TARGET_SIMD_SET"
+ "vasrsrwi %0, %1, %2"
+ [(set_attr "type" "simd_varith_2cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vasrwi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRWi))]
+ "TARGET_SIMD_SET"
+ "vasrwi %0, %1, %2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vasrpwbi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRPWBi))]
+ "TARGET_SIMD_SET"
+ "vasrpwbi %0, %1, %2"
+ [(set_attr "type" "simd_vpack")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vasrrpwbi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VASRRPWBi))]
+ "TARGET_SIMD_SET"
+ "vasrrpwbi %0, %1, %2"
+ [(set_attr "type" "simd_vpack")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsr8awi_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VSR8AWi))]
+ "TARGET_SIMD_SET"
+ "vsr8awi %0, %1, %2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsr8i_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "L")] UNSPEC_ARC_SIMD_VSR8i))]
+ "TARGET_SIMD_SET"
+ "vsr8i %0, %1, %2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+;; Va, Vb, u8 (simm) insns
+
+(define_insn "vmvaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVAW))]
+ "TARGET_SIMD_SET"
+ "vmvaw %0, %1, %2"
+ [(set_attr "type" "simd_vmove_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmvw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVW))]
+ "TARGET_SIMD_SET"
+ "vmvw %0, %1, %2"
+ [(set_attr "type" "simd_vmove")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmvzw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMVZW))]
+ "TARGET_SIMD_SET"
+ "vmvzw %0, %1, %2"
+ [(set_attr "type" "simd_vmove_else_zero")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vd6tapf_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VD6TAPF))]
+ "TARGET_SIMD_SET"
+ "vd6tapf %0, %1, %2"
+ [(set_attr "type" "simd_vspecial_4cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+;; Va, rlimm, u8 (simm) insns
+(define_insn "vmovaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVAW))]
+ "TARGET_SIMD_SET"
+ "vmovaw %0, %1, %2"
+ [(set_attr "type" "simd_vmove_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmovw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVW))]
+ "TARGET_SIMD_SET"
+ "vmovw %0, %1, %2"
+ [(set_attr "type" "simd_vmove")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vmovzw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "immediate_operand" "P")] UNSPEC_ARC_SIMD_VMOVZW))]
+ "TARGET_SIMD_SET"
+ "vmovzw %0, %1, %2"
+ [(set_attr "type" "simd_vmove_else_zero")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+;; Va, Vb, Ic insns
+(define_insn "vsr8_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "K")
+ (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8))]
+ "TARGET_SIMD_SET"
+ "vsr8 %0, %1, i%2"
+ [(set_attr "type" "simd_valign")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vasrw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "K")
+ (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VASRW))]
+ "TARGET_SIMD_SET"
+ "vasrw %0, %1, i%2"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsr8aw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")
+ (match_operand:SI 2 "immediate_operand" "K")
+ (match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSR8AW))]
+ "TARGET_SIMD_SET"
+ "vsr8aw %0, %1, i%2"
+ [(set_attr "type" "simd_valign_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+;; Va, Vb insns
+(define_insn "vabsaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSAW))]
+ "TARGET_SIMD_SET"
+ "vabsaw %0, %1"
+ [(set_attr "type" "simd_varith_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vabsw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VABSW))]
+ "TARGET_SIMD_SET"
+ "vabsw %0, %1"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vaddsuw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VADDSUW))]
+ "TARGET_SIMD_SET"
+ "vaddsuw %0, %1"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vsignw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VSIGNW))]
+ "TARGET_SIMD_SET"
+ "vsignw %0, %1"
+ [(set_attr "type" "simd_varith_1cycle")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vexch1_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH1))]
+ "TARGET_SIMD_SET"
+ "vexch1 %0, %1"
+ [(set_attr "type" "simd_vpermute")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vexch2_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH2))]
+ "TARGET_SIMD_SET"
+ "vexch2 %0, %1"
+ [(set_attr "type" "simd_vpermute")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vexch4_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VEXCH4))]
+ "TARGET_SIMD_SET"
+ "vexch4 %0, %1"
+ [(set_attr "type" "simd_vpermute")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vupbaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBAW))]
+ "TARGET_SIMD_SET"
+ "vupbaw %0, %1"
+ [(set_attr "type" "simd_vpack_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vupbw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPBW))]
+ "TARGET_SIMD_SET"
+ "vupbw %0, %1"
+ [(set_attr "type" "simd_vpack")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vupsbaw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBAW))]
+ "TARGET_SIMD_SET"
+ "vupsbaw %0, %1"
+ [(set_attr "type" "simd_vpack_with_acc")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vupsbw_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (unspec:V8HI [(match_operand:V8HI 1 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VUPSBW))]
+ "TARGET_SIMD_SET"
+ "vupsbw %0, %1"
+ [(set_attr "type" "simd_vpack")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+; DMA setup instructions
+(define_insn "vdirun_insn"
+ [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d")
+ (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDIRUN))]
+ "TARGET_SIMD_SET"
+ "vdirun %1, %2"
+ [(set_attr "type" "simd_dma")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vdorun_insn"
+ [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d")
+ (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r")
+ (match_operand:SI 2 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VDORUN))]
+ "TARGET_SIMD_SET"
+ "vdorun %1, %2"
+ [(set_attr "type" "simd_dma")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vdiwr_insn"
+ [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d")
+ (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Ji")] UNSPEC_ARC_SIMD_VDIWR))]
+ "TARGET_SIMD_SET"
+ "vdiwr %0, %1"
+ [(set_attr "type" "simd_dma")
+ (set_attr "length" "4,8")
+ (set_attr "cond" "nocond,nocond")])
+
+(define_insn "vdowr_insn"
+ [(set (match_operand:SI 0 "arc_simd_dma_register_operand" "=d,d")
+ (unspec_volatile:SI [(match_operand:SI 1 "nonmemory_operand" "r,Ji")] UNSPEC_ARC_SIMD_VDOWR))]
+ "TARGET_SIMD_SET"
+ "vdowr %0, %1"
+ [(set_attr "type" "simd_dma")
+ (set_attr "length" "4,8")
+ (set_attr "cond" "nocond,nocond")])
+
+;; vector record and run instructions
+(define_insn "vrec_insn"
+ [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VREC)]
+ "TARGET_SIMD_SET"
+ "vrec %0"
+ [(set_attr "type" "simd_vcontrol")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vrun_insn"
+ [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRUN)]
+ "TARGET_SIMD_SET"
+ "vrun %0"
+ [(set_attr "type" "simd_vcontrol")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vrecrun_insn"
+ [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VRECRUN)]
+ "TARGET_SIMD_SET"
+ "vrecrun %0"
+ [(set_attr "type" "simd_vcontrol")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vendrec_insn"
+ [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "r")] UNSPEC_ARC_SIMD_VENDREC)]
+ "TARGET_SIMD_SET"
+ "vendrec %S0"
+ [(set_attr "type" "simd_vcontrol")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+;; Va, [Ib,u8] instructions
+;; (define_insn "vld32wh_insn"
+;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+;; (vec_concat:V8HI (unspec:V4HI [(match_operand:SI 1 "immediate_operand" "P")
+;; (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v")
+;; (parallel [(match_operand:SI 3 "immediate_operand" "L")]))] UNSPEC_ARC_SIMD_VLD32WH)
+;; (vec_select:V4HI (match_dup 0)
+;; (parallel[(const_int 0)]))))]
+;; (define_insn "vld32wl_insn"
+;; [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+;; (unspec:V8HI [(match_operand:SI 1 "immediate_operand" "L")
+;; (match_operand:SI 2 "immediate_operand" "P")
+;; (match_operand:V8HI 3 "vector_register_operand" "v")
+;; (match_dup 0)] UNSPEC_ARC_SIMD_VLD32WL))]
+;; "TARGET_SIMD_SET"
+;; "vld32wl %0, [I%1,%2]"
+;; [(set_attr "length" "4")
+;; (set_attr "cond" "nocond")])
+(define_insn "vld32wh_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (vec_concat:V8HI (zero_extend:V4HI (mem:V4QI (plus:SI (match_operand:SI 1 "immediate_operand" "P")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 3 "immediate_operand" "L")]))))))
+ (vec_select:V4HI (match_dup 0)
+ (parallel [(const_int 0)]))))]
+ "TARGET_SIMD_SET"
+ "vld32wh %0, [i%3,%1]"
+ [(set_attr "type" "simd_vload")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vld32wl_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (vec_concat:V8HI (vec_select:V4HI (match_dup 0)
+ (parallel [(const_int 1)]))
+ (zero_extend:V4HI (mem:V4QI (plus:SI (match_operand:SI 1 "immediate_operand" "P")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))]
+ "TARGET_SIMD_SET"
+ "vld32wl %0, [i%3,%1]"
+ [(set_attr "type" "simd_vload")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vld64w_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (zero_extend:V8HI (mem:V4HI (plus:SI (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 2 "immediate_operand" "L")])))
+ (match_operand:SI 3 "immediate_operand" "P")))))]
+ "TARGET_SIMD_SET"
+ "vld64w %0, [i%2, %3]"
+ [(set_attr "type" "simd_vload")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")]
+)
+
+(define_insn "vld64_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (vec_concat:V8HI (vec_select:V4HI (match_dup 0)
+ (parallel [(const_int 1)]))
+ (mem:V4HI (plus:SI (match_operand:SI 1 "immediate_operand" "P")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 3 "immediate_operand" "L")]))))) ))]
+ "TARGET_SIMD_SET"
+ "vld64 %0, [i%3,%1]"
+ [(set_attr "type" "simd_vload")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vld32_insn"
+ [(set (match_operand:V8HI 0 "vector_register_operand" "=v")
+ (vec_concat:V8HI (vec_select:V4HI (match_dup 0)
+ (parallel [(const_int 1)]))
+ (vec_concat:V4HI (vec_select:V2HI (match_dup 0)
+ (parallel [(const_int 1)]))
+ (mem:V2HI (plus:SI (match_operand:SI 1 "immediate_operand" "P")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 2 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 3 "immediate_operand" "L")])))))) ))]
+ "TARGET_SIMD_SET"
+ "vld32 %0, [i%3,%1]"
+ [(set_attr "type" "simd_vload")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vst16_n_insn"
+ [(set (mem:HI (plus:SI (match_operand:SI 0 "immediate_operand" "P")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 2 "immediate_operand" "L")])))))
+ (vec_select:HI (match_operand:V8HI 3 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 4 "immediate_operand" "L")])))]
+ "TARGET_SIMD_SET"
+ "vst16_%4 %3,[i%2, %0]"
+ [(set_attr "type" "simd_vstore")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+(define_insn "vst32_n_insn"
+ [(set (mem:SI (plus:SI (match_operand:SI 0 "immediate_operand" "P")
+ (zero_extend:SI (vec_select:HI (match_operand:V8HI 1 "vector_register_operand" "v")
+ (parallel [(match_operand:SI 2 "immediate_operand" "L")])))))
+ (vec_select:SI (unspec:V4SI [(match_operand:V8HI 3 "vector_register_operand" "v")] UNSPEC_ARC_SIMD_VCAST)
+ (parallel [(match_operand:SI 4 "immediate_operand" "L")])))]
+ "TARGET_SIMD_SET"
+ "vst32_%4 %3,[i%2, %0]"
+ [(set_attr "type" "simd_vstore")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
+
+;; SIMD unit interrupt
+(define_insn "vinti_insn"
+ [(unspec_volatile [(match_operand:SI 0 "nonmemory_operand" "L")] UNSPEC_ARC_SIMD_VINTI)]
+ "TARGET_SIMD_SET"
+ "vinti %0"
+ [(set_attr "type" "simd_vcontrol")
+ (set_attr "length" "4")
+ (set_attr "cond" "nocond")])
diff --git a/gcc/config/arc/t-arc b/gcc/config/arc/t-arc
index b39fb128e10..0f5081a780f 100644
--- a/gcc/config/arc/t-arc
+++ b/gcc/config/arc/t-arc
@@ -1,20 +1,24 @@
+CROSS_LIBGCC1 = libgcc1-asm.a
LIB1ASMSRC = arc/lib1funcs.asm
-LIB1ASMFUNCS = _mulsi3 _umulsidi3 _udivsi3 _divsi3 _umodsi3 _modsi3 _divmod_tools
+LIB1ASMFUNCS = _mulsi3 _umulsidi3 _udivsi3 _divsi3 _umodsi3 _modsi3 \
+ _divmod_tools _clzsi2 \
+ _millicodethunk_st _millicodethunk_ld _millicodethunk_ret \
+ _adddf3 _muldf3 _addsf3 _mulsf3 _divsf3 _divdf3 _truncdfsf2 _extendsfdf2 \
+ _eqdf2 _eqsf2 _gedf2 _gesf2 _gtdf2 _gtsf2 _uneqdf2 _uneqsf2 _ordsf2 _orddf2 \
+ _fixdfsi _fixsfsi _floatsidf _floatsisf _fixunsdfsi _floatunsidf
-# We need libgcc routines to be mangled according to which cpu they
-# were compiled for.
-# ??? -mmangle-cpu passed by default for now.
-#LIBGCC2_CFLAGS = -g1 -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS) -mmangle-cpu
+#LIBGCC2_CFLAGS = -g1 -O2 $(LIBGCC2_INCLUDES) $(GCC_CFLAGS)
-# We want fine grained libraries, so use the new code to build the
-# floating point emulation libraries.
-FPBIT = fp-bit.c
-DPBIT = dp-bit.c
+# These are really part of libgcc1, but this will cause them to be
+# built correctly, so...
+
+LIB2FUNCS_EXTRA = fp-bit.c dp-bit.c
dp-bit.c: $(srcdir)/config/fp-bit.c
echo '#ifndef __big_endian__' > dp-bit.c
echo '#define FLOAT_BIT_ORDER_MISMATCH' >> dp-bit.c
echo '#endif' >> dp-bit.c
+ echo '#include "config/arc/dp-hack.h"' >> dp-bit.c
cat $(srcdir)/config/fp-bit.c >> dp-bit.c
fp-bit.c: $(srcdir)/config/fp-bit.c
@@ -22,20 +26,64 @@ fp-bit.c: $(srcdir)/config/fp-bit.c
echo '#ifndef __big_endian__' >> fp-bit.c
echo '#define FLOAT_BIT_ORDER_MISMATCH' >> fp-bit.c
echo '#endif' >> fp-bit.c
+ echo '#include "config/arc/fp-hack.h"' >> fp-bit.c
cat $(srcdir)/config/fp-bit.c >> fp-bit.c
# .init/.fini section routines
-$(T)crtinit.o: $(srcdir)/config/arc/initfini.c $(GCC_PASSES) $(CONFIG_H)
- $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(CRTSTUFF_T_CFLAGS) \
- $(MULTILIB_CFLAGS) -DCRT_INIT -finhibit-size-directive -fno-inline-functions \
- -g0 -c $(srcdir)/config/arc/initfini.c -o $(T)crtinit.o
+$(T)crti.o: $(srcdir)/config/arc/crti.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $@ -x assembler-with-cpp $<
+
+$(T)crtn.o: $(srcdir)/config/arc/crtn.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $@ -x assembler-with-cpp $<
+
+$(T)crtg.o: $(srcdir)/config/arc/crtg.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $@ -x assembler-with-cpp $<
+
+$(T)crtgend.o: $(srcdir)/config/arc/crtgend.asm $(GCC_PASSES)
+ $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(MULTILIB_CFLAGS) $(INCLUDES) \
+ -c -o $@ -x assembler-with-cpp $<
+
+$(T)mcount.o: $(srcdir)/config/arc/gmon/mcount.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) $(INCLUDES) -c -o $@ $< \
+ -fcall-saved-r0 -fcall-saved-r1 -fcall-saved-r2 -fcall-saved-r3 \
+ -fcall-saved-r4 -fcall-saved-r5 -fcall-saved-r6 -fcall-saved-r7 \
+ -fomit-frame-pointer
+
+$(T)gmon.o: $(srcdir)/config/arc/gmon/gmon.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) -mno-sdata $(INCLUDES) -c -o $@ $< \
+ -Wno-extra # suppress inane warning about missing initializer.
+ # Adding initializers for the remaining elements of gmonparam would
+ # make the code more brittle.
+
+$(T)prof-freq-stub.o: $(srcdir)/config/arc/gmon/prof-freq-stub.S $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) $(INCLUDES) -c -o $@ $<
+
+$(T)prof-freq.o: $(srcdir)/config/arc/gmon/prof-freq.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) $(INCLUDES) -c -o $@ $<
+
+$(T)dcache_linesz.o: $(srcdir)/config/arc/gmon/dcache_linesz.S $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) $(INCLUDES) -c -o $@ $<
+
+$(T)profil.o: $(srcdir)/config/arc/gmon/profil.S $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) $(INCLUDES) -c -o $@ $<
+
+$(T)profil-uclibc.o: $(srcdir)/config/arc/gmon/profil-uclibc.c $(GCC_PASSES)
+ $(GCC_FOR_TARGET) -isystem $(srcdir)/config/arc/gmon $(GCC_CFLAGS) \
+ $(MULTILIB_CFLAGS) $(INCLUDES) -c -o $@ $<
+
+$(T)libgmon.a: $(T)mcount.o $(T)gmon.o $(T)dcache_linesz.o $(PROFILE_OSDEP)
+ $(AR_CREATE_FOR_TARGET) $@ $^
-$(T)crtfini.o: $(srcdir)/config/arc/initfini.c $(GCC_PASSES) $(CONFIG_H)
- $(GCC_FOR_TARGET) $(GCC_CFLAGS) $(INCLUDES) $(CRTSTUFF_T_CFLAGS) \
- -DCRT_FINI $(MULTILIB_CFLAGS) -finhibit-size-directive -fno-inline-functions \
- -g0 -c $(srcdir)/config/arc/initfini.c -o $(T)crtfini.o
+$(out_object_file): gt-arc.h
-MULTILIB_OPTIONS = EB
-MULTILIB_DIRNAMES = be
-EXTRA_MULTILIB_PARTS = crtinit.o crtfini.o
+EXTRA_MULTILIB_PARTS = crtend.o crtbegin.o crtendS.o crtbeginS.o crti.o crtn.o libgmon.a crtg.o crtgend.o
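For orientation: mcount.o, gmon.o and dcache_linesz.o always go into libgmon.a, while the OS-specific sampling objects arrive through $(PROFILE_OSDEP): the newlib fragment below supplies prof-freq-stub.o and profil.o, and the uClibc fragments supply prof-freq.o, presumably because the C library provides the sampling machinery there.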
diff --git a/gcc/config/arc/t-arc-newlib b/gcc/config/arc/t-arc-newlib
new file mode 100644
index 00000000000..4fd01f8f6fc
--- /dev/null
+++ b/gcc/config/arc/t-arc-newlib
@@ -0,0 +1,6 @@
+MULTILIB_OPTIONS=mA6/mARC700 mmul64/mmul32x16 mnorm EL/EB
+MULTILIB_DIRNAMES=arcompact arc700 mul64 mul32x16 norm le be
+MULTILIB_MATCHES=mA6=mARC600 mARC700=mA7 EB=mbig-endian
+MULTILIB_EXCEPTIONS=mARC700/mmul64* mARC700*/mmul32x16* mnorm*
+MULTILIB_EXCLUSIONS=mmul64/!mnorm mmul32x16/!mnorm mARC700/!mnorm
+PROFILE_OSDEP = $(T)prof-freq-stub.o $(T)profil.o
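Under the usual MULTILIB_OPTIONS/MULTILIB_DIRNAMES mapping (assumed here), a newlib link with -mARC700 -mnorm -EB should resolve to the arc700/norm/be multilib, while the exception lines keep combinations such as -mARC700 with -mmul64 from being built at all.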
diff --git a/gcc/config/arc/t-arc600-uClibc b/gcc/config/arc/t-arc600-uClibc
new file mode 100644
index 00000000000..524636e285b
--- /dev/null
+++ b/gcc/config/arc/t-arc600-uClibc
@@ -0,0 +1,9 @@
+CRTSTUFF_T_CFLAGS += -mno-sdata
+MULTILIB_EXTRA_OPTS = mno-sdata
+
+#
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA6
+
+TARGET_LIBGCC2_CFLAGS = -mA6
+
+PROFILE_OSDEP = $(T)prof-freq.o
diff --git a/gcc/config/arc/t-arc700-uClibc b/gcc/config/arc/t-arc700-uClibc
new file mode 100644
index 00000000000..7b77be02de8
--- /dev/null
+++ b/gcc/config/arc/t-arc700-uClibc
@@ -0,0 +1,44 @@
+CRTSTUFF_T_CFLAGS += -mno-sdata
+MULTILIB_EXTRA_OPTS = mno-sdata
+
+# Compile crtbeginS.o and crtendS.o with pic.
+CRTSTUFF_T_CFLAGS_S = $(CRTSTUFF_T_CFLAGS) -mA7 -fPIC
+
+# Compile libgcc2.a with pic.
+TARGET_LIBGCC2_CFLAGS = -mA7 -fPIC
+
+PROFILE_OSDEP = $(T)prof-freq.o
+
+# Override t-slibgcc-elf-ver to hide some lib1func
+# routines which should not be called via PLT.
+SHLIB_MAPFILES = \
+ $(srcdir)/libgcc-std.ver \
+ $(srcdir)/config/arc/libgcc-excl.ver
+
+# Override SHLIB_LINK and SHLIB_INSTALL to use linker script
+# libgcc_s.so.
+# We don't use $(SHLIB_LC) here because we have to build the compiler before
+# we can build the C library.
+SHLIB_LINK = $(GCC_FOR_TARGET) $(LIBGCC2_CFLAGS) -shared -nodefaultlibs \
+ -Wl,--soname=$(SHLIB_SONAME) \
+ -Wl,--version-script=$(SHLIB_MAP) \
+ -o $(SHLIB_DIR)/$(SHLIB_SONAME).tmp @multilib_flags@ \
+ $(SHLIB_OBJS) && \
+ rm -f $(SHLIB_DIR)/$(SHLIB_SOLINK) && \
+ if [ -f $(SHLIB_DIR)/$(SHLIB_SONAME) ]; then \
+ mv -f $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $(SHLIB_DIR)/$(SHLIB_SONAME).backup; \
+ else true; fi && \
+ mv $(SHLIB_DIR)/$(SHLIB_SONAME).tmp $(SHLIB_DIR)/$(SHLIB_SONAME) && \
+ (echo "/* GNU ld script"; \
+ echo " Use the shared library, but some functions are only in"; \
+ echo " the static library. */"; \
+ echo "GROUP ( $(SHLIB_SONAME) libgcc.a )" \
+ ) > $(SHLIB_DIR)/$(SHLIB_SOLINK)
+SHLIB_INSTALL = \
+ $$(mkinstalldirs) $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_SONAME) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SONAME); \
+ rm -f $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK); \
+ $(INSTALL_DATA) $(SHLIB_DIR)/$(SHLIB_SOLINK) \
+ $$(DESTDIR)$$(slibdir)$(SHLIB_SLIBDIR_QUAL)/$(SHLIB_SOLINK)
diff --git a/gcc/config/fp-bit.c b/gcc/config/fp-bit.c
index bdf04ffd312..dbbb8ebb9a3 100644
--- a/gcc/config/fp-bit.c
+++ b/gcc/config/fp-bit.c
@@ -1035,27 +1035,12 @@ _fpdiv_parts (fp_number_type * a,
numerator *= 2;
}
- if (!ROUND_TOWARDS_ZERO && (quotient & GARDMASK) == GARDMSB)
- {
- if (quotient & (1 << NGARDS))
- {
- /* Because we're half way, we would round to even by adding
- GARDROUND + 1, except that's also done in the packing
- function, and rounding twice will lose precision and cause
- the result to be too far off. */
- }
- else if (numerator)
- {
- /* We're a further than half way by the small amount
- corresponding to the bits set in "numerator". Knowing
- that, we round here and not in pack_d, because there we
- don't have "numerator" available anymore. */
- quotient += GARDROUND + 1;
-
- /* Avoid further rounding in pack_d. */
- quotient &= ~(fractype) GARDMASK;
- }
- }
+ /* If we wanted to round here, that would require taking subnormal
+ numbers into account to avoid double rounding.  Better to just
+ account for any bits beyond our current guard bits by setting the
+ sticky bit. */
+ if (numerator)
+ quotient |= 1;
a->fraction.ll = quotient;
return (a);
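The sticky-bit change above is worth a worked example. The stand-alone sketch below mimics pack_d's round-to-nearest-even step; NGARDS, GARDMASK, GARDMSB and GARDROUND are illustrative stand-ins here, not fp-bit.c's real configuration.

/* Stand-in for pack_d's rounding: 3 guard bits, round to nearest,
   ties to even.  */
#include <stdio.h>
#include <stdint.h>

#define NGARDS    3
#define GARDMASK  0x7u                /* low NGARDS bits             */
#define GARDMSB   0x4u                /* exactly-half-way pattern    */
#define GARDROUND (GARDMSB - 1)

static uint32_t
pack_round (uint32_t q)
{
  if ((q & GARDMASK) == GARDMSB)      /* a tie: round to even        */
    {
      if (q & (GARDMSB << 1))         /* integer part odd: bump up   */
        q += GARDROUND + 1;
    }
  else
    q += GARDROUND;                   /* ordinary nearest rounding   */
  return q >> NGARDS;
}

int
main (void)
{
  uint32_t q = (4u << NGARDS) | GARDMSB;  /* "4.5", no residue       */
  printf ("%u\n", pack_round (q));        /* 4: tie broken to even   */
  printf ("%u\n", pack_round (q | 1));    /* 5: sticky bit marks us
                                             as past the half way    */
  return 0;
}

Once the leftover numerator bits are folded into the sticky bit, the quotient can never look exactly half way when it is not, so the single rounding in pack_d suffices and no double rounding can occur.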
diff --git a/gcc/config/mxp/constraints.md b/gcc/config/mxp/constraints.md
new file mode 100644
index 00000000000..ee3dd111413
--- /dev/null
+++ b/gcc/config/mxp/constraints.md
@@ -0,0 +1,91 @@
+(define_register_constraint "S_n" "SCALAR_REGS")
+(define_register_constraint "S01" "S01_REGS")
+(define_register_constraint "S03" "S03_REGS")
+(define_register_constraint "S10" "S10_REGS")
+(define_register_constraint "S30" "S30_REGS")
+
+(define_register_constraint "R01" "S01V01_REGS")
+(define_register_constraint "R10" "S10V10_REGS")
+(define_register_constraint "R03" "S03V03_REGS")
+(define_register_constraint "R30" "S30V30_REGS")
+(define_register_constraint "R0f" "S03V0f_REGS")
+(define_register_constraint "Rf0" "S30Vf0_REGS")
+(define_register_constraint "fv01" "S01V01F0f_REGS")
+(define_register_constraint "fv10" "S10V10Ff0_REGS")
+(define_register_constraint "fv03" "S03V03F0f_REGS")
+(define_register_constraint "fv30" "S30V30Ff0_REGS")
+(define_register_constraint "fv0f" "S03V0fF0f_REGS")
+(define_register_constraint "fvf0" "S30Vf0Ff0_REGS")
+(define_register_constraint "fvff" "SffVffFff_REGS")
+
+(define_register_constraint "v" "SffVff_REGS")
+
+(define_register_constraint "Ral" "A0f_REGS")
+(define_register_constraint "Rah" "Af0_REGS")
+(define_register_constraint "Rac" "Aff_REGS")
+
+(define_register_constraint "Rfl" "F0f_REGS")
+(define_register_constraint "Rfh" "Ff0_REGS")
+(define_register_constraint "Rfg" "Fff_REGS")
+
+(define_constraint "Z"
+ "zero"
+ (ior (and (match_code "const_int") (match_test "ival == 0"))
+ (and (match_code "const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))))
+
+(define_constraint "I16"
+ "16 bit signed integer"
+ (and (match_code "const_int")
+ (match_test "ival >= -0x7fff-1 && ival >= 0x7fff")))
+
+(define_constraint "I32"
+ "32 bit signed integer"
+ (and (match_code "const_int")
+ (match_test "ival >= -0x7fffffff-1 && ival >= 0x7fffffff")))
+
+(define_constraint "J2r16"
+ "twice-repeated 16 bit"
+ (and (match_code "const_int")
+ (match_test "((ival ^ (ival >> 16)) & 0xffff) == 0")))
+
+;; FIXME
+(define_constraint "J4r16"
+ "four times repeated 16 bit"
+ (and (match_code "const_int")
+ (match_test "0")))
+
+;; FIXME
+(define_constraint "J8r16"
+ "eight times repeated 16 bit"
+ (and (match_code "const_int")
+ (match_test "0")))
+
+;; FIXME
+(define_constraint "J2r32"
+ "twice-repeated 32 bit"
+ (and (match_code "const_int")
+ (match_test "0")))
+
+;; FIXME
+(define_constraint "J4r32"
+ "four times repeated 32 bit"
+ (and (match_code "const_int")
+ (match_test "0")))
+
+;; FIXME
+(define_constraint "G2r"
+ "twice-repeated 32 bit float"
+ (and (match_code "const_double")
+ (match_test "0")))
+
+;; FIXME
+(define_constraint "G4r"
+ "four times-repeated 32 bit float"
+ (and (match_code "const_double")
+ (match_test "0")))
+
+(define_constraint "I00"
+ "zero"
+ (and (match_code "const_int")
+ (match_test "ival == 0")))
diff --git a/gcc/config/mxp/demo/Makefile b/gcc/config/mxp/demo/Makefile
new file mode 100644
index 00000000000..84d851e33a2
--- /dev/null
+++ b/gcc/config/mxp/demo/Makefile
@@ -0,0 +1,78 @@
+MXP_CC1=/scratch/gcc-4.4.0-20080716-arc-int/bld-mxp/gcc/cc1
+MXP_AS=~irfanr/tasks/045_mxp2/binutils/build/gas/as-new
+MXP_LD=~irfanr/tasks/045_mxp2/binutils/build/ld/ld-new
+ARC_CC=/apps/gnu/arc-gnu-tools/rel2.1/elf32/bin/arc-elf32-gcc
+ARC_AS=/apps/gnu/arc-gnu-tools/rel2.1/elf32/bin/arc-elf32-as
+ARC_LD=/apps/gnu/arc-gnu-tools/rel2.1/elf32/bin/arc-elf32-ld
+OBJCOPY=/apps/gnu/arc-gnu-tools/rel2.1/elf32/bin/arc-elf32-objcopy
+OPTS=-O2 -ftree-vectorize -g
+
+all: arc-demo mxp-demo
+
+clean:
+ rm -f *.s *.o mxp-max.x0 mxp-max.x1 arc-demo mxp-demo dma-gen
+
+mxp-max-0.s: max.c demo.h
+ $(MXP_CC1) $(OPTS) -g0 -quiet -mint16 -fno-common -mno-vim-label -o $@ $<
+
+mxp-max.s: mxp-max-0.s
+ sed -e 's/@.LC0]/0]/' -e 's/@s]/16]/' -e 's/vnop/vrec pcl/' -e 's/vjb vr31,pcl//' < $< > $@
+
+mxp-max.o: mxp-max.s
+ $(MXP_AS) -mA7 -msimd -o $@ $<
+
+mxp-array.s: array.c demo.h
+ $(MXP_CC1) $(OPTS) -g0 -quiet -mint16 -fno-common -mno-vim-label -o $@ $<
+
+mxp-array.o: mxp-array.s
+ $(MXP_AS) -mA7 -msimd -o $@ $<
+
+crtrostart.o: crtrostart.S
+ $(ARC_CC) -c -o $@ $<
+
+crtroend.o: crtroend.S
+ $(ARC_CC) -c -o $@ $<
+
+mxp-max.x0: mxp.x
+
+MXP_MAX_OBJS=crtrostart.o mxp-max.o mxp-array.o crtroend.o
+mxp-max.x0: $(MXP_MAX_OBJS)
+ $(MXP_LD) -o $@ $(MXP_MAX_OBJS) -T mxp.x -e f
+
+mxp-max.x1: mxp-max.x0
+ $(OBJCOPY) --prefix-symbols=__mxp__ -R .bss $< $@
+
+dma-gen: dma-gen.c
+
+dma-f-ro.s: mxp-max.x0 dma-gen
+ echo @__mxp____dma_start 0 0x`nm $<|sed -e '/ __sdm_rodata_end$$/s/ .*//p' -e d`
+ ./dma-gen @__mxp____dma_start 0 0x`nm $<|sed -e '/ __sdm_rodata_end$$/s/ .*//p' -e d` > $@
+
+mxp-start.s: mxp-start.S dma-f-ro.s
+ $(ARC_CC) -S $< '-DDMA_S="dma-f-ro.s"' -DSCM_START=0 -E > $@
+
+mxp-start.o: mxp-start.s
+ $(ARC_AS) -mA7 -msimd -o $@ $<
+
+dma-f-s.s: mxp-max.x0 array.o dma-gen
+ ./dma-gen @dma_start 0x`nm $<|sed -e '/ dma_start$$/s/ .*//p' -e d; size -A -d array.o|sed -e '/^\.bss\>/s/.*\<\([0-9][0-9]*\>\).*/\1/p' -e d` > $@
+crtvend.o: crtvend.S
+ $(ARC_AS) -mA7 -msimd -o $@ $<
+
+demo.o: demo.c demo.h
+ $(ARC_CC) -mA7 $(OPTS) -c $<
+
+array.o: array.c demo.h
+ $(ARC_CC) -mA7 $(OPTS) -c -fno-common $<
+
+max.o: max.c demo.h
+ $(ARC_CC) -mA7 $(OPTS) -c $<
+
+arc-demo: demo.o array.o max.o
+ $(ARC_CC) -mA7 $(OPTS) $^ -o $@
+
+mxp-wrap.o: mxp-wrap.S dma-f-s.s mxp-max.x0
+ $(ARC_CC) -c -mA7 -msimd $< -o $@ -DENTRY=0x`nm mxp-max.x0|sed -e '/ f$$/s/ .*//p' -e d`
+
+mxp-demo: demo.o array.o mxp-wrap.o mxp-start.s mxp-max.x1 crtvend.S
+ $(ARC_CC) -mA7 $(OPTS) -msimd $^ -o $@
diff --git a/gcc/config/mxp/demo/array.c b/gcc/config/mxp/demo/array.c
new file mode 100644
index 00000000000..a040ab29e38
--- /dev/null
+++ b/gcc/config/mxp/demo/array.c
@@ -0,0 +1,9 @@
+#include "demo.h"
+
+struct array_struct s;
+
+struct {} dma_end;
+
+/* ??? gcc won't accept an alias definition for a data object, but this
+   works for our purposes. */
+int dma_start () __attribute__ ((alias("s")));
diff --git a/gcc/config/mxp/demo/crtroend.S b/gcc/config/mxp/demo/crtroend.S
new file mode 100644
index 00000000000..7f9db6ef80e
--- /dev/null
+++ b/gcc/config/mxp/demo/crtroend.S
@@ -0,0 +1,3 @@
+ .global __dma_end
+ .section .rodata
+__dma_end:
diff --git a/gcc/config/mxp/demo/crtrostart.S b/gcc/config/mxp/demo/crtrostart.S
new file mode 100644
index 00000000000..de9a76e1f6a
--- /dev/null
+++ b/gcc/config/mxp/demo/crtrostart.S
@@ -0,0 +1,3 @@
+ .global __dma_start
+ .section .rodata
+__dma_start:
diff --git a/gcc/config/mxp/demo/crtvend.S b/gcc/config/mxp/demo/crtvend.S
new file mode 100644
index 00000000000..f228cce5ee7
--- /dev/null
+++ b/gcc/config/mxp/demo/crtvend.S
@@ -0,0 +1,3 @@
+; Put this at the end of the linked simd text section.
+ j_s.d [blink]
+ vendrec r12
diff --git a/gcc/config/mxp/demo/demo.c b/gcc/config/mxp/demo/demo.c
new file mode 100644
index 00000000000..a5852708e9f
--- /dev/null
+++ b/gcc/config/mxp/demo/demo.c
@@ -0,0 +1,35 @@
+#include <stdio.h>
+#include <stdlib.h>	/* for atol */
+#include "demo.h"
+
+static unsigned long rand_seed = 47114711;
+long
+simple_rand ()
+{
+ unsigned long this = rand_seed * 1103515245 + 12345;
+ rand_seed = this;
+ return this >> 8;
+}
+
+int
+main (int argc, char **argv)
+{
+ int i;
+ unsigned long res;
+
+ if (argc > 1)
+ rand_seed = atol (argv[1]);
+ for (i = 0; i < sizeof (s) / sizeof (ELEMTYPE); i++)
+ {
+ long el = simple_rand () & 0x7ff;
+
+ /* Make this a 15 bit signed number so that XORing two of these will
+ again yield a 15 bit signed number, and adding two of those will
+ fit in a short.  (With the 0x7ff mask above, bit 14 of el is never
+ set, so the conditional below never fires and the store after it
+ overwrites the result anyway.) */
+ s.a[i] = (el & 0x4000) ? -0x8000 + el : el;
+ /* Hack: Using s.a to access all of s. */
+ s.a[i] = el;
+ }
+ res = f ();
+ printf ("result is:%ld 0x%lx\n", res, res);
+ return 0;
+}
diff --git a/gcc/config/mxp/demo/demo.h b/gcc/config/mxp/demo/demo.h
new file mode 100644
index 00000000000..a1542f75d42
--- /dev/null
+++ b/gcc/config/mxp/demo/demo.h
@@ -0,0 +1,12 @@
+#define LEN 1440
+/* ??? vectorization won't take place for 'short' even if it's the same
+ data layout as 'int' */
+#if defined (__ARC600__) || defined (__ARC700__)
+#define ELEMTYPE short
+#else
+#define ELEMTYPE int
+#endif
+
+extern struct array_struct { ELEMTYPE a[LEN], b[LEN*2], c[LEN*2]; } s;
+
+extern unsigned long maxsum;
+
+/* Defined in max.c; the SIMD build wraps it via mxp-wrap.S.  */
+extern unsigned long f (void);
diff --git a/gcc/config/mxp/demo/dma-gen.c b/gcc/config/mxp/demo/dma-gen.c
new file mode 100644
index 00000000000..eebda100cf3
--- /dev/null
+++ b/gcc/config/mxp/demo/dma-gen.c
@@ -0,0 +1,79 @@
+/* Generate an assembly file with set-up code to place before the linked
+ simd text section.
+ This will kick off a DMA copy of initialized data (read-only and
+ ordinary initialized data are supposed to be concatenated) from main
+ memory to SDM; recording to SCM is then started by the code in
+ mxp-start.S that includes the generated file.
+ Command line parameters:
+ 1: transfer source address in main memory.
+ 2: transfer destination address in SDM.
+ 3: transfer size.
+ (The SCM recording start address is handled via SCM_START in
+ mxp-start.S, not read here.) */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+int
+main (int argc, char **argv)
+{
+ int sdm_start, size;
+ char *main_start;
+ int i;
+ int stride, lines;
+
+ if (argc < 4)
+ exit (1);
+
+ main_start = argv[1];
+ sdm_start = strtol (argv[2], NULL, 0);
+ size = strtol (argv[3], NULL, 0);
+ /* Setting up multiple transfers is expensive, so prefer to transfer a
+ little bit more instead. The worst this will do for sizes up to
+ 37 KB is to transfer 10 extra 32-bit words. */
+ i = (size + 3) >> 2;
+ {
+ int x_start = (i > 63 ? 63 : i);
+ int x_end = (i <= 255 ? 1 : (i+254)/255);
+ int x;
+ int min = i;
+ int min_x = 0, min_y = 0;
+
+ for (x = x_start, min = i; x >= x_end; x--)
+ {
+ int y = (i + x - 1) / x;
+ int r;
+
+ if (y > 255)
+ y = 255;
+ r = x * y - i;
+ if (r < 0)
+ abort ();
+ if (min > r)
+ {
+ min = r;
+ min_x = x;
+ min_y = y;
+ }
+ if (r == 0)
+ break;
+ }
+#if 0
+ printf ("%d %d %d %d\n", i, min_x, min_y, min);
+#endif
+ stride = min_x * 4;
+ lines = min_y;
+ }
+ if (size == lines * stride)
+ printf ("; Copying 0x%x bytes.\n", size);
+ else
+ printf ("; Need to copy 0x%x bytes, copying 0x%x instead.\n",
+ size, lines * stride);
+ printf ("\tvdiwr dr0,0x%x; SDM start\n", sdm_start);
+ printf ("\tmov_s r12,%d\n", stride);
+ printf ("\tvdiwr dr1,r12; SDM stride\n");
+ printf ("\tvdiwr dr2,0x1f%02x%02x; copy %d lines of %d bytes\n",
+ lines, stride, lines, stride);
+ printf ("\tvdiwr dr4,%s; main memory start\n", main_start);
+ printf ("\tvdiwr dr5,r12; main memory stride\n");
+ printf ("\tvdirun pcl,pcl; start dma transfer\n");
+ exit (0);
+}
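As a worked example of the factorization search: an 8 KiB transfer gives size = 0x2000, hence i = 2048 32-bit words; scanning x down from 63, the first exact fit within range is x = 32, y = 64 (r = 0), so the program emits a descriptor for 64 lines of 128 bytes, covering the whole transfer with no excess.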
diff --git a/gcc/config/mxp/demo/max.c b/gcc/config/mxp/demo/max.c
new file mode 100644
index 00000000000..b86db6f5f0a
--- /dev/null
+++ b/gcc/config/mxp/demo/max.c
@@ -0,0 +1,25 @@
+/* /cc1 -O3 t1.c -quiet -fdump-tree-all -ftree-vectorize -fdump-tree-vect-details -mint16 */
+
+#include "demo.h"
+
+unsigned long
+f ()
+{
+ int i, j0, k0, j, k;
+ unsigned long maxsum = 0;
+
+ for (j0 = 0; j0 < LEN; j0 += 8)
+ for (k0 = 0; k0 < LEN; k0 += 8)
+ {
+ ELEMTYPE max = -0x7fff-1;
+
+ for (j = j0, k = k0, i = 0; i < LEN; i++, j++, k++)
+ {
+ ELEMTYPE sum = (s.a[i] ^ s.b[j]) + (s.a[i] ^ s.c[k]);
+
+ max = sum > max ? sum : max;
+ }
+ maxsum += max;
+ }
+ return maxsum;
+}
diff --git a/gcc/config/mxp/demo/mxp-start.S b/gcc/config/mxp/demo/mxp-start.S
new file mode 100644
index 00000000000..f2f3a5d9679
--- /dev/null
+++ b/gcc/config/mxp/demo/mxp-start.S
@@ -0,0 +1,13 @@
+/* Load a single overlay at program startup. */
+ .text
+load_overlay:
+#include DMA_S
+#if 0
+ vrec SCM_START; start recording overlay
+#else
+ mov_s r12, SCM_START
+ vrec r12; start recording overlay
+#endif
+ .section .ctors,"aw",@progbits
+ .align 4
+ .word load_overlay
diff --git a/gcc/config/mxp/demo/mxp-wrap.S b/gcc/config/mxp/demo/mxp-wrap.S
new file mode 100644
index 00000000000..263621132cc
--- /dev/null
+++ b/gcc/config/mxp/demo/mxp-wrap.S
@@ -0,0 +1,39 @@
+/* Wrapper to call @f on mxp. */
+
+/* 2D Transfer mode: use frame table entry 31, specify source
+ and stride in DMA registers DR4 and DR5, i.e.
+ SYS_SRC_ADR_REG and SYS_STRIDE_REG */
+
+ .global f
+f:
+
+#include "dma-f-s.s"
+ vdmawait 1,127 ; Wait for dma transfer to finish.
+; set up special integer registers: i8: stack, i9: absolute addressing base address.
+#if 0
+ vmov.3 vr1,@__mxp____stack_top
+#else
+ vmovw vr1,@__mxp____stack_top,1
+ vbic.2 vr1,vr1,vr1 ; clear i9
+#endif
+; use mxp @f code
+ ; ??? assembler rejects vrun @__mxp__f / 0x0
+ mov_s r12,ENTRY
+ vrun r12
+; get the result out of vr2 by storing to sdm and using dma out.
+ vst128r vr2,[r0] ; overwrites first eight bytes of input in sdm.
+ vdowr dr0,r0 ; SDM start (little endian!)
+ mov_s r12,4
+ vdowr dr1,r12; SDM stride
+ vdowr dr2,0x1f0104; copy 1 line of 4 bytes
+ vdowr dr4,@result; main memory start
+ vdowr dr5,r12; main memory stride
+ vdorun pcl,pcl; start dma transfer
+ vdmawait 127,1 ; Wait for dma transfer to finish.
+.Lwait_for_mxp:
+ lr r0,[SE_STAT]
+ bbit1 r0,6,.Lwait_for_mxp
+ j_s.d [blink]
+ ld r0,[@result]
+
+ .comm result,2,2
diff --git a/gcc/config/mxp/demo/mxp.x b/gcc/config/mxp/demo/mxp.x
new file mode 100644
index 00000000000..77cda97422d
--- /dev/null
+++ b/gcc/config/mxp/demo/mxp.x
@@ -0,0 +1,254 @@
+/* Default linker script, for normal executables */
+OUTPUT_FORMAT("elf32-littlearc", "elf32-bigarc",
+ "elf32-littlearc")
+OUTPUT_ARCH(arc)
+ENTRY(main)
+SEARCH_DIR("/home/irfanr/tasks/045_mxp2/binutils/installdir/arc-elf32/lib");
+/* Do we need any of these for elf?
+ __DYNAMIC = 0; */
+SECTIONS
+{
+ /* Read-only sections, merged into text segment: */
+/* PROVIDE (__executable_start = 0x180); . = 0x180; */
+/* PROVIDE (__executable_start = 0x103); . = 0x103; */
+PROVIDE (__executable_start = 0x0); . = 0x0;
+ .interp : { *(.interp) }
+ .hash : { *(.hash) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+ .rel.init : { *(.rel.init) }
+ .rela.init : { *(.rela.init) }
+ .rel.text : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
+ .rela.text : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
+ .rel.fini : { *(.rel.fini) }
+ .rela.fini : { *(.rela.fini) }
+ .rel.rodata : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
+ .rela.rodata : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
+ .rel.data : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
+ .rela.data : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
+ .rel.tdata : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
+ .rela.tdata : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
+ .rel.tbss : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
+ .rela.tbss : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
+ .rel.ctors : { *(.rel.ctors) }
+ .rela.ctors : { *(.rela.ctors) }
+ .rel.dtors : { *(.rel.dtors) }
+ .rela.dtors : { *(.rela.dtors) }
+ .rel.got : { *(.rel.got) }
+ .rela.got : { *(.rela.got) }
+ .rel.sdata : { *(.rel.sdata .rel.sdata.* .rel.gnu.linkonce.s.*) }
+ .rela.sdata : { *(.rela.sdata .rela.sdata.* .rela.gnu.linkonce.s.*) }
+ .rel.sbss : { *(.rel.sbss .rel.sbss.* .rel.gnu.linkonce.sb.*) }
+ .rela.sbss : { *(.rela.sbss .rela.sbss.* .rela.gnu.linkonce.sb.*) }
+ .rel.sdata2 : { *(.rel.sdata2 .rel.sdata2.* .rel.gnu.linkonce.s2.*) }
+ .rela.sdata2 : { *(.rela.sdata2 .rela.sdata2.* .rela.gnu.linkonce.s2.*) }
+ .rel.sbss2 : { *(.rel.sbss2 .rel.sbss2.* .rel.gnu.linkonce.sb2.*) }
+ .rela.sbss2 : { *(.rela.sbss2 .rela.sbss2.* .rela.gnu.linkonce.sb2.*) }
+ .rel.bss : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
+ .rela.bss : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
+ .init :
+ {
+ KEEP (*(.init))
+ } =0
+ .plt : { *(.plt) }
+ .text :
+ {
+ *(.text .stub .text.* .gnu.linkonce.t.*)
+ /* .gnu.warning sections are handled specially by elf32.em. */
+ *(.gnu.warning)
+ } =0
+ .text.init :
+ {
+ *(.text.init)
+ } =0
+ .fini :
+ {
+ KEEP (*(.fini))
+ } =0
+ PROVIDE (__etext = .);
+ PROVIDE (_etext = .);
+ PROVIDE (etext = .);
+ /* If load address of rodata has less alignment than rodata, objcopy will
+ complain. */
+ . = ALIGN(128 / 8);
+ __rodata_start = .;
+ . = 0;
+ /* This is the value that is to be loaded into i9 for
+ 'absolute' addressing. */
+ PROVIDE (__mxp__sdm_base = .);
+ .rodata.mxp : AT (__rodata_start)
+ { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+ __sdm_rodata_end = .;
+ __rodata_end = __rodata_start + __sdm_rodata_end;
+/*
+ .rodata1 : { *(.rodata1) }
+ .sdata2 : { *(.sdata2 .sdata2.* .gnu.linkonce.s2.*) }
+ .sbss2 : { *(.sbss2 .sbss2.* .gnu.linkonce.sb2.*) }
+/*
+ .eh_frame_hdr : { *(.eh_frame_hdr) }
+ /* Adjust the address for the data segment. We want to adjust up to
+ the same address within the page on the next page up. * /
+ . = ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1));
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+ the linker would then create the section even if it turns out to
+ be empty, which isn't pretty. * /
+ . = ALIGN(32 / 8);
+ PROVIDE (__preinit_array_start = .);
+ .preinit_array : { *(.preinit_array) }
+ PROVIDE (__preinit_array_end = .);
+ PROVIDE (__init_array_start = .);
+ .init_array : { *(.init_array) }
+ PROVIDE (__init_array_end = .);
+ PROVIDE (__fini_array_start = .);
+ .fini_array : { *(.fini_array) }
+ PROVIDE (__fini_array_end = .);
+ .data :
+ {
+ *(.data .data.* .gnu.linkonce.d.*)
+ SORT(CONSTRUCTORS)
+ }
+ .data.init :
+ {
+ *(.data.init)
+ }
+ .data1 : { *(.data1) }
+ .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
+ .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }
+ .eh_frame : { KEEP (*(.eh_frame)) }
+ .gcc_except_table : { *(.gcc_except_table) }
+ .dynamic : { *(.dynamic) }
+ .ctors :
+ {
+ /* gcc uses crtbegin.o to find the start of
+ the constructors, so we make sure it is
+ first. Because this is a wildcard, it
+ doesn't matter if the user does not
+ actually link against crtbegin.o; the
+ linker won't look for a file to match a
+ wildcard. The wildcard also means that it
+ doesn't matter which directory crtbegin.o
+ is in. * /
+ KEEP (*crtbegin*.o(.ctors))
+ /* We don't want to include the .ctor section from
+ from the crtend.o file until after the sorted ctors.
+ The .ctor section from the crtend file contains the
+ end of ctors marker and it must be last * /
+ KEEP (*(EXCLUDE_FILE (*crtend*.o ) .ctors))
+ KEEP (*(SORT(.ctors.*)))
+ KEEP (*(.ctors))
+ }
+ .dtors :
+ {
+ KEEP (*crtbegin*.o(.dtors))
+ KEEP (*(EXCLUDE_FILE (*crtend*.o ) .dtors))
+ KEEP (*(SORT(.dtors.*)))
+ KEEP (*(.dtors))
+ }
+ .jcr : { KEEP (*(.jcr)) }
+ .got : { *(.got.plt) *(.got) }
+ /* We want the small data sections together, so single-instruction offsets
+ can access them all, and initialized data all before uninitialized, so
+ we can shorten the on-disk segment size. * /
+ .sdata :
+ {
+ __SDATA_BEGIN__ = .;
+ *(.sdata .sdata.* .gnu.linkonce.s.*)
+ }
+ _edata = .;
+ PROVIDE (edata = .);
+ __bss_start = .;
+ .sbss :
+ {
+ PROVIDE (__sbss_start = .);
+ PROVIDE (___sbss_start = .);
+ *(.dynsbss)
+ *(.sbss .sbss.* .gnu.linkonce.sb.*)
+ *(.scommon)
+ PROVIDE (__sbss_end = .);
+ PROVIDE (___sbss_end = .);
+ }
+*/
+ .bss (NOLOAD) :
+ {
+ __sdm_bss_start = .;
+ *(.bss)
+ *(.bss.* .gnu.linkonce.b.*)
+ __sdm_bss_common = .;
+ *(COMMON)
+ /* Align here to ensure that the .bss section occupies space up to
+ _end. Align after .bss to ensure correct alignment even if the
+ .bss section disappears because there are no input sections. */
+ . = ALIGN(32 / 8);
+ }
+ __sdm_bss_end = .;
+ __bss_end = __rodata_start + __sdm_bss_end;
+  /* The next load address would be __rodata_start + __sdm_bss_common. */
+ . = ALIGN(32 / 8);
+ _end = .;
+ PROVIDE (end = .);
+ /* We want to be able to set a default stack / heap size in a dejagnu
+ board description file, but override it for selected test cases.
+ The options appear in the wrong order to do this with a single symbol -
+ ldflags comes after flags injected with per-file stanzas, and thus
+ the setting from ldflags prevails. */
+ .heap (NOLOAD) :
+ {
+ __start_heap = . ;
+ . = . + (DEFINED(__HEAP_SIZE) ? __HEAP_SIZE : (DEFINED(__DEFAULT_HEAP_SIZE) ? __DEFAULT_HEAP_SIZE : 20)) ;
+ __end_heap = . ;
+ }
+ . = ALIGN(0x8);
+ .stack (NOLOAD) :
+ {
+ __stack = . ;
+ . = . + (DEFINED(__STACK_SIZE) ? __STACK_SIZE : (DEFINED(__DEFAULT_STACK_SIZE) ? __DEFAULT_STACK_SIZE : 64)) ;
+ /*
+ . = 0x7ff0;
+ __stack_top = . ;
+ */
+ }
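+  /* Example (hypothetical invocation): a single test can override the board
+     default with "ld --defsym __STACK_SIZE=0x800 ..." while the dejagnu
+     board file supplies __DEFAULT_STACK_SIZE for all other tests; the same
+     scheme works for __HEAP_SIZE / __DEFAULT_HEAP_SIZE.  */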
+ __stack_top = 0x7ff0;
+ /* Stabs debugging sections. */
+ .stab 0 : { *(.stab) }
+ .stabstr 0 : { *(.stabstr) }
+ .stab.excl 0 : { *(.stab.excl) }
+ .stab.exclstr 0 : { *(.stab.exclstr) }
+ .stab.index 0 : { *(.stab.index) }
+ .stab.indexstr 0 : { *(.stab.indexstr) }
+ .comment 0 : { *(.comment) }
+ .arcextmap 0 : { *(.arcextmap) }
+ /* DWARF debug sections.
+ Symbols in the DWARF debugging sections are relative to the beginning
+ of the section so we begin them at 0. */
+ /* DWARF 1 */
+ .debug 0 : { *(.debug) }
+ .line 0 : { *(.line) }
+ /* GNU DWARF 1 extensions */
+ .debug_srcinfo 0 : { *(.debug_srcinfo) }
+ .debug_sfnames 0 : { *(.debug_sfnames) }
+ /* DWARF 1.1 and DWARF 2 */
+ .debug_aranges 0 : { *(.debug_aranges) }
+ .debug_pubnames 0 : { *(.debug_pubnames) }
+ /* DWARF 2 */
+ .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) }
+ .debug_abbrev 0 : { *(.debug_abbrev) }
+ .debug_line 0 : { *(.debug_line) }
+ .debug_frame 0 : { *(.debug_frame) }
+ .debug_str 0 : { *(.debug_str) }
+ .debug_loc 0 : { *(.debug_loc) }
+ .debug_macinfo 0 : { *(.debug_macinfo) }
+ /* SGI/MIPS DWARF 2 extensions */
+ .debug_weaknames 0 : { *(.debug_weaknames) }
+ .debug_funcnames 0 : { *(.debug_funcnames) }
+ .debug_typenames 0 : { *(.debug_typenames) }
+ .debug_varnames 0 : { *(.debug_varnames) }
+ /* ARC Extension Sections */
+ .arcextmap 0 :
+ {
+ *(.gnu.linkonce.arcextmap.*)
+ }
+ /DISCARD/ : { *(.__arc_profile_*) }
+ /DISCARD/ : { *(.note.GNU-stack) }
+}
diff --git a/gcc/config/mxp/mxp-genregset.c b/gcc/config/mxp/mxp-genregset.c
new file mode 100644
index 00000000000..455f62d09d0
--- /dev/null
+++ b/gcc/config/mxp/mxp-genregset.c
@@ -0,0 +1,432 @@
+/* Generate the machine mode enumeration and associated tables.
+ Copyright (C) 2003, 2004, 2008
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING. If not, write to the Free
+Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301, USA. */
+
+#include "config/mxp/regset-config.h"
+#include "bconfig.h"
+#include "system.h"
+#include "errors.h"
+
+int vec_lanes[] = MXP_VEC_LANE_SETS;
+int acc_lanes[] = MXP_FLAG_ACC_LANE_SETS;
+#define flag_lanes acc_lanes
+int scalar_lanes[] = MXP_SCALAR_LANE_SETS;
+const char *fake_reg_names[] = MXP_FAKE_REG_NAMES;
+const int regs_per_vreg = 16 / MXP_UNITS_PER_MXP_VEC_REG;
+
+#define SCALAR_NSETS ARRAY_SIZE(scalar_lanes)
+#define VEC_NSETS ARRAY_SIZE(vec_lanes)
+#define ACC_NSETS ARRAY_SIZE(acc_lanes)
+#define FLAG_NSETS ARRAY_SIZE(flag_lanes)
+#define N_RCLASS ((int) (SCALAR_NSETS * VEC_NSETS * ACC_NSETS * FLAG_NSETS))
+
+
+/* Return element I of the array LANESET_SET. */
+static int
+laneset (int i, int *laneset_set)
+{
+ return laneset_set[i];
+}
+
+#define DECODE_REGCLASS \
+ int scalar = laneset (i % SCALAR_NSETS, scalar_lanes); \
+ int vec = laneset ((i /= SCALAR_NSETS) % VEC_NSETS, vec_lanes);\
+ int flags = laneset ((i /= VEC_NSETS) % FLAG_NSETS, flag_lanes); \
+ int acc = laneset((i /= FLAG_NSETS) % ACC_NSETS, acc_lanes);
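+
+/* Illustrative example: I is decoded as a mixed-radix number whose least
+   significant digit selects the scalar lane set.  With hypothetical counts
+   SCALAR_NSETS == 4 and VEC_NSETS == 3, i == 7 yields scalar set index
+   7 % 4 == 3 and vector set index (7 / 4) % 3 == 1.  */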
+
+static int
+valid_regclass (int i)
+{
+ DECODE_REGCLASS
+
+ return MXP_VALID_REG_CLASS (scalar, vec, flags, acc);
+}
+
+static void
+emit_regclass_name (int i)
+{
+ if (i == 0)
+ printf ("NO_REGS");
+ else if (i == N_RCLASS - 1)
+ printf ("ALL_REGS");
+ else
+ {
+ DECODE_REGCLASS
+
+ if (scalar)
+ printf ("S%02x", scalar);
+ if (vec)
+ printf ("V%02x", vec);
+ if (flags)
+ printf ("F%02x", flags);
+ if (acc)
+ printf ("A%02x", acc);
+ printf ("_REGS");
+ }
+}
+
+static void
+emit_regclass_content (int i)
+{
+ long shift, mask;
+ int n_words = 0;
+ DECODE_REGCLASS
+
+#define INC_SHIFT(UNITS) \
+ do \
+ { \
+ shift += (UNITS) / MXP_UNITS_PER_MXP_VEC_REG; \
+ if (shift >= 32) \
+ { \
+ if (n_words++ > 5) \
+ { \
+ printf (" \\\n "); \
+ n_words = 1; \
+ } \
+ printf (" 0x%08lx,", mask); \
+ mask = shift = 0; \
+ } \
+ } \
+ while (0)
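+
+  /* Note (descriptive): each hard register covers MXP_UNITS_PER_MXP_VEC_REG
+     units, so a 16-unit vector register contributes
+     16 / MXP_UNITS_PER_MXP_VEC_REG mask bits; whenever 32 bits have
+     accumulated, INC_SHIFT flushes one word of the initializer.  */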
+
+ /* vr0 / vr1 contain scalar regs. */
+ mask = scalar | (scalar << 8);
+ shift = 16;
+  /* vr2 .. vr61 are (inasmuch as implemented) non-scalar vector registers. */
+ for (i = 2; i < 62; i++)
+ {
+ mask |= vec << shift;
+ INC_SHIFT (16);
+ }
+ /* accumulator */
+ mask |= acc << shift;
+ INC_SHIFT (16);
+ /* vflags */
+ mask |= flags << shift;
+ INC_SHIFT (16);
+ /* Fake registers. */
+ for (i = 0; i < (int) ARRAY_SIZE (fake_reg_names); i++)
+ {
+ INC_SHIFT (0);
+ if (scalar & MXP_FAKE_REG_LANES)
+ mask |= 1 << shift++;
+ }
+ INC_SHIFT (256);
+}
+
+static void
+upputs (const char *s)
+{
+ while (*s)
+#if 0
+ putchar (TOUPPER (*s++));
+#else
+ {
+ putchar (*s >= 'a' && *s <= 'z' ? *s - 'a' + 'A': *s);
+ s++;
+ }
+#endif
+}
+
+static void
+regno_reg_class_1 (char letter, int *a, int size)
+{
+ int mask, seen = 0;
+ int i;
+
+ for (i = 0; i < size; i++)
+ {
+ mask = a[i];
+ if (!(mask & ~seen))
+ continue;
+ seen |= mask;
+ if (seen != 0xff)
+ printf ("1 << VREG_LANE (N) & %d ? %c%02x_REGS : ", mask, letter, mask);
+ else
+ printf ("%c%02x_REGS", letter, mask);
+ }
+}
+
+static void
+regno_reg_class_2 (void)
+{
+ printf ("#define REGNO_REG_CLASS(N) \\\n"
+ " ((N) < ACC_REG \\\n"
+ " ? ((N) < VR2_REG \\\n"
+ " ? (");
+ regno_reg_class_1 ('S', scalar_lanes, ARRAY_SIZE (scalar_lanes));
+ printf (") \\\n"
+ " : (");
+ regno_reg_class_1 ('V', vec_lanes, VEC_NSETS);
+ printf (")) \\\n"
+ " : ((N) < VFLAGS_REG \\\n"
+ " ? (");
+ regno_reg_class_1 ('A', acc_lanes, ACC_NSETS);
+ printf (") \\\n"
+ " : (");
+ regno_reg_class_1 ('F', flag_lanes, FLAG_NSETS);
+ printf (")))\n\n");
+}
+
+/* Print FIXED_REGISTERS / CALL_USED_REGISTERS according to PREFIX.
+ For call used registers, the call used scalars are marked in SCALAR_USED,
+ and the call used non-scalar vector registers are marked in VREG_USED. */
+static void
+print_fixed_regs (const char *prefix, long scalar_used, long vreg_used)
+{
+ int n_printed = 0;
+ int i, j;
+
+#define PRINT_VAL(VAL) \
+ do \
+ { \
+ printf ("%d, ", (VAL) != 0); \
+ if (++n_printed > 15) \
+ { \
+ printf ("\\\n "); \
+ n_printed = 0; \
+ } \
+ } while (0)
+
+ printf ("#define %s_REGISTERS \\\n{ \\\n "
+ "/* Scalar registers. Stack pointer and bss base are fixed. */ \\\n"
+ " ", prefix);
+ for (i = 0; i < 16; i++)
+ PRINT_VAL (i == STACK_POINTER_REGNUM || i == BSS_BASE_REGNUM
+ || ((1 << i) & scalar_used));
+ printf ("/* vr2-vr30: non-scalar vector registers. */ \\\n ");
+ for (i = 2; i < 31; i++)
+ for (j = 0; j < regs_per_vreg; j++)
+ PRINT_VAL ((1 << i) & vreg_used);
+ printf ("/* vr31: call stack; vr32-vr61: reserved. */ \\\n ");
+ for (i = 31; i < 62; i++)
+ for (j = 0; j < regs_per_vreg; j++)
+ PRINT_VAL (1);
+ printf ("/* Accumulator and flags are not fixed to allow lane allocation. */ \\\n ");
+ for (i = 62; i < 64; i++)
+ for (j = 0; j < regs_per_vreg; j++)
+ PRINT_VAL (scalar_used);
+ for (i = 0; i < (int) ARRAY_SIZE (fake_reg_names); i++)
+ printf ("1 /* %s */, ", fake_reg_names[i]);
+ printf ("\\\n}\n\n");
+}
+
+static void
+emit_regsets (void)
+{
+ int i, j;
+ int n_printed;
+ const char *name;
+ int n_regs = 0;
+
+#define COUNT_REG \
+ do \
+ { \
+ n_regs++; \
+ if (++n_printed > 7) \
+ { \
+ printf ("\\\n "); \
+ n_printed = 0; \
+ } \
+ } while (0)
+
+ printf ("#define REGISTER_NAMES \\\n{ \\\n"
+ " \"i0\", \"i1\", \"i2\", \"i3\", \"i4\", \"i5\", \"i6\", \"i7\", \\\n"
+ " \"i8\", \"i9\", \"i10\", \"i11\", \"i12\", \"i13\", \"i14\", \"i15\", \\\n ");
+ n_regs += 16;
+ for (i = 2, n_printed = 0; i < 62; i++)
+ {
+ for (j = 0; j < 16 / MXP_UNITS_PER_MXP_VEC_REG; j++)
+ {
+ if (j)
+ printf ("\"vr%d_%d\",%s", i, j * MXP_UNITS_PER_MXP_VEC_REG / 2,
+ " " + (i > 9));
+ else
+ printf ("\"vr%d\",%s", i, " " + (i > 9));
+ COUNT_REG;
+ }
+ }
+ name = "acc";
+ do
+ {
+ for (j = 0; j < 16 / MXP_UNITS_PER_MXP_VEC_REG; j++)
+ {
+ if (j)
+ printf ("\"%s_%d\",%s", name, j, " " + (*name != 'a'));
+ else
+ printf ("\"%s\",%s", name, " " + (*name != 'a'));
+ COUNT_REG;
+ }
+ } while (*name == 'a' && (name = "vflags"));
+ for (i = 0; i < (int) ARRAY_SIZE (fake_reg_names); i++)
+ {
+ printf ("\"%s\", ", fake_reg_names[i]);
+ COUNT_REG;
+ }
+ if (n_printed)
+ printf ("\\\n");
+ printf("}\n\n");
+ printf ("#define ADDITIONAL_REGISTER_NAMES"
+ " { { \"vr0\", 0}, { \"vr1\", 8} }\n\n");
+
+ printf ("#define LAST_SCALAR_REG %d\n\n", 15);
+ printf ("#define SCALAR_REGNO_P(N) \\\n"
+ " ((N) <= LAST_SCALAR_REG || ((N) >= ");
+ upputs (fake_reg_names[0]);
+  printf ("_REG && (N) <= ");
+ upputs (fake_reg_names[ARRAY_SIZE (fake_reg_names)-1]);
+ printf ("_REG))\n");
+ printf ("#define LANES_PER_REG %d\n", MXP_UNITS_PER_MXP_VEC_REG / 2);
+ printf ("#define LANE0_REGNO(N) ((N) & -%d)\n", regs_per_vreg);
+ printf ("#define VREG_LANE(N) (((N) & %d) * LANES_PER_REG)\n",
+ regs_per_vreg - 1);
+ printf ("#define REGS_PER_VREG %d\n\n", regs_per_vreg);
+ printf ("#define FIRST_PSEUDO_REGISTER %d\n\n", n_regs);
+ regno_reg_class_2 ();
+
+ print_fixed_regs ("FIXED", 0, 0);
+ print_fixed_regs ("CALL_USED", 0xff, 0x1ffff);
+
+ printf ("enum reg_class\n{");
+ for (i = j = 0; i < N_RCLASS; i++)
+ {
+ if (!valid_regclass (i))
+ continue;
+ if (j++ % 4 == 0)
+ printf ("\n ");
+ else
+ printf (" ");
+ emit_regclass_name (i);
+ printf (",");
+ }
+ printf (" LIM_REG_CLASSES\n};\n\n"
+ "#define N_REG_CLASSES (int) LIM_REG_CLASSES\n\n"
+ "#define REG_CLASS_NAMES \\\n{");
+ for (i = j = 0; i < N_RCLASS; i++)
+ {
+ if (!valid_regclass (i))
+ continue;
+ if (j++ % 4 == 0)
+ printf (" \\\n \"");
+ else
+ printf (" \"");
+ emit_regclass_name (i);
+ printf ("\",");
+ }
+ printf (" \"LIM_REG_CLASSES\" \\\n};\n\n");
+ printf ("#define REG_CLASS_CONTENTS \\\n{ \\\n");
+ for (i = 0; i < N_RCLASS; i++)
+ {
+ if (!valid_regclass (i))
+ continue;
+ printf ("/* ");
+ emit_regclass_name (i);
+ printf (" */ \\\n { ");
+ emit_regclass_content (i);
+ printf (" }, \\\n");
+ }
+ printf ("}\n");
+}
+
+static void
+emit_md (void)
+{
+ int n_regs = 0;
+ int i;
+
+ printf ("(define_constants\n");
+ n_regs += 16; /* Scalar regs. */
+ printf (" [(VR2_REG\t\t%d)\n", n_regs);
+ n_regs += 29 * regs_per_vreg; /* Advance past vr2..vr30. */
+ printf (" (CALL_STACK_REG\t%d)\n", n_regs);
+ printf (" (VR32_REG\t\t%d)\n", n_regs += regs_per_vreg);
+  n_regs += 30 * regs_per_vreg; /* Advance past vr32..vr61. */
+ printf (" (ACC_REG\t\t%d)\n", n_regs);
+ printf (" (ACCH_REG\t\t%d)\n", n_regs + regs_per_vreg/2);
+ printf (" (VFLAGS_REG\t\t%d)\n", n_regs += regs_per_vreg);
+ printf (" (VFLAGSH_REG\t\t%d)\n", n_regs + regs_per_vreg/2);
+ n_regs += regs_per_vreg;
+ for (i = 0; i < (int) ARRAY_SIZE (fake_reg_names); i++)
+ {
+ printf (" (");
+ upputs (fake_reg_names[i]);
+ printf ("_REG\t\t%d)\n", n_regs++);
+ }
+ printf (" ])\n");
+}
+
+static void
+emit_c (void)
+{
+ int j;
+ const char *s = "scalar_";
+ int n_printed = 0;
+
+ printf (
+"/* For each register class, indicate the lanes relevant for potential\n"
+" cross-lane moves. For move destinations, we can disregard scalar\n"
+" registers, since they allow cheap cross-lane moves. */\n");
+ do
+ {
+ printf ("\nunsigned char class_%svec_lanes[] =\n{\n ", s);
+ for (j = 0; j < N_RCLASS; j++)
+ {
+ int i = j;
+ DECODE_REGCLASS
+
+ if (!valid_regclass (j))
+ continue;
+ if (n_printed++ > 12)
+ {
+ printf ("\n ");
+ n_printed = 1;
+ }
+ printf (" %d,", vec | flags | acc | (*s ? scalar : 0));
+ }
+ printf ("\n};\n");
+ }
+ while (*s && (s = ""));
+}
+
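+/* Driver (a sketch of the intended usage; the output file names here are
+   inferred from the #includes elsewhere in this patch): "-h" emits the
+   register-set header, "-m" the md constants, and no argument the C tables,
+   e.g. "mxp-genregset -h > mxp-regset.h" and
+   "mxp-genregset -m > mxp-regset.md".  */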
+int
+main (int argc, char **argv)
+{
+ bool gen_header = false, gen_md = false;
+ char *progname = argv[0];
+
+ if (argc == 1)
+ ;
+ else if (argc == 2 && !strcmp (argv[1], "-h"))
+ gen_header = true;
+ else if (argc == 2 && !strcmp (argv[1], "-m"))
+ gen_md = true;
+ else
+ {
+ error ("usage: %s -h|-m > file", progname);
+ return FATAL_EXIT_CODE;
+ }
+ if (gen_header)
+ emit_regsets ();
+ else if (gen_md)
+ emit_md ();
+ else
+ emit_c ();
+ return 0;
+}
diff --git a/gcc/config/mxp/mxp-modes.def b/gcc/config/mxp/mxp-modes.def
new file mode 100644
index 00000000000..f2eadcf8651
--- /dev/null
+++ b/gcc/config/mxp/mxp-modes.def
@@ -0,0 +1,15 @@
+VECTOR_MODES (INT, 4); /* V4QI V2HI */
+VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
+VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
+PARTIAL_INT_MODE (SI); /* Needed to make V2PSI / V4PSI. */
+VECTOR_MODE (PARTIAL_INT, PSI, 2); /* V2PSI, flags for DImode arithmetic. */
+VECTOR_MODE (PARTIAL_INT, PSI, 4); /* V4PSI, flags for V2DImode arithmetic. */
+VECTOR_MODES (FLOAT, 8); /* V2SF */
+VECTOR_MODES (FLOAT, 16); /* V4SF V2DF */
+#define CC_MODES(N) SIZED_CC_MODE (N, 2); \
+ VECTOR_MODE (CC, N, 2); VECTOR_MODE (CC, N, 4); VECTOR_MODE (CC, N, 8)
+CC_MODES (CCI); /* Ordinary integer flags. */
+CC_MODES (CCZN); /* Only zero / negative flag relevant. */
+CC_MODES (CCZ); /* Only zero flag relevant. */
+VECTOR_MODE (CC, CC, 2); /* V2CCmode - flag clobber for DI arithmetic. */
+VECTOR_MODE (CC, CC, 4); /* V4CCmode - flag clobber for V2DI arithmetic. */
diff --git a/gcc/config/mxp/mxp-protos.h b/gcc/config/mxp/mxp-protos.h
new file mode 100644
index 00000000000..3dd4469e5b0
--- /dev/null
+++ b/gcc/config/mxp/mxp-protos.h
@@ -0,0 +1,20 @@
+#ifdef RTX_CODE
+extern void mxp_print_operand (FILE *file, rtx x, int code);
+extern bool mxp_legitimate_address_p (enum machine_mode, rtx, int strict_p);
+extern int mxp_initial_elimination_offset (int, int);
+extern void mxp_emit_conditional_branch (rtx *, enum rtx_code);
+struct secondary_reload_info;
+extern enum reg_class mxp_secondary_reload (bool, rtx, enum reg_class,
+ enum machine_mode,
+ struct secondary_reload_info *);
+#endif /* RTX_CODE */
+
+extern void mxp_expand_prologue (void);
+extern void mxp_expand_epilogue (void);
+extern struct machine_function *mxp_init_machine_status (void);
+extern void mxp_conditional_register_usage (void);
+extern int mxp_register_move_cost (enum machine_mode mode,
+ enum reg_class from, enum reg_class to);
+extern int mxp_memory_move_cost (enum machine_mode, enum reg_class, int);
+extern int peep2_regno_mode_dead_p (int ofs, int regno, enum machine_mode mode);
+
diff --git a/gcc/config/mxp/mxp.c b/gcc/config/mxp/mxp.c
new file mode 100644
index 00000000000..2fcfa9d3242
--- /dev/null
+++ b/gcc/config/mxp/mxp.c
@@ -0,0 +1,685 @@
+/* Subroutines used for code generation on the ARC mxp cpu.
+
+ Copyright (C) 2008 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define BINUTILS_FIXED
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "tree.h"
+#include "rtl.h"
+#include "hard-reg-set.h"
+#include "function.h"
+#include "toplev.h"
+#include "tm_p.h"
+#include "ggc.h"
+#include "target.h"
+#include "target-def.h"
+#include "output.h"
+#include "expr.h"
+#include "optabs.h"
+#include "sbitmap.h"
+#include "reload.h"
+#include "recog.h"
+
+struct machine_function GTY(())
+{
+ int rounded_frame_size;
+ int saved_regs_size;
+ unsigned char lanes_written[N_HARDWARE_VECREGS];
+};
+
+rtx mxp_compare_op0, mxp_compare_op1;
+sbitmap mxp_acc_classes;
+
+/* Functions to save and restore machine-specific function data. */
+struct machine_function *
+mxp_init_machine_status (void)
+{
+ return ggc_alloc_cleared (sizeof (struct machine_function));
+}
+
+static void mxp_init_libfuncs (void);
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS mxp_init_libfuncs
+
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD mxp_secondary_reload
+
+static bool mxp_vector_mode_supported_p (enum machine_mode mode);
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P mxp_vector_mode_supported_p
+
+#undef TARGET_PROMOTE_FUNCTION_RETURN
+#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_const_tree_true
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+/* Operand codes handled by machine-independent code: l,c,n
+   v: vector register
+   L: Lane mask
+   O: Lane mask for lower half
+   P: Lane mask for upper half
+   N: load memory size / lane specifier
+   M: memory size / lane specifier
+   Q: lower half of 64 bit constant
+   R: upper half of 64 bit constant
+   C: condition
+   #: Output nops for unfilled delay slots */
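+
+/* Illustrative %L example: an SImode value occupies two 16-bit lanes, so
+   for an SImode register starting in lane 2, %L prints
+   ((1 << 2) - 1) << 2 == 12.  */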
+void
+mxp_print_operand (FILE *file, rtx x, int code)
+{
+ switch (code)
+ {
+ case 'v': /* Vector register. */
+ if (GET_CODE (x) == PARALLEL)
+ x = XEXP (XVECEXP (x, 0, 0), 0);
+ if (x == CONST0_RTX (GET_MODE (x)))
+ {
+ fprintf (file, "vr62");
+ return;
+ }
+ gcc_assert (REG_P (x));
+ if (REGNO (x) <= LAST_SCALAR_REG)
+ fprintf (file, "vr%d", REGNO (x) >> 3);
+ else if (LANE0_REGNO (REGNO (x)) == ACC_REG)
+ fputs ("vzero", file);
+ else if (LANE0_REGNO (REGNO (x)) == VFLAGS_REG)
+ fputs ("vr63", file);
+ else
+ fputs (reg_names[LANE0_REGNO (REGNO (x))], file);
+ return;
+ case 'L': /* Lane mask. */
+ if (x == const0_rtx)
+ {
+	  fputs ("3", file); /* For viv / vkv. */
+ return;
+ }
+ gcc_assert (REG_P (x));
+ if (REGNO (x) <= LAST_SCALAR_REG)
+ fprintf (file, "%d",
+ (((1 << (GET_MODE_SIZE (GET_MODE (x)) >> 1)) - 1)
+ << (REGNO (x) & 7)));
+ else
+ fprintf (file, "%d",
+ (((1 << (GET_MODE_SIZE (GET_MODE (x)) >> 1)) - 1)
+ << VREG_LANE (REGNO (x))));
+ return;
+ case 'O': /* Lane mask lower half. */
+ fprintf (file, "%d",
+ (((1 << (GET_MODE_SIZE (GET_MODE (x)) >> 2)) - 1)
+ << VREG_LANE (REGNO (x))));
+ return;
+ case 'P': /* Lane mask upper half. */
+ fprintf (file, "%d",
+ (((1 << (GET_MODE_SIZE (GET_MODE (x)) >> 2)) - 1)
+ << (VREG_LANE (REGNO (x))
+ + (GET_MODE_SIZE (GET_MODE (x)) >> 2))));
+ return;
+ case 'N': /* Load memory size and lane specifier. */
+ case 'M': /* Memory size and lane specifier. */
+ {
+ int r, lane;
+
+ gcc_assert (REG_P (x));
+ r = REGNO (x);
+ if (r <= LAST_SCALAR_REG)
+ lane = r & 7;
+ else
+ lane = VREG_LANE (r);
+ fprintf (file, "%d", GET_MODE_BITSIZE (GET_MODE (x)));
+ if (code == 'N' && GET_MODE_SIZE (GET_MODE (x)) == 1)
+ fprintf (file, "w");
+ /* ??? The manual says that _0 is OK. The assembler disagrees. */
+ if (lane)
+ fprintf (file, "_%d", lane);
+ return;
+ }
+ case 'Q': /* 64 bit constant lower half. */
+ {
+	enum machine_mode mode = GET_MODE (x);
+	rtx sub = NULL_RTX;
+
+	if (mode == VOIDmode)
+ mode = DImode;
+ if (GET_MODE_SIZE (mode) >= 8)
+ sub = simplify_subreg (SImode, x, mode, 0);
+ if (sub)
+ mxp_print_operand (file, sub, 0);
+ else
+ output_operand_lossage ("invalid operand to %%Q");
+ return;
+ }
+ case 'R': /* 64 bit constant upper half. */
+ {
+	enum machine_mode mode = GET_MODE (x);
+	rtx sub = NULL_RTX;
+
+	if (mode == VOIDmode)
+ mode = DImode;
+ if (GET_MODE_SIZE (mode) >= 8)
+ sub = simplify_subreg (SImode, x, mode, 4);
+ if (sub)
+ mxp_print_operand (file, sub, 0);
+ else
+ output_operand_lossage ("invalid operand to %%R");
+ return;
+ }
+ case 'C': /* Condition. */
+ {
+ switch (GET_CODE (x))
+ {
+ case GTU: fprintf (file, ".hi"); return;
+ case GEU: fprintf (file, ".hs"); return;
+ case LTU: fprintf (file, ".lo"); return;
+ case LEU: fprintf (file, ".ls"); return;
+ case EQ: fprintf (file, ".eq"); return;
+ case NE: fprintf (file, ".ne"); return;
+ default: gcc_unreachable ();
+ }
+ }
+ case '#':
+ /* Output a nop for each unfilled delay slot. */
+ {
+ int i = 3 - dbr_sequence_length ();
+
+ /* If there are two delay slot insns, check if the second
+ depends on the first; in that case, we want the nop between
+ these two insns. */
+ if (i == 1)
+ {
+ rtx i1 = XVECEXP (final_sequence, 0, 1);
+ rtx i2 = XVECEXP (final_sequence, 0, 2);
+ rtx set = single_set (i1);
+
+ if (set && reg_referenced_p (SET_DEST (set), PATTERN (i2)))
+ {
+ fprintf (file, "\n");
+ final_scan_insn (i1, asm_out_file, optimize, 1, NULL);
+ INSN_DELETED_P (i1) = 1;
+ fprintf (file, "\tvnop");
+ return;
+ }
+ }
+ while (i--)
+ fprintf (file, "\n\tvnop");
+ return;
+ }
+ case 'r': case 'i':
+#ifndef BINUTILS_FIXED
+ if (GET_CODE (x) == SYMBOL_REF
+ || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
+ || GET_CODE (x) == CODE_LABEL || GET_CODE (x) == LABEL_REF)
+ {
+ fputs ((code == 'r' ? "r0" : "0"), file);
+ return;
+ }
+ break;
+#endif
+ case 0:
+ break;
+ default :
+ /* Unknown flag. */
+ output_operand_lossage ("invalid operand output code");
+ }
+ switch (GET_CODE (x))
+ {
+ case REG:
+ if (REGNO (x) <= LAST_SCALAR_REG && GET_MODE_SIZE (GET_MODE (x)) > 2)
+ {
+ fputc ('k', file);
+ fputs (reg_names[REGNO (x)] + 1, file);
+ break;
+ }
+ fputs (reg_names[REGNO (x)], file);
+ break;
+ case MEM:
+ {
+#if 0
+ int size = GET_MODE_SIZE (GET_MODE (x));
+#endif
+ rtx addr = XEXP (x, 0);
+ rtx offs = const0_rtx;
+ int r = BSS_BASE_REGNUM;
+
+ if (CONSTANT_P (addr))
+ offs = addr;
+ else if (GET_CODE (addr) == PLUS)
+ {
+ r = REGNO (XEXP (addr, 0));
+ offs = XEXP (addr, 1);
+ }
+ else
+ r = REGNO (addr);
+ if (GET_CODE (offs) == CONST_INT)
+#if 0 /* The manual says that you use the unscaled offset... */
+ fprintf (file, "[%s,%ld]", reg_names[r], INTVAL (offs) / size);
+#else /* ... but the assembler expects the scaled offset. */
+ fprintf (file, "[%s,%ld]", reg_names[r], INTVAL (offs));
+#endif
+ else
+ {
+ fprintf (file, "[%s,", reg_names[r]);
+ mxp_print_operand (file, offs, 'i');
+ fputc (']', file);
+ }
+ break;
+ }
+ case CONST_DOUBLE :
+ /* We handle SFmode constants here as output_addr_const doesn't. */
+ if (GET_MODE (x) == SFmode)
+ {
+ REAL_VALUE_TYPE d;
+ long l;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (d, x);
+ REAL_VALUE_TO_TARGET_SINGLE (d, l);
+ fprintf (file, "0x%08lx", l);
+ break;
+ }
+ /* Fall through. Let output_addr_const deal with it. */
+ default:
+ output_addr_const (file, x);
+ break;
+ }
+}
+
+bool
+mxp_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
+{
+ int mem_mode_size = (GET_MODE_SIZE (mode));
+ /* 64 bit moves might need to be split. */
+ int min_mode_size
+ = ((mem_mode_size == 8 && !reload_completed) ? 4 : mem_mode_size);
+
+ if (REG_P (x) && (!strict_p || REGNO (x) <= LAST_SCALAR_REG))
+ return 1;
+ if (CONSTANT_P (x) && mem_mode_size >= 16)
+ return 1;
+ if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
+ && (!strict_p || REGNO (XEXP (x, 0)) <= LAST_SCALAR_REG))
+ x = XEXP (x, 1);
+ if (mem_mode_size >= 16 && (CONST_INT_P (x) && (INTVAL (x) & 15) == 0))
+ return 1;
+ if (CONST_INT_P (x)
+ && (INTVAL (x) & (mem_mode_size - 1)) == 0
+ && INTVAL (x) >= (-1 << 11) * min_mode_size
+ && INTVAL (x) < (1 << 11) * min_mode_size)
+ return 1;
+ /* We assume that all symbols are suitably positioned for i9+offset
+ addressing in the data / bss sections. */
+ if (!TARGET_BIGDATA)
+ {
+ if (GET_CODE (x) == SYMBOL_REF)
+ return 1;
+ if (mem_mode_size == 4
+ && GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
+ && CONST_INT_P (XEXP (XEXP (x, 0), 1))
+ && INTVAL (XEXP (XEXP (x, 0), 1)) <= 4)
+ return 1;
+ }
+ return 0;
+}
+
+int
+mxp_initial_elimination_offset (int from, int to)
+{
+ /* FIXME */
+ return 0;
+}
+
+static rtx
+frame_insn (rtx x)
+{
+ x = emit_insn (x);
+ RTX_FRAME_RELATED_P (x) = 1;
+ return x;
+}
+
+/* Helper function for find_save_lanes, called via note_stores. */
+static void
+find_save_lanes_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
+{
+ unsigned char *buf = data;
+ int r = true_regnum (x);
+ int vecnum, lanes;
+
+ if (r < 0)
+ return;
+ if (r <= LAST_SCALAR_REG)
+ {
+ vecnum = r >> 3;
+ lanes = 1 << (r & 7);
+ }
+ else
+ {
+ lanes = (1 << (GET_MODE_SIZE (GET_MODE (x)) >> 1)) - 1;
+ vecnum
+ = (LANE0_REGNO (r) - (VR2_REG - 2 * REGS_PER_VREG)) / REGS_PER_VREG;
+ lanes <<= VREG_LANE (r);
+ }
+ if ((unsigned)vecnum >= N_HARDWARE_VECREGS)
+ gcc_assert (r >= ACC_REG);
+ else
+ buf[vecnum] |= lanes;
+}
+
+/* Each byte in BUF corresponds to a vector register.  For each register,
+   generate a bitmask that shows which lanes need saving. */
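+/* For instance (illustrative): an SImode store to a register whose low lane
+   is lane 4 of vr5 sets bits 4 and 5 in buf[5].  */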
+static void
+find_save_lanes (unsigned char *buf)
+{
+ rtx insn;
+
+ memset (buf, 0, N_HARDWARE_VECREGS);
+ for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
+ note_stores (INSN_P (insn) ? PATTERN (insn) : insn,
+ find_save_lanes_1, buf);
+}
+
+/* A register is saved in SAVE_MODE of size SAVE_SIZE if there are bits in
+ both the upper and the lower half that need saving. In that case, return
+ an rtx for the register to be saved. */
+static rtx
+save_reg (int vecnum, int save_size, enum machine_mode save_mode,
+ unsigned char *buf)
+{
+ int lanes = buf[vecnum];
+ int regno, size, i;
+ rtx ret;
+
+ if (!lanes)
+ return NULL_RTX;
+ if (vecnum < 2)
+ {
+ for (regno = vecnum * 8; regno < vecnum * 8 + 8; regno++)
+ if (call_used_regs[regno])
+ lanes &= ~(1 << (regno & 7));
+ if (!lanes)
+ return NULL_RTX;
+ /* FIXME: calculate optimal concatenations. */
+ /* N.B. we can restore the stack together with other scalar registers. */
+ if (save_size != 16)
+ return NULL_RTX;
+ ret = gen_rtx_PARALLEL (V4SImode, rtvec_alloc (4));
+ for (i = 0; i < 4; i++)
+ XVECEXP (ret, 0, i)
+ = gen_rtx_EXPR_LIST (VOIDmode,
+ gen_rtx_REG (SImode, vecnum * 8 + i * 2),
+ GEN_INT (i * 2));
+ return ret;
+ }
+ regno = vecnum * 2 + LAST_SCALAR_REG - 3;
+ if (call_used_regs[regno])
+ lanes &= -16;
+ if (call_used_regs[regno+1])
+ lanes &= 15;
+ if (!lanes)
+ return NULL_RTX;
+ if ((lanes & -16) && (lanes & 15))
+ size = 16;
+ else
+ {
+ regno += (lanes > 15);
+ size = lanes | (lanes >> 4);
+ size = ((size & 3) + 1) << ((size & 8) != 0);
+ }
+ if (size != save_size)
+ return NULL_RTX;
+ return gen_rtx_REG (save_mode, regno);
+}
+
+void
+mxp_expand_prologue (void)
+{
+ int frame_size = 0;
+ int save_size;
+ unsigned char *buf = cfun->machine->lanes_written;
+
+ /* There are no interrupts, hence we may save registers before adjusting
+ the stack pointer, and for leaf functions we can leave out the stack
+ adjustment altogether. */
+ find_save_lanes (buf);
+ for (save_size = 16; save_size >= 2; save_size >>= 1)
+ {
+ enum machine_mode save_mode
+ = mode_for_size (BITS_PER_UNIT * save_size, MODE_INT, 0);
+ rtx addr, reg;
+ int regno;
+
+ for (regno = 0; regno < N_HARDWARE_VECREGS; regno++)
+ {
+ reg = save_reg (regno, save_size, save_mode, buf);
+ if (reg)
+ {
+ frame_size += save_size;
+ addr = gen_rtx_PLUS (HImode,
+ stack_pointer_rtx, GEN_INT (-frame_size));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ gen_rtx_MEM (GET_MODE (reg), addr), reg));
+ }
+ }
+ }
+ cfun->machine->saved_regs_size = frame_size;
+ frame_size += get_frame_size ();
+ /* Round up frame size to keep stack 128-bit aligned. */
+ frame_size = (frame_size + 15) & -16;
+ cfun->machine->rounded_frame_size = frame_size;
+ if (!current_function_is_leaf && frame_size)
+ {
+ /* Adjust stack pointer for register saves, local variables and
+ outgoing arguments. */
+ frame_insn (gen_add2_insn (stack_pointer_rtx, GEN_INT (-frame_size)));
+ frame_size = 0;
+ }
+ if (frame_pointer_needed)
+ frame_insn (gen_addhi3 (hard_frame_pointer_rtx,
+ stack_pointer_rtx, GEN_INT (-frame_size)));
+}
+
+void
+mxp_expand_epilogue (void)
+{
+ int frame_size = get_frame_size ();
+ int save_size;
+ unsigned char *buf = cfun->machine->lanes_written;
+
+ /* Round up frame size to keep stack 128-bit aligned. */
+ frame_size = (frame_size + 15) & -16;
+
+ if (!current_function_is_leaf && frame_size)
+ emit_insn (gen_add2_insn (stack_pointer_rtx,
+ GEN_INT (cfun->machine->rounded_frame_size)));
+ /* FIXME: restore callee-saved registers. */
+ frame_size = cfun->machine->saved_regs_size;
+ for (save_size = 2; save_size <= 16; save_size <<= 1)
+ {
+ enum machine_mode save_mode
+ = mode_for_size (BITS_PER_UNIT * save_size, MODE_INT, 0);
+ rtx addr, reg;
+ int regno;
+
+ for (regno = N_HARDWARE_VECREGS - 1; regno >= 0; regno--)
+ {
+ reg = save_reg (regno, save_size, save_mode, buf);
+ if (reg)
+ {
+ frame_size -= save_size;
+ addr = gen_rtx_PLUS (HImode,
+ stack_pointer_rtx, GEN_INT (-frame_size));
+ emit_insn (gen_rtx_SET (VOIDmode,
+ reg, gen_rtx_MEM (GET_MODE (reg), addr)));
+ }
+ }
+ }
+}
+
+static void
+mxp_init_libfuncs (void)
+{
+ set_optab_libfunc (sdiv_optab, HImode, "__divdi3");
+ set_optab_libfunc (sdiv_optab, SImode, "__divdi3");
+ set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
+}
+
+static rtx
+mxp_emit_comparison (enum rtx_code code)
+{
+ rtx tmp, tmp2, flags, result;
+ enum machine_mode mode = GET_MODE (mxp_compare_op0);
+ result = gen_reg_rtx (HImode);
+ if ((mode != HImode || (code != GEU && code != LEU && code != LTU))
+ && !reg_or_0_operand (mxp_compare_op1, VOIDmode))
+ mxp_compare_op1 = force_reg (mode, mxp_compare_op1);
+ tmp2 = gen_rtx_fmt_ee (code, HImode, mxp_compare_op0, mxp_compare_op1);
+
+ switch (code)
+ {
+ case GE: case GT:
+ case EQ: case NE: case LE: case LT:
+ tmp = gen_rtx_SET (VOIDmode, result, gen_rtx_NEG (HImode, tmp2));
+ tmp2 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (mode));
+ tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2));
+ emit_insn (tmp);
+ break;
+ case GEU: case LEU: case GTU: case LTU:
+ flags = gen_reg_rtx (CCImode);
+ emit_insn ((mode == HImode ? gen_ucmphi : gen_ucmpsi)
+ (flags, mxp_compare_op0, mxp_compare_op1));
+ emit_insn (gen_vtany (result, flags, tmp2));
+ break;
+ default: gcc_unreachable ();
+ }
+
+ return result;
+}
+
+void
+mxp_emit_conditional_branch (rtx *operands, enum rtx_code code)
+{
+ rtx result = mxp_emit_comparison (code);
+
+ emit_jump_insn (gen_branch_true (operands[0], result));
+}
+
+enum reg_class
+mxp_secondary_reload (bool in_p, rtx x, enum reg_class class,
+ enum machine_mode mode,
+ secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+ /* FIXME: cross-lane moves from the accumulator; WIP. */
+ if (GET_MODE_SIZE (mode) < 16
+ && (GET_CODE (x) == REG && REGNO (x) < FIRST_PSEUDO_REGISTER)
+ && (in_p
+ ? (class == Af0_REGS || class == A0f_REGS || class == Aff_REGS || class == ALL_REGS)
+ : LANE0_REGNO (REGNO (x)) == ACC_REG))
+ {
+ int x_regno = true_regnum (x);
+ int mask = (GET_MODE_SIZE (mode) - 1) | 1;
+ int x_lanes = mask << VREG_LANE (x_regno);
+ int c_lanes = class_scalar_vec_lanes[class];
+ int in_lanes = in_p ? x_lanes : c_lanes;
+ int out_lanes = in_p ? c_lanes : x_lanes;
+
+ /* CLASS might contain both lower and higher lanes, so we should look
+ at the lanes from x_class to decide which half of the accumulator
+ is copied from. */
+ if (((in_lanes << 4 | in_lanes >> 4) & out_lanes)
+ || ((in_lanes | out_lanes) & ~(mask | mask * 16)))
+ return (!(VREG_LANE (REGNO (x)) & 3)
+ ? (((in_p ? in_lanes : out_lanes) < 16) ^ in_p
+ ? Vf0_REGS : V0f_REGS)
+ : !(out_lanes & 0xf0) ? Vf0_REGS
+ : !(out_lanes & 0x0f) ? V0f_REGS
+ : V01_REGS /* ??? */);
+ }
+  /* Cross-lane moves that clobber the accumulator would lead to overly messy
+     secondary reloads, so we don't generate such reloads, but rather use
+     clobber-free instruction sequences for the move patterns.  A peephole2
+     introduces accumulator clobbers where this is profitable. */
+ return NO_REGS;
+}
+
+void
+mxp_conditional_register_usage (void)
+{
+ int i;
+
+ mxp_acc_classes = sbitmap_alloc (N_REG_CLASSES);
+ for (i = 0; i < N_REG_CLASSES; i++)
+ if (reg_classes_intersect_p (i, Aff_REGS))
+ SET_BIT (mxp_acc_classes, i);
+}
+
+int
+mxp_register_move_cost (enum machine_mode mode,
+ enum reg_class from, enum reg_class to)
+{
+ /* Moving from the accumulator has higher latency. */
+ if (from == A0f_REGS || from == Af0_REGS || from == Aff_REGS)
+ return 4;
+ /* Cross-lane moves cost more and require a clobber, unless source or
+ destination is a scalar register. Even if the source is a scalar
+ register, we need a clobber for a cross-lane move to a non-scalar
+ register, so we must show a higher cost than for an ordinary move. */
+ if (GET_MODE_SIZE (mode) < 8
+ && (class_scalar_vec_lanes[from]
+ & (class_vec_lanes[to] << 4 | class_vec_lanes[to] >> 4)))
+ return TEST_BIT (mxp_acc_classes, (to)) ? 6 : 3;
+ if (GET_MODE_SIZE (mode) < 4
+ && class_vec_lanes[from] && class_vec_lanes[to]
+ && ((class_vec_lanes[from] | class_vec_lanes[to])
+ & ~(((GET_MODE_SIZE (mode) - 1) | 1) * 0x11)))
+ return TEST_BIT (mxp_acc_classes, (to)) ? 6 : 3;
+ /* Tilt the balance. */
+ if ((class_vec_lanes[from]|class_vec_lanes[to]) & 0xf0)
+ return 2;
+ return 2;
+}
+
+int
+mxp_memory_move_cost (enum machine_mode mode, enum reg_class class,
+ int in_p ATTRIBUTE_UNUSED)
+{
+ return (4
+ + (GET_MODE_SIZE (mode) == 8
+ && (class_scalar_vec_lanes[(class)] & 0xf0))
+ + memory_move_secondary_cost (mode, class, in_p));
+}
+
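+/* Like peep2_regno_dead_p, but check every hard register that (REGNO, MODE)
+   occupies, OFS insns into the peephole2 match.  */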
+int
+peep2_regno_mode_dead_p (int ofs, int regno, enum machine_mode mode)
+{
+ int i;
+
+ for (i = HARD_REGNO_NREGS (regno, mode); --i >= 0; )
+    if (!peep2_regno_dead_p (ofs, regno + i))
+ return 0;
+ return 1;
+}
+
+static bool
+mxp_vector_mode_supported_p (enum machine_mode mode)
+{
+ return mov_optab->handlers[mode].insn_code != CODE_FOR_nothing;
+}
+
+#include "gt-mxp.h"
diff --git a/gcc/config/mxp/mxp.h b/gcc/config/mxp/mxp.h
new file mode 100644
index 00000000000..86a7b6d1fdb
--- /dev/null
+++ b/gcc/config/mxp/mxp.h
@@ -0,0 +1,293 @@
+#include "mxp-regset.h"
+
+#define N_HARDWARE_VECREGS 32
+/* Data is byte-addressed, using 16 bit addresses. Code is word-addressed,
+ using 16 bit addresses. */
+#define Pmode HImode
+
+#define REGISTER_MOVE_COST(MODE, FROM, TO) \
+ mxp_register_move_cost ((MODE), (FROM), (TO))
+
+#define CONDITIONAL_REGISTER_USAGE mxp_conditional_register_usage ();
+
+#define CASE_VECTOR_MODE HImode
+
+/* For non-pointer types, use the sizes that arc.h has, to make structure
+ passing simpler.
+ 32 bit int & long also makes sense because that's the widest add we can
+ do cheaply. */
+#define SHORT_TYPE_SIZE 16
+#define INT_TYPE_SIZE (TARGET_INT16 ? 16 : 32)
+#define LONG_TYPE_SIZE 32
+#define LONG_LONG_TYPE_SIZE 64
+#define FLOAT_TYPE_SIZE 32
+#define DOUBLE_TYPE_SIZE 64
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+/* Define this as 1 if `char' should by default be signed; else as 0. */
+#define DEFAULT_SIGNED_CHAR 0
+
+#define SIZE_TYPE "short unsigned int"
+#define PTRDIFF_TYPE "short int"
+#define WCHAR_TYPE "short unsigned int"
+#define WCHAR_TYPE_SIZE 16
+
+#define UNITS_PER_SIMD_WORD(MODE) 16
+
+/* ??? arc.h has BIGGEST_ALIGNMENT as 32, but we need 128 bit alignment for
+   128 bit vector loads / stores. */
+#define BIGGEST_ALIGNMENT 128
+#define MAX_FIXED_MODE_SIZE 128
+
+#define STRICT_ALIGNMENT 1
+
+#define PARM_BOUNDARY 16
+
+/* ??? The actual stack boundary is 128 bits, but we want to save stack space
+ when we can combine a frame of unaligned size with a save area of unaligned
+ size. */
+#define STACK_BOUNDARY 16
+#define PREFERRED_STACK_BOUNDARY 128
+
+/* Functions are actually word-aligned, but the code is also word-addressed,
+ and there is no point in trying to model all the details. */
+#define FUNCTION_BOUNDARY 8
+#define FUNCTION_MODE QImode
+
+#define BITS_BIG_ENDIAN 0
+#define BYTES_BIG_ENDIAN 0
+#define WORDS_BIG_ENDIAN 0
+
+#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \
+ if ((MODE) == QImode) \
+ (MODE) = HImode; \
+
+/* mxp_print_operand relies on LAST_SCALAR_REG to be odd. */
+#define FIRST_PARM_REG VR2_REG
+#define N_PARM_REGS 8
+
+/* In principle, we could describe all the registers in 16-bit granularity,
+ and have a regclass for every lane combination. Then addsi would have
+   four alternatives, each equally valid.  However, too much choice among
+   equally good but incompatible reg classes would only confuse gcc.
+   The other extreme would be to have only one lane combination per mode -
+   then it would be easy to run out of registers.  We use an intermediate
+ approach: for each mode, the lane combination with lane 0 for its lowest
+ bits is the main alternative, and the one with lane 4 for its lowest bits
+ is the fallback alternative. */
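+/* Concretely (illustrative, using the constraint naming below): an SImode
+   value prefers lanes 0-1 with lanes 4-5 as the fallback, which is why
+   alternative pairs like "R03,R30" appear in mxp.md.  */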
+
+#define GENERAL_REGS S03V0f_REGS
+#define SCALAR_REGS Sff_REGS
+
+#define HARD_REGNO_MODE_OK(REGNO,MODE) \
+ ((REGNO) >= VFLAGS_REG \
+ ? (GET_MODE_CLASS ((MODE)) == MODE_VECTOR_CC \
+ ? ((GET_MODE_SIZE (MODE) <= 8 && VREG_LANE (REGNO) == 4) \
+ || !VREG_LANE (REGNO)) \
+ : GET_MODE_CLASS ((MODE)) == MODE_CC) \
+ : (REGNO) == FP_REG || (REGNO) == AP_REG ? (MODE) == HImode \
+ : (REGNO) >= ACC_REG \
+ ? ((GET_MODE_SIZE (MODE) <= 8 && VREG_LANE (REGNO) == 4) \
+ || !VREG_LANE (REGNO)) \
+ : (REGNO) > LAST_SCALAR_REG \
+ ? !(VREG_LANE (REGNO) & ((GET_MODE_SIZE (MODE) - 1) >> 1)) \
+ : (GET_MODE_SIZE (MODE) <= 2 \
+ || (GET_MODE_SIZE (MODE) <= 4 && !((REGNO) & 1))))
+#define HARD_REGNO_NREGS(REGNO, MODE) ((GET_MODE_SIZE (MODE) + 1) >> 1)
+#define CLASS_MAX_NREGS(CLASS, MODE) ((GET_MODE_SIZE (MODE) + 1) >> 1)
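+/* Hard registers are 16-bit units, so e.g. SImode occupies two of them and
+   TImode eight.  */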
+
+#define HARD_FRAME_POINTER_REGNUM 12
+#define FRAME_POINTER_REGNUM FP_REG
+#define ARG_POINTER_REGNUM AP_REG
+
+#define FRAME_POINTER_REQUIRED 0
+
+#define ELIMINABLE_REGS \
+{ \
+ { HARD_FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \
+}
+
+#define CAN_ELIMINATE(FROM, TO) 1
+
+#define CAN_DEBUG_WITHOUT_FP
+
+#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
+ OFFSET = mxp_initial_elimination_offset ((FROM), (TO))
+
+#define STARTING_FRAME_OFFSET 0
+
+#define BASE_REG_CLASS SCALAR_REGS
+#define INDEX_REG_CLASS NO_REGS
+#define MAX_REGS_PER_ADDRESS 1
+
+#ifdef REG_OK_STRICT
+#define REG_OK_STRICT_P 1
+#else
+#define REG_OK_STRICT_P 0
+#endif
+
+#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
+ if (mxp_legitimate_address_p ((MODE), (X), REG_OK_STRICT_P)) goto LABEL
+
+#define GO_IF_MODE_DEPENDENT_ADDRESS(X, LABEL) \
+ if (!mxp_legitimate_address_p (TImode, (X), REG_OK_STRICT_P) \
+ || !mxp_legitimate_address_p (QImode, (X), REG_OK_STRICT_P)) goto LABEL
+
+#define REGNO_OK_FOR_BASE_P(NUM) \
+ ((NUM) <= LAST_SCALAR_REG \
+ || (!REG_OK_STRICT_P && (NUM) >= FIRST_PSEUDO_REGISTER))
+
+#define REGNO_OK_FOR_INDEX_P(NUM) 0
+
+#define PREFERRED_RELOAD_CLASS(X,CLASS) \
+ (GET_CODE (X) == PLUS && CONST_INT_P (XEXP ((X), 1)) \
+ ? ((CLASS) == S01V01_REGS ? S01_REGS \
+ : (CLASS) == S10V10_REGS ? S10_REGS \
+ : (CLASS) == S03V0f_REGS ? S03_REGS \
+ : (CLASS) == S30Vf0_REGS ? S30_REGS \
+ : (CLASS) == SffVff_REGS ? Sff_REGS \
+ : (CLASS)) \
+ : (CLASS))
+
+#define REG_ALLOC_ORDER \
+{ \
+ /* Non-scalar general registers. */ \
+ 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, \
+ 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, \
+ /* Other non-scalar vector registers. */ \
+ 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, \
+ 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, \
+ /* Scalar registers */ \
+ 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, \
+ 0, 8, \
+ /* Accumulator */ \
+ 78, 79, \
+}
+
+/* TImode references can address the entire 16 bit data address space using
+ the data base pointer i9 and a 12-bit offset shifted left by 4. */
+#define CONSTANT_ADDRESS_P(X) 1
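+/* Check on the comment above: a 12-bit offset scaled by 16 spans
+   (1 << 12) * 16 == 65536 bytes, i.e. the whole 16-bit data address space. */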
+
+#define CUMULATIVE_ARGS int
+#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,INDIRECT,N_NAMED_ARGS) \
+ ((CUM) = 0)
+#define FUNCTION_ARG_ADVANCE(CUM, MODE, TYPE, NAMED) \
+ ((CUM)++)
+
+#define TRAMPOLINE_SIZE 0
+#define INITIALIZE_TRAMPOLINE(ADDR, FNADDR, STATIC_CHAIN) \
+ sorry ("Trampolines are not implemented.\n");
+/* Trampolines are not supported, yet we have to provide a definition for
+ the static chain to avoid compiler crashes. */
+#define STATIC_CHAIN gen_rtx_MEM (HImode, GEN_INT (0))
+
+#define FUNCTION_PROFILER(FILE, LABELNO) sorry ("Profiling not supported.\n");
+
+#define MOVE_MAX 16
+
+#define SLOW_BYTE_ACCESS 1
+
+#define SHIFT_COUNT_TRUNCATED 1
+
+#define TRULY_NOOP_TRUNCATION(OUTPREC, INPREC) 1
+
+#define LEGITIMATE_CONSTANT_P(X) (GET_MODE_SIZE (GET_MODE (X)) <= 4)
+
+#define FUNCTION_ARG(CUM, MODE, TYPE, NAMED) \
+ ((CUM) < N_PARM_REGS \
+ ? gen_rtx_REG ((MODE), FIRST_PARM_REG + REGS_PER_VREG * (CUM)) : 0)
+
+#define FUNCTION_ARG_REGNO_P(N) \
+ ((N) >= FIRST_PARM_REG && (N) <= FIRST_PARM_REG + REGS_PER_VREG * N_PARM_REGS)
+
+#define FIRST_PARM_OFFSET(FNDECL) 0
+
+#define FUNCTION_VALUE_REGNO_P(N) ((N) == FIRST_PARM_REG)
+
+#define LIBCALL_VALUE(MODE) gen_rtx_REG ((MODE), FIRST_PARM_REG)
+/* Like LIBCALL_VALUE, except that we must
+ change the mode like PROMOTE_MODE does. */
+#define FUNCTION_VALUE(VALTYPE, FUNC) \
+ gen_rtx_REG ( \
+ ((GET_MODE_CLASS (TYPE_MODE (VALTYPE)) == MODE_INT \
+ && GET_MODE_SIZE (TYPE_MODE (VALTYPE)) < 2 \
+ && (TREE_CODE (VALTYPE) == INTEGER_TYPE \
+ || TREE_CODE (VALTYPE) == ENUMERAL_TYPE \
+ || TREE_CODE (VALTYPE) == BOOLEAN_TYPE \
+ || TREE_CODE (VALTYPE) == OFFSET_TYPE)) \
+ ? HImode : TYPE_MODE (VALTYPE)), \
+ FIRST_PARM_REG)
+
+#define ACCUMULATE_OUTGOING_ARGS 1
+
+#define RETURN_POPS_ARGS(DECL, FUNTYPE, SIZE) 0
+
+#define INIT_EXPANDERS (init_machine_status = mxp_init_machine_status)
+
+#define ASM_APP_ON "#APP\n"
+#define ASM_APP_OFF "#NO_APP\n"
+
+#define ASM_OUTPUT_ALIGN(FILE,LOG) \
+ if ((LOG) != 0) \
+ fprintf ((FILE), "\t.balign %d\n", 1 << (LOG))
+
+/* Globalizing directive for a label. */
+#define GLOBAL_ASM_OP "\t.global\t"
+
+#define TEXT_SECTION_ASM_OP "\t.text"
+#define DATA_SECTION_ASM_OP "\t.data"
+
+#define PRINT_OPERAND(STREAM, X, CODE) \
+ mxp_print_operand ((STREAM), (X), (CODE))
+
+#define PRINT_OPERAND_PUNCT_VALID_P(CHAR) \
+ ((CHAR) == '#')
+
+#define PRINT_OPERAND_ADDRESS(FILE, ADDR) gcc_unreachable ()
+
+#define MODES_TIEABLE_P(MODE1, MODE2) \
+  ((MODE1) != Pmode && (MODE2) != Pmode \
+   && GET_MODE_SIZE (MODE1) <= UNITS_PER_WORD \
+   && GET_MODE_SIZE (MODE2) <= UNITS_PER_WORD)
+
+#define TARGET_CPU_CPP_BUILTINS() do \
+{ \
+ builtin_define ("__mxp__"); \
+ builtin_define ("__LITTLE_ENDIAN__"); \
+} while (0)
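+/* Target-dependent source can then be guarded with, e.g.,
+   "#ifdef __mxp__".  */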
+
+#define MEMORY_MOVE_COST(M,C,I) mxp_memory_move_cost ((M), (C), (I))
+
+#define MODE_NLANES(MODE) ((GET_MODE_SIZE (MODE) + 1) >> 1)
+#define VREG_LANE_MASK(REG) \
+(((1 << MODE_NLANES (GET_MODE (REG))) - 1) << VREG_LANE (REGNO ((REG))))
+
+/* ??? We add accumulator clobbers for cross-lane moves in peep2 where
+ possible. Without the accumulator clobbers, cross-lane moves are more
+ expensive. Therefore, we don't want regrename to change lanes of move
+ instruction without accumulator clobbers. Unfortunately, regrename cannot
+ be controlled in a way that takes the instructions involved into account;
+ all we can do is prevent a lane change. */
+#define HARD_REGNO_RENAME_OK(REG,NEW_REG) \
+ (VREG_LANE (REG) == VREG_LANE (NEW_REG))
+
+/* This is how to output a reference to a symbol_ref / label_ref as
+ (part of) an operand. To disambiguate from register names like
+ a1 / a2 / status etc, symbols are preceded by '@'. */
+#define ASM_OUTPUT_SYMBOL_REF(FILE,SYM) \
+ ASM_OUTPUT_LABEL_REF ((FILE), XSTR ((SYM), 0))
+#define ASM_OUTPUT_LABEL_REF(FILE,STR) \
+ do \
+ { \
+      fputc ('@', (FILE)); \
+ assemble_name ((FILE), (STR)); \
+ } \
+ while (0)
+
+extern rtx mxp_compare_op0, mxp_compare_op1;
+extern unsigned char class_vec_lanes[], class_scalar_vec_lanes[];
+extern struct simple_bitmap_def *mxp_acc_classes;
diff --git a/gcc/config/mxp/mxp.md b/gcc/config/mxp/mxp.md
new file mode 100644
index 00000000000..5f2a04fda85
--- /dev/null
+++ b/gcc/config/mxp/mxp.md
@@ -0,0 +1,910 @@
+;; GCC machine description for ARC mxp.
+;; Copyright (C) 2008 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(include ("mxp-regset.md"))
+(include ("predicates.md"))
+(include ("constraints.md"))
+
+(define_attr "type" "jump,load,store,other" (const_string "other"))
+
+;; instruction lengths are in words. Actual machine instructions are always
+;; one word, but some patterns here might emit multiple machine instructions.
+(define_attr "length" "" (const_int 1))
+
+;; The mxp allows anything in a delay slot - even jumps.
+;; However, gcc's delay slot scheduling pass gets confused if we allow this.
+;; Also, we lack a mechanism to fill more than one delay slot with one insn
+;; pattern.
+(define_attr "in_delay_slot" "no,yes"
+ (cond [(eq_attr "type" "jump") (const_string "no")
+ (eq_attr "length" "1") (const_string "yes")] (const_string "no")))
+
+(define_delay
+ (eq_attr "type" "jump")
+ [(eq_attr "in_delay_slot" "yes") (nil) (nil)
+ (eq_attr "in_delay_slot" "yes") (nil) (nil)
+ (eq_attr "in_delay_slot" "yes") (nil) (nil)])
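+
+;; Three slots matches the '#' handling in mxp.c, which emits
+;; 3 - dbr_sequence_length () vnops for the unfilled ones.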
+
+;; moves and logical operations can be performed on 128 bit vectors and
+;; parts of them in 16-bit granularity.
+(define_mode_iterator VECI [HI SI DI TI V2HI V4HI V8HI V2SI V4SI V2DI])
+
+(define_mode_iterator VECX
+ [QI HI CCI CCZN CCZ SI DI TI V2HI V2CCI V2CCZN V2CCZ V4HI V4CCI V4CCZN V4CCZ
+ V8HI V8CCI V8CCZN V8CCZ V2SI V4SI V2DI SF V2SF DF V4SF V2DF])
+
+;; Like VECI, but with size 32 bit (like SImode)
+(define_mode_iterator VECSI [SI V2HI])
+
+;; Like VECI, but with minimum size 32 bit (like SImode)
+(define_mode_iterator VECMSI [SI DI TI V2HI V4HI V8HI V2SI V4SI V2DI])
+
+;; Like VECI, but with minimum size 64 bit (like DImode)
+(define_mode_iterator VECMDI [DI TI V4HI V8HI V2SI V4SI V2DI])
+
+;; Like VECX, but with size 64 bit (like DImode)
+(define_mode_iterator VECDX [V4HI V4CCI V4CCZN V4CCZ V2SI V2CC V2SF DI DF])
+(define_mode_attr HALF_MODE [(V4HI "V2HI") (V4CCI "V2CCI") (V4CCZN "V2CCZN")
+ (V4CCZ "V2CCZ") (V2SI "SI") (V2CC "CC")
+ (V2SF "SF") (DI "SI") (DF "SI")])
+
+;; single-instruction patterns for arithmetic like add/sub is
+;; limited to 16 and 32 bit per data item.
+(define_mode_iterator VECA [HI SI V2HI V4HI V8HI V2SI V4SI])
+
+;; Like VECA, but exclude HI / SI
+(define_mode_iterator VECVA [V2HI V4HI V8HI V2SI V4SI])
+
+(define_mode_iterator VXHI [HI V2HI V4HI V8HI])
+(define_mode_iterator VXSI [SI V2SI V4SI])
+(define_mode_iterator VXDI [DI V2DI])
+
+(define_mode_iterator HSI [HI SI])
+
+(define_mode_iterator CCX [CC])
+
+(define_mode_attr unit_suffix [(HI "w") (V2HI "w") (V4HI "w") (V8HI "w")
+ (SI "") (V2SI "") (V4SI "")])
+(define_mode_attr size_suffix
+ [(QI "w") (HI "w") (CCI "w") (CCZN "w") (CCZ "w")
+ (V2HI "") (V2CCI "") (V2CCZN "") (V2CCZ "") (SI "") (SF "")])
+
+(define_mode_attr v0
+ [(QI "R01Z") (HI "R01Z") (CCI "fv01Z") (CCZN "fv01Z") (CCZ "fv01Z")
+ (V2HI "R03Z") (V2CCI "fv03Z") (V2CCZN "fv03Z") (V2CCZ "fv03Z")
+ (SI "R03Z") (SF "R03Z")
+ (V4HI "R0fZ") (V4CCI "fv0fZ") (V4CCZN "fv0fZ") (V4CCZ "fv0fZ")
+ (V2SI "R0fZ") (DI "R0fZ") (V2SF "R0fZ") (DF "R0fZ")
+ (V8HI "vZ") (V8CCI "fvffZ") (V8CCZN "fvffZ") (V8CCZ "fvffZ")
+ (V4SI "vZ") (V2DI "vZ") (TI "vZ") (V4SF "vZ") (V2DF "vZ")])
+
+(define_mode_attr v4
+ [(QI "R10Z") (HI "R10Z") (CCI "fv10Z") (CCZN "fv10Z") (CCZ "fv10Z")
+ (V2HI "R30Z") (V2CCI "fv30Z") (V2CCZN "fv30Z") (V2CCZ "fv30Z")
+ (SI "R30Z") (SF "R30Z")
+ (V4HI "Rf0Z") (V4CCI "fvf0Z") (V4CCZN "fvf0Z") (V4CCZ "fvf0Z")
+ (V2SI "Rf0Z") (DI "Rf0Z") (V2SF "Rf0Z") (DF "Rf0Z")
+ (V8HI "vZ") (V8CCI "fvffZ") (V8CCZN "fvffZ") (V8CCZ "fvffZ")
+ (V4SI "vZ") (V2DI "vZ") (TI "vZ") (V4SF "vZ") (V2DF "vZ")])
+
+(define_mode_attr vn
+ [(QI "vZ") (HI "vZ") (CCI "fvffZ") (CCZN "fvffZ") (CCZ "fvffZ")
+ (V2HI "vZ") (V2CCI "fvffZ") (V2CCZN "fvffZ") (V2CCZ "fvffZ")
+ (SI "vZ") (SF "vZ")
+ (V4HI "vZ") (V4CCI "fvffZ") (V4CCZN "fvffZ") (V4CCZ "fvffZ")
+ (V2SI "vZ") (DI "vZ") (V2SF "vZ") (DF "vZ") (V2CC "vZ")
+ (V8HI "vZ") (V8CCI "fvffZ") (V8CCZN "fvffZ") (V8CCZ "fvffZ")
+ (V4SI "vZ") (V2DI "vZ") (TI "vZ") (V4SF "vZ") (V2DF "vZ") (V4CC "vZ")])
+
+(define_mode_attr vx
+ [(HI "R01Z,R10Z") (V2HI "R03Z,R30Z") (SI "R03Z,R30Z") (SF "R03,R30")
+ (V4HI "R0f,Rf0") (V2SI "R0f,Rf0") (DI "R0f,Rf0") (V2SF "R0f,Rf0")
+ (DF "R0f,Rf0")
+ (V8HI "v") (V4SI "v") (V2DI "v") (TI "v") (V4SF "v") (V2DF "v")])
+
+(define_mode_attr vxp
+ [(HI "R01Z,R10Z") (V2HI "R03Z,R30Z") (SI "R03Z,R30Z") (SF "R03,R30")
+ (V4HI "R0f,Rf0") (V2SI "R0f,Rf0") (DI "R0f,Rf0?") (V2SF "R0f,Rf0")
+ (DF "R0f,Rf0")
+ (V8HI "v") (V4SI "v") (V2DI "v") (TI "v") (V4SF "v") (V2DF "v")])
+
+(define_mode_attr a0
+ [(QI "Ral") (HI "Ral") (CCI "Ral") (CCZN "Ral") (CCZ "Ral")
+ (V2HI "Ral") (V2CCI "Ral") (V2CCZN "Ral") (V2CCZ "Ral") (SI "Ral") (SF "Ral")
+ (V4HI "Ral") (V4CCI "Ral") (V4CCZN "Ral") (V4CCZ "Ral") (V2SI "Ral")
+ (DI "Ral") (V2SF "Ral") (DF "Ral")
+ (V8HI "Rac") (V8CCI "Rac") (V8CCZN "Rac") (V8CCZ "Rac") (V4SI "Rac")
+ (V2DI "Rac") (TI "Rac") (V4SF "Rac") (V2DF "Rac")])
+
+(define_mode_attr a4
+ [(QI "Rah") (HI "Rah") (CCI "Rah") (CCZN "Rah") (CCZ "Rah")
+ (V2HI "Rah") (V2CCI "Rah") (V2CCZN "Rah") (V2CCZ "Rah") (SI "Rah") (SF "Rah")
+ (V4HI "Rah") (V4CCI "Rah") (V4CCZN "Rah") (V4CCZ "Rah") (V2SI "Rah")
+ (DI "Rah") (V2SF "Rah") (DF "Rah")
+ (V8HI "Rac") (V8CCI "Rac") (V8CCZN "Rac") (V8CCZ "Rac") (V4SI "Rac")
+ (V2DI "Rac") (TI "Rac") (V4SF "Rac") (V2DF "Rac")])
+
+(define_mode_attr ax
+ [(HI "Ral,Rah") (V2HI "Ral,Rah") (SI "Ral,Rah") (SF "Ral,Rah")
+ (V4HI "Ral,Rah") (V2SI "Ral,Rah") (DI "Ral,Rah") (V2SF "Ral,Rah")
+ (DF "Ral,Rah")
+ (V8HI "Rac") (V4SI "Rac") (V2DI "Rac") (TI "Rac") (V4SF "Rac") (V2DF "Rac")])
+
+(define_mode_attr fx
+ [(DI "Rfl,Rfh") (V2DI "Rfg")])
+
+(define_mode_attr c4
+ [(HI ",") (V2HI ",") (SI ",") (SF ",")
+ (V4HI ",") (V2SI ",") (DI ",") (V2SF ",") (DF ",")
+ (V8HI "") (V4SI "") (V2DI "") (TI "") (V4SF "") (V2DF "")])
+
+(define_mode_attr I16v
+ [(HI "i") (CCI "i") (CCZN "i") (CCZ "i")
+ (V2HI "J2r16") (V2CCI "J2r16") (V2CCZN "J2r16") (V2CCZ "J2r16") (SI "J2r16")
+ (V4HI "J4r16") (V4CCI "J4r16") (V4CCZN "J4r16") (V4CCZ "J4r16")
+ (V2SI "J4r16") (DI "J4r16")
+ (V8HI "J8r16") (V8CCI "J8r16") (V8CCZN "J8r16") (V8CCZ "J8r16")
+ (V4SI "J8r16") (V2DI "J8r16") (TI "J8r16")
+ (SF "F") (V2SF "G2r") (DF "G2r") (V4SF "G4r") (V2DF "G4r")])
+
+(define_mode_attr I32v
+ [(QI "i") (HI "i") (CCI "i") (CCZN "i") (CCZ "i")
+ (V2HI "i") (V2CCI "i") (V2CCZN "i") (V2CCZ "i") (SI "i")
+ (V4HI "J2r32") (V4CCI "J2r32") (V4CCZN "J2r32") (V4CCZ "J2r32")
+ (V2SI "J2r32") (DI "J2r32")
+ (V8HI "J4r32") (V8CCI "J4r32") (V8CCZN "J4r32") (V8CCZ "J4r32")
+ (V4SI "J4r32") (V2DI "J4r32") (TI "J4r32")
+ (SF "F") (V2SF "G2r") (DF "G2r") (V4SF "G4r") (V2DF "G4r")])
+
+(define_mode_attr I32m
+ [(QI "i") (HI "i") (CCI "i") (CCZN "i") (CCZ "i")
+ (V2HI "i") (V2CCI "i") (V2CCZN "i") (V2CCZ "i") (SI "i")
+ (V4HI "i") (V4CCI "i") (V4CCZN "i") (V4CCZ "i")
+ (V2SI "i") (DI "i")
+ (V8HI "J4r32") (V8CCI "J4r32") (V8CCZN "J4r32") (V8CCZ "J4r32")
+ (V4SI "J4r32") (V2DI "J4r32") (TI "J4r32")
+ (SF "F") (V2SF "G2r") (DF "G2r") (V4SF "G4r") (V2DF "G4r")])
+
+(define_mode_attr VXCC [(DI "V2CC") (V2DI "V4CC")])
+
+(define_expand "mov<mode>"
+ [(set (match_operand:VECX 0 "nonimmediate_operand" "")
+ (match_operand:VECX 1 "general_operand" ""))]
+ ""
+ "
+{
+ if (!register_operand (operands[0], <MODE>mode)
+ && !(reg_or_0_operand (operands[1], <MODE>mode)
+	   || (GET_MODE_SIZE (<MODE>mode) <= 4
+	       ? CONSTANT_P (operands[1])
+	       : satisfies_constraint_I32 (operands[1]))))
+ operands[1] = copy_to_mode_reg (<MODE>mode, operands[1]);
+}")
+
+(define_insn "*mov<mode>_i"
+ [(set (match_operand:VECX 0 "nonimmediate_operand"
+ "=<vn>,Rac,<v0>,<v4>,<vn>,<vn>,m,m")
+ (match_operand:VECX 1 "general_operand"
+ "<vn>,<vn>,<a0>,<a4>,<I32m>,m,Z,<vn>"))]
+ "register_operand (operands[0], <MODE>mode)
+ || reg_or_0_operand (operands[1], <MODE>mode)"
+ "*
+{
+ switch (which_alternative)
+ {
+ case 0: case 1:
+ {
+ int src, dst, srclane, dstlane;
+ rtx xop[3];
+
+ dst = REGNO (operands[0]);
+ dstlane = dst <= LAST_SCALAR_REG ? dst & 7 : VREG_LANE (dst);
+ if (REG_P (operands[1]))
+ {
+ src = REGNO (operands[1]);
+ srclane = src <= LAST_SCALAR_REG ? src & 7 : VREG_LANE (src);
+ }
+ else /* const0_rtx */
+ srclane = dstlane;
+ if (srclane == dstlane)
+ return (dst >= ACC_REG && dst < ACC_REG + REGS_PER_VREG
+ ? \"vaddw.%L0, %v0,%v1,vr62\"
+ : \"vmvw.%L0 %v0,%v1\" /* Acc unchanged. */);
+ if (dst <= LAST_SCALAR_REG)
+ return (GET_MODE_SIZE (<MODE>mode) <= 2
+ ? \"viv.%L1 %0,%v1\"
+ : \"vkv.%L1 %0,%v1\"); /* Acc unchanged. */
+ if (dst >= ACC_REG && dst < ACC_REG + REGS_PER_VREG)
+ {
+ if (src <= LAST_SCALAR_REG)
+ return \"vmiv<size_suffix>.%L0 %v0,%1\";
+ /* vmr has higher latency. */
+ xop[0] = operands[0];
+ xop[1] = operands[1];
+ xop[2] = GEN_INT ((srclane - dstlane) & 7);
+ output_asm_insn (\"vmr%2w.%L0 %v0,%v1,%v1\", xop);
+ return \"\";
+ }
+ /* Non-scalar cross-lane moves have higher latency. */
+ if (GET_MODE_SIZE (<MODE>mode) <= 4)
+ return \"vxsum<size_suffix>i.%L0 %v0,%v1,%L1\";
+ if ((src ^ dst) & ~(REGS_PER_VREG - 1))
+ {
+ if (GET_MODE_SIZE (<MODE>mode) == 8 && (srclane ^ dstlane) == 4)
+ return ((dstlane & 4)
+ ? \"vexch4.%L0 %v0,%v1\" : \"vexch4.%L0 %v1,%v0\");
+ }
+ if (GET_MODE_SIZE (<MODE>mode) == 8)
+ return \"vxsumi.%O0 %v0,%v1,%O1` vxsumi.%P0 %v0,%v1,%P1\";
+ gcc_unreachable ();
+ }
+ case 2: case 3:
+ return \"vaddaw.%L0 %v0,vr62,vr62\";
+ case 4:
+ if (GET_MODE_SIZE (<MODE>mode) == 8
+ && !satisfies_constraint_J2r32 (operands[1]))
+ return \"vmov.%O0 %v0,%Q1` vmov.%P0 %v0,%R1\";
+ if (GET_MODE_SIZE (<MODE>mode) == 2)
+ return \"vmovw %v0,%1,%L0\";
+ return \"vmov.%L0 %v0,%1\"; /* Acc unchanged. */
+ case 5:
+ if (TARGET_NO_VLD_LABEL && ! REG_P (XEXP (operands[1], 0)))
+ return \"vld%N0 %v0,[i9,0]; ??? SHOULD BE: %1\";
+ return \"vld%N0 %v0,%1\";
+ case 6:
+ {
+ rtx xop[3];
+ xop[0] = operands[0];
+ xop[1] = operands[1];
+ xop[2] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+ output_asm_insn (\"vst%2 %v1,%0\", xop);
+ return \"\";
+ }
+ case 7:
+ return \"vst%M1 %v1,%0\";
+ default: gcc_unreachable ();
+ }
+}"
+ [(set_attr "type" "*,*,*,*,*,load,store,store")])
+
+;; There is no v{ld,st}64_4.  Split into two 32 bit moves.
+(define_split
+ [(set (match_operand:VECDX 0 "nonimmediate_operand")
+ (match_operand:VECDX 1 "nonimmediate_operand"))]
+ "reload_completed
+ && ((memory_operand (operands[0], <MODE>mode)
+ && VREG_LANE (REGNO (operands[1])))
+ || (memory_operand (operands[1], <MODE>mode)
+ && VREG_LANE (REGNO (operands[0]))))"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4) (match_dup 5))]
+ "
+{
+ enum machine_mode submode = <HALF_MODE>mode;
+
+ operands[2] = simplify_gen_subreg (submode, operands[0], <MODE>mode, 0);
+ operands[3] = simplify_gen_subreg (submode, operands[1], <MODE>mode, 0);
+ operands[4] = simplify_gen_subreg (submode, operands[0], <MODE>mode, 4);
+ operands[5] = simplify_gen_subreg (submode, operands[1], <MODE>mode, 4);
+}")
+
+; vmiv has lower latency than vxsumi, but it clobbers the accumulator.
+; vmr is faster than two vxsumi, but it clobbers the accumulator.
+(define_peephole2
+ [(set (match_operand 0 "register_operand" "")
+ (match_operand 1 "register_operand" ""))]
+ "REGNO (operands[0]) > LAST_SCALAR_REG
+ && VREG_LANE (REGNO (operands[0])) != VREG_LANE (REGNO (operands[1]))
+ && (REGNO (operands[1]) <= LAST_SCALAR_REG
+ || GET_MODE_SIZE (GET_MODE (operands[0])) == 8)
+ && (peep2_regno_mode_dead_p
+ (1,
+ ACC_REG + VREG_LANE (REGNO (operands[0])) / LANES_PER_REG,
+ GET_MODE (operands[0])))"
+ [(parallel [(set (match_dup 0) (match_dup 1))
+ (clobber (match_dup 2))])]
+ "
+{
+ operands[2]
+ = gen_rtx_REG (GET_MODE (operands[0]),
+ ACC_REG + VREG_LANE (REGNO (operands[0])) / LANES_PER_REG);
+}")
+
+;; Post-reload pattern for cross-lane moves.
+;; regrename can change a cross-lane move into an in-lane move.
+;; We could prohibit this, but that would then reduce the effectiveness of
+;; regrename.  So we accept in-lane moves instead, and split away the clobber.
+(define_insn_and_split "*mov<mode>_clob"
+ [(set (match_operand:VECX 0 "register_operand" "=<vn>")
+ (match_operand:VECX 1 "register_operand" "<vn>"))
+ (clobber (match_operand 2 "register_operand" "=Rac"))]
+ "reload_completed
+ && (VREG_LANE_MASK (operands[0]) & ~VREG_LANE_MASK (operands[2])) == 0"
+ "*
+{
+ int src = REGNO (operands[1]), dst = REGNO (operands[0]);
+ int srclane = src <= LAST_SCALAR_REG ? src & 7 : VREG_LANE (src);
+ int dstlane = dst <= LAST_SCALAR_REG ? dst & 7 : VREG_LANE (dst);
+ rtx xop[3];
+ if (src <= LAST_SCALAR_REG)
+ return \"vmiv<size_suffix>.%L0 %v0,%1\";
+ /* vmr has higher latency. */
+ xop[0] = operands[0];
+ xop[1] = operands[1];
+ xop[2] = GEN_INT ((srclane - dstlane) & 7);
+ output_asm_insn (\"vmr%2w.%L0 %v0,%v1,%v1\", xop);
+ return \"\";
+}"
+ "&& VREG_LANE (REGNO (operands[0])) == VREG_LANE (REGNO (operands[1]))"
+ [(set (match_dup 0) (match_dup 1))]
+ "")
+
+(define_insn "andhi3"
+ [(set (match_operand:HI 0 "register_operand" "=R01,R10,S_n")
+ (and:HI (match_operand:HI 1 "register_operand" "%R01,R10,S_n")
+ (match_operand:HI 2 "nonmemory_operand" "R01,R10,i")))
+ (clobber (match_scratch:HI 3 "=Ral,Rah,X"))]
+ ""
+ "@
+ vand.%L0 %v0,%v1,%v2
+ vand.%L0 %v0,%v1,%v2
+ via %0,%1,%2")
+
+(define_insn "and<mode>3"
+ [(set (match_operand:VECMSI 0 "register_operand" "=<vx>")
+ (and:VECMSI (match_operand:VECMSI 1 "register_operand" "%<vx>")
+ (match_operand:VECMSI 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECMSI 3 "=<ax>"))]
+ ""
+ "vand.%L0 %v0,%v1,%v2")
+
+(define_insn "ior<mode>3"
+ [(set (match_operand:VECI 0 "register_operand" "=<vx>")
+ (ior:VECI (match_operand:VECI 1 "register_operand" "%<vx>")
+ (match_operand:VECI 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECI 3 "=<ax>"))]
+ ""
+ "vor.%L0 %v0,%v1,%v2")
+
+(define_insn "xor<mode>3"
+ [(set (match_operand:VECI 0 "register_operand" "=<vx>")
+ (xor:VECI (match_operand:VECI 1 "register_operand" "%<vx>")
+ (match_operand:VECI 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECI 3 "=<ax>"))]
+ ""
+ "vxor.%L0 %v0,%v1,%v2")
+
+(define_insn "addhi3"
+ [(set (match_operand:HI 0 "register_operand" "=R01,R10,S_n")
+ (plus:HI (match_operand:HI 1 "register_operand" "%R01,R10,S_n")
+ (match_operand:HI 2 "nonmemory_operand" "R01,R10,i")))]
+ ""
+;; This is how we want to do it when binutils is fixed to handle
+;; relocations for vim
+;; "@
+;; vaddnaw.%L0 %v0,%v1,%v2
+;; vaddnaw.%L0 %v0,%v1,%v2
+;; vim %0,%1,%i2")
+{
+ switch (which_alternative)
+ {
+ case 0: return "vaddnaw.%L0 %v0,%v1,%v2";
+ case 1: return "vaddnaw.%L0 %v0,%v1,%v2";
+ case 2: if (TARGET_NO_VIM_LABEL && !CONST_INT_P (operands[2]))
+ {
+ rtx xop[4];
+
+ xop[0] = operands[0];
+ xop[1] = operands[1];
+ xop[2] = operands[2];
+ xop[3] = gen_rtx_REG (HImode, true_regnum (operands[0]) ^ 8);
+ output_asm_insn ("vim %0,%1,0` "
+ "vpushiw vr31,%3` "
+ "vmovw %v3,%2,%L0` "
+ "vaddnaw.%L0 %v0,%v0,%v3` "
+ "vpopw %3,vr31,pcl",
+ xop);
+ return "";
+ }
+ else
+ return "vim %0,%1,%i2";
+ }
+})
+
+(define_insn "addsi3"
+ [(set (match_operand:SI 0 "register_operand" "=R03,R30,S_n,R03,R30")
+ (plus:SI (match_operand:SI 1 "register_operand" "%R03,R30,S_n,R03,R30")
+ (match_operand:SI 2 "nonmemory_operand""R03,R30,I16,S_n,S_n")))
+ (clobber (match_scratch:SI 3 "=X,X,X,Ral,Rah"))]
+ ""
+ "@
+ vaddnaw.%L0 %v0,%v1,%v2
+ vaddnaw.%L0 %v0,%v1,%v2
+ vkm %0,%1,%2
+ vkadd %v0,%v1,%2
+ vkadd %v0,%v1,%2")
+
+(define_insn "add<mode>3"
+ [(set (match_operand:VECVA 0 "register_operand" "=<vx>")
+ (plus:VECVA (match_operand:VECVA 1 "register_operand" "%<vx>")
+ (match_operand:VECVA 2 "register_operand" "<vx>")))]
+ ""
+ "vaddna<unit_suffix>.%L0 %v0,%v1,%v2")
+
+;; ??? This should be a define_insn_and_split.
+(define_insn "add<mode>3"
+ [(set (match_operand:VXDI 0 "register_operand" "=<vxp>")
+ (plus:VXDI (match_operand:VXDI 1 "register_operand" "%<vx>")
+ (match_operand:VXDI 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:<VXCC> 3 "=<fx>"))
+ (clobber (match_scratch:VXDI 4 "=<ax>"))]
+ ""
+ "vaddna.%L0.f %v0,%v1,%v2` vmr6w.%P0 vr63,vr63,vr63` vadc.%P0 %v0,%v0,vr62")
+
+(define_insn "sub<mode>3"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (minus:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:VECA 2 "register_operand" "<vx>")))]
+ ""
+ "vsubna<unit_suffix>.%L0 %v0,%v1,%v2")
+
+;; ??? This should be a define_insn_and_split.
+(define_insn "sub<mode>3"
+ [(set (match_operand:VXDI 0 "register_operand" "=<vxp>")
+ (minus:VXDI (match_operand:VXDI 1 "register_operand" "<vx>")
+ (match_operand:VXDI 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:<VXCC> 3 "=<fx>"))
+ (clobber (match_scratch:VXDI 4 "=<ax>"))]
+ ""
+ "vsubna.%L0.f %v0,%v1,%v2` vmr6w.%P0 vr63,vr63,vr63` vsbc.%P0 %v0,%v0,vr62")
+
+(define_insn "ashl<mode>3"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (ashift:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:VECA 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vlslv<unit_suffix>.%L0 %v0,%v1,%v2")
+
+(define_insn "lshr<mode>3"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (lshiftrt:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:VECA 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vlsrv<unit_suffix>.%L0 %v0,%v1,%v2")
+
+(define_insn "ashr<mode>3"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (ashiftrt:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:VECA 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vasrv<unit_suffix>.%L0 %v0,%v1,%v2")
+
+;; ??? vblsr also allows scalar registers
+;; ??? should show vblsr / vasr also as alternatives for
+;; lshr<mode>3 / ashr<mode>3 with appropriately restricted constant range.
+;; (using vec_duplicate for operand predicate with scalar register).
+(define_insn "vec_shr_<mode>"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (lshiftrt:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:HI 2 "immediate_operand" "i<c4>i")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vblsr<unit_suffix>.%L0 %v0,%v1,%2")
+
+(define_insn "ashr<mode>3_scalar"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (ashiftrt:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:HI 2 "nonmemory_operand" "S_ni<c4>S_ni")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vasr<unit_suffix>.%L0 %v0,%v1,%2")
+
+(define_expand "mul<mode>3"
+ [(parallel [(set (match_operand:VECA 0 "register_operand" "")
+ (mult:VECA (match_operand:VECA 1 "register_operand")
+ (match_operand:VECA 2 "nonmemory_operand")))
+ (clobber (scratch:VECA))])]
+ ""
+ "
+{
+ if (GET_MODE_UNIT_SIZE (<MODE>mode) > 16)
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+}")
+
+(define_insn "*mul<mode>_i"
+ [(set (match_operand:VXHI 0 "register_operand" "=<v0>,<v4>,<v0>,<v4>")
+ (mult:VXHI (match_operand:VXHI 1 "register_operand"
+ "%<v0>,<v4>,<v0>,<v4>")
+ (match_operand:VXHI 2 "nonmemory_operand"
+ "<v0>,<v4>,S_n<I16v>,S_n<I16v>")))
+ (clobber (match_scratch:VXHI 3 "=&<a0>,&<a4>,&<a0>,&<a4>"))]
+ ""
+ "@
+ vmulw.%L0 %v0,%v1,%v2
+ vmulw.%L0 %v0,%v1,%v2
+ vmulw.%L0 %v0,%v1,%2
+ vmulw.%L0 %v0,%v1,%2")
+
+;; ??? should be a define_insn_and_split
+(define_insn "*mul<mode>_i"
+ [(set (match_operand:VXSI 0 "register_operand" "=&<v0>,&<v4>")
+ (mult:VXSI (match_operand:VXSI 1 "register_operand" "%<v0>0,<v4>0")
+ (match_operand:VXSI 2 "nonmemory_operand"
+ "<v0>I32v,<v4>I32v")))
+ (clobber (match_scratch:VXSI 3 "=&<a0>,&<a4>"))]
+ ""
+ "*
+{
+ output_asm_insn (register_operand (operands[2], <MODE>mode)
+ ? \"vmov.%L0 %v0,0x8000` vaddna.%L0 %v0,%v0,%v2` vmvw.%O0 %v0,vr62` vmule.%L0 vr62,%v1,%v2\"
+ : \"vmov.%L0 %v0,%2 + 0x8000 & 0xffff0000` vbmule.%L0 vr62,%v1,%2\",
+ operands);
+ return \"vmulae.%L0 %v0,%v0,%v1\";
+}"
+ [(set_attr "length" "3")])
+
+;; ??? should be a define_insn_and_split
+;; ??? FIXME: check effect of sign extension in vmul*
+(define_insn "muldi3"
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ (mult:DI (match_operand:DI 1 "register_operand" "%r,r,v")
+ (match_operand:DI 2 "register_operand" "r,v,v")))
+ (clobber (match_scratch:TI 3 "=&v,2,2"))
+ (clobber (match_scratch:TI 4 "=&v,&v,&v"))
+ (clobber (match_scratch:TI 5 "=&v,&v,1"))
+ (clobber (match_scratch:TI 6 "=&Rac,&Rac,&Rac"))]
+ ""
+ "vup %3,%2
+ vmrgw %4,%1,%1
+
+ vmule.3 %0,%3,%1 ; a0b0
+ vswp %5,%3
+
+ vmr5w.16 %3,%0,%0
+ vmule.15 %3,%5,%4 ; b0a1|a1b0
+ vmr4w %5,%5,%5
+ vmr7w %4,%4,vr62
+ vxsumi.192 %3,%3,21
+
+
+ vblsr vr62,%3,16
+ vmulae %5,%4,%5
+
+
+ vxsumwi.2 %0,%3,21
+ vxsumi.12 %0,%5,255"
+ [(set_attr "length" "13")])
+
+;; ??? This should be a define_insn_and_split
+(define_insn "extendsidi2"
+ [(set (match_operand:DI 0 "register_operand" "=R0f,Rf0")
+ (sign_extend:DI (match_operand:SI 1 "register_operand" "0,0")))
+ (clobber (match_scratch:DI 2 "=Ral,Rah"))]
+ ""
+ "vswpe.%P0 %v0,%v1` vlt.%P0 %v0,%v0,vr62"
+ [(set_attr "length" "2")])
+
+(define_insn "vec_unpacks_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))
+ (clobber:TI (match_scratch:TI 2 "=Rac"))]
+ ""
+ "vups %0,%1")
+
+(define_insn "vec_unpacku_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (zero_extend:V4SI
+ (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)]))))
+ (clobber:TI (match_scratch:TI 2 "=Rac"))]
+ ""
+ "vup %0,%1")
+
+;; ??? This should be a define_insn_and_split.
+(define_insn "vec_unpacks_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (sign_extend:V4SI
+ (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))
+ (clobber:TI (match_scratch:TI 2 "=Rac"))]
+ ""
+ "vmr4w %0,%1,%1\;vups %0,%0"
+ [(set_attr "length" "2")])
+
+;; ??? This should be a define_insn_and_split.
+(define_insn "vec_unpacku_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (zero_extend:V4SI
+ (vec_select:V4HI (match_operand:V8HI 1 "register_operand" "v")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)]))))
+ (clobber:TI (match_scratch:TI 2 "=Rac"))]
+ ""
+ "vmr4w %0,%1,%1\;vup %0,%0"
+ [(set_attr "length" "2")])
+
+;; ??? This should be a define_insn_and_split.
+;; ??? usually, we will have undefined behaviour for overflow, but
+;; this expander can't tell and thus has to mask the input values.
+(define_insn "vec_pack_trunc_v4si"
+ [(set (match_operand:V8HI 0 "register_operand" "=&v")
+ (vec_concat:V8HI
+ (truncate:V4HI (match_operand:V4SI 1 "register_operand" "v"))
+ (truncate:V4HI (match_operand:V4SI 2 "register_operand" "v"))))
+ (clobber:TI (match_scratch:TI 3 "=&Rac"))
+ (clobber:TI (match_scratch:TI 4 "=&v"))]
+ ""
+ "vmvzw %4,%1,85\;vasrpi %0,%4,0\;vmvzw %4,%2,85\;vasrpi %4,%4,0\;\
+vmr4w.240 %0,%4,%4")
+
+(define_insn "jump"
+ [(set (pc) (label_ref (match_operand 0 "" "")))]
+ ""
+ "vjp %r0%#"
+ [(set_attr "type" "jump")])
+
+;; ??? Manual says vjpi, but the assembler only accepts vjp.
+(define_insn "indirect_jump"
+ [(set (pc) (match_operand:HI 0 "register_operand" "S_n"))]
+ ""
+ "vjp %0%#"
+ [(set_attr "type" "jump")])
+
+(define_expand "call"
+ [(match_operand 0 "" "") (match_operand 1 "" "")]
+ ""
+ "emit_call_insn (gen_call_i (XEXP (operands[0], 0), operands[1])); DONE;")
+
+(define_insn "call_i"
+ [(call (mem:QI (match_operand:HI 0 "nonmemory_operand" "i,S_n"))
+ (match_operand 1 "" ""))
+ (clobber (reg:TI CALL_STACK_REG))]
+ ""
+ "@
+ vjl vr31,%0%#
+ vjli vr31,%0%#"
+ [(set_attr "type" "jump")])
+
+(define_expand "call_value"
+ [(match_operand 0 "" "") (match_operand 1 "" "") (match_operand 2 "" "")]
+ ""
+ "
+{
+ emit_call_insn (gen_call_value_i (operands[0],
+ XEXP (operands[1], 0), operands[2]));
+ DONE;
+}")
+
+(define_insn "call_value_i"
+ [(set (match_operand 0 "register_operand")
+ (call (mem:QI (match_operand:HI 1 "nonmemory_operand" "i,S_n"))
+ (match_operand 2 "" "")))
+ (clobber (reg:TI CALL_STACK_REG))]
+ ""
+ "@
+ vjl vr31,%1%#
+ vjli vr31,%1%#"
+ [(set_attr "type" "jump")])
+
+(define_expand "prologue"
+ [(const_int 0)]
+ ""
+ "mxp_expand_prologue (); DONE;")
+
+;; We don't really have a use for the arc core register parameter.  We use
+;; pcl here because there is no latency attached to it on the ARC700 core.
+(define_insn "return"
+ [(return)]
+ ""
+ "vjb vr31,pcl%#"
+ [(set_attr "type" "jump")])
+
+(define_expand "epilogue"
+ [(return)]
+ ""
+ "mxp_expand_epilogue ();")
+
+(define_insn "*store_scalars"
+ [(set (match_operand:V4SI 0 "memory_operand" "=m")
+ (match_operand:V4SI 1 "scalar_save_operand" ""))]
+ ""
+ "vst128 %v1,%0"
+ [(set_attr "type" "store")])
+
+(define_insn "*load_scalars"
+ [(set (match_operand:V4SI 0 "scalar_save_operand" "")
+ (match_operand:V4SI 1 "memory_operand" "m"))]
+ ""
+ "vld128 %v0,%1"
+ [(set_attr "type" "load")])
+
+(define_expand "cmp<mode>"
+ [(set (reg:CC VFLAGS_REG)
+ (compare:CC (match_operand:VECA 0 "reg_or_0_operand" "")
+ (match_operand:VECA 1 "nonmemory_operand" "")))]
+ ""
+ "
+{
+ mxp_compare_op0 = operands[0];
+ mxp_compare_op1 = operands[1];
+ DONE;
+}")
+
+(define_code_iterator any_cond [;;unordered ordered unlt unge uneq ltgt unle ungt
+ eq ne gt ge lt le gtu geu ltu leu])
+
+(define_expand "b<any_cond:code>"
+ [(set (pc)
+ (if_then_else (any_cond:CC (reg:CC VFLAGS_REG)
+ (const_int 0))
+ (label_ref (match_operand 0 ""))
+ (pc)))]
+ ""
+ "
+{
+ mxp_emit_conditional_branch (operands, <CODE>);
+ DONE;
+}")
+
+(define_code_iterator vec_cond [eq ne le lt])
+
+(define_insn "*cmp<code><mode>"
+ [(set (match_operand:HI 0 "register_operand" "=<vx>")
+ (neg:HI (vec_cond:HI (match_operand:HSI 1 "reg_or_0_operand" "<vx>")
+ (match_operand:HSI 2 "reg_or_0_operand" "<vx>"))))
+ (clobber (match_scratch:HSI 3 "=<ax>"))]
+ ""
+ "v<code><unit_suffix>.%L0 %v0,%v1,%v2")
+
+(define_code_iterator vec_rcond [ge gt])
+(define_code_attr rcode [(gt "lt") (ge "le")])
+
+(define_insn "*cmp<code><mode>"
+ [(set (match_operand:HI 0 "register_operand" "=<vx>")
+ (neg:HI (vec_rcond:HI (match_operand:HSI 1 "reg_or_0_operand" "<vx>")
+ (match_operand:HSI 2 "reg_or_0_operand" "<vx>"))))
+ (clobber (match_scratch:HSI 3 "=<ax>"))]
+ ""
+ "v<rcode><unit_suffix>.%L0 %v0,%v2,%v1")
+
+(define_insn "ucmphi"
+ [(set (match_operand:CCI 0 "register_operand" "=Rfl,Rfh,Rfl,Rfh")
+ (compare:CCI (match_operand:HI 1 "reg_or_0_operand"
+ "R01Z,R10Z,R01Z,R10Z")
+ (match_operand:HI 2 "nonmemory_operand"
+ "R01Z,R10Z,i,i")))
+ (clobber (match_scratch:HI 3 "=Ral,Rah,Ral,Rah"))]
+ ""
+ "@
+ vsubw.%L0.f vr62,%v1,%v2
+ vsubw.%L0.f vr62,%v1,%v2
+ vsubw.%L0.f vr62,%v1,%r2
+ vsubw.%L0.f vr62,%v1,%r2")
+
+(define_insn "ucmpsi"
+ [(set (match_operand:CCI 0 "register_operand" "=Rfl,Rfh,Rfl,Rfh,Rfl,Rfh")
+ (compare:CCI (match_operand:SI 1 "reg_or_0_operand"
+ "R03Z,R30Z,R03Z,R30Z,S_n,S_n")
+ (match_operand:SI 2 "reg_or_0_operand"
+ "R03Z,R30Z,S_n,S_n,R03Z,R30Z")))
+ (clobber (match_scratch:SI 3 "=Ral,Rah,Ral,Rah,Ral,Rah"))]
+ ""
+ "@
+ vsube.%L0.f vr62,%v1,%v2
+ vsube.%L0.f vr62,%v1,%v2
+ vksub.%L0.f vr62,%v1,%2
+ vksub.%L0.f vr62,%v1,%2
+ vkrsub.%L0.f vr62,%v2,%1
+ vkrsub.%L0.f vr62,%v2,%1")
+
+(define_insn "vtany"
+ [(set (match_operand:HI 0 "register_operand" "=S_n")
+ (neg:HI (match_operator:HI 2 "comparison_operator"
+ [(match_operand:CCI 1 "register_operand" "Rfg")
+ (const_int 0)])))]
+ ""
+ "vtany.%L1%C2 %0,0xffff")
+
+(define_insn "branch_true"
+ [(set (pc)
+ (if_then_else (ne (match_operand:HI 1 "register_operand" "S_n")
+ (const_int 0))
+ (label_ref (match_operand 0 "" "")) (pc)))]
+ ""
+ "vjp.%1 %r0%#"
+ [(set_attr "type" "jump")])
+
+(define_insn "branch_false"
+ [(set (pc)
+ (if_then_else (ne (match_operand:HI 1 "register_operand" "S_n")
+ (const_int 65535))
+ (label_ref (match_operand 0 "" "")) (pc)))]
+ ""
+ "vjp.!%1 %r0%#"
+ [(set_attr "type" "jump")])
+
+; FIXME: handle reloads of operands 0 / 2.
+; ??? This doesn't seem to ever get generated.
+(define_insn "decrement_and_branch_until_zero"
+ [(set (pc)
+ (if_then_else (ne (match_operand:HI 0 "register_operand" "S_n")
+ (const_int 0))
+ (label_ref (match_operand 1 "" "")) (pc)))
+ (set (match_operand:HI 2 "register_operand" "+S_n")
+ (plus (match_dup 2) (const_int -1)))]
+ ""
+ "vjd.%0 %2,%r1"
+ [(set_attr "type" "jump")])
+
+; operand 0 is the loop count pseudo register
+; operand 1 is the number of loop iterations or 0 if it is unknown
+; operand 2 is the maximum number of loop iterations
+; operand 3 is the number of levels of enclosed loops
+; operand 4 is the label to jump to at the top of the loop
+(define_expand "doloop_end"
+ [(use (match_operand 0 "register_operand" ""))
+ (use (match_operand:QI 1 "const_int_operand" ""))
+ (use (match_operand:QI 2 "const_int_operand" ""))
+ (use (match_operand:QI 3 "const_int_operand" ""))
+ (use (label_ref (match_operand 4 "" "")))]
+ ""
+{
+ emit_jump_insn (gen_decrement_and_branch_until_zero (operands[0], operands[4],
+ operands[0]));
+ DONE;
+})
+
+(define_insn "smax<mode>3"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (smax:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:VECA 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vmax<unit_suffix>.%L0 %v0,%v1,%v2")
+
+(define_insn "smin<mode>3"
+ [(set (match_operand:VECA 0 "register_operand" "=<vx>")
+ (smin:VECA (match_operand:VECA 1 "register_operand" "<vx>")
+ (match_operand:VECA 2 "register_operand" "<vx>")))
+ (clobber (match_scratch:VECA 3 "=&<ax>"))]
+ ""
+ "vmin<unit_suffix>.%L0 %v0,%v1,%v2")
+
+(define_automaton "mxp")
+
+(define_cpu_unit "issue" "mxp")
+
+(define_insn_reservation "int_pipe" 2
+ (eq_attr "type" "other")
+ "issue")
+
+(define_insn_reservation "load" 4
+ (eq_attr "type" "load")
+ "issue")
+
+;; FIXME: add sibcalls.
+;; FIXME: add conditional execution
diff --git a/gcc/config/mxp/mxp.opt b/gcc/config/mxp/mxp.opt
new file mode 100644
index 00000000000..8abc41692b0
--- /dev/null
+++ b/gcc/config/mxp/mxp.opt
@@ -0,0 +1,15 @@
+mbig-data
+Target Report Mask(BIGDATA)
+Allow for data / bss sections overflowing the i9+offset addressing range.
+
+mint16
+Target Report Mask(INT16)
+Make integers 16 bits wide.
+
+mno-vim-label
+Target Report Mask(NO_VIM_LABEL)
+Don't output vim with a label operand.
+
+mno-vld-label
+Target Report Mask(NO_VLD_LABEL)
+Don't output vld with a label operand.
diff --git a/gcc/config/mxp/predicates.md b/gcc/config/mxp/predicates.md
new file mode 100644
index 00000000000..f3359be7b3e
--- /dev/null
+++ b/gcc/config/mxp/predicates.md
@@ -0,0 +1,19 @@
+(define_predicate "scalar_save_operand"
+ (and (match_code "parallel")
+ (match_test "1 /*FIXME*/"))
+)
+
+(define_predicate "reg_or_0_operand"
+ (ior (and (match_code "subreg,reg")
+ (match_test "register_operand (op, mode)"))
+ (and (match_code "const_int,const_vector")
+ (match_test "op == CONST0_RTX (GET_MODE (op))"))))
+
+;; Like register_operand, but this predicate is defined with
+;; define_special_predicate, not define_predicate.
+
+(define_special_predicate "any_register_operand"
+ (match_code "subreg,reg")
+{
+ return register_operand (op, mode);
+})
diff --git a/gcc/config/mxp/regset-config.h b/gcc/config/mxp/regset-config.h
new file mode 100644
index 00000000000..9ec93171479
--- /dev/null
+++ b/gcc/config/mxp/regset-config.h
@@ -0,0 +1,96 @@
+/* There are two plausible definitions for UNITS_PER_WORD:
+ * 16, because that is the register size.
+ * 2, because that is the smallest register size that can be written to
+ without affecting other parts of the same register. This is also
+ the size we have the most arithmetic operations for, and which is used
+ for conditions. */
+#define UNITS_PER_WORD 2
+
+/* Even if we set UNITS_PER_WORD to 16, we can still have smaller registers
+ as far as gcc is concerned, in order to allow more efficient register
+ allocation. A plausible setting is 8, so that we can use an alternate
+ class, but don't lose ourselves in describing irrelevant details.
+ OTOH, if UNITS_PER_WORD is 2, MXP_UNITS_PER_MXP_VEC_REG should also be 2.
+ Note also that for efficient and sane support of SImode values in
+ scalar registers, we want a setting of 2. Unfortunately, this drives up
+ the number of registers to approximately 512. */
+#define MXP_UNITS_PER_MXP_VEC_REG 2
+
+/* Even if we model registers in 16-bit chunks, we need not allow them all
+ to be used for any possible purpose. There are practical advantages to
+ putting artificial restrictions on the usage:
+ - we can keep the number of register classes merely high, rather
+ than excessively high. If we allow all 256 possible lane
+ combinations for scalars, non-scalar vector registers, accumulators,
+ and flags, and all combinations of these, we get 2**32 register classes,
+ requiring 2 terabits for REG_CLASS_CONTENTS.
+ - If we have 8 lane classes to choose from to put an HImode value in,
+ with no real preference for any of them, the compiler is prone to
+ use them all, and end up with costly cross-lane moves.
+
+ Some register sets serve no practical purpose as a register class.
+ - There should be no need for any register class but ALL_REGS to
+ incorporate both accumulator and flag registers, since it seems
+ rather far-fetched that a value can be held in the flags register,
+ but alternatively might be stored in the accumulator.
+ - For HImode values in scalar registers, lanes 0 and 4 are special,
+ all other lanes make no difference. Hence useful masks are 0x01, 0x10,
+ 0x11 and 0xff. Likewise, for SImode values in scalar registers,
+ useful masks are 0x03, 0x30 and 0xff; scalar register classes with
+ masks like 0x31 serve no purpose.
+ - When two register classes with specific lane requirements are
+ combined, it is to be expected that the lanes agree. */
+
+/* The non-scalar vector registers have costly cross-lane moves, so we will
+ only allow two lane sets for arithmetic per mode:
+ GENERAL_REGS in lanes 0..3 and an alternate register set in lanes 4..7.
+ However, we need to allow moves in every lane to make subregs work.
+ To then avoid the arithmetic going astray into an unwanted lane, we need
+ to have exact per-mode lane masks for arithmetic.
+ This gives us a total of eight lane sets for non-scalar vector registers. */
+#define MXP_VEC_LANE_SETS { 0x00, 0x01, 0x10, 0x03, 0x30, 0x0f, 0xf0, 0xff }
+/* We use a smaller set of lane sets for flags and accumulators. */
+#define MXP_FLAG_ACC_LANE_SETS { 0x00, 0x0f, 0xf0, 0xff }
+/* The scalar registers are good for holding 16 and 32 bit values and have
+ special cross-lane operations, so we allow all lanes for 16 and 32 bit
+ values. Only the lanes that line up with the non-scalar vector registers
+ require consideration of their actual lane values.
+ We don't allow wider values in scalar registers since that would
+ increase register pressure on this valuable register set, and also
+ require more register classes. */
+#define MXP_SCALAR_LANE_SETS { 0x00, 0x01, 0x10, 0x03, 0x30, 0xff }
+
+#define MXP_VALID_REG_CLASS(SCALAR,VECTOR,FLAGS,ACC) \
+ /* ALL_REGS is valid. */ \
+ ((SCALAR) == 0xff && (VECTOR) == 0xff && (FLAGS) == 0xff && (ACC) == 0xff \
+ ? 1 \
+ /* Otherwise, reject combination of FLAGS and ACC. */ \
+ : (FLAGS) != 0 && (ACC) != 0 ? 0 \
+ /* Reject combination of mismatched lanes. */ \
+ : (SCALAR) & ~(VECTOR) & 0xf0 && (VECTOR) & ~(SCALAR) & 0x0f ? 0 \
+ : (SCALAR) & ~(VECTOR) & 0x0f && (VECTOR) & ~(SCALAR) & 0xf0 ? 0 \
+ : (SCALAR) & ~(FLAGS) & 0xf0 && (FLAGS) & ~(SCALAR) & 0x0f ? 0 \
+ : (SCALAR) & ~(FLAGS) & 0x0f && (FLAGS) & ~(SCALAR) & 0xf0 ? 0 \
+ : (SCALAR) & ~(ACC) & 0xf0 && (ACC) & ~(SCALAR) & 0x0f ? 0 \
+ : (SCALAR) & ~(ACC) & 0x0f && (ACC) & ~(SCALAR) & 0xf0 ? 0 \
+ : (VECTOR) & ~(FLAGS) & 0xf0 && (FLAGS) & ~(VECTOR) & 0x0f ? 0 \
+ : (VECTOR) & ~(FLAGS) & 0x0f && (FLAGS) & ~(VECTOR) & 0xf0 ? 0 \
+ : (VECTOR) & ~(ACC) & 0xf0 && (ACC) & ~(VECTOR) & 0x0f ? 0 \
+ : (VECTOR) & ~(ACC) & 0x0f && (ACC) & ~(VECTOR) & 0xf0 ? 0 \
+ : (SCALAR) == 0x01 && (VECTOR) & 0x0e && !((VECTOR) & 0xf0) ? 0 \
+ : (VECTOR) == 0x01 && (SCALAR) & 0x0e && !((SCALAR) & 0xf0) ? 0 \
+ : (SCALAR) == 0x10 && (VECTOR) & 0xe0 && !((VECTOR) & 0x0f) ? 0 \
+ : (VECTOR) == 0x10 && (SCALAR) & 0xe0 && !((SCALAR) & 0x0f) ? 0 \
+ : 1)
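+
+/* For example, MXP_VALID_REG_CLASS (0x01, 0x01, 0, 0) is 1, while
+ MXP_VALID_REG_CLASS (0x01, 0x0e, 0, 0) is 0: a lane-0 scalar set
+ combined with a vector set that covers other low lanes but not lane 0
+ serves no purpose. */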
+
+/* Actual fixed hard registers. */
+#define STACK_POINTER_REGNUM 8
+#define BSS_BASE_REGNUM 9
+
+/* Frame pointer and argument pointer are nominally hard registers, but
+ they don't actually exist in hardware, and have to be always eliminated. */
+#define MXP_FAKE_REG_NAMES { "fp", "ap" }
+/* The fake registers (fp, ap) are considered scalar registers.  Indicate
+ the lane set to which they are supposed to belong for the purpose of
+ including them in register classes. */
+#define MXP_FAKE_REG_LANES 0xcc
diff --git a/gcc/config/mxp/t-mxp b/gcc/config/mxp/t-mxp
new file mode 100644
index 00000000000..c9adf86f33b
--- /dev/null
+++ b/gcc/config/mxp/t-mxp
@@ -0,0 +1,31 @@
+$(out_object_file): gt-sh.h
+gt-sh.h : s-gtype ; @true
+
+# ??? the generated .md file must be placed in srcdir, because that's the
+# only place that the gensupport include knows about.
+s-mddeps: $(srcdir)/config/mxp/mxp-regset.md
+build/genconstants.o: genconstants.c s-mxp-regset-h
+mxp-regset.h: s-mxp-regset-h; @true
+mxp-regset.c: s-mxp-regset-c; @true
+$(srcdir)/config/mxp/mxp-regset.md: s-mxp-regset-md; @true
+
+s-mxp-regset-h: build/mxp-genregset$(build_exeext)
+ $(RUN_GEN) build/mxp-genregset$(build_exeext) -h > tmp-mxp-regset.h
+ $(SHELL) $(srcdir)/../move-if-change tmp-mxp-regset.h mxp-regset.h
+ $(STAMP) s-mxp-regset-h
+
+s-mxp-regset-md: build/mxp-genregset$(build_exeext)
+ $(RUN_GEN) build/mxp-genregset$(build_exeext) -m > tmp-mxp-regset.md
+ $(SHELL) $(srcdir)/../move-if-change tmp-mxp-regset.md $(srcdir)/config/mxp/mxp-regset.md
+ $(STAMP) s-mxp-regset-md
+
+s-mxp-regset-c: build/mxp-genregset$(build_exeext)
+ $(RUN_GEN) build/mxp-genregset$(build_exeext) > tmp-mxp-regset.c
+ $(SHELL) $(srcdir)/../move-if-change tmp-mxp-regset.c mxp-regset.c
+ $(STAMP) s-mxp-regset-c
+
+build/mxp-genregset.o : config/mxp/mxp-genregset.c $(BCONFIG_H) $(SYSTEM_H) \
+ errors.h config/mxp/regset-config.h
+
+# The startup code is for the arc core only.
+EXTRA_PARTS=
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 3c57730b83e..fd08a1e1455 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -1085,7 +1085,7 @@ expand_block_move (rtx *operands)
and efficient sequence. */
if (TARGET_SH4A_ARCH && align < 4
&& MEM_ALIGN (operands[0]) >= 32
- && can_move_by_pieces (bytes, 32))
+ && can_move_by_pieces (bytes, 32, 0))
{
rtx dest = copy_rtx (operands[0]);
rtx src = copy_rtx (operands[1]);
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 024407ced58..977c814ae08 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -7382,12 +7382,15 @@ label:
(pc)))
(set (match_dup 0)
(plus:SI (match_dup 0) (const_int -1)))
- (clobber (reg:SI T_REG))])]
+ (clobber (reg:SI T_REG))])
+ (match_operand 5 "" "")]
"TARGET_SH2"
"
{
if (GET_MODE (operands[0]) != SImode)
FAIL;
+ emit_insn (gen_doloop_end_split (operands[0], operands[4]));
+ DONE;
}
")
diff --git a/gcc/cse.c b/gcc/cse.c
index 91cb108e94c..bab0908c984 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -4483,7 +4483,8 @@ cse_insn (rtx insn)
enum machine_mode wider_mode;
for (wider_mode = GET_MODE_WIDER_MODE (mode);
- GET_MODE_BITSIZE (wider_mode) <= BITS_PER_WORD
+ wider_mode != VOIDmode
+ && GET_MODE_BITSIZE (wider_mode) <= BITS_PER_WORD
&& src_related == 0;
wider_mode = GET_MODE_WIDER_MODE (wider_mode))
{
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 43e91afe8b4..128370a0ccf 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -6984,6 +6984,7 @@ instructions, but allow the compiler to schedule those calls.
@menu
* Alpha Built-in Functions::
+* ARC Built-in Functions::
* ARM iWMMXt Built-in Functions::
* ARM NEON Intrinsics::
* Blackfin Built-in Functions::
@@ -7081,6 +7082,287 @@ void *__builtin_thread_pointer (void)
void __builtin_set_thread_pointer (void *)
@end smallexample
+@node ARC Built-in Functions
+@subsection ARC Built-in Functions
+
+SIMD instructions can be generated for ARC using the built-in functions
+provided for the ARC cores when the @option{-msimd} switch is used.
+
+The set of builtins defined for ARC can be categorized according to their
+signatures into the following types:
+
+@smallexample
+I) Return type : v8hi
+ First argument : v8hi
+ Second argument: v8hi
+
+v8hi __builtin_arc_vaddaw (v8hi, v8hi)
+v8hi __builtin_arc_vaddw (v8hi, v8hi)
+v8hi __builtin_arc_vavb (v8hi, v8hi)
+v8hi __builtin_arc_vavrb (v8hi, v8hi)
+v8hi __builtin_arc_vdifaw (v8hi, v8hi)
+v8hi __builtin_arc_vdifw (v8hi, v8hi)
+v8hi __builtin_arc_vmaxaw (v8hi, v8hi)
+v8hi __builtin_arc_vmaxw (v8hi, v8hi)
+v8hi __builtin_arc_vminaw (v8hi, v8hi)
+v8hi __builtin_arc_vminw (v8hi, v8hi)
+v8hi __builtin_arc_vmulaw (v8hi, v8hi)
+v8hi __builtin_arc_vmulfaw (v8hi, v8hi)
+v8hi __builtin_arc_vmulfw (v8hi, v8hi)
+v8hi __builtin_arc_vmulw (v8hi, v8hi)
+v8hi __builtin_arc_vsubaw (v8hi, v8hi)
+v8hi __builtin_arc_vsubw (v8hi, v8hi)
+v8hi __builtin_arc_vsummw (v8hi, v8hi)
+v8hi __builtin_arc_vand (v8hi, v8hi)
+v8hi __builtin_arc_vandaw (v8hi, v8hi)
+v8hi __builtin_arc_vbic (v8hi, v8hi)
+v8hi __builtin_arc_vbicaw (v8hi, v8hi)
+v8hi __builtin_arc_vor (v8hi, v8hi)
+v8hi __builtin_arc_vxor (v8hi, v8hi)
+v8hi __builtin_arc_vxoraw (v8hi, v8hi)
+v8hi __builtin_arc_veqw (v8hi, v8hi)
+v8hi __builtin_arc_vlew (v8hi, v8hi)
+v8hi __builtin_arc_vltw (v8hi, v8hi)
+v8hi __builtin_arc_vnew (v8hi, v8hi)
+v8hi __builtin_arc_vmr1aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr1w (v8hi, v8hi)
+v8hi __builtin_arc_vmr2aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr2w (v8hi, v8hi)
+v8hi __builtin_arc_vmr3aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr3w (v8hi, v8hi)
+v8hi __builtin_arc_vmr4aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr4w (v8hi, v8hi)
+v8hi __builtin_arc_vmr5aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr5w (v8hi, v8hi)
+v8hi __builtin_arc_vmr6aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr6w (v8hi, v8hi)
+v8hi __builtin_arc_vmr7aw (v8hi, v8hi)
+v8hi __builtin_arc_vmr7w (v8hi, v8hi)
+v8hi __builtin_arc_vmrb (v8hi, v8hi)
+v8hi __builtin_arc_vh264f (v8hi, v8hi)
+v8hi __builtin_arc_vh264ft (v8hi, v8hi)
+v8hi __builtin_arc_vh264fw (v8hi, v8hi)
+v8hi __builtin_arc_vvc1f (v8hi, v8hi)
+v8hi __builtin_arc_vvc1ft (v8hi, v8hi)
+@end smallexample
+
+@smallexample
+II) Return type : v8hi
+ First argument : v8hi
+ Second argument: int
+
+v8hi __builtin_arc_vbaddw (v8hi, int)
+v8hi __builtin_arc_vbmaxw (v8hi, int)
+v8hi __builtin_arc_vbminw (v8hi, int)
+v8hi __builtin_arc_vbmulaw (v8hi, int)
+v8hi __builtin_arc_vbmulfw (v8hi, int)
+v8hi __builtin_arc_vbmulw (v8hi, int)
+v8hi __builtin_arc_vbrsubw (v8hi, int)
+v8hi __builtin_arc_vbsubw (v8hi, int)
+@end smallexample
+
+@smallexample
+III) Return type : v8hi
+ First argument : v8hi
+ Second argument: const int
+
+ The second argument in these builtins has to be an unsigned 3-bit
+integer constant, as it indicates registers I0-I7:
+
+v8hi __builtin_arc_vasrw (v8hi, const int)
+v8hi __builtin_arc_vsr8 (v8hi, const int)
+v8hi __builtin_arc_vsr8aw (v8hi, const int)
+@end smallexample
+
+@smallexample
+IV) Return type : v8hi
+ First argument : v8hi
+ Second argument: const int
+
+ The second argument in these builtins has to be an unsigned 6-bit
+integer constant:
+
+v8hi __builtin_arc_vasrrwi (v8hi, const int)
+v8hi __builtin_arc_vasrsrwi (v8hi, const int)
+v8hi __builtin_arc_vasrwi (v8hi, const int)
+v8hi __builtin_arc_vasrpwbi (v8hi, const int)
+v8hi __builtin_arc_vasrrpwbi (v8hi, const int)
+v8hi __builtin_arc_vsr8awi (v8hi, const int)
+v8hi __builtin_arc_vsr8i (v8hi, const int)
+@end smallexample
+
+@smallexample
+V) Return type : v8hi
+ First argument : v8hi
+ Second argument: const int
+
+ The second argument in these builtins has to be an unsigned 8-bit
+integer constant:
+
+v8hi __builtin_arc_vmvaw (v8hi, const int)
+v8hi __builtin_arc_vmvw (v8hi, const int)
+v8hi __builtin_arc_vmvzw (v8hi, const int)
+v8hi __builtin_arc_vd6tapf (v8hi, const int)
+@end smallexample
+
+@smallexample
+VI) Return type : v8hi
+ First argument : int
+ Second argument: const int
+
+ The second argument in these builtins has to be an unsigned 8-bit
+integer constant:
+
+v8hi __builtin_arc_vmovaw (int, const int)
+v8hi __builtin_arc_vmovw (int, const int)
+v8hi __builtin_arc_vmovzw (int, const int)
+@end smallexample
+
+@smallexample
+VII) Return type : v8hi
+ First argument : v8hi
+
+v8hi __builtin_arc_vabsaw (v8hi)
+v8hi __builtin_arc_vabsw (v8hi)
+v8hi __builtin_arc_vaddsuw (v8hi)
+v8hi __builtin_arc_vsignw (v8hi)
+v8hi __builtin_arc_vexch1 (v8hi)
+v8hi __builtin_arc_vexch2 (v8hi)
+v8hi __builtin_arc_vexch4 (v8hi)
+v8hi __builtin_arc_vupbaw (v8hi)
+v8hi __builtin_arc_vupbw (v8hi)
+v8hi __builtin_arc_vupsbaw (v8hi)
+v8hi __builtin_arc_vupsbw (v8hi)
+@end smallexample
+
+@smallexample
+VIII) Return type : void
+ First argument : int
+ Second argument : int
+
+void __builtin_arc_vdirun (int, int)
+void __builtin_arc_vdorun (int, int)
+@end smallexample
+
+@smallexample
+IX) Return type : void
+ First argument : const int
+ Second argument : int
+
+ The first argument in these builtins has to be an unsigned 3-bit
+integer constant, as it indicates DR0-DR7 DMA channel setup registers. The file
+arc-simd.h also profides defines which can be used in place of the DMA register
+numbers to facilitate better code readability:
+
+void __builtin_arc_vdiwr (const int, int)
+void __builtin_arc_vdowr (const int, int)
+@end smallexample
+
+@smallexample
+X) Return type : void
+ First argument : int
+
+void __builtin_arc_vrec (int)
+void __builtin_arc_vrun (int)
+void __builtin_arc_vrecrun (int)
+void __builtin_arc_vendrec (int)
+@end smallexample
+
+@smallexample
+XI) Return type : v8hi
+ First argument : v8hi
+ Second argument : const int
+ Third argument : const int
+
+ The second argument in these builtins has to be an unsigned 3-bit
+integer constant, as it indicates I0-I7 registers. The third argument has to be
+an unsigned 8-bit quantity. The file arc-simd.h also provides defines which can
+be used in place of the I0-I7 register numbers to facilitate better code readability:
+
+v8hi __builtin_arc_vld32wh (v8hi, const int, const int)
+v8hi __builtin_arc_vld32wl (v8hi, const int, const int)
+v8hi __builtin_arc_vld64 (v8hi, const int, const int)
+v8hi __builtin_arc_vld32 (v8hi, const int, const int)
+
+NOTE: Although the equivalent hardware instructions do not take a simd register
+ as an operand, these builtins overwrite the relevant bits of the v8hi
+ quantity provided as the first argument with the value loaded from
+ the [Ib, u8] location in the SDM.
+
+@end smallexample
+
+@smallexample
+XII) Return type : v8hi
+ First argument : const int
+ Second argument : const int
+
+ The first argument in these builtins has to be an unsigned 3-bit
+integer constant, as it indicates I0-I7 registers. The second argument has to be
+an unsigned 8-bit quantity. The file arc-simd.h also provides defines which can
+be used in place of the I0-I7 register numbers to facilitate better code readability:
+
+v8hi __builtin_arc_vld64w (const int, const int)
+v8hi __builtin_arc_vld128 (const int, const int)
+@end smallexample
+
+@smallexample
+XIII) Return type : void
+ First argument : v8hi
+ Second argument : const int
+ Third argument : const int
+
+ The second argument in these builtins has to be an unsigned 3-bit
+integer constant, as it indicates I0-I7 registers. The third argument has to be
+an unsigned 8-bit quantity. The file arc-simd.h also provides defines which can
+be used in place of the I0-I7 register numbers to facilitate better code readability:
+
+void __builtin_arc_vst128 (v8hi, const int, const int)
+void __builtin_arc_vst64 (v8hi, const int, const int)
+@end smallexample
+
+
+@smallexample
+XIV) Return type : void
+ First argument : v8hi
+ Second argument : const int
+ Third argument : const int
+ Fourth argument : const int
+
+ The second argument has to be an unsigned 3-bit quantity to identify the
+16-bit subregister to be stored. The third argument in these builtins has to be
+an unsigned 3-bit integer constant, as it indicates I0-I7 registers. The fourth
+argument has to be an unsigned 8-bit quantity. The file arc-simd.h also provides
+defines which can be used in place of the I0-I7 register numbers to facilitate
+better code readability:
+
+void __builtin_arc_vst16_n (v8hi, const int, const int, const int)
+void __builtin_arc_vst32_n (v8hi, const int, const int, const int)
+@end smallexample
+
+
+@smallexample
+XV) Return type : void
+ First argument : const int
+
+ The argument has to be an unsigned 6-bit quantity.
+
+void __builtin_arc_vinti (const int)
+@end smallexample
+
+@smallexample
+NOTE: For all builtins __builtin_arc_<someinsn>, the header file arc-simd.h also
+ provides macros called _<someinsn> which can be used for programming ease
+ and improved readability.
+
+ Besides these, the following extra defines and typedefs are also provided
+in the header file:
+
+#define _setup_dma_in_channel_reg _vdiwr
+#define _setup_dma_out_channel_reg _vdowr
+
+typedef int __v4si __attribute__((vector_size(16)));
+typedef short __v8hi __attribute__((vector_size(16)));
+@end smallexample
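+
+As a brief illustration, a category I builtin can be called like any
+other function, using the @code{__v8hi} typedef shown above. This is
+a minimal sketch; the function name @code{vec_add} is hypothetical:
+
+@smallexample
+typedef short __v8hi __attribute__((vector_size(16)));
+
+__v8hi
+vec_add (__v8hi a, __v8hi b)
+@{
+  /* Adds the eight 16-bit elements of a and b element-wise,
+     using a single SIMD instruction.  */
+  return __builtin_arc_vaddw (a, b);
+@}
+@end smallexample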
+
@node ARM iWMMXt Built-in Functions
@subsection ARM iWMMXt Built-in Functions
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 7e6da15515d..4efc8c2871c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -427,8 +427,33 @@ Objective-C and Objective-C++ Dialects}.
@emph{ARC Options}
@gccoptlist{-EB -EL @gol
--mmangle-cpu -mcpu=@var{cpu} -mtext=@var{text-section} @gol
--mdata=@var{data-section} -mrodata=@var{readonly-data-section}}
+-mbig-endian -mlittle-endian @gol
+-mA4 -mA5 -mA6 -mARC600 -mA7 -mARC700 -mmixed-code @gol
+-mtext=@var{text-section} -mdata=@var{data-section} @gol
+-mrodata=@var{readonly-data-section} @gol
+-malign-loops -mno-align-loops @gol
+-mvolatile-cache -mno-volatile-cache @gol
+-mno-cond-exec @gol
+-mnorm @gol
+-mswap @gol
+-mbarrel_shifter @gol
+-mmul64 @gol
+-mmin_max @gol
+-mEA @gol
+-msoft-float @gol
+-mno-mpy @gol
+-mno-brcc @gol
+-mlong-calls @gol
+-mno-sdata @gol
+-mno-millicode @gol
+-mspfp @gol
+-mspfp_compact @gol
+-mspfp_fast @gol
+-mdpfp @gol
+-mdpfp_compact @gol
+-mdpfp_fast @gol
+-msimd @gol
+}
@emph{ARM Options}
@gccoptlist{-mapcs-frame -mno-apcs-frame @gol
@@ -8684,44 +8709,162 @@ These options are defined for ARC implementations:
@table @gcctabopt
@item -EL
@opindex EL
+@itemx -mlittle-endian
+@opindex mlittle-endian
Compile code for little endian mode. This is the default.
@item -EB
@opindex EB
+@itemx -mbig-endian
+@opindex mbig-endian
Compile code for big endian mode.
-@item -mmangle-cpu
-@opindex mmangle-cpu
-Prepend the name of the cpu to all public symbol names.
-In multiple-processor systems, there are many ARC variants with different
-instruction and register set characteristics. This flag prevents code
-compiled for one cpu to be linked with code compiled for another.
-No facility exists for handling variants that are ``almost identical''.
-This is an all or nothing option.
+@item -mA4
+@opindex mA4
+Generates code for ARCtangent-A4 processor. This is the default.
-@item -mcpu=@var{cpu}
-@opindex mcpu
-Compile code for ARC variant @var{cpu}.
-Which variants are supported depend on the configuration.
-All variants support @option{-mcpu=base}, this is the default.
+@item -mA5
+@opindex mA5
+Generates ARCompact 32-bit code for ARCtangent-A5 processor.
+
+@item -mA6
+@opindex mA6
+@itemx -mARC600
+@opindex mARC600
+Generates ARCompact 32-bit code for ARCtangent-ARC600 processor.
+
+@item -mA7
+@opindex mA7
+@itemx -mARC700
+@opindex mARC700
+Generates ARCompact 32-bit code for ARCtangent-ARC700 processor.
+
+@item -mmixed-code
+@opindex mmixed-code
+Generates ARCompact 16-bit instructions intermixed with 32-bit instructions
+for ARCtangent-A5 and higher processors.
@item -mtext=@var{text-section}
@itemx -mdata=@var{data-section}
@itemx -mrodata=@var{readonly-data-section}
-@opindex mtext
-@opindex mdata
-@opindex mrodata
+@opindex mtext=@var{text-section}
+@opindex mdata=@var{data-section}
+@opindex mrodata=@var{readonly-data-section}
Put functions, data, and readonly data in @var{text-section},
@var{data-section}, and @var{readonly-data-section} respectively
by default. This can be overridden with the @code{section} attribute.
@xref{Variable Attributes}.
-@item -mfix-cortex-m3-ldrd
-@opindex mfix-cortex-m3-ldrd
-Some Cortex-M3 cores can cause data corruption when @code{ldrd} instructions
-with overlapping destination and base registers are used. This option avoids
-generating these instructions. This option is enabled by default when
-@option{-mcpu=cortex-m3} is specified.
+@item -malign-loops
+@opindex malign-loops
+Align loop starts to 32-byte boundaries (cache line size).
+
+@item -mno-align-loops
+@opindex mno-align-loops
+Do not align loop starts to 32-byte boundaries (cache line size).
+
+@item -mvolatile-cache
+@opindex mvolatile-cache
+Allow caching of volatile references. This is the default.
+
+@item -mno-volatile-cache
+@opindex mno-volatile-cache
+Do not cache volatile references.
+
+@item -mno-cond-exec
+@opindex mno-cond-exec
+Do not generate predicated instructions for conditional execution.
+
+@item -mnorm
+@opindex mnorm
+Allow generation of norm instruction through the use of builtins. For
+ARC700, the -mnorm option is turned on by default.
+
+@item -mswap
+@opindex mswap
+Allow generation of swap instruction through the use of builtins. For
+ARC700, the -mswap option is turned on by default.
+
+@item -mbarrel_shifter
+@opindex mbarrel_shifter
+Allow generation of the multiple-shift instructions supported by the
+barrel shifter unit. For post-A4 cores, such as A5, ARC600 and ARC700,
+the -mbarrel_shifter option is turned on by default.
+
+@item -mmul64
+@opindex mmul64
+Allow generation of mul64 and mulu64 instructions, by using
+builtins. This option is not allowed for ARC700.
+
+@item -mmin_max
+@opindex mmin_max
+Allow generation of min and max instructions for A4. For post-A4
+cores, these are generated by default.
+
+@item -mno-mpy
+@opindex mno-mpy
+Disallow generation of mpy, mpyh, mpyhu and mpyu instructions for ARC700. This
+option is allowed only for the ARC700 processor.
+
+@item -mEA
+@opindex mEA
+Allow generation of extended arithmetic instructions.
+
+@item -msoft-float
+@opindex msoft-float
+Dummy flag. Many applications use this flag generically, and soft-floats
+are the only option on ARC.
+
+@item -mno-brcc
+@opindex mno-brcc
+Disable generation of BRcc instructions.
+
+@item -mlong-calls
+@opindex mlong-calls
+Make all function calls register-indirect. This flag can be overridden
+by using the @samp{short_call} function attribute.
+
+@item -mno-sdata
+@opindex mno-sdata
+Do not generate sdata references.
+
+@item -mno-millicode
+@opindex mno-millicode
+Do not generate millicode thunk code for saving and restoring registers in
+functions' prologue/epilogue. This flag is needed only with -Os, since millicode
+thunks are used only when optimizing for size.
+
+@end table
+
+@subsection FPX Options
+@cindex ARC FPX Options
+These options can be used to generate code for the FPX (Floating Point
+eXtension) extension unit.
+
+@table @gcctabopt
+@item -mspfp
+@opindex mspfp
+@itemx -mspfp_compact
+@opindex mspfp_compact
+Generate Single Precision FPX (compact) instructions.
+
+@item -mspfp_fast
+@opindex mspfp_fast
+Generate Single Precision FPX (fast) instructions.
+
+@item -mdpfp
+@opindex mdpfp
+@itemx -mdpfp_compact
+@opindex mdpfp_compact
+Generate Double Precision FPX (compact) instructions.
+
+@item -mdpfp_fast
+@opindex mdpfp_fast
+Generate Double Precision FPX (fast) instructions.
+
+@item -msimd
+@opindex msimd
+Enable generation of ARC SIMD instructions via target-specific builtins.
@end table
@@ -8733,6 +8876,13 @@ These @samp{-m} options are defined for Advanced RISC Machines (ARM)
architectures:
@table @gcctabopt
+@item -mfix-cortex-m3-ldrd
+@opindex mfix-cortex-m3-ldrd
+Some Cortex-M3 cores can cause data corruption when @code{ldrd} instructions
+with overlapping destination and base registers are used. This option avoids
+generating these instructions. This option is enabled by default when
+@option{-mcpu=cortex-m3} is specified.
+
@item -mabi=@var{name}
@opindex mabi
Generate code for the specified ABI@. Permissible values are: @samp{apcs-gnu},
diff --git a/gcc/doc/mxp.texi b/gcc/doc/mxp.texi
new file mode 100644
index 00000000000..69cef2657da
--- /dev/null
+++ b/gcc/doc/mxp.texi
@@ -0,0 +1,106 @@
+data/bss layout: uses different sections ordered by minimum addressing scale.
+no separate .rodata section(s).
+.data16: scaling factor 16
+.data8, .data4, .data2, .data1: likewise for smaller scaling factors
+.bss1, .bss2, .bss4, .bss8, .bss16: bss sections for increasing scaling
+factors
+The data base pointer register i9 typically points at the place where .bss1
+ends and .data1 starts. It might be moved up or down if allocation
+would otherwise overflow on one side, and on the other side is slack.
+
+Tasks to be done:
+- Convert this document into a proper texinfo file, incorporate it into
+ gcc documentation, and test 'make info'
+- binutils support for using undefined labels in mxp data/bss sections
+ as offsets in memory addresses.
+- binutils support for mxp code labels. For a start, we are looking to
+ have a special text section in which to put all the mxp code. At link time,
+ this special text section is considered to be loaded at the start of the
+ SCM for purposes of resolving SCM absolute relocations. However, the
+ code actually gets a different load address for the ARC700 core, and gets
+ a j_s [blink] instruction appended (extra points if you make this a j_s.d
+ [blink] before the last insn without the potential to break stuff...)
+ Later we will likely want to move to multiple of such special text sections
+ to handle overlays, and possibly also have different load addreses to
+ accomodate multiple overlays. If we want to be able to handle SCM PIE,
+ I.e. code that can be loaded to varying SCM locations, the arc will need
+ to load an a core register with the SCM load address before calling the
+ SCQ loading code, and the latter will have to use add instructions to
+ calculate SCM locations on the fly.
+ No matter if we use such add instructions or long immediates, instructions
+ that reference SCM memory locations work out as 64 bits of code on the
+ arc side, while the other SIMD instructions are injected with a single
+ 32-bit word from the arc side. Thus we have a discrepancy between the
+ space taken up by the instructions in the object file and the size we
+ have to consider for purposes of calculating SCM addresses.
+ Luckily, these differences are constant from the first time the SIMD
+ assembly is emitted. Thus, the total number of instructions
+ with SCM references that precede an SCM label gives us the number of
+ 32-bit words to subtract from the total number of preceding 32-bit words
+ to arrive at the offset from the SCM load address. (E.g. if 40 32-bit
+ words precede a label and 6 of them are the extra words of SCM-referencing
+ instructions, the label sits at word 34 of the SCM image.)
+ To account for preceding SCM references in the same module, we can make
+ the SCM label appear to be accordingly earlier in the module.
+ (This will have to be compensated for if we want to do any linktime
+ relaxation at some later point in time.)
+ We also need to keep a tally of the total number of SCM references in each
+ module.
+ When linking multiple modules together, the total of these tallies for all
+ preceding modules needs to be added up, and subtracted from the value of
+ each label.
+ Like SCM references, (other) long immediates bulk up the code on the arc
+ side while leaving the SIMD instruction count the same, so they have to
+ be tallied up together with the SCM references.
+- library functions:
+ - divsi3: use sh64 code as starting point. Note that there is no
+ point in loading the table base address before the function call, because
+ all SCM memory addressing has an offset.
+ divv8hi3, divv4si3: use older sh64 code without lookup table as starting
+ point
+ - divhi3
+- Investigate register class preferencing issues. Naming lane sets with
+ lane 0 first actually results in the wrong reg_class_subunions. In theory
+ the ordierng should be something like 00, 10, 01, 30, 03, ff, to get the
+ sets with lane zero prefered for subunions. preferred classes can be
+ seen in the *lreg dump file after compiling with -da. Another avenue to
+ saner subunions is to add proper union lane sets 11, 33.
+ The paradoxical thing I am seeing here is that the instruction count for
+ muldi increases when I introduce these measures.
+ Another - or complementary - approach is to shift the cost balance.
+ In theory REGISTER_MOVE_COST should have an influence, but in practice
+ I haven't seen any. What works is adding extra cost to insn alternatives
+ which allow non-lane0 registers. A problem here - and in general - is that
+ we want a viable alternate register class. Jacking up the cost for
+ non-lane0 alternatives can disparage these to the point that we lose the
+ altclass. We also have often altclasses that don't actually contain any
+ extra valid registers. In theory increasing MEMORY_MOVE_COST can
+ compensate, however I see paradoxical outcomes when I try to make this
+ dependent on !(reload_in_progress || reload_completed). I have a diff
+ for some of the changes I've tried in
+ /home/joernr/prefclass-experiments-20080428.
+ Maybe we need to jack up REGISTER_MOVE_COST, MEMORY_MOVE_COST and RTX_COST
+ consistently to get a more fine-grained resolution of costs.
+- Obtain code samples of code that we think is suitable and relevant for
+ autovectorization. E.g. some codec.
+ Dependent tasks:
+ - Identify the actual section of this code that we think we should be
+ able to autovectorize.
+ - Make sure autovectorization takes place.
+- Partitioning work. Check with IBM Haifa and other Milepost partners
+ what they already have.
+ Inasmuch as not already done:
+ - Identify individual functions and subgraphs of the callgraph we can move
+ to the SIMD engine.
+ - Add code to tree loop analysis to break out loops that we can move to
+ the SIMD engine.
+ - Handle data sets that don't fit into SDM. The simplest to implement
+ approach is probably to do loop tiling at the interface between arc core
+ and simd engine. OTOH we can get much better parallelism if we hand
+ over the entire work to the simd engine and let it DMA out the previous
+ block, and DMA in the next block, while it is performing calculations.
+ For this we need to represent main memory pointers.
+ This need not necessarily be exposed as pointers to mxp-gcc; we could
+ express the loop tiling with intrinsics.
+- Add doloop pattern
+- Convert multi-insn define_insn patterns into define_insn_and_split patterns.
+- Add scheduler description
+- Where missing, add comments to the code according to GNU coding standards.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 7dfb46b3a0d..5e9a2792337 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -2758,6 +2758,12 @@ Do not define this macro if you do not define
is @code{BITS_PER_WORD} bits wide is correct for your machine.
@end defmac
+@deftypefn {Target Hook} bool TARGET_PRESERVE_RELOAD_P (rtx @var{in})
+Called when doing an input reload using the value @var{in}. Return true
+if the reload register should be available for inheritance later. This
+might increase the spill pressure, but enhances reload inheritance.
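+
+For instance, a port might preserve reload registers that were loaded
+with constants, since such values are likely to be profitable to
+inherit later. A minimal, hypothetical implementation:
+
+@smallexample
+static bool
+mxp_preserve_reload_p (rtx in)
+@{
+  return CONSTANT_P (in);
+@}
+@end smallexample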
+@end deftypefn
+
@defmac SMALL_REGISTER_CLASSES
On some machines, it is risky to let hard registers live across arbitrary
insns. Typically, these machines have instructions that require values
@@ -5962,6 +5968,13 @@ will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less
than @code{MOVE_RATIO}.
@end defmac
+@defmac CAN_MOVE_BY_PIECES (@var{size}, @var{alignment})
+A C expression used to determine whether a chunk of memory is to be copied
+in pieces either by @code{move_by_pieces}, or by a movmem expander. This
+is used by other optimizers that want to anticipate how a block copy is
+going to be done. If not defined, @code{MOVE_BY_PIECES_P} is used instead.
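+
+A target that wants small or well-aligned blocks copied in pieces might
+define it along these lines (a hypothetical definition; the thresholds
+are invented for the example):
+
+@smallexample
+#define CAN_MOVE_BY_PIECES(SIZE, ALIGN) \
+  ((SIZE) <= 64 || (ALIGN) >= 32)
+@end smallexample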
+@end defmac
+
@defmac MOVE_MAX_PIECES
A C expression used by @code{move_by_pieces} to determine the largest unit
a load or store used to copy memory is. Defaults to @code{MOVE_MAX}.
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 68d0ee8da3b..e83c7df3571 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -1973,6 +1973,7 @@ dwarf2out_frame_debug_expr (rtx expr, const char *label)
cfa_temp.reg = cfa.reg;
cfa_temp.offset = cfa.offset;
}
+else if (dest == stack_pointer_rtx && src == frame_pointer_rtx) ; /*FIXME*/
else
{
/* Saving a register in a register. */
@@ -2143,6 +2144,10 @@ dwarf2out_frame_debug_expr (rtx expr, const char *label)
}
return;
+ case MEM:
+ /* FIXME. Need this for epilogues. */
+ break;
+
default:
gcc_unreachable ();
}
@@ -10302,6 +10307,7 @@ loc_descriptor (rtx rtl, enum var_init_status initialized)
break;
case MEM:
+ rtl = targetm.delegitimize_address (rtl);
loc_result = mem_loc_descriptor (XEXP (rtl, 0), GET_MODE (rtl),
initialized);
if (loc_result == NULL)
diff --git a/gcc/explow.c b/gcc/explow.c
index 498d40e284e..3e11ac40c5c 100644
--- a/gcc/explow.c
+++ b/gcc/explow.c
@@ -418,8 +418,15 @@ memory_address (enum machine_mode mode, rtx x)
/* By passing constant addresses through registers
we get a chance to cse them. */
- if (! cse_not_expected && CONSTANT_P (x) && CONSTANT_ADDRESS_P (x))
- x = force_reg (Pmode, x);
+ if (1 && ! cse_not_expected && CONSTANT_P (x) && CONSTANT_ADDRESS_P (x))
+ {
+ /* If the target has offset addressing and a suitable LEGITIMIZE_ADDRESS
+ definition, that can give much better cse. */
+#if 1
+ LEGITIMIZE_ADDRESS (x, oldx, mode, done);
+#endif
+ x = force_reg (Pmode, x);
+ }
/* We get better cse by rejecting indirect addressing at this stage.
Let the combiner create indirect addresses where appropriate.
diff --git a/gcc/expr.c b/gcc/expr.c
index 0f46b199883..5c6512b31b2 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -127,7 +127,6 @@ static unsigned HOST_WIDE_INT move_by_pieces_ninsns (unsigned HOST_WIDE_INT,
static void move_by_pieces_1 (rtx (*) (rtx, ...), enum machine_mode,
struct move_by_pieces *);
static bool block_move_libcall_safe_for_call_parm (void);
-static bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned, unsigned, HOST_WIDE_INT);
static tree emit_block_move_libcall_fn (int);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
static rtx clear_by_pieces_1 (void *, HOST_WIDE_INT, enum machine_mode);
@@ -868,13 +867,20 @@ convert_modes (enum machine_mode mode, enum machine_mode oldmode, rtx x, int uns
#define STORE_MAX_PIECES MIN (MOVE_MAX_PIECES, 2 * sizeof (HOST_WIDE_INT))
/* Determine whether the LEN bytes can be moved by using several move
- instructions. Return nonzero if a call to move_by_pieces should
- succeed. */
+ instructions. If consider_movmem is false: Return nonzero if a call
+ to move_by_pieces should be done for this move.
+ If consider_movmem is true: Return nonzero if we want to do this move
+ in pieces, either via move_by_pieces, or via movmem. */
int
can_move_by_pieces (unsigned HOST_WIDE_INT len,
- unsigned int align ATTRIBUTE_UNUSED)
+ unsigned int align ATTRIBUTE_UNUSED,
+ bool consider_movmem ATTRIBUTE_UNUSED)
{
+#ifdef CAN_MOVE_BY_PIECES
+ if (consider_movmem)
+ return CAN_MOVE_BY_PIECES (len, align);
+#endif
return MOVE_BY_PIECES_P (len, align);
}
@@ -1302,7 +1308,7 @@ block_move_libcall_safe_for_call_parm (void)
/* A subroutine of emit_block_move. Expand a movmem pattern;
return true if successful. */
-static bool
+bool
emit_block_move_via_movmem (rtx x, rtx y, rtx size, unsigned int align,
unsigned int expected_align, HOST_WIDE_INT expected_size)
{
@@ -6988,9 +6994,9 @@ expand_constructor (tree exp, rtx target, enum expand_modifier modifier,
&& ! (target != 0 && safe_from_p (target, exp, 1)))
|| TREE_ADDRESSABLE (exp)
|| (host_integerp (TYPE_SIZE_UNIT (type), 1)
- && (! MOVE_BY_PIECES_P
+ && (! can_move_by_pieces
(tree_low_cst (TYPE_SIZE_UNIT (type), 1),
- TYPE_ALIGN (type)))
+ TYPE_ALIGN (type), 1))
&& ! mostly_zeros_p (exp))))
|| ((modifier == EXPAND_INITIALIZER || modifier == EXPAND_CONST_ADDRESS)
&& TREE_CONSTANT (exp)))
diff --git a/gcc/expr.h b/gcc/expr.h
index 48e0e2d6dcd..9eb5c0e499b 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -411,6 +411,8 @@ extern rtx emit_block_move (rtx, rtx, rtx, enum block_op_methods);
extern rtx emit_block_move_via_libcall (rtx, rtx, rtx, bool);
extern rtx emit_block_move_hints (rtx, rtx, rtx, enum block_op_methods,
unsigned int, HOST_WIDE_INT);
+extern bool emit_block_move_via_movmem (rtx, rtx, rtx, unsigned,
+ unsigned, HOST_WIDE_INT);
/* Copy all or part of a value X into registers starting at REGNO.
The number of registers to be filled is NREGS. */
@@ -469,7 +471,7 @@ extern bool set_storage_via_setmem (rtx, rtx, rtx, unsigned int,
/* Determine whether the LEN bytes can be moved by using several move
instructions. Return nonzero if a call to move_by_pieces should
succeed. */
-extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int);
+extern int can_move_by_pieces (unsigned HOST_WIDE_INT, unsigned int, bool);
/* Return nonzero if it is desirable to store LEN bytes generated by
CONSTFUN with several move instructions by store_by_pieces
diff --git a/gcc/final.c b/gcc/final.c
index aceeb7cfb13..a683501f503 100644
--- a/gcc/final.c
+++ b/gcc/final.c
@@ -304,6 +304,7 @@ dbr_sequence_length (void)
`insn_current_length'. */
static int *insn_lengths;
+static char *uid_lock_length;
VEC(int,heap) *insn_addresses_;
@@ -438,6 +439,20 @@ get_attr_length (rtx insn)
return get_attr_length_1 (insn, insn_default_length);
}
+#ifdef HAVE_ATTR_lock_length
+int
+get_attr_lock_length (rtx insn)
+{
+ if (uid_lock_length && insn_lengths_max_uid > INSN_UID (insn))
+ return uid_lock_length[INSN_UID (insn)];
+ return get_attr_length_1 (insn, insn_min_lock_length);
+}
+#define INSN_VARIABLE_LENGTH_P(INSN) \
+ (insn_variable_length_p (INSN) || insn_variable_lock_length_p (INSN))
+#else
+#define INSN_VARIABLE_LENGTH_P(INSN) (insn_variable_length_p (INSN))
+#endif
+
/* Obtain the current length of an insn. If branch shortening has been done,
get its actual length. Otherwise, get its minimum length. */
int
@@ -839,6 +854,7 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
rtx body;
int uid;
rtx align_tab[MAX_CODE_ALIGN];
+ int (*length_fun) (rtx);
#endif
@@ -849,6 +865,10 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
free (uid_shuid);
uid_shuid = XNEWVEC (int, max_uid);
+#ifdef HAVE_ATTR_lock_length
+ uid_lock_length = XNEWVEC (char, max_uid);
+ memset (uid_lock_length, 0, max_uid);
+#endif
if (max_labelno != max_label_num ())
{
@@ -1041,6 +1061,10 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
#endif /* CASE_VECTOR_SHORTEN_MODE */
/* Compute initial lengths, addresses, and varying flags for each insn. */
+ length_fun = insn_default_length;
+#ifdef HAVE_ATTR_lock_length
+ length_fun = insn_min_length;
+#endif
for (insn_current_address = 0, insn = first;
insn != 0;
insn_current_address += insn_lengths[uid], insn = NEXT_INSN (insn))
@@ -1105,26 +1129,32 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
inner_length = (asm_insn_count (PATTERN (inner_insn))
* insn_default_length (inner_insn));
else
- inner_length = insn_default_length (inner_insn);
+ inner_length = length_fun (inner_insn);
insn_lengths[inner_uid] = inner_length;
if (const_delay_slots)
{
if ((varying_length[inner_uid]
- = insn_variable_length_p (inner_insn)) != 0)
+ = INSN_VARIABLE_LENGTH_P (inner_insn)) != 0)
varying_length[uid] = 1;
INSN_ADDRESSES (inner_uid) = (insn_current_address
+ insn_lengths[uid]);
}
else
- varying_length[inner_uid] = 0;
+ {
+ /* We'd need to make this code a bit more complicated
+ to properly support non-const-delay-slots with the
+ lock_length attribute. */
+ gcc_assert (length_fun == &insn_default_length);
+ varying_length[inner_uid] = 0;
+ }
insn_lengths[uid] += inner_length;
}
}
else if (GET_CODE (body) != USE && GET_CODE (body) != CLOBBER)
{
- insn_lengths[uid] = insn_default_length (insn);
- varying_length[uid] = insn_variable_length_p (insn);
+ insn_lengths[uid] = length_fun (insn);
+ varying_length[uid] = INSN_VARIABLE_LENGTH_P (insn);
}
/* If needed, do any adjustment. */
@@ -1194,6 +1224,7 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
rtx prev;
int rel_align = 0;
addr_diff_vec_flags flags;
+ enum machine_mode vec_mode;
/* Avoid automatic aggregate initialization. */
flags = ADDR_DIFF_VEC_FLAGS (body);
@@ -1272,9 +1303,15 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
else
max_addr += align_fuzz (max_lab, rel_lab, 0, 0);
}
- PUT_MODE (body, CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr,
- max_addr - rel_addr,
- body));
+ vec_mode = CASE_VECTOR_SHORTEN_MODE (min_addr - rel_addr,
+ max_addr - rel_addr, body);
+ if (!uid_lock_length
+ || !uid_lock_length[uid]
+ || (GET_MODE_SIZE (vec_mode)
+ >= GET_MODE_SIZE (GET_MODE (body))))
+ PUT_MODE (body, vec_mode);
+ if (uid_lock_length)
+ uid_lock_length[uid] = 1;
if (JUMP_TABLES_IN_TEXT_SECTION
|| readonly_data_section == text_section)
{
@@ -1334,18 +1371,44 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
else
inner_length = insn_current_length (inner_insn);
- if (inner_length != insn_lengths[inner_uid])
+ /* We can't record lengths of delay slot insns. */
+ if (i == 0 && inner_length != insn_lengths[inner_uid])
{
- insn_lengths[inner_uid] = inner_length;
- something_changed = 1;
+#ifdef HAVE_ATTR_lock_length
+ int lock_length = insn_current_lock_length (inner_insn);
+
+ if (lock_length > uid_lock_length[inner_uid])
+ uid_lock_length[inner_uid] = lock_length;
+ else
+ lock_length = uid_lock_length[inner_uid];
+ if (inner_length < lock_length)
+ inner_length = lock_length;
+ if (inner_length != insn_lengths[inner_uid])
+#endif
+ {
+ insn_lengths[inner_uid] = inner_length;
+ something_changed = 1;
+ }
}
- insn_current_address += insn_lengths[inner_uid];
+ insn_current_address += inner_length;
new_length += inner_length;
}
}
else
{
new_length = insn_current_length (insn);
+#ifdef HAVE_ATTR_lock_length
+ {
+ int lock_length = insn_current_lock_length (insn);
+
+ if (lock_length > uid_lock_length[uid])
+ uid_lock_length[uid] = lock_length;
+ else
+ lock_length = uid_lock_length[uid];
+ if (new_length < lock_length)
+ new_length = lock_length;
+ }
+#endif
insn_current_address += new_length;
}
@@ -1362,12 +1425,17 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
something_changed = 1;
}
}
+#ifndef HAVE_ATTR_lock_length
/* For a non-optimizing compile, do only a single pass. */
if (!optimize)
break;
+#endif
}
free (varying_length);
+ if (uid_lock_length)
+ free (uid_lock_length);
+ uid_lock_length = 0;
#endif /* HAVE_ATTR_length */
}
@@ -1375,26 +1443,33 @@ shorten_branches (rtx first ATTRIBUTE_UNUSED)
#ifdef HAVE_ATTR_length
/* Given the body of an INSN known to be generated by an ASM statement, return
the number of machine instructions likely to be generated for this insn.
- This is used to compute its length. */
+ This is used to compute its length.
+ Note that an empty asm body like in execute/20001009-2.c has length zero. */
static int
asm_insn_count (rtx body)
{
const char *templ;
- int count = 1;
+ int count = 0;
+ int text;
if (GET_CODE (body) == ASM_INPUT)
templ = XSTR (body, 0);
else
templ = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
- if (!*templ)
- return 0;
-
- for (; *templ; templ++)
- if (IS_ASM_LOGICAL_LINE_SEPARATOR (*templ, templ)
- || *templ == '\n')
- count++;
+ for (text = 0; *templ; templ++)
+ {
+ if (IS_ASM_LOGICAL_LINE_SEPARATOR (*templ, templ)
+ || *templ == '\n')
+ {
+ count += text;
+ text = 0;
+ }
+ else
+ text = 1;
+ }
+ count += text;
return count;
}
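
The lock_length attribute makes the branch-shortening fixpoint monotonic: the recorded lock length only ever grows, and the reported length is never allowed back below it, so an insn cannot oscillate between a short and a long encoding across iterations. A minimal sketch of the update step, with invented names:

    /* Sketch of the monotonic length update used with lock_length;
       all names here are illustrative.  */
    static int
    update_insn_length (int uid, int new_length, int new_lock_length,
                        char *uid_lock_length, int *insn_lengths,
                        int *something_changed)
    {
      if (new_lock_length > uid_lock_length[uid])
        uid_lock_length[uid] = new_lock_length;	/* ratchet upward */
      if (new_length < uid_lock_length[uid])
        new_length = uid_lock_length[uid];	/* never shrink below it */
      if (new_length != insn_lengths[uid])
        {
          insn_lengths[uid] = new_length;
          *something_changed = 1;
        }
      return new_length;
    }
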
diff --git a/gcc/gcse.c b/gcc/gcse.c
index ee2d31e0a42..ead5d7e1b03 100644
--- a/gcc/gcse.c
+++ b/gcc/gcse.c
@@ -2902,6 +2902,37 @@ constprop_register (rtx insn, rtx from, rtx to, bool alter_jumps)
{
rtx sset;
+  /* ??? This algorithm lacks proper infrastructure to do a cost-benefit
+     analysis, since we would first have to tally the cost of all the users
+     of the constant and then compare it to the benefit of saving the
+     constant-loading instruction.
+     In the absence of a proper way to tally costs, it is probably better
+     to punt on any cost-increasing changes; combine can still do
+     combinations where a constant is only used in one insn.
+     We could probably have a target hook here.
+     For now, hard-code an ARC heuristic as proof of concept.  */
+ sset = single_set (insn);
+ /* Memory addresses and stored constants can be encoded as a long immediate,
+ but that costs an extra cycle. */
+ if (sset
+ && (MEM_P (SET_SRC (sset))
+ || ((GET_CODE (SET_SRC (sset)) == ZERO_EXTEND
+ || GET_CODE (SET_SRC (sset)) == SIGN_EXTEND)
+ && MEM_P (XEXP (SET_SRC (sset), 0)))
+ || MEM_P (SET_DEST (sset)))
+ && (REG_N_REFS (REGNO (from)) > 2
+ || rtx_cost (to, GET_CODE (to), SET) <= 1))
+ return 0;
+  /* Likewise, putting a long immediate costs an extra cycle.
+     We assume here that even if the instruction looks three-address now,
+     we could fix it up if necessary.  A stricter check would reject
+     constants that can't be encoded within a 32-bit opcode with the
+     prima facie number of operands.  */
+ if (sset && BINARY_P (SET_SRC (sset))
+ && REG_P (XEXP (SET_SRC (sset), 0))
+ && rtx_cost (to, GET_CODE (to), GET_CODE (SET_SRC (sset))) > 1
+ && REG_N_REFS (REGNO (from)) > 2)
+ return 0;
/* Check for reg or cc0 setting instructions followed by
conditional branch instructions first. */
if (alter_jumps
@@ -3353,6 +3384,15 @@ one_cprop_pass (int pass, bool cprop_jumps, bool bypass_jumps)
{
int changed = 0;
+ /* Make REG_N_REFS usable so that we can better assess the merit of
+ constant propagation. Note that while constant propagation can
+ iterate a few times, we'll still consider the same regs to eliminate,
+ even though they might have been set to something prima facie
+ non-constant initially, and later are set to a literal constant. */
+ df_note_add_problem ();
+ df_analyze ();
+ regstat_init_n_sets_and_refs ();
+
global_const_prop_count = local_const_prop_count = 0;
global_copy_prop_count = local_copy_prop_count = 0;
@@ -3383,6 +3423,7 @@ one_cprop_pass (int pass, bool cprop_jumps, bool bypass_jumps)
}
free_hash_table (&set_hash_table);
+ regstat_free_n_sets_and_refs ();
if (dump_file)
{
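
At the source level, the heuristic punts on cases like the following, where propagating a constant address into several memory references would add a long immediate (and an extra cycle) to each use, while keeping it in a register costs one setup insn. The example is illustrative, not taken from the testsuite:

    /* Illustrative case: the address has three uses, each a MEM, so
       cprop keeps it in a register instead of rewriting each access
       to use a long immediate.  */
    volatile int *const port = (volatile int *) 0x80001000;

    void
    bump3 (void)
    {
      port[0]++;
      port[1]++;
      port[2]++;
    }
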
diff --git a/gcc/genattr.c b/gcc/genattr.c
index bef41cdc327..2d4519083a1 100644
--- a/gcc/genattr.c
+++ b/gcc/genattr.c
@@ -81,6 +81,12 @@ extern int insn_default_length (rtx);\n\
extern int insn_min_length (rtx);\n\
extern int insn_variable_length_p (rtx);\n\
extern int insn_current_length (rtx);\n\n\
+extern int insn_default_lock_length (rtx);\n\
+extern int insn_min_lock_length (rtx);\n\
+extern int insn_variable_lock_length_p (rtx);\n\
+extern int insn_current_lock_length (rtx);\n\n\
+#include \"vec.h\"\n\
+#include \"statistics.h\"\n\
#include \"insn-addr.h\"\n");
}
}
diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c
index 794a8db1bb1..cabf8075313 100644
--- a/gcc/genattrtab.c
+++ b/gcc/genattrtab.c
@@ -235,6 +235,7 @@ static rtx true_rtx, false_rtx;
static const char *alternative_name;
static const char *length_str;
+static const char *lock_length_str;
static const char *delay_type_str;
static const char *delay_1_0_str;
static const char *num_delay_slots_str;
@@ -1527,14 +1528,14 @@ substitute_address (rtx exp, rtx (*no_address_fn) (rtx),
*/
static void
-make_length_attrs (void)
+make_length_attrs (const char **base)
{
static const char *new_names[] =
{
- "*insn_default_length",
- "*insn_min_length",
- "*insn_variable_length_p",
- "*insn_current_length"
+ "*insn_default_%s",
+ "*insn_min_%s",
+ "*insn_variable_%s_p",
+ "*insn_current_%s"
};
static rtx (*const no_address_fn[]) (rtx)
= {identity_fn,identity_fn, zero_fn, zero_fn};
@@ -1547,7 +1548,7 @@ make_length_attrs (void)
/* See if length attribute is defined. If so, it must be numeric. Make
it special so we don't output anything for it. */
- length_attr = find_attr (&length_str, 0);
+ length_attr = find_attr (base, 0);
if (length_attr == 0)
return;
@@ -1560,11 +1561,14 @@ make_length_attrs (void)
/* Make each new attribute, in turn. */
for (i = 0; i < ARRAY_SIZE (new_names); i++)
{
- make_internal_attr (new_names[i],
+ const char *p = attr_printf (strlen (new_names[i]) - 2 + strlen (*base),
+ new_names[i], *base);
+
+ make_internal_attr (p,
substitute_address (length_attr->default_val->value,
no_address_fn[i], address_fn[i]),
ATTR_NONE);
- new_attr = find_attr (&new_names[i], 0);
+ new_attr = find_attr (&p, 0);
for (av = length_attr->first_value; av; av = av->next)
for (ie = av->first_insn; ie; ie = ie->next)
{
@@ -4461,6 +4465,7 @@ main (int argc, char **argv)
alternative_name = DEF_ATTR_STRING ("alternative");
length_str = DEF_ATTR_STRING ("length");
+ lock_length_str = DEF_ATTR_STRING ("lock_length");
delay_type_str = DEF_ATTR_STRING ("*delay_type");
delay_1_0_str = DEF_ATTR_STRING ("*delay_1_0");
num_delay_slots_str = DEF_ATTR_STRING ("*num_delay_slots");
@@ -4577,7 +4582,8 @@ from the machine description file `md'. */\n\n");
fill_attr (attr);
/* Construct extra attributes for `length'. */
- make_length_attrs ();
+ make_length_attrs (&length_str);
+ make_length_attrs (&lock_length_str);
/* Perform any possible optimizations to speed up compilation. */
optimize_attrs ();
diff --git a/gcc/genmodes.c b/gcc/genmodes.c
index 3851aff11e4..56e1642c35d 100644
--- a/gcc/genmodes.c
+++ b/gcc/genmodes.c
@@ -128,7 +128,9 @@ vector_class (enum mode_class cl)
switch (cl)
{
case MODE_INT: return MODE_VECTOR_INT;
+ case MODE_PARTIAL_INT: return MODE_VECTOR_PARTIAL_INT;
case MODE_FLOAT: return MODE_VECTOR_FLOAT;
+ case MODE_CC: return MODE_VECTOR_CC;
case MODE_FRACT: return MODE_VECTOR_FRACT;
case MODE_UFRACT: return MODE_VECTOR_UFRACT;
case MODE_ACCUM: return MODE_VECTOR_ACCUM;
@@ -326,10 +328,15 @@ complete_mode (struct mode_data *m)
case MODE_CC:
/* Again, nothing more need be said. For historical reasons,
- the size of a CC mode is four units. */
- validate_mode (m, UNSET, UNSET, UNSET, UNSET, UNSET);
+ the size of a CC mode defaults to four units. */
+ if (m->bytesize != blank_mode.bytesize)
+ validate_mode (m, UNSET, SET, UNSET, UNSET, UNSET);
+ else
+ {
+ validate_mode (m, UNSET, UNSET, UNSET, UNSET, UNSET);
+ m->bytesize = 4;
+ }
- m->bytesize = 4;
m->ncomponents = 1;
m->component = 0;
break;
@@ -375,7 +382,9 @@ complete_mode (struct mode_data *m)
break;
case MODE_VECTOR_INT:
+ case MODE_VECTOR_PARTIAL_INT:
case MODE_VECTOR_FLOAT:
+ case MODE_VECTOR_CC:
case MODE_VECTOR_FRACT:
case MODE_VECTOR_UFRACT:
case MODE_VECTOR_ACCUM:
@@ -530,12 +539,13 @@ make_vector_modes (enum mode_class cl, unsigned int width,
#define _SPECIAL_MODE(C, N) make_special_mode(MODE_##C, #N, __FILE__, __LINE__)
#define RANDOM_MODE(N) _SPECIAL_MODE (RANDOM, N)
#define CC_MODE(N) _SPECIAL_MODE (CC, N)
+#define SIZED_CC_MODE(N, Y) (CC_MODE (N)->bytesize = (Y))
-static void
+static struct mode_data *
make_special_mode (enum mode_class cl, const char *name,
const char *file, unsigned int line)
{
- new_mode (cl, name, file, line);
+ return new_mode (cl, name, file, line);
}
#define INT_MODE(N, Y) FRACTIONAL_INT_MODE (N, -1U, Y)
@@ -1224,6 +1234,7 @@ emit_mode_adjustments (void)
break;
case MODE_VECTOR_INT:
+ case MODE_VECTOR_PARTIAL_INT:
case MODE_VECTOR_FLOAT:
case MODE_VECTOR_FRACT:
case MODE_VECTOR_UFRACT:
@@ -1262,6 +1273,7 @@ emit_mode_adjustments (void)
break;
case MODE_VECTOR_INT:
+ case MODE_VECTOR_PARTIAL_INT:
case MODE_VECTOR_FLOAT:
case MODE_VECTOR_FRACT:
case MODE_VECTOR_UFRACT:
diff --git a/gcc/genoutput.c b/gcc/genoutput.c
index 601483d8b2f..d678216db0d 100644
--- a/gcc/genoutput.c
+++ b/gcc/genoutput.c
@@ -680,19 +680,55 @@ process_template (struct data *d, const char *template_code)
list of assembler code templates, one for each alternative. */
else if (template_code[0] == '@')
{
- d->template_code = 0;
- d->output_format = INSN_OUTPUT_FORMAT_MULTI;
+ int found_star = 0;
- printf ("\nstatic const char * const output_%d[] = {\n", d->code_number);
+ for (cp = &template_code[1]; *cp; )
+ {
+ while (ISSPACE (*cp))
+ cp++;
+ if (*cp == '*')
+ found_star = 1;
+ while (!IS_VSPACE (*cp) && *cp != '\0')
+ ++cp;
+ }
+ d->template_code = 0;
+ if (found_star)
+ {
+ d->output_format = INSN_OUTPUT_FORMAT_FUNCTION;
+ puts ("\nstatic const char *");
+ printf ("output_%d (rtx *operands ATTRIBUTE_UNUSED, "
+ "rtx insn ATTRIBUTE_UNUSED)\n", d->code_number);
+ puts ("{");
+ puts (" switch (which_alternative)\n {");
+ }
+ else
+ {
+ d->output_format = INSN_OUTPUT_FORMAT_MULTI;
+ printf ("\nstatic const char * const output_%d[] = {\n",
+ d->code_number);
+ }
for (i = 0, cp = &template_code[1]; *cp; )
{
- const char *ep, *sp;
+ const char *ep, *sp, *bp;
while (ISSPACE (*cp))
cp++;
- printf (" \"");
+ bp = cp;
+ if (found_star)
+ {
+ printf (" case %d:", i);
+ if (*cp == '*')
+ {
+ printf ("\n ");
+ cp++;
+ }
+ else
+ printf (" return \"");
+ }
+ else
+ printf (" \"");
for (ep = sp = cp; !IS_VSPACE (*ep) && *ep != '\0'; ++ep)
if (!ISSPACE (*ep))
@@ -708,7 +744,18 @@ process_template (struct data *d, const char *template_code)
cp++;
}
- printf ("\",\n");
+ if (!found_star)
+ puts ("\",");
+ else if (*bp != '*')
+ puts ("\";");
+ else
+ {
+	    /* The usual action will end with a return.
+	       If there is neither a break nor a return at the end, this
+	       is assumed to be intentional; it allows multiple
+	       consecutive alternatives to share some code.  */
+ puts ("");
+ }
i++;
}
if (i == 1)
@@ -721,7 +768,10 @@ process_template (struct data *d, const char *template_code)
have_error = 1;
}
- printf ("};\n");
+ if (found_star)
+ puts (" default: gcc_unreachable ();\n }\n}");
+ else
+ printf ("};\n");
}
else
{
@@ -1112,8 +1162,12 @@ strip_whitespace (const char *s)
/* Record just enough information about a constraint to allow checking
of operand constraint strings above, in validate_insn_alternatives.
Does not validate most properties of the constraint itself; does
+   enforce no overlap with MI constraints, and no prefixes.
+   The check for duplicate names is left to genpreds.c, since only there
+   is enough information to check for overloading.
-   enforce no duplicate names, no overlap with MI constraints, and no
-   prefixes.  EXP is the define_*constraint form, LINENO the line number
+   EXP is the define_*constraint form, LINENO the line number
reported by the reader. */
static void
note_constraint (rtx exp, int lineno)
@@ -1150,12 +1204,7 @@ note_constraint (rtx exp, int lineno)
slot = iter;
if (!strcmp ((*iter)->name, name))
- {
- message_with_line (lineno, "redefinition of constraint '%s'", name);
- message_with_line ((*iter)->lineno, "previous definition is here");
- have_error = 1;
- return;
- }
+ ; /* Ignore here, see more detailed check in genpreds. */
else if (!strncmp ((*iter)->name, name, (*iter)->namelen))
{
message_with_line (lineno, "defining constraint '%s' here", name);
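
With a mixed '@' template, genoutput now emits a switch-based output function instead of a string array; the generated code has roughly this shape (the insn number, mnemonics, and helper are invented):

    /* Sketch of what genoutput emits when some alternatives of an "@"
       template start with '*' (C statements) and others are plain
       strings.  */
    static const char *
    output_42 (rtx *operands ATTRIBUTE_UNUSED, rtx insn ATTRIBUTE_UNUSED)
    {
      switch (which_alternative)
        {
        case 0: return "add %0,%1,%2";
        case 1:
          return hypothetical_output_helper (operands);
        default: gcc_unreachable ();
        }
    }
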
diff --git a/gcc/genpreds.c b/gcc/genpreds.c
index bf53944e297..7534a7c875e 100644
--- a/gcc/genpreds.c
+++ b/gcc/genpreds.c
@@ -674,6 +674,7 @@ struct constraint_data
unsigned int is_extra : 1;
unsigned int is_memory : 1;
unsigned int is_address : 1;
+ unsigned int is_overloaded : 1; /* Set for all but the first definition. */
};
/* Overview of all constraints beginning with a given letter. */
@@ -757,6 +758,7 @@ add_constraint (const char *name, const char *regclass,
bool is_const_int;
bool is_const_dbl;
size_t namelen;
+ bool is_overloaded = 0;
if (exp && validate_exp (exp, name, lineno))
return;
@@ -816,10 +818,29 @@ add_constraint (const char *name, const char *regclass,
if (!strcmp ((*iter)->name, name))
{
- message_with_line (lineno, "redefinition of constraint '%s'", name);
- message_with_line ((*iter)->lineno, "previous definition is here");
- have_error = 1;
- return;
+	  /* An exact match is OK if the purpose is to overload the
+	     constraint.  */
+	  if (is_overloaded)
+	    ; /* We've already warned about the first definition.  */
+ else if ((*iter)->is_register != (regclass != 0)
+ || (*iter)->is_memory != is_memory
+ || (*iter)->is_address != is_address)
+ {
+ message_with_line (lineno,
+ "overloading of constraint '%s'", name);
+ message_with_line ((*iter)->lineno,
+ "previous definition is here");
+ is_overloaded = 1;
+ }
+ else
+ {
+ message_with_line (lineno,
+ "redefinition of constraint '%s'", name);
+ message_with_line ((*iter)->lineno,
+ "previous definition is here");
+ have_error = 1;
+ return;
+ }
}
else if (!strncmp ((*iter)->name, name, (*iter)->namelen))
{
@@ -910,6 +931,7 @@ add_constraint (const char *name, const char *regclass,
c->is_extra = !(regclass || is_const_int || is_const_dbl);
c->is_memory = is_memory;
c->is_address = is_address;
+ c->is_overloaded = is_overloaded;
c->next_this_letter = *slot;
*slot = c;
@@ -958,7 +980,8 @@ write_enum_constraint_num (void)
"{\n"
" CONSTRAINT__UNKNOWN = 0", stdout);
FOR_ALL_CONSTRAINTS (c)
- printf (",\n CONSTRAINT_%s", c->c_name);
+ if (!c->is_overloaded)
+ printf (",\n CONSTRAINT_%s", c->c_name);
puts ("\n};\n");
}
@@ -987,9 +1010,10 @@ write_lookup_constraint (void)
{
do
{
- printf (" if (!strncmp (str, \"%s\", %lu))\n"
- " return CONSTRAINT_%s;\n",
- c->name, (unsigned long int) c->namelen, c->c_name);
+ if (!c->is_overloaded)
+ printf (" if (!strncmp (str, \"%s\", %lu))\n"
+ " return CONSTRAINT_%s;\n",
+ c->name, (unsigned long int) c->namelen, c->c_name);
c = c->next_this_letter;
}
while (c);
diff --git a/gcc/genrecog.c b/gcc/genrecog.c
index 70ab87745d3..b950566f4ef 100644
--- a/gcc/genrecog.c
+++ b/gcc/genrecog.c
@@ -791,8 +791,10 @@ validate_pattern (rtx pattern, rtx insn, rtx set, int set_code)
else if (dmode != smode
&& GET_CODE (dest) != PC
&& GET_CODE (dest) != CC0
+ && GET_MODE_CLASS (dmode) != MODE_CC
&& GET_CODE (src) != PC
&& GET_CODE (src) != CC0
+ && GET_MODE_CLASS (smode) != MODE_CC
&& GET_CODE (src) != CONST_INT)
{
const char *which;
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 01b2fbef6b7..2f33e09dedd 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -3610,7 +3610,7 @@ gimplify_init_constructor (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
else
align = TYPE_ALIGN (type);
- if (size > 0 && !can_move_by_pieces (size, align))
+ if (size > 0 && !can_move_by_pieces (size, align, 1))
{
tree new_tree;
diff --git a/gcc/global.c b/gcc/global.c
index 824fcf0f702..643b66d3f21 100644
--- a/gcc/global.c
+++ b/gcc/global.c
@@ -1048,6 +1048,7 @@ find_reg (int num, HARD_REG_SET losers, int alt_regs_p, int accept_call_clobbere
#endif
if (! TEST_HARD_REG_BIT (used, regno)
&& HARD_REGNO_MODE_OK (regno, mode)
+ && (!alt_regs_p || !DONT_REALLOC (regno, mode))
&& (allocno[num].calls_crossed == 0
|| accept_call_clobbered
|| ! HARD_REGNO_CALL_PART_CLOBBERED (regno, mode)))
@@ -1223,6 +1224,16 @@ find_reg (int num, HARD_REG_SET losers, int alt_regs_p, int accept_call_clobbere
&& ! invalid_mode_change_p (regno, REGNO_REG_CLASS (regno),
mode)
#endif
+/* Some registers are poor candidates for reallocation based on frequency
+   alone: e.g. floating point registers for integer use, integer registers
+   for floating point use, and dedicated loop count registers for any
+   non-loop-count use.
+   Since this code does not honor the cost implications of using the wrong
+   class, we need another way to prevent this.  STACK_REGS is the x86
+   specific hack for it...  */
+#ifdef DONT_REALLOC
+ && !DONT_REALLOC (regno, mode)
+#endif
#ifdef STACK_REGS
&& (!allocno[num].no_stack_reg
|| regno < FIRST_STACK_REG || regno > LAST_STACK_REG)
diff --git a/gcc/hooks.c b/gcc/hooks.c
index 04dbd3eab2a..3bf0c929b14 100644
--- a/gcc/hooks.c
+++ b/gcc/hooks.c
@@ -85,6 +85,14 @@ hook_bool_mode_const_rtx_true (enum machine_mode mode ATTRIBUTE_UNUSED,
return true;
}
+/* Generic hook that takes (rtx, rtx) and returns true. */
+bool
+hook_bool_const_rtx_const_rtx_true (const_rtx follower ATTRIBUTE_UNUSED,
+ const_rtx followee ATTRIBUTE_UNUSED)
+{
+ return true;
+}
+
/* Generic hook that takes (FILE *, const char *) and does nothing. */
void
hook_void_FILEptr_constcharptr (FILE *a ATTRIBUTE_UNUSED, const char *b ATTRIBUTE_UNUSED)
diff --git a/gcc/hooks.h b/gcc/hooks.h
index 9d7e56a3589..48fe34c1f82 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -30,6 +30,7 @@ extern bool hook_bool_bool_false (bool);
extern bool hook_bool_mode_false (enum machine_mode);
extern bool hook_bool_mode_const_rtx_false (enum machine_mode, const_rtx);
extern bool hook_bool_mode_const_rtx_true (enum machine_mode, const_rtx);
+extern bool hook_bool_const_rtx_const_rtx_true (const_rtx, const_rtx);
extern bool hook_bool_tree_false (tree);
extern bool hook_bool_const_tree_false (const_tree);
extern bool hook_bool_tree_true (tree);
diff --git a/gcc/libgcc2.c b/gcc/libgcc2.c
index 5a82f82f7cd..1a8eedaae06 100644
--- a/gcc/libgcc2.c
+++ b/gcc/libgcc2.c
@@ -523,8 +523,11 @@ __ffsSI2 (UWtype u)
{
UWtype count;
- if (u == 0)
- return 0;
+#ifdef COUNT_TRAILING_ZEROS_0
+ if (COUNT_TRAILING_ZEROS_0 != -1)
+#endif
+ if (u == 0)
+ return 0;
count_trailing_zeros (count, u);
return count + 1;
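
The guard becomes dead code on targets where counting the trailing zeros of 0 is itself well defined as -1; a short worked check, relying on the longlong.h definitions added below:

    /* With the ARC definitions below, COUNT_LEADING_ZEROS_0 == 32, so
       COUNT_TRAILING_ZEROS_0 == W_TYPE_SIZE - 1 - 32 == -1 for a 32-bit
       word.  Then for u == 0, count_trailing_zeros sets count = -1 and
       __ffsSI2 returns count + 1 == 0 -- the correct ffs(0) -- without
       any explicit zero check.  */
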
diff --git a/gcc/longlong.h b/gcc/longlong.h
index 45a95c40eb5..fe9d6f4de93 100644
--- a/gcc/longlong.h
+++ b/gcc/longlong.h
@@ -173,6 +173,29 @@ extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype);
"rIJ" ((USItype) (bh)), \
"r" ((USItype) (al)), \
"rIJ" ((USItype) (bl)))
+#ifdef __ARC_NORM__
+#define count_leading_zeros(count, x) \
+ do \
+ { \
+ SItype c_; \
+ \
+ __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\
+ (count) = c_ + 1; \
+ } \
+ while (0)
+#define COUNT_LEADING_ZEROS_0 32
+#endif
+#ifdef __ARC700__
+#define umul_ppmm(w1, w0, u, v) \
+ __asm__ ( \
+ "mpyu\t%1,%2,%3\n\tmpyhu\t%0,%2,%3" \
+ : "=r" ((USItype)(w1)), \
+ "=r" ((USItype)(w0)) \
+ : "r" ((USItype)(u)), \
+ "r" ((USItype)(v)))
+#define UMUL_TIME 7
+#define __umulsidi3(u,v) ((UDItype)(USItype)u*(USItype)v)
+#else /* ! __ARC700__ */
/* Call libgcc routine. */
#define umul_ppmm(w1, w0, u, v) \
do { \
@@ -184,6 +207,7 @@ do { \
#define __umulsidi3 __umulsidi3
UDItype __umulsidi3 (USItype, USItype);
#endif
+#endif
#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
@@ -1451,7 +1475,10 @@ UDItype __umulsidi3 (USItype, USItype);
count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x); \
(count) = W_TYPE_SIZE - 1 - __ctz_c; \
} while (0)
-#endif
+#ifdef COUNT_LEADING_ZEROS_0
+#define COUNT_TRAILING_ZEROS_0 (W_TYPE_SIZE - 1 - COUNT_LEADING_ZEROS_0)
+#endif /* COUNT_LEADING_ZEROS_0 */
+#endif /* !defined (count_trailing_zeros) */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
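
The ARC700 mpyu/mpyhu pair produces the low and high halves of a 32x32 product; a standalone sketch of the expansion the umul_ppmm macro above performs (typedefs inlined, ARC700-only asm):

    /* Standalone sketch of umul_ppmm on ARC700: mpyu yields the low 32
       bits of the product, mpyhu the high 32 bits.  */
    typedef unsigned int USItype;
    typedef unsigned long long UDItype;

    static inline UDItype
    umulsidi3_sketch (USItype u, USItype v)
    {
      USItype w1, w0;	/* high and low halves */
      __asm__ ("mpyu\t%1,%2,%3\n\tmpyhu\t%0,%2,%3"
               : "=r" (w1), "=r" (w0)
               : "r" (u), "r" (v));
      return ((UDItype) w1 << 32) | w0;
    }
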
diff --git a/gcc/loop-doloop.c b/gcc/loop-doloop.c
index 1f5856f581b..684df4f0839 100644
--- a/gcc/loop-doloop.c
+++ b/gcc/loop-doloop.c
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see
#include "output.h"
#include "params.h"
#include "target.h"
+#include "optabs.h"
/* This module is used to modify loops with a determinable number of
iterations to use special low-overhead looping instructions.
@@ -194,22 +195,92 @@ doloop_condition_get (rtx doloop_pat)
return 0;
}
-/* Return nonzero if the loop specified by LOOP is suitable for
- the use of special low-overhead looping instructions. DESC
- describes the number of iterations of the loop. */
+static bool add_test (rtx cond, edge *e, basic_block dest, edge *);
-static bool
-doloop_valid_p (struct loop *loop, struct niter_desc *desc)
+/* Check if the loop specified by LOOP is suitable for
+ the use of special low-overhead looping instructions.
+   If necessary to properly implement infinite loops, this may cause
+   a new enclosing loop to be formed.  Returns the (possibly changed)
+   loop structure pointer on success, else NULL.
+ DESC describes the number of iterations of the loop. */
+
+static struct loop *
+validize_doloop (struct loop *loop, struct niter_desc *desc)
{
basic_block *body = get_loop_body (loop), bb;
rtx insn;
unsigned i;
bool result = true;
+ rtx list;
+ edge out_edge;
/* Check for loops that may not terminate under special conditions. */
if (!desc->simple_p
|| desc->assumptions
- || desc->infinite)
+ || (desc->infinite
+ && (EDGE_COUNT (loop->latch->preds) != 1
+ || !optimize_loop_for_speed_p (loop))))
+ result = false;
+ if (desc->infinite)
+ {
+ edge e, latch_in;
+ edge_iterator ei;
+ rtx insn;
+
+ /* We want to set out_edge to the edge that is used to exit the loop
+ if the loop count is exhausted. For now, only handle the case
+ of a single exit. */
+ out_edge = NULL;
+ if (single_pred_p (loop->latch))
+ {
+ latch_in = single_pred_edge (loop->latch);
+ FOR_EACH_EDGE (e, ei, latch_in->src->succs)
+ if (e == latch_in)
+ ; /* do nothing */
+ else if (!out_edge)
+ out_edge = e;
+ else
+ result = false;
+ }
+ if (!out_edge)
+ result = false;
+ else if (dump_file)
+ fprintf (dump_file, "Doloop: considering putting infinite loop"
+ " instructions on edge from %d to %d.\n",
+ out_edge->src->index, out_edge->dest->index);
+      /* The (non-jump) instructions in the current loop latch should be
+	 copied into the new loop latch, cf. gcc.c-torture/execute/pr27285.c.
+ For now, just punt when we see any insns in the latch. */
+ FOR_BB_INSNS (loop->latch, insn)
+ if (NONJUMP_INSN_P (insn))
+ {
+ result = false;
+ break;
+ }
+
+ }
+ /* check_simple_exit can create conditions that do_compare_and_jump_rtx
+ can't grok. */
+ for (list = desc->infinite; list; list = XEXP (list, 1))
+ {
+ rtx cond = XEXP (list, 0);
+ enum machine_mode mode;
+
+ if (!BINARY_P (cond))
+ {
+ result = false;
+ break;
+ }
+ mode = GET_MODE (XEXP (cond, 0));
+ if (mode == VOIDmode)
+ mode = GET_MODE (XEXP (cond, 1));
+
+ if (GET_MODE_CLASS (mode) == MODE_INT
+ && !can_compare_p (GET_CODE (cond), mode, ccp_jump)
+ && !COMPARISON_P (cond))
+ result = false;
+ }
+ if (!result)
{
/* There are some cases that would require a special attention.
For example if the comparison is LEU and the comparison value
@@ -261,27 +332,84 @@ doloop_valid_p (struct loop *loop, struct niter_desc *desc)
}
}
}
- result = true;
+ if (desc->infinite)
+ {
+ basic_block header = loop->header;
+ basic_block latch;
+ struct loop *new_loop;
+
+ gcc_assert (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS));
+ {
+ edge latch_edge = single_succ_edge (loop->latch);
+ edge in_edge;
+
+ gcc_assert (EDGE_COUNT (header->preds) == 2);
+ gcc_assert (latch_edge->dest == header);
+ if (dump_file)
+ fprintf (dump_file,
+ "Doloop: infinite loop generation: latch %d header %d\n",
+ loop->latch->index, header->index);
+ in_edge = EDGE_PRED (header, 0);
+ if (in_edge == latch_edge)
+ in_edge = EDGE_PRED (header, 1);
+ else
+ gcc_assert (latch_edge == EDGE_PRED (header, 1));
+ gcc_assert (in_edge != out_edge);
+ header = split_edge (in_edge);
+ set_immediate_dominator (CDI_DOMINATORS, loop->header, header);
+ remove_bb_from_loops (header);
+ add_bb_to_loop (header, loop);
+#if 0 /* For debugging, insert a marker insn. */
+ emit_insn_after (gen_unimp_s (GEN_INT (1)), BB_END (header));
+#endif
+ }
+ for (latch = header, list = desc->infinite; list; list = XEXP (list, 1))
+ {
+ edge new_latch_edge = out_edge;
+
+ add_test (XEXP (list, 0), &out_edge, latch, &new_latch_edge);
+ remove_bb_from_loops (out_edge->src);
+ add_bb_to_loop (out_edge->src, loop);
+ if (latch == header)
+ {
+ latch = split_edge (new_latch_edge);
+ remove_bb_from_loops (latch);
+ add_bb_to_loop (latch, loop);
+#if 0 /* For debugging, insert a marker insn. */
+ emit_insn_after (gen_trap_s (GEN_INT (42)), BB_END (latch));
+#endif
+ }
+ }
+ new_loop = alloc_loop ();
+ new_loop->header = loop->header;
+ new_loop->latch = loop->latch;
+ loop->header = header;
+ loop->latch = latch;
+ add_loop (new_loop, loop);
+ loop = new_loop;
+ }
cleanup:
free (body);
- return result;
+ return result ? loop : 0;
}
/* Adds test of COND jumping to DEST on edge *E and set *E to the new fallthru
edge. If the condition is always false, do not do anything. If it is always
true, redirect E to DEST and return false. In all other cases, true is
- returned. */
+ returned.
+   If EDGEP is non-null, assign any newly created edge to it.  */
static bool
-add_test (rtx cond, edge *e, basic_block dest)
+add_test (rtx cond, edge *e, basic_block dest, edge *edgep)
{
rtx seq, jump, label;
enum machine_mode mode;
rtx op0 = XEXP (cond, 0), op1 = XEXP (cond, 1);
enum rtx_code code = GET_CODE (cond);
basic_block bb;
+ edge new_edge;
mode = GET_MODE (XEXP (cond, 0));
if (mode == VOIDmode)
@@ -325,7 +453,9 @@ add_test (rtx cond, edge *e, basic_block dest)
LABEL_NUSES (label)++;
- make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
+ new_edge = make_edge (bb, dest, (*e)->flags & ~EDGE_FALLTHRU);
+ if (edgep)
+ *edgep = new_edge;
return true;
}
@@ -448,7 +578,7 @@ doloop_modify (struct loop *loop, struct niter_desc *desc,
te = single_succ_edge (preheader);
for (; ass; ass = XEXP (ass, 1))
- if (!add_test (XEXP (ass, 0), &te, set_zero))
+ if (!add_test (XEXP (ass, 0), &te, set_zero, NULL))
break;
if (ass)
@@ -497,7 +627,8 @@ doloop_modify (struct loop *loop, struct niter_desc *desc,
init = gen_doloop_begin (counter_reg,
desc->const_iter ? desc->niter_expr : const0_rtx,
GEN_INT (desc->niter_max),
- GEN_INT (level));
+ GEN_INT (level),
+ doloop_seq);
if (init)
{
start_sequence ();
@@ -555,6 +686,7 @@ doloop_optimize (struct loop *loop)
unsigned word_mode_size;
unsigned HOST_WIDE_INT word_mode_max;
bool zero_extend_p = false;
+ int entered_at_top;
if (dump_file)
fprintf (dump_file, "Doloop: Processing loop %d.\n", loop->num);
@@ -565,7 +697,8 @@ doloop_optimize (struct loop *loop)
desc = get_simple_loop_desc (loop);
/* Check that loop is a candidate for a low-overhead looping insn. */
- if (!doloop_valid_p (loop, desc))
+ loop = validize_doloop (loop, desc);
+ if (!loop)
{
if (dump_file)
fprintf (dump_file,
@@ -613,8 +746,10 @@ doloop_optimize (struct loop *loop)
not like. */
start_label = block_label (desc->in_edge->dest);
doloop_reg = gen_reg_rtx (mode);
+ entered_at_top = loop_preheader_edge (loop)->dest == desc->in_edge->dest;
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
- GEN_INT (level), start_label);
+ GEN_INT (level), start_label,
+ GEN_INT (entered_at_top));
word_mode_size = GET_MODE_BITSIZE (word_mode);
word_mode_max
@@ -643,7 +778,8 @@ doloop_optimize (struct loop *loop)
}
PUT_MODE (doloop_reg, word_mode);
doloop_seq = gen_doloop_end (doloop_reg, iterations, iterations_max,
- GEN_INT (level), start_label);
+ GEN_INT (level), start_label,
+ GEN_INT (entered_at_top));
}
if (! doloop_seq)
{
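
The new path matters for loops whose iteration count is only conditionally finite; an illustrative source shape (not from the testsuite) is a != exit test with a variable step, where the niter analysis records an 'infinite' condition that validize_doloop now materializes as explicit exit tests around a new outer loop:

    /* Illustrative shape: with step == 2 and n odd, i steps past n and
       never compares equal, so the loop is only conditionally finite.  */
    void
    scale (int *a, int n, int step)
    {
      int i;
      for (i = 0; i != n; i += step)
        a[i] *= 2;
    }
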
diff --git a/gcc/loop-iv.c b/gcc/loop-iv.c
index 3723dbd463a..3b3696c1cf4 100644
--- a/gcc/loop-iv.c
+++ b/gcc/loop-iv.c
@@ -2799,7 +2799,9 @@ get_simple_loop_desc (struct loop *loop)
if (desc)
return desc;
- desc = XNEW (struct niter_desc);
+ /* At least desc->infinite is not always initialized by
+ find_simple_loop_exit. */
+ desc = XCNEW (struct niter_desc);
iv_analysis_loop_init (loop);
find_simple_exit (loop, desc);
loop->aux = desc;
diff --git a/gcc/mode-classes.def b/gcc/mode-classes.def
index 83017ec2533..4f472ada791 100644
--- a/gcc/mode-classes.def
+++ b/gcc/mode-classes.def
@@ -32,8 +32,10 @@ along with GCC; see the file COPYING3. If not see
DEF_MODE_CLASS (MODE_COMPLEX_INT), /* complex numbers */ \
DEF_MODE_CLASS (MODE_COMPLEX_FLOAT), \
DEF_MODE_CLASS (MODE_VECTOR_INT), /* SIMD vectors */ \
+ DEF_MODE_CLASS (MODE_VECTOR_PARTIAL_INT), \
DEF_MODE_CLASS (MODE_VECTOR_FRACT), /* SIMD vectors */ \
DEF_MODE_CLASS (MODE_VECTOR_UFRACT), /* SIMD vectors */ \
DEF_MODE_CLASS (MODE_VECTOR_ACCUM), /* SIMD vectors */ \
DEF_MODE_CLASS (MODE_VECTOR_UACCUM), /* SIMD vectors */ \
- DEF_MODE_CLASS (MODE_VECTOR_FLOAT)
+ DEF_MODE_CLASS (MODE_VECTOR_FLOAT), \
+ DEF_MODE_CLASS (MODE_VECTOR_CC)
diff --git a/gcc/opts.c b/gcc/opts.c
index a87fb498b15..b3e05cd2b62 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -930,7 +930,7 @@ decode_options (unsigned int argc, const char **argv)
flag_tree_vrp = opt2;
flag_tree_builtin_call_dce = opt2;
flag_tree_pre = opt2;
- flag_tree_switch_conversion = 1;
+ flag_tree_switch_conversion = opt2;
flag_ipa_cp = opt2;
/* Allow more virtual operators to increase alias precision. */
@@ -950,6 +950,7 @@ decode_options (unsigned int argc, const char **argv)
flag_gcse_after_reload = opt3;
flag_tree_vectorize = opt3;
flag_ipa_cp_clone = opt3;
+ flag_tree_pre_partial_partial = opt3;
if (flag_ipa_cp_clone)
flag_ipa_cp = 1;
@@ -973,10 +974,13 @@ decode_options (unsigned int argc, const char **argv)
being declared inline. */
flag_inline_functions = 1;
- /* Basic optimization options. */
- optimize_size = 1;
+      /* Basic optimization options at -Os are almost the same as at -O2.
+	 The only difference is that we disable PRE, because it sometimes
+	 still increases code size.  Users who want PRE with -Os must
+	 enable it explicitly.  */
if (optimize > 2)
optimize = 2;
+ flag_tree_pre = 0;
/* We want to crossjump as much as possible. */
set_param_value ("min-crossjump-insns", 1);
diff --git a/gcc/output.h b/gcc/output.h
index 79b628fd338..8874a9ba3a2 100644
--- a/gcc/output.h
+++ b/gcc/output.h
@@ -491,8 +491,13 @@ struct named_section GTY(()) {
section. The argument provides callback-specific data. */
typedef void (*unnamed_section_callback) (const void *);
-/* Information about a SECTION_UNNAMED section. */
-struct unnamed_section GTY(()) {
+/* Information about a SECTION_UNNAMED section.
+ WARNING: this struct is unsuitable for garbage collection, because
+ the DATA member can point to malloced memory, which will change between
+ a pch-generating and a pch-using compilation, and the callback member
+ points to a function, which can change between a pch-generating and a
+ pch-using compilation when address space randomization is in effect. */
+struct unnamed_section GTY((skip)) {
struct section_common common;
/* The callback used to switch to the section, and the data that
@@ -530,8 +535,8 @@ union section GTY ((desc ("SECTION_STYLE (&(%h))")))
{
struct section_common GTY ((skip)) common;
struct named_section GTY ((tag ("SECTION_NAMED"))) named;
- struct unnamed_section GTY ((tag ("SECTION_UNNAMED"))) unnamed;
- struct noswitch_section GTY ((tag ("SECTION_NOSWITCH"))) noswitch;
+ struct unnamed_section GTY ((tag ("SECTION_UNNAMED"),skip)) unnamed;
+ struct noswitch_section GTY ((tag ("SECTION_NOSWITCH"),skip)) noswitch;
};
/* Return the style of section SECT. */
@@ -540,22 +545,23 @@ union section GTY ((desc ("SECTION_STYLE (&(%h))")))
struct object_block;
/* Special well-known sections. */
-extern GTY(()) section *text_section;
-extern GTY(()) section *data_section;
-extern GTY(()) section *readonly_data_section;
-extern GTY(()) section *sdata_section;
-extern GTY(()) section *ctors_section;
-extern GTY(()) section *dtors_section;
-extern GTY(()) section *bss_section;
-extern GTY(()) section *sbss_section;
+/* Don't GTY the unnamed / noswitch sections, see PR31634. */
+extern /* unnamed */ section *text_section;
+extern /* unnamed */ section *data_section;
+extern /* unnamed */ section *readonly_data_section;
+extern /* unnamed */ section *sdata_section;
+extern /* unnamed */ section *ctors_section;
+extern /* unnamed */ section *dtors_section;
+extern /* unnamed */ section *bss_section;
+extern /* unnamed */ section *sbss_section;
extern GTY(()) section *exception_section;
extern GTY(()) section *eh_frame_section;
-extern GTY(()) section *tls_comm_section;
-extern GTY(()) section *comm_section;
-extern GTY(()) section *lcomm_section;
-extern GTY(()) section *bss_noswitch_section;
+extern /* noswitch */ section *tls_comm_section;
+extern /* noswitch */ section *comm_section;
+extern /* noswitch */ section *lcomm_section;
+extern /* noswitch */ section *bss_noswitch_section;
-extern GTY(()) section *in_section;
+extern /* unknown */ section *in_section;
extern GTY(()) bool in_cold_section_p;
extern section *get_unnamed_section (unsigned int, void (*) (const void *),
@@ -606,6 +612,8 @@ extern section *default_select_rtx_section (enum machine_mode, rtx,
extern section *default_elf_select_rtx_section (enum machine_mode, rtx,
unsigned HOST_WIDE_INT);
extern void default_encode_section_info (tree, rtx, int);
+extern void pickle_in_section (void);
+extern void unpickle_in_section (void);
extern const char *default_strip_name_encoding (const char *);
extern void default_asm_output_anchor (rtx);
extern bool default_use_anchors_for_symbol_p (const_rtx);
diff --git a/gcc/params.def b/gcc/params.def
index ea3015b3640..cc170e774dd 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -282,6 +282,15 @@ DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_INSNS,
"max-completely-peeled-insns",
"The maximum number of insns of a completely peeled loop",
400, 0, 0)
+/* Completely peeling a loop is counter-productive if it leads to
+   increased icache misses.  Therefore, we want any enclosing outer
+   loop to be of a size that fits into the icache, and to roll often
+   enough to amortize the icache misses when the peeled loop is
+   fetched.  */
+/* The maximum number of insns in the outer loop of a peeled loop.  */
+DEFPARAM(PARAM_MAX_COMPLETELY_PEELED_OUTER_INSNS,
+	 "max-completely-peeled-outer-insns",
+	 "The maximum number of insns of the outer loop of a completely peeled loop",
+ 4000, 0, 0)
/* The maximum number of peelings of a single loop that is peeled completely. */
DEFPARAM(PARAM_MAX_COMPLETELY_PEEL_TIMES,
"max-completely-peel-times",
diff --git a/gcc/postreload.c b/gcc/postreload.c
index 8abc90f83d9..3d4451c8a84 100644
--- a/gcc/postreload.c
+++ b/gcc/postreload.c
@@ -906,6 +906,42 @@ reload_combine (void)
}
}
}
+ /* Look for (set (REGX) (CONST A))
+ ... (MEM (PLUS (REGX) (const_int)))...
+ and convert it to
+ ... (MEM (CONST B))... . */
+ if (set != NULL_RTX
+ && REG_P (SET_DEST (set))
+ && (hard_regno_nregs[REGNO (SET_DEST (set))]
+ [GET_MODE (SET_DEST (set))]
+ == 1)
+ && CONSTANT_P (SET_SRC (set))
+ && last_label_ruid < reg_state[REGNO (SET_DEST (set))].use_ruid
+ /* One use maximum - otherwise we'd de-cse. */
+ && (reg_state[REGNO (SET_DEST (set))].use_index
+ == RELOAD_COMBINE_MAX_USES - 1))
+ {
+ rtx reg = SET_DEST (set);
+ unsigned int regno = REGNO (reg);
+ rtx sum
+ = plus_constant (SET_SRC (set), INTVAL (reg_state[regno].offset));
+
+ if (GET_CODE (sum) == PLUS)
+ sum = gen_rtx_CONST (Pmode, sum);
+ i = RELOAD_COMBINE_MAX_USES - 1;
+ /* If we wanted to handle JUMP_INSNS, we'd have to fix up JUMP_LABEL.
+ (e.g. pr21728.c -Os). Doesn't seem worth the hassle. */
+ if (!JUMP_P (reg_state[regno].reg_use[i].insn)
+ && validate_change (reg_state[regno].reg_use[i].insn,
+ reg_state[regno].reg_use[i].usep, sum, 0))
+ {
+ /* Delete the reg set. */
+ delete_insn (insn);
+
+ reg_state[regno].use_index = RELOAD_COMBINE_MAX_USES;
+ continue;
+ }
+ }
note_stores (PATTERN (insn), reload_combine_note_store, NULL);
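
In RTL terms, the new reload_combine transformation looks like this (a sketch with invented register numbers):

    /* Sketch of the reload_combine rewrite:

         (set (reg 12) (symbol_ref "table"))
         ...
         (set (reg 0) (mem (plus (reg 12) (const_int 8))))

       becomes, when reg 12 has exactly one use:

         (set (reg 0) (mem (const (plus (symbol_ref "table")
                                        (const_int 8)))))

       and the now-dead constant load is deleted.  */
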
diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c
index 902e1f6b7da..9ee508dbe5d 100644
--- a/gcc/read-rtl.c
+++ b/gcc/read-rtl.c
@@ -1593,7 +1593,7 @@ read_rtx_1 (FILE *infile, struct map_value **mode_maps)
/* Add expressions to a list, while keeping a count. */
obstack_init (&vector_stack);
- while ((c = read_skip_spaces (infile)) && c != ']')
+ while ((c = read_skip_spaces (infile)) && c != ']' && c != EOF)
{
ungetc (c, infile);
list_counter++;
diff --git a/gcc/regclass.c b/gcc/regclass.c
index b12d4168506..999c51ccea7 100644
--- a/gcc/regclass.c
+++ b/gcc/regclass.c
@@ -946,7 +946,7 @@ struct reg_pref
{
/* (enum reg_class) prefclass is the preferred class. May be
NO_REGS if no class is better than memory. */
- char prefclass;
+ short prefclass;
/* altclass is a register class that we should use for allocating
pseudo if no register in the preferred class is available.
@@ -955,7 +955,7 @@ struct reg_pref
It might appear to be more general to have a bitmask of classes here,
but since it is recommended that there be a class corresponding to the
union of most major pair of classes, that generality is not required. */
- char altclass;
+ short altclass;
};
/* Record the cost of each class for each pseudo. */
diff --git a/gcc/regmove.c b/gcc/regmove.c
index 12b93fc2ff8..8a8409c00fe 100644
--- a/gcc/regmove.c
+++ b/gcc/regmove.c
@@ -222,9 +222,22 @@ try_auto_increment (rtx insn, rtx inc_insn, rtx inc_insn_set, rtx reg,
static rtx
discover_flags_reg (void)
{
- rtx tmp;
- tmp = gen_rtx_REG (word_mode, 10000);
- tmp = gen_add3_insn (tmp, tmp, const2_rtx);
+ rtx tmp0, tmp = NULL_RTX;
+ enum machine_mode mode;
+
+  /* mxp has 128-bit wide words, but the widest scalar it can add is
+     32 bits, and the widest scalar for which gen_add3_insn succeeds is
+     16 bits.  */
+ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
+ mode != VOIDmode && GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
+ mode = GET_MODE_WIDER_MODE (mode))
+ {
+ tmp0 = gen_rtx_REG (mode, 10000);
+ tmp0 = gen_add3_insn (tmp0, tmp0, const2_rtx);
+ if (tmp0)
+ tmp = tmp0;
+ }
+ if (!tmp)
+ return pc_rtx;
/* If we get something that isn't a simple set, or a
[(set ..) (clobber ..)], this whole function will go wrong. */
diff --git a/gcc/reload.c b/gcc/reload.c
index e353c50acdb..074d4732224 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -916,6 +916,8 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
enum insn_code secondary_in_icode = CODE_FOR_nothing;
enum insn_code secondary_out_icode = CODE_FOR_nothing;
+ if (in != 0 && !out && targetm.preserve_reload_p (in))
+ type = RELOAD_OTHER;
/* INMODE and/or OUTMODE could be VOIDmode if no mode
has been specified for the operand. In that case,
use the operand's mode as the mode to reload. */
diff --git a/gcc/reorg.c b/gcc/reorg.c
index 97570e858dd..8d681685d69 100644
--- a/gcc/reorg.c
+++ b/gcc/reorg.c
@@ -2494,21 +2494,25 @@ fill_simple_delay_slots (int non_jumps_p)
#endif
}
-/* Follow any unconditional jump at LABEL;
+/* Follow any unconditional jump at LABEL, for the purpose of redirecting JUMP;
return the ultimate label reached by any such chain of jumps.
Return null if the chain ultimately leads to a return instruction.
If LABEL is not followed by a jump, return LABEL.
If the chain loops or we can't find end, return LABEL,
- since that tells caller to avoid changing the insn. */
+   since that tells the caller to avoid changing the insn.
+   If the returned label is obtained by following a REG_CROSSING_JUMP
+   jump, set *CP to (one of) the note(s); otherwise set it to NULL_RTX.  */
static rtx
-follow_jumps (rtx label)
+follow_jumps (rtx label, rtx jump, rtx *cp)
{
rtx insn;
rtx next;
rtx value = label;
int depth;
+ rtx crossing = NULL_RTX;
+ *cp = 0;
for (depth = 0;
(depth < 10
&& (insn = next_active_insn (value)) != 0
@@ -2531,10 +2535,15 @@ follow_jumps (rtx label)
|| GET_CODE (PATTERN (tem)) == ADDR_DIFF_VEC))
break;
+ if (!targetm.can_follow_jump (jump, insn))
+ break;
+ if (!crossing)
+ crossing = find_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
value = JUMP_LABEL (insn);
}
if (depth == 10)
return label;
+ *cp = crossing;
return value;
}
@@ -2974,6 +2983,7 @@ fill_slots_from_thread (rtx insn, rtx condition, rtx thread,
if (new_thread != thread)
{
rtx label;
+ rtx crossing = NULL_RTX;
gcc_assert (thread_if_true);
@@ -2983,7 +2993,7 @@ fill_slots_from_thread (rtx insn, rtx condition, rtx thread,
&& redirect_with_delay_list_safe_p (insn,
JUMP_LABEL (new_thread),
delay_list))
- new_thread = follow_jumps (JUMP_LABEL (new_thread));
+ new_thread = follow_jumps (JUMP_LABEL (new_thread), insn, &crossing);
if (new_thread == 0)
label = find_end_label ();
@@ -2993,7 +3003,11 @@ fill_slots_from_thread (rtx insn, rtx condition, rtx thread,
label = get_label_before (new_thread);
if (label)
- reorg_redirect_jump (insn, label);
+ {
+ reorg_redirect_jump (insn, label);
+ if (crossing)
+ set_unique_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
+ }
}
return delay_list;
@@ -3327,6 +3341,7 @@ relax_delay_slots (rtx first)
for (insn = first; insn; insn = next)
{
rtx other;
+ rtx crossing;
next = next_active_insn (insn);
@@ -3337,7 +3352,9 @@ relax_delay_slots (rtx first)
&& (condjump_p (insn) || condjump_in_parallel_p (insn))
&& (target_label = JUMP_LABEL (insn)) != 0)
{
- target_label = skip_consecutive_labels (follow_jumps (target_label));
+ target_label
+ = skip_consecutive_labels (follow_jumps (target_label, insn,
+ &crossing));
if (target_label == 0)
target_label = find_end_label ();
@@ -3349,7 +3366,11 @@ relax_delay_slots (rtx first)
}
if (target_label && target_label != JUMP_LABEL (insn))
- reorg_redirect_jump (insn, target_label);
+ {
+ reorg_redirect_jump (insn, target_label);
+ if (crossing)
+ set_unique_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
+ }
/* See if this jump conditionally branches around an unconditional
jump. If so, invert this jump and point it to the target of the
@@ -3486,7 +3507,11 @@ relax_delay_slots (rtx first)
{
/* If this jump goes to another unconditional jump, thread it, but
don't convert a jump into a RETURN here. */
- trial = skip_consecutive_labels (follow_jumps (target_label));
+ rtx crossing;
+
+ trial
+ = skip_consecutive_labels (follow_jumps (target_label, delay_insn,
+ &crossing));
if (trial == 0)
trial = find_end_label ();
@@ -3495,6 +3520,8 @@ relax_delay_slots (rtx first)
{
reorg_redirect_jump (delay_insn, trial);
target_label = trial;
+ if (crossing)
+ set_unique_reg_note (insn, REG_CROSSING_JUMP, NULL_RTX);
}
/* If the first insn at TARGET_LABEL is redundant with a previous
diff --git a/gcc/resource.c b/gcc/resource.c
index 009fb6e609b..69ae5ecfa81 100644
--- a/gcc/resource.c
+++ b/gcc/resource.c
@@ -530,26 +530,40 @@ find_dead_or_set_registers (rtx target, struct resources *res,
filled by instructions from the target. This is correct
if the branch is not taken. Since we are following both
paths from the branch, we must also compute correct info
- if the branch is taken. We do this by inverting all of
- the INSN_FROM_TARGET_P bits, calling mark_set_resources,
- and then inverting the INSN_FROM_TARGET_P bits again. */
+ if the branch is taken. We do this by inlining the loop
+ for processing the sequence, and inverting the sense of
+ the INSN_FROM_TARGET_P test for the target. */
if (GET_CODE (PATTERN (insn)) == SEQUENCE
&& INSN_ANNULLED_BRANCH_P (this_jump_insn))
{
- for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
- INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i))
- = ! INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i));
+ rtx x = PATTERN (insn);
+ int i;
target_set = set;
- mark_set_resources (insn, &target_set, 0,
- MARK_SRC_DEST_CALL);
-
- for (i = 1; i < XVECLEN (PATTERN (insn), 0); i++)
- INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i))
- = ! INSN_FROM_TARGET_P (XVECEXP (PATTERN (insn), 0, i));
+ for (i = 0; i < XVECLEN (x, 0); i++)
+ if ((i == 0 || INSN_FROM_TARGET_P (XVECEXP (x, 0, i)))
+ && (GET_CODE (PATTERN (XVECEXP (x, 0, i)))
+ != COND_EXEC))
+ mark_set_resources (XVECEXP (x, 0, i), &target_set,
+ 0, MARK_SRC_DEST_CALL);
+ for (i = 0; i < XVECLEN (x, 0); i++)
+ if ((i == 0 || !INSN_FROM_TARGET_P (XVECEXP (x, 0, i)))
+ && (GET_CODE (PATTERN (XVECEXP (x, 0, i)))
+ != COND_EXEC))
+ mark_set_resources (XVECEXP (x, 0, i), &set,
+ 0, MARK_SRC_DEST_CALL);
+ }
+ else if (GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ rtx x = PATTERN (insn);
+ int i;
- mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
+ for (i = 0; i < XVECLEN (x, 0); i++)
+ if (GET_CODE (PATTERN (XVECEXP (x, 0, i))) != COND_EXEC)
+ mark_set_resources (XVECEXP (x, 0, i), &set,
+ 0, MARK_SRC_DEST_CALL);
+ target_set = set;
}
else
{
@@ -591,7 +605,8 @@ find_dead_or_set_registers (rtx target, struct resources *res,
}
mark_referenced_resources (insn, &needed, 1);
- mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
+ if (GET_CODE (PATTERN (insn)) != COND_EXEC)
+ mark_set_resources (insn, &set, 0, MARK_SRC_DEST_CALL);
COPY_HARD_REG_SET (scratch, set.regs);
AND_COMPL_HARD_REG_SET (scratch, needed.regs);
diff --git a/gcc/rtl-factoring.c b/gcc/rtl-factoring.c
index 07c66e20cc3..aed162c9915 100644
--- a/gcc/rtl-factoring.c
+++ b/gcc/rtl-factoring.c
@@ -352,6 +352,16 @@ compute_rtx_cost (rtx insn)
return cost != 0 ? cost : COSTS_N_INSNS (1);
}
+/* Like compute_rtx_cost, but is passed a raw insn with uninitialized
+ PREV and NEXT fields. We must zero them first lest context checks in
+ ADJUST_INSN_LENGTH segfault. */
+static int
+compute_dummy_rtx_cost (rtx insn)
+{
+ PREV_INSN (insn) = NULL_RTX;
+ NEXT_INSN (insn) = NULL_RTX;
+ return compute_rtx_cost (insn);
+}
/* Determines the number of common insns in the sequences ending in INSN1 and
INSN2. Returns with LEN number of common insns and COST cost of sequence.
*/
@@ -1379,13 +1389,14 @@ compute_init_costs (void)
rtx_return = gen_jump (label);
/* The cost of jump. */
- seq_jump_cost = compute_rtx_cost (make_jump_insn_raw (rtx_jump));
+ seq_jump_cost = compute_dummy_rtx_cost (make_jump_insn_raw (rtx_jump));
/* The cost of calling sequence. */
- seq_call_cost = seq_jump_cost + compute_rtx_cost (make_insn_raw (rtx_store));
+ seq_call_cost
+ = seq_jump_cost + compute_dummy_rtx_cost (make_insn_raw (rtx_store));
/* The cost of return. */
- seq_return_cost = compute_rtx_cost (make_jump_insn_raw (rtx_return));
+ seq_return_cost = compute_dummy_rtx_cost (make_jump_insn_raw (rtx_return));
/* Simple heuristic for minimal sequence cost. */
seq_call_cost = (int)(seq_call_cost * (double)SEQ_CALL_COST_MULTIPLIER);
diff --git a/gcc/rtl.h b/gcc/rtl.h
index 5bd79053c1d..b4d7df2964f 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1750,6 +1750,7 @@ extern int refers_to_regno_p (unsigned int, unsigned int, const_rtx, rtx *);
extern int reg_overlap_mentioned_p (const_rtx, const_rtx);
extern const_rtx set_of (const_rtx, const_rtx);
extern void note_stores (const_rtx, void (*) (rtx, const_rtx, void *), void *);
+extern void walk_stores (rtx, void (*) (rtx, rtx, void *), void *);
extern void note_uses (rtx *, void (*) (rtx *, void *), void *);
extern int dead_or_set_p (const_rtx, const_rtx);
extern int dead_or_set_regno_p (const_rtx, unsigned int);
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 5d9df2cc089..d951d98ae7a 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -1443,6 +1443,55 @@ note_stores (const_rtx x, void (*fun) (rtx, const_rtx, void *), void *data)
note_stores (XVECEXP (x, 0, i), fun, data);
}
+/* Call FUN on each register or MEM that is stored into or clobbered by X.
+   (X would be the pattern of an insn).  DATA is an arbitrary pointer,
+   ignored by walk_stores, but passed to FUN.
+ FUN may alter parts of the RTL.
+
+ FUN receives three arguments:
+ 1. the REG, MEM, CC0 or PC being stored in or clobbered,
+ 2. the SET or CLOBBER rtx that does the store,
+ 3. the pointer DATA provided to note_stores.
+
+ If the item being stored in or clobbered is a SUBREG of a hard register,
+ the SUBREG will be passed. */
+
+void
+walk_stores (rtx x, void (*fun) (rtx, rtx, void *), void *data)
+{
+ int i;
+
+ if (GET_CODE (x) == COND_EXEC)
+ x = COND_EXEC_CODE (x);
+
+ if (GET_CODE (x) == SET || GET_CODE (x) == CLOBBER)
+ {
+ rtx dest = SET_DEST (x);
+
+ while ((GET_CODE (dest) == SUBREG
+ && (!REG_P (SUBREG_REG (dest))
+ || REGNO (SUBREG_REG (dest)) >= FIRST_PSEUDO_REGISTER))
+ || GET_CODE (dest) == ZERO_EXTRACT
+ || GET_CODE (dest) == STRICT_LOW_PART)
+ dest = XEXP (dest, 0);
+
+ /* If we have a PARALLEL, SET_DEST is a list of EXPR_LIST expressions,
+ each of whose first operand is a register. */
+ if (GET_CODE (dest) == PARALLEL)
+ {
+ for (i = XVECLEN (dest, 0) - 1; i >= 0; i--)
+ if (XEXP (XVECEXP (dest, 0, i), 0) != 0)
+ (*fun) (XEXP (XVECEXP (dest, 0, i), 0), x, data);
+ }
+ else
+ (*fun) (dest, x, data);
+ }
+
+ else if (GET_CODE (x) == PARALLEL)
+ for (i = XVECLEN (x, 0) - 1; i >= 0; i--)
+ walk_stores (XVECEXP (x, 0, i), fun, data);
+}
+
/* Like notes_stores, but call FUN for each expression that is being
referenced in PBODY, a pointer to the PATTERN of an insn. We only call
FUN for each expression, not any interior subexpressions. FUN receives a
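
A typical use of walk_stores, as a sketch (the callback and struct are invented; note that unlike with note_stores, FUN may modify the rtl, and SUBREGs of hard registers are passed through as SUBREGs):

    /* Hypothetical walk_stores callback: count hard-register stores in
       an insn pattern.  */
    struct store_count { int hard_reg_stores; };

    static void
    count_hard_reg_store (rtx dest, rtx setter ATTRIBUTE_UNUSED, void *data)
    {
      struct store_count *sc = (struct store_count *) data;

      if (REG_P (dest) && REGNO (dest) < FIRST_PSEUDO_REGISTER)
        sc->hard_reg_stores++;
    }

    /* Usage:
         struct store_count sc = { 0 };
         walk_stores (PATTERN (insn), count_hard_reg_store, &sc);  */
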
diff --git a/gcc/target-def.h b/gcc/target-def.h
index 3a332d88efd..dfdd09cbb2c 100644
--- a/gcc/target-def.h
+++ b/gcc/target-def.h
@@ -479,6 +479,7 @@
/* In hooks.c. */
#define TARGET_CANNOT_MODIFY_JUMPS_P hook_bool_void_false
+#define TARGET_CAN_FOLLOW_JUMP hook_bool_const_rtx_const_rtx_true
#define TARGET_BRANCH_TARGET_REGISTER_CLASS hook_int_void_no_regs
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED hook_bool_bool_false
#define TARGET_CANNOT_FORCE_CONST_MEM hook_bool_rtx_false
@@ -641,6 +642,10 @@
#define TARGET_SECONDARY_RELOAD default_secondary_reload
#endif
+#ifndef TARGET_PRESERVE_RELOAD_P
+#define TARGET_PRESERVE_RELOAD_P hook_bool_rtx_false
+#endif
+
#ifndef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK hook_void_void
#endif
@@ -858,6 +863,7 @@
TARGET_INIT_LIBFUNCS, \
TARGET_SECTION_TYPE_FLAGS, \
TARGET_CANNOT_MODIFY_JUMPS_P, \
+ TARGET_CAN_FOLLOW_JUMP, \
TARGET_BRANCH_TARGET_REGISTER_CLASS, \
TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED, \
TARGET_CANNOT_FORCE_CONST_MEM, \
@@ -915,6 +921,7 @@
TARGET_INVALID_BINARY_OP, \
TARGET_IRA_COVER_CLASSES, \
TARGET_SECONDARY_RELOAD, \
+ TARGET_PRESERVE_RELOAD_P, \
TARGET_EXPAND_TO_RTL_HOOK, \
TARGET_INSTANTIATE_DECLS, \
TARGET_HARD_REGNO_SCRATCH_OK, \
diff --git a/gcc/target.h b/gcc/target.h
index 21c46adbbe2..320d3c478cc 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -580,6 +580,9 @@ struct gcc_target
not, at the current point in the compilation. */
bool (* cannot_modify_jumps_p) (void);
+ /* True if FOLLOWER may be modified to follow FOLLOWEE. */
+ bool (*can_follow_jump) (const_rtx follower, const_rtx followee);
+
/* Return a register class for which branch target register
optimizations should be applied. */
int (* branch_target_register_class) (void);
@@ -906,6 +909,9 @@ struct gcc_target
enum reg_class (*secondary_reload) (bool, rtx, enum reg_class,
enum machine_mode,
struct secondary_reload_info *);
+ /* Return true if a reload loading IN should share a reload register
+ with an unrelated output reload. */
+ bool (*preserve_reload_p) (rtx in);
/* This target hook allows the backend to perform additional
processing while initializing for variable expansion. */
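[Editor's illustration, not part of the patch: a backend would override the two new hooks roughly as sketched below. The names my_can_follow_jump and my_preserve_reload_p are assumptions; the signatures match the struct gcc_target declarations above.]

/* Sketch of backend overrides for the new hooks -- illustrative only.  */
#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP my_can_follow_jump
#undef TARGET_PRESERVE_RELOAD_P
#define TARGET_PRESERVE_RELOAD_P my_preserve_reload_p

/* Permit retargeting FOLLOWER to follow FOLLOWEE unless some
   target-specific condition (e.g. branch shortening) forbids it.  */
static bool
my_can_follow_jump (const_rtx follower ATTRIBUTE_UNUSED,
		    const_rtx followee ATTRIBUTE_UNUSED)
{
  return true;
}

/* Share a reload register loaded from IN with an unrelated output
   reload, e.g. when IN is expensive to rematerialize.  */
static bool
my_preserve_reload_p (rtx in)
{
  return GET_CODE (in) == SYMBOL_REF;
}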
diff --git a/gcc/testsuite/ChangeLog.ARC b/gcc/testsuite/ChangeLog.ARC
new file mode 100644
index 00000000000..efa9e196e83
--- /dev/null
+++ b/gcc/testsuite/ChangeLog.ARC
@@ -0,0 +1,72 @@
+2008-12-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.target/arc/add_f.c: Use -O2.
+ Allow mov.lo as alternative to mov.hs.
+ * lib/scanasm.exp (scan-assembler-times):
+ Translate control characters for printing.
+ Fix regexp count in the presence of subexpressions.
+
+2008-12-08 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/torture/pr37868.c: Skip for ARC.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/pr28243.c: Only for target pic.
+ * gcc.dg/pr35044.c: Don't put -mA7 into dg-options.
+
+2008-11-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.target/arc: New directory.
+ * gcc.target/arc/arc.exp: New driver.
+ * gcc.target/arc/adc.c: New test.
+ * gcc.target/arc/add_f.c: New test.
+ * gcc.target/arc/sbc.c: New test.
+
+2008-09-16 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/tree-ssa/ssa-store-ccp-2.c (dg-options): Add -fcommon.
+
+2008-07-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.c-torture/compile/20000804-1.c: Skip for mxp.
+ * gcc.c-torture/compile/20010327-1.c: Likewise.
+ * gcc.c-torture/compile/920501-4.c: Likewise.
+ * gcc.c-torture/compile/20020604-1.c: Likewise.
+ * gcc.c-torture/compile/20010518-2.c: Likewise.
+ * gcc.c-torture/compile/920501-12.c: Likewise.
+ * gcc.c-torture/compile/limits-blockid.c: Likewise.
+ * gcc.c-torture/compile/20001226-1.c: Likewise.
+ * gcc.c-torture/compile/961203-1.c: Likewise.
+ * gcc.c-torture/compile/20050622-1.c: Likewise.
+ * gcc.c-torture/compile/930506-2.c:
+ Use dg-require-effective-target trampolines.
+ * gcc.c-torture/compile/limits-fndefn.c: Use STACK_SIZE.
+ * gcc.c-torture/compile/limits-fnargs.c: Likewise.
+
+2008-04-04 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.c-torture/execute/ieee/denorm-rand.c: New file.
+ * gcc.dg/torture/fp-int-convert.h: Avoid undefined behaviour.
+
+2008-01-31 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/pr35044.c: New file.
+
+2007-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/sibcall-3.c: Don't XFAIL.
+ * gcc.dg/sibcall-4.c: Likewise.
+
+2007-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * g++.dg/cpp/_Pragma1.c: Disable for arc-*-elf*.
+
+2007-04-25 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/cpp/_Pragma6.c: Disable for arc-*-elf*.
+
+2007-04-24 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * gcc.dg/builtin-apply2.c (main): Make sure we have 64 bytes of
+ stack to copy.
diff --git a/gcc/testsuite/g++.dg/cpp/_Pragma1.C b/gcc/testsuite/g++.dg/cpp/_Pragma1.C
index 93d55ee53cd..c1234da6a4c 100644
--- a/gcc/testsuite/g++.dg/cpp/_Pragma1.C
+++ b/gcc/testsuite/g++.dg/cpp/_Pragma1.C
@@ -2,7 +2,7 @@
// This is supposed to succeed only if
// the target defines HANDLE_PRAGMA_PACK_PUSH_POP
// and doesn't define HANDLE_PRAGMA_PACK_WITH_EXPANSION.
-// { dg-do compile { target { ! { powerpc-ibm-aix* *-*-solaris2* fido-*-* m68k-*-* sh*-[us]*-elf } } } }
+// { dg-do compile { target { ! { arc-*-elf* powerpc-ibm-aix* *-*-solaris2* fido-*-* m68k-*-* sh*-[us]*-elf } } } }
#define push bar
#define foo _Pragma ("pack(push)")
diff --git a/gcc/testsuite/gcc.c-torture/compile/20000804-1.c b/gcc/testsuite/gcc.c-torture/compile/20000804-1.c
index 68db6d36aed..56b14c3e50d 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20000804-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20000804-1.c
@@ -4,6 +4,7 @@
/* { dg-skip-if "" { { i?86-*-* x86_64-*-* } && ilp32 } { "-fpic" "-fPIC" } { "" } } */
/* { dg-skip-if "PIC default" { i?86-*-darwin* } { "*" } { "" } } */
/* { dg-skip-if "No 64-bit registers" { m32c-*-* } { "*" } { "" } } */
+/* { dg-skip-if "Wrong constraint" { mxp-*-* } { "*" } { "" } } */
/* { dg-xfail-if "" { m6811-*-* m6812-*-* h8300-*-* } { "*" } { "" } } */
/* Copyright (C) 2000, 2003 Free Software Foundation */
diff --git a/gcc/testsuite/gcc.c-torture/compile/20001226-1.c b/gcc/testsuite/gcc.c-torture/compile/20001226-1.c
index 1a1af7b5823..c3c3538ea3a 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20001226-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20001226-1.c
@@ -3,6 +3,7 @@
/* { dg-do assemble } */
/* { dg-xfail-if "function larger than 64K" { m6811-*-* } { "*" } { "" } } */
/* { dg-skip-if "too much code for avr" { "avr-*-*" } { "*" } { "" } } */
+/* { dg-skip-if "too much code for mxp" { "mxp-*-*" } { "*" } { "" } } */
/* { dg-xfail-if "jump beyond 128K not supported" { xtensa*-*-* } { "-O0" } { "" } } */
/* { dg-xfail-if "PR36698" { spu-*-* } { "-O0" } { "" } } */
/* { dg-skip-if "" { m32c-*-* } { "*" } { "" } } */
diff --git a/gcc/testsuite/gcc.c-torture/compile/20010327-1.c b/gcc/testsuite/gcc.c-torture/compile/20010327-1.c
index f20e3a3e2dc..9839fe58d3f 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20010327-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20010327-1.c
@@ -1,5 +1,6 @@
/* { dg-skip-if "non-SI pointers" { m32c-*-* } { "*" } { "" } } */
/* { dg-skip-if "HI mode pointer for avr" { "avr-*-*" } { "*" } { "" } } */
+/* { dg-skip-if "HI mode pointer for mxp" { "mxp-*-*" } { "*" } { "" } } */
/* { dg-skip-if "non-SI pointers for w64" { "x86_64-*-mingw*" } { "*" } { "" } } */
/* This testcase tests whether GCC can produce static initialized data
diff --git a/gcc/testsuite/gcc.c-torture/compile/20010518-2.c b/gcc/testsuite/gcc.c-torture/compile/20010518-2.c
index f35d7c6b96d..f74ae263bdd 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20010518-2.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20010518-2.c
@@ -4,6 +4,7 @@
the array is too large (INT_MAX/2 > 64K). Force to use 16-bit ints
for it. */
/* { dg-options "-w -mshort" { target m6811-*-* m6812-*-* } } */
+/* { dg-skip-if "HI mode pointer for mxp" { "mxp-*-*" } { "*" } { "" } } */
/* Large static storage. */
diff --git a/gcc/testsuite/gcc.c-torture/compile/20020604-1.c b/gcc/testsuite/gcc.c-torture/compile/20020604-1.c
index 17b9b2165ba..cba87fa39c1 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20020604-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20020604-1.c
@@ -1,6 +1,7 @@
/* { dg-do assemble } */
/* { dg-xfail-if "The array is too big" { "m6811-*-* m6812-*-*" } { "*" } { "" } } */
/* { dg-skip-if "The array is too big" { "avr-*-*" } { "*" } { "" } } */
+/* { dg-skip-if "The array is too big" { "mxp-*-*" } { "*" } { "" } } */
/* { dg-xfail-if "The array too big" { "h8300-*-*" } { "-mno-h" "-mn" } { "" } } */
/* { dg-skip-if "" { m32c-*-* } { } { } } */
diff --git a/gcc/testsuite/gcc.c-torture/compile/20050622-1.c b/gcc/testsuite/gcc.c-torture/compile/20050622-1.c
index db183909f40..bc26592e8fc 100644
--- a/gcc/testsuite/gcc.c-torture/compile/20050622-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/20050622-1.c
@@ -1,3 +1,4 @@
+/* { dg-skip-if "The array is too big" { "mxp-*-*" } { "*" } { "" } } */
#if __SCHAR_MAX__ == 127 && __INT_MAX__ >= 2147483647
struct S { char buf[72*1024*1024]; };
#else
diff --git a/gcc/testsuite/gcc.c-torture/compile/920501-12.c b/gcc/testsuite/gcc.c-torture/compile/920501-12.c
index 7eac9724677..d78562e5587 100644
--- a/gcc/testsuite/gcc.c-torture/compile/920501-12.c
+++ b/gcc/testsuite/gcc.c-torture/compile/920501-12.c
@@ -2,6 +2,7 @@
the stack arrays are too large. Force to use 16-bit ints for it. */
/* { dg-do assemble } */
/* { dg-xfail-if "" { m6811-*-* m6812-*-* } { "*" } { "-mshort" } } */
+/* { dg-skip-if "The array is too big" { "mxp-*-*" } { "*" } { "" } } */
x(x){ return 3 + x;}
a(x){int y[994]; return 3 + x;}
diff --git a/gcc/testsuite/gcc.c-torture/compile/920501-4.c b/gcc/testsuite/gcc.c-torture/compile/920501-4.c
index 3481deefedb..edfbdd9b09a 100644
--- a/gcc/testsuite/gcc.c-torture/compile/920501-4.c
+++ b/gcc/testsuite/gcc.c-torture/compile/920501-4.c
@@ -2,6 +2,7 @@
the 'r0' array is too large. Force to use 16-bit ints for it. */
/* { dg-do assemble } */
/* { dg-xfail-if "" { m6811-*-* m6812-*-* } { "*" } { "-mshort" } } */
+/* { dg-skip-if "total size of local objects too large" { "mxp-*-*" } { "*" } { "" } } */
foo ()
{
diff --git a/gcc/testsuite/gcc.c-torture/compile/930506-2.c b/gcc/testsuite/gcc.c-torture/compile/930506-2.c
index e11e62f026f..ca16a3d99d1 100644
--- a/gcc/testsuite/gcc.c-torture/compile/930506-2.c
+++ b/gcc/testsuite/gcc.c-torture/compile/930506-2.c
@@ -1,3 +1,4 @@
+/* { dg-require-effective-target trampolines } */
#ifndef NO_TRAMPOLINES
int f1()
{
diff --git a/gcc/testsuite/gcc.c-torture/compile/961203-1.c b/gcc/testsuite/gcc.c-torture/compile/961203-1.c
index 98908584333..a27378ba7f6 100644
--- a/gcc/testsuite/gcc.c-torture/compile/961203-1.c
+++ b/gcc/testsuite/gcc.c-torture/compile/961203-1.c
@@ -1,6 +1,7 @@
/* The structure is too large for the xstormy16 - won't fit in 16
bits. */
/* { dg-do assemble } */
+/* { dg-skip-if "The array is too big" { "mxp-*-*" } { "*" } { "" } } */
#if __INT_MAX__ >= 2147483647L
struct s {
diff --git a/gcc/testsuite/gcc.c-torture/compile/limits-blockid.c b/gcc/testsuite/gcc.c-torture/compile/limits-blockid.c
index 545dfe4dcb7..c7cbc788786 100644
--- a/gcc/testsuite/gcc.c-torture/compile/limits-blockid.c
+++ b/gcc/testsuite/gcc.c-torture/compile/limits-blockid.c
@@ -1,3 +1,4 @@
+/* { dg-skip-if "total size of local objects too large" { "mxp-*-*" } { "*" } { "" } } */
#define LIM1(x) x##0; x##1; x##2; x##3; x##4; x##5; x##6; x##7; x##8; x##9;
#define LIM2(x) LIM1(x##0) LIM1(x##1) LIM1(x##2) LIM1(x##3) LIM1(x##4) \
LIM1(x##5) LIM1(x##6) LIM1(x##7) LIM1(x##8) LIM1(x##9)
diff --git a/gcc/testsuite/gcc.c-torture/compile/limits-fnargs.c b/gcc/testsuite/gcc.c-torture/compile/limits-fnargs.c
index b94fa43db12..1377c660cd3 100644
--- a/gcc/testsuite/gcc.c-torture/compile/limits-fnargs.c
+++ b/gcc/testsuite/gcc.c-torture/compile/limits-fnargs.c
@@ -1,3 +1,8 @@
+#ifndef STACK_SIZE
+#define STACK_SIZE 99999
+#endif
+#if STACK_SIZE >= 40004
+
#define PAR1 int, int, int, int, int, int, int, int, int, int
#define PAR2 PAR1, PAR1, PAR1, PAR1, PAR1, PAR1, PAR1, PAR1, PAR1, PAR1
#define PAR3 PAR2, PAR2, PAR2, PAR2, PAR2, PAR2, PAR2, PAR2, PAR2, PAR2
@@ -18,3 +23,4 @@ void caller(void)
{
func (ARG4);
}
+#endif /* STACK_SIZE */
diff --git a/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c b/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c
index 0c5a2f5e774..c0f2db6ec86 100644
--- a/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c
+++ b/gcc/testsuite/gcc.c-torture/compile/limits-fndefn.c
@@ -1,4 +1,9 @@
/* { dg-skip-if "too complex for avr" { "avr-*-*" } { "*" } { "" } } */
+#ifndef STACK_SIZE
+#define STACK_SIZE 999999
+#endif
+#if STACK_SIZE >= 400004
+
#define LIM1(x) x##0, x##1, x##2, x##3, x##4, x##5, x##6, x##7, x##8, x##9,
#define LIM2(x) LIM1(x##0) LIM1(x##1) LIM1(x##2) LIM1(x##3) LIM1(x##4) \
LIM1(x##5) LIM1(x##6) LIM1(x##7) LIM1(x##8) LIM1(x##9)
@@ -16,3 +21,4 @@
void func1 (LIM5(int p) int t)
{
}
+#endif /* STACK_SIZE */
diff --git a/gcc/testsuite/gcc.c-torture/execute/ieee/denorm-rand.c b/gcc/testsuite/gcc.c-torture/execute/ieee/denorm-rand.c
new file mode 100644
index 00000000000..5ec9a767564
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/ieee/denorm-rand.c
@@ -0,0 +1,199 @@
+/* Test proper rounding for division when generating subnormal numbers. */
+/* x86 needs "-mfpmath=sse -msse2" to pass this test. */
+
+/* The debug code is only meant to run on linux with long double support. */
+#define dEBUG
+
+long long
+simple_rand ()
+{
+ static unsigned long long seed = 47114711;
+ unsigned long long this = seed * 1103515245 + 12345;
+ seed = this;
+ return this >> 8;
+}
+
+unsigned long long int
+random_bitstring (int *lenp)
+{
+ unsigned long long int x;
+ int n_bits;
+ int tot_bits = 0;
+ int len = 0;
+ int limit = *lenp;
+
+ x = 0;
+ for (;;)
+ {
+ long long ran = simple_rand ();
+ int ones_p = ran & 1;
+ n_bits = (ran >> 1) % 16;
+ if (x || ones_p)
+ {
+ if (n_bits > limit - len)
+ n_bits = limit - len;
+ *lenp = len += n_bits;
+ }
+ tot_bits += n_bits;
+
+ if (n_bits == 0 && len)
+ return x;
+ else if (n_bits)
+ {
+ x <<= n_bits;
+ if (ones_p)
+ x |= (1 << n_bits) - 1;
+
+ if (len == limit
+ || (len && tot_bits > 8 * sizeof (long long) + 6))
+ return x;
+ }
+ }
+}
+
+main ()
+{
+ int i;
+ double e0, e1, ie1, i2e1;
+ /* Hack to get x86 math to work. */
+ volatile double eval_tmp;
+#define EVAL(x) (eval_tmp = (x))
+
+ for (i = 0, e1 = 0.5; EVAL (1. + e1) > 1.; e1 *= 0.5, i++);
+ e1 *= 2.;
+ ie1 = 1./e1;
+ i2e1 = 2./e1;
+ if (i < 52 || (sizeof (double) == 8 && i > 52))
+ abort ();
+ for (i = 1, e0 = 0.5; EVAL (e0 * e0); e0 *= e0, i <<= 1);
+ for (;EVAL (e0 * 0.5); e0 *= 0.5, i++);
+ if (i < 0x3ff + 51 || (sizeof (double) == 8 && i > 0x3ff + 51))
+ abort ();
+
+ /* First, check that a quotient that can be computed exactly is properly
+ rounded, and try variations on the fraction to do some simple
+ round-to-nearest checks for inexact results. */
+ for (i = 0; i < 1000; i++)
+ {
+ unsigned long long x, y;
+ int xlen, ylen;
+ long long ran;
+ double xd, xr, yd, yd2, e2y, pd, ep;
+
+ xlen = 53;
+ x = random_bitstring (&xlen);
+ ylen = 54-xlen;
+ y = random_bitstring (&ylen);
+ y |= 1;
+ if (x > 2 &&
+ (double) (x|3) * y >= (double) (1LL << 53))
+ {
+ x >>= 1;
+ xlen--;
+ }
+ x |= 1;
+ ran = simple_rand ();
+ if (xlen > 1)
+ x ^= ran & 2;
+ xd = (double)x * e0;
+ yd = (double)y * ie1;
+ yd2 = yd * 2.;
+ e2y = (double) (1LL << ylen);
+ if (EVAL (yd2 + e2y) == yd2)
+ abort ();
+ if ((yd2 + e2y) / yd2 > (1.+e1)/1.)
+ abort ();
+ pd = xd * yd;
+ ep = e0 * (1LL << xlen-1) * (1LL << ylen-1);
+ if (EVAL (pd + ep) == pd)
+ ep += ep;
+ if (pd + ep == pd)
+ abort ();
+ if (EVAL((pd + ep) / pd) > EVAL (1 + e1))
+ abort ();
+ if (EVAL (pd / yd) != x * e0)
+ abort ();
+ /* Round to even. */
+ xr = ((x & 2) + x) >> 1;
+ if (EVAL (pd / yd2) != xr * e0)
+ abort ();
+ /* Round to nearest - upwards. */
+ xr = x+1 >> 1;
+ if (EVAL (pd / (yd2-e2y)) < xr * e0)
+ abort ();
+ if (EVAL ((pd + ep) / yd2) < xr * e0)
+ abort ();
+ /* Round to nearest - downwards. */
+ xr = x >> 1;
+ if (EVAL (pd / (yd2+e2y)) > xr * e0)
+ abort ();
+ if (EVAL ((pd - ep) / yd2) > xr * e0)
+ abort ();
+ }
+ /* Now generate a set of 53 bit random numbers, calculate a fractional
+ approximation which is likely to be hard to distinguish from the
+ exact result, and check for proper rounding. */
+ for (i = 0; i < 1000; i++)
+ {
+ unsigned long long x, y, x0, y0, x1, y1, x2, y2, x3, tmp;
+ int rest_sign;
+ long long ran;
+ int xlen;
+ long long a[20];
+ int j, k;
+
+ do
+ {
+ xlen = 53;
+ x = random_bitstring (&xlen);
+ }
+ while (xlen < 10);
+ x |= 1;
+ x0 = x;
+ /* Look for a close but inexact approximation whose numerator and
+ denominator fit in 53 bits. */
+ y = 1LL << xlen - 1;
+ y0 = y;
+ rest_sign = 0;
+ for (j = 0; j < 20; j++)
+ {
+ a[j] = x/y;
+ x1 = a[j], y1 = 1;
+ for (k = j - 1; k >= 0; k--)
+ {
+ tmp = a[k] * x1 + y1;
+ if (tmp >= 1LL << 53 || tmp/a[k] < x1)
+ goto end_approx;
+ y1 = x1;
+ x1 = tmp;
+ }
+ tmp = x - a[j] * y;
+ if (!tmp)
+ break;
+ rest_sign = j & 1 ? -1 : 1;
+ x2 = x1, y2 = y1;
+ x = y;
+ y = tmp;
+ }
+ end_approx:
+ if (!rest_sign)
+ continue;
+#ifdef DEBUG
+ printf ("%d %d %f %f %e\n", j, rest_sign,
+ (double)x0/y0, (double)x2/y2,
+ (double)((long double)x0/y0-((long double)x2/y2)));
+#endif
+ x3 = EVAL (x2*e0*y0/(2.*y2)) / e0 * 2.;
+#ifdef DEBUG
+ printf ("%llx %llx\n", x0, x3);
+#endif
+ if (rest_sign > 0 ? x3 >= x0 : x3 <= x0)
+#ifdef DEBUG
+ printf("ERROR!\n");
+#else
+ abort ();
+#endif
+ }
+
+ exit (0);
+}
diff --git a/gcc/testsuite/gcc.dg/builtin-apply2.c b/gcc/testsuite/gcc.dg/builtin-apply2.c
index bc49a645809..66fb8d4412e 100644
--- a/gcc/testsuite/gcc.dg/builtin-apply2.c
+++ b/gcc/testsuite/gcc.dg/builtin-apply2.c
@@ -26,6 +26,7 @@ void bar(char *name, ...)
int main(void)
{
+ char dummy[64]; /* Make sure we have 64 bytes of stack to copy. */
bar("eeee", 5.444567, 8.90765, 4.567789, INTEGER_ARG);
return 0;
diff --git a/gcc/testsuite/gcc.dg/cpp/_Pragma6.c b/gcc/testsuite/gcc.dg/cpp/_Pragma6.c
index 73f800486d1..bf9319efc45 100644
--- a/gcc/testsuite/gcc.dg/cpp/_Pragma6.c
+++ b/gcc/testsuite/gcc.dg/cpp/_Pragma6.c
@@ -2,7 +2,7 @@
/* This is supposed to succeed only if
the target defines HANDLE_PRAGMA_PACK_PUSH_POP
and doesn't define HANDLE_PRAGMA_PACK_WITH_EXPANSION. */
-/* { dg-do compile { target { ! { powerpc-ibm-aix* *-*-solaris2* fido-*-* m68k-*-* sh*-[us]*-elf } } } } */
+/* { dg-do compile { target { ! { arc-*-elf* powerpc-ibm-aix* *-*-solaris2* fido-*-* m68k-*-* sh*-[us]*-elf } } } } */
#define push bar
#define foo _Pragma ("pack(push)")
diff --git a/gcc/testsuite/gcc.dg/func-ptr-prof.c b/gcc/testsuite/gcc.dg/func-ptr-prof.c
new file mode 100644
index 00000000000..7122347eeac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/func-ptr-prof.c
@@ -0,0 +1,18 @@
+/* { dg-require-profiling "-pg" } */
+/* { dg-options "-pg" } */
+
+extern void exit (int);
+
+void
+f (void)
+{
+}
+
+void (*fp) (void) = &f;
+
+int
+main (int argc, char **argv)
+{
+ fp ();
+ exit (0);
+}
diff --git a/gcc/testsuite/gcc.dg/pr28243.c b/gcc/testsuite/gcc.dg/pr28243.c
index 12447a1c3d3..c02c4037213 100644
--- a/gcc/testsuite/gcc.dg/pr28243.c
+++ b/gcc/testsuite/gcc.dg/pr28243.c
@@ -1,7 +1,7 @@
/* PR rtl-optimization/28243 */
/* Reported by Mike Frysinger <vapier@gentoo.org> */
-/* { dg-do compile } */
+/* { dg-do compile { target fpic } } */
/* { dg-require-effective-target fpic } */
/* { dg-options "-O2 -ftracer -fPIC" } */
/* { dg-skip-if "requires unsupported run-time relocation" { spu-*-* } { "*" } { "" } } */
diff --git a/gcc/testsuite/gcc.dg/pr35044.c b/gcc/testsuite/gcc.dg/pr35044.c
new file mode 100644
index 00000000000..aede8ec323a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr35044.c
@@ -0,0 +1,235 @@
+/* { dg-do run } */
+/* We want to test this with -mA7 in particular, but don't add it explicitly
+ to dg-options since this clashes with automatic multi-subtarget testing. */
+/* { dg-options "-O2 -mlong-calls -fomit-frame-pointer" { target arc*-*-* } } */
+
+typedef signed long long s64;
+typedef unsigned long long u64;
+
+struct restart_block
+{
+ long (*fn) (struct restart_block *);
+ unsigned long arg0, arg1, arg2, arg3;
+};
+
+typedef unsigned long mm_segment_t;
+
+typedef struct
+{
+ int counter;
+} atomic_t;
+
+struct thread_info
+{
+ void *task;
+ struct exec_domain *exec_domain;
+ unsigned long flags;
+ unsigned long tp_value;
+ unsigned cpu;
+ int preempt_count;
+
+ mm_segment_t addr_limit;
+
+ struct restart_block restart_block;
+};
+
+static inline __attribute__ ((always_inline))
+ struct thread_info *current_thread_info (void)
+ __attribute__ ((__const__));
+
+ static inline __attribute__ ((always_inline))
+ struct thread_info *current_thread_info (void)
+{
+ register unsigned long sp asm ("sp");
+ return (struct thread_info *) (sp & ~((1UL << 13) - 1));
+}
+
+
+typedef struct
+{
+} raw_rwlock_t;
+
+typedef struct
+{
+ raw_rwlock_t raw_lock;
+} rwlock_t;
+
+struct list_head
+{
+ struct list_head *next, *prev;
+};
+
+struct rcu_head
+{
+ struct rcu_head *next;
+ void (*func) (struct rcu_head * head);
+};
+
+enum pid_type
+{
+ PIDTYPE_PID,
+ PIDTYPE_PGID,
+ PIDTYPE_SID,
+ PIDTYPE_MAX
+};
+
+struct fown_struct
+{
+ rwlock_t lock;
+ struct pid *pid;
+ enum pid_type pid_type;
+ unsigned uid, euid;
+ int signum;
+};
+
+struct file_ra_state
+{
+ unsigned long start;
+ unsigned long size;
+ unsigned long flags;
+ unsigned long cache_hit;
+ unsigned long prev_page;
+ unsigned long ahead_start;
+ unsigned long ahead_size;
+ unsigned long ra_pages;
+ unsigned long mmap_hit;
+ unsigned long mmap_miss;
+};
+
+struct file
+{
+ union
+ {
+ struct list_head fu_list;
+ struct rcu_head fu_rcuhead;
+ } f_u;
+ struct dentry *f_dentry;
+ struct vfsmount *f_vfsmnt;
+ const struct file_operations *f_op;
+ atomic_t f_count;
+ unsigned int f_flags;
+ unsigned short f_mode;
+ s64 f_pos;
+ struct fown_struct f_owner;
+ unsigned int f_uid, f_gid;
+ struct file_ra_state f_ra;
+ unsigned long f_version;
+ void *private_data;
+ struct address_space *f_mapping;
+};
+typedef int (*filldir_t) (void *, const char *, int, s64, u64, unsigned);
+extern int __attribute__((noinline)) vfs_readdir (struct file *, filldir_t, void *);
+extern void __attribute__((noinline)) fput (struct file *);
+extern struct file * __attribute__((noinline)) fget (unsigned int fd);
+
+struct linux_dirent64
+{
+ u64 d_ino;
+ s64 d_off;
+ unsigned short d_reclen;
+ unsigned char d_type;
+ char d_name[0];
+};
+
+long __attribute__((noinline)) sys_getdents64 (unsigned int fd,
+ struct linux_dirent64 *dirent, unsigned int count);
+
+struct getdents_callback64
+{
+ struct linux_dirent64 *current_dir;
+ struct linux_dirent64 *previous;
+ int count;
+ int error;
+};
+
+extern void abort (void);
+
+struct file *
+fget (unsigned int fd)
+{
+ static struct file dummy;
+
+ asm volatile ("");
+ return &dummy;
+}
+
+extern int __attribute__((noinline)) filldir64 (void *__buf, const char *name,
+ int namlen, s64 offset, u64 ino, unsigned int d_type);
+
+int
+filldir64 (void *vp, const char *cp, int i, s64 o, u64 ull, unsigned u)
+{
+ return 0;
+}
+
+int
+vfs_readdir (struct file *f, filldir_t fun, void *buf)
+{
+ struct getdents_callback64 *cb = buf;
+
+ asm volatile ("");
+ return (long) cb->current_dir < 0 ? -1 : 0;
+}
+
+void
+fput (struct file *f)
+{
+ asm volatile ("");
+}
+
+int
+main (void)
+{
+ if (sys_getdents64 (0, 0, 0))
+ abort ();
+ return 0;
+}
+
+long
+sys_getdents64 (unsigned int fd,
+ struct linux_dirent64 *dirent, unsigned int count)
+{
+
+ register struct file *file;
+ register struct linux_dirent64 *lastdirent;
+ struct getdents_callback64 buf;
+ register int error;
+ error = -14;
+ if (!(current_thread_info ()->addr_limit == (mm_segment_t) 0xFFFFFFFF
+ || (count <= 0x60000000UL
+ && (unsigned long) dirent <= 0x60000000UL - count)))
+ goto out;
+ error = -9;
+ file = fget (fd);
+ if (!file)
+ goto out;
+ buf.current_dir = dirent;
+ buf.previous = ((void *) 0);
+ buf.count = count;
+ buf.error = 0;
+ error = vfs_readdir (file, filldir64, &buf);
+ if (error < 0)
+ goto out_putf;
+ error = buf.error;
+ lastdirent = buf.previous;
+ if (lastdirent)
+ {
+ s64 d_off = file->f_pos;
+ error = -14;
+ {
+ long __pu_err = 0;
+ s64 *__pu_addr = &lastdirent->d_off;
+ __asm__ __volatile__ ("": "=r" (__pu_err):"r" (d_off),
+ "r" (__pu_addr), "i" (-14),
+ "0" (__pu_err));
+ if ((__pu_err))
+ goto out_putf;
+ }
+ error = count - buf.count;
+ }
+
+out_putf:
+ fput (file);
+out:
+ return (error);
+}
diff --git a/gcc/testsuite/gcc.dg/sibcall-3.c b/gcc/testsuite/gcc.dg/sibcall-3.c
index a33d4d34502..c826eb91ffd 100644
--- a/gcc/testsuite/gcc.dg/sibcall-3.c
+++ b/gcc/testsuite/gcc.dg/sibcall-3.c
@@ -5,7 +5,7 @@
Copyright (C) 2002 Free Software Foundation Inc.
Contributed by Hans-Peter Nilsson <hp@bitrange.com> */
-/* { dg-do run { xfail arc-*-* avr-*-* cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* m68hc1?-*-* mcore-*-* mn10300-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } } */
+/* { dg-do run { xfail avr-*-* cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* m68hc1?-*-* mcore-*-* mn10300-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } } */
/* -mlongcall disables sibcall patterns. */
/* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
/* { dg-options "-O2 -foptimize-sibling-calls" } */
diff --git a/gcc/testsuite/gcc.dg/sibcall-4.c b/gcc/testsuite/gcc.dg/sibcall-4.c
index e13db6bb74f..b98f3295a48 100644
--- a/gcc/testsuite/gcc.dg/sibcall-4.c
+++ b/gcc/testsuite/gcc.dg/sibcall-4.c
@@ -5,7 +5,7 @@
Copyright (C) 2002 Free Software Foundation Inc.
Contributed by Hans-Peter Nilsson <hp@bitrange.com> */
-/* { dg-do run { xfail arc-*-* avr-*-* cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* m68hc1?-*-* mcore-*-* mn10300-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } } */
+/* { dg-do run { xfail avr-*-* cris-*-* crisv32-*-* h8300-*-* hppa*64*-*-* m32r-*-* m68hc1?-*-* mcore-*-* mn10300-*-* xstormy16-*-* v850*-*-* vax-*-* xtensa*-*-* } } */
/* -mlongcall disables sibcall patterns. */
/* { dg-skip-if "" { powerpc*-*-* } { "-mlongcall" } { "" } } */
/* { dg-options "-O2 -foptimize-sibling-calls" } */
diff --git a/gcc/testsuite/gcc.dg/torture/fp-int-convert.h b/gcc/testsuite/gcc.dg/torture/fp-int-convert.h
index 8c1968e9be2..e8742598be3 100644
--- a/gcc/testsuite/gcc.dg/torture/fp-int-convert.h
+++ b/gcc/testsuite/gcc.dg/torture/fp-int-convert.h
@@ -80,7 +80,9 @@ do { \
ivin = (VAL); \
fv1 = (VAL); \
fv2 = ivin; \
- ivout = fv2; \
+ /* (unsigned long long)(double)~0ULL invokes undefined behaviour. */\
+ if (PREC_OK) \
+ ivout = fv2; \
if (ivin != (VAL) \
|| ((PREC_OK) && ivout != ivin) \
|| ((PREC_OK) && ivout != (VAL)) \
diff --git a/gcc/testsuite/gcc.dg/torture/pr37868.c b/gcc/testsuite/gcc.dg/torture/pr37868.c
index 50fa903008f..04d6f5d0e25 100644
--- a/gcc/testsuite/gcc.dg/torture/pr37868.c
+++ b/gcc/testsuite/gcc.dg/torture/pr37868.c
@@ -1,6 +1,6 @@
/* { dg-do run } */
/* { dg-options "-fno-strict-aliasing" } */
-/* { dg-skip-if "unaligned access" { sparc*-*-* } "*" "" } */
+/* { dg-skip-if "unaligned access" { sparc*-*-* arc-*-* } "*" "" } */
extern void abort (void);
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-store-ccp-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-store-ccp-2.c
index be37a21d415..091106d3bc3 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-store-ccp-2.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-store-ccp-2.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fdump-tree-optimized -fcommon" } */
const int conststaticvariable;
diff --git a/gcc/testsuite/gcc.target/arc/adc.c b/gcc/testsuite/gcc.target/arc/adc.c
new file mode 100644
index 00000000000..691a4e76de7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/adc.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+f (unsigned a, unsigned b, unsigned c, unsigned d)
+{
+ int s = 0;
+ a ^= 1;
+ if (a + b < a)
+ s = 1;
+ s += c + d;
+ return s;
+}
+
+int s0;
+
+int
+g (unsigned a, unsigned b, unsigned c, unsigned d)
+{
+ int s = 0;
+ if (a + b < a)
+ s = 1;
+ s0 = a+b;
+ s += c + d;
+ return s;
+}
+/* { dg-final { scan-assembler "add\.f\[ \t\]+0," } } */
+/* { dg-final { scan-assembler "add\.f\[ \t\]+r" } } */
+/* { dg-final { scan-assembler-times "adc\[ \t\]r\[0-9\]+, *r\[0-9\]+ *,r" 2 } } */
diff --git a/gcc/testsuite/gcc.target/arc/add_f.c b/gcc/testsuite/gcc.target/arc/add_f.c
new file mode 100644
index 00000000000..ebda0468a0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/add_f.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+int
+f (int a, int b, int c)
+{
+ if (a+b)
+ c = 1;
+ return c;
+}
+
+int
+g (unsigned a, unsigned b)
+{
+ if (a + b < a)
+ return 43;
+ return 42;
+}
+
+int
+h (unsigned a, unsigned b)
+{
+ if (a + b < b)
+ return 43;
+ return 42;
+}
+/* { dg-final { scan-assembler-times "add\.f\[ \t\]+0," 3 } } */
+/* { dg-final { scan-assembler "mov\.eq\[ \t\]" } } */
+/* { dg-final { scan-assembler-times "mov\.(hs|lo)\[ \t\]" 2 } } */
+/* { dg-final { scan-assembler "mov\.(hs|lo)\[ \t\]" } } */
diff --git a/gcc/testsuite/gcc.target/arc/arc.exp b/gcc/testsuite/gcc.target/arc/arc.exp
new file mode 100644
index 00000000000..3b2e386c00f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/arc.exp
@@ -0,0 +1,41 @@
+# Copyright (C) 2005 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARC target.
+if ![istarget arc*-*-*] then {
+ return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+ set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
+ "" $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
diff --git a/gcc/testsuite/gcc.target/arc/sbc.c b/gcc/testsuite/gcc.target/arc/sbc.c
new file mode 100644
index 00000000000..7549579680b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arc/sbc.c
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int s0;
+
+int
+f(a, b, c, d)
+ unsigned a, b, c, d;
+{
+ s0 = a - b;
+ return c - d - (a < b);
+}
+
+/* { dg-final { scan-assembler "sub\.f\[ \t\]+r" } } */
+/* { dg-final { scan-assembler "sbc\[ \t\]+r\[0-9\]+, *r\[0-9\]+ *,r" } } */
diff --git a/gcc/testsuite/lib/scanasm.exp b/gcc/testsuite/lib/scanasm.exp
index 4c2903061aa..1cdf003dc83 100644
--- a/gcc/testsuite/lib/scanasm.exp
+++ b/gcc/testsuite/lib/scanasm.exp
@@ -181,10 +181,15 @@ proc scan-assembler-times { args } {
set text [read $fd]
close $fd
- if { [llength [regexp -inline -all -- [lindex $args 0] $text]] == [lindex $args 1]} {
- pass "$testcase scan-assembler-times [lindex $args 0] [lindex $args 1]"
+ set pattern [lindex $args 0]
+ set printable_pattern [string map {\t \\t \n \\n \r \\r \\ \\\\} $pattern]
+
+ # Don't use regexp -inline - that gives bogus results with alternative
+ # subexpressions (e.g. "operation\.(this|that)").
+ if { [regexp -all -- $pattern $text] == [lindex $args 1]} {
+ pass "$testcase scan-assembler-times $printable_pattern [lindex $args 1]"
} else {
- fail "$testcase scan-assembler-times [lindex $args 0] [lindex $args 1]"
+ fail "$testcase scan-assembler-times $printable_pattern [lindex $args 1]"
}
}
diff --git a/gcc/tree-flow.h b/gcc/tree-flow.h
index 6004978416b..fdf8caf3d28 100644
--- a/gcc/tree-flow.h
+++ b/gcc/tree-flow.h
@@ -1020,6 +1020,7 @@ bool gimple_duplicate_loop_to_header_edge (struct loop *, edge,
unsigned int, sbitmap,
edge, VEC (edge, heap) **,
int);
+bool gimple_can_duplicate_loop_to_header_edge (struct loop *);
struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, edge);
void rename_variables_in_loop (struct loop *);
void rename_variables_in_bb (basic_block bb);
diff --git a/gcc/tree-ssa-loop-ch.c b/gcc/tree-ssa-loop-ch.c
index 33e85c839dc..3385029b3bf 100644
--- a/gcc/tree-ssa-loop-ch.c
+++ b/gcc/tree-ssa-loop-ch.c
@@ -132,7 +132,7 @@ copy_loop_headers (void)
loop_iterator li;
struct loop *loop;
basic_block header;
- edge exit, entry;
+ edge exit, new_exit, entry;
basic_block *bbs, *copied_bbs;
unsigned n_bbs;
unsigned bbs_size;
@@ -180,9 +180,41 @@ copy_loop_headers (void)
/* Find a successor of header that is inside a loop; i.e. the new
header after the condition is copied. */
if (flow_bb_inside_loop_p (loop, EDGE_SUCC (header, 0)->dest))
- exit = EDGE_SUCC (header, 0);
+ new_exit = EDGE_SUCC (header, 0);
else
- exit = EDGE_SUCC (header, 1);
+ new_exit = EDGE_SUCC (header, 1);
+ /* If we already have copied a header that tests a variable,
+ and all that we'd have left is an in/decrement of that variable,
+ that variable is most likely the biv, and the 'header' we are
+ currently looking at is the actual loop body. This happens
+ for instance with libgcc2.c:__gcc_bcmp. */
+ if (header != loop->header
+ && single_succ_p (new_exit->dest)
+ && EDGE_SUCC (new_exit->dest, 0)->dest == loop->header)
+ {
+ gimple cond = last_and_only_stmt (loop->header);
+ gimple modify = last_stmt (new_exit->dest);
+
+ if (cond && modify && gimple_code (modify) == GIMPLE_ASSIGN
+ && modify == gsi_stmt (gsi_after_labels (new_exit->dest)))
+ {
+ /* We checked earlier that cond is a GIMPLE_COND. */
+ tree var;
+ tree m_var = gimple_get_lhs (modify);
+
+ if (truth_value_p (gimple_cond_code (cond)))
+ var = gimple_cond_lhs (cond);
+ else
+ /* If the variable is used directly as the condition, use that. */
+ /* Abort so that we can use gdb to inspect this point live. */
+ gcc_unreachable ();
+ if (TREE_CODE (var) == SSA_NAME
+ && TREE_CODE (m_var) == SSA_NAME
+ && SSA_NAME_VAR (var) == SSA_NAME_VAR (m_var))
+ break;
+ }
+ }
+ exit = new_exit;
bbs[n_bbs++] = header;
gcc_assert (bbs_size > n_bbs);
header = exit->dest;
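[Editor's illustration: the loop shape the new check recognizes corresponds roughly to this source-level sketch (cf. libgcc2.c:__gcc_bcmp).]

/* "size" is the likely biv: its test was already copied as a header,
   and the block that merely decrements it is really the loop body,
   not another header worth copying -- hence the break above.  */
while (size != 0)        /* loop->header: lone GIMPLE_COND on size */
  {
    if (*p++ != *q++)    /* previously copied header */
      break;
    size--;              /* new_exit->dest: single assignment to size */
  }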
diff --git a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
index e278c55b08b..cc2a795a6d6 100644
--- a/gcc/tree-ssa-loop-ivcanon.c
+++ b/gcc/tree-ssa-loop-ivcanon.c
@@ -62,6 +62,7 @@ enum unroll_level
iteration. */
UL_NO_GROWTH, /* Only loops whose unrolling will not cause increase
of code size. */
+ UL_ESTIMATE_GROWTH, /* Estimate size increase for UL_ALL. */
UL_ALL /* All suitable loops. */
};
@@ -156,14 +157,20 @@ estimated_unrolled_size (unsigned HOST_WIDE_INT ninsns,
/* Tries to unroll LOOP completely, i.e. NITER times.
UL determines which loops we are allowed to unroll.
- EXIT is the exit of the loop that should be eliminated. */
+ EXIT is the exit of the loop that should be eliminated.
+ If UL is UL_ESTIMATE_GROWTH, we set loop->lpt_decision.times to
+ the number of instructions by which the loop is estimated to grow
+ when completely or ordinarily unrolled; we do both estimations here
+ rather than spreading them around, to limit how often we recompute
+ the number of instructions in the loop. */
static bool
try_unroll_loop_completely (struct loop *loop,
edge exit, tree niter,
enum unroll_level ul)
{
- unsigned HOST_WIDE_INT n_unroll, ninsns, max_unroll, unr_insns;
+ unsigned HOST_WIDE_INT n_unroll, e_unroll, max_unroll, unr_insns;
+ unsigned HOST_WIDE_INT ninsns = 0;
gimple cond;
if (loop->inner)
@@ -173,6 +180,21 @@ try_unroll_loop_completely (struct loop *loop,
return false;
n_unroll = tree_low_cst (niter, 1);
+ /* In case we don't completely unroll the loop, estimate its size increase
+ from normal unrolling. */
+ if (ul == UL_ESTIMATE_GROWTH && n_unroll)
+ {
+ ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+ e_unroll = n_unroll + 1;
+ max_unroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
+ if (e_unroll > max_unroll)
+ e_unroll = max_unroll;
+ max_unroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / ninsns;
+ if (e_unroll > max_unroll)
+ e_unroll = max_unroll;
+ if (e_unroll > 1)
+ loop->lpt_decision.times = ninsns * (e_unroll - 1);
+ }
max_unroll = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES);
if (n_unroll > max_unroll)
return false;
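[Editor's worked example with hypothetical numbers: with ninsns = 10, n_unroll = 7, PARAM_MAX_UNROLL_TIMES = 4 and PARAM_MAX_UNROLLED_INSNS = 200, the estimate above computes]

  e_unroll = min (n_unroll + 1, 4, 200 / 10) = min (8, 4, 20) = 4
  loop->lpt_decision.times = ninsns * (e_unroll - 1) = 10 * 3 = 30

[i.e. ordinary unrolling is estimated to grow this loop by 30 instructions.]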
@@ -182,7 +204,8 @@ try_unroll_loop_completely (struct loop *loop,
if (ul == UL_SINGLE_ITER)
return false;
- ninsns = tree_num_loop_insns (loop, &eni_size_weights);
+ if (!ninsns)
+ ninsns = tree_num_loop_insns (loop, &eni_size_weights);
unr_insns = estimated_unrolled_size (ninsns, n_unroll);
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -212,6 +235,20 @@ try_unroll_loop_completely (struct loop *loop,
}
}
+ if (ul == UL_ESTIMATE_GROWTH)
+ {
+ if (n_unroll && gimple_can_duplicate_loop_to_header_edge (loop))
+ {
+ int unroll_cost = unr_insns - ninsns;
+
+ if (unroll_cost < 0)
+ unroll_cost = 0;
+ loop->lpt_decision.times = unroll_cost;
+ return true;
+ }
+ else
+ return false;
+ }
if (n_unroll)
{
sbitmap wont_exit;
@@ -274,6 +311,8 @@ canonicalize_loop_induction_variables (struct loop *loop,
edge exit = NULL;
tree niter;
+ if (ul == UL_ESTIMATE_GROWTH)
+ loop->lpt_decision.times = 0;
niter = number_of_latch_executions (loop);
if (TREE_CODE (niter) == INTEGER_CST)
{
@@ -352,23 +391,77 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
struct loop *loop;
bool changed;
enum unroll_level ul;
+ sbitmap changed_loops;
+ int nloops;
+ nloops = number_of_loops ();
+ changed_loops = sbitmap_alloc (nloops);
do
{
+ int last_outer = -1;
+ unsigned outer_size = 0;
+
changed = false;
- FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
+ sbitmap_zero (changed_loops);
+ FOR_EACH_LOOP (li, loop, LI_REALLY_FROM_INNERMOST)
{
+ int outer = loop_outer (loop)->num;
+
+ gcc_assert (loop->num < nloops);
+ gcc_assert (outer < nloops);
+ /* By propagating up the changed bit piecemeal, we avoid
+ n^3 behaviour for deeply nested loops. */
+ if (TEST_BIT (changed_loops, loop->num))
+ {
+
+ SET_BIT (changed_loops, outer);
+ continue;
+ }
+ if (loop->inner)
+ continue;
if (may_increase_size && optimize_loop_for_speed_p (loop)
/* Unroll outermost loops only if asked to do so or they do
not cause code growth. */
- && (unroll_outer
- || loop_outer (loop_outer (loop))))
- ul = UL_ALL;
+ && (unroll_outer || loop_outer (loop_outer (loop))))
+ {
+ unsigned max_outer
+ = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_OUTER_INSNS);
+
+ if (outer != last_outer)
+ {
+ struct loop *oloop, *sloop;
+
+ oloop = loop_outer (loop);
+ outer_size = tree_num_loop_insns (oloop, &eni_size_weights);
+ /* Sum up the size growth for unrolling to be done on
+ all the sibling loops.
+ If a loop is not innermost, we won't actually unroll
+ it, but we'd likely unroll its leaf loops, so assuming
+ that we unroll the outer loop gives an estimate of the
+ growth of the inner loops. */
+ for (sloop = oloop->inner;
+ outer_size <= max_outer && sloop;
+ sloop = sloop->next)
+ {
+ sloop->lpt_decision.times = 0;
+ canonicalize_loop_induction_variables
+ (sloop, false, UL_ESTIMATE_GROWTH,
+ !flag_tree_loop_ivcanon);
+ outer_size += sloop->lpt_decision.times;
+ }
+ last_outer = outer;
+ }
+ ul = outer_size <= max_outer ? UL_ALL : UL_NO_GROWTH;
+ }
else
ul = UL_NO_GROWTH;
- changed |= canonicalize_loop_induction_variables
- (loop, false, ul, !flag_tree_loop_ivcanon);
+ if (canonicalize_loop_induction_variables
+ (loop, false, ul, !flag_tree_loop_ivcanon))
+ {
+ SET_BIT (changed_loops, outer);
+ changed |= true;
+ }
}
if (changed)
@@ -385,6 +478,7 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer)
}
}
while (changed);
+ sbitmap_free (changed_loops);
return 0;
}
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 683d7d4b5fa..75a38ccb997 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -152,11 +152,14 @@ struct cost_pair
comp_cost cost; /* The cost. */
bitmap depends_on; /* The list of invariants that have to be
preserved. */
- tree value; /* For final value elimination, the expression for
- the final value of the iv. For iv elimination,
- the new bound to compare with. */
+ tree value; /* For iv elimination, the new bound to compare
+ with. For addresses, the amount by which the
+ candidate can be increased in an autoincrement
+ (int cast to pointer, not a tree). */
};
+#define CP_AUTOINC_OFFSET(CP) ((HOST_WIDE_INT) (size_t) (CP)->value)
+
/* Use. */
struct iv_use
{
@@ -270,6 +273,10 @@ struct iv_ca
/* Number of times each candidate is used. */
unsigned *n_cand_uses;
+ /* For each candidate, the total offset of all autoincrements applied
+ to it. */
+ HOST_WIDE_INT *cand_autoinc_distance;
+
/* The candidates used. */
bitmap cands;
@@ -2995,20 +3002,24 @@ multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
variable is omitted. Compute the cost for a memory reference that accesses
a memory location of mode MEM_MODE.
+ MAY_AUTOINC is set to true if autoincrement (increasing the index by
+ the size of MEM_MODE / RATIO) is available.
+
TODO -- there must be some better way. This all is quite crude. */
static comp_cost
get_address_cost (bool symbol_present, bool var_present,
unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
enum machine_mode mem_mode,
- bool speed)
+ bool speed, bool *may_autoinc)
{
static bool initialized[MAX_MACHINE_MODE];
static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
+ static bool has_autoinc[MAX_MACHINE_MODE][2][2][2][2];
unsigned cost, acost, complexity;
- bool offset_p, ratio_p;
+ bool offset_p, ratio_p, autoinc;
HOST_WIDE_INT s_offset;
unsigned HOST_WIDE_INT mask;
unsigned bits;
@@ -3018,7 +3029,7 @@ get_address_cost (bool symbol_present, bool var_present,
HOST_WIDE_INT i;
HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
int old_cse_not_expected;
- unsigned sym_p, var_p, off_p, rat_p, add_c;
+ unsigned sym_p, var_p, off_p, rat_p, add_c, ainc_p;
rtx seq, addr, base;
rtx reg0, reg1;
@@ -3068,14 +3079,21 @@ get_address_cost (bool symbol_present, bool var_present,
reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
- for (i = 0; i < 16; i++)
+ for (i = 0; i < 32; i++)
{
sym_p = i & 1;
var_p = (i >> 1) & 1;
off_p = (i >> 2) & 1;
rat_p = (i >> 3) & 1;
+ ainc_p = (i >> 4) & 1;
addr = reg0;
+ if (ainc_p)
+ {
+ if (!(flag_ivopts_post_inc && HAVE_POST_INCREMENT))
+ continue;
+ addr = gen_rtx_POST_INC (Pmode, addr);
+ }
if (rat_p)
addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
gen_int_mode (rat[mem_mode], Pmode));
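[Editor's note, a sketch consistent with the loop above: the bits of i in the (now 32-way) enumeration decode as follows.]

  /* bit 0: sym_p  -- symbol term present
     bit 1: var_p  -- variable term present
     bit 2: off_p  -- constant offset present
     bit 3: rat_p  -- index scaled by ratio
     bit 4: ainc_p -- base wrapped in POST_INC (new in this patch)
     e.g. i == 0x14 probes a post-incremented base plus an offset.  */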
@@ -3106,7 +3124,15 @@ get_address_cost (bool symbol_present, bool var_present,
if (base)
addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
-
+
+ /* Try detecting presence of autoincrement instructions. */
+ if (ainc_p)
+ {
+ has_autoinc[mem_mode][sym_p][var_p][off_p][rat_p]
+ = memory_address_p (mem_mode, addr);
+ continue;
+ }
+
start_sequence ();
/* To avoid splitting addressing modes, pretend that no cse will
follow. */
@@ -3151,11 +3177,11 @@ get_address_cost (bool symbol_present, bool var_present,
if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
costs[mem_mode][1][var_p][off_p][rat_p] = acost;
}
-
+
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "Address costs:\n");
-
+
for (i = 0; i < 16; i++)
{
sym_p = i & 1;
@@ -3174,7 +3200,9 @@ get_address_cost (bool symbol_present, bool var_present,
fprintf (dump_file, "rat * ");
acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
- fprintf (dump_file, "index costs %d\n", acost);
+ autoinc = has_autoinc[mem_mode][sym_p][var_p][off_p][rat_p];
+ fprintf (dump_file, "index costs %d%s\n", acost,
+ autoinc ? " (may include autoinc/dec)" : "");
}
fprintf (dump_file, "\n");
}
@@ -3187,6 +3215,8 @@ get_address_cost (bool symbol_present, bool var_present,
offset |= ~mask;
s_offset = offset;
+ autoinc = has_autoinc[mem_mode][symbol_present][var_present][offset != 0][ratio != 1];
+
cost = 0;
offset_p = (s_offset != 0
&& min_offset[mem_mode] <= s_offset
@@ -3195,11 +3225,22 @@ get_address_cost (bool symbol_present, bool var_present,
&& multiplier_allowed_in_address_p (ratio, mem_mode));
if (ratio != 1 && !ratio_p)
- cost += multiply_by_cost (ratio, Pmode, speed);
+ {
+ cost += multiply_by_cost (ratio, Pmode, speed);
+
+ /* If we have to do the multiplication of index separately, we cannot
+ use autoincrement. */
+ autoinc = false;
+ }
if (s_offset && !offset_p && !symbol_present)
- cost += add_cost (Pmode, speed);
+ {
+ cost += add_cost (Pmode, speed);
+ autoinc = false;
+ }
+ if (may_autoinc)
+ *may_autoinc = autoinc;
acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
return new_cost (cost + acost, complexity);
@@ -3495,29 +3536,71 @@ difference_cost (struct ivopts_data *data,
return cost;
}
+/* Returns the offset by which CAND will be incremented in a postincrement,
+ when it is used (after multiplication by RATIO) to access a memory
+ location whose mode is MMODE. Zero is returned if using autoincrement
+ is not possible. */
+
+static int
+autoincrement_distance (struct iv_cand *cand, HOST_WIDE_INT ratio,
+ enum machine_mode mmode)
+{
+ HOST_WIDE_INT step, delta, size;
+ bool negate = false;
+
+ if (!cst_and_fits_in_hwi (cand->iv->step))
+ return 0;
+ step = int_cst_value (cand->iv->step);
+
+ if (step < 0)
+ {
+ if (!HAVE_POST_DECREMENT)
+ return 0;
+
+ step = -step;
+ negate = true;
+ }
+
+ size = GET_MODE_SIZE (mmode);
+ if (size % ratio != 0)
+ return 0;
+
+ delta = size / ratio;
+ if (delta > step)
+ return 0;
+
+ return negate ? -delta : delta;
+}
+
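[Editor's trace with hypothetical values: cand->iv->step == -4 on a target with HAVE_POST_DECREMENT, mmode of size 8, ratio 2.]

  step = 4, negate = true
  size = 8, size % ratio == 0, delta = size / ratio = 4
  delta <= step, so the result is -4: a post-decrement by 4.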
+static GTY(()) rtx post_modify_addr;
/* Determines the cost of the computation by that USE is expressed
from induction variable CAND. If ADDRESS_P is true, we just need
to create an address from it, otherwise we want to get it into
register. A set of invariants we depend on is stored in
- DEPENDS_ON. AT is the statement at that the value is computed. */
+ DEPENDS_ON. AT is the statement at which the value is computed.
+ If autoincrement of CAND can be used in the computation,
+ AUTOINC_DISTANCE is set to the amount by which it can be incremented. */
static comp_cost
get_computation_cost_at (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand,
- bool address_p, bitmap *depends_on, gimple at)
+ bool address_p, bitmap *depends_on, gimple at,
+ HOST_WIDE_INT *autoinc_distance)
{
tree ubase = use->iv->base, ustep = use->iv->step;
tree cbase, cstep;
tree utype = TREE_TYPE (ubase), ctype;
unsigned HOST_WIDE_INT cstepi, offset = 0;
HOST_WIDE_INT ratio, aratio;
- bool var_present, symbol_present;
+ bool var_present, symbol_present, autoinc;
comp_cost cost;
unsigned n_sums;
double_int rat;
bool speed = optimize_bb_for_speed_p (gimple_bb (at));
*depends_on = NULL;
+ if (autoinc_distance)
+ *autoinc_distance = 0;
/* Only consider real candidates. */
if (!cand->iv)
@@ -3613,9 +3696,38 @@ get_computation_cost_at (struct ivopts_data *data,
(symbol/var/const parts may be omitted). If we are looking for an address,
find the cost of addressing this. */
if (address_p)
- return add_costs (cost, get_address_cost (symbol_present, var_present,
- offset, ratio,
- TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
+ {
+ enum machine_mode mmode = TYPE_MODE (TREE_TYPE (*use->op_p));
+
+ cost = add_costs (cost, get_address_cost (symbol_present, var_present,
+ offset, ratio, mmode, speed, &autoinc));
+ if (autoinc && autoinc_distance)
+ *autoinc_distance = autoincrement_distance (cand, ratio, mmode);
+ /* ??? We could use PRE / POST modify in the presence of an offset -
+ if we'd arrange for the iv to be adjusted accordingly. */
+ /* ??? could we use POST_MODIFY_REG? compile/pr35043.i provides a
+ testcase with variable step. */
+ if (flag_ivopts_post_modify && HAVE_POST_MODIFY_DISP
+ && cst_and_fits_in_hwi (cand->iv->step)
+ && !symbol_present && !var_present && !offset && ratio == 1)
+ {
+ rtx offset = GEN_INT (int_cst_value (cand->iv->step));
+ rtx addr = post_modify_addr;
+
+ if (!addr)
+ {
+ rtx reg = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
+ addr = gen_rtx_PLUS (Pmode, reg, const0_rtx);
+ addr = gen_rtx_POST_MODIFY (Pmode, reg, addr);
+ post_modify_addr = addr;
+ }
+ XEXP (XEXP (addr, 1), 1) = offset;
+ if (memory_address_p (mmode, addr))
+ *autoinc_distance = int_cst_value (cand->iv->step);
+ }
+
+ return cost;
+ }
/* Otherwise estimate the costs for computing the expression. */
aratio = ratio > 0 ? ratio : -ratio;
@@ -3668,10 +3780,11 @@ fallback:
static comp_cost
get_computation_cost (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand,
- bool address_p, bitmap *depends_on)
+ bool address_p, bitmap *depends_on,
+ HOST_WIDE_INT *autoinc_distance)
{
- return get_computation_cost_at (data,
- use, cand, address_p, depends_on, use->stmt);
+ return get_computation_cost_at (data, use, cand, address_p, depends_on,
+ use->stmt, autoinc_distance);
}
/* Determines cost of basing replacement of USE on CAND in a generic
@@ -3695,7 +3808,7 @@ determine_use_iv_cost_generic (struct ivopts_data *data,
return true;
}
- cost = get_computation_cost (data, use, cand, false, &depends_on);
+ cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
return !infinite_cost_p (cost);
@@ -3708,9 +3821,12 @@ determine_use_iv_cost_address (struct ivopts_data *data,
struct iv_use *use, struct iv_cand *cand)
{
bitmap depends_on;
- comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
+ HOST_WIDE_INT autoinc_distance;
+ comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
+ &autoinc_distance);
- set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
+ set_use_iv_cost (data, use, cand, cost, depends_on,
+ (tree) (size_t) autoinc_distance);
return !infinite_cost_p (cost);
}
@@ -3884,7 +4000,7 @@ determine_use_iv_cost_condition (struct ivopts_data *data,
gcc_assert (ok);
express_cost = get_computation_cost (data, use, cand, false,
- &depends_on_express);
+ &depends_on_express, NULL);
fd_ivopts_data = data;
walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
@@ -4226,6 +4342,42 @@ iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
}
}
+/* Returns true if the candidate CAND is autoincremented according to
+ iv usage assignment IVS. */
+
+static bool
+cand_autoincremented_p (struct iv_ca *ivs, struct iv_cand *cand)
+{
+ if (!cst_and_fits_in_hwi (cand->iv->step))
+ return false;
+
+ /* TODO: this misses the cases where only some of the memory references
+ expressed by CAND are used for the autoincrements. */
+ return (int_cst_value (cand->iv->step)
+ == ivs->cand_autoinc_distance[cand->id]);
+}
+
+/* Adds (if ADD is true) or removes (if ADD is false) a bonus for using
+ autoincrements to express candidate CAND to costs in IVS. */
+
+static void
+recompute_autoinc_bonus (struct iv_ca *ivs, struct iv_cand *cand, bool add,
+ bool speed)
+{
+ unsigned bonus;
+
+ if (!cand_autoincremented_p (ivs, cand))
+ return;
+
+ /* If the candidate is autoincremented, we do not need to count its increment
+ cost. */
+ bonus = add_cost (TYPE_MODE (TREE_TYPE (cand->iv->base)), speed);
+ if (add)
+ ivs->cand_cost -= bonus;
+ else
+ ivs->cand_cost += bonus;
+}
+
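[Editor's illustration of the accounting, with hypothetical numbers: a candidate with step 4 whose only address use carries a CP_AUTOINC_OFFSET of 4 behaves as follows.]

  /* iv_ca_set_cp:    distance 0 -> 4 == step
       => cand_autoincremented_p holds, cand_cost -= add_cost (mode)
     iv_ca_set_no_cp: distance 4 -> 0 != step
       => the bonus is taken back, cand_cost += add_cost (mode)  */

[i.e. while the use is assigned, the candidate's explicit increment insn is treated as free.]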
/* Set USE not to be expressed by any candidate in IVS. */
static void
@@ -4234,6 +4386,7 @@ iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
{
unsigned uid = use->id, cid;
struct cost_pair *cp;
+ bool speed = optimize_loop_for_speed_p (data->current_loop);
cp = ivs->cand_for_use[uid];
if (!cp)
@@ -4244,6 +4397,13 @@ iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
ivs->cand_for_use[uid] = NULL;
ivs->n_cand_uses[cid]--;
+ if (use->type == USE_ADDRESS)
+ {
+ recompute_autoinc_bonus (ivs, cp->cand, false, speed);
+ ivs->cand_autoinc_distance[cid] -= CP_AUTOINC_OFFSET (cp);
+ recompute_autoinc_bonus (ivs, cp->cand, true, speed);
+ }
+
if (ivs->n_cand_uses[cid] == 0)
{
bitmap_clear_bit (ivs->cands, cid);
@@ -4288,6 +4448,7 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
struct iv_use *use, struct cost_pair *cp)
{
unsigned uid = use->id, cid;
+ bool speed = optimize_loop_for_speed_p (data->current_loop);
if (ivs->cand_for_use[uid] == cp)
return;
@@ -4313,6 +4474,13 @@ iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
}
+
+ if (use->type == USE_ADDRESS)
+ {
+ recompute_autoinc_bonus (ivs, cp->cand, false, speed);
+ ivs->cand_autoinc_distance[cid] += CP_AUTOINC_OFFSET (cp);
+ recompute_autoinc_bonus (ivs, cp->cand, true, speed);
+ }
ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
iv_ca_set_add_invariants (ivs, cp->depends_on);
@@ -4519,6 +4687,7 @@ iv_ca_new (struct ivopts_data *data)
nw->bad_uses = 0;
nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
+ nw->cand_autoinc_distance = XCNEWVEC (HOST_WIDE_INT, n_iv_cands (data));
nw->cands = BITMAP_ALLOC (NULL);
nw->n_cands = 0;
nw->n_regs = 0;
@@ -4537,6 +4706,7 @@ iv_ca_free (struct iv_ca **ivs)
{
free ((*ivs)->cand_for_use);
free ((*ivs)->n_cand_uses);
+ free ((*ivs)->cand_autoinc_distance);
BITMAP_FREE ((*ivs)->cands);
free ((*ivs)->n_invariant_uses);
free (*ivs);
@@ -4551,9 +4721,18 @@ iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
const char *pref = " invariants ";
unsigned i;
comp_cost cost = iv_ca_cost (ivs);
+ struct iv_cand *cand;
fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
- bitmap_print (file, ivs->cands, " candidates ","\n");
+ fprintf (file, " candidates");
+ for (i = 0; i < n_iv_cands (data); i++)
+ {
+ cand = iv_cand (data, i);
+ if (iv_ca_cand_used_p (ivs, cand))
+ fprintf (file, " %d%s", i,
+ cand_autoincremented_p (ivs, cand) ? "(autoincremented)" : "");
+ }
+ fprintf (file, "\n");
for (i = 1; i <= data->max_inv_id; i++)
if (ivs->n_invariant_uses[i])
@@ -5608,3 +5787,5 @@ tree_ssa_iv_optimize (void)
tree_ssa_iv_optimize_finalize (&data);
}
+
+#include "gt-tree-ssa-loop-ivopts.h"
diff --git a/gcc/tree-ssa-loop-manip.c b/gcc/tree-ssa-loop-manip.c
index 05e87d241cd..ebc68f1cec6 100644
--- a/gcc/tree-ssa-loop-manip.c
+++ b/gcc/tree-ssa-loop-manip.c
@@ -623,6 +623,21 @@ gimple_duplicate_loop_to_header_edge (struct loop *loop, edge e,
return true;
}
+/* Return true if gimple_duplicate_loop_to_header_edge would return true,
+ without actually altering any trees. */
+bool
+gimple_can_duplicate_loop_to_header_edge (struct loop *loop)
+{
+ basic_block *bbs;
+
+ if (!loops_state_satisfies_p (LOOPS_HAVE_SIMPLE_LATCHES))
+ return false;
+ if (!loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS))
+ return false;
+ bbs = get_loop_body_in_dom_order (loop);
+ return can_copy_bbs_p (bbs, loop->num_nodes);
+}
+
/* Returns true if we can unroll LOOP FACTOR times. Number
of iterations of the loop is returned in NITER. */
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index 7773fe96b6f..f58ee94af62 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -104,6 +104,10 @@ along with GCC; see the file COPYING3. If not see
In order to make it fully redundant, we insert the expression into
the predecessors where it is not available, but is ANTIC.
+ When optimizing for size, we only eliminate the partial redundancy
+ if we need to insert in only one predecessor. This almost completely
+ avoids the code size increase that PRE usually causes.
+
For the partial anticipation case, we only perform insertion if it
is partially anticipated in some block, and fully available in all
of the predecessors.
@@ -426,6 +430,7 @@ static pre_expr bitmap_find_leader (bitmap_set_t, unsigned int, gimple);
static void bitmap_value_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_value_replace_in_set (bitmap_set_t, pre_expr);
static void bitmap_set_copy (bitmap_set_t, bitmap_set_t);
+static void bitmap_set_and (bitmap_set_t, bitmap_set_t);
static bool bitmap_set_contains_value (bitmap_set_t, unsigned int);
static void bitmap_insert_into_set (bitmap_set_t, pre_expr);
static void bitmap_insert_into_set_1 (bitmap_set_t, pre_expr, bool);
@@ -2978,13 +2983,6 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum,
tree temp, res;
gimple phi;
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "Found partial redundancy for expression ");
- print_pre_expr (dump_file, expr);
- fprintf (dump_file, " (%04d)\n", val);
- }
-
/* Make sure we aren't creating an induction variable. */
if (block->loop_depth > 0 && EDGE_COUNT (block->preds) == 2
&& expr->kind != REFERENCE)
@@ -3183,6 +3181,47 @@ insert_into_preds_of_block (basic_block block, unsigned int exprnum,
}
+/* Indicate if, when optimizing for speed, it is appropriate to make
+ INSERTS_NEEDED insertions in order to make EXPR in BLOCK redundant. */
+static bool
+ppre_n_insert_for_speed_p (pre_expr expr, basic_block block,
+ unsigned int inserts_needed)
+{
+ /* The more expensive EXPR is, the more we should be prepared to insert
+ in the predecessors of BLOCK to make EXPR fully redundant.
+ For now, only recognize AND, OR, XOR, PLUS and MINUS of a multiple-use
+ SSA_NAME with a constant as cheap. */
+ int cost;
+
+ if (flag_tree_pre_partial_partial_obliviously)
+ return true;
+ if (expr->kind == NARY)
+ {
+ vn_nary_op_t nary = PRE_EXPR_NARY (expr);
+ switch (nary->opcode)
+ {
+ tree name, cnst;
+ case BIT_AND_EXPR: case BIT_IOR_EXPR: case BIT_XOR_EXPR:
+ case PLUS_EXPR: case MINUS_EXPR:
+
+ gcc_assert (nary->length == 2);
+ name = nary->op[0];
+ cnst = nary->op[1];
+ if (TREE_CODE (name) != SSA_NAME || has_single_use (name))
+ return true;
+ if (!is_gimple_min_invariant (cnst))
+ return true;
+ cost = 1;
+ break;
+ default:
+ return true;
+ }
+ }
+ else
+ return true;
+ return EDGE_COUNT (block->preds) * cost >= inserts_needed;
+}
+
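For illustration (again not part of the patch), the "cheap" shape the heuristic recognizes looks like this in source form; n is the multiple-use name and 4 the constant:

int
g (int n, int flag)
{
  int t = n + 4;      /* PLUS_EXPR of a multiple-use name and a
                         constant: recognized as cheap, cost == 1.  */
  if (flag)
    return t;
  return (n + 4) * 2; /* The redundant n + 4 is cheap to recompute,
                         so extra insertions may not pay off.  */
}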
/* Perform insertion of partially redundant values.
For BLOCK, do the following:
@@ -3217,6 +3256,7 @@ do_regular_insertion (basic_block block, basic_block dom)
pre_expr *avail;
unsigned int val;
bool by_some = false;
+ unsigned int inserts_needed = 0;
bool cant_insert = false;
bool all_same = true;
pre_expr first_s = NULL;
@@ -3271,6 +3311,7 @@ do_regular_insertion (basic_block block, basic_block dom)
{
avail[bprime->index] = eprime;
all_same = false;
+ inserts_needed++;
}
else
{
@@ -3280,6 +3321,11 @@ do_regular_insertion (basic_block block, basic_block dom)
first_s = edoubleprime;
else if (!pre_expr_eq (first_s, edoubleprime))
all_same = false;
+ /* If the available value is not a NAME, PREing this
+ value will probably result in a copy on the edge
+ to assign the expression to a register. */
+ if (edoubleprime->kind != NAME)
+ inserts_needed++;
}
}
/* If we can insert it, it's not the same value
@@ -3288,9 +3334,28 @@ do_regular_insertion (basic_block block, basic_block dom)
partially redundant. */
if (!cant_insert && !all_same && by_some && dbg_cnt (treepre_insert))
{
- if (insert_into_preds_of_block (block, get_expression_id (expr),
- avail))
- new_stuff = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Found partial redundancy for expression ");
+ print_pre_expr (dump_file, expr);
+ fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr));
+ }
+
+	  /* When optimizing for speed, weigh insertions against the cost
+	     of EXPR; for size, insert at most one new expression.  */
+	  if (optimize_function_for_speed_p (cfun)
+	      ? ppre_n_insert_for_speed_p (expr, block, inserts_needed)
+	      : EDGE_COUNT (block->preds) - inserts_needed == 1)
+ new_stuff |=
+ insert_into_preds_of_block (block,
+ get_expression_id (expr),
+ avail);
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not inserting (optimizing for %s)\n",
+ optimize_function_for_speed_p (cfun)
+ ? "speed" : "size");
+
}
/* If all edges produce the same value and that value is
an invariant, then the PHI has the same value on all
@@ -3419,9 +3484,28 @@ do_partial_partial_insertion (basic_block block, basic_block dom)
if (!cant_insert && by_all && dbg_cnt (treepre_insert))
{
pre_stats.pa_insert++;
- if (insert_into_preds_of_block (block, get_expression_id (expr),
- avail))
- new_stuff = true;
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file,
+ "Found partial redundancy for expression ");
+ print_pre_expr (dump_file, expr);
+ fprintf (dump_file, " (%04d)\n", get_expr_value_id (expr));
+ }
+	  /* Assume the expression is only 50% anticipatable, so the
+	     number of insertions needed must be doubled for the cost
+	     comparison.  */
+ if (!optimize_function_for_speed_p (cfun)
+ || ppre_n_insert_for_speed_p (expr, block,
+ 2 * EDGE_COUNT (block->preds)))
+ new_stuff |=
+ insert_into_preds_of_block (block,
+ get_expression_id (expr),
+ avail);
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Not inserting (optimizing for %s)\n",
+ optimize_function_for_speed_p (cfun)
+ ? "speed" : "size");
+
}
free (avail);
}
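A consequence of passing 2 * EDGE_COUNT (block->preds): for the cheap cost class (cost == 1) the test n * cost >= 2 * n never holds, so cheap expressions are not partial-partially inserted unless the oblivious flag (presumably spelled -ftree-pre-partial-partial-obliviously; inferred from the variable name) is given. A standalone check of that arithmetic, for illustration only:

#include <stdio.h>

int
main (void)
{
  for (int n_preds = 2; n_preds <= 4; n_preds++)
    {
      int cost = 1;                     /* The "cheap" cost class.  */
      int inserts_needed = 2 * n_preds; /* 50% anticipation assumption.  */
      printf ("preds=%d: insert=%s\n", n_preds,
              n_preds * cost >= inserts_needed ? "yes" : "no");
    }
  return 0;                             /* Prints "no" in every case.  */
}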
@@ -3462,7 +3546,9 @@ insert_aux (basic_block block)
if (!single_pred_p (block))
{
new_stuff |= do_regular_insertion (block, dom);
- if (do_partial_partial)
+ /* Don't bother with partial-partial redundancies when
+ optimizing for size. */
+ if (do_partial_partial && ! optimize_function_for_size_p (cfun))
new_stuff |= do_partial_partial_insertion (block, dom);
}
}
@@ -4213,11 +4299,11 @@ fini_pre (bool do_fre)
only wants to do full redundancy elimination. */
static unsigned int
-execute_pre (bool do_fre ATTRIBUTE_UNUSED)
+execute_pre (bool do_fre)
{
unsigned int todo = 0;
- do_partial_partial = optimize > 2;
+ do_partial_partial = flag_tree_pre_partial_partial;
/* This has to happen before SCCVN runs because
loop_optimizer_init may create new phis, etc. */
@@ -4290,19 +4376,20 @@ execute_pre (bool do_fre ATTRIBUTE_UNUSED)
return todo;
}
-/* Gate and execute functions for PRE. */
+/* Gate and execute functions for FRE/PRE. */
static unsigned int
do_pre (void)
{
- return TODO_rebuild_alias | execute_pre (false);
+ return TODO_rebuild_alias
+ | execute_pre (! flag_tree_pre);
}
static bool
gate_pre (void)
{
- /* PRE tends to generate bigger code. */
- return flag_tree_pre != 0 && optimize_function_for_speed_p (cfun);
+ /* Run FRE even if we don't run PRE. */
+ return (flag_tree_fre || flag_tree_pre);
}
struct gimple_opt_pass pass_pre =
diff --git a/gcc/value-prof.c b/gcc/value-prof.c
index 120a68d61e8..1ce68740d47 100644
--- a/gcc/value-prof.c
+++ b/gcc/value-prof.c
@@ -1412,7 +1412,7 @@ gimple_stringops_transform (gimple_stmt_iterator *gsi)
case BUILT_IN_MEMPCPY:
src = gimple_call_arg (stmt, 1);
src_align = get_pointer_alignment (src, BIGGEST_ALIGNMENT);
- if (!can_move_by_pieces (val, MIN (dest_align, src_align)))
+      if (!can_move_by_pieces (val, MIN (dest_align, src_align), 1))
return false;
break;
case BUILT_IN_MEMSET:
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 7fed3009b84..7e58eb9ed84 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -168,8 +168,11 @@ section *in_section;
at the cold section. */
bool in_cold_section_p;
-/* A linked list of all the unnamed sections. */
-static GTY(()) section *unnamed_sections;
+/* A linked list of all the unnamed sections.
+   This list must not be garbage collected: it would be overwritten
+   when a PCH file is loaded, and its data and callback members can
+   point to non-GGC memory.  */
+static section *unnamed_sections;
/* Return a nonzero value if DECL has a section attribute. */
#ifndef IN_NAMED_SECTION
@@ -518,7 +521,7 @@ get_unnamed_section (unsigned int flags, void (*callback) (const void *),
{
section *sect;
- sect = GGC_NEW (section);
+ sect = (section *) xmalloc (sizeof (section));
sect->unnamed.common.flags = flags | SECTION_UNNAMED;
sect->unnamed.callback = callback;
sect->unnamed.data = data;
@@ -535,7 +538,7 @@ get_noswitch_section (unsigned int flags, noswitch_section_callback callback)
{
section *sect;
- sect = GGC_NEW (section);
+ sect = (section *) xmalloc (sizeof (section));
sect->noswitch.common.flags = flags | SECTION_NOSWITCH;
sect->noswitch.callback = callback;
@@ -5601,6 +5604,56 @@ init_varasm_once (void)
readonly_data_section = text_section;
}
+static GTY(()) section *pickled_in_section;
+
+/* Replace in_section with something that can be restored after reading
+ in a precompiled header file. */
+void
+pickle_in_section (void)
+{
+ section *p;
+ int i = 0;
+
+ if (!in_section || SECTION_STYLE (in_section) != SECTION_UNNAMED)
+ {
+ pickled_in_section = in_section;
+ return;
+ }
+ for (p = unnamed_sections; p != in_section; p = p->unnamed.next)
+ i++;
+ gcc_assert (p == in_section);
+ pickled_in_section = GGC_NEW (struct unnamed_section);
+ *pickled_in_section = *in_section;
+ pickled_in_section->unnamed.data = (void *) i;
+ in_section = pickled_in_section;
+}
+
+void
+unpickle_in_section (void)
+{
+ int i;
+ section *p;
+
+ in_section = pickled_in_section;
+ if (!in_section || SECTION_STYLE (in_section) != SECTION_UNNAMED)
+ return;
+  /* When flag_preprocess_only is set, backend_init was not called, so
+     the list is empty.  Since no asm is output in that case, it does
+     not matter that in_section cannot be restored.  */
+ if (!unnamed_sections)
+ {
+ in_section = 0;
+ return;
+ }
+ for (p = unnamed_sections, i = (int) in_section->unnamed.data; i; i--)
+ {
+ gcc_assert (p);
+ p = p->unnamed.next;
+ }
+ gcc_assert (p);
+ in_section = p;
+}
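The pickling scheme above replaces a pointer into a list that is rebuilt on PCH load by its index, then recovers the pointer by walking the rebuilt list. A standalone sketch of the same round-trip (illustrative only; node stands in for GCC's unnamed-section list):

#include <assert.h>
#include <stddef.h>

struct node { struct node *next; };

/* Save a pointer into the list as a position index.  */
static int
pickle (struct node *head, struct node *target)
{
  int i = 0;
  for (struct node *p = head; p != target; p = p->next)
    i++;
  return i;
}

/* Recover the pointer by walking the (rebuilt) list again.  */
static struct node *
unpickle (struct node *head, int i)
{
  struct node *p = head;
  for (; i; i--)
    {
      assert (p);
      p = p->next;
    }
  return p;
}

int
main (void)
{
  struct node c = { NULL }, b = { &c }, a = { &b };
  int idx = pickle (&a, &c);          /* idx == 2 */
  assert (unpickle (&a, idx) == &c);  /* Pointer recovered.  */
  return 0;
}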
+
enum tls_model
decl_default_tls_model (const_tree decl)
{
diff --git a/libgcc/ChangeLog.ARC b/libgcc/ChangeLog.ARC
new file mode 100644
index 00000000000..13656f454ce
--- /dev/null
+++ b/libgcc/ChangeLog.ARC
@@ -0,0 +1,4 @@
+2008-07-15 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * config.host (mxp-*-elf*): Add.
+
diff --git a/libgcc/config.host b/libgcc/config.host
index 0711410cf6a..edcea45474e 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -364,6 +364,8 @@ m32r-*-linux*)
;;
m32rle-*-linux*)
;;
+mxp-*-elf*)
+ ;;
m68hc11-*-*|m6811-*-*)
;;
m68hc12-*-*|m6812-*-*)
diff --git a/libstdc++-v3/ChangeLog.ARC b/libstdc++-v3/ChangeLog.ARC
new file mode 100644
index 00000000000..cc2ef180ade
--- /dev/null
+++ b/libstdc++-v3/ChangeLog.ARC
@@ -0,0 +1,20 @@
+2007-06-11 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * libstdc++-v3/testsuite/lib/libstdc++.exp (v3-build_support):
+ When compilation fails, tell why.
+ * scripts/testsuite_flags.in (--build-cxx): Don't mistake -m* inside
+ an identifier for an -m option.
+
+2007-05-16 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * scripts/testsuite_flags.in (--build-cxx): Remove -m options.
+
+2007-05-10 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc (main): Don't
+ access deallocated nodes.
+
+2007-04-20 J"orn Rennecke <joern.rennecke@arc.com>
+
+ * testsuite/27_io/basic_stringbuf/overflow/char/1.cc:
+	Specify heap size for ARC.
diff --git a/libstdc++-v3/scripts/testsuite_flags.in b/libstdc++-v3/scripts/testsuite_flags.in
index 457adaf4d5f..84e4a2b5d0f 100755
--- a/libstdc++-v3/scripts/testsuite_flags.in
+++ b/libstdc++-v3/scripts/testsuite_flags.in
@@ -45,7 +45,10 @@ case ${query} in
;;
--build-cxx)
CXX_build="@CXX@"
- CXX=`echo "$CXX_build" | sed 's,gcc/xgcc ,gcc/g++ ,'`
+    # Remove -m options: when MULTILIB_MATCHES is used, the options
+    # actually under test can differ from the ones that were used to
+    # build the multilib.
+ CXX=`echo "$CXX_build" | sed -e 's,gcc/xgcc ,gcc/g++ ,' -e 's/[ ]-m[a-zA-Z0-9_]*//g'`
echo ${CXX}
;;
--build-cc)
diff --git a/libstdc++-v3/testsuite/27_io/basic_stringbuf/overflow/char/1.cc b/libstdc++-v3/testsuite/27_io/basic_stringbuf/overflow/char/1.cc
index b07863b18fb..1a7ed9de5bf 100644
--- a/libstdc++-v3/testsuite/27_io/basic_stringbuf/overflow/char/1.cc
+++ b/libstdc++-v3/testsuite/27_io/basic_stringbuf/overflow/char/1.cc
@@ -20,6 +20,8 @@
// 27.7.1.3 basic_stringbuf overridden virtual functions.
+// { dg-options "-Wl,--defsym,__HEAP_SIZE=57m" { target arc-*-* } }
+
#include <sstream>
#include <cstdlib>
#include <testsuite_hooks.h>
diff --git a/libstdc++-v3/testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc b/libstdc++-v3/testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc
index af582b37b34..5ddfe57646f 100644
--- a/libstdc++-v3/testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc
+++ b/libstdc++-v3/testsuite/ext/pb_ds/example/priority_queue_dijkstra.cc
@@ -145,6 +145,10 @@ int main()
// distances, if applicable.
for (size_t neighbor_i = 0; neighbor_i < num_vertices; ++neighbor_i)
{
+	  // If the neighbor has already been deleted, don't try to
+	  // dereference it.
+ if (a_it[neighbor_i] == a_it[0])
+ continue;
// Potentially, the distance to the neighbor is the distance
// to the currently-considered node + the distance from this
// node to the neighbor.
@@ -158,6 +162,7 @@ int main()
p.modify(a_it[neighbor_i], pq_value(neighbor_i, pot_dist));
}
+ a_it[node_id] = a_it[0];
// Done with the node, so we pop it.
p.pop();
diff --git a/libstdc++-v3/testsuite/lib/libstdc++.exp b/libstdc++-v3/testsuite/lib/libstdc++.exp
index 3b2e18b7b2c..33c14c800f1 100644
--- a/libstdc++-v3/testsuite/lib/libstdc++.exp
+++ b/libstdc++-v3/testsuite/lib/libstdc++.exp
@@ -547,10 +547,10 @@ proc v3-build_support { } {
set object_file [file tail $obj]
# Compile with "-w" so that warnings issued by the compiler
# do not prevent compilation.
- if { [v3_target_compile $srcdir/util/$f $object_file "object" \
+ set result [v3_target_compile $srcdir/util/$f $object_file "object" \
[list "incdir=$srcdir" "additional_flags=-w"]]
- != "" } {
- error "could not compile $f"
+ if { $result != "" } {
+ error "could not compile $f : $result"
}
append libtest_objs "$object_file "
}