diff options
Diffstat (limited to 'gcc/config/sh/sh.md')
-rw-r--r-- | gcc/config/sh/sh.md | 1481 |
1 files changed, 1371 insertions, 110 deletions
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md index 3ca60b5d9ba..f572d6f343d 100644 --- a/gcc/config/sh/sh.md +++ b/gcc/config/sh/sh.md @@ -1,5 +1,5 @@ ;;- Machine description for the Hitachi SH. -;; Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc. +;; Copyright (C) 1993 - 1999 Free Software Foundation, Inc. ;; Contributed by Steve Chamberlain (sac@cygnus.com). ;; Improved by Jim Wilson (wilson@cygnus.com). @@ -70,13 +70,20 @@ ;; Target CPU. (define_attr "cpu" - "sh1,sh2,sh3,sh3e" + "sh1,sh2,sh3,sh3e,sh4" (const (symbol_ref "sh_cpu_attr"))) (define_attr "endian" "big,little" (const (if_then_else (symbol_ref "TARGET_LITTLE_ENDIAN") (const_string "little") (const_string "big")))) +(define_attr "fmovd" "yes,no" + (const (if_then_else (symbol_ref "TARGET_FMOVD") + (const_string "yes") (const_string "no")))) +;; issues/clock +(define_attr "issues" "1,2" + (const (if_then_else (symbol_ref "TARGET_SUPERSCALAR") (const_string "2") (const_string "1")))) + ;; cbranch conditional branch instructions ;; jump unconditional jumps ;; arith ordinary arithmetic @@ -101,10 +108,12 @@ ;; fp floating point ;; fdiv floating point divide (or square root) ;; gp_fpul move between general purpose register and fpul +;; dfp_arith, dfp_cmp,dfp_conv +;; dfdiv double precision floating point divide (or square root) ;; nil no-op move, will be deleted. (define_attr "type" - "cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,gp_fpul,nil" + "cbranch,jump,jump_ind,arith,arith3,arith3b,dyn_shift,other,load,load_si,store,move,fmove,smpy,dmpy,return,pload,pstore,pcload,pcload_si,rte,sfunc,call,fp,fdiv,dfp_arith,dfp_cmp,dfp_conv,dfdiv,gp_fpul,nil" (const_string "other")) ; If a conditional branch destination is within -252..258 bytes away @@ -252,34 +261,216 @@ ;; We only do this for SImode loads of general registers, to make the work ;; for ADJUST_COST easier. 
(define_function_unit "memory" 1 0 - (eq_attr "type" "load_si,pcload_si") + (and (eq_attr "issues" "1") + (eq_attr "type" "load_si,pcload_si")) 3 2) (define_function_unit "memory" 1 0 - (eq_attr "type" "load,pcload,pload,store,pstore") + (and (eq_attr "issues" "1") + (eq_attr "type" "load,pcload,pload,store,pstore")) 2 2) (define_function_unit "int" 1 0 - (eq_attr "type" "arith3,arith3b") 3 3) + (and (eq_attr "issues" "1") (eq_attr "type" "arith3,arith3b")) 3 3) (define_function_unit "int" 1 0 - (eq_attr "type" "dyn_shift") 2 2) + (and (eq_attr "issues" "1") (eq_attr "type" "dyn_shift")) 2 2) (define_function_unit "int" 1 0 - (eq_attr "type" "arith,arith3b,dyn_shift") 2 2) + (and (eq_attr "issues" "1") (eq_attr "type" "!arith3,arith3b,dyn_shift")) 1 1) ;; ??? These are approximations. -(define_function_unit "mpy" 1 0 (eq_attr "type" "smpy") 2 2) -(define_function_unit "mpy" 1 0 (eq_attr "type" "dmpy") 3 3) +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "smpy")) 2 2) +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "dmpy")) 3 3) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "fp,fmove")) 2 1) +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "1") (eq_attr "type" "fdiv")) 13 12) + + +;; SH4 scheduling +;; The SH4 is a dual-issue implementation, thus we have to multiply all +;; costs by at least two. +;; There will be single increments of the modeled that don't correspond +;; to the actual target ;; whenever two insns to be issued depend one a +;; single resource, and the scheduler picks to be the first one. +;; If we multiplied the costs just by two, just two of these single +;; increments would amount to an actual cycle. By picking a larger +;; factor, we can ameliorate the effect; However, we then have to make sure +;; that only two insns are modeled as issued per actual cycle. 
+;; Moreover, we need a way to specify the latency of insns that don't +;; use an actual function unit. +;; We use an 'issue' function unit to do that, and a cost factor of 10. + +(define_function_unit "issue" 2 0 + (and (eq_attr "issues" "2") (eq_attr "type" "!nil,arith3")) + 10 10) + +(define_function_unit "issue" 2 0 + (and (eq_attr "issues" "2") (eq_attr "type" "arith3")) + 30 30) + +;; There is no point in providing exact scheduling information about branches, +;; because they are at the starts / ends of basic blocks anyways. + +;; Some insns cannot be issued before/after another insn in the same cycle, +;; irrespective of the type of the other insn. + +;; default is dual-issue, but can't be paired with an insn that +;; uses multiple function units. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "!smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul,call,sfunc,arith3,arith3b")) + 1 10 + [(eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")]) + +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "smpy,dmpy,pload,pstore,dfp_cmp,gp_fpul")) + 10 10 + [(const_int 1)]) + +;; arith3 insns are always pairable at the start, but not necessarily at +;; the end; however, there doesn't seem to be a way to express that. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "arith3")) + 30 20 + [(const_int 1)]) + +;; arith3b insns are pairable at the end and have latency that prevents pairing +;; with the following branch, but we don't want this latency to be respected; +;; When the following branch is immediately adjacent, we can redirect the +;; internal branch, which is likely to be a larger win. +(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "arith3b")) + 20 20 + [(const_int 1)]) + +;; calls introduce a longish delay that is likely to flush the pipelines. 
+(define_function_unit "single_issue" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "call,sfunc")) + 160 160 + [(eq_attr "type" "!call") (eq_attr "type" "call")]) + +;; Load and store instructions have no alignment peculiarities for the SH4, +;; but they use the load-store unit, which they share with the fmove type +;; insns (fldi[01]; fmov frn,frm; flds; fsts; fabs; fneg) . +;; Loads have a latency of two. +;; However, call insns can only paired with a preceding insn, and have +;; a delay slot, so that we want two more insns to be scheduled between the +;; load of the function address and the call. This is equivalent to a +;; latency of three. +;; We cannot use a conflict list for this, because we need to distinguish +;; between the actual call address and the function arguments. +;; ADJUST_COST can only properly handle reductions of the cost, so we +;; use a latency of three here, which gets multiplied by 10 to yield 30. +;; We only do this for SImode loads of general registers, to make the work +;; for ADJUST_COST easier. -(define_function_unit "fp" 1 0 (eq_attr "type" "fp,fmove") 2 1) -(define_function_unit "fp" 1 0 (eq_attr "type" "fdiv") 13 12) +;; When specifying different latencies for different insns using the +;; the same function unit, genattrtab.c assumes a 'FIFO constraint' +;; so that the blockage is at least READY-COST (E) + 1 - READY-COST (C) +;; for an executing insn E and a candidate insn C. +;; Therefore, we define three different function units for load_store: +;; load_store, load and load_si. 
+ +(define_function_unit "load_si" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load_si,pcload_si")) 30 10) +(define_function_unit "load" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load,pcload,pload")) 20 10) +(define_function_unit "load_store" 1 0 + (and (eq_attr "issues" "2") + (eq_attr "type" "load_si,pcload_si,load,pcload,pload,store,pstore,fmove")) + 10 10) +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "arith,dyn_shift")) 10 10) + +;; Again, we have to pretend a lower latency for the "int" unit to avoid a +;; spurious FIFO constraint; the multiply instructions use the "int" +;; unit actually only for two cycles. +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 20 20) + +;; We use a fictitious "mpy" unit to express the actual latency. +(define_function_unit "mpy" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 20) + +;; Again, we have to pretend a lower latency for the "int" unit to avoid a +;; spurious FIFO constraint. +(define_function_unit "int" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 10 10) + +;; We use a fictitious "gp_fpul" unit to express the actual latency. +(define_function_unit "gp_fpul" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "gp_fpul")) 20 10) + +;; ??? multiply uses the floating point unit, but with a two cycle delay. +;; Thus, a simple single-precision fp operation could finish if issued in +;; the very next cycle, but stalls when issued two or three cycles later. +;; Similarly, a divide / sqrt can work without stalls if issued in +;; the very next cycle, while it would have to block if issued two or +;; three cycles later. +;; There is no way to model this with gcc's function units. This problem is +;; actually mentioned in md.texi. Tackling this problem requires first that +;; it is possible to speak about the target in an open discussion. +;; +;; However, simple double-precision operations always conflict. 
+ +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "smpy,dmpy")) 40 40 + [(eq_attr "type" "dfp_cmp,dfp_conv,dfp_arith")]) + +;; The "fp" unit is for pipeline stages F1 and F2. + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fp")) 30 10) + +;; Again, we have to pretend a lower latency for the "fp" unit to avoid a +;; spurious FIFO constraint; the bulk of the fdiv type insns executes in +;; the F3 stage. +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 30 10) + +;; The "fdiv" function unit models the aggregate effect of the F1, F2 and F3 +;; pipeline stages on the pipelining of fdiv/fsqrt insns. +;; We also use it to give the actual latency here. +;; fsqrt is actually one cycle faster than fdiv (and the value used here), +;; but that will hardly matter in practice for scheduling. +(define_function_unit "fdiv" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "fdiv")) 120 100) + +;; There is again a late use of the "fp" unit by [d]fdiv type insns +;; that we can't express. + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfp_cmp,dfp_conv")) 40 20) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfp_arith")) 80 60) + +(define_function_unit "fp" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 10) + +(define_function_unit "fdiv" 1 0 + (and (eq_attr "issues" "2") (eq_attr "type" "dfdiv")) 230 210) ; Definitions for filling branch delay slots. (define_attr "needs_delay_slot" "yes,no" (const_string "no")) -(define_attr "hit_stack" "yes,no" (const_string "no")) +;; ??? 
This should be (nil) instead of (const_int 0) +(define_attr "hit_stack" "yes,no" + (cond [(eq (symbol_ref "find_regno_note (insn, REG_INC, 15)") (const_int 0)) + (const_string "no")] + (const_string "yes"))) (define_attr "interrupt_function" "no,yes" (const (symbol_ref "pragma_interrupt"))) @@ -291,6 +482,9 @@ (eq_attr "length" "2") (const_string "yes") ] (const_string "no"))) +(define_attr "is_sfunc" "" + (if_then_else (eq_attr "type" "sfunc") (const_int 1) (const_int 0))) + (define_delay (eq_attr "needs_delay_slot" "yes") [(eq_attr "in_delay_slot" "yes") (nil) (nil)]) @@ -668,7 +862,42 @@ (clobber (reg:SI 17)) (clobber (reg:SI 4)) (use (match_operand:SI 1 "arith_reg_operand" "r"))] - "" + "! TARGET_SH4" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:DF 28)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (use (reg:PSI 48)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH4 && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "udivsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (udiv:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:DF 28)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_HARD_SH4 && TARGET_FPU_SINGLE" "jsr @%1%#" [(set_attr "type" "sfunc") (set_attr "needs_delay_slot" "yes")]) @@ -685,7 +914,22 @@ (clobber (reg:SI 4)) (use (match_dup 3))])] "" - "operands[3] = gen_reg_rtx(SImode);") + " +{ + operands[3] = gen_reg_rtx(SImode); + if (TARGET_HARD_SH4) + { + emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]); + emit_move_insn (gen_rtx (REG, SImode, 5), operands[2]); + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__udivsi3_i4\")); + if (TARGET_FPU_SINGLE) + emit_insn (gen_udivsi3_i4_single (operands[0], operands[3])); + else + emit_insn (gen_udivsi3_i4 (operands[0], operands[3])); + DONE; + } +}") (define_insn "" [(set (match_operand:SI 0 "register_operand" "=z") @@ -696,7 +940,33 @@ (clobber (reg:SI 2)) (clobber (reg:SI 3)) (use (match_operand:SI 1 "arith_reg_operand" "r"))] - "" + "! TARGET_SH4" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (use (reg:PSI 48)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_SH4 && ! 
TARGET_FPU_SINGLE" + "jsr @%1%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "divsi3_i4_single" + [(set (match_operand:SI 0 "register_operand" "=y") + (div:SI (reg:SI 4) (reg:SI 5))) + (clobber (reg:SI 17)) + (clobber (reg:DF 24)) + (clobber (reg:DF 26)) + (clobber (reg:SI 2)) + (use (match_operand:SI 1 "arith_reg_operand" "r"))] + "TARGET_HARD_SH4 && TARGET_FPU_SINGLE" "jsr @%1%#" [(set_attr "type" "sfunc") (set_attr "needs_delay_slot" "yes")]) @@ -715,7 +985,22 @@ (clobber (reg:SI 3)) (use (match_dup 3))])] "" - "operands[3] = gen_reg_rtx(SImode);") + " +{ + operands[3] = gen_reg_rtx(SImode); + if (TARGET_HARD_SH4) + { + emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]); + emit_move_insn (gen_rtx (REG, SImode, 5), operands[2]); + emit_move_insn (operands[3], + gen_rtx_SYMBOL_REF (SImode, \"__sdivsi3_i4\")); + if (TARGET_FPU_SINGLE) + emit_insn (gen_divsi3_i4_single (operands[0], operands[3])); + else + emit_insn (gen_divsi3_i4 (operands[0], operands[3])); + DONE; + } +}") ;; ------------------------------------------------------------------------- ;; Multiplication instructions @@ -782,7 +1067,6 @@ (define_expand "mulsi3_call" [(set (reg:SI 4) (match_operand:SI 1 "general_operand" "")) (set (reg:SI 5) (match_operand:SI 2 "general_operand" "")) - (set (match_dup 3) (symbol_ref:SI "__mulsi3")) (parallel[(set (match_operand:SI 0 "register_operand" "") (mult:SI (reg:SI 4) (reg:SI 5))) @@ -792,9 +1076,9 @@ (clobber (reg:SI 3)) (clobber (reg:SI 2)) (clobber (reg:SI 1)) - (use (match_dup 3))])] + (use (match_operand:SI 3 "register_operand" ""))])] "" - "operands[3] = gen_reg_rtx(SImode);") + "") (define_insn "mul_l" [(set (reg:SI 21) @@ -813,13 +1097,32 @@ "" " { + rtx first, last; + if (!TARGET_SH2) { - FAIL; - /* ??? Does this give worse or better code? */ - emit_insn (gen_mulsi3_call (operands[0], operands[1], operands[2])); - DONE; + /* The address must be set outside the libcall, + since it goes into a pseudo. 
*/ + rtx addr = force_reg (SImode, gen_rtx_SYMBOL_REF (SImode, \"__mulsi3\")); + rtx insns = gen_mulsi3_call (operands[0], operands[1], operands[2], addr); + first = XVECEXP (insns, 0, 0); + last = XVECEXP (insns, 0, XVECLEN (insns, 0) - 1); + emit_insn (insns); + } + else + { + rtx macl = gen_rtx_REG (SImode, MACL_REG); + first = emit_insn (gen_mul_l (operands[1], operands[2])); + emit_insn (gen_movsi_i ((operands[0]), macl)); + /* The sequence must end in a no-op move, lest cse puts macl in its + tables and does invalid substitutions. */ + last = emit_insn (gen_movsi_i ((operands[0]), operands[0])); } + /* Wrap the sequence in REG_LIBCALL / REG_RETVAL notes so that loop + invariant code motion can move it. */ + REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); + REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, REG_NOTES (last)); + DONE; }") (define_insn "mulsidi3_i" @@ -1767,50 +2070,65 @@ ;; define push and pop so it is easy for sh.c -(define_insn "push" +(define_expand "push" [(set (mem:SI (pre_dec:SI (reg:SI 15))) (match_operand:SI 0 "register_operand" "r,l,x"))] "" - "@ - mov.l %0,@-r15 - sts.l %0,@-r15 - sts.l %0,@-r15" - [(set_attr "type" "store,pstore,store") - (set_attr "hit_stack" "yes")]) + "") -(define_insn "pop" +(define_expand "pop" [(set (match_operand:SI 0 "register_operand" "=r,l,x") (mem:SI (post_inc:SI (reg:SI 15))))] "" - "@ - mov.l @r15+,%0 - lds.l @r15+,%0 - lds.l @r15+,%0" - [(set_attr "type" "load,pload,load") - (set_attr "hit_stack" "yes")]) + "") + +(define_expand "push_e" + [(parallel [(set (mem:SF (pre_dec:SI (reg:SI 15))) + (match_operand:SF 0 "" "")) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") -(define_insn "push_e" - [(set (mem:SF (pre_dec:SI (reg:SI 15))) - (match_operand:SF 0 "register_operand" "r,f,y"))] +(define_insn "push_fpul" + [(set (mem:SF (pre_dec:SI (reg:SI 15))) (reg:SF 22))] "TARGET_SH3E" - "@ - mov.l %0,@-r15 - fmov.s %0,@-r15 - sts.l %0,@-r15" + "sts.l fpul,@-r15" 
[(set_attr "type" "store") (set_attr "hit_stack" "yes")]) -(define_insn "pop_e" - [(set (match_operand:SF 0 "register_operand" "=r,f,y") - (mem:SF (post_inc:SI (reg:SI 15))))] +;; DFmode pushes for sh4 require a lot of what is defined for movdf_i4, +;; so use that. +(define_expand "push_4" + [(parallel [(set (mem:DF (pre_dec:SI (reg:SI 15))) (match_operand:DF 0 "" "")) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_expand "pop_e" + [(parallel [(set (match_operand:SF 0 "" "") + (mem:SF (post_inc:SI (reg:SI 15)))) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + +(define_insn "pop_fpul" + [(set (reg:SF 22) (mem:SF (post_inc:SI (reg:SI 15))))] "TARGET_SH3E" - "@ - mov.l @r15+,%0 - fmov.s @r15+,%0 - lds.l @r15+,%0" + "lds.l @r15+,fpul" [(set_attr "type" "load") (set_attr "hit_stack" "yes")]) +(define_expand "pop_4" + [(parallel [(set (match_operand:DF 0 "" "") + (mem:DF (post_inc:SI (reg:SI 15)))) + (use (reg:PSI 48)) + (clobber (scratch:SI))])] + "" + "") + ;; These two patterns can happen as the result of optimization, when ;; comparisons get simplified to a move of zero or 1 into the T reg. ;; They don't disappear completely, because the T reg is a fixed hard reg. @@ -1825,19 +2143,20 @@ "" "sett") -;; t/r is first, so that it will be preferred over r/r when reloading a move -;; of a pseudo-reg into the T reg +;; t/r must come after r/r, lest reload will try to reload stuff like +;; (set (subreg:SI (mem:QI (plus:SI (reg:SI 15 r15) (const_int 12)) 0) 0) +;; (made from (set (subreg:SI (reg:QI 73) 0) ) into T. (define_insn "movsi_i" - [(set (match_operand:SI 0 "general_movdst_operand" "=t,r,r,r,r,r,m,<,<,xl,x,l,r") - (match_operand:SI 1 "general_movsrc_operand" "r,Q,rI,m,xl,t,r,x,l,r,>,>,i"))] + [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,r") + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,mr,xl,t,r,x,l,r,>,>,i"))] " ! 
TARGET_SH3E && (register_operand (operands[0], SImode) || register_operand (operands[1], SImode))" "@ - cmp/pl %1 mov.l %1,%0 mov %1,%0 + cmp/pl %1 mov.l %1,%0 sts %1,%0 movt %0 @@ -1848,7 +2167,7 @@ lds.l %1,%0 lds.l %1,%0 fake %1,%0" - [(set_attr "type" "*,pcload_si,move,load_si,move,move,store,store,pstore,move,load,pload,pcload_si") + [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,pcload_si") (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*")]) ;; t/r must come after r/r, lest reload will try to reload stuff like @@ -1856,8 +2175,8 @@ ;; ??? This allows moves from macl to fpul to be recognized, but these moves ;; will require a reload. (define_insn "movsi_ie" - [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,r,y,r,y") - (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,m,xl,t,r,x,l,r,>,>,i,r,y,y"))] + [(set (match_operand:SI 0 "general_movdst_operand" "=r,r,t,r,r,r,m,<,<,xl,x,l,y,r,y,r,y") + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,r,mr,xl,t,r,x,l,r,>,>,>,i,r,y,y"))] "TARGET_SH3E && (register_operand (operands[0], SImode) || register_operand (operands[1], SImode))" @@ -1874,16 +2193,17 @@ lds %1,%0 lds.l %1,%0 lds.l %1,%0 + lds.l %1,%0 fake %1,%0 lds %1,%0 sts %1,%0 ! 
move optimized away" - [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,pcload_si,gp_fpul,gp_fpul,nil") - (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")]) + [(set_attr "type" "pcload_si,move,*,load_si,move,move,store,store,pstore,move,load,pload,load,pcload_si,gp_fpul,gp_fpul,nil") + (set_attr "length" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,0")]) (define_insn "movsi_i_lowpart" [(set (strict_low_part (match_operand:SI 0 "general_movdst_operand" "=r,r,r,r,r,m,r")) - (match_operand:SI 1 "general_movsrc_operand" "Q,rI,m,xl,t,r,i"))] + (match_operand:SI 1 "general_movsrc_operand" "Q,rI,mr,xl,t,r,i"))] "register_operand (operands[0], SImode) || register_operand (operands[1], SImode)" "@ @@ -1901,6 +2221,30 @@ "" "{ if (prepare_move_operands (operands, SImode)) DONE; }") +(define_expand "ic_invalidate_line" + [(parallel [(unspec_volatile [(match_operand:SI 0 "register_operand" "+r") + (match_dup 1)] 12) + (clobber (scratch:SI))])] + "TARGET_HARD_SH4" + " +{ + operands[0] = force_reg (Pmode, operands[0]); + operands[1] = force_reg (Pmode, GEN_INT (0xf0000008)); +}") + +;; The address %0 is assumed to be 4-aligned at least. Thus, by ORing +;; 0xf0000008, we get the low-order bits *1*00 (binary), +;; which fits +;; the requirement *0*00 for associative address writes. The alignment of +;; %0 implies that its least significant bit is cleared, +;; thus we clear the V bit of a matching entry if there is one. 
+(define_insn "ic_invalidate_line_i" + [(unspec_volatile [(match_operand:SI 0 "register_operand" "r,r") + (match_operand:SI 1 "register_operand" "r,r")] 12) + (clobber (match_scratch:SI 2 "=&r,1"))] + "TARGET_HARD_SH4" + "ocbwb\\t@%0\;extu.w\\t%0,%2\;or\\t%r1,%r2\;mov.l\\t%0,@%2" + [(set_attr "length" "8")]) + (define_insn "movqi_i" [(set (match_operand:QI 0 "general_movdst_operand" "=r,r,m,r,r,l") (match_operand:QI 1 "general_movsrc_operand" "ri,m,r,t,l,r"))] @@ -2014,12 +2358,330 @@ (define_insn "movdf_k" [(set (match_operand:DF 0 "general_movdst_operand" "=r,r,r,m") (match_operand:DF 1 "general_movsrc_operand" "r,FQ,m,r"))] - "arith_reg_operand (operands[0], DFmode) - || arith_reg_operand (operands[1], DFmode)" + "(! TARGET_SH4 || reload_completed + /* ??? We provide some insn so that direct_{load,store}[DFmode] get set */ + || GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3 + || GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3) + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" "* return output_movedouble (insn, operands, DFmode);" [(set_attr "length" "4") (set_attr "type" "move,pcload,load,store")]) +;; All alternatives of movdf_i4 are split for ! TARGET_FMOVD. +;; However, the d/F/c/z alternative cannot be split directly; it is converted +;; with special code in machine_dependent_reorg into a load of the R0_REG and +;; the d/m/c/X alternative, which is split later into single-precision +;; instructions. And when not optimizing, no splits are done before fixing +;; up pcloads, so we need usable length information for that. 
+(define_insn "movdf_i4" + [(set (match_operand:DF 0 "general_movdst_operand" "=d,r,d,d,m,r,r,m,!??r,!???d") + (match_operand:DF 1 "general_movsrc_operand" "d,r,F,m,d,FQ,m,r,d,r")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,&z,X,X,X,X,X,X,X"))] + "TARGET_SH4 + && (arith_reg_operand (operands[0], DFmode) + || arith_reg_operand (operands[1], DFmode))" + "@ + fmov %1,%0 + # + # + fmov.d %1,%0 + fmov.d %1,%0 + # + # + # + # + #" + [(set_attr_alternative "length" + [(if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 4)) + (const_int 4) + (if_then_else (eq_attr "fmovd" "yes") (const_int 4) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6)) + (if_then_else (eq_attr "fmovd" "yes") (const_int 2) (const_int 6)) + (const_int 4) + (const_int 8) (const_int 8) ;; these need only 8 bytes for @(r0,rn) + (const_int 8) (const_int 8)]) + (set_attr "type" "fmove,move,pcload,load,store,pcload,load,store,load,load")]) + +;; Moving DFmode between fp/general registers through memory +;; (the top of the stack) is faster than moving through fpul even for +;; little endian. Because the type of an instruction is important for its +;; scheduling, it is beneficial to split these operations, rather than +;; emitting them in one single chunk, even if this will expose a stack +;; use that will prevent scheduling of other stack accesses beyond this +;; instruction. 
+(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "=X"))] + "TARGET_SH4 && reload_completed + && (true_regnum (operands[0]) < 16) != (true_regnum (operands[1]) < 16)" + [(const_int 0)] + " +{ + rtx insn, tos; + + tos = gen_rtx (MEM, DFmode, gen_rtx (PRE_DEC, Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (tos, operands[1], operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX); + tos = gen_rtx (MEM, DFmode, gen_rtx (POST_INC, Pmode, stack_pointer_rtx)); + insn = emit_insn (gen_movdf_i4 (operands[0], tos, operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, stack_pointer_rtx, NULL_RTX); + DONE; +}") + +;; local-alloc sometimes allocates scratch registers even when not required, +;; so we must be prepared to handle these. + +;; Remove the use and clobber from a movdf_i4 so that we can use movdf_k. +(define_split + [(set (match_operand:DF 0 "general_movdst_operand" "") + (match_operand:DF 1 "general_movsrc_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 + && reload_completed + && true_regnum (operands[0]) < 16 + && true_regnum (operands[1]) < 16" + [(set (match_dup 0) (match_dup 1))] + " +{ + /* If this was a reg <-> mem operation with base + index reg addressing, + we have to handle this in a special way. */ + rtx mem = operands[0]; + int store_p = 1; + if (! 
memory_operand (mem, DFmode)) + { + mem = operands[1]; + store_p = 0; + } + if (GET_CODE (mem) == SUBREG && SUBREG_WORD (mem) == 0) + mem = SUBREG_REG (mem); + if (GET_CODE (mem) == MEM) + { + rtx addr = XEXP (mem, 0); + if (GET_CODE (addr) == PLUS + && GET_CODE (XEXP (addr, 0)) == REG + && GET_CODE (XEXP (addr, 1)) == REG) + { + int offset; + rtx reg0 = gen_rtx (REG, Pmode, 0); + rtx regop = operands[store_p], word0 ,word1; + + if (GET_CODE (regop) == SUBREG) + regop = alter_subreg (regop); + if (REGNO (XEXP (addr, 0)) == REGNO (XEXP (addr, 1))) + offset = 2; + else + offset = 4; + mem = copy_rtx (mem); + PUT_MODE (mem, SImode); + word0 = gen_rtx(SUBREG, SImode, regop, 0); + emit_insn (store_p + ? gen_movsi_ie (mem, word0) : gen_movsi_ie (word0, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (offset))); + mem = copy_rtx (mem); + word1 = gen_rtx(SUBREG, SImode, regop, 1); + emit_insn (store_p + ? gen_movsi_ie (mem, word1) : gen_movsi_ie (word1, mem)); + emit_insn (gen_addsi3 (reg0, reg0, GEN_INT (-offset))); + DONE; + } + } +}") + +;; Split away the clobber of r0 after machine_dependent_reorg has fixed pcloads. +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 0))] + "TARGET_SH4 && reload_completed" + [(parallel [(set (match_dup 0) (match_dup 1)) + (use (match_dup 2)) + (clobber (scratch:SI))])] + "") + +(define_expand "reload_indf" + [(parallel [(set (match_operand:DF 0 "register_operand" "=f") + (match_operand:DF 1 "immediate_operand" "FQ")) + (use (reg:PSI 48)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_expand "reload_outdf" + [(parallel [(set (match_operand:DF 0 "register_operand" "=r,f") + (match_operand:DF 1 "register_operand" "af,r")) + (clobber (match_operand:SI 2 "register_operand" "=&y,y"))])] + "" + "") + +;; Simplify no-op moves. 
+(define_split + [(set (match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH3E && reload_completed + && true_regnum (operands[0]) == true_regnum (operands[1])" + [(set (match_dup 0) (match_dup 0))] + "") + +;; fmovd substitute post-reload splits +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG + && true_regnum (operands[1]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int dst = true_regnum (operands[0]), src = true_regnum (operands[1]); + emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst), + gen_rtx (REG, SFmode, src), operands[2])); + emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, dst + 1), + gen_rtx (REG, SFmode, src + 1), operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (mem:DF (match_operand:SI 1 "register_operand" ""))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG + && find_regno_note (insn, REG_DEAD, true_regnum (operands[1]))" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx insn; + rtx mem2 = gen_rtx (MEM, SFmode, gen_rtx (POST_INC, Pmode, operands[1])); + + insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + mem2, operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[1], NULL_RTX); + insn = emit_insn (gen_movsf_ie (gen_rtx (REG, SFmode, + regno + ! 
TARGET_LITTLE_ENDIAN), + gen_rtx (MEM, SFmode, operands[1]), + operands[2])); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "register_operand" "") + (match_operand:DF 1 "memory_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! TARGET_FMOVD && reload_completed + && true_regnum (operands[0]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[0]); + rtx addr, insn, adjust = NULL_RTX; + rtx mem2 = copy_rtx (operands[1]); + rtx reg0 = gen_rtx_REG (SFmode, regno + !! TARGET_LITTLE_ENDIAN); + rtx reg1 = gen_rtx_REG (SFmode, regno + ! TARGET_LITTLE_ENDIAN); + + PUT_MODE (mem2, SFmode); + operands[1] = copy_rtx (mem2); + addr = XEXP (mem2, 0); + if (GET_CODE (addr) != POST_INC) + { + /* If we have to modify the stack pointer, the value that we have + read with post-increment might be modified by an interrupt, + so write it back. */ + if (REGNO (addr) == STACK_POINTER_REGNUM) + adjust = gen_push_e (reg0); + else + adjust = gen_addsi3 (addr, addr, GEN_INT (-4)); + XEXP (mem2, 0) = addr = gen_rtx_POST_INC (SImode, addr); + } + addr = XEXP (addr, 0); + insn = emit_insn (gen_movsf_ie (reg0, mem2, operands[2])); + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX); + insn = emit_insn (gen_movsf_ie (reg1, operands[1], operands[2])); + if (adjust) + emit_insn (adjust); + else + REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_INC, addr, NULL_RTX); + DONE; +}") + +(define_split + [(set (match_operand:DF 0 "memory_operand" "") + (match_operand:DF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (match_scratch:SI 3 "X"))] + "TARGET_SH4 && ! 
TARGET_FMOVD && reload_completed + && true_regnum (operands[1]) >= FIRST_FP_REG" + [(const_int 0)] + " +{ + int regno = true_regnum (operands[1]); + rtx insn, addr, adjust = NULL_RTX; + + operands[0] = copy_rtx (operands[0]); + PUT_MODE (operands[0], SFmode); + insn = emit_insn (gen_movsf_ie (operands[0], + gen_rtx (REG, SFmode, + regno + ! TARGET_LITTLE_ENDIAN), + operands[2])); + operands[0] = copy_rtx (operands[0]); + addr = XEXP (operands[0], 0); + if (GET_CODE (addr) != PRE_DEC) + { + adjust = gen_addsi3 (addr, addr, GEN_INT (4)); + emit_insn_before (adjust, insn); + XEXP (operands[0], 0) = addr = gen_rtx (PRE_DEC, SImode, addr); + } + addr = XEXP (addr, 0); + if (! adjust) + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX); + insn = emit_insn (gen_movsf_ie (operands[0], + gen_rtx (REG, SFmode, + regno + !! TARGET_LITTLE_ENDIAN), + operands[2])); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, addr, NULL_RTX); + DONE; +}") + +;; The '&' for operand 2 is not really true, but push_secondary_reload +;; insists on it. +;; Operand 1 must accept FPUL_REGS in case fpul is reloaded to memory, +;; to avoid a bogus tertiary reload. +;; We need a tertiary reload when a floating point register is reloaded +;; to memory, so the predicate for operand 0 must accept this, while the +;; constraint of operand 1 must reject the secondary reload register. +;; Thus, the secondary reload register for this case has to be GENERAL_REGS, +;; too. +;; By having the predicate for operand 0 reject any register, we make +;; sure that the ordinary moves that just need an intermediate register +;; won't get a bogus tertiary reload. 
+;; We use tertiary_reload_operand instead of memory_operand here because
+;; memory_operand rejects operands that are not directly addressable, e.g.:
+;; (mem:SF (plus:SI (reg:SI 14 r14)
+;; (const_int 132)))
+
+(define_expand "reload_outsf"
+  [(parallel [(set (match_operand:SF 2 "register_operand" "=&r")
+ (match_operand:SF 1 "register_operand" "y"))
+ (clobber (scratch:SI))])
+   (parallel [(set (match_operand:SF 0 "tertiary_reload_operand" "=m")
+ (match_dup 2))
+ (clobber (scratch:SI))])]
+  ""
+  "")
+
 ;; If the output is a register and the input is memory or a register, we have
 ;; to be careful and see which word needs to be loaded first.
 
@@ -2129,14 +2791,33 @@
 "
 {
   if (prepare_move_operands (operands, DFmode))
     DONE;
+  if (TARGET_SH4)
+    {
+      if (no_new_pseudos)
+	{
+	  /* ??? FIXME: This is only a stopgap fix. There is no guarantee
+	     that fpscr is in the right state. */
+	  emit_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+	  DONE;
+	}
+      emit_df_insn (gen_movdf_i4 (operands[0], operands[1], get_fpscr_rtx ()));
+      /* We need something to tag possible REG_LIBCALL notes on to. */
+      if (TARGET_FPU_SINGLE && rtx_equal_function_value_matters
+	  && GET_CODE (operands[0]) == REG)
+	emit_insn (gen_mov_nop (operands[0]));
+      DONE;
+    }
 }")
 
 (define_insn "movsf_i"
   [(set (match_operand:SF 0 "general_movdst_operand" "=r,r,r,r,m,l,r")
-	(match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,m,r,r,l"))]
+	(match_operand:SF 1 "general_movsrc_operand" "r,I,FQ,mr,r,r,l"))]
   "
-  ! TARGET_SH3E
+  (! TARGET_SH3E
+   /* ??? 
We provide some insn so that direct_{load,store}[SFmode] get set */ + || GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 3 + || GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 3) && (arith_reg_operand (operands[0], SFmode) || arith_reg_operand (operands[1], SFmode))" "@ @@ -2156,8 +2837,9 @@ [(set (match_operand:SF 0 "general_movdst_operand" "=f,r,f,f,fy,f,m,r,r,m,f,y,y,rf,r,y,y") (match_operand:SF 1 "general_movsrc_operand" - "f,r,G,H,FQ,m,f,FQ,m,r,y,f,>,fr,y,r,y")) - (clobber (match_scratch:SI 2 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))] + "f,r,G,H,FQ,mf,f,FQ,mr,r,y,f,>,fr,y,r,y")) + (use (match_operand:PSI 2 "fpscr_operand" "c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c,c")) + (clobber (match_scratch:SI 3 "=X,X,X,X,&z,X,X,X,X,X,X,X,X,y,X,X,X"))] "TARGET_SH3E && (arith_reg_operand (operands[0], SFmode) @@ -2181,16 +2863,19 @@ lds %1,%0 ! move optimized away" [(set_attr "type" "fmove,move,fmove,fmove,pcload,load,store,pcload,load,store,fmove,fmove,load,*,gp_fpul,gp_fpul,nil") - (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,*,2,2,0")]) + (set_attr "length" "*,*,*,*,4,*,*,*,*,*,2,2,2,4,2,2,0")]) (define_split [(set (match_operand:SF 0 "register_operand" "") (match_operand:SF 1 "register_operand" "")) + (use (match_operand:PSI 2 "fpscr_operand" "c")) (clobber (reg:SI 22))] "" [(parallel [(set (reg:SF 22) (match_dup 1)) + (use (match_dup 2)) (clobber (scratch:SI))]) (parallel [(set (match_dup 0) (reg:SF 22)) + (use (match_dup 2)) (clobber (scratch:SI))])] "") @@ -2204,17 +2889,70 @@ DONE; if (TARGET_SH3E) { - emit_insn (gen_movsf_ie (operands[0], operands[1])); + if (no_new_pseudos) + { + /* ??? FIXME: This is only a stopgap fix. There is no guarantee + that fpscr is in the right state. */ + emit_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + DONE; + } + emit_sf_insn (gen_movsf_ie (operands[0], operands[1], get_fpscr_rtx ())); + /* We need something to tag possible REG_LIBCALL notes on to. */ + if (! 
TARGET_FPU_SINGLE && rtx_equal_function_value_matters + && GET_CODE (operands[0]) == REG) + emit_insn (gen_mov_nop (operands[0])); DONE; } }") +(define_insn "mov_nop" + [(set (match_operand 0 "register_operand" "") (match_dup 0))] + "TARGET_SH3E" + "" + [(set_attr "length" "0") + (set_attr "type" "nil")]) + (define_expand "reload_insf" [(parallel [(set (match_operand:SF 0 "register_operand" "=f") (match_operand:SF 1 "immediate_operand" "FQ")) + (use (reg:PSI 48)) + (clobber (match_operand:SI 2 "register_operand" "=&z"))])] + "" + "") + +(define_expand "reload_insi" + [(parallel [(set (match_operand:SF 0 "register_operand" "=y") + (match_operand:SF 1 "immediate_operand" "FQ")) (clobber (match_operand:SI 2 "register_operand" "=&z"))])] "" "") + +(define_insn "*movsi_y" + [(set (match_operand:SI 0 "register_operand" "=y,y") + (match_operand:SI 1 "immediate_operand" "Qi,I")) + (clobber (match_scratch:SI 3 "=&z,r"))] + "TARGET_SH3E + && (reload_in_progress || reload_completed)" + "#" + [(set_attr "length" "4") + (set_attr "type" "pcload,move")]) + +(define_split + [(set (match_operand:SI 0 "register_operand" "y") + (match_operand:SI 1 "immediate_operand" "I")) + (clobber (match_operand:SI 2 "register_operand" "r"))] + "" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (match_dup 2))] + "") + +(define_split + [(set (match_operand:SI 0 "register_operand" "y") + (match_operand:SI 1 "memory_operand" ">")) + (clobber (reg:SI 0))] + "" + [(set (match_dup 0) (match_dup 1))] + "") ;; ------------------------------------------------------------------------ ;; Define the real conditional branch instructions. 
@@ -2289,7 +3027,7 @@ "" " { - if (GET_MODE (sh_compare_op0) == SFmode) + if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) { rtx tmp = sh_compare_op0; sh_compare_op0 = sh_compare_op1; @@ -2396,6 +3134,7 @@ (define_insn "calli" [(call (mem:SI (match_operand:SI 0 "arith_reg_operand" "r")) (match_operand 1 "" "")) + (use (reg:SI 48)) (clobber (reg:SI 17))] "" "jsr @%0%#" @@ -2406,6 +3145,7 @@ [(set (match_operand 0 "" "=rf") (call (mem:SI (match_operand:SI 1 "arith_reg_operand" "r")) (match_operand 2 "" ""))) + (use (reg:SI 48)) (clobber (reg:SI 17))] "" "jsr @%1%#" @@ -2415,6 +3155,7 @@ (define_expand "call" [(parallel [(call (mem:SI (match_operand 0 "arith_reg_operand" "")) (match_operand 1 "" "")) + (use (reg:SI 48)) (clobber (reg:SI 17))])] "" "operands[0] = force_reg (SImode, XEXP (operands[0], 0));") @@ -2423,6 +3164,7 @@ [(parallel [(set (match_operand 0 "arith_reg_operand" "") (call (mem:SI (match_operand 1 "arith_reg_operand" "")) (match_operand 2 "" ""))) + (use (reg:SI 48)) (clobber (reg:SI 17))])] "" "operands[1] = force_reg (SImode, XEXP (operands[1], 0));") @@ -2656,9 +3398,16 @@ }" [(set_attr "length" "4")]) +;; ??? This is not the proper place to invoke another compiler pass; +;; Alas, there is no proper place to put it. +;; ??? This is also an odd place for the call to emit_fpscr_use. It +;; would be all right if it were for an define_expand for return, but +;; that doesn't mix with emitting a prologue. 
(define_insn "return" [(return)] - "reload_completed" + "emit_fpscr_use (), + remove_dead_before_cse (), + reload_completed" "%@ %#" [(set_attr "type" "return") (set_attr "needs_delay_slot" "yes")]) @@ -2726,19 +3475,15 @@ "" " { - if (GET_MODE (sh_compare_op0) == SFmode) + if (GET_MODE_CLASS (GET_MODE (sh_compare_op0)) == MODE_FLOAT) { if (TARGET_IEEE) { rtx t_reg = gen_rtx (REG, SImode, T_REG); rtx lab = gen_label_rtx (); - emit_insn (gen_rtx (SET, VOIDmode, t_reg, - gen_rtx (EQ, SImode, sh_compare_op0, - sh_compare_op1))); + prepare_scc_operands (EQ); emit_jump_insn (gen_branch_true (lab)); - emit_insn (gen_rtx (SET, VOIDmode, t_reg, - gen_rtx (GT, SImode, sh_compare_op0, - sh_compare_op1))); + prepare_scc_operands (GT); emit_label (lab); emit_insn (gen_movt (operands[0])); } @@ -2963,7 +3708,7 @@ (use (match_operand:SI 0 "arith_reg_operand" "r")) (clobber (reg:SI 17)) (clobber (reg:SI 0))])] - "" + "! TARGET_HARD_SH4" "jsr @%0%#" [(set_attr "type" "sfunc") (set_attr "needs_delay_slot" "yes")]) @@ -2978,7 +3723,38 @@ (clobber (reg:SI 5)) (clobber (reg:SI 6)) (clobber (reg:SI 0))])] - "" + "! 
TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_move_real_i4" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (clobber (reg:SI 17)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2))])] + "TARGET_HARD_SH4" + "jsr @%0%#" + [(set_attr "type" "sfunc") + (set_attr "needs_delay_slot" "yes")]) + +(define_insn "block_lump_real_i4" + [(parallel [(set (mem:BLK (reg:SI 4)) + (mem:BLK (reg:SI 5))) + (use (match_operand:SI 0 "arith_reg_operand" "r")) + (use (reg:SI 6)) + (clobber (reg:SI 17)) + (clobber (reg:SI 4)) + (clobber (reg:SI 5)) + (clobber (reg:SI 6)) + (clobber (reg:SI 0)) + (clobber (reg:SI 1)) + (clobber (reg:SI 2)) + (clobber (reg:SI 3))])] + "TARGET_HARD_SH4" "jsr @%0%#" [(set_attr "type" "sfunc") (set_attr "needs_delay_slot" "yes")]) @@ -2989,43 +3765,188 @@ ;; ??? All patterns should have a type attribute. -(define_insn "addsf3" +(define_expand "fpu_switch0" + [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values")) + (set (match_dup 2) (match_dup 1))] + "" + " +{ + operands[1] = gen_rtx (MEM, PSImode, operands[0]); + RTX_UNCHANGING_P (operands[1]) = 1; + operands[2] = get_fpscr_rtx (); +}") + +(define_expand "fpu_switch1" + [(set (match_operand:SI 0 "" "") (symbol_ref "__fpscr_values")) + (set (match_dup 1) (plus:SI (match_dup 0) (const_int 4))) + (set (match_dup 3) (match_dup 2))] + "" + " +{ + operands[1] = gen_reg_rtx (SImode); + operands[2] = gen_rtx (MEM, PSImode, operands[1]); + RTX_UNCHANGING_P (operands[2]) = 1; + operands[3] = get_fpscr_rtx (); +}") + +(define_expand "movpsi" + [(set (match_operand:PSI 0 "register_operand" "") + (match_operand:PSI 1 "general_movsrc_operand" ""))] + "" + "") + +;; The c / m alternative is a fake to guide reload to load directly into +;; fpscr, since reload doesn't know how to use post-increment. 
+;; GO_IF_LEGITIMATE_ADDRESS guards about bogus addresses before reload, +;; SECONDARY_INPUT_RELOAD_CLASS does this during reload, and the insn's +;; predicate after reload. +;; The gp_fpul type for r/!c might look a bit odd, but it actually schedules +;; like a gpr <-> fpul move. +(define_insn "fpu_switch" + [(set (match_operand:PSI 0 "register_operand" "c,c,r,c,c,r,m,r") + (match_operand:PSI 1 "general_movsrc_operand" "c,>,m,m,r,r,r,!c"))] + "! reload_completed + || true_regnum (operands[0]) != FPSCR_REG || GET_CODE (operands[1]) != MEM + || GET_CODE (XEXP (operands[1], 0)) != PLUS" + "@ + ! precision stays the same + lds.l %1,fpscr + mov.l %1,%0 + # + lds %1,fpscr + mov %1,%0 + mov.l %1,%0 + sts fpscr,%0" + [(set_attr "length" "0,2,2,4,2,2,2,2") + (set_attr "type" "dfp_conv,dfp_conv,load,dfp_conv,dfp_conv,move,store,gp_fpul")]) + +(define_split + [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))] + "find_regno_note (insn, REG_DEAD, true_regnum (operands[0]))" + [(set (match_dup 0) (match_dup 0))] + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_rtx (MEM, PSImode, + gen_rtx (POST_INC, Pmode, + operands[0])))); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX); +}") + +(define_split + [(set (reg:PSI 48) (mem:PSI (match_operand:SI 0 "register_operand" "r")))] + "" + [(set (match_dup 0) (plus:SI (match_dup 0) (const_int -4)))] + " +{ + rtx insn = emit_insn (gen_fpu_switch (get_fpscr_rtx (), + gen_rtx (MEM, PSImode, + gen_rtx (POST_INC, Pmode, + operands[0])))); + REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_INC, operands[0], NULL_RTX); +}") + +;; ??? This uses the fp unit, but has no type indicating that. +;; If we did that, this would either give a bogus latency or introduce +;; a bogus FIFO constraint. +;; Since this insn is currently only used for prologues/epilogues, +;; it is probably best to claim no function unit, which matches the +;; current setting. 
+(define_insn "toggle_sz" + [(set (reg:PSI 48) (xor:PSI (reg:PSI 48) (const_int 1048576)))] + "TARGET_SH4" + "fschg") + +(define_expand "addsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_addsf3_i, operands); DONE; }") + +(define_insn "addsf3_i" [(set (match_operand:SF 0 "arith_reg_operand" "=f") (plus:SF (match_operand:SF 1 "arith_reg_operand" "%0") - (match_operand:SF 2 "arith_reg_operand" "f")))] + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] "TARGET_SH3E" "fadd %2,%0" [(set_attr "type" "fp")]) -(define_insn "subsf3" - [(set (match_operand:SF 0 "arith_reg_operand" "=f") - (minus:SF (match_operand:SF 1 "arith_reg_operand" "0") - (match_operand:SF 2 "arith_reg_operand" "f")))] +(define_expand "subsf3" + [(match_operand:SF 0 "fp_arith_reg_operand" "") + (match_operand:SF 1 "fp_arith_reg_operand" "") + (match_operand:SF 2 "fp_arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_subsf3_i, operands); DONE; }") + +(define_insn "subsf3_i" + [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f") + (minus:SF (match_operand:SF 1 "fp_arith_reg_operand" "0") + (match_operand:SF 2 "fp_arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] "TARGET_SH3E" "fsub %2,%0" [(set_attr "type" "fp")]) -(define_insn "mulsf3" +;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR +;; register in feeding fp instructions. Thus, we cannot generate fmac for +;; mixed-precision SH4 targets. To allow it to be still generated for the +;; SH3E, we use a separate insn for SH3E mulsf3. 
+ +(define_expand "mulsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + " +{ + if (TARGET_SH4) + expand_sf_binop (&gen_mulsf3_i4, operands); + else + emit_insn (gen_mulsf3_ie (operands[0], operands[1], operands[2])); + DONE; +}") + +(define_insn "mulsf3_i4" [(set (match_operand:SF 0 "arith_reg_operand" "=f") (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0") - (match_operand:SF 2 "arith_reg_operand" "f")))] + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] "TARGET_SH3E" "fmul %2,%0" [(set_attr "type" "fp")]) +(define_insn "mulsf3_ie" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (mult:SF (match_operand:SF 1 "arith_reg_operand" "%0") + (match_operand:SF 2 "arith_reg_operand" "f")))] + "TARGET_SH3E && ! TARGET_SH4" + "fmul %2,%0" + [(set_attr "type" "fp")]) + (define_insn "*macsf3" [(set (match_operand:SF 0 "arith_reg_operand" "=f") (plus:SF (mult:SF (match_operand:SF 1 "arith_reg_operand" "%w") (match_operand:SF 2 "arith_reg_operand" "f")) - (match_operand:SF 3 "arith_reg_operand" "0")))] - "TARGET_SH3E" + (match_operand:SF 3 "arith_reg_operand" "0"))) + (use (match_operand:PSI 4 "fpscr_operand" "c"))] + "TARGET_SH3E && ! 
TARGET_SH4" "fmac fr0,%2,%0" [(set_attr "type" "fp")]) -(define_insn "divsf3" +(define_expand "divsf3" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "") + (match_operand:SF 2 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_binop (&gen_divsf3_i, operands); DONE; }") + +(define_insn "divsf3_i" [(set (match_operand:SF 0 "arith_reg_operand" "=f") (div:SF (match_operand:SF 1 "arith_reg_operand" "0") - (match_operand:SF 2 "arith_reg_operand" "f")))] + (match_operand:SF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] "TARGET_SH3E" "fdiv %2,%0" [(set_attr "type" "fdiv")]) @@ -3033,15 +3954,34 @@ (define_expand "floatsisf2" [(set (reg:SI 22) (match_operand:SI 1 "arith_reg_operand" "")) - (set (match_operand:SF 0 "arith_reg_operand" "") - (float:SF (reg:SI 22)))] + (parallel [(set (match_operand:SF 0 "arith_reg_operand" "") + (float:SF (reg:SI 22))) + (use (match_dup 2))])] "TARGET_SH3E" - "") + " +{ + if (TARGET_SH4) + { + emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), + operands[1])); + emit_sf_insn (gen_floatsisf2_i4 (operands[0], get_fpscr_rtx ())); + DONE; + } + operands[2] = get_fpscr_rtx (); +}") + +(define_insn "floatsisf2_i4" + [(set (match_operand:SF 0 "arith_reg_operand" "=f") + (float:SF (reg:SI 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH3E" + "float fpul,%0" + [(set_attr "type" "fp")]) (define_insn "*floatsisf2_ie" [(set (match_operand:SF 0 "arith_reg_operand" "=f") (float:SF (reg:SI 22)))] - "TARGET_SH3E" + "TARGET_SH3E && ! 
TARGET_SH4" "float fpul,%0" [(set_attr "type" "fp")]) @@ -3051,26 +3991,62 @@ (set (match_operand:SI 0 "arith_reg_operand" "=r") (reg:SI 22))] "TARGET_SH3E" - "") + " +{ + if (TARGET_SH4) + { + emit_sf_insn (gen_fix_truncsfsi2_i4 (operands[1], get_fpscr_rtx ())); + emit_insn (gen_rtx (SET, VOIDmode, operands[0], + gen_rtx (REG, SImode, 22))); + DONE; + } +}") + +(define_insn "fix_truncsfsi2_i4" + [(set (reg:SI 22) + (fix:SI (match_operand:SF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "fp")]) + +(define_insn "fix_truncsfsi2_i4_2" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (reg:SI 48)) + (clobber (reg:SI 22))] + "TARGET_SH4" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1))) + (use (match_dup 2))]) + (set (match_dup 0) (reg:SI 22))]) (define_insn "*fixsfsi" [(set (reg:SI 22) (fix:SI (match_operand:SF 0 "arith_reg_operand" "f")))] - "TARGET_SH3E" + "TARGET_SH3E && ! TARGET_SH4" "ftrc %0,fpul" [(set_attr "type" "fp")]) (define_insn "cmpgtsf_t" [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f") (match_operand:SF 1 "arith_reg_operand" "f")))] - "TARGET_SH3E" + "TARGET_SH3E && ! TARGET_SH4" "fcmp/gt %1,%0" [(set_attr "type" "fp")]) (define_insn "cmpeqsf_t" [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") (match_operand:SF 1 "arith_reg_operand" "f")))] - "TARGET_SH3E" + "TARGET_SH3E && ! 
TARGET_SH4" "fcmp/eq %1,%0" [(set_attr "type" "fp")]) @@ -3078,11 +4054,36 @@ [(set (reg:SI 18) (ior:SI (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") (match_operand:SF 1 "arith_reg_operand" "f"))))] - "TARGET_SH3E && TARGET_IEEE" + "TARGET_SH3E && TARGET_IEEE && ! TARGET_SH4" "* return output_ieee_ccmpeq (insn, operands);" [(set_attr "length" "4")]) +(define_insn "cmpgtsf_t_i4" + [(set (reg:SI 18) (gt:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "cmpeqsf_t_i4" + [(set (reg:SI 18) (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "*ieee_ccmpeqsf_t_4" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:SF 0 "arith_reg_operand" "f") + (match_operand:SF 1 "arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + (define_expand "cmpsf" [(set (reg:SI 18) (compare (match_operand:SF 0 "arith_operand" "") (match_operand:SF 1 "arith_operand" "")))] @@ -3094,25 +4095,285 @@ DONE; }") -(define_insn "negsf2" +(define_expand "negsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_negsf2_i, operands); DONE; }") + +(define_insn "negsf2_i" [(set (match_operand:SF 0 "arith_reg_operand" "=f") - (neg:SF (match_operand:SF 1 "arith_reg_operand" "0")))] + (neg:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] "TARGET_SH3E" "fneg %0" - [(set_attr "type" "fp")]) + [(set_attr "type" "fmove")]) -(define_insn "sqrtsf2" +(define_expand "sqrtsf2" 
+ [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_sqrtsf2_i, operands); DONE; }") + +(define_insn "sqrtsf2_i" [(set (match_operand:SF 0 "arith_reg_operand" "=f") - (sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0")))] + (sqrt:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] "TARGET_SH3E" "fsqrt %0" [(set_attr "type" "fdiv")]) -(define_insn "abssf2" +(define_expand "abssf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH3E" + "{ expand_sf_unop (&gen_abssf2_i, operands); DONE; }") + +(define_insn "abssf2_i" [(set (match_operand:SF 0 "arith_reg_operand" "=f") - (abs:SF (match_operand:SF 1 "arith_reg_operand" "0")))] + (abs:SF (match_operand:SF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] "TARGET_SH3E" "fabs %0" + [(set_attr "type" "fmove")]) + +(define_expand "adddf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_adddf3_i, operands); DONE; }") + +(define_insn "adddf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (plus:DF (match_operand:DF 1 "arith_reg_operand" "%0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fadd %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "subdf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_subdf3_i, operands); DONE; }") + +(define_insn "subdf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (minus:DF (match_operand:DF 1 "arith_reg_operand" "0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" 
"c"))] + "TARGET_SH4" + "fsub %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "muldf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_muldf3_i, operands); DONE; }") + +(define_insn "muldf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (mult:DF (match_operand:DF 1 "arith_reg_operand" "%0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fmul %2,%0" + [(set_attr "type" "dfp_arith")]) + +(define_expand "divdf3" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "") + (match_operand:DF 2 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_binop (&gen_divdf3_i, operands); DONE; }") + +(define_insn "divdf3_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (div:DF (match_operand:DF 1 "arith_reg_operand" "0") + (match_operand:DF 2 "arith_reg_operand" "f"))) + (use (match_operand:PSI 3 "fpscr_operand" "c"))] + "TARGET_SH4" + "fdiv %2,%0" + [(set_attr "type" "dfdiv")]) + +(define_expand "floatsidf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:SI 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (REG, SImode, 22), operands[1])); + emit_df_insn (gen_floatsidf2_i (operands[0], get_fpscr_rtx ())); + DONE; +}") + +(define_insn "floatsidf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (float:DF (reg:SI 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "float fpul,%0" + [(set_attr "type" "dfp_conv")]) + +(define_expand "fix_truncdfsi2" + [(match_operand:SI 0 "arith_reg_operand" "=r") + (match_operand:DF 1 "arith_reg_operand" "f")] + "TARGET_SH4" + " +{ + emit_df_insn (gen_fix_truncdfsi2_i (operands[1], get_fpscr_rtx ())); + emit_insn (gen_rtx (SET, VOIDmode, operands[0], gen_rtx (REG, SImode, 22))); + DONE; 
+}") + +(define_insn "fix_truncdfsi2_i" + [(set (reg:SI 22) + (fix:SI (match_operand:DF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "ftrc %0,fpul" + [(set_attr "type" "dfp_conv")]) + +(define_insn "fix_truncdfsi2_i4" + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + "#" + [(set_attr "length" "4")]) + +(define_split + [(set (match_operand:SI 0 "arith_reg_operand" "=r") + (fix:SI (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c")) + (clobber (reg:SI 22))] + "TARGET_SH4" + [(parallel [(set (reg:SI 22) (fix:SI (match_dup 1))) + (use (match_dup 2))]) + (set (match_dup 0) (reg:SI 22))]) + +(define_insn "cmpgtdf_t" + [(set (reg:SI 18) (gt:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/gt %1,%0" + [(set_attr "type" "dfp_cmp")]) + +(define_insn "cmpeqdf_t" + [(set (reg:SI 18) (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcmp/eq %1,%0" + [(set_attr "type" "dfp_cmp")]) + +(define_insn "*ieee_ccmpeqdf_t" + [(set (reg:SI 18) (ior:SI (reg:SI 18) + (eq:SI (match_operand:DF 0 "arith_reg_operand" "f") + (match_operand:DF 1 "arith_reg_operand" "f")))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_IEEE && TARGET_SH4" + "* return output_ieee_ccmpeq (insn, operands);" + [(set_attr "length" "4")]) + +(define_expand "cmpdf" + [(set (reg:SI 18) (compare (match_operand:DF 0 "arith_operand" "") + (match_operand:DF 1 "arith_operand" "")))] + "TARGET_SH4" + " +{ + sh_compare_op0 = operands[0]; + sh_compare_op1 = operands[1]; + DONE; +}") + +(define_expand "negdf2" + [(match_operand:DF 0 
"arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_negdf2_i, operands); DONE; }") + +(define_insn "negdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (neg:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fneg %0" + [(set_attr "type" "fmove")]) + +(define_expand "sqrtdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_sqrtdf2_i, operands); DONE; }") + +(define_insn "sqrtdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (sqrt:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fsqrt %0" + [(set_attr "type" "dfdiv")]) + +(define_expand "absdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + "{ expand_df_unop (&gen_absdf2_i, operands); DONE; }") + +(define_insn "absdf2_i" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (abs:DF (match_operand:DF 1 "arith_reg_operand" "0"))) + (use (match_operand:PSI 2 "fpscr_operand" "c"))] + "TARGET_SH4" + "fabs %0" + [(set_attr "type" "fmove")]) + +(define_expand "extendsfdf2" + [(match_operand:DF 0 "arith_reg_operand" "") + (match_operand:SF 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_sf_insn (gen_movsf_ie (gen_rtx (REG, SFmode, 22), operands[1], + get_fpscr_rtx ())); + emit_df_insn (gen_extendsfdf2_i4 (operands[0], get_fpscr_rtx ())); + DONE; +}") + +(define_insn "extendsfdf2_i4" + [(set (match_operand:DF 0 "arith_reg_operand" "=f") + (float_extend:DF (reg:SF 22))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcnvsd fpul,%0" + [(set_attr "type" "fp")]) + +(define_expand "truncdfsf2" + [(match_operand:SF 0 "arith_reg_operand" "") + (match_operand:DF 1 "arith_reg_operand" "")] + "TARGET_SH4" + " +{ + emit_df_insn 
(gen_truncdfsf2_i4 (operands[1], get_fpscr_rtx ())); + emit_sf_insn (gen_movsf_ie (operands[0], gen_rtx (REG, SFmode, 22), + get_fpscr_rtx ())); + DONE; +}") + +(define_insn "truncdfsf2_i4" + [(set (reg:SF 22) + (float_truncate:SF (match_operand:DF 0 "arith_reg_operand" "f"))) + (use (match_operand:PSI 1 "fpscr_operand" "c"))] + "TARGET_SH4" + "fcnvds %0,fpul" [(set_attr "type" "fp")]) ;; Bit field extract patterns. These give better code for packed bitfields, |