diff options
author | dje <dje@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-06-06 02:32:27 +0000 |
---|---|---|
committer | dje <dje@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-06-06 02:32:27 +0000 |
commit | f782d9e9cb3c9d1e0733dcd0d07bd510c56151db (patch) | |
tree | 0f1235f2b78072790a5552cb4f522f677335fbe8 /gcc/config | |
parent | c6448de0fc63fbb1b4080aa98a009fefb6ecb490 (diff) |
* config/rs6000/rs6000.c (rs6000_emit_swdivsf): New function.
(rs6000_emit_swdivdf): New function.
* config/rs6000/rs6000.md (fres): New pattern.
(divsf3): Add approximation through rs6000_emit_swdivsf.
(fred): New pattern.
(divdf3): Add approximation through rs6000_emit_swdivdf.
* config/rs6000/rs6000-protos.h (rs6000_emit_swdivsf): Declare.
(rs6000_emit_swdivdf): Declare.
* config/rs6000/rs6000.opt (mswdiv): New option.
* doc/invoke.texi (RS/6000 and PowerPC Options): Document mswdiv.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@100645 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 103 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 33 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 4 |
4 files changed, 140 insertions, 2 deletions
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 63ffe4fb9fc..041169bde81 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -82,6 +82,8 @@ extern int rs6000_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
 extern void rs6000_emit_minmax (rtx, enum rtx_code, rtx, rtx);
 extern void rs6000_emit_sync (enum rtx_code, enum machine_mode,
                               rtx, rtx, rtx, rtx, bool);
+extern void rs6000_emit_swdivsf (rtx, rtx, rtx);
+extern void rs6000_emit_swdivdf (rtx, rtx, rtx);
 extern void output_toc (FILE *, rtx, int, enum machine_mode);
 extern void rs6000_initialize_trampoline (rtx, rtx, rtx);
 extern rtx rs6000_longcall_ref (rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 250644511b4..9626885a664 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -17955,6 +17955,109 @@ rs6000_memory_move_cost (enum machine_mode mode, enum reg_class class,
     return 4 + rs6000_register_move_cost (mode, class, GENERAL_REGS);
 }
 
+/* Emit RTL that sets RES to a Newton-Raphson approximation of the
+   single-precision divide N/D.  Assumes no trapping math, finite arguments.  */
+
+void
+rs6000_emit_swdivsf (rtx res, rtx n, rtx d)
+{
+  rtx x0, e0, e1, y1, u0, v0, one;
+
+  x0 = gen_reg_rtx (SFmode);
+  e0 = gen_reg_rtx (SFmode);
+  e1 = gen_reg_rtx (SFmode);
+  y1 = gen_reg_rtx (SFmode);
+  u0 = gen_reg_rtx (SFmode);
+  v0 = gen_reg_rtx (SFmode);
+  one = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode));
+
+  /* x0 = 1./d estimate */
+  emit_insn (gen_rtx_SET (VOIDmode, x0,
+                          gen_rtx_UNSPEC (SFmode, gen_rtvec (1, d),
+                                          UNSPEC_FRES)));
+  /* e0 = 1. - d * x0 */
+  emit_insn (gen_rtx_SET (VOIDmode, e0,
+                          gen_rtx_MINUS (SFmode, one,
+                                         gen_rtx_MULT (SFmode, d, x0))));
+  /* e1 = e0 + e0 * e0 */
+  emit_insn (gen_rtx_SET (VOIDmode, e1,
+                          gen_rtx_PLUS (SFmode,
+                                        gen_rtx_MULT (SFmode, e0, e0), e0)));
+  /* y1 = x0 + e1 * x0 */
+  emit_insn (gen_rtx_SET (VOIDmode, y1,
+                          gen_rtx_PLUS (SFmode,
+                                        gen_rtx_MULT (SFmode, e1, x0), x0)));
+  /* u0 = n * y1 */
+  emit_insn (gen_rtx_SET (VOIDmode, u0,
+                          gen_rtx_MULT (SFmode, n, y1)));
+  /* v0 = n - d * u0 */
+  emit_insn (gen_rtx_SET (VOIDmode, v0,
+                          gen_rtx_MINUS (SFmode, n,
+                                         gen_rtx_MULT (SFmode, d, u0))));
+  /* res = u0 + v0 * y1 */
+  emit_insn (gen_rtx_SET (VOIDmode, res,
+                          gen_rtx_PLUS (SFmode,
+                                        gen_rtx_MULT (SFmode, v0, y1), u0)));
+}
+
+/* Emit RTL that sets RES to a Newton-Raphson approximation of the
+   double-precision divide N/D.  Assumes no trapping math, finite arguments.  */
+
+void
+rs6000_emit_swdivdf (rtx res, rtx n, rtx d)
+{
+  rtx x0, e0, e1, e2, y1, y2, y3, u0, v0, one;
+
+  x0 = gen_reg_rtx (DFmode);
+  e0 = gen_reg_rtx (DFmode);
+  e1 = gen_reg_rtx (DFmode);
+  e2 = gen_reg_rtx (DFmode);
+  y1 = gen_reg_rtx (DFmode);
+  y2 = gen_reg_rtx (DFmode);
+  y3 = gen_reg_rtx (DFmode);
+  u0 = gen_reg_rtx (DFmode);
+  v0 = gen_reg_rtx (DFmode);
+  one = force_reg (DFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, DFmode));
+
+  /* x0 = 1./d estimate */
+  emit_insn (gen_rtx_SET (VOIDmode, x0,
+                          gen_rtx_UNSPEC (DFmode, gen_rtvec (1, d),
+                                          UNSPEC_FRES)));
+  /* e0 = 1. - d * x0 (product is DFmode, the same mode as its operands) */
+  emit_insn (gen_rtx_SET (VOIDmode, e0,
+                          gen_rtx_MINUS (DFmode, one,
+                                         gen_rtx_MULT (DFmode, d, x0))));
+  /* y1 = x0 + e0 * x0 */
+  emit_insn (gen_rtx_SET (VOIDmode, y1,
+                          gen_rtx_PLUS (DFmode,
+                                        gen_rtx_MULT (DFmode, e0, x0), x0)));
+  /* e1 = e0 * e0 */
+  emit_insn (gen_rtx_SET (VOIDmode, e1,
+                          gen_rtx_MULT (DFmode, e0, e0)));
+  /* y2 = y1 + e1 * y1 */
+  emit_insn (gen_rtx_SET (VOIDmode, y2,
+                          gen_rtx_PLUS (DFmode,
+                                        gen_rtx_MULT (DFmode, e1, y1), y1)));
+  /* e2 = e1 * e1 */
+  emit_insn (gen_rtx_SET (VOIDmode, e2,
+                          gen_rtx_MULT (DFmode, e1, e1)));
+  /* y3 = y2 + e2 * y2 */
+  emit_insn (gen_rtx_SET (VOIDmode, y3,
+                          gen_rtx_PLUS (DFmode,
+                                        gen_rtx_MULT (DFmode, e2, y2), y2)));
+  /* u0 = n * y3 */
+  emit_insn (gen_rtx_SET (VOIDmode, u0,
+                          gen_rtx_MULT (DFmode, n, y3)));
+  /* v0 = n - d * u0 */
+  emit_insn (gen_rtx_SET (VOIDmode, v0,
+                          gen_rtx_MINUS (DFmode, n,
+                                         gen_rtx_MULT (DFmode, d, u0))));
+  /* res = u0 + v0 * y3 */
+  emit_insn (gen_rtx_SET (VOIDmode, res,
+                          gen_rtx_PLUS (DFmode,
+                                        gen_rtx_MULT (DFmode, v0, y3), u0)));
+}
+
 /* Return an RTX representing where to find the function value of a
    function returning MODE.  */
 static rtx
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 97ac84ba753..633dd7c1ca3 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -58,6 +58,7 @@
    (UNSPEC_LWSYNC               36)
    (UNSPEC_ISYNC                37)
    (UNSPEC_POPCNTB              38)
+   (UNSPEC_FRES                 39)
   ])
 
 ;;
@@ -4640,12 +4641,26 @@
   "{fm|fmul} %0,%1,%2"
   [(set_attr "type" "dmul")])
 
+(define_insn "fres"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+        (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+  "TARGET_PPC_GFXOPT && flag_finite_math_only"
+  "fres %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_expand "divsf3"
   [(set (match_operand:SF 0 "gpc_reg_operand" "")
         (div:SF (match_operand:SF 1 "gpc_reg_operand" "")
                 (match_operand:SF 2 "gpc_reg_operand" "")))]
   "TARGET_HARD_FLOAT"
-  "")
+{
+  if (swdiv && !optimize_size && TARGET_PPC_GFXOPT
+      && flag_finite_math_only && !flag_trapping_math)
+    {
+      rs6000_emit_swdivsf (operands[0], operands[1], operands[2]);
+      DONE;
+    }
+})
 
 (define_insn ""
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
@@ -5028,12 +5043,26 @@
   "{fm|fmul} %0,%1,%2"
   [(set_attr "type" "dmul")])
 
+(define_insn "fred"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
+        (unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRES))]
+  "TARGET_POPCNTB && flag_finite_math_only"
+  "fre %0,%1"
+  [(set_attr "type" "fp")])
+
 (define_expand "divdf3"
   [(set (match_operand:DF 0 "gpc_reg_operand" "")
         (div:DF (match_operand:DF 1 "gpc_reg_operand" "")
                 (match_operand:DF 2 "gpc_reg_operand" "")))]
   "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)"
-  "")
+{
+  if (swdiv && !optimize_size && TARGET_POPCNTB
+      && flag_finite_math_only && !flag_trapping_math)
+    {
+      rs6000_emit_swdivdf (operands[0], operands[1], operands[2]);
+      DONE;
+    }
+})
 
 (define_insn "*divdf3_fpr"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index fd5d3b09338..7a08c678462 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -123,6 +123,10 @@ mxl-compat
 Target Report Var(TARGET_XL_COMPAT)
 Conform more closely to IBM XLC semantics
 
+mswdiv
+Target Report Var(swdiv)
+Generate software floating point divide for better throughput
+
 mno-fp-in-toc
 Target Report RejectNegative Mask(NO_FP_IN_TOC)
 Do not place floating point constants in TOC