aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Meissner <meissner@linux.vnet.ibm.com>2010-04-01 17:20:58 +0000
committerMichael Meissner <meissner@linux.vnet.ibm.com>2010-04-01 17:20:58 +0000
commit0c831bb685c6872226b653c8802be6237cfa9bd8 (patch)
treedbcd4ef24ee3d53e1f19184e7df9d4d5f3f3ae4e
parent3c3686b95104e5042ba8d046bcd158e521f54378 (diff)
Add -mrecip changes to make gromacs faster
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/ibm/gcc-4_4-branch@157917 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r--gcc/ChangeLog.ibm35
-rw-r--r--gcc/config/rs6000/rs6000-builtin.def1
-rw-r--r--gcc/config/rs6000/rs6000-protos.h2
-rw-r--r--gcc/config/rs6000/rs6000.c153
-rw-r--r--gcc/config/rs6000/rs6000.md22
-rw-r--r--gcc/config/rs6000/rs6000.opt9
-rw-r--r--gcc/config/rs6000/vsx.md4
-rw-r--r--gcc/doc/extend.texi1
-rw-r--r--gcc/doc/invoke.texi24
9 files changed, 161 insertions, 90 deletions
diff --git a/gcc/ChangeLog.ibm b/gcc/ChangeLog.ibm
index 1d582a19d8a..87216504c3c 100644
--- a/gcc/ChangeLog.ibm
+++ b/gcc/ChangeLog.ibm
@@ -1,5 +1,40 @@
2010-03-31 Michael Meissner <meissner@linux.vnet.ibm.com>
+ * doc/extend.texi (__builtin_rsqrt): Document.
+
+ * doc/invoke.texi (-mrecip): Document.
+ (-mno-recip): Ditto.
+ (-mrecip-passes=n): New switch to control the number of passes for
+ reciprocal square root estimate.
+
+ * config/rs6000/rs6000-protos.h (rs6000_emit_swrsqrt): Rename from
+ rs6000_emit_swrsqrtsf.
+
+ * config/rs6000/rs6000.opt (-mrecip): Change documentation.
+ (-mrecip-passes=n): New switch.
+
+ * config/rs6000/rs6000-builtin.def (RS6000_BUILTIN_RSQRT): New
+ builtin.
+
+ * config/rs6000/rs6000.c (rs6000_override_options): Don't allow
+ -mvsx -mno-altivec. Set rs6000_recip_passes to 2 on power6 and
+ power7, or 3 on other machines. Add some enum casts.
+ (rs6000_expand_builtin): Add __builtin_rsqrt support.
+ (rs6000_init_builtins): Ditto.
+ (rs6000_builtin_reciprocal): Ditto.
+ (rs6000_emit_swdivdf): Emit correct type for MULT.
+ (rs6000_emit_swrsqrt): Rewrite. Support both single and double
+ precision. Add support for reducing the number of Newton-Raphson
+ passes on newer machines.
+
+ * config/rs6000/vsx.md (UNSPEC_VSX_RSQRTE): Delete.
+ (vsx_rsqrte<mode>2): Use UNSPEC_RSQRT.
+ (FP2): New iterator.
+ (rsqrt<mode>2): Rename from rsqrtsf2, and add DFmode support.
+ Call rs6000_emit_swrsqrt instead of rs6000_emit_swrsqrtsf.
+ (rsqrtsf_internal1): Rename from rsqrt_internal1. Add test for
+ TARGET_SINGLE_FLOAT for completeness.
+
(back ported from mainline, 2010-03-02, Jeff Law)
PR middle-end/42431
* reload1.c (rtx_p, substitute_stack): Declare.
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index e66e8c4318f..968f3321ae0 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -991,4 +991,5 @@ RS6000_BUILTIN(POWER7_BUILTIN_BPERMD, RS6000_BTC_CONST)
RS6000_BUILTIN(RS6000_BUILTIN_RECIP, RS6000_BTC_FP_PURE)
RS6000_BUILTIN(RS6000_BUILTIN_RECIPF, RS6000_BTC_FP_PURE)
RS6000_BUILTIN(RS6000_BUILTIN_RSQRTF, RS6000_BTC_FP_PURE)
+RS6000_BUILTIN(RS6000_BUILTIN_RSQRT, RS6000_BTC_FP_PURE)
RS6000_BUILTIN(RS6000_BUILTIN_BSWAP_HI, RS6000_BTC_CONST)
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index b41dbf39387..d6955616107 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -107,7 +107,7 @@ extern void rs6000_split_compare_and_swapqhi (rtx, rtx, rtx, rtx, rtx, rtx);
extern void rs6000_split_lock_test_and_set (rtx, rtx, rtx, rtx);
extern void rs6000_emit_swdivsf (rtx, rtx, rtx);
extern void rs6000_emit_swdivdf (rtx, rtx, rtx);
-extern void rs6000_emit_swrsqrtsf (rtx, rtx);
+extern void rs6000_emit_swrsqrt (rtx, rtx);
extern void output_toc (FILE *, rtx, int, enum machine_mode);
extern void rs6000_initialize_trampoline (rtx, rtx, rtx);
extern rtx rs6000_longcall_ref (rtx);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 6c70f2cd61b..a99a98ead79 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -2309,8 +2309,7 @@ rs6000_override_options (const char *default_cpu)
}
}
- /* Add some warnings for VSX. Enable -maltivec unless the user explicitly
- used -mno-altivec */
+ /* Add some warnings for VSX. */
if (TARGET_VSX)
{
const char *msg = NULL;
@@ -2331,14 +2330,20 @@ rs6000_override_options (const char *default_cpu)
msg = N_("-mvsx used with little endian code");
else if (TARGET_AVOID_XFORM > 0)
msg = N_("-mvsx needs indexed addressing");
+ else if (!TARGET_ALTIVEC && (target_flags_explicit & MASK_ALTIVEC))
+ {
+ if (target_flags_explicit & MASK_VSX)
+ msg = N_("-mvsx and -mno-altivec are incompatible");
+ else
+ msg = N_("-mno-altivec disables vsx");
+ }
if (msg)
{
warning (0, msg);
target_flags &= ~ MASK_VSX;
}
- else if (TARGET_VSX && !TARGET_ALTIVEC
- && (target_flags_explicit & MASK_ALTIVEC) == 0)
+ else if (TARGET_VSX && !TARGET_ALTIVEC)
target_flags |= MASK_ALTIVEC;
}
@@ -2496,6 +2501,11 @@ rs6000_override_options (const char *default_cpu)
|| rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7);
+ /* Set the default # of passes to use for -mrecip. */
+ if (rs6000_recip_passes < 0)
+ rs6000_recip_passes = (rs6000_cpu == PROCESSOR_POWER6
+ || rs6000_cpu == PROCESSOR_POWER7) ? 2 : 3;
+
/* Allow debug switches to override the above settings. */
if (TARGET_ALWAYS_HINT > 0)
rs6000_always_hint = TARGET_ALWAYS_HINT;
@@ -2524,7 +2534,8 @@ rs6000_override_options (const char *default_cpu)
else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
rs6000_sched_costly_dep = store_to_load_dep_costly;
else
- rs6000_sched_costly_dep = atoi (rs6000_sched_costly_dep_str);
+ rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
+ atoi (rs6000_sched_costly_dep_str));
}
/* Handle -minsert-sched-nops option. */
@@ -2540,7 +2551,8 @@ rs6000_override_options (const char *default_cpu)
else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
rs6000_sched_insert_nops = sched_finish_regroup_exact;
else
- rs6000_sched_insert_nops = atoi (rs6000_sched_insert_nops_str);
+ rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
+ atoi (rs6000_sched_insert_nops_str));
}
#ifdef TARGET_REGNAMES
@@ -10802,6 +10814,9 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
if (fcode == RS6000_BUILTIN_RSQRTF)
return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
+ if (fcode == RS6000_BUILTIN_RSQRT)
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
+
if (fcode == RS6000_BUILTIN_BSWAP_HI)
return rs6000_expand_unop_builtin (CODE_FOR_bswaphi2, exp, target);
@@ -11083,6 +11098,12 @@ rs6000_init_builtins (void)
"__builtin_rsqrtf");
def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrtf", ftype,
RS6000_BUILTIN_RSQRTF);
+
+ ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
+ RS6000_BUILTIN_RSQRT,
+ "__builtin_rsqrt");
+ def_builtin (MASK_PPC_GFXOPT, "__builtin_rsqrt", ftype,
+ RS6000_BUILTIN_RSQRT);
}
if (TARGET_POPCNTB)
{
@@ -24796,6 +24817,9 @@ rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
else
switch (fn)
{
+ case BUILT_IN_SQRT:
+ return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
+
case BUILT_IN_SQRTF:
return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
@@ -24875,7 +24899,7 @@ rs6000_emit_swdivdf (rtx dst, rtx n, rtx d)
/* e0 = 1. - d * x0 */
emit_insn (gen_rtx_SET (VOIDmode, e0,
gen_rtx_MINUS (DFmode, one,
- gen_rtx_MULT (SFmode, d, x0))));
+ gen_rtx_MULT (DFmode, d, x0))));
/* y1 = x0 + e0 * x0 */
emit_insn (gen_rtx_SET (VOIDmode, y1,
gen_rtx_PLUS (DFmode,
@@ -24908,88 +24932,63 @@ rs6000_emit_swdivdf (rtx dst, rtx n, rtx d)
}
-/* Newton-Raphson approximation of single-precision floating point rsqrt.
- Assumes no trapping math and finite arguments. */
+/* Newton-Raphson approximation of single/double-precision floating point
+ rsqrt. Assumes no trapping math and finite arguments. */
void
-rs6000_emit_swrsqrtsf (rtx dst, rtx src)
+rs6000_emit_swrsqrt (rtx dst, rtx src)
{
- rtx x0, x1, x2, y1, u0, u1, u2, v0, v1, v2, t0,
- half, one, halfthree, c1, cond, label;
+ enum machine_mode mode = GET_MODE (src);
+ rtx x0 = gen_reg_rtx (mode);
- x0 = gen_reg_rtx (SFmode);
- x1 = gen_reg_rtx (SFmode);
- x2 = gen_reg_rtx (SFmode);
- y1 = gen_reg_rtx (SFmode);
- u0 = gen_reg_rtx (SFmode);
- u1 = gen_reg_rtx (SFmode);
- u2 = gen_reg_rtx (SFmode);
- v0 = gen_reg_rtx (SFmode);
- v1 = gen_reg_rtx (SFmode);
- v2 = gen_reg_rtx (SFmode);
- t0 = gen_reg_rtx (SFmode);
- halfthree = gen_reg_rtx (SFmode);
- cond = gen_rtx_REG (CCFPmode, CR1_REGNO);
- label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
+ gcc_assert (flag_finite_math_only && !flag_trapping_math);
+ gcc_assert (mode == SFmode || mode == DFmode);
+
+ /* x0 = rsqrt estimate */
+ emit_insn (gen_rtx_SET (VOIDmode, x0,
+ gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
+ UNSPEC_RSQRT)));
- /* check 0.0, 1.0, NaN, Inf by testing src * src = src */
- emit_insn (gen_rtx_SET (VOIDmode, t0,
- gen_rtx_MULT (SFmode, src, src)));
+ if (rs6000_recip_passes > 0)
+ {
+ REAL_VALUE_TYPE dconst3_2;
+ int i;
+ rtx halfthree;
+ rtx y = gen_reg_rtx (mode);
+ rtx m;
+ rtx d;
- emit_insn (gen_rtx_SET (VOIDmode, cond,
- gen_rtx_COMPARE (CCFPmode, t0, src)));
- c1 = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
- emit_unlikely_jump (c1, label);
+ real_from_integer (&dconst3_2, VOIDmode, 3, 0, 0);
+ SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
+ d = CONST_DOUBLE_FROM_REAL_VALUE (dconst3_2, mode);
+ halfthree = force_reg (mode, d);
- half = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode));
- one = force_reg (SFmode, CONST_DOUBLE_FROM_REAL_VALUE (dconst1, SFmode));
+ /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
+ m = gen_rtx_MULT (mode, src, halfthree),
+ emit_insn (gen_rtx_SET (VOIDmode, y, gen_rtx_MINUS (mode, m, src)));
- /* halfthree = 1.5 = 1.0 + 0.5 */
- emit_insn (gen_rtx_SET (VOIDmode, halfthree,
- gen_rtx_PLUS (SFmode, one, half)));
+ for (i = 0; i < rs6000_recip_passes; i++)
+ {
+ rtx x1 = gen_reg_rtx (mode);
+ rtx u = gen_reg_rtx (mode);
+ rtx v = gen_reg_rtx (mode);
- /* x0 = rsqrt estimate */
- emit_insn (gen_rtx_SET (VOIDmode, x0,
- gen_rtx_UNSPEC (SFmode, gen_rtvec (1, src),
- UNSPEC_RSQRT)));
+ /* x1 = x0 * (1.5 - y * (x0 * x0)) */
+ emit_insn (gen_rtx_SET (VOIDmode, u,
+ gen_rtx_MULT (mode, x0, x0)));
- /* y1 = 0.5 * src = 1.5 * src - src -> fewer constants */
- emit_insn (gen_rtx_SET (VOIDmode, y1,
- gen_rtx_MINUS (SFmode,
- gen_rtx_MULT (SFmode, src, halfthree),
- src)));
+ m = gen_rtx_MULT (mode, y, u);
+ emit_insn (gen_rtx_SET (VOIDmode, v,
+ gen_rtx_MINUS (mode, halfthree, m)));
- /* x1 = x0 * (1.5 - y1 * (x0 * x0)) */
- emit_insn (gen_rtx_SET (VOIDmode, u0,
- gen_rtx_MULT (SFmode, x0, x0)));
- emit_insn (gen_rtx_SET (VOIDmode, v0,
- gen_rtx_MINUS (SFmode,
- halfthree,
- gen_rtx_MULT (SFmode, y1, u0))));
- emit_insn (gen_rtx_SET (VOIDmode, x1,
- gen_rtx_MULT (SFmode, x0, v0)));
-
- /* x2 = x1 * (1.5 - y1 * (x1 * x1)) */
- emit_insn (gen_rtx_SET (VOIDmode, u1,
- gen_rtx_MULT (SFmode, x1, x1)));
- emit_insn (gen_rtx_SET (VOIDmode, v1,
- gen_rtx_MINUS (SFmode,
- halfthree,
- gen_rtx_MULT (SFmode, y1, u1))));
- emit_insn (gen_rtx_SET (VOIDmode, x2,
- gen_rtx_MULT (SFmode, x1, v1)));
-
- /* dst = x2 * (1.5 - y1 * (x2 * x2)) */
- emit_insn (gen_rtx_SET (VOIDmode, u2,
- gen_rtx_MULT (SFmode, x2, x2)));
- emit_insn (gen_rtx_SET (VOIDmode, v2,
- gen_rtx_MINUS (SFmode,
- halfthree,
- gen_rtx_MULT (SFmode, y1, u2))));
- emit_insn (gen_rtx_SET (VOIDmode, dst,
- gen_rtx_MULT (SFmode, x2, v2)));
+ emit_insn (gen_rtx_SET (VOIDmode, x1,
+ gen_rtx_MULT (mode, x0, v)));
+ x0 = x1;
+ }
+ }
- emit_label (XEXP (label, 0));
+ emit_move_insn (dst, x0);
+ return;
}
/* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 9b323e262cf..6b95e0becd2 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -213,6 +213,13 @@
(DD "TARGET_DFP")
(TD "TARGET_DFP")])
+; Single/double precision
+(define_mode_iterator FP2 [
+ (SF "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_SINGLE_FLOAT) || TARGET_E500_SINGLE)")
+ (DF "TARGET_HARD_FLOAT
+ && ((TARGET_FPRS && TARGET_DOUBLE_FLOAT) || TARGET_E500_DOUBLE)")])
+
; Various instructions that come in SI and DI forms.
; A generic w/d attribute, for things like cmpw/cmpd.
(define_mode_attr wd [(QI "b") (HI "h") (SI "w") (DI "d")])
@@ -5861,22 +5868,23 @@
"fsqrt %0,%1"
[(set_attr "type" "dsqrt")])
-(define_expand "rsqrtsf2"
- [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
- (unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
- UNSPEC_RSQRT))]
+(define_expand "rsqrt<mode>2"
+ [(set (match_operand:FP2 0 "gpc_reg_operand" "")
+ (unspec:FP2 [(match_operand:FP2 1 "gpc_reg_operand" "")]
+ UNSPEC_RSQRT))]
"TARGET_RECIP && TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT && !optimize_size
&& flag_finite_math_only && !flag_trapping_math"
{
- rs6000_emit_swrsqrtsf (operands[0], operands[1]);
+ rs6000_emit_swrsqrt (operands[0], operands[1]);
DONE;
})
-(define_insn "*rsqrt_internal1"
+(define_insn "*rsqrtsf_internal1"
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
(unspec:SF [(match_operand:SF 1 "gpc_reg_operand" "f")]
UNSPEC_RSQRT))]
- "TARGET_HARD_FLOAT && TARGET_PPC_GFXOPT"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+ && TARGET_PPC_GFXOPT"
"frsqrte %0,%1"
[(set_attr "type" "fp")])
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index d3fa10f856f..47da0156792 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -187,8 +187,13 @@ Target Report Var(TARGET_XL_COMPAT)
Conform more closely to IBM XLC semantics
mrecip
-Target Report Var(TARGET_RECIP)
-Generate software reciprocal sqrt for better throughput
+Target Report Var(TARGET_RECIP) Init(-1)
+Generate software reciprocal square root for better throughput.
+
+mrecip-passes=
+Target Report UInteger Joined Var(rs6000_recip_passes) Init(-1)
+Number of fixup passes after doing the reciprocal sqrt estimate. Default is 2
+for the power6 and newer machines, and 3 for older machines.
mno-fp-in-toc
Target Report RejectNegative Var(TARGET_NO_FP_IN_TOC)
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 7d572a48412..e6fefad232f 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -195,7 +195,7 @@
(UNSPEC_VSX_MSUB 511)
(UNSPEC_VSX_NMADD 512)
(UNSPEC_VSX_NMSUB 513)
- (UNSPEC_VSX_RSQRTE 514)
+ ; 514 deleted
(UNSPEC_VSX_TDIV 515)
(UNSPEC_VSX_TSQRT 516)
(UNSPEC_VSX_XXPERMDI 517)
@@ -449,7 +449,7 @@
(define_insn "vsx_rsqrte<mode>2"
[(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,?wa")
(unspec:VSX_B [(match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,wa")]
- UNSPEC_VSX_RSQRTE))]
+ UNSPEC_RSQRT))]
"VECTOR_UNIT_VSX_P (<MODE>mode)"
"x<VSv>rsqrte<VSs> %x0,%x1"
[(set_attr "type" "<VStype_simple>")
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 0d9862f52b9..ebf53e16001 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -11634,6 +11634,7 @@ GCC provides a few other builtins on Powerpc to access certain instructions:
float __builtin_recipdivf (float, float);
float __builtin_rsqrtf (float);
double __builtin_recipdiv (double, double);
+double __builtin_rsqrt (double);
long __builtin_bpermd (long, long);
int __builtin_bswap16 (int);
@end smallexample
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 3d4e720b004..edd8421f8bc 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -753,7 +753,8 @@ See RS/6000 and PowerPC Options.
-mfloat-gprs=yes -mfloat-gprs=no -mfloat-gprs=single -mfloat-gprs=double @gol
-mprototype -mno-prototype @gol
-msim -mmvme -mads -myellowknife -memb -msdata @gol
--msdata=@var{opt} -mvxworks -G @var{num} -pthread}
+-msdata=@var{opt} -mvxworks -G @var{num} -pthread @gol
+-mrecip -mno-recip -mrecip-passes=@var{num}}
@emph{S/390 and zSeries Options}
@gccoptlist{-mtune=@var{cpu-type} -march=@var{cpu-type} @gol
@@ -14610,6 +14611,27 @@ when the linker is known to generate glue.
Adds support for multithreading with the @dfn{pthreads} library.
This option sets flags for both the preprocessor and linker.
+@item -mrecip
+@itemx -mno-recip
+@opindex mrecip
+This option will enable GCC to use the FRSQRTE instruction for reciprocal
+square root with an additional Newton-Raphson step to increase
+precision instead of FSQRTS/FDIVS for single precision floating point
+arguments. These instructions are generated only when
+@option{-funsafe-math-optimizations} is enabled together with
+@option{-ffinite-math-only} and @option{-fno-trapping-math}. Note that
+while the throughput of the sequence is generally higher than the
+throughput of the non-reciprocal instruction, the precision of the
+sequence can be decreased by up to 2 ulp (i.e. the inverse of 1.0
+equals 0.99999994).
+
+@item -mrecip-passes=@var{num}
+@opindex mrecip-passes
+Control the number of Newton-Raphson passes to use after an estimate
+instruction if @option{-mrecip} was used. The default is 2 on newer
+machines that support higher precision estimate instructions and 3 on
+older machines.
+
@end table
@node S/390 and zSeries Options