diff options
Diffstat (limited to 'gcc/doc')
-rw-r--r-- | gcc/doc/extend.texi | 282 | ||||
-rw-r--r-- | gcc/doc/invoke.texi | 198 | ||||
-rw-r--r-- | gcc/doc/mxp.texi | 106 | ||||
-rw-r--r-- | gcc/doc/tm.texi | 13 |
4 files changed, 575 insertions, 24 deletions
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 43e91afe8b4..128370a0ccf 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -6984,6 +6984,7 @@ instructions, but allow the compiler to schedule those calls. @menu * Alpha Built-in Functions:: +* ARC Built-in Functions:: * ARM iWMMXt Built-in Functions:: * ARM NEON Intrinsics:: * Blackfin Built-in Functions:: @@ -7081,6 +7082,287 @@ void *__builtin_thread_pointer (void) void __builtin_set_thread_pointer (void *) @end smallexample +@node ARC Built-in Functions +@subsection ARC Built-in Functions + +SIMD instruction can be generated for ARC, using the built-in functions provided +for the ARC cores when the @option{-msimd} switch is used: + +The set of builtins defined for ARC can be categorized according to their +signatures into the following types: + +@smallexample +I) Return type : v8hi + First argument : v8hi + Second argument: v8hi + +v8hi __builtin_arc_vaddaw (v8hi, v8hi) +v8hi __builtin_arc_vaddw (v8hi, v8hi) +v8hi __builtin_arc_vavb (v8hi, v8hi) +v8hi __builtin_arc_vavrb (v8hi, v8hi) +v8hi __builtin_arc_vdifaw (v8hi, v8hi) +v8hi __builtin_arc_vdifw (v8hi, v8hi) +v8hi __builtin_arc_vmaxaw (v8hi, v8hi) +v8hi __builtin_arc_vmaxw (v8hi, v8hi) +v8hi __builtin_arc_vminaw (v8hi, v8hi) +v8hi __builtin_arc_vminw (v8hi, v8hi) +v8hi __builtin_arc_vmulaw (v8hi, v8hi) +v8hi __builtin_arc_vmulfaw (v8hi, v8hi) +v8hi __builtin_arc_vmulfw (v8hi, v8hi) +v8hi __builtin_arc_vmulw (v8hi, v8hi) +v8hi __builtin_arc_vsubaw (v8hi, v8hi) +v8hi __builtin_arc_vsubw (v8hi, v8hi) +v8hi __builtin_arc_vsummw (v8hi, v8hi) +v8hi __builtin_arc_vand (v8hi, v8hi) +v8hi __builtin_arc_vandaw (v8hi, v8hi) +v8hi __builtin_arc_vbic (v8hi, v8hi) +v8hi __builtin_arc_vbicaw (v8hi, v8hi) +v8hi __builtin_arc_vor (v8hi, v8hi) +v8hi __builtin_arc_vxor (v8hi, v8hi) +v8hi __builtin_arc_vxoraw (v8hi, v8hi) +v8hi __builtin_arc_veqw (v8hi, v8hi) +v8hi __builtin_arc_vlew (v8hi, v8hi) +v8hi __builtin_arc_vltw (v8hi, v8hi) +v8hi 
__builtin_arc_vnew (v8hi, v8hi) +v8hi __builtin_arc_vmr1aw (v8hi, v8hi) +v8hi __builtin_arc_vmr1w (v8hi, v8hi) +v8hi __builtin_arc_vmr2aw (v8hi, v8hi) +v8hi __builtin_arc_vmr2w (v8hi, v8hi) +v8hi __builtin_arc_vmr3aw (v8hi, v8hi) +v8hi __builtin_arc_vmr3w (v8hi, v8hi) +v8hi __builtin_arc_vmr4aw (v8hi, v8hi) +v8hi __builtin_arc_vmr4w (v8hi, v8hi) +v8hi __builtin_arc_vmr5aw (v8hi, v8hi) +v8hi __builtin_arc_vmr5w (v8hi, v8hi) +v8hi __builtin_arc_vmr6aw (v8hi, v8hi) +v8hi __builtin_arc_vmr6w (v8hi, v8hi) +v8hi __builtin_arc_vmr7aw (v8hi, v8hi) +v8hi __builtin_arc_vmr7w (v8hi, v8hi) +v8hi __builtin_arc_vmrb (v8hi, v8hi) +v8hi __builtin_arc_vh264f (v8hi, v8hi) +v8hi __builtin_arc_vh264ft (v8hi, v8hi) +v8hi __builtin_arc_vh264fw (v8hi, v8hi) +v8hi __builtin_arc_vvc1f (v8hi, v8hi) +v8hi __builtin_arc_vvc1ft (v8hi, v8hi) +@end smallexample + +@smallexample +II) Return type : v8hi + First argument : v8hi + Second argument: int + +v8hi __builtin_arc_vbaddw (v8hi, int) +v8hi __builtin_arc_vbmaxw (v8hi, int) +v8hi __builtin_arc_vbminw (v8hi, int) +v8hi __builtin_arc_vbmulaw (v8hi, int) +v8hi __builtin_arc_vbmulfw (v8hi, int) +v8hi __builtin_arc_vbmulw (v8hi, int) +v8hi __builtin_arc_vbrsubw (v8hi, int) +v8hi __builtin_arc_vbsubw (v8hi, int) +@end smallexample + +@smallexample +III) Return type : v8hi + First argument : v8hi + Second argument: const int + + The second argument in these builtins has to be an unsigned 3-bit +integer constant, as it indicates the registers I0-I7: + +v8hi __builtin_arc_vasrw (v8hi, const int) +v8hi __builtin_arc_vsr8 (v8hi, const int) +v8hi __builtin_arc_vsr8aw (v8hi, const int) +@end smallexample + +@smallexample +IV) Return type : v8hi + First argument : v8hi + Second argument: const int + + The second argument in these builtins has to be an unsigned 6-bit +integer constant: + +v8hi __builtin_arc_vasrrwi (v8hi, const int) +v8hi __builtin_arc_vasrsrwi (v8hi, const int) +v8hi __builtin_arc_vasrwi (v8hi, const int) +v8hi __builtin_arc_vasrpwbi (v8hi, 
const int) +v8hi __builtin_arc_vasrrpwbi (v8hi, const int) +v8hi __builtin_arc_vsr8awi (v8hi, const int) +v8hi __builtin_arc_vsr8i (v8hi, const int) +@end smallexample + +@smallexample +V) Return type : v8hi + First argument : v8hi + Second argument: const int + + The second argument in these builtins has to be an unsigned 8-bit +integer constant: + +v8hi __builtin_arc_vmvaw (v8hi, const int) +v8hi __builtin_arc_vmvw (v8hi, const int) +v8hi __builtin_arc_vmvzw (v8hi, const int) +v8hi __builtin_arc_vd6tapf (v8hi, const int) +@end smallexample + +@smallexample +VI) Return type : v8hi + First argument : int + Second argument: const int + + The second argument in these builtins has to be an unsigned 8-bit +integer constant: + +v8hi __builtin_arc_vmovaw (int, const int) +v8hi __builtin_arc_vmovw (int, const int) +v8hi __builtin_arc_vmovzw (int, const int) +@end smallexample + +@smallexample +VII) Return type : v8hi + First argument : v8hi + +v8hi __builtin_arc_vabsaw (v8hi) +v8hi __builtin_arc_vabsw (v8hi) +v8hi __builtin_arc_vaddsuw (v8hi) +v8hi __builtin_arc_vsignw (v8hi) +v8hi __builtin_arc_vexch1 (v8hi) +v8hi __builtin_arc_vexch2 (v8hi) +v8hi __builtin_arc_vexch4 (v8hi) +v8hi __builtin_arc_vupbaw (v8hi) +v8hi __builtin_arc_vupbw (v8hi) +v8hi __builtin_arc_vupsbaw (v8hi) +v8hi __builtin_arc_vupsbw (v8hi) +@end smallexample + +@smallexample +VIII) Return type : void + First argument : int + Second argument : int + +void __builtin_arc_vdirun (int, int) +void __builtin_arc_vdorun (int, int) +@end smallexample + +@smallexample +IX) Return type : void + First argument : const int + Second argument : int + + The first argument in these builtins has to be an unsigned 3-bit +integer constant, as it indicates DR0-DR7 DMA channel setup registers. 
The file +arc-simd.h also provides defines which can be used in place of the DMA register +numbers to facilitate better code readability: + +void __builtin_arc_vdiwr (const int, int) +void __builtin_arc_vdowr (const int, int) +@end smallexample + +@smallexample +X) Return type : void + First argument : int + +void __builtin_arc_vrec (int) +void __builtin_arc_vrun (int) +void __builtin_arc_vrecrun (int) +void __builtin_arc_vendrec (int) +@end smallexample + +@smallexample +XI) Return type : v8hi + First argument : v8hi + Second argument : const int + Third argument : const int + + The second argument in these builtins has to be an unsigned 3-bit +integer constant, as it indicates I0-I7 registers. The third argument has to be +an unsigned 8-bit quantity. The file arc-simd.h also provides defines which can +be used in place of the I0-I7 register numbers to facilitate better code readability: + +v8hi __builtin_arc_vld32wh (v8hi, const int, const int) +v8hi __builtin_arc_vld32wl (v8hi, const int, const int) +v8hi __builtin_arc_vld64 (v8hi, const int, const int) +v8hi __builtin_arc_vld32 (v8hi, const int, const int) + +NOTE: Although the equivalent hardware instructions do not take a simd register + as an operand, these builtins overwrite the relevant bits of the v8hi + quantity provided as the first argument with the value loaded from + [Ib, u8] location in the SDM. + +@end smallexample + +@smallexample +XII) Return type : v8hi + First argument : const int + Second argument : const int + + The first argument in these builtins has to be an unsigned 3-bit +integer constant, as it indicates I0-I7 registers. 
The second argument has to be +an unsigned 8-bit quantity. The file arc-simd.h also provides defines which can +be used in place of the I0-I7 register numbers to facilitate better code readability: + +v8hi __builtin_arc_vld64w (const int, const int) +v8hi __builtin_arc_vld128 (const int, const int) +@end smallexample + +@smallexample +XIII) Return type : void + First argument : v8hi + Second argument : const int + Third argument : const int + + The second argument in these builtins has to be an unsigned 3-bit +integer constant, as it indicates I0-I7 registers. The third argument has to be +an unsigned 8-bit quantity. The file arc-simd.h also provides defines which can +be used in place of the I0-I7 register numbers to facilitate better code readability: + +void __builtin_arc_vst128 (v8hi, const int, const int) +void __builtin_arc_vst64 (v8hi, const int, const int) +@end smallexample + + +@smallexample +XIV) Return type : void + First argument : v8hi + Second argument : const int + Third argument : const int + + The second argument has to be an unsigned 3-bit quantity to identify the +16-bit subregister to be stored. The third argument in these builtins has to be +an unsigned 3-bit integer constant, as it indicates I0-I7 registers. The fourth +argument has to be an unsigned 8-bit quantity. The file arc-simd.h also provides +defines which can be used in place of the I0-I7 register numbers to facilitate +better code readability: + +void __builtin_arc_vst16_n (v8hi, const int, const int, const int) +void __builtin_arc_vst32_n (v8hi, const int, const int, const int) +@end smallexample + + +@smallexample +XV) Return type : void + First argument : const int + + The argument has to be an unsigned 6-bit quantity. + +void __builtin_arc_vinti (const int) +@end smallexample + +@smallexample +NOTE: For all builtins __builtin_arc_<someinsn>, the header file arc-simd.h also + provides macros called _<someinsn> which can be used for programming ease + and improved readability. 
+ + Besides these, the following extra defines and typedefs are also provided +in the header file + +#define _setup_dma_in_channel_reg _vdiwr +#define _setup_dma_out_channel_reg _vdowr + +typedef int __v4si __attribute__((vector_size(16))); +typedef short __v8hi __attribute__((vector_size(16))); +@end smallexample + @node ARM iWMMXt Built-in Functions @subsection ARM iWMMXt Built-in Functions diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 7e6da15515d..4efc8c2871c 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -427,8 +427,33 @@ Objective-C and Objective-C++ Dialects}. @emph{ARC Options} @gccoptlist{-EB -EL @gol --mmangle-cpu -mcpu=@var{cpu} -mtext=@var{text-section} @gol --mdata=@var{data-section} -mrodata=@var{readonly-data-section}} +-mbig-endian -mlittle-endian @gol +-mA4 -mA5 -mA6 -mARC600 -mA7 -mARC700 -mmixed-code @gol +-mtext=@var{text-section} -mdata=@var{data-section} @gol +-mrodata=@var{readonly-data-section} @gol +-malign-loops -mno-align-loops @gol +-mvolatile-cache -mno-volatile-cache @gol +-mno-cond-exec @gol +-mnorm @gol +-mswap @gol +-mbarrel_shifter @gol +-mmul64 @gol +-mmin_max @gol +-mEA @gol +-msoft-float @gol +-mno-mpy @gol +-mno-brcc @gol +-mlong-calls @gol +-mno-sdata @gol +-mno-millicode @gol +-mspfp @gol +-mspfp_compact @gol +-mspfp_fast @gol +-mdpfp @gol +-mdpfp_compact @gol +-mdpfp_fast @gol +-msimd @gol +} @emph{ARM Options} @gccoptlist{-mapcs-frame -mno-apcs-frame @gol @@ -8684,44 +8709,162 @@ These options are defined for ARC implementations: @table @gcctabopt @item -EL @opindex EL +@itemx -mlittle-endian +@opindex mlittle-endian Compile code for little endian mode. This is the default. @item -EB @opindex EB +@itemx -mbig-endian +@opindex mbig-endian Compile code for big endian mode. -@item -mmangle-cpu -@opindex mmangle-cpu -Prepend the name of the cpu to all public symbol names. -In multiple-processor systems, there are many ARC variants with different -instruction and register set characteristics. 
This flag prevents code -compiled for one cpu to be linked with code compiled for another. -No facility exists for handling variants that are ``almost identical''. -This is an all or nothing option. +@item -mA4 +@opindex mA4 +Generates code for ARCtangent-A4 processor. This is the default. -@item -mcpu=@var{cpu} -@opindex mcpu -Compile code for ARC variant @var{cpu}. -Which variants are supported depend on the configuration. -All variants support @option{-mcpu=base}, this is the default. +@item -mA5 +@opindex mA5 +Generates ARCompact 32-bit code for ARCtangent-A5 processor. + +@item -mA6 +@opindex mA6 +@itemx -mARC600 +@opindex mARC600 +Generates ARCompact 32-bit code for ARCtangent-ARC600 processor. + +@item -mA7 +@opindex mA7 +@itemx -mARC700 +@opindex mARC700 +Generates ARCompact 32-bit code for ARCtangent-ARC700 processor. + +@item -mmixed-code +@opindex mmixed-code +Generates ARCompact 16-bit instructions intermixed with 32-bit instructions +for ARCtangent-A5 and higher processors. @item -mtext=@var{text-section} @itemx -mdata=@var{data-section} @itemx -mrodata=@var{readonly-data-section} -@opindex mtext -@opindex mdata -@opindex mrodata +@opindex mtext=@var{text-section} +@opindex mdata=@var{data-section} +@opindex mrodata=@var{readonly-data-section} Put functions, data, and readonly data in @var{text-section}, @var{data-section}, and @var{readonly-data-section} respectively by default. This can be overridden with the @code{section} attribute. @xref{Variable Attributes}. -@item -mfix-cortex-m3-ldrd -@opindex mfix-cortex-m3-ldrd -Some Cortex-M3 cores can cause data corruption when @code{ldrd} instructions -with overlapping destination and base registers are used. This option avoids -generating these instructions. This option is enabled by default when -@option{-mcpu=cortex-m3} is specified. +@item -malign-loops +@opindex malign-loops +Align loop starts to 32-byte boundaries (cache line size). 
+ +@item -mno-align-loops +@opindex mno-align-loops +Do not align loop starts to 32-byte boundaries (cache line size). + +@item -mvolatile-cache +@opindex mvolatile-cache +Allow caching of volatile references. This is the default. + +@item -mno-volatile-cache +@opindex mno-volatile-cache +Do not cache volatile references. + +@item -mno-cond-exec +@opindex mno-cond-exec +Do not generate predicated instructions for conditional execution. + +@item -mnorm +@opindex mnorm +Allow generation of norm instruction through the use of builtins. For +ARC700, the -mnorm option is turned on by default. + +@item -mswap +@opindex mswap +Allow generation of swap instruction through the use of builtins. For +ARC700, the -mswap option is turned on by default. + +@item -mbarrel_shifter +@opindex mbarrel_shifter +Allow generation of multiple shift instruction supported by barrel +shifter unit. For post A4 cores, such as A5, ARC600, ARC700, the +-mbarrel_shifter option is turned on by default. + +@item -mmul64 +@opindex mmul64 +Allow generation of mul64 and mulu64 instructions, by using +builtins. This option is not allowed for ARC700. + +@item -mmin_max +@opindex mmin_max +Allow generation of min and max instructions for A4. For post A4 +cores, these are generated by default. + +@item -mno-mpy +@opindex mno-mpy +Disallow generation of mpy, mpyh, mpyhu, mpyu instructions for ARC700. This +option is allowed only for ARC700 processor. + +@item -mEA +@opindex mEA +Allow generation of extended arithmetic instructions. + +@item -msoft-float +@opindex msoft-float +Dummy flag. Many applications use this flag generically, and soft-floats +are the only option on ARC. + +@item -mno-brcc +@opindex mno-brcc +Disable generation of BRcc instructions. + +@item -mlong-calls +@opindex mlong-calls +Make all function calls as register-indirect. This flag can be overridden +by using the @samp{short_call} function attribute. 
+ +@item -mno-sdata +@opindex mno-sdata +Do not generate sdata references. + +@item -mno-millicode +@opindex mno-millicode +Do not generate millicode thunk code for saving and restoring registers in +functions' prologue/epilogue. This flag is needed only with -Os, since millicode +thunks are used only when optimizing for size. + +@end table + +@subsection FPX Options +@cindex ARC FPX Options +These options can be used to generate code for the FPX (Floating Point +eXtension) extension unit. + +@table @gcctabopt +@item -mspfp +@opindex mspfp +@itemx -mspfp_compact +@opindex mspfp_compact +Generate Single Precision FPX (compact) instructions. + +@item -mspfp_fast +@opindex mspfp_fast +Generate Single Precision FPX (fast) instructions. + +@item -mdpfp +@opindex mdpfp +@itemx -mdpfp_compact +@opindex mdpfp_compact +Generate Double Precision FPX (compact) instructions. + +@item -mdpfp_fast +@opindex mdpfp_fast +Generate Double Precision FPX (fast) instructions. + +@item -msimd +@opindex msimd +Enable generation of ARC SIMD instructions via target-specific builtins. @end table @@ -8733,6 +8876,13 @@ These @samp{-m} options are defined for Advanced RISC Machines (ARM) architectures: @table @gcctabopt +@item -mfix-cortex-m3-ldrd +@opindex mfix-cortex-m3-ldrd +Some Cortex-M3 cores can cause data corruption when @code{ldrd} instructions +with overlapping destination and base registers are used. This option avoids +generating these instructions. This option is enabled by default when +@option{-mcpu=cortex-m3} is specified. + @item -mabi=@var{name} @opindex mabi Generate code for the specified ABI@. Permissible values are: @samp{apcs-gnu}, diff --git a/gcc/doc/mxp.texi b/gcc/doc/mxp.texi new file mode 100644 index 00000000000..69cef2657da --- /dev/null +++ b/gcc/doc/mxp.texi @@ -0,0 +1,106 @@ +data/bss layout: uses different sections ordered by minimum addressing scale. +no separate .rodata section(s). 
+.data16: scaling factor 16 +.data8, .data4, .data2, .data1: likewise for smaller scaling factors +.bss1, .bss2, .bss4, .bss8, .bss16: bss sections for increasing scaling +factors +The data base pointer register i9 typically points at the place where .bss1 +ends and .data1 starts. It might be moved up or down if allocation +would otherwise overflow on one side, and on the other side is slack. + +Tasks to be done: +- Convert this document into a proper texinfo file, incorporate it into + gcc documentation, and test 'make info' +- binutils support for using undefined labels in mxp data/bss sections + as offsets in memory addresses. +- binutils support for mxp code labels. For a start, we are looking to + have a special text section where to put all the mxp code. At link time, + this special text section is considered to be loaded at the start of the + SCM for purposes of resolving SCM absolute relocations. However, the + code gets actually a different load address for the ARC700 core, and gets + a j_s [blink] instruction appended (extra points if you make this a j_s.d + [blink] before the last insn without the potential to break stuff...) + Later we will likely want to move to multiple of such special text sections + to handle overlays, and possibly also have different load addresses to + accommodate multiple overlays. If we want to be able to handle SCM PIE, + I.e. code that can be loaded to varying SCM locations, the arc will need + to load a core register with the SCM load address before calling the + SCQ loading code, and the latter will have to use add instructions to + calculate SCM locations on the fly. + No matter if we use such add instructions, or long immediates, instructions + that reference SCM memory locations work out as 64 bit of code on the + arc side, while the other SIMD instructions are injected with a single + 32 bit code from the arc side. 
Thus we have a discrepancy between the + space taken up by the instructions in the object file and the size we + have to consider for purposes of calculating SCM addresses. + Luckily, these differences are constant from the first time the SIMD + assembly is emitted. Thus, the total number of instructions + with SCM references that precede an SCM label gives us the number of + 32 bit words to subtract from the total number of preceding 32 bits words + to arrive at the offset from the SCM load address. + To account for preceding SCM references in the same module, we can make + the SCM label appear to be accordingly earlier in the module. + (This will have to be compensated for if we want to do any linktime + relaxation at some later point in time.) + We also need to keep a tally of the total number of SCM references in each + module. + When linking multiple modules together, the total of these tallies for all + preceding modules needs to be added up, and subtracted from the value of + each label. + Like SCM references, (other) long immediates bulk up the code on the arc + side while leaving the SIMD instruction count the same, so they have to + be tallied up together with the SCM references. +- library functions: + - divsi3: use sh64 code as starting point. Note that there is no + point in loading the table base address before the function call, because + all SCM memory addressing has an offset. + divv8hi3, divv4si3: use older sh64 code w/out lookup table as starting + point + - divhi3 +- Investigate register class preferencing issues. Naming lane sets with + lane 0 first actually results in the wrong reg_class_subunions. In theory + the ordering should be something like 00, 10, 01, 30, 03, ff, to get the + sets with lane zero preferred for subunions. preferred classes can be + seen in the *lreg dump file after compiling with -da. Another avenue to + saner subunions is to add proper union lane sets 11, 33. 
+ The paradoxical thing I am seeing here is that the instruction count for + muldi increases when I introduce these measures. + Another - or complementary - approach is to shift the cost balance. + in theory REGISTER_MOVE_COST should have an influence, but in practice + I haven't seen any. What works is adding extra cost to insn alternatives + which allow non-lane0 registers. A problem here - and in general - is that + we want a viable alternate register class. Jacking up the cost for + non-lane0 alternatives can disparage these to the point that we lose the + altclass. We also have often altclasses that don't actually contain any + extra valid registers. In theory increasing MEMORY_MOVE_COST can + compensate, however I see paradoxical outcomes when I try to make this + dependent on !(reload_in_progress || reload_completed). I have a diff + for some of the changes I've tried in + /home/joernr/prefclass-experiments-20080428. + Maybe we need to jack up REGISTER_MOVE_COST, MEMORY_MOVE_COST and RTX_COST + consistently to get a more fine-grained resolution of costs. +- Obtain code samples of code that we think is suitable and relevant for + autovectorization. E.g. some codec. + Dependent tasks: + - Identify the actual section of this code that we think we should be + able to autovectorize. + - Make sure autovectorization takes place. +- Partitioning work. Check with IBM Haifa and other Milepost partners + what they already have. + Inasmuch as not already done: + - Identify individual functions and subgraphs of the callgraph we can move + to the SIMD engine. + - Add code to tree loop analysis to break out loops that we can move to + the SIMD engine. + - Handle data sets that don't fit into SDM. The simplest to implement + approach is probably to do loop tiling at the interface between arc core + and simd engine. 
OTOH we can get much better parallelism if we hand + over the entire work to the simd engine and let it DMA out the previous + block, and DMA in the next block, while it is performing calculations. + For this we need to represent main memory pointers. + Need not necessarily be exposed as pointers to the mxp-gcc, we could + express the loop tiling with intrinsics. +- Add doloop pattern +- Convert multi-insn define_insn patterns into define_insn_and_split patterns. +- Add scheduler description +- Where missing, add comments to the code according to GNU coding standards. diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index 7dfb46b3a0d..5e9a2792337 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -2758,6 +2758,12 @@ Do not define this macro if you do not define is @code{BITS_PER_WORD} bits wide is correct for your machine. @end defmac +@deftypefn {Target Hook} bool TARGET_PRESERVE_RELOAD_P (rtx @var{in}) +Called when doing an input reload using the value @var{in}. Return true +if the reload register should be available for inheritance later. This +might increase the spill pressure, but enhances reload inheritance. +@end deftypefn + @defmac SMALL_REGISTER_CLASSES On some machines, it is risky to let hard registers live across arbitrary insns. Typically, these machines have instructions that require values @@ -5962,6 +5968,13 @@ will be used. Defaults to 1 if @code{move_by_pieces_ninsns} returns less than @code{MOVE_RATIO}. @end defmac +@defmac CAN_MOVE_BY_PIECES (@var{size}, @var{alignment}) +A C expression used to determine whether a chunk of memory is to be copied +in pieces either by @code{move_by_pieces}, or by a movmem expander. This +is used by other optimizers that want to anticipate how a block copy is +going to be done. If not defined, MOVE_BY_PIECES_P is used instead. +@end defmac + @defmac MOVE_MAX_PIECES A C expression used by @code{move_by_pieces} to determine the largest unit a load or store used to copy memory is. Defaults to @code{MOVE_MAX}. |