summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/ARM
diff options
context:
space:
mode:
author Evandro Menezes <e.menezes@samsung.com> 2018-07-27 18:16:47 +0000
committer Evandro Menezes <e.menezes@samsung.com> 2018-07-27 18:16:47 +0000
commit 4c1149f421d2598c4262d2dcb056a74da3eb02e3 (patch)
tree ba4264a519dcc1b07367a044844f143410a3867e /llvm/lib/Target/ARM
parent 03e5f7feb59811124e6a6e5da7b418438c2d008c (diff)
[ARM] Add new target feature to fuse literal generation
This feature enables the fusion of literal generation operations (a MOVi16 immediately followed by a MOVTi16, together forming a 32-bit immediate) on Cortex A57 and Cortex A72, as recommended in their Software Optimisation Guides, sections 4.14 and 4.11, respectively.
Differential revision: https://reviews.llvm.org/D49563
Diffstat (limited to 'llvm/lib/Target/ARM')
-rw-r--r-- llvm/lib/Target/ARM/ARM.td 4
-rw-r--r-- llvm/lib/Target/ARM/ARMMacroFusion.cpp 63
-rw-r--r-- llvm/lib/Target/ARM/ARMSubtarget.h 7
3 files changed, 55 insertions, 19 deletions
diff --git a/llvm/lib/Target/ARM/ARM.td b/llvm/lib/Target/ARM/ARM.td
index 742b3551889..2e62a079041 100644
--- a/llvm/lib/Target/ARM/ARM.td
+++ b/llvm/lib/Target/ARM/ARM.td
@@ -141,6 +141,10 @@ def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
def FeatureFuseAES : SubtargetFeature<"fuse-aes", "HasFuseAES", "true",
"CPU fuses AES crypto operations">;
+// Fast execution of bottom and top halves of literal generation
+def FeatureFuseLiterals : SubtargetFeature<"fuse-literals", "HasFuseLiterals", "true",
+ "CPU fuses literal generation operations">;
+
// The way of reading thread pointer
def FeatureReadTp : SubtargetFeature<"read-tp-hard", "ReadTPHard", "true",
"Reading thread pointer from register">;
diff --git a/llvm/lib/Target/ARM/ARMMacroFusion.cpp b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
index f2dc650a6f3..d11fe9d5c50 100644
--- a/llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ b/llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -19,6 +19,47 @@
namespace llvm {
+// Fuse AES crypto encoding or decoding.
+static bool isAESPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ switch(SecondOpcode) {
+ // AES encode.
+ case ARM::AESMC :
+ return FirstOpcode == ARM::AESE ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ // AES decode.
+ case ARM::AESIMC:
+ return FirstOpcode == ARM::AESD ||
+ FirstOpcode == ARM::INSTRUCTION_LIST_END;
+ }
+
+ return false;
+}
+
+// Fuse literal generation.
+static bool isLiteralsPair(const MachineInstr *FirstMI,
+ const MachineInstr &SecondMI) {
+ // Assume the 1st instr to be a wildcard if it is unspecified.
+ unsigned FirstOpcode =
+ FirstMI ? FirstMI->getOpcode()
+ : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
+ unsigned SecondOpcode = SecondMI.getOpcode();
+
+ // 32 bit immediate.
+ if ((FirstOpcode == ARM::INSTRUCTION_LIST_END ||
+ FirstOpcode == ARM::MOVi16) &&
+ SecondOpcode == ARM::MOVTi16)
+ return true;
+
+ return false;
+}
+
/// Check if the instr pair, FirstMI and SecondMI, should be fused
/// together. Given SecondMI, when FirstMI is unspecified, then check if
/// SecondMI may be part of a fused pair at all.
@@ -28,24 +69,10 @@ static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
const MachineInstr &SecondMI) {
const ARMSubtarget &ST = static_cast<const ARMSubtarget&>(TSI);
- // Assume wildcards for unspecified instrs.
- unsigned FirstOpcode =
- FirstMI ? FirstMI->getOpcode()
- : static_cast<unsigned>(ARM::INSTRUCTION_LIST_END);
- unsigned SecondOpcode = SecondMI.getOpcode();
-
- if (ST.hasFuseAES())
- // Fuse AES crypto operations.
- switch(SecondOpcode) {
- // AES encode.
- case ARM::AESMC :
- return FirstOpcode == ARM::AESE ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
- // AES decode.
- case ARM::AESIMC:
- return FirstOpcode == ARM::AESD ||
- FirstOpcode == ARM::INSTRUCTION_LIST_END;
- }
+ if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
+ return true;
+ if (ST.hasFuseLiterals() && isLiteralsPair(FirstMI, SecondMI))
+ return true;
return false;
}
diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h
index 165077926c8..74aee9a8ed3 100644
--- a/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -327,6 +327,10 @@ protected:
/// pairs faster.
bool HasFuseAES = false;
+ /// HasFuseLiterals - if true, processor executes back to back
+ /// bottom and top halves of literal generation faster.
+ bool HasFuseLiterals = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -616,8 +620,9 @@ public:
bool hasFullFP16() const { return HasFullFP16; }
bool hasFuseAES() const { return HasFuseAES; }
+ bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
- bool hasFusion() const { return hasFuseAES(); }
+ bool hasFusion() const { return hasFuseAES() || hasFuseLiterals(); }
const Triple &getTargetTriple() const { return TargetTriple; }