summary | refs | log | tree | commit | diff
path: root/llvm/test/tools/llvm-mca
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2018-10-04 13:05:42 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2018-10-04 13:05:42 +0000
commit e46e2354fffe20dc162f1954d887fbe398fdbecc (patch)
tree 7c4419c5f22f7ac02e83e26205f21ba650ee26a7 /llvm/test/tools/llvm-mca
parent fde588197fc7ce18b685cf226b06c47bca805021 (diff)
[llvm-mca][x86] Add tests demonstrating ReadAfterLd delay
Diffstat (limited to 'llvm/test/tools/llvm-mca')
-rw-r--r-- llvm/test/tools/llvm-mca/X86/read-after-ld-1.s 115
-rw-r--r-- llvm/test/tools/llvm-mca/X86/read-after-ld-2.s 253
2 files changed, 368 insertions, 0 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
new file mode 100644
index 00000000000..5e4e79e30c1
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s
@@ -0,0 +1,115 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SANDY
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
+
+vdivps %xmm0, %xmm1, %xmm1  # long-latency producer: writes %xmm1
+vaddps (%rax), %xmm1, %xmm1  # reads %xmm1 from the vdivps above; the folded load's address uses only %rax
+
+# ALL: Iterations: 1
+# ALL-NEXT: Instructions: 2
+
+# BDWELL-NEXT: Total Cycles: 17
+# BDWELL-NEXT: Total uOps: 3
+
+# BTVER2-NEXT: Total Cycles: 27
+# BTVER2-NEXT: Total uOps: 2
+
+# HASWELL-NEXT: Total Cycles: 20
+# HASWELL-NEXT: Total uOps: 3
+
+# SANDY-NEXT: Total Cycles: 21
+# SANDY-NEXT: Total uOps: 3
+
+# SKYLAKE-NEXT: Total Cycles: 19
+# SKYLAKE-NEXT: Total uOps: 3
+
+# ZNVER1-NEXT: Total Cycles: 24
+# ZNVER1-NEXT: Total uOps: 2
+
+# BTVER2: Dispatch Width: 2
+# BTVER2-NEXT: uOps Per Cycle: 0.07
+# BTVER2-NEXT: IPC: 0.07
+# BTVER2-NEXT: Block RThroughput: 19.0
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.08
+# ZNVER1-NEXT: IPC: 0.08
+# ZNVER1-NEXT: Block RThroughput: 1.0
+
+# SANDY: Dispatch Width: 4
+# SANDY-NEXT: uOps Per Cycle: 0.14
+# SANDY-NEXT: IPC: 0.10
+# SANDY-NEXT: Block RThroughput: 14.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 0.15
+# HASWELL-NEXT: IPC: 0.10
+# HASWELL-NEXT: Block RThroughput: 7.0
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 0.18
+# BDWELL-NEXT: IPC: 0.12
+# BDWELL-NEXT: Block RThroughput: 5.0
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.16
+# SKYLAKE-NEXT: IPC: 0.11
+# SKYLAKE-NEXT: Block RThroughput: 3.0
+
+# ALL: Timeline view:
+
+# BDWELL-NEXT: 0123456
+# BDWELL-NEXT: Index 0123456789
+
+# BTVER2-NEXT: 0123456789
+# BTVER2-NEXT: Index 0123456789 0123456
+
+# HASWELL-NEXT: 0123456789
+# HASWELL-NEXT: Index 0123456789
+
+# SANDY-NEXT: 0123456789
+# SANDY-NEXT: Index 0123456789 0
+
+# SKYLAKE-NEXT: 012345678
+# SKYLAKE-NEXT: Index 0123456789
+
+# ZNVER1-NEXT: 0123456789
+# ZNVER1-NEXT: Index 0123456789 0123
+
+# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1
+# SKYLAKE-NEXT: [0,1] D======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
+# BDWELL: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1
+# BDWELL-NEXT: [0,1] D======eeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
+# HASWELL: [0,0] DeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# HASWELL-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
+# SANDY: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1
+# SANDY-NEXT: [0,1] D=========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
+# ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1
+# ZNVER1-NEXT: [0,1] D===========eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
+# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1
+# BTVER2-NEXT: [0,1] D================eeeeeeeeER vaddps (%rax), %xmm1, %xmm1
+
+# ALL: Average Wait times (based on the timeline view):
+# ALL-NEXT: [0]: Executions
+# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# ALL: [0] [1] [2] [3]
+# ALL-NEXT: 0. 1 1.0 1.0 0.0 vdivps %xmm0, %xmm1, %xmm1
+
+# BDWELL-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# BTVER2-NEXT: 1. 1 17.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# HASWELL-NEXT: 1. 1 9.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# SANDY-NEXT: 1. 1 10.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# SKYLAKE-NEXT: 1. 1 7.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
+# ZNVER1-NEXT: 1. 1 12.0 0.0 0.0 vaddps (%rax), %xmm1, %xmm1
diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s
new file mode 100644
index 00000000000..e3dcbd69bdc
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s
@@ -0,0 +1,253 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL
+
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL
+
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE
+
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1
+
+# Code Snippet from "Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks"
+# Charith Mendis, Saman Amarasinghe, Michael Carbin
+add $1, %edx  # updates the 32-bit value in %edx that the cmp below tests
+vpaddd (%r8), %ymm0, %ymm0  # %ymm0 is both a source and the destination; the other source is loaded from (%r8)
+add $32, %r8  # advances the address in %r8 by 32 bytes (the width of one ymm register)
+cmp %edi, %edx  # sets flags from %edx - %edi; nothing in this snippet consumes them
+
+# ALL: Iterations: 100
+# ALL-NEXT: Instructions: 400
+
+# BDWELL-NEXT: Total Cycles: 208
+# BDWELL-NEXT: Total uOps: 500
+
+# HASWELL-NEXT: Total Cycles: 308
+# HASWELL-NEXT: Total uOps: 500
+
+# SKYLAKE-NEXT: Total Cycles: 803
+# SKYLAKE-NEXT: Total uOps: 500
+
+# ZNVER1-NEXT: Total Cycles: 407
+# ZNVER1-NEXT: Total uOps: 400
+
+# ZNVER1: Dispatch Width: 4
+# ZNVER1-NEXT: uOps Per Cycle: 0.98
+# ZNVER1-NEXT: IPC: 0.98
+# ZNVER1-NEXT: Block RThroughput: 1.0
+
+# HASWELL: Dispatch Width: 4
+# HASWELL-NEXT: uOps Per Cycle: 1.62
+# HASWELL-NEXT: IPC: 1.30
+# HASWELL-NEXT: Block RThroughput: 1.3
+
+# BDWELL: Dispatch Width: 4
+# BDWELL-NEXT: uOps Per Cycle: 2.40
+# BDWELL-NEXT: IPC: 1.92
+# BDWELL-NEXT: Block RThroughput: 1.3
+
+# SKYLAKE: Dispatch Width: 6
+# SKYLAKE-NEXT: uOps Per Cycle: 0.62
+# SKYLAKE-NEXT: IPC: 0.50
+# SKYLAKE-NEXT: Block RThroughput: 0.8
+
+# ALL: Timeline view:
+
+# BDWELL-NEXT: 0123456789
+# BDWELL-NEXT: Index 0123456789 01234567
+
+# HASWELL-NEXT: 0123456789 01234567
+# HASWELL-NEXT: Index 0123456789 0123456789
+
+# SKYLAKE-NEXT: 0123456789 0123456789 0123456789 01234
+# SKYLAKE-NEXT: Index 0123456789 0123456789 0123456789 0123456789
+
+# ZNVER1-NEXT: 0123456789 0123456789
+# ZNVER1-NEXT: Index 0123456789 0123456789 0123456
+
+# SKYLAKE: [0,0] DeER . . . . . . . . . . . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [0,1] DeeeeeeeeER . . . . . . . . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [0,2] DeE-------R . . . . . . . . . . . . . addq $32, %r8
+# SKYLAKE-NEXT: [0,3] D=eE------R . . . . . . . . . . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [1,0] D=eE------R . . . . . . . . . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [1,1] .D=======eeeeeeeeER . . . . . . . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [1,2] .DeE--------------R . . . . . . . . . . . . addq $32, %r8
+# SKYLAKE-NEXT: [1,3] .D=eE-------------R . . . . . . . . . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [2,0] .D=eE-------------R . . . . . . . . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [2,1] . D==============eeeeeeeeER . . . . . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [2,2] . DeE---------------------R . . . . . . . . . . addq $32, %r8
+# SKYLAKE-NEXT: [2,3] . D=eE--------------------R . . . . . . . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [3,0] . D=eE--------------------R . . . . . . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [3,1] . D=====================eeeeeeeeER. . . . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [3,2] . DeE----------------------------R. . . . . . . . . addq $32, %r8
+# SKYLAKE-NEXT: [3,3] . D=eE---------------------------R. . . . . . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [4,0] . D=eE---------------------------R. . . . . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [4,1] . D============================eeeeeeeeER . . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [4,2] . DeE-----------------------------------R . . . . . . . addq $32, %r8
+# SKYLAKE-NEXT: [4,3] . D=eE----------------------------------R . . . . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [5,0] . D=eE----------------------------------R . . . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [5,1] . D===================================eeeeeeeeER . . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [5,2] . DeE------------------------------------------R . . . . . addq $32, %r8
+# SKYLAKE-NEXT: [5,3] . D=eE-----------------------------------------R . . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [6,0] . D=eE-----------------------------------------R . . . . . addl $1, %edx
+# SKYLAKE-NEXT: [6,1] . .D==========================================eeeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [6,2] . .DeE-------------------------------------------------R . . . . addq $32, %r8
+# SKYLAKE-NEXT: [6,3] . .D=eE------------------------------------------------R . . . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [7,0] . .D=eE------------------------------------------------R . . . . addl $1, %edx
+# SKYLAKE-NEXT: [7,1] . . D=================================================eeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [7,2] . . DeE--------------------------------------------------------R . . addq $32, %r8
+# SKYLAKE-NEXT: [7,3] . . D=eE-------------------------------------------------------R . . cmpl %edi, %edx
+# SKYLAKE-NEXT: [8,0] . . D=eE-------------------------------------------------------R . . addl $1, %edx
+# SKYLAKE-NEXT: [8,1] . . D========================================================eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: [8,2] . . DeE---------------------------------------------------------------R addq $32, %r8
+# SKYLAKE-NEXT: [8,3] . . D=eE--------------------------------------------------------------R cmpl %edi, %edx
+# SKYLAKE-NEXT: [9,0] . . D=eE--------------------------------------------------------------R addl $1, %edx
+
+# ZNVER1: [0,0] DeER . . . . . . . . .. addl $1, %edx
+# ZNVER1-NEXT: [0,1] DeeeeeeeeER . . . . . . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [0,2] DeE-------R . . . . . . .. addq $32, %r8
+# ZNVER1-NEXT: [0,3] D=eE------R . . . . . . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [1,0] .DeE------R . . . . . . .. addl $1, %edx
+# ZNVER1-NEXT: [1,1] .D===eeeeeeeeER. . . . . . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [1,2] .DeE----------R. . . . . . .. addq $32, %r8
+# ZNVER1-NEXT: [1,3] .D=eE---------R. . . . . . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [2,0] . DeE---------R. . . . . . .. addl $1, %edx
+# ZNVER1-NEXT: [2,1] . D======eeeeeeeeER . . . . . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [2,2] . DeE-------------R . . . . . .. addq $32, %r8
+# ZNVER1-NEXT: [2,3] . D=eE------------R . . . . . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [3,0] . DeE------------R . . . . . .. addl $1, %edx
+# ZNVER1-NEXT: [3,1] . D=========eeeeeeeeER . . . . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [3,2] . DeE----------------R . . . . .. addq $32, %r8
+# ZNVER1-NEXT: [3,3] . D=eE---------------R . . . . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [4,0] . DeE---------------R . . . . .. addl $1, %edx
+# ZNVER1-NEXT: [4,1] . D============eeeeeeeeER . . . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [4,2] . DeE-------------------R . . . .. addq $32, %r8
+# ZNVER1-NEXT: [4,3] . D=eE------------------R . . . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [5,0] . DeE------------------R . . . .. addl $1, %edx
+# ZNVER1-NEXT: [5,1] . D===============eeeeeeeeER . . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [5,2] . DeE----------------------R . . .. addq $32, %r8
+# ZNVER1-NEXT: [5,3] . D=eE---------------------R . . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [6,0] . .DeE---------------------R . . .. addl $1, %edx
+# ZNVER1-NEXT: [6,1] . .D==================eeeeeeeeER. . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [6,2] . .DeE-------------------------R. . .. addq $32, %r8
+# ZNVER1-NEXT: [6,3] . .D=eE------------------------R. . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [7,0] . . DeE------------------------R. . .. addl $1, %edx
+# ZNVER1-NEXT: [7,1] . . D=====================eeeeeeeeER . .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [7,2] . . DeE----------------------------R . .. addq $32, %r8
+# ZNVER1-NEXT: [7,3] . . D=eE---------------------------R . .. cmpl %edi, %edx
+# ZNVER1-NEXT: [8,0] . . DeE---------------------------R . .. addl $1, %edx
+# ZNVER1-NEXT: [8,1] . . D========================eeeeeeeeER .. vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [8,2] . . DeE-------------------------------R .. addq $32, %r8
+# ZNVER1-NEXT: [8,3] . . D=eE------------------------------R .. cmpl %edi, %edx
+# ZNVER1-NEXT: [9,0] . . DeE------------------------------R .. addl $1, %edx
+# ZNVER1-NEXT: [9,1] . . D===========================eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: [9,2] . . DeE----------------------------------R addq $32, %r8
+# ZNVER1-NEXT: [9,3] . . D=eE---------------------------------R cmpl %edi, %edx
+
+# HASWELL: [0,0] DeER . . . . . . . . addl $1, %edx
+# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [0,2] DeE-------R . . . . . . addq $32, %r8
+# HASWELL-NEXT: [0,3] .DeE------R . . . . . . cmpl %edi, %edx
+# HASWELL-NEXT: [1,0] .DeE------R . . . . . . addl $1, %edx
+# HASWELL-NEXT: [1,1] .D==eeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [1,2] . DeE--------R . . . . . . addq $32, %r8
+# HASWELL-NEXT: [1,3] . DeE--------R . . . . . . cmpl %edi, %edx
+# HASWELL-NEXT: [2,0] . DeE--------R . . . . . . addl $1, %edx
+# HASWELL-NEXT: [2,1] . D===eeeeeeeeER . . . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [2,2] . DeE----------R . . . . . addq $32, %r8
+# HASWELL-NEXT: [2,3] . DeE----------R . . . . . cmpl %edi, %edx
+# HASWELL-NEXT: [3,0] . DeE---------R . . . . . addl $1, %edx
+# HASWELL-NEXT: [3,1] . D=====eeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [3,2] . DeE------------R. . . . . addq $32, %r8
+# HASWELL-NEXT: [3,3] . DeE-----------R. . . . . cmpl %edi, %edx
+# HASWELL-NEXT: [4,0] . DeE-----------R. . . . . addl $1, %edx
+# HASWELL-NEXT: [4,1] . D=======eeeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [4,2] . .DeE-------------R . . . . addq $32, %r8
+# HASWELL-NEXT: [4,3] . .DeE-------------R . . . . cmpl %edi, %edx
+# HASWELL-NEXT: [5,0] . .DeE-------------R . . . . addl $1, %edx
+# HASWELL-NEXT: [5,1] . . D========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [5,2] . . DeE---------------R . . . addq $32, %r8
+# HASWELL-NEXT: [5,3] . . DeE---------------R . . . cmpl %edi, %edx
+# HASWELL-NEXT: [6,0] . . DeE--------------R . . . addl $1, %edx
+# HASWELL-NEXT: [6,1] . . D==========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [6,2] . . DeE-----------------R . . . addq $32, %r8
+# HASWELL-NEXT: [6,3] . . DeE----------------R . . . cmpl %edi, %edx
+# HASWELL-NEXT: [7,0] . . DeE----------------R . . . addl $1, %edx
+# HASWELL-NEXT: [7,1] . . D============eeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [7,2] . . DeE------------------R . . addq $32, %r8
+# HASWELL-NEXT: [7,3] . . DeE------------------R . . cmpl %edi, %edx
+# HASWELL-NEXT: [8,0] . . DeE------------------R . . addl $1, %edx
+# HASWELL-NEXT: [8,1] . . .D=============eeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [8,2] . . .DeE--------------------R. . addq $32, %r8
+# HASWELL-NEXT: [8,3] . . .DeE--------------------R. . cmpl %edi, %edx
+# HASWELL-NEXT: [9,0] . . . DeE-------------------R. . addl $1, %edx
+# HASWELL-NEXT: [9,1] . . . D===============eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: [9,2] . . . DeE----------------------R addq $32, %r8
+# HASWELL-NEXT: [9,3] . . . DeE---------------------R cmpl %edi, %edx
+
+# BDWELL: [0,0] DeER . . . . . . addl $1, %edx
+# BDWELL-NEXT: [0,1] DeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [0,2] DeE------R. . . . . addq $32, %r8
+# BDWELL-NEXT: [0,3] .DeE-----R. . . . . cmpl %edi, %edx
+# BDWELL-NEXT: [1,0] .DeE-----R. . . . . addl $1, %edx
+# BDWELL-NEXT: [1,1] .D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [1,2] . DeE------R . . . . addq $32, %r8
+# BDWELL-NEXT: [1,3] . DeE------R . . . . cmpl %edi, %edx
+# BDWELL-NEXT: [2,0] . DeE------R . . . . addl $1, %edx
+# BDWELL-NEXT: [2,1] . D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [2,2] . DeE-------R . . . . addq $32, %r8
+# BDWELL-NEXT: [2,3] . DeE-------R . . . . cmpl %edi, %edx
+# BDWELL-NEXT: [3,0] . DeE------R . . . . addl $1, %edx
+# BDWELL-NEXT: [3,1] . D==eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [3,2] . DeE--------R . . . addq $32, %r8
+# BDWELL-NEXT: [3,3] . DeE-------R . . . cmpl %edi, %edx
+# BDWELL-NEXT: [4,0] . DeE-------R . . . addl $1, %edx
+# BDWELL-NEXT: [4,1] . D===eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [4,2] . .DeE--------R . . . addq $32, %r8
+# BDWELL-NEXT: [4,3] . .DeE--------R . . . cmpl %edi, %edx
+# BDWELL-NEXT: [5,0] . .DeE--------R . . . addl $1, %edx
+# BDWELL-NEXT: [5,1] . . D===eeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [5,2] . . DeE---------R. . . addq $32, %r8
+# BDWELL-NEXT: [5,3] . . DeE---------R. . . cmpl %edi, %edx
+# BDWELL-NEXT: [6,0] . . DeE--------R. . . addl $1, %edx
+# BDWELL-NEXT: [6,1] . . D====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [6,2] . . DeE----------R . . addq $32, %r8
+# BDWELL-NEXT: [6,3] . . DeE---------R . . cmpl %edi, %edx
+# BDWELL-NEXT: [7,0] . . DeE---------R . . addl $1, %edx
+# BDWELL-NEXT: [7,1] . . D=====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [7,2] . . DeE----------R . . addq $32, %r8
+# BDWELL-NEXT: [7,3] . . DeE----------R . . cmpl %edi, %edx
+# BDWELL-NEXT: [8,0] . . DeE----------R . . addl $1, %edx
+# BDWELL-NEXT: [8,1] . . .D=====eeeeeeeER . vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [8,2] . . .DeE-----------R . addq $32, %r8
+# BDWELL-NEXT: [8,3] . . .DeE-----------R . cmpl %edi, %edx
+# BDWELL-NEXT: [9,0] . . . DeE----------R . addl $1, %edx
+# BDWELL-NEXT: [9,1] . . . D======eeeeeeeER vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: [9,2] . . . DeE------------R addq $32, %r8
+# BDWELL-NEXT: [9,3] . . . DeE-----------R cmpl %edi, %edx
+
+# ALL: Average Wait times (based on the timeline view):
+# ALL-NEXT: [0]: Executions
+# ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue
+# ALL-NEXT: [2]: Average time spent waiting in a scheduler's queue while ready
+# ALL-NEXT: [3]: Average time elapsed from WB until retire stage
+
+# ALL: [0] [1] [2] [3]
+
+# BDWELL-NEXT: 0. 10 1.0 0.4 6.9 addl $1, %edx
+# BDWELL-NEXT: 1. 10 4.0 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# BDWELL-NEXT: 2. 10 1.0 0.4 8.7 addq $32, %r8
+# BDWELL-NEXT: 3. 10 1.0 0.0 8.3 cmpl %edi, %edx
+
+# HASWELL-NEXT: 0. 10 1.0 0.4 11.4 addl $1, %edx
+# HASWELL-NEXT: 1. 10 8.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# HASWELL-NEXT: 2. 10 1.0 0.4 14.2 addq $32, %r8
+# HASWELL-NEXT: 3. 10 1.0 0.0 13.8 cmpl %edi, %edx
+
+# SKYLAKE-NEXT: 0. 10 1.9 0.1 30.6 addl $1, %edx
+# SKYLAKE-NEXT: 1. 10 32.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# SKYLAKE-NEXT: 2. 10 1.0 0.1 38.5 addq $32, %r8
+# SKYLAKE-NEXT: 3. 10 2.0 0.0 37.5 cmpl %edi, %edx
+
+# ZNVER1-NEXT: 0. 10 1.0 0.1 16.2 addl $1, %edx
+# ZNVER1-NEXT: 1. 10 14.5 0.1 0.0 vpaddd (%r8), %ymm0, %ymm0
+# ZNVER1-NEXT: 2. 10 1.0 0.1 20.5 addq $32, %r8
+# ZNVER1-NEXT: 3. 10 2.0 0.0 19.5 cmpl %edi, %edx