diff options
author | Greg Bedwell <greg_bedwell@sn.scee.net> | 2018-10-04 14:42:19 +0000 |
---|---|---|
committer | Greg Bedwell <greg_bedwell@sn.scee.net> | 2018-10-04 14:42:19 +0000 |
commit | 6d253a8706ac3f4dfe7be416c77d3082f9e684de (patch) | |
tree | 9167260c6a6d9373f43fe2eb0a126009362176da | |
parent | ff7b3d18c76b87afdd0ec61da896562ee2ccb9f6 (diff) |
[utils] Ensure that update_mca_test_checks.py writes prefixes in alphabetical order
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s | 26 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s | 10 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/cpus.s | 30 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s | 28 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s | 28 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/read-after-ld-1.s | 40 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/read-after-ld-2.s | 182 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s | 22 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s | 52 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s | 44 | ||||
-rw-r--r-- | llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s | 44 | ||||
-rwxr-xr-x | llvm/utils/update_mca_test_checks.py | 13 |
12 files changed, 265 insertions, 254 deletions
diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s index da4699f4615..4e4e23231b9 100644 --- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -26,21 +26,16 @@ bextrl %esi, (%rdi), %eax # ZNVER1-NEXT: Total Cycles: 8 # ZNVER1-NEXT: Total uOps: 3 -# BTVER2: Dispatch Width: 2 -# BTVER2-NEXT: uOps Per Cycle: 0.29 -# BTVER2-NEXT: IPC: 0.29 -# BTVER2-NEXT: Block RThroughput: 1.0 - -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.38 -# ZNVER1-NEXT: IPC: 0.25 -# ZNVER1-NEXT: Block RThroughput: 0.8 - # BDWELL: Dispatch Width: 4 # BDWELL-NEXT: uOps Per Cycle: 0.40 # BDWELL-NEXT: IPC: 0.20 # BDWELL-NEXT: Block RThroughput: 1.0 +# BTVER2: Dispatch Width: 2 +# BTVER2-NEXT: uOps Per Cycle: 0.29 +# BTVER2-NEXT: IPC: 0.29 +# BTVER2-NEXT: Block RThroughput: 1.0 + # HASWELL: Dispatch Width: 4 # HASWELL-NEXT: uOps Per Cycle: 0.40 # HASWELL-NEXT: IPC: 0.20 @@ -51,6 +46,11 @@ bextrl %esi, (%rdi), %eax # SKYLAKE-NEXT: IPC: 0.20 # SKYLAKE-NEXT: Block RThroughput: 0.7 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.38 +# ZNVER1-NEXT: IPC: 0.25 +# ZNVER1-NEXT: Block RThroughput: 0.8 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency @@ -87,6 +87,9 @@ bextrl %esi, (%rdi), %eax # BDWELL: [0,0] DeER . . addl %edi, %esi # BDWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax +# BTVER2: [0,0] DeER .. addl %edi, %esi +# BTVER2-NEXT: [0,1] DeeeeER bextrl %esi, (%rdi), %eax + # HASWELL: [0,0] DeER . . addl %edi, %esi # HASWELL-NEXT: [0,1] DeeeeeeeER bextrl %esi, (%rdi), %eax @@ -96,9 +99,6 @@ bextrl %esi, (%rdi), %eax # ZNVER1: [0,0] DeER . . addl %edi, %esi # ZNVER1-NEXT: [0,1] DeeeeeER bextrl %esi, (%rdi), %eax -# BTVER2: [0,0] DeER .. addl %edi, %esi -# BTVER2-NEXT: [0,1] DeeeeER bextrl %esi, (%rdi), %eax - # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s index a9c7c8b8564..fe3ba3a6ec5 100644 --- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -27,16 +27,16 @@ bzhil %esi, (%rdi), %eax # HASWELL-NEXT: IPC: 0.22 # HASWELL-NEXT: Block RThroughput: 0.8 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.38 -# ZNVER1-NEXT: IPC: 0.25 -# ZNVER1-NEXT: Block RThroughput: 0.8 - # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.33 # SKYLAKE-NEXT: IPC: 0.22 # SKYLAKE-NEXT: Block RThroughput: 0.5 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.38 +# ZNVER1-NEXT: IPC: 0.25 +# ZNVER1-NEXT: Block RThroughput: 0.8 + # ALL: Instruction Info: # ALL-NEXT: [1]: #uOps # ALL-NEXT: [2]: Latency diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s index 6a4bcff687c..47e1e83c543 100644 --- a/llvm/test/tools/llvm-mca/X86/cpus.s +++ b/llvm/test/tools/llvm-mca/X86/cpus.s @@ -17,21 +17,16 @@ add %edi, %eax # ALL-NEXT: Total Cycles: 103 # ALL-NEXT: Total uOps: 100 -# BTVER2: Dispatch Width: 2 -# BTVER2-NEXT: uOps Per Cycle: 0.97 -# BTVER2-NEXT: IPC: 0.97 -# BTVER2-NEXT: Block RThroughput: 0.5 - -# SLM: Dispatch Width: 2 -# SLM-NEXT: uOps Per Cycle: 0.97 -# SLM-NEXT: IPC: 0.97 -# SLM-NEXT: Block RThroughput: 0.5 - # BROADWELL: Dispatch Width: 4 # BROADWELL-NEXT: uOps Per Cycle: 0.97 # BROADWELL-NEXT: IPC: 0.97 # BROADWELL-NEXT: Block RThroughput: 0.3 +# BTVER2: Dispatch Width: 2 +# BTVER2-NEXT: uOps Per Cycle: 0.97 +# BTVER2-NEXT: IPC: 0.97 +# BTVER2-NEXT: Block RThroughput: 0.5 + # HASWELL: Dispatch Width: 4 # HASWELL-NEXT: uOps Per Cycle: 0.97 # HASWELL-NEXT: IPC: 0.97 @@ -52,11 +47,6 @@ add %edi, %eax # SANDYBRIDGE-NEXT: IPC: 0.97 # SANDYBRIDGE-NEXT: Block RThroughput: 0.3 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.97 -# ZNVER1-NEXT: IPC: 0.97 -# ZNVER1-NEXT: Block RThroughput: 0.3 - # SKX: Dispatch Width: 6 # SKX-NEXT: uOps Per Cycle: 0.97 # SKX-NEXT: IPC: 0.97 @@ -66,3 +56,13 @@ add %edi, %eax # SKX-AVX512-NEXT: uOps Per Cycle: 0.97 # SKX-AVX512-NEXT: IPC: 0.97 # SKX-AVX512-NEXT: Block RThroughput: 0.3 + +# SLM: Dispatch Width: 2 +# SLM-NEXT: uOps Per Cycle: 0.97 +# SLM-NEXT: IPC: 0.97 +# SLM-NEXT: Block RThroughput: 0.5 + +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.97 +# ZNVER1-NEXT: IPC: 0.97 +# ZNVER1-NEXT: Block RThroughput: 0.3 diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s index 679e8a3b598..75c77f8f8c4 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s @@ -25,26 +25,26 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ZNVER1-NEXT: Total Cycles: 15 # ZNVER1-NEXT: Total uOps: 2 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.13 -# ZNVER1-NEXT: IPC: 0.13 -# ZNVER1-NEXT: Block RThroughput: 1.0 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.23 +# BDWELL-NEXT: IPC: 0.15 +# BDWELL-NEXT: Block RThroughput: 1.0 # HASWELL: Dispatch Width: 4 # HASWELL-NEXT: uOps Per Cycle: 0.21 # HASWELL-NEXT: IPC: 0.14 # HASWELL-NEXT: Block RThroughput: 1.0 -# BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 0.23 -# BDWELL-NEXT: IPC: 0.15 -# BDWELL-NEXT: Block RThroughput: 1.0 - # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.23 # SKYLAKE-NEXT: IPC: 0.15 # SKYLAKE-NEXT: Block RThroughput: 1.0 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.13 +# ZNVER1-NEXT: IPC: 0.13 +# ZNVER1-NEXT: Block RThroughput: 1.0 + # ALL: Timeline view: # BDWELL-NEXT: 012 @@ -54,18 +54,18 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ALL-NEXT: Index 0123456789 -# ZNVER1: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 -# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 +# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 # HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 # HASWELL-NEXT: [0,1] DeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 -# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 -# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 - # SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm1 # SKYLAKE-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# ZNVER1: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm1 +# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s index 698aba487df..96d3ba613c5 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s @@ -25,26 +25,26 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ZNVER1-NEXT: Total Cycles: 15 # ZNVER1-NEXT: Total uOps: 2 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.13 -# ZNVER1-NEXT: IPC: 0.13 -# ZNVER1-NEXT: Block RThroughput: 1.0 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.23 +# BDWELL-NEXT: IPC: 0.15 +# BDWELL-NEXT: Block RThroughput: 1.0 # HASWELL: Dispatch Width: 4 # HASWELL-NEXT: uOps Per Cycle: 0.21 # HASWELL-NEXT: IPC: 0.14 # HASWELL-NEXT: Block RThroughput: 1.0 -# BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 0.23 -# BDWELL-NEXT: IPC: 0.15 -# BDWELL-NEXT: Block RThroughput: 1.0 - # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.23 # SKYLAKE-NEXT: IPC: 0.15 # SKYLAKE-NEXT: Block RThroughput: 1.0 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.13 +# ZNVER1-NEXT: IPC: 0.13 +# ZNVER1-NEXT: Block RThroughput: 1.0 + # ALL: Timeline view: # BDWELL-NEXT: 012 @@ -54,18 +54,18 @@ vfmadd213ps (%rdi), %xmm1, %xmm2 # ALL-NEXT: Index 0123456789 -# ZNVER1: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 -# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 +# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 # HASWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 # HASWELL-NEXT: [0,1] DeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 -# BDWELL: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 -# BDWELL-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 - # SKYLAKE: [0,0] DeeeeER . . vaddps %xmm0, %xmm0, %xmm2 # SKYLAKE-NEXT: [0,1] DeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 +# ZNVER1: [0,0] DeeeER . . vaddps %xmm0, %xmm0, %xmm2 +# ZNVER1-NEXT: [0,1] DeeeeeeeeeeeeER vfmadd213ps (%rdi), %xmm1, %xmm2 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s index 5e4e79e30c1..c427be1cd8b 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -30,36 +30,36 @@ vaddps (%rax), %xmm1, %xmm1 # ZNVER1-NEXT: Total Cycles: 24 # ZNVER1-NEXT: Total uOps: 2 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.18 +# BDWELL-NEXT: IPC: 0.12 +# BDWELL-NEXT: Block RThroughput: 5.0 + # BTVER2: Dispatch Width: 2 # BTVER2-NEXT: uOps Per Cycle: 0.07 # BTVER2-NEXT: IPC: 0.07 # BTVER2-NEXT: Block RThroughput: 19.0 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.08 -# ZNVER1-NEXT: IPC: 0.08 -# ZNVER1-NEXT: Block RThroughput: 1.0 - -# SANDY: Dispatch Width: 4 -# SANDY-NEXT: uOps Per Cycle: 0.14 -# SANDY-NEXT: IPC: 0.10 -# SANDY-NEXT: Block RThroughput: 14.0 - # HASWELL: Dispatch Width: 4 # HASWELL-NEXT: uOps Per Cycle: 0.15 # HASWELL-NEXT: IPC: 0.10 # HASWELL-NEXT: Block RThroughput: 7.0 -# BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 0.18 -# BDWELL-NEXT: IPC: 0.12 -# BDWELL-NEXT: Block RThroughput: 5.0 +# SANDY: Dispatch Width: 4 +# SANDY-NEXT: uOps Per Cycle: 0.14 +# SANDY-NEXT: IPC: 0.10 +# SANDY-NEXT: Block RThroughput: 14.0 # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.16 # SKYLAKE-NEXT: IPC: 0.11 # SKYLAKE-NEXT: Block RThroughput: 3.0 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.08 +# ZNVER1-NEXT: IPC: 0.08 +# ZNVER1-NEXT: Block RThroughput: 1.0 + # ALL: Timeline view: # BDWELL-NEXT: 0123456 @@ -80,24 +80,24 @@ vaddps (%rax), %xmm1, %xmm1 # ZNVER1-NEXT: 0123456789 # ZNVER1-NEXT: Index 0123456789 0123 -# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 -# SKYLAKE-NEXT: [0,1] D======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 - # BDWELL: [0,0] DeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 # BDWELL-NEXT: [0,1] D======eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 +# BTVER2-NEXT: [0,1] D================eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 + # HASWELL: [0,0] DeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # HASWELL-NEXT: [0,1] D========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 # SANDY: [0,0] DeeeeeeeeeeeeeeER . vdivps %xmm0, %xmm1, %xmm1 # SANDY-NEXT: [0,1] D=========eeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 +# SKYLAKE: [0,0] DeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 +# SKYLAKE-NEXT: [0,1] D======eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 + # ZNVER1: [0,0] DeeeeeeeeeeeeeeeER . . vdivps %xmm0, %xmm1, %xmm1 # ZNVER1-NEXT: [0,1] D===========eeeeeeeeeeER vaddps (%rax), %xmm1, %xmm1 -# BTVER2: [0,0] DeeeeeeeeeeeeeeeeeeeER .. vdivps %xmm0, %xmm1, %xmm1 -# BTVER2-NEXT: [0,1] D================eeeeeeeeER vaddps (%rax), %xmm1, %xmm1 - # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s index e3dcbd69bdc..6368d42b5c7 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s @@ -29,26 +29,26 @@ cmp %edi, %edx # ZNVER1-NEXT: Total Cycles: 407 # ZNVER1-NEXT: Total uOps: 400 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.98 -# ZNVER1-NEXT: IPC: 0.98 -# ZNVER1-NEXT: Block RThroughput: 1.0 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 2.40 +# BDWELL-NEXT: IPC: 1.92 +# BDWELL-NEXT: Block RThroughput: 1.3 # HASWELL: Dispatch Width: 4 # HASWELL-NEXT: uOps Per Cycle: 1.62 # HASWELL-NEXT: IPC: 1.30 # HASWELL-NEXT: Block RThroughput: 1.3 -# BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 2.40 -# BDWELL-NEXT: IPC: 1.92 -# BDWELL-NEXT: Block RThroughput: 1.3 - # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.62 # SKYLAKE-NEXT: IPC: 0.50 # SKYLAKE-NEXT: Block RThroughput: 0.8 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.98 +# ZNVER1-NEXT: IPC: 0.98 +# ZNVER1-NEXT: Block RThroughput: 1.0 + # ALL: Timeline view: # BDWELL-NEXT: 0123456789 @@ -63,6 +63,88 @@ cmp %edi, %edx # ZNVER1-NEXT: 0123456789 0123456789 # ZNVER1-NEXT: Index 0123456789 0123456789 0123456 +# BDWELL: [0,0] DeER . . . . . . addl $1, %edx +# BDWELL-NEXT: [0,1] DeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [0,2] DeE------R. . . . . addq $32, %r8 +# BDWELL-NEXT: [0,3] .DeE-----R. . . . . cmpl %edi, %edx +# BDWELL-NEXT: [1,0] .DeE-----R. . . . . addl $1, %edx +# BDWELL-NEXT: [1,1] .D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [1,2] . DeE------R . . . . addq $32, %r8 +# BDWELL-NEXT: [1,3] . DeE------R . . . . cmpl %edi, %edx +# BDWELL-NEXT: [2,0] . DeE------R . . . . addl $1, %edx +# BDWELL-NEXT: [2,1] . D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [2,2] . DeE-------R . . . . addq $32, %r8 +# BDWELL-NEXT: [2,3] . DeE-------R . . . . cmpl %edi, %edx +# BDWELL-NEXT: [3,0] . DeE------R . . . . addl $1, %edx +# BDWELL-NEXT: [3,1] . D==eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [3,2] . DeE--------R . . . addq $32, %r8 +# BDWELL-NEXT: [3,3] . DeE-------R . . . cmpl %edi, %edx +# BDWELL-NEXT: [4,0] . DeE-------R . . . addl $1, %edx +# BDWELL-NEXT: [4,1] . D===eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [4,2] . .DeE--------R . . . addq $32, %r8 +# BDWELL-NEXT: [4,3] . .DeE--------R . . . cmpl %edi, %edx +# BDWELL-NEXT: [5,0] . .DeE--------R . . . addl $1, %edx +# BDWELL-NEXT: [5,1] . . D===eeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [5,2] . . DeE---------R. . . addq $32, %r8 +# BDWELL-NEXT: [5,3] . . DeE---------R. . . cmpl %edi, %edx +# BDWELL-NEXT: [6,0] . . DeE--------R. . . addl $1, %edx +# BDWELL-NEXT: [6,1] . . D====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [6,2] . . DeE----------R . . addq $32, %r8 +# BDWELL-NEXT: [6,3] . . DeE---------R . . cmpl %edi, %edx +# BDWELL-NEXT: [7,0] . . DeE---------R . . addl $1, %edx +# BDWELL-NEXT: [7,1] . . D=====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [7,2] . . DeE----------R . . addq $32, %r8 +# BDWELL-NEXT: [7,3] . . DeE----------R . . cmpl %edi, %edx +# BDWELL-NEXT: [8,0] . . DeE----------R . . addl $1, %edx +# BDWELL-NEXT: [8,1] . . .D=====eeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [8,2] . . .DeE-----------R . addq $32, %r8 +# BDWELL-NEXT: [8,3] . . .DeE-----------R . cmpl %edi, %edx +# BDWELL-NEXT: [9,0] . . . DeE----------R . addl $1, %edx +# BDWELL-NEXT: [9,1] . . . D======eeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# BDWELL-NEXT: [9,2] . . . DeE------------R addq $32, %r8 +# BDWELL-NEXT: [9,3] . . . DeE-----------R cmpl %edi, %edx + +# HASWELL: [0,0] DeER . . . . . . . . addl $1, %edx +# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [0,2] DeE-------R . . . . . . addq $32, %r8 +# HASWELL-NEXT: [0,3] .DeE------R . . . . . . cmpl %edi, %edx +# HASWELL-NEXT: [1,0] .DeE------R . . . . . . addl $1, %edx +# HASWELL-NEXT: [1,1] .D==eeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [1,2] . DeE--------R . . . . . . addq $32, %r8 +# HASWELL-NEXT: [1,3] . DeE--------R . . . . . . cmpl %edi, %edx +# HASWELL-NEXT: [2,0] . DeE--------R . . . . . . addl $1, %edx +# HASWELL-NEXT: [2,1] . D===eeeeeeeeER . . . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [2,2] . DeE----------R . . . . . addq $32, %r8 +# HASWELL-NEXT: [2,3] . DeE----------R . . . . . cmpl %edi, %edx +# HASWELL-NEXT: [3,0] . DeE---------R . . . . . addl $1, %edx +# HASWELL-NEXT: [3,1] . D=====eeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [3,2] . DeE------------R. . . . . addq $32, %r8 +# HASWELL-NEXT: [3,3] . DeE-----------R. . . . . cmpl %edi, %edx +# HASWELL-NEXT: [4,0] . DeE-----------R. . . . . addl $1, %edx +# HASWELL-NEXT: [4,1] . D=======eeeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [4,2] . .DeE-------------R . . . . addq $32, %r8 +# HASWELL-NEXT: [4,3] . .DeE-------------R . . . . cmpl %edi, %edx +# HASWELL-NEXT: [5,0] . .DeE-------------R . . . . addl $1, %edx +# HASWELL-NEXT: [5,1] . . D========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [5,2] . . DeE---------------R . . . addq $32, %r8 +# HASWELL-NEXT: [5,3] . . DeE---------------R . . . cmpl %edi, %edx +# HASWELL-NEXT: [6,0] . . DeE--------------R . . . addl $1, %edx +# HASWELL-NEXT: [6,1] . . D==========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [6,2] . . DeE-----------------R . . . addq $32, %r8 +# HASWELL-NEXT: [6,3] . . DeE----------------R . . . cmpl %edi, %edx +# HASWELL-NEXT: [7,0] . . DeE----------------R . . . addl $1, %edx +# HASWELL-NEXT: [7,1] . . D============eeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [7,2] . . DeE------------------R . . addq $32, %r8 +# HASWELL-NEXT: [7,3] . . DeE------------------R . . cmpl %edi, %edx +# HASWELL-NEXT: [8,0] . . DeE------------------R . . addl $1, %edx +# HASWELL-NEXT: [8,1] . . .D=============eeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [8,2] . . .DeE--------------------R. . addq $32, %r8 +# HASWELL-NEXT: [8,3] . . .DeE--------------------R. . cmpl %edi, %edx +# HASWELL-NEXT: [9,0] . . . DeE-------------------R. . addl $1, %edx +# HASWELL-NEXT: [9,1] . . . D===============eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 +# HASWELL-NEXT: [9,2] . . . DeE----------------------R addq $32, %r8 +# HASWELL-NEXT: [9,3] . . . DeE---------------------R cmpl %edi, %edx + # SKYLAKE: [0,0] DeER . . . . . . . . . . . . . . . addl $1, %edx # SKYLAKE-NEXT: [0,1] DeeeeeeeeER . . . . . . . . . . . . . vpaddd (%r8), %ymm0, %ymm0 # SKYLAKE-NEXT: [0,2] DeE-------R . . . . . . . . . . . . . addq $32, %r8 @@ -142,88 +224,6 @@ cmp %edi, %edx # ZNVER1-NEXT: [9,2] . . DeE----------------------------------R addq $32, %r8 # ZNVER1-NEXT: [9,3] . . D=eE---------------------------------R cmpl %edi, %edx -# HASWELL: [0,0] DeER . . . . . . . . addl $1, %edx -# HASWELL-NEXT: [0,1] DeeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [0,2] DeE-------R . . . . . . addq $32, %r8 -# HASWELL-NEXT: [0,3] .DeE------R . . . . . . cmpl %edi, %edx -# HASWELL-NEXT: [1,0] .DeE------R . . . . . . addl $1, %edx -# HASWELL-NEXT: [1,1] .D==eeeeeeeeER . . . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [1,2] . DeE--------R . . . . . . addq $32, %r8 -# HASWELL-NEXT: [1,3] . DeE--------R . . . . . . cmpl %edi, %edx -# HASWELL-NEXT: [2,0] . DeE--------R . . . . . . addl $1, %edx -# HASWELL-NEXT: [2,1] . D===eeeeeeeeER . . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [2,2] . DeE----------R . . . . . addq $32, %r8 -# HASWELL-NEXT: [2,3] . DeE----------R . . . . . cmpl %edi, %edx -# HASWELL-NEXT: [3,0] . DeE---------R . . . . . addl $1, %edx -# HASWELL-NEXT: [3,1] . D=====eeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [3,2] . DeE------------R. . . . . addq $32, %r8 -# HASWELL-NEXT: [3,3] . DeE-----------R. . . . . cmpl %edi, %edx -# HASWELL-NEXT: [4,0] . DeE-----------R. . . . . addl $1, %edx -# HASWELL-NEXT: [4,1] . D=======eeeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [4,2] . .DeE-------------R . . . . addq $32, %r8 -# HASWELL-NEXT: [4,3] . .DeE-------------R . . . . cmpl %edi, %edx -# HASWELL-NEXT: [5,0] . .DeE-------------R . . . . addl $1, %edx -# HASWELL-NEXT: [5,1] . . D========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [5,2] . . DeE---------------R . . . addq $32, %r8 -# HASWELL-NEXT: [5,3] . . DeE---------------R . . . cmpl %edi, %edx -# HASWELL-NEXT: [6,0] . . DeE--------------R . . . addl $1, %edx -# HASWELL-NEXT: [6,1] . . D==========eeeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [6,2] . . DeE-----------------R . . . addq $32, %r8 -# HASWELL-NEXT: [6,3] . . DeE----------------R . . . cmpl %edi, %edx -# HASWELL-NEXT: [7,0] . . DeE----------------R . . . addl $1, %edx -# HASWELL-NEXT: [7,1] . . D============eeeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [7,2] . . DeE------------------R . . addq $32, %r8 -# HASWELL-NEXT: [7,3] . . DeE------------------R . . cmpl %edi, %edx -# HASWELL-NEXT: [8,0] . . DeE------------------R . . addl $1, %edx -# HASWELL-NEXT: [8,1] . . .D=============eeeeeeeeER. . vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [8,2] . . .DeE--------------------R. . addq $32, %r8 -# HASWELL-NEXT: [8,3] . . .DeE--------------------R. . cmpl %edi, %edx -# HASWELL-NEXT: [9,0] . . . DeE-------------------R. . addl $1, %edx -# HASWELL-NEXT: [9,1] . . . D===============eeeeeeeeER vpaddd (%r8), %ymm0, %ymm0 -# HASWELL-NEXT: [9,2] . . . DeE----------------------R addq $32, %r8 -# HASWELL-NEXT: [9,3] . . . DeE---------------------R cmpl %edi, %edx - -# BDWELL: [0,0] DeER . . . . . . addl $1, %edx -# BDWELL-NEXT: [0,1] DeeeeeeeER. . . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [0,2] DeE------R. . . . . addq $32, %r8 -# BDWELL-NEXT: [0,3] .DeE-----R. . . . . cmpl %edi, %edx -# BDWELL-NEXT: [1,0] .DeE-----R. . . . . addl $1, %edx -# BDWELL-NEXT: [1,1] .D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [1,2] . DeE------R . . . . addq $32, %r8 -# BDWELL-NEXT: [1,3] . DeE------R . . . . cmpl %edi, %edx -# BDWELL-NEXT: [2,0] . DeE------R . . . . addl $1, %edx -# BDWELL-NEXT: [2,1] . D=eeeeeeeER . . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [2,2] . DeE-------R . . . . addq $32, %r8 -# BDWELL-NEXT: [2,3] . DeE-------R . . . . cmpl %edi, %edx -# BDWELL-NEXT: [3,0] . DeE------R . . . . addl $1, %edx -# BDWELL-NEXT: [3,1] . D==eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [3,2] . DeE--------R . . . addq $32, %r8 -# BDWELL-NEXT: [3,3] . DeE-------R . . . cmpl %edi, %edx -# BDWELL-NEXT: [4,0] . DeE-------R . . . addl $1, %edx -# BDWELL-NEXT: [4,1] . D===eeeeeeeER . . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [4,2] . .DeE--------R . . . addq $32, %r8 -# BDWELL-NEXT: [4,3] . .DeE--------R . . . cmpl %edi, %edx -# BDWELL-NEXT: [5,0] . .DeE--------R . . . addl $1, %edx -# BDWELL-NEXT: [5,1] . . D===eeeeeeeER. . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [5,2] . . DeE---------R. . . addq $32, %r8 -# BDWELL-NEXT: [5,3] . . DeE---------R. . . cmpl %edi, %edx -# BDWELL-NEXT: [6,0] . . DeE--------R. . . addl $1, %edx -# BDWELL-NEXT: [6,1] . . D====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [6,2] . . DeE----------R . . addq $32, %r8 -# BDWELL-NEXT: [6,3] . . DeE---------R . . cmpl %edi, %edx -# BDWELL-NEXT: [7,0] . . DeE---------R . . addl $1, %edx -# BDWELL-NEXT: [7,1] . . D=====eeeeeeeER . . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [7,2] . . DeE----------R . . addq $32, %r8 -# BDWELL-NEXT: [7,3] . . DeE----------R . . cmpl %edi, %edx -# BDWELL-NEXT: [8,0] . . DeE----------R . . addl $1, %edx -# BDWELL-NEXT: [8,1] . . .D=====eeeeeeeER . vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [8,2] . . .DeE-----------R . addq $32, %r8 -# BDWELL-NEXT: [8,3] . . .DeE-----------R . cmpl %edi, %edx -# BDWELL-NEXT: [9,0] . . . DeE----------R . addl $1, %edx -# BDWELL-NEXT: [9,1] . . . D======eeeeeeeER vpaddd (%r8), %ymm0, %ymm0 -# BDWELL-NEXT: [9,2] . . . DeE------------R addq $32, %r8 -# BDWELL-NEXT: [9,3] . . . DeE-----------R cmpl %edi, %edx - # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s index ea06ad0bc80..e22f4a51887 100644 --- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s +++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s @@ -17,9 +17,6 @@ xor %eax, %ebx # ALL-NEXT: 0, 3 (75.0%) # ALL-NEXT: 1, 1 (25.0%) -# SLM: Scheduler's queue usage: -# SLM-NEXT: No scheduler resources used. - # BDW: Scheduler's queue usage: # BDW-NEXT: [1] Resource name. # BDW-NEXT: [2] Average number of used buffer entries. @@ -62,6 +59,9 @@ xor %eax, %ebx # SKX-AVX512-NEXT: [3] Maximum number of used buffer entries. # SKX-AVX512-NEXT: [4] Total number of buffer entries. +# SLM: Scheduler's queue usage: +# SLM-NEXT: No scheduler resources used. + # SNB: Scheduler's queue usage: # SNB-NEXT: [1] Resource name. # SNB-NEXT: [2] Average number of used buffer entries. @@ -77,22 +77,19 @@ xor %eax, %ebx # BDW: [1] [2] [3] [4] # BDW-NEXT: BWPortAny 0 1 60 -# HSW: [1] [2] [3] [4] -# HSW-NEXT: HWPortAny 0 1 60 - -# KNL: [1] [2] [3] [4] -# KNL-NEXT: HWPortAny 0 1 60 - # BTVER2: [1] [2] [3] [4] # BTVER2-NEXT: JALU01 0 1 20 # BTVER2-NEXT: JFPU01 0 0 18 # BTVER2-NEXT: JLSAGU 0 0 12 +# HSW: [1] [2] [3] [4] +# HSW-NEXT: HWPortAny 0 1 60 + # IVB: [1] [2] [3] [4] # IVB-NEXT: SBPortAny 0 1 54 -# SNB: [1] [2] [3] [4] -# SNB-NEXT: SBPortAny 0 1 54 +# KNL: [1] [2] [3] [4] +# KNL-NEXT: HWPortAny 0 1 60 # SKX: [1] [2] [3] [4] # SKX-NEXT: SKLPortAny 0 1 60 @@ -100,6 +97,9 @@ xor %eax, %ebx # SKX-AVX512: [1] [2] [3] [4] # SKX-AVX512-NEXT: SKXPortAny 0 1 60 +# SNB: [1] [2] [3] [4] +# SNB-NEXT: SBPortAny 0 1 54 + # ZNVER1: [1] [2] [3] [4] # ZNVER1-NEXT: ZnAGU 0 0 28 # ZNVER1-NEXT: ZnALU 0 1 56 diff --git a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s index 1741f9961c6..66b87e72df2 100644 --- a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s +++ b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s @@ -44,20 +44,20 @@ rcpss (%rax), %xmm1 # ZNVER1-NEXT: 0123456789 0 # ZNVER1-NEXT: Index 0123456789 0123456789 -# ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax -# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 - -# SKYLAKE: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax -# SKYLAKE-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 - # BROADWELL: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 +# BTVER2: [0,0] DeeER. . . . . . leaq 8(%rsp,%rdi,2), %rax +# BTVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 + # HASWELL: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax # HASWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 -# BTVER2: [0,0] DeeER. . . . . . leaq 8(%rsp,%rdi,2), %rax -# BTVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 +# SKYLAKE: [0,0] DeER . . . . leaq 8(%rsp,%rdi,2), %rax +# SKYLAKE-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 + +# ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtss (%rax), %xmm1 # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions @@ -93,20 +93,20 @@ rcpss (%rax), %xmm1 # ZNVER1-NEXT: 0123456789 0 # ZNVER1-NEXT: Index 0123456789 0123456789 -# ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax -# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 - -# SKYLAKE: [0,0] DeER . . . . .. leaq 8(%rsp,%rdi,2), %rax -# SKYLAKE-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 - # BROADWELL: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 +# BTVER2: [0,0] DeeER. . . . . . .. leaq 8(%rsp,%rdi,2), %rax +# BTVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 + # HASWELL: [0,0] DeER . . . . . leaq 8(%rsp,%rdi,2), %rax # HASWELL-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 -# BTVER2: [0,0] DeeER. . . . . . .. leaq 8(%rsp,%rdi,2), %rax -# BTVER2-NEXT: [0,1] D==eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 +# SKYLAKE: [0,0] DeER . . . . .. leaq 8(%rsp,%rdi,2), %rax +# SKYLAKE-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 + +# ZNVER1: [0,0] DeER . . . . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeeeeeeeeeeeeeeeeER sqrtsd (%rax), %xmm1 # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions @@ -135,20 +135,20 @@ rcpss (%rax), %xmm1 # ALL-NEXT: Index 0123456789 -# ZNVER1: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax -# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeER rsqrtss (%rax), %xmm1 - # BROADWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeER rsqrtss (%rax), %xmm1 +# BTVER2: [0,0] DeeER. .. leaq 8(%rsp,%rdi,2), %rax +# BTVER2-NEXT: [0,1] D==eeeeeeeER rsqrtss (%rax), %xmm1 + # HASWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # HASWELL-NEXT: [0,1] D=eeeeeeeeeeER rsqrtss (%rax), %xmm1 # SKYLAKE: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # SKYLAKE-NEXT: [0,1] D=eeeeeeeeeER rsqrtss (%rax), %xmm1 -# BTVER2: [0,0] DeeER. .. leaq 8(%rsp,%rdi,2), %rax -# BTVER2-NEXT: [0,1] D==eeeeeeeER rsqrtss (%rax), %xmm1 +# ZNVER1: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeER rsqrtss (%rax), %xmm1 # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions @@ -177,20 +177,20 @@ rcpss (%rax), %xmm1 # ALL-NEXT: Index 0123456789 -# ZNVER1: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax -# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeER rcpss (%rax), %xmm1 - # BROADWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # BROADWELL-NEXT: [0,1] D=eeeeeeeeeeER rcpss (%rax), %xmm1 +# BTVER2: [0,0] DeeER. .. leaq 8(%rsp,%rdi,2), %rax +# BTVER2-NEXT: [0,1] D==eeeeeeeER rcpss (%rax), %xmm1 + # HASWELL: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # HASWELL-NEXT: [0,1] D=eeeeeeeeeeER rcpss (%rax), %xmm1 # SKYLAKE: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax # SKYLAKE-NEXT: [0,1] D=eeeeeeeeeER rcpss (%rax), %xmm1 -# BTVER2: [0,0] DeeER. .. leaq 8(%rsp,%rdi,2), %rax -# BTVER2-NEXT: [0,1] D==eeeeeeeER rcpss (%rax), %xmm1 +# ZNVER1: [0,0] DeER . . . leaq 8(%rsp,%rdi,2), %rax +# ZNVER1-NEXT: [0,1] D=eeeeeeeeeeeeER rcpss (%rax), %xmm1 # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s index a4d9d1b42e7..3a2f4d260f2 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -40,15 +40,20 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER1-NEXT: Total Cycles: 11 # ZNVER1-NEXT: Total uOps: 2 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.40 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 2.0 + # BTVER2: Dispatch Width: 2 # BTVER2-NEXT: uOps Per Cycle: 0.36 # BTVER2-NEXT: IPC: 0.18 # BTVER2-NEXT: Block RThroughput: 2.0 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.18 -# ZNVER1-NEXT: IPC: 0.18 -# ZNVER1-NEXT: Block RThroughput: 1.0 +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.36 +# HASWELL-NEXT: IPC: 0.18 +# HASWELL-NEXT: Block RThroughput: 2.0 # IVY: Dispatch Width: 4 # IVY-NEXT: uOps Per Cycle: 0.36 @@ -60,21 +65,19 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SANDY-NEXT: IPC: 0.18 # SANDY-NEXT: Block RThroughput: 1.0 -# HASWELL: Dispatch Width: 4 -# HASWELL-NEXT: uOps Per Cycle: 0.36 -# HASWELL-NEXT: IPC: 0.18 -# HASWELL-NEXT: Block RThroughput: 2.0 - -# BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 0.40 -# BDWELL-NEXT: IPC: 0.20 -# BDWELL-NEXT: Block RThroughput: 2.0 - # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.36 # SKYLAKE-NEXT: IPC: 0.18 # SKYLAKE-NEXT: Block RThroughput: 0.7 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.18 +# ZNVER1-NEXT: IPC: 0.18 +# ZNVER1-NEXT: Block RThroughput: 1.0 + +# BDWELL: Timeline view: +# BDWELL-NEXT: Index 0123456789 + # BTVER2: Timeline view: # BTVER2-NEXT: 0 # BTVER2-NEXT: Index 0123456789 @@ -99,8 +102,8 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER1-NEXT: 0 # ZNVER1-NEXT: Index 0123456789 -# BDWELL: Timeline view: -# BDWELL-NEXT: Index 0123456789 +# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 # BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 @@ -114,15 +117,12 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 # SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 -# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - # SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm1 # SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm1 +# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s index dd4dd773958..fd581e0debf 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -40,15 +40,20 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER1-NEXT: Total Cycles: 11 # ZNVER1-NEXT: Total uOps: 2 +# BDWELL: Dispatch Width: 4 +# BDWELL-NEXT: uOps Per Cycle: 0.40 +# BDWELL-NEXT: IPC: 0.20 +# BDWELL-NEXT: Block RThroughput: 2.0 + # BTVER2: Dispatch Width: 2 # BTVER2-NEXT: uOps Per Cycle: 0.36 # BTVER2-NEXT: IPC: 0.18 # BTVER2-NEXT: Block RThroughput: 2.0 -# ZNVER1: Dispatch Width: 4 -# ZNVER1-NEXT: uOps Per Cycle: 0.18 -# ZNVER1-NEXT: IPC: 0.18 -# ZNVER1-NEXT: Block RThroughput: 1.0 +# HASWELL: Dispatch Width: 4 +# HASWELL-NEXT: uOps Per Cycle: 0.36 +# HASWELL-NEXT: IPC: 0.18 +# HASWELL-NEXT: Block RThroughput: 2.0 # IVY: Dispatch Width: 4 # IVY-NEXT: uOps Per Cycle: 0.36 @@ -60,21 +65,19 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SANDY-NEXT: IPC: 0.18 # SANDY-NEXT: Block RThroughput: 1.0 -# HASWELL: Dispatch Width: 4 -# HASWELL-NEXT: uOps Per Cycle: 0.36 -# HASWELL-NEXT: IPC: 0.18 -# HASWELL-NEXT: Block RThroughput: 2.0 - -# BDWELL: Dispatch Width: 4 -# BDWELL-NEXT: uOps Per Cycle: 0.40 -# BDWELL-NEXT: IPC: 0.20 -# BDWELL-NEXT: Block RThroughput: 2.0 - # SKYLAKE: Dispatch Width: 6 # SKYLAKE-NEXT: uOps Per Cycle: 0.36 # SKYLAKE-NEXT: IPC: 0.18 # SKYLAKE-NEXT: Block RThroughput: 0.7 +# ZNVER1: Dispatch Width: 4 +# ZNVER1-NEXT: uOps Per Cycle: 0.18 +# ZNVER1-NEXT: IPC: 0.18 +# ZNVER1-NEXT: Block RThroughput: 1.0 + +# BDWELL: Timeline view: +# BDWELL-NEXT: Index 0123456789 + # BTVER2: Timeline view: # BTVER2-NEXT: 0 # BTVER2-NEXT: Index 0123456789 @@ -99,8 +102,8 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # ZNVER1-NEXT: 0 # ZNVER1-NEXT: Index 0123456789 -# BDWELL: Timeline view: -# BDWELL-NEXT: Index 0123456789 +# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # BTVER2: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 # BTVER2-NEXT: [0,1] .DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 @@ -114,15 +117,12 @@ vblendvps %xmm1, (%rdi), %xmm2, %xmm3 # SANDY: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 # SANDY-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 -# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 -# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - -# BDWELL: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 -# BDWELL-NEXT: [0,1] DeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 - # SKYLAKE: [0,0] DeeeeER . vaddps %xmm0, %xmm0, %xmm2 # SKYLAKE-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 +# ZNVER1: [0,0] DeeeER . vaddps %xmm0, %xmm0, %xmm2 +# ZNVER1-NEXT: [0,1] DeeeeeeeeER vblendvps %xmm1, (%rdi), %xmm2, %xmm3 + # ALL: Average Wait times (based on the timeline view): # ALL-NEXT: [0]: Executions # ALL-NEXT: [1]: Average time spent waiting in a scheduler's queue diff --git a/llvm/utils/update_mca_test_checks.py b/llvm/utils/update_mca_test_checks.py index c1f89e006fa..a83186cd336 100755 --- a/llvm/utils/update_mca_test_checks.py +++ b/llvm/utils/update_mca_test_checks.py @@ -502,18 +502,29 @@ def _write_output(test_path, input_lines, prefix_list, block_infos, # noqa else: # _break_down_block() was unable to do do anything so output the block # as-is. + + # Rather than writing out each block as soon we encounter it, save it + # indexed by prefix so that we can write all of the blocks out sorted by + # prefix at the end. + output_blocks = defaultdict(list) + for block_text in sorted(block_infos[block_num]): + if not block_text: continue lines = block_text.split('\n') for prefix in block_infos[block_num][block_text]: - used_prefixes |= _write_block(output_check_lines, + assert prefix not in output_blocks + used_prefixes |= _write_block(output_blocks[prefix], [(prefix, line) for line in lines], not_prefix_set, common_prefix, prefix_pad) + for prefix in sorted(output_blocks): + output_check_lines.extend(output_blocks[prefix]) + unused_prefixes = (prefix_set - not_prefix_set) - used_prefixes if unused_prefixes: raise Error('unused prefixes: {}'.format(sorted(unused_prefixes))) |