aboutsummaryrefslogtreecommitdiff
path: root/libgfortran
diff options
context:
space:
mode:
author Harald Anlauf <anlauf@gmx.de> 2020-10-18 20:15:26 +0200
committer Harald Anlauf <anlauf@gmx.de> 2020-10-19 23:07:16 +0200
commit 53325dec8e01775af3eb9231d10f2afa1b8d5559 (patch)
tree c0411bd32b0826566324b3cf3b9802b9d7bba788 /libgfortran
parent 634ee90804cae1e08c8e25913288e0ecf0a5ea0a (diff)
PR libfortran/97063 - Wrong result for vector (step size is negative) * matrix
The MATMUL intrinsic provided a wrong result for rank-1 times rank-2 array
when a negative stride was used for addressing the elements of the rank-1
array, because a check on strides was erroneously placed before the check
on the rank.  Interchange order of checks.

libgfortran/ChangeLog:

	* m4/matmul_internal.m4: Move check for rank-1 times rank-2 before
	checks on strides for rank-2 times rank-2.
	* generated/matmul_c10.c: Regenerated.
	* generated/matmul_c16.c: Likewise.
	* generated/matmul_c4.c: Likewise.
	* generated/matmul_c8.c: Likewise.
	* generated/matmul_i1.c: Likewise.
	* generated/matmul_i16.c: Likewise.
	* generated/matmul_i2.c: Likewise.
	* generated/matmul_i4.c: Likewise.
	* generated/matmul_i8.c: Likewise.
	* generated/matmul_r10.c: Likewise.
	* generated/matmul_r16.c: Likewise.
	* generated/matmul_r4.c: Likewise.
	* generated/matmul_r8.c: Likewise.
	* generated/matmulavx128_c10.c: Likewise.
	* generated/matmulavx128_c16.c: Likewise.
	* generated/matmulavx128_c4.c: Likewise.
	* generated/matmulavx128_c8.c: Likewise.
	* generated/matmulavx128_i1.c: Likewise.
	* generated/matmulavx128_i16.c: Likewise.
	* generated/matmulavx128_i2.c: Likewise.
	* generated/matmulavx128_i4.c: Likewise.
	* generated/matmulavx128_i8.c: Likewise.
	* generated/matmulavx128_r10.c: Likewise.
	* generated/matmulavx128_r16.c: Likewise.
	* generated/matmulavx128_r4.c: Likewise.
	* generated/matmulavx128_r8.c: Likewise.

gcc/testsuite/ChangeLog:

	* gfortran.dg/matmul_20.f90: New test.

(cherry picked from commit cd6cd6aed195b4ec7d652e8b41d60b60e174304e)
Diffstat (limited to 'libgfortran')
-rw-r--r-- libgfortran/generated/matmul_c10.c 140
-rw-r--r-- libgfortran/generated/matmul_c16.c 140
-rw-r--r-- libgfortran/generated/matmul_c4.c 140
-rw-r--r-- libgfortran/generated/matmul_c8.c 140
-rw-r--r-- libgfortran/generated/matmul_i1.c 140
-rw-r--r-- libgfortran/generated/matmul_i16.c 140
-rw-r--r-- libgfortran/generated/matmul_i2.c 140
-rw-r--r-- libgfortran/generated/matmul_i4.c 140
-rw-r--r-- libgfortran/generated/matmul_i8.c 140
-rw-r--r-- libgfortran/generated/matmul_r10.c 140
-rw-r--r-- libgfortran/generated/matmul_r16.c 140
-rw-r--r-- libgfortran/generated/matmul_r4.c 140
-rw-r--r-- libgfortran/generated/matmul_r8.c 140
-rw-r--r-- libgfortran/generated/matmulavx128_c10.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_c16.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_c4.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_c8.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_i1.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_i16.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_i2.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_i4.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_i8.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_r10.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_r16.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_r4.c 56
-rw-r--r-- libgfortran/generated/matmulavx128_r8.c 56
-rw-r--r-- libgfortran/m4/matmul_internal.m4 28
27 files changed, 1288 insertions, 1288 deletions
diff --git a/libgfortran/generated/matmul_c10.c b/libgfortran/generated/matmul_c10.c
index e866a6a4df1..5a97ef5783c 100644
--- a/libgfortran/generated/matmul_c10.c
+++ b/libgfortran/generated/matmul_c10.c
@@ -590,20 +590,6 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_c10_avx (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_c10_avx2 (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_c10_avx512f (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_c10_vanilla (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_c10 (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_c16.c b/libgfortran/generated/matmul_c16.c
index e6605e89282..6dfef172498 100644
--- a/libgfortran/generated/matmul_c16.c
+++ b/libgfortran/generated/matmul_c16.c
@@ -590,20 +590,6 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_c16_avx (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_c16_avx2 (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_c16_avx512f (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_c16_vanilla (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_c16 (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_c4.c b/libgfortran/generated/matmul_c4.c
index e012fa200fd..08f7874e42f 100644
--- a/libgfortran/generated/matmul_c4.c
+++ b/libgfortran/generated/matmul_c4.c
@@ -590,20 +590,6 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_c4_avx (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_c4_avx2 (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_c4_avx512f (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_c4_vanilla (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_c4 (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_c8.c b/libgfortran/generated/matmul_c8.c
index 8c19b49deae..4529a0fbc76 100644
--- a/libgfortran/generated/matmul_c8.c
+++ b/libgfortran/generated/matmul_c8.c
@@ -590,20 +590,6 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_c8_avx (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_c8_avx2 (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_c8_avx512f (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_c8_vanilla (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_c8 (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_i1.c b/libgfortran/generated/matmul_i1.c
index 8ae4194366e..9e0b80d2af3 100644
--- a/libgfortran/generated/matmul_i1.c
+++ b/libgfortran/generated/matmul_i1.c
@@ -590,20 +590,6 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_i1_avx (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_i1_avx2 (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_i1_avx512f (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_i1_vanilla (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_i1 (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_i16.c b/libgfortran/generated/matmul_i16.c
index cfbf9206c18..7e4e3058531 100644
--- a/libgfortran/generated/matmul_i16.c
+++ b/libgfortran/generated/matmul_i16.c
@@ -590,20 +590,6 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_i16_avx (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_i16_avx2 (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_i16_avx512f (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_i16_vanilla (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_i16 (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_i2.c b/libgfortran/generated/matmul_i2.c
index 5a4aeed78e3..cf2eb3a1de3 100644
--- a/libgfortran/generated/matmul_i2.c
+++ b/libgfortran/generated/matmul_i2.c
@@ -590,20 +590,6 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_i2_avx (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_i2_avx2 (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_i2_avx512f (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_i2_vanilla (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_i2 (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_i4.c b/libgfortran/generated/matmul_i4.c
index 80592a04d14..7014ee74424 100644
--- a/libgfortran/generated/matmul_i4.c
+++ b/libgfortran/generated/matmul_i4.c
@@ -590,20 +590,6 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_i4_avx (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_i4_avx2 (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_i4_avx512f (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_i4_vanilla (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_i4 (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_i8.c b/libgfortran/generated/matmul_i8.c
index 7e4c5bcc1bb..ee456d0b00f 100644
--- a/libgfortran/generated/matmul_i8.c
+++ b/libgfortran/generated/matmul_i8.c
@@ -590,20 +590,6 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_i8_avx (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_i8_avx2 (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_i8_avx512f (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_i8_vanilla (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_i8 (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_r10.c b/libgfortran/generated/matmul_r10.c
index d97aa41315e..a0be38e8894 100644
--- a/libgfortran/generated/matmul_r10.c
+++ b/libgfortran/generated/matmul_r10.c
@@ -590,20 +590,6 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_r10_avx (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_r10_avx2 (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_r10_avx512f (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_r10_vanilla (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_r10 (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_r16.c b/libgfortran/generated/matmul_r16.c
index 82e8b502ba3..6d050eef7e0 100644
--- a/libgfortran/generated/matmul_r16.c
+++ b/libgfortran/generated/matmul_r16.c
@@ -590,20 +590,6 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_r16_avx (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_r16_avx2 (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_r16_avx512f (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_r16_vanilla (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_r16 (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_r4.c b/libgfortran/generated/matmul_r4.c
index 36ce7daf781..ed39fb3c3c5 100644
--- a/libgfortran/generated/matmul_r4.c
+++ b/libgfortran/generated/matmul_r4.c
@@ -590,20 +590,6 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_r4_avx (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_r4_avx2 (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_r4_avx512f (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_r4_vanilla (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_r4 (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
diff --git a/libgfortran/generated/matmul_r8.c b/libgfortran/generated/matmul_r8.c
index 9a81df189d5..53fba2faefa 100644
--- a/libgfortran/generated/matmul_r8.c
+++ b/libgfortran/generated/matmul_r8.c
@@ -590,20 +590,6 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -618,6 +604,20 @@ matmul_r8_avx (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
@@ -1158,20 +1158,6 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -1186,6 +1172,20 @@ matmul_r8_avx2 (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
@@ -1726,20 +1726,6 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -1754,6 +1740,20 @@ matmul_r8_avx512f (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
@@ -2308,20 +2308,6 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -2336,6 +2322,20 @@ matmul_r8_vanilla (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
@@ -2950,20 +2950,6 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -2978,6 +2964,20 @@ matmul_r8 (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_c10.c b/libgfortran/generated/matmulavx128_c10.c
index b5ffd030d4a..d0b417c39fd 100644
--- a/libgfortran/generated/matmulavx128_c10.c
+++ b/libgfortran/generated/matmulavx128_c10.c
@@ -555,20 +555,6 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_c10_avx128_fma3 (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_10 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_c10_avx128_fma4 (gfc_array_c10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_10 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_c16.c b/libgfortran/generated/matmulavx128_c16.c
index 32a355e424d..0137ba550e4 100644
--- a/libgfortran/generated/matmulavx128_c16.c
+++ b/libgfortran/generated/matmulavx128_c16.c
@@ -555,20 +555,6 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_c16_avx128_fma3 (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_16 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_c16_avx128_fma4 (gfc_array_c16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_16 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_c4.c b/libgfortran/generated/matmulavx128_c4.c
index 97b53d3300f..850bd2ba1db 100644
--- a/libgfortran/generated/matmulavx128_c4.c
+++ b/libgfortran/generated/matmulavx128_c4.c
@@ -555,20 +555,6 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_c4_avx128_fma3 (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_4 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_c4_avx128_fma4 (gfc_array_c4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_4 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_c8.c b/libgfortran/generated/matmulavx128_c8.c
index e73575e3b63..49d8b446ad9 100644
--- a/libgfortran/generated/matmulavx128_c8.c
+++ b/libgfortran/generated/matmulavx128_c8.c
@@ -555,20 +555,6 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_c8_avx128_fma3 (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_COMPLEX_8 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_c8_avx128_fma4 (gfc_array_c8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_COMPLEX_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_COMPLEX_8 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_i1.c b/libgfortran/generated/matmulavx128_i1.c
index 00885fa3139..8fc6d921b00 100644
--- a/libgfortran/generated/matmulavx128_i1.c
+++ b/libgfortran/generated/matmulavx128_i1.c
@@ -555,20 +555,6 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_i1_avx128_fma3 (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_1 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_i1_avx128_fma4 (gfc_array_i1 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_1)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_1 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_i16.c b/libgfortran/generated/matmulavx128_i16.c
index 942dc08fdb5..a3495570d52 100644
--- a/libgfortran/generated/matmulavx128_i16.c
+++ b/libgfortran/generated/matmulavx128_i16.c
@@ -555,20 +555,6 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_i16_avx128_fma3 (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_16 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_i16_avx128_fma4 (gfc_array_i16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_16 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_i2.c b/libgfortran/generated/matmulavx128_i2.c
index baa3c9fba2e..944eaf08cd1 100644
--- a/libgfortran/generated/matmulavx128_i2.c
+++ b/libgfortran/generated/matmulavx128_i2.c
@@ -555,20 +555,6 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_i2_avx128_fma3 (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_2 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_i2_avx128_fma4 (gfc_array_i2 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_2)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_2 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_i4.c b/libgfortran/generated/matmulavx128_i4.c
index 0c69623ba72..a8e270dd97c 100644
--- a/libgfortran/generated/matmulavx128_i4.c
+++ b/libgfortran/generated/matmulavx128_i4.c
@@ -555,20 +555,6 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_i4_avx128_fma3 (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_4 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_i4_avx128_fma4 (gfc_array_i4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_4 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_i8.c b/libgfortran/generated/matmulavx128_i8.c
index f8670020caa..9c7f4925687 100644
--- a/libgfortran/generated/matmulavx128_i8.c
+++ b/libgfortran/generated/matmulavx128_i8.c
@@ -555,20 +555,6 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_i8_avx128_fma3 (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_INTEGER_8 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_i8_avx128_fma4 (gfc_array_i8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_INTEGER_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_INTEGER_8 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_r10.c b/libgfortran/generated/matmulavx128_r10.c
index 24fb2972d1d..e2a44cf7e0d 100644
--- a/libgfortran/generated/matmulavx128_r10.c
+++ b/libgfortran/generated/matmulavx128_r10.c
@@ -555,20 +555,6 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_r10_avx128_fma3 (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_10 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_r10_avx128_fma4 (gfc_array_r10 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_10)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_10 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_r16.c b/libgfortran/generated/matmulavx128_r16.c
index 231d04db0ad..186b226ebc7 100644
--- a/libgfortran/generated/matmulavx128_r16.c
+++ b/libgfortran/generated/matmulavx128_r16.c
@@ -555,20 +555,6 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_r16_avx128_fma3 (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_16 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_r16_avx128_fma4 (gfc_array_r16 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_16)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_16 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_r4.c b/libgfortran/generated/matmulavx128_r4.c
index c58228017bf..e21ea39f124 100644
--- a/libgfortran/generated/matmulavx128_r4.c
+++ b/libgfortran/generated/matmulavx128_r4.c
@@ -555,20 +555,6 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_r4_avx128_fma3 (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_4 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_r4_avx128_fma4 (gfc_array_r4 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_4)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_4 *restrict abase_x;
diff --git a/libgfortran/generated/matmulavx128_r8.c b/libgfortran/generated/matmulavx128_r8.c
index e93aeec8910..e7efd075889 100644
--- a/libgfortran/generated/matmulavx128_r8.c
+++ b/libgfortran/generated/matmulavx128_r8.c
@@ -555,20 +555,6 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -583,6 +569,20 @@ matmul_r8_avx128_fma3 (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
@@ -1124,20 +1124,6 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const GFC_REAL_8 *restrict bbase_y;
@@ -1152,6 +1138,20 @@ matmul_r8_avx128_fma4 (gfc_array_r8 * const restrict retarray,
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = (GFC_REAL_8)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const GFC_REAL_8 *restrict abase_x;
diff --git a/libgfortran/m4/matmul_internal.m4 b/libgfortran/m4/matmul_internal.m4
index 32a1e01e12f..13fd7696238 100644
--- a/libgfortran/m4/matmul_internal.m4
+++ b/libgfortran/m4/matmul_internal.m4
@@ -506,20 +506,6 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
}
}
}
- else if (axstride < aystride)
- {
- for (y = 0; y < ycount; y++)
- for (x = 0; x < xcount; x++)
- dest[x*rxstride + y*rystride] = ('rtype_name`)0;
-
- for (y = 0; y < ycount; y++)
- for (n = 0; n < count; n++)
- for (x = 0; x < xcount; x++)
- /* dest[x,y] += a[x,n] * b[n,y] */
- dest[x*rxstride + y*rystride] +=
- abase[x*axstride + n*aystride] *
- bbase[n*bxstride + y*bystride];
- }
else if (GFC_DESCRIPTOR_RANK (a) == 1)
{
const 'rtype_name` *restrict bbase_y;
@@ -534,6 +520,20 @@ sinclude(`matmul_asm_'rtype_code`.m4')dnl
dest[y*rxstride] = s;
}
}
+ else if (axstride < aystride)
+ {
+ for (y = 0; y < ycount; y++)
+ for (x = 0; x < xcount; x++)
+ dest[x*rxstride + y*rystride] = ('rtype_name`)0;
+
+ for (y = 0; y < ycount; y++)
+ for (n = 0; n < count; n++)
+ for (x = 0; x < xcount; x++)
+ /* dest[x,y] += a[x,n] * b[n,y] */
+ dest[x*rxstride + y*rystride] +=
+ abase[x*axstride + n*aystride] *
+ bbase[n*bxstride + y*bystride];
+ }
else
{
const 'rtype_name` *restrict abase_x;