/* Origin: gcc/testsuite/gcc.target/riscv/rvv/base/pr112431-13.c
   (blob fde7076d34f7d1e4226620d8cd1d853931413db1).  */
/* { dg-do compile } */
/* { dg-options "-march=rv64gcv -mabi=lp64d -O3" } */

#include "riscv_vector.h"

/* Return the sum of the sixteen double arguments.

   The terms are folded into a running total strictly left to right, from
   SUM0 through SUM15.  The function carries the noinline attribute, so it
   is not inlined into its callers.  */
double __attribute__ ((noinline))
sumation (double sum0, double sum1, double sum2, double sum3,
	  double sum4, double sum5, double sum6, double sum7,
	  double sum8, double sum9, double sum10, double sum11,
	  double sum12, double sum13, double sum14, double sum15)
{
  double total = sum0;

  total += sum1;
  total += sum2;
  total += sum3;
  total += sum4;
  total += sum5;
  total += sum6;
  total += sum7;
  total += sum8;
  total += sum9;
  total += sum10;
  total += sum11;
  total += sum12;
  total += sum13;
  total += sum14;
  total += sum15;

  return total;
}

/* Compile-only test body (see the dg-do directive at the top of the file).

   Each loop iteration loads sixteen vfloat32m1_t values, widen-converts
   every one of them to vint64m2_t with the _rtz_ float-to-integer
   intrinsic, moves a scalar int64 out of each result and adds the
   sumation () of those sixteen scalars to the running total.

   BUF is the start of the input bytes and LEN their count.  Returns the
   accumulated total, which is 0 when the loop body never runs.  */
double
foo (char const *buf, size_t len)
{
  double sum = 0;
  /* VL is obtained from the e8/m8 vsetvlmax query, yet it is passed to the
     e32/m1 loads and to the widening conversions below.  */
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* NOTE(review): the guard only requires STEP (vl * 4) bytes to remain,
     while the body advances IT by VL sixteen times.  Presumably harmless
     because the test is only compiled, never run -- confirm before reusing
     this as an execution test.  */
  for (; it + step <= end;)
    {
      /* Load phase: sixteen loads.  IT is a char pointer, so every
	 `it += vl' moves it forward by VL bytes.  */
      vfloat32m1_t v0 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v1 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v2 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v3 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v4 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v5 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v6 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v7 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v8 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v9 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v10 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v11 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v12 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v13 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v14 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v15 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      
      /* A nop with a "memory" clobber sits between the load phase and the
	 conversion phase; presumably it keeps the compiler from interleaving
	 the two, so all sixteen loaded values are live at once.  */
      asm volatile("nop" ::: "memory");
      /* Conversion phase: f32 (m1) source, i64 (m2) destination, using the
	 _rtz_ variant of the widening conversion.  */
      vint64m2_t vw0 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v0, vl);
      vint64m2_t vw1 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v1, vl);
      vint64m2_t vw2 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v2, vl);
      vint64m2_t vw3 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v3, vl);
      vint64m2_t vw4 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v4, vl);
      vint64m2_t vw5 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v5, vl);
      vint64m2_t vw6 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v6, vl);
      vint64m2_t vw7 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v7, vl);
      vint64m2_t vw8 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v8, vl);
      vint64m2_t vw9 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v9, vl);
      vint64m2_t vw10 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v10, vl);
      vint64m2_t vw11 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v11, vl);
      vint64m2_t vw12 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v12, vl);
      vint64m2_t vw13 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v13, vl);
      vint64m2_t vw14 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v14, vl);
      vint64m2_t vw15 = __riscv_vfwcvt_rtz_x_f_v_i64m2 (v15, vl);

      /* Second barrier, between the conversion phase and the scalar
	 extraction phase.  */
      asm volatile("nop" ::: "memory");
      /* Extraction phase: each intrinsic yields an int64 scalar, which the
	 initialisation implicitly converts to double.  */
      double sum0 = __riscv_vmv_x_s_i64m2_i64 (vw0);
      double sum1 = __riscv_vmv_x_s_i64m2_i64 (vw1);
      double sum2 = __riscv_vmv_x_s_i64m2_i64 (vw2);
      double sum3 = __riscv_vmv_x_s_i64m2_i64 (vw3);
      double sum4 = __riscv_vmv_x_s_i64m2_i64 (vw4);
      double sum5 = __riscv_vmv_x_s_i64m2_i64 (vw5);
      double sum6 = __riscv_vmv_x_s_i64m2_i64 (vw6);
      double sum7 = __riscv_vmv_x_s_i64m2_i64 (vw7);
      double sum8 = __riscv_vmv_x_s_i64m2_i64 (vw8);
      double sum9 = __riscv_vmv_x_s_i64m2_i64 (vw9);
      double sum10 = __riscv_vmv_x_s_i64m2_i64 (vw10);
      double sum11 = __riscv_vmv_x_s_i64m2_i64 (vw11);
      double sum12 = __riscv_vmv_x_s_i64m2_i64 (vw12);
      double sum13 = __riscv_vmv_x_s_i64m2_i64 (vw13);
      double sum14 = __riscv_vmv_x_s_i64m2_i64 (vw14);
      double sum15 = __riscv_vmv_x_s_i64m2_i64 (vw15);

      /* sumation is noinline, so this remains a real sixteen-argument
	 call inside the loop.  */
      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
    }
  return sum;
}

/* Same shape as foo, except that the widening float-to-integer conversion
   uses __riscv_vfwcvt_x_f_v_i64m2, i.e. the intrinsic without _rtz_ in its
   name.

   Each loop iteration loads sixteen vfloat32m1_t values, widen-converts
   every one of them to vint64m2_t, moves a scalar int64 out of each result
   and adds the sumation () of those sixteen scalars to the running total.

   BUF is the start of the input bytes and LEN their count.  Returns the
   accumulated total, which is 0 when the loop body never runs.  */
double
foo2 (char const *buf, size_t len)
{
  double sum = 0;
  /* VL is obtained from the e8/m8 vsetvlmax query, yet it is passed to the
     e32/m1 loads and to the widening conversions below.  */
  size_t vl = __riscv_vsetvlmax_e8m8 ();
  size_t step = vl * 4;
  const char *it = buf, *end = buf + len;
  /* NOTE(review): the guard only requires STEP (vl * 4) bytes to remain,
     while the body advances IT by VL sixteen times.  Presumably harmless
     because the test is only compiled, never run -- confirm before reusing
     this as an execution test.  */
  for (; it + step <= end;)
    {
      /* Load phase: sixteen loads.  IT is a char pointer, so every
	 `it += vl' moves it forward by VL bytes.  */
      vfloat32m1_t v0 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v1 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v2 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v3 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v4 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v5 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v6 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v7 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v8 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v9 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v10 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v11 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v12 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v13 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v14 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      vfloat32m1_t v15 = __riscv_vle32_v_f32m1 ((void *) it, vl);
      it += vl;
      
      /* A nop with a "memory" clobber sits between the load phase and the
	 conversion phase; presumably it keeps the compiler from interleaving
	 the two, so all sixteen loaded values are live at once.  */
      asm volatile("nop" ::: "memory");
      /* Conversion phase: f32 (m1) source, i64 (m2) destination, using the
	 conversion intrinsic that has no _rtz_ in its name.  */
      vint64m2_t vw0 = __riscv_vfwcvt_x_f_v_i64m2 (v0, vl);
      vint64m2_t vw1 = __riscv_vfwcvt_x_f_v_i64m2 (v1, vl);
      vint64m2_t vw2 = __riscv_vfwcvt_x_f_v_i64m2 (v2, vl);
      vint64m2_t vw3 = __riscv_vfwcvt_x_f_v_i64m2 (v3, vl);
      vint64m2_t vw4 = __riscv_vfwcvt_x_f_v_i64m2 (v4, vl);
      vint64m2_t vw5 = __riscv_vfwcvt_x_f_v_i64m2 (v5, vl);
      vint64m2_t vw6 = __riscv_vfwcvt_x_f_v_i64m2 (v6, vl);
      vint64m2_t vw7 = __riscv_vfwcvt_x_f_v_i64m2 (v7, vl);
      vint64m2_t vw8 = __riscv_vfwcvt_x_f_v_i64m2 (v8, vl);
      vint64m2_t vw9 = __riscv_vfwcvt_x_f_v_i64m2 (v9, vl);
      vint64m2_t vw10 = __riscv_vfwcvt_x_f_v_i64m2 (v10, vl);
      vint64m2_t vw11 = __riscv_vfwcvt_x_f_v_i64m2 (v11, vl);
      vint64m2_t vw12 = __riscv_vfwcvt_x_f_v_i64m2 (v12, vl);
      vint64m2_t vw13 = __riscv_vfwcvt_x_f_v_i64m2 (v13, vl);
      vint64m2_t vw14 = __riscv_vfwcvt_x_f_v_i64m2 (v14, vl);
      vint64m2_t vw15 = __riscv_vfwcvt_x_f_v_i64m2 (v15, vl);

      /* Second barrier, between the conversion phase and the scalar
	 extraction phase.  */
      asm volatile("nop" ::: "memory");
      /* Extraction phase: each intrinsic yields an int64 scalar, which the
	 initialisation implicitly converts to double.  */
      double sum0 = __riscv_vmv_x_s_i64m2_i64 (vw0);
      double sum1 = __riscv_vmv_x_s_i64m2_i64 (vw1);
      double sum2 = __riscv_vmv_x_s_i64m2_i64 (vw2);
      double sum3 = __riscv_vmv_x_s_i64m2_i64 (vw3);
      double sum4 = __riscv_vmv_x_s_i64m2_i64 (vw4);
      double sum5 = __riscv_vmv_x_s_i64m2_i64 (vw5);
      double sum6 = __riscv_vmv_x_s_i64m2_i64 (vw6);
      double sum7 = __riscv_vmv_x_s_i64m2_i64 (vw7);
      double sum8 = __riscv_vmv_x_s_i64m2_i64 (vw8);
      double sum9 = __riscv_vmv_x_s_i64m2_i64 (vw9);
      double sum10 = __riscv_vmv_x_s_i64m2_i64 (vw10);
      double sum11 = __riscv_vmv_x_s_i64m2_i64 (vw11);
      double sum12 = __riscv_vmv_x_s_i64m2_i64 (vw12);
      double sum13 = __riscv_vmv_x_s_i64m2_i64 (vw13);
      double sum14 = __riscv_vmv_x_s_i64m2_i64 (vw14);
      double sum15 = __riscv_vmv_x_s_i64m2_i64 (vw15);

      /* sumation is noinline, so this remains a real sixteen-argument
	 call inside the loop.  */
      sum += sumation (sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum8,
		       sum9, sum10, sum11, sum12, sum13, sum14, sum15);
    }
  return sum;
}

/* { dg-final { scan-assembler-not {vmv1r} } } */
/* { dg-final { scan-assembler-not {vmv2r} } } */
/* { dg-final { scan-assembler-not {vmv4r} } } */
/* { dg-final { scan-assembler-not {vmv8r} } } */
/* { dg-final { scan-assembler-not {csrr} } } */