blob: 37b8ab4b5fecd842e882584c6d091bfe94cd6c8e (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
|
/* Number of equal slices the input rows are divided into. */
#define workUnits 128

/*
 * x1_search_kernel - flag rows where data1 > -60 and data2 < 5.
 *
 * Each work-item handles one contiguous slice of (totalRows / workUnits)
 * rows, starting at get_global_id(0) * (totalRows / workUnits).  The slice is
 * processed four rows at a time with vector loads/stores, and the remaining
 * (slice % 4) rows are processed one by one.
 *
 * totalRows   total number of rows in the input buffers
 * data1       first input column
 * data2       second input column
 * resultMask  output; non-zero where the predicate holds, 0 otherwise
 *
 * Notes:
 *  - Because of the integer division, the last (totalRows % workUnits) rows
 *    are not visited by any work-item.
 *  - The global id is not range-checked; presumably the kernel is enqueued
 *    with exactly workUnits work-items -- TODO confirm against the host code.
 *  - OpenCL C vector relational/logical operators yield -1 (all bits set) per
 *    true component, whereas the scalar operators yield 1.  Rows written by
 *    the vector loop therefore hold -1 and rows written by the scalar tail
 *    hold 1.  This is kept as-is; consumers should test for non-zero.
 */
__kernel void x1_search_kernel(int totalRows,
                               __global int *data1,
                               __global int *data2,
                               __global int *resultMask)
{
    const int rowsPerUnit = totalRows / workUnits;

    /*
     * Nothing to do when a slice is empty (totalRows < workUnits) or when
     * totalRows is negative.  Without this guard a negative value would be
     * converted to a huge unsigned count below.
     */
    if (rowsPerUnit <= 0) {
        return;
    }

    /* Widen before multiplying so the offset is not computed in int. */
    size_t offset = (size_t)get_global_id(0) * (size_t)rowsPerUnit;
    size_t vecCount = (size_t)rowsPerUnit / 4;
    size_t tailCount = (size_t)rowsPerUnit % 4;

    /*
     * Counted loops that run zero times when the count is zero.  A do/while
     * here would execute once with a zero count, wrap the unsigned counter
     * around and run far past the end of the buffers.
     */
    for (; vecCount != 0; --vecCount, offset += 4) {
        int4 v1 = vload4(0, data1 + offset);
        int4 v2 = vload4(0, data2 + offset);
        int4 r = (v1 > -60) && (v2 < 5);

        vstore4(r, 0, resultMask + offset);
    }

    /* Scalar tail: the rows of the slice that do not fill a whole int4. */
    for (; tailCount != 0; --tailCount, ++offset) {
        resultMask[offset] = (data1[offset] > -60) && (data2[offset] < 5);
    }
}
|