You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

92 lines
2.7 KiB

// Build-time configuration for the packed-value image kernels in this file.
// TEST, X and Y are not referenced in the visible part of this file.
#define TEST
#define X 0
#define Y 26
// USE_DBL selects the double/long variant of the packing parameters below;
// when it is not defined the float/int variant in the #else branch is used.
#define USE_DBL
#ifdef USE_DBL
// TYPE_FLT is the value type carried by one pixel; TYPE_INT is the integer
// type in which its bit pattern is assembled and taken apart.
#define TYPE_FLT double
#define TYPE_INT long
// Each of the four pixel channels contributes the bits selected by MASK,
// placed SHIFT bits above the previous channel (4 x 16 bits in this variant).
#define MASK 0xFFFF
#define SHIFT 16
// Value matches the smallest positive subnormal double. The macro name is
// spelled "EPSILSON" (sic) and is not referenced in the visible part of this
// file.
#define EPSILSON 4.94065645841247E-324
#else
// Single-precision variant: 4 x 8 bits per pixel.
#define TYPE_FLT float
#define TYPE_INT int
#define MASK 0x00FF
#define SHIFT 8
// Value matches the smallest positive subnormal float.
#define EPSILSON 1.401298E-45
#endif
// Sampler used by read_data: unnormalized (integer) pixel coordinates, clamped
// to the image edge. Nearest filtering is mandatory here because the image is
// read with read_imageui; the OpenCL C specification leaves the values
// returned by integer image reads undefined for any other filter mode.
constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                             CLK_ADDRESS_CLAMP_TO_EDGE |
                             CLK_FILTER_NEAREST;
/*
 * Reads the pixel at (x, z) from input_frame and reassembles the TYPE_FLT
 * value whose bit pattern is spread across the pixel's four unsigned channels.
 *
 * Channel x supplies the lowest MASK-wide bit field; channels y, z and w
 * follow, each placed SHIFT bits above the previous one.
 *
 * input_frame  image read through the file-level `sampler`
 * x            first image coordinate of the pixel
 * z            second image coordinate of the pixel
 *
 * Returns the assembled bits reinterpreted as TYPE_FLT.
 */
TYPE_FLT read_data(image2d_t input_frame, int x, int z)
{
    const uint4 pixel = read_imageui(input_frame, sampler, (int2)(x, z));

    // Reinterpret through a union instead of casting a TYPE_INT pointer to a
    // TYPE_FLT pointer: OpenCL C explicitly allows reading a union member of
    // a different type than the one last written, whereas the pointer cast
    // breaks the aliasing rules.
    union {
        TYPE_INT bits;
        TYPE_FLT value;
    } packed;

    packed.bits = ((TYPE_INT)pixel.x & MASK) |
                  (((TYPE_INT)pixel.y & MASK) << SHIFT) |
                  (((TYPE_INT)pixel.z & MASK) << (SHIFT * 2)) |
                  (((TYPE_INT)pixel.w & MASK) << (SHIFT * 3));

    return packed.value;
}
/*
 * Computes the maximum decoded pixel value of each work-group and copies the
 * input image to the output image unchanged.
 *
 * Every work-item decodes its pixel with read_data and stores it in the local
 * buffer `max` at index get_local_id(0). The group then reduces that buffer
 * in place so that max[0] holds the group maximum, which the work-item with
 * local id 0 writes to scratch_pad[get_group_id(1)]. Finally each work-item
 * re-packs its own value into four channels and writes it to output_frame at
 * its global position.
 *
 * input_frame   image holding one packed TYPE_FLT value per pixel
 * output_frame  image receiving the re-packed copy of each input pixel
 * max           local scratch buffer, indexed by get_local_id(0); must hold
 *               at least get_local_size(0) elements
 * scratch_pad   global buffer receiving one maximum per work-group, indexed
 *               by get_group_id(1)
 *
 * NOTE(review): the local buffer is indexed by dimension 0 only and the
 * result slot by dimension 1 only. This presumably expects work-groups of
 * shape (N, 1) with a single group per dimension-1 index; with any other
 * launch geometry several work-items or groups share a slot. Confirm against
 * the host code.
 */
kernel void Max(read_only image2d_t input_frame, read_write image2d_t output_frame, local double* max, global double* scratch_pad) {
    const int2 gid = (int2)(get_global_id(0), get_global_id(1));
    const TYPE_FLT input = read_data(input_frame, gid.x, gid.y);

    const uint local_id = get_local_id(0);
    const uint group_size = get_local_size(0);
    max[local_id] = input;

    // Debug trace emitted once, by the work-item at the global origin.
    // The work-item query functions return size_t, so the values are cast
    // to int to match the %d conversions.
    if (gid.x == 0 && gid.y == 0)
    {
        printf("gid [%d] ", (int)get_group_id(0));
        printf("lid0 [%d] ", (int)get_local_id(0));
        printf("lid1 [%d] ", (int)get_local_id(1));
        printf("lis0 [%d] ", (int)get_local_size(0));
        printf("lis1 [%d] ", (int)get_local_size(1));
    }

    // In-place reduction over the first `active` slots. Each step halves the
    // active range; when `active` is odd, the last slot has no partner and is
    // folded into slot 0 so that no element is lost (this also covers an odd
    // work-group size on the very first step).
    for (uint active = group_size; active > 1; active /= 2)
    {
        const uint stride = active / 2;

        // Make the writes of the previous step (or the initial store) visible
        // to the whole group. The loop bounds are uniform across the group,
        // so every work-item reaches this barrier the same number of times.
        barrier(CLK_LOCAL_MEM_FENCE);

        if (local_id < stride)
        {
            max[local_id] = max[local_id] > max[local_id + stride] ? max[local_id] : max[local_id + stride];
        }

        // Slot active - 1 (== 2 * stride) is not written by anyone during
        // this step, so work-item 0 can read it without a further barrier.
        if (local_id == 0 && (active % 2) != 0)
        {
            max[0] = max[0] > max[active - 1] ? max[0] : max[active - 1];
        }
    }

    // Publish the group maximum. Work-item 0 is the only writer of max[0],
    // so it needs no additional barrier before reading it back.
    if (local_id == 0)
    {
        const double group_max = max[0];
        scratch_pad[get_group_id(1)] = group_max;
        printf("out[%d]: %.15f | ", (int)get_group_id(1), group_max);
    }

    // Split the value's bit pattern back into four channels. A union is used
    // for the reinterpretation instead of a pointer cast, which would break
    // the aliasing rules.
    union {
        TYPE_FLT value;
        TYPE_INT bits;
    } unpacked;
    unpacked.value = input;
    const TYPE_INT out = unpacked.bits;

    uint4 pixel;
    pixel.x = (uint)(out & MASK);
    pixel.y = (uint)((out >> SHIFT) & MASK);
    pixel.z = (uint)((out >> (SHIFT * 2)) & MASK);
    pixel.w = (uint)((out >> (SHIFT * 3)) & MASK);
    write_imageui(output_frame, gid, pixel);
}