// OpenCL C source: packed floating-point image access (read_data) and the Sri kernel.

// Build-time configuration.
//
// TEST: when defined, the Sri kernel prints the filter window gathered by the
// work-item whose global id equals (X, Y).
// #define TEST
#define X 95
#define Y 22

// USE_DBL: when defined, samples are handled as double and reinterpreted
// through long; otherwise as float reinterpreted through int.
//#define USE_DBL
#ifdef USE_DBL
#define TYPE_FLT double
#define TYPE_INT long
// Each of the four image channels carries SHIFT bits of the packed value;
// MASK selects those bits (4 x 16 bits here).
#define MASK 0xFFFF
#define SHIFT 16
// Smallest positive subnormal double. Not referenced in the code visible here;
// the (misspelled) name is kept in case other sources rely on it.
#define EPSILSON 4.94065645841247E-324
#else
#define TYPE_FLT float
#define TYPE_INT int
// Each of the four image channels carries SHIFT bits of the packed value;
// MASK selects those bits (4 x 8 bits here).
#define MASK 0x00FF
#define SHIFT 8
// Smallest positive subnormal float. Not referenced in the code visible here;
// the (misspelled) name is kept in case other sources rely on it.
#define EPSILSON 1.401298E-45
#endif

// Sampler used for all image reads in this file: unnormalized pixel
// coordinates, out-of-range coordinates clamped to the image edge, and
// nearest-neighbour filtering.
// read_imageui() supports only CLK_FILTER_NEAREST; with any other filter mode
// the values it returns are undefined, so linear filtering must not be used.
constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                             CLK_ADDRESS_CLAMP_TO_EDGE |
                             CLK_FILTER_NEAREST;

// Parameters passed by value to the Sri kernel.
// NOTE(review): the host-side structure must have the same layout (four
// consecutive ints) - confirm against the host code.
struct input {
    int state;   // Processing pass selector; the kernel handles 1..6.
    int sri;     // Filter level: 0 copies the input through, 1..5 select the
                 // window size and blending factor used in state 6.
    int width;   // Image width in pixels (bounds check, mean normalisation).
    int height;  // Image height in pixels (bounds check, mean normalisation).
};

/*
 * Reads one pixel of input_frame and rebuilds the TYPE_FLT value whose bit
 * pattern is spread over the four unsigned channels of that pixel.
 *
 * Channel x holds the least significant SHIFT bits, followed by y, z and w;
 * only the low MASK bits of every channel are used.
 *
 * input_frame  image to read from (sampled through the file-level sampler)
 * x            pixel column
 * z            pixel row
 *
 * Returns the value obtained by reinterpreting the reassembled bits as
 * TYPE_FLT (no numeric conversion takes place).
 */
TYPE_FLT read_data(image2d_t input_frame, int x, int z)
{
    // OpenCL C allows reading a union member other than the one last written,
    // which reinterprets the bits without aliasing an object through a
    // pointer of a different type.
    union {
        unsigned TYPE_INT bits;
        TYPE_FLT value;
    } packed;

    uint4 pixel = read_imageui(input_frame, sampler, (int2)(x, z));

    // Assemble in the unsigned counterpart of TYPE_INT so the top channel is
    // never shifted into a sign bit.
    packed.bits = ((unsigned TYPE_INT)pixel.x & MASK) |
                  (((unsigned TYPE_INT)pixel.y & MASK) << SHIFT) |
                  (((unsigned TYPE_INT)pixel.z & MASK) << (SHIFT * 2)) |
                  (((unsigned TYPE_INT)pixel.w & MASK) << (SHIFT * 3));

    return packed.value;
}

/*
 * Sums the first group_size entries of the local array sum into sum[0].
 *
 * Contains barriers, so it must be called by every work-item of the
 * work-group. Each round folds the upper half of the active range onto the
 * lower half; when the active range has an odd length, work-item 0 also adds
 * the element left over at index 2 * stride, so group sizes that are not a
 * power of two are handled.
 *
 * sum         local scratch array, one entry per work-item, already filled
 * local_id    index of the calling work-item within the group
 * group_size  number of entries to sum
 */
void reduce_local_sum(local TYPE_FLT* sum, uint local_id, uint group_size)
{
    // Only work-item 0 consumes is_odd; tracking it per work-item is harmless.
    uint is_odd = group_size % 2;

    for (uint stride = group_size / 2; stride > 0; stride /= 2)
    {
        // Make the writes of the previous round (or the initial fill) visible.
        barrier(CLK_LOCAL_MEM_FENCE);

        if (local_id < stride)
        {
            sum[local_id] += sum[local_id + stride];
        }

        if (local_id == 0)
        {
            if (is_odd)
            {
                sum[0] += sum[2 * stride];
            }
            is_odd = stride % 2;
        }
    }
}

/*
 * Multi-pass kernel operating on packed TYPE_FLT images (see read_data()).
 * The work done depends on params:
 *
 *   sri == 0          copy the input sample to the output unchanged.
 *   state == 1 or 4   sum the input samples of the work-group (dimension 0)
 *                     and store the total in scratch_pad[get_group_id(1)];
 *                     nothing is written to output_frame.
 *   state == 2 or 5   sum scratch_pad[gid.x] over the work-group, divide by
 *                     width * height and store the result in scratch_pad[0];
 *                     nothing is written to output_frame.
 *   state == 3        write the squared deviation of the input sample from
 *                     scratch_pad[0].
 *   state == 6        blend the input sample with the mean of its p x q
 *                     neighbourhood, weighted by alpha * s2 / (s2 + rho2),
 *                     where s2 is the neighbourhood variance and rho2 is
 *                     scratch_pad[0]; negative results are clamped to 0.
 *                     Work-items whose window would leave the image copy the
 *                     input sample instead.
 *   any other state   write 0.
 *
 * NOTE(review): presumably the host runs states 1-2 to obtain the image mean,
 * 3-5 to obtain the global variance and 6 to filter, with one work-group per
 * image row in states 1 and 4 - confirm against the host code.
 *
 * input_frame   packed source image
 * output_frame  packed destination image
 * sum           local scratch, at least get_local_size(0) entries
 * scratch_pad   global scratch shared between passes
 * params        pass selector, filter level and image size
 */
kernel void Sri(read_only image2d_t input_frame, read_write image2d_t output_frame, local TYPE_FLT* sum, global TYPE_FLT* scratch_pad, struct input params)
{
    int2 gid = (int2)(get_global_id(0), get_global_id(1));
    TYPE_FLT input = read_data(input_frame, gid.x, gid.y);
    TYPE_FLT output_data = 0;

    if (params.sri == 0)
    {
        output_data = input;
    }
    else if (params.state == 1 || params.state == 4)
    {
        uint local_id = get_local_id(0);

        sum[local_id] = input;
        reduce_local_sum(sum, local_id, get_local_size(0));

        // One partial sum per work-group, indexed by the group's row.
        if (local_id == 0)
        {
            scratch_pad[get_group_id(1)] = sum[0];
        }

        return;
    }
    else if (params.state == 2 || params.state == 5)
    {
        uint local_id = get_local_id(0);

        sum[local_id] = scratch_pad[gid.x];
        reduce_local_sum(sum, local_id, get_local_size(0));

        // Turn the grand total into a per-pixel average.
        if (local_id == 0)
        {
            scratch_pad[0] = sum[0] / (params.width * params.height);
        }

        return;
    }
    else if (params.state == 3)
    {
        TYPE_FLT deviation = input - scratch_pad[0];
        output_data = deviation * deviation;
    }
    else if (params.state == 6)
    {
        // Window edge length for filter levels 1..5. The level is clamped so
        // an unexpected params.sri can never index outside the table.
        const int window_edge[5] = {3, 3, 5, 7, 7};
        int level = clamp(params.sri, 1, 5);

        int p = window_edge[level - 1];      // window extent along y
        int q = params.width > 1 ? p : 1;    // window extent along x (1 for single-column data)

        TYPE_FLT alpha = 1 - 0.25f * (level - 1);
        TYPE_FLT rho2 = scratch_pad[0];

        int lag1 = (p - 1) / 2;
        int lag2 = (q - 1) / 2;
        int window_size = p * q;

        if (gid.x >= lag2 && gid.x < params.width - lag2 && gid.y >= lag1 && gid.y < params.height - lag1)
        {
            // Largest window is 7 x 7. Every entry below window_size is
            // written by the gather loop, so no separate clearing is needed.
            TYPE_FLT window[49];

            for (int i = -lag1; i <= lag1; i++)
            {
                for (int j = -lag2; j <= lag2; j++)
                {
                    window[(i + lag1) * q + (j + lag2)] = read_data(input_frame, gid.x + j, gid.y + i);
                }
            }

#ifdef TEST
            if (gid.x == X && gid.y == Y)
            {
                for (int i = 0; i < window_size; i++)
                {
                    printf("[%d] > %f; ", i, window[i]);
                }
            }
#endif

            TYPE_FLT mean = 0;
            for (int i = 0; i < window_size; i++)
            {
                mean += window[i];
            }
            mean /= window_size;

            TYPE_FLT sigma2 = 0;
            for (int i = 0; i < window_size; i++)
            {
                TYPE_FLT deviation = window[i] - mean;
                sigma2 += deviation * deviation;
            }
            sigma2 /= window_size;

            // A flat window with rho2 == 0 would give 0 / 0; use a zero gain
            // there, which yields the local mean.
            TYPE_FLT denominator = sigma2 + rho2;
            TYPE_FLT gain = denominator > 0 ? sigma2 / denominator : 0;

            output_data = mean + alpha * gain * (input - mean);
        }
        else
        {
            output_data = input;
        }

        if (output_data < 0)
        {
            output_data = 0;
        }
    }

    // Split the bit pattern of the result over the four channels, least
    // significant SHIFT bits in x. Reading the other union member
    // reinterprets the bits without a pointer cast.
    union {
        TYPE_FLT value;
        unsigned TYPE_INT bits;
    } packed;
    packed.value = output_data;

    uint4 pixel;
    pixel.x = (uint)(packed.bits & MASK);
    pixel.y = (uint)((packed.bits >> SHIFT) & MASK);
    pixel.z = (uint)((packed.bits >> (SHIFT * 2)) & MASK);
    pixel.w = (uint)((packed.bits >> (SHIFT * 3)) & MASK);

    write_imageui(output_frame, gid, pixel);
}