// Evaluates to the larger of x and y.
// Each argument and the whole expansion are parenthesised so that arguments
// containing low-precedence operators (e.g. `a & b`, `c ? d : e`) are compared
// as written instead of being re-associated with the `>` or `?:` of the macro.
// NOTE: the selected argument is evaluated twice; do not pass expressions
// with side effects (e.g. `i++`).
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
| |
/**
 * Folds the maximum of array[0..n) into *max.
 *
 * Every thread walks the input starting at its global index and advancing by
 * gridDim.x * blockDim.x, keeping a private running maximum. The block then
 * combines the per-thread maxima in shared memory, and thread 0 of each block
 * merges the block result into *max with atomicMax.
 *
 * Launch assumptions: only the .x components of the grid and block are used,
 * so a 1-D launch is expected. blockDim.x may be any value up to 1024 and does
 * not have to be a power of two. The kernel uses 4 KB of static shared memory.
 *
 * @param array  Input values; the first n elements are read.
 * @param max    In/out accumulator. It must hold a valid starting value before
 *               the launch; afterwards it holds the larger of that starting
 *               value and every element of the input.
 * @param mutex  Not read or written. Kept in the signature so existing launch
 *               sites continue to compile; the merge into *max is done with a
 *               single atomic instead of a spin lock.
 * @param n      Number of input elements. When n == 0 the kernel returns
 *               without touching array or *max.
 */
__global__ void find_max(const int * array, int * max, int * mutex, unsigned int n) {
    // One slot per thread of the block; 1024 is the largest block size CUDA
    // permits, so any legal blockDim.x fits.
    __shared__ int partial_res[1024];

    (void)mutex;

    // n is the same for every thread, so the whole grid leaves here together
    // and no thread is left waiting at a barrier below.
    if (n == 0) {
        return;
    }

    // 64-bit indices: with n close to UINT_MAX a 32-bit index + stride would
    // wrap around and re-enter the loop instead of terminating.
    const unsigned long long stride =
        (unsigned long long)gridDim.x * blockDim.x;
    unsigned long long input_idx =
        (unsigned long long)blockIdx.x * blockDim.x + threadIdx.x;

    // array[0] is a valid seed because n >= 1 here; threads whose first index
    // is already past the end contribute this value unchanged.
    int local_max = array[0];
    for (; input_idx < n; input_idx += stride) {
        const int value = array[input_idx];
        if (value > local_max) {
            local_max = value;
        }
    }

    partial_res[threadIdx.x] = local_max;
    __syncthreads();

    // Block reduction that works for any block size: each round folds the
    // upper part [half, active) onto the lower part [0, active - half).
    // Because active - half <= half, the slots written and the slots read in
    // one round never overlap, and an odd middle element simply carries over.
    for (unsigned int active = blockDim.x; active > 1; ) {
        const unsigned int half = (active + 1) / 2;
        if (threadIdx.x + half < active) {
            const int other = partial_res[threadIdx.x + half];
            if (other > partial_res[threadIdx.x]) {
                partial_res[threadIdx.x] = other;
            }
        }
        // Reached by every thread of the block (outside the conditional).
        __syncthreads();
        active = half;
    }

    // One atomic per block publishes the block result; this replaces the
    // lock / plain read-modify-write / unlock sequence.
    if (threadIdx.x == 0) {
        atomicMax(max, partial_res[0]);
    }
}