Parallel/OpenMP at main · modi2meet/Parallel · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
/*
 *  Name: Meet Dineshkumar Modi
 *  UID: 906-244-078
 */

#include <stdlib.h>
#include <omp.h>

#include "utils.h"
#include "parallel.h"

/*
 *  PHASE 1: compute the mean pixel value of every channel.
 *
 *  img       flattened image; pixel (row, col) lives at index
 *            row * num_cols + col and holds NUM_CHANNELS bytes.
 *  num_rows  number of image rows.
 *  num_cols  number of image columns.
 *  mean      output; mean[ch] receives the sum of channel ch over all
 *            pixels divided by num_rows * num_cols.
 *
 *  Rows are distributed across threads. Each thread accumulates into its
 *  own copy of the per-channel sums (array-section reduction), and the
 *  copies are combined when the loop ends, so no two threads write the
 *  same accumulator.
 */
void mean_pixel_parallel(const uint8_t img[][NUM_CHANNELS], int num_rows, int num_cols, double mean[NUM_CHANNELS])
{
    /* Widen before multiplying: int * int overflows on large images. */
    const long long count = (long long)num_rows * num_cols;
    double local_mean[NUM_CHANNELS] = {0.0};

    #pragma omp parallel for reduction(+:local_mean[:NUM_CHANNELS])
    for (int row = 0; row < num_rows; row++) {
        /* Offset of the first pixel of this row, computed without int overflow. */
        const size_t row_base = (size_t)row * (size_t)num_cols;

        for (int col = 0; col < num_cols; col++) {
            const uint8_t *pixel = img[row_base + (size_t)col];

            for (int ch = 0; ch < NUM_CHANNELS; ch++) {
                local_mean[ch] += pixel[ch];
            }
        }
    }

    for (int ch = 0; ch < NUM_CHANNELS; ch++) {
        mean[ch] = local_mean[ch] / count;
    }
}


/*
 *  PHASE 2: convert image to grayscale and record the max grayscale value
 *  along with the number of times it appears.
 *
 *  img            flattened input image; pixel (row, col) is at index
 *                 row * num_cols + col, channels 0/1/2 are read as r/g/b.
 *  num_rows       number of image rows.
 *  num_cols       number of image columns.
 *  grayscale_img  output, same indexing as img; channels 0..2 of every
 *                 pixel are set to (r + g + b) / NUM_CHANNELS.
 *  max_gray       output; largest grayscale value found (0 for an empty image).
 *  max_count      output; occurrences of *max_gray. Every matching pixel
 *                 contributes 3 (one per written output channel).
 *                 NOTE(review): presumably this mirrors the sequential
 *                 reference implementation; confirm against it.
 *
 *  Each thread tracks its own maximum and count while converting its share
 *  of the pixels, then merges them into the shared result inside a critical
 *  section. All per-pixel temporaries are declared inside the loop body so
 *  that every thread gets its own copy; sharing them between threads would
 *  be a data race.
 */

void grayscale_parallel(const uint8_t img[][NUM_CHANNELS], int num_rows, int num_cols, uint32_t grayscale_img[][NUM_CHANNELS], uint8_t *max_gray, uint32_t *max_count)
{
    *max_gray = 0;
    *max_count = 0;

    #pragma omp parallel
    {
        /* Per-thread running maximum and its occurrence count. */
        uint8_t thread_max = 0;
        uint32_t thread_count = 0;

        /* nowait: the merge below only needs this thread's own results. */
        #pragma omp for collapse(2) nowait
        for (int row = 0; row < num_rows; row++) {
            for (int col = 0; col < num_cols; col++) {
                /* Widened so row * num_cols cannot overflow int. */
                const size_t idx = (size_t)row * (size_t)num_cols + (size_t)col;

                const int r = img[idx][0];
                const int g = img[idx][1];
                const int b = img[idx][2];
                const int gray_val = (r + g + b) / NUM_CHANNELS;

                grayscale_img[idx][0] = gray_val;
                grayscale_img[idx][1] = gray_val;
                grayscale_img[idx][2] = gray_val;

                if (gray_val > thread_max) {
                    thread_max = (uint8_t)gray_val;
                    thread_count = 3;
                } else if (gray_val == thread_max) {
                    thread_count += 3;
                }
            }
        }

        /* Merge this thread's result into the shared outputs, one thread at a time. */
        #pragma omp critical
        {
            if (thread_max > *max_gray) {
                *max_gray = thread_max;
                *max_count = thread_count;
            } else if (thread_max == *max_gray) {
                *max_count += thread_count;
            }
        }
    }
}

/*
 *  PHASE 3: perform convolution on image.
 *
 *  padded_img     flattened input image of num_rows x num_cols pixels;
 *                 pixel (row, col) is at index row * num_cols + col.
 *  num_rows       number of rows of padded_img.
 *  num_cols       number of columns of padded_img.
 *  kernel         kernel_size * kernel_size weights, stored row by row.
 *  kernel_size    side length of the square kernel.
 *  convolved_img  output of (num_rows - kernel_size + 1) rows by
 *                 (num_cols - kernel_size + 1) columns, flattened the same
 *                 way; channels 0..2 receive the weighted sum of the
 *                 window divided by the sum of all kernel weights.
 *
 *  The kernel weights must not sum to zero, since every output value is
 *  divided by that sum.
 *
 *  Output pixels are independent, so the (row, col) loops are collapsed and
 *  shared among threads. The accumulators and the other per-pixel
 *  temporaries are declared inside the loop body so each thread works on
 *  its own copies; sharing them between threads would be a data race.
 */
void convolution_parallel(const uint8_t padded_img[][NUM_CHANNELS], int num_rows, int num_cols, const uint32_t kernel[], int kernel_size, uint32_t convolved_img[][NUM_CHANNELS])
{
    const int conv_rows = num_rows - kernel_size + 1;
    const int conv_cols = num_cols - kernel_size + 1;
    int kernel_norm = 0;

    // compute kernel normalization factor
    #pragma omp parallel for reduction(+:kernel_norm)
    for (int i = 0; i < kernel_size * kernel_size; i++) {
        kernel_norm += kernel[i];
    }

    #pragma omp parallel for collapse(2)
    for (int row = 0; row < conv_rows; row++) {
        for (int col = 0; col < conv_cols; col++) {
            int r = 0;
            int g = 0;
            int b = 0;

            for (int kernel_row = 0; kernel_row < kernel_size; kernel_row++) {
                /* First input pixel of this window row; widened to avoid int overflow. */
                const size_t src_base = (size_t)(row + kernel_row) * (size_t)num_cols + (size_t)col;

                for (int kernel_col = 0; kernel_col < kernel_size; kernel_col++) {
                    const uint8_t *pixel = padded_img[src_base + (size_t)kernel_col];
                    const int kernel_val = kernel[kernel_row * kernel_size + kernel_col];

                    r += pixel[0] * kernel_val;
                    g += pixel[1] * kernel_val;
                    b += pixel[2] * kernel_val;
                }
            }

            const size_t dst = (size_t)row * (size_t)conv_cols + (size_t)col;

            convolved_img[dst][0] = r / kernel_norm;
            convolved_img[dst][1] = g / kernel_norm;
            convolved_img[dst][2] = b / kernel_norm;
        }
    }
}