70 lines
1.6 KiB
C++
70 lines
1.6 KiB
C++
#include "integral_image.h"
|
|
|
|
#include <omp.h>
|
|
|
|
|
|
namespace integral_image {
|
|
|
|
|
|
// Builds the integral image (summed-area table) of `image` on a single thread.
//
// After the call, every element of the returned matrix holds the sum of all
// input elements located at or above its row and at or left of its column.
//
// image  - source matrix; it is only read, all accumulation happens on the
//          matrix returned by image.clone().
// return - the accumulated matrix, or a default-constructed Mat when `image`
//          has zero rows or zero columns.
Mat integral_image_serial(const Mat &image)
{
    const bool is_empty = (image.rows == 0) || (image.cols == 0);
    if (is_empty) {
        return Mat();
    }

    Mat sums = image.clone();

    // Top row: each cell only has a left neighbour, so a running sum suffices.
    for (size_t c = 1; c < sums.cols; ++c) {
        sums[0][c] += sums[0][c - 1];
    }

    // Left column: each cell only has an upper neighbour.
    for (size_t r = 1; r < sums.rows; ++r) {
        sums[r][0] += sums[r - 1][0];
    }

    // Interior cells: add the already accumulated sums from above and from the
    // left, then subtract the upper-left sum because both of them include it.
    for (size_t r = 1; r < sums.rows; ++r) {
        for (size_t c = 1; c < sums.cols; ++c) {
            sums[r][c] += sums[r - 1][c] + sums[r][c - 1] - sums[r - 1][c - 1];
        }
    }

    return sums;
}
|
|
|
|
Mat integral_image_openmp(const Mat &image, int thread_number)
|
|
{
|
|
if (image.cols == 0 || image.rows == 0)
|
|
return Mat();
|
|
|
|
if (0 != thread_number) {
|
|
|
|
omp_set_dynamic(0);
|
|
omp_set_num_threads(thread_number);
|
|
}
|
|
|
|
Mat result = image.clone();
|
|
|
|
#pragma omp parallel for
|
|
for (int row = 0; row < result.rows; ++row) {
|
|
|
|
for (int col = 1; col < result.cols; ++col) {
|
|
|
|
result[row][col] += result[row][col - 1];
|
|
}
|
|
}
|
|
|
|
//It is more cache-friendly to accumulate data row-wise, so here we transpose the matrix,
|
|
//than process it, and than traspose it again to restore original matrix shape
|
|
|
|
result = result.t();
|
|
|
|
#pragma omp parallel for
|
|
for (int row = 0; row < result.rows; ++row) {
|
|
|
|
for (int col = 1; col < result.cols; ++col) {
|
|
|
|
result[row][col] += result[row][col - 1];
|
|
}
|
|
}
|
|
|
|
result = result.t();
|
|
|
|
return result;
|
|
}
|
|
|
|
}
|