integral_image/integral_image.cpp

67 lines
1.5 KiB
C++

#include "integral_image.h"
#include <omp.h>
namespace integral_image {
/// Single-threaded integral image (summed-area table) computation.
///
/// Works on a copy obtained from `image.clone()`. After the call, every cell
/// of the returned matrix holds its own original value plus the values of all
/// cells above it and to its left.
///
/// @param image  Input matrix, taken by const reference.
/// @return The accumulated copy, or a default-constructed Mat when `image`
///         has zero rows or zero columns.
Mat integral_image_serial(const Mat &image)
{
    const bool is_empty = (image.rows == 0) || (image.cols == 0);
    if (is_empty) {
        return Mat();
    }

    Mat sums = image.clone();

    // Top row: there is nothing above, so it is a plain left-to-right running sum.
    for (size_t x = 1; x < sums.cols; ++x) {
        sums[0][x] += sums[0][x - 1];
    }

    for (size_t y = 1; y < sums.rows; ++y) {
        // Leftmost cell of the row: only the neighbour above contributes.
        sums[y][0] += sums[y - 1][0];

        // Remaining cells: add the sums above and to the left, then remove
        // the diagonal neighbour, which both of those sums already include.
        for (size_t x = 1; x < sums.cols; ++x) {
            sums[y][x] += sums[y - 1][x] + sums[y][x - 1] - sums[y - 1][x - 1];
        }
    }

    return sums;
}
/// Computes the integral image (summed-area table) of `image` using OpenMP.
///
/// The result is built in two passes over a copy obtained from
/// `image.clone()`: a running sum along every row, followed by a running sum
/// down every column. Within a pass, each row (or column) only reads and
/// writes its own cells, so the outer loop of each pass is parallelised.
///
/// @param image          Input matrix, taken by const reference.
/// @param thread_number  Number of OpenMP threads to request. Zero or a
///                       negative value leaves the OpenMP runtime's thread
///                       settings untouched.
/// @return The accumulated copy, or a default-constructed Mat when `image`
///         has zero rows or zero columns.
Mat integral_image_openmp(const Mat &image, int thread_number)
{
    if (image.cols == 0 || image.rows == 0)
        return Mat();

    // omp_set_num_threads() requires a positive argument (anything else is
    // implementation-defined), so only positive requests are forwarded.
    // Dynamic adjustment is switched off so that the runtime does not hand
    // out fewer threads than requested.
    // NOTE: these calls are not undone before returning, so they also apply
    // to parallel regions the caller starts afterwards.
    if (thread_number > 0) {
        omp_set_dynamic(0);
        omp_set_num_threads(thread_number);
    }

    Mat result = image.clone();

    // Pass 1: running sum along each row; rows are independent of each other.
#pragma omp parallel for
    for (int row = 0; row < result.rows; ++row) {
        for (int col = 1; col < result.cols; ++col) {
            result[row][col] += result[row][col - 1];
        }
    }

    // Pass 2: running sum down each column; columns are independent of each other.
    // This loop is likely to have lots of cache misses that can probably be avoided by transposing data,
    // processing data in a way similar to the previous loop, and then transposing data again.
    // TODO: benchmark
#pragma omp parallel for
    for (int col = 0; col < result.cols; ++col) {
        for (int row = 1; row < result.rows; ++row) {
            result[row][col] += result[row - 1][col];
        }
    }

    return result;
}
}