From cccd0f164120acadca6d16225a7a778e62777e31 Mon Sep 17 00:00:00 2001 From: Dmitry Kokorin Date: Wed, 5 Sep 2018 23:36:00 +0300 Subject: [PATCH] optimization: rows of a transposed image are processed instead of cols --- integral_image.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/integral_image.cpp b/integral_image.cpp index 1494d57..0a53ff8 100644 --- a/integral_image.cpp +++ b/integral_image.cpp @@ -48,19 +48,22 @@ Mat integral_image_openmp(const Mat &image, int thread_number) } } - //This loop is likely to have lots of cache misses that can probably be avoided by transposing data, processing data - //in a way similar to the previous loop, and than transposing data again. - //TODO: benchmark + //It is more cache-friendly to accumulate data row-wise, so here we transpose the matrix, + //then process it, and then transpose it again to restore the original matrix shape + + result = result.t(); #pragma omp parallel for - for (int col = 0; col < result.cols; ++col) { + for (int row = 0; row < result.rows; ++row) { - for (int row = 1; row < result.rows; ++row) { + for (int col = 1; col < result.cols; ++col) { - result[row][col] += result[row - 1][col]; + result[row][col] += result[row][col - 1]; } } + result = result.t(); + return result; }