optimization: rows of a transposed image are processed instead of cols
This commit is contained in:
parent
196957f3d0
commit
cccd0f1641
1 changed files with 9 additions and 6 deletions
|
|
@ -48,19 +48,22 @@ Mat integral_image_openmp(const Mat &image, int thread_number)
|
|||
}
|
||||
}
|
||||
|
||||
//This loop is likely to have lots of cache misses that can probably be avoided by transposing data, processing data
|
||||
//in a way similar to the previous loop, and then transposing data again.
|
||||
//TODO: benchmark
|
||||
//It is more cache-friendly to accumulate data row-wise, so here we transpose the matrix,
|
||||
//then process it, and then transpose it again to restore the original matrix shape
|
||||
|
||||
result = result.t();
|
||||
|
||||
#pragma omp parallel for
|
||||
for (int col = 0; col < result.cols; ++col) {
|
||||
for (int row = 0; row < result.rows; ++row) {
|
||||
|
||||
for (int row = 1; row < result.rows; ++row) {
|
||||
for (int col = 1; col < result.cols; ++col) {
|
||||
|
||||
result[row][col] += result[row - 1][col];
|
||||
result[row][col] += result[row][col - 1];
|
||||
}
|
||||
}
|
||||
|
||||
result = result.t();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue