optimization: rows of a transposed image are processed instead of cols
This commit is contained in:
parent
196957f3d0
commit
cccd0f1641
1 changed files with 9 additions and 6 deletions
|
|
@@ -48,19 +48,22 @@ Mat integral_image_openmp(const Mat &image, int thread_number)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//This loop is likely to have lots of cache misses that can probably be avoided by transposing data, processing data
|
//It is more cache-friendly to accumulate data row-wise, so here we transpose the matrix,
|
||||||
//in a way similar to the previous loop, and then transposing data again.
|
//then process it, and then transpose it again to restore the original matrix shape
|
||||||
//TODO: benchmark
|
|
||||||
|
result = result.t();
|
||||||
|
|
||||||
#pragma omp parallel for
|
#pragma omp parallel for
|
||||||
for (int col = 0; col < result.cols; ++col) {
|
for (int row = 0; row < result.rows; ++row) {
|
||||||
|
|
||||||
for (int row = 1; row < result.rows; ++row) {
|
for (int col = 1; col < result.cols; ++col) {
|
||||||
|
|
||||||
result[row][col] += result[row - 1][col];
|
result[row][col] += result[row][col - 1];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
result = result.t();
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue