This is an algorithm I'm trying to optimize. I tried to parallelize it with OpenMP, but without any success.
I know the code is long, but most of it is parameter initialization.
Please explain why each line of code you suggest changing needs to change; I want to learn. I would also like to understand whether OpenMP is the best way to speed up matrix-based algorithms.
void CAlgo::runAlgo()
{
neighborhood
int nbr_TBL_RES[Algo_NBR_SZ][Algo_NBR_SZ] ;
int TBL [Algo_NUM_CANDIDATE] ;
int expIn [Algo_NUM_CANDIDATE] ;
int W [Algo_NUM_CANDIDATE] ;
unsigned int WShift1 [Algo_NUM_CANDIDATE] ;
unsigned int WShift2 [Algo_NUM_CANDIDATE] ;
bool WSubFlag [Algo_NUM_CANDIDATE] ;
int meanP ;
unsigned int meanPShift1 ;
unsigned int meanPShift2 ;
bool meanPSubFlag ;
unsigned int TBLShift1 ;
unsigned int TBLShift2 ;
bool TBLSubFlag ;
int sigmaP ;
int eta ;
unsigned int etaShift1 ;
unsigned int etaShift2 ;
bool etaSubFlag ;
unsigned int rSqrShift1 ;
unsigned int rSqrShift2 ;
bool rSqrSubFlag ;
int RxSqr ;
int RySqr ;
unsigned int rPwrShift1 ;
unsigned int rPwrShift2 ;
bool rPwrSubFlag ;
int alpha0 ;
int WSum ;
int PAvrg ;
unsigned int alphaShift1 ;
unsigned int alphaShift2 ;
bool alphaSubFlag ;
unsigned int ind;
int Rx,Ry ;
int wMax[4] ;
int dP ;
int Pout ;
int i0 = Algo_NBR_SZ/2 - Algo_PATCH_SZ/2 + Algo_FILT_PIXEL_dY;
int i1 = Algo_NBR_SZ/2 + Algo_PATCH_SZ/2 - Algo_FILT_PIXEL_dY;
int j0 = Algo_NBR_SZ/2 - Algo_PATCH_SZ/2 + Algo_FILT_PIXEL_dX;
int j1 = Algo_NBR_SZ/2 + Algo_PATCH_SZ/2 - Algo_FILT_PIXEL_dX;
//int k,l;
// debug data
int** TBL00 = NULL;
int** TBL01 = NULL;
int** TBL02 = NULL;
int** TBL03 = NULL;
int** TBL04 = NULL;
int** TBL05 = NULL;
int** TBL06 = NULL;
int** TBL07 = NULL;
int** TBL08 = NULL;
int** TBL09 = NULL;
int** TBL10 = NULL;
int** TBL11 = NULL;
int** TBL12 = NULL;
int** TBL13 = NULL;
int** TBL14 = NULL;
int** TBL15 = NULL;
int** W00 = NULL;
int** W01 = NULL;
int** W02 = NULL;
int** W03 = NULL;
int** W04 = NULL;
int** W05 = NULL;
int** W06 = NULL;
int** W07 = NULL;
int** W08 = NULL;
int** W09 = NULL;
int** W10 = NULL;
int** W11 = NULL;
int** W12 = NULL;
int** W13 = NULL;
int** W14 = NULL;
int** W15 = NULL;
int** EXP_IN00 = NULL;
int** EXP_IN01 = NULL;
int** EXP_IN02 = NULL;
int** EXP_IN03 = NULL;
int** EXP_IN04 = NULL;
int** EXP_IN05 = NULL;
int** EXP_IN06 = NULL;
int** EXP_IN07 = NULL;
int** EXP_IN08 = NULL;
int** EXP_IN09 = NULL;
int** EXP_IN10 = NULL;
int** EXP_IN11 = NULL;
int** EXP_IN12 = NULL;
int** EXP_IN13 = NULL;
int** EXP_IN14 = NULL;
int** EXP_IN15 = NULL;
// allocate memory for storing debug data
if (m_Debug)
{
TBL00 = new int*[m_Height];
TBL01 = new int*[m_Height];
TBL02 = new int*[m_Height];
TBL03 = new int*[m_Height];
TBL04 = new int*[m_Height];
TBL05 = new int*[m_Height];
TBL06 = new int*[m_Height];
TBL07 = new int*[m_Height];
TBL08 = new int*[m_Height];
TBL09 = new int*[m_Height];
TBL10 = new int*[m_Height];
TBL11 = new int*[m_Height];
TBL12 = new int*[m_Height];
TBL13 = new int*[m_Height];
TBL14 = new int*[m_Height];
TBL15 = new int*[m_Height];
W00 = new int*[m_Height];
W01 = new int*[m_Height];
W02 = new int*[m_Height];
W03 = new int*[m_Height];
W04 = new int*[m_Height];
W05 = new int*[m_Height];
W06 = new int*[m_Height];
W07 = new int*[m_Height];
W08 = new int*[m_Height];
W09 = new int*[m_Height];
W10 = new int*[m_Height];
W11 = new int*[m_Height];
W12 = new int*[m_Height];
W13 = new int*[m_Height];
W14 = new int*[m_Height];
W15 = new int*[m_Height];
EXP_IN00 = new int*[m_Height];
EXP_IN01 = new int*[m_Height];
EXP_IN02 = new int*[m_Height];
EXP_IN03 = new int*[m_Height];
EXP_IN04 = new int*[m_Height];
EXP_IN05 = new int*[m_Height];
EXP_IN06 = new int*[m_Height];
EXP_IN07 = new int*[m_Height];
EXP_IN08 = new int*[m_Height];
EXP_IN09 = new int*[m_Height];
EXP_IN10 = new int*[m_Height];
EXP_IN11 = new int*[m_Height];
EXP_IN12 = new int*[m_Height];
EXP_IN13 = new int*[m_Height];
EXP_IN14 = new int*[m_Height];
EXP_IN15 = new int*[m_Height];
for (int i=0; i < m_Height; i++)
{
TBL00[i] = new int[m_Width];
TBL01[i] = new int[m_Width];
TBL02[i] = new int[m_Width];
TBL03[i] = new int[m_Width];
TBL04[i] = new int[m_Width];
TBL05[i] = new int[m_Width];
TBL06[i] = new int[m_Width];
TBL07[i] = new int[m_Width];
TBL08[i] = new int[m_Width];
TBL09[i] = new int[m_Width];
TBL10[i] = new int[m_Width];
TBL11[i] = new int[m_Width];
TBL12[i] = new int[m_Width];
TBL13[i] = new int[m_Width];
TBL14[i] = new int[m_Width];
TBL15[i] = new int[m_Width];
W00[i] = new int[m_Width];
W01[i] = new int[m_Width];
W02[i] = new int[m_Width];
W03[i] = new int[m_Width];
W04[i] = new int[m_Width];
W05[i] = new int[m_Width];
W06[i] = new int[m_Width];
W07[i] = new int[m_Width];
W08[i] = new int[m_Width];
W09[i] = new int[m_Width];
W10[i] = new int[m_Width];
W11[i] = new int[m_Width];
W12[i] = new int[m_Width];
W13[i] = new int[m_Width];
W14[i] = new int[m_Width];
W15[i] = new int[m_Width];
EXP_IN00[i] = new int[m_Width];
EXP_IN01[i] = new int[m_Width];
EXP_IN02[i] = new int[m_Width];
EXP_IN03[i] = new int[m_Width];
EXP_IN04[i] = new int[m_Width];
EXP_IN05[i] = new int[m_Width];
EXP_IN06[i] = new int[m_Width];
EXP_IN07[i] = new int[m_Width];
EXP_IN08[i] = new int[m_Width];
EXP_IN09[i] = new int[m_Width];
EXP_IN10[i] = new int[m_Width];
EXP_IN11[i] = new int[m_Width];
EXP_IN12[i] = new int[m_Width];
EXP_IN13[i] = new int[m_Width];
EXP_IN14[i] = new int[m_Width];
EXP_IN15[i] = new int[m_Width];
}
}
Ry = m_RyInitial;
int row,col;
#pragma omp parallel for schedule(static) default(none) \
firstprivate(Pout,nbr_TBL_RES,TBL,row,col,Ry,Rx,i0,i1,j0,j1,\
meanP,meanPShift1,meanPShift2,meanPSubFlag,TBLShift1,TBLShift2,TBLSubFlag,sigmaP,eta,etaShift1,etaShift2,etaSubFlag,rSqrShift1,rSqrShift2,\
rSqrSubFlag,RxSqr,RySqr,rPwrShift1,rPwrShift2,rPwrSubFlag,alpha0,WSum,PAvrg,alphaShift1,alphaShift2,alphaSubFlag,ind,\
expIn,W,WShift1,WShift2,WSubFlag,wMax,dP,TBL00,TBL01,TBL02,TBL03,TBL04,TBL05,TBL06,TBL07,TBL08,TBL09,TBL10,TBL11,\
TBL12,TBL13,TBL14,TBL15,W00,W01,W02,W03,W04,W05,W06,W07,W08,W09,W10,W11,W12,W13,W14,W15,EXP_IN00,EXP_IN01,EXP_IN02,EXP_IN03,\
EXP_IN04,EXP_IN05,EXP_IN06,EXP_IN07,EXP_IN08,EXP_IN09,EXP_IN10,EXP_IN11,EXP_IN12,EXP_IN13,EXP_IN14,EXP_IN15) */
for(row = 0; row < m_Height; ++row)
{
int nbr[Algo_NBR_SZ][Algo_NBR_SZ] ; // neighborhood
int k,l;
//int **ptrPtrNbrK = (int**)nbr;
//int* ptrNbrK = 0;
Rx = m_RxInitial;
for(col = 0; col < m_Width ; ++col)
{
// prepare neighborhood
/*k = 0;
for (int i=-i0;i<i1; ++i)
{
l = 0;
for (int j=-j0;j<j1; ++j)
{
if ((row+i)<0 || (row+i)>=m_Height ||(col+j)<0 || (col+j)>=m_Width)
{
nbr[k][l] = 0;
} else {
nbr[k][l] = m_Pin->m_R[row+i][col+j];
}
l++;
}
k++;
}*/
// prepare neighborhood
k = 0;
int* ptrNbrK = nbr[0];
for (int i=-i0;i<i1; ++i)
{
int row_plus_i = row+i;
int* m_Pin_m_R_row_plus_i=m_Pin->m_R[row_plus_i];
l = 0;
for (int j=-j0;j<j1; ++j)
{
int col_plus_j = col+j;
if ((row_plus_i)<0 || (row_plus_i)>=m_Height ||(col_plus_j)<0 || (col_plus_j)>=m_Width)
{
//nbr[k][l] = 0;
ptrNbrK[l] =0;
} else {
///nbr[k][l] = m_Pin->m_R[row_plus_i][col_plus_j];
ptrNbrK[l]= m_Pin_m_R_row_plus_i[col_plus_j];
}
l++;
}
//k++;
ptrNbrK = nbr[++k];
//ptrNbrK++;
}
shiftToRequiredPrecision(nbr, nbr_TBL_RES);
computeTBLs(nbr_TBL_RES, TBL);
calculatePatchStatistics(nbr_TBL_RES, meanP, meanPShift1, meanPShift2, meanPSubFlag, sigmaP, TBLShift1, TBLShift2, TBLSubFlag);
eta = calculateDetailIndex(sigmaP, meanPShift1, meanPShift2, meanPSubFlag, ind);
SettingSigmaN(eta, Rx, Ry, RxSqr, RySqr, etaShift1, etaShift2, etaSubFlag, rSqrShift1, rSqrShift2, rSqrSubFlag, rPwrShift1, rPwrShift2, rPwrSubFlag);
for (int i=0; i<Algo_NUM_CANDIDATE; ++i)
{
if (m_CandEnable[i])
{
expIn[i] = CalculateWeights(TBL[i], etaShift1, etaShift2, etaSubFlag, rSqrShift1, rSqrShift2, rSqrSubFlag,
TBLShift1, TBLShift2, TBLSubFlag, W[i], WShift1[i], WShift2[i], WSubFlag[i]);
} else {
W[i] = 0;
WShift1[i] = 0;
WShift2[i] = 0;
WSubFlag[i] = 0;
expIn[i] = 0;
}
}
// adjust candidates group (B,C,D) weight values
for (int i = Algo_FIRST_CANDIDATE_B; i<Algo_NUM_CANDIDATE_B+Algo_FIRST_CANDIDATE_B; ++i)
{
W[i] = CommonFunctions::ShiftSubMultiplier(W[i], m_BCandSubFlag[ind], m_BCandShift1[ind], m_BCandShift2[ind]);
}
for (int i = Algo_FIRST_CANDIDATE_C; i<Algo_NUM_CANDIDATE_C+Algo_FIRST_CANDIDATE_C; ++i)
{
W[i] = CommonFunctions::ShiftSubMultiplier(W[i], m_CCandSubFlag[ind], m_CCandShift1[ind], m_CCandShift2[ind]);
}
for (int i = Algo_FIRST_CANDIDATE_D; i<Algo_NUM_CANDIDATE_D+Algo_FIRST_CANDIDATE_D; ++i)
{
W[i] = CommonFunctions::ShiftSubMultiplier(W[i], m_DCandSubFlag[ind], m_DCandShift1[ind], m_DCandShift2[ind]);
}
WSum = ComputingWeightSum(W);
alpha0 = MatchQualityIndex(W, wMax, ind);
PAvrg = ComputeAveragePixel(WShift1, WShift2, WSubFlag, nbr, WSum, ind);
BlendingCoefficient(alpha0, alphaSubFlag, alphaShift1, alphaShift2, ind);
#if 0
if ((row>226)&&(col>410))
int p_in = m_Pin->m_R[row][col];
#endif
Pout = m_Pin->m_R[row][col];
dP = 0;
if (WSum>m_wSumMinTh || wMax[0]>m_wMaxMinTh)
{
Pout = SynthesizeOutputPixel(m_Pin->m_R[row][col], PAvrg, alphaSubFlag, alphaShift1, alphaShift2, rPwrSubFlag, rPwrShift1, rPwrShift2, dP );
}
Rx++;
m_Pout ->m_R[row][col] = Pout;
m_PoutDiff ->m_R[row][col] = m_Pin->m_R[row][col] - Pout;
//m_PoutDiff ->m_R[row][col] = abs(m_Pin->m_R[row][col] - PAvrg);
m_PoutAlpha0->m_R[row][col] = alpha0;
m_MapMeanP ->m_R[row][col] = meanP;
m_MapAlpha0 ->m_R[row][col] = alpha0;
m_MapAlpha ->m_R[row][col] = CommonFunctions::ShiftSubMultiplier((1<<Algo_NL_LUT_ARR_MAX_SHIFT), alphaSubFlag, alphaShift1, alphaShift2);
m_MapSTD ->m_R[row][col] = eta;
m_MapInd ->m_R[row][col] = ind;
//m_MapInd ->m_R[row][col] = rSqr;
//m_MapInd ->m_R[row][col] = dP;
if (m_Debug)
{
TBL00[row][col] = TBL[0];
TBL01[row][col] = TBL[1];
TBL02[row][col] = TBL[2];
TBL03[row][col] = TBL[3];
TBL04[row][col] = TBL[4];
TBL05[row][col] = TBL[5];
TBL06[row][col] = TBL[6];
TBL07[row][col] = TBL[7];
TBL08[row][col] = TBL[8];
TBL09[row][col] = TBL[9];
TBL10[row][col] = TBL[10];
TBL11[row][col] = TBL[11];
TBL12[row][col] = TBL[12];
TBL13[row][col] = TBL[13];
TBL14[row][col] = TBL[14];
TBL15[row][col] = TBL[15];
W00[row][col] = W[0];
W01[row][col] = W[1];
W02[row][col] = W[2];
W03[row][col] = W[3];
W04[row][col] = W[4];
W05[row][col] = W[5];
W06[row][col] = W[6];
W07[row][col] = W[7];
W08[row][col] = W[8];
W09[row][col] = W[9];
W10[row][col] = W[10];
W11[row][col] = W[11];
W12[row][col] = W[12];
W13[row][col] = W[13];
W14[row][col] = W[14];
W15[row][col] = W[15];
EXP_IN00[row][col] = expIn[0] ;
EXP_IN01[row][col] = expIn[1] ;
EXP_IN02[row][col] = expIn[2] ;
EXP_IN03[row][col] = expIn[3] ;
EXP_IN04[row][col] = expIn[4] ;
EXP_IN05[row][col] = expIn[5] ;
EXP_IN06[row][col] = expIn[6] ;
EXP_IN07[row][col] = expIn[7] ;
EXP_IN08[row][col] = expIn[8] ;
EXP_IN09[row][col] = expIn[9] ;
EXP_IN10[row][col] = expIn[10];
EXP_IN11[row][col] = expIn[11];
EXP_IN12[row][col] = expIn[12] ;
EXP_IN13[row][col] = expIn[13] ;
EXP_IN14[row][col] = expIn[14] ;
EXP_IN15[row][col] = expIn[15] ;
}
}
Ry++;
}
// write debug data to bin file
if (m_Debug)
{
writeDebugDataToBinFile("TBL", TBL00, TBL01, TBL02, TBL03, TBL04, TBL05, TBL06, TBL07, TBL08, TBL09, TBL10, TBL11, TBL12, TBL13, TBL14, TBL15);
writeDebugDataToBinFile("W", W00, W01, W02, W03, W04, W05, W06, W07, W08, W09, W10, W11, W12, W13, W14, W15 );
writeDebugDataToBinFile("EXP_IN", EXP_IN00, EXP_IN01, EXP_IN02, EXP_IN03, EXP_IN04, EXP_IN05, EXP_IN06, EXP_IN07, EXP_IN08, EXP_IN09, EXP_IN10, EXP_IN11,
EXP_IN12, EXP_IN13, EXP_IN14, EXP_IN15);
// free memory for debug data
for (int i=0; i < m_Height; i++)
{
delete TBL00[i];
delete TBL01[i];
delete TBL02[i];
delete TBL03[i];
delete TBL04[i];
delete TBL05[i];
delete TBL06[i];
delete TBL07[i];
delete TBL08[i];
delete TBL09[i];
delete TBL10[i];
delete TBL11[i];
delete TBL12[i];
delete TBL13[i];
delete TBL14[i];
delete TBL15[i];
delete W00[i];
delete W01[i];
delete W02[i];
delete W03[i];
delete W04[i];
delete W05[i];
delete W06[i];
delete W07[i];
delete W08[i];
delete W09[i];
delete W10[i];
delete W11[i];
delete W12[i];
delete W13[i];
delete W14[i];
delete W15[i];
delete EXP_IN00[i];
delete EXP_IN01[i];
delete EXP_IN02[i];
delete EXP_IN03[i];
delete EXP_IN04[i];
delete EXP_IN05[i];
delete EXP_IN06[i];
delete EXP_IN07[i];
delete EXP_IN08[i];
delete EXP_IN09[i];
delete EXP_IN10[i];
delete EXP_IN11[i];
delete EXP_IN12[i];
delete EXP_IN13[i];
delete EXP_IN14[i];
delete EXP_IN15[i];
}
delete TBL00;
delete TBL01;
delete TBL02;
delete TBL03;
delete TBL04;
delete TBL05;
delete TBL06;
delete TBL07;
delete TBL08;
delete TBL09;
delete TBL10;
delete TBL11;
delete TBL12;
delete TBL13;
delete TBL14;
delete TBL15;
delete W00;
delete W01;
delete W02;
delete W03;
delete W04;
delete W05;
delete W06;
delete W07;
delete W08;
delete W09;
delete W10;
delete W11;
delete W12;
delete W13;
delete W14;
delete W15;
delete EXP_IN00;
delete EXP_IN01;
delete EXP_IN02;
delete EXP_IN03;
delete EXP_IN04;
delete EXP_IN05;
delete EXP_IN06;
delete EXP_IN07;
delete EXP_IN08;
delete EXP_IN09;
delete EXP_IN10;
delete EXP_IN11;
delete EXP_IN12;
delete EXP_IN13;
delete EXP_IN14;
delete EXP_IN15;
}
}