
/*****************************************************************************/
/*                                                                           */
/*      Copyright (C) 1999 SHARP LABS OF AMERICA All Rights Reserved.        */
/*                                                                           */
/*****************************************************************************/

/* Copyright (C) 1997 by Sharp Laboratories of America, Camas, WA 98607, USA.
   All Rights Reserved.

   The sections in this program that are contained within the headings:  */

   /*## <Sharp Start> ##*/


   /*## <Sharp End> ##*/

/* are copyrighted by Sharp Laboratories of America. It may not be redistributed 
   without the consent of the copyright holders. In no circumstances may the 
   copyright notice be removed. The program may not be used without a written 
   permission of the copyright holders.  The users shall treat the program in 
   confidence. This program is provided as is, without any express or implied 
   warranty, without even the warranty of fitness for a particular purpose.
   */

/*****************************************************************************/

/************************************************************************
*
*  Routine      intrapred
*
*  Description: Intra prediction according to all 5 prediction modes are made
*
*                    
************************************************************************/

#include <math.h> 
#include <stdlib.h> 
#include <stdio.h>

#include "global.h"
#include "block.h" 

void intrapred(int CurrPixX,int CurrPixY)
{
    
    int i,j,i0,j0,Curr4PixX,Curr4PixY,ia[14] ,js[4][2],js0,js1,js2;
           
    Curr4PixX=CurrPixX/4;
    Curr4PixY=CurrPixY/4;
    js1=0;
    js2=0;
    for (i=0; i < 4; ++i) 
    {
        if (predmode[Curr4PixX+1][Curr4PixY] >= 0) 
        {
            js1 += imgY[CurrPixX+i][CurrPixY-1][1];  /* sum hor pix */ 
        }
        if (predmode[Curr4PixX][Curr4PixY+1] >= 0) 
        {
            js2 += imgY[CurrPixX-1][CurrPixY+i][1]; /* sum vert pix */ 
        }
    }
    /* make DC prediction */
    if (predmode[Curr4PixX+1][Curr4PixY] >= 0 && predmode[Curr4PixX][Curr4PixY+1] >= 0) 
    {
        js0=(js1+js2+4)/8; /* no edge */
    }
    if (predmode[Curr4PixX+1][Curr4PixY]< 0 && predmode[Curr4PixX][Curr4PixY+1] >= 0) 
    {
        js0=(js2+2)/4;  /* upper edge */
    }
    if (predmode[Curr4PixX+1][Curr4PixY] >= 0 && predmode[Curr4PixX][Curr4PixY+1] < 0) 
    {
        js0=(js1+2)/4;  /* left edge */
    }
    if (predmode[Curr4PixX+1][Curr4PixY] < 0 && predmode[Curr4PixX][Curr4PixY+1] < 0) 
    {
        js0=128;        /* top left corner */
    }
    
    for (i=0; i < 4; ++i) 
    {
        if (predmode[Curr4PixX+1][Curr4PixY] >= 0) 
        {
            js[i][0]=imgY[CurrPixX+i][CurrPixY-1][1];
        }
        if (predmode[Curr4PixX][Curr4PixY+1] >= 0) 
        {
            js[i][1]=imgY[CurrPixX-1][CurrPixY+i][1];
        }
    }
    for (j=0; j < 4; ++j) /* store DC/vert/hor pred */
    {
        for (i=0; i < 4; ++i) 
        {
            mprr[j][i][0]=js0;                    
            mprr[j][i][1]=js[j][0];
            mprr[j][i][2]=js[i][1];
        }
    }
    
    /*  Prediction according to 'diagonal' modes */
    
    if (predmode[Curr4PixX+1][Curr4PixY] >= 0 && predmode[Curr4PixX][Curr4PixY+1] >= 0) 
    {
        i0=CurrPixX-1;
        j0=CurrPixY-1;
        ia[0]=(imgY[i0][CurrPixY+3][1]+2*imgY[i0][CurrPixY+2][1]+imgY[i0][CurrPixY+1][1]+2)/4;
        ia[1]=(imgY[i0][CurrPixY+2][1]+2*imgY[i0][CurrPixY+1][1]+imgY[i0][CurrPixY+0][1]+2)/4;
        ia[2]=(imgY[i0][CurrPixY+1][1]+2*imgY[i0][CurrPixY+0][1]+imgY[i0][CurrPixY-1][1]+2)/4;
        ia[3]=(imgY[i0][CurrPixY+0][1]+2*imgY[i0][CurrPixY-1][1]+imgY[CurrPixX+0][j0][1]+2)/4;
        ia[4]=(imgY[CurrPixX-1][j0][1]+2*imgY[CurrPixX+0][j0][1]+imgY[CurrPixX+1][j0][1]+2)/4;
        ia[5]=(imgY[CurrPixX+0][j0][1]+2*imgY[CurrPixX+1][j0][1]+imgY[CurrPixX+2][j0][1]+2)/4;
        ia[6]=(imgY[CurrPixX+1][j0][1]+2*imgY[CurrPixX+2][j0][1]+imgY[CurrPixX+3][j0][1]+2)/4;
        ia[7]=(imgY[i0][CurrPixY+1][1]+imgY[CurrPixX+1][j0][1])/2;
        ia[8]=(imgY[i0][CurrPixY+2][1]+imgY[CurrPixX+2][j0][1])/2;
        ia[9]=(imgY[i0][CurrPixY+3][1]+imgY[CurrPixX+3][j0][1])/2;
        ia[10]=ia[11]=ia[12]=ia[13]=ia[9];
        
        for (j=0; j < 4; ++j) /* store diagonal pred */
        {
            for (i=0; i < 4; ++i) 
            {
                mprr[j][i][3]=ia[j-i+3];
                mprr[j][i][4]=ia[j+i+7];
            }
        }
    }
} 

/*## <Sharp Start> ##*/

/************************************************************************
*
*    Routine :  motion_search
*
*    Description:    
*  
*    In this routine motion search (integer+(1/2+1/3+1/6) pel), motion accuracy and 
*    mode selection are performed
*    
*    Since we treat all 4x4 blocks before coding/decoding the prediction may not be based on decoded pixels 
*    (except for some of the blocks). Therefore original pixel data are used for prediction in this assessment 
*    of intra coding.  This will result in too good prediction.  To compensate for this the SAD for intra
*    (min_sad_intra) is given a 'handicap' depending on QP.  
*
************************************************************************/

int motion_search(struct inp_par *inp_par,struct img_par *img,int min_sad_intra)
{
    int predframe_no;
    int numc,j,k,len,i;
    int icorr,i0,j0,i1,i4,j4,j1,i2,j2,ka,i40,na,jj,j40,j44,km,j88,ii,i44,i88,i22,lv,l;
    int khv[4][4][5][9][4][2];  /*## add a new dimension for motion accuracy ##*/ 

    int lh0,lh1,ip0,ip1,ip2,lv0,iz0,lv1,mcr;
    int lh10,lv10,lh11,lv11,lh12,lv12,lh13,lv13;
    int abort_search;

    int ipx[4],ipy[4];
    int iy0,jy0;
    int k0,k1;
    int anchor_lh,anchor_lv;
    int third_pixel;
    
    int checked[11][11];

    int is,isa,isb,isc;
    int is0,is1,is2,is3;          
    int mb_is0,mb_is1,mb_is2,mb_is3;

    int min_sad_inter,iss;
    int mo[16][16];
    int iz[5][2];

    int half0,half1;
    int intp_mb[16][16];
    int iss4b[9];
    int onethird_pos[4][4][5][9][4][2];

    int jn[4];
    int in[4];
    int iii,jjj;
    int kx0,ky0,kk;
    int intp_x0,intp_y0;
    float intp_v;

    /*  Now to inter mode */
    
    min_sad_inter=MAX_VALUE;

    /*########################################################### 
         Step.1 mode decision 
         --------------------
         loop through all reference frames under consideration 
         and all the coding modes, do motion estimation at 1/2 
         pixel accuracy to determine the best coding mode and
         reference number for the current MB 
    ############################################################*/ 
  
    /* Looping through all reference frames under consideration */

    for (k=0; k < mmin(img->pic_no,inp_par->no_multpred); ++k) 
    {
        km=mmin(k,1);
        
        ka=k+1;
        na=(img->np0 + MAX_MULT_PRED -k) % MAX_MULT_PRED;
        
        /*  Looping through all the chosen block sizes: */        
        iz0=1;      
        while (img->mhv[iz0][0]==0 && iz0<=7) /* skip blocksizes not chosen */
            iz0++;

        for (;iz0 <= 7;iz0) 
        {
            iz[1][0]=img->mhv[iz0][0];
            iz[1][1]=img->mhv[iz0][1];
            iz[2][0]=iz[1][0]-1;
            iz[2][1]=iz[1][1]-1;
            iz[4][0]=iz[1][0]/4;
            iz[4][1]=iz[1][1]/4;
            
            icorr=16-iz[1][0];

            /*  mcr is the actual search range used depending on block size and reference frame.  
                It may be reduced by 1/2 or 1/4 for smaller blocks and prediction from older frames 
            */
            mcr=inp_par->mcrange/((km+1)*mmin(2,iz0)); 
            iss=JQ[img->quant][2]*km * 2;
            mb_is0=iss;

            /*  Loop through all block sizes  */            
            for (jj=0; jj <= 15; jj += iz[1][1]) 
            {
                j40=jj/4;
                j4=img->Line4Pix+j40;
                j44=img->LinePix+jj;
                j88=j44*2;          /*## multiply by 2 because of 1/2 pixel up-sampling ##*/ 
                for (ii=0; ii <= 15; ii += iz[1][0]) 
                {
                    i40=ii/4;
                    i4=img->Col4Pix+i40;
                    i44=img->ColPix+ii;
                    i88=i44*2;     /*## multiply by 2 because of 1/2 pixel up-sampling ##*/ 
                    
                    
                    /*####################################################################### 

                        Prediction of motion vectors 
                        ----------------------------

                        1. i0,1,2  and j0,1,2 are the coordinates of vectors to be used 
                           for prediction.

                        2. As in H.263 the prediction is the median of three vectors.

                        3. All the motion vector in iMV are represented as integer of 
                           in 1/6 pixel accuracy .
                           The predicted motion vector is also in 1/6 pixel accuracy.
                           later it is converted to integer and 1/2 pixel accuracy 
                           for bit rate counting.
              
                        4. Special care is taken for the first column pixel to avoid 
                           index pointing outside the range of iMV. 
                           (This is a reported bug. Another way to fix the bug is presented 
                            in the new Telenor h.26L code v1.1.)
  
                    #########################################################################*/

                    
                    if(i4==0)
                    {
                        if(j4==0)
                        {
                           ihv[i40][j40][k][iz0][HALF][0]=0;
                           ihv[i40][j40][k][iz0][HALF][1]=0;
                        }
                        else
                        {
                            i1=i4;
                            j1=j4-1;
                            i2=i4+iz[4][0];
                            j2=j4-1;

                            for (l=0; l <= 1; ++l)
                            {
                                ip0=0;
                                ip1=iMv[i1][j1][l];
                                ip2=iMv[i2][j2][l];
                                ihv[i40][j40][k][iz0][HALF][l]=ip0+ip1+ip2-mmin(mmin(ip0,ip1),ip2)-mmax(mmax(ip0,ip1),ip2);
                            }
                        }
                    }
                    else
                    {
                        i0=i4-1;
                        j0=j4;
                        i1=i4;
                        j1=j4-1;
                        i2=i4+iz[4][0];
                        j2=j4-1;
                   
                        /*  Corrections if we are at the upper edge of the frame (see similar correction in H.263) */
                   
                        if (j4 == 0)
                        {
                            i1=i0;
                            i2=i0;
                            j1=j0;
                            j2=j0;
                        }
                   
                        /*  Correction if a vector to the right is not yet available or to the right of the frame*/

                        if (ii == icorr && jj != 0 || i2 > (img->img_width_cr)/2-1)
                        {
                            i2=i4-1;
                        }

                        /*  Prediction by calculating the median */

                        for (l=0; l <= 1; ++l)
                        {

                            ip0=iMv[i0][j0][l];
                            ip1=iMv[i1][j1][l];
                            ip2=iMv[i2][j2][l];
                            ihv[i40][j40][k][iz0][HALF][l]=ip0+ip1+ip2-mmin(mmin(ip0,ip1),ip2)-mmax(mmax(ip0,ip1),ip2);
                        }
                    }

                    /*## 1/6 pixel accuracy motion vector predictor ##*/
                    ip0=ihv[i40][j40][k][iz0][HALF][0];
                    ip1=ihv[i40][j40][k][iz0][HALF][1];

                    /*## convert to 1/2 pixel accuracy ##*/ 
                    half0 = mround(ip0,Accuracy[HALF]);
                    half1 = mround(ip1,Accuracy[HALF]);

                    /*## Integer pel search.  lh0,lv0 is the 'search center' ##*/
                    
                    lh0=mround(ip0,Accuracy[INTEGER]);
                    lv0=mround(ip1,Accuracy[INTEGER]);
                    
                    /*  Limitation of lh0,lv0 so that the search window contains the (0,0) vector*/
                    
                    lh0=mmax(-(inp_par->mcrange),mmin(inp_par->mcrange,lh0));
                    lv0=mmax(-(inp_par->mcrange),mmin(inp_par->mcrange,lv0));
                    
                    /*  Search center corrected to prevent vectors outsibe the frame.  
                    (This model does not permit vectors outside the picture)    */
                    
                    lh0=mmax(mmin(lh0,img->img_width-1-i44-iz[2][0]-mcr),mcr-i44);
                    lv0=mmax(mmin(lv0,img->img_height1-j44-iz[2][1]-mcr),mcr-j44);
                    

                    /*  mo() is the the original block to be used in the search process */

                    for (i=0; i <= iz[2][0]; ++i)
                    {
                        for (j=0; j <= iz[2][1]; ++j)
                        {
                            mo[i][j]=imgY[i44+i][j44+j][0];
                        }
                    }


                    /*##############################################
                        Integer motion vector estimation 
                    ##############################################*/

                    is0=MAX_VALUE;

                    /* Computing 2nd pow */
                    
                    numc=(mcr*2+1)*(mcr*2+1)-1;
                    
                    for (lv=0; lv <= numc; ++lv) 
                    {
                        lh1=(lh0+mcpos[lv][0])*2;  /*## multiply by 2 ##*/ 
                        lv1=(lv0+mcpos[lv][1])*2;  /*## multiply by 2 ##*/
                        iy0=i88+lh1;
                        jy0=j88+lv1;
                        
                        /*  
                        The initial setting of is reflects the number of bits used to signal the vector.  
                        It also depends on the QP.  (See documented approach of RD constrained motion search) 
                        */
                        
                        /*## convert the integer motion vector to 1/3 accuracy to count motion bits ##*/
                        is=(int)(JQ[img->quant][2]*(float)1.125*(mcbit[abs(lh1-half0)*3/2]+mcbit[abs(lv1-half1)*3/2]));
                        if (lh1 == 0 && lv1 == 0 && iz0 == 1) 
                        {
                            is -= JQ[img->quant][2] * 16;
                        } 

                        abort_search=FALSE;
                        for (j=0; j <= iz[2][1] && !abort_search; ++j) 
                        {
                            j2=jy0+j*2;  /*## multiply by 2 ##*/
                           
                            for (i=0; i <= iz[2][0] ; ++i) 
                            {
                                is += (abs(mo[i][j]-mref[iy0+i*2][j2][na])); /*## multiply by 2 ##*/
                            }
                            if (is >= is0) 
                            {
                                abort_search=TRUE;
                            }
                        }
                        if(!abort_search)
                        {
                            lh10=lh1;
                            lv10=lv1;
                            is0=is;
                        }
                    }

                    mb_is0+=is0;
                    
                    /*#################################################################
                        1/2 pixel motion vector search. 
     
                        The search is over 9 vector positions. 
                        lh10,lv10 is the search center to be used for 1/2 pixel search  
                        It is adjusted to prevent vectors pointing outside the frame
                    ##################################################################*/
                    
                    lh10=mmax(1-i88,mmin(lh10,(img->img_width-1-i44-iz[2][0])*2-1)); /*## multiply by 2 ##*/
                    lv10=mmax(1-j88,mmin(lv10,(img->img_height1-j44-iz[2][1])*2-1)); /*## multiply by 2 ##*/
                    
                    is0=MAX_VALUE;
                    is1=MAX_VALUE;
                    k0=k1=0;
                    for (lv=0; lv <= 8; ++lv)                    /*  7  1  8 */
                    {                                            /*  3  0  4 */
                        lh1=lh10+mcpos[lv][0];                   /*  5  2  6 */
                        lv1=lv10+mcpos[lv][1];
                        iy0=i88+lh1;
                        jy0=j88+lv1;
                        is=(int)(JQ[img->quant][2]*(float)1.125*(mcbit[abs(lh1-half0)]+mcbit[abs(lv1-half1)]));
                        if (lh1 == 0 && lv1 == 0 && iz0 == 1) 
                        {
                            is -= JQ[img->quant][2] *16;
                        }

                        for (i1=0; i1 <= iz[2][0]; i1 += 4) 
                        {
                            for (j1=0; j1 <= iz[2][1]; j1 += 4) 
                            {
                                for (i=0; i <= 3; ++i) 
                                {
                                    i2=i+i1;
                                    i22=iy0+i2*2;    /*## multiply by 2 ##*/
                                    for (j=0; j <= 3; ++j) 
                                    {
                                        j2=j+j1;
                                        m7[i][j]=mo[i2][j2]-mref[i22][jy0+j2*2][na]; /*## multiply by 2 ##*/
                                    }
                                }
                                find_sad(&is,inp_par->hadamard);
                            }
                        }

                        /*## save the motion vector costs for each subblock ##*/
                        iss4b[lv]=is;

                        /*## find the best two candidates ##*/
                        if (is < is0) 
                        {
                            /* iMv() is a 'temporary' assignment ov vectors to be used 
                               to estimate 'bit cost' in vector prediction.
                            */
                            for (i=0; i <= iz[4][0]-1; ++i) 
                            {
                                for (j=0; j <= iz[4][1]-1; ++j) 
                                {
                                    /*## represent 1/2 pixel motion vector in 1/6 pixel
                                         for motion vector prediction               ##*/

                                    khv[i40+i][j40+j][k][iz0][HALF][0]=lh1*Accuracy[HALF];
                                    khv[i40+i][j40+j][k][iz0][HALF][1]=lv1*Accuracy[HALF];
                                    iMv[i4+i][j4+j][0]=lh1*Accuracy[HALF];
                                    iMv[i4+i][j4+j][1]=lv1*Accuracy[HALF];
                                }
                            }

                            /*## save the second best result ##*/
                            k1=k0;
                            is1=is0;

                            /*## save the first best result ##*/
                            k0=lv;
                            is0=is;
                        }
                        else if(is < is1)
                        {
                            /*## save the second best result ##*/
                            k1=lv;
                            is1=is;
                        }
                    }

                    /*## 
                        calculate the 1/3 pixel searching positions 
                        the searching pattern is determined by the position 
                        of the best two 1/2 pixel motion vectors  
                    ##*/

                    anchor_lh=lh10*3/2;
                    anchor_lv=lv10*3/2;

                    if(k0==0)
                    {
                        for(i=0;i<4;i++)
                        {
                           onethird_pos[i40][j40][k][iz0][i][0]=anchor_lh+pattern0[k1-1][i][0];
                           onethird_pos[i40][j40][k][iz0][i][1]=anchor_lv+pattern0[k1-1][i][1];
                        }
                    }
                    else if(k0<=4)
                    {
                        int p0;

                        if(iss4b[neighbor[k0-1][0]]<iss4b[neighbor[k0-1][1]])
                        {
                           p0=2*(k0-1);
                        }
                        else
                        {
                           p0=2*(k0-1)+1;
                        }

                        for(i=0;i<4;i++)
                        {
                            onethird_pos[i40][j40][k][iz0][i][0]=anchor_lh+pattern1[p0][i][0];
                            onethird_pos[i40][j40][k][iz0][i][1]=anchor_lv+pattern1[p0][i][1];
                        }

                    }
                    else
                    {
                        for(i=0;i<4;i++)
                        {
                            onethird_pos[i40][j40][k][iz0][i][0]=anchor_lh+pattern2[k0-5][i][0];
                            onethird_pos[i40][j40][k][iz0][i][1]=anchor_lv+pattern2[k0-5][i][1];
                        }
                    }

                    iss += is0;
               }
            }

            /*  
                Here we update which is the best inter mode. At the end we have the best inter mode.
                predframe_no:  which reference frame to use for prediction
                img->multframe_no:  Index in the mref[] matrix to use for prediction 
            */
            if (iss <= min_sad_inter) 
            {
                img->mb_mode=iz0;
                img->izh=img->mhv[iz0][0];
                img->izv=img->mhv[iz0][1];
                predframe_no=k;
                img->multframe_no=na;
                min_sad_inter=iss;
            }
            while (img->mhv[++iz0][0]==0 && iz0<=7); /* only go through chosen blocksizes */
        }
    }
    
    /*################################################################
       Step.2 motion accuracy selection  

              select the best motion accuracy for given coding 
              mode and reference frame. 
    ################################################################*/
 
    /*## get the predition frame number, coding mode , etc. ##*/
    k=predframe_no;
    na=img->multframe_no;
    iz0=img->mb_mode;

    km=mmin(k,1);

    iz[1][0]=img->mhv[iz0][0];
    iz[1][1]=img->mhv[iz0][1];
    iz[2][0]=iz[1][0]-1;
    iz[2][1]=iz[1][1]-1;
    iz[4][0]=iz[1][0]/4;
    iz[4][1]=iz[1][1]/4;
    icorr=16-iz[1][0];

    /*################################################################
      Step 2.1 recaulate the r-d cost of best 1/2 pixel motion 
               vector using on-line cubic interpolated compensation 
               value
    ################################################################*/
 
    /*## count the motion accuracy bit : 1/2 pixel 2 bits ##*/
    mb_is1=JQ[img->quant][2]*km*2+(int)(JQ[img->quant][2]*(float)1.125*(2));

    /*  Loop through all block sizes  */
    for (jj=0; jj <= 15; jj += iz[1][1])
    {
        j40=jj/4;
        j4=img->Line4Pix+j40;
        j44=img->LinePix+jj;
        j88=j44*6;

        for (ii=0; ii <= 15; ii += iz[1][0])
        {
            i40=ii/4;
            i4=img->Col4Pix+i40;
            i44=img->ColPix+ii;
            i88=i44*6;

            total_search_position++;

            /*  mo() is the the original block to be used in the search process */

            for (i=0; i <= iz[2][0]; ++i)
            {
                for (j=0; j <= iz[2][1]; ++j)
                {
                     mo[i][j]=imgY[i44+i][j44+j][0];
                }
            }

            /*## get the best 1/2 pixel motion vector ##*/
            lh10=khv[i40][j40][k][iz0][HALF][0];
            lv10=khv[i40][j40][k][iz0][HALF][1];

            /*## get 1/6 pixel accuracy motion vector predictor ##*/
            ip0=ihv[i40][j40][k][iz0][HALF][0];
            ip1=ihv[i40][j40][k][iz0][HALF][1];

            /*## 1/2 pixel accuracy motion vector predictor ##*/
            half0 = mround(ip0,Accuracy[HALF]);
            half1 = mround(ip1,Accuracy[HALF]);

            is=(int)(JQ[img->quant][2]*(float)1.125*(mcbit[abs(lh10-half0)]+mcbit[abs(lv10-half1)]));
            if (lh10 == 0 && lv10 == 0 && iz0 == 1)
            {
                is -= JQ[img->quant][2] *16;
            }

            /*## do on-line 1/2 pixel cubic interpolation ##*/

            intp_x0=(i88+lh10)/6;
            intp_y0=(j88+lv10)/6;
            kx0=(i88+lh10)%6;
            ky0=(j88+lv10)%6;

            if(kx0==0 && ky0==0)
            {
                for (i1=0; i1 <= iz[2][0]; i1 ++)
                {
                     for (j1=0; j1 <= iz[2][1]; j1 ++)
                     {
                          mref_mb[ii+i1][jj+j1][HALF]=mref[(intp_x0+i1)*2][(intp_y0+j1)*2][na];
                     }
                }
            }
            else
            {
                for (i1=0; i1 <= iz[2][0]; i1 ++)
                {
                     in[1]=intp_x0+i1;
                     in[0]=mmax(0,in[1]-1);
                     in[2]=mmin(img->img_width-1,in[1]+1);
                     in[3]=mmin(img->img_width-1,in[1]+2);

                     for (j1=0; j1 <= iz[2][1]; j1 ++)
                     {
                          jn[1]=intp_y0+j1;
                          jn[0]=mmax(0,jn[1]-1);
                          jn[2]=mmin(img->img_height-1,jn[1]+1);
                          jn[3]=mmin(img->img_height-1,jn[1]+2);

                          intp_v=0.0;
                          for (iii=0; iii <= 3; ++iii)
                          {
                               for (jjj=0; jjj <= 3; ++jjj)
                               {
                                   intp_v+=mref[in[iii]*2][jn[jjj]*2][na]*LF0[iii][kx0]*LF0[jjj][ky0];
                               }
                          }

                          mref_mb[ii+i1][jj+j1][HALF]=mmax(0,mmin((int)(intp_v+0.5),255));
                     }
                }
            }

            for (i1=0; i1 <= iz[2][0]; i1 += 4)
            {
                 for (j1=0; j1 <= iz[2][1]; j1 += 4)
                 {
                      for (i=0; i <= 3; ++i)
                      {
                          i2=i+i1;
                          for (j=0; j <= 3; ++j)
                          {
                               j2=j+j1;

                               m7[i][j]=mo[i2][j2]-mref_mb[ii+i2][jj+j2][HALF];
                          }
                      }

                      find_sad(&is,inp_par->hadamard);
                 }
            }

            mb_is1+=is;
        }
    }

    /*################################################################
      Step 2.2  1/3 and 1/6 pixel accuracy motion vector search
    ################################################################*/
 
    /*## count motion accuracy bits : 1/3 pixel 1 bit , 1/6 pixel 2 bits ##*/
    mb_is2=JQ[img->quant][2]*km*2+(int)(JQ[img->quant][2]*(float)1.125*(1));
    mb_is3=JQ[img->quant][2]*km*2+(int)(JQ[img->quant][2]*(float)1.125*(2));

    /*  Loop through all block sizes  */
    for (jj=0; jj <= 15; jj += iz[1][1])
    {
        j40=jj/4;
        j4=img->Line4Pix+j40;
        j44=img->LinePix+jj;
        j88=j44*6;

        for (ii=0; ii <= 15; ii += iz[1][0])
        {
            i40=ii/4;
            i4=img->Col4Pix+i40;
            i44=img->ColPix+ii;
            i88=i44*6;

            total_blocks++;

            /*## prediction of motion vectors at 1/3 and 1/6 pixel accuracies ##*/

            if(i4==0)
            {
                if(j4==0)
                {
                     for(i=2;i<=3;i++)
                     {
                         ihv[i40][j40][k][iz0][i][0]=0;
                         ihv[i40][j40][k][iz0][i][1]=0;
                         ipx[i]=ihv[i40][j40][k][iz0][i][0];
                         ipy[i]=ihv[i40][j40][k][iz0][i][1];
                     }
                }
                else
                {
                     i1=i4;
                     j1=j4-1;
                     i2=i4+iz[4][0];
                     j2=j4-1;

                     for (i=2; i<=3; i++) /* for each accuracy mode */
                     {
                          int b4i,b4j;
                          int pv0[2],pv1[2],pv2[2];

                          pv0[0]=0;
                          pv0[1]=0;

                          b4i=i1-img->Col4Pix;
                          b4j=j1-img->Line4Pix;
                          if(b4i>=0 && b4i<=3 && b4j>=0 && b4j<=3)/* within the MB */
                          {
                              pv1[0]= khv[b4i][b4j][k][iz0][i][0];
                              pv1[1]= khv[b4i][b4j][k][iz0][i][1];
                          }
                          else
                          {
                              pv1[0]=iMv[i1][j1][0];
                              pv1[1]=iMv[i1][j1][1];
                          }

                          b4i=i2-img->Col4Pix;
                          b4j=j2-img->Line4Pix;
                          if(b4i>=0 && b4i<=3 && b4j>=0 && b4j<=3)/* within the MB */
                          {
                              pv2[0]= khv[b4i][b4j][k][iz0][i][0];
                              pv2[1]= khv[b4i][b4j][k][iz0][i][1];
                          }
                          else
                          {
                              pv2[0]=iMv[i2][j2][0];
                              pv2[1]=iMv[i2][j2][1];
                          }

                          ihv[i40][j40][k][iz0][i][0]=pv0[0]+pv1[0]+pv2[0]-mmin(mmin(pv0[0],pv1[0]),pv2[0])-mmax(mmax(pv0[0],pv1[0]),pv2[0]);
                          ihv[i40][j40][k][iz0][i][1]=pv0[1]+pv1[1]+pv2[1]-mmin(mmin(pv0[1],pv1[1]),pv2[1])-mmax(mmax(pv0[1],pv1[1]),pv2[1]);
                          ipx[i]=ihv[i40][j40][k][iz0][i][0];
                          ipy[i]=ihv[i40][j40][k][iz0][i][1];
                     }
                }
            }
            else
            {
                i0=i4-1;
                j0=j4;
                i1=i4;
                j1=j4-1;
                i2=i4+iz[4][0];
                j2=j4-1;

                /*  Corrections if we are at the upper edge of the frame (see similar correction in H.263) */

                if (j4 == 0)
                {
                     i1=i0;
                     i2=i0;
                     j1=j0;
                     j2=j0;
                }

                /*  Correction if a vector to the right is not yet available or to the
right of the frame*/

                if (ii == icorr && jj != 0 || i2 > (img->img_width_cr)/2-1)
                {
                     i2=i4-1;
                }

                /*  Prediction by calculating the median */

                for (i=2; i<=3; i++) /* for each accuracy mode */
                {
                     int b4i,b4j;
                     int pv0[2],pv1[2],pv2[2];

                     b4i=i0-img->Col4Pix;
                     b4j=j0-img->Line4Pix;
                     if(b4i>=0 && b4i<=3 && b4j>=0 && b4j<=3) /* within the MB */
                     {
                           pv0[0]= khv[b4i][b4j][k][iz0][i][0];
                           pv0[1]= khv[b4i][b4j][k][iz0][i][1];
                     }
                     else
                     {
                           pv0[0]=iMv[i0][j0][0];
                           pv0[1]=iMv[i0][j0][1];
                     }

                     b4i=i1-img->Col4Pix;
                     b4j=j1-img->Line4Pix;
                     if(b4i>=0 && b4i<=3 && b4j>=0 && b4j<=3) /* within the MB */
                     {
                           pv1[0]= khv[b4i][b4j][k][iz0][i][0];
                           pv1[1]= khv[b4i][b4j][k][iz0][i][1];
                     }
                     else
                     {
                           pv1[0]=iMv[i1][j1][0];
                           pv1[1]=iMv[i1][j1][1];
                     }

                     b4i=i2-img->Col4Pix;
                     b4j=j2-img->Line4Pix;
                     if(b4i>=0 && b4i<=3 && b4j>=0 && b4j<=3) /* within the MB */
                     {
                           pv2[0]= khv[b4i][b4j][k][iz0][i][0];
                           pv2[1]= khv[b4i][b4j][k][iz0][i][1];
                     }
                     else
                     {
                           pv2[0]=iMv[i2][j2][0];
                           pv2[1]=iMv[i2][j2][1];
                     }

                     ihv[i40][j40][k][iz0][i][0]=pv0[0]+pv1[0]+pv2[0]-mmin(mmin(pv0[0],pv1[0]),pv2[0])-mmax(mmax(pv0[0],pv1[0]),pv2[0]);
                     ihv[i40][j40][k][iz0][i][1]=pv0[1]+pv1[1]+pv2[1]-mmin(mmin(pv0[1],pv1[1]),pv2[1])-mmax(mmax(pv0[1],pv1[1]),pv2[1]);
                     ipx[i]=ihv[i40][j40][k][iz0][i][0];
                     ipy[i]=ihv[i40][j40][k][iz0][i][1];
                }
            }

            /*  mo() is the the original block to be used in the search process */

            for (i=0; i <= iz[2][0]; ++i)
            {
                for (j=0; j <= iz[2][1]; ++j)
                {
                     mo[i][j]=imgY[i44+i][j44+j][0];
                }
            }

            /*###########################################
                 1/3 pixel motion vector search

                 search over the positions determined 
                 by the searching pattern
            ############################################*/

            /*## clean the checked postion ##*/

            for (i=0; i<11; i++)
            {
                for(j=0; j<11; j++)
                {
                     checked[i][j]=0;
                }
            }

            checked[5][5]=1;

            /*## get 1/2 pixel motion vector 
                 as the search center ##*/

            lh11=khv[i40][j40][k][iz0][HALF][0];
            lv11=khv[i40][j40][k][iz0][HALF][1];

            is2=MAX_VALUE;
            is3=MAX_VALUE;
            for (lv=0; lv <= 3; lv++)
            {
                int dv1,dv2;

                lh1=onethird_pos[i40][j40][k][iz0][lv][0]*2;
                lv1=onethird_pos[i40][j40][k][iz0][lv][1]*2;
                iy0=i88+lh1;
                jy0=j88+lv1;

                /*## check the boundary ##*/
                if(iy0 < 0 || jy0 < 0)
                    continue;
                if(iy0>img->img_width*6-iz[1][0] || jy0>img->img_height*6-iz[1][1])
                    continue;

                /*## label the checked postion ##*/
                checked[lh1-lh11+5][lv1-lv11+5]=1;

                dv1=mround(lh1,Accuracy[ONE_THIRD])-mround(ipx[ONE_THIRD],Accuracy[ONE_THIRD]);
                dv2=mround(lv1,Accuracy[ONE_THIRD])-mround(ipy[ONE_THIRD],Accuracy[ONE_THIRD]);
                isb=(int)(JQ[img->quant][2]*(float)1.125*(mcbit[abs(dv1)]+mcbit[abs(dv2)]));
                if (lh1 == 0 && lv1 == 0 && iz0 == 1)
                {
                     isb -= JQ[img->quant][2] *16;
                }

                /*## do on-line cubic interpolation for 1/3 pixel position ##*/
                intp_x0=iy0/6;
                intp_y0=jy0/6;
                kx0=iy0%6;
                ky0=jy0%6;

                if(kx0==0 && ky0==0)
                {
                    for (i1=0; i1 <= iz[2][0]; i1 ++)
                    {
                        for (j1=0; j1 <= iz[2][1]; j1 ++)
                        {
                            intp_mb[ii+i1][jj+j1]=mref[(intp_x0+i1)*2][(intp_y0+j1)*2][na]; 
                        }
                    }
                }
                else
                {
                    for (i1=0; i1 <= iz[2][0]; i1 ++)
                    {
                        in[1]=intp_x0+i1;
                        in[0]=mmax(0,in[1]-1);
                        in[2]=mmin(img->img_width-1,in[1]+1);
                        in[3]=mmin(img->img_width-1,in[1]+2);
 
                        for (j1=0; j1 <= iz[2][1]; j1 ++)
                        {
                            jn[1]=intp_y0+j1;
                            jn[0]=mmax(0,jn[1]-1);
                            jn[2]=mmin(img->img_height-1,jn[1]+1);
                            jn[3]=mmin(img->img_height-1,jn[1]+2);

                            intp_v=0.0;
                            for (iii=0; iii <= 3; ++iii)
                            {
                                for (jjj=0; jjj <= 3; ++jjj)
                                {
                                    intp_v+=mref[in[iii]*2][jn[jjj]*2][na]*LF0[iii][kx0]*LF0[jjj][ky0];
                                }
                            }

                            intp_mb[ii+i1][jj+j1]=mmax(0,mmin((int)(intp_v+0.5),255));
                        }
                    }
                }

                for (i1=0; i1 <= iz[2][0]; i1 += 4)
                {
                    for (j1=0; j1 <= iz[2][1]; j1 += 4)
                    {
                        for (i=0; i <= 3; ++i)
                        {
                            i2=i+i1;
                            for (j=0; j <= 3; ++j)
                            {
                                j2=j+j1;

                                m7[i][j]=mo[i2][j2]-intp_mb[ii+i2][jj+j2];
                            }
                        }

                        find_sad(&isb,inp_par->hadamard);
                    }
                }

                if (isb < is2)
                {
                    /*##  save 1/3 pixel motion vectores ##*/
                    for (i=0; i <= iz[4][0]-1; ++i)
                    {
                        for (j=0; j <= iz[4][1]-1; ++j)
                        {
                            khv[i40+i][j40+j][k][iz0][ONE_THIRD][0]=lh1;
                            khv[i40+i][j40+j][k][iz0][ONE_THIRD][1]=lv1;
                        }
                    }

                    /*## save the motion compensation block ##*/
                    for (i1=0; i1 <= iz[2][0]; i1 ++)
                    {
                        for (j1=0; j1 <= iz[2][1]; j1 ++)
                        {
                            mref_mb[ii+i1][jj+j1][ONE_THIRD]=intp_mb[ii+i1][jj+j1];
                        }
                    }

                    is2=isb;

                    lh12=lh1;
                    lv12=lv1;
                }

                total_search_position++;
            }

            /*############################################ 
               1/6 pixel motion search 

               the search is performed over 8 1/6 pixel
               positions surrounding the best 1/3 pixel
               motion vector 
            #############################################*/

            for (lv=1; lv <= 8; ++lv)
            {
                int dv1,dv2;

                lh1=lh12+mcpos[lv][0];
                lv1=lv12+mcpos[lv][1];
                iy0=i88+lh1;
                jy0=j88+lv1;

                /*## check the boundary ##*/
                if(iy0 < 0 || jy0 < 0)
                     continue;
                if(iy0>img->img_width*6-iz[1][0] || jy0>img->img_height*6-iz[1][1])
                     continue;

                /*## if the position has been checked, bypass it ##*/
                if(checked[lh1-lh11+5][lv1-lv11+5]==1)
                     continue;

                isc=(int)(JQ[img->quant][2]*(float)1.125*(mcbit[abs(lh1-ipx[ONE_SIXTH])]+mcbit[abs(lv1-ipy[ONE_SIXTH])]));

                if (lh1 == 0 && lv1 == 0 && iz0 == 1)
                {
                     isc -= JQ[img->quant][2] *16;
                }

                /*## do on-line cubic interpolation for 1/6 pixel positions ##*/
                intp_x0=iy0/6;
                intp_y0=jy0/6;
                kx0=iy0%6;
                ky0=jy0%6;

                if(kx0==0 && ky0==0)
                {
                     for (i1=0; i1 <= iz[2][0]; i1 ++)
                     {
                          for (j1=0; j1 <= iz[2][1]; j1 ++)
                          {
                               intp_mb[ii+i1][jj+j1]=mref[(intp_x0+i1)*2][(intp_y0+j1)*2][na];
                          }
                     }
                }
                else
                {
                     for (i1=0; i1 <= iz[2][0]; i1 ++)
                     {
                          in[1]=intp_x0+i1;
                          in[0]=mmax(0,in[1]-1);
                          in[2]=mmin(img->img_width-1,in[1]+1);
                          in[3]=mmin(img->img_width-1,in[1]+2);

                          for (j1=0; j1 <= iz[2][1]; j1 ++)
                          {
                               jn[1]=intp_y0+j1;
                               jn[0]=mmax(0,jn[1]-1);
                               jn[2]=mmin(img->img_height-1,jn[1]+1);
                               jn[3]=mmin(img->img_height-1,jn[1]+2);

                               intp_v=0.0;
                               for (iii=0; iii <= 3; ++iii)
                               {
                                    for (jjj=0; jjj <= 3; ++jjj)
                                    {
                                        intp_v+=mref[in[iii]*2][jn[jjj]*2][na]*LF0[iii][kx0]*LF0[jjj][ky0];
                                    }
                               }

                               intp_mb[ii+i1][jj+j1]=mmax(0,mmin((int)(intp_v+0.5),255));
                          }
                     }
                }

                for (i1=0; i1 <= iz[2][0]; i1 += 4)
                {
                     for (j1=0; j1 <= iz[2][1]; j1 += 4)
                     {
                          for (i=0; i <= 3; ++i)
                          {
                               i2=i+i1;
                               i22=iy0+i2*6;
                               for (j=0; j <= 3; ++j)
                               {
                                    j2=j+j1;
                                    m7[i][j]=mo[i2][j2]-intp_mb[ii+i2][jj+j2];
                               }
                          }
                          find_sad(&isc,inp_par->hadamard);
                     }
                }

                if (isc < is3)
                {
                     /*##  save 1/6 pixel motion vectores ##*/
                     for (i=0; i <= iz[4][0]-1; ++i)
                     {
                          for (j=0; j <= iz[4][1]-1; ++j)
                          {
                               khv[i40+i][j40+j][k][iz0][ONE_SIXTH][0]=lh1;
                               khv[i40+i][j40+j][k][iz0][ONE_SIXTH][1]=lv1;
                          }
                     }

                     /*## save the motion compensation block ##*/
                     for (i1=0; i1 <= iz[2][0]; i1 ++)
                     {
                          for (j1=0; j1 <= iz[2][1]; j1 ++)
                          {
                               mref_mb[ii+i1][jj+j1][ONE_SIXTH]=intp_mb[ii+i1][jj+j1];
                          }
                     }

                     is3=isc;
                }

                total_search_position ++;
            }

            mb_is2+=is2;
            mb_is3+=is3;
        }
    }

    /*######################################################
      Step 2.3 select the best motion accuracy 
    ######################################################*/
 
    if(mb_is1<=mb_is2 && mb_is1<=mb_is3)
    {
        min_sad_inter=mb_is1;
        img->mb_accuracy=HALF;
    }
    else if(mb_is2<=mb_is3)
    {
        min_sad_inter=mb_is2;
        img->mb_accuracy=ONE_THIRD;
    }
    else
    {
        min_sad_inter=mb_is3;
        img->mb_accuracy=ONE_SIXTH;
    }


    /*######################################################
      Step 3. choose INTRA or INTER mode  
    ######################################################*/
 
    /*  
    min_sad_intra is now the minimum SAD for intra.  min_sad_inter is the best SAD for inter (see above).  
    Inter/intra is determined depending on which is smallest 
    */
    
    if (min_sad_intra < min_sad_inter) 
    {
        img->mb_mode=INTRA_MB;
        for (k=0; k < 2; ++k) 
        {
            for (i=0; i < 4; ++i) 
            {
                for (j=0; j < 4; ++j) 
                {
                    iMv[img->Col4Pix+i][img->Line4Pix+j][k]=0;
                }
            }
        }
        return predframe_no;
    } 
    else 
    {
        /* get the estimated motion vector */
        for (len=0; len < 2; ++len) 
        {
            for (i=0; i < 4; ++i) 
            {
                for (j=0; j < 4; ++j) 
                {
                    iMv[img->Col4Pix+i][img->Line4Pix+j][len]=khv[i][j][predframe_no][img->mb_mode][img->mb_accuracy][len];
                }
            }
        }
    }

    /*## collect some statistics ##*/
    img->accuracy_count[img->mb_accuracy]++;

    return predframe_no;
}
 
/*## <Sharp End> ##*/

/************************************************************************
*
*  Routine      find_sad
*
*  Description: If Hadamard=0 normal SAD is calculated. 
*               If Hadamard=1 4x4 Hadamard transform is performed and SAD of transform 
*               coefficients is calculated
*
*                    
************************************************************************/
void find_sad(int *is,int hadamard)
{

    int i,j,m1[4][4],is0;
    
    if (abs(hadamard) == 1) 
    {
        is0=0;
        for (j=0; j < 4; j++) 
        {
            m1[0][j]=m7[0][j]+m7[3][j];
            m1[1][j]=m7[1][j]+m7[2][j];
            m1[2][j]=m7[1][j]-m7[2][j];
            m1[3][j]=m7[0][j]-m7[3][j];
            
            m7[0][j]=m1[0][j]+m1[1][j];
            m7[2][j]=m1[0][j]-m1[1][j];
            m7[1][j]=m1[2][j]+m1[3][j];
            m7[3][j]=m1[3][j]-m1[2][j];
        }
        for (i=0; i < 4; i++) 
        {
            m1[i][0]=m7[i][0]+m7[i][3];
            m1[i][1]=m7[i][1]+m7[i][2];
            m1[i][2]=m7[i][1]-m7[i][2];
            m1[i][3]=m7[i][0]-m7[i][3];
            
            m7[i][0]=m1[i][0]+m1[i][1];
            m7[i][1]=m1[i][0]-m1[i][1];
            m7[i][2]=m1[i][2]+m1[i][3];
            m7[i][3]=m1[i][3]-m1[i][2];
            
            for (j=0; j < 4; j++)
                is0 += abs(m7[i][j]);
        }
        /*  
        Calculation of normalized Hadamard transforms would require divison by 4 here.  
        However,we find that divison by 2 is better (assuming we give the same penalty for 
        bituse for Hadamard=0 and 1) 
        */
        *is += is0/2;
    } 
    else if (hadamard == 0)
    {
        for (i=0; i <= 3; ++i) 
        {
            for (j=0; j <= 3; ++j) 
            {
                *is += abs(m7[i][j]);
            }
        }
    }
    else
    {
        printf("Error input parameter 'hadamard', check configuration file\n");
        exit(0);
    }

} 

/************************************************************************
*
*  Routine:     dct_luma
*
*  Description:  
*               The routine performs transform,quantization,inverse transform and writes the result +
*               prediction to the decoded frame for luma. 
*               Parameters:
*               pos_mb1,pos_mb2: Position inside a macro block (0,4,8,12).
*               nonzero: 0 if no coefficients are nonzero.  1 if there are nonzero coefficients.
*               sngl_nonzero: Parameter used to discard single nonzero coefficients (see previous note) 
*
*                    
************************************************************************/
void dct_luma(int pos_mb1,int pos_mb2,int *nonzero,int *sngl_nonzero, struct img_par *img)
{ 
    int sign(int a,int b);
       
    const byte JTAB[16][2] = 
    {
        {3,9},{2,9},{2,9},{1,9},{1,9},{1,9},{0,9},{0,9},{0,9},{0,9},{0,9},{0,9},{0,9},{0,9},{0,9},{0,9}
    };
        
    int i,j,i1,j1,k0,m5[4],m6[4],scan_mode,kk,jq0,pos_block1,pos_block2,js1,ind,jnd,kkk,nul;
        
    if (img->pic_type == INTRA_IMG) 
        jq0=JQQ3;
    else
        jq0=JQQ4;
    
    pos_block1=pos_mb1/4;
    pos_block2=pos_mb2/4;
    
    /*  Horizontal transform */
    for (j=0; j <= 3; ++j) 
    {
        for (i=0; i <= 1; ++i) 
        {
            i1=3-i;
            m5[i]=m7[i][j]+m7[i1][j];
            m5[i1]=m7[i][j]-m7[i1][j];
        }
        m7[0][j]=(m5[0]+m5[1])*13;
        m7[2][j]=(m5[0]-m5[1])*13;
        m7[1][j]=m5[3]*17+m5[2]*7;
        m7[3][j]=m5[3]*7-m5[2]*17;
    }
    
    /*  Vertival transform */
    
    for (i=0; i <= 3; ++i) 
    {
        for (j=0; j <= 1; ++j) 
        {
            j1=3-j;
            m5[j]=m7[i][j]+m7[i][j1];
            m5[j1]=m7[i][j]-m7[i][j1];
        }
        m7[i][0]=(m5[0]+m5[1])*13;
        m7[i][2]=(m5[0]-m5[1])*13;
        m7[i][1]=m5[3]*17+m5[2]*7;
        m7[i][3]=m5[3]*7-m5[2]*17;
    }
    
    /* quant */
    
    js1=img->quant;
    *nonzero=0;
    
    if (img->mb_mode == INTRA_MB && img->quant < 24) 
        scan_mode=1; /* double scan */
    else 
        scan_mode=0; /* single scan */
    
    for (kkk=0; kkk <= scan_mode; ++kkk) 
    {
        nul=-1;
        jnd=kkk*9;
        for (kk=0; kk <= 15/(scan_mode+1); ++kk) 
        {
            if (scan_mode == 0) 
            {
                i=NSCAN1[kk][0];
                j=NSCAN1[kk][1];
            } 
            else 
            {
                i=NSCAN2[kk][0][kkk];
                j=NSCAN2[kk][1][kkk];
            }
            ++nul;
            k0=0;
            ind=0;
            
            ind=(abs(m7[i][j])*JQ[js1][0]+jq0)/JQQ1;
            if (ind != 0) 
            {
                *nonzero=1;
                *sngl_nonzero += JTAB[nul][mmin(2,ind)-1];
                kof[pos_block1][pos_block2][jnd][0][scan_mode]=sign(ind,m7[i][j]);
                kof[pos_block1][pos_block2][jnd][1][scan_mode]=nul;
                ++jnd;
                nul=-1;
                k0=ind*JQ[js1][1];
            }
            m7[i][j]=sign(k0,m7[i][j]);
        }
        kof[pos_block1][pos_block2][jnd][0][scan_mode]=0;
    }
    
    /*     IDCT. */
    /*     horizontal */
    
    for (j=0; j <= 3; ++j) 
    {
        for (i=0; i <= 3; ++i) 
        {
            m5[i]=m7[i][j];
        }
        m6[0]=(m5[0]+m5[2])*13;
        m6[1]=(m5[0]-m5[2])*13;
        m6[2]=m5[1]*7-m5[3]*17;
        m6[3]=m5[1]*17+m5[3]*7;
        
        for (i=0; i <= 1; ++i) 
        {
            i1=3-i;
            m7[i][j]=m6[i]+m6[i1];
            m7[i1][j]=m6[i]-m6[i1];
        }
    }
    
    /*  vertical */
    
    for (i=0; i <= 3; ++i) 
    {
        for (j=0; j <= 3; ++j) 
        {
            m5[j]=m7[i][j];
        }
        m6[0]=(m5[0]+m5[2])*13;
        m6[1]=(m5[0]-m5[2])*13;
        m6[2]=m5[1]*7-m5[3]*17;
        m6[3]=m5[1]*17+m5[3]*7;
        
        for (j=0; j <= 1; ++j) 
        {
            j1=3-j;
            
            m7[i][j] =mmin(255,mmax(0,(m6[j]+m6[j1]+mpr[i][j] *JQQ1+JQQ2)/JQQ1));
            m7[i][j1]=mmin(255,mmax(0,(m6[j]-m6[j1]+mpr[i][j1]*JQQ1+JQQ2)/JQQ1));
        }
    }
    
    /*  Decoded block moved to frame memory */    
    for (j=0; j <= 3; ++j) 
    {
        for (i=0; i <= 3; ++i) 
        {
            imgY[img->ColPix+pos_mb1+i][img->LinePix+pos_mb2+j][1]=m7[i][j];
        }
    }
}
 
/************************************************************************
*
*  Routine :  dct_chroma
*
*  Description:    
*            Transform,quantization,inverse transform for chroma.  
*            The main reason why this is done in a separate routine is the 
*            additional 2x2 transform of DC-coeffs.
*  
*
************************************************************************/

void dct_chroma(int uv,int *i11,struct img_par *img) /* i11 both in/out */
{
    const int JCHROMA[32]=
    {
        0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,
        17,17,18,19,20,20,21,22,22,23,23,24,24,25,25
    }; 
    
    int i,j,i1,j1,k0,m1[4],n2,n1,m5[4],m6[4],ii,jj,
        kk,jq0,pos_block1,pos_block2,js1,ind,jnd,nul;    
    
    if (img->pic_type == INTRA_IMG) 
        jq0=JQQ3;
    else
        jq0=JQQ4;
    
    for (n2=0; n2 <= 4; n2 += 4) 
    {
        for (n1=0; n1 <= 4; n1 += 4) 
        {
            
            /*  Horizontal transform. */
            for (j=0; j <= 3; ++j) 
            {
                jj=n2+j;
                for (i=0; i <= 1; ++i) 
                {
                    i1=3-i;
                    m5[i]=m7[i+n1][jj]+m7[i1+n1][jj];
                    m5[i1]=m7[i+n1][jj]-m7[i1+n1][jj];
                }
                m7[n1][jj]=(m5[0]+m5[1])*13;
                m7[n1+2][jj]=(m5[0]-m5[1])*13;
                m7[n1+1][jj]=m5[3]*17+m5[2]*7;
                m7[n1+3][jj]=m5[3]*7-m5[2]*17;
            }
            
            /*  Vertical transform. */
            
            for (i=0; i <= 3; ++i) 
            {
                ii=n1+i;
                for (j=0; j <= 1; ++j) 
                {
                    j1=3-j;
                    m5[j]=m7[ii][n2+j]+m7[ii][n2+j1];
                    m5[j1]=m7[ii][n2+j]-m7[ii][n2+j1];
                }
                m7[ii][n2+0]=(m5[0]+m5[1])*13;
                m7[ii][n2+2]=(m5[0]-m5[1])*13;
                m7[ii][n2+1]=m5[3]*17+m5[2]*7;
                m7[ii][n2+3]=m5[3]*7-m5[2]*17;
            }
        }
    }
    
    /*     2X2 transform of DC coeffs. */    
    m1[0]=(m7[0][0]+m7[4][0]+m7[0][4]+m7[4][4])/2;
    m1[1]=(m7[0][0]-m7[4][0]+m7[0][4]-m7[4][4])/2;
    m1[2]=(m7[0][0]+m7[4][0]-m7[0][4]-m7[4][4])/2;
    m1[3]=(m7[0][0]-m7[4][0]-m7[0][4]+m7[4][4])/2;
    
    /*     Quant of 2X2 coeffs.*/
    js1=JCHROMA[img->quant];
    nul=-1;
    jnd=0;
    for (kk=0; kk <= 3; ++kk) 
    {
        ++nul;
        k0=0;
        ind=0;
        ind=(abs(m1[kk])*JQ[js1][0]+jq0)/JQQ1;
        if (ind != 0) 
        {
            *i11=mmax(1,*i11);
            kofu[jnd][0][uv]=sign(ind,m1[kk]);
            kofu[jnd][1][uv]=nul;
            ++jnd;
            nul=-1;
            k0=ind*JQ[js1][1];
        }
        m1[kk]=sign(k0,m1[kk]);
    }
    kofu[jnd][0][uv]=0;
    
    /*  Invers transform of 2x2 DC coefficients */
    
    m7[0][0]=(m1[0]+m1[1]+m1[2]+m1[3])/2;
    m7[4][0]=(m1[0]-m1[1]+m1[2]-m1[3])/2;
    m7[0][4]=(m1[0]+m1[1]-m1[2]-m1[3])/2;
    m7[4][4]=(m1[0]-m1[1]-m1[2]+m1[3])/2;
    
    /*     Quant of AC-coeffs. */
    
    for (n2=0; n2 <= 4; n2 += 4) 
    {
        for (n1=0; n1 <= 4; n1 += 4) 
        {
            pos_block1=n1/4 + 2*uv;
            pos_block2=n2/4 + 4;
            nul=-1;
            jnd=0;
            for (kk=1; kk <= 15; ++kk) 
            {
                i=NSCAN1[kk][0];
                j=NSCAN1[kk][1];
                ++nul;
                k0=0;
                ind=0;
                
                ind=(abs(m7[n1+i][n2+j])*JQ[js1][0]+jq0)/JQQ1;
                if (ind != 0) 
                {
                    *i11=2;
                    kof[pos_block1][pos_block2][jnd][0][0]=sign(ind,m7[n1+i][n2+j]);
                    kof[pos_block1][pos_block2][jnd][1][0]=nul;
                    ++jnd;
                    nul=-1;
                    k0=ind*JQ[js1][1];
                }
                m7[n1+i][n2+j]=sign(k0,m7[n1+i][n2+j]);
            }
            kof[pos_block1][pos_block2][jnd][0][0]=0;
            
            /*     IDCT. */

            /*     Horizontal. */
            for (j=0; j <= 3; ++j) 
            {
                for (i=0; i <= 3; ++i) 
                {
                    m5[i]=m7[n1+i][n2+j];
                }
                m6[0]=(m5[0]+m5[2])*13;
                m6[1]=(m5[0]-m5[2])*13;
                m6[2]=m5[1]*7-m5[3]*17;
                m6[3]=m5[1]*17+m5[3]*7;
                
                for (i=0; i <= 1; ++i) 
                {
                    i1=3-i;
                    m7[n1+i][n2+j]=m6[i]+m6[i1];
                    m7[n1+i1][n2+j]=m6[i]-m6[i1];
                }
            }
            
            /*     Vertical. */
            for (i=0; i <= 3; ++i) 
            {
                for (j=0; j <= 3; ++j) 
                {
                    m5[j]=m7[n1+i][n2+j];
                }
                m6[0]=(m5[0]+m5[2])*13;
                m6[1]=(m5[0]-m5[2])*13;
                m6[2]=m5[1]*7-m5[3]*17;
                m6[3]=m5[1]*17+m5[3]*7;
                
                for (j=0; j <= 1; ++j) 
                {
                    j1=3-j;
                    m7[n1+i][n2+j]=mmin(255,mmax(0,(m6[j]+m6[j1]+mpr[n1+i][n2+j]*JQQ1+JQQ2)/JQQ1));
                    m7[n1+i][n2+j1]=mmin(255,mmax(0,(m6[j]-m6[j1]+mpr[n1+i][n2+j1]*JQQ1+JQQ2)/JQQ1));
                }
            }
        }
    }
    
    /*  Decoded block moved to memory */
    for (j=0; j <= 7; ++j) 
    {
        for (i=0; i <= 7; ++i) 
        {
            imgUV[img->ColCPix+i][img->LineCPix+j][1][uv]= m7[i][j];
        }
    }
} 

int sign(int a,int b)
{
    int x;
    x=abs(a); 
    if (b >= 0)
        return x;
    else
        return -x;
}
