
/*****************************************************************************/
/*                                                                           */
/*      Copyright (C) 1999 SHARP LABS OF AMERICA All Rights Reserved.        */
/*                                                                           */
/*****************************************************************************/

/* Copyright (C) 1997 by Sharp Laboratories of America, Camas, WA 98607, USA.
   All Rights Reserved.

   The sections in this program that are contained within the headings:  */

   /*## <Sharp Start> ##*/


   /*## <Sharp End> ##*/

/* are copyrighted by Sharp Laboratories of America. It may not be redistributed 
   without the consent of the copyright holders. In no circumstances may the 
   copyright notice be removed. The program may not be used without a written 
   permission of the copyright holders.  The users shall treat the program in 
   confidence. This program is provided as is, without any express or implied 
   warranty, without even the warranty of fitness for a particular purpose.
   */

/*****************************************************************************/

/************************************************************************
*
*  Name :       macroblock.c
*
*  Description: Encodes one macro block
*
************************************************************************/
#include <math.h>

#include "global.h"
#include "macroblock.h"

void macroblock(struct img_par *img,struct inp_par *inp,struct stat_par *stat)
{
    int i,j,k;
    int predframe_no;
    int js[2][2];
    int cbp,CbpMask;
    int i33,jjj,iii,i22,j4;
    int i4,ij4,itel;
    int is0,is,isi;
    int found_predmode;
    int i1,lh,i2,lv,uv,js0,js1,js2,js3,jj,ii,j2,isteph,istepv;
    int len,info;
    int kk,kbeg,kend,level,scan_mode;
    int modi[8];
    int CurrPixY,CurrPixX,Curr4PixY,Curr4PixX;
    int intrap;
    
    int kx0,ky0;
    int ii0,jj0;
    int in[4],jn[4];
    int intp_x0,intp_y0;
    float intp_v;

     /*  
        Set vectors to 0 before doing motion search in motion_search().  
        Here mode selection is done and appropriate motion vectors are found
     */
    for (k=0; k < 2; ++k) 
        for (j=0; j < 4; ++j) 
            for (i=0; i < 4; ++i) 
                iMv[img->Col4Pix+i][img->Line4Pix+j][k]=0;
            
    /*
        do intra coding 
    */ 
    cbp=0;
    isi=JQ[img->quant][2]*24;
    for (jjj=0; jjj < 16; jjj += 8) 
    {
        for (iii=0; iii < 16; iii += 8) 
        {
            CbpMask=(int)pow(2,(iii/8+jjj/4));
            for (j4=jjj; j4 < jjj+8; j4 += 4) 
            {
                CurrPixY=img->LinePix+j4;
                Curr4PixY=CurrPixY/4;
                for (i4=iii; i4 < iii+8; i4 += 4) 
                {
                    CurrPixX=img->ColPix+i4;
                    Curr4PixX=CurrPixX/4;
                    ij4=(int)pow(2,((i4-iii)/4+(j4-jjj)/2));
                    itel=Curr4PixX & 1;
                    /*   intrapred returns all 5 possible intra predictions */
                   
                    intrapred(CurrPixX,CurrPixY);
                    /* 
                        Search for the best out of max. 5 prediction modes.  
                        Notice that some modes are not possible at frame edges
                    */
                    is0=MAX_VALUE;
                    img->mb_mode = INTRA_MB;
                    for (k=0; k <= 4; ++k) 
                    {
                        /* Horizontal pred needs Y pix to pred from, vertical needs X pix, diagonal needs both */
                        if (k==DC_PRED||k==HOR_PRED||predmode[CurrPixX/4+1][CurrPixY/4] >= 0)/* use DC or vert pred if hor edge*/
                        {
                            if (k==DC_PRED||k==VERT_PRED||predmode[CurrPixX/4][CurrPixY/4+1] >= 0)/* use DC or hor pred if vert edge*/
                            {
                                for (j=0; j <= 3; ++j) 
                                {
                                    for (i=0; i <= 3; ++i) 
                                    {
                                        m7[i][j]=imgY[CurrPixX+i][CurrPixY+j][0]-mprr[i][j][k];
                                    }
                                }
                                
                                is=JQ[img->quant][2]*iprang[k+(predmode[Curr4PixX+1][Curr4PixY]+predmode[Curr4PixX][Curr4PixY+1]*6)*5+35];
                                
                                find_sad(&is,inp->hadamard);
                                
                                if (is < is0) 
                                {
                                    is0=is;
                                    found_predmode=k;
                                    for (j=0; j <= 3; ++j) 
                                    {
                                        for (i=0; i <= 3; ++i) 
                                        {
                                            mpr[i][j]=mprr[i][j][k];
                                        }
                                    }
                                }
                            }
                        }
                    }
                    isi=isi+is0;
                    
                    predmode[Curr4PixX+1][Curr4PixY+1]=found_predmode;
                    if (itel == 1) 
                    {
                        modi[i4/8+j4/2]=MODN[intrap+iprang[found_predmode+(predmode[Curr4PixX+1][Curr4PixY] +
                            predmode[Curr4PixX][Curr4PixY+1]*6)*5+35]*5];
                        
                    }
                    intrap=iprang[found_predmode+(predmode[Curr4PixX+1][Curr4PixY] + predmode[Curr4PixX][Curr4PixY+1]*6)*5+35];
                   
                    /*  Make difference from prediction to be transformed*/
                    
                    for (j=0; j <= 3; ++j) 
                    {
                        for (i=0; i <= 3; ++i) 
                        {
                            m7[i][j] =imgY[img->ColPix+i4+i][img->LinePix+j4+j][0] - mpr[i][j];
                        }
                    }
                    dct_luma(i4,j4,&i1,&i22,img);
                    if (i1 != 0) 
                    {
                        cbp=cbp | CbpMask;
                    }
                }
            }
        }
    }
            
            
    if ((img->LineMb == img->LineMb_upd && img->LineMb_upd != img->LineMb_intra) || img->pic_type == INTRA_IMG) 
    {
        img->mb_mode=INTRA_MB;  /* if intra image or intra MB for error robustness */       
    } 
    else /* inter */
    { 
        predframe_no=motion_search(inp,img,isi); 
    }
    if (img->mb_mode!=INTRA_MB)
    {
        cbp=0;
        i33=0;
        for (jjj=0; jjj < 16; jjj += 8) 
        {
            for (iii=0; iii < 16; iii += 8) 
            {
                CbpMask=(int)pow(2,(iii/8+jjj/4));
                i22=0;
                for (j4=jjj; j4 < jjj+8; j4 += 4) 
                {
                    CurrPixY=img->LinePix+j4;
                    Curr4PixY=CurrPixY/4;
                    for (i4=iii; i4 < iii+8; i4 += 4) 
                    {
                        CurrPixX=img->ColPix+i4;
                        Curr4PixX=CurrPixX/4;
                        ij4=(int)pow(2,((i4-iii)/4+(j4-jjj)/2));
                        predmode[Curr4PixX+1][Curr4PixY+1]=0;
 
                        /*## <Sharp Start> ##*/

                        /*## 
                          modified luma motion compensation part.
                          the compensated image block mref_mb[16][16][4] is 
                          calculated during motion estimation.
                        ##*/
 
                        for (j=0; j <= 3; ++j) 
                        {
                            for (i=0; i <= 3; ++i) 
                            {
                                mpr[i][j]=mref_mb[i4+i][j4+j][img->mb_accuracy];
                                m7[i][j] =imgY[img->ColPix+i4+i][img->LinePix+j4+j][0] - mpr[i][j];
                            }
                        }

                        /*## <Sharp End> ##*/

                        dct_luma(i4,j4,&i1,&i22,img);
                        if (i1 != 0) 
                        {
                            cbp=cbp | CbpMask;
                        }
                    }
                }
                
                /* 
                The purpose of the action below is to prevent that single or 'expensive' coefficients are coded.  
                With 4x4 transform there is larger chance that a single coefficient in a 8x8 or 16x16 block may be nonzero.  
                A single small (level=1) coefficient in a 8x8 block will cost in bits: 3 or more bits for the coefficient,
                4 bits for EOBs for the 4x4 blocks,possibly also more bits for CBP.  Hence the total 'cost' of that single 
                coefficient will typically be 10-12 bits which in a RD consideration is too much to justify the Distortion improvement.  
                The action below is to watch such 'single' coefficients and set the reconstructed block equal to the prediction according 
                to a given criterium.  The action is taken only for inter luma blocks.  
                Notice that this is a pure encoder issue and hence does not have any implication on the standard.
                i22 is a parameter set in dct_luma() and accumulated for each 8x8 block.  If level=1 for a coefficient,i22 is increased by a 
                number depending on RUN for that coefficient.  The numbers are (see also dct_luma()): 3,2,2,1,1,1,0,0,... when RUN equals 0,1,2,3,4,5,6, etc.  
                If level >1 i22 is increased by 9 (or any number above 3).  The threshold is set to 3.  This means for example:
                1: If there is one coefficient with (RUN,level)=(0,1) in a 8x8 block this coefficient is discarded.
                2: If there are two coefficients with (RUN,level)=(1,1) and (4,1) the coefficients are also discarded
                i33 is the accumulation of i22 over a whole macro block.  If i33 is 5 or less for the whole MB,all nonzero coefficients are 
                discarded for the MB and the reconstructed block is set equal to the prediction.
                */
                
                if (i22 > 3) 
                {
                    i33 += i22;
                }
                if (i22 <= 3 ) 
                {
                    cbp=cbp & (63-CbpMask);

                    /*## <Sharp Start> ##*/

                    /*## the following lines are modified to use mref_mb[16][16][4] ##*/
                    for (i=iii; i <= iii+7; ++i) 
                    {
                        for (j=jjj; j <= jjj+7; ++j) 
                        {
                            imgY[img->ColPix+i][img->LinePix+j][1]=mref_mb[i][j][img->mb_accuracy];
                        }
                    }

                    /*## <Sharp End> ##*/
                }
            }
        }
        if (i33 <= 5 ) 
        {
            cbp=cbp & 48; /* mask bit 4 and 5 */ 

            /*## <Sharp Start> ##*/

            /*## the following lines are modified to use mref_mb[16][16][4] ##*/
            for (i=0; i < 16; ++i) 
            {
                for (j=0; j < 16; ++j) 
                {
                    imgY[img->ColPix+i][img->LinePix+j][1]=mref_mb[i][j][img->mb_accuracy];
                }
            }

            /*## <Sharp End> ##*/
        }
    }
     
    /* 4X4 chroma blocks */
    
    i1=0;
    for (uv=0; uv <= 1; ++uv) 
    {
        
        /*  
            Intra prediction for chroma.  There is only one prediction mode, corresponding to 'DC prediction' for luma.  
            However,since 2x2 transform of DC levels are used,all predictions are madefrom neighbouring MBs.  
            Prediction also depends on whether the MB is at a frame edge.  Hence all the 'options' with js(0,1,2,3) 
            and depending on img->ColMb and img->LineMb 
        */
        if (img->mb_mode == INTRA_MB) 
        {
            js0=0;
            js1=0;
            js2=0;
            js3=0;
            for (i=0; i <= 3; ++i) 
            {
                if (img->LineMb > 0) /* no vertical edge */
                {
                    js0 += imgUV[img->ColCPix+i][img->LineCPix-1][1][uv];
                    js1 += imgUV[img->ColCPix+i+4][img->LineCPix-1][1][uv];
                }
                if (img->ColMb > 0) /* no horisontal edge */
                {
                    js2 += imgUV[img->ColCPix-1][img->LineCPix+i][1][uv];
                    js3 += imgUV[img->ColCPix-1][img->LineCPix+i+4][1][uv];
                }
            }
            
            if (img->ColMb > 0 && img->LineMb > 0) /* free macro block */
            {
                js[0][0]=(js0+js2+4)/8;
                js[1][0]=(js1+2)/4;
                js[0][1]=(js3+2)/4;
                js[1][1]=(js1+js3+4)/8;
            }
            else if (img->ColMb == 0 && img->LineMb > 0) /* left edge */
            {
                js[0][0]=(js0+2)/4;
                js[1][0]=(js1+2)/4;
                js[0][1]=(js0+2)/4;
                js[1][1]=(js1+2)/4;
            }
            else if (img->ColMb > 0 && img->LineMb == 0) /* upper edge */
            {
                js[0][0]=(js2+2)/4;
                js[1][0]=(js2+2)/4;
                js[0][1]=(js3+2)/4;
                js[1][1]=(js3+2)/4;
            }
            else if (img->ColMb == 0 && img->LineMb == 0) /* top left corner */
            {
                js[0][0]=128;
                js[1][0]=128;
                js[0][1]=128;
                js[1][1]=128;
            }
            for (j=0; j <= 7; ++j) 
            {
                for (i=0; i <= 7; ++i) 
                {
                    mpr[i][j]=js[i/4][j/4];
                    m7[i][j]=imgUV[img->ColCPix+i][img->LineCPix+j][0][uv]-mpr[i][j];
                }
            }
        }
        
        if (img->mb_mode != INTRA_MB) 
        {
            /*## <Sharp Start> ##*/

            /*## 
               on-line interpolation of chroma value using cubic interpolator
               LF0 (defined in macroblock.h).
            ##*/

            for (j=0; j < 8; ++j) 
            {
                Curr4PixY=(img->LineCPix+j)/2;
                for (i=0; i < 8; ++i) 
                {
                    Curr4PixX=(img->ColCPix+i)/2;
                    lh=iMv[Curr4PixX][Curr4PixY][0]/2;
                    lv=iMv[Curr4PixX][Curr4PixY][1]/2;
                    ii=mmin((img->img_width_cr-1)*6,mmax(0,(img->ColCPix+i)*6+lh));
                    jj=mmin((img->img_height_cr-1)*6,mmax(0,(img->LineCPix+j)*6+lv));

                    intp_x0=ii/6;
                    intp_y0=jj/6;
                    kx0=ii%6;
                    ky0=jj%6;

                    if(kx0==0 && ky0==0)
                    {
                         mpr[i][j]=mcef[intp_x0][intp_y0][uv][img->multframe_no];
                    }
                    else
                    {
                         in[1]=intp_x0;
                         in[0]=mmax(0,in[1]-1);
                         in[2]=mmin(img->img_width_cr-1,in[1]+1);
                         in[3]=mmin(img->img_width_cr-1,in[1]+2);

                         jn[1]=intp_y0;
                         jn[0]=mmax(0,jn[1]-1);
                         jn[2]=mmin(img->img_height_cr-1,jn[1]+1);
                         jn[3]=mmin(img->img_height_cr-1,jn[1]+2);

                         intp_v=0.0;
                         for (ii0=0; ii0 <= 3; ++ii0)
                         {
                              for (jj0=0; jj0 <= 3; ++jj0)
                              {
                                   intp_v+=mcef[in[ii0]][jn[jj0]][uv][img->multframe_no]*LF0[ii0][kx0]*LF0[jj0][ky0];
                              }
                         }

                         mpr[i][j]=mmax(0,mmin((int)(intp_v+0.5),255));
                    }

                    m7[i][j]=imgUV[img->ColCPix+i][img->LineCPix+j][0][uv]
                             - mpr[i][j];
                }
            }

            /*## <Sharp End> ##*/

        }
        dct_chroma(uv,&i1,img);
    }
    cbp += i1*16;
    
    /*  Check if a MB is not coded (no coeffs. only 0-vectors and prediction from the most recent frame*/
    
    if (img->mb_mode==M16x16_MB && cbp==0 && iMv[img->Col4Pix][img->Line4Pix][0]==0 && iMv[img->Col4Pix][img->Line4Pix][1]==0 && predframe_no==0) 
    {
        img->mb_mode=COPY_MB;
    }
    
    /*  Bit file prouction (and collectiong statistics) for one macro block */
    /*  Bits for mode.  This does not apply for intra frames*/
    
    if (img->pic_type != INTRA_IMG) 
    {
        /* coding mode bits */
        n_linfo(img->mb_mode,&len,&info);
        put(len,info);
        stat->bit_ctr += len;
        tmp_bit_use[0][img->pic_type] += len;

        /*## <Sharp Start> ##*/

        /*## code motion vector accuracy using 2-1-2 VLC table ##*/

        if(img->mb_mode != COPY_MB && img->mb_mode != INTRA_MB)
        {
           /* 2-1-2 VLC */
           if(img->mb_accuracy==ONE_THIRD)
           {
              n_linfo(0,&len,&info);
              put(len,info);
              stat->bit_ctr += len;
              tmp_bit_use[0][img->pic_type] += len;
           }
           else if(img->mb_accuracy==HALF)
           {  
              n_linfo(1,&len,&info);
              put(2,info);
              stat->bit_ctr += 2;
              tmp_bit_use[0][img->pic_type] += 2;
           }
           else if(img->mb_accuracy==ONE_SIXTH)
           {
              n_linfo(2,&len,&info);
              put(2,info);
              stat->bit_ctr += 2;
              tmp_bit_use[0][img->pic_type] += 2;
           }
           else{
              fprintf(stderr,"error : unknow motion accuracy \n");
              exit(-1);
           }
        }

        /*## <Sharp End> ##*/
    }
    
    /*  Return if mode=0 (just copy last MB) */
    
    if (img->mb_mode == COPY_MB) 
    {
        ++vectmode_use[img->pic_type][img->mb_mode];
        return;
    }
    
    /*  Bits for intra prediction modes*/
    
    if (img->mb_mode == INTRA_MB) 
    {
        for (i=0; i <= 7; ++i) 
        {
            n_linfo(modi[i],&len,&info);
            put(len,info);
            stat->bit_ctr += len;
            tmp_bit_use[10][img->pic_type] += len;
        }
    }
    
    /*  Bits for vector data*/
    
    if (img->mb_mode != INTRA_MB) 
    {
        /*  If multiple prediction mode, write reference frame for the MB */
        if (inp->no_multpred > 1) 
        {
            n_linfo(predframe_no,&len,&info);
            put(len,info);
            stat->bit_ctr += len;
            tmp_bit_use[img->mb_mode][img->pic_type] += len;
        }
        
        isteph=img->izh/4;      /* horizontal stepsize */
        istepv=img->izv/4;      /* vertical stepsize */
        
        for (j=0; j <= 3; j += istepv) 
        {
            
            for (i=0;i <= 3; i += isteph) 
            {
                for (k=0; k <= 1; ++k) 
                {
                    /*## <Sharp Start> ##*/

                    /*## calculate motion vector bits 

                         first,both the motion vector and predicted motion
                         vector are truncted to its accuracy by integer 
                         division.
                         then, the motion vector difference and its bits
                         are calculated
                    ##*/ 

                    int dv1,dv2,denom;

                    denom=Accuracy[img->mb_accuracy];
                    dv1=mround(iMv[img->Col4Pix+i][img->Line4Pix+j][k],denom);
                    dv2=mround(ihv[i][j][predframe_no][img->mb_mode][img->mb_accuracy][k],denom);
                    mvd_linfo(dv1-dv2,&len,&info);

                    /*## <Sharp End> ##*/

                    put(len,info);
                    stat->bit_ctr += len;
                    tmp_bit_use[img->mb_mode][img->pic_type] += len;
                }
            }
        }
    }
    
    /*  Bits for CBP */
    
    n_linfo(NCBP[cbp][1-img->mb_mode/INTRA_MB],&len,&info);
    put(len,info);
    stat->bit_ctr += len;
    tmp_bit_use[INTRA_MB][img->pic_type] += len;
    
    /*  Bits for luma coefficients */
    
    for (j=0; j <= 3; ++j) 
    {
        jj=j/2;
        for (i=0; i <= 3; ++i) 
        {
            ii=i/2;
            if ((cbp & (int)pow(2,ii+jj*2)) != 0) 
            {
                if (img->mb_mode == INTRA_MB && img->quant < 24) 
                {
                    scan_mode=1; /* choose double scan */
                }
                else
                {
                    scan_mode=0; /* choose single scan */
                }
                for (kk=0; kk <= scan_mode; ++kk) 
                {
                    kbeg=kk*9;
                    kend=kbeg+16/(scan_mode+1);
                    level=1;
                    for (k=kbeg; k <= kend && level !=0; ++k) 
                    {
                        level=kof[i][j][k][0][scan_mode];
                        if (scan_mode == 0) /* single scan */
                        {
                            levrun_linfo_inter(level,kof[i][j][k][1][0],&len,&info);
                        }
                        if (scan_mode == 1) /* double scan */
                        {
                            levrun_linfo_intra(level,kof[i][j][k][1][1],&len,&info);
                        }
                        put(len,info);
                        stat->bit_ctr += len;
                        tmp_bit_use[10][img->pic_type] += len;   
                    }
                }
            }
        }
    }
    
    /*  Bits for chroma AC-coeffs. */
    
    if (cbp/16 == 2) /* chech the fifth bit (chroma bit) set in coded block pattern */
    {
        for (j=4; j <= 5; ++j) 
        {
            for (i=0; i <= 3; ++i) 
            {
                level=1;
                for (k=0; k <= 15 && level!=0; ++k) 
                {
                    level=kof[i][j][k][0][0];
                    levrun_linfo_inter(level,kof[i][j][k][1][0],&len,&info);
                    put(len,info);
                    stat->bit_ctr += len;
                    tmp_bit_use[11][img->pic_type] += len;
                }
            }
        }
    }
    
    /*  Bits for chroma 2x2 DC transform coefficients */
    
    if (cbp >= 16) 
    {
        for (uv=0; uv <= 1; ++uv) 
        {
            level=1;/* get inside*/
            for (k=0; k <= 4 && level != 0; ++k) 
            {
                level=kofu[k][0][uv];
                levrun_linfo_c2x2(level,kofu[k][1][uv],&len,&info);
                put(len,info);
                stat->bit_ctr += len;
                tmp_bit_use[11][img->pic_type] += len;
            }
        }
    }
    ++vectmode_use[img->pic_type][img->mb_mode];    /* counter for statistics */
}

