00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041
/* Clipping LUT: ff_cropTbl[MAX_NEG_CROP + x] clamps x to [0,255].
 * Zero-initialized here; presumably filled by the DSP init code — confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares LUT, used offset by +256 so indices in [-256,255] are valid:
 * ff_squareTbl[256 + x] == x*x. Presumably filled at init — confirm. */
uint32_t ff_squareTbl[512] = {0, };
00044
00045 #define BIT_DEPTH 9
00046 #include "dsputil_template.c"
00047 #undef BIT_DEPTH
00048
00049 #define BIT_DEPTH 10
00050 #include "dsputil_template.c"
00051 #undef BIT_DEPTH
00052
00053 #define BIT_DEPTH 8
00054 #include "dsputil_template.c"
00055
00056
/* Byte-replicated constants across an unsigned long: ~0UL/255 == 0x0101..01,
 * so pb_7f has 0x7f in every byte and pb_80 has 0x80 in every byte.
 * Used for SWAR-style packed byte arithmetic. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00059
/* Classic JPEG/MPEG zigzag scan: maps scan position -> raster index of the
 * 8x8 coefficient block. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00070
00071
00072
/* Interleaved "2-4-8" zigzag variant; presumably used by interlaced /
 * field-DCT scan modes — confirm against callers. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00083
00084
/* 16-byte-aligned inverse zigzag table (scan position for each raster index);
 * NOTE(review): appears to be filled at init by arch-specific code — confirm. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00086
/* Alternate horizontal scan order (scan position -> raster index). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00097
/* Alternate vertical scan order (scan position -> raster index);
 * the MPEG-2 "alternate scan" used for interlaced content. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00108
00109
/* Coefficient permutation for the simple MMX IDCT: maps natural raster index
 * to the coefficient order that IDCT expects. NOTE(review): exact layout is
 * dictated by the MMX IDCT implementation — confirm against it. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00120
/* Row interleave pattern for the SSE2 IDCT (even rows then odd rows). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00122
00123 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00124 int i;
00125 int end;
00126
00127 st->scantable= src_scantable;
00128
00129 for(i=0; i<64; i++){
00130 int j;
00131 j = src_scantable[i];
00132 st->permutated[i] = permutation[j];
00133 #if ARCH_PPC
00134 st->inverse[j] = i;
00135 #endif
00136 }
00137
00138 end=-1;
00139 for(i=0; i<64; i++){
00140 int j;
00141 j = st->permutated[i];
00142 if(j>end) end=j;
00143 st->raster_end[i]= end;
00144 }
00145 }
00146
/* Sum all 256 pixel values of a 16x16 block.
 * line_size is the stride between rows in bytes. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int y, x;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++)
            sum += pix[x];
        pix += line_size;
    }
    return sum;
}
00168
/* Sum of squares of all pixels of a 16x16 block (the L2 "norm" before mean
 * removal). line_size is the row stride in bytes.
 * NOTE(review): the fast path type-puns pix through uint64_t/uint32_t loads;
 * this assumes sufficient alignment and violates strict aliasing — kept as-is
 * since it is long-standing hand-optimized code. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    /* offset by +256 so (possibly negative) indices elsewhere stay valid;
       here only non-negative pixel values are used */
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* reference byte-at-a-time version, kept for documentation */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            /* 64-bit targets: one 8-byte load, then extract each byte */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            /* 32-bit targets: two 4-byte loads */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        /* advance to next row: 16 bytes were consumed above */
        pix += line_size - 16;
    }
    return s;
}
00216
/* Byte-swap w 32-bit words from src into dst (buffers may be the same). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i = 0;

    /* bulk: 8 words per iteration */
    for (; i + 8 <= w; i += 8) {
        int k;
        for (k = 0; k < 8; k++)
            dst[i + k] = av_bswap32(src[i + k]);
    }
    /* tail: remaining words */
    while (i < w) {
        dst[i] = av_bswap32(src[i]);
        i++;
    }
}
00234
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00240
00241 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00242 {
00243 int s, i;
00244 uint32_t *sq = ff_squareTbl + 256;
00245
00246 s = 0;
00247 for (i = 0; i < h; i++) {
00248 s += sq[pix1[0] - pix2[0]];
00249 s += sq[pix1[1] - pix2[1]];
00250 s += sq[pix1[2] - pix2[2]];
00251 s += sq[pix1[3] - pix2[3]];
00252 pix1 += line_size;
00253 pix2 += line_size;
00254 }
00255 return s;
00256 }
00257
00258 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00259 {
00260 int s, i;
00261 uint32_t *sq = ff_squareTbl + 256;
00262
00263 s = 0;
00264 for (i = 0; i < h; i++) {
00265 s += sq[pix1[0] - pix2[0]];
00266 s += sq[pix1[1] - pix2[1]];
00267 s += sq[pix1[2] - pix2[2]];
00268 s += sq[pix1[3] - pix2[3]];
00269 s += sq[pix1[4] - pix2[4]];
00270 s += sq[pix1[5] - pix2[5]];
00271 s += sq[pix1[6] - pix2[6]];
00272 s += sq[pix1[7] - pix2[7]];
00273 pix1 += line_size;
00274 pix2 += line_size;
00275 }
00276 return s;
00277 }
00278
00279 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00280 {
00281 int s, i;
00282 uint32_t *sq = ff_squareTbl + 256;
00283
00284 s = 0;
00285 for (i = 0; i < h; i++) {
00286 s += sq[pix1[ 0] - pix2[ 0]];
00287 s += sq[pix1[ 1] - pix2[ 1]];
00288 s += sq[pix1[ 2] - pix2[ 2]];
00289 s += sq[pix1[ 3] - pix2[ 3]];
00290 s += sq[pix1[ 4] - pix2[ 4]];
00291 s += sq[pix1[ 5] - pix2[ 5]];
00292 s += sq[pix1[ 6] - pix2[ 6]];
00293 s += sq[pix1[ 7] - pix2[ 7]];
00294 s += sq[pix1[ 8] - pix2[ 8]];
00295 s += sq[pix1[ 9] - pix2[ 9]];
00296 s += sq[pix1[10] - pix2[10]];
00297 s += sq[pix1[11] - pix2[11]];
00298 s += sq[pix1[12] - pix2[12]];
00299 s += sq[pix1[13] - pix2[13]];
00300 s += sq[pix1[14] - pix2[14]];
00301 s += sq[pix1[15] - pix2[15]];
00302
00303 pix1 += line_size;
00304 pix2 += line_size;
00305 }
00306 return s;
00307 }
00308
00309 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00310 {
00311 int i;
00312
00313
00314 for(i=0;i<8;i++) {
00315 block[0] = pixels[0];
00316 block[1] = pixels[1];
00317 block[2] = pixels[2];
00318 block[3] = pixels[3];
00319 block[4] = pixels[4];
00320 block[5] = pixels[5];
00321 block[6] = pixels[6];
00322 block[7] = pixels[7];
00323 pixels += line_size;
00324 block += 8;
00325 }
00326 }
00327
00328 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00329 const uint8_t *s2, int stride){
00330 int i;
00331
00332
00333 for(i=0;i<8;i++) {
00334 block[0] = s1[0] - s2[0];
00335 block[1] = s1[1] - s2[1];
00336 block[2] = s1[2] - s2[2];
00337 block[3] = s1[3] - s2[3];
00338 block[4] = s1[4] - s2[4];
00339 block[5] = s1[5] - s2[5];
00340 block[6] = s1[6] - s2[6];
00341 block[7] = s1[7] - s2[7];
00342 s1 += stride;
00343 s2 += stride;
00344 block += 8;
00345 }
00346 }
00347
00348
00349 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00350 int line_size)
00351 {
00352 int i;
00353 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00354
00355
00356 for(i=0;i<8;i++) {
00357 pixels[0] = cm[block[0]];
00358 pixels[1] = cm[block[1]];
00359 pixels[2] = cm[block[2]];
00360 pixels[3] = cm[block[3]];
00361 pixels[4] = cm[block[4]];
00362 pixels[5] = cm[block[5]];
00363 pixels[6] = cm[block[6]];
00364 pixels[7] = cm[block[7]];
00365
00366 pixels += line_size;
00367 block += 8;
00368 }
00369 }
00370
00371 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00372 int line_size)
00373 {
00374 int i;
00375 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00376
00377
00378 for(i=0;i<4;i++) {
00379 pixels[0] = cm[block[0]];
00380 pixels[1] = cm[block[1]];
00381 pixels[2] = cm[block[2]];
00382 pixels[3] = cm[block[3]];
00383
00384 pixels += line_size;
00385 block += 8;
00386 }
00387 }
00388
00389 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00390 int line_size)
00391 {
00392 int i;
00393 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00394
00395
00396 for(i=0;i<2;i++) {
00397 pixels[0] = cm[block[0]];
00398 pixels[1] = cm[block[1]];
00399
00400 pixels += line_size;
00401 block += 8;
00402 }
00403 }
00404
00405 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00406 uint8_t *restrict pixels,
00407 int line_size)
00408 {
00409 int i, j;
00410
00411 for (i = 0; i < 8; i++) {
00412 for (j = 0; j < 8; j++) {
00413 if (*block < -128)
00414 *pixels = 0;
00415 else if (*block > 127)
00416 *pixels = 255;
00417 else
00418 *pixels = (uint8_t)(*block + 128);
00419 block++;
00420 pixels++;
00421 }
00422 pixels += (line_size - 8);
00423 }
00424 }
00425
00426 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00427 int line_size)
00428 {
00429 int i;
00430
00431
00432 for(i=0;i<8;i++) {
00433 pixels[0] = block[0];
00434 pixels[1] = block[1];
00435 pixels[2] = block[2];
00436 pixels[3] = block[3];
00437 pixels[4] = block[4];
00438 pixels[5] = block[5];
00439 pixels[6] = block[6];
00440 pixels[7] = block[7];
00441
00442 pixels += line_size;
00443 block += 8;
00444 }
00445 }
00446
00447 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00448 int line_size)
00449 {
00450 int i;
00451 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00452
00453
00454 for(i=0;i<8;i++) {
00455 pixels[0] = cm[pixels[0] + block[0]];
00456 pixels[1] = cm[pixels[1] + block[1]];
00457 pixels[2] = cm[pixels[2] + block[2]];
00458 pixels[3] = cm[pixels[3] + block[3]];
00459 pixels[4] = cm[pixels[4] + block[4]];
00460 pixels[5] = cm[pixels[5] + block[5]];
00461 pixels[6] = cm[pixels[6] + block[6]];
00462 pixels[7] = cm[pixels[7] + block[7]];
00463 pixels += line_size;
00464 block += 8;
00465 }
00466 }
00467
00468 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00469 int line_size)
00470 {
00471 int i;
00472 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00473
00474
00475 for(i=0;i<4;i++) {
00476 pixels[0] = cm[pixels[0] + block[0]];
00477 pixels[1] = cm[pixels[1] + block[1]];
00478 pixels[2] = cm[pixels[2] + block[2]];
00479 pixels[3] = cm[pixels[3] + block[3]];
00480 pixels += line_size;
00481 block += 8;
00482 }
00483 }
00484
00485 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00486 int line_size)
00487 {
00488 int i;
00489 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00490
00491
00492 for(i=0;i<2;i++) {
00493 pixels[0] = cm[pixels[0] + block[0]];
00494 pixels[1] = cm[pixels[1] + block[1]];
00495 pixels += line_size;
00496 block += 8;
00497 }
00498 }
00499
00500 static int sum_abs_dctelem_c(DCTELEM *block)
00501 {
00502 int sum=0, i;
00503 for(i=0; i<64; i++)
00504 sum+= FFABS(block[i]);
00505 return sum;
00506 }
00507
/* Fill a 16-pixel-wide block of height h with a constant value.
 * line_size is the row stride in bytes. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00517
/* Fill an 8-pixel-wide block of height h with a constant value.
 * line_size is the row stride in bytes. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00527
/* Upscale an 8x8 source block 2x in both directions into a 16x16 destination.
 * linesize is the destination row stride in bytes. */
static void scale_block_c(const uint8_t src[64], uint8_t *dst, int linesize)
{
    int i, j;
    uint16_t *dst1 = (uint16_t *) dst;              /* even destination row */
    uint16_t *dst2 = (uint16_t *)(dst + linesize);  /* odd destination row  */

    for (j = 0; j < 8; j++) {
        for (i = 0; i < 8; i++) {
            /* src[i] * 0x0101 replicates the byte into both halves of the
               uint16, writing two horizontally adjacent output pixels at
               once; storing through dst1 and dst2 duplicates vertically.
               NOTE(review): assumes dst is at least 2-byte aligned. */
            dst1[i] = dst2[i] = src[i] * 0x0101;
        }
        src += 8;
        /* linesize is in bytes but dst1/dst2 are uint16_t*, so += linesize
           advances 2*linesize bytes == two destination lines, which is
           exactly one iteration's worth of output. */
        dst1 += linesize;
        dst2 += linesize;
    }
}
00543
00544 #define avg2(a,b) ((a+b+1)>>1)
00545 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00546
/* One-point global motion compensation: bilinear interpolation of an
 * 8-pixel-wide block with 1/16-pel fractional offsets (x16, y16).
 * rounder is added before the final >>8. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* bilinear weights (sum to 256) derived from the fractional position */
    const int A = (16 - x16) * (16 - y16);
    const int B = x16        * (16 - y16);
    const int C = (16 - x16) * y16;
    const int D = x16        * y16;
    int y, x;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]              + B * src[x + 1] +
                      C * src[x + stride]     + D * src[x + stride + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00569
/* Affine global motion compensation for one 8-pixel-wide stripe of height h.
 * (ox, oy) is the 16.16 fixed-point source position of the first pixel;
 * (dxx, dxy, dyx, dyy) are the per-pixel / per-line position increments;
 * shift gives the sub-pel precision (s == 1<<shift steps per pixel);
 * r is the rounding constant; width/height bound the valid source area. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert sizes to max valid coordinate for the clip/compare below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* split 16.16 position into integer pel and sub-pel fraction */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare handles negative coordinates as out-of-range */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*(s-frac_y)
                                       + ( src[index+stride ]*(s-frac_x)
                                         + src[index+stride+1]* frac_x )* frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
                                         + src[index+stride ]* frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside in both directions: nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index ];
                }
            }

            /* step the affine position across the row */
            vx+= dxx;
            vy+= dyx;
        }
        /* step the affine start position down one line */
        ox += dxy;
        oy += dyy;
    }
}
00627
/* Full-pel thirdpel "put": dispatch to the plain copy routine for the width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00636
/* Thirdpel put, horizontal 1/3 offset: dst = round((2a + b)/3),
 * approximated as (683*(2a + b + 1)) >> 11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00647
/* Thirdpel put, horizontal 2/3 offset: dst = round((a + 2b)/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00658
/* Thirdpel put, vertical 1/3 offset: dst = round((2a + below)/3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00669
/* Thirdpel put, (1/3, 1/3) offset: 2D weights 4/3/3/2 over the 2x2
 * neighbourhood, approximated with 2731/32768. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00680
/* Thirdpel put, (1/3, 2/3) offset: 2D weights 3/2/4/3 over the 2x2
 * neighbourhood. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00691
/* Thirdpel put, vertical 2/3 offset: dst = round((a + 2*below)/3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00702
/* Thirdpel put, (2/3, 1/3) offset: 2D weights 3/4/2/3 over the 2x2
 * neighbourhood. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00713
/* Thirdpel put, (2/3, 2/3) offset: 2D weights 2/3/3/4 over the 2x2
 * neighbourhood. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00724
/* Full-pel thirdpel "avg": dispatch to the plain averaging routine for the
 * given width. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00733
/* Thirdpel avg, horizontal 1/3 offset: rounding average of dst with the
 * interpolated value. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00744
/* Thirdpel avg, horizontal 2/3 offset. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00755
/* Thirdpel avg, vertical 1/3 offset. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00766
/* Thirdpel avg, (1/3, 1/3) offset: weights 4/3/3/2 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (4 * src[x] + 3 * src[x + 1] +
                                   3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00777
/* Thirdpel avg, (1/3, 2/3) offset: weights 3/2/4/3 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (3 * src[x] + 2 * src[x + 1] +
                                   4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00788
/* Thirdpel avg, vertical 2/3 offset. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00799
/* Thirdpel avg, (2/3, 1/3) offset: weights 3/4/2/3 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (3 * src[x] + 4 * src[x + 1] +
                                   2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00810
/* Thirdpel avg, (2/3, 2/3) offset: weights 2/3/3/4 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (2 * src[x] + 3 * src[x + 1] +
                                   3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00821 #if 0
00822 #define TPEL_WIDTH(width)\
00823 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00824 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
00825 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00826 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
00827 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00828 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
00829 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00830 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
00831 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00832 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
00833 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00834 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
00835 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00836 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
00837 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00838 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
00839 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00840 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
00841 #endif
00842
00843 #define QPEL_MC(r, OPNAME, RND, OP) \
00844 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00845 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00846 int i;\
00847 for(i=0; i<h; i++)\
00848 {\
00849 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00850 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00851 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00852 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00853 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00854 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00855 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00856 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00857 dst+=dstStride;\
00858 src+=srcStride;\
00859 }\
00860 }\
00861 \
00862 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00863 const int w=8;\
00864 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00865 int i;\
00866 for(i=0; i<w; i++)\
00867 {\
00868 const int src0= src[0*srcStride];\
00869 const int src1= src[1*srcStride];\
00870 const int src2= src[2*srcStride];\
00871 const int src3= src[3*srcStride];\
00872 const int src4= src[4*srcStride];\
00873 const int src5= src[5*srcStride];\
00874 const int src6= src[6*srcStride];\
00875 const int src7= src[7*srcStride];\
00876 const int src8= src[8*srcStride];\
00877 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00878 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00879 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00880 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00881 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00882 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00883 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00884 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00885 dst++;\
00886 src++;\
00887 }\
00888 }\
00889 \
00890 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00891 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00892 int i;\
00893 \
00894 for(i=0; i<h; i++)\
00895 {\
00896 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00897 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00898 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00899 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00900 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00901 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00902 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00903 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00904 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00905 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00906 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00907 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00908 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00909 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00910 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00911 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00912 dst+=dstStride;\
00913 src+=srcStride;\
00914 }\
00915 }\
00916 \
00917 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00918 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00919 int i;\
00920 const int w=16;\
00921 for(i=0; i<w; i++)\
00922 {\
00923 const int src0= src[0*srcStride];\
00924 const int src1= src[1*srcStride];\
00925 const int src2= src[2*srcStride];\
00926 const int src3= src[3*srcStride];\
00927 const int src4= src[4*srcStride];\
00928 const int src5= src[5*srcStride];\
00929 const int src6= src[6*srcStride];\
00930 const int src7= src[7*srcStride];\
00931 const int src8= src[8*srcStride];\
00932 const int src9= src[9*srcStride];\
00933 const int src10= src[10*srcStride];\
00934 const int src11= src[11*srcStride];\
00935 const int src12= src[12*srcStride];\
00936 const int src13= src[13*srcStride];\
00937 const int src14= src[14*srcStride];\
00938 const int src15= src[15*srcStride];\
00939 const int src16= src[16*srcStride];\
00940 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00941 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00942 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00943 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00944 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00945 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00946 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00947 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00948 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00949 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00950 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00951 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00952 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00953 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00954 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00955 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00956 dst++;\
00957 src++;\
00958 }\
00959 }\
00960 \
00961 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00962 uint8_t half[64];\
00963 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00964 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00965 }\
00966 \
00967 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00968 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00969 }\
00970 \
00971 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00972 uint8_t half[64];\
00973 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00974 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00975 }\
00976 \
00977 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00978 uint8_t full[16*9];\
00979 uint8_t half[64];\
00980 copy_block9(full, src, 16, stride, 9);\
00981 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00982 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00983 }\
00984 \
00985 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00986 uint8_t full[16*9];\
00987 copy_block9(full, src, 16, stride, 9);\
00988 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00989 }\
00990 \
00991 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00992 uint8_t full[16*9];\
00993 uint8_t half[64];\
00994 copy_block9(full, src, 16, stride, 9);\
00995 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00996 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00997 }\
00998 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00999 uint8_t full[16*9];\
01000 uint8_t halfH[72];\
01001 uint8_t halfV[64];\
01002 uint8_t halfHV[64];\
01003 copy_block9(full, src, 16, stride, 9);\
01004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01005 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01006 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01007 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01008 }\
01009 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01010 uint8_t full[16*9];\
01011 uint8_t halfH[72];\
01012 uint8_t halfHV[64];\
01013 copy_block9(full, src, 16, stride, 9);\
01014 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01015 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01016 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01017 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01018 }\
01019 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01020 uint8_t full[16*9];\
01021 uint8_t halfH[72];\
01022 uint8_t halfV[64];\
01023 uint8_t halfHV[64];\
01024 copy_block9(full, src, 16, stride, 9);\
01025 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01026 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01027 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01028 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01029 }\
01030 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01031 uint8_t full[16*9];\
01032 uint8_t halfH[72];\
01033 uint8_t halfHV[64];\
01034 copy_block9(full, src, 16, stride, 9);\
01035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01036 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01037 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01038 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01039 }\
01040 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01041 uint8_t full[16*9];\
01042 uint8_t halfH[72];\
01043 uint8_t halfV[64];\
01044 uint8_t halfHV[64];\
01045 copy_block9(full, src, 16, stride, 9);\
01046 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01047 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01048 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01049 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01050 }\
01051 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01052 uint8_t full[16*9];\
01053 uint8_t halfH[72];\
01054 uint8_t halfHV[64];\
01055 copy_block9(full, src, 16, stride, 9);\
01056 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01057 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01058 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01059 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01060 }\
01061 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01062 uint8_t full[16*9];\
01063 uint8_t halfH[72];\
01064 uint8_t halfV[64];\
01065 uint8_t halfHV[64];\
01066 copy_block9(full, src, 16, stride, 9);\
01067 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01068 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01069 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01070 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01071 }\
01072 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01073 uint8_t full[16*9];\
01074 uint8_t halfH[72];\
01075 uint8_t halfHV[64];\
01076 copy_block9(full, src, 16, stride, 9);\
01077 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01078 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01079 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01080 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01081 }\
01082 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01083 uint8_t halfH[72];\
01084 uint8_t halfHV[64];\
01085 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01086 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01087 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01088 }\
01089 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01090 uint8_t halfH[72];\
01091 uint8_t halfHV[64];\
01092 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01093 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01094 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01095 }\
01096 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01097 uint8_t full[16*9];\
01098 uint8_t halfH[72];\
01099 uint8_t halfV[64];\
01100 uint8_t halfHV[64];\
01101 copy_block9(full, src, 16, stride, 9);\
01102 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01103 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01104 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01105 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01106 }\
01107 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01108 uint8_t full[16*9];\
01109 uint8_t halfH[72];\
01110 copy_block9(full, src, 16, stride, 9);\
01111 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01112 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01113 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01114 }\
01115 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01116 uint8_t full[16*9];\
01117 uint8_t halfH[72];\
01118 uint8_t halfV[64];\
01119 uint8_t halfHV[64];\
01120 copy_block9(full, src, 16, stride, 9);\
01121 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01122 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01123 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01124 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01125 }\
01126 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01127 uint8_t full[16*9];\
01128 uint8_t halfH[72];\
01129 copy_block9(full, src, 16, stride, 9);\
01130 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01131 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01132 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01133 }\
01134 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01135 uint8_t halfH[72];\
01136 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01137 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01138 }\
01139 \
01140 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01141 uint8_t half[256];\
01142 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01143 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01144 }\
01145 \
01146 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01147 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01148 }\
01149 \
01150 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01151 uint8_t half[256];\
01152 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01153 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01154 }\
01155 \
01156 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01157 uint8_t full[24*17];\
01158 uint8_t half[256];\
01159 copy_block17(full, src, 24, stride, 17);\
01160 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01161 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01162 }\
01163 \
01164 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01165 uint8_t full[24*17];\
01166 copy_block17(full, src, 24, stride, 17);\
01167 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01168 }\
01169 \
01170 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01171 uint8_t full[24*17];\
01172 uint8_t half[256];\
01173 copy_block17(full, src, 24, stride, 17);\
01174 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01175 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01176 }\
01177 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01178 uint8_t full[24*17];\
01179 uint8_t halfH[272];\
01180 uint8_t halfV[256];\
01181 uint8_t halfHV[256];\
01182 copy_block17(full, src, 24, stride, 17);\
01183 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01184 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01185 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01186 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01187 }\
01188 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01189 uint8_t full[24*17];\
01190 uint8_t halfH[272];\
01191 uint8_t halfHV[256];\
01192 copy_block17(full, src, 24, stride, 17);\
01193 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01194 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01195 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01196 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01197 }\
01198 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01199 uint8_t full[24*17];\
01200 uint8_t halfH[272];\
01201 uint8_t halfV[256];\
01202 uint8_t halfHV[256];\
01203 copy_block17(full, src, 24, stride, 17);\
01204 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01205 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01206 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01207 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01208 }\
01209 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01210 uint8_t full[24*17];\
01211 uint8_t halfH[272];\
01212 uint8_t halfHV[256];\
01213 copy_block17(full, src, 24, stride, 17);\
01214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01215 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01216 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01217 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01218 }\
01219 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01220 uint8_t full[24*17];\
01221 uint8_t halfH[272];\
01222 uint8_t halfV[256];\
01223 uint8_t halfHV[256];\
01224 copy_block17(full, src, 24, stride, 17);\
01225 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01226 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01227 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01228 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01229 }\
01230 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01231 uint8_t full[24*17];\
01232 uint8_t halfH[272];\
01233 uint8_t halfHV[256];\
01234 copy_block17(full, src, 24, stride, 17);\
01235 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01236 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01237 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01238 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01239 }\
01240 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01241 uint8_t full[24*17];\
01242 uint8_t halfH[272];\
01243 uint8_t halfV[256];\
01244 uint8_t halfHV[256];\
01245 copy_block17(full, src, 24, stride, 17);\
01246 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01247 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01248 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01249 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01250 }\
01251 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01252 uint8_t full[24*17];\
01253 uint8_t halfH[272];\
01254 uint8_t halfHV[256];\
01255 copy_block17(full, src, 24, stride, 17);\
01256 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01257 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01258 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01259 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01260 }\
01261 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01262 uint8_t halfH[272];\
01263 uint8_t halfHV[256];\
01264 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01265 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01266 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01267 }\
01268 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01269 uint8_t halfH[272];\
01270 uint8_t halfHV[256];\
01271 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01272 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01273 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01274 }\
01275 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01276 uint8_t full[24*17];\
01277 uint8_t halfH[272];\
01278 uint8_t halfV[256];\
01279 uint8_t halfHV[256];\
01280 copy_block17(full, src, 24, stride, 17);\
01281 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01282 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01283 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01284 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01285 }\
01286 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01287 uint8_t full[24*17];\
01288 uint8_t halfH[272];\
01289 copy_block17(full, src, 24, stride, 17);\
01290 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01291 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01292 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01293 }\
01294 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01295 uint8_t full[24*17];\
01296 uint8_t halfH[272];\
01297 uint8_t halfV[256];\
01298 uint8_t halfHV[256];\
01299 copy_block17(full, src, 24, stride, 17);\
01300 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01301 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01302 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01303 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01304 }\
01305 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01306 uint8_t full[24*17];\
01307 uint8_t halfH[272];\
01308 copy_block17(full, src, 24, stride, 17);\
01309 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01310 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01311 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01312 }\
01313 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01314 uint8_t halfH[272];\
01315 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01316 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01317 }
01318
/* Write-back ops for QPEL_MC: the 6-tap filter output `b` is scaled by 32
 * (tap sum 20+20-6-6+3+3-1-1), so it is rounded (+16, or +15 for the
 * no_rnd variants), shifted down by 5 and clamped through the crop table
 * `cm`; `put` stores the result, `avg` averages it with the existing
 * destination pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
/* NOTE(review): op_avg_no_rnd is defined but no QPEL_MC instantiation
 * below uses it. */
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full set of 8x8 and 16x16 quarter-pel MC functions. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01332
/* Full-pel (mc00) positions need no interpolation, hence no rounding:
 * both the rounded and no_rnd variants alias the plain block-copy
 * routines.  (Bug fix: put_no_rnd_qpel16_mc00_c previously pointed at
 * "ff_put_pixels16x16_8_c", inconsistent with put_qpel16_mc00_c above;
 * it must use the same copy routine, ff_put_pixels16x16_c.) */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01339
01340 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01341 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01342 int i;
01343
01344 for(i=0; i<h; i++){
01345 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01346 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01347 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01348 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01349 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01350 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01351 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01352 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01353 dst+=dstStride;
01354 src+=srcStride;
01355 }
01356 }
01357
#if CONFIG_RV40_DECODER
/* RV40 handles the (3,3) quarter-pel position by forwarding to the plain
 * 2x2 half-pel averager instead of the generic qpel filter chain. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01372
01373 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01374 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01375 int i;
01376
01377 for(i=0; i<w; i++){
01378 const int src_1= src[ -srcStride];
01379 const int src0 = src[0 ];
01380 const int src1 = src[ srcStride];
01381 const int src2 = src[2*srcStride];
01382 const int src3 = src[3*srcStride];
01383 const int src4 = src[4*srcStride];
01384 const int src5 = src[5*srcStride];
01385 const int src6 = src[6*srcStride];
01386 const int src7 = src[7*srcStride];
01387 const int src8 = src[8*srcStride];
01388 const int src9 = src[9*srcStride];
01389 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01390 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01391 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01392 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01393 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01394 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01395 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01396 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01397 src++;
01398 dst++;
01399 }
01400 }
01401
/* WMV2 mspel MC, position (1,0): average the unfiltered source with the
 * horizontal half-pel output to form the quarter-pel sample. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01407
/* WMV2 mspel MC, position (2,0): pure horizontal half-pel filter. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01411
/* WMV2 mspel MC, position (3,0): average the source shifted one pixel
 * right with the horizontal half-pel output. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01417
/* WMV2 mspel MC, position (0,2): pure vertical half-pel filter. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01421
/* WMV2 mspel MC, position (1,2): average the vertical half-pel with the
 * HV-filtered sample.  halfH covers 11 rows starting one row above the
 * block, so halfH+8 is the row-0-aligned view with its row above valid
 * for the vertical filter's -1 tap. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel MC, position (3,2): like mc12 but the vertical half-pel is
 * taken one pixel to the right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel MC, position (2,2): horizontal half-pel followed by the
 * vertical half-pel filter (halfH+8 skips the padding row above). */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01445
/**
 * H.263 deblocking across a horizontal block edge: for each of 8 columns,
 * filter the four pixels p0..p3 straddling the edge (two rows above src,
 * two rows at/below it).  Strength is looked up from the quantizer.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            /* edge gradient, weighted towards the inner pixel pair */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* ramp: full correction for |d| < strength, tapering linearly
             * to zero at 2*strength, and none beyond that */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clamp to 0..255: after the correction the value lies in
             * -256..511, so bit 8 set means out of range; ~(p>>31) yields
             * 0 for negatives and -1 (stored as 255) for values > 255 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            /* secondary smoothing of the outer pixels, limited to |d1|/2 */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01482
/**
 * H.263 deblocking across a vertical block edge: for each of 8 rows,
 * filter the four pixels p0..p3 straddling the edge (two columns left of
 * src, two at/right of it).  Same math as h263_v_loop_filter_c, rotated.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            /* edge gradient, weighted towards the inner pixel pair */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* ramp: full correction for |d| < strength, tapering linearly
             * to zero at 2*strength, and none beyond that */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clamp to 0..255 via the bit-8 trick (see v filter above is
             * not assumed here: value is in -256..511, ~(p>>31) maps
             * negatives to 0 and overflows to 255) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            /* secondary smoothing of the outer pixels, limited to |d1|/2 */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01519
01520 static void h261_loop_filter_c(uint8_t *src, int stride){
01521 int x,y,xy,yz;
01522 int temp[64];
01523
01524 for(x=0; x<8; x++){
01525 temp[x ] = 4*src[x ];
01526 temp[x + 7*8] = 4*src[x + 7*stride];
01527 }
01528 for(y=1; y<7; y++){
01529 for(x=0; x<8; x++){
01530 xy = y * stride + x;
01531 yz = y * 8 + x;
01532 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
01533 }
01534 }
01535
01536 for(y=0; y<8; y++){
01537 src[ y*stride] = (temp[ y*8] + 2)>>2;
01538 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
01539 for(x=1; x<7; x++){
01540 xy = y * stride + x;
01541 yz = y * 8 + x;
01542 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
01543 }
01544 }
01545 }
01546
01547 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01548 {
01549 int s, i;
01550
01551 s = 0;
01552 for(i=0;i<h;i++) {
01553 s += abs(pix1[0] - pix2[0]);
01554 s += abs(pix1[1] - pix2[1]);
01555 s += abs(pix1[2] - pix2[2]);
01556 s += abs(pix1[3] - pix2[3]);
01557 s += abs(pix1[4] - pix2[4]);
01558 s += abs(pix1[5] - pix2[5]);
01559 s += abs(pix1[6] - pix2[6]);
01560 s += abs(pix1[7] - pix2[7]);
01561 s += abs(pix1[8] - pix2[8]);
01562 s += abs(pix1[9] - pix2[9]);
01563 s += abs(pix1[10] - pix2[10]);
01564 s += abs(pix1[11] - pix2[11]);
01565 s += abs(pix1[12] - pix2[12]);
01566 s += abs(pix1[13] - pix2[13]);
01567 s += abs(pix1[14] - pix2[14]);
01568 s += abs(pix1[15] - pix2[15]);
01569 pix1 += line_size;
01570 pix2 += line_size;
01571 }
01572 return s;
01573 }
01574
01575 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01576 {
01577 int s, i;
01578
01579 s = 0;
01580 for(i=0;i<h;i++) {
01581 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01582 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01583 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01584 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01585 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01586 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01587 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01588 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01589 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
01590 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
01591 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
01592 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
01593 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
01594 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
01595 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
01596 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
01597 pix1 += line_size;
01598 pix2 += line_size;
01599 }
01600 return s;
01601 }
01602
01603 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01604 {
01605 int s, i;
01606 uint8_t *pix3 = pix2 + line_size;
01607
01608 s = 0;
01609 for(i=0;i<h;i++) {
01610 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01611 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01612 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01613 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01614 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01615 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01616 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01617 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01618 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
01619 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
01620 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
01621 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
01622 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
01623 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
01624 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
01625 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
01626 pix1 += line_size;
01627 pix2 += line_size;
01628 pix3 += line_size;
01629 }
01630 return s;
01631 }
01632
01633 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01634 {
01635 int s, i;
01636 uint8_t *pix3 = pix2 + line_size;
01637
01638 s = 0;
01639 for(i=0;i<h;i++) {
01640 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01641 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01642 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01643 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01644 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01645 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01646 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01647 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01648 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
01649 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
01650 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
01651 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
01652 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
01653 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
01654 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
01655 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
01656 pix1 += line_size;
01657 pix2 += line_size;
01658 pix3 += line_size;
01659 }
01660 return s;
01661 }
01662
01663 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01664 {
01665 int s, i;
01666
01667 s = 0;
01668 for(i=0;i<h;i++) {
01669 s += abs(pix1[0] - pix2[0]);
01670 s += abs(pix1[1] - pix2[1]);
01671 s += abs(pix1[2] - pix2[2]);
01672 s += abs(pix1[3] - pix2[3]);
01673 s += abs(pix1[4] - pix2[4]);
01674 s += abs(pix1[5] - pix2[5]);
01675 s += abs(pix1[6] - pix2[6]);
01676 s += abs(pix1[7] - pix2[7]);
01677 pix1 += line_size;
01678 pix2 += line_size;
01679 }
01680 return s;
01681 }
01682
01683 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01684 {
01685 int s, i;
01686
01687 s = 0;
01688 for(i=0;i<h;i++) {
01689 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01690 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01691 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01692 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01693 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01694 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01695 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01696 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01697 pix1 += line_size;
01698 pix2 += line_size;
01699 }
01700 return s;
01701 }
01702
01703 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01704 {
01705 int s, i;
01706 uint8_t *pix3 = pix2 + line_size;
01707
01708 s = 0;
01709 for(i=0;i<h;i++) {
01710 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01711 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01712 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01713 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01714 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01715 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01716 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01717 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01718 pix1 += line_size;
01719 pix2 += line_size;
01720 pix3 += line_size;
01721 }
01722 return s;
01723 }
01724
01725 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01726 {
01727 int s, i;
01728 uint8_t *pix3 = pix2 + line_size;
01729
01730 s = 0;
01731 for(i=0;i<h;i++) {
01732 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01733 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01734 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01735 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01736 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01737 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01738 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01739 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01740 pix1 += line_size;
01741 pix2 += line_size;
01742 pix3 += line_size;
01743 }
01744 return s;
01745 }
01746
/* "Noise shaping" SSE, 16 pixels wide.
 * score1: plain sum of squared differences between the two blocks.
 * score2: difference in 2x2 gradient magnitude between s1 and s2 — a
 * measure of how much fine texture/noise structure was lost or added.
 * The gradient mismatch is weighted by avctx->nsse_weight (8 when no
 * MpegEncContext is supplied). */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient needs the row below; skip on the last row */
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01772
/* 8-pixel-wide variant of nsse16_c: SSE plus a weighted penalty for the
 * mismatch in 2x2 gradient (texture/noise) energy between the blocks. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient needs the row below; skip on the last row */
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01798
/* Evaluate the weighted squared error of the residual rem after adding
 * basis*scale (rescaled from BASIS_SHIFT to RECON_SHIFT precision with
 * rounding). Used by the encoder's trellis-like basis search.
 * The assert bounds b so that (w*b)^2 fits the unsigned accumulator. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
01813
01814 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01815 int i;
01816
01817 for(i=0; i<8*8; i++){
01818 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01819 }
01820 }
01821
01830 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01831 {
01832 int i;
01833 DCTELEM temp[64];
01834
01835 if(last<=0) return;
01836
01837
01838 for(i=0; i<=last; i++){
01839 const int j= scantable[i];
01840 temp[j]= block[j];
01841 block[j]=0;
01842 }
01843
01844 for(i=0; i<=last; i++){
01845 const int j= scantable[i];
01846 const int perm_j= permutation[j];
01847 block[perm_j]= temp[j];
01848 }
01849 }
01850
/* Comparison stub for FF_CMP_ZERO: every candidate scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s; (void)a; (void)b; (void)stride; (void)h;
    return 0;
}
01854
01855 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
01856 int i;
01857
01858 memset(cmp, 0, sizeof(void*)*6);
01859
01860 for(i=0; i<6; i++){
01861 switch(type&0xFF){
01862 case FF_CMP_SAD:
01863 cmp[i]= c->sad[i];
01864 break;
01865 case FF_CMP_SATD:
01866 cmp[i]= c->hadamard8_diff[i];
01867 break;
01868 case FF_CMP_SSE:
01869 cmp[i]= c->sse[i];
01870 break;
01871 case FF_CMP_DCT:
01872 cmp[i]= c->dct_sad[i];
01873 break;
01874 case FF_CMP_DCT264:
01875 cmp[i]= c->dct264_sad[i];
01876 break;
01877 case FF_CMP_DCTMAX:
01878 cmp[i]= c->dct_max[i];
01879 break;
01880 case FF_CMP_PSNR:
01881 cmp[i]= c->quant_psnr[i];
01882 break;
01883 case FF_CMP_BIT:
01884 cmp[i]= c->bit[i];
01885 break;
01886 case FF_CMP_RD:
01887 cmp[i]= c->rd[i];
01888 break;
01889 case FF_CMP_VSAD:
01890 cmp[i]= c->vsad[i];
01891 break;
01892 case FF_CMP_VSSE:
01893 cmp[i]= c->vsse[i];
01894 break;
01895 case FF_CMP_ZERO:
01896 cmp[i]= zero_cmp;
01897 break;
01898 case FF_CMP_NSSE:
01899 cmp[i]= c->nsse[i];
01900 break;
01901 #if CONFIG_DWT
01902 case FF_CMP_W53:
01903 cmp[i]= c->w53[i];
01904 break;
01905 case FF_CMP_W97:
01906 cmp[i]= c->w97[i];
01907 break;
01908 #endif
01909 default:
01910 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
01911 }
01912 }
01913 }
01914
/* dst[i] += src[i] (mod 256) for w bytes.
 * The main loop processes sizeof(long) bytes per iteration with SWAR:
 * the low 7 bits of each lane are summed directly, and the carry into
 * bit 7 is reconstructed with XOR so lanes never leak into each other.
 * Remaining tail bytes are handled one at a time.
 * NOTE(review): the word loop loads through long*, which assumes the
 * buffers are adequately aligned for long access — confirm at callers. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
01925
/* dst[i] = src1[i] - src2[i] (mod 256) for w bytes.
 * On targets without fast unaligned access, a bytewise unrolled path is
 * taken when src2 is misaligned for long loads. Otherwise the word loop
 * subtracts sizeof(long) byte lanes at once using a SWAR borrow trick
 * (low 7 bits subtracted with a forced borrow guard, sign bit fixed up
 * via XOR). Tail bytes are done individually. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
01950
/* HuffYUV median-prediction decode: reconstruct w bytes of dst from the
 * diff stream, predicting each sample as the median of left, top (src1)
 * and left+top-topleft (mod 256). The left / left_top prediction state
 * is carried across calls via the in/out pointers. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}
01967
/* Encode-side inverse of add_hfyu_median_prediction_c: emit into dst the
 * difference between each src2 sample and its median prediction from
 * left / top (src1) / left+top-topleft. State is updated through the
 * left and left_top pointers for the next call. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
01985
/* Left-prediction decode: dst[i] is the running sum of src deltas seeded
 * with acc (each store truncates to 8 bits). Returns the final sum so the
 * caller can continue the prediction on the next slice. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i = 0;

    /* Unrolled-by-two main loop, then a bytewise tail. */
    while (i < w - 1) {
        acc += src[i];
        dst[i] = acc;
        i++;
        acc += src[i];
        dst[i] = acc;
        i++;
    }
    while (i < w) {
        acc += src[i];
        dst[i] = acc;
        i++;
    }

    return acc;
}
02004
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left-prediction decode for packed 32-bit BGRA pixels: each channel is an
 * independent running sum of the input deltas, seeded from and written back
 * to *red/*green/*blue/*alpha (stores truncate to 8 bits per channel). */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *sp = src + 4 * i;
        uint8_t       *dp = dst + 4 * i;

        sum_b += sp[B];
        sum_g += sp[G];
        sum_r += sp[R];
        sum_a += sp[A];

        dp[B] = sum_b;
        dp[G] = sum_g;
        dp[R] = sum_r;
        dp[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
02045
02046 #define BUTTERFLY2(o1,o2,i1,i2) \
02047 o1= (i1)+(i2);\
02048 o2= (i1)-(i2);
02049
02050 #define BUTTERFLY1(x,y) \
02051 {\
02052 int a,b;\
02053 a= x;\
02054 b= y;\
02055 x= a+b;\
02056 y= a-b;\
02057 }
02058
02059 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02060
/* SATD: 8x8 Hadamard transform of the difference src-dst, returning the
 * sum of absolute transform coefficients. Rows are transformed in the
 * first loop, columns in the second; the final butterfly stage is folded
 * into the absolute-value accumulation (BUTTERFLYA). h must be 8. */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* 1-D Hadamard on row i of the pixel difference */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* 1-D Hadamard down column i; last stage merged with |.| sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02105
/* Intra SATD: 8x8 Hadamard transform of the source block itself (no
 * reference), sum of absolute coefficients minus the DC term, which is
 * subtracted at the end (temp[8*0]+temp[8*4] is the DC after the merged
 * final stage). h must be 8. */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* 1-D Hadamard on row i of the source pixels */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* 1-D Hadamard down column i; last stage merged with |.| sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution so the metric reflects AC energy only */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02153
/* DCT-domain SAD: forward-DCT the 8x8 pixel difference and return the sum
 * of absolute coefficients — a coding-cost-flavoured distortion metric.
 * h must be 8. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02164
02165 #if CONFIG_GPL
02166 #define DCT8_1D {\
02167 const int s07 = SRC(0) + SRC(7);\
02168 const int s16 = SRC(1) + SRC(6);\
02169 const int s25 = SRC(2) + SRC(5);\
02170 const int s34 = SRC(3) + SRC(4);\
02171 const int a0 = s07 + s34;\
02172 const int a1 = s16 + s25;\
02173 const int a2 = s07 - s34;\
02174 const int a3 = s16 - s25;\
02175 const int d07 = SRC(0) - SRC(7);\
02176 const int d16 = SRC(1) - SRC(6);\
02177 const int d25 = SRC(2) - SRC(5);\
02178 const int d34 = SRC(3) - SRC(4);\
02179 const int a4 = d16 + d25 + (d07 + (d07>>1));\
02180 const int a5 = d07 - d34 - (d25 + (d25>>1));\
02181 const int a6 = d07 + d34 - (d16 + (d16>>1));\
02182 const int a7 = d16 - d25 + (d34 + (d34>>1));\
02183 DST(0, a0 + a1 ) ;\
02184 DST(1, a4 + (a7>>2)) ;\
02185 DST(2, a2 + (a3>>1)) ;\
02186 DST(3, a5 + (a6>>2)) ;\
02187 DST(4, a0 - a1 ) ;\
02188 DST(5, a6 - (a5>>2)) ;\
02189 DST(6, (a2>>1) - a3 ) ;\
02190 DST(7, (a4>>2) - a7 ) ;\
02191 }
02192
/* H.264-style integer-DCT SAD: apply the DCT8_1D kernel to the rows of the
 * 8x8 pixel difference, then to the columns; the column pass accumulates
 * absolute values directly through the redefined DST macro. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
02216 #endif
02217
/* DCT-domain max metric: forward-DCT the 8x8 pixel difference and return
 * the largest absolute coefficient. h must be 8. */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02233
/* Quantization-noise metric: run the residual through the encoder's
 * quantize / dequantize / IDCT round trip and return the squared error
 * against the untouched residual copy (bak). mb_intra is forced to 0 so
 * the inter quantizer path is used.
 * NOTE(review): fast_dct_quantize presumably performs the forward DCT as
 * part of quantization — confirm against its implementation. */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02256
/* Rate-distortion metric for an 8x8 block: quantize the residual, count
 * the VLC bits needed to code the coefficients (rate), reconstruct via
 * dequantize + idct_add, measure SSE against the source (distortion), and
 * return distortion + lambda-scaled rate. h must be 8. */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local copies so the reconstruction can be added in place */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick intra/inter VLC tables; intra also codes the DC separately */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits; levels outside [-64,63] use escapes */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* 109/128 ≈ lambda scaling of bits*qscale^2 into SSE units */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02332
/* Rate-only metric: quantize the 8x8 residual and return the number of
 * VLC bits required to code it (same coefficient walk as rd8x8_c but
 * without reconstruction/distortion). h must be 8. */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick intra/inter VLC tables; intra also codes the DC separately */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits; levels outside [-64,63] use escapes */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02391
/* Intra vertical-SAD generator: sum of absolute differences between each
 * row and the row below it, over a `size`-wide block (inner loop unrolled
 * by 4). Instantiated below for widths 8 and 16. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02409
/* Vertical SAD of the difference signal: for every pair of adjacent rows,
 * accumulate |(s1-s2) - (s1-s2 one row below)| over a 16-wide block. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d < 0 ? -d : d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02424
/* Squared-difference helper and intra vertical-SSE generator: sum of
 * squared differences between each row and the row below it over a
 * `size`-wide block. Instantiated below for widths 8 and 16. */
#define SQ(a) ((a)*(a))
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02443
/* Vertical SSE of the difference signal: for every pair of adjacent rows,
 * accumulate the squared change in (s1-s2) over a 16-wide block. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02458
/* Sum of squared differences between an int8 and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, acc = 0;
    for (i = 0; i < size; i++) {
        int d = pix1[i] - pix2[i];
        acc += d * d;
    }
    return acc;
}
02467
/* Promote each 8x8 metric to a 16x16 variant by summing it over the four
 * 8x8 quadrants (WRAPPER8_16_SQ is defined earlier in this file). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02478
/* Element-wise product of two float vectors: dst = src0 * src1. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    const float *a = src0;
    const float *b = src1;
    float *out = dst;
    while (len-- > 0)
        *out++ = *a++ * *b++;
}
02484
/* dst[i] = src0[i] * src1[len-1-i] — multiply against src1 reversed. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
02491
/* Fused multiply-add over vectors: dst = src0 * src1 + src2. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;
    for (k = 0; k < len; k++) {
        float prod = src0[k] * src1[k];
        dst[k] = prod + src2[k];
    }
}
02497
/* Overlap-add windowing: combines src0 (forward) and src1 (reversed) with
 * the window win, producing 2*len output samples centred on dst+len.
 * Each iteration writes the symmetric pair dst[i]/dst[j].
 * NOTE(review): callers typically pass buffers where dst aliases src0 —
 * the read/write order here supports that, but confirm at call sites. */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i,j;
    dst += len;
    win += len;
    src0+= len;
    /* i scans the second half forward (negative offsets), j the first
     * half backward */
    for(i=-len, j=len-1; i<0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0*wj - s1*wi;
        dst[j] = s0*wi + s1*wj;
    }
}
02514
/* Scale a float vector by a scalar: dst = src * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int remaining;
    for (remaining = len; remaining > 0; remaining--)
        *dst++ = *src++ * mul;
}
02522
/* dst pairs = src pairs * corresponding 2-float sub-vector * mul;
 * one sub-vector from sv is consumed per output pair. */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;
    for (i = 0; i < len; i += 2) {
        const float *pair = *sv++;
        dst[i]     = src[i]     * pair[0] * mul;
        dst[i + 1] = src[i + 1] * pair[1] * mul;
    }
}
02532
/* dst quads = src quads * corresponding 4-float sub-vector * mul;
 * one sub-vector from sv is consumed per output quad. */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;
    for (i = 0; i < len; i += 4) {
        const float *quad = *sv++;
        dst[i]     = src[i]     * quad[0] * mul;
        dst[i + 1] = src[i + 1] * quad[1] * mul;
        dst[i + 2] = src[i + 2] * quad[2] * mul;
        dst[i + 3] = src[i + 3] * quad[3] * mul;
    }
}
02544
/* Expand 2-float sub-vectors scaled by mul into dst (one per pair). */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;
    for (i = 0; i < len; i += 2) {
        const float *pair = *sv++;
        dst[i]     = pair[0] * mul;
        dst[i + 1] = pair[1] * mul;
    }
}
02554
/* Expand 4-float sub-vectors scaled by mul into dst (one per quad). */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;
    for (i = 0; i < len; i += 4) {
        const float *quad = *sv++;
        dst[i]     = quad[0] * mul;
        dst[i + 1] = quad[1] * mul;
        dst[i + 2] = quad[2] * mul;
        dst[i + 3] = quad[3] * mul;
    }
}
02566
/* In-place butterfly: (v1, v2) <- (v1 + v2, v1 - v2), element-wise. */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;
    for (k = 0; k < len; k++) {
        float sum  = v1[k] + v2[k];
        float diff = v1[k] - v2[k];
        v1[k] = sum;
        v2[k] = diff;
    }
}
02577
/* Dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    const float *end = v1 + len;

    while (v1 < end)
        acc += *v1++ * *v2++;

    return acc;
}
02588
/* Clip one float, viewed as its raw IEEE-754 bit pattern, for the
 * opposite-sign case: mini holds the bits of the negative lower bound,
 * maxi the bits of the positive upper bound, maxisign = maxi with the
 * sign bit flipped. Relies on unsigned comparison of sign-magnitude
 * float encodings; only valid when min < 0 < max (see vector_clipf_c). */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* negative values sort above mini in unsigned space */
    if(a > mini) return mini;
    /* flipping the sign bit makes positives comparable against maxisign */
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}
02597
/* Clip a float vector to [*min, *max] using integer bit tricks; requires
 * min < 0 < max (enforced by the caller vector_clipf_c). The loop is
 * unrolled by 8, so len is assumed to be a multiple of 8.
 * NOTE(review): type-puns float buffers through uint32_t* — depends on
 * the build tolerating this aliasing, as elsewhere in this file. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/* Clip a float vector to [min, max]. When the bounds straddle zero the
 * bit-pattern fast path is used; otherwise a plain av_clipf loop,
 * unrolled by 8 (len is assumed to be a multiple of 8). */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
02633
/* Dot product of two int16 vectors; each product is right-shifted by
 * `shift` before accumulation. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int sum = 0;
    int i;

    for (i = 0; i < order; i++)
        sum += (v1[i] * v2[i]) >> shift;

    return sum;
}
02643
/* Combined dot product and multiply-accumulate: returns sum(v1*v2) using
 * the ORIGINAL v1 values, while updating v1 += mul*v3 in place. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int sum = 0;
    int i;
    for (i = 0; i < order; i++) {
        sum += v1[i] * v2[i];      /* read v1 before updating it */
        v1[i] += mul * v3[i];
    }
    return sum;
}
02653
/* Apply a symmetric window to len int16 samples: window[i] is used for
 * both input[i] and its mirror input[len-1-i]. Fixed point Q15 with
 * round-to-nearest (+1<<14 before the >>15).
 * NOTE(review): assumes len is even — an odd middle sample would be
 * skipped since only len/2 iterations run. Confirm at callers. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
02666
02667 #define W0 2048
02668 #define W1 2841
02669 #define W2 2676
02670 #define W3 2408
02671 #define W4 2048
02672 #define W5 1609
02673 #define W6 1108
02674 #define W7 565
02675
/* 1-D WMV2 IDCT over one row of 8 coefficients, in place.
 * a0..a7 are the even/odd butterfly terms built from the W0..W7 cosine
 * constants; s1/s2 are the shared sqrt(2)/2-scaled (181/256) terms.
 * Results are rounded and scaled down by 8 bits. */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    /* even part */
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/* 1-D WMV2 IDCT over one column (stride 8) of an 8x8 block, in place.
 * Same structure as wmv2_idct_row but with an extra >>3 pre-scale on the
 * butterfly terms and a final 14-bit rounding shift. */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part, pre-scaled by >>3 with rounding */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    /* even part */
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/* Full separable 2-D WMV2 IDCT of an 8x8 block: rows first, then columns. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
02739
02740
/* WMV2 IDCT wrappers: transform the coefficient block, then store (put)
 * or accumulate (add) the clamped result into the destination picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* JPEG-reference IDCT wrappers: full-resolution transform, then clamped
 * store (put) or accumulate (add) into the destination. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
02761
/* 4x4 reduced-resolution IDCT wrappers (lowres==1 decoding path). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
02772
/* 2x2 reduced-resolution IDCT wrappers (lowres==2 decoding path). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
02783
/* 1x1 "IDCT" wrappers (lowres==3): only the DC term survives; it is
 * rounded, scaled by 1/8 and clamped through the crop table. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
02796
/* Intentional no-op matching the (buf, stride, h) signature; presumably
 * fills optional DSP function-table slots (e.g. prefetch) — confirm at
 * the assignment sites. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02798
02799
/* One-time initialisation of the shared lookup tables:
 * - ff_cropTbl: identity over [0,255] with saturating borders on both
 *   sides (MAX_NEG_CROP entries of 0 below, 255 above);
 * - ff_squareTbl: (i-256)^2, i.e. squares indexed by difference + 256;
 * - inv_zigzag_direct16: inverse zigzag, 1-based scan position for each
 *   natural coefficient index. */
av_cold void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
02816
/* Runtime probe for compiler stack alignment: declares a 16-byte-aligned
 * stack variable and checks its actual address. Returns 0 when alignment
 * holds, -1 otherwise (SIMD code requires it). The error is logged only
 * once, and only on builds with MMX/AltiVec enabled. */
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED(16, int, aligned);

    if((intptr_t)&aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02836
02837 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02838 {
02839 int i;
02840
02841 ff_check_alignment();
02842
02843 #if CONFIG_ENCODERS
02844 if(avctx->dct_algo==FF_DCT_FASTINT) {
02845 c->fdct = fdct_ifast;
02846 c->fdct248 = fdct_ifast248;
02847 }
02848 else if(avctx->dct_algo==FF_DCT_FAAN) {
02849 c->fdct = ff_faandct;
02850 c->fdct248 = ff_faandct248;
02851 }
02852 else {
02853 c->fdct = ff_jpeg_fdct_islow;
02854 c->fdct248 = ff_fdct248_islow;
02855 }
02856 #endif //CONFIG_ENCODERS
02857
02858 if(avctx->lowres==1){
02859 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
02860 c->idct_put= ff_jref_idct4_put;
02861 c->idct_add= ff_jref_idct4_add;
02862 }else{
02863 if (avctx->codec_id != CODEC_ID_H264) {
02864 c->idct_put= ff_h264_lowres_idct_put_8_c;
02865 c->idct_add= ff_h264_lowres_idct_add_8_c;
02866 } else {
02867 switch (avctx->bits_per_raw_sample) {
02868 case 9:
02869 c->idct_put= ff_h264_lowres_idct_put_9_c;
02870 c->idct_add= ff_h264_lowres_idct_add_9_c;
02871 break;
02872 case 10:
02873 c->idct_put= ff_h264_lowres_idct_put_10_c;
02874 c->idct_add= ff_h264_lowres_idct_add_10_c;
02875 break;
02876 default:
02877 c->idct_put= ff_h264_lowres_idct_put_8_c;
02878 c->idct_add= ff_h264_lowres_idct_add_8_c;
02879 }
02880 }
02881 }
02882 c->idct = j_rev_dct4;
02883 c->idct_permutation_type= FF_NO_IDCT_PERM;
02884 }else if(avctx->lowres==2){
02885 c->idct_put= ff_jref_idct2_put;
02886 c->idct_add= ff_jref_idct2_add;
02887 c->idct = j_rev_dct2;
02888 c->idct_permutation_type= FF_NO_IDCT_PERM;
02889 }else if(avctx->lowres==3){
02890 c->idct_put= ff_jref_idct1_put;
02891 c->idct_add= ff_jref_idct1_add;
02892 c->idct = j_rev_dct1;
02893 c->idct_permutation_type= FF_NO_IDCT_PERM;
02894 }else{
02895 if(avctx->idct_algo==FF_IDCT_INT){
02896 c->idct_put= ff_jref_idct_put;
02897 c->idct_add= ff_jref_idct_add;
02898 c->idct = j_rev_dct;
02899 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02900 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02901 avctx->idct_algo==FF_IDCT_VP3){
02902 c->idct_put= ff_vp3_idct_put_c;
02903 c->idct_add= ff_vp3_idct_add_c;
02904 c->idct = ff_vp3_idct_c;
02905 c->idct_permutation_type= FF_NO_IDCT_PERM;
02906 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02907 c->idct_put= ff_wmv2_idct_put_c;
02908 c->idct_add= ff_wmv2_idct_add_c;
02909 c->idct = ff_wmv2_idct_c;
02910 c->idct_permutation_type= FF_NO_IDCT_PERM;
02911 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02912 c->idct_put= ff_faanidct_put;
02913 c->idct_add= ff_faanidct_add;
02914 c->idct = ff_faanidct;
02915 c->idct_permutation_type= FF_NO_IDCT_PERM;
02916 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02917 c->idct_put= ff_ea_idct_put_c;
02918 c->idct_permutation_type= FF_NO_IDCT_PERM;
02919 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
02920 c->idct = ff_bink_idct_c;
02921 c->idct_add = ff_bink_idct_add_c;
02922 c->idct_put = ff_bink_idct_put_c;
02923 c->idct_permutation_type = FF_NO_IDCT_PERM;
02924 }else{
02925 c->idct_put= ff_simple_idct_put;
02926 c->idct_add= ff_simple_idct_add;
02927 c->idct = ff_simple_idct;
02928 c->idct_permutation_type= FF_NO_IDCT_PERM;
02929 }
02930 }
02931
02932 c->get_pixels = get_pixels_c;
02933 c->diff_pixels = diff_pixels_c;
02934 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02935 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02936 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
02937 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02938 c->sum_abs_dctelem = sum_abs_dctelem_c;
02939 c->gmc1 = gmc1_c;
02940 c->gmc = ff_gmc_c;
02941 c->pix_sum = pix_sum_c;
02942 c->pix_norm1 = pix_norm1_c;
02943
02944 c->fill_block_tab[0] = fill_block16_c;
02945 c->fill_block_tab[1] = fill_block8_c;
02946 c->scale_block = scale_block_c;
02947
02948
02949 c->pix_abs[0][0] = pix_abs16_c;
02950 c->pix_abs[0][1] = pix_abs16_x2_c;
02951 c->pix_abs[0][2] = pix_abs16_y2_c;
02952 c->pix_abs[0][3] = pix_abs16_xy2_c;
02953 c->pix_abs[1][0] = pix_abs8_c;
02954 c->pix_abs[1][1] = pix_abs8_x2_c;
02955 c->pix_abs[1][2] = pix_abs8_y2_c;
02956 c->pix_abs[1][3] = pix_abs8_xy2_c;
02957
02958 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02959 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02960 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02961 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02962 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02963 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02964 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02965 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02966 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02967
02968 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02969 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02970 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02971 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02972 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02973 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02974 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02975 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02976 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02977
/*
 * dspfunc(PFX, IDX, NUM): fill row IDX of c->PFX##_pixels_tab with the 16
 * quarter-pel motion-compensation C routines PFX##NUM##_mcXY_c, where X/Y
 * are the quarter-pel x/y sub-positions (0..3) and NUM is the block size.
 * Used below for the put/put_no_rnd/avg qpel tables at sizes 16 and 8,
 * then #undef'd.
 */
02978 #define dspfunc(PFX, IDX, NUM) \
02979 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02980 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02981 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02982 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02983 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02984 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02985 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02986 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02987 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02988 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02989 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02990 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02991 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02992 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02993 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02994 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02995
02996 dspfunc(put_qpel, 0, 16);
02997 dspfunc(put_no_rnd_qpel, 0, 16);
02998
02999 dspfunc(avg_qpel, 0, 16);
03000
03001
03002 dspfunc(put_qpel, 1, 8);
03003 dspfunc(put_no_rnd_qpel, 1, 8);
03004
03005 dspfunc(avg_qpel, 1, 8);
03006
03007
03008 #undef dspfunc
03009
03010 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
03011 ff_mlp_init(c, avctx);
03012 #endif
03013 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
03014 ff_intrax8dsp_init(c,avctx);
03015 #endif
03016 #if CONFIG_RV30_DECODER
03017 ff_rv30dsp_init(c,avctx);
03018 #endif
03019 #if CONFIG_RV40_DECODER
03020 ff_rv40dsp_init(c,avctx);
03021 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
03022 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
03023 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
03024 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
03025 #endif
03026
03027 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
03028 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
03029 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
03030 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
03031 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
03032 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
03033 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
03034 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
03035
/*
 * SET_CMP_FUNC(name): install the two block-size variants of comparison
 * function "name" into the DSPContext: name##16_c at slot [0] (16x16) and
 * name##8x8_c at slot [1] (8x8).
 */
03036 #define SET_CMP_FUNC(name) \
03037 c->name[0]= name ## 16_c;\
03038 c->name[1]= name ## 8x8_c;
03039
03040 SET_CMP_FUNC(hadamard8_diff)
03041 c->hadamard8_diff[4]= hadamard8_intra16_c;
03042 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
03043 SET_CMP_FUNC(dct_sad)
03044 SET_CMP_FUNC(dct_max)
03045 #if CONFIG_GPL
03046 SET_CMP_FUNC(dct264_sad)
03047 #endif
03048 c->sad[0]= pix_abs16_c;
03049 c->sad[1]= pix_abs8_c;
03050 c->sse[0]= sse16_c;
03051 c->sse[1]= sse8_c;
03052 c->sse[2]= sse4_c;
03053 SET_CMP_FUNC(quant_psnr)
03054 SET_CMP_FUNC(rd)
03055 SET_CMP_FUNC(bit)
03056 c->vsad[0]= vsad16_c;
03057 c->vsad[4]= vsad_intra16_c;
03058 c->vsad[5]= vsad_intra8_c;
03059 c->vsse[0]= vsse16_c;
03060 c->vsse[4]= vsse_intra16_c;
03061 c->vsse[5]= vsse_intra8_c;
03062 c->nsse[0]= nsse16_c;
03063 c->nsse[1]= nsse8_c;
03064 #if CONFIG_DWT
03065 ff_dsputil_init_dwt(c);
03066 #endif
03067
03068 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03069
03070 c->add_bytes= add_bytes_c;
03071 c->diff_bytes= diff_bytes_c;
03072 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03073 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03074 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03075 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03076 c->bswap_buf= bswap_buf;
03077 c->bswap16_buf = bswap16_buf;
03078
03079 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03080 c->h263_h_loop_filter= h263_h_loop_filter_c;
03081 c->h263_v_loop_filter= h263_v_loop_filter_c;
03082 }
03083
03084 if (CONFIG_VP3_DECODER) {
03085 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03086 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03087 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03088 }
03089
03090 c->h261_loop_filter= h261_loop_filter_c;
03091
03092 c->try_8x8basis= try_8x8basis_c;
03093 c->add_8x8basis= add_8x8basis_c;
03094
03095 #if CONFIG_VORBIS_DECODER
03096 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03097 #endif
03098 #if CONFIG_AC3_DECODER
03099 c->ac3_downmix = ff_ac3_downmix_c;
03100 #endif
03101 c->vector_fmul = vector_fmul_c;
03102 c->vector_fmul_reverse = vector_fmul_reverse_c;
03103 c->vector_fmul_add = vector_fmul_add_c;
03104 c->vector_fmul_window = vector_fmul_window_c;
03105 c->vector_clipf = vector_clipf_c;
03106 c->scalarproduct_int16 = scalarproduct_int16_c;
03107 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03108 c->apply_window_int16 = apply_window_int16_c;
03109 c->scalarproduct_float = scalarproduct_float_c;
03110 c->butterflies_float = butterflies_float_c;
03111 c->vector_fmul_scalar = vector_fmul_scalar_c;
03112
03113 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
03114 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
03115
03116 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
03117 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
03118
03119 c->shrink[0]= av_image_copy_plane;
03120 c->shrink[1]= ff_shrink22;
03121 c->shrink[2]= ff_shrink44;
03122 c->shrink[3]= ff_shrink88;
03123
03124 c->prefetch= just_return;
03125
03126 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03127 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03128
/*
 * FUNC/FUNCC paste a bit-depth suffix onto a function name to select the
 * per-depth variant generated by the dsputil_template.c inclusions
 * (BIT_DEPTH 8/9/10 at the top of this file), e.g.
 * FUNC(ff_emulated_edge_mc, 8) -> ff_emulated_edge_mc_8 and
 * FUNCC(clear_block, 8) -> clear_block_8_c.
 * #undef first in case the template left its own definitions behind.
 */
03129 #undef FUNC
03130 #undef FUNCC
03131 #define FUNC(f, depth) f ## _ ## depth
03132 #define FUNCC(f, depth) f ## _ ## depth ## _c
03133
/*
 * dspfunc1(PFX, IDX, NUM, depth): fill the 4 half-pel entries of row IDX of
 * c->PFX##_pixels_tab (full-pel, x2 = horizontal half-pel, y2 = vertical
 * half-pel, xy2 = diagonal half-pel) with the depth-suffixed C routines for
 * block size NUM.
 */
03134 #define dspfunc1(PFX, IDX, NUM, depth)\
03135 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03136 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03137 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03138 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03139
/*
 * dspfunc2(PFX, IDX, NUM, depth): like dspfunc, but selects the
 * bit-depth-specific variants via FUNCC — fills all 16 quarter-pel entries
 * (_mcXY, X/Y = 0..3) of row IDX of c->PFX##_pixels_tab for block size NUM.
 * Used by BIT_DEPTH_FUNCS below for the H.264 qpel tables.
 */
03140 #define dspfunc2(PFX, IDX, NUM, depth)\
03141 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03142 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03143 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03144 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03145 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03146 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03147 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03148 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03149 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03150 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03151 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03152 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03153 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03154 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03155 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03156 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03157
03158
/*
 * BIT_DEPTH_FUNCS(depth): install every bit-depth-dependent function
 * pointer for the given pixel depth (8, 9 or 10) — edge drawing/emulation,
 * block clearing, pixel add helpers, the put/avg pixel tables (via
 * dspfunc1) and the H.264 chroma and qpel MC tables (via dspfunc2).
 * Invoked below based on avctx->bits_per_raw_sample; depths 9/10 exist only
 * for H.264 (see the dsputil_template.c inclusions at the top of the file).
 */
03159 #define BIT_DEPTH_FUNCS(depth)\
03160 c->draw_edges = FUNCC(draw_edges , depth);\
03161 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03162 c->clear_block = FUNCC(clear_block , depth);\
03163 c->clear_blocks = FUNCC(clear_blocks , depth);\
03164 c->add_pixels8 = FUNCC(add_pixels8 , depth);\
03165 c->add_pixels4 = FUNCC(add_pixels4 , depth);\
03166 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03167 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03168 \
03169 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03170 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03171 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03172 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03173 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03174 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03175 \
03176 dspfunc1(put , 0, 16, depth);\
03177 dspfunc1(put , 1, 8, depth);\
03178 dspfunc1(put , 2, 4, depth);\
03179 dspfunc1(put , 3, 2, depth);\
03180 dspfunc1(put_no_rnd, 0, 16, depth);\
03181 dspfunc1(put_no_rnd, 1, 8, depth);\
03182 dspfunc1(avg , 0, 16, depth);\
03183 dspfunc1(avg , 1, 8, depth);\
03184 dspfunc1(avg , 2, 4, depth);\
03185 dspfunc1(avg , 3, 2, depth);\
03186 dspfunc1(avg_no_rnd, 0, 16, depth);\
03187 dspfunc1(avg_no_rnd, 1, 8, depth);\
03188 \
03189 dspfunc2(put_h264_qpel, 0, 16, depth);\
03190 dspfunc2(put_h264_qpel, 1, 8, depth);\
03191 dspfunc2(put_h264_qpel, 2, 4, depth);\
03192 dspfunc2(put_h264_qpel, 3, 2, depth);\
03193 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03194 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03195 dspfunc2(avg_h264_qpel, 2, 4, depth);
03196
03197 if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
03198 BIT_DEPTH_FUNCS(8)
03199 } else {
03200 switch (avctx->bits_per_raw_sample) {
03201 case 9:
03202 BIT_DEPTH_FUNCS(9)
03203 break;
03204 case 10:
03205 BIT_DEPTH_FUNCS(10)
03206 break;
03207 default:
03208 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
03209 BIT_DEPTH_FUNCS(8)
03210 break;
03211 }
03212 }
03213
03214
03215 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
03216 if (ARCH_ARM) dsputil_init_arm (c, avctx);
03217 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
03218 if (HAVE_VIS) dsputil_init_vis (c, avctx);
03219 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
03220 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
03221 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
03222 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
03223 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
03224
03225 for(i=0; i<64; i++){
03226 if(!c->put_2tap_qpel_pixels_tab[0][i])
03227 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03228 if(!c->avg_2tap_qpel_pixels_tab[0][i])
03229 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03230 }
03231
03232 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
03233 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
03234 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
03235 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
03236
03237 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
03238 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
03239 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
03240 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
03241
03242 switch(c->idct_permutation_type){
03243 case FF_NO_IDCT_PERM:
03244 for(i=0; i<64; i++)
03245 c->idct_permutation[i]= i;
03246 break;
03247 case FF_LIBMPEG2_IDCT_PERM:
03248 for(i=0; i<64; i++)
03249 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
03250 break;
03251 case FF_SIMPLE_IDCT_PERM:
03252 for(i=0; i<64; i++)
03253 c->idct_permutation[i]= simple_mmx_permutation[i];
03254 break;
03255 case FF_TRANSPOSE_IDCT_PERM:
03256 for(i=0; i<64; i++)
03257 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
03258 break;
03259 case FF_PARTTRANS_IDCT_PERM:
03260 for(i=0; i<64; i++)
03261 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
03262 break;
03263 case FF_SSE2_IDCT_PERM:
03264 for(i=0; i<64; i++)
03265 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
03266 break;
03267 default:
03268 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
03269 }
03270 }
03271