00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00030 #include "libavutil/imgutils.h"
00031 #include "avcodec.h"
00032 #include "dsputil.h"
00033 #include "simple_idct.h"
00034 #include "faandct.h"
00035 #include "faanidct.h"
00036 #include "mathops.h"
00037 #include "mpegvideo.h"
00038 #include "config.h"
00039 #include "ac3dec.h"
00040 #include "vorbis.h"
00041
/* Clipping LUT: ff_cropTbl[MAX_NEG_CROP + x] clamps x to [0,255].
 * Zero-initialized here; presumably filled by the DSP init code — confirm. */
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
/* Squares LUT, used offset by +256 so indices in [-256,255] are valid:
 * ff_squareTbl[256 + x] == x*x. Presumably filled at init — confirm. */
uint32_t ff_squareTbl[512] = {0, };
00044
00045 #define BIT_DEPTH 9
00046 #include "dsputil_template.c"
00047 #undef BIT_DEPTH
00048
00049 #define BIT_DEPTH 10
00050 #include "dsputil_template.c"
00051 #undef BIT_DEPTH
00052
00053 #define BIT_DEPTH 8
00054 #include "dsputil_template.c"
00055
00056
/* Byte-replicated constants across an unsigned long: ~0UL/255 == 0x0101..01,
 * so pb_7f has 0x7f in every byte and pb_80 has 0x80 in every byte.
 * Used for SWAR-style packed byte arithmetic. */
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
00059
/* Classic JPEG/MPEG zigzag scan: maps scan position -> raster index of the
 * 8x8 coefficient block. */
const uint8_t ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
00070
00071
00072
/* Interleaved "2-4-8" zigzag variant; presumably used by interlaced /
 * field-DCT scan modes — confirm against callers. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
00083
00084
/* 16-byte-aligned inverse zigzag table (scan position for each raster index);
 * NOTE(review): appears to be filled at init by arch-specific code — confirm. */
DECLARE_ALIGNED(16, uint16_t, inv_zigzag_direct16)[64];
00086
/* Alternate horizontal scan order (scan position -> raster index). */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
00097
/* Alternate vertical scan order (scan position -> raster index);
 * the MPEG-2 "alternate scan" used for interlaced content. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
00108
00109
/* Coefficient permutation for the simple MMX IDCT: maps natural raster index
 * to the coefficient order that IDCT expects. NOTE(review): exact layout is
 * dictated by the MMX IDCT implementation — confirm against it. */
static const uint8_t simple_mmx_permutation[64]={
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
00120
/* Row interleave pattern for the SSE2 IDCT (even rows then odd rows). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
00122
00123 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
00124 int i;
00125 int end;
00126
00127 st->scantable= src_scantable;
00128
00129 for(i=0; i<64; i++){
00130 int j;
00131 j = src_scantable[i];
00132 st->permutated[i] = permutation[j];
00133 #if ARCH_PPC
00134 st->inverse[j] = i;
00135 #endif
00136 }
00137
00138 end=-1;
00139 for(i=0; i<64; i++){
00140 int j;
00141 j = st->permutated[i];
00142 if(j>end) end=j;
00143 st->raster_end[i]= end;
00144 }
00145 }
00146
/* Sum all 256 pixel values of a 16x16 block.
 * line_size is the stride between rows in bytes. */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int y, x;

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x++)
            sum += pix[x];
        pix += line_size;
    }
    return sum;
}
00168
/* Sum of squares of all pixels of a 16x16 block (the L2 "norm" before mean
 * removal). line_size is the row stride in bytes.
 * NOTE(review): the fast path type-puns pix through uint64_t/uint32_t loads;
 * this assumes sufficient alignment and violates strict aliasing — kept as-is
 * since it is long-standing hand-optimized code. */
static int pix_norm1_c(uint8_t * pix, int line_size)
{
    int s, i, j;
    /* offset by +256 so (possibly negative) indices elsewhere stay valid;
       here only non-negative pixel values are used */
    uint32_t *sq = ff_squareTbl + 256;

    s = 0;
    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            /* reference byte-at-a-time version, kept for documentation */
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if LONG_MAX > 2147483647
            /* 64-bit targets: one 8-byte load, then extract each byte */
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
            /* 32-bit targets: two 4-byte loads */
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
#endif
#endif
            pix += 8;
        }
        /* advance to next row: 16 bytes were consumed above */
        pix += line_size - 16;
    }
    return s;
}
00216
/* Byte-swap w 32-bit words from src into dst (buffers may be the same). */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i = 0;

    /* bulk: 8 words per iteration */
    for (; i + 8 <= w; i += 8) {
        int k;
        for (k = 0; k < 8; k++)
            dst[i + k] = av_bswap32(src[i + k]);
    }
    /* tail: remaining words */
    while (i < w) {
        dst[i] = av_bswap32(src[i]);
        i++;
    }
}
00234
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;
    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}
00240
00241 static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00242 {
00243 int s, i;
00244 uint32_t *sq = ff_squareTbl + 256;
00245
00246 s = 0;
00247 for (i = 0; i < h; i++) {
00248 s += sq[pix1[0] - pix2[0]];
00249 s += sq[pix1[1] - pix2[1]];
00250 s += sq[pix1[2] - pix2[2]];
00251 s += sq[pix1[3] - pix2[3]];
00252 pix1 += line_size;
00253 pix2 += line_size;
00254 }
00255 return s;
00256 }
00257
00258 static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
00259 {
00260 int s, i;
00261 uint32_t *sq = ff_squareTbl + 256;
00262
00263 s = 0;
00264 for (i = 0; i < h; i++) {
00265 s += sq[pix1[0] - pix2[0]];
00266 s += sq[pix1[1] - pix2[1]];
00267 s += sq[pix1[2] - pix2[2]];
00268 s += sq[pix1[3] - pix2[3]];
00269 s += sq[pix1[4] - pix2[4]];
00270 s += sq[pix1[5] - pix2[5]];
00271 s += sq[pix1[6] - pix2[6]];
00272 s += sq[pix1[7] - pix2[7]];
00273 pix1 += line_size;
00274 pix2 += line_size;
00275 }
00276 return s;
00277 }
00278
00279 static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
00280 {
00281 int s, i;
00282 uint32_t *sq = ff_squareTbl + 256;
00283
00284 s = 0;
00285 for (i = 0; i < h; i++) {
00286 s += sq[pix1[ 0] - pix2[ 0]];
00287 s += sq[pix1[ 1] - pix2[ 1]];
00288 s += sq[pix1[ 2] - pix2[ 2]];
00289 s += sq[pix1[ 3] - pix2[ 3]];
00290 s += sq[pix1[ 4] - pix2[ 4]];
00291 s += sq[pix1[ 5] - pix2[ 5]];
00292 s += sq[pix1[ 6] - pix2[ 6]];
00293 s += sq[pix1[ 7] - pix2[ 7]];
00294 s += sq[pix1[ 8] - pix2[ 8]];
00295 s += sq[pix1[ 9] - pix2[ 9]];
00296 s += sq[pix1[10] - pix2[10]];
00297 s += sq[pix1[11] - pix2[11]];
00298 s += sq[pix1[12] - pix2[12]];
00299 s += sq[pix1[13] - pix2[13]];
00300 s += sq[pix1[14] - pix2[14]];
00301 s += sq[pix1[15] - pix2[15]];
00302
00303 pix1 += line_size;
00304 pix2 += line_size;
00305 }
00306 return s;
00307 }
00308
00309 static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
00310 {
00311 int i;
00312
00313
00314 for(i=0;i<8;i++) {
00315 block[0] = pixels[0];
00316 block[1] = pixels[1];
00317 block[2] = pixels[2];
00318 block[3] = pixels[3];
00319 block[4] = pixels[4];
00320 block[5] = pixels[5];
00321 block[6] = pixels[6];
00322 block[7] = pixels[7];
00323 pixels += line_size;
00324 block += 8;
00325 }
00326 }
00327
00328 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
00329 const uint8_t *s2, int stride){
00330 int i;
00331
00332
00333 for(i=0;i<8;i++) {
00334 block[0] = s1[0] - s2[0];
00335 block[1] = s1[1] - s2[1];
00336 block[2] = s1[2] - s2[2];
00337 block[3] = s1[3] - s2[3];
00338 block[4] = s1[4] - s2[4];
00339 block[5] = s1[5] - s2[5];
00340 block[6] = s1[6] - s2[6];
00341 block[7] = s1[7] - s2[7];
00342 s1 += stride;
00343 s2 += stride;
00344 block += 8;
00345 }
00346 }
00347
00348
00349 void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00350 int line_size)
00351 {
00352 int i;
00353 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00354
00355
00356 for(i=0;i<8;i++) {
00357 pixels[0] = cm[block[0]];
00358 pixels[1] = cm[block[1]];
00359 pixels[2] = cm[block[2]];
00360 pixels[3] = cm[block[3]];
00361 pixels[4] = cm[block[4]];
00362 pixels[5] = cm[block[5]];
00363 pixels[6] = cm[block[6]];
00364 pixels[7] = cm[block[7]];
00365
00366 pixels += line_size;
00367 block += 8;
00368 }
00369 }
00370
00371 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00372 int line_size)
00373 {
00374 int i;
00375 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00376
00377
00378 for(i=0;i<4;i++) {
00379 pixels[0] = cm[block[0]];
00380 pixels[1] = cm[block[1]];
00381 pixels[2] = cm[block[2]];
00382 pixels[3] = cm[block[3]];
00383
00384 pixels += line_size;
00385 block += 8;
00386 }
00387 }
00388
00389 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00390 int line_size)
00391 {
00392 int i;
00393 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00394
00395
00396 for(i=0;i<2;i++) {
00397 pixels[0] = cm[block[0]];
00398 pixels[1] = cm[block[1]];
00399
00400 pixels += line_size;
00401 block += 8;
00402 }
00403 }
00404
00405 void ff_put_signed_pixels_clamped_c(const DCTELEM *block,
00406 uint8_t *restrict pixels,
00407 int line_size)
00408 {
00409 int i, j;
00410
00411 for (i = 0; i < 8; i++) {
00412 for (j = 0; j < 8; j++) {
00413 if (*block < -128)
00414 *pixels = 0;
00415 else if (*block > 127)
00416 *pixels = 255;
00417 else
00418 *pixels = (uint8_t)(*block + 128);
00419 block++;
00420 pixels++;
00421 }
00422 pixels += (line_size - 8);
00423 }
00424 }
00425
00426 static void put_pixels_nonclamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00427 int line_size)
00428 {
00429 int i;
00430
00431
00432 for(i=0;i<8;i++) {
00433 pixels[0] = block[0];
00434 pixels[1] = block[1];
00435 pixels[2] = block[2];
00436 pixels[3] = block[3];
00437 pixels[4] = block[4];
00438 pixels[5] = block[5];
00439 pixels[6] = block[6];
00440 pixels[7] = block[7];
00441
00442 pixels += line_size;
00443 block += 8;
00444 }
00445 }
00446
00447 void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
00448 int line_size)
00449 {
00450 int i;
00451 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00452
00453
00454 for(i=0;i<8;i++) {
00455 pixels[0] = cm[pixels[0] + block[0]];
00456 pixels[1] = cm[pixels[1] + block[1]];
00457 pixels[2] = cm[pixels[2] + block[2]];
00458 pixels[3] = cm[pixels[3] + block[3]];
00459 pixels[4] = cm[pixels[4] + block[4]];
00460 pixels[5] = cm[pixels[5] + block[5]];
00461 pixels[6] = cm[pixels[6] + block[6]];
00462 pixels[7] = cm[pixels[7] + block[7]];
00463 pixels += line_size;
00464 block += 8;
00465 }
00466 }
00467
00468 static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
00469 int line_size)
00470 {
00471 int i;
00472 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00473
00474
00475 for(i=0;i<4;i++) {
00476 pixels[0] = cm[pixels[0] + block[0]];
00477 pixels[1] = cm[pixels[1] + block[1]];
00478 pixels[2] = cm[pixels[2] + block[2]];
00479 pixels[3] = cm[pixels[3] + block[3]];
00480 pixels += line_size;
00481 block += 8;
00482 }
00483 }
00484
00485 static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
00486 int line_size)
00487 {
00488 int i;
00489 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
00490
00491
00492 for(i=0;i<2;i++) {
00493 pixels[0] = cm[pixels[0] + block[0]];
00494 pixels[1] = cm[pixels[1] + block[1]];
00495 pixels += line_size;
00496 block += 8;
00497 }
00498 }
00499
00500 static int sum_abs_dctelem_c(DCTELEM *block)
00501 {
00502 int sum=0, i;
00503 for(i=0; i<64; i++)
00504 sum+= FFABS(block[i]);
00505 return sum;
00506 }
00507
/* Fill a 16-pixel-wide block of height h with a constant value.
 * line_size is the row stride in bytes. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 16);
        block += line_size;
    }
}
00517
/* Fill an 8-pixel-wide block of height h with a constant value.
 * line_size is the row stride in bytes. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    while (h-- > 0) {
        memset(block, value, 8);
        block += line_size;
    }
}
00527
/* Upscale an 8x8 source block 2x in both directions into a 16x16 destination.
 * linesize is the destination row stride in bytes. */
static void scale_block_c(const uint8_t src[64], uint8_t *dst, int linesize)
{
    int i, j;
    uint16_t *dst1 = (uint16_t *) dst;              /* even destination row */
    uint16_t *dst2 = (uint16_t *)(dst + linesize);  /* odd destination row  */

    for (j = 0; j < 8; j++) {
        for (i = 0; i < 8; i++) {
            /* src[i] * 0x0101 replicates the byte into both halves of the
               uint16, writing two horizontally adjacent output pixels at
               once; storing through dst1 and dst2 duplicates vertically.
               NOTE(review): assumes dst is at least 2-byte aligned. */
            dst1[i] = dst2[i] = src[i] * 0x0101;
        }
        src += 8;
        /* linesize is in bytes but dst1/dst2 are uint16_t*, so += linesize
           advances 2*linesize bytes == two destination lines, which is
           exactly one iteration's worth of output. */
        dst1 += linesize;
        dst2 += linesize;
    }
}
00543
00544 #define avg2(a,b) ((a+b+1)>>1)
00545 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
00546
/* One-point global motion compensation: bilinear interpolation of an
 * 8-pixel-wide block with 1/16-pel fractional offsets (x16, y16).
 * rounder is added before the final >>8. */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    /* bilinear weights (sum to 256) derived from the fractional position */
    const int A = (16 - x16) * (16 - y16);
    const int B = x16        * (16 - y16);
    const int C = (16 - x16) * y16;
    const int D = x16        * y16;
    int y, x;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            dst[x] = (A * src[x]              + B * src[x + 1] +
                      C * src[x + stride]     + D * src[x + stride + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
00569
/* Affine global motion compensation for one 8-pixel-wide stripe of height h.
 * (ox, oy) is the 16.16 fixed-point source position of the first pixel;
 * (dxx, dxy, dyx, dyy) are the per-pixel / per-line position increments;
 * shift gives the sub-pel precision (s == 1<<shift steps per pixel);
 * r is the rounding constant; width/height bound the valid source area. */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;

    /* convert sizes to max valid coordinate for the clip/compare below */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){
            int src_x, src_y, frac_x, frac_y, index;

            /* split 16.16 position into integer pel and sub-pel fraction */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            /* unsigned compare handles negative coordinates as out-of-range */
            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside: bilinear interpolation of 4 neighbours */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*(s-frac_y)
                                       + ( src[index+stride ]*(s-frac_x)
                                         + src[index+stride+1]* frac_x )* frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate horizontally only */
                    index= src_x + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate vertically only */
                    index= av_clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
                                         + src[index+stride ]* frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* outside in both directions: nearest edge pixel */
                    index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index ];
                }
            }

            /* step the affine position across the row */
            vx+= dxx;
            vy+= dyx;
        }
        /* step the affine start position down one line */
        ox += dxy;
        oy += dyy;
    }
}
00627
/* Full-pel thirdpel "put": dispatch to the plain copy routine for the width. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
00636
/* Thirdpel put, horizontal 1/3 offset: dst = round((2a + b)/3),
 * approximated as (683*(2a + b + 1)) >> 11. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00647
/* Thirdpel put, horizontal 2/3 offset: dst = round((a + 2b)/3). */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00658
/* Thirdpel put, vertical 1/3 offset: dst = round((2a + below)/3). */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00669
/* Thirdpel put, (1/3, 1/3) offset: 2D weights 4/3/3/2 over the 2x2
 * neighbourhood, approximated with 2731/32768. */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00680
/* Thirdpel put, (1/3, 2/3) offset: 2D weights 3/2/4/3 over the 2x2
 * neighbourhood. */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] +
                              4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00691
/* Thirdpel put, vertical 2/3 offset: dst = round((a + 2*below)/3). */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
00702
/* Thirdpel put, (2/3, 1/3) offset: 2D weights 3/4/2/3 over the 2x2
 * neighbourhood. */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] +
                              2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00713
/* Thirdpel put, (2/3, 2/3) offset: 2D weights 2/3/3/4 over the 2x2
 * neighbourhood. */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] +
                              3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
00724
/* Full-pel thirdpel "avg": dispatch to the plain averaging routine for the
 * given width. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
00733
/* Thirdpel avg, horizontal 1/3 offset: rounding average of dst with the
 * interpolated value. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00744
/* Thirdpel avg, horizontal 2/3 offset. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00755
/* Thirdpel avg, vertical 1/3 offset. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00766
/* Thirdpel avg, (1/3, 1/3) offset: weights 4/3/3/2 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (4 * src[x] + 3 * src[x + 1] +
                                   3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00777
/* Thirdpel avg, (1/3, 2/3) offset: weights 3/2/4/3 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (3 * src[x] + 2 * src[x + 1] +
                                   4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00788
/* Thirdpel avg, vertical 2/3 offset. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00799
/* Thirdpel avg, (2/3, 1/3) offset: weights 3/4/2/3 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (3 * src[x] + 4 * src[x + 1] +
                                   2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00810
/* Thirdpel avg, (2/3, 2/3) offset: weights 2/3/3/4 over the 2x2
 * neighbourhood, then rounding average with dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int y;
    for (y = 0; y < height; y++) {
        int x;
        for (x = 0; x < width; x++) {
            const int p = (2731 * (2 * src[x] + 3 * src[x + 1] +
                                   3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
            dst[x] = (dst[x] + p + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
00821 #if 0
00822 #define TPEL_WIDTH(width)\
00823 static void put_tpel_pixels ## width ## _mc00_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00824 void put_tpel_pixels_mc00_c(dst, src, stride, width, height);}\
00825 static void put_tpel_pixels ## width ## _mc10_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00826 void put_tpel_pixels_mc10_c(dst, src, stride, width, height);}\
00827 static void put_tpel_pixels ## width ## _mc20_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00828 void put_tpel_pixels_mc20_c(dst, src, stride, width, height);}\
00829 static void put_tpel_pixels ## width ## _mc01_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00830 void put_tpel_pixels_mc01_c(dst, src, stride, width, height);}\
00831 static void put_tpel_pixels ## width ## _mc11_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00832 void put_tpel_pixels_mc11_c(dst, src, stride, width, height);}\
00833 static void put_tpel_pixels ## width ## _mc21_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00834 void put_tpel_pixels_mc21_c(dst, src, stride, width, height);}\
00835 static void put_tpel_pixels ## width ## _mc02_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00836 void put_tpel_pixels_mc02_c(dst, src, stride, width, height);}\
00837 static void put_tpel_pixels ## width ## _mc12_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00838 void put_tpel_pixels_mc12_c(dst, src, stride, width, height);}\
00839 static void put_tpel_pixels ## width ## _mc22_c(uint8_t *dst, const uint8_t *src, int stride, int height){\
00840 void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
00841 #endif
00842
00843 #define QPEL_MC(r, OPNAME, RND, OP) \
00844 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00845 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00846 int i;\
00847 for(i=0; i<h; i++)\
00848 {\
00849 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
00850 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
00851 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
00852 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
00853 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
00854 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
00855 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
00856 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
00857 dst+=dstStride;\
00858 src+=srcStride;\
00859 }\
00860 }\
00861 \
00862 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00863 const int w=8;\
00864 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00865 int i;\
00866 for(i=0; i<w; i++)\
00867 {\
00868 const int src0= src[0*srcStride];\
00869 const int src1= src[1*srcStride];\
00870 const int src2= src[2*srcStride];\
00871 const int src3= src[3*srcStride];\
00872 const int src4= src[4*srcStride];\
00873 const int src5= src[5*srcStride];\
00874 const int src6= src[6*srcStride];\
00875 const int src7= src[7*srcStride];\
00876 const int src8= src[8*srcStride];\
00877 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
00878 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
00879 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
00880 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
00881 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
00882 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
00883 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
00884 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
00885 dst++;\
00886 src++;\
00887 }\
00888 }\
00889 \
00890 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
00891 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00892 int i;\
00893 \
00894 for(i=0; i<h; i++)\
00895 {\
00896 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
00897 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
00898 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
00899 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
00900 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
00901 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
00902 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
00903 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
00904 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
00905 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
00906 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
00907 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
00908 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
00909 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
00910 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
00911 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
00912 dst+=dstStride;\
00913 src+=srcStride;\
00914 }\
00915 }\
00916 \
00917 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
00918 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
00919 int i;\
00920 const int w=16;\
00921 for(i=0; i<w; i++)\
00922 {\
00923 const int src0= src[0*srcStride];\
00924 const int src1= src[1*srcStride];\
00925 const int src2= src[2*srcStride];\
00926 const int src3= src[3*srcStride];\
00927 const int src4= src[4*srcStride];\
00928 const int src5= src[5*srcStride];\
00929 const int src6= src[6*srcStride];\
00930 const int src7= src[7*srcStride];\
00931 const int src8= src[8*srcStride];\
00932 const int src9= src[9*srcStride];\
00933 const int src10= src[10*srcStride];\
00934 const int src11= src[11*srcStride];\
00935 const int src12= src[12*srcStride];\
00936 const int src13= src[13*srcStride];\
00937 const int src14= src[14*srcStride];\
00938 const int src15= src[15*srcStride];\
00939 const int src16= src[16*srcStride];\
00940 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
00941 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
00942 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
00943 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
00944 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
00945 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
00946 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
00947 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
00948 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
00949 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
00950 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
00951 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
00952 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
00953 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
00954 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
00955 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
00956 dst++;\
00957 src++;\
00958 }\
00959 }\
00960 \
00961 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
00962 uint8_t half[64];\
00963 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00964 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
00965 }\
00966 \
00967 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
00968 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
00969 }\
00970 \
00971 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
00972 uint8_t half[64];\
00973 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
00974 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
00975 }\
00976 \
00977 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
00978 uint8_t full[16*9];\
00979 uint8_t half[64];\
00980 copy_block9(full, src, 16, stride, 9);\
00981 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00982 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
00983 }\
00984 \
00985 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
00986 uint8_t full[16*9];\
00987 copy_block9(full, src, 16, stride, 9);\
00988 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
00989 }\
00990 \
00991 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
00992 uint8_t full[16*9];\
00993 uint8_t half[64];\
00994 copy_block9(full, src, 16, stride, 9);\
00995 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
00996 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
00997 }\
00998 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
00999 uint8_t full[16*9];\
01000 uint8_t halfH[72];\
01001 uint8_t halfV[64];\
01002 uint8_t halfHV[64];\
01003 copy_block9(full, src, 16, stride, 9);\
01004 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01005 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01006 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01007 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01008 }\
01009 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01010 uint8_t full[16*9];\
01011 uint8_t halfH[72];\
01012 uint8_t halfHV[64];\
01013 copy_block9(full, src, 16, stride, 9);\
01014 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01015 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01016 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01017 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01018 }\
01019 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01020 uint8_t full[16*9];\
01021 uint8_t halfH[72];\
01022 uint8_t halfV[64];\
01023 uint8_t halfHV[64];\
01024 copy_block9(full, src, 16, stride, 9);\
01025 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01026 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01027 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01028 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01029 }\
01030 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01031 uint8_t full[16*9];\
01032 uint8_t halfH[72];\
01033 uint8_t halfHV[64];\
01034 copy_block9(full, src, 16, stride, 9);\
01035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01036 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01037 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01038 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01039 }\
01040 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01041 uint8_t full[16*9];\
01042 uint8_t halfH[72];\
01043 uint8_t halfV[64];\
01044 uint8_t halfHV[64];\
01045 copy_block9(full, src, 16, stride, 9);\
01046 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01047 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01048 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01049 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01050 }\
01051 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01052 uint8_t full[16*9];\
01053 uint8_t halfH[72];\
01054 uint8_t halfHV[64];\
01055 copy_block9(full, src, 16, stride, 9);\
01056 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01057 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01058 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01059 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01060 }\
01061 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01062 uint8_t full[16*9];\
01063 uint8_t halfH[72];\
01064 uint8_t halfV[64];\
01065 uint8_t halfHV[64];\
01066 copy_block9(full, src, 16, stride, 9);\
01067 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
01068 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01069 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01070 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
01071 }\
01072 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01073 uint8_t full[16*9];\
01074 uint8_t halfH[72];\
01075 uint8_t halfHV[64];\
01076 copy_block9(full, src, 16, stride, 9);\
01077 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01078 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01079 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01080 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01081 }\
01082 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01083 uint8_t halfH[72];\
01084 uint8_t halfHV[64];\
01085 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01086 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01087 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
01088 }\
01089 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01090 uint8_t halfH[72];\
01091 uint8_t halfHV[64];\
01092 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01093 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01094 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
01095 }\
01096 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01097 uint8_t full[16*9];\
01098 uint8_t halfH[72];\
01099 uint8_t halfV[64];\
01100 uint8_t halfHV[64];\
01101 copy_block9(full, src, 16, stride, 9);\
01102 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01103 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
01104 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01105 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01106 }\
01107 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01108 uint8_t full[16*9];\
01109 uint8_t halfH[72];\
01110 copy_block9(full, src, 16, stride, 9);\
01111 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01112 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
01113 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01114 }\
01115 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01116 uint8_t full[16*9];\
01117 uint8_t halfH[72];\
01118 uint8_t halfV[64];\
01119 uint8_t halfHV[64];\
01120 copy_block9(full, src, 16, stride, 9);\
01121 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01122 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
01123 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
01124 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
01125 }\
01126 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01127 uint8_t full[16*9];\
01128 uint8_t halfH[72];\
01129 copy_block9(full, src, 16, stride, 9);\
01130 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
01131 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
01132 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01133 }\
01134 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01135 uint8_t halfH[72];\
01136 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
01137 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
01138 }\
01139 \
01140 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, int stride){\
01141 uint8_t half[256];\
01142 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01143 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
01144 }\
01145 \
01146 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, int stride){\
01147 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
01148 }\
01149 \
01150 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, int stride){\
01151 uint8_t half[256];\
01152 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
01153 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
01154 }\
01155 \
01156 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, int stride){\
01157 uint8_t full[24*17];\
01158 uint8_t half[256];\
01159 copy_block17(full, src, 24, stride, 17);\
01160 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01161 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
01162 }\
01163 \
01164 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, int stride){\
01165 uint8_t full[24*17];\
01166 copy_block17(full, src, 24, stride, 17);\
01167 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
01168 }\
01169 \
01170 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, int stride){\
01171 uint8_t full[24*17];\
01172 uint8_t half[256];\
01173 copy_block17(full, src, 24, stride, 17);\
01174 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
01175 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
01176 }\
01177 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, int stride){\
01178 uint8_t full[24*17];\
01179 uint8_t halfH[272];\
01180 uint8_t halfV[256];\
01181 uint8_t halfHV[256];\
01182 copy_block17(full, src, 24, stride, 17);\
01183 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01184 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01185 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01186 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01187 }\
01188 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, int stride){\
01189 uint8_t full[24*17];\
01190 uint8_t halfH[272];\
01191 uint8_t halfHV[256];\
01192 copy_block17(full, src, 24, stride, 17);\
01193 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01194 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01195 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01196 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01197 }\
01198 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, int stride){\
01199 uint8_t full[24*17];\
01200 uint8_t halfH[272];\
01201 uint8_t halfV[256];\
01202 uint8_t halfHV[256];\
01203 copy_block17(full, src, 24, stride, 17);\
01204 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01205 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01206 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01207 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01208 }\
01209 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, int stride){\
01210 uint8_t full[24*17];\
01211 uint8_t halfH[272];\
01212 uint8_t halfHV[256];\
01213 copy_block17(full, src, 24, stride, 17);\
01214 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01215 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01216 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01217 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01218 }\
01219 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, int stride){\
01220 uint8_t full[24*17];\
01221 uint8_t halfH[272];\
01222 uint8_t halfV[256];\
01223 uint8_t halfHV[256];\
01224 copy_block17(full, src, 24, stride, 17);\
01225 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01226 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01227 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01228 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01229 }\
01230 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, int stride){\
01231 uint8_t full[24*17];\
01232 uint8_t halfH[272];\
01233 uint8_t halfHV[256];\
01234 copy_block17(full, src, 24, stride, 17);\
01235 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01236 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01237 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01238 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01239 }\
01240 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, int stride){\
01241 uint8_t full[24*17];\
01242 uint8_t halfH[272];\
01243 uint8_t halfV[256];\
01244 uint8_t halfHV[256];\
01245 copy_block17(full, src, 24, stride, 17);\
01246 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
01247 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01248 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01249 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
01250 }\
01251 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){\
01252 uint8_t full[24*17];\
01253 uint8_t halfH[272];\
01254 uint8_t halfHV[256];\
01255 copy_block17(full, src, 24, stride, 17);\
01256 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01257 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01258 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01259 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01260 }\
01261 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, int stride){\
01262 uint8_t halfH[272];\
01263 uint8_t halfHV[256];\
01264 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01265 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01266 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
01267 }\
01268 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, int stride){\
01269 uint8_t halfH[272];\
01270 uint8_t halfHV[256];\
01271 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01272 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01273 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
01274 }\
01275 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, int stride){\
01276 uint8_t full[24*17];\
01277 uint8_t halfH[272];\
01278 uint8_t halfV[256];\
01279 uint8_t halfHV[256];\
01280 copy_block17(full, src, 24, stride, 17);\
01281 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01282 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
01283 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01284 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01285 }\
01286 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, int stride){\
01287 uint8_t full[24*17];\
01288 uint8_t halfH[272];\
01289 copy_block17(full, src, 24, stride, 17);\
01290 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01291 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
01292 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01293 }\
01294 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, int stride){\
01295 uint8_t full[24*17];\
01296 uint8_t halfH[272];\
01297 uint8_t halfV[256];\
01298 uint8_t halfHV[256];\
01299 copy_block17(full, src, 24, stride, 17);\
01300 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01301 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
01302 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
01303 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
01304 }\
01305 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, int stride){\
01306 uint8_t full[24*17];\
01307 uint8_t halfH[272];\
01308 copy_block17(full, src, 24, stride, 17);\
01309 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
01310 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
01311 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01312 }\
01313 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, int stride){\
01314 uint8_t halfH[272];\
01315 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
01316 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
01317 }
01318
/* Write-back ops for QPEL_MC: the 6-tap filter output `b` is scaled by 32
 * (tap sum 20+20-6-6+3+3-1-1), so it is rounded (+16, or +15 for the
 * no_rnd variants), shifted down by 5 and clamped through the crop table
 * `cm`; `put` stores the result, `avg` averages it with the existing
 * destination pixel. */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
/* NOTE(review): op_avg_no_rnd is defined but no QPEL_MC instantiation
 * below uses it. */
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the full set of 8x8 and 16x16 quarter-pel MC functions. */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)

#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
01332
/* Full-pel (mc00) positions need no interpolation, hence no rounding:
 * both the rounded and no_rnd variants alias the plain block-copy
 * routines.  (Bug fix: put_no_rnd_qpel16_mc00_c previously pointed at
 * "ff_put_pixels16x16_8_c", inconsistent with put_qpel16_mc00_c above;
 * it must use the same copy routine, ff_put_pixels16x16_c.) */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
01339
01340 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
01341 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01342 int i;
01343
01344 for(i=0; i<h; i++){
01345 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
01346 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
01347 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
01348 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
01349 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
01350 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
01351 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
01352 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
01353 dst+=dstStride;
01354 src+=srcStride;
01355 }
01356 }
01357
#if CONFIG_RV40_DECODER
/* RV40 handles the (3,3) quarter-pel position by forwarding to the plain
 * 2x2 half-pel averager instead of the generic qpel filter chain. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
static void put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif
01372
01373 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
01374 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
01375 int i;
01376
01377 for(i=0; i<w; i++){
01378 const int src_1= src[ -srcStride];
01379 const int src0 = src[0 ];
01380 const int src1 = src[ srcStride];
01381 const int src2 = src[2*srcStride];
01382 const int src3 = src[3*srcStride];
01383 const int src4 = src[4*srcStride];
01384 const int src5 = src[5*srcStride];
01385 const int src6 = src[6*srcStride];
01386 const int src7 = src[7*srcStride];
01387 const int src8 = src[8*srcStride];
01388 const int src9 = src[9*srcStride];
01389 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
01390 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
01391 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
01392 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
01393 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
01394 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
01395 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
01396 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
01397 src++;
01398 dst++;
01399 }
01400 }
01401
/* WMV2 mspel MC, position (1,0): average the unfiltered source with the
 * horizontal half-pel output to form the quarter-pel sample. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
01407
/* WMV2 mspel MC, position (2,0): pure horizontal half-pel filter. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
01411
/* WMV2 mspel MC, position (3,0): average the source shifted one pixel
 * right with the horizontal half-pel output. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
01417
/* WMV2 mspel MC, position (0,2): pure vertical half-pel filter. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, int stride){
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
01421
/* WMV2 mspel MC, position (1,2): average the vertical half-pel with the
 * HV-filtered sample.  halfH covers 11 rows starting one row above the
 * block, so halfH+8 is the row-0-aligned view with its row above valid
 * for the vertical filter's -1 tap. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel MC, position (3,2): like mc12 but the vertical half-pel is
 * taken one pixel to the right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
/* WMV2 mspel MC, position (2,2): horizontal half-pel followed by the
 * vertical half-pel filter (halfH+8 skips the padding row above). */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
01445
/**
 * H.263 deblocking across a horizontal block edge: for each of 8 columns,
 * filter the four pixels p0..p3 straddling the edge (two rows above src,
 * two rows at/below it).  Strength is looked up from the quantizer.
 */
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int x;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(x=0; x<8; x++){
            int d1, d2, ad1;
            int p0= src[x-2*stride];
            int p1= src[x-1*stride];
            int p2= src[x+0*stride];
            int p3= src[x+1*stride];
            /* edge gradient, weighted towards the inner pixel pair */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* ramp: full correction for |d| < strength, tapering linearly
             * to zero at 2*strength, and none beyond that */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clamp to 0..255: after the correction the value lies in
             * -256..511, so bit 8 set means out of range; ~(p>>31) yields
             * 0 for negatives and -1 (stored as 255) for values > 255 */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[x-1*stride] = p1;
            src[x+0*stride] = p2;

            /* secondary smoothing of the outer pixels, limited to |d1|/2 */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[x-2*stride] = p0 - d2;
            src[x+ stride] = p3 + d2;
        }
    }
}
01482
/**
 * H.263 deblocking across a vertical block edge: for each of 8 rows,
 * filter the four pixels p0..p3 straddling the edge (two columns left of
 * src, two at/right of it).  Same math as h263_v_loop_filter_c, rotated.
 */
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        int y;
        const int strength= ff_h263_loop_filter_strength[qscale];

        for(y=0; y<8; y++){
            int d1, d2, ad1;
            int p0= src[y*stride-2];
            int p1= src[y*stride-1];
            int p2= src[y*stride+0];
            int p3= src[y*stride+1];
            /* edge gradient, weighted towards the inner pixel pair */
            int d = (p0 - p3 + 4*(p2 - p1)) / 8;

            /* ramp: full correction for |d| < strength, tapering linearly
             * to zero at 2*strength, and none beyond that */
            if (d<-2*strength) d1= 0;
            else if(d<- strength) d1=-2*strength - d;
            else if(d< strength) d1= d;
            else if(d< 2*strength) d1= 2*strength - d;
            else d1= 0;

            p1 += d1;
            p2 -= d1;
            /* clamp to 0..255 via the bit-8 trick (see v filter above is
             * not assumed here: value is in -256..511, ~(p>>31) maps
             * negatives to 0 and overflows to 255) */
            if(p1&256) p1= ~(p1>>31);
            if(p2&256) p2= ~(p2>>31);

            src[y*stride-1] = p1;
            src[y*stride+0] = p2;

            /* secondary smoothing of the outer pixels, limited to |d1|/2 */
            ad1= FFABS(d1)>>1;

            d2= av_clip((p0-p3)/4, -ad1, ad1);

            src[y*stride-2] = p0 - d2;
            src[y*stride+1] = p3 + d2;
        }
    }
}
01519
01520 static void h261_loop_filter_c(uint8_t *src, int stride){
01521 int x,y,xy,yz;
01522 int temp[64];
01523
01524 for(x=0; x<8; x++){
01525 temp[x ] = 4*src[x ];
01526 temp[x + 7*8] = 4*src[x + 7*stride];
01527 }
01528 for(y=1; y<7; y++){
01529 for(x=0; x<8; x++){
01530 xy = y * stride + x;
01531 yz = y * 8 + x;
01532 temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
01533 }
01534 }
01535
01536 for(y=0; y<8; y++){
01537 src[ y*stride] = (temp[ y*8] + 2)>>2;
01538 src[7+y*stride] = (temp[7+y*8] + 2)>>2;
01539 for(x=1; x<7; x++){
01540 xy = y * stride + x;
01541 yz = y * 8 + x;
01542 src[xy] = (temp[yz-1] + 2*temp[yz] + temp[yz+1] + 8)>>4;
01543 }
01544 }
01545 }
01546
01547 static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01548 {
01549 int s, i;
01550
01551 s = 0;
01552 for(i=0;i<h;i++) {
01553 s += abs(pix1[0] - pix2[0]);
01554 s += abs(pix1[1] - pix2[1]);
01555 s += abs(pix1[2] - pix2[2]);
01556 s += abs(pix1[3] - pix2[3]);
01557 s += abs(pix1[4] - pix2[4]);
01558 s += abs(pix1[5] - pix2[5]);
01559 s += abs(pix1[6] - pix2[6]);
01560 s += abs(pix1[7] - pix2[7]);
01561 s += abs(pix1[8] - pix2[8]);
01562 s += abs(pix1[9] - pix2[9]);
01563 s += abs(pix1[10] - pix2[10]);
01564 s += abs(pix1[11] - pix2[11]);
01565 s += abs(pix1[12] - pix2[12]);
01566 s += abs(pix1[13] - pix2[13]);
01567 s += abs(pix1[14] - pix2[14]);
01568 s += abs(pix1[15] - pix2[15]);
01569 pix1 += line_size;
01570 pix2 += line_size;
01571 }
01572 return s;
01573 }
01574
01575 static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01576 {
01577 int s, i;
01578
01579 s = 0;
01580 for(i=0;i<h;i++) {
01581 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01582 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01583 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01584 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01585 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01586 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01587 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01588 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01589 s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
01590 s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
01591 s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
01592 s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
01593 s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
01594 s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
01595 s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
01596 s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
01597 pix1 += line_size;
01598 pix2 += line_size;
01599 }
01600 return s;
01601 }
01602
01603 static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01604 {
01605 int s, i;
01606 uint8_t *pix3 = pix2 + line_size;
01607
01608 s = 0;
01609 for(i=0;i<h;i++) {
01610 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01611 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01612 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01613 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01614 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01615 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01616 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01617 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01618 s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
01619 s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
01620 s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
01621 s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
01622 s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
01623 s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
01624 s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
01625 s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
01626 pix1 += line_size;
01627 pix2 += line_size;
01628 pix3 += line_size;
01629 }
01630 return s;
01631 }
01632
01633 static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01634 {
01635 int s, i;
01636 uint8_t *pix3 = pix2 + line_size;
01637
01638 s = 0;
01639 for(i=0;i<h;i++) {
01640 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01641 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01642 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01643 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01644 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01645 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01646 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01647 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01648 s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
01649 s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
01650 s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
01651 s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
01652 s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
01653 s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
01654 s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
01655 s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
01656 pix1 += line_size;
01657 pix2 += line_size;
01658 pix3 += line_size;
01659 }
01660 return s;
01661 }
01662
01663 static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01664 {
01665 int s, i;
01666
01667 s = 0;
01668 for(i=0;i<h;i++) {
01669 s += abs(pix1[0] - pix2[0]);
01670 s += abs(pix1[1] - pix2[1]);
01671 s += abs(pix1[2] - pix2[2]);
01672 s += abs(pix1[3] - pix2[3]);
01673 s += abs(pix1[4] - pix2[4]);
01674 s += abs(pix1[5] - pix2[5]);
01675 s += abs(pix1[6] - pix2[6]);
01676 s += abs(pix1[7] - pix2[7]);
01677 pix1 += line_size;
01678 pix2 += line_size;
01679 }
01680 return s;
01681 }
01682
01683 static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01684 {
01685 int s, i;
01686
01687 s = 0;
01688 for(i=0;i<h;i++) {
01689 s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
01690 s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
01691 s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
01692 s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
01693 s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
01694 s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
01695 s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
01696 s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
01697 pix1 += line_size;
01698 pix2 += line_size;
01699 }
01700 return s;
01701 }
01702
01703 static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01704 {
01705 int s, i;
01706 uint8_t *pix3 = pix2 + line_size;
01707
01708 s = 0;
01709 for(i=0;i<h;i++) {
01710 s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
01711 s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
01712 s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
01713 s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
01714 s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
01715 s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
01716 s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
01717 s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
01718 pix1 += line_size;
01719 pix2 += line_size;
01720 pix3 += line_size;
01721 }
01722 return s;
01723 }
01724
01725 static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
01726 {
01727 int s, i;
01728 uint8_t *pix3 = pix2 + line_size;
01729
01730 s = 0;
01731 for(i=0;i<h;i++) {
01732 s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
01733 s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
01734 s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
01735 s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
01736 s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
01737 s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
01738 s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
01739 s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
01740 pix1 += line_size;
01741 pix2 += line_size;
01742 pix3 += line_size;
01743 }
01744 return s;
01745 }
01746
/* "Noise shaping" SSE, 16 pixels wide.
 * score1: plain sum of squared differences between the two blocks.
 * score2: difference in 2x2 gradient magnitude between s1 and s2 — a
 * measure of how much fine texture/noise structure was lost or added.
 * The gradient mismatch is weighted by avctx->nsse_weight (8 when no
 * MpegEncContext is supplied). */
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient needs the row below; skip on the last row */
            for(x=0; x<15; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01772
/* 8-pixel-wide variant of nsse16_c: SSE plus a weighted penalty for the
 * mismatch in 2x2 gradient (texture/noise) energy between the blocks. */
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
    int score1=0;
    int score2=0;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x ] - s2[x ]);
        }
        if(y+1<h){ /* gradient needs the row below; skip on the last row */
            for(x=0; x<7; x++){
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
                               - s1[x+1] + s1[x+1+stride])
                        -FFABS(  s2[x  ] - s2[x  +stride]
                               - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }

    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
}
01798
/* Evaluate the weighted squared error of the residual rem after adding
 * basis*scale (rescaled from BASIS_SHIFT to RECON_SHIFT precision with
 * rounding). Used by the encoder's trellis-like basis search.
 * The assert bounds b so that (w*b)^2 fits the unsigned accumulator. */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    int i;
    unsigned int sum=0;

    for(i=0; i<8*8; i++){
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
        int w= weight[i];
        b>>= RECON_SHIFT;
        assert(-512<b && b<512);

        sum += (w*b)*(w*b)>>4;
    }
    return sum>>2;
}
01813
01814 static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
01815 int i;
01816
01817 for(i=0; i<8*8; i++){
01818 rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
01819 }
01820 }
01821
01830 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
01831 {
01832 int i;
01833 DCTELEM temp[64];
01834
01835 if(last<=0) return;
01836
01837
01838 for(i=0; i<=last; i++){
01839 const int j= scantable[i];
01840 temp[j]= block[j];
01841 block[j]=0;
01842 }
01843
01844 for(i=0; i<=last; i++){
01845 const int j= scantable[i];
01846 const int perm_j= permutation[j];
01847 block[perm_j]= temp[j];
01848 }
01849 }
01850
/* Comparison stub for FF_CMP_ZERO: every candidate scores 0. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    (void)s; (void)a; (void)b; (void)stride; (void)h;
    return 0;
}
01854
01855 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
01856 int i;
01857
01858 memset(cmp, 0, sizeof(void*)*6);
01859
01860 for(i=0; i<6; i++){
01861 switch(type&0xFF){
01862 case FF_CMP_SAD:
01863 cmp[i]= c->sad[i];
01864 break;
01865 case FF_CMP_SATD:
01866 cmp[i]= c->hadamard8_diff[i];
01867 break;
01868 case FF_CMP_SSE:
01869 cmp[i]= c->sse[i];
01870 break;
01871 case FF_CMP_DCT:
01872 cmp[i]= c->dct_sad[i];
01873 break;
01874 case FF_CMP_DCT264:
01875 cmp[i]= c->dct264_sad[i];
01876 break;
01877 case FF_CMP_DCTMAX:
01878 cmp[i]= c->dct_max[i];
01879 break;
01880 case FF_CMP_PSNR:
01881 cmp[i]= c->quant_psnr[i];
01882 break;
01883 case FF_CMP_BIT:
01884 cmp[i]= c->bit[i];
01885 break;
01886 case FF_CMP_RD:
01887 cmp[i]= c->rd[i];
01888 break;
01889 case FF_CMP_VSAD:
01890 cmp[i]= c->vsad[i];
01891 break;
01892 case FF_CMP_VSSE:
01893 cmp[i]= c->vsse[i];
01894 break;
01895 case FF_CMP_ZERO:
01896 cmp[i]= zero_cmp;
01897 break;
01898 case FF_CMP_NSSE:
01899 cmp[i]= c->nsse[i];
01900 break;
01901 #if CONFIG_DWT
01902 case FF_CMP_W53:
01903 cmp[i]= c->w53[i];
01904 break;
01905 case FF_CMP_W97:
01906 cmp[i]= c->w97[i];
01907 break;
01908 #endif
01909 default:
01910 av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
01911 }
01912 }
01913 }
01914
/* dst[i] += src[i] (mod 256) for w bytes.
 * The main loop processes sizeof(long) bytes per iteration with SWAR:
 * the low 7 bits of each lane are summed directly, and the carry into
 * bit 7 is reconstructed with XOR so lanes never leak into each other.
 * Remaining tail bytes are handled one at a time.
 * NOTE(review): the word loop loads through long*, which assumes the
 * buffers are adequately aligned for long access — confirm at callers. */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}
01925
/* dst[i] = src1[i] - src2[i] (mod 256) for w bytes.
 * On targets without fast unaligned access, a bytewise unrolled path is
 * taken when src2 is misaligned for long loads. Otherwise the word loop
 * subtracts sizeof(long) byte lanes at once using a SWAR borrow trick
 * (low 7 bits subtracted with a forced borrow guard, sign bit fixed up
 * via XOR). Tail bytes are done individually. */
static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
    long i;
#if !HAVE_FAST_UNALIGNED
    if((long)src2 & (sizeof(long)-1)){
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}
01950
/* HuffYUV median-prediction decode: reconstruct w bytes of dst from the
 * diff stream, predicting each sample as the median of left, top (src1)
 * and left+top-topleft (mod 256). The left / left_top prediction state
 * is carried across calls via the in/out pointers. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}
01967
/* Encode-side inverse of add_hfyu_median_prediction_c: emit into dst the
 * difference between each src2 sample and its median prediction from
 * left / top (src1) / left+top-topleft. State is updated through the
 * left and left_top pointers for the next call. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}
01985
/* Left-prediction decode: dst[i] is the running sum of src deltas seeded
 * with acc (each store truncates to 8 bits). Returns the final sum so the
 * caller can continue the prediction on the next slice. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i = 0;

    /* Unrolled-by-two main loop, then a bytewise tail. */
    while (i < w - 1) {
        acc += src[i];
        dst[i] = acc;
        i++;
        acc += src[i];
        dst[i] = acc;
        i++;
    }
    while (i < w) {
        acc += src[i];
        dst[i] = acc;
        i++;
    }

    return acc;
}
02004
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* Left-prediction decode for packed 32-bit BGRA pixels: each channel is an
 * independent running sum of the input deltas, seeded from and written back
 * to *red/*green/*blue/*alpha (stores truncate to 8 bits per channel). */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;

    for (i = 0; i < w; i++) {
        const uint8_t *sp = src + 4 * i;
        uint8_t       *dp = dst + 4 * i;

        sum_b += sp[B];
        sum_g += sp[G];
        sum_r += sp[R];
        sum_a += sp[A];

        dp[B] = sum_b;
        dp[G] = sum_g;
        dp[R] = sum_r;
        dp[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
02045
02046 #define BUTTERFLY2(o1,o2,i1,i2) \
02047 o1= (i1)+(i2);\
02048 o2= (i1)-(i2);
02049
02050 #define BUTTERFLY1(x,y) \
02051 {\
02052 int a,b;\
02053 a= x;\
02054 b= y;\
02055 x= a+b;\
02056 y= a-b;\
02057 }
02058
02059 #define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
02060
/* SATD: 8x8 Hadamard transform of the difference src-dst, returning the
 * sum of absolute transform coefficients. Rows are transformed in the
 * first loop, columns in the second; the final butterfly stage is folded
 * into the absolute-value accumulation (BUTTERFLYA). h must be 8. */
static int hadamard8_diff8x8_c( void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* 1-D Hadamard on row i of the pixel difference */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* 1-D Hadamard down column i; last stage merged with |.| sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}
02105
/* Intra SATD: 8x8 Hadamard transform of the source block itself (no
 * reference), sum of absolute coefficients minus the DC term, which is
 * subtracted at the end (temp[8*0]+temp[8*4] is the DC after the merged
 * final stage). h must be 8. */
static int hadamard8_intra8x8_c( void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        /* 1-D Hadamard on row i of the source pixels */
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        /* 1-D Hadamard down column i; last stage merged with |.| sum */
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    /* remove the DC contribution so the metric reflects AC energy only */
    sum -= FFABS(temp[8*0] + temp[8*4]);

    return sum;
}
02153
/* DCT-domain SAD: forward-DCT the 8x8 pixel difference and return the sum
 * of absolute coefficients — a coding-cost-flavoured distortion metric.
 * h must be 8. */
static int dct_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}
02164
02165 #if CONFIG_GPL
02166 #define DCT8_1D {\
02167 const int s07 = SRC(0) + SRC(7);\
02168 const int s16 = SRC(1) + SRC(6);\
02169 const int s25 = SRC(2) + SRC(5);\
02170 const int s34 = SRC(3) + SRC(4);\
02171 const int a0 = s07 + s34;\
02172 const int a1 = s16 + s25;\
02173 const int a2 = s07 - s34;\
02174 const int a3 = s16 - s25;\
02175 const int d07 = SRC(0) - SRC(7);\
02176 const int d16 = SRC(1) - SRC(6);\
02177 const int d25 = SRC(2) - SRC(5);\
02178 const int d34 = SRC(3) - SRC(4);\
02179 const int a4 = d16 + d25 + (d07 + (d07>>1));\
02180 const int a5 = d07 - d34 - (d25 + (d25>>1));\
02181 const int a6 = d07 + d34 - (d16 + (d16>>1));\
02182 const int a7 = d16 - d25 + (d34 + (d34>>1));\
02183 DST(0, a0 + a1 ) ;\
02184 DST(1, a4 + (a7>>2)) ;\
02185 DST(2, a2 + (a3>>1)) ;\
02186 DST(3, a5 + (a6>>2)) ;\
02187 DST(4, a0 - a1 ) ;\
02188 DST(5, a6 - (a5>>2)) ;\
02189 DST(6, (a2>>1) - a3 ) ;\
02190 DST(7, (a4>>2) - a7 ) ;\
02191 }
02192
/* H.264-style integer-DCT SAD: apply the DCT8_1D kernel to the rows of the
 * 8x8 pixel difference, then to the columns; the column pass accumulates
 * absolute values directly through the redefined DST macro. */
static int dct264_sad8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    DCTELEM dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
02216 #endif
02217
/* DCT-domain max metric: forward-DCT the 8x8 pixel difference and return
 * the largest absolute coefficient. h must be 8. */
static int dct_max8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int sum=0, i;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
        sum= FFMAX(sum, FFABS(temp[i]));

    return sum;
}
02233
/* Quantization-noise metric: run the residual through the encoder's
 * quantize / dequantize / IDCT round trip and return the squared error
 * against the untouched residual copy (bak). mb_intra is forced to 0 so
 * the inter quantizer path is used.
 * NOTE(review): fast_dct_quantize presumably performs the forward DCT as
 * part of quantization — confirm against its implementation. */
static int quant_psnr8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64*2]);
    DCTELEM * const bak = temp+64;
    int sum=0, i;

    assert(h==8);
    s->mb_intra=0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64*sizeof(DCTELEM));

    s->block_last_index[0]= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct(temp);

    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);

    return sum;
}
02256
/* Rate-distortion metric for an 8x8 block: quantize the residual, count
 * the VLC bits needed to code the coefficients (rate), reconstruct via
 * dequantize + idct_add, measure SSE against the source (distortion), and
 * return distortion + lambda-scaled rate. h must be 8. */
static int rd8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    /* work on local copies so the reconstruction can be added in place */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick intra/inter VLC tables; intra also codes the DC separately */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits; levels outside [-64,63] use escapes */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;

    }

    if(last>=0){
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    /* 109/128 ≈ lambda scaling of bits*qscale^2 into SSE units */
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
}
02332
/* Rate-only metric: quantize the 8x8 residual and return the number of
 * VLC bits required to code it (same coefficient walk as rd8x8_c but
 * without reconstruction/distortion). h must be 8. */
static int bit8x8_c( void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    const uint8_t *scantable= s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(DCTELEM, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;

    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0]= last= s->fast_dct_quantize(s, temp, 0, s->qscale, &i);

    bits=0;

    /* pick intra/inter VLC tables; intra also codes the DC separately */
    if (s->mb_intra) {
        start_i = 1;
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
        bits+= s->luma_dc_vlc_length[temp[0] + 256];
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }

    if(last>=start_i){
        /* count run/level VLC bits; levels outside [-64,63] use escapes */
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];

            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];

        level= temp[i] + 64;

        assert(level - 64);

        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}
02391
/* Intra vertical-SAD generator: sum of absolute differences between each
 * row and the row below it, over a `size`-wide block (inner loop unrolled
 * by 4). Instantiated below for widths 8 and 16. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
02409
/* Vertical SAD of the difference signal: for every pair of adjacent rows,
 * accumulate |(s1-s2) - (s1-s2 one row below)| over a 16-wide block. */
static int vsad16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d < 0 ? -d : d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02424
/* Squared-difference helper and intra vertical-SSE generator: sum of
 * squared differences between each row and the row below it over a
 * `size`-wide block. Instantiated below for widths 8 and 16. */
#define SQ(a) ((a)*(a))
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0; \
    int x,y; \
 \
    for(y=1; y<h; y++){ \
        for(x=0; x<size; x+=4){ \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
        } \
        s+= stride; \
    } \
 \
    return score; \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
02443
/* Vertical SSE of the difference signal: for every pair of adjacent rows,
 * accumulate the squared change in (s1-s2) over a 16-wide block. */
static int vsse16_c( void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score = 0;
    int x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            int d = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];
            score += d * d;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}
02458
/* Sum of squared differences between an int8 and an int16 vector. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int i, acc = 0;
    for (i = 0; i < size; i++) {
        int d = pix1[i] - pix2[i];
        acc += d * d;
    }
    return acc;
}
02467
/* Promote each 8x8 metric to a 16x16 variant by summing it over the four
 * 8x8 quadrants (WRAPPER8_16_SQ is defined earlier in this file). */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
02478
/* Element-wise product of two float vectors: dst = src0 * src1. */
static void vector_fmul_c(float *dst, const float *src0, const float *src1, int len){
    const float *a = src0;
    const float *b = src1;
    float *out = dst;
    while (len-- > 0)
        *out++ = *a++ * *b++;
}
02484
/* dst[i] = src0[i] * src1[len-1-i] — multiply against src1 reversed. */
static void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1, int len){
    int i;
    for (i = 0; i < len; i++)
        dst[i] = src0[i] * src1[len - 1 - i];
}
02491
/* Fused multiply-add over vectors: dst = src0 * src1 + src2. */
static void vector_fmul_add_c(float *dst, const float *src0, const float *src1, const float *src2, int len){
    int k;
    for (k = 0; k < len; k++) {
        float prod = src0[k] * src1[k];
        dst[k] = prod + src2[k];
    }
}
02497
/* Overlap-add windowing: combines src0 (forward) and src1 (reversed) with
 * the window win, producing 2*len output samples centred on dst+len.
 * Each iteration writes the symmetric pair dst[i]/dst[j].
 * NOTE(review): callers typically pass buffers where dst aliases src0 —
 * the read/write order here supports that, but confirm at call sites. */
static void vector_fmul_window_c(float *dst, const float *src0,
                                 const float *src1, const float *win, int len)
{
    int i,j;
    dst += len;
    win += len;
    src0+= len;
    /* i scans the second half forward (negative offsets), j the first
     * half backward */
    for(i=-len, j=len-1; i<0; i++, j--) {
        float s0 = src0[i];
        float s1 = src1[j];
        float wi = win[i];
        float wj = win[j];
        dst[i] = s0*wj - s1*wi;
        dst[j] = s0*wi + s1*wj;
    }
}
02514
/* Scale a float vector by a scalar: dst = src * mul. */
static void vector_fmul_scalar_c(float *dst, const float *src, float mul,
                                 int len)
{
    int remaining;
    for (remaining = len; remaining > 0; remaining--)
        *dst++ = *src++ * mul;
}
02522
/* dst pairs = src pairs * corresponding 2-float sub-vector * mul;
 * one sub-vector from sv is consumed per output pair. */
static void vector_fmul_sv_scalar_2_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;
    for (i = 0; i < len; i += 2) {
        const float *pair = *sv++;
        dst[i]     = src[i]     * pair[0] * mul;
        dst[i + 1] = src[i + 1] * pair[1] * mul;
    }
}
02532
/* dst quads = src quads * corresponding 4-float sub-vector * mul;
 * one sub-vector from sv is consumed per output quad. */
static void vector_fmul_sv_scalar_4_c(float *dst, const float *src,
                                      const float **sv, float mul, int len)
{
    int i;
    for (i = 0; i < len; i += 4) {
        const float *quad = *sv++;
        dst[i]     = src[i]     * quad[0] * mul;
        dst[i + 1] = src[i + 1] * quad[1] * mul;
        dst[i + 2] = src[i + 2] * quad[2] * mul;
        dst[i + 3] = src[i + 3] * quad[3] * mul;
    }
}
02544
/* Expand 2-float sub-vectors scaled by mul into dst (one per pair). */
static void sv_fmul_scalar_2_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;
    for (i = 0; i < len; i += 2) {
        const float *pair = *sv++;
        dst[i]     = pair[0] * mul;
        dst[i + 1] = pair[1] * mul;
    }
}
02554
/* Expand 4-float sub-vectors scaled by mul into dst (one per quad). */
static void sv_fmul_scalar_4_c(float *dst, const float **sv, float mul,
                               int len)
{
    int i;
    for (i = 0; i < len; i += 4) {
        const float *quad = *sv++;
        dst[i]     = quad[0] * mul;
        dst[i + 1] = quad[1] * mul;
        dst[i + 2] = quad[2] * mul;
        dst[i + 3] = quad[3] * mul;
    }
}
02566
/* In-place butterfly: (v1, v2) <- (v1 + v2, v1 - v2), element-wise. */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int k;
    for (k = 0; k < len; k++) {
        float sum  = v1[k] + v2[k];
        float diff = v1[k] - v2[k];
        v1[k] = sum;
        v2[k] = diff;
    }
}
02577
/* Dot product of two float vectors. */
static float scalarproduct_float_c(const float *v1, const float *v2, int len)
{
    float acc = 0.0;
    const float *end = v1 + len;

    while (v1 < end)
        acc += *v1++ * *v2++;

    return acc;
}
02588
/* Clip one float, viewed as its raw IEEE-754 bit pattern, for the
 * opposite-sign case: mini holds the bits of the negative lower bound,
 * maxi the bits of the positive upper bound, maxisign = maxi with the
 * sign bit flipped. Relies on unsigned comparison of sign-magnitude
 * float encodings; only valid when min < 0 < max (see vector_clipf_c). */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                                   uint32_t maxi, uint32_t maxisign)
{
    /* negative values sort above mini in unsigned space */
    if(a > mini) return mini;
    /* flipping the sign bit makes positives comparable against maxisign */
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}
02597
/* Clip a float vector to [*min, *max] using integer bit tricks; requires
 * min < 0 < max (enforced by the caller vector_clipf_c). The loop is
 * unrolled by 8, so len is assumed to be a multiple of 8.
 * NOTE(review): type-puns float buffers through uint32_t* — depends on
 * the build tolerating this aliasing, as elsewhere in this file. */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;
    for(i=0; i<len; i+=8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/* Clip a float vector to [min, max]. When the bounds straddle zero the
 * bit-pattern fast path is used; otherwise a plain av_clipf loop,
 * unrolled by 8 (len is assumed to be a multiple of 8). */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
02633
/* Dot product of two int16 vectors; each product is right-shifted by
 * `shift` before accumulation. */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
{
    int sum = 0;
    int i;

    for (i = 0; i < order; i++)
        sum += (v1[i] * v2[i]) >> shift;

    return sum;
}
02643
/* Combined dot product and multiply-accumulate: returns sum(v1*v2) using
 * the ORIGINAL v1 values, while updating v1 += mul*v3 in place. */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int sum = 0;
    int i;
    for (i = 0; i < order; i++) {
        sum += v1[i] * v2[i];      /* read v1 before updating it */
        v1[i] += mul * v3[i];
    }
    return sum;
}
02653
/* Apply a symmetric window to len int16 samples: window[i] is used for
 * both input[i] and its mirror input[len-1-i]. Fixed point Q15 with
 * round-to-nearest (+1<<14 before the >>15).
 * NOTE(review): assumes len is even — an odd middle sample would be
 * skipped since only len/2 iterations run. Confirm at callers. */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w = window[i];
        output[i] = (MUL16(input[i], w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
02666
02667 #define W0 2048
02668 #define W1 2841
02669 #define W2 2676
02670 #define W3 2408
02671 #define W4 2048
02672 #define W5 1609
02673 #define W6 1108
02674 #define W7 565
02675
/* 1-D WMV2 IDCT over one row of 8 coefficients, in place.
 * a0..a7 are the even/odd butterfly terms built from the W0..W7 cosine
 * constants; s1/s2 are the shared sqrt(2)/2-scaled (181/256) terms.
 * Results are rounded and scaled down by 8 bits. */
static void wmv2_idct_row(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part */
    a1 = W1*b[1]+W7*b[7];
    a7 = W7*b[1]-W1*b[7];
    a5 = W5*b[5]+W3*b[3];
    a3 = W3*b[5]-W5*b[3];
    /* even part */
    a2 = W2*b[2]+W6*b[6];
    a6 = W6*b[2]-W2*b[6];
    a0 = W0*b[0]+W0*b[4];
    a4 = W0*b[0]-W0*b[4];

    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[0] = (a0+a2+a1+a5 + (1<<7))>>8;
    b[1] = (a4+a6 +s1   + (1<<7))>>8;
    b[2] = (a4-a6 +s2   + (1<<7))>>8;
    b[3] = (a0-a2+a7+a3 + (1<<7))>>8;
    b[4] = (a0-a2-a7-a3 + (1<<7))>>8;
    b[5] = (a4-a6 -s2   + (1<<7))>>8;
    b[6] = (a4+a6 -s1   + (1<<7))>>8;
    b[7] = (a0+a2-a1-a5 + (1<<7))>>8;
}
/* 1-D WMV2 IDCT over one column (stride 8) of an 8x8 block, in place.
 * Same structure as wmv2_idct_row but with an extra >>3 pre-scale on the
 * butterfly terms and a final 14-bit rounding shift. */
static void wmv2_idct_col(short * b)
{
    int s1,s2;
    int a0,a1,a2,a3,a4,a5,a6,a7;

    /* odd part, pre-scaled by >>3 with rounding */
    a1 = (W1*b[8*1]+W7*b[8*7] + 4)>>3;
    a7 = (W7*b[8*1]-W1*b[8*7] + 4)>>3;
    a5 = (W5*b[8*5]+W3*b[8*3] + 4)>>3;
    a3 = (W3*b[8*5]-W5*b[8*3] + 4)>>3;
    /* even part */
    a2 = (W2*b[8*2]+W6*b[8*6] + 4)>>3;
    a6 = (W6*b[8*2]-W2*b[8*6] + 4)>>3;
    a0 = (W0*b[8*0]+W0*b[8*4]    )>>3;
    a4 = (W0*b[8*0]-W0*b[8*4]    )>>3;

    s1 = (181*(a1-a5+a7-a3)+128)>>8;
    s2 = (181*(a1-a5-a7+a3)+128)>>8;

    b[8*0] = (a0+a2+a1+a5 + (1<<13))>>14;
    b[8*1] = (a4+a6 +s1   + (1<<13))>>14;
    b[8*2] = (a4-a6 +s2   + (1<<13))>>14;
    b[8*3] = (a0-a2+a7+a3 + (1<<13))>>14;

    b[8*4] = (a0-a2-a7-a3 + (1<<13))>>14;
    b[8*5] = (a4-a6 -s2   + (1<<13))>>14;
    b[8*6] = (a4+a6 -s1   + (1<<13))>>14;
    b[8*7] = (a0+a2-a1-a5 + (1<<13))>>14;
}
/* Full separable 2-D WMV2 IDCT of an 8x8 block: rows first, then columns. */
void ff_wmv2_idct_c(short * block){
    int i;

    for (i = 0; i < 8; i++)
        wmv2_idct_row(block + 8 * i);
    for (i = 0; i < 8; i++)
        wmv2_idct_col(block + i);
}
02739
02740
/* WMV2 IDCT wrappers: transform the coefficient block, then store (put)
 * or accumulate (add) the clamped result into the destination picture. */
static void ff_wmv2_idct_put_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_wmv2_idct_add_c(uint8_t *dest, int line_size, DCTELEM *block)
{
    ff_wmv2_idct_c(block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
/* JPEG-reference IDCT wrappers: full-resolution transform, then clamped
 * store (put) or accumulate (add) into the destination. */
static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_put_pixels_clamped_c(block, dest, line_size);
}
static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct (block);
    ff_add_pixels_clamped_c(block, dest, line_size);
}
02761
/* 4x4 reduced-resolution IDCT wrappers (lowres==1 decoding path). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
static void ff_jref_idct4_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
02772
/* 2x2 reduced-resolution IDCT wrappers (lowres==2 decoding path). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
02783
/* 1x1 "IDCT" wrappers (lowres==3): only the DC term survives; it is
 * rounded, scaled by 1/8 and clamped through the crop table. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[(block[0] + 4)>>3];
}
static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
{
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
}
02796
/* Intentional no-op matching the (buf, stride, h) signature; presumably
 * fills optional DSP function-table slots (e.g. prefetch) — confirm at
 * the assignment sites. */
static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
02798
02799
/* One-time initialisation of the shared lookup tables:
 * - ff_cropTbl: identity over [0,255] with saturating borders on both
 *   sides (MAX_NEG_CROP entries of 0 below, 255 above);
 * - ff_squareTbl: (i-256)^2, i.e. squares indexed by difference + 256;
 * - inv_zigzag_direct16: inverse zigzag, 1-based scan position for each
 *   natural coefficient index. */
av_cold void dsputil_static_init(void)
{
    int i;

    for(i=0;i<256;i++) ff_cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        ff_cropTbl[i] = 0;
        ff_cropTbl[i + MAX_NEG_CROP + 256] = 255;
    }

    for(i=0;i<512;i++) {
        ff_squareTbl[i] = (i - 256) * (i - 256);
    }

    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
02816
/* Runtime probe for compiler stack alignment: declares a 16-byte-aligned
 * stack variable and checks its actual address. Returns 0 when alignment
 * holds, -1 otherwise (SIMD code requires it). The error is logged only
 * once, and only on builds with MMX/AltiVec enabled. */
int ff_check_alignment(void){
    static int did_fail=0;
    DECLARE_ALIGNED(16, int, aligned);

    if((intptr_t)&aligned & 15){
        if(!did_fail){
#if HAVE_MMX || HAVE_ALTIVEC
            av_log(NULL, AV_LOG_ERROR,
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
                "Do not report crashes to FFmpeg developers.\n");
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
02836
02837 av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
02838 {
02839 int i;
02840
02841 ff_check_alignment();
02842
02843 #if CONFIG_ENCODERS
02844 if(avctx->dct_algo==FF_DCT_FASTINT) {
02845 c->fdct = fdct_ifast;
02846 c->fdct248 = fdct_ifast248;
02847 }
02848 else if(avctx->dct_algo==FF_DCT_FAAN) {
02849 c->fdct = ff_faandct;
02850 c->fdct248 = ff_faandct248;
02851 }
02852 else {
02853 c->fdct = ff_jpeg_fdct_islow;
02854 c->fdct248 = ff_fdct248_islow;
02855 }
02856 #endif //CONFIG_ENCODERS
02857
02858 if(avctx->lowres==1){
02859 if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO || !CONFIG_H264_DECODER){
02860 c->idct_put= ff_jref_idct4_put;
02861 c->idct_add= ff_jref_idct4_add;
02862 }else{
02863 if (avctx->codec_id != CODEC_ID_H264) {
02864 c->idct_put= ff_h264_lowres_idct_put_8_c;
02865 c->idct_add= ff_h264_lowres_idct_add_8_c;
02866 } else {
02867 switch (avctx->bits_per_raw_sample) {
02868 case 9:
02869 c->idct_put= ff_h264_lowres_idct_put_9_c;
02870 c->idct_add= ff_h264_lowres_idct_add_9_c;
02871 break;
02872 case 10:
02873 c->idct_put= ff_h264_lowres_idct_put_10_c;
02874 c->idct_add= ff_h264_lowres_idct_add_10_c;
02875 break;
02876 default:
02877 c->idct_put= ff_h264_lowres_idct_put_8_c;
02878 c->idct_add= ff_h264_lowres_idct_add_8_c;
02879 }
02880 }
02881 }
02882 c->idct = j_rev_dct4;
02883 c->idct_permutation_type= FF_NO_IDCT_PERM;
02884 }else if(avctx->lowres==2){
02885 c->idct_put= ff_jref_idct2_put;
02886 c->idct_add= ff_jref_idct2_add;
02887 c->idct = j_rev_dct2;
02888 c->idct_permutation_type= FF_NO_IDCT_PERM;
02889 }else if(avctx->lowres==3){
02890 c->idct_put= ff_jref_idct1_put;
02891 c->idct_add= ff_jref_idct1_add;
02892 c->idct = j_rev_dct1;
02893 c->idct_permutation_type= FF_NO_IDCT_PERM;
02894 }else{
02895 if(avctx->idct_algo==FF_IDCT_INT){
02896 c->idct_put= ff_jref_idct_put;
02897 c->idct_add= ff_jref_idct_add;
02898 c->idct = j_rev_dct;
02899 c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
02900 }else if((CONFIG_VP3_DECODER || CONFIG_VP5_DECODER || CONFIG_VP6_DECODER ) &&
02901 avctx->idct_algo==FF_IDCT_VP3){
02902 c->idct_put= ff_vp3_idct_put_c;
02903 c->idct_add= ff_vp3_idct_add_c;
02904 c->idct = ff_vp3_idct_c;
02905 c->idct_permutation_type= FF_NO_IDCT_PERM;
02906 }else if(avctx->idct_algo==FF_IDCT_WMV2){
02907 c->idct_put= ff_wmv2_idct_put_c;
02908 c->idct_add= ff_wmv2_idct_add_c;
02909 c->idct = ff_wmv2_idct_c;
02910 c->idct_permutation_type= FF_NO_IDCT_PERM;
02911 }else if(avctx->idct_algo==FF_IDCT_FAAN){
02912 c->idct_put= ff_faanidct_put;
02913 c->idct_add= ff_faanidct_add;
02914 c->idct = ff_faanidct;
02915 c->idct_permutation_type= FF_NO_IDCT_PERM;
02916 }else if(CONFIG_EATGQ_DECODER && avctx->idct_algo==FF_IDCT_EA) {
02917 c->idct_put= ff_ea_idct_put_c;
02918 c->idct_permutation_type= FF_NO_IDCT_PERM;
02919 }else if(CONFIG_BINK_DECODER && avctx->idct_algo==FF_IDCT_BINK) {
02920 c->idct = ff_bink_idct_c;
02921 c->idct_add = ff_bink_idct_add_c;
02922 c->idct_put = ff_bink_idct_put_c;
02923 c->idct_permutation_type = FF_NO_IDCT_PERM;
02924 }else{
02925 c->idct_put= ff_simple_idct_put;
02926 c->idct_add= ff_simple_idct_add;
02927 c->idct = ff_simple_idct;
02928 c->idct_permutation_type= FF_NO_IDCT_PERM;
02929 }
02930 }
02931
02932 c->get_pixels = get_pixels_c;
02933 c->diff_pixels = diff_pixels_c;
02934 c->put_pixels_clamped = ff_put_pixels_clamped_c;
02935 c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_c;
02936 c->put_pixels_nonclamped = put_pixels_nonclamped_c;
02937 c->add_pixels_clamped = ff_add_pixels_clamped_c;
02938 c->sum_abs_dctelem = sum_abs_dctelem_c;
02939 c->gmc1 = gmc1_c;
02940 c->gmc = ff_gmc_c;
02941 c->pix_sum = pix_sum_c;
02942 c->pix_norm1 = pix_norm1_c;
02943
02944 c->fill_block_tab[0] = fill_block16_c;
02945 c->fill_block_tab[1] = fill_block8_c;
02946 c->scale_block = scale_block_c;
02947
02948
02949 c->pix_abs[0][0] = pix_abs16_c;
02950 c->pix_abs[0][1] = pix_abs16_x2_c;
02951 c->pix_abs[0][2] = pix_abs16_y2_c;
02952 c->pix_abs[0][3] = pix_abs16_xy2_c;
02953 c->pix_abs[1][0] = pix_abs8_c;
02954 c->pix_abs[1][1] = pix_abs8_x2_c;
02955 c->pix_abs[1][2] = pix_abs8_y2_c;
02956 c->pix_abs[1][3] = pix_abs8_xy2_c;
02957
02958 c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
02959 c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
02960 c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
02961 c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
02962 c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
02963 c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
02964 c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
02965 c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
02966 c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;
02967
02968 c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
02969 c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
02970 c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
02971 c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
02972 c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
02973 c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
02974 c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
02975 c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
02976 c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
02977
/*
 * dspfunc(PFX, IDX, NUM): fill row IDX of c->PFX##_pixels_tab with the 16
 * quarter-pel motion-compensation C routines PFX##NUM##_mcXY_c, where X/Y
 * are the quarter-pel x/y sub-positions (0..3) and NUM is the block size.
 * Used below for the put/put_no_rnd/avg qpel tables at sizes 16 and 8,
 * then #undef'd.
 */
02978 #define dspfunc(PFX, IDX, NUM) \
02979 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
02980 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
02981 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
02982 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
02983 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
02984 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
02985 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
02986 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
02987 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
02988 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
02989 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
02990 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
02991 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
02992 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
02993 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
02994 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
02995
02996 dspfunc(put_qpel, 0, 16);
02997 dspfunc(put_no_rnd_qpel, 0, 16);
02998
02999 dspfunc(avg_qpel, 0, 16);
03000
03001
03002 dspfunc(put_qpel, 1, 8);
03003 dspfunc(put_no_rnd_qpel, 1, 8);
03004
03005 dspfunc(avg_qpel, 1, 8);
03006
03007
03008 #undef dspfunc
03009
03010 #if CONFIG_MLP_DECODER || CONFIG_TRUEHD_DECODER
03011 ff_mlp_init(c, avctx);
03012 #endif
03013 #if CONFIG_WMV2_DECODER || CONFIG_VC1_DECODER
03014 ff_intrax8dsp_init(c,avctx);
03015 #endif
03016 #if CONFIG_RV30_DECODER
03017 ff_rv30dsp_init(c,avctx);
03018 #endif
03019 #if CONFIG_RV40_DECODER
03020 ff_rv40dsp_init(c,avctx);
03021 c->put_rv40_qpel_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
03022 c->avg_rv40_qpel_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
03023 c->put_rv40_qpel_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
03024 c->avg_rv40_qpel_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
03025 #endif
03026
03027 c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
03028 c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
03029 c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
03030 c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
03031 c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
03032 c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
03033 c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
03034 c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
03035
/*
 * SET_CMP_FUNC(name): install the two block-size variants of comparison
 * function "name" into the DSPContext: name##16_c at slot [0] (16x16) and
 * name##8x8_c at slot [1] (8x8).
 */
03036 #define SET_CMP_FUNC(name) \
03037 c->name[0]= name ## 16_c;\
03038 c->name[1]= name ## 8x8_c;
03039
03040 SET_CMP_FUNC(hadamard8_diff)
03041 c->hadamard8_diff[4]= hadamard8_intra16_c;
03042 c->hadamard8_diff[5]= hadamard8_intra8x8_c;
03043 SET_CMP_FUNC(dct_sad)
03044 SET_CMP_FUNC(dct_max)
03045 #if CONFIG_GPL
03046 SET_CMP_FUNC(dct264_sad)
03047 #endif
03048 c->sad[0]= pix_abs16_c;
03049 c->sad[1]= pix_abs8_c;
03050 c->sse[0]= sse16_c;
03051 c->sse[1]= sse8_c;
03052 c->sse[2]= sse4_c;
03053 SET_CMP_FUNC(quant_psnr)
03054 SET_CMP_FUNC(rd)
03055 SET_CMP_FUNC(bit)
03056 c->vsad[0]= vsad16_c;
03057 c->vsad[4]= vsad_intra16_c;
03058 c->vsad[5]= vsad_intra8_c;
03059 c->vsse[0]= vsse16_c;
03060 c->vsse[4]= vsse_intra16_c;
03061 c->vsse[5]= vsse_intra8_c;
03062 c->nsse[0]= nsse16_c;
03063 c->nsse[1]= nsse8_c;
03064 #if CONFIG_DWT
03065 ff_dsputil_init_dwt(c);
03066 #endif
03067
03068 c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
03069
03070 c->add_bytes= add_bytes_c;
03071 c->diff_bytes= diff_bytes_c;
03072 c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
03073 c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
03074 c->add_hfyu_left_prediction = add_hfyu_left_prediction_c;
03075 c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
03076 c->bswap_buf= bswap_buf;
03077 c->bswap16_buf = bswap16_buf;
03078
03079 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
03080 c->h263_h_loop_filter= h263_h_loop_filter_c;
03081 c->h263_v_loop_filter= h263_v_loop_filter_c;
03082 }
03083
03084 if (CONFIG_VP3_DECODER) {
03085 c->vp3_h_loop_filter= ff_vp3_h_loop_filter_c;
03086 c->vp3_v_loop_filter= ff_vp3_v_loop_filter_c;
03087 c->vp3_idct_dc_add= ff_vp3_idct_dc_add_c;
03088 }
03089
03090 c->h261_loop_filter= h261_loop_filter_c;
03091
03092 c->try_8x8basis= try_8x8basis_c;
03093 c->add_8x8basis= add_8x8basis_c;
03094
03095 #if CONFIG_VORBIS_DECODER
03096 c->vorbis_inverse_coupling = vorbis_inverse_coupling;
03097 #endif
03098 #if CONFIG_AC3_DECODER
03099 c->ac3_downmix = ff_ac3_downmix_c;
03100 #endif
03101 c->vector_fmul = vector_fmul_c;
03102 c->vector_fmul_reverse = vector_fmul_reverse_c;
03103 c->vector_fmul_add = vector_fmul_add_c;
03104 c->vector_fmul_window = vector_fmul_window_c;
03105 c->vector_clipf = vector_clipf_c;
03106 c->scalarproduct_int16 = scalarproduct_int16_c;
03107 c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
03108 c->apply_window_int16 = apply_window_int16_c;
03109 c->scalarproduct_float = scalarproduct_float_c;
03110 c->butterflies_float = butterflies_float_c;
03111 c->vector_fmul_scalar = vector_fmul_scalar_c;
03112
03113 c->vector_fmul_sv_scalar[0] = vector_fmul_sv_scalar_2_c;
03114 c->vector_fmul_sv_scalar[1] = vector_fmul_sv_scalar_4_c;
03115
03116 c->sv_fmul_scalar[0] = sv_fmul_scalar_2_c;
03117 c->sv_fmul_scalar[1] = sv_fmul_scalar_4_c;
03118
03119 c->shrink[0]= av_image_copy_plane;
03120 c->shrink[1]= ff_shrink22;
03121 c->shrink[2]= ff_shrink44;
03122 c->shrink[3]= ff_shrink88;
03123
03124 c->prefetch= just_return;
03125
03126 memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab));
03127 memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab));
03128
/*
 * FUNC/FUNCC paste a bit-depth suffix onto a function name to select the
 * per-depth variant generated by the dsputil_template.c inclusions
 * (BIT_DEPTH 8/9/10 at the top of this file), e.g.
 * FUNC(ff_emulated_edge_mc, 8) -> ff_emulated_edge_mc_8 and
 * FUNCC(clear_block, 8) -> clear_block_8_c.
 * #undef first in case the template left its own definitions behind.
 */
03129 #undef FUNC
03130 #undef FUNCC
03131 #define FUNC(f, depth) f ## _ ## depth
03132 #define FUNCC(f, depth) f ## _ ## depth ## _c
03133
/*
 * dspfunc1(PFX, IDX, NUM, depth): fill the 4 half-pel entries of row IDX of
 * c->PFX##_pixels_tab (full-pel, x2 = horizontal half-pel, y2 = vertical
 * half-pel, xy2 = diagonal half-pel) with the depth-suffixed C routines for
 * block size NUM.
 */
03134 #define dspfunc1(PFX, IDX, NUM, depth)\
03135 c->PFX ## _pixels_tab[IDX][0] = FUNCC(PFX ## _pixels ## NUM , depth);\
03136 c->PFX ## _pixels_tab[IDX][1] = FUNCC(PFX ## _pixels ## NUM ## _x2 , depth);\
03137 c->PFX ## _pixels_tab[IDX][2] = FUNCC(PFX ## _pixels ## NUM ## _y2 , depth);\
03138 c->PFX ## _pixels_tab[IDX][3] = FUNCC(PFX ## _pixels ## NUM ## _xy2, depth)
03139
/*
 * dspfunc2(PFX, IDX, NUM, depth): like dspfunc, but selects the
 * bit-depth-specific variants via FUNCC — fills all 16 quarter-pel entries
 * (_mcXY, X/Y = 0..3) of row IDX of c->PFX##_pixels_tab for block size NUM.
 * Used by BIT_DEPTH_FUNCS below for the H.264 qpel tables.
 */
03140 #define dspfunc2(PFX, IDX, NUM, depth)\
03141 c->PFX ## _pixels_tab[IDX][ 0] = FUNCC(PFX ## NUM ## _mc00, depth);\
03142 c->PFX ## _pixels_tab[IDX][ 1] = FUNCC(PFX ## NUM ## _mc10, depth);\
03143 c->PFX ## _pixels_tab[IDX][ 2] = FUNCC(PFX ## NUM ## _mc20, depth);\
03144 c->PFX ## _pixels_tab[IDX][ 3] = FUNCC(PFX ## NUM ## _mc30, depth);\
03145 c->PFX ## _pixels_tab[IDX][ 4] = FUNCC(PFX ## NUM ## _mc01, depth);\
03146 c->PFX ## _pixels_tab[IDX][ 5] = FUNCC(PFX ## NUM ## _mc11, depth);\
03147 c->PFX ## _pixels_tab[IDX][ 6] = FUNCC(PFX ## NUM ## _mc21, depth);\
03148 c->PFX ## _pixels_tab[IDX][ 7] = FUNCC(PFX ## NUM ## _mc31, depth);\
03149 c->PFX ## _pixels_tab[IDX][ 8] = FUNCC(PFX ## NUM ## _mc02, depth);\
03150 c->PFX ## _pixels_tab[IDX][ 9] = FUNCC(PFX ## NUM ## _mc12, depth);\
03151 c->PFX ## _pixels_tab[IDX][10] = FUNCC(PFX ## NUM ## _mc22, depth);\
03152 c->PFX ## _pixels_tab[IDX][11] = FUNCC(PFX ## NUM ## _mc32, depth);\
03153 c->PFX ## _pixels_tab[IDX][12] = FUNCC(PFX ## NUM ## _mc03, depth);\
03154 c->PFX ## _pixels_tab[IDX][13] = FUNCC(PFX ## NUM ## _mc13, depth);\
03155 c->PFX ## _pixels_tab[IDX][14] = FUNCC(PFX ## NUM ## _mc23, depth);\
03156 c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
03157
03158
/*
 * BIT_DEPTH_FUNCS(depth): install every bit-depth-dependent function
 * pointer for the given pixel depth (8, 9 or 10) — edge drawing/emulation,
 * block clearing, pixel add helpers, the put/avg pixel tables (via
 * dspfunc1) and the H.264 chroma and qpel MC tables (via dspfunc2).
 * Invoked below based on avctx->bits_per_raw_sample; depths 9/10 exist only
 * for H.264 (see the dsputil_template.c inclusions at the top of the file).
 */
03159 #define BIT_DEPTH_FUNCS(depth)\
03160 c->draw_edges = FUNCC(draw_edges , depth);\
03161 c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
03162 c->clear_block = FUNCC(clear_block , depth);\
03163 c->clear_blocks = FUNCC(clear_blocks , depth);\
03164 c->add_pixels8 = FUNCC(add_pixels8 , depth);\
03165 c->add_pixels4 = FUNCC(add_pixels4 , depth);\
03166 c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
03167 c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
03168 \
03169 c->put_h264_chroma_pixels_tab[0] = FUNCC(put_h264_chroma_mc8 , depth);\
03170 c->put_h264_chroma_pixels_tab[1] = FUNCC(put_h264_chroma_mc4 , depth);\
03171 c->put_h264_chroma_pixels_tab[2] = FUNCC(put_h264_chroma_mc2 , depth);\
03172 c->avg_h264_chroma_pixels_tab[0] = FUNCC(avg_h264_chroma_mc8 , depth);\
03173 c->avg_h264_chroma_pixels_tab[1] = FUNCC(avg_h264_chroma_mc4 , depth);\
03174 c->avg_h264_chroma_pixels_tab[2] = FUNCC(avg_h264_chroma_mc2 , depth);\
03175 \
03176 dspfunc1(put , 0, 16, depth);\
03177 dspfunc1(put , 1, 8, depth);\
03178 dspfunc1(put , 2, 4, depth);\
03179 dspfunc1(put , 3, 2, depth);\
03180 dspfunc1(put_no_rnd, 0, 16, depth);\
03181 dspfunc1(put_no_rnd, 1, 8, depth);\
03182 dspfunc1(avg , 0, 16, depth);\
03183 dspfunc1(avg , 1, 8, depth);\
03184 dspfunc1(avg , 2, 4, depth);\
03185 dspfunc1(avg , 3, 2, depth);\
03186 dspfunc1(avg_no_rnd, 0, 16, depth);\
03187 dspfunc1(avg_no_rnd, 1, 8, depth);\
03188 \
03189 dspfunc2(put_h264_qpel, 0, 16, depth);\
03190 dspfunc2(put_h264_qpel, 1, 8, depth);\
03191 dspfunc2(put_h264_qpel, 2, 4, depth);\
03192 dspfunc2(put_h264_qpel, 3, 2, depth);\
03193 dspfunc2(avg_h264_qpel, 0, 16, depth);\
03194 dspfunc2(avg_h264_qpel, 1, 8, depth);\
03195 dspfunc2(avg_h264_qpel, 2, 4, depth);
03196
03197 if (avctx->codec_id != CODEC_ID_H264 || avctx->bits_per_raw_sample == 8) {
03198 BIT_DEPTH_FUNCS(8)
03199 } else {
03200 switch (avctx->bits_per_raw_sample) {
03201 case 9:
03202 BIT_DEPTH_FUNCS(9)
03203 break;
03204 case 10:
03205 BIT_DEPTH_FUNCS(10)
03206 break;
03207 default:
03208 av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
03209 BIT_DEPTH_FUNCS(8)
03210 break;
03211 }
03212 }
03213
03214
03215 if (HAVE_MMX) dsputil_init_mmx (c, avctx);
03216 if (ARCH_ARM) dsputil_init_arm (c, avctx);
03217 if (CONFIG_MLIB) dsputil_init_mlib (c, avctx);
03218 if (HAVE_VIS) dsputil_init_vis (c, avctx);
03219 if (ARCH_ALPHA) dsputil_init_alpha (c, avctx);
03220 if (ARCH_PPC) dsputil_init_ppc (c, avctx);
03221 if (HAVE_MMI) dsputil_init_mmi (c, avctx);
03222 if (ARCH_SH4) dsputil_init_sh4 (c, avctx);
03223 if (ARCH_BFIN) dsputil_init_bfin (c, avctx);
03224
03225 for(i=0; i<64; i++){
03226 if(!c->put_2tap_qpel_pixels_tab[0][i])
03227 c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i];
03228 if(!c->avg_2tap_qpel_pixels_tab[0][i])
03229 c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
03230 }
03231
03232 c->put_rv30_tpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
03233 c->put_rv30_tpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
03234 c->avg_rv30_tpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
03235 c->avg_rv30_tpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
03236
03237 c->put_rv40_qpel_pixels_tab[0][0] = c->put_h264_qpel_pixels_tab[0][0];
03238 c->put_rv40_qpel_pixels_tab[1][0] = c->put_h264_qpel_pixels_tab[1][0];
03239 c->avg_rv40_qpel_pixels_tab[0][0] = c->avg_h264_qpel_pixels_tab[0][0];
03240 c->avg_rv40_qpel_pixels_tab[1][0] = c->avg_h264_qpel_pixels_tab[1][0];
03241
03242 switch(c->idct_permutation_type){
03243 case FF_NO_IDCT_PERM:
03244 for(i=0; i<64; i++)
03245 c->idct_permutation[i]= i;
03246 break;
03247 case FF_LIBMPEG2_IDCT_PERM:
03248 for(i=0; i<64; i++)
03249 c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
03250 break;
03251 case FF_SIMPLE_IDCT_PERM:
03252 for(i=0; i<64; i++)
03253 c->idct_permutation[i]= simple_mmx_permutation[i];
03254 break;
03255 case FF_TRANSPOSE_IDCT_PERM:
03256 for(i=0; i<64; i++)
03257 c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
03258 break;
03259 case FF_PARTTRANS_IDCT_PERM:
03260 for(i=0; i<64; i++)
03261 c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
03262 break;
03263 case FF_SSE2_IDCT_PERM:
03264 for(i=0; i<64; i++)
03265 c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
03266 break;
03267 default:
03268 av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
03269 }
03270 }
03271