56 #define pb_7f (~0UL/255 * 0x7f) /* SWAR constant: ~0UL/255 == 0x0101..01, so this replicates 0x7f into every byte of an unsigned long */
57 #define pb_80 (~0UL/255 * 0x80) /* 0x80 replicated into every byte — per-byte MSB mask for the same SWAR tricks */
62 0, 8, 1, 9, 16, 24, 2, 10,
63 17, 25, 32, 40, 48, 56, 33, 41,
64 18, 26, 3, 11, 4, 12, 19, 27,
65 34, 42, 49, 57, 50, 58, 35, 43,
66 20, 28, 5, 13, 6, 14, 21, 29,
67 36, 44, 51, 59, 52, 60, 37, 45,
68 22, 30, 7, 15, 23, 31, 38, 46,
69 53, 61, 54, 62, 39, 47, 55, 63,
73 0, 1, 2, 3, 8, 9, 16, 17,
74 10, 11, 4, 5, 6, 7, 15, 14,
75 13, 12, 19, 18, 24, 25, 32, 33,
76 26, 27, 20, 21, 22, 23, 28, 29,
77 30, 31, 34, 35, 40, 41, 48, 49,
78 42, 43, 36, 37, 38, 39, 44, 45,
79 46, 47, 50, 51, 56, 57, 58, 59,
80 52, 53, 54, 55, 60, 61, 62, 63,
84 0, 8, 16, 24, 1, 9, 2, 10,
85 17, 25, 32, 40, 48, 56, 57, 49,
86 41, 33, 26, 18, 3, 11, 4, 12,
87 19, 27, 34, 42, 50, 58, 35, 43,
88 51, 59, 20, 28, 5, 13, 6, 14,
89 21, 29, 36, 44, 52, 60, 37, 45,
90 53, 61, 22, 30, 7, 15, 23, 31,
91 38, 46, 54, 62, 39, 47, 55, 63,
96 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
97 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
98 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
99 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
100 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
101 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
102 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
103 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
118 j = src_scantable[i];
132 int idct_permutation_type)
136 switch(idct_permutation_type){
139 idct_permutation[i]= i;
143 idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
151 idct_permutation[i]= ((i&7)<<3) | (i>>3);
155 idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
171 for (i = 0; i < 16; i++) {
172 for (j = 0; j < 16; j += 8) {
183 pix += line_size - 16;
194 for (i = 0; i < 16; i++) {
195 for (j = 0; j < 16; j += 8) {
207 register uint64_t x=*(uint64_t*)pix;
209 s += sq[(x>>8)&0xff];
210 s += sq[(x>>16)&0xff];
211 s += sq[(x>>24)&0xff];
212 s += sq[(x>>32)&0xff];
213 s += sq[(x>>40)&0xff];
214 s += sq[(x>>48)&0xff];
215 s += sq[(x>>56)&0xff];
217 register uint32_t x=*(uint32_t*)pix;
219 s += sq[(x>>8)&0xff];
220 s += sq[(x>>16)&0xff];
221 s += sq[(x>>24)&0xff];
222 x=*(uint32_t*)(pix+4);
224 s += sq[(x>>8)&0xff];
225 s += sq[(x>>16)&0xff];
226 s += sq[(x>>24)&0xff];
231 pix += line_size - 16;
239 for(i=0; i+8<=w; i+=8){
266 for (i = 0; i < h; i++) {
267 s += sq[pix1[0] - pix2[0]];
268 s += sq[pix1[1] - pix2[1]];
269 s += sq[pix1[2] - pix2[2]];
270 s += sq[pix1[3] - pix2[3]];
283 for (i = 0; i < h; i++) {
284 s += sq[pix1[0] - pix2[0]];
285 s += sq[pix1[1] - pix2[1]];
286 s += sq[pix1[2] - pix2[2]];
287 s += sq[pix1[3] - pix2[3]];
288 s += sq[pix1[4] - pix2[4]];
289 s += sq[pix1[5] - pix2[5]];
290 s += sq[pix1[6] - pix2[6]];
291 s += sq[pix1[7] - pix2[7]];
304 for (i = 0; i < h; i++) {
305 s += sq[pix1[ 0] - pix2[ 0]];
306 s += sq[pix1[ 1] - pix2[ 1]];
307 s += sq[pix1[ 2] - pix2[ 2]];
308 s += sq[pix1[ 3] - pix2[ 3]];
309 s += sq[pix1[ 4] - pix2[ 4]];
310 s += sq[pix1[ 5] - pix2[ 5]];
311 s += sq[pix1[ 6] - pix2[ 6]];
312 s += sq[pix1[ 7] - pix2[ 7]];
313 s += sq[pix1[ 8] - pix2[ 8]];
314 s += sq[pix1[ 9] - pix2[ 9]];
315 s += sq[pix1[10] - pix2[10]];
316 s += sq[pix1[11] - pix2[11]];
317 s += sq[pix1[12] - pix2[12]];
318 s += sq[pix1[13] - pix2[13]];
319 s += sq[pix1[14] - pix2[14]];
320 s += sq[pix1[15] - pix2[15]];
334 block[0] = s1[0] - s2[0];
335 block[1] = s1[1] - s2[1];
336 block[2] = s1[2] - s2[2];
337 block[3] = s1[3] - s2[3];
338 block[4] = s1[4] - s2[4];
339 block[5] = s1[5] - s2[5];
340 block[6] = s1[6] - s2[6];
341 block[7] = s1[7] - s2[7];
355 pixels[0] = av_clip_uint8(block[0]);
356 pixels[1] = av_clip_uint8(block[1]);
357 pixels[2] = av_clip_uint8(block[2]);
358 pixels[3] = av_clip_uint8(block[3]);
359 pixels[4] = av_clip_uint8(block[4]);
360 pixels[5] = av_clip_uint8(block[5]);
361 pixels[6] = av_clip_uint8(block[6]);
362 pixels[7] = av_clip_uint8(block[7]);
376 pixels[0] = av_clip_uint8(block[0]);
377 pixels[1] = av_clip_uint8(block[1]);
378 pixels[2] = av_clip_uint8(block[2]);
379 pixels[3] = av_clip_uint8(block[3]);
393 pixels[0] = av_clip_uint8(block[0]);
394 pixels[1] = av_clip_uint8(block[1]);
407 for (i = 0; i < 8; i++) {
408 for (j = 0; j < 8; j++) {
411 else if (*block > 127)
414 *pixels = (
uint8_t)(*block + 128);
418 pixels += (line_size - 8);
429 pixels[0] += block[0];
430 pixels[1] += block[1];
431 pixels[2] += block[2];
432 pixels[3] += block[3];
433 pixels[4] += block[4];
434 pixels[5] += block[5];
435 pixels[6] += block[6];
436 pixels[7] += block[7];
449 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
450 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
451 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
452 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
453 pixels[4] = av_clip_uint8(pixels[4] + block[4]);
454 pixels[5] = av_clip_uint8(pixels[5] + block[5]);
455 pixels[6] = av_clip_uint8(pixels[6] + block[6]);
456 pixels[7] = av_clip_uint8(pixels[7] + block[7]);
469 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
470 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
471 pixels[2] = av_clip_uint8(pixels[2] + block[2]);
472 pixels[3] = av_clip_uint8(pixels[3] + block[3]);
485 pixels[0] = av_clip_uint8(pixels[0] + block[0]);
486 pixels[1] = av_clip_uint8(pixels[1] + block[1]);
496 sum+=
FFABS(block[i]);
504 for (i = 0; i < h; i++) {
505 memset(block, value, 16);
514 for (i = 0; i < h; i++) {
515 memset(block, value, 8);
/* Pixel-averaging helpers with round-to-nearest (bias added before the shift).
 * Fix: the original left the arguments unparenthesized, so a caller passing a
 * compound expression (e.g. avg2(x|1, y)) would expand with the wrong operator
 * precedence. Every argument and the whole expansion are now parenthesized,
 * per standard macro hygiene; all existing call sites behave identically. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
525 const int A=(16-x16)*(16-y16);
526 const int B=( x16)*(16-y16);
527 const int C=(16-x16)*( y16);
528 const int D=( x16)*( y16);
533 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] +
rounder)>>8;
534 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] +
rounder)>>8;
535 dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] +
rounder)>>8;
536 dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] +
rounder)>>8;
537 dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] +
rounder)>>8;
538 dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] +
rounder)>>8;
539 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] +
rounder)>>8;
540 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] +
rounder)>>8;
561 int src_x, src_y, frac_x, frac_y,
index;
570 if((
unsigned)src_x <
width){
571 if((
unsigned)src_y <
height){
572 index= src_x + src_y*stride;
573 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
574 + src[index +1]* frac_x )*(s-frac_y)
575 + ( src[index+stride ]*(s-frac_x)
576 + src[index+stride+1]* frac_x )* frac_y
579 index= src_x + av_clip(src_y, 0, height)*stride;
580 dst[y*stride + x]= ( ( src[
index ]*(s-frac_x)
581 + src[index +1]* frac_x )*s
585 if((
unsigned)src_y <
height){
586 index= av_clip(src_x, 0, width) + src_y*stride;
587 dst[y*stride + x]= ( ( src[
index ]*(s-frac_y)
588 + src[index+stride ]* frac_y )*s
591 index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
592 dst[y*stride + x]= src[
index ];
606 case 2: put_pixels2_8_c (dst, src, stride, height);
break;
607 case 4: put_pixels4_8_c (dst, src, stride, height);
break;
608 case 8: put_pixels8_8_c (dst, src, stride, height);
break;
609 case 16:put_pixels16_8_c(dst, src, stride, height);
break;
615 for (i=0; i <
height; i++) {
616 for (j=0; j <
width; j++) {
617 dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
626 for (i=0; i <
height; i++) {
627 for (j=0; j <
width; j++) {
628 dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
637 for (i=0; i <
height; i++) {
638 for (j=0; j <
width; j++) {
639 dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
648 for (i=0; i <
height; i++) {
649 for (j=0; j <
width; j++) {
650 dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
659 for (i=0; i <
height; i++) {
660 for (j=0; j <
width; j++) {
661 dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
670 for (i=0; i <
height; i++) {
671 for (j=0; j <
width; j++) {
672 dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
681 for (i=0; i <
height; i++) {
682 for (j=0; j <
width; j++) {
683 dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
692 for (i=0; i <
height; i++) {
693 for (j=0; j <
width; j++) {
694 dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
703 case 2: avg_pixels2_8_c (dst, src, stride, height);
break;
704 case 4: avg_pixels4_8_c (dst, src, stride, height);
break;
705 case 8: avg_pixels8_8_c (dst, src, stride, height);
break;
706 case 16:avg_pixels16_8_c(dst, src, stride, height);
break;
712 for (i=0; i <
height; i++) {
713 for (j=0; j <
width; j++) {
714 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
723 for (i=0; i <
height; i++) {
724 for (j=0; j <
width; j++) {
725 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
734 for (i=0; i <
height; i++) {
735 for (j=0; j <
width; j++) {
736 dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
745 for (i=0; i <
height; i++) {
746 for (j=0; j <
width; j++) {
747 dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
756 for (i=0; i <
height; i++) {
757 for (j=0; j <
width; j++) {
758 dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
767 for (i=0; i <
height; i++) {
768 for (j=0; j <
width; j++) {
769 dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
778 for (i=0; i <
height; i++) {
779 for (j=0; j <
width; j++) {
780 dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
789 for (i=0; i <
height; i++) {
790 for (j=0; j <
width; j++) {
791 dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
798 #define QPEL_MC(r, OPNAME, RND, OP) \
799 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
800 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
804 OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
805 OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
806 OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
807 OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
808 OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
809 OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
810 OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
811 OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
817 static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
819 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
823 const int src0= src[0*srcStride];\
824 const int src1= src[1*srcStride];\
825 const int src2= src[2*srcStride];\
826 const int src3= src[3*srcStride];\
827 const int src4= src[4*srcStride];\
828 const int src5= src[5*srcStride];\
829 const int src6= src[6*srcStride];\
830 const int src7= src[7*srcStride];\
831 const int src8= src[8*srcStride];\
832 OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
833 OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
834 OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
835 OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
836 OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
837 OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
838 OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
839 OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
845 static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
846 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
851 OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
852 OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
853 OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
854 OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
855 OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
856 OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
857 OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
858 OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
859 OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
860 OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
861 OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
862 OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
863 OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
864 OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
865 OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
866 OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
872 static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
873 const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
878 const int src0= src[0*srcStride];\
879 const int src1= src[1*srcStride];\
880 const int src2= src[2*srcStride];\
881 const int src3= src[3*srcStride];\
882 const int src4= src[4*srcStride];\
883 const int src5= src[5*srcStride];\
884 const int src6= src[6*srcStride];\
885 const int src7= src[7*srcStride];\
886 const int src8= src[8*srcStride];\
887 const int src9= src[9*srcStride];\
888 const int src10= src[10*srcStride];\
889 const int src11= src[11*srcStride];\
890 const int src12= src[12*srcStride];\
891 const int src13= src[13*srcStride];\
892 const int src14= src[14*srcStride];\
893 const int src15= src[15*srcStride];\
894 const int src16= src[16*srcStride];\
895 OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
896 OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
897 OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
898 OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
899 OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
900 OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
901 OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
902 OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
903 OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
904 OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
905 OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
906 OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
907 OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
908 OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
909 OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
910 OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
916 static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
919 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
920 OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
923 static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
925 OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
928 static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
931 put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
932 OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
935 static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
939 copy_block9(full, src, 16, stride, 9);\
940 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
941 OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
944 static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
947 copy_block9(full, src, 16, stride, 9);\
948 OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
951 static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
955 copy_block9(full, src, 16, stride, 9);\
956 put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
957 OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
959 void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
965 copy_block9(full, src, 16, stride, 9);\
966 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
967 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
968 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
969 OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
971 static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
976 copy_block9(full, src, 16, stride, 9);\
977 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
978 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
979 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
980 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
982 void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
988 copy_block9(full, src, 16, stride, 9);\
989 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
990 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
991 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
992 OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
994 static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
999 copy_block9(full, src, 16, stride, 9);\
1000 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1001 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1002 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1003 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1005 void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1007 uint8_t full[16*9];\
1010 uint8_t halfHV[64];\
1011 copy_block9(full, src, 16, stride, 9);\
1012 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1013 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1014 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1015 OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1017 static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1019 uint8_t full[16*9];\
1021 uint8_t halfHV[64];\
1022 copy_block9(full, src, 16, stride, 9);\
1023 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1024 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1025 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1026 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1028 void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1030 uint8_t full[16*9];\
1033 uint8_t halfHV[64];\
1034 copy_block9(full, src, 16, stride, 9);\
1035 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
1036 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1037 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1038 OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1040 static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1042 uint8_t full[16*9];\
1044 uint8_t halfHV[64];\
1045 copy_block9(full, src, 16, stride, 9);\
1046 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1047 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1048 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1049 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1051 static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1054 uint8_t halfHV[64];\
1055 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1056 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1057 OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1059 static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1062 uint8_t halfHV[64];\
1063 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1064 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1065 OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1067 void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1069 uint8_t full[16*9];\
1072 uint8_t halfHV[64];\
1073 copy_block9(full, src, 16, stride, 9);\
1074 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1075 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1076 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1077 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1079 static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1081 uint8_t full[16*9];\
1083 copy_block9(full, src, 16, stride, 9);\
1084 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1085 put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1086 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1088 void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1090 uint8_t full[16*9];\
1093 uint8_t halfHV[64];\
1094 copy_block9(full, src, 16, stride, 9);\
1095 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1096 put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1097 put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1098 OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1100 static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1102 uint8_t full[16*9];\
1104 copy_block9(full, src, 16, stride, 9);\
1105 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1106 put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1107 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1109 static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1112 put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1113 OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1116 static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1119 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1120 OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1123 static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1125 OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1128 static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1131 put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1132 OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1135 static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1137 uint8_t full[24*17];\
1139 copy_block17(full, src, 24, stride, 17);\
1140 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1141 OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1144 static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1146 uint8_t full[24*17];\
1147 copy_block17(full, src, 24, stride, 17);\
1148 OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1151 static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1153 uint8_t full[24*17];\
1155 copy_block17(full, src, 24, stride, 17);\
1156 put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1157 OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1159 void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1161 uint8_t full[24*17];\
1162 uint8_t halfH[272];\
1163 uint8_t halfV[256];\
1164 uint8_t halfHV[256];\
1165 copy_block17(full, src, 24, stride, 17);\
1166 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1167 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1168 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1169 OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1171 static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1173 uint8_t full[24*17];\
1174 uint8_t halfH[272];\
1175 uint8_t halfHV[256];\
1176 copy_block17(full, src, 24, stride, 17);\
1177 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1178 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1179 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1180 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1182 void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1184 uint8_t full[24*17];\
1185 uint8_t halfH[272];\
1186 uint8_t halfV[256];\
1187 uint8_t halfHV[256];\
1188 copy_block17(full, src, 24, stride, 17);\
1189 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1190 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1191 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1192 OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1194 static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1196 uint8_t full[24*17];\
1197 uint8_t halfH[272];\
1198 uint8_t halfHV[256];\
1199 copy_block17(full, src, 24, stride, 17);\
1200 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1201 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1202 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1203 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1205 void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1207 uint8_t full[24*17];\
1208 uint8_t halfH[272];\
1209 uint8_t halfV[256];\
1210 uint8_t halfHV[256];\
1211 copy_block17(full, src, 24, stride, 17);\
1212 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1213 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1214 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1215 OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1217 static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1219 uint8_t full[24*17];\
1220 uint8_t halfH[272];\
1221 uint8_t halfHV[256];\
1222 copy_block17(full, src, 24, stride, 17);\
1223 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1224 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1225 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1226 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1228 void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1230 uint8_t full[24*17];\
1231 uint8_t halfH[272];\
1232 uint8_t halfV[256];\
1233 uint8_t halfHV[256];\
1234 copy_block17(full, src, 24, stride, 17);\
1235 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
1236 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1237 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1238 OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1240 static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1242 uint8_t full[24*17];\
1243 uint8_t halfH[272];\
1244 uint8_t halfHV[256];\
1245 copy_block17(full, src, 24, stride, 17);\
1246 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1247 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1248 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1249 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1251 static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1253 uint8_t halfH[272];\
1254 uint8_t halfHV[256];\
1255 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1256 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1257 OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1259 static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1261 uint8_t halfH[272];\
1262 uint8_t halfHV[256];\
1263 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1264 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1265 OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1267 void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1269 uint8_t full[24*17];\
1270 uint8_t halfH[272];\
1271 uint8_t halfV[256];\
1272 uint8_t halfHV[256];\
1273 copy_block17(full, src, 24, stride, 17);\
1274 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1275 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1276 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1277 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1279 static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1281 uint8_t full[24*17];\
1282 uint8_t halfH[272];\
1283 copy_block17(full, src, 24, stride, 17);\
1284 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1285 put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1286 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1288 void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1290 uint8_t full[24*17];\
1291 uint8_t halfH[272];\
1292 uint8_t halfV[256];\
1293 uint8_t halfHV[256];\
1294 copy_block17(full, src, 24, stride, 17);\
1295 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1296 put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1297 put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1298 OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1300 static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1302 uint8_t full[24*17];\
1303 uint8_t halfH[272];\
1304 copy_block17(full, src, 24, stride, 17);\
1305 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1306 put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1307 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1309 static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1311 uint8_t halfH[272];\
1312 put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1313 OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* OP kernels plugged into QPEL_MC: 'b' is the raw 6-tap filter output scaled
 * by 32; (b+16)>>5 is a rounding divide by 32 and (b+15)>>5 the no-round
 * variant, with cm[] (ff_cropTbl + MAX_NEG_CROP) clipping to 0..255. */
1316 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)      /* clip, then round-average with existing pixel */
1317 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1) /* clip (no-round bias), average without +1 rounding */
1318 #define op_put(a, b) a = cm[((b) + 16)>>5]                   /* clip rounded result and store */
1319 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]            /* clip no-round result and store */
1323 QPEL_MC(0, avg_ , _ ,
op_avg)
1326 #undef op_avg_no_rnd
1328 #undef op_put_no_rnd
1332 put_pixels8_8_c(dst, src, stride, 8);
1336 avg_pixels8_8_c(dst, src, stride, 8);
1340 put_pixels16_8_c(dst, src, stride, 16);
1344 avg_pixels16_8_c(dst, src, stride, 16);
/* The mc00 (integer-pel, no interpolation) qpel cases need no filtering,
 * so alias them directly to the plain block copy/average helpers. */
#define put_qpel8_mc00_c ff_put_pixels8x8_c
#define avg_qpel8_mc00_c ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1359 dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1360 dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1361 dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1362 dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1363 dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1364 dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1365 dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1366 dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1372 #if CONFIG_RV40_DECODER
1375 put_pixels16_xy2_8_c(dst, src, stride, 16);
1379 avg_pixels16_xy2_8_c(dst, src, stride, 16);
1383 put_pixels8_xy2_8_c(dst, src, stride, 8);
1387 avg_pixels8_xy2_8_c(dst, src, stride, 8);
1391 #if CONFIG_DIRAC_DECODER
1392 #define DIRAC_MC(OPNAME)\
1393 void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1395 OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
1397 void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1399 OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
1401 void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1403 OPNAME ## _pixels16_8_c(dst , src[0] , stride, h);\
1404 OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
1406 void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1408 OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1410 void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1412 OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
1414 void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1416 OPNAME ## _pixels16_l2_8(dst , src[0] , src[1] , stride, stride, stride, h);\
1417 OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
1419 void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1421 OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1423 void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1425 OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
1427 void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1429 OPNAME ## _pixels16_l4_8(dst , src[0] , src[1] , src[2] , src[3] , stride, stride, stride, stride, stride, h);\
1430 OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
1441 const int src_1= src[ -srcStride];
1442 const int src0 = src[0 ];
1443 const int src1 = src[ srcStride];
1444 const int src2 = src[2*srcStride];
1445 const int src3 = src[3*srcStride];
1446 const int src4 = src[4*srcStride];
1447 const int src5 = src[5*srcStride];
1448 const int src6 = src[6*srcStride];
1449 const int src7 = src[7*srcStride];
1450 const int src8 = src[8*srcStride];
1451 const int src9 = src[9*srcStride];
1452 dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1453 dst[1*dstStride]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1454 dst[2*dstStride]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1455 dst[3*dstStride]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1456 dst[4*dstStride]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1457 dst[5*dstStride]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1458 dst[6*dstStride]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1459 dst[7*dstStride]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
1469 put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
1481 put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
1497 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1507 put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
1522 s += abs(pix1[0] - pix2[0]);
1523 s += abs(pix1[1] - pix2[1]);
1524 s += abs(pix1[2] - pix2[2]);
1525 s += abs(pix1[3] - pix2[3]);
1526 s += abs(pix1[4] - pix2[4]);
1527 s += abs(pix1[5] - pix2[5]);
1528 s += abs(pix1[6] - pix2[6]);
1529 s += abs(pix1[7] - pix2[7]);
1530 s += abs(pix1[8] - pix2[8]);
1531 s += abs(pix1[9] - pix2[9]);
1532 s += abs(pix1[10] - pix2[10]);
1533 s += abs(pix1[11] - pix2[11]);
1534 s += abs(pix1[12] - pix2[12]);
1535 s += abs(pix1[13] - pix2[13]);
1536 s += abs(pix1[14] - pix2[14]);
1537 s += abs(pix1[15] - pix2[15]);
1550 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1551 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1552 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1553 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1554 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1555 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1556 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1557 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1558 s += abs(pix1[8] -
avg2(pix2[8], pix2[9]));
1559 s += abs(pix1[9] -
avg2(pix2[9], pix2[10]));
1560 s += abs(pix1[10] -
avg2(pix2[10], pix2[11]));
1561 s += abs(pix1[11] -
avg2(pix2[11], pix2[12]));
1562 s += abs(pix1[12] -
avg2(pix2[12], pix2[13]));
1563 s += abs(pix1[13] -
avg2(pix2[13], pix2[14]));
1564 s += abs(pix1[14] -
avg2(pix2[14], pix2[15]));
1565 s += abs(pix1[15] -
avg2(pix2[15], pix2[16]));
1575 uint8_t *pix3 = pix2 + line_size;
1579 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1580 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1581 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1582 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1583 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1584 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1585 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1586 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1587 s += abs(pix1[8] -
avg2(pix2[8], pix3[8]));
1588 s += abs(pix1[9] -
avg2(pix2[9], pix3[9]));
1589 s += abs(pix1[10] -
avg2(pix2[10], pix3[10]));
1590 s += abs(pix1[11] -
avg2(pix2[11], pix3[11]));
1591 s += abs(pix1[12] -
avg2(pix2[12], pix3[12]));
1592 s += abs(pix1[13] -
avg2(pix2[13], pix3[13]));
1593 s += abs(pix1[14] -
avg2(pix2[14], pix3[14]));
1594 s += abs(pix1[15] -
avg2(pix2[15], pix3[15]));
1605 uint8_t *pix3 = pix2 + line_size;
1609 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1610 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1611 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1612 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1613 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1614 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1615 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1616 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1617 s += abs(pix1[8] -
avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
1618 s += abs(pix1[9] -
avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
1619 s += abs(pix1[10] -
avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
1620 s += abs(pix1[11] -
avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
1621 s += abs(pix1[12] -
avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
1622 s += abs(pix1[13] -
avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
1623 s += abs(pix1[14] -
avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
1624 s += abs(pix1[15] -
avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
1638 s += abs(pix1[0] - pix2[0]);
1639 s += abs(pix1[1] - pix2[1]);
1640 s += abs(pix1[2] - pix2[2]);
1641 s += abs(pix1[3] - pix2[3]);
1642 s += abs(pix1[4] - pix2[4]);
1643 s += abs(pix1[5] - pix2[5]);
1644 s += abs(pix1[6] - pix2[6]);
1645 s += abs(pix1[7] - pix2[7]);
1658 s += abs(pix1[0] -
avg2(pix2[0], pix2[1]));
1659 s += abs(pix1[1] -
avg2(pix2[1], pix2[2]));
1660 s += abs(pix1[2] -
avg2(pix2[2], pix2[3]));
1661 s += abs(pix1[3] -
avg2(pix2[3], pix2[4]));
1662 s += abs(pix1[4] -
avg2(pix2[4], pix2[5]));
1663 s += abs(pix1[5] -
avg2(pix2[5], pix2[6]));
1664 s += abs(pix1[6] -
avg2(pix2[6], pix2[7]));
1665 s += abs(pix1[7] -
avg2(pix2[7], pix2[8]));
1675 uint8_t *pix3 = pix2 + line_size;
1679 s += abs(pix1[0] -
avg2(pix2[0], pix3[0]));
1680 s += abs(pix1[1] -
avg2(pix2[1], pix3[1]));
1681 s += abs(pix1[2] -
avg2(pix2[2], pix3[2]));
1682 s += abs(pix1[3] -
avg2(pix2[3], pix3[3]));
1683 s += abs(pix1[4] -
avg2(pix2[4], pix3[4]));
1684 s += abs(pix1[5] -
avg2(pix2[5], pix3[5]));
1685 s += abs(pix1[6] -
avg2(pix2[6], pix3[6]));
1686 s += abs(pix1[7] -
avg2(pix2[7], pix3[7]));
1697 uint8_t *pix3 = pix2 + line_size;
1701 s += abs(pix1[0] -
avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
1702 s += abs(pix1[1] -
avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
1703 s += abs(pix1[2] -
avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
1704 s += abs(pix1[3] -
avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
1705 s += abs(pix1[4] -
avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
1706 s += abs(pix1[5] -
avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
1707 s += abs(pix1[6] -
avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
1708 s += abs(pix1[7] -
avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
1723 for(x=0; x<16; x++){
1724 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1727 for(x=0; x<15; x++){
1728 score2+=
FFABS( s1[x ] - s1[x +stride]
1729 - s1[x+1] + s1[x+1+stride])
1730 -
FFABS( s2[x ] - s2[x +stride]
1731 - s2[x+1] + s2[x+1+stride]);
1739 else return score1 +
FFABS(score2)*8;
1750 score1+= (s1[x ] - s2[x ])*(s1[x ] - s2[x ]);
1754 score2+=
FFABS( s1[x ] - s1[x +stride]
1755 - s1[x+1] + s1[x+1+stride])
1756 -
FFABS( s2[x ] - s2[x +stride]
1757 - s2[x+1] + s2[x+1+stride]);
1765 else return score1 +
FFABS(score2)*8;
1772 for(i=0; i<8*8; i++){
1778 sum += (w*
b)*(w*b)>>4;
1786 for(i=0; i<8*8; i++){
1798 memset(cmp, 0,
sizeof(
void*)*6);
1857 for (i = 0; i <= w - (int)
sizeof(
long); i +=
sizeof(long)) {
1858 long a = *(
long*)(src+i);
1859 long b = *(
long*)(dst+i);
1863 dst[i+0] += src[i+0];
1868 #if !HAVE_FAST_UNALIGNED
1869 if((
long)src2 & (
sizeof(
long)-1)){
1870 for(i=0; i+7<w; i+=8){
1871 dst[i+0] = src1[i+0]-src2[i+0];
1872 dst[i+1] = src1[i+1]-src2[i+1];
1873 dst[i+2] = src1[i+2]-src2[i+2];
1874 dst[i+3] = src1[i+3]-src2[i+3];
1875 dst[i+4] = src1[i+4]-src2[i+4];
1876 dst[i+5] = src1[i+5]-src2[i+5];
1877 dst[i+6] = src1[i+6]-src2[i+6];
1878 dst[i+7] = src1[i+7]-src2[i+7];
1882 for (i = 0; i <= w - (int)
sizeof(
long); i +=
sizeof(long)) {
1883 long a = *(
long*)(src1+i);
1884 long b = *(
long*)(src2+i);
1888 dst[i+0] = src1[i+0]-src2[i+0];
1899 l=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
1916 const int pred=
mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
1929 for(i=0; i<w-1; i++){
1986 #define BUTTERFLY2(o1,o2,i1,i2) \
1990 #define BUTTERFLY1(x,y) \
/* Magnitude of a butterfly stage: |x+y| + |x-y|.  Used when summing
 * the absolute values of Hadamard transform outputs. */
#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
2010 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
2011 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
2012 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
2013 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
2055 BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
2056 BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
2057 BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
2058 BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
2089 sum -=
FFABS(temp[8*0] + temp[8*4]);
2107 const int s07 = SRC(0) + SRC(7);\
2108 const int s16 = SRC(1) + SRC(6);\
2109 const int s25 = SRC(2) + SRC(5);\
2110 const int s34 = SRC(3) + SRC(4);\
2111 const int a0 = s07 + s34;\
2112 const int a1 = s16 + s25;\
2113 const int a2 = s07 - s34;\
2114 const int a3 = s16 - s25;\
2115 const int d07 = SRC(0) - SRC(7);\
2116 const int d16 = SRC(1) - SRC(6);\
2117 const int d25 = SRC(2) - SRC(5);\
2118 const int d34 = SRC(3) - SRC(4);\
2119 const int a4 = d16 + d25 + (d07 + (d07>>1));\
2120 const int a5 = d07 - d34 - (d25 + (d25>>1));\
2121 const int a6 = d07 + d34 - (d16 + (d16>>1));\
2122 const int a7 = d16 - d25 + (d34 + (d34>>1));\
2124 DST(1, a4 + (a7>>2)) ;\
2125 DST(2, a2 + (a3>>1)) ;\
2126 DST(3, a5 + (a6>>2)) ;\
2128 DST(5, a6 - (a5>>2)) ;\
2129 DST(6, (a2>>1) - a3 ) ;\
2130 DST(7, (a4>>2) - a7 ) ;\
2141 #define SRC(x) dct[i][x]
2142 #define DST(x,v) dct[i][x]= v
2143 for( i = 0; i < 8; i++ )
2148 #define
SRC(x) dct[x][i]
2149 #define DST(x,v) sum += FFABS(v)
2150 for( i = 0; i < 8; i++ )
2177 int16_t *
const bak =
temp+64;
2185 memcpy(bak,
temp, 64*
sizeof(int16_t));
2192 sum+= (
temp[i]-bak[i])*(
temp[i]-bak[i]);
2232 for(i=start_i; i<last; i++){
2233 int j= scantable[i];
2238 if((level&(~127)) == 0){
2248 level=
temp[i] + 64;
2252 if((level&(~127)) == 0){
2268 distortion= s->
dsp.
sse[1](NULL, lsrc2, lsrc1, 8, 8);
2270 return distortion + ((bits*s->
qscale*s->
qscale*109 + 64)>>7);
2303 for(i=start_i; i<last; i++){
2304 int j= scantable[i];
2309 if((level&(~127)) == 0){
2319 level=
temp[i] + 64;
2323 if((level&(~127)) == 0){
2332 #define VSAD_INTRA(size) \
2333 static int vsad_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2337 for(y=1; y<h; y++){ \
2338 for(x=0; x<size; x+=4){ \
2339 score+= FFABS(s[x ] - s[x +stride]) + FFABS(s[x+1] - s[x+1+stride]) \
2340 +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]); \
2355 for(x=0; x<16; x++){
2356 score+=
FFABS(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
/* Square of a value.  NOTE: the argument is evaluated twice, so do not
 * pass expressions with side effects. */
#define SQ(a) ((a) * (a))
2366 #define VSSE_INTRA(size) \
2367 static int vsse_intra##size##_c( void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
2371 for(y=1; y<h; y++){ \
2372 for(x=0; x<size; x+=4){ \
2373 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) \
2374 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); \
2389 for(x=0; x<16; x++){
2390 score+=
SQ(s1[x ] - s2[x ] - s1[x +stride] + s2[x +stride]);
2403 for(i=0; i<
size; i++)
2404 score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
2408 #define WRAPPER8_16_SQ(name8, name16)\
2409 static int name16(void *s, uint8_t *dst, uint8_t *src, int stride, int h){\
2411 score +=name8(s, dst , src , stride, 8);\
2412 score +=name8(s, dst+8 , src+8 , stride, 8);\
2416 score +=name8(s, dst , src , stride, 8);\
2417 score +=name8(s, dst+8 , src+8 , stride, 8);\
2434 uint32_t maxi, uint32_t maxisign)
2437 if(a > mini)
return mini;
2438 else if((a^(1
U<<31)) > maxisign)
return maxi;
2444 uint32_t mini = *(uint32_t*)min;
2445 uint32_t maxi = *(uint32_t*)max;
2446 uint32_t maxisign = maxi ^ (1
U<<31);
2447 uint32_t *dsti = (uint32_t*)dst;
2448 const uint32_t *srci = (
const uint32_t*)src;
2449 for(i=0; i<
len; i+=8) {
2450 dsti[i + 0] =
clipf_c_one(srci[i + 0], mini, maxi, maxisign);
2451 dsti[i + 1] =
clipf_c_one(srci[i + 1], mini, maxi, maxisign);
2452 dsti[i + 2] =
clipf_c_one(srci[i + 2], mini, maxi, maxisign);
2453 dsti[i + 3] =
clipf_c_one(srci[i + 3], mini, maxi, maxisign);
2454 dsti[i + 4] =
clipf_c_one(srci[i + 4], mini, maxi, maxisign);
2455 dsti[i + 5] =
clipf_c_one(srci[i + 5], mini, maxi, maxisign);
2456 dsti[i + 6] =
clipf_c_one(srci[i + 6], mini, maxi, maxisign);
2457 dsti[i + 7] =
clipf_c_one(srci[i + 7], mini, maxi, maxisign);
2462 if(min < 0 && max > 0) {
2465 for(i=0; i <
len; i+=8) {
2466 dst[i ] = av_clipf(src[i ], min, max);
2467 dst[i + 1] = av_clipf(src[i + 1], min, max);
2468 dst[i + 2] = av_clipf(src[i + 2], min, max);
2469 dst[i + 3] = av_clipf(src[i + 3], min, max);
2470 dst[i + 4] = av_clipf(src[i + 4], min, max);
2471 dst[i + 5] = av_clipf(src[i + 5], min, max);
2472 dst[i + 6] = av_clipf(src[i + 6], min, max);
2473 dst[i + 7] = av_clipf(src[i + 7], min, max);
2483 res += *v1++ * *v2++;
2493 *v1++ += mul * *v3++;
2502 *dst++ = av_clip(*src++, min, max);
2503 *dst++ = av_clip(*src++, min, max);
2504 *dst++ = av_clip(*src++, min, max);
2505 *dst++ = av_clip(*src++, min, max);
2506 *dst++ = av_clip(*src++, min, max);
2507 *dst++ = av_clip(*src++, min, max);
2508 *dst++ = av_clip(*src++, min, max);
2509 *dst++ = av_clip(*src++, min, max);
2549 dest[0] = av_clip_uint8((block[0] + 4)>>3);
2553 dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
2561 for(i=0;i<512;i++) {
2567 static int did_fail=0;
2570 if((intptr_t)aligned & 15){
2572 #if HAVE_MMX || HAVE_ALTIVEC
2574 "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2575 "and may be very slow or crash. This is not a bug in libavcodec,\n"
2576 "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2577 "Do not report crashes to FFmpeg developers.\n");
2608 #endif //CONFIG_ENCODERS
2615 }
else if(avctx->
lowres==2){
2620 }
else if(avctx->
lowres==3){
/* Fill one row of a quarter-pel motion-compensation function table:
 * c->PFX_pixels_tab[IDX][4*y + x] = PFX ## NUM ## _mcXYc, where X/Y are
 * the horizontal/vertical quarter-pel offsets (0..3).  PFX selects
 * put/avg and rounding flavour, NUM the block size (8 or 16). */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c
2718 dspfunc(put_no_rnd_qpel, 0, 16);
2724 dspfunc(put_no_rnd_qpel, 1, 8);
/* Install both block-size variants of a comparison function into the
 * context: slot [0] is the 16x16 version, slot [1] the 8x8 version. */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;
2761 c->
vsad[4]= vsad_intra16_c;
2762 c->
vsad[5]= vsad_intra8_c;
2764 c->
vsse[4]= vsse_intra16_c;
2765 c->
vsse[5]= vsse_intra8_c;
2768 #if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
/* Build bit-depth-suffixed function names by token pasting:
 * FUNC(f, 8) -> f_8, FUNCC(f, 8) -> f_8_c. */
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c
2807 #define BIT_DEPTH_FUNCS(depth) \
2808 c->get_pixels = FUNCC(get_pixels, depth);