FFmpeg — h264pred_template.c (bit-depth-templated H.264 intra prediction functions)
1 /*
2  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3  * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * H.264 / AVC / MPEG-4 part10 prediction functions.
25  * @author Michael Niedermayer <michaelni@gmx.at>
26  */
27 
28 #include "libavutil/intreadwrite.h"
29 
30 #include "mathops.h"
31 
32 #include "bit_depth_template.c"
33 
34 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright,
35  ptrdiff_t _stride)
36 {
37  pixel *src = (pixel*)_src;
38  int stride = _stride>>(sizeof(pixel)-1);
39  const pixel4 a= AV_RN4PA(src-stride);
40 
41  AV_WN4PA(src+0*stride, a);
42  AV_WN4PA(src+1*stride, a);
43  AV_WN4PA(src+2*stride, a);
44  AV_WN4PA(src+3*stride, a);
45 }
46 
47 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright,
48  ptrdiff_t _stride)
49 {
50  pixel *src = (pixel*)_src;
51  int stride = _stride>>(sizeof(pixel)-1);
56 }
57 
/**
 * 4x4 DC prediction: fill the block with the average of the four top
 * and four left neighbour pixels.
 */
static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright,
                              ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    /* 8 neighbour samples; +4 rounds, >>3 averages. */
    const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                   + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}
72 
/**
 * 4x4 left-DC prediction: average of the four left neighbours only
 * (top neighbours unavailable).
 */
static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright,
                                   ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}
86 
/**
 * 4x4 top-DC prediction: average of the four top neighbours only
 * (left neighbours unavailable).
 */
static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright,
                                  ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
    const pixel4 a = PIXEL_SPLAT_X4(dc);

    AV_WN4PA(src+0*stride, a);
    AV_WN4PA(src+1*stride, a);
    AV_WN4PA(src+2*stride, a);
    AV_WN4PA(src+3*stride, a);
}
100 
101 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright,
102  ptrdiff_t _stride)
103 {
104  pixel *src = (pixel*)_src;
105  int stride = _stride>>(sizeof(pixel)-1);
106  const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
107 
108  AV_WN4PA(src+0*stride, a);
109  AV_WN4PA(src+1*stride, a);
110  AV_WN4PA(src+2*stride, a);
111  AV_WN4PA(src+3*stride, a);
112 }
113 
114 
/* Load the four top-right neighbours (t4..t7) from the topright pointer. */
#define LOAD_TOP_RIGHT_EDGE\
    const unsigned av_unused t4 = topright[0];\
    const unsigned av_unused t5 = topright[1];\
    const unsigned av_unused t6 = topright[2];\
    const unsigned av_unused t7 = topright[3];\

/* Load the four below-left neighbours (l4..l7) from the column at x = -1. */
#define LOAD_DOWN_LEFT_EDGE\
    const unsigned av_unused l4 = src[-1+4*stride];\
    const unsigned av_unused l5 = src[-1+5*stride];\
    const unsigned av_unused l6 = src[-1+6*stride];\
    const unsigned av_unused l7 = src[-1+7*stride];\

/* Load the four left neighbours (l0..l3) from the column at x = -1. */
#define LOAD_LEFT_EDGE\
    const unsigned av_unused l0 = src[-1+0*stride];\
    const unsigned av_unused l1 = src[-1+1*stride];\
    const unsigned av_unused l2 = src[-1+2*stride];\
    const unsigned av_unused l3 = src[-1+3*stride];\

/* Load the four top neighbours (t0..t3) from the row at y = -1. */
#define LOAD_TOP_EDGE\
    const unsigned av_unused t0 = src[ 0-1*stride];\
    const unsigned av_unused t1 = src[ 1-1*stride];\
    const unsigned av_unused t2 = src[ 2-1*stride];\
    const unsigned av_unused t3 = src[ 3-1*stride];\

138 
139 static void FUNCC(pred4x4_down_right)(uint8_t *_src, const uint8_t *topright,
140  ptrdiff_t _stride)
141 {
142  pixel *src = (pixel*)_src;
143  int stride = _stride>>(sizeof(pixel)-1);
144  const int lt= src[-1-1*stride];
147 
148  src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
149  src[0+2*stride]=
150  src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
151  src[0+1*stride]=
152  src[1+2*stride]=
153  src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
154  src[0+0*stride]=
155  src[1+1*stride]=
156  src[2+2*stride]=
157  src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
158  src[1+0*stride]=
159  src[2+1*stride]=
160  src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
161  src[2+0*stride]=
162  src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
163  src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
164 }
165 
166 static void FUNCC(pred4x4_down_left)(uint8_t *_src, const uint8_t *_topright,
167  ptrdiff_t _stride)
168 {
169  pixel *src = (pixel*)_src;
170  const pixel *topright = (const pixel*)_topright;
171  int stride = _stride>>(sizeof(pixel)-1);
174 // LOAD_LEFT_EDGE
175 
176  src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
177  src[1+0*stride]=
178  src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
179  src[2+0*stride]=
180  src[1+1*stride]=
181  src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
182  src[3+0*stride]=
183  src[2+1*stride]=
184  src[1+2*stride]=
185  src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
186  src[3+1*stride]=
187  src[2+2*stride]=
188  src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
189  src[3+2*stride]=
190  src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
191  src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
192 }
193 
194 static void FUNCC(pred4x4_vertical_right)(uint8_t *_src,
195  const uint8_t *topright,
196  ptrdiff_t _stride)
197 {
198  pixel *src = (pixel*)_src;
199  int stride = _stride>>(sizeof(pixel)-1);
200  const int lt= src[-1-1*stride];
203 
204  src[0+0*stride]=
205  src[1+2*stride]=(lt + t0 + 1)>>1;
206  src[1+0*stride]=
207  src[2+2*stride]=(t0 + t1 + 1)>>1;
208  src[2+0*stride]=
209  src[3+2*stride]=(t1 + t2 + 1)>>1;
210  src[3+0*stride]=(t2 + t3 + 1)>>1;
211  src[0+1*stride]=
212  src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
213  src[1+1*stride]=
214  src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
215  src[2+1*stride]=
216  src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
217  src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
218  src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
219  src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
220 }
221 
222 static void FUNCC(pred4x4_vertical_left)(uint8_t *_src,
223  const uint8_t *_topright,
224  ptrdiff_t _stride)
225 {
226  pixel *src = (pixel*)_src;
227  const pixel *topright = (const pixel*)_topright;
228  int stride = _stride>>(sizeof(pixel)-1);
231 
232  src[0+0*stride]=(t0 + t1 + 1)>>1;
233  src[1+0*stride]=
234  src[0+2*stride]=(t1 + t2 + 1)>>1;
235  src[2+0*stride]=
236  src[1+2*stride]=(t2 + t3 + 1)>>1;
237  src[3+0*stride]=
238  src[2+2*stride]=(t3 + t4+ 1)>>1;
239  src[3+2*stride]=(t4 + t5+ 1)>>1;
240  src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
241  src[1+1*stride]=
242  src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
243  src[2+1*stride]=
244  src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
245  src[3+1*stride]=
246  src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
247  src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
248 }
249 
250 static void FUNCC(pred4x4_horizontal_up)(uint8_t *_src, const uint8_t *topright,
251  ptrdiff_t _stride)
252 {
253  pixel *src = (pixel*)_src;
254  int stride = _stride>>(sizeof(pixel)-1);
256 
257  src[0+0*stride]=(l0 + l1 + 1)>>1;
258  src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
259  src[2+0*stride]=
260  src[0+1*stride]=(l1 + l2 + 1)>>1;
261  src[3+0*stride]=
262  src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
263  src[2+1*stride]=
264  src[0+2*stride]=(l2 + l3 + 1)>>1;
265  src[3+1*stride]=
266  src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
267  src[3+2*stride]=
268  src[1+3*stride]=
269  src[0+3*stride]=
270  src[2+2*stride]=
271  src[2+3*stride]=
272  src[3+3*stride]=l3;
273 }
274 
275 static void FUNCC(pred4x4_horizontal_down)(uint8_t *_src,
276  const uint8_t *topright,
277  ptrdiff_t _stride)
278 {
279  pixel *src = (pixel*)_src;
280  int stride = _stride>>(sizeof(pixel)-1);
281  const int lt= src[-1-1*stride];
284 
285  src[0+0*stride]=
286  src[2+1*stride]=(lt + l0 + 1)>>1;
287  src[1+0*stride]=
288  src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
289  src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
290  src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
291  src[0+1*stride]=
292  src[2+2*stride]=(l0 + l1 + 1)>>1;
293  src[1+1*stride]=
294  src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
295  src[0+2*stride]=
296  src[2+3*stride]=(l1 + l2+ 1)>>1;
297  src[1+2*stride]=
298  src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
299  src[0+3*stride]=(l2 + l3 + 1)>>1;
300  src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
301 }
302 
/**
 * 16x16 vertical prediction: copy the row above the macroblock into
 * all 16 rows.
 */
static void FUNCC(pred16x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    /* The 16 top neighbours, read as four packed pixel4 groups. */
    const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
    const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
    const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
        AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
        AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
    }
}
320 
321 static void FUNCC(pred16x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
322 {
323  int i;
324  pixel *src = (pixel*)_src;
325  stride >>= sizeof(pixel)-1;
326 
327  for(i=0; i<16; i++){
328  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
329 
330  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
331  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
332  AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
333  AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
334  }
335 }
336 
/* Fill all 16 rows of the macroblock with the splatted DC value v;
 * expects `src`, `stride` and loop index `i` in scope, advances `src`. */
#define PREDICT_16x16_DC(v)\
    for(i=0; i<16; i++){\
        AV_WN4PA(src+ 0, v);\
        AV_WN4PA(src+ 4, v);\
        AV_WN4PA(src+ 8, v);\
        AV_WN4PA(src+12, v);\
        src += stride;\
    }
345 
/**
 * 16x16 DC prediction: average of the 16 left and 16 top neighbours.
 */
static void FUNCC(pred16x16_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    /* Sum the left column... */
    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    /* ...and the top row. */
    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    /* 32 samples: +16 rounds, >>5 averages. */
    dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
    PREDICT_16x16_DC(dcsplat);
}
364 
/**
 * 16x16 left-DC prediction: average of the 16 left neighbours only.
 */
static void FUNCC(pred16x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<16; i++){
        dc+= src[-1+i*stride];
    }

    /* 16 samples: +8 rounds, >>4 averages. */
    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
379 
/**
 * 16x16 top-DC prediction: average of the 16 top neighbours only.
 */
static void FUNCC(pred16x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i, dc=0;
    pixel *src = (pixel*)_src;
    pixel4 dcsplat;
    stride >>= sizeof(pixel)-1;

    for(i=0;i<16; i++){
        dc+= src[i-stride];
    }

    /* 16 samples: +8 rounds, >>4 averages. */
    dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
    PREDICT_16x16_DC(dcsplat);
}
394 
/* Define pred16x16_<n>_dc(): fill the macroblock with constant value v. */
#define PRED16x16_X(n, v) \
static void FUNCC(pred16x16_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
{\
    int i;\
    pixel *src = (pixel*)_src;\
    stride >>= sizeof(pixel)-1;\
    PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
}

/* 128 means mid-grey at any bit depth; 127/129 variants exist for 8-bit only. */
PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0)
#if BIT_DEPTH == 8
PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1)
PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1)
#endif
409 
410 static inline void FUNCC(pred16x16_plane_compat)(uint8_t *_src,
411  ptrdiff_t _stride,
412  const int svq3,
413  const int rv40)
414 {
415  int i, j, k;
416  int a;
418  int stride = _stride>>(sizeof(pixel)-1);
419  const pixel * const src0 = src +7-stride;
420  const pixel * src1 = src +8*stride-1;
421  const pixel * src2 = src1-2*stride; // == src+6*stride-1;
422  int H = src0[1] - src0[-1];
423  int V = src1[0] - src2[ 0];
424  for(k=2; k<=8; ++k) {
425  src1 += stride; src2 -= stride;
426  H += k*(src0[k] - src0[-k]);
427  V += k*(src1[0] - src2[ 0]);
428  }
429  if(svq3){
430  H = ( 5*(H/4) ) / 16;
431  V = ( 5*(V/4) ) / 16;
432 
433  /* required for 100% accuracy */
434  i = H; H = V; V = i;
435  }else if(rv40){
436  H = ( H + (H>>2) ) >> 4;
437  V = ( V + (V>>2) ) >> 4;
438  }else{
439  H = ( 5*H+32 ) >> 6;
440  V = ( 5*V+32 ) >> 6;
441  }
442 
443  a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
444  for(j=16; j>0; --j) {
445  int b = a;
446  a += V;
447  for(i=-16; i<0; i+=4) {
448  src[16+i] = CLIP((b ) >> 5);
449  src[17+i] = CLIP((b+ H) >> 5);
450  src[18+i] = CLIP((b+2*H) >> 5);
451  src[19+i] = CLIP((b+3*H) >> 5);
452  b += 4*H;
453  }
454  src += stride;
455  }
456 }
457 
/* Plain H.264 16x16 plane prediction (no SVQ3/RV40 scaling). */
static void FUNCC(pred16x16_plane)(uint8_t *src, ptrdiff_t stride)
{
    FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
}
462 
/**
 * 8x8 vertical prediction: copy the row above into all 8 rows.
 */
static void FUNCC(pred8x8_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

    for(i=0; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
    }
}
476 
/**
 * 8x16 vertical prediction (4:2:2 chroma): copy the row above into all
 * 16 rows.
 */
static void FUNCC(pred8x16_vertical)(uint8_t *_src, ptrdiff_t _stride)
{
    int i;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
    }
}
490 
491 static void FUNCC(pred8x8_horizontal)(uint8_t *_src, ptrdiff_t stride)
492 {
493  int i;
494  pixel *src = (pixel*)_src;
495  stride >>= sizeof(pixel)-1;
496 
497  for(i=0; i<8; i++){
498  const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
499  AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
500  AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
501  }
502 }
503 
/**
 * 8x16 horizontal prediction (4:2:2 chroma): each row is filled with
 * its own left neighbour.
 */
static void FUNCC(pred8x16_horizontal)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;
    for(i=0; i<16; i++){
        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
    }
}
515 
/* Define pred8x8_<n>_dc(): fill the 8x8 block with constant value v. */
#define PRED8x8_X(n, v)\
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, ptrdiff_t stride)\
{\
    int i;\
    const pixel4 a = PIXEL_SPLAT_X4(v);\
    pixel *src = (pixel*)_src;\
    stride >>= sizeof(pixel)-1;\
    for(i=0; i<8; i++){\
        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
    }\
}

/* 128 means mid-grey at any bit depth; 127/129 variants exist for 8-bit only. */
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0)
#if BIT_DEPTH == 8
PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1)
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1)
#endif
534 
535 static void FUNCC(pred8x16_128_dc)(uint8_t *_src, ptrdiff_t stride)
536 {
539 }
540 
/**
 * 8x8 left-DC prediction: the top and bottom 4-row halves are each
 * filled with the average of their own four left neighbours.
 */
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc2;
    pixel4 dc0splat, dc2splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

    /* Top half gets dc0, bottom half dc2. */
    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
    }
}
566 
567 static void FUNCC(pred8x16_left_dc)(uint8_t *_src, ptrdiff_t stride)
568 {
571 }
572 
/**
 * 8x8 top-DC prediction: the left and right 4-column halves are each
 * filled with the average of their own four top neighbours.
 */
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    /* Left columns get dc0, right columns dc1, for all 8 rows. */
    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
}
598 
/**
 * 8x16 top-DC prediction (4:2:2 chroma): left/right 4-column halves
 * filled with the average of their own four top neighbours, over all
 * 16 rows.
 */
static void FUNCC(pred8x16_top_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1;
    pixel4 dc0splat, dc1splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=0;
    for(i=0;i<4; i++){
        dc0+= src[i-stride];
        dc1+= src[4+i-stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

    for(i=0; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
}
620 
/**
 * 8x8 DC prediction: the block is split into four 4x4 quadrants, each
 * filled with the DC of its available neighbours (per H.264 8.3.4):
 * top-left uses top+left, top-right uses top only, bottom-left uses
 * left only, bottom-right averages the other two sums.
 */
static void FUNCC(pred8x8_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1, dc2;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=dc2=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
    }
}
649 
/**
 * 8x16 DC prediction (4:2:2 chroma): eight 4x4 quadrants. The top-left
 * quadrant uses top+left neighbours; the right column of quadrants
 * mixes the top-right sum (dc1) with the left sum of each row band.
 */
static void FUNCC(pred8x16_dc)(uint8_t *_src, ptrdiff_t stride)
{
    int i;
    int dc0, dc1, dc2, dc3, dc4;
    pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat;
    pixel *src = (pixel*)_src;
    stride >>= sizeof(pixel)-1;

    dc0=dc1=dc2=dc3=dc4=0;
    for(i=0;i<4; i++){
        dc0+= src[-1+i*stride] + src[i-stride];
        dc1+= src[4+i-stride];
        dc2+= src[-1+(i+4)*stride];
        dc3+= src[-1+(i+8)*stride];
        dc4+= src[-1+(i+12)*stride];
    }
    dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
    dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
    dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
    dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
    dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2);
    dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3);
    dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2);
    dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3);

    for(i=0; i<4; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
    }
    for(i=4; i<8; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
    }
    for(i=8; i<12; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat);
    }
    for(i=12; i<16; i++){
        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat);
        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat);
    }
}
692 
// The following functions should not be optimized!
694 static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
695 {
698 }
699 
700 static void FUNC(pred8x16_mad_cow_dc_l0t)(uint8_t *src, ptrdiff_t stride)
701 {
704 }
705 
706 static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
707 {
710 }
711 
712 static void FUNC(pred8x16_mad_cow_dc_0lt)(uint8_t *src, ptrdiff_t stride)
713 {
716 }
717 
718 static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
719 {
722  FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
723 }
724 
725 static void FUNC(pred8x16_mad_cow_dc_l00)(uint8_t *src, ptrdiff_t stride)
726 {
729  FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
730 }
731 
732 static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
733 {
736  FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
737 }
738 
739 static void FUNC(pred8x16_mad_cow_dc_0l0)(uint8_t *src, ptrdiff_t stride)
740 {
743  FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
744 }
745 
/**
 * 8x8 plane prediction: least-squares-style gradient fit of the top
 * row and left column, extrapolated across the block and clipped to
 * the pixel range.
 */
static void FUNCC(pred8x8_plane)(uint8_t *_src, ptrdiff_t _stride)
{
    int j, k;
    int a;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel * const src0 = src +3-stride;
    const pixel *       src1 = src +4*stride-1;
    const pixel *       src2 = src1-2*stride;    // == src+2*stride-1;
    /* Weighted neighbour gradients: horizontal (H) and vertical (V). */
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];
    for(k=2; k<=4; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    H = ( 17*H+16 ) >> 5;
    V = ( 17*V+16 ) >> 5;

    a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
    for(j=8; j>0; --j) {
        int b = a;
        a += V;
        src[0] = CLIP((b    ) >> 5);
        src[1] = CLIP((b+  H) >> 5);
        src[2] = CLIP((b+2*H) >> 5);
        src[3] = CLIP((b+3*H) >> 5);
        src[4] = CLIP((b+4*H) >> 5);
        src[5] = CLIP((b+5*H) >> 5);
        src[6] = CLIP((b+6*H) >> 5);
        src[7] = CLIP((b+7*H) >> 5);
        src += stride;
    }
}
780 
/**
 * 8x16 plane prediction (4:2:2 chroma): gradient fit over an 8-wide,
 * 16-tall block; the vertical gradient uses all 16 left neighbours.
 */
static void FUNCC(pred8x16_plane)(uint8_t *_src, ptrdiff_t _stride)
{
    int j, k;
    int a;
    pixel *src = (pixel*)_src;
    int stride = _stride>>(sizeof(pixel)-1);
    const pixel * const src0 = src +3-stride;
    const pixel *       src1 = src +8*stride-1;
    const pixel *       src2 = src1-2*stride;    // == src+6*stride-1;
    int H = src0[1] - src0[-1];
    int V = src1[0] - src2[ 0];

    /* H uses the 8-pixel top row, V the full 16-pixel left column. */
    for (k = 2; k <= 4; ++k) {
        src1 += stride; src2 -= stride;
        H += k*(src0[k] - src0[-k]);
        V += k*(src1[0] - src2[ 0]);
    }
    for (; k <= 8; ++k) {
        src1 += stride; src2 -= stride;
        V += k*(src1[0] - src2[0]);
    }

    H = (17*H+16) >> 5;
    V = (5*V+32) >> 6;

    a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H;
    for(j=16; j>0; --j) {
        int b = a;
        a += V;
        src[0] = CLIP((b    ) >> 5);
        src[1] = CLIP((b+  H) >> 5);
        src[2] = CLIP((b+2*H) >> 5);
        src[3] = CLIP((b+3*H) >> 5);
        src[4] = CLIP((b+4*H) >> 5);
        src[5] = CLIP((b+5*H) >> 5);
        src[6] = CLIP((b+6*H) >> 5);
        src[7] = CLIP((b+7*H) >> 5);
        src += stride;
    }
}
821 
/* Address pixel (x,y) within the current 8x8 block. */
#define SRC(x,y) src[(x)+(y)*stride]

/* (1,2,1)-filtered left-edge sample at row y. */
#define PL(y) \
    const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
/* Declare the filtered left edge l0..l7; l0 falls back to SRC(-1,0)
 * when the top-left neighbour is unavailable. */
#define PREDICT_8x8_LOAD_LEFT \
    const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
                     + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
    PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
    const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2

/* (1,2,1)-filtered top-edge sample at column x. */
#define PT(x) \
    const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declare the filtered top edge t0..t7 with top-left/top-right fallbacks. */
#define PREDICT_8x8_LOAD_TOP \
    const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
                     + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
    PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
    const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
                     + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2

/* (1,2,1)-filtered top-right sample at column x. */
#define PTR(x) \
    t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
/* Declare t8..t15; replicated from SRC(7,-1) when top-right is missing. */
#define PREDICT_8x8_LOAD_TOPRIGHT \
    int t8, t9, t10, t11, t12, t13, t14, t15; \
    if(has_topright) { \
        PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
        t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
    } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);

/* Declare the filtered top-left corner sample lt. */
#define PREDICT_8x8_LOAD_TOPLEFT \
    const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2

/* Fill all 8 rows with the splatted DC value v; advances src. */
#define PREDICT_8x8_DC(v) \
    int y; \
    for( y = 0; y < 8; y++ ) { \
        AV_WN4PA(((pixel4*)src)+0, v); \
        AV_WN4PA(((pixel4*)src)+1, v); \
        src += stride; \
    }
859 
860 static void FUNCC(pred8x8l_128_dc)(uint8_t *_src, int has_topleft,
861  int has_topright, ptrdiff_t _stride)
862 {
863  pixel *src = (pixel*)_src;
864  int stride = _stride>>(sizeof(pixel)-1);
865 
867 }
868 static void FUNCC(pred8x8l_left_dc)(uint8_t *_src, int has_topleft,
869  int has_topright, ptrdiff_t _stride)
870 {
871  pixel *src = (pixel*)_src;
872  int stride = _stride>>(sizeof(pixel)-1);
873 
875  const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
877 }
878 static void FUNCC(pred8x8l_top_dc)(uint8_t *_src, int has_topleft,
879  int has_topright, ptrdiff_t _stride)
880 {
881  pixel *src = (pixel*)_src;
882  int stride = _stride>>(sizeof(pixel)-1);
883 
885  const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
887 }
888 static void FUNCC(pred8x8l_dc)(uint8_t *_src, int has_topleft,
889  int has_topright, ptrdiff_t _stride)
890 {
891  pixel *src = (pixel*)_src;
892  int stride = _stride>>(sizeof(pixel)-1);
893 
896  const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
897  +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
899 }
900 static void FUNCC(pred8x8l_horizontal)(uint8_t *_src, int has_topleft,
901  int has_topright, ptrdiff_t _stride)
902 {
903  pixel *src = (pixel*)_src;
904  int stride = _stride>>(sizeof(pixel)-1);
905  pixel4 a;
906 
908 #define ROW(y) a = PIXEL_SPLAT_X4(l##y); \
909  AV_WN4PA(src+y*stride, a); \
910  AV_WN4PA(src+y*stride+4, a);
911  ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
912 #undef ROW
913 }
914 static void FUNCC(pred8x8l_vertical)(uint8_t *_src, int has_topleft,
915  int has_topright, ptrdiff_t _stride)
916 {
917  int y;
918  pixel *src = (pixel*)_src;
919  int stride = _stride>>(sizeof(pixel)-1);
920  pixel4 a, b;
921 
923  src[0] = t0;
924  src[1] = t1;
925  src[2] = t2;
926  src[3] = t3;
927  src[4] = t4;
928  src[5] = t5;
929  src[6] = t6;
930  src[7] = t7;
931  a = AV_RN4PA(((pixel4*)src)+0);
932  b = AV_RN4PA(((pixel4*)src)+1);
933  for( y = 1; y < 8; y++ ) {
934  AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
935  AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
936  }
937 }
938 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft,
939  int has_topright, ptrdiff_t _stride)
940 {
941  pixel *src = (pixel*)_src;
942  int stride = _stride>>(sizeof(pixel)-1);
945  SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
946  SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
947  SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
948  SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
949  SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
950  SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
951  SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
952  SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
953  SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
954  SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
955  SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
956  SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
957  SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
958  SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
959  SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
960 }
961 static void FUNCC(pred8x8l_down_right)(uint8_t *_src, int has_topleft,
962  int has_topright, ptrdiff_t _stride)
963 {
964  pixel *src = (pixel*)_src;
965  int stride = _stride>>(sizeof(pixel)-1);
969  SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
970  SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
971  SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
972  SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
973  SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
974  SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
975  SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
976  SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
977  SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
978  SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
979  SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
980  SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
981  SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
982  SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
983  SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
984 }
985 static void FUNCC(pred8x8l_vertical_right)(uint8_t *_src, int has_topleft,
986  int has_topright, ptrdiff_t _stride)
987 {
988  pixel *src = (pixel*)_src;
989  int stride = _stride>>(sizeof(pixel)-1);
993  SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
994  SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
995  SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
996  SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
997  SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
998  SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
999  SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
1000  SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
1001  SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
1002  SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
1003  SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
1004  SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
1005  SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
1006  SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
1007  SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
1008  SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
1009  SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
1010  SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
1011  SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
1012  SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
1013  SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
1014  SRC(7,0)= (t6 + t7 + 1) >> 1;
1015 }
/**
 * Intra_8x8 "horizontal down" prediction (H.264 mode 6) for an 8x8 luma block.
 * Each output pixel is a 2-tap ((a+b+1)>>1) or 3-tap ((a+2b+c+2)>>2) rounded
 * average of left-column (l0..l7), top-left (lt) and top-row (t0..t6)
 * neighbours, replicated along down-right diagonals via the chained SRC()
 * assignments.
 *
 * NOTE(review): the PREDICT_8x8_LOAD_LEFT / PREDICT_8x8_LOAD_TOP /
 * PREDICT_8x8_LOAD_TOPLEFT invocations that define l0..l7, t0..t6 and lt
 * appear to have been lost in extraction — TODO confirm against upstream.
 */
static void FUNCC(pred8x8l_horizontal_down)(uint8_t *_src, int has_topleft,
                                            int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    /* byte stride -> pixel stride: sizeof(pixel)-1 is 0 (8-bit) or 1 (16-bit) */
    int stride = _stride>>(sizeof(pixel)-1);
    SRC(0,7)= (l6 + l7 + 1) >> 1;
    SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
    SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
    SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
    SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
    SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
    SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
    SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
    SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
    /* top row of the block blends across the corner into the top edge */
    SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
    SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
    SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
    SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
    SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
    SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
    SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
}
/**
 * Intra_8x8 "vertical left" prediction (H.264 mode 7) for an 8x8 luma block.
 * Predicts purely from the (filtered) top edge t0..t12: even rows use 2-tap
 * half-pel averages, odd rows use 3-tap quarter-pel averages, replicated
 * along down-left diagonals via the chained SRC() assignments.
 *
 * NOTE(review): the PREDICT_8x8_LOAD_TOP / PREDICT_8x8_LOAD_TOPRIGHT
 * invocations that define t0..t12 appear to have been lost in extraction —
 * TODO confirm against upstream.
 */
static void FUNCC(pred8x8l_vertical_left)(uint8_t *_src, int has_topleft,
                                          int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    /* byte stride -> pixel stride */
    int stride = _stride>>(sizeof(pixel)-1);
    SRC(0,0)= (t0 + t1 + 1) >> 1;
    SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
    SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
    SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
    SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
    SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
    SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
    SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
    SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
    SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
    SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
    SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
    SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
    SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
    SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
    SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
    SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
    SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
    SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
    SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
    SRC(7,6)= (t10 + t11 + 1) >> 1;
    SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
}
/**
 * Intra_8x8 "horizontal up" prediction (H.264 mode 8) for an 8x8 luma block.
 * Predicts purely from the left edge l0..l7 with 2-tap / 3-tap rounded
 * averages replicated along up-right diagonals; the bottom-right region,
 * which has no further left neighbours, is flat-filled with l7.
 *
 * NOTE(review): the PREDICT_8x8_LOAD_LEFT invocation that defines l0..l7
 * appears to have been lost in extraction — TODO confirm against upstream.
 */
static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft,
                                          int has_topright, ptrdiff_t _stride)
{
    pixel *src = (pixel*)_src;
    /* byte stride -> pixel stride */
    int stride = _stride>>(sizeof(pixel)-1);
    SRC(0,0)= (l0 + l1 + 1) >> 1;
    SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
    SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
    SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
    SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
    SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
    SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
    SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
    SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
    SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
    SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
    SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
    SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
    /* last filtered diagonal: l7 is the final neighbour, so it is tripled */
    SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
    /* remaining lower-right triangle: replicate the bottom-left neighbour */
    SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
    SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
    SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
    SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
}
1102 
1103 static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1104  int has_topright, ptrdiff_t _stride)
1105 {
1106  int i;
1107  pixel *src = (pixel*)_src;
1108  const dctcoef *block = (const dctcoef*)_block;
1109  pixel pix[8];
1110  int stride = _stride>>(sizeof(pixel)-1);
1112 
1113  pix[0] = t0;
1114  pix[1] = t1;
1115  pix[2] = t2;
1116  pix[3] = t3;
1117  pix[4] = t4;
1118  pix[5] = t5;
1119  pix[6] = t6;
1120  pix[7] = t7;
1121 
1122  for(i=0; i<8; i++){
1123  pixel v = pix[i];
1124  src[0*stride]= v += block[0];
1125  src[1*stride]= v += block[8];
1126  src[2*stride]= v += block[16];
1127  src[3*stride]= v += block[24];
1128  src[4*stride]= v += block[32];
1129  src[5*stride]= v += block[40];
1130  src[6*stride]= v += block[48];
1131  src[7*stride]= v + block[56];
1132  src++;
1133  block++;
1134  }
1135 
1136  memset(_block, 0, sizeof(dctcoef) * 64);
1137 }
1138 
1139 static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft,
1140  int has_topright, ptrdiff_t _stride)
1141 {
1142  int i;
1143  pixel *src = (pixel*)_src;
1144  const dctcoef *block = (const dctcoef*)_block;
1145  pixel pix[8];
1146  int stride = _stride>>(sizeof(pixel)-1);
1148 
1149  pix[0] = l0;
1150  pix[1] = l1;
1151  pix[2] = l2;
1152  pix[3] = l3;
1153  pix[4] = l4;
1154  pix[5] = l5;
1155  pix[6] = l6;
1156  pix[7] = l7;
1157 
1158  for(i=0; i<8; i++){
1159  pixel v = pix[i];
1160  src[0]= v += block[0];
1161  src[1]= v += block[1];
1162  src[2]= v += block[2];
1163  src[3]= v += block[3];
1164  src[4]= v += block[4];
1165  src[5]= v += block[5];
1166  src[6]= v += block[6];
1167  src[7]= v + block[7];
1168  src+= stride;
1169  block+= 8;
1170  }
1171 
1172  memset(_block, 0, sizeof(dctcoef) * 64);
1173 }
1174 
1175 #undef PREDICT_8x8_LOAD_LEFT
1176 #undef PREDICT_8x8_LOAD_TOP
1177 #undef PREDICT_8x8_LOAD_TOPLEFT
1178 #undef PREDICT_8x8_LOAD_TOPRIGHT
1179 #undef PREDICT_8x8_DC
1180 #undef PTR
1181 #undef PT
1182 #undef PL
1183 #undef SRC
1184 
1185 static void FUNCC(pred4x4_vertical_add)(uint8_t *_pix, int16_t *_block,
1186  ptrdiff_t stride)
1187 {
1188  int i;
1189  pixel *pix = (pixel*)_pix;
1190  const dctcoef *block = (const dctcoef*)_block;
1191  stride >>= sizeof(pixel)-1;
1192  pix -= stride;
1193  for(i=0; i<4; i++){
1194  pixel v = pix[0];
1195  pix[1*stride]= v += block[0];
1196  pix[2*stride]= v += block[4];
1197  pix[3*stride]= v += block[8];
1198  pix[4*stride]= v + block[12];
1199  pix++;
1200  block++;
1201  }
1202 
1203  memset(_block, 0, sizeof(dctcoef) * 16);
1204 }
1205 
1206 static void FUNCC(pred4x4_horizontal_add)(uint8_t *_pix, int16_t *_block,
1207  ptrdiff_t stride)
1208 {
1209  int i;
1210  pixel *pix = (pixel*)_pix;
1211  const dctcoef *block = (const dctcoef*)_block;
1212  stride >>= sizeof(pixel)-1;
1213  for(i=0; i<4; i++){
1214  pixel v = pix[-1];
1215  pix[0]= v += block[0];
1216  pix[1]= v += block[1];
1217  pix[2]= v += block[2];
1218  pix[3]= v + block[3];
1219  pix+= stride;
1220  block+= 4;
1221  }
1222 
1223  memset(_block, 0, sizeof(dctcoef) * 16);
1224 }
1225 
1226 static void FUNCC(pred8x8l_vertical_add)(uint8_t *_pix, int16_t *_block,
1227  ptrdiff_t stride)
1228 {
1229  int i;
1230  pixel *pix = (pixel*)_pix;
1231  const dctcoef *block = (const dctcoef*)_block;
1232  stride >>= sizeof(pixel)-1;
1233  pix -= stride;
1234  for(i=0; i<8; i++){
1235  pixel v = pix[0];
1236  pix[1*stride]= v += block[0];
1237  pix[2*stride]= v += block[8];
1238  pix[3*stride]= v += block[16];
1239  pix[4*stride]= v += block[24];
1240  pix[5*stride]= v += block[32];
1241  pix[6*stride]= v += block[40];
1242  pix[7*stride]= v += block[48];
1243  pix[8*stride]= v + block[56];
1244  pix++;
1245  block++;
1246  }
1247 
1248  memset(_block, 0, sizeof(dctcoef) * 64);
1249 }
1250 
1251 static void FUNCC(pred8x8l_horizontal_add)(uint8_t *_pix, int16_t *_block,
1252  ptrdiff_t stride)
1253 {
1254  int i;
1255  pixel *pix = (pixel*)_pix;
1256  const dctcoef *block = (const dctcoef*)_block;
1257  stride >>= sizeof(pixel)-1;
1258  for(i=0; i<8; i++){
1259  pixel v = pix[-1];
1260  pix[0]= v += block[0];
1261  pix[1]= v += block[1];
1262  pix[2]= v += block[2];
1263  pix[3]= v += block[3];
1264  pix[4]= v += block[4];
1265  pix[5]= v += block[5];
1266  pix[6]= v += block[6];
1267  pix[7]= v + block[7];
1268  pix+= stride;
1269  block+= 8;
1270  }
1271 
1272  memset(_block, 0, sizeof(dctcoef) * 64);
1273 }
1274 
1275 static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset,
1276  int16_t *block,
1277  ptrdiff_t stride)
1278 {
1279  int i;
1280  for(i=0; i<16; i++)
1281  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1282 }
1283 
1284 static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix,
1285  const int *block_offset,
1286  int16_t *block,
1287  ptrdiff_t stride)
1288 {
1289  int i;
1290  for(i=0; i<16; i++)
1291  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1292 }
1293 
1294 static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset,
1295  int16_t *block, ptrdiff_t stride)
1296 {
1297  int i;
1298  for(i=0; i<4; i++)
1299  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1300 }
1301 
1302 static void FUNCC(pred8x16_vertical_add)(uint8_t *pix, const int *block_offset,
1303  int16_t *block, ptrdiff_t stride)
1304 {
1305  int i;
1306  for(i=0; i<4; i++)
1307  FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1308  for(i=4; i<8; i++)
1309  FUNCC(pred4x4_vertical_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1310 }
1311 
1312 static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset,
1313  int16_t *block,
1314  ptrdiff_t stride)
1315 {
1316  int i;
1317  for(i=0; i<4; i++)
1318  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1319 }
1320 
1321 static void FUNCC(pred8x16_horizontal_add)(uint8_t *pix,
1322  const int *block_offset,
1323  int16_t *block, ptrdiff_t stride)
1324 {
1325  int i;
1326  for(i=0; i<4; i++)
1327  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
1328  for(i=4; i<8; i++)
1329  FUNCC(pred4x4_horizontal_add)(pix + block_offset[i+4], block + i*16*sizeof(pixel), stride);
1330 }
pred8x8_vertical_add
static void FUNCC() pred8x8_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1294
pred8x8_mad_cow_dc_0l0
static void FUNC() pred8x8_mad_cow_dc_0l0(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:732
pred8x8_horizontal_add
static void FUNCC() pred8x8_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1312
rv40
ptrdiff_t const int const int rv40
Definition: h264pred_template.c:414
pred8x8_plane
static void FUNCC() pred8x8_plane(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:746
pred16x16_horizontal_add
static void FUNCC() pred16x16_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1284
pred8x8_mad_cow_dc_0lt
static void FUNC() pred8x8_mad_cow_dc_0lt(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:706
pred16x16_plane
static void FUNCC() pred16x16_plane(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:458
PREDICT_8x8_LOAD_LEFT
#define PREDICT_8x8_LOAD_LEFT
Definition: h264pred_template.c:825
pred8x8_top_dc
static void FUNCC() pred8x8_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:573
PREDICT_8x8_LOAD_TOPRIGHT
#define PREDICT_8x8_LOAD_TOPRIGHT
Definition: h264pred_template.c:842
pred8x16_dc
static void FUNCC() pred8x16_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:650
src1
const pixel * src1
Definition: h264pred_template.c:420
pred8x16_mad_cow_dc_0l0
static void FUNC() pred8x16_mad_cow_dc_0l0(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:739
pred8x16_horizontal_add
static void FUNCC() pred8x16_horizontal_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1321
pred8x8l_top_dc
static void FUNCC() pred8x8l_top_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:878
b
#define b
Definition: input.c:41
_stride
ptrdiff_t _stride
Definition: h264pred_template.c:411
svq3
ptrdiff_t const int svq3
Definition: h264pred_template.c:412
LOAD_TOP_EDGE
#define LOAD_TOP_EDGE
Definition: h264pred_template.c:133
pred16x16_left_dc
static void FUNCC() pred16x16_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:365
pred8x8l_vertical_filter_add
static void FUNCC() pred8x8l_vertical_filter_add(uint8_t *_src, int16_t *_block, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1103
PREDICT_16x16_DC
#define PREDICT_16x16_DC(v)
Definition: h264pred_template.c:337
_src
uint8_t ptrdiff_t const uint8_t * _src
Definition: dsp.h:52
pixel4
#define pixel4
Definition: bit_depth_template.c:81
LOAD_LEFT_EDGE
#define LOAD_LEFT_EDGE
Definition: h264pred_template.c:127
dctcoef
#define dctcoef
Definition: bit_depth_template.c:82
pred4x4_left_dc
static void FUNCC() pred4x4_left_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:73
pred4x4_horizontal_up
static void FUNCC() pred4x4_horizontal_up(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:250
pred4x4_top_dc
static void FUNCC() pred4x4_top_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:87
pred4x4_down_right
static void FUNCC() pred4x4_down_right(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:139
pred8x8_vertical
static void FUNCC() pred8x8_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:463
t15
static int t15(InterplayACMContext *s, unsigned ind, unsigned col)
Definition: interplayacm.c:339
pred16x16_top_dc
static void FUNCC() pred16x16_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:380
pred8x8l_left_dc
static void FUNCC() pred8x8l_left_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:868
intreadwrite.h
pred8x16_mad_cow_dc_l00
static void FUNC() pred8x16_mad_cow_dc_l00(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:725
a
int a
Definition: h264pred_template.c:416
AV_WN4PA
#define AV_WN4PA
Definition: bit_depth_template.c:92
pred4x4_horizontal
static void FUNCC() pred4x4_horizontal(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:47
pred8x8_mad_cow_dc_l0t
static void FUNC() pred8x8_mad_cow_dc_l0t(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:694
pred8x16_vertical_add
static void FUNCC() pred8x16_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1302
PIXEL_SPLAT_X4
#define PIXEL_SPLAT_X4(x)
Definition: bit_depth_template.c:93
PREDICT_8x8_DC
#define PREDICT_8x8_DC(v)
Definition: h264pred_template.c:852
pred4x4_horizontal_add
static void FUNCC() pred4x4_horizontal_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1206
pred8x16_mad_cow_dc_0lt
static void FUNC() pred8x16_mad_cow_dc_0lt(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:712
LOAD_TOP_RIGHT_EDGE
#define LOAD_TOP_RIGHT_EDGE
Definition: h264pred_template.c:115
pred8x8l_horizontal_add
static void FUNCC() pred8x8l_horizontal_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1251
NULL
#define NULL
Definition: coverity.c:32
pred8x8_dc
static void FUNCC() pred8x8_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:621
pixel
uint8_t pixel
Definition: tiny_ssim.c:41
pred8x8l_horizontal_up
static void FUNCC() pred8x8l_horizontal_up(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1077
pred8x8l_down_right
static void FUNCC() pred8x8l_down_right(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:961
pred16x16_horizontal
static void FUNCC() pred16x16_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:321
pred4x4_dc
static void FUNCC() pred4x4_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:58
mathops.h
bit_depth_template.c
pred8x8_mad_cow_dc_l00
static void FUNC() pred8x8_mad_cow_dc_l00(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:718
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
pred4x4_horizontal_down
static void FUNCC() pred4x4_horizontal_down(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:275
FUNCC
#define FUNCC(a)
Definition: bit_depth_template.c:102
pred16x16_vertical_add
static void FUNCC() pred16x16_vertical_add(uint8_t *pix, const int *block_offset, int16_t *block, ptrdiff_t stride)
Definition: h264pred_template.c:1275
pred8x8_horizontal
static void FUNCC() pred8x8_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:491
pred8x8_left_dc
static void FUNCC() pred8x8_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:541
pred8x8l_horizontal
static void FUNCC() pred8x8l_horizontal(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:900
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
PREDICT_8x8_LOAD_TOP
#define PREDICT_8x8_LOAD_TOP
Definition: h264pred_template.c:833
AV_RN4PA
#define AV_RN4PA
Definition: bit_depth_template.c:89
PRED8x8_X
#define PRED8x8_X(n, v)
Definition: h264pred_template.c:516
pred8x8l_down_left
static void FUNCC() pred8x8l_down_left(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:938
SRC
#define SRC(x, y)
Definition: h264pred_template.c:822
pred8x16_plane
static void FUNCC() pred8x16_plane(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:781
pred4x4_down_left
static void FUNCC() pred4x4_down_left(uint8_t *_src, const uint8_t *_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:166
ROW
#define ROW(y)
pred8x8l_dc
static void FUNCC() pred8x8l_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:888
pred8x8l_vertical_right
static void FUNCC() pred8x8l_vertical_right(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:985
pred4x4_vertical_right
static void FUNCC() pred4x4_vertical_right(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:194
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
pred8x16_top_dc
static void FUNCC() pred8x16_top_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:599
src2
const pixel * src2
Definition: h264pred_template.c:421
pred8x16_left_dc
static void FUNCC() pred8x16_left_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:567
PRED16x16_X
#define PRED16x16_X(n, v)
Definition: h264pred_template.c:395
pred8x8_128_dc
FUNCC() pred8x8_128_dc(_src+8 *stride, stride)
pred16x16_vertical
static void FUNCC() pred16x16_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:303
pred4x4_128_dc
static void FUNCC() pred4x4_128_dc(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:101
stride
int stride
Definition: h264pred_template.c:418
pred8x16_mad_cow_dc_l0t
static void FUNC() pred8x16_mad_cow_dc_l0t(uint8_t *src, ptrdiff_t stride)
Definition: h264pred_template.c:700
FUNC
#define FUNC(a)
Definition: bit_depth_template.c:101
BIT_DEPTH
#define BIT_DEPTH
Definition: dsp_init.c:55
pred8x8l_vertical_add
static void FUNCC() pred8x8l_vertical_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1226
pred8x8l_horizontal_filter_add
static void FUNCC() pred8x8l_horizontal_filter_add(uint8_t *_src, int16_t *_block, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1139
pred8x8l_128_dc
static void FUNCC() pred8x8l_128_dc(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:860
pred4x4_vertical
static void FUNCC() pred4x4_vertical(uint8_t *_src, const uint8_t *topright, ptrdiff_t _stride)
Definition: h264pred_template.c:34
pred16x16_dc
static void FUNCC() pred16x16_dc(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:346
pred4x4_vertical_left
static void FUNCC() pred4x4_vertical_left(uint8_t *_src, const uint8_t *_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:222
src0
const pixel *const src0
Definition: h264pred_template.c:419
H
int H
Definition: h264pred_template.c:422
src
pixel * src
Definition: h264pred_template.c:417
CLIP
@ CLIP
Definition: qdrw.c:37
pred8x8l_horizontal_down
static void FUNCC() pred8x8l_horizontal_down(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1016
pred8x8l_vertical_left
static void FUNCC() pred8x8l_vertical_left(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:1047
pred8x8l_vertical
static void FUNCC() pred8x8l_vertical(uint8_t *_src, int has_topleft, int has_topright, ptrdiff_t _stride)
Definition: h264pred_template.c:914
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
pred8x16_vertical
static void FUNCC() pred8x16_vertical(uint8_t *_src, ptrdiff_t _stride)
Definition: h264pred_template.c:477
pred8x16_horizontal
static void FUNCC() pred8x16_horizontal(uint8_t *_src, ptrdiff_t stride)
Definition: h264pred_template.c:504
PREDICT_8x8_LOAD_TOPLEFT
#define PREDICT_8x8_LOAD_TOPLEFT
Definition: h264pred_template.c:849
V
int V
Definition: h264pred_template.c:423
pred4x4_vertical_add
static void FUNCC() pred4x4_vertical_add(uint8_t *_pix, int16_t *_block, ptrdiff_t stride)
Definition: h264pred_template.c:1185