89 ptrdiff_t line_size,
int h);
91 ptrdiff_t line_size,
int h);
93 int dstStride,
int src1Stride,
int h);
96 int src1Stride,
int h);
98 int dstStride,
int src1Stride,
int h);
100 ptrdiff_t line_size,
int h);
102 ptrdiff_t line_size,
int h);
104 int dstStride,
int src1Stride,
int h);
106 int dstStride,
int src1Stride,
int h);
108 int dstStride,
int src1Stride,
int h);
110 ptrdiff_t line_size,
int h);
112 ptrdiff_t line_size,
int h);
113 void ff_put_no_rnd_pixels8_x2_exact_mmxext(
uint8_t *
block,
115 ptrdiff_t line_size,
int h);
116 void ff_put_no_rnd_pixels8_x2_exact_3dnow(
uint8_t *
block,
118 ptrdiff_t line_size,
int h);
120 ptrdiff_t line_size,
int h);
122 ptrdiff_t line_size,
int h);
124 ptrdiff_t line_size,
int h);
126 ptrdiff_t line_size,
int h);
127 void ff_put_no_rnd_pixels8_y2_exact_mmxext(
uint8_t *
block,
129 ptrdiff_t line_size,
int h);
130 void ff_put_no_rnd_pixels8_y2_exact_3dnow(
uint8_t *
block,
132 ptrdiff_t line_size,
int h);
134 ptrdiff_t line_size,
int h);
136 ptrdiff_t line_size,
int h);
138 ptrdiff_t line_size,
int h);
140 ptrdiff_t line_size,
int h);
142 ptrdiff_t line_size,
int h);
144 ptrdiff_t line_size,
int h);
146 ptrdiff_t line_size,
int h);
148 void ff_put_pixels8_mmxext(
uint8_t *
block,
const uint8_t *pixels, ptrdiff_t line_size,
int h);
150 ptrdiff_t line_size,
int h)
152 ff_put_pixels8_mmxext(block, pixels, line_size, h);
153 ff_put_pixels8_mmxext(block + 8, pixels + 8, line_size, h);
157 int dstStride,
int srcStride,
int h);
159 int dstStride,
int srcStride,
int h);
161 int dstStride,
int srcStride,
164 int dstStride,
int srcStride,
int h);
166 int dstStride,
int srcStride,
int h);
168 int dstStride,
int srcStride,
171 int dstStride,
int srcStride);
173 int dstStride,
int srcStride);
175 int dstStride,
int srcStride);
177 int dstStride,
int srcStride);
179 int dstStride,
int srcStride);
181 int dstStride,
int srcStride);
/*
 * The "no_rnd" full-pel put names are plain aliases: each one expands to the
 * ordinary ff_put_pixels{16,8}_mmxext symbol, so both spellings resolve to
 * the same function.
 */
182 #define ff_put_no_rnd_pixels16_mmxext ff_put_pixels16_mmxext
183 #define ff_put_no_rnd_pixels8_mmxext ff_put_pixels8_mmxext
/* Emit a `.p2align 3` assembler directive (2^3 = 8-byte alignment in GNU as)
 * at the point of use. */
189 #define JUMPALIGN() __asm__ volatile (".p2align 3"::)
/* Clear the register named by `regd` (bare name, the macro adds the '%')
 * by XORing it with itself. The asm statement declares no outputs or
 * clobbers, so the compiler is not told that the register changes. */
190 #define MOVQ_ZERO(regd) __asm__ volatile ("pxor %%"#regd", %%"#regd ::)
192 #define MOVQ_BFE(regd) \
194 "pcmpeqd %%"#regd", %%"#regd" \n\t" \
195 "paddb %%"#regd", %%"#regd" \n\t" ::)
/*
 * Memory-load variants: fetch a 64-bit constant from the external objects
 * ff_bone / ff_wtwo into register `regd` (bare name, the macro adds the '%').
 * The constants themselves are defined elsewhere. The register-only variants
 * of these macros in this file build 0x01 in every byte (MOVQ_BONE) and
 * 0x0002 in every 16-bit word (MOVQ_WTWO); presumably ff_bone / ff_wtwo hold
 * the same patterns -- confirm against their definitions.
 */
198 #define MOVQ_BONE(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_bone))
199 #define MOVQ_WTWO(regd) __asm__ volatile ("movq %0, %%"#regd" \n\t" :: "m"(ff_wtwo))
203 #define MOVQ_BONE(regd) \
205 "pcmpeqd %%"#regd", %%"#regd" \n\t" \
206 "psrlw $15, %%"#regd" \n\t" \
207 "packuswb %%"#regd", %%"#regd" \n\t" ::)
209 #define MOVQ_WTWO(regd) \
211 "pcmpeqd %%"#regd", %%"#regd" \n\t" \
212 "psrlw $15, %%"#regd" \n\t" \
213 "psllw $1, %%"#regd" \n\t"::)
/*
 * PAVGB_MMX_NO_RND(rega, regb, regr, regfe)
 * Assembly fragment (adjacent string literals meant to be pasted into an asm
 * statement). Operands are spliced in verbatim, so callers pass complete
 * operand text.
 *
 *   regr = (rega & regb) + (((rega ^ regb) & regfe) >> 1)
 *
 * rega is only read; regb is overwritten with the masked, shifted XOR.
 * psrlq shifts the whole 64-bit register, so regfe has to clear each byte's
 * low bit first (0xFE in every byte, the pattern MOVQ_BFE builds) to keep
 * bits from leaking into the neighbouring byte. With that mask the result is
 * the per-byte average rounded down, (a + b) >> 1.
 */
220 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
221 "movq "#rega", "#regr" \n\t" \
222 "pand "#regb", "#regr" \n\t" \
223 "pxor "#rega", "#regb" \n\t" \
224 "pand "#regfe", "#regb" \n\t" \
225 "psrlq $1, "#regb" \n\t" \
226 "paddb "#regb", "#regr" \n\t"
/*
 * PAVGB_MMX(rega, regb, regr, regfe)
 * Rounding counterpart of PAVGB_MMX_NO_RND; same operand conventions
 * (verbatim operand text, regb destroyed, result in regr).
 *
 *   regr = (rega | regb) - (((rega ^ regb) & regfe) >> 1)
 *
 * With regfe holding 0xFE in every byte (needed because psrlq shifts across
 * byte boundaries) this is the per-byte average rounded up, (a + b + 1) >> 1.
 */
228 #define PAVGB_MMX(rega, regb, regr, regfe) \
229 "movq "#rega", "#regr" \n\t" \
230 "por "#regb", "#regr" \n\t" \
231 "pxor "#rega", "#regb" \n\t" \
232 "pand "#regfe", "#regb" \n\t" \
233 "psrlq $1, "#regb" \n\t" \
234 "psubb "#regb", "#regr" \n\t"
/*
 * PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp)
 * Two interleaved copies of the non-rounding byte average:
 *
 *   regr = (rega & regb) + (((rega ^ regb) & mm6) >> 1)
 *   regp = (regc & regd) + (((regc ^ regd) & mm6) >> 1)
 *
 * The mask register is hard-wired to mm6, which the caller must have loaded
 * beforehand (0xFE in every byte is what makes the 64-bit psrlq safe per
 * byte). regb and regd are overwritten; rega and regc are only read.
 */
237 #define PAVGBP_MMX_NO_RND(rega, regb, regr, regc, regd, regp) \
238 "movq "#rega", "#regr" \n\t" \
239 "movq "#regc", "#regp" \n\t" \
240 "pand "#regb", "#regr" \n\t" \
241 "pand "#regd", "#regp" \n\t" \
242 "pxor "#rega", "#regb" \n\t" \
243 "pxor "#regc", "#regd" \n\t" \
244 "pand %%mm6, "#regb" \n\t" \
245 "pand %%mm6, "#regd" \n\t" \
246 "psrlq $1, "#regb" \n\t" \
247 "psrlq $1, "#regd" \n\t" \
248 "paddb "#regb", "#regr" \n\t" \
249 "paddb "#regd", "#regp" \n\t"
/*
 * PAVGBP_MMX(rega, regb, regr, regc, regd, regp)
 * Two interleaved copies of the rounding byte average:
 *
 *   regr = (rega | regb) - (((rega ^ regb) & mm6) >> 1)
 *   regp = (regc | regd) - (((regc ^ regd) & mm6) >> 1)
 *
 * As in the non-rounding pair macro, the mask comes from mm6 (to be preloaded
 * by the caller), regb and regd are destroyed, and rega and regc are only
 * read.
 */
251 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
252 "movq "#rega", "#regr" \n\t" \
253 "movq "#regc", "#regp" \n\t" \
254 "por "#regb", "#regr" \n\t" \
255 "por "#regd", "#regp" \n\t" \
256 "pxor "#rega", "#regb" \n\t" \
257 "pxor "#regc", "#regd" \n\t" \
258 "pand %%mm6, "#regb" \n\t" \
259 "pand %%mm6, "#regd" \n\t" \
260 "psrlq $1, "#regd" \n\t" \
261 "psrlq $1, "#regb" \n\t" \
262 "psubb "#regb", "#regr" \n\t" \
263 "psubb "#regd", "#regp" \n\t"
/*
 * Parameter set for the non-rounding ("no_rnd") instantiation:
 *   DEF(x, y)      pastes the name x_no_rnd_y_mmx, e.g.
 *                  DEF(put, pixels8_x2) -> put_no_rnd_pixels8_x2_mmx;
 *   SET_RND        expands to MOVQ_WONE (not defined in the visible part of
 *                  the file);
 *   PAVGBP / PAVGB forward to the *_NO_RND averaging fragments;
 *   OP_AVG         still forwards to the rounding PAVGB_MMX.
 * NOTE(review): presumably consumed by a template included right after these
 * definitions -- the include is not visible here.
 */
268 #define DEF(x, y) x ## _no_rnd_ ## y ## _mmx
269 #define SET_RND MOVQ_WONE
270 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
271 #define PAVGB(a, b, c, e) PAVGB_MMX_NO_RND(a, b, c, e)
272 #define OP_AVG(a, b, c, e) PAVGB_MMX(a, b, c, e)
/*
 * Parameter set for the rounding instantiation:
 *   DEF(x, y)      pastes the name x_y_mmx, e.g.
 *                  DEF(put, pixels8_x2) -> put_pixels8_x2_mmx;
 *   SET_RND        expands to MOVQ_WTWO;
 *   PAVGBP / PAVGB forward to the rounding PAVGBP_MMX / PAVGB_MMX fragments.
 * NOTE(review): presumably consumed by a template included right after these
 * definitions -- the include is not visible here.
 */
284 #define DEF(x, y) x ## _ ## y ## _mmx
285 #define SET_RND MOVQ_WTWO
286 #define PAVGBP(a, b, c, d, e, f) PAVGBP_MMX(a, b, c, d, e, f)
287 #define PAVGB(a, b, c, e) PAVGB_MMX(a, b, c, e)
305 #define DEF(x) x ## _3dnow
314 #define DEF(x) x ## _mmxext
/*
 * The "no_rnd" full-pel put names for the plain MMX versions are aliases of
 * put_pixels16_mmx / put_pixels8_mmx, so both spellings resolve to the same
 * function.
 */
324 #define put_no_rnd_pixels16_mmx put_pixels16_mmx
325 #define put_no_rnd_pixels8_mmx put_pixels8_mmx
341 "movq (%3), %%mm0 \n\t"
342 "movq 8(%3), %%mm1 \n\t"
343 "movq 16(%3), %%mm2 \n\t"
344 "movq 24(%3), %%mm3 \n\t"
345 "movq 32(%3), %%mm4 \n\t"
346 "movq 40(%3), %%mm5 \n\t"
347 "movq 48(%3), %%mm6 \n\t"
348 "movq 56(%3), %%mm7 \n\t"
349 "packuswb %%mm1, %%mm0 \n\t"
350 "packuswb %%mm3, %%mm2 \n\t"
351 "packuswb %%mm5, %%mm4 \n\t"
352 "packuswb %%mm7, %%mm6 \n\t"
353 "movq %%mm0, (%0) \n\t"
354 "movq %%mm2, (%0, %1) \n\t"
355 "movq %%mm4, (%0, %1, 2) \n\t"
356 "movq %%mm6, (%0, %2) \n\t"
357 ::
"r"(pix),
"r"((
x86_reg)line_size),
"r"((
x86_reg)line_size * 3),
360 pix += line_size * 4;
367 "movq (%3), %%mm0 \n\t"
368 "movq 8(%3), %%mm1 \n\t"
369 "movq 16(%3), %%mm2 \n\t"
370 "movq 24(%3), %%mm3 \n\t"
371 "movq 32(%3), %%mm4 \n\t"
372 "movq 40(%3), %%mm5 \n\t"
373 "movq 48(%3), %%mm6 \n\t"
374 "movq 56(%3), %%mm7 \n\t"
375 "packuswb %%mm1, %%mm0 \n\t"
376 "packuswb %%mm3, %%mm2 \n\t"
377 "packuswb %%mm5, %%mm4 \n\t"
378 "packuswb %%mm7, %%mm6 \n\t"
379 "movq %%mm0, (%0) \n\t"
380 "movq %%mm2, (%0, %1) \n\t"
381 "movq %%mm4, (%0, %1, 2) \n\t"
382 "movq %%mm6, (%0, %2) \n\t"
383 ::
"r"(pix),
"r"((
x86_reg)line_size),
"r"((
x86_reg)line_size * 3),
"r"(p)
/*
 * put_signed_pixels_clamped_mmx_half(off)
 * Assembly fragment that produces four output rows. Operand roles, as bound
 * by the asm statement that uses it in this file:
 *   %0 = pixels (destination), %1 = 3 * line_skip, %2 = block,
 *   %3 = line_skip.
 *
 * For each of the four rows, 16 bytes (eight 16-bit values) are read from %2
 * starting at off, off + 16, off + 32 and off + 48; packsswb narrows them to
 * eight bytes with signed saturation. mm0 is then added bytewise (paddb) and
 * the rows are stored at %0, %0 + %3, %0 + 2 * %3 and %0 + %1.
 *
 * mm0 has to be loaded before this fragment runs; its value is not visible
 * here -- presumably a per-byte 0x80 bias that turns the signed result into
 * unsigned pixels, TODO confirm.
 * Overwrites mm1-mm4.
 */
387 #define put_signed_pixels_clamped_mmx_half(off) \
388 "movq "#off"(%2), %%mm1 \n\t" \
389 "movq 16 + "#off"(%2), %%mm2 \n\t" \
390 "movq 32 + "#off"(%2), %%mm3 \n\t" \
391 "movq 48 + "#off"(%2), %%mm4 \n\t" \
392 "packsswb 8 + "#off"(%2), %%mm1 \n\t" \
393 "packsswb 24 + "#off"(%2), %%mm2 \n\t" \
394 "packsswb 40 + "#off"(%2), %%mm3 \n\t" \
395 "packsswb 56 + "#off"(%2), %%mm4 \n\t" \
396 "paddb %%mm0, %%mm1 \n\t" \
397 "paddb %%mm0, %%mm2 \n\t" \
398 "paddb %%mm0, %%mm3 \n\t" \
399 "paddb %%mm0, %%mm4 \n\t" \
400 "movq %%mm1, (%0) \n\t" \
401 "movq %%mm2, (%0, %3) \n\t" \
402 "movq %%mm3, (%0, %3, 2) \n\t" \
403 "movq %%mm4, (%0, %1) \n\t"
413 "lea (%3, %3, 2), %1 \n\t"
414 put_signed_pixels_clamped_mmx_half(0)
415 "lea (%0, %3, 4), %0 \n\t"
416 put_signed_pixels_clamped_mmx_half(64)
417 :
"+&r"(pixels),
"=&r"(line_skip3)
418 :
"r"(block),
"r"(line_skip)
436 "movq (%2), %%mm0 \n\t"
437 "movq 8(%2), %%mm1 \n\t"
438 "movq 16(%2), %%mm2 \n\t"
439 "movq 24(%2), %%mm3 \n\t"
440 "movq %0, %%mm4 \n\t"
441 "movq %1, %%mm6 \n\t"
442 "movq %%mm4, %%mm5 \n\t"
443 "punpcklbw %%mm7, %%mm4 \n\t"
444 "punpckhbw %%mm7, %%mm5 \n\t"
445 "paddsw %%mm4, %%mm0 \n\t"
446 "paddsw %%mm5, %%mm1 \n\t"
447 "movq %%mm6, %%mm5 \n\t"
448 "punpcklbw %%mm7, %%mm6 \n\t"
449 "punpckhbw %%mm7, %%mm5 \n\t"
450 "paddsw %%mm6, %%mm2 \n\t"
451 "paddsw %%mm5, %%mm3 \n\t"
452 "packuswb %%mm1, %%mm0 \n\t"
453 "packuswb %%mm3, %%mm2 \n\t"
454 "movq %%mm0, %0 \n\t"
455 "movq %%mm2, %1 \n\t"
456 :
"+m"(*pix),
"+m"(*(pix + line_size))
459 pix += line_size * 2;
464 static void put_pixels8_mmx(
uint8_t *block,
const uint8_t *pixels,
465 ptrdiff_t line_size,
int h)
468 "lea (%3, %3), %%"REG_a
" \n\t"
471 "movq (%1 ), %%mm0 \n\t"
472 "movq (%1, %3), %%mm1 \n\t"
473 "movq %%mm0, (%2) \n\t"
474 "movq %%mm1, (%2, %3) \n\t"
475 "add %%"REG_a
", %1 \n\t"
476 "add %%"REG_a
", %2 \n\t"
477 "movq (%1 ), %%mm0 \n\t"
478 "movq (%1, %3), %%mm1 \n\t"
479 "movq %%mm0, (%2) \n\t"
480 "movq %%mm1, (%2, %3) \n\t"
481 "add %%"REG_a
", %1 \n\t"
482 "add %%"REG_a
", %2 \n\t"
485 :
"+g"(h),
"+r"(pixels),
"+r"(
block)
491 static void put_pixels16_mmx(
uint8_t *block,
const uint8_t *pixels,
492 ptrdiff_t line_size,
int h)
495 "lea (%3, %3), %%"REG_a
" \n\t"
498 "movq (%1 ), %%mm0 \n\t"
499 "movq 8(%1 ), %%mm4 \n\t"
500 "movq (%1, %3), %%mm1 \n\t"
501 "movq 8(%1, %3), %%mm5 \n\t"
502 "movq %%mm0, (%2) \n\t"
503 "movq %%mm4, 8(%2) \n\t"
504 "movq %%mm1, (%2, %3) \n\t"
505 "movq %%mm5, 8(%2, %3) \n\t"
506 "add %%"REG_a
", %1 \n\t"
507 "add %%"REG_a
", %2 \n\t"
508 "movq (%1 ), %%mm0 \n\t"
509 "movq 8(%1 ), %%mm4 \n\t"
510 "movq (%1, %3), %%mm1 \n\t"
511 "movq 8(%1, %3), %%mm5 \n\t"
512 "movq %%mm0, (%2) \n\t"
513 "movq %%mm4, 8(%2) \n\t"
514 "movq %%mm1, (%2, %3) \n\t"
515 "movq %%mm5, 8(%2, %3) \n\t"
516 "add %%"REG_a
", %1 \n\t"
517 "add %%"REG_a
", %2 \n\t"
520 :
"+g"(h),
"+r"(pixels),
"+r"(
block)
526 #define CLEAR_BLOCKS(name, n) \
527 static void name(int16_t *blocks) \
530 "pxor %%mm7, %%mm7 \n\t" \
531 "mov %1, %%"REG_a" \n\t" \
533 "movq %%mm7, (%0, %%"REG_a") \n\t" \
534 "movq %%mm7, 8(%0, %%"REG_a") \n\t" \
535 "movq %%mm7, 16(%0, %%"REG_a") \n\t" \
536 "movq %%mm7, 24(%0, %%"REG_a") \n\t" \
537 "add $32, %%"REG_a" \n\t" \
539 :: "r"(((uint8_t *)blocks) + 128 * n), \
544 CLEAR_BLOCKS(clear_blocks_mmx, 6)
545 CLEAR_BLOCKS(clear_block_mmx, 1)
547 static
void clear_block_sse(int16_t *block)
550 "xorps %%xmm0, %%xmm0 \n"
551 "movaps %%xmm0, (%0) \n"
552 "movaps %%xmm0, 16(%0) \n"
553 "movaps %%xmm0, 32(%0) \n"
554 "movaps %%xmm0, 48(%0) \n"
555 "movaps %%xmm0, 64(%0) \n"
556 "movaps %%xmm0, 80(%0) \n"
557 "movaps %%xmm0, 96(%0) \n"
558 "movaps %%xmm0, 112(%0) \n"
564 static void clear_blocks_sse(int16_t *blocks)
567 "xorps %%xmm0, %%xmm0 \n"
568 "mov %1, %%"REG_a
" \n"
570 "movaps %%xmm0, (%0, %%"REG_a
") \n"
571 "movaps %%xmm0, 16(%0, %%"REG_a
") \n"
572 "movaps %%xmm0, 32(%0, %%"REG_a
") \n"
573 "movaps %%xmm0, 48(%0, %%"REG_a
") \n"
574 "movaps %%xmm0, 64(%0, %%"REG_a
") \n"
575 "movaps %%xmm0, 80(%0, %%"REG_a
") \n"
576 "movaps %%xmm0, 96(%0, %%"REG_a
") \n"
577 "movaps %%xmm0, 112(%0, %%"REG_a
") \n"
578 "add $128, %%"REG_a
" \n"
580 ::
"r"(((
uint8_t *)blocks) + 128 * 6),
592 "movq (%1, %0), %%mm0 \n\t"
593 "movq (%2, %0), %%mm1 \n\t"
594 "paddb %%mm0, %%mm1 \n\t"
595 "movq %%mm1, (%2, %0) \n\t"
596 "movq 8(%1, %0), %%mm0 \n\t"
597 "movq 8(%2, %0), %%mm1 \n\t"
598 "paddb %%mm0, %%mm1 \n\t"
599 "movq %%mm1, 8(%2, %0) \n\t"
608 dst[i + 0] += src[i + 0];
612 static void add_hfyu_median_prediction_cmov(
uint8_t *dst,
const uint8_t *top,
614 int *left,
int *left_top)
618 int l = *left & 0xff;
619 int tl = *left_top & 0xff;
624 "movzbl (%3, %4), %2 \n"
637 "add (%6, %4), %b0 \n"
638 "mov %b0, (%5, %4) \n"
641 :
"+&q"(l),
"+&q"(tl),
"=&r"(
t),
"=&q"(x),
"+&r"(w2)
642 :
"r"(dst + w),
"r"(diff + w),
"rm"(top + w)
657 int w,
int h,
int sides)
662 last_line = buf + (height - 1) * wrap;
668 "movd (%0), %%mm0 \n\t"
669 "punpcklbw %%mm0, %%mm0 \n\t"
670 "punpcklwd %%mm0, %%mm0 \n\t"
671 "punpckldq %%mm0, %%mm0 \n\t"
672 "movq %%mm0, -8(%0) \n\t"
673 "movq -8(%0, %2), %%mm1 \n\t"
674 "punpckhbw %%mm1, %%mm1 \n\t"
675 "punpckhwd %%mm1, %%mm1 \n\t"
676 "punpckhdq %%mm1, %%mm1 \n\t"
677 "movq %%mm1, (%0, %2) \n\t"
687 "movd (%0), %%mm0 \n\t"
688 "punpcklbw %%mm0, %%mm0 \n\t"
689 "punpcklwd %%mm0, %%mm0 \n\t"
690 "punpckldq %%mm0, %%mm0 \n\t"
691 "movq %%mm0, -8(%0) \n\t"
692 "movq %%mm0, -16(%0) \n\t"
693 "movq -8(%0, %2), %%mm1 \n\t"
694 "punpckhbw %%mm1, %%mm1 \n\t"
695 "punpckhwd %%mm1, %%mm1 \n\t"
696 "punpckhdq %%mm1, %%mm1 \n\t"
697 "movq %%mm1, (%0, %2) \n\t"
698 "movq %%mm1, 8(%0, %2) \n\t"
709 "movd (%0), %%mm0 \n\t"
710 "punpcklbw %%mm0, %%mm0 \n\t"
711 "punpcklwd %%mm0, %%mm0 \n\t"
712 "movd %%mm0, -4(%0) \n\t"
713 "movd -4(%0, %2), %%mm1 \n\t"
714 "punpcklbw %%mm1, %%mm1 \n\t"
715 "punpckhwd %%mm1, %%mm1 \n\t"
716 "punpckhdq %%mm1, %%mm1 \n\t"
717 "movd %%mm1, (%0, %2) \n\t"
728 for (i = 0; i < h; i += 4) {
729 ptr = buf - (i + 1) * wrap - w;
732 "movq (%1, %0), %%mm0 \n\t"
733 "movq %%mm0, (%0) \n\t"
734 "movq %%mm0, (%0, %2) \n\t"
735 "movq %%mm0, (%0, %2, 2) \n\t"
736 "movq %%mm0, (%0, %3) \n\t"
742 "r"((
x86_reg) -wrap * 3),
"r"(ptr + width + 2 * w)
748 for (i = 0; i < h; i += 4) {
749 ptr = last_line + (i + 1) * wrap - w;
752 "movq (%1, %0), %%mm0 \n\t"
753 "movq %%mm0, (%0) \n\t"
754 "movq %%mm0, (%0, %2) \n\t"
755 "movq %%mm0, (%0, %2, 2) \n\t"
756 "movq %%mm0, (%0, %3) \n\t"
763 "r"(ptr + width + 2 * w)
772 #define QPEL_OP(OPNAME, ROUNDER, RND, MMX) \
773 static void OPNAME ## qpel8_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \
776 ff_ ## OPNAME ## pixels8_ ## MMX(dst, src, stride, 8); \
779 static void OPNAME ## qpel8_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
783 uint8_t * const half = (uint8_t*)temp; \
784 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
786 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
787 stride, stride, 8); \
790 static void OPNAME ## qpel8_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
793 ff_ ## OPNAME ## mpeg4_qpel8_h_lowpass_ ## MMX(dst, src, stride, \
797 static void OPNAME ## qpel8_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
801 uint8_t * const half = (uint8_t*)temp; \
802 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(half, src, 8, \
804 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + 1, half, stride, \
808 static void OPNAME ## qpel8_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
812 uint8_t * const half = (uint8_t*)temp; \
813 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
815 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src, half, \
816 stride, stride, 8); \
819 static void OPNAME ## qpel8_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
822 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, src, \
826 static void OPNAME ## qpel8_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
830 uint8_t * const half = (uint8_t*)temp; \
831 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(half, src, \
833 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, src + stride, half, stride,\
837 static void OPNAME ## qpel8_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
840 uint64_t half[8 + 9]; \
841 uint8_t * const halfH = ((uint8_t*)half) + 64; \
842 uint8_t * const halfHV = ((uint8_t*)half); \
843 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
845 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
847 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
848 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
852 static void OPNAME ## qpel8_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
855 uint64_t half[8 + 9]; \
856 uint8_t * const halfH = ((uint8_t*)half) + 64; \
857 uint8_t * const halfHV = ((uint8_t*)half); \
858 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
860 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
862 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
863 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
867 static void OPNAME ## qpel8_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
870 uint64_t half[8 + 9]; \
871 uint8_t * const halfH = ((uint8_t*)half) + 64; \
872 uint8_t * const halfHV = ((uint8_t*)half); \
873 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
875 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, 8, \
877 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
878 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
882 static void OPNAME ## qpel8_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
885 uint64_t half[8 + 9]; \
886 uint8_t * const halfH = ((uint8_t*)half) + 64; \
887 uint8_t * const halfHV = ((uint8_t*)half); \
888 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
890 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
892 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
893 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
897 static void OPNAME ## qpel8_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
900 uint64_t half[8 + 9]; \
901 uint8_t * const halfH = ((uint8_t*)half) + 64; \
902 uint8_t * const halfHV = ((uint8_t*)half); \
903 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
905 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
906 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH, halfHV, \
910 static void OPNAME ## qpel8_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
913 uint64_t half[8 + 9]; \
914 uint8_t * const halfH = ((uint8_t*)half) + 64; \
915 uint8_t * const halfHV = ((uint8_t*)half); \
916 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
918 ff_put ## RND ## mpeg4_qpel8_v_lowpass_ ## MMX(halfHV, halfH, 8, 8);\
919 ff_ ## OPNAME ## pixels8_l2_ ## MMX(dst, halfH + 8, halfHV, \
923 static void OPNAME ## qpel8_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
926 uint64_t half[8 + 9]; \
927 uint8_t * const halfH = ((uint8_t*)half); \
928 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
930 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src, halfH, \
932 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
936 static void OPNAME ## qpel8_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
939 uint64_t half[8 + 9]; \
940 uint8_t * const halfH = ((uint8_t*)half); \
941 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
943 ff_put ## RND ## pixels8_l2_ ## MMX(halfH, src + 1, halfH, 8, \
945 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
949 static void OPNAME ## qpel8_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
953 uint8_t * const halfH = ((uint8_t*)half); \
954 ff_put ## RND ## mpeg4_qpel8_h_lowpass_ ## MMX(halfH, src, 8, \
956 ff_ ## OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(dst, halfH, \
960 static void OPNAME ## qpel16_mc00_ ## MMX (uint8_t *dst, uint8_t *src, \
963 ff_ ## OPNAME ## pixels16_ ## MMX(dst, src, stride, 16); \
966 static void OPNAME ## qpel16_mc10_ ## MMX(uint8_t *dst, uint8_t *src, \
970 uint8_t * const half = (uint8_t*)temp; \
971 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
973 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
977 static void OPNAME ## qpel16_mc20_ ## MMX(uint8_t *dst, uint8_t *src, \
980 ff_ ## OPNAME ## mpeg4_qpel16_h_lowpass_ ## MMX(dst, src, \
981 stride, stride, 16);\
984 static void OPNAME ## qpel16_mc30_ ## MMX(uint8_t *dst, uint8_t *src, \
988 uint8_t * const half = (uint8_t*)temp; \
989 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(half, src, 16, \
991 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src + 1, half, \
992 stride, stride, 16); \
995 static void OPNAME ## qpel16_mc01_ ## MMX(uint8_t *dst, uint8_t *src, \
999 uint8_t * const half = (uint8_t*)temp; \
1000 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
1002 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src, half, stride, \
1006 static void OPNAME ## qpel16_mc02_ ## MMX(uint8_t *dst, uint8_t *src, \
1009 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, src, \
1013 static void OPNAME ## qpel16_mc03_ ## MMX(uint8_t *dst, uint8_t *src, \
1016 uint64_t temp[32]; \
1017 uint8_t * const half = (uint8_t*)temp; \
1018 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(half, src, 16, \
1020 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, src+stride, half, \
1021 stride, stride, 16); \
1024 static void OPNAME ## qpel16_mc11_ ## MMX(uint8_t *dst, uint8_t *src, \
1027 uint64_t half[16 * 2 + 17 * 2]; \
1028 uint8_t * const halfH = ((uint8_t*)half) + 256; \
1029 uint8_t * const halfHV = ((uint8_t*)half); \
1030 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1032 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
1034 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
1036 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
1040 static void OPNAME ## qpel16_mc31_ ## MMX(uint8_t *dst, uint8_t *src, \
1043 uint64_t half[16 * 2 + 17 * 2]; \
1044 uint8_t * const halfH = ((uint8_t*)half) + 256; \
1045 uint8_t * const halfHV = ((uint8_t*)half); \
1046 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1048 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
1050 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
1052 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
1056 static void OPNAME ## qpel16_mc13_ ## MMX(uint8_t *dst, uint8_t *src, \
1059 uint64_t half[16 * 2 + 17 * 2]; \
1060 uint8_t * const halfH = ((uint8_t*)half) + 256; \
1061 uint8_t * const halfHV = ((uint8_t*)half); \
1062 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1064 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
1066 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
1068 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
1072 static void OPNAME ## qpel16_mc33_ ## MMX(uint8_t *dst, uint8_t *src, \
1075 uint64_t half[16 * 2 + 17 * 2]; \
1076 uint8_t * const halfH = ((uint8_t*)half) + 256; \
1077 uint8_t * const halfHV = ((uint8_t*)half); \
1078 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1080 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
1082 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
1084 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
1088 static void OPNAME ## qpel16_mc21_ ## MMX(uint8_t *dst, uint8_t *src, \
1091 uint64_t half[16 * 2 + 17 * 2]; \
1092 uint8_t * const halfH = ((uint8_t*)half) + 256; \
1093 uint8_t * const halfHV = ((uint8_t*)half); \
1094 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1096 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
1098 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH, halfHV, \
1102 static void OPNAME ## qpel16_mc23_ ## MMX(uint8_t *dst, uint8_t *src, \
1105 uint64_t half[16 * 2 + 17 * 2]; \
1106 uint8_t * const halfH = ((uint8_t*)half) + 256; \
1107 uint8_t * const halfHV = ((uint8_t*)half); \
1108 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1110 ff_put ## RND ## mpeg4_qpel16_v_lowpass_ ## MMX(halfHV, halfH, \
1112 ff_ ## OPNAME ## pixels16_l2_ ## MMX(dst, halfH + 16, halfHV, \
1116 static void OPNAME ## qpel16_mc12_ ## MMX(uint8_t *dst, uint8_t *src, \
1119 uint64_t half[17 * 2]; \
1120 uint8_t * const halfH = ((uint8_t*)half); \
1121 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1123 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src, halfH, 16, \
1125 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
1129 static void OPNAME ## qpel16_mc32_ ## MMX(uint8_t *dst, uint8_t *src, \
1132 uint64_t half[17 * 2]; \
1133 uint8_t * const halfH = ((uint8_t*)half); \
1134 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1136 ff_put ## RND ## pixels16_l2_ ## MMX(halfH, src + 1, halfH, 16, \
1138 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
1142 static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, \
1145 uint64_t half[17 * 2]; \
1146 uint8_t * const halfH = ((uint8_t*)half); \
1147 ff_put ## RND ## mpeg4_qpel16_h_lowpass_ ## MMX(halfH, src, 16, \
1149 ff_ ## OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, \
1155 QPEL_OP(put_no_rnd_,
ff_pw_15, _no_rnd_, mmxext)
1162 put_pixels8_xy2_mmx(dst, src, stride, 8);
1166 put_pixels16_xy2_mmx(dst, src, stride, 16);
1170 avg_pixels8_xy2_mmx(dst, src, stride, 8);
1174 avg_pixels16_xy2_mmx(dst, src, stride, 16);
/*
 * Function type of the edge-emulation routine that the gmc helper in this
 * file receives through its emu_edge_fn pointer. The helper invokes it as
 *   emu_edge_fn(edge_buf, src, stride, w + 1, h + 1, ix, iy, width, height)
 * so the arguments are, in order: destination buffer, source, line size,
 * block width and height, source x and y, and the w / h bounds (the caller
 * passes its width / height values there).
 */
1177 typedef void emulated_edge_mc_func(
uint8_t *dst,
const uint8_t *src,
1178 ptrdiff_t linesize,
int block_w,
int block_h,
1179 int src_x,
int src_y,
int w,
int h);
1182 int stride,
int h,
int ox,
int oy,
1183 int dxx,
int dxy,
int dyx,
int dyy,
1184 int shift,
int r,
int width,
int height,
1185 emulated_edge_mc_func *emu_edge_fn)
1188 const int ix = ox >> (16 +
shift);
1189 const int iy = oy >> (16 +
shift);
1190 const int oxs = ox >> 4;
1191 const int oys = oy >> 4;
1192 const int dxxs = dxx >> 4;
1193 const int dxys = dxy >> 4;
1194 const int dyxs = dyx >> 4;
1195 const int dyys = dyy >> 4;
1196 const uint16_t r4[4] = {
r,
r,
r, r };
1197 const uint16_t dxy4[4] = { dxys, dxys, dxys, dxys };
1198 const uint16_t dyy4[4] = { dyys, dyys, dyys, dyys };
1200 #define MAX_STRIDE 4096U
1202 uint8_t edge_buf[(MAX_H + 1) * MAX_STRIDE];
1205 const int dxw = (dxx - (1 << (16 +
shift))) * (w - 1);
1206 const int dyh = (dyy - (1 << (16 +
shift))) * (h - 1);
1207 const int dxh = dxy * (h - 1);
1208 const int dyw = dyx * (w - 1);
1209 int need_emu = (unsigned)ix >= width - w ||
1210 (
unsigned)iy >= height - h;
1213 ((ox ^ (ox + dxw)) | (ox ^ (ox + dxh)) | (ox ^ (ox + dxw + dxh)) |
1214 (oy ^ (oy + dyw)) | (oy ^ (oy + dyh)) | (oy ^ (oy + dyw + dyh))) >> (16 +
shift)
1216 || (dxx | dxy | dyx | dyy) & 15
1217 || (need_emu && (h > MAX_H ||
stride > MAX_STRIDE))) {
1219 ff_gmc_c(dst, src,
stride, h, ox, oy, dxx, dxy, dyx, dyy,
1220 shift,
r, width, height);
1226 emu_edge_fn(edge_buf, src, stride, w + 1, h + 1, ix, iy, width, height);
1231 "movd %0, %%mm6 \n\t"
1232 "pxor %%mm7, %%mm7 \n\t"
1233 "punpcklwd %%mm6, %%mm6 \n\t"
1234 "punpcklwd %%mm6, %%mm6 \n\t"
1238 for (x = 0; x < w; x += 4) {
1239 uint16_t dx4[4] = { oxs - dxys + dxxs * (x + 0),
1240 oxs - dxys + dxxs * (x + 1),
1241 oxs - dxys + dxxs * (x + 2),
1242 oxs - dxys + dxxs * (x + 3) };
1243 uint16_t dy4[4] = { oys - dyys + dyxs * (x + 0),
1244 oys - dyys + dyxs * (x + 1),
1245 oys - dyys + dyxs * (x + 2),
1246 oys - dyys + dyxs * (x + 3) };
1248 for (
y = 0;
y < h;
y++) {
1250 "movq %0, %%mm4 \n\t"
1251 "movq %1, %%mm5 \n\t"
1252 "paddw %2, %%mm4 \n\t"
1253 "paddw %3, %%mm5 \n\t"
1254 "movq %%mm4, %0 \n\t"
1255 "movq %%mm5, %1 \n\t"
1256 "psrlw $12, %%mm4 \n\t"
1257 "psrlw $12, %%mm5 \n\t"
1258 :
"+m"(*dx4),
"+m"(*dy4)
1259 :
"m"(*dxy4),
"m"(*dyy4)
1263 "movq %%mm6, %%mm2 \n\t"
1264 "movq %%mm6, %%mm1 \n\t"
1265 "psubw %%mm4, %%mm2 \n\t"
1266 "psubw %%mm5, %%mm1 \n\t"
1267 "movq %%mm2, %%mm0 \n\t"
1268 "movq %%mm4, %%mm3 \n\t"
1269 "pmullw %%mm1, %%mm0 \n\t"
1270 "pmullw %%mm5, %%mm3 \n\t"
1271 "pmullw %%mm5, %%mm2 \n\t"
1272 "pmullw %%mm4, %%mm1 \n\t"
1274 "movd %4, %%mm5 \n\t"
1275 "movd %3, %%mm4 \n\t"
1276 "punpcklbw %%mm7, %%mm5 \n\t"
1277 "punpcklbw %%mm7, %%mm4 \n\t"
1278 "pmullw %%mm5, %%mm3 \n\t"
1279 "pmullw %%mm4, %%mm2 \n\t"
1281 "movd %2, %%mm5 \n\t"
1282 "movd %1, %%mm4 \n\t"
1283 "punpcklbw %%mm7, %%mm5 \n\t"
1284 "punpcklbw %%mm7, %%mm4 \n\t"
1285 "pmullw %%mm5, %%mm1 \n\t"
1286 "pmullw %%mm4, %%mm0 \n\t"
1287 "paddw %5, %%mm1 \n\t"
1288 "paddw %%mm3, %%mm2 \n\t"
1289 "paddw %%mm1, %%mm0 \n\t"
1290 "paddw %%mm2, %%mm0 \n\t"
1292 "psrlw %6, %%mm0 \n\t"
1293 "packuswb %%mm0, %%mm0 \n\t"
1294 "movd %%mm0, %0 \n\t"
1297 :
"m"(src[0]),
"m"(src[1]),
1298 "m"(src[stride]),
"m"(src[stride + 1]),
1311 int stride,
int h,
int ox,
int oy,
1312 int dxx,
int dxy,
int dyx,
int dyy,
1313 int shift,
int r,
int width,
int height)
1315 gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
1316 width, height, &ff_emulated_edge_mc_8);
1320 int stride,
int h,
int ox,
int oy,
1321 int dxx,
int dxy,
int dyx,
int dyy,
1322 int shift,
int r,
int width,
int height)
1324 gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
1325 width, height, &ff_emulated_edge_mc_8);
1329 int stride,
int h,
int ox,
int oy,
1330 int dxx,
int dxy,
int dyx,
int dyy,
1331 int shift,
int r,
int width,
int height)
1333 gmc(dst, src, stride, h, ox, oy, dxx, dxy, dyx, dyy, shift, r,
1334 width, height, &ff_emulated_edge_mc_8);
1342 ptrdiff_t line_size,
int h);
1344 ptrdiff_t line_size,
int h);
1351 put_pixels8_mmx(dst, src, stride, 8);
1356 avg_pixels8_mmx(dst, src, stride, 8);
1361 put_pixels16_mmx(dst, src, stride, 16);
1366 avg_pixels16_mmx(dst, src, stride, 16);
1371 int stride,
int rnd)
1373 put_pixels8_mmx(dst, src, stride, 8);
1376 #if CONFIG_DIRAC_DECODER
1377 #define DIRAC_PIXOP(OPNAME2, OPNAME, EXT)\
1378 void ff_ ## OPNAME2 ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1381 ff_ ## OPNAME2 ## _dirac_pixels8_c(dst, src, stride, h);\
1383 OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
1385 void ff_ ## OPNAME2 ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1388 ff_ ## OPNAME2 ## _dirac_pixels16_c(dst, src, stride, h);\
1390 OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
1392 void ff_ ## OPNAME2 ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
1395 ff_ ## OPNAME2 ## _dirac_pixels32_c(dst, src, stride, h);\
1397 OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\
1398 OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\
1403 DIRAC_PIXOP(
put,
put, mmx)
1404 DIRAC_PIXOP(
avg, avg, mmx)
1408 DIRAC_PIXOP(avg, ff_avg, mmxext)
1413 ff_put_dirac_pixels16_c(dst, src, stride, h);
1420 ff_avg_dirac_pixels16_c(dst, src, stride, h);
1427 ff_put_dirac_pixels32_c(dst, src, stride, h);
1436 ff_avg_dirac_pixels32_c(dst, src, stride, h);
1448 static void ff_libmpeg2mmx_idct_put(
uint8_t *dest,
int line_size,
1455 static void ff_libmpeg2mmx_idct_add(
uint8_t *dest,
int line_size,
1462 static void ff_libmpeg2mmx2_idct_put(
uint8_t *dest,
int line_size,
1469 static void ff_libmpeg2mmx2_idct_add(
uint8_t *dest,
int line_size,
1477 static void vector_clipf_sse(
float *dst,
const float *src,
1478 float min,
float max,
int len)
1482 "movss %3, %%xmm4 \n\t"
1483 "movss %4, %%xmm5 \n\t"
1484 "shufps $0, %%xmm4, %%xmm4 \n\t"
1485 "shufps $0, %%xmm5, %%xmm5 \n\t"
1487 "movaps (%2, %0), %%xmm0 \n\t"
1488 "movaps 16(%2, %0), %%xmm1 \n\t"
1489 "movaps 32(%2, %0), %%xmm2 \n\t"
1490 "movaps 48(%2, %0), %%xmm3 \n\t"
1491 "maxps %%xmm4, %%xmm0 \n\t"
1492 "maxps %%xmm4, %%xmm1 \n\t"
1493 "maxps %%xmm4, %%xmm2 \n\t"
1494 "maxps %%xmm4, %%xmm3 \n\t"
1495 "minps %%xmm5, %%xmm0 \n\t"
1496 "minps %%xmm5, %%xmm1 \n\t"
1497 "minps %%xmm5, %%xmm2 \n\t"
1498 "minps %%xmm5, %%xmm3 \n\t"
1499 "movaps %%xmm0, (%1, %0) \n\t"
1500 "movaps %%xmm1, 16(%1, %0) \n\t"
1501 "movaps %%xmm2, 32(%1, %0) \n\t"
1502 "movaps %%xmm3, 48(%1, %0) \n\t"
1506 :
"r"(dst),
"r"(src),
"m"(min),
"m"(max)
1519 int order,
int mul);
1522 int order,
int mul);
1525 int order,
int mul);
1528 const int16_t *window,
unsigned int len);
1530 const int16_t *window,
unsigned int len);
1532 const int16_t *window,
unsigned int len);
1534 const int16_t *window,
unsigned int len);
1536 const int16_t *window,
unsigned int len);
1538 const int16_t *window,
unsigned int len);
1545 int *left,
int *left_top);
1560 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU, PREFIX) \
1562 c->PFX ## _pixels_tab[IDX][ 0] = PREFIX ## PFX ## SIZE ## _mc00_ ## CPU; \
1563 c->PFX ## _pixels_tab[IDX][ 1] = PREFIX ## PFX ## SIZE ## _mc10_ ## CPU; \
1564 c->PFX ## _pixels_tab[IDX][ 2] = PREFIX ## PFX ## SIZE ## _mc20_ ## CPU; \
1565 c->PFX ## _pixels_tab[IDX][ 3] = PREFIX ## PFX ## SIZE ## _mc30_ ## CPU; \
1566 c->PFX ## _pixels_tab[IDX][ 4] = PREFIX ## PFX ## SIZE ## _mc01_ ## CPU; \
1567 c->PFX ## _pixels_tab[IDX][ 5] = PREFIX ## PFX ## SIZE ## _mc11_ ## CPU; \
1568 c->PFX ## _pixels_tab[IDX][ 6] = PREFIX ## PFX ## SIZE ## _mc21_ ## CPU; \
1569 c->PFX ## _pixels_tab[IDX][ 7] = PREFIX ## PFX ## SIZE ## _mc31_ ## CPU; \
1570 c->PFX ## _pixels_tab[IDX][ 8] = PREFIX ## PFX ## SIZE ## _mc02_ ## CPU; \
1571 c->PFX ## _pixels_tab[IDX][ 9] = PREFIX ## PFX ## SIZE ## _mc12_ ## CPU; \
1572 c->PFX ## _pixels_tab[IDX][10] = PREFIX ## PFX ## SIZE ## _mc22_ ## CPU; \
1573 c->PFX ## _pixels_tab[IDX][11] = PREFIX ## PFX ## SIZE ## _mc32_ ## CPU; \
1574 c->PFX ## _pixels_tab[IDX][12] = PREFIX ## PFX ## SIZE ## _mc03_ ## CPU; \
1575 c->PFX ## _pixels_tab[IDX][13] = PREFIX ## PFX ## SIZE ## _mc13_ ## CPU; \
1576 c->PFX ## _pixels_tab[IDX][14] = PREFIX ## PFX ## SIZE ## _mc23_ ## CPU; \
1577 c->PFX ## _pixels_tab[IDX][15] = PREFIX ## PFX ## SIZE ## _mc33_ ## CPU; \
1580 #define SET_HPEL_FUNCS(PFX, IDX, SIZE, CPU) \
1582 c->PFX ## _pixels_tab IDX [0] = PFX ## _pixels ## SIZE ## _ ## CPU; \
1583 c->PFX ## _pixels_tab IDX [1] = PFX ## _pixels ## SIZE ## _x2_ ## CPU; \
1584 c->PFX ## _pixels_tab IDX [2] = PFX ## _pixels ## SIZE ## _y2_ ## CPU; \
1585 c->PFX ## _pixels_tab IDX [3] = PFX ## _pixels ## SIZE ## _xy2_ ## CPU; \
1598 if (!high_bit_depth) {
1612 #if CONFIG_VIDEODSP && (ARCH_X86_32 || !HAVE_YASM)
1620 if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1634 const int high_bit_depth = bit_depth > 8;
1645 if (!high_bit_depth) {
1662 if (!high_bit_depth) {
1674 #if HAVE_MMXEXT_EXTERNAL
1702 if (!high_bit_depth) {
1742 if (!high_bit_depth) {
1754 #if HAVE_INLINE_ASM && CONFIG_VIDEODSP
1764 const int high_bit_depth = bit_depth > 8;
1766 #if HAVE_SSE2_INLINE
1775 #if HAVE_SSE2_EXTERNAL
1778 if (!high_bit_depth) {
1794 }
else if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
1804 #if HAVE_SSSE3_EXTERNAL
1822 #if HAVE_SSE4_EXTERNAL
1831 #if HAVE_7REGS && HAVE_INLINE_ASM
1849 c->
idct_put = ff_libmpeg2mmx2_idct_put;
1850 c->
idct_add = ff_libmpeg2mmx2_idct_add;
1853 c->
idct_put = ff_libmpeg2mmx_idct_put;
1854 c->
idct_add = ff_libmpeg2mmx_idct_add;
1899 if (CONFIG_ENCODERS)