/*
 * FIX(x, s): convert the real-valued constant x to a fixed-point integer
 * with s fractional bits, i.e. (int)(x * 2^s + 0.5).
 * Adding 0.5 before the truncating cast rounds to nearest for non-negative
 * x only. Both the shift count and the whole expansion are parenthesised so
 * that compound arguments (e.g. "a | b") and surrounding operators cannot
 * re-associate with the macro's own operators.
 */
#define FIX(x,s) ((int)((x) * (1 << (s)) + 0.5))

/*
 * MULTIPLY16H(x, k): multiply x by k and keep the product shifted right by
 * 16 bits. The product is formed in the promoted type of the operands, so
 * the caller is responsible for keeping it within range.
 */
#define MULTIPLY16H(x,k) (((x) * (k)) >> 16)
/*
 * THRESHOLD(r, x, t): statement macro. The visible part stores x into r when
 * (unsigned)(x + t) >= t * 2. For a non-negative t that single unsigned
 * comparison is true exactly when x lies outside the range [-t, t).
 * NOTE(review): the definition ends in a line continuation and the line it
 * continues onto is not visible here, so what happens to r when the test
 * fails cannot be stated from this view.
 * NOTE(review): t is expanded without parentheses, and both x and t are
 * expanded twice, so only plain, side-effect-free operands are safe --
 * confirm at the call sites.
 */
36 #define THRESHOLD(r,x,t) \
37 if (((unsigned)((x) + t)) >= t * 2) r = (x); \
/*
 * DESCALE(x, n): divide x by 2^n with rounding: adds half of the divisor,
 * 1 << (n - 1), and then shifts right by n. n must be at least 1.
 * n is expanded twice, so it must be free of side effects. For a negative
 * sum the result relies on the implementation's signed right shift.
 * The shift count is parenthesised so that compound arguments
 * (e.g. "a | b") bind as a single operand.
 */
#define DESCALE(x,n) (((x) + (1 << ((n) - 1))) >> (n))
/*
 * Eight initialiser rows of eight entries each. Taken together the rows
 * contain every integer from 0 to 63 exactly once.
 * NOTE(review): the declaration these rows belong to is not visible here;
 * presumably this is the table read as d[pos] by the store macros further
 * down -- confirm.
 */
56 { 0, 48, 12, 60, 3, 51, 15, 63, },
57 { 32, 16, 44, 28, 35, 19, 47, 31, },
58 { 8, 56, 4, 52, 11, 59, 7, 55, },
59 { 40, 24, 36, 20, 43, 27, 39, 23, },
60 { 2, 50, 14, 62, 1, 49, 13, 61, },
61 { 34, 18, 46, 30, 33, 17, 45, 29, },
62 { 10, 58, 6, 54, 9, 57, 5, 53, },
63 { 42, 26, 38, 22, 41, 25, 37, 21, },
/*
 * Fragment of a store routine: the tail of its parameter list, three lines
 * of its per-position store macro, and its two loop headers. The lines in
 * between are not visible here.
 *
 * What the visible macro lines do for one position pos:
 *   - temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale)
 *   - src[x + pos] and the entry at offset -8 * src_stride from it are
 *     both set to 0
 *   - temp is passed through av_clip_uint8() (defined elsewhere; presumably
 *     a clamp to the 8-bit range)
 * The loops run y over [0, height) and x over [0, width) in steps of 8.
 *
 * NOTE(review): x and y are int while width and height are ptrdiff_t;
 * extents above INT_MAX would overflow the indices -- confirm callers never
 * pass such sizes.
 * NOTE(review): the shift count 6 - log2_scale is only valid for
 * 0 <= log2_scale <= 6 -- confirm the caller guarantees that range.
 */
68 ptrdiff_t dst_stride, ptrdiff_t src_stride,
69 ptrdiff_t
width, ptrdiff_t
height, ptrdiff_t log2_scale)
72 temp = (src[x + pos] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
73 src[x + pos] = src[x + pos - 8 * src_stride] = 0; \
74 temp = av_clip_uint8(temp); \
77 for (
int y = 0; y <
height; y++) {
79 for (
int x = 0; x <
width; x += 8) {
/*
 * Fragment of a second store routine: the tail of its parameter list, the
 * head of its STORE2(pos) macro, and its two loop headers. The lines in
 * between are not visible here.
 *
 * What the visible STORE2 lines do for one position pos:
 *   - temp = (src[x + pos] + src[x + pos + 16 * src_stride]
 *             + (d[pos] >> log2_scale)) >> (6 - log2_scale)
 *   - the entry at offset +16 * src_stride is set to 0
 *   - temp is passed through av_clip_uint8() (defined elsewhere; presumably
 *     a clamp to the 8-bit range)
 * The loops run y over [0, height) and x over [0, width) in steps of 8.
 *
 * NOTE(review): x and y are int while width and height are ptrdiff_t;
 * extents above INT_MAX would overflow the indices -- confirm callers never
 * pass such sizes.
 * NOTE(review): the shift count 6 - log2_scale is only valid for
 * 0 <= log2_scale <= 6 -- confirm the caller guarantees that range.
 */
97 ptrdiff_t dst_stride, ptrdiff_t src_stride,
98 ptrdiff_t
width, ptrdiff_t
height, ptrdiff_t log2_scale)
100 #define STORE2(pos) \
101 temp = (src[x + pos] + src[x + pos + 16 * src_stride] + (d[pos] >> log2_scale)) >> (6 - log2_scale); \
102 src[x + pos + 16 * src_stride] = 0; \
103 temp = av_clip_uint8(temp); \
106 for (
int y = 0; y <
height; y++) {
108 for (
int x = 0; x <
width; x += 8) {
/**
 * Multiply a 64-entry table by an integer factor.
 *
 * Writes thr_adr[i] = q * thr_adr_noq[i] for every i in [0, 64).
 *
 * @param thr_adr_noq  source table, 64 entries, read only
 * @param thr_adr      destination table, 64 entries; declared restrict, so
 *                     it must not overlap the source
 * @param q            factor applied to every entry
 *
 * The product is computed in int and then narrowed to int16_t. The cast
 * makes that narrowing explicit; callers must keep q * entry within the
 * int16_t range for the stored value to equal the product.
 */
void ff_mul_thrmat_c(const int16_t *restrict thr_adr_noq, int16_t *restrict thr_adr,
                     int q)
{
    enum { THRMAT_ENTRIES = 64 };

    for (int a = 0; a < THRMAT_ENTRIES; a++) {
        thr_adr[a] = (int16_t)(q * thr_adr_noq[a]);
    }
}
/*
 * Fragment of a column transform routine: the tail of its parameter list
 * (output, cnt), its loop structure, and a few arithmetic and store lines.
 * Most of the body is not visible here, so only what these lines show is
 * described.
 */
131 int16_t *restrict
output,
int cnt)
133 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
/*
 * The outer loop consumes cnt two units at a time and restarts the
 * threshold pointer from thr_adr on every pass; the inner loop runs
 * DCTSIZE times per pass.
 */
142 for (; cnt > 0; cnt -= 2) {
143 const int16_t *threshold = thr_adr;
144 for (
int ctr =
DCTSIZE; ctr > 0; ctr--) {
/*
 * Sums and differences of tmp0..tmp3, each shifted right by 2, are then
 * recombined into tmp0..tmp3. tmp12 is assigned on a line not visible here.
 */
179 tmp10 = (tmp0 + tmp2) >> 2;
180 tmp11 = (tmp0 - tmp2) >> 2;
182 tmp13 = (tmp1 + tmp3) >>2;
185 tmp0 = tmp10 + tmp13;
186 tmp3 = tmp10 - tmp13;
187 tmp1 = tmp11 + tmp12;
188 tmp2 = tmp11 - tmp12;
/* z11 and z13 are assigned on lines not visible here. */
218 z10 = (tmp6 - tmp5) * 2;
220 z12 = (tmp4 - tmp7) * 2;
222 tmp7 = (z11 + z13) >> 2;
/*
 * Store stage: rows 0..5 of wsptr (row pitch DCTSIZE) are accumulated with
 * +=, while rows 6 and 7 are overwritten with =.
 * NOTE(review): presumably the overwrite is intentional (rows 6 and 7 not
 * yet holding earlier contributions) -- confirm against the caller.
 */
232 wsptr[
DCTSIZE * 0] += (tmp0 + tmp7);
233 wsptr[
DCTSIZE * 1] += (tmp1 + tmp6);
234 wsptr[
DCTSIZE * 2] += (tmp2 + tmp5);
235 wsptr[
DCTSIZE * 3] += (tmp3 - tmp4);
236 wsptr[
DCTSIZE * 4] += (tmp3 + tmp4);
237 wsptr[
DCTSIZE * 5] += (tmp2 - tmp5);
238 wsptr[
DCTSIZE * 6] = (tmp1 - tmp6);
239 wsptr[
DCTSIZE * 7] = (tmp0 - tmp7);
/*
 * ff_row_idct_c: for each of cnt iterations, combine eight consecutive
 * input values wsptr[0..7] and accumulate eight results into the output.
 *
 * wsptr          read-only input values
 * output_adr     destination buffer (restrict); the output pointer outptr
 *                used below is set up on a line not visible here,
 *                presumably from this argument
 * output_stride  distance, in int16_t elements, between the eight values
 *                written per iteration
 * cnt            number of iterations
 *
 * Only part of the body is visible here; the comments below describe the
 * visible lines only.
 */
250 void ff_row_idct_c(
const int16_t *restrict wsptr, int16_t *restrict output_adr,
251 ptrdiff_t output_stride,
int cnt)
253 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
260 for (; cnt > 0; cnt--) {
/*
 * Inputs 0..3: pairwise sums and differences, recombined into tmp0..tmp3.
 * tmp12 is assigned on a line not visible here.
 */
263 tmp10 = wsptr[2] + wsptr[3];
264 tmp11 = wsptr[2] - wsptr[3];
266 tmp13 = wsptr[0] + wsptr[1];
269 tmp0 = tmp10 + tmp13;
270 tmp3 = tmp10 - tmp13;
271 tmp1 = tmp11 + tmp12;
272 tmp2 = tmp11 - tmp12;
/* Inputs 4..7: pairwise sums and differences. */
280 z13 = wsptr[4] + wsptr[5];
281 z10 = wsptr[4] - wsptr[5];
282 z11 = wsptr[6] + wsptr[7];
283 z12 = wsptr[6] - wsptr[7];
/*
 * tmp7 and new values of tmp10..tmp12 are assigned on lines not visible
 * here; each of tmp10..tmp12 is scaled by 8 as tmp6, tmp5 and tmp4 are
 * chained from tmp7.
 */
292 tmp6 = tmp12 * 8 - tmp7;
293 tmp5 = tmp11 * 8 - tmp6;
294 tmp4 = tmp10 * 8 + tmp5;
/*
 * Output stage: each result is descaled by 3 bits (DESCALE adds 4, then
 * shifts right by 3) and added to the existing output value (+=); the eight
 * outputs are output_stride elements apart.
 */
297 outptr[0 * output_stride] +=
DESCALE(tmp0 + tmp7, 3);
298 outptr[1 * output_stride] +=
DESCALE(tmp1 + tmp6, 3);
299 outptr[2 * output_stride] +=
DESCALE(tmp2 + tmp5, 3);
300 outptr[3 * output_stride] +=
DESCALE(tmp3 - tmp4, 3);
301 outptr[4 * output_stride] +=
DESCALE(tmp3 + tmp4, 3);
302 outptr[5 * output_stride] +=
DESCALE(tmp2 - tmp5, 3);
303 outptr[6 * output_stride] +=
DESCALE(tmp1 - tmp6, 3);
304 outptr[7 * output_stride] +=
DESCALE(tmp0 - tmp7, 3);
/*
 * Fragment of a row transform routine: the tail of its parameter list
 * (line_size, cnt), its loop header, its input stage and its output stores.
 * The intermediate computations (tmp10..tmp13, z1, z2, z4, z11, z13) are
 * on lines not visible here.
 */
312 ptrdiff_t line_size,
int cnt)
314 int_simd16_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
323 for (; cnt > 0; cnt--) {
/*
 * Input stage: eight samples spaced line_size apart are combined as
 * mirrored pairs (0,7), (1,6), (2,5), (3,4). The sums go to tmp0..tmp3 and
 * the differences to tmp7..tmp4.
 */
324 tmp0 = pixels[line_size * 0] + pixels[line_size * 7];
325 tmp7 = pixels[line_size * 0] - pixels[line_size * 7];
326 tmp1 = pixels[line_size * 1] + pixels[line_size * 6];
327 tmp6 = pixels[line_size * 1] - pixels[line_size * 6];
328 tmp2 = pixels[line_size * 2] + pixels[line_size * 5];
329 tmp5 = pixels[line_size * 2] - pixels[line_size * 5];
330 tmp3 = pixels[line_size * 3] + pixels[line_size * 4];
331 tmp4 = pixels[line_size * 3] - pixels[line_size * 4];
/*
 * Output stage: results are written as sum/difference pairs into eight
 * consecutive entries dataptr[0..7].
 */
342 dataptr[2] = tmp10 + tmp11;
343 dataptr[3] = tmp10 - tmp11;
346 dataptr[0] = tmp13 + z1;
347 dataptr[1] = tmp13 - z1;
363 dataptr[4] = z13 + z2;
364 dataptr[5] = z13 - z2;
365 dataptr[6] = z11 + z4;
366 dataptr[7] = z11 - z4;