/*
 * Copyright (c) 2021 Loongson Technology Corporation Limited
 * Contributed by Hao Chen <chenhao@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavcodec/vp9dsp.h"
#include "libavutil/loongarch/loongson_intrinsics.h"
#include "vp9dsp_loongarch.h"

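/* Store eight 16-byte rows starting at _dst: the first four rows go out at
 * byte offsets 0, _stride, _stride2 and _stride3, then _dst is advanced by
 * _stride4 and the remaining four rows are stored the same way. */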
#define LSX_ST_8(_dst0, _dst1, _dst2, _dst3, _dst4,   \
                 _dst5, _dst6, _dst7, _dst, _stride,  \
                 _stride2, _stride3, _stride4)        \
{                                                     \
    __lsx_vst(_dst0, _dst, 0);                        \
    __lsx_vstx(_dst1, _dst, _stride);                 \
    __lsx_vstx(_dst2, _dst, _stride2);                \
    __lsx_vstx(_dst3, _dst, _stride3);                \
    _dst += _stride4;                                 \
    __lsx_vst(_dst4, _dst, 0);                        \
    __lsx_vstx(_dst5, _dst, _stride);                 \
    __lsx_vstx(_dst6, _dst, _stride2);                \
    __lsx_vstx(_dst7, _dst, _stride3);                \
}

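/* Store eight 32-byte rows: each source vector is written twice per row, at
 * byte offsets 0 and 16, so both 16-byte halves of a 32-pixel row carry the
 * same data; _dst advances by _stride after every row. */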
#define LSX_ST_8X16(_dst0, _dst1, _dst2, _dst3, _dst4,  \
                    _dst5, _dst6, _dst7, _dst, _stride) \
{                                                       \
    __lsx_vst(_dst0, _dst, 0);                          \
    __lsx_vst(_dst0, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst1, _dst, 0);                          \
    __lsx_vst(_dst1, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst2, _dst, 0);                          \
    __lsx_vst(_dst2, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst3, _dst, 0);                          \
    __lsx_vst(_dst3, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst4, _dst, 0);                          \
    __lsx_vst(_dst4, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst5, _dst, 0);                          \
    __lsx_vst(_dst5, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst6, _dst, 0);                          \
    __lsx_vst(_dst6, _dst, 16);                         \
    _dst += _stride;                                    \
    __lsx_vst(_dst7, _dst, 0);                          \
    __lsx_vst(_dst7, _dst, 16);                         \
    _dst += _stride;                                    \
}

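/* Vertical intra prediction: the row of top neighbours (src) is loaded once
 * and replicated into every row of the block; the left edge is unused. */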
void ff_vert_16x16_lsx(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left,
                       const uint8_t *src)
{
    __m128i src0;
    ptrdiff_t stride2 = dst_stride << 1;
    ptrdiff_t stride3 = stride2 + dst_stride;
    ptrdiff_t stride4 = stride2 << 1;
    src0 = __lsx_vld(src, 0);
    LSX_ST_8(src0, src0, src0, src0, src0, src0, src0, src0, dst,
             dst_stride, stride2, stride3, stride4);
    dst += stride4;
    LSX_ST_8(src0, src0, src0, src0, src0, src0, src0, src0, dst,
             dst_stride, stride2, stride3, stride4);
}

void ff_vert_32x32_lsx(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *left,
                       const uint8_t *src)
{
    uint32_t row;
    __m128i src0, src1;

    DUP2_ARG2(__lsx_vld, src, 0, src, 16, src0, src1);
    for (row = 32; row--;) {
        __lsx_vst(src0, dst, 0);
        __lsx_vst(src1, dst, 16);
        dst += dst_stride;
    }
}

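/* Horizontal intra prediction: broadcast one left-neighbour pixel across each
 * row. The left edge (src) is supplied bottom-to-top, which the reversed
 * register numbering below compensates for; the 32x32 variant works the
 * same way. */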
void ff_hor_16x16_lsx(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src,
                      const uint8_t *top)
{
    __m128i src0, src1, src2, src3, src4, src5, src6, src7;
    __m128i src8, src9, src10, src11, src12, src13, src14, src15;
    ptrdiff_t stride2 = dst_stride << 1;
    ptrdiff_t stride3 = stride2 + dst_stride;
    ptrdiff_t stride4 = stride2 << 1;

    src15 = __lsx_vldrepl_b(src, 0);
    src14 = __lsx_vldrepl_b(src, 1);
    src13 = __lsx_vldrepl_b(src, 2);
    src12 = __lsx_vldrepl_b(src, 3);
    src11 = __lsx_vldrepl_b(src, 4);
    src10 = __lsx_vldrepl_b(src, 5);
    src9 = __lsx_vldrepl_b(src, 6);
    src8 = __lsx_vldrepl_b(src, 7);
    src7 = __lsx_vldrepl_b(src, 8);
    src6 = __lsx_vldrepl_b(src, 9);
    src5 = __lsx_vldrepl_b(src, 10);
    src4 = __lsx_vldrepl_b(src, 11);
    src3 = __lsx_vldrepl_b(src, 12);
    src2 = __lsx_vldrepl_b(src, 13);
    src1 = __lsx_vldrepl_b(src, 14);
    src0 = __lsx_vldrepl_b(src, 15);
    LSX_ST_8(src0, src1, src2, src3, src4, src5, src6, src7, dst,
             dst_stride, stride2, stride3, stride4);
    dst += stride4;
    LSX_ST_8(src8, src9, src10, src11, src12, src13, src14, src15, dst,
             dst_stride, stride2, stride3, stride4);
}

void ff_hor_32x32_lsx(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src,
                      const uint8_t *top)
{
    __m128i src0, src1, src2, src3, src4, src5, src6, src7;
    __m128i src8, src9, src10, src11, src12, src13, src14, src15;
    __m128i src16, src17, src18, src19, src20, src21, src22, src23;
    __m128i src24, src25, src26, src27, src28, src29, src30, src31;

    src31 = __lsx_vldrepl_b(src, 0);
    src30 = __lsx_vldrepl_b(src, 1);
    src29 = __lsx_vldrepl_b(src, 2);
    src28 = __lsx_vldrepl_b(src, 3);
    src27 = __lsx_vldrepl_b(src, 4);
    src26 = __lsx_vldrepl_b(src, 5);
    src25 = __lsx_vldrepl_b(src, 6);
    src24 = __lsx_vldrepl_b(src, 7);
    src23 = __lsx_vldrepl_b(src, 8);
    src22 = __lsx_vldrepl_b(src, 9);
    src21 = __lsx_vldrepl_b(src, 10);
    src20 = __lsx_vldrepl_b(src, 11);
    src19 = __lsx_vldrepl_b(src, 12);
    src18 = __lsx_vldrepl_b(src, 13);
    src17 = __lsx_vldrepl_b(src, 14);
    src16 = __lsx_vldrepl_b(src, 15);
    src15 = __lsx_vldrepl_b(src, 16);
    src14 = __lsx_vldrepl_b(src, 17);
    src13 = __lsx_vldrepl_b(src, 18);
    src12 = __lsx_vldrepl_b(src, 19);
    src11 = __lsx_vldrepl_b(src, 20);
    src10 = __lsx_vldrepl_b(src, 21);
    src9 = __lsx_vldrepl_b(src, 22);
    src8 = __lsx_vldrepl_b(src, 23);
    src7 = __lsx_vldrepl_b(src, 24);
    src6 = __lsx_vldrepl_b(src, 25);
    src5 = __lsx_vldrepl_b(src, 26);
    src4 = __lsx_vldrepl_b(src, 27);
    src3 = __lsx_vldrepl_b(src, 28);
    src2 = __lsx_vldrepl_b(src, 29);
    src1 = __lsx_vldrepl_b(src, 30);
    src0 = __lsx_vldrepl_b(src, 31);
    LSX_ST_8X16(src0, src1, src2, src3, src4, src5, src6, src7,
                dst, dst_stride);
    LSX_ST_8X16(src8, src9, src10, src11, src12, src13, src14, src15,
                dst, dst_stride);
    LSX_ST_8X16(src16, src17, src18, src19, src20, src21, src22, src23,
                dst, dst_stride);
    LSX_ST_8X16(src24, src25, src26, src27, src28, src29, src30, src31,
                dst, dst_stride);
}

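/* DC intra prediction: sum the left and top neighbours (4 + 4 pixels here),
 * round and shift by 3, and fill the whole block with the resulting byte. */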
void ff_dc_4x4_lsx(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src_left,
                   const uint8_t *src_top)
{
    __m128i tmp0, tmp1, dst0;

    tmp0 = __lsx_vldrepl_w(src_top, 0);
    tmp1 = __lsx_vldrepl_w(src_left, 0);
    dst0 = __lsx_vilvl_w(tmp1, tmp0);
    dst0 = __lsx_vhaddw_hu_bu(dst0, dst0);
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);
    dst0 = __lsx_vsrari_w(dst0, 3);
    dst0 = __lsx_vshuf4i_b(dst0, 0);
    __lsx_vstelm_w(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_w(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_w(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_w(dst0, dst, 0, 0);
}

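/* DC prediction from a single edge: ff_dc_top_NxN_lsx averages only the top
 * neighbours and ff_dc_left_NxN_lsx only the left ones, so the rounding
 * shift drops by one compared with the two-edge case. */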
#define INTRA_DC_TL_4X4(dir)                                    \
void ff_dc_##dir##_4x4_lsx(uint8_t *dst, ptrdiff_t dst_stride,  \
                           const uint8_t *left,                 \
                           const uint8_t *top)                  \
{                                                               \
    __m128i tmp0, dst0;                                         \
                                                                \
    tmp0 = __lsx_vldrepl_w(dir, 0);                             \
    dst0 = __lsx_vhaddw_hu_bu(tmp0, tmp0);                      \
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);                      \
    dst0 = __lsx_vsrari_w(dst0, 2);                             \
    dst0 = __lsx_vshuf4i_b(dst0, 0);                            \
    __lsx_vstelm_w(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_w(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_w(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_w(dst0, dst, 0, 0);                            \
}
INTRA_DC_TL_4X4(top);
INTRA_DC_TL_4X4(left);

void ff_dc_8x8_lsx(uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *src_left,
                   const uint8_t *src_top)
{
    __m128i tmp0, tmp1, dst0;

    tmp0 = __lsx_vldrepl_d(src_top, 0);
    tmp1 = __lsx_vldrepl_d(src_left, 0);
    dst0 = __lsx_vilvl_d(tmp1, tmp0);
    dst0 = __lsx_vhaddw_hu_bu(dst0, dst0);
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);
    dst0 = __lsx_vhaddw_qu_du(dst0, dst0);
    dst0 = __lsx_vsrari_w(dst0, 4);
    dst0 = __lsx_vreplvei_b(dst0, 0);
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(dst0, dst, 0, 0);
}

#define INTRA_DC_TL_8X8(dir)                                    \
void ff_dc_##dir##_8x8_lsx(uint8_t *dst, ptrdiff_t dst_stride,  \
                           const uint8_t *left,                 \
                           const uint8_t *top)                  \
{                                                               \
    __m128i tmp0, dst0;                                         \
                                                                \
    tmp0 = __lsx_vldrepl_d(dir, 0);                             \
    dst0 = __lsx_vhaddw_hu_bu(tmp0, tmp0);                      \
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);                      \
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);                      \
    dst0 = __lsx_vsrari_w(dst0, 3);                             \
    dst0 = __lsx_vreplvei_b(dst0, 0);                           \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
    dst += dst_stride;                                          \
    __lsx_vstelm_d(dst0, dst, 0, 0);                            \
}

INTRA_DC_TL_8X8(top);
INTRA_DC_TL_8X8(left);

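/* For 16x16 the two 16-pixel edges are widened and added lane-wise first;
 * the horizontal-add chain then collapses the vector to a single sum of 32
 * pixels, hence the rounding shift by 5. */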
void ff_dc_16x16_lsx(uint8_t *dst, ptrdiff_t dst_stride,
                     const uint8_t *src_left, const uint8_t *src_top)
{
    __m128i tmp0, tmp1, dst0;
    ptrdiff_t stride2 = dst_stride << 1;
    ptrdiff_t stride3 = stride2 + dst_stride;
    ptrdiff_t stride4 = stride2 << 1;

    tmp0 = __lsx_vld(src_top, 0);
    tmp1 = __lsx_vld(src_left, 0);
    DUP2_ARG2(__lsx_vhaddw_hu_bu, tmp0, tmp0, tmp1, tmp1, tmp0, tmp1);
    dst0 = __lsx_vadd_h(tmp0, tmp1);
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);
    dst0 = __lsx_vhaddw_qu_du(dst0, dst0);
    dst0 = __lsx_vsrari_w(dst0, 5);
    dst0 = __lsx_vreplvei_b(dst0, 0);
    LSX_ST_8(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst,
             dst_stride, stride2, stride3, stride4);
    dst += stride4;
    LSX_ST_8(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst,
             dst_stride, stride2, stride3, stride4);
}

#define INTRA_DC_TL_16X16(dir)                                     \
void ff_dc_##dir##_16x16_lsx(uint8_t *dst, ptrdiff_t dst_stride,   \
                             const uint8_t *left,                  \
                             const uint8_t *top)                   \
{                                                                  \
    __m128i tmp0, dst0;                                            \
    ptrdiff_t stride2 = dst_stride << 1;                           \
    ptrdiff_t stride3 = stride2 + dst_stride;                      \
    ptrdiff_t stride4 = stride2 << 1;                              \
                                                                   \
    tmp0 = __lsx_vld(dir, 0);                                      \
    dst0 = __lsx_vhaddw_hu_bu(tmp0, tmp0);                         \
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);                         \
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);                         \
    dst0 = __lsx_vhaddw_qu_du(dst0, dst0);                         \
    dst0 = __lsx_vsrari_w(dst0, 4);                                \
    dst0 = __lsx_vreplvei_b(dst0, 0);                              \
    LSX_ST_8(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst,  \
             dst_stride, stride2, stride3, stride4);               \
    dst += stride4;                                                \
    LSX_ST_8(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst,  \
             dst_stride, stride2, stride3, stride4);               \
}

INTRA_DC_TL_16X16(top);
INTRA_DC_TL_16X16(left);

void ff_dc_32x32_lsx(uint8_t *dst, ptrdiff_t dst_stride,
                     const uint8_t *src_left, const uint8_t *src_top)
{
    __m128i tmp0, tmp1, tmp2, tmp3, dst0;

    DUP2_ARG2(__lsx_vld, src_top, 0, src_top, 16, tmp0, tmp1);
    DUP2_ARG2(__lsx_vld, src_left, 0, src_left, 16, tmp2, tmp3);
    DUP4_ARG2(__lsx_vhaddw_hu_bu, tmp0, tmp0, tmp1, tmp1, tmp2, tmp2,
              tmp3, tmp3, tmp0, tmp1, tmp2, tmp3);
    DUP2_ARG2(__lsx_vadd_h, tmp0, tmp1, tmp2, tmp3, tmp0, tmp1);
    dst0 = __lsx_vadd_h(tmp0, tmp1);
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);
    dst0 = __lsx_vhaddw_qu_du(dst0, dst0);
    dst0 = __lsx_vsrari_w(dst0, 6);
    dst0 = __lsx_vreplvei_b(dst0, 0);
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,
                dst, dst_stride);
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,
                dst, dst_stride);
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,
                dst, dst_stride);
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,
                dst, dst_stride);
}

#define INTRA_DC_TL_32X32(dir)                                         \
void ff_dc_##dir##_32x32_lsx(uint8_t *dst, ptrdiff_t dst_stride,       \
                             const uint8_t *left,                      \
                             const uint8_t *top)                       \
{                                                                      \
    __m128i tmp0, tmp1, dst0;                                          \
                                                                       \
    DUP2_ARG2(__lsx_vld, dir, 0, dir, 16, tmp0, tmp1);                 \
    DUP2_ARG2(__lsx_vhaddw_hu_bu, tmp0, tmp0, tmp1, tmp1, tmp0, tmp1); \
    dst0 = __lsx_vadd_h(tmp0, tmp1);                                   \
    dst0 = __lsx_vhaddw_wu_hu(dst0, dst0);                             \
    dst0 = __lsx_vhaddw_du_wu(dst0, dst0);                             \
    dst0 = __lsx_vhaddw_qu_du(dst0, dst0);                             \
    dst0 = __lsx_vsrari_w(dst0, 5);                                    \
    dst0 = __lsx_vreplvei_b(dst0, 0);                                  \
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,        \
                dst, dst_stride);                                      \
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,        \
                dst, dst_stride);                                      \
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,        \
                dst, dst_stride);                                      \
    LSX_ST_8X16(dst0, dst0, dst0, dst0, dst0, dst0, dst0, dst0,        \
                dst, dst_stride);                                      \
}

INTRA_DC_TL_32X32(top);
INTRA_DC_TL_32X32(left);

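/* DC prediction with a fixed value (127, 128 or 129), used when the
 * corresponding neighbours are unavailable: __lsx_vldi(val) replicates the
 * constant into every byte lane and the block is filled with it. */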
#define INTRA_PREDICT_VALDC_16X16_LSX(val)                            \
void ff_dc_##val##_16x16_lsx(uint8_t *dst, ptrdiff_t dst_stride,      \
                             const uint8_t *left, const uint8_t *top) \
{                                                                     \
    __m128i out = __lsx_vldi(val);                                    \
    ptrdiff_t stride2 = dst_stride << 1;                              \
    ptrdiff_t stride3 = stride2 + dst_stride;                         \
    ptrdiff_t stride4 = stride2 << 1;                                 \
                                                                      \
    LSX_ST_8(out, out, out, out, out, out, out, out, dst,             \
             dst_stride, stride2, stride3, stride4);                  \
    dst += stride4;                                                   \
    LSX_ST_8(out, out, out, out, out, out, out, out, dst,             \
             dst_stride, stride2, stride3, stride4);                  \
}

INTRA_PREDICT_VALDC_16X16_LSX(127);
INTRA_PREDICT_VALDC_16X16_LSX(128);
INTRA_PREDICT_VALDC_16X16_LSX(129);

#define INTRA_PREDICT_VALDC_32X32_LSX(val)                               \
void ff_dc_##val##_32x32_lsx(uint8_t *dst, ptrdiff_t dst_stride,         \
                             const uint8_t *left, const uint8_t *top)    \
{                                                                        \
    __m128i out = __lsx_vldi(val);                                       \
                                                                         \
    LSX_ST_8X16(out, out, out, out, out, out, out, out, dst, dst_stride);\
    LSX_ST_8X16(out, out, out, out, out, out, out, out, dst, dst_stride);\
    LSX_ST_8X16(out, out, out, out, out, out, out, out, dst, dst_stride);\
    LSX_ST_8X16(out, out, out, out, out, out, out, out, dst, dst_stride);\
}

INTRA_PREDICT_VALDC_32X32_LSX(127);
INTRA_PREDICT_VALDC_32X32_LSX(128);
INTRA_PREDICT_VALDC_32X32_LSX(129);

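/* TM (TrueMotion) intra prediction: each pixel is predicted as
 * left[y] + top[x] - top_left. The sums are widened to 16 bits, top_left is
 * subtracted with unsigned saturation (clamping at 0), __lsx_vsat_hu(.., 7)
 * clamps at 255, and the rows are narrowed back to bytes. */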
void ff_tm_4x4_lsx(uint8_t *dst, ptrdiff_t dst_stride,
                   const uint8_t *src_left, const uint8_t *src_top_ptr)
{
    uint8_t top_left = src_top_ptr[-1];
    __m128i tmp0, tmp1, tmp2, tmp3, reg0, reg1;
    __m128i src0, src1, src2, src3;
    __m128i dst0, dst1, dst2, dst3;

    reg0 = __lsx_vreplgr2vr_h(top_left);
    reg1 = __lsx_vld(src_top_ptr, 0);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 0, src_left, 1, src_left, 2, src_left,
              3, tmp3, tmp2, tmp1, tmp0);
    DUP4_ARG2(__lsx_vilvl_b, tmp0, reg1, tmp1, reg1, tmp2, reg1, tmp3, reg1,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vhaddw_hu_bu, src0, src0, src1, src1, src2, src2, src3,
              src3, dst0, dst1, dst2, dst3);
    DUP4_ARG2(__lsx_vssub_hu, dst0, reg0, dst1, reg0, dst2, reg0, dst3, reg0,
              dst0, dst1, dst2, dst3);
    DUP4_ARG2(__lsx_vsat_hu, dst0, 7, dst1, 7, dst2, 7, dst3, 7,
              dst0, dst1, dst2, dst3);
    DUP2_ARG2(__lsx_vpickev_b, dst1, dst0, dst3, dst2, dst0, dst1);
    __lsx_vstelm_w(dst0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_w(dst0, dst, 0, 2);
    dst += dst_stride;
    __lsx_vstelm_w(dst1, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_w(dst1, dst, 0, 2);
}

void ff_tm_8x8_lsx(uint8_t *dst, ptrdiff_t dst_stride,
                   const uint8_t *src_left, const uint8_t *src_top_ptr)
{
    uint8_t top_left = src_top_ptr[-1];
    __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    __m128i src0, src1, src2, src3, src4, src5, src6, src7;
    __m128i reg0, reg1;

    reg0 = __lsx_vreplgr2vr_h(top_left);
    reg1 = __lsx_vld(src_top_ptr, 0);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 0, src_left, 1, src_left, 2, src_left,
              3, tmp7, tmp6, tmp5, tmp4);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 4, src_left, 5, src_left, 6, src_left,
              7, tmp3, tmp2, tmp1, tmp0);
    DUP4_ARG2(__lsx_vilvl_b, tmp0, reg1, tmp1, reg1, tmp2, reg1, tmp3, reg1,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vilvl_b, tmp4, reg1, tmp5, reg1, tmp6, reg1, tmp7, reg1,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vhaddw_hu_bu, src0, src0, src1, src1, src2, src2, src3,
              src3, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vhaddw_hu_bu, src4, src4, src5, src5, src6, src6, src7,
              src7, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vssub_hu, src0, reg0, src1, reg0, src2, reg0, src3, reg0,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vssub_hu, src4, reg0, src5, reg0, src6, reg0, src7, reg0,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsat_hu, src0, 7, src1, 7, src2, 7, src3, 7,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsat_hu, src4, 7, src5, 7, src6, 7, src7, 7,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vpickev_b, src1, src0, src3, src2, src5, src4, src7, src6,
              src0, src1, src2, src3);
    __lsx_vstelm_d(src0, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(src0, dst, 0, 1);
    dst += dst_stride;
    __lsx_vstelm_d(src1, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(src1, dst, 0, 1);
    dst += dst_stride;
    __lsx_vstelm_d(src2, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(src2, dst, 0, 1);
    dst += dst_stride;
    __lsx_vstelm_d(src3, dst, 0, 0);
    dst += dst_stride;
    __lsx_vstelm_d(src3, dst, 0, 1);
}

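/* The 16-pixel-wide TM variants split each row into even and odd byte lanes
 * (__lsx_vaddwev_h_bu / __lsx_vaddwod_h_bu) so a full row fits in two
 * halfword vectors, then re-interleave the lanes with __lsx_vpackev_b. */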
void ff_tm_16x16_lsx(uint8_t *dst, ptrdiff_t dst_stride,
                     const uint8_t *src_left, const uint8_t *src_top_ptr)
{
    uint8_t top_left = src_top_ptr[-1];
    __m128i tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    __m128i tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
    __m128i src0, src1, src2, src3, src4, src5, src6, src7;
    __m128i reg0, reg1;
    ptrdiff_t stride2 = dst_stride << 1;
    ptrdiff_t stride3 = stride2 + dst_stride;
    ptrdiff_t stride4 = stride2 << 1;

    reg0 = __lsx_vreplgr2vr_h(top_left);
    reg1 = __lsx_vld(src_top_ptr, 0);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 0, src_left, 1, src_left, 2, src_left,
              3, tmp15, tmp14, tmp13, tmp12);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 4, src_left, 5, src_left, 6, src_left,
              7, tmp11, tmp10, tmp9, tmp8);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 8, src_left, 9, src_left, 10,
              src_left, 11, tmp7, tmp6, tmp5, tmp4);
    DUP4_ARG2(__lsx_vldrepl_b, src_left, 12, src_left, 13, src_left, 14,
              src_left, 15, tmp3, tmp2, tmp1, tmp0);
    DUP4_ARG2(__lsx_vaddwev_h_bu, tmp0, reg1, tmp1, reg1, tmp2, reg1, tmp3,
              reg1, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vaddwod_h_bu, tmp0, reg1, tmp1, reg1, tmp2, reg1, tmp3,
              reg1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vssub_hu, src0, reg0, src1, reg0, src2, reg0, src3, reg0,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vssub_hu, src4, reg0, src5, reg0, src6, reg0, src7, reg0,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsat_hu, src0, 7, src1, 7, src2, 7, src3, 7,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsat_hu, src4, 7, src5, 7, src6, 7, src7, 7,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vpackev_b, src4, src0, src5, src1, src6, src2, src7, src3,
              tmp0, tmp1, tmp2, tmp3);
    DUP4_ARG2(__lsx_vaddwev_h_bu, tmp4, reg1, tmp5, reg1, tmp6, reg1, tmp7,
              reg1, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vaddwod_h_bu, tmp4, reg1, tmp5, reg1, tmp6, reg1, tmp7,
              reg1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vssub_hu, src0, reg0, src1, reg0, src2, reg0, src3, reg0,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vssub_hu, src4, reg0, src5, reg0, src6, reg0, src7, reg0,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsat_hu, src0, 7, src1, 7, src2, 7, src3, 7,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsat_hu, src4, 7, src5, 7, src6, 7, src7, 7,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vpackev_b, src4, src0, src5, src1, src6, src2, src7, src3,
              tmp4, tmp5, tmp6, tmp7);
    DUP4_ARG2(__lsx_vaddwev_h_bu, tmp8, reg1, tmp9, reg1, tmp10, reg1, tmp11,
              reg1, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vaddwod_h_bu, tmp8, reg1, tmp9, reg1, tmp10, reg1, tmp11,
              reg1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vssub_hu, src0, reg0, src1, reg0, src2, reg0, src3, reg0,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vssub_hu, src4, reg0, src5, reg0, src6, reg0, src7, reg0,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsat_hu, src0, 7, src1, 7, src2, 7, src3, 7,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsat_hu, src4, 7, src5, 7, src6, 7, src7, 7,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vpackev_b, src4, src0, src5, src1, src6, src2, src7, src3,
              tmp8, tmp9, tmp10, tmp11);
    DUP4_ARG2(__lsx_vaddwev_h_bu, tmp12, reg1, tmp13, reg1, tmp14, reg1,
              tmp15, reg1, src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vaddwod_h_bu, tmp12, reg1, tmp13, reg1, tmp14, reg1,
              tmp15, reg1, src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vssub_hu, src0, reg0, src1, reg0, src2, reg0, src3, reg0,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vssub_hu, src4, reg0, src5, reg0, src6, reg0, src7, reg0,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vsat_hu, src0, 7, src1, 7, src2, 7, src3, 7,
              src0, src1, src2, src3);
    DUP4_ARG2(__lsx_vsat_hu, src4, 7, src5, 7, src6, 7, src7, 7,
              src4, src5, src6, src7);
    DUP4_ARG2(__lsx_vpackev_b, src4, src0, src5, src1, src6, src2, src7, src3,
              tmp12, tmp13, tmp14, tmp15);
    LSX_ST_8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, dst,
             dst_stride, stride2, stride3, stride4);
    dst += stride4;
    LSX_ST_8(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, dst,
             dst_stride, stride2, stride3, stride4);
}

void ff_tm_32x32_lsx(uint8_t *dst, ptrdiff_t dst_stride,
                     const uint8_t *src_left, const uint8_t *src_top_ptr)
{
    uint8_t top_left = src_top_ptr[-1];
    uint32_t loop_cnt;
    __m128i tmp0, tmp1, tmp2, tmp3, reg0, reg1, reg2;
    __m128i src0, src1, src2, src3, src4, src5, src6, src7;
    __m128i dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7;

    reg0 = __lsx_vreplgr2vr_h(top_left);
    DUP2_ARG2(__lsx_vld, src_top_ptr, 0, src_top_ptr, 16, reg1, reg2);

    src_left += 28;
    for (loop_cnt = 8; loop_cnt--;) {
        DUP4_ARG2(__lsx_vldrepl_b, src_left, 0, src_left, 1, src_left, 2,
                  src_left, 3, tmp3, tmp2, tmp1, tmp0);
        src_left -= 4;
        DUP4_ARG2(__lsx_vaddwev_h_bu, tmp0, reg1, tmp1, reg1, tmp2, reg1,
                  tmp3, reg1, src0, src1, src2, src3);
        DUP4_ARG2(__lsx_vaddwod_h_bu, tmp0, reg1, tmp1, reg1, tmp2, reg1,
                  tmp3, reg1, src4, src5, src6, src7);
        DUP4_ARG2(__lsx_vssub_hu, src0, reg0, src1, reg0, src2, reg0, src3,
                  reg0, src0, src1, src2, src3);
        DUP4_ARG2(__lsx_vssub_hu, src4, reg0, src5, reg0, src6, reg0, src7,
                  reg0, src4, src5, src6, src7);
        DUP4_ARG2(__lsx_vaddwev_h_bu, tmp0, reg2, tmp1, reg2, tmp2, reg2,
                  tmp3, reg2, dst0, dst1, dst2, dst3);
        DUP4_ARG2(__lsx_vaddwod_h_bu, tmp0, reg2, tmp1, reg2, tmp2, reg2,
                  tmp3, reg2, dst4, dst5, dst6, dst7);
        DUP4_ARG2(__lsx_vssub_hu, dst0, reg0, dst1, reg0, dst2, reg0, dst3,
                  reg0, dst0, dst1, dst2, dst3);
        DUP4_ARG2(__lsx_vssub_hu, dst4, reg0, dst5, reg0, dst6, reg0, dst7,
                  reg0, dst4, dst5, dst6, dst7);
        DUP4_ARG2(__lsx_vsat_hu, src0, 7, src1, 7, src2, 7, src3, 7,
                  src0, src1, src2, src3);
        DUP4_ARG2(__lsx_vsat_hu, src4, 7, src5, 7, src6, 7, src7, 7,
                  src4, src5, src6, src7);
        DUP4_ARG2(__lsx_vsat_hu, dst0, 7, dst1, 7, dst2, 7, dst3, 7,
                  dst0, dst1, dst2, dst3);
        DUP4_ARG2(__lsx_vsat_hu, dst4, 7, dst5, 7, dst6, 7, dst7, 7,
                  dst4, dst5, dst6, dst7);
        DUP4_ARG2(__lsx_vpackev_b, src4, src0, src5, src1, src6, src2, src7,
                  src3, src0, src1, src2, src3);
        DUP4_ARG2(__lsx_vpackev_b, dst4, dst0, dst5, dst1, dst6, dst2, dst7,
                  dst3, dst0, dst1, dst2, dst3);
        __lsx_vst(src0, dst, 0);
        __lsx_vst(dst0, dst, 16);
        dst += dst_stride;
        __lsx_vst(src1, dst, 0);
        __lsx_vst(dst1, dst, 16);
        dst += dst_stride;
        __lsx_vst(src2, dst, 0);
        __lsx_vst(dst2, dst, 16);
        dst += dst_stride;
        __lsx_vst(src3, dst, 0);
        __lsx_vst(dst3, dst, 16);
        dst += dst_stride;
    }
}
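
/*
 * Usage sketch (illustrative; the exact assignments live in the LoongArch
 * vp9dsp init code, not in this file): these routines are plugged into the
 * decoder's intra-prediction function-pointer table, along the lines of
 *
 *     dsp->intra_pred[TX_16X16][VERT_PRED]   = ff_vert_16x16_lsx;
 *     dsp->intra_pred[TX_16X16][HOR_PRED]    = ff_hor_16x16_lsx;
 *     dsp->intra_pred[TX_16X16][DC_PRED]     = ff_dc_16x16_lsx;
 *     dsp->intra_pred[TX_16X16][TM_VP8_PRED] = ff_tm_16x16_lsx;
 */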