FFmpeg
h264dsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  * Heiher <r@hev.cc>
8  *
9  * This file is part of FFmpeg.
10  *
11  * FFmpeg is free software; you can redistribute it and/or
12  * modify it under the terms of the GNU Lesser General Public
13  * License as published by the Free Software Foundation; either
14  * version 2.1 of the License, or (at your option) any later version.
15  *
16  * FFmpeg is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19  * Lesser General Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser General Public
22  * License along with FFmpeg; if not, write to the Free Software
23  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24  */
25 
27 #include "h264dsp_mips.h"
29 #include "libavutil/mem_internal.h"
30 
/*
 * Add a 4x4 block of 16-bit values from src to four rows of 8-bit pixels
 * in dst, and clear the 32 bytes read from src.
 *
 * dst    : first pixel row; the following rows are at dst + stride,
 *          dst + 2*stride and dst + 3*stride.
 * src    : sixteen int16_t values (32 bytes); zeroed by this function.
 * stride : distance between consecutive dst rows, in bytes.
 *
 * NOTE(review): the MMI_* load/store macros are defined outside this file;
 * from their names they presumably wrap 8-byte (LDC1), 16-byte (SQC1) and
 * 4-byte (ULWC1/SWC1) accesses -- confirm against their definitions.
 */
31 void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
32 {
33  double ftmp[9];
35 
36  __asm__ volatile (
 /* ftmp0 = 0, used both for clearing src and as the zero operand below. */
37  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* Fetch the four groups of src values at byte offsets 0x00..0x18. */
38  MMI_LDC1(%[ftmp1], %[src], 0x00)
39  MMI_LDC1(%[ftmp2], %[src], 0x08)
40  MMI_LDC1(%[ftmp3], %[src], 0x10)
41  MMI_LDC1(%[ftmp4], %[src], 0x18)
42  /* memset(src, 0, 32); */
43  MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x00)
44  MMI_SQC1(%[ftmp0], %[ftmp0], %[src], 0x10)
 /* Fetch the current pixels of the four dst rows. */
45  MMI_ULWC1(%[ftmp5], %[dst0], 0x00)
46  MMI_ULWC1(%[ftmp6], %[dst1], 0x00)
47  MMI_ULWC1(%[ftmp7], %[dst2], 0x00)
48  MMI_ULWC1(%[ftmp8], %[dst3], 0x00)
 /* Interleave the pixel bytes with zero so they become 16-bit lanes. */
49  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
50  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
51  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
52  "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
 /* 16-bit add of the src values and the widened pixels. */
53  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
54  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
55  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
56  "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
 /* Pack back to bytes with unsigned saturation. */
57  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
58  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
59  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
60  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Write the four result rows back to dst. */
61  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
62  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
63  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
64  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
65  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
66  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
67  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
68  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
70  [ftmp8]"=&f"(ftmp[8])
71  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
72  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
73  [src]"r"(src)
74  : "memory"
75  );
76 
77 }
78 
79 void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
80 {
81  double ftmp[12];
82  uint64_t tmp[1];
85 
86  __asm__ volatile (
87  MMI_LDC1(%[ftmp0], %[block], 0x00)
88  MMI_LDC1(%[ftmp1], %[block], 0x08)
89  MMI_LDC1(%[ftmp2], %[block], 0x10)
90  MMI_LDC1(%[ftmp3], %[block], 0x18)
91  /* memset(block, 0, 32) */
92  "pxor %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
93  MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x00)
94  MMI_SQC1(%[ftmp4], %[ftmp4], %[block], 0x10)
95  "dli %[tmp0], 0x01 \n\t"
96  "mtc1 %[tmp0], %[ftmp8] \n\t"
97  "dli %[tmp0], 0x06 \n\t"
98  "mtc1 %[tmp0], %[ftmp9] \n\t"
99  "psrah %[ftmp4], %[ftmp1], %[ftmp8] \n\t"
100  "psrah %[ftmp5], %[ftmp3], %[ftmp8] \n\t"
101  "psubh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
102  "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
103  "paddh %[ftmp10], %[ftmp2], %[ftmp0] \n\t"
104  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
105  "paddh %[ftmp11], %[ftmp5], %[ftmp10] \n\t"
106  "psubh %[ftmp2], %[ftmp10], %[ftmp5] \n\t"
107  "paddh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
108  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
109  "punpckhhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t"
110  "punpcklhw %[ftmp5], %[ftmp11], %[ftmp10] \n\t"
111  "punpckhhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t"
112  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
113  "punpckhwd %[ftmp2], %[ftmp5], %[ftmp0] \n\t"
114  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
115  "punpcklwd %[ftmp10], %[ftmp1], %[ftmp4] \n\t"
116  "punpckhwd %[ftmp0], %[ftmp1], %[ftmp4] \n\t"
117  "paddh %[ftmp5], %[ftmp5], %[ff_pw_32] \n\t"
118  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
119  "psrah %[ftmp3], %[ftmp0], %[ftmp8] \n\t"
120  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
121  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
122  "paddh %[ftmp1], %[ftmp10], %[ftmp5] \n\t"
123  "psubh %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
124  "paddh %[ftmp10], %[ftmp3], %[ftmp1] \n\t"
125  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
126  "paddh %[ftmp11], %[ftmp4], %[ftmp5] \n\t"
127  "psubh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
128  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
129  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
130  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
131  "psrah %[ftmp3], %[ftmp10], %[ftmp9] \n\t"
132  "psrah %[ftmp4], %[ftmp11], %[ftmp9] \n\t"
133  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
134  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
135  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
136  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
137  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
138  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
139  MMI_SWC1(%[ftmp2], %[dst], 0x00)
140  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
141  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
142  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
143  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
144  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
145  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
146  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
147  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
148  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
149  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
150  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
151  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
152  MMI_SWC1(%[ftmp2], %[dst], 0x00)
153  "packushb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
154  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
155  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
156  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
157  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
158  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
159  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
160  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
163  [tmp0]"=&r"(tmp[0])
164  : [dst]"r"(dst), [block]"r"(block),
165  [stride]"r"((mips_reg)stride), [ff_pw_32]"f"(ff_pw_32.f)
166  : "memory"
167  );
168 
169 }
170 
/*
 * 8x8 inverse transform of block, added to eight rows of 8-bit pixels in
 * dst; all 128 bytes of block are cleared along the way.
 *
 * dst    : first pixel row; rows are stride bytes apart.
 * block  : 64 int16_t coefficients (128 bytes); zeroed by this function.
 * stride : distance between consecutive dst rows, in bytes.
 *
 * The asm lowers $sp by 0x20 and uses the four 8-byte slots at
 * $sp+0x00..0x18 as scratch; $sp is raised again at the end of the asm.
 * Further intermediates are parked in tmp1..tmp6 (via dmfc1/dmtc1) and in
 * ftmp10..ftmp15.
 *
 * NOTE(review): dst is listed as an input-only "r" operand, yet the asm
 * advances it with PTR_ADDU. GCC assumes input operands are unchanged on
 * exit; making it a read-write operand would be the usual remedy, but check
 * the 30-operand limit first ("+" operands count twice).
 * NOTE(review): the MMI_* macros, PTR_ADDI/PTR_ADDIU/PTR_ADDU and mips_reg
 * are defined outside this file -- confirm their exact expansion there.
 */
171 void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
172 {
173  double ftmp[16];
174  uint64_t tmp[7];
175  mips_reg addr[1];
178 
179  __asm__ volatile (
 /*
  * block[0] += 0x20 (presumably the rounding bias for the final >> 6),
  * reserve 0x20 bytes of stack scratch, and set ftmp8 = shift count 1.
  * First pass over the 8-byte groups at block offsets 0x00, 0x10, ... 0x70.
  */
180  "lhu %[tmp0], 0x00(%[block]) \n\t"
181  PTR_ADDI "$sp, $sp, -0x20 \n\t"
182  PTR_ADDIU "%[tmp0], %[tmp0], 0x20 \n\t"
183  MMI_LDC1(%[ftmp1], %[block], 0x10)
184  "sh %[tmp0], 0x00(%[block]) \n\t"
185  MMI_LDC1(%[ftmp2], %[block], 0x20)
186  "dli %[tmp0], 0x01 \n\t"
187  MMI_LDC1(%[ftmp3], %[block], 0x30)
188  "mtc1 %[tmp0], %[ftmp8] \n\t"
189  MMI_LDC1(%[ftmp5], %[block], 0x50)
190  MMI_LDC1(%[ftmp6], %[block], 0x60)
191  MMI_LDC1(%[ftmp7], %[block], 0x70)
192  "mov.d %[ftmp0], %[ftmp1] \n\t"
193  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
194  "psrah %[ftmp4], %[ftmp5], %[ftmp8] \n\t"
195  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
196  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
197  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
198  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
199  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
200  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
201  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
202  "psubh %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
203  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
204  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
205  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
206  "psrah %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
207  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
 /* ftmp9 = shift count 2. */
208  "dli %[tmp0], 0x02 \n\t"
209  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
210  "mtc1 %[tmp0], %[ftmp9] \n\t"
211  "mov.d %[ftmp7], %[ftmp1] \n\t"
212  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
213  "psrah %[ftmp3], %[ftmp4], %[ftmp9] \n\t"
214  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
215  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
216  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
217  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
218  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
219  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
220  "mov.d %[ftmp5], %[ftmp6] \n\t"
221  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
222  "psrah %[ftmp4], %[ftmp2], %[ftmp8] \n\t"
223  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
224  "psubh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
225  MMI_LDC1(%[ftmp2], %[block], 0x00)
226  MMI_LDC1(%[ftmp5], %[block], 0x40)
227  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
228  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
229  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
230  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
231  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
232  "paddh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
233  "psubh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
234  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
235  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
236  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
237  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
238  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
239  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
240  "paddh %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
241  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
242  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
243  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
244  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
245  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
246  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
 /* ftmp6 is spilled to block+0x00 and reloaded into ftmp0 further down. */
247  MMI_SDC1(%[ftmp6], %[block], 0x00)
248  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
 /* Halfword/word interleaves; results go to the stack slots and tmp1..tmp4. */
249  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp0] \n\t"
250  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
251  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp1] \n\t"
252  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
253  "punpckhwd %[ftmp1], %[ftmp7], %[ftmp3] \n\t"
254  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
255  "punpckhwd %[ftmp3], %[ftmp6], %[ftmp0] \n\t"
256  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
257  MMI_LDC1(%[ftmp0], %[block], 0x00)
258  MMI_SDC1(%[ftmp7], $sp, 0x00)
259  MMI_SDC1(%[ftmp1], $sp, 0x10)
260  "dmfc1 %[tmp1], %[ftmp6] \n\t"
261  "dmfc1 %[tmp3], %[ftmp3] \n\t"
262  "punpckhhw %[ftmp3], %[ftmp5], %[ftmp2] \n\t"
263  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
264  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t"
265  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
266  "punpckhwd %[ftmp0], %[ftmp5], %[ftmp4] \n\t"
267  "punpcklwd %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
268  "punpckhwd %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
269  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
270  MMI_SDC1(%[ftmp5], $sp, 0x08)
271  MMI_SDC1(%[ftmp0], $sp, 0x18)
272  "dmfc1 %[tmp2], %[ftmp3] \n\t"
273  "dmfc1 %[tmp4], %[ftmp4] \n\t"
 /* Same first-pass arithmetic for the 8-byte groups at offsets 0x08 ... 0x78. */
274  MMI_LDC1(%[ftmp1], %[block], 0x18)
275  MMI_LDC1(%[ftmp6], %[block], 0x28)
276  MMI_LDC1(%[ftmp2], %[block], 0x38)
277  MMI_LDC1(%[ftmp0], %[block], 0x58)
278  MMI_LDC1(%[ftmp3], %[block], 0x68)
279  MMI_LDC1(%[ftmp4], %[block], 0x78)
280  "mov.d %[ftmp7], %[ftmp1] \n\t"
281  "psrah %[ftmp5], %[ftmp0], %[ftmp8] \n\t"
282  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
283  "paddh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
284  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
285  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
286  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
287  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
288  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
289  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
290  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
291  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
292  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
293  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
294  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
295  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
296  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
297  "mov.d %[ftmp4], %[ftmp1] \n\t"
298  "psrah %[ftmp2], %[ftmp5], %[ftmp9] \n\t"
299  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
300  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
301  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
302  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
303  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
304  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
305  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
306  "mov.d %[ftmp0], %[ftmp3] \n\t"
307  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
308  "psrah %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
309  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
310  "psubh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
311  MMI_LDC1(%[ftmp6], %[block], 0x08)
312  MMI_LDC1(%[ftmp0], %[block], 0x48)
313  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
314  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
315  "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
316  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
317  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
318  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
319  "psubh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
320  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
321  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
322  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
323  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
324  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
325  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
326  "paddh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
327  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
328  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
329  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
330  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
331  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
332  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* ftmp3 is spilled to block+0x08 and reloaded into ftmp7 further down. */
333  MMI_SDC1(%[ftmp3], %[block], 0x08)
334  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Interleaves; results go to tmp5, tmp6 and ftmp10..ftmp15. */
335  "punpckhhw %[ftmp3], %[ftmp4], %[ftmp7] \n\t"
336  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
337  "punpckhhw %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
338  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
339  "punpckhwd %[ftmp1], %[ftmp4], %[ftmp2] \n\t"
340  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
341  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t"
342  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
343  MMI_LDC1(%[ftmp7], %[block], 0x08)
344  "dmfc1 %[tmp5], %[ftmp4] \n\t"
345  "mov.d %[ftmp10], %[ftmp1] \n\t"
346  "mov.d %[ftmp12], %[ftmp3] \n\t"
347  "mov.d %[ftmp14], %[ftmp2] \n\t"
348  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t"
349  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
350  "punpckhhw %[ftmp6], %[ftmp5], %[ftmp7] \n\t"
351  "punpcklhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
352  "punpckhwd %[ftmp7], %[ftmp0], %[ftmp5] \n\t"
353  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
354  "punpckhwd %[ftmp5], %[ftmp2], %[ftmp6] \n\t"
355  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
356  "dmfc1 %[tmp6], %[ftmp0] \n\t"
357  "mov.d %[ftmp11], %[ftmp7] \n\t"
358  "mov.d %[ftmp13], %[ftmp2] \n\t"
359  "mov.d %[ftmp15], %[ftmp5] \n\t"
 /*
  * addr0 = dst + 4, taken before dst is advanced; it addresses the second
  * group of four pixels in each row. Second pass, first half, follows.
  */
360  PTR_ADDIU "%[addr0], %[dst], 0x04 \n\t"
361  "mov.d %[ftmp7], %[ftmp10] \n\t"
362  "dmtc1 %[tmp3], %[ftmp6] \n\t"
363  MMI_LDC1(%[ftmp1], $sp, 0x10)
364  "dmtc1 %[tmp1], %[ftmp3] \n\t"
365  "mov.d %[ftmp4], %[ftmp1] \n\t"
366  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
367  "psrah %[ftmp0], %[ftmp7], %[ftmp8] \n\t"
368  "paddh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
369  "paddh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
370  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
371  "paddh %[ftmp0], %[ftmp0], %[ftmp14] \n\t"
372  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
373  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
374  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
375  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
376  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
377  "paddh %[ftmp4], %[ftmp4], %[ftmp14] \n\t"
378  "psubh %[ftmp7], %[ftmp7], %[ftmp14] \n\t"
379  "psrah %[ftmp5], %[ftmp14], %[ftmp8] \n\t"
380  "psubh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
381  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
382  "mov.d %[ftmp5], %[ftmp1] \n\t"
383  "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
384  "psrah %[ftmp6], %[ftmp0], %[ftmp9] \n\t"
385  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
386  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
387  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
388  "psrah %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
389  "psubh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
390  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
391  "mov.d %[ftmp7], %[ftmp12] \n\t"
392  "psrah %[ftmp2], %[ftmp12], %[ftmp8] \n\t"
393  "psrah %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
394  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
395  "psubh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
396  MMI_LDC1(%[ftmp3], $sp, 0x00)
397  "dmtc1 %[tmp5], %[ftmp7] \n\t"
398  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
399  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
400  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
401  "psubh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
402  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
403  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
404  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
405  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
406  "paddh %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
407  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
408  "paddh %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
409  "paddh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
410  "psubh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
411  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
412  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
413  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
414  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
415  "paddh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
416  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
417  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
418  MMI_SDC1(%[ftmp3], $sp, 0x00)
419  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
420  MMI_SDC1(%[ftmp0], $sp, 0x10)
421  "dmfc1 %[tmp1], %[ftmp2] \n\t"
 /* ftmp2 = 0; clear all 128 bytes of block. */
422  "pxor %[ftmp2], %[ftmp2], %[ftmp2] \n\t"
423  MMI_SDC1(%[ftmp2], %[block], 0x00)
424  MMI_SDC1(%[ftmp2], %[block], 0x08)
425  MMI_SDC1(%[ftmp2], %[block], 0x10)
426  MMI_SDC1(%[ftmp2], %[block], 0x18)
427  MMI_SDC1(%[ftmp2], %[block], 0x20)
428  MMI_SDC1(%[ftmp2], %[block], 0x28)
429  MMI_SDC1(%[ftmp2], %[block], 0x30)
430  MMI_SDC1(%[ftmp2], %[block], 0x38)
431  MMI_SDC1(%[ftmp2], %[block], 0x40)
432  MMI_SDC1(%[ftmp2], %[block], 0x48)
433  MMI_SDC1(%[ftmp2], %[block], 0x50)
434  MMI_SDC1(%[ftmp2], %[block], 0x58)
435  MMI_SDC1(%[ftmp2], %[block], 0x60)
436  MMI_SDC1(%[ftmp2], %[block], 0x68)
437  MMI_SDC1(%[ftmp2], %[block], 0x70)
438  MMI_SDC1(%[ftmp2], %[block], 0x78)
 /*
  * ftmp10 = shift count 6. For each pair of rows: >> 6, add to the four
  * pixels at dst, pack with unsigned saturation, store, then advance dst
  * by 2*stride.
  */
439  "dli %[tmp3], 0x06 \n\t"
440  "mtc1 %[tmp3], %[ftmp10] \n\t"
441  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
442  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
443  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
444  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
445  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
446  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
447  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
448  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
449  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
450  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
451  MMI_SWC1(%[ftmp3], %[dst], 0x00)
452  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
453  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
454  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
455  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
456  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
457  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
458  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
459  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
460  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
461  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
462  "paddh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
463  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
464  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
465  MMI_SWC1(%[ftmp3], %[dst], 0x00)
466  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
467  MMI_LDC1(%[ftmp5], $sp, 0x00)
468  MMI_LDC1(%[ftmp4], $sp, 0x10)
469  "dmtc1 %[tmp1], %[ftmp6] \n\t"
470  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
471  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
472  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
473  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
474  "psrah %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
475  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
476  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
477  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
478  "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
479  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
480  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
481  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
482  MMI_SWC1(%[ftmp3], %[dst], 0x00)
483  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
484  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
485  PTR_ADDU "%[dst], %[dst], %[stride] \n\t"
486  MMI_ULWC1(%[ftmp3], %[dst], 0x00)
487  MMI_LWXC1(%[ftmp0], %[dst], %[stride], 0x00)
488  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
489  "psrah %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
490  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
491  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
492  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
493  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
494  "packushb %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
495  "packushb %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
496  MMI_SWC1(%[ftmp3], %[dst], 0x00)
497  MMI_SWXC1(%[ftmp0], %[dst], %[stride], 0x00)
 /* Second pass, second half: inputs come from tmp2/tmp4/tmp6, the stack
  * slots at 0x08/0x18 and ftmp11/ftmp13/ftmp15. */
498  "dmtc1 %[tmp4], %[ftmp1] \n\t"
499  "dmtc1 %[tmp2], %[ftmp6] \n\t"
500  MMI_LDC1(%[ftmp4], $sp, 0x18)
501  "mov.d %[ftmp5], %[ftmp4] \n\t"
502  "psrah %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
503  "psrah %[ftmp7], %[ftmp11], %[ftmp8] \n\t"
504  "paddh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
505  "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
506  "paddh %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
507  "paddh %[ftmp4], %[ftmp4], %[ftmp11] \n\t"
508  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
509  "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
510  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
511  "psubh %[ftmp3], %[ftmp11], %[ftmp1] \n\t"
512  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
513  "paddh %[ftmp5], %[ftmp5], %[ftmp15] \n\t"
514  "psubh %[ftmp3], %[ftmp3], %[ftmp15] \n\t"
515  "psrah %[ftmp2], %[ftmp15], %[ftmp8] \n\t"
516  "psubh %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
517  "psubh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
518  "mov.d %[ftmp2], %[ftmp4] \n\t"
519  "psrah %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
520  "psrah %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
521  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
522  "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
523  "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t"
524  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
525  "psubh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
526  "psubh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
527  "mov.d %[ftmp3], %[ftmp13] \n\t"
528  "psrah %[ftmp0], %[ftmp13], %[ftmp8] \n\t"
529  "psrah %[ftmp7], %[ftmp6], %[ftmp8] \n\t"
530  "paddh %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
531  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
532  MMI_LDC1(%[ftmp6], $sp, 0x08)
533  "dmtc1 %[tmp6], %[ftmp3] \n\t"
534  "paddh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
535  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
536  "paddh %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
537  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
538  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
539  "paddh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
540  "psubh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
541  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
543  "psubh %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
544  "paddh %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
545  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
546  "psubh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
547  "paddh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
548  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
549  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
550  "paddh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
551  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
552  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
553  "paddh %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
554  MMI_SDC1(%[ftmp6], $sp, 0x08)
555  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
556  MMI_SDC1(%[ftmp7], $sp, 0x18)
557  "dmfc1 %[tmp2], %[ftmp0] \n\t"
 /* ftmp0 = 0; same shift/add/saturate/store sequence for the pixels at
  * addr0 (dst + 4), two rows at a time. */
558  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
559  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
560  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
561  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
562  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
563  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
564  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
565  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
566  "paddh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
567  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
568  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
569  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
570  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
571  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
572  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
573  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
574  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
575  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
576  "psrah %[ftmp4], %[ftmp4], %[ftmp10] \n\t"
577  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
578  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
579  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
580  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
581  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
582  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
583  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
584  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
585  MMI_LDC1(%[ftmp2], $sp, 0x08)
586  MMI_LDC1(%[ftmp5], $sp, 0x18)
587  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
588  "dmtc1 %[tmp2], %[ftmp1] \n\t"
589  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
590  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
591  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
592  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
593  "psrah %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
594  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
595  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
596  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
597  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
598  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
599  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
600  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
601  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
602  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
603  PTR_ADDU "%[addr0], %[addr0], %[stride] \n\t"
604  MMI_ULWC1(%[ftmp6], %[addr0], 0x00)
605  MMI_LWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
606  "psrah %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
607  "psrah %[ftmp1], %[ftmp1], %[ftmp10] \n\t"
608  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
609  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
610  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
611  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
612  "packushb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
613  "packushb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
614  MMI_SWC1(%[ftmp6], %[addr0], 0x00)
615  MMI_SWXC1(%[ftmp7], %[addr0], %[stride], 0x00)
 /* Release the 0x20 bytes of stack scratch reserved at the top. */
616  PTR_ADDIU "$sp, $sp, 0x20 \n\t"
617  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
618  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
619  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
620  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
621  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
622  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
623  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
624  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
625  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
626  [tmp2]"=&r"(tmp[2]), [tmp3]"=&r"(tmp[3]),
627  [tmp4]"=&r"(tmp[4]), [tmp5]"=&r"(tmp[5]),
628  [tmp6]"=&r"(tmp[6]),
631  [addr0]"=&r"(addr[0])
632  : [dst]"r"(dst), [block]"r"(block),
633  [stride]"r"((mips_reg)stride)
634  : "memory"
635  );
636 
637 }
638 
/*
 * DC-only variant for a 4x4 block: computes dc = (block[0] + 32) >> 6,
 * clears block[0], and adds dc to the four pixels of each of four dst rows.
 *
 * dst    : first pixel row; the other rows are at dst + stride,
 *          dst + 2*stride and dst + 3*stride.
 * block  : only block[0] is read and then set to 0.
 * stride : distance between consecutive dst rows, in bytes.
 *
 * NOTE(review): MMI_ULWC1/MMI_SWC1 are macros defined outside this file;
 * presumably 4-byte load/store -- confirm.
 */
639 void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
640 {
641  int dc = (block[0] + 32) >> 6;
642  double ftmp[6];
644 
645  block[0] = 0;
646 
647  __asm__ volatile (
 /* Move dc into ftmp5 and replicate its low halfword into all four
  * 16-bit lanes (pshufh with an all-zero selector). */
648  "mtc1 %[dc], %[ftmp5] \n\t"
649  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
650  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Fetch the four pixel rows. */
651  MMI_ULWC1(%[ftmp1], %[dst0], 0x00)
652  MMI_ULWC1(%[ftmp2], %[dst1], 0x00)
653  MMI_ULWC1(%[ftmp3], %[dst2], 0x00)
654  MMI_ULWC1(%[ftmp4], %[dst3], 0x00)
 /* Widen the pixel bytes to 16-bit lanes. */
655  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
656  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
657  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
658  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Signed saturating 16-bit add of dc to every lane. */
659  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
660  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
661  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
662  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* Pack back to bytes with unsigned saturation and store. */
663  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
664  "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
665  "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
666  "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
667  MMI_SWC1(%[ftmp1], %[dst0], 0x00)
668  MMI_SWC1(%[ftmp2], %[dst1], 0x00)
669  MMI_SWC1(%[ftmp3], %[dst2], 0x00)
670  MMI_SWC1(%[ftmp4], %[dst3], 0x00)
671  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
672  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
673  [ftmp4]"=&f"(ftmp[4]),
675  [ftmp5]"=&f"(ftmp[5])
676  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
677  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
678  [dc]"r"(dc)
679  : "memory"
680  );
681 }
682 
/*
 * DC-only variant for an 8x8 block: computes dc = (block[0] + 32) >> 6,
 * clears block[0], and adds dc to eight pixels in each of eight dst rows.
 * The rows are handled in two groups of four (dst0..dst3, dst4..dst7).
 *
 * dst    : first pixel row; row k is at dst + k*stride.
 * block  : only block[0] is read and then set to 0.
 * stride : distance between consecutive dst rows, in bytes.
 *
 * NOTE(review): MMI_LDC1/MMI_SDC1 are macros defined outside this file;
 * presumably 8-byte load/store -- confirm, including any alignment
 * requirement on dst.
 */
683 void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
684 {
685  int dc = (block[0] + 32) >> 6;
686  double ftmp[10];
688 
689  block[0] = 0;
690 
691  __asm__ volatile (
 /* Replicate the low halfword of dc into all four lanes of ftmp5. */
692  "mtc1 %[dc], %[ftmp5] \n\t"
693  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
694  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Rows 0..3: load, split into high/low halves widened to 16 bits. */
695  MMI_LDC1(%[ftmp1], %[dst0], 0x00)
696  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
697  MMI_LDC1(%[ftmp3], %[dst2], 0x00)
698  MMI_LDC1(%[ftmp4], %[dst3], 0x00)
699  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
700  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
701  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
702  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
703  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
704  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
705  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
706  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Signed saturating add of dc to both halves of every row. */
707  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
708  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
709  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
710  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
711  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
712  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
713  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
714  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
 /* Re-pack low and high halves to bytes (unsigned saturation), store. */
715  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
716  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
717  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
718  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
719  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
720  MMI_SDC1(%[ftmp2], %[dst1], 0x00)
721  MMI_SDC1(%[ftmp3], %[dst2], 0x00)
722  MMI_SDC1(%[ftmp4], %[dst3], 0x00)
723 
 /* Rows 4..7: identical sequence. */
724  MMI_LDC1(%[ftmp1], %[dst4], 0x00)
725  MMI_LDC1(%[ftmp2], %[dst5], 0x00)
726  MMI_LDC1(%[ftmp3], %[dst6], 0x00)
727  MMI_LDC1(%[ftmp4], %[dst7], 0x00)
728  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
729  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
730  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
731  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
732  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
733  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
734  "punpckhbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
735  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
736  "paddsh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
737  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
738  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
739  "paddsh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
740  "paddsh %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
741  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
742  "paddsh %[ftmp9], %[ftmp9], %[ftmp5] \n\t"
743  "paddsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
744  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
745  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
746  "packushb %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
747  "packushb %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
748  MMI_SDC1(%[ftmp1], %[dst4], 0x00)
749  MMI_SDC1(%[ftmp2], %[dst5], 0x00)
750  MMI_SDC1(%[ftmp3], %[dst6], 0x00)
751  MMI_SDC1(%[ftmp4], %[dst7], 0x00)
752  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
753  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
754  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
755  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
756  [ftmp8]"=&f"(ftmp[8]),
758  [ftmp9]"=&f"(ftmp[9])
759  : [dst0]"r"(dst), [dst1]"r"(dst+stride),
760  [dst2]"r"(dst+2*stride), [dst3]"r"(dst+3*stride),
761  [dst4]"r"(dst+4*stride), [dst5]"r"(dst+5*stride),
762  [dst6]"r"(dst+6*stride), [dst7]"r"(dst+7*stride),
763  [dc]"r"(dc)
764  : "memory"
765  );
766 }
767 
768 void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset,
769  int16_t *block, int stride,
770  const uint8_t nnzc[5 * 8])
771 {
772  int i;
773  for(i=0; i<16; i++){
774  int nnz = nnzc[ scan8[i] ];
775  if(nnz){
776  if(nnz==1 && ((int16_t*)block)[i*16])
777  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
778  stride);
779  else
780  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16,
781  stride);
782  }
783  }
784 }
785 
786 void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset,
787  int16_t *block, int stride, const uint8_t nnzc[5 * 8])
788 {
789  int i;
790  for(i=0; i<16; i++){
791  if(nnzc[ scan8[i] ])
792  ff_h264_idct_add_8_mmi(dst + block_offset[i], block + i*16, stride);
793  else if(((int16_t*)block)[i*16])
794  ff_h264_idct_dc_add_8_mmi(dst + block_offset[i], block + i*16,
795  stride);
796  }
797 }
798 
799 void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset,
800  int16_t *block, int stride, const uint8_t nnzc[5 * 8])
801 {
802  int i;
803  for(i=0; i<16; i+=4){
804  int nnz = nnzc[ scan8[i] ];
805  if(nnz){
806  if(nnz==1 && ((int16_t*)block)[i*16])
807  ff_h264_idct8_dc_add_8_mmi(dst + block_offset[i],
808  block + i*16, stride);
809  else
810  ff_h264_idct8_add_8_mmi(dst + block_offset[i], block + i*16,
811  stride);
812  }
813  }
814 }
815 
816 void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset,
817  int16_t *block, int stride, const uint8_t nnzc[15*8])
818 {
819  int i, j;
820  for(j=1; j<3; j++){
821  for(i=j*16; i<j*16+4; i++){
822  if(nnzc[ scan8[i] ])
823  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
824  block + i*16, stride);
825  else if(((int16_t*)block)[i*16])
826  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
827  block + i*16, stride);
828  }
829  }
830 }
831 
832 void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset,
833  int16_t *block, int stride, const uint8_t nnzc[15*8])
834 {
835  int i, j;
836 
837  for(j=1; j<3; j++){
838  for(i=j*16; i<j*16+4; i++){
839  if(nnzc[ scan8[i] ])
840  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i],
841  block + i*16, stride);
842  else if(((int16_t*)block)[i*16])
843  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i],
844  block + i*16, stride);
845  }
846  }
847 
848  for(j=1; j<3; j++){
849  for(i=j*16+4; i<j*16+8; i++){
850  if(nnzc[ scan8[i+4] ])
851  ff_h264_idct_add_8_mmi(dest[j-1] + block_offset[i+4],
852  block + i*16, stride);
853  else if(((int16_t*)block)[i*16])
854  ff_h264_idct_dc_add_8_mmi(dest[j-1] + block_offset[i+4],
855  block + i*16, stride);
856  }
857  }
858 }
859 
861  int qmul)
862 {
 /*
  * NOTE(review): the first line of this function's header is not present
  * here (the embedded numbering jumps from 859 to 861, and 865/1079 are
  * skipped too); only the trailing "int qmul)" survives. The operand list
  * below binds two further C variables, output and input -- confirm the
  * full prototype against the project header.
  *
  * What the visible code does: loads four 64-bit rows from input (byte
  * offsets 0x00..0x18), runs two add/sub butterfly passes with a halfword
  * rearrangement in between, scales the 16-bit results by qmul, and stores
  * 16 halfwords to output at byte offsets 0x00, 0x20, ..., 0x1e0 (one
  * every 16 int16_t). The input register is reused as scratch after the
  * loads, and qmul is modified; all three are read-write operands.
  */
863  double ftmp[10];
864  uint64_t tmp[2];
866 
867  __asm__ volatile (
 /* noreorder: the assembler keeps this exact order, so the instruction
  * following each branch/jump below sits in its delay slot. */
868  ".set noreorder \n\t"
 /* ftmp8 = 8 and ftmp9 = 0x20 are shift counts used later; the four input
  * rows are loaded into ftmp0..ftmp3. */
869  "dli %[tmp0], 0x08 \n\t"
870  MMI_LDC1(%[ftmp3], %[input], 0x18)
871  "mtc1 %[tmp0], %[ftmp8] \n\t"
872  MMI_LDC1(%[ftmp2], %[input], 0x10)
873  "dli %[tmp0], 0x20 \n\t"
874  MMI_LDC1(%[ftmp1], %[input], 0x08)
875  "mtc1 %[tmp0], %[ftmp9] \n\t"
876  MMI_LDC1(%[ftmp0], %[input], 0x00)
 /* First butterfly pass: halfword sums/differences of the rows
  * (ftmp4 is the scratch copy). */
877  "mov.d %[ftmp4], %[ftmp3] \n\t"
878  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
879  "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
880  "mov.d %[ftmp4], %[ftmp1] \n\t"
881  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
882  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
883  "mov.d %[ftmp4], %[ftmp3] \n\t"
884  "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
885  "psubh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
886  "mov.d %[ftmp4], %[ftmp2] \n\t"
887  "paddh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
888  "psubh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
 /* Interleave halfwords, then words, across the four registers
  * (looks like a 4x4 16-bit transpose -- TODO confirm). */
889  "mov.d %[ftmp4], %[ftmp3] \n\t"
890  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
891  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
892  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
893  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
894  "punpckhwd %[ftmp2], %[ftmp3], %[ftmp0] \n\t"
895  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
896  "mov.d %[ftmp0], %[ftmp4] \n\t"
897  "punpcklwd %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
898  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* Second butterfly pass on the rearranged data. */
899  "mov.d %[ftmp1], %[ftmp0] \n\t"
900  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
901  "psubh %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
902  "mov.d %[ftmp1], %[ftmp2] \n\t"
903  "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
904  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
905  "mov.d %[ftmp1], %[ftmp0] \n\t"
906  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
907  "psubh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
908  "mov.d %[ftmp1], %[ftmp4] \n\t"
 /* tmp0 = qmul - 0x7fff; branch to 1: when that is positive. The psubh
  * right after bgtz is in the delay slot and runs on both paths. */
909  "daddi %[tmp0], %[qmul], -0x7fff \n\t"
910  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
911  "bgtz %[tmp0], 1f \n\t"
912  "psubh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
 /* Path for qmul <= 0x7fff: 0x80 << 16 is added to qmul and each result
  * halfword is paired with a halfword of ff_pw_1, so pmaddhw presumably
  * yields value * qmul + 128 per 32-bit lane; psraw then shifts right by
  * ftmp8 (8) and packsswh narrows back to 16 bits. */
913  "ori %[tmp0], $0, 0x80 \n\t"
914  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
915  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
916  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
917  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
918  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
919  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
920  "mtc1 %[qmul], %[ftmp7] \n\t"
921  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
922  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
923  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
924  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
925  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
926  "psraw %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
927  "psraw %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
928  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
929  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
930  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
931  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* Scatter the packed results: tmp1 carries the register's low bits, and
  * input (now scratch) carries the upper word after the 32-bit ssrld by
  * ftmp9. Each halfword goes to its own 0x20-byte slot of output. */
932  "dmfc1 %[tmp1], %[ftmp0] \n\t"
933  "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
934  "mfc1 %[input], %[ftmp0] \n\t"
935  "sh %[tmp1], 0x00(%[output]) \n\t"
936  "sh %[input], 0x80(%[output]) \n\t"
937  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
938  PTR_SRL "%[input], %[input], 0x10 \n\t"
939  "sh %[tmp1], 0x20(%[output]) \n\t"
940  "sh %[input], 0xa0(%[output]) \n\t"
941  "dmfc1 %[tmp1], %[ftmp2] \n\t"
942  "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
943  "mfc1 %[input], %[ftmp2] \n\t"
944  "sh %[tmp1], 0x40(%[output]) \n\t"
945  "sh %[input], 0xc0(%[output]) \n\t"
946  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
947  PTR_SRL "%[input], %[input], 0x10 \n\t"
948  "sh %[tmp1], 0x60(%[output]) \n\t"
949  "sh %[input], 0xe0(%[output]) \n\t"
 /* Same scale / pack / store sequence for ftmp3 and ftmp4, targeting
  * output offsets 0x100..0x1e0. */
950  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
951  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
952  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
953  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
954  "mtc1 %[qmul], %[ftmp7] \n\t"
955  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
956  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
957  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
958  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
959  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
960  "psraw %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
961  "psraw %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
962  "psraw %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
963  "psraw %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
964  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
965  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
966  "dmfc1 %[tmp1], %[ftmp3] \n\t"
967  "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
968  "mfc1 %[input], %[ftmp3] \n\t"
969  "sh %[tmp1], 0x100(%[output]) \n\t"
970  "sh %[input], 0x180(%[output]) \n\t"
971  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
972  PTR_SRL "%[input], %[input], 0x10 \n\t"
973  "sh %[tmp1], 0x120(%[output]) \n\t"
974  "sh %[input], 0x1a0(%[output]) \n\t"
975  "dmfc1 %[tmp1], %[ftmp4] \n\t"
976  "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
977  "mfc1 %[input], %[ftmp4] \n\t"
978  "sh %[tmp1], 0x140(%[output]) \n\t"
979  "sh %[input], 0x1c0(%[output]) \n\t"
980  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
981  PTR_SRL "%[input], %[input], 0x10 \n\t"
982  "sh %[tmp1], 0x160(%[output]) \n\t"
 /* Skip the large-qmul path; the last sh sits in the jump's delay slot. */
983  "j 2f \n\t"
984  "sh %[input], 0x1e0(%[output]) \n\t"
 /* Path for qmul > 0x7fff: qmul (plus 0x80 << 16) is shifted right with
  * srlv before the multiply, and the post-multiply shift count is computed
  * into ftmp6 instead of using the fixed count in ftmp8. */
985  "1: \n\t"
986  "ori %[tmp0], $0, 0x1f \n\t"
 /* NOTE(review): tmp1 is written here only under HAVE_LOONGSON3; the
  * HAVE_LOONGSON2 branch is empty, so the dsubu below would read tmp1
  * before anything has set it in that configuration -- confirm. */
987 #if HAVE_LOONGSON3
988  "clz %[tmp1], %[qmul] \n\t"
989 #elif HAVE_LOONGSON2
990 #endif
991  "ori %[input], $0, 0x07 \n\t"
992  "dsubu %[tmp1], %[tmp0], %[tmp1] \n\t"
993  "ori %[tmp0], $0, 0x80 \n\t"
994  "dsll %[tmp0], %[tmp0], 0x10 \n\t"
995  "daddu %[qmul], %[qmul], %[tmp0] \n\t"
996  "dsubu %[tmp0], %[tmp1], %[input] \n\t"
997  "movn %[tmp1], %[input], %[tmp0] \n\t"
998  PTR_ADDIU "%[input], %[input], 0x01 \n\t"
999  "andi %[tmp0], %[tmp1], 0xff \n\t"
1000  "srlv %[qmul], %[qmul], %[tmp0] \n\t"
1001  PTR_SUBU "%[input], %[input], %[tmp1] \n\t"
1002  "mtc1 %[input], %[ftmp6] \n\t"
1003  "punpckhhw %[ftmp1], %[ftmp0], %[ff_pw_1] \n\t"
1004  "punpcklhw %[ftmp0], %[ftmp0], %[ff_pw_1] \n\t"
1005  "punpckhhw %[ftmp5], %[ftmp2], %[ff_pw_1] \n\t"
1006  "punpcklhw %[ftmp2], %[ftmp2], %[ff_pw_1] \n\t"
1007  "mtc1 %[qmul], %[ftmp7] \n\t"
1008  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1009  "pmaddhw %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1010  "pmaddhw %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1011  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1012  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1013  "psraw %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1014  "psraw %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1015  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1016  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1017  "packsswh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1018  "packsswh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* Stores to output offsets 0x00..0xe0, same targets as the other path
  * but with the instructions interleaved differently. */
1019  "dmfc1 %[tmp1], %[ftmp0] \n\t"
1020  "ssrld %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1021  "sh %[tmp1], 0x00(%[output]) \n\t"
1022  "mfc1 %[input], %[ftmp0] \n\t"
1023  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1024  "sh %[input], 0x80(%[output]) \n\t"
1025  "sh %[tmp1], 0x20(%[output]) \n\t"
1026  PTR_SRL "%[input], %[input], 0x10 \n\t"
1027  "dmfc1 %[tmp1], %[ftmp2] \n\t"
1028  "sh %[input], 0xa0(%[output]) \n\t"
1029  "ssrld %[ftmp2], %[ftmp2], %[ftmp9] \n\t"
1030  "sh %[tmp1], 0x40(%[output]) \n\t"
1031  "mfc1 %[input], %[ftmp2] \n\t"
1032  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1033  "sh %[input], 0xc0(%[output]) \n\t"
1034  "sh %[tmp1], 0x60(%[output]) \n\t"
1035  PTR_SRL "%[input], %[input], 0x10 \n\t"
1036  "sh %[input], 0xe0(%[output]) \n\t"
 /* Second half (ftmp3/ftmp4) for output offsets 0x100..0x1e0. */
1037  "punpckhhw %[ftmp1], %[ftmp3], %[ff_pw_1] \n\t"
1038  "punpcklhw %[ftmp3], %[ftmp3], %[ff_pw_1] \n\t"
1039  "punpckhhw %[ftmp5], %[ftmp4], %[ff_pw_1] \n\t"
1040  "punpcklhw %[ftmp4], %[ftmp4], %[ff_pw_1] \n\t"
1041  "mtc1 %[qmul], %[ftmp7] \n\t"
1042  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1043  "pmaddhw %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1044  "pmaddhw %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1045  "pmaddhw %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1046  "pmaddhw %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1047  "psraw %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1048  "psraw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1049  "psraw %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1050  "psraw %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1051  "packsswh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1052  "packsswh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1053  "dmfc1 %[tmp1], %[ftmp3] \n\t"
1054  "ssrld %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1055  "mfc1 %[input], %[ftmp3] \n\t"
1056  "sh %[tmp1], 0x100(%[output]) \n\t"
1057  "sh %[input], 0x180(%[output]) \n\t"
1058  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1059  PTR_SRL "%[input], %[input], 0x10 \n\t"
1060  "sh %[tmp1], 0x120(%[output]) \n\t"
1061  "sh %[input], 0x1a0(%[output]) \n\t"
1062  "dmfc1 %[tmp1], %[ftmp4] \n\t"
1063  "ssrld %[ftmp4], %[ftmp4], %[ftmp9] \n\t"
1064  "mfc1 %[input], %[ftmp4] \n\t"
1065  "sh %[tmp1], 0x140(%[output]) \n\t"
1066  "sh %[input], 0x1c0(%[output]) \n\t"
1067  "dsrl %[tmp1], %[tmp1], 0x10 \n\t"
1068  PTR_SRL "%[input], %[input], 0x10 \n\t"
1069  "sh %[tmp1], 0x160(%[output]) \n\t"
1070  "sh %[input], 0x1e0(%[output]) \n\t"
1071  "2: \n\t"
1072  ".set reorder \n\t"
 /* All scratch registers are early-clobber outputs; output, input and
  * qmul are read-write because the asm overwrites them. */
1073  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1074  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1075  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1076  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1077  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1078  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
1080  [output]"+&r"(output), [input]"+&r"(input),
1081  [qmul]"+&r"(qmul)
1082  : [ff_pw_1]"f"(ff_pw_1.f)
1083  : "memory"
1084  );
1085 }
1086 
/*
 * In-place transform and dequantisation of eight DC coefficients.
 *
 * The eight inputs are block[0], block[16], ..., block[112] (one value every
 * 16 coefficients). They are combined with add/sub butterflies, each result
 * is scaled as (value * qmul + 128) >> 8, and the scaled values are written
 * back to the same eight positions.
 *
 * block: coefficient array; indices up to 112 are accessed.
 * qmul:  dequantisation multiplier.
 *
 * NOTE(review): the header line of this function was missing from the
 * listing (the body started at a bare '{'); the name and prototype were
 * restored from the identifiers the body uses -- confirm against the
 * declaration in the project header.
 */
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /* Unsigned arithmetic gives the same result as int for in-range values
     * but wraps instead of invoking undefined behaviour if value * qmul
     * exceeds the int range. */
    const unsigned q = qmul;
    unsigned temp[8];
    unsigned t[8];

    temp[0] = block[0]  + block[16];
    temp[1] = block[0]  - block[16];
    temp[2] = block[32] + block[48];
    temp[3] = block[32] - block[48];
    temp[4] = block[64] + block[80];
    temp[5] = block[64] - block[80];
    temp[6] = block[96] + block[112];
    temp[7] = block[96] - block[112];

    t[0] = temp[0] + temp[4] + temp[2] + temp[6];
    t[1] = temp[0] - temp[4] + temp[2] - temp[6];
    t[2] = temp[0] - temp[4] - temp[2] + temp[6];
    t[3] = temp[0] + temp[4] - temp[2] - temp[6];
    t[4] = temp[1] + temp[5] + temp[3] + temp[7];
    t[5] = temp[1] - temp[5] + temp[3] - temp[7];
    t[6] = temp[1] - temp[5] - temp[3] + temp[7];
    t[7] = temp[1] + temp[5] - temp[3] - temp[7];

    /* Convert back to int before shifting so negative results keep their
     * sign through the right shift, exactly as with the int arithmetic. */
    block[  0] = (int)(t[0] * q + 128) >> 8;
    block[ 32] = (int)(t[1] * q + 128) >> 8;
    block[ 64] = (int)(t[2] * q + 128) >> 8;
    block[ 96] = (int)(t[3] * q + 128) >> 8;
    block[ 16] = (int)(t[4] * q + 128) >> 8;
    block[ 48] = (int)(t[5] * q + 128) >> 8;
    block[ 80] = (int)(t[6] * q + 128) >> 8;
    block[112] = (int)(t[7] * q + 128) >> 8;
}
1119 
/*
 * In-place 2x2 transform and dequantisation of four DC coefficients.
 *
 * The inputs are block[0], block[16], block[32] and block[48]. Sums and
 * differences are formed pairwise, combined once more, multiplied by qmul
 * and shifted right by 7 (no rounding term), then written back to the same
 * four positions.
 *
 * block: coefficient array; indices up to 48 are accessed.
 * qmul:  dequantisation multiplier.
 *
 * NOTE(review): the header line of this function was missing from the
 * listing (the body started at a bare '{'); the name and prototype were
 * restored from the identifiers the body uses -- confirm against the
 * declaration in the project header.
 */
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
{
    /* Unsigned arithmetic gives the same result as int for in-range values
     * but wraps instead of invoking undefined behaviour on overflow. */
    const unsigned q = qmul;
    const unsigned d = block[0]  - block[16];
    const unsigned a = block[0]  + block[16];
    const unsigned b = block[32] - block[48];
    const unsigned c = block[32] + block[48];

    /* Back to int before the shift so negative products keep their sign. */
    block[0]  = (int)((a + c) * q) >> 7;
    block[16] = (int)((d + b) * q) >> 7;
    block[32] = (int)((a - c) * q) >> 7;
    block[48] = (int)((d - b) * q) >> 7;
}
1133 
/*
 * Weight a 16-byte-wide block of 8-bit samples in place.
 *
 * For each of the height rows (rows are stride bytes apart) the 16 bytes at
 * block[0..15] are widened to 16 bits, multiplied by weight (low 16 bits of
 * the product are kept), offset is added with signed saturation, the sum is
 * shifted right arithmetically by log2_denom, and the result is packed back
 * to bytes with unsigned saturation.
 *
 * Before the loop, offset is scaled by 1 << log2_denom and, when log2_denom
 * is non-zero, the rounding term 1 << (log2_denom - 1) is added.
 */
1134 void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1135  int log2_denom, int weight, int offset)
1136 {
1137  int y;
1138  double ftmp[8];
1140 
 /* NOTE(review): offset is a signed int; left-shifting a negative value is
  * undefined in ISO C -- confirm whether callers can pass offset < 0. */
1141  offset <<= log2_denom;
1142 
1143  if (log2_denom)
1144  offset += 1 << (log2_denom - 1);
1145 
1146  for (y=0; y<height; y++, block+=stride) {
1147  __asm__ volatile (
 /* ftmp0 = 0: used as the zero half when widening bytes and as the
  * pshufh selector (presumably replicating halfword 0 to every lane). */
1148  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* Left and right 8-byte halves of the row. */
1149  MMI_LDC1(%[ftmp1], %[block0], 0x00)
1150  MMI_LDC1(%[ftmp2], %[block1], 0x00)
1151  "mtc1 %[weight], %[ftmp3] \n\t"
1152  "mtc1 %[offset], %[ftmp4] \n\t"
1153  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1154  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1155  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
 /* Widen bytes to halfwords: high four pixels of each half go to
  * ftmp6/ftmp7, low four stay in ftmp1/ftmp2. */
1156  "punpckhbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
1157  "punpckhbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
1158  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1159  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
 /* pixel * weight */
1160  "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1161  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1162  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1163  "pmullh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
 /* + offset (saturating) */
1164  "paddsh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1165  "paddsh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1166  "paddsh %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1167  "paddsh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
 /* >> log2_denom */
1168  "psrah %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1169  "psrah %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1170  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1171  "psrah %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
 /* Narrow back to bytes and store over the source row. */
1172  "packushb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1173  "packushb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1174  MMI_SDC1(%[ftmp1], %[block0], 0x00)
1175  MMI_SDC1(%[ftmp2], %[block1], 0x00)
1176  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1177  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1178  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1179  [ftmp6]"=&f"(ftmp[6]),
1181  [ftmp7]"=&f"(ftmp[7])
1182  : [block0]"r"(block), [block1]"r"(block+8),
1183  [weight]"r"(weight), [offset]"r"(offset),
1184  [log2_denom]"r"(log2_denom)
1185  : "memory"
1186  );
1187  }
1188 }
1189 
/*
 * Blend a 16-byte-wide block of src into dst with two weights.
 *
 * For each of the height rows (both pointers advance by stride) every pixel
 * is computed as
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * using 16-bit lanes with signed-saturating additions, then packed to a byte
 * with unsigned saturation and stored to dst. The row is handled as two
 * 8-byte halves (dst/src and dst+8/src+8).
 *
 * Before the loop, offset is replaced by ((offset + 1) | 1) << log2_denom.
 */
1190 void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src,
1191  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1192  int offset)
1193 {
1194  int y;
1195  double ftmp[9];
1197 
 /* NOTE(review): signed left shift; undefined in ISO C if the shifted
  * value is negative -- confirm the range of offset at the call sites. */
1198  offset = ((offset + 1) | 1) << log2_denom;
1199 
1200  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1201  __asm__ volatile (
 /* ftmp0 = 0: zero half for byte widening and pshufh selector. */
1202  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
 /* First 8 pixels of the row. */
1203  MMI_LDC1(%[ftmp1], %[src0], 0x00)
1204  MMI_LDC1(%[ftmp2], %[dst0], 0x00)
1205  "mtc1 %[weights], %[ftmp3] \n\t"
1206  "mtc1 %[weightd], %[ftmp4] \n\t"
1207  "mtc1 %[offset], %[ftmp5] \n\t"
1208  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1209  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1210  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1211  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Widen: src -> ftmp7 (high) / ftmp1 (low), dst -> ftmp8 / ftmp2. */
1212  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1213  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1214  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1215  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
 /* src * weights, dst * weightd */
1216  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1217  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1218  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1219  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
 /* + offset, then + weighted dst */
1220  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1221  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1222  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1223  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
 /* Shift by the log2_denom operand, which is bound to log2_denom + 1. */
1224  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1225  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1226  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1227  MMI_SDC1(%[ftmp1], %[dst0], 0x00)
 /* Second 8 pixels: same sequence, reusing the broadcast weights,
  * offset and shift count. */
1228  MMI_LDC1(%[ftmp1], %[src1], 0x00)
1229  MMI_LDC1(%[ftmp2], %[dst1], 0x00)
1230  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1231  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1232  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1233  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1234  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1235  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1236  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1237  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1238  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1239  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1240  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1241  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1242  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1243  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1244  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1245  MMI_SDC1(%[ftmp1], %[dst1], 0x00)
1246  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1247  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1248  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1249  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1251  [ftmp8]"=&f"(ftmp[8])
1252  : [dst0]"r"(dst), [dst1]"r"(dst+8),
1253  [src0]"r"(src), [src1]"r"(src+8),
1254  [weights]"r"(weights), [weightd]"r"(weightd),
1255  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1256  : "memory"
1257  );
1258  }
1259 }
1260 
/*
 * Weight an 8-byte-wide block of 8-bit samples in place.
 *
 * For each of the height rows (stride bytes apart) the 8 bytes at block are
 * widened to 16 bits, multiplied by weight, offset is added with signed
 * saturation, the sum is shifted right arithmetically by log2_denom, and the
 * result is packed to bytes with unsigned saturation and stored back.
 *
 * Before the loop, offset is scaled by 1 << log2_denom and, when log2_denom
 * is non-zero, the rounding term 1 << (log2_denom - 1) is added.
 */
1261 void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1262  int log2_denom, int weight, int offset)
1263 {
1264  int y;
1265  double ftmp[6];
1267 
 /* NOTE(review): offset is a signed int; left-shifting a negative value is
  * undefined in ISO C -- confirm whether callers can pass offset < 0. */
1268  offset <<= log2_denom;
1269 
1270  if (log2_denom)
1271  offset += 1 << (log2_denom - 1);
1272 
1273  for (y=0; y<height; y++, block+=stride) {
1274  __asm__ volatile (
 /* ftmp0 = 0: zero half for byte widening and pshufh selector. */
1275  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1276  MMI_LDC1(%[ftmp1], %[block], 0x00)
1277  "mtc1 %[weight], %[ftmp2] \n\t"
1278  "mtc1 %[offset], %[ftmp3] \n\t"
1279  "mtc1 %[log2_denom], %[ftmp5] \n\t"
1280  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1281  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* High four pixels -> ftmp4, low four stay in ftmp1. */
1282  "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t"
1283  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
 /* (pixel * weight + offset) >> log2_denom */
1284  "pmullh %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
1285  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1286  "paddsh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1287  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1288  "psrah %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1289  "psrah %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
 /* Narrow to bytes and overwrite the row. */
1290  "packushb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1291  MMI_SDC1(%[ftmp1], %[block], 0x00)
1292  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1293  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1294  [ftmp4]"=&f"(ftmp[4]),
1296  [ftmp5]"=&f"(ftmp[5])
1297  : [block]"r"(block), [weight]"r"(weight),
1298  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1299  : "memory"
1300  );
1301  }
1302 }
1303 
/*
 * Blend an 8-byte-wide block of src into dst with two weights.
 *
 * For each of the height rows (both pointers advance by stride) every pixel
 * is computed as
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * in 16-bit lanes with signed-saturating additions, packed to a byte with
 * unsigned saturation and stored to dst.
 *
 * Before the loop, offset is replaced by ((offset + 1) | 1) << log2_denom.
 */
1304 void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src,
1305  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1306  int offset)
1307 {
1308  int y;
1309  double ftmp[9];
1311 
 /* NOTE(review): signed left shift; undefined in ISO C if the shifted
  * value is negative -- confirm the range of offset at the call sites. */
1312  offset = ((offset + 1) | 1) << log2_denom;
1313 
1314  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1315  __asm__ volatile (
 /* ftmp0 = 0: zero half for byte widening and pshufh selector. */
1316  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1317  MMI_LDC1(%[ftmp1], %[src], 0x00)
1318  MMI_LDC1(%[ftmp2], %[dst], 0x00)
1319  "mtc1 %[weights], %[ftmp3] \n\t"
1320  "mtc1 %[weightd], %[ftmp4] \n\t"
1321  "mtc1 %[offset], %[ftmp5] \n\t"
1322  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1323  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1324  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1325  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
 /* Widen: src -> ftmp7 (high) / ftmp1 (low), dst -> ftmp8 / ftmp2. */
1326  "punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
1327  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
1328  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1329  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
 /* src * weights, dst * weightd */
1330  "pmullh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1331  "pmullh %[ftmp8], %[ftmp8], %[ftmp4] \n\t"
1332  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1333  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
 /* + offset, then + weighted dst */
1334  "paddsh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1335  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1336  "paddsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1337  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
 /* Shift by the log2_denom operand, which is bound to log2_denom + 1. */
1338  "psrah %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1339  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1340  "packushb %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1341  MMI_SDC1(%[ftmp1], %[dst], 0x00)
1342  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1343  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1344  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1345  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1347  [ftmp8]"=&f"(ftmp[8])
1348  : [dst]"r"(dst), [src]"r"(src),
1349  [weights]"r"(weights), [weightd]"r"(weightd),
1350  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1351  : "memory"
1352  );
1353  }
1354 }
1355 
/*
 * Weight a 4-byte-wide block of 8-bit samples in place.
 *
 * For each of the height rows (stride bytes apart) the 4 bytes at block are
 * loaded with an unaligned 32-bit load, widened to 16 bits, multiplied by
 * weight, offset is added with signed saturation, the sum is shifted right
 * arithmetically by log2_denom, and the result is packed to bytes with
 * unsigned saturation; the low 32 bits are stored back.
 *
 * Before the loop, offset is scaled by 1 << log2_denom and, when log2_denom
 * is non-zero, the rounding term 1 << (log2_denom - 1) is added.
 */
1356 void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height,
1357  int log2_denom, int weight, int offset)
1358 {
1359  int y;
1360  double ftmp[5];
1362 
 /* NOTE(review): offset is a signed int; left-shifting a negative value is
  * undefined in ISO C -- confirm whether callers can pass offset < 0. */
1363  offset <<= log2_denom;
1364 
1365  if (log2_denom)
1366  offset += 1 << (log2_denom - 1);
1367 
1368  for (y=0; y<height; y++, block+=stride) {
1369  __asm__ volatile (
 /* ftmp0 = 0: zero half for byte widening, pshufh selector, and the
  * upper half of the final pack. */
1370  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1371  MMI_ULWC1(%[ftmp1], %[block], 0x00)
1372  "mtc1 %[weight], %[ftmp2] \n\t"
1373  "mtc1 %[offset], %[ftmp3] \n\t"
1374  "mtc1 %[log2_denom], %[ftmp4] \n\t"
1375  "pshufh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1376  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
 /* (pixel * weight + offset) >> log2_denom on the four low lanes. */
1377  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1378  "pmullh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1379  "paddsh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1380  "psrah %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
1381  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1382  MMI_SWC1(%[ftmp1], %[block], 0x00)
1383  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1384  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1386  [ftmp4]"=&f"(ftmp[4])
1387  : [block]"r"(block), [weight]"r"(weight),
1388  [offset]"r"(offset), [log2_denom]"r"(log2_denom)
1389  : "memory"
1390  );
1391  }
1392 }
1393 
/*
 * Blend a 4-byte-wide block of src into dst with two weights.
 *
 * For each of the height rows (both pointers advance by stride) every pixel
 * is computed as
 *     (src * weights + offset + dst * weightd) >> (log2_denom + 1)
 * in 16-bit lanes with signed-saturating additions, packed to a byte with
 * unsigned saturation; the low 32 bits are stored to dst.
 *
 * Before the loop, offset is replaced by ((offset + 1) | 1) << log2_denom.
 */
1394 void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src,
1395  ptrdiff_t stride, int height, int log2_denom, int weightd, int weights,
1396  int offset)
1397 {
1398  int y;
1399  double ftmp[7];
1401 
 /* NOTE(review): signed left shift; undefined in ISO C if the shifted
  * value is negative -- confirm the range of offset at the call sites. */
1402  offset = ((offset + 1) | 1) << log2_denom;
1403 
1404  for (y=0; y<height; y++, dst+=stride, src+=stride) {
1405  __asm__ volatile (
 /* ftmp0 = 0: zero half for byte widening, pshufh selector, and the
  * upper half of the final pack. */
1406  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1407  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1408  MMI_ULWC1(%[ftmp2], %[dst], 0x00)
 /* The asm operand named "weight" is bound to the C variable weights
  * in the input list below, so this is the source weight. */
1409  "mtc1 %[weight], %[ftmp3] \n\t"
1410  "mtc1 %[weightd], %[ftmp4] \n\t"
1411  "mtc1 %[offset], %[ftmp5] \n\t"
1412  "mtc1 %[log2_denom], %[ftmp6] \n\t"
1413  "pshufh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1414  "pshufh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1415  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1416  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1417  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
 /* src * weights + offset + dst * weightd, then the shift. */
1418  "pmullh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1419  "pmullh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1420  "paddsh %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
1421  "paddsh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1422  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1423  "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1424  MMI_SWC1(%[ftmp1], %[dst], 0x00)
1425  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1426  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1427  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1429  [ftmp6]"=&f"(ftmp[6])
1430  : [dst]"r"(dst), [src]"r"(src),
1431  [weight]"r"(weights), [weightd]"r"(weightd),
1432  [offset]"r"(offset), [log2_denom]"r"(log2_denom+1)
1433  : "memory"
1434  );
1435  }
1436 }
1437 
/*
 * Filter 8 horizontally adjacent pixels across the edge located at pix.
 *
 * The routine reads the 8-byte rows at pix - 3*stride .. pix + 2*stride and
 * writes back the four rows pix - 2*stride, pix - stride, pix and
 * pix + stride. alpha and beta are thresholds (used as alpha - 1 and
 * beta - 1, saturated to bytes); tc0 supplies per-group limits, of which
 * only tc0[0] and tc0[1] are used, each covering four of the eight columns.
 *
 * NOTE(review): alpha and beta are declared as input-only operands but are
 * modified by the two addi instructions at the top of the asm. GCC's
 * extended-asm rules say input operands must not be changed -- confirm
 * whether these should be read-write operands.
 */
1438 void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1439  int8_t *tc0)
1440 {
1441  double ftmp[12];
1442  mips_reg addr[2];
1446 
1447  __asm__ volatile (
 /* addr0 = 2*stride, addr1 = pix - 3*stride; alpha and beta are
  * decremented in between. ftmp0 = 0. */
1448  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1449  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1450  PTR_ADDU "%[addr1], %[stride], %[addr0] \n\t"
1451  "addi %[alpha], %[alpha], -0x01 \n\t"
1452  PTR_SUBU "%[addr1], $0, %[addr1] \n\t"
1453  "addi %[beta], %[beta], -0x01 \n\t"
1454  PTR_ADDU "%[addr1], %[addr1], %[pix] \n\t"
 /* Rows: ftmp1 = pix - 2*stride, ftmp2 = pix - stride,
  * ftmp3 = pix, ftmp4 = pix + stride. */
1455  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1456  MMI_LDXC1(%[ftmp1], %[addr1], %[stride], 0x00)
1457  MMI_LDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1458  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
 /* Replicate alpha - 1 and beta - 1 into every byte lane. */
1459  "mtc1 %[alpha], %[ftmp5] \n\t"
1460  "mtc1 %[beta], %[ftmp6] \n\t"
1461  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1462  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1463  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1464  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* Build the per-column enable mask in ftmp8: the saturating
  * subtractions leave zero only where |ftmp3 - ftmp2| <= alpha - 1,
  * |ftmp1 - ftmp2| <= beta - 1 and |ftmp4 - ftmp3| <= beta - 1; the
  * compare against zero turns those columns into all-ones bytes. */
1465  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1466  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1467  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1468  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1469  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1470  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1471  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1472  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1473  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1474  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1475  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1476  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1477  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1478  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1479  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
 /* ftmp4 = all ones. Load four tc0 bytes and duplicate so that tc0[0]
  * fills the low four byte lanes of ftmp9 and tc0[1] the high four;
  * ftmp5 then marks lanes whose tc0 is greater than -1, and ftmp10 is
  * the enable mask restricted to those lanes. */
1480  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
1481  MMI_ULWC1(%[ftmp5], %[tc0], 0x00)
1482  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1483  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp5] \n\t"
1484  "pcmpgtb %[ftmp5], %[ftmp9], %[ftmp4] \n\t"
 /* Row pix - 3*stride, compared against ftmp2 with the beta limit. */
1485  MMI_LDC1(%[ftmp4], %[addr1], 0x00)
1486  "pand %[ftmp10], %[ftmp5], %[ftmp8] \n\t"
1487  "psubusb %[ftmp8], %[ftmp4], %[ftmp2] \n\t"
1488  "psubusb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1489  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1490  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1491  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1492  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1493  "pand %[ftmp5], %[ftmp10], %[ftmp9] \n\t"
1494  "psubb %[ftmp8], %[ftmp5], %[ftmp7] \n\t"
1495  "pand %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
 /* New value for row pix - 2*stride: an average-based estimate
  * clamped to [ftmp1 - ftmp7, ftmp1 + ftmp7] via pmaxub/pminub. */
1496  "pavgb %[ftmp5], %[ftmp2], %[ftmp3] \n\t"
1497  MMI_LDC1(%[ftmp11], %[addr1], 0x00)
1498  "pavgb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1499  "pxor %[ftmp5], %[ftmp5], %[ftmp11] \n\t"
1500  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1501  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1502  "psubusb %[ftmp5], %[ftmp1], %[ftmp7] \n\t"
1503  "paddusb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1504  "pmaxub %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1505  "pminub %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1506  MMI_SDXC1(%[ftmp4], %[addr1], %[stride], 0x00)
 /* Mirror of the above on the other side of the edge: row
  * pix + 2*stride is tested against ftmp3 and a clamped new value is
  * stored to row pix + stride. */
1507  MMI_LDXC1(%[ftmp5], %[pix], %[addr0], 0x00)
1508  "psubusb %[ftmp4], %[ftmp5], %[ftmp3] \n\t"
1509  "psubusb %[ftmp7], %[ftmp3], %[ftmp5] \n\t"
1510  "psubusb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1511  "psubusb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1512  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1513  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1514  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1515  "pand %[ftmp6], %[ftmp9], %[ftmp7] \n\t"
1516  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1517  "pavgb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1518  MMI_LDXC1(%[ftmp11], %[pix], %[addr0], 0x00)
1519  "pavgb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1520  "pxor %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1521  "pand %[ftmp7], %[ftmp7], %[ff_pb_1] \n\t"
1522  "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1523  "psubusb %[ftmp7], %[ftmp4], %[ftmp6] \n\t"
1524  "paddusb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1525  "pmaxub %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1526  "pminub %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1527  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
 /* Update the two rows adjacent to the edge: a correction is derived
  * from ftmp1..ftmp4 using pavgb and the ff_pb_3 / ff_pb_A1 constants,
  * split into a positive part (ftmp4) and a negative part (ftmp7),
  * both limited by ftmp8, then applied with opposite signs to ftmp2
  * (row pix - stride) and ftmp3 (row pix). */
1528  "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1529  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1530  "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1531  "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1532  "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1533  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1534  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1535  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1536  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1537  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1538  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1539  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1540  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1541  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1542  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1543  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1544  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1545  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1546  MMI_SDXC1(%[ftmp2], %[addr1], %[addr0], 0x00)
1547  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1548  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1549  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1550  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1551  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1552  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1553  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1557  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1])
1558  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1559  [alpha]"r"((mips_reg)alpha), [beta]"r"((mips_reg)beta),
1560  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
1561  [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
1562  : "memory"
1563  );
1564 }
1565 
/*
 * Filters 8 bytes per row across the horizontal line at pix, in place, using
 * Loongson MMI inline assembly.
 *
 * Row naming used in the comments below: p3..p0 are the rows at
 * pix - 4*stride .. pix - 1*stride, q0..q3 are the rows at pix .. pix + 3*stride.
 * All eight rows are read; p2..q2 (six rows) are stored back.
 *
 * pix:         address of row q0
 * stride:      byte distance between consecutive rows
 * alpha, beta: thresholds; the asm decrements each one and jumps straight to
 *              its end label, before any load or store, if the result is
 *              negative
 *
 * NOTE(review): `stack` is declared const, yet the asm stores to it at
 * offsets 0x00..0x40; the qualifier looks wrong -- confirm.
 * NOTE(review): the listing numbers embedded in these lines skip 1573-1574
 * and 1781-1782, so declarations/operands may be missing from this view.
 */
1566 static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1567  int beta)
1568 {
1569  DECLARE_ALIGNED(8, const uint64_t, stack[0x0a]);
1570  double ftmp[16];
1571  uint64_t tmp[1];
1572  mips_reg addr[3];
1575 
1576  __asm__ volatile (
 /* Constants: tmp0 = 1, ftmp0 = 0, ftmp9 = 1 (shift count),
  * ftmp11 = 1 << 1 = 2 (shift count).
  * addr0 = 4*stride, addr2 = 2*stride, addr1 = 3*stride.
  * alpha and beta are decremented; a negative result jumps to label 1. */
1577  "ori %[tmp0], $0, 0x01 \n\t"
1578  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1579  "mtc1 %[tmp0], %[ftmp9] \n\t"
1580  PTR_SLL "%[addr0], %[stride], 0x02 \n\t"
1581  PTR_ADDU "%[addr2], %[stride], %[stride] \n\t"
1582  PTR_ADDIU "%[alpha], %[alpha], -0x01 \n\t"
1583  "sslld %[ftmp11], %[ftmp9], %[ftmp9] \n\t"
1584  "bltz %[alpha], 1f \n\t"
1585  PTR_ADDU "%[addr1], %[addr2], %[stride] \n\t"
1586  PTR_ADDIU "%[beta], %[beta], -0x01 \n\t"
1587  "bltz %[beta], 1f \n\t"
 /* addr0 = pix - 4*stride (row p3) */
1588  PTR_SUBU "%[addr0], $0, %[addr0] \n\t"
1589  PTR_ADDU "%[addr0], %[addr0], %[pix] \n\t"
 /* ftmp3 = q0, ftmp1 = p1, ftmp2 = p0, ftmp4 = q1 */
1590  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1591  MMI_LDXC1(%[ftmp1], %[addr0], %[addr2], 0x00)
1592  MMI_LDXC1(%[ftmp2], %[addr0], %[addr1], 0x00)
1593  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
 /* ftmp5 / ftmp6 = alpha-1 / beta-1 replicated into every byte lane
  * (unsigned-saturated); the packushb for ftmp6 is interleaved below.
  * ftmp8 collects lanes where |p0-q0| > alpha-1, |p1-p0| > beta-1 or
  * |q1-q0| > beta-1; stack+0x10 temporarily keeps the alpha-1 vector. */
1594  "mtc1 %[alpha], %[ftmp5] \n\t"
1595  "mtc1 %[beta], %[ftmp6] \n\t"
1596  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1597  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1598  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1599  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1600  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1601  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1602  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1603  MMI_SDC1(%[ftmp5], %[stack], 0x10)
1604  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1605  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1606  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1607  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1608  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1609  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1610  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1611  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1612  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1613  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1614  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
 /* stack+0x20 = 0xff in lanes where none of the three tests fired.
  * ftmp10 = ff_pb_1 (loaded from memory). */
1615  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1616  MMI_LDC1(%[ftmp5], %[stack], 0x10)
1617  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1618  "ldc1 %[ftmp10], %[ff_pb_1] \n\t"
1619  MMI_SDC1(%[ftmp8], %[stack], 0x20)
 /* ftmp5 = pavgb(pavgb(alpha-1, 0), ff_pb_1): a reduced threshold.
  * ftmp7 = lanes with |p0-q0| <= ftmp5, ANDed with the stack+0x20 mask. */
1620  "pavgb %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1621  "psubusb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1622  "pavgb %[ftmp5], %[ftmp5], %[ftmp10] \n\t"
1623  "psubusb %[ftmp7], %[ftmp2], %[ftmp3] \n\t"
1624  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1625  "psubusb %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1626  MMI_LDC1(%[ftmp15], %[stack], 0x20)
1627  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1628  "pand %[ftmp7], %[ftmp7], %[ftmp15] \n\t"
 /* ftmp15 = p2; stack+0x30 = ftmp7 & (|p2-p0| <= beta-1) */
1629  MMI_LDXC1(%[ftmp15], %[addr0], %[stride], 0x00)
1630  "psubusb %[ftmp8], %[ftmp15], %[ftmp2] \n\t"
1631  "psubusb %[ftmp5], %[ftmp2], %[ftmp15] \n\t"
1632  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1633  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1634  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1635  "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
 /* ftmp14 = q2; stack+0x40 = ftmp7 & (|q2-q0| <= beta-1) */
1636  MMI_LDXC1(%[ftmp14], %[pix], %[addr2], 0x00)
1637  MMI_SDC1(%[ftmp5], %[stack], 0x30)
1638  "psubusb %[ftmp8], %[ftmp14], %[ftmp3] \n\t"
1639  "psubusb %[ftmp5], %[ftmp3], %[ftmp14] \n\t"
1640  "psubusb %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1641  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1642  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1643  "pand %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1644  MMI_SDC1(%[ftmp5], %[stack], 0x40)
 /* p side. stack+0x10 = pavgb(p0, q0); stack+0x00 = byte-wise (wrapping)
  * p2 + p1 + p0 + q0. The value stored to row p0 is the original where the
  * stack+0x20 mask is clear, otherwise one of two filtered candidates
  * selected by the stack+0x30 mask. */
1645  "pavgb %[ftmp5], %[ftmp15], %[ftmp1] \n\t"
1646  "pavgb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1647  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1648  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1649  "paddb %[ftmp7], %[ftmp15], %[ftmp1] \n\t"
1650  "paddb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1651  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1652  "mov.d %[ftmp8], %[ftmp7] \n\t"
1653  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1654  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1655  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1656  "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1657  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1658  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1659  "pavgb %[ftmp6], %[ftmp15], %[ftmp4] \n\t"
1660  "psubb %[ftmp7], %[ftmp15], %[ftmp4] \n\t"
1661  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1662  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1663  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1664  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1665  MMI_LDC1(%[ftmp13], %[stack], 0x10)
1666  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1667  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1668  "pavgb %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1669  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1670  "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1671  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1672  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1673  "pxor %[ftmp8], %[ftmp2], %[ftmp4] \n\t"
1674  "pavgb %[ftmp7], %[ftmp2], %[ftmp4] \n\t"
1675  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1676  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1677  MMI_LDC1(%[ftmp13], %[stack], 0x30)
1678  "pavgb %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
1679  MMI_LDC1(%[ftmp12], %[stack], 0x20)
1680  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1681  "pxor %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
1682  "pand %[ftmp6], %[ftmp6], %[ftmp13] \n\t"
1683  "pand %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1684  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1685  "pxor %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
 /* store row p0 (pix - stride) */
1686  MMI_SDXC1(%[ftmp6], %[addr0], %[addr1], 0x00)
 /* ftmp6 = p3. New p1 and p2 replace the originals only in stack+0x30
  * lanes; they are stored at pix - 2*stride and pix - 3*stride. */
1687  MMI_LDC1(%[ftmp6], %[addr0], 0x00)
1688  "paddb %[ftmp7], %[ftmp15], %[ftmp6] \n\t"
1689  "pavgb %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1690  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1691  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1692  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1693  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1694  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1695  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1696  "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1697  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1698  MMI_LDC1(%[ftmp12], %[stack], 0x30)
1699  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1700  "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1701  "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1702  "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1703  "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1704  "pxor %[ftmp5], %[ftmp5], %[ftmp1] \n\t"
1705  "pxor %[ftmp6], %[ftmp6], %[ftmp15] \n\t"
1706  MMI_SDXC1(%[ftmp5], %[addr0], %[addr2], 0x00)
1707  MMI_SDXC1(%[ftmp6], %[addr0], %[stride], 0x00)
 /* q side: the same sequence mirrored (q2 = ftmp14, masks at stack+0x40
  * and stack+0x20); stack+0x10 and stack+0x00 are reused as scratch. */
1708  "pavgb %[ftmp5], %[ftmp14], %[ftmp4] \n\t"
1709  "pavgb %[ftmp6], %[ftmp3], %[ftmp2] \n\t"
1710  "pavgb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1711  MMI_SDC1(%[ftmp6], %[stack], 0x10)
1712  "paddb %[ftmp7], %[ftmp14], %[ftmp4] \n\t"
1713  "paddb %[ftmp8], %[ftmp3], %[ftmp2] \n\t"
1714  "paddb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1715  "mov.d %[ftmp8], %[ftmp7] \n\t"
1716  MMI_SDC1(%[ftmp7], %[stack], 0x00)
1717  "psrlh %[ftmp7], %[ftmp7], %[ftmp9] \n\t"
1718  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1719  "pxor %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
1720  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1721  "psubb %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1722  "pavgb %[ftmp6], %[ftmp14], %[ftmp1] \n\t"
1723  "paddb %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
1724  "psubb %[ftmp7], %[ftmp14], %[ftmp1] \n\t"
1725  "psubb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1726  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1727  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1728  MMI_LDC1(%[ftmp12], %[stack], 0x10)
1729  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1730  "pavgb %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1731  "psrlh %[ftmp8], %[ftmp8], %[ftmp11] \n\t"
1732  "pavgb %[ftmp8], %[ftmp8], %[ftmp0] \n\t"
1733  "pxor %[ftmp8], %[ftmp8], %[ftmp6] \n\t"
1734  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1735  "psubb %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1736  "pxor %[ftmp8], %[ftmp3], %[ftmp1] \n\t"
1737  "pavgb %[ftmp7], %[ftmp3], %[ftmp1] \n\t"
1738  "pand %[ftmp8], %[ftmp8], %[ftmp10] \n\t"
1739  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1740  "psubb %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1741  MMI_LDC1(%[ftmp13], %[stack], 0x20)
1742  "pavgb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
1743  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1744  "pxor %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
1745  "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1746  "pand %[ftmp7], %[ftmp7], %[ftmp13] \n\t"
1747  "pxor %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1748  "pxor %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
 /* store row q0 (pix); then ftmp6 = q3 and rows q1 / q2 are stored at
  * pix + stride / pix + 2*stride, changed only in stack+0x40 lanes */
1749  MMI_SDC1(%[ftmp6], %[pix], 0x00)
1750  MMI_LDXC1(%[ftmp6], %[pix], %[addr1], 0x00)
1751  "paddb %[ftmp7], %[ftmp14], %[ftmp6] \n\t"
1752  "pavgb %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1753  MMI_LDC1(%[ftmp12], %[stack], 0x00)
1754  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1755  "paddb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1756  "paddb %[ftmp7], %[ftmp7], %[ftmp12] \n\t"
1757  "psrlh %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
1758  "pavgb %[ftmp7], %[ftmp7], %[ftmp0] \n\t"
1759  "pxor %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
1760  "pand %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
1761  MMI_LDC1(%[ftmp12], %[stack], 0x40)
1762  "psubb %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
1763  "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1764  "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1765  "pand %[ftmp5], %[ftmp5], %[ftmp12] \n\t"
1766  "pand %[ftmp6], %[ftmp6], %[ftmp12] \n\t"
1767  "pxor %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1768  "pxor %[ftmp6], %[ftmp6], %[ftmp14] \n\t"
1769  MMI_SDXC1(%[ftmp5], %[pix], %[stride], 0x00)
1770  MMI_SDXC1(%[ftmp6], %[pix], %[addr2], 0x00)
 /* early-exit target for alpha - 1 < 0 or beta - 1 < 0 */
1771  "1: \n\t"
1772  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1773  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1774  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1775  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1776  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1777  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
1778  [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]),
1779  [ftmp14]"=&f"(ftmp[14]), [ftmp15]"=&f"(ftmp[15]),
1780  [tmp0]"=&r"(tmp[0]),
1783  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
1784  [addr2]"=&r"(addr[2]),
 /* alpha and beta are read-write operands because the asm decrements them */
1785  [alpha]"+&r"(alpha), [beta]"+&r"(beta)
1786  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1787  [stack]"r"(stack), [ff_pb_1]"m"(ff_pb_1)
1788  : "memory"
1789  );
1790 }
1791 
/*
 * Filters 8 bytes of the two rows adjacent to the horizontal line at pix
 * (the rows at pix - stride and pix), in place, using Loongson MMI inline
 * assembly. The rows at pix - 2*stride .. pix + stride are read.
 *
 * pix:         address of the first row below the line
 * stride:      byte distance between consecutive rows
 * alpha, beta: thresholds (used as alpha - 1 and beta - 1)
 * tc0:         four bytes are read; each one is duplicated so that it covers
 *              two adjacent byte lanes and caps the adjustment in those lanes
 *
 * NOTE(review): the asm decrements %[alpha] and %[beta] although both are
 * listed as input-only "r" operands; GCC requires operands the template
 * modifies to be outputs (e.g. "+&r") -- confirm and fix.
 * NOTE(review): the listing numbers embedded in these lines skip 1797-1799
 * and 1864-1866, so declarations/operands may be missing from this view.
 */
1792 void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1793  int beta, int8_t *tc0)
1794 {
1795  double ftmp[9];
1796  mips_reg addr[1];
1800 
1801  __asm__ volatile (
 /* alpha -= 1, beta -= 1; addr0 = pix - 2*stride */
1802  "addi %[alpha], %[alpha], -0x01 \n\t"
1803  "addi %[beta], %[beta], -0x01 \n\t"
1804  "or %[addr0], $0, %[pix] \n\t"
1805  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1806  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
 /* ftmp1..ftmp4 = rows pix-2*stride, pix-stride, pix, pix+stride
  * (called p1, p0, q0, q1 below) */
1807  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1808  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1809  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1810  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1811 
 /* ftmp5 / ftmp6 = alpha-1 / beta-1 replicated into every byte lane
  * (unsigned-saturated) */
1812  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1813  "mtc1 %[alpha], %[ftmp5] \n\t"
1814  "mtc1 %[beta], %[ftmp6] \n\t"
1815  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1816  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1817  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1818  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* ftmp8 = 0xff in lanes where |p0-q0| <= alpha-1, |p1-p0| <= beta-1
  * and |q1-q0| <= beta-1, else 0 */
1819  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1820  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1821  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1822  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1823  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1824  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1825  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1826  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1827  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1828  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1829  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1830  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1831  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1832  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1833  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1834  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
 /* four tc0 bytes, each duplicated into two lanes, ANDed into the mask */
1835  MMI_ULWC1(%[ftmp7], %[tc0], 0x00)
1836  "punpcklbh %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1837  "pand %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
 /* Build the adjustment pair (ftmp7, ftmp4), cap both at ftmp8 with an
  * unsigned min, then p0 = p0 - ftmp7 + ftmp4 and q0 = q0 - ftmp4 + ftmp7
  * using unsigned-saturating byte arithmetic. */
1838  "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1839  "pxor %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1840  "pxor %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1841  "pand %[ftmp6], %[ftmp6], %[ff_pb_1] \n\t"
1842  "pavgb %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
1843  "pxor %[ftmp5], %[ftmp5], %[ftmp2] \n\t"
1844  "pavgb %[ftmp4], %[ftmp4], %[ff_pb_3] \n\t"
1845  "pavgb %[ftmp5], %[ftmp5], %[ftmp3] \n\t"
1846  "pavgb %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1847  "paddusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1848  "psubusb %[ftmp7], %[ff_pb_A1], %[ftmp4] \n\t"
1849  "psubusb %[ftmp4], %[ftmp4], %[ff_pb_A1] \n\t"
1850  "pminub %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1851  "pminub %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
1852  "psubusb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1853  "psubusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1854  "paddusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1855  "paddusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1856 
 /* store p0 to pix - stride and q0 to pix */
1857  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1858  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1859  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1860  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1861  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1862  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1863  [ftmp8]"=&f"(ftmp[8]),
1867  [addr0]"=&r"(addr[0])
1868  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1869  [alpha]"r"(alpha), [beta]"r"(beta),
1870  [tc0]"r"(tc0), [ff_pb_1]"f"(ff_pb_1.f),
1871  [ff_pb_3]"f"(ff_pb_3.f), [ff_pb_A1]"f"(ff_pb_A1.f)
1872  : "memory"
1873  );
1874 }
1875 
/*
 * Filters 8 bytes of the two rows adjacent to the horizontal line at pix
 * (the rows at pix - stride and pix), in place, using Loongson MMI inline
 * assembly. No tc0 table is involved. The rows at pix - 2*stride ..
 * pix + stride are read.
 *
 * pix:         address of the first row below the line
 * stride:      byte distance between consecutive rows
 * alpha, beta: thresholds (used as alpha - 1 and beta - 1)
 *
 * NOTE(review): the asm decrements %[alpha] and %[beta] although both are
 * listed as input-only "r" operands; GCC requires operands the template
 * modifies to be outputs (e.g. "+&r") -- confirm and fix.
 * NOTE(review): the listing numbers embedded in these lines skip 1881-1882
 * and 1944-1945, so declarations/operands may be missing from this view.
 */
1876 void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
1877  int beta)
1878 {
1879  double ftmp[9];
1880  mips_reg addr[1];
1883 
1884  __asm__ volatile (
 /* alpha -= 1, beta -= 1; addr0 = pix - 2*stride */
1885  "addi %[alpha], %[alpha], -0x01 \n\t"
1886  "addi %[beta], %[beta], -0x01 \n\t"
1887  "or %[addr0], $0, %[pix] \n\t"
1888  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
1889  PTR_SUBU "%[addr0], %[addr0], %[stride] \n\t"
 /* ftmp1..ftmp4 = rows pix-2*stride, pix-stride, pix, pix+stride
  * (called p1, p0, q0, q1 below) */
1890  MMI_LDC1(%[ftmp1], %[addr0], 0x00)
1891  MMI_LDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1892  MMI_LDC1(%[ftmp3], %[pix], 0x00)
1893  MMI_LDXC1(%[ftmp4], %[pix], %[stride], 0x00)
1894 
 /* ftmp5 / ftmp6 = alpha-1 / beta-1 replicated into every byte lane
  * (unsigned-saturated) */
1895  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1896  "mtc1 %[alpha], %[ftmp5] \n\t"
1897  "mtc1 %[beta], %[ftmp6] \n\t"
1898  "pshufh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1899  "pshufh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1900  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
1901  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* ftmp8 = 0xff in lanes where |p0-q0| <= alpha-1, |p1-p0| <= beta-1
  * and |q1-q0| <= beta-1, else 0 */
1902  "psubusb %[ftmp7], %[ftmp3], %[ftmp2] \n\t"
1903  "psubusb %[ftmp8], %[ftmp2], %[ftmp3] \n\t"
1904  "por %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
1905  "psubusb %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1906  "psubusb %[ftmp7], %[ftmp2], %[ftmp1] \n\t"
1907  "psubusb %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
1908  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1909  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1910  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1911  "psubusb %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1912  "psubusb %[ftmp5], %[ftmp4], %[ftmp3] \n\t"
1913  "por %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1914  "psubusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1915  "por %[ftmp8], %[ftmp8], %[ftmp5] \n\t"
1916  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1917  "pcmpeqb %[ftmp8], %[ftmp8], %[ftmp7] \n\t"
 /* keep the original p0 / q0 in ftmp6 / ftmp7 */
1918  "mov.d %[ftmp6], %[ftmp2] \n\t"
1919  "mov.d %[ftmp7], %[ftmp3] \n\t"
 /* ftmp2 = (p0 + q1 + 2*p1 + 2) >> 2: truncating average of p0 and q1
  * (pavgb minus the low bit of p0^q1), then rounding average with p1 */
1920  "pxor %[ftmp5], %[ftmp2], %[ftmp4] \n\t"
1921  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1922  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1923  "psubusb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1924  "pavgb %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
 /* ftmp3 = (q0 + p1 + 2*q1 + 2) >> 2, built the same way */
1925  "pxor %[ftmp5], %[ftmp3], %[ftmp1] \n\t"
1926  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
1927  "pavgb %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
1928  "psubusb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1929  "pavgb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
 /* select per lane: original + ((filtered - original) & mask) */
1930  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1931  "psubb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1932  "pand %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1933  "pand %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1934  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1935  "paddb %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1936 
 /* store p0 to pix - stride and q0 to pix */
1937  MMI_SDXC1(%[ftmp2], %[addr0], %[stride], 0x00)
1938  MMI_SDC1(%[ftmp3], %[pix], 0x00)
1939  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1940  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1941  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1942  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1943  [ftmp8]"=&f"(ftmp[8]),
1946  [addr0]"=&r"(addr[0])
1947  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
1948  [alpha]"r"(alpha), [beta]"r"(beta),
1949  [ff_pb_1]"f"(ff_pb_1.f)
1950  : "memory"
1951  );
1952 }
1953 
/*
 * Filters across the vertical line at pix for 8 consecutive rows, in place,
 * using Loongson MMI inline assembly.
 *
 * Four bytes starting at pix - 2 are loaded from each of the 8 rows and
 * transposed so that every register holds one column (ftmp0..ftmp3 = columns
 * pix-2 .. pix+1, called p1, p0, q0, q1 below). The two middle columns are
 * filtered, the data is transposed back, and four bytes are written to each
 * of the 8 rows.
 *
 * pix:         address of the first byte right of the line, in the first row
 * stride:      byte distance between consecutive rows
 * alpha, beta: thresholds (used as alpha - 1 and beta - 1)
 * tc0:         four bytes are read; each one is duplicated so that it covers
 *              two adjacent rows and caps the adjustment in those lanes
 *
 * NOTE(review): the asm decrements %[alpha] and %[beta] although both are
 * listed as input-only "r" operands; GCC requires operands the template
 * modifies to be outputs (as is done for %[pix] here) -- confirm and fix.
 * NOTE(review): the listing numbers embedded in these lines skip 1959 and
 * 2081, so a declaration/operand may be missing from this view.
 */
1954 void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
1955  int8_t *tc0)
1956 {
1957  double ftmp[11];
1958  mips_reg addr[6];
1960 
1961  __asm__ volatile (
 /* alpha -= 1, beta -= 1; addr0/1/2 = 2/3/4 * stride;
  * addr5 = pix - 2 (row 0), pix = pix - 2 + 3*stride (row 3) */
1962  "addi %[alpha], %[alpha], -0x01 \n\t"
1963  "addi %[beta], %[beta], -0x01 \n\t"
1964  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
1965  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
1966  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
1967  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
1968  "or %[addr5], $0, %[pix] \n\t"
1969  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
 /* load rows 0..3 and interleave them */
1970  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
1971  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
1972  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
1973  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
1974  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
1975  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
1976  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1977  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1978  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
1979  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
1980  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* load rows 4..7 and finish the transpose into ftmp0..ftmp3 */
1981  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
1982  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
1983  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
1984  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
1985  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
1986  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
1987  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
1988  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1989  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1990  "mov.d %[ftmp6], %[ftmp4] \n\t"
1991  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
1992  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1993  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
1994  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
1995  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1996  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
 /* keep copies of the outer columns: ftmp9 = p1, ftmp10 = q1
  * (ftmp3 is overwritten by the filter below) */
1997  "mov.d %[ftmp9], %[ftmp0] \n\t"
1998  "mov.d %[ftmp10], %[ftmp3] \n\t"
1999 
 /* ftmp4 / ftmp5 = alpha-1 / beta-1 replicated into every byte lane;
  * ftmp7 = 0xff in lanes where |p0-q0| <= alpha-1, |p1-p0| <= beta-1
  * and |q1-q0| <= beta-1, else 0 */
2000  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2001  "mtc1 %[alpha], %[ftmp4] \n\t"
2002  "mtc1 %[beta], %[ftmp5] \n\t"
2003  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2004  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2005  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2006  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2007  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2008  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2009  "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2010  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2011  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2012  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2013  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2014  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2015  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2016  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2017  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2018  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2019  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2020  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2021  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2022  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
 /* four tc0 bytes, each duplicated into two lanes, ANDed into the mask */
2023  MMI_ULWC1(%[ftmp6], %[tc0], 0x00)
2024  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2025  "pand %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
 /* Build the adjustment pair (ftmp6, ftmp3), cap both at ftmp7 with an
  * unsigned min, then p0 = p0 - ftmp6 + ftmp3 and q0 = q0 - ftmp3 + ftmp6
  * using unsigned-saturating byte arithmetic. */
2026  "pcmpeqb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2027  "pxor %[ftmp5], %[ftmp1], %[ftmp2] \n\t"
2028  "pxor %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2029  "pand %[ftmp5], %[ftmp5], %[ff_pb_1] \n\t"
2030  "pavgb %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
2031  "pxor %[ftmp4], %[ftmp4], %[ftmp1] \n\t"
2032  "pavgb %[ftmp3], %[ftmp3], %[ff_pb_3] \n\t"
2033  "pavgb %[ftmp4], %[ftmp4], %[ftmp2] \n\t"
2034  "pavgb %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2035  "paddusb %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2036  "psubusb %[ftmp6], %[ff_pb_A1], %[ftmp3] \n\t"
2037  "psubusb %[ftmp3], %[ftmp3], %[ff_pb_A1] \n\t"
2038  "pminub %[ftmp6], %[ftmp6], %[ftmp7] \n\t"
2039  "pminub %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
2040  "psubusb %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2041  "psubusb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2042  "paddusb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2043  "paddusb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2044 
 /* transpose columns (ftmp9, ftmp1, ftmp2, ftmp10) back to rows and write
  * four bytes to each of rows 0..7 */
2045  "punpckhwd %[ftmp4], %[ftmp9], %[ftmp9] \n\t"
2046  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2047  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2048  "punpcklbh %[ftmp0], %[ftmp9], %[ftmp1] \n\t"
2049  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp10] \n\t"
2050  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2051  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2052  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2053  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2054  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2055  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2056  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2057  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2058  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2059  "punpckhwd %[ftmp3], %[ftmp10], %[ftmp10] \n\t"
2060  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2061  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2062  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2063  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2064  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2065  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2066  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2067  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2068  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2069  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2070  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2071  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2072  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2073  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2074  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2075  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2076  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2077  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2078  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2079  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2080  [ftmp10]"=&f"(ftmp[10]),
2082  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2083  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2084  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
 /* pix is read-write because the asm advances it */
2085  [pix]"+&r"(pix)
2086  : [alpha]"r"(alpha), [beta]"r"(beta),
2087  [stride]"r"((mips_reg)stride), [tc0]"r"(tc0),
2088  [ff_pb_1]"f"(ff_pb_1.f), [ff_pb_3]"f"(ff_pb_3.f),
2089  [ff_pb_A1]"f"(ff_pb_A1.f)
2090  : "memory"
2091  );
2092 }
2093 
/*
 * Filters across the vertical line at pix for 8 consecutive rows, in place,
 * using Loongson MMI inline assembly. No tc0 table is involved.
 *
 * Four bytes starting at pix - 2 are loaded from each of the 8 rows and
 * transposed so that every register holds one column (ftmp0..ftmp3 = columns
 * pix-2 .. pix+1, called p1, p0, q0, q1 below). The two middle columns are
 * filtered, the data is transposed back, and four bytes are written to each
 * of the 8 rows.
 *
 * pix:         address of the first byte right of the line, in the first row
 * stride:      byte distance between consecutive rows
 * alpha, beta: thresholds (used as alpha - 1 and beta - 1)
 *
 * NOTE(review): the asm decrements %[alpha] and %[beta] although both are
 * listed as input-only "r" operands; GCC requires operands the template
 * modifies to be outputs (as is done for %[pix] here) -- confirm and fix.
 * NOTE(review): %[ftmp10] is declared as an output but is not referenced in
 * the visible template.
 * NOTE(review): the listing numbers embedded in these lines skip 2099 and
 * 2216, so a declaration/operand may be missing from this view.
 */
2094 void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2095  int beta)
2096 {
2097  double ftmp[11];
2098  mips_reg addr[6];
2100 
2101  __asm__ volatile (
 /* alpha -= 1, beta -= 1; addr0/1/2 = 2/3/4 * stride;
  * addr5 = pix - 2 (row 0), pix = pix - 2 + 3*stride (row 3) */
2102  "addi %[alpha], %[alpha], -0x01 \n\t"
2103  "addi %[beta], %[beta], -0x01 \n\t"
2104  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2105  PTR_ADDI "%[pix], %[pix], -0x02 \n\t"
2106  PTR_ADDU "%[addr1], %[addr0], %[stride] \n\t"
2107  PTR_ADDU "%[addr2], %[addr0], %[addr0] \n\t"
2108  "or %[addr5], $0, %[pix] \n\t"
2109  PTR_ADDU "%[pix], %[pix], %[addr1] \n\t"
 /* load rows 0..3 and interleave them */
2110  MMI_ULWC1(%[ftmp0], %[addr5], 0x00)
2111  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2112  MMI_ULWC1(%[ftmp2], %[addr3], 0x00)
2113  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2114  MMI_ULWC1(%[ftmp1], %[addr4], 0x00)
2115  MMI_ULWC1(%[ftmp3], %[pix], 0x00)
2116  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2117  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2118  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2119  "punpckhhw %[ftmp2], %[ftmp0], %[ftmp1] \n\t"
2120  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
 /* load rows 4..7 and finish the transpose into ftmp0..ftmp3 */
2121  MMI_ULWC1(%[ftmp4], %[addr3], 0x00)
2122  PTR_ADDU "%[addr4], %[pix], %[addr0] \n\t"
2123  MMI_ULWC1(%[ftmp6], %[addr4], 0x00)
2124  PTR_ADDU "%[addr3], %[pix], %[addr1] \n\t"
2125  MMI_ULWC1(%[ftmp5], %[addr3], 0x00)
2126  PTR_ADDU "%[addr4], %[pix], %[addr2] \n\t"
2127  MMI_ULWC1(%[ftmp7], %[addr4], 0x00)
2128  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2129  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
2130  "mov.d %[ftmp6], %[ftmp4] \n\t"
2131  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2132  "punpckhhw %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
2133  "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t"
2134  "punpckhwd %[ftmp3], %[ftmp2], %[ftmp6] \n\t"
2135  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2136  "punpcklwd %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2137 
 /* ftmp4 / ftmp5 = alpha-1 / beta-1 replicated into every byte lane;
  * ftmp7 = 0xff in lanes where |p0-q0| <= alpha-1, |p1-p0| <= beta-1
  * and |q1-q0| <= beta-1, else 0 */
2138  "pxor %[ftmp8], %[ftmp8], %[ftmp8] \n\t"
2139  "mtc1 %[alpha], %[ftmp4] \n\t"
2140  "mtc1 %[beta], %[ftmp5] \n\t"
2141  "pshufh %[ftmp4], %[ftmp4], %[ftmp8] \n\t"
2142  "pshufh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
2143  "packushb %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2144  "packushb %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2145  "psubusb %[ftmp6], %[ftmp2], %[ftmp1] \n\t"
2146  "psubusb %[ftmp7], %[ftmp1], %[ftmp2] \n\t"
2147  "por %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
2148  "psubusb %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2149  "psubusb %[ftmp6], %[ftmp1], %[ftmp0] \n\t"
2150  "psubusb %[ftmp4], %[ftmp0], %[ftmp1] \n\t"
2151  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2152  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2153  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2154  "psubusb %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
2155  "psubusb %[ftmp4], %[ftmp3], %[ftmp2] \n\t"
2156  "por %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2157  "psubusb %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2158  "por %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
2159  "pxor %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
2160  "pcmpeqb %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
 /* keep the original p0 / q0 in ftmp5 / ftmp6 */
2161  "mov.d %[ftmp5], %[ftmp1] \n\t"
2162  "mov.d %[ftmp6], %[ftmp2] \n\t"
 /* ftmp1 = (p0 + q1 + 2*p1 + 2) >> 2: truncating average of p0 and q1
  * (pavgb minus the low bit of p0^q1), then rounding average with p1 */
2163  "pxor %[ftmp4], %[ftmp1], %[ftmp3] \n\t"
2164  "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2165  "pavgb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2166  "psubusb %[ftmp1], %[ftmp1], %[ftmp4] \n\t"
2167  "pavgb %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
 /* ftmp2 = (q0 + p1 + 2*q1 + 2) >> 2, built the same way */
2168  "pxor %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2169  "pand %[ftmp4], %[ftmp4], %[ff_pb_1] \n\t"
2170  "pavgb %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2171  "psubusb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2172  "pavgb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
 /* select per lane: original + ((filtered - original) & mask) */
2173  "psubb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2174  "psubb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2175  "pand %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2176  "pand %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2177  "paddb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2178  "paddb %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2179 
 /* transpose columns ftmp0..ftmp3 back to rows and write four bytes to
  * each of rows 0..7 */
2180  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2181  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2182  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2183  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2184  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2185  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2186  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2187  MMI_USWC1(%[ftmp1], %[addr5], 0x00)
2188  PTR_ADDU "%[addr3], %[addr5], %[stride] \n\t"
2189  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2190  PTR_ADDU "%[addr4], %[addr5], %[addr0] \n\t"
2191  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2192  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2193  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2194  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2195  MMI_USWC1(%[ftmp0], %[pix], 0x00)
2196  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2197  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2198  PTR_ADDU "%[addr3], %[pix], %[stride] \n\t"
2199  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2200  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2201  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2202  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2203  PTR_ADDU "%[addr3], %[pix], %[addr0] \n\t"
2204  PTR_ADDU "%[addr4], %[pix], %[addr1] \n\t"
2205  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2206  PTR_ADDU "%[addr3], %[pix], %[addr2] \n\t"
2207  MMI_USWC1(%[ftmp4], %[addr4], 0x00)
2208  "punpckhwd %[ftmp9], %[ftmp4], %[ftmp4] \n\t"
2209  MMI_USWC1(%[ftmp9], %[addr3], 0x00)
2210  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2211  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2212  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2213  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2214  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2215  [ftmp10]"=&f"(ftmp[10]),
2217  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2218  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2219  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
 /* pix is read-write because the asm advances it */
2220  [pix]"+&r"(pix)
2221  : [alpha]"r"(alpha), [beta]"r"(beta),
2222  [stride]"r"((mips_reg)stride), [ff_pb_1]"f"(ff_pb_1.f)
2223  : "memory"
2224  );
2225 }
2226 
/**
 * Run ff_deblock_v8_luma_8_mmi() on the two 8-byte halves at pix and pix + 8.
 *
 * Each half consumes two consecutive tc0 entries (tc0[0..1] for the first,
 * tc0[2..3] for the second). A half is skipped when both of its tc0 entries
 * are negative.
 *
 * @param pix    address passed on for the first half; the second uses pix + 8
 * @param stride forwarded unchanged
 * @param alpha  forwarded unchanged
 * @param beta   forwarded unchanged
 * @param tc0    at least four entries are read
 */
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
                             int8_t *tc0)
{
    int half;

    for (half = 0; half < 2; half++) {
        int8_t *tc = tc0 + 2 * half;

        /* The bitwise AND of two int8_t values is negative only when both
         * operands are negative. */
        if ((tc[0] & tc[1]) < 0)
            continue;

        ff_deblock_v8_luma_8_mmi(pix + 8 * half, stride, alpha, beta, tc);
    }
}
2235 
/**
 * Run deblock_v8_luma_intra_8_mmi() on the two 8-byte halves at pix and
 * pix + 8, first half first.
 *
 * @param pix    address passed on for the first half; the second uses pix + 8
 * @param stride forwarded unchanged
 * @param alpha  forwarded unchanged
 * @param beta   forwarded unchanged
 */
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
                                   int beta)
{
    int offset;

    for (offset = 0; offset < 16; offset += 8)
        deblock_v8_luma_intra_8_mmi(pix + offset, stride, alpha, beta);
}
2242 
2243 void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta,
2244  int8_t *tc0)
2245 {
2246  DECLARE_ALIGNED(8, const uint64_t, stack[0x0d]);
2247  double ftmp[9];
2248  mips_reg addr[8];
2251 
2252  __asm__ volatile (
2253  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2254  PTR_ADDI "%[addr1], %[pix], -0x4 \n\t"
2255  PTR_ADDU "%[addr2], %[stride], %[addr0] \n\t"
2256  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2257  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2258  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2259  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2260  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2261  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2262  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2263  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2264  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2265  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2266  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2267  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2268  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2269  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2270  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2271  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2272  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2273  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2274  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2275  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2276  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2277  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2278  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2279  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2280  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2281  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2282  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2283  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2284  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2285  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2286  MMI_LDC1(%[ftmp8], %[stack], 0x10)
2287  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2288  MMI_SDC1(%[ftmp0], %[stack], 0x00)
2289  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2290  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2291  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2292  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2293  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2294  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2295  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2296  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2297  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2298  MMI_SDC1(%[ftmp1], %[stack], 0x10)
2299  MMI_SDC1(%[ftmp3], %[stack], 0x20)
2300  MMI_SDC1(%[ftmp7], %[stack], 0x30)
2301  MMI_SDC1(%[ftmp5], %[stack], 0x40)
2302  MMI_SDC1(%[ftmp6], %[stack], 0x50)
2303  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2304  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2305  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2306  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2307  MMI_ULDC1(%[ftmp1], %[addr3], 0x00)
2308  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2309  MMI_ULDC1(%[ftmp2], %[addr5], 0x00)
2310  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2311  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2312  MMI_ULDC1(%[ftmp4], %[addr3], 0x00)
2313  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2314  MMI_ULDC1(%[ftmp5], %[addr5], 0x00)
2315  PTR_ADDU "%[addr3], %[addr4], %[addr2] \n\t"
2316  MMI_ULDC1(%[ftmp6], %[addr3], 0x00)
2317  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2318  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2319  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2320  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2321  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2322  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2323  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2324  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2325  MMI_ULDC1(%[ftmp8], %[addr3], 0x00)
2326  "punpckhhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2327  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2328  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2329  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2330  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2331  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2332  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2333  MMI_LDC1(%[ftmp8], %[stack], 0x18)
2334  MMI_SDC1(%[ftmp0], %[stack], 0x08)
2335  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp8] \n\t"
2336  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
2337  "punpckhhw %[ftmp0], %[ftmp3], %[ftmp5] \n\t"
2338  "punpcklhw %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2339  "punpckhwd %[ftmp5], %[ftmp7], %[ftmp3] \n\t"
2340  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
2341  "punpckhwd %[ftmp3], %[ftmp1], %[ftmp2] \n\t"
2342  "punpcklwd %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2343  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2344  MMI_SDC1(%[ftmp1], %[stack], 0x18)
2345  MMI_SDC1(%[ftmp3], %[stack], 0x28)
2346  MMI_SDC1(%[ftmp7], %[stack], 0x38)
2347  MMI_SDC1(%[ftmp5], %[stack], 0x48)
2348  MMI_SDC1(%[ftmp6], %[stack], 0x58)
2349  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2350  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2351  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2352  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2353  [ftmp8]"=&f"(ftmp[8]),
2355  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2356  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2357  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2358  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2359  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2360  [stack]"r"(stack)
2361  : "memory"
2362  );
2363 
2364  ff_deblock_v_luma_8_mmi((uint8_t *) &stack[6], 0x10, alpha, beta, tc0);
2365 
2366  __asm__ volatile (
2367  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2368  PTR_ADDI "%[addr1], %[pix], -0x02 \n\t"
2369  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2370  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2371  PTR_ADDU "%[addr7], %[addr6], %[addr6] \n\t"
2372  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
2373  MMI_LDC1(%[ftmp0], %[stack], 0x10)
2374  MMI_LDC1(%[ftmp1], %[stack], 0x20)
2375  MMI_LDC1(%[ftmp2], %[stack], 0x30)
2376  MMI_LDC1(%[ftmp3], %[stack], 0x40)
2377  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2378  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2379  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2380  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2381  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2382  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2383  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2384  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2385  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2386  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2387  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2388  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2389  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2390  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2391  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2392  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2393  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2394  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2395  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2396  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2397  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2398  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2399  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2400  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2401  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2402  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2403  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2404  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2405  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2406  PTR_ADDU "%[addr1], %[addr1], %[addr7] \n\t"
2407  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2408  PTR_ADDU "%[addr4], %[addr4], %[addr7] \n\t"
2409  MMI_LDC1(%[ftmp0], %[stack], 0x18)
2410  MMI_LDC1(%[ftmp1], %[stack], 0x28)
2411  MMI_LDC1(%[ftmp2], %[stack], 0x38)
2412  MMI_LDC1(%[ftmp3], %[stack], 0x48)
2413  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2414  "punpckhwd %[ftmp4], %[ftmp0], %[ftmp0] \n\t"
2415  PTR_ADDU "%[addr6], %[addr0], %[addr0] \n\t"
2416  "punpckhwd %[ftmp5], %[ftmp1], %[ftmp1] \n\t"
2417  "punpckhwd %[ftmp6], %[ftmp2], %[ftmp2] \n\t"
2418  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2419  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2420  PTR_ADDU "%[addr3], %[addr1], %[stride] \n\t"
2421  "punpcklhw %[ftmp1], %[ftmp0], %[ftmp2] \n\t"
2422  "punpckhhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2423  MMI_USWC1(%[ftmp1], %[addr1], 0x00)
2424  "punpckhwd %[ftmp1], %[ftmp1], %[ftmp1] \n\t"
2425  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2426  MMI_USWC1(%[ftmp1], %[addr3], 0x00)
2427  MMI_USWC1(%[ftmp0], %[addr5], 0x00)
2428  "punpckhwd %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2429  "punpckhwd %[ftmp3], %[ftmp3], %[ftmp3] \n\t"
2430  MMI_USWC1(%[ftmp0], %[addr4], 0x00)
2431  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2432  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
2433  PTR_ADDU "%[addr3], %[addr4], %[stride] \n\t"
2434  "punpcklhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t"
2435  "punpckhhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2436  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2437  PTR_ADDU "%[addr3], %[addr4], %[addr0] \n\t"
2438  "punpckhwd %[ftmp5], %[ftmp5], %[ftmp5] \n\t"
2439  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2440  MMI_USWC1(%[ftmp5], %[addr3], 0x00)
2441  MMI_USWC1(%[ftmp4], %[addr5], 0x00)
2442  PTR_ADDU "%[addr3], %[addr4], %[addr6] \n\t"
2443  "punpckhwd %[ftmp4], %[ftmp4], %[ftmp4] \n\t"
2444  MMI_USWC1(%[ftmp4], %[addr3], 0x00)
2445  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2446  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2447  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2448  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2449  [ftmp8]"=&f"(ftmp[8]),
2452  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2453  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2454  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2455  [addr6]"=&r"(addr[6]), [addr7]"=&r"(addr[7])
2456  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2457  [stack]"r"(stack)
2458  : "memory"
2459  );
2460 }
2461 
/*
 * Luma intra deblocking entry point that works on 16 picture rows starting
 * at pix - 4.
 *
 * Flow, as visible in this block:
 *   1. First asm statement: read 16 rows of the picture (one MMI_ULDC1 per
 *      row) from pix - 4, run them through punpck{l,h}{bh,hw,wd} steps and
 *      store the results into the local scratch buffer ptmp, which is laid
 *      out with a 0x10-byte row pitch. Rows 0..7 of the picture feed the
 *      +0x00 half of each ptmp row, rows 8..15 feed the +0x08 half.
 *   2. Call ff_deblock_v_luma_intra_8_mmi() on ptmp with stride 0x10.
 *   3. Second asm statement: read ptmp back, apply the same punpck sequence
 *      and store 16 rows to the same picture addresses.
 *
 * NOTE(review): the punpck sequences look like an 8x8 byte transpose (so the
 * vertical-edge filter can be reused for a horizontal neighbourhood) --
 * confirm against the MMI instruction reference.
 *
 * Parameters:
 *   pix    - picture pointer; rows are addressed from pix - 4.
 *   stride - byte distance between picture rows (cast to mips_reg for asm).
 *   alpha, beta - forwarded unchanged to ff_deblock_v_luma_intra_8_mmi().
 *
 * NOTE(review): ptmp and pdat are declared const but are used as store
 * targets below (MMI_SDC1 / PTR_S, and ptmp is handed to the callee through
 * a non-const uint8_t * cast) -- confirm whether the const is intended.
 *
 * NOTE(review): the embedded listing numbers in this extract skip 2469, 2589
 * and 2719; lines may be missing here -- compare with the upstream file.
 */
2462 void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha,
2463  int beta)
2464 {
 /* ptmp: scratch picture, 0x11 * 8 bytes; stores below reach offset 0x78. */
2465  DECLARE_ALIGNED(8, const uint64_t, ptmp[0x11]);
 /* pdat: four 8-byte slots used to carry addr1/addr2/addr0/addr3 across
  * the C call between the two asm statements. */
2466  DECLARE_ALIGNED(8, const uint64_t, pdat[0x04]);
2467  double ftmp[9];
2468  mips_reg addr[7];
2470 
2471  __asm__ volatile (
 /* addr0 = 2*stride, addr1 = pix - 4, addr2 = 3*stride,
  * addr3 = 4*stride, addr4 = addr1 + 3*stride (row 3). */
2472  PTR_ADDU "%[addr0], %[stride], %[stride] \n\t"
2473  PTR_ADDI "%[addr1], %[pix], -0x04 \n\t"
2474  PTR_ADDU "%[addr2], %[addr0], %[stride] \n\t"
2475  PTR_ADDU "%[addr3], %[addr0], %[addr0] \n\t"
2476  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
 /* Load rows 0..6 into ftmp0..ftmp6; addr5/addr6 are reused as
  * per-row temporaries. */
2477  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2478  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2479  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2480  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2481  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2482  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2483  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2484  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2485  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2486  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2487  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2488  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
 /* addr5 = addr4 + 4*stride (row 7), loaded into ftmp8 further down. */
2489  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2490  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2491  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2492  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2493  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2494  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2495  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2496  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2497  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2498  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
 /* Only nine ftmp registers are available: ftmp3 is parked at
  * ptmp+0x00 and reloaded into ftmp2 a few lines below. */
2499  MMI_SDC1(%[ftmp3], %[ptmp], 0x00)
2500  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2501  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2502  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2503  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2504  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2505  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
 /* Second spill: ftmp2 goes to ptmp+0x20 (reloaded as ftmp8 later),
  * then the value parked at ptmp+0x00 is brought back. */
2506  MMI_SDC1(%[ftmp2], %[ptmp], 0x20)
2507  MMI_LDC1(%[ftmp2], %[ptmp], 0x00)
2508  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2509  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2510  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2511  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2512  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2513  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
 /* Final results for ptmp rows 0, 1, 4, 5 (+0x00 half of each row). */
2514  MMI_SDC1(%[ftmp0], %[ptmp], 0x00)
2515  MMI_SDC1(%[ftmp5], %[ptmp], 0x10)
2516  MMI_SDC1(%[ftmp7], %[ptmp], 0x40)
2517  MMI_SDC1(%[ftmp4], %[ptmp], 0x50)
2518  MMI_LDC1(%[ftmp8], %[ptmp], 0x20)
2519  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2520  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2521  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2522  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
 /* addr5 = 8*stride; used to advance addr1/addr4 to picture rows 8..15. */
2523  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
 /* Final results for ptmp rows 2, 3, 6, 7 (+0x00 half of each row). */
2524  MMI_SDC1(%[ftmp3], %[ptmp], 0x20)
2525  MMI_SDC1(%[ftmp0], %[ptmp], 0x30)
2526  MMI_SDC1(%[ftmp6], %[ptmp], 0x60)
2527  MMI_SDC1(%[ftmp5], %[ptmp], 0x70)
2528  PTR_ADDU "%[addr1], %[addr1], %[addr5] \n\t"
2529  PTR_ADDU "%[addr4], %[addr4], %[addr5] \n\t"
 /* Same load / punpck / store sequence for picture rows 8..15; every
  * ptmp offset is shifted by 0x08 so the results land in the second
  * half of each 0x10-byte ptmp row. */
2530  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2531  MMI_ULDC1(%[ftmp0], %[addr1], 0x00)
2532  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2533  MMI_ULDC1(%[ftmp1], %[addr5], 0x00)
2534  MMI_ULDC1(%[ftmp2], %[addr6], 0x00)
2535  PTR_ADDU "%[addr5], %[addr4], %[stride] \n\t"
2536  MMI_ULDC1(%[ftmp3], %[addr4], 0x00)
2537  PTR_ADDU "%[addr6], %[addr4], %[addr0] \n\t"
2538  MMI_ULDC1(%[ftmp4], %[addr5], 0x00)
2539  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2540  MMI_ULDC1(%[ftmp5], %[addr6], 0x00)
2541  MMI_ULDC1(%[ftmp6], %[addr5], 0x00)
2542  PTR_ADDU "%[addr5], %[addr4], %[addr3] \n\t"
2543  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2544  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2545  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2546  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2547  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2548  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2549  MMI_ULDC1(%[ftmp8], %[addr5], 0x00)
2550  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2551  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2552  MMI_SDC1(%[ftmp3], %[ptmp], 0x08)
2553  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2554  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2555  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2556  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2557  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2558  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2559  MMI_SDC1(%[ftmp2], %[ptmp], 0x28)
2560  MMI_LDC1(%[ftmp2], %[ptmp], 0x08)
2561  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2562  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2563  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2564  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2565  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2566  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2567  MMI_SDC1(%[ftmp0], %[ptmp], 0x08)
2568  MMI_SDC1(%[ftmp5], %[ptmp], 0x18)
2569  MMI_SDC1(%[ftmp7], %[ptmp], 0x48)
2570  MMI_SDC1(%[ftmp4], %[ptmp], 0x58)
2571  MMI_LDC1(%[ftmp8], %[ptmp], 0x28)
2572  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2573  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2574  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2575  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2576  MMI_SDC1(%[ftmp3], %[ptmp], 0x28)
2577  MMI_SDC1(%[ftmp0], %[ptmp], 0x38)
2578  MMI_SDC1(%[ftmp6], %[ptmp], 0x68)
2579  MMI_SDC1(%[ftmp5], %[ptmp], 0x78)
 /* Save addr1 (now pix - 4 + 8*stride), addr2 (3*stride),
  * addr0 (2*stride) and addr3 (4*stride) for the second asm statement. */
2580  PTR_S "%[addr1], 0x00(%[pdat]) \n\t"
2581  PTR_S "%[addr2], 0x08(%[pdat]) \n\t"
2582  PTR_S "%[addr0], 0x10(%[pdat]) \n\t"
2583  PTR_S "%[addr3], 0x18(%[pdat]) \n\t"
2584  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2585  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2586  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2587  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2588  [ftmp8]"=&f"(ftmp[8]),
2590  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2591  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2592  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2593  [addr6]"=&r"(addr[6])
2594  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2595  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2596  : "memory"
2597  );
2598 
 /* &ptmp[8] is byte offset 0x40 into ptmp, i.e. the fifth 0x10-byte row;
  * 0x10 is passed as the stride to match the ptmp row pitch. */
2599  ff_deblock_v_luma_intra_8_mmi((uint8_t *) &ptmp[8], 0x10, alpha, beta);
2600 
2601  __asm__ volatile (
 /* Restore the addresses/offsets saved in pdat by the first asm
  * statement; addr1 points at picture row 8, addr4 at row 11. */
2602  PTR_L "%[addr1], 0x00(%[pdat]) \n\t"
2603  PTR_L "%[addr2], 0x08(%[pdat]) \n\t"
2604  PTR_L "%[addr0], 0x10(%[pdat]) \n\t"
2605  PTR_L "%[addr3], 0x18(%[pdat]) \n\t"
2606  PTR_ADDU "%[addr4], %[addr1], %[addr2] \n\t"
 /* Read back the +0x08 half of ptmp rows 0..6 (row 7 is loaded into
  * ftmp8 further down). */
2607  MMI_LDC1(%[ftmp0], %[ptmp], 0x08)
2608  MMI_LDC1(%[ftmp1], %[ptmp], 0x18)
2609  MMI_LDC1(%[ftmp2], %[ptmp], 0x28)
2610  MMI_LDC1(%[ftmp3], %[ptmp], 0x38)
2611  MMI_LDC1(%[ftmp4], %[ptmp], 0x48)
2612  MMI_LDC1(%[ftmp5], %[ptmp], 0x58)
2613  MMI_LDC1(%[ftmp6], %[ptmp], 0x68)
2614  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2615  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2616  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2617  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2618  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2619  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2620  MMI_LDC1(%[ftmp8], %[ptmp], 0x78)
2621  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2622  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
 /* Here the destination picture rows double as spill slots: ftmp3 is
  * parked at addr1 and ftmp2 at addr1 + 2*stride; both locations are
  * overwritten with final data by later stores in this statement. */
2623  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2624  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2625  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2626  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2627  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2628  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2629  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2630  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2631  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2632  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2633  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2634  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2635  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2636  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2637  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2638  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
 /* Final stores to picture rows 8, 9, 12 and 13. */
2639  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2640  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2641  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2642  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2643  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2644  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2645  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2646  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
 /* Reload the value parked at addr1 + 2*stride before that row gets
  * its final contents. */
2647  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2648  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2649  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2650  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2651  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2652  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
 /* Final stores to picture rows 10, 11, 14 and 15. */
2653  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2654  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2655  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2656  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2657  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
 /* addr5 = 8*stride; step addr1/addr4 back to picture rows 0 and 3. */
2658  PTR_ADDU "%[addr5], %[addr3], %[addr3] \n\t"
2659  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2660  PTR_SUBU "%[addr1], %[addr1], %[addr5] \n\t"
2661  PTR_SUBU "%[addr4], %[addr4], %[addr5] \n\t"
 /* Same sequence for the +0x00 half of ptmp, written to picture
  * rows 0..7. */
2662  MMI_LDC1(%[ftmp0], %[ptmp], 0x00)
2663  MMI_LDC1(%[ftmp1], %[ptmp], 0x10)
2664  MMI_LDC1(%[ftmp2], %[ptmp], 0x20)
2665  MMI_LDC1(%[ftmp3], %[ptmp], 0x30)
2666  MMI_LDC1(%[ftmp4], %[ptmp], 0x40)
2667  MMI_LDC1(%[ftmp5], %[ptmp], 0x50)
2668  MMI_LDC1(%[ftmp6], %[ptmp], 0x60)
2669  "punpckhbh %[ftmp7], %[ftmp0], %[ftmp1] \n\t"
2670  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2671  "punpckhbh %[ftmp1], %[ftmp2], %[ftmp3] \n\t"
2672  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2673  "punpckhbh %[ftmp3], %[ftmp4], %[ftmp5] \n\t"
2674  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
2675  MMI_LDC1(%[ftmp8], %[ptmp], 0x70)
2676  "punpckhbh %[ftmp5], %[ftmp6], %[ftmp8] \n\t"
2677  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
2678  MMI_USDC1(%[ftmp3], %[addr1], 0x00)
2679  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2680  "punpckhhw %[ftmp3], %[ftmp0], %[ftmp2] \n\t"
2681  "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
2682  "punpckhhw %[ftmp2], %[ftmp4], %[ftmp6] \n\t"
2683  "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
2684  "punpckhhw %[ftmp6], %[ftmp7], %[ftmp1] \n\t"
2685  "punpcklhw %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
2686  MMI_USDC1(%[ftmp2], %[addr5], 0x00)
2687  MMI_ULDC1(%[ftmp2], %[addr1], 0x00)
2688  "punpckhhw %[ftmp1], %[ftmp2], %[ftmp5] \n\t"
2689  "punpcklhw %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2690  "punpckhwd %[ftmp5], %[ftmp0], %[ftmp4] \n\t"
2691  "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2692  "punpckhwd %[ftmp4], %[ftmp7], %[ftmp2] \n\t"
2693  "punpcklwd %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
2694  PTR_ADDU "%[addr5], %[addr1], %[stride] \n\t"
2695  MMI_USDC1(%[ftmp0], %[addr1], 0x00)
2696  PTR_ADDU "%[addr6], %[addr4], %[stride] \n\t"
2697  MMI_USDC1(%[ftmp5], %[addr5], 0x00)
2698  PTR_ADDU "%[addr5], %[addr4], %[addr0] \n\t"
2699  MMI_USDC1(%[ftmp7], %[addr6], 0x00)
2700  PTR_ADDU "%[addr6], %[addr1], %[addr0] \n\t"
2701  MMI_USDC1(%[ftmp4], %[addr5], 0x00)
2702  MMI_ULDC1(%[ftmp8], %[addr6], 0x00)
2703  PTR_ADDU "%[addr5], %[addr1], %[addr0] \n\t"
2704  "punpckhwd %[ftmp0], %[ftmp3], %[ftmp8] \n\t"
2705  "punpcklwd %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2706  "punpckhwd %[ftmp5], %[ftmp6], %[ftmp1] \n\t"
2707  "punpcklwd %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
2708  MMI_USDC1(%[ftmp3], %[addr5], 0x00)
2709  PTR_ADDU "%[addr5], %[addr4], %[addr2] \n\t"
2710  MMI_USDC1(%[ftmp0], %[addr4], 0x00)
2711  PTR_ADDU "%[addr6], %[addr4], %[addr3] \n\t"
2712  MMI_USDC1(%[ftmp6], %[addr5], 0x00)
2713  MMI_USDC1(%[ftmp5], %[addr6], 0x00)
2714  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2715  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2716  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2717  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2718  [ftmp8]"=&f"(ftmp[8]),
2720  [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]),
2721  [addr2]"=&r"(addr[2]), [addr3]"=&r"(addr[3]),
2722  [addr4]"=&r"(addr[4]), [addr5]"=&r"(addr[5]),
2723  [addr6]"=&r"(addr[6])
2724  : [pix]"r"(pix), [stride]"r"((mips_reg)stride),
2725  [ptmp]"r"(ptmp), [pdat]"r"(pdat)
2726  : "memory"
2727  );
2728 }
ff_deblock_h_chroma_8_mmi
void ff_deblock_h_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1954
stride
int stride
Definition: mace.c:144
ff_h264_idct8_add_8_mmi
void ff_h264_idct8_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:171
mem_internal.h
deblock_v8_luma_intra_8_mmi
static void deblock_v8_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1566
PTR_SLL
#define PTR_SLL
Definition: asmdefs.h:57
output
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output
Definition: filter_design.txt:225
DECLARE_VAR_LOW32
#define DECLARE_VAR_LOW32
Definition: mmiutils.h:37
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:26
b
#define b
Definition: input.c:40
ff_h264_biweight_pixels4_8_mmi
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1394
ff_pb_1
const union av_intfloat64 ff_pb_1
Definition: constants.c:58
ff_h264_idct_add16_8_mmi
void ff_h264_idct_add16_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[5 *8])
Definition: h264dsp_mmi.c:768
mips_reg
#define mips_reg
Definition: asmdefs.h:46
ff_h264_add_pixels4_8_mmi
void ff_h264_add_pixels4_8_mmi(uint8_t *dst, int16_t *src, int stride)
Definition: h264dsp_mmi.c:31
ff_pw_1
const union av_intfloat64 ff_pw_1
Definition: constants.c:25
ff_h264_weight_pixels8_8_mmi
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1261
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:51
ff_pb_A1
const union av_intfloat64 ff_pb_A1
Definition: constants.c:61
ff_h264_biweight_pixels16_8_mmi
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1190
mmiutils.h
ff_pw_32
const union av_intfloat64 ff_pw_32
Definition: constants.c:42
ff_h264_chroma_dc_dequant_idct_8_mmi
void ff_h264_chroma_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1120
ff_deblock_v_chroma_intra_8_mmi
void ff_deblock_v_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:1876
h264dsp_mips.h
src
#define src
Definition: vp8dsp.c:255
ff_h264_idct8_add4_8_mmi
void ff_h264_idct8_add4_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[5 *8])
Definition: h264dsp_mmi.c:799
bit_depth_template.c
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
weight
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1561
ff_h264_idct_add8_8_mmi
void ff_h264_idct_add8_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:816
dc
Tag MUST be and< 10hcoeff half pel interpolation filter coefficients, hcoeff[0] are the 2 middle coefficients[1] are the next outer ones and so on, resulting in a filter like:...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... the sign of the coefficients is not explicitly stored but alternates after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... hcoeff[0] is not explicitly stored but found by subtracting the sum of all stored coefficients with signs from 32 hcoeff[0]=32 - hcoeff[1] - hcoeff[2] - ... a good choice for hcoeff and htaps is htaps=6 hcoeff={40,-10, 2} an alternative which requires more computations at both encoder and decoder side and may or may not be better is htaps=8 hcoeff={42,-14, 6,-2}ref_frames minimum of the number of available reference frames and max_ref_frames for example the first frame after a key frame always has ref_frames=1spatial_decomposition_type wavelet type 0 is a 9/7 symmetric compact integer wavelet 1 is a 5/3 symmetric compact integer wavelet others are reserved stored as delta from last, last is reset to 0 if always_reset||keyframeqlog quality(logarithmic quantizer scale) stored as delta from last, last is reset to 0 if always_reset||keyframemv_scale stored as delta from last, last is reset to 0 if always_reset||keyframe FIXME check that everything works fine if this changes between framesqbias dequantization bias stored as delta from last, last is reset to 0 if always_reset||keyframeblock_max_depth maximum depth of the block tree stored as delta from last, last is reset to 0 if always_reset||keyframequant_table quantization tableHighlevel bitstream structure:==============================--------------------------------------------|Header|--------------------------------------------|------------------------------------|||Block0||||split?||||yes no||||......... intra?||||:Block01 :yes no||||:Block02 :....... 
..........||||:Block03 ::y DC ::ref index:||||:Block04 ::cb DC ::motion x :||||......... :cr DC ::motion y :||||....... ..........|||------------------------------------||------------------------------------|||Block1|||...|--------------------------------------------|------------ ------------ ------------|||Y subbands||Cb subbands||Cr subbands||||--- ---||--- ---||--- ---|||||LL0||HL0||||LL0||HL0||||LL0||HL0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||LH0||HH0||||LH0||HH0||||LH0||HH0|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HL1||LH1||||HL1||LH1||||HL1||LH1|||||--- ---||--- ---||--- ---||||--- ---||--- ---||--- ---|||||HH1||HL2||||HH1||HL2||||HH1||HL2|||||...||...||...|||------------ ------------ ------------|--------------------------------------------Decoding process:=================------------|||Subbands|------------||||------------|Intra DC||||LL0 subband prediction ------------|\ Dequantization ------------------- \||Reference frames|\ IDWT|------- -------|Motion \|||Frame 0||Frame 1||Compensation . OBMC v -------|------- -------|--------------. \------> Frame n output Frame Frame<----------------------------------/|...|------------------- Range Coder:============Binary Range Coder:------------------- The implemented range coder is an adapted version based upon "Range encoding: an algorithm for removing redundancy from a digitised message." by G. N. N. Martin. The symbols encoded by the Snow range coder are bits(0|1). The associated probabilities are not fix but change depending on the symbol mix seen so far. 
bit seen|new state ---------+----------------------------------------------- 0|256 - state_transition_table[256 - old_state];1|state_transition_table[old_state];state_transition_table={ 0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, 195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, 210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, 226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0};FIXME Range Coding of integers:------------------------- FIXME Neighboring Blocks:===================left and top are set to the respective blocks unless they are outside of the image in which case they are set to the Null block top-left is set to the top left block unless it is outside of the image in which case it is set to the left block if this block has no larger parent block or it is at the left side of its parent block and the top right block is not outside of the image then the top right block is used for top-right else the top-left block is used Null block y, cb, cr are 128 level, ref, mx and my are 0 Motion Vector 
Prediction:=========================1. the motion vectors of all the neighboring blocks are scaled to compensate for the difference of reference frames scaled_mv=(mv *(256 *(current_reference+1)/(mv.reference+1))+128)> the median of the scaled top and top right vectors is used as motion vector prediction the used motion vector is the sum of the predictor and(mvx_diff, mvy_diff) *mv_scale Intra DC Prediction block[y][x] dc[1]
Definition: snow.txt:400
ff_h264_biweight_pixels8_8_mmi
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, ptrdiff_t stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:1304
ff_deblock_v_luma_8_mmi
void ff_deblock_v_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2227
ff_deblock_h_chroma_intra_8_mmi
void ff_deblock_h_chroma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2094
height
#define height
a
The reader does not expect b to be semantically unsigned here, and if the code is changed by maybe adding a cast, a division or other the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type but it holds a signed int. To use the same example: SUINT a
Definition: undefined.txt:41
PTR_SUBU
#define PTR_SUBU
Definition: asmdefs.h:52
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
src0
#define src0
Definition: h264pred.c:139
DECLARE_ALIGNED
#define DECLARE_ALIGNED(n, t, v)
Definition: mem.h:116
src1
#define src1
Definition: h264pred.c:140
ff_h264_idct_add16intra_8_mmi
void ff_h264_idct_add16intra_8_mmi(uint8_t *dst, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[5 *8])
Definition: h264dsp_mmi.c:786
ff_h264_chroma422_dc_dequant_idct_8_mmi
void ff_h264_chroma422_dc_dequant_idct_8_mmi(int16_t *block, int qmul)
Definition: h264dsp_mmi.c:1087
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
ff_h264_idct_add_8_mmi
void ff_h264_idct_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:79
ff_h264_idct_add8_422_8_mmi
void ff_h264_idct_add8_422_8_mmi(uint8_t **dest, const int *block_offset, int16_t *block, int stride, const uint8_t nnzc[15 *8])
Definition: h264dsp_mmi.c:832
weights
static const int weights[]
Definition: hevc_pel.c:32
ff_deblock_v_luma_intra_8_mmi
void ff_deblock_v_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2236
ff_deblock_v_chroma_8_mmi
void ff_deblock_v_chroma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1792
av_intfloat64::f
double f
Definition: intfloat.h:34
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
ff_h264_weight_pixels4_8_mmi
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1356
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
RESTRICT_ASM_LOW32
#define RESTRICT_ASM_LOW32
Definition: mmiutils.h:38
DECLARE_VAR_ADDRT
#define DECLARE_VAR_ADDRT
Definition: mmiutils.h:41
scan8
static const uint8_t scan8[16 *3+3]
Definition: h264dec.h:664
temp
else temp
Definition: vf_mcdeint.c:248
PTR_S
#define PTR_S
Definition: asmdefs.h:54
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
ff_pb_3
const union av_intfloat64 ff_pb_3
Definition: constants.c:59
PTR_L
#define PTR_L
Definition: asmdefs.h:53
PTR_SRL
#define PTR_SRL
Definition: asmdefs.h:56
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
ff_h264_weight_pixels16_8_mmi
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, ptrdiff_t stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:1134
d
d
Definition: ffmpeg_filter.c:153
ff_h264_idct8_dc_add_8_mmi
void ff_h264_idct8_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:683
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
ff_deblock_h_luma_intra_8_mmi
void ff_deblock_h_luma_intra_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta)
Definition: h264dsp_mmi.c:2462
ff_deblock_v8_luma_8_mmi
void ff_deblock_v8_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:1438
RESTRICT_ASM_ADDRT
#define RESTRICT_ASM_ADDRT
Definition: mmiutils.h:42
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40
ff_deblock_h_luma_8_mmi
void ff_deblock_h_luma_8_mmi(uint8_t *pix, ptrdiff_t stride, int alpha, int beta, int8_t *tc0)
Definition: h264dsp_mmi.c:2243
block1
static int16_t block1[64]
Definition: dct.c:117
ff_h264_idct_dc_add_8_mmi
void ff_h264_idct_dc_add_8_mmi(uint8_t *dst, int16_t *block, int stride)
Definition: h264dsp_mmi.c:639
ff_h264_luma_dc_dequant_idct_8_mmi
void ff_h264_luma_dc_dequant_idct_8_mmi(int16_t *output, int16_t *input, int qmul)
Definition: h264dsp_mmi.c:860