FFmpeg
h264qpel_mmi.c
Go to the documentation of this file.
/*
 * Loongson SIMD optimized h264qpel
 *
 * Copyright (c) 2015 Loongson Technology Corporation Limited
 * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
23 
24 #include "h264dsp_mips.h"
25 #include "hpeldsp_mips.h"
28 
29 static inline void copy_block4_mmi(uint8_t *dst, const uint8_t *src,
30  int dstStride, int srcStride, int h)
31 {
32  double ftmp[1];
34 
35  __asm__ volatile (
36  "1: \n\t"
37  MMI_ULWC1(%[ftmp0], %[src], 0x00)
38  MMI_SWC1(%[ftmp0], %[dst], 0x00)
39  "addi %[h], %[h], -0x01 \n\t"
40  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
41  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
42  "bnez %[h], 1b \n\t"
43  : [ftmp0]"=&f"(ftmp[0]),
44  [dst]"+&r"(dst), [src]"+&r"(src),
46  [h]"+&r"(h)
47  : [dstStride]"r"((mips_reg)dstStride),
48  [srcStride]"r"((mips_reg)srcStride)
49  : "memory"
50  );
51 }
52 
53 static inline void copy_block8_mmi(uint8_t *dst, const uint8_t *src,
54  int dstStride, int srcStride, int h)
55 {
56  double ftmp[1];
58 
59  __asm__ volatile (
60  "1: \n\t"
61  MMI_ULDC1(%[ftmp0], %[src], 0x00)
62  MMI_SDC1(%[ftmp0], %[dst], 0x00)
63  "addi %[h], %[h], -0x01 \n\t"
64  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
65  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
66  "bnez %[h], 1b \n\t"
67  : [ftmp0]"=&f"(ftmp[0]),
69  [dst]"+&r"(dst), [src]"+&r"(src),
70  [h]"+&r"(h)
71  : [dstStride]"r"((mips_reg)dstStride),
72  [srcStride]"r"((mips_reg)srcStride)
73  : "memory"
74  );
75 }
76 
77 static inline void copy_block16_mmi(uint8_t *dst, const uint8_t *src,
78  int dstStride, int srcStride, int h)
79 {
80  double ftmp[1];
81  uint64_t tmp[1];
83 
84  __asm__ volatile (
85  "1: \n\t"
86  MMI_ULDC1(%[ftmp0], %[src], 0x00)
87  "ldl %[tmp0], 0x0f(%[src]) \n\t"
88  "ldr %[tmp0], 0x08(%[src]) \n\t"
89  MMI_SDC1(%[ftmp0], %[dst], 0x00)
90  "sdl %[tmp0], 0x0f(%[dst]) \n\t"
91  "sdr %[tmp0], 0x08(%[dst]) \n\t"
92  "addi %[h], %[h], -0x01 \n\t"
93  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
94  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
95  "bnez %[h], 1b \n\t"
96  : [ftmp0]"=&f"(ftmp[0]),
97  [tmp0]"=&r"(tmp[0]),
99  [dst]"+&r"(dst), [src]"+&r"(src),
100  [h]"+&r"(h)
101  : [dstStride]"r"((mips_reg)dstStride),
102  [srcStride]"r"((mips_reg)srcStride)
103  : "memory"
104  );
105 }
106 
/*
 * Store helpers for an intermediate value b that carries 10 fractional
 * bits: b is rounded ((b) + 512) and shifted right by 10, then passed
 * through CLIP (defined outside this file view).
 *   op2_put: a = clipped value
 *   op2_avg: a = (a + clipped value + 1) >> 1, i.e. a rounded-up average
 *            with the value already held in a
 * Both macros assign to a, so a must be an lvalue.
 */
#define op2_avg(a, b) a = (((a)+CLIP(((b) + 512)>>10)+1)>>1)
#define op2_put(a, b) a = CLIP(((b) + 512)>>10)
109 static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
110  int dstStride, int srcStride)
111 {
112  double ftmp[10];
113  uint64_t tmp[1];
115 
116  __asm__ volatile (
117  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
118  "dli %[tmp0], 0x04 \n\t"
119  "1: \n\t"
120  MMI_ULWC1(%[ftmp1], %[src], -0x02)
121  MMI_ULWC1(%[ftmp2], %[src], -0x01)
122  MMI_ULWC1(%[ftmp3], %[src], 0x00)
123  MMI_ULWC1(%[ftmp4], %[src], 0x01)
124  MMI_ULWC1(%[ftmp5], %[src], 0x02)
125  MMI_ULWC1(%[ftmp6], %[src], 0x03)
126 
127  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
128  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
129  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
130  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
131  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
132  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
133  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
134  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
135  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
136  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
137  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
138  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
139  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
140  "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
141  "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
142  "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
143  MMI_SWC1(%[ftmp9], %[dst], 0x00)
144  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
145  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
146  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
147  "bnez %[tmp0], 1b \n\t"
148  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
149  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
150  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
151  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
152  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
153  [tmp0]"=&r"(tmp[0]),
155  [dst]"+&r"(dst), [src]"+&r"(src)
156  : [dstStride]"r"((mips_reg)dstStride),
157  [srcStride]"r"((mips_reg)srcStride),
158  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
159  [ff_pw_16]"f"(ff_pw_16.f)
160  : "memory"
161  );
162 }
163 
164 static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
165  int dstStride, int srcStride)
166 {
167  double ftmp[11];
168  uint64_t tmp[1];
170 
171  __asm__ volatile (
172  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
173  "dli %[tmp0], 0x08 \n\t"
174  "1: \n\t"
175  MMI_ULDC1(%[ftmp1], %[src], -0x02)
176  MMI_ULDC1(%[ftmp2], %[src], -0x01)
177  MMI_ULDC1(%[ftmp3], %[src], 0x00)
178  MMI_ULDC1(%[ftmp4], %[src], 0x01)
179  MMI_ULDC1(%[ftmp5], %[src], 0x02)
180  MMI_ULDC1(%[ftmp6], %[src], 0x03)
181  "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
182  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
183  "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
184  "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
185  "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
186  "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
187  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
188  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
189  "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
190  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
191  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
192  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
193  "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
194  "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
195  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
196  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
197  "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
198  "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
199  "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
200  "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
201  "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
202  "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
203  "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
204  "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
205  "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
206  "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
207  "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
208  "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
209  "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
210  "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
211  "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
212  MMI_SDC1(%[ftmp9], %[dst], 0x00)
213  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
214  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
215  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
216  "bnez %[tmp0], 1b \n\t"
217  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
218  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
219  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
220  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
221  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
222  [ftmp10]"=&f"(ftmp[10]),
223  [tmp0]"=&r"(tmp[0]),
225  [dst]"+&r"(dst), [src]"+&r"(src)
226  : [dstStride]"r"((mips_reg)dstStride),
227  [srcStride]"r"((mips_reg)srcStride),
228  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
229  [ff_pw_16]"f"(ff_pw_16.f)
230  : "memory"
231  );
232 }
233 
/**
 * Horizontal 6-tap lowpass over a 16x16 block, result written to dst.
 *
 * The block is processed as four 8x8 quadrants by
 * put_h264_qpel8_h_lowpass_mmi(), in the order top-left, top-right,
 * bottom-left, bottom-right.
 *
 * @param dst       destination of the first row
 * @param src       source pixel aligned with dst[0]
 * @param dstStride byte offset between consecutive destination rows
 * @param srcStride byte offset between consecutive source rows
 */
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride)
{
    int row;

    /* two bands of 8 rows, each split into a left and a right 8x8 tile */
    for (row = 0; row < 16; row += 8) {
        uint8_t       *d = dst + row * dstStride;
        const uint8_t *s = src + row * srcStride;

        put_h264_qpel8_h_lowpass_mmi(d,     s,     dstStride, srcStride);
        put_h264_qpel8_h_lowpass_mmi(d + 8, s + 8, dstStride, srcStride);
    }
}
244 
245 static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
246  int dstStride, int srcStride)
247 {
248  double ftmp[11];
249  uint64_t tmp[1];
251 
252  __asm__ volatile (
253  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
254  "dli %[tmp0], 0x04 \n\t"
255  "1: \n\t"
256  MMI_ULWC1(%[ftmp1], %[src], -0x02)
257  MMI_ULWC1(%[ftmp2], %[src], -0x01)
258  MMI_ULWC1(%[ftmp3], %[src], 0x00)
259  MMI_ULWC1(%[ftmp4], %[src], 0x01)
260  MMI_ULWC1(%[ftmp5], %[src], 0x02)
261  MMI_ULWC1(%[ftmp6], %[src], 0x03)
262  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
263  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
264  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
265  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
266  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
267  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
268  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
269  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
270  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
271  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
272  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
273  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
274  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
275  "paddsh %[ftmp9], %[ftmp9], %[ff_pw_16] \n\t"
276  "psrah %[ftmp9], %[ftmp9], %[ff_pw_5] \n\t"
277  "packushb %[ftmp9], %[ftmp9], %[ftmp0] \n\t"
278  MMI_LWC1(%[ftmp10], %[dst], 0x00)
279  "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
280  MMI_SWC1(%[ftmp9], %[dst], 0x00)
281  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
282  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
283  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
284  "bnez %[tmp0], 1b \n\t"
285  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
286  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
287  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
288  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
289  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
290  [ftmp10]"=&f"(ftmp[10]),
291  [tmp0]"=&r"(tmp[0]),
293  [dst]"+&r"(dst), [src]"+&r"(src)
294  : [dstStride]"r"((mips_reg)dstStride),
295  [srcStride]"r"((mips_reg)srcStride),
296  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
297  [ff_pw_16]"f"(ff_pw_16.f)
298  : "memory"
299  );
300 }
301 
302 static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
303  int dstStride, int srcStride)
304 {
305  double ftmp[11];
306  uint64_t tmp[1];
308 
309  __asm__ volatile (
310  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
311  "dli %[tmp0], 0x08 \n\t"
312  "1: \n\t"
313  MMI_ULDC1(%[ftmp1], %[src], -0x02)
314  MMI_ULDC1(%[ftmp2], %[src], -0x01)
315  MMI_ULDC1(%[ftmp3], %[src], 0x00)
316  MMI_ULDC1(%[ftmp4], %[src], 0x01)
317  MMI_ULDC1(%[ftmp5], %[src], 0x02)
318  MMI_ULDC1(%[ftmp6], %[src], 0x03)
319  "punpcklbh %[ftmp7], %[ftmp3], %[ftmp0] \n\t"
320  "punpckhbh %[ftmp8], %[ftmp3], %[ftmp0] \n\t"
321  "punpcklbh %[ftmp9], %[ftmp4], %[ftmp0] \n\t"
322  "punpckhbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t"
323  "paddsh %[ftmp3], %[ftmp7], %[ftmp9] \n\t"
324  "paddsh %[ftmp4], %[ftmp8], %[ftmp10] \n\t"
325  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_20] \n\t"
326  "pmullh %[ftmp4], %[ftmp4], %[ff_pw_20] \n\t"
327  "punpcklbh %[ftmp7], %[ftmp2], %[ftmp0] \n\t"
328  "punpckhbh %[ftmp8], %[ftmp2], %[ftmp0] \n\t"
329  "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t"
330  "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t"
331  "paddsh %[ftmp2], %[ftmp7], %[ftmp9] \n\t"
332  "paddsh %[ftmp5], %[ftmp8], %[ftmp10] \n\t"
333  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
334  "pmullh %[ftmp5], %[ftmp5], %[ff_pw_5] \n\t"
335  "punpcklbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t"
336  "punpckhbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t"
337  "punpcklbh %[ftmp9], %[ftmp6], %[ftmp0] \n\t"
338  "punpckhbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t"
339  "paddsh %[ftmp1], %[ftmp7], %[ftmp9] \n\t"
340  "paddsh %[ftmp6], %[ftmp8], %[ftmp10] \n\t"
341  "psubsh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
342  "psubsh %[ftmp4], %[ftmp4], %[ftmp5] \n\t"
343  "paddsh %[ftmp3], %[ftmp3], %[ftmp1] \n\t"
344  "paddsh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
345  "paddsh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
346  "paddsh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
347  "psrah %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
348  "psrah %[ftmp4], %[ftmp4], %[ff_pw_5] \n\t"
349  "packushb %[ftmp9], %[ftmp3], %[ftmp4] \n\t"
350  MMI_LDC1(%[ftmp10], %[dst], 0x00)
351  "pavgb %[ftmp9], %[ftmp9], %[ftmp10] \n\t"
352  MMI_SDC1(%[ftmp9], %[dst], 0x00)
353  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
354  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
355  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
356  "bnez %[tmp0], 1b \n\t"
357  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
358  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
359  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
360  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
361  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
362  [ftmp10]"=&f"(ftmp[10]),
363  [tmp0]"=&r"(tmp[0]),
365  [dst]"+&r"(dst), [src]"+&r"(src)
366  : [dstStride]"r"((mips_reg)dstStride),
367  [srcStride]"r"((mips_reg)srcStride),
368  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f),
369  [ff_pw_16]"f"(ff_pw_16.f)
370  : "memory"
371  );
372 }
373 
/**
 * Horizontal 6-tap lowpass over a 16x16 block, averaged into dst.
 *
 * The block is processed as four 8x8 quadrants by
 * avg_h264_qpel8_h_lowpass_mmi(), in the order top-left, top-right,
 * bottom-left, bottom-right.
 *
 * @param dst       destination of the first row (read and written)
 * @param src       source pixel aligned with dst[0]
 * @param dstStride byte offset between consecutive destination rows
 * @param srcStride byte offset between consecutive source rows
 */
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride)
{
    int row;

    /* two bands of 8 rows, each split into a left and a right 8x8 tile */
    for (row = 0; row < 16; row += 8) {
        uint8_t       *d = dst + row * dstStride;
        const uint8_t *s = src + row * srcStride;

        avg_h264_qpel8_h_lowpass_mmi(d,     s,     dstStride, srcStride);
        avg_h264_qpel8_h_lowpass_mmi(d + 8, s + 8, dstStride, srcStride);
    }
}
384 
385 static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
386  int dstStride, int srcStride)
387 {
388  double ftmp[12];
389  uint64_t tmp[1];
391 
392  src -= 2 * srcStride;
393 
394  __asm__ volatile (
395  ".set push \n\t"
396  ".set noreorder \n\t"
397  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
398  "dli %[tmp0], 0x02 \n\t"
399  MMI_LWC1(%[ftmp1], %[src], 0x00)
400  "mtc1 %[tmp0], %[ftmp10] \n\t"
401  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
402  "dli %[tmp0], 0x05 \n\t"
403  MMI_LWC1(%[ftmp2], %[src], 0x00)
404  "mtc1 %[tmp0], %[ftmp11] \n\t"
405  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
406  MMI_LWC1(%[ftmp3], %[src], 0x00)
407  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
408  MMI_LWC1(%[ftmp4], %[src], 0x00)
409  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
410  MMI_LWC1(%[ftmp5], %[src], 0x00)
411  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
412  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
413  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
414  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
415  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
416  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
417  MMI_LWC1(%[ftmp6], %[src], 0x00)
418  "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
419  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
420  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
421  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
422  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
423  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
424  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
425  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
426  "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
427  "paddh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
428  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
429  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
430  MMI_SWC1(%[ftmp7], %[dst], 0x00)
431  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
432  MMI_LWC1(%[ftmp1], %[src], 0x00)
433  "paddh %[ftmp7], %[ftmp4], %[ftmp5] \n\t"
434  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
435  "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
436  "psubh %[ftmp7], %[ftmp7], %[ftmp6] \n\t"
437  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
438  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
439  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
440  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
441  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
442  "paddh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
443  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
444  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
445  MMI_SWC1(%[ftmp7], %[dst], 0x00)
446  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
447  MMI_LWC1(%[ftmp2], %[src], 0x00)
448  "paddh %[ftmp7], %[ftmp5], %[ftmp6] \n\t"
449  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
450  "psubh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
451  "psubh %[ftmp7], %[ftmp7], %[ftmp1] \n\t"
452  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
453  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
454  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
455  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
456  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
457  "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t"
458  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
459  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
460  MMI_SWC1(%[ftmp7], %[dst], 0x00)
461  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
462  MMI_LWC1(%[ftmp3], %[src], 0x00)
463  "paddh %[ftmp7], %[ftmp6], %[ftmp1] \n\t"
464  "psllh %[ftmp7], %[ftmp7], %[ftmp10] \n\t"
465  "psubh %[ftmp7], %[ftmp7], %[ftmp5] \n\t"
466  "psubh %[ftmp7], %[ftmp7], %[ftmp2] \n\t"
467  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
468  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_5] \n\t"
469  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
470  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
471  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
472  "paddh %[ftmp7], %[ftmp7], %[ftmp4] \n\t"
473  "psrah %[ftmp7], %[ftmp7], %[ftmp11] \n\t"
474  "packushb %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
475  MMI_SWC1(%[ftmp7], %[dst], 0x00)
476  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
477  ".set pop \n\t"
478  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
479  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
480  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
481  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
482  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
483  [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]),
484  [tmp0]"=&r"(tmp[0]),
486  [dst]"+&r"(dst), [src]"+&r"(src)
487  : [dstStride]"r"((mips_reg)dstStride),
488  [srcStride]"r"((mips_reg)srcStride),
489  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
490  : "memory"
491  );
492 }
493 
494 static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
495  int dstStride, int srcStride)
496 {
497  int w = 2;
498  int h = 8;
499  double ftmp[10];
500  uint64_t tmp[1];
502 
503  src -= 2 * srcStride;
504 
505  while (w--) {
506  __asm__ volatile (
507  ".set push \n\t"
508  ".set noreorder \n\t"
509  "dli %[tmp0], 0x02 \n\t"
510  MMI_LWC1(%[ftmp0], %[src], 0x00)
511  "mtc1 %[tmp0], %[ftmp8] \n\t"
512  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
513  "dli %[tmp0], 0x05 \n\t"
514  MMI_LWC1(%[ftmp1], %[src], 0x00)
515  "mtc1 %[tmp0], %[ftmp9] \n\t"
516  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
517  MMI_LWC1(%[ftmp2], %[src], 0x00)
518  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
519  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
520  MMI_LWC1(%[ftmp3], %[src], 0x00)
521  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
522  MMI_LWC1(%[ftmp4], %[src], 0x00)
523  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
524  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
525  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
526  MMI_LWC1(%[ftmp5], %[src], 0x00)
527  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
528  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
529  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
530  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
531  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
532  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
533  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
534  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
535  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
536  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
537  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
538  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
539  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
540  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
541  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
542  MMI_SWC1(%[ftmp6], %[dst], 0x00)
543  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
544  MMI_LWC1(%[ftmp0], %[src], 0x00)
545  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
546  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
547  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
548  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
549  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
550  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
551  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
552  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
553  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
554  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
555  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
556  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
557  MMI_SWC1(%[ftmp6], %[dst], 0x00)
558  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
559  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
560  MMI_LWC1(%[ftmp1], %[src], 0x00)
561  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
562  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
563  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
564  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
565  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
566  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
567  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
568  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
569  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
570  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
571  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
572  MMI_SWC1(%[ftmp6], %[dst], 0x00)
573  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
574  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
575  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
576  MMI_LWC1(%[ftmp2], %[src], 0x00)
577  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
578  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
579  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
580  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
581  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
582  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
583  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
584  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
585  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
586  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
587  MMI_SWC1(%[ftmp6], %[dst], 0x00)
588  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
589  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
590  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
591  MMI_LWC1(%[ftmp3], %[src], 0x00)
592  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
593  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
594  "punpcklbh %[ftmp3] , %[ftmp3], %[ftmp7] \n\t"
595  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
596  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
597  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
598  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
599  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
600  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
601  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
602  MMI_SWC1(%[ftmp6], %[dst], 0x00)
603  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
604  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
605  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
606  MMI_LWC1(%[ftmp4], %[src], 0x00)
607  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
608  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
609  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
610  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
611  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
612  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
613  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
614  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
615  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
616  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
617  MMI_SWC1(%[ftmp6], %[dst], 0x00)
618  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
619  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
620  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
621  MMI_LWC1(%[ftmp5], %[src], 0x00)
622  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
623  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
624  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
625  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
626  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
627  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
628  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
629  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
630  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
631  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
632  MMI_SWC1(%[ftmp6], %[dst], 0x00)
633  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
634  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
635  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
636  MMI_LWC1(%[ftmp0], %[src], 0x00)
637  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
638  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
639  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
640  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
641  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
642  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
643  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
644  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
645  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
646  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
647  MMI_SWC1(%[ftmp6], %[dst], 0x00)
648  "bne %[h], 0x10, 2f \n\t"
649  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
650  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
651  MMI_LWC1(%[ftmp1], %[src], 0x00)
652  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
653  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
654  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
655  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
656  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
657  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
658  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
659  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
660  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
661  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
662  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
663  MMI_SWC1(%[ftmp6], %[dst], 0x00)
664  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
665  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
666  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
667  MMI_LWC1(%[ftmp2], %[src], 0x00)
668  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
669  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
670  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
671  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
672  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
673  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
674  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
675  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
676  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
677  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
678  MMI_SWC1(%[ftmp6], %[dst], 0x00)
679  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
680  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
681  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
682  MMI_LWC1(%[ftmp3], %[src], 0x00)
683  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
684  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
685  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
686  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
687  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
688  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
689  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
690  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
691  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
692  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
693  MMI_SWC1(%[ftmp6], %[dst], 0x00)
694  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
695  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
696  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
697  MMI_LWC1(%[ftmp4], %[src], 0x00)
698  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
699  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
700  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
701  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
702  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
703  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
704  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
705  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
706  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
707  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
708  MMI_SWC1(%[ftmp6], %[dst], 0x00)
709  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
710  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
711  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
712  MMI_LWC1(%[ftmp5], %[src], 0x00)
713  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
714  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
715  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
716  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
717  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
718  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
719  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
720  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
721  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
722  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
723  MMI_SWC1(%[ftmp6], %[dst], 0x00)
724  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
725  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
726  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
727  MMI_LWC1(%[ftmp0], %[src], 0x00)
728  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
729  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
730  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
731  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
732  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
733  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
734  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
735  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
736  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
737  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
738  MMI_SWC1(%[ftmp6], %[dst], 0x00)
739  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
740  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
741  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
742  MMI_LWC1(%[ftmp1], %[src], 0x00)
743  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
744  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
745  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
746  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
747  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
748  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
749  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
750  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
751  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
752  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
753  MMI_SWC1(%[ftmp6], %[dst], 0x00)
754  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
755  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
756  "psllh %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
757  MMI_LWC1(%[ftmp2], %[src], 0x00)
758  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
759  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
760  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
761  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
762  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
763  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
764  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
765  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
766  "psrah %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
767  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
768  MMI_SWC1(%[ftmp6], %[dst], 0x00)
769  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
770  "2: \n\t"
771  ".set pop \n\t"
772  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
773  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
774  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
775  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
776  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
777  [tmp0]"=&r"(tmp[0]),
779  [src]"+&r"(src), [dst]"+&r"(dst),
780  [h]"+&r"(h)
781  : [dstStride]"r"((mips_reg)dstStride),
782  [srcStride]"r"((mips_reg)srcStride),
783  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
784  : "memory"
785  );
786 
787  src += 4 - (h + 5) * srcStride;
788  dst += 4 - h * dstStride;
789  }
790 }
791 
/**
 * Vertical 6-tap lowpass over a 16x16 block, result written to dst.
 *
 * The block is processed as four 8x8 quadrants by
 * put_h264_qpel8_v_lowpass_mmi(), in the order top-left, top-right,
 * bottom-left, bottom-right.
 *
 * @param dst       destination of the first row
 * @param src       source pixel aligned with dst[0]
 * @param dstStride byte offset between consecutive destination rows
 * @param srcStride byte offset between consecutive source rows
 */
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
                                          int dstStride, int srcStride)
{
    int row;

    /* two bands of 8 rows, each split into a left and a right 8x8 tile */
    for (row = 0; row < 16; row += 8) {
        uint8_t       *d = dst + row * dstStride;
        const uint8_t *s = src + row * srcStride;

        put_h264_qpel8_v_lowpass_mmi(d,     s,     dstStride, srcStride);
        put_h264_qpel8_v_lowpass_mmi(d + 8, s + 8, dstStride, srcStride);
    }
}
802 
/*
 * Vertical low-pass for a 4-pixel-wide, 4-row block (avg variant): every
 * filtered row is combined with the bytes already at dst via pavgb and then
 * written back to dst.
 *
 * With a..f being six vertically adjacent source rows (a is two rows above
 * the output row), each output row is computed on 16-bit lanes as
 *     ((((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
 * and then packed to bytes with packushb.
 * NOTE(review): ff_pw_5 / ff_pw_16 are defined outside this chunk; presumably
 * packed 16-bit 5s and 16s, which would give tap weights 1,-5,20,20,-5,1 and
 * a rounding bias of 16 -- confirm.
 *
 * dst       - destination rows; read and rewritten, one row per dstStride.
 * src       - source pixels; nine rows are read, starting 2 * srcStride
 *             before the incoming pointer.
 * dstStride - byte distance between destination rows.
 * srcStride - byte distance between source rows.
 */
803 static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
804  int dstStride, int srcStride)
805 {
806  double ftmp[10];
807  uint64_t tmp[1];
808 
 /* The filter needs two rows above the first output row. */
809  src -= 2 * srcStride;
810 
811  __asm__ volatile (
812  ".set push \n\t"
813  ".set noreorder \n\t"
 /* ftmp9 <- 2 (left-shift count), ftmp8 <- 5 (right-shift count),
  * ftmp7 <- 0 (second operand for the byte-to-halfword unpacks). */
814  "dli %[tmp0], 0x02 \n\t"
815  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
816  "mtc1 %[tmp0], %[ftmp9] \n\t"
817  "dli %[tmp0], 0x05 \n\t"
 /* Load the first five source rows into ftmp0..ftmp4. */
818  MMI_LWC1(%[ftmp0], %[src], 0x00)
819  "mtc1 %[tmp0], %[ftmp8] \n\t"
820  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
821  MMI_LWC1(%[ftmp1], %[src], 0x00)
822  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
823  MMI_LWC1(%[ftmp2], %[src], 0x00)
824  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
825  MMI_LWC1(%[ftmp3], %[src], 0x00)
826  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
827  MMI_LWC1(%[ftmp4], %[src], 0x00)
828  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
829  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
830  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
831  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
832  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
833  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Output row 0: a..f = ftmp0,1,2,3,4,5 (ftmp5 is the newly loaded row).
  * ftmp6 is the accumulator for every row. */
834  MMI_LWC1(%[ftmp5], %[src], 0x00)
835  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
836  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
837  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
838  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
839  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
840  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
841  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
842  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
843  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
844  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
845  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
846  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
 /* ftmp0 (row a) is no longer needed: reuse it for the current dst bytes. */
847  MMI_LWC1(%[ftmp0], %[dst], 0x00)
848  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
849  MMI_SWC1(%[ftmp6], %[dst], 0x00)
850  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1: a..f = ftmp1,2,3,4,5,0 (register roles rotate by one). */
851  MMI_LWC1(%[ftmp0], %[src], 0x00)
852  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
853  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
854  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
855  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
856  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
857  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
858  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
859  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
860  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
861  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
862  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
863  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
864  MMI_LWC1(%[ftmp1], %[dst], 0x00)
865  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
866  MMI_SWC1(%[ftmp6], %[dst], 0x00)
867  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2: a..f = ftmp2,3,4,5,0,1. */
868  MMI_LWC1(%[ftmp1], %[src], 0x00)
869  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
870  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
871  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
872  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
873  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
874  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
875  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
876  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
877  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
878  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
879  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
880  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
881  MMI_LWC1(%[ftmp2], %[dst], 0x00)
882  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
883  MMI_SWC1(%[ftmp6], %[dst], 0x00)
884  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 3: a..f = ftmp3,4,5,0,1,2. */
885  MMI_LWC1(%[ftmp2], %[src], 0x00)
886  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
887  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
888  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
889  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
890  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
891  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
892  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
893  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
894  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
895  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
896  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
897  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
898  MMI_LWC1(%[ftmp3], %[dst], 0x00)
899  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
900  MMI_SWC1(%[ftmp6], %[dst], 0x00)
901  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
902  ".set pop \n\t"
 /* All scratch registers are early-clobber outputs; src and dst are
  * read-write because the asm advances them. */
903  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
904  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
905  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
906  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
907  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
908  [tmp0]"=&r"(tmp[0]),
909  [src]"+&r"(src), [dst]"+&r"(dst)
910  : [dstStride]"r"((mips_reg)dstStride),
911  [srcStride]"r"((mips_reg)srcStride),
912  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
913  : "memory"
914  );
915 }
916 
/*
 * Vertical low-pass for an 8-pixel-wide, 8-row block (avg variant),
 * processed as two 4-pixel-wide column strips (w = 2 loop passes).
 *
 * With a..f being six vertically adjacent source rows (a is two rows above
 * the output row), each output row is computed on 16-bit lanes as
 *     ((((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16) >> 5
 * packed to bytes with packushb, combined with the existing dst bytes via
 * pavgb, and written back to dst.
 * NOTE(review): ff_pw_5 / ff_pw_16 are defined outside this chunk; presumably
 * packed 16-bit 5s and 16s -- confirm.
 *
 * The asm template contains sixteen rows; after the eighth row it tests
 * "bne %[h], 0x10, 2f". h is initialised to 8 and no instruction in the
 * template writes it, so from this function the branch to label 2 is always
 * taken and the last eight rows are not executed.
 *
 * dst       - destination rows; read and rewritten.
 * src       - source pixels; rows from 2 above to 3 below the block are read.
 * dstStride - byte distance between destination rows.
 * srcStride - byte distance between source rows.
 */
917 static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
918  int dstStride, int srcStride)
919 {
920  int w = 2;
921  int h = 8;
922  double ftmp[10];
923  uint64_t tmp[1];
925 
 /* The filter needs two rows above the first output row. */
926  src -= 2 * srcStride;
927 
 /* One pass per 4-pixel-wide column strip. */
928  while (w--) {
929  __asm__ volatile (
930  ".set push \n\t"
931  ".set noreorder \n\t"
 /* ftmp9 <- 2 (left-shift count), ftmp8 <- 5 (right-shift count),
  * ftmp7 <- 0 (second operand for the byte-to-halfword unpacks). */
932  "dli %[tmp0], 0x02 \n\t"
933  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
934  "mtc1 %[tmp0], %[ftmp9] \n\t"
935  "dli %[tmp0], 0x05 \n\t"
 /* Load the first five source rows into ftmp0..ftmp4. */
936  MMI_LWC1(%[ftmp0], %[src], 0x00)
937  "mtc1 %[tmp0], %[ftmp8] \n\t"
938  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
939  MMI_LWC1(%[ftmp1], %[src], 0x00)
940  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
941  MMI_LWC1(%[ftmp2], %[src], 0x00)
942  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
943  MMI_LWC1(%[ftmp3], %[src], 0x00)
944  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
945  MMI_LWC1(%[ftmp4], %[src], 0x00)
946  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
947  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
948  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
949  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
950  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
951  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Output row 0: a..f = ftmp0,1,2,3,4,5. ftmp6 is the accumulator for
  * every row; the six row registers rotate roles by one per output row,
  * and the retired "a" register is reused to hold the current dst bytes. */
952  MMI_LWC1(%[ftmp5], %[src], 0x00)
953  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
954  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
955  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
956  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
957  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
958  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
959  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
960  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
961  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
962  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
963  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
964  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
965  MMI_LWC1(%[ftmp0], %[dst], 0x00)
966  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
967  MMI_SWC1(%[ftmp6], %[dst], 0x00)
968  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 1: a..f = ftmp1,2,3,4,5,0. */
969  MMI_LWC1(%[ftmp0], %[src], 0x00)
970  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
971  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
972  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
973  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
974  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
975  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
976  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
977  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
978  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
979  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
980  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
981  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
982  MMI_LWC1(%[ftmp1], %[dst], 0x00)
983  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
984  MMI_SWC1(%[ftmp6], %[dst], 0x00)
985  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 2: a..f = ftmp2,3,4,5,0,1. */
986  MMI_LWC1(%[ftmp1], %[src], 0x00)
987  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
988  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
989  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
990  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
991  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
992  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
993  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
994  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
995  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
996  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
997  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
998  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
999  MMI_LWC1(%[ftmp2], %[dst], 0x00)
1000  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1001  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1002  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 3: a..f = ftmp3,4,5,0,1,2. */
1003  MMI_LWC1(%[ftmp2], %[src], 0x00)
1004  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1005  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1006  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1007  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1008  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1009  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1010  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1011  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1012  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1013  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1014  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1015  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1016  MMI_LWC1(%[ftmp3], %[dst], 0x00)
1017  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1018  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1019  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 4: a..f = ftmp4,5,0,1,2,3. */
1020  MMI_LWC1(%[ftmp3], %[src], 0x00)
1021  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1022  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1023  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1024  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1025  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1026  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1027  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1028  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1029  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1030  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1031  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1032  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1033  MMI_LWC1(%[ftmp4], %[dst], 0x00)
1034  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1035  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1036  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 5: a..f = ftmp5,0,1,2,3,4. */
1037  MMI_LWC1(%[ftmp4], %[src], 0x00)
1038  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1039  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1040  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1041  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1042  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1043  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1044  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1045  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1046  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1047  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1048  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1049  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1050  MMI_LWC1(%[ftmp5], %[dst], 0x00)
1051  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1052  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1053  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 6: a..f = ftmp0,1,2,3,4,5 (rotation has wrapped around). */
1054  MMI_LWC1(%[ftmp5], %[src], 0x00)
1055  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1056  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1057  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1058  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1059  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1060  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1061  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1062  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1063  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1064  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1065  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1066  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1067  MMI_LWC1(%[ftmp0], %[dst], 0x00)
1068  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1069  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1070  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Output row 7: a..f = ftmp1,2,3,4,5,0. */
1071  MMI_LWC1(%[ftmp0], %[src], 0x00)
1072  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1073  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1074  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1075  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1076  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1077  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1078  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1079  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1080  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1081  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1082  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1083  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1084  MMI_LWC1(%[ftmp1], %[dst], 0x00)
1085  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1086  MMI_SWC1(%[ftmp6], %[dst], 0x00)
 /* Stop after eight rows unless h == 0x10. Because of .set noreorder the
  * PTR_ADDU that follows occupies the MIPS branch delay slot, so dst is
  * advanced for row 7 on both paths; the C fix-up after the asm, which
  * rewinds dst by h rows, depends on that. */
1087  "bne %[h], 0x10, 2f \n\t"
1088  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
 /* Rows 8..15: same rotation pattern; reached only when h == 0x10. */
1089  MMI_LWC1(%[ftmp1], %[src], 0x00)
1090  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1091  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1092  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1093  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1094  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1095  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1096  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1097  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1098  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1099  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1100  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1101  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1102  MMI_LWC1(%[ftmp2], %[dst], 0x00)
1103  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1104  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1105  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1106  MMI_LWC1(%[ftmp2], %[src], 0x00)
1107  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1108  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1109  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1110  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1111  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1112  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1113  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1114  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1115  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1116  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1117  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1118  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1119  MMI_LWC1(%[ftmp3], %[dst], 0x00)
1120  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1121  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1122  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1123  MMI_LWC1(%[ftmp3], %[src], 0x00)
1124  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1125  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1126  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1127  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1128  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1129  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1130  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1131  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1132  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1133  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1134  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1135  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1136  MMI_LWC1(%[ftmp4], %[dst], 0x00)
1137  "pavgb %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1138  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1139  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1140  MMI_LWC1(%[ftmp4], %[src], 0x00)
1141  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1142  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1143  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1144  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1145  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1146  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1147  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1148  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1149  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1150  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1151  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1152  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1153  MMI_LWC1(%[ftmp5], %[dst], 0x00)
1154  "pavgb %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1155  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1156  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1157  MMI_LWC1(%[ftmp5], %[src], 0x00)
1158  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1159  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1160  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1161  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1162  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1163  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1164  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1165  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1166  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1167  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1168  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1169  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1170  MMI_LWC1(%[ftmp0], %[dst], 0x00)
1171  "pavgb %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1172  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1173  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1174  MMI_LWC1(%[ftmp0], %[src], 0x00)
1175  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1176  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1177  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1178  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1179  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1180  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1181  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1182  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1183  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1184  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1185  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1186  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1187  MMI_LWC1(%[ftmp1], %[dst], 0x00)
1188  "pavgb %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1189  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1190  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1191  MMI_LWC1(%[ftmp1], %[src], 0x00)
1192  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1193  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1194  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1195  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1196  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1197  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1198  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1199  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1200  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1201  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1202  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1203  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1204  MMI_LWC1(%[ftmp2], %[dst], 0x00)
1205  "pavgb %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1206  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1207  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1208  MMI_LWC1(%[ftmp2], %[src], 0x00)
1209  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1210  "psllh %[ftmp6], %[ftmp6], %[ftmp9] \n\t"
1211  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1212  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1213  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1214  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1215  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1216  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1217  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1218  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1219  "psrah %[ftmp6], %[ftmp6], %[ftmp8] \n\t"
1220  "packushb %[ftmp6], %[ftmp6], %[ftmp6] \n\t"
1221  MMI_LWC1(%[ftmp3], %[dst], 0x00)
1222  "pavgb %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1223  MMI_SWC1(%[ftmp6], %[dst], 0x00)
1224  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1225  "2: \n\t"
1226  ".set pop \n\t"
 /* All scratch registers are early-clobber outputs; src, dst and h are
  * declared read-write. */
1227  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1228  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1229  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1230  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1231  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1232  [tmp0]"=&r"(tmp[0]),
1234  [src]"+&r"(src), [dst]"+&r"(dst),
1235  [h]"+&r"(h)
1236  : [dstStride]"r"((mips_reg)dstStride),
1237  [srcStride]"r"((mips_reg)srcStride),
1238  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1239  : "memory"
1240  );
1241 
 /* Rewind for the next strip: the asm advanced src by h + 5 rows (5 preload
  * steps plus one per output row) and dst by h rows; then step 4 pixels
  * to the right. */
1242  src += 4 - (h + 5) * srcStride;
1243  dst += 4 - h * dstStride;
1244  }
1245 }
1246 
/*
 * 16-wide vertical low-pass (avg variant).
 *
 * The work is delegated to avg_h264_qpel8_v_lowpass_mmi(), called four
 * times: at the block origin, 8 columns to the right, 8 rows down, and
 * 8 rows down plus 8 columns to the right -- in that order.
 */
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
        int dstStride, int srcStride)
{
    /* Start of the lower half: 8 rows below the block origin. */
    uint8_t       *dst_lower = dst + 8 * dstStride;
    const uint8_t *src_lower = src + 8 * srcStride;

    /* Upper half: left, then right. */
    avg_h264_qpel8_v_lowpass_mmi(dst,     src,     dstStride, srcStride);
    avg_h264_qpel8_v_lowpass_mmi(dst + 8, src + 8, dstStride, srcStride);

    /* Lower half: left, then right. */
    avg_h264_qpel8_v_lowpass_mmi(dst_lower,     src_lower,     dstStride, srcStride);
    avg_h264_qpel8_v_lowpass_mmi(dst_lower + 8, src_lower + 8, dstStride, srcStride);
}
1257 
/*
 * Two-pass (horizontal, then vertical) low-pass for a 4x4 block.
 *
 * Pass 1 (asm): for 9 source rows, beginning two rows above the block,
 * combine the six byte groups at src-2 .. src+3 (a..f) into
 *     (c + d) * ff_pw_20 - (b + e) * ff_pw_5 + (a + f)
 * on 16-bit lanes and store the result row into the local _tmp buffer
 * (9 rows of 4 values = 36 entries, 8 bytes per row).
 * Pass 2 (C): for each of the 4 columns, apply the 20 / -5 / 1 weights
 * vertically to the intermediate values and hand the sum to op2_put() for
 * the four output rows.
 * NOTE(review): op2_put, ff_pw_20 and ff_pw_5 are defined outside this
 * chunk; op2_put presumably rounds, scales and clips into dst -- confirm.
 *
 * dst       - destination block, written through op2_put().
 * src       - source pixels; 9 rows are read, 2 bytes left to 6 bytes right
 *             of the incoming column.
 * dstStride - element distance between destination rows.
 * srcStride - byte distance between source rows.
 */
1258 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1259  int dstStride, int srcStride)
1260 {
1261  int i;
1262  int16_t _tmp[36];
1263  int16_t *tmp = _tmp;
1264  double ftmp[10];
1265  uint64_t tmp0;
1267 
 /* The vertical pass needs two rows above the block. */
1268  src -= 2*srcStride;
1269 
1270  __asm__ volatile (
 /* ftmp0 <- 0 for the byte unpacks; tmp0 <- 9, the row counter. */
1271  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1272  "dli %[tmp0], 0x09 \n\t"
1273  "1: \n\t"
 /* Six overlapping loads at byte offsets -2 .. +3 give taps a..f. */
1274  MMI_ULWC1(%[ftmp1], %[src], -0x02)
1275  MMI_ULWC1(%[ftmp2], %[src], -0x01)
1276  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1277  MMI_ULWC1(%[ftmp4], %[src], 0x01)
1278  MMI_ULWC1(%[ftmp5], %[src], 0x02)
1279  MMI_ULWC1(%[ftmp6], %[src], 0x03)
1280  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1281  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1282  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1283  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1284  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1285  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
 /* ftmp7 = c + d, ftmp8 = b + e, ftmp9 = a + f. */
1286  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1287  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1288  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1289  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1290  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1291  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1292  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
 /* Store one intermediate row, then step to the next source/tmp row. */
1293  MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1294  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1295  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1296  PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1297  "bnez %[tmp0], 1b \n\t"
1298  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1299  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1300  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1301  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1302  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1303  [tmp0]"=&r"(tmp0),
1305  [tmp]"+&r"(tmp), [src]"+&r"(src)
 /* tmpStride is the literal 8: bytes per intermediate row. */
1306  : [tmpStride]"r"(8),
1307  [srcStride]"r"((mips_reg)srcStride),
1308  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
1309  : "memory"
1310  );
1311 
 /* The asm left tmp 36 elements past _tmp (9 rows of 8 bytes). Stepping
  * back 28 puts it on _tmp[8], the third intermediate row, so tmp[-8] and
  * tmp[-4] below address the two rows above it. */
1312  tmp -= 28;
1313 
 /* One pass per column; consecutive intermediate rows are 4 elements apart.
  * The tmp0 declared inside the loop shadows the uint64_t tmp0 above. */
1314  for (i=0; i<4; i++) {
1315  const int16_t tmpB= tmp[-8];
1316  const int16_t tmpA= tmp[-4];
1317  const int16_t tmp0= tmp[ 0];
1318  const int16_t tmp1= tmp[ 4];
1319  const int16_t tmp2= tmp[ 8];
1320  const int16_t tmp3= tmp[12];
1321  const int16_t tmp4= tmp[16];
1322  const int16_t tmp5= tmp[20];
1323  const int16_t tmp6= tmp[24];
1324  op2_put(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1325  op2_put(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1326  op2_put(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1327  op2_put(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
1328  dst++;
1329  tmp++;
1330  }
1331 }
1332 
1334  const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
1335 {
 /*
  * NOTE(review): the first line of this definition (return type, function
  * name and the declaration of tmp) is not present in this listing.
  *
  * Vertical filter pass that writes 16-bit intermediates to tmp.
  * The input is walked in w = (size + 8) >> 2 column strips of 4 pixels,
  * starting 2 rows above and 2 bytes left of the incoming src. For each
  * strip 8 rows are produced, or 16 when size == 0x10. With a..f being six
  * vertically adjacent source rows, each stored value is
  *     (((c + d) << 2) - b - e) * ff_pw_5 + a + f + ff_pw_16
  * on 16-bit lanes; no right shift or byte packing happens here.
  * NOTE(review): ff_pw_5 / ff_pw_16 are defined outside this chunk;
  * presumably packed 16-bit 5s and 16s -- confirm.
  *
  * Rows are stored at a fixed pitch of 0x30 bytes; tmpStride is not
  * referenced anywhere in the visible body. ftmp8 and ftmp9 are listed as
  * asm outputs but no instruction in the template names them.
  */
1336  int w = (size + 8) >> 2;
1337  double ftmp[11];
1338  uint64_t tmp0;
1340 
1341  src -= 2 * srcStride + 2;
1342 
 /* One pass per 4-pixel-wide column strip. */
1343  while (w--) {
1344  __asm__ volatile (
 /* ftmp10 <- 2 (left-shift count), ftmp7 <- 0 for the byte unpacks;
  * the first five source rows go to ftmp0..ftmp4. */
1345  "dli %[tmp0], 0x02 \n\t"
1346  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1347  "mtc1 %[tmp0], %[ftmp10] \n\t"
1348  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1349  "pxor %[ftmp7], %[ftmp7], %[ftmp7] \n\t"
1350  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1351  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1352  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1353  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1354  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1355  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1356  MMI_ULWC1(%[ftmp4], %[src], 0x00)
1357  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1358  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1359  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1360  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1361  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1362  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
 /* Row 0 -> tmp + 0x00: a..f = ftmp0,1,2,3,4,5. ftmp6 accumulates; the
  * six row registers rotate roles by one for every following row. */
1363  MMI_ULWC1(%[ftmp5], %[src], 0x00)
1364  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1365  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1366  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1367  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1368  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1369  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1370  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1371  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1372  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1373  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1374  MMI_SDC1(%[ftmp6], %[tmp], 0x00)
 /* Row 1 -> tmp + 0x30. */
1375  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1376  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1377  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1378  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1379  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1380  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1381  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1382  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1383  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1384  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1385  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1386  MMI_SDC1(%[ftmp6], %[tmp], 0x30)
 /* Row 2 -> tmp + 0x60. */
1387  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1388  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1389  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1390  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1391  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1392  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1393  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1394  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1395  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1396  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1397  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1398  MMI_SDC1(%[ftmp6], %[tmp], 0x60)
 /* Row 3 -> tmp + 0x90. */
1399  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1400  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1401  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1402  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1403  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1404  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1405  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1406  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1407  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1408  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1409  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1410  MMI_SDC1(%[ftmp6], %[tmp], 0x90)
 /* Row 4 -> tmp + 0xc0. */
1411  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1412  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1413  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1414  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1415  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1416  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1417  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1418  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1419  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1420  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1421  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1422  MMI_SDC1(%[ftmp6], %[tmp], 0xc0)
 /* Row 5 -> tmp + 0xf0. */
1423  MMI_ULWC1(%[ftmp4], %[src], 0x00)
1424  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1425  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1426  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1427  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1428  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1429  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1430  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1431  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1432  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1433  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1434  MMI_SDC1(%[ftmp6], %[tmp], 0xf0)
 /* Row 6 -> tmp + 0x120. */
1435  MMI_ULWC1(%[ftmp5], %[src], 0x00)
1436  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1437  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1438  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1439  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1440  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1441  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1442  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1443  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1444  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1445  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1446  MMI_SDC1(%[ftmp6], %[tmp], 0x120)
 /* Row 7 -> tmp + 0x150. */
1447  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1448  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1449  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1450  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1451  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1452  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1453  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1454  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1455  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1456  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1457  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1458  MMI_SDC1(%[ftmp6], %[tmp], 0x150)
 /* Stop after eight rows unless size == 0x10. */
1459  "bne %[size], 0x10, 2f \n\t"
1460 
 /* Rows 8..15 -> tmp + 0x180 .. tmp + 0x2d0, same rotation pattern. */
1461  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1462  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1463  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1464  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1465  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1466  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1467  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1468  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1469  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1470  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1471  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1472  MMI_SDC1(%[ftmp6], %[tmp], 0x180)
1473  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1474  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1475  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1476  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1477  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1478  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1479  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1480  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1481  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1482  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1483  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1484  MMI_SDC1(%[ftmp6], %[tmp], 0x1b0)
1485  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1486  "paddh %[ftmp6], %[ftmp0], %[ftmp1] \n\t"
1487  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1488  "paddh %[ftmp4], %[ftmp4], %[ff_pw_16] \n\t"
1489  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1490  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1491  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
1492  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1493  "paddh %[ftmp4], %[ftmp4], %[ftmp3] \n\t"
1494  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1495  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1496  MMI_SDC1(%[ftmp6], %[tmp], 0x1e0)
1497  MMI_ULWC1(%[ftmp4], %[src], 0x00)
1498  "paddh %[ftmp6], %[ftmp1], %[ftmp2] \n\t"
1499  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1500  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1501  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1502  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1503  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1504  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1505  "paddh %[ftmp5], %[ftmp5], %[ftmp4] \n\t"
1506  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1507  "paddh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1508  MMI_SDC1(%[ftmp6], %[tmp], 0x210)
1509  MMI_ULWC1(%[ftmp5], %[src], 0x00)
1510  "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t"
1511  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1512  "paddh %[ftmp0], %[ftmp0], %[ff_pw_16] \n\t"
1513  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1514  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1515  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1516  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1517  "paddh %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1518  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1519  "paddh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1520  MMI_SDC1(%[ftmp6], %[tmp], 0x240)
1521  MMI_ULWC1(%[ftmp0], %[src], 0x00)
1522  "paddh %[ftmp6], %[ftmp3], %[ftmp4] \n\t"
1523  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1524  "paddh %[ftmp1], %[ftmp1], %[ff_pw_16] \n\t"
1525  "psubh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1526  "psubh %[ftmp6], %[ftmp6], %[ftmp5] \n\t"
1527  "punpcklbh %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
1528  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1529  "paddh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1530  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1531  "paddh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1532  MMI_SDC1(%[ftmp6], %[tmp], 0x270)
1533  MMI_ULWC1(%[ftmp1], %[src], 0x00)
1534  "paddh %[ftmp6], %[ftmp4], %[ftmp5] \n\t"
1535  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1536  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
1537  "psubh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1538  "psubh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1539  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
1540  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1541  "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t"
1542  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1543  "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t"
1544  MMI_SDC1(%[ftmp6], %[tmp], 0x2a0)
1545  MMI_ULWC1(%[ftmp2], %[src], 0x00)
1546  "paddh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1547  "psllh %[ftmp6], %[ftmp6], %[ftmp10] \n\t"
1548  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1549  "psubh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1550  "psubh %[ftmp6], %[ftmp6], %[ftmp1] \n\t"
1551  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1552  "pmullh %[ftmp6], %[ftmp6], %[ff_pw_5] \n\t"
1553  "paddh %[ftmp3], %[ftmp3], %[ftmp2] \n\t"
1554  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1555  "paddh %[ftmp6], %[ftmp6], %[ftmp3] \n\t"
1556  MMI_SDC1(%[ftmp6], %[tmp], 0x2d0)
1557  "2: \n\t"
1558  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1559  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1560  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1561  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1562  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1563  [ftmp10]"=&f"(ftmp[10]),
1564  [tmp0]"=&r"(tmp0),
1566  [src]"+&r"(src)
 /* tmp is an input only: the asm addresses rows by constant offsets and
  * the pointer itself is advanced in C below. */
1567  : [tmp]"r"(tmp), [size]"r"(size),
1568  [srcStride]"r"((mips_reg)srcStride),
1569  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1570  : "memory"
1571  );
1572 
 /* Next strip: 4 elements further into tmp, and src rewound by the
  * size + 5 rows consumed (5 preload steps plus one per output row), then
  * 4 pixels to the right.
  * NOTE(review): tmp's element type is declared on the missing first line;
  * presumably int16_t, making this step one stored row width -- confirm. */
1573  tmp += 4;
1574  src += 4 - (size + 5) * srcStride;
1575  }
1576 }
1577 
/*
 * put_h264_qpel8or16_hv2_lowpass_mmi -- the opening signature line (listing
 * line 1578) is absent from this listing; the name is taken from the call in
 * put_h264_qpel8or16_hv_lowpass_mmi. Listing lines 1584 and 1637 are absent
 * as well.
 *
 * Horizontal stage of the 8/16-wide hv lowpass: converts int16_t rows in tmp
 * into 8-bit pixels in dst, one 8-pixel-wide strip at a time.
 * For output x, with a = t[x]+t[x+5], b = t[x+1]+t[x+4], c = t[x+2]+t[x+3],
 * the stored byte is (((((a - b) >> 2) - b + c) >> 2) + c) >> 6, packed with
 * unsigned saturation.
 * Rows of tmp are 0x30 bytes (24 int16_t) apart; tmpStride is not read here.
 */
1579  int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1580 {
/* w is 0 for size 8 and 1 for size 16, so the do/while runs w + 1 strips. */
1581  int w = size >> 4;
1582  double ftmp[10];
1583  uint64_t tmp0;
1585 
1586  do {
1587  int h = size;
1588 
1589  __asm__ volatile (
/* ftmp8 holds shift count 2, ftmp9 holds shift count 6. */
1590  "dli %[tmp0], 0x02 \n\t"
1591  "mtc1 %[tmp0], %[ftmp8] \n\t"
1592  "dli %[tmp0], 0x06 \n\t"
1593  "mtc1 %[tmp0], %[ftmp9] \n\t"
1594  "1: \n\t"
/* ftmp0/ftmp3 accumulate pixels 0-3 / 4-7 of the current row. */
1595  MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1596  MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1597  MMI_LDC1(%[ftmp6], %[tmp], 0x10)
1598  MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1599  MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1600  MMI_ULDC1(%[ftmp5], %[tmp], 0x12)
1601  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1602  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1603  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1604  "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t"
1605  MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1606  MMI_ULDC1(%[ftmp6], %[tmp], 0x06)
1607  MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1608  MMI_ULDC1(%[ftmp7], %[tmp], 0x0e)
1609  "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1610  "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t"
1611  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1612  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1613  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1614  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1615  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1616  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1617  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1618  "paddsh %[ftmp3] , %[ftmp3], %[ftmp5] \n\t"
1619  "psrah %[ftmp0], %[ftmp0], %[ftmp8] \n\t"
1620  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1621  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1622  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1623  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1624  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1625  "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
1626  "addi %[h], %[h], -0x01 \n\t"
1627  MMI_SDC1(%[ftmp0], %[dst], 0x00)
1628  PTR_ADDIU "%[tmp], %[tmp], 0x30 \n\t"
1629  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1630  "bnez %[h], 1b \n\t"
1631  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1632  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1633  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1634  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1635  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1636  [tmp0]"=&r"(tmp0),
1638  [tmp]"+&r"(tmp), [dst]"+&r"(dst),
1639  [h]"+&r"(h)
1640  : [dstStride]"r"((mips_reg)dstStride)
1641  : "memory"
1642  );
1643 
/* Rewind the size rows just processed and step 8 columns to the right. */
1644  tmp += 8 - size * 24;
1645  dst += 8 - size * dstStride;
1646  } while (w--);
1647 }
1648 
/*
 * 8- or 16-wide hv lowpass, "put" variant. Stage 1 fills the int16_t scratch
 * buffer tmp from src; stage 2 writes the final pixels from tmp to dst.
 * size is forwarded unchanged to both stages.
 */
1649 static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
1650  const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1651  ptrdiff_t srcStride, int size)
1652 {
1653  put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size);
1654  put_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size);
1655 }
1656 
/* Fixed-size entry point: the shared 8/16 hv lowpass ("put") with size 8. */
1657 static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
1658  const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1659  ptrdiff_t srcStride)
1660 {
1661  put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
1662  srcStride, 8);
1663 }
1664 
/* Fixed-size entry point: the shared 8/16 hv lowpass ("put") with size 16. */
1665 static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
1666  const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1667  ptrdiff_t srcStride)
1668 {
1669  put_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
1670  srcStride, 16);
1671 }
1672 
/*
 * Horizontal 6-tap lowpass of an 8x8 block, averaged with a second source.
 * For each of the 8 pixels in a row:
 *     p = (20*(s[0]+s[1]) - 5*(s[-1]+s[2]) + (s[-2]+s[3]) + 16) >> 5
 * The results are packed to bytes with unsigned saturation, combined with
 * 8 bytes of src2 by pavgb, and stored to dst.
 * src advances by dstStride (there is no separate source stride parameter),
 * src2 advances by src2Stride. The loop runs h = 8 rows.
 * Listing lines 1679-1680 and 1738-1739 are absent from this listing.
 */
1673 static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
1674  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1675 {
1676  int h = 8;
1677  double ftmp[9];
1678  uint64_t tmp[1];
1681 
1682  __asm__ volatile (
/* ftmp7 = shift count 2, ftmp8 = shift count 5, ftmp0 = zero for unpacking. */
1683  "dli %[tmp0], 0x02 \n\t"
1684  "mtc1 %[tmp0], %[ftmp7] \n\t"
1685  "dli %[tmp0], 0x05 \n\t"
1686  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1687  "mtc1 %[tmp0], %[ftmp8] \n\t"
1688  "1: \n\t"
/* 4 * (s[0] + s[1]) in ftmp1 (pixels 0-3) and ftmp2 (pixels 4-7). */
1689  MMI_ULDC1(%[ftmp1], %[src], 0x00)
1690  MMI_ULDC1(%[ftmp3], %[src], 0x01)
1691  "punpckhbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t"
1692  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1693  "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1694  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1695  "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1696  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1697  "psllh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1698  "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
/* Subtract (s[-1] + s[2]) and multiply by 5: 20*(s0+s1) - 5*(s-1+s2). */
1699  MMI_ULDC1(%[ftmp3], %[src], -0x01)
1700  MMI_ULDC1(%[ftmp5], %[src], 0x02)
1701  "punpckhbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t"
1702  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1703  "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
1704  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1705  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
1706  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1707  "psubh %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1708  "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1709  "pmullh %[ftmp2], %[ftmp2], %[ff_pw_5] \n\t"
1710  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
/* Outer taps (s[-2] + s[3]) plus the rounding constant 16, then >> 5. */
1711  MMI_ULWC1(%[ftmp3], %[src], -0x02)
1712  MMI_ULWC1(%[ftmp6], %[src], 0x07)
1713  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1714  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1715  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1716  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
1717  "paddh %[ftmp3], %[ftmp3], %[ff_pw_16] \n\t"
1718  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
1719  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1720  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
1721  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
1722  "psrah %[ftmp2], %[ftmp2], %[ftmp8] \n\t"
1723  MMI_LDC1(%[ftmp5], %[src2], 0x00)
1724  "packushb %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
1725  PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
1726  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
/*
 * The decrement has an immediate operand, so use the immediate-form macro
 * (PTR_ADDIU, as already used for the tmp pointer elsewhere in this file)
 * instead of the register-register PTR_ADDU, which only assembles here
 * through assembler macro expansion.
 */
1727  PTR_ADDIU "%[h], %[h], -0x01 \n\t"
1728  MMI_SDC1(%[ftmp1], %[dst], 0x00)
1729  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1730  PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
1731  "bgtz %[h], 1b \n\t"
1732  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1733  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1734  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1735  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1736  [ftmp8]"=&f"(ftmp[8]),
1737  [tmp0]"=&r"(tmp[0]),
1740  [src]"+&r"(src), [dst]"+&r"(dst),
1741  [src2]"+&r"(src2), [h]"+&r"(h)
1742  : [src2Stride]"r"((mips_reg)src2Stride),
1743  [dstStride]"r"((mips_reg)dstStride),
1744  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
1745  : "memory"
1746  );
1747 }
1748 
/*
 * Combines 16-bit intermediate rows with 8-bit rows, two rows per iteration.
 * Each pass reads 8 int16_t values at src16 byte offsets 0x00/0x08 (row 0)
 * and 0x30/0x38 (row 1), shifts them right arithmetically by 5, packs them
 * to bytes with unsigned saturation, combines them by pavgb with the src8
 * rows at src8 and src8 + src8Stride, and stores to dst and dst + dstStride.
 * The loop ends when h -= 2 reaches zero, so h must be a positive even count.
 * Listing lines 1754-1755 and 1781-1782 are absent from this listing.
 */
1749 static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
1750  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
1751 {
1752  double ftmp[7];
1753  uint64_t tmp0;
1756 
1757  do {
1758  __asm__ volatile (
/* ftmp6 = shift count 5. */
1759  "dli %[tmp0], 0x05 \n\t"
1760  MMI_ULDC1(%[ftmp0], %[src16], 0x00)
1761  "mtc1 %[tmp0], %[ftmp6] \n\t"
1762  MMI_ULDC1(%[ftmp1], %[src16], 0x08)
1763  MMI_ULDC1(%[ftmp2], %[src16], 0x30)
1764  MMI_ULDC1(%[ftmp3], %[src16], 0x38)
1765  "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1766  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
1767  "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
1768  "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
1769  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1770  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
1771  MMI_LDC1(%[ftmp5], %[src8], 0x00)
1772  MMI_LDXC1(%[ftmp4], %[src8], %[src8Stride], 0x00)
1773  "pavgb %[ftmp0], %[ftmp0], %[ftmp5] \n\t"
1774  "pavgb %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
1775  MMI_SDC1(%[ftmp0], %[dst], 0x00)
1776  MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
1777  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1778  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1779  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1780  [ftmp6]"=&f"(ftmp[6]),
1783  [tmp0]"=&r"(tmp0)
1784  : [src8]"r"(src8), [src16]"r"(src16),
1785  [dst]"r"(dst),
1786  [src8Stride]"r"((mips_reg)src8Stride),
1787  [dstStride]"r"((mips_reg)dstStride)
1788  : "memory"
1789  );
1790 
/* Advance two rows; src16 rows are 24 int16_t (0x30 bytes) apart. */
1791  src8 += 2 * src8Stride;
1792  src16 += 48;
1793  dst += 2 * dstStride;
1794  } while (h -= 2);
1795 }
1796 
/*
 * 16x16 version built from four 8x8 calls: left and right halves of the top
 * eight rows, then the same for the bottom eight rows.
 * src is stepped with dstStride, matching the 8x8 helper's own src stepping.
 */
1797 static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
1798  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1799 {
1800  put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
1801  put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
1802  src2Stride);
1803 
/* Move all three pointers down eight rows. */
1804  src += 8 * dstStride;
1805  dst += 8 * dstStride;
1806  src2 += 8 * src2Stride;
1807 
1808  put_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
1809  put_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
1810  src2Stride);
1811 }
1812 
/*
 * 16-wide version: runs the 8-wide helper on the left half and again on the
 * right half (all three pointers offset by 8 elements), h rows each.
 */
1813 static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
1814  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
1815 {
1816  put_pixels8_l2_shift5_mmi(dst, src16, src8, dstStride, src8Stride, h);
1817  put_pixels8_l2_shift5_mmi(dst + 8, src16 + 8, src8 + 8, dstStride,
1818  src8Stride, h);
1819 }
1820 
/*
 * 4x4 hv lowpass, "avg" variant.
 * Pass 1 (asm): for 9 source rows starting two rows above src, compute the
 * unrounded horizontal filter 20*(s[0]+s[1]) - 5*(s[-1]+s[2]) + (s[-2]+s[3])
 * for 4 pixels and store the 4 int16_t results per row in _tmp.
 * Pass 2 (C): for each of the 4 columns, apply the same taps vertically and
 * hand the sum to op2_avg (macro defined elsewhere in the file), which
 * updates dst.
 * Listing lines 1829 and 1867 are absent from this listing.
 * NOTE(review): _tmp is written with MMI_SDC1; if that macro expands to an
 * alignment-requiring 8-byte store, a plain int16_t array may need explicit
 * 8-byte alignment -- confirm.
 */
1821 static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
1822  int dstStride, int srcStride)
1823 {
1824  int i;
1825  int16_t _tmp[36];
1826  int16_t *tmp = _tmp;
1827  double ftmp[10];
1828  uint64_t tmp0;
1830 
1831  src -= 2*srcStride;
1832 
1833  __asm__ volatile (
/* ftmp0 = zero for byte unpacking; tmp0 counts the 9 rows. */
1834  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
1835  "dli %[tmp0], 0x09 \n\t"
1836  "1: \n\t"
1837  MMI_ULWC1(%[ftmp1], %[src], -0x02)
1838  MMI_ULWC1(%[ftmp2], %[src], -0x01)
1839  MMI_ULWC1(%[ftmp3], %[src], 0x00)
1840  MMI_ULWC1(%[ftmp4], %[src], 0x01)
1841  MMI_ULWC1(%[ftmp5], %[src], 0x02)
1842  MMI_ULWC1(%[ftmp6], %[src], 0x03)
1843  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
1844  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
1845  "punpcklbh %[ftmp3], %[ftmp3], %[ftmp0] \n\t"
1846  "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t"
1847  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
1848  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
1849  "paddsh %[ftmp7], %[ftmp3], %[ftmp4] \n\t"
1850  "paddsh %[ftmp8], %[ftmp2], %[ftmp5] \n\t"
1851  "paddsh %[ftmp9], %[ftmp1], %[ftmp6] \n\t"
1852  "pmullh %[ftmp7], %[ftmp7], %[ff_pw_20] \n\t"
1853  "pmullh %[ftmp8], %[ftmp8], %[ff_pw_5] \n\t"
1854  "psubsh %[ftmp7], %[ftmp7], %[ftmp8] \n\t"
1855  "paddsh %[ftmp9], %[ftmp7], %[ftmp9] \n\t"
1856  MMI_SDC1(%[ftmp9], %[tmp], 0x00)
1857  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
1858  PTR_ADDU "%[src], %[src], %[srcStride] \n\t"
1859  PTR_ADDU "%[tmp], %[tmp], %[tmpStride] \n\t"
1860  "bnez %[tmp0], 1b \n\t"
1861  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1862  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1863  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1864  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1865  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1866  [tmp0]"=&r"(tmp0),
1868  [tmp]"+&r"(tmp), [src]"+&r"(src)
1869  : [tmpStride]"r"(8),
1870  [srcStride]"r"((mips_reg)srcStride),
1871  [ff_pw_20]"f"(ff_pw_20.f), [ff_pw_5]"f"(ff_pw_5.f)
1872  : "memory"
1873  );
1874 
/*
 * The asm left tmp 9 rows * 4 = 36 elements past _tmp; stepping back 28
 * lands on row 2 (_tmp + 8), so tmp[-8] .. tmp[24] cover rows 0 .. 8.
 */
1875  tmp -= 28;
1876 
1877  for (i=0; i<4; i++) {
/* NOTE(review): this tmp0 shadows the outer uint64_t tmp0 used by the asm. */
1878  const int16_t tmpB= tmp[-8];
1879  const int16_t tmpA= tmp[-4];
1880  const int16_t tmp0= tmp[ 0];
1881  const int16_t tmp1= tmp[ 4];
1882  const int16_t tmp2= tmp[ 8];
1883  const int16_t tmp3= tmp[12];
1884  const int16_t tmp4= tmp[16];
1885  const int16_t tmp5= tmp[20];
1886  const int16_t tmp6= tmp[24];
1887  op2_avg(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));
1888  op2_avg(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));
1889  op2_avg(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));
1890  op2_avg(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));
/* Next column. */
1891  dst++;
1892  tmp++;
1893  }
1894 }
1895 
/*
 * avg_h264_qpel8or16_hv2_lowpass_mmi -- the opening signature line (listing
 * line 1896) is absent from this listing; the name is taken from the call in
 * avg_h264_qpel8or16_hv_lowpass_mmi. Listing lines 1902 and 1957 are absent
 * as well.
 *
 * Horizontal stage of the 8/16-wide hv lowpass, "avg" variant. For output x,
 * with a = t[x]+t[x+5], b = t[x+1]+t[x+4], c = t[x+2]+t[x+3], it computes
 * (((((a - b) >> 2) - b + c) >> 2) + c) >> 6, packs to bytes with unsigned
 * saturation, then combines the result with the 8 bytes already in dst by
 * pavgb before storing.
 * Rows of tmp are 0x30 bytes (24 int16_t) apart; tmpStride is not read here.
 */
1897  int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
1898 {
/* w is 0 for size 8 and 1 for size 16, so the do/while runs w + 1 strips. */
1899  int w = size >> 4;
1900  double ftmp[11];
1901  uint64_t tmp0;
1903 
1904  do {
1905  int h = size;
1906  __asm__ volatile (
/* ftmp9 holds shift count 2, ftmp10 holds shift count 6. */
1907  "dli %[tmp0], 0x02 \n\t"
1908  "mtc1 %[tmp0], %[ftmp9] \n\t"
1909  "dli %[tmp0], 0x06 \n\t"
1910  "mtc1 %[tmp0], %[ftmp10] \n\t"
1911  "1: \n\t"
1912  MMI_LDC1(%[ftmp0], %[tmp], 0x00)
1913  MMI_LDC1(%[ftmp3], %[tmp], 0x08)
1914  MMI_ULDC1(%[ftmp1], %[tmp], 0x02)
1915  MMI_ULDC1(%[ftmp4], %[tmp], 0x0a)
1916  MMI_LDC1(%[ftmp7], %[tmp], 0x10)
1917  MMI_ULDC1(%[ftmp8], %[tmp], 0x12)
1918  "paddh %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
1919  "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
1920  "paddh %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
1921  "paddh %[ftmp4], %[ftmp4], %[ftmp7] \n\t"
1922  MMI_ULDC1(%[ftmp2], %[tmp], 0x04)
1923  MMI_ULDC1(%[ftmp5], %[tmp], 0x0c)
1924  MMI_ULDC1(%[ftmp7], %[tmp], 0x06)
1925  MMI_ULDC1(%[ftmp8], %[tmp], 0x0e)
1926  "paddh %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
1927  "paddh %[ftmp5], %[ftmp5], %[ftmp8] \n\t"
1928  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1929  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1930  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1931  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1932  "psubh %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
1933  "psubh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
1934  "paddsh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1935  "paddsh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1936  "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t"
1937  "psrah %[ftmp3], %[ftmp3], %[ftmp9] \n\t"
1938  "paddh %[ftmp0], %[ftmp0], %[ftmp2] \n\t"
1939  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
1940  "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t"
1941  "psrah %[ftmp3], %[ftmp3], %[ftmp10] \n\t"
1942  "packushb %[ftmp0], %[ftmp0], %[ftmp3] \n\t"
/* "avg" step: blend with the pixels already present in dst. */
1943  MMI_LDC1(%[ftmp6], %[dst], 0x00)
1944  "pavgb %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
1945  MMI_SDC1(%[ftmp0], %[dst], 0x00)
1946  "addi %[h], %[h], -0x01 \n\t"
1947  PTR_ADDI "%[tmp], %[tmp], 0x30 \n\t"
1948  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
1949  "bnez %[h], 1b \n\t"
1950  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
1951  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
1952  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
1953  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
1954  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
1955  [ftmp10]"=&f"(ftmp[10]),
1956  [tmp0]"=&r"(tmp0),
1958  [tmp]"+&r"(tmp), [dst]"+&r"(dst),
1959  [h]"+&r"(h)
1960  : [dstStride]"r"((mips_reg)dstStride)
1961  : "memory"
1962  );
1963 
/* Rewind the size rows just processed and step 8 columns to the right. */
1964  tmp += 8 - size * 24;
1965  dst += 8 - size * dstStride;
1966  } while (w--);
1967 }
1968 
/*
 * 8- or 16-wide hv lowpass, "avg" variant. Stage 1 is shared with the "put"
 * path and fills tmp from src; stage 2 is the avg version, which blends the
 * filtered result into dst.
 */
1969 static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
1970  const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1971  ptrdiff_t srcStride, int size)
1972 {
1973  put_h264_qpel8or16_hv1_lowpass_mmi(tmp, src, tmpStride, srcStride, size);
1974  avg_h264_qpel8or16_hv2_lowpass_mmi(dst, tmp, dstStride, tmpStride, size);
1975 }
1976 
/* Fixed-size entry point: the shared 8/16 hv lowpass ("avg") with size 8. */
1977 static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
1978  const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1979  ptrdiff_t srcStride)
1980 {
1981  avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
1982  srcStride, 8);
1983 }
1984 
/* Fixed-size entry point: the shared 8/16 hv lowpass ("avg") with size 16. */
1985 static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp,
1986  const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride,
1987  ptrdiff_t srcStride)
1988 {
1989  avg_h264_qpel8or16_hv_lowpass_mmi(dst, tmp, src, dstStride, tmpStride,
1990  srcStride, 16);
1991 }
1992 
/*
 * Horizontal 6-tap lowpass of an 8x8 block, averaged with a second source
 * and then with the existing destination.
 * For each of the 8 pixels in a row:
 *     p = (20*(s[0]+s[1]) - 5*(s[-1]+s[2]) + (s[-2]+s[3]) + 16) >> 5
 * The results are packed to bytes with unsigned saturation, combined by
 * pavgb with 8 bytes of src2, combined by pavgb with 8 bytes of dst, and
 * stored to dst.
 * src advances by dstStride (there is no separate source stride parameter),
 * src2 advances by src2Stride. The row counter is tmp0, initialised to 8.
 * Listing lines 1998-1999 and 2060-2061 are absent from this listing.
 */
1993 static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
1994  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
1995 {
1996  double ftmp[10];
1997  uint64_t tmp[2];
2000 
2001  __asm__ volatile (
/* ftmp7 = shift count 2, ftmp8 = shift count 5, tmp0 = 8 rows, ftmp0 = 0. */
2002  "dli %[tmp1], 0x02 \n\t"
2003  "ori %[tmp0], $0, 0x8 \n\t"
2004  "mtc1 %[tmp1], %[ftmp7] \n\t"
2005  "dli %[tmp1], 0x05 \n\t"
2006  "pxor %[ftmp0], %[ftmp0], %[ftmp0] \n\t"
2007  "mtc1 %[tmp1], %[ftmp8] \n\t"
2008  "1: \n\t"
/* 4 * (s[0] + s[1]) in ftmp1 (pixels 0-3) and ftmp3 (pixels 4-7). */
2009  MMI_ULDC1(%[ftmp1], %[src], 0x00)
2010  MMI_ULDC1(%[ftmp2], %[src], 0x01)
2011  "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t"
2012  "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t"
2013  "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2014  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2015  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2016  "paddh %[ftmp3], %[ftmp3], %[ftmp4] \n\t"
2017  "psllh %[ftmp1], %[ftmp1], %[ftmp7] \n\t"
2018  "psllh %[ftmp3], %[ftmp3], %[ftmp7] \n\t"
/* Subtract (s[-1] + s[2]) and multiply by 5. */
2019  MMI_ULDC1(%[ftmp2], %[src], -0x01)
2020  MMI_ULDC1(%[ftmp5], %[src], 0x02)
2021  "punpckhbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t"
2022  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2023  "punpckhbh %[ftmp6], %[ftmp5], %[ftmp0] \n\t"
2024  "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t"
2025  "paddh %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
2026  "paddh %[ftmp6], %[ftmp6], %[ftmp4] \n\t"
2027  "psubh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2028  "psubh %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2029  "pmullh %[ftmp1], %[ftmp1], %[ff_pw_5] \n\t"
2030  "pmullh %[ftmp3], %[ftmp3], %[ff_pw_5] \n\t"
/* Outer taps (s[-2] + s[3]) plus the rounding constant 16, then >> 5. */
2031  MMI_ULWC1(%[ftmp2], %[src], -0x02)
2032  MMI_ULWC1(%[ftmp6], %[src], 0x07)
2033  "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t"
2034  "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t"
2035  "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t"
2036  "paddh %[ftmp5], %[ftmp5], %[ftmp6] \n\t"
2037  "paddh %[ftmp2], %[ftmp2], %[ff_pw_16] \n\t"
2038  "paddh %[ftmp5], %[ftmp5], %[ff_pw_16] \n\t"
2039  "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t"
2040  "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t"
2041  "psrah %[ftmp1], %[ftmp1], %[ftmp8] \n\t"
2042  "psrah %[ftmp3], %[ftmp3], %[ftmp8] \n\t"
2043  MMI_LDC1(%[ftmp5], %[src2], 0x00)
2044  "packushb %[ftmp1], %[ftmp1], %[ftmp3] \n\t"
2045  MMI_LDC1(%[ftmp9], %[dst], 0x00)
/* First blend with src2, then with the existing dst pixels. */
2046  "pavgb %[ftmp1], %[ftmp1], %[ftmp5] \n\t"
2047  "pavgb %[ftmp1], %[ftmp1], %[ftmp9] \n\t"
2048  PTR_ADDU "%[src], %[src], %[dstStride] \n\t"
2049  MMI_SDC1(%[ftmp1], %[dst], 0x00)
2050  "daddi %[tmp0], %[tmp0], -0x01 \n\t"
2051  PTR_ADDU "%[dst], %[dst], %[dstStride] \n\t"
2052  PTR_ADDU "%[src2], %[src2], %[src2Stride] \n\t"
2053  "bgtz %[tmp0], 1b \n\t"
2054  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2055  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2056  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2057  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2058  [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]),
2059  [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]),
2062  [dst]"+&r"(dst), [src]"+&r"(src),
2063  [src2]"+&r"(src2)
2064  : [dstStride]"r"((mips_reg)dstStride),
2065  [src2Stride]"r"((mips_reg)src2Stride),
2066  [ff_pw_5]"f"(ff_pw_5.f), [ff_pw_16]"f"(ff_pw_16.f)
2067  : "memory"
2068  );
2069 }
2070 
/*
 * 16x16 "avg" version built from four 8x8 calls: left and right halves of
 * the top eight rows, then the same for the bottom eight rows.
 * src is stepped with dstStride, matching the 8x8 helper's own src stepping.
 */
2071 static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src,
2072  const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
2073 {
2074  avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
2075  avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
2076  src2Stride);
2077 
/* Move all three pointers down eight rows. */
2078  src += 8 * dstStride;
2079  dst += 8 * dstStride;
2080  src2 += 8 * src2Stride;
2081 
2082  avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, src2, dstStride, src2Stride);
2083  avg_h264_qpel8_h_lowpass_l2_mmi(dst + 8, src + 8, src2 + 8, dstStride,
2084  src2Stride);
2085 }
2086 
/*
 * "avg" counterpart of the 8-wide shift5 combiner, two rows per iteration.
 * Each pass reads 8 int16_t values at src16 byte offsets 0x00/0x08 (row 0)
 * and 0x30/0x38 (row 1), shifts them right arithmetically by 5, packs them
 * to bytes with unsigned saturation, combines them by pavgb with the src8
 * rows, then combines that by pavgb with the bytes already in dst before
 * storing to dst and dst + dstStride.
 * b is the row count; the loop ends when b -= 2 reaches zero, so b must be a
 * positive even count.
 * Listing lines 2092-2093 and 2123-2124 are absent from this listing.
 */
2087 static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
2088  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
2089 {
2090  double ftmp[8];
2091  uint64_t tmp0;
2094 
2095  do {
2096  __asm__ volatile (
/* ftmp6 = shift count 5. */
2097  "dli %[tmp0], 0x05 \n\t"
2098  MMI_ULDC1(%[ftmp0], %[src16], 0x00)
2099  "mtc1 %[tmp0], %[ftmp6] \n\t"
2100  MMI_ULDC1(%[ftmp1], %[src16], 0x08)
2101  MMI_ULDC1(%[ftmp2], %[src16], 0x30)
2102  MMI_ULDC1(%[ftmp3], %[src16], 0x38)
2103  "psrah %[ftmp0], %[ftmp0], %[ftmp6] \n\t"
2104  "psrah %[ftmp1], %[ftmp1], %[ftmp6] \n\t"
2105  "psrah %[ftmp2], %[ftmp2], %[ftmp6] \n\t"
2106  "psrah %[ftmp3], %[ftmp3], %[ftmp6] \n\t"
2107  "packushb %[ftmp0], %[ftmp0], %[ftmp1] \n\t"
2108  MMI_LDC1(%[ftmp4], %[src8], 0x00)
2109  MMI_LDXC1(%[ftmp5], %[src8], %[src8Stride], 0x00)
2110  "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t"
2111  "pavgb %[ftmp0], %[ftmp0], %[ftmp4] \n\t"
2112  "pavgb %[ftmp2], %[ftmp2], %[ftmp5] \n\t"
/* Blend each row with the existing dst row, then write it back. */
2113  MMI_LDC1(%[ftmp7], %[dst], 0x00)
2114  "pavgb %[ftmp0], %[ftmp0], %[ftmp7] \n\t"
2115  MMI_SDC1(%[ftmp0], %[dst], 0x00)
2116  MMI_LDXC1(%[ftmp7], %[dst], %[dstStride], 0x00)
2117  "pavgb %[ftmp2], %[ftmp2], %[ftmp7] \n\t"
2118  MMI_SDXC1(%[ftmp2], %[dst], %[dstStride], 0x00)
2119  : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]),
2120  [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]),
2121  [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]),
2122  [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]),
2125  [tmp0]"=&r"(tmp0)
2126  : [src8]"r"(src8), [src16]"r"(src16),
2127  [dst]"r"(dst),
2128  [src8Stride]"r"((mips_reg)src8Stride),
2129  [dstStride]"r"((mips_reg)dstStride)
2130  : "memory"
2131  );
2132 
/* Advance two rows; src16 rows are 24 int16_t (0x30 bytes) apart. */
2133  src8 += 2 * src8Stride;
2134  src16 += 48;
2135  dst += 2 * dstStride;
2136  } while (b -= 2);
2137 }
2138 
/*
 * 16-wide "avg" version: runs the 8-wide helper on the left half and again
 * on the right half (all three pointers offset by 8 elements), b rows each.
 */
2139 static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16,
2140  const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
2141 {
2142  avg_pixels8_l2_shift5_mmi(dst, src16, src8, dstStride, src8Stride, b);
2143  avg_pixels8_l2_shift5_mmi(dst + 8, src16 + 8, src8 + 8, dstStride,
2144  src8Stride, b);
2145 }
2146 
2147 //DEF_H264_MC_MMI(put_, 4)
/*
 * 4x4 "put" entry point, position mc00.
 * NOTE(review): the body statement (listing line 2151) is absent from this
 * listing, so as shown the function does nothing; restore it from the
 * original source.
 */
2148 void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
2149  ptrdiff_t stride)
2150 {
2152 }
2153 
/*
 * 4x4 "put" entry point, position mc10. half is a 4x4 byte scratch buffer.
 * NOTE(review): listing lines 2158-2159 (the statements that fill and use
 * half) are absent from this listing; restore them from the original source.
 */
2154 void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
2155  ptrdiff_t stride)
2156 {
2157  uint8_t half[16];
2160 }
2161 
/*
 * 4x4 "put" entry point, position mc20.
 * NOTE(review): the body statement (listing line 2165) is absent from this
 * listing; restore it from the original source.
 */
2162 void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
2163  ptrdiff_t stride)
2164 {
2166 }
2167 
/*
 * 4x4 "put" entry point, position mc30. half is a 4x4 byte scratch buffer.
 * NOTE(review): listing lines 2172-2173 (the statements that fill and use
 * half) are absent from this listing; restore them from the original source.
 */
2168 void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
2169  ptrdiff_t stride)
2170 {
2171  uint8_t half[16];
2174 }
2175 
/*
 * 4x4 "put" entry point, position mc01.
 * Copies a 4-wide, 9-row window starting two rows above src into full (row
 * pitch 4), runs the vertical lowpass from full_mid (row 2 of the window)
 * into half, then passes full_mid and half to ff_put_pixels4_l2_8_mmi
 * (defined elsewhere; presumably averages its two sources) to write dst.
 */
2176 void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
2177  ptrdiff_t stride)
2178 {
2179  uint8_t full[36];
2180  uint8_t * const full_mid= full + 8;
2181  uint8_t half[16];
2182  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2183  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2184  ff_put_pixels4_l2_8_mmi(dst, full_mid, half, stride, 4, 4, 4);
2185 }
2186 
/*
 * 4x4 "put" entry point, position mc02.
 * Copies a 4-wide, 9-row window starting two rows above src into full (row
 * pitch 4) and runs the vertical lowpass from full_mid (row 2 of the window)
 * straight into dst.
 */
2187 void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
2188  ptrdiff_t stride)
2189 {
2190  uint8_t full[36];
2191  uint8_t * const full_mid= full + 8;
2192  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2193  put_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4);
2194 }
2195 
/*
 * 4x4 "put" entry point, position mc03.
 * Same as mc01 except that the unfiltered source passed to
 * ff_put_pixels4_l2_8_mmi is full_mid + 4, i.e. one row further down the
 * copied window.
 */
2196 void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
2197  ptrdiff_t stride)
2198 {
2199  uint8_t full[36];
2200  uint8_t * const full_mid= full + 8;
2201  uint8_t half[16];
2202  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2203  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2204  ff_put_pixels4_l2_8_mmi(dst, full_mid+4, half, stride, 4, 4, 4);
2205 }
2206 
/*
 * 4x4 "put" entry point, position mc11.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * src; dst is written from halfH and halfV by ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2214 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2207 void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
2208  ptrdiff_t stride)
2209 {
2210  uint8_t full[36];
2211  uint8_t * const full_mid= full + 8;
2212  uint8_t halfH[16];
2213  uint8_t halfV[16];
2215  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2216  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2217  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2218 }
2219 
/*
 * 4x4 "put" entry point, position mc31.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * and one pixel to the right of src; dst is written from halfH and halfV by
 * ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2227 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2220 void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
2221  ptrdiff_t stride)
2222 {
2223  uint8_t full[36];
2224  uint8_t * const full_mid= full + 8;
2225  uint8_t halfH[16];
2226  uint8_t halfV[16];
2228  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2229  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2230  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2231 }
2232 
/*
 * 4x4 "put" entry point, position mc13.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * src; dst is written from halfH and halfV by ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2240 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2233 void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
2234  ptrdiff_t stride)
2235 {
2236  uint8_t full[36];
2237  uint8_t * const full_mid= full + 8;
2238  uint8_t halfH[16];
2239  uint8_t halfV[16];
2241  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2242  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2243  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2244 }
2245 
/*
 * 4x4 "put" entry point, position mc33.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * and one pixel to the right of src; dst is written from halfH and halfV by
 * ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2253 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2246 void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
2247  ptrdiff_t stride)
2248 {
2249  uint8_t full[36];
2250  uint8_t * const full_mid= full + 8;
2251  uint8_t halfH[16];
2252  uint8_t halfV[16];
2254  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2255  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2256  ff_put_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2257 }
2258 
/*
 * 4x4 "put" entry point, position mc22.
 * NOTE(review): the body statement (listing line 2262) is absent from this
 * listing; restore it from the original source.
 */
2259 void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
2260  ptrdiff_t stride)
2261 {
2263 }
2264 
/*
 * 4x4 "put" entry point, position mc21.
 * dst is written from halfH and halfHV by ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing lines 2270-2271 are absent from this listing, and
 * nothing visible here writes halfH or halfHV; presumably those lines fill
 * them -- restore them from the original source.
 */
2265 void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
2266  ptrdiff_t stride)
2267 {
2268  uint8_t halfH[16];
2269  uint8_t halfHV[16];
2272  ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2273 }
2274 
/*
 * 4x4 "put" entry point, position mc23.
 * dst is written from halfH and halfHV by ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing lines 2280-2281 are absent from this listing, and
 * nothing visible here writes halfH or halfHV; presumably those lines fill
 * them -- restore them from the original source.
 */
2275 void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
2276  ptrdiff_t stride)
2277 {
2278  uint8_t halfH[16];
2279  uint8_t halfHV[16];
2282  ff_put_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2283 }
2284 
/*
 * 4x4 "put" entry point, position mc12.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * src; dst is written from halfV and halfHV by ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2294 is absent from this listing, and nothing
 * visible here writes halfHV; presumably that line fills it -- restore it
 * from the original source.
 */
2285 void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
2286  ptrdiff_t stride)
2287 {
2288  uint8_t full[36];
2289  uint8_t * const full_mid= full + 8;
2290  uint8_t halfV[16];
2291  uint8_t halfHV[16];
2292  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2293  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2295  ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2296 }
2297 
/*
 * 4x4 "put" entry point, position mc32.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * and one pixel to the right of src; dst is written from halfV and halfHV by
 * ff_put_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2307 is absent from this listing, and nothing
 * visible here writes halfHV; presumably that line fills it -- restore it
 * from the original source.
 */
2298 void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
2299  ptrdiff_t stride)
2300 {
2301  uint8_t full[36];
2302  uint8_t * const full_mid= full + 8;
2303  uint8_t halfV[16];
2304  uint8_t halfHV[16];
2305  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2306  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2308  ff_put_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2309 }
2310 
2311 //DEF_H264_MC_MMI(avg_, 4)
/*
 * 4x4 "avg" entry point, position mc00.
 * NOTE(review): the body statement (listing line 2315) is absent from this
 * listing, so as shown the function does nothing; restore it from the
 * original source.
 */
2312 void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src,
2313  ptrdiff_t stride)
2314 {
2316 }
2317 
/*
 * 4x4 "avg" entry point, position mc10. half is a 4x4 byte scratch buffer.
 * NOTE(review): listing lines 2322-2323 (the statements that fill and use
 * half) are absent from this listing; restore them from the original source.
 */
2318 void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src,
2319  ptrdiff_t stride)
2320 {
2321  uint8_t half[16];
2324 }
2325 
/*
 * 4x4 "avg" entry point, position mc20.
 * NOTE(review): the body statement (listing line 2329) is absent from this
 * listing; restore it from the original source.
 */
2326 void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src,
2327  ptrdiff_t stride)
2328 {
2330 }
2331 
/*
 * 4x4 "avg" entry point, position mc30. half is a 4x4 byte scratch buffer.
 * NOTE(review): listing lines 2336-2337 (the statements that fill and use
 * half) are absent from this listing; restore them from the original source.
 */
2332 void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src,
2333  ptrdiff_t stride)
2334 {
2335  uint8_t half[16];
2338 }
2339 
/*
 * 4x4 "avg" entry point, position mc01.
 * Copies a 4-wide, 9-row window starting two rows above src into full (row
 * pitch 4), runs the vertical lowpass from full_mid (row 2 of the window)
 * into half, then passes full_mid and half to ff_avg_pixels4_l2_8_mmi
 * (defined elsewhere; presumably averages its two sources and blends the
 * result into dst).
 */
2340 void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src,
2341  ptrdiff_t stride)
2342 {
2343  uint8_t full[36];
2344  uint8_t * const full_mid= full + 8;
2345  uint8_t half[16];
2346  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2347  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2348  ff_avg_pixels4_l2_8_mmi(dst, full_mid, half, stride, 4, 4, 4);
2349 }
2350 
/*
 * 4x4 "avg" entry point, position mc02.
 * Copies a 4-wide, 9-row window starting two rows above src into full (row
 * pitch 4) and runs the "avg" vertical lowpass from full_mid (row 2 of the
 * window) straight into dst.
 */
2351 void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src,
2352  ptrdiff_t stride)
2353 {
2354  uint8_t full[36];
2355  uint8_t * const full_mid= full + 8;
2356  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2357  avg_h264_qpel4_v_lowpass_mmi(dst, full_mid, stride, 4);
2358 }
2359 
/*
 * 4x4 "avg" entry point, position mc03.
 * Same as the avg mc01 case except that the unfiltered source passed to
 * ff_avg_pixels4_l2_8_mmi is full_mid + 4, i.e. one row further down the
 * copied window.
 */
2360 void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src,
2361  ptrdiff_t stride)
2362 {
2363  uint8_t full[36];
2364  uint8_t * const full_mid= full + 8;
2365  uint8_t half[16];
2366  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2367  put_h264_qpel4_v_lowpass_mmi(half, full_mid, 4, 4);
2368  ff_avg_pixels4_l2_8_mmi(dst, full_mid+4, half, stride, 4, 4, 4);
2369 }
2370 
/*
 * 4x4 "avg" entry point, position mc11.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * src; dst is updated from halfH and halfV by ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2378 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2371 void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src,
2372  ptrdiff_t stride)
2373 {
2374  uint8_t full[36];
2375  uint8_t * const full_mid= full + 8;
2376  uint8_t halfH[16];
2377  uint8_t halfV[16];
2379  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2380  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2381  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2382 }
2383 
/*
 * 4x4 "avg" entry point, position mc31.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * and one pixel to the right of src; dst is updated from halfH and halfV by
 * ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2391 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2384 void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src,
2385  ptrdiff_t stride)
2386 {
2387  uint8_t full[36];
2388  uint8_t * const full_mid= full + 8;
2389  uint8_t halfH[16];
2390  uint8_t halfV[16];
2392  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2393  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2394  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2395 }
2396 
/*
 * 4x4 "avg" entry point, position mc13.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * src; dst is updated from halfH and halfV by ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2404 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2397 void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src,
2398  ptrdiff_t stride)
2399 {
2400  uint8_t full[36];
2401  uint8_t * const full_mid= full + 8;
2402  uint8_t halfH[16];
2403  uint8_t halfV[16];
2405  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2406  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2407  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2408 }
2409 
/*
 * 4x4 "avg" entry point, position mc33.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * and one pixel to the right of src; dst is updated from halfH and halfV by
 * ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2417 is absent from this listing, and nothing
 * visible here writes halfH; presumably that line fills it -- restore it
 * from the original source.
 */
2410 void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src,
2411  ptrdiff_t stride)
2412 {
2413  uint8_t full[36];
2414  uint8_t * const full_mid= full + 8;
2415  uint8_t halfH[16];
2416  uint8_t halfV[16];
2418  copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
2419  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2420  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfV, stride, 4, 4, 4);
2421 }
2422 
/*
 * 4x4 "avg" entry point, position mc22.
 * NOTE(review): the body statement (listing line 2426) is absent from this
 * listing; restore it from the original source.
 */
2423 void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src,
2424  ptrdiff_t stride)
2425 {
2427 }
2428 
/*
 * 4x4 "avg" entry point, position mc21.
 * dst is updated from halfH and halfHV by ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing lines 2434-2435 are absent from this listing, and
 * nothing visible here writes halfH or halfHV; presumably those lines fill
 * them -- restore them from the original source.
 */
2429 void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src,
2430  ptrdiff_t stride)
2431 {
2432  uint8_t halfH[16];
2433  uint8_t halfHV[16];
2436  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2437 }
2438 
/*
 * 4x4 "avg" entry point, position mc23.
 * dst is updated from halfH and halfHV by ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing lines 2444-2445 are absent from this listing, and
 * nothing visible here writes halfH or halfHV; presumably those lines fill
 * them -- restore them from the original source.
 */
2439 void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src,
2440  ptrdiff_t stride)
2441 {
2442  uint8_t halfH[16];
2443  uint8_t halfHV[16];
2446  ff_avg_pixels4_l2_8_mmi(dst, halfH, halfHV, stride, 4, 4, 4);
2447 }
2448 
/*
 * 4x4 "avg" entry point, position mc12.
 * halfV is the vertical lowpass of a 4x9 window copied from two rows above
 * src; dst is updated from halfV and halfHV by ff_avg_pixels4_l2_8_mmi.
 * NOTE(review): listing line 2458 is absent from this listing, and nothing
 * visible here writes halfHV; presumably that line fills it -- restore it
 * from the original source.
 */
2449 void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src,
2450  ptrdiff_t stride)
2451 {
2452  uint8_t full[36];
2453  uint8_t * const full_mid= full + 8;
2454  uint8_t halfV[16];
2455  uint8_t halfHV[16];
2456  copy_block4_mmi(full, src - stride*2, 4, stride, 9);
2457  put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
2459  ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
2460 }
2461 
/*
 * avg, 4x4 block, sub-position mc32.
 * halfV : vertical low-pass over a packed copy of 9 source rows that starts
 *         two rows above src + 1.
 * halfHV: hv low-pass at src (packed, stride 4).
 * Both planes are handed to ff_avg_pixels4_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[36];
    uint8_t * const full_mid= full + 8;
    uint8_t halfV[16];
    uint8_t halfHV[16];

    copy_block4_mmi(full, src - stride*2 + 1, 4, stride, 9);
    put_h264_qpel4_v_lowpass_mmi(halfV, full_mid, 4, 4);
    /* halfHV must be filled before the final combine reads it. */
    put_h264_qpel4_hv_lowpass_mmi(halfHV, src, 4, stride);
    ff_avg_pixels4_l2_8_mmi(dst, halfV, halfHV, stride, 4, 4, 4);
}
2474 
2475 //DEF_H264_MC_MMI(put_, 8)
/*
 * put, 8x8 block, sub-position mc00.
 * No filtering: forwards to ff_put_pixels8_8_mmi for 8 rows.
 */
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    ff_put_pixels8_8_mmi(dst, src, stride, 8);
}
2481 
/*
 * put, 8x8 block, sub-position mc10.
 * half: horizontal low-pass at src (packed, stride 8).
 * The source rows at src and half are handed to ff_put_pixels8_l2_8_mmi
 * together with dst.
 */
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];

    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_put_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8);
}
2489 
/*
 * put, 8x8 block, sub-position mc20.
 * Writes the horizontal low-pass of src straight into dst, using the
 * picture stride on both sides.
 */
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    put_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
}
2495 
/*
 * put, 8x8 block, sub-position mc30.
 * half: horizontal low-pass at src (packed, stride 8).
 * The source rows at src + 1 and half are handed to ff_put_pixels8_l2_8_mmi
 * together with dst.
 */
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];

    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_put_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8);
}
2503 
/*
 * put, 8x8 block, sub-position mc01.
 * Packs 13 source rows (starting two rows above src) into a stride-8
 * buffer, runs the vertical low-pass from its third row, then hands that
 * third row onward plus the filtered plane to ff_put_pixels8_l2_8_mmi.
 */
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, centre, vfilt, stride, 8, 8, 8);
}
2514 
/*
 * put, 8x8 block, sub-position mc02.
 * Packs 13 source rows (starting two rows above src) into a stride-8
 * buffer and writes the vertical low-pass of it directly into dst.
 */
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t *const centre = &rows[2 * 8];

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(dst, centre, stride, 8);
}
2523 
/*
 * put, 8x8 block, sub-position mc03.
 * Same pipeline as the mc01 variant, except that the unfiltered operand
 * given to ff_put_pixels8_l2_8_mmi starts one packed row (8 bytes) later.
 */
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, centre + 8, vfilt, stride, 8, 8, 8);
}
2534 
/*
 * put, 8x8 block, sub-position mc11.
 * halfH: horizontal low-pass at src (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_put_pixels8_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2547 
/*
 * put, 8x8 block, sub-position mc31.
 * halfH: horizontal low-pass at src (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_put_pixels8_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2560 
/*
 * put, 8x8 block, sub-position mc13.
 * halfH: horizontal low-pass at src + stride (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_put_pixels8_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2573 
/*
 * put, 8x8 block, sub-position mc33.
 * halfH: horizontal low-pass at src + stride (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_put_pixels8_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_put_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2586 
/*
 * put, 8x8 block, sub-position mc22.
 * Runs the 8x8 hv low-pass straight into dst; temp is the 16-bit scratch
 * plane the helper needs (tmpStride 8).
 */
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) temp[192];

    /* The helper takes int16_t *; cast explicitly to avoid a sign mismatch. */
    put_h264_qpel8_hv_lowpass_mmi(dst, (int16_t *) temp, src, stride, 8, stride);
}
2594 
/*
 * put, 8x8 block, sub-position mc21.
 * temp holds halfHV (first 64 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the horizontal "l2" low-pass
 * at src is combined with it and written to dst.
 */
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[448];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 64);

    put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
    /* Final step: this is the only call that writes dst. */
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src, halfHV, stride, 8);
}
2605 
/*
 * put, 8x8 block, sub-position mc23.
 * temp holds halfHV (first 64 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the horizontal "l2" low-pass
 * at src + stride is combined with it and written to dst.
 */
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[448];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 64);

    put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
    /* Final step: this is the only call that writes dst. */
    put_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 8);
}
2616 
/*
 * put, 8x8 block, sub-position mc12.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..63) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 2) and the hv plane go to put_pixels8_l2_shift5_mmi.
 */
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv8, mid16, src, 8, 8, stride);
    put_pixels8_l2_shift5_mmi(dst, &mid16[2], hv8, stride, 8, 8);
}
2627 
/*
 * put, 8x8 block, sub-position mc32.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..63) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 3) and the hv plane go to put_pixels8_l2_shift5_mmi.
 */
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv8, mid16, src, 8, 8, stride);
    put_pixels8_l2_shift5_mmi(dst, &mid16[3], hv8, stride, 8, 8);
}
2638 
2639 //DEF_H264_MC_MMI(avg_, 8)
/*
 * avg, 8x8 block, sub-position mc00.
 * No filtering: forwards to ff_avg_pixels8_8_mmi for 8 rows.
 */
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    ff_avg_pixels8_8_mmi(dst, src, stride, 8);
}
2645 
/*
 * avg, 8x8 block, sub-position mc10.
 * half: horizontal low-pass at src (packed, stride 8).
 * The source rows at src and half are handed to ff_avg_pixels8_l2_8_mmi
 * together with dst.
 */
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];

    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_avg_pixels8_l2_8_mmi(dst, src, half, stride, stride, 8, 8);
}
2653 
/*
 * avg, 8x8 block, sub-position mc20.
 * Delegates to the avg flavour of the 8x8 horizontal low-pass, using the
 * picture stride on both sides.
 */
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    avg_h264_qpel8_h_lowpass_mmi(dst, src, stride, stride);
}
2659 
/*
 * avg, 8x8 block, sub-position mc30.
 * half: horizontal low-pass at src (packed, stride 8).
 * The source rows at src + 1 and half are handed to ff_avg_pixels8_l2_8_mmi
 * together with dst.
 */
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[64];

    put_h264_qpel8_h_lowpass_mmi(half, src, 8, stride);
    ff_avg_pixels8_l2_8_mmi(dst, src+1, half, stride, stride, 8, 8);
}
2667 
/*
 * avg, 8x8 block, sub-position mc01.
 * Packs 13 source rows (starting two rows above src) into a stride-8
 * buffer, runs the vertical low-pass from its third row, then hands that
 * third row onward plus the filtered plane to ff_avg_pixels8_l2_8_mmi.
 */
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, centre, vfilt, stride, 8, 8, 8);
}
2678 
/*
 * avg, 8x8 block, sub-position mc02.
 * Packs 13 source rows (starting two rows above src) into a stride-8
 * buffer and applies the avg flavour of the vertical low-pass onto dst.
 */
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t *const centre = &rows[2 * 8];

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    avg_h264_qpel8_v_lowpass_mmi(dst, centre, stride, 8);
}
2687 
/*
 * avg, 8x8 block, sub-position mc03.
 * Same pipeline as the mc01 variant, except that the unfiltered operand
 * given to ff_avg_pixels8_l2_8_mmi starts one packed row (8 bytes) later.
 */
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[13 * 8];
    uint8_t vfilt[8 * 8];
    uint8_t *const centre = &rows[2 * 8];

    copy_block8_mmi(rows, src - 2 * stride, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(vfilt, centre, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, centre + 8, vfilt, stride, 8, 8, 8);
}
2698 
/*
 * avg, 8x8 block, sub-position mc11.
 * halfH: horizontal low-pass at src (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_avg_pixels8_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2711 
/*
 * avg, 8x8 block, sub-position mc31.
 * halfH: horizontal low-pass at src (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_avg_pixels8_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2724 
/*
 * avg, 8x8 block, sub-position mc13.
 * halfH: horizontal low-pass at src + stride (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_avg_pixels8_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2737 
/*
 * avg, 8x8 block, sub-position mc33.
 * halfH: horizontal low-pass at src + stride (packed, stride 8).
 * halfV: vertical low-pass over a packed copy of 13 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_avg_pixels8_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[104];
    uint8_t * const full_mid= full + 16;
    uint8_t halfH[64];
    uint8_t halfV[64];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel8_h_lowpass_mmi(halfH, src + stride, 8, stride);
    copy_block8_mmi(full, src - stride*2 + 1, 8, stride, 13);
    put_h264_qpel8_v_lowpass_mmi(halfV, full_mid, 8, 8);
    ff_avg_pixels8_l2_8_mmi(dst, halfH, halfV, stride, 8, 8, 8);
}
2750 
/*
 * avg, 8x8 block, sub-position mc22.
 * Runs the avg flavour of the 8x8 hv low-pass onto dst; temp is the 16-bit
 * scratch plane the helper needs (tmpStride 8).
 */
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) temp[192];

    /* The helper takes int16_t *; cast explicitly to avoid a sign mismatch. */
    avg_h264_qpel8_hv_lowpass_mmi(dst, (int16_t *) temp, src, stride, 8, stride);
}
2758 
/*
 * avg, 8x8 block, sub-position mc21.
 * temp holds halfHV (first 64 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the avg flavour of the
 * horizontal "l2" low-pass at src is combined with it onto dst.
 */
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[448];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 64);

    put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
    /* Final step: this is the only call that writes dst. */
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src, halfHV, stride, 8);
}
2769 
/*
 * avg, 8x8 block, sub-position mc23.
 * temp holds halfHV (first 64 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the avg flavour of the
 * horizontal "l2" low-pass at src + stride is combined with it onto dst.
 */
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[448];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 64);

    put_h264_qpel8_hv_lowpass_mmi(halfHV, halfV, src, 8, 8, stride);
    /* Final step: this is the only call that writes dst. */
    avg_h264_qpel8_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 8);
}
2780 
/*
 * avg, 8x8 block, sub-position mc12.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..63) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 2) and the hv plane go to avg_pixels8_l2_shift5_mmi.
 */
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv8, mid16, src, 8, 8, stride);
    avg_pixels8_l2_shift5_mmi(dst, &mid16[2], hv8, stride, 8, 8);
}
2791 
/*
 * avg, 8x8 block, sub-position mc32.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..63) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 3) and the hv plane go to avg_pixels8_l2_shift5_mmi.
 */
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[448];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[64];

    put_h264_qpel8_hv_lowpass_mmi(hv8, mid16, src, 8, 8, stride);
    avg_pixels8_l2_shift5_mmi(dst, &mid16[3], hv8, stride, 8, 8);
}
2802 
2803 //DEF_H264_MC_MMI(put_, 16)
/*
 * put, 16x16 block, sub-position mc00.
 * No filtering: forwards to ff_put_pixels16_8_mmi for 16 rows.
 */
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    ff_put_pixels16_8_mmi(dst, src, stride, 16);
}
2809 
/*
 * put, 16x16 block, sub-position mc10.
 * half: horizontal low-pass at src (packed, stride 16).
 * The source rows at src and half are handed to ff_put_pixels16_l2_8_mmi
 * together with dst.
 */
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];

    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_put_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16);
}
2817 
/*
 * put, 16x16 block, sub-position mc20.
 * Writes the horizontal low-pass of src straight into dst, using the
 * picture stride on both sides.
 */
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    put_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
}
2823 
/*
 * put, 16x16 block, sub-position mc30.
 * half: horizontal low-pass at src (packed, stride 16).
 * The source rows at src + 1 and half are handed to
 * ff_put_pixels16_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];

    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_put_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16);
}
2831 
/*
 * put, 16x16 block, sub-position mc01.
 * Packs 21 source rows (starting two rows above src) into a stride-16
 * buffer, runs the vertical low-pass from its third row, then hands that
 * third row onward plus the filtered plane to ff_put_pixels16_l2_8_mmi.
 */
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, centre, vfilt, stride, 16, 16, 16);
}
2842 
/*
 * put, 16x16 block, sub-position mc02.
 * Packs 21 source rows (starting two rows above src) into a stride-16
 * buffer and writes the vertical low-pass of it directly into dst.
 */
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t *const centre = &rows[2 * 16];

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(dst, centre, stride, 16);
}
2851 
/*
 * put, 16x16 block, sub-position mc03.
 * Same pipeline as the mc01 variant, except that the unfiltered operand
 * given to ff_put_pixels16_l2_8_mmi starts one packed row (16 bytes) later.
 */
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, centre + 16, vfilt, stride, 16, 16, 16);
}
2862 
/*
 * put, 16x16 block, sub-position mc11.
 * halfH: horizontal low-pass at src (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_put_pixels16_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2875 
/*
 * put, 16x16 block, sub-position mc31.
 * halfH: horizontal low-pass at src (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_put_pixels16_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2888 
/*
 * put, 16x16 block, sub-position mc13.
 * halfH: horizontal low-pass at src + stride (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_put_pixels16_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2901 
/*
 * put, 16x16 block, sub-position mc33.
 * halfH: horizontal low-pass at src + stride (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_put_pixels16_l2_8_mmi together with dst.
 */
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_put_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
2914 
/*
 * put, 16x16 block, sub-position mc22.
 * Runs the 16x16 hv low-pass straight into dst; temp is the 16-bit scratch
 * plane the helper needs (tmpStride 16).
 */
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) temp[384];

    /* The helper takes int16_t *; cast explicitly to avoid a sign mismatch. */
    put_h264_qpel16_hv_lowpass_mmi(dst, (int16_t *) temp, src, stride, 16, stride);
}
2922 
/*
 * put, 16x16 block, sub-position mc21.
 * temp holds halfHV (first 256 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the horizontal "l2" low-pass
 * at src is combined with it and written to dst.
 */
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    /* Final step: this is the only call that writes dst. */
    put_h264_qpel16_h_lowpass_l2_mmi(dst, src, halfHV, stride, 16);
}
2933 
/*
 * put, 16x16 block, sub-position mc23.
 * temp holds halfHV (first 256 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the horizontal "l2" low-pass
 * at src + stride is combined with it and written to dst.
 */
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    /* Final step: this is the only call that writes dst. */
    put_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 16);
}
2944 
/*
 * put, 16x16 block, sub-position mc12.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..255) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 2) and the hv plane go to put_pixels16_l2_shift5_mmi.
 */
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv8, mid16, src, 16, 16, stride);
    put_pixels16_l2_shift5_mmi(dst, &mid16[2], hv8, stride, 16, 16);
}
2955 
/*
 * put, 16x16 block, sub-position mc32.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..255) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 3) and the hv plane go to put_pixels16_l2_shift5_mmi.
 */
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv8, mid16, src, 16, 16, stride);
    put_pixels16_l2_shift5_mmi(dst, &mid16[3], hv8, stride, 16, 16);
}
2966 
2967 //DEF_H264_MC_MMI(avg_, 16)
/*
 * avg, 16x16 block, sub-position mc00.
 * No filtering: forwards to ff_avg_pixels16_8_mmi for 16 rows.
 */
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    ff_avg_pixels16_8_mmi(dst, src, stride, 16);
}
2973 
/*
 * avg, 16x16 block, sub-position mc10.
 * half: horizontal low-pass at src (packed, stride 16).
 * The source rows at src and half are handed to ff_avg_pixels16_l2_8_mmi
 * together with dst.
 */
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];

    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_avg_pixels16_l2_8_mmi(dst, src, half, stride, stride, 16, 16);
}
2981 
/*
 * avg, 16x16 block, sub-position mc20.
 * Delegates to the avg flavour of the 16x16 horizontal low-pass, using the
 * picture stride on both sides.
 */
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    avg_h264_qpel16_h_lowpass_mmi(dst, src, stride, stride);
}
2987 
/*
 * avg, 16x16 block, sub-position mc30.
 * half: horizontal low-pass at src (packed, stride 16).
 * The source rows at src + 1 and half are handed to
 * ff_avg_pixels16_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t half[256];

    put_h264_qpel16_h_lowpass_mmi(half, src, 16, stride);
    ff_avg_pixels16_l2_8_mmi(dst, src+1, half, stride, stride, 16, 16);
}
2995 
/*
 * avg, 16x16 block, sub-position mc01.
 * Packs 21 source rows (starting two rows above src) into a stride-16
 * buffer, runs the vertical low-pass from its third row, then hands that
 * third row onward plus the filtered plane to ff_avg_pixels16_l2_8_mmi.
 */
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, centre, vfilt, stride, 16, 16, 16);
}
3006 
/*
 * avg, 16x16 block, sub-position mc02.
 * Packs 21 source rows (starting two rows above src) into a stride-16
 * buffer and applies the avg flavour of the vertical low-pass onto dst.
 */
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t *const centre = &rows[2 * 16];

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    avg_h264_qpel16_v_lowpass_mmi(dst, centre, stride, 16);
}
3015 
/*
 * avg, 16x16 block, sub-position mc03.
 * Same pipeline as the mc01 variant, except that the unfiltered operand
 * given to ff_avg_pixels16_l2_8_mmi starts one packed row (16 bytes) later.
 */
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t rows[21 * 16];
    uint8_t vfilt[16 * 16];
    uint8_t *const centre = &rows[2 * 16];

    copy_block16_mmi(rows, src - 2 * stride, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(vfilt, centre, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, centre + 16, vfilt, stride, 16, 16, 16);
}
3026 
/*
 * avg, 16x16 block, sub-position mc11.
 * halfH: horizontal low-pass at src (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_avg_pixels16_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3039 
/*
 * avg, 16x16 block, sub-position mc31.
 * halfH: horizontal low-pass at src (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_avg_pixels16_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3052 
/*
 * avg, 16x16 block, sub-position mc13.
 * halfH: horizontal low-pass at src + stride (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src.
 * Both planes are handed to ff_avg_pixels16_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3065 
/*
 * avg, 16x16 block, sub-position mc33.
 * halfH: horizontal low-pass at src + stride (packed, stride 16).
 * halfV: vertical low-pass over a packed copy of 21 source rows that starts
 *        two rows above src + 1.
 * Both planes are handed to ff_avg_pixels16_l2_8_mmi together with dst.
 */
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t full[336];
    uint8_t * const full_mid= full + 32;
    uint8_t halfH[256];
    uint8_t halfV[256];

    /* halfH must be filled before the final combine reads it. */
    put_h264_qpel16_h_lowpass_mmi(halfH, src + stride, 16, stride);
    copy_block16_mmi(full, src - stride*2 + 1, 16, stride, 21);
    put_h264_qpel16_v_lowpass_mmi(halfV, full_mid, 16, 16);
    ff_avg_pixels16_l2_8_mmi(dst, halfH, halfV, stride, 16, 16, 16);
}
3078 
/*
 * avg, 16x16 block, sub-position mc22.
 * Runs the avg flavour of the 16x16 hv low-pass onto dst; temp is the
 * 16-bit scratch plane the helper needs (tmpStride 16).
 */
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint16_t __attribute__ ((aligned(8))) temp[384];

    /* The helper takes int16_t *; cast explicitly to avoid a sign mismatch. */
    avg_h264_qpel16_hv_lowpass_mmi(dst, (int16_t *) temp, src, stride, 16, stride);
}
3086 
/*
 * avg, 16x16 block, sub-position mc21.
 * temp holds halfHV (first 256 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the avg flavour of the
 * horizontal "l2" low-pass at src is combined with it onto dst.
 */
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    /* Final step: this is the only call that writes dst. */
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src, halfHV, stride, 16);
}
3097 
/*
 * avg, 16x16 block, sub-position mc23.
 * temp holds halfHV (first 256 bytes) followed by the 16-bit plane used by
 * the hv low-pass. After halfHV is produced, the avg flavour of the
 * horizontal "l2" low-pass at src + stride is combined with it onto dst.
 */
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) temp[1024];
    uint8_t *const halfHV = temp;
    int16_t *const halfV = (int16_t *) (temp + 256);

    put_h264_qpel16_hv_lowpass_mmi(halfHV, halfV, src, 16, 16, stride);
    /* Final step: this is the only call that writes dst. */
    avg_h264_qpel16_h_lowpass_l2_mmi(dst, src + stride, halfHV, stride, 16);
}
3108 
/*
 * avg, 16x16 block, sub-position mc12.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..255) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 2) and the hv plane go to avg_pixels16_l2_shift5_mmi.
 */
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv8, mid16, src, 16, 16, stride);
    avg_pixels16_l2_shift5_mmi(dst, &mid16[2], hv8, stride, 16, 16);
}
3119 
/*
 * avg, 16x16 block, sub-position mc32.
 * One aligned scratch area holds the 8-bit hv plane (bytes 0..255) followed
 * by the 16-bit intermediate plane. After the hv low-pass, the 16-bit plane
 * (from its element 3) and the hv plane go to avg_pixels16_l2_shift5_mmi.
 */
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src,
        ptrdiff_t stride)
{
    uint8_t __attribute__ ((aligned(8))) scratch[1024];
    uint8_t *const hv8  = &scratch[0];
    int16_t *const mid16 = (int16_t *) &scratch[256];

    put_h264_qpel16_hv_lowpass_mmi(hv8, mid16, src, 16, 16, stride);
    avg_pixels16_l2_shift5_mmi(dst, &mid16[3], hv8, stride, 16, 16);
}
3130 
3131 #undef op2_avg
3132 #undef op2_put
op2_avg
#define op2_avg(a, b)
Definition: h264qpel_mmi.c:107
ff_put_h264_qpel8_mc13_mmi
void ff_put_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2561
ff_put_h264_qpel16_mc03_mmi
void ff_put_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2852
ff_put_h264_qpel4_mc23_mmi
void ff_put_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2275
ff_avg_h264_qpel8_mc22_mmi
void ff_avg_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2751
ff_avg_h264_qpel4_mc02_mmi
void ff_avg_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2351
ff_put_pixels4_l2_8_mmi
void ff_put_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:303
put_h264_qpel4_v_lowpass_mmi
static void put_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:385
avg_h264_qpel16_hv_lowpass_mmi
static void avg_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1985
ff_avg_h264_qpel8_mc11_mmi
void ff_avg_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2699
ff_avg_h264_qpel8_mc31_mmi
void ff_avg_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2712
ff_avg_h264_qpel8_mc10_mmi
void ff_avg_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2646
put_h264_qpel8or16_hv1_lowpass_mmi
static void put_h264_qpel8or16_hv1_lowpass_mmi(int16_t *tmp, const uint8_t *src, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
Definition: h264qpel_mmi.c:1333
ff_avg_h264_qpel8_mc30_mmi
void ff_avg_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2660
ff_avg_h264_qpel16_mc11_mmi
void ff_avg_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3027
ff_put_h264_qpel4_mc33_mmi
void ff_put_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2246
put_h264_qpel8_h_lowpass_l2_mmi
static void put_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:1673
ff_put_h264_qpel4_mc20_mmi
void ff_put_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2162
ff_put_h264_qpel16_mc23_mmi
void ff_put_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2934
DECLARE_VAR_LOW32
#define DECLARE_VAR_LOW32
Definition: mmiutils.h:37
avg_h264_qpel8_h_lowpass_mmi
static void avg_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:302
tmp
static uint8_t tmp[11]
Definition: aes_ctr.c:28
w
uint8_t w
Definition: llviddspenc.c:38
ff_put_h264_qpel8_mc00_mmi
void ff_put_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2476
avg_h264_qpel8or16_hv_lowpass_mmi
static void avg_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
Definition: h264qpel_mmi.c:1969
ff_put_h264_qpel16_mc12_mmi
void ff_put_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2945
b
#define b
Definition: input.c:41
half
static uint8_t half(int a, int b)
Definition: mobiclip.c:539
ff_avg_h264_qpel4_mc11_mmi
void ff_avg_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2371
ff_put_h264_qpel4_mc13_mmi
void ff_put_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2233
mips_reg
#define mips_reg
Definition: asmdefs.h:46
ff_put_h264_qpel8_mc02_mmi
void ff_put_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2515
ff_avg_h264_qpel16_mc31_mmi
void ff_avg_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3040
ff_avg_pixels8_8_mmi
void ff_avg_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel4_mc10_mmi
void ff_avg_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2318
ff_put_h264_qpel4_mc32_mmi
void ff_put_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2298
avg_h264_qpel16_h_lowpass_l2_mmi
static void avg_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:2071
PTR_ADDI
#define PTR_ADDI
Definition: asmdefs.h:51
put_h264_qpel8_hv_lowpass_mmi
static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1657
ff_put_h264_qpel8_mc33_mmi
void ff_put_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2574
ff_avg_pixels4_l2_8_mmi
void ff_avg_pixels4_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:487
ff_put_h264_qpel4_mc02_mmi
void ff_put_h264_qpel4_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2187
ff_put_h264_qpel4_mc12_mmi
void ff_put_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2285
ff_avg_h264_qpel16_mc32_mmi
void ff_avg_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3120
ff_put_h264_qpel16_mc31_mmi
void ff_put_h264_qpel16_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2876
ff_avg_h264_qpel16_mc21_mmi
void ff_avg_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3087
ff_avg_h264_qpel8_mc33_mmi
void ff_avg_h264_qpel8_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2738
put_h264_qpel4_h_lowpass_mmi
static void put_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:109
avg_h264_qpel16_h_lowpass_mmi
static void avg_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:374
mmiutils.h
ff_avg_h264_qpel4_mc00_mmi
void ff_avg_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2312
ff_put_h264_qpel16_mc01_mmi
void ff_put_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2832
ff_avg_h264_qpel4_mc32_mmi
void ff_avg_h264_qpel4_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2462
ff_put_h264_qpel8_mc31_mmi
void ff_put_h264_qpel8_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2548
aligned
static int aligned(int val)
Definition: dashdec.c:171
ff_put_pixels8_8_mmi
void ff_put_pixels8_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel16_mc10_mmi
void ff_avg_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2974
avg_h264_qpel8_h_lowpass_l2_mmi
static void avg_h264_qpel8_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:1993
ff_avg_h264_qpel4_mc12_mmi
void ff_avg_h264_qpel4_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2449
ff_avg_h264_qpel8_mc32_mmi
void ff_avg_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2792
ff_avg_h264_qpel16_mc23_mmi
void ff_avg_h264_qpel16_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3098
ff_pw_20
const union av_intfloat64 ff_pw_20
Definition: constants.c:39
put_h264_qpel8or16_hv2_lowpass_mmi
static void put_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
Definition: h264qpel_mmi.c:1578
ff_avg_h264_qpel4_mc23_mmi
void ff_avg_h264_qpel4_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2439
put_h264_qpel8or16_hv_lowpass_mmi
static void put_h264_qpel8or16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride, int size)
Definition: h264qpel_mmi.c:1649
ff_put_h264_qpel4_mc30_mmi
void ff_put_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2168
h264dsp_mips.h
ff_put_h264_qpel4_mc01_mmi
void ff_put_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2176
ff_avg_h264_qpel16_mc12_mmi
void ff_avg_h264_qpel16_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3109
ff_put_h264_qpel4_mc11_mmi
void ff_put_h264_qpel4_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2207
avg_h264_qpel8_v_lowpass_mmi
static void avg_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:917
put_h264_qpel4_hv_lowpass_mmi
static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:1258
ff_avg_h264_qpel8_mc13_mmi
void ff_avg_h264_qpel8_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2725
ff_avg_h264_qpel4_mc30_mmi
void ff_avg_h264_qpel4_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2332
put_h264_qpel16_hv_lowpass_mmi
static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1665
bit_depth_template.c
avg_h264_qpel8or16_hv2_lowpass_mmi
static void avg_h264_qpel8or16_hv2_lowpass_mmi(uint8_t *dst, int16_t *tmp, ptrdiff_t dstStride, ptrdiff_t tmpStride, int size)
Definition: h264qpel_mmi.c:1896
ff_avg_h264_qpel16_mc03_mmi
void ff_avg_h264_qpel16_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3016
copy_block16_mmi
static void copy_block16_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:77
put_h264_qpel16_h_lowpass_mmi
static void put_h264_qpel16_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:234
ff_put_h264_qpel8_mc12_mmi
void ff_put_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2617
ff_avg_h264_qpel4_mc31_mmi
void ff_avg_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2384
put_h264_qpel16_v_lowpass_mmi
static void put_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:792
ff_avg_h264_qpel8_mc01_mmi
void ff_avg_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2668
ff_avg_h264_qpel4_mc01_mmi
void ff_avg_h264_qpel4_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2340
ff_put_h264_qpel16_mc00_mmi
void ff_put_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2804
ff_avg_h264_qpel16_mc20_mmi
void ff_avg_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2982
ff_avg_h264_qpel8_mc12_mmi
void ff_avg_h264_qpel8_mc12_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2781
ff_put_h264_qpel8_mc11_mmi
void ff_put_h264_qpel8_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2535
hpeldsp_mips.h
ff_avg_pixels16_l2_8_mmi
void ff_avg_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:605
dst
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
Definition: dsp.h:83
ff_put_h264_qpel8_mc32_mmi
void ff_put_h264_qpel8_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2628
avg_pixels8_l2_shift5_mmi
static void avg_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
Definition: h264qpel_mmi.c:2087
avg_h264_qpel16_v_lowpass_mmi
static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:1247
size
int size
Definition: twinvq_data.h:10344
ff_avg_h264_qpel16_mc13_mmi
void ff_avg_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3053
avg_h264_qpel4_h_lowpass_mmi
static void avg_h264_qpel4_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:245
ff_put_h264_qpel4_mc10_mmi
void ff_put_h264_qpel4_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2154
ff_avg_h264_qpel8_mc03_mmi
void ff_avg_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2688
DECLARE_VAR_ALL64
#define DECLARE_VAR_ALL64
Definition: mmiutils.h:39
ff_put_h264_qpel4_mc03_mmi
void ff_put_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2196
ff_put_h264_qpel16_mc11_mmi
void ff_put_h264_qpel16_mc11_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2863
put_pixels8_l2_shift5_mmi
static void put_pixels8_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
Definition: h264qpel_mmi.c:1749
put_h264_qpel8_v_lowpass_mmi
static void put_h264_qpel8_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:494
ff_avg_h264_qpel16_mc22_mmi
void ff_avg_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3079
avg_h264_qpel8_hv_lowpass_mmi
static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, int16_t *tmp, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t tmpStride, ptrdiff_t srcStride)
Definition: h264qpel_mmi.c:1977
ff_avg_h264_qpel16_mc33_mmi
void ff_avg_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3066
ff_avg_h264_qpel8_mc02_mmi
void ff_avg_h264_qpel8_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2679
ff_put_h264_qpel8_mc10_mmi
void ff_put_h264_qpel8_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2482
ff_avg_h264_qpel4_mc20_mmi
void ff_avg_h264_qpel4_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2326
ff_put_h264_qpel8_mc01_mmi
void ff_put_h264_qpel8_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2504
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
src2
const pixel * src2
Definition: h264pred_template.c:421
avg_pixels16_l2_shift5_mmi
static void avg_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int b)
Definition: h264qpel_mmi.c:2139
ff_avg_h264_qpel16_mc00_mmi
void ff_avg_h264_qpel16_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2968
ff_put_h264_qpel16_mc33_mmi
void ff_put_h264_qpel16_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2902
put_pixels16_l2_shift5_mmi
static void put_pixels16_l2_shift5_mmi(uint8_t *dst, int16_t *src16, const uint8_t *src8, ptrdiff_t dstStride, ptrdiff_t src8Stride, int h)
Definition: h264qpel_mmi.c:1813
avg_h264_qpel4_v_lowpass_mmi
static void avg_h264_qpel4_v_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:803
ff_avg_h264_qpel4_mc22_mmi
void ff_avg_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2423
put_h264_qpel8_h_lowpass_mmi
static void put_h264_qpel8_h_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:164
copy_block8_mmi
static void copy_block8_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:53
av_intfloat64::f
double f
Definition: intfloat.h:34
ff_put_h264_qpel8_mc22_mmi
void ff_put_h264_qpel8_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2587
stride
#define stride
Definition: h264pred_template.c:536
ff_avg_pixels8_l2_8_mmi
void ff_avg_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:533
ff_avg_h264_qpel16_mc02_mmi
void ff_avg_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:3007
copy_block4_mmi
static void copy_block4_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
Definition: h264qpel_mmi.c:29
avg_h264_qpel4_hv_lowpass_mmi
static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride)
Definition: h264qpel_mmi.c:1821
ff_avg_pixels16_8_mmi
void ff_avg_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
__asm__
__asm__(".macro parse_r var r\n\t" "\\var = -1\n\t" _IFC_REG(0) _IFC_REG(1) _IFC_REG(2) _IFC_REG(3) _IFC_REG(4) _IFC_REG(5) _IFC_REG(6) _IFC_REG(7) _IFC_REG(8) _IFC_REG(9) _IFC_REG(10) _IFC_REG(11) _IFC_REG(12) _IFC_REG(13) _IFC_REG(14) _IFC_REG(15) _IFC_REG(16) _IFC_REG(17) _IFC_REG(18) _IFC_REG(19) _IFC_REG(20) _IFC_REG(21) _IFC_REG(22) _IFC_REG(23) _IFC_REG(24) _IFC_REG(25) _IFC_REG(26) _IFC_REG(27) _IFC_REG(28) _IFC_REG(29) _IFC_REG(30) _IFC_REG(31) ".iflt \\var\n\t" ".error \"Unable to parse register name \\r\"\n\t" ".endif\n\t" ".endm")
ff_put_h264_qpel4_mc00_mmi
void ff_put_h264_qpel4_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2148
PTR_ADDU
#define PTR_ADDU
Definition: asmdefs.h:49
put_h264_qpel16_h_lowpass_l2_mmi
static void put_h264_qpel16_h_lowpass_l2_mmi(uint8_t *dst, const uint8_t *src, const uint8_t *src2, ptrdiff_t dstStride, ptrdiff_t src2Stride)
Definition: h264qpel_mmi.c:1797
ff_put_h264_qpel4_mc22_mmi
void ff_put_h264_qpel4_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2259
ff_avg_h264_qpel16_mc01_mmi
void ff_avg_h264_qpel16_mc01_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2996
ff_put_h264_qpel8_mc21_mmi
void ff_put_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2595
RESTRICT_ASM_LOW32
#define RESTRICT_ASM_LOW32
Definition: mmiutils.h:38
DECLARE_VAR_ADDRT
#define DECLARE_VAR_ADDRT
Definition: mmiutils.h:41
ff_put_h264_qpel8_mc30_mmi
void ff_put_h264_qpel8_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2496
ff_put_pixels4_8_mmi
void ff_put_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel8_mc21_mmi
void ff_avg_h264_qpel8_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2759
ff_put_h264_qpel16_mc13_mmi
void ff_put_h264_qpel16_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2889
ff_put_h264_qpel4_mc21_mmi
void ff_put_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2265
ff_avg_h264_qpel4_mc03_mmi
void ff_avg_h264_qpel4_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2360
ff_avg_h264_qpel16_mc30_mmi
void ff_avg_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2988
op2_put
#define op2_put(a, b)
Definition: h264qpel_mmi.c:108
temp
else temp
Definition: vf_mcdeint.c:263
ff_put_h264_qpel8_mc03_mmi
void ff_put_h264_qpel8_mc03_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2524
PTR_ADDIU
#define PTR_ADDIU
Definition: asmdefs.h:50
ff_put_h264_qpel8_mc20_mmi
void ff_put_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2490
ff_put_h264_qpel8_mc23_mmi
void ff_put_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2606
ff_avg_h264_qpel8_mc20_mmi
void ff_avg_h264_qpel8_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2654
ff_put_h264_qpel16_mc10_mmi
void ff_put_h264_qpel16_mc10_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2810
ff_put_h264_qpel16_mc30_mmi
void ff_put_h264_qpel16_mc30_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2824
ff_avg_h264_qpel4_mc33_mmi
void ff_avg_h264_qpel4_mc33_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2410
ff_put_h264_qpel16_mc21_mmi
void ff_put_h264_qpel16_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2923
ff_avg_h264_qpel4_mc21_mmi
void ff_avg_h264_qpel4_mc21_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2429
ff_put_pixels16_8_mmi
void ff_put_pixels16_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_put_h264_qpel16_mc22_mmi
void ff_put_h264_qpel16_mc22_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2915
ff_put_h264_qpel16_mc20_mmi
void ff_put_h264_qpel16_mc20_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2818
h
h
Definition: vp9dsp_template.c:2070
ff_avg_h264_qpel4_mc13_mmi
void ff_avg_h264_qpel4_mc13_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2397
ff_put_h264_qpel4_mc31_mmi
void ff_put_h264_qpel4_mc31_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2220
ff_pw_5
const union av_intfloat64 ff_pw_5
Definition: constants.c:29
RESTRICT_ASM_ADDRT
#define RESTRICT_ASM_ADDRT
Definition: mmiutils.h:42
RESTRICT_ASM_ALL64
#define RESTRICT_ASM_ALL64
Definition: mmiutils.h:40
ff_avg_h264_qpel8_mc23_mmi
void ff_avg_h264_qpel8_mc23_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2770
ff_put_h264_qpel16_mc02_mmi
void ff_put_h264_qpel16_mc02_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2843
ff_put_h264_qpel16_mc32_mmi
void ff_put_h264_qpel16_mc32_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2956
src
#define src
Definition: vp8dsp.c:248
ff_pw_16
const union av_intfloat64 ff_pw_16
Definition: constants.c:36
ff_put_pixels16_l2_8_mmi
void ff_put_pixels16_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:408
ff_avg_pixels4_8_mmi
void ff_avg_pixels4_8_mmi(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int32_t h)
ff_avg_h264_qpel8_mc00_mmi
void ff_avg_h264_qpel8_mc00_mmi(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: h264qpel_mmi.c:2640
ff_put_pixels8_l2_8_mmi
void ff_put_pixels8_l2_8_mmi(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h)
Definition: hpeldsp_mmi.c:347