FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
h264dsp_mmi.c
Go to the documentation of this file.
1 /*
2  * Loongson SIMD optimized h264dsp
3  *
4  * Copyright (c) 2015 Loongson Technology Corporation Limited
5  * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
6  * Zhang Shuangshuang <zhangshuangshuang@ict.ac.cn>
7  *
8  * This file is part of FFmpeg.
9  *
10  * FFmpeg is free software; you can redistribute it and/or
11  * modify it under the terms of the GNU Lesser General Public
12  * License as published by the Free Software Foundation; either
13  * version 2.1 of the License, or (at your option) any later version.
14  *
15  * FFmpeg is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  * Lesser General Public License for more details.
19  *
20  * You should have received a copy of the GNU Lesser General Public
21  * License along with FFmpeg; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23  */
24 
26 #include "h264dsp_mips.h"
27 
29  int height, int log2_denom, int weight, int offset)
30 {
31  int y;
32 
33  offset <<= log2_denom;
34 
35  if (log2_denom)
36  offset += 1 << (log2_denom - 1);
37 
38  for (y=0; y<height; y++, block+=stride) {
39  __asm__ volatile (
40  "ldc1 $f2, %0 \r\n"
41  "ldc1 $f4, %1 \r\n"
42  "dmtc1 $0, $f20 \r\n"
43  "mtc1 %2, $f6 \r\n"
44  "mtc1 %3, $f8 \r\n"
45  "mtc1 %4, $f10 \r\n"
46  "pshufh $f6, $f6, $f20 \r\n"
47  "pshufh $f8, $f8, $f20 \r\n"
48  "punpckhbh $f14, $f2, $f20 \r\n"
49  "punpckhbh $f16, $f4, $f20 \r\n"
50  "punpcklbh $f2, $f2, $f20 \r\n"
51  "punpcklbh $f4, $f4, $f20 \r\n"
52  "pmullh $f14, $f14, $f6 \r\n"
53  "pmullh $f16, $f16, $f6 \r\n"
54  "pmullh $f2, $f2, $f6 \r\n"
55  "pmullh $f4, $f4, $f6 \r\n"
56  "paddsh $f14, $f14, $f8 \r\n"
57  "paddsh $f16, $f16, $f8 \r\n"
58  "paddsh $f2, $f2, $f8 \r\n"
59  "paddsh $f4, $f4, $f8 \r\n"
60  "psrah $f14, $f14, $f10 \r\n"
61  "psrah $f16, $f16, $f10 \r\n"
62  "psrah $f2, $f2, $f10 \r\n"
63  "psrah $f4, $f4, $f10 \r\n"
64  "packushb $f2, $f2, $f14 \r\n"
65  "packushb $f4, $f4, $f16 \r\n"
66  "sdc1 $f2, %0 \r\n"
67  "sdc1 $f4, %1 \r\n"
68  : "=m"(*block),"=m"(*(block + 8))
69  : "r"(weight),"r"(offset),"r"(log2_denom)
70  );
71  }
72 }
73 
75  int stride, int height, int log2_denom, int weightd, int weights,
76  int offset)
77 {
78  int y;
79 
80  offset = ((offset + 1) | 1) << log2_denom;
81 
82  for (y=0; y<height; y++, dst+=stride, src+=stride) {
83  __asm__ volatile (
84  "ldc1 $f2, %2 \r\n"
85  "ldc1 $f4, %3 \r\n"
86  "dmtc1 $0, $f20 \r\n"
87  "mtc1 %6, $f6 \r\n"
88  "mtc1 %7, $f8 \r\n"
89  "mtc1 %8, $f10 \r\n"
90  "mtc1 %9, $f12 \r\n"
91  "pshufh $f6, $f6, $f20 \r\n"
92  "pshufh $f8, $f8, $f20 \r\n"
93  "pshufh $f10, $f10, $f20 \r\n"
94  "punpckhbh $f14, $f2, $f20 \r\n"
95  "punpckhbh $f16, $f4, $f20 \r\n"
96  "punpcklbh $f2, $f2, $f20 \r\n"
97  "punpcklbh $f4, $f4, $f20 \r\n"
98  "pmullh $f14, $f14, $f6 \r\n"
99  "pmullh $f16, $f16, $f8 \r\n"
100  "pmullh $f2, $f2, $f6 \r\n"
101  "pmullh $f4, $f4, $f8 \r\n"
102  "paddsh $f14, $f14, $f10 \r\n"
103  "paddsh $f2, $f2, $f10 \r\n"
104  "paddsh $f14, $f14, $f16 \r\n"
105  "paddsh $f2, $f2, $f4 \r\n"
106  "psrah $f14, $f14, $f12 \r\n"
107  "psrah $f2, $f2, $f12 \r\n"
108  "packushb $f2, $f2, $f14 \r\n"
109  "sdc1 $f2, %0 \r\n"
110  "ldc1 $f2, %4 \r\n"
111  "ldc1 $f4, %5 \r\n"
112  "punpckhbh $f14, $f2, $f20 \r\n"
113  "punpckhbh $f16, $f4, $f20 \r\n"
114  "punpcklbh $f2, $f2, $f20 \r\n"
115  "punpcklbh $f4, $f4, $f20 \r\n"
116  "pmullh $f14, $f14, $f6 \r\n"
117  "pmullh $f16, $f16, $f8 \r\n"
118  "pmullh $f2, $f2, $f6 \r\n"
119  "pmullh $f4, $f4, $f8 \r\n"
120  "paddsh $f14, $f14, $f10 \r\n"
121  "paddsh $f2, $f2, $f10 \r\n"
122  "paddsh $f14, $f14, $f16 \r\n"
123  "paddsh $f2, $f2, $f4 \r\n"
124  "psrah $f14, $f14, $f12 \r\n"
125  "psrah $f2, $f2, $f12 \r\n"
126  "packushb $f2, $f2, $f14 \r\n"
127  "sdc1 $f2, %1 \r\n"
128  : "=m"(*dst),"=m"(*(dst+8))
129  : "m"(*src),"m"(*dst),"m"(*(src+8)),"m"(*(dst+8)),
130  "r"(weights),"r"(weightd),"r"(offset),"r"(log2_denom+1)
131  );
132  }
133 }
134 
136  int log2_denom, int weight, int offset)
137 {
138  int y;
139 
140  offset <<= log2_denom;
141 
142  if (log2_denom)
143  offset += 1 << (log2_denom - 1);
144 
145  for (y=0; y<height; y++, block+=stride) {
146  __asm__ volatile (
147  "ldc1 $f2, %0 \r\n"
148  "mtc1 %1, $f6 \r\n"
149  "mtc1 %2, $f8 \r\n"
150  "mtc1 %3, $f10 \r\n"
151  "dmtc1 $0, $f20 \r\n"
152  "pshufh $f6, $f6, $f20 \r\n"
153  "pshufh $f8, $f8, $f20 \r\n"
154  "punpckhbh $f14, $f2, $f20 \r\n"
155  "punpcklbh $f2, $f2, $f20 \r\n"
156  "pmullh $f14, $f14, $f6 \r\n"
157  "pmullh $f2, $f2, $f6 \r\n"
158  "paddsh $f14, $f14, $f8 \r\n"
159  "paddsh $f2, $f2, $f8 \r\n"
160  "psrah $f14, $f14, $f10 \r\n"
161  "psrah $f2, $f2, $f10 \r\n"
162  "packushb $f2, $f2, $f14 \r\n"
163  "sdc1 $f2, %0 \r\n"
164  : "=m"(*block)
165  : "r"(weight),"r"(offset),"r"(log2_denom)
166  );
167  }
168 }
169 
171  int stride, int height, int log2_denom, int weightd, int weights,
172  int offset)
173 {
174  int y;
175 
176  offset = ((offset + 1) | 1) << log2_denom;
177 
178  for (y=0; y<height; y++, dst+=stride, src+=stride) {
179  __asm__ volatile (
180  "ldc1 $f2, %1 \r\n"
181  "ldc1 $f4, %2 \r\n"
182  "dmtc1 $0, $f20 \r\n"
183  "mtc1 %3, $f6 \r\n"
184  "mtc1 %4, $f8 \r\n"
185  "mtc1 %5, $f10 \r\n"
186  "mtc1 %6, $f12 \r\n"
187  "pshufh $f6, $f6, $f20 \r\n"
188  "pshufh $f8, $f8, $f20 \r\n"
189  "pshufh $f10, $f10, $f20 \r\n"
190  "punpckhbh $f14, $f2, $f20 \r\n"
191  "punpckhbh $f16, $f4, $f20 \r\n"
192  "punpcklbh $f2, $f2, $f20 \r\n"
193  "punpcklbh $f4, $f4, $f20 \r\n"
194  "pmullh $f14, $f14, $f6 \r\n"
195  "pmullh $f16, $f16, $f8 \r\n"
196  "pmullh $f2, $f2, $f6 \r\n"
197  "pmullh $f4, $f4, $f8 \r\n"
198  "paddsh $f14, $f14, $f10 \r\n"
199  "paddsh $f2, $f2, $f10 \r\n"
200  "paddsh $f14, $f14, $f16 \r\n"
201  "paddsh $f2, $f2, $f4 \r\n"
202  "psrah $f14, $f14, $f12 \r\n"
203  "psrah $f2, $f2, $f12 \r\n"
204  "packushb $f2, $f2, $f14 \r\n"
205  "sdc1 $f2, %0 \r\n"
206  : "=m"(*dst)
207  : "m"(*src),"m"(*dst),"r"(weights),
208  "r"(weightd),"r"(offset),"r"(log2_denom+1)
209  );
210  }
211 }
212 
214  int log2_denom, int weight, int offset)
215 {
216  int y;
217 
218  offset <<= log2_denom;
219 
220  if (log2_denom)
221  offset += 1 << (log2_denom - 1);
222 
223  for (y=0; y<height; y++, block+=stride) {
224  __asm__ volatile (
225  "lwc1 $f2, %0 \r\n"
226  "mtc1 %1, $f6 \r\n"
227  "mtc1 %2, $f8 \r\n"
228  "mtc1 %3, $f10 \r\n"
229  "dmtc1 $0, $f20 \r\n"
230  "pshufh $f6, $f6, $f20 \r\n"
231  "pshufh $f8, $f8, $f20 \r\n"
232  "punpcklbh $f2, $f2, $f20 \r\n"
233  "pmullh $f2, $f2, $f6 \r\n"
234  "paddsh $f2, $f2, $f8 \r\n"
235  "psrah $f2, $f2, $f10 \r\n"
236  "packushb $f2, $f2, $f20 \r\n"
237  "swc1 $f2, %0 \r\n"
238  : "=m"(*block)
239  : "r"(weight),"r"(offset),"r"(log2_denom)
240  );
241  }
242 }
243 
245  int stride, int height, int log2_denom, int weightd, int weights,
246  int offset)
247 {
248  int y;
249 
250  offset = ((offset + 1) | 1) << log2_denom;
251 
252  for (y=0; y<height; y++, dst+=stride, src+=stride) {
253  __asm__ volatile (
254  "lwc1 $f2, %1 \r\n"
255  "lwc1 $f4, %2 \r\n"
256  "dmtc1 $0, $f20 \r\n"
257  "mtc1 %3, $f6 \r\n"
258  "mtc1 %4, $f8 \r\n"
259  "mtc1 %5, $f10 \r\n"
260  "mtc1 %6, $f12 \r\n"
261  "pshufh $f6, $f6, $f20 \r\n"
262  "pshufh $f8, $f8, $f20 \r\n"
263  "pshufh $f10, $f10, $f20 \r\n"
264  "punpcklbh $f2, $f2, $f20 \r\n"
265  "punpcklbh $f4, $f4, $f20 \r\n"
266  "pmullh $f2, $f2, $f6 \r\n"
267  "pmullh $f4, $f4, $f8 \r\n"
268  "paddsh $f2, $f2, $f10 \r\n"
269  "paddsh $f2, $f2, $f4 \r\n"
270  "psrah $f2, $f2, $f12 \r\n"
271  "packushb $f2, $f2, $f20 \r\n"
272  "swc1 $f2, %0 \r\n"
273  : "=m"(*dst)
274  : "m"(*src),"m"(*dst),"r"(weights),
275  "r"(weightd),"r"(offset),"r"(log2_denom+1)
276  );
277  }
278 }
void ff_h264_weight_pixels8_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:135
uint8_t
void ff_h264_biweight_pixels16_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:74
void ff_h264_weight_pixels4_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:213
void ff_h264_biweight_pixels4_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:244
static const uint8_t offset[127][2]
Definition: vf_spp.c:92
float y
AVS_Value src
Definition: avisynth_c.h:482
void ff_h264_biweight_pixels8_8_mmi(uint8_t *dst, uint8_t *src, int stride, int height, int log2_denom, int weightd, int weights, int offset)
Definition: h264dsp_mmi.c:170
BYTE int const BYTE int int int height
Definition: avisynth_c.h:676
static int weight(int i, int blen, int offset)
Definition: diracdec.c:1298
GLint GLenum GLboolean GLsizei stride
Definition: opengl_enc.c:105
void ff_h264_weight_pixels16_8_mmi(uint8_t *block, int stride, int height, int log2_denom, int weight, int offset)
Definition: h264dsp_mmi.c:28
#define stride
static int16_t block[64]
Definition: dct-test.c:110