00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00055 #include <string.h>
00056
00057 #include "libavcodec/mpegaudiodsp.h"
00058
/**
 * Fixed-point windowing stage implemented with MIPS DSP multiply-accumulate
 * instructions.
 *
 * The routine writes 32 clamped 16-bit samples: one from the first asm
 * statement, fifteen pairs from the loop (one walking forward through
 * @p samples, one walking backward from samples + 31 * incr), and one final
 * sample from the last asm statement.
 *
 * Each output is the 64-bit accumulator shifted right by 24 (extr.w) and
 * clamped to [-32768, 32767]; the low 24 bits of the accumulator are kept in
 * temp1 and used to seed the next accumulation.
 *
 * @param synth_buf    input buffer; its first 32 words are duplicated at
 *                     synth_buf + 512 before any filtering is done
 * @param window       coefficient table, read at word offsets 0..511
 * @param dither_state in: initial low word of the accumulator;
 *                     out: low 24 bits left over after the last sample
 * @param samples      output samples, written with a stride of @p incr
 * @param incr         distance (in int16_t elements) between output samples
 */
static void ff_mpadsp_apply_window_mips_fixed(int32_t *synth_buf, int32_t *window,
                                              int *dither_state, int16_t *samples, int incr)
{
    register const int32_t *w, *w2, *p;
    int j;
    int16_t *samples2;
    int w_asm, p_asm, w_asm1, p_asm1, w_asm2, p_asm2;
    int w2_asm, w2_asm1, *p_temp1, *p_temp2;
    int sum1 = 0;
    int const min_asm = -32768, max_asm = 32767;
    int temp1, temp2 = 0, temp3 = 0;

    /* duplicate the first 32 words past the 512-word mark */
    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
    samples2 = samples + 31 * incr;
    w        = window;
    w2       = window + 31;
    p        = synth_buf + 16;
    p_temp1  = synth_buf + 16;
    p_temp2  = synth_buf + 48;
    temp1    = *dither_state;

    /*
     * First output sample: eight products are added (offsets 0, 64, ..., 448)
     * and eight are subtracted (offsets 32, 96, ..., 480) in $ac0, which is
     * seeded with temp1.  The asm reads and writes memory through pointer
     * operands, hence the "memory" clobber.
     */
    __asm__ volatile (
        "mthi   $zero \n\t"
        "mtlo   %[temp1] \n\t"
        "lw     %[w_asm], 0(%[w]) \n\t"
        "lw     %[p_asm], 0(%[p]) \n\t"
        "lw     %[w_asm1], 64*4(%[w]) \n\t"
        "lw     %[p_asm1], 64*4(%[p]) \n\t"
        "lw     %[w_asm2], 128*4(%[w]) \n\t"
        "lw     %[p_asm2], 128*4(%[p]) \n\t"
        "madd   %[w_asm], %[p_asm] \n\t"
        "madd   %[w_asm1], %[p_asm1] \n\t"
        "madd   %[w_asm2], %[p_asm2] \n\t"
        "lw     %[w_asm], 192*4(%[w]) \n\t"
        "lw     %[p_asm], 192*4(%[p]) \n\t"
        "lw     %[w_asm1], 256*4(%[w]) \n\t"
        "lw     %[p_asm1], 256*4(%[p]) \n\t"
        "lw     %[w_asm2], 320*4(%[w]) \n\t"
        "lw     %[p_asm2], 320*4(%[p]) \n\t"
        "madd   %[w_asm], %[p_asm] \n\t"
        "madd   %[w_asm1], %[p_asm1] \n\t"
        "madd   %[w_asm2], %[p_asm2] \n\t"
        "lw     %[w_asm], 384*4(%[w]) \n\t"
        "lw     %[p_asm], 384*4(%[p]) \n\t"
        "lw     %[w_asm1], 448*4(%[w]) \n\t"
        "lw     %[p_asm1], 448*4(%[p]) \n\t"
        "lw     %[w_asm2], 32*4(%[w]) \n\t"
        "lw     %[p_asm2], 32*4(%[p]) \n\t"
        "madd   %[w_asm], %[p_asm] \n\t"
        "madd   %[w_asm1], %[p_asm1] \n\t"
        "msub   %[w_asm2], %[p_asm2] \n\t"
        "lw     %[w_asm], 96*4(%[w]) \n\t"
        "lw     %[p_asm], 96*4(%[p]) \n\t"
        "lw     %[w_asm1], 160*4(%[w]) \n\t"
        "lw     %[p_asm1], 160*4(%[p]) \n\t"
        "lw     %[w_asm2], 224*4(%[w]) \n\t"
        "lw     %[p_asm2], 224*4(%[p]) \n\t"
        "msub   %[w_asm], %[p_asm] \n\t"
        "msub   %[w_asm1], %[p_asm1] \n\t"
        "msub   %[w_asm2], %[p_asm2] \n\t"
        "lw     %[w_asm], 288*4(%[w]) \n\t"
        "lw     %[p_asm], 288*4(%[p]) \n\t"
        "lw     %[w_asm1], 352*4(%[w]) \n\t"
        "lw     %[p_asm1], 352*4(%[p]) \n\t"
        "msub   %[w_asm], %[p_asm] \n\t"
        "lw     %[w_asm], 480*4(%[w]) \n\t"
        "lw     %[p_asm], 480*4(%[p]) \n\t"
        "lw     %[w_asm2], 416*4(%[w]) \n\t"
        "lw     %[p_asm2], 416*4(%[p]) \n\t"
        "msub   %[w_asm], %[p_asm] \n\t"
        "msub   %[w_asm1], %[p_asm1] \n\t"
        "msub   %[w_asm2], %[p_asm2] \n\t"

        /* shift by 24, keep the low 24 bits as carry, clamp, store */
        "extr.w %[sum1], $ac0, 24 \n\t"
        "mflo   %[temp3] \n\t"
        "addi   %[w], %[w], 4 \n\t"
        "and    %[temp1], %[temp3], 0x00ffffff \n\t"
        "slt    %[temp2], %[sum1], %[min_asm] \n\t"
        "movn   %[sum1], %[min_asm], %[temp2] \n\t"
        "slt    %[temp2], %[max_asm], %[sum1] \n\t"
        "movn   %[sum1], %[max_asm], %[temp2] \n\t"
        "sh     %[sum1], 0(%[samples]) \n\t"

        : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
          [p_asm1] "=&r" (p_asm1), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
          [w_asm2] "=&r" (w_asm2), [p_asm2] "=&r" (p_asm2),
          [sum1] "+r" (sum1), [w] "+r" (w), [temp3] "+r" (temp3)
        : [p] "r" (p), [samples] "r" (samples), [min_asm] "r" (min_asm),
          [max_asm] "r" (max_asm)
        : "memory", "hi", "lo"
    );

    samples += incr;

    /*
     * Fifteen iterations, two output samples each.  $ac0 accumulates the
     * sample stored through `samples`, $ac1 the one stored through
     * `samples2`.  The low 24 bits left in $ac0 are added into $ac1 before
     * $ac1 is extracted, and the low 24 bits of $ac1 become temp1 for the
     * next iteration.
     */
    for (j = 1; j < 16; j++) {
        __asm__ volatile (
            "mthi   $0, $ac1 \n\t"
            "mtlo   $0, $ac1 \n\t"
            "mthi   $0 \n\t"
            "mtlo   %[temp1] \n\t"
            "addi   %[p_temp1], %[p_temp1], 4 \n\t"
            "lw     %[w_asm], 0(%[w]) \n\t"
            "lw     %[p_asm], 0(%[p_temp1]) \n\t"
            "lw     %[w2_asm], 0(%[w2]) \n\t"
            "lw     %[w_asm1], 64*4(%[w]) \n\t"
            "lw     %[p_asm1], 64*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm1], 64*4(%[w2]) \n\t"
            "madd   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "madd   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "lw     %[w_asm], 128*4(%[w]) \n\t"
            "lw     %[p_asm], 128*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm], 128*4(%[w2]) \n\t"
            "lw     %[w_asm1], 192*4(%[w]) \n\t"
            "lw     %[p_asm1], 192*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm1], 192*4(%[w2]) \n\t"
            "madd   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "madd   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "lw     %[w_asm], 256*4(%[w]) \n\t"
            "lw     %[p_asm], 256*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm], 256*4(%[w2]) \n\t"
            "lw     %[w_asm1], 320*4(%[w]) \n\t"
            "lw     %[p_asm1], 320*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm1], 320*4(%[w2]) \n\t"
            "madd   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "madd   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "lw     %[w_asm], 384*4(%[w]) \n\t"
            "lw     %[p_asm], 384*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm], 384*4(%[w2]) \n\t"
            "lw     %[w_asm1], 448*4(%[w]) \n\t"
            "lw     %[p_asm1], 448*4(%[p_temp1]) \n\t"
            "lw     %[w2_asm1], 448*4(%[w2]) \n\t"
            "madd   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "madd   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "addi   %[p_temp2], %[p_temp2], -4 \n\t"
            "lw     %[w_asm], 32*4(%[w]) \n\t"
            "lw     %[p_asm], 0(%[p_temp2]) \n\t"
            "lw     %[w2_asm], 32*4(%[w2]) \n\t"
            "lw     %[w_asm1], 96*4(%[w]) \n\t"
            "lw     %[p_asm1], 64*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm1], 96*4(%[w2]) \n\t"
            "msub   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "msub   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "lw     %[w_asm], 160*4(%[w]) \n\t"
            "lw     %[p_asm], 128*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm], 160*4(%[w2]) \n\t"
            "lw     %[w_asm1], 224*4(%[w]) \n\t"
            "lw     %[p_asm1], 192*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm1], 224*4(%[w2]) \n\t"
            "msub   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "msub   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "lw     %[w_asm], 288*4(%[w]) \n\t"
            "lw     %[p_asm], 256*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm], 288*4(%[w2]) \n\t"
            "lw     %[w_asm1], 352*4(%[w]) \n\t"
            "lw     %[p_asm1], 320*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm1], 352*4(%[w2]) \n\t"
            "msub   %[w_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "msub   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "lw     %[w_asm], 416*4(%[w]) \n\t"
            "lw     %[p_asm], 384*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm], 416*4(%[w2]) \n\t"
            "lw     %[w_asm1], 480*4(%[w]) \n\t"
            "lw     %[p_asm1], 448*4(%[p_temp2]) \n\t"
            "lw     %[w2_asm1], 480*4(%[w2]) \n\t"
            "msub   %[w_asm], %[p_asm] \n\t"
            "msub   %[w_asm1], %[p_asm1] \n\t"
            "msub   $ac1, %[w2_asm], %[p_asm] \n\t"
            "msub   $ac1, %[w2_asm1], %[p_asm1] \n\t"
            "addi   %[w], %[w], 4 \n\t"
            "addi   %[w2], %[w2], -4 \n\t"

            /* forward sample from $ac0; its low 24 bits are added to $ac1 */
            "mflo   %[temp2] \n\t"
            "extr.w %[sum1], $ac0, 24 \n\t"
            "li     %[temp3], 1 \n\t"
            "and    %[temp1], %[temp2], 0x00ffffff \n\t"
            "madd   $ac1, %[temp1], %[temp3] \n\t"
            "slt    %[temp2], %[sum1], %[min_asm] \n\t"
            "movn   %[sum1], %[min_asm], %[temp2] \n\t"
            "slt    %[temp2], %[max_asm], %[sum1] \n\t"
            "movn   %[sum1], %[max_asm], %[temp2] \n\t"
            "sh     %[sum1], 0(%[samples]) \n\t"

            /* backward sample from $ac1 */
            "mflo   %[temp3], $ac1 \n\t"
            "extr.w %[sum1], $ac1, 24 \n\t"
            "and    %[temp1], %[temp3], 0x00ffffff \n\t"
            "slt    %[temp2], %[sum1], %[min_asm] \n\t"
            "movn   %[sum1], %[min_asm], %[temp2] \n\t"
            "slt    %[temp2], %[max_asm], %[sum1] \n\t"
            "movn   %[sum1], %[max_asm], %[temp2] \n\t"
            "sh     %[sum1], 0(%[samples2]) \n\t"

            : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
              [p_asm1] "=&r" (p_asm1), [w2_asm1] "=&r" (w2_asm1),
              [w2_asm] "=&r" (w2_asm), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
              [p_temp1] "+r" (p_temp1), [p_temp2] "+r" (p_temp2), [sum1] "+r" (sum1),
              [w] "+r" (w), [w2] "+r" (w2), [samples] "+r" (samples),
              [samples2] "+r" (samples2), [temp3] "+r" (temp3)
            : [min_asm] "r" (min_asm), [max_asm] "r" (max_asm)
            /* $ac1 is used in addition to the default accumulator */
            : "memory", "hi", "lo", "$ac1hi", "$ac1lo"
        );

        samples  += incr;
        samples2 -= incr;
    }

    p = synth_buf + 32;

    /* Final output sample: eight products, all subtracted from $ac0. */
    __asm__ volatile (
        "mthi   $0 \n\t"
        "mtlo   %[temp1] \n\t"
        "lw     %[w_asm], 32*4(%[w]) \n\t"
        "lw     %[p_asm], 0(%[p]) \n\t"
        "lw     %[w_asm1], 96*4(%[w]) \n\t"
        "lw     %[p_asm1], 64*4(%[p]) \n\t"
        "lw     %[w_asm2], 160*4(%[w]) \n\t"
        "lw     %[p_asm2], 128*4(%[p]) \n\t"
        "msub   %[w_asm], %[p_asm] \n\t"
        "msub   %[w_asm1], %[p_asm1] \n\t"
        "msub   %[w_asm2], %[p_asm2] \n\t"
        "lw     %[w_asm], 224*4(%[w]) \n\t"
        "lw     %[p_asm], 192*4(%[p]) \n\t"
        "lw     %[w_asm1], 288*4(%[w]) \n\t"
        "lw     %[p_asm1], 256*4(%[p]) \n\t"
        "lw     %[w_asm2], 352*4(%[w]) \n\t"
        "lw     %[p_asm2], 320*4(%[p]) \n\t"
        "msub   %[w_asm], %[p_asm] \n\t"
        "msub   %[w_asm1], %[p_asm1] \n\t"
        "msub   %[w_asm2], %[p_asm2] \n\t"
        "lw     %[w_asm], 416*4(%[w]) \n\t"
        "lw     %[p_asm], 384*4(%[p]) \n\t"
        "lw     %[w_asm1], 480*4(%[w]) \n\t"
        "lw     %[p_asm1], 448*4(%[p]) \n\t"
        "msub   %[w_asm], %[p_asm] \n\t"
        "msub   %[w_asm1], %[p_asm1] \n\t"
        "extr.w %[sum1], $ac0, 24 \n\t"
        "mflo   %[temp2] \n\t"
        "and    %[temp1], %[temp2], 0x00ffffff \n\t"
        "slt    %[temp2], %[sum1], %[min_asm] \n\t"
        "movn   %[sum1], %[min_asm], %[temp2] \n\t"
        "slt    %[temp2], %[max_asm], %[sum1] \n\t"
        "movn   %[sum1], %[max_asm], %[temp2] \n\t"
        "sh     %[sum1], 0(%[samples]) \n\t"

        : [w_asm] "=&r" (w_asm), [p_asm] "=&r" (p_asm), [w_asm1] "=&r" (w_asm1),
          [p_asm1] "=&r" (p_asm1), [temp1] "+r" (temp1), [temp2] "+r" (temp2),
          [w_asm2] "=&r" (w_asm2), [p_asm2] "=&r" (p_asm2), [sum1] "+r" (sum1)
        : [w] "r" (w), [p] "r" (p), [samples] "r" (samples), [min_asm] "r" (min_asm),
          [max_asm] "r" (max_asm)
        : "memory", "hi", "lo"
    );

    /* hand the remaining low 24 bits back to the caller */
    *dither_state = temp1;
}
00348
/**
 * One 18-coefficient transform block (a 36-point IMDCT, going by the function
 * name) written in MIPS DSP assembly.
 *
 * Three stages:
 *  1. in[] is rewritten in place: in[i] += in[i-1] for i = 17..1, then
 *     in[i] += in[i-2] for i = 17, 15, ..., 3.
 *  2. Two passes (j = 0, 1) read in[j + 0, 2, ..., 16] and store nine
 *     intermediate values each at tmp[j + 0, 2, ..., 16].
 *  3. tmp[] is combined with window coefficients.  For k = 0..17,
 *     out[32 * k] receives the high word of a product plus the previous
 *     buf[4 * k], and buf[4 * k] is replaced by the high word of another
 *     product.
 *
 * @param out  output; written at out[32 * k], k = 0..17
 * @param buf  read and then overwritten at buf[4 * k], k = 0..17
 * @param in   18 input coefficients; modified in place by stage 1
 * @param win  window coefficients; indices in the range 0..37 are read
 */
static void imdct36_mips_fixed(int *out, int *buf, int *in, int *win)
{
    int j;
    int t0, t1, t2, t3, s0, s1, s2, s3;
    int tmp[18], *tmp1, *in1;

    int temp_reg1, temp_reg2, temp_reg3, temp_reg4, temp_reg5, temp_reg6;
    int t4, t5, t6, t8, t7;

    /*
     * Multiplier constants held in registers for the asm below.  The first
     * four do not fit in a signed int, so they are written as unsigned
     * literals and cast explicitly (same 32-bit pattern as before).
     */
    int const C_1  = (int)4229717092u;
    int const C_2  = (int)4035949074u;
    int const C_3  = 575416510;
    int const C_3A = (int)3719550786u;
    int const C_4  = 1004831466;
    int const C_5  = 1534215534;
    int const C_7  = -1468965330;
    int const C_8  = -745813244;

    /*
     * Stage 1: in-place running sums over in[0..17].  The asm stores to
     * in[], so it carries a "memory" clobber.
     */
    __asm__ volatile (
        "lw     %[t1], 17*4(%[in]) \n\t"
        "lw     %[t2], 16*4(%[in]) \n\t"
        "lw     %[t3], 15*4(%[in]) \n\t"
        "lw     %[t4], 14*4(%[in]) \n\t"
        "addu   %[t1], %[t1], %[t2] \n\t"
        "addu   %[t2], %[t2], %[t3] \n\t"
        "addu   %[t3], %[t3], %[t4] \n\t"
        "lw     %[t5], 13*4(%[in]) \n\t"
        "addu   %[t1], %[t1], %[t3] \n\t"
        "sw     %[t2], 16*4(%[in]) \n\t"
        "lw     %[t6], 12*4(%[in]) \n\t"
        "sw     %[t1], 17*4(%[in]) \n\t"
        "addu   %[t4], %[t4], %[t5] \n\t"
        "addu   %[t5], %[t5], %[t6] \n\t"
        "lw     %[t7], 11*4(%[in]) \n\t"
        "addu   %[t3], %[t3], %[t5] \n\t"
        "sw     %[t4], 14*4(%[in]) \n\t"
        "lw     %[t8], 10*4(%[in]) \n\t"
        "sw     %[t3], 15*4(%[in]) \n\t"
        "addu   %[t6], %[t6], %[t7] \n\t"
        "addu   %[t7], %[t7], %[t8] \n\t"
        "sw     %[t6], 12*4(%[in]) \n\t"
        "addu   %[t5], %[t5], %[t7] \n\t"
        "lw     %[t1], 9*4(%[in]) \n\t"
        "lw     %[t2], 8*4(%[in]) \n\t"
        "sw     %[t5], 13*4(%[in]) \n\t"
        "addu   %[t8], %[t8], %[t1] \n\t"
        "addu   %[t1], %[t1], %[t2] \n\t"
        "sw     %[t8], 10*4(%[in]) \n\t"
        "addu   %[t7], %[t7], %[t1] \n\t"
        "lw     %[t3], 7*4(%[in]) \n\t"
        "lw     %[t4], 6*4(%[in]) \n\t"
        "sw     %[t7], 11*4(%[in]) \n\t"
        "addu   %[t2], %[t2], %[t3] \n\t"
        "addu   %[t3], %[t3], %[t4] \n\t"
        "sw     %[t2], 8*4(%[in]) \n\t"
        "addu   %[t1], %[t1], %[t3] \n\t"
        "lw     %[t5], 5*4(%[in]) \n\t"
        "lw     %[t6], 4*4(%[in]) \n\t"
        "sw     %[t1], 9*4(%[in]) \n\t"
        "addu   %[t4], %[t4], %[t5] \n\t"
        "addu   %[t5], %[t5], %[t6] \n\t"
        "sw     %[t4], 6*4(%[in]) \n\t"
        "addu   %[t3], %[t3], %[t5] \n\t"
        "lw     %[t7], 3*4(%[in]) \n\t"
        "lw     %[t8], 2*4(%[in]) \n\t"
        "sw     %[t3], 7*4(%[in]) \n\t"
        "addu   %[t6], %[t6], %[t7] \n\t"
        "addu   %[t7], %[t7], %[t8] \n\t"
        "sw     %[t6], 4*4(%[in]) \n\t"
        "addu   %[t5], %[t5], %[t7] \n\t"
        "lw     %[t1], 1*4(%[in]) \n\t"
        "lw     %[t2], 0*4(%[in]) \n\t"
        "sw     %[t5], 5*4(%[in]) \n\t"
        "addu   %[t8], %[t8], %[t1] \n\t"
        "addu   %[t1], %[t1], %[t2] \n\t"
        "sw     %[t8], 2*4(%[in]) \n\t"
        "addu   %[t7], %[t7], %[t1] \n\t"
        "sw     %[t7], 3*4(%[in]) \n\t"
        "sw     %[t1], 1*4(%[in]) \n\t"

        : [in] "+r" (in), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3),
          [t4] "=&r" (t4), [t5] "=&r" (t5), [t6] "=&r" (t6),
          [t7] "=&r" (t7), [t8] "=&r" (t8)
        :
        : "memory"
    );

    /*
     * Stage 2: one pass over the even-indexed inputs (j = 0) and one over the
     * odd-indexed inputs (j = 1).  Products against C_1..C_5/C_7/C_3A use
     * multu, and the high word is then adjusted with sra/movn/sub sequences
     * that depend on the sign of the other operand.  All four accumulators
     * are used, so $ac1..$ac3 are clobbered along with hi/lo.
     */
    for (j = 0; j < 2; j++) {
        tmp1 = tmp + j;
        in1  = in + j;

        __asm__ volatile (
            "lw     %[t7], 4*4(%[in1]) \n\t"
            "lw     %[t8], 8*4(%[in1]) \n\t"
            "lw     %[t6], 16*4(%[in1]) \n\t"
            "lw     %[t4], 0*4(%[in1]) \n\t"
            "addu   %[temp_reg2], %[t7], %[t8] \n\t"
            "addu   %[t2], %[t6], %[t8] \n\t"
            "multu  %[C_2], %[temp_reg2] \n\t"
            "lw     %[t5], 12*4(%[in1]) \n\t"
            "sub    %[t2], %[t2], %[t7] \n\t"
            "sub    %[t1], %[t4], %[t5] \n\t"
            "sra    %[t3], %[t5], 1 \n\t"
            "sra    %[temp_reg1], %[t2], 1 \n\t"
            "addu   %[t3], %[t3], %[t4] \n\t"
            "sub    %[temp_reg1], %[t1], %[temp_reg1] \n\t"
            "sra    %[temp_reg2], %[temp_reg2], 31 \n\t"
            "sw     %[temp_reg1], 6*4(%[tmp1]) \n\t"
            "move   %[t0], $0 \n\t"
            "movn   %[t0], %[C_2], %[temp_reg2] \n\t"
            "mfhi   %[temp_reg1] \n\t"
            "addu   %[t1], %[t1], %[t2] \n\t"
            "sw     %[t1], 16*4(%[tmp1]) \n\t"
            "sub    %[temp_reg4], %[t8], %[t6] \n\t"
            "add    %[temp_reg2], %[t7], %[t6] \n\t"
            "mult   $ac1, %[C_8], %[temp_reg4] \n\t"
            "multu  $ac2, %[C_4], %[temp_reg2] \n\t"
            "sub    %[t0], %[temp_reg1], %[t0] \n\t"
            "sra    %[temp_reg1], %[temp_reg2], 31 \n\t"
            "move   %[t2], $0 \n\t"
            "movn   %[t2], %[C_4], %[temp_reg1] \n\t"
            "mfhi   %[t1], $ac1 \n\t"
            "mfhi   %[temp_reg1], $ac2 \n\t"
            "lw     %[t6], 10*4(%[in1]) \n\t"
            "lw     %[t8], 14*4(%[in1]) \n\t"
            "lw     %[t7], 2*4(%[in1]) \n\t"
            "lw     %[t4], 6*4(%[in1]) \n\t"
            "sub    %[temp_reg3], %[t3], %[t0] \n\t"
            "add    %[temp_reg4], %[t3], %[t0] \n\t"
            "sub    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
            "add    %[temp_reg4], %[temp_reg4], %[t1] \n\t"
            "sub    %[t2], %[temp_reg1], %[t2] \n\t"
            "sw     %[temp_reg4], 2*4(%[tmp1]) \n\t"
            "sub    %[temp_reg3], %[temp_reg3], %[t2] \n\t"
            "add    %[temp_reg1], %[t3], %[t2] \n\t"
            "sw     %[temp_reg3], 10*4(%[tmp1]) \n\t"
            "sub    %[temp_reg1], %[temp_reg1], %[t1] \n\t"
            "addu   %[temp_reg2], %[t6], %[t8] \n\t"
            "sw     %[temp_reg1], 14*4(%[tmp1]) \n\t"
            "sub    %[temp_reg2], %[temp_reg2], %[t7] \n\t"
            "addu   %[temp_reg3], %[t7], %[t6] \n\t"
            "multu  $ac3, %[C_3], %[temp_reg2] \n\t"
            "multu  %[C_1], %[temp_reg3] \n\t"
            "sra    %[temp_reg1], %[temp_reg2], 31 \n\t"
            "move   %[t1], $0 \n\t"
            "sra    %[temp_reg3], %[temp_reg3], 31 \n\t"
            "movn   %[t1], %[C_3], %[temp_reg1] \n\t"
            "mfhi   %[temp_reg1], $ac3 \n\t"
            "mfhi   %[temp_reg4] \n\t"
            "move   %[t2], $0 \n\t"
            "movn   %[t2], %[C_1], %[temp_reg3] \n\t"
            "sub    %[temp_reg3], %[t6], %[t8] \n\t"
            "sub    %[t2], %[temp_reg4], %[t2] \n\t"
            "multu  $ac1, %[C_7], %[temp_reg3] \n\t"
            "sub    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
            "sra    %[temp_reg4], %[temp_reg3], 31 \n\t"
            "sub    %[t1], %[temp_reg1], %[t1] \n\t"
            "move   %[t3], $0 \n\t"
            "sw     %[t1], 4*4(%[tmp1]) \n\t"
            "movn   %[t3], %[C_7], %[temp_reg4] \n\t"
            "multu  $ac2, %[C_3A], %[t4] \n\t"
            "add    %[temp_reg2], %[t7], %[t8] \n\t"
            "move   %[t1], $0 \n\t"
            "mfhi   %[temp_reg4], $ac1 \n\t"
            "multu  $ac3, %[C_5], %[temp_reg2] \n\t"
            "move   %[t0], $0 \n\t"
            "sra    %[temp_reg1], %[temp_reg2], 31 \n\t"
            "movn   %[t1], %[C_5], %[temp_reg1] \n\t"
            "sub    %[temp_reg4], %[temp_reg4], %[temp_reg3] \n\t"
            "mfhi   %[temp_reg1], $ac3 \n\t"
            "sra    %[temp_reg3], %[t4], 31 \n\t"
            "movn   %[t0], %[C_3A], %[temp_reg3] \n\t"
            "mfhi   %[temp_reg3], $ac2 \n\t"
            "sub    %[t3], %[temp_reg4], %[t3] \n\t"
            "add    %[temp_reg4], %[t3], %[t2] \n\t"
            "sub    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
            "sub    %[t1], %[temp_reg1], %[t1] \n\t"
            "sub    %[t0], %[temp_reg3], %[t0] \n\t"
            "add    %[temp_reg1], %[t2], %[t1] \n\t"
            "add    %[temp_reg4], %[temp_reg4], %[t0] \n\t"
            "sub    %[temp_reg2], %[t3], %[t1] \n\t"
            "sw     %[temp_reg4], 0*4(%[tmp1]) \n\t"
            "sub    %[temp_reg1], %[temp_reg1], %[t0] \n\t"
            "sub    %[temp_reg2], %[temp_reg2], %[t0] \n\t"
            "sw     %[temp_reg1], 12*4(%[tmp1]) \n\t"
            "sw     %[temp_reg2], 8*4(%[tmp1]) \n\t"

            : [t7] "=&r" (t7), [temp_reg1] "=&r" (temp_reg1),
              [temp_reg2] "=&r" (temp_reg2), [temp_reg4] "=&r" (temp_reg4),
              [temp_reg3] "=&r" (temp_reg3), [t8] "=&r" (t8), [t0] "=&r" (t0),
              [t4] "=&r" (t4), [t5] "=&r" (t5), [t6] "=&r" (t6), [t2] "=&r" (t2),
              [t3] "=&r" (t3), [t1] "=&r" (t1)
            : [C_2] "r" (C_2), [in1] "r" (in1), [tmp1] "r" (tmp1), [C_8] "r" (C_8),
              [C_4] "r" (C_4), [C_3] "r" (C_3), [C_1] "r" (C_1), [C_7] "r" (C_7),
              [C_3A] "r" (C_3A), [C_5] "r" (C_5)
            : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
              "$ac3hi", "$ac3lo"
        );
    }

    /*
     * Stage 3: windowing and overlap.  Four groups read tmp[0..15] four
     * values at a time; the last group reads tmp[16] and tmp[17].  Immediate
     * multipliers are loaded with li.  Loads, multiplies and stores of
     * neighbouring groups are interleaved, so the instruction order must be
     * kept exactly as it is.
     */
    __asm__ volatile (
        "lw     %[t2], 1*4(%[tmp]) \n\t"
        "lw     %[t3], 3*4(%[tmp]) \n\t"
        "lw     %[t0], 0*4(%[tmp]) \n\t"
        "lw     %[t1], 2*4(%[tmp]) \n\t"
        "addu   %[temp_reg1], %[t3], %[t2] \n\t"
        "li     %[temp_reg2], 0x807D2B1E \n\t"
        "move   %[s1], $0 \n\t"
        "multu  %[temp_reg2], %[temp_reg1] \n\t"
        "sra    %[temp_reg1], %[temp_reg1], 31 \n\t"
        "movn   %[s1], %[temp_reg2], %[temp_reg1] \n\t"
        "sub    %[temp_reg3], %[t3], %[t2] \n\t"
        "li     %[temp_reg4], 0x2de5151 \n\t"
        "mfhi   %[temp_reg2] \n\t"
        "addu   %[s0], %[t1], %[t0] \n\t"
        "lw     %[temp_reg5], 9*4(%[win]) \n\t"
        "mult   $ac1, %[temp_reg4], %[temp_reg3] \n\t"
        "lw     %[temp_reg6], 4*9*4(%[buf]) \n\t"
        "sub    %[s2], %[t1], %[t0] \n\t"
        "lw     %[temp_reg3], 29*4(%[win]) \n\t"
        "subu   %[s1], %[temp_reg2], %[s1] \n\t"
        "lw     %[temp_reg4], 28*4(%[win]) \n\t"
        "add    %[t0], %[s0], %[s1] \n\t"
        "extr.w %[s3], $ac1, 23 \n\t"
        "mult   $ac2, %[t0], %[temp_reg3] \n\t"
        "sub    %[t1], %[s0], %[s1] \n\t"
        "lw     %[temp_reg1], 4*8*4(%[buf]) \n\t"
        "mult   %[t1], %[temp_reg5] \n\t"
        "lw     %[temp_reg2], 8*4(%[win]) \n\t"
        "mfhi   %[temp_reg3], $ac2 \n\t"
        "mult   $ac3, %[t0], %[temp_reg4] \n\t"
        "add    %[t0], %[s2], %[s3] \n\t"
        "mfhi   %[temp_reg5] \n\t"
        "mult   $ac1, %[t1], %[temp_reg2] \n\t"
        "sub    %[t1], %[s2], %[s3] \n\t"
        "sw     %[temp_reg3], 4*9*4(%[buf]) \n\t"
        "mfhi   %[temp_reg4], $ac3 \n\t"
        "lw     %[temp_reg3], 37*4(%[win]) \n\t"
        "mfhi   %[temp_reg2], $ac1 \n\t"
        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6] \n\t"
        "lw     %[temp_reg6], 17*4(%[win]) \n\t"
        "sw     %[temp_reg5], 32*9*4(%[out]) \n\t"
        "sw     %[temp_reg4], 4*8*4(%[buf]) \n\t"
        "mult   %[t1], %[temp_reg6] \n\t"
        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
        "lw     %[temp_reg2], 0*4(%[win]) \n\t"
        "lw     %[temp_reg5], 4*17*4(%[buf]) \n\t"
        "sw     %[temp_reg1], 8*32*4(%[out]) \n\t"
        "mfhi   %[temp_reg6] \n\t"
        "mult   $ac1, %[t1], %[temp_reg2] \n\t"
        "lw     %[temp_reg4], 20*4(%[win]) \n\t"
        "lw     %[temp_reg1], 0(%[buf]) \n\t"
        "mult   $ac2, %[t0], %[temp_reg3] \n\t"
        "mult   %[t0], %[temp_reg4] \n\t"
        "mfhi   %[temp_reg2], $ac1 \n\t"
        "lw     %[t0], 4*4(%[tmp]) \n\t"
        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6] \n\t"
        "mfhi   %[temp_reg3], $ac2 \n\t"
        "mfhi   %[temp_reg4] \n\t"
        "sw     %[temp_reg5], 17*32*4(%[out]) \n\t"
        "lw     %[t1], 6*4(%[tmp]) \n\t"
        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
        "lw     %[t2], 5*4(%[tmp]) \n\t"
        "sw     %[temp_reg1], 0*32*4(%[out]) \n\t"
        "addu   %[s0], %[t1], %[t0] \n\t"
        "sw     %[temp_reg3], 4*17*4(%[buf]) \n\t"
        "lw     %[t3], 7*4(%[tmp]) \n\t"
        "sub    %[s2], %[t1], %[t0] \n\t"
        "sw     %[temp_reg4], 0(%[buf]) \n\t"
        "addu   %[temp_reg5], %[t3], %[t2] \n\t"
        "li     %[temp_reg6], 0x8483EE0C \n\t"
        "move   %[s1], $0 \n\t"
        "multu  %[temp_reg6], %[temp_reg5] \n\t"
        "sub    %[temp_reg1], %[t3], %[t2] \n\t"
        "li     %[temp_reg2], 0xf746ea \n\t"
        "sra    %[temp_reg5], %[temp_reg5], 31 \n\t"
        "mult   $ac1, %[temp_reg2], %[temp_reg1] \n\t"
        "movn   %[s1], %[temp_reg6], %[temp_reg5] \n\t"
        "mfhi   %[temp_reg5] \n\t"
        "lw     %[temp_reg3], 10*4(%[win]) \n\t"
        "lw     %[temp_reg4], 4*10*4(%[buf]) \n\t"
        "extr.w %[s3], $ac1, 23 \n\t"
        "lw     %[temp_reg1], 4*7*4(%[buf]) \n\t"
        "lw     %[temp_reg2], 7*4(%[win]) \n\t"
        "lw     %[temp_reg6], 30*4(%[win]) \n\t"
        "subu   %[s1], %[temp_reg5], %[s1] \n\t"
        "sub    %[t1], %[s0], %[s1] \n\t"
        "add    %[t0], %[s0], %[s1] \n\t"
        "mult   $ac2, %[t1], %[temp_reg3] \n\t"
        "mult   $ac3, %[t1], %[temp_reg2] \n\t"
        "mult   %[t0], %[temp_reg6] \n\t"
        "lw     %[temp_reg5], 27*4(%[win]) \n\t"
        "mult   $ac1, %[t0], %[temp_reg5] \n\t"
        "mfhi   %[temp_reg3], $ac2 \n\t"
        "mfhi   %[temp_reg2], $ac3 \n\t"
        "mfhi   %[temp_reg6] \n\t"
        "add    %[t0], %[s2], %[s3] \n\t"
        "sub    %[t1], %[s2], %[s3] \n\t"
        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4] \n\t"
        "lw     %[temp_reg4], 16*4(%[win]) \n\t"
        "mfhi   %[temp_reg5], $ac1 \n\t"
        "sw     %[temp_reg3], 32*10*4(%[out]) \n\t"
        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
        "lw     %[temp_reg3], 4*16*4(%[buf]) \n\t"
        "sw     %[temp_reg6], 4*10*4(%[buf]) \n\t"
        "sw     %[temp_reg1], 7*32*4(%[out]) \n\t"
        "mult   $ac2, %[t1], %[temp_reg4] \n\t"
        "sw     %[temp_reg5], 4*7*4(%[buf]) \n\t"
        "lw     %[temp_reg6], 1*4(%[win]) \n\t"
        "lw     %[temp_reg5], 4*1*4(%[buf]) \n\t"
        "lw     %[temp_reg1], 36*4(%[win]) \n\t"
        "mult   $ac3, %[t1], %[temp_reg6] \n\t"
        "lw     %[temp_reg2], 21*4(%[win]) \n\t"
        "mfhi   %[temp_reg4], $ac2 \n\t"
        "mult   %[t0], %[temp_reg1] \n\t"
        "mult   $ac1, %[t0], %[temp_reg2] \n\t"
        "lw     %[t0], 8*4(%[tmp]) \n\t"
        "mfhi   %[temp_reg6], $ac3 \n\t"
        "lw     %[t1], 10*4(%[tmp]) \n\t"
        "lw     %[t3], 11*4(%[tmp]) \n\t"
        "mfhi   %[temp_reg1] \n\t"
        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4] \n\t"
        "lw     %[t2], 9*4(%[tmp]) \n\t"
        "mfhi   %[temp_reg2], $ac1 \n\t"
        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6] \n\t"
        "sw     %[temp_reg3], 16*32*4(%[out]) \n\t"
        "sw     %[temp_reg5], 1*32*4(%[out]) \n\t"
        "sw     %[temp_reg1], 4*16*4(%[buf]) \n\t"
        "addu   %[temp_reg3], %[t3], %[t2] \n\t"
        "li     %[temp_reg4], 0x8D3B7CD6 \n\t"
        "sw     %[temp_reg2], 4*1*4(%[buf]) \n\t"
        "multu  %[temp_reg4], %[temp_reg3] \n\t"
        "sra    %[temp_reg3], %[temp_reg3], 31 \n\t"
        "move   %[s1], $0 \n\t"
        "movn   %[s1], %[temp_reg4], %[temp_reg3] \n\t"
        "addu   %[s0], %[t1], %[t0] \n\t"
        "mfhi   %[temp_reg3] \n\t"
        "sub    %[s2], %[t1], %[t0] \n\t"
        "sub    %[temp_reg5], %[t3], %[t2] \n\t"
        "li     %[temp_reg6], 0x976fd9 \n\t"
        "lw     %[temp_reg2], 11*4(%[win]) \n\t"
        "lw     %[temp_reg1], 4*11*4(%[buf]) \n\t"
        "mult   $ac1, %[temp_reg6], %[temp_reg5] \n\t"
        "subu   %[s1], %[temp_reg3], %[s1] \n\t"
        "lw     %[temp_reg5], 31*4(%[win]) \n\t"
        "sub    %[t1], %[s0], %[s1] \n\t"
        "add    %[t0], %[s0], %[s1] \n\t"
        "mult   $ac2, %[t1], %[temp_reg2] \n\t"
        "mult   %[t0], %[temp_reg5] \n\t"
        "lw     %[temp_reg4], 6*4(%[win]) \n\t"
        "extr.w %[s3], $ac1, 23 \n\t"
        "lw     %[temp_reg3], 4*6*4(%[buf]) \n\t"
        "mfhi   %[temp_reg2], $ac2 \n\t"
        "lw     %[temp_reg6], 26*4(%[win]) \n\t"
        "mfhi   %[temp_reg5] \n\t"
        "mult   $ac3, %[t1], %[temp_reg4] \n\t"
        "mult   $ac1, %[t0], %[temp_reg6] \n\t"
        "add    %[t0], %[s2], %[s3] \n\t"
        "sub    %[t1], %[s2], %[s3] \n\t"
        "add    %[temp_reg2], %[temp_reg2], %[temp_reg1] \n\t"
        "mfhi   %[temp_reg4], $ac3 \n\t"
        "mfhi   %[temp_reg6], $ac1 \n\t"
        "sw     %[temp_reg5], 4*11*4(%[buf]) \n\t"
        "sw     %[temp_reg2], 32*11*4(%[out]) \n\t"
        "lw     %[temp_reg1], 4*15*4(%[buf]) \n\t"
        "add    %[temp_reg3], %[temp_reg3], %[temp_reg4] \n\t"
        "lw     %[temp_reg2], 15*4(%[win]) \n\t"
        "sw     %[temp_reg3], 6*32*4(%[out]) \n\t"
        "sw     %[temp_reg6], 4*6*4(%[buf]) \n\t"
        "mult   %[t1], %[temp_reg2] \n\t"
        "lw     %[temp_reg3], 2*4(%[win]) \n\t"
        "lw     %[temp_reg4], 4*2*4(%[buf]) \n\t"
        "lw     %[temp_reg5], 35*4(%[win]) \n\t"
        "mult   $ac1, %[t1], %[temp_reg3] \n\t"
        "mfhi   %[temp_reg2] \n\t"
        "lw     %[temp_reg6], 22*4(%[win]) \n\t"
        "mult   $ac2, %[t0], %[temp_reg5] \n\t"
        "lw     %[t1], 14*4(%[tmp]) \n\t"
        "mult   $ac3, %[t0], %[temp_reg6] \n\t"
        "lw     %[t0], 12*4(%[tmp]) \n\t"
        "mfhi   %[temp_reg3], $ac1 \n\t"
        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
        "mfhi   %[temp_reg5], $ac2 \n\t"
        "sw     %[temp_reg1], 15*32*4(%[out]) \n\t"
        "mfhi   %[temp_reg6], $ac3 \n\t"
        "lw     %[t2], 13*4(%[tmp]) \n\t"
        "lw     %[t3], 15*4(%[tmp]) \n\t"
        "add    %[temp_reg4], %[temp_reg4], %[temp_reg3] \n\t"
        "sw     %[temp_reg5], 4*15*4(%[buf]) \n\t"
        "addu   %[temp_reg1], %[t3], %[t2] \n\t"
        "li     %[temp_reg2], 0x9C42577C \n\t"
        "move   %[s1], $0 \n\t"
        "multu  %[temp_reg2], %[temp_reg1] \n\t"
        "sw     %[temp_reg4], 2*32*4(%[out]) \n\t"
        "sra    %[temp_reg1], %[temp_reg1], 31 \n\t"
        "movn   %[s1], %[temp_reg2], %[temp_reg1] \n\t"
        "sub    %[temp_reg3], %[t3], %[t2] \n\t"
        "li     %[temp_reg4], 0x6f94a2 \n\t"
        "mfhi   %[temp_reg1] \n\t"
        "addu   %[s0], %[t1], %[t0] \n\t"
        "sw     %[temp_reg6], 4*2*4(%[buf]) \n\t"
        "mult   $ac1, %[temp_reg4], %[temp_reg3] \n\t"
        "sub    %[s2], %[t1], %[t0] \n\t"
        "lw     %[temp_reg5], 12*4(%[win]) \n\t"
        "lw     %[temp_reg6], 4*12*4(%[buf]) \n\t"
        "subu   %[s1], %[temp_reg1], %[s1] \n\t"
        "sub    %[t1], %[s0], %[s1] \n\t"
        "lw     %[temp_reg3], 32*4(%[win]) \n\t"
        "mult   $ac2, %[t1], %[temp_reg5] \n\t"
        "add    %[t0], %[s0], %[s1] \n\t"
        "extr.w %[s3], $ac1, 23 \n\t"
        "lw     %[temp_reg2], 5*4(%[win]) \n\t"
        "mult   %[t0], %[temp_reg3] \n\t"
        "mfhi   %[temp_reg5], $ac2 \n\t"
        "lw     %[temp_reg4], 25*4(%[win]) \n\t"
        "lw     %[temp_reg1], 4*5*4(%[buf]) \n\t"
        "mult   $ac3, %[t1], %[temp_reg2] \n\t"
        "mult   $ac1, %[t0], %[temp_reg4] \n\t"
        "mfhi   %[temp_reg3] \n\t"
        "add    %[t0], %[s2], %[s3] \n\t"
        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6] \n\t"
        "mfhi   %[temp_reg2], $ac3 \n\t"
        "mfhi   %[temp_reg4], $ac1 \n\t"
        "sub    %[t1], %[s2], %[s3] \n\t"
        "sw     %[temp_reg5], 32*12*4(%[out]) \n\t"
        "sw     %[temp_reg3], 4*12*4(%[buf]) \n\t"
        "lw     %[temp_reg6], 14*4(%[win]) \n\t"
        "lw     %[temp_reg5], 4*14*4(%[buf]) \n\t"
        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
        "sw     %[temp_reg4], 4*5*4(%[buf]) \n\t"
        "sw     %[temp_reg1], 5*32*4(%[out]) \n\t"
        "mult   %[t1], %[temp_reg6] \n\t"
        "lw     %[temp_reg4], 34*4(%[win]) \n\t"
        "lw     %[temp_reg2], 3*4(%[win]) \n\t"
        "lw     %[temp_reg1], 4*3*4(%[buf]) \n\t"
        "mult   $ac2, %[t0], %[temp_reg4] \n\t"
        "mfhi   %[temp_reg6] \n\t"
        "mult   $ac1, %[t1], %[temp_reg2] \n\t"
        "lw     %[temp_reg3], 23*4(%[win]) \n\t"
        "lw     %[s0], 16*4(%[tmp]) \n\t"
        "mfhi   %[temp_reg4], $ac2 \n\t"
        "lw     %[t1], 17*4(%[tmp]) \n\t"
        "mult   $ac3, %[t0], %[temp_reg3] \n\t"
        "move   %[s1], $0 \n\t"
        "add    %[temp_reg5], %[temp_reg5], %[temp_reg6] \n\t"
        "mfhi   %[temp_reg2], $ac1 \n\t"
        "sw     %[temp_reg5], 14*32*4(%[out]) \n\t"
        "sw     %[temp_reg4], 4*14*4(%[buf]) \n\t"
        "mfhi   %[temp_reg3], $ac3 \n\t"
        "li     %[temp_reg5], 0xB504F334 \n\t"
        "add    %[temp_reg1], %[temp_reg1], %[temp_reg2] \n\t"
        "multu  %[temp_reg5], %[t1] \n\t"
        "lw     %[temp_reg2], 4*13*4(%[buf]) \n\t"
        "sw     %[temp_reg1], 3*32*4(%[out]) \n\t"
        "sra    %[t1], %[t1], 31 \n\t"
        "mfhi   %[temp_reg6] \n\t"
        "movn   %[s1], %[temp_reg5], %[t1] \n\t"
        "sw     %[temp_reg3], 4*3*4(%[buf]) \n\t"
        "lw     %[temp_reg1], 13*4(%[win]) \n\t"
        "lw     %[temp_reg4], 4*4*4(%[buf]) \n\t"
        "lw     %[temp_reg3], 4*4(%[win]) \n\t"
        "lw     %[temp_reg5], 33*4(%[win]) \n\t"
        "subu   %[s1], %[temp_reg6], %[s1] \n\t"
        "lw     %[temp_reg6], 24*4(%[win]) \n\t"
        "sub    %[t1], %[s0], %[s1] \n\t"
        "add    %[t0], %[s0], %[s1] \n\t"
        "mult   $ac1, %[t1], %[temp_reg1] \n\t"
        "mult   $ac2, %[t1], %[temp_reg3] \n\t"
        "mult   $ac3, %[t0], %[temp_reg5] \n\t"
        "mult   %[t0], %[temp_reg6] \n\t"
        "mfhi   %[temp_reg1], $ac1 \n\t"
        "mfhi   %[temp_reg3], $ac2 \n\t"
        "mfhi   %[temp_reg5], $ac3 \n\t"
        "mfhi   %[temp_reg6] \n\t"
        "add    %[temp_reg2], %[temp_reg2], %[temp_reg1] \n\t"
        "add    %[temp_reg4], %[temp_reg4], %[temp_reg3] \n\t"
        "sw     %[temp_reg2], 13*32*4(%[out]) \n\t"
        "sw     %[temp_reg4], 4*32*4(%[out]) \n\t"
        "sw     %[temp_reg5], 4*13*4(%[buf]) \n\t"
        "sw     %[temp_reg6], 4*4*4(%[buf]) \n\t"

        : [t0] "=&r" (t0), [t1] "=&r" (t1), [t2] "=&r" (t2), [t3] "=&r" (t3),
          [s0] "=&r" (s0), [s2] "=&r" (s2), [temp_reg1] "=&r" (temp_reg1),
          [temp_reg2] "=&r" (temp_reg2), [s1] "=&r" (s1), [s3] "=&r" (s3),
          [temp_reg3] "=&r" (temp_reg3), [temp_reg4] "=&r" (temp_reg4),
          [temp_reg5] "=&r" (temp_reg5), [temp_reg6] "=&r" (temp_reg6),
          [out] "+r" (out)
        : [tmp] "r" (tmp), [win] "r" (win), [buf] "r" (buf)
        : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo",
          "$ac3hi", "$ac3lo"
    );
}
00879
00880 static void ff_imdct36_blocks_mips_fixed(int *out, int *buf, int *in,
00881 int count, int switch_point, int block_type)
00882 {
00883 int j;
00884 for (j=0 ; j < count; j++) {
00885
00886
00887
00888 int win_idx = (switch_point && j < 2) ? 0 : block_type;
00889 int *win = ff_mdct_win_fixed[win_idx + (4 & -(j & 1))];
00890
00891 imdct36_mips_fixed(out, buf, in, win);
00892
00893 in += 18;
00894 buf += ((j&3) != 3 ? 1 : (72-3));
00895 out++;
00896 }
00897 }
00898
00899 void ff_mpadsp_init_mipsdspr1(MPADSPContext *s)
00900 {
00901 s->apply_window_fixed = ff_mpadsp_apply_window_mips_fixed;
00902 s->imdct36_blocks_fixed = ff_imdct36_blocks_mips_fixed;
00903 }