FFmpeg
Main Page
Related Pages
Modules
Data Structures
Files
Examples
File List
Globals
All
Data Structures
Files
Functions
Variables
Typedefs
Enumerations
Enumerator
Macros
Groups
Pages
libavutil
x86
float_dsp_init.c
Go to the documentation of this file.
1
/*
2
* This file is part of FFmpeg.
3
*
4
* FFmpeg is free software; you can redistribute it and/or
5
* modify it under the terms of the GNU Lesser General Public
6
* License as published by the Free Software Foundation; either
7
* version 2.1 of the License, or (at your option) any later version.
8
*
9
* FFmpeg is distributed in the hope that it will be useful,
10
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12
* Lesser General Public License for more details.
13
*
14
* You should have received a copy of the GNU Lesser General Public
15
* License along with FFmpeg; if not, write to the Free Software
16
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
*/
18
19
#include "config.h"
20
21
#include "
libavutil/cpu.h
"
22
#include "
libavutil/float_dsp.h
"
23
#include "
cpu.h
"
24
#include "
asm.h
"
25
26
/*
 * Prototypes for routines that are defined outside this file and installed
 * into AVFloatDSPContext by ff_float_dsp_init_x86(). Function declarations
 * have external linkage by default, so `extern` is not spelled out.
 * They are grouped by the AVFloatDSPContext member they are assigned to.
 */

/* vector_fmul */
void ff_vector_fmul_sse(float *dst, const float *src0, const float *src1,
                        int len);
void ff_vector_fmul_avx(float *dst, const float *src0, const float *src1,
                        int len);

/* vector_fmac_scalar */
void ff_vector_fmac_scalar_sse(float *dst, const float *src, float mul,
                               int len);
void ff_vector_fmac_scalar_avx(float *dst, const float *src, float mul,
                               int len);

/* vector_fmul_scalar */
void ff_vector_fmul_scalar_sse(float *dst, const float *src, float mul,
                               int len);

/* vector_dmul_scalar */
void ff_vector_dmul_scalar_sse2(double *dst, const double *src,
                                double mul, int len);
void ff_vector_dmul_scalar_avx(double *dst, const double *src,
                               double mul, int len);

/* vector_fmul_add */
void ff_vector_fmul_add_sse(float *dst, const float *src0, const float *src1,
                            const float *src2, int len);
void ff_vector_fmul_add_avx(float *dst, const float *src0, const float *src1,
                            const float *src2, int len);

/* vector_fmul_reverse */
void ff_vector_fmul_reverse_sse(float *dst, const float *src0,
                                const float *src1, int len);
void ff_vector_fmul_reverse_avx(float *dst, const float *src0,
                                const float *src1, int len);

/* scalarproduct_float */
float ff_scalarproduct_float_sse(const float *v1, const float *v2, int order);
56
#if HAVE_6REGS && HAVE_INLINE_ASM
57
static
void
vector_fmul_window_3dnowext(
float
*
dst
,
const
float
*src0,
58
const
float
*src1,
const
float
*win,
59
int
len
)
60
{
61
x86_reg
i = -len * 4;
62
x86_reg
j = len * 4 - 8;
63
__asm__
volatile
(
64
"1: \n"
65
"pswapd (%5, %1), %%mm1 \n"
66
"movq (%5, %0), %%mm0 \n"
67
"pswapd (%4, %1), %%mm5 \n"
68
"movq (%3, %0), %%mm4 \n"
69
"movq %%mm0, %%mm2 \n"
70
"movq %%mm1, %%mm3 \n"
71
"pfmul %%mm4, %%mm2 \n"
// src0[len + i] * win[len + i]
72
"pfmul %%mm5, %%mm3 \n"
// src1[j] * win[len + j]
73
"pfmul %%mm4, %%mm1 \n"
// src0[len + i] * win[len + j]
74
"pfmul %%mm5, %%mm0 \n"
// src1[j] * win[len + i]
75
"pfadd %%mm3, %%mm2 \n"
76
"pfsub %%mm0, %%mm1 \n"
77
"pswapd %%mm2, %%mm2 \n"
78
"movq %%mm1, (%2, %0) \n"
79
"movq %%mm2, (%2, %1) \n"
80
"sub $8, %1 \n"
81
"add $8, %0 \n"
82
"jl 1b \n"
83
"femms \n"
84
:
"+r"
(i),
"+r"
(j)
85
:
"r"
(dst +
len
),
"r"
(src0 + len),
"r"
(src1),
"r"
(win + len)
86
);
87
}
88
89
static
void
vector_fmul_window_sse(
float
*dst,
const
float
*src0,
90
const
float
*src1,
const
float
*win,
int
len)
91
{
92
x86_reg
i = -len * 4;
93
x86_reg
j = len * 4 - 16;
94
__asm__
volatile
(
95
"1: \n"
96
"movaps (%5, %1), %%xmm1 \n"
97
"movaps (%5, %0), %%xmm0 \n"
98
"movaps (%4, %1), %%xmm5 \n"
99
"movaps (%3, %0), %%xmm4 \n"
100
"shufps $0x1b, %%xmm1, %%xmm1 \n"
101
"shufps $0x1b, %%xmm5, %%xmm5 \n"
102
"movaps %%xmm0, %%xmm2 \n"
103
"movaps %%xmm1, %%xmm3 \n"
104
"mulps %%xmm4, %%xmm2 \n"
// src0[len + i] * win[len + i]
105
"mulps %%xmm5, %%xmm3 \n"
// src1[j] * win[len + j]
106
"mulps %%xmm4, %%xmm1 \n"
// src0[len + i] * win[len + j]
107
"mulps %%xmm5, %%xmm0 \n"
// src1[j] * win[len + i]
108
"addps %%xmm3, %%xmm2 \n"
109
"subps %%xmm0, %%xmm1 \n"
110
"shufps $0x1b, %%xmm2, %%xmm2 \n"
111
"movaps %%xmm1, (%2, %0) \n"
112
"movaps %%xmm2, (%2, %1) \n"
113
"sub $16, %1 \n"
114
"add $16, %0 \n"
115
"jl 1b \n"
116
:
"+r"
(i),
"+r"
(j)
117
:
"r"
(dst +
len
),
"r"
(src0 + len),
"r"
(src1),
"r"
(win + len)
118
);
119
}
120
#endif
/* HAVE_6REGS && HAVE_INLINE_ASM */
121
122
/**
 * Install x86-specific routines into an AVFloatDSPContext.
 *
 * The CPU flags are queried once via av_get_cpu_flags(). The checks run in a
 * fixed order and each one that matches overwrites the pointers set by an
 * earlier check, so the last matching check decides the final pointer.
 *
 * @param fdsp context whose function pointers are updated in place; members
 *             for which no check matches are left untouched
 */
void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp)
{
    const int cpu_flags = av_get_cpu_flags();

#if HAVE_6REGS && HAVE_INLINE_ASM
    /* Inline-asm window routines; SSE is tested second and therefore
     * replaces the 3DNow!ext version when both checks match. */
    if (INLINE_AMD3DNOWEXT(cpu_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_3dnowext;
    }
    if (INLINE_SSE(cpu_flags)) {
        fdsp->vector_fmul_window = vector_fmul_window_sse;
    }
#endif

    /* Externally assembled single-precision routines. */
    if (EXTERNAL_SSE(cpu_flags)) {
        fdsp->vector_fmul         = ff_vector_fmul_sse;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_sse;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_sse;
        fdsp->vector_fmul_scalar  = ff_vector_fmul_scalar_sse;
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_sse;
        fdsp->scalarproduct_float = ff_scalarproduct_float_sse;
    }

    /* The double-precision scalar multiply is only set from SSE2 onward. */
    if (EXTERNAL_SSE2(cpu_flags)) {
        fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2;
    }

    /* AVX versions replace the SSE/SSE2 pointers assigned above; members
     * without an AVX routine here keep their earlier value. */
    if (EXTERNAL_AVX(cpu_flags)) {
        fdsp->vector_fmul         = ff_vector_fmul_avx;
        fdsp->vector_fmul_add     = ff_vector_fmul_add_avx;
        fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx;
        fdsp->vector_fmac_scalar  = ff_vector_fmac_scalar_avx;
        fdsp->vector_dmul_scalar  = ff_vector_dmul_scalar_avx;
    }
}
Generated on Sat May 25 2013 04:01:21 for FFmpeg by Doxygen 1.8.2