FFmpeg
vp9dsp_init.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2024 Institute of Software Chinese Academy of Sciences (ISCAS).
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "libavutil/attributes.h"
22 #include "libavutil/cpu.h"
23 #include "libavutil/riscv/cpu.h"
24 #include "libavcodec/vp9dsp.h"
25 #include "vp9dsp.h"
26 
27 static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
28 {
29 #if HAVE_RV
30  int flags = av_get_cpu_flags();
31 
32 # if __riscv_xlen >= 64
33  if (bpp == 8 && (flags & AV_CPU_FLAG_RV_MISALIGNED)) {
34 
35 #define init_fpel(idx1, sz) \
36  dsp->mc[idx1][FILTER_8TAP_SMOOTH ][0][0][0] = ff_copy##sz##_rvi; \
37  dsp->mc[idx1][FILTER_8TAP_REGULAR][0][0][0] = ff_copy##sz##_rvi; \
38  dsp->mc[idx1][FILTER_8TAP_SHARP ][0][0][0] = ff_copy##sz##_rvi; \
39  dsp->mc[idx1][FILTER_BILINEAR ][0][0][0] = ff_copy##sz##_rvi
40 
41  init_fpel(0, 64);
42  init_fpel(1, 32);
43  init_fpel(2, 16);
44  init_fpel(3, 8);
45  init_fpel(4, 4);
46 
47 #undef init_fpel
48  }
49 # endif
50 
51 #if HAVE_RVV
52  if (bpp == 8 && (flags & AV_CPU_FLAG_RVV_I32) && ff_rv_vlen_least(128)) {
53 
54 #define init_fpel(idx1, sz) \
55  dsp->mc[idx1][FILTER_8TAP_SMOOTH ][1][0][0] = ff_vp9_avg##sz##_rvv; \
56  dsp->mc[idx1][FILTER_8TAP_REGULAR][1][0][0] = ff_vp9_avg##sz##_rvv; \
57  dsp->mc[idx1][FILTER_8TAP_SHARP ][1][0][0] = ff_vp9_avg##sz##_rvv; \
58  dsp->mc[idx1][FILTER_BILINEAR ][1][0][0] = ff_vp9_avg##sz##_rvv
59 
60  init_fpel(0, 64);
61  init_fpel(1, 32);
62  init_fpel(2, 16);
63  init_fpel(3, 8);
64  init_fpel(4, 4);
65 
66  dsp->mc[0][FILTER_BILINEAR ][0][0][1] = ff_put_vp9_bilin_64v_rvv;
67  dsp->mc[0][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_64h_rvv;
68  dsp->mc[0][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_64v_rvv;
69  dsp->mc[0][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_64h_rvv;
70  dsp->mc[1][FILTER_BILINEAR ][0][0][1] = ff_put_vp9_bilin_32v_rvv;
71  dsp->mc[1][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_32h_rvv;
72  dsp->mc[1][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_32v_rvv;
73  dsp->mc[1][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_32h_rvv;
74  dsp->mc[2][FILTER_BILINEAR ][0][0][1] = ff_put_vp9_bilin_16v_rvv;
75  dsp->mc[2][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_16h_rvv;
76  dsp->mc[2][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_16v_rvv;
77  dsp->mc[2][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_16h_rvv;
78  dsp->mc[3][FILTER_BILINEAR ][0][0][1] = ff_put_vp9_bilin_8v_rvv;
79  dsp->mc[3][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_8h_rvv;
80  dsp->mc[3][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_8v_rvv;
81  dsp->mc[3][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_8h_rvv;
82  dsp->mc[4][FILTER_BILINEAR ][0][0][1] = ff_put_vp9_bilin_4v_rvv;
83  dsp->mc[4][FILTER_BILINEAR ][0][1][0] = ff_put_vp9_bilin_4h_rvv;
84  dsp->mc[4][FILTER_BILINEAR ][1][0][1] = ff_avg_vp9_bilin_4v_rvv;
85  dsp->mc[4][FILTER_BILINEAR ][1][1][0] = ff_avg_vp9_bilin_4h_rvv;
86  dsp->mc[0][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_64hv_rvv;
87  dsp->mc[0][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_64hv_rvv;
88  dsp->mc[1][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_32hv_rvv;
89  dsp->mc[1][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_32hv_rvv;
90  dsp->mc[2][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_16hv_rvv;
91  dsp->mc[2][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_16hv_rvv;
92  dsp->mc[3][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_8hv_rvv;
93  dsp->mc[3][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_8hv_rvv;
94  dsp->mc[4][FILTER_BILINEAR ][0][1][1] = ff_put_vp9_bilin_4hv_rvv;
95  dsp->mc[4][FILTER_BILINEAR ][1][1][1] = ff_avg_vp9_bilin_4hv_rvv;
96 
97 #undef init_fpel
98  }
99 #endif
100 #endif
101 }
102 
104 {
105 #if HAVE_RV
106  int flags = av_get_cpu_flags();
107 
108 #if HAVE_RVV
109  if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I64 && ff_rv_vlen_least(128)) {
116  }
117 
118  if (bpp == 8 && flags & AV_CPU_FLAG_RVV_I32 && ff_rv_vlen_least(128)) {
138  }
139 #endif
140 #endif
141 }
142 
143 av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
144 {
145  vp9dsp_intrapred_init_riscv(dsp, bpp);
146  vp9dsp_mc_init_riscv(dsp, bpp);
147 }
ff_dc_127_32x32_rvv
void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_h_32x32_rvv
void ff_h_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
DC_128_PRED
@ DC_128_PRED
Definition: vp9.h:58
ff_dc_8x8_rvv
void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_left_16x16_rvv
void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_tm_32x32_rvv
void ff_tm_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_129_16x16_rvv
void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_top_32x32_rvv
void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
vp9dsp_mc_init_riscv
static av_cold void vp9dsp_mc_init_riscv(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init.c:27
TM_VP8_PRED
@ TM_VP8_PRED
Definition: vp9.h:55
DC_PRED
@ DC_PRED
Definition: vp9.h:48
VP9DSPContext
Definition: vp9dsp.h:40
vp9dsp_intrapred_init_riscv
static av_cold void vp9dsp_intrapred_init_riscv(VP9DSPContext *dsp, int bpp)
Definition: vp9dsp_init.c:103
ff_dc_16x16_rvv
void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:109
ff_tm_16x16_rvv
void ff_tm_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
DC_127_PRED
@ DC_127_PRED
Definition: vp9.h:59
ff_h_8x8_rvv
void ff_h_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_127_8x8_rvv
void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
HOR_PRED
@ HOR_PRED
Definition: vp9.h:47
av_cold
#define av_cold
Definition: attributes.h:90
ff_dc_129_32x32_rvv
void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
cpu.h
LEFT_DC_PRED
@ LEFT_DC_PRED
Definition: vp9.h:56
ff_dc_32x32_rvv
void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_128_16x16_rvv
void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_top_8x8_rvv
void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_128_8x8_rvv
void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
TX_8X8
@ TX_8X8
Definition: vp9.h:29
TX_16X16
@ TX_16X16
Definition: vp9.h:30
FILTER_BILINEAR
@ FILTER_BILINEAR
Definition: vp9.h:68
VP9DSPContext::intra_pred
void(* intra_pred[N_TXFM_SIZES][N_INTRA_PRED_MODES])(uint8_t *dst, ptrdiff_t stride, const uint8_t *left, const uint8_t *top)
Definition: vp9dsp.h:52
init_fpel
#define init_fpel(idx1, idx2, sz, type, suffix)
vp9dsp.h
ff_h_16x16_rvv
void ff_h_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
cpu.h
VP9DSPContext::mc
vp9_mc_func mc[5][N_FILTERS][2][2][2]
Definition: vp9dsp.h:115
ff_dc_129_8x8_rvv
void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_left_32x32_rvv
void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
TX_4X4
@ TX_4X4
Definition: vp9.h:28
attributes.h
AV_CPU_FLAG_RV_MISALIGNED
#define AV_CPU_FLAG_RV_MISALIGNED
Fast misaligned accesses.
Definition: cpu.h:101
DC_129_PRED
@ DC_129_PRED
Definition: vp9.h:60
ff_dc_127_16x16_rvv
void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
AV_CPU_FLAG_RVV_I32
#define AV_CPU_FLAG_RVV_I32
Vectors of 8/16/32-bit integers.
Definition: cpu.h:92
vp9dsp.h
ff_dc_top_16x16_rvv
void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_tm_8x8_rvv
void ff_tm_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_dc_left_8x8_rvv
void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_vp9dsp_init_riscv
av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
Definition: vp9dsp_init.c:143
TX_32X32
@ TX_32X32
Definition: vp9.h:31
TOP_DC_PRED
@ TOP_DC_PRED
Definition: vp9.h:57
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:482
AV_CPU_FLAG_RVV_I64
#define AV_CPU_FLAG_RVV_I64
Vectors of 64-bit integers.
Definition: cpu.h:94
ff_dc_128_32x32_rvv
void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)
ff_tm_4x4_rvv
void ff_tm_4x4_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l, const uint8_t *a)