FFmpeg
Data Structures | Macros | Functions | Variables
ops_asmgen.c File Reference
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libavutil/dynarray.h"
#include "rasm.c"
#include "rasm_print.c"
#include "ops_impl.c"
#include "ops_entries.c"

Go to the source code of this file.

Data Structures

struct  SwsAArch64Context
 

Macros

#define AVUTIL_AVASSERT_H
 This file is compiled as a standalone build-time tool and must not depend on internal FFmpeg libraries. More...
 
#define AVUTIL_LOG_H
 
#define AVUTIL_MACROS_H
 
#define AVUTIL_MEM_H
 
#define av_assert0(cond)   assert(cond)
 
#define av_malloc(s)   malloc(s)
 
#define av_mallocz(s)   calloc(1, s)
 
#define av_realloc(p, s)   realloc(p, s)
 
#define av_strdup(s)   strdup(s)
 
#define av_free(p)   free(p)
 
#define FFMAX(a, b)   ((a) > (b) ? (a) : (b))
 
#define FFMIN(a, b)   ((a) > (b) ? (b) : (a))
 
#define LOOP_VH(s, mask, idx)   if (s->use_vh) LOOP(mask, idx)
 
#define LOOP_MASK_VH(s, p, idx)   if (s->use_vh) LOOP_MASK(p, idx)
 
#define LOOP_MASK_BWD_VH(s, p, idx)   if (s->use_vh) LOOP_MASK_BWD(p, idx)
 
#define CMT(comment)   rasm_annotate(r, comment)
 
#define CMTF(fmt, ...)   rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)
 
#define MAX_SAVED_REGS   10
 
#define SWIZZLE_TMP   0xf
 
#define PRINT_SWIZZLE_V(n, vh)   print_swizzle_v((char[8]){ 0 }, n, vh)
 

Functions

static void av_freep (void *ptr)
 
static void * av_dynarray2_add (void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
 
static size_t aarch64_pixel_size (SwsAArch64PixelType fmt)
 
static void impl_func_name (char **buf, size_t *size, const SwsAArch64OpImplParams *params)
 
void aarch64_op_impl_func_name (char *buf, size_t size, const SwsAArch64OpImplParams *params)
 
static void reshape_all_vectors (SwsAArch64Context *s, int el_count, int el_size)
 
static unsigned clobbered_frame_size (unsigned n)
 
static void asmgen_prologue (SwsAArch64Context *s, const RasmOp *regs, unsigned n)
 
static void asmgen_epilogue (SwsAArch64Context *s, const RasmOp *regs, unsigned n)
 
static void clobber_gpr (RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr)
 
static unsigned clobbered_gprs (const SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp regs[MAX_SAVED_REGS])
 
static void asmgen_process (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_process_return (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_bit (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_nibble (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_packed_1 (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_packed_n (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
 
static void asmgen_op_read_packed (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_read_planar (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_bit (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_nibble (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_packed_1 (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_packed_n (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
 
static void asmgen_op_write_packed (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_write_planar (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_swap_bytes (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static const char * print_swizzle_v (char buf[8], uint8_t n, uint8_t vh)
 
static RasmOp swizzle_a64op (SwsAArch64Context *s, uint8_t n, uint8_t vh)
 
static void swizzle_emit (SwsAArch64Context *s, uint8_t dst, uint8_t src)
 
static void asmgen_op_swizzle (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_unpack (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_pack (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_lshift (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_rshift (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_clear (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_convert (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_expand (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_min (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_max (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_scale (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void linear_pass (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vt, RasmOp *vc, int save_mask, bool vh_pass)
 Performs one pass of the linear transform over a single vector bank (low or high). More...
 
static void asmgen_op_linear (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_dither (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op_cps (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void asmgen_op (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
 
static void aarch64_op_impl_lookup_str (char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str)
 
static int lookup_gen (void)
 
static int asmgen (void)
 
int main (int argc, char *argv[])
 

Variables

static const SwsAArch64OpImplParams impl_params []
 Implementation parameters for all exported functions. More...
 

Macro Definition Documentation

◆ AVUTIL_AVASSERT_H

#define AVUTIL_AVASSERT_H

This file is compiled as a standalone build-time tool and must not depend on internal FFmpeg libraries.

The necessary utils are redefined below using standard C equivalents.

Definition at line 39 of file ops_asmgen.c.

◆ AVUTIL_LOG_H

#define AVUTIL_LOG_H

Definition at line 40 of file ops_asmgen.c.

◆ AVUTIL_MACROS_H

#define AVUTIL_MACROS_H

Definition at line 41 of file ops_asmgen.c.

◆ AVUTIL_MEM_H

#define AVUTIL_MEM_H

Definition at line 42 of file ops_asmgen.c.

◆ av_assert0

#define av_assert0 (   cond)    assert(cond)

Definition at line 43 of file ops_asmgen.c.

◆ av_malloc

#define av_malloc (   s)    malloc(s)
Examples
avio_read_callback.c, hw_decode.c, and qsv_transcode.c.

Definition at line 44 of file ops_asmgen.c.

◆ av_mallocz

#define av_mallocz (   s)    calloc(1, s)

Definition at line 45 of file ops_asmgen.c.

◆ av_realloc

#define av_realloc (   p,
  s 
)    realloc(p, s)

Definition at line 46 of file ops_asmgen.c.

◆ av_strdup

#define av_strdup (   s)    strdup(s)

Definition at line 47 of file ops_asmgen.c.

◆ av_free

#define av_free (   p)    free(p)

Definition at line 48 of file ops_asmgen.c.

◆ FFMAX

#define FFMAX (   a,
  b 
)    ((a) > (b) ? (a) : (b))

Definition at line 49 of file ops_asmgen.c.

◆ FFMIN

#define FFMIN (   a,
  b 
)    ((a) > (b) ? (b) : (a))

Definition at line 50 of file ops_asmgen.c.

◆ LOOP_VH

#define LOOP_VH (   s,
  mask,
  idx 
)    if (s->use_vh) LOOP(mask, idx)

Definition at line 178 of file ops_asmgen.c.

◆ LOOP_MASK_VH

#define LOOP_MASK_VH (   s,
  p,
  idx 
)    if (s->use_vh) LOOP_MASK(p, idx)

Definition at line 179 of file ops_asmgen.c.

◆ LOOP_MASK_BWD_VH

#define LOOP_MASK_BWD_VH (   s,
  p,
  idx 
)    if (s->use_vh) LOOP_MASK_BWD(p, idx)

Definition at line 180 of file ops_asmgen.c.

◆ CMT

#define CMT (   comment)    rasm_annotate(r, comment)

Definition at line 183 of file ops_asmgen.c.

◆ CMTF

#define CMTF (   fmt,
  ... 
)    rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)

Definition at line 184 of file ops_asmgen.c.

◆ MAX_SAVED_REGS

#define MAX_SAVED_REGS   10

Definition at line 263 of file ops_asmgen.c.

◆ SWIZZLE_TMP

#define SWIZZLE_TMP   0xf

Definition at line 664 of file ops_asmgen.c.

◆ PRINT_SWIZZLE_V

#define PRINT_SWIZZLE_V (   n,
  vh 
)    print_swizzle_v((char[8]){ 0 }, n, vh)

Definition at line 674 of file ops_asmgen.c.

Function Documentation

◆ av_freep()

static void av_freep ( void *  ptr)
static

Definition at line 52 of file ops_asmgen.c.

Referenced by av_dynarray2_add().

◆ av_dynarray2_add()

static void* av_dynarray2_add ( void **  tab_ptr,
int *  nb_ptr,
size_t  elem_size,
const uint8_t *  elem_data 
)
static

Definition at line 65 of file ops_asmgen.c.

◆ aarch64_pixel_size()

static size_t aarch64_pixel_size ( SwsAArch64PixelType  fmt)
static

◆ impl_func_name()

static void impl_func_name ( char **  buf,
size_t *  size,
const SwsAArch64OpImplParams params 
)
static

Definition at line 113 of file ops_asmgen.c.

Referenced by aarch64_op_impl_func_name(), and aarch64_op_impl_lookup_str().

◆ aarch64_op_impl_func_name()

void aarch64_op_impl_func_name ( char *  buf,
size_t  size,
const SwsAArch64OpImplParams params 
)

Definition at line 125 of file ops_asmgen.c.

Referenced by asmgen_op_cps(), asmgen_process(), asmgen_process_return(), and lookup_gen().

◆ reshape_all_vectors()

static void reshape_all_vectors ( SwsAArch64Context s,
int  el_count,
int  el_size 
)
static

Definition at line 187 of file ops_asmgen.c.

Referenced by asmgen_op_cps(), asmgen_op_expand(), asmgen_op_pack(), and asmgen_op_unpack().

◆ clobbered_frame_size()

static unsigned clobbered_frame_size ( unsigned  n)
static

Definition at line 214 of file ops_asmgen.c.

Referenced by asmgen_epilogue(), and asmgen_prologue().

◆ asmgen_prologue()

static void asmgen_prologue ( SwsAArch64Context s,
const RasmOp regs,
unsigned  n 
)
static

Definition at line 219 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ asmgen_epilogue()

static void asmgen_epilogue ( SwsAArch64Context s,
const RasmOp regs,
unsigned  n 
)
static

Definition at line 240 of file ops_asmgen.c.

Referenced by asmgen_process_return().

◆ clobber_gpr()

static void clobber_gpr ( RasmOp  regs[MAX_SAVED_REGS],
unsigned *  count,
RasmOp  gpr 
)
static

Definition at line 265 of file ops_asmgen.c.

Referenced by clobbered_gprs().

◆ clobbered_gprs()

static unsigned clobbered_gprs ( const SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp  regs[MAX_SAVED_REGS] 
)
static

Definition at line 273 of file ops_asmgen.c.

Referenced by asmgen_process(), and asmgen_process_return().

◆ asmgen_process()

static void asmgen_process ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

The process/process_return functions for aarch64 work similarly to the x86 backend. The description in x86/ops_common.asm mostly holds here as well.

Definition at line 287 of file ops_asmgen.c.

Referenced by asmgen_op().

◆ asmgen_process_return()

static void asmgen_process_return ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 337 of file ops_asmgen.c.

Referenced by asmgen_op().

◆ asmgen_op_read_bit()

static void asmgen_op_read_bit ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 387 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_nibble()

static void asmgen_op_read_nibble ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 423 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_packed_1()

static void asmgen_op_read_packed_1 ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 449 of file ops_asmgen.c.

Referenced by asmgen_op_read_packed().

◆ asmgen_op_read_packed_n()

static void asmgen_op_read_packed_n ( SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp vx 
)
static

Definition at line 466 of file ops_asmgen.c.

Referenced by asmgen_op_read_packed().

◆ asmgen_op_read_packed()

static void asmgen_op_read_packed ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 477 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_planar()

static void asmgen_op_read_planar ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 488 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_bit()

static void asmgen_op_write_bit ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 516 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_nibble()

static void asmgen_op_write_nibble ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 546 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_packed_1()

static void asmgen_op_write_packed_1 ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 573 of file ops_asmgen.c.

Referenced by asmgen_op_write_packed().

◆ asmgen_op_write_packed_n()

static void asmgen_op_write_packed_n ( SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp vx 
)
static

Definition at line 590 of file ops_asmgen.c.

Referenced by asmgen_op_write_packed().

◆ asmgen_op_write_packed()

static void asmgen_op_write_packed ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 601 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_planar()

static void asmgen_op_write_planar ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 612 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_swap_bytes()

static void asmgen_op_swap_bytes ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 637 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ print_swizzle_v()

static const char* print_swizzle_v ( char  buf[8],
uint8_t  n,
uint8_t  vh 
)
static

Definition at line 666 of file ops_asmgen.c.

◆ swizzle_a64op()

static RasmOp swizzle_a64op ( SwsAArch64Context s,
uint8_t  n,
uint8_t  vh 
)
static

Definition at line 676 of file ops_asmgen.c.

Referenced by swizzle_emit().

◆ swizzle_emit()

static void swizzle_emit ( SwsAArch64Context s,
uint8_t  dst,
uint8_t  src 
)
static

Definition at line 683 of file ops_asmgen.c.

Referenced by asmgen_op_swizzle().

◆ asmgen_op_swizzle()

static void asmgen_op_swizzle ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 695 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_unpack()

static void asmgen_op_unpack ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

All-one values in movi only work up to 8-bit, and then at full 16- or 32-bit, but not for intermediate values like 10-bit. In those cases, we use mov + dup instead.

Definition at line 747 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_pack()

static void asmgen_op_pack ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 819 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_lshift()

static void asmgen_op_lshift ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 857 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_rshift()

static void asmgen_op_rshift ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 871 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_clear()

static void asmgen_op_clear ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

TODO

  • pack elements in impl->priv and perform smaller loads
  • if only 1 element and not vh, load directly with ld1r

Definition at line 885 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_convert()

static void asmgen_op_convert ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Since each instruction in the convert operation needs specific element types, it is simpler to use arrangement specifiers for each operand instead of reshaping all vectors.

This function assumes block_size is either 8 or 16, and that we're always using the maximum number of vector registers possible. Therefore, u32 always uses the high vector bank.

Definition at line 908 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_expand()

static void asmgen_op_expand ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 986 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_min()

static void asmgen_op_min ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1018 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_max()

static void asmgen_op_max ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1042 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_scale()

static void asmgen_op_scale ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1066 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ linear_pass()

static void linear_pass ( SwsAArch64Context s,
const SwsAArch64OpImplParams p,
RasmOp vt,
RasmOp vc,
int  save_mask,
bool  vh_pass 
)
static

Performs one pass of the linear transform over a single vector bank (low or high).

The intermediate registers for fmul+fadd (for when SWS_BITEXACT is set) start from temp vector 4.

Save rows that need to be used as input after they have been already written to.

The non-zero coefficients have been packed in aarch64_setup_linear() in sequential order into the individual lanes of the coefficient vector registers. We must follow the same order of execution here.

Split the multiply-accumulate into fmul+fadd. All multiplications are performed first into temporary registers, and only then added to the destination, to reduce the dependency chain. There is no need to perform multiplications by 1.

Most modern aarch64 cores have a fastpath for sequences of fmla instructions. This means that even if the coefficient is 1, it is still faster to use fmla by 1 instead of fadd.

Definition at line 1094 of file ops_asmgen.c.

Referenced by asmgen_op_linear().

◆ asmgen_op_linear()

static void asmgen_op_linear ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1180 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_dither()

static void asmgen_op_dither ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

For a description of the matrix buffer layout, read the comments in aarch64_setup_dither() in aarch64/ops.c.

Sort components by y_offset value so that we can start dithering with the smallest value, and increment the pointer upwards for each new offset. The dither matrix is over-allocated and may be over-read at the top, but it cannot be over-read before the start of the buffer. Since we only mask the y offset once, this would be an issue if we tried to subtract a value larger than the initial y_offset.

We use ubfiz to mask and shift left in a single instruction. Its syntax is: ubfiz <Wd>, <Wn>, #<lsb>, #<width>. It computes: Wd = (Wn & ((1 << width) - 1)) << lsb;

Given: block_size = 8, log2(block_size) = 3; dither_size = 16, log2(dither_size) = 4, dither_mask = 0b1111; sizeof(float) = 4, log2(sizeof(float)) = 2.

Suppose we have bx = 0bvvvv. To get x, we left shift by log2(block_size) and end up with 0bvvvv000. Then we mask against dither_mask, and end up with 0bv000. Finally we multiply by sizeof(float), which is the same as shifting left by log2(sizeof(float)). The result is 0bv00000.

Therefore: width = log2(dither_size) - log2(block_size), and lsb = log2(block_size) + log2(sizeof(float)).

The ubfiz instruction for the y offset performs masking by the dither matrix size and shifts by the stride.

On subsequent runs, just increment the pointer. The matrix is over-allocated, so we don't risk overreading.

Definition at line 1224 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_cps()

static void asmgen_op_cps ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Set up vector register dimensions and reshape all vectors accordingly.

Definition at line 1345 of file ops_asmgen.c.

Referenced by asmgen_op().

◆ asmgen_op()

static void asmgen_op ( SwsAArch64Context s,
const SwsAArch64OpImplParams p 
)
static

Definition at line 1403 of file ops_asmgen.c.

Referenced by asmgen().

◆ aarch64_op_impl_lookup_str()

static void aarch64_op_impl_lookup_str ( char *  buf,
size_t  size,
const SwsAArch64OpImplParams params,
const SwsAArch64OpImplParams prev,
const char *  p_str 
)
static

Definition at line 1419 of file ops_asmgen.c.

Referenced by lookup_gen().

◆ lookup_gen()

static int lookup_gen ( void  )
static

The lookup function matches the SwsAArch64OpImplParams from ops_entries.c to the exported functions generated by asmgen_op(). Each call to aarch64_op_impl_lookup_str() generates a code fragment to uniquely detect the current function, opening and/or closing conditions depending on the parameters of the previous function.

Definition at line 1485 of file ops_asmgen.c.

Referenced by main().

◆ asmgen()

static int asmgen ( void  )
static

The entry point of the SwsOpFunc is the process function. The kernel functions are chained by directly branching to the next operation, using a continuation-passing style design. The exit point of the SwsOpFunc is the process_return function.

The GPRs used by the entire call-chain are listed below.

Function arguments are passed in r0-r5. After the parameters from exec have been read, r0 is reused to branch to the continuation functions. After the original parameters from impl have been computed, r1 is reused as the impl pointer for each operation.

Loop iterators are r6 for bx and r3 for y, reused from y_start, which doesn't need to be preserved.

The intra-procedure-call temporary registers (r16 and r17) are used as scratch registers. They may be used by call veneers and PLT code inserted by the linker, so we cannot expect them to persist across branches between functions.

The Platform Register (r18) is not used.

The read/write data pointers and padding values first use up the remaining free caller-saved registers, and only then are the callee-saved registers (r19-r28) used.

Definition at line 1527 of file ops_asmgen.c.

Referenced by main().

◆ main()

int main ( int  argc,
char *  argv[] 
)

Definition at line 1625 of file ops_asmgen.c.

Variable Documentation

◆ impl_params

const SwsAArch64OpImplParams impl_params[]
static
Initial value:
= {
}

Implementation parameters for all exported functions.

This list is compiled by performing a dummy run of all conversions in sws_ops and collecting all functions that need to be generated. This is achieved by running: make sws_ops_entries_aarch64

Definition at line 93 of file ops_asmgen.c.

Referenced by asmgen(), and lookup_gen().

AARCH64_SWS_OP_NONE
@ AARCH64_SWS_OP_NONE
Definition: ops_impl.h:39