#include <assert.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libavutil/dynarray.h"
#include "rasm.c"
#include "rasm_print.c"
#include "ops_impl.c"
#include "ops_entries.c"

Data Structures
struct	SwsAArch64Context

Macros
#define	AVUTIL_AVASSERT_H
	This file is compiled as a standalone build-time tool and must not depend on internal FFmpeg libraries. More...

#define	AVUTIL_LOG_H

#define	AVUTIL_MACROS_H

#define	AVUTIL_MEM_H

#define	av_assert0(cond) assert(cond)

#define	av_malloc(s) malloc(s)

#define	av_mallocz(s) calloc(1, s)

#define	av_realloc(p, s) realloc(p, s)

#define	av_strdup(s) strdup(s)

#define	av_free(p) free(p)

#define	FFMAX(a, b) ((a) > (b) ? (a) : (b))

#define	FFMIN(a, b) ((a) > (b) ? (b) : (a))

#define	LOOP_VH(s, mask, idx) if (s->use_vh) LOOP(mask, idx)

#define	LOOP_MASK_VH(s, p, idx) if (s->use_vh) LOOP_MASK(p, idx)

#define	LOOP_MASK_BWD_VH(s, p, idx) if (s->use_vh) LOOP_MASK_BWD(p, idx)

#define	CMT(comment) rasm_annotate(r, comment)

#define	CMTF(fmt, ...) rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)

#define	MAX_SAVED_REGS 12

#define	SWIZZLE_TMP 0xf

#define	PRINT_SWIZZLE_V(n, vh) print_swizzle_v((char[8]){ 0 }, n, vh)

Functions
static void	av_freep (void *ptr)

static void *	av_dynarray2_add (void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)

static size_t	aarch64_pixel_size (SwsAArch64PixelType fmt)

static void	impl_func_name (char **buf, size_t *size, const SwsAArch64OpImplParams *params)

void	aarch64_op_impl_func_name (char *buf, size_t size, const SwsAArch64OpImplParams *params)

static void	reshape_all_vectors (SwsAArch64Context *s, int el_count, int el_size)

static unsigned	clobbered_frame_size (unsigned n)

static void	asmgen_prologue (SwsAArch64Context *s, const RasmOp *regs, unsigned n)

static void	asmgen_epilogue (SwsAArch64Context *s, const RasmOp *regs, unsigned n)

static void	clobber_gpr (RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr)

static unsigned	clobbered_gprs (const SwsAArch64Context *s, SwsAArch64OpMask mask, RasmOp regs[MAX_SAVED_REGS])

static void	asmgen_process (SwsAArch64Context *s, SwsAArch64OpMask mask)

static void	asmgen_set_load_cont_node (SwsAArch64Context *s)
	Set node where the continuation address will be loaded and impl will be incremented. More...

static void	asmgen_op_read_bit (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_read_nibble (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_read_packed_n (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)

static void	asmgen_op_read_packed (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_read_planar (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_write_bit (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_write_nibble (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_write_packed_n (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)

static void	asmgen_op_write_packed (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_write_planar (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_swap_bytes (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static const char *	print_swizzle_v (char buf[8], uint8_t n, uint8_t vh)

static RasmOp	swizzle_a64op (SwsAArch64Context *s, uint8_t n, uint8_t vh)

static void	swizzle_emit (SwsAArch64Context *s, uint8_t dst, uint8_t src)

static void	asmgen_op_swizzle (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_unpack (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_pack (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_lshift (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_rshift (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_clear (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_convert (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_expand (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_min (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_max (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_scale (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	linear_pass (SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vt, RasmOp *vc, int save_mask, bool vh_pass)
	Performs one pass of the linear transform over a single vector bank (low or high). More...

static void	asmgen_op_linear (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_dither (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	asmgen_op_cps (SwsAArch64Context *s, const SwsAArch64OpImplParams *p)

static void	aarch64_op_impl_lookup_str (char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str)

static int	lookup_gen (void)

static int	asmgen (void)

int	main (int argc, char *argv[])

Variables
static const SwsAArch64OpImplParams	impl_params []
	Implementation parameters for all exported functions. More...

Macro Definition Documentation

◆ AVUTIL_AVASSERT_H

#define AVUTIL_AVASSERT_H

This file is compiled as a standalone build-time tool and must not depend on internal FFmpeg libraries.

The necessary utils are redefined below using standard C equivalents.

Definition at line 39 of file ops_asmgen.c.

◆ AVUTIL_LOG_H

#define AVUTIL_LOG_H

Definition at line 40 of file ops_asmgen.c.

◆ AVUTIL_MACROS_H

#define AVUTIL_MACROS_H

Definition at line 41 of file ops_asmgen.c.

◆ AVUTIL_MEM_H

#define AVUTIL_MEM_H

Definition at line 42 of file ops_asmgen.c.

◆ av_assert0

#define av_assert0 ( cond ) assert(cond)

Definition at line 43 of file ops_asmgen.c.

◆ av_malloc

#define av_malloc ( s ) malloc(s)

Examples: avio_read_callback.c, hw_decode.c, and qsv_transcode.c.

Definition at line 44 of file ops_asmgen.c.

◆ av_mallocz

#define av_mallocz ( s ) calloc(1, s)

Definition at line 45 of file ops_asmgen.c.

◆ av_realloc

#define av_realloc	(	p,
		s
	)	realloc(p, s)

Definition at line 46 of file ops_asmgen.c.

◆ av_strdup

#define av_strdup ( s ) strdup(s)

Examples: decode_filter_audio.c, decode_filter_video.c, transcode.c, and transcode_aac.c.

Definition at line 47 of file ops_asmgen.c.

◆ av_free

#define av_free ( p ) free(p)

Definition at line 48 of file ops_asmgen.c.

◆ FFMAX

#define FFMAX	(	a,
		b
	)	((a) > (b) ? (a) : (b))

Definition at line 49 of file ops_asmgen.c.

◆ FFMIN

#define FFMIN	(	a,
		b
	)	((a) > (b) ? (b) : (a))

Definition at line 50 of file ops_asmgen.c.

◆ LOOP_VH

#define LOOP_VH	(	s,
		mask,
		idx
	)	if (s->use_vh) LOOP(mask, idx)

Definition at line 179 of file ops_asmgen.c.

◆ LOOP_MASK_VH

#define LOOP_MASK_VH	(	s,
		p,
		idx
	)	if (s->use_vh) LOOP_MASK(p, idx)

Definition at line 180 of file ops_asmgen.c.

◆ LOOP_MASK_BWD_VH

#define LOOP_MASK_BWD_VH	(	s,
		p,
		idx
	)	if (s->use_vh) LOOP_MASK_BWD(p, idx)

Definition at line 181 of file ops_asmgen.c.

◆ CMT

#define CMT ( comment ) rasm_annotate(r, comment)

Definition at line 184 of file ops_asmgen.c.

◆ CMTF

#define CMTF	(	fmt,
		...
	)	rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)

Definition at line 185 of file ops_asmgen.c.

◆ MAX_SAVED_REGS

#define MAX_SAVED_REGS 12

Definition at line 264 of file ops_asmgen.c.

◆ SWIZZLE_TMP

#define SWIZZLE_TMP 0xf

Definition at line 628 of file ops_asmgen.c.

◆ PRINT_SWIZZLE_V

#define PRINT_SWIZZLE_V	(	n,
		vh
	)	print_swizzle_v((char[8]){ 0 }, n, vh)

Definition at line 638 of file ops_asmgen.c.

Function Documentation

◆ av_freep()

static void av_freep ( void * ptr )

static

Definition at line 52 of file ops_asmgen.c.

Referenced by av_dynarray2_add().

◆ av_dynarray2_add()

static void* av_dynarray2_add	(	void **	tab_ptr,
		int *	nb_ptr,
		size_t	elem_size,
		const uint8_t *	elem_data
	)

static

Definition at line 65 of file ops_asmgen.c.

◆ aarch64_pixel_size()

static size_t aarch64_pixel_size ( SwsAArch64PixelType fmt )

static

Definition at line 99 of file ops_asmgen.c.

Referenced by asmgen_op_convert(), asmgen_op_cps(), asmgen_op_expand(), and asmgen_op_swap_bytes().

◆ impl_func_name()

static void impl_func_name	(	char **	buf,
		size_t *	size,
		const SwsAArch64OpImplParams *	params
	)

static

Definition at line 113 of file ops_asmgen.c.

Referenced by aarch64_op_impl_func_name(), and aarch64_op_impl_lookup_str().

◆ aarch64_op_impl_func_name()

void aarch64_op_impl_func_name	(	char *	buf,
		size_t	size,
		const SwsAArch64OpImplParams *	params
	)

Definition at line 125 of file ops_asmgen.c.

Referenced by asmgen_op_cps(), and lookup_gen().

◆ reshape_all_vectors()

static void reshape_all_vectors	(	SwsAArch64Context *	s,
		int	el_count,
		int	el_size
	)

static

Definition at line 188 of file ops_asmgen.c.

Referenced by asmgen_op_cps(), asmgen_op_expand(), asmgen_op_pack(), and asmgen_op_unpack().

◆ clobbered_frame_size()

static unsigned clobbered_frame_size ( unsigned n )

static

Definition at line 215 of file ops_asmgen.c.

Referenced by asmgen_epilogue(), and asmgen_prologue().

◆ asmgen_prologue()

static void asmgen_prologue	(	SwsAArch64Context *	s,
		const RasmOp *	regs,
		unsigned	n
	)

static

Definition at line 220 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ asmgen_epilogue()

static void asmgen_epilogue	(	SwsAArch64Context *	s,
		const RasmOp *	regs,
		unsigned	n
	)

static

Definition at line 241 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ clobber_gpr()

static void clobber_gpr	(	RasmOp	regs[MAX_SAVED_REGS],
		unsigned *	count,
		RasmOp	gpr
	)

static

Definition at line 266 of file ops_asmgen.c.

Referenced by clobbered_gprs().

◆ clobbered_gprs()

static unsigned clobbered_gprs	(	const SwsAArch64Context *	s,
		SwsAArch64OpMask	mask,
		RasmOp	regs[MAX_SAVED_REGS]
	)

static

Definition at line 274 of file ops_asmgen.c.

Referenced by asmgen_process().

◆ asmgen_process()

static void asmgen_process	(	SwsAArch64Context *	s,
		SwsAArch64OpMask	mask
	)

static

The process function for aarch64 works similarly to the one in the x86 backend. The description in x86/ops_include.asm mostly holds here as well.

Definition at line 289 of file ops_asmgen.c.

Referenced by asmgen().

◆ asmgen_set_load_cont_node()

static void asmgen_set_load_cont_node ( SwsAArch64Context * s )

static

Set node where the continuation address will be loaded and impl will be incremented.

This should be done right after impl->priv has been used.

Definition at line 376 of file ops_asmgen.c.

Referenced by asmgen_op_clear(), asmgen_op_cps(), asmgen_op_dither(), asmgen_op_linear(), asmgen_op_max(), asmgen_op_min(), asmgen_op_read_bit(), asmgen_op_scale(), and asmgen_op_write_bit().

◆ asmgen_op_read_bit()

static void asmgen_op_read_bit	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 389 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_nibble()

static void asmgen_op_read_nibble	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 426 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_packed_n()

static void asmgen_op_read_packed_n	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p,
		RasmOp *	vx
	)

static

Definition at line 452 of file ops_asmgen.c.

Referenced by asmgen_op_read_packed().

◆ asmgen_op_read_packed()

static void asmgen_op_read_packed	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 463 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_read_planar()

static void asmgen_op_read_planar	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 471 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_bit()

static void asmgen_op_write_bit	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 499 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_nibble()

static void asmgen_op_write_nibble	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 530 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_packed_n()

static void asmgen_op_write_packed_n	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p,
		RasmOp *	vx
	)

static

Definition at line 557 of file ops_asmgen.c.

Referenced by asmgen_op_write_packed().

◆ asmgen_op_write_packed()

static void asmgen_op_write_packed	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 568 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_write_planar()

static void asmgen_op_write_planar	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 576 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_swap_bytes()

static void asmgen_op_swap_bytes	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 601 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ print_swizzle_v()

static const char* print_swizzle_v	(	char	buf[8],
		uint8_t	n,
		uint8_t	vh
	)

static

Definition at line 630 of file ops_asmgen.c.

◆ swizzle_a64op()

static RasmOp swizzle_a64op	(	SwsAArch64Context *	s,
		uint8_t	n,
		uint8_t	vh
	)

static

Definition at line 640 of file ops_asmgen.c.

Referenced by swizzle_emit().

◆ swizzle_emit()

static void swizzle_emit	(	SwsAArch64Context *	s,
		uint8_t	dst,
		uint8_t	src
	)

static

Definition at line 647 of file ops_asmgen.c.

Referenced by asmgen_op_swizzle().

◆ asmgen_op_swizzle()

static void asmgen_op_swizzle	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 659 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_unpack()

static void asmgen_op_unpack	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

All-one values in movi only work up to 8-bit, and then at full 16- or 32-bit, but not for intermediate values like 10-bit. In those cases, we use mov + dup instead.

Definition at line 711 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_pack()

static void asmgen_op_pack	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 783 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_lshift()

static void asmgen_op_lshift	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 821 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_rshift()

static void asmgen_op_rshift	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 835 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_clear()

static void asmgen_op_clear	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

TODO:

- pack elements in impl->priv and perform smaller loads
- if only 1 element and not vh, load directly with ld1r

Definition at line 849 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_convert()

static void asmgen_op_convert	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Since each instruction in the convert operation needs specific element types, it is simpler to use arrangement specifiers for each operand instead of reshaping all vectors.

This function assumes block_size is either 8 or 16, and that we're always using the largest possible number of vector registers. Therefore, u32 always uses the high vector bank.

Definition at line 873 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_expand()

static void asmgen_op_expand	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 951 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_min()

static void asmgen_op_min	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 983 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_max()

static void asmgen_op_max	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 1008 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_scale()

static void asmgen_op_scale	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 1033 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ linear_pass()

static void linear_pass	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p,
		RasmOp *	vt,
		RasmOp *	vc,
		int	save_mask,
		bool	vh_pass
	)

static

Performs one pass of the linear transform over a single vector bank (low or high).

The intermediate registers for fmul+fadd (for when SWS_BITEXACT is set) start from temp vector 4.

Save rows that need to be used as input after they have already been written to.

The non-zero coefficients have been packed in aarch64_setup_linear() in sequential order into the individual lanes of the coefficient vector registers. We must follow the same order of execution here.

Split the multiply-accumulate into fmul+fadd. All multiplications are performed first into temporary registers, and only then added to the destination, to reduce the dependency chain. There is no need to perform multiplications by 1.

Most modern aarch64 cores have a fastpath for sequences of fmla instructions. This means that even if the coefficient is 1, it is still faster to use fmla by 1 instead of fadd.

Definition at line 1062 of file ops_asmgen.c.

Referenced by asmgen_op_linear().

◆ asmgen_op_linear()

static void asmgen_op_linear	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Definition at line 1148 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_dither()

static void asmgen_op_dither	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

For a description of the matrix buffer layout, read the comments in aarch64_setup_dither() in aarch64/ops.c.

Sort components by y_offset value so that we can start dithering with the smallest value, and increment the pointer upwards for each new offset. The dither matrix is over-allocated and may be over-read at the top, but it cannot be over-read before the start of the buffer. Since we only mask the y offset once, this would be an issue if we tried to subtract a value larger than the initial y_offset.

We use ubfiz to mask and shift left in a single instruction:

    ubfiz <Wd>, <Wn>, #<lsb>, #<width>
    Wd = (Wn & ((1 << width) - 1)) << lsb;

Given:

    block_size = 8, log2(block_size) = 3
    dither_size = 16, log2(dither_size) = 4, dither_mask = 0b1111
    sizeof(float) = 4, log2(sizeof(float)) = 2

Suppose we have bx = 0bvvvv. To get x, we left shift by log2(block_size) and end up with 0bvvvv000. Then we mask against dither_mask, and end up with 0bv000. Finally we multiply by sizeof(float), which is the same as shifting left by log2(sizeof(float)). The result is 0bv00000.

Therefore:

    width = log2(dither_size) - log2(block_size)
    lsb = log2(block_size) + log2(sizeof(float))

The ubfiz instruction for the y offset performs masking by the dither matrix size and shifts by the stride.

On subsequent runs, just increment the pointer. The matrix is over-allocated, so we don't risk overreading.

Definition at line 1193 of file ops_asmgen.c.

Referenced by asmgen_op_cps().

◆ asmgen_op_cps()

static void asmgen_op_cps	(	SwsAArch64Context *	s,
		const SwsAArch64OpImplParams *	p
	)

static

Set up vector register dimensions and reshape all vectors accordingly.

Definition at line 1315 of file ops_asmgen.c.

Referenced by asmgen().

◆ aarch64_op_impl_lookup_str()

static void aarch64_op_impl_lookup_str	(	char *	buf,
		size_t	size,
		const SwsAArch64OpImplParams *	params,
		const SwsAArch64OpImplParams *	prev,
		const char *	p_str
	)

static

Definition at line 1402 of file ops_asmgen.c.

Referenced by lookup_gen().

◆ lookup_gen()

static int lookup_gen ( void )

static

The lookup function matches the SwsAArch64OpImplParams from ops_entries.c to the exported functions generated by asmgen_op(). Each call to aarch64_op_impl_lookup_str() generates a code fragment to uniquely detect the current function, opening and/or closing conditions depending on the parameters of the previous function.

Definition at line 1468 of file ops_asmgen.c.

Referenced by main().

◆ asmgen()

static int asmgen ( void )

static

The entry point of the SwsOpFunc is the process function. The first kernel function is called from process, and subsequent kernel functions are chained by directly branching to the next operation, using a continuation-passing style design. The last operation must be a write operation, which returns from the call to the process function.

The GPRs used by the entire call-chain are listed below.

Function arguments are passed in r0-r5. After the parameters from exec have been read, r0 is reused to branch to the continuation functions. After the original parameters from impl have been computed, r1 is reused as the impl pointer for each operation.

Loop iterators are r6 for bx and r3 for y, reused from y_start, which doesn't need to be preserved.

The intra-procedure-call temporary registers (r16 and r17) are used as scratch registers. They may be used by call veneers and PLT code inserted by the linker, so we cannot expect them to persist across branches between functions.

The Platform Register (r18) is not used.

The read/write data pointers and padding values first use up the remaining free caller-saved registers, and only then are the callee-saved registers (r19-r28) used.

The Link Register (r30) is used when calling the first kernel, so it must be saved.

Definition at line 1510 of file ops_asmgen.c.

Referenced by main().

◆ main()

int main	(	int	argc,
		char *	argv[]
	)

Definition at line 1619 of file ops_asmgen.c.

Variable Documentation

◆ impl_params

const SwsAArch64OpImplParams impl_params[]

static

Initial value:

= {
    { .op = AARCH64_SWS_OP_NONE }
}

Implementation parameters for all exported functions.

This list is compiled by performing a dummy run of all conversions in sws_ops and collecting all functions that need to be generated. This is achieved by running: make fate-sws-ops-entries-aarch64 GEN=1

Definition at line 93 of file ops_asmgen.c.

Referenced by asmgen(), and lookup_gen().

Data Structures

Macros

Functions

Variables

Macro Definition Documentation

◆ AVUTIL_AVASSERT_H

◆ AVUTIL_LOG_H

◆ AVUTIL_MACROS_H

◆ AVUTIL_MEM_H

◆ av_assert0

◆ av_malloc

◆ av_mallocz

◆ av_realloc

◆ av_strdup

◆ av_free

◆ FFMAX

◆ FFMIN

◆ LOOP_VH

◆ LOOP_MASK_VH

◆ LOOP_MASK_BWD_VH

◆ CMT

◆ CMTF

◆ MAX_SAVED_REGS

◆ SWIZZLE_TMP

◆ PRINT_SWIZZLE_V

Function Documentation

◆ av_freep()

◆ av_dynarray2_add()

◆ aarch64_pixel_size()

◆ impl_func_name()

◆ aarch64_op_impl_func_name()

◆ reshape_all_vectors()

◆ clobbered_frame_size()

◆ asmgen_prologue()

◆ asmgen_epilogue()

◆ clobber_gpr()

◆ clobbered_gprs()

◆ asmgen_process()

◆ asmgen_set_load_cont_node()

◆ asmgen_op_read_bit()

◆ asmgen_op_read_nibble()

◆ asmgen_op_read_packed_n()

◆ asmgen_op_read_packed()

◆ asmgen_op_read_planar()

◆ asmgen_op_write_bit()

◆ asmgen_op_write_nibble()

◆ asmgen_op_write_packed_n()

◆ asmgen_op_write_packed()

◆ asmgen_op_write_planar()

◆ asmgen_op_swap_bytes()

◆ print_swizzle_v()

◆ swizzle_a64op()

◆ swizzle_emit()

◆ asmgen_op_swizzle()

◆ asmgen_op_unpack()

◆ asmgen_op_pack()

◆ asmgen_op_lshift()

◆ asmgen_op_rshift()

◆ asmgen_op_clear()

◆ asmgen_op_convert()

◆ asmgen_op_expand()

◆ asmgen_op_min()

◆ asmgen_op_max()

◆ asmgen_op_scale()

◆ linear_pass()

◆ asmgen_op_linear()

◆ asmgen_op_dither()

◆ asmgen_op_cps()

◆ aarch64_op_impl_lookup_str()

◆ lookup_gen()

◆ asmgen()

◆ main()

Variable Documentation

◆ impl_params