Go to the documentation of this file.
39 #define AVUTIL_AVASSERT_H
41 #define AVUTIL_MACROS_H
43 #define av_assert0(cond) assert(cond)
44 #define av_malloc(s) malloc(s)
45 #define av_mallocz(s) calloc(1, s)
46 #define av_realloc(p, s) realloc(p, s)
47 #define av_strdup(s) strdup(s)
48 #define av_free(p) free(p)
49 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
50 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
54 void **pptr = (
void **) ptr;
66 const uint8_t *elem_data)
68 uint8_t *tab_elem_data =
NULL;
71 tab_elem_data = (uint8_t *)*tab_ptr + (*nb_ptr) * elem_size;
73 memcpy(tab_elem_data, elem_data, elem_size);
115 buf_appendf(buf,
size,
"ff_sws");
119 void *
p = (
void *) (((uintptr_t) params) +
field->offset);
122 buf_appendf(buf,
size,
"_neon");
179 #define LOOP_VH(s, mask, idx) if (s->use_vh) LOOP(mask, idx)
180 #define LOOP_MASK_VH(s, p, idx) if (s->use_vh) LOOP_MASK(p, idx)
181 #define LOOP_MASK_BWD_VH(s, p, idx) if (s->use_vh) LOOP_MASK_BWD(p, idx)
184 #define CMT(comment) rasm_annotate(r, comment)
185 #define CMTF(fmt, ...) rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)
217 return ((n + 1) >> 1) * 16;
231 i_str(
r, regs[0], sp_pre);
233 i_stp(
r, regs[0], regs[1], sp_pre);
234 for (
unsigned i = 2;
i + 1 < n;
i += 2)
252 i_ldr(
r, regs[0], sp_post);
256 for (
unsigned i = (n & ~1
u) - 2;
i >= 2;
i -= 2)
258 i_ldp(
r, regs[0], regs[1], sp_post);
264 #define MAX_SAVED_REGS 12
270 if (n >= 19 && n <= 30)
271 regs[(*count)++] = gpr;
300 snprintf(func_name,
sizeof(func_name),
"ff_sws_process_%04x_neon",
mask);
346 i_mov(
r,
s->bx,
s->bx_start);
CMT(
"bx = bx_start;");
350 i_mov(
r,
s->impl,
s->op1_impl);
CMT(
"impl = op1_impl;");
355 i_cmp(
r,
s->bx,
s->bx_end);
CMT(
"if (bx != bx_end)");
356 i_bne(
r, next_block);
CMT(
" goto next_block;");
361 i_bne(
r, next_row);
CMT(
" goto next_row;");
408 if (
p->block_size == 16) {
410 i_movi(
r, bitmask_vec,
IMM(1));
CMT(
"v128 bitmask_vec = {1 <repeats 16 times>};");
411 i_dup (
r, vl[0].b8, wtmp);
CMT(
"vl[0].lo = broadcast(tmp);");
413 i_dup (
r, vtmp.
b8, wtmp);
CMT(
"vtmp.lo = broadcast(tmp);");
414 i_ins (
r, vl[0].de[1], vtmp.
de[0]);
CMT(
"vl[0].hi = vtmp.lo;");
415 i_ushl(
r, vl[0].b16, vl[0].b16, shift_vec.
b16);
CMT(
"vl[0] <<= shift_vec;");
416 i_and (
r, vl[0].b16, vl[0].b16, bitmask_vec);
CMT(
"vl[0] &= bitmask_vec;");
419 i_movi(
r, bitmask_vec,
IMM(1));
CMT(
"v128 bitmask_vec = {1 <repeats 8 times>, 0 <repeats 8 times>};");
420 i_dup (
r, vl[0].b8, wtmp);
CMT(
"vl[0].lo = broadcast(tmp);");
421 i_ushl(
r, vl[0].b8, vl[0].b8, shift_vec.
b8);
CMT(
"vl[0] <<= shift_vec;");
422 i_and (
r, vl[0].b8, vl[0].b8, bitmask_vec);
CMT(
"vl[0] &= bitmask_vec;");
436 rasm_annotate_next(
r,
"v128 nibble_mask = {0xf <repeats 8 times>, 0x0 <repeats 8 times>};");
439 if (
p->block_size == 8) {
442 i_and (
r, vl[0].b8, vl[0].b8, nibble_mask);
CMT(
"vl[0].lo &= nibble_mask;");
447 i_and (
r, vl[0].b8, vl[0].b8, nibble_mask);
CMT(
"vl[0].lo &= nibble_mask;");
477 for (
int i = 0;
i < 4;
i++) {
483 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
516 if (
p->block_size == 8) {
517 i_ushl(
r, vl[0].b8, vl[0].b8, shift_vec.
b8);
CMT(
"vl[0] <<= shift_vec;");
518 i_addv(
r, vtmp0.
b, vl[0].
b8);
CMT(
"vtmp0[0] = add_across(vl[0].lo);");
521 i_ushl(
r, vl[0].b16, vl[0].b16, shift_vec.
b16);
CMT(
"vl[0] <<= shift_vec;");
522 i_addv(
r, vtmp0.
b, vl[0].
b8);
CMT(
"vtmp0[0] = add_across(vl[0].lo);");
523 i_ins (
r, vtmp1.
de[0], vl[0].
de[1]);
CMT(
"vtmp1.lo = vl[0].hi;");
524 i_addv(
r, vtmp1.
b, vtmp1.
b8);
CMT(
"vtmp1[0] = add_across(vtmp1);");
525 i_ins (
r, vtmp0.
be[1], vtmp1.
be[0]);
CMT(
"vtmp0[1] = vtmp1[0];");
537 for (
int i = 0;
i < 4;
i++)
542 if (
p->block_size == 8) {
582 for (
int i = 0;
i < 4;
i++) {
588 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
607 for (
int i = 0;
i < 4;
i++) {
613 case sizeof(uint16_t):
617 case sizeof(uint32_t):
628 #define SWIZZLE_TMP 0xf
633 snprintf(buf,
sizeof(
char[8]),
"vtmp%c", vh ?
'h' :
'l');
635 snprintf(buf,
sizeof(
char[8]),
"v%c[%u]", vh ?
'h' :
'l', n);
638 #define PRINT_SWIZZLE_V(n, vh) print_swizzle_v((char[8]){ 0 }, n, vh)
644 return vh ?
s->vh[n] :
s->vl[n];
662 uint8_t src_used[4] = { 0 };
663 bool done[4] = {
true,
true,
true,
true };
671 for (
bool progress =
true; progress; ) {
674 if (done[
dst] || src_used[
dst])
691 uint8_t cur_dst =
dst;
695 done[cur_dst] =
true;
701 done[cur_dst] =
true;
718 uint32_t mask_val[4] = { 0 };
719 uint8_t mask_idx[4] = { 0 };
733 for (
int j = 0; j < 4; j++) {
734 if (mask_val[j] ==
val) {
735 mask_val[
i] = mask_val[j];
736 mask_idx[
i] = mask_idx[j];
746 if (
val <= 0xff ||
val == 0xffff) {
750 i_dup (
r, vt[cur_vt], mask_gpr);
753 mask_idx[
i] = cur_vt++;
795 uint16_t offset_mask = 0;
809 i_orr (
r, vl[0], vl[0], vl[
i]);
CMTF(
"vl[0] |= vl[%u];",
i);
811 i_orr(
r, vh[0], vh[0], vh[
i]);
CMTF(
"vh[0] |= vh[%u];",
i);
885 for (
int i = 0;
i < 4;
i++) {
890 size_t src_el_size =
s->el_size;
904 if (
p->block_size == 8) {
905 if (src_el_size == 1 && dst_el_size > src_el_size) {
909 }
else if (src_el_size == 4 && dst_el_size < src_el_size) {
916 if (src_el_size == 2 && dst_el_size == 4) {
921 }
else if (src_el_size == 2 && dst_el_size == 1) {
927 if (src_el_size == 1 && dst_el_size == 2) {
931 }
else if (src_el_size == 2 && dst_el_size == 1) {
957 size_t src_el_size =
s->el_size;
959 size_t dst_total_size =
p->block_size * dst_el_size;
960 size_t dst_vec_size =
FFMIN(dst_total_size, 16);
963 s->use_vh = (dst_vec_size != dst_total_size);
965 if (src_el_size == 1) {
971 if (dst_el_size == 4) {
1064 int save_mask,
bool vh_pass)
1072 RasmOp *vx = vh_pass ?
s->vh :
s->vl;
1073 char cvh = vh_pass ?
'h' :
'l';
1075 if (vh_pass && !
s->use_vh)
1082 RasmOp src_vx[4] = { vx[0], vx[1], vx[2], vx[3] };
1084 for (
int i = 0;
i < 4;
i++) {
1101 for (
int j = 0; j < 5; j++) {
1106 RasmOp vsrc = src_vx[src_j];
1107 uint8_t vc_i = i_coeff / 4;
1108 uint8_t vc_j = i_coeff & 3;
1111 if (
first && is_offset) {
1112 i_dup (
r, vx[
i], vcoeff);
CMTF(
"v%c[%u] = broadcast(vc[%u][%u]);", cvh,
i, vc_i, vc_j);
1113 }
else if (
first && !is_offset) {
1117 i_fmul (
r, vx[
i], vsrc, vcoeff);
CMTF(
"v%c[%u] = vsrc[%u] * vc[%u][%u];", cvh,
i, src_j, vc_i, vc_j);
1119 }
else if (!
p->linear.fmla) {
1129 i_fmul(
r, vtmp[vc_j], vsrc, vcoeff);
CMTF(
"vtmp[%u] = vsrc[%u] * vc[%u][%u];", vc_j, src_j, vc_i, vc_j);
1131 i_fadd(
r, vx[
i], vx[
i], vtmp[vc_j]);
CMTF(
"v%c[%u] += vtmp[%u];", cvh,
i, vc_j);
1133 i_fadd(
r, vx[
i], vx[
i], vsrc);
CMTF(
"v%c[%u] += vsrc[%u];", cvh,
i, vc_j);
1141 i_fmla(
r, vx[
i], vsrc, vcoeff);
CMTF(
"v%c[%u] += vsrc[%u] * vc[%u][%u];", cvh,
i, src_j, vc_i, vc_j);
1159 switch (num_vregs) {
1160 case 1: coeff_veclist =
vv_1(vc[0]);
break;
1161 case 2: coeff_veclist =
vv_2(vc[0], vc[1]);
break;
1162 case 3: coeff_veclist =
vv_3(vc[0], vc[1], vc[2]);
break;
1163 case 4: coeff_veclist =
vv_4(vc[0], vc[1], vc[2], vc[3]);
break;
1170 uint16_t save_mask = 0;
1171 bool overwritten[4] = {
false,
false,
false,
false };
1173 for (
int j = 0; j < 5; j++) {
1178 if (!is_offset && overwritten[src_j])
1180 overwritten[
i] =
true;
1226 for (
int y_off = 0; y_off <= max_offset; y_off++) {
1228 if (
MASK_GET(
p->dither.y_offset,
i) == y_off)
1229 sorted[n_comps++] =
i;
1256 const int block_size_log2 = (
p->block_size == 16) ? 4 : 3;
1257 const int dither_size_log2 =
p->dither.size_log2;
1258 const int sizeof_float_log2 = 2;
1259 if (dither_size_log2 != block_size_log2) {
1260 RasmOp lsb =
IMM(block_size_log2 + sizeof_float_log2);
1262 i_ubfiz(
r, tmp1, bx64, lsb,
width);
CMT(
"tmp1 = (bx & ((dither_size / block_size) - 1)) * block_size * sizeof(float);");
1263 i_add (
r, ptr, ptr, tmp1);
CMT(
"ptr += tmp1;");
1266 int last_y_off = -1;
1268 for (
int sorted_i = 0; sorted_i < n_comps; sorted_i++) {
1269 int i = sorted[sorted_i];
1270 uint8_t y_off =
MASK_GET(
p->dither.y_offset,
i);
1271 bool do_load = (y_off != last_y_off);
1273 if (last_y_off < 0) {
1275 RasmOp lsb =
IMM(dither_size_log2 + sizeof_float_log2);
1282 i_ubfiz(
r, tmp1, y64, lsb,
width);
CMT(
"tmp1 = (y & (dither_size - 1)) * dither_size * sizeof(float);");
1285 i_ubfiz(
r, tmp1, tmp1, lsb,
width);
CMT(
"tmp1 = (tmp1 & (dither_size - 1)) * dither_size * sizeof(float);");
1287 i_add(
r, ptr, ptr, tmp1);
CMT(
"ptr += tmp1;");
1288 }
else if (do_load) {
1294 int delta = (y_off - last_y_off) * (1 << dither_size_log2) *
sizeof(
float);
1295 i_add(
r, ptr, ptr,
IMM(
delta));
CMTF(
"ptr += (y_off[%u] - y_off[%u]) * dither_size * sizeof(float);",
i, prev_i);
1304 i_fadd (
r, vl[
i], vl[
i], dither_vl);
CMTF(
"vl[%u] += vditherl;",
i);
1306 i_fadd(
r, vh[
i], vh[
i], dither_vh);
CMTF(
"vh[%u] += vditherh;",
i);
1319 bool is_read =
false;
1320 bool is_write =
false;
1338 char func_name[128];
1347 size_t total_size =
p->block_size * el_size;
1349 s->vec_size =
FFMIN(total_size, 16);
1350 s->use_vh = (
s->vec_size != total_size);
1352 s->el_size = el_size;
1353 s->el_count =
s->vec_size / el_size;
1394 i_ldr(
r,
s->cont, impl_post);
CMT(
"SwsFuncPtr cont = (impl++)->cont;");
1406 int prev_levels = 0;
1419 while (prev_fields[prev_levels])
1424 if (params && prev) {
1429 if (first_diff < 0) {
1430 int diff =
field->cmp_val((
void *) (((uintptr_t) params) +
field->offset),
1431 (
void *) (((uintptr_t) prev) +
field->offset));
1440 for (
int i = prev_levels - 1;
i > first_diff;
i--) {
1441 buf_appendf(&buf, &
size,
"%*sreturn NULL;\n", 4 * (
i + 1),
"");
1442 buf_appendf(&buf, &
size,
"%*s}\n", 4 *
i,
"");
1449 for (
int i = first_diff;
i < levels;
i++) {
1451 void *
p = (
void *) (((uintptr_t) params) +
field->offset);
1452 buf_appendf(&buf, &
size,
"%*sif (%s%s == ", 4 * (
i + 1),
"", p_str,
field->name);
1454 buf_appendf(&buf, &
size,
")");
1455 if (
i == (levels - 1)) {
1456 buf_appendf(&buf, &
size,
" return ");
1458 buf_appendf(&buf, &
size,
";\n");
1460 buf_appendf(&buf, &
size,
" {\n");
1482 printf(
"#include \"libswscale/aarch64/ops_lookup.h\"\n");
1486 printf(
"extern void %s(void);\n", buf);
1491 printf(
"SwsFuncPtr ff_sws_aarch64_lookup(const SwsAArch64OpImplParams *p)\n");
1501 printf(
" return NULL;\n");
1600 while (params->
op) {
1609 printf(
"#include \"libavutil/aarch64/asm.S\"\n");
1625 _setmode(_fileno(stdout), _O_BINARY);
1628 for (
int i = 1;
i < argc;
i++) {
1629 if (!strcmp(argv[
i],
"-ops"))
1631 else if (!strcmp(argv[
i],
"-lookup"))
1635 fprintf(stderr,
"Exactly one of -ops or -lookup must be specified.\n");
static void error(const char *err)
static void asmgen_op_write_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static int linear_index_to_vx(int idx)
#define FF_DYNARRAY_ADD(av_size_max, av_elt_size, av_array, av_size, av_success, av_failure)
Add an element to a dynamic array.
#define LINEAR_MASK_GET(mask, idx, jdx)
RasmContext * rasm_alloc(void)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
__device__ int printf(const char *,...)
int main(int argc, char *argv[])
#define LOOP_MASK_BWD_VH(s, p, idx)
#define i_ld1(rctx, op0, op1)
#define LOOP_MASK_BWD(p, idx)
The following structure is used to describe one field from SwsAArch64OpImplParams.
static void reshape_all_vectors(SwsAArch64Context *s, int el_count, int el_size)
static RasmOp a64op_base(RasmOp op)
#define i_zip1(rctx, op0, op1, op2)
#define i_ld4(rctx, op0, op1)
#define i_mul(rctx, op0, op1, op2)
static RasmOp a64op_gpx(uint8_t n)
static void * av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
static void asmgen_op_read_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp a64op_w(RasmOp op)
static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr)
void rasm_free(RasmContext **prctx)
RasmNode * rasm_set_current_node(RasmContext *rctx, RasmNode *node)
#define i_st4(rctx, op0, op1)
#define u(width, name, range_min, range_max)
static void asmgen_op_max(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
This helper structure is used to mimic the assembler syntax for vector register modifiers.
RasmNode * rasm_get_current_node(RasmContext *rctx)
static void asmgen_op_write_bit(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp a64op_gpw(uint8_t n)
#define i_ld3(rctx, op0, op1)
static RasmOp vv_2(RasmOp op0, RasmOp op1)
static RasmOp vv_3(RasmOp op0, RasmOp op1, RasmOp op2)
#define PRINT_SWIZZLE_V(n, vh)
#define i_dup(rctx, op0, op1)
static void asmgen_op_dither(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
void int rasm_print(RasmContext *rctx, FILE *fp)
#define i_ld2(rctx, op0, op1)
#define i_fmla(rctx, op0, op1, op2)
static void asmgen_op_read_bit(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_rev16(rctx, op0, op1)
static void asmgen_op_write_nibble(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static unsigned clobbered_gprs(const SwsAArch64Context *s, SwsAArch64OpMask mask, RasmOp regs[MAX_SAVED_REGS])
#define i_fmin(rctx, op0, op1, op2)
#define LOOP_MASK_VH(s, p, idx)
void a64op_vec_views(RasmOp op, AArch64VecViews *out)
@ AARCH64_SWS_OP_READ_NIBBLE
@ AARCH64_SWS_OP_SWAP_BYTES
@ AARCH64_SWS_OP_READ_BIT
#define i_st2(rctx, op0, op1)
#define i_st3(rctx, op0, op1)
#define i_ushr(rctx, op0, op1, op2)
Runtime assembler for AArch64.
static void asmgen_op_read_nibble(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_addv(rctx, op0, op1)
static void asmgen_op_clear(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp swizzle_a64op(SwsAArch64Context *s, uint8_t n, uint8_t vh)
static double val(void *priv, double ch)
RasmNode * rasm_add_label(RasmContext *rctx, int id)
#define i_fadd(rctx, op0, op1, op2)
static void asmgen_op_pack(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ld1r(rctx, op0, op1)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
static RasmOp vv_1(RasmOp op0)
static RasmOp a64op_elem(RasmOp op, uint8_t idx)
@ AARCH64_SWS_OP_WRITE_NIBBLE
uint16_t SwsAArch64OpMask
static const int offsets[]
static void impl_func_name(char **buf, size_t *size, const SwsAArch64OpImplParams *params)
static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_impl_cont
static void linear_pass(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vt, RasmOp *vc, int save_mask, bool vh_pass)
Performs one pass of the linear transform over a single vector bank (low or high).
#define i_ins(rctx, op0, op1)
static void asmgen_op_convert(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this field
static RasmOp v_8b(RasmOp op)
#define i_ldr(rctx, op0, op1)
static RasmOp a64op_make_vec(uint8_t n, uint8_t el_count, uint8_t el_size)
static const ParamField * op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS]
static void asmgen_op_scale(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
the definition of that something depends on the semantic of the filter The callback must examine the status of the filter s links and proceed accordingly The status of output links is stored in the status_in and status_out fields and tested by the then the processing requires a frame on this link and the filter is expected to make efforts in that direction The status of input links is stored by the fifo and status_out fields
void aarch64_op_impl_func_name(char *buf, size_t size, const SwsAArch64OpImplParams *params)
static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str)
#define i_cmp(rctx, op0, op1)
#define offsetof_exec_out_bump
static size_t aarch64_pixel_size(SwsAArch64PixelType fmt)
static const SwsAArch64OpImplParams impl_params[]
Implementation parameters for all exported functions.
static void asmgen_op_unpack(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_fmul(rctx, op0, op1, op2)
static void asmgen_op_write_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
static RasmOp a64op_post(RasmOp op, int16_t imm)
@ AARCH64_SWS_OP_READ_PACKED
#define i_umin(rctx, op0, op1, op2)
#define LOOP_VH(s, mask, idx)
#define offsetof_exec_out
#define i_add(rctx, op0, op1, op2)
static void asmgen_op_read_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static void asmgen_process(SwsAArch64Context *s, SwsAArch64OpMask mask)
int rasm_new_label(RasmContext *rctx, const char *name)
Allocate a new label ID with the given name.
static RasmOp a64op_sp(void)
@ AARCH64_SWS_OP_WRITE_PLANAR
#define i_uxtl(rctx, op0, op1)
#define LOOP_MASK(p, idx)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define i(width, name, range_min, range_max)
#define i_ldrb(rctx, op0, op1)
#define i_shl(rctx, op0, op1, op2)
#define i_fmax(rctx, op0, op1, op2)
RasmNode * load_cont_node
#define i_zip2(rctx, op0, op1, op2)
#define i_fcvtzu(rctx, op0, op1)
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
static void asmgen_op_read_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
#define i_ucvtf(rctx, op0, op1)
@ AARCH64_SWS_OP_WRITE_BIT
static RasmOp a64op_off(RasmOp op, int16_t imm)
@ AARCH64_SWS_OP_READ_PLANAR
static void av_freep(void *ptr)
#define i_uxtl2(rctx, op0, op1)
static RasmOp vv_4(RasmOp op0, RasmOp op1, RasmOp op2, RasmOp op3)
#define i_lsr(rctx, op0, op1, op2)
void rasm_annotate_next(RasmContext *rctx, const char *comment)
static unsigned clobbered_frame_size(unsigned n)
#define i_ldp(rctx, op0, op1, op2)
static void asmgen_op_swap_bytes(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_impl_priv
static void asmgen_set_load_cont_node(SwsAArch64Context *s)
Set node where the continuation address will be loaded and impl will be incremented.
static void asmgen_op_write_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_xtn(rctx, op0, op1)
static RasmOp a64op_pre(RasmOp op, int16_t imm)
static void asmgen_op_lshift(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
void rasm_annotate_nextf(RasmContext *rctx, char *s, size_t n, const char *fmt,...)
static RasmOp rasm_op_label(int id)
#define i_umax(rctx, op0, op1, op2)
#define offsetof_exec_in
These values will be used by ops_asmgen to access fields inside of SwsOpExec and SwsOpImpl.
#define MASK_SET(mask, idx, val)
int rasm_func_begin(RasmContext *rctx, const char *name, bool export, bool jumpable)
#define i_mov16b(rctx, op0, op1)
static int lookup_gen(void)
#define MASK_GET(mask, idx)
#define i_str(rctx, op0, op1)
static void swizzle_emit(SwsAArch64Context *s, uint8_t dst, uint8_t src)
#define i_and(rctx, op0, op1, op2)
#define i_ldrh(rctx, op0, op1)
static void asmgen_prologue(SwsAArch64Context *s, const RasmOp *regs, unsigned n)
static void asmgen_op_expand(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ubfiz(rctx, op0, op1, op2, op3)
static void asmgen_epilogue(SwsAArch64Context *s, const RasmOp *regs, unsigned n)
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
#define i_orr(rctx, op0, op1, op2)
@ AARCH64_SWS_OP_WRITE_PACKED
SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType operation.
#define i_rev32(rctx, op0, op1)
#define i_stp(rctx, op0, op1, op2)
static void asmgen_op_min(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_movi(rctx, op0, op1)
static RasmOp a64op_x(RasmOp op)
static void asmgen_op_linear(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_exec_in_bump
static void asmgen_op_rshift(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static int linear_index_is_offset(int idx)
static int linear_num_vregs(const SwsAArch64OpImplParams *params)
static const char * print_swizzle_v(char buf[8], uint8_t n, uint8_t vh)
static void asmgen_op_swizzle(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ushl(rctx, op0, op1, op2)
RasmNode * rasm_add_comment(RasmContext *rctx, const char *comment)
static RasmOp a64op_lr(void)
static uint8_t a64op_gpr_n(RasmOp op)
#define i_mov(rctx, op0, op1)
static RasmOp v_q(RasmOp op)