Go to the documentation of this file.
39 #define AVUTIL_AVASSERT_H
41 #define AVUTIL_MACROS_H
43 #define av_assert0(cond) assert(cond)
44 #define av_malloc(s) malloc(s)
45 #define av_mallocz(s) calloc(1, s)
46 #define av_realloc(p, s) realloc(p, s)
47 #define av_strdup(s) strdup(s)
48 #define av_free(p) free(p)
49 #define FFMAX(a,b) ((a) > (b) ? (a) : (b))
50 #define FFMIN(a,b) ((a) > (b) ? (b) : (a))
54 void **pptr = (
void **) ptr;
66 const uint8_t *elem_data)
68 uint8_t *tab_elem_data =
NULL;
71 tab_elem_data = (uint8_t *)*tab_ptr + (*nb_ptr) * elem_size;
73 memcpy(tab_elem_data, elem_data, elem_size);
115 buf_appendf(buf,
size,
"ff_sws");
119 void *
p = (
void *) (((uintptr_t) params) +
field->offset);
122 buf_appendf(buf,
size,
"_neon");
178 #define LOOP_VH(s, mask, idx) if (s->use_vh) LOOP(mask, idx)
179 #define LOOP_MASK_VH(s, p, idx) if (s->use_vh) LOOP_MASK(p, idx)
180 #define LOOP_MASK_BWD_VH(s, p, idx) if (s->use_vh) LOOP_MASK_BWD(p, idx)
183 #define CMT(comment) rasm_annotate(r, comment)
184 #define CMTF(fmt, ...) rasm_annotatef(r, (char[128]){0}, 128, fmt, __VA_ARGS__)
216 return ((n + 1) >> 1) * 16;
230 i_str(
r, regs[0], sp_pre);
232 i_stp(
r, regs[0], regs[1], sp_pre);
233 for (
unsigned i = 2;
i + 1 < n;
i += 2)
251 i_ldr(
r, regs[0], sp_post);
255 for (
unsigned i = (n & ~1
u) - 2;
i >= 2;
i -= 2)
257 i_ldp(
r, regs[0], regs[1], sp_post);
263 #define MAX_SAVED_REGS 10
269 if (n >= 19 && n <= 28)
270 regs[(*count)++] = gpr;
332 i_mov(
r,
s->bx,
s->bx_start);
CMT(
"bx = bx_start;");
333 i_mov(
r,
s->impl,
s->op1_impl);
CMT(
"impl = op1_impl;");
334 i_br (
r,
s->op0_func);
CMT(
"jump to op0_func");
347 i_mov(
r,
s->impl,
s->op1_impl);
CMT(
"impl = op1_impl;");
352 i_cmp(
r,
s->bx,
s->bx_end);
CMT(
"if (bx != bx_end)");
364 i_mov(
r,
s->bx,
s->bx_start);
CMT(
"bx = bx_start;");
368 i_br (
r,
s->op0_func);
CMT(
"jump to op0_func");
405 if (
p->block_size == 16) {
407 i_movi(
r, bitmask_vec,
IMM(1));
CMT(
"v128 bitmask_vec = {1 <repeats 16 times>};");
408 i_dup (
r, vl[0].b8, wtmp);
CMT(
"vl[0].lo = broadcast(tmp);");
410 i_dup (
r, vtmp.
b8, wtmp);
CMT(
"vtmp.lo = broadcast(tmp);");
411 i_ins (
r, vl[0].de[1], vtmp.
de[0]);
CMT(
"vl[0].hi = vtmp.lo;");
412 i_ushl(
r, vl[0].b16, vl[0].b16, shift_vec.
b16);
CMT(
"vl[0] <<= shift_vec;");
413 i_and (
r, vl[0].b16, vl[0].b16, bitmask_vec);
CMT(
"vl[0] &= bitmask_vec;");
416 i_movi(
r, bitmask_vec,
IMM(1));
CMT(
"v128 bitmask_vec = {1 <repeats 8 times>, 0 <repeats 8 times>};");
417 i_dup (
r, vl[0].b8, wtmp);
CMT(
"vl[0].lo = broadcast(tmp);");
418 i_ushl(
r, vl[0].b8, vl[0].b8, shift_vec.
b8);
CMT(
"vl[0] <<= shift_vec;");
419 i_and (
r, vl[0].b8, vl[0].b8, bitmask_vec);
CMT(
"vl[0] &= bitmask_vec;");
433 rasm_annotate_next(
r,
"v128 nibble_mask = {0xf <repeats 8 times>, 0x0 <repeats 8 times>};");
436 if (
p->block_size == 8) {
439 i_and (
r, vl[0].b8, vl[0].b8, nibble_mask);
CMT(
"vl[0].lo &= nibble_mask;");
444 i_and (
r, vl[0].b8, vl[0].b8, nibble_mask);
CMT(
"vl[0].lo &= nibble_mask;");
458 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
479 if (
p->mask == 0x0001) {
494 for (
int i = 0;
i < 4;
i++) {
500 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
532 if (
p->block_size == 8) {
533 i_ushl(
r, vl[0].b8, vl[0].b8, shift_vec.
b8);
CMT(
"vl[0] <<= shift_vec;");
534 i_addv(
r, vtmp0.
b, vl[0].
b8);
CMT(
"vtmp0[0] = add_across(vl[0].lo);");
537 i_ushl(
r, vl[0].b16, vl[0].b16, shift_vec.
b16);
CMT(
"vl[0] <<= shift_vec;");
538 i_addv(
r, vtmp0.
b, vl[0].
b8);
CMT(
"vtmp0[0] = add_across(vl[0].lo);");
539 i_ins (
r, vtmp1.
de[0], vl[0].
de[1]);
CMT(
"vtmp1.lo = vl[0].hi;");
540 i_addv(
r, vtmp1.
b, vtmp1.
b8);
CMT(
"vtmp1[0] = add_across(vtmp1);");
541 i_ins (
r, vtmp0.
be[1], vtmp1.
be[0]);
CMT(
"vtmp0[1] = vtmp1[0];");
553 for (
int i = 0;
i < 4;
i++)
558 if (
p->block_size == 8) {
582 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
585 case 0x108:
i_stp(
r, vl[0].d, vh[0].d,
a64op_post(
s->out[0],
s->vec_size * 2));
break;
586 case 0x110:
i_stp(
r, vl[0].q, vh[0].q,
a64op_post(
s->out[0],
s->vec_size * 2));
break;
603 if (
p->mask == 0x0001) {
618 for (
int i = 0;
i < 4;
i++) {
624 switch ((
s->use_vh ? 0x100 : 0) |
s->vec_size) {
643 for (
int i = 0;
i < 4;
i++) {
649 case sizeof(uint16_t):
653 case sizeof(uint32_t):
664 #define SWIZZLE_TMP 0xf
669 snprintf(buf,
sizeof(
char[8]),
"vtmp%c", vh ?
'h' :
'l');
671 snprintf(buf,
sizeof(
char[8]),
"v%c[%u]", vh ?
'h' :
'l', n);
674 #define PRINT_SWIZZLE_V(n, vh) print_swizzle_v((char[8]){ 0 }, n, vh)
680 return vh ?
s->vh[n] :
s->vl[n];
698 uint8_t src_used[4] = { 0 };
699 bool done[4] = {
true,
true,
true,
true };
707 for (
bool progress =
true; progress; ) {
710 if (done[
dst] || src_used[
dst])
727 uint8_t cur_dst =
dst;
731 done[cur_dst] =
true;
737 done[cur_dst] =
true;
754 uint32_t mask_val[4] = { 0 };
755 uint8_t mask_idx[4] = { 0 };
769 for (
int j = 0; j < 4; j++) {
770 if (mask_val[j] ==
val) {
771 mask_val[
i] = mask_val[j];
772 mask_idx[
i] = mask_idx[j];
782 if (
val <= 0xff ||
val == 0xffff) {
786 i_dup (
r, vt[cur_vt], mask_gpr);
789 mask_idx[
i] = cur_vt++;
831 uint16_t offset_mask = 0;
845 i_orr (
r, vl[0], vl[0], vl[
i]);
CMTF(
"vl[0] |= vl[%u];",
i);
847 i_orr(
r, vh[0], vh[0], vh[
i]);
CMTF(
"vh[0] |= vh[%u];",
i);
920 for (
int i = 0;
i < 4;
i++) {
925 size_t src_el_size =
s->el_size;
939 if (
p->block_size == 8) {
940 if (src_el_size == 1 && dst_el_size > src_el_size) {
944 }
else if (src_el_size == 4 && dst_el_size < src_el_size) {
951 if (src_el_size == 2 && dst_el_size == 4) {
956 }
else if (src_el_size == 2 && dst_el_size == 1) {
962 if (src_el_size == 1 && dst_el_size == 2) {
966 }
else if (src_el_size == 2 && dst_el_size == 1) {
992 size_t src_el_size =
s->el_size;
994 size_t dst_total_size =
p->block_size * dst_el_size;
995 size_t dst_vec_size =
FFMIN(dst_total_size, 16);
998 s->use_vh = (dst_vec_size != dst_total_size);
1000 if (src_el_size == 1) {
1006 if (dst_el_size == 4) {
1096 int save_mask,
bool vh_pass)
1104 RasmOp *vx = vh_pass ?
s->vh :
s->vl;
1105 char cvh = vh_pass ?
'h' :
'l';
1107 if (vh_pass && !
s->use_vh)
1114 RasmOp src_vx[4] = { vx[0], vx[1], vx[2], vx[3] };
1116 for (
int i = 0;
i < 4;
i++) {
1133 for (
int j = 0; j < 5; j++) {
1138 RasmOp vsrc = src_vx[src_j];
1139 uint8_t vc_i = i_coeff / 4;
1140 uint8_t vc_j = i_coeff & 3;
1143 if (
first && is_offset) {
1144 i_dup (
r, vx[
i], vcoeff);
CMTF(
"v%c[%u] = broadcast(vc[%u][%u]);", cvh,
i, vc_i, vc_j);
1145 }
else if (
first && !is_offset) {
1149 i_fmul (
r, vx[
i], vsrc, vcoeff);
CMTF(
"v%c[%u] = vsrc[%u] * vc[%u][%u];", cvh,
i, src_j, vc_i, vc_j);
1151 }
else if (!
p->linear.fmla) {
1161 i_fmul(
r, vtmp[vc_j], vsrc, vcoeff);
CMTF(
"vtmp[%u] = vsrc[%u] * vc[%u][%u];", vc_j, src_j, vc_i, vc_j);
1163 i_fadd(
r, vx[
i], vx[
i], vtmp[vc_j]);
CMTF(
"v%c[%u] += vtmp[%u];", cvh,
i, vc_j);
1165 i_fadd(
r, vx[
i], vx[
i], vsrc);
CMTF(
"v%c[%u] += vsrc[%u];", cvh,
i, vc_j);
1173 i_fmla(
r, vx[
i], vsrc, vcoeff);
CMTF(
"v%c[%u] += vsrc[%u] * vc[%u][%u];", cvh,
i, src_j, vc_i, vc_j);
1191 switch (num_vregs) {
1192 case 1: coeff_veclist =
vv_1(vc[0]);
break;
1193 case 2: coeff_veclist =
vv_2(vc[0], vc[1]);
break;
1194 case 3: coeff_veclist =
vv_3(vc[0], vc[1], vc[2]);
break;
1195 case 4: coeff_veclist =
vv_4(vc[0], vc[1], vc[2], vc[3]);
break;
1201 uint16_t save_mask = 0;
1202 bool overwritten[4] = {
false,
false,
false,
false };
1204 for (
int j = 0; j < 5; j++) {
1209 if (!is_offset && overwritten[src_j])
1211 overwritten[
i] =
true;
1257 for (
int y_off = 0; y_off <= max_offset; y_off++) {
1259 if (
MASK_GET(
p->dither.y_offset,
i) == y_off)
1260 sorted[n_comps++] =
i;
1286 const int block_size_log2 = (
p->block_size == 16) ? 4 : 3;
1287 const int dither_size_log2 =
p->dither.size_log2;
1288 const int sizeof_float_log2 = 2;
1289 if (dither_size_log2 != block_size_log2) {
1290 RasmOp lsb =
IMM(block_size_log2 + sizeof_float_log2);
1292 i_ubfiz(
r, tmp1, bx64, lsb,
width);
CMT(
"tmp1 = (bx & ((dither_size / block_size) - 1)) * block_size * sizeof(float);");
1293 i_add (
r, ptr, ptr, tmp1);
CMT(
"ptr += tmp1;");
1296 int last_y_off = -1;
1298 for (
int sorted_i = 0; sorted_i < n_comps; sorted_i++) {
1299 int i = sorted[sorted_i];
1300 uint8_t y_off =
MASK_GET(
p->dither.y_offset,
i);
1301 bool do_load = (y_off != last_y_off);
1303 if (last_y_off < 0) {
1305 RasmOp lsb =
IMM(dither_size_log2 + sizeof_float_log2);
1312 i_ubfiz(
r, tmp1, y64, lsb,
width);
CMT(
"tmp1 = (y & (dither_size - 1)) * dither_size * sizeof(float);");
1315 i_ubfiz(
r, tmp1, tmp1, lsb,
width);
CMT(
"tmp1 = (tmp1 & (dither_size - 1)) * dither_size * sizeof(float);");
1317 i_add(
r, ptr, ptr, tmp1);
CMT(
"ptr += tmp1;");
1318 }
else if (do_load) {
1324 int delta = (y_off - last_y_off) * (1 << dither_size_log2) *
sizeof(
float);
1325 i_add(
r, ptr, ptr,
IMM(
delta));
CMTF(
"ptr += (y_off[%u] - y_off[%u]) * dither_size * sizeof(float);",
i, prev_i);
1334 i_fadd (
r, vl[
i], vl[
i], dither_vl);
CMTF(
"vl[%u] += vditherl;",
i);
1336 i_fadd(
r, vh[
i], vh[
i], dither_vh);
CMTF(
"vh[%u] += vditherh;",
i);
1349 char func_name[128];
1358 size_t total_size =
p->block_size * el_size;
1360 s->vec_size =
FFMIN(total_size, 16);
1361 s->use_vh = (
s->vec_size != total_size);
1363 s->el_size = el_size;
1364 s->el_count =
s->vec_size / el_size;
1423 int prev_levels = 0;
1436 while (prev_fields[prev_levels])
1441 if (params && prev) {
1446 if (first_diff < 0) {
1447 int diff =
field->cmp_val((
void *) (((uintptr_t) params) +
field->offset),
1448 (
void *) (((uintptr_t) prev) +
field->offset));
1457 for (
int i = prev_levels - 1;
i > first_diff;
i--) {
1458 buf_appendf(&buf, &
size,
"%*sreturn NULL;\n", 4 * (
i + 1),
"");
1459 buf_appendf(&buf, &
size,
"%*s}\n", 4 *
i,
"");
1466 for (
int i = first_diff;
i < levels;
i++) {
1468 void *
p = (
void *) (((uintptr_t) params) +
field->offset);
1469 buf_appendf(&buf, &
size,
"%*sif (%s%s == ", 4 * (
i + 1),
"", p_str,
field->name);
1471 buf_appendf(&buf, &
size,
")");
1472 if (
i == (levels - 1)) {
1473 buf_appendf(&buf, &
size,
" return ");
1475 buf_appendf(&buf, &
size,
";\n");
1477 buf_appendf(&buf, &
size,
" {\n");
1499 printf(
"#include \"libswscale/aarch64/ops_lookup.h\"\n");
1503 printf(
"extern void %s(void);\n", buf);
1508 printf(
"SwsFuncPtr ff_sws_aarch64_lookup(const SwsAArch64OpImplParams *p)\n");
1518 printf(
" return NULL;\n");
1606 while (params->
op) {
1615 printf(
"#include \"libavutil/aarch64/asm.S\"\n");
1631 _setmode(_fileno(stdout), _O_BINARY);
1634 for (
int i = 1;
i < argc;
i++) {
1635 if (!strcmp(argv[
i],
"-ops"))
1637 else if (!strcmp(argv[
i],
"-lookup"))
1641 fprintf(stderr,
"Exactly one of -ops or -lookup must be specified.\n");
static void error(const char *err)
static void asmgen_op_write_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static int linear_index_to_vx(int idx)
#define FF_DYNARRAY_ADD(av_size_max, av_elt_size, av_array, av_size, av_success, av_failure)
Add an element to a dynamic array.
#define LINEAR_MASK_GET(mask, idx, jdx)
RasmContext * rasm_alloc(void)
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism toagain later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
__device__ int printf(const char *,...)
int main(int argc, char *argv[])
#define LOOP_MASK_BWD_VH(s, p, idx)
#define i_ld1(rctx, op0, op1)
#define LOOP_MASK_BWD(p, idx)
The following structure is used to describe one field from SwsAArch64OpImplParams.
static void reshape_all_vectors(SwsAArch64Context *s, int el_count, int el_size)
static RasmOp a64op_base(RasmOp op)
#define i_zip1(rctx, op0, op1, op2)
#define i_ld4(rctx, op0, op1)
#define i_mul(rctx, op0, op1, op2)
static RasmOp a64op_gpx(uint8_t n)
static void * av_dynarray2_add(void **tab_ptr, int *nb_ptr, size_t elem_size, const uint8_t *elem_data)
static void asmgen_op_read_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp a64op_w(RasmOp op)
static void clobber_gpr(RasmOp regs[MAX_SAVED_REGS], unsigned *count, RasmOp gpr)
void rasm_free(RasmContext **prctx)
RasmNode * rasm_set_current_node(RasmContext *rctx, RasmNode *node)
#define i_st4(rctx, op0, op1)
#define u(width, name, range_min, range_max)
static void asmgen_op_max(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
This helper structure is used to mimic the assembler syntax for vector register modifiers.
RasmNode * rasm_get_current_node(RasmContext *rctx)
static void asmgen_op_write_bit(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp a64op_gpw(uint8_t n)
#define i_ld3(rctx, op0, op1)
static RasmOp vv_2(RasmOp op0, RasmOp op1)
static RasmOp vv_3(RasmOp op0, RasmOp op1, RasmOp op2)
#define PRINT_SWIZZLE_V(n, vh)
#define i_dup(rctx, op0, op1)
static void asmgen_op_dither(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
void int rasm_print(RasmContext *rctx, FILE *fp)
#define i_ld2(rctx, op0, op1)
#define i_fmla(rctx, op0, op1, op2)
static void asmgen_op_read_bit(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_rev16(rctx, op0, op1)
static void asmgen_op_write_nibble(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static void asmgen_process_return(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_fmin(rctx, op0, op1, op2)
#define LOOP_MASK_VH(s, p, idx)
void a64op_vec_views(RasmOp op, AArch64VecViews *out)
@ AARCH64_SWS_OP_READ_NIBBLE
@ AARCH64_SWS_OP_SWAP_BYTES
@ AARCH64_SWS_OP_READ_BIT
#define i_st2(rctx, op0, op1)
#define i_st3(rctx, op0, op1)
#define i_ushr(rctx, op0, op1, op2)
Runtime assembler for AArch64.
static void asmgen_op_read_nibble(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_addv(rctx, op0, op1)
static void asmgen_op_clear(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static RasmOp swizzle_a64op(SwsAArch64Context *s, uint8_t n, uint8_t vh)
static double val(void *priv, double ch)
RasmNode * rasm_add_label(RasmContext *rctx, int id)
#define i_fadd(rctx, op0, op1, op2)
static void asmgen_op_pack(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ld1r(rctx, op0, op1)
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But first
static RasmOp vv_1(RasmOp op0)
static RasmOp a64op_elem(RasmOp op, uint8_t idx)
static void asmgen_op(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
@ AARCH64_SWS_OP_WRITE_NIBBLE
static const int offsets[]
static void impl_func_name(char **buf, size_t *size, const SwsAArch64OpImplParams *params)
static void asmgen_op_cps(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_impl_cont
static void linear_pass(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vt, RasmOp *vc, int save_mask, bool vh_pass)
Performs one pass of the linear transform over a single vector bank (low or high).
#define i_ins(rctx, op0, op1)
static void asmgen_op_convert(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this field
static RasmOp v_8b(RasmOp op)
#define i_ldr(rctx, op0, op1)
static RasmOp a64op_make_vec(uint8_t n, uint8_t el_count, uint8_t el_size)
static const ParamField * op_fields[AARCH64_SWS_OP_TYPE_NB][MAX_LEVELS]
static void asmgen_op_scale(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
the definition of that something depends on the semantic of the filter The callback must examine the status of the filter s links and proceed accordingly The status of output links is stored in the status_in and status_out fields and tested by the then the processing requires a frame on this link and the filter is expected to make efforts in that direction The status of input links is stored by the fifo and status_out fields
void aarch64_op_impl_func_name(char *buf, size_t size, const SwsAArch64OpImplParams *params)
static void aarch64_op_impl_lookup_str(char *buf, size_t size, const SwsAArch64OpImplParams *params, const SwsAArch64OpImplParams *prev, const char *p_str)
#define i_cmp(rctx, op0, op1)
static void asmgen_op_read_packed_1(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_exec_out_bump
static size_t aarch64_pixel_size(SwsAArch64PixelType fmt)
static const SwsAArch64OpImplParams impl_params[]
Implementation parameters for all exported functions.
static void asmgen_op_unpack(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_fmul(rctx, op0, op1, op2)
static void asmgen_op_write_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
static RasmOp a64op_post(RasmOp op, int16_t imm)
@ AARCH64_SWS_OP_READ_PACKED
#define i_umin(rctx, op0, op1, op2)
#define LOOP_VH(s, mask, idx)
#define offsetof_exec_out
#define i_add(rctx, op0, op1, op2)
static void asmgen_op_read_planar(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
int rasm_new_label(RasmContext *rctx, const char *name)
Allocate a new label ID with the given name.
static RasmOp a64op_sp(void)
@ AARCH64_SWS_OP_WRITE_PLANAR
#define i_uxtl(rctx, op0, op1)
#define LOOP_MASK(p, idx)
uint8_t ptrdiff_t const uint8_t ptrdiff_t int intptr_t intptr_t int int16_t * dst
#define i(width, name, range_min, range_max)
#define i_ldrb(rctx, op0, op1)
#define i_shl(rctx, op0, op1, op2)
#define i_fmax(rctx, op0, op1, op2)
#define i_zip2(rctx, op0, op1, op2)
#define i_fcvtzu(rctx, op0, op1)
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
static void asmgen_op_read_packed_n(SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp *vx)
#define i_ucvtf(rctx, op0, op1)
@ AARCH64_SWS_OP_WRITE_BIT
static RasmOp a64op_off(RasmOp op, int16_t imm)
@ AARCH64_SWS_OP_READ_PLANAR
static void av_freep(void *ptr)
#define i_uxtl2(rctx, op0, op1)
static RasmOp vv_4(RasmOp op0, RasmOp op1, RasmOp op2, RasmOp op3)
#define i_lsr(rctx, op0, op1, op2)
void rasm_annotate_next(RasmContext *rctx, const char *comment)
static unsigned clobbered_frame_size(unsigned n)
#define i_ldp(rctx, op0, op1, op2)
static void asmgen_op_swap_bytes(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_impl_priv
static void asmgen_op_write_packed(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_xtn(rctx, op0, op1)
static RasmOp a64op_pre(RasmOp op, int16_t imm)
static void asmgen_op_lshift(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static unsigned clobbered_gprs(const SwsAArch64Context *s, const SwsAArch64OpImplParams *p, RasmOp regs[MAX_SAVED_REGS])
void rasm_annotate_nextf(RasmContext *rctx, char *s, size_t n, const char *fmt,...)
#define i_umax(rctx, op0, op1, op2)
static void asmgen_process(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_exec_in
These values will be used by ops_asmgen to access fields inside of SwsOpExec and SwsOpImpl.
#define MASK_SET(mask, idx, val)
int rasm_func_begin(RasmContext *rctx, const char *name, bool export, bool jumpable)
#define i_mov16b(rctx, op0, op1)
static int lookup_gen(void)
#define MASK_GET(mask, idx)
#define i_str(rctx, op0, op1)
static void swizzle_emit(SwsAArch64Context *s, uint8_t dst, uint8_t src)
#define i_and(rctx, op0, op1, op2)
#define i_ldrh(rctx, op0, op1)
static void asmgen_op_write_packed_1(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static void asmgen_prologue(SwsAArch64Context *s, const RasmOp *regs, unsigned n)
static void asmgen_op_expand(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ubfiz(rctx, op0, op1, op2, op3)
static void asmgen_epilogue(SwsAArch64Context *s, const RasmOp *regs, unsigned n)
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
#define i_orr(rctx, op0, op1, op2)
@ AARCH64_SWS_OP_WRITE_PACKED
SwsAArch64OpImplParams describes the parameters for an SwsAArch64OpType operation.
#define i_rev32(rctx, op0, op1)
#define i_stp(rctx, op0, op1, op2)
static void asmgen_op_min(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
@ AARCH64_SWS_OP_PROCESS_RETURN
#define i_movi(rctx, op0, op1)
static RasmOp a64op_x(RasmOp op)
static void asmgen_op_linear(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define offsetof_exec_in_bump
static void asmgen_op_rshift(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
static int linear_index_is_offset(int idx)
static int linear_num_vregs(const SwsAArch64OpImplParams *params)
static const char * print_swizzle_v(char buf[8], uint8_t n, uint8_t vh)
static void asmgen_op_swizzle(SwsAArch64Context *s, const SwsAArch64OpImplParams *p)
#define i_ushl(rctx, op0, op1, op2)
RasmNode * rasm_add_comment(RasmContext *rctx, const char *comment)
static uint8_t a64op_gpr_n(RasmOp op)
#define i_mov(rctx, op0, op1)
static RasmOp v_q(RasmOp op)