FFmpeg
ops_chain.h
Go to the documentation of this file.
1 /**
2  * Copyright (C) 2025 Niklas Haas
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef SWSCALE_OPS_CHAIN_H
22 #define SWSCALE_OPS_CHAIN_H
23 
24 #include "libavutil/cpu.h"
25 #include "libavutil/mem.h"
26 
27 #include "ops_internal.h"
28 
29 /**
30  * Helpers for SIMD implementations based on chained kernels, using a
31  * continuation passing style to link them together.
32  *
33  * The basic idea here is to "link" together a series of different operation
34  * kernels by constructing a list of kernel addresses into an SwsOpChain. Each
35  * kernel will load the address of the next kernel (the "continuation") from
36  * this struct, and jump directly into it; using an internal function signature
37  * that is an implementation detail of the specific backend.
38  */
39 
/* Forward declaration; the full definition appears later in this header. */
typedef struct SwsOpTable SwsOpTable;
41 
42 /**
43  * Private data for each kernel.
44  */
45 typedef union SwsOpPriv {
46  DECLARE_ALIGNED_16(char, data)[16];
47 
48  /* Common types */
49  void *ptr;
50  uint8_t u8[16];
51  int8_t i8[16];
52  uint16_t u16[8];
53  int16_t i16[8];
54  uint32_t u32[4];
56  float f32[4];
57  uint64_t u64[2];
59  uintptr_t uptr[2];
60  intptr_t iptr[2];
61 } SwsOpPriv;
62 
63 static_assert(sizeof(SwsOpPriv) == 16, "SwsOpPriv size mismatch");
64 
65 /**
66  * Per-kernel execution context.
67  *
68  * Note: This struct is hard-coded in assembly, so do not change the layout.
69  */
70 typedef void (*SwsFuncPtr)(void);
71 typedef struct SwsOpImpl {
72  SwsFuncPtr cont; /* [offset = 0] Continuation for this operation. */
73  SwsOpPriv priv; /* [offset = 16] Private data for this operation. */
74 } SwsOpImpl;
75 
76 static_assert(sizeof(SwsOpImpl) == 32, "SwsOpImpl layout mismatch");
77 static_assert(offsetof(SwsOpImpl, priv) == 16, "SwsOpImpl layout mismatch");
78 
79 /**
80  * Compiled "chain" of operations, which can be dispatched efficiently.
81  * Effectively just a list of function pointers, alongside a small amount of
82  * private data for each operation.
83  */
84 typedef struct SwsOpChain {
85 #define SWS_MAX_OPS 16
86  SwsOpImpl impl[SWS_MAX_OPS + 1]; /* reserve extra space for the entrypoint */
87  void (*free[SWS_MAX_OPS + 1])(SwsOpPriv *);
88  int num_impl;
89  int cpu_flags; /* set of all used CPU flags */
90  int over_read; /* chain over-reads input by this many bytes */
91  int over_write; /* chain over-writes output by this many bytes */
92 } SwsOpChain;
93 
95 void ff_sws_op_chain_free_cb(void *chain);
96 static inline void ff_sws_op_chain_free(SwsOpChain *chain)
97 {
99 }
100 
101 /* Returns 0 on success, or a negative error code. */
103  void (*free)(SwsOpPriv *), const SwsOpPriv *priv);
104 
105 typedef struct SwsImplParams {
107  const SwsOp *op;
109 } SwsImplParams;
110 
111 typedef struct SwsImplResult {
112  SwsFuncPtr func; /* overrides `SwsOpEntry.func` if non-NULL */
113  SwsOpPriv priv; /* private data for this implementation instance */
114  void (*free)(SwsOpPriv *priv); /* free function for `priv` */
115  int over_read; /* implementation over-reads input by this many bytes */
116  int over_write; /* implementation over-writes output by this many bytes */
117 } SwsImplResult;
118 
119 typedef struct SwsOpEntry {
120  /* Kernel metadata; reduced size subset of SwsOp */
123  bool flexible; /* if true, only the type and op are matched */
124  bool unused[4]; /* for kernels which operate on a subset of components */
125 
126  union { /* extra data defining the operation, unless `flexible` is true */
131  uint32_t linear_mask; /* subset of SwsLinearOp */
132  int dither_size; /* subset of SwsDitherOp */
133  int clear_value; /* clear value for integer clears */
134  AVRational scale; /* scale factor for SWS_OP_SCALE */
135  };
136 
137  /* Kernel implementation */
139  int (*setup)(const SwsImplParams *params, SwsImplResult *out); /* optional */
140 } SwsOpEntry;
141 
142 /* Setup helpers */
143 int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out);
144 int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out);
145 int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out);
146 int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out);
147 
148 static inline void ff_op_priv_free(SwsOpPriv *priv)
149 {
150  av_freep(&priv->ptr);
151 }
152 
153 struct SwsOpTable {
154  unsigned cpu_flags; /* required CPU flags for this table */
155  int block_size; /* fixed block size of this table */
156  const SwsOpEntry *entries[]; /* terminated by NULL */
157 };
158 
159 /**
160  * "Compile" a single op by looking it up in a list of fixed size op tables.
161  * See `op_match` in `ops_chain.c` for details on how the matching works.
162  *
163  * Returns 0, AVERROR(EAGAIN), or a negative error code.
164  */
166  int num_tables, SwsOpList *ops, const int block_size,
167  SwsOpChain *chain);
168 
169 #endif
SwsOpTable
Copyright (C) 2025 Niklas Haas.
Definition: ops_chain.h:153
func
int(* func)(AVBPrint *dst, const char *in, const char *arg)
Definition: jacosubdec.c:66
SwsImplResult::func
SwsFuncPtr func
Definition: ops_chain.h:112
SWS_MAX_OPS
#define SWS_MAX_OPS
Definition: ops_chain.h:85
out
static FILE * out
Definition: movenc.c:55
int64_t
long long int64_t
Definition: coverity.c:34
SwsOpImpl::cont
SwsFuncPtr cont
Definition: ops_chain.h:72
ff_sws_op_chain_alloc
SwsOpChain * ff_sws_op_chain_alloc(void)
Definition: ops_chain.c:29
data
const char data[16]
Definition: mxf.c:149
SwsOpEntry::type
SwsPixelType type
Definition: ops_chain.h:122
ff_sws_op_chain_append
int ff_sws_op_chain_append(SwsOpChain *chain, SwsFuncPtr func, void(*free)(SwsOpPriv *), const SwsOpPriv *priv)
Definition: ops_chain.c:48
SwsOpEntry::op
SwsOpType op
Definition: ops_chain.h:121
SwsOpEntry::setup
int(* setup)(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.h:139
SwsOpChain::cpu_flags
int cpu_flags
Definition: ops_chain.h:89
SwsPixelType
SwsPixelType
Copyright (C) 2025 Niklas Haas.
Definition: ops.h:30
SwsOpTable::block_size
int block_size
Definition: ops_chain.h:155
SwsOpPriv::u32
uint32_t u32[4]
Definition: ops_chain.h:54
SwsFuncPtr
void(* SwsFuncPtr)(void)
Per-kernel execution context.
Definition: ops_chain.h:70
SwsOpEntry::scale
AVRational scale
Definition: ops_chain.h:134
ff_sws_op_chain_free_cb
void ff_sws_op_chain_free_cb(void *chain)
Definition: ops_chain.c:34
SwsOpPriv::DECLARE_ALIGNED_16
DECLARE_ALIGNED_16(char, data)[16]
tables
Writing a table generator. This documentation is preliminary. Parts of the API are not good and should be changed. Basic concepts: a table generator consists of two files, *_tablegen.c and *_tablegen.h. The .h file will provide the variable declarations and initialization code for the tables.
Definition: tablegen.txt:10
SwsReadWriteOp
Definition: ops.h:100
SwsSwizzleOp
Definition: ops.h:122
SwsOpChain::over_read
int over_read
Definition: ops_chain.h:90
SwsOpChain::free
void(* free[SWS_MAX_OPS+1])(SwsOpPriv *)
Definition: ops_chain.h:87
SwsOpEntry::swizzle
SwsSwizzleOp swizzle
Definition: ops_chain.h:129
SwsOpEntry::convert
SwsConvertOp convert
Definition: ops_chain.h:130
SwsOpImpl
Definition: ops_chain.h:71
ff_sws_op_compile_tables
int ff_sws_op_compile_tables(SwsContext *ctx, const SwsOpTable *const tables[], int num_tables, SwsOpList *ops, const int block_size, SwsOpChain *chain)
"Compile" a single op by looking it up in a list of fixed size op tables.
Definition: ops_chain.c:196
ctx
static AVFormatContext * ctx
Definition: movenc.c:49
SwsOpChain::impl
SwsOpImpl impl[SWS_MAX_OPS+1]
Definition: ops_chain.h:86
SwsOpTable::entries
const SwsOpEntry * entries[]
Definition: ops_chain.h:156
SwsOpPriv::i64
int64_t i64[2]
Definition: ops_chain.h:58
SwsOpPriv::f32
float f32[4]
Definition: ops_chain.h:56
SwsOpEntry::dither_size
int dither_size
Definition: ops_chain.h:132
SwsOpPriv::ptr
void * ptr
Definition: ops_chain.h:49
SwsOpChain
Compiled "chain" of operations, which can be dispatched efficiently.
Definition: ops_chain.h:84
SwsOpEntry::flexible
bool flexible
Definition: ops_chain.h:123
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
SwsOpEntry::clear_value
int clear_value
Definition: ops_chain.h:133
SwsImplParams::op
const SwsOp * op
Definition: ops_chain.h:107
ff_sws_setup_q
int ff_sws_setup_q(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:278
SwsImplResult::over_read
int over_read
Definition: ops_chain.h:115
SwsOpType
SwsOpType
Definition: ops.h:43
SwsImplResult::over_write
int over_write
Definition: ops_chain.h:116
SwsImplParams
Definition: ops_chain.h:105
SwsOpEntry::func
SwsFuncPtr func
Definition: ops_chain.h:138
cpu.h
SwsOpPriv::u8
uint8_t u8[16]
Definition: ops_chain.h:50
SwsOpPriv::uptr
uintptr_t uptr[2]
Definition: ops_chain.h:59
SwsOpChain::num_impl
int num_impl
Definition: ops_chain.h:88
SwsOpPriv::i16
int16_t i16[8]
Definition: ops_chain.h:53
SwsOpEntry
Definition: ops_chain.h:119
SwsOpPriv::u16
uint16_t u16[8]
Definition: ops_chain.h:52
ff_sws_setup_u8
int ff_sws_setup_u8(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:260
SwsImplParams::ctx
SwsContext * ctx
Definition: ops_chain.h:108
ff_sws_op_chain_free
static void ff_sws_op_chain_free(SwsOpChain *chain)
Definition: ops_chain.h:96
SwsOpTable::cpu_flags
unsigned cpu_flags
Definition: ops_chain.h:154
SwsPackOp
Definition: ops.h:114
SwsOpPriv::iptr
intptr_t iptr[2]
Definition: ops_chain.h:60
ops_internal.h
SwsImplResult::free
void(* free)(SwsOpPriv *priv)
Definition: ops_chain.h:114
SwsOp
Definition: ops.h:188
ff_op_priv_free
static void ff_op_priv_free(SwsOpPriv *priv)
Definition: ops_chain.h:148
ff_sws_setup_q4
int ff_sws_setup_q4(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:290
SwsOpEntry::rw
SwsReadWriteOp rw
Definition: ops_chain.h:127
SwsOpEntry::unused
bool unused[4]
Definition: ops_chain.h:124
SwsOpImpl::priv
SwsOpPriv priv
Definition: ops_chain.h:73
SwsOpPriv::u64
uint64_t u64[2]
Definition: ops_chain.h:57
SwsImplResult::priv
SwsOpPriv priv
Definition: ops_chain.h:113
mem.h
ff_sws_setup_u
int ff_sws_setup_u(const SwsImplParams *params, SwsImplResult *out)
Definition: ops_chain.c:266
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
int32_t
int32_t
Definition: audioconvert.c:56
SwsConvertOp
Definition: ops.h:136
SwsOpPriv::i32
int32_t i32[4]
Definition: ops_chain.h:55
SwsOpPriv::i8
int8_t i8[16]
Definition: ops_chain.h:51
SwsOpEntry::pack
SwsPackOp pack
Definition: ops_chain.h:128
SwsOpList
Helper struct for representing a list of operations.
Definition: ops.h:224
SwsContext
Main external API structure.
Definition: swscale.h:206
SwsOpPriv
Private data for each kernel.
Definition: ops_chain.h:45
SwsImplResult
Definition: ops_chain.h:111
SwsImplParams::table
const SwsOpTable * table
Definition: ops_chain.h:106
SwsOpEntry::linear_mask
uint32_t linear_mask
Definition: ops_chain.h:131
SwsOpChain::over_write
int over_write
Definition: ops_chain.h:91