FFmpeg
dnn_backend_native_layer_avgpool.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2020
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * DNN native backend implementation.
24  */
25 
26 #include "libavutil/avassert.h"
28 
29 int ff_dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
30 {
31  AvgPoolParams *avgpool_params;
32  int dnn_size = 0;
33  avgpool_params = av_malloc(sizeof(*avgpool_params));
34  if(!avgpool_params)
35  return 0;
36 
37  avgpool_params->strides = (int32_t)avio_rl32(model_file_context);
38  avgpool_params->padding_method = (int32_t)avio_rl32(model_file_context);
39  avgpool_params->kernel_size = (int32_t)avio_rl32(model_file_context);
40  dnn_size += 12;
41 
42  if (dnn_size > file_size || avgpool_params->kernel_size <= 0 || avgpool_params->strides <=0){
43  av_freep(&avgpool_params);
44  return 0;
45  }
46 
47  layer->params = avgpool_params;
48  layer->input_operand_indexes[0] = (int32_t)avio_rl32(model_file_context);
49  layer->output_operand_index = (int32_t)avio_rl32(model_file_context);
50  dnn_size += 8;
51 
52  if (layer->input_operand_indexes[0] >= operands_num || layer->output_operand_index >= operands_num) {
53  return 0;
54  }
55  return dnn_size;
56 }
57 
58 int ff_dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t *input_operand_indexes,
59  int32_t output_operand_index, const void *parameters, NativeContext *ctx)
60 {
61  float *output;
62  int height_end, width_end, height_radius, width_radius, output_height, output_width, kernel_area;
63  int32_t input_operand_index = input_operand_indexes[0];
64  int number = operands[input_operand_index].dims[0];
65  int height = operands[input_operand_index].dims[1];
66  int width = operands[input_operand_index].dims[2];
67  int channel = operands[input_operand_index].dims[3];
68  const float *input = operands[input_operand_index].data;
69  const AvgPoolParams *avgpool_params = parameters;
70 
71  int kernel_strides = avgpool_params->strides;
72  int src_linesize = width * channel;
73  DnnOperand *output_operand = &operands[output_operand_index];
74 
75  /**
76  * When padding_method = SAME, the tensorflow will only padding the hald number of 0 pixels
77  * except the remainders.
78  * Eg: assuming the input height = 1080, the strides = 11, so the remainders = 1080 % 11 = 2
79  * and if ksize = 5: it will fill (5 - 2) >> 1 = 1 line before the first line of input image,
80  * and 5 - 2 - 1 = 2 lines after the last line of input image.
81  * and if ksize = 7: it will fill (7 - 2) >> 1 = 2 lines before the first line of input image,
82  * and 7 - 2 - 2 = 3 lines after the last line of input image.
83  */
84  if (avgpool_params->padding_method == SAME) {
85  height_end = height;
86  width_end = width;
87  height_radius = avgpool_params->kernel_size - ((height - 1) % kernel_strides + 1);
88  width_radius = avgpool_params->kernel_size - ((width - 1) % kernel_strides + 1);
89  height_radius = height_radius < 0 ? 0 : height_radius >> 1;
90  width_radius = width_radius < 0 ? 0 : width_radius >> 1;
91  output_height = ceil(height / (kernel_strides * 1.0));
92  output_width = ceil(width / (kernel_strides * 1.0));
93  } else {
94  av_assert0(avgpool_params->padding_method == VALID);
95  height_end = height - avgpool_params->kernel_size + 1;
96  width_end = width - avgpool_params->kernel_size + 1;
97  height_radius = 0;
98  width_radius = 0;
99  output_height = ceil((height - avgpool_params->kernel_size + 1) / (kernel_strides * 1.0));
100  output_width = ceil((width - avgpool_params->kernel_size + 1) / (kernel_strides * 1.0));
101  }
102 
103  output_operand->dims[0] = number;
104  output_operand->dims[1] = output_height;
105  output_operand->dims[2] = output_width;
106  // not support pooling in channel dimension now
107  output_operand->dims[3] = channel;
108  output_operand->data_type = operands[input_operand_index].data_type;
109  output_operand->length = ff_calculate_operand_data_length(output_operand);
110  if (output_operand->length <= 0) {
111  av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
112  return AVERROR(EINVAL);
113  }
114  output_operand->data = av_realloc(output_operand->data, output_operand->length);
115  if (!output_operand->data) {
116  av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
117  return AVERROR(ENOMEM);
118  }
119  output = output_operand->data;
120 
121  for (int y = 0; y < height_end; y += kernel_strides) {
122  for (int x = 0; x < width_end; x += kernel_strides) {
123  for (int n_channel = 0; n_channel < channel; ++n_channel) {
124  output[n_channel] = 0.0;
125  kernel_area = 0;
126  for (int kernel_y = 0; kernel_y < avgpool_params->kernel_size; ++kernel_y) {
127  for (int kernel_x = 0; kernel_x < avgpool_params->kernel_size; ++kernel_x) {
128  float input_pel;
129  int y_pos = y + (kernel_y - height_radius);
130  int x_pos = x + (kernel_x - width_radius);
131  if (x_pos < 0 || x_pos >= width || y_pos < 0 || y_pos >= height) {
132  input_pel = 0.0;
133  } else {
134  kernel_area++;
135  input_pel = input[y_pos * src_linesize + x_pos * channel + n_channel];
136  }
137  output[n_channel] += input_pel;
138  }
139  }
140  output[n_channel] /= kernel_area;
141  }
142  output += channel;
143  }
144  }
145 
146  return 0;
147 }
AVERROR
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining all references to the list are updated That means that if a filter requires that its input and output have the same format amongst a supported all it has to do is use a reference to the same list of formats query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. Frame references ownership and permissions
output
filter_frame For filters that do not use the this method is called when a frame is pushed to the filter s input It can be called at any time except in a reentrant way If the input frame is enough to produce output
Definition: filter_design.txt:225
av_malloc
#define av_malloc(s)
Definition: tableprint_vlc.h:30
AvgPoolParams::strides
int32_t strides
Definition: dnn_backend_native_layer_avgpool.h:32
AvgPoolParams::kernel_size
int32_t kernel_size
Definition: dnn_backend_native_layer_avgpool.h:32
ff_calculate_operand_data_length
int32_t ff_calculate_operand_data_length(const DnnOperand *oprd)
Definition: dnn_backend_native.c:503
avassert.h
ceil
static __device__ float ceil(float a)
Definition: cuda_runtime.h:176
AV_LOG_ERROR
#define AV_LOG_ERROR
Something went wrong and cannot losslessly be recovered.
Definition: log.h:180
ff_dnn_execute_layer_avg_pool
int ff_dnn_execute_layer_avg_pool(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const void *parameters, NativeContext *ctx)
Execute the Average Pooling Layer.
Definition: dnn_backend_native_layer_avgpool.c:58
width
#define width
DnnOperand::data
void * data
data pointer with data length in bytes.
Definition: dnn_backend_native.h:104
av_assert0
#define av_assert0(cond)
assert() equivalent, that is always enabled.
Definition: avassert.h:37
DnnOperand::data_type
DNNDataType data_type
support different kinds of data type such as float, half float, int8 etc, first support float now.
Definition: dnn_backend_native.h:85
ctx
AVFormatContext * ctx
Definition: movenc.c:48
Layer::params
void * params
Definition: dnn_backend_native.h:66
av_realloc
void * av_realloc(void *ptr, size_t size)
Allocate, reallocate, or free a block of memory.
Definition: mem.c:153
DnnOperand::dims
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
Definition: dnn_backend_native.h:74
SAME
@ SAME
Definition: dnn_backend_native.h:54
DnnOperand::length
int32_t length
Definition: dnn_backend_native.h:105
avio_rl32
unsigned int avio_rl32(AVIOContext *s)
Definition: aviobuf.c:759
AVIOContext
Bytestream IO Context.
Definition: avio.h:162
Layer::output_operand_index
int32_t output_operand_index
Definition: dnn_backend_native.h:65
NativeContext
Definition: dnn_backend_native.h:118
Layer
Definition: dnn_backend_native.h:57
Layer::input_operand_indexes
int32_t input_operand_indexes[4]
a layer can have multiple inputs and one output.
Definition: dnn_backend_native.h:64
VALID
@ VALID
Definition: dnn_backend_native.h:54
height
#define height
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
dnn_backend_native_layer_avgpool.h
AvgPoolParams::padding_method
DNNPaddingParam padding_method
Definition: dnn_backend_native_layer_avgpool.h:33
DnnOperand
Definition: dnn_backend_native.h:69
AvgPoolParams
Definition: dnn_backend_native_layer_avgpool.h:31
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:34
int32_t
int32_t
Definition: audioconvert.c:56
av_log
#define av_log(a,...)
Definition: tableprint_vlc.h:27
ff_dnn_load_layer_avg_pool
int ff_dnn_load_layer_avg_pool(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
Load Average Pooling Layer.
Definition: dnn_backend_native_layer_avgpool.c:29
channel
channel
Definition: ebur128.h:39