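/**
 * @file
 * Edge detection filter (Canny edge detector).
 */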
#include "libavutil/opt.h"
#include "avfilter.h"
#include "formats.h"
#include "internal.h"
#include "video.h"

typedef struct {
    const AVClass *class;
    uint8_t  *tmpbuf;
    uint16_t *gradients;
    char     *directions;
    double   low, high;
    uint8_t  low_u8, high_u8;
} EdgeDetectContext;

#define OFFSET(x) offsetof(EdgeDetectContext, x)
#define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
static const AVOption edgedetect_options[] = {
    { "high", "set high threshold", OFFSET(high), AV_OPT_TYPE_DOUBLE, {.dbl=50/255.}, 0, 1, FLAGS },
    { "low",  "set low threshold",  OFFSET(low),  AV_OPT_TYPE_DOUBLE, {.dbl=20/255.}, 0, 1, FLAGS },
    { NULL },
};

AVFILTER_DEFINE_CLASS(edgedetect);

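/* Parse the filter arguments; "low" and "high" are given in the [0,1] range
 * and converted here to the 8-bit values used by double_threshold(). */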
static av_cold int init(AVFilterContext *ctx, const char *args)
{
    int ret;
    EdgeDetectContext *edgedetect = ctx->priv;

    edgedetect->class = &edgedetect_class;
    av_opt_set_defaults(edgedetect);

    if ((ret = av_set_options_string(edgedetect, args, "=", ":")) < 0)
        return ret;

    edgedetect->low_u8  = edgedetect->low  * 255. + .5;
    edgedetect->high_u8 = edgedetect->high * 255. + .5;
    return 0;
}

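/* The filter processes a single 8-bit grayscale plane. */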
static int query_formats(AVFilterContext *ctx)
{
    static const enum PixelFormat pix_fmts[] = {PIX_FMT_GRAY8, PIX_FMT_NONE};
    ff_set_common_formats(ctx, ff_make_format_list(pix_fmts));
    return 0;
}

static int config_props(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    EdgeDetectContext *edgedetect = ctx->priv;

    edgedetect->tmpbuf     = av_malloc(inlink->w * inlink->h);
    edgedetect->gradients  = av_calloc(inlink->w * inlink->h, sizeof(*edgedetect->gradients));
    edgedetect->directions = av_malloc(inlink->w * inlink->h);
    if (!edgedetect->tmpbuf || !edgedetect->gradients || !edgedetect->directions)
        return AVERROR(ENOMEM);
    return 0;
}

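/* First Canny stage: smooth the input with a 5x5 Gaussian kernel to reduce
 * noise before computing gradients. The coefficients below sum to 159, hence
 * the final division; the two outermost rows and columns are copied
 * unfiltered from the source. */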
static void gaussian_blur(AVFilterContext *ctx, int w, int h,
                                uint8_t *dst, int dst_linesize,
                          const uint8_t *src, int src_linesize)
{
    int i, j;

    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
    for (j = 2; j < h - 2; j++) {
        dst[0] = src[0];
        dst[1] = src[1];
        for (i = 2; i < w - 2; i++) {
            dst[i] = ((src[-2*src_linesize + i-2] + src[2*src_linesize + i-2]) * 2
                    + (src[-2*src_linesize + i-1] + src[2*src_linesize + i-1]) * 4
                    + (src[-2*src_linesize + i  ] + src[2*src_linesize + i  ]) * 5
                    + (src[-2*src_linesize + i+1] + src[2*src_linesize + i+1]) * 4
                    + (src[-2*src_linesize + i+2] + src[2*src_linesize + i+2]) * 2

                    + (src[  -src_linesize + i-2] + src[  src_linesize + i-2]) *  4
                    + (src[  -src_linesize + i-1] + src[  src_linesize + i-1]) *  9
                    + (src[  -src_linesize + i  ] + src[  src_linesize + i  ]) * 12
                    + (src[  -src_linesize + i+1] + src[  src_linesize + i+1]) *  9
                    + (src[  -src_linesize + i+2] + src[  src_linesize + i+2]) *  4

                    + src[i-2] *  5
                    + src[i-1] * 12
                    + src[i  ] * 15
                    + src[i+1] * 12
                    + src[i+2] *  5) / 159;
        }
        dst[i    ] = src[i    ];
        dst[i + 1] = src[i + 1];

        dst += dst_linesize;
        src += src_linesize;
    }
    memcpy(dst, src, w); dst += dst_linesize; src += src_linesize;
    memcpy(dst, src, w);
}

enum {
    DIRECTION_45UP,
    DIRECTION_45DOWN,
    DIRECTION_HORIZONTAL,
    DIRECTION_VERTICAL,
};

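/* Round the gradient direction to one of four sectors (horizontal, vertical
 * and the two diagonals) used by non_maximum_suppression(). */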
static int get_rounded_direction(int gx, int gy)
{
    /* Reference angles:
     *   tan( pi/8) = sqrt(2) - 1
     *   tan(3pi/8) = sqrt(2) + 1
     * gy/gx is the tangent of the gradient angle, so gy/gx is compared
     * against the reference tangents, or more simply gy against
     * <reference tangent> * gx.
     *
     * gx and gy are bounded by [-1020, 1020], so 16.16 fixed point is enough:
     *   round((sqrt(2) - 1) * (1 << 16)) =  27146
     *   round((sqrt(2) + 1) * (1 << 16)) = 158218
     */
    if (gx) {
        int tanpi8gx, tan3pi8gx;

        if (gx < 0)
            gx = -gx, gy = -gy;
        gy <<= 16;
        tanpi8gx  =  27146 * gx;
        tan3pi8gx = 158218 * gx;
        if (gy > -tan3pi8gx && gy < -tanpi8gx)  return DIRECTION_45UP;
        if (gy > -tanpi8gx  && gy <  tanpi8gx)  return DIRECTION_HORIZONTAL;
        if (gy >  tanpi8gx  && gy <  tan3pi8gx) return DIRECTION_45DOWN;
    }
    return DIRECTION_VERTICAL;
}

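/* Second Canny stage: 3x3 Sobel operator. The gradient magnitude is
 * approximated as |gx| + |gy| and stored as a 16-bit value; the rounded
 * direction of every pixel is kept for the suppression stage. */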
static void sobel(AVFilterContext *ctx, int w, int h,
                        uint16_t *dst, int dst_linesize,
                  const uint8_t  *src, int src_linesize)
{
    int i, j;
    EdgeDetectContext *edgedetect = ctx->priv;

    for (j = 1; j < h - 1; j++) {
        dst += dst_linesize;
        src += src_linesize;
        for (i = 1; i < w - 1; i++) {
            const int gx =
                -1*src[-src_linesize + i-1] + 1*src[-src_linesize + i+1]
                -2*src[                i-1] + 2*src[                i+1]
                -1*src[ src_linesize + i-1] + 1*src[ src_linesize + i+1];
            const int gy =
                -1*src[-src_linesize + i-1] + 1*src[ src_linesize + i-1]
                -2*src[-src_linesize + i  ] + 2*src[ src_linesize + i  ]
                -1*src[-src_linesize + i+1] + 1*src[ src_linesize + i+1];

            dst[i] = FFABS(gx) + FFABS(gy);
            edgedetect->directions[j*w + i] = get_rounded_direction(gx, gy);
        }
    }
}

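/* Third Canny stage: keep a pixel only if its gradient magnitude is a local
 * maximum along the rounded gradient direction. Surviving values are clipped
 * to 8 bits; everything else is left at zero in the (pre-cleared) output
 * buffer. */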
static void non_maximum_suppression(AVFilterContext *ctx, int w, int h,
                                          uint8_t  *dst, int dst_linesize,
                                    const uint16_t *src, int src_linesize)
{
    int i, j;
    EdgeDetectContext *edgedetect = ctx->priv;

#define COPY_MAXIMA(ay, ax, by, bx) do {                \
    if (src[i] > src[(ay)*src_linesize + i+(ax)] &&     \
        src[i] > src[(by)*src_linesize + i+(bx)])       \
        dst[i] = av_clip_uint8(src[i]);                 \
} while (0)

    for (j = 1; j < h - 1; j++) {
        dst += dst_linesize;
        src += src_linesize;
        for (i = 1; i < w - 1; i++) {
            switch (edgedetect->directions[j*w + i]) {
            case DIRECTION_45UP:       COPY_MAXIMA( 1, -1, -1,  1); break;
            case DIRECTION_45DOWN:     COPY_MAXIMA(-1, -1,  1,  1); break;
            case DIRECTION_HORIZONTAL: COPY_MAXIMA( 0, -1,  0,  1); break;
            case DIRECTION_VERTICAL:   COPY_MAXIMA(-1,  0,  1,  0); break;
            }
        }
    }
}

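/* Fourth Canny stage: hysteresis thresholding. Pixels above the high
 * threshold are kept; pixels between the two thresholds are kept only when
 * at least one of their 8 neighbours is above the high threshold. */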
static void double_threshold(AVFilterContext *ctx, int w, int h,
                                   uint8_t *dst, int dst_linesize,
                             const uint8_t *src, int src_linesize)
{
    int i, j;
    EdgeDetectContext *edgedetect = ctx->priv;
    const int low  = edgedetect->low_u8;
    const int high = edgedetect->high_u8;

    for (j = 0; j < h; j++) {
        for (i = 0; i < w; i++) {
            if (src[i] > high) {
                dst[i] = src[i];
                continue;
            }

            /* check the 8 neighbours only for pixels that are not on the
             * picture border */
            if (!(!i || i == w - 1 || !j || j == h - 1) &&
                src[i] > low &&
                (src[-src_linesize + i-1] > high ||
                 src[-src_linesize + i  ] > high ||
                 src[-src_linesize + i+1] > high ||
                 src[                i-1] > high ||
                 src[                i+1] > high ||
                 src[ src_linesize + i-1] > high ||
                 src[ src_linesize + i  ] > high ||
                 src[ src_linesize + i+1] > high))
                dst[i] = src[i];
            else
                dst[i] = 0;
        }
        dst += dst_linesize;
        src += src_linesize;
    }
}

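/* Run the whole pipeline on the current frame: blur, gradient computation,
 * non-maximum suppression and hysteresis thresholding. tmpbuf is reused as
 * both the blurred plane and the suppression output. */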
static int end_frame(AVFilterLink *inlink)
{
    AVFilterContext *ctx = inlink->dst;
    EdgeDetectContext *edgedetect = ctx->priv;
    AVFilterLink *outlink = inlink->dst->outputs[0];
    AVFilterBufferRef *inpicref  = inlink->cur_buf;
    AVFilterBufferRef *outpicref = outlink->out_buf;
    uint8_t  *tmpbuf    = edgedetect->tmpbuf;
    uint16_t *gradients = edgedetect->gradients;

    /* gaussian filter to reduce noise */
    gaussian_blur(ctx, inlink->w, inlink->h,
                  tmpbuf,            inlink->w,
                  inpicref->data[0], inpicref->linesize[0]);

    /* compute the 16-bit gradients and directions for the next step */
    sobel(ctx, inlink->w, inlink->h,
          gradients, inlink->w,
          tmpbuf,    inlink->w);

    /* non_maximum_suppression() only writes the pixels it keeps, so the
     * reused buffer must be cleared first */
    memset(tmpbuf, 0, inlink->w * inlink->h);
    non_maximum_suppression(ctx, inlink->w, inlink->h,
                            tmpbuf,    inlink->w,
                            gradients, inlink->w);

    /* keep high values, and low values connected to high ones */
    double_threshold(ctx, inlink->w, inlink->h,
                     outpicref->data[0], outpicref->linesize[0],
                     tmpbuf, inlink->w);

    ff_draw_slice(outlink, 0, outlink->h, 1);
    return ff_end_frame(outlink);
}

static av_cold void uninit(AVFilterContext *ctx)
{
    EdgeDetectContext *edgedetect = ctx->priv;
    av_freep(&edgedetect->tmpbuf);
    av_freep(&edgedetect->gradients);
    av_freep(&edgedetect->directions);
}

static int null_draw_slice(AVFilterLink *inlink, int y, int h, int slice_dir) { return 0; }

AVFilter avfilter_vf_edgedetect = {
    .name          = "edgedetect",
    .description   = NULL_IF_CONFIG_SMALL("Detect and draw edges."),
    .priv_size     = sizeof(EdgeDetectContext),
    .init          = init,
    .uninit        = uninit,
    .query_formats = query_formats,

    .inputs = (const AVFilterPad[]) {
        {
            .name         = "default",
            .type         = AVMEDIA_TYPE_VIDEO,
            .draw_slice   = null_draw_slice,
            .config_props = config_props,
            .end_frame    = end_frame,
            .min_perms    = AV_PERM_READ,
        },
        { .name = NULL }
    },
    .outputs = (const AVFilterPad[]) {
        {
            .name = "default",
            .type = AVMEDIA_TYPE_VIDEO,
        },
        { .name = NULL }
    },
};
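
/*
 * Example invocation (assuming an ffmpeg build that includes this filter);
 * both thresholds are given in the [0,1] range and are scaled to 8-bit
 * values in init():
 *
 *   ffmpeg -i input.mp4 -vf edgedetect=low=0.1:high=0.4 output.mp4
 */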