FFmpeg
webvttdec.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * WebVTT subtitle demuxer
24  * @see http://dev.w3.org/html5/webvtt/
25  */
26 
27 #include "avformat.h"
28 #include "demux.h"
29 #include "internal.h"
30 #include "subtitles.h"
31 #include "libavutil/bprint.h"
32 #include "libavutil/intreadwrite.h"
33 #include "libavutil/opt.h"
34 
35 typedef struct {
36  const AVClass *class;
38  int kind;
40 
41 static int webvtt_probe(const AVProbeData *p)
42 {
43  const uint8_t *ptr = p->buf;
44 
45  if (AV_RB24(ptr) == 0xEFBBBF)
46  ptr += 3; /* skip UTF-8 BOM */
47  if (!strncmp(ptr, "WEBVTT", 6) &&
48  (!ptr[6] || strchr("\n\r\t ", ptr[6])))
49  return AVPROBE_SCORE_MAX;
50  return 0;
51 }
52 
53 static int64_t read_ts(const char *s)
54 {
55  int hh, mm, ss, ms;
56  if (sscanf(s, "%u:%u:%u.%u", &hh, &mm, &ss, &ms) == 4) return (hh*3600LL + mm*60LL + ss) * 1000LL + ms;
57  if (sscanf(s, "%u:%u.%u", &mm, &ss, &ms) == 3) return ( mm*60LL + ss) * 1000LL + ms;
58  return AV_NOPTS_VALUE;
59 }
60 
62 {
63  WebVTTContext *webvtt = s->priv_data;
64  AVBPrint cue;
65  int res = 0;
67 
68  if (!st)
69  return AVERROR(ENOMEM);
70  avpriv_set_pts_info(st, 64, 1, 1000);
73  st->disposition |= webvtt->kind;
74 
76 
77  for (;;) {
78  int i;
79  int64_t pos;
80  AVPacket *sub;
81  const char *p, *identifier, *settings;
82  size_t identifier_len, settings_len;
83  int64_t ts_start, ts_end;
84 
85  res = ff_subtitles_read_chunk(s->pb, &cue);
86  if (res < 0)
87  goto end;
88 
89  if (!cue.len)
90  break;
91 
92  p = identifier = cue.str;
93  pos = avio_tell(s->pb);
94 
95  /* ignore header chunk */
96  if (!strncmp(p, "\xEF\xBB\xBFWEBVTT", 9) ||
97  !strncmp(p, "WEBVTT", 6) ||
98  !strncmp(p, "STYLE", 5) ||
99  !strncmp(p, "REGION", 6) ||
100  !strncmp(p, "NOTE", 4))
101  continue;
102 
103  /* optional cue identifier (can be a number like in SRT or some kind of
104  * chaptering id) */
105  for (i = 0; p[i] && p[i] != '\n' && p[i] != '\r'; i++) {
106  if (!strncmp(p + i, "-->", 3)) {
107  identifier = NULL;
108  break;
109  }
110  }
111  if (!identifier)
112  identifier_len = 0;
113  else {
114  identifier_len = strcspn(p, "\r\n");
115  p += identifier_len;
116  if (*p == '\r')
117  p++;
118  if (*p == '\n')
119  p++;
120  }
121 
122  /* cue timestamps */
123  if ((ts_start = read_ts(p)) == AV_NOPTS_VALUE)
124  break;
125  if (!(p = strstr(p, "-->")))
126  break;
127  p += 2;
128  do p++; while (*p == ' ' || *p == '\t');
129  if ((ts_end = read_ts(p)) == AV_NOPTS_VALUE)
130  break;
131 
132  /* optional cue settings */
133  p += strcspn(p, "\n\r\t ");
134  while (*p == '\t' || *p == ' ')
135  p++;
136  settings = p;
137  settings_len = strcspn(p, "\r\n");
138  p += settings_len;
139  if (*p == '\r')
140  p++;
141  if (*p == '\n')
142  p++;
143 
144  /* create packet */
145  sub = ff_subtitles_queue_insert(&webvtt->q, p, strlen(p), 0);
146  if (!sub) {
147  res = AVERROR(ENOMEM);
148  goto end;
149  }
150  sub->pos = pos;
151  sub->pts = ts_start;
152  sub->duration = ts_end - ts_start;
153 
154 #define SET_SIDE_DATA(name, type) do { \
155  if (name##_len) { \
156  uint8_t *buf = av_packet_new_side_data(sub, type, name##_len); \
157  if (!buf) { \
158  res = AVERROR(ENOMEM); \
159  goto end; \
160  } \
161  memcpy(buf, name, name##_len); \
162  } \
163 } while (0)
164 
167  }
168 
169  ff_subtitles_queue_finalize(s, &webvtt->q);
170 
171 end:
172  av_bprint_finalize(&cue, NULL);
173  return res;
174 }
175 
177 {
178  WebVTTContext *webvtt = s->priv_data;
179  return ff_subtitles_queue_read_packet(&webvtt->q, pkt);
180 }
181 
182 static int webvtt_read_seek(AVFormatContext *s, int stream_index,
183  int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
184 {
185  WebVTTContext *webvtt = s->priv_data;
186  return ff_subtitles_queue_seek(&webvtt->q, s, stream_index,
187  min_ts, ts, max_ts, flags);
188 }
189 
191 {
192  WebVTTContext *webvtt = s->priv_data;
193  ff_subtitles_queue_clean(&webvtt->q);
194  return 0;
195 }
196 
197 #define OFFSET(x) offsetof(WebVTTContext, x)
198 #define KIND_FLAGS AV_OPT_FLAG_SUBTITLE_PARAM|AV_OPT_FLAG_DECODING_PARAM
199 
200 static const AVOption options[] = {
201  { "kind", "Set kind of WebVTT track", OFFSET(kind), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, KIND_FLAGS, .unit = "webvtt_kind" },
202  { "subtitles", "WebVTT subtitles kind", 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, INT_MIN, INT_MAX, KIND_FLAGS, .unit = "webvtt_kind" },
203  { "captions", "WebVTT captions kind", 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_CAPTIONS }, INT_MIN, INT_MAX, KIND_FLAGS, .unit = "webvtt_kind" },
204  { "descriptions", "WebVTT descriptions kind", 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_DESCRIPTIONS }, INT_MIN, INT_MAX, KIND_FLAGS, .unit = "webvtt_kind" },
205  { "metadata", "WebVTT metadata kind", 0, AV_OPT_TYPE_CONST, { .i64 = AV_DISPOSITION_METADATA }, INT_MIN, INT_MAX, KIND_FLAGS, .unit = "webvtt_kind" },
206  { NULL }
207 };
208 
210  .class_name = "WebVTT demuxer",
211  .item_name = av_default_item_name,
212  .option = options,
213  .version = LIBAVUTIL_VERSION_INT,
214 };
215 
217  .p.name = "webvtt",
218  .p.long_name = NULL_IF_CONFIG_SMALL("WebVTT subtitle"),
219  .p.extensions = "vtt",
220  .p.priv_class = &webvtt_demuxer_class,
221  .priv_data_size = sizeof(WebVTTContext),
222  .flags_internal = FF_INFMT_FLAG_INIT_CLEANUP,
226  .read_seek2 = webvtt_read_seek,
228 };
AVMEDIA_TYPE_SUBTITLE
@ AVMEDIA_TYPE_SUBTITLE
Definition: avutil.h:204
AV_BPRINT_SIZE_UNLIMITED
#define AV_BPRINT_SIZE_UNLIMITED
AVERROR
Filter design note: the word “frame” indicates either a video frame or a group of audio samples, as stored in an AVFrame structure. Format negotiation: for each input and each output, the list of supported formats. For video that means pixel format. For audio that means channel layout and sample format; they are references to shared objects. When the negotiation mechanism computes the intersection of the formats supported at each end of a link, all references to both lists are replaced with a reference to the intersection. And when a single format is eventually chosen for a link amongst the remaining list, all references to the list are updated. That means that if a filter requires that its input and output have the same format amongst a supported list, all it has to do is use a reference to the same list of formats. query_formats can leave some formats unset and return AVERROR(EAGAIN) to cause the negotiation mechanism to try again later. That can be used by filters with complex requirements to use the format negotiated on one link to set the formats supported on another. (Next section: Frame references, ownership and permissions.)
opt.h
AVCodecParameters::codec_type
enum AVMediaType codec_type
General type of the encoded data.
Definition: codec_par.h:51
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
avformat_new_stream
AVStream * avformat_new_stream(AVFormatContext *s, const struct AVCodec *c)
Add a new stream to a media file.
WebVTTContext
Definition: webvttenc.c:31
AVOption
AVOption.
Definition: opt.h:346
AVPacket::duration
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: packet.h:540
ff_subtitles_read_chunk
int ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext.
Definition: subtitles.c:437
AVPROBE_SCORE_MAX
#define AVPROBE_SCORE_MAX
maximum score
Definition: avformat.h:463
OFFSET
#define OFFSET(x)
Definition: webvttdec.c:197
avpriv_set_pts_info
void avpriv_set_pts_info(AVStream *st, int pts_wrap_bits, unsigned int pts_num, unsigned int pts_den)
Set the time base and wrapping info for a given stream.
Definition: avformat.c:853
read_close
static av_cold int read_close(AVFormatContext *ctx)
Definition: libcdio.c:143
avio_tell
static av_always_inline int64_t avio_tell(AVIOContext *s)
ftell() equivalent for AVIOContext.
Definition: avio.h:494
ss
#define ss(width, name, subs,...)
Definition: cbs_vp9.c:202
AV_PKT_DATA_WEBVTT_SETTINGS
@ AV_PKT_DATA_WEBVTT_SETTINGS
The optional settings (rendering instructions) that immediately follow the timestamp specifier of a W...
Definition: packet.h:203
ff_subtitles_queue_seek
int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
Update current_sub_idx to emulate a seek.
Definition: subtitles.c:261
pkt
AVPacket * pkt
Definition: movenc.c:59
read_packet
static int read_packet(void *opaque, uint8_t *buf, int buf_size)
Definition: avio_read_callback.c:41
intreadwrite.h
s
#define s(width, name)
Definition: cbs_vp9.c:198
AVInputFormat::name
const char * name
A comma separated list of short names for the format.
Definition: avformat.h:553
AVProbeData::buf
unsigned char * buf
Buffer must have AVPROBE_PADDING_SIZE of extra allocated bytes filled with zero.
Definition: avformat.h:453
ff_subtitles_queue_read_packet
int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
Generic read_packet() callback for subtitles demuxers using this queue system.
Definition: subtitles.c:222
FF_INFMT_FLAG_INIT_CLEANUP
#define FF_INFMT_FLAG_INIT_CLEANUP
For an FFInputFormat with this flag set read_close() needs to be called by the caller upon read_heade...
Definition: demux.h:35
AV_CODEC_ID_WEBVTT
@ AV_CODEC_ID_WEBVTT
Definition: codec_id.h:567
AVFormatContext
Format I/O context.
Definition: avformat.h:1255
internal.h
AVStream::codecpar
AVCodecParameters * codecpar
Codec parameters associated with this stream.
Definition: avformat.h:766
LIBAVUTIL_VERSION_INT
#define LIBAVUTIL_VERSION_INT
Definition: version.h:85
read_header
static int read_header(FFV1Context *f)
Definition: ffv1dec.c:550
AVClass
Describe the class of an AVClass context structure.
Definition: log.h:66
NULL
#define NULL
Definition: coverity.c:32
av_default_item_name
const char * av_default_item_name(void *ptr)
Return the context name.
Definition: log.c:237
AVProbeData
This structure contains the data a format has to probe a file.
Definition: avformat.h:451
KIND_FLAGS
#define KIND_FLAGS
Definition: webvttdec.c:198
AV_DISPOSITION_METADATA
#define AV_DISPOSITION_METADATA
The subtitle stream contains time-aligned metadata that is not intended to be directly presented to t...
Definition: avformat.h:703
ff_subtitles_queue_insert
AVPacket * ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q, const uint8_t *event, size_t len, int merge)
Insert a new subtitle event.
Definition: subtitles.c:109
webvtt_demuxer_class
static const AVClass webvtt_demuxer_class
Definition: webvttdec.c:209
ff_webvtt_demuxer
const FFInputFormat ff_webvtt_demuxer
Definition: webvttdec.c:216
AV_DISPOSITION_CAPTIONS
#define AV_DISPOSITION_CAPTIONS
The subtitle stream contains captions, providing a transcription and possibly a translation of audio.
Definition: avformat.h:692
NULL_IF_CONFIG_SMALL
#define NULL_IF_CONFIG_SMALL(x)
Return NULL if CONFIG_SMALL is true, otherwise the argument without modification.
Definition: internal.h:106
av_bprint_finalize
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:240
AV_NOPTS_VALUE
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:248
ff_subtitles_queue_finalize
void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
Set missing durations, sort subtitles by PTS (and then byte position), and drop duplicated events.
Definition: subtitles.c:204
FFDemuxSubtitlesQueue
Definition: subtitles.h:103
FFInputFormat::p
AVInputFormat p
The public AVInputFormat.
Definition: demux.h:41
ff_subtitles_queue_clean
void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
Remove and destroy all the subtitles packets.
Definition: subtitles.c:313
read_ts
static int64_t read_ts(const char *s)
Definition: webvttdec.c:53
bprint.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:255
AVPacket::pts
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: packet.h:515
SET_SIDE_DATA
#define SET_SIDE_DATA(name, type)
demux.h
WebVTTContext::q
FFDemuxSubtitlesQueue q
Definition: webvttdec.c:37
AVStream::disposition
int disposition
Stream disposition - a combination of AV_DISPOSITION_* flags.
Definition: avformat.h:812
AVStream
Stream structure.
Definition: avformat.h:743
AVClass::class_name
const char * class_name
The name of the class; usually it is the same name as the context structure type to which the AVClass...
Definition: log.h:71
pos
unsigned int pos
Definition: spdifenc.c:413
avformat.h
AV_DISPOSITION_DESCRIPTIONS
#define AV_DISPOSITION_DESCRIPTIONS
The subtitle stream contains a textual description of the video content.
Definition: avformat.h:698
webvtt_read_packet
static int webvtt_read_packet(AVFormatContext *s, AVPacket *pkt)
Definition: webvttdec.c:176
AV_PKT_DATA_WEBVTT_IDENTIFIER
@ AV_PKT_DATA_WEBVTT_IDENTIFIER
The optional first identifier line of a WebVTT cue.
Definition: packet.h:197
options
static const AVOption options[]
Definition: webvttdec.c:200
webvtt_read_close
static int webvtt_read_close(AVFormatContext *s)
Definition: webvttdec.c:190
webvtt_probe
static int webvtt_probe(const AVProbeData *p)
Definition: webvttdec.c:41
subtitles.h
AV_OPT_TYPE_INT
@ AV_OPT_TYPE_INT
Definition: opt.h:235
WebVTTContext::kind
int kind
Definition: webvttdec.c:38
webvtt_read_seek
static int webvtt_read_seek(AVFormatContext *s, int stream_index, int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
Definition: webvttdec.c:182
read_probe
static int read_probe(const AVProbeData *p)
Definition: cdg.c:30
AVCodecParameters::codec_id
enum AVCodecID codec_id
Specific type of the encoded data (the codec used).
Definition: codec_par.h:55
AVPacket
This structure stores compressed data.
Definition: packet.h:499
AVPacket::pos
int64_t pos
byte position in stream, -1 if unknown
Definition: packet.h:542
FFInputFormat
Definition: demux.h:37
flags
#define flags(name, subs,...)
Definition: cbs_av1.c:474
AV_RB24
uint64_t_TMPL AV_WL64 unsigned int_TMPL AV_WL32 unsigned int_TMPL AV_WL24 unsigned int_TMPL AV_WL16 uint64_t_TMPL AV_WB64 unsigned int_TMPL AV_WB32 unsigned int_TMPL AV_RB24
Definition: bytestream.h:97
AV_OPT_TYPE_CONST
@ AV_OPT_TYPE_CONST
Definition: opt.h:244
webvtt_read_header
static int webvtt_read_header(AVFormatContext *s)
Definition: webvttdec.c:61