FFmpeg
webvttdec.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 /**
22  * @file
23  * WebVTT subtitle decoder
24  * @see https://www.w3.org/TR/webvtt1/
25  * @todo need to support extended markups and cue settings
26  */
27 
28 #include "avcodec.h"
29 #include "ass.h"
30 #include "codec_internal.h"
31 #include "libavutil/bprint.h"
32 #include "libavutil/mathematics.h"
33 
/* Text substitutions applied to WebVTT cue text when producing ASS.
 * Each entry maps a source sequence ("from") to the bytes emitted in its
 * place ("to"). */
static const struct {
    const char *from;
    const char *to;
} webvtt_tag_replace[] = {
    /* escape to avoid ASS markup conflicts; the backslash is followed by
     * the UTF-8 encoding of U+2060 */
    { .from = "{",      .to = "\\{{}"          },
    { .from = "\\",     .to = "\\\xe2\x81\xa0" },
    /* character references for the angle brackets */
    { .from = "&gt;",   .to = ">"              },
    { .from = "&lt;",   .to = "<"              },
    /* directional marks, emitted as UTF-8 (U+200E and U+200F) */
    { .from = "&lrm;",  .to = "\xe2\x80\x8e"   },
    { .from = "&rlm;",  .to = "\xe2\x80\x8f"   },
    /* ampersand and non-breaking space (ASS "\h") */
    { .from = "&amp;",  .to = "&"              },
    { .from = "&nbsp;", .to = "\\h"            },
};
/* WebVTT tags that are translated into ASS override blocks. "from" is the
 * tag name as it appears right after '<'; "to" is the ASS override emitted
 * for it. Both fields are fixed 6-byte arrays, which is exactly enough for
 * the longest entry plus its terminating NUL. */
static const struct {
    const char from[6];
    const char to[6];
} webvtt_valid_tags[] = {
    /* italic on / off */
    { .from = "i",  .to = "{\\i1}" },
    { .from = "/i", .to = "{\\i0}" },
    /* bold on / off */
    { .from = "b",  .to = "{\\b1}" },
    { .from = "/b", .to = "{\\b0}" },
    /* underline on / off */
    { .from = "u",  .to = "{\\u1}" },
    { .from = "/u", .to = "{\\u0}" },
};
51 
/* Read exactly two ASCII digits at p.
 * Returns their value (0..99), or -1 if either character is not a digit.
 * p[1] is only examined when p[0] is a digit, so a NUL at p[0] is safe. */
static int webvtt_two_digits(const char *p)
{
    if (p[0] < '0' || p[0] > '9' || p[1] < '0' || p[1] > '9')
        return -1;
    return (p[0] - '0') * 10 + (p[1] - '0');
}

/**
 * Parse a WebVTT timestamp located at the start of buf.
 *
 * Accepted forms are "H...:MM:SS.mmm" (one or more hour digits) and
 * "MM:SS.mmm". Minutes and seconds must be exactly two digits and at most
 * 59; the fraction must be exactly three digits. Signs, whitespace and
 * out-of-range hour counts are rejected instead of being handed to sscanf(),
 * whose behaviour is undefined when a converted value does not fit an int.
 *
 * Characters following the timestamp are not examined, except that a fourth
 * fraction digit makes the timestamp invalid; the string does not have to
 * end right after the timestamp.
 *
 * @param buf NUL-terminated string starting with the timestamp
 * @return the timestamp in milliseconds, or -1 on failure
 */
static int64_t parse_webvtt_timestamp(const char *buf)
{
    /* Largest hour count that still leaves room for the MM:SS.mmm part
     * (always below one hour) without overflowing int64_t. */
    const int64_t max_hours = INT64_MAX / 3600000 - 1;
    int64_t lead = 0, hours, minutes, seconds, ms = 0;
    int lead_digits = 0, second, i;

    /* Leading field: hours (any digit count) or minutes (two digits). */
    for (; *buf >= '0' && *buf <= '9'; buf++, lead_digits++) {
        lead = lead * 10 + (*buf - '0');
        if (lead > max_hours)
            return -1;
    }
    if (!lead_digits || *buf != ':')
        return -1;
    buf++;

    second = webvtt_two_digits(buf);
    if (second < 0)
        return -1;
    buf += 2;

    if (*buf == ':') {
        /* HH:MM:SS.mmm */
        int third = webvtt_two_digits(buf + 1);
        if (third < 0)
            return -1;
        hours   = lead;
        minutes = second;
        seconds = third;
        buf    += 3;
    } else {
        /* MM:SS.mmm: the leading field is the minutes */
        if (lead_digits != 2)
            return -1;
        hours   = 0;
        minutes = lead;
        seconds = second;
    }
    if (minutes > 59 || seconds > 59)
        return -1;

    if (*buf != '.')
        return -1;
    buf++;
    for (i = 0; i < 3; i++) {
        if (buf[i] < '0' || buf[i] > '9')
            return -1;
        ms = ms * 10 + (buf[i] - '0');
    }
    /* A longer fraction is not a valid timestamp. */
    if (buf[3] >= '0' && buf[3] <= '9')
        return -1;

    return hours * 3600000 + minutes * 60000 + seconds * 1000 + ms;
}
71 
/**
 * Validate the body of an inline cue timestamp tag (the text between '<'
 * and '>').
 *
 * The body must start with a digit, consist only of digits, colons and
 * periods, parse as a WebVTT timestamp, lie strictly inside
 * (cue_start, cue_end) and, when a previous timestamp exists, be strictly
 * after it.
 *
 * @param body      first character of the tag body; need not be terminated
 *                  at len, only the first len characters belong to the tag
 * @param len       number of characters in the tag body
 * @param cue_start cue start time in milliseconds
 * @param cue_end   cue end time in milliseconds
 * @param prev_ts   previously accepted timestamp in milliseconds, or a
 *                  negative value if there is none
 * @param ts_out    receives the parsed timestamp on success
 * @return 1 and writes *ts_out on success, 0 on failure
 */
static int read_cue_timestamp(const char *body, int len,
                              int64_t cue_start, int64_t cue_end,
                              int64_t prev_ts, int64_t *ts_out)
{
    int64_t ts;

    if (len < 1 || !av_isdigit(body[0]))
        return 0;
    /* The run of timestamp characters must cover the tag body exactly. */
    if ((int)strspn(body, "0123456789:.") != len)
        return 0;

    ts = parse_webvtt_timestamp(body);
    /* Reject parse failures explicitly: with a negative cue_start the
     * range test below would not catch the -1 failure value. */
    if (ts < 0)
        return 0;
    if (ts <= cue_start || ts >= cue_end)
        return 0;
    if (prev_ts >= 0 && ts <= prev_ts)
        return 0;

    *ts_out = ts;
    return 1;
}
95 
96 /* Append the pending segment text, prefixed by a {\kf} karaoke override when
97  * dur_cs > 0. A {\kf} must precede the text it times, so segments are buffered
98  * in \seg and flushed once their duration (the span up to the next timestamp,
99  * or the cue end) is known. */
100 static void flush_segment(AVBPrint *buf, AVBPrint *seg, int64_t dur_cs)
101 {
102  if (dur_cs > 0)
103  av_bprintf(buf, "{\\kf%"PRId64"}", dur_cs);
104  av_bprintf(buf, "%s", seg->str);
105  av_bprint_clear(seg);
106 }
107 
108 static int webvtt_event_to_ass(AVBPrint *buf, const char *p,
109  int64_t cue_start_ms, int64_t cue_end_ms)
110 {
111  int i, again = 0;
112  int64_t prev_ts = -1, ts;
113  int64_t start_cs = 0; /* cs from cue start where the pending segment begins */
114  AVBPrint seg;
115 
117 
118  while (*p) {
119  if (*p == '<') {
120  const char *tag_end = strchr(p, '>');
121  ptrdiff_t len;
122  if (!tag_end)
123  break;
124  len = tag_end - p + 1;
125 
126  /* A cue timestamp ends the pending segment; flush it with its own
127  * duration (rounded against cue start, then differenced, so the
128  * durations telescope to the cue length without drift). */
129  if (len > 2 &&
130  read_cue_timestamp(p + 1, (int)(len - 2),
131  cue_start_ms, cue_end_ms, prev_ts, &ts)) {
132  int64_t end_cs = (ts - cue_start_ms + 5) / 10;
133  flush_segment(buf, &seg, end_cs - start_cs);
134  start_cs = end_cs;
135  prev_ts = ts;
136  p += len;
137  again = 1;
138  continue;
139  }
140 
141  for (i = 0; i < FF_ARRAY_ELEMS(webvtt_valid_tags); i++) {
142  const char *from = webvtt_valid_tags[i].from;
143  if(!strncmp(p + 1, from, strlen(from))) {
144  av_bprintf(&seg, "%s", webvtt_valid_tags[i].to);
145  break;
146  }
147  }
148  p += len;
149  again = 1;
150  }
151 
152  for (i = 0; i < FF_ARRAY_ELEMS(webvtt_tag_replace); i++) {
153  const char *from = webvtt_tag_replace[i].from;
154  const size_t len = strlen(from);
155  if (!strncmp(p, from, len)) {
156  av_bprintf(&seg, "%s", webvtt_tag_replace[i].to);
157  p += len;
158  again = 1;
159  break;
160  }
161  }
162 
163  if (again) {
164  again = 0;
165  continue;
166  }
167  if (p[0] == '\n' && p[1])
168  av_bprintf(&seg, "\\N");
169  else if (*p != '\r')
170  av_bprint_chars(&seg, *p, 1);
171  p++;
172  }
173 
174  /* Flush the final segment. With no cue timestamp this is the whole cue,
175  * emitted untimed (duration 0 -> no {\kf}). */
176  flush_segment(buf, &seg,
177  prev_ts < 0 ? 0 : (cue_end_ms - cue_start_ms + 5) / 10 - start_cs);
178 
179  av_bprint_finalize(&seg, NULL);
180  return 0;
181 }
182 
184  int *got_sub_ptr, const AVPacket *avpkt)
185 {
186  int ret = 0;
187  const char *ptr = avpkt->data;
188  FFASSDecoderContext *s = avctx->priv_data;
189  AVBPrint buf;
190  AVRational ms = { 1, 1000 };
191  int64_t start_ms = 0, end_ms = 0;
192 
193  /* Inline cue timestamps are absolute milliseconds on the media timeline.
194  * Convert the packet timing to the same unit via pkt_timebase instead of
195  * assuming a 1/1000 time base. If the timing is unknown, leave both at 0
196  * so every inline timestamp is rejected and the cue is emitted verbatim. */
197  if (avpkt->pts != AV_NOPTS_VALUE && avctx->pkt_timebase.num) {
198  start_ms = av_rescale_q(avpkt->pts, avctx->pkt_timebase, ms);
199  end_ms = start_ms + av_rescale_q(avpkt->duration, avctx->pkt_timebase, ms);
200  }
201 
203  if (ptr && avpkt->size > 0 &&
204  !webvtt_event_to_ass(&buf, ptr, start_ms, end_ms))
205  ret = ff_ass_add_rect(sub, buf.str, s->readorder++, 0, NULL, NULL);
206  av_bprint_finalize(&buf, NULL);
207  if (ret < 0)
208  return ret;
209  *got_sub_ptr = sub->num_rects > 0;
210  return avpkt->size;
211 }
212 
214  .p.name = "webvtt",
215  CODEC_LONG_NAME("WebVTT subtitle"),
216  .p.type = AVMEDIA_TYPE_SUBTITLE,
217  .p.id = AV_CODEC_ID_WEBVTT,
220  .flush = ff_ass_decoder_flush,
221  .priv_data_size = sizeof(FFASSDecoderContext),
222 };
AVSubtitle
Definition: avcodec.h:2100
webvtt_valid_tags
static const struct @364 webvtt_valid_tags[]
AVMEDIA_TYPE_SUBTITLE
@ AVMEDIA_TYPE_SUBTITLE
Definition: avutil.h:203
AV_BPRINT_SIZE_UNLIMITED
#define AV_BPRINT_SIZE_UNLIMITED
webvtt_event_to_ass
static int webvtt_event_to_ass(AVBPrint *buf, const char *p, int64_t cue_start_ms, int64_t cue_end_ms)
Definition: webvttdec.c:108
read_cue_timestamp
static int read_cue_timestamp(const char *body, int len, int64_t cue_start, int64_t cue_end, int64_t prev_ts, int64_t *ts_out)
Definition: webvttdec.c:75
parse_webvtt_timestamp
static int64_t parse_webvtt_timestamp(const char *buf)
Definition: webvttdec.c:54
ff_ass_subtitle_header_default
int ff_ass_subtitle_header_default(AVCodecContext *avctx)
Generate a suitable AVCodecContext.subtitle_header for SUBTITLE_ASS with default style.
Definition: ass.c:98
av_bprint_init
void av_bprint_init(AVBPrint *buf, unsigned size_init, unsigned size_max)
Definition: bprint.c:69
ff_webvtt_decoder
const FFCodec ff_webvtt_decoder
Definition: webvttdec.c:213
int64_t
long long int64_t
Definition: coverity.c:34
AVSubtitle::num_rects
unsigned num_rects
Definition: avcodec.h:2104
ff_ass_add_rect
int ff_ass_add_rect(AVSubtitle *sub, const char *dialog, int readorder, int layer, const char *style, const char *speaker)
Add an ASS dialog to a subtitle.
Definition: ass.c:159
AVPacket::data
uint8_t * data
Definition: packet.h:603
webvtt_decode_frame
static int webvtt_decode_frame(AVCodecContext *avctx, AVSubtitle *sub, int *got_sub_ptr, const AVPacket *avpkt)
Definition: webvttdec.c:183
FFCodec
Definition: codec_internal.h:127
AVPacket::duration
int64_t duration
Duration of this packet in AVStream->time_base units, 0 if unknown.
Definition: packet.h:621
mathematics.h
flush_segment
static void flush_segment(AVBPrint *buf, AVBPrint *seg, int64_t dur_cs)
Definition: webvttdec.c:100
FFCodec::p
AVCodec p
The public AVCodec.
Definition: codec_internal.h:131
AVRational::num
int num
Numerator.
Definition: rational.h:59
ass.h
FF_ARRAY_ELEMS
#define FF_ARRAY_ELEMS(a)
Definition: sinewin_tablegen.c:29
s
#define s(width, name)
Definition: cbs_vp9.c:198
av_rescale_q
int64_t av_rescale_q(int64_t a, AVRational bq, AVRational cq)
Rescale a 64-bit integer by 2 rational numbers.
Definition: mathematics.c:142
CODEC_LONG_NAME
#define CODEC_LONG_NAME(str)
Definition: codec_internal.h:332
AV_CODEC_ID_WEBVTT
@ AV_CODEC_ID_WEBVTT
Definition: codec_id.h:592
NULL
#define NULL
Definition: coverity.c:32
AVRational
Rational number (pair of numerator and denominator).
Definition: rational.h:58
to
const char * to
Definition: webvttdec.c:36
AVPacket::size
int size
Definition: packet.h:604
av_bprint_finalize
int av_bprint_finalize(AVBPrint *buf, char **ret_str)
Finalize a print buffer.
Definition: bprint.c:235
codec_internal.h
i
#define i(width, name, range_min, range_max)
Definition: cbs_h264.c:63
AVCodecContext::pkt_timebase
AVRational pkt_timebase
Timebase in which pkt_dts/pts and AVPacket.dts/pts are expressed.
Definition: avcodec.h:554
from
const char * from
Definition: webvttdec.c:35
AV_NOPTS_VALUE
#define AV_NOPTS_VALUE
Undefined timestamp value.
Definition: avutil.h:247
av_isdigit
static av_const int av_isdigit(int c)
Locale-independent conversion of ASCII isdigit.
Definition: avstring.h:202
bprint.h
AVPacket::pts
int64_t pts
Presentation timestamp in AVStream->time_base units; the time at which the decompressed packet will b...
Definition: packet.h:596
AVCodec::name
const char * name
Name of the codec implementation.
Definition: codec.h:179
len
int len
Definition: vorbis_enc_data.h:426
avcodec.h
ret
ret
Definition: filter_design.txt:187
av_bprintf
void av_bprintf(AVBPrint *buf, const char *fmt,...)
Definition: bprint.c:122
AVCodecContext
main external API structure.
Definition: avcodec.h:443
again
Filter the word “frame” indicates either a video frame or a group of audio as stored in an AVFrame structure Format for each input and each output the list of supported formats For video that means pixel format For audio that means channel sample they are references to shared objects When the negotiation mechanism computes the intersection of the formats supported at each end of a all references to both lists are replaced with a reference to the intersection And when a single format is eventually chosen for a link amongst the remaining again
Definition: filter_design.txt:25
av_bprint_clear
void av_bprint_clear(AVBPrint *buf)
Reset the string to "" but keep internal allocated data.
Definition: bprint.c:227
body
static void body(uint32_t ABCD[4], const uint8_t *src, size_t nblocks)
Definition: md5.c:103
Windows::Graphics::DirectX::Direct3D11::p
IDirect3DDxgiInterfaceAccess _COM_Outptr_ void ** p
Definition: vsrc_gfxcapture_winrt.hpp:53
FF_CODEC_DECODE_SUB_CB
#define FF_CODEC_DECODE_SUB_CB(func)
Definition: codec_internal.h:351
AVCodecContext::priv_data
void * priv_data
Definition: avcodec.h:470
AVPacket
This structure stores compressed data.
Definition: packet.h:580
webvtt_tag_replace
static const struct @363 webvtt_tag_replace[]
FFASSDecoderContext
Definition: ass.h:46
ff_ass_decoder_flush
av_cold void ff_ass_decoder_flush(AVCodecContext *avctx)
Helper to flush a text subtitles decoder making use of the FFASSDecoderContext.
Definition: ass.c:166
h
h
Definition: vp9dsp_template.c:2070
av_bprint_chars
void av_bprint_chars(AVBPrint *buf, char c, unsigned n)
Append char c n times to a print buffer.
Definition: bprint.c:130