1/*
2 * Copyright (c) 2012-2013 Clément Bœsch <u pkh me>
3 *
4 * This file is part of FFmpeg.
5 *
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
10 *
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 */
20
21#include "avformat.h"
22#include "subtitles.h"
23#include "avio_internal.h"
24#include "libavutil/avstring.h"
25
26void ff_text_init_avio(void *s, FFTextReader *r, AVIOContext *pb)
27{
28    int i;
29    r->pb = pb;
30    r->buf_pos = r->buf_len = 0;
31    r->type = FF_UTF_8;
32    for (i = 0; i < 2; i++)
33        r->buf[r->buf_len++] = avio_r8(r->pb);
34    if (strncmp("\xFF\xFE", r->buf, 2) == 0) {
35        r->type = FF_UTF16LE;
36        r->buf_pos += 2;
37    } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) {
38        r->type = FF_UTF16BE;
39        r->buf_pos += 2;
40    } else {
41        r->buf[r->buf_len++] = avio_r8(r->pb);
42        if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) {
43            // UTF8
44            r->buf_pos += 3;
45        }
46    }
47    if (s && (r->type == FF_UTF16LE || r->type == FF_UTF16BE))
48        av_log(s, AV_LOG_INFO,
49               "UTF16 is automatically converted to UTF8, do not specify a character encoding\n");
50}
51
52void ff_text_init_buf(FFTextReader *r, void *buf, size_t size)
53{
54    ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL);
55    ff_text_init_avio(NULL, r, &r->buf_pb.pub);
56}
57
58int64_t ff_text_pos(FFTextReader *r)
59{
60    return avio_tell(r->pb) - r->buf_len + r->buf_pos;
61}
62
63int ff_text_r8(FFTextReader *r)
64{
65    uint32_t val;
66    uint8_t tmp;
67    if (r->buf_pos < r->buf_len)
68        return r->buf[r->buf_pos++];
69    if (r->type == FF_UTF16LE) {
70        GET_UTF16(val, avio_rl16(r->pb), return 0;)
71    } else if (r->type == FF_UTF16BE) {
72        GET_UTF16(val, avio_rb16(r->pb), return 0;)
73    } else {
74        return avio_r8(r->pb);
75    }
76    if (!val)
77        return 0;
78    r->buf_pos = 0;
79    r->buf_len = 0;
80    PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;)
81    return r->buf[r->buf_pos++]; // buf_len is at least 1
82}
83
84void ff_text_read(FFTextReader *r, char *buf, size_t size)
85{
86    for ( ; size > 0; size--)
87        *buf++ = ff_text_r8(r);
88}
89
90int ff_text_eof(FFTextReader *r)
91{
92    return r->buf_pos >= r->buf_len && avio_feof(r->pb);
93}
94
95int ff_text_peek_r8(FFTextReader *r)
96{
97    int c;
98    if (r->buf_pos < r->buf_len)
99        return r->buf[r->buf_pos];
100    c = ff_text_r8(r);
101    if (!avio_feof(r->pb)) {
102        r->buf_pos = 0;
103        r->buf_len = 1;
104        r->buf[0] = c;
105    }
106    return c;
107}
108
109AVPacket *ff_subtitles_queue_insert(FFDemuxSubtitlesQueue *q,
110                                    const uint8_t *event, size_t len, int merge)
111{
112    AVPacket **subs, *sub;
113
114    if (merge && q->nb_subs > 0) {
115        /* merge with previous event */
116
117        int old_len;
118        sub = q->subs[q->nb_subs - 1];
119        old_len = sub->size;
120        if (av_grow_packet(sub, len) < 0)
121            return NULL;
122        memcpy(sub->data + old_len, event, len);
123    } else {
124        /* new event */
125
126        if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1)
127            return NULL;
128        subs = av_fast_realloc(q->subs, &q->allocated_size,
129                               (q->nb_subs + 1) * sizeof(*q->subs));
130        if (!subs)
131            return NULL;
132        q->subs = subs;
133        sub = av_packet_alloc();
134        if (!sub)
135            return NULL;
136        if (av_new_packet(sub, len) < 0) {
137            av_packet_free(&sub);
138            return NULL;
139        }
140        subs[q->nb_subs++] = sub;
141        sub->flags |= AV_PKT_FLAG_KEY;
142        sub->pts = sub->dts = 0;
143        memcpy(sub->data, event, len);
144    }
145    return sub;
146}
147
148static int cmp_pkt_sub_ts_pos(const void *a, const void *b)
149{
150    const AVPacket *s1 = *(const AVPacket **)a;
151    const AVPacket *s2 = *(const AVPacket **)b;
152    if (s1->pts == s2->pts)
153        return FFDIFFSIGN(s1->pos, s2->pos);
154    return FFDIFFSIGN(s1->pts , s2->pts);
155}
156
157static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
158{
159    const AVPacket *s1 = *(const AVPacket **)a;
160    const AVPacket *s2 = *(const AVPacket **)b;
161    if (s1->pos == s2->pos) {
162        if (s1->pts == s2->pts)
163            return 0;
164        return s1->pts > s2->pts ? 1 : -1;
165    }
166    return s1->pos > s2->pos ? 1 : -1;
167}
168
169static void drop_dups(void *log_ctx, FFDemuxSubtitlesQueue *q)
170{
171    int i, drop = 0;
172
173    for (i = 1; i < q->nb_subs; i++) {
174        const int last_id = i - 1 - drop;
175        const AVPacket *last = q->subs[last_id];
176
177        if (q->subs[i]->pts        == last->pts &&
178            q->subs[i]->duration   == last->duration &&
179            q->subs[i]->stream_index == last->stream_index &&
180            !strcmp(q->subs[i]->data, last->data)) {
181
182            av_packet_free(&q->subs[i]);
183            drop++;
184        } else if (drop) {
185            q->subs[last_id + 1] = q->subs[i];
186            q->subs[i] = NULL;
187        }
188    }
189
190    if (drop) {
191        q->nb_subs -= drop;
192        av_log(log_ctx, AV_LOG_WARNING, "Dropping %d duplicated subtitle events\n", drop);
193    }
194}
195
196void ff_subtitles_queue_finalize(void *log_ctx, FFDemuxSubtitlesQueue *q)
197{
198    int i;
199
200    if (!q->nb_subs)
201        return;
202
203    qsort(q->subs, q->nb_subs, sizeof(*q->subs),
204          q->sort == SUB_SORT_TS_POS ? cmp_pkt_sub_ts_pos
205                                     : cmp_pkt_sub_pos_ts);
206    for (i = 0; i < q->nb_subs; i++)
207        if (q->subs[i]->duration < 0 && i < q->nb_subs - 1 && q->subs[i + 1]->pts - (uint64_t)q->subs[i]->pts <= INT64_MAX)
208            q->subs[i]->duration = q->subs[i + 1]->pts - q->subs[i]->pts;
209
210    if (!q->keep_duplicates)
211        drop_dups(log_ctx, q);
212}
213
214int ff_subtitles_queue_read_packet(FFDemuxSubtitlesQueue *q, AVPacket *pkt)
215{
216    AVPacket *sub;
217    int ret;
218
219    if (q->current_sub_idx == q->nb_subs)
220        return AVERROR_EOF;
221    sub = q->subs[q->current_sub_idx];
222    if ((ret = av_packet_ref(pkt, sub)) < 0) {
223        return ret;
224    }
225
226    pkt->dts = pkt->pts;
227    q->current_sub_idx++;
228    return 0;
229}
230
231static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts)
232{
233    int s1 = 0, s2 = q->nb_subs - 1;
234
235    if (s2 < s1)
236        return AVERROR(ERANGE);
237
238    for (;;) {
239        int mid;
240
241        if (s1 == s2)
242            return s1;
243        if (s1 == s2 - 1)
244            return q->subs[s1]->pts <= q->subs[s2]->pts ? s1 : s2;
245        mid = (s1 + s2) / 2;
246        if (q->subs[mid]->pts <= ts)
247            s1 = mid;
248        else
249            s2 = mid;
250    }
251}
252
253int ff_subtitles_queue_seek(FFDemuxSubtitlesQueue *q, AVFormatContext *s, int stream_index,
254                            int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
255{
256    if (flags & AVSEEK_FLAG_BYTE) {
257        return AVERROR(ENOSYS);
258    } else if (flags & AVSEEK_FLAG_FRAME) {
259        if (ts < 0 || ts >= q->nb_subs)
260            return AVERROR(ERANGE);
261        q->current_sub_idx = ts;
262    } else {
263        int i, idx = search_sub_ts(q, ts);
264        int64_t ts_selected;
265
266        if (idx < 0)
267            return idx;
268        for (i = idx; i < q->nb_subs && q->subs[i]->pts < min_ts; i++)
269            if (stream_index == -1 || q->subs[i]->stream_index == stream_index)
270                idx = i;
271        for (i = idx; i > 0 && q->subs[i]->pts > max_ts; i--)
272            if (stream_index == -1 || q->subs[i]->stream_index == stream_index)
273                idx = i;
274
275        ts_selected = q->subs[idx]->pts;
276        if (ts_selected < min_ts || ts_selected > max_ts)
277            return AVERROR(ERANGE);
278
279        /* look back in the latest subtitles for overlapping subtitles */
280        for (i = idx - 1; i >= 0; i--) {
281            int64_t pts = q->subs[i]->pts;
282            if (q->subs[i]->duration <= 0 ||
283                (stream_index != -1 && q->subs[i]->stream_index != stream_index))
284                continue;
285            if (pts >= min_ts && pts > ts_selected - q->subs[i]->duration)
286                idx = i;
287            else
288                break;
289        }
290
291#ifdef OHOS_SUBTITLE_DEMUXER
292        if (q->subs[idx]->pts + q->subs[idx]->duration < ts) {
293            if (idx < 1)
294                idx = 1;
295            for (i = idx - 1; i < q->nb_subs; i++) {
296                int64_t pts = q->subs[i]->pts;
297                if (q->subs[i]->duration <= 0 ||
298                    (stream_index != -1 && q->subs[i]->stream_index != stream_index))
299                    continue;
300                if (pts + q->subs[i]->duration >= ts) {
301                    idx = i;
302                    break;
303                }
304            }
305        }
306#endif
307        /* If the queue is used to store multiple subtitles streams (like with
308         * VobSub) and the stream index is not specified, we need to make sure
309         * to focus on the smallest file position offset for a same timestamp;
310         * queue is ordered by pts and then filepos, so we can take the first
311         * entry for a given timestamp. */
312        if (stream_index == -1)
313            while (idx > 0 && q->subs[idx - 1]->pts == q->subs[idx]->pts)
314                idx--;
315
316        q->current_sub_idx = idx;
317    }
318    return 0;
319}
320
321void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q)
322{
323    int i;
324
325    for (i = 0; i < q->nb_subs; i++)
326        av_packet_free(&q->subs[i]);
327    av_freep(&q->subs);
328    q->nb_subs = q->allocated_size = q->current_sub_idx = 0;
329}
330
331int ff_subtitles_read_packet(AVFormatContext *s, AVPacket *pkt)
332{
333    FFDemuxSubtitlesQueue *q = s->priv_data;
334    return ff_subtitles_queue_read_packet(q, pkt);
335}
336
337int ff_subtitles_read_seek(AVFormatContext *s, int stream_index,
338                           int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
339{
340    FFDemuxSubtitlesQueue *q = s->priv_data;
341    return ff_subtitles_queue_seek(q, s, stream_index,
342                                   min_ts, ts, max_ts, flags);
343}
344
345int ff_subtitles_read_close(AVFormatContext *s)
346{
347    FFDemuxSubtitlesQueue *q = s->priv_data;
348    ff_subtitles_queue_clean(q);
349    return 0;
350}
351
352int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c)
353{
354    int i = 0;
355    char end_chr;
356
357    if (!*c) // cached char?
358        *c = ff_text_r8(tr);
359    if (!*c)
360        return 0;
361
362    end_chr = *c == '<' ? '>' : '<';
363    do {
364        av_bprint_chars(buf, *c, 1);
365        *c = ff_text_r8(tr);
366        i++;
367    } while (*c != end_chr && *c);
368    if (end_chr == '>') {
369        av_bprint_chars(buf, '>', 1);
370        *c = 0;
371    }
372    return i;
373}
374
/**
 * Locate the value of attribute attr inside the tag string s.
 *
 * The string is scanned token by token, tokens being separated by
 * whitespace that is not inside double quotes. The name is compared
 * case-insensitively and must be followed directly by '='.
 *
 * @return pointer to the first character of the value (past the opening
 *         double quote if there is one), or NULL when the attribute is not
 *         found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    const size_t attr_len = strlen(attr);
    int quoted = 0;

    while (*s) {
        /* skip the current token, up to whitespace outside of quotes */
        for (; *s; s++) {
            if (!quoted && av_isspace(*s))
                break;
            if (*s == '"') // XXX: support escaping?
                quoted = !quoted;
        }
        while (av_isspace(*s))
            s++;

        if (!av_strncasecmp(s, attr, attr_len) && s[attr_len] == '=') {
            const char *value = s + attr_len + 1;

            if (*value == '"')
                value++;
            return value;
        }
    }
    return NULL;
}
394
/* Return 1 when c is a carriage return or a line feed, 0 otherwise. */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
399
400void ff_subtitles_read_text_chunk(FFTextReader *tr, AVBPrint *buf)
401{
402    char eol_buf[5], last_was_cr = 0;
403    int n = 0, i = 0, nb_eol = 0;
404
405    av_bprint_clear(buf);
406
407    for (;;) {
408        char c = ff_text_r8(tr);
409
410        if (!c)
411            break;
412
413        /* ignore all initial line breaks */
414        if (n == 0 && is_eol(c))
415            continue;
416
417        /* line break buffering: we don't want to add the trailing \r\n */
418        if (is_eol(c)) {
419            nb_eol += c == '\n' || last_was_cr;
420            if (nb_eol == 2)
421                break;
422            eol_buf[i++] = c;
423            if (i == sizeof(eol_buf) - 1)
424                break;
425            last_was_cr = c == '\r';
426            continue;
427        }
428
429        /* only one line break followed by data: we flush the line breaks
430         * buffer */
431        if (i) {
432            eol_buf[i] = 0;
433            av_bprintf(buf, "%s", eol_buf);
434            i = nb_eol = 0;
435        }
436
437        av_bprint_chars(buf, c, 1);
438        n++;
439    }
440}
441
442void ff_subtitles_read_chunk(AVIOContext *pb, AVBPrint *buf)
443{
444    FFTextReader tr;
445    tr.buf_pos = tr.buf_len = 0;
446    tr.type = 0;
447    tr.pb = pb;
448    ff_subtitles_read_text_chunk(&tr, buf);
449}
450
451ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
452{
453    size_t cur = 0;
454    if (!size)
455        return 0;
456    buf[0] = '\0';
457    while (cur + 1 < size) {
458        unsigned char c = ff_text_r8(tr);
459        if (!c)
460            return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA;
461        if (c == '\r' || c == '\n')
462            break;
463        buf[cur++] = c;
464        buf[cur] = '\0';
465    }
466    while (ff_text_peek_r8(tr) == '\r')
467        ff_text_r8(tr);
468    if (ff_text_peek_r8(tr) == '\n')
469        ff_text_r8(tr);
470    return cur;
471}
472