153a5a1b3Sopenharmony_ci/***
253a5a1b3Sopenharmony_ci  This file is part of PulseAudio.
353a5a1b3Sopenharmony_ci
453a5a1b3Sopenharmony_ci  Copyright 2006 Lennart Poettering
553a5a1b3Sopenharmony_ci  Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB
653a5a1b3Sopenharmony_ci
753a5a1b3Sopenharmony_ci  PulseAudio is free software; you can redistribute it and/or modify
853a5a1b3Sopenharmony_ci  it under the terms of the GNU Lesser General Public License as
953a5a1b3Sopenharmony_ci  published by the Free Software Foundation; either version 2.1 of the
1053a5a1b3Sopenharmony_ci  License, or (at your option) any later version.
1153a5a1b3Sopenharmony_ci
1253a5a1b3Sopenharmony_ci  PulseAudio is distributed in the hope that it will be useful, but
1353a5a1b3Sopenharmony_ci  WITHOUT ANY WARRANTY; without even the implied warranty of
1453a5a1b3Sopenharmony_ci  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1553a5a1b3Sopenharmony_ci  Lesser General Public License for more details.
1653a5a1b3Sopenharmony_ci
1753a5a1b3Sopenharmony_ci  You should have received a copy of the GNU Lesser General Public
1853a5a1b3Sopenharmony_ci  License along with PulseAudio; if not, see <http://www.gnu.org/licenses/>.
1953a5a1b3Sopenharmony_ci***/
2053a5a1b3Sopenharmony_ci
2153a5a1b3Sopenharmony_ci/* This file is based on the GLIB utf8 validation functions. The
2253a5a1b3Sopenharmony_ci * original license text follows. */
2353a5a1b3Sopenharmony_ci
2453a5a1b3Sopenharmony_ci/* gutf8.c - Operations on UTF-8 strings.
2553a5a1b3Sopenharmony_ci *
2653a5a1b3Sopenharmony_ci * Copyright (C) 1999 Tom Tromey
2753a5a1b3Sopenharmony_ci * Copyright (C) 2000 Red Hat, Inc.
2853a5a1b3Sopenharmony_ci *
2953a5a1b3Sopenharmony_ci * This library is free software; you can redistribute it and/or
3053a5a1b3Sopenharmony_ci * modify it under the terms of the GNU Lesser General Public
3153a5a1b3Sopenharmony_ci * License as published by the Free Software Foundation; either
3253a5a1b3Sopenharmony_ci * version 2 of the License, or (at your option) any later version.
3353a5a1b3Sopenharmony_ci *
3453a5a1b3Sopenharmony_ci * This library is distributed in the hope that it will be useful,
3553a5a1b3Sopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of
3653a5a1b3Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
3753a5a1b3Sopenharmony_ci * Lesser General Public License for more details.
3853a5a1b3Sopenharmony_ci *
3953a5a1b3Sopenharmony_ci * You should have received a copy of the GNU Lesser General Public
4053a5a1b3Sopenharmony_ci * License along with this library; if not, see <http://www.gnu.org/licenses/>.
4153a5a1b3Sopenharmony_ci */
4253a5a1b3Sopenharmony_ci
4353a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H
4453a5a1b3Sopenharmony_ci#include <config.h>
4553a5a1b3Sopenharmony_ci#endif
4653a5a1b3Sopenharmony_ci
4753a5a1b3Sopenharmony_ci#include <errno.h>
4853a5a1b3Sopenharmony_ci#include <stdlib.h>
4953a5a1b3Sopenharmony_ci#include <inttypes.h>
5053a5a1b3Sopenharmony_ci#include <string.h>
5153a5a1b3Sopenharmony_ci
5253a5a1b3Sopenharmony_ci#ifdef HAVE_ICONV
5353a5a1b3Sopenharmony_ci#include <iconv.h>
5453a5a1b3Sopenharmony_ci#endif
5553a5a1b3Sopenharmony_ci
5653a5a1b3Sopenharmony_ci#include <pulse/xmalloc.h>
5753a5a1b3Sopenharmony_ci#include <pulsecore/macro.h>
5853a5a1b3Sopenharmony_ci
5953a5a1b3Sopenharmony_ci#include "utf8.h"
6053a5a1b3Sopenharmony_ci
6153a5a1b3Sopenharmony_ci#define FILTER_CHAR '_'
6253a5a1b3Sopenharmony_ci
/* Decides whether ch is a code point this module accepts.
 *
 * Rejected are:
 *   - values at or above 0x110000 (outside the Unicode code space),
 *   - 0xD800..0xDFFF (the UTF-16 surrogate range),
 *   - 0xFDD0..0xFDEF (reserved),
 *   - any value whose low 16 bits are 0xFFFE or 0xFFFF.
 *
 * Returns true for everything else. */
static inline bool is_unicode_valid(uint32_t ch) {
    const bool past_end = ch >= 0x110000;
    const bool surrogate = (ch & 0xFFFFF800) == 0xD800;
    const bool reserved = !((ch < 0xFDD0) || (ch > 0xFDEF));
    /* Only bits 1..15 are compared, so this matches 0xFFFE/0xFFFF in
     * every plane, not just the first one. */
    const bool plane_tail = (ch & 0xFFFE) == 0xFFFE;

    return !(past_end || surrogate || reserved || plane_tail);
}
7653a5a1b3Sopenharmony_ci
/* Returns true when ch has the bit pattern 10xxxxxx, i.e. the form of a
 * UTF-8 continuation byte, and false for any other byte (including 0). */
static inline bool is_continuation_char(uint8_t ch) {
    return (ch & 0xc0) == 0x80;
}
8253a5a1b3Sopenharmony_ci
/* Appends the six payload bits of the continuation byte ch to the value
 * being assembled in *u_ch: the accumulator is shifted left by six bits
 * and the low six bits of ch are OR-ed in. The top two bits of ch are
 * ignored; the caller is expected to have checked them already. */
static inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) {
    const uint32_t payload = (uint32_t) (ch & 0x3f);

    *u_ch = (*u_ch << 6) | payload;
}
8753a5a1b3Sopenharmony_ci
8853a5a1b3Sopenharmony_cistatic char* utf8_validate(const char *str, char *output) {
8953a5a1b3Sopenharmony_ci    uint32_t val = 0;
9053a5a1b3Sopenharmony_ci    uint32_t min = 0;
9153a5a1b3Sopenharmony_ci    const uint8_t *p, *last;
9253a5a1b3Sopenharmony_ci    int size;
9353a5a1b3Sopenharmony_ci    uint8_t *o;
9453a5a1b3Sopenharmony_ci
9553a5a1b3Sopenharmony_ci    pa_assert(str);
9653a5a1b3Sopenharmony_ci
9753a5a1b3Sopenharmony_ci    o = (uint8_t*) output;
9853a5a1b3Sopenharmony_ci    for (p = (const uint8_t*) str; *p; p++) {
9953a5a1b3Sopenharmony_ci        if (*p < 128) {
10053a5a1b3Sopenharmony_ci            if (o)
10153a5a1b3Sopenharmony_ci                *o = *p;
10253a5a1b3Sopenharmony_ci        } else {
10353a5a1b3Sopenharmony_ci            last = p;
10453a5a1b3Sopenharmony_ci
10553a5a1b3Sopenharmony_ci            if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */
10653a5a1b3Sopenharmony_ci                size = 2;
10753a5a1b3Sopenharmony_ci                min = 128;
10853a5a1b3Sopenharmony_ci                val = (uint32_t) (*p & 0x1e);
10953a5a1b3Sopenharmony_ci                goto ONE_REMAINING;
11053a5a1b3Sopenharmony_ci            } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/
11153a5a1b3Sopenharmony_ci                size = 3;
11253a5a1b3Sopenharmony_ci                min = (1 << 11);
11353a5a1b3Sopenharmony_ci                val = (uint32_t) (*p & 0x0f);
11453a5a1b3Sopenharmony_ci                goto TWO_REMAINING;
11553a5a1b3Sopenharmony_ci            } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */
11653a5a1b3Sopenharmony_ci                size = 4;
11753a5a1b3Sopenharmony_ci                min = (1 << 16);
11853a5a1b3Sopenharmony_ci                val = (uint32_t) (*p & 0x07);
11953a5a1b3Sopenharmony_ci            } else
12053a5a1b3Sopenharmony_ci                goto error;
12153a5a1b3Sopenharmony_ci
12253a5a1b3Sopenharmony_ci            p++;
12353a5a1b3Sopenharmony_ci            if (!is_continuation_char(*p))
12453a5a1b3Sopenharmony_ci                goto error;
12553a5a1b3Sopenharmony_ci            merge_continuation_char(&val, *p);
12653a5a1b3Sopenharmony_ci
12753a5a1b3Sopenharmony_ciTWO_REMAINING:
12853a5a1b3Sopenharmony_ci            p++;
12953a5a1b3Sopenharmony_ci            if (!is_continuation_char(*p))
13053a5a1b3Sopenharmony_ci                goto error;
13153a5a1b3Sopenharmony_ci            merge_continuation_char(&val, *p);
13253a5a1b3Sopenharmony_ci
13353a5a1b3Sopenharmony_ciONE_REMAINING:
13453a5a1b3Sopenharmony_ci            p++;
13553a5a1b3Sopenharmony_ci            if (!is_continuation_char(*p))
13653a5a1b3Sopenharmony_ci                goto error;
13753a5a1b3Sopenharmony_ci            merge_continuation_char(&val, *p);
13853a5a1b3Sopenharmony_ci
13953a5a1b3Sopenharmony_ci            if (val < min)
14053a5a1b3Sopenharmony_ci                goto error;
14153a5a1b3Sopenharmony_ci
14253a5a1b3Sopenharmony_ci            if (!is_unicode_valid(val))
14353a5a1b3Sopenharmony_ci                goto error;
14453a5a1b3Sopenharmony_ci
14553a5a1b3Sopenharmony_ci            if (o) {
14653a5a1b3Sopenharmony_ci                memcpy(o, last, (size_t) size);
14753a5a1b3Sopenharmony_ci                o += size;
14853a5a1b3Sopenharmony_ci            }
14953a5a1b3Sopenharmony_ci
15053a5a1b3Sopenharmony_ci            continue;
15153a5a1b3Sopenharmony_ci
15253a5a1b3Sopenharmony_cierror:
15353a5a1b3Sopenharmony_ci            if (o) {
15453a5a1b3Sopenharmony_ci                *o = FILTER_CHAR;
15553a5a1b3Sopenharmony_ci                p = last; /* We retry at the next character */
15653a5a1b3Sopenharmony_ci            } else
15753a5a1b3Sopenharmony_ci                goto failure;
15853a5a1b3Sopenharmony_ci        }
15953a5a1b3Sopenharmony_ci
16053a5a1b3Sopenharmony_ci        if (o)
16153a5a1b3Sopenharmony_ci            o++;
16253a5a1b3Sopenharmony_ci    }
16353a5a1b3Sopenharmony_ci
16453a5a1b3Sopenharmony_ci    if (o) {
16553a5a1b3Sopenharmony_ci        *o = '\0';
16653a5a1b3Sopenharmony_ci        return output;
16753a5a1b3Sopenharmony_ci    }
16853a5a1b3Sopenharmony_ci
16953a5a1b3Sopenharmony_ci    return (char*) str;
17053a5a1b3Sopenharmony_ci
17153a5a1b3Sopenharmony_cifailure:
17253a5a1b3Sopenharmony_ci    return NULL;
17353a5a1b3Sopenharmony_ci}
17453a5a1b3Sopenharmony_ci
/* Validates str as UTF-8 without producing any output. Returns whatever
 * utf8_validate() returns in its validate-only mode: str itself when
 * the string is accepted, NULL otherwise. */
char* pa_utf8_valid (const char *str) {
    char *verdict = utf8_validate(str, NULL);

    return verdict;
}
17853a5a1b3Sopenharmony_ci
/* Produces a filtered copy of str in which bytes rejected by
 * utf8_validate() are replaced by FILTER_CHAR. The copy is obtained
 * from pa_xmalloc() and handed to the caller. */
char* pa_utf8_filter (const char *str) {
    size_t capacity;
    char *filtered;

    pa_assert(str);

    /* The filter never emits more bytes than it reads, so the input
     * length plus the terminator is enough room. */
    capacity = strlen(str) + 1;
    filtered = pa_xmalloc(capacity);

    return utf8_validate(str, filtered);
}
18653a5a1b3Sopenharmony_ci
18753a5a1b3Sopenharmony_ci#ifdef HAVE_ICONV
18853a5a1b3Sopenharmony_ci
18953a5a1b3Sopenharmony_cistatic char* iconv_simple(const char *str, const char *to, const char *from) {
19053a5a1b3Sopenharmony_ci    char *new_str;
19153a5a1b3Sopenharmony_ci    size_t len, inlen;
19253a5a1b3Sopenharmony_ci    iconv_t cd;
19353a5a1b3Sopenharmony_ci    ICONV_CONST char *inbuf;
19453a5a1b3Sopenharmony_ci    char *outbuf;
19553a5a1b3Sopenharmony_ci    size_t res, inbytes, outbytes;
19653a5a1b3Sopenharmony_ci
19753a5a1b3Sopenharmony_ci    pa_assert(str);
19853a5a1b3Sopenharmony_ci    pa_assert(to);
19953a5a1b3Sopenharmony_ci    pa_assert(from);
20053a5a1b3Sopenharmony_ci
20153a5a1b3Sopenharmony_ci    cd = iconv_open(to, from);
20253a5a1b3Sopenharmony_ci    if (cd == (iconv_t)-1)
20353a5a1b3Sopenharmony_ci        return NULL;
20453a5a1b3Sopenharmony_ci
20553a5a1b3Sopenharmony_ci    inlen = len = strlen(str) + 1;
20653a5a1b3Sopenharmony_ci    new_str = pa_xmalloc(len);
20753a5a1b3Sopenharmony_ci
20853a5a1b3Sopenharmony_ci    for (;;) {
20953a5a1b3Sopenharmony_ci        inbuf = (ICONV_CONST char*) str; /* Brain dead prototype for iconv() */
21053a5a1b3Sopenharmony_ci        inbytes = inlen;
21153a5a1b3Sopenharmony_ci        outbuf = new_str;
21253a5a1b3Sopenharmony_ci        outbytes = len;
21353a5a1b3Sopenharmony_ci
21453a5a1b3Sopenharmony_ci        res = iconv(cd, &inbuf, &inbytes, &outbuf, &outbytes);
21553a5a1b3Sopenharmony_ci
21653a5a1b3Sopenharmony_ci        if (res != (size_t)-1)
21753a5a1b3Sopenharmony_ci            break;
21853a5a1b3Sopenharmony_ci
21953a5a1b3Sopenharmony_ci        if (errno != E2BIG) {
22053a5a1b3Sopenharmony_ci            pa_xfree(new_str);
22153a5a1b3Sopenharmony_ci            new_str = NULL;
22253a5a1b3Sopenharmony_ci            break;
22353a5a1b3Sopenharmony_ci        }
22453a5a1b3Sopenharmony_ci
22553a5a1b3Sopenharmony_ci        pa_assert(inbytes != 0);
22653a5a1b3Sopenharmony_ci
22753a5a1b3Sopenharmony_ci        len += inbytes;
22853a5a1b3Sopenharmony_ci        new_str = pa_xrealloc(new_str, len);
22953a5a1b3Sopenharmony_ci    }
23053a5a1b3Sopenharmony_ci
23153a5a1b3Sopenharmony_ci    iconv_close(cd);
23253a5a1b3Sopenharmony_ci
23353a5a1b3Sopenharmony_ci    return new_str;
23453a5a1b3Sopenharmony_ci}
23553a5a1b3Sopenharmony_ci
/* Converts str from UTF-8 via iconv_simple(). The target charset name
 * is the empty string; presumably iconv_open() interprets that as the
 * current locale's charset (implementation dependent - verify). Returns
 * iconv_simple()'s result, which is NULL on failure. */
char* pa_utf8_to_locale (const char *str) {
    const char *source_charset = "UTF-8";
    const char *target_charset = "";

    return iconv_simple(str, target_charset, source_charset);
}
23953a5a1b3Sopenharmony_ci
/* Converts str to UTF-8 via iconv_simple(). The source charset name is
 * the empty string; presumably iconv_open() interprets that as the
 * current locale's charset (implementation dependent - verify). Returns
 * iconv_simple()'s result, which is NULL on failure. */
char* pa_locale_to_utf8 (const char *str) {
    const char *source_charset = "";
    const char *target_charset = "UTF-8";

    return iconv_simple(str, target_charset, source_charset);
}
24353a5a1b3Sopenharmony_ci
24453a5a1b3Sopenharmony_ci#else
24553a5a1b3Sopenharmony_ci
/* Fallback used when iconv is not available: instead of a real charset
 * conversion, returns the copy produced by pa_ascii_filter(), i.e. str
 * with every non-ASCII byte removed. */
char* pa_utf8_to_locale (const char *str) {
    char *ascii_only;

    pa_assert(str);

    ascii_only = pa_ascii_filter(str);
    return ascii_only;
}
25153a5a1b3Sopenharmony_ci
/* Fallback used when iconv is not available: no conversion is done.
 * If str already passes pa_utf8_valid() a pa_xstrdup() copy of it is
 * returned; otherwise the result is NULL. */
char* pa_locale_to_utf8 (const char *str) {
    pa_assert(str);

    return pa_utf8_valid(str) ? pa_xstrdup(str) : NULL;
}
26053a5a1b3Sopenharmony_ci
26153a5a1b3Sopenharmony_ci#endif
26253a5a1b3Sopenharmony_ci
/* Checks that str consists solely of 7-bit ASCII bytes. Returns str
 * itself (cast to char*) when no byte has a value of 128 or more, and
 * NULL as soon as such a byte is found. */
char *pa_ascii_valid(const char *str) {
    const unsigned char *cursor;

    pa_assert(str);

    cursor = (const unsigned char*) str;
    while (*cursor != 0) {
        if (*cursor > 127)
            return NULL;
        cursor++;
    }

    return (char*) str;
}
27353a5a1b3Sopenharmony_ci
/* Returns a pa_xstrdup() copy of str from which every byte with a value
 * of 128 or more has been removed. The remaining bytes keep their order
 * and the result is NUL-terminated. */
char *pa_ascii_filter(const char *str) {
    char *copy;
    size_t rd, wr;

    pa_assert(str);

    copy = pa_xstrdup(str);

    /* Compact in place: wr never runs ahead of rd. */
    wr = 0;
    for (rd = 0; copy[rd] != '\0'; rd++) {
        if ((unsigned char) copy[rd] >= 128)
            continue;
        copy[wr++] = copy[rd];
    }

    copy[wr] = 0;

    return copy;
}
288