153a5a1b3Sopenharmony_ci/*** 253a5a1b3Sopenharmony_ci This file is part of PulseAudio. 353a5a1b3Sopenharmony_ci 453a5a1b3Sopenharmony_ci Copyright 2006 Lennart Poettering 553a5a1b3Sopenharmony_ci Copyright 2006 Pierre Ossman <ossman@cendio.se> for Cendio AB 653a5a1b3Sopenharmony_ci 753a5a1b3Sopenharmony_ci PulseAudio is free software; you can redistribute it and/or modify 853a5a1b3Sopenharmony_ci it under the terms of the GNU Lesser General Public License as 953a5a1b3Sopenharmony_ci published by the Free Software Foundation; either version 2.1 of the 1053a5a1b3Sopenharmony_ci License, or (at your option) any later version. 1153a5a1b3Sopenharmony_ci 1253a5a1b3Sopenharmony_ci PulseAudio is distributed in the hope that it will be useful, but 1353a5a1b3Sopenharmony_ci WITHOUT ANY WARRANTY; without even the implied warranty of 1453a5a1b3Sopenharmony_ci MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 1553a5a1b3Sopenharmony_ci Lesser General Public License for more details. 1653a5a1b3Sopenharmony_ci 1753a5a1b3Sopenharmony_ci You should have received a copy of the GNU Lesser General Public 1853a5a1b3Sopenharmony_ci License along with PulseAudio; if not, see <http://www.gnu.org/licenses/>. 1953a5a1b3Sopenharmony_ci***/ 2053a5a1b3Sopenharmony_ci 2153a5a1b3Sopenharmony_ci/* This file is based on the GLIB utf8 validation functions. The 2253a5a1b3Sopenharmony_ci * original license text follows. */ 2353a5a1b3Sopenharmony_ci 2453a5a1b3Sopenharmony_ci/* gutf8.c - Operations on UTF-8 strings. 2553a5a1b3Sopenharmony_ci * 2653a5a1b3Sopenharmony_ci * Copyright (C) 1999 Tom Tromey 2753a5a1b3Sopenharmony_ci * Copyright (C) 2000 Red Hat, Inc. 2853a5a1b3Sopenharmony_ci * 2953a5a1b3Sopenharmony_ci * This library is free software; you can redistribute it and/or 3053a5a1b3Sopenharmony_ci * modify it under the terms of the GNU Lesser General Public 3153a5a1b3Sopenharmony_ci * License as published by the Free Software Foundation; either 3253a5a1b3Sopenharmony_ci * version 2 of the License, or (at your option) any later version. 3353a5a1b3Sopenharmony_ci * 3453a5a1b3Sopenharmony_ci * This library is distributed in the hope that it will be useful, 3553a5a1b3Sopenharmony_ci * but WITHOUT ANY WARRANTY; without even the implied warranty of 3653a5a1b3Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 3753a5a1b3Sopenharmony_ci * Lesser General Public License for more details. 3853a5a1b3Sopenharmony_ci * 3953a5a1b3Sopenharmony_ci * You should have received a copy of the GNU Lesser General Public 4053a5a1b3Sopenharmony_ci * License along with this library; if not, see <http://www.gnu.org/licenses/>. 4153a5a1b3Sopenharmony_ci */ 4253a5a1b3Sopenharmony_ci 4353a5a1b3Sopenharmony_ci#ifdef HAVE_CONFIG_H 4453a5a1b3Sopenharmony_ci#include <config.h> 4553a5a1b3Sopenharmony_ci#endif 4653a5a1b3Sopenharmony_ci 4753a5a1b3Sopenharmony_ci#include <errno.h> 4853a5a1b3Sopenharmony_ci#include <stdlib.h> 4953a5a1b3Sopenharmony_ci#include <inttypes.h> 5053a5a1b3Sopenharmony_ci#include <string.h> 5153a5a1b3Sopenharmony_ci 5253a5a1b3Sopenharmony_ci#ifdef HAVE_ICONV 5353a5a1b3Sopenharmony_ci#include <iconv.h> 5453a5a1b3Sopenharmony_ci#endif 5553a5a1b3Sopenharmony_ci 5653a5a1b3Sopenharmony_ci#include <pulse/xmalloc.h> 5753a5a1b3Sopenharmony_ci#include <pulsecore/macro.h> 5853a5a1b3Sopenharmony_ci 5953a5a1b3Sopenharmony_ci#include "utf8.h" 6053a5a1b3Sopenharmony_ci 6153a5a1b3Sopenharmony_ci#define FILTER_CHAR '_' 6253a5a1b3Sopenharmony_ci 6353a5a1b3Sopenharmony_cistatic inline bool is_unicode_valid(uint32_t ch) { 6453a5a1b3Sopenharmony_ci 6553a5a1b3Sopenharmony_ci if (ch >= 0x110000) /* End of unicode space */ 6653a5a1b3Sopenharmony_ci return false; 6753a5a1b3Sopenharmony_ci if ((ch & 0xFFFFF800) == 0xD800) /* Reserved area for UTF-16 */ 6853a5a1b3Sopenharmony_ci return false; 6953a5a1b3Sopenharmony_ci if ((ch >= 0xFDD0) && (ch <= 0xFDEF)) /* Reserved */ 7053a5a1b3Sopenharmony_ci return false; 7153a5a1b3Sopenharmony_ci if ((ch & 0xFFFE) == 0xFFFE) /* BOM (Byte Order Mark) */ 7253a5a1b3Sopenharmony_ci return false; 7353a5a1b3Sopenharmony_ci 7453a5a1b3Sopenharmony_ci return true; 7553a5a1b3Sopenharmony_ci} 7653a5a1b3Sopenharmony_ci 7753a5a1b3Sopenharmony_cistatic inline bool is_continuation_char(uint8_t ch) { 7853a5a1b3Sopenharmony_ci if ((ch & 0xc0) != 0x80) /* 10xxxxxx */ 7953a5a1b3Sopenharmony_ci return false; 8053a5a1b3Sopenharmony_ci return true; 8153a5a1b3Sopenharmony_ci} 8253a5a1b3Sopenharmony_ci 8353a5a1b3Sopenharmony_cistatic inline void merge_continuation_char(uint32_t *u_ch, uint8_t ch) { 8453a5a1b3Sopenharmony_ci *u_ch <<= 6; 8553a5a1b3Sopenharmony_ci *u_ch |= ch & 0x3f; 8653a5a1b3Sopenharmony_ci} 8753a5a1b3Sopenharmony_ci 8853a5a1b3Sopenharmony_cistatic char* utf8_validate(const char *str, char *output) { 8953a5a1b3Sopenharmony_ci uint32_t val = 0; 9053a5a1b3Sopenharmony_ci uint32_t min = 0; 9153a5a1b3Sopenharmony_ci const uint8_t *p, *last; 9253a5a1b3Sopenharmony_ci int size; 9353a5a1b3Sopenharmony_ci uint8_t *o; 9453a5a1b3Sopenharmony_ci 9553a5a1b3Sopenharmony_ci pa_assert(str); 9653a5a1b3Sopenharmony_ci 9753a5a1b3Sopenharmony_ci o = (uint8_t*) output; 9853a5a1b3Sopenharmony_ci for (p = (const uint8_t*) str; *p; p++) { 9953a5a1b3Sopenharmony_ci if (*p < 128) { 10053a5a1b3Sopenharmony_ci if (o) 10153a5a1b3Sopenharmony_ci *o = *p; 10253a5a1b3Sopenharmony_ci } else { 10353a5a1b3Sopenharmony_ci last = p; 10453a5a1b3Sopenharmony_ci 10553a5a1b3Sopenharmony_ci if ((*p & 0xe0) == 0xc0) { /* 110xxxxx two-char seq. */ 10653a5a1b3Sopenharmony_ci size = 2; 10753a5a1b3Sopenharmony_ci min = 128; 10853a5a1b3Sopenharmony_ci val = (uint32_t) (*p & 0x1e); 10953a5a1b3Sopenharmony_ci goto ONE_REMAINING; 11053a5a1b3Sopenharmony_ci } else if ((*p & 0xf0) == 0xe0) { /* 1110xxxx three-char seq.*/ 11153a5a1b3Sopenharmony_ci size = 3; 11253a5a1b3Sopenharmony_ci min = (1 << 11); 11353a5a1b3Sopenharmony_ci val = (uint32_t) (*p & 0x0f); 11453a5a1b3Sopenharmony_ci goto TWO_REMAINING; 11553a5a1b3Sopenharmony_ci } else if ((*p & 0xf8) == 0xf0) { /* 11110xxx four-char seq */ 11653a5a1b3Sopenharmony_ci size = 4; 11753a5a1b3Sopenharmony_ci min = (1 << 16); 11853a5a1b3Sopenharmony_ci val = (uint32_t) (*p & 0x07); 11953a5a1b3Sopenharmony_ci } else 12053a5a1b3Sopenharmony_ci goto error; 12153a5a1b3Sopenharmony_ci 12253a5a1b3Sopenharmony_ci p++; 12353a5a1b3Sopenharmony_ci if (!is_continuation_char(*p)) 12453a5a1b3Sopenharmony_ci goto error; 12553a5a1b3Sopenharmony_ci merge_continuation_char(&val, *p); 12653a5a1b3Sopenharmony_ci 12753a5a1b3Sopenharmony_ciTWO_REMAINING: 12853a5a1b3Sopenharmony_ci p++; 12953a5a1b3Sopenharmony_ci if (!is_continuation_char(*p)) 13053a5a1b3Sopenharmony_ci goto error; 13153a5a1b3Sopenharmony_ci merge_continuation_char(&val, *p); 13253a5a1b3Sopenharmony_ci 13353a5a1b3Sopenharmony_ciONE_REMAINING: 13453a5a1b3Sopenharmony_ci p++; 13553a5a1b3Sopenharmony_ci if (!is_continuation_char(*p)) 13653a5a1b3Sopenharmony_ci goto error; 13753a5a1b3Sopenharmony_ci merge_continuation_char(&val, *p); 13853a5a1b3Sopenharmony_ci 13953a5a1b3Sopenharmony_ci if (val < min) 14053a5a1b3Sopenharmony_ci goto error; 14153a5a1b3Sopenharmony_ci 14253a5a1b3Sopenharmony_ci if (!is_unicode_valid(val)) 14353a5a1b3Sopenharmony_ci goto error; 14453a5a1b3Sopenharmony_ci 14553a5a1b3Sopenharmony_ci if (o) { 14653a5a1b3Sopenharmony_ci memcpy(o, last, (size_t) size); 14753a5a1b3Sopenharmony_ci o += size; 14853a5a1b3Sopenharmony_ci } 14953a5a1b3Sopenharmony_ci 15053a5a1b3Sopenharmony_ci continue; 15153a5a1b3Sopenharmony_ci 15253a5a1b3Sopenharmony_cierror: 15353a5a1b3Sopenharmony_ci if (o) { 15453a5a1b3Sopenharmony_ci *o = FILTER_CHAR; 15553a5a1b3Sopenharmony_ci p = last; /* We retry at the next character */ 15653a5a1b3Sopenharmony_ci } else 15753a5a1b3Sopenharmony_ci goto failure; 15853a5a1b3Sopenharmony_ci } 15953a5a1b3Sopenharmony_ci 16053a5a1b3Sopenharmony_ci if (o) 16153a5a1b3Sopenharmony_ci o++; 16253a5a1b3Sopenharmony_ci } 16353a5a1b3Sopenharmony_ci 16453a5a1b3Sopenharmony_ci if (o) { 16553a5a1b3Sopenharmony_ci *o = '\0'; 16653a5a1b3Sopenharmony_ci return output; 16753a5a1b3Sopenharmony_ci } 16853a5a1b3Sopenharmony_ci 16953a5a1b3Sopenharmony_ci return (char*) str; 17053a5a1b3Sopenharmony_ci 17153a5a1b3Sopenharmony_cifailure: 17253a5a1b3Sopenharmony_ci return NULL; 17353a5a1b3Sopenharmony_ci} 17453a5a1b3Sopenharmony_ci 17553a5a1b3Sopenharmony_cichar* pa_utf8_valid (const char *str) { 17653a5a1b3Sopenharmony_ci return utf8_validate(str, NULL); 17753a5a1b3Sopenharmony_ci} 17853a5a1b3Sopenharmony_ci 17953a5a1b3Sopenharmony_cichar* pa_utf8_filter (const char *str) { 18053a5a1b3Sopenharmony_ci char *new_str; 18153a5a1b3Sopenharmony_ci 18253a5a1b3Sopenharmony_ci pa_assert(str); 18353a5a1b3Sopenharmony_ci new_str = pa_xmalloc(strlen(str) + 1); 18453a5a1b3Sopenharmony_ci return utf8_validate(str, new_str); 18553a5a1b3Sopenharmony_ci} 18653a5a1b3Sopenharmony_ci 18753a5a1b3Sopenharmony_ci#ifdef HAVE_ICONV 18853a5a1b3Sopenharmony_ci 18953a5a1b3Sopenharmony_cistatic char* iconv_simple(const char *str, const char *to, const char *from) { 19053a5a1b3Sopenharmony_ci char *new_str; 19153a5a1b3Sopenharmony_ci size_t len, inlen; 19253a5a1b3Sopenharmony_ci iconv_t cd; 19353a5a1b3Sopenharmony_ci ICONV_CONST char *inbuf; 19453a5a1b3Sopenharmony_ci char *outbuf; 19553a5a1b3Sopenharmony_ci size_t res, inbytes, outbytes; 19653a5a1b3Sopenharmony_ci 19753a5a1b3Sopenharmony_ci pa_assert(str); 19853a5a1b3Sopenharmony_ci pa_assert(to); 19953a5a1b3Sopenharmony_ci pa_assert(from); 20053a5a1b3Sopenharmony_ci 20153a5a1b3Sopenharmony_ci cd = iconv_open(to, from); 20253a5a1b3Sopenharmony_ci if (cd == (iconv_t)-1) 20353a5a1b3Sopenharmony_ci return NULL; 20453a5a1b3Sopenharmony_ci 20553a5a1b3Sopenharmony_ci inlen = len = strlen(str) + 1; 20653a5a1b3Sopenharmony_ci new_str = pa_xmalloc(len); 20753a5a1b3Sopenharmony_ci 20853a5a1b3Sopenharmony_ci for (;;) { 20953a5a1b3Sopenharmony_ci inbuf = (ICONV_CONST char*) str; /* Brain dead prototype for iconv() */ 21053a5a1b3Sopenharmony_ci inbytes = inlen; 21153a5a1b3Sopenharmony_ci outbuf = new_str; 21253a5a1b3Sopenharmony_ci outbytes = len; 21353a5a1b3Sopenharmony_ci 21453a5a1b3Sopenharmony_ci res = iconv(cd, &inbuf, &inbytes, &outbuf, &outbytes); 21553a5a1b3Sopenharmony_ci 21653a5a1b3Sopenharmony_ci if (res != (size_t)-1) 21753a5a1b3Sopenharmony_ci break; 21853a5a1b3Sopenharmony_ci 21953a5a1b3Sopenharmony_ci if (errno != E2BIG) { 22053a5a1b3Sopenharmony_ci pa_xfree(new_str); 22153a5a1b3Sopenharmony_ci new_str = NULL; 22253a5a1b3Sopenharmony_ci break; 22353a5a1b3Sopenharmony_ci } 22453a5a1b3Sopenharmony_ci 22553a5a1b3Sopenharmony_ci pa_assert(inbytes != 0); 22653a5a1b3Sopenharmony_ci 22753a5a1b3Sopenharmony_ci len += inbytes; 22853a5a1b3Sopenharmony_ci new_str = pa_xrealloc(new_str, len); 22953a5a1b3Sopenharmony_ci } 23053a5a1b3Sopenharmony_ci 23153a5a1b3Sopenharmony_ci iconv_close(cd); 23253a5a1b3Sopenharmony_ci 23353a5a1b3Sopenharmony_ci return new_str; 23453a5a1b3Sopenharmony_ci} 23553a5a1b3Sopenharmony_ci 23653a5a1b3Sopenharmony_cichar* pa_utf8_to_locale (const char *str) { 23753a5a1b3Sopenharmony_ci return iconv_simple(str, "", "UTF-8"); 23853a5a1b3Sopenharmony_ci} 23953a5a1b3Sopenharmony_ci 24053a5a1b3Sopenharmony_cichar* pa_locale_to_utf8 (const char *str) { 24153a5a1b3Sopenharmony_ci return iconv_simple(str, "UTF-8", ""); 24253a5a1b3Sopenharmony_ci} 24353a5a1b3Sopenharmony_ci 24453a5a1b3Sopenharmony_ci#else 24553a5a1b3Sopenharmony_ci 24653a5a1b3Sopenharmony_cichar* pa_utf8_to_locale (const char *str) { 24753a5a1b3Sopenharmony_ci pa_assert(str); 24853a5a1b3Sopenharmony_ci 24953a5a1b3Sopenharmony_ci return pa_ascii_filter(str); 25053a5a1b3Sopenharmony_ci} 25153a5a1b3Sopenharmony_ci 25253a5a1b3Sopenharmony_cichar* pa_locale_to_utf8 (const char *str) { 25353a5a1b3Sopenharmony_ci pa_assert(str); 25453a5a1b3Sopenharmony_ci 25553a5a1b3Sopenharmony_ci if (pa_utf8_valid(str)) 25653a5a1b3Sopenharmony_ci return pa_xstrdup(str); 25753a5a1b3Sopenharmony_ci 25853a5a1b3Sopenharmony_ci return NULL; 25953a5a1b3Sopenharmony_ci} 26053a5a1b3Sopenharmony_ci 26153a5a1b3Sopenharmony_ci#endif 26253a5a1b3Sopenharmony_ci 26353a5a1b3Sopenharmony_cichar *pa_ascii_valid(const char *str) { 26453a5a1b3Sopenharmony_ci const char *p; 26553a5a1b3Sopenharmony_ci pa_assert(str); 26653a5a1b3Sopenharmony_ci 26753a5a1b3Sopenharmony_ci for (p = str; *p; p++) 26853a5a1b3Sopenharmony_ci if ((unsigned char) *p >= 128) 26953a5a1b3Sopenharmony_ci return NULL; 27053a5a1b3Sopenharmony_ci 27153a5a1b3Sopenharmony_ci return (char*) str; 27253a5a1b3Sopenharmony_ci} 27353a5a1b3Sopenharmony_ci 27453a5a1b3Sopenharmony_cichar *pa_ascii_filter(const char *str) { 27553a5a1b3Sopenharmony_ci char *r, *s, *d; 27653a5a1b3Sopenharmony_ci pa_assert(str); 27753a5a1b3Sopenharmony_ci 27853a5a1b3Sopenharmony_ci r = pa_xstrdup(str); 27953a5a1b3Sopenharmony_ci 28053a5a1b3Sopenharmony_ci for (s = r, d = r; *s; s++) 28153a5a1b3Sopenharmony_ci if ((unsigned char) *s < 128) 28253a5a1b3Sopenharmony_ci *(d++) = *s; 28353a5a1b3Sopenharmony_ci 28453a5a1b3Sopenharmony_ci *d = 0; 28553a5a1b3Sopenharmony_ci 28653a5a1b3Sopenharmony_ci return r; 28753a5a1b3Sopenharmony_ci} 288