1/* 2 * Copyright (c) 2007 Mans Rullgard 3 * 4 * This file is part of FFmpeg. 5 * 6 * FFmpeg is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Lesser General Public 8 * License as published by the Free Software Foundation; either 9 * version 2.1 of the License, or (at your option) any later version. 10 * 11 * FFmpeg is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Lesser General Public License for more details. 15 * 16 * You should have received a copy of the GNU Lesser General Public 17 * License along with FFmpeg; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 19 */ 20 21#ifndef AVUTIL_AVSTRING_H 22#define AVUTIL_AVSTRING_H 23 24#include <stddef.h> 25#include <stdint.h> 26#include "attributes.h" 27#include "version.h" 28 29/** 30 * @addtogroup lavu_string 31 * @{ 32 */ 33 34/** 35 * Return non-zero if pfx is a prefix of str. If it is, *ptr is set to 36 * the address of the first character in str after the prefix. 37 * 38 * @param str input string 39 * @param pfx prefix to test 40 * @param ptr updated if the prefix is matched inside str 41 * @return non-zero if the prefix matches, zero otherwise 42 */ 43int av_strstart(const char *str, const char *pfx, const char **ptr); 44 45/** 46 * Return non-zero if pfx is a prefix of str independent of case. If 47 * it is, *ptr is set to the address of the first character in str 48 * after the prefix. 49 * 50 * @param str input string 51 * @param pfx prefix to test 52 * @param ptr updated if the prefix is matched inside str 53 * @return non-zero if the prefix matches, zero otherwise 54 */ 55int av_stristart(const char *str, const char *pfx, const char **ptr); 56 57/** 58 * Locate the first case-independent occurrence in the string haystack 59 * of the string needle. A zero-length string needle is considered to 60 * match at the start of haystack. 61 * 62 * This function is a case-insensitive version of the standard strstr(). 63 * 64 * @param haystack string to search in 65 * @param needle string to search for 66 * @return pointer to the located match within haystack 67 * or a null pointer if no match 68 */ 69char *av_stristr(const char *haystack, const char *needle); 70 71/** 72 * Locate the first occurrence of the string needle in the string haystack 73 * where not more than hay_length characters are searched. A zero-length 74 * string needle is considered to match at the start of haystack. 75 * 76 * This function is a length-limited version of the standard strstr(). 77 * 78 * @param haystack string to search in 79 * @param needle string to search for 80 * @param hay_length length of string to search in 81 * @return pointer to the located match within haystack 82 * or a null pointer if no match 83 */ 84char *av_strnstr(const char *haystack, const char *needle, size_t hay_length); 85 86/** 87 * Copy the string src to dst, but no more than size - 1 bytes, and 88 * null-terminate dst. 89 * 90 * This function is the same as BSD strlcpy(). 91 * 92 * @param dst destination buffer 93 * @param src source string 94 * @param size size of destination buffer 95 * @return the length of src 96 * 97 * @warning since the return value is the length of src, src absolutely 98 * _must_ be a properly 0-terminated string, otherwise this will read beyond 99 * the end of the buffer and possibly crash. 100 */ 101size_t av_strlcpy(char *dst, const char *src, size_t size); 102 103/** 104 * Append the string src to the string dst, but to a total length of 105 * no more than size - 1 bytes, and null-terminate dst. 106 * 107 * This function is similar to BSD strlcat(), but differs when 108 * size <= strlen(dst). 109 * 110 * @param dst destination buffer 111 * @param src source string 112 * @param size size of destination buffer 113 * @return the total length of src and dst 114 * 115 * @warning since the return value use the length of src and dst, these 116 * absolutely _must_ be a properly 0-terminated strings, otherwise this 117 * will read beyond the end of the buffer and possibly crash. 118 */ 119size_t av_strlcat(char *dst, const char *src, size_t size); 120 121/** 122 * Append output to a string, according to a format. Never write out of 123 * the destination buffer, and always put a terminating 0 within 124 * the buffer. 125 * @param dst destination buffer (string to which the output is 126 * appended) 127 * @param size total size of the destination buffer 128 * @param fmt printf-compatible format string, specifying how the 129 * following parameters are used 130 * @return the length of the string that would have been generated 131 * if enough space had been available 132 */ 133size_t av_strlcatf(char *dst, size_t size, const char *fmt, ...) av_printf_format(3, 4); 134 135/** 136 * Get the count of continuous non zero chars starting from the beginning. 137 * 138 * @param len maximum number of characters to check in the string, that 139 * is the maximum value which is returned by the function 140 */ 141static inline size_t av_strnlen(const char *s, size_t len) 142{ 143 size_t i; 144 for (i = 0; i < len && s[i]; i++) 145 ; 146 return i; 147} 148 149/** 150 * Print arguments following specified format into a large enough auto 151 * allocated buffer. It is similar to GNU asprintf(). 152 * @param fmt printf-compatible format string, specifying how the 153 * following parameters are used. 154 * @return the allocated string 155 * @note You have to free the string yourself with av_free(). 156 */ 157char *av_asprintf(const char *fmt, ...) av_printf_format(1, 2); 158 159#if FF_API_D2STR 160/** 161 * Convert a number to an av_malloced string. 162 * @deprecated use av_asprintf() with "%f" or a more specific format 163 */ 164attribute_deprecated 165char *av_d2str(double d); 166#endif 167 168/** 169 * Unescape the given string until a non escaped terminating char, 170 * and return the token corresponding to the unescaped string. 171 * 172 * The normal \ and ' escaping is supported. Leading and trailing 173 * whitespaces are removed, unless they are escaped with '\' or are 174 * enclosed between ''. 175 * 176 * @param buf the buffer to parse, buf will be updated to point to the 177 * terminating char 178 * @param term a 0-terminated list of terminating chars 179 * @return the malloced unescaped string, which must be av_freed by 180 * the user, NULL in case of allocation failure 181 */ 182char *av_get_token(const char **buf, const char *term); 183 184/** 185 * Split the string into several tokens which can be accessed by 186 * successive calls to av_strtok(). 187 * 188 * A token is defined as a sequence of characters not belonging to the 189 * set specified in delim. 190 * 191 * On the first call to av_strtok(), s should point to the string to 192 * parse, and the value of saveptr is ignored. In subsequent calls, s 193 * should be NULL, and saveptr should be unchanged since the previous 194 * call. 195 * 196 * This function is similar to strtok_r() defined in POSIX.1. 197 * 198 * @param s the string to parse, may be NULL 199 * @param delim 0-terminated list of token delimiters, must be non-NULL 200 * @param saveptr user-provided pointer which points to stored 201 * information necessary for av_strtok() to continue scanning the same 202 * string. saveptr is updated to point to the next character after the 203 * first delimiter found, or to NULL if the string was terminated 204 * @return the found token, or NULL when no token is found 205 */ 206char *av_strtok(char *s, const char *delim, char **saveptr); 207 208/** 209 * Locale-independent conversion of ASCII isdigit. 210 */ 211static inline av_const int av_isdigit(int c) 212{ 213 return c >= '0' && c <= '9'; 214} 215 216/** 217 * Locale-independent conversion of ASCII isgraph. 218 */ 219static inline av_const int av_isgraph(int c) 220{ 221 return c > 32 && c < 127; 222} 223 224/** 225 * Locale-independent conversion of ASCII isspace. 226 */ 227static inline av_const int av_isspace(int c) 228{ 229 return c == ' ' || c == '\f' || c == '\n' || c == '\r' || c == '\t' || 230 c == '\v'; 231} 232 233/** 234 * Locale-independent conversion of ASCII characters to uppercase. 235 */ 236static inline av_const int av_toupper(int c) 237{ 238 if (c >= 'a' && c <= 'z') 239 c ^= 0x20; 240 return c; 241} 242 243/** 244 * Locale-independent conversion of ASCII characters to lowercase. 245 */ 246static inline av_const int av_tolower(int c) 247{ 248 if (c >= 'A' && c <= 'Z') 249 c ^= 0x20; 250 return c; 251} 252 253/** 254 * Locale-independent conversion of ASCII isxdigit. 255 */ 256static inline av_const int av_isxdigit(int c) 257{ 258 c = av_tolower(c); 259 return av_isdigit(c) || (c >= 'a' && c <= 'f'); 260} 261 262/** 263 * Locale-independent case-insensitive compare. 264 * @note This means only ASCII-range characters are case-insensitive 265 */ 266int av_strcasecmp(const char *a, const char *b); 267 268/** 269 * Locale-independent case-insensitive compare. 270 * @note This means only ASCII-range characters are case-insensitive 271 */ 272int av_strncasecmp(const char *a, const char *b, size_t n); 273 274/** 275 * Locale-independent strings replace. 276 * @note This means only ASCII-range characters are replace 277 */ 278char *av_strireplace(const char *str, const char *from, const char *to); 279 280/** 281 * Thread safe basename. 282 * @param path the string to parse, on DOS both \ and / are considered separators. 283 * @return pointer to the basename substring. 284 * If path does not contain a slash, the function returns a copy of path. 285 * If path is a NULL pointer or points to an empty string, a pointer 286 * to a string "." is returned. 287 */ 288const char *av_basename(const char *path); 289 290/** 291 * Thread safe dirname. 292 * @param path the string to parse, on DOS both \ and / are considered separators. 293 * @return A pointer to a string that's the parent directory of path. 294 * If path is a NULL pointer or points to an empty string, a pointer 295 * to a string "." is returned. 296 * @note the function may modify the contents of the path, so copies should be passed. 297 */ 298const char *av_dirname(char *path); 299 300/** 301 * Match instances of a name in a comma-separated list of names. 302 * List entries are checked from the start to the end of the names list, 303 * the first match ends further processing. If an entry prefixed with '-' 304 * matches, then 0 is returned. The "ALL" list entry is considered to 305 * match all names. 306 * 307 * @param name Name to look for. 308 * @param names List of names. 309 * @return 1 on match, 0 otherwise. 310 */ 311int av_match_name(const char *name, const char *names); 312 313/** 314 * Append path component to the existing path. 315 * Path separator '/' is placed between when needed. 316 * Resulting string have to be freed with av_free(). 317 * @param path base path 318 * @param component component to be appended 319 * @return new path or NULL on error. 320 */ 321char *av_append_path_component(const char *path, const char *component); 322 323enum AVEscapeMode { 324 AV_ESCAPE_MODE_AUTO, ///< Use auto-selected escaping mode. 325 AV_ESCAPE_MODE_BACKSLASH, ///< Use backslash escaping. 326 AV_ESCAPE_MODE_QUOTE, ///< Use single-quote escaping. 327 AV_ESCAPE_MODE_XML, ///< Use XML non-markup character data escaping. 328}; 329 330/** 331 * Consider spaces special and escape them even in the middle of the 332 * string. 333 * 334 * This is equivalent to adding the whitespace characters to the special 335 * characters lists, except it is guaranteed to use the exact same list 336 * of whitespace characters as the rest of libavutil. 337 */ 338#define AV_ESCAPE_FLAG_WHITESPACE (1 << 0) 339 340/** 341 * Escape only specified special characters. 342 * Without this flag, escape also any characters that may be considered 343 * special by av_get_token(), such as the single quote. 344 */ 345#define AV_ESCAPE_FLAG_STRICT (1 << 1) 346 347/** 348 * Within AV_ESCAPE_MODE_XML, additionally escape single quotes for single 349 * quoted attributes. 350 */ 351#define AV_ESCAPE_FLAG_XML_SINGLE_QUOTES (1 << 2) 352 353/** 354 * Within AV_ESCAPE_MODE_XML, additionally escape double quotes for double 355 * quoted attributes. 356 */ 357#define AV_ESCAPE_FLAG_XML_DOUBLE_QUOTES (1 << 3) 358 359 360/** 361 * Escape string in src, and put the escaped string in an allocated 362 * string in *dst, which must be freed with av_free(). 363 * 364 * @param dst pointer where an allocated string is put 365 * @param src string to escape, must be non-NULL 366 * @param special_chars string containing the special characters which 367 * need to be escaped, can be NULL 368 * @param mode escape mode to employ, see AV_ESCAPE_MODE_* macros. 369 * Any unknown value for mode will be considered equivalent to 370 * AV_ESCAPE_MODE_BACKSLASH, but this behaviour can change without 371 * notice. 372 * @param flags flags which control how to escape, see AV_ESCAPE_FLAG_ macros 373 * @return the length of the allocated string, or a negative error code in case of error 374 * @see av_bprint_escape() 375 */ 376av_warn_unused_result 377int av_escape(char **dst, const char *src, const char *special_chars, 378 enum AVEscapeMode mode, int flags); 379 380#define AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES 1 ///< accept codepoints over 0x10FFFF 381#define AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS 2 ///< accept non-characters - 0xFFFE and 0xFFFF 382#define AV_UTF8_FLAG_ACCEPT_SURROGATES 4 ///< accept UTF-16 surrogates codes 383#define AV_UTF8_FLAG_EXCLUDE_XML_INVALID_CONTROL_CODES 8 ///< exclude control codes not accepted by XML 384 385#define AV_UTF8_FLAG_ACCEPT_ALL \ 386 AV_UTF8_FLAG_ACCEPT_INVALID_BIG_CODES|AV_UTF8_FLAG_ACCEPT_NON_CHARACTERS|AV_UTF8_FLAG_ACCEPT_SURROGATES 387 388/** 389 * Read and decode a single UTF-8 code point (character) from the 390 * buffer in *buf, and update *buf to point to the next byte to 391 * decode. 392 * 393 * In case of an invalid byte sequence, the pointer will be updated to 394 * the next byte after the invalid sequence and the function will 395 * return an error code. 396 * 397 * Depending on the specified flags, the function will also fail in 398 * case the decoded code point does not belong to a valid range. 399 * 400 * @note For speed-relevant code a carefully implemented use of 401 * GET_UTF8() may be preferred. 402 * 403 * @param codep pointer used to return the parsed code in case of success. 404 * The value in *codep is set even in case the range check fails. 405 * @param bufp pointer to the address the first byte of the sequence 406 * to decode, updated by the function to point to the 407 * byte next after the decoded sequence 408 * @param buf_end pointer to the end of the buffer, points to the next 409 * byte past the last in the buffer. This is used to 410 * avoid buffer overreads (in case of an unfinished 411 * UTF-8 sequence towards the end of the buffer). 412 * @param flags a collection of AV_UTF8_FLAG_* flags 413 * @return >= 0 in case a sequence was successfully read, a negative 414 * value in case of invalid sequence 415 */ 416av_warn_unused_result 417int av_utf8_decode(int32_t *codep, const uint8_t **bufp, const uint8_t *buf_end, 418 unsigned int flags); 419 420/** 421 * Check if a name is in a list. 422 * @returns 0 if not found, or the 1 based index where it has been found in the 423 * list. 424 */ 425int av_match_list(const char *name, const char *list, char separator); 426 427/** 428 * See libc sscanf manual for more information. 429 * Locale-independent sscanf implementation. 430 */ 431int av_sscanf(const char *string, const char *format, ...); 432 433/** 434 * @} 435 */ 436 437#endif /* AVUTIL_AVSTRING_H */ 438