1987da915Sopenharmony_ci/** 2987da915Sopenharmony_ci * unistr.c - Unicode string handling. Originated from the Linux-NTFS project. 3987da915Sopenharmony_ci * 4987da915Sopenharmony_ci * Copyright (c) 2000-2004 Anton Altaparmakov 5987da915Sopenharmony_ci * Copyright (c) 2002-2009 Szabolcs Szakacsits 6987da915Sopenharmony_ci * Copyright (c) 2008-2015 Jean-Pierre Andre 7987da915Sopenharmony_ci * Copyright (c) 2008 Bernhard Kaindl 8987da915Sopenharmony_ci * 9987da915Sopenharmony_ci * This program/include file is free software; you can redistribute it and/or 10987da915Sopenharmony_ci * modify it under the terms of the GNU General Public License as published 11987da915Sopenharmony_ci * by the Free Software Foundation; either version 2 of the License, or 12987da915Sopenharmony_ci * (at your option) any later version. 13987da915Sopenharmony_ci * 14987da915Sopenharmony_ci * This program/include file is distributed in the hope that it will be 15987da915Sopenharmony_ci * useful, but WITHOUT ANY WARRANTY; without even the implied warranty 16987da915Sopenharmony_ci * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 17987da915Sopenharmony_ci * GNU General Public License for more details. 
18987da915Sopenharmony_ci * 19987da915Sopenharmony_ci * You should have received a copy of the GNU General Public License 20987da915Sopenharmony_ci * along with this program (in the main directory of the NTFS-3G 21987da915Sopenharmony_ci * distribution in the file COPYING); if not, write to the Free Software 22987da915Sopenharmony_ci * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 23987da915Sopenharmony_ci */ 24987da915Sopenharmony_ci 25987da915Sopenharmony_ci#ifdef HAVE_CONFIG_H 26987da915Sopenharmony_ci#include "config.h" 27987da915Sopenharmony_ci#endif 28987da915Sopenharmony_ci 29987da915Sopenharmony_ci#ifdef HAVE_STDIO_H 30987da915Sopenharmony_ci#include <stdio.h> 31987da915Sopenharmony_ci#endif 32987da915Sopenharmony_ci#ifdef HAVE_STDLIB_H 33987da915Sopenharmony_ci#include <stdlib.h> 34987da915Sopenharmony_ci#endif 35987da915Sopenharmony_ci#ifdef HAVE_WCHAR_H 36987da915Sopenharmony_ci#include <wchar.h> 37987da915Sopenharmony_ci#endif 38987da915Sopenharmony_ci#ifdef HAVE_STRING_H 39987da915Sopenharmony_ci#include <string.h> 40987da915Sopenharmony_ci#endif 41987da915Sopenharmony_ci#ifdef HAVE_ERRNO_H 42987da915Sopenharmony_ci#include <errno.h> 43987da915Sopenharmony_ci#endif 44987da915Sopenharmony_ci#ifdef HAVE_LOCALE_H 45987da915Sopenharmony_ci#include <locale.h> 46987da915Sopenharmony_ci#endif 47987da915Sopenharmony_ci 48987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 49987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 50987da915Sopenharmony_ci#include <CoreFoundation/CoreFoundation.h> 51987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 52987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 53987da915Sopenharmony_ci 54987da915Sopenharmony_ci#include "compat.h" 55987da915Sopenharmony_ci#include "attrib.h" 56987da915Sopenharmony_ci#include "types.h" 57987da915Sopenharmony_ci#include "unistr.h" 58987da915Sopenharmony_ci#include "debug.h" 59987da915Sopenharmony_ci#include "logging.h" 
60987da915Sopenharmony_ci#include "misc.h" 61987da915Sopenharmony_ci 62987da915Sopenharmony_ci#ifndef ALLOW_BROKEN_UNICODE 63987da915Sopenharmony_ci/* Erik allowing broken UTF-16 surrogate pairs and U+FFFE and U+FFFF by default, 64987da915Sopenharmony_ci * open to debate. */ 65987da915Sopenharmony_ci#define ALLOW_BROKEN_UNICODE 1 66987da915Sopenharmony_ci#endif /* !defined(ALLOW_BROKEN_UNICODE) */ 67987da915Sopenharmony_ci 68987da915Sopenharmony_ci/* 69987da915Sopenharmony_ci * IMPORTANT 70987da915Sopenharmony_ci * ========= 71987da915Sopenharmony_ci * 72987da915Sopenharmony_ci * All these routines assume that the Unicode characters are in little endian 73987da915Sopenharmony_ci * encoding inside the strings!!! 74987da915Sopenharmony_ci */ 75987da915Sopenharmony_ci 76987da915Sopenharmony_cistatic int use_utf8 = 1; /* use UTF-8 encoding for file names */ 77987da915Sopenharmony_ci 78987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 79987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 80987da915Sopenharmony_ci/** 81987da915Sopenharmony_ci * This variable controls whether or not automatic normalization form conversion 82987da915Sopenharmony_ci * should be performed when translating NTFS unicode file names to UTF-8. 83987da915Sopenharmony_ci * Defaults to on, but can be controlled from the outside using the function 84987da915Sopenharmony_ci * int ntfs_macosx_normalize_filenames(int normalize); 85987da915Sopenharmony_ci */ 86987da915Sopenharmony_cistatic int nfconvert_utf8 = 1; 87987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 88987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 89987da915Sopenharmony_ci 90987da915Sopenharmony_ci/* 91987da915Sopenharmony_ci * This is used by the name collation functions to quickly determine what 92987da915Sopenharmony_ci * characters are (in)valid. 
93987da915Sopenharmony_ci */ 94987da915Sopenharmony_ci#if 0 95987da915Sopenharmony_cistatic const u8 legal_ansi_char_array[0x40] = { 96987da915Sopenharmony_ci 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 97987da915Sopenharmony_ci 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 98987da915Sopenharmony_ci 99987da915Sopenharmony_ci 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 100987da915Sopenharmony_ci 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 101987da915Sopenharmony_ci 102987da915Sopenharmony_ci 0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17, 103987da915Sopenharmony_ci 0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00, 104987da915Sopenharmony_ci 105987da915Sopenharmony_ci 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 106987da915Sopenharmony_ci 0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18, 107987da915Sopenharmony_ci}; 108987da915Sopenharmony_ci#endif 109987da915Sopenharmony_ci 110987da915Sopenharmony_ci/** 111987da915Sopenharmony_ci * ntfs_names_are_equal - compare two Unicode names for equality 112987da915Sopenharmony_ci * @s1: name to compare to @s2 113987da915Sopenharmony_ci * @s1_len: length in Unicode characters of @s1 114987da915Sopenharmony_ci * @s2: name to compare to @s1 115987da915Sopenharmony_ci * @s2_len: length in Unicode characters of @s2 116987da915Sopenharmony_ci * @ic: ignore case bool 117987da915Sopenharmony_ci * @upcase: upcase table (only if @ic == IGNORE_CASE) 118987da915Sopenharmony_ci * @upcase_size: length in Unicode characters of @upcase (if present) 119987da915Sopenharmony_ci * 120987da915Sopenharmony_ci * Compare the names @s1 and @s2 and return TRUE (1) if the names are 121987da915Sopenharmony_ci * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE, 122987da915Sopenharmony_ci * the @upcase table is used to perform a case insensitive comparison. 
123987da915Sopenharmony_ci */ 124987da915Sopenharmony_ciBOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len, 125987da915Sopenharmony_ci const ntfschar *s2, size_t s2_len, 126987da915Sopenharmony_ci const IGNORE_CASE_BOOL ic, 127987da915Sopenharmony_ci const ntfschar *upcase, const u32 upcase_size) 128987da915Sopenharmony_ci{ 129987da915Sopenharmony_ci if (s1_len != s2_len) 130987da915Sopenharmony_ci return FALSE; 131987da915Sopenharmony_ci if (!s1_len) 132987da915Sopenharmony_ci return TRUE; 133987da915Sopenharmony_ci if (ic == CASE_SENSITIVE) 134987da915Sopenharmony_ci return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE; 135987da915Sopenharmony_ci return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE: 136987da915Sopenharmony_ci TRUE; 137987da915Sopenharmony_ci} 138987da915Sopenharmony_ci 139987da915Sopenharmony_ci/* 140987da915Sopenharmony_ci * ntfs_names_full_collate() fully collate two Unicode names 141987da915Sopenharmony_ci * 142987da915Sopenharmony_ci * @name1: first Unicode name to compare 143987da915Sopenharmony_ci * @name1_len: length of first Unicode name to compare 144987da915Sopenharmony_ci * @name2: second Unicode name to compare 145987da915Sopenharmony_ci * @name2_len: length of second Unicode name to compare 146987da915Sopenharmony_ci * @ic: either CASE_SENSITIVE or IGNORE_CASE (see below) 147987da915Sopenharmony_ci * @upcase: upcase table 148987da915Sopenharmony_ci * @upcase_len: upcase table size 149987da915Sopenharmony_ci * 150987da915Sopenharmony_ci * If @ic is CASE_SENSITIVE, then the names are compared primarily ignoring 151987da915Sopenharmony_ci * case, but if the names are equal ignoring case, then they are compared 152987da915Sopenharmony_ci * case-sensitively. As an example, "abc" would collate before "BCD" (since 153987da915Sopenharmony_ci * "abc" and "BCD" differ ignoring case and 'A' < 'B') but after "ABC" (since 154987da915Sopenharmony_ci * "ABC" and "abc" are equal ignoring case and 'A' < 'a'). 
This matches the 155987da915Sopenharmony_ci * collation order of filenames as indexed in NTFS directories. 156987da915Sopenharmony_ci * 157987da915Sopenharmony_ci * If @ic is IGNORE_CASE, then the names are only compared case-insensitively 158987da915Sopenharmony_ci * and are considered to match if and only if they are equal ignoring case. 159987da915Sopenharmony_ci * 160987da915Sopenharmony_ci * Returns: 161987da915Sopenharmony_ci * -1 if the first name collates before the second one, 162987da915Sopenharmony_ci * 0 if the names match, or 163987da915Sopenharmony_ci * 1 if the second name collates before the first one 164987da915Sopenharmony_ci */ 165987da915Sopenharmony_ciint ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len, 166987da915Sopenharmony_ci const ntfschar *name2, const u32 name2_len, 167987da915Sopenharmony_ci const IGNORE_CASE_BOOL ic, const ntfschar *upcase, 168987da915Sopenharmony_ci const u32 upcase_len) 169987da915Sopenharmony_ci{ 170987da915Sopenharmony_ci u32 cnt; 171987da915Sopenharmony_ci u16 c1, c2; 172987da915Sopenharmony_ci u16 u1, u2; 173987da915Sopenharmony_ci 174987da915Sopenharmony_ci#ifdef DEBUG 175987da915Sopenharmony_ci if (!name1 || !name2 || !upcase || !upcase_len) { 176987da915Sopenharmony_ci ntfs_log_debug("ntfs_names_collate received NULL pointer!\n"); 177987da915Sopenharmony_ci exit(1); 178987da915Sopenharmony_ci } 179987da915Sopenharmony_ci#endif 180987da915Sopenharmony_ci cnt = min(name1_len, name2_len); 181987da915Sopenharmony_ci if (cnt > 0) { 182987da915Sopenharmony_ci if (ic == CASE_SENSITIVE) { 183987da915Sopenharmony_ci while (--cnt && (*name1 == *name2)) { 184987da915Sopenharmony_ci name1++; 185987da915Sopenharmony_ci name2++; 186987da915Sopenharmony_ci } 187987da915Sopenharmony_ci u1 = c1 = le16_to_cpu(*name1); 188987da915Sopenharmony_ci u2 = c2 = le16_to_cpu(*name2); 189987da915Sopenharmony_ci if (u1 < upcase_len) 190987da915Sopenharmony_ci u1 = le16_to_cpu(upcase[u1]); 191987da915Sopenharmony_ci if 
(u2 < upcase_len) 192987da915Sopenharmony_ci u2 = le16_to_cpu(upcase[u2]); 193987da915Sopenharmony_ci if ((u1 == u2) && cnt) 194987da915Sopenharmony_ci do { 195987da915Sopenharmony_ci name1++; 196987da915Sopenharmony_ci u1 = le16_to_cpu(*name1); 197987da915Sopenharmony_ci name2++; 198987da915Sopenharmony_ci u2 = le16_to_cpu(*name2); 199987da915Sopenharmony_ci if (u1 < upcase_len) 200987da915Sopenharmony_ci u1 = le16_to_cpu(upcase[u1]); 201987da915Sopenharmony_ci if (u2 < upcase_len) 202987da915Sopenharmony_ci u2 = le16_to_cpu(upcase[u2]); 203987da915Sopenharmony_ci } while ((u1 == u2) && --cnt); 204987da915Sopenharmony_ci if (u1 < u2) 205987da915Sopenharmony_ci return -1; 206987da915Sopenharmony_ci if (u1 > u2) 207987da915Sopenharmony_ci return 1; 208987da915Sopenharmony_ci if (name1_len < name2_len) 209987da915Sopenharmony_ci return -1; 210987da915Sopenharmony_ci if (name1_len > name2_len) 211987da915Sopenharmony_ci return 1; 212987da915Sopenharmony_ci if (c1 < c2) 213987da915Sopenharmony_ci return -1; 214987da915Sopenharmony_ci if (c1 > c2) 215987da915Sopenharmony_ci return 1; 216987da915Sopenharmony_ci } else { 217987da915Sopenharmony_ci do { 218987da915Sopenharmony_ci u1 = le16_to_cpu(*name1); 219987da915Sopenharmony_ci name1++; 220987da915Sopenharmony_ci u2 = le16_to_cpu(*name2); 221987da915Sopenharmony_ci name2++; 222987da915Sopenharmony_ci if (u1 < upcase_len) 223987da915Sopenharmony_ci u1 = le16_to_cpu(upcase[u1]); 224987da915Sopenharmony_ci if (u2 < upcase_len) 225987da915Sopenharmony_ci u2 = le16_to_cpu(upcase[u2]); 226987da915Sopenharmony_ci } while ((u1 == u2) && --cnt); 227987da915Sopenharmony_ci if (u1 < u2) 228987da915Sopenharmony_ci return -1; 229987da915Sopenharmony_ci if (u1 > u2) 230987da915Sopenharmony_ci return 1; 231987da915Sopenharmony_ci if (name1_len < name2_len) 232987da915Sopenharmony_ci return -1; 233987da915Sopenharmony_ci if (name1_len > name2_len) 234987da915Sopenharmony_ci return 1; 235987da915Sopenharmony_ci } 
236987da915Sopenharmony_ci } else { 237987da915Sopenharmony_ci if (name1_len < name2_len) 238987da915Sopenharmony_ci return -1; 239987da915Sopenharmony_ci if (name1_len > name2_len) 240987da915Sopenharmony_ci return 1; 241987da915Sopenharmony_ci } 242987da915Sopenharmony_ci return 0; 243987da915Sopenharmony_ci} 244987da915Sopenharmony_ci 245987da915Sopenharmony_ci/** 246987da915Sopenharmony_ci * ntfs_ucsncmp - compare two little endian Unicode strings 247987da915Sopenharmony_ci * @s1: first string 248987da915Sopenharmony_ci * @s2: second string 249987da915Sopenharmony_ci * @n: maximum unicode characters to compare 250987da915Sopenharmony_ci * 251987da915Sopenharmony_ci * Compare the first @n characters of the Unicode strings @s1 and @s2, 252987da915Sopenharmony_ci * The strings in little endian format and appropriate le16_to_cpu() 253987da915Sopenharmony_ci * conversion is performed on non-little endian machines. 254987da915Sopenharmony_ci * 255987da915Sopenharmony_ci * The function returns an integer less than, equal to, or greater than zero 256987da915Sopenharmony_ci * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 257987da915Sopenharmony_ci * to be less than, to match, or be greater than @s2. 
258987da915Sopenharmony_ci */ 259987da915Sopenharmony_ciint ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n) 260987da915Sopenharmony_ci{ 261987da915Sopenharmony_ci u16 c1, c2; 262987da915Sopenharmony_ci size_t i; 263987da915Sopenharmony_ci 264987da915Sopenharmony_ci#ifdef DEBUG 265987da915Sopenharmony_ci if (!s1 || !s2) { 266987da915Sopenharmony_ci ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n"); 267987da915Sopenharmony_ci exit(1); 268987da915Sopenharmony_ci } 269987da915Sopenharmony_ci#endif 270987da915Sopenharmony_ci for (i = 0; i < n; ++i) { 271987da915Sopenharmony_ci c1 = le16_to_cpu(s1[i]); 272987da915Sopenharmony_ci c2 = le16_to_cpu(s2[i]); 273987da915Sopenharmony_ci if (c1 < c2) 274987da915Sopenharmony_ci return -1; 275987da915Sopenharmony_ci if (c1 > c2) 276987da915Sopenharmony_ci return 1; 277987da915Sopenharmony_ci if (!c1) 278987da915Sopenharmony_ci break; 279987da915Sopenharmony_ci } 280987da915Sopenharmony_ci return 0; 281987da915Sopenharmony_ci} 282987da915Sopenharmony_ci 283987da915Sopenharmony_ci/** 284987da915Sopenharmony_ci * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case 285987da915Sopenharmony_ci * @s1: first string 286987da915Sopenharmony_ci * @s2: second string 287987da915Sopenharmony_ci * @n: maximum unicode characters to compare 288987da915Sopenharmony_ci * @upcase: upcase table 289987da915Sopenharmony_ci * @upcase_size: upcase table size in Unicode characters 290987da915Sopenharmony_ci * 291987da915Sopenharmony_ci * Compare the first @n characters of the Unicode strings @s1 and @s2, 292987da915Sopenharmony_ci * ignoring case. The strings in little endian format and appropriate 293987da915Sopenharmony_ci * le16_to_cpu() conversion is performed on non-little endian machines. 294987da915Sopenharmony_ci * 295987da915Sopenharmony_ci * Each character is uppercased using the @upcase table before the comparison. 
296987da915Sopenharmony_ci * 297987da915Sopenharmony_ci * The function returns an integer less than, equal to, or greater than zero 298987da915Sopenharmony_ci * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 299987da915Sopenharmony_ci * to be less than, to match, or be greater than @s2. 300987da915Sopenharmony_ci */ 301987da915Sopenharmony_ciint ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n, 302987da915Sopenharmony_ci const ntfschar *upcase, const u32 upcase_size) 303987da915Sopenharmony_ci{ 304987da915Sopenharmony_ci u16 c1, c2; 305987da915Sopenharmony_ci size_t i; 306987da915Sopenharmony_ci 307987da915Sopenharmony_ci#ifdef DEBUG 308987da915Sopenharmony_ci if (!s1 || !s2 || !upcase) { 309987da915Sopenharmony_ci ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n"); 310987da915Sopenharmony_ci exit(1); 311987da915Sopenharmony_ci } 312987da915Sopenharmony_ci#endif 313987da915Sopenharmony_ci for (i = 0; i < n; ++i) { 314987da915Sopenharmony_ci if ((c1 = le16_to_cpu(s1[i])) < upcase_size) 315987da915Sopenharmony_ci c1 = le16_to_cpu(upcase[c1]); 316987da915Sopenharmony_ci if ((c2 = le16_to_cpu(s2[i])) < upcase_size) 317987da915Sopenharmony_ci c2 = le16_to_cpu(upcase[c2]); 318987da915Sopenharmony_ci if (c1 < c2) 319987da915Sopenharmony_ci return -1; 320987da915Sopenharmony_ci if (c1 > c2) 321987da915Sopenharmony_ci return 1; 322987da915Sopenharmony_ci if (!c1) 323987da915Sopenharmony_ci break; 324987da915Sopenharmony_ci } 325987da915Sopenharmony_ci return 0; 326987da915Sopenharmony_ci} 327987da915Sopenharmony_ci 328987da915Sopenharmony_ci/** 329987da915Sopenharmony_ci * ntfs_ucsnlen - determine the length of a little endian Unicode string 330987da915Sopenharmony_ci * @s: pointer to Unicode string 331987da915Sopenharmony_ci * @maxlen: maximum length of string @s 332987da915Sopenharmony_ci * 333987da915Sopenharmony_ci * Return the number of Unicode characters in the little endian Unicode 334987da915Sopenharmony_ci * 
string @s up to a maximum of maxlen Unicode characters, not including 335987da915Sopenharmony_ci * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s 336987da915Sopenharmony_ci * and @s + @maxlen, @maxlen is returned. 337987da915Sopenharmony_ci * 338987da915Sopenharmony_ci * This function never looks beyond @s + @maxlen. 339987da915Sopenharmony_ci */ 340987da915Sopenharmony_ciu32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen) 341987da915Sopenharmony_ci{ 342987da915Sopenharmony_ci u32 i; 343987da915Sopenharmony_ci 344987da915Sopenharmony_ci for (i = 0; i < maxlen; i++) { 345987da915Sopenharmony_ci if (!le16_to_cpu(s[i])) 346987da915Sopenharmony_ci break; 347987da915Sopenharmony_ci } 348987da915Sopenharmony_ci return i; 349987da915Sopenharmony_ci} 350987da915Sopenharmony_ci 351987da915Sopenharmony_ci/** 352987da915Sopenharmony_ci * ntfs_ucsndup - duplicate little endian Unicode string 353987da915Sopenharmony_ci * @s: pointer to Unicode string 354987da915Sopenharmony_ci * @maxlen: maximum length of string @s 355987da915Sopenharmony_ci * 356987da915Sopenharmony_ci * Return a pointer to a new little endian Unicode string which is a duplicate 357987da915Sopenharmony_ci * of the string s. Memory for the new string is obtained with ntfs_malloc(3), 358987da915Sopenharmony_ci * and can be freed with free(3). 359987da915Sopenharmony_ci * 360987da915Sopenharmony_ci * A maximum of @maxlen Unicode characters are copied and a terminating 361987da915Sopenharmony_ci * (ntfschar)'\0' little endian Unicode character is added. 362987da915Sopenharmony_ci * 363987da915Sopenharmony_ci * This function never looks beyond @s + @maxlen. 364987da915Sopenharmony_ci * 365987da915Sopenharmony_ci * Return a pointer to the new little endian Unicode string on success and NULL 366987da915Sopenharmony_ci * on failure with errno set to the error code. 
367987da915Sopenharmony_ci */ 368987da915Sopenharmony_cintfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen) 369987da915Sopenharmony_ci{ 370987da915Sopenharmony_ci ntfschar *dst; 371987da915Sopenharmony_ci u32 len; 372987da915Sopenharmony_ci 373987da915Sopenharmony_ci len = ntfs_ucsnlen(s, maxlen); 374987da915Sopenharmony_ci dst = ntfs_malloc((len + 1) * sizeof(ntfschar)); 375987da915Sopenharmony_ci if (dst) { 376987da915Sopenharmony_ci memcpy(dst, s, len * sizeof(ntfschar)); 377987da915Sopenharmony_ci dst[len] = const_cpu_to_le16(L'\0'); 378987da915Sopenharmony_ci } 379987da915Sopenharmony_ci return dst; 380987da915Sopenharmony_ci} 381987da915Sopenharmony_ci 382987da915Sopenharmony_ci/** 383987da915Sopenharmony_ci * ntfs_name_upcase - Map an Unicode name to its uppercase equivalent 384987da915Sopenharmony_ci * @name: 385987da915Sopenharmony_ci * @name_len: 386987da915Sopenharmony_ci * @upcase: 387987da915Sopenharmony_ci * @upcase_len: 388987da915Sopenharmony_ci * 389987da915Sopenharmony_ci * Description... 
390987da915Sopenharmony_ci * 391987da915Sopenharmony_ci * Returns: 392987da915Sopenharmony_ci */ 393987da915Sopenharmony_civoid ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase, 394987da915Sopenharmony_ci const u32 upcase_len) 395987da915Sopenharmony_ci{ 396987da915Sopenharmony_ci u32 i; 397987da915Sopenharmony_ci u16 u; 398987da915Sopenharmony_ci 399987da915Sopenharmony_ci for (i = 0; i < name_len; i++) 400987da915Sopenharmony_ci if ((u = le16_to_cpu(name[i])) < upcase_len) 401987da915Sopenharmony_ci name[i] = upcase[u]; 402987da915Sopenharmony_ci} 403987da915Sopenharmony_ci 404987da915Sopenharmony_ci/** 405987da915Sopenharmony_ci * ntfs_name_locase - Map a Unicode name to its lowercase equivalent 406987da915Sopenharmony_ci */ 407987da915Sopenharmony_civoid ntfs_name_locase(ntfschar *name, u32 name_len, const ntfschar *locase, 408987da915Sopenharmony_ci const u32 locase_len) 409987da915Sopenharmony_ci{ 410987da915Sopenharmony_ci u32 i; 411987da915Sopenharmony_ci u16 u; 412987da915Sopenharmony_ci 413987da915Sopenharmony_ci if (locase) 414987da915Sopenharmony_ci for (i = 0; i < name_len; i++) 415987da915Sopenharmony_ci if ((u = le16_to_cpu(name[i])) < locase_len) 416987da915Sopenharmony_ci name[i] = locase[u]; 417987da915Sopenharmony_ci} 418987da915Sopenharmony_ci 419987da915Sopenharmony_ci/** 420987da915Sopenharmony_ci * ntfs_file_value_upcase - Convert a filename to upper case 421987da915Sopenharmony_ci * @file_name_attr: 422987da915Sopenharmony_ci * @upcase: 423987da915Sopenharmony_ci * @upcase_len: 424987da915Sopenharmony_ci * 425987da915Sopenharmony_ci * Description... 
426987da915Sopenharmony_ci * 427987da915Sopenharmony_ci * Returns: 428987da915Sopenharmony_ci */ 429987da915Sopenharmony_civoid ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr, 430987da915Sopenharmony_ci const ntfschar *upcase, const u32 upcase_len) 431987da915Sopenharmony_ci{ 432987da915Sopenharmony_ci ntfs_name_upcase((ntfschar*)&file_name_attr->file_name, 433987da915Sopenharmony_ci file_name_attr->file_name_length, upcase, upcase_len); 434987da915Sopenharmony_ci} 435987da915Sopenharmony_ci 436987da915Sopenharmony_ci/* 437987da915Sopenharmony_ci NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough 438987da915Sopenharmony_ci for now]) for path names, but the Unicode code points need to be 439987da915Sopenharmony_ci converted before a path can be accessed under NTFS. For 7 bit ASCII/ANSI, 440987da915Sopenharmony_ci glibc does this even without a locale in a hard-coded fashion as that 441987da915Sopenharmony_ci appears to be is easy because the low 7-bit ASCII range appears to be 442987da915Sopenharmony_ci available in all charsets but it does not convert anything if 443987da915Sopenharmony_ci there was some error with the locale setup or none set up like 444987da915Sopenharmony_ci when mount is called during early boot where he (by policy) do 445987da915Sopenharmony_ci not use locales (and may be not available if /usr is not yet mounted), 446987da915Sopenharmony_ci so this patch fixes the resulting issues for systems which use 447987da915Sopenharmony_ci UTF-8 and for others, specifying the locale in fstab brings them 448987da915Sopenharmony_ci the encoding which they want. 

   If no locale is defined or there was a problem with setting one
   up and whenever nl_langinfo(CODESET) returns a string starting with
   "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter to fix
   the bug where NTFS-3G does not show any path names which include
   international characters!!! (and also fails on creating them) as a result.

   Author: Bernhard Kaindl <bk@suse.de>
   Jean-Pierre Andre made it compliant with RFC3629/RFC2781.
*/

/*
 * Return the number of bytes in UTF-8 needed (without the terminating null) to
 * store the given UTF-16LE string.
 *
 * On error, -1 is returned, and errno is set to the error code. The following
 * error codes can be expected:
 *	EILSEQ		The input string is not valid UTF-16LE (only possible
 *			if compiled without ALLOW_BROKEN_UNICODE).
 *	ENAMETOOLONG	The length of the UTF-8 string in bytes (without the
 *			terminating null) would exceed @outs_len.
470987da915Sopenharmony_ci */ 471987da915Sopenharmony_cistatic int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len) 472987da915Sopenharmony_ci{ 473987da915Sopenharmony_ci int i, ret = -1; 474987da915Sopenharmony_ci int count = 0; 475987da915Sopenharmony_ci BOOL surrog; 476987da915Sopenharmony_ci 477987da915Sopenharmony_ci surrog = FALSE; 478987da915Sopenharmony_ci for (i = 0; i < ins_len && ins[i] && count <= outs_len; i++) { 479987da915Sopenharmony_ci unsigned short c = le16_to_cpu(ins[i]); 480987da915Sopenharmony_ci if (surrog) { 481987da915Sopenharmony_ci if ((c >= 0xdc00) && (c < 0xe000)) { 482987da915Sopenharmony_ci surrog = FALSE; 483987da915Sopenharmony_ci count += 4; 484987da915Sopenharmony_ci } else { 485987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 486987da915Sopenharmony_ci /* The first UTF-16 unit of a surrogate pair has 487987da915Sopenharmony_ci * a value between 0xd800 and 0xdc00. It can be 488987da915Sopenharmony_ci * encoded as an individual UTF-8 sequence if we 489987da915Sopenharmony_ci * cannot combine it with the next UTF-16 unit 490987da915Sopenharmony_ci * unit as a surrogate pair. 
*/ 491987da915Sopenharmony_ci surrog = FALSE; 492987da915Sopenharmony_ci count += 3; 493987da915Sopenharmony_ci 494987da915Sopenharmony_ci --i; 495987da915Sopenharmony_ci continue; 496987da915Sopenharmony_ci#else 497987da915Sopenharmony_ci goto fail; 498987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 499987da915Sopenharmony_ci } 500987da915Sopenharmony_ci } else 501987da915Sopenharmony_ci if (c < 0x80) 502987da915Sopenharmony_ci count++; 503987da915Sopenharmony_ci else if (c < 0x800) 504987da915Sopenharmony_ci count += 2; 505987da915Sopenharmony_ci else if (c < 0xd800) 506987da915Sopenharmony_ci count += 3; 507987da915Sopenharmony_ci else if (c < 0xdc00) 508987da915Sopenharmony_ci surrog = TRUE; 509987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 510987da915Sopenharmony_ci else if (c < 0xe000) 511987da915Sopenharmony_ci count += 3; 512987da915Sopenharmony_ci else if (c >= 0xe000) 513987da915Sopenharmony_ci#else 514987da915Sopenharmony_ci else if ((c >= 0xe000) && (c < 0xfffe)) 515987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 516987da915Sopenharmony_ci count += 3; 517987da915Sopenharmony_ci else 518987da915Sopenharmony_ci goto fail; 519987da915Sopenharmony_ci } 520987da915Sopenharmony_ci 521987da915Sopenharmony_ci if (surrog && count <= outs_len) { 522987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 523987da915Sopenharmony_ci count += 3; /* ending with a single surrogate */ 524987da915Sopenharmony_ci#else 525987da915Sopenharmony_ci goto fail; 526987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 527987da915Sopenharmony_ci } 528987da915Sopenharmony_ci 529987da915Sopenharmony_ci if (count > outs_len) { 530987da915Sopenharmony_ci errno = ENAMETOOLONG; 531987da915Sopenharmony_ci goto out; 532987da915Sopenharmony_ci } 533987da915Sopenharmony_ci 534987da915Sopenharmony_ci ret = count; 535987da915Sopenharmony_ciout: 536987da915Sopenharmony_ci return ret; 537987da915Sopenharmony_cifail: 538987da915Sopenharmony_ci errno = EILSEQ; 
539987da915Sopenharmony_ci goto out; 540987da915Sopenharmony_ci} 541987da915Sopenharmony_ci 542987da915Sopenharmony_ci/* 543987da915Sopenharmony_ci * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string 544987da915Sopenharmony_ci * @ins: input utf16 string buffer 545987da915Sopenharmony_ci * @ins_len: length of input string in utf16 characters 546987da915Sopenharmony_ci * @outs: on return contains the (allocated) output multibyte string 547987da915Sopenharmony_ci * @outs_len: length of output buffer in bytes (ignored if *@outs is NULL) 548987da915Sopenharmony_ci * 549987da915Sopenharmony_ci * Return -1 with errno set if string has invalid byte sequence or too long. 550987da915Sopenharmony_ci */ 551987da915Sopenharmony_cistatic int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len, 552987da915Sopenharmony_ci char **outs, int outs_len) 553987da915Sopenharmony_ci{ 554987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 555987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 556987da915Sopenharmony_ci char *original_outs_value = *outs; 557987da915Sopenharmony_ci int original_outs_len = outs_len; 558987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 559987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 560987da915Sopenharmony_ci 561987da915Sopenharmony_ci char *t; 562987da915Sopenharmony_ci int i, size, ret = -1; 563987da915Sopenharmony_ci int halfpair; 564987da915Sopenharmony_ci 565987da915Sopenharmony_ci halfpair = 0; 566987da915Sopenharmony_ci if (!*outs) { 567987da915Sopenharmony_ci /* If no output buffer was provided, we will allocate one and 568987da915Sopenharmony_ci * limit its length to PATH_MAX. Note: we follow the standard 569987da915Sopenharmony_ci * convention of PATH_MAX including the terminating null. 
*/ 570987da915Sopenharmony_ci outs_len = PATH_MAX; 571987da915Sopenharmony_ci } 572987da915Sopenharmony_ci 573987da915Sopenharmony_ci /* The size *with* the terminating null is limited to @outs_len, 574987da915Sopenharmony_ci * so the size *without* the terminating null is limited to one less. */ 575987da915Sopenharmony_ci size = utf16_to_utf8_size(ins, ins_len, outs_len - 1); 576987da915Sopenharmony_ci 577987da915Sopenharmony_ci if (size < 0) 578987da915Sopenharmony_ci goto out; 579987da915Sopenharmony_ci 580987da915Sopenharmony_ci if (!*outs) { 581987da915Sopenharmony_ci outs_len = size + 1; 582987da915Sopenharmony_ci *outs = ntfs_malloc(outs_len); 583987da915Sopenharmony_ci if (!*outs) 584987da915Sopenharmony_ci goto out; 585987da915Sopenharmony_ci } 586987da915Sopenharmony_ci 587987da915Sopenharmony_ci t = *outs; 588987da915Sopenharmony_ci 589987da915Sopenharmony_ci for (i = 0; i < ins_len && ins[i]; i++) { 590987da915Sopenharmony_ci unsigned short c = le16_to_cpu(ins[i]); 591987da915Sopenharmony_ci /* size not double-checked */ 592987da915Sopenharmony_ci if (halfpair) { 593987da915Sopenharmony_ci if ((c >= 0xdc00) && (c < 0xe000)) { 594987da915Sopenharmony_ci *t++ = 0xf0 + (((halfpair + 64) >> 8) & 7); 595987da915Sopenharmony_ci *t++ = 0x80 + (((halfpair + 64) >> 2) & 63); 596987da915Sopenharmony_ci *t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4); 597987da915Sopenharmony_ci *t++ = 0x80 + (c & 63); 598987da915Sopenharmony_ci halfpair = 0; 599987da915Sopenharmony_ci } else { 600987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 601987da915Sopenharmony_ci /* The first UTF-16 unit of a surrogate pair has 602987da915Sopenharmony_ci * a value between 0xd800 and 0xdc00. It can be 603987da915Sopenharmony_ci * encoded as an individual UTF-8 sequence if we 604987da915Sopenharmony_ci * cannot combine it with the next UTF-16 unit 605987da915Sopenharmony_ci * unit as a surrogate pair. 
*/ 606987da915Sopenharmony_ci *t++ = 0xe0 | (halfpair >> 12); 607987da915Sopenharmony_ci *t++ = 0x80 | ((halfpair >> 6) & 0x3f); 608987da915Sopenharmony_ci *t++ = 0x80 | (halfpair & 0x3f); 609987da915Sopenharmony_ci halfpair = 0; 610987da915Sopenharmony_ci 611987da915Sopenharmony_ci --i; 612987da915Sopenharmony_ci continue; 613987da915Sopenharmony_ci#else 614987da915Sopenharmony_ci goto fail; 615987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 616987da915Sopenharmony_ci } 617987da915Sopenharmony_ci } else if (c < 0x80) { 618987da915Sopenharmony_ci *t++ = c; 619987da915Sopenharmony_ci } else { 620987da915Sopenharmony_ci if (c < 0x800) { 621987da915Sopenharmony_ci *t++ = (0xc0 | ((c >> 6) & 0x3f)); 622987da915Sopenharmony_ci *t++ = 0x80 | (c & 0x3f); 623987da915Sopenharmony_ci } else if (c < 0xd800) { 624987da915Sopenharmony_ci *t++ = 0xe0 | (c >> 12); 625987da915Sopenharmony_ci *t++ = 0x80 | ((c >> 6) & 0x3f); 626987da915Sopenharmony_ci *t++ = 0x80 | (c & 0x3f); 627987da915Sopenharmony_ci } else if (c < 0xdc00) 628987da915Sopenharmony_ci halfpair = c; 629987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 630987da915Sopenharmony_ci else if (c < 0xe000) { 631987da915Sopenharmony_ci *t++ = 0xe0 | (c >> 12); 632987da915Sopenharmony_ci *t++ = 0x80 | ((c >> 6) & 0x3f); 633987da915Sopenharmony_ci *t++ = 0x80 | (c & 0x3f); 634987da915Sopenharmony_ci } 635987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 636987da915Sopenharmony_ci else if (c >= 0xe000) { 637987da915Sopenharmony_ci *t++ = 0xe0 | (c >> 12); 638987da915Sopenharmony_ci *t++ = 0x80 | ((c >> 6) & 0x3f); 639987da915Sopenharmony_ci *t++ = 0x80 | (c & 0x3f); 640987da915Sopenharmony_ci } else 641987da915Sopenharmony_ci goto fail; 642987da915Sopenharmony_ci } 643987da915Sopenharmony_ci } 644987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 645987da915Sopenharmony_ci if (halfpair) { /* ending with a single surrogate */ 646987da915Sopenharmony_ci *t++ = 0xe0 | (halfpair >> 12); 647987da915Sopenharmony_ci *t++ = 
0x80 | ((halfpair >> 6) & 0x3f); 648987da915Sopenharmony_ci *t++ = 0x80 | (halfpair & 0x3f); 649987da915Sopenharmony_ci } 650987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 651987da915Sopenharmony_ci *t = '\0'; 652987da915Sopenharmony_ci 653987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 654987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 655987da915Sopenharmony_ci if(nfconvert_utf8 && (t - *outs) > 0) { 656987da915Sopenharmony_ci char *new_outs = NULL; 657987da915Sopenharmony_ci int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form 658987da915Sopenharmony_ci if(new_outs_len >= 0 && new_outs != NULL) { 659987da915Sopenharmony_ci if(original_outs_value != *outs) { 660987da915Sopenharmony_ci // We have allocated outs ourselves. 661987da915Sopenharmony_ci free(*outs); 662987da915Sopenharmony_ci *outs = new_outs; 663987da915Sopenharmony_ci t = *outs + new_outs_len; 664987da915Sopenharmony_ci } 665987da915Sopenharmony_ci else { 666987da915Sopenharmony_ci // We need to copy new_outs into the fixed outs buffer. 
667987da915Sopenharmony_ci memset(*outs, 0, original_outs_len); 668987da915Sopenharmony_ci strncpy(*outs, new_outs, original_outs_len-1); 669987da915Sopenharmony_ci t = *outs + original_outs_len; 670987da915Sopenharmony_ci free(new_outs); 671987da915Sopenharmony_ci } 672987da915Sopenharmony_ci } 673987da915Sopenharmony_ci else { 674987da915Sopenharmony_ci ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs); 675987da915Sopenharmony_ci ntfs_log_error(" new_outs=0x%p\n", new_outs); 676987da915Sopenharmony_ci ntfs_log_error(" new_outs_len=%d\n", new_outs_len); 677987da915Sopenharmony_ci } 678987da915Sopenharmony_ci } 679987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 680987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 681987da915Sopenharmony_ci 682987da915Sopenharmony_ci ret = t - *outs; 683987da915Sopenharmony_ciout: 684987da915Sopenharmony_ci return ret; 685987da915Sopenharmony_cifail: 686987da915Sopenharmony_ci errno = EILSEQ; 687987da915Sopenharmony_ci goto out; 688987da915Sopenharmony_ci} 689987da915Sopenharmony_ci 690987da915Sopenharmony_ci/* 691987da915Sopenharmony_ci * Return the amount of 16-bit elements in UTF-16LE needed 692987da915Sopenharmony_ci * (without the terminating null) to store given UTF-8 string. 693987da915Sopenharmony_ci * 694987da915Sopenharmony_ci * Return -1 with errno set if it's longer than PATH_MAX or string is invalid. 695987da915Sopenharmony_ci * 696987da915Sopenharmony_ci * Note: This does not check whether the input sequence is a valid utf8 string, 697987da915Sopenharmony_ci * and should be used only in context where such check is made! 
698987da915Sopenharmony_ci */ 699987da915Sopenharmony_cistatic int utf8_to_utf16_size(const char *s) 700987da915Sopenharmony_ci{ 701987da915Sopenharmony_ci int ret = -1; 702987da915Sopenharmony_ci unsigned int byte; 703987da915Sopenharmony_ci size_t count = 0; 704987da915Sopenharmony_ci 705987da915Sopenharmony_ci while ((byte = *((const unsigned char *)s++))) { 706987da915Sopenharmony_ci if (++count >= PATH_MAX) 707987da915Sopenharmony_ci goto fail; 708987da915Sopenharmony_ci if (byte >= 0xc0) { 709987da915Sopenharmony_ci if (byte >= 0xF5) { 710987da915Sopenharmony_ci errno = EILSEQ; 711987da915Sopenharmony_ci goto out; 712987da915Sopenharmony_ci } 713987da915Sopenharmony_ci if (!*s) 714987da915Sopenharmony_ci break; 715987da915Sopenharmony_ci if (byte >= 0xC0) 716987da915Sopenharmony_ci s++; 717987da915Sopenharmony_ci if (!*s) 718987da915Sopenharmony_ci break; 719987da915Sopenharmony_ci if (byte >= 0xE0) 720987da915Sopenharmony_ci s++; 721987da915Sopenharmony_ci if (!*s) 722987da915Sopenharmony_ci break; 723987da915Sopenharmony_ci if (byte >= 0xF0) { 724987da915Sopenharmony_ci s++; 725987da915Sopenharmony_ci if (++count >= PATH_MAX) 726987da915Sopenharmony_ci goto fail; 727987da915Sopenharmony_ci } 728987da915Sopenharmony_ci } 729987da915Sopenharmony_ci } 730987da915Sopenharmony_ci ret = count; 731987da915Sopenharmony_ciout: 732987da915Sopenharmony_ci return ret; 733987da915Sopenharmony_cifail: 734987da915Sopenharmony_ci errno = ENAMETOOLONG; 735987da915Sopenharmony_ci goto out; 736987da915Sopenharmony_ci} 737987da915Sopenharmony_ci/* 738987da915Sopenharmony_ci * This converts one UTF-8 sequence to cpu-endian Unicode value 739987da915Sopenharmony_ci * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF 740987da915Sopenharmony_ci * 741987da915Sopenharmony_ci * Return the number of used utf8 bytes or -1 with errno set 742987da915Sopenharmony_ci * if sequence is invalid. 
743987da915Sopenharmony_ci */ 744987da915Sopenharmony_cistatic int utf8_to_unicode(u32 *wc, const char *s) 745987da915Sopenharmony_ci{ 746987da915Sopenharmony_ci unsigned int byte = *((const unsigned char *)s); 747987da915Sopenharmony_ci 748987da915Sopenharmony_ci /* single byte */ 749987da915Sopenharmony_ci if (byte == 0) { 750987da915Sopenharmony_ci *wc = (u32) 0; 751987da915Sopenharmony_ci return 0; 752987da915Sopenharmony_ci } else if (byte < 0x80) { 753987da915Sopenharmony_ci *wc = (u32) byte; 754987da915Sopenharmony_ci return 1; 755987da915Sopenharmony_ci /* double byte */ 756987da915Sopenharmony_ci } else if (byte < 0xc2) { 757987da915Sopenharmony_ci goto fail; 758987da915Sopenharmony_ci } else if (byte < 0xE0) { 759987da915Sopenharmony_ci if ((s[1] & 0xC0) == 0x80) { 760987da915Sopenharmony_ci *wc = ((u32)(byte & 0x1F) << 6) 761987da915Sopenharmony_ci | ((u32)(s[1] & 0x3F)); 762987da915Sopenharmony_ci return 2; 763987da915Sopenharmony_ci } else 764987da915Sopenharmony_ci goto fail; 765987da915Sopenharmony_ci /* three-byte */ 766987da915Sopenharmony_ci } else if (byte < 0xF0) { 767987da915Sopenharmony_ci if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) { 768987da915Sopenharmony_ci *wc = ((u32)(byte & 0x0F) << 12) 769987da915Sopenharmony_ci | ((u32)(s[1] & 0x3F) << 6) 770987da915Sopenharmony_ci | ((u32)(s[2] & 0x3F)); 771987da915Sopenharmony_ci /* Check valid ranges */ 772987da915Sopenharmony_ci#if ALLOW_BROKEN_UNICODE 773987da915Sopenharmony_ci if (((*wc >= 0x800) && (*wc <= 0xD7FF)) 774987da915Sopenharmony_ci || ((*wc >= 0xD800) && (*wc <= 0xDFFF)) 775987da915Sopenharmony_ci || ((*wc >= 0xe000) && (*wc <= 0xFFFF))) 776987da915Sopenharmony_ci return 3; 777987da915Sopenharmony_ci#else 778987da915Sopenharmony_ci if (((*wc >= 0x800) && (*wc <= 0xD7FF)) 779987da915Sopenharmony_ci || ((*wc >= 0xe000) && (*wc <= 0xFFFD))) 780987da915Sopenharmony_ci return 3; 781987da915Sopenharmony_ci#endif /* ALLOW_BROKEN_UNICODE */ 782987da915Sopenharmony_ci } 
783987da915Sopenharmony_ci goto fail; 784987da915Sopenharmony_ci /* four-byte */ 785987da915Sopenharmony_ci } else if (byte < 0xF5) { 786987da915Sopenharmony_ci if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80) 787987da915Sopenharmony_ci && ((s[3] & 0xC0) == 0x80)) { 788987da915Sopenharmony_ci *wc = ((u32)(byte & 0x07) << 18) 789987da915Sopenharmony_ci | ((u32)(s[1] & 0x3F) << 12) 790987da915Sopenharmony_ci | ((u32)(s[2] & 0x3F) << 6) 791987da915Sopenharmony_ci | ((u32)(s[3] & 0x3F)); 792987da915Sopenharmony_ci /* Check valid ranges */ 793987da915Sopenharmony_ci if ((*wc <= 0x10ffff) && (*wc >= 0x10000)) 794987da915Sopenharmony_ci return 4; 795987da915Sopenharmony_ci } 796987da915Sopenharmony_ci goto fail; 797987da915Sopenharmony_ci } 798987da915Sopenharmony_cifail: 799987da915Sopenharmony_ci errno = EILSEQ; 800987da915Sopenharmony_ci return -1; 801987da915Sopenharmony_ci} 802987da915Sopenharmony_ci 803987da915Sopenharmony_ci/** 804987da915Sopenharmony_ci * ntfs_utf8_to_utf16 - convert a UTF-8 string to a UTF-16LE string 805987da915Sopenharmony_ci * @ins: input multibyte string buffer 806987da915Sopenharmony_ci * @outs: on return contains the (allocated) output utf16 string 807987da915Sopenharmony_ci * @outs_len: length of output buffer in utf16 characters 808987da915Sopenharmony_ci * 809987da915Sopenharmony_ci * Return -1 with errno set. 
810987da915Sopenharmony_ci */ 811987da915Sopenharmony_cistatic int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs) 812987da915Sopenharmony_ci{ 813987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 814987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 815987da915Sopenharmony_ci char *new_ins = NULL; 816987da915Sopenharmony_ci if(nfconvert_utf8) { 817987da915Sopenharmony_ci int new_ins_len; 818987da915Sopenharmony_ci new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form 819987da915Sopenharmony_ci if(new_ins_len >= 0) 820987da915Sopenharmony_ci ins = new_ins; 821987da915Sopenharmony_ci else 822987da915Sopenharmony_ci ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins); 823987da915Sopenharmony_ci } 824987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 825987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 826987da915Sopenharmony_ci const char *t = ins; 827987da915Sopenharmony_ci u32 wc; 828987da915Sopenharmony_ci BOOL allocated; 829987da915Sopenharmony_ci ntfschar *outpos; 830987da915Sopenharmony_ci int shorts, ret = -1; 831987da915Sopenharmony_ci 832987da915Sopenharmony_ci shorts = utf8_to_utf16_size(ins); 833987da915Sopenharmony_ci if (shorts < 0) 834987da915Sopenharmony_ci goto fail; 835987da915Sopenharmony_ci 836987da915Sopenharmony_ci allocated = FALSE; 837987da915Sopenharmony_ci if (!*outs) { 838987da915Sopenharmony_ci *outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar)); 839987da915Sopenharmony_ci if (!*outs) 840987da915Sopenharmony_ci goto fail; 841987da915Sopenharmony_ci allocated = TRUE; 842987da915Sopenharmony_ci } 843987da915Sopenharmony_ci 844987da915Sopenharmony_ci outpos = *outs; 845987da915Sopenharmony_ci 846987da915Sopenharmony_ci while(1) { 847987da915Sopenharmony_ci int m = utf8_to_unicode(&wc, t); 848987da915Sopenharmony_ci if (m <= 0) { 849987da915Sopenharmony_ci if (m < 0) { 850987da915Sopenharmony_ci /* do not leave space allocated if failed */ 
851987da915Sopenharmony_ci if (allocated) { 852987da915Sopenharmony_ci free(*outs); 853987da915Sopenharmony_ci *outs = (ntfschar*)NULL; 854987da915Sopenharmony_ci } 855987da915Sopenharmony_ci goto fail; 856987da915Sopenharmony_ci } 857987da915Sopenharmony_ci *outpos++ = const_cpu_to_le16(0); 858987da915Sopenharmony_ci break; 859987da915Sopenharmony_ci } 860987da915Sopenharmony_ci if (wc < 0x10000) 861987da915Sopenharmony_ci *outpos++ = cpu_to_le16(wc); 862987da915Sopenharmony_ci else { 863987da915Sopenharmony_ci wc -= 0x10000; 864987da915Sopenharmony_ci *outpos++ = cpu_to_le16((wc >> 10) + 0xd800); 865987da915Sopenharmony_ci *outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00); 866987da915Sopenharmony_ci } 867987da915Sopenharmony_ci t += m; 868987da915Sopenharmony_ci } 869987da915Sopenharmony_ci 870987da915Sopenharmony_ci ret = --outpos - *outs; 871987da915Sopenharmony_cifail: 872987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 873987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 874987da915Sopenharmony_ci if(new_ins != NULL) 875987da915Sopenharmony_ci free(new_ins); 876987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 877987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 878987da915Sopenharmony_ci return ret; 879987da915Sopenharmony_ci} 880987da915Sopenharmony_ci 881987da915Sopenharmony_ci/** 882987da915Sopenharmony_ci * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string 883987da915Sopenharmony_ci * @ins: input Unicode string buffer 884987da915Sopenharmony_ci * @ins_len: length of input string in Unicode characters 885987da915Sopenharmony_ci * @outs: on return contains the (allocated) output multibyte string 886987da915Sopenharmony_ci * @outs_len: length of output buffer in bytes (ignored if *@outs is NULL) 887987da915Sopenharmony_ci * 888987da915Sopenharmony_ci * Convert the input little endian, 2-byte Unicode string @ins, of length 889987da915Sopenharmony_ci * @ins_len into the multibyte string format 
dictated by the current locale. 890987da915Sopenharmony_ci * 891987da915Sopenharmony_ci * If *@outs is NULL, the function allocates the string and the caller is 892987da915Sopenharmony_ci * responsible for calling free(*@outs); when finished with it. 893987da915Sopenharmony_ci * 894987da915Sopenharmony_ci * On success the function returns the number of bytes written to the output 895987da915Sopenharmony_ci * string *@outs (>= 0), not counting the terminating NULL byte. If the output 896987da915Sopenharmony_ci * string buffer was allocated, *@outs is set to it. 897987da915Sopenharmony_ci * 898987da915Sopenharmony_ci * On error, -1 is returned, and errno is set to the error code. The following 899987da915Sopenharmony_ci * error codes can be expected: 900987da915Sopenharmony_ci * EINVAL Invalid arguments (e.g. @ins or @outs is NULL). 901987da915Sopenharmony_ci * EILSEQ The input string cannot be represented as a multibyte 902987da915Sopenharmony_ci * sequence according to the current locale. 903987da915Sopenharmony_ci * ENAMETOOLONG Destination buffer is too small for input string. 904987da915Sopenharmony_ci * ENOMEM Not enough memory to allocate destination buffer. 
905987da915Sopenharmony_ci */ 906987da915Sopenharmony_ciint ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs, 907987da915Sopenharmony_ci int outs_len) 908987da915Sopenharmony_ci{ 909987da915Sopenharmony_ci char *mbs; 910987da915Sopenharmony_ci int mbs_len; 911987da915Sopenharmony_ci#ifdef MB_CUR_MAX 912987da915Sopenharmony_ci wchar_t wc; 913987da915Sopenharmony_ci int i, o; 914987da915Sopenharmony_ci int cnt = 0; 915987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 916987da915Sopenharmony_ci mbstate_t mbstate; 917987da915Sopenharmony_ci#endif 918987da915Sopenharmony_ci#endif /* MB_CUR_MAX */ 919987da915Sopenharmony_ci 920987da915Sopenharmony_ci if (!ins || !outs) { 921987da915Sopenharmony_ci errno = EINVAL; 922987da915Sopenharmony_ci return -1; 923987da915Sopenharmony_ci } 924987da915Sopenharmony_ci mbs = *outs; 925987da915Sopenharmony_ci mbs_len = outs_len; 926987da915Sopenharmony_ci if (mbs && !mbs_len) { 927987da915Sopenharmony_ci errno = ENAMETOOLONG; 928987da915Sopenharmony_ci return -1; 929987da915Sopenharmony_ci } 930987da915Sopenharmony_ci if (use_utf8) 931987da915Sopenharmony_ci return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len); 932987da915Sopenharmony_ci#ifdef MB_CUR_MAX 933987da915Sopenharmony_ci if (!mbs) { 934987da915Sopenharmony_ci mbs_len = (ins_len + 1) * MB_CUR_MAX; 935987da915Sopenharmony_ci mbs = ntfs_malloc(mbs_len); 936987da915Sopenharmony_ci if (!mbs) 937987da915Sopenharmony_ci return -1; 938987da915Sopenharmony_ci } 939987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 940987da915Sopenharmony_ci memset(&mbstate, 0, sizeof(mbstate)); 941987da915Sopenharmony_ci#else 942987da915Sopenharmony_ci wctomb(NULL, 0); 943987da915Sopenharmony_ci#endif 944987da915Sopenharmony_ci for (i = o = 0; i < ins_len; i++) { 945987da915Sopenharmony_ci /* Reallocate memory if necessary or abort. 
*/ 946987da915Sopenharmony_ci if ((int)(o + MB_CUR_MAX) > mbs_len) { 947987da915Sopenharmony_ci char *tc; 948987da915Sopenharmony_ci if (mbs == *outs) { 949987da915Sopenharmony_ci errno = ENAMETOOLONG; 950987da915Sopenharmony_ci return -1; 951987da915Sopenharmony_ci } 952987da915Sopenharmony_ci tc = ntfs_malloc((mbs_len + 64) & ~63); 953987da915Sopenharmony_ci if (!tc) 954987da915Sopenharmony_ci goto err_out; 955987da915Sopenharmony_ci memcpy(tc, mbs, mbs_len); 956987da915Sopenharmony_ci mbs_len = (mbs_len + 64) & ~63; 957987da915Sopenharmony_ci free(mbs); 958987da915Sopenharmony_ci mbs = tc; 959987da915Sopenharmony_ci } 960987da915Sopenharmony_ci /* Convert the LE Unicode character to a CPU wide character. */ 961987da915Sopenharmony_ci wc = (wchar_t)le16_to_cpu(ins[i]); 962987da915Sopenharmony_ci if (!wc) 963987da915Sopenharmony_ci break; 964987da915Sopenharmony_ci /* Convert the CPU endian wide character to multibyte. */ 965987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 966987da915Sopenharmony_ci cnt = wcrtomb(mbs + o, wc, &mbstate); 967987da915Sopenharmony_ci#else 968987da915Sopenharmony_ci cnt = wctomb(mbs + o, wc); 969987da915Sopenharmony_ci#endif 970987da915Sopenharmony_ci if (cnt == -1) 971987da915Sopenharmony_ci goto err_out; 972987da915Sopenharmony_ci if (cnt <= 0) { 973987da915Sopenharmony_ci ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt); 974987da915Sopenharmony_ci errno = EINVAL; 975987da915Sopenharmony_ci goto err_out; 976987da915Sopenharmony_ci } 977987da915Sopenharmony_ci o += cnt; 978987da915Sopenharmony_ci } 979987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 980987da915Sopenharmony_ci /* Make sure we are back in the initial state. */ 981987da915Sopenharmony_ci if (!mbsinit(&mbstate)) { 982987da915Sopenharmony_ci ntfs_log_debug("Eeek. 
mbstate not in initial state!\n"); 983987da915Sopenharmony_ci errno = EILSEQ; 984987da915Sopenharmony_ci goto err_out; 985987da915Sopenharmony_ci } 986987da915Sopenharmony_ci#endif 987987da915Sopenharmony_ci /* Now write the NULL character. */ 988987da915Sopenharmony_ci mbs[o] = '\0'; 989987da915Sopenharmony_ci if (*outs != mbs) 990987da915Sopenharmony_ci *outs = mbs; 991987da915Sopenharmony_ci return o; 992987da915Sopenharmony_cierr_out: 993987da915Sopenharmony_ci if (mbs != *outs) { 994987da915Sopenharmony_ci int eo = errno; 995987da915Sopenharmony_ci free(mbs); 996987da915Sopenharmony_ci errno = eo; 997987da915Sopenharmony_ci } 998987da915Sopenharmony_ci#else /* MB_CUR_MAX */ 999987da915Sopenharmony_ci errno = EILSEQ; 1000987da915Sopenharmony_ci#endif /* MB_CUR_MAX */ 1001987da915Sopenharmony_ci return -1; 1002987da915Sopenharmony_ci} 1003987da915Sopenharmony_ci 1004987da915Sopenharmony_ci/** 1005987da915Sopenharmony_ci * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string 1006987da915Sopenharmony_ci * @ins: input multibyte string buffer 1007987da915Sopenharmony_ci * @outs: on return contains the (allocated) output Unicode string 1008987da915Sopenharmony_ci * 1009987da915Sopenharmony_ci * Convert the input multibyte string @ins, from the current locale into the 1010987da915Sopenharmony_ci * corresponding little endian, 2-byte Unicode string. 1011987da915Sopenharmony_ci * 1012987da915Sopenharmony_ci * The function allocates the string and the caller is responsible for calling 1013987da915Sopenharmony_ci * free(*@outs); when finished with it. 1014987da915Sopenharmony_ci * 1015987da915Sopenharmony_ci * On success the function returns the number of Unicode characters written to 1016987da915Sopenharmony_ci * the output string *@outs (>= 0), not counting the terminating Unicode NULL 1017987da915Sopenharmony_ci * character. 1018987da915Sopenharmony_ci * 1019987da915Sopenharmony_ci * On error, -1 is returned, and errno is set to the error code. 
The following 1020987da915Sopenharmony_ci * error codes can be expected: 1021987da915Sopenharmony_ci * EINVAL Invalid arguments (e.g. @ins or @outs is NULL). 1022987da915Sopenharmony_ci * EILSEQ The input string cannot be represented as a Unicode 1023987da915Sopenharmony_ci * string according to the current locale. 1024987da915Sopenharmony_ci * ENAMETOOLONG Destination buffer is too small for input string. 1025987da915Sopenharmony_ci * ENOMEM Not enough memory to allocate destination buffer. 1026987da915Sopenharmony_ci */ 1027987da915Sopenharmony_ciint ntfs_mbstoucs(const char *ins, ntfschar **outs) 1028987da915Sopenharmony_ci{ 1029987da915Sopenharmony_ci#ifdef MB_CUR_MAX 1030987da915Sopenharmony_ci ntfschar *ucs; 1031987da915Sopenharmony_ci const char *s; 1032987da915Sopenharmony_ci wchar_t wc; 1033987da915Sopenharmony_ci int i, o, cnt, ins_len, ucs_len, ins_size; 1034987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 1035987da915Sopenharmony_ci mbstate_t mbstate; 1036987da915Sopenharmony_ci#endif 1037987da915Sopenharmony_ci#endif /* MB_CUR_MAX */ 1038987da915Sopenharmony_ci 1039987da915Sopenharmony_ci if (!ins || !outs) { 1040987da915Sopenharmony_ci errno = EINVAL; 1041987da915Sopenharmony_ci return -1; 1042987da915Sopenharmony_ci } 1043987da915Sopenharmony_ci 1044987da915Sopenharmony_ci if (use_utf8) 1045987da915Sopenharmony_ci return ntfs_utf8_to_utf16(ins, outs); 1046987da915Sopenharmony_ci 1047987da915Sopenharmony_ci#ifdef MB_CUR_MAX 1048987da915Sopenharmony_ci /* Determine the size of the multi-byte string in bytes. */ 1049987da915Sopenharmony_ci ins_size = strlen(ins); 1050987da915Sopenharmony_ci /* Determine the length of the multi-byte string. 
*/ 1051987da915Sopenharmony_ci s = ins; 1052987da915Sopenharmony_ci#if defined(HAVE_MBSINIT) 1053987da915Sopenharmony_ci memset(&mbstate, 0, sizeof(mbstate)); 1054987da915Sopenharmony_ci ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate); 1055987da915Sopenharmony_ci#ifdef __CYGWIN32__ 1056987da915Sopenharmony_ci if (!ins_len && *ins) { 1057987da915Sopenharmony_ci /* Older Cygwin had broken mbsrtowcs() implementation. */ 1058987da915Sopenharmony_ci ins_len = strlen(ins); 1059987da915Sopenharmony_ci } 1060987da915Sopenharmony_ci#endif 1061987da915Sopenharmony_ci#elif !defined(DJGPP) 1062987da915Sopenharmony_ci ins_len = mbstowcs(NULL, s, 0); 1063987da915Sopenharmony_ci#else 1064987da915Sopenharmony_ci /* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */ 1065987da915Sopenharmony_ci ins_len = strlen(ins); 1066987da915Sopenharmony_ci#endif 1067987da915Sopenharmony_ci if (ins_len == -1) 1068987da915Sopenharmony_ci return ins_len; 1069987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 1070987da915Sopenharmony_ci if ((s != ins) || !mbsinit(&mbstate)) { 1071987da915Sopenharmony_ci#else 1072987da915Sopenharmony_ci if (s != ins) { 1073987da915Sopenharmony_ci#endif 1074987da915Sopenharmony_ci errno = EILSEQ; 1075987da915Sopenharmony_ci return -1; 1076987da915Sopenharmony_ci } 1077987da915Sopenharmony_ci /* Add the NULL terminator. */ 1078987da915Sopenharmony_ci ins_len++; 1079987da915Sopenharmony_ci ucs_len = ins_len; 1080987da915Sopenharmony_ci ucs = ntfs_malloc(ucs_len * sizeof(ntfschar)); 1081987da915Sopenharmony_ci if (!ucs) 1082987da915Sopenharmony_ci return -1; 1083987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 1084987da915Sopenharmony_ci memset(&mbstate, 0, sizeof(mbstate)); 1085987da915Sopenharmony_ci#else 1086987da915Sopenharmony_ci mbtowc(NULL, NULL, 0); 1087987da915Sopenharmony_ci#endif 1088987da915Sopenharmony_ci for (i = o = cnt = 0; i < ins_size; i += cnt, o++) { 1089987da915Sopenharmony_ci /* Reallocate memory if necessary. 
*/ 1090987da915Sopenharmony_ci if (o >= ucs_len) { 1091987da915Sopenharmony_ci ntfschar *tc; 1092987da915Sopenharmony_ci ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63; 1093987da915Sopenharmony_ci tc = realloc(ucs, ucs_len); 1094987da915Sopenharmony_ci if (!tc) 1095987da915Sopenharmony_ci goto err_out; 1096987da915Sopenharmony_ci ucs = tc; 1097987da915Sopenharmony_ci ucs_len /= sizeof(ntfschar); 1098987da915Sopenharmony_ci } 1099987da915Sopenharmony_ci /* Convert the multibyte character to a wide character. */ 1100987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 1101987da915Sopenharmony_ci cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate); 1102987da915Sopenharmony_ci#else 1103987da915Sopenharmony_ci cnt = mbtowc(&wc, ins + i, ins_size - i); 1104987da915Sopenharmony_ci#endif 1105987da915Sopenharmony_ci if (!cnt) 1106987da915Sopenharmony_ci break; 1107987da915Sopenharmony_ci if (cnt == -1) 1108987da915Sopenharmony_ci goto err_out; 1109987da915Sopenharmony_ci if (cnt < -1) { 1110987da915Sopenharmony_ci ntfs_log_trace("Eeek. cnt = %i\n", cnt); 1111987da915Sopenharmony_ci errno = EINVAL; 1112987da915Sopenharmony_ci goto err_out; 1113987da915Sopenharmony_ci } 1114987da915Sopenharmony_ci /* Make sure we are not overflowing the NTFS Unicode set. */ 1115987da915Sopenharmony_ci if ((unsigned long)wc >= (unsigned long)(1 << 1116987da915Sopenharmony_ci (8 * sizeof(ntfschar)))) { 1117987da915Sopenharmony_ci errno = EILSEQ; 1118987da915Sopenharmony_ci goto err_out; 1119987da915Sopenharmony_ci } 1120987da915Sopenharmony_ci /* Convert the CPU wide character to a LE Unicode character. */ 1121987da915Sopenharmony_ci ucs[o] = cpu_to_le16(wc); 1122987da915Sopenharmony_ci } 1123987da915Sopenharmony_ci#ifdef HAVE_MBSINIT 1124987da915Sopenharmony_ci /* Make sure we are back in the initial state. */ 1125987da915Sopenharmony_ci if (!mbsinit(&mbstate)) { 1126987da915Sopenharmony_ci ntfs_log_trace("Eeek. 
mbstate not in initial state!\n"); 1127987da915Sopenharmony_ci errno = EILSEQ; 1128987da915Sopenharmony_ci goto err_out; 1129987da915Sopenharmony_ci } 1130987da915Sopenharmony_ci#endif 1131987da915Sopenharmony_ci /* Now write the NULL character. */ 1132987da915Sopenharmony_ci ucs[o] = const_cpu_to_le16(L'\0'); 1133987da915Sopenharmony_ci *outs = ucs; 1134987da915Sopenharmony_ci return o; 1135987da915Sopenharmony_cierr_out: 1136987da915Sopenharmony_ci free(ucs); 1137987da915Sopenharmony_ci#else /* MB_CUR_MAX */ 1138987da915Sopenharmony_ci errno = EILSEQ; 1139987da915Sopenharmony_ci#endif /* MB_CUR_MAX */ 1140987da915Sopenharmony_ci return -1; 1141987da915Sopenharmony_ci} 1142987da915Sopenharmony_ci 1143987da915Sopenharmony_ci/* 1144987da915Sopenharmony_ci * Turn a UTF8 name uppercase 1145987da915Sopenharmony_ci * 1146987da915Sopenharmony_ci * Returns an allocated uppercase name which has to be freed by caller 1147987da915Sopenharmony_ci * or NULL if there is an error (described by errno) 1148987da915Sopenharmony_ci */ 1149987da915Sopenharmony_ci 1150987da915Sopenharmony_cichar *ntfs_uppercase_mbs(const char *low, 1151987da915Sopenharmony_ci const ntfschar *upcase, u32 upcase_size) 1152987da915Sopenharmony_ci{ 1153987da915Sopenharmony_ci int size; 1154987da915Sopenharmony_ci char *upp; 1155987da915Sopenharmony_ci u32 wc; 1156987da915Sopenharmony_ci int n; 1157987da915Sopenharmony_ci const char *s; 1158987da915Sopenharmony_ci char *t; 1159987da915Sopenharmony_ci 1160987da915Sopenharmony_ci size = strlen(low); 1161987da915Sopenharmony_ci upp = (char*)ntfs_malloc(3*size + 1); 1162987da915Sopenharmony_ci if (upp) { 1163987da915Sopenharmony_ci s = low; 1164987da915Sopenharmony_ci t = upp; 1165987da915Sopenharmony_ci do { 1166987da915Sopenharmony_ci n = utf8_to_unicode(&wc, s); 1167987da915Sopenharmony_ci if (n > 0) { 1168987da915Sopenharmony_ci if (wc < upcase_size) 1169987da915Sopenharmony_ci wc = le16_to_cpu(upcase[wc]); 1170987da915Sopenharmony_ci if (wc < 0x80) 
1171987da915Sopenharmony_ci *t++ = wc; 1172987da915Sopenharmony_ci else if (wc < 0x800) { 1173987da915Sopenharmony_ci *t++ = (0xc0 | ((wc >> 6) & 0x3f)); 1174987da915Sopenharmony_ci *t++ = 0x80 | (wc & 0x3f); 1175987da915Sopenharmony_ci } else if (wc < 0x10000) { 1176987da915Sopenharmony_ci *t++ = 0xe0 | (wc >> 12); 1177987da915Sopenharmony_ci *t++ = 0x80 | ((wc >> 6) & 0x3f); 1178987da915Sopenharmony_ci *t++ = 0x80 | (wc & 0x3f); 1179987da915Sopenharmony_ci } else { 1180987da915Sopenharmony_ci *t++ = 0xf0 | ((wc >> 18) & 7); 1181987da915Sopenharmony_ci *t++ = 0x80 | ((wc >> 12) & 63); 1182987da915Sopenharmony_ci *t++ = 0x80 | ((wc >> 6) & 0x3f); 1183987da915Sopenharmony_ci *t++ = 0x80 | (wc & 0x3f); 1184987da915Sopenharmony_ci } 1185987da915Sopenharmony_ci s += n; 1186987da915Sopenharmony_ci } 1187987da915Sopenharmony_ci } while (n > 0); 1188987da915Sopenharmony_ci if (n < 0) { 1189987da915Sopenharmony_ci free(upp); 1190987da915Sopenharmony_ci upp = (char*)NULL; 1191987da915Sopenharmony_ci errno = EILSEQ; 1192987da915Sopenharmony_ci } 1193987da915Sopenharmony_ci *t = 0; 1194987da915Sopenharmony_ci } 1195987da915Sopenharmony_ci return (upp); 1196987da915Sopenharmony_ci} 1197987da915Sopenharmony_ci 1198987da915Sopenharmony_ci/** 1199987da915Sopenharmony_ci * ntfs_upcase_table_build - build the default upcase table for NTFS 1200987da915Sopenharmony_ci * @uc: destination buffer where to store the built table 1201987da915Sopenharmony_ci * @uc_len: size of destination buffer in bytes 1202987da915Sopenharmony_ci * 1203987da915Sopenharmony_ci * ntfs_upcase_table_build() builds the default upcase table for NTFS and 1204987da915Sopenharmony_ci * stores it in the caller supplied buffer @uc of size @uc_len. 1205987da915Sopenharmony_ci * 1206987da915Sopenharmony_ci * Note, @uc_len must be at least 128kiB in size or bad things will happen! 
1207987da915Sopenharmony_ci */ 1208987da915Sopenharmony_civoid ntfs_upcase_table_build(ntfschar *uc, u32 uc_len) 1209987da915Sopenharmony_ci{ 1210987da915Sopenharmony_ci struct NEWUPPERCASE { 1211987da915Sopenharmony_ci unsigned short first; 1212987da915Sopenharmony_ci unsigned short last; 1213987da915Sopenharmony_ci short diff; 1214987da915Sopenharmony_ci unsigned char step; 1215987da915Sopenharmony_ci unsigned char osmajor; 1216987da915Sopenharmony_ci unsigned char osminor; 1217987da915Sopenharmony_ci } ; 1218987da915Sopenharmony_ci 1219987da915Sopenharmony_ci /* 1220987da915Sopenharmony_ci * This is the table as defined by Windows XP 1221987da915Sopenharmony_ci */ 1222987da915Sopenharmony_ci static int uc_run_table[][3] = { /* Start, End, Add */ 1223987da915Sopenharmony_ci {0x0061, 0x007B, -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72, 74}, 1224987da915Sopenharmony_ci {0x00E0, 0x00F7, -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76, 86}, 1225987da915Sopenharmony_ci {0x00F8, 0x00FF, -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100}, 1226987da915Sopenharmony_ci {0x0256, 0x0258, -205}, {0x1F00, 0x1F08, 8}, {0x1F78, 0x1F7A, 128}, 1227987da915Sopenharmony_ci {0x028A, 0x028C, -217}, {0x1F10, 0x1F16, 8}, {0x1F7A, 0x1F7C, 112}, 1228987da915Sopenharmony_ci {0x03AC, 0x03AD, -38}, {0x1F20, 0x1F28, 8}, {0x1F7C, 0x1F7E, 126}, 1229987da915Sopenharmony_ci {0x03AD, 0x03B0, -37}, {0x1F30, 0x1F38, 8}, {0x1FB0, 0x1FB2, 8}, 1230987da915Sopenharmony_ci {0x03B1, 0x03C2, -32}, {0x1F40, 0x1F46, 8}, {0x1FD0, 0x1FD2, 8}, 1231987da915Sopenharmony_ci {0x03C2, 0x03C3, -31}, {0x1F51, 0x1F52, 8}, {0x1FE0, 0x1FE2, 8}, 1232987da915Sopenharmony_ci {0x03C3, 0x03CC, -32}, {0x1F53, 0x1F54, 8}, {0x1FE5, 0x1FE6, 7}, 1233987da915Sopenharmony_ci {0x03CC, 0x03CD, -64}, {0x1F55, 0x1F56, 8}, {0x2170, 0x2180, -16}, 1234987da915Sopenharmony_ci {0x03CD, 0x03CF, -63}, {0x1F57, 0x1F58, 8}, {0x24D0, 0x24EA, -26}, 1235987da915Sopenharmony_ci {0x0430, 0x0450, -32}, {0x1F60, 0x1F68, 8}, {0xFF41, 0xFF5B, -32}, 
1236987da915Sopenharmony_ci {0} 1237987da915Sopenharmony_ci }; 1238987da915Sopenharmony_ci static int uc_dup_table[][2] = { /* Start, End */ 1239987da915Sopenharmony_ci {0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC}, 1240987da915Sopenharmony_ci {0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB}, 1241987da915Sopenharmony_ci {0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5}, 1242987da915Sopenharmony_ci {0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9}, 1243987da915Sopenharmony_ci {0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95}, 1244987da915Sopenharmony_ci {0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9}, 1245987da915Sopenharmony_ci {0} 1246987da915Sopenharmony_ci }; 1247987da915Sopenharmony_ci static int uc_byte_table[][2] = { /* Offset, Value */ 1248987da915Sopenharmony_ci {0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196}, 1249987da915Sopenharmony_ci {0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C}, 1250987da915Sopenharmony_ci {0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D}, 1251987da915Sopenharmony_ci {0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F}, 1252987da915Sopenharmony_ci {0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9}, 1253987da915Sopenharmony_ci {0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE}, 1254987da915Sopenharmony_ci {0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7}, 1255987da915Sopenharmony_ci {0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197}, 1256987da915Sopenharmony_ci {0} 1257987da915Sopenharmony_ci }; 1258987da915Sopenharmony_ci 1259987da915Sopenharmony_ci/* 1260987da915Sopenharmony_ci * Changes which were applied to later Windows versions 1261987da915Sopenharmony_ci * 1262987da915Sopenharmony_ci * md5 for $UpCase from Winxp : 6fa3db2468275286210751e869d36373 
1263987da915Sopenharmony_ci * Vista : 2f03b5a69d486ff3864cecbd07f24440 1264987da915Sopenharmony_ci * Win8 : 7ff498a44e45e77374cc7c962b1b92f2 1265987da915Sopenharmony_ci */ 1266987da915Sopenharmony_ci static const struct NEWUPPERCASE newuppercase[] = { 1267987da915Sopenharmony_ci /* from Windows 6.0 (Vista) */ 1268987da915Sopenharmony_ci { 0x37b, 0x37d, 0x82, 1, 6, 0 }, 1269987da915Sopenharmony_ci { 0x1f80, 0x1f87, 0x8, 1, 6, 0 }, 1270987da915Sopenharmony_ci { 0x1f90, 0x1f97, 0x8, 1, 6, 0 }, 1271987da915Sopenharmony_ci { 0x1fa0, 0x1fa7, 0x8, 1, 6, 0 }, 1272987da915Sopenharmony_ci { 0x2c30, 0x2c5e, -0x30, 1, 6, 0 }, 1273987da915Sopenharmony_ci { 0x2d00, 0x2d25, -0x1c60, 1, 6, 0 }, 1274987da915Sopenharmony_ci { 0x2c68, 0x2c6c, -0x1, 2, 6, 0 }, 1275987da915Sopenharmony_ci { 0x219, 0x21f, -0x1, 2, 6, 0 }, 1276987da915Sopenharmony_ci { 0x223, 0x233, -0x1, 2, 6, 0 }, 1277987da915Sopenharmony_ci { 0x247, 0x24f, -0x1, 2, 6, 0 }, 1278987da915Sopenharmony_ci { 0x3d9, 0x3e1, -0x1, 2, 6, 0 }, 1279987da915Sopenharmony_ci { 0x48b, 0x48f, -0x1, 2, 6, 0 }, 1280987da915Sopenharmony_ci { 0x4fb, 0x513, -0x1, 2, 6, 0 }, 1281987da915Sopenharmony_ci { 0x2c81, 0x2ce3, -0x1, 2, 6, 0 }, 1282987da915Sopenharmony_ci { 0x3f8, 0x3fb, -0x1, 3, 6, 0 }, 1283987da915Sopenharmony_ci { 0x4c6, 0x4ce, -0x1, 4, 6, 0 }, 1284987da915Sopenharmony_ci { 0x23c, 0x242, -0x1, 6, 6, 0 }, 1285987da915Sopenharmony_ci { 0x4ed, 0x4f7, -0x1, 10, 6, 0 }, 1286987da915Sopenharmony_ci { 0x450, 0x45d, -0x50, 13, 6, 0 }, 1287987da915Sopenharmony_ci { 0x2c61, 0x2c76, -0x1, 21, 6, 0 }, 1288987da915Sopenharmony_ci { 0x1fcc, 0x1ffc, -0x9, 48, 6, 0 }, 1289987da915Sopenharmony_ci { 0x180, 0x180, 0xc3, 1, 6, 0 }, 1290987da915Sopenharmony_ci { 0x195, 0x195, 0x61, 1, 6, 0 }, 1291987da915Sopenharmony_ci { 0x19a, 0x19a, 0xa3, 1, 6, 0 }, 1292987da915Sopenharmony_ci { 0x19e, 0x19e, 0x82, 1, 6, 0 }, 1293987da915Sopenharmony_ci { 0x1bf, 0x1bf, 0x38, 1, 6, 0 }, 1294987da915Sopenharmony_ci { 0x1f9, 0x1f9, -0x1, 1, 6, 0 }, 
1295987da915Sopenharmony_ci { 0x23a, 0x23a, 0x2a2b, 1, 6, 0 }, 1296987da915Sopenharmony_ci { 0x23e, 0x23e, 0x2a28, 1, 6, 0 }, 1297987da915Sopenharmony_ci { 0x26b, 0x26b, 0x29f7, 1, 6, 0 }, 1298987da915Sopenharmony_ci { 0x27d, 0x27d, 0x29e7, 1, 6, 0 }, 1299987da915Sopenharmony_ci { 0x280, 0x280, -0xda, 1, 6, 0 }, 1300987da915Sopenharmony_ci { 0x289, 0x289, -0x45, 1, 6, 0 }, 1301987da915Sopenharmony_ci { 0x28c, 0x28c, -0x47, 1, 6, 0 }, 1302987da915Sopenharmony_ci { 0x3f2, 0x3f2, 0x7, 1, 6, 0 }, 1303987da915Sopenharmony_ci { 0x4cf, 0x4cf, -0xf, 1, 6, 0 }, 1304987da915Sopenharmony_ci { 0x1d7d, 0x1d7d, 0xee6, 1, 6, 0 }, 1305987da915Sopenharmony_ci { 0x1fb3, 0x1fb3, 0x9, 1, 6, 0 }, 1306987da915Sopenharmony_ci { 0x214e, 0x214e, -0x1c, 1, 6, 0 }, 1307987da915Sopenharmony_ci { 0x2184, 0x2184, -0x1, 1, 6, 0 }, 1308987da915Sopenharmony_ci /* from Windows 6.1 (Win7) */ 1309987da915Sopenharmony_ci { 0x23a, 0x23e, 0x0, 4, 6, 1 }, 1310987da915Sopenharmony_ci { 0x250, 0x250, 0x2a1f, 2, 6, 1 }, 1311987da915Sopenharmony_ci { 0x251, 0x251, 0x2a1c, 2, 6, 1 }, 1312987da915Sopenharmony_ci { 0x271, 0x271, 0x29fd, 2, 6, 1 }, 1313987da915Sopenharmony_ci { 0x371, 0x373, -0x1, 2, 6, 1 }, 1314987da915Sopenharmony_ci { 0x377, 0x377, -0x1, 2, 6, 1 }, 1315987da915Sopenharmony_ci { 0x3c2, 0x3c2, 0x0, 2, 6, 1 }, 1316987da915Sopenharmony_ci { 0x3d7, 0x3d7, -0x8, 2, 6, 1 }, 1317987da915Sopenharmony_ci { 0x515, 0x523, -0x1, 2, 6, 1 }, 1318987da915Sopenharmony_ci /* below, -0x75fc stands for 0x8a04 and truncation */ 1319987da915Sopenharmony_ci { 0x1d79, 0x1d79, -0x75fc, 2, 6, 1 }, 1320987da915Sopenharmony_ci { 0x1efb, 0x1eff, -0x1, 2, 6, 1 }, 1321987da915Sopenharmony_ci { 0x1fc3, 0x1ff3, 0x9, 48, 6, 1 }, 1322987da915Sopenharmony_ci { 0x1fcc, 0x1ffc, 0x0, 48, 6, 1 }, 1323987da915Sopenharmony_ci { 0x2c65, 0x2c65, -0x2a2b, 2, 6, 1 }, 1324987da915Sopenharmony_ci { 0x2c66, 0x2c66, -0x2a28, 2, 6, 1 }, 1325987da915Sopenharmony_ci { 0x2c73, 0x2c73, -0x1, 2, 6, 1 }, 1326987da915Sopenharmony_ci { 0xa641, 
0xa65f, -0x1, 2, 6, 1 }, 1327987da915Sopenharmony_ci { 0xa663, 0xa66d, -0x1, 2, 6, 1 }, 1328987da915Sopenharmony_ci { 0xa681, 0xa697, -0x1, 2, 6, 1 }, 1329987da915Sopenharmony_ci { 0xa723, 0xa72f, -0x1, 2, 6, 1 }, 1330987da915Sopenharmony_ci { 0xa733, 0xa76f, -0x1, 2, 6, 1 }, 1331987da915Sopenharmony_ci { 0xa77a, 0xa77c, -0x1, 2, 6, 1 }, 1332987da915Sopenharmony_ci { 0xa77f, 0xa787, -0x1, 2, 6, 1 }, 1333987da915Sopenharmony_ci { 0xa78c, 0xa78c, -0x1, 2, 6, 1 }, 1334987da915Sopenharmony_ci /* end mark */ 1335987da915Sopenharmony_ci { 0 } 1336987da915Sopenharmony_ci } ; 1337987da915Sopenharmony_ci 1338987da915Sopenharmony_ci int i, r; 1339987da915Sopenharmony_ci int k, off; 1340987da915Sopenharmony_ci const struct NEWUPPERCASE *puc; 1341987da915Sopenharmony_ci 1342987da915Sopenharmony_ci memset((char*)uc, 0, uc_len); 1343987da915Sopenharmony_ci uc_len >>= 1; 1344987da915Sopenharmony_ci if (uc_len > 65536) 1345987da915Sopenharmony_ci uc_len = 65536; 1346987da915Sopenharmony_ci for (i = 0; (u32)i < uc_len; i++) 1347987da915Sopenharmony_ci uc[i] = cpu_to_le16(i); 1348987da915Sopenharmony_ci for (r = 0; uc_run_table[r][0]; r++) { 1349987da915Sopenharmony_ci off = uc_run_table[r][2]; 1350987da915Sopenharmony_ci for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++) 1351987da915Sopenharmony_ci uc[i] = cpu_to_le16(i + off); 1352987da915Sopenharmony_ci } 1353987da915Sopenharmony_ci for (r = 0; uc_dup_table[r][0]; r++) 1354987da915Sopenharmony_ci for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2) 1355987da915Sopenharmony_ci uc[i + 1] = cpu_to_le16(i); 1356987da915Sopenharmony_ci for (r = 0; uc_byte_table[r][0]; r++) { 1357987da915Sopenharmony_ci k = uc_byte_table[r][1]; 1358987da915Sopenharmony_ci uc[uc_byte_table[r][0]] = cpu_to_le16(k); 1359987da915Sopenharmony_ci } 1360987da915Sopenharmony_ci for (r=0; newuppercase[r].first; r++) { 1361987da915Sopenharmony_ci puc = &newuppercase[r]; 1362987da915Sopenharmony_ci if ((puc->osmajor < UPCASE_MAJOR) 
1363987da915Sopenharmony_ci || ((puc->osmajor == UPCASE_MAJOR) 1364987da915Sopenharmony_ci && (puc->osminor <= UPCASE_MINOR))) { 1365987da915Sopenharmony_ci off = puc->diff; 1366987da915Sopenharmony_ci for (i = puc->first; i <= puc->last; i += puc->step) 1367987da915Sopenharmony_ci uc[i] = cpu_to_le16(i + off); 1368987da915Sopenharmony_ci } 1369987da915Sopenharmony_ci } 1370987da915Sopenharmony_ci} 1371987da915Sopenharmony_ci 1372987da915Sopenharmony_ci/* 1373987da915Sopenharmony_ci * Allocate and build the default upcase table 1374987da915Sopenharmony_ci * 1375987da915Sopenharmony_ci * Returns the number of entries 1376987da915Sopenharmony_ci * 0 if failed 1377987da915Sopenharmony_ci */ 1378987da915Sopenharmony_ci 1379987da915Sopenharmony_ci#define UPCASE_LEN 65536 /* default number of entries in upcase */ 1380987da915Sopenharmony_ci 1381987da915Sopenharmony_ciu32 ntfs_upcase_build_default(ntfschar **upcase) 1382987da915Sopenharmony_ci{ 1383987da915Sopenharmony_ci u32 upcase_len = 0; 1384987da915Sopenharmony_ci 1385987da915Sopenharmony_ci *upcase = (ntfschar*)ntfs_malloc(UPCASE_LEN*2); 1386987da915Sopenharmony_ci if (*upcase) { 1387987da915Sopenharmony_ci ntfs_upcase_table_build(*upcase, UPCASE_LEN*2); 1388987da915Sopenharmony_ci upcase_len = UPCASE_LEN; 1389987da915Sopenharmony_ci } 1390987da915Sopenharmony_ci return (upcase_len); 1391987da915Sopenharmony_ci} 1392987da915Sopenharmony_ci 1393987da915Sopenharmony_ci/* 1394987da915Sopenharmony_ci * Build a table for converting to lower case 1395987da915Sopenharmony_ci * 1396987da915Sopenharmony_ci * This is only meaningful when there is a single lower case 1397987da915Sopenharmony_ci * character leading to an upper case one, and currently the 1398987da915Sopenharmony_ci * only exception is the greek letter sigma which has a single 1399987da915Sopenharmony_ci * upper case glyph (code U+03A3), but two lower case glyphs 1400987da915Sopenharmony_ci * (code U+03C3 and U+03C2, the latter to be used at the end 
1401987da915Sopenharmony_ci * of a word). In the following implementation the upper case 1402987da915Sopenharmony_ci * sigma will be lowercased as U+03C3. 1403987da915Sopenharmony_ci */ 1404987da915Sopenharmony_ci 1405987da915Sopenharmony_cintfschar *ntfs_locase_table_build(const ntfschar *uc, u32 uc_cnt) 1406987da915Sopenharmony_ci{ 1407987da915Sopenharmony_ci ntfschar *lc; 1408987da915Sopenharmony_ci u32 upp; 1409987da915Sopenharmony_ci u32 i; 1410987da915Sopenharmony_ci 1411987da915Sopenharmony_ci lc = (ntfschar*)ntfs_malloc(uc_cnt*sizeof(ntfschar)); 1412987da915Sopenharmony_ci if (lc) { 1413987da915Sopenharmony_ci for (i=0; i<uc_cnt; i++) 1414987da915Sopenharmony_ci lc[i] = cpu_to_le16(i); 1415987da915Sopenharmony_ci for (i=0; i<uc_cnt; i++) { 1416987da915Sopenharmony_ci upp = le16_to_cpu(uc[i]); 1417987da915Sopenharmony_ci if ((upp != i) && (upp < uc_cnt)) 1418987da915Sopenharmony_ci lc[upp] = cpu_to_le16(i); 1419987da915Sopenharmony_ci } 1420987da915Sopenharmony_ci } else 1421987da915Sopenharmony_ci ntfs_log_error("Could not build the locase table\n"); 1422987da915Sopenharmony_ci return (lc); 1423987da915Sopenharmony_ci} 1424987da915Sopenharmony_ci 1425987da915Sopenharmony_ci/** 1426987da915Sopenharmony_ci * ntfs_str2ucs - convert a string to a valid NTFS file name 1427987da915Sopenharmony_ci * @s: input string 1428987da915Sopenharmony_ci * @len: length of output buffer in Unicode characters 1429987da915Sopenharmony_ci * 1430987da915Sopenharmony_ci * Convert the input @s string into the corresponding little endian, 1431987da915Sopenharmony_ci * 2-byte Unicode string. The length of the converted string is less 1432987da915Sopenharmony_ci * or equal to the maximum length allowed by the NTFS format (255). 1433987da915Sopenharmony_ci * 1434987da915Sopenharmony_ci * If @s is NULL then return AT_UNNAMED. 
1435987da915Sopenharmony_ci * 1436987da915Sopenharmony_ci * On success the function returns the Unicode string in an allocated 1437987da915Sopenharmony_ci * buffer and the caller is responsible to free it when it's not needed 1438987da915Sopenharmony_ci * anymore. 1439987da915Sopenharmony_ci * 1440987da915Sopenharmony_ci * On error NULL is returned and errno is set to the error code. 1441987da915Sopenharmony_ci */ 1442987da915Sopenharmony_cintfschar *ntfs_str2ucs(const char *s, int *len) 1443987da915Sopenharmony_ci{ 1444987da915Sopenharmony_ci ntfschar *ucs = NULL; 1445987da915Sopenharmony_ci 1446987da915Sopenharmony_ci if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) { 1447987da915Sopenharmony_ci ntfs_log_perror("Couldn't convert '%s' to Unicode", s); 1448987da915Sopenharmony_ci return NULL; 1449987da915Sopenharmony_ci } 1450987da915Sopenharmony_ci if (*len > NTFS_MAX_NAME_LEN) { 1451987da915Sopenharmony_ci free(ucs); 1452987da915Sopenharmony_ci errno = ENAMETOOLONG; 1453987da915Sopenharmony_ci return NULL; 1454987da915Sopenharmony_ci } 1455987da915Sopenharmony_ci if (!ucs || !*len) { 1456987da915Sopenharmony_ci ucs = AT_UNNAMED; 1457987da915Sopenharmony_ci *len = 0; 1458987da915Sopenharmony_ci } 1459987da915Sopenharmony_ci return ucs; 1460987da915Sopenharmony_ci} 1461987da915Sopenharmony_ci 1462987da915Sopenharmony_ci/** 1463987da915Sopenharmony_ci * ntfs_ucsfree - free memory allocated by ntfs_str2ucs() 1464987da915Sopenharmony_ci * @ucs input string to be freed 1465987da915Sopenharmony_ci * 1466987da915Sopenharmony_ci * Free memory at @ucs and which was allocated by ntfs_str2ucs. 1467987da915Sopenharmony_ci * 1468987da915Sopenharmony_ci * Return value: none. 
1469987da915Sopenharmony_ci */ 1470987da915Sopenharmony_civoid ntfs_ucsfree(ntfschar *ucs) 1471987da915Sopenharmony_ci{ 1472987da915Sopenharmony_ci if (ucs && (ucs != AT_UNNAMED)) 1473987da915Sopenharmony_ci free(ucs); 1474987da915Sopenharmony_ci} 1475987da915Sopenharmony_ci 1476987da915Sopenharmony_ci/* 1477987da915Sopenharmony_ci * Check whether a name contains no chars forbidden 1478987da915Sopenharmony_ci * for DOS or Win32 use 1479987da915Sopenharmony_ci * 1480987da915Sopenharmony_ci * If @strict is TRUE, then trailing dots and spaces are forbidden. 1481987da915Sopenharmony_ci * These names are technically allowed in the Win32 namespace, but 1482987da915Sopenharmony_ci * they can be problematic. See comment for FILE_NAME_WIN32. 1483987da915Sopenharmony_ci * 1484987da915Sopenharmony_ci * If there is a bad char, errno is set to EINVAL 1485987da915Sopenharmony_ci */ 1486987da915Sopenharmony_ci 1487987da915Sopenharmony_ciBOOL ntfs_forbidden_chars(const ntfschar *name, int len, BOOL strict) 1488987da915Sopenharmony_ci{ 1489987da915Sopenharmony_ci BOOL forbidden; 1490987da915Sopenharmony_ci int ch; 1491987da915Sopenharmony_ci int i; 1492987da915Sopenharmony_ci static const u32 mainset = (1L << ('\"' - 0x20)) 1493987da915Sopenharmony_ci | (1L << ('*' - 0x20)) 1494987da915Sopenharmony_ci | (1L << ('/' - 0x20)) 1495987da915Sopenharmony_ci | (1L << (':' - 0x20)) 1496987da915Sopenharmony_ci | (1L << ('<' - 0x20)) 1497987da915Sopenharmony_ci | (1L << ('>' - 0x20)) 1498987da915Sopenharmony_ci | (1L << ('?' 
- 0x20)); 1499987da915Sopenharmony_ci 1500987da915Sopenharmony_ci forbidden = (len == 0) || 1501987da915Sopenharmony_ci (strict && (name[len-1] == const_cpu_to_le16(' ') || 1502987da915Sopenharmony_ci name[len-1] == const_cpu_to_le16('.'))); 1503987da915Sopenharmony_ci for (i=0; i<len; i++) { 1504987da915Sopenharmony_ci ch = le16_to_cpu(name[i]); 1505987da915Sopenharmony_ci if ((ch < 0x20) 1506987da915Sopenharmony_ci || ((ch < 0x40) 1507987da915Sopenharmony_ci && ((1L << (ch - 0x20)) & mainset)) 1508987da915Sopenharmony_ci || (ch == '\\') 1509987da915Sopenharmony_ci || (ch == '|')) 1510987da915Sopenharmony_ci forbidden = TRUE; 1511987da915Sopenharmony_ci } 1512987da915Sopenharmony_ci if (forbidden) 1513987da915Sopenharmony_ci errno = EINVAL; 1514987da915Sopenharmony_ci return (forbidden); 1515987da915Sopenharmony_ci} 1516987da915Sopenharmony_ci 1517987da915Sopenharmony_ci/* 1518987da915Sopenharmony_ci * Check whether a name contains no forbidden chars and 1519987da915Sopenharmony_ci * is not a reserved name for DOS or Win32 use 1520987da915Sopenharmony_ci * 1521987da915Sopenharmony_ci * The reserved names are CON, PRN, AUX, NUL, COM1..COM9, LPT1..LPT9 1522987da915Sopenharmony_ci * with no suffix or any suffix. 1523987da915Sopenharmony_ci * 1524987da915Sopenharmony_ci * If @strict is TRUE, then trailing dots and spaces are forbidden. 1525987da915Sopenharmony_ci * These names are technically allowed in the Win32 namespace, but 1526987da915Sopenharmony_ci * they can be problematic. See comment for FILE_NAME_WIN32. 
1527987da915Sopenharmony_ci * 1528987da915Sopenharmony_ci * If the name is forbidden, errno is set to EINVAL 1529987da915Sopenharmony_ci */ 1530987da915Sopenharmony_ci 1531987da915Sopenharmony_ciBOOL ntfs_forbidden_names(ntfs_volume *vol, const ntfschar *name, int len, 1532987da915Sopenharmony_ci BOOL strict) 1533987da915Sopenharmony_ci{ 1534987da915Sopenharmony_ci BOOL forbidden; 1535987da915Sopenharmony_ci int h; 1536987da915Sopenharmony_ci static const ntfschar dot = const_cpu_to_le16('.'); 1537987da915Sopenharmony_ci static const ntfschar con[] = { const_cpu_to_le16('c'), 1538987da915Sopenharmony_ci const_cpu_to_le16('o'), const_cpu_to_le16('n') }; 1539987da915Sopenharmony_ci static const ntfschar prn[] = { const_cpu_to_le16('p'), 1540987da915Sopenharmony_ci const_cpu_to_le16('r'), const_cpu_to_le16('n') }; 1541987da915Sopenharmony_ci static const ntfschar aux[] = { const_cpu_to_le16('a'), 1542987da915Sopenharmony_ci const_cpu_to_le16('u'), const_cpu_to_le16('x') }; 1543987da915Sopenharmony_ci static const ntfschar nul[] = { const_cpu_to_le16('n'), 1544987da915Sopenharmony_ci const_cpu_to_le16('u'), const_cpu_to_le16('l') }; 1545987da915Sopenharmony_ci static const ntfschar com[] = { const_cpu_to_le16('c'), 1546987da915Sopenharmony_ci const_cpu_to_le16('o'), const_cpu_to_le16('m') }; 1547987da915Sopenharmony_ci static const ntfschar lpt[] = { const_cpu_to_le16('l'), 1548987da915Sopenharmony_ci const_cpu_to_le16('p'), const_cpu_to_le16('t') }; 1549987da915Sopenharmony_ci 1550987da915Sopenharmony_ci forbidden = ntfs_forbidden_chars(name, len, strict); 1551987da915Sopenharmony_ci if (!forbidden && (len >= 3)) { 1552987da915Sopenharmony_ci /* 1553987da915Sopenharmony_ci * Rough hash check to tell whether the first couple of chars 1554987da915Sopenharmony_ci * may be one of CO PR AU NU LP or lowercase variants. 
1555987da915Sopenharmony_ci */ 1556987da915Sopenharmony_ci h = ((le16_to_cpu(name[0]) & 31)*48) 1557987da915Sopenharmony_ci ^ ((le16_to_cpu(name[1]) & 31)*165); 1558987da915Sopenharmony_ci if ((h % 23) == 17) { 1559987da915Sopenharmony_ci /* do a full check, depending on the third char */ 1560987da915Sopenharmony_ci switch (le16_to_cpu(name[2]) & ~0x20) { 1561987da915Sopenharmony_ci case 'N' : 1562987da915Sopenharmony_ci if (((len == 3) || (name[3] == dot)) 1563987da915Sopenharmony_ci && (!ntfs_ucsncasecmp(name, con, 3, 1564987da915Sopenharmony_ci vol->upcase, vol->upcase_len) 1565987da915Sopenharmony_ci || !ntfs_ucsncasecmp(name, prn, 3, 1566987da915Sopenharmony_ci vol->upcase, vol->upcase_len))) 1567987da915Sopenharmony_ci forbidden = TRUE; 1568987da915Sopenharmony_ci break; 1569987da915Sopenharmony_ci case 'X' : 1570987da915Sopenharmony_ci if (((len == 3) || (name[3] == dot)) 1571987da915Sopenharmony_ci && !ntfs_ucsncasecmp(name, aux, 3, 1572987da915Sopenharmony_ci vol->upcase, vol->upcase_len)) 1573987da915Sopenharmony_ci forbidden = TRUE; 1574987da915Sopenharmony_ci break; 1575987da915Sopenharmony_ci case 'L' : 1576987da915Sopenharmony_ci if (((len == 3) || (name[3] == dot)) 1577987da915Sopenharmony_ci && !ntfs_ucsncasecmp(name, nul, 3, 1578987da915Sopenharmony_ci vol->upcase, vol->upcase_len)) 1579987da915Sopenharmony_ci forbidden = TRUE; 1580987da915Sopenharmony_ci break; 1581987da915Sopenharmony_ci case 'M' : 1582987da915Sopenharmony_ci if ((len > 3) 1583987da915Sopenharmony_ci && (le16_to_cpu(name[3]) >= '1') 1584987da915Sopenharmony_ci && (le16_to_cpu(name[3]) <= '9') 1585987da915Sopenharmony_ci && ((len == 4) || (name[4] == dot)) 1586987da915Sopenharmony_ci && !ntfs_ucsncasecmp(name, com, 3, 1587987da915Sopenharmony_ci vol->upcase, vol->upcase_len)) 1588987da915Sopenharmony_ci forbidden = TRUE; 1589987da915Sopenharmony_ci break; 1590987da915Sopenharmony_ci case 'T' : 1591987da915Sopenharmony_ci if ((len > 3) 1592987da915Sopenharmony_ci && 
(le16_to_cpu(name[3]) >= '1') 1593987da915Sopenharmony_ci && (le16_to_cpu(name[3]) <= '9') 1594987da915Sopenharmony_ci && ((len == 4) || (name[4] == dot)) 1595987da915Sopenharmony_ci && !ntfs_ucsncasecmp(name, lpt, 3, 1596987da915Sopenharmony_ci vol->upcase, vol->upcase_len)) 1597987da915Sopenharmony_ci forbidden = TRUE; 1598987da915Sopenharmony_ci break; 1599987da915Sopenharmony_ci } 1600987da915Sopenharmony_ci } 1601987da915Sopenharmony_ci } 1602987da915Sopenharmony_ci 1603987da915Sopenharmony_ci if (forbidden) 1604987da915Sopenharmony_ci errno = EINVAL; 1605987da915Sopenharmony_ci return (forbidden); 1606987da915Sopenharmony_ci} 1607987da915Sopenharmony_ci 1608987da915Sopenharmony_ci/* 1609987da915Sopenharmony_ci * Check whether the same name can be used as a DOS and 1610987da915Sopenharmony_ci * a Win32 name 1611987da915Sopenharmony_ci * 1612987da915Sopenharmony_ci * The names must be the same, or the short name the uppercase 1613987da915Sopenharmony_ci * variant of the long name 1614987da915Sopenharmony_ci */ 1615987da915Sopenharmony_ci 1616987da915Sopenharmony_ciBOOL ntfs_collapsible_chars(ntfs_volume *vol, 1617987da915Sopenharmony_ci const ntfschar *shortname, int shortlen, 1618987da915Sopenharmony_ci const ntfschar *longname, int longlen) 1619987da915Sopenharmony_ci{ 1620987da915Sopenharmony_ci BOOL collapsible; 1621987da915Sopenharmony_ci unsigned int ch; 1622987da915Sopenharmony_ci unsigned int cs; 1623987da915Sopenharmony_ci int i; 1624987da915Sopenharmony_ci 1625987da915Sopenharmony_ci collapsible = shortlen == longlen; 1626987da915Sopenharmony_ci for (i=0; collapsible && (i<shortlen); i++) { 1627987da915Sopenharmony_ci ch = le16_to_cpu(longname[i]); 1628987da915Sopenharmony_ci cs = le16_to_cpu(shortname[i]); 1629987da915Sopenharmony_ci if ((cs != ch) 1630987da915Sopenharmony_ci && ((ch >= vol->upcase_len) 1631987da915Sopenharmony_ci || (cs >= vol->upcase_len) 1632987da915Sopenharmony_ci || (vol->upcase[cs] != vol->upcase[ch]))) 
1633987da915Sopenharmony_ci collapsible = FALSE; 1634987da915Sopenharmony_ci } 1635987da915Sopenharmony_ci return (collapsible); 1636987da915Sopenharmony_ci} 1637987da915Sopenharmony_ci 1638987da915Sopenharmony_ci/* 1639987da915Sopenharmony_ci * Define the character encoding to be used. 1640987da915Sopenharmony_ci * Use UTF-8 unless specified otherwise. 1641987da915Sopenharmony_ci */ 1642987da915Sopenharmony_ci 1643987da915Sopenharmony_ciint ntfs_set_char_encoding(const char *locale) 1644987da915Sopenharmony_ci{ 1645987da915Sopenharmony_ci use_utf8 = 0; 1646987da915Sopenharmony_ci if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8") 1647987da915Sopenharmony_ci || strstr(locale,"utf-8") || strstr(locale,"UTF-8")) 1648987da915Sopenharmony_ci use_utf8 = 1; 1649987da915Sopenharmony_ci else 1650987da915Sopenharmony_ci if (setlocale(LC_ALL, locale)) 1651987da915Sopenharmony_ci use_utf8 = 0; 1652987da915Sopenharmony_ci else { 1653987da915Sopenharmony_ci ntfs_log_error("Invalid locale, encoding to UTF-8\n"); 1654987da915Sopenharmony_ci use_utf8 = 1; 1655987da915Sopenharmony_ci } 1656987da915Sopenharmony_ci return 0; /* always successful */ 1657987da915Sopenharmony_ci} 1658987da915Sopenharmony_ci 1659987da915Sopenharmony_ci#if defined(__APPLE__) || defined(__DARWIN__) 1660987da915Sopenharmony_ci 1661987da915Sopenharmony_ciint ntfs_macosx_normalize_filenames(int normalize) { 1662987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 1663987da915Sopenharmony_ci if (normalize == 0 || normalize == 1) { 1664987da915Sopenharmony_ci nfconvert_utf8 = normalize; 1665987da915Sopenharmony_ci return 0; 1666987da915Sopenharmony_ci } 1667987da915Sopenharmony_ci else { 1668987da915Sopenharmony_ci return -1; 1669987da915Sopenharmony_ci } 1670987da915Sopenharmony_ci#else 1671987da915Sopenharmony_ci return -1; 1672987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 1673987da915Sopenharmony_ci} 1674987da915Sopenharmony_ci 1675987da915Sopenharmony_ciint ntfs_macosx_normalize_utf8(const char 
*utf8_string, char **target, 1676987da915Sopenharmony_ci int composed) 1677987da915Sopenharmony_ci{ 1678987da915Sopenharmony_ci#ifdef ENABLE_NFCONV 1679987da915Sopenharmony_ci /* For this code to compile, the CoreFoundation framework must be fed to 1680987da915Sopenharmony_ci * the linker. */ 1681987da915Sopenharmony_ci CFStringRef cfSourceString; 1682987da915Sopenharmony_ci CFMutableStringRef cfMutableString; 1683987da915Sopenharmony_ci CFRange rangeToProcess; 1684987da915Sopenharmony_ci CFIndex requiredBufferLength; 1685987da915Sopenharmony_ci char *result = NULL; 1686987da915Sopenharmony_ci int resultLength = -1; 1687987da915Sopenharmony_ci 1688987da915Sopenharmony_ci /* Convert the UTF-8 string to a CFString. */ 1689987da915Sopenharmony_ci cfSourceString = CFStringCreateWithCString(kCFAllocatorDefault, 1690987da915Sopenharmony_ci utf8_string, kCFStringEncodingUTF8); 1691987da915Sopenharmony_ci if (cfSourceString == NULL) { 1692987da915Sopenharmony_ci ntfs_log_error("CFStringCreateWithCString failed!\n"); 1693987da915Sopenharmony_ci return -2; 1694987da915Sopenharmony_ci } 1695987da915Sopenharmony_ci 1696987da915Sopenharmony_ci /* Create a mutable string from cfSourceString that we are free to 1697987da915Sopenharmony_ci * modify. */ 1698987da915Sopenharmony_ci cfMutableString = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, 1699987da915Sopenharmony_ci cfSourceString); 1700987da915Sopenharmony_ci CFRelease(cfSourceString); /* End-of-life. */ 1701987da915Sopenharmony_ci if (cfMutableString == NULL) { 1702987da915Sopenharmony_ci ntfs_log_error("CFStringCreateMutableCopy failed!\n"); 1703987da915Sopenharmony_ci return -3; 1704987da915Sopenharmony_ci } 1705987da915Sopenharmony_ci 1706987da915Sopenharmony_ci /* Normalize the mutable string to the desired normalization form. */ 1707987da915Sopenharmony_ci CFStringNormalize(cfMutableString, (composed != 0 ? 
1708987da915Sopenharmony_ci kCFStringNormalizationFormC : kCFStringNormalizationFormD)); 1709987da915Sopenharmony_ci 1710987da915Sopenharmony_ci /* Store the resulting string in a '\0'-terminated UTF-8 encoded char* 1711987da915Sopenharmony_ci * buffer. */ 1712987da915Sopenharmony_ci rangeToProcess = CFRangeMake(0, CFStringGetLength(cfMutableString)); 1713987da915Sopenharmony_ci if (CFStringGetBytes(cfMutableString, rangeToProcess, 1714987da915Sopenharmony_ci kCFStringEncodingUTF8, 0, false, NULL, 0, 1715987da915Sopenharmony_ci &requiredBufferLength) > 0) 1716987da915Sopenharmony_ci { 1717987da915Sopenharmony_ci resultLength = sizeof(char) * (requiredBufferLength + 1); 1718987da915Sopenharmony_ci result = ntfs_calloc(resultLength); 1719987da915Sopenharmony_ci 1720987da915Sopenharmony_ci if (result != NULL) { 1721987da915Sopenharmony_ci if (CFStringGetBytes(cfMutableString, rangeToProcess, 1722987da915Sopenharmony_ci kCFStringEncodingUTF8, 0, false, 1723987da915Sopenharmony_ci (UInt8*) result, resultLength - 1, 1724987da915Sopenharmony_ci &requiredBufferLength) <= 0) 1725987da915Sopenharmony_ci { 1726987da915Sopenharmony_ci ntfs_log_error("Could not perform UTF-8 " 1727987da915Sopenharmony_ci "conversion of normalized " 1728987da915Sopenharmony_ci "CFMutableString.\n"); 1729987da915Sopenharmony_ci free(result); 1730987da915Sopenharmony_ci result = NULL; 1731987da915Sopenharmony_ci } 1732987da915Sopenharmony_ci } 1733987da915Sopenharmony_ci else { 1734987da915Sopenharmony_ci ntfs_log_error("Could not perform a ntfs_calloc of %d " 1735987da915Sopenharmony_ci "bytes for char *result.\n", resultLength); 1736987da915Sopenharmony_ci } 1737987da915Sopenharmony_ci } 1738987da915Sopenharmony_ci else { 1739987da915Sopenharmony_ci ntfs_log_error("Could not perform check for required length of " 1740987da915Sopenharmony_ci "UTF-8 conversion of normalized CFMutableString.\n"); 1741987da915Sopenharmony_ci } 1742987da915Sopenharmony_ci 1743987da915Sopenharmony_ci 
CFRelease(cfMutableString); 1744987da915Sopenharmony_ci 1745987da915Sopenharmony_ci if (result != NULL) { 1746987da915Sopenharmony_ci *target = result; 1747987da915Sopenharmony_ci return resultLength - 1; 1748987da915Sopenharmony_ci } 1749987da915Sopenharmony_ci else { 1750987da915Sopenharmony_ci return -1; 1751987da915Sopenharmony_ci } 1752987da915Sopenharmony_ci#else 1753987da915Sopenharmony_ci return -1; 1754987da915Sopenharmony_ci#endif /* ENABLE_NFCONV */ 1755987da915Sopenharmony_ci} 1756987da915Sopenharmony_ci#endif /* defined(__APPLE__) || defined(__DARWIN__) */ 1757