18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 28c2ecf20Sopenharmony_ci/* 38c2ecf20Sopenharmony_ci * fs/cifs/cifs_unicode.c 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * Copyright (c) International Business Machines Corp., 2000,2009 68c2ecf20Sopenharmony_ci * Modified by Steve French (sfrench@us.ibm.com) 78c2ecf20Sopenharmony_ci */ 88c2ecf20Sopenharmony_ci#include <linux/fs.h> 98c2ecf20Sopenharmony_ci#include <linux/slab.h> 108c2ecf20Sopenharmony_ci#include "cifs_fs_sb.h" 118c2ecf20Sopenharmony_ci#include "cifs_unicode.h" 128c2ecf20Sopenharmony_ci#include "cifs_uniupr.h" 138c2ecf20Sopenharmony_ci#include "cifspdu.h" 148c2ecf20Sopenharmony_ci#include "cifsglob.h" 158c2ecf20Sopenharmony_ci#include "cifs_debug.h" 168c2ecf20Sopenharmony_ci 178c2ecf20Sopenharmony_ciint cifs_remap(struct cifs_sb_info *cifs_sb) 188c2ecf20Sopenharmony_ci{ 198c2ecf20Sopenharmony_ci int map_type; 208c2ecf20Sopenharmony_ci 218c2ecf20Sopenharmony_ci if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) 228c2ecf20Sopenharmony_ci map_type = SFM_MAP_UNI_RSVD; 238c2ecf20Sopenharmony_ci else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) 248c2ecf20Sopenharmony_ci map_type = SFU_MAP_UNI_RSVD; 258c2ecf20Sopenharmony_ci else 268c2ecf20Sopenharmony_ci map_type = NO_MAP_UNI_RSVD; 278c2ecf20Sopenharmony_ci 288c2ecf20Sopenharmony_ci return map_type; 298c2ecf20Sopenharmony_ci} 308c2ecf20Sopenharmony_ci 318c2ecf20Sopenharmony_ci/* Convert character using the SFU - "Services for Unix" remapping range */ 328c2ecf20Sopenharmony_cistatic bool 338c2ecf20Sopenharmony_ciconvert_sfu_char(const __u16 src_char, char *target) 348c2ecf20Sopenharmony_ci{ 358c2ecf20Sopenharmony_ci /* 368c2ecf20Sopenharmony_ci * BB: Cannot handle remapping UNI_SLASH until all the calls to 378c2ecf20Sopenharmony_ci * build_path_from_dentry are modified, as they use slash as 388c2ecf20Sopenharmony_ci * separator. 398c2ecf20Sopenharmony_ci */ 408c2ecf20Sopenharmony_ci switch (src_char) { 418c2ecf20Sopenharmony_ci case UNI_COLON: 428c2ecf20Sopenharmony_ci *target = ':'; 438c2ecf20Sopenharmony_ci break; 448c2ecf20Sopenharmony_ci case UNI_ASTERISK: 458c2ecf20Sopenharmony_ci *target = '*'; 468c2ecf20Sopenharmony_ci break; 478c2ecf20Sopenharmony_ci case UNI_QUESTION: 488c2ecf20Sopenharmony_ci *target = '?'; 498c2ecf20Sopenharmony_ci break; 508c2ecf20Sopenharmony_ci case UNI_PIPE: 518c2ecf20Sopenharmony_ci *target = '|'; 528c2ecf20Sopenharmony_ci break; 538c2ecf20Sopenharmony_ci case UNI_GRTRTHAN: 548c2ecf20Sopenharmony_ci *target = '>'; 558c2ecf20Sopenharmony_ci break; 568c2ecf20Sopenharmony_ci case UNI_LESSTHAN: 578c2ecf20Sopenharmony_ci *target = '<'; 588c2ecf20Sopenharmony_ci break; 598c2ecf20Sopenharmony_ci default: 608c2ecf20Sopenharmony_ci return false; 618c2ecf20Sopenharmony_ci } 628c2ecf20Sopenharmony_ci return true; 638c2ecf20Sopenharmony_ci} 648c2ecf20Sopenharmony_ci 658c2ecf20Sopenharmony_ci/* Convert character using the SFM - "Services for Mac" remapping range */ 668c2ecf20Sopenharmony_cistatic bool 678c2ecf20Sopenharmony_ciconvert_sfm_char(const __u16 src_char, char *target) 688c2ecf20Sopenharmony_ci{ 698c2ecf20Sopenharmony_ci if (src_char >= 0xF001 && src_char <= 0xF01F) { 708c2ecf20Sopenharmony_ci *target = src_char - 0xF000; 718c2ecf20Sopenharmony_ci return true; 728c2ecf20Sopenharmony_ci } 738c2ecf20Sopenharmony_ci switch (src_char) { 748c2ecf20Sopenharmony_ci case SFM_COLON: 758c2ecf20Sopenharmony_ci *target = ':'; 768c2ecf20Sopenharmony_ci break; 778c2ecf20Sopenharmony_ci case SFM_DOUBLEQUOTE: 788c2ecf20Sopenharmony_ci *target = '"'; 798c2ecf20Sopenharmony_ci break; 808c2ecf20Sopenharmony_ci case SFM_ASTERISK: 818c2ecf20Sopenharmony_ci *target = '*'; 828c2ecf20Sopenharmony_ci break; 838c2ecf20Sopenharmony_ci case SFM_QUESTION: 848c2ecf20Sopenharmony_ci *target = '?'; 858c2ecf20Sopenharmony_ci break; 868c2ecf20Sopenharmony_ci case SFM_PIPE: 878c2ecf20Sopenharmony_ci *target = '|'; 888c2ecf20Sopenharmony_ci break; 898c2ecf20Sopenharmony_ci case SFM_GRTRTHAN: 908c2ecf20Sopenharmony_ci *target = '>'; 918c2ecf20Sopenharmony_ci break; 928c2ecf20Sopenharmony_ci case SFM_LESSTHAN: 938c2ecf20Sopenharmony_ci *target = '<'; 948c2ecf20Sopenharmony_ci break; 958c2ecf20Sopenharmony_ci case SFM_SPACE: 968c2ecf20Sopenharmony_ci *target = ' '; 978c2ecf20Sopenharmony_ci break; 988c2ecf20Sopenharmony_ci case SFM_PERIOD: 998c2ecf20Sopenharmony_ci *target = '.'; 1008c2ecf20Sopenharmony_ci break; 1018c2ecf20Sopenharmony_ci default: 1028c2ecf20Sopenharmony_ci return false; 1038c2ecf20Sopenharmony_ci } 1048c2ecf20Sopenharmony_ci return true; 1058c2ecf20Sopenharmony_ci} 1068c2ecf20Sopenharmony_ci 1078c2ecf20Sopenharmony_ci 1088c2ecf20Sopenharmony_ci/* 1098c2ecf20Sopenharmony_ci * cifs_mapchar - convert a host-endian char to proper char in codepage 1108c2ecf20Sopenharmony_ci * @target - where converted character should be copied 1118c2ecf20Sopenharmony_ci * @src_char - 2 byte host-endian source character 1128c2ecf20Sopenharmony_ci * @cp - codepage to which character should be converted 1138c2ecf20Sopenharmony_ci * @map_type - How should the 7 NTFS/SMB reserved characters be mapped to UCS2? 1148c2ecf20Sopenharmony_ci * 1158c2ecf20Sopenharmony_ci * This function handles the conversion of a single character. It is the 1168c2ecf20Sopenharmony_ci * responsibility of the caller to ensure that the target buffer is large 1178c2ecf20Sopenharmony_ci * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 1188c2ecf20Sopenharmony_ci */ 1198c2ecf20Sopenharmony_cistatic int 1208c2ecf20Sopenharmony_cicifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, 1218c2ecf20Sopenharmony_ci int maptype) 1228c2ecf20Sopenharmony_ci{ 1238c2ecf20Sopenharmony_ci int len = 1; 1248c2ecf20Sopenharmony_ci __u16 src_char; 1258c2ecf20Sopenharmony_ci 1268c2ecf20Sopenharmony_ci src_char = *from; 1278c2ecf20Sopenharmony_ci 1288c2ecf20Sopenharmony_ci if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) 1298c2ecf20Sopenharmony_ci return len; 1308c2ecf20Sopenharmony_ci else if ((maptype == SFU_MAP_UNI_RSVD) && 1318c2ecf20Sopenharmony_ci convert_sfu_char(src_char, target)) 1328c2ecf20Sopenharmony_ci return len; 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci /* if character not one of seven in special remap set */ 1358c2ecf20Sopenharmony_ci len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 1368c2ecf20Sopenharmony_ci if (len <= 0) 1378c2ecf20Sopenharmony_ci goto surrogate_pair; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci return len; 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_cisurrogate_pair: 1428c2ecf20Sopenharmony_ci /* convert SURROGATE_PAIR and IVS */ 1438c2ecf20Sopenharmony_ci if (strcmp(cp->charset, "utf8")) 1448c2ecf20Sopenharmony_ci goto unknown; 1458c2ecf20Sopenharmony_ci len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); 1468c2ecf20Sopenharmony_ci if (len <= 0) 1478c2ecf20Sopenharmony_ci goto unknown; 1488c2ecf20Sopenharmony_ci return len; 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ciunknown: 1518c2ecf20Sopenharmony_ci *target = '?'; 1528c2ecf20Sopenharmony_ci len = 1; 1538c2ecf20Sopenharmony_ci return len; 1548c2ecf20Sopenharmony_ci} 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci/* 1578c2ecf20Sopenharmony_ci * cifs_from_utf16 - convert utf16le string to local charset 1588c2ecf20Sopenharmony_ci * @to - destination buffer 1598c2ecf20Sopenharmony_ci * @from - source buffer 1608c2ecf20Sopenharmony_ci * @tolen - destination buffer size (in bytes) 1618c2ecf20Sopenharmony_ci * @fromlen - source buffer size (in bytes) 1628c2ecf20Sopenharmony_ci * @codepage - codepage to which characters should be converted 1638c2ecf20Sopenharmony_ci * @mapchar - should characters be remapped according to the mapchars option? 1648c2ecf20Sopenharmony_ci * 1658c2ecf20Sopenharmony_ci * Convert a little-endian utf16le string (as sent by the server) to a string 1668c2ecf20Sopenharmony_ci * in the provided codepage. The tolen and fromlen parameters are to ensure 1678c2ecf20Sopenharmony_ci * that the code doesn't walk off of the end of the buffer (which is always 1688c2ecf20Sopenharmony_ci * a danger if the alignment of the source buffer is off). The destination 1698c2ecf20Sopenharmony_ci * string is always properly null terminated and fits in the destination 1708c2ecf20Sopenharmony_ci * buffer. Returns the length of the destination string in bytes (including 1718c2ecf20Sopenharmony_ci * null terminator). 1728c2ecf20Sopenharmony_ci * 1738c2ecf20Sopenharmony_ci * Note that some windows versions actually send multiword UTF-16 characters 1748c2ecf20Sopenharmony_ci * instead of straight UTF16-2. The linux nls routines however aren't able to 1758c2ecf20Sopenharmony_ci * deal with those characters properly. In the event that we get some of 1768c2ecf20Sopenharmony_ci * those characters, they won't be translated properly. 1778c2ecf20Sopenharmony_ci */ 1788c2ecf20Sopenharmony_ciint 1798c2ecf20Sopenharmony_cicifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 1808c2ecf20Sopenharmony_ci const struct nls_table *codepage, int map_type) 1818c2ecf20Sopenharmony_ci{ 1828c2ecf20Sopenharmony_ci int i, charlen, safelen; 1838c2ecf20Sopenharmony_ci int outlen = 0; 1848c2ecf20Sopenharmony_ci int nullsize = nls_nullsize(codepage); 1858c2ecf20Sopenharmony_ci int fromwords = fromlen / 2; 1868c2ecf20Sopenharmony_ci char tmp[NLS_MAX_CHARSET_SIZE]; 1878c2ecf20Sopenharmony_ci __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci /* 1908c2ecf20Sopenharmony_ci * because the chars can be of varying widths, we need to take care 1918c2ecf20Sopenharmony_ci * not to overflow the destination buffer when we get close to the 1928c2ecf20Sopenharmony_ci * end of it. Until we get to this offset, we don't need to check 1938c2ecf20Sopenharmony_ci * for overflow however. 1948c2ecf20Sopenharmony_ci */ 1958c2ecf20Sopenharmony_ci safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 1968c2ecf20Sopenharmony_ci 1978c2ecf20Sopenharmony_ci for (i = 0; i < fromwords; i++) { 1988c2ecf20Sopenharmony_ci ftmp[0] = get_unaligned_le16(&from[i]); 1998c2ecf20Sopenharmony_ci if (ftmp[0] == 0) 2008c2ecf20Sopenharmony_ci break; 2018c2ecf20Sopenharmony_ci if (i + 1 < fromwords) 2028c2ecf20Sopenharmony_ci ftmp[1] = get_unaligned_le16(&from[i + 1]); 2038c2ecf20Sopenharmony_ci else 2048c2ecf20Sopenharmony_ci ftmp[1] = 0; 2058c2ecf20Sopenharmony_ci if (i + 2 < fromwords) 2068c2ecf20Sopenharmony_ci ftmp[2] = get_unaligned_le16(&from[i + 2]); 2078c2ecf20Sopenharmony_ci else 2088c2ecf20Sopenharmony_ci ftmp[2] = 0; 2098c2ecf20Sopenharmony_ci 2108c2ecf20Sopenharmony_ci /* 2118c2ecf20Sopenharmony_ci * check to see if converting this character might make the 2128c2ecf20Sopenharmony_ci * conversion bleed into the null terminator 2138c2ecf20Sopenharmony_ci */ 2148c2ecf20Sopenharmony_ci if (outlen >= safelen) { 2158c2ecf20Sopenharmony_ci charlen = cifs_mapchar(tmp, ftmp, codepage, map_type); 2168c2ecf20Sopenharmony_ci if ((outlen + charlen) > (tolen - nullsize)) 2178c2ecf20Sopenharmony_ci break; 2188c2ecf20Sopenharmony_ci } 2198c2ecf20Sopenharmony_ci 2208c2ecf20Sopenharmony_ci /* put converted char into 'to' buffer */ 2218c2ecf20Sopenharmony_ci charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); 2228c2ecf20Sopenharmony_ci outlen += charlen; 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci /* charlen (=bytes of UTF-8 for 1 character) 2258c2ecf20Sopenharmony_ci * 4bytes UTF-8(surrogate pair) is charlen=4 2268c2ecf20Sopenharmony_ci * (4bytes UTF-16 code) 2278c2ecf20Sopenharmony_ci * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 2288c2ecf20Sopenharmony_ci * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ 2298c2ecf20Sopenharmony_ci if (charlen == 4) 2308c2ecf20Sopenharmony_ci i++; 2318c2ecf20Sopenharmony_ci else if (charlen >= 5) 2328c2ecf20Sopenharmony_ci /* 5-6bytes UTF-8 */ 2338c2ecf20Sopenharmony_ci i += 2; 2348c2ecf20Sopenharmony_ci } 2358c2ecf20Sopenharmony_ci 2368c2ecf20Sopenharmony_ci /* properly null-terminate string */ 2378c2ecf20Sopenharmony_ci for (i = 0; i < nullsize; i++) 2388c2ecf20Sopenharmony_ci to[outlen++] = 0; 2398c2ecf20Sopenharmony_ci 2408c2ecf20Sopenharmony_ci return outlen; 2418c2ecf20Sopenharmony_ci} 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci/* 2448c2ecf20Sopenharmony_ci * NAME: cifs_strtoUTF16() 2458c2ecf20Sopenharmony_ci * 2468c2ecf20Sopenharmony_ci * FUNCTION: Convert character string to unicode string 2478c2ecf20Sopenharmony_ci * 2488c2ecf20Sopenharmony_ci */ 2498c2ecf20Sopenharmony_ciint 2508c2ecf20Sopenharmony_cicifs_strtoUTF16(__le16 *to, const char *from, int len, 2518c2ecf20Sopenharmony_ci const struct nls_table *codepage) 2528c2ecf20Sopenharmony_ci{ 2538c2ecf20Sopenharmony_ci int charlen; 2548c2ecf20Sopenharmony_ci int i; 2558c2ecf20Sopenharmony_ci wchar_t wchar_to; /* needed to quiet sparse */ 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci /* special case for utf8 to handle no plane0 chars */ 2588c2ecf20Sopenharmony_ci if (!strcmp(codepage->charset, "utf8")) { 2598c2ecf20Sopenharmony_ci /* 2608c2ecf20Sopenharmony_ci * convert utf8 -> utf16, we assume we have enough space 2618c2ecf20Sopenharmony_ci * as caller should have assumed conversion does not overflow 2628c2ecf20Sopenharmony_ci * in destination len is length in wchar_t units (16bits) 2638c2ecf20Sopenharmony_ci */ 2648c2ecf20Sopenharmony_ci i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, 2658c2ecf20Sopenharmony_ci (wchar_t *) to, len); 2668c2ecf20Sopenharmony_ci 2678c2ecf20Sopenharmony_ci /* if success terminate and exit */ 2688c2ecf20Sopenharmony_ci if (i >= 0) 2698c2ecf20Sopenharmony_ci goto success; 2708c2ecf20Sopenharmony_ci /* 2718c2ecf20Sopenharmony_ci * if fails fall back to UCS encoding as this 2728c2ecf20Sopenharmony_ci * function should not return negative values 2738c2ecf20Sopenharmony_ci * currently can fail only if source contains 2748c2ecf20Sopenharmony_ci * invalid encoded characters 2758c2ecf20Sopenharmony_ci */ 2768c2ecf20Sopenharmony_ci } 2778c2ecf20Sopenharmony_ci 2788c2ecf20Sopenharmony_ci for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 2798c2ecf20Sopenharmony_ci charlen = codepage->char2uni(from, len, &wchar_to); 2808c2ecf20Sopenharmony_ci if (charlen < 1) { 2818c2ecf20Sopenharmony_ci cifs_dbg(VFS, "strtoUTF16: char2uni of 0x%x returned %d\n", 2828c2ecf20Sopenharmony_ci *from, charlen); 2838c2ecf20Sopenharmony_ci /* A question mark */ 2848c2ecf20Sopenharmony_ci wchar_to = 0x003f; 2858c2ecf20Sopenharmony_ci charlen = 1; 2868c2ecf20Sopenharmony_ci } 2878c2ecf20Sopenharmony_ci put_unaligned_le16(wchar_to, &to[i]); 2888c2ecf20Sopenharmony_ci } 2898c2ecf20Sopenharmony_ci 2908c2ecf20Sopenharmony_cisuccess: 2918c2ecf20Sopenharmony_ci put_unaligned_le16(0, &to[i]); 2928c2ecf20Sopenharmony_ci return i; 2938c2ecf20Sopenharmony_ci} 2948c2ecf20Sopenharmony_ci 2958c2ecf20Sopenharmony_ci/* 2968c2ecf20Sopenharmony_ci * cifs_utf16_bytes - how long will a string be after conversion? 2978c2ecf20Sopenharmony_ci * @utf16 - pointer to input string 2988c2ecf20Sopenharmony_ci * @maxbytes - don't go past this many bytes of input string 2998c2ecf20Sopenharmony_ci * @codepage - destination codepage 3008c2ecf20Sopenharmony_ci * 3018c2ecf20Sopenharmony_ci * Walk a utf16le string and return the number of bytes that the string will 3028c2ecf20Sopenharmony_ci * be after being converted to the given charset, not including any null 3038c2ecf20Sopenharmony_ci * termination required. Don't walk past maxbytes in the source buffer. 3048c2ecf20Sopenharmony_ci */ 3058c2ecf20Sopenharmony_ciint 3068c2ecf20Sopenharmony_cicifs_utf16_bytes(const __le16 *from, int maxbytes, 3078c2ecf20Sopenharmony_ci const struct nls_table *codepage) 3088c2ecf20Sopenharmony_ci{ 3098c2ecf20Sopenharmony_ci int i; 3108c2ecf20Sopenharmony_ci int charlen, outlen = 0; 3118c2ecf20Sopenharmony_ci int maxwords = maxbytes / 2; 3128c2ecf20Sopenharmony_ci char tmp[NLS_MAX_CHARSET_SIZE]; 3138c2ecf20Sopenharmony_ci __u16 ftmp[3]; 3148c2ecf20Sopenharmony_ci 3158c2ecf20Sopenharmony_ci for (i = 0; i < maxwords; i++) { 3168c2ecf20Sopenharmony_ci ftmp[0] = get_unaligned_le16(&from[i]); 3178c2ecf20Sopenharmony_ci if (ftmp[0] == 0) 3188c2ecf20Sopenharmony_ci break; 3198c2ecf20Sopenharmony_ci if (i + 1 < maxwords) 3208c2ecf20Sopenharmony_ci ftmp[1] = get_unaligned_le16(&from[i + 1]); 3218c2ecf20Sopenharmony_ci else 3228c2ecf20Sopenharmony_ci ftmp[1] = 0; 3238c2ecf20Sopenharmony_ci if (i + 2 < maxwords) 3248c2ecf20Sopenharmony_ci ftmp[2] = get_unaligned_le16(&from[i + 2]); 3258c2ecf20Sopenharmony_ci else 3268c2ecf20Sopenharmony_ci ftmp[2] = 0; 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); 3298c2ecf20Sopenharmony_ci outlen += charlen; 3308c2ecf20Sopenharmony_ci } 3318c2ecf20Sopenharmony_ci 3328c2ecf20Sopenharmony_ci return outlen; 3338c2ecf20Sopenharmony_ci} 3348c2ecf20Sopenharmony_ci 3358c2ecf20Sopenharmony_ci/* 3368c2ecf20Sopenharmony_ci * cifs_strndup_from_utf16 - copy a string from wire format to the local 3378c2ecf20Sopenharmony_ci * codepage 3388c2ecf20Sopenharmony_ci * @src - source string 3398c2ecf20Sopenharmony_ci * @maxlen - don't walk past this many bytes in the source string 3408c2ecf20Sopenharmony_ci * @is_unicode - is this a unicode string? 3418c2ecf20Sopenharmony_ci * @codepage - destination codepage 3428c2ecf20Sopenharmony_ci * 3438c2ecf20Sopenharmony_ci * Take a string given by the server, convert it to the local codepage and 3448c2ecf20Sopenharmony_ci * put it in a new buffer. Returns a pointer to the new string or NULL on 3458c2ecf20Sopenharmony_ci * error. 3468c2ecf20Sopenharmony_ci */ 3478c2ecf20Sopenharmony_cichar * 3488c2ecf20Sopenharmony_cicifs_strndup_from_utf16(const char *src, const int maxlen, 3498c2ecf20Sopenharmony_ci const bool is_unicode, const struct nls_table *codepage) 3508c2ecf20Sopenharmony_ci{ 3518c2ecf20Sopenharmony_ci int len; 3528c2ecf20Sopenharmony_ci char *dst; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci if (is_unicode) { 3558c2ecf20Sopenharmony_ci len = cifs_utf16_bytes((__le16 *) src, maxlen, codepage); 3568c2ecf20Sopenharmony_ci len += nls_nullsize(codepage); 3578c2ecf20Sopenharmony_ci dst = kmalloc(len, GFP_KERNEL); 3588c2ecf20Sopenharmony_ci if (!dst) 3598c2ecf20Sopenharmony_ci return NULL; 3608c2ecf20Sopenharmony_ci cifs_from_utf16(dst, (__le16 *) src, len, maxlen, codepage, 3618c2ecf20Sopenharmony_ci NO_MAP_UNI_RSVD); 3628c2ecf20Sopenharmony_ci } else { 3638c2ecf20Sopenharmony_ci dst = kstrndup(src, maxlen, GFP_KERNEL); 3648c2ecf20Sopenharmony_ci } 3658c2ecf20Sopenharmony_ci 3668c2ecf20Sopenharmony_ci return dst; 3678c2ecf20Sopenharmony_ci} 3688c2ecf20Sopenharmony_ci 3698c2ecf20Sopenharmony_cistatic __le16 convert_to_sfu_char(char src_char) 3708c2ecf20Sopenharmony_ci{ 3718c2ecf20Sopenharmony_ci __le16 dest_char; 3728c2ecf20Sopenharmony_ci 3738c2ecf20Sopenharmony_ci switch (src_char) { 3748c2ecf20Sopenharmony_ci case ':': 3758c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(UNI_COLON); 3768c2ecf20Sopenharmony_ci break; 3778c2ecf20Sopenharmony_ci case '*': 3788c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(UNI_ASTERISK); 3798c2ecf20Sopenharmony_ci break; 3808c2ecf20Sopenharmony_ci case '?': 3818c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(UNI_QUESTION); 3828c2ecf20Sopenharmony_ci break; 3838c2ecf20Sopenharmony_ci case '<': 3848c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(UNI_LESSTHAN); 3858c2ecf20Sopenharmony_ci break; 3868c2ecf20Sopenharmony_ci case '>': 3878c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(UNI_GRTRTHAN); 3888c2ecf20Sopenharmony_ci break; 3898c2ecf20Sopenharmony_ci case '|': 3908c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(UNI_PIPE); 3918c2ecf20Sopenharmony_ci break; 3928c2ecf20Sopenharmony_ci default: 3938c2ecf20Sopenharmony_ci dest_char = 0; 3948c2ecf20Sopenharmony_ci } 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci return dest_char; 3978c2ecf20Sopenharmony_ci} 3988c2ecf20Sopenharmony_ci 3998c2ecf20Sopenharmony_cistatic __le16 convert_to_sfm_char(char src_char, bool end_of_string) 4008c2ecf20Sopenharmony_ci{ 4018c2ecf20Sopenharmony_ci __le16 dest_char; 4028c2ecf20Sopenharmony_ci 4038c2ecf20Sopenharmony_ci if (src_char >= 0x01 && src_char <= 0x1F) { 4048c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(src_char + 0xF000); 4058c2ecf20Sopenharmony_ci return dest_char; 4068c2ecf20Sopenharmony_ci } 4078c2ecf20Sopenharmony_ci switch (src_char) { 4088c2ecf20Sopenharmony_ci case ':': 4098c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_COLON); 4108c2ecf20Sopenharmony_ci break; 4118c2ecf20Sopenharmony_ci case '"': 4128c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_DOUBLEQUOTE); 4138c2ecf20Sopenharmony_ci break; 4148c2ecf20Sopenharmony_ci case '*': 4158c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_ASTERISK); 4168c2ecf20Sopenharmony_ci break; 4178c2ecf20Sopenharmony_ci case '?': 4188c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_QUESTION); 4198c2ecf20Sopenharmony_ci break; 4208c2ecf20Sopenharmony_ci case '<': 4218c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_LESSTHAN); 4228c2ecf20Sopenharmony_ci break; 4238c2ecf20Sopenharmony_ci case '>': 4248c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_GRTRTHAN); 4258c2ecf20Sopenharmony_ci break; 4268c2ecf20Sopenharmony_ci case '|': 4278c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_PIPE); 4288c2ecf20Sopenharmony_ci break; 4298c2ecf20Sopenharmony_ci case '.': 4308c2ecf20Sopenharmony_ci if (end_of_string) 4318c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_PERIOD); 4328c2ecf20Sopenharmony_ci else 4338c2ecf20Sopenharmony_ci dest_char = 0; 4348c2ecf20Sopenharmony_ci break; 4358c2ecf20Sopenharmony_ci case ' ': 4368c2ecf20Sopenharmony_ci if (end_of_string) 4378c2ecf20Sopenharmony_ci dest_char = cpu_to_le16(SFM_SPACE); 4388c2ecf20Sopenharmony_ci else 4398c2ecf20Sopenharmony_ci dest_char = 0; 4408c2ecf20Sopenharmony_ci break; 4418c2ecf20Sopenharmony_ci default: 4428c2ecf20Sopenharmony_ci dest_char = 0; 4438c2ecf20Sopenharmony_ci } 4448c2ecf20Sopenharmony_ci 4458c2ecf20Sopenharmony_ci return dest_char; 4468c2ecf20Sopenharmony_ci} 4478c2ecf20Sopenharmony_ci 4488c2ecf20Sopenharmony_ci/* 4498c2ecf20Sopenharmony_ci * Convert 16 bit Unicode pathname to wire format from string in current code 4508c2ecf20Sopenharmony_ci * page. Conversion may involve remapping up the six characters that are 4518c2ecf20Sopenharmony_ci * only legal in POSIX-like OS (if they are present in the string). Path 4528c2ecf20Sopenharmony_ci * names are little endian 16 bit Unicode on the wire 4538c2ecf20Sopenharmony_ci */ 4548c2ecf20Sopenharmony_ciint 4558c2ecf20Sopenharmony_cicifsConvertToUTF16(__le16 *target, const char *source, int srclen, 4568c2ecf20Sopenharmony_ci const struct nls_table *cp, int map_chars) 4578c2ecf20Sopenharmony_ci{ 4588c2ecf20Sopenharmony_ci int i, charlen; 4598c2ecf20Sopenharmony_ci int j = 0; 4608c2ecf20Sopenharmony_ci char src_char; 4618c2ecf20Sopenharmony_ci __le16 dst_char; 4628c2ecf20Sopenharmony_ci wchar_t tmp; 4638c2ecf20Sopenharmony_ci wchar_t *wchar_to; /* UTF-16 */ 4648c2ecf20Sopenharmony_ci int ret; 4658c2ecf20Sopenharmony_ci unicode_t u; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci if (map_chars == NO_MAP_UNI_RSVD) 4688c2ecf20Sopenharmony_ci return cifs_strtoUTF16(target, source, PATH_MAX, cp); 4698c2ecf20Sopenharmony_ci 4708c2ecf20Sopenharmony_ci wchar_to = kzalloc(6, GFP_KERNEL); 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci for (i = 0; i < srclen; j++) { 4738c2ecf20Sopenharmony_ci src_char = source[i]; 4748c2ecf20Sopenharmony_ci charlen = 1; 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci /* check if end of string */ 4778c2ecf20Sopenharmony_ci if (src_char == 0) 4788c2ecf20Sopenharmony_ci goto ctoUTF16_out; 4798c2ecf20Sopenharmony_ci 4808c2ecf20Sopenharmony_ci /* see if we must remap this char */ 4818c2ecf20Sopenharmony_ci if (map_chars == SFU_MAP_UNI_RSVD) 4828c2ecf20Sopenharmony_ci dst_char = convert_to_sfu_char(src_char); 4838c2ecf20Sopenharmony_ci else if (map_chars == SFM_MAP_UNI_RSVD) { 4848c2ecf20Sopenharmony_ci bool end_of_string; 4858c2ecf20Sopenharmony_ci 4868c2ecf20Sopenharmony_ci /** 4878c2ecf20Sopenharmony_ci * Remap spaces and periods found at the end of every 4888c2ecf20Sopenharmony_ci * component of the path. The special cases of '.' and 4898c2ecf20Sopenharmony_ci * '..' do not need to be dealt with explicitly because 4908c2ecf20Sopenharmony_ci * they are addressed in namei.c:link_path_walk(). 4918c2ecf20Sopenharmony_ci **/ 4928c2ecf20Sopenharmony_ci if ((i == srclen - 1) || (source[i+1] == '\\')) 4938c2ecf20Sopenharmony_ci end_of_string = true; 4948c2ecf20Sopenharmony_ci else 4958c2ecf20Sopenharmony_ci end_of_string = false; 4968c2ecf20Sopenharmony_ci 4978c2ecf20Sopenharmony_ci dst_char = convert_to_sfm_char(src_char, end_of_string); 4988c2ecf20Sopenharmony_ci } else 4998c2ecf20Sopenharmony_ci dst_char = 0; 5008c2ecf20Sopenharmony_ci /* 5018c2ecf20Sopenharmony_ci * FIXME: We can not handle remapping backslash (UNI_SLASH) 5028c2ecf20Sopenharmony_ci * until all the calls to build_path_from_dentry are modified, 5038c2ecf20Sopenharmony_ci * as they use backslash as separator. 5048c2ecf20Sopenharmony_ci */ 5058c2ecf20Sopenharmony_ci if (dst_char == 0) { 5068c2ecf20Sopenharmony_ci charlen = cp->char2uni(source + i, srclen - i, &tmp); 5078c2ecf20Sopenharmony_ci dst_char = cpu_to_le16(tmp); 5088c2ecf20Sopenharmony_ci 5098c2ecf20Sopenharmony_ci /* 5108c2ecf20Sopenharmony_ci * if no match, use question mark, which at least in 5118c2ecf20Sopenharmony_ci * some cases serves as wild card 5128c2ecf20Sopenharmony_ci */ 5138c2ecf20Sopenharmony_ci if (charlen > 0) 5148c2ecf20Sopenharmony_ci goto ctoUTF16; 5158c2ecf20Sopenharmony_ci 5168c2ecf20Sopenharmony_ci /* convert SURROGATE_PAIR */ 5178c2ecf20Sopenharmony_ci if (strcmp(cp->charset, "utf8") || !wchar_to) 5188c2ecf20Sopenharmony_ci goto unknown; 5198c2ecf20Sopenharmony_ci if (*(source + i) & 0x80) { 5208c2ecf20Sopenharmony_ci charlen = utf8_to_utf32(source + i, 6, &u); 5218c2ecf20Sopenharmony_ci if (charlen < 0) 5228c2ecf20Sopenharmony_ci goto unknown; 5238c2ecf20Sopenharmony_ci } else 5248c2ecf20Sopenharmony_ci goto unknown; 5258c2ecf20Sopenharmony_ci ret = utf8s_to_utf16s(source + i, charlen, 5268c2ecf20Sopenharmony_ci UTF16_LITTLE_ENDIAN, 5278c2ecf20Sopenharmony_ci wchar_to, 6); 5288c2ecf20Sopenharmony_ci if (ret < 0) 5298c2ecf20Sopenharmony_ci goto unknown; 5308c2ecf20Sopenharmony_ci 5318c2ecf20Sopenharmony_ci i += charlen; 5328c2ecf20Sopenharmony_ci dst_char = cpu_to_le16(*wchar_to); 5338c2ecf20Sopenharmony_ci if (charlen <= 3) 5348c2ecf20Sopenharmony_ci /* 1-3bytes UTF-8 to 2bytes UTF-16 */ 5358c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5368c2ecf20Sopenharmony_ci else if (charlen == 4) { 5378c2ecf20Sopenharmony_ci /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 5388c2ecf20Sopenharmony_ci * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 5398c2ecf20Sopenharmony_ci * (charlen=3+4 or 4+4) */ 5408c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5418c2ecf20Sopenharmony_ci dst_char = cpu_to_le16(*(wchar_to + 1)); 5428c2ecf20Sopenharmony_ci j++; 5438c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5448c2ecf20Sopenharmony_ci } else if (charlen >= 5) { 5458c2ecf20Sopenharmony_ci /* 5-6bytes UTF-8 to 6bytes UTF-16 */ 5468c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5478c2ecf20Sopenharmony_ci dst_char = cpu_to_le16(*(wchar_to + 1)); 5488c2ecf20Sopenharmony_ci j++; 5498c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5508c2ecf20Sopenharmony_ci dst_char = cpu_to_le16(*(wchar_to + 2)); 5518c2ecf20Sopenharmony_ci j++; 5528c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5538c2ecf20Sopenharmony_ci } 5548c2ecf20Sopenharmony_ci continue; 5558c2ecf20Sopenharmony_ci 5568c2ecf20Sopenharmony_ciunknown: 5578c2ecf20Sopenharmony_ci dst_char = cpu_to_le16(0x003f); 5588c2ecf20Sopenharmony_ci charlen = 1; 5598c2ecf20Sopenharmony_ci } 5608c2ecf20Sopenharmony_ci 5618c2ecf20Sopenharmony_cictoUTF16: 5628c2ecf20Sopenharmony_ci /* 5638c2ecf20Sopenharmony_ci * character may take more than one byte in the source string, 5648c2ecf20Sopenharmony_ci * but will take exactly two bytes in the target string 5658c2ecf20Sopenharmony_ci */ 5668c2ecf20Sopenharmony_ci i += charlen; 5678c2ecf20Sopenharmony_ci put_unaligned(dst_char, &target[j]); 5688c2ecf20Sopenharmony_ci } 5698c2ecf20Sopenharmony_ci 5708c2ecf20Sopenharmony_cictoUTF16_out: 5718c2ecf20Sopenharmony_ci put_unaligned(0, &target[j]); /* Null terminate target unicode string */ 5728c2ecf20Sopenharmony_ci kfree(wchar_to); 5738c2ecf20Sopenharmony_ci return j; 5748c2ecf20Sopenharmony_ci} 5758c2ecf20Sopenharmony_ci 5768c2ecf20Sopenharmony_ci/* 5778c2ecf20Sopenharmony_ci * cifs_local_to_utf16_bytes - how long will a string be after conversion? 5788c2ecf20Sopenharmony_ci * @from - pointer to input string 5798c2ecf20Sopenharmony_ci * @maxbytes - don't go past this many bytes of input string 5808c2ecf20Sopenharmony_ci * @codepage - source codepage 5818c2ecf20Sopenharmony_ci * 5828c2ecf20Sopenharmony_ci * Walk a string and return the number of bytes that the string will 5838c2ecf20Sopenharmony_ci * be after being converted to the given charset, not including any null 5848c2ecf20Sopenharmony_ci * termination required. Don't walk past maxbytes in the source buffer. 5858c2ecf20Sopenharmony_ci */ 5868c2ecf20Sopenharmony_ci 5878c2ecf20Sopenharmony_cistatic int 5888c2ecf20Sopenharmony_cicifs_local_to_utf16_bytes(const char *from, int len, 5898c2ecf20Sopenharmony_ci const struct nls_table *codepage) 5908c2ecf20Sopenharmony_ci{ 5918c2ecf20Sopenharmony_ci int charlen; 5928c2ecf20Sopenharmony_ci int i; 5938c2ecf20Sopenharmony_ci wchar_t wchar_to; 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci for (i = 0; len && *from; i++, from += charlen, len -= charlen) { 5968c2ecf20Sopenharmony_ci charlen = codepage->char2uni(from, len, &wchar_to); 5978c2ecf20Sopenharmony_ci /* Failed conversion defaults to a question mark */ 5988c2ecf20Sopenharmony_ci if (charlen < 1) 5998c2ecf20Sopenharmony_ci charlen = 1; 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci return 2 * i; /* UTF16 characters are two bytes */ 6028c2ecf20Sopenharmony_ci} 6038c2ecf20Sopenharmony_ci 6048c2ecf20Sopenharmony_ci/* 6058c2ecf20Sopenharmony_ci * cifs_strndup_to_utf16 - copy a string to wire format from the local codepage 6068c2ecf20Sopenharmony_ci * @src - source string 6078c2ecf20Sopenharmony_ci * @maxlen - don't walk past this many bytes in the source string 6088c2ecf20Sopenharmony_ci * @utf16_len - the length of the allocated string in bytes (including null) 6098c2ecf20Sopenharmony_ci * @cp - source codepage 6108c2ecf20Sopenharmony_ci * @remap - map special chars 6118c2ecf20Sopenharmony_ci * 6128c2ecf20Sopenharmony_ci * Take a string convert it from the local codepage to UTF16 and 6138c2ecf20Sopenharmony_ci * put it in a new buffer. Returns a pointer to the new string or NULL on 6148c2ecf20Sopenharmony_ci * error. 6158c2ecf20Sopenharmony_ci */ 6168c2ecf20Sopenharmony_ci__le16 * 6178c2ecf20Sopenharmony_cicifs_strndup_to_utf16(const char *src, const int maxlen, int *utf16_len, 6188c2ecf20Sopenharmony_ci const struct nls_table *cp, int remap) 6198c2ecf20Sopenharmony_ci{ 6208c2ecf20Sopenharmony_ci int len; 6218c2ecf20Sopenharmony_ci __le16 *dst; 6228c2ecf20Sopenharmony_ci 6238c2ecf20Sopenharmony_ci len = cifs_local_to_utf16_bytes(src, maxlen, cp); 6248c2ecf20Sopenharmony_ci len += 2; /* NULL */ 6258c2ecf20Sopenharmony_ci dst = kmalloc(len, GFP_KERNEL); 6268c2ecf20Sopenharmony_ci if (!dst) { 6278c2ecf20Sopenharmony_ci *utf16_len = 0; 6288c2ecf20Sopenharmony_ci return NULL; 6298c2ecf20Sopenharmony_ci } 6308c2ecf20Sopenharmony_ci cifsConvertToUTF16(dst, src, strlen(src), cp, remap); 6318c2ecf20Sopenharmony_ci *utf16_len = len; 6328c2ecf20Sopenharmony_ci return dst; 6338c2ecf20Sopenharmony_ci} 634