113498266Sopenharmony_ci/*************************************************************************** 213498266Sopenharmony_ci * _ _ ____ _ 313498266Sopenharmony_ci * Project ___| | | | _ \| | 413498266Sopenharmony_ci * / __| | | | |_) | | 513498266Sopenharmony_ci * | (__| |_| | _ <| |___ 613498266Sopenharmony_ci * \___|\___/|_| \_\_____| 713498266Sopenharmony_ci * 813498266Sopenharmony_ci * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 913498266Sopenharmony_ci * 1013498266Sopenharmony_ci * This software is licensed as described in the file COPYING, which 1113498266Sopenharmony_ci * you should have received as part of this distribution. The terms 1213498266Sopenharmony_ci * are also available at https://curl.se/docs/copyright.html. 1313498266Sopenharmony_ci * 1413498266Sopenharmony_ci * You may opt to use, copy, modify, merge, publish, distribute and/or sell 1513498266Sopenharmony_ci * copies of the Software, and permit persons to whom the Software is 1613498266Sopenharmony_ci * furnished to do so, under the terms of the COPYING file. 1713498266Sopenharmony_ci * 1813498266Sopenharmony_ci * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 1913498266Sopenharmony_ci * KIND, either express or implied. 2013498266Sopenharmony_ci * 2113498266Sopenharmony_ci * SPDX-License-Identifier: curl 2213498266Sopenharmony_ci * 2313498266Sopenharmony_ci ***************************************************************************/ 2413498266Sopenharmony_ci 2513498266Sopenharmony_ci /* 2613498266Sopenharmony_ci * IDN conversions 2713498266Sopenharmony_ci */ 2813498266Sopenharmony_ci 2913498266Sopenharmony_ci#include "curl_setup.h" 3013498266Sopenharmony_ci#include "urldata.h" 3113498266Sopenharmony_ci#include "idn.h" 3213498266Sopenharmony_ci#include "sendf.h" 3313498266Sopenharmony_ci#include "curl_multibyte.h" 3413498266Sopenharmony_ci#include "warnless.h" 3513498266Sopenharmony_ci 3613498266Sopenharmony_ci#ifdef USE_LIBIDN2 3713498266Sopenharmony_ci#include <idn2.h> 3813498266Sopenharmony_ci 3913498266Sopenharmony_ci#if defined(_WIN32) && defined(UNICODE) 4013498266Sopenharmony_ci#define IDN2_LOOKUP(name, host, flags) \ 4113498266Sopenharmony_ci idn2_lookup_u8((const uint8_t *)name, (uint8_t **)host, flags) 4213498266Sopenharmony_ci#else 4313498266Sopenharmony_ci#define IDN2_LOOKUP(name, host, flags) \ 4413498266Sopenharmony_ci idn2_lookup_ul((const char *)name, (char **)host, flags) 4513498266Sopenharmony_ci#endif 4613498266Sopenharmony_ci#endif /* USE_LIBIDN2 */ 4713498266Sopenharmony_ci 4813498266Sopenharmony_ci/* The last 3 #include files should be in this order */ 4913498266Sopenharmony_ci#include "curl_printf.h" 5013498266Sopenharmony_ci#include "curl_memory.h" 5113498266Sopenharmony_ci#include "memdebug.h" 5213498266Sopenharmony_ci 5313498266Sopenharmony_ci#ifdef USE_WIN32_IDN 5413498266Sopenharmony_ci/* using Windows kernel32 and normaliz libraries. */ 5513498266Sopenharmony_ci 5613498266Sopenharmony_ci#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600 5713498266Sopenharmony_ciWINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags, 5813498266Sopenharmony_ci const WCHAR *lpUnicodeCharStr, 5913498266Sopenharmony_ci int cchUnicodeChar, 6013498266Sopenharmony_ci WCHAR *lpASCIICharStr, 6113498266Sopenharmony_ci int cchASCIIChar); 6213498266Sopenharmony_ciWINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags, 6313498266Sopenharmony_ci const WCHAR *lpASCIICharStr, 6413498266Sopenharmony_ci int cchASCIIChar, 6513498266Sopenharmony_ci WCHAR *lpUnicodeCharStr, 6613498266Sopenharmony_ci int cchUnicodeChar); 6713498266Sopenharmony_ci#endif 6813498266Sopenharmony_ci 6913498266Sopenharmony_ci#define IDN_MAX_LENGTH 255 7013498266Sopenharmony_ci 7113498266Sopenharmony_cistatic CURLcode win32_idn_to_ascii(const char *in, char **out) 7213498266Sopenharmony_ci{ 7313498266Sopenharmony_ci wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); 7413498266Sopenharmony_ci *out = NULL; 7513498266Sopenharmony_ci if(in_w) { 7613498266Sopenharmony_ci wchar_t punycode[IDN_MAX_LENGTH]; 7713498266Sopenharmony_ci int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode, 7813498266Sopenharmony_ci IDN_MAX_LENGTH); 7913498266Sopenharmony_ci curlx_unicodefree(in_w); 8013498266Sopenharmony_ci if(chars) { 8113498266Sopenharmony_ci char *mstr = curlx_convert_wchar_to_UTF8(punycode); 8213498266Sopenharmony_ci if(mstr) { 8313498266Sopenharmony_ci *out = strdup(mstr); 8413498266Sopenharmony_ci curlx_unicodefree(mstr); 8513498266Sopenharmony_ci if(!*out) 8613498266Sopenharmony_ci return CURLE_OUT_OF_MEMORY; 8713498266Sopenharmony_ci } 8813498266Sopenharmony_ci else 8913498266Sopenharmony_ci return CURLE_OUT_OF_MEMORY; 9013498266Sopenharmony_ci } 9113498266Sopenharmony_ci else 9213498266Sopenharmony_ci return CURLE_URL_MALFORMAT; 9313498266Sopenharmony_ci } 9413498266Sopenharmony_ci else 9513498266Sopenharmony_ci return CURLE_URL_MALFORMAT; 9613498266Sopenharmony_ci 9713498266Sopenharmony_ci return CURLE_OK; 9813498266Sopenharmony_ci} 9913498266Sopenharmony_ci 10013498266Sopenharmony_cistatic CURLcode win32_ascii_to_idn(const char *in, char **output) 10113498266Sopenharmony_ci{ 10213498266Sopenharmony_ci char *out = NULL; 10313498266Sopenharmony_ci 10413498266Sopenharmony_ci wchar_t *in_w = curlx_convert_UTF8_to_wchar(in); 10513498266Sopenharmony_ci if(in_w) { 10613498266Sopenharmony_ci WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */ 10713498266Sopenharmony_ci int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn, 10813498266Sopenharmony_ci IDN_MAX_LENGTH); 10913498266Sopenharmony_ci if(chars) { 11013498266Sopenharmony_ci /* 'chars' is "the number of characters retrieved" */ 11113498266Sopenharmony_ci char *mstr = curlx_convert_wchar_to_UTF8(idn); 11213498266Sopenharmony_ci if(mstr) { 11313498266Sopenharmony_ci out = strdup(mstr); 11413498266Sopenharmony_ci curlx_unicodefree(mstr); 11513498266Sopenharmony_ci if(!out) 11613498266Sopenharmony_ci return CURLE_OUT_OF_MEMORY; 11713498266Sopenharmony_ci } 11813498266Sopenharmony_ci } 11913498266Sopenharmony_ci else 12013498266Sopenharmony_ci return CURLE_URL_MALFORMAT; 12113498266Sopenharmony_ci } 12213498266Sopenharmony_ci else 12313498266Sopenharmony_ci return CURLE_URL_MALFORMAT; 12413498266Sopenharmony_ci *output = out; 12513498266Sopenharmony_ci return CURLE_OK; 12613498266Sopenharmony_ci} 12713498266Sopenharmony_ci 12813498266Sopenharmony_ci#endif /* USE_WIN32_IDN */ 12913498266Sopenharmony_ci 13013498266Sopenharmony_ci/* 13113498266Sopenharmony_ci * Helpers for IDNA conversions. 13213498266Sopenharmony_ci */ 13313498266Sopenharmony_cibool Curl_is_ASCII_name(const char *hostname) 13413498266Sopenharmony_ci{ 13513498266Sopenharmony_ci /* get an UNSIGNED local version of the pointer */ 13613498266Sopenharmony_ci const unsigned char *ch = (const unsigned char *)hostname; 13713498266Sopenharmony_ci 13813498266Sopenharmony_ci if(!hostname) /* bad input, consider it ASCII! */ 13913498266Sopenharmony_ci return TRUE; 14013498266Sopenharmony_ci 14113498266Sopenharmony_ci while(*ch) { 14213498266Sopenharmony_ci if(*ch++ & 0x80) 14313498266Sopenharmony_ci return FALSE; 14413498266Sopenharmony_ci } 14513498266Sopenharmony_ci return TRUE; 14613498266Sopenharmony_ci} 14713498266Sopenharmony_ci 14813498266Sopenharmony_ci#ifdef USE_IDN 14913498266Sopenharmony_ci/* 15013498266Sopenharmony_ci * Curl_idn_decode() returns an allocated IDN decoded string if it was 15113498266Sopenharmony_ci * possible. NULL on error. 15213498266Sopenharmony_ci * 15313498266Sopenharmony_ci * CURLE_URL_MALFORMAT - the host name could not be converted 15413498266Sopenharmony_ci * CURLE_OUT_OF_MEMORY - memory problem 15513498266Sopenharmony_ci * 15613498266Sopenharmony_ci */ 15713498266Sopenharmony_cistatic CURLcode idn_decode(const char *input, char **output) 15813498266Sopenharmony_ci{ 15913498266Sopenharmony_ci char *decoded = NULL; 16013498266Sopenharmony_ci CURLcode result = CURLE_OK; 16113498266Sopenharmony_ci#ifdef USE_LIBIDN2 16213498266Sopenharmony_ci if(idn2_check_version(IDN2_VERSION)) { 16313498266Sopenharmony_ci int flags = IDN2_NFC_INPUT 16413498266Sopenharmony_ci#if IDN2_VERSION_NUMBER >= 0x00140000 16513498266Sopenharmony_ci /* IDN2_NFC_INPUT: Normalize input string using normalization form C. 16613498266Sopenharmony_ci IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional 16713498266Sopenharmony_ci processing. */ 16813498266Sopenharmony_ci | IDN2_NONTRANSITIONAL 16913498266Sopenharmony_ci#endif 17013498266Sopenharmony_ci ; 17113498266Sopenharmony_ci int rc = IDN2_LOOKUP(input, &decoded, flags); 17213498266Sopenharmony_ci if(rc != IDN2_OK) 17313498266Sopenharmony_ci /* fallback to TR46 Transitional mode for better IDNA2003 17413498266Sopenharmony_ci compatibility */ 17513498266Sopenharmony_ci rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL); 17613498266Sopenharmony_ci if(rc != IDN2_OK) 17713498266Sopenharmony_ci result = CURLE_URL_MALFORMAT; 17813498266Sopenharmony_ci } 17913498266Sopenharmony_ci else 18013498266Sopenharmony_ci /* a too old libidn2 version */ 18113498266Sopenharmony_ci result = CURLE_NOT_BUILT_IN; 18213498266Sopenharmony_ci#elif defined(USE_WIN32_IDN) 18313498266Sopenharmony_ci result = win32_idn_to_ascii(input, &decoded); 18413498266Sopenharmony_ci#endif 18513498266Sopenharmony_ci if(!result) 18613498266Sopenharmony_ci *output = decoded; 18713498266Sopenharmony_ci return result; 18813498266Sopenharmony_ci} 18913498266Sopenharmony_ci 19013498266Sopenharmony_cistatic CURLcode idn_encode(const char *puny, char **output) 19113498266Sopenharmony_ci{ 19213498266Sopenharmony_ci char *enc = NULL; 19313498266Sopenharmony_ci#ifdef USE_LIBIDN2 19413498266Sopenharmony_ci int rc = idn2_to_unicode_8z8z(puny, &enc, 0); 19513498266Sopenharmony_ci if(rc != IDNA_SUCCESS) 19613498266Sopenharmony_ci return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT; 19713498266Sopenharmony_ci#elif defined(USE_WIN32_IDN) 19813498266Sopenharmony_ci CURLcode result = win32_ascii_to_idn(puny, &enc); 19913498266Sopenharmony_ci if(result) 20013498266Sopenharmony_ci return result; 20113498266Sopenharmony_ci#endif 20213498266Sopenharmony_ci *output = enc; 20313498266Sopenharmony_ci return CURLE_OK; 20413498266Sopenharmony_ci} 20513498266Sopenharmony_ci 20613498266Sopenharmony_ciCURLcode Curl_idn_decode(const char *input, char **output) 20713498266Sopenharmony_ci{ 20813498266Sopenharmony_ci char *d = NULL; 20913498266Sopenharmony_ci CURLcode result = idn_decode(input, &d); 21013498266Sopenharmony_ci#ifdef USE_LIBIDN2 21113498266Sopenharmony_ci if(!result) { 21213498266Sopenharmony_ci char *c = strdup(d); 21313498266Sopenharmony_ci idn2_free(d); 21413498266Sopenharmony_ci if(c) 21513498266Sopenharmony_ci d = c; 21613498266Sopenharmony_ci else 21713498266Sopenharmony_ci result = CURLE_OUT_OF_MEMORY; 21813498266Sopenharmony_ci } 21913498266Sopenharmony_ci#endif 22013498266Sopenharmony_ci if(!result) 22113498266Sopenharmony_ci *output = d; 22213498266Sopenharmony_ci return result; 22313498266Sopenharmony_ci} 22413498266Sopenharmony_ci 22513498266Sopenharmony_ciCURLcode Curl_idn_encode(const char *puny, char **output) 22613498266Sopenharmony_ci{ 22713498266Sopenharmony_ci char *d = NULL; 22813498266Sopenharmony_ci CURLcode result = idn_encode(puny, &d); 22913498266Sopenharmony_ci#ifdef USE_LIBIDN2 23013498266Sopenharmony_ci if(!result) { 23113498266Sopenharmony_ci char *c = strdup(d); 23213498266Sopenharmony_ci idn2_free(d); 23313498266Sopenharmony_ci if(c) 23413498266Sopenharmony_ci d = c; 23513498266Sopenharmony_ci else 23613498266Sopenharmony_ci result = CURLE_OUT_OF_MEMORY; 23713498266Sopenharmony_ci } 23813498266Sopenharmony_ci#endif 23913498266Sopenharmony_ci if(!result) 24013498266Sopenharmony_ci *output = d; 24113498266Sopenharmony_ci return result; 24213498266Sopenharmony_ci} 24313498266Sopenharmony_ci 24413498266Sopenharmony_ci/* 24513498266Sopenharmony_ci * Frees data allocated by idnconvert_hostname() 24613498266Sopenharmony_ci */ 24713498266Sopenharmony_civoid Curl_free_idnconverted_hostname(struct hostname *host) 24813498266Sopenharmony_ci{ 24913498266Sopenharmony_ci if(host->encalloc) { 25013498266Sopenharmony_ci /* must be freed with idn2_free() if allocated by libidn */ 25113498266Sopenharmony_ci Curl_idn_free(host->encalloc); 25213498266Sopenharmony_ci host->encalloc = NULL; 25313498266Sopenharmony_ci } 25413498266Sopenharmony_ci} 25513498266Sopenharmony_ci 25613498266Sopenharmony_ci#endif /* USE_IDN */ 25713498266Sopenharmony_ci 25813498266Sopenharmony_ci/* 25913498266Sopenharmony_ci * Perform any necessary IDN conversion of hostname 26013498266Sopenharmony_ci */ 26113498266Sopenharmony_ciCURLcode Curl_idnconvert_hostname(struct hostname *host) 26213498266Sopenharmony_ci{ 26313498266Sopenharmony_ci /* set the name we use to display the host name */ 26413498266Sopenharmony_ci host->dispname = host->name; 26513498266Sopenharmony_ci 26613498266Sopenharmony_ci#ifdef USE_IDN 26713498266Sopenharmony_ci /* Check name for non-ASCII and convert hostname if we can */ 26813498266Sopenharmony_ci if(!Curl_is_ASCII_name(host->name)) { 26913498266Sopenharmony_ci char *decoded; 27013498266Sopenharmony_ci CURLcode result = idn_decode(host->name, &decoded); 27113498266Sopenharmony_ci if(!result) { 27213498266Sopenharmony_ci if(!*decoded) { 27313498266Sopenharmony_ci /* zero length is a bad host name */ 27413498266Sopenharmony_ci Curl_idn_free(decoded); 27513498266Sopenharmony_ci return CURLE_URL_MALFORMAT; 27613498266Sopenharmony_ci } 27713498266Sopenharmony_ci /* successful */ 27813498266Sopenharmony_ci host->encalloc = decoded; 27913498266Sopenharmony_ci /* change the name pointer to point to the encoded hostname */ 28013498266Sopenharmony_ci host->name = host->encalloc; 28113498266Sopenharmony_ci } 28213498266Sopenharmony_ci else 28313498266Sopenharmony_ci return result; 28413498266Sopenharmony_ci } 28513498266Sopenharmony_ci#endif 28613498266Sopenharmony_ci return CURLE_OK; 28713498266Sopenharmony_ci} 288