xref: /third_party/curl/lib/idn.c (revision 13498266)
1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25 /*
26  * IDN conversions
27  */
28
29#include "curl_setup.h"
30#include "urldata.h"
31#include "idn.h"
32#include "sendf.h"
33#include "curl_multibyte.h"
34#include "warnless.h"
35
36#ifdef USE_LIBIDN2
37#include <idn2.h>
38
/*
 * IDN2_LOOKUP(name, host, flags) wraps the libidn2 lookup call. Windows
 * UNICODE builds go through idn2_lookup_u8(), all other builds through
 * idn2_lookup_ul(). 'name' and 'host' are cast to the pointer types of the
 * selected function.
 *
 * Every macro argument is parenthesized so that an expression passed as an
 * argument is cast and evaluated as a whole.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                          \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
46#endif  /* USE_LIBIDN2 */
47
48/* The last 3 #include files should be in this order */
49#include "curl_printf.h"
50#include "curl_memory.h"
51#include "memdebug.h"
52
53#ifdef USE_WIN32_IDN
54/* using Windows kernel32 and normaliz libraries. */
55
56#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600
57WINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
58                                 const WCHAR *lpUnicodeCharStr,
59                                 int cchUnicodeChar,
60                                 WCHAR *lpASCIICharStr,
61                                 int cchASCIIChar);
62WINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
63                                   const WCHAR *lpASCIICharStr,
64                                   int cchASCIIChar,
65                                   WCHAR *lpUnicodeCharStr,
66                                   int cchUnicodeChar);
67#endif
68
69#define IDN_MAX_LENGTH 255
70
71static CURLcode win32_idn_to_ascii(const char *in, char **out)
72{
73  wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
74  *out = NULL;
75  if(in_w) {
76    wchar_t punycode[IDN_MAX_LENGTH];
77    int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
78                           IDN_MAX_LENGTH);
79    curlx_unicodefree(in_w);
80    if(chars) {
81      char *mstr = curlx_convert_wchar_to_UTF8(punycode);
82      if(mstr) {
83        *out = strdup(mstr);
84        curlx_unicodefree(mstr);
85        if(!*out)
86          return CURLE_OUT_OF_MEMORY;
87      }
88      else
89        return CURLE_OUT_OF_MEMORY;
90    }
91    else
92      return CURLE_URL_MALFORMAT;
93  }
94  else
95    return CURLE_URL_MALFORMAT;
96
97  return CURLE_OK;
98}
99
100static CURLcode win32_ascii_to_idn(const char *in, char **output)
101{
102  char *out = NULL;
103
104  wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
105  if(in_w) {
106    WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
107    int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
108                             IDN_MAX_LENGTH);
109    if(chars) {
110      /* 'chars' is "the number of characters retrieved" */
111      char *mstr = curlx_convert_wchar_to_UTF8(idn);
112      if(mstr) {
113        out = strdup(mstr);
114        curlx_unicodefree(mstr);
115        if(!out)
116          return CURLE_OUT_OF_MEMORY;
117      }
118    }
119    else
120      return CURLE_URL_MALFORMAT;
121  }
122  else
123    return CURLE_URL_MALFORMAT;
124  *output = out;
125  return CURLE_OK;
126}
127
128#endif /* USE_WIN32_IDN */
129
/*
 * Helpers for IDNA conversions.
 *
 * Curl_is_ASCII_name() returns TRUE when no byte of 'hostname' has its high
 * bit (0x80) set, FALSE otherwise. A NULL pointer and the empty string both
 * yield TRUE.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  const unsigned char *p;

  if(!hostname)
    /* bad input, consider it ASCII! */
    return TRUE;

  /* inspect the name as unsigned bytes so values above 0x7f compare sanely */
  for(p = (const unsigned char *)hostname; *p; p++) {
    if(*p > 0x7f)
      return FALSE;
  }
  return TRUE;
}
147
148#ifdef USE_IDN
149/*
150 * Curl_idn_decode() returns an allocated IDN decoded string if it was
151 * possible. NULL on error.
152 *
153 * CURLE_URL_MALFORMAT - the host name could not be converted
154 * CURLE_OUT_OF_MEMORY - memory problem
155 *
156 */
157static CURLcode idn_decode(const char *input, char **output)
158{
159  char *decoded = NULL;
160  CURLcode result = CURLE_OK;
161#ifdef USE_LIBIDN2
162  if(idn2_check_version(IDN2_VERSION)) {
163    int flags = IDN2_NFC_INPUT
164#if IDN2_VERSION_NUMBER >= 0x00140000
165      /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
166         IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
167         processing. */
168      | IDN2_NONTRANSITIONAL
169#endif
170      ;
171    int rc = IDN2_LOOKUP(input, &decoded, flags);
172    if(rc != IDN2_OK)
173      /* fallback to TR46 Transitional mode for better IDNA2003
174         compatibility */
175      rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
176    if(rc != IDN2_OK)
177      result = CURLE_URL_MALFORMAT;
178  }
179  else
180    /* a too old libidn2 version */
181    result = CURLE_NOT_BUILT_IN;
182#elif defined(USE_WIN32_IDN)
183  result = win32_idn_to_ascii(input, &decoded);
184#endif
185  if(!result)
186    *output = decoded;
187  return result;
188}
189
190static CURLcode idn_encode(const char *puny, char **output)
191{
192  char *enc = NULL;
193#ifdef USE_LIBIDN2
194  int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
195  if(rc != IDNA_SUCCESS)
196    return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
197#elif defined(USE_WIN32_IDN)
198  CURLcode result = win32_ascii_to_idn(puny, &enc);
199  if(result)
200    return result;
201#endif
202  *output = enc;
203  return CURLE_OK;
204}
205
206CURLcode Curl_idn_decode(const char *input, char **output)
207{
208  char *d = NULL;
209  CURLcode result = idn_decode(input, &d);
210#ifdef USE_LIBIDN2
211  if(!result) {
212    char *c = strdup(d);
213    idn2_free(d);
214    if(c)
215      d = c;
216    else
217      result = CURLE_OUT_OF_MEMORY;
218  }
219#endif
220  if(!result)
221    *output = d;
222  return result;
223}
224
225CURLcode Curl_idn_encode(const char *puny, char **output)
226{
227  char *d = NULL;
228  CURLcode result = idn_encode(puny, &d);
229#ifdef USE_LIBIDN2
230  if(!result) {
231    char *c = strdup(d);
232    idn2_free(d);
233    if(c)
234      d = c;
235    else
236      result = CURLE_OUT_OF_MEMORY;
237  }
238#endif
239  if(!result)
240    *output = d;
241  return result;
242}
243
244/*
245 * Frees data allocated by idnconvert_hostname()
246 */
247void Curl_free_idnconverted_hostname(struct hostname *host)
248{
249  if(host->encalloc) {
250    /* must be freed with idn2_free() if allocated by libidn */
251    Curl_idn_free(host->encalloc);
252    host->encalloc = NULL;
253  }
254}
255
256#endif /* USE_IDN */
257
258/*
259 * Perform any necessary IDN conversion of hostname
260 */
261CURLcode Curl_idnconvert_hostname(struct hostname *host)
262{
263  /* set the name we use to display the host name */
264  host->dispname = host->name;
265
266#ifdef USE_IDN
267  /* Check name for non-ASCII and convert hostname if we can */
268  if(!Curl_is_ASCII_name(host->name)) {
269    char *decoded;
270    CURLcode result = idn_decode(host->name, &decoded);
271    if(!result) {
272      if(!*decoded) {
273        /* zero length is a bad host name */
274        Curl_idn_free(decoded);
275        return CURLE_URL_MALFORMAT;
276      }
277      /* successful */
278      host->encalloc = decoded;
279      /* change the name pointer to point to the encoded hostname */
280      host->name = host->encalloc;
281    }
282    else
283      return result;
284  }
285#endif
286  return CURLE_OK;
287}
288