113498266Sopenharmony_ci/***************************************************************************
213498266Sopenharmony_ci *                                  _   _ ____  _
313498266Sopenharmony_ci *  Project                     ___| | | |  _ \| |
413498266Sopenharmony_ci *                             / __| | | | |_) | |
513498266Sopenharmony_ci *                            | (__| |_| |  _ <| |___
613498266Sopenharmony_ci *                             \___|\___/|_| \_\_____|
713498266Sopenharmony_ci *
813498266Sopenharmony_ci * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
913498266Sopenharmony_ci *
1013498266Sopenharmony_ci * This software is licensed as described in the file COPYING, which
1113498266Sopenharmony_ci * you should have received as part of this distribution. The terms
1213498266Sopenharmony_ci * are also available at https://curl.se/docs/copyright.html.
1313498266Sopenharmony_ci *
1413498266Sopenharmony_ci * You may opt to use, copy, modify, merge, publish, distribute and/or sell
1513498266Sopenharmony_ci * copies of the Software, and permit persons to whom the Software is
1613498266Sopenharmony_ci * furnished to do so, under the terms of the COPYING file.
1713498266Sopenharmony_ci *
1813498266Sopenharmony_ci * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
1913498266Sopenharmony_ci * KIND, either express or implied.
2013498266Sopenharmony_ci *
2113498266Sopenharmony_ci * SPDX-License-Identifier: curl
2213498266Sopenharmony_ci *
2313498266Sopenharmony_ci ***************************************************************************/
2413498266Sopenharmony_ci
2513498266Sopenharmony_ci /*
2613498266Sopenharmony_ci  * IDN conversions
2713498266Sopenharmony_ci  */
2813498266Sopenharmony_ci
2913498266Sopenharmony_ci#include "curl_setup.h"
3013498266Sopenharmony_ci#include "urldata.h"
3113498266Sopenharmony_ci#include "idn.h"
3213498266Sopenharmony_ci#include "sendf.h"
3313498266Sopenharmony_ci#include "curl_multibyte.h"
3413498266Sopenharmony_ci#include "warnless.h"
3513498266Sopenharmony_ci
3613498266Sopenharmony_ci#ifdef USE_LIBIDN2
3713498266Sopenharmony_ci#include <idn2.h>
3813498266Sopenharmony_ci
/*
 * IDN2_LOOKUP() picks the libidn2 lookup function to call: on Windows
 * UNICODE builds idn2_lookup_u8() is used, everywhere else
 * idn2_lookup_ul() is used. The arguments are cast to the pointer types
 * the selected function takes; each argument is parenthesized so that
 * the cast applies to the whole argument expression.
 */
#if defined(_WIN32) && defined(UNICODE)
#define IDN2_LOOKUP(name, host, flags)                                  \
  idn2_lookup_u8((const uint8_t *)(name), (uint8_t **)(host), (flags))
#else
#define IDN2_LOOKUP(name, host, flags)                          \
  idn2_lookup_ul((const char *)(name), (char **)(host), (flags))
#endif
4613498266Sopenharmony_ci#endif  /* USE_LIBIDN2 */
4713498266Sopenharmony_ci
4813498266Sopenharmony_ci/* The last 3 #include files should be in this order */
4913498266Sopenharmony_ci#include "curl_printf.h"
5013498266Sopenharmony_ci#include "curl_memory.h"
5113498266Sopenharmony_ci#include "memdebug.h"
5213498266Sopenharmony_ci
5313498266Sopenharmony_ci#ifdef USE_WIN32_IDN
5413498266Sopenharmony_ci/* using Windows kernel32 and normaliz libraries. */
5513498266Sopenharmony_ci
5613498266Sopenharmony_ci#if !defined(_WIN32_WINNT) || _WIN32_WINNT < 0x600
5713498266Sopenharmony_ciWINBASEAPI int WINAPI IdnToAscii(DWORD dwFlags,
5813498266Sopenharmony_ci                                 const WCHAR *lpUnicodeCharStr,
5913498266Sopenharmony_ci                                 int cchUnicodeChar,
6013498266Sopenharmony_ci                                 WCHAR *lpASCIICharStr,
6113498266Sopenharmony_ci                                 int cchASCIIChar);
6213498266Sopenharmony_ciWINBASEAPI int WINAPI IdnToUnicode(DWORD dwFlags,
6313498266Sopenharmony_ci                                   const WCHAR *lpASCIICharStr,
6413498266Sopenharmony_ci                                   int cchASCIIChar,
6513498266Sopenharmony_ci                                   WCHAR *lpUnicodeCharStr,
6613498266Sopenharmony_ci                                   int cchUnicodeChar);
6713498266Sopenharmony_ci#endif
6813498266Sopenharmony_ci
6913498266Sopenharmony_ci#define IDN_MAX_LENGTH 255
7013498266Sopenharmony_ci
7113498266Sopenharmony_cistatic CURLcode win32_idn_to_ascii(const char *in, char **out)
7213498266Sopenharmony_ci{
7313498266Sopenharmony_ci  wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
7413498266Sopenharmony_ci  *out = NULL;
7513498266Sopenharmony_ci  if(in_w) {
7613498266Sopenharmony_ci    wchar_t punycode[IDN_MAX_LENGTH];
7713498266Sopenharmony_ci    int chars = IdnToAscii(0, in_w, (int)(wcslen(in_w) + 1), punycode,
7813498266Sopenharmony_ci                           IDN_MAX_LENGTH);
7913498266Sopenharmony_ci    curlx_unicodefree(in_w);
8013498266Sopenharmony_ci    if(chars) {
8113498266Sopenharmony_ci      char *mstr = curlx_convert_wchar_to_UTF8(punycode);
8213498266Sopenharmony_ci      if(mstr) {
8313498266Sopenharmony_ci        *out = strdup(mstr);
8413498266Sopenharmony_ci        curlx_unicodefree(mstr);
8513498266Sopenharmony_ci        if(!*out)
8613498266Sopenharmony_ci          return CURLE_OUT_OF_MEMORY;
8713498266Sopenharmony_ci      }
8813498266Sopenharmony_ci      else
8913498266Sopenharmony_ci        return CURLE_OUT_OF_MEMORY;
9013498266Sopenharmony_ci    }
9113498266Sopenharmony_ci    else
9213498266Sopenharmony_ci      return CURLE_URL_MALFORMAT;
9313498266Sopenharmony_ci  }
9413498266Sopenharmony_ci  else
9513498266Sopenharmony_ci    return CURLE_URL_MALFORMAT;
9613498266Sopenharmony_ci
9713498266Sopenharmony_ci  return CURLE_OK;
9813498266Sopenharmony_ci}
9913498266Sopenharmony_ci
10013498266Sopenharmony_cistatic CURLcode win32_ascii_to_idn(const char *in, char **output)
10113498266Sopenharmony_ci{
10213498266Sopenharmony_ci  char *out = NULL;
10313498266Sopenharmony_ci
10413498266Sopenharmony_ci  wchar_t *in_w = curlx_convert_UTF8_to_wchar(in);
10513498266Sopenharmony_ci  if(in_w) {
10613498266Sopenharmony_ci    WCHAR idn[IDN_MAX_LENGTH]; /* stores a UTF-16 string */
10713498266Sopenharmony_ci    int chars = IdnToUnicode(0, in_w, (int)(wcslen(in_w) + 1), idn,
10813498266Sopenharmony_ci                             IDN_MAX_LENGTH);
10913498266Sopenharmony_ci    if(chars) {
11013498266Sopenharmony_ci      /* 'chars' is "the number of characters retrieved" */
11113498266Sopenharmony_ci      char *mstr = curlx_convert_wchar_to_UTF8(idn);
11213498266Sopenharmony_ci      if(mstr) {
11313498266Sopenharmony_ci        out = strdup(mstr);
11413498266Sopenharmony_ci        curlx_unicodefree(mstr);
11513498266Sopenharmony_ci        if(!out)
11613498266Sopenharmony_ci          return CURLE_OUT_OF_MEMORY;
11713498266Sopenharmony_ci      }
11813498266Sopenharmony_ci    }
11913498266Sopenharmony_ci    else
12013498266Sopenharmony_ci      return CURLE_URL_MALFORMAT;
12113498266Sopenharmony_ci  }
12213498266Sopenharmony_ci  else
12313498266Sopenharmony_ci    return CURLE_URL_MALFORMAT;
12413498266Sopenharmony_ci  *output = out;
12513498266Sopenharmony_ci  return CURLE_OK;
12613498266Sopenharmony_ci}
12713498266Sopenharmony_ci
12813498266Sopenharmony_ci#endif /* USE_WIN32_IDN */
12913498266Sopenharmony_ci
/*
 * Helpers for IDNA conversions.
 *
 * Curl_is_ASCII_name() returns TRUE when no byte in 'hostname' has its
 * high bit (0x80) set, and FALSE as soon as one such byte is found. A NULL
 * pointer and the empty string both yield TRUE.
 */
bool Curl_is_ASCII_name(const char *hostname)
{
  const unsigned char *p;

  if(!hostname) /* bad input, consider it ASCII! */
    return TRUE;

  /* walk the name as UNSIGNED bytes so values above 0x7f compare as such */
  for(p = (const unsigned char *)hostname; *p; p++) {
    if(*p > 0x7f)
      return FALSE;
  }
  return TRUE;
}
14713498266Sopenharmony_ci
14813498266Sopenharmony_ci#ifdef USE_IDN
14913498266Sopenharmony_ci/*
15013498266Sopenharmony_ci * Curl_idn_decode() returns an allocated IDN decoded string if it was
15113498266Sopenharmony_ci * possible. NULL on error.
15213498266Sopenharmony_ci *
15313498266Sopenharmony_ci * CURLE_URL_MALFORMAT - the host name could not be converted
15413498266Sopenharmony_ci * CURLE_OUT_OF_MEMORY - memory problem
15513498266Sopenharmony_ci *
15613498266Sopenharmony_ci */
15713498266Sopenharmony_cistatic CURLcode idn_decode(const char *input, char **output)
15813498266Sopenharmony_ci{
15913498266Sopenharmony_ci  char *decoded = NULL;
16013498266Sopenharmony_ci  CURLcode result = CURLE_OK;
16113498266Sopenharmony_ci#ifdef USE_LIBIDN2
16213498266Sopenharmony_ci  if(idn2_check_version(IDN2_VERSION)) {
16313498266Sopenharmony_ci    int flags = IDN2_NFC_INPUT
16413498266Sopenharmony_ci#if IDN2_VERSION_NUMBER >= 0x00140000
16513498266Sopenharmony_ci      /* IDN2_NFC_INPUT: Normalize input string using normalization form C.
16613498266Sopenharmony_ci         IDN2_NONTRANSITIONAL: Perform Unicode TR46 non-transitional
16713498266Sopenharmony_ci         processing. */
16813498266Sopenharmony_ci      | IDN2_NONTRANSITIONAL
16913498266Sopenharmony_ci#endif
17013498266Sopenharmony_ci      ;
17113498266Sopenharmony_ci    int rc = IDN2_LOOKUP(input, &decoded, flags);
17213498266Sopenharmony_ci    if(rc != IDN2_OK)
17313498266Sopenharmony_ci      /* fallback to TR46 Transitional mode for better IDNA2003
17413498266Sopenharmony_ci         compatibility */
17513498266Sopenharmony_ci      rc = IDN2_LOOKUP(input, &decoded, IDN2_TRANSITIONAL);
17613498266Sopenharmony_ci    if(rc != IDN2_OK)
17713498266Sopenharmony_ci      result = CURLE_URL_MALFORMAT;
17813498266Sopenharmony_ci  }
17913498266Sopenharmony_ci  else
18013498266Sopenharmony_ci    /* a too old libidn2 version */
18113498266Sopenharmony_ci    result = CURLE_NOT_BUILT_IN;
18213498266Sopenharmony_ci#elif defined(USE_WIN32_IDN)
18313498266Sopenharmony_ci  result = win32_idn_to_ascii(input, &decoded);
18413498266Sopenharmony_ci#endif
18513498266Sopenharmony_ci  if(!result)
18613498266Sopenharmony_ci    *output = decoded;
18713498266Sopenharmony_ci  return result;
18813498266Sopenharmony_ci}
18913498266Sopenharmony_ci
19013498266Sopenharmony_cistatic CURLcode idn_encode(const char *puny, char **output)
19113498266Sopenharmony_ci{
19213498266Sopenharmony_ci  char *enc = NULL;
19313498266Sopenharmony_ci#ifdef USE_LIBIDN2
19413498266Sopenharmony_ci  int rc = idn2_to_unicode_8z8z(puny, &enc, 0);
19513498266Sopenharmony_ci  if(rc != IDNA_SUCCESS)
19613498266Sopenharmony_ci    return rc == IDNA_MALLOC_ERROR ? CURLE_OUT_OF_MEMORY : CURLE_URL_MALFORMAT;
19713498266Sopenharmony_ci#elif defined(USE_WIN32_IDN)
19813498266Sopenharmony_ci  CURLcode result = win32_ascii_to_idn(puny, &enc);
19913498266Sopenharmony_ci  if(result)
20013498266Sopenharmony_ci    return result;
20113498266Sopenharmony_ci#endif
20213498266Sopenharmony_ci  *output = enc;
20313498266Sopenharmony_ci  return CURLE_OK;
20413498266Sopenharmony_ci}
20513498266Sopenharmony_ci
20613498266Sopenharmony_ciCURLcode Curl_idn_decode(const char *input, char **output)
20713498266Sopenharmony_ci{
20813498266Sopenharmony_ci  char *d = NULL;
20913498266Sopenharmony_ci  CURLcode result = idn_decode(input, &d);
21013498266Sopenharmony_ci#ifdef USE_LIBIDN2
21113498266Sopenharmony_ci  if(!result) {
21213498266Sopenharmony_ci    char *c = strdup(d);
21313498266Sopenharmony_ci    idn2_free(d);
21413498266Sopenharmony_ci    if(c)
21513498266Sopenharmony_ci      d = c;
21613498266Sopenharmony_ci    else
21713498266Sopenharmony_ci      result = CURLE_OUT_OF_MEMORY;
21813498266Sopenharmony_ci  }
21913498266Sopenharmony_ci#endif
22013498266Sopenharmony_ci  if(!result)
22113498266Sopenharmony_ci    *output = d;
22213498266Sopenharmony_ci  return result;
22313498266Sopenharmony_ci}
22413498266Sopenharmony_ci
22513498266Sopenharmony_ciCURLcode Curl_idn_encode(const char *puny, char **output)
22613498266Sopenharmony_ci{
22713498266Sopenharmony_ci  char *d = NULL;
22813498266Sopenharmony_ci  CURLcode result = idn_encode(puny, &d);
22913498266Sopenharmony_ci#ifdef USE_LIBIDN2
23013498266Sopenharmony_ci  if(!result) {
23113498266Sopenharmony_ci    char *c = strdup(d);
23213498266Sopenharmony_ci    idn2_free(d);
23313498266Sopenharmony_ci    if(c)
23413498266Sopenharmony_ci      d = c;
23513498266Sopenharmony_ci    else
23613498266Sopenharmony_ci      result = CURLE_OUT_OF_MEMORY;
23713498266Sopenharmony_ci  }
23813498266Sopenharmony_ci#endif
23913498266Sopenharmony_ci  if(!result)
24013498266Sopenharmony_ci    *output = d;
24113498266Sopenharmony_ci  return result;
24213498266Sopenharmony_ci}
24313498266Sopenharmony_ci
24413498266Sopenharmony_ci/*
24513498266Sopenharmony_ci * Frees data allocated by idnconvert_hostname()
24613498266Sopenharmony_ci */
24713498266Sopenharmony_civoid Curl_free_idnconverted_hostname(struct hostname *host)
24813498266Sopenharmony_ci{
24913498266Sopenharmony_ci  if(host->encalloc) {
25013498266Sopenharmony_ci    /* must be freed with idn2_free() if allocated by libidn */
25113498266Sopenharmony_ci    Curl_idn_free(host->encalloc);
25213498266Sopenharmony_ci    host->encalloc = NULL;
25313498266Sopenharmony_ci  }
25413498266Sopenharmony_ci}
25513498266Sopenharmony_ci
25613498266Sopenharmony_ci#endif /* USE_IDN */
25713498266Sopenharmony_ci
25813498266Sopenharmony_ci/*
25913498266Sopenharmony_ci * Perform any necessary IDN conversion of hostname
26013498266Sopenharmony_ci */
26113498266Sopenharmony_ciCURLcode Curl_idnconvert_hostname(struct hostname *host)
26213498266Sopenharmony_ci{
26313498266Sopenharmony_ci  /* set the name we use to display the host name */
26413498266Sopenharmony_ci  host->dispname = host->name;
26513498266Sopenharmony_ci
26613498266Sopenharmony_ci#ifdef USE_IDN
26713498266Sopenharmony_ci  /* Check name for non-ASCII and convert hostname if we can */
26813498266Sopenharmony_ci  if(!Curl_is_ASCII_name(host->name)) {
26913498266Sopenharmony_ci    char *decoded;
27013498266Sopenharmony_ci    CURLcode result = idn_decode(host->name, &decoded);
27113498266Sopenharmony_ci    if(!result) {
27213498266Sopenharmony_ci      if(!*decoded) {
27313498266Sopenharmony_ci        /* zero length is a bad host name */
27413498266Sopenharmony_ci        Curl_idn_free(decoded);
27513498266Sopenharmony_ci        return CURLE_URL_MALFORMAT;
27613498266Sopenharmony_ci      }
27713498266Sopenharmony_ci      /* successful */
27813498266Sopenharmony_ci      host->encalloc = decoded;
27913498266Sopenharmony_ci      /* change the name pointer to point to the encoded hostname */
28013498266Sopenharmony_ci      host->name = host->encalloc;
28113498266Sopenharmony_ci    }
28213498266Sopenharmony_ci    else
28313498266Sopenharmony_ci      return result;
28413498266Sopenharmony_ci  }
28513498266Sopenharmony_ci#endif
28613498266Sopenharmony_ci  return CURLE_OK;
28713498266Sopenharmony_ci}
288