11cb0ef41Sopenharmony_ci/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl>
21cb0ef41Sopenharmony_ci *
31cb0ef41Sopenharmony_ci * Permission to use, copy, modify, and/or distribute this software for any
41cb0ef41Sopenharmony_ci * purpose with or without fee is hereby granted, provided that the above
51cb0ef41Sopenharmony_ci * copyright notice and this permission notice appear in all copies.
61cb0ef41Sopenharmony_ci *
71cb0ef41Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
81cb0ef41Sopenharmony_ci * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
91cb0ef41Sopenharmony_ci * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
101cb0ef41Sopenharmony_ci * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
111cb0ef41Sopenharmony_ci * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
121cb0ef41Sopenharmony_ci * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
131cb0ef41Sopenharmony_ci * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
141cb0ef41Sopenharmony_ci */
151cb0ef41Sopenharmony_ci
161cb0ef41Sopenharmony_ci/* Derived from https://github.com/bnoordhuis/punycode
171cb0ef41Sopenharmony_ci * but updated to support IDNA 2008.
181cb0ef41Sopenharmony_ci */
191cb0ef41Sopenharmony_ci
201cb0ef41Sopenharmony_ci#include "uv.h"
211cb0ef41Sopenharmony_ci#include "idna.h"
221cb0ef41Sopenharmony_ci#include <assert.h>
231cb0ef41Sopenharmony_ci#include <string.h>
241cb0ef41Sopenharmony_ci#include <limits.h> /* UINT_MAX */
251cb0ef41Sopenharmony_ci
/* Decode the tail of a multi-byte UTF-8 sequence.
 *
 * |a| is the lead byte, which the caller has already consumed; |*p| points
 * at the byte following it and |pe| is the end of the input.
 *
 * On success the code point is returned and |*p| is advanced past the
 * sequence. On failure (unsigned) -1 is returned. |*p| is left untouched
 * when the failure is detected before any continuation byte is read
 * (bad lead byte, truncated input); otherwise it has been advanced past the
 * bytes that were examined.
 *
 * Rejected input: lead bytes above 0xF7, continuation bytes used as lead
 * bytes, sequences cut short by |pe|, trailing bytes that are not of the
 * form 10xxxxxx, overlong encodings, values above U+10FFFF and surrogates.
 */
static unsigned uv__utf8_decode1_slow(const char** p,
                                      const char* pe,
                                      unsigned a) {
  unsigned b;
  unsigned c;
  unsigned d;
  unsigned min;
  int need;  /* Number of continuation bytes the lead byte calls for. */

  if (a > 0xF7)
    return -1;  /* Not a valid lead byte. */

  if (a > 0xEF) {
    need = 3;
    min = 0x10000;
  } else if (a > 0xDF) {
    need = 2;
    min = 0x800;
  } else if (a > 0xBF) {
    need = 1;
    min = 0x80;
  } else {
    return -1;  /* Invalid continuation byte. */
  }

  /* The length is dictated by the lead byte alone. A sequence that does not
   * fit in the remaining input must never be reinterpreted as a shorter one.
   */
  if (pe - *p < need)
    return -1;  /* Truncated sequence. */

  /* Normalize every form to "a, b, c, d" so the code below is shared. For
   * the 2- and 3-byte forms the payload bits of the lead byte are moved into
   * a synthetic continuation byte (0x80 | bits) and unused slots are filled
   * with an empty continuation byte (0x80).
   */
  switch (need) {
  case 3:
    a = a & 7;
    b = (unsigned char) *(*p)++;
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    break;
  case 2:
    b = 0x80 | (a & 15);
    c = (unsigned char) *(*p)++;
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  default:
    b = 0x80;
    c = 0x80 | (a & 31);
    d = (unsigned char) *(*p)++;
    a = 0;
    break;
  }

  /* Each trailing byte must individually look like 10xxxxxx. Combining them
   * (e.g. with XOR) lets pairs of invalid bytes cancel each other out.
   */
  if ((b & 0xC0) != 0x80 || (c & 0xC0) != 0x80 || (d & 0xC0) != 0x80)
    return -1;  /* Invalid sequence. */

  b &= 63;
  c &= 63;
  d &= 63;
  a = (a << 18) | (b << 12) | (c << 6) | d;

  if (a < min)
    return -1;  /* Overlong sequence. */

  if (a > 0x10FFFF)
    return -1;  /* Four-byte sequence > U+10FFFF. */

  if (a >= 0xD800 && a <= 0xDFFF)
    return -1;  /* Surrogate pair. */

  return a;
}
911cb0ef41Sopenharmony_ci
/* Decode one UTF-8 code point starting at |*p|; |pe| marks the end of the
 * input and the caller must guarantee |*p < pe|. The lead byte is always
 * consumed. ASCII is returned directly, anything else is handed to
 * uv__utf8_decode1_slow(), whose result is returned unchanged.
 */
unsigned uv__utf8_decode1(const char** p, const char* pe) {
  unsigned lead;

  assert(*p < pe);

  lead = (unsigned char) **p;
  *p += 1;

  /* ASCII is the common case and needs no further work. */
  return lead < 128 ? lead : uv__utf8_decode1_slow(p, pe, lead);
}
1041cb0ef41Sopenharmony_ci
1051cb0ef41Sopenharmony_cistatic int uv__idna_toascii_label(const char* s, const char* se,
1061cb0ef41Sopenharmony_ci                                  char** d, char* de) {
1071cb0ef41Sopenharmony_ci  static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
1081cb0ef41Sopenharmony_ci  const char* ss;
1091cb0ef41Sopenharmony_ci  unsigned c;
1101cb0ef41Sopenharmony_ci  unsigned h;
1111cb0ef41Sopenharmony_ci  unsigned k;
1121cb0ef41Sopenharmony_ci  unsigned n;
1131cb0ef41Sopenharmony_ci  unsigned m;
1141cb0ef41Sopenharmony_ci  unsigned q;
1151cb0ef41Sopenharmony_ci  unsigned t;
1161cb0ef41Sopenharmony_ci  unsigned x;
1171cb0ef41Sopenharmony_ci  unsigned y;
1181cb0ef41Sopenharmony_ci  unsigned bias;
1191cb0ef41Sopenharmony_ci  unsigned delta;
1201cb0ef41Sopenharmony_ci  unsigned todo;
1211cb0ef41Sopenharmony_ci  int first;
1221cb0ef41Sopenharmony_ci
1231cb0ef41Sopenharmony_ci  h = 0;
1241cb0ef41Sopenharmony_ci  ss = s;
1251cb0ef41Sopenharmony_ci  todo = 0;
1261cb0ef41Sopenharmony_ci
1271cb0ef41Sopenharmony_ci  /* Note: after this loop we've visited all UTF-8 characters and know
1281cb0ef41Sopenharmony_ci   * they're legal so we no longer need to check for decode errors.
1291cb0ef41Sopenharmony_ci   */
1301cb0ef41Sopenharmony_ci  while (s < se) {
1311cb0ef41Sopenharmony_ci    c = uv__utf8_decode1(&s, se);
1321cb0ef41Sopenharmony_ci
1331cb0ef41Sopenharmony_ci    if (c == UINT_MAX)
1341cb0ef41Sopenharmony_ci      return UV_EINVAL;
1351cb0ef41Sopenharmony_ci
1361cb0ef41Sopenharmony_ci    if (c < 128)
1371cb0ef41Sopenharmony_ci      h++;
1381cb0ef41Sopenharmony_ci    else
1391cb0ef41Sopenharmony_ci      todo++;
1401cb0ef41Sopenharmony_ci  }
1411cb0ef41Sopenharmony_ci
1421cb0ef41Sopenharmony_ci  /* Only write "xn--" when there are non-ASCII characters. */
1431cb0ef41Sopenharmony_ci  if (todo > 0) {
1441cb0ef41Sopenharmony_ci    if (*d < de) *(*d)++ = 'x';
1451cb0ef41Sopenharmony_ci    if (*d < de) *(*d)++ = 'n';
1461cb0ef41Sopenharmony_ci    if (*d < de) *(*d)++ = '-';
1471cb0ef41Sopenharmony_ci    if (*d < de) *(*d)++ = '-';
1481cb0ef41Sopenharmony_ci  }
1491cb0ef41Sopenharmony_ci
1501cb0ef41Sopenharmony_ci  /* Write ASCII characters. */
1511cb0ef41Sopenharmony_ci  x = 0;
1521cb0ef41Sopenharmony_ci  s = ss;
1531cb0ef41Sopenharmony_ci  while (s < se) {
1541cb0ef41Sopenharmony_ci    c = uv__utf8_decode1(&s, se);
1551cb0ef41Sopenharmony_ci    assert(c != UINT_MAX);
1561cb0ef41Sopenharmony_ci
1571cb0ef41Sopenharmony_ci    if (c > 127)
1581cb0ef41Sopenharmony_ci      continue;
1591cb0ef41Sopenharmony_ci
1601cb0ef41Sopenharmony_ci    if (*d < de)
1611cb0ef41Sopenharmony_ci      *(*d)++ = c;
1621cb0ef41Sopenharmony_ci
1631cb0ef41Sopenharmony_ci    if (++x == h)
1641cb0ef41Sopenharmony_ci      break;  /* Visited all ASCII characters. */
1651cb0ef41Sopenharmony_ci  }
1661cb0ef41Sopenharmony_ci
1671cb0ef41Sopenharmony_ci  if (todo == 0)
1681cb0ef41Sopenharmony_ci    return h;
1691cb0ef41Sopenharmony_ci
1701cb0ef41Sopenharmony_ci  /* Only write separator when we've written ASCII characters first. */
1711cb0ef41Sopenharmony_ci  if (h > 0)
1721cb0ef41Sopenharmony_ci    if (*d < de)
1731cb0ef41Sopenharmony_ci      *(*d)++ = '-';
1741cb0ef41Sopenharmony_ci
1751cb0ef41Sopenharmony_ci  n = 128;
1761cb0ef41Sopenharmony_ci  bias = 72;
1771cb0ef41Sopenharmony_ci  delta = 0;
1781cb0ef41Sopenharmony_ci  first = 1;
1791cb0ef41Sopenharmony_ci
1801cb0ef41Sopenharmony_ci  while (todo > 0) {
1811cb0ef41Sopenharmony_ci    m = -1;
1821cb0ef41Sopenharmony_ci    s = ss;
1831cb0ef41Sopenharmony_ci
1841cb0ef41Sopenharmony_ci    while (s < se) {
1851cb0ef41Sopenharmony_ci      c = uv__utf8_decode1(&s, se);
1861cb0ef41Sopenharmony_ci      assert(c != UINT_MAX);
1871cb0ef41Sopenharmony_ci
1881cb0ef41Sopenharmony_ci      if (c >= n)
1891cb0ef41Sopenharmony_ci        if (c < m)
1901cb0ef41Sopenharmony_ci          m = c;
1911cb0ef41Sopenharmony_ci    }
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_ci    x = m - n;
1941cb0ef41Sopenharmony_ci    y = h + 1;
1951cb0ef41Sopenharmony_ci
1961cb0ef41Sopenharmony_ci    if (x > ~delta / y)
1971cb0ef41Sopenharmony_ci      return UV_E2BIG;  /* Overflow. */
1981cb0ef41Sopenharmony_ci
1991cb0ef41Sopenharmony_ci    delta += x * y;
2001cb0ef41Sopenharmony_ci    n = m;
2011cb0ef41Sopenharmony_ci
2021cb0ef41Sopenharmony_ci    s = ss;
2031cb0ef41Sopenharmony_ci    while (s < se) {
2041cb0ef41Sopenharmony_ci      c = uv__utf8_decode1(&s, se);
2051cb0ef41Sopenharmony_ci      assert(c != UINT_MAX);
2061cb0ef41Sopenharmony_ci
2071cb0ef41Sopenharmony_ci      if (c < n)
2081cb0ef41Sopenharmony_ci        if (++delta == 0)
2091cb0ef41Sopenharmony_ci          return UV_E2BIG;  /* Overflow. */
2101cb0ef41Sopenharmony_ci
2111cb0ef41Sopenharmony_ci      if (c != n)
2121cb0ef41Sopenharmony_ci        continue;
2131cb0ef41Sopenharmony_ci
2141cb0ef41Sopenharmony_ci      for (k = 36, q = delta; /* empty */; k += 36) {
2151cb0ef41Sopenharmony_ci        t = 1;
2161cb0ef41Sopenharmony_ci
2171cb0ef41Sopenharmony_ci        if (k > bias)
2181cb0ef41Sopenharmony_ci          t = k - bias;
2191cb0ef41Sopenharmony_ci
2201cb0ef41Sopenharmony_ci        if (t > 26)
2211cb0ef41Sopenharmony_ci          t = 26;
2221cb0ef41Sopenharmony_ci
2231cb0ef41Sopenharmony_ci        if (q < t)
2241cb0ef41Sopenharmony_ci          break;
2251cb0ef41Sopenharmony_ci
2261cb0ef41Sopenharmony_ci        /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore
2271cb0ef41Sopenharmony_ci         * 10 <= y <= 35, we can optimize the long division
2281cb0ef41Sopenharmony_ci         * into a table-based reciprocal multiplication.
2291cb0ef41Sopenharmony_ci         */
2301cb0ef41Sopenharmony_ci        x = q - t;
2311cb0ef41Sopenharmony_ci        y = 36 - t;  /* 10 <= y <= 35 since 1 <= t <= 26. */
2321cb0ef41Sopenharmony_ci        q = x / y;
2331cb0ef41Sopenharmony_ci        t = t + x % y;  /* 1 <= t <= 35 because of y. */
2341cb0ef41Sopenharmony_ci
2351cb0ef41Sopenharmony_ci        if (*d < de)
2361cb0ef41Sopenharmony_ci          *(*d)++ = alphabet[t];
2371cb0ef41Sopenharmony_ci      }
2381cb0ef41Sopenharmony_ci
2391cb0ef41Sopenharmony_ci      if (*d < de)
2401cb0ef41Sopenharmony_ci        *(*d)++ = alphabet[q];
2411cb0ef41Sopenharmony_ci
2421cb0ef41Sopenharmony_ci      delta /= 2;
2431cb0ef41Sopenharmony_ci
2441cb0ef41Sopenharmony_ci      if (first) {
2451cb0ef41Sopenharmony_ci        delta /= 350;
2461cb0ef41Sopenharmony_ci        first = 0;
2471cb0ef41Sopenharmony_ci      }
2481cb0ef41Sopenharmony_ci
2491cb0ef41Sopenharmony_ci      /* No overflow check is needed because |delta| was just
2501cb0ef41Sopenharmony_ci       * divided by 2 and |delta+delta >= delta + delta/h|.
2511cb0ef41Sopenharmony_ci       */
2521cb0ef41Sopenharmony_ci      h++;
2531cb0ef41Sopenharmony_ci      delta += delta / h;
2541cb0ef41Sopenharmony_ci
2551cb0ef41Sopenharmony_ci      for (bias = 0; delta > 35 * 26 / 2; bias += 36)
2561cb0ef41Sopenharmony_ci        delta /= 35;
2571cb0ef41Sopenharmony_ci
2581cb0ef41Sopenharmony_ci      bias += 36 * delta / (delta + 38);
2591cb0ef41Sopenharmony_ci      delta = 0;
2601cb0ef41Sopenharmony_ci      todo--;
2611cb0ef41Sopenharmony_ci    }
2621cb0ef41Sopenharmony_ci
2631cb0ef41Sopenharmony_ci    delta++;
2641cb0ef41Sopenharmony_ci    n++;
2651cb0ef41Sopenharmony_ci  }
2661cb0ef41Sopenharmony_ci
2671cb0ef41Sopenharmony_ci  return 0;
2681cb0ef41Sopenharmony_ci}
2691cb0ef41Sopenharmony_ci
2701cb0ef41Sopenharmony_cilong uv__idna_toascii(const char* s, const char* se, char* d, char* de) {
2711cb0ef41Sopenharmony_ci  const char* si;
2721cb0ef41Sopenharmony_ci  const char* st;
2731cb0ef41Sopenharmony_ci  unsigned c;
2741cb0ef41Sopenharmony_ci  char* ds;
2751cb0ef41Sopenharmony_ci  int rc;
2761cb0ef41Sopenharmony_ci
2771cb0ef41Sopenharmony_ci  if (s == se)
2781cb0ef41Sopenharmony_ci    return UV_EINVAL;
2791cb0ef41Sopenharmony_ci
2801cb0ef41Sopenharmony_ci  ds = d;
2811cb0ef41Sopenharmony_ci
2821cb0ef41Sopenharmony_ci  si = s;
2831cb0ef41Sopenharmony_ci  while (si < se) {
2841cb0ef41Sopenharmony_ci    st = si;
2851cb0ef41Sopenharmony_ci    c = uv__utf8_decode1(&si, se);
2861cb0ef41Sopenharmony_ci
2871cb0ef41Sopenharmony_ci    if (c == UINT_MAX)
2881cb0ef41Sopenharmony_ci      return UV_EINVAL;
2891cb0ef41Sopenharmony_ci
2901cb0ef41Sopenharmony_ci    if (c != '.')
2911cb0ef41Sopenharmony_ci      if (c != 0x3002)  /* 。 */
2921cb0ef41Sopenharmony_ci        if (c != 0xFF0E)  /* . */
2931cb0ef41Sopenharmony_ci          if (c != 0xFF61)  /* 。 */
2941cb0ef41Sopenharmony_ci            continue;
2951cb0ef41Sopenharmony_ci
2961cb0ef41Sopenharmony_ci    rc = uv__idna_toascii_label(s, st, &d, de);
2971cb0ef41Sopenharmony_ci
2981cb0ef41Sopenharmony_ci    if (rc < 0)
2991cb0ef41Sopenharmony_ci      return rc;
3001cb0ef41Sopenharmony_ci
3011cb0ef41Sopenharmony_ci    if (d < de)
3021cb0ef41Sopenharmony_ci      *d++ = '.';
3031cb0ef41Sopenharmony_ci
3041cb0ef41Sopenharmony_ci    s = si;
3051cb0ef41Sopenharmony_ci  }
3061cb0ef41Sopenharmony_ci
3071cb0ef41Sopenharmony_ci  if (s < se) {
3081cb0ef41Sopenharmony_ci    rc = uv__idna_toascii_label(s, se, &d, de);
3091cb0ef41Sopenharmony_ci
3101cb0ef41Sopenharmony_ci    if (rc < 0)
3111cb0ef41Sopenharmony_ci      return rc;
3121cb0ef41Sopenharmony_ci  }
3131cb0ef41Sopenharmony_ci
3141cb0ef41Sopenharmony_ci  if (d >= de)
3151cb0ef41Sopenharmony_ci    return UV_EINVAL;
3161cb0ef41Sopenharmony_ci
3171cb0ef41Sopenharmony_ci  *d++ = '\0';
3181cb0ef41Sopenharmony_ci  return d - ds;  /* Number of bytes written. */
3191cb0ef41Sopenharmony_ci}
320