11cb0ef41Sopenharmony_ci/* Copyright (c) 2011, 2018 Ben Noordhuis <info@bnoordhuis.nl> 21cb0ef41Sopenharmony_ci * 31cb0ef41Sopenharmony_ci * Permission to use, copy, modify, and/or distribute this software for any 41cb0ef41Sopenharmony_ci * purpose with or without fee is hereby granted, provided that the above 51cb0ef41Sopenharmony_ci * copyright notice and this permission notice appear in all copies. 61cb0ef41Sopenharmony_ci * 71cb0ef41Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 81cb0ef41Sopenharmony_ci * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 91cb0ef41Sopenharmony_ci * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 101cb0ef41Sopenharmony_ci * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 111cb0ef41Sopenharmony_ci * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 121cb0ef41Sopenharmony_ci * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 131cb0ef41Sopenharmony_ci * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 141cb0ef41Sopenharmony_ci */ 151cb0ef41Sopenharmony_ci 161cb0ef41Sopenharmony_ci/* Derived from https://github.com/bnoordhuis/punycode 171cb0ef41Sopenharmony_ci * but updated to support IDNA 2008. 181cb0ef41Sopenharmony_ci */ 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_ci#include "uv.h" 211cb0ef41Sopenharmony_ci#include "idna.h" 221cb0ef41Sopenharmony_ci#include <assert.h> 231cb0ef41Sopenharmony_ci#include <string.h> 241cb0ef41Sopenharmony_ci#include <limits.h> /* UINT_MAX */ 251cb0ef41Sopenharmony_ci 261cb0ef41Sopenharmony_cistatic unsigned uv__utf8_decode1_slow(const char** p, 271cb0ef41Sopenharmony_ci const char* pe, 281cb0ef41Sopenharmony_ci unsigned a) { 291cb0ef41Sopenharmony_ci unsigned b; 301cb0ef41Sopenharmony_ci unsigned c; 311cb0ef41Sopenharmony_ci unsigned d; 321cb0ef41Sopenharmony_ci unsigned min; 331cb0ef41Sopenharmony_ci 341cb0ef41Sopenharmony_ci if (a > 0xF7) 351cb0ef41Sopenharmony_ci return -1; 361cb0ef41Sopenharmony_ci 371cb0ef41Sopenharmony_ci switch (pe - *p) { 381cb0ef41Sopenharmony_ci default: 391cb0ef41Sopenharmony_ci if (a > 0xEF) { 401cb0ef41Sopenharmony_ci min = 0x10000; 411cb0ef41Sopenharmony_ci a = a & 7; 421cb0ef41Sopenharmony_ci b = (unsigned char) *(*p)++; 431cb0ef41Sopenharmony_ci c = (unsigned char) *(*p)++; 441cb0ef41Sopenharmony_ci d = (unsigned char) *(*p)++; 451cb0ef41Sopenharmony_ci break; 461cb0ef41Sopenharmony_ci } 471cb0ef41Sopenharmony_ci /* Fall through. */ 481cb0ef41Sopenharmony_ci case 2: 491cb0ef41Sopenharmony_ci if (a > 0xDF) { 501cb0ef41Sopenharmony_ci min = 0x800; 511cb0ef41Sopenharmony_ci b = 0x80 | (a & 15); 521cb0ef41Sopenharmony_ci c = (unsigned char) *(*p)++; 531cb0ef41Sopenharmony_ci d = (unsigned char) *(*p)++; 541cb0ef41Sopenharmony_ci a = 0; 551cb0ef41Sopenharmony_ci break; 561cb0ef41Sopenharmony_ci } 571cb0ef41Sopenharmony_ci /* Fall through. */ 581cb0ef41Sopenharmony_ci case 1: 591cb0ef41Sopenharmony_ci if (a > 0xBF) { 601cb0ef41Sopenharmony_ci min = 0x80; 611cb0ef41Sopenharmony_ci b = 0x80; 621cb0ef41Sopenharmony_ci c = 0x80 | (a & 31); 631cb0ef41Sopenharmony_ci d = (unsigned char) *(*p)++; 641cb0ef41Sopenharmony_ci a = 0; 651cb0ef41Sopenharmony_ci break; 661cb0ef41Sopenharmony_ci } 671cb0ef41Sopenharmony_ci /* Fall through. */ 681cb0ef41Sopenharmony_ci case 0: 691cb0ef41Sopenharmony_ci return -1; /* Invalid continuation byte. */ 701cb0ef41Sopenharmony_ci } 711cb0ef41Sopenharmony_ci 721cb0ef41Sopenharmony_ci if (0x80 != (0xC0 & (b ^ c ^ d))) 731cb0ef41Sopenharmony_ci return -1; /* Invalid sequence. */ 741cb0ef41Sopenharmony_ci 751cb0ef41Sopenharmony_ci b &= 63; 761cb0ef41Sopenharmony_ci c &= 63; 771cb0ef41Sopenharmony_ci d &= 63; 781cb0ef41Sopenharmony_ci a = (a << 18) | (b << 12) | (c << 6) | d; 791cb0ef41Sopenharmony_ci 801cb0ef41Sopenharmony_ci if (a < min) 811cb0ef41Sopenharmony_ci return -1; /* Overlong sequence. */ 821cb0ef41Sopenharmony_ci 831cb0ef41Sopenharmony_ci if (a > 0x10FFFF) 841cb0ef41Sopenharmony_ci return -1; /* Four-byte sequence > U+10FFFF. */ 851cb0ef41Sopenharmony_ci 861cb0ef41Sopenharmony_ci if (a >= 0xD800 && a <= 0xDFFF) 871cb0ef41Sopenharmony_ci return -1; /* Surrogate pair. */ 881cb0ef41Sopenharmony_ci 891cb0ef41Sopenharmony_ci return a; 901cb0ef41Sopenharmony_ci} 911cb0ef41Sopenharmony_ci 921cb0ef41Sopenharmony_ciunsigned uv__utf8_decode1(const char** p, const char* pe) { 931cb0ef41Sopenharmony_ci unsigned a; 941cb0ef41Sopenharmony_ci 951cb0ef41Sopenharmony_ci assert(*p < pe); 961cb0ef41Sopenharmony_ci 971cb0ef41Sopenharmony_ci a = (unsigned char) *(*p)++; 981cb0ef41Sopenharmony_ci 991cb0ef41Sopenharmony_ci if (a < 128) 1001cb0ef41Sopenharmony_ci return a; /* ASCII, common case. */ 1011cb0ef41Sopenharmony_ci 1021cb0ef41Sopenharmony_ci return uv__utf8_decode1_slow(p, pe, a); 1031cb0ef41Sopenharmony_ci} 1041cb0ef41Sopenharmony_ci 1051cb0ef41Sopenharmony_cistatic int uv__idna_toascii_label(const char* s, const char* se, 1061cb0ef41Sopenharmony_ci char** d, char* de) { 1071cb0ef41Sopenharmony_ci static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789"; 1081cb0ef41Sopenharmony_ci const char* ss; 1091cb0ef41Sopenharmony_ci unsigned c; 1101cb0ef41Sopenharmony_ci unsigned h; 1111cb0ef41Sopenharmony_ci unsigned k; 1121cb0ef41Sopenharmony_ci unsigned n; 1131cb0ef41Sopenharmony_ci unsigned m; 1141cb0ef41Sopenharmony_ci unsigned q; 1151cb0ef41Sopenharmony_ci unsigned t; 1161cb0ef41Sopenharmony_ci unsigned x; 1171cb0ef41Sopenharmony_ci unsigned y; 1181cb0ef41Sopenharmony_ci unsigned bias; 1191cb0ef41Sopenharmony_ci unsigned delta; 1201cb0ef41Sopenharmony_ci unsigned todo; 1211cb0ef41Sopenharmony_ci int first; 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ci h = 0; 1241cb0ef41Sopenharmony_ci ss = s; 1251cb0ef41Sopenharmony_ci todo = 0; 1261cb0ef41Sopenharmony_ci 1271cb0ef41Sopenharmony_ci /* Note: after this loop we've visited all UTF-8 characters and know 1281cb0ef41Sopenharmony_ci * they're legal so we no longer need to check for decode errors. 1291cb0ef41Sopenharmony_ci */ 1301cb0ef41Sopenharmony_ci while (s < se) { 1311cb0ef41Sopenharmony_ci c = uv__utf8_decode1(&s, se); 1321cb0ef41Sopenharmony_ci 1331cb0ef41Sopenharmony_ci if (c == UINT_MAX) 1341cb0ef41Sopenharmony_ci return UV_EINVAL; 1351cb0ef41Sopenharmony_ci 1361cb0ef41Sopenharmony_ci if (c < 128) 1371cb0ef41Sopenharmony_ci h++; 1381cb0ef41Sopenharmony_ci else 1391cb0ef41Sopenharmony_ci todo++; 1401cb0ef41Sopenharmony_ci } 1411cb0ef41Sopenharmony_ci 1421cb0ef41Sopenharmony_ci /* Only write "xn--" when there are non-ASCII characters. */ 1431cb0ef41Sopenharmony_ci if (todo > 0) { 1441cb0ef41Sopenharmony_ci if (*d < de) *(*d)++ = 'x'; 1451cb0ef41Sopenharmony_ci if (*d < de) *(*d)++ = 'n'; 1461cb0ef41Sopenharmony_ci if (*d < de) *(*d)++ = '-'; 1471cb0ef41Sopenharmony_ci if (*d < de) *(*d)++ = '-'; 1481cb0ef41Sopenharmony_ci } 1491cb0ef41Sopenharmony_ci 1501cb0ef41Sopenharmony_ci /* Write ASCII characters. */ 1511cb0ef41Sopenharmony_ci x = 0; 1521cb0ef41Sopenharmony_ci s = ss; 1531cb0ef41Sopenharmony_ci while (s < se) { 1541cb0ef41Sopenharmony_ci c = uv__utf8_decode1(&s, se); 1551cb0ef41Sopenharmony_ci assert(c != UINT_MAX); 1561cb0ef41Sopenharmony_ci 1571cb0ef41Sopenharmony_ci if (c > 127) 1581cb0ef41Sopenharmony_ci continue; 1591cb0ef41Sopenharmony_ci 1601cb0ef41Sopenharmony_ci if (*d < de) 1611cb0ef41Sopenharmony_ci *(*d)++ = c; 1621cb0ef41Sopenharmony_ci 1631cb0ef41Sopenharmony_ci if (++x == h) 1641cb0ef41Sopenharmony_ci break; /* Visited all ASCII characters. */ 1651cb0ef41Sopenharmony_ci } 1661cb0ef41Sopenharmony_ci 1671cb0ef41Sopenharmony_ci if (todo == 0) 1681cb0ef41Sopenharmony_ci return h; 1691cb0ef41Sopenharmony_ci 1701cb0ef41Sopenharmony_ci /* Only write separator when we've written ASCII characters first. */ 1711cb0ef41Sopenharmony_ci if (h > 0) 1721cb0ef41Sopenharmony_ci if (*d < de) 1731cb0ef41Sopenharmony_ci *(*d)++ = '-'; 1741cb0ef41Sopenharmony_ci 1751cb0ef41Sopenharmony_ci n = 128; 1761cb0ef41Sopenharmony_ci bias = 72; 1771cb0ef41Sopenharmony_ci delta = 0; 1781cb0ef41Sopenharmony_ci first = 1; 1791cb0ef41Sopenharmony_ci 1801cb0ef41Sopenharmony_ci while (todo > 0) { 1811cb0ef41Sopenharmony_ci m = -1; 1821cb0ef41Sopenharmony_ci s = ss; 1831cb0ef41Sopenharmony_ci 1841cb0ef41Sopenharmony_ci while (s < se) { 1851cb0ef41Sopenharmony_ci c = uv__utf8_decode1(&s, se); 1861cb0ef41Sopenharmony_ci assert(c != UINT_MAX); 1871cb0ef41Sopenharmony_ci 1881cb0ef41Sopenharmony_ci if (c >= n) 1891cb0ef41Sopenharmony_ci if (c < m) 1901cb0ef41Sopenharmony_ci m = c; 1911cb0ef41Sopenharmony_ci } 1921cb0ef41Sopenharmony_ci 1931cb0ef41Sopenharmony_ci x = m - n; 1941cb0ef41Sopenharmony_ci y = h + 1; 1951cb0ef41Sopenharmony_ci 1961cb0ef41Sopenharmony_ci if (x > ~delta / y) 1971cb0ef41Sopenharmony_ci return UV_E2BIG; /* Overflow. */ 1981cb0ef41Sopenharmony_ci 1991cb0ef41Sopenharmony_ci delta += x * y; 2001cb0ef41Sopenharmony_ci n = m; 2011cb0ef41Sopenharmony_ci 2021cb0ef41Sopenharmony_ci s = ss; 2031cb0ef41Sopenharmony_ci while (s < se) { 2041cb0ef41Sopenharmony_ci c = uv__utf8_decode1(&s, se); 2051cb0ef41Sopenharmony_ci assert(c != UINT_MAX); 2061cb0ef41Sopenharmony_ci 2071cb0ef41Sopenharmony_ci if (c < n) 2081cb0ef41Sopenharmony_ci if (++delta == 0) 2091cb0ef41Sopenharmony_ci return UV_E2BIG; /* Overflow. */ 2101cb0ef41Sopenharmony_ci 2111cb0ef41Sopenharmony_ci if (c != n) 2121cb0ef41Sopenharmony_ci continue; 2131cb0ef41Sopenharmony_ci 2141cb0ef41Sopenharmony_ci for (k = 36, q = delta; /* empty */; k += 36) { 2151cb0ef41Sopenharmony_ci t = 1; 2161cb0ef41Sopenharmony_ci 2171cb0ef41Sopenharmony_ci if (k > bias) 2181cb0ef41Sopenharmony_ci t = k - bias; 2191cb0ef41Sopenharmony_ci 2201cb0ef41Sopenharmony_ci if (t > 26) 2211cb0ef41Sopenharmony_ci t = 26; 2221cb0ef41Sopenharmony_ci 2231cb0ef41Sopenharmony_ci if (q < t) 2241cb0ef41Sopenharmony_ci break; 2251cb0ef41Sopenharmony_ci 2261cb0ef41Sopenharmony_ci /* TODO(bnoordhuis) Since 1 <= t <= 26 and therefore 2271cb0ef41Sopenharmony_ci * 10 <= y <= 35, we can optimize the long division 2281cb0ef41Sopenharmony_ci * into a table-based reciprocal multiplication. 2291cb0ef41Sopenharmony_ci */ 2301cb0ef41Sopenharmony_ci x = q - t; 2311cb0ef41Sopenharmony_ci y = 36 - t; /* 10 <= y <= 35 since 1 <= t <= 26. */ 2321cb0ef41Sopenharmony_ci q = x / y; 2331cb0ef41Sopenharmony_ci t = t + x % y; /* 1 <= t <= 35 because of y. */ 2341cb0ef41Sopenharmony_ci 2351cb0ef41Sopenharmony_ci if (*d < de) 2361cb0ef41Sopenharmony_ci *(*d)++ = alphabet[t]; 2371cb0ef41Sopenharmony_ci } 2381cb0ef41Sopenharmony_ci 2391cb0ef41Sopenharmony_ci if (*d < de) 2401cb0ef41Sopenharmony_ci *(*d)++ = alphabet[q]; 2411cb0ef41Sopenharmony_ci 2421cb0ef41Sopenharmony_ci delta /= 2; 2431cb0ef41Sopenharmony_ci 2441cb0ef41Sopenharmony_ci if (first) { 2451cb0ef41Sopenharmony_ci delta /= 350; 2461cb0ef41Sopenharmony_ci first = 0; 2471cb0ef41Sopenharmony_ci } 2481cb0ef41Sopenharmony_ci 2491cb0ef41Sopenharmony_ci /* No overflow check is needed because |delta| was just 2501cb0ef41Sopenharmony_ci * divided by 2 and |delta+delta >= delta + delta/h|. 2511cb0ef41Sopenharmony_ci */ 2521cb0ef41Sopenharmony_ci h++; 2531cb0ef41Sopenharmony_ci delta += delta / h; 2541cb0ef41Sopenharmony_ci 2551cb0ef41Sopenharmony_ci for (bias = 0; delta > 35 * 26 / 2; bias += 36) 2561cb0ef41Sopenharmony_ci delta /= 35; 2571cb0ef41Sopenharmony_ci 2581cb0ef41Sopenharmony_ci bias += 36 * delta / (delta + 38); 2591cb0ef41Sopenharmony_ci delta = 0; 2601cb0ef41Sopenharmony_ci todo--; 2611cb0ef41Sopenharmony_ci } 2621cb0ef41Sopenharmony_ci 2631cb0ef41Sopenharmony_ci delta++; 2641cb0ef41Sopenharmony_ci n++; 2651cb0ef41Sopenharmony_ci } 2661cb0ef41Sopenharmony_ci 2671cb0ef41Sopenharmony_ci return 0; 2681cb0ef41Sopenharmony_ci} 2691cb0ef41Sopenharmony_ci 2701cb0ef41Sopenharmony_cilong uv__idna_toascii(const char* s, const char* se, char* d, char* de) { 2711cb0ef41Sopenharmony_ci const char* si; 2721cb0ef41Sopenharmony_ci const char* st; 2731cb0ef41Sopenharmony_ci unsigned c; 2741cb0ef41Sopenharmony_ci char* ds; 2751cb0ef41Sopenharmony_ci int rc; 2761cb0ef41Sopenharmony_ci 2771cb0ef41Sopenharmony_ci if (s == se) 2781cb0ef41Sopenharmony_ci return UV_EINVAL; 2791cb0ef41Sopenharmony_ci 2801cb0ef41Sopenharmony_ci ds = d; 2811cb0ef41Sopenharmony_ci 2821cb0ef41Sopenharmony_ci si = s; 2831cb0ef41Sopenharmony_ci while (si < se) { 2841cb0ef41Sopenharmony_ci st = si; 2851cb0ef41Sopenharmony_ci c = uv__utf8_decode1(&si, se); 2861cb0ef41Sopenharmony_ci 2871cb0ef41Sopenharmony_ci if (c == UINT_MAX) 2881cb0ef41Sopenharmony_ci return UV_EINVAL; 2891cb0ef41Sopenharmony_ci 2901cb0ef41Sopenharmony_ci if (c != '.') 2911cb0ef41Sopenharmony_ci if (c != 0x3002) /* 。 */ 2921cb0ef41Sopenharmony_ci if (c != 0xFF0E) /* . */ 2931cb0ef41Sopenharmony_ci if (c != 0xFF61) /* 。 */ 2941cb0ef41Sopenharmony_ci continue; 2951cb0ef41Sopenharmony_ci 2961cb0ef41Sopenharmony_ci rc = uv__idna_toascii_label(s, st, &d, de); 2971cb0ef41Sopenharmony_ci 2981cb0ef41Sopenharmony_ci if (rc < 0) 2991cb0ef41Sopenharmony_ci return rc; 3001cb0ef41Sopenharmony_ci 3011cb0ef41Sopenharmony_ci if (d < de) 3021cb0ef41Sopenharmony_ci *d++ = '.'; 3031cb0ef41Sopenharmony_ci 3041cb0ef41Sopenharmony_ci s = si; 3051cb0ef41Sopenharmony_ci } 3061cb0ef41Sopenharmony_ci 3071cb0ef41Sopenharmony_ci if (s < se) { 3081cb0ef41Sopenharmony_ci rc = uv__idna_toascii_label(s, se, &d, de); 3091cb0ef41Sopenharmony_ci 3101cb0ef41Sopenharmony_ci if (rc < 0) 3111cb0ef41Sopenharmony_ci return rc; 3121cb0ef41Sopenharmony_ci } 3131cb0ef41Sopenharmony_ci 3141cb0ef41Sopenharmony_ci if (d >= de) 3151cb0ef41Sopenharmony_ci return UV_EINVAL; 3161cb0ef41Sopenharmony_ci 3171cb0ef41Sopenharmony_ci *d++ = '\0'; 3181cb0ef41Sopenharmony_ci return d - ds; /* Number of bytes written. */ 3191cb0ef41Sopenharmony_ci} 320