/*
 * Copyright 2019-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */

#include <stddef.h>
#include <string.h>
#include <stdio.h>
#include <openssl/e_os2.h>
#include "crypto/punycode.h"

/*
 * Punycode parameters.  The values are the ones specified for Punycode in
 * RFC 3492, section 5.
 */
static const unsigned int base = 36;            /* number of digit values */
static const unsigned int tmin = 1;             /* lowest digit threshold */
static const unsigned int tmax = 26;            /* highest digit threshold */
static const unsigned int skew = 38;            /* bias adaptation term */
static const unsigned int damp = 700;           /* first-time delta divisor */
static const unsigned int initial_bias = 72;    /* bias before any adaptation */
static const unsigned int initial_n = 0x80;     /* first non-basic code point */
static const unsigned int maxint = 0xFFFFFFFF;  /* overflow limit in decoder */
static const char delimiter = '-';              /* ends the basic code points */

/* Element count of the scratch buffers used when converting labels */
#define LABEL_BUF_SIZE 512

281cb0ef41Sopenharmony_ci/*-
291cb0ef41Sopenharmony_ci * Pseudocode:
301cb0ef41Sopenharmony_ci *
311cb0ef41Sopenharmony_ci * function adapt(delta,numpoints,firsttime):
321cb0ef41Sopenharmony_ci *  if firsttime then let delta = delta div damp
331cb0ef41Sopenharmony_ci *  else let delta = delta div 2
341cb0ef41Sopenharmony_ci *  let delta = delta + (delta div numpoints)
351cb0ef41Sopenharmony_ci *  let k = 0
361cb0ef41Sopenharmony_ci *  while delta > ((base - tmin) * tmax) div 2 do begin
371cb0ef41Sopenharmony_ci *    let delta = delta div (base - tmin)
381cb0ef41Sopenharmony_ci *    let k = k + base
391cb0ef41Sopenharmony_ci *  end
401cb0ef41Sopenharmony_ci *  return k + (((base - tmin + 1) * delta) div (delta + skew))
411cb0ef41Sopenharmony_ci */
421cb0ef41Sopenharmony_ci
431cb0ef41Sopenharmony_cistatic int adapt(unsigned int delta, unsigned int numpoints,
441cb0ef41Sopenharmony_ci                 unsigned int firsttime)
451cb0ef41Sopenharmony_ci{
461cb0ef41Sopenharmony_ci    unsigned int k = 0;
471cb0ef41Sopenharmony_ci
481cb0ef41Sopenharmony_ci    delta = (firsttime) ? delta / damp : delta / 2;
491cb0ef41Sopenharmony_ci    delta = delta + delta / numpoints;
501cb0ef41Sopenharmony_ci
511cb0ef41Sopenharmony_ci    while (delta > ((base - tmin) * tmax) / 2) {
521cb0ef41Sopenharmony_ci        delta = delta / (base - tmin);
531cb0ef41Sopenharmony_ci        k = k + base;
541cb0ef41Sopenharmony_ci    }
551cb0ef41Sopenharmony_ci
561cb0ef41Sopenharmony_ci    return k + (((base - tmin + 1) * delta) / (delta + skew));
571cb0ef41Sopenharmony_ci}
581cb0ef41Sopenharmony_ci
591cb0ef41Sopenharmony_cistatic ossl_inline int is_basic(unsigned int a)
601cb0ef41Sopenharmony_ci{
611cb0ef41Sopenharmony_ci    return (a < 0x80) ? 1 : 0;
621cb0ef41Sopenharmony_ci}
631cb0ef41Sopenharmony_ci
641cb0ef41Sopenharmony_ci/*-
651cb0ef41Sopenharmony_ci * code points    digit-values
661cb0ef41Sopenharmony_ci * ------------   ----------------------
671cb0ef41Sopenharmony_ci * 41..5A (A-Z) =  0 to 25, respectively
681cb0ef41Sopenharmony_ci * 61..7A (a-z) =  0 to 25, respectively
691cb0ef41Sopenharmony_ci * 30..39 (0-9) = 26 to 35, respectively
701cb0ef41Sopenharmony_ci */
711cb0ef41Sopenharmony_cistatic ossl_inline int digit_decoded(const unsigned char a)
721cb0ef41Sopenharmony_ci{
731cb0ef41Sopenharmony_ci    if (a >= 0x41 && a <= 0x5A)
741cb0ef41Sopenharmony_ci        return a - 0x41;
751cb0ef41Sopenharmony_ci
761cb0ef41Sopenharmony_ci    if (a >= 0x61 && a <= 0x7A)
771cb0ef41Sopenharmony_ci        return a - 0x61;
781cb0ef41Sopenharmony_ci
791cb0ef41Sopenharmony_ci    if (a >= 0x30 && a <= 0x39)
801cb0ef41Sopenharmony_ci        return a - 0x30 + 26;
811cb0ef41Sopenharmony_ci
821cb0ef41Sopenharmony_ci    return -1;
831cb0ef41Sopenharmony_ci}
841cb0ef41Sopenharmony_ci
851cb0ef41Sopenharmony_ci/*-
861cb0ef41Sopenharmony_ci * Pseudocode:
871cb0ef41Sopenharmony_ci *
881cb0ef41Sopenharmony_ci * function ossl_punycode_decode
891cb0ef41Sopenharmony_ci *  let n = initial_n
901cb0ef41Sopenharmony_ci *  let i = 0
911cb0ef41Sopenharmony_ci *  let bias = initial_bias
921cb0ef41Sopenharmony_ci *  let output = an empty string indexed from 0
931cb0ef41Sopenharmony_ci *  consume all code points before the last delimiter (if there is one)
941cb0ef41Sopenharmony_ci *    and copy them to output, fail on any non-basic code point
951cb0ef41Sopenharmony_ci *  if more than zero code points were consumed then consume one more
961cb0ef41Sopenharmony_ci *    (which will be the last delimiter)
971cb0ef41Sopenharmony_ci *  while the input is not exhausted do begin
981cb0ef41Sopenharmony_ci *    let oldi = i
991cb0ef41Sopenharmony_ci *    let w = 1
1001cb0ef41Sopenharmony_ci *    for k = base to infinity in steps of base do begin
1011cb0ef41Sopenharmony_ci *      consume a code point, or fail if there was none to consume
1021cb0ef41Sopenharmony_ci *      let digit = the code point's digit-value, fail if it has none
1031cb0ef41Sopenharmony_ci *      let i = i + digit * w, fail on overflow
1041cb0ef41Sopenharmony_ci *      let t = tmin if k <= bias {+ tmin}, or
1051cb0ef41Sopenharmony_ci *              tmax if k >= bias + tmax, or k - bias otherwise
1061cb0ef41Sopenharmony_ci *      if digit < t then break
1071cb0ef41Sopenharmony_ci *      let w = w * (base - t), fail on overflow
1081cb0ef41Sopenharmony_ci *    end
1091cb0ef41Sopenharmony_ci *    let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
1101cb0ef41Sopenharmony_ci *    let n = n + i div (length(output) + 1), fail on overflow
1111cb0ef41Sopenharmony_ci *    let i = i mod (length(output) + 1)
1121cb0ef41Sopenharmony_ci *    {if n is a basic code point then fail}
1131cb0ef41Sopenharmony_ci *    insert n into output at position i
1141cb0ef41Sopenharmony_ci *    increment i
1151cb0ef41Sopenharmony_ci *  end
1161cb0ef41Sopenharmony_ci */
1171cb0ef41Sopenharmony_ci
1181cb0ef41Sopenharmony_ciint ossl_punycode_decode(const char *pEncoded, const size_t enc_len,
1191cb0ef41Sopenharmony_ci                         unsigned int *pDecoded, unsigned int *pout_length)
1201cb0ef41Sopenharmony_ci{
1211cb0ef41Sopenharmony_ci    unsigned int n = initial_n;
1221cb0ef41Sopenharmony_ci    unsigned int i = 0;
1231cb0ef41Sopenharmony_ci    unsigned int bias = initial_bias;
1241cb0ef41Sopenharmony_ci    size_t processed_in = 0, written_out = 0;
1251cb0ef41Sopenharmony_ci    unsigned int max_out = *pout_length;
1261cb0ef41Sopenharmony_ci    unsigned int basic_count = 0;
1271cb0ef41Sopenharmony_ci    unsigned int loop;
1281cb0ef41Sopenharmony_ci
1291cb0ef41Sopenharmony_ci    for (loop = 0; loop < enc_len; loop++) {
1301cb0ef41Sopenharmony_ci        if (pEncoded[loop] == delimiter)
1311cb0ef41Sopenharmony_ci            basic_count = loop;
1321cb0ef41Sopenharmony_ci    }
1331cb0ef41Sopenharmony_ci
1341cb0ef41Sopenharmony_ci    if (basic_count > 0) {
1351cb0ef41Sopenharmony_ci        if (basic_count > max_out)
1361cb0ef41Sopenharmony_ci            return 0;
1371cb0ef41Sopenharmony_ci
1381cb0ef41Sopenharmony_ci        for (loop = 0; loop < basic_count; loop++) {
1391cb0ef41Sopenharmony_ci            if (is_basic(pEncoded[loop]) == 0)
1401cb0ef41Sopenharmony_ci                return 0;
1411cb0ef41Sopenharmony_ci
1421cb0ef41Sopenharmony_ci            pDecoded[loop] = pEncoded[loop];
1431cb0ef41Sopenharmony_ci            written_out++;
1441cb0ef41Sopenharmony_ci        }
1451cb0ef41Sopenharmony_ci        processed_in = basic_count + 1;
1461cb0ef41Sopenharmony_ci    }
1471cb0ef41Sopenharmony_ci
1481cb0ef41Sopenharmony_ci    for (loop = processed_in; loop < enc_len;) {
1491cb0ef41Sopenharmony_ci        unsigned int oldi = i;
1501cb0ef41Sopenharmony_ci        unsigned int w = 1;
1511cb0ef41Sopenharmony_ci        unsigned int k, t;
1521cb0ef41Sopenharmony_ci        int digit;
1531cb0ef41Sopenharmony_ci
1541cb0ef41Sopenharmony_ci        for (k = base;; k += base) {
1551cb0ef41Sopenharmony_ci            if (loop >= enc_len)
1561cb0ef41Sopenharmony_ci                return 0;
1571cb0ef41Sopenharmony_ci
1581cb0ef41Sopenharmony_ci            digit = digit_decoded(pEncoded[loop]);
1591cb0ef41Sopenharmony_ci            loop++;
1601cb0ef41Sopenharmony_ci
1611cb0ef41Sopenharmony_ci            if (digit < 0)
1621cb0ef41Sopenharmony_ci                return 0;
1631cb0ef41Sopenharmony_ci            if ((unsigned int)digit > (maxint - i) / w)
1641cb0ef41Sopenharmony_ci                return 0;
1651cb0ef41Sopenharmony_ci
1661cb0ef41Sopenharmony_ci            i = i + digit * w;
1671cb0ef41Sopenharmony_ci            t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
1681cb0ef41Sopenharmony_ci
1691cb0ef41Sopenharmony_ci            if ((unsigned int)digit < t)
1701cb0ef41Sopenharmony_ci                break;
1711cb0ef41Sopenharmony_ci
1721cb0ef41Sopenharmony_ci            if (w > maxint / (base - t))
1731cb0ef41Sopenharmony_ci                return 0;
1741cb0ef41Sopenharmony_ci            w = w * (base - t);
1751cb0ef41Sopenharmony_ci        }
1761cb0ef41Sopenharmony_ci
1771cb0ef41Sopenharmony_ci        bias = adapt(i - oldi, written_out + 1, (oldi == 0));
1781cb0ef41Sopenharmony_ci        if (i / (written_out + 1) > maxint - n)
1791cb0ef41Sopenharmony_ci            return 0;
1801cb0ef41Sopenharmony_ci        n = n + i / (written_out + 1);
1811cb0ef41Sopenharmony_ci        i %= (written_out + 1);
1821cb0ef41Sopenharmony_ci
1831cb0ef41Sopenharmony_ci        if (written_out >= max_out)
1841cb0ef41Sopenharmony_ci            return 0;
1851cb0ef41Sopenharmony_ci
1861cb0ef41Sopenharmony_ci        memmove(pDecoded + i + 1, pDecoded + i,
1871cb0ef41Sopenharmony_ci                (written_out - i) * sizeof(*pDecoded));
1881cb0ef41Sopenharmony_ci        pDecoded[i] = n;
1891cb0ef41Sopenharmony_ci        i++;
1901cb0ef41Sopenharmony_ci        written_out++;
1911cb0ef41Sopenharmony_ci    }
1921cb0ef41Sopenharmony_ci
1931cb0ef41Sopenharmony_ci    *pout_length = written_out;
1941cb0ef41Sopenharmony_ci    return 1;
1951cb0ef41Sopenharmony_ci}
1961cb0ef41Sopenharmony_ci
/*
 * Encode a code point using UTF-8.
 *
 * |out| must have room for 5 bytes: at most 4 bytes of encoding followed by
 * a NUL byte, which is always written.
 *
 * Returns the number of encoded bytes (1 to 4, not counting the NUL) on
 * success and 0 on failure.  Failure means |utf| is above 0x10FFFF or is a
 * surrogate (0xD800 to 0xDFFF), neither of which may be encoded in UTF-8.
 * On failure |out| receives the 3 byte encoding of U+FFFD.
 */
static int codepoint2utf8(unsigned char *out, unsigned long utf)
{
    if (utf > 0x10FFFF || (utf >= 0xD800 && utf <= 0xDFFF)) {
        /* error - use replacement character */
        out[0] = (unsigned char)0xEF;
        out[1] = (unsigned char)0xBF;
        out[2] = (unsigned char)0xBD;
        out[3] = 0;
        return 0;
    }

    if (utf <= 0x7F) {
        /* Plain ASCII */
        out[0] = (unsigned char)utf;
        out[1] = 0;
        return 1;
    }

    if (utf <= 0x07FF) {
        /* 2-byte unicode */
        out[0] = (unsigned char)(((utf >> 6) & 0x1F) | 0xC0);
        out[1] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
        out[2] = 0;
        return 2;
    }

    if (utf <= 0xFFFF) {
        /* 3-byte unicode */
        out[0] = (unsigned char)(((utf >> 12) & 0x0F) | 0xE0);
        out[1] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80);
        out[2] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
        out[3] = 0;
        return 3;
    }

    /* 4-byte unicode */
    out[0] = (unsigned char)(((utf >> 18) & 0x07) | 0xF0);
    out[1] = (unsigned char)(((utf >> 12) & 0x3F) | 0x80);
    out[2] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80);
    out[3] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
    out[4] = 0;
    return 4;
}

2401cb0ef41Sopenharmony_ci/*-
2411cb0ef41Sopenharmony_ci * Return values:
2421cb0ef41Sopenharmony_ci * 1 - ok, *outlen contains valid buf length
2431cb0ef41Sopenharmony_ci * 0 - ok but buf was too short, *outlen contains valid buf length
2441cb0ef41Sopenharmony_ci * -1 - bad string passed
2451cb0ef41Sopenharmony_ci */
2461cb0ef41Sopenharmony_ci
2471cb0ef41Sopenharmony_ciint ossl_a2ulabel(const char *in, char *out, size_t *outlen)
2481cb0ef41Sopenharmony_ci{
2491cb0ef41Sopenharmony_ci    /*-
2501cb0ef41Sopenharmony_ci     * Domain name has some parts consisting of ASCII chars joined with dot.
2511cb0ef41Sopenharmony_ci     * If a part is shorter than 5 chars, it becomes U-label as is.
2521cb0ef41Sopenharmony_ci     * If it does not start with xn--,    it becomes U-label as is.
2531cb0ef41Sopenharmony_ci     * Otherwise we try to decode it.
2541cb0ef41Sopenharmony_ci     */
2551cb0ef41Sopenharmony_ci    char *outptr = out;
2561cb0ef41Sopenharmony_ci    const char *inptr = in;
2571cb0ef41Sopenharmony_ci    size_t size = 0, maxsize;
2581cb0ef41Sopenharmony_ci    int result = 1;
2591cb0ef41Sopenharmony_ci    unsigned int i, j;
2601cb0ef41Sopenharmony_ci    unsigned int buf[LABEL_BUF_SIZE];      /* It's a hostname */
2611cb0ef41Sopenharmony_ci
2621cb0ef41Sopenharmony_ci    if (out == NULL) {
2631cb0ef41Sopenharmony_ci        result = 0;
2641cb0ef41Sopenharmony_ci        maxsize = 0;
2651cb0ef41Sopenharmony_ci    } else {
2661cb0ef41Sopenharmony_ci        maxsize = *outlen;
2671cb0ef41Sopenharmony_ci    }
2681cb0ef41Sopenharmony_ci
2691cb0ef41Sopenharmony_ci#define PUSHC(c)                    \
2701cb0ef41Sopenharmony_ci    do                              \
2711cb0ef41Sopenharmony_ci        if (size++ < maxsize)       \
2721cb0ef41Sopenharmony_ci            *outptr++ = c;          \
2731cb0ef41Sopenharmony_ci        else                        \
2741cb0ef41Sopenharmony_ci            result = 0;             \
2751cb0ef41Sopenharmony_ci    while (0)
2761cb0ef41Sopenharmony_ci
2771cb0ef41Sopenharmony_ci    while (1) {
2781cb0ef41Sopenharmony_ci        char *tmpptr = strchr(inptr, '.');
2791cb0ef41Sopenharmony_ci        size_t delta = tmpptr != NULL ? (size_t)(tmpptr - inptr) : strlen(inptr);
2801cb0ef41Sopenharmony_ci
2811cb0ef41Sopenharmony_ci        if (strncmp(inptr, "xn--", 4) != 0) {
2821cb0ef41Sopenharmony_ci            for (i = 0; i < delta + 1; i++)
2831cb0ef41Sopenharmony_ci                PUSHC(inptr[i]);
2841cb0ef41Sopenharmony_ci        } else {
2851cb0ef41Sopenharmony_ci            unsigned int bufsize = LABEL_BUF_SIZE;
2861cb0ef41Sopenharmony_ci
2871cb0ef41Sopenharmony_ci            if (ossl_punycode_decode(inptr + 4, delta - 4, buf, &bufsize) <= 0)
2881cb0ef41Sopenharmony_ci                return -1;
2891cb0ef41Sopenharmony_ci
2901cb0ef41Sopenharmony_ci            for (i = 0; i < bufsize; i++) {
2911cb0ef41Sopenharmony_ci                unsigned char seed[6];
2921cb0ef41Sopenharmony_ci                size_t utfsize = codepoint2utf8(seed, buf[i]);
2931cb0ef41Sopenharmony_ci
2941cb0ef41Sopenharmony_ci                if (utfsize == 0)
2951cb0ef41Sopenharmony_ci                    return -1;
2961cb0ef41Sopenharmony_ci
2971cb0ef41Sopenharmony_ci                for (j = 0; j < utfsize; j++)
2981cb0ef41Sopenharmony_ci                    PUSHC(seed[j]);
2991cb0ef41Sopenharmony_ci            }
3001cb0ef41Sopenharmony_ci
3011cb0ef41Sopenharmony_ci            PUSHC(tmpptr != NULL ? '.' : '\0');
3021cb0ef41Sopenharmony_ci        }
3031cb0ef41Sopenharmony_ci
3041cb0ef41Sopenharmony_ci        if (tmpptr == NULL)
3051cb0ef41Sopenharmony_ci            break;
3061cb0ef41Sopenharmony_ci
3071cb0ef41Sopenharmony_ci        inptr = tmpptr + 1;
3081cb0ef41Sopenharmony_ci    }
3091cb0ef41Sopenharmony_ci#undef PUSHC
3101cb0ef41Sopenharmony_ci
3111cb0ef41Sopenharmony_ci    *outlen = size;
3121cb0ef41Sopenharmony_ci    return result;
3131cb0ef41Sopenharmony_ci}
3141cb0ef41Sopenharmony_ci
3151cb0ef41Sopenharmony_ci/*-
3161cb0ef41Sopenharmony_ci * a MUST be A-label
3171cb0ef41Sopenharmony_ci * u MUST be U-label
3181cb0ef41Sopenharmony_ci * Returns 0 if compared values are equal
3191cb0ef41Sopenharmony_ci * 1 if not
3201cb0ef41Sopenharmony_ci * -1 in case of errors
3211cb0ef41Sopenharmony_ci */
3221cb0ef41Sopenharmony_ci
3231cb0ef41Sopenharmony_ciint ossl_a2ucompare(const char *a, const char *u)
3241cb0ef41Sopenharmony_ci{
3251cb0ef41Sopenharmony_ci    char a_ulabel[LABEL_BUF_SIZE + 1];
3261cb0ef41Sopenharmony_ci    size_t a_size = sizeof(a_ulabel);
3271cb0ef41Sopenharmony_ci
3281cb0ef41Sopenharmony_ci    if (ossl_a2ulabel(a, a_ulabel, &a_size) <= 0)
3291cb0ef41Sopenharmony_ci        return -1;
3301cb0ef41Sopenharmony_ci
3311cb0ef41Sopenharmony_ci    return strcmp(a_ulabel, u) != 0;
3321cb0ef41Sopenharmony_ci}
333