/*
 * Copyright 2019-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License 2.0 (the "License").  You may not use
 * this file except in compliance with the License.  You can obtain a copy
 * in the file LICENSE in the source distribution or at
 * https://www.openssl.org/source/license.html
 */
9e1051a39Sopenharmony_ci
10e1051a39Sopenharmony_ci#include <stddef.h>
11e1051a39Sopenharmony_ci#include <string.h>
12e1051a39Sopenharmony_ci#include <stdio.h>
13e1051a39Sopenharmony_ci#include <openssl/e_os2.h>
14e1051a39Sopenharmony_ci#include "crypto/punycode.h"
15e1051a39Sopenharmony_ci
/*
 * Bootstring parameter values for Punycode (RFC 3492, section 5).
 * They are consumed by adapt() and ossl_punycode_decode().
 */
static const unsigned int base = 36U;            /* number of digit values */
static const unsigned int tmin = 1U;             /* lowest threshold */
static const unsigned int tmax = 26U;            /* highest threshold */
static const unsigned int skew = 38U;            /* bias adaptation skew */
static const unsigned int damp = 700U;           /* divisor for first delta */
static const unsigned int initial_bias = 72U;    /* bias before adaptation */
static const unsigned int initial_n = 0x80U;     /* first non-basic value */
static const unsigned int maxint = 0xFFFFFFFFU;  /* overflow check limit */
static const char delimiter = '-';               /* ends the literal prefix */

/*
 * Number of code points in the per-label scratch buffer of ossl_a2ulabel();
 * ossl_a2ucompare() sizes its output buffer as LABEL_BUF_SIZE + 1 bytes.
 */
#define LABEL_BUF_SIZE 512
27e1051a39Sopenharmony_ci
28e1051a39Sopenharmony_ci/*-
29e1051a39Sopenharmony_ci * Pseudocode:
30e1051a39Sopenharmony_ci *
31e1051a39Sopenharmony_ci * function adapt(delta,numpoints,firsttime):
32e1051a39Sopenharmony_ci *  if firsttime then let delta = delta div damp
33e1051a39Sopenharmony_ci *  else let delta = delta div 2
34e1051a39Sopenharmony_ci *  let delta = delta + (delta div numpoints)
35e1051a39Sopenharmony_ci *  let k = 0
36e1051a39Sopenharmony_ci *  while delta > ((base - tmin) * tmax) div 2 do begin
37e1051a39Sopenharmony_ci *    let delta = delta div (base - tmin)
38e1051a39Sopenharmony_ci *    let k = k + base
39e1051a39Sopenharmony_ci *  end
40e1051a39Sopenharmony_ci *  return k + (((base - tmin + 1) * delta) div (delta + skew))
41e1051a39Sopenharmony_ci */
42e1051a39Sopenharmony_ci
43e1051a39Sopenharmony_cistatic int adapt(unsigned int delta, unsigned int numpoints,
44e1051a39Sopenharmony_ci                 unsigned int firsttime)
45e1051a39Sopenharmony_ci{
46e1051a39Sopenharmony_ci    unsigned int k = 0;
47e1051a39Sopenharmony_ci
48e1051a39Sopenharmony_ci    delta = (firsttime) ? delta / damp : delta / 2;
49e1051a39Sopenharmony_ci    delta = delta + delta / numpoints;
50e1051a39Sopenharmony_ci
51e1051a39Sopenharmony_ci    while (delta > ((base - tmin) * tmax) / 2) {
52e1051a39Sopenharmony_ci        delta = delta / (base - tmin);
53e1051a39Sopenharmony_ci        k = k + base;
54e1051a39Sopenharmony_ci    }
55e1051a39Sopenharmony_ci
56e1051a39Sopenharmony_ci    return k + (((base - tmin + 1) * delta) / (delta + skew));
57e1051a39Sopenharmony_ci}
58e1051a39Sopenharmony_ci
59e1051a39Sopenharmony_cistatic ossl_inline int is_basic(unsigned int a)
60e1051a39Sopenharmony_ci{
61e1051a39Sopenharmony_ci    return (a < 0x80) ? 1 : 0;
62e1051a39Sopenharmony_ci}
63e1051a39Sopenharmony_ci
64e1051a39Sopenharmony_ci/*-
65e1051a39Sopenharmony_ci * code points    digit-values
66e1051a39Sopenharmony_ci * ------------   ----------------------
67e1051a39Sopenharmony_ci * 41..5A (A-Z) =  0 to 25, respectively
68e1051a39Sopenharmony_ci * 61..7A (a-z) =  0 to 25, respectively
69e1051a39Sopenharmony_ci * 30..39 (0-9) = 26 to 35, respectively
70e1051a39Sopenharmony_ci */
71e1051a39Sopenharmony_cistatic ossl_inline int digit_decoded(const unsigned char a)
72e1051a39Sopenharmony_ci{
73e1051a39Sopenharmony_ci    if (a >= 0x41 && a <= 0x5A)
74e1051a39Sopenharmony_ci        return a - 0x41;
75e1051a39Sopenharmony_ci
76e1051a39Sopenharmony_ci    if (a >= 0x61 && a <= 0x7A)
77e1051a39Sopenharmony_ci        return a - 0x61;
78e1051a39Sopenharmony_ci
79e1051a39Sopenharmony_ci    if (a >= 0x30 && a <= 0x39)
80e1051a39Sopenharmony_ci        return a - 0x30 + 26;
81e1051a39Sopenharmony_ci
82e1051a39Sopenharmony_ci    return -1;
83e1051a39Sopenharmony_ci}
84e1051a39Sopenharmony_ci
85e1051a39Sopenharmony_ci/*-
86e1051a39Sopenharmony_ci * Pseudocode:
87e1051a39Sopenharmony_ci *
88e1051a39Sopenharmony_ci * function ossl_punycode_decode
89e1051a39Sopenharmony_ci *  let n = initial_n
90e1051a39Sopenharmony_ci *  let i = 0
91e1051a39Sopenharmony_ci *  let bias = initial_bias
92e1051a39Sopenharmony_ci *  let output = an empty string indexed from 0
93e1051a39Sopenharmony_ci *  consume all code points before the last delimiter (if there is one)
94e1051a39Sopenharmony_ci *    and copy them to output, fail on any non-basic code point
95e1051a39Sopenharmony_ci *  if more than zero code points were consumed then consume one more
96e1051a39Sopenharmony_ci *    (which will be the last delimiter)
97e1051a39Sopenharmony_ci *  while the input is not exhausted do begin
98e1051a39Sopenharmony_ci *    let oldi = i
99e1051a39Sopenharmony_ci *    let w = 1
100e1051a39Sopenharmony_ci *    for k = base to infinity in steps of base do begin
101e1051a39Sopenharmony_ci *      consume a code point, or fail if there was none to consume
102e1051a39Sopenharmony_ci *      let digit = the code point's digit-value, fail if it has none
103e1051a39Sopenharmony_ci *      let i = i + digit * w, fail on overflow
104e1051a39Sopenharmony_ci *      let t = tmin if k <= bias {+ tmin}, or
105e1051a39Sopenharmony_ci *              tmax if k >= bias + tmax, or k - bias otherwise
106e1051a39Sopenharmony_ci *      if digit < t then break
107e1051a39Sopenharmony_ci *      let w = w * (base - t), fail on overflow
108e1051a39Sopenharmony_ci *    end
109e1051a39Sopenharmony_ci *    let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?)
110e1051a39Sopenharmony_ci *    let n = n + i div (length(output) + 1), fail on overflow
111e1051a39Sopenharmony_ci *    let i = i mod (length(output) + 1)
112e1051a39Sopenharmony_ci *    {if n is a basic code point then fail}
113e1051a39Sopenharmony_ci *    insert n into output at position i
114e1051a39Sopenharmony_ci *    increment i
115e1051a39Sopenharmony_ci *  end
116e1051a39Sopenharmony_ci */
117e1051a39Sopenharmony_ci
118e1051a39Sopenharmony_ciint ossl_punycode_decode(const char *pEncoded, const size_t enc_len,
119e1051a39Sopenharmony_ci                         unsigned int *pDecoded, unsigned int *pout_length)
120e1051a39Sopenharmony_ci{
121e1051a39Sopenharmony_ci    unsigned int n = initial_n;
122e1051a39Sopenharmony_ci    unsigned int i = 0;
123e1051a39Sopenharmony_ci    unsigned int bias = initial_bias;
124e1051a39Sopenharmony_ci    size_t processed_in = 0, written_out = 0;
125e1051a39Sopenharmony_ci    unsigned int max_out = *pout_length;
126e1051a39Sopenharmony_ci    unsigned int basic_count = 0;
127e1051a39Sopenharmony_ci    unsigned int loop;
128e1051a39Sopenharmony_ci
129e1051a39Sopenharmony_ci    for (loop = 0; loop < enc_len; loop++) {
130e1051a39Sopenharmony_ci        if (pEncoded[loop] == delimiter)
131e1051a39Sopenharmony_ci            basic_count = loop;
132e1051a39Sopenharmony_ci    }
133e1051a39Sopenharmony_ci
134e1051a39Sopenharmony_ci    if (basic_count > 0) {
135e1051a39Sopenharmony_ci        if (basic_count > max_out)
136e1051a39Sopenharmony_ci            return 0;
137e1051a39Sopenharmony_ci
138e1051a39Sopenharmony_ci        for (loop = 0; loop < basic_count; loop++) {
139e1051a39Sopenharmony_ci            if (is_basic(pEncoded[loop]) == 0)
140e1051a39Sopenharmony_ci                return 0;
141e1051a39Sopenharmony_ci
142e1051a39Sopenharmony_ci            pDecoded[loop] = pEncoded[loop];
143e1051a39Sopenharmony_ci            written_out++;
144e1051a39Sopenharmony_ci        }
145e1051a39Sopenharmony_ci        processed_in = basic_count + 1;
146e1051a39Sopenharmony_ci    }
147e1051a39Sopenharmony_ci
148e1051a39Sopenharmony_ci    for (loop = processed_in; loop < enc_len;) {
149e1051a39Sopenharmony_ci        unsigned int oldi = i;
150e1051a39Sopenharmony_ci        unsigned int w = 1;
151e1051a39Sopenharmony_ci        unsigned int k, t;
152e1051a39Sopenharmony_ci        int digit;
153e1051a39Sopenharmony_ci
154e1051a39Sopenharmony_ci        for (k = base;; k += base) {
155e1051a39Sopenharmony_ci            if (loop >= enc_len)
156e1051a39Sopenharmony_ci                return 0;
157e1051a39Sopenharmony_ci
158e1051a39Sopenharmony_ci            digit = digit_decoded(pEncoded[loop]);
159e1051a39Sopenharmony_ci            loop++;
160e1051a39Sopenharmony_ci
161e1051a39Sopenharmony_ci            if (digit < 0)
162e1051a39Sopenharmony_ci                return 0;
163e1051a39Sopenharmony_ci            if ((unsigned int)digit > (maxint - i) / w)
164e1051a39Sopenharmony_ci                return 0;
165e1051a39Sopenharmony_ci
166e1051a39Sopenharmony_ci            i = i + digit * w;
167e1051a39Sopenharmony_ci            t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias;
168e1051a39Sopenharmony_ci
169e1051a39Sopenharmony_ci            if ((unsigned int)digit < t)
170e1051a39Sopenharmony_ci                break;
171e1051a39Sopenharmony_ci
172e1051a39Sopenharmony_ci            if (w > maxint / (base - t))
173e1051a39Sopenharmony_ci                return 0;
174e1051a39Sopenharmony_ci            w = w * (base - t);
175e1051a39Sopenharmony_ci        }
176e1051a39Sopenharmony_ci
177e1051a39Sopenharmony_ci        bias = adapt(i - oldi, written_out + 1, (oldi == 0));
178e1051a39Sopenharmony_ci        if (i / (written_out + 1) > maxint - n)
179e1051a39Sopenharmony_ci            return 0;
180e1051a39Sopenharmony_ci        n = n + i / (written_out + 1);
181e1051a39Sopenharmony_ci        i %= (written_out + 1);
182e1051a39Sopenharmony_ci
183e1051a39Sopenharmony_ci        if (written_out >= max_out)
184e1051a39Sopenharmony_ci            return 0;
185e1051a39Sopenharmony_ci
186e1051a39Sopenharmony_ci        memmove(pDecoded + i + 1, pDecoded + i,
187e1051a39Sopenharmony_ci                (written_out - i) * sizeof(*pDecoded));
188e1051a39Sopenharmony_ci        pDecoded[i] = n;
189e1051a39Sopenharmony_ci        i++;
190e1051a39Sopenharmony_ci        written_out++;
191e1051a39Sopenharmony_ci    }
192e1051a39Sopenharmony_ci
193e1051a39Sopenharmony_ci    *pout_length = written_out;
194e1051a39Sopenharmony_ci    return 1;
195e1051a39Sopenharmony_ci}
196e1051a39Sopenharmony_ci
/*
 * Encode a code point using UTF-8.
 *
 * out must have room for at least 5 bytes: up to 4 encoded bytes followed
 * by the NUL terminator that is always appended.
 *
 * Returns the number of encoded bytes (1 to 4, not counting the NUL) on
 * success, 0 on failure.  Failure means utf is above U+10FFFF or is a
 * UTF-16 surrogate (U+D800..U+DFFF), which must not appear in UTF-8
 * (RFC 3629, section 3).  On failure the 3-byte encoding of U+FFFD and a
 * NUL are stored in out.
 */
static int codepoint2utf8(unsigned char *out, unsigned long utf)
{
    if (utf > 0x10FFFF || (utf >= 0xD800 && utf <= 0xDFFF)) {
        /* error - use replacement character */
        out[0] = (unsigned char)0xEF;
        out[1] = (unsigned char)0xBF;
        out[2] = (unsigned char)0xBD;
        out[3] = 0;
        return 0;
    }

    if (utf <= 0x7F) {
        /* Plain ASCII */
        out[0] = (unsigned char)utf;
        out[1] = 0;
        return 1;
    }

    if (utf <= 0x07FF) {
        /* 2-byte unicode */
        out[0] = (unsigned char)(((utf >> 6) & 0x1F) | 0xC0);
        out[1] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
        out[2] = 0;
        return 2;
    }

    if (utf <= 0xFFFF) {
        /* 3-byte unicode (surrogates were rejected above) */
        out[0] = (unsigned char)(((utf >> 12) & 0x0F) | 0xE0);
        out[1] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80);
        out[2] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
        out[3] = 0;
        return 3;
    }

    /* 4-byte unicode */
    out[0] = (unsigned char)(((utf >> 18) & 0x07) | 0xF0);
    out[1] = (unsigned char)(((utf >> 12) & 0x3F) | 0x80);
    out[2] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80);
    out[3] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80);
    out[4] = 0;
    return 4;
}
239e1051a39Sopenharmony_ci
240e1051a39Sopenharmony_ci/*-
241e1051a39Sopenharmony_ci * Return values:
242e1051a39Sopenharmony_ci * 1 - ok, *outlen contains valid buf length
243e1051a39Sopenharmony_ci * 0 - ok but buf was too short, *outlen contains valid buf length
244e1051a39Sopenharmony_ci * -1 - bad string passed
245e1051a39Sopenharmony_ci */
246e1051a39Sopenharmony_ci
247e1051a39Sopenharmony_ciint ossl_a2ulabel(const char *in, char *out, size_t *outlen)
248e1051a39Sopenharmony_ci{
249e1051a39Sopenharmony_ci    /*-
250e1051a39Sopenharmony_ci     * Domain name has some parts consisting of ASCII chars joined with dot.
251e1051a39Sopenharmony_ci     * If a part is shorter than 5 chars, it becomes U-label as is.
252e1051a39Sopenharmony_ci     * If it does not start with xn--,    it becomes U-label as is.
253e1051a39Sopenharmony_ci     * Otherwise we try to decode it.
254e1051a39Sopenharmony_ci     */
255e1051a39Sopenharmony_ci    char *outptr = out;
256e1051a39Sopenharmony_ci    const char *inptr = in;
257e1051a39Sopenharmony_ci    size_t size = 0, maxsize;
258e1051a39Sopenharmony_ci    int result = 1;
259e1051a39Sopenharmony_ci    unsigned int i, j;
260e1051a39Sopenharmony_ci    unsigned int buf[LABEL_BUF_SIZE];      /* It's a hostname */
261e1051a39Sopenharmony_ci
262e1051a39Sopenharmony_ci    if (out == NULL) {
263e1051a39Sopenharmony_ci        result = 0;
264e1051a39Sopenharmony_ci        maxsize = 0;
265e1051a39Sopenharmony_ci    } else {
266e1051a39Sopenharmony_ci        maxsize = *outlen;
267e1051a39Sopenharmony_ci    }
268e1051a39Sopenharmony_ci
269e1051a39Sopenharmony_ci#define PUSHC(c)                    \
270e1051a39Sopenharmony_ci    do                              \
271e1051a39Sopenharmony_ci        if (size++ < maxsize)       \
272e1051a39Sopenharmony_ci            *outptr++ = c;          \
273e1051a39Sopenharmony_ci        else                        \
274e1051a39Sopenharmony_ci            result = 0;             \
275e1051a39Sopenharmony_ci    while (0)
276e1051a39Sopenharmony_ci
277e1051a39Sopenharmony_ci    while (1) {
278e1051a39Sopenharmony_ci        char *tmpptr = strchr(inptr, '.');
279e1051a39Sopenharmony_ci        size_t delta = tmpptr != NULL ? (size_t)(tmpptr - inptr) : strlen(inptr);
280e1051a39Sopenharmony_ci
281e1051a39Sopenharmony_ci        if (strncmp(inptr, "xn--", 4) != 0) {
282e1051a39Sopenharmony_ci            for (i = 0; i < delta + 1; i++)
283e1051a39Sopenharmony_ci                PUSHC(inptr[i]);
284e1051a39Sopenharmony_ci        } else {
285e1051a39Sopenharmony_ci            unsigned int bufsize = LABEL_BUF_SIZE;
286e1051a39Sopenharmony_ci
287e1051a39Sopenharmony_ci            if (ossl_punycode_decode(inptr + 4, delta - 4, buf, &bufsize) <= 0)
288e1051a39Sopenharmony_ci                return -1;
289e1051a39Sopenharmony_ci
290e1051a39Sopenharmony_ci            for (i = 0; i < bufsize; i++) {
291e1051a39Sopenharmony_ci                unsigned char seed[6];
292e1051a39Sopenharmony_ci                size_t utfsize = codepoint2utf8(seed, buf[i]);
293e1051a39Sopenharmony_ci
294e1051a39Sopenharmony_ci                if (utfsize == 0)
295e1051a39Sopenharmony_ci                    return -1;
296e1051a39Sopenharmony_ci
297e1051a39Sopenharmony_ci                for (j = 0; j < utfsize; j++)
298e1051a39Sopenharmony_ci                    PUSHC(seed[j]);
299e1051a39Sopenharmony_ci            }
300e1051a39Sopenharmony_ci
301e1051a39Sopenharmony_ci            PUSHC(tmpptr != NULL ? '.' : '\0');
302e1051a39Sopenharmony_ci        }
303e1051a39Sopenharmony_ci
304e1051a39Sopenharmony_ci        if (tmpptr == NULL)
305e1051a39Sopenharmony_ci            break;
306e1051a39Sopenharmony_ci
307e1051a39Sopenharmony_ci        inptr = tmpptr + 1;
308e1051a39Sopenharmony_ci    }
309e1051a39Sopenharmony_ci#undef PUSHC
310e1051a39Sopenharmony_ci
311e1051a39Sopenharmony_ci    *outlen = size;
312e1051a39Sopenharmony_ci    return result;
313e1051a39Sopenharmony_ci}
314e1051a39Sopenharmony_ci
315e1051a39Sopenharmony_ci/*-
316e1051a39Sopenharmony_ci * a MUST be A-label
317e1051a39Sopenharmony_ci * u MUST be U-label
318e1051a39Sopenharmony_ci * Returns 0 if compared values are equal
319e1051a39Sopenharmony_ci * 1 if not
320e1051a39Sopenharmony_ci * -1 in case of errors
321e1051a39Sopenharmony_ci */
322e1051a39Sopenharmony_ci
323e1051a39Sopenharmony_ciint ossl_a2ucompare(const char *a, const char *u)
324e1051a39Sopenharmony_ci{
325e1051a39Sopenharmony_ci    char a_ulabel[LABEL_BUF_SIZE + 1];
326e1051a39Sopenharmony_ci    size_t a_size = sizeof(a_ulabel);
327e1051a39Sopenharmony_ci
328e1051a39Sopenharmony_ci    if (ossl_a2ulabel(a, a_ulabel, &a_size) <= 0)
329e1051a39Sopenharmony_ci        return -1;
330e1051a39Sopenharmony_ci
331e1051a39Sopenharmony_ci    return strcmp(a_ulabel, u) != 0;
332e1051a39Sopenharmony_ci}
333