1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 2019-2022 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * 4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci */ 9e1051a39Sopenharmony_ci 10e1051a39Sopenharmony_ci#include <stddef.h> 11e1051a39Sopenharmony_ci#include <string.h> 12e1051a39Sopenharmony_ci#include <stdio.h> 13e1051a39Sopenharmony_ci#include <openssl/e_os2.h> 14e1051a39Sopenharmony_ci#include "crypto/punycode.h" 15e1051a39Sopenharmony_ci 16e1051a39Sopenharmony_cistatic const unsigned int base = 36; 17e1051a39Sopenharmony_cistatic const unsigned int tmin = 1; 18e1051a39Sopenharmony_cistatic const unsigned int tmax = 26; 19e1051a39Sopenharmony_cistatic const unsigned int skew = 38; 20e1051a39Sopenharmony_cistatic const unsigned int damp = 700; 21e1051a39Sopenharmony_cistatic const unsigned int initial_bias = 72; 22e1051a39Sopenharmony_cistatic const unsigned int initial_n = 0x80; 23e1051a39Sopenharmony_cistatic const unsigned int maxint = 0xFFFFFFFF; 24e1051a39Sopenharmony_cistatic const char delimiter = '-'; 25e1051a39Sopenharmony_ci 26e1051a39Sopenharmony_ci#define LABEL_BUF_SIZE 512 27e1051a39Sopenharmony_ci 28e1051a39Sopenharmony_ci/*- 29e1051a39Sopenharmony_ci * Pseudocode: 30e1051a39Sopenharmony_ci * 31e1051a39Sopenharmony_ci * function adapt(delta,numpoints,firsttime): 32e1051a39Sopenharmony_ci * if firsttime then let delta = delta div damp 33e1051a39Sopenharmony_ci * else let delta = delta div 2 34e1051a39Sopenharmony_ci * let delta = delta + (delta div numpoints) 35e1051a39Sopenharmony_ci * let k = 0 36e1051a39Sopenharmony_ci * while delta > ((base - tmin) * tmax) div 2 do begin 37e1051a39Sopenharmony_ci * let delta = delta div (base - tmin) 38e1051a39Sopenharmony_ci * let k = k + base 39e1051a39Sopenharmony_ci * end 40e1051a39Sopenharmony_ci * return k + (((base - tmin + 1) * delta) div (delta + skew)) 41e1051a39Sopenharmony_ci */ 42e1051a39Sopenharmony_ci 43e1051a39Sopenharmony_cistatic int adapt(unsigned int delta, unsigned int numpoints, 44e1051a39Sopenharmony_ci unsigned int firsttime) 45e1051a39Sopenharmony_ci{ 46e1051a39Sopenharmony_ci unsigned int k = 0; 47e1051a39Sopenharmony_ci 48e1051a39Sopenharmony_ci delta = (firsttime) ? delta / damp : delta / 2; 49e1051a39Sopenharmony_ci delta = delta + delta / numpoints; 50e1051a39Sopenharmony_ci 51e1051a39Sopenharmony_ci while (delta > ((base - tmin) * tmax) / 2) { 52e1051a39Sopenharmony_ci delta = delta / (base - tmin); 53e1051a39Sopenharmony_ci k = k + base; 54e1051a39Sopenharmony_ci } 55e1051a39Sopenharmony_ci 56e1051a39Sopenharmony_ci return k + (((base - tmin + 1) * delta) / (delta + skew)); 57e1051a39Sopenharmony_ci} 58e1051a39Sopenharmony_ci 59e1051a39Sopenharmony_cistatic ossl_inline int is_basic(unsigned int a) 60e1051a39Sopenharmony_ci{ 61e1051a39Sopenharmony_ci return (a < 0x80) ? 1 : 0; 62e1051a39Sopenharmony_ci} 63e1051a39Sopenharmony_ci 64e1051a39Sopenharmony_ci/*- 65e1051a39Sopenharmony_ci * code points digit-values 66e1051a39Sopenharmony_ci * ------------ ---------------------- 67e1051a39Sopenharmony_ci * 41..5A (A-Z) = 0 to 25, respectively 68e1051a39Sopenharmony_ci * 61..7A (a-z) = 0 to 25, respectively 69e1051a39Sopenharmony_ci * 30..39 (0-9) = 26 to 35, respectively 70e1051a39Sopenharmony_ci */ 71e1051a39Sopenharmony_cistatic ossl_inline int digit_decoded(const unsigned char a) 72e1051a39Sopenharmony_ci{ 73e1051a39Sopenharmony_ci if (a >= 0x41 && a <= 0x5A) 74e1051a39Sopenharmony_ci return a - 0x41; 75e1051a39Sopenharmony_ci 76e1051a39Sopenharmony_ci if (a >= 0x61 && a <= 0x7A) 77e1051a39Sopenharmony_ci return a - 0x61; 78e1051a39Sopenharmony_ci 79e1051a39Sopenharmony_ci if (a >= 0x30 && a <= 0x39) 80e1051a39Sopenharmony_ci return a - 0x30 + 26; 81e1051a39Sopenharmony_ci 82e1051a39Sopenharmony_ci return -1; 83e1051a39Sopenharmony_ci} 84e1051a39Sopenharmony_ci 85e1051a39Sopenharmony_ci/*- 86e1051a39Sopenharmony_ci * Pseudocode: 87e1051a39Sopenharmony_ci * 88e1051a39Sopenharmony_ci * function ossl_punycode_decode 89e1051a39Sopenharmony_ci * let n = initial_n 90e1051a39Sopenharmony_ci * let i = 0 91e1051a39Sopenharmony_ci * let bias = initial_bias 92e1051a39Sopenharmony_ci * let output = an empty string indexed from 0 93e1051a39Sopenharmony_ci * consume all code points before the last delimiter (if there is one) 94e1051a39Sopenharmony_ci * and copy them to output, fail on any non-basic code point 95e1051a39Sopenharmony_ci * if more than zero code points were consumed then consume one more 96e1051a39Sopenharmony_ci * (which will be the last delimiter) 97e1051a39Sopenharmony_ci * while the input is not exhausted do begin 98e1051a39Sopenharmony_ci * let oldi = i 99e1051a39Sopenharmony_ci * let w = 1 100e1051a39Sopenharmony_ci * for k = base to infinity in steps of base do begin 101e1051a39Sopenharmony_ci * consume a code point, or fail if there was none to consume 102e1051a39Sopenharmony_ci * let digit = the code point's digit-value, fail if it has none 103e1051a39Sopenharmony_ci * let i = i + digit * w, fail on overflow 104e1051a39Sopenharmony_ci * let t = tmin if k <= bias {+ tmin}, or 105e1051a39Sopenharmony_ci * tmax if k >= bias + tmax, or k - bias otherwise 106e1051a39Sopenharmony_ci * if digit < t then break 107e1051a39Sopenharmony_ci * let w = w * (base - t), fail on overflow 108e1051a39Sopenharmony_ci * end 109e1051a39Sopenharmony_ci * let bias = adapt(i - oldi, length(output) + 1, test oldi is 0?) 110e1051a39Sopenharmony_ci * let n = n + i div (length(output) + 1), fail on overflow 111e1051a39Sopenharmony_ci * let i = i mod (length(output) + 1) 112e1051a39Sopenharmony_ci * {if n is a basic code point then fail} 113e1051a39Sopenharmony_ci * insert n into output at position i 114e1051a39Sopenharmony_ci * increment i 115e1051a39Sopenharmony_ci * end 116e1051a39Sopenharmony_ci */ 117e1051a39Sopenharmony_ci 118e1051a39Sopenharmony_ciint ossl_punycode_decode(const char *pEncoded, const size_t enc_len, 119e1051a39Sopenharmony_ci unsigned int *pDecoded, unsigned int *pout_length) 120e1051a39Sopenharmony_ci{ 121e1051a39Sopenharmony_ci unsigned int n = initial_n; 122e1051a39Sopenharmony_ci unsigned int i = 0; 123e1051a39Sopenharmony_ci unsigned int bias = initial_bias; 124e1051a39Sopenharmony_ci size_t processed_in = 0, written_out = 0; 125e1051a39Sopenharmony_ci unsigned int max_out = *pout_length; 126e1051a39Sopenharmony_ci unsigned int basic_count = 0; 127e1051a39Sopenharmony_ci unsigned int loop; 128e1051a39Sopenharmony_ci 129e1051a39Sopenharmony_ci for (loop = 0; loop < enc_len; loop++) { 130e1051a39Sopenharmony_ci if (pEncoded[loop] == delimiter) 131e1051a39Sopenharmony_ci basic_count = loop; 132e1051a39Sopenharmony_ci } 133e1051a39Sopenharmony_ci 134e1051a39Sopenharmony_ci if (basic_count > 0) { 135e1051a39Sopenharmony_ci if (basic_count > max_out) 136e1051a39Sopenharmony_ci return 0; 137e1051a39Sopenharmony_ci 138e1051a39Sopenharmony_ci for (loop = 0; loop < basic_count; loop++) { 139e1051a39Sopenharmony_ci if (is_basic(pEncoded[loop]) == 0) 140e1051a39Sopenharmony_ci return 0; 141e1051a39Sopenharmony_ci 142e1051a39Sopenharmony_ci pDecoded[loop] = pEncoded[loop]; 143e1051a39Sopenharmony_ci written_out++; 144e1051a39Sopenharmony_ci } 145e1051a39Sopenharmony_ci processed_in = basic_count + 1; 146e1051a39Sopenharmony_ci } 147e1051a39Sopenharmony_ci 148e1051a39Sopenharmony_ci for (loop = processed_in; loop < enc_len;) { 149e1051a39Sopenharmony_ci unsigned int oldi = i; 150e1051a39Sopenharmony_ci unsigned int w = 1; 151e1051a39Sopenharmony_ci unsigned int k, t; 152e1051a39Sopenharmony_ci int digit; 153e1051a39Sopenharmony_ci 154e1051a39Sopenharmony_ci for (k = base;; k += base) { 155e1051a39Sopenharmony_ci if (loop >= enc_len) 156e1051a39Sopenharmony_ci return 0; 157e1051a39Sopenharmony_ci 158e1051a39Sopenharmony_ci digit = digit_decoded(pEncoded[loop]); 159e1051a39Sopenharmony_ci loop++; 160e1051a39Sopenharmony_ci 161e1051a39Sopenharmony_ci if (digit < 0) 162e1051a39Sopenharmony_ci return 0; 163e1051a39Sopenharmony_ci if ((unsigned int)digit > (maxint - i) / w) 164e1051a39Sopenharmony_ci return 0; 165e1051a39Sopenharmony_ci 166e1051a39Sopenharmony_ci i = i + digit * w; 167e1051a39Sopenharmony_ci t = (k <= bias) ? tmin : (k >= bias + tmax) ? tmax : k - bias; 168e1051a39Sopenharmony_ci 169e1051a39Sopenharmony_ci if ((unsigned int)digit < t) 170e1051a39Sopenharmony_ci break; 171e1051a39Sopenharmony_ci 172e1051a39Sopenharmony_ci if (w > maxint / (base - t)) 173e1051a39Sopenharmony_ci return 0; 174e1051a39Sopenharmony_ci w = w * (base - t); 175e1051a39Sopenharmony_ci } 176e1051a39Sopenharmony_ci 177e1051a39Sopenharmony_ci bias = adapt(i - oldi, written_out + 1, (oldi == 0)); 178e1051a39Sopenharmony_ci if (i / (written_out + 1) > maxint - n) 179e1051a39Sopenharmony_ci return 0; 180e1051a39Sopenharmony_ci n = n + i / (written_out + 1); 181e1051a39Sopenharmony_ci i %= (written_out + 1); 182e1051a39Sopenharmony_ci 183e1051a39Sopenharmony_ci if (written_out >= max_out) 184e1051a39Sopenharmony_ci return 0; 185e1051a39Sopenharmony_ci 186e1051a39Sopenharmony_ci memmove(pDecoded + i + 1, pDecoded + i, 187e1051a39Sopenharmony_ci (written_out - i) * sizeof(*pDecoded)); 188e1051a39Sopenharmony_ci pDecoded[i] = n; 189e1051a39Sopenharmony_ci i++; 190e1051a39Sopenharmony_ci written_out++; 191e1051a39Sopenharmony_ci } 192e1051a39Sopenharmony_ci 193e1051a39Sopenharmony_ci *pout_length = written_out; 194e1051a39Sopenharmony_ci return 1; 195e1051a39Sopenharmony_ci} 196e1051a39Sopenharmony_ci 197e1051a39Sopenharmony_ci/* 198e1051a39Sopenharmony_ci * Encode a code point using UTF-8 199e1051a39Sopenharmony_ci * return number of bytes on success, 0 on failure 200e1051a39Sopenharmony_ci * (also produces U+FFFD, which uses 3 bytes on failure) 201e1051a39Sopenharmony_ci */ 202e1051a39Sopenharmony_cistatic int codepoint2utf8(unsigned char *out, unsigned long utf) 203e1051a39Sopenharmony_ci{ 204e1051a39Sopenharmony_ci if (utf <= 0x7F) { 205e1051a39Sopenharmony_ci /* Plain ASCII */ 206e1051a39Sopenharmony_ci out[0] = (unsigned char)utf; 207e1051a39Sopenharmony_ci out[1] = 0; 208e1051a39Sopenharmony_ci return 1; 209e1051a39Sopenharmony_ci } else if (utf <= 0x07FF) { 210e1051a39Sopenharmony_ci /* 2-byte unicode */ 211e1051a39Sopenharmony_ci out[0] = (unsigned char)(((utf >> 6) & 0x1F) | 0xC0); 212e1051a39Sopenharmony_ci out[1] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80); 213e1051a39Sopenharmony_ci out[2] = 0; 214e1051a39Sopenharmony_ci return 2; 215e1051a39Sopenharmony_ci } else if (utf <= 0xFFFF) { 216e1051a39Sopenharmony_ci /* 3-byte unicode */ 217e1051a39Sopenharmony_ci out[0] = (unsigned char)(((utf >> 12) & 0x0F) | 0xE0); 218e1051a39Sopenharmony_ci out[1] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80); 219e1051a39Sopenharmony_ci out[2] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80); 220e1051a39Sopenharmony_ci out[3] = 0; 221e1051a39Sopenharmony_ci return 3; 222e1051a39Sopenharmony_ci } else if (utf <= 0x10FFFF) { 223e1051a39Sopenharmony_ci /* 4-byte unicode */ 224e1051a39Sopenharmony_ci out[0] = (unsigned char)(((utf >> 18) & 0x07) | 0xF0); 225e1051a39Sopenharmony_ci out[1] = (unsigned char)(((utf >> 12) & 0x3F) | 0x80); 226e1051a39Sopenharmony_ci out[2] = (unsigned char)(((utf >> 6) & 0x3F) | 0x80); 227e1051a39Sopenharmony_ci out[3] = (unsigned char)(((utf >> 0) & 0x3F) | 0x80); 228e1051a39Sopenharmony_ci out[4] = 0; 229e1051a39Sopenharmony_ci return 4; 230e1051a39Sopenharmony_ci } else { 231e1051a39Sopenharmony_ci /* error - use replacement character */ 232e1051a39Sopenharmony_ci out[0] = (unsigned char)0xEF; 233e1051a39Sopenharmony_ci out[1] = (unsigned char)0xBF; 234e1051a39Sopenharmony_ci out[2] = (unsigned char)0xBD; 235e1051a39Sopenharmony_ci out[3] = 0; 236e1051a39Sopenharmony_ci return 0; 237e1051a39Sopenharmony_ci } 238e1051a39Sopenharmony_ci} 239e1051a39Sopenharmony_ci 240e1051a39Sopenharmony_ci/*- 241e1051a39Sopenharmony_ci * Return values: 242e1051a39Sopenharmony_ci * 1 - ok, *outlen contains valid buf length 243e1051a39Sopenharmony_ci * 0 - ok but buf was too short, *outlen contains valid buf length 244e1051a39Sopenharmony_ci * -1 - bad string passed 245e1051a39Sopenharmony_ci */ 246e1051a39Sopenharmony_ci 247e1051a39Sopenharmony_ciint ossl_a2ulabel(const char *in, char *out, size_t *outlen) 248e1051a39Sopenharmony_ci{ 249e1051a39Sopenharmony_ci /*- 250e1051a39Sopenharmony_ci * Domain name has some parts consisting of ASCII chars joined with dot. 251e1051a39Sopenharmony_ci * If a part is shorter than 5 chars, it becomes U-label as is. 252e1051a39Sopenharmony_ci * If it does not start with xn--, it becomes U-label as is. 253e1051a39Sopenharmony_ci * Otherwise we try to decode it. 254e1051a39Sopenharmony_ci */ 255e1051a39Sopenharmony_ci char *outptr = out; 256e1051a39Sopenharmony_ci const char *inptr = in; 257e1051a39Sopenharmony_ci size_t size = 0, maxsize; 258e1051a39Sopenharmony_ci int result = 1; 259e1051a39Sopenharmony_ci unsigned int i, j; 260e1051a39Sopenharmony_ci unsigned int buf[LABEL_BUF_SIZE]; /* It's a hostname */ 261e1051a39Sopenharmony_ci 262e1051a39Sopenharmony_ci if (out == NULL) { 263e1051a39Sopenharmony_ci result = 0; 264e1051a39Sopenharmony_ci maxsize = 0; 265e1051a39Sopenharmony_ci } else { 266e1051a39Sopenharmony_ci maxsize = *outlen; 267e1051a39Sopenharmony_ci } 268e1051a39Sopenharmony_ci 269e1051a39Sopenharmony_ci#define PUSHC(c) \ 270e1051a39Sopenharmony_ci do \ 271e1051a39Sopenharmony_ci if (size++ < maxsize) \ 272e1051a39Sopenharmony_ci *outptr++ = c; \ 273e1051a39Sopenharmony_ci else \ 274e1051a39Sopenharmony_ci result = 0; \ 275e1051a39Sopenharmony_ci while (0) 276e1051a39Sopenharmony_ci 277e1051a39Sopenharmony_ci while (1) { 278e1051a39Sopenharmony_ci char *tmpptr = strchr(inptr, '.'); 279e1051a39Sopenharmony_ci size_t delta = tmpptr != NULL ? (size_t)(tmpptr - inptr) : strlen(inptr); 280e1051a39Sopenharmony_ci 281e1051a39Sopenharmony_ci if (strncmp(inptr, "xn--", 4) != 0) { 282e1051a39Sopenharmony_ci for (i = 0; i < delta + 1; i++) 283e1051a39Sopenharmony_ci PUSHC(inptr[i]); 284e1051a39Sopenharmony_ci } else { 285e1051a39Sopenharmony_ci unsigned int bufsize = LABEL_BUF_SIZE; 286e1051a39Sopenharmony_ci 287e1051a39Sopenharmony_ci if (ossl_punycode_decode(inptr + 4, delta - 4, buf, &bufsize) <= 0) 288e1051a39Sopenharmony_ci return -1; 289e1051a39Sopenharmony_ci 290e1051a39Sopenharmony_ci for (i = 0; i < bufsize; i++) { 291e1051a39Sopenharmony_ci unsigned char seed[6]; 292e1051a39Sopenharmony_ci size_t utfsize = codepoint2utf8(seed, buf[i]); 293e1051a39Sopenharmony_ci 294e1051a39Sopenharmony_ci if (utfsize == 0) 295e1051a39Sopenharmony_ci return -1; 296e1051a39Sopenharmony_ci 297e1051a39Sopenharmony_ci for (j = 0; j < utfsize; j++) 298e1051a39Sopenharmony_ci PUSHC(seed[j]); 299e1051a39Sopenharmony_ci } 300e1051a39Sopenharmony_ci 301e1051a39Sopenharmony_ci PUSHC(tmpptr != NULL ? '.' : '\0'); 302e1051a39Sopenharmony_ci } 303e1051a39Sopenharmony_ci 304e1051a39Sopenharmony_ci if (tmpptr == NULL) 305e1051a39Sopenharmony_ci break; 306e1051a39Sopenharmony_ci 307e1051a39Sopenharmony_ci inptr = tmpptr + 1; 308e1051a39Sopenharmony_ci } 309e1051a39Sopenharmony_ci#undef PUSHC 310e1051a39Sopenharmony_ci 311e1051a39Sopenharmony_ci *outlen = size; 312e1051a39Sopenharmony_ci return result; 313e1051a39Sopenharmony_ci} 314e1051a39Sopenharmony_ci 315e1051a39Sopenharmony_ci/*- 316e1051a39Sopenharmony_ci * a MUST be A-label 317e1051a39Sopenharmony_ci * u MUST be U-label 318e1051a39Sopenharmony_ci * Returns 0 if compared values are equal 319e1051a39Sopenharmony_ci * 1 if not 320e1051a39Sopenharmony_ci * -1 in case of errors 321e1051a39Sopenharmony_ci */ 322e1051a39Sopenharmony_ci 323e1051a39Sopenharmony_ciint ossl_a2ucompare(const char *a, const char *u) 324e1051a39Sopenharmony_ci{ 325e1051a39Sopenharmony_ci char a_ulabel[LABEL_BUF_SIZE + 1]; 326e1051a39Sopenharmony_ci size_t a_size = sizeof(a_ulabel); 327e1051a39Sopenharmony_ci 328e1051a39Sopenharmony_ci if (ossl_a2ulabel(a, a_ulabel, &a_size) <= 0) 329e1051a39Sopenharmony_ci return -1; 330e1051a39Sopenharmony_ci 331e1051a39Sopenharmony_ci return strcmp(a_ulabel, u) != 0; 332e1051a39Sopenharmony_ci} 333