1e1051a39Sopenharmony_ci/* 2e1051a39Sopenharmony_ci * Copyright 1995-2021 The OpenSSL Project Authors. All Rights Reserved. 3e1051a39Sopenharmony_ci * 4e1051a39Sopenharmony_ci * Licensed under the Apache License 2.0 (the "License"). You may not use 5e1051a39Sopenharmony_ci * this file except in compliance with the License. You can obtain a copy 6e1051a39Sopenharmony_ci * in the file LICENSE in the source distribution or at 7e1051a39Sopenharmony_ci * https://www.openssl.org/source/license.html 8e1051a39Sopenharmony_ci */ 9e1051a39Sopenharmony_ci 10e1051a39Sopenharmony_ci#include <stdio.h> 11e1051a39Sopenharmony_ci#include "internal/cryptlib.h" 12e1051a39Sopenharmony_ci#include "internal/unicode.h" 13e1051a39Sopenharmony_ci#include <openssl/asn1.h> 14e1051a39Sopenharmony_ci 15e1051a39Sopenharmony_ci/* UTF8 utilities */ 16e1051a39Sopenharmony_ci 17e1051a39Sopenharmony_ci/*- 18e1051a39Sopenharmony_ci * This parses a UTF8 string one character at a time. It is passed a pointer 19e1051a39Sopenharmony_ci * to the string and the length of the string. It sets 'value' to the value of 20e1051a39Sopenharmony_ci * the current character. It returns the number of characters read or a 21e1051a39Sopenharmony_ci * negative error code: 22e1051a39Sopenharmony_ci * -1 = string too short 23e1051a39Sopenharmony_ci * -2 = illegal character 24e1051a39Sopenharmony_ci * -3 = subsequent characters not of the form 10xxxxxx 25e1051a39Sopenharmony_ci * -4 = character encoded incorrectly (not minimal length). 26e1051a39Sopenharmony_ci */ 27e1051a39Sopenharmony_ci 28e1051a39Sopenharmony_ciint UTF8_getc(const unsigned char *str, int len, unsigned long *val) 29e1051a39Sopenharmony_ci{ 30e1051a39Sopenharmony_ci const unsigned char *p; 31e1051a39Sopenharmony_ci unsigned long value; 32e1051a39Sopenharmony_ci int ret; 33e1051a39Sopenharmony_ci if (len <= 0) 34e1051a39Sopenharmony_ci return 0; 35e1051a39Sopenharmony_ci p = str; 36e1051a39Sopenharmony_ci 37e1051a39Sopenharmony_ci /* Check syntax and work out the encoded value (if correct) */ 38e1051a39Sopenharmony_ci if ((*p & 0x80) == 0) { 39e1051a39Sopenharmony_ci value = *p++ & 0x7f; 40e1051a39Sopenharmony_ci ret = 1; 41e1051a39Sopenharmony_ci } else if ((*p & 0xe0) == 0xc0) { 42e1051a39Sopenharmony_ci if (len < 2) 43e1051a39Sopenharmony_ci return -1; 44e1051a39Sopenharmony_ci if ((p[1] & 0xc0) != 0x80) 45e1051a39Sopenharmony_ci return -3; 46e1051a39Sopenharmony_ci value = (*p++ & 0x1f) << 6; 47e1051a39Sopenharmony_ci value |= *p++ & 0x3f; 48e1051a39Sopenharmony_ci if (value < 0x80) 49e1051a39Sopenharmony_ci return -4; 50e1051a39Sopenharmony_ci ret = 2; 51e1051a39Sopenharmony_ci } else if ((*p & 0xf0) == 0xe0) { 52e1051a39Sopenharmony_ci if (len < 3) 53e1051a39Sopenharmony_ci return -1; 54e1051a39Sopenharmony_ci if (((p[1] & 0xc0) != 0x80) 55e1051a39Sopenharmony_ci || ((p[2] & 0xc0) != 0x80)) 56e1051a39Sopenharmony_ci return -3; 57e1051a39Sopenharmony_ci value = (*p++ & 0xf) << 12; 58e1051a39Sopenharmony_ci value |= (*p++ & 0x3f) << 6; 59e1051a39Sopenharmony_ci value |= *p++ & 0x3f; 60e1051a39Sopenharmony_ci if (value < 0x800) 61e1051a39Sopenharmony_ci return -4; 62e1051a39Sopenharmony_ci if (is_unicode_surrogate(value)) 63e1051a39Sopenharmony_ci return -2; 64e1051a39Sopenharmony_ci ret = 3; 65e1051a39Sopenharmony_ci } else if ((*p & 0xf8) == 0xf0) { 66e1051a39Sopenharmony_ci if (len < 4) 67e1051a39Sopenharmony_ci return -1; 68e1051a39Sopenharmony_ci if (((p[1] & 0xc0) != 0x80) 69e1051a39Sopenharmony_ci || ((p[2] & 0xc0) != 0x80) 70e1051a39Sopenharmony_ci || ((p[3] & 0xc0) != 0x80)) 71e1051a39Sopenharmony_ci return -3; 72e1051a39Sopenharmony_ci value = ((unsigned long)(*p++ & 0x7)) << 18; 73e1051a39Sopenharmony_ci value |= (*p++ & 0x3f) << 12; 74e1051a39Sopenharmony_ci value |= (*p++ & 0x3f) << 6; 75e1051a39Sopenharmony_ci value |= *p++ & 0x3f; 76e1051a39Sopenharmony_ci if (value < 0x10000) 77e1051a39Sopenharmony_ci return -4; 78e1051a39Sopenharmony_ci ret = 4; 79e1051a39Sopenharmony_ci } else 80e1051a39Sopenharmony_ci return -2; 81e1051a39Sopenharmony_ci *val = value; 82e1051a39Sopenharmony_ci return ret; 83e1051a39Sopenharmony_ci} 84e1051a39Sopenharmony_ci 85e1051a39Sopenharmony_ci/* 86e1051a39Sopenharmony_ci * This takes a character 'value' and writes the UTF8 encoded value in 'str' 87e1051a39Sopenharmony_ci * where 'str' is a buffer containing 'len' characters. Returns the number of 88e1051a39Sopenharmony_ci * characters written, -1 if 'len' is too small or -2 if 'value' is out of 89e1051a39Sopenharmony_ci * range. 'str' can be set to NULL in which case it just returns the number of 90e1051a39Sopenharmony_ci * characters. It will need at most 4 characters. 91e1051a39Sopenharmony_ci */ 92e1051a39Sopenharmony_ci 93e1051a39Sopenharmony_ciint UTF8_putc(unsigned char *str, int len, unsigned long value) 94e1051a39Sopenharmony_ci{ 95e1051a39Sopenharmony_ci if (!str) 96e1051a39Sopenharmony_ci len = 4; /* Maximum we will need */ 97e1051a39Sopenharmony_ci else if (len <= 0) 98e1051a39Sopenharmony_ci return -1; 99e1051a39Sopenharmony_ci if (value < 0x80) { 100e1051a39Sopenharmony_ci if (str) 101e1051a39Sopenharmony_ci *str = (unsigned char)value; 102e1051a39Sopenharmony_ci return 1; 103e1051a39Sopenharmony_ci } 104e1051a39Sopenharmony_ci if (value < 0x800) { 105e1051a39Sopenharmony_ci if (len < 2) 106e1051a39Sopenharmony_ci return -1; 107e1051a39Sopenharmony_ci if (str) { 108e1051a39Sopenharmony_ci *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); 109e1051a39Sopenharmony_ci *str = (unsigned char)((value & 0x3f) | 0x80); 110e1051a39Sopenharmony_ci } 111e1051a39Sopenharmony_ci return 2; 112e1051a39Sopenharmony_ci } 113e1051a39Sopenharmony_ci if (value < 0x10000) { 114e1051a39Sopenharmony_ci if (is_unicode_surrogate(value)) 115e1051a39Sopenharmony_ci return -2; 116e1051a39Sopenharmony_ci if (len < 3) 117e1051a39Sopenharmony_ci return -1; 118e1051a39Sopenharmony_ci if (str) { 119e1051a39Sopenharmony_ci *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); 120e1051a39Sopenharmony_ci *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 121e1051a39Sopenharmony_ci *str = (unsigned char)((value & 0x3f) | 0x80); 122e1051a39Sopenharmony_ci } 123e1051a39Sopenharmony_ci return 3; 124e1051a39Sopenharmony_ci } 125e1051a39Sopenharmony_ci if (value < UNICODE_LIMIT) { 126e1051a39Sopenharmony_ci if (len < 4) 127e1051a39Sopenharmony_ci return -1; 128e1051a39Sopenharmony_ci if (str) { 129e1051a39Sopenharmony_ci *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); 130e1051a39Sopenharmony_ci *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); 131e1051a39Sopenharmony_ci *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); 132e1051a39Sopenharmony_ci *str = (unsigned char)((value & 0x3f) | 0x80); 133e1051a39Sopenharmony_ci } 134e1051a39Sopenharmony_ci return 4; 135e1051a39Sopenharmony_ci } 136e1051a39Sopenharmony_ci return -2; 137e1051a39Sopenharmony_ci} 138