/*
 * Copyright (C) 2004-2005 Kay Sievers <kay.sievers@vrfy.org>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *
 */


/*
 * Strip every trailing occurrence of 'c' from 'path', in place.
 *
 * @path: NUL-terminated, writable string.
 * @c:    character to remove from the end of the string.
 *
 * A string consisting only of 'c' ends up empty.
 */
static void remove_trailing_chars(char *path, char c)
{
	size_t len;

	len = strlen(path);
	while (len > 0 && path[len-1] == c)
		path[--len] = '\0';
}

/*
 * count of characters used to encode one unicode char
 *
 * Only the lead byte str[0] is inspected. Returns 1..6 according to the
 * lead-byte bit pattern, or 0 when the byte cannot start a sequence
 * (a continuation byte 0x80..0xbf, or 0xfe/0xff).
 */
static int utf8_encoded_expected_len(const char *str)
{
	unsigned char c = (unsigned char)str[0];

	if (c < 0x80)
		return 1;
	if ((c & 0xe0) == 0xc0)
		return 2;
	if ((c & 0xf0) == 0xe0)
		return 3;
	if ((c & 0xf8) == 0xf0)
		return 4;
	if ((c & 0xfc) == 0xf8)
		return 5;
	if ((c & 0xfe) == 0xfc)
		return 6;
	return 0;
}

/*
 * decode one unicode char
 *
 * Returns the decoded value, or -1 when the lead byte is invalid or one of
 * the following bytes is not a continuation byte (10xxxxxx). No overlong or
 * range checking is done here.
 */
static int utf8_encoded_to_unichar(const char *str)
{
	/* read bytes as unsigned so masking never depends on char signedness */
	const unsigned char *s = (const unsigned char *)str;
	int unichar;
	int len;
	int i;

	len = utf8_encoded_expected_len(str);
	switch (len) {
	case 1:
		return (int)s[0];
	case 2:
		unichar = s[0] & 0x1f;
		break;
	case 3:
		unichar = s[0] & 0x0f;
		break;
	case 4:
		unichar = s[0] & 0x07;
		break;
	case 5:
		unichar = s[0] & 0x03;
		break;
	case 6:
		unichar = s[0] & 0x01;
		break;
	default:
		return -1;
	}

	/*
	 * Each continuation byte contributes 6 payload bits. A NUL byte fails
	 * the continuation test, so the loop never reads past the terminator.
	 */
	for (i = 1; i < len; i++) {
		if ((s[i] & 0xc0) != 0x80)
			return -1;
		unichar <<= 6;
		unichar |= s[i] & 0x3f;
	}

	return unichar;
}

/*
 * expected size used to encode one unicode char
 *
 * Returns the number of bytes (1..6) of the shortest encoding of 'unichar'.
 */
static int utf8_unichar_to_encoded_len(int unichar)
{
	if (unichar < 0x80)
		return 1;
	if (unichar < 0x800)
		return 2;
	if (unichar < 0x10000)
		return 3;
	if (unichar < 0x200000)
		return 4;
	if (unichar < 0x4000000)
		return 5;
	return 6;
}

/*
 * check if unicode char has a valid numeric range
 *
 * Returns 1 when 'unichar' is acceptable, 0 otherwise. Rejected are:
 * negative values, values above 0x10ffff, 0xd800..0xdfff,
 * 0xfdd0..0xfdef, and any value whose low 16 bits are 0xffff.
 */
static int utf8_unichar_valid_range(int unichar)
{
	/* a negative value (e.g. a decode failure) is never a code point */
	if (unichar < 0)
		return 0;
	if (unichar > 0x10ffff)
		return 0;
	if ((unichar & 0xfffff800) == 0xd800)
		return 0;
	if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
		return 0;
	if ((unichar & 0xffff) == 0xffff)
		return 0;
	return 1;
}

/*
 * validate one encoded unicode char and return its length
 *
 * Returns the sequence length in bytes (1 for plain ascii), or -1 when the
 * sequence at 'str' is truncated, malformed, not in its shortest form, or
 * decodes to a value rejected by utf8_unichar_valid_range().
 */
static int utf8_encoded_valid_unichar(const char *str)
{
	int len;
	int unichar;
	int i;

	len = utf8_encoded_expected_len(str);
	if (len == 0)
		return -1;

	/* ascii is valid */
	if (len == 1)
		return 1;

	/*
	 * check if expected encoded chars are available; the terminating NUL
	 * has no high bit, so a truncated sequence stops the scan here
	 */
	for (i = 0; i < len; i++)
		if ((str[i] & 0x80) != 0x80)
			return -1;

	unichar = utf8_encoded_to_unichar(str);

	/* check if encoded length matches encoded value */
	if (utf8_unichar_to_encoded_len(unichar) != len)
		return -1;

	/* check if value has valid range */
	if (!utf8_unichar_valid_range(unichar))
		return -1;

	return len;
}

/* replace everything but whitelisted plain ascii and valid utf8 */
/*
 * @str: NUL-terminated, writable string; it is rewritten in place and its
 *       length never changes.
 *
 * Bytes left untouched: ascii digits and letters, the characters in
 * " #$%+-./:=?@_,", and multi-byte sequences for which
 * utf8_encoded_valid_unichar() returns a length greater than 1.
 * Every other byte is overwritten with '_'.
 *
 * Returns the number of bytes that were replaced.
 */
static int replace_untrusted_chars(char *str)
{
	size_t i = 0;
	int replaced = 0;

	while (str[i] != '\0') {
		int len;

		/*
		 * valid printable ascii char; str[i] is never NUL here, so
		 * strchr() cannot match the whitelist's own terminator
		 */
		if ((str[i] >= '0' && str[i] <= '9') ||
		    (str[i] >= 'A' && str[i] <= 'Z') ||
		    (str[i] >= 'a' && str[i] <= 'z') ||
		    strchr(" #$%+-./:=?@_,", str[i])) {
			i++;
			continue;
		}

		/* valid utf8 is accepted; skip the whole sequence at once */
		len = utf8_encoded_valid_unichar(&str[i]);
		if (len > 1) {
			i += len;
			continue;
		}

		/* everything else is garbage */
		str[i] = '_';
		i++;
		replaced++;
	}

	return replaced;
}