1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4****************************************************************************** 5* 6* Copyright (C) 1997-2011, International Business Machines 7* Corporation and others. All Rights Reserved. 8* 9****************************************************************************** 10* 11* File CSTRING.C 12* 13* @author Helena Shih 14* 15* Modification History: 16* 17* Date Name Description 18* 6/18/98 hshih Created 19* 09/08/98 stephen Added include for ctype, for Mac Port 20* 11/15/99 helena Integrated S/390 IEEE changes. 21****************************************************************************** 22*/ 23 24 25 26#include <stdlib.h> 27#include <stdio.h> 28#include "unicode/utypes.h" 29#include "cmemory.h" 30#include "cstring.h" 31#include "uassert.h" 32 33/* 34 * We hardcode case conversion for invariant characters to match our expectation 35 * and the compiler execution charset. 36 * This prevents problems on systems 37 * - with non-default casing behavior, like Turkish system locales where 38 * tolower('I') maps to dotless i and toupper('i') maps to dotted I 39 * - where there are no lowercase Latin characters at all, or using different 40 * codes (some old EBCDIC codepages) 41 * 42 * This works because the compiler usually runs on a platform where the execution 43 * charset includes all of the invariant characters at their expected 44 * code positions, so that the char * string literals in ICU code match 45 * the char literals here. 46 * 47 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC 48 * and the set of uppercase Latin letters is discontiguous as well. 49 */ 50 51U_CAPI UBool U_EXPORT2 52uprv_isASCIILetter(char c) { 53#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 54 return 55 ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') || 56 ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z'); 57#else 58 return ('a'<=c && c<='z') || ('A'<=c && c<='Z'); 59#endif 60} 61 62U_CAPI char U_EXPORT2 63uprv_toupper(char c) { 64#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 65 if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) { 66 c=(char)(c+('A'-'a')); 67 } 68#else 69 if('a'<=c && c<='z') { 70 c=(char)(c+('A'-'a')); 71 } 72#endif 73 return c; 74} 75 76 77#if 0 78/* 79 * Commented out because cstring.h defines uprv_tolower() to be 80 * the same as either uprv_asciitolower() or uprv_ebcdictolower() 81 * to reduce the amount of code to cover with tests. 82 * 83 * Note that this uprv_tolower() definition is likely to work for most 84 * charset families, not just ASCII and EBCDIC, because its #else branch 85 * is written generically. 86 */ 87U_CAPI char U_EXPORT2 88uprv_tolower(char c) { 89#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY 90 if(('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z')) { 91 c=(char)(c+('a'-'A')); 92 } 93#else 94 if('A'<=c && c<='Z') { 95 c=(char)(c+('a'-'A')); 96 } 97#endif 98 return c; 99} 100#endif 101 102U_CAPI char U_EXPORT2 103uprv_asciitolower(char c) { 104 if(0x41<=c && c<=0x5a) { 105 c=(char)(c+0x20); 106 } 107 return c; 108} 109 110U_CAPI char U_EXPORT2 111uprv_ebcdictolower(char c) { 112 if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) || 113 (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) || 114 (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9) 115 ) { 116 c=(char)(c-0x40); 117 } 118 return c; 119} 120 121 122U_CAPI char* U_EXPORT2 123T_CString_toLowerCase(char* str) 124{ 125 char* origPtr = str; 126 127 if (str) { 128 do 129 *str = (char)uprv_tolower(*str); 130 while (*(str++)); 131 } 132 133 return origPtr; 134} 135 136U_CAPI char* U_EXPORT2 137T_CString_toUpperCase(char* str) 138{ 139 char* origPtr = str; 140 141 if (str) { 142 do 143 *str = (char)uprv_toupper(*str); 144 while (*(str++)); 145 } 146 147 return origPtr; 148} 149 150/* 151 * Takes a int32_t and fills in a char* string with that number "radix"-based. 152 * Does not handle negative values (makes an empty string for them). 153 * Writes at most 12 chars ("-2147483647" plus NUL). 154 * Returns the length of the string (not including the NUL). 155 */ 156U_CAPI int32_t U_EXPORT2 157T_CString_integerToString(char* buffer, int32_t v, int32_t radix) 158{ 159 char tbuf[30]; 160 int32_t tbx = sizeof(tbuf); 161 uint8_t digit; 162 int32_t length = 0; 163 uint32_t uval; 164 165 U_ASSERT(radix>=2 && radix<=16); 166 uval = (uint32_t) v; 167 if(v<0 && radix == 10) { 168 /* Only in base 10 do we conside numbers to be signed. */ 169 uval = (uint32_t)(-v); 170 buffer[length++] = '-'; 171 } 172 173 tbx = sizeof(tbuf)-1; 174 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 175 do { 176 digit = (uint8_t)(uval % radix); 177 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 178 uval = uval / radix; 179 } while (uval != 0); 180 181 /* copy converted number into user buffer */ 182 uprv_strcpy(buffer+length, tbuf+tbx); 183 length += sizeof(tbuf) - tbx -1; 184 return length; 185} 186 187 188 189/* 190 * Takes a int64_t and fills in a char* string with that number "radix"-based. 191 * Writes at most 21: chars ("-9223372036854775807" plus NUL). 192 * Returns the length of the string, not including the terminating NULL. 193 */ 194U_CAPI int32_t U_EXPORT2 195T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix) 196{ 197 char tbuf[30]; 198 int32_t tbx = sizeof(tbuf); 199 uint8_t digit; 200 int32_t length = 0; 201 uint64_t uval; 202 203 U_ASSERT(radix>=2 && radix<=16); 204 uval = (uint64_t) v; 205 if(v<0 && radix == 10) { 206 /* Only in base 10 do we conside numbers to be signed. */ 207 uval = (uint64_t)(-v); 208 buffer[length++] = '-'; 209 } 210 211 tbx = sizeof(tbuf)-1; 212 tbuf[tbx] = 0; /* We are generating the digits backwards. Null term the end. */ 213 do { 214 digit = (uint8_t)(uval % radix); 215 tbuf[--tbx] = (char)(T_CString_itosOffset(digit)); 216 uval = uval / radix; 217 } while (uval != 0); 218 219 /* copy converted number into user buffer */ 220 uprv_strcpy(buffer+length, tbuf+tbx); 221 length += sizeof(tbuf) - tbx -1; 222 return length; 223} 224 225 226U_CAPI int32_t U_EXPORT2 227T_CString_stringToInteger(const char *integerString, int32_t radix) 228{ 229 char *end; 230 return uprv_strtoul(integerString, &end, radix); 231 232} 233 234U_CAPI int U_EXPORT2 235uprv_stricmp(const char *str1, const char *str2) { 236 if(str1==NULL) { 237 if(str2==NULL) { 238 return 0; 239 } else { 240 return -1; 241 } 242 } else if(str2==NULL) { 243 return 1; 244 } else { 245 /* compare non-NULL strings lexically with lowercase */ 246 int rc; 247 unsigned char c1, c2; 248 249 for(;;) { 250 c1=(unsigned char)*str1; 251 c2=(unsigned char)*str2; 252 if(c1==0) { 253 if(c2==0) { 254 return 0; 255 } else { 256 return -1; 257 } 258 } else if(c2==0) { 259 return 1; 260 } else { 261 /* compare non-zero characters with lowercase */ 262 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 263 if(rc!=0) { 264 return rc; 265 } 266 } 267 ++str1; 268 ++str2; 269 } 270 } 271} 272 273U_CAPI int U_EXPORT2 274uprv_strnicmp(const char *str1, const char *str2, uint32_t n) { 275 if(str1==NULL) { 276 if(str2==NULL) { 277 return 0; 278 } else { 279 return -1; 280 } 281 } else if(str2==NULL) { 282 return 1; 283 } else { 284 /* compare non-NULL strings lexically with lowercase */ 285 int rc; 286 unsigned char c1, c2; 287 288 for(; n--;) { 289 c1=(unsigned char)*str1; 290 c2=(unsigned char)*str2; 291 if(c1==0) { 292 if(c2==0) { 293 return 0; 294 } else { 295 return -1; 296 } 297 } else if(c2==0) { 298 return 1; 299 } else { 300 /* compare non-zero characters with lowercase */ 301 rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2); 302 if(rc!=0) { 303 return rc; 304 } 305 } 306 ++str1; 307 ++str2; 308 } 309 } 310 311 return 0; 312} 313 314U_CAPI char* U_EXPORT2 315uprv_strdup(const char *src) { 316 size_t len = uprv_strlen(src) + 1; 317 char *dup = (char *) uprv_malloc(len); 318 319 if (dup) { 320 uprv_memcpy(dup, src, len); 321 } 322 323 return dup; 324} 325 326U_CAPI char* U_EXPORT2 327uprv_strndup(const char *src, int32_t n) { 328 char *dup; 329 330 if(n < 0) { 331 dup = uprv_strdup(src); 332 } else { 333 dup = (char*)uprv_malloc(n+1); 334 if (dup) { 335 uprv_memcpy(dup, src, n); 336 dup[n] = 0; 337 } 338 } 339 340 return dup; 341} 342