1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6*   Copyright (C) 1997-2011, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*
9******************************************************************************
10*
11* File CSTRING.C
12*
13* @author       Helena Shih
14*
15* Modification History:
16*
17*   Date        Name        Description
18*   6/18/98     hshih       Created
19*   09/08/98    stephen     Added include for ctype, for Mac Port
20*   11/15/99    helena      Integrated S/390 IEEE changes.
21******************************************************************************
22*/
23
24
25
26#include <stdlib.h>
27#include <stdio.h>
28#include "unicode/utypes.h"
29#include "cmemory.h"
30#include "cstring.h"
31#include "uassert.h"
32
33/*
34 * We hardcode case conversion for invariant characters to match our expectation
35 * and the compiler execution charset.
36 * This prevents problems on systems
37 * - with non-default casing behavior, like Turkish system locales where
38 *   tolower('I') maps to dotless i and toupper('i') maps to dotted I
39 * - where there are no lowercase Latin characters at all, or using different
40 *   codes (some old EBCDIC codepages)
41 *
42 * This works because the compiler usually runs on a platform where the execution
43 * charset includes all of the invariant characters at their expected
44 * code positions, so that the char * string literals in ICU code match
45 * the char literals here.
46 *
47 * Note that the set of lowercase Latin letters is discontiguous in EBCDIC
48 * and the set of uppercase Latin letters is discontiguous as well.
49 */
50
51U_CAPI UBool U_EXPORT2
52uprv_isASCIILetter(char c) {
53#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
54    return
55        ('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z') ||
56        ('A'<=c && c<='I') || ('J'<=c && c<='R') || ('S'<=c && c<='Z');
57#else
58    return ('a'<=c && c<='z') || ('A'<=c && c<='Z');
59#endif
60}
61
62U_CAPI char U_EXPORT2
63uprv_toupper(char c) {
64#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
65    if(('a'<=c && c<='i') || ('j'<=c && c<='r') || ('s'<=c && c<='z')) {
66        c=(char)(c+('A'-'a'));
67    }
68#else
69    if('a'<=c && c<='z') {
70        c=(char)(c+('A'-'a'));
71    }
72#endif
73    return c;
74}
75
76
#if 0
/*
 * Compiled out because cstring.h defines uprv_tolower() to be
 * the same as either uprv_asciitolower() or uprv_ebcdictolower()
 * to reduce the amount of code to cover with tests.
 *
 * Note that this uprv_tolower() definition is likely to work for most
 * charset families, not just ASCII and EBCDIC, because its #else branch
 * is written generically.
 */
U_CAPI char U_EXPORT2
uprv_tolower(char c) {
#if U_CHARSET_FAMILY==U_EBCDIC_FAMILY
    /* EBCDIC uppercase letters live in three separate ranges. */
    const int isUppercase =
        (c>='A' && c<='I') || (c>='J' && c<='R') || (c>='S' && c<='Z');
#else
    const int isUppercase = (c>='A' && c<='Z');
#endif

    return isUppercase ? (char)(c+('a'-'A')) : c;
}
#endif
101
102U_CAPI char U_EXPORT2
103uprv_asciitolower(char c) {
104    if(0x41<=c && c<=0x5a) {
105        c=(char)(c+0x20);
106    }
107    return c;
108}
109
110U_CAPI char U_EXPORT2
111uprv_ebcdictolower(char c) {
112    if( (0xc1<=(uint8_t)c && (uint8_t)c<=0xc9) ||
113        (0xd1<=(uint8_t)c && (uint8_t)c<=0xd9) ||
114        (0xe2<=(uint8_t)c && (uint8_t)c<=0xe9)
115    ) {
116        c=(char)(c-0x40);
117    }
118    return c;
119}
120
121
122U_CAPI char* U_EXPORT2
123T_CString_toLowerCase(char* str)
124{
125    char* origPtr = str;
126
127    if (str) {
128        do
129            *str = (char)uprv_tolower(*str);
130        while (*(str++));
131    }
132
133    return origPtr;
134}
135
136U_CAPI char* U_EXPORT2
137T_CString_toUpperCase(char* str)
138{
139    char* origPtr = str;
140
141    if (str) {
142        do
143            *str = (char)uprv_toupper(*str);
144        while (*(str++));
145    }
146
147    return origPtr;
148}
149
150/*
151 * Takes a int32_t and fills in  a char* string with that number "radix"-based.
152 * Does not handle negative values (makes an empty string for them).
153 * Writes at most 12 chars ("-2147483647" plus NUL).
154 * Returns the length of the string (not including the NUL).
155 */
156U_CAPI int32_t U_EXPORT2
157T_CString_integerToString(char* buffer, int32_t v, int32_t radix)
158{
159    char      tbuf[30];
160    int32_t   tbx    = sizeof(tbuf);
161    uint8_t   digit;
162    int32_t   length = 0;
163    uint32_t  uval;
164
165    U_ASSERT(radix>=2 && radix<=16);
166    uval = (uint32_t) v;
167    if(v<0 && radix == 10) {
168        /* Only in base 10 do we conside numbers to be signed. */
169        uval = (uint32_t)(-v);
170        buffer[length++] = '-';
171    }
172
173    tbx = sizeof(tbuf)-1;
174    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
175    do {
176        digit = (uint8_t)(uval % radix);
177        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
178        uval  = uval / radix;
179    } while (uval != 0);
180
181    /* copy converted number into user buffer  */
182    uprv_strcpy(buffer+length, tbuf+tbx);
183    length += sizeof(tbuf) - tbx -1;
184    return length;
185}
186
187
188
189/*
190 * Takes a int64_t and fills in  a char* string with that number "radix"-based.
191 * Writes at most 21: chars ("-9223372036854775807" plus NUL).
192 * Returns the length of the string, not including the terminating NULL.
193 */
194U_CAPI int32_t U_EXPORT2
195T_CString_int64ToString(char* buffer, int64_t v, uint32_t radix)
196{
197    char      tbuf[30];
198    int32_t   tbx    = sizeof(tbuf);
199    uint8_t   digit;
200    int32_t   length = 0;
201    uint64_t  uval;
202
203    U_ASSERT(radix>=2 && radix<=16);
204    uval = (uint64_t) v;
205    if(v<0 && radix == 10) {
206        /* Only in base 10 do we conside numbers to be signed. */
207        uval = (uint64_t)(-v);
208        buffer[length++] = '-';
209    }
210
211    tbx = sizeof(tbuf)-1;
212    tbuf[tbx] = 0;   /* We are generating the digits backwards.  Null term the end. */
213    do {
214        digit = (uint8_t)(uval % radix);
215        tbuf[--tbx] = (char)(T_CString_itosOffset(digit));
216        uval  = uval / radix;
217    } while (uval != 0);
218
219    /* copy converted number into user buffer  */
220    uprv_strcpy(buffer+length, tbuf+tbx);
221    length += sizeof(tbuf) - tbx -1;
222    return length;
223}
224
225
226U_CAPI int32_t U_EXPORT2
227T_CString_stringToInteger(const char *integerString, int32_t radix)
228{
229    char *end;
230    return uprv_strtoul(integerString, &end, radix);
231
232}
233
234U_CAPI int U_EXPORT2
235uprv_stricmp(const char *str1, const char *str2) {
236    if(str1==NULL) {
237        if(str2==NULL) {
238            return 0;
239        } else {
240            return -1;
241        }
242    } else if(str2==NULL) {
243        return 1;
244    } else {
245        /* compare non-NULL strings lexically with lowercase */
246        int rc;
247        unsigned char c1, c2;
248
249        for(;;) {
250            c1=(unsigned char)*str1;
251            c2=(unsigned char)*str2;
252            if(c1==0) {
253                if(c2==0) {
254                    return 0;
255                } else {
256                    return -1;
257                }
258            } else if(c2==0) {
259                return 1;
260            } else {
261                /* compare non-zero characters with lowercase */
262                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
263                if(rc!=0) {
264                    return rc;
265                }
266            }
267            ++str1;
268            ++str2;
269        }
270    }
271}
272
273U_CAPI int U_EXPORT2
274uprv_strnicmp(const char *str1, const char *str2, uint32_t n) {
275    if(str1==NULL) {
276        if(str2==NULL) {
277            return 0;
278        } else {
279            return -1;
280        }
281    } else if(str2==NULL) {
282        return 1;
283    } else {
284        /* compare non-NULL strings lexically with lowercase */
285        int rc;
286        unsigned char c1, c2;
287
288        for(; n--;) {
289            c1=(unsigned char)*str1;
290            c2=(unsigned char)*str2;
291            if(c1==0) {
292                if(c2==0) {
293                    return 0;
294                } else {
295                    return -1;
296                }
297            } else if(c2==0) {
298                return 1;
299            } else {
300                /* compare non-zero characters with lowercase */
301                rc=(int)(unsigned char)uprv_tolower(c1)-(int)(unsigned char)uprv_tolower(c2);
302                if(rc!=0) {
303                    return rc;
304                }
305            }
306            ++str1;
307            ++str2;
308        }
309    }
310
311    return 0;
312}
313
314U_CAPI char* U_EXPORT2
315uprv_strdup(const char *src) {
316    size_t len = uprv_strlen(src) + 1;
317    char *dup = (char *) uprv_malloc(len);
318
319    if (dup) {
320        uprv_memcpy(dup, src, len);
321    }
322
323    return dup;
324}
325
326U_CAPI char* U_EXPORT2
327uprv_strndup(const char *src, int32_t n) {
328    char *dup;
329
330    if(n < 0) {
331        dup = uprv_strdup(src);
332    } else {
333        dup = (char*)uprv_malloc(n+1);
334        if (dup) {
335            uprv_memcpy(dup, src, n);
336            dup[n] = 0;
337        }
338    }
339
340    return dup;
341}
342