1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 *   Copyright (C) 1999-2009, International Business Machines
6 *   Corporation and others.  All Rights Reserved.
7 **********************************************************************
8  *
9  *
10  *   ucnv_err.h:
11  */
12 
13 /**
14  * \file
15  * \brief C API: UConverter predefined error callbacks
16  *
17  *  <h2>Error Behaviour Functions</h2>
18  *  Defines some error behaviour functions called by ucnv_{from,to}Unicode
19  *  These are provided as part of ICU and many are stable, but they
20  *  can also be considered only as an example of what can be done with
21  *  callbacks.  You may of course write your own.
22  *
23  *  If you want to write your own, you may also find the functions from
24  *  ucnv_cb.h useful when writing your own callbacks.
25  *
 *  These functions, although public, should NEVER be called directly.
 *  They should be used as parameters to the ucnv_setFromUCallBack
 *  and ucnv_setToUCallBack functions, to set the behaviour of a converter
 *  when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
30  *
31  *  usage example:  'STOP' doesn't need any context, but newContext
32  *    could be set to something other than 'NULL' if needed. The available
33  *    contexts in this header can modify the default behavior of the callback.
34  *
35  *  \code
36  *  UErrorCode err = U_ZERO_ERROR;
37  *  UConverter *myConverter = ucnv_open("ibm-949", &err);
38  *  const void *oldContext;
39  *  UConverterFromUCallback oldAction;
40  *
41  *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setFromUCallBack(myConverter,
 *                       UCNV_FROM_U_CALLBACK_STOP,
 *                       NULL,
 *                       &oldAction,
 *                       &oldContext,
 *                       &err);
 *  }
51  *  \endcode
52  *
 *  The code above tells "myConverter" to stop when it encounters an
 *  ILLEGAL/TRUNCATED/INVALID sequence while it is used to convert from
 *  Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
 *  and ucnv_setToUCallBack would need to be called in order to change
 *  that behavior too.
58  *
59  *  Here is an example with a context:
60  *
61  *  \code
 *  UErrorCode err = U_ZERO_ERROR;
 *  UConverter *myConverter = ucnv_open("ibm-949", &err);
 *  const void *oldContext;
 *  UConverterToUCallback oldAction;
 *
 *
 *  if (U_SUCCESS(err))
 *  {
 *      ucnv_setToUCallBack(myConverter,
 *                       UCNV_TO_U_CALLBACK_SUBSTITUTE,
 *                       UCNV_SUB_STOP_ON_ILLEGAL,
 *                       &oldAction,
 *                       &oldContext,
 *                       &err);
 *  }
77  *  \endcode
78  *
 *  The code above tells "myConverter" to stop when it encounters an
 *  ILLEGAL/TRUNCATED/INVALID sequence while it is used to convert from
 *  Codepage -> Unicode. Any unmapped but legal characters will be
 *  replaced with the default substitution character.
83  */
84 
85 #ifndef UCNV_ERR_H
86 #define UCNV_ERR_H
87 
88 #include "unicode/utypes.h"
89 
90 #if !UCONFIG_NO_CONVERSION
91 
/**
 * Forward declaration of the UConverter structure, so that the declarations
 * in this header can refer to a converter by pointer.
 * @stable ICU 2.0
 */
struct UConverter;

/**
 * Allows the converter type to be written without the struct keyword.
 * @stable ICU 2.0
 */
typedef struct UConverter UConverter;
97 
/**
 * Context option for the FROM_U and TO_U substitute (sub) callbacks.
 * @stable ICU 2.0
 */
#define UCNV_SUB_STOP_ON_ILLEGAL "i"

/**
 * Context option for the FROM_U and TO_U skip callbacks.
 * @stable ICU 2.0
 */
#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
109 
/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit in the
 * ICU style (%UXXXX).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_ICU       NULL

/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit in the
 * JAVA style (\\uXXXX).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_JAVA      "J"

/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit in the
 * C style (\\uXXXX \\UXXXXXXXX).
 * TO_U_CALLBACK_ESCAPE context option: escape the character value in the
 * C style (\\xXXXX).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_C         "C"

/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit as an
 * XML decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly.
 * TO_U_CALLBACK_ESCAPE context option: escape the character value as an
 * XML decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly.
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_XML_DEC   "D"

/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit as an
 * XML hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly.
 * TO_U_CALLBACK_ESCAPE context option: escape the character value as an
 * XML hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly.
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_XML_HEX   "X"

/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit in the
 * Unicode style (U+XXXXX).
 * @stable ICU 2.0
 */
#define UCNV_ESCAPE_UNICODE   "U"

/**
 * FROM_U_CALLBACK_ESCAPE context option: escape the code unit according to
 * CSS2 conventions (\\HH..H<space>, that is, a backslash, 1..6 hex digits,
 * and a space).
 * @stable ICU 4.0
 */
#define UCNV_ESCAPE_CSS2   "S"
150 
/**
 * The process condition code to be used with the callbacks; it tells a
 * callback why it is being invoked.
 * Codes which are greater than UCNV_IRREGULAR should be
 * passed on to any chained callbacks.
 * @stable ICU 2.0
 */
typedef enum {
    /**
     * The code point is unassigned.
     * The error code U_INVALID_CHAR_FOUND will be set.
     */
    UCNV_UNASSIGNED = 0,

    /**
     * The code point is illegal. For example,
     * \\x81\\x2E is illegal in SJIS because \\x2E
     * is not a valid trail byte for the \\x81
     * lead byte.
     * Also, starting with Unicode 3.0.1, non-shortest byte sequences
     * in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
     * are also illegal, not just irregular.
     * The error code U_ILLEGAL_CHAR_FOUND will be set.
     */
    UCNV_ILLEGAL = 1,

    /**
     * The code point is not a regular sequence in
     * the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
     * are irregular UTF-8 byte sequences for single surrogate
     * code points.
     * The error code U_INVALID_CHAR_FOUND will be set.
     */
    UCNV_IRREGULAR = 2,

    /**
     * The callback is called with this reason when a
     * 'reset' has occurred. The callback should reset all
     * state.
     */
    UCNV_RESET = 3,

    /**
     * Called when the converter is closed. The
     * callback should release any allocated memory.
     */
    UCNV_CLOSE = 4,

    /**
     * Called when ucnv_safeClone() is called on the
     * converter. The pointer available as the
     * 'context' is an alias to the original converter's
     * context pointer. If the context must be owned
     * by the new converter, the callback must clone
     * the data and call ucnv_setFromUCallBack
     * (or ucnv_setToUCallBack) with the correct pointer.
     * @stable ICU 2.2
     */
    UCNV_CLONE = 5
} UConverterCallbackReason;
188 
189 
190 /**
191  * The structure for the fromUnicode callback function parameter.
192  * @stable ICU 2.0
193  */
194 typedef struct {
195     uint16_t size;              /**< The size of this struct. @stable ICU 2.0 */
196     UBool flush;                /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0    */
197     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0  */
198     const UChar *source;        /**< Pointer to the source source buffer. @stable ICU 2.0    */
199     const UChar *sourceLimit;   /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
200     char *target;               /**< Pointer to the target buffer. @stable ICU 2.0    */
201     const char *targetLimit;    /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
202     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
203 } UConverterFromUnicodeArgs;
204 
205 
206 /**
207  * The structure for the toUnicode callback function parameter.
208  * @stable ICU 2.0
209  */
210 typedef struct {
211     uint16_t size;              /**< The size of this struct   @stable ICU 2.0 */
212     UBool flush;                /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0   */
213     UConverter *converter;      /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
214     const char *source;         /**< Pointer to the source source buffer. @stable ICU 2.0    */
215     const char *sourceLimit;    /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0    */
216     UChar *target;              /**< Pointer to the target buffer. @stable ICU 2.0    */
217     const UChar *targetLimit;   /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0     */
218     int32_t *offsets;           /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0  */
219 } UConverterToUnicodeArgs;
220 
221 
222 
223 
224 
225 
226 
227 /**
228  * DO NOT CALL THIS FUNCTION DIRECTLY!
229  * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
230  * hexadecimal representation of the illegal bytes
231  *  (in the format  %XNN, e.g. "%XFF%X0A%XC8%X03").
232  *
233  * @param context This function currently recognizes the callback options:
234  *      UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
235  *      UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
236  * @param toUArgs Information about the conversion in progress
237  * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
238  * @param length Size (in bytes) of the concerned codepage sequence
239  * @param reason Defines the reason the callback was invoked
240  * @param err Return value will be set to success if the callback was handled,
241  *      otherwise this value will be set to a failure status.
242  * @stable ICU 2.0
243  */
244 
245 U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
246                   const void *context,
247                   UConverterToUnicodeArgs *toUArgs,
248                   const char* codeUnits,
249                   int32_t length,
250                   UConverterCallbackReason reason,
251                   UErrorCode * err);
252 
253 #endif
254 
255 #endif
256 
257 /*UCNV_ERR_H*/
258