1 /*
2  * Copyright (c) 2023-2024 Huawei Device Co., Ltd.
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *     http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12  * See the License for the specific language governing permissions and
13  * limitations under the License.
14  */
15 
16 #include "anonymizer.h"
17 
18 #include <locale.h>
19 #include <securec.h>
20 #include <stdbool.h>
21 #include <stdint.h>
22 #include <wchar.h>
23 
24 #include "comm_log.h"
25 #include "softbus_error_code.h"
26 
/* Longest string, in bytes, treated as a device name; also the element count of the wide-char buffer. */
#define DEVICE_NAME_MAX_LEN 128
/* Size in bytes of the scratch buffer that receives one multi-byte character from wctomb(). */
#define WIDE_CHAR_MAX_LEN 8
29 
/*
 * Pairs a format recognizer with the routine that masks strings of that format.
 *   Matcher:    returns true when the string of the given length has the handled format.
 *   Anonymizer: stores a newly allocated masked string in the out parameter and returns a status code.
 */
typedef struct {
    bool (*Matcher)(const char *str, uint32_t len);
    int32_t (*Anonymizer)(const char *str, uint32_t len, char **anonymized);
} AnonymizeHandler;
34 
/* Character written in place of every masked character. */
static const char SYMBOL_ANONYMIZE = '*';
/* Delimiters recognized by the matchers: ':' and '-' in MAC addresses, '.' in IP addresses. */
static const char SYMBOL_COLON = ':';
static const char SYMBOL_DASH = '-';
static const char SYMBOL_DOT = '.';
39 
/* Returns true when chr lies in the closed interval [left, right]. */
static inline bool InRange(char chr, char left, char right)
{
    if (chr < left) {
        return false;
    }
    return chr <= right;
}
44 
/* Returns true when chr is an ASCII decimal digit. */
static inline bool IsNum(char chr)
{
    const bool isDigit = InRange(chr, '0', '9');
    return isDigit;
}
49 
/* Returns true when chr is a hexadecimal digit in either letter case. */
static inline bool IsHex(char chr)
{
    if (IsNum(chr)) {
        return true;
    }
    if (InRange(chr, 'A', 'F')) {
        return true;
    }
    return InRange(chr, 'a', 'f');
}
54 
/* Returns true when chr is an ASCII letter, upper or lower case. */
static inline bool IsAlphabet(char chr)
{
    if (InRange(chr, 'A', 'Z')) {
        return true;
    }
    return InRange(chr, 'a', 'z');
}
59 
IsDot(char chr)60 static inline bool IsDot(char chr)
61 {
62     return chr == SYMBOL_DOT;
63 }
64 
IsColon(char chr)65 static inline bool IsColon(char chr)
66 {
67     return chr == SYMBOL_COLON;
68 }
69 
IsDash(char chr)70 static inline bool IsDash(char chr)
71 {
72     return chr == SYMBOL_DASH;
73 }
74 
/*
 * Searches str[startPos .. len - 1] for chr.
 * Returns the index of the first occurrence, or -1 when chr does not occur in that range.
 */
static inline int32_t FindChar(char chr, const char *str, uint32_t len, uint32_t startPos)
{
    uint32_t pos = startPos;
    while (pos < len) {
        if (str[pos] == chr) {
            return (int32_t)pos;
        }
        ++pos;
    }
    return -1; // no occurrence
}
84 
/*
 * Applies isValidFunc to the character of str at each listed position.
 * Returns false at the first position whose character is rejected, true when none is
 * (including when positionNum is 0).
 */
static bool IsValid(const char *str, const uint32_t *positions, uint32_t positionNum, bool(*isValidFunc)(char))
{
    uint32_t idx = 0;
    while (idx < positionNum) {
        const char candidate = str[positions[idx]];
        if (!isValidFunc(candidate)) {
            return false;
        }
        ++idx;
    }
    return true;
}
94 
/* Matches the empty string; only the length is inspected. */
static bool MatchEmpty(const char *str, uint32_t len)
{
    (void)str;
    if (len != 0) {
        return false;
    }
    return true;
}
100 
MatchIpAddr(const char *str, uint32_t len)101 static bool MatchIpAddr(const char *str, uint32_t len)
102 {
103     static const uint32_t DOT_NUM_MAX = 3;
104     static const int32_t NUM_LEN_MAX = 3;
105     static const int32_t NUM_LEN_MIN = 1;
106     static const uint32_t IP_ADDR_MAX_LEN = 15;
107 
108     if (len > IP_ADDR_MAX_LEN) {
109         return false;
110     }
111 
112     for (uint32_t i = 0; i < len; ++i) {
113         if (!IsNum(str[i]) && !IsDot(str[i])) {
114             return false;
115         }
116     }
117 
118     int32_t numLen = 0;
119     int32_t posPrevDot = -1;
120     int32_t posNextDot = -1;
121     for (uint32_t dotNum = 0; dotNum < DOT_NUM_MAX; ++dotNum) {
122         posNextDot = FindChar(SYMBOL_DOT, str, len, posPrevDot + 1);
123         numLen = posNextDot - posPrevDot - 1;
124         if (numLen < NUM_LEN_MIN || numLen > NUM_LEN_MAX) {
125             return false;
126         }
127         posPrevDot = posNextDot;
128     }
129     numLen = (int32_t)len - posPrevDot - 1;
130     if (numLen < NUM_LEN_MIN || numLen > NUM_LEN_MAX) {
131         return false;
132     }
133 
134     return true;
135 }
136 
MatchMacAddr(const char *str, uint32_t len)137 static bool MatchMacAddr(const char *str, uint32_t len)
138 {
139     static const uint32_t MAC_ADDR_LEN = 17;
140     static const uint32_t DELIMETER_POSITIONS[] = {2, 5, 8, 11, 14};
141     static const uint32_t HEX_POSITIONS[] = {0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16};
142 
143     if (len != MAC_ADDR_LEN) {
144         return false;
145     }
146 
147     return IsValid(str, HEX_POSITIONS, sizeof(HEX_POSITIONS) / sizeof(HEX_POSITIONS[0]), IsHex) &&
148         (IsValid(str, DELIMETER_POSITIONS, sizeof(DELIMETER_POSITIONS) / sizeof(DELIMETER_POSITIONS[0]), IsDash) ||
149         IsValid(str, DELIMETER_POSITIONS, sizeof(DELIMETER_POSITIONS) / sizeof(DELIMETER_POSITIONS[0]), IsColon));
150 }
151 
/* Checks whether str is exactly 64 characters long and made only of ASCII letters and digits. */
static bool MatchUdidStr(const char *str, uint32_t len)
{
    const uint32_t UDID_LEN = 64;

    if (len != UDID_LEN) {
        return false;
    }

    uint32_t idx = 0;
    while (idx < len) {
        const char cur = str[idx];
        if (!(IsNum(cur) || IsAlphabet(cur))) {
            return false;
        }
        ++idx;
    }
    return true;
}
166 
MatchDeviceName(const char *str, uint32_t len)167 static bool MatchDeviceName(const char *str, uint32_t len)
168 {
169     return len <= DEVICE_NAME_MAX_LEN;
170 }
171 
/*
 * Allocates a buffer for a string of len characters plus the terminator.
 * Only the terminator at index len is written; the other bytes are left uninitialized.
 * Returns NULL when the allocation fails or when len + 1 is not representable in uint32_t.
 * The caller owns the buffer and releases it with free().
 */
static char *MallocStr(uint32_t len)
{
    if (len == UINT32_MAX) {
        // len + 1 would wrap to 0 and the terminator write below would land out of bounds
        return NULL;
    }

    char *str = (char *)malloc((size_t)len + 1);
    if (str != NULL) {
        str[len] = '\0';
    }
    return str;
}
180 
CopyStr(const char *str, char **copy)181 static int32_t CopyStr(const char *str, char **copy)
182 {
183     uint32_t len = strlen(str);
184     *copy = MallocStr(len);
185     COMM_CHECK_AND_RETURN_RET_LOGE(*copy != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
186 
187     errno_t ret = memcpy_s(*copy, len, str, len);
188     COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
189 
190     return SOFTBUS_OK;
191 }
192 
AnonymizeIpAddr(const char *str, uint32_t len, char **anonymized)193 static int32_t AnonymizeIpAddr(const char *str, uint32_t len, char **anonymized)
194 {
195     int32_t ret = CopyStr(str, anonymized);
196     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "copy ip addr failed");
197 
198     for (uint32_t i = len - 1; i >= 0; --i) {
199         if (IsDot((*anonymized)[i])) {
200             break;
201         }
202         (*anonymized)[i] = SYMBOL_ANONYMIZE;
203     }
204     return SOFTBUS_OK;
205 }
206 
AnonymizeMacAddr(const char *str, uint32_t len, char **anonymized)207 static int32_t AnonymizeMacAddr(const char *str, uint32_t len, char **anonymized)
208 {
209     static const uint32_t ANONYMIZE_POSITIONS[] = {9, 10, 12, 13};
210 
211     int32_t ret = CopyStr(str, anonymized);
212     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "copy mac addr failed");
213 
214     for (uint32_t i = 0; i < sizeof(ANONYMIZE_POSITIONS) / sizeof(ANONYMIZE_POSITIONS[0]); ++i) {
215         (*anonymized)[ANONYMIZE_POSITIONS[i]] = SYMBOL_ANONYMIZE;
216     }
217     return SOFTBUS_OK;
218 }
219 
AnonymizeUdidStr(const char *str, uint32_t len, char **anonymized)220 static int32_t AnonymizeUdidStr(const char *str, uint32_t len, char **anonymized)
221 {
222     static const uint32_t ANONYMIZE_UDID_LEN = 12;
223     static const uint32_t ANONYMIZE_POSITIONS[] = {5, 6};
224     static const uint32_t UNANONYMIZE_UDID_LEN = 5;
225     static const uint32_t UNANONYMIZE_SUFFIX_POS = ANONYMIZE_UDID_LEN - UNANONYMIZE_UDID_LEN;
226     static const uint32_t UNANONYMIZE_SUFFIX_OFFSET = 64 - UNANONYMIZE_UDID_LEN;
227 
228     (void)len;
229     *anonymized = MallocStr(ANONYMIZE_UDID_LEN);
230     COMM_CHECK_AND_RETURN_RET_LOGE(*anonymized != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
231 
232     errno_t ret = memcpy_s(*anonymized, ANONYMIZE_UDID_LEN, str, UNANONYMIZE_UDID_LEN);
233     COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
234     ret = memcpy_s(*anonymized + UNANONYMIZE_SUFFIX_POS, ANONYMIZE_UDID_LEN - UNANONYMIZE_SUFFIX_POS,
235         str + UNANONYMIZE_SUFFIX_OFFSET, UNANONYMIZE_UDID_LEN);
236     COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
237 
238     for (uint32_t i = 0; i < sizeof(ANONYMIZE_POSITIONS) / sizeof(ANONYMIZE_POSITIONS[0]); ++i) {
239         (*anonymized)[ANONYMIZE_POSITIONS[i]] = SYMBOL_ANONYMIZE;
240     }
241     return SOFTBUS_OK;
242 }
243 
SetLocale(char **localeBefore)244 static int32_t SetLocale(char **localeBefore)
245 {
246     *localeBefore = setlocale(LC_CTYPE, NULL);
247     if (*localeBefore == NULL) {
248         COMM_LOGW(COMM_DFX, "get locale failed");
249     }
250 
251     char *localeAfter = setlocale(LC_CTYPE, "C.UTF-8");
252     return (localeAfter != NULL) ? SOFTBUS_OK : SOFTBUS_LOCALE_ERR;
253 }
254 
RestoreLocale(const char *localeBefore)255 static void RestoreLocale(const char *localeBefore)
256 {
257     if (setlocale(LC_CTYPE, localeBefore) == NULL) {
258         COMM_LOGW(COMM_DFX, "restore locale failed");
259     }
260 }
261 
AnonymizeMultiByteStr(const char *str, uint32_t len, uint32_t lenRatio, uint32_t posRatio, char **anonymized)262 static int32_t AnonymizeMultiByteStr(const char *str, uint32_t len, uint32_t lenRatio, uint32_t posRatio,
263     char **anonymized)
264 {
265     COMM_CHECK_AND_RETURN_RET_LOGE(lenRatio != 0, SOFTBUS_INVALID_PARAM, COMM_DFX, "lenRatio is 0");
266     COMM_CHECK_AND_RETURN_RET_LOGE(posRatio != 0, SOFTBUS_INVALID_PARAM, COMM_DFX, "posRatio is 0");
267 
268     wchar_t wideStr[DEVICE_NAME_MAX_LEN] = {0};
269     size_t wideCharNum = mbstowcs(wideStr, str, len);
270     if (wideCharNum == 0 || wideCharNum > len) {
271         COMM_LOGW(COMM_DFX, "convert wide str failed");
272         return CopyStr(str, anonymized);
273     }
274 
275     *anonymized = MallocStr(len);
276     COMM_CHECK_AND_RETURN_RET_LOGE(*anonymized != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
277 
278     uint32_t wideStrLen = (uint32_t)wideCharNum;
279     uint32_t anonymizedNum = (wideStrLen + lenRatio - 1) / lenRatio; // +ratio-1 for round up
280     uint32_t plainPrefixPos = wideStrLen / posRatio;
281     uint32_t plainSuffixPos = plainPrefixPos + anonymizedNum;
282 
283     char multiByteChar[WIDE_CHAR_MAX_LEN] = {0};
284     uint32_t multiByteStrIndex = 0;
285     uint32_t wideStrIndex = 0;
286     errno_t ret = EOK;
287     for (; wideStrIndex < plainPrefixPos && multiByteStrIndex < len; ++wideStrIndex) {
288         int32_t multiByteCharLen = wctomb(multiByteChar, wideStr[wideStrIndex]);
289         COMM_CHECK_AND_RETURN_RET_LOGE(multiByteCharLen > 0, SOFTBUS_WIDECHAR_ERR, COMM_DFX, "convert prefix failed");
290         ret = memcpy_s(*anonymized + multiByteStrIndex, len - multiByteStrIndex, multiByteChar, multiByteCharLen);
291         COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_MEM_ERR, COMM_DFX, "copy prefix failed");
292         multiByteStrIndex += (uint32_t)multiByteCharLen;
293     }
294 
295     for (; wideStrIndex < plainSuffixPos && multiByteStrIndex < len; ++wideStrIndex) {
296         (*anonymized)[multiByteStrIndex++] = SYMBOL_ANONYMIZE;
297     }
298 
299     for (; wideStrIndex < wideStrLen && multiByteStrIndex < len; ++wideStrIndex) {
300         int32_t multiByteCharLen = wctomb(multiByteChar, wideStr[wideStrIndex]);
301         COMM_CHECK_AND_RETURN_RET_LOGE(multiByteCharLen > 0, SOFTBUS_WIDECHAR_ERR, COMM_DFX, "convert suffix failed");
302         ret = memcpy_s(*anonymized + multiByteStrIndex, len - multiByteStrIndex, multiByteChar, multiByteCharLen);
303         COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_MEM_ERR, COMM_DFX, "copy prefix failed");
304         multiByteStrIndex += (uint32_t)multiByteCharLen;
305     }
306 
307     uint32_t endPos = multiByteStrIndex < len ? multiByteStrIndex : len;
308     (*anonymized)[endPos] = '\0';
309     return SOFTBUS_OK;
310 }
311 
AnonymizeDeviceName(const char *str, uint32_t len, char **anonymized)312 static int32_t AnonymizeDeviceName(const char *str, uint32_t len, char **anonymized)
313 {
314     static const uint32_t ANONYMIZE_LEN_RATIO = 2; // anonymize half str
315     static const uint32_t ANONYMIZE_POS_RATIO = 4; // start from 1/4 pos
316 
317     char *localeBefore = NULL;
318     int32_t ret = SetLocale(&localeBefore);
319     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "get locale failed");
320 
321     ret = AnonymizeMultiByteStr(str, len, ANONYMIZE_LEN_RATIO, ANONYMIZE_POS_RATIO, anonymized);
322     RestoreLocale(localeBefore);
323     COMM_CHECK_AND_RETURN_RET_LOGE(ret == SOFTBUS_OK, ret, COMM_DFX, "anonymize multi byte str failed");
324     return ret;
325 }
326 
AnonymizeHalfStr(const char *str, uint32_t len, char **anonymized)327 static int32_t AnonymizeHalfStr(const char *str, uint32_t len, char **anonymized)
328 {
329     uint32_t plainTextLen = len / 2;
330     uint32_t plainTextOffset = len - plainTextLen;
331     uint32_t anonymizeLen = 1 + plainTextLen;
332 
333     *anonymized = MallocStr(anonymizeLen);
334     COMM_CHECK_AND_RETURN_RET_LOGE(*anonymized != NULL, SOFTBUS_MALLOC_ERR, COMM_DFX, "malloc failed");
335 
336     if (plainTextLen > 0) {
337         errno_t ret = memcpy_s(*anonymized + 1, plainTextLen, str + plainTextOffset, plainTextLen);
338         COMM_CHECK_AND_RETURN_RET_LOGE(ret == EOK, SOFTBUS_STRCPY_ERR, COMM_DFX, "memcpy failed");
339     }
340 
341     (*anonymized)[0] = SYMBOL_ANONYMIZE;
342     return SOFTBUS_OK;
343 }
344 
/* Represents an empty input by a newly allocated copy of the literal "EMPTY". */
static int32_t AnonymizeEmpty(const char *str, uint32_t len, char **anonymized)
{
    (void)len;
    (void)str;
    int32_t ret = CopyStr("EMPTY", anonymized);
    return ret;
}
351 
AnonymizeInner(const char *str, char **anonymized)352 static int32_t AnonymizeInner(const char *str, char **anonymized)
353 {
354     if (str == NULL) {
355         return CopyStr("NULL", anonymized);
356     }
357 
358     static const AnonymizeHandler ANONYMIZE_HANDLER[] = {
359         { MatchEmpty, AnonymizeEmpty },
360         { MatchIpAddr, AnonymizeIpAddr },
361         { MatchMacAddr, AnonymizeMacAddr },
362         { MatchUdidStr, AnonymizeUdidStr },
363         { MatchDeviceName, AnonymizeDeviceName },
364     };
365 
366     uint32_t len = strlen(str);
367     for (uint32_t i = 0; i < sizeof(ANONYMIZE_HANDLER) / sizeof(AnonymizeHandler); ++i) {
368         if (ANONYMIZE_HANDLER[i].Matcher(str, len)) {
369             return ANONYMIZE_HANDLER[i].Anonymizer(str, len, anonymized);
370         }
371     }
372     return AnonymizeHalfStr(str, len, anonymized);
373 }
374 
Anonymize(const char *plainStr, char **anonymizedStr)375 void Anonymize(const char *plainStr, char **anonymizedStr)
376 {
377     COMM_CHECK_AND_RETURN_LOGE(anonymizedStr != NULL, COMM_DFX, "anonymizedStr is null");
378 
379     if (AnonymizeInner(plainStr, anonymizedStr) == SOFTBUS_OK) {
380         return;
381     }
382     if (*anonymizedStr != NULL) {
383         AnonymizeFree(*anonymizedStr);
384         *anonymizedStr = NULL;
385     }
386 }
387 
/* Releases a string produced by Anonymize(); passing NULL is allowed and does nothing. */
void AnonymizeFree(char *anonymizedStr)
{
    if (anonymizedStr != NULL) {
        free(anonymizedStr);
    }
}
395 
/* Returns anonymizedStr itself, or the literal "NULL" when it is a null pointer, so the result is always printable. */
const char *AnonymizeWrapper(const char *anonymizedStr)
{
    if (anonymizedStr == NULL) {
        return "NULL";
    }
    return anonymizedStr;
}
400