From 9a0aec423d158a9e3d8e5cb6df0d5ce032be3524 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Sun, 4 Dec 2022 23:01:00 +0100
Subject: [PATCH 28/28] error: Make sure that error messages are valid UTF-8

This has caused issues with the Python bindings for a long time.

Should fix #64.

Reference: https://github.com/GNOME/libxml2/commit/76c6da420923f2721a2e16adfcef8707a2454a1b
Conflict: result/,runtest.c,test/
---
 error.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

diff --git a/error.c b/error.c
index 9ff1c2b..fe9a7e2 100644
--- a/error.c
+++ b/error.c
@@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
 }
 
 /**
- * xmlParserPrintFileContext:
+ * xmlParserPrintFileContextInternal:
  * @input:  an xmlParserInputPtr input
  *
  * Displays current context within the input content for error tracking
@@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) {
 static void
 xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
 		xmlGenericErrorFunc channel, void *data ) {
-    const xmlChar *cur, *base;
+    const xmlChar *cur, *base, *start;
     unsigned int n, col;	/* GCC warns if signed, because compared with sizeof() */
     xmlChar  content[81]; /* space for 80 chars + line terminator */
     xmlChar *ctnt;
@@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input ,
     while ((n++ < (sizeof(content)-1)) && (cur > base) &&
 	   (*(cur) != '\n') && (*(cur) != '\r'))
         cur--;
-    if ((*(cur) == '\n') || (*(cur) == '\r')) cur++;
+    if ((*(cur) == '\n') || (*(cur) == '\r')) {
+        cur++;
+    } else {
+        /* skip over continuation bytes */
+        while ((cur < input->cur) && ((*cur & 0xC0) == 0x80))
+            cur++;
+    }
     /* calculate the error position in terms of the current position */
     col = input->cur - cur;
     /* search forward for end-of-line (to max buff size) */
     n = 0;
-    ctnt = content;
+    start = cur;
     /* copy selected text to our buffer */
-    while ((*cur != 0) && (*(cur) != '\n') &&
-	   (*(cur) != '\r') && (n < sizeof(content)-1)) {
-		*ctnt++ = *cur++;
-	n++;
+    while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) {
+        int len = input->end - cur;
+        int c = xmlGetUTF8Char(cur, &len);
+
+        if ((c < 0) || (n + len > sizeof(content)-1))
+            break;
+        cur += len;
+	n += len;
     }
-    *ctnt = 0;
+    memcpy(content, start, n);
+    content[n] = 0;
     /* print out the selected text */
     channel(data ,"%s\n", content);
     /* create blank line with problem pointer */
-- 
2.27.0

