From 9a0aec423d158a9e3d8e5cb6df0d5ce032be3524 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 4 Dec 2022 23:01:00 +0100 Subject: [PATCH 28/28] error: Make sure that error messages are valid UTF-8 This has caused issues with the Python bindings for a long time. Should fix #64. Reference: https://github.com/GNOME/libxml2/commit/76c6da420923f2721a2e16adfcef8707a2454a1b Conflict: result/,runtest.c,test/ --- error.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/error.c b/error.c index 9ff1c2b..fe9a7e2 100644 --- a/error.c +++ b/error.c @@ -163,7 +163,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) { } /** - * xmlParserPrintFileContext: + * xmlParserPrintFileContextInternal: * @input: an xmlParserInputPtr input * * Displays current context within the input content for error tracking @@ -172,7 +172,7 @@ xmlParserPrintFileInfo(xmlParserInputPtr input) { static void xmlParserPrintFileContextInternal(xmlParserInputPtr input , xmlGenericErrorFunc channel, void *data ) { - const xmlChar *cur, *base; + const xmlChar *cur, *base, *start; unsigned int n, col; /* GCC warns if signed, because compared with sizeof() */ xmlChar content[81]; /* space for 80 chars + line terminator */ xmlChar *ctnt; @@ -191,19 +191,30 @@ xmlParserPrintFileContextInternal(xmlParserInputPtr input , while ((n++ < (sizeof(content)-1)) && (cur > base) && (*(cur) != '\n') && (*(cur) != '\r')) cur--; - if ((*(cur) == '\n') || (*(cur) == '\r')) cur++; + if ((*(cur) == '\n') || (*(cur) == '\r')) { + cur++; + } else { + /* skip over continuation bytes */ + while ((cur < input->cur) && ((*cur & 0xC0) == 0x80)) + cur++; + } /* calculate the error position in terms of the current position */ col = input->cur - cur; /* search forward for end-of-line (to max buff size) */ n = 0; - ctnt = content; + start = cur; /* copy selected text to our buffer */ - while ((*cur != 0) && (*(cur) != '\n') && - (*(cur) != '\r') && (n < sizeof(content)-1)) { - *ctnt++ = *cur++; - n++; + while ((*cur != 0) && (*(cur) != '\n') && (*(cur) != '\r')) { + int len = input->end - cur; + int c = xmlGetUTF8Char(cur, &len); + + if ((c < 0) || (n + len > sizeof(content)-1)) + break; + cur += len; + n += len; } - *ctnt = 0; + memcpy(content, start, n); + content[n] = 0; /* print out the selected text */ channel(data ,"%s\n", content); /* create blank line with problem pointer */ -- 2.27.0