diff options
| author | CoprDistGit <infra@openeuler.org> | 2024-10-15 06:24:53 +0000 |
|---|---|---|
| committer | CoprDistGit <infra@openeuler.org> | 2024-10-15 06:24:53 +0000 |
| commit | 2fb6e0cf79df2056e9750e29669c4633555e74b8 (patch) | |
| tree | a1ad5fd201e579ad1245110c036501053a8794b3 /Reset-converter-state-after-second-wchar_t-output-Bu.patch | |
| parent | 711949a2ac2288db2115787d9d9cca9cef093ccf (diff) | |
automatic import of glibc openeuler22.03_LTS_SP3
Diffstat (limited to 'Reset-converter-state-after-second-wchar_t-output-Bu.patch')
| -rw-r--r-- | Reset-converter-state-after-second-wchar_t-output-Bu.patch | 251 |
1 files changed, 251 insertions, 0 deletions
diff --git a/Reset-converter-state-after-second-wchar_t-output-Bu.patch b/Reset-converter-state-after-second-wchar_t-output-Bu.patch new file mode 100644 index 0000000..b616737 --- /dev/null +++ b/Reset-converter-state-after-second-wchar_t-output-Bu.patch @@ -0,0 +1,251 @@ +From c580e6466d6da8262820cdbad19f32c5546226cf Mon Sep 17 00:00:00 2001 +From: Carlos O'Donell <carlos@redhat.com> +Date: Fri, 27 Mar 2020 17:03:36 -0400 +Subject: [PATCH] Reset converter state after second wchar_t output (Bug 25734) + +An input BIG5-HKSCS character may be converted into at most 2 wchar_t +characters. After outputting the second wchar_t character (which was +saved in the converter state) we must reset the state. If we fail +to reset the state we will be stuck continually copying that +character to the output even if we have further input to consider. + +We add a new test case that covers the 4 BIG5-HKSCS characters +that may become 2 wchar_t characters. + +Reviewed-by: Tom Honermann <tom@honermann.net> +--- + iconvdata/Makefile | 17 ++- + iconvdata/big5hkscs.c | 3 + + iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c | 160 ++++++++++++++++++++++ + 3 files changed, 176 insertions(+), 4 deletions(-) + create mode 100644 iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c + +diff --git a/iconvdata/Makefile b/iconvdata/Makefile +index c83962f351b..4ec2741cdce 100644 +--- a/iconvdata/Makefile ++++ b/iconvdata/Makefile +@@ -73,7 +73,7 @@ modules.so := $(addsuffix .so, $(modules)) + ifeq (yes,$(build-shared)) + tests = bug-iconv1 bug-iconv2 tst-loading tst-e2big tst-iconv4 bug-iconv4 \ + tst-iconv6 bug-iconv5 bug-iconv6 tst-iconv7 bug-iconv8 bug-iconv9 \ +- bug-iconv10 bug-iconv11 bug-iconv12 ++ bug-iconv10 bug-iconv11 bug-iconv12 tst-iconv-big5-hkscs-to-2ucs4 + ifeq ($(have-thread-library),yes) + tests += bug-iconv3 + endif +@@ -275,16 +275,21 @@ endif + endif + endif + +-include ../Rules +- + ifeq ($(run-built-tests),yes) +-LOCALES := de_DE.UTF-8 ++LOCALES := \ ++ de_DE.UTF-8 \ ++ zh_HK.BIG5-HKSCS \ ++ 
$(NULL) ++ + include ../gen-locales.mk + + $(objpfx)bug-iconv6.out: $(gen-locales) + $(objpfx)tst-iconv7.out: $(gen-locales) ++$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(gen-locales) + endif + ++include ../Rules ++ + # Set libof-* for each routine. + cpp-srcs-left := $(modules) $(generated-modules) $(libJIS-routines) \ + $(libKSC-routines) $(libGB-routines) $(libCNS-routines) \ +@@ -340,3 +345,7 @@ tst-tables-clean: + + $(objpfx)gconv-modules: gconv-modules + cat $(sysdeps-gconv-modules) $^ > $@ ++ ++# Test requires BIG5HKSCS. ++$(objpfx)tst-iconv-big5-hkscs-to-2ucs4.out: $(objpfx)gconv-modules \ ++ $(addprefix $(objpfx),$(modules.so)) +diff --git a/iconvdata/big5hkscs.c b/iconvdata/big5hkscs.c +index 01fcfeba76b..ef325119b18 100644 +--- a/iconvdata/big5hkscs.c ++++ b/iconvdata/big5hkscs.c +@@ -17895,6 +17895,9 @@ static struct + else \ + ++inptr; \ + } \ ++ else \ ++ /* Clear the queue and proceed to output the saved character. */ \ ++ *statep = 0; \ + \ + put32 (outptr, ch); \ + outptr += 4; \ +diff --git a/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c +new file mode 100644 +index 00000000000..8389adebf27 +--- /dev/null ++++ b/iconvdata/tst-iconv-big5-hkscs-to-2ucs4.c +@@ -0,0 +1,160 @@ ++/* Verify the BIG5HKSCS outputs that generate 2 wchar_t's (Bug 25734). ++ Copyright (C) 2020 Free Software Foundation, Inc. ++ This file is part of the GNU C Library. ++ ++ The GNU C Library is free software; you can redistribute it and/or ++ modify it under the terms of the GNU Lesser General Public ++ License as published by the Free Software Foundation; either ++ version 2.1 of the License, or (at your option) any later version. ++ ++ The GNU C Library is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ Lesser General Public License for more details. 
++ ++ You should have received a copy of the GNU Lesser General Public ++ License along with the GNU C Library; if not, see ++ <https://www.gnu.org/licenses/>. */ ++ ++#include <stdio.h> ++#include <string.h> ++#include <locale.h> ++#include <wchar.h> ++#include <support/check.h> ++#include <support/support.h> ++ ++/* A few BIG5-HKSCS characters map to two Unicode code points. ++ They are: ++ /x88/x62 => <U00CA><U0304> ++ /x88/x64 => <U00CA><U030C> ++ /x88/xa3 => <U00EA><U0304> ++ /x88/xa5 => <U00EA><U030C> ++ Each of these is special cased in iconvdata/big5hkscs.c. ++ This test ensures that we correctly reset the shift state after ++ outputting any of these characters. We do this by converting ++ each of them followed by converting an ASCII character. If we fail ++ to reset the shift state (bug 25734) then we'll see the last ++ character in the queue output again. */ ++ ++/* Each test has name, input bytes, and expected wide character ++ output. */ ++struct testdata { ++ const char *name; ++ const char input[3]; ++ wchar_t expected[3]; ++}; ++ ++/* In BIG5-HKSCS (2008) there are 4 characters that generate multiple ++ wide characters. */ ++struct testdata tests[4] = { ++ /* <H-8862>X => <U+00CA><U+0304>X */ ++ { "<H-8862>", "\x88\x62\x58", { 0x00CA, 0x0304, 0x0058 } }, ++ /* <H-8864>X => <U+00CA><U+030C>X */ ++ { "<H-8864>", "\x88\x64\x58", { 0x00CA, 0x030C, 0x0058 } }, ++ /* <H-88A3>X => <U+00EA><U+0304>X */ ++ { "<H-88A3>", "\x88\xa3\x58", { 0x00EA, 0x0304, 0x0058 } }, ++ /* <H-88A5>X => <U+00EA><U+030C>X */ ++ { "<H-88A5>", "\x88\xa5\x58", { 0x00EA, 0x030C, 0x0058 } } ++}; ++ ++/* Each test is of the form: ++ - Translate first code sequence (two bytes) ++ - Translate second (zero bytes) ++ - Translate the third (one byte). */ ++static int ++check_conversion (struct testdata test) ++{ ++ int err = 0; ++ wchar_t wc; ++ mbstate_t st; ++ size_t ret; ++ const char *mbs = test.input; ++ int consumed = 0; ++ /* Input is always 3 bytes long. 
*/ ++ int inlen = 3; ++ ++ memset (&st, 0, sizeof (st)); ++ /* First conversion: Consumes first 2 bytes. */ ++ ret = mbrtowc (&wc, mbs, inlen - consumed, &st); ++ if (ret != 2) ++ { ++ printf ("error: First conversion consumed only %zd bytes.\n", ret); ++ err++; ++ } ++ /* Advance the two consumed bytes. */ ++ mbs += ret; ++ consumed += ret; ++ if (wc != test.expected[0]) ++ { ++ printf ("error: Result of first conversion was wrong.\n"); ++ err++; ++ } ++ /* Second conversion: Consumes 0 bytes. */ ++ ret = mbrtowc (&wc, mbs, inlen - consumed, &st); ++ if (ret != 0) ++ { ++ printf ("error: Second conversion consumed only %zd bytes.\n", ret); ++ err++; ++ } ++ /* Advance the zero consumed bytes. */ ++ mbs += ret; ++ consumed += ret; ++ if (wc != test.expected[1]) ++ { ++ printf ("error: Result of second conversion was wrong.\n"); ++ err++; ++ } ++ /* After the second conversion the state of the converter should be ++ in the initial state. It is in the initial state because the two ++ input BIG5-HKSCS bytes have been consumed and the 2 wchar_t's have ++ been output. */ ++ if (mbsinit (&st) == 0) ++ { ++ printf ("error: Converter not in initial state.\n"); ++ err++; ++ } ++ /* Third conversion: Consumes 1 byte (it's an ASCII character). */ ++ ret = mbrtowc (&wc, mbs, inlen - consumed, &st); ++ if (ret != 1) ++ { ++ printf ("error: Third conversion consumed only %zd bytes.\n", ret); ++ err++; ++ } ++ /* Advance the one byte. */ ++ mbs += ret; ++ consumed += ret; ++ if (wc != test.expected[2]) ++ { ++ printf ("error: Result of third conversion was wrong.\n"); ++ err++; ++ } ++ /* Return 0 if we saw no errors. */ ++ return err; ++} ++ ++static int ++do_test (void) ++{ ++ int err = 0; ++ int ret; ++ /* Testing BIG5-HKSCS. */ ++ setlocale (LC_ALL, "zh_HK.BIG5-HKSCS"); ++ ++ /* Run all the special conversions. 
*/ ++ for (int i = 0; i < (sizeof (tests) / sizeof (struct testdata)); i++) ++ { ++ printf ("Running test for %s\n", tests[i].name); ++ ret = check_conversion (tests[i]); ++ if (ret > 0) ++ printf ("Test %s failed.\n", tests[i].name); ++ err += ret; ++ } ++ ++ /* Fail if any conversion had an error. */ ++ if (err > 0) ++ FAIL_EXIT1 ("One or more conversions failed."); ++ ++ return 0; ++} ++ ++#include <support/test-driver.c> +-- +2.19.1 + |
