summaryrefslogtreecommitdiff
path: root/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch
diff options
context:
space:
mode:
Diffstat (limited to '19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch')
-rw-r--r--19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch274
1 files changed, 274 insertions, 0 deletions
diff --git a/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch b/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch
new file mode 100644
index 0000000..fcb335f
--- /dev/null
+++ b/19614-locale-Handle-loading-a-missing-locale-twice-Bug-14.patch
@@ -0,0 +1,274 @@
+From 684fbab755e727a8c15f8b621648d66694cd1f53 Mon Sep 17 00:00:00 2001
+From: Carlos O'Donell <carlos@redhat.com>
+Date: Mon, 22 Apr 2024 08:16:09 -0400
+Subject: locale: Handle loading a missing locale twice (Bug 14247)
+
+Delay setting file->decided until the data has been successfully loaded
+by _nl_load_locale(). If the function fails to load the data then we
+must return and error and leave decided untouched to allow the caller to
+attempt to load the data again at a later time. We should not set
+decided to 1 early in the function since doing so may prevent attempting
+to load it again. We want to try loading it again because that allows an
+open to fail and set errno correctly.
+
+On the other side of this problem is that if we are called again with
+the same inputs we will fetch the cached version of the object and carry
+out no open syscalls and that fails to set errno so we must set errno to
+ENOENT in that case. There is a second code path that has to be handled
+where the name of the locale matches but the codeset doesn't match.
+
+These changes ensure that errno is correctly set on failure in all the
+return paths in _nl_find_locale().
+
+Adds tst-locale-loadlocale to cover the bug.
+
+No regressions on x86_64.
+
+Co-authored-by: Jeff Law <law@redhat.com>
+Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
+---
+ gen-locales.mk | 13 +++++-
+ locale/findlocale.c | 19 +++++++--
+ locale/loadlocale.c | 2 +-
+ localedata/Makefile | 4 ++
+ localedata/gen-locale.sh | 24 ++++++++---
+ localedata/tst-locale-loadlocale.c | 67 ++++++++++++++++++++++++++++++
+ 6 files changed, 119 insertions(+), 10 deletions(-)
+ create mode 100644 localedata/tst-locale-loadlocale.c
+
+diff --git a/gen-locales.mk b/gen-locales.mk
+index 9c523d2a..b005a728 100644
+--- a/gen-locales.mk
++++ b/gen-locales.mk
+@@ -1,8 +1,19 @@
+ # defines target $(gen-locales) that generates the locales given in $(LOCALES)
+
+ LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^@ ]*\(@[^ ]*\)\?/\1\2/g')
++# The CHARMAPS dependency handling must be able to process:
++# 1. No character map e.g. eo, en_US
++# 2. Character maps e.g. en_US.UTF-8
++# 3. Character maps with modifier e.g. tt_RU.UTF-8@iqtelif
++# This complicates the processing slightly so we do it in multiple edits,
++# the first captures the character map with the anchoring period while
++# the rest of the edits remove the period to get a valid file or adjust
++# the name to match the true name.
+ CHARMAPS := $(shell echo "$(LOCALES)" | \
+- sed -e 's/[^ .]*[.]\([^@ ]*\)\(@[^@ ]*\)*/\1/g' -e s/SJIS/SHIFT_JIS/g)
++ sed -e 's/\([^ .]*\)\([^@ ]*\)\(@[^@ ]*\)*/\2/g' \
++ -e 's/^\./ /g' \
++ -e 's/ \./ /g' \
++ -e s/SJIS/SHIFT_JIS/g)
+ CTYPE_FILES = $(addsuffix /LC_CTYPE,$(LOCALES))
+ gen-locales := $(addprefix $(common-objpfx)localedata/,$(CTYPE_FILES))
+
+diff --git a/locale/findlocale.c b/locale/findlocale.c
+index 1b703434..df74fbae 100644
+--- a/locale/findlocale.c
++++ b/locale/findlocale.c
+@@ -244,7 +244,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
+ locale_file = locale_file->successor[cnt];
+
+ if (locale_file == NULL)
+- return NULL;
++ {
++ /* If this is the second time we tried to load a failed
++ locale then the locale_file value comes from the cache
++ and we will not carry out any actual filesystem
++ operations so we must set ENOENT here. */
++ __set_errno (ENOENT);
++ return NULL;
++ }
+ }
+
+ /* The LC_CTYPE category allows to check whether a locale is really
+@@ -291,8 +298,14 @@ _nl_find_locale (const char *locale_path, size_t locale_path_len,
+ if (__gconv_compare_alias (upstr (ccodeset, ccodeset),
+ upstr (clocale_codeset,
+ clocale_codeset)) != 0)
+- /* The codesets are not identical, don't use the locale. */
+- return NULL;
++ {
++ /* The codesets are not identical, don't use the locale.
++ If this is the second time we tried to load a locale
++ whose codeset doesn't match then the result came from
++ the cache and must set ENOENT here. */
++ __set_errno (ENOENT);
++ return NULL;
++ }
+ }
+
+ /* Determine the locale name for which loading succeeded. This
+diff --git a/locale/loadlocale.c b/locale/loadlocale.c
+index 671e71cf..1b4d0b05 100644
+--- a/locale/loadlocale.c
++++ b/locale/loadlocale.c
+@@ -237,7 +237,6 @@ _nl_load_locale (struct loaded_l10nfile *file, int category)
+ int save_err;
+ int alloc = ld_mapped;
+
+- file->decided = 1;
+ file->data = NULL;
+
+ fd = __open_nocancel (file->filename, O_RDONLY | O_CLOEXEC);
+@@ -345,6 +344,7 @@ _nl_load_locale (struct loaded_l10nfile *file, int category)
+ newdata->alloc = alloc;
+
+ file->data = newdata;
++ file->decided = 1;
+ }
+
+ void
+diff --git a/localedata/Makefile b/localedata/Makefile
+index 3453b8c9..888634b3 100644
+--- a/localedata/Makefile
++++ b/localedata/Makefile
+@@ -166,6 +166,7 @@ tests = \
+ tst-digits \
+ tst-iconv-math-trans \
+ tst-leaks \
++ tst-locale-loadlocale \
+ tst-mbswcs1 \
+ tst-mbswcs2 \
+ tst-mbswcs3 \
+@@ -243,6 +244,7 @@ LOCALES := \
+ dsb_DE.UTF-8 \
+ dz_BT.UTF-8 \
+ en_GB.UTF-8 \
++ en_US \
+ en_US.ANSI_X3.4-1968 \
+ en_US.ISO-8859-1\
+ en_US.UTF-8 \
+@@ -321,6 +323,8 @@ LOCALES := \
+ include ../gen-locales.mk
+
+ $(objpfx)tst-iconv-math-trans.out: $(gen-locales)
++# tst-locale-loadlocale: Needs an en_US-named locale for the test.
++$(objpfx)tst-locale-loadlocale.out: $(gen-locales)
+ endif
+
+ include ../Rules
+diff --git a/localedata/gen-locale.sh b/localedata/gen-locale.sh
+index 7a9e8556..740b6500 100644
+--- a/localedata/gen-locale.sh
++++ b/localedata/gen-locale.sh
+@@ -48,9 +48,9 @@ generate_locale ()
+ }
+
+ locfile=`echo $locfile|sed 's|.*/\([^/]*/LC_CTYPE\)|\1|'`
+-locale=`echo $locfile|sed 's|\([^.]*\)[.].*/LC_CTYPE|\1|'`
+-charmap=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|'`
+-modifier=`echo $locfile|sed 's|[^.]*[.]\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'`
++locale=`echo $locfile|sed 's|\([^.]*\).*/LC_CTYPE|\1|'`
++charmap=`echo $locfile|sed -e 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\1|' -e 's|^\.||g'`
++modifier=`echo $locfile|sed 's|[^.]*\([^@ ]*\)\(@[^ ]*\)\?/LC_CTYPE|\2|'`
+
+ echo "Generating locale $locale.$charmap: this might take a while..."
+
+@@ -63,10 +63,16 @@ echo "Generating locale $locale.$charmap: this might take a while..."
+ # you to develop in-progress locales.
+ flags=""
+
++charmap_real="$charmap"
++
++# If no character map is specified then we fall back to UTF-8.
++if [ -z "$charmap" ]; then
++ charmap_real="UTF-8"
++fi
++
+ # For SJIS the charmap is SHIFT_JIS. We just want the locale to have
+ # a slightly nicer name instead of using "*.SHIFT_SJIS", but that
+ # means we need a mapping here.
+-charmap_real="$charmap"
+ if [ "$charmap" = "SJIS" ]; then
+ charmap_real="SHIFT_JIS"
+ fi
+@@ -80,4 +86,12 @@ if [ "$charmap_real" = 'SHIFT_JIS' ] \
+ flags="$flags --no-warnings=ascii"
+ fi
+
+-generate_locale $charmap_real $locale$modifier $locale.$charmap$modifier "$flags"
++# If the character map is not specified then we output a locale
++# with the just the name of the locale e.g. eo, en_US. This is
++# used for test locales that act as fallbacks.
++output_file="$locale.$charmap$modifier"
++if [ -z "$charmap" ]; then
++ output_file="$locale"
++fi
++
++generate_locale $charmap_real $locale$modifier $output_file "$flags"
+diff --git a/localedata/tst-locale-loadlocale.c b/localedata/tst-locale-loadlocale.c
+new file mode 100644
+index 00000000..64796e97
+--- /dev/null
++++ b/localedata/tst-locale-loadlocale.c
+@@ -0,0 +1,67 @@
++/* Test for locale loading error handling (Bug 14247)
++ Copyright (C) 2024 Free Software Foundation, Inc.
++ This file is part of the GNU C Library.
++
++ The GNU C Library is free software; you can redistribute it and/or
++ modify it under the terms of the GNU Lesser General Public
++ License as published by the Free Software Foundation; either
++ version 2.1 of the License, or (at your option) any later version.
++
++ The GNU C Library is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++ Lesser General Public License for more details.
++
++ You should have received a copy of the GNU Lesser General Public
++ License along with the GNU C Library; if not, see
++ <https://www.gnu.org/licenses/>. */
++
++#include <stdio.h>
++#include <errno.h>
++#include <locale.h>
++#include <support/check.h>
++#include <support/support.h>
++
++static int
++do_test (void)
++{
++ locale_t loc = NULL;
++
++ /* We must load an en_US-based locale for the final test to fail.
++ The locale loading code will find the en_US locale already loaded
++ but the codesets won't match. */
++ xsetlocale (LC_ALL, "en_US.UTF-8");
++
++ /* Call newlocale with an invalid locale. We expect the test system
++ does not have "invalidlocale" locale. */
++ errno = 0;
++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0);
++ TEST_VERIFY (loc == NULL && errno != 0);
++
++ /* Call newlocale again with the same name. This triggers bug 14247 where
++ the second call fails to set errno correctly. */
++ errno = 0;
++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "invalidlocale.utf8", 0);
++ TEST_VERIFY (loc == NULL && errno != 0);
++
++ /* Next we attempt to load a locale that exists but whose codeset
++ does not match the codeset of the locale on the system.
++ This is more difficult to test but we rely on the fact that locale
++ processing will normalize the locale name and attempt to open
++ "en_US" with no codeset as a fallback and this will allow us to
++ compare a loaded "en_US" locale with a UTF-8 codeset to the
++ ficiticious "en_US.utf99" and get a codeset match failure. */
++ errno = 0;
++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0);
++ TEST_VERIFY (loc == NULL && errno != 0);
++
++ /* Call newlocale again with the same name. This triggers bug 14247 where
++ the second call fails to set errno correctly. */
++ errno = 0;
++ loc = newlocale (LC_COLLATE_MASK | LC_CTYPE_MASK, "en_US.utf99", 0);
++ TEST_VERIFY (loc == NULL && errno != 0);
++
++ return 0;
++}
++
++#include <support/test-driver.c>
+--
+2.27.0
+