Skip to content

Commit 2510f35

Browse files
usernamedtLeonid Borchuk
authored andcommitted
Movable DataBase Locales for Cloudberry
We inherited this issue from PostgreSQL. PostgreSQL uses glibc to sort strings. In glibc 2.28, collations changed drastically (in general, there are no stability guarantees when updating glibc). Changing collations breaks indexes. Similarly, a cluster with differing collations also behaves unpredictably. What changed in glibc, and when, can be found at https://github.com/ardentperf/glibc-unicode-sorting. There is also a dedicated PostgreSQL wiki page, https://wiki.postgresql.org/wiki/Locale_data_changes, and a YouTube video, https://www.youtube.com/watch?v=0E6O-V8Jato. In short, the issue can be demonstrated in bash: ( echo "1-1"; echo "11" ) | LC_COLLATE=en_US.UTF-8 sort gives different results on Ubuntu 18.04 and 22.04. There is no way to solve the problem other than by not changing the symbol order. We freeze the symbol order and use it instead of glibc's. The solution is here: https://github.com/postgredients/mdb-locales. In this PR I have added a PostgreSQL patch that replaces all glibc locale-related calls with calls to an external library. It is activated with a new configure parameter, --with-mdblocales, which is off by default. Using custom locales requires the libmdblocales1 package and the mdb-locales package with the symbol table. Building requires the libmdblocales-dev package with headers.
1 parent 1d9a1f2 commit 2510f35

File tree

26 files changed

+594
-73
lines changed

26 files changed

+594
-73
lines changed

configure

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,6 +698,7 @@ BISON
698698
MKDIR_P
699699
LN_S
700700
TAR
701+
USE_MDBLOCALES
701702
install_bin
702703
INSTALL_DATA
703704
INSTALL_SCRIPT
@@ -943,6 +944,7 @@ with_rt
943944
with_libcurl
944945
with_apr_config
945946
with_gnu_ld
947+
with_mdblocales
946948
with_ssl
947949
with_openssl
948950
enable_openssl_redirect
@@ -1690,6 +1692,7 @@ Optional Packages:
16901692
--without-libcurl do not use libcurl
16911693
--with-apr-config=PATH path to apr-1-config utility
16921694
--with-gnu-ld assume the C compiler uses GNU ld [default=no]
1695+
--with-mdblocales build with MDB locales
16931696
--with-ssl=LIB use LIB for SSL/TLS support (openssl)
16941697
--with-openssl obsolete spelling of --with-ssl=openssl
16951698

@@ -2906,7 +2909,6 @@ PG_PACKAGE_VERSION=14.4
29062909

29072910

29082911

2909-
29102912
ac_aux_dir=
29112913
for ac_dir in config "$srcdir"/config; do
29122914
if test -f "$ac_dir/install-sh"; then
@@ -12124,6 +12126,38 @@ case $INSTALL in
1212412126
esac
1212512127

1212612128

12129+
#
12130+
# MDB locales
12131+
#
12132+
12133+
12134+
12135+
12136+
# Check whether --with-mdblocales was given.
12137+
if test "${with_mdblocales+set}" = set; then :
12138+
withval=$with_mdblocales;
12139+
case $withval in
12140+
yes)
12141+
12142+
$as_echo "#define USE_MDBLOCALES 1" >>confdefs.h
12143+
12144+
;;
12145+
no)
12146+
:
12147+
;;
12148+
*)
12149+
as_fn_error $? "no argument expected for --with-mdblocales option" "$LINENO" 5
12150+
;;
12151+
esac
12152+
12153+
else
12154+
with_mdblocales=no
12155+
12156+
fi
12157+
12158+
12159+
12160+
1212712161
if test -z "$TAR"; then
1212812162
for ac_prog in tar
1212912163
do
@@ -12760,6 +12794,56 @@ $as_echo "${python_libspec} ${python_additional_libs}" >&6; }
1276012794

1276112795

1276212796

12797+
fi
12798+
12799+
if test "$with_mdblocales" = yes; then
12800+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for mdb_setlocale in -lmdblocales" >&5
12801+
$as_echo_n "checking for mdb_setlocale in -lmdblocales... " >&6; }
12802+
if ${ac_cv_lib_mdblocales_mdb_setlocale+:} false; then :
12803+
$as_echo_n "(cached) " >&6
12804+
else
12805+
ac_check_lib_save_LIBS=$LIBS
12806+
LIBS="-lmdblocales $LIBS"
12807+
cat confdefs.h - <<_ACEOF >conftest.$ac_ext
12808+
/* end confdefs.h. */
12809+
12810+
/* Override any GCC internal prototype to avoid an error.
12811+
Use char because int might match the return type of a GCC
12812+
builtin and then its argument prototype would still apply. */
12813+
#ifdef __cplusplus
12814+
extern "C"
12815+
#endif
12816+
char mdb_setlocale ();
12817+
int
12818+
main ()
12819+
{
12820+
return mdb_setlocale ();
12821+
;
12822+
return 0;
12823+
}
12824+
_ACEOF
12825+
if ac_fn_c_try_link "$LINENO"; then :
12826+
ac_cv_lib_mdblocales_mdb_setlocale=yes
12827+
else
12828+
ac_cv_lib_mdblocales_mdb_setlocale=no
12829+
fi
12830+
rm -f core conftest.err conftest.$ac_objext \
12831+
conftest$ac_exeext conftest.$ac_ext
12832+
LIBS=$ac_check_lib_save_LIBS
12833+
fi
12834+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_mdblocales_mdb_setlocale" >&5
12835+
$as_echo "$ac_cv_lib_mdblocales_mdb_setlocale" >&6; }
12836+
if test "x$ac_cv_lib_mdblocales_mdb_setlocale" = xyes; then :
12837+
cat >>confdefs.h <<_ACEOF
12838+
#define HAVE_LIBMDBLOCALES 1
12839+
_ACEOF
12840+
12841+
LIBS="-lmdblocales $LIBS"
12842+
12843+
else
12844+
as_fn_error $? "mdblocales library not found" "$LINENO" 5
12845+
fi
12846+
1276312847
fi
1276412848

1276512849
if test x"$cross_compiling" = x"yes" && test -z "$with_system_tzdata"; then
@@ -16981,6 +17065,17 @@ fi
1698117065

1698217066
done
1698317067

17068+
fi
17069+
17070+
if test "$with_mdblocales" = yes; then
17071+
ac_fn_c_check_header_mongrel "$LINENO" "mdblocales.h" "ac_cv_header_mdblocales_h" "$ac_includes_default"
17072+
if test "x$ac_cv_header_mdblocales_h" = xyes; then :
17073+
17074+
else
17075+
as_fn_error $? "mdblocales header not found." "$LINENO" 5
17076+
fi
17077+
17078+
1698417079
fi
1698517080

1698617081
if test "$with_gssapi" = yes ; then

configure.ac

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1448,6 +1448,14 @@ case $INSTALL in
14481448
esac
14491449
AC_SUBST(install_bin)
14501450

1451+
#
1452+
# MDB locales
1453+
#
1454+
1455+
PGAC_ARG_BOOL(with, mdblocales, no, [build with MDB locales],
1456+
[AC_DEFINE([USE_MDBLOCALES], 1, [Define to 1 to build with MDB locales. (--with-mdblocales)])])
1457+
AC_SUBST(USE_MDBLOCALES)
1458+
14511459
PGAC_PATH_PROGS(TAR, tar)
14521460
AC_PROG_LN_S
14531461
AC_PROG_MKDIR_P
@@ -1606,6 +1614,11 @@ failure. It is possible the compiler isn't looking in the proper directory.
16061614
Use --without-zlib to disable zlib support.])])
16071615
fi
16081616

1617+
if test "$with_mdblocales" = yes; then
1618+
AC_CHECK_LIB(mdblocales, mdb_setlocale, [],
1619+
[AC_MSG_ERROR([mdblocales library not found])])
1620+
fi
1621+
16091622
if test "$enable_external_fts" = yes; then
16101623
AC_CHECK_LIB(jansson, jansson_version_str, [],
16111624
[AC_MSG_ERROR([jansson library not found or version is too old, version must >= 2.13])])
@@ -1985,6 +1998,10 @@ if test "$with_lz4" = yes; then
19851998
AC_CHECK_HEADERS(lz4.h, [], [AC_MSG_ERROR([lz4.h header file is required for LZ4])])
19861999
fi
19872000

2001+
if test "$with_mdblocales" = yes; then
2002+
AC_CHECK_HEADER(mdblocales.h, [], [AC_MSG_ERROR([mdblocales header not found.])])
2003+
fi
2004+
19882005
if test "$with_gssapi" = yes ; then
19892006
AC_CHECK_HEADERS(gssapi/gssapi.h, [],
19902007
[AC_CHECK_HEADERS(gssapi.h, [], [AC_MSG_ERROR([gssapi.h header file is required for GSSAPI])])])

contrib/pax_storage/src/cpp/storage/oper/pax_oper.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
*-------------------------------------------------------------------------
2626
*/
2727

28+
#include "common/mdb_locale.h"
2829
#include "storage/oper/pax_oper.h"
2930

3031
#include "comm/cbdb_wrappers.h"
@@ -588,9 +589,9 @@ static inline bool LocaleIsC(Oid collation) {
588589
return (bool)result;
589590
}
590591

591-
localeptr = setlocale(LC_COLLATE, NULL);
592+
localeptr = SETLOCALE(LC_COLLATE, NULL);
592593
CBDB_CHECK(localeptr, cbdb::CException::ExType::kExTypeCError,
593-
fmt("Invalid locale, fail to `setlocale`, errno: %d", errno));
594+
fmt("Invalid locale, fail to `SETLOCALE`, errno: %d", errno));
594595

595596
if (strcmp(localeptr, "C") == 0 || // cut line
596597
strcmp(localeptr, "POSIX") == 0) {

devops/build/automation/cloudberry/scripts/configure-cloudberry.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,12 @@
6161
# --enable-cassert
6262
# --enable-debug-extensions
6363
#
64+
# ENABLE_MDBLOCALES - Enable custom locales (true/false, defaults to
65+
# false)
66+
#
67+
# When true, add option:
68+
# --with-mdblocales
69+
#
6470
# Prerequisites:
6571
# - System dependencies must be installed:
6672
# * xerces-c development files
@@ -125,6 +131,11 @@ if [ "${ENABLE_DEBUG:-false}" = "true" ]; then
125131
--enable-debug-extensions"
126132
fi
127133

134+
CONFIGURE_MDBLOCALES_OPTS="--without-mdblocales"
135+
if [ "${ENABLE_MDBLOCALES:-false}" = "true" ]; then
136+
CONFIGURE_MDBLOCALES_OPTS="--with-mdblocales"
137+
fi
138+
128139
# Configure build
129140
log_section "Configure"
130141
execute_cmd ./configure --prefix=/usr/local/cloudberry-db \
@@ -151,6 +162,7 @@ execute_cmd ./configure --prefix=/usr/local/cloudberry-db \
151162
--with-ssl=openssl \
152163
--with-openssl \
153164
--with-uuid=e2fs \
165+
${CONFIGURE_MDBLOCALES_OPTS} \
154166
--with-includes=/usr/local/xerces-c/include \
155167
--with-libraries=/usr/local/cloudberry-db/lib || exit 4
156168
log_section_end "Configure"

gpcontrib/orafce/others.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
#include "utils/uuid.h"
4646
#include "orafce.h"
4747
#include "builtins.h"
48+
#include "common/mdb_locale.h"
4849

4950
/*
5051
* Source code for nlssort is taken from postgresql-nls-string
@@ -322,7 +323,7 @@ _nls_run_strxfrm(text *string, text *locale)
322323
*/
323324
if (!lc_collate_cache)
324325
{
325-
if ((lc_collate_cache = setlocale(LC_COLLATE, NULL)))
326+
if ((lc_collate_cache = SETLOCALE(LC_COLLATE, NULL)))
326327
/* Make a copy of the locale name string. */
327328
#ifdef _MSC_VER
328329
lc_collate_cache = _strdup(lc_collate_cache);
@@ -364,7 +365,7 @@ _nls_run_strxfrm(text *string, text *locale)
364365
* If setlocale failed, we know the default stayed the same,
365366
* co we can safely elog.
366367
*/
367-
if (!setlocale(LC_COLLATE, locale_str))
368+
if (!SETLOCALE(LC_COLLATE, locale_str))
368369
elog(ERROR, "failed to set the requested LC_COLLATE value [%s]", locale_str);
369370

370371
changed_locale = true;
@@ -409,7 +410,7 @@ _nls_run_strxfrm(text *string, text *locale)
409410
/*
410411
* Set original locale
411412
*/
412-
if (!setlocale(LC_COLLATE, lc_collate_cache))
413+
if (!SETLOCALE(LC_COLLATE, lc_collate_cache))
413414
elog(FATAL, "failed to set back the default LC_COLLATE value [%s]", lc_collate_cache);
414415
}
415416

@@ -422,7 +423,7 @@ _nls_run_strxfrm(text *string, text *locale)
422423
/*
423424
* Set original locale
424425
*/
425-
if (!setlocale(LC_COLLATE, lc_collate_cache))
426+
if (!SETLOCALE(LC_COLLATE, lc_collate_cache))
426427
elog(FATAL, "failed to set back the default LC_COLLATE value [%s]", lc_collate_cache);
427428
pfree(locale_str);
428429
}

src/backend/gporca/libgpos/server/src/unittest/gpos/string/CWStringTest.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "unittest/gpos/string/CWStringTest.h"
1313

1414
#include <locale.h>
15+
#include "common/mdb_locale.h"
1516

1617
#include "gpos/base.h"
1718
#include "gpos/error/CAutoTrace.h"
@@ -177,18 +178,18 @@ CWStringTest::EresUnittest_AppendFormatInvalidLocale()
177178
CWStringDynamic *expected =
178179
GPOS_NEW(mp) CWStringDynamic(mp, GPOS_WSZ_LIT("UNKNOWN"));
179180

180-
CHAR *oldLocale = setlocale(LC_CTYPE, nullptr);
181+
CHAR *oldLocale = SETLOCALE(LC_CTYPE, nullptr);
181182
CWStringDynamic *pstr1 = GPOS_NEW(mp) CWStringDynamic(mp);
182183

183184
GPOS_RESULT eres = GPOS_OK;
184185

185-
setlocale(LC_CTYPE, "C");
186+
SETLOCALE(LC_CTYPE, "C");
186187
pstr1->AppendFormat(GPOS_WSZ_LIT("%s"), (CHAR *) "ÃË", 123);
187188

188189
pstr1->Equals(expected);
189190

190191
// cleanup
191-
setlocale(LC_CTYPE, oldLocale);
192+
SETLOCALE(LC_CTYPE, oldLocale);
192193
GPOS_DELETE(pstr1);
193194
GPOS_DELETE(expected);
194195

src/backend/utils/adt/Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,8 @@ OBJS = \
117117
windowfuncs.o \
118118
xid.o \
119119
xid8funcs.o \
120-
xml.o
120+
xml.o \
121+
mdb.o
121122

122123
jsonpath_scan.c: FLEXFLAGS = -CF -p -p
123124
jsonpath_scan.c: FLEX_NO_BACKUP=yes

src/backend/utils/adt/mdb.c

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
/*-------------------------------------------------------------------------
2+
*
3+
* mdb.c
4+
* mdb routines
5+
*
6+
* Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
7+
* Portions Copyright (c) 1994, Regents of the University of California
8+
*
9+
*
10+
* IDENTIFICATION
11+
* src/backend/utils/adt/mdb.c
12+
*
13+
*-------------------------------------------------------------------------
14+
*/
15+
16+
17+
#include "postgres.h"
18+
#include "fmgr.h"
19+
#include "utils/fmgrprotos.h"
20+
21+
/*
22+
* mdb_locale_enabled
23+
* Report whether this build has USE_MDBLOCALES set: returns true when the macro is non-zero at compile time, false otherwise
24+
*/
25+
Datum
26+
mdb_locale_enabled(PG_FUNCTION_ARGS)
27+
{
28+
bool res;
29+
30+
#if USE_MDBLOCALES
31+
res = true;
32+
#else
33+
res = false;
34+
#endif
35+
36+
PG_RETURN_BOOL(res);
37+
}

0 commit comments

Comments
 (0)