Skip to content

Commit

Permalink
Handle QCollator with locale C by delegating to QString
Browse files Browse the repository at this point in the history
Previously, the C locale was treated as English because each back-end
takes the locale's bcp47Name(), which maps C to en. However, the C
locale has its own rules, which QString helpfully implements, so we
can delegate to it in this case. Extended this to sort keys, where
possible. Cleaned up existing implementations in the process.

Extended tst_QCollator::compare() with some cases to check this. That
required wrapping the test's calls to collator.compare() in a sign
canonicalizer since, for these cases, it can return any negative value
for < or any positive value for >, not just -1 and +1 (and it would be
rash to hard-code specific negative and positive values, as they may
vary between backends).

[ChangeLog][QtCore][QCollator] Added support for collation in the C
locale, although this is only well-defined for ASCII. Collation sort
keys remain unsupported on Darwin.

Fixes: QTBUG-58621
Change-Id: I327010d90f09bd1b1816f5590cb124e3d423e61d
Reviewed-by: Thiago Macieira <thiago.macieira@intel.com>
  • Loading branch information
ediosyncratic committed Dec 11, 2018
1 parent 63b0eb3 commit ab448f7
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 24 deletions.
3 changes: 2 additions & 1 deletion src/corelib/tools/qcollator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ QT_BEGIN_NAMESPACE
QCollator::QCollator(const QLocale &locale)
: d(new QCollatorPrivate(locale))
{
d->init();
}

/*!
Expand Down Expand Up @@ -323,6 +322,8 @@ bool QCollator::ignorePunctuation() const
methods directly. But if the string is compared repeatedly (e.g. when sorting
a whole list of strings), it's usually faster to create the sort keys for each
string and then sort using the keys.
\note Not supported with the C (a.k.a. POSIX) locale on Darwin.
*/

/*!
Expand Down
4 changes: 4 additions & 0 deletions src/corelib/tools/qcollator_icu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init()
{
cleanup();
if (isC())
return;

UErrorCode status = U_ZERO_ERROR;
QByteArray name = QLocalePrivate::get(locale)->bcp47Name('_');
Expand Down Expand Up @@ -140,6 +142,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{
if (d->dirty)
d->init();
if (d->isC())
return QCollatorSortKey(new QCollatorSortKeyPrivate(string.toUtf8()));

if (d->collator) {
QByteArray result(16 + string.size() + (string.size() >> 2), Qt::Uninitialized);
Expand Down
20 changes: 20 additions & 0 deletions src/corelib/tools/qcollator_macx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,15 @@ QT_BEGIN_NAMESPACE
void QCollatorPrivate::init()
{
cleanup();
/*
LocaleRefFromLocaleString() will accept "POSIX" as the locale name, but
the locale it produces (named "pos") doesn't implement the [A-Z] < [a-z]
behavior we expect of the C locale. We can use QStringView to get round
that for collation, but this leaves no way to do a sort key.
*/
if (isC())
return;

LocaleRef localeRef;
int rc = LocaleRefFromLocaleString(QLocalePrivate::get(locale)->bcp47Name().constData(), &localeRef);
if (rc != 0)
Expand Down Expand Up @@ -92,6 +101,8 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
{
if (d->dirty)
d->init();
if (!d->collator)
return QStringView(s1, len1).compare(QStringView(s2, len2), caseSensitivity());

SInt32 result;
Boolean equivalent;
Expand All @@ -104,6 +115,7 @@ int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) con
return 0;
return result < 0 ? -1 : 1;
}

int QCollator::compare(const QString &str1, const QString &str2) const
{
return compare(str1.constData(), str1.size(), str2.constData(), str2.size());
Expand All @@ -118,6 +130,11 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{
if (d->dirty)
d->init();
if (!d->collator) {
// What should (or even *can*) we do here ? (See init()'s comment.)
qWarning("QCollator doesn't support sort keys for the C locale on Darwin");
return QCollatorSortKey(nullptr);
}

//Documentation recommends having it 5 times as big as the input
QVector<UCCollationValue> ret(string.size() * 5);
Expand All @@ -136,6 +153,9 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const

int QCollatorSortKey::compare(const QCollatorSortKey &key) const
{
if (!d.data())
return 0;

SInt32 order;
UCCompareCollationKeys(d->m_key.data(), d->m_key.size(),
key.d->m_key.data(), key.d->m_key.size(),
Expand Down
1 change: 1 addition & 0 deletions src/corelib/tools/qcollator_p.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ class QCollatorPrivate

QCollatorPrivate(const QLocale &locale) : locale(locale) {}
~QCollatorPrivate() { cleanup(); }
bool isC() { return locale.language() == QLocale::C; }

void clear() {
cleanup();
Expand Down
41 changes: 23 additions & 18 deletions src/corelib/tools/qcollator_posix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,12 @@ QT_BEGIN_NAMESPACE

void QCollatorPrivate::init()
{
if (locale != QLocale())
qWarning("Only default locale supported with the posix collation implementation");
if (caseSensitivity != Qt::CaseSensitive)
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
if (!isC()) {
if (locale != QLocale())
qWarning("Only C and default locale supported with the posix collation implementation");
if (caseSensitivity != Qt::CaseSensitive)
qWarning("Case insensitive sorting unsupported in the posix collation implementation");
}
if (numericMode)
qWarning("Numeric mode unsupported in the posix collation implementation");
if (ignorePunctuation)
Expand All @@ -73,14 +75,16 @@ static void stringToWCharArray(QVarLengthArray<wchar_t> &ret, const QString &str

int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{
QVarLengthArray<wchar_t> array1, array2;
stringToWCharArray(array1, QString(s1, len1));
stringToWCharArray(array2, QString(s2, len2));
return std::wcscoll(array1.constData(), array2.constData());
return compare(QString::fromRawData(s1, len1), QString::fromRawData(s2, len2));
}

int QCollator::compare(const QString &s1, const QString &s2) const
{
if (d->isC())
return s1.compare(s2, caseSensitivity());
if (d->dirty)
d->init();

QVarLengthArray<wchar_t> array1, array2;
stringToWCharArray(array1, s1);
stringToWCharArray(array2, s2);
Expand All @@ -89,10 +93,7 @@ int QCollator::compare(const QString &s1, const QString &s2) const

int QCollator::compare(const QStringRef &s1, const QStringRef &s2) const
{
if (d->dirty)
d->init();

return compare(s1.constData(), s1.size(), s2.constData(), s2.size());
return compare(s1.toString(), s2.toString());
}

QCollatorSortKey QCollator::sortKey(const QString &string) const
Expand All @@ -102,14 +103,18 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const

QVarLengthArray<wchar_t> original;
stringToWCharArray(original, string);
QVector<wchar_t> result(string.size());
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
if (size > uint(result.size())) {
QVector<wchar_t> result(original.size());
if (d->isC()) {
std::copy(original.cbegin(), original.cend(), result.begin());
} else {
size_t size = std::wcsxfrm(result.data(), original.constData(), string.size());
if (size > uint(result.size())) {
result.resize(size+1);
size = std::wcsxfrm(result.data(), original.constData(), string.size());
}
result.resize(size+1);
size = std::wcsxfrm(result.data(), original.constData(), string.size());
result[size] = 0;
}
result.resize(size+1);
result[size] = 0;
return QCollatorSortKey(new QCollatorSortKeyPrivate(std::move(result)));
}

Expand Down
7 changes: 7 additions & 0 deletions src/corelib/tools/qcollator_win.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ extern LCID qt_inIsoNametoLCID(const char *name);
void QCollatorPrivate::init()
{
collator = 0;
if (isC())
return;

#ifndef USE_COMPARESTRINGEX
localeID = qt_inIsoNametoLCID(QLocalePrivate::get(locale)->bcp47Name().constData());
Expand All @@ -86,6 +88,9 @@ void QCollatorPrivate::cleanup()

int QCollator::compare(const QChar *s1, int len1, const QChar *s2, int len2) const
{
if (d->isC())
return QString::compare_helper(s1, len1, s2, len2, d->caseSensitivity);

if (d->dirty)
d->init();

Expand Down Expand Up @@ -119,6 +124,8 @@ QCollatorSortKey QCollator::sortKey(const QString &string) const
{
if (d->dirty)
d->init();
if (d->isC())
return QCollatorSortKey(new QCollatorSortKeyPrivate(string));

#ifndef USE_COMPARESTRINGEX
int size = LCMapStringW(d->localeID, LCMAP_SORTKEY | d->collator,
Expand Down
18 changes: 13 additions & 5 deletions tests/auto/corelib/tools/qcollator/tst_qcollator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ void tst_QCollator::compare_data()
QTest::addColumn<int>("caseInsensitiveResult");
QTest::addColumn<bool>("numericMode");
QTest::addColumn<bool>("ignorePunctuation");
QTest::addColumn<int>("punctuationResult");
QTest::addColumn<int>("punctuationResult"); // Test ignores punctuation *and case*

/*
It's hard to test English, because it's treated differently
Expand Down Expand Up @@ -169,8 +169,12 @@ void tst_QCollator::compare_data()
QTest::newRow("french6") << QString("fr_FR") << QString("Test 9") << QString("Test_19") << -1 << -1 << true << true << -1;
QTest::newRow("french7") << QString("fr_FR") << QString("test_19") << QString("test 19") << 1 << 1 << true << false << 1;
QTest::newRow("french8") << QString("fr_FR") << QString("test.19") << QString("test,19") << 1 << 1 << true << true << 0;
}

// C locale: case sensitive [A-Z] < [a-z] but case insensitive [Aa] < [Bb] <...< [Zz]
const QString C = QStringLiteral("C");
QTest::newRow("C:ABBA:AaaA") << C << QStringLiteral("ABBA") << QStringLiteral("AaaA") << -1 << 1 << false << false << 1;
QTest::newRow("C:AZa:aAZ") << C << QStringLiteral("AZa") << QStringLiteral("aAZ") << -1 << 1 << false << false << 1;
}

void tst_QCollator::compare()
{
Expand All @@ -184,6 +188,10 @@ void tst_QCollator::compare()
QFETCH(int, punctuationResult);

QCollator collator(locale);
// Need to canonicalize sign to -1, 0 or 1, as .compare() can produce any -ve for <, any +ve for >.
auto asSign = [](int compared) {
return compared < 0 ? -1 : compared > 0 ? 1 : 0;
};

#if defined(Q_OS_ANDROID) && !defined(Q_OS_ANDROID_EMBEDDED)
if (collator.locale() != QLocale())
Expand All @@ -193,12 +201,12 @@ void tst_QCollator::compare()
if (numericMode)
collator.setNumericMode(true);

QCOMPARE(collator.compare(s1, s2), result);
QCOMPARE(asSign(collator.compare(s1, s2)), result);
collator.setCaseSensitivity(Qt::CaseInsensitive);
QCOMPARE(collator.compare(s1, s2), caseInsensitiveResult);
QCOMPARE(asSign(collator.compare(s1, s2)), caseInsensitiveResult);
#if !QT_CONFIG(iconv)
collator.setIgnorePunctuation(ignorePunctuation);
QCOMPARE(collator.compare(s1, s2), punctuationResult);
QCOMPARE(asSign(collator.compare(s1, s2)), punctuationResult);
#endif
}

Expand Down

0 comments on commit ab448f7

Please sign in to comment.