When ucol_nextSortKeyPart() is used to generate sort keys with a strength 4 collation, ignorable characters are each given 0xFF as their strength 4 weight. This problem does not occur with ucol_getSortKey(). As a result, strings that collate as equal when their keys come from ucol_getSortKey() collate as unequal when their keys come from ucol_nextSortKeyPart().
Here is an example program that demonstrates this.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unicode/ucol.h>
#include <unicode/uiter.h>
// Abort the program with a diagnostic if the ICU status variable reports failure.
//
// `m` is a bare identifier naming the ICU call that was just made. It is turned
// into a string with the # operator before being handed to printf's %s
// conversion, so passing a function name prints that name rather than
// passing a function pointer where a char* is expected.
//
// Requires a UErrorCode variable named `icuRC` in the calling scope.
// The do/while(0) wrapper makes `CHECK(x);` behave as exactly one statement,
// so it is safe inside an unbraced if/else.
#define CHECK(m) \
    do { \
        if (U_FAILURE(icuRC)) \
        { \
            printf("Failed on '%s'\n\n", #m); \
            exit(-1); \
        } \
    } while (0)
// Print `len` bytes starting at `bytes` as two-digit lowercase hex, with no
// separators and no trailing newline.
static void print_hex(const unsigned char *bytes, int32_t len)
{
    for (int32_t k = 0; k < len; ++k)
    {
        printf("%02x", bytes[k]);
    }
}

// Reproduction driver: for each prefix of `data` (length 1 up to the full
// array), prints the code units, then the sort key produced by
// ucol_getSortKey() and the one produced by a single call to
// ucol_nextSortKeyPart(), so the two can be compared side by side.
//
// Returns 0 on success. Exits via CHECK if opening the collator or generating
// the partial key fails; returns EXIT_FAILURE if the full sort key is
// reported as empty or does not fit in the local buffer.
int main(int argc, char* argv[])
{
    // An enum constant (rather than a `static const int`) is an integer
    // constant expression in C, so `buf` is an ordinary fixed-size array.
    enum { bufSize = 50 };

    static const UChar data[] = { 0xFFFD, 0x0006, 0x0006, 0x0006 };
    const int32_t dataCount = (int32_t)(sizeof data / sizeof data[0]);

    UErrorCode icuRC = U_ZERO_ERROR;
    UCollator* ucol;
    unsigned char buf[bufSize];

    (void)argc;
    (void)argv;

    // Second argument is the UBool `forceDefaults` flag; a literal 0 is used
    // so no boolean macro or <stdbool.h> is required.
    ucol = ucol_openFromShortString("LEN_S4", 0, NULL, &icuRC);
    CHECK(ucol_openFromShortString);

    for (int32_t dataLen = 1; dataLen <= dataCount; ++dataLen)
    {
        UCharIterator uiter;
        uint32_t state[2] = { 0, 0 };   // fresh iteration state for every string
        int32_t keySize;

        printf("String:");
        for (int32_t j = 0; j < dataLen; ++j)
        {
            printf(" %04X", (unsigned)data[j]);
        }
        printf("\n");

        // Full sort key.
        // ucol_getSortKey() has no UErrorCode parameter; it reports the size
        // the key needs, which may exceed the buffer it was given. Validate
        // the size before dumping so we never read past `buf`.
        keySize = ucol_getSortKey(ucol, data, dataLen, buf, bufSize);
        if (keySize <= 0 || keySize > bufSize)
        {
            printf("Failed on '%s'\n\n", "ucol_getSortKey");
            ucol_close(ucol);
            return EXIT_FAILURE;
        }
        printf("\tFull key: ");
        print_hex(buf, keySize);
        printf("\n");

        // Partial sort key, generated through a UCharIterator over the same
        // prefix of `data`.
        uiter_setString(&uiter, data, dataLen);
        keySize = ucol_nextSortKeyPart(ucol, &uiter, state, buf, bufSize, &icuRC);
        CHECK(ucol_nextSortKeyPart);
        printf("\tPartial key: ");
        print_hex(buf, keySize);
        printf("\n\n");
    }

    //=============================================
    ucol_close(ucol);
    return 0;
}
Output on ICU 3.2.1:
String: FFFD
Full key: 1fb301050105012100
Partial key: 1fb30105010501ff00
String: FFFD 0006
Full key: 1fb301050105012100
Partial key: 1fb30105010501ffff00
String: FFFD 0006 0006
Full key: 1fb301050105012100
Partial key: 1fb30105010501ffffff00
String: FFFD 0006 0006 0006
Full key: 1fb301050105012100
Partial key: 1fb30105010501ffffffff00
Output on ICU 3.8:
String: FFFD
Full key: 225d01050105012400
Partial key: 225d0105010501ff00
String: FFFD 0006
Full key: 225d01050105012400
Partial key: 225d0105010501ffff00
String: FFFD 0006 0006
Full key: 225d01050105012400
Partial key: 225d0105010501ffffff00
String: FFFD 0006 0006 0006
Full key: 225d01050105012400
Partial key: 225d0105010501ffffffff00