U+2166 (ROMAN NUMERAL SEVEN) is compatibility-equivalent to the sequence
U+0056 V, U+0049 I, U+0049 I. For a collator of strength 2 or lower, the two
should therefore produce identical sort keys, and I have verified that they do
for the collator "LDE_AN_CX_EX_FX_HX_NX_S2". When U+2166 is used as the search
string, usearch_first() correctly matches the sequence U+0056 V, U+0049 I,
U+0049 I. However, when the sequence U+0056 V, U+0049 I, U+0049 I is used as
the search string, usearch_first() does not match U+2166. This appears to
affect all of the other Roman numerals as well. Here is a standalone program
that reproduces the problem.
#include <stdio.h>
#include "unicode/ucol.h"
#include "unicode/ubrk.h"
#include "unicode/usearch.h"
int main()
{
UChar search[] = { 0x0056, 0x0049, 0x0049 };
UChar source[] = { 0x0020,
0x2166,
0x0020, };
int32_t searchLen;
int32_t sourceLen;
UErrorCode icuStatus = U_ZERO_ERROR;
UCollator *coll;
const char *locale;
UBreakIterator *ubrk;
UStringSearch *usearch;
int32_t match = 0;
searchLen = sizeof(search)/sizeof(UChar);
sourceLen = sizeof(source)/sizeof(UChar);
coll = ucol_openFromShortString( "LDE_AN_CX_EX_FX_HX_NX_S2",
false,
NULL,
&icuStatus );
if ( U_FAILURE(icuStatus) )
{
printf( "ucol_openFromShortString error\n" );
goto exit;
}
locale = ucol_getLocaleByType( coll,
ULOC_VALID_LOCALE,
&icuStatus );
if ( U_FAILURE(icuStatus) )
{
printf( "ucol_getLocaleByType error\n" );
goto exit;
}
ubrk = ubrk_open( UBRK_CHARACTER,
locale,
source,
sourceLen,
&icuStatus );
if ( U_FAILURE(icuStatus) )
{
printf( "ubrk_open error\n" );
goto exit;
}
usearch = usearch_openFromCollator( search,
searchLen,
source,
sourceLen,
coll,
NULL,
&icuStatus );
if ( U_FAILURE(icuStatus) )
{
printf( "usearch_openFromCollator error\n" );
goto exit;
}
usearch_setAttribute( usearch,
USEARCH_OVERLAP,
USEARCH_ON,
&icuStatus );
if ( U_FAILURE(icuStatus) )
{
printf( "usearch_setAttribute error\n" );
goto exit;
}
match = usearch_first( usearch,
&icuStatus );
if ( U_FAILURE(icuStatus) )
{
printf( "usearch_first error\n" );
goto exit;
}
printf( "match=%d\n", match );
exit:
return 0;
}