diff -c -r tsearch2/ts_locale.c ../../../8.2-stable/pgsql/contrib/tsearch2/ts_locale.c *** tsearch2/ts_locale.c 2006-11-20 23:03:30.000000000 +0900 --- ../../../8.2-stable/pgsql/contrib/tsearch2/ts_locale.c 2007-01-16 09:23:11.000000000 +0900 *************** *** 12,24 **** size_t wchar2char(char *to, const wchar_t *from, size_t len) { if (GetDatabaseEncoding() == PG_UTF8) { int r; - if (len == 0) - return 0; - r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len, NULL, NULL); --- 12,24 ---- size_t wchar2char(char *to, const wchar_t *from, size_t len) { + if (len == 0) + return 0; + if (GetDatabaseEncoding() == PG_UTF8) { int r; r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, len, NULL, NULL); *************** *** 34,50 **** return wcstombs(to, from, len); } size_t char2wchar(wchar_t *to, const char *from, size_t len) { if (GetDatabaseEncoding() == PG_UTF8) { int r; - if (len == 0) - return 0; - r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len); if (!r) --- 34,52 ---- return wcstombs(to, from, len); } + #endif /* WIN32 */ size_t char2wchar(wchar_t *to, const char *from, size_t len) { + if (len == 0) + return 0; + + #ifdef WIN32 if (GetDatabaseEncoding() == PG_UTF8) { int r; r = MultiByteToWideChar(CP_UTF8, 0, from, len, to, len); if (!r) *************** *** 60,88 **** return r; } return mbstowcs(to, from, len); } - #endif /* WIN32 */ int _t_isalpha(const char *ptr) { ! wchar_t character; ! char2wchar(&character, ptr, 1); ! return iswalpha((wint_t) character); } int _t_isprint(const char *ptr) { ! wchar_t character; ! char2wchar(&character, ptr, 1); ! return iswprint((wint_t) character); } #endif /* TS_USE_WIDE */ --- 62,105 ---- return r; } + else + #endif /* WIN32 */ + if ( lc_ctype_is_c() ) + { + /* + * pg_mb2wchar_with_len always adds trailing '\0', so + * 'to' should be allocated with sufficient space + */ + return pg_mb2wchar_with_len(from, (pg_wchar *)to, len); + } return mbstowcs(to, from, len); } int _t_isalpha(const char *ptr) { ! wchar_t character[2]; ! ! if (lc_ctype_is_c()) ! return isalpha(TOUCHAR(ptr)); ! char2wchar(character, ptr, 1); ! return iswalpha((wint_t) *character); } int _t_isprint(const char *ptr) { ! wchar_t character[2]; ! ! if (lc_ctype_is_c()) ! return isprint(TOUCHAR(ptr)); ! char2wchar(character, ptr, 1); ! return iswprint((wint_t) *character); } #endif /* TS_USE_WIDE */ *************** *** 126,132 **** if ( wlen < 0 ) ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), ! errmsg("transalation failed from server encoding to wchar_t"))); Assert(wlen<=len); wstr[wlen] = 0; --- 143,149 ---- if ( wlen < 0 ) ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), ! errmsg("translation failed from server encoding to wchar_t"))); Assert(wlen<=len); wstr[wlen] = 0; *************** *** 152,158 **** if ( wlen < 0 ) ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), ! errmsg("transalation failed from wchar_t to server encoding %d", errno))); Assert(wlen<=len); out[wlen]='\0'; } --- 169,175 ---- if ( wlen < 0 ) ereport(ERROR, (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE), ! errmsg("translation failed from wchar_t to server encoding %d", errno))); Assert(wlen<=len); out[wlen]='\0'; } diff -c -r tsearch2/ts_locale.h ../../../8.2-stable/pgsql/contrib/tsearch2/ts_locale.h *** tsearch2/ts_locale.h 2006-10-04 09:29:47.000000000 +0900 --- ../../../8.2-stable/pgsql/contrib/tsearch2/ts_locale.h 2007-01-16 09:23:11.000000000 +0900 *************** *** 30,45 **** #define TOUCHAR(x) (*((unsigned char*)(x))) #ifdef TS_USE_WIDE #ifdef WIN32 size_t wchar2char(char *to, const wchar_t *from, size_t len); ! size_t char2wchar(wchar_t *to, const char *from, size_t len); #else /* WIN32 */ ! /* correct mbstowcs */ ! #define char2wchar mbstowcs #define wchar2char wcstombs #endif /* WIN32 */ #define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) --- 30,46 ---- #define TOUCHAR(x) (*((unsigned char*)(x))) #ifdef TS_USE_WIDE + size_t char2wchar(wchar_t *to, const char *from, size_t len); #ifdef WIN32 size_t wchar2char(char *to, const wchar_t *from, size_t len); ! #else /* WIN32 */ ! /* correct wcstombs */ #define wchar2char wcstombs + #endif /* WIN32 */ #define t_isdigit(x) ( pg_mblen(x)==1 && isdigit( TOUCHAR(x) ) ) *************** *** 55,64 **** */ #define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false ) ! #define COPYCHAR(d,s) do { \ ! int lll = pg_mblen( s ); \ ! \ ! while( lll-- ) \ TOUCHAR((d)+lll) = TOUCHAR((s)+lll); \ } while(0) --- 56,65 ---- */ #define t_iseq(x,c) ( (pg_mblen(x)==1) ? ( TOUCHAR(x) == ((unsigned char)(c)) ) : false ) ! #define COPYCHAR(d,s) do { \ ! int lll = pg_mblen( s ); \ ! \ ! while( lll-- ) \ TOUCHAR((d)+lll) = TOUCHAR((s)+lll); \ } while(0) ../../../8.2-stable/pgsql/contrib/tsearch2/wordparserだけに発見: CVS diff -c -r tsearch2/wordparser/parser.c ../../../8.2-stable/pgsql/contrib/tsearch2/wordparser/parser.c *** tsearch2/wordparser/parser.c 2006-10-04 09:29:47.000000000 +0900 --- ../../../8.2-stable/pgsql/contrib/tsearch2/wordparser/parser.c 2007-01-16 09:23:12.000000000 +0900 *************** *** 1,4 **** ! /* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11 2006/10/04 00:29:47 momjian Exp $ */ #include "postgres.h" --- 1,4 ---- ! /* $PostgreSQL: pgsql/contrib/tsearch2/wordparser/parser.c,v 1.11.2.1 2007/01/15 15:16:11 teodor Exp $ */ #include "postgres.h" *************** *** 40,55 **** #ifdef TS_USE_WIDE /* ! * Use wide char code only when max encoding length > 1 and ctype != C. ! * Some operating systems fail with multi-byte encodings and a C locale. ! * Also, for a C locale there is no need to process as multibyte. From ! * backend/utils/adt/oracle_compat.c Teodor */ ! if (prs->charmaxlen > 1 && !lc_ctype_is_c()) { prs->usewide = true; ! prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * prs->lenstr); prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); } else --- 40,52 ---- #ifdef TS_USE_WIDE /* ! * Use wide char code only when max encoding length > 1. */ ! if (prs->charmaxlen > 1) { prs->usewide = true; ! prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr+1)); prs->lenwstr = char2wchar(prs->wstr, prs->str, prs->lenstr); } else *************** *** 83,107 **** /* * defining support function, equvalent is* macroses, but ! * working with any possible encodings and locales */ #ifdef TS_USE_WIDE ! #define p_iswhat(type) \ ! static int \ ! p_is##type(TParser *prs) { \ ! Assert( prs->state ); \ ! return ( ( prs->usewide ) ? isw##type( (wint_t)*( prs->wstr + prs->state->poschar ) ) : \ ! is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ) ); \ ! } \ ! \ ! static int \ ! p_isnot##type(TParser *prs) { \ ! return !p_is##type(prs); \ } /* p_iseq should be used only for ascii symbols */ --- 80,178 ---- /* * defining support function, equvalent is* macroses, but ! * working with any possible encodings and locales. Note, ! * that with multibyte encoding and C-locale isw* function may fail ! * or give wrong result. Note 2: multibyte encoding and C-locale ! * often are used for Asian languages. */ #ifdef TS_USE_WIDE ! #define p_iswhat(type) \ ! static int \ ! p_is##type(TParser *prs) { \ ! Assert( prs->state ); \ ! if ( prs->usewide ) \ ! { \ ! if ( lc_ctype_is_c() ) \ ! return is##type( 0xff & *( prs->wstr + prs->state->poschar) ); \ ! \ ! return isw##type( *(wint_t*)( prs->wstr + prs->state->poschar ) ); \ ! } \ ! \ ! return is##type( *(unsigned char*)( prs->str + prs->state->posbyte ) ); \ ! } \ ! \ ! static int \ ! p_isnot##type(TParser *prs) { \ ! return !p_is##type(prs); \ } + static int + p_isalnum(TParser *prs) + { + Assert( prs->state ); + + if (prs->usewide) + { + if (lc_ctype_is_c()) + { + unsigned int c = *(unsigned int*)(prs->wstr + prs->state->poschar); + + /* + * any non-ascii symbol with multibyte encoding + * with C-locale is an alpha character + */ + if ( c > 0x7f ) + return 1; + + return isalnum(0xff & c); + } + + return iswalnum( (wint_t)*( prs->wstr + prs->state->poschar)); + } + return isalnum( *(unsigned char*)( prs->str + prs->state->posbyte )); + } + + static int + p_isnotalnum(TParser *prs) + { + return !p_isalnum(prs); + } + + static int + p_isalpha(TParser *prs) + { + Assert( prs->state ); + + if (prs->usewide) + { + if (lc_ctype_is_c()) + { + unsigned int c = *(prs->wstr + prs->state->poschar); + + /* + * any non-ascii symbol with multibyte encoding + * with C-locale is an alpha character + */ + if ( c > 0x7f ) + return 1; + + return isalpha(0xff & c); + } + + return iswalpha( (wint_t)*( prs->wstr + prs->state->poschar)); + } + + return isalpha( *(unsigned char*)( prs->str + prs->state->posbyte )); + } + + static int + p_isnotalpha(TParser *prs) + { + return !p_isalpha(prs); + } /* p_iseq should be used only for ascii symbols */ *************** *** 111,128 **** Assert(prs->state); return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0; } #else /* TS_USE_WIDE */ ! #define p_iswhat(type) \ ! static int \ ! p_is##type(TParser *prs) { \ ! Assert( prs->state ); \ ! return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \ ! } \ ! \ ! static int \ ! p_isnot##type(TParser *prs) { \ ! return !p_is##type(prs); \ } --- 182,200 ---- Assert(prs->state); return ((prs->state->charlen == 1 && *(prs->str + prs->state->posbyte) == c)) ? 1 : 0; } + #else /* TS_USE_WIDE */ ! #define p_iswhat(type) \ ! static int \ ! p_is##type(TParser *prs) { \ ! Assert( prs->state ); \ ! return is##type( (unsigned char)*( prs->str + prs->state->posbyte ) ); \ ! } \ ! \ ! static int \ ! p_isnot##type(TParser *prs) { \ ! return !p_is##type(prs); \ } *************** *** 132,141 **** Assert(prs->state); return (*(prs->str + prs->state->posbyte) == c) ? 1 : 0; } - #endif /* TS_USE_WIDE */ p_iswhat(alnum) p_iswhat(alpha) p_iswhat(digit) p_iswhat(lower) p_iswhat(print) --- 204,215 ---- Assert(prs->state); return (*(prs->str + prs->state->posbyte) == c) ? 1 : 0; } p_iswhat(alnum) p_iswhat(alpha) + + #endif /* TS_USE_WIDE */ + p_iswhat(digit) p_iswhat(lower) p_iswhat(print)