Changeset 337


Ignore:
Timestamp:
Mar 31, 2010, 9:50:47 PM (12 years ago)
Author:
dmik
Message:

branches/kmk: Added a bunch of unicode conversion helpers based on the original propcnv() from lupoppler.cpp.

Location:
branches/kmk/Lucide/plugins
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • branches/kmk/Lucide/plugins/ludoc/cpconv.cpp

    r306 r337  
    3535#include <os2.h>
    3636#include <uconv.h>
     37#include <string.h>
    3738
    3839
     
    141142}
    142143
     144extern "C" char *APIENTRY uniConvertString( const char *s, size_t len, UconvObject from,
     145                                            UconvObject to, size_t *retLen )
     146{
     147    if ( s == NULL )
     148        return NULL;
     149
     150    size_t cSubs = 0;
     151    size_t unilen = len + 1;
     152    UniChar *unibuf = new UniChar[ unilen ];
     153    memset( unibuf, 0, unilen * sizeof( UniChar ) );
     154    UniChar *tmpuni = unibuf;
     155    UniUconvToUcs( from, (void **)&s, &len, &tmpuni, &unilen, &cSubs );
     156    unilen = UniStrlen( unibuf );
     157
     158    int liglen = uniLigaturesLength( unibuf );
     159    if ( liglen > 0 )  // string contain ligature(s)
     160    {
     161        unsigned ulen_tmp = ( unilen + liglen + 1 ) * sizeof( UniChar );
     162        char *uni_tmp = new char[ ulen_tmp ];
     163        uniReplaceLigatures( unibuf, (UniChar *)uni_tmp );
     164        delete unibuf;
     165        unibuf = (UniChar *)uni_tmp;
     166        unilen = UniStrlen( unibuf );
     167    }
     168    uniConvertSpChars( unibuf );
     169
     170    size_t blen = ( unilen + 1 ) * 2;
     171    char *b = new char[ blen ];
     172    memset( b, 0, blen );
     173    char *bsav = b;
     174    tmpuni = unibuf;
     175    cSubs = 0;
     176    UniUconvFromUcs( to, &tmpuni, &unilen, (void **)&b, &blen, &cSubs );
     177    delete unibuf;
     178
     179    if ( retLen != NULL)
     180        *retLen = blen;
     181    return bsav;
     182}
     183
     184static char *utf16_sys( const char *s, size_t len, UconvObject *utf16,
     185                        UconvObject *sys, size_t *retLen, bool from, bool isUtf8 )
     186{
     187    UconvObject utf16Tmp = NULL;
     188    UconvObject sysTmp = NULL;
     189    uconv_attribute_t attr;
     190
     191    if ( utf16 == NULL )
     192        utf16 = &utf16Tmp;
     193    if ( sys == NULL )
     194        sys = &sysTmp;
     195
     196    if ( *utf16 == NULL ) {
     197        UniCreateUconvObject( (UniChar *)L"UCS-2@endian=big", utf16 );
     198        UniQueryUconvObject( *utf16, &attr, sizeof(attr), NULL, NULL, NULL );
     199        attr.converttype &= ~(CVTTYPE_CTRL7F | CVTTYPE_PATH);
     200        attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
     201        UniSetUconvObject( *utf16, &attr );
     202    }
     203    if ( *sys == NULL ) {
     204        if ( isUtf8 )
     205            UniCreateUconvObject( (UniChar *)L"UTF-8", sys );
     206        else
     207            UniCreateUconvObject( (UniChar *)L"", sys );
     208        UniQueryUconvObject( *sys, &attr, sizeof(attr), NULL, NULL, NULL );
     209        attr.converttype &= ~(CVTTYPE_CTRL7F | CVTTYPE_PATH);
     210        attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
     211        UniSetUconvObject( *sys, &attr );
     212    }
     213
     214    char *ret;
     215    if ( from )
     216        ret = uniConvertString( s, len, *utf16, *sys, retLen );
     217    else
     218        ret = uniConvertString( s, len, *sys, *utf16, retLen );
     219
     220    if ( sys == &sysTmp )
     221        UniFreeUconvObject( sysTmp );
     222    if ( utf16 == &utf16Tmp )
     223        UniFreeUconvObject( utf16Tmp );
     224
     225    return ret;
     226}
     227
     228extern "C" char *APIENTRY uniUtf16BEToSys( const char *s, size_t len, UconvObject *utf16,
     229                                           UconvObject *sys )
     230{
     231    if ( s == NULL )
     232        return NULL;
     233    if ( len >= 2 && s[0] == (int)(char)0xfe && s[1] == (int)(char)0xff ) {
     234        s += 2;
     235        len -= 2;
     236    }
     237    return utf16_sys( s, len, utf16, sys, NULL, true, false );
     238}
     239
     240extern "C" char *APIENTRY uniSysToUtf16BE( const char *s, UconvObject *sys,
     241                                           UconvObject *utf16, size_t *retLen )
     242{
     243    return utf16_sys( s, strlen( s ), utf16, sys, retLen, false, false );
     244}
     245
     246extern "C" char *APIENTRY uniUtf16BEToUtf8( const char *s, size_t len, UconvObject *utf16,
     247                                            UconvObject *utf8 )
     248{
     249    if ( s == NULL )
     250        return NULL;
     251    if ( len >= 2 && s[0] == (char)0xfe && s[1] == (char)0xff ) {
     252        s += 2;
     253        len -= 2;
     254    }
     255    return utf16_sys( s, len, utf16, utf8, NULL, true, true );
     256}
     257
     258extern "C" char *APIENTRY uniUtf8ToUtf16BE( const char *s, UconvObject *utf8,
     259                                            UconvObject *utf16, size_t *retLen )
     260{
     261    return utf16_sys( s, strlen( s ), utf16, utf8, retLen, false, true );
     262}
     263
     264static char *utf8_sys( const char *s, UconvObject *utf8, UconvObject *sys, bool from )
     265{
     266    UconvObject utf8Tmp = NULL;
     267    UconvObject sysTmp = NULL;
     268    uconv_attribute_t attr;
     269
     270    if ( utf8 == NULL )
     271        utf8 = &utf8Tmp;
     272    if ( sys == NULL )
     273        sys = &sysTmp;
     274
     275    if ( *utf8 == NULL ) {
     276        UniCreateUconvObject( (UniChar *)L"UTF-8", utf8 );
     277        UniQueryUconvObject( *utf8, &attr, sizeof(attr), NULL, NULL, NULL );
     278        attr.converttype &= ~(CVTTYPE_CTRL7F | CVTTYPE_PATH);
     279        attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
     280        UniSetUconvObject( *utf8, &attr );
     281    }
     282    if ( *sys == NULL ) {
     283        UniCreateUconvObject( (UniChar *)L"", sys );
     284        UniQueryUconvObject( *sys, &attr, sizeof(attr), NULL, NULL, NULL );
     285        attr.converttype &= ~(CVTTYPE_CTRL7F | CVTTYPE_PATH);
     286        attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
     287        UniSetUconvObject( *sys, &attr );
     288    }
     289
     290    char *ret;
     291    if ( from )
     292        ret = uniConvertString( s, strlen( s ), *utf8, *sys, NULL );
     293    else
     294        ret = uniConvertString( s, strlen( s ), *sys, *utf8, NULL );
     295
     296    if ( sys == &sysTmp )
     297        UniFreeUconvObject( sysTmp );
     298    if ( utf8 == &utf8Tmp )
     299        UniFreeUconvObject( utf8Tmp );
     300
     301    return ret;
     302}
     303
     304extern "C" char *APIENTRY uniUtf8ToSys( const char *s, UconvObject *utf8,
     305                                        UconvObject *sys )
     306{
     307    return utf8_sys( s, utf8, sys, true );
     308}
     309
     310extern "C" char *APIENTRY uniSysToUtf8( const char *s, UconvObject *sys,
     311                                        UconvObject *utf8 )
     312{
     313    return utf8_sys( s, utf8, sys, false );
     314}
     315
  • branches/kmk/Lucide/plugins/ludoc/cpconv.h

    r156 r337  
    3737
    3838#include <unidef.h>
     39#include <uconv.h>
    3940
    4041#ifdef __cplusplus
     
    5354// src remains unchanged
    5455VOID APIENTRY uniReplaceLigatures( UniChar *src, UniChar *dst );
    55                            
     56
     57// Converts the string from one encoding to another
     58// the returned string is allocated with the new[] operator
     59// if s is NULL, NULL is returned
     60// both from and to must be valid encodings, otherwise the results are undefined
     61// retLen may be NULL if the length of the returned buffer is not needed
     62char *APIENTRY uniConvertString( const char *s, size_t len, UconvObject from,
     63                                 UconvObject to, size_t *retLen );
     64
     65// Converts the string from UTF-16BE to the system encoding
     66// the returned zero-terminated string is allocated with the new[] operator
     67// if s is NULL, NULL is returned
     68// if s starts with the Unicode marker (0xFE,FF) it is skipped
     69// if utf16 or sys is NULL, a new temporary uconv object for the
     70// corresponding encoding will be created
     71// if utf16 or sys points to a null uconv object, the newly created object
     72// will be returned there and must be freed with UniFreeUconvObject,
     73// otherwise the passed in object will be used for the conversion
     74char *APIENTRY uniUtf16BEToSys( const char *s, size_t len, UconvObject *utf16,
     75                              UconvObject *sys );
     76
     77// The opposite to uniUtf16BEToSys
     78char *APIENTRY uniSysToUtf16BE( const char *s, UconvObject *sys,
     79                                UconvObject *utf16, size_t *retLen );
     80
     81// Same as uniUtf16BEToSys but converts UTF-16BE to UTF-8
     82char *APIENTRY uniUtf16BEToUtf8( const char *s, size_t len, UconvObject *utf16,
     83                                 UconvObject *utf8 );
     84
     85// The opposite to uniUtf16BEToUtf8
     86char *APIENTRY uniUtf8ToUtf16BE( const char *s, UconvObject *utf8,
     87                                 UconvObject *utf16, size_t *retLen );
     88
     89// Converts the zero-terminated string from UTF-8 to the system encoding
     90// the returned zero-terminated string is allocated with the new[] operator
     91// if s is NULL, NULL is returned
     92// unlike the UTF-16BE variants, no Unicode marker is skipped at the start of s
     93// if utf8 or sys is NULL, a new temporary uconv object for the
     94// corresponding encoding will be created
     95// if utf8 or sys points to a null uconv object, the newly created object
     96// will be returned there and must be freed with UniFreeUconvObject,
     97// otherwise the passed in object will be used for the conversion
     98char *APIENTRY uniUtf8ToSys( const char *s, UconvObject *utf8,
     99                             UconvObject *sys );
     100
     101// The opposite to uniUtf8ToSys
     102char *APIENTRY uniSysToUtf8( const char *s, UconvObject *sys,
     103                             UconvObject *utf8 );
     104
    56105#ifdef __cplusplus
    57106}
  • branches/kmk/Lucide/plugins/ludoc/ludoc.def

    r319 r337  
    4848    uniLigaturesLength
    4949    uniReplaceLigatures
     50    uniConvertString
     51    uniUtf16BEToSys
     52    uniSysToUtf16BE
     53    uniUtf16BEToUtf8
     54    uniUtf8ToUtf16BE
     55    uniUtf8ToSys
     56    uniSysToUtf8
    5057
  • branches/kmk/Lucide/plugins/lupoppler/lupoppler.cpp

    r330 r337  
    972972static char *newstrFromUTF8( const char *s, void *objUtf8, void *objSys )
    973973{
    974     size_t cSubs = 0;
    975     size_t len = strlen( s ) + 1;
    976     size_t unilen = len + 2;
    977     UniChar *unibuf = new UniChar[ unilen ];
    978     UniChar *tmpuni = unibuf;
    979     UniUconvToUcs( objUtf8, (void **)&s, &len, &tmpuni, &unilen, &cSubs );
    980     unilen = UniStrlen( unibuf );
    981 
    982     int liglen = uniLigaturesLength( unibuf );
    983     if ( liglen > 0 )  // string contain ligature(s)
    984     {
    985         unsigned ulen_tmp = ( unilen + liglen + 1 ) * sizeof( UniChar );
    986         char *uni_tmp = new char[ ulen_tmp ];
    987         uniReplaceLigatures( unibuf, (UniChar *)uni_tmp );
    988         delete unibuf;
    989         unibuf = (UniChar *)uni_tmp;
    990         unilen = UniStrlen( unibuf );
    991     }
    992     uniConvertSpChars( unibuf );
    993 
    994     size_t blen = ( unilen + 1 ) * 2;
    995     char *b = new char[ blen ];
    996     memset( b, 0, blen );
    997     char *bsav = b;
    998     tmpuni = unibuf;
    999     cSubs = 0;
    1000     UniUconvFromUcs( objSys, &tmpuni, &unilen, (void **)&b, &blen, &cSubs );
    1001     delete unibuf;
    1002     return bsav;
     974    return uniConvertString( s, strlen( s ), &objUtf8, &objSys, NULL );
    1003975}
    1004976
     
    10691041    if ( has_unicode_marker( s ) )
    10701042    {
    1071         size_t cSubs = 0;
    1072         size_t unilen = s->getLength() + 1;
    1073         UniChar *unibuf = new UniChar[ unilen ];
    1074         memset( unibuf, 0, unilen * sizeof( UniChar ) );
    1075         UniChar *tmpuni = unibuf;
    1076         const char *from = s->getCString() + 2;
    1077         size_t fromlen = s->getLength() - 2;
    1078         UniUconvToUcs( objUniBe, (void **)&from, &fromlen, &tmpuni, &unilen, &cSubs );
    1079         unilen = UniStrlen( unibuf );
    1080 
    1081         int liglen = uniLigaturesLength( unibuf );
    1082         if ( liglen > 0 )  // string contain ligature(s)
    1083         {
    1084             unsigned ulen_tmp = ( unilen + liglen + 1 ) * sizeof( UniChar );
    1085             char *uni_tmp = new char[ ulen_tmp ];
    1086             uniReplaceLigatures( unibuf, (UniChar *)uni_tmp );
    1087             delete unibuf;
    1088             unibuf = (UniChar *)uni_tmp;
    1089             unilen = UniStrlen( unibuf );
    1090         }
    1091         uniConvertSpChars( unibuf );
    1092 
    1093         size_t blen = ( unilen + 1 ) * 2;
    1094         char *b = (char *)SOMMalloc( blen );
    1095         memset( b, 0, blen );
    1096         char *bsav = b;
    1097         tmpuni = unibuf;
    1098         cSubs = 0;
    1099         UniUconvFromUcs( objSys, &tmpuni, &unilen, (void **)&b, &blen, &cSubs );
    1100         delete unibuf;
    1101         return bsav;
     1043        char *str = uniUtf16BEToSys( s->getCString(), s->getLength(), &objUniBe,
     1044                                     &objSys );
     1045        char *ret = somstrdup( str );
     1046        delete[] str;
     1047        return ret;
    11021048    }
    11031049
Note: See TracChangeset for help on using the changeset viewer.