Ignore:
Timestamp:
Nov 17, 2006, 10:42:35 PM (15 years ago)
Author:
Eugene Romanenko
Message:

optimized unicode conversion, fixes crash in uconv.dll (closes #97)

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/Lucide/SOURCE/plugins/ludoc/cpconv.cpp

    r113 r156  
    3333
    3434
    35 #define INCL_DOS
    36 #include <os2.h>
    37 
    38 #include <string.h>
    39 #include <malloc.h>
    40 #include <stdio.h>
    41 
    4235#include <uconv.h>
    4336
    4437
    45 class cpconv
    46 {
    47     protected:
    48         int  err;
    49         void *objtoucs;
    50         void *objfromucs;
    51     public:
    52         cpconv( int cpfrom, int cpto = 0 );
    53         cpconv( UniChar *cpfrom, UniChar *cpto );
    54         ~cpconv();
    55         int conv( int chfrom );
    56         int conv( const char **in, size_t *in_left, char **out, size_t *out_left );
    57 };
    58 
    59 cpconv::cpconv( UniChar *cpfrom, UniChar *cpto )
    60 {
    61     err = 0;
    62     int         rc = ULS_SUCCESS;
    63 
    64     rc = UniCreateUconvObject( cpfrom, &objtoucs );
    65     if (rc != ULS_SUCCESS)
    66     {
    67         err = 1;
    68         return;
    69     }
    70 
    71     uconv_attribute_t attr;
    72     UniQueryUconvObject(objtoucs, &attr, sizeof(attr), NULL, NULL, NULL);
    73     attr.converttype &= ~(CVTTYPE_CTRL7F | CVTTYPE_PATH);
    74     UniSetUconvObject(objtoucs, &attr);
    75 
    76     rc = UniCreateUconvObject( cpto, &objfromucs );
    77     if (rc != ULS_SUCCESS)
    78     {
    79         UniFreeUconvObject( objtoucs );
    80         err = 1;
    81         return;
    82     }
    83     UniQueryUconvObject(objfromucs, &attr, sizeof(uconv_attribute_t), NULL, NULL, NULL);
    84     attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
    85     UniSetUconvObject(objfromucs, &attr);
    86 }
    87 
    88 cpconv::cpconv( int cpfrom, int cpto )
    89 {
    90     err = 0;
    91 
    92     UniChar     ucs_code_page[12];
    93     size_t      num_elems = 12;
    94     int         rc = ULS_SUCCESS;
    95 
    96     rc = UniMapCpToUcsCp(cpfrom, ucs_code_page, num_elems);
    97     if (rc != ULS_SUCCESS)
    98     {
    99         err = 1;
    100         return;
    101     }
    102 
    103     rc = UniCreateUconvObject(ucs_code_page, &objtoucs);
    104     if (rc != ULS_SUCCESS)
    105     {
    106         err = 1;
    107         return;
    108     }
    109 
    110     uconv_attribute_t attr;
    111     UniQueryUconvObject(objtoucs, &attr, sizeof(attr), NULL, NULL, NULL);
    112     attr.converttype &= ~(CVTTYPE_CTRL7F | CVTTYPE_PATH);
    113     UniSetUconvObject(objtoucs, &attr);
    114 
    115     rc = UniMapCpToUcsCp(cpto, ucs_code_page, num_elems);
    116     if (rc != ULS_SUCCESS)
    117     {
    118         UniFreeUconvObject( objtoucs );
    119         err = 1;
    120         return;
    121     }
    122 
    123     rc = UniCreateUconvObject(ucs_code_page, &objfromucs);
    124     if (rc != ULS_SUCCESS)
    125     {
    126         UniFreeUconvObject( objtoucs );
    127         err = 1;
    128         return;
    129     }
    130     UniQueryUconvObject(objfromucs, &attr, sizeof(uconv_attribute_t), NULL, NULL, NULL);
    131     attr.options = UCONV_OPTION_SUBSTITUTE_BOTH;
    132     UniSetUconvObject(objfromucs, &attr);
    133 }
    134 
    135 cpconv::~cpconv()
    136 {
    137     if ( !err )
    138     {
    139         UniFreeUconvObject( objtoucs );
    140         UniFreeUconvObject( objfromucs );
    141     }
    142 }
    143 
    144 // convert one char
    145 int cpconv::conv( int chfrom )
    146 {
    147     int rc = ULS_SUCCESS;
    148     size_t ns = 0;
    149     int chto = 0;
    150 
    151     if ( err )  return chfrom;
    152 
    153     size_t len = 1;
    154     UniChar unichar;
    155     UniChar *punichar = &unichar;
    156     void *pchfrom = &chfrom;
    157     void *pchto = &chto;
    158 
    159     rc = UniUconvToUcs( objtoucs, &pchfrom, &len, &punichar, &len, &ns);
    160     if ( rc != ULS_SUCCESS )  return chfrom;
    161 
    162     len = 1;
    163     punichar = &unichar;
    164     ns = 0;
    165     rc = UniUconvFromUcs( objfromucs, &punichar, &len, &pchto, &len, &ns);
    166     if ( rc != ULS_SUCCESS )  return chfrom;
    167 
    168     return chto;
    169 }
    170 
    171 int cpconv::conv( const char **in, size_t *in_left, char **out, size_t *out_left )
    172 {
    173     int       rc;
    174     size_t    sl;
    175     size_t    nonid;
    176     UniChar  *ucs;
    177     UniChar  *orig_ucs;
    178     size_t    retval = 0;
    179 
    180     if (!in || !*in) {
    181         return 0;
    182     }
    183 
    184     sl =  *in_left;
    185     //ucs = new UniChar[ sl ];
    186     // have crashes in libc memmanager due to frequent alloc/free
    187     // use system malloc routines as workaround
    188     DosAllocMem( (PPVOID)&ucs, sl * sizeof( UniChar ), fALLOC );
    189     orig_ucs = ucs;
    190 
    191     rc = UniUconvToUcs( objtoucs, (void **)in, in_left, &ucs, &sl, &retval );
    192     if ( rc != 0 ) {
    193         //delete ucs;
    194         DosFreeMem( ucs );
    195         err = 1;
    196         return -1;
    197     }
    198 
    199     sl = ucs - orig_ucs;
    200     ucs = orig_ucs;
    201     rc = UniUconvFromUcs( objfromucs, &ucs, &sl, (void **)out, out_left, &nonid );
    202     //delete ucs;
    203     DosFreeMem( ucs );
    204 
    205     if ( rc != 0 ) {
    206         err = 1;
    207         return -1;
    208     }
    209 
    210     retval += nonid;
    211     return 0;
    212 }
    213 
    214 extern "C" LONG APIENTRY cnvUniToUTF8( const char **in, unsigned *in_left,
    215                                        char **out, unsigned *out_left )
    216 {
    217     cpconv c( 1200, 1208 );
    218     return c.conv( in, in_left, out, out_left );
    219 }
    220 
    221 extern "C" LONG APIENTRY cnvUniBEToUTF8( const char **in, unsigned *in_left,
    222                                          char **out, unsigned *out_left )
    223 {
    224     cpconv c( (UniChar *)(L"UCS-2@endian=big"), (UniChar *)(L"UTF-8") );
    225     return c.conv( in, in_left, out, out_left );
    226 }
    227 
    228 extern "C" LONG APIENTRY cnvUTF8ToUni( const char **in, unsigned *in_left,
    229                                        char **out, unsigned *out_left )
    230 {
    231     cpconv c( 1208, 1200 );
    232     return c.conv( in, in_left, out, out_left );
    233 }
    234 
    235 
    23638// Converts special non-ascii chars to suitable ascii chars
    237 static void convSpchars( UniChar *uni )
     39extern "C" VOID APIENTRY uniConvertSpChars( UniChar *uni )
    23840{
    23941    while ( *uni )
     
    25254                *uni = 0x0022; // "
    25355                break;
     56            case 0x2013:
    25457            case 0x2014:
    25558                *uni = 0x002D; // -
     
    309112// length to fit the string with converted ligatures.
    310113// If no ligatures in string - returns zero.
    311 static int ligaturesLength( UniChar *str )
     114extern "C" LONG APIENTRY uniLigaturesLength( UniChar *str )
    312115{
    313     int llen = 0;
     116    LONG llen = 0;
    314117    while ( *str != 0 ) {
    315118        llen += isLigature( *str++ );
     
    320123// replaces ligatures in src into dst
    321124// src remains unchanged
    322 static void replLigatures( UniChar *src, UniChar *dst )
     125extern "C" VOID APIENTRY uniReplaceLigatures( UniChar *src, UniChar *dst )
    323126{
    324127    while ( *src != 0 )
     
    337140}
    338141
    339 extern "C" LONG APIENTRY cnvUTF8ToSys( const char **in, unsigned *in_left,
    340                                        char **out, unsigned *out_left )
    341 {
    342     unsigned ulen = ( (*in_left) * 2 ) + 2;
    343     char *uni = new char[ ulen ];
    344     memset( uni, 0, ulen );
    345     char *savuni = uni;
    346     unsigned savulen = ulen;
    347     cnvUTF8ToUni( in, in_left, &uni, &ulen );
    348     uni = savuni;
    349     ulen = savulen;
    350     int liglen = ligaturesLength( (UniChar *)uni );
    351     if ( liglen > 0 )  // string contain ligature(s)
    352     {
    353         unsigned ulen_tmp = ulen + ( liglen * 2 );
    354         char *uni_tmp = new char[ ulen_tmp ];
    355         replLigatures( (UniChar *)uni, (UniChar *)uni_tmp );
    356         delete uni;
    357         uni = uni_tmp;
    358         ulen = ulen_tmp;
    359     }
    360     convSpchars( (UniChar *)uni );
    361     cpconv c( 1200 );
    362     LONG rc = c.conv( (const char **)&uni, &ulen, out, out_left );
    363     uni = savuni;
    364     delete uni;
    365     return rc;
    366 }
    367 
    368 extern "C" LONG APIENTRY cnvUniBEToSys( const char **in, unsigned *in_left,
    369                                          char **out, unsigned *out_left )
    370 {
    371     unsigned ulen = ( (*in_left) * 2 ) + 2;
    372     char *uni = new char[ ulen ];
    373     memset( uni, 0, ulen );
    374     char *savuni = uni;
    375     unsigned savulen = ulen;
    376     cpconv c1( (UniChar *)(L"UCS-2@endian=big"), (UniChar *)(L"UCS-2") );
    377     c1.conv( in, in_left, &uni, &ulen );
    378     uni = savuni;
    379     ulen = savulen;
    380     convSpchars( (UniChar *)uni );
    381     cpconv c2( 1200 );
    382     LONG rc = c2.conv( (const char **)&uni, &ulen, out, out_left );
    383     uni = savuni;
    384     delete uni;
    385     return rc;
    386 }
    387 
    388 extern "C" LONG APIENTRY cnvUniToSys( const char **in, unsigned *in_left,
    389                                       char **out, unsigned *out_left )
    390 {
    391     convSpchars( (UniChar *)in );
    392     cpconv c( 1200 );
    393     return c.conv( (const char **)&in, in_left, out, out_left );
    394 }
    395 
    396 extern "C" LONG APIENTRY cnvSysToUCS2( const char **in, unsigned *in_left,
    397                                        char **out, unsigned *out_left )
    398 {
    399     cpconv c( (UniChar *)(L""), (UniChar *)(L"UCS-2") );
    400     return c.conv( in, in_left, out, out_left );
    401 }
    402 
    403 
    404 // test
    405 /*void main()
    406 {
    407     const char *testutf8 = "test UTF-8  ’¥áâ! à®¢¥àª !";
    408     char buf[ 100 ];
    409     memset( buf, 0, sizeof( buf ) );
    410     char *bufsav = buf;
    411     char *buf1 = buf;
    412     unsigned in_len = strlen( testutf8 );
    413     unsigned out_len = sizeof( buf );
    414 
    415     cnvUTF8ToUCS4( &testutf8, &in_len, &buf1, &out_len );
    416 
    417     for ( int i = 0; i<100; i++ )
    418     {
    419         printf( ":%d:", (int)bufsav[i] );
    420     }
    421     printf( "\n" );
    422 }
    423 */
Note: See TracChangeset for help on using the changeset viewer.