source: trunk/poppler/mypoppler/poppler/gen-unicode-tables.py @ 461

Last change on this file since 461 was 461, checked in by Silvan Scherrer, 11 years ago

poppler update to 0.14.2

File size: 1.2 KB
Line 
"""Generate C tables of Unicode compatibility decompositions (NFKD).

Run as a script, this writes a C fragment to standard output containing:

* the ``decomposition`` struct typedef,
* ``DECOMP_TABLE_LENGTH`` and ``decomp_table``: one entry per code point
  whose NFKD form differs from the code point itself, giving the length of
  the decomposition and its offset into ``decomp_expansion``,
* ``decomp_expansion``: the decomposed code points, with identical
  decompositions stored only once.

The script runs unchanged on Python 2 and Python 3.
"""
import unicodedata

try:  # Python 2 names
    _unichr = unichr
    _range = xrange
except NameError:  # Python 3: chr/range already have the needed behavior
    _unichr = chr
    _range = range

# Upper bound of the scanned code point range.  It is used as the *exclusive*
# end of the range, so this value itself is not examined.
UNICODE_LAST_CHAR_PART1 = 0x2FAFF
# First precomposed Hangul syllable and the number of syllables
# (19 leading consonants x 21 vowels x 28 trailing forms).
HANGUL_S_BASE = 0xAC00
HANGUL_S_COUNT = 19 * 21 * 28

HEADER = """// Generated by gen-unicode-tables.py

typedef struct {
  Unicode character;
  int length;
  int offset;
} decomposition;
"""


def build_tables(code_points):
    """Compute the decomposition tables for *code_points*.

    Args:
        code_points: iterable of integer code points, visited in order.

    Returns:
        A pair ``(decomp_table, decomp_expansion)``.  ``decomp_table`` is a
        list of ``(code_point, length, offset)`` tuples, one per code point
        whose NFKD form differs from itself.  ``decomp_expansion`` is a list
        of ``(decomposition, offset)`` tuples, where ``decomposition`` is a
        tuple of code points and ``offset`` is its start index in the
        flattened expansion array.  Equal decompositions share one entry.
    """
    decomp_table = []
    decomp_expansion = []
    offset_of = {}  # decomposition tuple -> offset already assigned to it
    next_offset = 0
    hangul_end = HANGUL_S_BASE + HANGUL_S_COUNT

    for u in code_points:
        # Precomposed Hangul syllables are left out of the tables
        # (presumably the consumer decomposes them arithmetically - confirm
        # against the C code that uses these tables).
        if HANGUL_S_BASE <= u < hangul_end:
            continue
        norm = tuple(ord(c) for c in unicodedata.normalize("NFKD", _unichr(u)))
        if norm == (u,):
            continue
        if norm not in offset_of:
            offset_of[norm] = next_offset
            decomp_expansion.append((norm, next_offset))
            next_offset += len(norm)
        decomp_table.append((u, len(norm), offset_of[norm]))

    return decomp_table, decomp_expansion


def format_tables(decomp_table, decomp_expansion):
    """Render the tables as C source.

    Returns:
        A list of three strings (the length ``#define``, the
        ``decomp_table`` array and the ``decomp_expansion`` array), each
        meant to be written followed by a newline.
    """
    table_rows = ", \n".join(
        "  { 0x%x, %d, %d }" % (character, length, offset)
        for character, length, offset in decomp_table)
    expansion_rows = ", \n".join(
        "  %s /* offset %d */ " % (", ".join("0x%x" % u for u in norm), index)
        for norm, index in decomp_expansion)
    return [
        "#define DECOMP_TABLE_LENGTH %d\n" % len(decomp_table),
        "static const decomposition decomp_table[] = {\n%s\n};\n" % table_rows,
        "static const Unicode decomp_expansion[] = {\n%s\n};\n"
        % expansion_rows,
    ]


def main():
    """Write the generated C fragment to standard output."""
    decomp_table, decomp_expansion = build_tables(
        _range(0, UNICODE_LAST_CHAR_PART1))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(HEADER)
    for chunk in format_tables(decomp_table, decomp_expansion):
        print(chunk)


if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.