# path: root/third_party/harfbuzz/contrib/tables/category-parse.py
# blob: 6818c1dbfcf22ae9cad5857b3e7b958ba0bffc11 (plain)
import sys
from unicode_parse_common import *

# http://www.unicode.org/Public/5.1.0/ucd/extracted/DerivedGeneralCategory.txt

# Lookup table from two-letter Unicode General_Category abbreviations to the
# names of the matching HB_* category constants.  main() hands this table to
# unicode_file_parse(); presumably it is used there to translate the category
# column of the input file -- confirm against unicode_parse_common.
category_to_harfbuzz = {
  # Letters (L*)
  'Lu': 'HB_Letter_Uppercase',
  'Ll': 'HB_Letter_Lowercase',
  'Lt': 'HB_Letter_Titlecase',
  'Lm': 'HB_Letter_Modifier',
  'Lo': 'HB_Letter_Other',

  # Marks (M*)
  'Mn': 'HB_Mark_NonSpacing',
  'Mc': 'HB_Mark_SpacingCombining',
  'Me': 'HB_Mark_Enclosing',

  # Numbers (N*)
  'Nd': 'HB_Number_DecimalDigit',
  'Nl': 'HB_Number_Letter',
  'No': 'HB_Number_Other',

  # Punctuation (P*)
  'Pc': 'HB_Punctuation_Connector',
  'Pd': 'HB_Punctuation_Dash',
  'Ps': 'HB_Punctuation_Open',
  'Pe': 'HB_Punctuation_Close',
  'Pi': 'HB_Punctuation_InitialQuote',
  'Pf': 'HB_Punctuation_FinalQuote',
  'Po': 'HB_Punctuation_Other',

  # Symbols (S*)
  'Sm': 'HB_Symbol_Math',
  'Sc': 'HB_Symbol_Currency',
  'Sk': 'HB_Symbol_Modifier',
  'So': 'HB_Symbol_Other',

  # Separators (Z*)
  'Zs': 'HB_Separator_Space',
  'Zl': 'HB_Separator_Line',
  'Zp': 'HB_Separator_Paragraph',

  # Other (C*)
  'Cc': 'HB_Other_Control',
  'Cf': 'HB_Other_Format',
  'Cs': 'HB_Other_Surrogate',
  'Co': 'HB_Other_PrivateUse',
  'Cn': 'HB_Other_NotAssigned',
}

def main(infile, outfile):
  """Write a C header containing the Unicode category range table.

  The ranges come from unicode_file_parse(infile, category_to_harfbuzz)
  followed by sort_and_merge(); each resulting (start, end, value) triple
  becomes one initializer of the generated `category_properties` array.

  Args:
    infile: Input handed unchanged to unicode_file_parse().  Presumably an
      open DerivedGeneralCategory.txt (see the URL near the top of this
      file) -- confirm against unicode_parse_common.
    outfile: Writable text file object; only its write() method is used.
  """
  ranges = unicode_file_parse(infile, category_to_harfbuzz)
  ranges = sort_and_merge(ranges)

  entries = ['  {0x%x, 0x%x, %s},' % (start, end, value)
             for (start, end, value) in ranges]

  # An empty string in this list produces a blank line in the output.
  # The banner names the general category tables; the earlier wording
  # ("script tables") did not match what this script generates.
  lines = [
    '// Generated from Unicode general category tables',
    '',
    '#ifndef CATEGORY_PROPERTIES_H_',
    '#define CATEGORY_PROPERTIES_H_',
    '',
    '#include <stdint.h>',
    '#include "harfbuzz-external.h"',
    '',
    'struct category_property {',
    '  uint32_t range_start;',
    '  uint32_t range_end;',
    '  HB_CharCategory category;',
    '};',
    '',
    'static const struct category_property category_properties[] = {',
  ] + entries + [
    '};',
    '',
    'static const unsigned category_properties_count = %d;' % len(ranges),
    '',
    '#endif  // CATEGORY_PROPERTIES_H_',
  ]

  # A single write() replaces the Python 2-only `print >>outfile` statements,
  # so the function behaves the same under Python 2 and Python 3.
  outfile.write('\n'.join(lines) + '\n')

if __name__ == '__main__':
  # Command-line entry point: <script> <input .txt> <output .h>
  if len(sys.argv) != 3:
    # Report misuse on stderr and exit non-zero so calling scripts can
    # detect that no header was generated.
    sys.stderr.write('Usage: %s <input .txt> <output .h>\n' % sys.argv[0])
    sys.exit(1)

  # open() replaces the Python 2-only file() builtin.  The with-blocks close
  # both files (flushing the generated header) even if main() raises; they
  # are nested rather than combined so Python 2.6 can still run this.
  with open(sys.argv[1], 'r') as infile:
    with open(sys.argv[2], 'w') as outfile:
      main(infile, outfile)