aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party/harfbuzz/contrib/tables/unicode_parse_common.py
blob: ac26ecae3d935de1926ed531abb263102527a548 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def lines_get(f):
  '''Read every line of a file like object, @f, and return the ones that still
     have content once comments are removed.

     Everything from the first '#' on a line to its end is treated as a
     comment and discarded, along with the line terminator. Lines that are
     empty after that are left out of the result. Any other leading or
     trailing whitespace is kept as-is.'''
  def cut_comment(line):
    # partition() yields the whole string as the head when there is no '#'.
    return line.partition('#')[0]

  # Remove only genuine line-terminator characters. Unconditionally chopping
  # the last character would truncate a final line that has no trailing
  # newline.
  stripped = [cut_comment(line.rstrip('\r\n')) for line in f.readlines()]
  return [line for line in stripped if line]

def line_split(line):
  '''Break @line into its semicolon separated fields and return them as a
     list, with the whitespace surrounding each field removed.'''
  return [field.strip() for field in line.split(';')]

def codepoints_parse(token):
  '''Convert a hexadecimal code-point token into integers.

     A token of the form "XXXX" gives a single int; a token of the form
     "XXXX..YYYY" gives a (start, end) tuple. A token containing more than one
     ".." separator raises ValueError.'''
  pieces = token.split('..')
  # split() always yields at least one piece, so only an excess of separators
  # has to be rejected here.
  if len(pieces) > 2:
    raise ValueError(token)

  values = [int(piece, 16) for piece in pieces]
  if len(values) == 1:
    return values[0]
  return (values[0], values[1])

def unicode_file_parse(input, map, default_value = None):
  '''Turn a file like object, @input, into a list of (start, end, value)
     tuples.

     Each non-comment line must hold exactly two semicolon separated fields:
     a code-point or code-point range, and a key that is looked up in the
     dict, @map, to obtain the value. A single code-point is recorded as a
     range whose start and end are equal. Lines whose looked-up value equals
     @default_value are skipped. A line with any other number of fields
     raises ValueError.'''
  parsed = []
  for raw_line in lines_get(input):
    fields = line_split(raw_line)
    if len(fields) != 2:
      raise ValueError(fields)

    span = codepoints_parse(fields[0])
    mapped = map[fields[1]]
    if mapped == default_value:
      continue

    # Promote a lone code-point to a one-element range so that every entry has
    # the same shape.
    if type(span) is int:
      span = (span, span)

    parsed.append((span[0], span[1], mapped))

  return parsed

def sort_and_merge(ranges):
  '''Sort a list of (start, end, value) entries and fuse neighbours.

     Two consecutive entries are fused into one when the second starts exactly
     one past the end of the first and both carry the same value. Note that
     @ranges itself is sorted in place; the merged entries are returned as a
     new list.'''
  ranges.sort()
  merged = []
  for item in ranges:
    if merged and merged[-1][1] + 1 == item[0] and merged[-1][2] == item[2]:
      # Extend the most recent entry to cover this one as well.
      previous = merged[-1]
      merged[-1] = (previous[0], item[1], item[2])
    else:
      merged.append(item)

  return merged