aboutsummaryrefslogtreecommitdiffhomepage
path: root/experimental/PdfViewer/spec2def.py
blob: ed2ce0120ad1029d4ab669da48d44348bcfc1bf1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
#!/usr/local/bin/python
# coding: utf-8

import sys
import re

# TODO(edisonn): put processed part of file in a new file
# put unprocessed part, in a new file, so we see what we miss
# keep blank lines, and generate a version without the blank lines

# TODO(edisonn): deal manually with tables that don't have a "KEY TYPE VALUE" header, e.g.
#         TABLE 7.11 Restrictions on the entries in a soft-mask image dictionary
#KEY                       RESTRICTION


 
# ---- Parser state shared (through ``global``) by the functions below ----

# Counters.
lines = 0        # input lines handed to processLineCore() so far
tableLine = 0    # lines seen while inside a table (never reset in this file)
tableRow = 0     # declared for symmetry; not read or written by the visible code

# Current table.
table = ""                     # caption of the current table ('TABLE x.y ...')
tableHeaderFound = bool()      # True once the KEY/TYPE/VALUE header was seen
mustFollowTableHeader = bool() # True right after a caption line
emitedDitionaryName = ""       # class name already emitted for this table, '' if none

# Current row.
columnWidth = list()           # [width of KEY column, width of TYPE column, 0]
columnValues = None            # [key, type, value] being accumulated, or None

# Tokens that may appear in the TYPE column of a spec table.  rebaseTable()
# tests whole words against this set and acceptType() strips these tokens out
# of a candidate type cell; buildKnownDictionaries() later adds every generated
# dictionary class name.
#
# Text extracted from the PDF reference can spell "fi" with the single ligature
# character U+FB01, so every token containing "fi" is listed both in plain
# ASCII and in its ligature form.
knownTypes = set([
'(any)',
u'undefined',
u'unde\ufb01ned',
'(undefined)',
u'(unde\ufb01ned)',
'(various)',
'array',
'or',
'boolean',
'date',
'dictionary',
'function',
'integer',
u'file',
u'\ufb01le',
u'specification',
u'speci\ufb01cation',
'name',
'tree',
'number',
'rectangle',
'stream',
'string',
'text',
# separators between type tokens
',',
' '
])

# TODO(edisonn): add a third element in the vector, the base class, by default it is Dictionary
# TODO(edisonn): add overrides for types map<field_name, type_name>
# e.g. ,{'Resources', 'ResourceDictionary'}
# TODO(edisonn): can be added one by one, or extracted from documentation

# Maps a table key ('TABLE <chapter>.<number>', as extracted from the table
# caption by commitRow()) to a list describing the class generated for it:
#   [0] name of the dictionary class emitted for the table
#   [1] human-readable description, emitted as the class comment
#   [2] (optional) base class name; when missing or '' commitRow() falls back
#       to 'Dictionary'
#   [3] (optional) dict of field name -> expression text written into
#       .must(...); fields listed here are also emitted as required, and
#       stopTable() looks them up in the base classes when the table itself
#       does not define them
tableToClassName = {
'TABLE 3.4': ['StreamCommonDictionary', 'Entries common to all stream dictionaries'],
'TABLE 3.7': ['LzwdecodeAndFlatedecodeFiltersDictionary', 'Optional parameters for LZWDecode and FlateDecode filters'],
'TABLE 3.9': ['CcittfaxdecodeFilterDictionary', 'Optional parameters for the CCITTFaxDecode filter'],
'TABLE 3.10': ['Jbig2DecodeFilterDictionary', 'Optional parameter for the JBIG2Decode filter'],
'TABLE 3.11': ['DctdecodeFilterDictionary', 'Optional parameter for the DCTDecode filter'],
'TABLE 3.12': ['FileTrailerDictionary', 'Entries in the file trailer dictionary'],
'TABLE 3.13': ['EncryptionCommonDictionary', 'Entries common to all encryption dictionaries'],
'TABLE 3.14': ['StandardSecurityHandlerDictionary', 'Additional encryption dictionary entries for the standard security handler'],
'TABLE 3.16': ['CatalogDictionary', 'Entries in the catalog dictionary'],
'TABLE 3.17': ['PageTreeNodeDictionary', 'Required entries in a page tree node'],
'TABLE 3.18': ['PageObjectDictionary', 'Entries in a page object'],
'TABLE 3.19': ['NameDictionary', 'Entries in the name dictionary'],
'TABLE 3.21': ['ResourceDictionary', 'Entries in a resource dictionary'],
'TABLE 3.23': ['NameTreeNodeDictionary', 'Entries in a name tree node dictionary'],
'TABLE 3.25': ['NumberTreeNodeDictionary', 'Entries in a number tree node dictionary'],
'TABLE 3.26': ['FunctionCommonDictionary', 'Entries common to all function dictionaries'],
'TABLE 3.27': ['Type0FunctionDictionary', 'Additional entries specific to a type 0 function dictionary'],
'TABLE 3.28': ['Type2FunctionDictionary', 'Additional entries specific to a type 2 function dictionary'],
'TABLE 3.29': ['Type3FunctionDictionary', 'Additional entries specific to a type 3 function dictionary'],
'TABLE 3.32': ['FileSpecificationDictionary', 'Entries in a file specification dictionary'],
'TABLE 3.33': ['EmbeddedFileStreamDictionary', 'Additional entries in an embedded file stream dictionary'],
'TABLE 3.34': ['EmbeddedFileParameterDictionary', 'Entries in an embedded file parameter dictionary'],
'TABLE 3.35': ['MacOsFileInformationDictionary', 'Entries in a Mac OS file information dictionary'],
'TABLE 4.8': ['GraphicsStateDictionary', 'Entries in a graphics state parameter dictionary'],
'TABLE 4.13': ['CalgrayColorSpaceDictionary', 'Entries in a CalGray color space dictionary'],
'TABLE 4.14': ['CalrgbColorSpaceDictionary', 'Entries in a CalRGB color space dictionary'],
'TABLE 4.15': ['LabColorSpaceDictionary', 'Entries in a Lab color space dictionary'],
'TABLE 4.16': ['IccProfileStreamDictionary', 'Additional entries specific to an ICC profile stream dictionary'],
'TABLE 4.20': ['DeviceNColorSpaceDictionary', 'Entry in a DeviceN color space attributes dictionary'],
'TABLE 4.22': ['Type1PatternDictionary', 'Additional entries specific to a type 1 pattern dictionary', '', {'PatternType': '[datatypes.PdfInteger(1)]'}],
'TABLE 4.23': ['Type2PatternDictionary', 'Entries in a type 2 pattern dictionary'],
'TABLE 4.25': ['ShadingDictionary', 'Entries common to all shading dictionaries'],
'TABLE 4.26': ['Type1ShadingDictionary', 'Additional entries specific to a type 1 shading dictionary', 'ShadingDictionary'],
'TABLE 4.27': ['Type2ShadingDictionary', 'Additional entries specific to a type 2 shading dictionary', 'ShadingDictionary'],
'TABLE 4.28': ['Type3ShadingDictionary', 'Additional entries specific to a type 3 shading dictionary', 'ShadingDictionary'],
'TABLE 4.29': ['Type4ShadingDictionary', 'Additional entries specific to a type 4 shading dictionary', 'ShadingDictionary'],
'TABLE 4.30': ['Type5ShadingDictionary', 'Additional entries specific to a type 5 shading dictionary', 'ShadingDictionary'],
'TABLE 4.31': ['Type6ShadingDictionary', 'Additional entries specific to a type 6 shading dictionary', 'ShadingDictionary'],
'TABLE 4.35': ['ImageDictionary', 'Additional entries specific to an image dictionary', 'XObjectDictionary', {'Subtype': '[datatypes.PdfName(\'Image\')]'}],
'TABLE 4.37': ['AlternateImageDictionary', 'Entries in an alternate image dictionary'],
'TABLE 4.41': ['Type1FormDictionary', 'Additional entries specific to a type 1 form dictionary', 'XObjectDictionary', {'Subtype': '[datatypes.PdfName(\'Form\')]'}],
'TABLE 4.42': ['GroupAttributesDictionary', 'Entries common to all group attributes dictionaries'],
'TABLE 4.43': ['ReferenceDictionary', 'Entries in a reference dictionary'],
'TABLE 4.44': ['PSXobjectDictionary', 'Additional entries specific to a PostScript XObject dictionary'],
'TABLE 5.8': ['Type1FontDictionary', 'Entries in a Type 1 font dictionary', 'FontDictionary', {'Subtype': '[datatypes.PdfName(\'Type1\')]'}],
'TABLE 5.9': ['Type3FontDictionary', 'Entries in a Type 3 font dictionary', 'Type1FontDictionary', {'Subtype': '[datatypes.PdfName(\'Type3\')]'}],
'TABLE 5.11': ['EncodingDictionary', 'Entries in an encoding dictionary'],
'TABLE 5.12': ['CIDSystemInfoDictionary', 'Entries in a CIDSystemInfo dictionary'],
'TABLE 5.13': ['CIDFontDictionary', 'Entries in a CIDFont dictionary', '', {'Subtype': '[datatypes.PdfName(\'CIDFontType0\'), datatypes.PdfName(\'CIDFontType2\')]'}],
'TABLE 5.16': ['CMapDictionary', 'Additional entries in a CMap dictionary'],
'TABLE 5.17': ['Type0FontDictionary', 'Entries in a Type 0 font dictionary', 'FontDictionary', {'Subtype': '[datatypes.PdfName(\'Type0\')]'}],
'TABLE 5.18': ['FontDescriptorDictionary', 'Entries common to all font descriptors', '', {'Type': '[datatypes.PdfName(\'FontDescriptor\')]'}],
'TABLE 5.20': ['CIDFontDescriptorDictionary', 'Additional font descriptor entries for CIDFonts'],
'TABLE 5.23': ['EmbeddedFontStreamDictionary', 'Additional entries in an embedded font stream dictionary'],
'TABLE 6.3': ['Type1HalftoneDictionary', 'Entries in a type 1 halftone dictionary'],
'TABLE 6.4': ['Type6HalftoneDictionary', 'Additional entries specific to a type 6 halftone dictionary'],
'TABLE 6.5': ['Type10HalftoneDictionary', 'Additional entries specific to a type 10 halftone dictionary'],
'TABLE 6.6': ['Type16HalftoneDictionary', 'Additional entries specific to a type 16 halftone dictionary'],
'TABLE 6.7': ['Type5HalftoneDictionary', 'Entries in a type 5 halftone dictionary'],
'TABLE 7.10': ['SoftMaskDictionary', 'Entries in a soft-mask dictionary', '', {'S': '[datatypes.PdfName(\'Alpha\'), datatypes.PdfName(\'Luminosity\')]'}],
'TABLE 7.12': ['SoftMaskImageDictionary', 'Additional entry in a soft-mask image dictionary', 'ImageDictionary', {'Subtype': '[datatypes.PdfName(\'Image\')]', 'ColorSpace': '[datatypes.PdfName(\'DeviceGray\'), datatypes.PdfName(\'Gray\')]'}],
'TABLE 7.13': ['TransparencyGroupDictionary', 'Additional entries specific to a transparency group attributes dictionary', '', {'S': '[datatypes.PdfName(\'Transparency\')]'}],
'TABLE 8.1': ['ViewerPreferencesDictionary', 'Entries in a viewer preferences dictionary'],
'TABLE 8.3': ['OutlineDictionary', 'Entries in the outline dictionary'],
'TABLE 8.4': ['OutlineItemDictionary', 'Entries in an outline item dictionary'],
'TABLE 8.6': ['PageLabelDictionary', 'Entries in a page label dictionary'],
'TABLE 8.7': ['ThreadDictionary', 'Entries in a thread dictionary'],
'TABLE 8.8': ['BeadDictionary', 'Entries in a bead dictionary'],
'TABLE 8.9': ['TransitionDictionary', 'Entries in a transition dictionary'],
'TABLE 8.10': ['AnnotationDictionary', 'Entries common to all annotation dictionaries'],
'TABLE 8.12': ['BorderStyleDictionary', 'Entries in a border style dictionary'],
'TABLE 8.13': ['AppearanceDictionary', 'Entries in an appearance dictionary'],
'TABLE 8.15': ['TextAnnotationDictionary', 'Additional entries specific to a text annotation'],
'TABLE 8.16': ['ALinkAnnotationDictionary', 'Additional entries specific to a link annotation'],
'TABLE 8.17': ['FreeTextAnnotationDictionary', 'Additional entries specific to a free text annotation'],
'TABLE 8.18': ['LineAnnotationDictionary', 'Additional entries specific to a line annotation'],
'TABLE 8.20': ['SquareOrCircleAnnotation', 'Additional entries specific to a square or circle annotation'],
'TABLE 8.21': ['MarkupAnnotationsDictionary', 'Additional entries specific to markup annotations'],
'TABLE 8.22': ['RubberStampAnnotationDictionary', 'Additional entries specific to a rubber stamp annotation'],
'TABLE 8.23': ['InkAnnotationDictionary', 'Additional entries specific to an ink annotation'],
'TABLE 8.24': ['PopUpAnnotationDictionary', 'Additional entries specific to a pop-up annotation'],
'TABLE 8.25': ['FileAttachmentAnnotationDictionary', 'Additional entries specific to a file attachment annotation'],
'TABLE 8.26': ['SoundAnnotationDictionary', 'Additional entries specific to a sound annotation'],
'TABLE 8.27': ['MovieAnnotationDictionary', 'Additional entries specific to a movie annotation'],
'TABLE 8.28': ['WidgetAnnotationDictionary', 'Additional entries specific to a widget annotation'],
'TABLE 8.29': ['ActionDictionary', 'Entries common to all action dictionaries'],
'TABLE 8.30': ['AnnotationActionsDictionary', 'Entries in an annotation\'s additional-actions dictionary'],
'TABLE 8.31': ['PageObjectActionsDictionary', 'Entries in a page object\'s additional-actions dictionary'],
'TABLE 8.32': ['FormFieldActionsDictionary', 'Entries in a form field\'s additional-actions dictionary'],
'TABLE 8.33': ['DocumentCatalogActionsDictionary', 'Entries in the document catalog\'s additional-actions dictionary'],
'TABLE 8.35': ['GoToActionDictionary', 'Additional entries specific to a go-to action'],
'TABLE 8.36': ['RemoteGoToActionDictionary', 'Additional entries specific to a remote go-to action'],
'TABLE 8.37': ['LaunchActionDictionary', 'Additional entries specific to a launch action'],
'TABLE 8.38': ['WindowsLaunchActionDictionary', 'Entries in a Windows launch parameter dictionary'],
'TABLE 8.39': ['ThreadActionDictionary', 'Additional entries specific to a thread action'],
'TABLE 8.40': ['URIActionDictionary', 'Additional entries specific to a URI action'],
'TABLE 8.41': ['URIDictionary', 'Entry in a URI dictionary'],
'TABLE 8.42': ['SoundActionDictionary', 'Additional entries specific to a sound action'],
'TABLE 8.43': ['MovieActionDictionary', 'Additional entries specific to a movie action'],
'TABLE 8.44': ['HideActionDictionary', 'Additional entries specific to a hide action'],
'TABLE 8.46': ['NamedActionsDictionary', 'Additional entries specific to named actions'],
'TABLE 8.47': ['InteractiveFormDictionary', 'Entries in the interactive form dictionary'],
'TABLE 8.49': ['FieldDictionary', 'Entries common to all field dictionaries'],
'TABLE 8.51': ['VariableTextFieldDictionary', 'Additional entries common to all fields containing variable text'],
'TABLE 8.52': ['AppearanceCharacteristicsDictionary', 'Entries in an appearance characteristics dictionary'],
'TABLE 8.54': ['CheckboxFieldDictionary', 'Additional entry specific to a checkbox field'],
'TABLE 8.55': ['RadioButtonFieldDictionary', 'Additional entry specific to a radio button field'],
'TABLE 8.57': ['TextFieldDictionary', 'Additional entry specific to a text field'],
'TABLE 8.59': ['ChoiceFieldDictionary', 'Additional entries specific to a choice field'],
'TABLE 8.60': ['SignatureDictionary', 'Entries in a signature dictionary'],
'TABLE 8.61': ['SubmitFormActionDictionary', 'Additional entries specific to a submit-form action'],
'TABLE 8.63': ['ResetFormActionDictionary', 'Additional entries specific to a reset-form action'],
'TABLE 8.65': ['ImportDataActionDictionary', 'Additional entries specific to an import-data action'],
'TABLE 8.66': ['JavascriptActionDictionary', 'Additional entries specific to a JavaScript action'],
'TABLE 8.67': ['FDFTrailerDictionary', 'Entry in the FDF trailer dictionary'],
'TABLE 8.68': ['FDFCatalogDictionary', 'Entries in the FDF catalog dictionary'],
'TABLE 8.69': ['FDFDictionary', 'Entries in the FDF dictionary'],
'TABLE 8.70': ['EncryptedEmbeddedFileStreamDictionary', 'Additional entry in an embedded file stream dictionary for an encrypted FDF file'],
'TABLE 8.71': ['JavascriptDictionary', 'Entries in the JavaScript dictionary'],
'TABLE 8.72': ['FDFFieldDictionary', 'Entries in an FDF field dictionary'],
'TABLE 8.73': ['IconFitDictionary', 'Entries in an icon fit dictionary'],
'TABLE 8.74': ['FDFPageDictionary', 'Entries in an FDF page dictionary'],
'TABLE 8.75': ['FDFTemplateDictionary', 'Entries in an FDF template dictionary'],
'TABLE 8.76': ['FDFNamedPageReferenceDictionary', 'Entries in an FDF named page reference dictionary'],
'TABLE 8.77': ['FDFFileAnnotationDictionary', 'Additional entry for annotation dictionaries in an FDF file'],
'TABLE 8.78': ['SoundObjectDictionary', 'Additional entries specific to a sound object'],
'TABLE 8.79': ['MovieDictionary', 'Entries in a movie dictionary'],
'TABLE 8.80': ['MovieActivationDictionary', 'Entries in a movie activation dictionary'],
'TABLE 9.2': ['DocumentInformationDictionary', 'Entries in the document information dictionary'],
'TABLE 9.3': ['MetadataStreamDictionary', 'Additional entries in a metadata stream dictionary'],
'TABLE 9.4': ['ComponentsWithMetadataDictionary', 'Additional entry for components having metadata'],
'TABLE 9.6': ['PagePieceDictionary', 'Entries in a page-piece dictionary'],
'TABLE 9.7': ['ApplicationDataDictionary', 'Entries in an application data dictionary'],
'TABLE 9.9': ['StructureTreeRootDictionary', 'Entries in the structure tree root'],
'TABLE 9.10': ['StructureElementDictionary', 'Entries in a structure element dictionary'],
'TABLE 9.11': ['MarkedContentReferenceDictionary', 'Entries in a marked-content reference dictionary'],
'TABLE 9.12': ['ObjectReferenceDictionary', 'Entries in an object reference dictionary'],
'TABLE 9.13': ['StructureElementAccessDictionary', 'Additional dictionary entries for structure element access'],
'TABLE 9.14': ['AttributeObjectDictionary', 'Entry common to all attribute objects'],
'TABLE 9.15': ['MarkInformationDictionary', 'Entry in the mark information dictionary'],
'TABLE 9.16': ['ArtifactsDictionary', 'Property list entries for artifacts'],
'TABLE 9.27': ['StandardStructureDictionary', 'Standard layout attributes common to all standard structure types'],
'TABLE 9.28': ['BlockLevelStructureElementsDictionary', 'Additional standard layout attributes specific to block-level structure elements'],
'TABLE 9.29': ['InlineLevelStructureElementsDictionary', 'Standard layout attributes specific to inline-level structure elements'],
'TABLE 9.30': ['ListAttributeDictionary', 'Standard list attribute'],
'TABLE 9.31': ['TableAttributesDictionary', 'Standard table attributes'],
'TABLE 9.32': ['WebCaptureInformationDictionary', 'Entries in the Web Capture information dictionary'],
'TABLE 9.33': ['WebCaptureDictionary', 'Entries common to all Web Capture content sets'],
'TABLE 9.34': ['WebCapturePageSetDictionary', 'Additional entries specific to a Web Capture page set'],
'TABLE 9.35': ['WebCaptureImageSetDictionary', 'Additional entries specific to a Web Capture image set'],
'TABLE 9.36': ['SourceInformationDictionary', 'Entries in a source information dictionary'],
'TABLE 9.37': ['URLAliasDictionary', 'Entries in a URL alias dictionary'],
'TABLE 9.38': ['WebCaptureCommandDictionary', 'Entries in a Web Capture command dictionary'],
'TABLE 9.40': ['WebCaptureCommandSettingsDictionary', 'Entries in a Web Capture command settings dictionary'],
'TABLE 9.41': ['BoxColorInformationDictionary', 'Entries in a box color information dictionary'],
'TABLE 9.42': ['BoxStyleDictionary', 'Entries in a box style dictionary'],
'TABLE 9.43': ['PrinterMarkAnnotationDictionary', 'Additional entries specific to a printer\'s mark annotation'],
'TABLE 9.44': ['PrinterMarkFormDictionary', 'Additional entries specific to a printer\'s mark form dictionary'],
'TABLE 9.45': ['SeparationDictionary', 'Entries in a separation dictionary'],
'TABLE 9.46': ['PDF_XOutputIntentDictionary', 'Entries in a PDF/X output intent dictionary'],
'TABLE 9.47': ['TrapNetworkAnnotationDictionary', 'Additional entries specific to a trap network annotation'],
'TABLE 9.48': ['TrapNetworkAppearanceStreamDictionary', 'Additional entries specific to a trap network appearance stream'],
'TABLE 9.49': ['OpiVersionDictionary', 'Entry in an OPI version dictionary'],
}

# Filled in by commitRow():
#   class name -> [base class name, {field name: emitted field definition}]
classTree = dict()

def buildKnownDictionaries():
  """Register every generated class name as a known type.

  Adds the class name (element 0) of each tableToClassName entry to
  knownTypes and returns a dict mapping each of those names to ''.
  """
  global tableToClassName
  global knownTypes

  classNames = [entry[0] for entry in tableToClassName.values()]
  knownTypes.update(classNames)
  return dict.fromkeys(classNames, '')

# Built once at import time.  The call also has a side effect: it adds every
# dictionary class name from tableToClassName to knownTypes.
knownDictionaries = buildKnownDictionaries()

def acceptType(val):
  """Return True if ``val`` consists solely of known type tokens.

  Every token in knownTypes (type names, class names and the ' ' / ','
  separators) is deleted from ``val``; the value is accepted when nothing is
  left over.  An empty ``val`` is therefore accepted as well.

  Tokens are removed longest first, in a deterministic order.  Iterating the
  set directly made the answer depend on hash order: a short token such as
  'or' could be cut out of a longer one (e.g. '...ColorSpaceDictionary')
  before the longer one had a chance to match.
  """
  global knownTypes

  remainder = val
  for token in sorted(knownTypes, key=lambda t: (-len(t), t)):
    remainder = remainder.replace(token, '')

  return remainder == ''


def inTable():
  """Tell whether a KEY/TYPE/VALUE table is currently being parsed."""
  # Reading a module-level name needs no ``global`` declaration.
  return tableHeaderFound

def tableDescriptionFound(desc):
  """Remember ``desc``, stripped of surrounding whitespace, as the current table caption."""
  global table
  caption = desc.strip()
  table = caption

def tableHasHeader():
  """Record that the header row of the current table has been seen.

  From this point on inTable() reports True until stopTable() resets it.
  """
  global tableHeaderFound
  tableHeaderFound = True

def fix(val):
  """Make ``val`` safe to embed in a single-quoted Python string literal.

  Typographic characters produced by the PDF-to-text extraction are replaced
  with ASCII look-alikes, then single quotes and newlines are backslash
  escaped.  Returns the converted string; ``val`` itself is not modified.
  """
  # (typographic character, ASCII replacement).  Every source character is
  # non-ASCII and every replacement is ASCII, so the order is irrelevant.
  replacements = (
    # 'fi' ligature.  This used to replace the plain two letters 'fi' with
    # themselves, which did nothing and left the ligature in the output.
    (u'\ufb01', 'fi'),
    (u'\u201c', '"'),     # left double quotation mark
    (u'\u201d', '"'),     # right double quotation mark
    (u'\u2019', '\''),    # right single quotation mark
    (u'\ufb02', 'fl'),    # 'fl' ligature
    (u'\xae', '(R)'),     # registered sign
    (u'\u2026', '...'),   # horizontal ellipsis
    (u'\xd7', 'x'),       # multiplication sign
    (u'\u2212', '-'),     # minus sign
    (u'\u2264', '<='),    # less-than or equal to
    (u'\u2014', '-'),     # em dash
    # NOTE(review): en dash becomes an apostrophe rather than '-'; kept as is
    # because the generated comments depend on it - confirm it is intended.
    (u'\u2013', '\''),
    (u'\u2022', '*'),     # bullet
    (u'\xb5', 'mu'),      # micro sign
    (u'\xf7', '/'),       # division sign
    (u'\xc4', 'A'),       # A with diaeresis
    (u'\xc5', 'A'),       # A with ring above
    (u'\u2122', '(TM)'),  # trade mark sign
  )

  ret = val
  for fancy, plain in replacements:
    ret = ret.replace(fancy, plain)

  # Escape what would otherwise terminate or break a '...' literal.
  # NOTE(review): backslashes already present in the text are not escaped.
  ret = ret.replace('\'', '\\\'')
  ret = ret.replace('\n', '\\n')

  return ret
  
def _isRequired(specs):
  """Decide from the parsed '(...)' qualifiers whether a field is plainly required.

  ``specs`` is the list of ';'-separated qualifiers taken from the leading
  parenthesis of the VALUE column.  The field counts as required only when
  the first qualifier is exactly 'Required' and every other qualifier is an
  inheritable marker or a 'PDF x.y' version tag.  Conditional requirements
  ('Required if ...') are deliberately reported as not required; they have
  to be handled manually.
  """
  required = specs[0] == 'Required'
  for s in specs:
    stripped = s.strip()
    if stripped == 'inheritable' or stripped == 'Inheritable':
      continue
    if re.match(r'^PDF [0-9]+(\.[0-9]+)*', stripped):
      continue
    if s != 'Required':
      required = False
  return required


def commitRow(fspecPy):
  """Write the pending table row (columnValues) to ``fspecPy`` as a field definition.

  Does nothing when no row is pending.  A row whose VALUE column contains no
  ')' is reported on stdout and left pending, with nothing written.

  For the first committed row of a table the ``pdfspec.addClass(...)`` line
  is written first, using the class name, comment and optional base class
  registered in tableToClassName for the table key found in the caption.
  The field definition is also remembered in classTree (with an empty
  comment) so that stopTable() can re-emit it for derived classes.

  Side effects: sets tableKey and emitedDitionaryName, updates classTree and
  resets columnValues to None after a successful commit.

  Raises KeyError when the table key has no entry in tableToClassName.
  """
  global columnValues
  global emitedDitionaryName
  global table
  global tableToClassName
  global classTree
  global tableKey

  if columnValues is None:
    return

  lastClosed = columnValues[2].find(')')
  if lastClosed < 0:
    # single-argument print(...) behaves the same under Python 2 and 3
    print('ERRRRRRRRRRRRRRROR')
    print(columnValues)
    return

  # '(Required; PDF 1.3) ...' -> ['Required', ' PDF 1.3']
  spec = columnValues[2][:lastClosed + 1]
  specs = spec.replace('(', ';').replace(')', ';').strip(';').split(';')
  required = _isRequired(specs)

  columnValues = [fix(columnValues[0]), fix(columnValues[1]), fix(columnValues[2])]

  tableKey = re.search('(TABLE [0-9].[0-9][0-9]?)', table).group(1)
  classInfo = tableToClassName[tableKey]
  # field name -> text of the .must(...) constraint; such fields are always required
  forced = classInfo[3] if len(classInfo) >= 4 else {}

  if emitedDitionaryName == '':
    # First row of this table: open the class definition.
    table = fix(table)
    emitedDitionaryName = classInfo[0]
    comment = fix(classInfo[1])

    baseClass = 'Dictionary'
    if len(classInfo) >= 3 and classInfo[2] != '':
      baseClass = classInfo[2]

    fspecPy.write("  pdfspec.addClass('" + emitedDitionaryName + "', '" + baseClass + "', '" + comment + "')\\\n")
    classTree[emitedDitionaryName] = [baseClass, {}]

  fieldName = columnValues[0]
  if fieldName in forced:
    required = True

  if required:
    fspecPy.write("      .required('NULL')\\\n")
  else:
    fspecPy.write('      .optional()\\\n')

  fieldHead = "          .field('" + fieldName + "')\\\n" + \
              "          .name('" + fieldName + "')\\\n" + \
              "          .type('" + columnValues[1] + "')\\\n"

  fspecPy.write(fieldHead)
  fspecPy.write("          .comment('" + columnValues[2] + "')\\\n")

  # Same definition without the comment text, kept for inheritance lookups.
  classTree[emitedDitionaryName][1][fieldName] = fieldHead + "          .comment('')\\\n"

  if fieldName in forced:
    fspecPy.write('          .must(' + forced[fieldName] + ')\\\n')

  fspecPy.write('          .done().done()\\\n')

  columnValues = None
  
def newRow(first, second, third):
  """Start a new pending row from the KEY, TYPE and VALUE cells (right-stripped)."""
  global columnValues
  columnValues = [cell.rstrip() for cell in (first, second, third)]

def appendRow(second, third):
  """Add a continuation line to the pending row.

  Non-blank TYPE text is appended to the row's type after a space; non-blank
  VALUE text is appended to the row's value after a newline.  Both pieces
  are right-stripped first.
  """
  global columnValues

  moreType = second.rstrip()
  moreValue = third.rstrip()

  if moreType != '':
    columnValues[1] += ' ' + moreType
  if moreValue != '':
    columnValues[2] += '\n' + moreValue

def rebaseTable(fspecPy, line):
  """Re-measure the column widths if ``line`` looks like the start of a row.

  A line starts a row when its first word is followed by zero or more known
  type words and then a word beginning with '(Optional' or '(Required'.
  In that case the pending row is committed, columnWidth[0] and
  columnWidth[1] are recomputed from this line, and True is returned.
  Otherwise nothing is changed and False is returned.
  """
  global knownTypes
  global columnWidth

  # Make commas stand-alone words so 'name,array' splits into type tokens.
  tokens = line.replace(',', ' , ').split()
  if len(tokens) < 3:
    return False

  # Walk over the type words that follow the key, never past the last word.
  pos = 1
  lastIndex = len(tokens) - 1
  while pos < lastIndex and tokens[pos] in knownTypes:
    pos += 1

  candidate = tokens[pos]
  isOptional = candidate.startswith('(Optional')
  isRequired = candidate.startswith('(Required')
  if not (isOptional or isRequired):
    return False

  commitRow(fspecPy)

  # KEY column ends where the first word after the key first occurs.
  columnWidth[0] = line.find(tokens[1])

  if isOptional:
    columnWidth[1] = line.find('(Optional') - columnWidth[0]
  if isRequired:
    columnWidth[1] = line.find('(Required') - columnWidth[0]
  return True
    
    
def stopTable(fspecPy):
  """Close the table currently being parsed, if any.

  Commits the pending row, emits the required fields that the table's
  tableToClassName entry lists (element 3) but that the table itself did not
  define, by copying their definition from the nearest base class recorded
  in classTree, then writes the closing ``.done()`` and leaves table mode.

  A table for which no row was ever committed produced no output, so it is
  closed silently: there is no class to terminate, and tableKey would still
  be unset or refer to a previous table.
  """
  global tableHeaderFound
  global emitedDitionaryName
  global columnValues
  global tableKey
  global classTree

  if not inTable():
    return

  commitRow(fspecPy)
  # commitRow() leaves a row it rejected pending; drop it so that it cannot
  # be continued or committed as part of the next table.
  columnValues = None

  if emitedDitionaryName == '':
    tableHeaderFound = False
    return

  # TODO(edisonn): iterate on all required keys in the def, and if not in the
  # definition, get the definition from the parent and export them
  classInfo = tableToClassName[tableKey]
  if len(classInfo) >= 4:
    ownFields = classTree[emitedDitionaryName][1]
    for field in classInfo[3]:
      if field in ownFields:
        continue

      # Climb the base-class chain.  The walk ends at 'Dictionary' or at any
      # base class that has no table of its own; neither is in classTree.
      searchKey = classTree[emitedDitionaryName][0]
      while searchKey in classTree and field not in classTree[searchKey][1]:
        searchKey = classTree[searchKey][0]

      if searchKey in classTree:
        # TODO(edisonn): hack - for required fields, they need to be
        # downgraded to only a type.  The stored definition is updated too.
        fieldDef = classTree[searchKey][1][field]
        fieldDef = fieldDef.replace(' or array', '')
        fieldDef = fieldDef.replace(' or dictionary', '')
        classTree[searchKey][1][field] = fieldDef

        fspecPy.write('      .required(\'NULL\')\\\n')
        fspecPy.write(fieldDef)
        fspecPy.write('          .must(' + classInfo[3][field] + ')\\\n')
        fspecPy.write('          .done().done()\\\n')
      else:
        print('ERROR' + tableKey + '.' + field)

  tableHeaderFound = False
  emitedDitionaryName = ''
  fspecPy.write('      .done()\n')
  fspecPy.write('\n')
    

def killTable():
  """Hook called when a candidate table is rejected; currently does nothing."""
  pass

def _splitColumns(text, widths):
  """Cut ``text`` into (KEY, TYPE, VALUE) cells using the column widths in ``widths``."""
  keyEnd = widths[0]
  valueStart = keyEnd + widths[1]
  return text[0 : keyEnd], text[keyEnd : valueStart], text[valueStart :]


def processLineCore(fspecPy, line):
  """Feed one raw line of the text spec through the table parser.

  ``line`` is decoded as UTF-8.  The parser looks for a 'TABLE x.y' caption
  followed immediately by a 'KEY TYPE VALUE' header, then slices the
  following lines into three columns using the header's column positions.

  Returns True when the line was consumed as part of such a table (header,
  new row or continuation of a row), False otherwise.  Output is written to
  ``fspecPy`` indirectly through commitRow() and stopTable().
  """
  global lines
  global tableLine
  global columnWidth
  global mustFollowTableHeader

  lines = lines + 1

  line = unicode(line, 'utf8')
  striped = line.rstrip()
  words = line.split()

  # A blank line always ends the current table.
  if len(words) == 0:
    stopTable(fspecPy)
    return False

  # A caption ends the current table and announces a new candidate.
  if re.search(r'^[\s]*(TABLE [0-9].[0-9][0-9]?)', striped):
    stopTable(fspecPy)
    tableDescriptionFound(striped)
    mustFollowTableHeader = True
    return False

  if mustFollowTableHeader:
    mustFollowTableHeader = False

    # TODO(edisonn): support for generic table!
    if words != ['KEY', 'TYPE', 'VALUE']:
      killTable()
      return False

    tableHasHeader()
    typeColumn = striped.index('TYPE')
    columnWidth = [typeColumn, striped.index('VALUE') - typeColumn, 0]
    return True

  if not inTable():
    return False

  tableLine = tableLine + 1
  first, second, third = _splitColumns(striped, columnWidth)

  # NOTE(review): tableLine is never reset in this file, so this branch can
  # only fire for the very first table line of the whole input - confirm
  # whether a per-table reset was intended.
  if tableLine == 1:
    # startswith() also covers an empty VALUE cell, where third[0] would
    # raise IndexError.
    if not third.startswith('('):
      killTable()
      return False

    newRow(first, second, third)
    return True

  # The line may start a row whose columns are laid out differently from
  # the header; if so, slice it again with the re-measured widths.
  if rebaseTable(fspecPy, striped):
    first, second, third = _splitColumns(striped, columnWidth)

  first = first.rstrip()
  second = second.rstrip()
  third = third.rstrip()

  # Text in the VALUE column only: continuation of the row's description.
  if first == '' and second == '' and third != '':
    appendRow(second, third)
    return True

  # More than one word in the KEY column: ordinary prose, the table is over.
  if len(first.split()) > 1:
    stopTable(fspecPy)
    return False

  if first != '' and first[0] == ' ':
    stopTable(fspecPy)
    return False

  if first != '' and second != '' and third == '':
    stopTable(fspecPy)
    return False

  # Text starting exactly at the TYPE column with no key: a wrapped type,
  # provided it is made of known type tokens only.
  if first == '' and second != '' and second[0] != ' ':
    if acceptType(second):
      appendRow(second, third)
      return True
    else:
      stopTable(fspecPy)
      return False

  # From here on third is known to be non-empty whenever first and second are.
  if first != '' and second != '' and not third.startswith('('):
    stopTable(fspecPy)
    return False

  if first == '' and second != '' and second[0] == ' ':
    stopTable(fspecPy)
    return False

  # KEY, TYPE and a '(...' VALUE: a new row begins.
  if first != '' and second != '' and third.startswith('('):
    commitRow(fspecPy)
    newRow(first, second, third)
    return True

  return False
  
def processLine(fspecPy, line):
  """Process one line of the text spec; always returns None.

  processLineCore() reports whether the line belonged to a table, but that
  result is currently not used.  It used to drive an optional "rewrite the
  spec" mode that sat, unreachable, behind an unconditional return: for
  table lines it padded the TYPE column to 60 characters and the KEY column
  to 40 characters (based on columnWidth) before writing the line to a
  second output file.  Re-create that here if the mode is needed again.
  """
  processLineCore(fspecPy, line)
  

def generateDef():
  """Convert the text PDF spec into Python spec directives.

  sys.argv[1] is the PDF reference in text form (input) and sys.argv[2] the
  Python file to generate (output).  The output defines
  ``buildPdfSpec(pdfspec)``, filled from the tables found in the input, and
  ``addDictionaryTypesTo(knowTypes)``, with one entry per class listed in
  tableToClassName.  Returns None.

  Both files are closed when the function returns or raises.
  """
  # pdf spec in text format
  with open(sys.argv[1], 'r') as fspecText:
    # pdf spec in python directives
    with open(sys.argv[2], 'w') as fspecPy:
      fspecPy.write('import datatypes\n')
      fspecPy.write('\n')

      fspecPy.write('def buildPdfSpec(pdfspec):\n')

      for line in fspecText:
        processLine(fspecPy, line)

      # close last table if it was not closed already
      stopTable(fspecPy)

      fspecPy.write('\n')

      fspecPy.write('def addDictionaryTypesTo(knowTypes):\n')
      for key in tableToClassName:
        #TODO(edisonn): build this map
        name = tableToClassName[key][0]
        fspecPy.write("  knowTypes['" + name + "'] = ['SkPdf" + name + "*', '(SkPdf" + name + "*)ret', datatypes.CppNull(), 'ret->isDictionary() && ((SkPdf" + name + "*)ret)->valid()', 'A_DICTIONARY']\n")
      fspecPy.write('\n')

# Script entry point: generateDef() returns None, so the exit status is 0.
if __name__ == '__main__':
  sys.exit(generateDef())