summaryrefslogtreecommitdiff
path: root/lib/richpp.ml
blob: 745b7d2a22991af8882cb955d33f05eb973d80c1 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
(************************************************************************)
(*  v      *   The Coq Proof Assistant  /  The Coq Development Team     *)
(* <O___,, *   INRIA - CNRS - LIX - LRI - PPS - Copyright 1999-2015     *)
(*   \VV/  **************************************************************)
(*    //   *      This file is distributed under the terms of the       *)
(*         *       GNU Lesser General Public License Version 2.1        *)
(************************************************************************)

open Xml_datatype

type 'annotation located = {
  annotation : 'annotation option;
  startpos   : int;
  endpos     : int
}

let rich_pp annotate ppcmds =
  (** First, we use Format to introduce tags inside
      the pretty-printed document.

      Each inserted tag is a fresh index that we keep in sync with the contents
      of annotations.
  *)
  let annotations = ref [] in
  let index = ref (-1) in
  let pp_tag obj =
    let () = incr index in
    let () = annotations := obj :: !annotations in
    string_of_int !index
  in

  let tagged_pp = Format.(

    (** Warning: The following instructions are valid only if
        [str_formatter] is not used for another purpose in
        Pp.pp_with. *)

    let ft = str_formatter in

    (** We reuse {!Format} standard way of producing tags
        inside pretty-printing. *)
    pp_set_tags ft true;

    (** The whole output must be a valid document. To that
        end, we nest the document inside a tag named <pp>. *)
    pp_open_tag ft "pp";

    (** XML ignores spaces. The problem is that our pretty-printings
        are based on spaces to indent. To solve that problem, we
        systematically output non-breakable spaces, which are properly
        honored by XML.

        To do so, we reconfigure the [str_formatter] temporarily by
        hijacking the function that output spaces. *)
    let out, flush, newline, std_spaces =
      pp_get_all_formatter_output_functions ft ()
    in
    let set = pp_set_all_formatter_output_functions ft ~out ~flush ~newline in
    set ~spaces:(fun k ->
      for i = 0 to k - 1 do
        Buffer.add_string stdbuf "&nbsp;"
      done
    );

    (** Some characters must be escaped in XML. This is done by the
        following rewriting of the strings held by pretty-printing
        commands. *)
    Pp.(pp_with ~pp_tag ft (rewrite Xml_printer.pcdata_to_string ppcmds));

    (** Insert </pp>. *)
    pp_close_tag ft ();

    (** Get the final string. *)
    let output = flush_str_formatter () in

    (** Finalize by restoring the state of the [str_formatter] and the
        default behavior of Format. By the way, there may be a bug here:
        there is no {!Format.pp_get_tags} and therefore if the tags flags
        was already set to true before executing this piece of code, the
        state of Format is not restored. *)
    set ~spaces:std_spaces;
    pp_set_tags ft false;
    output
  )
  in
  (** Second, we retrieve the final function that relates
      each tag to an annotation. *)
  let objs = CArray.rev_of_list !annotations in
  let get index = annotate objs.(index) in

  (** Third, we parse the resulting string. It is a valid XML
      document (in the sense of Xml_parser). As blanks are
      meaningful we deactivate canonicalization in the XML
      parser. *)
  let xml_pp =
    try
      Xml_parser.(parse ~do_not_canonicalize:true (make (SString tagged_pp)))
    with Xml_parser.Error e ->
      Printf.eprintf
        "Broken invariant (RichPp): \n\
         The output semi-structured pretty-printing is ill-formed.\n\
         Please report.\n\
         %s"
        (Xml_parser.error e);
      exit 1
  in

  (** Fourth, the low-level XML is turned into a high-level
      semi-structured document that contains a located annotation in
      every node. During the traversal of the low-level XML document,
      we build a raw string representation of the pretty-print. *)
  let rec node buffer = function
    | Element (index, [], cs) ->
      let startpos, endpos, cs = children buffer cs in
      let annotation = try get (int_of_string index) with _ -> None in
      (Element (index, { annotation; startpos; endpos }, cs), endpos)

    | PCData s ->
      Buffer.add_string buffer s;
      (PCData s, Buffer.length buffer)

    | _ ->
      assert false (* Because of the form of XML produced by Format. *)

  and children buffer cs =
    let startpos   = Buffer.length buffer in
    let cs, endpos =
      List.fold_left (fun (cs, endpos) c ->
        let c, endpos = node buffer c in
        (c :: cs, endpos)
      ) ([], startpos) cs
    in
    (startpos, endpos, List.rev cs)
  in
  let pp_buffer = Buffer.create 13 in
  let xml, _ = node pp_buffer xml_pp in

  (** We return the raw pretty-printing and its annotations tree. *)
  (Buffer.contents pp_buffer, xml)

let annotations_positions xml =
  let rec node accu = function
    | Element (_, { annotation = Some annotation; startpos; endpos }, cs) ->
      children ((annotation, (startpos, endpos)) :: accu) cs
    | Element (_, _, cs) ->
      children accu cs
    | _ ->
      accu
  and children accu cs =
    List.fold_left node accu cs
  in
  node [] xml

let xml_of_rich_pp tag_of_annotation attributes_of_annotation xml =
  let rec node = function
    | Element (index, { annotation; startpos; endpos }, cs) ->
      let attributes =
        [ "startpos", string_of_int startpos;
          "endpos", string_of_int endpos
        ]
        @ (match annotation with
          | None -> []
          | Some annotation -> attributes_of_annotation annotation
        )
      in
      let tag =
        match annotation with
          | None -> index
          | Some annotation -> tag_of_annotation annotation
      in
      Element (tag, attributes, List.map node cs)
    | PCData s ->
      PCData s
  in
  node xml