aboutsummaryrefslogtreecommitdiffhomepage
path: root/generic/pg-xml.el
blob: 9bd2559f28ed2695e283f13700b7b0bd171a3fc8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
;; pg-xml.el	 XML functions for Proof General
;;
;; Copyright (C) 2000-2001 LFCS Edinburgh. 
;;
;; Author: David Aspinall <da@dcs.ed.ac.uk>
;;
;; $Id$
;;
;; XML functions for Proof General
;;
;; Proof General Kit uses PGIP, an XML-message protocol
;; for interactive proof.  The simple functions here allow
;; parsing and writing of XML documents.  Little attempt
;; is made for efficiency, since PGIP documents are intended
;; to be small.  No attempt at validation or handling
;; unicode characters is made.
;;

;; TODO:
;; * Fix identifiers
;; * Fix whitespace handling
;; * Ignore prologues


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parsing function: pg-xml-parse-buffer
;;

(defconst pg-xml-name  "[a-zA-Z_:][a-zA-Z0-9._:-]*")
(defconst pg-xml-space "[ \t\n
]")
(defconst pg-xml-ref   (concat "&" pg-xml-name ";"))  ; FIXME: charrefs
			
(defconst pg-xml-start-open-elt-regexp 
  (concat pg-xml-space "*" "<\\(" pg-xml-name "\\)"))
(defconst pg-xml-end-open-elt-regexp 
  (concat pg-xml-space "*" "\\(/?\\)>"))
(defconst pg-xml-close-elt-regexp 
  (concat pg-xml-space "*" "</\\(" pg-xml-name "\\)>"))
(defconst pg-xml-attribute-regexp 
  (concat pg-xml-space "+" "\\(" pg-xml-name "\\)"))
(defconst pg-xml-attribute-val-regexp 
  (concat pg-xml-space "*" "=" pg-xml-space "*" 
	  "\"\\(\\([^<&\"]\\|" pg-xml-ref "\\)*\\)\"")) ; FIXME or 's
(defconst pg-xml-comment-start-regexp "<!--")
(defconst pg-xml-comment-end-regexp "-->")
(defconst pg-xml-anymarkup-regexp "<")
(defconst pg-xml-special-elts
  '(xml)
  "List of special elements which don't require closing.")

(defvar xmlparse nil
  "Used to store parse result.")

(defun pg-xml-add-text (text)		
  "If TEXT is non empty, add it to subtree at top of `xmlparse'."
  (unless (string-equal text "")
    (setq xmlparse (cons (cons text (car xmlparse))
			 (cdr xmlparse)))))
	   

(defun pg-xml-parse-buffer (&optional buffer)
  "Parse an XML documment in BUFFER (defaulting to current buffer).
Return a lisp structure with symbols representing the element 
names, so that the result of parsing 
   <elt attr=\"blah\">text</elt> 
is 
  (elt ((attr . \"blah\")) (text))"
  (save-excursion
    (if buffer (set-buffer buffer))
    (goto-char (point-min))
    (let ((xmlparse nil)
	  (pos	    (point))
	  openelts elt)
      (unless (looking-at pg-xml-start-open-elt-regexp)
	(warn "pg-xml-parse-buffer: Junk at start of document: %s"
	      (buffer-substring 
	       (point-min)
	       (min (save-excursion
		      (re-search-forward pg-xml-anymarkup-regexp nil t)
		      (match-beginning 0))
		    (+ 10 (point-min))))))
      (while (re-search-forward pg-xml-anymarkup-regexp nil t)
	(goto-char (match-beginning 0))
	(cond
	 ;; CASE 1: element opening
	 ((looking-at pg-xml-start-open-elt-regexp)
	  (setq elt (intern (match-string 1)))
	  ;; Add text before here to the parse, if non-empty
	  ;; FIXME: maybe unless last elt was opening too and 
	  ;; only white space?
	  (pg-xml-add-text (buffer-substring pos (match-beginning 0)))
	  ;; Now look for any attributes
	  (goto-char (match-end 0))
	  (let ((attrs nil) attr)
	    (while (looking-at pg-xml-attribute-regexp)
	      (setq attr (intern (match-string 1)))
	      (goto-char (match-end 0))
	      (if (looking-at pg-xml-attribute-val-regexp)
		  (progn
		    (setq attr (cons attr (match-string 1)))
		    (goto-char (match-end 0))))
	      (setq attrs (cons attr attrs)))
	    ;; Retain order in document
	    (setq attrs (reverse attrs))
	    ;; Now we ought to be at the end of the element opening
	    (unless (looking-at pg-xml-end-open-elt-regexp)
	      (error "pg-xml-parse-buffer: Invalid XML in element opening %s" 
		     (symbol-name elt)))
	    ;; Stack the element unless it's self closing
	    (if (> (length (match-string 1)) 0)
		;; Add element without nesting
		(setq xmlparse (cons (cons (cons elt attrs)
					   (car xmlparse))
				     (cdr xmlparse)))
	      ;; Otherwise stack and nest
	      (setq openelts (cons elt openelts))
	      (setq xmlparse (cons (list (cons elt attrs)) 
				   xmlparse))))
	    (goto-char (match-end 0))
	    (setq pos (point)))

	 ;; CASE 2: element closing
	 ((looking-at pg-xml-close-elt-regexp)
	  (setq elt (intern (match-string 1)))
	  ;; It better be the top thing on the stack
	  (unless (eq elt (car-safe openelts))
	    (error "pg-xml-parse-buffer: Invalid XML at element closing </%s> (expected </%s>)"
		   (symbol-name elt)
		   (if openelts
		       (symbol-name (car openelts))
		     "no closing element")))
	  ;; Add text before here to the parse
	  (pg-xml-add-text (buffer-substring pos (match-beginning 0)))
	  ;; Unstack the element and close subtree
	  (setq openelts (cdr openelts))
	  (setq xmlparse (cons (cons
				(reverse (car xmlparse))
				(cadr xmlparse))
			       (cddr xmlparse)))
	  (goto-char (match-end 0))
	  (setq pos (point)))
	 
	 ;; CASE 3: comment
	 ((looking-at pg-xml-comment-start-regexp)
	  (unless (re-search-forward pg-xml-comment-end-regexp nil t)
	    (error "pg-xml-parse-buffer: Unclosed comment beginning at line %s"
		   (1+ (count-lines (point-min) (point))))))))
	
      ;; We'd better have nothing on the stack of open elements now.
      (unless (null openelts)
	(error "pg-xml-parse-buffer: Unexpected end of document, expected </%s>"
	       (symbol-name (car openelts))))
      ;; And we'd better be at the end of the document.
      (unless (and (looking-at "[ \t\n]*")
		   (eq (match-end 0) (point-max)))
	(warn "pg-xml-parse-buffer: Junk at end of document: %s"
	      (buffer-substring (point)
				(min (point-max) (+ 30 (point-max))))))
      ;; Return the parse
      ;; FIXME: 
      (caar xmlparse))))

(defun pg-xml-parse-string (arg)
  "Parse string in ARG, same as pg-xml-parse-buffer."
  (let
      ((tempbuffer (get-buffer-create " *xml-parse*")))
    (save-excursion 
      (set-buffer tempbuffer)
      (delete-region (point-min) (point-max))
      (insert-string arg)
      (pg-xml-parse-buffer))))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Producing functions: state-based writing of an XML doc,
;;			built up in pg-xml-doc


(defvar pg-xml-doc nil
  "Current document being written")

(defvar pg-xml-openelts nil
  "Stack of openelements")

(defvar pg-xml-indentp nil
  "Whether to indent written XML documents")

(defun pg-xml-begin-write ()
  (setq pg-xml-doc nil
	pg-xml-openelts nil))

(defun pg-xml-indent ()
  (if pg-xml-indentp
      (substring "\n                              " 
		 1 (length pg-xml-openelts))
    ""))

(defun pg-xml-openelt (element &optional attributes)
  (setq pg-xml-openelts (cons element pg-xml-openelts))
  (let (string)
    (setq string (concat (pg-xml-indent)
			 "<" (symbol-name element)))
    (while attributes
      (if (consp (car attributes))
	  (setq string (concat 
			string
			" " 
			(symbol-name (caar attributes))
			"="
			(cdar attributes)))
	(setq string (concat string
			     " " (symbol-name (car attributes)))))
      (setq attributes (cdr attributes)))
    (setq pg-xml-doc
	  (cons (concat string ">") pg-xml-doc))))

(defun pg-xml-closeelt ()
  (unless pg-xml-openelts
    (error "pg-xml-closelt: no open elements"))
  (setq pg-xml-doc 
	(cons
	 (concat
	  (pg-xml-indent)
	  "</" (symbol-name (car pg-xml-openelts)) ">")
	 pg-xml-doc))
  (setq pg-xml-openelts (cdr pg-xml-openelts)))


(defun pg-xml-writetext (text)
  (setq pg-xml-doc (cons (concat (pg-xml-indent) text) pg-xml-doc)))

(defun pg-xml-doc ()
  (apply 'concat (reverse pg-xml-doc)))

;; Test document:
;;(progn
;;  (pg-xml-begin-write)
;;  (pg-xml-openelt 'root)
;;  (pg-xml-openelt 'a '((class . "1B")))
;;  (pg-xml-writetext "text a")
;;  (pg-xml-closeelt)
;;  (pg-xml-closeelt)
;;  (pg-xml-doc))

(provide 'pg-xml)
;; End of `pg-xml.el'