TEMPORARY: add unicode-tokens2 mechanism to test across different machines

author: David Aspinall <da@inf.ed.ac.uk> 2008-07-05 15:04:13 +0000
committer: David Aspinall <da@inf.ed.ac.uk> 2008-07-05 15:04:13 +0000
commit: 619955d4b9949dd99a483fcf9daf8a4429301b82 (patch)
tree: d6efdc7da8fb42591c677bb1e483f62aa221fa5b
parent: 170973824f698d28c9891058dde929e1ce4bac11 (diff)
7 files changed, 1632 insertions, 0 deletions
diff --git a/generic/pg-custom.el b/generic/pg-custom.el
index 238b76b4..64d8a97a 100644
--- a/generic/pg-custom.el
+++ b/generic/pg-custom.el
@@ -56,6 +56,12 @@ look for files named x-symbol-<PA>.el.")
   :set 'proof-set-value
   :group 'proof-user-options)
 
+(defpgcustom unicode-tokens2-enable nil
+  "*Non-nil for using Unicode token input mode in Proof General."
+  :type 'boolean
+  :set 'proof-set-value
+  :group 'proof-user-options)
+
 (defpgcustom mmm-enable nil
   "*Whether to use MMM Mode in Proof General for this assistant.
 MMM Mode allows multiple modes to be used in the same buffer.
diff --git a/generic/proof-auxmodes.el b/generic/proof-auxmodes.el
index a42ca070..809630bc 100644
--- a/generic/proof-auxmodes.el
+++ b/generic/proof-auxmodes.el
@@ -76,6 +76,22 @@
 	     (proof-unicode-tokens-support-available))
 	(proof-unicode-tokens-set-global t)))
 
+;;
+;; Unicode tokens 2 (temporary!)
+;; 
+(defun proof-unicode-tokens2-support-available ()
+  "A test to see whether unicode tokens2 support is available."
+  (and
+   (or (featurep 'unicode-tokens2)
+       (proof-try-require 'unicode-tokens2))
+   ;; Requires prover-specific config in <foo>-unicode-tokens2.el
+   (proof-try-require (proof-ass-sym unicode-tokens2))))
+
+(proof-eval-when-ready-for-assistant
+    (if (and (proof-ass unicode-tokens2-enable)
+	     (proof-unicode-tokens2-support-available))
+	(proof-unicode-tokens2-set-global t)))
+
 
 (provide 'proof-auxmodes)
 
diff --git a/generic/proof-menu.el b/generic/proof-menu.el
index af7950d1..968098e0 100644
--- a/generic/proof-menu.el
+++ b/generic/proof-menu.el
@@ -279,6 +279,8 @@ without adjusting window layout."
   (proof-deftoggle-fn 
    (proof-ass-sym unicode-tokens-enable) 'proof-unicode-tokens-toggle)
   (proof-deftoggle-fn 
+   (proof-ass-sym unicode-tokens2-enable) 'proof-unicode-tokens2-toggle)
+  (proof-deftoggle-fn 
    (proof-ass-sym maths-menu-enable) 'proof-maths-menu-toggle)
   (proof-deftoggle-fn (proof-ass-sym mmm-enable) 'proof-mmm-toggle))
 
@@ -332,6 +334,16 @@ without adjusting window layout."
       :selected (and (boundp 'unicode-tokens-mode) 
 		     unicode-tokens-mode)]
 
+     ["Unicode Tokens 2" 
+      (progn
+	(unless unicode-tokens2-mode (proof-x-symbol-toggle 0))
+	(proof-unicode-tokens2-toggle (if unicode-tokens2-mode 0 1)))
+      :active (and (not (and (boundp 'x-symbol-mode) x-symbol-mode))
+		   (proof-unicode-tokens2-support-available))
+      :style toggle
+      :selected (and (boundp 'unicode-tokens2-mode) 
+		     unicode-tokens2-mode)]
+
      ["Unicode Maths Menu" (proof-maths-menu-toggle (if maths-menu-mode 0 1))
       :active (proof-maths-menu-support-available)
       :style toggle
diff --git a/generic/proof-unicode-tokens2.el b/generic/proof-unicode-tokens2.el
new file mode 100644
index 00000000..377d7d8f
--- /dev/null
+++ b/generic/proof-unicode-tokens2.el
@@ -0,0 +1,142 @@
+;; proof-unicode-tokens2.el   Support Unicode tokens package
+;;
+;; Copyright (C) 2008 David Aspinall / LFCS Edinburgh
+;; Author:    David Aspinall <David.Aspinall@ed.ac.uk>
+;; License:   GPL (GNU GENERAL PUBLIC LICENSE)
+;;
+;; $Id$
+;;
+
+(eval-when-compile
+  (require 'proof-utils) ; for proof-ass, proof-eval-when-ready-for-assistant
+  (require 'cl))
+
+(eval-when (compile)
+  (require 'unicode-tokens2)) ; it's loaded dynamically at runtime
+
+(defvar proof-unicode-tokens2-initialised nil
+  "Flag indicating whether or not we've performed startup.")
+
+(defun proof-unicode-tokens2-init ()
+  "Initialise settings for unicode tokens from prover specific variables."
+  (mapcar
+   (lambda (var) ;; or defass?
+     (if (boundp (proof-ass-symv var))
+	 (set (intern (concat "unicode-tokens2-" (symbol-name var)))
+	      (eval `(proof-ass ,var)))))
+   '(charref-format
+     token-format
+     control-token-format
+     token-name-alist
+     glyph-list
+     token-match
+     control-token-match
+     hexcode-match
+     next-character-regexp
+     token-prefix
+     token-suffix
+     shortcut-alist))
+  (unicode-tokens2-initialise)
+  (setq proof-unicode-tokens2-initialised t))
+  
+;;;###autoload
+(defun proof-unicode-tokens2-enable ()
+  "Turn on or off Unicode tokens mode in Proof General script buffer.
+This invokes `unicode-tokens2-mode' to toggle the setting for the current
+buffer, and then sets PG's option for default to match.
+Also we arrange to have unicode tokens mode turn itself on automatically 
+in future if we have just activated it for this buffer."
+  (interactive)
+  (when (proof-unicode-tokens2-support-available) ;; loads unicode-tokens2
+    (unless proof-unicode-tokens2-initialised
+      (proof-unicode-tokens2-init))
+    (proof-unicode-tokens2-set-global (not unicode-tokens2-mode))))
+
+
+;;;###autoload
+(defun proof-unicode-tokens2-set-global (flag)
+  "Set global status of unicode tokens mode for PG buffers to be FLAG.
+Turn on/off menu in all script buffers and ensure new buffers follow suit."
+  (unless proof-unicode-tokens2-initialised 
+      (proof-unicode-tokens2-init))
+  (let ((hook (proof-ass-sym mode-hook)))
+    (if flag
+	(add-hook hook 'unicode-tokens2-mode)
+      (remove-hook hook 'unicode-tokens2-mode))
+    (proof-map-buffers 
+      (proof-buffers-in-mode proof-mode-for-script)
+      (unicode-tokens2-mode (if flag 1 0)))
+    (proof-unicode-tokens2-shell-config)))
+
+  
+;;;
+;;; Interface to custom to dynamically change tables (via proof-set-value)
+;;;
+
+(defun proof-token-name-alist ()
+  "Function called after the current token name alist has been changed.
+Switch off tokens in all buffers, recalculate maps, turn on again."
+  (when proof-unicode-tokens2-initialised ; not on startup
+    (when (proof-ass unicode-tokens2-enable)
+      (proof-map-buffers 
+       (proof-buffers-in-mode proof-mode-for-script)
+       (unicode-tokens2-mode 0)))
+    (setq unicode-tokens2-token-name-alist (proof-ass token-name-alist))
+    (unicode-tokens2-initialise)
+    (when (proof-ass unicode-tokens2-enable)
+      (proof-map-buffers 
+       (proof-buffers-in-mode proof-mode-for-script)
+       (unicode-tokens2-mode 1)))))
+  
+(defun proof-shortcut-alist ()
+  "Function called after the shortcut alist has been changed.
+Updates the input mapping for reading shortcuts."
+  (when proof-unicode-tokens2-initialised ; not on startup
+    (setq unicode-tokens2-shortcut-alist (proof-ass shortcut-alist))
+    (unicode-tokens2-initialise)))
+
+;;;
+;;; Interface to shell
+;;;
+
+
+(defun proof-unicode-tokens2-activate-prover ()
+  (when (and proof-xsym-activate-command 
+	     (proof-shell-live-buffer)
+	     (proof-shell-available-p))
+    (proof-shell-invisible-command-invisible-result
+     proof-xsym-activate-command)))
+
+(defun proof-unicode-tokens2-deactivate-prover ()
+  (when (and proof-xsym-deactivate-command 
+	     ;; NB: clash with X-symbols since use same commands in prover!
+	     (not (proof-ass x-symbol-enable))
+	     (proof-shell-live-buffer)
+	     (proof-shell-available-p))
+    (proof-shell-invisible-command-invisible-result
+     proof-xsym-deactivate-command)))
+
+;;; NB: we shouldn't bother load this if it's not enabled.
+;;;###autoload
+(defun proof-unicode-tokens2-shell-config ()
+  (when (proof-ass unicode-tokens2-enable)
+    (add-hook 'proof-shell-insert-hook
+	      'proof-unicode-tokens2-encode-shell-input)
+    (proof-unicode-tokens2-activate-prover))
+  (unless (proof-ass unicode-tokens2-enable)
+    (remove-hook 'proof-shell-insert-hook
+		 'proof-unicode-tokens2-encode-shell-input)
+    (proof-unicode-tokens2-deactivate-prover)))
+
+(defun proof-unicode-tokens2-encode-shell-input ()
+  "Encode shell input in the variable STRING.
+A value for proof-shell-insert-hook."
+  (if (proof-ass unicode-tokens2-enable)
+      (with-temp-buffer ;; TODO: better to do directly in *prover*
+	(insert string)
+	(unicode-tokens2-unicode-to-tokens)
+	(setq string (buffer-substring-no-properties
+		      (point-min) (point-max))))))
+
+(provide 'proof-unicode-tokens2)
+;; End of proof-unicode-tokens2.el
diff --git a/isar/isar-unicode-tokens2.el b/isar/isar-unicode-tokens2.el
new file mode 100644
index 00000000..e252ebdc
--- /dev/null
+++ b/isar/isar-unicode-tokens2.el
@@ -0,0 +1,595 @@
+;;; -*- coding: utf-8; -*-
+;; isar-unicode-tokens2.el --- Tokens for Unicode Tokens package
+;;
+;; Copyright(C) 2008 David Aspinall / LFCS Edinburgh
+;; Author:    David Aspinall <David.Aspinall@ed.ac.uk>
+;;
+;; This file is loaded by `proof-unicode-tokens2.el'.
+;;
+;; It sets the variables defined at the top of unicode-tokens2.el,
+;; unicode-tokens2-<foo> is set from isar-<foo>.  See the corresponding
+;; variable for documentation.
+;;
+
+(defconst isar-token-format "\\<%s>")
+(defconst isar-charref-format "\\<#x%x>")
+(defconst isar-token-prefix "\\<")
+(defconst isar-token-suffix ">")
+(defconst isar-token-match "\\\\<\\([a-zA-Z0-9]+\\)")
+(defconst isar-control-token-match "\\\\<^\\([a-zA-Z0-9]+\\)>")
+(defconst isar-control-token-format "\\<^%s>")
+(defconst isar-hexcode-match "\\\\<\\(#[xX][0-9A-Fa-f]+\\)")
+(defconst isar-next-character-regexp "\\\\<#[xX][0-9A-Fa-f]+>\\|.")
+
+(defcustom isar-token-name-alist
+  (flet 
+      ((script (s) (format "\\<^bscript>%s\\<^escript>" s))
+       (frakt (s) (format "\\<^bfrak>%s\\<^efrak>" s))
+       (serif (s) (format "\\<^bserif>%s\\<^eserif>" s))
+       (bold (s) (format "\\<^bbold>%s\\<^ebold>" s)))
+
+    ;; property-based annotations.  More direct for input
+    ;; but inverse mapping tricky: need to ignore for 
+    ;; reverse and fold \<^bscript>A\<^escript> -> \<AA> etc.
+    ;; Extra dimension in table required.
+    ;; (Also: not supported in XEmacs?)
+      ;((script (s) (unicode-tokens2-annotate-string "script" s))
+      ; (frakt    (s) (unicode-tokens2-annotate-string "frak" s))
+      ; (serif     (s) (unicode-tokens2-annotate-string "serif" s)))
+  `(; token name, unicode char sequence
+    ;; Based on isabellesym.sty,v 1.45 2006/01/05
+
+    ;; Bold numerals
+    ("one" . ,(bold "1"))
+    ("two" . ,(bold "2"))
+    ("three" . ,(bold "3"))
+    ("four" . ,(bold "4"))
+    ("five" . ,(bold "5"))
+    ("six" . ,(bold "6"))
+    ("seven" . ,(bold "7"))
+    ("eight" . ,(bold "8"))
+    ("nine" . ,(bold "9"))
+    ;; Mathcal 
+    ("A" . ,(script "A"))
+    ("B" . ,(script "B"))
+    ("C" . ,(script "C"))
+    ("D" . ,(script "D"))
+    ("E" . ,(script "E"))
+    ("F" . ,(script "F"))
+    ("G" . ,(script "G"))
+    ("H" . ,(script "H"))
+    ("I" . ,(script "I"))
+    ("J" . ,(script "J"))
+    ("K" . ,(script "K"))
+    ("L" . ,(script "L"))
+    ("M" . ,(script "M"))
+    ("N" . ,(script "N"))
+    ("O" . ,(script "O"))
+    ("P" . ,(script "P"))
+    ("Q" . ,(script "Q"))
+    ("R" . ,(script "R"))
+    ("S" . ,(script "S"))
+    ("T" . ,(script "T"))
+    ("U" . ,(script "U"))
+    ("V" . ,(script "V"))
+    ("W" . ,(script "W"))
+    ("X" . ,(script "X"))
+    ("Y" . ,(script "Y"))
+    ("Z" . ,(script "Z"))
+    ;; Math roman
+    ("a" . ,(serif "a"))
+    ("b" . ,(serif "b"))
+    ("c" . ,(serif "c"))
+    ("d" . ,(serif "d"))
+    ("e" . ,(serif "e"))
+    ("f" . ,(serif "f"))
+    ("g" . ,(serif "g"))
+    ("h" . ,(serif "h"))
+    ("i" . ,(serif "i"))
+    ("j" . ,(serif "j"))
+    ("k" . ,(serif "k"))
+    ("l" . ,(serif "l"))
+    ("m" . ,(serif "m"))
+    ("n" . ,(serif "n"))
+    ("o" . ,(serif "o"))
+    ("p" . ,(serif "p"))
+    ("q" . ,(serif "q"))
+    ("r" . ,(serif "r"))
+    ("s" . ,(serif "s"))
+    ("t" . ,(serif "t"))
+    ("u" . ,(serif "u"))
+    ("v" . ,(serif "v"))
+    ("w" . ,(serif "w"))
+    ("x" . ,(serif "x"))
+    ("y" . ,(serif "y"))
+    ("z" . ,(serif "z"))
+    ;; Fraktur
+    ("AA" . ,(frakt "A"))
+    ("BB" . ,(frakt "B"))
+    ("CC" . ,(frakt "C"))
+    ("DD" . ,(frakt "D"))
+    ("EE" . ,(frakt "E"))
+    ("FF" . ,(frakt "F"))
+    ("GG" . ,(frakt "G"))
+    ("HH" . ,(frakt "H"))
+    ("II" . ,(frakt "I"))
+    ("JJ" . ,(frakt "J"))
+    ("KK" . ,(frakt "K"))
+    ("LL" . ,(frakt "L"))
+    ("MM" . ,(frakt "M"))
+    ("NN" . ,(frakt "N"))
+    ("OO" . ,(frakt "O"))
+    ("PP" . ,(frakt "P"))
+    ("QQ" . ,(frakt "Q"))
+    ("RR" . ,(frakt "R"))
+    ("SS" . ,(frakt "S"))
+    ("TT" . ,(frakt "T"))
+    ("UU" . ,(frakt "U"))
+    ("VV" . ,(frakt "V"))
+    ("WW" . ,(frakt "W"))
+    ("XX" . ,(frakt "X"))
+    ("YY" . ,(frakt "Y"))
+    ("ZZ" . ,(frakt "Z"))
+    ("aa" . ,(frakt "a"))
+    ("bb" . ,(frakt "b"))
+    ("cc" . ,(frakt "c"))
+    ("dd" . ,(frakt "d"))
+    ("ee" . ,(frakt "e"))
+    ("ff" . ,(frakt "f"))
+    ("gg" . ,(frakt "g"))
+    ("hh" . ,(frakt "h"))
+    ("ii" . ,(frakt "i"))
+    ("jj" . ,(frakt "j"))
+    ("kk" . ,(frakt "k"))
+    ("ll" . ,(frakt "l"))
+    ("mm" . ,(frakt "m"))
+    ("nn" . ,(frakt "n"))
+    ("oo" . ,(frakt "o"))
+    ("pp" . ,(frakt "p"))
+    ("qq" . ,(frakt "q"))
+    ("rr" . ,(frakt "r"))
+    ("ss" . ,(frakt "s"))
+    ("tt" . ,(frakt "t"))
+    ("uu" . ,(frakt "u"))
+    ("vv" . ,(frakt "v"))
+    ("ww" . ,(frakt "w"))
+    ("xx" . ,(frakt "x"))
+    ("yy" . ,(frakt "y"))
+    ("zz" . ,(frakt "z"))
+    ("alpha" . "α")
+    ("beta" . "β")
+    ("gamma" . "γ")
+    ("delta" . "δ")
+    ("epsilon" . "ε") ; actually varepsilon (some is epsilon)
+    ("zeta" . "ζ")
+    ("eta" . "η")
+    ("theta" . "θ")
+    ("iota" . "ι")
+    ("kappa" . "κ")
+    ("lambda" . "λ")
+    ("mu" . "μ")
+    ("nu" . "ν")
+    ("xi" . "ξ")
+    ("pi" . "π")
+    ("rho" . "ρ")
+    ("sigma" . "σ")
+    ("tau" . "τ")
+    ("upsilon" . "υ")
+    ("phi" . "ϕ")
+    ("chi" . "χ")
+    ("psi" . "ψ")
+    ("omega" . "ω")
+    ("Gamma" . "Γ")
+    ("Delta" . "Δ")
+    ("Theta" . "Θ")
+    ("Lambda" . "Λ")
+    ("Xi" . "Ξ")
+    ("Pi" . "Π")
+    ("Sigma" . "Σ")
+    ("Upsilon" . "Υ")
+    ("Phi" . "Φ")
+    ("Psi" . "Ψ")
+    ("Omega" . "Ω")
+;;  ("bool" . "")
+    ("complex" . "ℂ")
+    ("nat" . "ℕ")
+    ("rat" . "ℚ")
+    ("real" . "ℝ")
+    ("int" . "ℤ")
+    ;; Arrows
+    ("leftarrow" . "←")
+    ("rightarrow" . "→")
+    ("Leftarrow" . "⇐")
+    ("Rightarrow" . "⇒")
+    ("leftrightarrow" . "↔")
+    ("Leftrightarrow" . "⇔")
+    ("mapsto" . "↦")
+    ;; Real long symbols, may work in some places
+    ("longleftarrow" . "⟵")
+    ("Longleftarrow" . "⟸")
+    ("longrightarrow" . "⟶")
+    ("Longrightarrow" . "⟹")
+    ("longleftrightarrow" . "⟷")
+    ("Longleftrightarrow" . "⟺")
+    ("longmapsto" . "⟼")
+    ;; Faked long symbols, for use otherwise:
+;;;     ("longleftarrow" . "←–")
+;;;     ("Longleftarrow" . "⇐–")
+;;;     ("longrightarrow" . "–→")
+;;;     ("Longrightarrow" . "–⇒")
+;;;     ("longleftrightarrow" . "←→")
+;;;     ("Longleftrightarrow" . "⇐⇒")
+;;;     ("longmapsto" . "❘→")
+    ("midarrow" . "–") ; #x002013 en dash
+    ("Midarrow" . "‗") ; #x002017 double low line (not mid)
+    ("hookleftarrow" . "↩")
+    ("hookrightarrow" . "↪")
+    ("leftharpoondown" . "↽")
+    ("rightharpoondown" . "⇁")
+    ("leftharpoonup" . "↼")
+    ("rightharpoonup" . "⇀")
+    ("rightleftharpoons" . "⇌")
+    ("leadsto" . "↝")
+    ("downharpoonleft" . "⇃")
+    ("downharpoonright" . "⇂")
+    ("upharpoonleft" . "↿")
+    ;; ("upharpoonright" . "↾") overlaps restriction
+    ("restriction" . "↾") ;; 
+    ("Colon" . "∷")
+    ("up" . "↑")
+    ("Up" . "⇑")
+    ("down" . "↓")
+    ("Down" . "⇓")
+    ("updown" . "↕")
+    ("Updown" . "⇕")
+    ("langle" . "⟪")
+    ("rangle" . "⟫")
+    ("lceil" . "⌈")
+    ("rceil" . "⌉")
+    ("lfloor" . "⌊")
+    ("rfloor" . "⌋")
+    ("lparr" . "⦇")
+    ("rparr" . "⦈")
+    ("lbrakk" . "⟦")
+    ("rbrakk" . "⟧")
+;;     ("lbrace" . "") TODO
+;;    ("rbrace" . "")
+    ("guillemotleft" . "«")
+    ("guillemotright" . "»")
+    ("bottom" . "⊥")
+    ("top" . "⊤")
+    ("and" . "∧")
+    ("And" . "⋀")
+    ("or" . "∨")
+    ("Or" . "⋁")
+    ("forall" . "∀")
+    ("exists" . "∃")
+    ("nexists" . "∄")
+    ("not" . "¬")
+    ("box" . "□")
+    ("diamond" . "◇")
+    ("turnstile" . "⊢")
+    ("Turnstile" . "⊨")
+    ("tturnstile" . "⊩")
+    ("TTurnstile" . "⊫")
+    ("stileturn" . "⊣")
+    ("surd" . "√")
+    ("le" . "≤")
+    ("ge" . "≥")
+    ("lless" . "≪")
+    ("ggreater" . "≫")
+    ("lesssim" . "⪍")
+    ("greatersim" . "⪎")
+    ("lessapprox" . "⪅")
+    ("greaterapprox" . "⪆")
+    ("in" . "∈")
+    ("notin" . "∉")
+    ("subset" . "⊂")
+    ("supset" . "⊃")
+    ("subseteq" . "⊆")
+    ("supseteq" . "⊇")    
+    ("sqsubset" . "⊏")
+    ("sqsupset" . "⊐")
+    ("sqsubseteq" . "⊑")
+    ("sqsupseteq" . "⊒")
+    ("inter" . "∩")
+    ("Inter" . "⋂")
+    ("union" . "∪")
+    ("Union" . "⋃")
+    ("squnion" . "⊔")
+    ("Squnion" . "⨆")
+    ("sqinter" . "⊓")
+    ("Sqinter" . "⨅")
+    ("setminus" . "∖")
+    ("propto" . "∝")
+    ("uplus" . "⊎")
+    ("Uplus" . "⨄")
+    ("noteq" . "≠")
+    ("sim" . "∼")
+    ("doteq" . "≐")
+    ("simeq" . "≃")
+    ("approx" . "≈")
+    ("asymp" . "≍")
+    ("cong" . "≅")
+    ("smile" . "⌣")
+    ("equiv" . "≡")
+    ("frown" . "⌢")
+    ("Join" . "⨝")
+    ("bowtie" . "⋈")
+    ("prec" . "≺")
+    ("succ" . "≻")
+    ("preceq" . "≼")
+    ("succeq" . "≽")
+    ("parallel" . "∥")
+    ("bar" . "¦")
+    ("plusminus" . "±")
+    ("minusplus" . "∓")
+    ("times" . "×")
+    ("div" . "÷")
+    ("cdot" . "⋅")
+    ("star" . "⋆")
+    ("bullet" . "∙")
+    ("circ" . "∘")
+    ("dagger" . "†")
+    ("ddagger" . "‡")
+    ("lhd" . "⊲")
+    ("rhd" . "⊳")
+    ("unlhd" . "⊴")
+    ("unrhd" . "⊵")
+    ("triangleleft" . "◁")
+    ("triangleright" . "▷")
+    ("triangle" . "▵") ; or △
+    ("triangleq" . "≜")
+    ("oplus" . "⊕")
+    ("Oplus" . "⨁")
+    ("otimes" . "⊗")
+    ("Otimes" . "⨂")
+    ("odot" . "⊙")
+    ("Odot" . "⨀")
+    ("ominus" . "⊖")
+    ("oslash" . "⊘") 
+    ("dots" . "…")
+    ("cdots" . "⋯")
+    ("Sum" . "∑")
+    ("Prod" . "∏")
+    ("Coprod" . "∐")
+    ("infinity" . "∞")
+    ("integral" . "∫")
+    ("ointegral" . "∮")
+    ("clubsuit" . "♣")
+    ("diamondsuit" . "♢")
+    ("heartsuit" . "♡")
+    ("spadesuit" . "♠")
+    ("aleph" . "ℵ")
+    ("emptyset" . "∅")
+    ("nabla" . "∇")
+    ("partial" . "∂")
+    ("Re" . "ℜ")
+    ("Im" . "ℑ")
+    ("flat" . "♭")
+    ("natural" . "♮")
+    ("sharp" . "♯")
+    ("angle" . "∠")
+    ("copyright" . "©")
+    ("registered" . "®")
+    ("hyphen" . "‐")
+    ("inverse" . "¯¹") ; X-Symb: just "¯"
+    ("onesuperior" . "¹")
+    ("twosuperior" . "²")
+    ("threesuperior" . "³")
+    ("onequarter" . "¼")
+    ("onehalf" . "½")
+    ("threequarters" . "¾")
+    ("ordmasculine" . "º")
+    ("ordfeminine" . "ª")
+    ("section" . "§")
+    ("paragraph" . "¶")
+    ("exclamdown" . "¡")
+    ("questiondown" . "¿")
+    ("euro" . "€")
+    ("pounds" . "£")
+    ("yen" . "¥")
+    ("cent" . "¢")
+    ("currency" . "¤")
+    ("degree" . "°")
+    ("amalg" . "⨿")
+    ("mho" . "℧")
+    ("lozenge" . "◊")
+    ("wp" . "℘")
+    ("wrong" . "≀") ;; #x002307
+;;    ("struct" . "") ;; TODO
+    ("acute" . "´")
+    ("index" . "ı")
+    ("dieresis" . "¨")
+    ("cedilla" . "¸")
+    ("hungarumlaut" . "ʺ")
+    ("spacespace" . "⁣⁣") ;; two #x002063
+;    ("module" . "") ??
+    ("some" . "ϵ")
+    
+    ;; Not in Standard Isabelle Symbols
+    ;; (some are in X-Symbols)
+
+    ("stareq" . "≛")
+    ("defeq" . "≝")
+    ("questioneq" . "≟")
+    ("vartheta" . "ϑ")
+    ; ("o" . "ø")
+    ("varpi" . "ϖ")
+    ("varrho" . "ϱ")
+    ("varsigma" . "ς")
+    ("varphi" . "φ")
+    ("hbar" . "ℏ")
+    ("ell" . "ℓ")
+    ("ast" . "∗")
+
+    ("bigcirc" . "◯")
+    ("bigtriangleup" . "△")
+    ("bigtriangledown" . "▽")
+    ("ni" . "∋")
+    ("mid" . "∣")
+    ("notlt" . "≮")
+    ("notle" . "≰")
+    ("notprec" . "⊀")
+    ("notpreceq" . "⋠")
+    ("notsubset" . "⊄")
+    ("notsubseteq" . "⊈")
+    ("notsqsubseteq" . "⋢")
+    ("notgt" . "≯")
+    ("notge" . "≱")
+    ("notsucc" . "⊁")
+    ("notsucceq" . "⋡")
+    ("notsupset" . "⊅")
+    ("notsupseteq" . "⊉")
+    ("notsqsupseteq" . "⋣")
+    ("notequiv" . "≢")
+    ("notsim" . "≁")
+    ("notsimeq" . "≄")
+    ("notapprox" . "≉")
+    ("notcong" . "≇")
+    ("notasymp" . "≭")
+    ("nearrow" . "↗")
+    ("searrow" . "↘")
+    ("swarrow" . "↙")
+    ("nwarrow" . "↖")
+    ("vdots" . "⋮")
+    ("ddots" . "⋱")
+    ("closequote" . "’")
+    ("openquote" . "‘")
+    ("opendblquote" . "”")
+    ("closedblquote" . "“")
+    ("emdash" . "—")
+    ("prime" . "′")
+    ("doubleprime" . "″")
+    ("tripleprime" . "‴")
+    ("quadrupleprime" . "⁗")
+    ("nbspace" . " ")
+    ("thinspace" . " ")
+    ("notni" . "∌")
+    ("colonequals" . "≔")
+    ("foursuperior" . "⁴")
+    ("fivesuperior" . "⁵")
+    ("sixsuperior" . "⁶")
+    ("sevensuperior" . "⁷")
+    ("eightsuperior" . "⁸")
+    ("ninesuperior" . "⁹")))
+  "Table mapping Isabelle symbol token names to Unicode strings.
+
+You can adjust this table to add more entries, or to change entries for
+glyphs that not are available in your Emacs or chosen font.
+
+The token TNAME is made into the token \\< TNAME >.
+The string mapping can be anything, but should be such that
+tokens can be uniquely recovered from a decoded text; otherwise
+results will be undefined when files are saved."
+  :type '(repeat (cons (string :tag "Token name")
+		       (string :tag "Unicode string")))
+  :set 'proof-set-value
+  :group 'isabelle
+  :tag "Isabelle Unicode Token Mapping")
+
+
+(defcustom isar-shortcut-alist
+  '(; short cut, unicode string
+;    ("<>" . "⋄")
+;    ("|>" . "⊳")
+    ("\\/" . "∨")
+    ("/\\" . "∧")
+    ("+O" . "⊕")
+    ("-O" . "⊖")
+    ("xO" . "⊗")
+    ("/O" . "⊘")
+    (".O" . "⊙")
+    ("|+" . "†")
+    ("|++" . "‡")
+    ("<=" . "≤")
+    ("|-" . "⊢")
+    (">=" . "≥")
+    ("-|" . "⊣")
+    ("||" . "∥")
+    ("==" . "≡")
+    ("~=" . "≃")
+    ("~~~" . "≍")
+    ("~~" . "≈")
+    ("~==" . "≅")
+    ("|<>|" . "⋈")
+    ("|=" . "⊨")
+    ("=." . "≐")
+    ("_|_" . "⊥")
+    ("</" . "≮")
+    (">=/" . "≱")
+    ("=/" . "≠")
+    ("==/" . "≢")
+    ("~/" . "≁")
+    ("~=/" . "≄")
+    ("~~/" . "≉")
+    ("~==/" . "≇")
+    ("<-" . "←")
+    ("<=" . "⇐")
+    ("->" . "→")
+    ("=>" . "⇒")
+    ("<->" . "↔")
+    ("<=>" . "⇔")
+;    ("|->" . "↦") 
+    ("<--" . "⟵")
+    ("<==" . "⟸")
+    ("-->" . "⟶")
+    ("==>" . "⟹")
+    ("<==>" . "⟷")
+    ("|-->" . "⟼")
+    ("<-->" . "⟷")
+    ("<<" . "⟪")
+    ("[|" . "⟦")
+    (">>" . "⟫")
+    ("|]" . "⟧")
+    ("``" . "”")
+;    ("''" . "“")
+;    ("--" . "–")
+    ("---" . "—")
+;    ("''" . "″")
+;    ("'''" . "‴")
+;    ("''''" . "⁗")
+;    (":=" . "≔")
+    ;; some word shortcuts, started with backslash otherwise
+    ;; too annoying.
+    ("\nat" . "ℕ")
+    ("\int" . "ℤ")
+    ("\rat" . "ℚ")
+    ("\real" . "ℝ")
+    ("\complex" . "ℂ")
+    ("\euro" . "€")
+    ("\yen" . "¥")
+    ("\cent" . "¢"))
+  "Shortcut key sequence table for Unicode strings.
+
+You can adjust this table to add more entries, or to change entries for
+glyphs that not are available in your Emacs or chosen font.
+
+These shortcuts are only used for input; no reverse conversion is
+performed.  But if tokens exist for the target of shortcuts, they
+will be used on saving the buffer."
+  :type '(repeat (cons (string :tag "Shortcut sequence")
+		       (string :tag "Unicode string")))
+  :set 'proof-set-value
+  :group 'isabelle
+  :tag "Isabelle Unicode Token Mapping")
+
+  
+
+
+;;
+;; prover symbol support 
+;;
+
+(eval-after-load "isar" 
+  '(setq 
+    proof-xsym-activate-command
+    (isar-markup-ml "change print_mode (insert (op =) \"xsymbols\")")
+    proof-xsym-deactivate-command
+    (isar-markup-ml "change print_mode (remove (op =) \"xsymbols\")")))
+
+
+
+(provide 'isar-unicode-tokens2)
+
+;;; isar-unicode-tokens2.el ends here
diff --git a/lib/format-hack.el b/lib/format-hack.el
new file mode 100644
index 00000000..27526b3c
--- /dev/null
+++ b/lib/format-hack.el
@@ -0,0 +1,195 @@
+
+    (rear-nonsticky	(t	"idsubscript1")
+			(t	"subscript1")
+			(t	"superscript1"))))
+
+(defun format-deannotate-region (from to translations next-fn)
+  "Translate annotations in the region into text properties.
+This sets text properties between FROM to TO as directed by the
+TRANSLATIONS and NEXT-FN arguments.
+
+NEXT-FN is a function that searches forward from point for an annotation.
+It should return a list of 4 elements: \(BEGIN END NAME POSITIVE).  BEGIN and
+END are buffer positions bounding the annotation, NAME is the name searched
+for in TRANSLATIONS, and POSITIVE should be non-nil if this annotation marks
+the beginning of a region with some property, or nil if it ends the region.
+NEXT-FN should return nil if there are no annotations after point.
+
+The basic format of the TRANSLATIONS argument is described in the
+documentation for the `format-annotate-region' function.  There are some
+additional things to keep in mind for decoding, though:
+
+When an annotation is found, the TRANSLATIONS list is searched for a
+text-property name and value that corresponds to that annotation.  If the
+text-property has several annotations associated with it, it will be used only
+if the other annotations are also in effect at that point.  The first match
+found whose annotations are all present is used.
+
+The text property thus determined is set to the value over the region between
+the opening and closing annotations.  However, if the text-property name has a
+non-nil `format-list-valued' property, then the value will be consed onto the
+surrounding value of the property, rather than replacing that value.
+
+There are some special symbols that can be used in the \"property\" slot of
+the TRANSLATIONS list: PARAMETER and FUNCTION \(spelled in uppercase).
+Annotations listed under the pseudo-property PARAMETER are considered to be
+arguments of the immediately surrounding annotation; the text between the
+opening and closing parameter annotations is deleted from the buffer but saved
+as a string.
+
+The surrounding annotation should be listed under the pseudo-property
+FUNCTION.  Instead of inserting a text-property for this annotation,
+the function listed in the VALUE slot is called to make whatever
+changes are appropriate.  It can also return a list of the form
+\(START LOC PROP VALUE) which specifies a property to put on.  The
+function's first two arguments are the START and END locations, and
+the rest of the arguments are any PARAMETERs found in that region.
+
+Any annotations that are found by NEXT-FN but not defined by TRANSLATIONS
+are saved as values of the `unknown' text-property \(which is list-valued).
+The TRANSLATIONS list should usually contain an entry of the form
+    \(unknown \(nil format-annotate-value))
+to write these unknown annotations back into the file."
+  (save-excursion
+    (save-restriction
+      (narrow-to-region (point-min) to)
+      (goto-char from)
+      (let (next open-ans todo loc unknown-ans)
+	(while (setq next (funcall next-fn))
+	  (let* ((loc      (nth 0 next))
+		 (end      (nth 1 next))
+		 (name     (nth 2 next))
+		 (positive (nth 3 next))
+		 (found    nil))
+
+	    ;; Delete the annotation
+	    (delete-region loc end)
+	    (cond
+	     ;; Positive annotations are stacked, remembering location
+	     (positive (push `(,name ((,loc . nil))) open-ans))
+	     ;; It is a negative annotation:
+	     ;; Close the top annotation & add its text property.
+	     ;; If the file's nesting is messed up, the close might not match
+	     ;; the top thing on the open-annotations stack.
+	     ;; If no matching annotation is open, just ignore the close.
+	     ((not (assoc name open-ans))
+	      (message "Extra closing annotation (%s) in file" name))
+	     ;; If one is open, but not on the top of the stack, close
+	     ;; the things in between as well.  Set `found' when the real
+	     ;; one is closed.
+	     (t
+	      (while (not found)
+		(let* ((top (car open-ans))	; first on stack: should match.
+		       (top-name (car top))	; text property name
+		       (top-extents (nth 1 top)) ; property regions
+		       (params (cdr (cdr top)))	; parameters
+		       (aalist translations)
+		       (matched nil))
+		  (if (equal name top-name)
+		      (setq found t)
+		    (message "Improper nesting in file."))
+		  ;; Look through property names in TRANSLATIONS
+		  (while aalist
+		    (let ((prop (car (car aalist)))
+			  (alist (cdr (car aalist))))
+		      ;; And look through values for each property
+		      (while alist
+			(let ((value (car (car alist)))
+			      (ans (cdr (car alist))))
+			  (if (member top-name ans)
+			      ;; This annotation is listed, but still have to
+			      ;; check if multiple annotations are satisfied
+			      (if (member nil (mapcar (lambda (r)
+							(assoc r open-ans))
+						      ans))
+				  nil	; multiple ans not satisfied
+				;; If there are multiple annotations going
+				;; into one text property, split up the other
+				;; annotations so they apply individually to
+				;; the other regions.
+				(setcdr (car top-extents) loc)
+				(let ((to-split ans) this-one extents)
+				  (while to-split
+				    (setq this-one
+					  (assoc (car to-split) open-ans)
+					  extents (nth 1 this-one))
+				    (if (not (eq this-one top))
+					(setcar (cdr this-one)
+						(format-subtract-regions
+						 extents top-extents)))
+				    (setq to-split (cdr to-split))))
+				;; Set loop variables to nil so loop
+				;; will exit.
+				(setq alist nil aalist nil matched t
+				      ;; pop annotation off stack.
+				      open-ans (cdr open-ans))
+				(let ((extents top-extents)
+				      (start (car (car top-extents)))
+				      (loc (cdr (car top-extents))))
+				  (while extents
+				    (cond
+				     ;; Check for pseudo-properties
+				     ((eq prop 'PARAMETER)
+				      ;; A parameter of the top open ann:
+				      ;; delete text and use as arg.
+				      (if open-ans
+					  ;; (If nothing open, discard).
+					  (setq open-ans
+						(cons
+						 (append (car open-ans)
+							 (list
+							  (buffer-substring
+							   start loc)))
+						 (cdr open-ans))))
+				      (delete-region start loc))
+				     ((eq prop 'FUNCTION)
+				      ;; Not a property, but a function.
+				      (let ((rtn
+					     (apply value start loc params)))
+					(if rtn (push rtn todo))))
+				     (t
+				      ;; Normal property/value pair
+				      (setq todo
+					    (cons (list start loc prop value)
+						  todo))))
+				    (setq extents (cdr extents)
+					  start (car (car extents))
+					  loc (cdr (car extents))))))))
+			(setq alist (cdr alist))))
+		    (setq aalist (cdr aalist)))
+		  (if (not matched)
+		      ;; Didn't find any match for the annotation:
+		      ;; Store as value of text-property `unknown'.
+		      (let ((extents top-extents)
+			    (start (car (car top-extents)))
+			    (loc (or (cdr (car top-extents)) loc)))
+			(while extents
+			  (setq open-ans (cdr open-ans)
+				todo (cons (list start loc 'unknown top-name)
+					   todo)
+				unknown-ans (cons name unknown-ans)
+				extents (cdr extents)
+				start (car (car extents))
+				loc (cdr (car extents))))))))))))
+
+	;; Once entire file has been scanned, add the properties.
+	(while todo
+	  (let* ((item (car todo))
+		 (from (nth 0 item))
+		 (to   (nth 1 item))
+		 (prop (nth 2 item))
+		 (val  (nth 3 item)))
+
+	    (if (numberp val)	; add to ambient value if numeric
+		(format-property-increment-region from to prop val 0)
+	      (put-text-property
+	       from to prop
+	       (cond ((get prop 'format-list-valued) ; value gets consed onto
+						     ; list-valued properties
+		      (let ((prev (get-text-property from prop)))
+			(cons val (if (listp prev) prev (list prev)))))
+		     (t val))))) ; normally, just set to val.
+	  (setq todo (cdr todo)))
+
+	(if unknown-ans
+	    (message "Unknown annotations: %s" unknown-ans))))))
diff --git a/lib/unicode-tokens2.el b/lib/unicode-tokens2.el
new file mode 100644
index 00000000..fe5b8306
--- /dev/null
+++ b/lib/unicode-tokens2.el
@@ -0,0 +1,666 @@
+;;; unicode-tokens2.el --- Support for editing tokens for Unicode characters
+;;
+;; Copyright(C) 2008 David Aspinall / LFCS Edinburgh
+;; Author:    David Aspinall <David.Aspinall@ed.ac.uk>
+;; License:     GPL (GNU GENERAL PUBLIC LICENSE)
+;;
+;; $Id$
+;;
+;; A few functions are adapted from `xmlunicode.el' by Norman Walsh.
+;; Created: 2004-07-21, Version: 1.6, Copyright (C) 2003 Norman Walsh
+;;
+;; This is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; This software is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+;;
+;; This is a partial replacement for X-Symbol for Proof General.
+;; STATUS: experimental.  
+;;
+;; Functions to help insert tokens that represent Unicode characters
+;; and control code sequences for changing the layout.  Character
+;; tokens are useful for programs that do not understand a Unicode
+;; encoding.
+;; 
+
+;; TODO:
+;; -- add input methods for subscript/superscripts (further props in general)
+;; -- after change function so inserting control sequences works? or other support
+;; -- one-char subs should not be sticky so doesn't extend
+;; -- make property removal more accurate/patch in font-lock
+;; -- disentangle Isabelle specific code
+;; -- perhaps separate out short-cut input method and don't use for tokens
+;; -- cleanup insertion functions
+;; -- investigate testing for an appropriate glyph
+
+(require 'cl)
+
+(require 'unicode-chars)		; list of Unicode characters
+
+;;
+;; Variables that should be set
+;; (default settings are for XML, but configuration incomplete; 
+;;  use xmlunicode.el instead)
+;;
+
+(defvar unicode-tokens2-charref-format "&#x%x;"
+  "The format for numeric character references")
+
+(defvar unicode-tokens2-token-format "&%x;"
+  "The format for token character references")
+
+(defvar unicode-tokens2-token-name-alist nil
+  "Mapping of token names to Unicode strings.")
+
+(defvar unicode-tokens2-glyph-list nil
+  "List of available glyphs, as characters.
+If not set, constructed to include glyphs for all tokens. ")
+
+(defvar unicode-tokens2-token-prefix "&"
+  "Prefix for start of tokens to insert.")
+
+(defvar unicode-tokens2-token-suffix ";"
+  "Suffix for end of tokens to insert.")
+
+(defvar unicode-tokens2-control-token-match nil
+  "Regexp matching tokens")
+
+(defvar unicode-tokens2-token-match "&\\([A-Za-z]\\);"
+  "Regexp matching tokens")
+
+(defvar unicode-tokens2-hexcode-match "&#[xX]\\([0-9a-fA-F]+\\);"
+  "Regexp matching numeric token string")
+
+(defvar unicode-tokens2-next-character-regexp "&#[xX]\\([0-9a-fA-F]+\\);\\|."
+  "Regexp matching a logical character following a control code.")
+
+(defvar unicode-tokens2-shortcut-alist
+  "An alist of keyboard shortcuts to unicode strings.
+The alist is added to the input mode for tokens.
+Behaviour is much like abbrev.")
+
+;;
+;; Faces
+;;
+
+;;
+;; TODO: make these into faces but extract attributes
+;; to use in `unicode-tokens2-annotation-translations'.
+;; Let that be dynamically changeable
+;; TODO: choose family acccording to likely architecture and what's available
+(cond
+ ((not (featurep 'xemacs))
+(defface unicode-tokens2-script-font-face
+  (cond
+   ((eq window-system 'x) ; Linux/Unix
+    '((t :family "URW Chancery L")))
+   ((or ; Mac
+     (eq window-system 'ns)
+     (eq window-system 'carbon))
+    '((t :family "Lucida Calligraphy"))))
+  "Script font face")
+
+(defface unicode-tokens2-fraktur-font-face
+  (cond
+   ((eq window-system 'x) ; Linux/Unix
+    '((t :family "URW Bookman L"))) ;; not at all black letter!
+   ((or ; Mac
+     (eq window-system 'ns)
+     (eq window-system 'carbon))
+    '((t :family "Lucida Blackletter"))))
+  "Fraktur font face")
+
+(defface unicode-tokens2-serif-font-face
+  (cond
+   ((eq window-system 'x) ; Linux/Unix
+    '((t :family "Liberation Serif"))) 
+   ((or ; Mac
+     (eq window-system 'ns)
+     (eq window-system 'carbon))
+    '((t :family "Lucida"))))
+  "Serif (roman) font face")))
+
+
+;;
+;; Variables initialised in unicode-tokens2-initialise 
+;;
+
+(defvar unicode-tokens2-max-token-length 10
+  "Maximum length of a token in underlying encoding.")
+
+(defvar unicode-tokens2-codept-charname-alist nil
+  "Alist mapping unicode code point to character names.")
+
+(defvar unicode-tokens2-token-alist nil
+  "Mapping of tokens to Unicode strings.
+Also used as a flag to detect if `unicode-tokens2-initialise' has been called.")
+
+(defvar unicode-tokens2-ustring-alist nil
+  "Mapping of Unicode strings to tokens.")
+
+
+;;
+;;; Code:
+;;
+
+(defun unicode-tokens2-insert-char (arg codepoint)
+  "Insert the Unicode character identified by CODEPOINT.
+If ARG is non-nil, ignore available glyphs."
+  (let ((glyph (memq codepoint unicode-tokens2-glyph-list)))
+    (cond
+     ((and (decode-char 'ucs codepoint) (or arg glyph))
+      (ucs-insert codepoint)) ;; glyph converted to token on save
+     (t
+      (insert (format unicode-tokens2-charref-format codepoint))))))
+
+(defun unicode-tokens2-insert-string (arg ustring)
+  "Insert a Unicode string.
+If a prefix is given, the string will be inserted regardless
+of whether or not it has displayable glyphs; otherwise, a
+numeric character reference for whichever codepoints are not
+in the unicode-tokens2-glyph-list."
+  (mapcar (lambda (char) 
+	    (unicode-tokens2-insert-char arg char))
+	  ustring))
+
+(defun unicode-tokens2-character-insert (arg &optional argname)
+  "Insert a Unicode character by character name, with completion. 
+If a prefix is given, the character will be inserted regardless
+of whether or not it has a displayable glyph; otherwise, a
+numeric character reference is inserted if the codepoint is not
+in the `unicode-tokens2-glyph-list'. If argname is given, it is used for
+the prompt. If argname uniquely identifies a character, that
+character is inserted without the prompt."
+  (interactive "P")
+  (let* ((completion-ignore-case t)
+	 (uniname (if (stringp argname) argname ""))
+	 (charname
+	  (if (eq (try-completion uniname unicode-chars-alist) t)
+	      uniname
+	    (completing-read
+	     "Unicode name: "
+	     unicode-chars-alist
+	     nil t uniname)))
+	 codepoint glyph)
+    (setq codepoint (cdr (assoc charname unicode-chars-alist)))
+    (unicode-tokens2-insert-char arg codepoint)))
+
+(defun unicode-tokens2-token-insert (arg &optional argname)
+  "Insert a Unicode string by a token  name, with completion. 
+If a prefix is given, the string will be inserted regardless
+of whether or not it has displayable glyphs; otherwise, a
+numeric character reference for whichever codepoints are not
+in the unicode-tokens2-glyph-list. If argname is given, it is used for
+the prompt. If argname uniquely identifies a character, that
+character is inserted without the prompt."
+  (interactive "P")
+  (let* ((stokname (if (stringp argname) argname ""))
+	 (tokname
+	  (if (eq (try-completion stokname unicode-tokens2-token-name-alist) t)
+	      stokname
+	    (completing-read
+	     "Token name: "
+	     unicode-tokens2-token-name-alist
+	     nil t stokname)))
+	 charname ustring)
+    (setq ustring (cdr (assoc tokname unicode-tokens2-token-name-alist)))
+    (unicode-tokens2-insert-string arg ustring)))
+
+(defun unicode-tokens2-replace-token-after (length)
+  (let ((bpoint (point)) ustring)
+    (save-excursion
+      (forward-char length)
+      (save-match-data
+	(while (re-search-backward 
+		unicode-tokens2-token-match 
+		(max (- bpoint unicode-tokens2-max-token-length) 
+		     (point-min)) t nil)
+	  (setq ustring 
+		(assoc (match-string 1) unicode-tokens2-token-name-alist))
+	  (if ustring ;; TODO: should check on glyphs here
+	      (progn
+		(let ((matchlen (- (match-end 0) (match-beginning 0))))
+		  (replace-match (replace-quote (cdr ustring)))
+		  ;; was: (format "%c" (decode-char 'ucs (cadr codept)))
+		  (setq length 
+			(+ (- length matchlen) (length (cdr ustring)))))))))))
+    length)
+
+
+;;stolen from hen.el which in turn claims to have stolen it from cxref
+(defun unicode-tokens2-looking-backward-at (regexp)
+  "Return t if text before point matches regular expression REGEXP.
+This function modifies the match data that `match-beginning',
+`match-end' and `match-data' access; save and restore the match
+data if you want to preserve them."
+  (save-excursion
+    (let ((here (point)))
+      (if (re-search-backward regexp (point-min) t)
+          (if (re-search-forward regexp here t)
+              (= (point) here))))))
+
+;; TODO: make this work for control tokens.  
+;; But it's a bit nasty and introduces font-lock style complexity again.
+;; Better if we stick with dedicated input methods.
+(defun unicode-tokens2-electric-suffix ()
+  "Detect tokens and replace them with the appropriate string.
+This is bound to the character ending `unicode-tokens2-token-suffix'
+if there is such a unique character."
+  (interactive)
+  (let ((pos (point))
+	(case-fold-search nil)
+	amppos codept ustring)
+    (search-backward unicode-tokens2-token-prefix nil t nil)
+    (setq amppos (point))
+    (goto-char pos)
+    (cond
+     ((unicode-tokens2-looking-backward-at unicode-tokens2-token-match)
+      (progn
+	(re-search-backward unicode-tokens2-token-match nil t nil)
+	(if (= amppos (point))
+	    (progn
+	      (setq ustring 
+		     (assoc (match-string 1) 
+			    unicode-tokens2-token-name-alist))
+	      (if ustring  ;; todo: check glyphs avail/use insert fn
+		  (replace-match (replace-quote (cdr ustring)))
+		   ;; was (format "%c" (decode-char 'ucs (cdr codept))))
+		(progn
+		  (goto-char pos)
+		  (insert unicode-tokens2-token-suffix))))
+	  (progn
+	    (goto-char pos)
+	    (insert unicode-tokens2-token-suffix)))))
+     ((unicode-tokens2-looking-backward-at unicode-tokens2-hexcode-match)
+      (progn
+	(re-search-backward unicode-tokens2-hexcode-match nil t nil)
+	(if (= amppos (point))
+	    (progn
+	      (setq codept (string-to-number (match-string 1) 16))
+	      (if ;; todo : check glyph (memq codept unicode-tokens2-glyph-list)
+		  codept
+		  (replace-match (format "%c" (decode-char 'ucs (cdr codept))))
+		(progn
+		  (goto-char pos)
+		  (insert unicode-tokens2-token-suffix))))
+	  (progn
+	    (goto-char pos)
+	    (insert unicode-tokens2-token-suffix)))))
+     (t
+      (insert unicode-tokens2-token-suffix)))))
+
+(defvar unicode-tokens2-rotate-glyph-last-char nil)
+
+(defun unicode-tokens2-rotate-glyph-forward (&optional n)
+  "Rotate the character before point in the current code page, by N steps.
+If no character is found at the new codepoint, no change is made.
+This function may only work reliably for GNU Emacs >= 23."
+  (interactive "p")
+  (if (> (point) (point-min))
+      (let* ((codept  (or (if (or (eq last-command
+				      'unicode-tokens2-rotate-glyph-forward)
+				  (eq last-command
+				      'unicode-tokens2-rotate-glyph-backward))
+			      unicode-tokens2-rotate-glyph-last-char)
+			  (char-before (point))))
+	     (page    (/ codept 256))
+	     (pt      (mod codept 256))
+	     (newpt   (mod (+ pt (or n 1)) 256))
+	     (newcode (+ (* 256 page) newpt))
+	     (newname (assoc newcode 
+			     unicode-tokens2-codept-charname-alist))
+	     ;; NOTE: decode-char 'ucs here seems to fail on Emacs <23
+	     (newchar (decode-char 'ucs newcode)))
+	(when (and newname newchar)
+	  (delete-char -1)
+	  (insert-char newchar 1)
+	  (message (cdr newname))
+	  (setq unicode-tokens2-rotate-glyph-last-char nil))
+	(unless (and newname newchar)
+	  (message "No character at code %d" newcode)
+	  (setq unicode-tokens2-rotate-glyph-last-char newcode)))))
+
+(defun unicode-tokens2-rotate-glyph-backward (&optional n)
+  "Rotate the character before point in the current code page, by -N steps.
+If no character is found at the new codepoint, no change is made.
+This function may only work reliably for GNU Emacs >= 23."
+  (interactive "p")
+  (unicode-tokens2-rotate-glyph-forward (if n (- n) -1)))
+    
+
+
+(defconst unicode-tokens2-ignored-properties
+  '(vanilla type fontified face auto-composed
+    rear-nonsticky field inhibit-line-move-field-capture
+    utoks)
+  "Text properties to ignore when saving files.")
+
+(defconst unicode-tokens2-annotation-translations
+  `((font-lock-face
+     (bold		"bold")
+     (unicode-tokens2-script-font-face "script")
+     (unicode-tokens2-fraktur-font-face "frak")
+     (unicode-tokens2-serif-font-face "serif")
+     (proof-declaration-name-face "loc1")
+     (default	))
+    (display   
+     ((raise 0.4)    "superscript")
+     ((raise -0.4)   "subscript")
+     ((raise 0.35)  "superscript1")
+     ((raise -0.35) "subscript1")
+     ((raise -0.3)   "idsubscript1")
+     (default	)))
+  "Text property table for annotations.")
+
+
+(defun unicode-tokens2-font-lock-keywords ()
+  "Calculate value for `font-lock-keywords'."
+  (when (fboundp 'compose-region)
+    (let ((alist nil))
+      (dolist (x unicode-tokens2-token-name-alist)
+	(when (and (if (fboundp 'char-displayable-p)
+		       (reduce (lambda (x y) (and x (char-displayable-p y)))
+			       (cdr x)
+			       :initial-value t))
+		     t)
+		   (not (assoc (car x) alist)))	;Not yet in alist.
+	  (push (cons
+		 (format unicode-tokens2-token-format (car x))
+		 (cdr x))
+		alist)))
+      (when alist
+	(setq unicode-tokens2-token-alist alist)
+	`((,(regexp-opt (mapcar 'car alist) t)
+	   (0 (unicode-tokens2-compose-symbol)
+              ;; In Emacs-21, if the `override' field is nil, the face
+              ;; expressions is only evaluated if the text has currently
+              ;; no face.  So force evaluation by using `keep'.
+              keep))))))
+
+   
+
+  
+(defvar unicode-tokens2-next-control-token-seen-token nil
+  "Records currently open single-character control token.")
+
+(defun unicode-tokens2-next-control-token ()
+  "Find next control token and interpret it.
+If `unicode-tokens2-next-control-token' is non-nil, end current control sequence
+after next character (single character control sequence)."
+  (let (result)
+    (when unicode-tokens2-next-control-token-seen-token
+      (if (re-search-forward unicode-tokens2-next-character-regexp nil t)
+	  (setq result (list (match-end 0) (match-end 0)
+			     unicode-tokens2-next-control-token-seen-token 
+			     nil)))
+      (setq unicode-tokens2-next-control-token-seen-token nil))
+    (while (and (not result)
+		(re-search-forward unicode-tokens2-control-token-match nil t))
+      (let*
+	  ((tok  (match-string 1))
+	   (annot
+	    (cond
+	     ((equal tok "bsup")    '("superscript" t))
+	     ((equal tok "esup")    '("superscript" nil))
+	     ((equal tok "bsub")    '("subscript" t))
+	     ((equal tok "esub")    '("subscript" nil))
+	     ((equal tok "bbold")   '("bold" t))
+	     ((equal tok "ebold")   '("bold" nil))
+	     ((equal tok "bitalic") '("italic" t))
+	     ((equal tok "eitalic") '("italic" nil))
+	     ((equal tok "bscript") '("script" t))
+	     ((equal tok "escript") '("script" nil))
+	     ((equal tok "bfrak")   '("frak" t))
+	     ((equal tok "efrak")   '("frak" nil))
+	     ((equal tok "bserif")  '("serif" t))
+	     ((equal tok "eserif")  '("serif" nil))
+	     ((equal tok "loc") 
+	      (list (setq unicode-tokens2-next-control-token-seen-token
+			  "loc1") t))
+	     ((equal tok "sup") 
+	      (list (setq unicode-tokens2-next-control-token-seen-token
+			  "superscript1") t))
+	     ((equal tok "sub") 
+	      (list (setq unicode-tokens2-next-control-token-seen-token
+			  "subscript1") t))
+	     ((equal tok "isub") 
+	      (list (setq unicode-tokens2-next-control-token-seen-token
+			  "idsubscript1") t)))))
+	(if annot
+	    (setq result
+		  (append
+		   (list (match-beginning 0) (match-end 0))
+		   annot)))))
+    result))
+
+;; TODO: this should be instance specific  
+(defconst unicode-tokens2-annotation-control-token-alist 
+  '(("bold" .         ("bbold" . "ebold"))
+    ("subscript" .    ("bsub" . "esub"))
+    ("superscript" .  ("bsup" . "esup"))
+    ("subscript1" .   ("sub"))
+    ("superscript1" . ("sup"))
+    ("loc1"	    . ("loc"))
+    ;; non-standard
+    ("italic" .       ("bitalic" . "eitalic"))
+    ("script" .       ("bscript" . "escript"))
+    ("frak" .         ("bfrak" . "efrak"))
+    ("serif" .        ("bserif" . "eserif"))))
+  
+(defun unicode-tokens2-make-token-annotation (annot positive)
+  "Encode a text property start/end by adding an annotation in the file."
+  (let ((toks (cdr-safe
+	       (assoc annot unicode-tokens2-annotation-control-token-alist))))
+    (cond
+     ((and toks positive)
+      (format unicode-tokens2-control-token-format  (car toks)))
+     ((and toks (cdr toks))
+      (format unicode-tokens2-control-token-format  (cdr toks)))
+     (t ""))))
+
+(defun unicode-tokens2-find-property (name)
+  (let ((props unicode-tokens2-annotation-translations)
+	prop vals val vname)
+    (catch 'return
+      (while props
+	(setq prop (caar props))
+	(setq vals (cdar props))
+	(while vals
+	  (setq val (car vals))
+	  (if (member name (cdr val))
+	      (throw 'return (list prop (car val))))
+	  (setq vals (cdr vals)))
+	(setq props (cdr props))))))
+      
+(defun unicode-tokens2-annotate-region (beg end &optional annot)
+  (interactive "r")
+  (or annot 
+      (if (interactive-p)
+	  (setq annot
+		(completing-read "Annotate region as: " 
+				 unicode-tokens2-annotation-control-token-alist
+				 nil t))
+	(error "In unicode-tokens2-annotation-control-token-alist: TYPE must be given.")))
+  (add-text-properties beg end
+		       (unicode-tokens2-find-property annot)))
+
+(defun unicode-tokens2-annotate-string (annot string)
+  (add-text-properties 0 (length string)
+		       (unicode-tokens2-find-property annot)
+		       string)
+  string)
+
+(defun unicode-tokens2-unicode-to-tokens (&optional start end buffer)
+  "Encode a buffer to save as a tokenised file."
+  (let ((case-fold-search proof-case-fold-search)
+	(buffer-undo-list t)
+	(modified (buffer-modified-p)))
+    (save-restriction
+      (save-excursion
+	(narrow-to-region (or start (point-min)) (or end (point-max)))
+	(format-insert-annotations
+	 (format-annotate-region (point-min) (point-max) 
+				 unicode-tokens2-annotation-translations
+				 'unicode-tokens2-make-token-annotation
+				 unicode-tokens2-ignored-properties))
+;; alternative experiment: store original tokens inside text properties
+;;	(unicode-tokens2-replace-strings-unpropertise)
+	(format-replace-strings unicode-tokens2-ustring-alist 
+				nil (point-min) (point-max))
+	(set-buffer-modified-p modified)))))
+
+
+(defun unicode-tokens2-replace-strings-propertise (alist &optional beg end)
+  "Do multiple replacements on the buffer.
+ALIST is a list of (FROM . TO) pairs, which should be proper arguments to
+`search-forward' and `replace-match', respectively.
+The original contents FROM are retained in the buffer in the text property `utoks'.
+Optional args BEG and END specify a region of the buffer on which to operate."
+  (save-excursion
+    (save-restriction
+      (or beg (setq beg (point-min)))
+      (if end (narrow-to-region (point-min) end))
+      (while alist
+	(let ((from (car (car alist)))
+	      (to   (cdr (car alist)))
+	      (case-fold-search nil))
+	  (goto-char beg)
+	  (while (search-forward from nil t)
+	    (goto-char (match-beginning 0))
+	    (insert to)
+	    (set-text-properties (- (point) (length to)) (point)
+				 (cons 'utoks
+				       (cons from 
+					     (text-properties-at (point)))))
+	    (delete-region (point) (+ (point) (- (match-end 0)
+						 (match-beginning 0)))))
+	  (setq alist (cdr alist)))))))
+
+;; NB: this is OK, except that now if we edit with raw symbols, we
+;; don't get desired effect but instead rely on hidden annotations.
+;; Also doesn't work as easily with quail.
+;; Can we have a sensible mixture of both things?
+(defun unicode-tokens2-replace-strings-unpropertise (&optional beg end)
+  "Reverse the effect of `unicode-tokens2-replace-strings-unpropertise'.
+Replaces contiguous text with 'utoks' property with property value."
+  (let ((pos (or beg (point-min)))
+	(lim (or end (point-max)))
+	start to)
+    (save-excursion
+      (while (and 
+	      (setq pos (next-single-property-change pos 'utoks nil lim))
+	      (< pos lim))
+	(if start
+	    (progn
+	      (setq to (get-text-property start 'utoks))
+	      (goto-char start)
+	      (insert to)
+	      (set-text-properties start (point)
+				   (text-properties-at start))
+	      (delete-region (point) (+ (point) (- pos start)))
+	      (setq start nil))
+	  (setq start pos))))))
+	  
+
+
+  
+
+;; 
+;; Minor mode
+;;
+
+(defvar unicode-tokens2-mode-map (make-sparse-keymap)
+  "Key map used for Unicode Tokens mode.")
+
+(define-minor-mode unicode-tokens2-mode
+  "Minor mode for unicode token input." nil " Utoks"
+  unicode-tokens2-mode-map
+  (unless unicode-tokens2-token-alist
+    (unicode-tokens2-initialise))
+  (when unicode-tokens2-mode
+    (when (boundp 'text-property-default-nonsticky)
+      (make-variable-buffer-local 'text-property-default-nonsticky)
+      (setq text-property-default-nonsticky
+	    ;; We want to use display property with stickyness
+	    (delete '(display . t) text-property-default-nonsticky)))
+    (if (and
+	 (fboundp 'set-buffer-multibyte)
+	 (not (buffer-base-buffer)))
+	(set-buffer-multibyte t))
+    (let ((inhibit-read-only t))
+      ;; format is supposed to manage undo, but doesn't remap
+      (setq buffer-undo-list nil) 
+      (format-decode-buffer 'unicode-tokens2))
+    (set-input-method "Unicode tokens"))
+  (unless unicode-tokens2-mode
+    (when (boundp 'text-property-default-nonsticky)
+      (add-to-list 'text-property-default-nonsticky '(display . t)))
+    ;; leave buffer encoding as is
+    (let ((inhibit-read-only t)
+	  (modified (buffer-modified-p)))
+      ;; format is supposed to manage undo, but doesn't remap
+      (setq buffer-undo-list nil) 
+      (format-encode-buffer 'unicode-tokens2)
+      (unicode-tokens2-remove-properties (point-min) (point-max))
+      (set-buffer-modified-p modified))
+    (inactivate-input-method)))
+
+;; 
+;; Initialisation
+;;
+(defun unicode-tokens2-initialise ()
+  "Initialise tables."
+  ;; Calculate max token length
+  (let ((tlist unicode-tokens2-token-name-alist)
+	(len 0) tok)
+    (while tlist
+      (when (> (length (caar tlist)) 0)
+	  (setq len (length (caar tlist)))
+	  (setq tok (caar tlist)))
+      (setq tlist (cdr tlist)))
+    (setq unicode-tokens2-max-token-length
+	  (length (format unicode-tokens2-token-format tok))))
+  ;; Names from code points
+  (setq unicode-tokens2-codept-charname-alist
+	(mapcar (lambda (namechar)
+		  (cons (cdr namechar) (car namechar)))
+		unicode-chars-alist))
+  ;; Default assumed available glyph list based on tokens;
+  ;; TODO: filter with what's really available, if can find out.
+  ;; TODO: allow altering of this when the token-name-alist is reset
+  ;; in proof-token-name-alist (unless test here is for specific setting)
+  (unless unicode-tokens2-glyph-list
+    (setq unicode-tokens2-glyph-list
+	  (reduce (lambda (glyphs tokustring)
+		    (append glyphs (string-to-list (cdr tokustring))))
+		  unicode-tokens2-token-name-alist
+		  :initial-value nil)))
+  (unicode-tokens2-quail-define-rules)
+  ;; Key bindings
+  (if (= (length unicode-tokens2-token-suffix) 1)
+      (define-key unicode-tokens2-mode-map
+	(vector (string-to-char unicode-tokens2-token-suffix))
+	'unicode-tokens2-electric-suffix))
+   (define-key unicode-tokens2-mode-map [(control ?,)]
+     'unicode-tokens2-rotate-glyph-backward)
+   (define-key unicode-tokens2-mode-map [(control ?.)]
+     'unicode-tokens2-rotate-glyph-forward)
+  ;; otherwise action on space like in X-Symbol?
+  )
+
+
+(provide 'unicode-tokens2)
+
+;;; unicode-tokens2.el ends here
author	David Aspinall <da@inf.ed.ac.uk>	2008-07-05 15:04:13 +0000
committer	David Aspinall <da@inf.ed.ac.uk>	2008-07-05 15:04:13 +0000
commit	619955d4b9949dd99a483fcf9daf8a4429301b82 (patch)
tree	d6efdc7da8fb42591c677bb1e483f62aa221fa5b
parent	170973824f698d28c9891058dde929e1ce4bac11 (diff)