diff options
author | David Aspinall <da@inf.ed.ac.uk> | 2008-07-05 15:04:13 +0000 |
---|---|---|
committer | David Aspinall <da@inf.ed.ac.uk> | 2008-07-05 15:04:13 +0000 |
commit | 619955d4b9949dd99a483fcf9daf8a4429301b82 (patch) | |
tree | d6efdc7da8fb42591c677bb1e483f62aa221fa5b | |
parent | 170973824f698d28c9891058dde929e1ce4bac11 (diff) |
TEMPORARY: add unicode-tokens2 mechanism to test across different machines
-rw-r--r-- | generic/pg-custom.el | 6 | ||||
-rw-r--r-- | generic/proof-auxmodes.el | 16 | ||||
-rw-r--r-- | generic/proof-menu.el | 12 | ||||
-rw-r--r-- | generic/proof-unicode-tokens2.el | 142 | ||||
-rw-r--r-- | isar/isar-unicode-tokens2.el | 595 | ||||
-rw-r--r-- | lib/format-hack.el | 195 | ||||
-rw-r--r-- | lib/unicode-tokens2.el | 666 |
7 files changed, 1632 insertions, 0 deletions
diff --git a/generic/pg-custom.el b/generic/pg-custom.el index 238b76b4..64d8a97a 100644 --- a/generic/pg-custom.el +++ b/generic/pg-custom.el @@ -56,6 +56,12 @@ look for files named x-symbol-<PA>.el.") :set 'proof-set-value :group 'proof-user-options) +(defpgcustom unicode-tokens2-enable nil + "*Non-nil for using Unicode token input mode in Proof General." + :type 'boolean + :set 'proof-set-value + :group 'proof-user-options) + (defpgcustom mmm-enable nil "*Whether to use MMM Mode in Proof General for this assistant. MMM Mode allows multiple modes to be used in the same buffer. diff --git a/generic/proof-auxmodes.el b/generic/proof-auxmodes.el index a42ca070..809630bc 100644 --- a/generic/proof-auxmodes.el +++ b/generic/proof-auxmodes.el @@ -76,6 +76,22 @@ (proof-unicode-tokens-support-available)) (proof-unicode-tokens-set-global t))) +;; +;; Unicode tokens 2 (temporary!) +;; +(defun proof-unicode-tokens2-support-available () + "A test to see whether unicode tokens2 support is available." + (and + (or (featurep 'unicode-tokens2) + (proof-try-require 'unicode-tokens2)) + ;; Requires prover-specific config in <foo>-unicode-tokens2.el + (proof-try-require (proof-ass-sym unicode-tokens2)))) + +(proof-eval-when-ready-for-assistant + (if (and (proof-ass unicode-tokens2-enable) + (proof-unicode-tokens2-support-available)) + (proof-unicode-tokens2-set-global t))) + (provide 'proof-auxmodes) diff --git a/generic/proof-menu.el b/generic/proof-menu.el index af7950d1..968098e0 100644 --- a/generic/proof-menu.el +++ b/generic/proof-menu.el @@ -279,6 +279,8 @@ without adjusting window layout." (proof-deftoggle-fn (proof-ass-sym unicode-tokens-enable) 'proof-unicode-tokens-toggle) (proof-deftoggle-fn + (proof-ass-sym unicode-tokens2-enable) 'proof-unicode-tokens2-toggle) + (proof-deftoggle-fn (proof-ass-sym maths-menu-enable) 'proof-maths-menu-toggle) (proof-deftoggle-fn (proof-ass-sym mmm-enable) 'proof-mmm-toggle)) @@ -332,6 +334,16 @@ without adjusting window layout." :selected (and (boundp 'unicode-tokens-mode) unicode-tokens-mode)] + ["Unicode Tokens 2" + (progn + (unless unicode-tokens2-mode (proof-x-symbol-toggle 0)) + (proof-unicode-tokens2-toggle (if unicode-tokens2-mode 0 1))) + :active (and (not (and (boundp 'x-symbol-mode) x-symbol-mode)) + (proof-unicode-tokens2-support-available)) + :style toggle + :selected (and (boundp 'unicode-tokens2-mode) + unicode-tokens2-mode)] + ["Unicode Maths Menu" (proof-maths-menu-toggle (if maths-menu-mode 0 1)) :active (proof-maths-menu-support-available) :style toggle diff --git a/generic/proof-unicode-tokens2.el b/generic/proof-unicode-tokens2.el new file mode 100644 index 00000000..377d7d8f --- /dev/null +++ b/generic/proof-unicode-tokens2.el @@ -0,0 +1,142 @@ +;; proof-unicode-tokens2.el Support Unicode tokens package +;; +;; Copyright (C) 2008 David Aspinall / LFCS Edinburgh +;; Author: David Aspinall <David.Aspinall@ed.ac.uk> +;; License: GPL (GNU GENERAL PUBLIC LICENSE) +;; +;; $Id$ +;; + +(eval-when-compile + (require 'proof-utils) ; for proof-ass, proof-eval-when-ready-for-assistant + (require 'cl)) + +(eval-when (compile) + (require 'unicode-tokens2)) ; it's loaded dynamically at runtime + +(defvar proof-unicode-tokens2-initialised nil + "Flag indicating whether or not we've performed startup.") + +(defun proof-unicode-tokens2-init () + "Initialise settings for unicode tokens from prover specific variables." + (mapcar + (lambda (var) ;; or defass? + (if (boundp (proof-ass-symv var)) + (set (intern (concat "unicode-tokens2-" (symbol-name var))) + (eval `(proof-ass ,var))))) + '(charref-format + token-format + control-token-format + token-name-alist + glyph-list + token-match + control-token-match + hexcode-match + next-character-regexp + token-prefix + token-suffix + shortcut-alist)) + (unicode-tokens2-initialise) + (setq proof-unicode-tokens2-initialised t)) + +;;;###autoload +(defun proof-unicode-tokens2-enable () + "Turn on or off Unicode tokens mode in Proof General script buffer. +This invokes `unicode-tokens2-mode' to toggle the setting for the current +buffer, and then sets PG's option for default to match. +Also we arrange to have unicode tokens mode turn itself on automatically +in future if we have just activated it for this buffer." + (interactive) + (when (proof-unicode-tokens2-support-available) ;; loads unicode-tokens2 + (unless proof-unicode-tokens2-initialised + (proof-unicode-tokens2-init)) + (proof-unicode-tokens2-set-global (not unicode-tokens2-mode)))) + + +;;;###autoload +(defun proof-unicode-tokens2-set-global (flag) + "Set global status of unicode tokens mode for PG buffers to be FLAG. +Turn on/off menu in all script buffers and ensure new buffers follow suit." + (unless proof-unicode-tokens2-initialised + (proof-unicode-tokens2-init)) + (let ((hook (proof-ass-sym mode-hook))) + (if flag + (add-hook hook 'unicode-tokens2-mode) + (remove-hook hook 'unicode-tokens2-mode)) + (proof-map-buffers + (proof-buffers-in-mode proof-mode-for-script) + (unicode-tokens2-mode (if flag 1 0))) + (proof-unicode-tokens2-shell-config))) + + +;;; +;;; Interface to custom to dynamically change tables (via proof-set-value) +;;; + +(defun proof-token-name-alist () + "Function called after the current token name alist has been changed. +Switch off tokens in all buffers, recalculate maps, turn on again." + (when proof-unicode-tokens2-initialised ; not on startup + (when (proof-ass unicode-tokens2-enable) + (proof-map-buffers + (proof-buffers-in-mode proof-mode-for-script) + (unicode-tokens2-mode 0))) + (setq unicode-tokens2-token-name-alist (proof-ass token-name-alist)) + (unicode-tokens2-initialise) + (when (proof-ass unicode-tokens2-enable) + (proof-map-buffers + (proof-buffers-in-mode proof-mode-for-script) + (unicode-tokens2-mode 1))))) + +(defun proof-shortcut-alist () + "Function called after the shortcut alist has been changed. +Updates the input mapping for reading shortcuts." + (when proof-unicode-tokens2-initialised ; not on startup + (setq unicode-tokens2-shortcut-alist (proof-ass shortcut-alist)) + (unicode-tokens2-initialise))) + +;;; +;;; Interface to shell +;;; + + +(defun proof-unicode-tokens2-activate-prover () + (when (and proof-xsym-activate-command + (proof-shell-live-buffer) + (proof-shell-available-p)) + (proof-shell-invisible-command-invisible-result + proof-xsym-activate-command))) + +(defun proof-unicode-tokens2-deactivate-prover () + (when (and proof-xsym-deactivate-command + ;; NB: clash with X-symbols since use same commands in prover! + (not (proof-ass x-symbol-enable)) + (proof-shell-live-buffer) + (proof-shell-available-p)) + (proof-shell-invisible-command-invisible-result + proof-xsym-deactivate-command))) + +;;; NB: we shouldn't bother load this if it's not enabled. +;;;###autoload +(defun proof-unicode-tokens2-shell-config () + (when (proof-ass unicode-tokens2-enable) + (add-hook 'proof-shell-insert-hook + 'proof-unicode-tokens2-encode-shell-input) + (proof-unicode-tokens2-activate-prover)) + (unless (proof-ass unicode-tokens2-enable) + (remove-hook 'proof-shell-insert-hook + 'proof-unicode-tokens2-encode-shell-input) + (proof-unicode-tokens2-deactivate-prover))) + +(defun proof-unicode-tokens2-encode-shell-input () + "Encode shell input in the variable STRING. +A value for proof-shell-insert-hook." + (if (proof-ass unicode-tokens2-enable) + (with-temp-buffer ;; TODO: better to do directly in *prover* + (insert string) + (unicode-tokens2-unicode-to-tokens) + (setq string (buffer-substring-no-properties + (point-min) (point-max)))))) + +(provide 'proof-unicode-tokens2) +;; End of proof-unicode-tokens2.el diff --git a/isar/isar-unicode-tokens2.el b/isar/isar-unicode-tokens2.el new file mode 100644 index 00000000..e252ebdc --- /dev/null +++ b/isar/isar-unicode-tokens2.el @@ -0,0 +1,595 @@ +;;; -*- coding: utf-8; -*- +;; isar-unicode-tokens2.el --- Tokens for Unicode Tokens package +;; +;; Copyright(C) 2008 David Aspinall / LFCS Edinburgh +;; Author: David Aspinall <David.Aspinall@ed.ac.uk> +;; +;; This file is loaded by `proof-unicode-tokens2.el'. +;; +;; It sets the variables defined at the top of unicode-tokens2.el, +;; unicode-tokens2-<foo> is set from isar-<foo>. See the corresponding +;; variable for documentation. +;; + +(defconst isar-token-format "\\<%s>") +(defconst isar-charref-format "\\<#x%x>") +(defconst isar-token-prefix "\\<") +(defconst isar-token-suffix ">") +(defconst isar-token-match "\\\\<\\([a-zA-Z0-9]+\\)") +(defconst isar-control-token-match "\\\\<^\\([a-zA-Z0-9]+\\)>") +(defconst isar-control-token-format "\\<^%s>") +(defconst isar-hexcode-match "\\\\<\\(#[xX][0-9A-Fa-f]+\\)") +(defconst isar-next-character-regexp "\\\\<#[xX][0-9A-Fa-f]+>\\|.") + +(defcustom isar-token-name-alist + (flet + ((script (s) (format "\\<^bscript>%s\\<^escript>" s)) + (frakt (s) (format "\\<^bfrak>%s\\<^efrak>" s)) + (serif (s) (format "\\<^bserif>%s\\<^eserif>" s)) + (bold (s) (format "\\<^bbold>%s\\<^ebold>" s))) + + ;; property-based annotations. More direct for input + ;; but inverse mapping tricky: need to ignore for + ;; reverse and fold \<^bscript>A\<^escript> -> \<AA> etc. + ;; Extra dimension in table required. + ;; (Also: not supported in XEmacs?) + ;((script (s) (unicode-tokens2-annotate-string "script" s)) + ; (frakt (s) (unicode-tokens2-annotate-string "frak" s)) + ; (serif (s) (unicode-tokens2-annotate-string "serif" s))) + `(; token name, unicode char sequence + ;; Based on isabellesym.sty,v 1.45 2006/01/05 + + ;; Bold numerals + ("one" . ,(bold "1")) + ("two" . ,(bold "2")) + ("three" . ,(bold "3")) + ("four" . ,(bold "4")) + ("five" . ,(bold "5")) + ("six" . ,(bold "6")) + ("seven" . ,(bold "7")) + ("eight" . ,(bold "8")) + ("nine" . ,(bold "9")) + ;; Mathcal + ("A" . ,(script "A")) + ("B" . ,(script "B")) + ("C" . ,(script "C")) + ("D" . ,(script "D")) + ("E" . ,(script "E")) + ("F" . ,(script "F")) + ("G" . ,(script "G")) + ("H" . ,(script "H")) + ("I" . ,(script "I")) + ("J" . ,(script "J")) + ("K" . ,(script "K")) + ("L" . ,(script "L")) + ("M" . ,(script "M")) + ("N" . ,(script "N")) + ("O" . ,(script "O")) + ("P" . ,(script "P")) + ("Q" . ,(script "Q")) + ("R" . ,(script "R")) + ("S" . ,(script "S")) + ("T" . ,(script "T")) + ("U" . ,(script "U")) + ("V" . ,(script "V")) + ("W" . ,(script "W")) + ("X" . ,(script "X")) + ("Y" . ,(script "Y")) + ("Z" . ,(script "Z")) + ;; Math roman + ("a" . ,(serif "a")) + ("b" . ,(serif "b")) + ("c" . ,(serif "c")) + ("d" . ,(serif "d")) + ("e" . ,(serif "e")) + ("f" . ,(serif "f")) + ("g" . ,(serif "g")) + ("h" . ,(serif "h")) + ("i" . ,(serif "i")) + ("j" . ,(serif "j")) + ("k" . ,(serif "k")) + ("l" . ,(serif "l")) + ("m" . ,(serif "m")) + ("n" . ,(serif "n")) + ("o" . ,(serif "o")) + ("p" . ,(serif "p")) + ("q" . ,(serif "q")) + ("r" . ,(serif "r")) + ("s" . ,(serif "s")) + ("t" . ,(serif "t")) + ("u" . ,(serif "u")) + ("v" . ,(serif "v")) + ("w" . ,(serif "w")) + ("x" . ,(serif "x")) + ("y" . ,(serif "y")) + ("z" . ,(serif "z")) + ;; Fraktur + ("AA" . ,(frakt "A")) + ("BB" . ,(frakt "B")) + ("CC" . ,(frakt "C")) + ("DD" . ,(frakt "D")) + ("EE" . ,(frakt "E")) + ("FF" . ,(frakt "F")) + ("GG" . ,(frakt "G")) + ("HH" . ,(frakt "H")) + ("II" . ,(frakt "I")) + ("JJ" . ,(frakt "J")) + ("KK" . ,(frakt "K")) + ("LL" . ,(frakt "L")) + ("MM" . ,(frakt "M")) + ("NN" . ,(frakt "N")) + ("OO" . ,(frakt "O")) + ("PP" . ,(frakt "P")) + ("QQ" . ,(frakt "Q")) + ("RR" . ,(frakt "R")) + ("SS" . ,(frakt "S")) + ("TT" . ,(frakt "T")) + ("UU" . ,(frakt "U")) + ("VV" . ,(frakt "V")) + ("WW" . ,(frakt "W")) + ("XX" . ,(frakt "X")) + ("YY" . ,(frakt "Y")) + ("ZZ" . ,(frakt "Z")) + ("aa" . ,(frakt "a")) + ("bb" . ,(frakt "b")) + ("cc" . ,(frakt "c")) + ("dd" . ,(frakt "d")) + ("ee" . ,(frakt "e")) + ("ff" . ,(frakt "f")) + ("gg" . ,(frakt "g")) + ("hh" . ,(frakt "h")) + ("ii" . ,(frakt "i")) + ("jj" . ,(frakt "j")) + ("kk" . ,(frakt "k")) + ("ll" . ,(frakt "l")) + ("mm" . ,(frakt "m")) + ("nn" . ,(frakt "n")) + ("oo" . ,(frakt "o")) + ("pp" . ,(frakt "p")) + ("qq" . ,(frakt "q")) + ("rr" . ,(frakt "r")) + ("ss" . ,(frakt "s")) + ("tt" . ,(frakt "t")) + ("uu" . ,(frakt "u")) + ("vv" . ,(frakt "v")) + ("ww" . ,(frakt "w")) + ("xx" . ,(frakt "x")) + ("yy" . ,(frakt "y")) + ("zz" . ,(frakt "z")) + ("alpha" . "α") + ("beta" . "β") + ("gamma" . "γ") + ("delta" . "δ") + ("epsilon" . "ε") ; actually varepsilon (some is epsilon) + ("zeta" . "ζ") + ("eta" . "η") + ("theta" . "θ") + ("iota" . "ι") + ("kappa" . "κ") + ("lambda" . "λ") + ("mu" . "μ") + ("nu" . "ν") + ("xi" . "ξ") + ("pi" . "π") + ("rho" . "ρ") + ("sigma" . "σ") + ("tau" . "τ") + ("upsilon" . "υ") + ("phi" . "ϕ") + ("chi" . "χ") + ("psi" . "ψ") + ("omega" . "ω") + ("Gamma" . "Γ") + ("Delta" . "Δ") + ("Theta" . "Θ") + ("Lambda" . "Λ") + ("Xi" . "Ξ") + ("Pi" . "Π") + ("Sigma" . "Σ") + ("Upsilon" . "Υ") + ("Phi" . "Φ") + ("Psi" . "Ψ") + ("Omega" . "Ω") +;; ("bool" . "") + ("complex" . "ℂ") + ("nat" . "ℕ") + ("rat" . "ℚ") + ("real" . "ℝ") + ("int" . "ℤ") + ;; Arrows + ("leftarrow" . "←") + ("rightarrow" . "→") + ("Leftarrow" . "⇐") + ("Rightarrow" . "⇒") + ("leftrightarrow" . "↔") + ("Leftrightarrow" . "⇔") + ("mapsto" . "↦") + ;; Real long symbols, may work in some places + ("longleftarrow" . "⟵") + ("Longleftarrow" . "⟸") + ("longrightarrow" . "⟶") + ("Longrightarrow" . "⟹") + ("longleftrightarrow" . "⟷") + ("Longleftrightarrow" . "⟺") + ("longmapsto" . "⟼") + ;; Faked long symbols, for use otherwise: +;;; ("longleftarrow" . "←–") +;;; ("Longleftarrow" . "⇐–") +;;; ("longrightarrow" . "–→") +;;; ("Longrightarrow" . "–⇒") +;;; ("longleftrightarrow" . "←→") +;;; ("Longleftrightarrow" . "⇐⇒") +;;; ("longmapsto" . "❘→") + ("midarrow" . "–") ; #x002013 en dash + ("Midarrow" . "‗") ; #x002017 double low line (not mid) + ("hookleftarrow" . "↩") + ("hookrightarrow" . "↪") + ("leftharpoondown" . "↽") + ("rightharpoondown" . "⇁") + ("leftharpoonup" . "↼") + ("rightharpoonup" . "⇀") + ("rightleftharpoons" . "⇌") + ("leadsto" . "↝") + ("downharpoonleft" . "⇃") + ("downharpoonright" . "⇂") + ("upharpoonleft" . "↿") + ;; ("upharpoonright" . "↾") overlaps restriction + ("restriction" . "↾") ;; + ("Colon" . "∷") + ("up" . "↑") + ("Up" . "⇑") + ("down" . "↓") + ("Down" . "⇓") + ("updown" . "↕") + ("Updown" . "⇕") + ("langle" . "⟪") + ("rangle" . "⟫") + ("lceil" . "⌈") + ("rceil" . "⌉") + ("lfloor" . "⌊") + ("rfloor" . "⌋") + ("lparr" . "⦇") + ("rparr" . "⦈") + ("lbrakk" . "⟦") + ("rbrakk" . "⟧") +;; ("lbrace" . "") TODO +;; ("rbrace" . "") + ("guillemotleft" . "«") + ("guillemotright" . "»") + ("bottom" . "⊥") + ("top" . "⊤") + ("and" . "∧") + ("And" . "⋀") + ("or" . "∨") + ("Or" . "⋁") + ("forall" . "∀") + ("exists" . "∃") + ("nexists" . "∄") + ("not" . "¬") + ("box" . "□") + ("diamond" . "◇") + ("turnstile" . "⊢") + ("Turnstile" . "⊨") + ("tturnstile" . "⊩") + ("TTurnstile" . "⊫") + ("stileturn" . "⊣") + ("surd" . "√") + ("le" . "≤") + ("ge" . "≥") + ("lless" . "≪") + ("ggreater" . "≫") + ("lesssim" . "⪍") + ("greatersim" . "⪎") + ("lessapprox" . "⪅") + ("greaterapprox" . "⪆") + ("in" . "∈") + ("notin" . "∉") + ("subset" . "⊂") + ("supset" . "⊃") + ("subseteq" . "⊆") + ("supseteq" . "⊇") + ("sqsubset" . "⊏") + ("sqsupset" . "⊐") + ("sqsubseteq" . "⊑") + ("sqsupseteq" . "⊒") + ("inter" . "∩") + ("Inter" . "⋂") + ("union" . "∪") + ("Union" . "⋃") + ("squnion" . "⊔") + ("Squnion" . "⨆") + ("sqinter" . "⊓") + ("Sqinter" . "⨅") + ("setminus" . "∖") + ("propto" . "∝") + ("uplus" . "⊎") + ("Uplus" . "⨄") + ("noteq" . "≠") + ("sim" . "∼") + ("doteq" . "≐") + ("simeq" . "≃") + ("approx" . "≈") + ("asymp" . "≍") + ("cong" . "≅") + ("smile" . "⌣") + ("equiv" . "≡") + ("frown" . "⌢") + ("Join" . "⨝") + ("bowtie" . "⋈") + ("prec" . "≺") + ("succ" . "≻") + ("preceq" . "≼") + ("succeq" . "≽") + ("parallel" . "∥") + ("bar" . "¦") + ("plusminus" . "±") + ("minusplus" . "∓") + ("times" . "×") + ("div" . "÷") + ("cdot" . "⋅") + ("star" . "⋆") + ("bullet" . "∙") + ("circ" . "∘") + ("dagger" . "†") + ("ddagger" . "‡") + ("lhd" . "⊲") + ("rhd" . "⊳") + ("unlhd" . "⊴") + ("unrhd" . "⊵") + ("triangleleft" . "◁") + ("triangleright" . "▷") + ("triangle" . "▵") ; or △ + ("triangleq" . "≜") + ("oplus" . "⊕") + ("Oplus" . "⨁") + ("otimes" . "⊗") + ("Otimes" . "⨂") + ("odot" . "⊙") + ("Odot" . "⨀") + ("ominus" . "⊖") + ("oslash" . "⊘") + ("dots" . "…") + ("cdots" . "⋯") + ("Sum" . "∑") + ("Prod" . "∏") + ("Coprod" . "∐") + ("infinity" . "∞") + ("integral" . "∫") + ("ointegral" . "∮") + ("clubsuit" . "♣") + ("diamondsuit" . "♢") + ("heartsuit" . "♡") + ("spadesuit" . "♠") + ("aleph" . "ℵ") + ("emptyset" . "∅") + ("nabla" . "∇") + ("partial" . "∂") + ("Re" . "ℜ") + ("Im" . "ℑ") + ("flat" . "♭") + ("natural" . "♮") + ("sharp" . "♯") + ("angle" . "∠") + ("copyright" . "©") + ("registered" . "®") + ("hyphen" . "‐") + ("inverse" . "¯¹") ; X-Symb: just "¯" + ("onesuperior" . "¹") + ("twosuperior" . "²") + ("threesuperior" . "³") + ("onequarter" . "¼") + ("onehalf" . "½") + ("threequarters" . "¾") + ("ordmasculine" . "º") + ("ordfeminine" . "ª") + ("section" . "§") + ("paragraph" . "¶") + ("exclamdown" . "¡") + ("questiondown" . "¿") + ("euro" . "€") + ("pounds" . "£") + ("yen" . "¥") + ("cent" . "¢") + ("currency" . "¤") + ("degree" . "°") + ("amalg" . "⨿") + ("mho" . "℧") + ("lozenge" . "◊") + ("wp" . "℘") + ("wrong" . "≀") ;; #x002307 +;; ("struct" . "") ;; TODO + ("acute" . "´") + ("index" . "ı") + ("dieresis" . "¨") + ("cedilla" . "¸") + ("hungarumlaut" . "ʺ") + ("spacespace" . "") ;; two #x002063 +; ("module" . "") ?? + ("some" . "ϵ") + + ;; Not in Standard Isabelle Symbols + ;; (some are in X-Symbols) + + ("stareq" . "≛") + ("defeq" . "≝") + ("questioneq" . "≟") + ("vartheta" . "ϑ") + ; ("o" . "ø") + ("varpi" . "ϖ") + ("varrho" . "ϱ") + ("varsigma" . "ς") + ("varphi" . "φ") + ("hbar" . "ℏ") + ("ell" . "ℓ") + ("ast" . "∗") + + ("bigcirc" . "◯") + ("bigtriangleup" . "△") + ("bigtriangledown" . "▽") + ("ni" . "∋") + ("mid" . "∣") + ("notlt" . "≮") + ("notle" . "≰") + ("notprec" . "⊀") + ("notpreceq" . "⋠") + ("notsubset" . "⊄") + ("notsubseteq" . "⊈") + ("notsqsubseteq" . "⋢") + ("notgt" . "≯") + ("notge" . "≱") + ("notsucc" . "⊁") + ("notsucceq" . "⋡") + ("notsupset" . "⊅") + ("notsupseteq" . "⊉") + ("notsqsupseteq" . "⋣") + ("notequiv" . "≢") + ("notsim" . "≁") + ("notsimeq" . "≄") + ("notapprox" . "≉") + ("notcong" . "≇") + ("notasymp" . "≭") + ("nearrow" . "↗") + ("searrow" . "↘") + ("swarrow" . "↙") + ("nwarrow" . "↖") + ("vdots" . "⋮") + ("ddots" . "⋱") + ("closequote" . "’") + ("openquote" . "‘") + ("opendblquote" . "”") + ("closedblquote" . "“") + ("emdash" . "—") + ("prime" . "′") + ("doubleprime" . "″") + ("tripleprime" . "‴") + ("quadrupleprime" . "⁗") + ("nbspace" . " ") + ("thinspace" . " ") + ("notni" . "∌") + ("colonequals" . "≔") + ("foursuperior" . "⁴") + ("fivesuperior" . "⁵") + ("sixsuperior" . "⁶") + ("sevensuperior" . "⁷") + ("eightsuperior" . "⁸") + ("ninesuperior" . "⁹"))) + "Table mapping Isabelle symbol token names to Unicode strings. + +You can adjust this table to add more entries, or to change entries for +glyphs that not are available in your Emacs or chosen font. + +The token TNAME is made into the token \\< TNAME >. +The string mapping can be anything, but should be such that +tokens can be uniquely recovered from a decoded text; otherwise +results will be undefined when files are saved." + :type '(repeat (cons (string :tag "Token name") + (string :tag "Unicode string"))) + :set 'proof-set-value + :group 'isabelle + :tag "Isabelle Unicode Token Mapping") + + +(defcustom isar-shortcut-alist + '(; short cut, unicode string +; ("<>" . "⋄") +; ("|>" . "⊳") + ("\\/" . "∨") + ("/\\" . "∧") + ("+O" . "⊕") + ("-O" . "⊖") + ("xO" . "⊗") + ("/O" . "⊘") + (".O" . "⊙") + ("|+" . "†") + ("|++" . "‡") + ("<=" . "≤") + ("|-" . "⊢") + (">=" . "≥") + ("-|" . "⊣") + ("||" . "∥") + ("==" . "≡") + ("~=" . "≃") + ("~~~" . "≍") + ("~~" . "≈") + ("~==" . "≅") + ("|<>|" . "⋈") + ("|=" . "⊨") + ("=." . "≐") + ("_|_" . "⊥") + ("</" . "≮") + (">=/" . "≱") + ("=/" . "≠") + ("==/" . "≢") + ("~/" . "≁") + ("~=/" . "≄") + ("~~/" . "≉") + ("~==/" . "≇") + ("<-" . "←") + ("<=" . "⇐") + ("->" . "→") + ("=>" . "⇒") + ("<->" . "↔") + ("<=>" . "⇔") +; ("|->" . "↦") + ("<--" . "⟵") + ("<==" . "⟸") + ("-->" . "⟶") + ("==>" . "⟹") + ("<==>" . "⟷") + ("|-->" . "⟼") + ("<-->" . "⟷") + ("<<" . "⟪") + ("[|" . "⟦") + (">>" . "⟫") + ("|]" . "⟧") + ("``" . "”") +; ("''" . "“") +; ("--" . "–") + ("---" . "—") +; ("''" . "″") +; ("'''" . "‴") +; ("''''" . "⁗") +; (":=" . "≔") + ;; some word shortcuts, started with backslash otherwise + ;; too annoying. + ("\nat" . "ℕ") + ("\int" . "ℤ") + ("\rat" . "ℚ") + ("\real" . "ℝ") + ("\complex" . "ℂ") + ("\euro" . "€") + ("\yen" . "¥") + ("\cent" . "¢")) + "Shortcut key sequence table for Unicode strings. + +You can adjust this table to add more entries, or to change entries for +glyphs that not are available in your Emacs or chosen font. + +These shortcuts are only used for input; no reverse conversion is +performed. But if tokens exist for the target of shortcuts, they +will be used on saving the buffer." + :type '(repeat (cons (string :tag "Shortcut sequence") + (string :tag "Unicode string"))) + :set 'proof-set-value + :group 'isabelle + :tag "Isabelle Unicode Token Mapping") + + + + +;; +;; prover symbol support +;; + +(eval-after-load "isar" + '(setq + proof-xsym-activate-command + (isar-markup-ml "change print_mode (insert (op =) \"xsymbols\")") + proof-xsym-deactivate-command + (isar-markup-ml "change print_mode (remove (op =) \"xsymbols\")"))) + + + +(provide 'isar-unicode-tokens2) + +;;; isar-unicode-tokens2.el ends here diff --git a/lib/format-hack.el b/lib/format-hack.el new file mode 100644 index 00000000..27526b3c --- /dev/null +++ b/lib/format-hack.el @@ -0,0 +1,195 @@ + + (rear-nonsticky (t "idsubscript1") + (t "subscript1") + (t "superscript1")))) + +(defun format-deannotate-region (from to translations next-fn) + "Translate annotations in the region into text properties. +This sets text properties between FROM to TO as directed by the +TRANSLATIONS and NEXT-FN arguments. + +NEXT-FN is a function that searches forward from point for an annotation. +It should return a list of 4 elements: \(BEGIN END NAME POSITIVE). BEGIN and +END are buffer positions bounding the annotation, NAME is the name searched +for in TRANSLATIONS, and POSITIVE should be non-nil if this annotation marks +the beginning of a region with some property, or nil if it ends the region. +NEXT-FN should return nil if there are no annotations after point. + +The basic format of the TRANSLATIONS argument is described in the +documentation for the `format-annotate-region' function. There are some +additional things to keep in mind for decoding, though: + +When an annotation is found, the TRANSLATIONS list is searched for a +text-property name and value that corresponds to that annotation. If the +text-property has several annotations associated with it, it will be used only +if the other annotations are also in effect at that point. The first match +found whose annotations are all present is used. + +The text property thus determined is set to the value over the region between +the opening and closing annotations. However, if the text-property name has a +non-nil `format-list-valued' property, then the value will be consed onto the +surrounding value of the property, rather than replacing that value. + +There are some special symbols that can be used in the \"property\" slot of +the TRANSLATIONS list: PARAMETER and FUNCTION \(spelled in uppercase). +Annotations listed under the pseudo-property PARAMETER are considered to be +arguments of the immediately surrounding annotation; the text between the +opening and closing parameter annotations is deleted from the buffer but saved +as a string. + +The surrounding annotation should be listed under the pseudo-property +FUNCTION. Instead of inserting a text-property for this annotation, +the function listed in the VALUE slot is called to make whatever +changes are appropriate. It can also return a list of the form +\(START LOC PROP VALUE) which specifies a property to put on. The +function's first two arguments are the START and END locations, and +the rest of the arguments are any PARAMETERs found in that region. + +Any annotations that are found by NEXT-FN but not defined by TRANSLATIONS +are saved as values of the `unknown' text-property \(which is list-valued). +The TRANSLATIONS list should usually contain an entry of the form + \(unknown \(nil format-annotate-value)) +to write these unknown annotations back into the file." + (save-excursion + (save-restriction + (narrow-to-region (point-min) to) + (goto-char from) + (let (next open-ans todo loc unknown-ans) + (while (setq next (funcall next-fn)) + (let* ((loc (nth 0 next)) + (end (nth 1 next)) + (name (nth 2 next)) + (positive (nth 3 next)) + (found nil)) + + ;; Delete the annotation + (delete-region loc end) + (cond + ;; Positive annotations are stacked, remembering location + (positive (push `(,name ((,loc . nil))) open-ans)) + ;; It is a negative annotation: + ;; Close the top annotation & add its text property. + ;; If the file's nesting is messed up, the close might not match + ;; the top thing on the open-annotations stack. + ;; If no matching annotation is open, just ignore the close. + ((not (assoc name open-ans)) + (message "Extra closing annotation (%s) in file" name)) + ;; If one is open, but not on the top of the stack, close + ;; the things in between as well. Set `found' when the real + ;; one is closed. + (t + (while (not found) + (let* ((top (car open-ans)) ; first on stack: should match. + (top-name (car top)) ; text property name + (top-extents (nth 1 top)) ; property regions + (params (cdr (cdr top))) ; parameters + (aalist translations) + (matched nil)) + (if (equal name top-name) + (setq found t) + (message "Improper nesting in file.")) + ;; Look through property names in TRANSLATIONS + (while aalist + (let ((prop (car (car aalist))) + (alist (cdr (car aalist)))) + ;; And look through values for each property + (while alist + (let ((value (car (car alist))) + (ans (cdr (car alist)))) + (if (member top-name ans) + ;; This annotation is listed, but still have to + ;; check if multiple annotations are satisfied + (if (member nil (mapcar (lambda (r) + (assoc r open-ans)) + ans)) + nil ; multiple ans not satisfied + ;; If there are multiple annotations going + ;; into one text property, split up the other + ;; annotations so they apply individually to + ;; the other regions. + (setcdr (car top-extents) loc) + (let ((to-split ans) this-one extents) + (while to-split + (setq this-one + (assoc (car to-split) open-ans) + extents (nth 1 this-one)) + (if (not (eq this-one top)) + (setcar (cdr this-one) + (format-subtract-regions + extents top-extents))) + (setq to-split (cdr to-split)))) + ;; Set loop variables to nil so loop + ;; will exit. + (setq alist nil aalist nil matched t + ;; pop annotation off stack. + open-ans (cdr open-ans)) + (let ((extents top-extents) + (start (car (car top-extents))) + (loc (cdr (car top-extents)))) + (while extents + (cond + ;; Check for pseudo-properties + ((eq prop 'PARAMETER) + ;; A parameter of the top open ann: + ;; delete text and use as arg. + (if open-ans + ;; (If nothing open, discard). + (setq open-ans + (cons + (append (car open-ans) + (list + (buffer-substring + start loc))) + (cdr open-ans)))) + (delete-region start loc)) + ((eq prop 'FUNCTION) + ;; Not a property, but a function. + (let ((rtn + (apply value start loc params))) + (if rtn (push rtn todo)))) + (t + ;; Normal property/value pair + (setq todo + (cons (list start loc prop value) + todo)))) + (setq extents (cdr extents) + start (car (car extents)) + loc (cdr (car extents)))))))) + (setq alist (cdr alist)))) + (setq aalist (cdr aalist))) + (if (not matched) + ;; Didn't find any match for the annotation: + ;; Store as value of text-property `unknown'. + (let ((extents top-extents) + (start (car (car top-extents))) + (loc (or (cdr (car top-extents)) loc))) + (while extents + (setq open-ans (cdr open-ans) + todo (cons (list start loc 'unknown top-name) + todo) + unknown-ans (cons name unknown-ans) + extents (cdr extents) + start (car (car extents)) + loc (cdr (car extents)))))))))))) + + ;; Once entire file has been scanned, add the properties. + (while todo + (let* ((item (car todo)) + (from (nth 0 item)) + (to (nth 1 item)) + (prop (nth 2 item)) + (val (nth 3 item))) + + (if (numberp val) ; add to ambient value if numeric + (format-property-increment-region from to prop val 0) + (put-text-property + from to prop + (cond ((get prop 'format-list-valued) ; value gets consed onto + ; list-valued properties + (let ((prev (get-text-property from prop))) + (cons val (if (listp prev) prev (list prev))))) + (t val))))) ; normally, just set to val. + (setq todo (cdr todo))) + + (if unknown-ans + (message "Unknown annotations: %s" unknown-ans)))))) diff --git a/lib/unicode-tokens2.el b/lib/unicode-tokens2.el new file mode 100644 index 00000000..fe5b8306 --- /dev/null +++ b/lib/unicode-tokens2.el @@ -0,0 +1,666 @@ +;;; unicode-tokens2.el --- Support for editing tokens for Unicode characters +;; +;; Copyright(C) 2008 David Aspinall / LFCS Edinburgh +;; Author: David Aspinall <David.Aspinall@ed.ac.uk> +;; License: GPL (GNU GENERAL PUBLIC LICENSE) +;; +;; $Id$ +;; +;; A few functions are adapted from `xmlunicode.el' by Norman Walsh. +;; Created: 2004-07-21, Version: 1.6, Copyright (C) 2003 Norman Walsh +;; +;; This is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This software is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: +;; +;; This is a partial replacement for X-Symbol for Proof General. +;; STATUS: experimental. +;; +;; Functions to help insert tokens that represent Unicode characters +;; and control code sequences for changing the layout. Character +;; tokens are useful for programs that do not understand a Unicode +;; encoding. +;; + +;; TODO: +;; -- add input methods for subscript/superscripts (further props in general) +;; -- after change function so inserting control sequences works? or other support +;; -- one-char subs should not be sticky so doesn't extend +;; -- make property removal more accurate/patch in font-lock +;; -- disentangle Isabelle specific code +;; -- perhaps separate out short-cut input method and don't use for tokens +;; -- cleanup insertion functions +;; -- investigate testing for an appropriate glyph + +(require 'cl) + +(require 'unicode-chars) ; list of Unicode characters + +;; +;; Variables that should be set +;; (default settings are for XML, but configuration incomplete; +;; use xmlunicode.el instead) +;; + +(defvar unicode-tokens2-charref-format "&#x%x;" + "The format for numeric character references") + +(defvar unicode-tokens2-token-format "&%x;" + "The format for token character references") + +(defvar unicode-tokens2-token-name-alist nil + "Mapping of token names to Unicode strings.") + +(defvar unicode-tokens2-glyph-list nil + "List of available glyphs, as characters. +If not set, constructed to include glyphs for all tokens. ") + +(defvar unicode-tokens2-token-prefix "&" + "Prefix for start of tokens to insert.") + +(defvar unicode-tokens2-token-suffix ";" + "Suffix for end of tokens to insert.") + +(defvar unicode-tokens2-control-token-match nil + "Regexp matching tokens") + +(defvar unicode-tokens2-token-match "&\\([A-Za-z]\\);" + "Regexp matching tokens") + +(defvar unicode-tokens2-hexcode-match "&#[xX]\\([0-9a-fA-F]+\\);" + "Regexp matching numeric token string") + +(defvar unicode-tokens2-next-character-regexp "&#[xX]\\([0-9a-fA-F]+\\);\\|." + "Regexp matching a logical character following a control code.") + +(defvar unicode-tokens2-shortcut-alist + "An alist of keyboard shortcuts to unicode strings. +The alist is added to the input mode for tokens. +Behaviour is much like abbrev.") + +;; +;; Faces +;; + +;; +;; TODO: make these into faces but extract attributes +;; to use in `unicode-tokens2-annotation-translations'. +;; Let that be dynamically changeable +;; TODO: choose family acccording to likely architecture and what's available +(cond + ((not (featurep 'xemacs)) +(defface unicode-tokens2-script-font-face + (cond + ((eq window-system 'x) ; Linux/Unix + '((t :family "URW Chancery L"))) + ((or ; Mac + (eq window-system 'ns) + (eq window-system 'carbon)) + '((t :family "Lucida Calligraphy")))) + "Script font face") + +(defface unicode-tokens2-fraktur-font-face + (cond + ((eq window-system 'x) ; Linux/Unix + '((t :family "URW Bookman L"))) ;; not at all black letter! + ((or ; Mac + (eq window-system 'ns) + (eq window-system 'carbon)) + '((t :family "Lucida Blackletter")))) + "Fraktur font face") + +(defface unicode-tokens2-serif-font-face + (cond + ((eq window-system 'x) ; Linux/Unix + '((t :family "Liberation Serif"))) + ((or ; Mac + (eq window-system 'ns) + (eq window-system 'carbon)) + '((t :family "Lucida")))) + "Serif (roman) font face"))) + + +;; +;; Variables initialised in unicode-tokens2-initialise +;; + +(defvar unicode-tokens2-max-token-length 10 + "Maximum length of a token in underlying encoding.") + +(defvar unicode-tokens2-codept-charname-alist nil + "Alist mapping unicode code point to character names.") + +(defvar unicode-tokens2-token-alist nil + "Mapping of tokens to Unicode strings. +Also used as a flag to detect if `unicode-tokens2-initialise' has been called.") + +(defvar unicode-tokens2-ustring-alist nil + "Mapping of Unicode strings to tokens.") + + +;; +;;; Code: +;; + +(defun unicode-tokens2-insert-char (arg codepoint) + "Insert the Unicode character identified by CODEPOINT. +If ARG is non-nil, ignore available glyphs." + (let ((glyph (memq codepoint unicode-tokens2-glyph-list))) + (cond + ((and (decode-char 'ucs codepoint) (or arg glyph)) + (ucs-insert codepoint)) ;; glyph converted to token on save + (t + (insert (format unicode-tokens2-charref-format codepoint)))))) + +(defun unicode-tokens2-insert-string (arg ustring) + "Insert a Unicode string. +If a prefix is given, the string will be inserted regardless +of whether or not it has displayable glyphs; otherwise, a +numeric character reference for whichever codepoints are not +in the unicode-tokens2-glyph-list." + (mapcar (lambda (char) + (unicode-tokens2-insert-char arg char)) + ustring)) + +(defun unicode-tokens2-character-insert (arg &optional argname) + "Insert a Unicode character by character name, with completion. +If a prefix is given, the character will be inserted regardless +of whether or not it has a displayable glyph; otherwise, a +numeric character reference is inserted if the codepoint is not +in the `unicode-tokens2-glyph-list'. If argname is given, it is used for +the prompt. If argname uniquely identifies a character, that +character is inserted without the prompt." + (interactive "P") + (let* ((completion-ignore-case t) + (uniname (if (stringp argname) argname "")) + (charname + (if (eq (try-completion uniname unicode-chars-alist) t) + uniname + (completing-read + "Unicode name: " + unicode-chars-alist + nil t uniname))) + codepoint glyph) + (setq codepoint (cdr (assoc charname unicode-chars-alist))) + (unicode-tokens2-insert-char arg codepoint))) + +(defun unicode-tokens2-token-insert (arg &optional argname) + "Insert a Unicode string by a token name, with completion. +If a prefix is given, the string will be inserted regardless +of whether or not it has displayable glyphs; otherwise, a +numeric character reference for whichever codepoints are not +in the unicode-tokens2-glyph-list. If argname is given, it is used for +the prompt. If argname uniquely identifies a character, that +character is inserted without the prompt." + (interactive "P") + (let* ((stokname (if (stringp argname) argname "")) + (tokname + (if (eq (try-completion stokname unicode-tokens2-token-name-alist) t) + stokname + (completing-read + "Token name: " + unicode-tokens2-token-name-alist + nil t stokname))) + charname ustring) + (setq ustring (cdr (assoc tokname unicode-tokens2-token-name-alist))) + (unicode-tokens2-insert-string arg ustring))) + +(defun unicode-tokens2-replace-token-after (length) + (let ((bpoint (point)) ustring) + (save-excursion + (forward-char length) + (save-match-data + (while (re-search-backward + unicode-tokens2-token-match + (max (- bpoint unicode-tokens2-max-token-length) + (point-min)) t nil) + (setq ustring + (assoc (match-string 1) unicode-tokens2-token-name-alist)) + (if ustring ;; TODO: should check on glyphs here + (progn + (let ((matchlen (- (match-end 0) (match-beginning 0)))) + (replace-match (replace-quote (cdr ustring))) + ;; was: (format "%c" (decode-char 'ucs (cadr codept))) + (setq length + (+ (- length matchlen) (length (cdr ustring))))))))))) + length) + + +;;stolen from hen.el which in turn claims to have stolen it from cxref +(defun unicode-tokens2-looking-backward-at (regexp) + "Return t if text before point matches regular expression REGEXP. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them." + (save-excursion + (let ((here (point))) + (if (re-search-backward regexp (point-min) t) + (if (re-search-forward regexp here t) + (= (point) here)))))) + +;; TODO: make this work for control tokens. +;; But it's a bit nasty and introduces font-lock style complexity again. +;; Better if we stick with dedicated input methods. +(defun unicode-tokens2-electric-suffix () + "Detect tokens and replace them with the appropriate string. +This is bound to the character ending `unicode-tokens2-token-suffix' +if there is such a unique character." + (interactive) + (let ((pos (point)) + (case-fold-search nil) + amppos codept ustring) + (search-backward unicode-tokens2-token-prefix nil t nil) + (setq amppos (point)) + (goto-char pos) + (cond + ((unicode-tokens2-looking-backward-at unicode-tokens2-token-match) + (progn + (re-search-backward unicode-tokens2-token-match nil t nil) + (if (= amppos (point)) + (progn + (setq ustring + (assoc (match-string 1) + unicode-tokens2-token-name-alist)) + (if ustring ;; todo: check glyphs avail/use insert fn + (replace-match (replace-quote (cdr ustring))) + ;; was (format "%c" (decode-char 'ucs (cdr codept)))) + (progn + (goto-char pos) + (insert unicode-tokens2-token-suffix)))) + (progn + (goto-char pos) + (insert unicode-tokens2-token-suffix))))) + ((unicode-tokens2-looking-backward-at unicode-tokens2-hexcode-match) + (progn + (re-search-backward unicode-tokens2-hexcode-match nil t nil) + (if (= amppos (point)) + (progn + (setq codept (string-to-number (match-string 1) 16)) + (if ;; todo : check glyph (memq codept unicode-tokens2-glyph-list) + codept + (replace-match (format "%c" (decode-char 'ucs (cdr codept)))) + (progn + (goto-char pos) + (insert unicode-tokens2-token-suffix)))) + (progn + (goto-char pos) + (insert unicode-tokens2-token-suffix))))) + (t + (insert unicode-tokens2-token-suffix))))) + +(defvar unicode-tokens2-rotate-glyph-last-char nil) + +(defun unicode-tokens2-rotate-glyph-forward (&optional n) + "Rotate the character before point in the current code page, by N steps. +If no character is found at the new codepoint, no change is made. +This function may only work reliably for GNU Emacs >= 23." + (interactive "p") + (if (> (point) (point-min)) + (let* ((codept (or (if (or (eq last-command + 'unicode-tokens2-rotate-glyph-forward) + (eq last-command + 'unicode-tokens2-rotate-glyph-backward)) + unicode-tokens2-rotate-glyph-last-char) + (char-before (point)))) + (page (/ codept 256)) + (pt (mod codept 256)) + (newpt (mod (+ pt (or n 1)) 256)) + (newcode (+ (* 256 page) newpt)) + (newname (assoc newcode + unicode-tokens2-codept-charname-alist)) + ;; NOTE: decode-char 'ucs here seems to fail on Emacs <23 + (newchar (decode-char 'ucs newcode))) + (when (and newname newchar) + (delete-char -1) + (insert-char newchar 1) + (message (cdr newname)) + (setq unicode-tokens2-rotate-glyph-last-char nil)) + (unless (and newname newchar) + (message "No character at code %d" newcode) + (setq unicode-tokens2-rotate-glyph-last-char newcode))))) + +(defun unicode-tokens2-rotate-glyph-backward (&optional n) + "Rotate the character before point in the current code page, by -N steps. +If no character is found at the new codepoint, no change is made. +This function may only work reliably for GNU Emacs >= 23." + (interactive "p") + (unicode-tokens2-rotate-glyph-forward (if n (- n) -1))) + + + +(defconst unicode-tokens2-ignored-properties + '(vanilla type fontified face auto-composed + rear-nonsticky field inhibit-line-move-field-capture + utoks) + "Text properties to ignore when saving files.") + +(defconst unicode-tokens2-annotation-translations + `((font-lock-face + (bold "bold") + (unicode-tokens2-script-font-face "script") + (unicode-tokens2-fraktur-font-face "frak") + (unicode-tokens2-serif-font-face "serif") + (proof-declaration-name-face "loc1") + (default )) + (display + ((raise 0.4) "superscript") + ((raise -0.4) "subscript") + ((raise 0.35) "superscript1") + ((raise -0.35) "subscript1") + ((raise -0.3) "idsubscript1") + (default ))) + "Text property table for annotations.") + + +(defun unicode-tokens2-font-lock-keywords () + "Calculate value for `font-lock-keywords'." + (when (fboundp 'compose-region) + (let ((alist nil)) + (dolist (x unicode-tokens2-token-name-alist) + (when (and (if (fboundp 'char-displayable-p) + (reduce (lambda (x y) (and x (char-displayable-p y))) + (cdr x) + :initial-value t)) + t) + (not (assoc (car x) alist))) ;Not yet in alist. + (push (cons + (format unicode-tokens2-token-format (car x)) + (cdr x)) + alist))) + (when alist + (setq unicode-tokens2-token-alist alist) + `((,(regexp-opt (mapcar 'car alist) t) + (0 (unicode-tokens2-compose-symbol) + ;; In Emacs-21, if the `override' field is nil, the face + ;; expressions is only evaluated if the text has currently + ;; no face. So force evaluation by using `keep'. + keep)))))) + + + + +(defvar unicode-tokens2-next-control-token-seen-token nil + "Records currently open single-character control token.") + +(defun unicode-tokens2-next-control-token () + "Find next control token and interpret it. +If `unicode-tokens2-next-control-token' is non-nil, end current control sequence +after next character (single character control sequence)." + (let (result) + (when unicode-tokens2-next-control-token-seen-token + (if (re-search-forward unicode-tokens2-next-character-regexp nil t) + (setq result (list (match-end 0) (match-end 0) + unicode-tokens2-next-control-token-seen-token + nil))) + (setq unicode-tokens2-next-control-token-seen-token nil)) + (while (and (not result) + (re-search-forward unicode-tokens2-control-token-match nil t)) + (let* + ((tok (match-string 1)) + (annot + (cond + ((equal tok "bsup") '("superscript" t)) + ((equal tok "esup") '("superscript" nil)) + ((equal tok "bsub") '("subscript" t)) + ((equal tok "esub") '("subscript" nil)) + ((equal tok "bbold") '("bold" t)) + ((equal tok "ebold") '("bold" nil)) + ((equal tok "bitalic") '("italic" t)) + ((equal tok "eitalic") '("italic" nil)) + ((equal tok "bscript") '("script" t)) + ((equal tok "escript") '("script" nil)) + ((equal tok "bfrak") '("frak" t)) + ((equal tok "efrak") '("frak" nil)) + ((equal tok "bserif") '("serif" t)) + ((equal tok "eserif") '("serif" nil)) + ((equal tok "loc") + (list (setq unicode-tokens2-next-control-token-seen-token + "loc1") t)) + ((equal tok "sup") + (list (setq unicode-tokens2-next-control-token-seen-token + "superscript1") t)) + ((equal tok "sub") + (list (setq unicode-tokens2-next-control-token-seen-token + "subscript1") t)) + ((equal tok "isub") + (list (setq unicode-tokens2-next-control-token-seen-token + "idsubscript1") t))))) + (if annot + (setq result + (append + (list (match-beginning 0) (match-end 0)) + annot))))) + result)) + +;; TODO: this should be instance specific +(defconst unicode-tokens2-annotation-control-token-alist + '(("bold" . ("bbold" . "ebold")) + ("subscript" . ("bsub" . "esub")) + ("superscript" . ("bsup" . "esup")) + ("subscript1" . ("sub")) + ("superscript1" . ("sup")) + ("loc1" . ("loc")) + ;; non-standard + ("italic" . ("bitalic" . "eitalic")) + ("script" . ("bscript" . "escript")) + ("frak" . ("bfrak" . "efrak")) + ("serif" . ("bserif" . "eserif")))) + +(defun unicode-tokens2-make-token-annotation (annot positive) + "Encode a text property start/end by adding an annotation in the file." + (let ((toks (cdr-safe + (assoc annot unicode-tokens2-annotation-control-token-alist)))) + (cond + ((and toks positive) + (format unicode-tokens2-control-token-format (car toks))) + ((and toks (cdr toks)) + (format unicode-tokens2-control-token-format (cdr toks))) + (t "")))) + +(defun unicode-tokens2-find-property (name) + (let ((props unicode-tokens2-annotation-translations) + prop vals val vname) + (catch 'return + (while props + (setq prop (caar props)) + (setq vals (cdar props)) + (while vals + (setq val (car vals)) + (if (member name (cdr val)) + (throw 'return (list prop (car val)))) + (setq vals (cdr vals))) + (setq props (cdr props)))))) + +(defun unicode-tokens2-annotate-region (beg end &optional annot) + (interactive "r") + (or annot + (if (interactive-p) + (setq annot + (completing-read "Annotate region as: " + unicode-tokens2-annotation-control-token-alist + nil t)) + (error "In unicode-tokens2-annotation-control-token-alist: TYPE must be given."))) + (add-text-properties beg end + (unicode-tokens2-find-property annot))) + +(defun unicode-tokens2-annotate-string (annot string) + (add-text-properties 0 (length string) + (unicode-tokens2-find-property annot) + string) + string) + +(defun unicode-tokens2-unicode-to-tokens (&optional start end buffer) + "Encode a buffer to save as a tokenised file." + (let ((case-fold-search proof-case-fold-search) + (buffer-undo-list t) + (modified (buffer-modified-p))) + (save-restriction + (save-excursion + (narrow-to-region (or start (point-min)) (or end (point-max))) + (format-insert-annotations + (format-annotate-region (point-min) (point-max) + unicode-tokens2-annotation-translations + 'unicode-tokens2-make-token-annotation + unicode-tokens2-ignored-properties)) +;; alternative experiment: store original tokens inside text properties +;; (unicode-tokens2-replace-strings-unpropertise) + (format-replace-strings unicode-tokens2-ustring-alist + nil (point-min) (point-max)) + (set-buffer-modified-p modified))))) + + +(defun unicode-tokens2-replace-strings-propertise (alist &optional beg end) + "Do multiple replacements on the buffer. +ALIST is a list of (FROM . TO) pairs, which should be proper arguments to +`search-forward' and `replace-match', respectively. +The original contents FROM are retained in the buffer in the text property `utoks'. +Optional args BEG and END specify a region of the buffer on which to operate." + (save-excursion + (save-restriction + (or beg (setq beg (point-min))) + (if end (narrow-to-region (point-min) end)) + (while alist + (let ((from (car (car alist))) + (to (cdr (car alist))) + (case-fold-search nil)) + (goto-char beg) + (while (search-forward from nil t) + (goto-char (match-beginning 0)) + (insert to) + (set-text-properties (- (point) (length to)) (point) + (cons 'utoks + (cons from + (text-properties-at (point))))) + (delete-region (point) (+ (point) (- (match-end 0) + (match-beginning 0))))) + (setq alist (cdr alist))))))) + +;; NB: this is OK, except that now if we edit with raw symbols, we +;; don't get desired effect but instead rely on hidden annotations. +;; Also doesn't work as easily with quail. +;; Can we have a sensible mixture of both things? +(defun unicode-tokens2-replace-strings-unpropertise (&optional beg end) + "Reverse the effect of `unicode-tokens2-replace-strings-unpropertise'. +Replaces contiguous text with 'utoks' property with property value." + (let ((pos (or beg (point-min))) + (lim (or end (point-max))) + start to) + (save-excursion + (while (and + (setq pos (next-single-property-change pos 'utoks nil lim)) + (< pos lim)) + (if start + (progn + (setq to (get-text-property start 'utoks)) + (goto-char start) + (insert to) + (set-text-properties start (point) + (text-properties-at start)) + (delete-region (point) (+ (point) (- pos start))) + (setq start nil)) + (setq start pos)))))) + + + + + +;; +;; Minor mode +;; + +(defvar unicode-tokens2-mode-map (make-sparse-keymap) + "Key map used for Unicode Tokens mode.") + +(define-minor-mode unicode-tokens2-mode + "Minor mode for unicode token input." nil " Utoks" + unicode-tokens2-mode-map + (unless unicode-tokens2-token-alist + (unicode-tokens2-initialise)) + (when unicode-tokens2-mode + (when (boundp 'text-property-default-nonsticky) + (make-variable-buffer-local 'text-property-default-nonsticky) + (setq text-property-default-nonsticky + ;; We want to use display property with stickyness + (delete '(display . t) text-property-default-nonsticky))) + (if (and + (fboundp 'set-buffer-multibyte) + (not (buffer-base-buffer))) + (set-buffer-multibyte t)) + (let ((inhibit-read-only t)) + ;; format is supposed to manage undo, but doesn't remap + (setq buffer-undo-list nil) + (format-decode-buffer 'unicode-tokens2)) + (set-input-method "Unicode tokens")) + (unless unicode-tokens2-mode + (when (boundp 'text-property-default-nonsticky) + (add-to-list 'text-property-default-nonsticky '(display . t))) + ;; leave buffer encoding as is + (let ((inhibit-read-only t) + (modified (buffer-modified-p))) + ;; format is supposed to manage undo, but doesn't remap + (setq buffer-undo-list nil) + (format-encode-buffer 'unicode-tokens2) + (unicode-tokens2-remove-properties (point-min) (point-max)) + (set-buffer-modified-p modified)) + (inactivate-input-method))) + +;; +;; Initialisation +;; +(defun unicode-tokens2-initialise () + "Initialise tables." + ;; Calculate max token length + (let ((tlist unicode-tokens2-token-name-alist) + (len 0) tok) + (while tlist + (when (> (length (caar tlist)) 0) + (setq len (length (caar tlist))) + (setq tok (caar tlist))) + (setq tlist (cdr tlist))) + (setq unicode-tokens2-max-token-length + (length (format unicode-tokens2-token-format tok)))) + ;; Names from code points + (setq unicode-tokens2-codept-charname-alist + (mapcar (lambda (namechar) + (cons (cdr namechar) (car namechar))) + unicode-chars-alist)) + ;; Default assumed available glyph list based on tokens; + ;; TODO: filter with what's really available, if can find out. + ;; TODO: allow altering of this when the token-name-alist is reset + ;; in proof-token-name-alist (unless test here is for specific setting) + (unless unicode-tokens2-glyph-list + (setq unicode-tokens2-glyph-list + (reduce (lambda (glyphs tokustring) + (append glyphs (string-to-list (cdr tokustring)))) + unicode-tokens2-token-name-alist + :initial-value nil))) + (unicode-tokens2-quail-define-rules) + ;; Key bindings + (if (= (length unicode-tokens2-token-suffix) 1) + (define-key unicode-tokens2-mode-map + (vector (string-to-char unicode-tokens2-token-suffix)) + 'unicode-tokens2-electric-suffix)) + (define-key unicode-tokens2-mode-map [(control ?,)] + 'unicode-tokens2-rotate-glyph-backward) + (define-key unicode-tokens2-mode-map [(control ?.)] + 'unicode-tokens2-rotate-glyph-forward) + ;; otherwise action on space like in X-Symbol? + ) + + +(provide 'unicode-tokens2) + +;;; unicode-tokens2.el ends here |