Calculate token match regexp in a more complex way in an attempt to allow for Coq token grammar.

Alter composition of strings to place characters by baseline. Add docstring and menu notes that the replacement functions start from point.
author: David Aspinall <da@inf.ed.ac.uk> 2009-09-07 08:47:00 +0000
committer: David Aspinall <da@inf.ed.ac.uk> 2009-09-07 08:47:00 +0000
commit: f282ac0c5d1f525f605cc382d6f1534ee135d9b7 (patch)
tree: af8a6ca1a8ff9c88e4c26b3b26b849ffa252aa6d /lib
parent: 3ff5ab468c2565083d95030047216bf42bc55ac2 (diff)
1 files changed, 64 insertions, 29 deletions
diff --git a/lib/unicode-tokens.el b/lib/unicode-tokens.el
index 529ac8bc..fdb01709 100644
--- a/lib/unicode-tokens.el
+++ b/lib/unicode-tokens.el
@@ -80,7 +80,8 @@ Also used to format shortcuts.")
 
 (defvar unicode-tokens-token-variant-format-regexp nil
   "A regular expression which matches a token variant.
-Will not be regexp quoted, and after format is applied, must
+Will not be regexp quoted, and will be formatted with
+a nested regexp that matches any token.
 
 An example would be: \\\\(%s\\\\)\\\\(:?\\w+\\\\)
 
@@ -373,41 +374,64 @@ This function also initialises the important tables for the mode."
 	 (when (unicode-tokens-usable-composition comp)
 	   (unless (gethash tok hash)
 	     (puthash tok (cdr x) hash)
-	       (push tok toks)
-	       (if (stringp comp) ;; reverse map only for string comps
-		   (unless (or (gethash comp ucharhash)
-			       ;; ignore plain chars for reverse map
-			       (string-match "[a-zA-Z0-9]+" comp))
-		     (push comp uchars)
-		     (puthash comp tok ucharhash)))))))
+	     (push tok toks)
+	     (if (stringp comp) ;; reverse map only for string comps
+		 (unless (or (gethash comp ucharhash)
+			     ;; ignore plain chars for reverse map
+			     (string-match "[a-zA-Z0-9]+" comp))
+		   (push comp uchars)
+		   (puthash comp tok ucharhash)))))))
      (when toks
        (setq unicode-tokens-hash-table hash)
        (setq unicode-tokens-uchar-hash-table ucharhash)
-       (setq unicode-tokens-token-list (reverse toks))
        (setq unicode-tokens-uchar-regexp (regexp-opt uchars))
        (setq unicode-tokens-token-match-regexp
-	     (if unicode-tokens-token-variant-format-regexp
-		 (format unicode-tokens-token-variant-format-regexp
-			 (regexp-opt toks t))
-	       (regexp-opt (mapcar (lambda (tok)
-				     (format unicode-tokens-token-format tok))
-				   toks) 'words)))
+	     (unicode-tokens-calculate-token-match toks))
+       (setq unicode-tokens-token-list (nreverse toks))
        (cons
 	`(,unicode-tokens-token-match-regexp
 	  (0 (unicode-tokens-help-echo) prepend)
-	  (0 (unicode-tokens-font-lock-compose-symbol
-	      ,(- (regexp-opt-depth unicode-tokens-token-match-regexp) 1))
-	      prepend))
+	  (0 (unicode-tokens-font-lock-compose-symbol 1) prepend))
 	(unicode-tokens-control-font-lock-keywords)))))
 
+(defun unicode-tokens-calculate-token-match (toks)
+  "Calculate value for `unicode-tokens-token-match-regexp'"
+;  (with-syntax-table (standard-syntax-table)
+    ;; hairy logic based on Coq-style vs Isabelle-style configs
+    (if (string= "" (format unicode-tokens-token-format ""))
+	;; no special token format, parse separate words/symbols
+	(let* ((optoks 
+		(remove* "^\\(?:\\sw\\|\\s_\\)+$" 
+			 toks :test 'string-match))
+	       (idtoks
+		(set-difference toks optoks))
+	       (idorop
+		(concat "\\(\\_<"
+			(regexp-opt idtoks)
+			"\\_>\\|\\(?:\\B"
+			(regexp-opt optoks) 
+			"\\B\\)\\)")))
+	  (if unicode-tokens-token-variant-format-regexp
+	      (format unicode-tokens-token-variant-format-regexp
+		      idorop)
+	    idorop))
+      ;; otherwise, assumption is that token syntax delimits tokens
+      (if unicode-tokens-token-variant-format-regexp
+	  (format unicode-tokens-token-variant-format-regexp
+		  (regexp-opt toks))
+	(regexp-opt (mapcar (lambda (tok)
+			      (format unicode-tokens-token-format tok))
+			    toks)))));)
+
+
 (defun unicode-tokens-usable-composition (comp)
   "Return non-nil if the composition COMP seems to be usable.
 The check is with `char-displayable-p'."
   (cond
    ((stringp comp)
     (reduce (lambda (x y) (and x (char-displayable-p y)))
-	    comp
-	    :initial-value t))
+ 	    comp
+ 	    :initial-value t))
    ((characterp comp)
     (char-displayable-p comp))
    (comp ;; assume any other non-null is OK
@@ -428,7 +452,7 @@ The check is with `char-displayable-p'."
    ((stringp comp)
     ;; change a longer string into a sequence placing glyphs left-to-right.
     (let ((chars (nreverse (string-to-list comp)))
-	  (sep   '(11 . 9))
+	  (sep   '(5 . 3))
 	  res)
       (while chars
 	(setq res (cons (car chars) res))
@@ -440,8 +464,8 @@ The check is with `char-displayable-p'."
 
 (defun unicode-tokens-font-lock-compose-symbol (match)
   "Compose a sequence of chars into a symbol.
-Regexp match data number MATCH selects the token name, while 0 matches the
-whole expression.
+Regexp match data number MATCH selects the token name, while match
+number 1 matches the text to be replaced.
 Token name from MATCH is searched for in `unicode-tokens-hash-table'.
 The face property is set to the :family of `unicode-tokens-symbol-font-face'."
   (let* ((start     (match-beginning 0))
@@ -749,7 +773,8 @@ Available annotations chosen from `unicode-tokens-control-regions'."
 
 ;; handy for legacy Isabelle files, probably not useful in general.
 (defun unicode-tokens-replace-shortcuts ()
-  "Query-replace shortcuts in the buffer with compositions they expand to."
+  "Query-replace shortcuts in the buffer with compositions they expand to.
+Starts from point."
   (interactive)
   (let ((shortcut-regexp
 	 (regexp-opt (mapcar 'car unicode-tokens-shortcut-replacement-alist))))
@@ -769,7 +794,8 @@ Available annotations chosen from `unicode-tokens-control-regions'."
 	       (format unicode-tokens-token-format token)))))
 
 (defun unicode-tokens-replace-unicode ()
-  "Query-replace unicode sequences in the buffer with tokens having same appearance."
+  "Query-replace unicode sequences in the buffer with tokens having same appearance.
+Starts from point."
   (interactive)
   (let ((uchar-regexp unicode-tokens-uchar-regexp))
     ;; override the display of the regexp because it's huge!
@@ -881,6 +907,7 @@ Available annotations chosen from `unicode-tokens-control-regions'."
   (unicode-tokens-encode-in-temp-buffer
    (buffer-substring-no-properties beg end) 'buffer-substring))
 
+;;;###autoload
 (defun unicode-tokens-encode-str (str)
   "Return a unicode encoded version presentation of STR."
   (unicode-tokens-encode-in-temp-buffer str 'buffer-substring))
@@ -945,8 +972,8 @@ tokenised symbols."
 ;;
 
 (defun unicode-tokens-initialise ()
-  "Perform (re)initialisation for Unicode Tokens minor mode.
-Invoke this function to recalculate `font-lock-keywords' and other configuration
+  "Perform initialisation for Unicode Tokens minor mode.
+This function calculates `font-lock-keywords' and other configuration
 variables."
   (interactive)
   (unicode-tokens-copy-configuration-variables)
@@ -956,6 +983,14 @@ variables."
     (unicode-tokens-define-menu)
     flks))
 
+;; not as expected
+;; (defun unicode-tokens-restart ()
+;;   (interactive)
+;;   (unicode-tokens-mode 0)
+;;   (put 'unicode-tokens-font-lock-keywords major-mode nil)
+;;   (setq font-lock-set-defaults nil)
+;;   (unicode-tokens-mode 1))
+
 (defvar unicode-tokens-mode-map (make-sparse-keymap)
   "Key map used for Unicode Tokens mode.")
 
@@ -1252,9 +1287,9 @@ Commands available are:
        :help
        "Paste from clipboard, converting Unicode to tokens where possible"]
       ["Replace Shortcuts" unicode-tokens-replace-shortcuts
-       :help "Query-replace shortcut sequences with tokens they expand to"]
+       :help "Query-replace shortcut sequences with compositions they stand for, starting from point"]
       ["Replace Unicode" unicode-tokens-replace-unicode
-       :help "Query-replace Unicode characters with tokens where possible"]
+       :help "Query-replace Unicode characters with tokens where possible, starting from point"]
        "---"
       ["Show Control Tokens" unicode-tokens-show-controls
        :style toggle
author	David Aspinall <da@inf.ed.ac.uk>	2009-09-07 08:47:00 +0000
committer	David Aspinall <da@inf.ed.ac.uk>	2009-09-07 08:47:00 +0000
commit	f282ac0c5d1f525f605cc382d6f1534ee135d9b7 (patch)
tree	af8a6ca1a8ff9c88e4c26b3b26b849ffa252aa6d /lib
parent	3ff5ab468c2565083d95030047216bf42bc55ac2 (diff)