From da4e099739114b9db68db11ab2f5e1b0004eb6fb Mon Sep 17 00:00:00 2001 From: David Aspinall Date: Fri, 11 May 2007 08:23:58 +0000 Subject: Renamed file --- lib/xmlunicode.el | 786 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 786 insertions(+) create mode 100644 lib/xmlunicode.el (limited to 'lib/xmlunicode.el') diff --git a/lib/xmlunicode.el b/lib/xmlunicode.el new file mode 100644 index 00000000..cc19d9cf --- /dev/null +++ b/lib/xmlunicode.el @@ -0,0 +1,786 @@ +;;; xmlunicode.el --- Unicode support for XML -*- coding: utf-8 -*- + +;; $Id$ + +;; Copyright (C) 2003 Norman Walsh +;; Inspired in part by sgml-input, Copyright (C) 2001 Dave Love +;; Inspired in part by http://www.tbray.org/ongoing/When/200x/2003/09/27/UniEmacs + +;; Author: Norman Walsh +;; Maintainer: Norman Walsh +;; Created: 2004-07-21 +;; Version: 1.6 +;; CVS ID: $Id$ +;; Keywords: utf-8 unicode xml characters + +;; This file is NOT part of GNU emacs. + +;; This is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This software is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary + +;; This file provides a suite of functions designed to make it easier +;; to enter Unicode into Emacs. It is not, in fact, particularly XML-specific though +;; it does define an 'xml input-mode and does support the ISO 8879 entity names. + +;;; Usage + +;; 1. Before loading this file, make sure that the variable unicode-character-list is +;; defined. The unicode-character-list is a list of triples of the form: +;; +;; (codepoint "unicode name" "iso name") ; iso name can be nil +;; +;; e.g.: (defvar unicode-character-list +;; '( +;; ;Codept Unicode name ISO Name +;; (#x000000 "NULL" nil ) +;; (#x000001 "START OF HEADING" nil ) +;; ... +;; (#x0000a0 "NO-BREAK SPACE" "nbsp" ) +;; (#x0000a1 "INVERTED EXCLAMATION MARK" "iexcl" ) +;; (#x0000a2 "CENT SIGN" "cent" ) +;; ...)) +;; +;; +;; The easiest way to define this list is to load "unichars.el" +;; which should be available where you got this file. +;; +;; 2. Bind the functions defined in this file to keys you find convenient. +;; +;; The likely candidates are: +;; +;; unicode-character-insert insert a character by unicode name +;; (with completion) +;; iso8879-character-insert insert a character by ISO entity name +;; (with completion) +;; unicode-smart-double-quote inserts an appropriate double quote +;; unicode-smart-single-quote inserts an appropriate single quote +;; unicode-character-menu-insert choose special character from a popup menu +;; unicode-character-shortcut-insert enter a two-character shortcut for a +;; unicode character +;; +;; You can also create a standard Emacs menu for the character menu list +;; (instead of, or in addition to, the popup). To do that: +;; +;; (define-key APPROPRIATE-MAP [menu-bar unichar] +;; (cons "UniChar" unicode-character-menu-map)) +;; +;; Where APPROPRIATE-MAP is the name of the emacs keymap to bind into +;; +;; 3. If you want to use the xml input-mode, which provides automatic replacement for the +;; ISO entity names: +;; +;; (set-input-method 'xml) +;; +;; in the appropriate context. Unlike sgml-input, xml-input only inserts the +;; characters for which you have glyphs. It inserts other characters as numeric +;; character references. (If you want to insert a literal character even if +;; you don't have it in your fonts, use unicode-character-insert or +;; iso8879-character-insert with a prefix.) + +;;; Changes + +;; v1.7 +;; Require "cl" because, well, because it's required. Also fiddled with +;; the way single quotes are handled; the apostrophe is now part of the +;; cycle +;; v1.6 +;; Remove debugging code. Embarrassed again. :-( +;; v1.5 +;; Fixed bug in unicode-smart-single-quote. It wasn't cycling through all +;; three quotes correctly because of a typo in the function definition. +;; Make sure smart semicolon insertion only happens if we're right at the +;; end of a numeric character reference. +;; v1.4 +;; Fixed bug in insert-smart-semicolon. It wasn't careful to tie the search +;; to the most recent preceding ampersand. +;; v1.3 +;; Fixed bug in (in-comment) +;; Added unicode-smart-semicolon as another convenience for entering Unicode chars +;; Added show-unicode-character-list +;; v1.2 +;; Added unicode-smart-hyphen for easy insert of mdash and ndash +;; Added unicode-smart-period for easy insert of hellip +;; Fixed a bug in unicode-smart-single-quote +;; v1.1 +;; Fixed a few bugs with respect to how numeric character references are entered. +;; Added xml-tag-search-limit and unicode-charref-format +;; v1.0 +;; First release. Nearly a complete rewrite from the former xmlchars.el file + +;;; Code: + +(require 'cl) + +(defvar unicode-ldquo (decode-char 'ucs #x00201c)) +(defvar unicode-rdquo (decode-char 'ucs #x00201d)) +(defvar unicode-lsquo (decode-char 'ucs #x002018)) +(defvar unicode-rsquo (decode-char 'ucs #x002019)) +(defvar unicode-quot (decode-char 'ucs #x000022)) +(defvar unicode-apos (decode-char 'ucs #x000027)) +(defvar unicode-capos (decode-char 'ucs #x0002bc)) +(defvar unicode-ndash (decode-char 'ucs #x002013)) +(defvar unicode-mdash (decode-char 'ucs #x002014)) +(defvar unicode-hellip (decode-char 'ucs #x002026)) + +(defvar unicode-charref-format "&#x%x;" + "The format for numeric character references") + +(defvar xml-tag-search-limit 4096 + "Maximum distance to search from point for tag start characters") + +(defvar unicode-character-list-file "/define/this/before/you/load/me" + "The name of the file that contains your unicode-character-list. unichars.el should be available where you got this file.") + +(if (not (boundp 'unicode-character-list)) + (load-file unicode-character-list-file)) + +(defvar unicode-character-alist '() + "Mapping of Unicode character names to codepoints.") + +(let ((ulist unicode-character-list)) + (setq unicode-character-alist + (list (cons (cadr (car ulist)) (car (car ulist))))) + (setq ulist (cdr ulist)) + (while ulist + (nconc unicode-character-alist + (list (cons (cadr (car ulist)) (car (car ulist))))) + (setq ulist (cdr ulist)))) + +(defvar iso8879-character-alist '() + "Mapping of ISO 8879 entity names names to codepoints.") + +(let ((ulist unicode-character-list)) + (while (and ulist (not (caddr (car ulist)))) + (setq ulist (cdr ulist))) + (setq iso8879-character-alist + (list (cons (caddr (car ulist)) (car (car ulist))))) + (setq ulist (cdr ulist)) + (while ulist + (if (caddr (car ulist)) + (nconc iso8879-character-alist + (list (cons (caddr (car ulist)) (car (car ulist)))))) + (setq ulist (cdr ulist)))) + +(defun iso8879-to-codepoints (&optional isolist) + "Converts a list of ISO 8879 entity names to a list of codepoints. This is a convenience function for defining the glyph list." + (let (codepoint-list) + (setq codepoint-list (list 0)) + (while isolist + (nconc codepoint-list + (list (cdr (assoc (car isolist) iso8879-character-alist)))) + (setq isolist (cdr isolist))) + (cdr codepoint-list))) + +(defun unicode-to-codepoints (&optional unilist) + "Converts a list of Unicode character names to a list of codepoints. This is a convenience function for defining the glyph list." + (let (codepoint-list) + (setq codepoint-list (list 0)) + (while unilist + (nconc codepoint-list + (list (cdr (assoc (car isolist) unicode-character-alist)))) + (setq unilist (cdr unilist))) + (cdr codepoint-list))) + +(defvar unicode-glyph-list + (append + '(?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M + ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z + ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m + ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z + ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 ?! ?@ ?# + ?$ ?% ?^ ?* ?( ?) ?- ?_ ?= ?+ ?\ ?| + ?[ ?] ?{ ?} 59 ?: ?/ ?? ?. 44 96 126) + (iso8879-to-codepoints + '("AElig" "Aacute" "Abreve" "Acirc" "Agrave" "Amacr" "Aogon" + "Aring" "Atilde" "Auml" "Cacute" "Ccaron" "Ccedil" "Ccirc" + "Cdot" "Dagger" "Dcaron" "Dot" "Dstrok" "ENG" "ETH" + "Eacute" "Ecaron" "Ecirc" "Edot" "Egrave" "Emacr" "Eogon" + "Euml" "Gbreve" "Gcedil" "Gcirc" "Gdot" "Hcirc" + "Hstrok" "IJlig" "Iacute" "Icirc" "Idot" "Igrave" + "Imacr" "Iogon" "Itilde" "Iuml" "Jcirc" "Kcedil" + "Lacute" "Lcaron" "Lcedil" "Lmidot" "Lstrok" "Nacute" + "Ncaron" "Ncedil" "Ntilde" "OElig" "Oacute" "Ocirc" + "Odblac" "Ograve" "Omacr" "Oslash" "Otilde" "Ouml" + "Racute" "Rcaron" "Rcedil" "Sacute" "Scaron" "Scedil" + "Scirc" "THORN" "Tcaron" "Tcedil" "Tstrok" "Uacute" + "Ubreve" "Ucirc" "Udblac" "Ugrave" "Umacr" "Uogon" + "Uring" "Utilde" "Uuml" "Wcirc" "Yacute" "Ycirc" + "Yuml" "Zacute" "Zcaron" "Zdot" "aacute" "abreve" + "acirc" "acute" "aelig" "agrave" "amacr" "angst" + "aogon" "aring" "ast" "atilde" "auml" + "b.mu" "bdquo" "blank" "blk12" "blk14" "blk34" + "block" "boxDL" "boxDR" "boxH" "boxHD" "boxHU" + "boxUL" "boxUR" "boxV" "boxVH" "boxVL" "boxVR" + "boxVh" "boxdl" "boxdr" "boxh" "boxhd" "boxhu" + "boxul" "boxur" "boxv" "boxvH" "boxvh" "boxvl" + "boxvr" "breve" "brvbar" "bsol" "bull" "cacute" + "caron" "ccaron" "ccedil" "ccirc" "cdot" "cedil" + "cent" "circ" "colon" "comma" "commat" "copy" + "curren" "dagger" "dash" "dblac" "dcaron" "deg" + "die" "divide" "dollar" "dot" "dstrok" "eacute" + "ecaron" "ecirc" "edot" "egrave" "emacr" "emsp" + "emsp13" "emsp14" "eng" "ensp" "eogon" "equals" + "equiv" "eth" "euml" "excl" "exist" "fnof" + "forall" "frac12" "frac14" "frac34" "frasl" "gacute" + "gbreve" "gcedil" "gcirc" "gdot" "ge" "ges" + "grave" "hairsp" "half" "hcirc" "hellip" + "horbar" "hstrok" "hyphen" "iacute" "icirc" "iexcl" + "igrave" "ijlig" "imacr" "inodot" "inodot" "iogon" + "iquest" "itilde" "iuml" "jcirc" "kcedil" "kgreen" + "lacute" "laquo" "lcaron" "lcedil" "lcub" "ldquo" + "ldquor" "le" "les" "lhblk" "lmidot" "lowbar" + "lpar" "lsaquo" "lsqb" "lsquo" "lsquor" "lstrok" + "macr" "mdash" "mgr" "micro" "middot" "minus" + "mldr" "mu" "nacute" "napos" "nbsp" "ncaron" + "ncedil" "ndash" "ne" "nequiv" "nexist" "nge" + "nges" "ngt" "nle" "nles" "nlt" "not" + "ntilde" "num" "numsp" "oacute" "ocirc" "odblac" + "oelig" "ogon" "ograve" "omacr" "ordf" "ordm" + "oslash" "otilde" "ouml" "para" "percnt" "period" + "permil" "plus" "plusmn" "pound" "puncsp" "quest" + "racute" "raquo" "rcaron" "rcedil" "rcub" + "rdquo" "rdquor" "reg" "ring" "rpar" "rsaquo" + "rsqb" "rsquo" "rsquor" "sacute" "sbquo" "sbsol" + "scaron" "scedil" "scirc" "sect" "semi" "shy" + "sol" "sup1" "sup2" "sup3" "szlig" "tcaron" + "tcedil" "thinsp" "thorn" "tilde" "times" "trade" + "tstrok" "uacute" "ubreve" "ucirc" "udblac" "ugrave" + "uhblk" "umacr" "uml" "uogon" "uring" "utilde" + "uuml" "verbar" "wcirc" "wedgeq" "yacute" "ycirc" + "yen" "yuml" "zacute" "zcaron" "zdot"))) + "A list of Unicode codepoints identifying the characters that display correctly in your Emacs with your fonts.") + +;; Insert characters by Unicode name (with completion) + +(defun unicode-character-insert (arg &optional argname) + "Insert a Unicode character by character name. If a prefix is given, the character will be inserted regardless of whether or not it has a displayable glyph; otherwise, a numeric character reference is inserted if the codepoint is not in the unicode-glyph-list. If argname is given, it is used for the prompt. If argname uniquely identifies a character, that character is inserted without the prompt." + (interactive "P") + (let* ((completion-ignore-case t) + (uniname (if (stringp argname) argname "")) + (charname + (if (eq (try-completion uniname unicode-character-alist) t) + uniname + (completing-read + "Unicode name: " + unicode-character-alist + nil t uniname))) + codepoint glyph) + (setq codepoint (cdr (assoc charname unicode-character-alist))) + (xml-unicode-insert arg codepoint))) + +;; Insert characters by iso8879 name + +(defun iso8879-character-insert (arg &optional argname) + "Insert a Unicode character by ISO 8879 entity name. If a prefix is given, the character will be inserted regardless of whether or not it has a displayable glyph; otherwise, a numeric character reference is inserted if the codepoint is not in the unicode-glyph-list. If argname is given, it is used for the prompt. If argname uniquely identifies a character, that character is inserted without the prompt." + (interactive "P") + (let* ((isoname (if (stringp argname) argname "")) + (charname + (if (eq (try-completion isoname iso8879-character-alist) t) + isoname + (completing-read + "ISO name: " + iso8879-character-alist + nil t isoname))) + codepoint glyph) + (setq codepoint (cdr (assoc charname iso8879-character-alist))) + (xml-unicode-insert arg codepoint))) + +(defun xml-unicode-insert (arg codepoint) + "Insert the Unicode character identified by codepoint taking into account available glyphs and XML predefined entities." + (interactive "P") + (let ((glyph (memq codepoint unicode-glyph-list))) + (cond + ((and (decode-char 'ucs codepoint) (or arg glyph)) + (ucs-insert codepoint)) + ((= codepoint 34) + (insert """)) + ((= codepoint 38) + (insert "&")) + ((= codepoint 39) + (insert "'")) + ((= codepoint 60) + (insert "<")) + ((= codepoint 62) + (insert ">")) + (t + (insert (format unicode-charref-format codepoint)))))) + +;; Menus + +(defvar unicode-character-menu-alist + '( + ("angst" . #x212B) + ("cent" . #x00A2) + ("copy" . #x00A9) + ("Dagger" . #x2021) + ("dagger" . #x2020) + ("deg" . #x00B0) + ("emsp" . #x2003) + ("ensp" . #x2002) + ("ETH" . #x00D0) + ("eth" . #x00F0) + ("euro" . #x20AC) + ("half" . #x00BD) + ("laquo" . #x00AB) + ("ldquo" . #x201c) + ("lsquo" . #x2018) + ("mdash" . #x2014) + ("micro" . #x00B5) + ("middot" . #x00B7) + ("nbsp" . #x00A0) + ("ndash" . #x2013) + ("not" . #x00AC) + ("numsp" . #x2007) + ("para" . #x00B6) + ("permil" . #x2030) + ("puncsp" . #x2008) + ("raquo" . #x00BB) + ("rdquo" . #x201d) + ("rsquo" . #x2019) + ("reg" . #x00AE) + ("sect" . #x00A7) + ("THORN" . #x00DE) + ("thorn" . #x00FE) + ("trade" . #x2122) + ) + "Mapping of names to codepoints for use in the popup or Emacs menu.") + +(defun unicode-character-menu-insert () + "Popup a menu for inserting unicode characters." + (interactive) + (let* ((xml-chars-menu + (list "Special char" (append (list "") unicode-character-menu-alist))) + (value (x-popup-menu t xml-chars-menu))) + (if value (xml-unicode-insert nil value)))) + +(defvar unicode-character-menu-map (make-sparse-keymap "UniChar") + "A menu map for inserting Unicode characters.") + +(defun make-unicode-character-menu-bar () + "Builds the unicode-character-menu-map for the currently defined unicode-character-menu-alist." + (let ((alist (reverse unicode-character-menu-alist)) + name codepoint) + (setq unicode-character-menu-map (make-sparse-keymap "UniChar")) + (while alist + (setq name (car (car alist)) + codepoint (cdr (car alist))) + (define-key unicode-character-menu-map (vector (intern name)) + `(,name . (lambda () (interactive) (xml-unicode-insert nil ,codepoint)))) + (setq alist (cdr alist))))) + +(make-unicode-character-menu-bar) + +;; Simple XML tests + +(defun in-start-tag () + "Crude test to see if point is inside an open start tag." + (interactive) + (let (slim here pgt plt) + (setq here (point)) + (setq slim + (if (> here xml-tag-search-limit) + (- here xml-tag-search-limit) + 0)) + (setq pgt (search-backward ">" slim t)) + (goto-char here) + (setq plt (search-backward "<" slim t)) + (goto-char here) + (if (and pgt plt) + (> plt pgt) + plt))) + +(defun after-start-tag () + "Crude test to see if point is just after a start tag" + (interactive) + (if (and (char-before) (char-equal (char-before) ?>)) + (let (slim here plt psl) + (setq here (point)) + (setq slim + (if (> here xml-tag-search-limit) + (- here xml-tag-search-limit) + 0)) + (setq plt (search-backward "<" slim t)) + (goto-char here) + (setq psl (search-backward "/" slim t)) + (goto-char here) + (or (and plt (not psl)) + (and plt psl (< psl plt)))))) + +(defun in-comment () + "Crude test to see if point is inside a comment." + (interactive) + (let (slim here pgt pcmt) + (setq here (point)) + (setq slim + (if (> here xml-tag-search-limit) + (- here xml-tag-search-limit) + 0)) + (setq pgt (search-backward "-->" slim t)) + (goto-char here) + (setq pcmt (search-backward " pcmt pgt) + pcmt))) + +;;stolen from hen.el which in turn claims to have stolen it from cxref +(defun unicode-looking-backward-at (regexp) + "Return t if text before point matches regular expression REGEXP. +This function modifies the match data that `match-beginning', +`match-end' and `match-data' access; save and restore the match +data if you want to preserve them." + (save-excursion + (let ((here (point))) + (if (re-search-backward regexp (point-min) t) + (if (re-search-forward regexp here t) + (= (point) here)))))) + +;; Smart quotes + +(defun unicode-smart-double-quote () + "Insert a left or right double quote as appropriate. Left quotes are inserted after a space, newline, or start tag. Right quotes are inserted after any other character, except if the preceding character is a quote, in which case we cycle through the three quote styles." + (interactive) + (if (char-before) + (let ((ch (char-before))) + (cond + ((in-start-tag) + (insert "\"")) + ((or + (after-start-tag) + (char-equal ch 40) ; ( + (char-equal ch 91) ; [ + (char-equal ch ?{)) ; { + (insert unicode-ldquo)) + ((or + (char-equal ch ?>) ; > + (char-equal ch 41) ; ) + (char-equal ch 93) ; ] + (char-equal ch ?})) ; } + (insert unicode-rdquo)) + ((or (char-equal ch 32) + (char-equal ch 10)) + (insert unicode-ldquo)) + ((char-equal ch unicode-ldquo) + (progn + (delete-backward-char 1) + (insert "\""))) + ((char-equal ch unicode-quot) + (progn + (delete-backward-char 1) + (insert unicode-rdquo))) + ((char-equal ch unicode-rdquo) + (progn + (delete-backward-char 1) + (insert unicode-ldquo))) + ((char-equal ch unicode-ldquo) + (progn + (delete-backward-char 1) + (insert unicode-rdquo))) + ((char-equal ch unicode-lsquo) + (insert unicode-ldquo)) + (t (insert unicode-rdquo)))) + (insert unicode-ldquo))) + +(defun unicode-smart-single-quote () + "Insert a left or right single quote, or an apostrophe, as appropriate. Left quotes are inserted after a space, newline, or start tag. An apostrophe is inserted after any other character, except if the preceding character is a quote or apostrophe, in which case we cycle through the styles." + (interactive) + (if (char-before) + (let ((ch (char-before))) + (cond + ((in-start-tag) + (insert "'")) + ((or + (after-start-tag) + (char-equal ch 40) ; ( + (char-equal ch 91) ; [ + (char-equal ch ?{)) ; { + (insert unicode-lsquo)) + ((or + (char-equal ch ?>) ; > + (char-equal ch 41) ; ) + (char-equal ch 93) ; ] + (char-equal ch ?})) ; } + (insert unicode-rsquo)) + ((or (char-equal ch 32) + (char-equal ch 10)) + (insert unicode-lsquo)) + ((char-equal ch unicode-apos) ; ' -> rsquo + (progn + (delete-backward-char 1) + (insert unicode-rsquo))) + ((char-equal ch unicode-rsquo) ; rsquo -> lsquo + (progn + (delete-backward-char 1) + (insert unicode-lsquo))) + ((char-equal ch unicode-lsquo) ; lsquo -> ' + (progn + (delete-backward-char 1) + (insert unicode-apos))) + (t (insert unicode-apos)))) + (insert unicode-lsquo))) + +(defun unicode-smart-hyphen () + "Insert a hyphen, mdash, or ndash as appropriate. A hyphen, an mdash, and then an ndash is inserted." + (interactive) + (if (char-before) + (let ((ch (char-before))) + (cond + ((in-comment) + (insert "-")) + ((char-equal ch ?-) + (progn + (delete-backward-char 1) + (insert unicode-mdash))) + ((char-equal ch unicode-mdash) + (progn + (delete-backward-char 1) + (insert unicode-ndash))) + ((char-equal ch unicode-ndash) + (progn + (delete-backward-char 1) + (insert "-"))) + (t (insert "-")))) + (insert "-"))) + +(defun unicode-smart-period () + "Insert an hellipsis for three dots." + (interactive) + (if (> (point) 2) + (let ((ch1 (char-before)) + (ch2 (char-before (- (point) 1))) + (ch3 (char-before (- (point) 2)))) + (cond + ((in-comment) + (insert ".")) + ((char-equal ch1 unicode-hellip) + (progn + (delete-backward-char 1) + (insert "...."))) + ((and ch3 (char-equal ch1 ?.) (char-equal ch2 ?.) (char-equal ch3 ?.)) + (insert ".")) + ((and (char-equal ch1 ?.) (char-equal ch2 ?.)) + (progn + (delete-backward-char 2) + (insert unicode-hellip))) + (t (insert ".")))) + (insert "."))) + +(defun unicode-smart-semicolon () + "Detect numeric character references and replace them with the appropriate char." + (interactive) + (let ((pos (point)) + amppos codept) + (search-backward "&" nil t nil) + (setq amppos (point)) + (goto-char pos) + (cond + ((unicode-looking-backward-at "&#[xX][0-9a-fA-F]+") + (progn + (re-search-backward "&#[xX]\\([0-9a-fA-F]+\\)" nil t nil) + (if (= amppos (point)) + (progn + (setq codept (string-to-number (match-string 1) 16)) + (if (memq codept unicode-glyph-list) + (replace-match (format "%c" (decode-char 'ucs codept))) + (progn + (goto-char pos) + (insert ";")))) + (progn + (goto-char pos) + (insert ";"))))) + ((unicode-looking-backward-at "&#[0-9]+") + (progn + (re-search-backward "&#\\([0-9]+\\)" nil t nil) + (if (= amppos (point)) + (progn + (setq codept (string-to-number (match-string 1) 10)) + (if (memq codept unicode-glyph-list) + (replace-match (format "%c" (decode-char 'ucs codept))) + (progn + (goto-char pos) + (insert ";")))) + (progn + (goto-char pos) + (insert ";"))))) + (t + (insert ";"))))) + +;; Setup quail for XML mode + +(require 'quail) + +(quail-define-package + "xml" "UTF-8" "&" t + "Unicode characters input method using ISO 8879 entitie names from the unicode-character-list" + nil t nil nil nil nil nil nil nil nil t) + +(defvar xml-quail-define-rules '() + "The default xml-input rules. Built dynamically from the unicode-character-list and the unicode-glyph-list.") + +(let ((ulist iso8879-character-alist) + codepoint glyph entname) + (setq xml-quail-define-rules (list 'quail-define-rules)) + (while ulist + (setq codepoint (cdr (car ulist))) + (setq glyph (memq codepoint unicode-glyph-list)) + (setq entname (concat "&" (car (car ulist)) ";")) + (cond + ((and glyph (decode-char 'ucs codepoint)) + (nconc xml-quail-define-rules + (list (list entname (decode-char 'ucs codepoint))))) + ((= codepoint 34) + (nconc xml-quail-define-rules + (list (list entname (vector """))))) + ((= codepoint 38) + (nconc xml-quail-define-rules + (list (list entname (vector "&"))))) + ((= codepoint 39) + (nconc xml-quail-define-rules + (list (list entname (vector "'"))))) + ((= codepoint 60) + (nconc xml-quail-define-rules + (list (list entname (vector "<"))))) + ((= codepoint 62) + (nconc xml-quail-define-rules + (list (list entname (vector ">"))))) + (t + (nconc xml-quail-define-rules + (list (list entname (vector (format unicode-charref-format codepoint))))))) + (setq ulist (cdr ulist)))) + +(eval xml-quail-define-rules) + +;; Read two keys + +(defvar unicode-character-shortcut-alist + (list + (cons "AE" (cdr (assoc "AElig" iso8879-character-alist))) + (cons "A'" (cdr (assoc "Aacute" iso8879-character-alist))) + (cons "A^" (cdr (assoc "Acirc" iso8879-character-alist))) + (cons "A`" (cdr (assoc "Agrave" iso8879-character-alist))) + (cons "Ao" (cdr (assoc "Aring" iso8879-character-alist))) + (cons "A~" (cdr (assoc "Atilde" iso8879-character-alist))) + (cons "A\"" (cdr (assoc "Auml" iso8879-character-alist))) + (cons "C," (cdr (assoc "Ccedil" iso8879-character-alist))) + (cons "E'" (cdr (assoc "Eacute" iso8879-character-alist))) + (cons "E^" (cdr (assoc "Ecirc" iso8879-character-alist))) + (cons "E`" (cdr (assoc "Egrave" iso8879-character-alist))) + (cons "E\"" (cdr (assoc "Euml" iso8879-character-alist))) + (cons "I'" (cdr (assoc "Iacute" iso8879-character-alist))) + (cons "I^" (cdr (assoc "Icirc" iso8879-character-alist))) + (cons "I`" (cdr (assoc "Igrave" iso8879-character-alist))) + (cons "I\"" (cdr (assoc "Iuml" iso8879-character-alist))) + (cons "N~" (cdr (assoc "Ntilde" iso8879-character-alist))) + (cons "O'" (cdr (assoc "Oacute" iso8879-character-alist))) + (cons "O^" (cdr (assoc "Ocirc" iso8879-character-alist))) + (cons "O`" (cdr (assoc "Ograve" iso8879-character-alist))) + (cons "O/" (cdr (assoc "Oslash" iso8879-character-alist))) + (cons "O~" (cdr (assoc "Otilde" iso8879-character-alist))) + (cons "O\"" (cdr (assoc "Ouml" iso8879-character-alist))) + (cons "U'" (cdr (assoc "Uacute" iso8879-character-alist))) + (cons "U^" (cdr (assoc "Ucirc" iso8879-character-alist))) + (cons "U`" (cdr (assoc "Ugrave" iso8879-character-alist))) + (cons "U\"" (cdr (assoc "Uuml" iso8879-character-alist))) + (cons "Y'" (cdr (assoc "Yacute" iso8879-character-alist))) + (cons "a'" (cdr (assoc "aacute" iso8879-character-alist))) + (cons "a^" (cdr (assoc "acirc" iso8879-character-alist))) + (cons "ae" (cdr (assoc "aelig" iso8879-character-alist))) + (cons "a`" (cdr (assoc "agrave" iso8879-character-alist))) + (cons "ao" (cdr (assoc "aring" iso8879-character-alist))) + (cons "a~" (cdr (assoc "atilde" iso8879-character-alist))) + (cons "a\"" (cdr (assoc "auml" iso8879-character-alist))) + (cons "c," (cdr (assoc "ccedil" iso8879-character-alist))) + (cons "e'" (cdr (assoc "eacute" iso8879-character-alist))) + (cons "e^" (cdr (assoc "ecirc" iso8879-character-alist))) + (cons "e`" (cdr (assoc "egrave" iso8879-character-alist))) + (cons "e\"" (cdr (assoc "euml" iso8879-character-alist))) + (cons "i'" (cdr (assoc "iacute" iso8879-character-alist))) + (cons "i^" (cdr (assoc "icirc" iso8879-character-alist))) + (cons "i`" (cdr (assoc "igrave" iso8879-character-alist))) + (cons "i\"" (cdr (assoc "iuml" iso8879-character-alist))) + (cons "n~" (cdr (assoc "ntilde" iso8879-character-alist))) + (cons "o'" (cdr (assoc "oacute" iso8879-character-alist))) + (cons "o^" (cdr (assoc "ocirc" iso8879-character-alist))) + (cons "o`" (cdr (assoc "ograve" iso8879-character-alist))) + (cons "o-" (cdr (assoc "omacr" iso8879-character-alist))) + (cons "o/" (cdr (assoc "oslash" iso8879-character-alist))) + (cons "o~" (cdr (assoc "otilde" iso8879-character-alist))) + (cons "o\"" (cdr (assoc "ouml" iso8879-character-alist))) + (cons "sz" (cdr (assoc "szlig" iso8879-character-alist))) + (cons "u'" (cdr (assoc "uacute" iso8879-character-alist))) + (cons "u^" (cdr (assoc "ucirc" iso8879-character-alist))) + (cons "u`" (cdr (assoc "ugrave" iso8879-character-alist))) + (cons "u\"" (cdr (assoc "uuml" iso8879-character-alist))) + (cons "y'" (cdr (assoc "yacute" iso8879-character-alist))) + (cons "y\"" (cdr (assoc "yuml" iso8879-character-alist))) + (cons "12" (cdr (assoc "frac12" iso8879-character-alist))) + (cons "13" (cdr (assoc "frac13" iso8879-character-alist))) + (cons "14" (cdr (assoc "frac14" iso8879-character-alist))) + (cons "15" (cdr (assoc "frac15" iso8879-character-alist))) + (cons "16" (cdr (assoc "frac16" iso8879-character-alist))) + (cons "18" (cdr (assoc "frac18" iso8879-character-alist))) + (cons "23" (cdr (assoc "frac23" iso8879-character-alist))) + (cons "25" (cdr (assoc "frac25" iso8879-character-alist))) + (cons "34" (cdr (assoc "frac34" iso8879-character-alist))) + (cons "35" (cdr (assoc "frac35" iso8879-character-alist))) + (cons "38" (cdr (assoc "frac38" iso8879-character-alist))) + (cons "45" (cdr (assoc "frac45" iso8879-character-alist))) + (cons "56" (cdr (assoc "frac56" iso8879-character-alist))) + (cons "58" (cdr (assoc "frac58" iso8879-character-alist))) + (cons "78" (cdr (assoc "frac78" iso8879-character-alist))) + (cons "<<" (cdr (assoc "laquo" iso8879-character-alist))) + (cons ".." (cdr (assoc "hellip" iso8879-character-alist))) + (cons "!i" (cdr (assoc "iexcl" iso8879-character-alist))) + (cons "?i" (cdr (assoc "iquest" iso8879-character-alist))) + (cons " " (cdr (assoc "nbsp" iso8879-character-alist))) + (cons "+-" (cdr (assoc "plusmn" iso8879-character-alist))) + (cons "--" (cdr (assoc "mdash" iso8879-character-alist))) + (cons "$c" (cdr (assoc "cent" iso8879-character-alist))) + (cons "$e" (cdr (assoc "euro" iso8879-character-alist))) + (cons "$p" (cdr (assoc "pound" iso8879-character-alist))) + (cons "$y" (cdr (assoc "yen" iso8879-character-alist)))) + "Defines a list of two-character shortcuts for keyboard entry of Unicode characters.") + +(defun unicode-character-shortcut-insert () + "Read a (two-character) keyboard shortcut and insert the corresponding character." + (interactive) + (let* ((c1 (read-char)) + (c2 (read-char)) + (str (concat (char-to-string c1) (char-to-string c2)))) + (cond + ((assoc str unicode-character-shortcut-alist) + (xml-unicode-insert nil + (cdr (assoc str unicode-character-shortcut-alist)))) + (t (beep))))) + +(defun show-unicode-character-list () + "Insert each Unicode character into a buffer. Let's you see which characters are available for literal display in your emacs font." + (let ((chars unicode-character-list) + char codept name) + (while chars + (setq char (car chars)) + (setq chars (cdr chars)) + (setq codept (car char)) + (setq name (cadr char)) + + (if (< codept #xffff) + (progn + (insert (format "#x%06x " codept)) + (ucs-insert codept) + (insert (format " %s\n" name))))))) + +;; EOF -- cgit v1.2.3