;;;-*- Mode: Lisp; Package: ENCODING -*-
;;
;; Copyright (C) Paul Meurer 2001-2004. All rights reserved.
;; paul.meurer@aksis.uib.no
;; Aksis, University of Bergen
;;
;; entities to chars
;;
;;-------------------------------------------------------------------------------------
;; TO DO:
;;-------------------------------------------------------------------------------------

(in-package :encoding)

(defvar *entity-to-char-table* (make-hash-table :test #'equal)
  "Hash table (EQUAL test) mapping entity names, written without the
leading & and the trailing ;, to their replacement characters.
It is filled by the LOOP form at the end of this file.")

(defun entity-to-char (entity-string)
  "Return the value registered for ENTITY-STRING in *ENTITY-TO-CHAR-TABLE*,
or NIL when there is no such entry.  Lookup is case-sensitive."
  (gethash entity-string *entity-to-char-table*))

(defun entities-to-chars (string)
  "Return STRING with every known named entity reference of the form
&name; replaced by the value ENTITY-TO-CHAR gives for name.

References whose name is not in the table, and ampersands that do not
start a reference, are copied to the result unchanged.  When STRING
contains no ampersand at all, STRING itself (not a copy) is returned."
  (if (not (find #\& string))
      ;; Nothing to decode: hand back the argument itself.
      string
      (with-output-to-string (out)
        (let ((pos 0))
          (loop
            (let* ((amp (position #\& string :start pos))
                   (semi (and amp (position #\; string :start amp)))
                   (replacement (and semi
                                     (entity-to-char
                                      (subseq string (1+ amp) semi)))))
              (cond ((null semi)
                     ;; No further "&...;" pair can occur, so the rest of
                     ;; the input is copied verbatim and we are done.
                     (write-string string out :start pos)
                     (return))
                    (replacement
                     (write-string string out :start pos :end amp)
                     ;; STRING (the function) keeps this working should a
                     ;; table value ever be a string instead of a character.
                     (write-string (string replacement) out)
                     (setf pos (1+ semi)))
                    (t
                     ;; Not a known entity.  Emit the ampersand only and
                     ;; resume right after it, so that a stray & does not
                     ;; hide a real entity that follows before the next ;.
                     (write-string string out :start pos :end (1+ amp))
                     (setf pos (1+ amp))))))))))

;; Fill *ENTITY-TO-CHAR-TABLE* from a flat list of alternating entity
;; names and characters.  Entries that are commented out are not registered.
(loop for (entity char)
      on '(
           ;; iso lat 1
           "aacute" #\á ;; "á"
           "Aacute" #\Á ;; "Á"
           "acirc" #\â ;; "â"
           "Acirc" #\Â ;; "Â"
           "agrave" #\à ;; "à"
           "Agrave" #\À ;; "À"
           "aring" #\å ;; "å"
           "Aring" #\Å ;; "Å"
           "atilde" #\ã ;; "ã"
           "Atilde" #\Ã ;; "Ã"
           "auml" #\ä ;; "ä"
           "Auml" #\Ä ;; "Ä"
           "aelig" #\æ ;; "æ"
           "AElig" #\Æ ;; "Æ"
           "ccedil" #\ç ;; "ç"
           "Ccedil" #\Ç ;; "Ç"
           "eth" #\ð ;; "ð"
           "ETH" #\Ð ;; "Ð"
           "eacute" #\é ;; "é"
           "Eacute" #\É ;; "É"
           "ecirc" #\ê ;; "ê"
           "Ecirc" #\Ê ;; "Ê"
           "egrave" #\è ;; "è"
           "Egrave" #\È ;; "È"
           "euml" #\ë ;; "ë"
           "Euml" #\Ë ;; "Ë"
           "iacute" #\í ;; "í"
           "Iacute" #\Í ;; "Í"
           "icirc" #\î ;; "î"
           "Icirc" #\Î ;; "Î"
           "igrave" #\ì ;; "ì"
           "Igrave" #\Ì ;; "Ì"
           "iuml" #\ï ;; "ï"
           "Iuml" #\Ï ;; "Ï"
           "ntilde" #\ñ ;; "ñ"
           "Ntilde" #\Ñ ;; "Ñ"
           "oacute" #\ó ;; "ó"
           "Oacute" #\Ó ;; "Ó"
           "ocirc" #\ô ;; "ô"
           "Ocirc" #\Ô ;; "Ô"
           "ograve" #\ò ;; "ò"
           "Ograve" #\Ò ;; "Ò"
           "oslash" #\ø ;; "ø"
           "Oslash" #\Ø ;; "Ø"
           "otilde" #\õ ;; "õ"
           "Otilde" #\Õ ;; "Õ"
           "ouml" #\ö ;; "ö"
           "Ouml" #\Ö ;; "Ö"
           "szlig" #\ß ;; "ß"
           "thorn" #\þ ;; "þ"
           "THORN" #\Þ ;; "Þ"
           "uacute" #\ú ;; "ú"
           "Uacute" #\Ú ;; "Ú"
           "ucirc" #\û ;; "û"
           "Ucirc" #\Û ;; "Û"
           "ugrave" #\ù ;; "ù"
           "Ugrave" #\Ù ;; "Ù"
           "uuml" #\ü ;; "ü"
           "Uuml" #\Ü ;; "Ü"
           "yacute" #\ý ;; "ý"
           "Yacute" #\Ý ;; "Ý"
           "yuml" #\ÿ ;; "ÿ"
           ;;
           ;; iso lat 2
           ;; abreve #\x "ă"
           ;; Abreve #\x "Ă"
           ;; amacr #\x "ā"
           ;; Amacr #\x "Ā"
           ;; aogon #\x "ą"
           ;; Aogon #\x "Ą"
           ;; cacute #\x "ć"
           ;; Cacute #\x "Ć"
           ;; ccaron #\x "č"
           ;; Ccaron #\x "Č"
           ;; ccirc #\x "ĉ"
           ;; Ccirc #\x "Ĉ"
           ;; cdot #\x "ċ"
           ;; Cdot #\x "Ċ"
           ;; dcaron #\x "ď"
           ;; Dcaron #\x "Ď"
           ;; dstrok #\x "đ"
           ;; Dstrok #\x "Đ"
           ;; ecaron #\x "ě"
           ;; Ecaron #\x "Ě"
           ;; edot #\x "ė"
           ;; Edot #\x "Ė"
           ;; emacr #\x "ē"
           ;; Emacr #\x "Ē"
           ;; eogon #\x "ę"
           ;; Eogon #\x "Ę"
           ;; gacute #\x "ǵ"
           ;; gbreve #\x "ğ"
           ;; Gbreve #\x "Ğ"
           ;; Gcedil #\x "Ģ"
           ;; gcirc #\x "ĝ"
           ;; Gcirc #\x "Ĝ"
           ;; gdot #\x "ġ"
           ;; Gdot #\x "Ġ"
           ;; hcirc #\x "ĥ"
           ;; Hcirc #\x "Ĥ"
           ;; hstrok #\x "ħ"
           ;; Hstrok #\x "Ħ"
           ;; Idot #\x "İ"
           ;; Imacr #\x "Ī"
           ;; imacr #\x "ī"
           ;; ijlig #\x "ij"
           ;; IJlig #\x "IJ"
           ;; inodot #\x "ı"
           ;; iogon #\x "į"
           ;; Iogon #\x "Į"
           ;; itilde #\x "ĩ"
           ;; Itilde #\x "Ĩ"
           ;; jcirc #\x "ĵ"
           ;; Jcirc #\x "Ĵ"
           ;; kcedil #\x "ķ"
           ;; Kcedil #\x "Ķ"
           ;; kgreen #\x "ĸ"
           ;; lacute #\x "ĺ"
           ;; Lacute #\x "Ĺ"
           ;; lcaron #\x "ľ"
           ;; Lcaron #\x "Ľ"
           ;; lcedil #\x "ļ"
           ;; Lcedil #\x "Ļ"
           ;; lmidot #\x "ŀ"
           ;; Lmidot #\x "Ŀ"
           ;; lstrok #\x "ł"
           ;; Lstrok #\x "Ł"
           ;; nacute #\x "ń"
           ;; Nacute #\x "Ń"
           ;; eng #\x "ŋ"
           ;; ENG #\x "Ŋ"
           ;; napos #\x "ʼn"
           ;; ncaron #\x "ň"
           ;; Ncaron #\x "Ň"
           ;; ncedil #\x "ņ"
           ;; Ncedil #\x "Ņ"
           ;; odblac #\x "ő"
           ;; Odblac #\x "Ő"
           ;; Omacr #\x "Ō"
           ;; omacr #\x "ō"
           ;; oelig #\x "œ"
           ;; OElig #\x "Œ"
           ;; racute #\x "ŕ"
           ;; Racute #\x "Ŕ"
           ;; rcaron #\x "ř"
           ;; Rcaron #\x "Ř"
           ;; rcedil #\x "ŗ"
           ;; Rcedil #\x "Ŗ"
           ;; sacute #\x "ś"
           ;; Sacute #\x "Ś"
           ;; scaron #\x "š"
           ;; Scaron #\x "Š"
           ;; scedil #\x "ş"
           ;; Scedil #\x "Ş"
           ;; scirc #\x "ŝ"
           ;; Scirc #\x "Ŝ"
           ;; tcaron #\x "ť"
           ;; Tcaron #\x "Ť"
           ;; tcedil #\x "ţ"
           ;; Tcedil #\x "Ţ"
           ;; tstrok #\x "ŧ"
           ;; Tstrok #\x "Ŧ"
           ;; ubreve #\x "ŭ"
           ;; Ubreve #\x "Ŭ"
           ;; udblac #\x "ű"
           ;; Udblac #\x "Ű"
           ;; umacr #\x "ū"
           ;; Umacr #\x "Ū"
           ;; uogon #\x "ų"
           ;; Uogon #\x "Ų"
           ;; uring #\x "ů"
           ;; Uring #\x "Ů"
           ;; utilde #\x "ũ"
           ;; Utilde #\x "Ũ"
           ;; wcirc #\x "ŵ"
           ;; Wcirc #\x "Ŵ"
           ;; ycirc #\x "ŷ"
           ;; Ycirc #\x "Ŷ"
           ;; Yuml #\x "Ÿ"
           ;; zacute #\x "ź"
           ;; Zacute #\x "Ź"
           ;; zcaron #\x "ž"
           ;; Zcaron #\x "Ž"
           ;; zdot #\x "ż"
           ;; Zdot #\x "Ż"
           ;;
           ;; iso-num
           ;; half #\x "½"
           ;; frac12 #\x "½"
           ;; frac14 #\x "¼"
           ;; frac34 #\x "¾"
           ;; frac18 #\x "⅛"
           ;; frac38 #\x "⅜"
           ;; frac58 #\x "⅝"
           ;; frac78 #\x "⅞"
           ;; sup1 #\x "¹"
           ;; sup2 #\x "²"
           ;; sup3 #\x "³"
           ;; plus #\x "+"
           ;; plusmn #\x "±"
           "lt" #\< ;; "&#60;"
           "equals" #\= ;; "="
           "gt" #\> ;; ">"
           ;; NOTE(review): ASCII slash rather than the division sign shown
           ;; in the trailing comment -- presumably a deliberate approximation.
           "divide" #\/ ;; "÷"
           ;; times #\x "×"
           ;; curren #\x "¤"
           ;; pound #\x "£"
           ;; dollar #\x "$"
           ;; cent #\x "¢"
           ;; yen #\x "¥"
           ;; num #\x "#"
           ;; percnt #\x "%"
           "amp" #\& ;; "&#38;"
           ;; ast #\x "*"
           ;; commat #\x "@"
           ;; lsqb #\x "["
           ;; bsol #\x "\"
           ;; rsqb #\x "]"
           ;; lcub #\x "{"
           ;; horbar #\x "―"
           ;; verbar #\x "|"
           ;; rcub #\x "}"
           ;; micro #\x "µ"
           ;; ohm #\x "Ω"
           ;; deg #\x "°"
           ;; ordm #\x "º"
           ;; ordf #\x "ª"
           ;; sect #\x "§"
           ;; NOTE(review): mapped to the section sign, while the trailing
           ;; comment shows the pilcrow -- confirm this is intended.
           "para" #\§ ;; "¶"
           ;; middot #\x "·"
           ;; larr #\x "←"
           ;; rarr #\x "→"
           ;; uarr #\x "↑"
           ;; darr #\x "↓"
           ;; copy #\x "©"
           ;; reg #\x "®"
           ;; trade #\x "™"
           ;; brvbar #\x "¦"
           ;; not #\x "¬"
           ;; sung #\x "♩"
           ;; excl #\x "!"
           ;; iexcl #\x "¡"
           "quot" #\" ;; """
           ;; apos #\x "'"
           ;; lpar #\x "("
           ;; rpar #\x ")"
           ;; comma #\x ","
           ;; lowbar #\x "_"
           ;; hyphen #\x "-"
           ;; period #\x "."
           ;; sol #\x "/"
           ;; colon #\x ":"
           ;; semi #\x ";"
           ;; quest #\x "?"
           ;; iquest #\x "¿"
           ;; laquo #\x "«"
           ;; raquo #\x "»"
           ;; lsquo #\x "'" #\x "‘"
           ;; rsquo #\x "'" #\x "’"
           ;; ldquo #\x """ #\x "“"
           ;; rdquo #\x """ #\x "”"
           "nbsp" #-allegro #.(code-char #xa0) #+allegro #\no-break_space ;; " "
           ;; shy #\x "­"
           ;;
           ;; iso-pub
           ;; emsp #\x " "
           ;; ensp #\x " "
           ;; emsp13 #\x " "
           ;; emsp14 #\x " "
           ;; numsp #\x " "
           ;; puncsp #\x " "
           ;; thinsp #\x " "
           ;; hairsp #\x " "
           "mdash" #\- ;; prelim *** "--" #\x "—"
           "ndash" #\- ;; "--" #\x "–"
           ;; dash #\x "-#45;" #\x "‐"
           ;; blank #\x "␣"
           ;; hellip #\x "…"
           ;; nldr #\x "‥"
           ;; frac13 #\x "⅓"
           ;; frac23 #\x "⅔"
           ;; frac15 #\x "⅕"
           ;; frac25 #\x "⅖"
           ;; frac35 #\x "⅗"
           ;; frac45 #\x "⅘"
           ;; frac16 #\x "⅙"
           ;; frac56 #\x "⅚"
           ;; incare #\x "℅"
           ;; block #\x "█"
           ;; uhblk #\x "▀"
           ;; lhblk #\x "▄"
           ;; blk14 #\x "░"
           ;; blk12 #\x "▒"
           ;; blk34 #\x "▓"
           ;; marker #\x "▮"
           ;; cir #\x "○"
           ;; squ #\x "□"
           ;; rect #\x "▭"
           ;; utri #\x "▵"
           ;; dtri #\x "▿"
           ;; star #\x "⋆"
           ;; bull #\x "•"
           ;; squf #\x "▪"
           ;; utrif #\x "▴"
           ;; dtrif #\x "▾"
           ;; ltrif #\x "◂"
           ;; rtrif #\x "▸"
           ;; clubs #\x "♣"
           ;; diams #\x "♦"
           ;; hearts #\x "♥"
           ;; spades #\x "♠"
           ;; malt #\x "✠"
           ;; dagger #\x "†"
           ;; Dagger #\x "‡"
           ;; check #\x "✓"
           ;; cross #\x "✗"
           ;; sharp #\x "♯"
           ;; flat #\x "♭"
           ;; male #\x "♂"
           ;; female #\x "♀"
           ;; phone #\x "☎"
           ;; telrec #\x "⌕"
           ;; copysr #\x "℗"
           ;; caret #\x "⁁"
           ;; lsquor #\x "‚"
           ;; ldquor #\x "„"
           ;; fflig #\x "ff"
           ;; filig #\x "fi"
           ;;
           ;; ffilig #\x "ffi"
           ;; ffllig #\x "ffl"
           ;; fllig #\x "fl"
           ;; mldr #\x "…"
           ;; rdquor #\x "“"
           ;; rsquor #\x "‘"
           ;; vellip #\x "⋮"
           ;; hybull #\x "⁃"
           ;; loz #\x "◊"
           ;; lozf #\x "✦"
           ;; ltri #\x "◃"
           ;; rtri #\x "▹"
           ;; starf #\x "★"
           ;; natur #\x "♮"
           ;; rx #\x "℞"
           ;; sext #\x "✶"
           ;; target #\x "⌖"
           ;; dlcrop #\x "⌍"
           ;; drcrop #\x "⌌"
           ;; ulcrop #\x "⌏"
           ;; urcrop #\x "⌎"
           )
      by #'cddr
      do (setf (gethash entity *entity-to-char-table*) char))