[LON-CAPA-cvs] cvs: loncom /interface entities.pm
foxr
lon-capa-cvs-allow@mail.lon-capa.org
Mon, 11 Feb 2008 11:35:47 -0000
This is a MIME encoded message
--foxr1202729747
Content-Type: text/plain
foxr Mon Feb 11 06:35:47 2008 EDT
Added files:
/loncom/interface entities.pm
Log:
Building up hash of entity -> latex translations to sanitize
lonprintout.pm's character_table sub and to
1. Make it easy to add new entities.
2. Add some entities that are not in the table.
3. I think this version will also run faster.
--foxr1202729747
Content-Type: text/plain
Content-Disposition: attachment; filename="foxr-20080211063547.txt"
Index: loncom/interface/entities.pm
+++ loncom/interface/entities.pm
# The LearningOnline Network
# entity -> tex.
#
# $Id:
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
# This file contains a table driven entity-->latex converter.
#
# Assumptions:
# The number of entities in a resource is small compared with the
# number of possible entities that might be translated.
# Therefore the strategy is to match a general entity pattern
# &.+; over and over, pull out the match look it up in an entity -> tex hash
# and do the replacement.
#
# In order to simplify the hash, the following reductions are done:
# &#\d+; has the &# and ; stripped and is converted to an int.
# &#x[0-9a-fA-F]+; has the &#x and ; stripped and is converted to an int
# from its hexadecimal value.
# All others have the & and ; stripped.
# The hash: Add new conversions here; leave off the leading & and the trailing ;
# All numeric entities need only appear as their decimal versions
# (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
#
# This entity table is mercilessly cribbed from the HTML pocket reference
# table starting at pg 82. In most cases the LaTeX equivalent codes come from
# the original massive regular expression replacements originally by
# A. Sakharuk in lonprintout.pm
#
# Note numerical entities are essentially unicode character codes.
#
# Lookup table mapping an HTML entity to the LaTeX text used to print it.
#
# Keys:   the entity name without its leading '&' and trailing ';'
#         (e.g. 'amp'), or the decimal character code of a numeric
#         entity (e.g. 38).
# Values: the LaTeX replacement text, exactly as it should appear in the
#         generated document.
#
# This must be initialised from a parenthesised list.  Assigning an
# anonymous hash ({ ... }) to %entities would store one stringified
# reference as the only key and leave every lookup undefined.
#
# Quoting convention for the values:
#   - Single-quoted strings are used wherever possible.  Inside them only
#     \\ and \' are escapes, so every LaTeX backslash is written as \\ and
#     nothing else (braces, #, $, %, ...) is backslash-escaped on the Perl
#     side.
#   - Values containing an apostrophe (the acute accent \'{x}) are written
#     in double quotes to avoid the unreadable '\\\'' form.  None of them
#     contains $ or @, so nothing is interpolated.
#   - Control words that may be followed directly by letters are wrapped
#     in braces ({\oe}) or carry a trailing space so that they do not
#     swallow the text that follows them.
#
# NOTE(review): the \text... symbol commands and \DH, \dh, \TH, \th are not
# defined by the bare LaTeX kernel; presumably the print preamble loads the
# packages/font encoding that provide them -- confirm against lonprintout.pm.
# NOTE(review): \texthighdieresis and \textacute are carried over unchanged
# from the original table; verify that the preamble defines them.
#
my %entities = (

    # ---- ASCII code page: ----------------

    # Translation to empty strings:
    7   => '',
    9   => '',
    10  => '',
    13  => '',

    # Digits and unaccented letters translate to themselves:
    # 48-57 => '0'..'9', 65-90 => 'A'..'Z', 97-122 => 'a'..'z'.
    ( map { ( $_ => chr $_ ) } ( 48 .. 57, 65 .. 90, 97 .. 122 ) ),

    # Translations to simple characters:
    32     => ' ',
    33     => '!',
    34     => '"',
    'quot' => '"',
    35     => '\\#',
    36     => '\\$',
    37     => '\\%',
    38     => '\\&',
    'amp'  => '\\&',
    39     => "'",                                      # Apostrophe
    40     => '(',
    41     => ')',
    42     => '*',
    43     => '+',
    44     => ',',                                      # Comma
    45     => '-',
    46     => '.',
    47     => '/',
    58     => ':',
    59     => ';',
    60     => '\\ensuremath{<}',
    'lt'   => '\\ensuremath{<}',
    61     => '\\ensuremath{=}',
    62     => '\\ensuremath{>}',
    'gt'   => '\\ensuremath{>}',
    63     => '?',
    64     => '@',
    91     => '[',
    92     => '\\ensuremath{\\setminus}',               # \setminus is \ with special spacing.
    93     => ']',
    94     => '\\ensuremath{\\wedge}',
    95     => '\\underline{\\makebox[2mm]{\\strut}}',   # Underline 2mm of space for _
    96     => '`',
    123    => '\\{',
    124    => '|',
    125    => '\\}',
    126    => '\\~{}',                                  # Empty group so the accent does not land on the next character.

    # Controls and Latin-1 supplement.  Note that some entities that have
    # visible effect are not printing unicode characters.  Specifically
    # 130-159.
    # NOTE(review): the range above was garbled in the original comment;
    # it is reconstructed from the entries that follow.
    130 => ',',
    131 => '\\textflorin ',
    132 => ',,',                                        # Low double left quotes.
    133 => '\\ensuremath{\\ldots}',
    134 => '\\ensuremath{\\dagger}',
    135 => '\\ensuremath{\\ddagger}',
    136 => '\\ensuremath{\\wedge}',
    137 => '\\textperthousand ',
    138 => '\\v{S}',
    139 => '\\ensuremath{<}',
    140 => '{\\OE}',

    # There's a gap here in my entity table
    145 => '`',
    146 => "'",
    147 => '``',
    148 => "''",
    149 => '\\ensuremath{\\bullet}',
    150 => '--',
    151 => '---',
    152 => '\\ensuremath{\\sim}',
    153 => '\\texttrademark ',
    154 => '\\v{s}',
    155 => '\\ensuremath{>}',
    156 => '{\\oe}',

    # Another short gap:
    159      => '\\"Y',
    160      => '~',
    'nbsp'   => '~',
    161      => '\\textexclamdown ',
    'iexcl'  => '\\textexclamdown ',
    162      => '\\textcent ',
    'cent'   => '\\textcent ',
    163      => '\\pounds ',
    'pound'  => '\\pounds ',
    164      => '\\textcurrency ',
    'curren' => '\\textcurrency ',
    165      => '\\textyen ',
    'yen'    => '\\textyen ',
    166      => '\\textbrokenbar ',
    'brvbar' => '\\textbrokenbar ',
    167      => '\\textsection ',
    'sect'   => '\\textsection ',
    168      => '\\texthighdieresis ',
    'uml'    => '\\texthighdieresis ',
    169      => '\\copyright ',
    'copy'   => '\\copyright ',
    170      => '\\textordfeminine ',
    'ordf'   => '\\textordfeminine ',
    171      => '\\ensuremath{\\ll}',                   # Approximation of left angle quote.
    'laquo'  => '\\ensuremath{\\ll}',                   # ""
    172      => '\\ensuremath{\\neg}',
    'not'    => '\\ensuremath{\\neg}',
    173      => ' - ',
    'shy'    => ' - ',
    174      => '\\textregistered ',
    'reg'    => '\\textregistered ',
    175      => '\\ensuremath{^{-}}',
    'macr'   => '\\ensuremath{^{-}}',
    176      => '\\ensuremath{^{\\circ}}',
    'deg'    => '\\ensuremath{^{\\circ}}',
    177      => '\\ensuremath{\\pm}',
    'plusmn' => '\\ensuremath{\\pm}',
    178      => '\\ensuremath{^2}',
    'sup2'   => '\\ensuremath{^2}',
    179      => '\\ensuremath{^3}',
    'sup3'   => '\\ensuremath{^3}',
    180      => '\\textacute ',
    'acute'  => '\\textacute ',
    181      => '\\ensuremath{\\mu}',
    'micro'  => '\\ensuremath{\\mu}',
    182      => '\\P ',
    'para'   => '\\P ',
    183      => '\\ensuremath{\\cdot}',
    'middot' => '\\ensuremath{\\cdot}',
    184      => '\\c{\\strut}',
    'cedil'  => '\\c{\\strut}',
    185      => '\\ensuremath{^1}',
    'sup1'   => '\\ensuremath{^1}',
    186      => '\\textordmasculine ',
    'ordm'   => '\\textordmasculine ',
    187      => '\\ensuremath{\\gg}',
    'raquo'  => '\\ensuremath{\\gg}',
    188      => '\\textonequarter ',
    'frac14' => '\\textonequarter ',
    189      => '\\textonehalf ',
    'frac12' => '\\textonehalf ',
    190      => '\\textthreequarters ',
    'frac34' => '\\textthreequarters ',
    191      => '\\textquestiondown ',
    'iquest' => '\\textquestiondown ',

    # Accented capitals:
    192      => '\\`{A}',
    'Agrave' => '\\`{A}',
    193      => "\\'{A}",
    'Aacute' => "\\'{A}",
    194      => '\\^{A}',
    'Acirc'  => '\\^{A}',
    195      => '\\~{A}',
    'Atilde' => '\\~{A}',
    196      => '\\"{A}',
    'Auml'   => '\\"{A}',
    197      => '{\\AA}',
    'Aring'  => '{\\AA}',
    198      => '{\\AE}',
    'AElig'  => '{\\AE}',
    199      => '\\c{C}',                               # Capital C; the lowercase form is entry 231.
    'Ccedil' => '\\c{C}',
    200      => '\\`{E}',
    'Egrave' => '\\`{E}',
    201      => "\\'{E}",
    'Eacute' => "\\'{E}",
    202      => '\\^{E}',
    'Ecirc'  => '\\^{E}',
    203      => '\\"{E}',
    'Euml'   => '\\"{E}',
    204      => '\\`{I}',
    'Igrave' => '\\`{I}',
    205      => "\\'{I}",
    'Iacute' => "\\'{I}",
    206      => '\\^{I}',
    'Icirc'  => '\\^{I}',
    207      => '\\"{I}',
    'Iuml'   => '\\"{I}',
    208      => '{\\DH}',                               # Capital eth (was \OE, which is the OE ligature).
    'ETH'    => '{\\DH}',
    209      => '\\~{N}',
    'Ntilde' => '\\~{N}',
    210      => '\\`{O}',
    'Ograve' => '\\`{O}',
    211      => "\\'{O}",
    'Oacute' => "\\'{O}",
    212      => '\\^{O}',
    'Ocirc'  => '\\^{O}',
    213      => '\\~{O}',
    'Otilde' => '\\~{O}',
    214      => '\\"{O}',
    'Ouml'   => '\\"{O}',
    215      => '\\ensuremath{\\times}',
    'times'  => '\\ensuremath{\\times}',
    216      => '{\\O}',
    'Oslash' => '{\\O}',
    217      => '\\`{U}',
    'Ugrave' => '\\`{U}',
    218      => "\\'{U}",
    'Uacute' => "\\'{U}",
    219      => '\\^{U}',
    'Ucirc'  => '\\^{U}',
    220      => '\\"{U}',
    'Uuml'   => '\\"{U}',
    221      => "\\'{Y}",
    'Yacute' => "\\'{Y}",
    222      => '{\\TH}',
    'THORN'  => '{\\TH}',
    223      => '{\\ss}',                               # \ss is the standard LaTeX sharp s.
    'szlig'  => '{\\ss}',

    # Accented lowercase:
    224      => '\\`{a}',
    'agrave' => '\\`{a}',
    225      => "\\'{a}",
    'aacute' => "\\'{a}",
    226      => '\\^{a}',
    'acirc'  => '\\^{a}',
    227      => '\\~{a}',
    'atilde' => '\\~{a}',
    228      => '\\"{a}',
    'auml'   => '\\"{a}',
    229      => '{\\aa}',
    'aring'  => '{\\aa}',
    230      => '{\\ae}',
    'aelig'  => '{\\ae}',
    231      => '\\c{c}',
    'ccedil' => '\\c{c}',
    232      => '\\`{e}',
    'egrave' => '\\`{e}',
    233      => "\\'{e}",
    'eacute' => "\\'{e}",
    234      => '\\^{e}',
    'ecirc'  => '\\^{e}',
    235      => '\\"{e}',
    'euml'   => '\\"{e}',
    236      => '\\`{i}',
    'igrave' => '\\`{i}',
    237      => "\\'{i}",
    'iacute' => "\\'{i}",
    238      => '\\^{i}',
    'icirc'  => '\\^{i}',
    239      => '\\"{i}',
    'iuml'   => '\\"{i}',
    240      => '{\\dh}',
    'eth'    => '{\\dh}',
    241      => '\\~{n}',
    'ntilde' => '\\~{n}',
    242      => '\\`{o}',
    'ograve' => '\\`{o}',
    243      => "\\'{o}",
    'oacute' => "\\'{o}",
    244      => '\\^{o}',
    'ocirc'  => '\\^{o}',
    245      => '\\~{o}',
    'otilde' => '\\~{o}',
    246      => '\\"{o}',
    'ouml'   => '\\"{o}',
    247      => '\\ensuremath{\\div}',
    'divide' => '\\ensuremath{\\div}',
    248      => '{\\o}',
    'oslash' => '{\\o}',
    249      => '\\`{u}',
    'ugrave' => '\\`{u}',
    250      => "\\'{u}",
    'uacute' => "\\'{u}",
    251      => '\\^{u}',
    'ucirc'  => '\\^{u}',
    252      => '\\"{u}',
    'uuml'   => '\\"{u}',
    253      => "\\'{y}",
    'yacute' => "\\'{y}",
    254      => '{\\th}',
    'thorn'  => '{\\th}',
    255      => '\\"{y}',
    'yuml'   => '\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.  The misspelt 'plank' key from
    # the first version of this table is kept as an alias so that existing
    # lookups continue to work.
    295      => '\\ensuremath{\\hbar}',
    'planck' => '\\ensuremath{\\hbar}',
    'plank'  => '\\ensuremath{\\hbar}',

    # Latin extended-A HTML 4.01 entities:
    338      => '{\\OE}',
    'OElig'  => '{\\OE}',
    339      => '{\\oe}',
    'oelig'  => '{\\oe}',
    352      => '\\v{S}',
    'Scaron' => '\\v{S}',
    353      => '\\v{s}',
    'scaron' => '\\v{s}',
    376      => '\\"{Y}',
    'Yuml'   => '\\"{Y}',

    # Latin extended B HTML 4.01 entities
    402      => '\\ensuremath{f}',
    'fnof'   => '\\ensuremath{f}',

    # Spacing modifier letters:
    710      => '\\^{}',
    'circ'   => '\\^{}',
    732      => '\\~{}',
    'tilde'  => '\\~{}',

    # Greek uppercase.  Letters that look like Latin capitals are set in
    # upright roman; the rest use the LaTeX math commands.
    913       => '\\ensuremath{\\mathrm{A}}',
    'Alpha'   => '\\ensuremath{\\mathrm{A}}',
    914       => '\\ensuremath{\\mathrm{B}}',
    'Beta'    => '\\ensuremath{\\mathrm{B}}',
    915       => '\\ensuremath{\\Gamma}',
    'Gamma'   => '\\ensuremath{\\Gamma}',
    916       => '\\ensuremath{\\Delta}',
    'Delta'   => '\\ensuremath{\\Delta}',
    917       => '\\ensuremath{\\mathrm{E}}',
    'Epsilon' => '\\ensuremath{\\mathrm{E}}',
    918       => '\\ensuremath{\\mathrm{Z}}',
    'Zeta'    => '\\ensuremath{\\mathrm{Z}}',
    919       => '\\ensuremath{\\mathrm{H}}',
    'Eta'     => '\\ensuremath{\\mathrm{H}}',
    920       => '\\ensuremath{\\Theta}',
    'Theta'   => '\\ensuremath{\\Theta}',
    921       => '\\ensuremath{\\mathrm{I}}',
    'Iota'    => '\\ensuremath{\\mathrm{I}}',
    922       => '\\ensuremath{\\mathrm{K}}',
    'Kappa'   => '\\ensuremath{\\mathrm{K}}',
    923       => '\\ensuremath{\\Lambda}',
    'Lambda'  => '\\ensuremath{\\Lambda}',
    924       => '\\ensuremath{\\mathrm{M}}',
    'Mu'      => '\\ensuremath{\\mathrm{M}}',
    925       => '\\ensuremath{\\mathrm{N}}',
    'Nu'      => '\\ensuremath{\\mathrm{N}}',
    926       => '\\ensuremath{\\Xi}',
    'Xi'      => '\\ensuremath{\\Xi}',
    927       => '\\ensuremath{\\mathrm{O}}',
    'Omicron' => '\\ensuremath{\\mathrm{O}}',
    928       => '\\ensuremath{\\Pi}',
    'Pi'      => '\\ensuremath{\\Pi}',
    929       => '\\ensuremath{\\mathrm{P}}',
    'Rho'     => '\\ensuremath{\\mathrm{P}}',

    # Skips 930
    931       => '\\ensuremath{\\Sigma}',
    'Sigma'   => '\\ensuremath{\\Sigma}',
    932       => '\\ensuremath{\\mathrm{T}}',
    'Tau'     => '\\ensuremath{\\mathrm{T}}',
    933       => '\\ensuremath{\\Upsilon}',
    'Upsilon' => '\\ensuremath{\\Upsilon}',
    934       => '\\ensuremath{\\Phi}',
    'Phi'     => '\\ensuremath{\\Phi}',
    935       => '\\ensuremath{\\mathrm{X}}',
    'Chi'     => '\\ensuremath{\\mathrm{X}}',
    936       => '\\ensuremath{\\Psi}',
    'Psi'     => '\\ensuremath{\\Psi}',
    937       => '\\ensuremath{\\Omega}',
    'Omega'   => '\\ensuremath{\\Omega}',

    # Greek lowercase:
    945       => '\\ensuremath{\\alpha}',
    'alpha'   => '\\ensuremath{\\alpha}',
    946       => '\\ensuremath{\\beta}',
    'beta'    => '\\ensuremath{\\beta}',
    947       => '\\ensuremath{\\gamma}',
    'gamma'   => '\\ensuremath{\\gamma}',
    948       => '\\ensuremath{\\delta}',
    'delta'   => '\\ensuremath{\\delta}',
    949       => '\\ensuremath{\\epsilon}',
    'epsilon' => '\\ensuremath{\\epsilon}',
    950       => '\\ensuremath{\\zeta}',
    'zeta'    => '\\ensuremath{\\zeta}',
    951       => '\\ensuremath{\\eta}',
    'eta'     => '\\ensuremath{\\eta}',
);
--foxr1202729747--