[LON-CAPA-cvs] cvs: loncom /interface entities.pm

foxr lon-capa-cvs-allow@mail.lon-capa.org
Mon, 11 Feb 2008 11:35:47 -0000


This is a MIME encoded message

--foxr1202729747
Content-Type: text/plain

foxr		Mon Feb 11 06:35:47 2008 EDT

  Added files:                 
    /loncom/interface	entities.pm 
  Log:
  Building up hash of entity -> latex translations to sanitize
  lonprintout.pm's character_table sub and to 
  1. Make it easy to add new entities.
  2. Add some entities that are not in the table.
  3. I think this version will also run faster.
  
  
  
--foxr1202729747
Content-Type: text/plain
Content-Disposition: attachment; filename="foxr-20080211063547.txt"


Index: loncom/interface/entities.pm
+++ loncom/interface/entities.pm
# The LearningOnline Network
# entity -> tex.
#
# $Id:
#
# Copyright Michigan State University Board of Trustees
#
# This file is part of the LearningOnline Network with CAPA (LON-CAPA).
#
# LON-CAPA is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# LON-CAPA is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LON-CAPA; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
# /home/httpd/html/adm/gpl.txt
# http://www.lon-capa.org/
#
#
package Apache::entities;
use strict;
#
#   This file contains a table driven entity-->latex converter.
#
#  Assumptions:
#   The number of entities in a resource is small compared with the
#   number of possible entities that might be translated.
#   Therefore the strategy is to match a general entity pattern
#   &.+; over and over, pull out the match look it up in an entity -> tex hash
#   and do the replacement.
#
#  In order to simplify the hash, the following reductions are done:
#   &#\d+; has the &# and ; stripped and is converted to an int.
#   &#x.+; has the &#x and ; stripped and is converted to an int as a hex
#                             value.
#   All others have the & and ; stripped.


#  The hash:  Add new conversions here; leave off the leading & and the trailing ;
#  All numeric entities need only appear as their decimal versions
#  (e.g. 1234 is sufficient; there is no need for 0x4d2 as well).
#
#  This entity table is mercilessly cribbed from the  HTML pocket reference
#  table starting at pg 82.  In most cases the LaTeX equivalent codes come from
#  the original massive regular expression replacements originally by 
#  A. Sakharuk in lonprintout.pm
#
#  Note numerical entities are essentially unicode character codes.
#
# Entity -> LaTeX translation table.
#
# Keys are entity names with the leading '&' and trailing ';' removed:
# either the symbolic name (e.g. 'lt') or the decimal character code
# (e.g. 60).  Values are the LaTeX text that replaces the entity.
#
# Quoting convention for the values: every value is a single-quoted Perl
# string.  Inside single quotes the only escapes Perl recognises are \\
# (one backslash) and \' (an apostrophe), so each LaTeX backslash is written
# as \\ and nothing else is escaped.  In particular braces are written bare:
# writing \{ would put a literal backslash-brace into the LaTeX output.
#
# The table is a list in parentheses; using curly braces here would assign
# a single hash reference to the hash instead of the key/value pairs.
#
my %entities = (

    #  ---- ASCII code page: ----------------

    # Translation to empty strings:

    7        => '',
    9        => '',
    10       => '',
    13       => '',

    # Translations to simple characters:

    32       => ' ',
    33       => '!',
    34       => '"',
    'quot'   => '"',
    35       => '\\#',
    36       => '\\$',
    37       => '\\%',
    38       => '\\&',
    'amp'    => '\\&',
    39       => '\'',		# Apostrophe
    40       => '(',
    41       => ')',
    42       => '*',
    43       => '+',
    44       => ',',		#  comma
    45       => '-',
    46       => '.',
    47       => '/',
    48       => '0',
    49       => '1',
    50       => '2',
    51       => '3',
    52       => '4',
    53       => '5',
    54       => '6',
    55       => '7',
    56       => '8',
    57       => '9',
    58       => ':',
    59       => ';',
    60       => '\\ensuremath{<}',
    'lt'     => '\\ensuremath{<}',
    61       => '\\ensuremath{=}',
    62       => '\\ensuremath{>}',
    'gt'     => '\\ensuremath{>}',
    63       => '?',
    64       => '@',
    65       => 'A',
    66       => 'B',
    67       => 'C',
    68       => 'D',
    69       => 'E',
    70       => 'F',
    71       => 'G',
    72       => 'H',
    73       => 'I',
    74       => 'J',
    75       => 'K',
    76       => 'L',
    77       => 'M',
    78       => 'N',
    79       => 'O',
    80       => 'P',
    81       => 'Q',
    82       => 'R',
    83       => 'S',
    84       => 'T',
    85       => 'U',
    86       => 'V',
    87       => 'W',
    88       => 'X',
    89       => 'Y',
    90       => 'Z',
    91       => '[',
    92       => '\\ensuremath{\\setminus}', # \setminus is \ with special spacing.
    93       => ']',
    94       => '\\ensuremath{\\wedge}',
    95       => '\\underline{\\makebox[2mm]{\\strut}}', # Underline 2mm of space for _
    96       => '`',
    97       => 'a',
    98       => 'b',
    99       => 'c',
    100      => 'd',
    101      => 'e',
    102      => 'f',
    103      => 'g',
    104      => 'h',
    105      => 'i',
    106      => 'j',
    107      => 'k',
    108      => 'l',
    109      => 'm',
    110      => 'n',
    111      => 'o',
    112      => 'p',
    113      => 'q',
    114      => 'r',
    115      => 's',
    116      => 't',
    117      => 'u',
    118      => 'v',
    119      => 'w',
    120      => 'x',
    121      => 'y',
    122      => 'z',
    123      => '\\{',
    124      => '\\ensuremath{|}',	# A bare | prints as a dash in OT1 text fonts.
    125      => '\\}',
    126      => '\\~{}',		# Empty argument so the accent does not eat the next character.

    #   Controls and Latin-1 supplement.  Note that some entities that have
    #   visible effect are not printing unicode characters.  Specifically
    #   &#130;-&#160;

    130     => ',',
    131     => '\\textflorin ',
    132     => ',,',		# Low double left quotes.
    133     => '\\ensuremath{\\ldots}',
    134     => '\\ensuremath{\\dagger}',
    135     => '\\ensuremath{\\ddagger}',
    136     => '\\ensuremath{\\wedge}',
    137     => '\\textperthousand ',
    138     => '\\v{S}',
    139     => '\\ensuremath{<}',
    140     => '{\\OE}',

    #  There's a gap here in my entity table

    145     => '`',
    146     => '\'',
    147     => '``',
    148     => '\'\'',
    149     => '\\ensuremath{\\bullet}',
    150     => '--',
    151     => '---',
    152     => '\\ensuremath{\\sim}',
    153     => '\\texttrademark ',
    154     => '\\v{s}',
    155     => '\\ensuremath{>}',
    156     => '\\oe ',

    # Another short gap:

    159     => '\\"{Y}',
    160     => '~',
    'nbsp'  => '~',
    161     => '\\textexclamdown ',
    'iexcl' => '\\textexclamdown ',
    162     => '\\textcent ',
    'cent'  => '\\textcent ',
    163     => '\\pounds ',
    'pound' => '\\pounds ',
    164     => '\\textcurrency ',
    'curren' => '\\textcurrency ',
    165     => '\\textyen ',
    'yen'   => '\\textyen ',
    166     => '\\textbrokenbar ',
    'brvbar' => '\\textbrokenbar ',
    167     => '\\textsection ',
    'sect'  => '\\textsection ',
    168     => '\\texthighdieresis ',
    'uml'   => '\\texthighdieresis ',
    169     => '\\copyright ',
    'copy'  => '\\copyright ',
    170     => '\\textordfeminine ',
    'ordf'  => '\\textordfeminine ',
    171     => '\\ensuremath{\\ll}', # approximation of left angle quote.
    'laquo' => '\\ensuremath{\\ll}', #   ""
    172     => '\\ensuremath{\\neg}',
    'not'   => '\\ensuremath{\\neg}',
    173     => ' - ',
    'shy'   => ' - ',
    174     => '\\textregistered ',
    'reg'   => '\\textregistered ',
    175     => '\\ensuremath{^{-}}',
    'macr'  => '\\ensuremath{^{-}}',
    176     => '\\ensuremath{^{\\circ}}',
    'deg'   => '\\ensuremath{^{\\circ}}',
    177     => '\\ensuremath{\\pm}',
    'plusmn' => '\\ensuremath{\\pm}',
    178     => '\\ensuremath{^2}',
    'sup2'  => '\\ensuremath{^2}',
    179     => '\\ensuremath{^3}',
    'sup3'  => '\\ensuremath{^3}',
    180     => '\\textacute ',
    'acute' => '\\textacute ',
    181     => '\\ensuremath{\\mu}',
    'micro' => '\\ensuremath{\\mu}',
    182     => '\\P ',
    'para'  => '\\P ',
    183     => '\\ensuremath{\\cdot}',
    'middot' => '\\ensuremath{\\cdot}',
    184     => '\\c{\\strut}',
    'cedil' => '\\c{\\strut}',
    185     => '\\ensuremath{^1}',
    'sup1'  => '\\ensuremath{^1}',
    186     => '\\textordmasculine ',
    'ordm'  => '\\textordmasculine ',
    187     => '\\ensuremath{\\gg}', # approximation of right angle quote.
    'raquo' => '\\ensuremath{\\gg}', #   ""
    188     => '\\textonequarter ',
    'frac14' => '\\textonequarter ',
    189     => '\\textonehalf ',
    'frac12' => '\\textonehalf ',
    190     => '\\textthreequarters ',
    'frac34' => '\\textthreequarters ',
    191     => '\\textquestiondown ',
    'iquest' => '\\textquestiondown ',
    192     => '\\`{A}',
    'Agrave' => '\\`{A}',
    193     => '\\\'{A}',
    'Aacute' => '\\\'{A}',
    194     => '\\^{A}',
    'Acirc' => '\\^{A}',
    195     => '\\~{A}',
    'Atilde'=> '\\~{A}',
    196     => '\\"{A}',
    'Auml'  => '\\"{A}',
    197     => '{\\AA}',
    'Aring' => '{\\AA}',
    198     => '{\\AE}',
    'AElig' => '{\\AE}',
    199     => '\\c{C}',
    'Ccedil'=> '\\c{C}',
    200     => '\\`{E}',
    'Egrave'=> '\\`{E}',
    201     => '\\\'{E}',
    'Eacute'=> '\\\'{E}',
    202     => '\\^{E}',
    'Ecirc' => '\\^{E}',
    203     => '\\"{E}',
    'Euml'  => '\\"{E}',
    204     => '\\`{I}',
    'Igrave'=> '\\`{I}',
    205     => '\\\'{I}',
    'Iacute'=> '\\\'{I}',
    206     => '\\^{I}',
    'Icirc' => '\\^{I}',
    207     => '\\"{I}',
    'Iuml'  => '\\"{I}',
    208     => '{\\DH}',		# Capital eth; like \TH, \th and \dh this needs a font
    'ETH'   => '{\\DH}',		# encoding that provides it (e.g. T1).
    209     => '\\~{N}',
    'Ntilde'=> '\\~{N}',
    210     => '\\`{O}',
    'Ograve'=> '\\`{O}',
    211     => '\\\'{O}',
    'Oacute'=> '\\\'{O}',
    212     => '\\^{O}',
    'Ocirc' => '\\^{O}',
    213     => '\\~{O}',
    'Otilde'=> '\\~{O}',
    214     => '\\"{O}',
    'Ouml'  => '\\"{O}',
    215     => '\\ensuremath{\\times}',
    'times' => '\\ensuremath{\\times}',
    216     => '{\\O}',
    'Oslash'=> '{\\O}',
    217     => '\\`{U}',
    'Ugrave'=> '\\`{U}',
    218     => '\\\'{U}',
    'Uacute'=> '\\\'{U}',
    219     => '\\^{U}',
    'Ucirc' => '\\^{U}',
    220     => '\\"{U}',
    'Uuml'  => '\\"{U}',
    221     => '\\\'{Y}',
    'Yacute'=> '\\\'{Y}',
    222     => '{\\TH}',
    'THORN' => '{\\TH}',
    223     => '{\\ss}',
    'szlig' => '{\\ss}',
    224     => '\\`{a}',
    'agrave'=> '\\`{a}',
    225     => '\\\'{a}',
    'aacute'=> '\\\'{a}',
    226     => '\\^{a}',
    'acirc' => '\\^{a}',
    227     => '\\~{a}',
    'atilde'=> '\\~{a}',
    228     => '\\"{a}',
    'auml'  => '\\"{a}',
    229     => '{\\aa}',
    'aring' => '{\\aa}',
    230     => '{\\ae}',
    'aelig' => '{\\ae}',
    231     => '\\c{c}',
    'ccedil'=> '\\c{c}',
    232     => '\\`{e}',
    'egrave'=> '\\`{e}',
    233     => '\\\'{e}',
    'eacute'=> '\\\'{e}',
    234     => '\\^{e}',
    'ecirc' => '\\^{e}',
    235     => '\\"{e}',
    'euml'  => '\\"{e}',
    236     => '\\`{i}',
    'igrave'=> '\\`{i}',
    237     => '\\\'{i}',
    'iacute'=> '\\\'{i}',
    238     => '\\^{i}',
    'icirc' => '\\^{i}',
    239     => '\\"{i}',
    'iuml'  => '\\"{i}',
    240     => '{\\dh}',
    'eth'   => '{\\dh}',
    241     => '\\~{n}',
    'ntilde'=> '\\~{n}',
    242     => '\\`{o}',
    'ograve'=> '\\`{o}',
    243     => '\\\'{o}',
    'oacute'=> '\\\'{o}',
    244     => '\\^{o}',
    'ocirc' => '\\^{o}',
    245     => '\\~{o}',
    'otilde'=> '\\~{o}',
    246     => '\\"{o}',
    'ouml'  => '\\"{o}',
    247     => '\\ensuremath{\\div}',
    'divide'=> '\\ensuremath{\\div}',
    248     => '{\\o}',
    'oslash'=> '{\\o}',
    249     => '\\`{u}',
    'ugrave'=> '\\`{u}',
    250     => '\\\'{u}',
    'uacute'=> '\\\'{u}',
    251     => '\\^{u}',
    'ucirc' => '\\^{u}',
    252     => '\\"{u}',
    'uuml'  => '\\"{u}',
    253     => '\\\'{y}',
    'yacute'=> '\\\'{y}',
    254     => '{\\th}',
    'thorn' => '{\\th}',
    255     => '\\"{y}',
    'yuml'  => '\\"{y}',

    # hbar entity number comes from the unicode character:
    # see e.g. http://www.unicode.org/charts/PDF/U0100.pdf
    # ISO also documents a 'planck' entity.  The misspelled key 'plank'
    # is kept so that any existing use of it continues to resolve.

    295      => '\\ensuremath{\\hbar}',
    'planck' => '\\ensuremath{\\hbar}',
    'plank'  => '\\ensuremath{\\hbar}',

    # Latin extended-A HTML 4.01 entities:

    338      => '{\\OE}',
    'OElig'  => '{\\OE}',
    339      => '{\\oe}',
    'oelig'  => '{\\oe}',
    352      => '\\v{S}',
    'Scaron' => '\\v{S}',
    353      => '\\v{s}',
    'scaron' => '\\v{s}',
    376      => '\\"{Y}',
    'Yuml'   => '\\"{Y}',


    # Latin extended B HTML 4.01 entities

    402      => '\\ensuremath{f}',
    'fnof'   => '\\ensuremath{f}',

    # Spacing modifier letters:

    710      => '\\^{}',
    'circ'   => '\\^{}',
    732      => '\\~{}',
    'tilde'  => '\\~{}',

    # Greek uppercase:

    913      => '\\ensuremath{\\mathrm{A}}',
    'Alpha'  => '\\ensuremath{\\mathrm{A}}',
    914      => '\\ensuremath{\\mathrm{B}}',
    'Beta'   => '\\ensuremath{\\mathrm{B}}',
    915      => '\\ensuremath{\\Gamma}',
    'Gamma'  => '\\ensuremath{\\Gamma}',
    916      => '\\ensuremath{\\Delta}',
    'Delta'  => '\\ensuremath{\\Delta}',
    917      => '\\ensuremath{\\mathrm{E}}',
    'Epsilon'=> '\\ensuremath{\\mathrm{E}}',
    918      => '\\ensuremath{\\mathrm{Z}}',
    'Zeta'   => '\\ensuremath{\\mathrm{Z}}',
    919      => '\\ensuremath{\\mathrm{H}}',
    'Eta'    => '\\ensuremath{\\mathrm{H}}',
    920      => '\\ensuremath{\\Theta}',
    'Theta'  => '\\ensuremath{\\Theta}',
    921      => '\\ensuremath{\\mathrm{I}}',
    'Iota'   => '\\ensuremath{\\mathrm{I}}',
    922      => '\\ensuremath{\\mathrm{K}}',
    'Kappa'  => '\\ensuremath{\\mathrm{K}}',
    923      => '\\ensuremath{\\Lambda}',
    'Lambda' => '\\ensuremath{\\Lambda}',
    924      => '\\ensuremath{\\mathrm{M}}',
    'Mu'     => '\\ensuremath{\\mathrm{M}}',
    925      => '\\ensuremath{\\mathrm{N}}',
    'Nu'     => '\\ensuremath{\\mathrm{N}}',
    926      => '\\ensuremath{\\Xi}',
    'Xi'     => '\\ensuremath{\\Xi}',
    927      => '\\ensuremath{\\mathrm{O}}',
    'Omicron'=> '\\ensuremath{\\mathrm{O}}',
    928      => '\\ensuremath{\\Pi}',
    'Pi'     => '\\ensuremath{\\Pi}',
    929      => '\\ensuremath{\\mathrm{P}}',
    'Rho'    => '\\ensuremath{\\mathrm{P}}',

    # Skips 930

    931      => '\\ensuremath{\\Sigma}',
    'Sigma'  => '\\ensuremath{\\Sigma}',
    932      => '\\ensuremath{\\mathrm{T}}',
    'Tau'    => '\\ensuremath{\\mathrm{T}}',
    933      => '\\ensuremath{\\Upsilon}',
    'Upsilon'=> '\\ensuremath{\\Upsilon}',
    934      => '\\ensuremath{\\Phi}',
    'Phi'    => '\\ensuremath{\\Phi}',
    935      => '\\ensuremath{\\mathrm{X}}',
    'Chi'    => '\\ensuremath{\\mathrm{X}}',
    936      => '\\ensuremath{\\Psi}',
    'Psi'    => '\\ensuremath{\\Psi}',
    937      => '\\ensuremath{\\Omega}',
    'Omega'  => '\\ensuremath{\\Omega}',


    # Greek lowercase:

    945      => '\\ensuremath{\\alpha}',
    'alpha'  => '\\ensuremath{\\alpha}',
    946      => '\\ensuremath{\\beta}',
    'beta'   => '\\ensuremath{\\beta}',
    947      => '\\ensuremath{\\gamma}',
    'gamma'  => '\\ensuremath{\\gamma}',
    948      => '\\ensuremath{\\delta}',
    'delta'  => '\\ensuremath{\\delta}',
    949      => '\\ensuremath{\\epsilon}',
    'epsilon'=> '\\ensuremath{\\epsilon}',
    950      => '\\ensuremath{\\zeta}',
    'zeta'   => '\\ensuremath{\\zeta}',
    951      => '\\ensuremath{\\eta}',
    'eta'    => '\\ensuremath{\\eta}',

);

--foxr1202729747--