commit
eed1274fca
8 changed files with 563 additions and 1 deletions
6
lib/fonts/Makefile
Normal file
6
lib/fonts/Makefile
Normal file
|
@ -0,0 +1,6 @@
|
|||
symbol.hs: symbol.txt
|
||||
runghc parseUnicodeMapping.hs symbol.txt
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
-rm symbol.hs
|
40
lib/fonts/parseUnicodeMapping.hs
Normal file
40
lib/fonts/parseUnicodeMapping.hs
Normal file
|
@ -0,0 +1,40 @@
|
|||
import System.FilePath
|
||||
import Text.Parsec
|
||||
import Data.Char
|
||||
import System.Environment
|
||||
import Control.Applicative hiding (many)
|
||||
import Data.List
|
||||
|
||||
main :: IO ()
|
||||
main = (head <$> getArgs) >>= parseUnicodeMapping
|
||||
|
||||
|
||||
parseUnicodeMapping :: FilePath -> IO ()
|
||||
parseUnicodeMapping fname = do
|
||||
fin <- readFile fname
|
||||
let mapname = dropExtension . takeFileName $ fname
|
||||
let res = runParse fin
|
||||
let header = "-- Generated from " ++ fname ++ "\n" ++
|
||||
mapname ++ " :: [(Char, Char)]\n" ++ mapname ++" =\n [ "
|
||||
let footer = "]"
|
||||
writeFile (replaceExtension fname ".hs")
|
||||
(header ++ (concat $ intersperse "\n , " (map show res)) ++ footer)
|
||||
|
||||
type Unicode = Char
|
||||
|
||||
runParse :: String -> [(Char, Unicode)]
|
||||
runParse s= either (error . show) id (parse parseMap "" s)
|
||||
|
||||
anyline = manyTill anyChar newline
|
||||
|
||||
getHexChar :: Parsec String () Char
|
||||
getHexChar = do
|
||||
[(c,_)] <- readLitChar . ("\\x" ++) <$> many1 hexDigit
|
||||
return c
|
||||
|
||||
parseMap :: Parsec String () [(Char, Unicode)]
|
||||
parseMap = do
|
||||
skipMany (char '#' >> anyline)
|
||||
many (flip (,) <$> getHexChar <* tab <*> getHexChar <* anyline)
|
||||
|
||||
|
256
lib/fonts/symbol.txt
Normal file
256
lib/fonts/symbol.txt
Normal file
|
@ -0,0 +1,256 @@
|
|||
#
|
||||
# Name: Adobe Symbol Encoding to Unicode
|
||||
# Unicode version: 2.0
|
||||
# Table version: 1.0
|
||||
# Date: 2011 July 12
|
||||
#
|
||||
# Copyright (c) 1991-2011 Unicode, Inc. All Rights reserved.
|
||||
#
|
||||
# This file is provided as-is by Unicode, Inc. (The Unicode Consortium). No
|
||||
# claims are made as to fitness for any particular purpose. No warranties of
|
||||
# any kind are expressed or implied. The recipient agrees to determine
|
||||
# applicability of information provided. If this file has been provided on
|
||||
# magnetic media by Unicode, Inc., the sole remedy for any claim will be
|
||||
# exchange of defective media within 90 days of receipt.
|
||||
#
|
||||
# Unicode, Inc. hereby grants the right to freely use the information
|
||||
# supplied in this file in the creation of products supporting the
|
||||
# Unicode Standard, and to make copies of this file in any form for
|
||||
# internal or external distribution as long as this notice remains
|
||||
# attached.
|
||||
#
|
||||
# Format: 4 tab-delimited fields:
|
||||
#
|
||||
# (1) The Unicode value (in hexadecimal)
|
||||
# (2) The Symbol Encoding code point (in hexadecimal)
|
||||
# (3) # Unicode name
|
||||
# (4) # PostScript character name
|
||||
#
|
||||
# General Notes:
|
||||
#
|
||||
# The Unicode values in this table were produced as the result of applying
|
||||
# the algorithm described in the section "Populating a Unicode space" in the
|
||||
# document "Unicode and Glyph Names," at
|
||||
# http://partners.adobe.com/asn/developer/typeforum/unicodegn.html
|
||||
# to the characters in Symbol. Note that some characters, such as "space",
|
||||
# are mapped to 2 Unicode values. 29 characters have assignments in the
|
||||
# Corporate Use Subarea; these are indicated by "(CUS)" in field 4. Refer to
|
||||
# the above document for more details.
|
||||
#
|
||||
# 2011 July 12: The above link is no longer valid. For comparable,
|
||||
# more current information, see the document, "Glyph", at:
|
||||
# <http://www.adobe.com/devnet/opentype/archives/glyph.html>
|
||||
#
|
||||
# Revision History:
|
||||
#
|
||||
# [v1.0, 2011 July 12]
|
||||
# Updated terms of use to current wording.
|
||||
# Updated contact information and document link.
|
||||
# No changes to the mapping data.
|
||||
#
|
||||
# [v0.2, 30 March 1999]
|
||||
# Different algorithm to produce Unicode values (see notes above) results in
|
||||
# some character codes being mapped to 2 Unicode values; use of Corporate
|
||||
# Use subarea values; addition of the euro character; changed assignments of
|
||||
# some characters such as the COPYRIGHT SIGNs and RADICAL EXTENDER. Updated
|
||||
# Unicode names to Unicode 2.0 names.
|
||||
#
|
||||
# [v0.1, 5 May 1995] First release.
|
||||
#
|
||||
# Use the Unicode reporting form <http://www.unicode.org/reporting.html>
|
||||
# for any questions or comments or to report errors in the data.
|
||||
#
|
||||
0020 20 # SPACE # space
|
||||
00A0 20 # NO-BREAK SPACE # space
|
||||
0021 21 # EXCLAMATION MARK # exclam
|
||||
2200 22 # FOR ALL # universal
|
||||
0023 23 # NUMBER SIGN # numbersign
|
||||
2203 24 # THERE EXISTS # existential
|
||||
0025 25 # PERCENT SIGN # percent
|
||||
0026 26 # AMPERSAND # ampersand
|
||||
220B 27 # CONTAINS AS MEMBER # suchthat
|
||||
0028 28 # LEFT PARENTHESIS # parenleft
|
||||
0029 29 # RIGHT PARENTHESIS # parenright
|
||||
2217 2A # ASTERISK OPERATOR # asteriskmath
|
||||
002B 2B # PLUS SIGN # plus
|
||||
002C 2C # COMMA # comma
|
||||
2212 2D # MINUS SIGN # minus
|
||||
002E 2E # FULL STOP # period
|
||||
002F 2F # SOLIDUS # slash
|
||||
0030 30 # DIGIT ZERO # zero
|
||||
0031 31 # DIGIT ONE # one
|
||||
0032 32 # DIGIT TWO # two
|
||||
0033 33 # DIGIT THREE # three
|
||||
0034 34 # DIGIT FOUR # four
|
||||
0035 35 # DIGIT FIVE # five
|
||||
0036 36 # DIGIT SIX # six
|
||||
0037 37 # DIGIT SEVEN # seven
|
||||
0038 38 # DIGIT EIGHT # eight
|
||||
0039 39 # DIGIT NINE # nine
|
||||
003A 3A # COLON # colon
|
||||
003B 3B # SEMICOLON # semicolon
|
||||
003C 3C # LESS-THAN SIGN # less
|
||||
003D 3D # EQUALS SIGN # equal
|
||||
003E 3E # GREATER-THAN SIGN # greater
|
||||
003F 3F # QUESTION MARK # question
|
||||
2245 40 # APPROXIMATELY EQUAL TO # congruent
|
||||
0391 41 # GREEK CAPITAL LETTER ALPHA # Alpha
|
||||
0392 42 # GREEK CAPITAL LETTER BETA # Beta
|
||||
03A7 43 # GREEK CAPITAL LETTER CHI # Chi
|
||||
0394 44 # GREEK CAPITAL LETTER DELTA # Delta
|
||||
2206 44 # INCREMENT # Delta
|
||||
0395 45 # GREEK CAPITAL LETTER EPSILON # Epsilon
|
||||
03A6 46 # GREEK CAPITAL LETTER PHI # Phi
|
||||
0393 47 # GREEK CAPITAL LETTER GAMMA # Gamma
|
||||
0397 48 # GREEK CAPITAL LETTER ETA # Eta
|
||||
0399 49 # GREEK CAPITAL LETTER IOTA # Iota
|
||||
03D1 4A # GREEK THETA SYMBOL # theta1
|
||||
039A 4B # GREEK CAPITAL LETTER KAPPA # Kappa
|
||||
039B 4C # GREEK CAPITAL LETTER LAMDA # Lambda
|
||||
039C 4D # GREEK CAPITAL LETTER MU # Mu
|
||||
039D 4E # GREEK CAPITAL LETTER NU # Nu
|
||||
039F 4F # GREEK CAPITAL LETTER OMICRON # Omicron
|
||||
03A0 50 # GREEK CAPITAL LETTER PI # Pi
|
||||
0398 51 # GREEK CAPITAL LETTER THETA # Theta
|
||||
03A1 52 # GREEK CAPITAL LETTER RHO # Rho
|
||||
03A3 53 # GREEK CAPITAL LETTER SIGMA # Sigma
|
||||
03A4 54 # GREEK CAPITAL LETTER TAU # Tau
|
||||
03A5 55 # GREEK CAPITAL LETTER UPSILON # Upsilon
|
||||
03C2 56 # GREEK SMALL LETTER FINAL SIGMA # sigma1
|
||||
03A9 57 # GREEK CAPITAL LETTER OMEGA # Omega
|
||||
2126 57 # OHM SIGN # Omega
|
||||
039E 58 # GREEK CAPITAL LETTER XI # Xi
|
||||
03A8 59 # GREEK CAPITAL LETTER PSI # Psi
|
||||
0396 5A # GREEK CAPITAL LETTER ZETA # Zeta
|
||||
005B 5B # LEFT SQUARE BRACKET # bracketleft
|
||||
2234 5C # THEREFORE # therefore
|
||||
005D 5D # RIGHT SQUARE BRACKET # bracketright
|
||||
22A5 5E # UP TACK # perpendicular
|
||||
005F 5F # LOW LINE # underscore
|
||||
F8E5 60 # RADICAL EXTENDER # radicalex (CUS)
|
||||
03B1 61 # GREEK SMALL LETTER ALPHA # alpha
|
||||
03B2 62 # GREEK SMALL LETTER BETA # beta
|
||||
03C7 63 # GREEK SMALL LETTER CHI # chi
|
||||
03B4 64 # GREEK SMALL LETTER DELTA # delta
|
||||
03B5 65 # GREEK SMALL LETTER EPSILON # epsilon
|
||||
03C6 66 # GREEK SMALL LETTER PHI # phi
|
||||
03B3 67 # GREEK SMALL LETTER GAMMA # gamma
|
||||
03B7 68 # GREEK SMALL LETTER ETA # eta
|
||||
03B9 69 # GREEK SMALL LETTER IOTA # iota
|
||||
03D5 6A # GREEK PHI SYMBOL # phi1
|
||||
03BA 6B # GREEK SMALL LETTER KAPPA # kappa
|
||||
03BB 6C # GREEK SMALL LETTER LAMDA # lambda
|
||||
00B5 6D # MICRO SIGN # mu
|
||||
03BC 6D # GREEK SMALL LETTER MU # mu
|
||||
03BD 6E # GREEK SMALL LETTER NU # nu
|
||||
03BF 6F # GREEK SMALL LETTER OMICRON # omicron
|
||||
03C0 70 # GREEK SMALL LETTER PI # pi
|
||||
03B8 71 # GREEK SMALL LETTER THETA # theta
|
||||
03C1 72 # GREEK SMALL LETTER RHO # rho
|
||||
03C3 73 # GREEK SMALL LETTER SIGMA # sigma
|
||||
03C4 74 # GREEK SMALL LETTER TAU # tau
|
||||
03C5 75 # GREEK SMALL LETTER UPSILON # upsilon
|
||||
03D6 76 # GREEK PI SYMBOL # omega1
|
||||
03C9 77 # GREEK SMALL LETTER OMEGA # omega
|
||||
03BE 78 # GREEK SMALL LETTER XI # xi
|
||||
03C8 79 # GREEK SMALL LETTER PSI # psi
|
||||
03B6 7A # GREEK SMALL LETTER ZETA # zeta
|
||||
007B 7B # LEFT CURLY BRACKET # braceleft
|
||||
007C 7C # VERTICAL LINE # bar
|
||||
007D 7D # RIGHT CURLY BRACKET # braceright
|
||||
223C 7E # TILDE OPERATOR # similar
|
||||
20AC A0 # EURO SIGN # Euro
|
||||
03D2 A1 # GREEK UPSILON WITH HOOK SYMBOL # Upsilon1
|
||||
2032 A2 # PRIME # minute
|
||||
2264 A3 # LESS-THAN OR EQUAL TO # lessequal
|
||||
2044 A4 # FRACTION SLASH # fraction
|
||||
2215 A4 # DIVISION SLASH # fraction
|
||||
221E A5 # INFINITY # infinity
|
||||
0192 A6 # LATIN SMALL LETTER F WITH HOOK # florin
|
||||
2663 A7 # BLACK CLUB SUIT # club
|
||||
2666 A8 # BLACK DIAMOND SUIT # diamond
|
||||
2665 A9 # BLACK HEART SUIT # heart
|
||||
2660 AA # BLACK SPADE SUIT # spade
|
||||
2194 AB # LEFT RIGHT ARROW # arrowboth
|
||||
2190 AC # LEFTWARDS ARROW # arrowleft
|
||||
2191 AD # UPWARDS ARROW # arrowup
|
||||
2192 AE # RIGHTWARDS ARROW # arrowright
|
||||
2193 AF # DOWNWARDS ARROW # arrowdown
|
||||
00B0 B0 # DEGREE SIGN # degree
|
||||
00B1 B1 # PLUS-MINUS SIGN # plusminus
|
||||
2033 B2 # DOUBLE PRIME # second
|
||||
2265 B3 # GREATER-THAN OR EQUAL TO # greaterequal
|
||||
00D7 B4 # MULTIPLICATION SIGN # multiply
|
||||
221D B5 # PROPORTIONAL TO # proportional
|
||||
2202 B6 # PARTIAL DIFFERENTIAL # partialdiff
|
||||
2022 B7 # BULLET # bullet
|
||||
00F7 B8 # DIVISION SIGN # divide
|
||||
2260 B9 # NOT EQUAL TO # notequal
|
||||
2261 BA # IDENTICAL TO # equivalence
|
||||
2248 BB # ALMOST EQUAL TO # approxequal
|
||||
2026 BC # HORIZONTAL ELLIPSIS # ellipsis
|
||||
F8E6 BD # VERTICAL ARROW EXTENDER # arrowvertex (CUS)
|
||||
F8E7 BE # HORIZONTAL ARROW EXTENDER # arrowhorizex (CUS)
|
||||
21B5 BF # DOWNWARDS ARROW WITH CORNER LEFTWARDS # carriagereturn
|
||||
2135 C0 # ALEF SYMBOL # aleph
|
||||
2111 C1 # BLACK-LETTER CAPITAL I # Ifraktur
|
||||
211C C2 # BLACK-LETTER CAPITAL R # Rfraktur
|
||||
2118 C3 # SCRIPT CAPITAL P # weierstrass
|
||||
2297 C4 # CIRCLED TIMES # circlemultiply
|
||||
2295 C5 # CIRCLED PLUS # circleplus
|
||||
2205 C6 # EMPTY SET # emptyset
|
||||
2229 C7 # INTERSECTION # intersection
|
||||
222A C8 # UNION # union
|
||||
2283 C9 # SUPERSET OF # propersuperset
|
||||
2287 CA # SUPERSET OF OR EQUAL TO # reflexsuperset
|
||||
2284 CB # NOT A SUBSET OF # notsubset
|
||||
2282 CC # SUBSET OF # propersubset
|
||||
2286 CD # SUBSET OF OR EQUAL TO # reflexsubset
|
||||
2208 CE # ELEMENT OF # element
|
||||
2209 CF # NOT AN ELEMENT OF # notelement
|
||||
2220 D0 # ANGLE # angle
|
||||
2207 D1 # NABLA # gradient
|
||||
F6DA D2 # REGISTERED SIGN SERIF # registerserif (CUS)
|
||||
F6D9 D3 # COPYRIGHT SIGN SERIF # copyrightserif (CUS)
|
||||
F6DB D4 # TRADE MARK SIGN SERIF # trademarkserif (CUS)
|
||||
220F D5 # N-ARY PRODUCT # product
|
||||
221A D6 # SQUARE ROOT # radical
|
||||
22C5 D7 # DOT OPERATOR # dotmath
|
||||
00AC D8 # NOT SIGN # logicalnot
|
||||
2227 D9 # LOGICAL AND # logicaland
|
||||
2228 DA # LOGICAL OR # logicalor
|
||||
21D4 DB # LEFT RIGHT DOUBLE ARROW # arrowdblboth
|
||||
21D0 DC # LEFTWARDS DOUBLE ARROW # arrowdblleft
|
||||
21D1 DD # UPWARDS DOUBLE ARROW # arrowdblup
|
||||
21D2 DE # RIGHTWARDS DOUBLE ARROW # arrowdblright
|
||||
21D3 DF # DOWNWARDS DOUBLE ARROW # arrowdbldown
|
||||
25CA E0 # LOZENGE # lozenge
|
||||
2329 E1 # LEFT-POINTING ANGLE BRACKET # angleleft
|
||||
F8E8 E2 # REGISTERED SIGN SANS SERIF # registersans (CUS)
|
||||
F8E9 E3 # COPYRIGHT SIGN SANS SERIF # copyrightsans (CUS)
|
||||
F8EA E4 # TRADE MARK SIGN SANS SERIF # trademarksans (CUS)
|
||||
2211 E5 # N-ARY SUMMATION # summation
|
||||
F8EB E6 # LEFT PAREN TOP # parenlefttp (CUS)
|
||||
F8EC E7 # LEFT PAREN EXTENDER # parenleftex (CUS)
|
||||
F8ED E8 # LEFT PAREN BOTTOM # parenleftbt (CUS)
|
||||
F8EE E9 # LEFT SQUARE BRACKET TOP # bracketlefttp (CUS)
|
||||
F8EF EA # LEFT SQUARE BRACKET EXTENDER # bracketleftex (CUS)
|
||||
F8F0 EB # LEFT SQUARE BRACKET BOTTOM # bracketleftbt (CUS)
|
||||
F8F1 EC # LEFT CURLY BRACKET TOP # bracelefttp (CUS)
|
||||
F8F2 ED # LEFT CURLY BRACKET MID # braceleftmid (CUS)
|
||||
F8F3 EE # LEFT CURLY BRACKET BOTTOM # braceleftbt (CUS)
|
||||
F8F4 EF # CURLY BRACKET EXTENDER # braceex (CUS)
|
||||
232A F1 # RIGHT-POINTING ANGLE BRACKET # angleright
|
||||
222B F2 # INTEGRAL # integral
|
||||
2320 F3 # TOP HALF INTEGRAL # integraltp
|
||||
F8F5 F4 # INTEGRAL EXTENDER # integralex (CUS)
|
||||
2321 F5 # BOTTOM HALF INTEGRAL # integralbt
|
||||
F8F6 F6 # RIGHT PAREN TOP # parenrighttp (CUS)
|
||||
F8F7 F7 # RIGHT PAREN EXTENDER # parenrightex (CUS)
|
||||
F8F8 F8 # RIGHT PAREN BOTTOM # parenrightbt (CUS)
|
||||
F8F9 F9 # RIGHT SQUARE BRACKET TOP # bracketrighttp (CUS)
|
||||
F8FA FA # RIGHT SQUARE BRACKET EXTENDER # bracketrightex (CUS)
|
||||
F8FB FB # RIGHT SQUARE BRACKET BOTTOM # bracketrightbt (CUS)
|
||||
F8FC FC # RIGHT CURLY BRACKET TOP # bracerighttp (CUS)
|
||||
F8FD FD # RIGHT CURLY BRACKET MID # bracerightmid (CUS)
|
||||
F8FE FE # RIGHT CURLY BRACKET BOTTOM # bracerightbt (CUS)
|
|
@ -327,6 +327,7 @@ Library
|
|||
Text.Pandoc.Readers.Docx.Reducible,
|
||||
Text.Pandoc.Readers.Docx.Parse,
|
||||
Text.Pandoc.Readers.Docx.OMath,
|
||||
Text.Pandoc.Readers.Docx.Fonts
|
||||
Text.Pandoc.Writers.Shared,
|
||||
Text.Pandoc.Asciify,
|
||||
Text.Pandoc.MIME,
|
||||
|
|
237
src/Text/Pandoc/Readers/Docx/Fonts.hs
Normal file
237
src/Text/Pandoc/Readers/Docx/Fonts.hs
Normal file
|
@ -0,0 +1,237 @@
|
|||
{-
|
||||
Copyright (C) 2014 Matthew Pickering <matthewtpickering@gmail.com>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.Readers.Docx.Fonts
|
||||
Copyright : Copyright (C) 2014 Matthew Pickering
|
||||
License : GNU GPL, version 2 or above
|
||||
|
||||
Maintainer : Matthew Pickering <matthewtpickering@gmail.com>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Utilities to convert between font codepoints and unicode characters.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..)) where
|
||||
|
||||
|
||||
-- | Enumeration of recognised fonts
|
||||
data Font = Symbol -- ^ <http://en.wikipedia.org/wiki/Symbol_(typeface) Adobe Symbol>
|
||||
|
||||
-- | Given a font and codepoint, returns the corresponding unicode
|
||||
-- character
|
||||
getUnicode :: Font -> Char -> Maybe Char
|
||||
getUnicode Symbol c = lookup c symbol
|
||||
|
||||
-- Generated from lib/fonts/symbol.txt
|
||||
symbol :: [(Char, Char)]
|
||||
symbol =
|
||||
[ (' ',' ')
|
||||
, (' ','\160')
|
||||
, ('!','!')
|
||||
, ('"','\8704')
|
||||
, ('#','#')
|
||||
, ('$','\8707')
|
||||
, ('%','%')
|
||||
, ('&','&')
|
||||
, ('\'','\8715')
|
||||
, ('(','(')
|
||||
, (')',')')
|
||||
, ('*','\8727')
|
||||
, ('+','+')
|
||||
, (',',',')
|
||||
, ('-','\8722')
|
||||
, ('.','.')
|
||||
, ('/','/')
|
||||
, ('0','0')
|
||||
, ('1','1')
|
||||
, ('2','2')
|
||||
, ('3','3')
|
||||
, ('4','4')
|
||||
, ('5','5')
|
||||
, ('6','6')
|
||||
, ('7','7')
|
||||
, ('8','8')
|
||||
, ('9','9')
|
||||
, (':',':')
|
||||
, (';',';')
|
||||
, ('<','<')
|
||||
, ('=','=')
|
||||
, ('>','>')
|
||||
, ('?','?')
|
||||
, ('@','\8773')
|
||||
, ('A','\913')
|
||||
, ('B','\914')
|
||||
, ('C','\935')
|
||||
, ('D','\916')
|
||||
, ('D','\8710')
|
||||
, ('E','\917')
|
||||
, ('F','\934')
|
||||
, ('G','\915')
|
||||
, ('H','\919')
|
||||
, ('I','\921')
|
||||
, ('J','\977')
|
||||
, ('K','\922')
|
||||
, ('L','\923')
|
||||
, ('M','\924')
|
||||
, ('N','\925')
|
||||
, ('O','\927')
|
||||
, ('P','\928')
|
||||
, ('Q','\920')
|
||||
, ('R','\929')
|
||||
, ('S','\931')
|
||||
, ('T','\932')
|
||||
, ('U','\933')
|
||||
, ('V','\962')
|
||||
, ('W','\937')
|
||||
, ('W','\8486')
|
||||
, ('X','\926')
|
||||
, ('Y','\936')
|
||||
, ('Z','\918')
|
||||
, ('[','[')
|
||||
, ('\\','\8756')
|
||||
, (']',']')
|
||||
, ('^','\8869')
|
||||
, ('_','_')
|
||||
, ('`','\63717')
|
||||
, ('a','\945')
|
||||
, ('b','\946')
|
||||
, ('c','\967')
|
||||
, ('d','\948')
|
||||
, ('e','\949')
|
||||
, ('f','\966')
|
||||
, ('g','\947')
|
||||
, ('h','\951')
|
||||
, ('i','\953')
|
||||
, ('j','\981')
|
||||
, ('k','\954')
|
||||
, ('l','\955')
|
||||
, ('m','\181')
|
||||
, ('m','\956')
|
||||
, ('n','\957')
|
||||
, ('o','\959')
|
||||
, ('p','\960')
|
||||
, ('q','\952')
|
||||
, ('r','\961')
|
||||
, ('s','\963')
|
||||
, ('t','\964')
|
||||
, ('u','\965')
|
||||
, ('v','\982')
|
||||
, ('w','\969')
|
||||
, ('x','\958')
|
||||
, ('y','\968')
|
||||
, ('z','\950')
|
||||
, ('{','{')
|
||||
, ('|','|')
|
||||
, ('}','}')
|
||||
, ('~','\8764')
|
||||
, ('\160','\8364')
|
||||
, ('\161','\978')
|
||||
, ('\162','\8242')
|
||||
, ('\163','\8804')
|
||||
, ('\164','\8260')
|
||||
, ('\164','\8725')
|
||||
, ('\165','\8734')
|
||||
, ('\166','\402')
|
||||
, ('\167','\9827')
|
||||
, ('\168','\9830')
|
||||
, ('\169','\9829')
|
||||
, ('\170','\9824')
|
||||
, ('\171','\8596')
|
||||
, ('\172','\8592')
|
||||
, ('\173','\8593')
|
||||
, ('\174','\8594')
|
||||
, ('\175','\8595')
|
||||
, ('\176','\176')
|
||||
, ('\177','\177')
|
||||
, ('\178','\8243')
|
||||
, ('\179','\8805')
|
||||
, ('\180','\215')
|
||||
, ('\181','\8733')
|
||||
, ('\182','\8706')
|
||||
, ('\183','\8226')
|
||||
, ('\184','\247')
|
||||
, ('\185','\8800')
|
||||
, ('\186','\8801')
|
||||
, ('\187','\8776')
|
||||
, ('\188','\8230')
|
||||
, ('\189','\63718')
|
||||
, ('\190','\63719')
|
||||
, ('\191','\8629')
|
||||
, ('\192','\8501')
|
||||
, ('\193','\8465')
|
||||
, ('\194','\8476')
|
||||
, ('\195','\8472')
|
||||
, ('\196','\8855')
|
||||
, ('\197','\8853')
|
||||
, ('\198','\8709')
|
||||
, ('\199','\8745')
|
||||
, ('\200','\8746')
|
||||
, ('\201','\8835')
|
||||
, ('\202','\8839')
|
||||
, ('\203','\8836')
|
||||
, ('\204','\8834')
|
||||
, ('\205','\8838')
|
||||
, ('\206','\8712')
|
||||
, ('\207','\8713')
|
||||
, ('\208','\8736')
|
||||
, ('\209','\8711')
|
||||
, ('\210','\63194')
|
||||
, ('\211','\63193')
|
||||
, ('\212','\63195')
|
||||
, ('\213','\8719')
|
||||
, ('\214','\8730')
|
||||
, ('\215','\8901')
|
||||
, ('\216','\172')
|
||||
, ('\217','\8743')
|
||||
, ('\218','\8744')
|
||||
, ('\219','\8660')
|
||||
, ('\220','\8656')
|
||||
, ('\221','\8657')
|
||||
, ('\222','\8658')
|
||||
, ('\223','\8659')
|
||||
, ('\224','\9674')
|
||||
, ('\225','\9001')
|
||||
, ('\226','\63720')
|
||||
, ('\227','\63721')
|
||||
, ('\228','\63722')
|
||||
, ('\229','\8721')
|
||||
, ('\230','\63723')
|
||||
, ('\231','\63724')
|
||||
, ('\232','\63725')
|
||||
, ('\233','\63726')
|
||||
, ('\234','\63727')
|
||||
, ('\235','\63728')
|
||||
, ('\236','\63729')
|
||||
, ('\237','\63730')
|
||||
, ('\238','\63731')
|
||||
, ('\239','\63732')
|
||||
, ('\241','\9002')
|
||||
, ('\242','\8747')
|
||||
, ('\243','\8992')
|
||||
, ('\244','\63733')
|
||||
, ('\245','\8993')
|
||||
, ('\246','\63734')
|
||||
, ('\247','\63735')
|
||||
, ('\248','\63736')
|
||||
, ('\249','\63737')
|
||||
, ('\250','\63738')
|
||||
, ('\251','\63739')
|
||||
, ('\252','\63740')
|
||||
, ('\253','\63741')
|
||||
, ('\254','\63742')]
|
|
@ -59,10 +59,13 @@ import Data.Bits ((.|.))
|
|||
import qualified Data.ByteString.Lazy as B
|
||||
import qualified Text.Pandoc.UTF8 as UTF8
|
||||
import Control.Monad.Reader
|
||||
import Control.Applicative ((<$>))
|
||||
import qualified Data.Map as M
|
||||
import Text.Pandoc.Compat.Except
|
||||
import Text.Pandoc.Readers.Docx.OMath (readOMML)
|
||||
import Text.Pandoc.Readers.Docx.Fonts (getUnicode, Font(..))
|
||||
import Text.TeXMath (Exp)
|
||||
import Data.Char (readLitChar)
|
||||
|
||||
data ReaderEnv = ReaderEnv { envNotes :: Notes
|
||||
, envNumbering :: Numbering
|
||||
|
@ -673,8 +676,27 @@ elemToRunElem ns element
|
|||
return $ TextRun $ strContent element
|
||||
| isElem ns "w" "br" element = return LnBrk
|
||||
| isElem ns "w" "tab" element = return Tab
|
||||
| isElem ns "w" "sym" element = return (getSymChar ns element)
|
||||
| otherwise = throwError WrongElem
|
||||
|
||||
-- The char attribute is a hex string
|
||||
getSymChar :: NameSpaces -> Element -> RunElem
|
||||
getSymChar ns element
|
||||
| Just s <- lowerFromPrivate <$> getCodepoint
|
||||
, Just font <- getFont =
|
||||
let [(char, _)] = readLitChar ("\\x" ++ s) in
|
||||
TextRun . maybe "" (:[]) $ getUnicode font char
|
||||
where
|
||||
getCodepoint = findAttr (elemName ns "w" "char") element
|
||||
getFont = stringToFont =<< findAttr (elemName ns "w" "font") element
|
||||
lowerFromPrivate ('F':xs) = '0':xs
|
||||
lowerFromPrivate xs = xs
|
||||
getSymChar _ _ = TextRun ""
|
||||
|
||||
stringToFont :: String -> Maybe Font
|
||||
stringToFont "Symbol" = Just Symbol
|
||||
stringToFont _ = Nothing
|
||||
|
||||
elemToRunElems :: NameSpaces -> Element -> D [RunElem]
|
||||
elemToRunElems ns element
|
||||
| isElem ns "w" "r" element
|
||||
|
|
Binary file not shown.
|
@ -1 +1 @@
|
|||
[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10."]]
|
||||
[Para [Str "Hello,",Space,Str "\19990\30028.",Space,Str "This",Space,Str "costs",Space,Str "\8364\&10.\8744\8744"]]
|
||||
|
|
Loading…
Reference in a new issue