161 lines
7.7 KiB
Haskell
161 lines
7.7 KiB
Haskell
module PDF.Encoding.MacRoman (
|
|
macRomanEncoding
|
|
) where
|
|
|
|
import qualified Data.ByteString.Char8 as BS (pack, unpack)
|
|
import Data.Map (Map)
|
|
import qualified Data.Map as Map (empty, insert, lookup)
|
|
import qualified Data.Text as Text (pack, unpack)
|
|
import PDF.Font (Font(..))
|
|
|
|
-- | Lookup table from one character to another; used in this module for
-- both directions of the MacRoman translation (see 'mappers').
type Mapper = Map Char Char
|
|
-- | MacRoman encoding packaged as a 'Font' record.
--
-- * 'decode' unpacks the bytes as 8-bit characters and translates each one
--   through the decoding table; characters absent from the table are kept
--   unchanged. As written here it always produces a 'Right'.
-- * 'encode' translates each character of the text through the encoding
--   table. A character missing from the table is kept as is when it is
--   below U+0080, otherwise the whole conversion stops with a 'Left'
--   carrying an error message naming that character.
macRomanEncoding :: Font
macRomanEncoding = Font {
      decode = Right . Text.pack . fmap fromMacRoman . BS.unpack
    , encode = fmap BS.pack . mapM toMacRoman . Text.unpack
  }
  where
    -- Both directions are built once by 'mappers'.
    (decoder, encoder) = mappers

    -- MacRoman byte (as a Char) -> Unicode character.
    fromMacRoman byte =
      case Map.lookup byte decoder of
        Just unicodeChar -> unicodeChar
        Nothing -> byte

    -- Unicode character -> MacRoman byte (as a Char), or an error message.
    toMacRoman char
      | Just byte <- Map.lookup char encoder = Right byte
      | char < '\x80' = Right char
      | otherwise =
          Left ("Character '" ++ char : "' unavailable in MacRoman")
|
|
|
|
-- | The two translation tables for MacRoman, built from a single list of
-- @(MacRoman character, Unicode character)@ pairs.
--
-- The first component maps a MacRoman character to its Unicode
-- counterpart (decoding); the second component is the reverse mapping
-- (encoding). Only the codes from @0x80@ to @0xFF@ are listed.
mappers :: (Mapper, Mapper)
mappers = foldl register (Map.empty, Map.empty) table
  where
    -- Record one pair in both directions.
    register (decoder, encoder) (macChar, unicodeChar) =
      ( Map.insert macChar unicodeChar decoder
      , Map.insert unicodeChar macChar encoder
      )

    table =
      [ ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
      , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
      , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
      , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
      , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
      , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
      , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
      , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
      , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
      , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
      , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
      , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
      , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
      , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
      , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
      , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
      , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
      , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
      , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
      , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
      , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
      , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
      , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
      , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
      , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
      , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
      , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
      , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
      , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
      , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
      , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
      , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
      , ('\xA0', '\x2020') -- DAGGER
      , ('\xA1', '\x00B0') -- DEGREE SIGN
      , ('\xA2', '\x00A2') -- CENT SIGN
      , ('\xA3', '\x00A3') -- POUND SIGN
      , ('\xA4', '\x00A7') -- SECTION SIGN
      , ('\xA5', '\x2022') -- BULLET
      , ('\xA6', '\x00B6') -- PILCROW SIGN
      , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
      , ('\xA8', '\x00AE') -- REGISTERED SIGN
      , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
      , ('\xAA', '\x2122') -- TRADE MARK SIGN
      , ('\xAB', '\x00B4') -- ACUTE ACCENT
      , ('\xAC', '\x00A8') -- DIAERESIS
      , ('\xAD', '\x2260') -- NOT EQUAL TO
      , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
      , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
      , ('\xB0', '\x221E') -- INFINITY
      , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
      , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
      , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
      , ('\xB4', '\x00A5') -- YEN SIGN
      , ('\xB5', '\x00B5') -- MICRO SIGN
      , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
      , ('\xB7', '\x2211') -- N-ARY SUMMATION
      , ('\xB8', '\x220F') -- N-ARY PRODUCT
      , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
      , ('\xBA', '\x222B') -- INTEGRAL
      , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
      , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
      , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
      , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
      , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
      , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
      , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
      , ('\xC2', '\x00AC') -- NOT SIGN
      , ('\xC3', '\x221A') -- SQUARE ROOT
      , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
      , ('\xC5', '\x2248') -- ALMOST EQUAL TO
      , ('\xC6', '\x2206') -- INCREMENT
      , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
      , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
      , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
      , ('\xCA', '\x00A0') -- NO-BREAK SPACE
      , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
      , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
      , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
      , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
      , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
      , ('\xD0', '\x2013') -- EN DASH
      , ('\xD1', '\x2014') -- EM DASH
      , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
      , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
      , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
      , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
      , ('\xD6', '\x00F7') -- DIVISION SIGN
      , ('\xD7', '\x25CA') -- LOZENGE
      , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
      , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
      , ('\xDA', '\x2044') -- FRACTION SLASH
      , ('\xDB', '\x20AC') -- EURO SIGN
      , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
      , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
      , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
      , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
      , ('\xE0', '\x2021') -- DOUBLE DAGGER
      , ('\xE1', '\x00B7') -- MIDDLE DOT
      , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
      , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
      , ('\xE4', '\x2030') -- PER MILLE SIGN
      , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
      , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
      , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
      , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
      , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
      , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
      , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
      , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
      , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
      , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
      , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
      , ('\xF0', '\xF8FF') -- Apple logo
      , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
      , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
      , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
      , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
      , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
      , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
      , ('\xF7', '\x02DC') -- SMALL TILDE
      , ('\xF8', '\x00AF') -- MACRON
      , ('\xF9', '\x02D8') -- BREVE
      , ('\xFA', '\x02D9') -- DOT ABOVE
      , ('\xFB', '\x02DA') -- RING ABOVE
      , ('\xFC', '\x00B8') -- CEDILLA
      , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
      , ('\xFE', '\x02DB') -- OGONEK
      , ('\xFF', '\x02C7') -- CARON
      ]
|