Hufflepdf/src/PDF/Encoding/MacRoman.hs

163 lines
7.7 KiB
Haskell

-- | MacRoman text encoding, exposed as a single 'Font' value,
-- 'macRomanEncoding', which is the only export of this module.
module PDF.Encoding.MacRoman (
macRomanEncoding
) where
import qualified Data.ByteString.Char8 as BS (pack, unpack)
import Data.Foldable (foldl')
import Data.Map (Map)
import qualified Data.Map as Map (empty, insert, lookup)
import qualified Data.Text as Text (pack, unpack)
import PDF.Font (Font(..))
-- | Lookup table translating one character into another. The module keeps a
-- pair of these, one for each direction of the MacRoman conversion.
type Mapper = Map Char Char
-- | 'Font' implementation for the MacRoman single-byte encoding.
--
-- 'decode' translates every byte of the input through the decoding table and
-- always succeeds ('Right'); bytes missing from the table are kept unchanged.
-- 'encode' translates every character back to its MacRoman byte and stops
-- with a 'Left' error message at the first character that has no MacRoman
-- equivalent.
macRomanEncoding :: Font
macRomanEncoding =
  Font
    { decode = Right . Text.pack . map decodeChar . BS.unpack
    , encode = fmap BS.pack . traverse encodeChar . Text.unpack
    }
  where
    -- Split the table pair once: MacRoman -> Unicode and Unicode -> MacRoman.
    (decoder, encoder) = mappers

    -- Bytes without a table entry are passed through as they are.
    decodeChar :: Char -> Char
    decodeChar c = case Map.lookup c decoder of
      Just unicode -> unicode
      Nothing -> c

    -- A table hit wins; otherwise only characters below 0x80 are accepted
    -- verbatim and anything else is reported as unavailable.
    encodeChar :: Char -> Either String Char
    encodeChar c = maybe passThrough Right (Map.lookup c encoder)
      where
        passThrough
          | c < '\x80' = Right c
          | otherwise = Left ("Character '" ++ c : "' unavailable in MacRoman")
-- | The two translation tables for MacRoman, built in a single pass over the
-- association list below.
--
-- The first component maps a MacRoman byte (represented as a 'Char' in the
-- range 0x80-0xFF) to its Unicode character; the second component is the
-- inverse mapping, from Unicode character back to MacRoman byte. Bytes below
-- 0x80 have no entry in either table.
mappers :: (Mapper, Mapper)
mappers = foldl' register (Map.empty, Map.empty) table
  where
    -- Record one (MacRoman byte, Unicode character) pair in both directions.
    register (decoder, encoder) (macByte, unicodeChar) =
      ( Map.insert macByte unicodeChar decoder
      , Map.insert unicodeChar macByte encoder
      )

    -- (MacRoman byte, Unicode code point) pairs for the upper half of the
    -- code page.
    table =
      [ ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
      , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
      , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
      , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
      , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
      , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
      , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
      , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
      , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
      , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
      , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
      , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
      , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
      , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
      , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
      , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
      , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
      , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
      , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
      , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
      , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
      , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
      , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
      , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
      , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
      , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
      , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
      , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
      , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
      , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
      , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
      , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
      , ('\xA0', '\x2020') -- DAGGER
      , ('\xA1', '\x00B0') -- DEGREE SIGN
      , ('\xA2', '\x00A2') -- CENT SIGN
      , ('\xA3', '\x00A3') -- POUND SIGN
      , ('\xA4', '\x00A7') -- SECTION SIGN
      , ('\xA5', '\x2022') -- BULLET
      , ('\xA6', '\x00B6') -- PILCROW SIGN
      , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
      , ('\xA8', '\x00AE') -- REGISTERED SIGN
      , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
      , ('\xAA', '\x2122') -- TRADE MARK SIGN
      , ('\xAB', '\x00B4') -- ACUTE ACCENT
      , ('\xAC', '\x00A8') -- DIAERESIS
      , ('\xAD', '\x2260') -- NOT EQUAL TO
      , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
      , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
      , ('\xB0', '\x221E') -- INFINITY
      , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
      , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
      , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
      , ('\xB4', '\x00A5') -- YEN SIGN
      , ('\xB5', '\x00B5') -- MICRO SIGN
      , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
      , ('\xB7', '\x2211') -- N-ARY SUMMATION
      , ('\xB8', '\x220F') -- N-ARY PRODUCT
      , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
      , ('\xBA', '\x222B') -- INTEGRAL
      , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
      , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
      , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
      , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
      , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
      , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
      , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
      , ('\xC2', '\x00AC') -- NOT SIGN
      , ('\xC3', '\x221A') -- SQUARE ROOT
      , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
      , ('\xC5', '\x2248') -- ALMOST EQUAL TO
      , ('\xC6', '\x2206') -- INCREMENT
      , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
      , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
      , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
      , ('\xCA', '\x00A0') -- NO-BREAK SPACE
      , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
      , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
      , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
      , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
      , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
      , ('\xD0', '\x2013') -- EN DASH
      , ('\xD1', '\x2014') -- EM DASH
      , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
      , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
      , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
      , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
      , ('\xD6', '\x00F7') -- DIVISION SIGN
      , ('\xD7', '\x25CA') -- LOZENGE
      , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
      , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
      , ('\xDA', '\x2044') -- FRACTION SLASH
      , ('\xDB', '\x20AC') -- EURO SIGN
      , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
      , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
      , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
      , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
      , ('\xE0', '\x2021') -- DOUBLE DAGGER
      , ('\xE1', '\x00B7') -- MIDDLE DOT
      , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
      , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
      , ('\xE4', '\x2030') -- PER MILLE SIGN
      , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
      , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
      , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
      , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
      , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
      , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
      , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
      , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
      , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
      , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
      , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
      , ('\xF0', '\xF8FF') -- Apple logo
      , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
      , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
      , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
      , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
      , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
      , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
      , ('\xF7', '\x02DC') -- SMALL TILDE
      , ('\xF8', '\x00AF') -- MACRON
      , ('\xF9', '\x02D8') -- BREVE
      , ('\xFA', '\x02D9') -- DOT ABOVE
      , ('\xFB', '\x02DA') -- RING ABOVE
      , ('\xFC', '\x00B8') -- CEDILLA
      , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
      , ('\xFE', '\x02DB') -- OGONEK
      , ('\xFF', '\x02C7') -- CARON
      ]