Update Font type to cover both encoding and decoding — still WIP for CMap; complete, though not yet tested, for the MacRoman encoding

This commit is contained in:
Tissevert 2020-03-06 19:19:53 +01:00
parent 6e245189fd
commit 1ec47c5d07
3 changed files with 181 additions and 141 deletions

View file

@ -25,7 +25,7 @@ import qualified Data.Map as Map (
)
import Data.Text (Text)
import qualified PDF.EOL as EOL (charset, parser)
import PDF.Font (Font)
import PDF.Font (Decoder, Encoder, Font(..))
import PDF.Object (
DirectObject(..), Name, StringObject(..)
, blank, directObject, integer, line, stringObject
@ -34,7 +34,10 @@ import PDF.Parser (MonadParser, Parser, runParser, takeAll)
import Prelude hiding (fail)
-- | A collection of 'CMap's, each one stored under a 'Name'.
type CMappers = Map Name CMap
type Mapping = Map ByteString Text
-- | Two lookup tables between byte sequences and text, one per direction.
-- NOTE(review): presumably each table is the inverse of the other — confirm
-- where the values are built.
data Mapping = Mapping {
-- from a byte sequence to the associated text
bsToT :: Map ByteString Text
-- from a text to the associated byte sequence
, tToBS :: Map Text ByteString
}
data CRange = CRange {
fromSequence :: ByteString
, toSequence :: ByteString
@ -44,11 +47,14 @@ type RangeSize = Int
-- | Code ranges grouped under a 'RangeSize' key; 'decoder' goes through the
-- groups in the order given by @Map.toList@.
type CMap = Map RangeSize [CRange]
toFont :: CMap -> Font
toFont aCMap input
-- Wrap a CMap into a Font record: decoding is delegated to 'decoder' and
-- encoding to 'encoder', both applied to the same CMap.
toFont aCMap = Font {
    decode = decoder aCMap
  , encode = encoder aCMap
  }
decoder :: CMap -> Decoder
decoder aCMap input
| BS.null input = Right ""
| otherwise = do
(output, remainingInput) <- trySizes input $ Map.toList aCMap
mappend output <$> toFont aCMap remainingInput
mappend output <$> decoder aCMap remainingInput
where
trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
trySizes s ((size, cRanges):others) =
@ -63,6 +69,9 @@ toFont aCMap input
Nothing -> tryRanges prefix cRanges
outputSequence -> outputSequence
-- | Encode a text into the byte sequences of a 'CMap'.
--
-- Not implemented yet. Rather than crashing with 'undefined' as soon as the
-- result is demanded, every call reports the failure through the 'Left' side
-- of the 'Encoder' result, so callers can handle it like any other encoding
-- error.
encoder :: CMap -> Encoder
encoder _ _ = Left "Encoding is not implemented yet for CMap fonts"
-- | A 'CMap' without any range group in it.
emptyCMap :: CMap
emptyCMap = mempty

View file

@ -2,140 +2,160 @@ module PDF.Encoding.MacRoman (
macRomanEncoding
) where
import Data.ByteString.Char8 (unpack)
import Data.Text (pack)
import PDF.Font (Font)
import qualified Data.ByteString.Char8 as BS (pack, unpack)
import Data.Map (Map)
import qualified Data.Map as Map (empty, insert, lookup)
import qualified Data.Text as Text (pack, unpack)
import PDF.Font (Font(..))
-- | Character-to-character lookup table; 'mappers' builds one for each
-- direction of the MacRoman conversion.
type Mapper = Map Char Char
macRomanEncoding :: Font
macRomanEncoding = Right . pack . fmap decode . unpack
-- MacRoman as a Font: every byte is decoded on its own through the first
-- table of 'mappers', every character is encoded through the second one.
macRomanEncoding = Font {
    decode = Right . Text.pack . map decodeChar . BS.unpack
  , encode = fmap BS.pack . traverse encodeChar . Text.unpack
  }
  where
    (toUnicode, fromUnicode) = mappers
    -- Bytes missing from the table are passed through unchanged
    decodeChar macChar =
      case Map.lookup macChar toUnicode of
        Just unicodeChar -> unicodeChar
        Nothing -> macChar
    -- Characters missing from the table are kept as they are below '\x80';
    -- any other one makes the whole encoding fail
    encodeChar unicodeChar
      | Just macChar <- Map.lookup unicodeChar fromUnicode = Right macChar
      | unicodeChar < '\x80' = Right unicodeChar
      | otherwise =
        Left ("Character '" ++ unicodeChar :"' unavailable in MacRoman")
-- | Translate one MacRoman character code, given as a 'Char' in the range
-- '\x80'..'\xFF', into the corresponding Unicode character. Any other
-- character is returned unchanged by the last equation.
decode :: Char -> Char
decode '\x80' = '\x00C4' -- LATIN CAPITAL LETTER A WITH DIAERESIS
decode '\x81' = '\x00C5' -- LATIN CAPITAL LETTER A WITH RING ABOVE
decode '\x82' = '\x00C7' -- LATIN CAPITAL LETTER C WITH CEDILLA
decode '\x83' = '\x00C9' -- LATIN CAPITAL LETTER E WITH ACUTE
decode '\x84' = '\x00D1' -- LATIN CAPITAL LETTER N WITH TILDE
decode '\x85' = '\x00D6' -- LATIN CAPITAL LETTER O WITH DIAERESIS
decode '\x86' = '\x00DC' -- LATIN CAPITAL LETTER U WITH DIAERESIS
decode '\x87' = '\x00E1' -- LATIN SMALL LETTER A WITH ACUTE
decode '\x88' = '\x00E0' -- LATIN SMALL LETTER A WITH GRAVE
decode '\x89' = '\x00E2' -- LATIN SMALL LETTER A WITH CIRCUMFLEX
decode '\x8A' = '\x00E4' -- LATIN SMALL LETTER A WITH DIAERESIS
decode '\x8B' = '\x00E3' -- LATIN SMALL LETTER A WITH TILDE
decode '\x8C' = '\x00E5' -- LATIN SMALL LETTER A WITH RING ABOVE
decode '\x8D' = '\x00E7' -- LATIN SMALL LETTER C WITH CEDILLA
decode '\x8E' = '\x00E9' -- LATIN SMALL LETTER E WITH ACUTE
decode '\x8F' = '\x00E8' -- LATIN SMALL LETTER E WITH GRAVE
decode '\x90' = '\x00EA' -- LATIN SMALL LETTER E WITH CIRCUMFLEX
decode '\x91' = '\x00EB' -- LATIN SMALL LETTER E WITH DIAERESIS
decode '\x92' = '\x00ED' -- LATIN SMALL LETTER I WITH ACUTE
decode '\x93' = '\x00EC' -- LATIN SMALL LETTER I WITH GRAVE
decode '\x94' = '\x00EE' -- LATIN SMALL LETTER I WITH CIRCUMFLEX
decode '\x95' = '\x00EF' -- LATIN SMALL LETTER I WITH DIAERESIS
decode '\x96' = '\x00F1' -- LATIN SMALL LETTER N WITH TILDE
decode '\x97' = '\x00F3' -- LATIN SMALL LETTER O WITH ACUTE
decode '\x98' = '\x00F2' -- LATIN SMALL LETTER O WITH GRAVE
decode '\x99' = '\x00F4' -- LATIN SMALL LETTER O WITH CIRCUMFLEX
decode '\x9A' = '\x00F6' -- LATIN SMALL LETTER O WITH DIAERESIS
decode '\x9B' = '\x00F5' -- LATIN SMALL LETTER O WITH TILDE
decode '\x9C' = '\x00FA' -- LATIN SMALL LETTER U WITH ACUTE
decode '\x9D' = '\x00F9' -- LATIN SMALL LETTER U WITH GRAVE
decode '\x9E' = '\x00FB' -- LATIN SMALL LETTER U WITH CIRCUMFLEX
decode '\x9F' = '\x00FC' -- LATIN SMALL LETTER U WITH DIAERESIS
decode '\xA0' = '\x2020' -- DAGGER
decode '\xA1' = '\x00B0' -- DEGREE SIGN
decode '\xA2' = '\x00A2' -- CENT SIGN
decode '\xA3' = '\x00A3' -- POUND SIGN
decode '\xA4' = '\x00A7' -- SECTION SIGN
decode '\xA5' = '\x2022' -- BULLET
decode '\xA6' = '\x00B6' -- PILCROW SIGN
decode '\xA7' = '\x00DF' -- LATIN SMALL LETTER SHARP S
decode '\xA8' = '\x00AE' -- REGISTERED SIGN
decode '\xA9' = '\x00A9' -- COPYRIGHT SIGN
decode '\xAA' = '\x2122' -- TRADE MARK SIGN
decode '\xAB' = '\x00B4' -- ACUTE ACCENT
decode '\xAC' = '\x00A8' -- DIAERESIS
decode '\xAD' = '\x2260' -- NOT EQUAL TO
decode '\xAE' = '\x00C6' -- LATIN CAPITAL LETTER AE
decode '\xAF' = '\x00D8' -- LATIN CAPITAL LETTER O WITH STROKE
decode '\xB0' = '\x221E' -- INFINITY
decode '\xB1' = '\x00B1' -- PLUS-MINUS SIGN
decode '\xB2' = '\x2264' -- LESS-THAN OR EQUAL TO
decode '\xB3' = '\x2265' -- GREATER-THAN OR EQUAL TO
decode '\xB4' = '\x00A5' -- YEN SIGN
decode '\xB5' = '\x00B5' -- MICRO SIGN
decode '\xB6' = '\x2202' -- PARTIAL DIFFERENTIAL
decode '\xB7' = '\x2211' -- N-ARY SUMMATION
decode '\xB8' = '\x220F' -- N-ARY PRODUCT
decode '\xB9' = '\x03C0' -- GREEK SMALL LETTER PI
decode '\xBA' = '\x222B' -- INTEGRAL
decode '\xBB' = '\x00AA' -- FEMININE ORDINAL INDICATOR
decode '\xBC' = '\x00BA' -- MASCULINE ORDINAL INDICATOR
decode '\xBD' = '\x03A9' -- GREEK CAPITAL LETTER OMEGA
decode '\xBE' = '\x00E6' -- LATIN SMALL LETTER AE
decode '\xBF' = '\x00F8' -- LATIN SMALL LETTER O WITH STROKE
decode '\xC0' = '\x00BF' -- INVERTED QUESTION MARK
decode '\xC1' = '\x00A1' -- INVERTED EXCLAMATION MARK
decode '\xC2' = '\x00AC' -- NOT SIGN
decode '\xC3' = '\x221A' -- SQUARE ROOT
decode '\xC4' = '\x0192' -- LATIN SMALL LETTER F WITH HOOK
decode '\xC5' = '\x2248' -- ALMOST EQUAL TO
decode '\xC6' = '\x2206' -- INCREMENT
decode '\xC7' = '\x00AB' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
decode '\xC8' = '\x00BB' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
decode '\xC9' = '\x2026' -- HORIZONTAL ELLIPSIS
decode '\xCA' = '\x00A0' -- NO-BREAK SPACE
decode '\xCB' = '\x00C0' -- LATIN CAPITAL LETTER A WITH GRAVE
decode '\xCC' = '\x00C3' -- LATIN CAPITAL LETTER A WITH TILDE
decode '\xCD' = '\x00D5' -- LATIN CAPITAL LETTER O WITH TILDE
decode '\xCE' = '\x0152' -- LATIN CAPITAL LIGATURE OE
decode '\xCF' = '\x0153' -- LATIN SMALL LIGATURE OE
decode '\xD0' = '\x2013' -- EN DASH
decode '\xD1' = '\x2014' -- EM DASH
decode '\xD2' = '\x201C' -- LEFT DOUBLE QUOTATION MARK
decode '\xD3' = '\x201D' -- RIGHT DOUBLE QUOTATION MARK
decode '\xD4' = '\x2018' -- LEFT SINGLE QUOTATION MARK
decode '\xD5' = '\x2019' -- RIGHT SINGLE QUOTATION MARK
decode '\xD6' = '\x00F7' -- DIVISION SIGN
decode '\xD7' = '\x25CA' -- LOZENGE
decode '\xD8' = '\x00FF' -- LATIN SMALL LETTER Y WITH DIAERESIS
decode '\xD9' = '\x0178' -- LATIN CAPITAL LETTER Y WITH DIAERESIS
decode '\xDA' = '\x2044' -- FRACTION SLASH
decode '\xDB' = '\x20AC' -- EURO SIGN
decode '\xDC' = '\x2039' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
decode '\xDD' = '\x203A' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
decode '\xDE' = '\xFB01' -- LATIN SMALL LIGATURE FI
decode '\xDF' = '\xFB02' -- LATIN SMALL LIGATURE FL
decode '\xE0' = '\x2021' -- DOUBLE DAGGER
decode '\xE1' = '\x00B7' -- MIDDLE DOT
decode '\xE2' = '\x201A' -- SINGLE LOW-9 QUOTATION MARK
decode '\xE3' = '\x201E' -- DOUBLE LOW-9 QUOTATION MARK
decode '\xE4' = '\x2030' -- PER MILLE SIGN
decode '\xE5' = '\x00C2' -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
decode '\xE6' = '\x00CA' -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
decode '\xE7' = '\x00C1' -- LATIN CAPITAL LETTER A WITH ACUTE
decode '\xE8' = '\x00CB' -- LATIN CAPITAL LETTER E WITH DIAERESIS
decode '\xE9' = '\x00C8' -- LATIN CAPITAL LETTER E WITH GRAVE
decode '\xEA' = '\x00CD' -- LATIN CAPITAL LETTER I WITH ACUTE
decode '\xEB' = '\x00CE' -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
decode '\xEC' = '\x00CF' -- LATIN CAPITAL LETTER I WITH DIAERESIS
decode '\xED' = '\x00CC' -- LATIN CAPITAL LETTER I WITH GRAVE
decode '\xEE' = '\x00D3' -- LATIN CAPITAL LETTER O WITH ACUTE
decode '\xEF' = '\x00D4' -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
decode '\xF0' = '\xF8FF' -- Apple logo
decode '\xF1' = '\x00D2' -- LATIN CAPITAL LETTER O WITH GRAVE
decode '\xF2' = '\x00DA' -- LATIN CAPITAL LETTER U WITH ACUTE
decode '\xF3' = '\x00DB' -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
decode '\xF4' = '\x00D9' -- LATIN CAPITAL LETTER U WITH GRAVE
decode '\xF5' = '\x0131' -- LATIN SMALL LETTER DOTLESS I
decode '\xF6' = '\x02C6' -- MODIFIER LETTER CIRCUMFLEX ACCENT
decode '\xF7' = '\x02DC' -- SMALL TILDE
decode '\xF8' = '\x00AF' -- MACRON
decode '\xF9' = '\x02D8' -- BREVE
decode '\xFA' = '\x02D9' -- DOT ABOVE
decode '\xFB' = '\x02DA' -- RING ABOVE
decode '\xFC' = '\x00B8' -- CEDILLA
decode '\xFD' = '\x02DD' -- DOUBLE ACUTE ACCENT
decode '\xFE' = '\x02DB' -- OGONEK
decode '\xFF' = '\x02C7' -- CARON
decode c = c -- The rest is ASCII
-- | The MacRoman table in both directions: the first map goes from a
-- MacRoman code ('\x80'..'\xFF') to its Unicode character, the second one
-- goes back from that character to the code.
--
-- Every code and every character occurs exactly once in the table, so the
-- order in which the pairs are inserted has no effect on the associations
-- stored in the maps.
mappers :: (Mapper, Mapper)
mappers = foldr register (Map.empty, Map.empty) macRomanTable
  where
    -- Record one pair of the table in each of the two maps
    register (macChar, unicodeChar) (toUnicode, fromUnicode) = (
        Map.insert macChar unicodeChar toUnicode
      , Map.insert unicodeChar macChar fromUnicode
      )
    macRomanTable = [
        ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
      , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
      , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
      , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
      , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
      , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
      , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
      , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
      , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
      , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
      , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
      , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
      , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
      , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
      , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
      , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
      , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
      , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
      , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
      , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
      , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
      , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
      , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
      , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
      , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
      , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
      , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
      , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
      , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
      , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
      , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
      , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
      , ('\xA0', '\x2020') -- DAGGER
      , ('\xA1', '\x00B0') -- DEGREE SIGN
      , ('\xA2', '\x00A2') -- CENT SIGN
      , ('\xA3', '\x00A3') -- POUND SIGN
      , ('\xA4', '\x00A7') -- SECTION SIGN
      , ('\xA5', '\x2022') -- BULLET
      , ('\xA6', '\x00B6') -- PILCROW SIGN
      , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
      , ('\xA8', '\x00AE') -- REGISTERED SIGN
      , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
      , ('\xAA', '\x2122') -- TRADE MARK SIGN
      , ('\xAB', '\x00B4') -- ACUTE ACCENT
      , ('\xAC', '\x00A8') -- DIAERESIS
      , ('\xAD', '\x2260') -- NOT EQUAL TO
      , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
      , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
      , ('\xB0', '\x221E') -- INFINITY
      , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
      , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
      , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
      , ('\xB4', '\x00A5') -- YEN SIGN
      , ('\xB5', '\x00B5') -- MICRO SIGN
      , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
      , ('\xB7', '\x2211') -- N-ARY SUMMATION
      , ('\xB8', '\x220F') -- N-ARY PRODUCT
      , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
      , ('\xBA', '\x222B') -- INTEGRAL
      , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
      , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
      , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
      , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
      , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
      , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
      , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
      , ('\xC2', '\x00AC') -- NOT SIGN
      , ('\xC3', '\x221A') -- SQUARE ROOT
      , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
      , ('\xC5', '\x2248') -- ALMOST EQUAL TO
      , ('\xC6', '\x2206') -- INCREMENT
      , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
      , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
      , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
      , ('\xCA', '\x00A0') -- NO-BREAK SPACE
      , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
      , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
      , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
      , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
      , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
      , ('\xD0', '\x2013') -- EN DASH
      , ('\xD1', '\x2014') -- EM DASH
      , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
      , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
      , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
      , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
      , ('\xD6', '\x00F7') -- DIVISION SIGN
      , ('\xD7', '\x25CA') -- LOZENGE
      , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
      , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
      , ('\xDA', '\x2044') -- FRACTION SLASH
      , ('\xDB', '\x20AC') -- EURO SIGN
      , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
      , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
      , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
      , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
      , ('\xE0', '\x2021') -- DOUBLE DAGGER
      , ('\xE1', '\x00B7') -- MIDDLE DOT
      , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
      , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
      , ('\xE4', '\x2030') -- PER MILLE SIGN
      , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
      , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
      , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
      , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
      , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
      , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
      , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
      , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
      , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
      , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
      , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
      , ('\xF0', '\xF8FF') -- Apple logo
      , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
      , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
      , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
      , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
      , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
      , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
      , ('\xF7', '\x02DC') -- SMALL TILDE
      , ('\xF8', '\x00AF') -- MACRON
      , ('\xF9', '\x02D8') -- BREVE
      , ('\xFA', '\x02D9') -- DOT ABOVE
      , ('\xFB', '\x02DA') -- RING ABOVE
      , ('\xFC', '\x00B8') -- CEDILLA
      , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
      , ('\xFE', '\x02DB') -- OGONEK
      , ('\xFF', '\x02C7') -- CARON
      ]

View file

@ -1,5 +1,7 @@
module PDF.Font (
Font
Decoder
, Encoder
, Font(..)
, FontSet
, emptyFont
) where
@ -9,8 +11,17 @@ import Data.Map (Map)
import Data.Text (Text)
import PDF.Object (Name)
type Font = ByteString -> Either String Text
-- | Conversion from bytes to text; a failure is reported as a message in
-- 'Left'.
type Decoder = ByteString -> Either String Text
-- | Conversion from text to bytes; a failure is reported as a message in
-- 'Left'.
type Encoder = Text -> Either String ByteString
-- | A font, represented by its two conversions between bytes and text.
data Font = Font {
-- bytes to text
decode :: Decoder
-- text to bytes
, encode :: Encoder
}
type FontSet = Map Name Font
emptyFont :: Font
emptyFont _ = Left "No fond loaded"
-- Placeholder used while no font is loaded: both conversions ignore their
-- input and fail with the same message.
emptyFont = Font {
    decode = const noFont
  , encode = const noFont
  }
  where
    -- Shared failure, polymorphic in the result so it fits both directions
    noFont :: Either String a
    noFont = Left "No font loaded"