Update Font type to cover both encoding and decoding — still WIP for CMap; complete, though not yet tested, for MacRoman encoding

This commit is contained in:
Tissevert 2020-03-06 19:19:53 +01:00
parent 6e245189fd
commit 1ec47c5d07
3 changed files with 181 additions and 141 deletions

View file

@ -25,7 +25,7 @@ import qualified Data.Map as Map (
) )
import Data.Text (Text) import Data.Text (Text)
import qualified PDF.EOL as EOL (charset, parser) import qualified PDF.EOL as EOL (charset, parser)
import PDF.Font (Font) import PDF.Font (Decoder, Encoder, Font(..))
import PDF.Object ( import PDF.Object (
DirectObject(..), Name, StringObject(..) DirectObject(..), Name, StringObject(..)
, blank, directObject, integer, line, stringObject , blank, directObject, integer, line, stringObject
@ -34,7 +34,10 @@ import PDF.Parser (MonadParser, Parser, runParser, takeAll)
import Prelude hiding (fail) import Prelude hiding (fail)
type CMappers = Map Name CMap type CMappers = Map Name CMap
type Mapping = Map ByteString Text data Mapping = Mapping {
bsToT :: Map ByteString Text
, tToBS :: Map Text ByteString
}
data CRange = CRange { data CRange = CRange {
fromSequence :: ByteString fromSequence :: ByteString
, toSequence :: ByteString , toSequence :: ByteString
@ -44,11 +47,14 @@ type RangeSize = Int
type CMap = Map RangeSize [CRange] type CMap = Map RangeSize [CRange]
toFont :: CMap -> Font toFont :: CMap -> Font
toFont aCMap input toFont aCMap = Font {decode = decoder aCMap, encode = encoder aCMap}
decoder :: CMap -> Decoder
decoder aCMap input
| BS.null input = Right "" | BS.null input = Right ""
| otherwise = do | otherwise = do
(output, remainingInput) <- trySizes input $ Map.toList aCMap (output, remainingInput) <- trySizes input $ Map.toList aCMap
mappend output <$> toFont aCMap remainingInput mappend output <$> decoder aCMap remainingInput
where where
trySizes s [] = Left $ "No matching code found in font for " ++ unpack s trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
trySizes s ((size, cRanges):others) = trySizes s ((size, cRanges):others) =
@ -63,6 +69,9 @@ toFont aCMap input
Nothing -> tryRanges prefix cRanges Nothing -> tryRanges prefix cRanges
outputSequence -> outputSequence outputSequence -> outputSequence
encoder :: CMap -> Encoder
encoder = undefined
emptyCMap :: CMap emptyCMap :: CMap
emptyCMap = Map.empty emptyCMap = Map.empty

View file

@ -2,140 +2,160 @@ module PDF.Encoding.MacRoman (
macRomanEncoding macRomanEncoding
) where ) where
import Data.ByteString.Char8 (unpack) import qualified Data.ByteString.Char8 as BS (pack, unpack)
import Data.Text (pack) import Data.Map (Map)
import PDF.Font (Font) import qualified Data.Map as Map (empty, insert, lookup)
import qualified Data.Text as Text (pack, unpack)
import PDF.Font (Font(..))
type Mapper = Map Char Char
macRomanEncoding :: Font macRomanEncoding :: Font
macRomanEncoding = Right . pack . fmap decode . unpack macRomanEncoding = Font {
decode = Right . Text.pack . fmap decodeChar . BS.unpack
, encode = fmap BS.pack . mapM encodeChar . Text.unpack
}
where
decodeChar k = maybe k id $ Map.lookup k (fst mappers)
encodeChar k =
case Map.lookup k (snd mappers) of
Just v -> Right v
Nothing
| k < '\x80' -> Right k
| otherwise -> Left ("Character '" ++ k :"' unavailable in MacRoman")
decode :: Char -> Char mappers :: (Mapper, Mapper)
decode '\x80' = '\x00C4' -- LATIN CAPITAL LETTER A WITH DIAERESIS mappers = foldl generateMapers (Map.empty, Map.empty) [
decode '\x81' = '\x00C5' -- LATIN CAPITAL LETTER A WITH RING ABOVE ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
decode '\x82' = '\x00C7' -- LATIN CAPITAL LETTER C WITH CEDILLA , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
decode '\x83' = '\x00C9' -- LATIN CAPITAL LETTER E WITH ACUTE , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
decode '\x84' = '\x00D1' -- LATIN CAPITAL LETTER N WITH TILDE , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
decode '\x85' = '\x00D6' -- LATIN CAPITAL LETTER O WITH DIAERESIS , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
decode '\x86' = '\x00DC' -- LATIN CAPITAL LETTER U WITH DIAERESIS , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
decode '\x87' = '\x00E1' -- LATIN SMALL LETTER A WITH ACUTE , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
decode '\x88' = '\x00E0' -- LATIN SMALL LETTER A WITH GRAVE , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
decode '\x89' = '\x00E2' -- LATIN SMALL LETTER A WITH CIRCUMFLEX , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
decode '\x8A' = '\x00E4' -- LATIN SMALL LETTER A WITH DIAERESIS , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
decode '\x8B' = '\x00E3' -- LATIN SMALL LETTER A WITH TILDE , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
decode '\x8C' = '\x00E5' -- LATIN SMALL LETTER A WITH RING ABOVE , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
decode '\x8D' = '\x00E7' -- LATIN SMALL LETTER C WITH CEDILLA , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
decode '\x8E' = '\x00E9' -- LATIN SMALL LETTER E WITH ACUTE , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
decode '\x8F' = '\x00E8' -- LATIN SMALL LETTER E WITH GRAVE , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
decode '\x90' = '\x00EA' -- LATIN SMALL LETTER E WITH CIRCUMFLEX , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
decode '\x91' = '\x00EB' -- LATIN SMALL LETTER E WITH DIAERESIS , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
decode '\x92' = '\x00ED' -- LATIN SMALL LETTER I WITH ACUTE , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
decode '\x93' = '\x00EC' -- LATIN SMALL LETTER I WITH GRAVE , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
decode '\x94' = '\x00EE' -- LATIN SMALL LETTER I WITH CIRCUMFLEX , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
decode '\x95' = '\x00EF' -- LATIN SMALL LETTER I WITH DIAERESIS , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
decode '\x96' = '\x00F1' -- LATIN SMALL LETTER N WITH TILDE , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
decode '\x97' = '\x00F3' -- LATIN SMALL LETTER O WITH ACUTE , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
decode '\x98' = '\x00F2' -- LATIN SMALL LETTER O WITH GRAVE , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
decode '\x99' = '\x00F4' -- LATIN SMALL LETTER O WITH CIRCUMFLEX , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
decode '\x9A' = '\x00F6' -- LATIN SMALL LETTER O WITH DIAERESIS , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
decode '\x9B' = '\x00F5' -- LATIN SMALL LETTER O WITH TILDE , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
decode '\x9C' = '\x00FA' -- LATIN SMALL LETTER U WITH ACUTE , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
decode '\x9D' = '\x00F9' -- LATIN SMALL LETTER U WITH GRAVE , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
decode '\x9E' = '\x00FB' -- LATIN SMALL LETTER U WITH CIRCUMFLEX , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
decode '\x9F' = '\x00FC' -- LATIN SMALL LETTER U WITH DIAERESIS , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
decode '\xA0' = '\x2020' -- DAGGER , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
decode '\xA1' = '\x00B0' -- DEGREE SIGN , ('\xA0', '\x2020') -- DAGGER
decode '\xA2' = '\x00A2' -- CENT SIGN , ('\xA1', '\x00B0') -- DEGREE SIGN
decode '\xA3' = '\x00A3' -- POUND SIGN , ('\xA2', '\x00A2') -- CENT SIGN
decode '\xA4' = '\x00A7' -- SECTION SIGN , ('\xA3', '\x00A3') -- POUND SIGN
decode '\xA5' = '\x2022' -- BULLET , ('\xA4', '\x00A7') -- SECTION SIGN
decode '\xA6' = '\x00B6' -- PILCROW SIGN , ('\xA5', '\x2022') -- BULLET
decode '\xA7' = '\x00DF' -- LATIN SMALL LETTER SHARP S , ('\xA6', '\x00B6') -- PILCROW SIGN
decode '\xA8' = '\x00AE' -- REGISTERED SIGN , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
decode '\xA9' = '\x00A9' -- COPYRIGHT SIGN , ('\xA8', '\x00AE') -- REGISTERED SIGN
decode '\xAA' = '\x2122' -- TRADE MARK SIGN , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
decode '\xAB' = '\x00B4' -- ACUTE ACCENT , ('\xAA', '\x2122') -- TRADE MARK SIGN
decode '\xAC' = '\x00A8' -- DIAERESIS , ('\xAB', '\x00B4') -- ACUTE ACCENT
decode '\xAD' = '\x2260' -- NOT EQUAL TO , ('\xAC', '\x00A8') -- DIAERESIS
decode '\xAE' = '\x00C6' -- LATIN CAPITAL LETTER AE , ('\xAD', '\x2260') -- NOT EQUAL TO
decode '\xAF' = '\x00D8' -- LATIN CAPITAL LETTER O WITH STROKE , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
decode '\xB0' = '\x221E' -- INFINITY , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
decode '\xB1' = '\x00B1' -- PLUS-MINUS SIGN , ('\xB0', '\x221E') -- INFINITY
decode '\xB2' = '\x2264' -- LESS-THAN OR EQUAL TO , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
decode '\xB3' = '\x2265' -- GREATER-THAN OR EQUAL TO , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
decode '\xB4' = '\x00A5' -- YEN SIGN , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
decode '\xB5' = '\x00B5' -- MICRO SIGN , ('\xB4', '\x00A5') -- YEN SIGN
decode '\xB6' = '\x2202' -- PARTIAL DIFFERENTIAL , ('\xB5', '\x00B5') -- MICRO SIGN
decode '\xB7' = '\x2211' -- N-ARY SUMMATION , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
decode '\xB8' = '\x220F' -- N-ARY PRODUCT , ('\xB7', '\x2211') -- N-ARY SUMMATION
decode '\xB9' = '\x03C0' -- GREEK SMALL LETTER PI , ('\xB8', '\x220F') -- N-ARY PRODUCT
decode '\xBA' = '\x222B' -- INTEGRAL , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
decode '\xBB' = '\x00AA' -- FEMININE ORDINAL INDICATOR , ('\xBA', '\x222B') -- INTEGRAL
decode '\xBC' = '\x00BA' -- MASCULINE ORDINAL INDICATOR , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
decode '\xBD' = '\x03A9' -- GREEK CAPITAL LETTER OMEGA , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
decode '\xBE' = '\x00E6' -- LATIN SMALL LETTER AE , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
decode '\xBF' = '\x00F8' -- LATIN SMALL LETTER O WITH STROKE , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
decode '\xC0' = '\x00BF' -- INVERTED QUESTION MARK , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
decode '\xC1' = '\x00A1' -- INVERTED EXCLAMATION MARK , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
decode '\xC2' = '\x00AC' -- NOT SIGN , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
decode '\xC3' = '\x221A' -- SQUARE ROOT , ('\xC2', '\x00AC') -- NOT SIGN
decode '\xC4' = '\x0192' -- LATIN SMALL LETTER F WITH HOOK , ('\xC3', '\x221A') -- SQUARE ROOT
decode '\xC5' = '\x2248' -- ALMOST EQUAL TO , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
decode '\xC6' = '\x2206' -- INCREMENT , ('\xC5', '\x2248') -- ALMOST EQUAL TO
decode '\xC7' = '\x00AB' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK , ('\xC6', '\x2206') -- INCREMENT
decode '\xC8' = '\x00BB' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
decode '\xC9' = '\x2026' -- HORIZONTAL ELLIPSIS , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
decode '\xCA' = '\x00A0' -- NO-BREAK SPACE , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
decode '\xCB' = '\x00C0' -- LATIN CAPITAL LETTER A WITH GRAVE , ('\xCA', '\x00A0') -- NO-BREAK SPACE
decode '\xCC' = '\x00C3' -- LATIN CAPITAL LETTER A WITH TILDE , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
decode '\xCD' = '\x00D5' -- LATIN CAPITAL LETTER O WITH TILDE , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
decode '\xCE' = '\x0152' -- LATIN CAPITAL LIGATURE OE , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
decode '\xCF' = '\x0153' -- LATIN SMALL LIGATURE OE , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
decode '\xD0' = '\x2013' -- EN DASH , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
decode '\xD1' = '\x2014' -- EM DASH , ('\xD0', '\x2013') -- EN DASH
decode '\xD2' = '\x201C' -- LEFT DOUBLE QUOTATION MARK , ('\xD1', '\x2014') -- EM DASH
decode '\xD3' = '\x201D' -- RIGHT DOUBLE QUOTATION MARK , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
decode '\xD4' = '\x2018' -- LEFT SINGLE QUOTATION MARK , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
decode '\xD5' = '\x2019' -- RIGHT SINGLE QUOTATION MARK , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
decode '\xD6' = '\x00F7' -- DIVISION SIGN , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
decode '\xD7' = '\x25CA' -- LOZENGE , ('\xD6', '\x00F7') -- DIVISION SIGN
decode '\xD8' = '\x00FF' -- LATIN SMALL LETTER Y WITH DIAERESIS , ('\xD7', '\x25CA') -- LOZENGE
decode '\xD9' = '\x0178' -- LATIN CAPITAL LETTER Y WITH DIAERESIS , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
decode '\xDA' = '\x2044' -- FRACTION SLASH , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
decode '\xDB' = '\x20AC' -- EURO SIGN , ('\xDA', '\x2044') -- FRACTION SLASH
decode '\xDC' = '\x2039' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK , ('\xDB', '\x20AC') -- EURO SIGN
decode '\xDD' = '\x203A' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
decode '\xDE' = '\xFB01' -- LATIN SMALL LIGATURE FI , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
decode '\xDF' = '\xFB02' -- LATIN SMALL LIGATURE FL , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
decode '\xE0' = '\x2021' -- DOUBLE DAGGER , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
decode '\xE1' = '\x00B7' -- MIDDLE DOT , ('\xE0', '\x2021') -- DOUBLE DAGGER
decode '\xE2' = '\x201A' -- SINGLE LOW-9 QUOTATION MARK , ('\xE1', '\x00B7') -- MIDDLE DOT
decode '\xE3' = '\x201E' -- DOUBLE LOW-9 QUOTATION MARK , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
decode '\xE4' = '\x2030' -- PER MILLE SIGN , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
decode '\xE5' = '\x00C2' -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX , ('\xE4', '\x2030') -- PER MILLE SIGN
decode '\xE6' = '\x00CA' -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
decode '\xE7' = '\x00C1' -- LATIN CAPITAL LETTER A WITH ACUTE , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
decode '\xE8' = '\x00CB' -- LATIN CAPITAL LETTER E WITH DIAERESIS , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
decode '\xE9' = '\x00C8' -- LATIN CAPITAL LETTER E WITH GRAVE , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
decode '\xEA' = '\x00CD' -- LATIN CAPITAL LETTER I WITH ACUTE , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
decode '\xEB' = '\x00CE' -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
decode '\xEC' = '\x00CF' -- LATIN CAPITAL LETTER I WITH DIAERESIS , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
decode '\xED' = '\x00CC' -- LATIN CAPITAL LETTER I WITH GRAVE , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
decode '\xEE' = '\x00D3' -- LATIN CAPITAL LETTER O WITH ACUTE , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
decode '\xEF' = '\x00D4' -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
decode '\xF0' = '\xF8FF' -- Apple logo , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
decode '\xF1' = '\x00D2' -- LATIN CAPITAL LETTER O WITH GRAVE , ('\xF0', '\xF8FF') -- Apple logo
decode '\xF2' = '\x00DA' -- LATIN CAPITAL LETTER U WITH ACUTE , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
decode '\xF3' = '\x00DB' -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
decode '\xF4' = '\x00D9' -- LATIN CAPITAL LETTER U WITH GRAVE , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
decode '\xF5' = '\x0131' -- LATIN SMALL LETTER DOTLESS I , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
decode '\xF6' = '\x02C6' -- MODIFIER LETTER CIRCUMFLEX ACCENT , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
decode '\xF7' = '\x02DC' -- SMALL TILDE , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
decode '\xF8' = '\x00AF' -- MACRON , ('\xF7', '\x02DC') -- SMALL TILDE
decode '\xF9' = '\x02D8' -- BREVE , ('\xF8', '\x00AF') -- MACRON
decode '\xFA' = '\x02D9' -- DOT ABOVE , ('\xF9', '\x02D8') -- BREVE
decode '\xFB' = '\x02DA' -- RING ABOVE , ('\xFA', '\x02D9') -- DOT ABOVE
decode '\xFC' = '\x00B8' -- CEDILLA , ('\xFB', '\x02DA') -- RING ABOVE
decode '\xFD' = '\x02DD' -- DOUBLE ACUTE ACCENT , ('\xFC', '\x00B8') -- CEDILLA
decode '\xFE' = '\x02DB' -- OGONEK , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
decode '\xFF' = '\x02C7' -- CARON , ('\xFE', '\x02DB') -- OGONEK
decode c = c -- The rest is ASCII , ('\xFF', '\x02C7') -- CARON
]
where
generateMapers (tmpDecoder, tmpEncoder) (macChar, utf8Char) = (
Map.insert macChar utf8Char tmpDecoder
, Map.insert utf8Char macChar tmpEncoder
)

View file

@ -1,5 +1,7 @@
module PDF.Font ( module PDF.Font (
Font Decoder
, Encoder
, Font(..)
, FontSet , FontSet
, emptyFont , emptyFont
) where ) where
@ -9,8 +11,17 @@ import Data.Map (Map)
import Data.Text (Text) import Data.Text (Text)
import PDF.Object (Name) import PDF.Object (Name)
type Font = ByteString -> Either String Text type Decoder = ByteString -> Either String Text
type Encoder = Text -> Either String ByteString
data Font = Font {
decode :: Decoder
, encode :: Encoder
}
type FontSet = Map Name Font type FontSet = Map Name Font
emptyFont :: Font emptyFont :: Font
emptyFont _ = Left "No fond loaded" emptyFont = Font {
decode = \_ -> Left "No fond loaded"
, encode = \_ -> Left "No fond loaded"
}