From 1ec47c5d07dec6c468f13ec45eb2d956312a83f4 Mon Sep 17 00:00:00 2001
From: Tissevert
Date: Fri, 6 Mar 2020 19:19:53 +0100
Subject: [PATCH] =?UTF-8?q?Update=20Font=20type=20to=20cover=20both=20enco?=
 =?UTF-8?q?ding=20and=20decoding=20=E2=80=94=20WIP=20for=20CMap,=20but=20c?=
 =?UTF-8?q?omplete=20though=20not=20tested=20yet=20for=20MacRoman=20encodi?=
 =?UTF-8?q?ng?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
NOTE(review): the line breaks and indentation of this patch were rebuilt from
a copy whose whitespace had been collapsed. Context and removed lines may not
match the target files byte for byte -- TODO confirm against the repository,
or apply with `git am --ignore-whitespace`.

Review changes made on top of the original patch (added lines only, so the
hunk line counts and the diffstat below are unchanged):
  * PDF.Font: error message typo "No fond loaded" fixed to "No font loaded".
  * PDF.CMap: `encoder` now returns a `Left` error instead of `undefined`, so
    calling the not-yet-implemented CMap encoder fails like `emptyFont` does
    rather than crashing.
  * PDF.Encoding.MacRoman: helper `generateMapers` renamed `generateMappers`;
    `maybe k id $ Map.lookup ...` replaced with `Map.findWithDefault`.
The post-image blob ids on the `index` lines come from the original patch and
no longer correspond to these edited contents.

 src/PDF/CMap.hs              |  17 ++-
 src/PDF/Encoding/MacRoman.hs | 288 +++++++++++++++++++----------------
 src/PDF/Font.hs              |  17 ++-
 3 files changed, 181 insertions(+), 141 deletions(-)

diff --git a/src/PDF/CMap.hs b/src/PDF/CMap.hs
index a194113..93a2ce2 100644
--- a/src/PDF/CMap.hs
+++ b/src/PDF/CMap.hs
@@ -25,7 +25,7 @@ import qualified Data.Map as Map (
   )
 import Data.Text (Text)
 import qualified PDF.EOL as EOL (charset, parser)
-import PDF.Font (Font)
+import PDF.Font (Decoder, Encoder, Font(..))
 import PDF.Object (
     DirectObject(..), Name, StringObject(..)
   , blank, directObject, integer, line, stringObject
@@ -34,7 +34,10 @@ import PDF.Parser (MonadParser, Parser, runParser, takeAll)
 import Prelude hiding (fail)
 
 type CMappers = Map Name CMap
-type Mapping = Map ByteString Text
+data Mapping = Mapping {
+    bsToT :: Map ByteString Text
+  , tToBS :: Map Text ByteString
+  }
 data CRange = CRange {
     fromSequence :: ByteString
   , toSequence :: ByteString
@@ -44,11 +47,14 @@ type RangeSize = Int
 type CMap = Map RangeSize [CRange]
 
 toFont :: CMap -> Font
-toFont aCMap input
+toFont aCMap = Font {decode = decoder aCMap, encode = encoder aCMap}
+
+decoder :: CMap -> Decoder
+decoder aCMap input
   | BS.null input = Right ""
   | otherwise = do
       (output, remainingInput) <- trySizes input $ Map.toList aCMap
-      mappend output <$> toFont aCMap remainingInput
+      mappend output <$> decoder aCMap remainingInput
   where
     trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
     trySizes s ((size, cRanges):others) =
@@ -63,6 +69,9 @@ toFont aCMap input
           Nothing -> tryRanges prefix cRanges
           outputSequence -> outputSequence
 
+encoder :: CMap -> Encoder
+encoder _ _ = Left "Encoding text with a CMap font is not implemented yet"
+
 emptyCMap :: CMap
 emptyCMap = Map.empty
 
diff --git a/src/PDF/Encoding/MacRoman.hs b/src/PDF/Encoding/MacRoman.hs
index 445654f..71da279 100644
--- a/src/PDF/Encoding/MacRoman.hs
+++ b/src/PDF/Encoding/MacRoman.hs
@@ -2,140 +2,160 @@ module PDF.Encoding.MacRoman (
     macRomanEncoding
   ) where
 
-import Data.ByteString.Char8 (unpack)
-import Data.Text (pack)
-import PDF.Font (Font)
+import qualified Data.ByteString.Char8 as BS (pack, unpack)
+import Data.Map (Map)
+import qualified Data.Map as Map (empty, findWithDefault, insert, lookup)
+import qualified Data.Text as Text (pack, unpack)
+import PDF.Font (Font(..))
 
+type Mapper = Map Char Char
 macRomanEncoding :: Font
-macRomanEncoding = Right . pack . fmap decode . unpack
+macRomanEncoding = Font {
+    decode = Right . Text.pack . fmap decodeChar . BS.unpack
+  , encode = fmap BS.pack . mapM encodeChar . Text.unpack
+  }
+  where
+    decodeChar k = Map.findWithDefault k k (fst mappers)
+    encodeChar k =
+      case Map.lookup k (snd mappers) of
+        Just v -> Right v
+        Nothing
+          | k < '\x80' -> Right k
+          | otherwise -> Left ("Character '" ++ k : "' unavailable in MacRoman")
 
-decode :: Char -> Char
-decode '\x80' = '\x00C4' -- LATIN CAPITAL LETTER A WITH DIAERESIS
-decode '\x81' = '\x00C5' -- LATIN CAPITAL LETTER A WITH RING ABOVE
-decode '\x82' = '\x00C7' -- LATIN CAPITAL LETTER C WITH CEDILLA
-decode '\x83' = '\x00C9' -- LATIN CAPITAL LETTER E WITH ACUTE
-decode '\x84' = '\x00D1' -- LATIN CAPITAL LETTER N WITH TILDE
-decode '\x85' = '\x00D6' -- LATIN CAPITAL LETTER O WITH DIAERESIS
-decode '\x86' = '\x00DC' -- LATIN CAPITAL LETTER U WITH DIAERESIS
-decode '\x87' = '\x00E1' -- LATIN SMALL LETTER A WITH ACUTE
-decode '\x88' = '\x00E0' -- LATIN SMALL LETTER A WITH GRAVE
-decode '\x89' = '\x00E2' -- LATIN SMALL LETTER A WITH CIRCUMFLEX
-decode '\x8A' = '\x00E4' -- LATIN SMALL LETTER A WITH DIAERESIS
-decode '\x8B' = '\x00E3' -- LATIN SMALL LETTER A WITH TILDE
-decode '\x8C' = '\x00E5' -- LATIN SMALL LETTER A WITH RING ABOVE
-decode '\x8D' = '\x00E7' -- LATIN SMALL LETTER C WITH CEDILLA
-decode '\x8E' = '\x00E9' -- LATIN SMALL LETTER E WITH ACUTE
-decode '\x8F' = '\x00E8' -- LATIN SMALL LETTER E WITH GRAVE
-decode '\x90' = '\x00EA' -- LATIN SMALL LETTER E WITH CIRCUMFLEX
-decode '\x91' = '\x00EB' -- LATIN SMALL LETTER E WITH DIAERESIS
-decode '\x92' = '\x00ED' -- LATIN SMALL LETTER I WITH ACUTE
-decode '\x93' = '\x00EC' -- LATIN SMALL LETTER I WITH GRAVE
-decode '\x94' = '\x00EE' -- LATIN SMALL LETTER I WITH CIRCUMFLEX
-decode '\x95' = '\x00EF' -- LATIN SMALL LETTER I WITH DIAERESIS
-decode '\x96' = '\x00F1' -- LATIN SMALL LETTER N WITH TILDE
-decode '\x97' = '\x00F3' -- LATIN SMALL LETTER O WITH ACUTE
-decode '\x98' = '\x00F2' -- LATIN SMALL LETTER O WITH GRAVE
-decode '\x99' = '\x00F4' -- LATIN SMALL LETTER O WITH CIRCUMFLEX
-decode '\x9A' = '\x00F6' -- LATIN SMALL LETTER O WITH DIAERESIS
-decode '\x9B' = '\x00F5' -- LATIN SMALL LETTER O WITH TILDE
-decode '\x9C' = '\x00FA' -- LATIN SMALL LETTER U WITH ACUTE
-decode '\x9D' = '\x00F9' -- LATIN SMALL LETTER U WITH GRAVE
-decode '\x9E' = '\x00FB' -- LATIN SMALL LETTER U WITH CIRCUMFLEX
-decode '\x9F' = '\x00FC' -- LATIN SMALL LETTER U WITH DIAERESIS
-decode '\xA0' = '\x2020' -- DAGGER
-decode '\xA1' = '\x00B0' -- DEGREE SIGN
-decode '\xA2' = '\x00A2' -- CENT SIGN
-decode '\xA3' = '\x00A3' -- POUND SIGN
-decode '\xA4' = '\x00A7' -- SECTION SIGN
-decode '\xA5' = '\x2022' -- BULLET
-decode '\xA6' = '\x00B6' -- PILCROW SIGN
-decode '\xA7' = '\x00DF' -- LATIN SMALL LETTER SHARP S
-decode '\xA8' = '\x00AE' -- REGISTERED SIGN
-decode '\xA9' = '\x00A9' -- COPYRIGHT SIGN
-decode '\xAA' = '\x2122' -- TRADE MARK SIGN
-decode '\xAB' = '\x00B4' -- ACUTE ACCENT
-decode '\xAC' = '\x00A8' -- DIAERESIS
-decode '\xAD' = '\x2260' -- NOT EQUAL TO
-decode '\xAE' = '\x00C6' -- LATIN CAPITAL LETTER AE
-decode '\xAF' = '\x00D8' -- LATIN CAPITAL LETTER O WITH STROKE
-decode '\xB0' = '\x221E' -- INFINITY
-decode '\xB1' = '\x00B1' -- PLUS-MINUS SIGN
-decode '\xB2' = '\x2264' -- LESS-THAN OR EQUAL TO
-decode '\xB3' = '\x2265' -- GREATER-THAN OR EQUAL TO
-decode '\xB4' = '\x00A5' -- YEN SIGN
-decode '\xB5' = '\x00B5' -- MICRO SIGN
-decode '\xB6' = '\x2202' -- PARTIAL DIFFERENTIAL
-decode '\xB7' = '\x2211' -- N-ARY SUMMATION
-decode '\xB8' = '\x220F' -- N-ARY PRODUCT
-decode '\xB9' = '\x03C0' -- GREEK SMALL LETTER PI
-decode '\xBA' = '\x222B' -- INTEGRAL
-decode '\xBB' = '\x00AA' -- FEMININE ORDINAL INDICATOR
-decode '\xBC' = '\x00BA' -- MASCULINE ORDINAL INDICATOR
-decode '\xBD' = '\x03A9' -- GREEK CAPITAL LETTER OMEGA
-decode '\xBE' = '\x00E6' -- LATIN SMALL LETTER AE
-decode '\xBF' = '\x00F8' -- LATIN SMALL LETTER O WITH STROKE
-decode '\xC0' = '\x00BF' -- INVERTED QUESTION MARK
-decode '\xC1' = '\x00A1' -- INVERTED EXCLAMATION MARK
-decode '\xC2' = '\x00AC' -- NOT SIGN
-decode '\xC3' = '\x221A' -- SQUARE ROOT
-decode '\xC4' = '\x0192' -- LATIN SMALL LETTER F WITH HOOK
-decode '\xC5' = '\x2248' -- ALMOST EQUAL TO
-decode '\xC6' = '\x2206' -- INCREMENT
-decode '\xC7' = '\x00AB' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
-decode '\xC8' = '\x00BB' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
-decode '\xC9' = '\x2026' -- HORIZONTAL ELLIPSIS
-decode '\xCA' = '\x00A0' -- NO-BREAK SPACE
-decode '\xCB' = '\x00C0' -- LATIN CAPITAL LETTER A WITH GRAVE
-decode '\xCC' = '\x00C3' -- LATIN CAPITAL LETTER A WITH TILDE
-decode '\xCD' = '\x00D5' -- LATIN CAPITAL LETTER O WITH TILDE
-decode '\xCE' = '\x0152' -- LATIN CAPITAL LIGATURE OE
-decode '\xCF' = '\x0153' -- LATIN SMALL LIGATURE OE
-decode '\xD0' = '\x2013' -- EN DASH
-decode '\xD1' = '\x2014' -- EM DASH
-decode '\xD2' = '\x201C' -- LEFT DOUBLE QUOTATION MARK
-decode '\xD3' = '\x201D' -- RIGHT DOUBLE QUOTATION MARK
-decode '\xD4' = '\x2018' -- LEFT SINGLE QUOTATION MARK
-decode '\xD5' = '\x2019' -- RIGHT SINGLE QUOTATION MARK
-decode '\xD6' = '\x00F7' -- DIVISION SIGN
-decode '\xD7' = '\x25CA' -- LOZENGE
-decode '\xD8' = '\x00FF' -- LATIN SMALL LETTER Y WITH DIAERESIS
-decode '\xD9' = '\x0178' -- LATIN CAPITAL LETTER Y WITH DIAERESIS
-decode '\xDA' = '\x2044' -- FRACTION SLASH
-decode '\xDB' = '\x20AC' -- EURO SIGN
-decode '\xDC' = '\x2039' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
-decode '\xDD' = '\x203A' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
-decode '\xDE' = '\xFB01' -- LATIN SMALL LIGATURE FI
-decode '\xDF' = '\xFB02' -- LATIN SMALL LIGATURE FL
-decode '\xE0' = '\x2021' -- DOUBLE DAGGER
-decode '\xE1' = '\x00B7' -- MIDDLE DOT
-decode '\xE2' = '\x201A' -- SINGLE LOW-9 QUOTATION MARK
-decode '\xE3' = '\x201E' -- DOUBLE LOW-9 QUOTATION MARK
-decode '\xE4' = '\x2030' -- PER MILLE SIGN
-decode '\xE5' = '\x00C2' -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
-decode '\xE6' = '\x00CA' -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
-decode '\xE7' = '\x00C1' -- LATIN CAPITAL LETTER A WITH ACUTE
-decode '\xE8' = '\x00CB' -- LATIN CAPITAL LETTER E WITH DIAERESIS
-decode '\xE9' = '\x00C8' -- LATIN CAPITAL LETTER E WITH GRAVE
-decode '\xEA' = '\x00CD' -- LATIN CAPITAL LETTER I WITH ACUTE
-decode '\xEB' = '\x00CE' -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
-decode '\xEC' = '\x00CF' -- LATIN CAPITAL LETTER I WITH DIAERESIS
-decode '\xED' = '\x00CC' -- LATIN CAPITAL LETTER I WITH GRAVE
-decode '\xEE' = '\x00D3' -- LATIN CAPITAL LETTER O WITH ACUTE
-decode '\xEF' = '\x00D4' -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
-decode '\xF0' = '\xF8FF' -- Apple logo
-decode '\xF1' = '\x00D2' -- LATIN CAPITAL LETTER O WITH GRAVE
-decode '\xF2' = '\x00DA' -- LATIN CAPITAL LETTER U WITH ACUTE
-decode '\xF3' = '\x00DB' -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
-decode '\xF4' = '\x00D9' -- LATIN CAPITAL LETTER U WITH GRAVE
-decode '\xF5' = '\x0131' -- LATIN SMALL LETTER DOTLESS I
-decode '\xF6' = '\x02C6' -- MODIFIER LETTER CIRCUMFLEX ACCENT
-decode '\xF7' = '\x02DC' -- SMALL TILDE
-decode '\xF8' = '\x00AF' -- MACRON
-decode '\xF9' = '\x02D8' -- BREVE
-decode '\xFA' = '\x02D9' -- DOT ABOVE
-decode '\xFB' = '\x02DA' -- RING ABOVE
-decode '\xFC' = '\x00B8' -- CEDILLA
-decode '\xFD' = '\x02DD' -- DOUBLE ACUTE ACCENT
-decode '\xFE' = '\x02DB' -- OGONEK
-decode '\xFF' = '\x02C7' -- CARON
-decode c = c -- The rest is ASCII
+mappers :: (Mapper, Mapper)
+mappers = foldl generateMappers (Map.empty, Map.empty) [
+    ('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
+  , ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
+  , ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
+  , ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
+  , ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
+  , ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
+  , ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
+  , ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
+  , ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
+  , ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
+  , ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
+  , ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
+  , ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
+  , ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
+  , ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
+  , ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
+  , ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
+  , ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
+  , ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
+  , ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
+  , ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
+  , ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
+  , ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
+  , ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
+  , ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
+  , ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
+  , ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
+  , ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
+  , ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
+  , ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
+  , ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
+  , ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
+  , ('\xA0', '\x2020') -- DAGGER
+  , ('\xA1', '\x00B0') -- DEGREE SIGN
+  , ('\xA2', '\x00A2') -- CENT SIGN
+  , ('\xA3', '\x00A3') -- POUND SIGN
+  , ('\xA4', '\x00A7') -- SECTION SIGN
+  , ('\xA5', '\x2022') -- BULLET
+  , ('\xA6', '\x00B6') -- PILCROW SIGN
+  , ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
+  , ('\xA8', '\x00AE') -- REGISTERED SIGN
+  , ('\xA9', '\x00A9') -- COPYRIGHT SIGN
+  , ('\xAA', '\x2122') -- TRADE MARK SIGN
+  , ('\xAB', '\x00B4') -- ACUTE ACCENT
+  , ('\xAC', '\x00A8') -- DIAERESIS
+  , ('\xAD', '\x2260') -- NOT EQUAL TO
+  , ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
+  , ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
+  , ('\xB0', '\x221E') -- INFINITY
+  , ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
+  , ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
+  , ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
+  , ('\xB4', '\x00A5') -- YEN SIGN
+  , ('\xB5', '\x00B5') -- MICRO SIGN
+  , ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
+  , ('\xB7', '\x2211') -- N-ARY SUMMATION
+  , ('\xB8', '\x220F') -- N-ARY PRODUCT
+  , ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
+  , ('\xBA', '\x222B') -- INTEGRAL
+  , ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
+  , ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
+  , ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
+  , ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
+  , ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
+  , ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
+  , ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
+  , ('\xC2', '\x00AC') -- NOT SIGN
+  , ('\xC3', '\x221A') -- SQUARE ROOT
+  , ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
+  , ('\xC5', '\x2248') -- ALMOST EQUAL TO
+  , ('\xC6', '\x2206') -- INCREMENT
+  , ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+  , ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+  , ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
+  , ('\xCA', '\x00A0') -- NO-BREAK SPACE
+  , ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
+  , ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
+  , ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
+  , ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
+  , ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
+  , ('\xD0', '\x2013') -- EN DASH
+  , ('\xD1', '\x2014') -- EM DASH
+  , ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
+  , ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
+  , ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
+  , ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
+  , ('\xD6', '\x00F7') -- DIVISION SIGN
+  , ('\xD7', '\x25CA') -- LOZENGE
+  , ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
+  , ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
+  , ('\xDA', '\x2044') -- FRACTION SLASH
+  , ('\xDB', '\x20AC') -- EURO SIGN
+  , ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+  , ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+  , ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
+  , ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
+  , ('\xE0', '\x2021') -- DOUBLE DAGGER
+  , ('\xE1', '\x00B7') -- MIDDLE DOT
+  , ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
+  , ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
+  , ('\xE4', '\x2030') -- PER MILLE SIGN
+  , ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+  , ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+  , ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
+  , ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
+  , ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
+  , ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
+  , ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+  , ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
+  , ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
+  , ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
+  , ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+  , ('\xF0', '\xF8FF') -- Apple logo
+  , ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
+  , ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
+  , ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+  , ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
+  , ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
+  , ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
+  , ('\xF7', '\x02DC') -- SMALL TILDE
+  , ('\xF8', '\x00AF') -- MACRON
+  , ('\xF9', '\x02D8') -- BREVE
+  , ('\xFA', '\x02D9') -- DOT ABOVE
+  , ('\xFB', '\x02DA') -- RING ABOVE
+  , ('\xFC', '\x00B8') -- CEDILLA
+  , ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
+  , ('\xFE', '\x02DB') -- OGONEK
+  , ('\xFF', '\x02C7') -- CARON
+  ]
+  where
+    generateMappers (tmpDecoder, tmpEncoder) (macChar, utf8Char) = (
+        Map.insert macChar utf8Char tmpDecoder
+      , Map.insert utf8Char macChar tmpEncoder
+      )
diff --git a/src/PDF/Font.hs b/src/PDF/Font.hs
index f5fe7e9..873cf5b 100644
--- a/src/PDF/Font.hs
+++ b/src/PDF/Font.hs
@@ -1,5 +1,7 @@
 module PDF.Font (
-    Font
+    Decoder
+  , Encoder
+  , Font(..)
   , FontSet
   , emptyFont
   ) where
@@ -9,8 +11,17 @@ import Data.Map (Map)
 import Data.Text (Text)
 import PDF.Object (Name)
 
-type Font = ByteString -> Either String Text
+type Decoder = ByteString -> Either String Text
+type Encoder = Text -> Either String ByteString
+data Font = Font {
+    decode :: Decoder
+  , encode :: Encoder
+  }
+
 type FontSet = Map Name Font
 
 emptyFont :: Font
-emptyFont _ = Left "No fond loaded"
+emptyFont = Font {
+    decode = \_ -> Left "No font loaded"
+  , encode = \_ -> Left "No font loaded"
+  }