Update Font type to cover both encoding and decoding — still WIP for CMap; the MacRoman encoding is complete but not yet tested
This commit is contained in:
parent
6e245189fd
commit
1ec47c5d07
3 changed files with 181 additions and 141 deletions
|
@ -25,7 +25,7 @@ import qualified Data.Map as Map (
|
|||
)
|
||||
import Data.Text (Text)
|
||||
import qualified PDF.EOL as EOL (charset, parser)
|
||||
import PDF.Font (Font)
|
||||
import PDF.Font (Decoder, Encoder, Font(..))
|
||||
import PDF.Object (
|
||||
DirectObject(..), Name, StringObject(..)
|
||||
, blank, directObject, integer, line, stringObject
|
||||
|
@ -34,7 +34,10 @@ import PDF.Parser (MonadParser, Parser, runParser, takeAll)
|
|||
import Prelude hiding (fail)
|
||||
|
||||
-- | 'CMap' values keyed by 'Name'.
type CMappers = Map Name CMap
|
||||
type Mapping = Map ByteString Text
|
||||
-- | Two lookup tables with mirrored key and value types: 'ByteString' to
-- 'Text' and 'Text' to 'ByteString'.
-- NOTE(review): presumably the two directions of one code mapping; how the
-- tables are filled is not visible in this excerpt — confirm.
data Mapping = Mapping {
|
||||
-- Lookup from a byte sequence to a text.
bsToT :: Map ByteString Text
|
||||
-- Lookup from a text to a byte sequence.
, tToBS :: Map Text ByteString
|
||||
}
|
||||
data CRange = CRange {
|
||||
fromSequence :: ByteString
|
||||
, toSequence :: ByteString
|
||||
|
@ -44,11 +47,14 @@ type RangeSize = Int
|
|||
-- | Lists of 'CRange' grouped by 'RangeSize'. 'decoder' walks the
-- (size, ranges) entries of this map in 'Map.toList' order.
type CMap = Map RangeSize [CRange]
|
||||
|
||||
-- | Wrap a 'CMap' into a 'Font' record: 'decode' is @decoder aCMap@ and
-- 'encode' is @encoder aCMap@.
toFont :: CMap -> Font
|
||||
-- NOTE(review): the bare head below looks like the removed, pre-record
-- definition kept by the diff view — confirm before relying on it.
toFont aCMap input
|
||||
toFont aCMap = Font {decode = decoder aCMap, encode = encoder aCMap}
|
||||
|
||||
decoder :: CMap -> Decoder
|
||||
decoder aCMap input
|
||||
| BS.null input = Right ""
|
||||
| otherwise = do
|
||||
(output, remainingInput) <- trySizes input $ Map.toList aCMap
|
||||
mappend output <$> toFont aCMap remainingInput
|
||||
mappend output <$> decoder aCMap remainingInput
|
||||
where
|
||||
trySizes s [] = Left $ "No matching code found in font for " ++ unpack s
|
||||
trySizes s ((size, cRanges):others) =
|
||||
|
@ -63,6 +69,9 @@ toFont aCMap input
|
|||
Nothing -> tryRanges prefix cRanges
|
||||
outputSequence -> outputSequence
|
||||
|
||||
-- | Encoding side of a CMap-backed font.
--
-- Not implemented yet: every input is rejected with a 'Left' message instead
-- of crashing through 'undefined', so callers that already handle the
-- 'Either' result of an 'Encoder' keep working.
-- TODO: build the reverse (text to code) lookup from the CMap ranges.
encoder :: CMap -> Encoder
encoder _ _ = Left "Encoding through a CMap is not implemented yet"
|
||||
|
||||
-- | A 'CMap' with no entries ('Map.empty').
emptyCMap :: CMap
|
||||
emptyCMap = Map.empty
|
||||
|
||||
|
|
|
@ -2,140 +2,160 @@ module PDF.Encoding.MacRoman (
|
|||
macRomanEncoding
|
||||
) where
|
||||
|
||||
import Data.ByteString.Char8 (unpack)
|
||||
import Data.Text (pack)
|
||||
import PDF.Font (Font)
|
||||
import qualified Data.ByteString.Char8 as BS (pack, unpack)
|
||||
import Data.Map (Map)
|
||||
import qualified Data.Map as Map (empty, insert, lookup)
|
||||
import qualified Data.Text as Text (pack, unpack)
|
||||
import PDF.Font (Font(..))
|
||||
|
||||
-- | Character-to-character lookup table; 'mappers' holds one per direction.
type Mapper = Map Char Char
|
||||
-- | 'Font' for the MacRoman encoding, converting one character at a time
-- through the two tables in 'mappers'.
--
-- Decoding always yields 'Right'. Encoding yields 'Left' with a message for
-- a character that is neither in the encoding table nor below @'\x80'@.
macRomanEncoding :: Font
|
||||
-- NOTE(review): the next line looks like the removed, function-typed
-- definition kept by the diff view — confirm before relying on it.
macRomanEncoding = Right . pack . fmap decode . unpack
|
||||
macRomanEncoding = Font {
|
||||
-- Unpack the bytes to Chars, translate each one, repack as Text.
decode = Right . Text.pack . fmap decodeChar . BS.unpack
|
||||
-- Translate each Char; mapM stops at the first Left.
, encode = fmap BS.pack . mapM encodeChar . Text.unpack
|
||||
}
|
||||
where
|
||||
-- Look the character up in the first table of 'mappers'; a character that
-- is absent from the table is returned unchanged.
decodeChar k = maybe k id $ Map.lookup k (fst mappers)
|
||||
-- Look the character up in the second table of 'mappers'.
encodeChar k =
|
||||
case Map.lookup k (snd mappers) of
|
||||
Just v -> Right v
|
||||
Nothing
|
||||
-- Absent from the table but below '\x80': kept as is.
| k < '\x80' -> Right k
|
||||
-- The message parses as "Character '" ++ (k : "' unavailable in MacRoman").
| otherwise -> Left ("Character '" ++ k :"' unavailable in MacRoman")
|
||||
|
||||
decode :: Char -> Char
|
||||
decode '\x80' = '\x00C4' -- LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
decode '\x81' = '\x00C5' -- LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
decode '\x82' = '\x00C7' -- LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
decode '\x83' = '\x00C9' -- LATIN CAPITAL LETTER E WITH ACUTE
|
||||
decode '\x84' = '\x00D1' -- LATIN CAPITAL LETTER N WITH TILDE
|
||||
decode '\x85' = '\x00D6' -- LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
decode '\x86' = '\x00DC' -- LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
decode '\x87' = '\x00E1' -- LATIN SMALL LETTER A WITH ACUTE
|
||||
decode '\x88' = '\x00E0' -- LATIN SMALL LETTER A WITH GRAVE
|
||||
decode '\x89' = '\x00E2' -- LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
decode '\x8A' = '\x00E4' -- LATIN SMALL LETTER A WITH DIAERESIS
|
||||
decode '\x8B' = '\x00E3' -- LATIN SMALL LETTER A WITH TILDE
|
||||
decode '\x8C' = '\x00E5' -- LATIN SMALL LETTER A WITH RING ABOVE
|
||||
decode '\x8D' = '\x00E7' -- LATIN SMALL LETTER C WITH CEDILLA
|
||||
decode '\x8E' = '\x00E9' -- LATIN SMALL LETTER E WITH ACUTE
|
||||
decode '\x8F' = '\x00E8' -- LATIN SMALL LETTER E WITH GRAVE
|
||||
decode '\x90' = '\x00EA' -- LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
decode '\x91' = '\x00EB' -- LATIN SMALL LETTER E WITH DIAERESIS
|
||||
decode '\x92' = '\x00ED' -- LATIN SMALL LETTER I WITH ACUTE
|
||||
decode '\x93' = '\x00EC' -- LATIN SMALL LETTER I WITH GRAVE
|
||||
decode '\x94' = '\x00EE' -- LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
decode '\x95' = '\x00EF' -- LATIN SMALL LETTER I WITH DIAERESIS
|
||||
decode '\x96' = '\x00F1' -- LATIN SMALL LETTER N WITH TILDE
|
||||
decode '\x97' = '\x00F3' -- LATIN SMALL LETTER O WITH ACUTE
|
||||
decode '\x98' = '\x00F2' -- LATIN SMALL LETTER O WITH GRAVE
|
||||
decode '\x99' = '\x00F4' -- LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
decode '\x9A' = '\x00F6' -- LATIN SMALL LETTER O WITH DIAERESIS
|
||||
decode '\x9B' = '\x00F5' -- LATIN SMALL LETTER O WITH TILDE
|
||||
decode '\x9C' = '\x00FA' -- LATIN SMALL LETTER U WITH ACUTE
|
||||
decode '\x9D' = '\x00F9' -- LATIN SMALL LETTER U WITH GRAVE
|
||||
decode '\x9E' = '\x00FB' -- LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
decode '\x9F' = '\x00FC' -- LATIN SMALL LETTER U WITH DIAERESIS
|
||||
decode '\xA0' = '\x2020' -- DAGGER
|
||||
decode '\xA1' = '\x00B0' -- DEGREE SIGN
|
||||
decode '\xA2' = '\x00A2' -- CENT SIGN
|
||||
decode '\xA3' = '\x00A3' -- POUND SIGN
|
||||
decode '\xA4' = '\x00A7' -- SECTION SIGN
|
||||
decode '\xA5' = '\x2022' -- BULLET
|
||||
decode '\xA6' = '\x00B6' -- PILCROW SIGN
|
||||
decode '\xA7' = '\x00DF' -- LATIN SMALL LETTER SHARP S
|
||||
decode '\xA8' = '\x00AE' -- REGISTERED SIGN
|
||||
decode '\xA9' = '\x00A9' -- COPYRIGHT SIGN
|
||||
decode '\xAA' = '\x2122' -- TRADE MARK SIGN
|
||||
decode '\xAB' = '\x00B4' -- ACUTE ACCENT
|
||||
decode '\xAC' = '\x00A8' -- DIAERESIS
|
||||
decode '\xAD' = '\x2260' -- NOT EQUAL TO
|
||||
decode '\xAE' = '\x00C6' -- LATIN CAPITAL LETTER AE
|
||||
decode '\xAF' = '\x00D8' -- LATIN CAPITAL LETTER O WITH STROKE
|
||||
decode '\xB0' = '\x221E' -- INFINITY
|
||||
decode '\xB1' = '\x00B1' -- PLUS-MINUS SIGN
|
||||
decode '\xB2' = '\x2264' -- LESS-THAN OR EQUAL TO
|
||||
decode '\xB3' = '\x2265' -- GREATER-THAN OR EQUAL TO
|
||||
decode '\xB4' = '\x00A5' -- YEN SIGN
|
||||
decode '\xB5' = '\x00B5' -- MICRO SIGN
|
||||
decode '\xB6' = '\x2202' -- PARTIAL DIFFERENTIAL
|
||||
decode '\xB7' = '\x2211' -- N-ARY SUMMATION
|
||||
decode '\xB8' = '\x220F' -- N-ARY PRODUCT
|
||||
decode '\xB9' = '\x03C0' -- GREEK SMALL LETTER PI
|
||||
decode '\xBA' = '\x222B' -- INTEGRAL
|
||||
decode '\xBB' = '\x00AA' -- FEMININE ORDINAL INDICATOR
|
||||
decode '\xBC' = '\x00BA' -- MASCULINE ORDINAL INDICATOR
|
||||
decode '\xBD' = '\x03A9' -- GREEK CAPITAL LETTER OMEGA
|
||||
decode '\xBE' = '\x00E6' -- LATIN SMALL LETTER AE
|
||||
decode '\xBF' = '\x00F8' -- LATIN SMALL LETTER O WITH STROKE
|
||||
decode '\xC0' = '\x00BF' -- INVERTED QUESTION MARK
|
||||
decode '\xC1' = '\x00A1' -- INVERTED EXCLAMATION MARK
|
||||
decode '\xC2' = '\x00AC' -- NOT SIGN
|
||||
decode '\xC3' = '\x221A' -- SQUARE ROOT
|
||||
decode '\xC4' = '\x0192' -- LATIN SMALL LETTER F WITH HOOK
|
||||
decode '\xC5' = '\x2248' -- ALMOST EQUAL TO
|
||||
decode '\xC6' = '\x2206' -- INCREMENT
|
||||
decode '\xC7' = '\x00AB' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
decode '\xC8' = '\x00BB' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
decode '\xC9' = '\x2026' -- HORIZONTAL ELLIPSIS
|
||||
decode '\xCA' = '\x00A0' -- NO-BREAK SPACE
|
||||
decode '\xCB' = '\x00C0' -- LATIN CAPITAL LETTER A WITH GRAVE
|
||||
decode '\xCC' = '\x00C3' -- LATIN CAPITAL LETTER A WITH TILDE
|
||||
decode '\xCD' = '\x00D5' -- LATIN CAPITAL LETTER O WITH TILDE
|
||||
decode '\xCE' = '\x0152' -- LATIN CAPITAL LIGATURE OE
|
||||
decode '\xCF' = '\x0153' -- LATIN SMALL LIGATURE OE
|
||||
decode '\xD0' = '\x2013' -- EN DASH
|
||||
decode '\xD1' = '\x2014' -- EM DASH
|
||||
decode '\xD2' = '\x201C' -- LEFT DOUBLE QUOTATION MARK
|
||||
decode '\xD3' = '\x201D' -- RIGHT DOUBLE QUOTATION MARK
|
||||
decode '\xD4' = '\x2018' -- LEFT SINGLE QUOTATION MARK
|
||||
decode '\xD5' = '\x2019' -- RIGHT SINGLE QUOTATION MARK
|
||||
decode '\xD6' = '\x00F7' -- DIVISION SIGN
|
||||
decode '\xD7' = '\x25CA' -- LOZENGE
|
||||
decode '\xD8' = '\x00FF' -- LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
decode '\xD9' = '\x0178' -- LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
decode '\xDA' = '\x2044' -- FRACTION SLASH
|
||||
decode '\xDB' = '\x20AC' -- EURO SIGN
|
||||
decode '\xDC' = '\x2039' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
decode '\xDD' = '\x203A' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
decode '\xDE' = '\xFB01' -- LATIN SMALL LIGATURE FI
|
||||
decode '\xDF' = '\xFB02' -- LATIN SMALL LIGATURE FL
|
||||
decode '\xE0' = '\x2021' -- DOUBLE DAGGER
|
||||
decode '\xE1' = '\x00B7' -- MIDDLE DOT
|
||||
decode '\xE2' = '\x201A' -- SINGLE LOW-9 QUOTATION MARK
|
||||
decode '\xE3' = '\x201E' -- DOUBLE LOW-9 QUOTATION MARK
|
||||
decode '\xE4' = '\x2030' -- PER MILLE SIGN
|
||||
decode '\xE5' = '\x00C2' -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
decode '\xE6' = '\x00CA' -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
decode '\xE7' = '\x00C1' -- LATIN CAPITAL LETTER A WITH ACUTE
|
||||
decode '\xE8' = '\x00CB' -- LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
decode '\xE9' = '\x00C8' -- LATIN CAPITAL LETTER E WITH GRAVE
|
||||
decode '\xEA' = '\x00CD' -- LATIN CAPITAL LETTER I WITH ACUTE
|
||||
decode '\xEB' = '\x00CE' -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
decode '\xEC' = '\x00CF' -- LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
decode '\xED' = '\x00CC' -- LATIN CAPITAL LETTER I WITH GRAVE
|
||||
decode '\xEE' = '\x00D3' -- LATIN CAPITAL LETTER O WITH ACUTE
|
||||
decode '\xEF' = '\x00D4' -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
decode '\xF0' = '\xF8FF' -- Apple logo
|
||||
decode '\xF1' = '\x00D2' -- LATIN CAPITAL LETTER O WITH GRAVE
|
||||
decode '\xF2' = '\x00DA' -- LATIN CAPITAL LETTER U WITH ACUTE
|
||||
decode '\xF3' = '\x00DB' -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
decode '\xF4' = '\x00D9' -- LATIN CAPITAL LETTER U WITH GRAVE
|
||||
decode '\xF5' = '\x0131' -- LATIN SMALL LETTER DOTLESS I
|
||||
decode '\xF6' = '\x02C6' -- MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
decode '\xF7' = '\x02DC' -- SMALL TILDE
|
||||
decode '\xF8' = '\x00AF' -- MACRON
|
||||
decode '\xF9' = '\x02D8' -- BREVE
|
||||
decode '\xFA' = '\x02D9' -- DOT ABOVE
|
||||
decode '\xFB' = '\x02DA' -- RING ABOVE
|
||||
decode '\xFC' = '\x00B8' -- CEDILLA
|
||||
decode '\xFD' = '\x02DD' -- DOUBLE ACUTE ACCENT
|
||||
decode '\xFE' = '\x02DB' -- OGONEK
|
||||
decode '\xFF' = '\x02C7' -- CARON
|
||||
decode c = c -- The rest is ASCII
|
||||
-- | The pair (decoding table, encoding table) used by 'macRomanEncoding',
-- folded from a single list of (MacRoman character, Unicode character)
-- pairs covering @'\x80'@ through @'\xFF'@. The first map goes from the left
-- element of each pair to the right one, the second map the other way round.
-- NOTE(review): the fold is the lazy Prelude 'foldl'; a strict left fold
-- (@foldl'@ from "Data.List") is the usual choice — consider switching.
mappers :: (Mapper, Mapper)
|
||||
mappers = foldl generateMapers (Map.empty, Map.empty) [
|
||||
('\x80', '\x00C4') -- LATIN CAPITAL LETTER A WITH DIAERESIS
|
||||
, ('\x81', '\x00C5') -- LATIN CAPITAL LETTER A WITH RING ABOVE
|
||||
, ('\x82', '\x00C7') -- LATIN CAPITAL LETTER C WITH CEDILLA
|
||||
, ('\x83', '\x00C9') -- LATIN CAPITAL LETTER E WITH ACUTE
|
||||
, ('\x84', '\x00D1') -- LATIN CAPITAL LETTER N WITH TILDE
|
||||
, ('\x85', '\x00D6') -- LATIN CAPITAL LETTER O WITH DIAERESIS
|
||||
, ('\x86', '\x00DC') -- LATIN CAPITAL LETTER U WITH DIAERESIS
|
||||
, ('\x87', '\x00E1') -- LATIN SMALL LETTER A WITH ACUTE
|
||||
, ('\x88', '\x00E0') -- LATIN SMALL LETTER A WITH GRAVE
|
||||
, ('\x89', '\x00E2') -- LATIN SMALL LETTER A WITH CIRCUMFLEX
|
||||
, ('\x8A', '\x00E4') -- LATIN SMALL LETTER A WITH DIAERESIS
|
||||
, ('\x8B', '\x00E3') -- LATIN SMALL LETTER A WITH TILDE
|
||||
, ('\x8C', '\x00E5') -- LATIN SMALL LETTER A WITH RING ABOVE
|
||||
, ('\x8D', '\x00E7') -- LATIN SMALL LETTER C WITH CEDILLA
|
||||
, ('\x8E', '\x00E9') -- LATIN SMALL LETTER E WITH ACUTE
|
||||
, ('\x8F', '\x00E8') -- LATIN SMALL LETTER E WITH GRAVE
|
||||
, ('\x90', '\x00EA') -- LATIN SMALL LETTER E WITH CIRCUMFLEX
|
||||
, ('\x91', '\x00EB') -- LATIN SMALL LETTER E WITH DIAERESIS
|
||||
, ('\x92', '\x00ED') -- LATIN SMALL LETTER I WITH ACUTE
|
||||
, ('\x93', '\x00EC') -- LATIN SMALL LETTER I WITH GRAVE
|
||||
, ('\x94', '\x00EE') -- LATIN SMALL LETTER I WITH CIRCUMFLEX
|
||||
, ('\x95', '\x00EF') -- LATIN SMALL LETTER I WITH DIAERESIS
|
||||
, ('\x96', '\x00F1') -- LATIN SMALL LETTER N WITH TILDE
|
||||
, ('\x97', '\x00F3') -- LATIN SMALL LETTER O WITH ACUTE
|
||||
, ('\x98', '\x00F2') -- LATIN SMALL LETTER O WITH GRAVE
|
||||
, ('\x99', '\x00F4') -- LATIN SMALL LETTER O WITH CIRCUMFLEX
|
||||
, ('\x9A', '\x00F6') -- LATIN SMALL LETTER O WITH DIAERESIS
|
||||
, ('\x9B', '\x00F5') -- LATIN SMALL LETTER O WITH TILDE
|
||||
, ('\x9C', '\x00FA') -- LATIN SMALL LETTER U WITH ACUTE
|
||||
, ('\x9D', '\x00F9') -- LATIN SMALL LETTER U WITH GRAVE
|
||||
, ('\x9E', '\x00FB') -- LATIN SMALL LETTER U WITH CIRCUMFLEX
|
||||
, ('\x9F', '\x00FC') -- LATIN SMALL LETTER U WITH DIAERESIS
|
||||
, ('\xA0', '\x2020') -- DAGGER
|
||||
, ('\xA1', '\x00B0') -- DEGREE SIGN
|
||||
, ('\xA2', '\x00A2') -- CENT SIGN
|
||||
, ('\xA3', '\x00A3') -- POUND SIGN
|
||||
, ('\xA4', '\x00A7') -- SECTION SIGN
|
||||
, ('\xA5', '\x2022') -- BULLET
|
||||
, ('\xA6', '\x00B6') -- PILCROW SIGN
|
||||
, ('\xA7', '\x00DF') -- LATIN SMALL LETTER SHARP S
|
||||
, ('\xA8', '\x00AE') -- REGISTERED SIGN
|
||||
, ('\xA9', '\x00A9') -- COPYRIGHT SIGN
|
||||
, ('\xAA', '\x2122') -- TRADE MARK SIGN
|
||||
, ('\xAB', '\x00B4') -- ACUTE ACCENT
|
||||
, ('\xAC', '\x00A8') -- DIAERESIS
|
||||
, ('\xAD', '\x2260') -- NOT EQUAL TO
|
||||
, ('\xAE', '\x00C6') -- LATIN CAPITAL LETTER AE
|
||||
, ('\xAF', '\x00D8') -- LATIN CAPITAL LETTER O WITH STROKE
|
||||
, ('\xB0', '\x221E') -- INFINITY
|
||||
, ('\xB1', '\x00B1') -- PLUS-MINUS SIGN
|
||||
, ('\xB2', '\x2264') -- LESS-THAN OR EQUAL TO
|
||||
, ('\xB3', '\x2265') -- GREATER-THAN OR EQUAL TO
|
||||
, ('\xB4', '\x00A5') -- YEN SIGN
|
||||
, ('\xB5', '\x00B5') -- MICRO SIGN
|
||||
, ('\xB6', '\x2202') -- PARTIAL DIFFERENTIAL
|
||||
, ('\xB7', '\x2211') -- N-ARY SUMMATION
|
||||
, ('\xB8', '\x220F') -- N-ARY PRODUCT
|
||||
, ('\xB9', '\x03C0') -- GREEK SMALL LETTER PI
|
||||
, ('\xBA', '\x222B') -- INTEGRAL
|
||||
, ('\xBB', '\x00AA') -- FEMININE ORDINAL INDICATOR
|
||||
, ('\xBC', '\x00BA') -- MASCULINE ORDINAL INDICATOR
|
||||
, ('\xBD', '\x03A9') -- GREEK CAPITAL LETTER OMEGA
|
||||
, ('\xBE', '\x00E6') -- LATIN SMALL LETTER AE
|
||||
, ('\xBF', '\x00F8') -- LATIN SMALL LETTER O WITH STROKE
|
||||
, ('\xC0', '\x00BF') -- INVERTED QUESTION MARK
|
||||
, ('\xC1', '\x00A1') -- INVERTED EXCLAMATION MARK
|
||||
, ('\xC2', '\x00AC') -- NOT SIGN
|
||||
, ('\xC3', '\x221A') -- SQUARE ROOT
|
||||
, ('\xC4', '\x0192') -- LATIN SMALL LETTER F WITH HOOK
|
||||
, ('\xC5', '\x2248') -- ALMOST EQUAL TO
|
||||
, ('\xC6', '\x2206') -- INCREMENT
|
||||
, ('\xC7', '\x00AB') -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
, ('\xC8', '\x00BB') -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
|
||||
, ('\xC9', '\x2026') -- HORIZONTAL ELLIPSIS
|
||||
, ('\xCA', '\x00A0') -- NO-BREAK SPACE
|
||||
, ('\xCB', '\x00C0') -- LATIN CAPITAL LETTER A WITH GRAVE
|
||||
, ('\xCC', '\x00C3') -- LATIN CAPITAL LETTER A WITH TILDE
|
||||
, ('\xCD', '\x00D5') -- LATIN CAPITAL LETTER O WITH TILDE
|
||||
, ('\xCE', '\x0152') -- LATIN CAPITAL LIGATURE OE
|
||||
, ('\xCF', '\x0153') -- LATIN SMALL LIGATURE OE
|
||||
, ('\xD0', '\x2013') -- EN DASH
|
||||
, ('\xD1', '\x2014') -- EM DASH
|
||||
, ('\xD2', '\x201C') -- LEFT DOUBLE QUOTATION MARK
|
||||
, ('\xD3', '\x201D') -- RIGHT DOUBLE QUOTATION MARK
|
||||
, ('\xD4', '\x2018') -- LEFT SINGLE QUOTATION MARK
|
||||
, ('\xD5', '\x2019') -- RIGHT SINGLE QUOTATION MARK
|
||||
, ('\xD6', '\x00F7') -- DIVISION SIGN
|
||||
, ('\xD7', '\x25CA') -- LOZENGE
|
||||
, ('\xD8', '\x00FF') -- LATIN SMALL LETTER Y WITH DIAERESIS
|
||||
, ('\xD9', '\x0178') -- LATIN CAPITAL LETTER Y WITH DIAERESIS
|
||||
, ('\xDA', '\x2044') -- FRACTION SLASH
|
||||
, ('\xDB', '\x20AC') -- EURO SIGN
|
||||
, ('\xDC', '\x2039') -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
|
||||
, ('\xDD', '\x203A') -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
|
||||
, ('\xDE', '\xFB01') -- LATIN SMALL LIGATURE FI
|
||||
, ('\xDF', '\xFB02') -- LATIN SMALL LIGATURE FL
|
||||
, ('\xE0', '\x2021') -- DOUBLE DAGGER
|
||||
, ('\xE1', '\x00B7') -- MIDDLE DOT
|
||||
, ('\xE2', '\x201A') -- SINGLE LOW-9 QUOTATION MARK
|
||||
, ('\xE3', '\x201E') -- DOUBLE LOW-9 QUOTATION MARK
|
||||
, ('\xE4', '\x2030') -- PER MILLE SIGN
|
||||
, ('\xE5', '\x00C2') -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
|
||||
, ('\xE6', '\x00CA') -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
|
||||
, ('\xE7', '\x00C1') -- LATIN CAPITAL LETTER A WITH ACUTE
|
||||
, ('\xE8', '\x00CB') -- LATIN CAPITAL LETTER E WITH DIAERESIS
|
||||
, ('\xE9', '\x00C8') -- LATIN CAPITAL LETTER E WITH GRAVE
|
||||
, ('\xEA', '\x00CD') -- LATIN CAPITAL LETTER I WITH ACUTE
|
||||
, ('\xEB', '\x00CE') -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
|
||||
, ('\xEC', '\x00CF') -- LATIN CAPITAL LETTER I WITH DIAERESIS
|
||||
, ('\xED', '\x00CC') -- LATIN CAPITAL LETTER I WITH GRAVE
|
||||
, ('\xEE', '\x00D3') -- LATIN CAPITAL LETTER O WITH ACUTE
|
||||
, ('\xEF', '\x00D4') -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
|
||||
, ('\xF0', '\xF8FF') -- Apple logo
|
||||
, ('\xF1', '\x00D2') -- LATIN CAPITAL LETTER O WITH GRAVE
|
||||
, ('\xF2', '\x00DA') -- LATIN CAPITAL LETTER U WITH ACUTE
|
||||
, ('\xF3', '\x00DB') -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
|
||||
, ('\xF4', '\x00D9') -- LATIN CAPITAL LETTER U WITH GRAVE
|
||||
, ('\xF5', '\x0131') -- LATIN SMALL LETTER DOTLESS I
|
||||
, ('\xF6', '\x02C6') -- MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
, ('\xF7', '\x02DC') -- SMALL TILDE
|
||||
, ('\xF8', '\x00AF') -- MACRON
|
||||
, ('\xF9', '\x02D8') -- BREVE
|
||||
, ('\xFA', '\x02D9') -- DOT ABOVE
|
||||
, ('\xFB', '\x02DA') -- RING ABOVE
|
||||
, ('\xFC', '\x00B8') -- CEDILLA
|
||||
, ('\xFD', '\x02DD') -- DOUBLE ACUTE ACCENT
|
||||
, ('\xFE', '\x02DB') -- OGONEK
|
||||
, ('\xFF', '\x02C7') -- CARON
|
||||
]
|
||||
where
|
||||
-- Register one pair in both tables: macChar -> utf8Char in the first,
-- utf8Char -> macChar in the second. Despite its name, utf8Char is a plain
-- Char (both tables are 'Mapper', i.e. Map Char Char), not UTF-8 bytes.
generateMapers (tmpDecoder, tmpEncoder) (macChar, utf8Char) = (
|
||||
Map.insert macChar utf8Char tmpDecoder
|
||||
, Map.insert utf8Char macChar tmpEncoder
|
||||
)
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
module PDF.Font (
|
||||
Font
|
||||
Decoder
|
||||
, Encoder
|
||||
, Font(..)
|
||||
, FontSet
|
||||
, emptyFont
|
||||
) where
|
||||
|
@ -9,8 +11,17 @@ import Data.Map (Map)
|
|||
import Data.Text (Text)
|
||||
import PDF.Object (Name)
|
||||
|
||||
type Font = ByteString -> Either String Text
|
||||
-- | Converts a 'ByteString' to 'Text', or fails with a 'Left' message.
type Decoder = ByteString -> Either String Text
|
||||
-- | Converts 'Text' to a 'ByteString', or fails with a 'Left' message.
type Encoder = Text -> Either String ByteString
|
||||
-- | A font seen as a pair of conversions between bytes and text, one for
-- each direction.
data Font = Font {
|
||||
-- Bytes to text.
decode :: Decoder
|
||||
-- Text to bytes.
, encode :: Encoder
|
||||
}
|
||||
|
||||
-- | 'Font' values keyed by 'Name'.
type FontSet = Map Name Font
|
||||
|
||||
-- | Placeholder 'Font' for when no font has been loaded: both 'decode' and
-- 'encode' ignore their input and fail with the same 'Left' message.
-- The message previously read "No fond loaded"; the spelling is corrected.
emptyFont :: Font
emptyFont = Font {
    decode = \_ -> Left "No font loaded"
  , encode = \_ -> Left "No font loaded"
  }
|
||||
|
|
Loading…
Reference in a new issue