Implement MacRomanEncoding for real following their own vendor file https://www.unicode.org/Public/MAPPINGS/VENDORS/APPLE/ROMAN.TXT

This commit is contained in:
Tissevert 2020-02-07 10:59:28 +01:00
parent fe055150a3
commit 95f9ab35b1
1 changed files with 134 additions and 1 deletions

View File

@ -2,7 +2,140 @@ module PDF.Encoding.MacRoman (
macRomanEncoding
) where
import Data.ByteString.Char8 (unpack)
import Data.Text (pack)
import PDF.Font (Font)
macRomanEncoding :: Font
macRomanEncoding = Right
macRomanEncoding = Right . pack . fmap decode . unpack
decode :: Char -> Char
decode '\x80' = '\x00C4' -- LATIN CAPITAL LETTER A WITH DIAERESIS
decode '\x81' = '\x00C5' -- LATIN CAPITAL LETTER A WITH RING ABOVE
decode '\x82' = '\x00C7' -- LATIN CAPITAL LETTER C WITH CEDILLA
decode '\x83' = '\x00C9' -- LATIN CAPITAL LETTER E WITH ACUTE
decode '\x84' = '\x00D1' -- LATIN CAPITAL LETTER N WITH TILDE
decode '\x85' = '\x00D6' -- LATIN CAPITAL LETTER O WITH DIAERESIS
decode '\x86' = '\x00DC' -- LATIN CAPITAL LETTER U WITH DIAERESIS
decode '\x87' = '\x00E1' -- LATIN SMALL LETTER A WITH ACUTE
decode '\x88' = '\x00E0' -- LATIN SMALL LETTER A WITH GRAVE
decode '\x89' = '\x00E2' -- LATIN SMALL LETTER A WITH CIRCUMFLEX
decode '\x8A' = '\x00E4' -- LATIN SMALL LETTER A WITH DIAERESIS
decode '\x8B' = '\x00E3' -- LATIN SMALL LETTER A WITH TILDE
decode '\x8C' = '\x00E5' -- LATIN SMALL LETTER A WITH RING ABOVE
decode '\x8D' = '\x00E7' -- LATIN SMALL LETTER C WITH CEDILLA
decode '\x8E' = '\x00E9' -- LATIN SMALL LETTER E WITH ACUTE
decode '\x8F' = '\x00E8' -- LATIN SMALL LETTER E WITH GRAVE
decode '\x90' = '\x00EA' -- LATIN SMALL LETTER E WITH CIRCUMFLEX
decode '\x91' = '\x00EB' -- LATIN SMALL LETTER E WITH DIAERESIS
decode '\x92' = '\x00ED' -- LATIN SMALL LETTER I WITH ACUTE
decode '\x93' = '\x00EC' -- LATIN SMALL LETTER I WITH GRAVE
decode '\x94' = '\x00EE' -- LATIN SMALL LETTER I WITH CIRCUMFLEX
decode '\x95' = '\x00EF' -- LATIN SMALL LETTER I WITH DIAERESIS
decode '\x96' = '\x00F1' -- LATIN SMALL LETTER N WITH TILDE
decode '\x97' = '\x00F3' -- LATIN SMALL LETTER O WITH ACUTE
decode '\x98' = '\x00F2' -- LATIN SMALL LETTER O WITH GRAVE
decode '\x99' = '\x00F4' -- LATIN SMALL LETTER O WITH CIRCUMFLEX
decode '\x9A' = '\x00F6' -- LATIN SMALL LETTER O WITH DIAERESIS
decode '\x9B' = '\x00F5' -- LATIN SMALL LETTER O WITH TILDE
decode '\x9C' = '\x00FA' -- LATIN SMALL LETTER U WITH ACUTE
decode '\x9D' = '\x00F9' -- LATIN SMALL LETTER U WITH GRAVE
decode '\x9E' = '\x00FB' -- LATIN SMALL LETTER U WITH CIRCUMFLEX
decode '\x9F' = '\x00FC' -- LATIN SMALL LETTER U WITH DIAERESIS
decode '\xA0' = '\x2020' -- DAGGER
decode '\xA1' = '\x00B0' -- DEGREE SIGN
decode '\xA2' = '\x00A2' -- CENT SIGN
decode '\xA3' = '\x00A3' -- POUND SIGN
decode '\xA4' = '\x00A7' -- SECTION SIGN
decode '\xA5' = '\x2022' -- BULLET
decode '\xA6' = '\x00B6' -- PILCROW SIGN
decode '\xA7' = '\x00DF' -- LATIN SMALL LETTER SHARP S
decode '\xA8' = '\x00AE' -- REGISTERED SIGN
decode '\xA9' = '\x00A9' -- COPYRIGHT SIGN
decode '\xAA' = '\x2122' -- TRADE MARK SIGN
decode '\xAB' = '\x00B4' -- ACUTE ACCENT
decode '\xAC' = '\x00A8' -- DIAERESIS
decode '\xAD' = '\x2260' -- NOT EQUAL TO
decode '\xAE' = '\x00C6' -- LATIN CAPITAL LETTER AE
decode '\xAF' = '\x00D8' -- LATIN CAPITAL LETTER O WITH STROKE
decode '\xB0' = '\x221E' -- INFINITY
decode '\xB1' = '\x00B1' -- PLUS-MINUS SIGN
decode '\xB2' = '\x2264' -- LESS-THAN OR EQUAL TO
decode '\xB3' = '\x2265' -- GREATER-THAN OR EQUAL TO
decode '\xB4' = '\x00A5' -- YEN SIGN
decode '\xB5' = '\x00B5' -- MICRO SIGN
decode '\xB6' = '\x2202' -- PARTIAL DIFFERENTIAL
decode '\xB7' = '\x2211' -- N-ARY SUMMATION
decode '\xB8' = '\x220F' -- N-ARY PRODUCT
decode '\xB9' = '\x03C0' -- GREEK SMALL LETTER PI
decode '\xBA' = '\x222B' -- INTEGRAL
decode '\xBB' = '\x00AA' -- FEMININE ORDINAL INDICATOR
decode '\xBC' = '\x00BA' -- MASCULINE ORDINAL INDICATOR
decode '\xBD' = '\x03A9' -- GREEK CAPITAL LETTER OMEGA
decode '\xBE' = '\x00E6' -- LATIN SMALL LETTER AE
decode '\xBF' = '\x00F8' -- LATIN SMALL LETTER O WITH STROKE
decode '\xC0' = '\x00BF' -- INVERTED QUESTION MARK
decode '\xC1' = '\x00A1' -- INVERTED EXCLAMATION MARK
decode '\xC2' = '\x00AC' -- NOT SIGN
decode '\xC3' = '\x221A' -- SQUARE ROOT
decode '\xC4' = '\x0192' -- LATIN SMALL LETTER F WITH HOOK
decode '\xC5' = '\x2248' -- ALMOST EQUAL TO
decode '\xC6' = '\x2206' -- INCREMENT
decode '\xC7' = '\x00AB' -- LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
decode '\xC8' = '\x00BB' -- RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
decode '\xC9' = '\x2026' -- HORIZONTAL ELLIPSIS
decode '\xCA' = '\x00A0' -- NO-BREAK SPACE
decode '\xCB' = '\x00C0' -- LATIN CAPITAL LETTER A WITH GRAVE
decode '\xCC' = '\x00C3' -- LATIN CAPITAL LETTER A WITH TILDE
decode '\xCD' = '\x00D5' -- LATIN CAPITAL LETTER O WITH TILDE
decode '\xCE' = '\x0152' -- LATIN CAPITAL LIGATURE OE
decode '\xCF' = '\x0153' -- LATIN SMALL LIGATURE OE
decode '\xD0' = '\x2013' -- EN DASH
decode '\xD1' = '\x2014' -- EM DASH
decode '\xD2' = '\x201C' -- LEFT DOUBLE QUOTATION MARK
decode '\xD3' = '\x201D' -- RIGHT DOUBLE QUOTATION MARK
decode '\xD4' = '\x2018' -- LEFT SINGLE QUOTATION MARK
decode '\xD5' = '\x2019' -- RIGHT SINGLE QUOTATION MARK
decode '\xD6' = '\x00F7' -- DIVISION SIGN
decode '\xD7' = '\x25CA' -- LOZENGE
decode '\xD8' = '\x00FF' -- LATIN SMALL LETTER Y WITH DIAERESIS
decode '\xD9' = '\x0178' -- LATIN CAPITAL LETTER Y WITH DIAERESIS
decode '\xDA' = '\x2044' -- FRACTION SLASH
decode '\xDB' = '\x20AC' -- EURO SIGN
decode '\xDC' = '\x2039' -- SINGLE LEFT-POINTING ANGLE QUOTATION MARK
decode '\xDD' = '\x203A' -- SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
decode '\xDE' = '\xFB01' -- LATIN SMALL LIGATURE FI
decode '\xDF' = '\xFB02' -- LATIN SMALL LIGATURE FL
decode '\xE0' = '\x2021' -- DOUBLE DAGGER
decode '\xE1' = '\x00B7' -- MIDDLE DOT
decode '\xE2' = '\x201A' -- SINGLE LOW-9 QUOTATION MARK
decode '\xE3' = '\x201E' -- DOUBLE LOW-9 QUOTATION MARK
decode '\xE4' = '\x2030' -- PER MILLE SIGN
decode '\xE5' = '\x00C2' -- LATIN CAPITAL LETTER A WITH CIRCUMFLEX
decode '\xE6' = '\x00CA' -- LATIN CAPITAL LETTER E WITH CIRCUMFLEX
decode '\xE7' = '\x00C1' -- LATIN CAPITAL LETTER A WITH ACUTE
decode '\xE8' = '\x00CB' -- LATIN CAPITAL LETTER E WITH DIAERESIS
decode '\xE9' = '\x00C8' -- LATIN CAPITAL LETTER E WITH GRAVE
decode '\xEA' = '\x00CD' -- LATIN CAPITAL LETTER I WITH ACUTE
decode '\xEB' = '\x00CE' -- LATIN CAPITAL LETTER I WITH CIRCUMFLEX
decode '\xEC' = '\x00CF' -- LATIN CAPITAL LETTER I WITH DIAERESIS
decode '\xED' = '\x00CC' -- LATIN CAPITAL LETTER I WITH GRAVE
decode '\xEE' = '\x00D3' -- LATIN CAPITAL LETTER O WITH ACUTE
decode '\xEF' = '\x00D4' -- LATIN CAPITAL LETTER O WITH CIRCUMFLEX
decode '\xF0' = '\xF8FF' -- Apple logo
decode '\xF1' = '\x00D2' -- LATIN CAPITAL LETTER O WITH GRAVE
decode '\xF2' = '\x00DA' -- LATIN CAPITAL LETTER U WITH ACUTE
decode '\xF3' = '\x00DB' -- LATIN CAPITAL LETTER U WITH CIRCUMFLEX
decode '\xF4' = '\x00D9' -- LATIN CAPITAL LETTER U WITH GRAVE
decode '\xF5' = '\x0131' -- LATIN SMALL LETTER DOTLESS I
decode '\xF6' = '\x02C6' -- MODIFIER LETTER CIRCUMFLEX ACCENT
decode '\xF7' = '\x02DC' -- SMALL TILDE
decode '\xF8' = '\x00AF' -- MACRON
decode '\xF9' = '\x02D8' -- BREVE
decode '\xFA' = '\x02D9' -- DOT ABOVE
decode '\xFB' = '\x02DA' -- RING ABOVE
decode '\xFC' = '\x00B8' -- CEDILLA
decode '\xFD' = '\x02DD' -- DOUBLE ACUTE ACCENT
decode '\xFE' = '\x02DB' -- OGONEK
decode '\xFF' = '\x02C7' -- CARON
decode c = c -- The rest is ASCII