Removed module Text.Pandoc.CharacterReferences.

Moved characterReference parser to Text.Pandoc.Parsing.
decodeCharacterReferences is now replaced by fromEntities
in Text.Pandoc.XML.
This commit is contained in:
John MacFarlane 2012-02-05 22:52:00 -08:00
parent 9bf6e665ea
commit e2c157f86f
6 changed files with 21 additions and 80 deletions

View file

@ -418,6 +418,11 @@ pandoc (1.9)
* Removed `Text.Pandoc.S5`, which is no longer needed.
* Removed `Text.Pandoc.CharacterReferences`. Moved
`characterReference` to `Text.Pandoc.Parsing`.
`decodeCharacterReferences` is replaced by `fromEntities`
in `Text.Pandoc.XML`.
* Added `Text.Pandoc.ImageSize`. This is intened for use
in `docx` and `odt` writers, so the size and dpi of images
can be calculated.

View file

@ -233,7 +233,6 @@ Library
Exposed-Modules: Text.Pandoc,
Text.Pandoc.Pretty,
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.Parsing,
Text.Pandoc.Highlighting,

View file

@ -1,72 +0,0 @@
{-
Copyright (C) 2006-2010 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.CharacterReferences
Copyright : Copyright (C) 2006-2010 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Functions for parsing character references.
-}
module Text.Pandoc.CharacterReferences (
characterReference,
decodeCharacterReferences,
) where
import Text.ParserCombinators.Parsec
import Text.HTML.TagSoup.Entity ( lookupNamedEntity, lookupNumericEntity )
import Data.Maybe ( fromMaybe )
-- | Parse character entity.
characterReference :: GenParser Char st Char
characterReference = try $ do
char '&'
character <- numRef <|> entity
char ';'
return character
numRef :: GenParser Char st Char
numRef = do
char '#'
num <- hexNum <|> decNum
return $ fromMaybe '?' $ lookupNumericEntity num
hexNum :: GenParser Char st [Char]
hexNum = do
x <- oneOf "Xx"
num <- many1 hexDigit
return (x:num)
decNum :: GenParser Char st [Char]
decNum = many1 digit
entity :: GenParser Char st Char
entity = do
body <- many1 alphaNum
return $ fromMaybe '?' $ lookupNamedEntity body
-- | Convert entities in a string to characters.
decodeCharacterReferences :: String -> String
decodeCharacterReferences str =
case parse (many (characterReference <|> anyChar)) str str of
Left err -> error $ "\nError: " ++ show err
Right result -> result

View file

@ -51,6 +51,7 @@ module Text.Pandoc.Parsing ( (>>~),
failIfStrict,
failUnlessLHS,
escaped,
characterReference,
anyOrderedListMarker,
orderedListMarker,
charRef,
@ -78,7 +79,6 @@ import Text.Pandoc.Definition
import Text.Pandoc.Generic
import qualified Text.Pandoc.UTF8 as UTF8 (putStrLn)
import Text.ParserCombinators.Parsec
import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isAscii, isAlphaNum, isDigit, isPunctuation )
import Data.List ( intercalate, transpose )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
@ -86,6 +86,7 @@ import Control.Monad ( join, liftM, guard )
import Text.Pandoc.Shared
import qualified Data.Map as M
import Text.TeXMath.Macros (applyMacros, Macro, parseMacroDefinitions)
import Text.HTML.TagSoup.Entity ( lookupEntity )
-- | Like >>, but returns the operation on the left.
-- (Suggested by Tillmann Rendel on Haskell-cafe list.)
@ -337,6 +338,15 @@ escaped :: GenParser Char st Char -- ^ Parser for character to escape
-> GenParser Char st Char
escaped parser = try $ char '\\' >> parser
-- | Parse character entity.
characterReference :: GenParser Char st Char
characterReference = try $ do
char '&'
ent <- manyTill nonspaceChar (char ';')
case lookupEntity ent of
Just c -> return c
Nothing -> return '?'
-- | Parses an uppercase roman numeral and returns (UpperRoman, number).
upperRoman :: GenParser Char st (ListNumberStyle, Int)
upperRoman = do

View file

@ -42,7 +42,7 @@ import Text.Pandoc.Parsing
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
import Text.Pandoc.Readers.HTML ( htmlTag, htmlInBalanced, isInlineTag, isBlockTag,
isTextTag, isCommentTag )
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.XML ( fromEntities )
import Text.ParserCombinators.Parsec
import Control.Monad (when, liftM, guard, mzero)
import Text.HTML.TagSoup
@ -244,7 +244,7 @@ referenceTitle = try $ do
<|> do delim <- char '\'' <|> char '"'
manyTill litChar (try (char delim >> skipSpaces >>
notFollowedBy (noneOf ")\n")))
return $ decodeCharacterReferences tit
return $ fromEntities tit
noteMarker :: GenParser Char ParserState [Char]
noteMarker = string "[^" >> many1Till (satisfy $ not . isBlank) (char ']')
@ -1176,7 +1176,7 @@ linkTitle = try $ do
skipSpaces
delim <- oneOf "'\""
tit <- manyTill litChar (try (char delim >> skipSpaces >> eof))
return $ decodeCharacterReferences tit
return $ fromEntities tit
link :: GenParser Char ParserState Inline
link = try $ do

View file

@ -31,7 +31,6 @@ Conversion of 'Pandoc' documents to HTML.
-}
module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where
import Text.Pandoc.Definition
import Text.Pandoc.CharacterReferences ( decodeCharacterReferences )
import Text.Pandoc.Shared
import Text.Pandoc.Templates
import Text.Pandoc.Generic
@ -39,7 +38,7 @@ import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Slides
import Text.Pandoc.Highlighting ( highlight, styleToCss,
formatHtmlInline, formatHtmlBlock )
import Text.Pandoc.XML (stripTags, escapeStringForXML)
import Text.Pandoc.XML (stripTags, escapeStringForXML, fromEntities)
import Network.HTTP ( urlEncode )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
@ -344,7 +343,7 @@ obfuscateChar char =
-- | Obfuscate string using entities.
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
obfuscateString = concatMap obfuscateChar . fromEntities
attrsToHtml :: WriterOptions -> Attr -> [Attribute]
attrsToHtml opts (id',classes',keyvals) =