Correctly implement capitalisation.

Using `map toUpper` to capitalise text is wrong, as e.g.
“Straße” should be converted to “STRASSE”, which is 1 character
longer. This commit adds a `capitalize` function and replaces
2 identical implementations in different modules (`toCaps` and
`capitalize`) with it.
This commit is contained in:
Artyom Kazak 2014-08-03 16:48:55 +04:00
parent 842c705097
commit ec88d47f23
5 changed files with 42 additions and 15 deletions

View file

@ -59,6 +59,7 @@ module Text.Pandoc.Shared (
normalizeBlocks,
removeFormatting,
stringify,
capitalize,
compactify,
compactify',
compactify'DL,
@ -122,6 +123,7 @@ import qualified Data.ByteString.Char8 as B8
import Text.Pandoc.Compat.Monoid
import Data.ByteString.Base64 (decodeLenient)
import Data.Sequence (ViewR(..), ViewL(..), viewl, viewr)
import qualified Data.Text as T (toUpper, pack, unpack)
#ifdef EMBED_DATA_FILES
import Text.Pandoc.Data (dataFiles)
@ -527,6 +529,17 @@ stringify = query go . walk deNote
deNote (Note _) = Str ""
deNote x = x
-- | Bring all regular text in a pandoc structure to uppercase.
--
-- This function correctly handles cases where a lowercase character doesn't
-- match to a single uppercase character e.g. “Straße” would be converted
-- to “STRASSE”, not “STRAßE”.
capitalize :: Walkable Inline a => a -> a
capitalize = walk go
where go :: Inline -> Inline
go (Str s) = Str (T.unpack $ T.toUpper $ T.pack s)
go x = x
-- | Change final list item from @Para@ to @Plain@ if the list contains
-- no other @Para@ blocks.
compactify :: [[Block]] -- ^ List of list items (each a list of blocks)

View file

@ -28,7 +28,7 @@ module Text.Pandoc.Writers.FB2 (writeFB2) where
import Control.Monad.State (StateT, evalStateT, get, modify)
import Control.Monad.State (liftM, liftM2, liftIO)
import Data.ByteString.Base64 (encode)
import Data.Char (toUpper, toLower, isSpace, isAscii, isControl)
import Data.Char (toLower, isSpace, isAscii, isControl)
import Data.List (intersperse, intercalate, isPrefixOf)
import Data.Either (lefts, rights)
import Network.Browser (browse, request, setAllowRedirects, setOutHandler)
@ -44,8 +44,7 @@ import qualified Text.XML.Light.Cursor as XC
import Text.Pandoc.Definition
import Text.Pandoc.Options (WriterOptions(..), HTMLMathMethod(..), def)
import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock)
import Text.Pandoc.Walk
import Text.Pandoc.Shared (orderedListMarkers, isHeaderBlock, capitalize)
-- | Data to be written at the end of the document:
-- (foot)notes, URLs, references, images.
@ -421,10 +420,6 @@ indent = indentBlock
indentLines ins = let lns = split isLineBreak ins :: [[Inline]]
in intercalate [LineBreak] $ map ((Str spacer):) lns
capitalize :: Inline -> Inline
capitalize (Str xs) = Str $ map toUpper xs
capitalize x = x
-- | Convert a Pandoc's Inline element to FictionBook XML representation.
toXml :: Inline -> FBM [Content]
toXml (Str s) = return [txt s]
@ -434,7 +429,7 @@ toXml (Strong ss) = list `liftM` wrap "strong" ss
toXml (Strikeout ss) = list `liftM` wrap "strikethrough" ss
toXml (Superscript ss) = list `liftM` wrap "sup" ss
toXml (Subscript ss) = list `liftM` wrap "sub" ss
toXml (SmallCaps ss) = cMapM toXml $ walk capitalize ss
toXml (SmallCaps ss) = cMapM toXml $ capitalize ss
toXml (Quoted SingleQuote ss) = do -- FIXME: should be language-specific
inner <- cMapM toXml ss
return $ [txt ""] ++ inner ++ [txt ""]

View file

@ -39,7 +39,7 @@ import Text.Pandoc.Writers.Shared
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (blankline, blanklines, char, space)
import Data.List ( group, isPrefixOf, find, intersperse, transpose, sortBy )
import Data.Char ( isSpace, isPunctuation, toUpper )
import Data.Char ( isSpace, isPunctuation )
import Data.Ord ( comparing )
import Text.Pandoc.Pretty
import Control.Monad.State
@ -672,10 +672,6 @@ escapeSpaces (Str s) = Str $ substitute " " "\\ " s
escapeSpaces Space = Str "\\ "
escapeSpaces x = x
toCaps :: Inline -> Inline
toCaps (Str s) = Str (map toUpper s)
toCaps x = x
-- | Convert Pandoc inline element to markdown.
inlineToMarkdown :: WriterOptions -> Inline -> State WriterState Doc
inlineToMarkdown opts (Span attrs ils) = do
@ -693,7 +689,7 @@ inlineToMarkdown opts (Emph lst) = do
inlineToMarkdown opts (Strong lst) = do
plain <- gets stPlain
if plain
then inlineListToMarkdown opts $ walk toCaps lst
then inlineListToMarkdown opts $ capitalize lst
else do
contents <- inlineListToMarkdown opts lst
return $ "**" <> contents <> "**"
@ -716,7 +712,7 @@ inlineToMarkdown opts (Subscript lst) = do
inlineToMarkdown opts (SmallCaps lst) = do
plain <- gets stPlain
if plain
then inlineListToMarkdown opts $ walk toCaps lst
then inlineListToMarkdown opts $ capitalize lst
else do
contents <- inlineListToMarkdown opts lst
return $ tagWithAttrs "span"

View file

@ -0,0 +1,21 @@
{-# LANGUAGE OverloadedStrings #-}
module Tests.Writers.Plain (tests) where
import Test.Framework
import Text.Pandoc.Builder
import Text.Pandoc
import Tests.Helpers
import Tests.Arbitrary()
infix 4 =:
(=:) :: (ToString a, ToPandoc a)
=> String -> (a, String) -> Test
(=:) = test (writePlain def . toPandoc)
tests :: [Test]
tests = [ "strongly emphasized text to uppercase"
=: strong "Straße"
=?> "STRASSE"
]

View file

@ -17,6 +17,7 @@ import qualified Tests.Writers.HTML
import qualified Tests.Writers.Docbook
import qualified Tests.Writers.Native
import qualified Tests.Writers.Markdown
import qualified Tests.Writers.Plain
import qualified Tests.Writers.AsciiDoc
import qualified Tests.Shared
import qualified Tests.Walk
@ -33,6 +34,7 @@ tests = [ testGroup "Old" Tests.Old.tests
, testGroup "HTML" Tests.Writers.HTML.tests
, testGroup "Docbook" Tests.Writers.Docbook.tests
, testGroup "Markdown" Tests.Writers.Markdown.tests
, testGroup "Plain" Tests.Writers.Plain.tests
, testGroup "AsciiDoc" Tests.Writers.AsciiDoc.tests
]
, testGroup "Readers"