Removed --normalize option and normalization functions from Shared.

* Removed normalize, normalizeInlines, normalizeBlocks
  from Text.Pandoc.Shared.  These shouldn't now be necessary,
  since normalization is handled automatically by the Builder
  monoid instance.

* Remove `--normalize` command-line option.

* Don't use normalize in tests.

* A few revisions to readers so they work well without normalize.
This commit is contained in:
John MacFarlane 2016-12-11 22:09:33 +01:00
parent 08110c3714
commit 8165014df6
13 changed files with 30 additions and 214 deletions

View file

@ -478,11 +478,6 @@ Reader options
underlying document (which is accessible from filters and may be
printed in some output formats).
`--normalize`
: Normalize the document after reading: merge adjacent
`Str` or `Emph` elements, for example, and remove repeated `Space`s.
`-p`, `--preserve-tabs`
: Preserve tabs instead of converting them to spaces (the default).

View file

@ -35,7 +35,7 @@ import Text.Pandoc.Builder (setMeta)
import Text.Pandoc.PDF (makePDF)
import Text.Pandoc.Walk (walk)
import Text.Pandoc.Shared ( tabFilter, readDataFileUTF8, readDataFile,
safeRead, headerShift, normalize, err, warn,
safeRead, headerShift, err, warn,
openURL )
import Text.Pandoc.MediaBag ( mediaDirectory, extractMediaBag, MediaBag )
import Text.Pandoc.XML ( toEntities )
@ -731,12 +731,6 @@ options =
"PROGRAM")
"" -- "External JSON filter"
, Option "" ["normalize"]
(NoArg
(\opt -> return opt { optTransforms =
normalize : optTransforms opt } ))
"" -- "Normalize the Pandoc AST"
, Option "p" ["preserve-tabs"]
(NoArg
(\opt -> return opt { optPreserveTabs = True }))

View file

@ -744,7 +744,7 @@ addNewRole roleString fields = do
M.insert role (baseRole, fmt, attr) customRoles
}
return $ B.singleton Null
return mempty
where
countKeys k = length . filter (== k) . map fst $ fields
inheritedRole =

View file

@ -447,9 +447,13 @@ inlineMarkup p f c special = try $ do
lastChar <- anyChar
end <- many1 (char c)
let parser inp = parseFromString (mconcat <$> many p) inp
let start' = special (drop 2 start)
let start' = case drop 2 start of
"" -> mempty
xs -> special xs
body' <- parser (middle ++ [lastChar])
let end' = special (drop 2 end)
let end' = case drop 2 end of
"" -> mempty
xs -> special xs
return $ f (start' <> body' <> end')
Nothing -> do -- Either bad or case such as *****
guard (l >= 5)

View file

@ -55,9 +55,6 @@ module Text.Pandoc.Shared (
orderedListMarkers,
normalizeSpaces,
extractSpaces,
normalize,
normalizeInlines,
normalizeBlocks,
removeFormatting,
stringify,
capitalize,
@ -398,153 +395,6 @@ extractSpaces f is =
_ -> mempty in
(left <> f (B.trimInlines . B.Many $ contents) <> right)
-- | Normalize @Pandoc@ document, consolidating doubled 'Space's,
-- combining adjacent 'Str's and 'Emph's, remove 'Null's and
-- empty elements, etc.
normalize :: Pandoc -> Pandoc
normalize (Pandoc (Meta meta) blocks) =
Pandoc (Meta $ M.map go meta) (normalizeBlocks blocks)
where go (MetaInlines xs) = MetaInlines $ normalizeInlines xs
go (MetaBlocks xs) = MetaBlocks $ normalizeBlocks xs
go (MetaList ms) = MetaList $ map go ms
go (MetaMap m) = MetaMap $ M.map go m
go x = x
normalizeBlocks :: [Block] -> [Block]
normalizeBlocks (Null : xs) = normalizeBlocks xs
normalizeBlocks (Div attr bs : xs) =
Div attr (normalizeBlocks bs) : normalizeBlocks xs
normalizeBlocks (BlockQuote bs : xs) =
case normalizeBlocks bs of
[] -> normalizeBlocks xs
bs' -> BlockQuote bs' : normalizeBlocks xs
normalizeBlocks (BulletList [] : xs) = normalizeBlocks xs
normalizeBlocks (BulletList items : xs) =
BulletList (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (OrderedList _ [] : xs) = normalizeBlocks xs
normalizeBlocks (OrderedList attr items : xs) =
OrderedList attr (map normalizeBlocks items) : normalizeBlocks xs
normalizeBlocks (DefinitionList [] : xs) = normalizeBlocks xs
normalizeBlocks (DefinitionList items : xs) =
DefinitionList (map go items) : normalizeBlocks xs
where go (ils, bs) = (normalizeInlines ils, map normalizeBlocks bs)
normalizeBlocks (RawBlock _ "" : xs) = normalizeBlocks xs
normalizeBlocks (RawBlock f x : xs) =
case normalizeBlocks xs of
(RawBlock f' x' : rest) | f' == f ->
RawBlock f (x ++ ('\n':x')) : rest
rest -> RawBlock f x : rest
normalizeBlocks (Para ils : xs) =
case normalizeInlines ils of
[] -> normalizeBlocks xs
ils' -> Para ils' : normalizeBlocks xs
normalizeBlocks (Plain ils : xs) =
case normalizeInlines ils of
[] -> normalizeBlocks xs
ils' -> Plain ils' : normalizeBlocks xs
normalizeBlocks (Header lev attr ils : xs) =
Header lev attr (normalizeInlines ils) : normalizeBlocks xs
normalizeBlocks (Table capt aligns widths hdrs rows : xs) =
Table (normalizeInlines capt) aligns widths
(map normalizeBlocks hdrs) (map (map normalizeBlocks) rows)
: normalizeBlocks xs
normalizeBlocks (x:xs) = x : normalizeBlocks xs
normalizeBlocks [] = []
normalizeInlines :: [Inline] -> [Inline]
normalizeInlines (Str x : ys) =
case concat (x : map fromStr strs) of
"" -> rest
n -> Str n : rest
where
(strs, rest) = span isStr $ normalizeInlines ys
isStr (Str _) = True
isStr _ = False
fromStr (Str z) = z
fromStr _ = error "normalizeInlines - fromStr - not a Str"
normalizeInlines (Space : SoftBreak : ys) =
SoftBreak : normalizeInlines ys
normalizeInlines (Space : ys) =
if null rest
then []
else Space : rest
where isSp Space = True
isSp _ = False
rest = dropWhile isSp $ normalizeInlines ys
normalizeInlines (Emph xs : zs) =
case normalizeInlines zs of
(Emph ys : rest) -> normalizeInlines $
Emph (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Emph xs' : rest
normalizeInlines (Strong xs : zs) =
case normalizeInlines zs of
(Strong ys : rest) -> normalizeInlines $
Strong (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Strong xs' : rest
normalizeInlines (Subscript xs : zs) =
case normalizeInlines zs of
(Subscript ys : rest) -> normalizeInlines $
Subscript (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Subscript xs' : rest
normalizeInlines (Superscript xs : zs) =
case normalizeInlines zs of
(Superscript ys : rest) -> normalizeInlines $
Superscript (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Superscript xs' : rest
normalizeInlines (SmallCaps xs : zs) =
case normalizeInlines zs of
(SmallCaps ys : rest) -> normalizeInlines $
SmallCaps (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> SmallCaps xs' : rest
normalizeInlines (Strikeout xs : zs) =
case normalizeInlines zs of
(Strikeout ys : rest) -> normalizeInlines $
Strikeout (normalizeInlines $ xs ++ ys) : rest
rest -> case normalizeInlines xs of
[] -> rest
xs' -> Strikeout xs' : rest
normalizeInlines (RawInline _ [] : ys) = normalizeInlines ys
normalizeInlines (RawInline f xs : zs) =
case normalizeInlines zs of
(RawInline f' ys : rest) | f == f' -> normalizeInlines $
RawInline f (xs ++ ys) : rest
rest -> RawInline f xs : rest
normalizeInlines (Code _ "" : ys) = normalizeInlines ys
normalizeInlines (Code attr xs : zs) =
case normalizeInlines zs of
(Code attr' ys : rest) | attr == attr' -> normalizeInlines $
Code attr (xs ++ ys) : rest
rest -> Code attr xs : rest
-- allow empty spans, they may carry identifiers etc.
-- normalizeInlines (Span _ [] : ys) = normalizeInlines ys
normalizeInlines (Span attr xs : zs) =
case normalizeInlines zs of
(Span attr' ys : rest) | attr == attr' -> normalizeInlines $
Span attr (normalizeInlines $ xs ++ ys) : rest
rest -> Span attr (normalizeInlines xs) : rest
normalizeInlines (Note bs : ys) = Note (normalizeBlocks bs) :
normalizeInlines ys
normalizeInlines (Quoted qt ils : ys) =
Quoted qt (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (Link attr ils t : ys) =
Link attr (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Image attr ils t : ys) =
Image attr (normalizeInlines ils) t : normalizeInlines ys
normalizeInlines (Cite cs ils : ys) =
Cite cs (normalizeInlines ils) : normalizeInlines ys
normalizeInlines (x : xs) = x : normalizeInlines xs
normalizeInlines [] = []
-- | Extract inlines, removing formatting.
removeFormatting :: Walkable Inline a => a -> [Inline]
removeFormatting = query go . walk deNote

View file

@ -141,7 +141,7 @@ defaultWriterState = WriterState{
, stDelId = 1
, stStyleMaps = defaultStyleMaps
, stFirstPara = False
, stTocTitle = normalizeInlines [Str "Table of Contents"]
, stTocTitle = [Str "Table of Contents"]
, stDynamicParaProps = []
, stDynamicTextProps = []
}
@ -207,7 +207,7 @@ isValidChar (ord -> c)
| otherwise = False
metaValueToInlines :: MetaValue -> [Inline]
metaValueToInlines (MetaString s) = normalizeInlines [Str s]
metaValueToInlines (MetaString s) = [Str s]
metaValueToInlines (MetaInlines ils) = ils
metaValueToInlines (MetaBlocks bs) = query return bs
metaValueToInlines (MetaBool b) = [Str $ show b]

View file

@ -45,7 +45,7 @@ import Text.Pandoc.Options ( WriterOptions(
, writerTemplate
, writerWrapText), WrapOption(..) )
import Text.Pandoc.Shared ( escapeURI, linesToPara, removeFormatting
, camelCaseToHyphenated, trimr, normalize, substitute )
, camelCaseToHyphenated, trimr, substitute )
import Text.Pandoc.Writers.Shared ( defField, metaToJSON )
import Text.Pandoc.ImageSize
import Text.Pandoc.Templates ( renderTemplate' )
@ -80,7 +80,7 @@ type DokuWiki = ReaderT WriterEnvironment (State WriterState)
-- | Convert Pandoc to DokuWiki.
writeDokuWiki :: PandocMonad m => WriterOptions -> Pandoc -> m String
writeDokuWiki opts document = return $
runDokuWiki (pandocToDokuWiki opts $ normalize document)
runDokuWiki (pandocToDokuWiki opts document)
runDokuWiki :: DokuWiki a -> a
runDokuWiki = flip evalState def . flip runReaderT def
@ -394,9 +394,16 @@ blockListToDokuWiki :: WriterOptions -- ^ Options
-> DokuWiki String
blockListToDokuWiki opts blocks = do
backSlash <- stBackSlashLB <$> ask
let blocks' = consolidateRawBlocks blocks
if backSlash
then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks
else vcat <$> mapM (blockToDokuWiki opts) blocks
then (backSlashLineBreaks . vcat) <$> mapM (blockToDokuWiki opts) blocks'
else vcat <$> mapM (blockToDokuWiki opts) blocks'
consolidateRawBlocks :: [Block] -> [Block]
consolidateRawBlocks [] = []
consolidateRawBlocks (RawBlock f1 b1 : RawBlock f2 b2 : xs)
| f1 == f2 = consolidateRawBlocks (RawBlock f1 (b1 ++ "\n" ++ b2) : xs)
consolidateRawBlocks (x:xs) = x : consolidateRawBlocks xs
-- | Convert list of Pandoc inline elements to DokuWiki.
inlineListToDokuWiki :: WriterOptions -> [Inline] -> DokuWiki String

View file

@ -17,7 +17,7 @@ import Test.Framework
import Test.Framework.Providers.HUnit
import Test.Framework.Providers.QuickCheck2
import Test.HUnit (assertBool)
import Text.Pandoc.Shared (normalize, trimr)
import Text.Pandoc.Shared (trimr)
import Text.Pandoc.Options
import Text.Pandoc.Writers.Native (writeNative)
import qualified Test.QuickCheck.Property as QP
@ -81,10 +81,10 @@ class ToPandoc a where
toPandoc :: a -> Pandoc
instance ToPandoc Pandoc where
toPandoc = normalize
toPandoc = id
instance ToPandoc Blocks where
toPandoc = normalize . doc
toPandoc = doc
instance ToPandoc Inlines where
toPandoc = normalize . doc . plain
toPandoc = doc . plain

View file

@ -11,15 +11,10 @@ import System.FilePath ( (</>), (<.>), takeDirectory, splitDirectories,
import System.Directory
import System.Exit
import Data.Algorithm.Diff
import Text.Pandoc.Shared ( normalize )
import Text.Pandoc.Options
import Text.Pandoc.Writers.Native ( writeNative )
import Text.Pandoc.Readers.Native ( readNative )
import Prelude hiding ( readFile )
import qualified Data.ByteString.Lazy as B
import Text.Pandoc.UTF8 (toStringLazy)
import Text.Printf
import Tests.Helpers (purely)
readFileUTF8 :: FilePath -> IO String
readFileUTF8 f = B.readFile f >>= return . toStringLazy
@ -193,12 +188,9 @@ lhsWriterTests format
lhsReaderTest :: String -> Test
lhsReaderTest format =
testWithNormalize normalizer "lhs" ["-r", format, "-w", "native"]
test "lhs" ["-r", format, "-w", "native"]
("lhs-test" <.> format) norm
where normalizer = purely $ \nat -> do
d <- readNative def nat
writeNative def $ normalize d
norm = if format == "markdown+lhs"
where norm = if format == "markdown+lhs"
then "lhs-test-markdown.native"
else "lhs-test.native"

View file

@ -1525,7 +1525,7 @@ tests =
, ""
, "#+RESULTS:"
, ": 65" ] =?>
rawBlock "html" ""
(mempty :: Blocks)
, "Source block with toggling header arguments" =:
unlines [ "#+BEGIN_SRC sh :noeval"

View file

@ -88,7 +88,7 @@ tests =
para "1970-01-01"
, "Macros: Mod Time" =:
"%%mtime" =?>
para ""
para (str "")
, "Macros: Infile" =:
"%%infile" =?>
para "in"

View file

@ -1,9 +1,7 @@
module Tests.Shared (tests) where
import Text.Pandoc.Definition
import Text.Pandoc.Shared
import Test.Framework
import Tests.Helpers
import Text.Pandoc.Arbitrary()
import Test.Framework.Providers.HUnit
import Test.HUnit ( assertBool, (@?=) )
@ -11,13 +9,7 @@ import Text.Pandoc.Builder
import System.FilePath.Posix (joinPath)
tests :: [Test]
tests = [ testGroup "normalize"
[ property "p_normalize_blocks_rt" p_normalize_blocks_rt
, property "p_normalize_inlines_rt" p_normalize_inlines_rt
, property "p_normalize_no_trailing_spaces"
p_normalize_no_trailing_spaces
]
, testGroup "compactify'DL"
tests = [ testGroup "compactify'DL"
[ testCase "compactify'DL with empty def" $
assertBool "compactify'DL"
(let x = [(str "word", [para (str "def"), mempty])]
@ -26,18 +18,6 @@ tests = [ testGroup "normalize"
, testGroup "collapseFilePath" testCollapse
]
p_normalize_blocks_rt :: [Block] -> Bool
p_normalize_blocks_rt bs =
normalizeBlocks bs == normalizeBlocks (normalizeBlocks bs)
p_normalize_inlines_rt :: [Inline] -> Bool
p_normalize_inlines_rt ils =
normalizeInlines ils == normalizeInlines (normalizeInlines ils)
p_normalize_no_trailing_spaces :: [Inline] -> Bool
p_normalize_no_trailing_spaces ils = null ils' || last ils' /= Space
where ils' = normalizeInlines $ ils ++ [Space]
testCollapse :: [Test]
testCollapse = map (testCase "collapse")
[ (collapseFilePath (joinPath [ ""]) @?= (joinPath [ ""]))

View file

@ -327,15 +327,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Str "Some",Space,Superscript [Str "of"],Space,Str "these",Space,Superscript [Str "words"],Space,Str "are",Space,Str "in",Space,Superscript [Str "superscript"],Str "."]
,Para [Str "Reset",Space,Str "default-role",Space,Str "to",Space,Str "the",Space,Str "default",Space,Str "default."]
,Para [Str "And",Space,Str "now",Space,Str "some-invalid-string-3231231",Space,Str "is",Space,Str "nonsense."]
,Null
,Para [Str "And",Space,Str "now",Space,Str "with",Space,RawInline (Format "html") "<b>inline</b> <span id=\"test\">HTML</span>",Str "."]
,Null
,Para [Str "And",Space,Str "some",Space,Str "inline",Space,Str "haskell",Space,Code ("",["haskell","sourceCode"],[]) "fmap id [1,2..10]",Str "."]
,Null
,Null
,Para [Str "Indirect",Space,Str "python",Space,Str "role",Space,Code ("",["py","python","indirect","sourceCode"],[]) "[x*x for x in [1,2,3,4,5]]",Str "."]
,Null
,Null
,Para [Str "Different",Space,Str "indirect",Space,Str "C",Space,Code ("",["c","different-indirect","sourceCode"],[]) "int x = 15;",Str "."]
,Header 2 ("literal-symbols",[],[]) [Str "Literal",Space,Str "symbols"]
,Para [Str "2*2",Space,Str "=",Space,Str "4*1"]]