Added native_divs and native_spans extensions.

This allows users to turn off the default pandoc behavior of parsing contents of div and span tags in markdown and HTML as native pandoc Div blocks and Span inlines. Setting of default epub extensions has been moved from the EPUB reader to Text.Pandoc.
2014-08-08 21:04:25 -07:00 · 2014-08-08 21:04:25 -07:00 · 19daf6cf0a
commit 19daf6cf0a
parent e1cc5479c0
6 changed files with 38 additions and 11 deletions
--- a/14
+++ b/14
@ -2238,6 +2238,20 @@ markdown with HTML block elements.  For example, one can surround
 a block of markdown text with `<div>` tags without preventing it
 from being interpreted as markdown.

+#### Extension: `native_divs` ####
+
+Use native pandoc `Div` blocks for content inside `<div>` tags.
+For the most part this should give the same output as
+`markdown_in_html_blocks`, but it makes it easier to write pandoc
+filters to manipulate groups of blocks.
+
+#### Extension: `native_spans` ####
+
+Use native pandoc `Span` blocks for content inside `<span>` tags.
+For the most part this should give the same output as `raw_html`,
+but it makes it easier to write pandoc filters to manipulate groups
+of inlines.
+
 Raw TeX
 -------

--- a/src/Text/Pandoc.hs
+++ b/src/Text/Pandoc.hs
@ -304,7 +304,17 @@ getDefaultExtensions "markdown_github" = githubMarkdownExtensions
 getDefaultExtensions "markdown"        = pandocExtensions
 getDefaultExtensions "plain"           = pandocExtensions
 getDefaultExtensions "org"             = Set.fromList [Ext_citations]
-getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers, Ext_raw_tex]
+getDefaultExtensions "textile"         = Set.fromList [Ext_auto_identifiers,
+                                                       Ext_raw_tex]
+getDefaultExtensions "html"            = Set.fromList [Ext_auto_identifiers,
+                                                       Ext_native_divs,
+                                                       Ext_native_spans]
+getDefaultExtensions "html5"           = getDefaultExtensions "html"
+getDefaultExtensions "epub"            = Set.fromList [Ext_auto_identifiers,
+                                                       Ext_raw_html,
+                                                       Ext_native_divs,
+                                                       Ext_native_spans,
+                                                       Ext_epub_html_exts]
 getDefaultExtensions _                 = Set.fromList [Ext_auto_identifiers]

 -- | Retrieve reader based on formatSpec (format+extensions).
--- a/src/Text/Pandoc/Options.hs
+++ b/src/Text/Pandoc/Options.hs
@ -77,6 +77,8 @@ data Extension =
    | Ext_backtick_code_blocks    -- ^ Github style ``` code blocks
    | Ext_inline_code_attributes  -- ^ Allow attributes on inline code
    | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
+    | Ext_native_divs             -- ^ Use Div blocks for contents of <div> tags
+    | Ext_native_spans            -- ^ Use Span inlines for contents of <span>
    | Ext_markdown_attribute      -- ^ Interpret text inside HTML as markdown
                                  --   iff container has attribute 'markdown'
    | Ext_escaped_line_breaks     -- ^ Treat a backslash at EOL as linebreak
@ -131,6 +133,8 @@ pandocExtensions = Set.fromList
  , Ext_backtick_code_blocks
  , Ext_inline_code_attributes
  , Ext_markdown_in_html_blocks
+  , Ext_native_divs
+  , Ext_native_spans
  , Ext_escaped_line_breaks
  , Ext_fancy_lists
  , Ext_startnum
--- a/src/Text/Pandoc/Readers/EPUB.hs
+++ b/src/Text/Pandoc/Readers/EPUB.hs
@ -13,8 +13,7 @@ import Text.Pandoc.Definition hiding (Attr)
 import Text.Pandoc.Walk (walk, query)
 import Text.Pandoc.Generic(bottomUp)
 import Text.Pandoc.Readers.HTML (readHtml)
-import Text.Pandoc.Options ( ReaderOptions(..), readerExtensions, Extension(..)
-                           , readerTrace)
+import Text.Pandoc.Options ( ReaderOptions(..), readerTrace)
 import Text.Pandoc.Shared (escapeURI)
 import Text.Pandoc.MediaBag (MediaBag, insertMedia)
 import Text.Pandoc.Compat.Except (MonadError, throwError, runExcept, Except)
@ -32,7 +31,6 @@ import Data.Monoid (mempty, (<>))
 import Data.List (isPrefixOf, isInfixOf)
 import Data.Maybe (mapMaybe, fromMaybe)
 import qualified Data.Map as M (Map, lookup, fromList, elems)
-import qualified Data.Set as S (insert)
 import Control.DeepSeq.Generics (deepseq, NFData)

 import Debug.Trace (trace)
@ -65,9 +63,7 @@ archiveToEPUB os archive = do
  let mediaBag = fetchImages (M.elems items) root archive ast
  return $ (ast, mediaBag)
  where
-    rs = readerExtensions os
-    os' = os {readerExtensions = foldr S.insert rs [Ext_epub_html_exts, Ext_raw_html]}
-    os'' = os' {readerParseRaw = True}
+    os' = os {readerParseRaw = True}
    parseSpineElem :: MonadError String m => FilePath -> (FilePath, MIME) -> m Pandoc
    parseSpineElem (normalise -> r) (normalise -> path, mime) = do
      when (readerTrace os) (traceM path)
@ -78,7 +74,7 @@ archiveToEPUB os archive = do
    mimeToReader "application/xhtml+xml" r path = do
      fname <- findEntryByPathE (r </> path) archive
      return $ fixInternalReferences (r </> path) .
-                readHtml os'' .
+                readHtml os' .
                  UTF8.toStringLazy $
                    fromEntry fname
    mimeToReader s _ path
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@ -45,7 +45,8 @@ import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
 import Text.Pandoc.Shared ( extractSpaces, renderTags'
                          , escapeURI, safeRead )
 import Text.Pandoc.Options (ReaderOptions(readerParseRaw, readerTrace)
-                           , Extension (Ext_epub_html_exts))
+                           , Extension (Ext_epub_html_exts,
+                               Ext_native_divs, Ext_native_spans))
 import Text.Pandoc.Parsing hiding ((<|>))
 import Text.Pandoc.Walk
 import Data.Maybe ( fromMaybe, isJust)
@ -296,6 +297,7 @@ pRawTag = do

 pDiv :: TagParser Blocks
 pDiv = try $ do
+  guardEnabled Ext_native_divs
  TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="div") (const True)
  contents <- pInTags "div" block
  return $ B.divWith (mkAttr attr) contents
@ -560,6 +562,7 @@ pCode = try $ do

 pSpan :: TagParser Inlines
 pSpan = try $ do
+  guardEnabled Ext_native_spans
  TagOpen _ attr <- lookAhead $ pSatisfy $ tagOpen (=="span") (const True)
  contents <- pInTags "span" inline
  return $ B.spanWith (mkAttr attr) contents
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@ -1764,7 +1764,7 @@ inBrackets parser = do

 spanHtml :: MarkdownParser (F Inlines)
 spanHtml = try $ do
-  guardEnabled Ext_markdown_in_html_blocks
+  guardEnabled Ext_native_spans
  (TagOpen _ attrs, _) <- htmlTag (~== TagOpen "span" [])
  contents <- mconcat <$> manyTill inline (htmlTag (~== TagClose "span"))
  let ident = fromMaybe "" $ lookup "id" attrs
@ -1779,7 +1779,7 @@ spanHtml = try $ do

 divHtml :: MarkdownParser (F Blocks)
 divHtml = try $ do
-  guardEnabled Ext_markdown_in_html_blocks
+  guardEnabled Ext_native_divs
  (TagOpen _ attrs, rawtag) <- htmlTag (~== TagOpen "div" [])
  -- we set stateInHtmlBlock so that closing tags that can be either block or
  -- inline will not be parsed as inline tags