Added `--strip-comments` option, `readerStripComments` in `ReaderOptions`.
* Options: Added `readerStripComments` to `ReaderOptions`.
* Added `--strip-comments` command-line option.
* Made `htmlTag` from the HTML reader sensitive to this feature. This affects Markdown and Textile input.

Closes #2552.
This commit is contained in:
parent
ce05814372
commit
b1ee747a24
6 changed files with 46 additions and 8 deletions
|
@ -655,6 +655,14 @@ General writer options
|
||||||
of contents. The default is 3 (which means that level 1, 2, and 3
|
of contents. The default is 3 (which means that level 1, 2, and 3
|
||||||
headers will be listed in the contents).
|
headers will be listed in the contents).
|
||||||
|
|
||||||
|
`--strip-comments`
|
||||||
|
|
||||||
|
: Strip out HTML comments in the Markdown or Textile source,
|
||||||
|
rather than passing them on to Markdown, Textile or HTML
|
||||||
|
output as raw HTML. This does not apply to HTML comments
|
||||||
|
inside raw HTML blocks when the `markdown_in_html_blocks`
|
||||||
|
extension is not set.
|
||||||
|
|
||||||
`--no-highlight`
|
`--no-highlight`
|
||||||
|
|
||||||
: Disables syntax highlighting for code blocks and inlines, even when
|
: Disables syntax highlighting for code blocks and inlines, even when
|
||||||
|
|
|
@ -483,6 +483,7 @@ convertWithOpts opts = do
|
||||||
, readerTrackChanges = optTrackChanges opts
|
, readerTrackChanges = optTrackChanges opts
|
||||||
, readerAbbreviations = abbrevs
|
, readerAbbreviations = abbrevs
|
||||||
, readerExtensions = readerExts
|
, readerExtensions = readerExts
|
||||||
|
, readerStripComments = optStripComments opts
|
||||||
}
|
}
|
||||||
|
|
||||||
let transforms = (case optBaseHeaderLevel opts of
|
let transforms = (case optBaseHeaderLevel opts of
|
||||||
|
@ -666,6 +667,7 @@ data Opt = Opt
|
||||||
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
|
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
|
||||||
, optResourcePath :: [FilePath] -- ^ Path to search for images etc
|
, optResourcePath :: [FilePath] -- ^ Path to search for images etc
|
||||||
, optEol :: LineEnding -- ^ Style of line-endings to use
|
, optEol :: LineEnding -- ^ Style of line-endings to use
|
||||||
|
, optStripComments :: Bool -- ^ Skip HTML comments
|
||||||
} deriving (Generic, Show)
|
} deriving (Generic, Show)
|
||||||
|
|
||||||
instance ToJSON Opt where
|
instance ToJSON Opt where
|
||||||
|
@ -742,6 +744,7 @@ defaultOpts = Opt
|
||||||
, optIncludeInHeader = []
|
, optIncludeInHeader = []
|
||||||
, optResourcePath = ["."]
|
, optResourcePath = ["."]
|
||||||
, optEol = Native
|
, optEol = Native
|
||||||
|
, optStripComments = False
|
||||||
}
|
}
|
||||||
|
|
||||||
addMetadata :: (String, String) -> Pandoc -> Pandoc
|
addMetadata :: (String, String) -> Pandoc -> Pandoc
|
||||||
|
@ -1114,6 +1117,11 @@ options =
|
||||||
"NUMBER")
|
"NUMBER")
|
||||||
"" -- "Length of line in characters"
|
"" -- "Length of line in characters"
|
||||||
|
|
||||||
|
, Option "" ["strip-comments"]
|
||||||
|
(NoArg
|
||||||
|
(\opt -> return opt { optStripComments = True }))
|
||||||
|
"" -- "Strip HTML comments"
|
||||||
|
|
||||||
, Option "" ["toc", "table-of-contents"]
|
, Option "" ["toc", "table-of-contents"]
|
||||||
(NoArg
|
(NoArg
|
||||||
(\opt -> return opt { optTableOfContents = True }))
|
(\opt -> return opt { optTableOfContents = True }))
|
||||||
|
|
|
@ -65,7 +65,8 @@ data ReaderOptions = ReaderOptions{
|
||||||
-- indented code blocks
|
-- indented code blocks
|
||||||
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
|
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
|
||||||
, readerDefaultImageExtension :: String -- ^ Default extension for images
|
, readerDefaultImageExtension :: String -- ^ Default extension for images
|
||||||
, readerTrackChanges :: TrackChanges
|
, readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx
|
||||||
|
, readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
|
||||||
} deriving (Show, Read, Data, Typeable, Generic)
|
} deriving (Show, Read, Data, Typeable, Generic)
|
||||||
|
|
||||||
instance ToJSON ReaderOptions where
|
instance ToJSON ReaderOptions where
|
||||||
|
@ -82,6 +83,7 @@ instance Default ReaderOptions
|
||||||
, readerAbbreviations = defaultAbbrevs
|
, readerAbbreviations = defaultAbbrevs
|
||||||
, readerDefaultImageExtension = ""
|
, readerDefaultImageExtension = ""
|
||||||
, readerTrackChanges = AcceptChanges
|
, readerTrackChanges = AcceptChanges
|
||||||
|
, readerStripComments = False
|
||||||
}
|
}
|
||||||
|
|
||||||
defaultAbbrevs :: Set.Set String
|
defaultAbbrevs :: Set.Set String
|
||||||
|
|
|
@ -46,7 +46,8 @@ import qualified Text.Pandoc.Builder as B
|
||||||
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
|
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
|
||||||
import Text.Pandoc.Shared ( extractSpaces, addMetaField
|
import Text.Pandoc.Shared ( extractSpaces, addMetaField
|
||||||
, escapeURI, safeRead, crFilter )
|
, escapeURI, safeRead, crFilter )
|
||||||
import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
|
import Text.Pandoc.Options (
|
||||||
|
ReaderOptions(readerExtensions,readerStripComments), extensionEnabled,
|
||||||
Extension (Ext_epub_html_exts,
|
Extension (Ext_epub_html_exts,
|
||||||
Ext_raw_html, Ext_native_divs, Ext_native_spans))
|
Ext_raw_html, Ext_native_divs, Ext_native_spans))
|
||||||
import Text.Pandoc.Logging
|
import Text.Pandoc.Logging
|
||||||
|
@ -1070,7 +1071,7 @@ _ `closes` _ = False
|
||||||
--- parsers for use in markdown, textile readers
|
--- parsers for use in markdown, textile readers
|
||||||
|
|
||||||
-- | Matches a stretch of HTML in balanced tags.
|
-- | Matches a stretch of HTML in balanced tags.
|
||||||
htmlInBalanced :: (Monad m)
|
htmlInBalanced :: (HasReaderOptions st, Monad m)
|
||||||
=> (Tag String -> Bool)
|
=> (Tag String -> Bool)
|
||||||
-> ParserT String st m String
|
-> ParserT String st m String
|
||||||
htmlInBalanced f = try $ do
|
htmlInBalanced f = try $ do
|
||||||
|
@ -1118,7 +1119,7 @@ hasTagWarning (TagWarning _:_) = True
|
||||||
hasTagWarning _ = False
|
hasTagWarning _ = False
|
||||||
|
|
||||||
-- | Matches a tag meeting a certain condition.
|
-- | Matches a tag meeting a certain condition.
|
||||||
htmlTag :: Monad m
|
htmlTag :: (HasReaderOptions st, Monad m)
|
||||||
=> (Tag String -> Bool)
|
=> (Tag String -> Bool)
|
||||||
-> ParserT [Char] st m (Tag String, String)
|
-> ParserT [Char] st m (Tag String, String)
|
||||||
htmlTag f = try $ do
|
htmlTag f = try $ do
|
||||||
|
@ -1153,7 +1154,10 @@ htmlTag f = try $ do
|
||||||
count (length s + 4) anyChar
|
count (length s + 4) anyChar
|
||||||
skipMany (satisfy (/='>'))
|
skipMany (satisfy (/='>'))
|
||||||
char '>'
|
char '>'
|
||||||
return (next, "<!--" <> s <> "-->")
|
stripComments <- getOption readerStripComments
|
||||||
|
if stripComments
|
||||||
|
then return (next, "")
|
||||||
|
else return (next, "<!--" <> s <> "-->")
|
||||||
| otherwise -> fail "bogus comment mode, HTML5 parse error"
|
| otherwise -> fail "bogus comment mode, HTML5 parse error"
|
||||||
TagOpen tagname attr -> do
|
TagOpen tagname attr -> do
|
||||||
guard $ all (isName . fst) attr
|
guard $ all (isName . fst) attr
|
||||||
|
|
|
@ -1079,7 +1079,9 @@ htmlBlock' = try $ do
|
||||||
first <- htmlElement
|
first <- htmlElement
|
||||||
skipMany spaceChar
|
skipMany spaceChar
|
||||||
optional blanklines
|
optional blanklines
|
||||||
return $ return $ B.rawBlock "html" first
|
return $ if null first
|
||||||
|
then mempty
|
||||||
|
else return $ B.rawBlock "html" first
|
||||||
|
|
||||||
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
|
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
|
||||||
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
|
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
|
||||||
|
|
14
test/command/2552.md
Normal file
14
test/command/2552.md
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
```
|
||||||
|
% pandoc --strip-comments
|
||||||
|
Foo
|
||||||
|
|
||||||
|
bar
|
||||||
|
|
||||||
|
<!-- comment -->
|
||||||
|
|
||||||
|
baz<!-- bim -->boop
|
||||||
|
^D
|
||||||
|
<p>Foo</p>
|
||||||
|
<p>bar</p>
|
||||||
|
<p>bazboop</p>
|
||||||
|
```
|
Loading…
Reference in a new issue