Added --strip-comments option, readerStripComments in ReaderOptions.

* Options:  Added readerStripComments to ReaderOptions.
* Added `--strip-comments` command-line option.
* Made `htmlTag` from the HTML reader sensitive to this feature.

This affects Markdown and Textile input.

Closes #2552.
This commit is contained in:
John MacFarlane 2017-09-17 12:49:15 -07:00
parent ce05814372
commit b1ee747a24
6 changed files with 46 additions and 8 deletions

View file

@ -655,6 +655,14 @@ General writer options
of contents. The default is 3 (which means that level 1, 2, and 3
headers will be listed in the contents).
`--strip-comments`
: Strip out HTML comments in the Markdown or Textile source,
rather than passing them on to Markdown, Textile or HTML
output as raw HTML. This does not apply to HTML comments
inside raw HTML blocks when the `markdown_in_html_blocks`
extension is not set.
`--no-highlight`
: Disables syntax highlighting for code blocks and inlines, even when

View file

@ -483,6 +483,7 @@ convertWithOpts opts = do
, readerTrackChanges = optTrackChanges opts
, readerAbbreviations = abbrevs
, readerExtensions = readerExts
, readerStripComments = optStripComments opts
}
let transforms = (case optBaseHeaderLevel opts of
@ -666,6 +667,7 @@ data Opt = Opt
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
, optResourcePath :: [FilePath] -- ^ Path to search for images etc
, optEol :: LineEnding -- ^ Style of line-endings to use
, optStripComments :: Bool -- ^ Skip HTML comments
} deriving (Generic, Show)
instance ToJSON Opt where
@ -742,6 +744,7 @@ defaultOpts = Opt
, optIncludeInHeader = []
, optResourcePath = ["."]
, optEol = Native
, optStripComments = False
}
addMetadata :: (String, String) -> Pandoc -> Pandoc
@ -1114,6 +1117,11 @@ options =
"NUMBER")
"" -- "Length of line in characters"
, Option "" ["strip-comments"]
(NoArg
(\opt -> return opt { optStripComments = True }))
"" -- "Strip HTML comments"
, Option "" ["toc", "table-of-contents"]
(NoArg
(\opt -> return opt { optTableOfContents = True }))

View file

@ -65,7 +65,8 @@ data ReaderOptions = ReaderOptions{
-- indented code blocks
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
, readerDefaultImageExtension :: String -- ^ Default extension for images
, readerTrackChanges :: TrackChanges
, readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx
, readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
} deriving (Show, Read, Data, Typeable, Generic)
instance ToJSON ReaderOptions where
@ -82,6 +83,7 @@ instance Default ReaderOptions
, readerAbbreviations = defaultAbbrevs
, readerDefaultImageExtension = ""
, readerTrackChanges = AcceptChanges
, readerStripComments = False
}
defaultAbbrevs :: Set.Set String

View file

@ -46,9 +46,10 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
import Text.Pandoc.Shared ( extractSpaces, addMetaField
, escapeURI, safeRead, crFilter )
import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
Extension (Ext_epub_html_exts,
Ext_raw_html, Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Options (
ReaderOptions(readerExtensions,readerStripComments), extensionEnabled,
Extension (Ext_epub_html_exts,
Ext_raw_html, Ext_native_divs, Ext_native_spans))
import Text.Pandoc.Logging
import Text.Pandoc.Parsing hiding ((<|>))
import Text.Pandoc.Walk
@ -1070,7 +1071,7 @@ _ `closes` _ = False
--- parsers for use in markdown, textile readers
-- | Matches a stretch of HTML in balanced tags.
htmlInBalanced :: (Monad m)
htmlInBalanced :: (HasReaderOptions st, Monad m)
=> (Tag String -> Bool)
-> ParserT String st m String
htmlInBalanced f = try $ do
@ -1118,7 +1119,7 @@ hasTagWarning (TagWarning _:_) = True
hasTagWarning _ = False
-- | Matches a tag meeting a certain condition.
htmlTag :: Monad m
htmlTag :: (HasReaderOptions st, Monad m)
=> (Tag String -> Bool)
-> ParserT [Char] st m (Tag String, String)
htmlTag f = try $ do
@ -1153,7 +1154,10 @@ htmlTag f = try $ do
count (length s + 4) anyChar
skipMany (satisfy (/='>'))
char '>'
return (next, "<!--" <> s <> "-->")
stripComments <- getOption readerStripComments
if stripComments
then return (next, "")
else return (next, "<!--" <> s <> "-->")
| otherwise -> fail "bogus comment mode, HTML5 parse error"
TagOpen tagname attr -> do
guard $ all (isName . fst) attr

View file

@ -1079,7 +1079,9 @@ htmlBlock' = try $ do
first <- htmlElement
skipMany spaceChar
optional blanklines
return $ return $ B.rawBlock "html" first
return $ if null first
then mempty
else return $ B.rawBlock "html" first
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
strictHtmlBlock = htmlInBalanced (not . isInlineTag)

14
test/command/2552.md Normal file
View file

@ -0,0 +1,14 @@
```
% pandoc --strip-comments
Foo
bar
<!-- comment -->
baz<!-- bim -->boop
^D
<p>Foo</p>
<p>bar</p>
<p>bazboop</p>
```