Added --strip-comments option, readerStripComments in ReaderOptions.
* Options: Added readerStripComments to ReaderOptions.
* Added `--strip-comments` command-line option.
* Made `htmlTag` from the HTML reader sensitive to this feature. This affects Markdown and Textile input.

Closes #2552.
This commit is contained in:
parent
ce05814372
commit
b1ee747a24
6 changed files with 46 additions and 8 deletions
|
@ -655,6 +655,14 @@ General writer options
|
|||
of contents. The default is 3 (which means that level 1, 2, and 3
|
||||
headers will be listed in the contents).
|
||||
|
||||
`--strip-comments`
|
||||
|
||||
: Strip out HTML comments in the Markdown or Textile source,
|
||||
rather than passing them on to Markdown, Textile or HTML
|
||||
output as raw HTML. This does not apply to HTML comments
|
||||
inside raw HTML blocks when the `markdown_in_html_blocks`
|
||||
extension is not set.
|
||||
|
||||
`--no-highlight`
|
||||
|
||||
: Disables syntax highlighting for code blocks and inlines, even when
|
||||
|
|
|
@ -483,6 +483,7 @@ convertWithOpts opts = do
|
|||
, readerTrackChanges = optTrackChanges opts
|
||||
, readerAbbreviations = abbrevs
|
||||
, readerExtensions = readerExts
|
||||
, readerStripComments = optStripComments opts
|
||||
}
|
||||
|
||||
let transforms = (case optBaseHeaderLevel opts of
|
||||
|
@ -666,6 +667,7 @@ data Opt = Opt
|
|||
, optIncludeInHeader :: [FilePath] -- ^ Files to include in header
|
||||
, optResourcePath :: [FilePath] -- ^ Path to search for images etc
|
||||
, optEol :: LineEnding -- ^ Style of line-endings to use
|
||||
, optStripComments :: Bool -- ^ Skip HTML comments
|
||||
} deriving (Generic, Show)
|
||||
|
||||
instance ToJSON Opt where
|
||||
|
@ -742,6 +744,7 @@ defaultOpts = Opt
|
|||
, optIncludeInHeader = []
|
||||
, optResourcePath = ["."]
|
||||
, optEol = Native
|
||||
, optStripComments = False
|
||||
}
|
||||
|
||||
addMetadata :: (String, String) -> Pandoc -> Pandoc
|
||||
|
@ -1114,6 +1117,11 @@ options =
|
|||
"NUMBER")
|
||||
"" -- "Length of line in characters"
|
||||
|
||||
, Option "" ["strip-comments"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optStripComments = True }))
|
||||
"" -- "Strip HTML comments"
|
||||
|
||||
, Option "" ["toc", "table-of-contents"]
|
||||
(NoArg
|
||||
(\opt -> return opt { optTableOfContents = True }))
|
||||
|
|
|
@ -65,7 +65,8 @@ data ReaderOptions = ReaderOptions{
|
|||
-- indented code blocks
|
||||
, readerAbbreviations :: Set.Set String -- ^ Strings to treat as abbreviations
|
||||
, readerDefaultImageExtension :: String -- ^ Default extension for images
|
||||
, readerTrackChanges :: TrackChanges
|
||||
, readerTrackChanges :: TrackChanges -- ^ Track changes setting for docx
|
||||
, readerStripComments :: Bool -- ^ Strip HTML comments instead of parsing as raw HTML
|
||||
} deriving (Show, Read, Data, Typeable, Generic)
|
||||
|
||||
instance ToJSON ReaderOptions where
|
||||
|
@ -82,6 +83,7 @@ instance Default ReaderOptions
|
|||
, readerAbbreviations = defaultAbbrevs
|
||||
, readerDefaultImageExtension = ""
|
||||
, readerTrackChanges = AcceptChanges
|
||||
, readerStripComments = False
|
||||
}
|
||||
|
||||
defaultAbbrevs :: Set.Set String
|
||||
|
|
|
@ -46,9 +46,10 @@ import qualified Text.Pandoc.Builder as B
|
|||
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines, HasMeta(..))
|
||||
import Text.Pandoc.Shared ( extractSpaces, addMetaField
|
||||
, escapeURI, safeRead, crFilter )
|
||||
import Text.Pandoc.Options (ReaderOptions(readerExtensions), extensionEnabled,
|
||||
Extension (Ext_epub_html_exts,
|
||||
Ext_raw_html, Ext_native_divs, Ext_native_spans))
|
||||
import Text.Pandoc.Options (
|
||||
ReaderOptions(readerExtensions,readerStripComments), extensionEnabled,
|
||||
Extension (Ext_epub_html_exts,
|
||||
Ext_raw_html, Ext_native_divs, Ext_native_spans))
|
||||
import Text.Pandoc.Logging
|
||||
import Text.Pandoc.Parsing hiding ((<|>))
|
||||
import Text.Pandoc.Walk
|
||||
|
@ -1070,7 +1071,7 @@ _ `closes` _ = False
|
|||
--- parsers for use in markdown, textile readers
|
||||
|
||||
-- | Matches a stretch of HTML in balanced tags.
|
||||
htmlInBalanced :: (Monad m)
|
||||
htmlInBalanced :: (HasReaderOptions st, Monad m)
|
||||
=> (Tag String -> Bool)
|
||||
-> ParserT String st m String
|
||||
htmlInBalanced f = try $ do
|
||||
|
@ -1118,7 +1119,7 @@ hasTagWarning (TagWarning _:_) = True
|
|||
hasTagWarning _ = False
|
||||
|
||||
-- | Matches a tag meeting a certain condition.
|
||||
htmlTag :: Monad m
|
||||
htmlTag :: (HasReaderOptions st, Monad m)
|
||||
=> (Tag String -> Bool)
|
||||
-> ParserT [Char] st m (Tag String, String)
|
||||
htmlTag f = try $ do
|
||||
|
@ -1153,7 +1154,10 @@ htmlTag f = try $ do
|
|||
count (length s + 4) anyChar
|
||||
skipMany (satisfy (/='>'))
|
||||
char '>'
|
||||
return (next, "<!--" <> s <> "-->")
|
||||
stripComments <- getOption readerStripComments
|
||||
if stripComments
|
||||
then return (next, "")
|
||||
else return (next, "<!--" <> s <> "-->")
|
||||
| otherwise -> fail "bogus comment mode, HTML5 parse error"
|
||||
TagOpen tagname attr -> do
|
||||
guard $ all (isName . fst) attr
|
||||
|
|
|
@ -1079,7 +1079,9 @@ htmlBlock' = try $ do
|
|||
first <- htmlElement
|
||||
skipMany spaceChar
|
||||
optional blanklines
|
||||
return $ return $ B.rawBlock "html" first
|
||||
return $ if null first
|
||||
then mempty
|
||||
else return $ B.rawBlock "html" first
|
||||
|
||||
strictHtmlBlock :: PandocMonad m => MarkdownParser m String
|
||||
strictHtmlBlock = htmlInBalanced (not . isInlineTag)
|
||||
|
|
14
test/command/2552.md
Normal file
14
test/command/2552.md
Normal file
|
@ -0,0 +1,14 @@
|
|||
```
|
||||
% pandoc --strip-comments
|
||||
Foo
|
||||
|
||||
bar
|
||||
|
||||
<!-- comment -->
|
||||
|
||||
baz<!-- bim -->boop
|
||||
^D
|
||||
<p>Foo</p>
|
||||
<p>bar</p>
|
||||
<p>bazboop</p>
|
||||
```
|
Loading…
Reference in a new issue