From 55c524e83cda8ab90b3b202e32b79106b408efb0 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 13 Aug 2022 12:57:28 -0700 Subject: [PATCH] Support --strip-comments in commonmark/gfm. This change makes the commonmark reader sensitive to `readerStripComments`. Closes #8222. --- src/Text/Pandoc/Readers/CommonMark.hs | 47 ++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index ecc6505f8..528d84dbf 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -34,7 +34,10 @@ import Text.Pandoc.Parsing (runParserT, getInput, getPosition, runF, defaultParserState, option, many1, anyChar, Sources(..), ToSources(..), ParserT, Future, sourceName, sourceLine, incSourceLine) +import Text.Pandoc.Walk (walk) import qualified Data.Text as T +import qualified Data.Attoparsec.Text as A +import Control.Applicative ((<|>)) -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: (PandocMonad m, ToSources a) @@ -86,15 +89,41 @@ metaValueParser opts = do Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc -readCommonMarkBody opts s toks - | isEnabled Ext_sourcepos opts = - case runIdentity (parseCommonmarkWith (specFor opts) toks) of - Left err -> throwError $ PandocParsecError s err - Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls - | otherwise = - case runIdentity (parseCommonmarkWith (specFor opts) toks) of - Left err -> throwError $ PandocParsecError s err - Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls +readCommonMarkBody opts s toks = + (if readerStripComments opts + then walk stripBlockComments . walk stripInlineComments + else id) <$> + if isEnabled Ext_sourcepos opts + then case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left err -> throwError $ PandocParsecError s err + Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls + else case runIdentity (parseCommonmarkWith (specFor opts) toks) of + Left err -> throwError $ PandocParsecError s err + Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls + +stripBlockComments :: Block -> Block +stripBlockComments (RawBlock (B.Format "html") s) = + RawBlock (B.Format "html") (removeComments s) +stripBlockComments x = x + +stripInlineComments :: Inline -> Inline +stripInlineComments (RawInline (B.Format "html") s) = + RawInline (B.Format "html") (removeComments s) +stripInlineComments x = x + +removeComments :: Text -> Text +removeComments s = + either (const s) id $ A.parseOnly pRemoveComments s + where + pRemoveComments = mconcat <$> A.many' + ("" <$ (A.string "