Support --strip-comments in commonmark/gfm.

This change makes the commonmark reader sensitive to
`readerStripComments`.

Closes #8222.
This commit is contained in:
John MacFarlane 2022-08-13 12:57:28 -07:00
parent 5a99747063
commit 55c524e83c

View file

@ -34,7 +34,10 @@ import Text.Pandoc.Parsing (runParserT, getInput, getPosition,
runF, defaultParserState, option, many1, anyChar,
Sources(..), ToSources(..), ParserT, Future,
sourceName, sourceLine, incSourceLine)
import Text.Pandoc.Walk (walk)
import qualified Data.Text as T
import qualified Data.Attoparsec.Text as A
import Control.Applicative ((<|>))
-- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: (PandocMonad m, ToSources a)
@ -86,15 +89,41 @@ metaValueParser opts = do
Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
readCommonMarkBody opts s toks
| isEnabled Ext_sourcepos opts =
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
| otherwise =
case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
readCommonMarkBody opts s toks =
(if readerStripComments opts
then walk stripBlockComments . walk stripInlineComments
else id) <$>
if isEnabled Ext_sourcepos opts
then case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
else case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
stripBlockComments :: Block -> Block
stripBlockComments (RawBlock (B.Format "html") s) =
RawBlock (B.Format "html") (removeComments s)
stripBlockComments x = x
stripInlineComments :: Inline -> Inline
stripInlineComments (RawInline (B.Format "html") s) =
RawInline (B.Format "html") (removeComments s)
stripInlineComments x = x
removeComments :: Text -> Text
removeComments s =
either (const s) id $ A.parseOnly pRemoveComments s
where
pRemoveComments = mconcat <$> A.many'
("" <$ (A.string "<!--" *> A.scan (0 :: Int) scanChar <* A.char '>') <|>
(A.takeWhile1 (/= '<')) <|>
(A.string "<"))
scanChar st c =
case c of
'-' -> Just (st + 1)
'>' | st >= 2 -> Nothing
_ -> Just 0
specFor :: (Monad m, Typeable m, Typeable a,
Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))