Support --strip-comments in commonmark/gfm.

This change makes the commonmark reader sensitive to
`readerStripComments`.

Closes #8222.
This commit is contained in:
John MacFarlane 2022-08-13 12:57:28 -07:00
parent 5a99747063
commit 55c524e83c

View file

@ -34,7 +34,10 @@ import Text.Pandoc.Parsing (runParserT, getInput, getPosition,
runF, defaultParserState, option, many1, anyChar, runF, defaultParserState, option, many1, anyChar,
Sources(..), ToSources(..), ParserT, Future, Sources(..), ToSources(..), ParserT, Future,
sourceName, sourceLine, incSourceLine) sourceName, sourceLine, incSourceLine)
import Text.Pandoc.Walk (walk)
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Attoparsec.Text as A
import Control.Applicative ((<|>))
-- | Parse a CommonMark formatted string into a 'Pandoc' structure. -- | Parse a CommonMark formatted string into a 'Pandoc' structure.
readCommonMark :: (PandocMonad m, ToSources a) readCommonMark :: (PandocMonad m, ToSources a)
@ -86,15 +89,41 @@ metaValueParser opts = do
Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls Right (Cm bls :: Cm () Blocks) -> return $ return $ B.toMetaValue bls
readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc readCommonMarkBody :: PandocMonad m => ReaderOptions -> Sources -> [Tok] -> m Pandoc
readCommonMarkBody opts s toks readCommonMarkBody opts s toks =
| isEnabled Ext_sourcepos opts = (if readerStripComments opts
case runIdentity (parseCommonmarkWith (specFor opts) toks) of then walk stripBlockComments . walk stripInlineComments
Left err -> throwError $ PandocParsecError s err else id) <$>
Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls if isEnabled Ext_sourcepos opts
| otherwise = then case runIdentity (parseCommonmarkWith (specFor opts) toks) of
case runIdentity (parseCommonmarkWith (specFor opts) toks) of Left err -> throwError $ PandocParsecError s err
Left err -> throwError $ PandocParsecError s err Right (Cm bls :: Cm SourceRange Blocks) -> return $ B.doc bls
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls else case runIdentity (parseCommonmarkWith (specFor opts) toks) of
Left err -> throwError $ PandocParsecError s err
Right (Cm bls :: Cm () Blocks) -> return $ B.doc bls
stripBlockComments :: Block -> Block
stripBlockComments (RawBlock (B.Format "html") s) =
RawBlock (B.Format "html") (removeComments s)
stripBlockComments x = x
stripInlineComments :: Inline -> Inline
stripInlineComments (RawInline (B.Format "html") s) =
RawInline (B.Format "html") (removeComments s)
stripInlineComments x = x
removeComments :: Text -> Text
removeComments s =
either (const s) id $ A.parseOnly pRemoveComments s
where
pRemoveComments = mconcat <$> A.many'
("" <$ (A.string "<!--" *> A.scan (0 :: Int) scanChar <* A.char '>') <|>
(A.takeWhile1 (/= '<')) <|>
(A.string "<"))
scanChar st c =
case c of
'-' -> Just (st + 1)
'>' | st >= 2 -> Nothing
_ -> Just 0
specFor :: (Monad m, Typeable m, Typeable a, specFor :: (Monad m, Typeable m, Typeable a,
Rangeable (Cm a Inlines), Rangeable (Cm a Blocks)) Rangeable (Cm a Inlines), Rangeable (Cm a Blocks))