From c389211e2f678d9327cd2c008d54a1e438f07a07 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 20 Mar 2021 15:45:09 -0700 Subject: [PATCH] Support `yaml_metadata_block` extension form commonmark, gfm. This is a bit more limited than with markdown, as documented in the manual: - The YAML block must be the first thing in the input. - The leaf notes are parsed in isolation from the rest of the document. So, for example, you can't use reference links if the references are defined later in the document. Closes #6537. --- MANUAL.txt | 16 ++++++++++++++ src/Text/Pandoc/Extensions.hs | 3 +++ src/Text/Pandoc/Parsing.hs | 2 +- src/Text/Pandoc/Readers/CommonMark.hs | 30 +++++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) diff --git a/MANUAL.txt b/MANUAL.txt index bc9e226e9..845d1dbba 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -4240,6 +4240,22 @@ will be interpreted as markdown. For example: \renewcommand{\section}[1]{\clearpage\oldsection{#1}} ``` +Note: the `yaml_metadata_block` extension works with +`commonmark` as well as `markdown` (and it is enabled by default +in `gfm` and `commonmark_x`). However, in these formats the +following restrictions apply: + +- The YAML metadata block must occur at the beginning of the + document (and there can be only one). If multiple files are + given as arguments to pandoc, only the first can be a YAML + metadata block. + +- The leaf nodes of the YAML structure are parsed in isolation from + each other and from the rest of the document. So, for + example, you can't use a reference link in these contexts + if the link definition is somewhere else in the document. + + ## Backslash escapes #### Extension: `all_symbols_escapable` #### diff --git a/src/Text/Pandoc/Extensions.hs b/src/Text/Pandoc/Extensions.hs index 266a09e3c..6423d5f56 100644 --- a/src/Text/Pandoc/Extensions.hs +++ b/src/Text/Pandoc/Extensions.hs @@ -354,6 +354,7 @@ getDefaultExtensions "gfm" = extensionsFromList , Ext_strikeout , Ext_task_lists , Ext_emoji + , Ext_yaml_metadata_block ] getDefaultExtensions "commonmark" = extensionsFromList [Ext_raw_html] @@ -379,6 +380,7 @@ getDefaultExtensions "commonmark_x" = extensionsFromList , Ext_raw_attribute , Ext_implicit_header_references , Ext_attributes + , Ext_yaml_metadata_block ] getDefaultExtensions "org" = extensionsFromList [Ext_citations, @@ -511,6 +513,7 @@ getAllExtensions f = universalExtensions <> getAll f , Ext_implicit_header_references , Ext_attributes , Ext_sourcepos + , Ext_yaml_metadata_block ] getAll "commonmark_x" = getAll "commonmark" getAll "org" = autoIdExtensions <> diff --git a/src/Text/Pandoc/Parsing.hs b/src/Text/Pandoc/Parsing.hs index 44e6af59e..0c2078721 100644 --- a/src/Text/Pandoc/Parsing.hs +++ b/src/Text/Pandoc/Parsing.hs @@ -517,7 +517,7 @@ parseFromString :: (Stream s m Char, IsString s) -> ParserT s st m r parseFromString parser str = do oldPos <- getPosition - setPosition $ initialPos "chunk" + setPosition $ initialPos " chunk" oldInput <- getInput setInput $ fromString $ T.unpack str result <- parser diff --git a/src/Text/Pandoc/Readers/CommonMark.hs b/src/Text/Pandoc/Readers/CommonMark.hs index 150a837e4..244f77940 100644 --- a/src/Text/Pandoc/Readers/CommonMark.hs +++ b/src/Text/Pandoc/Readers/CommonMark.hs @@ -26,13 +26,43 @@ import Text.Pandoc.Definition import Text.Pandoc.Builder as B import Text.Pandoc.Options import Text.Pandoc.Error +import Text.Pandoc.Readers.Metadata (yamlMetaBlock) import Control.Monad.Except import Data.Functor.Identity (runIdentity) import Data.Typeable +import Text.Pandoc.Parsing (runParserT, getPosition, sourceLine, + runF, defaultParserState, take1WhileP, option) +import qualified Data.Text as T -- | Parse a CommonMark formatted string into a 'Pandoc' structure. readCommonMark :: PandocMonad m => ReaderOptions -> Text -> m Pandoc readCommonMark opts s + | isEnabled Ext_yaml_metadata_block opts + , "---" `T.isPrefixOf` s = do + let metaValueParser = do + inp <- option "" $ take1WhileP (const True) + case runIdentity + (commonmarkWith (specFor opts) "metadata value" inp) of + Left _ -> mzero + Right (Cm bls :: Cm () Blocks) + -> return $ return $ B.toMetaValue bls + res <- runParserT (do meta <- yamlMetaBlock metaValueParser + pos <- getPosition + return (meta, pos)) + defaultParserState "YAML metadata" s + case res of + Left _ -> readCommonMarkBody opts s + Right (meta, pos) -> do + let dropLines 0 = id + dropLines n = dropLines (n - 1) . T.drop 1 . T.dropWhile (/='\n') + let metaLines = sourceLine pos - 1 + let body = T.replicate metaLines "\n" <> dropLines metaLines s + Pandoc _ bs <- readCommonMarkBody opts body + return $ Pandoc (runF meta defaultParserState) bs + | otherwise = readCommonMarkBody opts s + +readCommonMarkBody :: PandocMonad m => ReaderOptions -> Text -> m Pandoc +readCommonMarkBody opts s | isEnabled Ext_sourcepos opts = case runIdentity (commonmarkWith (specFor opts) "" s) of Left err -> throwError $ PandocParsecError s err