From 659ee981764b85fb46845c086e3b10f1fc57a712 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 24 Nov 2019 11:50:28 -0800 Subject: [PATCH] Add unexported Text.Pandoc.Readers.Metadata. For YAML metadata parsing. A step in the direction of #5914. No API change. --- pandoc.cabal | 1 + src/Text/Pandoc/App.hs | 2 +- src/Text/Pandoc/Readers/Markdown.hs | 125 +++++-------------------- src/Text/Pandoc/Readers/Metadata.hs | 140 ++++++++++++++++++++++++++++ 4 files changed, 163 insertions(+), 105 deletions(-) create mode 100644 src/Text/Pandoc/Readers/Metadata.hs diff --git a/pandoc.cabal b/pandoc.cabal index bd2b0650d..2231c28b1 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -590,6 +590,7 @@ library Text.Pandoc.Readers.Org.ParserState, Text.Pandoc.Readers.Org.Parsing, Text.Pandoc.Readers.Org.Shared, + Text.Pandoc.Readers.Metadata, Text.Pandoc.Readers.Roff, Text.Pandoc.Writers.Docx.StyleMap, Text.Pandoc.Writers.Roff, diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs index 4dcd76a24..0565874ad 100644 --- a/src/Text/Pandoc/App.hs +++ b/src/Text/Pandoc/App.hs @@ -55,12 +55,12 @@ import Text.Pandoc.BCP47 (Lang (..), parseBCP47) import Text.Pandoc.Builder (setMeta) import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters) import Text.Pandoc.PDF (makePDF) -import Text.Pandoc.Readers.Markdown (yamlToMeta) import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained) import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs, headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput, defaultUserDataDirs, tshow) import Text.Pandoc.Writers.Shared (lookupMetaString) +import Text.Pandoc.Readers.Markdown (yamlToMeta) import qualified Text.Pandoc.UTF8 as UTF8 #ifndef _WINDOWS import System.Posix.IO (stdOutput) diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index cc3173719..e46396fa0 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where import Prelude import Control.Monad import Control.Monad.Except (throwError) -import qualified Data.ByteString.Lazy as BS import Data.Char (isAlphaNum, isPunctuation, isSpace) import Data.List (sortBy, transpose, elemIndex) import qualified Data.Map as M @@ -30,8 +29,7 @@ import qualified Data.Set as Set import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL -import qualified Data.YAML as YAML -import qualified Data.YAML.Event as YE +import qualified Data.ByteString.Lazy as BL import System.FilePath (addExtension, takeExtension) import Text.HTML.TagSoup import Text.Pandoc.Builder (Blocks, Inlines) @@ -49,6 +47,7 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline) import Text.Pandoc.Shared import qualified Text.Pandoc.UTF8 as UTF8 import Text.Pandoc.XML (fromEntities) +import Text.Pandoc.Readers.Metadata (yamlBsToMeta) type MarkdownParser m = ParserT Text ParserState m @@ -64,6 +63,23 @@ readMarkdown opts s = do Right result -> return result Left e -> throwError e +-- | Read a YAML string and convert it to pandoc metadata. +-- String scalars in the YAML are parsed as Markdown. +yamlToMeta :: PandocMonad m + => ReaderOptions + -> BL.ByteString + -> m Meta +yamlToMeta opts bstr = do + let parser = do + meta <- yamlBsToMeta parseBlocks bstr + return $ runF meta defaultParserState + parsed <- readWithM parser def{ stateOptions = opts } "" + case parsed of + Right result -> return result + Left e -> throwError e + + + -- -- Constants and data structure definitions -- @@ -228,111 +244,12 @@ yamlMetaBlock = try $ do -- by including --- and ..., we allow yaml blocks with just comments: let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."])) optional blanklines - newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml + newMetaF <- yamlBsToMeta parseBlocks + $ UTF8.fromTextLazy $ TL.fromStrict rawYaml -- Since `<>` is left-biased, existing values are not touched: updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF } return mempty --- | Read a YAML string and convert it to pandoc metadata. --- String scalars in the YAML are parsed as Markdown. -yamlToMeta :: PandocMonad m => ReaderOptions -> BS.ByteString -> m Meta -yamlToMeta opts bstr = do - let parser = do - meta <- yamlBsToMeta bstr - return $ runF meta defaultParserState - parsed <- readWithM parser def{ stateOptions = opts } "" - case parsed of - Right result -> return result - Left e -> throwError e - -yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta) -yamlBsToMeta bstr = do - pos <- getPosition - case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of - Right ((YAML.Doc (YAML.Mapping _ _ o)):_) -> (fmap Meta) <$> yamlMap o - Right [] -> return . return $ mempty - Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] -> return . return $ mempty - Right _ -> do - logMessage $ - CouldNotParseYamlMetadata "not an object" - pos - return . return $ mempty - Left (_pos, err') -> do - logMessage $ CouldNotParseYamlMetadata - (T.pack err') pos - return . return $ mempty - -nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text -nodeToKey (YAML.Scalar _ (YAML.SStr t)) = return t -nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = return t -nodeToKey _ = throwError $ PandocParseError - "Non-string key in YAML mapping" - -toMetaValue :: PandocMonad m - => Text -> MarkdownParser m (F MetaValue) -toMetaValue x = - -- Note: a standard quoted or unquoted YAML value will - -- not end in a newline, but a "block" set off with - -- `|` or `>` will. - if "\n" `T.isSuffixOf` x - then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n") - else parseFromString' - ((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks)) - x - where pInlines = trimInlinesF . mconcat <$> manyTill inline eof - asBlocks p = do - p' <- p - return $ MetaBlocks (B.toList p') - asInlines p = do - p' <- p - return $ MetaInlines (B.toList p') - -checkBoolean :: Text -> Maybe Bool -checkBoolean t = - if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" - then Just True - else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" - then Just False - else Nothing - -yamlToMetaValue :: PandocMonad m - => YAML.Node YE.Pos-> MarkdownParser m (F MetaValue) -yamlToMetaValue (YAML.Scalar _ x) = - case x of - YAML.SStr t -> toMetaValue t - YAML.SBool b -> return $ return $ MetaBool b - YAML.SFloat d -> return $ return $ MetaString $ tshow d - YAML.SInt i -> return $ return $ MetaString $ tshow i - YAML.SUnknown _ t -> - case checkBoolean t of - Just b -> return $ return $ MetaBool b - Nothing -> toMetaValue t - YAML.SNull -> return $ return $ MetaString "" -yamlToMetaValue (YAML.Sequence _ _ xs) = do - xs' <- mapM yamlToMetaValue xs - return $ do - xs'' <- sequence xs' - return $ B.toMetaValue xs'' -yamlToMetaValue (YAML.Mapping _ _ o) = fmap B.toMetaValue <$> yamlMap o -yamlToMetaValue _ = return $ return $ MetaString "" - -yamlMap :: PandocMonad m - => M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) - -> MarkdownParser m (F (M.Map Text MetaValue)) -yamlMap o = do - kvs <- forM (M.toList o) $ \(key, v) -> do - k <- nodeToKey key - return (k, v) - let kvs' = filter (not . ignorable . fst) kvs - (fmap M.fromList . sequence) <$> mapM toMeta kvs' - where - ignorable t = "_" `T.isSuffixOf` t - toMeta (k, v) = do - fv <- yamlToMetaValue v - return $ do - v' <- fv - return (k, v') - stopLine :: PandocMonad m => MarkdownParser m () stopLine = try $ (string "---" <|> string "...") >> blankline >> return () diff --git a/src/Text/Pandoc/Readers/Metadata.hs b/src/Text/Pandoc/Readers/Metadata.hs new file mode 100644 index 000000000..76f30e957 --- /dev/null +++ b/src/Text/Pandoc/Readers/Metadata.hs @@ -0,0 +1,140 @@ +{-# LANGUAGE NoImplicitPrelude #-} +{-# LANGUAGE RelaxedPolyRec #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ViewPatterns #-} +{- | + Module : Text.Pandoc.Readers.Metadata + Copyright : Copyright (C) 2006-2019 John MacFarlane + License : GNU GPL, version 2 or above + + Maintainer : John MacFarlane + Stability : alpha + Portability : portable + +Parse YAML/JSON metadata to 'Pandoc' 'Meta'. +-} +module Text.Pandoc.Readers.Metadata ( yamlBsToMeta ) where + +import Prelude +import Control.Monad +import Control.Monad.Except (throwError) +import qualified Data.ByteString.Lazy as BL +import qualified Data.Map as M +import Data.Text (Text) +import qualified Data.Text as T +import qualified Data.YAML as YAML +import qualified Data.YAML.Event as YE +import qualified Text.Pandoc.Builder as B +import Text.Pandoc.Builder (Blocks) +import Text.Pandoc.Class (PandocMonad (..)) +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Logging +import Text.Pandoc.Parsing hiding (tableWith) +import Text.Pandoc.Shared + +yamlBsToMeta :: PandocMonad m + => ParserT Text ParserState m (F Blocks) + -> BL.ByteString + -> ParserT Text ParserState m (F Meta) +yamlBsToMeta pBlocks bstr = do + pos <- getPosition + case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of + Right ((YAML.Doc (YAML.Mapping _ _ o)):_) + -> (fmap Meta) <$> yamlMap pBlocks o + Right [] -> return . return $ mempty + Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] + -> return . return $ mempty + Right _ -> do logMessage $ CouldNotParseYamlMetadata "not an object" + pos + return . return $ mempty + Left (_pos, err') + -> do logMessage $ CouldNotParseYamlMetadata + (T.pack err') pos + return . return $ mempty + +nodeToKey :: PandocMonad m + => YAML.Node YE.Pos + -> m Text +nodeToKey (YAML.Scalar _ (YAML.SStr t)) = return t +nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = return t +nodeToKey _ = throwError $ PandocParseError + "Non-string key in YAML mapping" + +toMetaValue :: PandocMonad m + => ParserT Text ParserState m (F Blocks) + -> Text + -> ParserT Text ParserState m (F MetaValue) +toMetaValue pBlocks x = + -- Note: a standard quoted or unquoted YAML value will + -- not end in a newline, but a "block" set off with + -- `|` or `>` will. + if "\n" `T.isSuffixOf` x + then parseFromString' (asBlocks <$> pBlocks) (x <> "\n") + else parseFromString' pInlines x + where pInlines = do + bs <- pBlocks + return $ do + bs' <- bs + return $ + case B.toList bs' of + [Plain ils] -> MetaInlines ils + [Para ils] -> MetaInlines ils + xs -> MetaBlocks xs + asBlocks p = do + p' <- p + return $ MetaBlocks (B.toList p') + +checkBoolean :: Text -> Maybe Bool +checkBoolean t = + if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE" + then Just True + else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE" + then Just False + else Nothing + +yamlToMetaValue :: PandocMonad m + => ParserT Text ParserState m (F Blocks) + -> YAML.Node YE.Pos + -> ParserT Text ParserState m (F MetaValue) +yamlToMetaValue pBlocks (YAML.Scalar _ x) = + case x of + YAML.SStr t -> toMetaValue pBlocks t + YAML.SBool b -> return $ return $ MetaBool b + YAML.SFloat d -> return $ return $ MetaString $ tshow d + YAML.SInt i -> return $ return $ MetaString $ tshow i + YAML.SUnknown _ t -> + case checkBoolean t of + Just b -> return $ return $ MetaBool b + Nothing -> toMetaValue pBlocks t + YAML.SNull -> return $ return $ MetaString "" + +yamlToMetaValue pBlocks (YAML.Sequence _ _ xs) = do + xs' <- mapM (yamlToMetaValue pBlocks) xs + return $ do + xs'' <- sequence xs' + return $ B.toMetaValue xs'' +yamlToMetaValue pBlocks (YAML.Mapping _ _ o) = + fmap B.toMetaValue <$> yamlMap pBlocks o +yamlToMetaValue _ _ = return $ return $ MetaString "" + +yamlMap :: PandocMonad m + => ParserT Text ParserState m (F Blocks) + -> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos) + -> ParserT Text ParserState m (F (M.Map Text MetaValue)) +yamlMap pBlocks o = do + kvs <- forM (M.toList o) $ \(key, v) -> do + k <- nodeToKey key + return (k, v) + let kvs' = filter (not . ignorable . fst) kvs + (fmap M.fromList . sequence) <$> mapM toMeta kvs' + where + ignorable t = "_" `T.isSuffixOf` t + toMeta (k, v) = do + fv <- yamlToMetaValue pBlocks v + return $ do + v' <- fv + return (k, v') +