Add unexported Text.Pandoc.Readers.Metadata.
For YAML metadata parsing. A step in the direction of #5914. No API change.
This commit is contained in:
parent
da5b6d5c0b
commit
659ee98176
4 changed files with 163 additions and 105 deletions
|
@ -590,6 +590,7 @@ library
|
||||||
Text.Pandoc.Readers.Org.ParserState,
|
Text.Pandoc.Readers.Org.ParserState,
|
||||||
Text.Pandoc.Readers.Org.Parsing,
|
Text.Pandoc.Readers.Org.Parsing,
|
||||||
Text.Pandoc.Readers.Org.Shared,
|
Text.Pandoc.Readers.Org.Shared,
|
||||||
|
Text.Pandoc.Readers.Metadata,
|
||||||
Text.Pandoc.Readers.Roff,
|
Text.Pandoc.Readers.Roff,
|
||||||
Text.Pandoc.Writers.Docx.StyleMap,
|
Text.Pandoc.Writers.Docx.StyleMap,
|
||||||
Text.Pandoc.Writers.Roff,
|
Text.Pandoc.Writers.Roff,
|
||||||
|
|
|
@ -55,12 +55,12 @@ import Text.Pandoc.BCP47 (Lang (..), parseBCP47)
|
||||||
import Text.Pandoc.Builder (setMeta)
|
import Text.Pandoc.Builder (setMeta)
|
||||||
import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
|
import Text.Pandoc.Filter (Filter (JSONFilter, LuaFilter), applyFilters)
|
||||||
import Text.Pandoc.PDF (makePDF)
|
import Text.Pandoc.PDF (makePDF)
|
||||||
import Text.Pandoc.Readers.Markdown (yamlToMeta)
|
|
||||||
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
|
import Text.Pandoc.SelfContained (makeDataURI, makeSelfContained)
|
||||||
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
|
import Text.Pandoc.Shared (eastAsianLineBreakFilter, stripEmptyParagraphs,
|
||||||
headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
|
headerShift, isURI, tabFilter, uriPathToPath, filterIpynbOutput,
|
||||||
defaultUserDataDirs, tshow)
|
defaultUserDataDirs, tshow)
|
||||||
import Text.Pandoc.Writers.Shared (lookupMetaString)
|
import Text.Pandoc.Writers.Shared (lookupMetaString)
|
||||||
|
import Text.Pandoc.Readers.Markdown (yamlToMeta)
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8
|
import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
#ifndef _WINDOWS
|
#ifndef _WINDOWS
|
||||||
import System.Posix.IO (stdOutput)
|
import System.Posix.IO (stdOutput)
|
||||||
|
|
|
@ -20,7 +20,6 @@ module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
|
||||||
import Prelude
|
import Prelude
|
||||||
import Control.Monad
|
import Control.Monad
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError)
|
||||||
import qualified Data.ByteString.Lazy as BS
|
|
||||||
import Data.Char (isAlphaNum, isPunctuation, isSpace)
|
import Data.Char (isAlphaNum, isPunctuation, isSpace)
|
||||||
import Data.List (sortBy, transpose, elemIndex)
|
import Data.List (sortBy, transpose, elemIndex)
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -30,8 +29,7 @@ import qualified Data.Set as Set
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Data.Text.Lazy as TL
|
import qualified Data.Text.Lazy as TL
|
||||||
import qualified Data.YAML as YAML
|
import qualified Data.ByteString.Lazy as BL
|
||||||
import qualified Data.YAML.Event as YE
|
|
||||||
import System.FilePath (addExtension, takeExtension)
|
import System.FilePath (addExtension, takeExtension)
|
||||||
import Text.HTML.TagSoup
|
import Text.HTML.TagSoup
|
||||||
import Text.Pandoc.Builder (Blocks, Inlines)
|
import Text.Pandoc.Builder (Blocks, Inlines)
|
||||||
|
@ -49,6 +47,7 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
|
||||||
import Text.Pandoc.Shared
|
import Text.Pandoc.Shared
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8
|
import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
import Text.Pandoc.XML (fromEntities)
|
import Text.Pandoc.XML (fromEntities)
|
||||||
|
import Text.Pandoc.Readers.Metadata (yamlBsToMeta)
|
||||||
|
|
||||||
type MarkdownParser m = ParserT Text ParserState m
|
type MarkdownParser m = ParserT Text ParserState m
|
||||||
|
|
||||||
|
@ -64,6 +63,23 @@ readMarkdown opts s = do
|
||||||
Right result -> return result
|
Right result -> return result
|
||||||
Left e -> throwError e
|
Left e -> throwError e
|
||||||
|
|
||||||
|
-- | Read a YAML string and convert it to pandoc metadata.
|
||||||
|
-- String scalars in the YAML are parsed as Markdown.
|
||||||
|
yamlToMeta :: PandocMonad m
|
||||||
|
=> ReaderOptions
|
||||||
|
-> BL.ByteString
|
||||||
|
-> m Meta
|
||||||
|
yamlToMeta opts bstr = do
|
||||||
|
let parser = do
|
||||||
|
meta <- yamlBsToMeta parseBlocks bstr
|
||||||
|
return $ runF meta defaultParserState
|
||||||
|
parsed <- readWithM parser def{ stateOptions = opts } ""
|
||||||
|
case parsed of
|
||||||
|
Right result -> return result
|
||||||
|
Left e -> throwError e
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Constants and data structure definitions
|
-- Constants and data structure definitions
|
||||||
--
|
--
|
||||||
|
@ -228,111 +244,12 @@ yamlMetaBlock = try $ do
|
||||||
-- by including --- and ..., we allow yaml blocks with just comments:
|
-- by including --- and ..., we allow yaml blocks with just comments:
|
||||||
let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
|
let rawYaml = T.unlines ("---" : (rawYamlLines ++ ["..."]))
|
||||||
optional blanklines
|
optional blanklines
|
||||||
newMetaF <- yamlBsToMeta $ UTF8.fromTextLazy $ TL.fromStrict rawYaml
|
newMetaF <- yamlBsToMeta parseBlocks
|
||||||
|
$ UTF8.fromTextLazy $ TL.fromStrict rawYaml
|
||||||
-- Since `<>` is left-biased, existing values are not touched:
|
-- Since `<>` is left-biased, existing values are not touched:
|
||||||
updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
|
updateState $ \st -> st{ stateMeta' = (stateMeta' st) <> newMetaF }
|
||||||
return mempty
|
return mempty
|
||||||
|
|
||||||
-- | Read a YAML string and convert it to pandoc metadata.
|
|
||||||
-- String scalars in the YAML are parsed as Markdown.
|
|
||||||
yamlToMeta :: PandocMonad m => ReaderOptions -> BS.ByteString -> m Meta
|
|
||||||
yamlToMeta opts bstr = do
|
|
||||||
let parser = do
|
|
||||||
meta <- yamlBsToMeta bstr
|
|
||||||
return $ runF meta defaultParserState
|
|
||||||
parsed <- readWithM parser def{ stateOptions = opts } ""
|
|
||||||
case parsed of
|
|
||||||
Right result -> return result
|
|
||||||
Left e -> throwError e
|
|
||||||
|
|
||||||
yamlBsToMeta :: PandocMonad m => BS.ByteString -> MarkdownParser m (F Meta)
|
|
||||||
yamlBsToMeta bstr = do
|
|
||||||
pos <- getPosition
|
|
||||||
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
|
|
||||||
Right ((YAML.Doc (YAML.Mapping _ _ o)):_) -> (fmap Meta) <$> yamlMap o
|
|
||||||
Right [] -> return . return $ mempty
|
|
||||||
Right [YAML.Doc (YAML.Scalar _ YAML.SNull)] -> return . return $ mempty
|
|
||||||
Right _ -> do
|
|
||||||
logMessage $
|
|
||||||
CouldNotParseYamlMetadata "not an object"
|
|
||||||
pos
|
|
||||||
return . return $ mempty
|
|
||||||
Left (_pos, err') -> do
|
|
||||||
logMessage $ CouldNotParseYamlMetadata
|
|
||||||
(T.pack err') pos
|
|
||||||
return . return $ mempty
|
|
||||||
|
|
||||||
nodeToKey :: PandocMonad m => YAML.Node YE.Pos -> m Text
|
|
||||||
nodeToKey (YAML.Scalar _ (YAML.SStr t)) = return t
|
|
||||||
nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = return t
|
|
||||||
nodeToKey _ = throwError $ PandocParseError
|
|
||||||
"Non-string key in YAML mapping"
|
|
||||||
|
|
||||||
toMetaValue :: PandocMonad m
|
|
||||||
=> Text -> MarkdownParser m (F MetaValue)
|
|
||||||
toMetaValue x =
|
|
||||||
-- Note: a standard quoted or unquoted YAML value will
|
|
||||||
-- not end in a newline, but a "block" set off with
|
|
||||||
-- `|` or `>` will.
|
|
||||||
if "\n" `T.isSuffixOf` x
|
|
||||||
then parseFromString' (asBlocks <$> parseBlocks) (x <> "\n")
|
|
||||||
else parseFromString'
|
|
||||||
((asInlines <$> try pInlines) <|> (asBlocks <$> parseBlocks))
|
|
||||||
x
|
|
||||||
where pInlines = trimInlinesF . mconcat <$> manyTill inline eof
|
|
||||||
asBlocks p = do
|
|
||||||
p' <- p
|
|
||||||
return $ MetaBlocks (B.toList p')
|
|
||||||
asInlines p = do
|
|
||||||
p' <- p
|
|
||||||
return $ MetaInlines (B.toList p')
|
|
||||||
|
|
||||||
checkBoolean :: Text -> Maybe Bool
|
|
||||||
checkBoolean t =
|
|
||||||
if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
|
|
||||||
then Just True
|
|
||||||
else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
|
|
||||||
then Just False
|
|
||||||
else Nothing
|
|
||||||
|
|
||||||
yamlToMetaValue :: PandocMonad m
|
|
||||||
=> YAML.Node YE.Pos-> MarkdownParser m (F MetaValue)
|
|
||||||
yamlToMetaValue (YAML.Scalar _ x) =
|
|
||||||
case x of
|
|
||||||
YAML.SStr t -> toMetaValue t
|
|
||||||
YAML.SBool b -> return $ return $ MetaBool b
|
|
||||||
YAML.SFloat d -> return $ return $ MetaString $ tshow d
|
|
||||||
YAML.SInt i -> return $ return $ MetaString $ tshow i
|
|
||||||
YAML.SUnknown _ t ->
|
|
||||||
case checkBoolean t of
|
|
||||||
Just b -> return $ return $ MetaBool b
|
|
||||||
Nothing -> toMetaValue t
|
|
||||||
YAML.SNull -> return $ return $ MetaString ""
|
|
||||||
yamlToMetaValue (YAML.Sequence _ _ xs) = do
|
|
||||||
xs' <- mapM yamlToMetaValue xs
|
|
||||||
return $ do
|
|
||||||
xs'' <- sequence xs'
|
|
||||||
return $ B.toMetaValue xs''
|
|
||||||
yamlToMetaValue (YAML.Mapping _ _ o) = fmap B.toMetaValue <$> yamlMap o
|
|
||||||
yamlToMetaValue _ = return $ return $ MetaString ""
|
|
||||||
|
|
||||||
yamlMap :: PandocMonad m
|
|
||||||
=> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
|
|
||||||
-> MarkdownParser m (F (M.Map Text MetaValue))
|
|
||||||
yamlMap o = do
|
|
||||||
kvs <- forM (M.toList o) $ \(key, v) -> do
|
|
||||||
k <- nodeToKey key
|
|
||||||
return (k, v)
|
|
||||||
let kvs' = filter (not . ignorable . fst) kvs
|
|
||||||
(fmap M.fromList . sequence) <$> mapM toMeta kvs'
|
|
||||||
where
|
|
||||||
ignorable t = "_" `T.isSuffixOf` t
|
|
||||||
toMeta (k, v) = do
|
|
||||||
fv <- yamlToMetaValue v
|
|
||||||
return $ do
|
|
||||||
v' <- fv
|
|
||||||
return (k, v')
|
|
||||||
|
|
||||||
stopLine :: PandocMonad m => MarkdownParser m ()
|
stopLine :: PandocMonad m => MarkdownParser m ()
|
||||||
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
|
stopLine = try $ (string "---" <|> string "...") >> blankline >> return ()
|
||||||
|
|
||||||
|
|
140
src/Text/Pandoc/Readers/Metadata.hs
Normal file
140
src/Text/Pandoc/Readers/Metadata.hs
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
{-# LANGUAGE NoImplicitPrelude #-}
|
||||||
|
{-# LANGUAGE RelaxedPolyRec #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE TupleSections #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE ViewPatterns #-}
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Readers.Metadata
|
||||||
|
Copyright : Copyright (C) 2006-2019 John MacFarlane
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||||
|
Stability : alpha
|
||||||
|
Portability : portable
|
||||||
|
|
||||||
|
Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Readers.Metadata ( yamlBsToMeta ) where
|
||||||
|
|
||||||
|
import Prelude
|
||||||
|
import Control.Monad
|
||||||
|
import Control.Monad.Except (throwError)
|
||||||
|
import qualified Data.ByteString.Lazy as BL
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.YAML as YAML
|
||||||
|
import qualified Data.YAML.Event as YE
|
||||||
|
import qualified Text.Pandoc.Builder as B
|
||||||
|
import Text.Pandoc.Builder (Blocks)
|
||||||
|
import Text.Pandoc.Class (PandocMonad (..))
|
||||||
|
import Text.Pandoc.Definition
|
||||||
|
import Text.Pandoc.Error
|
||||||
|
import Text.Pandoc.Logging
|
||||||
|
import Text.Pandoc.Parsing hiding (tableWith)
|
||||||
|
import Text.Pandoc.Shared
|
||||||
|
|
||||||
|
yamlBsToMeta :: PandocMonad m
|
||||||
|
=> ParserT Text ParserState m (F Blocks)
|
||||||
|
-> BL.ByteString
|
||||||
|
-> ParserT Text ParserState m (F Meta)
|
||||||
|
yamlBsToMeta pBlocks bstr = do
|
||||||
|
pos <- getPosition
|
||||||
|
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
|
||||||
|
Right ((YAML.Doc (YAML.Mapping _ _ o)):_)
|
||||||
|
-> (fmap Meta) <$> yamlMap pBlocks o
|
||||||
|
Right [] -> return . return $ mempty
|
||||||
|
Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
|
||||||
|
-> return . return $ mempty
|
||||||
|
Right _ -> do logMessage $ CouldNotParseYamlMetadata "not an object"
|
||||||
|
pos
|
||||||
|
return . return $ mempty
|
||||||
|
Left (_pos, err')
|
||||||
|
-> do logMessage $ CouldNotParseYamlMetadata
|
||||||
|
(T.pack err') pos
|
||||||
|
return . return $ mempty
|
||||||
|
|
||||||
|
nodeToKey :: PandocMonad m
|
||||||
|
=> YAML.Node YE.Pos
|
||||||
|
-> m Text
|
||||||
|
nodeToKey (YAML.Scalar _ (YAML.SStr t)) = return t
|
||||||
|
nodeToKey (YAML.Scalar _ (YAML.SUnknown _ t)) = return t
|
||||||
|
nodeToKey _ = throwError $ PandocParseError
|
||||||
|
"Non-string key in YAML mapping"
|
||||||
|
|
||||||
|
toMetaValue :: PandocMonad m
|
||||||
|
=> ParserT Text ParserState m (F Blocks)
|
||||||
|
-> Text
|
||||||
|
-> ParserT Text ParserState m (F MetaValue)
|
||||||
|
toMetaValue pBlocks x =
|
||||||
|
-- Note: a standard quoted or unquoted YAML value will
|
||||||
|
-- not end in a newline, but a "block" set off with
|
||||||
|
-- `|` or `>` will.
|
||||||
|
if "\n" `T.isSuffixOf` x
|
||||||
|
then parseFromString' (asBlocks <$> pBlocks) (x <> "\n")
|
||||||
|
else parseFromString' pInlines x
|
||||||
|
where pInlines = do
|
||||||
|
bs <- pBlocks
|
||||||
|
return $ do
|
||||||
|
bs' <- bs
|
||||||
|
return $
|
||||||
|
case B.toList bs' of
|
||||||
|
[Plain ils] -> MetaInlines ils
|
||||||
|
[Para ils] -> MetaInlines ils
|
||||||
|
xs -> MetaBlocks xs
|
||||||
|
asBlocks p = do
|
||||||
|
p' <- p
|
||||||
|
return $ MetaBlocks (B.toList p')
|
||||||
|
|
||||||
|
checkBoolean :: Text -> Maybe Bool
|
||||||
|
checkBoolean t =
|
||||||
|
if t == T.pack "true" || t == T.pack "True" || t == T.pack "TRUE"
|
||||||
|
then Just True
|
||||||
|
else if t == T.pack "false" || t == T.pack "False" || t == T.pack "FALSE"
|
||||||
|
then Just False
|
||||||
|
else Nothing
|
||||||
|
|
||||||
|
yamlToMetaValue :: PandocMonad m
|
||||||
|
=> ParserT Text ParserState m (F Blocks)
|
||||||
|
-> YAML.Node YE.Pos
|
||||||
|
-> ParserT Text ParserState m (F MetaValue)
|
||||||
|
yamlToMetaValue pBlocks (YAML.Scalar _ x) =
|
||||||
|
case x of
|
||||||
|
YAML.SStr t -> toMetaValue pBlocks t
|
||||||
|
YAML.SBool b -> return $ return $ MetaBool b
|
||||||
|
YAML.SFloat d -> return $ return $ MetaString $ tshow d
|
||||||
|
YAML.SInt i -> return $ return $ MetaString $ tshow i
|
||||||
|
YAML.SUnknown _ t ->
|
||||||
|
case checkBoolean t of
|
||||||
|
Just b -> return $ return $ MetaBool b
|
||||||
|
Nothing -> toMetaValue pBlocks t
|
||||||
|
YAML.SNull -> return $ return $ MetaString ""
|
||||||
|
|
||||||
|
yamlToMetaValue pBlocks (YAML.Sequence _ _ xs) = do
|
||||||
|
xs' <- mapM (yamlToMetaValue pBlocks) xs
|
||||||
|
return $ do
|
||||||
|
xs'' <- sequence xs'
|
||||||
|
return $ B.toMetaValue xs''
|
||||||
|
yamlToMetaValue pBlocks (YAML.Mapping _ _ o) =
|
||||||
|
fmap B.toMetaValue <$> yamlMap pBlocks o
|
||||||
|
yamlToMetaValue _ _ = return $ return $ MetaString ""
|
||||||
|
|
||||||
|
yamlMap :: PandocMonad m
|
||||||
|
=> ParserT Text ParserState m (F Blocks)
|
||||||
|
-> M.Map (YAML.Node YE.Pos) (YAML.Node YE.Pos)
|
||||||
|
-> ParserT Text ParserState m (F (M.Map Text MetaValue))
|
||||||
|
yamlMap pBlocks o = do
|
||||||
|
kvs <- forM (M.toList o) $ \(key, v) -> do
|
||||||
|
k <- nodeToKey key
|
||||||
|
return (k, v)
|
||||||
|
let kvs' = filter (not . ignorable . fst) kvs
|
||||||
|
(fmap M.fromList . sequence) <$> mapM toMeta kvs'
|
||||||
|
where
|
||||||
|
ignorable t = "_" `T.isSuffixOf` t
|
||||||
|
toMeta (k, v) = do
|
||||||
|
fv <- yamlToMetaValue pBlocks v
|
||||||
|
return $ do
|
||||||
|
v' <- fv
|
||||||
|
return (k, v')
|
||||||
|
|
Loading…
Add table
Reference in a new issue