Add yamlToRefs, yamlBsToRefs.

T.P.Readers.Markdown now exports yamlToRefs. [API change]

T.P.Readers.Metadata exports yamlBsToRefs. [API change]

These allow specifying an id filter so we parse only references
that are used in the document.  Improves timing with a 3M
yaml references file from 36s to 17s.
This commit is contained in:
John MacFarlane 2020-10-05 21:07:47 -07:00
parent 89e4f1bf9a
commit 6a32ea71ea
3 changed files with 74 additions and 10 deletions

View file

@ -18,7 +18,7 @@ import Text.Pandoc.Citeproc.Locator (parseLocator)
import Text.Pandoc.Citeproc.CslJson (cslJsonToReferences)
import Text.Pandoc.Citeproc.BibTeX (readBibtexString, Variant(..))
import Text.Pandoc.Citeproc.MetaValue (metaValueToReference, metaValueToText)
import Text.Pandoc.Readers.Markdown (yamlToMeta)
import Text.Pandoc.Readers.Markdown (yamlToRefs)
import Text.Pandoc.Class (setResourcePath, getResourcePath, getUserDataDir)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
@ -213,12 +213,10 @@ getRefs locale format idpred raw =
(return . filter (idpred . unItemId . referenceId)) .
cslJsonToReferences $ raw
Format_yaml -> do
meta <- yamlToMeta def{ readerExtensions = pandocExtensions }
(L.fromStrict raw)
case lookupMeta "references" meta of
Just (MetaList rs) ->
return $ mapMaybe (metaValueToReference idpred) rs
_ -> throwError $ PandocAppError "No references field"
rs <- yamlToRefs idpred
def{ readerExtensions = pandocExtensions }
(L.fromStrict raw)
return $ mapMaybe (metaValueToReference idpred . MetaMap) rs
-- localized quotes
convertQuotes :: Locale -> Inline -> Inline

View file

@ -13,7 +13,10 @@
Conversion of markdown-formatted plain text to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Markdown ( readMarkdown, yamlToMeta ) where
module Text.Pandoc.Readers.Markdown (
readMarkdown,
yamlToMeta,
yamlToRefs ) where
import Control.Monad
import Control.Monad.Except (throwError)
@ -44,7 +47,7 @@ import Text.Pandoc.Readers.LaTeX (applyMacros, rawLaTeXBlock, rawLaTeXInline)
import Text.Pandoc.Shared
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Pandoc.XML (fromEntities)
import Text.Pandoc.Readers.Metadata (yamlBsToMeta)
import Text.Pandoc.Readers.Metadata (yamlBsToMeta, yamlBsToRefs)
type MarkdownParser m = ParserT Text ParserState m
@ -75,6 +78,26 @@ yamlToMeta opts bstr = do
Right result -> return result
Left e -> throwError e
-- | Read a YAML string and extract references from the
-- 'references' field, filter using an id predicate and
-- parsing fields as Markdown.
yamlToRefs :: PandocMonad m
=> (Text -> Bool)
-> ReaderOptions
-> BL.ByteString
-> m [M.Map Text MetaValue]
yamlToRefs idpred opts bstr = do
let parser = do
refs <- yamlBsToRefs (fmap B.toMetaValue <$> parseBlocks) idpred bstr
return $ runF refs defaultParserState
parsed <- readWithM parser def{ stateOptions = opts } ""
case parsed of
Right result -> return result
Left e -> throwError e
--
-- Constants and data structure definitions
--

View file

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{- |
Module : Text.Pandoc.Readers.Metadata
Copyright : Copyright (C) 2006-2020 John MacFarlane
@ -10,7 +11,10 @@
Parse YAML/JSON metadata to 'Pandoc' 'Meta'.
-}
module Text.Pandoc.Readers.Metadata ( yamlBsToMeta, yamlMap ) where
module Text.Pandoc.Readers.Metadata (
yamlBsToMeta,
yamlBsToRefs,
yamlMap ) where
import Control.Monad
import Control.Monad.Except (throwError)
@ -47,6 +51,45 @@ yamlBsToMeta pMetaValue bstr = do
(T.pack err') pos
return . return $ mempty
-- Returns filtered list of references.
yamlBsToRefs :: PandocMonad m
=> ParserT Text ParserState m (F MetaValue)
-> (Text -> Bool) -- ^ Filter for id
-> BL.ByteString
-> ParserT Text ParserState m (F [M.Map Text MetaValue])
yamlBsToRefs pMetaValue idpred bstr = do
pos <- getPosition
case YAML.decodeNode' YAML.failsafeSchemaResolver False False bstr of
Right (YAML.Doc (YAML.Mapping _ _ o):_)
-> case YAML.parseEither (o YAML..: "references") of
Right ns -> do
let g n = case YAML.parseEither (n YAML..: "id") of
Right t -> idpred t ||
case YAML.parseEither (n YAML..:
"other-ids") of
Right (oids :: [Text]) ->
any idpred oids
_ -> False
_ -> False
sequence <$> mapM (yamlMap pMetaValue) (filter g ns)
Left _ -> do
logMessage $ CouldNotParseYamlMetadata
("expecting 'references' field") pos
return . return $ mempty
Right [] -> return . return $ mempty
Right [YAML.Doc (YAML.Scalar _ YAML.SNull)]
-> return . return $ mempty
Right _ -> do logMessage $ CouldNotParseYamlMetadata "not an object"
pos
return . return $ mempty
Left (_pos, err')
-> do logMessage $ CouldNotParseYamlMetadata
(T.pack err') pos
return . return $ mempty
nodeToKey :: PandocMonad m
=> YAML.Node YE.Pos
-> m Text