From e49b8304e43d8381a2c7693643ab648f32482359 Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 29 Jun 2018 22:32:49 +0200
Subject: [PATCH] Use HsYAML instead of yaml for translations, YAML metadata.

yaml wraps a C library; HsYAML is pure Haskell.
Closes #4747.  Advances #4535.
---
 MANUAL.txt                          |   4 +-
 pandoc.cabal                        |   9 +--
 src/Text/Pandoc/App.hs              |  11 +--
 src/Text/Pandoc/Readers/Markdown.hs | 102 +++++++++++++---------------
 src/Text/Pandoc/Translations.hs     |  39 ++++++++---
 src/Text/Pandoc/Writers/Markdown.hs |   2 +-
 stack.lts9.yaml                     |   2 +
 stack.yaml                          |   2 +
 8 files changed, 94 insertions(+), 77 deletions(-)

diff --git a/MANUAL.txt b/MANUAL.txt
index 93b82f81c..8421ef674 100644
--- a/MANUAL.txt
+++ b/MANUAL.txt
@@ -3059,7 +3059,9 @@ Metadata will be taken from the fields of the YAML object and added to any
 existing document metadata.  Metadata can contain lists and objects (nested
 arbitrarily), but all string scalars will be interpreted as Markdown.  Fields
 with names ending in an underscore will be ignored by pandoc.  (They may be
-given a role by external processors.)
+given a role by external processors.)  Field names must not be
+interpretable as YAML numbers or boolean values (so, for
+example, `yes`, `True`, and `15` cannot be used as field names).
 
 A document may contain multiple metadata blocks.  The metadata fields will
 be combined through a *left-biased union*:  if two metadata blocks attempt
diff --git a/pandoc.cabal b/pandoc.cabal
index af76a9c3c..636f77482 100644
--- a/pandoc.cabal
+++ b/pandoc.cabal
@@ -370,7 +370,6 @@ library
                  temporary >= 1.1 && < 1.4,
                  blaze-html >= 0.9 && < 0.10,
                  blaze-markup >= 0.8 && < 0.9,
-                 scientific >= 0.2 && < 0.4,
                  vector >= 0.10 && < 0.13,
                  hslua >= 0.9.5 && < 0.9.6,
                  hslua-module-text >= 0.1.2 && < 0.2,
@@ -387,12 +386,10 @@ library
                  http-client >= 0.4.30 && < 0.6,
                  http-client-tls >= 0.2.4 && < 0.4,
                  http-types >= 0.8 && < 0.13,
-                 case-insensitive >= 1.2 && < 1.3
+                 case-insensitive >= 1.2 && < 1.3,
+                 HsYAML >= 0.1.1.1 && < 0.2
   if impl(ghc < 8.0)
-    build-depends: semigroups == 0.18.*,
-                   yaml >= 0.8.11 && < 0.8.31
-  else
-    build-depends: yaml >= 0.8.11 && < 0.9
+    build-depends: semigroups == 0.18.*
   if impl(ghc < 8.4)
      hs-source-dirs: prelude
      other-modules:  Prelude
diff --git a/src/Text/Pandoc/App.hs b/src/Text/Pandoc/App.hs
index 5cbbe13e7..b79273092 100644
--- a/src/Text/Pandoc/App.hs
+++ b/src/Text/Pandoc/App.hs
@@ -62,8 +62,7 @@ import qualified Data.Text as T
 import qualified Data.Text.Lazy as TL
 import qualified Data.Text.Lazy.Encoding as TE
 import qualified Data.Text.Encoding.Error as TE
-import Data.Yaml (decodeEither')
-import qualified Data.Yaml as Yaml
+import qualified Data.YAML as YAML
 import GHC.Generics
 import Network.URI (URI (..), parseURI)
 #ifdef EMBED_DATA_FILES
@@ -702,9 +701,11 @@ removeMetaKeys :: [(String,String)] -> Pandoc -> Pandoc
 removeMetaKeys kvs pdc = foldr (deleteMeta . fst) pdc kvs
 
 readMetaValue :: String -> MetaValue
-readMetaValue s = case decodeEither' (UTF8.fromString s) of
-                       Right (Yaml.String t) -> MetaString $ T.unpack t
-                       Right (Yaml.Bool b)   -> MetaBool b
+readMetaValue s = case YAML.decodeStrict (UTF8.fromString s) of
+                       Right [YAML.Scalar (YAML.SStr t)]
+                                             -> MetaString $ T.unpack t
+                       Right [YAML.Scalar (YAML.SBool b)]
+                                             -> MetaBool b
                        _                     -> MetaString s
 
 -- Determine default reader based on source file extensions
diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs
index 0cd9ce63f..9fe84013f 100644
--- a/src/Text/Pandoc/Readers/Markdown.hs
+++ b/src/Text/Pandoc/Readers/Markdown.hs
@@ -37,18 +37,14 @@ import Prelude
 import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isPunctuation, isSpace, toLower)
-import qualified Data.HashMap.Strict as H
 import Data.List (intercalate, sortBy, transpose, elemIndex)
 import qualified Data.Map as M
 import Data.Maybe
 import Data.Ord (comparing)
-import Data.Scientific (base10Exponent, coefficient)
 import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
-import qualified Data.Vector as V
-import Data.Yaml (ParseException (..), YamlException (..), YamlMark (..))
-import qualified Data.Yaml as Yaml
+import qualified Data.YAML as YAML
 import System.FilePath (addExtension, takeExtension)
 import Text.HTML.TagSoup
 import Text.Pandoc.Builder (Blocks, Inlines)
@@ -246,47 +242,38 @@ yamlMetaBlock = try $ do
   -- by including --- and ..., we allow yaml blocks with just comments:
   let rawYaml = unlines ("---" : (rawYamlLines ++ ["..."]))
   optional blanklines
-  case Yaml.decodeEither' $ UTF8.fromString rawYaml of
-       Right (Yaml.Object hashmap) -> do
-         let alist = H.toList hashmap
-         mapM_ (\(k, v) ->
-             if ignorable k
-                then return ()
-                else do
-                  v' <- yamlToMeta v
-                  let k' = T.unpack k
-                  updateState $ \st -> st{ stateMeta' =
-                     do m <- stateMeta' st
-                        -- if there's already a value, leave it unchanged
-                        case lookupMeta k' m of
-                             Just _ -> return m
-                             Nothing -> do
-                               v'' <- v'
-                               return $ B.setMeta (T.unpack k) v'' m}
+  case YAML.decodeStrict (UTF8.fromString rawYaml) of
+       Right (YAML.Mapping _ hashmap : _) -> do
+         let alist = M.toList hashmap
+         mapM_ (\(k', v) ->
+           case YAML.parseEither (YAML.parseYAML k') of
+                Left e  -> fail e
+                Right k -> do
+                  if ignorable k
+                     then return ()
+                     else do
+                       v' <- yamlToMeta v
+                       let k' = T.unpack k
+                       updateState $ \st -> st{ stateMeta' =
+                          do m <- stateMeta' st
+                             -- if there's already a value, leave it unchanged
+                             case lookupMeta k' m of
+                                  Just _ -> return m
+                                  Nothing -> do
+                                    v'' <- v'
+                                    return $ B.setMeta (T.unpack k) v'' m}
            ) alist
-       Right Yaml.Null -> return ()
+       Right [] -> return ()
+       Right (YAML.Scalar YAML.SNull:_) -> return ()
        Right _ -> do
-                   logMessage $
-                      CouldNotParseYamlMetadata "not an object"
-                      pos
-                   return ()
+                  logMessage $
+                     CouldNotParseYamlMetadata "not an object"
+                     pos
+                  return ()
        Left err' -> do
-                case err' of
-                   InvalidYaml (Just YamlParseException{
-                               yamlProblem = problem
-                             , yamlContext = _ctxt
-                             , yamlProblemMark = Yaml.YamlMark {
-                                   yamlLine = yline
-                                 , yamlColumn = ycol
-                             }}) ->
-                        logMessage $ CouldNotParseYamlMetadata
-                           problem (setSourceLine
-                           (setSourceColumn pos
-                              (sourceColumn pos + ycol))
-                           (sourceLine pos + 1 + yline))
-                   _ -> logMessage $ CouldNotParseYamlMetadata
-                           (show err') pos
-                return ()
+                    logMessage $ CouldNotParseYamlMetadata
+                                 err' pos
+                    return ()
   return mempty
 
 -- ignore fields ending with _
@@ -313,22 +300,25 @@ toMetaValue x =
         -- `|` or `>` will.
 
 yamlToMeta :: PandocMonad m
-           => Yaml.Value -> MarkdownParser m (F MetaValue)
-yamlToMeta (Yaml.String t) = toMetaValue t
-yamlToMeta (Yaml.Number n)
-  -- avoid decimal points for numbers that don't need them:
-  | base10Exponent n >= 0     = return $ return $ MetaString $ show
-                                $ coefficient n * (10 ^ base10Exponent n)
-  | otherwise                 = return $ return $ MetaString $ show n
-yamlToMeta (Yaml.Bool b) = return $ return $ MetaBool b
-yamlToMeta (Yaml.Array xs) = do
-  xs' <- mapM yamlToMeta (V.toList xs)
+           => YAML.Node -> MarkdownParser m (F MetaValue)
+yamlToMeta (YAML.Scalar x) =
+  case x of
+       YAML.SStr t   -> toMetaValue t
+       YAML.SBool b  -> return $ return $ MetaBool b
+       YAML.SFloat d -> return $ return $ MetaString (show d)
+       YAML.SInt i   -> return $ return $ MetaString (show i)
+       _             -> return $ return $ MetaString ""
+yamlToMeta (YAML.Sequence _ xs) = do
+  xs' <- mapM yamlToMeta xs
   return $ do
     xs'' <- sequence xs'
     return $ B.toMetaValue xs''
-yamlToMeta (Yaml.Object o) = do
-  let alist = H.toList o
-  foldM (\m (k,v) ->
+yamlToMeta (YAML.Mapping _ o) = do
+  let alist = M.toList o
+  foldM (\m (k',v) ->
+     case YAML.parseEither (YAML.parseYAML k') of
+          Left e  -> fail e
+          Right k -> do
            if ignorable k
               then return m
               else do
diff --git a/src/Text/Pandoc/Translations.hs b/src/Text/Pandoc/Translations.hs
index 4a216af92..13dcb3b61 100644
--- a/src/Text/Pandoc/Translations.hs
+++ b/src/Text/Pandoc/Translations.hs
@@ -48,11 +48,12 @@ module Text.Pandoc.Translations (
                          )
 where
 import Prelude
-import Data.Aeson.Types (typeMismatch)
+import Data.Aeson.Types (Value(..), FromJSON(..))
+import qualified Data.Aeson.Types as Aeson
 import qualified Data.HashMap.Strict as HM
 import qualified Data.Map as M
 import Data.Text as T
-import Data.Yaml as Yaml
+import qualified Data.YAML as YAML
 import GHC.Generics (Generic)
 import Text.Pandoc.Shared (safeRead)
 import qualified Text.Pandoc.UTF8 as UTF8
@@ -90,7 +91,15 @@ instance FromJSON Term where
                                Just t' -> pure t'
                                Nothing -> fail $ "Invalid Term name " ++
                                                  show t
-  parseJSON invalid = typeMismatch "Term" invalid
+  parseJSON invalid = Aeson.typeMismatch "Term" invalid
+
+instance YAML.FromYAML Term where
+  parseYAML (YAML.Scalar (YAML.SStr t)) =
+                         case safeRead (T.unpack t) of
+                               Just t' -> pure t'
+                               Nothing -> fail $ "Invalid Term name " ++
+                                                 show t
+  parseYAML invalid = YAML.typeMismatch "Term" invalid
 
 instance FromJSON Translations where
   parseJSON (Object hm) = do
@@ -102,14 +111,28 @@ instance FromJSON Translations where
                  Just t  ->
                    case v of
                         (String s) -> return (t, T.unpack $ T.strip s)
-                        inv        -> typeMismatch "String" inv
-  parseJSON invalid = typeMismatch "Translations" invalid
+                        inv        -> Aeson.typeMismatch "String" inv
+  parseJSON invalid = Aeson.typeMismatch "Translations" invalid
+
+instance YAML.FromYAML Translations where
+  parseYAML = YAML.withMap "Translations" $
+    \tr -> Translations .M.fromList <$> mapM addItem (M.toList tr)
+   where addItem (n@(YAML.Scalar (YAML.SStr k)), v) =
+            case safeRead (T.unpack k) of
+                 Nothing -> YAML.typeMismatch "Term" n
+                 Just t  ->
+                   case v of
+                        (YAML.Scalar (YAML.SStr s)) ->
+                          return (t, T.unpack (T.strip s))
+                        n' -> YAML.typeMismatch "String" n'
+         addItem (n, _) = YAML.typeMismatch "String" n
 
 lookupTerm :: Term -> Translations -> Maybe String
 lookupTerm t (Translations tm) = M.lookup t tm
 
 readTranslations :: String -> Either String Translations
 readTranslations s =
-  case Yaml.decodeEither' $ UTF8.fromString s of
-       Left err' -> Left $ prettyPrintParseException err'
-       Right t   -> Right t
+  case YAML.decodeStrict $ UTF8.fromString s of
+       Left err'   -> Left err'
+       Right (t:_) -> Right t
+       Right []    -> Left "empty YAML document"
diff --git a/src/Text/Pandoc/Writers/Markdown.hs b/src/Text/Pandoc/Writers/Markdown.hs
index fe8f452d3..dc0b154bf 100644
--- a/src/Text/Pandoc/Writers/Markdown.hs
+++ b/src/Text/Pandoc/Writers/Markdown.hs
@@ -50,7 +50,7 @@ import qualified Data.Set as Set
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Data.Vector as V
-import Data.Yaml (Value (Array, Bool, Number, Object, String))
+import Data.Aeson (Value (Array, Bool, Number, Object, String))
 import Network.HTTP (urlEncode)
 import Text.HTML.TagSoup (Tag (..), isTagText, parseTags)
 import Text.Pandoc.Class (PandocMonad, report)
diff --git a/stack.lts9.yaml b/stack.lts9.yaml
index 355254618..75b6763b2 100644
--- a/stack.lts9.yaml
+++ b/stack.lts9.yaml
@@ -27,4 +27,6 @@ extra-deps:
 - pandoc-types-1.17.5
 - haddock-library-1.6.0
 - texmath-0.11
+- HsYAML-0.1.1.1
+- text-1.2.3.0
 resolver: lts-9.14
diff --git a/stack.yaml b/stack.yaml
index e0d7045c8..f9b573931 100644
--- a/stack.yaml
+++ b/stack.yaml
@@ -22,6 +22,8 @@ extra-deps:
 - hslua-module-text-0.1.2.1
 - texmath-0.11
 - haddock-library-1.6.0
+- HsYAML-0.1.1.1
+- text-1.2.3.0
 ghc-options:
    "$locals": -fhide-source-paths -XNoImplicitPrelude
 resolver: lts-10.10