Org reader: Support more types of '#+BEGIN_<type>' blocks

Support for standard org-blocks is improved. The parser now handles "HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a sensible fashion.
2014-04-17 18:09:27 +02:00 · 2014-04-17 18:09:27 +02:00 · 6d6724cf2c
commit 6d6724cf2c
parent 0672f58a44
2 changed files with 108 additions and 30 deletions
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@ -37,6 +37,7 @@ import           Text.Pandoc.Options
 import qualified Text.Pandoc.Parsing as P
 import           Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
                                            , newline, orderedListMarker
+                                            , parseFromString
                                            , updateLastStrPos )
 import           Text.Pandoc.Shared (compactify')

@ -47,7 +48,7 @@ import           Control.Monad (foldM, guard, liftM, liftM2, when)
 import           Control.Monad.Reader (Reader, runReader, ask, asks)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (isPrefixOf, isSuffixOf)
+import           Data.List (intersperse, isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (Monoid, mconcat, mempty, mappend)

@ -156,6 +157,16 @@ addToNotesTable note = do
  oldnotes <- orgStateNotes' <$> getState
  updateState $ \s -> s{ orgStateNotes' = note:oldnotes }

+-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
+-- of the state saved and restored.
+parseFromString :: OrgParser a -> String -> OrgParser a
+parseFromString parser str' = do
+  oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
+  updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
+  result <- P.parseFromString parser str'
+  updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
+  return result
+

 --
 -- Adaptions and specializations of parsing utilities
@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines
 orgBlock :: OrgParser (F Blocks)
 orgBlock = try $ do
  (indent, blockType, args) <- blockHeader
-  blockStr <- rawBlockContent indent blockType
+  content <- rawBlockContent indent blockType
+  contentBlocks <- parseFromString parseBlocks (content ++ "\n")
  let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
  case blockType of
    "comment" -> return mempty
-    "src"     -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
-    _         -> fmap (B.divWith ("", [blockType], []))
-                 <$> parseFromString parseBlocks blockStr
+    "src"     -> returnF $ B.codeBlockWith ("", classArgs, []) content
+    "html"    -> returnF $ B.rawBlock "html" content
+    "latex"   -> returnF $ B.rawBlock "latex" content
+    "ascii"   -> returnF $ B.rawBlock "ascii" content
+    "example" -> returnF $ exampleCode content
+    "quote"   -> return  $ B.blockQuote <$> contentBlocks
+    "verse"   -> parseVerse content
+    _         -> return  $ B.divWith ("", [blockType], []) <$> contentBlocks
+ where
+   returnF :: a -> OrgParser (F a)
+   returnF = return . return
+
+   parseVerse :: String -> OrgParser (F Blocks)
+   parseVerse cs =
+       fmap B.para . mconcat . intersperse (pure B.linebreak)
+       <$> mapM (parseFromString parseInlines) (lines cs)

 blockHeader :: OrgParser (Int, String, [String])
 blockHeader = (,,) <$> blockIndent
@ -270,8 +295,10 @@ commaEscaped cs                   = cs

 example :: OrgParser (F Blocks)
 example = try $ do
-  body <- unlines <$> many1 exampleLine
-  return . return $ B.codeBlockWith ("", ["example"], []) body
+  return . return . exampleCode =<< unlines <$> many1 exampleLine
+
+exampleCode :: String -> Blocks
+exampleCode = B.codeBlockWith ("", ["example"], [])

 exampleLine :: OrgParser String
 exampleLine = try $ string ": " *> anyLine
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@ -363,29 +363,6 @@ tests =
                  , "#+END_COMMENT"] =?>
          (mempty::Blocks)

-      , "Source Block in Text" =:
-          unlines [ "Low German greeting"
-                  , "  #+BEGIN_SRC haskell"
-                  , "  main = putStrLn greeting"
-                  , "    where greeting = \"moin\""
-                  , "  #+END_SRC" ] =?>
-          let attr' = ("", ["haskell"], [])
-              code' = "main = putStrLn greeting\n" ++
-                       "  where greeting = \"moin\"\n"
-          in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
-                     , codeBlockWith attr' code'
-                     ]
-
-      , "Source Block" =:
-          unlines [ "  #+BEGIN_SRC haskell"
-                  , "  main = putStrLn greeting"
-                  , "    where greeting = \"moin\""
-                  , "  #+END_SRC" ] =?>
-          let attr' = ("", ["haskell"], [])
-              code' = "main = putStrLn greeting\n" ++
-                       "  where greeting = \"moin\"\n"
-          in codeBlockWith attr' code'
-
      , "Figure" =:
          unlines [ "#+caption: A very courageous man."
                  , "#+name: goodguy"
@ -661,4 +638,78 @@ tests =
                , [ plain "2"      , plain mempty , plain mempty  ]
                ]
      ]
+
+    , testGroup "Blocks"
+      [ "Source block" =:
+           unlines [ "  #+BEGIN_SRC haskell"
+                   , "  main = putStrLn greeting"
+                   , "    where greeting = \"moin\""
+                   , "  #+END_SRC" ] =?>
+           let attr' = ("", ["haskell"], [])
+               code' = "main = putStrLn greeting\n" ++
+                       "  where greeting = \"moin\"\n"
+           in codeBlockWith attr' code'
+
+      , "Source block between paragraphs" =:
+           unlines [ "Low German greeting"
+                   , "  #+BEGIN_SRC haskell"
+                   , "  main = putStrLn greeting"
+                   , "    where greeting = \"Moin!\""
+                   , "  #+END_SRC" ] =?>
+           let attr' = ("", ["haskell"], [])
+               code' = "main = putStrLn greeting\n" ++
+                        "  where greeting = \"Moin!\"\n"
+           in mconcat [ para $ spcSep [ "Low", "German", "greeting"  ]
+                      , codeBlockWith attr' code'
+                      ]
+
+      , "Example block" =:
+           unlines [ "#+begin_example"
+                   , "A chosen representation of"
+                   , "a rule."
+                   , "#+eND_exAMPle"
+                   ] =?>
+           codeBlockWith ("", ["example"], [])
+                         "A chosen representation of\na rule.\n"
+
+      , "HTML block" =:
+           unlines [ "#+BEGIN_HTML"
+                   , "<aside>HTML5 is pretty nice.</aside>"
+                   , "#+END_HTML"
+                   ] =?>
+           rawBlock "html" "<aside>HTML5 is pretty nice.</aside>\n"
+
+      , "Quote block" =:
+           unlines [ "#+BEGIN_QUOTE"
+                   , "/Niemand/ hat die Absicht, eine Mauer zu errichten!"
+                   , "#+END_QUOTE"
+                   ] =?>
+           blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht,"
+                                    , "eine", "Mauer", "zu", "errichten!"
+                                    ]))
+
+      , "Verse block" =:
+          unlines [ "The first lines of Goethe's /Faust/:"
+                  , "#+begin_verse"
+                  , "Habe nun, ach! Philosophie,"
+                  , "Juristerei und Medizin,"
+                  , "Und leider auch Theologie!"
+                  , "Durchaus studiert, mit heißem Bemühn."
+                  , "#+end_verse"
+                  ] =?>
+          mconcat
+          [ para $ spcSep [ "The", "first", "lines", "of"
+                          , "Goethe's", emph "Faust" <> ":"]
+          , para $ mconcat
+              [ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ]
+              , linebreak
+              , spcSep [ "Juristerei", "und", "Medizin," ]
+              , linebreak
+              , spcSep [ "Und", "leider", "auch", "Theologie!" ]
+              , linebreak
+              , spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ]
+              ]
+          ]
+
+      ]
  ]