Org reader: Support more types of '#+BEGIN_<type>' blocks

Support for standard org-blocks is improved.  The parser now handles
"HTML", "LATEX", "ASCII", "EXAMPLE", "QUOTE" and "VERSE" blocks in a
sensible fashion.
This commit is contained in:
Albert Krewinkel 2014-04-17 18:09:27 +02:00
parent 0672f58a44
commit 6d6724cf2c
2 changed files with 108 additions and 30 deletions

View file

@ -37,6 +37,7 @@ import Text.Pandoc.Options
import qualified Text.Pandoc.Parsing as P
import Text.Pandoc.Parsing hiding ( F, unF, askF, asksF, runF
, newline, orderedListMarker
, parseFromString
, updateLastStrPos )
import Text.Pandoc.Shared (compactify')
@ -47,7 +48,7 @@ import Control.Monad (foldM, guard, liftM, liftM2, when)
import Control.Monad.Reader (Reader, runReader, ask, asks)
import Data.Char (toLower)
import Data.Default
import Data.List (isPrefixOf, isSuffixOf)
import Data.List (intersperse, isPrefixOf, isSuffixOf)
import Data.Maybe (listToMaybe, fromMaybe)
import Data.Monoid (Monoid, mconcat, mempty, mappend)
@ -156,6 +157,16 @@ addToNotesTable note = do
oldnotes <- orgStateNotes' <$> getState
updateState $ \s -> s{ orgStateNotes' = note:oldnotes }
-- The version Text.Pandoc.Parsing cannot be used, as we need additional parts
-- of the state saved and restored.
parseFromString :: OrgParser a -> String -> OrgParser a
parseFromString parser str' = do
oldLastPreCharPos <- orgStateLastPreCharPos <$> getState
updateState $ \s -> s{ orgStateLastPreCharPos = Nothing }
result <- P.parseFromString parser str'
updateState $ \s -> s{ orgStateLastPreCharPos = oldLastPreCharPos }
return result
--
-- Adaptions and specializations of parsing utilities
@ -218,13 +229,27 @@ block = choice [ mempty <$ blanklines
orgBlock :: OrgParser (F Blocks)
orgBlock = try $ do
(indent, blockType, args) <- blockHeader
blockStr <- rawBlockContent indent blockType
content <- rawBlockContent indent blockType
contentBlocks <- parseFromString parseBlocks (content ++ "\n")
let classArgs = [ translateLang . fromMaybe [] $ listToMaybe args ]
case blockType of
"comment" -> return mempty
"src" -> return . return $ B.codeBlockWith ("", classArgs, []) blockStr
_ -> fmap (B.divWith ("", [blockType], []))
<$> parseFromString parseBlocks blockStr
"src" -> returnF $ B.codeBlockWith ("", classArgs, []) content
"html" -> returnF $ B.rawBlock "html" content
"latex" -> returnF $ B.rawBlock "latex" content
"ascii" -> returnF $ B.rawBlock "ascii" content
"example" -> returnF $ exampleCode content
"quote" -> return $ B.blockQuote <$> contentBlocks
"verse" -> parseVerse content
_ -> return $ B.divWith ("", [blockType], []) <$> contentBlocks
where
returnF :: a -> OrgParser (F a)
returnF = return . return
parseVerse :: String -> OrgParser (F Blocks)
parseVerse cs =
fmap B.para . mconcat . intersperse (pure B.linebreak)
<$> mapM (parseFromString parseInlines) (lines cs)
blockHeader :: OrgParser (Int, String, [String])
blockHeader = (,,) <$> blockIndent
@ -270,8 +295,10 @@ commaEscaped cs = cs
example :: OrgParser (F Blocks)
example = try $ do
body <- unlines <$> many1 exampleLine
return . return $ B.codeBlockWith ("", ["example"], []) body
return . return . exampleCode =<< unlines <$> many1 exampleLine
exampleCode :: String -> Blocks
exampleCode = B.codeBlockWith ("", ["example"], [])
exampleLine :: OrgParser String
exampleLine = try $ string ": " *> anyLine

View file

@ -363,29 +363,6 @@ tests =
, "#+END_COMMENT"] =?>
(mempty::Blocks)
, "Source Block in Text" =:
unlines [ "Low German greeting"
, " #+BEGIN_SRC haskell"
, " main = putStrLn greeting"
, " where greeting = \"moin\""
, " #+END_SRC" ] =?>
let attr' = ("", ["haskell"], [])
code' = "main = putStrLn greeting\n" ++
" where greeting = \"moin\"\n"
in mconcat [ para $ spcSep [ "Low", "German", "greeting" ]
, codeBlockWith attr' code'
]
, "Source Block" =:
unlines [ " #+BEGIN_SRC haskell"
, " main = putStrLn greeting"
, " where greeting = \"moin\""
, " #+END_SRC" ] =?>
let attr' = ("", ["haskell"], [])
code' = "main = putStrLn greeting\n" ++
" where greeting = \"moin\"\n"
in codeBlockWith attr' code'
, "Figure" =:
unlines [ "#+caption: A very courageous man."
, "#+name: goodguy"
@ -661,4 +638,78 @@ tests =
, [ plain "2" , plain mempty , plain mempty ]
]
]
, testGroup "Blocks"
[ "Source block" =:
unlines [ " #+BEGIN_SRC haskell"
, " main = putStrLn greeting"
, " where greeting = \"moin\""
, " #+END_SRC" ] =?>
let attr' = ("", ["haskell"], [])
code' = "main = putStrLn greeting\n" ++
" where greeting = \"moin\"\n"
in codeBlockWith attr' code'
, "Source block between paragraphs" =:
unlines [ "Low German greeting"
, " #+BEGIN_SRC haskell"
, " main = putStrLn greeting"
, " where greeting = \"Moin!\""
, " #+END_SRC" ] =?>
let attr' = ("", ["haskell"], [])
code' = "main = putStrLn greeting\n" ++
" where greeting = \"Moin!\"\n"
in mconcat [ para $ spcSep [ "Low", "German", "greeting" ]
, codeBlockWith attr' code'
]
, "Example block" =:
unlines [ "#+begin_example"
, "A chosen representation of"
, "a rule."
, "#+eND_exAMPle"
] =?>
codeBlockWith ("", ["example"], [])
"A chosen representation of\na rule.\n"
, "HTML block" =:
unlines [ "#+BEGIN_HTML"
, "<aside>HTML5 is pretty nice.</aside>"
, "#+END_HTML"
] =?>
rawBlock "html" "<aside>HTML5 is pretty nice.</aside>\n"
, "Quote block" =:
unlines [ "#+BEGIN_QUOTE"
, "/Niemand/ hat die Absicht, eine Mauer zu errichten!"
, "#+END_QUOTE"
] =?>
blockQuote (para (spcSep [ emph "Niemand", "hat", "die", "Absicht,"
, "eine", "Mauer", "zu", "errichten!"
]))
, "Verse block" =:
unlines [ "The first lines of Goethe's /Faust/:"
, "#+begin_verse"
, "Habe nun, ach! Philosophie,"
, "Juristerei und Medizin,"
, "Und leider auch Theologie!"
, "Durchaus studiert, mit heißem Bemühn."
, "#+end_verse"
] =?>
mconcat
[ para $ spcSep [ "The", "first", "lines", "of"
, "Goethe's", emph "Faust" <> ":"]
, para $ mconcat
[ spcSep [ "Habe", "nun,", "ach!", "Philosophie," ]
, linebreak
, spcSep [ "Juristerei", "und", "Medizin," ]
, linebreak
, spcSep [ "Und", "leider", "auch", "Theologie!" ]
, linebreak
, spcSep [ "Durchaus", "studiert,", "mit", "heißem", "Bemühn." ]
]
]
]
]