From 7cf7e45e4cbb99b320a92b4bd31e433f535d3ef7 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 4 Apr 2014 14:17:43 +0200
Subject: [PATCH 1/5] Org reader: Slight cleaning of table parsing code

---
 src/Text/Pandoc/Readers/Org.hs | 68 +++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 33 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 5dc250f04..8b155194b 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -217,13 +217,18 @@ data OrgTableRow = OrgContentRow [Blocks]
                  | OrgHlineRow
  deriving (Eq, Show)
 
-type OrgTableContent = (Int, [Alignment], [Double], [Blocks], [[Blocks]])
+data OrgTable = OrgTable
+  { orgTableColumns    :: Int
+  , orgTableAlignments :: [Alignment]
+  , orgTableHeader     :: [Blocks]
+  , orgTableRows       :: [[Blocks]]
+  } deriving (Eq, Show)
 
 table :: OrgParser Blocks
 table = try $ do
   lookAhead tableStart
-  (_, aligns, widths, heads, lns) <- normalizeTable . tableContent <$> tableRows
-  return $ B.table "" (zip aligns widths) heads lns
+  OrgTable _ aligns heads lns <- normalizeTable . rowsToTable <$> tableRows
+  return $ B.table "" (zip aligns $ repeat 0) heads lns
 
 tableStart :: OrgParser Char
 tableStart = try $ skipSpaces *> char '|'
@@ -237,10 +242,9 @@ tableContentRow = try $
 
 tableContentCell :: OrgParser Blocks
 tableContentCell = try $
-  B.plain . trimInlines . mconcat <$> many1Till inline (try endOfCell)
+  B.plain . trimInlines . mconcat <$> many1Till inline endOfCell
 
 endOfCell :: OrgParser Char
--- endOfCell =  char '|' <|> newline
 endOfCell = try $ char '|' <|> lookAhead newline
 
 tableAlignRow :: OrgParser OrgTableRow
@@ -269,54 +273,53 @@ tableHline :: OrgParser OrgTableRow
 tableHline = try $
   OrgHlineRow <$ (tableStart *> char '-' *> anyLine)
 
-tableContent :: [OrgTableRow]
-             -> OrgTableContent
-tableContent = foldl' (flip rowToContent) (0, mempty, repeat 0, mempty, mempty)
+rowsToTable :: [OrgTableRow]
+            -> OrgTable
+rowsToTable = foldl' (flip rowToContent) zeroTable
+  where zeroTable = OrgTable 0 mempty mempty mempty
 
-normalizeTable :: OrgTableContent
-               -> OrgTableContent
-normalizeTable (cols, aligns, widths, heads, lns) =
+normalizeTable :: OrgTable
+               -> OrgTable
+normalizeTable (OrgTable cols aligns heads lns) =
   let aligns' = fillColumns aligns AlignDefault
-      widths' = fillColumns widths 0.0
       heads'  = if heads == mempty
-                then heads
+                then mempty
                 else fillColumns heads  (B.plain mempty)
       lns'    = map (flip fillColumns (B.plain mempty)) lns
       fillColumns base padding = take cols $ base ++ repeat padding
-  in (cols, aligns', widths', heads', lns')
+  in OrgTable cols aligns' heads' lns'
 
 
 -- One or more horizontal rules after the first content line mark the previous
 -- line as a header.  All other horizontal lines are discarded.
 rowToContent :: OrgTableRow
-             -> OrgTableContent
-             -> OrgTableContent
+             -> OrgTable
+             -> OrgTable
 rowToContent OrgHlineRow        = maybeBodyToHeader
 rowToContent (OrgContentRow rs) = setLongestRow rs . appendToBody rs
 rowToContent (OrgAlignRow as)   = setLongestRow as . setAligns as
 
 setLongestRow :: [a]
-              -> OrgTableContent
-              -> OrgTableContent
-setLongestRow r (cols, aligns, widths, heads, lns) =
-  (max cols (length r), aligns, widths, heads, lns)
+              -> OrgTable
+              -> OrgTable
+setLongestRow rs t = t{ orgTableColumns = max (length rs) (orgTableColumns t) }
 
-maybeBodyToHeader :: OrgTableContent
-                  -> OrgTableContent
-maybeBodyToHeader (cols, aligns, widths, [], b:[]) = (cols, aligns, widths, b, [])
-maybeBodyToHeader content                          = content
+maybeBodyToHeader :: OrgTable
+                  -> OrgTable
+maybeBodyToHeader t = case t of
+  OrgTable{ orgTableHeader = [], orgTableRows = b:[] } ->
+         t{ orgTableHeader = b , orgTableRows = [] }
+  _   -> t
 
 appendToBody :: [Blocks]
-             -> OrgTableContent
-             -> OrgTableContent
-appendToBody r (cols, aligns, widths, heads, lns) =
-  (cols, aligns, widths, heads, lns ++ [r])
+             -> OrgTable
+             -> OrgTable
+appendToBody r t = t{ orgTableRows = orgTableRows t ++ [r] }
 
 setAligns :: [Alignment]
-          -> OrgTableContent
-          -> OrgTableContent
-setAligns aligns (cols, _, widths, heads, lns) =
-   (cols, aligns, widths, heads, lns)
+          -> OrgTable
+          -> OrgTable
+setAligns aligns t = t{ orgTableAlignments = aligns }
 
 -- Paragraphs or Plain text
 paraOrPlain :: OrgParser Blocks
@@ -549,4 +552,3 @@ endsOnThisLine input c doOnOtherLines = do
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
-

From d43c3e81017734170fb25460c4b9ab9cccb1e0db Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Fri, 4 Apr 2014 17:20:36 +0200
Subject: [PATCH 2/5] Org reader: Use specialized org parser state

The default pandoc ParserState is replaced with `OrgParserState`.  This
is done to simplify the introduction of new state fields required for
efficient Org parsing.
---
 src/Text/Pandoc/Readers/Org.hs | 48 +++++++++++++++++++++++++++++-----
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 8b155194b..0ae4d231c 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -29,15 +29,16 @@ Conversion of Org-Mode to 'Pandoc' document.
 module Text.Pandoc.Readers.Org ( readOrg ) where
 
 import qualified Text.Pandoc.Builder as B
-import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>))
+import           Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
 import           Text.Pandoc.Definition
 import           Text.Pandoc.Options
-import           Text.Pandoc.Parsing hiding (orderedListMarker)
+import           Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos)
 import           Text.Pandoc.Shared (compactify')
 
 import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
 import           Control.Monad (guard, mzero)
 import           Data.Char (toLower)
+import           Data.Default
 import           Data.List (foldl')
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
@@ -46,15 +47,48 @@ import           Data.Monoid (mconcat, mempty, mappend)
 readOrg :: ReaderOptions -- ^ Reader options
         -> String        -- ^ String to parse (assuming @'\n'@ line endings)
         -> Pandoc
-readOrg opts s = (readWith parseOrg) def{ stateOptions = opts } (s ++ "\n\n")
+readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n")
+
+type OrgParser = Parser [Char] OrgParserState
+
+-- | Org-mode parser state
+data OrgParserState = OrgParserState
+                      { orgOptions          :: ReaderOptions
+                      , orgInlineCharStack  :: [Char]
+                      , orgLastStrPos       :: Maybe SourcePos
+                      , orgMeta             :: Meta
+                      } deriving (Show)
+
+instance HasReaderOptions OrgParserState where
+  extractReaderOptions = orgOptions
+
+instance HasMeta OrgParserState where
+  setMeta field val st =
+    st{ orgMeta = setMeta field val $ orgMeta st }
+  deleteMeta field st =
+    st{ orgMeta = deleteMeta field $ orgMeta st }
+
+instance Default OrgParserState where
+  def = defaultOrgParserState
+
+defaultOrgParserState :: OrgParserState
+defaultOrgParserState = OrgParserState
+                        { orgOptions = def
+                        , orgInlineCharStack = []
+                        , orgLastStrPos = Nothing
+                        , orgMeta = nullMeta
+                        }
+
+updateLastStrPos :: OrgParser ()
+updateLastStrPos = getPosition >>= \p ->
+  updateState $ \s -> s{ orgLastStrPos = Just p }
 
-type OrgParser = Parser [Char] ParserState
 
 parseOrg:: OrgParser Pandoc
 parseOrg = do
   blocks' <- B.toList <$> parseBlocks
   st <- getState
-  let meta = stateMeta st
+  let meta = orgMeta st
   return $ Pandoc meta $ filter (/= Null) blocks'
 
 --
@@ -177,7 +211,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
 declarationLine :: OrgParser Blocks
 declarationLine = try $ do
   meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
-  updateState $ \st -> st { stateMeta = stateMeta st <> meta' }
+  updateState $ \st -> st { orgMeta  = orgMeta st <> meta' }
   return mempty
 
 metaValue :: OrgParser MetaValue
@@ -522,7 +556,7 @@ atStart :: OrgParser a -> OrgParser a
 atStart p = do
   pos <- getPosition
   st <- getState
-  guard $ stateLastStrPos st /= Just pos
+  guard $ orgLastStrPos st /= Just pos
   p
 
 -- | succeeds only if we're at the end of a word

From fd98532784e43ad73072f37a31af5ff40fdc1c56 Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 09:37:46 +0200
Subject: [PATCH 3/5] Org reader: Fix parsing of nested inlines

Text such as /*this*/ was not correctly parsed as a strong, emphasised
word.  This was due to the end-of-word recognition being to strict as it
did not accept markup chars as part of a word.  The fix involves an
additional parser state field, listing the markup chars which might be
parsed as part of a word.
---
 src/Text/Pandoc/Readers/Org.hs | 27 ++++++++++++++++++++-------
 tests/Tests/Readers/Org.hs     |  4 ++++
 2 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 0ae4d231c..ad66caab9 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -535,8 +535,15 @@ enclosedInlines start end = try $
 -- FIXME: This is a hack
 inlinesEnclosedBy :: Char
                   -> OrgParser Inlines
-inlinesEnclosedBy c = enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
-                                      (atEnd $ char c)
+inlinesEnclosedBy c = try $ do
+  updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
+  res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
+                         (atEnd $ char c)
+  updateState $ \st -> st { orgInlineCharStack = shift . orgInlineCharStack $ st }
+  return res
+ where shift xs
+           | null xs   = []
+           | otherwise = tail xs
 
 enclosedRaw :: OrgParser a
             -> OrgParser b
@@ -561,11 +568,16 @@ atStart p = do
 
 -- | succeeds only if we're at the end of a word
 atEnd :: OrgParser a -> OrgParser a
-atEnd p = try $ p <* lookingAtEndOfWord
- where lookingAtEndOfWord = lookAhead . oneOf $ postWordChars
+atEnd p = try $ do
+  p <* lookingAtEndOfWord
+ where lookingAtEndOfWord = lookAhead . oneOf =<< postWordChars
 
-postWordChars :: [Char]
-postWordChars = "\t\n\r !\"'),-.:?}"
+postWordChars :: OrgParser [Char]
+postWordChars = do
+  st <- getState
+  return $ "\t\n\r !\"'),-.:?}" ++ (safeSecond . orgInlineCharStack $ st)
+ where safeSecond (_:x2:_) = [x2]
+       safeSecond _        = []
 
 -- FIXME: These functions are hacks and should be replaced
 endsOnThisOrNextLine :: Char
@@ -580,9 +592,10 @@ endsOnThisLine :: [Char]
                -> ([Char] -> OrgParser ())
                -> OrgParser ()
 endsOnThisLine input c doOnOtherLines = do
+  postWordChars' <- postWordChars
   case break (`elem` c:"\n") input of
     (_,'\n':rest)    -> doOnOtherLines rest
-    (_,_:rest@(n:_)) -> if n `elem` postWordChars
+    (_,_:rest@(n:_)) -> if n `elem` postWordChars'
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 8c5982302..9091d9c74 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -42,6 +42,10 @@ tests =
           "*Cider*" =?>
           para (strong "Cider")
 
+      , "Strong Emphasis" =:
+        "/*strength*/" =?>
+        para (emph . strong $ "strength")
+
       , "Strikeout" =:
           "+Kill Bill+" =?>
           para (strikeout . spcSep $ [ "Kill", "Bill" ])

From d76d2b707b2b5cebb38122e117527a70996c2c4f Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 09:09:44 +0200
Subject: [PATCH 4/5] Org reader: Provide more language identifier translations

Org-mode and Pandoc use different language identifiers, marking source
code as being written in a certain programming language.  This adds more
translations from identifiers as used in Org to identifiers used in
Pandoc.

The full list of identifiers used in Org and Pandoc is available through
http://orgmode.org/manual/Languages.html and `pandoc -v`, respectively.
---
 src/Text/Pandoc/Readers/Org.hs | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index ad66caab9..62088a04d 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -153,7 +153,14 @@ indentWith num = do
                  , try (char '\t' >> count (num - tabStop) (char ' ')) ]
 
 translateLang :: String -> String
-translateLang "sh" = "bash"
+translateLang "C"          = "c"
+translateLang "C++"        = "cpp"
+translateLang "emacs-lisp" = "commonlisp" -- emacs lisp is not supported
+translateLang "js"         = "javascript"
+translateLang "lisp"       = "commonlisp"
+translateLang "R"          = "r"
+translateLang "sh"         = "bash"
+translateLang "sqlite"     = "sql"
 translateLang cs = cs
 
 commaEscaped :: String -> String

From 652c781e375f3678a0ec821663240d4958f324de Mon Sep 17 00:00:00 2001
From: Albert Krewinkel <tarleb@moltkeplatz.de>
Date: Sat, 5 Apr 2014 16:10:52 +0200
Subject: [PATCH 5/5] Org reader: Support inline images

---
 src/Text/Pandoc/Readers/Org.hs | 34 ++++++++++++++++++++++++----------
 tests/Tests/Readers/Org.hs     | 12 ++++++++++--
 2 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Org.hs b/src/Text/Pandoc/Readers/Org.hs
index 62088a04d..8b1b4fa23 100644
--- a/src/Text/Pandoc/Readers/Org.hs
+++ b/src/Text/Pandoc/Readers/Org.hs
@@ -39,7 +39,7 @@ import           Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**
 import           Control.Monad (guard, mzero)
 import           Data.Char (toLower)
 import           Data.Default
-import           Data.List (foldl')
+import           Data.List (foldl', isPrefixOf, isSuffixOf)
 import           Data.Maybe (listToMaybe, fromMaybe)
 import           Data.Monoid (mconcat, mempty, mappend)
 
@@ -484,20 +484,26 @@ endline = try $ do
   return B.space
 
 link :: OrgParser Inlines
-link = explicitLink <|> selfLink <?> "link"
+link = explicitOrImageLink <|> selflinkOrImage <?> "link"
 
-explicitLink :: OrgParser Inlines
-explicitLink = try $ do
+explicitOrImageLink :: OrgParser Inlines
+explicitOrImageLink = try $ do
   char '['
-  src   <- enclosedRaw     (char '[') (char ']')
-  title <- enclosedInlines (char '[') (char ']')
+  src    <- enclosedRaw (char '[') (char ']')
+  title  <- enclosedRaw (char '[') (char ']')
+  title' <- parseFromString (mconcat . butLast <$> many inline) (title++"\n")
   char ']'
-  return $ B.link src "" title
+  return $ if (isImage src) && (isImage title)
+           then B.link src "" (B.image title "" "")
+           else B.link src "" title'
+ where butLast = reverse . tail . reverse
 
-selfLink :: OrgParser Inlines
-selfLink = try $ do
+selflinkOrImage :: OrgParser Inlines
+selflinkOrImage = try $ do
   src <- enclosedRaw (string "[[") (string "]]")
-  return $ B.link src "" (B.str src)
+  return $ if isImage src
+           then B.image src "" ""
+           else B.link src "" (B.str src)
 
 emph      :: OrgParser Inlines
 emph      = B.emph         <$> inlinesEnclosedBy '/'
@@ -606,3 +612,11 @@ endsOnThisLine input c doOnOtherLines = do
                         then return ()
                         else endsOnThisLine rest c doOnOtherLines
     _                -> mzero
+
+isImage filename =
+  any (\x -> ('.':x)  `isSuffixOf` filename) imageExtensions &&
+  any (\x -> (x++":") `isPrefixOf` filename) protocols ||
+  ':' `notElem` filename
+ where
+   imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
+   protocols = [ "file", "http", "https" ]
diff --git a/tests/Tests/Readers/Org.hs b/tests/Tests/Readers/Org.hs
index 9091d9c74..1088d6611 100644
--- a/tests/Tests/Readers/Org.hs
+++ b/tests/Tests/Readers/Org.hs
@@ -94,14 +94,22 @@ tests =
                        , (strong ("is" <> space <> "not"))
                        , "emph/" ])
 
+      , "Image" =:
+          "[[./sunset.jpg]]" =?>
+          (para $ image "./sunset.jpg" "" "")
+
       , "Explicit link" =:
-          "[[http://zeitlens.com/][pseudo-random nonsense]]" =?>
+          "[[http://zeitlens.com/][pseudo-random /nonsense/]]" =?>
           (para $ link "http://zeitlens.com/" ""
-                       ("pseudo-random" <> space <> "nonsense"))
+                       ("pseudo-random" <> space <> emph "nonsense"))
 
       , "Self-link" =:
           "[[http://zeitlens.com/]]" =?>
           (para $ link "http://zeitlens.com/" "" "http://zeitlens.com/")
+
+      , "Image link" =:
+          "[[sunset.png][dusk.svg]]" =?>
+          (para $ link "sunset.png" "" (image "dusk.svg" "" ""))
       ]
 
   , testGroup "Meta Information" $