Merge pull request #3067 from tarleb/org-figure-bugfix

Org reader: ensure image sources are proper links
This commit is contained in:
John MacFarlane 2016-08-09 21:31:52 +02:00 committed by GitHub
commit 3a6e15a313
4 changed files with 62 additions and 48 deletions

View file

@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
( isImageFilename, rundocBlockClass, toRundocAttrib
, translateLang )
( cleanLinkString, isImageFilename, rundocBlockClass
, toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
@ -571,23 +571,33 @@ figure :: OrgParser (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
guard . not . isNothing . blockAttrCaption $ figAttrs
guard (isImageFilename src)
let figName = fromMaybe mempty $ blockAttrName figAttrs
let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
let figKeyVals = blockAttrKeyValues figAttrs
let attr = (figLabel, mempty, figKeyVals)
return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
case cleanLinkString src of
Nothing -> mzero
Just imgSrc -> do
guard (not . isNothing . blockAttrCaption $ figAttrs)
guard (isImageFilename imgSrc)
return $ figureBlock figAttrs imgSrc
where
selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
figureBlock :: BlockAttributes -> String -> (F Blocks)
figureBlock figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
figKeyVals = blockAttrKeyValues figAttrs
attr = (figLabel, mempty, figKeyVals)
in
B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
withFigPrefix :: String -> String
withFigPrefix cs =
if "fig:" `isPrefixOf` cs
then cs
else "fig:" ++ cs
selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
--
-- Examples

View file

@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
( isImageFilename, rundocBlockClass, toRundocAttrib
, translateLang )
( cleanLinkString, isImageFilename, rundocBlockClass
, toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines )
@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Prelude hiding (sequence)
import Control.Monad ( guard, mplus, mzero, when, void )
import Data.Char ( isAlphaNum, isSpace )
import Data.List ( intersperse, isPrefixOf )
import Data.List ( intersperse )
import Data.Maybe ( fromMaybe )
import qualified Data.Map as M
import Data.Traversable (sequence)
@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
char ']'
return $ do
src <- srcF
if isImageFilename title
then pure $ B.link src "" $ B.image title mempty mempty
else linkToInlinesF src =<< title'
case cleanLinkString title of
Just imgSrc | isImageFilename imgSrc ->
pure $ B.link src "" $ B.image imgSrc mempty mempty
_ ->
linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
@ -482,25 +484,6 @@ linkToInlinesF linkStr =
else pure . B.link cleanedLink ""
Nothing -> internalLink linkStr -- other internal link
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
cleanLinkString :: String -> Maybe String
cleanLinkString s =
case s of
'/':_ -> Just $ "file://" ++ s -- absolute path
'.':'/':_ -> Just s -- relative path
'.':'.':'/':_ -> Just s -- relative path
-- Relative path or URL (file schema)
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
_ | isUrl s -> Just s -- URL
_ -> Nothing
where
isUrl :: String -> Bool
isUrl cs =
let (scheme, path) = break (== ':') cs
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
&& not (null path)
internalLink :: String -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds

View file

@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
( isImageFilename
( cleanLinkString
, isImageFilename
, rundocBlockClass
, toRundocAttrib
, translateLang
) where
import Control.Arrow ( first )
import Data.Char ( isAlphaNum )
import Data.List ( isPrefixOf, isSuffixOf )
@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf )
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
(any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
':' `notElem` filename)
where
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
cleanLinkString :: String -> Maybe String
cleanLinkString s =
case s of
'/':_ -> Just $ "file://" ++ s -- absolute path
'.':'/':_ -> Just s -- relative path
'.':'.':'/':_ -> Just s -- relative path
-- Relative path or URL (file schema)
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
_ | isUrl s -> Just s -- URL
_ -> Nothing
where
isUrl :: String -> Bool
isUrl cs =
let (scheme, path) = break (== ':') cs
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
&& not (null path)
-- | Prefix used for Rundoc classes and arguments.
rundocPrefix :: String
rundocPrefix = "rundoc-"

View file

@ -219,12 +219,12 @@ tests =
(para $ link "" "" "New Link")
, "Image link" =:
"[[sunset.png][dusk.svg]]" =?>
"[[sunset.png][file:dusk.svg]]" =?>
(para $ link "sunset.png" "" (image "dusk.svg" "" ""))
, "Image link with non-image target" =:
"[[http://example.com][logo.png]]" =?>
(para $ link "http://example.com" "" (image "logo.png" "" ""))
"[[http://example.com][./logo.png]]" =?>
(para $ link "http://example.com" "" (image "./logo.png" "" ""))
, "Plain link" =:
"Posts on http://zeitlens.com/ can be funny at times." =?>
@ -810,29 +810,29 @@ tests =
[ "Figure" =:
unlines [ "#+caption: A very courageous man."
, "#+name: goodguy"
, "[[edward.jpg]]"
, "[[file:edward.jpg]]"
] =?>
para (image "edward.jpg" "fig:goodguy" "A very courageous man.")
, "Figure with no name" =:
unlines [ "#+caption: I've been through the desert on this"
, "[[horse.png]]"
, "[[file:horse.png]]"
] =?>
para (image "horse.png" "fig:" "I've been through the desert on this")
, "Figure with `fig:` prefix in name" =:
unlines [ "#+caption: Used as a metapher in evolutionary biology."
, "#+name: fig:redqueen"
, "[[the-red-queen.jpg]]"
, "[[./the-red-queen.jpg]]"
] =?>
para (image "the-red-queen.jpg" "fig:redqueen"
para (image "./the-red-queen.jpg" "fig:redqueen"
"Used as a metapher in evolutionary biology.")
, "Figure with HTML attributes" =:
unlines [ "#+CAPTION: mah brain just explodid"
, "#+NAME: lambdacat"
, "#+ATTR_HTML: :style color: blue :role button"
, "[[lambdacat.jpg]]"
, "[[file:lambdacat.jpg]]"
] =?>
let kv = [("style", "color: blue"), ("role", "button")]
name = "fig:lambdacat"
@ -842,7 +842,7 @@ tests =
, "Labelled figure" =:
unlines [ "#+CAPTION: My figure"
, "#+LABEL: fig:myfig"
, "[[blub.png]]"
, "[[file:blub.png]]"
] =?>
let attr = ("fig:myfig", mempty, mempty)
in para (imageWith attr "blub.png" "fig:" "My figure")