Org reader: ensure image sources are proper links

Image sources as those in plain images, image links, or figures, must be
proper URIs or relative file paths to be recognized as images.  This
restriction is now enforced for all image sources.

This also fixes the reader's usage of uncleaned image sources, leading
to `file:` prefixes not being deleted from figure
images (e.g. `[[file:image.jpg]]` leading to a broken image `<img
src="file:image.jpg"/>)

Thanks to @bsag for noticing this bug.
This commit is contained in:
Albert Krewinkel 2016-08-09 20:27:08 +02:00
parent 0fbb676c81
commit ba5b426ded
4 changed files with 62 additions and 48 deletions

View file

@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
( isImageFilename, rundocBlockClass, toRundocAttrib
, translateLang )
( cleanLinkString, isImageFilename, rundocBlockClass
, toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines, Blocks )
@ -571,23 +571,33 @@ figure :: OrgParser (F Blocks)
figure = try $ do
figAttrs <- blockAttributes
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
guard . not . isNothing . blockAttrCaption $ figAttrs
guard (isImageFilename src)
let figName = fromMaybe mempty $ blockAttrName figAttrs
let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
let figKeyVals = blockAttrKeyValues figAttrs
let attr = (figLabel, mempty, figKeyVals)
return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
case cleanLinkString src of
Nothing -> mzero
Just imgSrc -> do
guard (not . isNothing . blockAttrCaption $ figAttrs)
guard (isImageFilename imgSrc)
return $ figureBlock figAttrs imgSrc
where
selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
figureBlock :: BlockAttributes -> String -> (F Blocks)
figureBlock figAttrs imgSrc =
let
figName = fromMaybe mempty $ blockAttrName figAttrs
figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
figKeyVals = blockAttrKeyValues figAttrs
attr = (figLabel, mempty, figKeyVals)
in
B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
withFigPrefix :: String -> String
withFigPrefix cs =
if "fig:" `isPrefixOf` cs
then cs
else "fig:" ++ cs
selfTarget :: OrgParser String
selfTarget = try $ char '[' *> linkTarget <* char ']'
--
-- Examples

View file

@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts
import Text.Pandoc.Readers.Org.ParserState
import Text.Pandoc.Readers.Org.Parsing
import Text.Pandoc.Readers.Org.Shared
( isImageFilename, rundocBlockClass, toRundocAttrib
, translateLang )
( cleanLinkString, isImageFilename, rundocBlockClass
, toRundocAttrib, translateLang )
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Builder ( Inlines )
@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
import Prelude hiding (sequence)
import Control.Monad ( guard, mplus, mzero, when, void )
import Data.Char ( isAlphaNum, isSpace )
import Data.List ( intersperse, isPrefixOf )
import Data.List ( intersperse )
import Data.Maybe ( fromMaybe )
import qualified Data.Map as M
import Data.Traversable (sequence)
@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
char ']'
return $ do
src <- srcF
if isImageFilename title
then pure $ B.link src "" $ B.image title mempty mempty
else linkToInlinesF src =<< title'
case cleanLinkString title of
Just imgSrc | isImageFilename imgSrc ->
pure $ B.link src "" $ B.image imgSrc mempty mempty
_ ->
linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
@ -482,25 +484,6 @@ linkToInlinesF linkStr =
else pure . B.link cleanedLink ""
Nothing -> internalLink linkStr -- other internal link
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
cleanLinkString :: String -> Maybe String
cleanLinkString s =
case s of
'/':_ -> Just $ "file://" ++ s -- absolute path
'.':'/':_ -> Just s -- relative path
'.':'.':'/':_ -> Just s -- relative path
-- Relative path or URL (file schema)
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
_ | isUrl s -> Just s -- URL
_ -> Nothing
where
isUrl :: String -> Bool
isUrl cs =
let (scheme, path) = break (== ':') cs
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
&& not (null path)
internalLink :: String -> Inlines -> F Inlines
internalLink link title = do
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds

View file

@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Utility functions used in other Pandoc Org modules.
-}
module Text.Pandoc.Readers.Org.Shared
( isImageFilename
( cleanLinkString
, isImageFilename
, rundocBlockClass
, toRundocAttrib
, translateLang
) where
import Control.Arrow ( first )
import Data.Char ( isAlphaNum )
import Data.List ( isPrefixOf, isSuffixOf )
@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf )
isImageFilename :: String -> Bool
isImageFilename filename =
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
(any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
':' `notElem` filename)
where
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
protocols = [ "file", "http", "https" ]
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
-- the string does not appear to be a link.
cleanLinkString :: String -> Maybe String
cleanLinkString s =
case s of
'/':_ -> Just $ "file://" ++ s -- absolute path
'.':'/':_ -> Just s -- relative path
'.':'.':'/':_ -> Just s -- relative path
-- Relative path or URL (file schema)
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
_ | isUrl s -> Just s -- URL
_ -> Nothing
where
isUrl :: String -> Bool
isUrl cs =
let (scheme, path) = break (== ':') cs
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
&& not (null path)
-- | Prefix used for Rundoc classes and arguments.
rundocPrefix :: String
rundocPrefix = "rundoc-"

View file

@ -219,12 +219,12 @@ tests =
(para $ link "" "" "New Link")
, "Image link" =:
"[[sunset.png][dusk.svg]]" =?>
"[[sunset.png][file:dusk.svg]]" =?>
(para $ link "sunset.png" "" (image "dusk.svg" "" ""))
, "Image link with non-image target" =:
"[[http://example.com][logo.png]]" =?>
(para $ link "http://example.com" "" (image "logo.png" "" ""))
"[[http://example.com][./logo.png]]" =?>
(para $ link "http://example.com" "" (image "./logo.png" "" ""))
, "Plain link" =:
"Posts on http://zeitlens.com/ can be funny at times." =?>
@ -810,29 +810,29 @@ tests =
[ "Figure" =:
unlines [ "#+caption: A very courageous man."
, "#+name: goodguy"
, "[[edward.jpg]]"
, "[[file:edward.jpg]]"
] =?>
para (image "edward.jpg" "fig:goodguy" "A very courageous man.")
, "Figure with no name" =:
unlines [ "#+caption: I've been through the desert on this"
, "[[horse.png]]"
, "[[file:horse.png]]"
] =?>
para (image "horse.png" "fig:" "I've been through the desert on this")
, "Figure with `fig:` prefix in name" =:
unlines [ "#+caption: Used as a metapher in evolutionary biology."
, "#+name: fig:redqueen"
, "[[the-red-queen.jpg]]"
, "[[./the-red-queen.jpg]]"
] =?>
para (image "the-red-queen.jpg" "fig:redqueen"
para (image "./the-red-queen.jpg" "fig:redqueen"
"Used as a metapher in evolutionary biology.")
, "Figure with HTML attributes" =:
unlines [ "#+CAPTION: mah brain just explodid"
, "#+NAME: lambdacat"
, "#+ATTR_HTML: :style color: blue :role button"
, "[[lambdacat.jpg]]"
, "[[file:lambdacat.jpg]]"
] =?>
let kv = [("style", "color: blue"), ("role", "button")]
name = "fig:lambdacat"
@ -842,7 +842,7 @@ tests =
, "Labelled figure" =:
unlines [ "#+CAPTION: My figure"
, "#+LABEL: fig:myfig"
, "[[blub.png]]"
, "[[file:blub.png]]"
] =?>
let attr = ("fig:myfig", mempty, mempty)
in para (imageWith attr "blub.png" "fig:" "My figure")