Org reader: ensure image sources are proper links
Image sources as those in plain images, image links, or figures, must be proper URIs or relative file paths to be recognized as images. This restriction is now enforced for all image sources. This also fixes the reader's usage of uncleaned image sources, leading to `file:` prefixes not being deleted from figure images (e.g. `[[file:image.jpg]]` leading to a broken image `<img src="file:image.jpg"/>) Thanks to @bsag for noticing this bug.
This commit is contained in:
parent
0fbb676c81
commit
ba5b426ded
4 changed files with 62 additions and 48 deletions
|
@ -39,8 +39,8 @@ import Text.Pandoc.Readers.Org.Inlines
|
|||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Readers.Org.Parsing
|
||||
import Text.Pandoc.Readers.Org.Shared
|
||||
( isImageFilename, rundocBlockClass, toRundocAttrib
|
||||
, translateLang )
|
||||
( cleanLinkString, isImageFilename, rundocBlockClass
|
||||
, toRundocAttrib, translateLang )
|
||||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder ( Inlines, Blocks )
|
||||
|
@ -571,23 +571,33 @@ figure :: OrgParser (F Blocks)
|
|||
figure = try $ do
|
||||
figAttrs <- blockAttributes
|
||||
src <- skipSpaces *> selfTarget <* skipSpaces <* newline
|
||||
guard . not . isNothing . blockAttrCaption $ figAttrs
|
||||
guard (isImageFilename src)
|
||||
let figName = fromMaybe mempty $ blockAttrName figAttrs
|
||||
let figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
|
||||
let figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
|
||||
let figKeyVals = blockAttrKeyValues figAttrs
|
||||
let attr = (figLabel, mempty, figKeyVals)
|
||||
return $ (B.para . B.imageWith attr src (withFigPrefix figName) <$> figCaption)
|
||||
case cleanLinkString src of
|
||||
Nothing -> mzero
|
||||
Just imgSrc -> do
|
||||
guard (not . isNothing . blockAttrCaption $ figAttrs)
|
||||
guard (isImageFilename imgSrc)
|
||||
return $ figureBlock figAttrs imgSrc
|
||||
where
|
||||
selfTarget :: OrgParser String
|
||||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||
|
||||
figureBlock :: BlockAttributes -> String -> (F Blocks)
|
||||
figureBlock figAttrs imgSrc =
|
||||
let
|
||||
figName = fromMaybe mempty $ blockAttrName figAttrs
|
||||
figLabel = fromMaybe mempty $ blockAttrLabel figAttrs
|
||||
figCaption = fromMaybe mempty $ blockAttrCaption figAttrs
|
||||
figKeyVals = blockAttrKeyValues figAttrs
|
||||
attr = (figLabel, mempty, figKeyVals)
|
||||
in
|
||||
B.para . B.imageWith attr imgSrc (withFigPrefix figName) <$> figCaption
|
||||
|
||||
withFigPrefix :: String -> String
|
||||
withFigPrefix cs =
|
||||
if "fig:" `isPrefixOf` cs
|
||||
then cs
|
||||
else "fig:" ++ cs
|
||||
|
||||
selfTarget :: OrgParser String
|
||||
selfTarget = try $ char '[' *> linkTarget <* char ']'
|
||||
|
||||
--
|
||||
-- Examples
|
||||
|
|
|
@ -37,8 +37,8 @@ import Text.Pandoc.Readers.Org.BlockStarts
|
|||
import Text.Pandoc.Readers.Org.ParserState
|
||||
import Text.Pandoc.Readers.Org.Parsing
|
||||
import Text.Pandoc.Readers.Org.Shared
|
||||
( isImageFilename, rundocBlockClass, toRundocAttrib
|
||||
, translateLang )
|
||||
( cleanLinkString, isImageFilename, rundocBlockClass
|
||||
, toRundocAttrib, translateLang )
|
||||
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Builder ( Inlines )
|
||||
|
@ -52,7 +52,7 @@ import qualified Text.TeXMath.Readers.MathML.EntityMap as MathMLEntityMap
|
|||
import Prelude hiding (sequence)
|
||||
import Control.Monad ( guard, mplus, mzero, when, void )
|
||||
import Data.Char ( isAlphaNum, isSpace )
|
||||
import Data.List ( intersperse, isPrefixOf )
|
||||
import Data.List ( intersperse )
|
||||
import Data.Maybe ( fromMaybe )
|
||||
import qualified Data.Map as M
|
||||
import Data.Traversable (sequence)
|
||||
|
@ -435,9 +435,11 @@ explicitOrImageLink = try $ do
|
|||
char ']'
|
||||
return $ do
|
||||
src <- srcF
|
||||
if isImageFilename title
|
||||
then pure $ B.link src "" $ B.image title mempty mempty
|
||||
else linkToInlinesF src =<< title'
|
||||
case cleanLinkString title of
|
||||
Just imgSrc | isImageFilename imgSrc ->
|
||||
pure $ B.link src "" $ B.image imgSrc mempty mempty
|
||||
_ ->
|
||||
linkToInlinesF src =<< title'
|
||||
|
||||
selflinkOrImage :: OrgParser (F Inlines)
|
||||
selflinkOrImage = try $ do
|
||||
|
@ -482,25 +484,6 @@ linkToInlinesF linkStr =
|
|||
else pure . B.link cleanedLink ""
|
||||
Nothing -> internalLink linkStr -- other internal link
|
||||
|
||||
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
|
||||
-- the string does not appear to be a link.
|
||||
cleanLinkString :: String -> Maybe String
|
||||
cleanLinkString s =
|
||||
case s of
|
||||
'/':_ -> Just $ "file://" ++ s -- absolute path
|
||||
'.':'/':_ -> Just s -- relative path
|
||||
'.':'.':'/':_ -> Just s -- relative path
|
||||
-- Relative path or URL (file schema)
|
||||
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
|
||||
_ | isUrl s -> Just s -- URL
|
||||
_ -> Nothing
|
||||
where
|
||||
isUrl :: String -> Bool
|
||||
isUrl cs =
|
||||
let (scheme, path) = break (== ':') cs
|
||||
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
|
||||
&& not (null path)
|
||||
|
||||
internalLink :: String -> Inlines -> F Inlines
|
||||
internalLink link title = do
|
||||
anchorB <- (link `elem`) <$> asksF orgStateAnchorIds
|
||||
|
|
|
@ -27,13 +27,15 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|||
Utility functions used in other Pandoc Org modules.
|
||||
-}
|
||||
module Text.Pandoc.Readers.Org.Shared
|
||||
( isImageFilename
|
||||
( cleanLinkString
|
||||
, isImageFilename
|
||||
, rundocBlockClass
|
||||
, toRundocAttrib
|
||||
, translateLang
|
||||
) where
|
||||
|
||||
import Control.Arrow ( first )
|
||||
import Data.Char ( isAlphaNum )
|
||||
import Data.List ( isPrefixOf, isSuffixOf )
|
||||
|
||||
|
||||
|
@ -41,12 +43,31 @@ import Data.List ( isPrefixOf, isSuffixOf )
|
|||
isImageFilename :: String -> Bool
|
||||
isImageFilename filename =
|
||||
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
||||
(any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
||||
(any (\x -> (x ++ "://") `isPrefixOf` filename) protocols ||
|
||||
':' `notElem` filename)
|
||||
where
|
||||
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
||||
protocols = [ "file", "http", "https" ]
|
||||
|
||||
-- | Cleanup and canonicalize a string describing a link. Return @Nothing@ if
|
||||
-- the string does not appear to be a link.
|
||||
cleanLinkString :: String -> Maybe String
|
||||
cleanLinkString s =
|
||||
case s of
|
||||
'/':_ -> Just $ "file://" ++ s -- absolute path
|
||||
'.':'/':_ -> Just s -- relative path
|
||||
'.':'.':'/':_ -> Just s -- relative path
|
||||
-- Relative path or URL (file schema)
|
||||
'f':'i':'l':'e':':':s' -> Just $ if ("//" `isPrefixOf` s') then s else s'
|
||||
_ | isUrl s -> Just s -- URL
|
||||
_ -> Nothing
|
||||
where
|
||||
isUrl :: String -> Bool
|
||||
isUrl cs =
|
||||
let (scheme, path) = break (== ':') cs
|
||||
in all (\c -> isAlphaNum c || c `elem` (".-"::String)) scheme
|
||||
&& not (null path)
|
||||
|
||||
-- | Prefix used for Rundoc classes and arguments.
|
||||
rundocPrefix :: String
|
||||
rundocPrefix = "rundoc-"
|
||||
|
|
|
@ -219,12 +219,12 @@ tests =
|
|||
(para $ link "" "" "New Link")
|
||||
|
||||
, "Image link" =:
|
||||
"[[sunset.png][dusk.svg]]" =?>
|
||||
"[[sunset.png][file:dusk.svg]]" =?>
|
||||
(para $ link "sunset.png" "" (image "dusk.svg" "" ""))
|
||||
|
||||
, "Image link with non-image target" =:
|
||||
"[[http://example.com][logo.png]]" =?>
|
||||
(para $ link "http://example.com" "" (image "logo.png" "" ""))
|
||||
"[[http://example.com][./logo.png]]" =?>
|
||||
(para $ link "http://example.com" "" (image "./logo.png" "" ""))
|
||||
|
||||
, "Plain link" =:
|
||||
"Posts on http://zeitlens.com/ can be funny at times." =?>
|
||||
|
@ -810,29 +810,29 @@ tests =
|
|||
[ "Figure" =:
|
||||
unlines [ "#+caption: A very courageous man."
|
||||
, "#+name: goodguy"
|
||||
, "[[edward.jpg]]"
|
||||
, "[[file:edward.jpg]]"
|
||||
] =?>
|
||||
para (image "edward.jpg" "fig:goodguy" "A very courageous man.")
|
||||
|
||||
, "Figure with no name" =:
|
||||
unlines [ "#+caption: I've been through the desert on this"
|
||||
, "[[horse.png]]"
|
||||
, "[[file:horse.png]]"
|
||||
] =?>
|
||||
para (image "horse.png" "fig:" "I've been through the desert on this")
|
||||
|
||||
, "Figure with `fig:` prefix in name" =:
|
||||
unlines [ "#+caption: Used as a metapher in evolutionary biology."
|
||||
, "#+name: fig:redqueen"
|
||||
, "[[the-red-queen.jpg]]"
|
||||
, "[[./the-red-queen.jpg]]"
|
||||
] =?>
|
||||
para (image "the-red-queen.jpg" "fig:redqueen"
|
||||
para (image "./the-red-queen.jpg" "fig:redqueen"
|
||||
"Used as a metapher in evolutionary biology.")
|
||||
|
||||
, "Figure with HTML attributes" =:
|
||||
unlines [ "#+CAPTION: mah brain just explodid"
|
||||
, "#+NAME: lambdacat"
|
||||
, "#+ATTR_HTML: :style color: blue :role button"
|
||||
, "[[lambdacat.jpg]]"
|
||||
, "[[file:lambdacat.jpg]]"
|
||||
] =?>
|
||||
let kv = [("style", "color: blue"), ("role", "button")]
|
||||
name = "fig:lambdacat"
|
||||
|
@ -842,7 +842,7 @@ tests =
|
|||
, "Labelled figure" =:
|
||||
unlines [ "#+CAPTION: My figure"
|
||||
, "#+LABEL: fig:myfig"
|
||||
, "[[blub.png]]"
|
||||
, "[[file:blub.png]]"
|
||||
] =?>
|
||||
let attr = ("fig:myfig", mempty, mempty)
|
||||
in para (imageWith attr "blub.png" "fig:" "My figure")
|
||||
|
|
Loading…
Reference in a new issue