Add rebase_relative_paths extension.

- Add manual entry for (non-default) extension
  `rebase_relative_paths`.
- Add constructor `Ext_rebase_relative_paths` to `Extensions`
  in Text.Pandoc.Extensions [API change]. When enabled, this
  extension rewrites relative image and link paths by prepending
  the (relative) directory of the containing file.
- Make Markdown reader sensitive to the new extension.
- Add tests for #3752.

Closes #3752.

NB. currently the extension applies to markdown and associated
readers but not commonmark/gfm.
This commit is contained in:
John MacFarlane 2021-05-24 10:17:37 -07:00
parent 0226d2320f
commit 834da53058
9 changed files with 111 additions and 9 deletions

View file

@ -3755,7 +3755,7 @@ definition:
Note that space between items in a definition list is required.
(A variant that loosens this requirement, but disallows "lazy"
hard wrapping, can be activated with `compact_definition_lists`: see
[Non-pandoc extensions], below.)
[Non-default extensions], below.)
[^3]: I have been influenced by the suggestions of [David
Wheeler](https://justatheory.com/2009/02/modest-markdown-proposal/).
@ -5051,13 +5051,37 @@ author-in-text style inside notes when using a note style.
[finding and editing styles]: https://citationstyles.org/authors/
[CSL locale files]: https://github.com/citation-style-language/locales
## Non-pandoc extensions
## Non-default extensions
The following Markdown syntax extensions are not enabled by default
in pandoc, but may be enabled by adding `+EXTENSION` to the format
name, where `EXTENSION` is the name of the extension. Thus, for
example, `markdown+hard_line_breaks` is Markdown with hard line breaks.
#### Extension: `rebase_relative_paths` ####
Rewrite relative paths for Markdown links and images, depending
on the path of the file containing the link or image link. For
each link or image, pandoc will compute the directory of the
containing file, relative to the working directory, and prepend
the resulting path to the link or image path.
The use of this extension is best understood by example.
Suppose you have a subdirectory for each chapter of a book,
`chap1`, `chap2`, `chap3`. Each contains a file `text.md` and a
number of images used in the chapter. You would like to have
`![image](spider.jpg)` in `chap1/text.md` refer to
`chap1/spider.jpg` and `![image](spider.jpg)` in `chap2/text.md`
refer to `chap2/spider.jpg`. To do this, use
pandoc chap*/*.md -f markdown+rebase_relative_paths
Without this extension, you would have to use
`![image](chap1/spider.jpg)` in `chap1/text.md` and
`![image](chap2/spider.jpg)` in `chap2/text.md`. Links with
relative paths will be rewritten in the same way as images.
*This extension currently only affects Markdown input.*
#### Extension: `attributes` ####
Allows attributes to be attached to any inline or block-level

View file

@ -214,6 +214,10 @@ extra-source-files:
test/command/C.txt
test/command/D.txt
test/command/01.csv
test/command/chap1/spider.png
test/command/chap2/spider.png
test/command/chap1/text.md
test/command/chap2/text.md
test/command/defaults1.yaml
test/command/defaults2.yaml
test/command/defaults3.yaml

View file

@ -136,6 +136,8 @@ data Extension =
| Ext_raw_html -- ^ Allow raw HTML
| Ext_raw_tex -- ^ Allow raw TeX (other than math)
| Ext_raw_markdown -- ^ Parse markdown in ipynb as raw markdown
| Ext_rebase_relative_paths -- ^ Rebase relative image and link paths,
-- relative to directory of containing file
| Ext_shortcut_reference_links -- ^ Shortcut reference links
| Ext_simple_tables -- ^ Pandoc-style simple tables
| Ext_smart -- ^ "Smart" quotes, apostrophes, ellipses, dashes
@ -462,6 +464,7 @@ getAllExtensions f = universalExtensions <> getAll f
, Ext_gutenberg
, Ext_smart
, Ext_literate_haskell
, Ext_rebase_relative_paths
]
getAll "markdown_strict" = allMarkdownExtensions
getAll "markdown_phpextra" = allMarkdownExtensions

View file

@ -29,7 +29,7 @@ import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.ByteString.Lazy as BL
import System.FilePath (addExtension, takeExtension)
import System.FilePath (addExtension, takeExtension, isAbsolute, takeDirectory)
import Text.HTML.TagSoup hiding (Row)
import Text.Pandoc.Builder (Blocks, Inlines)
import qualified Text.Pandoc.Builder as B
@ -1836,9 +1836,12 @@ regLink :: PandocMonad m
-> MarkdownParser m (F Inlines)
regLink constructor lab = try $ do
(src, tit) <- source
rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
pos <- getPosition
let src' = if rebase then rebasePath pos src else src
attr <- option nullAttr $
guardEnabled Ext_link_attributes >> attributes
return $ constructor attr src tit <$> lab
return $ constructor attr src' tit <$> lab
-- a link like [this][ref] or [this][] or [this]
referenceLink :: PandocMonad m
@ -1854,6 +1857,8 @@ referenceLink constructor (lab, raw) = do
return (mempty, "")))
<|>
try ((guardDisabled Ext_spaced_reference_links <|> spnl) >> reference)
rebase <- option False (True <$ guardEnabled Ext_rebase_relative_paths)
pos <- getPosition
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
@ -1878,7 +1883,9 @@ referenceLink constructor (lab, raw) = do
Just ((src, tit), _) -> constructor nullAttr src tit <$> lab
Nothing -> makeFallback
else makeFallback
Just ((src,tit), attr) -> constructor attr src tit <$> lab
Just ((src,tit), attr) ->
let src' = if rebase then rebasePath pos src else src
in constructor attr src' tit <$> lab
dropBrackets :: Text -> Text
dropBrackets = dropRB . dropLB
@ -1911,12 +1918,27 @@ autoLink = try $ do
return $ return $ B.linkWith attr (src <> escapeURI extra) ""
(B.str $ orig <> extra)
-- | Rebase a relative path by prepending the (relative) directory of
-- the source file named in the given source position.
--
-- The path is returned unchanged when:
--
-- * it is empty;
-- * it is a fragment-only reference (begins with @#@), since such a
--   reference points into the current document rather than to a file;
-- * it is an absolute file path or a URI;
-- * the source name has no directory component (its directory is
--   @""@ or @"."@).
--
-- Otherwise the directory and the path are joined with @"/"@.
rebasePath :: SourcePos -> Text -> Text
rebasePath pos path
  | T.null path || isFragment || isAbsolute (T.unpack path) || isURI path
  = path
  | otherwise =
      case takeDirectory (sourceName pos) of
        ""  -> path
        "." -> path
        d   -> T.pack d <> "/" <> path
  where
    -- A target such as "#section" must not gain a directory prefix.
    isFragment = T.singleton '#' `T.isPrefixOf` path
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do
char '!'
(lab,raw) <- reference
defaultExt <- getOption readerDefaultImageExtension
let constructor attr' src = case takeExtension (T.unpack src) of
let constructor attr' src =
case takeExtension (T.unpack src) of
"" -> B.imageWith attr' (T.pack $ addExtension (T.unpack src)
$ T.unpack defaultExt)
_ -> B.imageWith attr' src

35
test/command/3752.md Normal file
View file

@ -0,0 +1,35 @@
```
% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths --verbose -t docx | pandoc -f docx -t plain
^D
[INFO] Loaded command/chap1/spider.png from ./command/chap1/spider.png
[INFO] Loaded command/chap1/../../lalune.jpg from ./command/chap1/../../lalune.jpg
[INFO] Loaded command/chap2/spider.png from ./command/chap2/spider.png
Chapter one
A spider: [spider]
The moon: [moon]
Link to spider picture.
URL left alone: manual.
Absolute path left alone: absolute.
Chapter two
A spider: [spider]
```
```
% pandoc command/chap1/text.md command/chap2/text.md -f markdown+rebase_relative_paths -t html
^D
<h1 id="chapter-one">Chapter one</h1>
<p>A spider: <img src="command/chap1/spider.png" alt="spider" /></p>
<p>The moon: <img src="command/chap1/../../lalune.jpg" alt="moon" /></p>
<p>Link to <a href="command/chap1/spider.png">spider picture</a>.</p>
<p>URL left alone: <a href="https://pandoc.org/MANUAL.html">manual</a>.</p>
<p>Absolute path left alone: <a href="/foo/bar/baz.png">absolute</a>.</p>
<h1 id="chapter-two">Chapter two</h1>
<p>A spider: <img src="command/chap2/spider.png" alt="spider" /></p>
```

Binary file not shown.

After

Width:  |  Height:  |  Size: 62 KiB

View file

@ -0,0 +1,11 @@
# Chapter one
A spider: ![spider](spider.png)
The moon: ![moon](../../lalune.jpg)
Link to [spider picture](spider.png).
URL left alone: [manual](https://pandoc.org/MANUAL.html).
Absolute path left alone: [absolute](/foo/bar/baz.png).

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.6 KiB

View file

@ -0,0 +1,3 @@
# Chapter two
A spider: ![spider](spider.png)