Support shortcut reference links in markdown writer

Issue #1977

Most markdown processors support the [shortcut format] for reference links.
Pandoc's markdown reader parsed this shortcuts unoptionally.
Pandoc's markdown writer (with --reference-links option) never shortcutted links.

This commit adds an extension `shortcut_reference_links`. The extension is
enabled by default for those markdown flavors that support reading shortcut
reference links, namely:

    - pandoc
    - strict pandoc
    - github flavoured
    - PHPmarkdown

If extension is enabled, reader parses the shortcuts in the same way as
it preveously did. Otherwise it would parse them as normal text.

If extension is enabled, writer outputs shortcut reference links unless
doing so would cause problems (see test cases in `tests/Tests/Writers/Markdown.hs`).
This commit is contained in:
Konstantin Zudov 2015-03-03 03:28:56 +02:00
parent 4f0c5c3080
commit b9f77ed03d
5 changed files with 158 additions and 31 deletions

5
README
View file

@ -2785,6 +2785,11 @@ in several respects:
we must either disallow lazy wrapping or require a blank line between
list items.
#### Extension: `shortcut_reference_links` ####
Allows to use shortcut reference links: `[foo]` instead of `[foo][]`. Writer
would shortcut links unless doing so might cause problems.
Markdown variants
-----------------

View file

@ -109,6 +109,7 @@ data Extension =
| Ext_implicit_header_references -- ^ Implicit reference links for headers
| Ext_line_blocks -- ^ RST style line blocks
| Ext_epub_html_exts -- ^ Recognise the EPUB extended version of HTML
| Ext_shortcut_reference_links -- ^ Shortcut reference links
deriving (Show, Read, Enum, Eq, Ord, Bounded)
pandocExtensions :: Set Extension
@ -151,6 +152,7 @@ pandocExtensions = Set.fromList
, Ext_header_attributes
, Ext_implicit_header_references
, Ext_line_blocks
, Ext_shortcut_reference_links
]
phpMarkdownExtraExtensions :: Set Extension
@ -164,6 +166,7 @@ phpMarkdownExtraExtensions = Set.fromList
, Ext_intraword_underscores
, Ext_header_attributes
, Ext_abbreviations
, Ext_shortcut_reference_links
]
githubMarkdownExtensions :: Set Extension
@ -180,6 +183,7 @@ githubMarkdownExtensions = Set.fromList
, Ext_strikeout
, Ext_hard_line_breaks
, Ext_lists_without_preceding_blankline
, Ext_shortcut_reference_links
]
multimarkdownExtensions :: Set Extension
@ -202,7 +206,9 @@ multimarkdownExtensions = Set.fromList
strictExtensions :: Set Extension
strictExtensions = Set.fromList
[ Ext_raw_html ]
[ Ext_raw_html
, Ext_shortcut_reference_links
]
data ReaderOptions = ReaderOptions{
readerExtensions :: Set Extension -- ^ Syntax extensions

View file

@ -1679,6 +1679,7 @@ referenceLink constructor (lab, raw) = do
lookAhead (try (spnl >> normalCite >> return (mempty, "")))
<|>
try (spnl >> reference)
when (raw' == "") $ guardEnabled Ext_shortcut_reference_links
let labIsRef = raw' == "" || raw' == "[]"
let key = toKey $ if labIsRef then raw else raw'
parsedRaw <- parseFromString (mconcat <$> many inline) raw'

View file

@ -57,14 +57,15 @@ import qualified Data.Text as T
type Notes = [[Block]]
type Refs = [([Inline], Target)]
data WriterState = WriterState { stNotes :: Notes
, stRefs :: Refs
, stInList :: Bool
, stIds :: [String]
, stPlain :: Bool }
data WriterState = WriterState { stNotes :: Notes
, stRefs :: Refs
, stRefShortcutable :: Bool
, stInList :: Bool
, stIds :: [String]
, stPlain :: Bool }
instance Default WriterState
where def = WriterState{ stNotes = [], stRefs = [], stInList = False,
stIds = [], stPlain = False }
where def = WriterState{ stNotes = [], stRefs = [], stRefShortcutable = True,
stInList = False, stIds = [], stPlain = False }
-- | Convert Pandoc to Markdown.
writeMarkdown :: WriterOptions -> Pandoc -> String
@ -690,27 +691,47 @@ getReference label (src, tit) = do
inlineListToMarkdown :: WriterOptions -> [Inline] -> State WriterState Doc
inlineListToMarkdown opts lst = do
inlist <- gets stInList
mapM (inlineToMarkdown opts)
(if inlist then avoidBadWraps lst else lst) >>= return . cat
where avoidBadWraps [] = []
avoidBadWraps (Space:Str ('>':cs):xs) =
Str (' ':'>':cs) : avoidBadWraps xs
avoidBadWraps (Space:Str [c]:[])
| c `elem` "-*+" = Str [' ', c] : []
avoidBadWraps (Space:Str [c]:Space:xs)
| c `elem` "-*+" = Str [' ', c] : Space : avoidBadWraps xs
avoidBadWraps (Space:Str cs:Space:xs)
| isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWraps xs
avoidBadWraps (Space:Str cs:[])
| isOrderedListMarker cs = Str (' ':cs) : []
avoidBadWraps (x:xs) = x : avoidBadWraps xs
isOrderedListMarker xs = endsWithListPunct xs &&
isRight (runParserT (anyOrderedListMarker >> eof)
defaultParserState "" xs)
endsWithListPunct xs = case reverse xs of
'.':_ -> True
')':_ -> True
_ -> False
go (if inlist then avoidBadWrapsInList lst else lst)
where go [] = return empty
go (i:is) = case i of
(Link _ _) -> case is of
-- If a link is followed by another link or '[' we don't shortcut
(Link _ _):_ -> unshortcutable
Space:(Link _ _):_ -> unshortcutable
Space:(Str('[':_)):_ -> unshortcutable
Space:(RawInline _ ('[':_)):_ -> unshortcutable
Space:(Cite _ _):_ -> unshortcutable
(Cite _ _):_ -> unshortcutable
Str ('[':_):_ -> unshortcutable
(RawInline _ ('[':_)):_ -> unshortcutable
(RawInline _ (' ':'[':_)):_ -> unshortcutable
_ -> shortcutable
_ -> shortcutable
where shortcutable = liftM2 (<>) (inlineToMarkdown opts i) (go is)
unshortcutable = do
iMark <- withState (\s -> s { stRefShortcutable = False })
(inlineToMarkdown opts i)
modify (\s -> s {stRefShortcutable = True })
fmap (iMark <>) (go is)
avoidBadWrapsInList :: [Inline] -> [Inline]
avoidBadWrapsInList [] = []
avoidBadWrapsInList (Space:Str ('>':cs):xs) =
Str (' ':'>':cs) : avoidBadWrapsInList xs
avoidBadWrapsInList (Space:Str [c]:[])
| c `elem` "-*+" = Str [' ', c] : []
avoidBadWrapsInList (Space:Str [c]:Space:xs)
| c `elem` "-*+" = Str [' ', c] : Space : avoidBadWrapsInList xs
avoidBadWrapsInList (Space:Str cs:Space:xs)
| isOrderedListMarker cs = Str (' ':cs) : Space : avoidBadWrapsInList xs
avoidBadWrapsInList (Space:Str cs:[])
| isOrderedListMarker cs = Str (' ':cs) : []
avoidBadWrapsInList (x:xs) = x : avoidBadWrapsInList xs
isOrderedListMarker :: String -> Bool
isOrderedListMarker xs = (last xs `elem` ".)") &&
isRight (runParserT (anyOrderedListMarker >> eof)
defaultParserState "" xs)
isRight :: Either a b -> Bool
isRight (Right _) = True
@ -873,6 +894,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
[Str s] | escapeURI s == srcSuffix -> True
_ -> False
let useRefLinks = writerReferenceLinks opts && not useAuto
shortcutable <- gets stRefShortcutable
let useShortcutRefLinks = shortcutable &&
isEnabled Ext_shortcut_reference_links opts
ref <- if useRefLinks then getReference txt (src, tit) else return []
reftext <- inlineListToMarkdown opts ref
return $ if useAuto
@ -882,7 +906,9 @@ inlineToMarkdown opts (Link txt (src, tit)) = do
else if useRefLinks
then let first = "[" <> linktext <> "]"
second = if txt == ref
then "[]"
then if useShortcutRefLinks
then ""
else "[]"
else "[" <> reftext <> "]"
in first <> second
else if plain

View file

@ -1,4 +1,5 @@
{-# LANGUAGE OverloadedStrings #-}
{-# OPTIONS_GHC -fno-warn-name-shadowing #-}
module Tests.Writers.Markdown (tests) where
import Test.Framework
@ -35,4 +36,92 @@ tests = [ "indented code after list"
=: bulletList [ plain "foo" <> bulletList [ plain "bar" ],
plain "baz" ]
=?> "- foo\n - bar\n- baz\n"
]
] ++ [shortcutLinkRefsTests]
shortcutLinkRefsTests :: Test
shortcutLinkRefsTests =
let infix 4 =:
(=:) :: (ToString a, ToPandoc a)
=> String -> (a, String) -> Test
(=:) = test (writeMarkdown (def {writerReferenceLinks = True}) . toPandoc)
in testGroup "Shortcut reference links"
[ "Simple link (shortcutable)"
=: (para (link "/url" "title" "foo"))
=?> "[foo]\n\n [foo]: /url \"title\""
, "Followed by another link (unshortcutable)"
=: (para ((link "/url1" "title1" "first")
<> (link "/url2" "title2" "second")))
=?> unlines [ "[first][][second]"
, ""
, " [first]: /url1 \"title1\""
, " [second]: /url2 \"title2\""
]
, "Followed by space and another link (unshortcutable)"
=: (para ((link "/url1" "title1" "first") <> " "
<> (link "/url2" "title2" "second")))
=?> unlines [ "[first][] [second]"
, ""
, " [first]: /url1 \"title1\""
, " [second]: /url2 \"title2\""
]
, "Reference link is used multiple times (unshortcutable)"
=: (para ((link "/url1" "" "foo") <> (link "/url2" "" "foo")
<> (link "/url3" "" "foo")))
=?> unlines [ "[foo][][foo][1][foo][2]"
, ""
, " [foo]: /url1"
, " [1]: /url2"
, " [2]: /url3"
]
, "Reference link is used multiple times (unshortcutable)"
=: (para ((link "/url1" "" "foo") <> " " <> (link "/url2" "" "foo")
<> " " <> (link "/url3" "" "foo")))
=?> unlines [ "[foo][] [foo][1] [foo][2]"
, ""
, " [foo]: /url1"
, " [1]: /url2"
, " [2]: /url3"
]
, "Reference link is followed by text in brackets"
=: (para ((link "/url" "" "link") <> "[text in brackets]"))
=?> unlines [ "[link][][text in brackets]"
, ""
, " [link]: /url"
]
, "Reference link is followed by space and text in brackets"
=: (para ((link "/url" "" "link") <> " [text in brackets]"))
=?> unlines [ "[link][] [text in brackets]"
, ""
, " [link]: /url"
]
, "Reference link is followed by RawInline"
=: (para ((link "/url" "" "link") <> rawInline "markdown" "[rawText]"))
=?> unlines [ "[link][][rawText]"
, ""
, " [link]: /url"
]
, "Reference link is followed by space and RawInline"
=: (para ((link "/url" "" "link") <> space <> rawInline "markdown" "[rawText]"))
=?> unlines [ "[link][] [rawText]"
, ""
, " [link]: /url"
]
, "Reference link is followed by RawInline with space"
=: (para ((link "/url" "" "link") <> rawInline "markdown" " [rawText]"))
=?> unlines [ "[link][] [rawText]"
, ""
, " [link]: /url"
]
, "Reference link is followed by citation"
=: (para ((link "/url" "" "link") <> cite [Citation "author" [] [] NormalCitation 0 0] (str "[@author]")))
=?> unlines [ "[link][][@author]"
, ""
, " [link]: /url"
]
, "Reference link is followed by space and citation"
=: (para ((link "/url" "" "link") <> space <> cite [Citation "author" [] [] NormalCitation 0 0] (str "[@author]")))
=?> unlines [ "[link][] [@author]"
, ""
, " [link]: /url"
]
]