Markdown reader: distinguish autolinks in the AST.

With this change, autolinks are parsed as Links with
the `uri` class. (The same is true for bare links, if
the `autolink_bare_uris` extension is enabled.)  Email
autolinks are parsed as Links with the `email` class.
This allows the distinction to be represented in the
URI.

Formerly the `uri` class was added to autolinks by
the HTML writer, but it had to guess what was an autolink
and could not distinguish `[http://example.com](http://example.com)`
from `<http://example.com>`.  It also incorrectly recognized
`[pandoc](pandoc)` as an autolink.  Now the HTML writer
simply passes through the `uri` attribute if it is present,
but does not add anything.

The Textile writer has been modified so that the `uri`
class is not explicitly added for autolinks, even if it
is present.

Closes #4913.
This commit is contained in:
John MacFarlane 2018-09-19 14:49:46 -07:00
parent dba5c8d4e3
commit 136bf901aa
14 changed files with 73 additions and 38 deletions

View file

@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-
Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
@ -1879,23 +1880,24 @@ bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do
guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks
(orig, src) <- uri <|> emailAddress
(cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
return $ return $ B.link src "" (B.str orig)
return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do
getState >>= guard . stateAllowLinks
char '<'
(orig, src) <- uri <|> emailAddress
(cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-- in rare cases, something may remain after the uri parser
-- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>')
attr <- option nullAttr $ try $
attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra)
return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
(B.str $ orig ++ extra)
image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do

View file

@ -56,7 +56,7 @@ import Data.String (fromString)
import Data.Text (Text)
import qualified Data.Text.Lazy as TL
import Network.HTTP (urlEncode)
import Network.URI (URI (..), parseURIReference, unEscapeString)
import Network.URI (URI (..), parseURIReference)
import Numeric (showHex)
import Text.Blaze.Internal (customLeaf, customParent, MarkupM(Empty))
#if MIN_VERSION_blaze_markup(0,6,3)
@ -1084,10 +1084,7 @@ inlineToHtml opts inline = do
in '#' : prefix ++ xs
_ -> s
let link = H.a ! A.href (toValue s') $ linkText
let attr = if txt == [Str (unEscapeString s)]
then (ident, "uri" : classes, kvs)
else (ident, classes, kvs)
link' <- addAttrs opts attr link
link' <- addAttrs opts (ident, classes, kvs) link
return $ if null tit
then link'
else link' ! A.title (toValue tit)

View file

@ -463,15 +463,15 @@ inlineToTextile _ SoftBreak = return " "
inlineToTextile _ Space = return " "
inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do
let classes = if null cls
then ""
else "(" ++ unwords cls ++ ")"
label <- case txt of
[Code _ s]
| s == src -> return "$"
[Str s]
| s == src -> return "$"
_ -> inlineListToTextile opts txt
let classes = if null cls || cls == ["uri"] && label == "$"
then ""
else "(" ++ unwords cls ++ ")"
return $ "\"" ++ classes ++ label ++ "\":" ++ src
inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do

View file

@ -39,7 +39,7 @@ testBareLink (inp, ils) =
(unpack inp) (inp, doc $ para ils)
autolink :: String -> Inlines
autolink = autolinkWith nullAttr
autolink = autolinkWith ("",["uri"],[])
autolinkWith :: Attr -> String -> Inlines
autolinkWith attr s = linkWith attr s "" (str s)
@ -72,10 +72,12 @@ bareLinkTests =
, ("http://en.wikipedia.org/wiki/Sprite_(computer_graphics)",
autolink "http://en.wikipedia.org/wiki/Sprite_(computer_graphics)")
, ("http://en.wikipedia.org/wiki/Sprite_[computer_graphics]",
link "http://en.wikipedia.org/wiki/Sprite_%5Bcomputer_graphics%5D" ""
linkWith ("",["uri"],[])
"http://en.wikipedia.org/wiki/Sprite_%5Bcomputer_graphics%5D" ""
(str "http://en.wikipedia.org/wiki/Sprite_[computer_graphics]"))
, ("http://en.wikipedia.org/wiki/Sprite_{computer_graphics}",
link "http://en.wikipedia.org/wiki/Sprite_%7Bcomputer_graphics%7D" ""
linkWith ("",["uri"],[])
"http://en.wikipedia.org/wiki/Sprite_%7Bcomputer_graphics%7D" ""
(str "http://en.wikipedia.org/wiki/Sprite_{computer_graphics}"))
, ("http://example.com/Notification_Center-GitHub-20101108-140050.jpg",
autolink "http://example.com/Notification_Center-GitHub-20101108-140050.jpg")

View file

@ -2,5 +2,5 @@
% pandoc
<http://example.com>{.foo}
^D
<p><a href="http://example.com" class="uri foo">http://example.com</a></p>
<p><a href="http://example.com" class="foo">http://example.com</a></p>
```

34
test/command/4913.md Normal file
View file

@ -0,0 +1,34 @@
```
% pandoc -f markdown -t html
[https://pandoc.org](https://pandoc.org)
^D
<p><a href="https://pandoc.org">https://pandoc.org</a></p>
```
```
% pandoc -f markdown -t markdown
[https://pandoc.org](https://pandoc.org)
^D
<https://pandoc.org>
```
```
% pandoc -f markdown -t html
<https://pandoc.org>
^D
<p><a href="https://pandoc.org" class="uri">https://pandoc.org</a></p>
```
```
% pandoc -f markdown -t html
<https://pandoc.org>{.foo}
^D
<p><a href="https://pandoc.org" class="foo">https://pandoc.org</a></p>
```
```
% pandoc -f markdown -t html
<me@example.com>
^D
<p><a href="mailto:me@example.com" class="email">me@example.com</a></p>
```

View file

@ -45,9 +45,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
,Para [Str "`hi"]
,Para [Str "there`"]
,Header 2 ("multilingual-urls",[],[]) [Str "Multilingual",Space,Str "URLs"]
,Para [Link ("",[],[]) [Str "http://\27979.com?\27979=\27979"] ("http://\27979.com?\27979=\27979","")]
,Para [Link ("",["uri"],[]) [Str "http://\27979.com?\27979=\27979"] ("http://\27979.com?\27979=\27979","")]
,Para [Link ("",[],[]) [Str "foo"] ("/bar/\27979?x=\27979","title")]
,Para [Link ("",[],[]) [Str "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
,Para [Link ("",["email"],[]) [Str "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
,Header 2 ("numbered-examples",[],[]) [Str "Numbered",Space,Str "examples"]
,OrderedList (1,Example,TwoParens)
[[Plain [Str "First",Space,Str "example."]]
@ -176,8 +176,8 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
,[]]]
,Header 2 ("entities-in-links-and-titles",[],[]) [Str "Entities",Space,Str "in",Space,Str "links",Space,Str "and",Space,Str "titles"]
,Para [Link ("",[],[]) [Str "link"] ("/\252rl","\246\246!")]
,Para [Link ("",[],[]) [Str "http://g\246\246gle.com"] ("http://g\246\246gle.com","")]
,Para [Link ("",[],[]) [Str "me@ex\228mple.com"] ("mailto:me@ex\228mple.com","")]
,Para [Link ("",["uri"],[]) [Str "http://g\246\246gle.com"] ("http://g\246\246gle.com","")]
,Para [Link ("",["email"],[]) [Str "me@ex\228mple.com"] ("mailto:me@ex\228mple.com","")]
,Para [Link ("",[],[]) [Str "foobar"] ("/\252rl","\246\246!")]
,Header 2 ("parentheses-in-urls",[],[]) [Str "Parentheses",Space,Str "in",Space,Str "URLs"]
,Para [Link ("",[],[]) [Str "link"] ("/hi(there)","")]

View file

@ -384,14 +384,14 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."]
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."]
,Header 2 ("autolinks",[],[]) [Str "Autolinks"]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",[],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",["uri"],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,BulletList
[[Plain [Str "In",Space,Str "a",Space,Str "list?"]]
,[Plain [Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Plain [Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Plain [Str "It",Space,Str "should."]]]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",[],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",["email"],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")]
,BlockQuote
[Para [Str "Blockquoted:",Space,Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]]
[Para [Str "Blockquoted:",Space,Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"]
,CodeBlock ("",[],[]) "or here: <http://example.com/>"
,HorizontalRule

View file

@ -1298,7 +1298,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<title>Autolinks</title>
<para>
With an ampersand:
<ulink url="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</ulink>
<ulink url="http://example.com/?foo=1&amp;bar=2" role="uri">http://example.com/?foo=1&amp;bar=2</ulink>
</para>
<itemizedlist spacing="compact">
<listitem>
@ -1308,7 +1308,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</listitem>
<listitem>
<para>
<ulink url="http://example.com/">http://example.com/</ulink>
<ulink url="http://example.com/" role="uri">http://example.com/</ulink>
</para>
</listitem>
<listitem>
@ -1323,7 +1323,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<blockquote>
<para>
Blockquoted:
<ulink url="http://example.com/">http://example.com/</ulink>
<ulink url="http://example.com/" role="uri">http://example.com/</ulink>
</para>
</blockquote>
<para>

View file

@ -1273,7 +1273,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<title>Autolinks</title>
<para>
With an ampersand:
<link xlink:href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</link>
<link xlink:href="http://example.com/?foo=1&amp;bar=2" role="uri">http://example.com/?foo=1&amp;bar=2</link>
</para>
<itemizedlist spacing="compact">
<listitem>
@ -1283,7 +1283,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</listitem>
<listitem>
<para>
<link xlink:href="http://example.com/">http://example.com/</link>
<link xlink:href="http://example.com/" role="uri">http://example.com/</link>
</para>
</listitem>
<listitem>
@ -1298,7 +1298,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<blockquote>
<para>
Blockquoted:
<link xlink:href="http://example.com/">http://example.com/</link>
<link xlink:href="http://example.com/" role="uri">http://example.com/</link>
</para>
</blockquote>
<para>

View file

@ -508,7 +508,7 @@ Blah
<li><a href="http://example.com/" class="uri">http://example.com/</a></li>
<li>It should.</li>
</ul>
<p>An e-mail address: <a href="mailto:nobody@nowhere.net">nobody@nowhere.net</a></p>
<p>An e-mail address: <a href="mailto:nobody@nowhere.net" class="email">nobody@nowhere.net</a></p>
<blockquote>
<p>Blockquoted: <a href="http://example.com/" class="uri">http://example.com/</a></p>
</blockquote>

View file

@ -511,7 +511,7 @@ Blah
<li><a href="http://example.com/" class="uri">http://example.com/</a></li>
<li>It should.</li>
</ul>
<p>An e-mail address: <a href="mailto:nobody@nowhere.net">nobody@nowhere.net</a></p>
<p>An e-mail address: <a href="mailto:nobody@nowhere.net" class="email">nobody@nowhere.net</a></p>
<blockquote>
<p>Blockquoted: <a href="http://example.com/" class="uri">http://example.com/</a></p>
</blockquote>

View file

@ -384,14 +384,14 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."]
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."]
,Header 2 ("autolinks",[],[]) [Str "Autolinks"]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",[],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",["uri"],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,BulletList
[[Plain [Str "In",Space,Str "a",Space,Str "list?"]]
,[Plain [Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Plain [Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Plain [Str "It",Space,Str "should."]]]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",[],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",["email"],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")]
,BlockQuote
[Para [Str "Blockquoted:",Space,Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]]
[Para [Str "Blockquoted:",Space,Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"]
,CodeBlock ("",[],[]) "or here: <http://example.com/>"
,HorizontalRule

View file

@ -660,7 +660,7 @@ With an ampersand: "$":http://example.com/?foo=1&bar=2
* "$":http://example.com/
* It should.
An e&#45;mail address: "nobody&#64;nowhere.net":mailto:nobody@nowhere.net
An e&#45;mail address: "(email)nobody&#64;nowhere.net":mailto:nobody@nowhere.net
bq. Blockquoted: "$":http://example.com/