Markdown reader: distinguish autolinks in the AST.

With this change, autolinks are parsed as Links with
the `uri` class. (The same is true for bare links, if
the `autolink_bare_uris` extension is enabled.)  Email
autolinks are parsed as Links with the `email` class.
This allows the distinction to be represented in the
AST.

Formerly the `uri` class was added to autolinks by
the HTML writer, but it had to guess what was an autolink
and could not distinguish `[http://example.com](http://example.com)`
from `<http://example.com>`.  It also incorrectly recognized
`[pandoc](pandoc)` as an autolink.  Now the HTML writer
simply passes through the `uri` class if it is present,
but does not add anything.

The Textile writer has been modified so that the `uri`
class is not explicitly written out for autolinks, even
if it is present in the AST.

Closes #4913.
This commit is contained in:
John MacFarlane 2018-09-19 14:49:46 -07:00
parent dba5c8d4e3
commit 136bf901aa
14 changed files with 73 additions and 38 deletions

View file

@ -1,6 +1,7 @@
{-# LANGUAGE NoImplicitPrelude #-} {-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE RelaxedPolyRec #-} {-# LANGUAGE RelaxedPolyRec #-}
{-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{- {-
Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu> Copyright (C) 2006-2018 John MacFarlane <jgm@berkeley.edu>
@ -1879,23 +1880,24 @@ bareURL :: PandocMonad m => MarkdownParser m (F Inlines)
bareURL = try $ do bareURL = try $ do
guardEnabled Ext_autolink_bare_uris guardEnabled Ext_autolink_bare_uris
getState >>= guard . stateAllowLinks getState >>= guard . stateAllowLinks
(orig, src) <- uri <|> emailAddress (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a") notFollowedBy $ try $ spaces >> htmlTag (~== TagClose "a")
return $ return $ B.link src "" (B.str orig) return $ return $ B.linkWith ("",[cls],[]) src "" (B.str orig)
autoLink :: PandocMonad m => MarkdownParser m (F Inlines) autoLink :: PandocMonad m => MarkdownParser m (F Inlines)
autoLink = try $ do autoLink = try $ do
getState >>= guard . stateAllowLinks getState >>= guard . stateAllowLinks
char '<' char '<'
(orig, src) <- uri <|> emailAddress (cls, (orig, src)) <- (("uri",) <$> uri) <|> (("email",) <$> emailAddress)
-- in rare cases, something may remain after the uri parser -- in rare cases, something may remain after the uri parser
-- is finished, because the uri parser tries to avoid parsing -- is finished, because the uri parser tries to avoid parsing
-- final punctuation. for example: in `<http://hi---there>`, -- final punctuation. for example: in `<http://hi---there>`,
-- the URI parser will stop before the dashes. -- the URI parser will stop before the dashes.
extra <- fromEntities <$> manyTill nonspaceChar (char '>') extra <- fromEntities <$> manyTill nonspaceChar (char '>')
attr <- option nullAttr $ try $ attr <- option ("", [cls], []) $ try $
guardEnabled Ext_link_attributes >> attributes guardEnabled Ext_link_attributes >> attributes
return $ return $ B.linkWith attr (src ++ escapeURI extra) "" (B.str $ orig ++ extra) return $ return $ B.linkWith attr (src ++ escapeURI extra) ""
(B.str $ orig ++ extra)
image :: PandocMonad m => MarkdownParser m (F Inlines) image :: PandocMonad m => MarkdownParser m (F Inlines)
image = try $ do image = try $ do

View file

@ -56,7 +56,7 @@ import Data.String (fromString)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy as TL
import Network.HTTP (urlEncode) import Network.HTTP (urlEncode)
import Network.URI (URI (..), parseURIReference, unEscapeString) import Network.URI (URI (..), parseURIReference)
import Numeric (showHex) import Numeric (showHex)
import Text.Blaze.Internal (customLeaf, customParent, MarkupM(Empty)) import Text.Blaze.Internal (customLeaf, customParent, MarkupM(Empty))
#if MIN_VERSION_blaze_markup(0,6,3) #if MIN_VERSION_blaze_markup(0,6,3)
@ -1084,10 +1084,7 @@ inlineToHtml opts inline = do
in '#' : prefix ++ xs in '#' : prefix ++ xs
_ -> s _ -> s
let link = H.a ! A.href (toValue s') $ linkText let link = H.a ! A.href (toValue s') $ linkText
let attr = if txt == [Str (unEscapeString s)] link' <- addAttrs opts (ident, classes, kvs) link
then (ident, "uri" : classes, kvs)
else (ident, classes, kvs)
link' <- addAttrs opts attr link
return $ if null tit return $ if null tit
then link' then link'
else link' ! A.title (toValue tit) else link' ! A.title (toValue tit)

View file

@ -463,15 +463,15 @@ inlineToTextile _ SoftBreak = return " "
inlineToTextile _ Space = return " " inlineToTextile _ Space = return " "
inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do inlineToTextile opts (Link (_, cls, _) txt (src, _)) = do
let classes = if null cls
then ""
else "(" ++ unwords cls ++ ")"
label <- case txt of label <- case txt of
[Code _ s] [Code _ s]
| s == src -> return "$" | s == src -> return "$"
[Str s] [Str s]
| s == src -> return "$" | s == src -> return "$"
_ -> inlineListToTextile opts txt _ -> inlineListToTextile opts txt
let classes = if null cls || cls == ["uri"] && label == "$"
then ""
else "(" ++ unwords cls ++ ")"
return $ "\"" ++ classes ++ label ++ "\":" ++ src return $ "\"" ++ classes ++ label ++ "\":" ++ src
inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do inlineToTextile opts (Image attr@(_, cls, _) alt (source, tit)) = do

View file

@ -39,7 +39,7 @@ testBareLink (inp, ils) =
(unpack inp) (inp, doc $ para ils) (unpack inp) (inp, doc $ para ils)
autolink :: String -> Inlines autolink :: String -> Inlines
autolink = autolinkWith nullAttr autolink = autolinkWith ("",["uri"],[])
autolinkWith :: Attr -> String -> Inlines autolinkWith :: Attr -> String -> Inlines
autolinkWith attr s = linkWith attr s "" (str s) autolinkWith attr s = linkWith attr s "" (str s)
@ -72,10 +72,12 @@ bareLinkTests =
, ("http://en.wikipedia.org/wiki/Sprite_(computer_graphics)", , ("http://en.wikipedia.org/wiki/Sprite_(computer_graphics)",
autolink "http://en.wikipedia.org/wiki/Sprite_(computer_graphics)") autolink "http://en.wikipedia.org/wiki/Sprite_(computer_graphics)")
, ("http://en.wikipedia.org/wiki/Sprite_[computer_graphics]", , ("http://en.wikipedia.org/wiki/Sprite_[computer_graphics]",
link "http://en.wikipedia.org/wiki/Sprite_%5Bcomputer_graphics%5D" "" linkWith ("",["uri"],[])
"http://en.wikipedia.org/wiki/Sprite_%5Bcomputer_graphics%5D" ""
(str "http://en.wikipedia.org/wiki/Sprite_[computer_graphics]")) (str "http://en.wikipedia.org/wiki/Sprite_[computer_graphics]"))
, ("http://en.wikipedia.org/wiki/Sprite_{computer_graphics}", , ("http://en.wikipedia.org/wiki/Sprite_{computer_graphics}",
link "http://en.wikipedia.org/wiki/Sprite_%7Bcomputer_graphics%7D" "" linkWith ("",["uri"],[])
"http://en.wikipedia.org/wiki/Sprite_%7Bcomputer_graphics%7D" ""
(str "http://en.wikipedia.org/wiki/Sprite_{computer_graphics}")) (str "http://en.wikipedia.org/wiki/Sprite_{computer_graphics}"))
, ("http://example.com/Notification_Center-GitHub-20101108-140050.jpg", , ("http://example.com/Notification_Center-GitHub-20101108-140050.jpg",
autolink "http://example.com/Notification_Center-GitHub-20101108-140050.jpg") autolink "http://example.com/Notification_Center-GitHub-20101108-140050.jpg")

View file

@ -2,5 +2,5 @@
% pandoc % pandoc
<http://example.com>{.foo} <http://example.com>{.foo}
^D ^D
<p><a href="http://example.com" class="uri foo">http://example.com</a></p> <p><a href="http://example.com" class="foo">http://example.com</a></p>
``` ```

34
test/command/4913.md Normal file
View file

@ -0,0 +1,34 @@
```
% pandoc -f markdown -t html
[https://pandoc.org](https://pandoc.org)
^D
<p><a href="https://pandoc.org">https://pandoc.org</a></p>
```
```
% pandoc -f markdown -t markdown
[https://pandoc.org](https://pandoc.org)
^D
<https://pandoc.org>
```
```
% pandoc -f markdown -t html
<https://pandoc.org>
^D
<p><a href="https://pandoc.org" class="uri">https://pandoc.org</a></p>
```
```
% pandoc -f markdown -t html
<https://pandoc.org>{.foo}
^D
<p><a href="https://pandoc.org" class="foo">https://pandoc.org</a></p>
```
```
% pandoc -f markdown -t html
<me@example.com>
^D
<p><a href="mailto:me@example.com" class="email">me@example.com</a></p>
```

View file

@ -45,9 +45,9 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
,Para [Str "`hi"] ,Para [Str "`hi"]
,Para [Str "there`"] ,Para [Str "there`"]
,Header 2 ("multilingual-urls",[],[]) [Str "Multilingual",Space,Str "URLs"] ,Header 2 ("multilingual-urls",[],[]) [Str "Multilingual",Space,Str "URLs"]
,Para [Link ("",[],[]) [Str "http://\27979.com?\27979=\27979"] ("http://\27979.com?\27979=\27979","")] ,Para [Link ("",["uri"],[]) [Str "http://\27979.com?\27979=\27979"] ("http://\27979.com?\27979=\27979","")]
,Para [Link ("",[],[]) [Str "foo"] ("/bar/\27979?x=\27979","title")] ,Para [Link ("",[],[]) [Str "foo"] ("/bar/\27979?x=\27979","title")]
,Para [Link ("",[],[]) [Str "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")] ,Para [Link ("",["email"],[]) [Str "\27979@foo.\27979.baz"] ("mailto:\27979@foo.\27979.baz","")]
,Header 2 ("numbered-examples",[],[]) [Str "Numbered",Space,Str "examples"] ,Header 2 ("numbered-examples",[],[]) [Str "Numbered",Space,Str "examples"]
,OrderedList (1,Example,TwoParens) ,OrderedList (1,Example,TwoParens)
[[Plain [Str "First",Space,Str "example."]] [[Plain [Str "First",Space,Str "example."]]
@ -176,8 +176,8 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "Author",S
,[]]] ,[]]]
,Header 2 ("entities-in-links-and-titles",[],[]) [Str "Entities",Space,Str "in",Space,Str "links",Space,Str "and",Space,Str "titles"] ,Header 2 ("entities-in-links-and-titles",[],[]) [Str "Entities",Space,Str "in",Space,Str "links",Space,Str "and",Space,Str "titles"]
,Para [Link ("",[],[]) [Str "link"] ("/\252rl","\246\246!")] ,Para [Link ("",[],[]) [Str "link"] ("/\252rl","\246\246!")]
,Para [Link ("",[],[]) [Str "http://g\246\246gle.com"] ("http://g\246\246gle.com","")] ,Para [Link ("",["uri"],[]) [Str "http://g\246\246gle.com"] ("http://g\246\246gle.com","")]
,Para [Link ("",[],[]) [Str "me@ex\228mple.com"] ("mailto:me@ex\228mple.com","")] ,Para [Link ("",["email"],[]) [Str "me@ex\228mple.com"] ("mailto:me@ex\228mple.com","")]
,Para [Link ("",[],[]) [Str "foobar"] ("/\252rl","\246\246!")] ,Para [Link ("",[],[]) [Str "foobar"] ("/\252rl","\246\246!")]
,Header 2 ("parentheses-in-urls",[],[]) [Str "Parentheses",Space,Str "in",Space,Str "URLs"] ,Header 2 ("parentheses-in-urls",[],[]) [Str "Parentheses",Space,Str "in",Space,Str "URLs"]
,Para [Link ("",[],[]) [Str "link"] ("/hi(there)","")] ,Para [Link ("",[],[]) [Str "link"] ("/hi(there)","")]

View file

@ -384,14 +384,14 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."]
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."]
,Header 2 ("autolinks",[],[]) [Str "Autolinks"] ,Header 2 ("autolinks",[],[]) [Str "Autolinks"]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",[],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] ,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",["uri"],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,BulletList ,BulletList
[[Plain [Str "In",Space,Str "a",Space,Str "list?"]] [[Plain [Str "In",Space,Str "a",Space,Str "list?"]]
,[Plain [Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]] ,[Plain [Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Plain [Str "It",Space,Str "should."]]] ,[Plain [Str "It",Space,Str "should."]]]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",[],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")] ,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",["email"],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")]
,BlockQuote ,BlockQuote
[Para [Str "Blockquoted:",Space,Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]] [Para [Str "Blockquoted:",Space,Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"] ,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"]
,CodeBlock ("",[],[]) "or here: <http://example.com/>" ,CodeBlock ("",[],[]) "or here: <http://example.com/>"
,HorizontalRule ,HorizontalRule

View file

@ -1298,7 +1298,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<title>Autolinks</title> <title>Autolinks</title>
<para> <para>
With an ampersand: With an ampersand:
<ulink url="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</ulink> <ulink url="http://example.com/?foo=1&amp;bar=2" role="uri">http://example.com/?foo=1&amp;bar=2</ulink>
</para> </para>
<itemizedlist spacing="compact"> <itemizedlist spacing="compact">
<listitem> <listitem>
@ -1308,7 +1308,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</listitem> </listitem>
<listitem> <listitem>
<para> <para>
<ulink url="http://example.com/">http://example.com/</ulink> <ulink url="http://example.com/" role="uri">http://example.com/</ulink>
</para> </para>
</listitem> </listitem>
<listitem> <listitem>
@ -1323,7 +1323,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<blockquote> <blockquote>
<para> <para>
Blockquoted: Blockquoted:
<ulink url="http://example.com/">http://example.com/</ulink> <ulink url="http://example.com/" role="uri">http://example.com/</ulink>
</para> </para>
</blockquote> </blockquote>
<para> <para>

View file

@ -1273,7 +1273,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<title>Autolinks</title> <title>Autolinks</title>
<para> <para>
With an ampersand: With an ampersand:
<link xlink:href="http://example.com/?foo=1&amp;bar=2">http://example.com/?foo=1&amp;bar=2</link> <link xlink:href="http://example.com/?foo=1&amp;bar=2" role="uri">http://example.com/?foo=1&amp;bar=2</link>
</para> </para>
<itemizedlist spacing="compact"> <itemizedlist spacing="compact">
<listitem> <listitem>
@ -1283,7 +1283,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
</listitem> </listitem>
<listitem> <listitem>
<para> <para>
<link xlink:href="http://example.com/">http://example.com/</link> <link xlink:href="http://example.com/" role="uri">http://example.com/</link>
</para> </para>
</listitem> </listitem>
<listitem> <listitem>
@ -1298,7 +1298,7 @@ These should not be escaped: \$ \\ \&gt; \[ \{
<blockquote> <blockquote>
<para> <para>
Blockquoted: Blockquoted:
<link xlink:href="http://example.com/">http://example.com/</link> <link xlink:href="http://example.com/" role="uri">http://example.com/</link>
</para> </para>
</blockquote> </blockquote>
<para> <para>

View file

@ -508,7 +508,7 @@ Blah
<li><a href="http://example.com/" class="uri">http://example.com/</a></li> <li><a href="http://example.com/" class="uri">http://example.com/</a></li>
<li>It should.</li> <li>It should.</li>
</ul> </ul>
<p>An e-mail address: <a href="mailto:nobody@nowhere.net">nobody@nowhere.net</a></p> <p>An e-mail address: <a href="mailto:nobody@nowhere.net" class="email">nobody@nowhere.net</a></p>
<blockquote> <blockquote>
<p>Blockquoted: <a href="http://example.com/" class="uri">http://example.com/</a></p> <p>Blockquoted: <a href="http://example.com/" class="uri">http://example.com/</a></p>
</blockquote> </blockquote>

View file

@ -511,7 +511,7 @@ Blah
<li><a href="http://example.com/" class="uri">http://example.com/</a></li> <li><a href="http://example.com/" class="uri">http://example.com/</a></li>
<li>It should.</li> <li>It should.</li>
</ul> </ul>
<p>An e-mail address: <a href="mailto:nobody@nowhere.net">nobody@nowhere.net</a></p> <p>An e-mail address: <a href="mailto:nobody@nowhere.net" class="email">nobody@nowhere.net</a></p>
<blockquote> <blockquote>
<p>Blockquoted: <a href="http://example.com/" class="uri">http://example.com/</a></p> <p>Blockquoted: <a href="http://example.com/" class="uri">http://example.com/</a></p>
</blockquote> </blockquote>

View file

@ -384,14 +384,14 @@ Pandoc (Meta {unMeta = fromList [("author",MetaList [MetaInlines [Str "John",Spa
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link"] ("/script?foo=1&bar=2",""),Str "."]
,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."] ,Para [Str "Here\8217s",Space,Str "an",Space,Link ("",[],[]) [Str "inline",Space,Str "link",Space,Str "in",Space,Str "pointy",Space,Str "braces"] ("/script?foo=1&bar=2",""),Str "."]
,Header 2 ("autolinks",[],[]) [Str "Autolinks"] ,Header 2 ("autolinks",[],[]) [Str "Autolinks"]
,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",[],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")] ,Para [Str "With",Space,Str "an",Space,Str "ampersand:",Space,Link ("",["uri"],[]) [Str "http://example.com/?foo=1&bar=2"] ("http://example.com/?foo=1&bar=2","")]
,BulletList ,BulletList
[[Plain [Str "In",Space,Str "a",Space,Str "list?"]] [[Plain [Str "In",Space,Str "a",Space,Str "list?"]]
,[Plain [Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]] ,[Plain [Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,[Plain [Str "It",Space,Str "should."]]] ,[Plain [Str "It",Space,Str "should."]]]
,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",[],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")] ,Para [Str "An",Space,Str "e-mail",Space,Str "address:",Space,Link ("",["email"],[]) [Str "nobody@nowhere.net"] ("mailto:nobody@nowhere.net","")]
,BlockQuote ,BlockQuote
[Para [Str "Blockquoted:",Space,Link ("",[],[]) [Str "http://example.com/"] ("http://example.com/","")]] [Para [Str "Blockquoted:",Space,Link ("",["uri"],[]) [Str "http://example.com/"] ("http://example.com/","")]]
,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"] ,Para [Str "Auto-links",Space,Str "should",Space,Str "not",Space,Str "occur",Space,Str "here:",Space,Code ("",[],[]) "<http://example.com/>"]
,CodeBlock ("",[],[]) "or here: <http://example.com/>" ,CodeBlock ("",[],[]) "or here: <http://example.com/>"
,HorizontalRule ,HorizontalRule

View file

@ -660,7 +660,7 @@ With an ampersand: "$":http://example.com/?foo=1&bar=2
* "$":http://example.com/ * "$":http://example.com/
* It should. * It should.
An e&#45;mail address: "nobody&#64;nowhere.net":mailto:nobody@nowhere.net An e&#45;mail address: "(email)nobody&#64;nowhere.net":mailto:nobody@nowhere.net
bq. Blockquoted: "$":http://example.com/ bq. Blockquoted: "$":http://example.com/