From 9e3e195dd45f298e3d0a66ef36a4e5763256c98f Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 11 Oct 2019 10:00:09 -0700 Subject: [PATCH] Fix `gfm_auto_identifiers` behavior with emojis. Closes #5813. Note that we also now use emoji names for emojis when `ascii_identifiers` is enabled. --- MANUAL.txt | 1 + src/Text/Pandoc/Shared.hs | 9 ++++++++- test/command/5813.md | 6 ++++++ 3 files changed, 15 insertions(+), 1 deletion(-) create mode 100644 test/command/5813.md diff --git a/MANUAL.txt b/MANUAL.txt index 54709f235..600cc3d23 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -2575,6 +2575,7 @@ Changes the algorithm used by `auto_identifiers` to conform to GitHub's method. Spaces are converted to dashes (`-`), uppercase characters to lowercase characters, and punctuation characters other than `-` and `_` are removed. +Emojis are replaced by their names. ## Math Input diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index 4ce5ba1d0..bcaa48ea1 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -463,8 +463,15 @@ isPara _ = False -- letters, digits, and the characters _-. inlineListToIdentifier :: Extensions -> [Inline] -> String inlineListToIdentifier exts = - dropNonLetter . filterAscii . toIdent . stringify + dropNonLetter . filterAscii . toIdent . stringify . walk unEmojify where + unEmojify :: [Inline] -> [Inline] + unEmojify + | extensionEnabled Ext_gfm_auto_identifiers exts || + extensionEnabled Ext_ascii_identifiers exts = walk unEmoji + | otherwise = id + unEmoji (Span ("",["emoji"],[("data-emoji",ename)]) _) = Str ename + unEmoji x = x dropNonLetter | extensionEnabled Ext_gfm_auto_identifiers exts = id | otherwise = dropWhile (not . isAlpha) diff --git a/test/command/5813.md b/test/command/5813.md new file mode 100644 index 000000000..927a161ed --- /dev/null +++ b/test/command/5813.md @@ -0,0 +1,6 @@ +``` +% pandoc -f gfm +### Jekyll Plugins & Gems :gem: +^D +

+<h3 id="jekyll-plugins--gems-gem">Jekyll Plugins &amp; Gems <span class="emoji" data-emoji="gem">💎</span></h3>

+```