From 1b97846be265588817c74db2ab3d8f8ba82ea82a Mon Sep 17 00:00:00 2001
From: John MacFarlane <jgm@berkeley.edu>
Date: Fri, 1 Apr 2022 10:41:33 -0700
Subject: [PATCH] Fix regression with `ascii_identifiers` and Turkish undotted i.

Closes #8003.
---
 src/Text/Pandoc/Asciify.hs | 6 +++++-
 test/command/8003.md       | 6 ++++++
 2 files changed, 11 insertions(+), 1 deletion(-)
 create mode 100644 test/command/8003.md

diff --git a/src/Text/Pandoc/Asciify.hs b/src/Text/Pandoc/Asciify.hs
index 5b84542bf..b7ee6c216 100644
--- a/src/Text/Pandoc/Asciify.hs
+++ b/src/Text/Pandoc/Asciify.hs
@@ -18,11 +18,15 @@ import Data.Text (Text)
 import qualified Data.Text as T
 
 toAsciiText :: Text -> Text
-toAsciiText = T.filter isAscii . TN.normalize (TN.NFD)
+toAsciiText = T.filter isAscii . T.map specialCase . TN.normalize (TN.NFD)
+  where
+    specialCase '\x131' = 'i' -- Turkish undotted i
+    specialCase c = c
 
 toAsciiChar :: Char -> Maybe Char
 toAsciiChar c = case T.unpack (TN.normalize TN.NFD (T.singleton c)) of
                   (x:xs) | isAscii x
                          , all isMark xs
                            -> Just x
+                  ['\x131'] -> Just 'i' -- Turkish undotted i
                   _ -> Nothing
diff --git a/test/command/8003.md b/test/command/8003.md
new file mode 100644
index 000000000..cff44a03b
--- /dev/null
+++ b/test/command/8003.md
@@ -0,0 +1,6 @@
+```
+% pandoc -f markdown+ascii_identifiers
+# Işık
+^D
+<h1 id="isik">Işık</h1>
+```