diff --git a/README b/README index 43ecaeb6a..8352d7591 100644 --- a/README +++ b/README @@ -316,10 +316,11 @@ For further documentation, see the `pandoc(1)` man page. `-S` or `--smart` : causes `pandoc` to produce typographically correct output, along the lines of John Gruber's [Smartypants]. Straight quotes are converted - to curly quotes, `---` to dashes, and `...` to ellipses. (Note: This - option is only significant when the input format is `markdown`. - It is selected automatically when the output format is `latex` or - `context`.) + to curly quotes, `---` to dashes, and `...` to ellipses. Nonbreaking + spaces are inserted after certain abbreviations, such as "Mr." + (Note: This option is only significant when the input format is + `markdown`. It is selected automatically when the output format is + `latex` or `context`.) `-m`*[url]* or `--asciimathml`*[=url]* : causes `pandoc` to use Peter Jipsen's [ASCIIMathML] script to display diff --git a/Text/Pandoc/Readers/Markdown.hs b/Text/Pandoc/Readers/Markdown.hs index c9fbbe2d9..d0d99b607 100644 --- a/Text/Pandoc/Readers/Markdown.hs +++ b/Text/Pandoc/Readers/Markdown.hs @@ -33,7 +33,7 @@ module Text.Pandoc.Readers.Markdown ( import Data.List ( transpose, isPrefixOf, isSuffixOf, lookup, sortBy, findIndex ) import Data.Ord ( comparing ) -import Data.Char ( isAlphaNum ) +import Data.Char ( isAlphaNum, isAlpha, isLower, isDigit ) import Data.Maybe ( fromMaybe ) import Text.Pandoc.Definition import Text.Pandoc.Shared @@ -697,7 +697,8 @@ table = simpleTable <|> multilineTable <?> "table" inline = choice inlineParsers <?> "inline" -inlineParsers = [ str +inlineParsers = [ abbrev + , str , smartPunctuation , whitespace , endline @@ -792,6 +793,26 @@ subscript = failIfStrict >> enclosed (char '~') (char '~') (notFollowedBy' whitespace >> inline) >>= -- may not contain Space return . Subscript +abbrev = failUnlessSmart >> + (assumedAbbrev <|> knownAbbrev) >>= return . Str . 
(++ ".\160") + +-- a string of letters followed by a period that does not end a sentence +-- is assumed to be an abbreviation. It is assumed that sentences don't +-- start with lowercase letters or numerals. +assumedAbbrev = try $ do + result <- many1 $ satisfy isAlpha + string ". " + lookAhead $ satisfy (\x -> isLower x || isDigit x) + return result + +-- these strings are treated as abbreviations even if they are followed +-- by a capital letter (such as a name). +knownAbbrev = try $ do + result <- oneOfStrings [ "Mr", "Mrs", "Ms", "Capt", "Dr", "Prof", "Gen", + "Gov", "e.g", "i.e", "Sgt", "St", "vol", "vs" ] + string ". " + return result + smartPunctuation = failUnlessSmart >> choice [ quoted, apostrophe, dash, ellipses ] diff --git a/tests/testsuite.native b/tests/testsuite.native index d3c4835ed..4ecc51438 100644 --- a/tests/testsuite.native +++ b/tests/testsuite.native @@ -165,7 +165,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane [ [ Plain [Str "Nested",Str "."] ] ] ] ] , Para [Str "Should",Space,Str "not",Space,Str "be",Space,Str "a",Space,Str "list",Space,Str "item:"] -, Para [Str "M",Str ".",Str "A",Str ".",Space,Str "2007"] +, Para [Str "M",Str ".",Str "A.\160",Str "2007"] , Para [Str "B",Str ".",Space,Str "Williams"] , HorizontalRule , Header 1 [Str "Definition",Space,Str "Lists"] diff --git a/tests/writer.context b/tests/writer.context index 7c9678636..e13a906dd 100644 --- a/tests/writer.context +++ b/tests/writer.context @@ -444,7 +444,7 @@ Autonumbering: Should not be a list item: -M.A. 2007 +M.A.~2007 B. Williams diff --git a/tests/writer.docbook b/tests/writer.docbook index ad774f685..4860e7d66 100644 --- a/tests/writer.docbook +++ b/tests/writer.docbook @@ -606,7 +606,7 @@ These should not be escaped: \$ \\ \> \[ \{ Should not be a list item: </para> <para> - M.A. 2007 + M.A. 2007 </para> <para> B. 
Williams diff --git a/tests/writer.html b/tests/writer.html index 49bf3a691..819917c57 100644 --- a/tests/writer.html +++ b/tests/writer.html @@ -412,7 +412,7 @@ These should not be escaped: \$ \\ \> \[ \{ ><p >Should not be a list item:</p ><p - >M.A. 2007</p + >M.A. 2007</p ><p >B. Williams</p ><hr diff --git a/tests/writer.latex b/tests/writer.latex index 053daaeed..551d6adec 100644 --- a/tests/writer.latex +++ b/tests/writer.latex @@ -394,7 +394,7 @@ Autonumbering: \end{enumerate} Should not be a list item: -M.A. 2007 +M.A.~2007 B. Williams diff --git a/tests/writer.man b/tests/writer.man index 010d6c33d..0e3b7608a 100644 --- a/tests/writer.man +++ b/tests/writer.man @@ -296,7 +296,7 @@ Nested\. .PP Should not be a list item: .PP -M\.A\. 2007 +M\.A\.\ 2007 .PP B\. Williams .PP diff --git a/tests/writer.markdown b/tests/writer.markdown index 387a2df38..00c75e607 100644 --- a/tests/writer.markdown +++ b/tests/writer.markdown @@ -269,7 +269,7 @@ Autonumbering: Should not be a list item: -M.A. 2007 +M.A. 2007 B. Williams diff --git a/tests/writer.native b/tests/writer.native index d3c4835ed..4ecc51438 100644 --- a/tests/writer.native +++ b/tests/writer.native @@ -165,7 +165,7 @@ Pandoc (Meta [Str "Pandoc",Space,Str "Test",Space,Str "Suite"] ["John MacFarlane [ [ Plain [Str "Nested",Str "."] ] ] ] ] , Para [Str "Should",Space,Str "not",Space,Str "be",Space,Str "a",Space,Str "list",Space,Str "item:"] -, Para [Str "M",Str ".",Str "A",Str ".",Space,Str "2007"] +, Para [Str "M",Str ".",Str "A.\160",Str "2007"] , Para [Str "B",Str ".",Space,Str "Williams"] , HorizontalRule , Header 1 [Str "Definition",Space,Str "Lists"] diff --git a/tests/writer.rst b/tests/writer.rst index 5a97a1e8b..b9dbc31e0 100644 --- a/tests/writer.rst +++ b/tests/writer.rst @@ -329,7 +329,7 @@ Autonumbering: Should not be a list item: -M.A. 2007 +M.A. 2007 B. 
Williams diff --git a/tests/writer.rtf b/tests/writer.rtf index 40f5a06dc..367eeba2f 100644 --- a/tests/writer.rtf +++ b/tests/writer.rtf @@ -148,7 +148,7 @@ These should not be escaped: \\$ \\\\ \\> \\[ \\\{\par} {\pard \ql \f0 \sa0 \li360 \fi-360 2.\tx360\tab More.\par} {\pard \ql \f0 \sa0 \li720 \fi-360 a.\tx360\tab Nested.\sa180\sa180\par} {\pard \ql \f0 \sa180 \li0 \fi0 Should not be a list item:\par} -{\pard \ql \f0 \sa180 \li0 \fi0 M.A. 2007\par} +{\pard \ql \f0 \sa180 \li0 \fi0 M.A.\u160?2007\par} {\pard \ql \f0 \sa180 \li0 \fi0 B. Williams\par} {\pard \qc \f0 \sa180 \li0 \fi0 \emdash\emdash\emdash\emdash\emdash\par} {\pard \ql \f0 \sa180 \li0 \fi0 \b \fs36 Definition Lists\par} diff --git a/tests/writer.texinfo b/tests/writer.texinfo index bb2ced1ba..224aa2091 100644 --- a/tests/writer.texinfo +++ b/tests/writer.texinfo @@ -506,7 +506,7 @@ Nested. Should not be a list item: -M.A. 2007 +M.A.@ 2007 B. Williams