From a0da2816bda209dec2d701f90abcda57944d7ad3 Mon Sep 17 00:00:00 2001 From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> Date: Sat, 6 Jun 2009 21:27:46 +0000 Subject: [PATCH] Man writer: break lines at end of each sentence. groff expects this and treats . and ? differently when followed by line ending as opposed to ordinary space. Also, don't escape periods. Instead, use zero-width character \& to avoid unwanted interpretation of periods at start of line. Resolves Issue #148. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1580 788f1e2b-df1e-0410-8736-df70ead52e1b --- src/Text/Pandoc/Writers/Man.hs | 39 ++++++- tests/tables.man | 24 ++-- tests/writer.man | 197 +++++++++++++++++---------------- 3 files changed, 153 insertions(+), 107 deletions(-) diff --git a/src/Text/Pandoc/Writers/Man.hs b/src/Text/Pandoc/Writers/Man.hs index 210c7ed07..89c865754 100644 --- a/src/Text/Pandoc/Writers/Man.hs +++ b/src/Text/Pandoc/Writers/Man.hs @@ -35,6 +35,7 @@ import Text.Printf ( printf ) import Data.List ( isPrefixOf, drop, nub, intersperse, intercalate ) import Text.PrettyPrint.HughesPJ hiding ( Str ) import Control.Monad.State +import Control.Monad ( liftM ) type Notes = [[Block]] type Preprocessors = [String] -- e.g. "t" for tbl @@ -100,7 +101,7 @@ noteToMan opts num note = do -- | Association list of characters to escape. manEscapes :: [(Char, String)] -manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes ".@\\" +manEscapes = [('\160', "\\ "), ('\'', "\\[aq]")] ++ backslashEscapes "@\\" -- | Escape special characters for Man. escapeString :: String -> String @@ -110,15 +111,43 @@ escapeString = escapeStringUsing manEscapes escapeCode :: String -> String escapeCode = escapeStringUsing (manEscapes ++ backslashEscapes "\t ") +-- We split inline lists into sentences, and print one sentence per +-- line. groff/troff treats the line-ending period differently. +-- See http://code.google.com/p/pandoc/issues/detail?id=148. + +-- | Returns the first sentence in a list of inlines, and the rest. +breakSentence :: [Inline] -> ([Inline], [Inline]) +breakSentence [] = ([],[]) +breakSentence xs = + let isSentenceEndInline (Str ".") = True + isSentenceEndInline (Str "?") = True + isSentenceEndInline _ = False + (as, bs) = break isSentenceEndInline xs + in case bs of + [] -> (as, []) + [c] -> (as ++ [c], []) + (c:Space:cs) -> (as ++ [c], cs) + (Str ".":Str ")":cs) -> (as ++ [Str ".", Str ")"], cs) + (c:cs) -> (as ++ [c] ++ ds, es) + where (ds, es) = breakSentence cs + +-- | Split a list of inlines into sentences. +splitSentences :: [Inline] -> [[Inline]] +splitSentences xs = + let (sent, rest) = breakSentence xs + in if null rest then [sent] else sent : splitSentences rest + -- | Convert Pandoc block element to man. blockToMan :: WriterOptions -- ^ Options -> Block -- ^ Block element -> State WriterState Doc blockToMan _ Null = return empty blockToMan opts (Plain inlines) = - wrapIfNeeded opts (inlineListToMan opts) inlines + liftM vcat $ mapM (wrapIfNeeded opts (inlineListToMan opts)) $ + splitSentences inlines blockToMan opts (Para inlines) = do - contents <- wrapIfNeeded opts (inlineListToMan opts) inlines + contents <- liftM vcat $ mapM (wrapIfNeeded opts (inlineListToMan opts)) $ + splitSentences inlines return $ text ".PP" $$ contents blockToMan _ (RawHtml str) = return $ text str blockToMan _ HorizontalRule = return $ text $ ".PP\n * * * * *" @@ -237,6 +266,10 @@ blockListToMan opts blocks = -- | Convert list of Pandoc inline elements to man. inlineListToMan :: WriterOptions -> [Inline] -> State WriterState Doc +-- if list starts with ., insert a zero-width character \& so it +-- won't be interpreted as markup if it falls at the beginning of a line. +inlineListToMan opts lst@(Str "." : _) = mapM (inlineToMan opts) lst >>= + (return . (text "\\&" <>) . hcat) inlineListToMan opts lst = mapM (inlineToMan opts) lst >>= (return . hcat) -- | Convert Pandoc inline element to man. diff --git a/tests/tables.man b/tests/tables.man index d9f214c93..019fa3d83 100644 --- a/tests/tables.man +++ b/tests/tables.man @@ -1,7 +1,7 @@ .PP Simple table with caption: .PP -Demonstration of simple table syntax\. +Demonstration of simple table syntax. .TS tab(@); rw(10.50n) lw(6.13n) cw(11.38n) lw(8.75n). @@ -90,7 +90,7 @@ T} .PP Simple table indented two spaces: .PP -Demonstration of simple table syntax\. +Demonstration of simple table syntax. .TS tab(@); rw(10.50n) lw(6.13n) cw(11.38n) lw(8.75n). @@ -135,7 +135,7 @@ T} .PP Multiline table with caption: .PP -Here\[aq]s the caption\. It may span multiple lines\. +Here\[aq]s the caption. It may span multiple lines. .TS tab(@); cw(10.50n) lw(9.63n) rw(11.38n) lw(23.63n). @@ -154,18 +154,19 @@ First T}@T{ row T}@T{ -12\.0 +12.0 T}@T{ -Example of a row that spans multiple lines\. +Example of a row that spans multiple lines. T} T{ Second T}@T{ row T}@T{ -5\.0 +5.0 T}@T{ -Here\[aq]s another one\. Note the blank line between rows\. +Here\[aq]s another one. +Note the blank line between rows. T} .TE .PP @@ -189,17 +190,18 @@ First T}@T{ row T}@T{ -12\.0 +12.0 T}@T{ -Example of a row that spans multiple lines\. +Example of a row that spans multiple lines. T} T{ Second T}@T{ row T}@T{ -5\.0 +5.0 T}@T{ -Here\[aq]s another one\. Note the blank line between rows\. +Here\[aq]s another one. +Note the blank line between rows. T} .TE diff --git a/tests/writer.man b/tests/writer.man index 63890bd1d..a53e891e3 100644 --- a/tests/writer.man +++ b/tests/writer.man @@ -1,7 +1,7 @@ .TH Pandoc "" "July 17, 2006" "Test Suite" .PP -This is a set of tests for pandoc\. Most of them are adapted from -John Gruber's markdown test suite\. +This is a set of tests for pandoc. +Most of them are adapted from John Gruber's markdown test suite. .PP * * * * * .SH Headers @@ -21,19 +21,22 @@ with no blank line * * * * * .SH Paragraphs .PP -Here's a regular paragraph\. +Here's a regular paragraph. .PP -In Markdown 1\.0\.0 and earlier\. Version 8\. This line turns into -a list item\. Because a hard-wrapped line in the middle of a -paragraph looked like a list item\. +In Markdown 1.0.0 and earlier. +Version 8. +This line turns into a list item. +Because a hard-wrapped line in the middle of a paragraph looked +like a list item. .PP -Here's one with a bullet\. * criminey\. +Here's one with a bullet. +* criminey. .PP There should be a hard line break .PD 0 .P .PD -here\. +here. .PP * * * * * .SH Block Quotes @@ -41,7 +44,8 @@ here\. E-mail style: .RS .PP -This is a block quote\. It is pretty short\. +This is a block quote. +It is pretty short. .RE .RS .PP @@ -70,9 +74,9 @@ nested .RE .RE .PP -This should not be a block quote: 2 > 1\. +This should not be a block quote: 2 > 1. .PP -And a following paragraph\. +And a following paragraph. .PP * * * * * .SH Code Blocks @@ -184,16 +188,17 @@ Three .PP Multiple paragraphs: .IP "1." 3 -Item 1, graf one\. +Item 1, graf one. .RS 4 .PP -Item 1\. graf two\. The quick brown fox jumped over the lazy dog's -back\. +Item 1. +graf two. +The quick brown fox jumped over the lazy dog's back. .RE .IP "2." 3 -Item 2\. +Item 2. .IP "3." 3 -Item 3\. +Item 3. .SS Nested .IP \[bu] 2 Tab @@ -273,7 +278,7 @@ Nesting: Upper Alpha .RS 4 .IP "I." 3 -Upper Roman\. +Upper Roman. .RS 4 .IP "(6)" 4 Decimal start with 6 @@ -286,19 +291,20 @@ Lower alpha with paren .PP Autonumbering: .IP "1." 3 -Autonumber\. +Autonumber. .IP "2." 3 -More\. +More. .RS 4 .IP "1." 3 -Nested\. +Nested. .RE .PP Should not be a list item: .PP -M\.A\.\ 2007 +M.A.\ 2007 .PP -B\. Williams +B. +Williams .PP * * * * * .SH Definition Lists @@ -497,51 +503,53 @@ Hr's: * * * * * .SH Inline Markup .PP -This is \f[I]emphasized\f[], and so \f[I]is this\f[]\. +This is \f[I]emphasized\f[], and so \f[I]is this\f[]. .PP -This is \f[B]strong\f[], and so \f[B]is this\f[]\. +This is \f[B]strong\f[], and so \f[B]is this\f[]. .PP -An \f[I]emphasized link (/url)\f[]\. +An \f[I]emphasized link (/url)\f[]. .PP -\f[B]\f[I]This is strong and em\.\f[]\f[] +\f[B]\f[I]This is strong and em.\f[]\f[] .PP -So is \f[B]\f[I]this\f[]\f[] word\. +So is \f[B]\f[I]this\f[]\f[] word. .PP -\f[B]\f[I]This is strong and em\.\f[]\f[] +\f[B]\f[I]This is strong and em.\f[]\f[] .PP -So is \f[B]\f[I]this\f[]\f[] word\. +So is \f[B]\f[I]this\f[]\f[] word. .PP This is code: \f[B]>\f[], \f[B]$\f[], \f[B]\\\f[], \f[B]\\$\f[], -\f[B]<html>\f[]\. +\f[B]<html>\f[]. .PP -[STRIKEOUT:This is \f[I]strikeout\f[]\.] +[STRIKEOUT:This is \f[I]strikeout\f[].] .PP -Superscripts: a^bc^d a^\f[I]hello\f[]^ a^hello\ there^\. +Superscripts: a^bc^d a^\f[I]hello\f[]^ a^hello\ there^. .PP -Subscripts: H~2~O, H~23~O, H~many\ of\ them~O\. +Subscripts: H~2~O, H~23~O, H~many\ of\ them~O. .PP These should not be superscripts or subscripts, because of the -unescaped spaces: a^b c^d, a~b c~d\. +unescaped spaces: a^b c^d, a~b c~d. .PP * * * * * .SH Smart quotes, ellipses, dashes .PP -\[lq]Hello,\[rq] said the spider\. \[lq]`Shelob' is my name\.\[rq] +\[lq]Hello,\[rq] said the spider. +\[lq]`Shelob' is my name.\[rq] .PP -`A', `B', and `C' are letters\. +`A', `B', and `C' are letters. .PP -`Oak,' `elm,' and `beech' are names of trees\. So is `pine\.' +`Oak,' `elm,' and `beech' are names of trees. +So is `pine.' .PP -`He said, \[lq]I want to go\.\[rq]' Were you alive in the 70's? +`He said, \[lq]I want to go.\[rq]' Were you alive in the 70's? .PP Here is some quoted `\f[B]code\f[]' and a -\[lq]quoted link (http://example.com/?foo=1&bar=2)\[rq]\. +\[lq]quoted link (http://example.com/?foo=1&bar=2)\[rq]. .PP -Some dashes: one\[em]two \[em] three\[em]four \[em] five\. +Some dashes: one\[em]two \[em] three\[em]four \[em] five. .PP -Dashes between numbers: 5\[en]7, 255\[en]66, 1987\[en]1999\. +Dashes between numbers: 5\[en]7, 255\[en]66, 1987\[en]1999. .PP -Ellipses\&...and\&...and\&...\. +Ellipses\&...and\&...and\&.... .PP * * * * * .SH LaTeX @@ -563,18 +571,19 @@ Here's some display math: .RE .IP \[bu] 2 Here's one that has a line break in it: -\f[B]\\alpha\ +\ \\omega\ \\times\ x^2\f[]\. +\f[B]\\alpha\ +\ \\omega\ \\times\ x^2\f[]. .PP These shouldn't be math: .IP \[bu] 2 -To get the famous equation, write \f[B]$e\ =\ mc^2$\f[]\. +To get the famous equation, write \f[B]$e\ =\ mc^2$\f[]. .IP \[bu] 2 -$22,000 is a \f[I]lot\f[] of money\. So is $34,000\. (It worked if -\[lq]lot\[rq] is emphasized\.) +$22,000 is a \f[I]lot\f[] of money. +So is $34,000. +(It worked if \[lq]lot\[rq] is emphasized.) .IP \[bu] 2 -Shoes ($20) and socks ($5)\. +Shoes ($20) and socks ($5). .IP \[bu] 2 -Escaped \f[B]$\f[]: $73 \f[I]this should be emphasized\f[] 23$\. +Escaped \f[B]$\f[]: $73 \f[I]this should be emphasized\f[] 23$. .PP Here's a LaTeX table: .PP @@ -594,15 +603,15 @@ set membership: ∈ .IP \[bu] 2 copyright: © .PP -AT&T has an ampersand in their name\. +AT&T has an ampersand in their name. .PP -AT&T is another way to write it\. +AT&T is another way to write it. .PP -This & that\. +This & that. .PP -4 < 5\. +4 < 5. .PP -6 > 5\. +6 > 5. .PP Backslash: \\ .PP @@ -628,7 +637,7 @@ Greater-than: > .PP Hash: # .PP -Period: \. +Period: \&. .PP Bang: ! .PP @@ -640,13 +649,13 @@ Minus: - .SH Links .SS Explicit .PP -Just a URL (/url/)\. +Just a URL (/url/). .PP -URL and title (/url/)\. +URL and title (/url/). .PP -URL and title (/url/)\. +URL and title (/url/). .PP -URL and title (/url/)\. +URL and title (/url/). .PP URL and title (/url/) .PP @@ -656,45 +665,45 @@ with_underscore (/url/with_underscore) .PP Email link (mailto:nobody@nowhere.net) .PP -Empty ()\. +Empty (). .SS Reference .PP -Foo bar (/url/)\. +Foo bar (/url/). .PP -Foo bar (/url/)\. +Foo bar (/url/). .PP -Foo bar (/url/)\. +Foo bar (/url/). .PP -With embedded [brackets] (/url/)\. +With embedded [brackets] (/url/). .PP -b (/url/) by itself should be a link\. +b (/url/) by itself should be a link. .PP -Indented once (/url)\. +Indented once (/url). .PP -Indented twice (/url)\. +Indented twice (/url). .PP -Indented thrice (/url)\. +Indented thrice (/url). .PP -This should [not][] be a link\. +This should [not][] be a link. .PP \f[CR] [not]:\ /url \f[] .PP -Foo bar (/url/)\. +Foo bar (/url/). .PP -Foo biz (/url/)\. +Foo biz (/url/). .SS With ampersands .PP Here's a -link with an ampersand in the URL (http://example.com/?foo=1&bar=2)\. +link with an ampersand in the URL (http://example.com/?foo=1&bar=2). .PP Here's a link with an amersand in the link text: -AT&T (http://att.com/)\. +AT&T (http://att.com/). .PP -Here's an inline link (/script?foo=1&bar=2)\. +Here's an inline link (/script?foo=1&bar=2). .PP -Here's an inline link in pointy braces (/script?foo=1&bar=2)\. +Here's an inline link in pointy braces (/script?foo=1&bar=2). .SS Autolinks .PP With an ampersand: <http://example.com/?foo=1&bar=2> @@ -703,7 +712,7 @@ In a list? .IP \[bu] 2 <http://example.com/> .IP \[bu] 2 -It should\. +It should. .PP An e-mail address: <nobody@nowhere.net> .RS @@ -711,10 +720,10 @@ An e-mail address: <nobody@nowhere.net> Blockquoted: <http://example.com/> .RE .PP -Auto-links should not occur here: \f[B]<http://example\.com/>\f[] +Auto-links should not occur here: \f[B]<http://example.com/>\f[] .PP \f[CR] - or\ here:\ <http://example\.com/> + or\ here:\ <http://example.com/> \f[] .PP * * * * * @@ -724,56 +733,58 @@ From \[lq]Voyage dans la Lune\[rq] by Georges Melies (1902): .PP [IMAGE: lalune (lalune.jpg)] .PP -Here is a movie [IMAGE: movie (movie.jpg)] icon\. +Here is a movie [IMAGE: movie (movie.jpg)] icon. .PP * * * * * .SH Footnotes .PP -Here is a footnote reference,[1] and another\.[2] This should +Here is a footnote reference,[1] and another.[2] This should \f[I]not\f[] be a footnote reference, because it contains a -space\.[^my note] Here is an inline note\.[3] +space.[^my note] Here is an inline note.[3] .RS .PP -Notes can go in quotes\.[4] +Notes can go in quotes.[4] .RE .IP "1." 3 -And in list items\.[5] +And in list items.[5] .PP This paragraph should not be part of the note, as it is not -indented\. +indented. .SH NOTES .SS [1] .PP -Here is the footnote\. It can go anywhere after the footnote -reference\. It need not be placed at the end of the document\. +Here is the footnote. +It can go anywhere after the footnote reference. +It need not be placed at the end of the document. .SS [2] .PP -Here's the long note\. This one contains multiple blocks\. +Here's the long note. +This one contains multiple blocks. .PP Subsequent blocks are indented to show that they belong to the -footnote (as with list items)\. +footnote (as with list items). .PP \f[CR] \ \ {\ <code>\ } \f[] .PP If you want, you can indent every line, but you can also be lazy -and just indent the first line of each block\. +and just indent the first line of each block. .SS [3] .PP -This is \f[I]easier\f[] to type\. Inline notes may contain -links (http://google.com) and \f[B]]\f[] verbatim characters, as -well as [bracketed text]\. +This is \f[I]easier\f[] to type. +Inline notes may contain links (http://google.com) and \f[B]]\f[] +verbatim characters, as well as [bracketed text]. .SS [4] .PP -In quote\. +In quote. .SS [5] .PP -In list\. +In list. .SH AUTHORS John MacFarlane, Anonymous