From d3716eaeb6d0f2e0be549baf0a346cba1e5a4ff0 Mon Sep 17 00:00:00 2001
From: damon-sava-stanley <damonsava@gmail.com>
Date: Fri, 11 Feb 2022 11:58:29 -0500
Subject: [PATCH] Add DokuWiki table alignment for #5202 (#7908)

Closes #5202.

Within each cell, determine the cell alignment as per
https://www.dokuwiki.org/wiki:syntax#tables. The current approach, as
per the issue treats the first row's alignment as determining
that of the entire column. Given this, it wastes some work in
determining an alignment for every cell.
---
 src/Text/Pandoc/Readers/DokuWiki.hs | 52 ++++++++++++++++++++++-------
 test/Tests/Readers/DokuWiki.hs      | 12 +++++++
 2 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/src/Text/Pandoc/Readers/DokuWiki.hs b/src/Text/Pandoc/Readers/DokuWiki.hs
index db98ac8de..82df2784e 100644
--- a/src/Text/Pandoc/Readers/DokuWiki.hs
+++ b/src/Text/Pandoc/Readers/DokuWiki.hs
@@ -19,8 +19,8 @@ import Control.Monad
 import Control.Monad.Except (throwError)
 import Data.Char (isAlphaNum, isDigit)
 import qualified Data.Foldable as F
-import Data.List (transpose)
 import Data.Maybe (fromMaybe, catMaybes)
+import Data.Bifunctor (second)
 import Data.Text (Text)
 import qualified Data.Text as T
 import qualified Text.Pandoc.Builder as B
@@ -30,6 +30,8 @@ import Text.Pandoc.Error (PandocError (PandocParsecError))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed, nested)
 import Text.Pandoc.Shared (trim, stringify, tshow)
+import Data.List (isPrefixOf, isSuffixOf)
+import qualified Safe
 
 -- | Read DokuWiki from an input string and return a Pandoc document.
 readDokuWiki :: (PandocMonad m, ToSources a)
@@ -96,7 +98,11 @@ codeTag f tag = try $ f
 -- | Parse any inline element but softbreak.
 inline' :: PandocMonad m => DWParser m B.Inlines
 inline' = whitespace
-      <|> br
+      <|> inline''
+
+-- | Parse any inline element but whitespace.
+inline'' :: PandocMonad m => DWParser m B.Inlines
+inline'' = br
       <|> bold
       <|> italic
       <|> underlined
@@ -121,6 +127,10 @@ inline' = whitespace
       <|> symbol
       <?> "inline"
 
+-- | Parse any inline element but soft breaks and do not consolidate spaces.
+inlineUnconsolidatedWhitespace :: PandocMonad m => DWParser m B.Inlines
+inlineUnconsolidatedWhitespace = (B.space <$ spaceChar) <|> inline'
+
 -- | Parse any inline element, including soft break.
 inline :: PandocMonad m => DWParser m B.Inlines
 inline = endline <|> inline'
@@ -468,22 +478,28 @@ table :: PandocMonad m => DWParser m B.Blocks
 table = do
   firstSeparator <- lookAhead tableCellSeparator
   rows <- tableRows
+  let firstRow = fromMaybe [] . Safe.headMay $ rows
   let (headerRow, body) = if firstSeparator == '^'
-                            then (head rows, tail rows)
+                            then (firstRow, tail rows)
                             else ([], rows)
-  let attrs = (AlignDefault, ColWidthDefault) <$ transpose rows
+  -- Since Pandoc only has column level alignment, we have to make an arbitrary
+  -- choice of how to reconcile potentially different alignments in the row.
+  -- Here we end up assuming that the alignment of the header / first row is
+  -- what the user wants to apply to the whole thing.
+  let attrs =  map (\(a, _) -> (a, ColWidthDefault)) firstRow
   let toRow = Row nullAttr . map B.simpleCell
       toHeaderRow l = [toRow l | not (null l)]
   pure $ B.table B.emptyCaption
                  attrs
-                 (TableHead nullAttr $ toHeaderRow headerRow)
-                 [TableBody nullAttr 0 [] $ map toRow body]
+                 (TableHead nullAttr $ toHeaderRow (map snd headerRow))
+                 [TableBody nullAttr 0 [] $ map (toRow . (map snd)) body]
                  (TableFoot nullAttr [])
 
-tableRows :: PandocMonad m => DWParser m [[B.Blocks]]
+
+tableRows :: PandocMonad m => DWParser m [[(Alignment, B.Blocks)]]
 tableRows = many1 tableRow
 
-tableRow :: PandocMonad m => DWParser m [B.Blocks]
+tableRow :: PandocMonad m => DWParser m [(Alignment, B.Blocks)]
 tableRow = many1Till tableCell tableRowEnd
 
 tableRowEnd :: PandocMonad m => DWParser m Char
@@ -492,11 +508,23 @@ tableRowEnd = try $ tableCellSeparator <* manyTill spaceChar eol
 tableCellSeparator :: PandocMonad m => DWParser m Char
 tableCellSeparator = char '|' <|> char '^'
 
-tableCell :: PandocMonad m => DWParser m B.Blocks
-tableCell = try $ B.plain . B.trimInlines . mconcat <$> (normalCell <|> headerCell)
+tableCell :: PandocMonad m => DWParser m (Alignment, B.Blocks)
+tableCell = try $ (second (B.plain . B.trimInlines . mconcat)) <$> cellContent
   where
-    normalCell = char '|' *> manyTill inline' (lookAhead tableCellSeparator)
-    headerCell = char '^' *> manyTill inline' (lookAhead tableCellSeparator)
+    cellContent = do
+      -- https://www.dokuwiki.org/wiki:syntax#tables
+      -- DokuWiki represents the alignment of cells with two spaces padding.
+      tableCellSeparator
+      cellInline <- manyTill inlineUnconsolidatedWhitespace (lookAhead tableCellSeparator)
+      let left  = [B.space, B.space] `isPrefixOf` cellInline
+      let right = [B.space, B.space] `isSuffixOf` cellInline
+      let alignment = case (left, right) of
+                           (True, True)   -> AlignCenter
+                           (True, False)  -> AlignRight
+                           (False, True)  -> AlignLeft
+                           (False, False) -> AlignDefault
+      return (alignment, cellInline)
+
 
 blockCode :: PandocMonad m => DWParser m B.Blocks
 blockCode = codeTag B.codeBlockWith "code"
diff --git a/test/Tests/Readers/DokuWiki.hs b/test/Tests/Readers/DokuWiki.hs
index 84ba86d46..db52a34a6 100644
--- a/test/Tests/Readers/DokuWiki.hs
+++ b/test/Tests/Readers/DokuWiki.hs
@@ -1,5 +1,6 @@
 {-# LANGUAGE OverloadedStrings   #-}
 {-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE TupleSections #-}
 {- |
    Module      : Tests.Readers.DokuWiki
    Copyright   : © 2018-2020 Alexander Krotov
@@ -300,6 +301,17 @@ tests = [ testGroup "inlines"
                     , "| bat | baz |"
                     ] =?>
           simpleTable [plain "foo", plain "bar"] [[plain "bat", plain "baz"]]
+        , "Table with alignment" =:
+          T.unlines [ "^ 0  ^  1  ^  2 ^ 3 ^"
+                    , "| a  | b   | c  |d  |"
+                    ] =?>
+          table emptyCaption 
+                (map (, ColWidthDefault) [AlignLeft, AlignCenter, AlignRight, AlignDefault])  
+                (TableHead nullAttr 
+                          [Row nullAttr . map (simpleCell . plain) $ ["0", "1", "2", "3"]])
+                [TableBody nullAttr 0 []
+                          [Row nullAttr . map (simpleCell . plain) $ ["a", "b", "c", "d"]]]
+                (TableFoot nullAttr []) 
         , "Table with colspan" =:
           T.unlines [ "^ 0,0 ^ 0,1 ^ 0,2 ^"
                     , "| 1,0 | 1,1 ||"