From c1617565fc2a5984e2e44d4da9adf7f8a26b3160 Mon Sep 17 00:00:00 2001
From: Yan Pas <yanp.bugz@gmail.com>
Date: Wed, 9 May 2018 03:24:45 +0300
Subject: [PATCH] basic manfile parsing

---
 src/Text/Pandoc/Readers/Man.hs | 239 ++++++++++++++++++++++++---------
 test/Tests/Readers/Man.hs      |  23 +++-
 test/test-pandoc.hs            |   2 +-
 3 files changed, 197 insertions(+), 67 deletions(-)

diff --git a/src/Text/Pandoc/Readers/Man.hs b/src/Text/Pandoc/Readers/Man.hs
index d1ff3fc47..166b7c7a7 100644
--- a/src/Text/Pandoc/Readers/Man.hs
+++ b/src/Text/Pandoc/Readers/Man.hs
@@ -14,6 +14,9 @@ GNU General Public License for more details.
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+
 -}
 
 {- |
@@ -29,90 +32,202 @@ Conversion of man to 'Pandoc' document.
 -}
 module Text.Pandoc.Readers.Man where
 
-import Control.Monad.Except (liftM2, throwError, guard)
-import Text.Pandoc.Class (PandocMonad(..))
+import Control.Monad.Except (throwError)
+import Data.Default (Default)
+import Data.Map (insert)
+import Data.Maybe (isJust)
+import Data.List (intersperse, intercalate)
+import qualified Data.Text as T
+
+import Text.Pandoc.Class (PandocMonad(..), runPure)
 import Text.Pandoc.Definition
+import Text.Pandoc.Error (PandocError)
+import Text.Pandoc.Logging (LogMessage(..))
 import Text.Pandoc.Options
 import Text.Pandoc.Parsing hiding (enclosed)
 import Text.Pandoc.Shared (crFilter)
 import Text.Parsec
-import Text.Parsec.Char
-import Data.Text (Text)
-import Data.Map (empty)
-import qualified Data.Text as T
+import Text.Parsec.Char ()
+
+data FontKind = Regular | Italic | Bold | ItalicBold deriving Show
+
+data RoffState = RoffState { inCodeBlock :: Bool
+                           , fontKind :: FontKind
+                           } deriving Show
+
+instance Default RoffState where
+  def = RoffState {inCodeBlock = False, fontKind = Regular}
+
+data ManState = ManState {pState :: ParserState, rState :: RoffState}
+
+instance HasLogMessages ManState where
+  addLogMessage lm mst  = mst {pState = addLogMessage lm (pState mst)}
+  getLogMessages mst = getLogMessages $ pState mst
+
+modifyRoffState :: PandocMonad m => (RoffState -> RoffState) -> ParsecT a ManState m ()
+modifyRoffState f = do
+  mst <- getState
+  setState mst { rState = f $ rState mst }
+
+type ManParser m = ParserT [Char] ManState m
+
+testStrr :: [Char] -> SourceName -> Either PandocError (Either ParseError Pandoc)
+testStrr s srcnm = runPure (runParserT parseMan (ManState {pState=def, rState=def}) srcnm s)
+
+printPandoc :: Pandoc -> [Char]
+printPandoc (Pandoc m content) =
+  let ttl = "Pandoc: " ++ (show $ unMeta m)
+      cnt = intercalate "\n" $ map show content
+  in ttl ++ "\n" ++ cnt
+
+strrepr :: (Show a2, Show a1) => Either a2 (Either a1 Pandoc) -> [Char]
+strrepr obj = case obj of
+  Right x -> case x of
+    Right x' -> printPandoc x'
+    Left y' -> show y'
+  Left y -> show y
+
+testFile :: FilePath -> IO ()
+testFile fname = do
+  cont <- readFile fname
+  putStrLn . strrepr $ testStrr cont fname
 
 
 -- | Read man (troff) from an input string and return a Pandoc document.
-readMan :: PandocMonad m
-          => ReaderOptions
-          -> Text
-          -> m Pandoc
-readMan opts s = do
-  parsed <- readWithM parseMan def{ stateOptions = opts } (T.unpack s)
+readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
+readMan opts txt = do
+  let state = ManState { pState = def{ stateOptions = opts }, rState = def}
+  parsed <- readWithM parseMan state (T.unpack $ crFilter txt)
   case parsed of
     Right result -> return result
     Left e       -> throwError e
 
-type ManParser m = ParserT [Char] ParserState m
-
-comment :: PandocMonad m => ManParser m String
-comment = do
-  string ".\\\" "
-  many anyChar
-
-data Macro = Macro { macroName :: String
-                   , macroArgs :: [String]
-                   }
-
 parseMacro :: PandocMonad m => ManParser m Block
 parseMacro = do
-  m <- macro
-  return $ Plain (map Str $ (macroName m : macroArgs m))
-
-macro :: PandocMonad m => ManParser m Macro
-macro = do
   char '.' <|> char '\''
   many space
-  name <- many1 letter
-  --args <- many parseArg
-  return $ Macro { macroName = name, macroArgs = [] }
-  
+  macroName <- many1 (letter <|> oneOf ['\\', '"'])
+  args <- parseArgs
+  let joinedArgs = concat $ intersperse " " args
+  case macroName of
+    "\\\"" -> return Null -- comment
+    "TH"   -> macroTitle (if null args then "" else head args)
+    "nf"   -> macroCodeBlock True >> return Null
+    "fi"   -> macroCodeBlock False >> return Null
+    "B"    -> return $ Plain [Strong [Str joinedArgs]]
+    "BR"   -> return $ Plain [Strong [Str joinedArgs]]
+    "BI"   -> return $ Plain [Strong [Emph [Str joinedArgs]]]
+    "I"    -> return $ Plain [Emph   [Str joinedArgs]]
+    "SH"   -> return $ Header 2 nullAttr [Str joinedArgs]
+    "sp"   -> return $ Plain [LineBreak]
+    _      -> unkownMacro macroName args
+
   where
 
-  parseArg :: PandocMonad m => ManParser m String
-  parseArg = do
-    many1 space
-    plainArg
-  
-  quotedArg :: PandocMonad m => ManParser m String
-  quotedArg = do
-    char '"'
-    val <- many1 quotedChar
-    char '"'
-    return val
+  macroTitle :: PandocMonad m => String -> ManParser m Block
+  macroTitle mantitle = do
+    modifyState (changeTitle mantitle)
+    if null mantitle
+      then return Null
+      else return $ Header 1 nullAttr [Str mantitle]
+    where
+    changeTitle title mst @ ManState{ pState = pst} =
+      let meta = stateMeta pst
+          metaUp = Meta $ insert "title" (MetaString title) (unMeta meta)
+      in
+      mst { pState = pst {stateMeta = metaUp} }
 
-  plainArg :: PandocMonad m => ManParser m String
-  plainArg = do
-    many1 $ noneOf " \t"
-  
-  quotedChar :: PandocMonad m => ManParser m Char
-  quotedChar = do
-        noneOf "\""
-    <|> try (string "\"\"" >> return '"')
+
+  macroCodeBlock :: PandocMonad m => Bool -> ManParser m ()
+  macroCodeBlock insideCB = modifyRoffState (\rst -> rst{inCodeBlock = insideCB}) >> return ()
+    
+  unkownMacro :: PandocMonad m => String -> [String] -> ManParser m Block
+  unkownMacro mname args = do
+    pos <- getPosition
+    logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
+    return $ Plain $ Str <$> args
+   
+  parseArgs :: PandocMonad m => ManParser m [String]
+  parseArgs = do
+    eolOpt <- optionMaybe $ char '\n'
+    if isJust eolOpt
+      then return []
+      else do
+        many1 space
+        arg <- try quotedArg <|> plainArg
+        otherargs <- parseArgs
+        return $ arg : otherargs
+
+    where
+
+    plainArg :: PandocMonad m => ManParser m String
+    plainArg = many1 $ noneOf " \t\n"
+
+    quotedArg :: PandocMonad m => ManParser m String
+    quotedArg = do
+      char '"'
+      val <- many1 quotedChar
+      char '"'
+      return val
+
+    quotedChar :: PandocMonad m => ManParser m Char
+    quotedChar = noneOf "\"\n" <|> try (string "\"\"" >> return '"')
+
+roffInline :: RoffState -> String -> (Maybe Inline)
+roffInline rst str
+  | null str        = Nothing
+  | inCodeBlock rst = Just $ Code nullAttr str
+  | otherwise       = Just $ case fontKind rst of
+    Regular -> Str str
+    Italic  -> Emph [Str str]
+    _       -> Strong [Str str]
 
 parseLine :: PandocMonad m => ManParser m Block
 parseLine = do
-  str <- many anyChar
-  return $ Plain [Str str]
-
-parseBlock :: PandocMonad m => ManParser m Block
-parseBlock = do
-  choice [ parseMacro
-         , parseLine
-         ]
+  parts <- parseLineParts
+  newline
+  return $ if null parts
+    then Plain [LineBreak]
+    else Plain parts
+  where
+    parseLineParts :: PandocMonad m => ManParser m [Inline]
+    parseLineParts = do
+      lnpart <- many $ noneOf "\n\\"
+      ManState {rState = roffSt} <- getState
+      let inl = roffInline roffSt lnpart
+      others <- backSlash <|> return []
+      return $ case inl of
+        Just x  -> x:others
+        Nothing -> others
+    
+    backSlash :: PandocMonad m => ManParser m [Inline]
+    backSlash = do
+      char '\\'
+      esc <- choice [ char 'f' >> fEscape
+                    , char '-' >> return (Just '-')
+                    , Just <$> noneOf "\n"
+                    ]
+      ManState {rState = roffSt} <- getState
+      case esc of
+        Just c -> case roffInline roffSt [c] of
+          Just inl -> do
+            oth <- parseLineParts
+            return $ inl : oth
+          Nothing -> parseLineParts
+        Nothing -> parseLineParts
+      where
+      
+      fEscape :: PandocMonad m => ManParser m (Maybe Char)
+      fEscape = choice [ char 'B' >> modifyRoffState (\rst -> rst {fontKind = Bold})
+                       , char 'I' >> modifyRoffState (\rst -> rst {fontKind = Italic})
+                       , char 'P' >> modifyRoffState (\rst -> rst {fontKind = Regular})
+                       ]
+                >> return Nothing
+      
+      
 
 parseMan :: PandocMonad m => ManParser m Pandoc
 parseMan = do
-  blocks <- parseBlock `sepBy` newline
-  
-  return $ Pandoc Meta{unMeta = empty} blocks
\ No newline at end of file
+  blocks <- many (parseMacro <|> parseLine)
+  parserst <- pState <$> getState
+  return $ Pandoc (stateMeta parserst) blocks
diff --git a/test/Tests/Readers/Man.hs b/test/Tests/Readers/Man.hs
index 5dc91544b..007935be1 100644
--- a/test/Tests/Readers/Man.hs
+++ b/test/Tests/Readers/Man.hs
@@ -2,15 +2,30 @@
 module Tests.Readers.Man (tests) where
 
 import Data.Text (Text)
-import qualified Data.Text as T
 import Test.Tasty
 import Tests.Helpers
 import Text.Pandoc
 import Text.Pandoc.Arbitrary ()
 import Text.Pandoc.Builder
+import Text.Pandoc.Readers.Man
 
-creole :: Text -> Pandoc
-creole = purely $ readCreole def{ readerStandalone = True }
+man :: Text -> Pandoc
+man = purely $ readMan def
+
+infix 4 =:
+(=:) :: ToString c
+     => String -> (Text, c) -> TestTree
+(=:) = test man
 
 tests :: [TestTree]
-tests = []
\ No newline at end of file
+tests = [
+  -- .SH "HEllo bbb" "aaa"" as"
+  testGroup "Macros" [
+          "Bold" =:
+          ".B foo\n"
+          =?> strong "foo"
+        , "Italic" =:
+          ".I foo\n"
+          =?> emph "foo"
+        ]
+  ]
diff --git a/test/test-pandoc.hs b/test/test-pandoc.hs
index 1ea3591b2..9d4632f35 100644
--- a/test/test-pandoc.hs
+++ b/test/test-pandoc.hs
@@ -74,7 +74,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
           , testGroup "EPUB" Tests.Readers.EPUB.tests
           , testGroup "Muse" Tests.Readers.Muse.tests
           , testGroup "Creole" Tests.Readers.Creole.tests
-          , testGroup "Man" Tests.Readers
+          , testGroup "Man" Tests.Readers.Man.tests
           ]
         , testGroup "Lua filters" Tests.Lua.tests
         ]