basic manfile parsing

This commit is contained in:
Yan Pas 2018-05-09 03:24:45 +03:00
parent fd3676a568
commit c1617565fc
3 changed files with 197 additions and 67 deletions

View file

@ -14,6 +14,9 @@ GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
@ -29,90 +32,202 @@ Conversion of man to 'Pandoc' document.
-}
module Text.Pandoc.Readers.Man where
import Control.Monad.Except (liftM2, throwError, guard)
import Text.Pandoc.Class (PandocMonad(..))
import Control.Monad.Except (throwError)
import Data.Default (Default)
import Data.Map (insert)
import Data.Maybe (isJust)
import Data.List (intersperse, intercalate)
import qualified Data.Text as T
import Text.Pandoc.Class (PandocMonad(..), runPure)
import Text.Pandoc.Definition
import Text.Pandoc.Error (PandocError)
import Text.Pandoc.Logging (LogMessage(..))
import Text.Pandoc.Options
import Text.Pandoc.Parsing hiding (enclosed)
import Text.Pandoc.Shared (crFilter)
import Text.Parsec
import Text.Parsec.Char
import Data.Text (Text)
import Data.Map (empty)
import qualified Data.Text as T
import Text.Parsec.Char ()
data FontKind = Regular | Italic | Bold | ItalicBold deriving Show
data RoffState = RoffState { inCodeBlock :: Bool
, fontKind :: FontKind
} deriving Show
instance Default RoffState where
def = RoffState {inCodeBlock = False, fontKind = Regular}
data ManState = ManState {pState :: ParserState, rState :: RoffState}
instance HasLogMessages ManState where
addLogMessage lm mst = mst {pState = addLogMessage lm (pState mst)}
getLogMessages mst = getLogMessages $ pState mst
modifyRoffState :: PandocMonad m => (RoffState -> RoffState) -> ParsecT a ManState m ()
modifyRoffState f = do
mst <- getState
setState mst { rState = f $ rState mst }
type ManParser m = ParserT [Char] ManState m
testStrr :: [Char] -> SourceName -> Either PandocError (Either ParseError Pandoc)
testStrr s srcnm = runPure (runParserT parseMan (ManState {pState=def, rState=def}) srcnm s)
printPandoc :: Pandoc -> [Char]
printPandoc (Pandoc m content) =
let ttl = "Pandoc: " ++ (show $ unMeta m)
cnt = intercalate "\n" $ map show content
in ttl ++ "\n" ++ cnt
strrepr :: (Show a2, Show a1) => Either a2 (Either a1 Pandoc) -> [Char]
strrepr obj = case obj of
Right x -> case x of
Right x' -> printPandoc x'
Left y' -> show y'
Left y -> show y
testFile :: FilePath -> IO ()
testFile fname = do
cont <- readFile fname
putStrLn . strrepr $ testStrr cont fname
-- | Read man (troff) from an input string and return a Pandoc document.
readMan :: PandocMonad m
=> ReaderOptions
-> Text
-> m Pandoc
readMan opts s = do
parsed <- readWithM parseMan def{ stateOptions = opts } (T.unpack s)
readMan :: PandocMonad m => ReaderOptions -> T.Text -> m Pandoc
readMan opts txt = do
let state = ManState { pState = def{ stateOptions = opts }, rState = def}
parsed <- readWithM parseMan state (T.unpack $ crFilter txt)
case parsed of
Right result -> return result
Left e -> throwError e
type ManParser m = ParserT [Char] ParserState m
comment :: PandocMonad m => ManParser m String
comment = do
string ".\\\" "
many anyChar
data Macro = Macro { macroName :: String
, macroArgs :: [String]
}
parseMacro :: PandocMonad m => ManParser m Block
parseMacro = do
m <- macro
return $ Plain (map Str $ (macroName m : macroArgs m))
macro :: PandocMonad m => ManParser m Macro
macro = do
char '.' <|> char '\''
many space
name <- many1 letter
--args <- many parseArg
return $ Macro { macroName = name, macroArgs = [] }
macroName <- many1 (letter <|> oneOf ['\\', '"'])
args <- parseArgs
let joinedArgs = concat $ intersperse " " args
case macroName of
"\\\"" -> return Null -- comment
"TH" -> macroTitle (if null args then "" else head args)
"nf" -> macroCodeBlock True >> return Null
"fi" -> macroCodeBlock False >> return Null
"B" -> return $ Plain [Strong [Str joinedArgs]]
"BR" -> return $ Plain [Strong [Str joinedArgs]]
"BI" -> return $ Plain [Strong [Emph [Str joinedArgs]]]
"I" -> return $ Plain [Emph [Str joinedArgs]]
"SH" -> return $ Header 2 nullAttr [Str joinedArgs]
"sp" -> return $ Plain [LineBreak]
_ -> unkownMacro macroName args
where
parseArg :: PandocMonad m => ManParser m String
parseArg = do
many1 space
plainArg
quotedArg :: PandocMonad m => ManParser m String
quotedArg = do
char '"'
val <- many1 quotedChar
char '"'
return val
macroTitle :: PandocMonad m => String -> ManParser m Block
macroTitle mantitle = do
modifyState (changeTitle mantitle)
if null mantitle
then return Null
else return $ Header 1 nullAttr [Str mantitle]
where
changeTitle title mst @ ManState{ pState = pst} =
let meta = stateMeta pst
metaUp = Meta $ insert "title" (MetaString title) (unMeta meta)
in
mst { pState = pst {stateMeta = metaUp} }
plainArg :: PandocMonad m => ManParser m String
plainArg = do
many1 $ noneOf " \t"
quotedChar :: PandocMonad m => ManParser m Char
quotedChar = do
noneOf "\""
<|> try (string "\"\"" >> return '"')
macroCodeBlock :: PandocMonad m => Bool -> ManParser m ()
macroCodeBlock insideCB = modifyRoffState (\rst -> rst{inCodeBlock = insideCB}) >> return ()
unkownMacro :: PandocMonad m => String -> [String] -> ManParser m Block
unkownMacro mname args = do
pos <- getPosition
logMessage $ SkippedContent ("Unknown macro: " ++ mname) pos
return $ Plain $ Str <$> args
parseArgs :: PandocMonad m => ManParser m [String]
parseArgs = do
eolOpt <- optionMaybe $ char '\n'
if isJust eolOpt
then return []
else do
many1 space
arg <- try quotedArg <|> plainArg
otherargs <- parseArgs
return $ arg : otherargs
where
plainArg :: PandocMonad m => ManParser m String
plainArg = many1 $ noneOf " \t\n"
quotedArg :: PandocMonad m => ManParser m String
quotedArg = do
char '"'
val <- many1 quotedChar
char '"'
return val
quotedChar :: PandocMonad m => ManParser m Char
quotedChar = noneOf "\"\n" <|> try (string "\"\"" >> return '"')
roffInline :: RoffState -> String -> (Maybe Inline)
roffInline rst str
| null str = Nothing
| inCodeBlock rst = Just $ Code nullAttr str
| otherwise = Just $ case fontKind rst of
Regular -> Str str
Italic -> Emph [Str str]
_ -> Strong [Str str]
parseLine :: PandocMonad m => ManParser m Block
parseLine = do
str <- many anyChar
return $ Plain [Str str]
parseBlock :: PandocMonad m => ManParser m Block
parseBlock = do
choice [ parseMacro
, parseLine
]
parts <- parseLineParts
newline
return $ if null parts
then Plain [LineBreak]
else Plain parts
where
parseLineParts :: PandocMonad m => ManParser m [Inline]
parseLineParts = do
lnpart <- many $ noneOf "\n\\"
ManState {rState = roffSt} <- getState
let inl = roffInline roffSt lnpart
others <- backSlash <|> return []
return $ case inl of
Just x -> x:others
Nothing -> others
backSlash :: PandocMonad m => ManParser m [Inline]
backSlash = do
char '\\'
esc <- choice [ char 'f' >> fEscape
, char '-' >> return (Just '-')
, Just <$> noneOf "\n"
]
ManState {rState = roffSt} <- getState
case esc of
Just c -> case roffInline roffSt [c] of
Just inl -> do
oth <- parseLineParts
return $ inl : oth
Nothing -> parseLineParts
Nothing -> parseLineParts
where
fEscape :: PandocMonad m => ManParser m (Maybe Char)
fEscape = choice [ char 'B' >> modifyRoffState (\rst -> rst {fontKind = Bold})
, char 'I' >> modifyRoffState (\rst -> rst {fontKind = Italic})
, char 'P' >> modifyRoffState (\rst -> rst {fontKind = Regular})
]
>> return Nothing
parseMan :: PandocMonad m => ManParser m Pandoc
parseMan = do
blocks <- parseBlock `sepBy` newline
return $ Pandoc Meta{unMeta = empty} blocks
blocks <- many (parseMacro <|> parseLine)
parserst <- pState <$> getState
return $ Pandoc (stateMeta parserst) blocks

View file

@ -2,15 +2,30 @@
module Tests.Readers.Man (tests) where
import Data.Text (Text)
import qualified Data.Text as T
import Test.Tasty
import Tests.Helpers
import Text.Pandoc
import Text.Pandoc.Arbitrary ()
import Text.Pandoc.Builder
import Text.Pandoc.Readers.Man
creole :: Text -> Pandoc
creole = purely $ readCreole def{ readerStandalone = True }
man :: Text -> Pandoc
man = purely $ readMan def
infix 4 =:
(=:) :: ToString c
=> String -> (Text, c) -> TestTree
(=:) = test man
tests :: [TestTree]
tests = []
tests = [
-- .SH "HEllo bbb" "aaa"" as"
testGroup "Macros" [
"Bold" =:
".B foo\n"
=?> strong "foo"
, "Italic" =:
".I foo\n"
=?> emph "foo"
]
]

View file

@ -74,7 +74,7 @@ tests = testGroup "pandoc tests" [ Tests.Command.tests
, testGroup "EPUB" Tests.Readers.EPUB.tests
, testGroup "Muse" Tests.Readers.Muse.tests
, testGroup "Creole" Tests.Readers.Creole.tests
, testGroup "Man" Tests.Readers
, testGroup "Man" Tests.Readers.Man.tests
]
, testGroup "Lua filters" Tests.Lua.tests
]