RST reader: Basic support for csv-table directive.
* Added Text.Pandoc.CSV, simple CSV parser. * Options still not supported, and we need tests. See #3533.
This commit is contained in:
parent
e9eaf84215
commit
a5790dd308
3 changed files with 157 additions and 1 deletions
|
@ -324,7 +324,8 @@ Library
|
|||
doctemplates >= 0.1 && < 0.2,
|
||||
http-client >= 0.4.30 && < 0.6,
|
||||
http-client-tls >= 0.2.4 && < 0.4,
|
||||
http-types >= 0.8 && < 0.10
|
||||
http-types >= 0.8 && < 0.10,
|
||||
csv-conduit >= 0.6 && < 0.7
|
||||
if os(windows)
|
||||
Cpp-options: -D_WINDOWS
|
||||
else
|
||||
|
@ -465,6 +466,7 @@ Library
|
|||
Text.Pandoc.Lua.StackInstances,
|
||||
Text.Pandoc.Lua.Util,
|
||||
Text.Pandoc.CSS,
|
||||
Text.Pandoc.CSV,
|
||||
Text.Pandoc.UUID,
|
||||
Text.Pandoc.BCP47
|
||||
Text.Pandoc.Slides,
|
||||
|
|
102
src/Text/Pandoc/CSV.hs
Normal file
102
src/Text/Pandoc/CSV.hs
Normal file
|
@ -0,0 +1,102 @@
|
|||
{-
|
||||
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
-}
|
||||
|
||||
{- |
|
||||
Module : Text.Pandoc.CSV
|
||||
Copyright : Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
|
||||
License : GNU GPL, version 2 or above
|
||||
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||
Stability : alpha
|
||||
Portability : portable
|
||||
|
||||
Simple CSV parser.
|
||||
-}
|
||||
|
||||
module Text.Pandoc.CSV (
|
||||
CSVOptions,
|
||||
defaultCSVOptions,
|
||||
parseCSV,
|
||||
ParseError
|
||||
) where
|
||||
|
||||
import Text.Parsec
|
||||
import Text.Parsec.Text (Parser)
|
||||
import Text.Parsec.Error (ParseError)
|
||||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
import Control.Monad (void)
|
||||
|
||||
data CSVOptions = CSVOptions{
|
||||
csvDelim :: Char
|
||||
, csvQuote :: Char
|
||||
, csvKeepSpace :: Bool -- treat whitespace following delim as significant
|
||||
, csvEscape :: Maybe Char -- default is to double up quote
|
||||
} deriving (Read, Show)
|
||||
|
||||
defaultCSVOptions :: CSVOptions
|
||||
defaultCSVOptions = CSVOptions{
|
||||
csvDelim = ','
|
||||
, csvQuote = '"'
|
||||
, csvKeepSpace = False
|
||||
, csvEscape = Nothing }
|
||||
|
||||
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
|
||||
parseCSV opts t = parse (pCSV opts) "csv" t
|
||||
|
||||
pCSV :: CSVOptions -> Parser [[Text]]
|
||||
pCSV opts =
|
||||
(pCSVRow opts `sepEndBy` endline) <* (spaces *> eof)
|
||||
|
||||
pCSVRow :: CSVOptions -> Parser [Text]
|
||||
pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts
|
||||
|
||||
blank :: Parser ()
|
||||
blank = try $ spaces >> (() <$ endline <|> eof)
|
||||
|
||||
pCSVCell :: CSVOptions -> Parser Text
|
||||
pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
|
||||
|
||||
pCSVQuotedCell :: CSVOptions -> Parser Text
|
||||
pCSVQuotedCell opts = do
|
||||
char (csvQuote opts)
|
||||
res <- many (satisfy (\c -> c /= csvQuote opts) <|> escaped opts)
|
||||
char (csvQuote opts)
|
||||
return $ T.pack res
|
||||
|
||||
escaped :: CSVOptions -> Parser Char
|
||||
escaped opts = do
|
||||
case csvEscape opts of
|
||||
Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts)
|
||||
Just c -> try $ char c >> noneOf "\r\n"
|
||||
|
||||
pCSVUnquotedCell :: CSVOptions -> Parser Text
|
||||
pCSVUnquotedCell opts = T.pack <$>
|
||||
many (satisfy $ \c -> c /= csvDelim opts && c /= '\r' && c /= '\n')
|
||||
|
||||
pCSVDelim :: CSVOptions -> Parser ()
|
||||
pCSVDelim opts = do
|
||||
char (csvDelim opts)
|
||||
if csvKeepSpace opts
|
||||
then return ()
|
||||
else skipMany (oneOf " \t")
|
||||
|
||||
endline :: Parser ()
|
||||
endline = do
|
||||
optional (void $ char '\r')
|
||||
void $ char '\n'
|
||||
|
|
@ -45,6 +45,7 @@ import Text.Pandoc.Builder (fromList, setMeta)
|
|||
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
|
||||
import qualified Text.Pandoc.Builder as B
|
||||
import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
|
||||
import Text.Pandoc.CSV (CSVOptions(..), defaultCSVOptions, parseCSV)
|
||||
import Text.Pandoc.Definition
|
||||
import Text.Pandoc.Error
|
||||
import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
|
||||
|
@ -56,6 +57,8 @@ import Text.Printf (printf)
|
|||
import Data.Text (Text)
|
||||
import qualified Data.Text as T
|
||||
|
||||
import Debug.Trace
|
||||
|
||||
-- TODO:
|
||||
-- [ ] .. parsed-literal
|
||||
-- [ ] .. csv-table
|
||||
|
@ -688,6 +691,7 @@ directive' = do
|
|||
case label of
|
||||
"table" -> tableDirective top fields body'
|
||||
"list-table" -> listTableDirective top fields body'
|
||||
"csv-table" -> csvTableDirective top fields body'
|
||||
"line-block" -> lineBlockDirective body'
|
||||
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
|
||||
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
|
||||
|
@ -820,6 +824,54 @@ listTableDirective top fields body = do
|
|||
takeCells _ = []
|
||||
normWidths ws = map (/ max 1 (sum ws)) ws
|
||||
|
||||
-- TODO
|
||||
-- [ ] delim:
|
||||
-- [ ] quote:
|
||||
-- [ ] keepspace:
|
||||
-- [ ] escape:
|
||||
-- [ ] widths:
|
||||
-- [ ] header-rows:
|
||||
-- [ ] header:
|
||||
-- [ ] url:
|
||||
-- [ ] file:
|
||||
-- [ ] encoding:
|
||||
csvTableDirective :: PandocMonad m
|
||||
=> String -> [(String, String)] -> String
|
||||
-> RSTParser m Blocks
|
||||
csvTableDirective top fields rawcsv = do
|
||||
let res = parseCSV defaultCSVOptions (T.pack rawcsv)
|
||||
case res of
|
||||
Left e -> do
|
||||
throwError $ PandocParsecError "csv table" e
|
||||
Right rows -> do
|
||||
return $ B.rawBlock "rst" $ show rows
|
||||
{-
|
||||
bs <- parseFromString' parseBlocks body
|
||||
title <- parseFromString' (trimInlines . mconcat <$> many inline) top
|
||||
let rows = takeRows $ B.toList bs
|
||||
headerRowsNum = fromMaybe (0 :: Int) $
|
||||
lookup "header-rows" fields >>= safeRead
|
||||
(headerRow,bodyRows,numOfCols) = case rows of
|
||||
x:xs -> if headerRowsNum > 0
|
||||
then (x, xs, length x)
|
||||
else ([], rows, length x)
|
||||
_ -> ([],[],0)
|
||||
widths = case trim <$> lookup "widths" fields of
|
||||
Just "auto" -> replicate numOfCols 0
|
||||
Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
|
||||
splitBy (`elem` (" ," :: String)) specs
|
||||
_ -> replicate numOfCols 0
|
||||
return $ B.table title
|
||||
(zip (replicate numOfCols AlignDefault) widths)
|
||||
headerRow
|
||||
bodyRows
|
||||
where takeRows [BulletList rows] = map takeCells rows
|
||||
takeRows _ = []
|
||||
takeCells [BulletList cells] = map B.fromList cells
|
||||
takeCells _ = []
|
||||
normWidths ws = map (/ max 1 (sum ws)) ws
|
||||
-}
|
||||
|
||||
-- TODO:
|
||||
-- - Only supports :format: fields with a single format for :raw: roles,
|
||||
-- change Text.Pandoc.Definition.Format to fix
|
||||
|
|
Loading…
Reference in a new issue