RST reader: Basic support for csv-table directive.

* Added Text.Pandoc.CSV, simple CSV parser.
* Options still not supported, and we need tests.

See #3533.
This commit is contained in:
John MacFarlane 2017-08-10 11:12:41 -07:00
parent e9eaf84215
commit a5790dd308
3 changed files with 157 additions and 1 deletions

View file

@ -324,7 +324,8 @@ Library
doctemplates >= 0.1 && < 0.2, doctemplates >= 0.1 && < 0.2,
http-client >= 0.4.30 && < 0.6, http-client >= 0.4.30 && < 0.6,
http-client-tls >= 0.2.4 && < 0.4, http-client-tls >= 0.2.4 && < 0.4,
http-types >= 0.8 && < 0.10 http-types >= 0.8 && < 0.10,
csv-conduit >= 0.6 && < 0.7
if os(windows) if os(windows)
Cpp-options: -D_WINDOWS Cpp-options: -D_WINDOWS
else else
@ -465,6 +466,7 @@ Library
Text.Pandoc.Lua.StackInstances, Text.Pandoc.Lua.StackInstances,
Text.Pandoc.Lua.Util, Text.Pandoc.Lua.Util,
Text.Pandoc.CSS, Text.Pandoc.CSS,
Text.Pandoc.CSV,
Text.Pandoc.UUID, Text.Pandoc.UUID,
Text.Pandoc.BCP47 Text.Pandoc.BCP47
Text.Pandoc.Slides, Text.Pandoc.Slides,

102
src/Text/Pandoc/CSV.hs Normal file
View file

@ -0,0 +1,102 @@
{-
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
{- |
Module : Text.Pandoc.CSV
Copyright : Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Simple CSV parser.
-}
module Text.Pandoc.CSV (
CSVOptions,
defaultCSVOptions,
parseCSV,
ParseError
) where
import Text.Parsec
import Text.Parsec.Text (Parser)
import Text.Parsec.Error (ParseError)
import Data.Text (Text)
import qualified Data.Text as T
import Control.Monad (void)
data CSVOptions = CSVOptions{
csvDelim :: Char
, csvQuote :: Char
, csvKeepSpace :: Bool -- treat whitespace following delim as significant
, csvEscape :: Maybe Char -- default is to double up quote
} deriving (Read, Show)
defaultCSVOptions :: CSVOptions
defaultCSVOptions = CSVOptions{
csvDelim = ','
, csvQuote = '"'
, csvKeepSpace = False
, csvEscape = Nothing }
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV opts t = parse (pCSV opts) "csv" t
pCSV :: CSVOptions -> Parser [[Text]]
pCSV opts =
(pCSVRow opts `sepEndBy` endline) <* (spaces *> eof)
pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts
blank :: Parser ()
blank = try $ spaces >> (() <$ endline <|> eof)
pCSVCell :: CSVOptions -> Parser Text
pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell opts = do
char (csvQuote opts)
res <- many (satisfy (\c -> c /= csvQuote opts) <|> escaped opts)
char (csvQuote opts)
return $ T.pack res
escaped :: CSVOptions -> Parser Char
escaped opts = do
case csvEscape opts of
Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts)
Just c -> try $ char c >> noneOf "\r\n"
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell opts = T.pack <$>
many (satisfy $ \c -> c /= csvDelim opts && c /= '\r' && c /= '\n')
pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim opts = do
char (csvDelim opts)
if csvKeepSpace opts
then return ()
else skipMany (oneOf " \t")
endline :: Parser ()
endline = do
optional (void $ char '\r')
void $ char '\n'

View file

@ -45,6 +45,7 @@ import Text.Pandoc.Builder (fromList, setMeta)
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines) import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import qualified Text.Pandoc.Builder as B import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad, readFileFromDirs) import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
import Text.Pandoc.CSV (CSVOptions(..), defaultCSVOptions, parseCSV)
import Text.Pandoc.Definition import Text.Pandoc.Definition
import Text.Pandoc.Error import Text.Pandoc.Error
import Text.Pandoc.ImageSize (lengthToDim, scaleDimension) import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
@ -56,6 +57,8 @@ import Text.Printf (printf)
import Data.Text (Text) import Data.Text (Text)
import qualified Data.Text as T import qualified Data.Text as T
import Debug.Trace
-- TODO: -- TODO:
-- [ ] .. parsed-literal -- [ ] .. parsed-literal
-- [ ] .. csv-table -- [ ] .. csv-table
@ -688,6 +691,7 @@ directive' = do
case label of case label of
"table" -> tableDirective top fields body' "table" -> tableDirective top fields body'
"list-table" -> listTableDirective top fields body' "list-table" -> listTableDirective top fields body'
"csv-table" -> csvTableDirective top fields body'
"line-block" -> lineBlockDirective body' "line-block" -> lineBlockDirective body'
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body) "raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields "role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
@ -820,6 +824,54 @@ listTableDirective top fields body = do
takeCells _ = [] takeCells _ = []
normWidths ws = map (/ max 1 (sum ws)) ws normWidths ws = map (/ max 1 (sum ws)) ws
-- TODO
-- [ ] delim:
-- [ ] quote:
-- [ ] keepspace:
-- [ ] escape:
-- [ ] widths:
-- [ ] header-rows:
-- [ ] header:
-- [ ] url:
-- [ ] file:
-- [ ] encoding:
csvTableDirective :: PandocMonad m
=> String -> [(String, String)] -> String
-> RSTParser m Blocks
csvTableDirective top fields rawcsv = do
let res = parseCSV defaultCSVOptions (T.pack rawcsv)
case res of
Left e -> do
throwError $ PandocParsecError "csv table" e
Right rows -> do
return $ B.rawBlock "rst" $ show rows
{-
bs <- parseFromString' parseBlocks body
title <- parseFromString' (trimInlines . mconcat <$> many inline) top
let rows = takeRows $ B.toList bs
headerRowsNum = fromMaybe (0 :: Int) $
lookup "header-rows" fields >>= safeRead
(headerRow,bodyRows,numOfCols) = case rows of
x:xs -> if headerRowsNum > 0
then (x, xs, length x)
else ([], rows, length x)
_ -> ([],[],0)
widths = case trim <$> lookup "widths" fields of
Just "auto" -> replicate numOfCols 0
Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
splitBy (`elem` (" ," :: String)) specs
_ -> replicate numOfCols 0
return $ B.table title
(zip (replicate numOfCols AlignDefault) widths)
headerRow
bodyRows
where takeRows [BulletList rows] = map takeCells rows
takeRows _ = []
takeCells [BulletList cells] = map B.fromList cells
takeCells _ = []
normWidths ws = map (/ max 1 (sum ws)) ws
-}
-- TODO: -- TODO:
-- - Only supports :format: fields with a single format for :raw: roles, -- - Only supports :format: fields with a single format for :raw: roles,
-- change Text.Pandoc.Definition.Format to fix -- change Text.Pandoc.Definition.Format to fix