RST reader: Basic support for csv-table directive.
* Added Text.Pandoc.CSV, simple CSV parser. * Options still not supported, and we need tests. See #3533.
This commit is contained in:
parent
e9eaf84215
commit
a5790dd308
3 changed files with 157 additions and 1 deletions
|
@ -324,7 +324,8 @@ Library
|
||||||
doctemplates >= 0.1 && < 0.2,
|
doctemplates >= 0.1 && < 0.2,
|
||||||
http-client >= 0.4.30 && < 0.6,
|
http-client >= 0.4.30 && < 0.6,
|
||||||
http-client-tls >= 0.2.4 && < 0.4,
|
http-client-tls >= 0.2.4 && < 0.4,
|
||||||
http-types >= 0.8 && < 0.10
|
http-types >= 0.8 && < 0.10,
|
||||||
|
csv-conduit >= 0.6 && < 0.7
|
||||||
if os(windows)
|
if os(windows)
|
||||||
Cpp-options: -D_WINDOWS
|
Cpp-options: -D_WINDOWS
|
||||||
else
|
else
|
||||||
|
@ -465,6 +466,7 @@ Library
|
||||||
Text.Pandoc.Lua.StackInstances,
|
Text.Pandoc.Lua.StackInstances,
|
||||||
Text.Pandoc.Lua.Util,
|
Text.Pandoc.Lua.Util,
|
||||||
Text.Pandoc.CSS,
|
Text.Pandoc.CSS,
|
||||||
|
Text.Pandoc.CSV,
|
||||||
Text.Pandoc.UUID,
|
Text.Pandoc.UUID,
|
||||||
Text.Pandoc.BCP47
|
Text.Pandoc.BCP47
|
||||||
Text.Pandoc.Slides,
|
Text.Pandoc.Slides,
|
||||||
|
|
102
src/Text/Pandoc/CSV.hs
Normal file
102
src/Text/Pandoc/CSV.hs
Normal file
|
@ -0,0 +1,102 @@
|
||||||
|
{-
|
||||||
|
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with this program; if not, write to the Free Software
|
||||||
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||||
|
-}
|
||||||
|
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.CSV
|
||||||
|
Copyright : Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||||
|
Stability : alpha
|
||||||
|
Portability : portable
|
||||||
|
|
||||||
|
Simple CSV parser.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Text.Pandoc.CSV (
|
||||||
|
CSVOptions,
|
||||||
|
defaultCSVOptions,
|
||||||
|
parseCSV,
|
||||||
|
ParseError
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Text (Parser)
|
||||||
|
import Text.Parsec.Error (ParseError)
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Control.Monad (void)
|
||||||
|
|
||||||
|
data CSVOptions = CSVOptions{
|
||||||
|
csvDelim :: Char
|
||||||
|
, csvQuote :: Char
|
||||||
|
, csvKeepSpace :: Bool -- treat whitespace following delim as significant
|
||||||
|
, csvEscape :: Maybe Char -- default is to double up quote
|
||||||
|
} deriving (Read, Show)
|
||||||
|
|
||||||
|
defaultCSVOptions :: CSVOptions
|
||||||
|
defaultCSVOptions = CSVOptions{
|
||||||
|
csvDelim = ','
|
||||||
|
, csvQuote = '"'
|
||||||
|
, csvKeepSpace = False
|
||||||
|
, csvEscape = Nothing }
|
||||||
|
|
||||||
|
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
|
||||||
|
parseCSV opts t = parse (pCSV opts) "csv" t
|
||||||
|
|
||||||
|
pCSV :: CSVOptions -> Parser [[Text]]
|
||||||
|
pCSV opts =
|
||||||
|
(pCSVRow opts `sepEndBy` endline) <* (spaces *> eof)
|
||||||
|
|
||||||
|
pCSVRow :: CSVOptions -> Parser [Text]
|
||||||
|
pCSVRow opts = notFollowedBy blank >> pCSVCell opts `sepBy` pCSVDelim opts
|
||||||
|
|
||||||
|
blank :: Parser ()
|
||||||
|
blank = try $ spaces >> (() <$ endline <|> eof)
|
||||||
|
|
||||||
|
pCSVCell :: CSVOptions -> Parser Text
|
||||||
|
pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
|
||||||
|
|
||||||
|
pCSVQuotedCell :: CSVOptions -> Parser Text
|
||||||
|
pCSVQuotedCell opts = do
|
||||||
|
char (csvQuote opts)
|
||||||
|
res <- many (satisfy (\c -> c /= csvQuote opts) <|> escaped opts)
|
||||||
|
char (csvQuote opts)
|
||||||
|
return $ T.pack res
|
||||||
|
|
||||||
|
escaped :: CSVOptions -> Parser Char
|
||||||
|
escaped opts = do
|
||||||
|
case csvEscape opts of
|
||||||
|
Nothing -> try $ char (csvQuote opts) >> char (csvQuote opts)
|
||||||
|
Just c -> try $ char c >> noneOf "\r\n"
|
||||||
|
|
||||||
|
pCSVUnquotedCell :: CSVOptions -> Parser Text
|
||||||
|
pCSVUnquotedCell opts = T.pack <$>
|
||||||
|
many (satisfy $ \c -> c /= csvDelim opts && c /= '\r' && c /= '\n')
|
||||||
|
|
||||||
|
pCSVDelim :: CSVOptions -> Parser ()
|
||||||
|
pCSVDelim opts = do
|
||||||
|
char (csvDelim opts)
|
||||||
|
if csvKeepSpace opts
|
||||||
|
then return ()
|
||||||
|
else skipMany (oneOf " \t")
|
||||||
|
|
||||||
|
endline :: Parser ()
|
||||||
|
endline = do
|
||||||
|
optional (void $ char '\r')
|
||||||
|
void $ char '\n'
|
||||||
|
|
|
@ -45,6 +45,7 @@ import Text.Pandoc.Builder (fromList, setMeta)
|
||||||
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
|
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
|
||||||
import qualified Text.Pandoc.Builder as B
|
import qualified Text.Pandoc.Builder as B
|
||||||
import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
|
import Text.Pandoc.Class (PandocMonad, readFileFromDirs)
|
||||||
|
import Text.Pandoc.CSV (CSVOptions(..), defaultCSVOptions, parseCSV)
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Error
|
import Text.Pandoc.Error
|
||||||
import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
|
import Text.Pandoc.ImageSize (lengthToDim, scaleDimension)
|
||||||
|
@ -56,6 +57,8 @@ import Text.Printf (printf)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
|
||||||
|
import Debug.Trace
|
||||||
|
|
||||||
-- TODO:
|
-- TODO:
|
||||||
-- [ ] .. parsed-literal
|
-- [ ] .. parsed-literal
|
||||||
-- [ ] .. csv-table
|
-- [ ] .. csv-table
|
||||||
|
@ -688,6 +691,7 @@ directive' = do
|
||||||
case label of
|
case label of
|
||||||
"table" -> tableDirective top fields body'
|
"table" -> tableDirective top fields body'
|
||||||
"list-table" -> listTableDirective top fields body'
|
"list-table" -> listTableDirective top fields body'
|
||||||
|
"csv-table" -> csvTableDirective top fields body'
|
||||||
"line-block" -> lineBlockDirective body'
|
"line-block" -> lineBlockDirective body'
|
||||||
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
|
"raw" -> return $ B.rawBlock (trim top) (stripTrailingNewlines body)
|
||||||
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
|
"role" -> addNewRole top $ map (\(k,v) -> (k, trim v)) fields
|
||||||
|
@ -820,6 +824,54 @@ listTableDirective top fields body = do
|
||||||
takeCells _ = []
|
takeCells _ = []
|
||||||
normWidths ws = map (/ max 1 (sum ws)) ws
|
normWidths ws = map (/ max 1 (sum ws)) ws
|
||||||
|
|
||||||
|
-- TODO
|
||||||
|
-- [ ] delim:
|
||||||
|
-- [ ] quote:
|
||||||
|
-- [ ] keepspace:
|
||||||
|
-- [ ] escape:
|
||||||
|
-- [ ] widths:
|
||||||
|
-- [ ] header-rows:
|
||||||
|
-- [ ] header:
|
||||||
|
-- [ ] url:
|
||||||
|
-- [ ] file:
|
||||||
|
-- [ ] encoding:
|
||||||
|
csvTableDirective :: PandocMonad m
|
||||||
|
=> String -> [(String, String)] -> String
|
||||||
|
-> RSTParser m Blocks
|
||||||
|
csvTableDirective top fields rawcsv = do
|
||||||
|
let res = parseCSV defaultCSVOptions (T.pack rawcsv)
|
||||||
|
case res of
|
||||||
|
Left e -> do
|
||||||
|
throwError $ PandocParsecError "csv table" e
|
||||||
|
Right rows -> do
|
||||||
|
return $ B.rawBlock "rst" $ show rows
|
||||||
|
{-
|
||||||
|
bs <- parseFromString' parseBlocks body
|
||||||
|
title <- parseFromString' (trimInlines . mconcat <$> many inline) top
|
||||||
|
let rows = takeRows $ B.toList bs
|
||||||
|
headerRowsNum = fromMaybe (0 :: Int) $
|
||||||
|
lookup "header-rows" fields >>= safeRead
|
||||||
|
(headerRow,bodyRows,numOfCols) = case rows of
|
||||||
|
x:xs -> if headerRowsNum > 0
|
||||||
|
then (x, xs, length x)
|
||||||
|
else ([], rows, length x)
|
||||||
|
_ -> ([],[],0)
|
||||||
|
widths = case trim <$> lookup "widths" fields of
|
||||||
|
Just "auto" -> replicate numOfCols 0
|
||||||
|
Just specs -> normWidths $ map (fromMaybe (0 :: Double) . safeRead) $
|
||||||
|
splitBy (`elem` (" ," :: String)) specs
|
||||||
|
_ -> replicate numOfCols 0
|
||||||
|
return $ B.table title
|
||||||
|
(zip (replicate numOfCols AlignDefault) widths)
|
||||||
|
headerRow
|
||||||
|
bodyRows
|
||||||
|
where takeRows [BulletList rows] = map takeCells rows
|
||||||
|
takeRows _ = []
|
||||||
|
takeCells [BulletList cells] = map B.fromList cells
|
||||||
|
takeCells _ = []
|
||||||
|
normWidths ws = map (/ max 1 (sum ws)) ws
|
||||||
|
-}
|
||||||
|
|
||||||
-- TODO:
|
-- TODO:
|
||||||
-- - Only supports :format: fields with a single format for :raw: roles,
|
-- - Only supports :format: fields with a single format for :raw: roles,
|
||||||
-- change Text.Pandoc.Definition.Format to fix
|
-- change Text.Pandoc.Definition.Format to fix
|
||||||
|
|
Loading…
Reference in a new issue