2017-08-10 11:12:41 -07:00
|
|
|
{-
Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-}
|
|
|
|
|
|
|
|
{- |
   Module      : Text.Pandoc.CSV
   Copyright   : Copyright (C) 2017 John MacFarlane <jgm@berkeley.edu>
   License     : GNU GPL, version 2 or above
   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Simple CSV parser.
-}
|
|
|
|
|
|
|
|
module Text.Pandoc.CSV
  ( -- * Options
    CSVOptions(..)
  , defaultCSVOptions
    -- * Parsing
  , parseCSV
  , ParseError
  ) where
|
|
|
|
|
|
|
|
import Control.Monad (unless, void)
import Data.Text (Text)
import qualified Data.Text as T
import Text.Parsec
import Text.Parsec.Text (Parser)
|
|
|
|
|
|
|
|
-- | Options controlling how CSV input is split into cells.
data CSVOptions = CSVOptions
  { csvDelim     :: Char
    -- ^ Character that separates cells within a row.
  , csvQuote     :: Char
    -- ^ Character that opens and closes a quoted cell.
  , csvKeepSpace :: Bool
    -- ^ Treat whitespace following a delimiter as significant.
  , csvEscape    :: Maybe Char
    -- ^ Escape character used inside quoted cells; with 'Nothing', a
    -- quote character is escaped by doubling it.
  } deriving (Read, Show)
|
|
|
|
|
|
|
|
-- | Default options: comma-delimited, double-quote quoting, whitespace
-- after a delimiter is skipped, and quotes are escaped by doubling.
defaultCSVOptions :: CSVOptions
defaultCSVOptions =
  CSVOptions
    { csvDelim     = ','
    , csvQuote     = '"'
    , csvKeepSpace = False
    , csvEscape    = Nothing
    }
|
|
|
|
|
|
|
|
-- | Parse CSV text into a list of rows, each row being a list of cells.
-- Returns a 'ParseError' (reported against the source name @"csv"@) when
-- the input cannot be parsed.
parseCSV :: CSVOptions -> Text -> Either ParseError [[Text]]
parseCSV opts = parse (pCSV opts) "csv"
|
|
|
|
|
|
|
|
-- | Parser for a whole document: rows separated (and optionally
-- terminated) by line endings, then optional trailing whitespace,
-- then end of input.
pCSV :: CSVOptions -> Parser [[Text]]
pCSV opts = do
  rows <- pCSVRow opts `sepEndBy` endline
  spaces
  eof
  return rows
|
|
|
|
|
|
|
|
-- | Parser for a single row: cells separated by the delimiter.
-- Refuses to start a row at a position where 'blank' matches.
pCSVRow :: CSVOptions -> Parser [Text]
pCSVRow opts = do
  notFollowedBy blank
  sepBy (pCSVCell opts) (pCSVDelim opts)
|
|
|
|
|
|
|
|
-- | Matches optional whitespace followed by a line ending or the end of
-- input, backtracking completely on failure.
--
-- NOTE(review): 'spaces' skips any whitespace, line breaks included, so
-- the 'endline' alternative looks unreachable and in practice this
-- matches only when whitespace runs to the end of input. Confirm before
-- relying on it for blank lines in the middle of the input.
blank :: Parser ()
blank = try (spaces *> lineEndOrEof)
  where
    lineEndOrEof = endline <|> eof
|
|
|
|
|
|
|
|
-- | Parser for one cell: a quoted cell if the input starts with the quote
-- character, otherwise an unquoted cell.
pCSVCell :: CSVOptions -> Parser Text
pCSVCell opts = quoted <|> unquoted
  where
    quoted   = pCSVQuotedCell opts
    unquoted = pCSVUnquotedCell opts
|
|
|
|
|
|
|
|
-- | Parser for a quoted cell: an opening quote, any number of ordinary or
-- escaped characters, and a closing quote. The surrounding quotes are
-- not part of the result.
pCSVQuotedCell :: CSVOptions -> Parser Text
pCSVQuotedCell opts =
  T.pack <$> (quoteMark *> many cellChar <* quoteMark)
  where
    quoteMark = char (csvQuote opts)
    -- An ordinary character is neither the quote nor the escape character.
    isOrdinary ch = ch /= csvQuote opts && Just ch /= csvEscape opts
    cellChar = satisfy isOrdinary <|> escaped opts
|
|
|
|
|
|
|
|
-- | Parser for an escaped character inside a quoted cell.
--
-- Without an escape character configured, a doubled quote yields a single
-- quote. With one configured, the escape character followed by any
-- character other than CR or LF yields that following character.
-- Nothing is consumed on failure.
escaped :: CSVOptions -> Parser Char
escaped opts = try $
  case csvEscape opts of
    Nothing  -> quoteMark >> quoteMark
    Just esc -> char esc >> noneOf "\r\n"
  where
    quoteMark = char (csvQuote opts)
|
|
|
|
|
|
|
|
-- | Parser for an unquoted cell: the (possibly empty) run of characters up
-- to the next delimiter, CR, LF, or quote character.
pCSVUnquotedCell :: CSVOptions -> Parser Text
pCSVUnquotedCell opts = T.pack <$> many (satisfy isCellChar)
  where
    isCellChar ch =
      ch `notElem` [csvDelim opts, '\r', '\n', csvQuote opts]
|
2017-08-10 11:12:41 -07:00
|
|
|
|
|
|
|
-- | Parser for the cell delimiter. Unless 'csvKeepSpace' is set, any
-- spaces and tabs immediately following the delimiter are skipped too.
pCSVDelim :: CSVOptions -> Parser ()
pCSVDelim opts = do
  -- Discard the matched delimiter explicitly (avoids an unused-do-bind
  -- warning under -Wall).
  _ <- char (csvDelim opts)
  unless (csvKeepSpace opts) $ skipMany (oneOf " \t")
|
|
|
|
|
|
|
|
-- | Parser for a line ending: an optional carriage return followed by a
-- mandatory line feed.
endline :: Parser ()
endline = optional (char '\r') *> void (char '\n')
|
|
|
|
|