Add Text.Pandoc.Readers.CSV (readCSV).

This adds csv as an input format.
The CSV table is converted into a pandoc simple table.

Closes #6100.
This commit is contained in:
John MacFarlane 2020-01-31 21:14:21 -08:00
parent cef30e7384
commit f9514ccb9e
6 changed files with 134 additions and 2 deletions

View file

@ -227,6 +227,7 @@ header when requesting a document from a URL:
::: {#input-formats} ::: {#input-formats}
- `commonmark` ([CommonMark] Markdown) - `commonmark` ([CommonMark] Markdown)
- `creole` ([Creole 1.0]) - `creole` ([Creole 1.0])
- `csv` ([CSV] table)
- `docbook` ([DocBook]) - `docbook` ([DocBook])
- `docx` ([Word docx]) - `docx` ([Word docx])
- `dokuwiki` ([DokuWiki markup]) - `dokuwiki` ([DokuWiki markup])
@ -467,6 +468,7 @@ header when requesting a document from a URL:
[TikiWiki markup]: https://doc.tiki.org/Wiki-Syntax-Text#The_Markup_Language_Wiki-Syntax [TikiWiki markup]: https://doc.tiki.org/Wiki-Syntax-Text#The_Markup_Language_Wiki-Syntax
[Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html [Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html
[Creole 1.0]: http://www.wikicreole.org/wiki/Creole1.0 [Creole 1.0]: http://www.wikicreole.org/wiki/Creole1.0
[CSV]: https://tools.ietf.org/html/rfc4180
[roff man]: https://man.cx/groff_man(7) [roff man]: https://man.cx/groff_man(7)
[roff ms]: https://man.cx/groff_ms(7) [roff ms]: https://man.cx/groff_ms(7)
[Haskell]: https://www.haskell.org [Haskell]: https://www.haskell.org

View file

@ -1,5 +1,5 @@
name: pandoc name: pandoc
version: 2.9.1.1 version: 2.9.2
cabal-version: 2.0 cabal-version: 2.0
build-type: Simple build-type: Simple
license: GPL-2 license: GPL-2
@ -21,7 +21,7 @@ description: Pandoc is a Haskell library for converting from one markup
TikiWiki markup, Jira markup, Creole 1.0, Haddock markup, TikiWiki markup, Jira markup, Creole 1.0, Haddock markup,
OPML, Emacs Org-Mode, Emacs Muse, txt2tags, ipynb (Jupyter OPML, Emacs Org-Mode, Emacs Muse, txt2tags, ipynb (Jupyter
notebooks), Vimwiki, Word Docx, ODT, EPUB, FictionBook2, notebooks), Vimwiki, Word Docx, ODT, EPUB, FictionBook2,
roff man, and Textile, and it can write Markdown, roff man, Textile, and CSV, and it can write Markdown,
reStructuredText, XHTML, HTML 5, LaTeX, ConTeXt, DocBook, reStructuredText, XHTML, HTML 5, LaTeX, ConTeXt, DocBook,
JATS, OPML, TEI, OpenDocument, ODT, Word docx, JATS, OPML, TEI, OpenDocument, ODT, Word docx,
PowerPoint pptx, RTF, MediaWiki, DokuWiki, XWiki, PowerPoint pptx, RTF, MediaWiki, DokuWiki, XWiki,
@ -503,6 +503,7 @@ library
Text.Pandoc.Readers.FB2, Text.Pandoc.Readers.FB2,
Text.Pandoc.Readers.DokuWiki, Text.Pandoc.Readers.DokuWiki,
Text.Pandoc.Readers.Ipynb, Text.Pandoc.Readers.Ipynb,
Text.Pandoc.Readers.CSV,
Text.Pandoc.Writers, Text.Pandoc.Writers,
Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Native,
Text.Pandoc.Writers.Docbook, Text.Pandoc.Writers.Docbook,

View file

@ -75,5 +75,6 @@ formatFromFilePath x =
".wiki" -> Just "mediawiki" ".wiki" -> Just "mediawiki"
".xhtml" -> Just "html" ".xhtml" -> Just "html"
".ipynb" -> Just "ipynb" ".ipynb" -> Just "ipynb"
".csv" -> Just "csv"
['.',y] | y `elem` ['1'..'9'] -> Just "man" ['.',y] | y `elem` ['1'..'9'] -> Just "man"
_ -> Nothing _ -> Nothing

View file

@ -51,6 +51,7 @@ module Text.Pandoc.Readers
, readMuse , readMuse
, readFB2 , readFB2
, readIpynb , readIpynb
, readCSV
-- * Miscellaneous -- * Miscellaneous
, getReader , getReader
, getDefaultExtensions , getDefaultExtensions
@ -95,6 +96,7 @@ import Text.Pandoc.Readers.TWiki
import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Readers.Vimwiki import Text.Pandoc.Readers.Vimwiki
import Text.Pandoc.Readers.Man import Text.Pandoc.Readers.Man
import Text.Pandoc.Readers.CSV
import qualified Text.Pandoc.UTF8 as UTF8 import qualified Text.Pandoc.UTF8 as UTF8
import Text.Parsec.Error import Text.Parsec.Error
@ -136,6 +138,7 @@ readers = [ ("native" , TextReader readNative)
,("man" , TextReader readMan) ,("man" , TextReader readMan)
,("fb2" , TextReader readFB2) ,("fb2" , TextReader readFB2)
,("ipynb" , TextReader readIpynb) ,("ipynb" , TextReader readIpynb)
,("csv" , TextReader readCSV)
] ]
-- | Retrieve reader, extensions based on formatSpec (format+extensions). -- | Retrieve reader, extensions based on formatSpec (format+extensions).

View file

@ -0,0 +1,108 @@
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE ViewPatterns #-}
{- |
Module : Text.Pandoc.Readers.RST
Copyright : Copyright (C) 2006-2019 John MacFarlane
License : GNU GPL, version 2 or above
Maintainer : John MacFarlane <jgm@berkeley.edu>
Stability : alpha
Portability : portable
Conversion from CSV to a 'Pandoc' table.
-}
module Text.Pandoc.Readers.CSV ( readCSV ) where
import Prelude
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Definition
import qualified Text.Pandoc.Builder as B
import Text.Parsec
import Text.Parsec.Text (Parser)
import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Shared (crFilter)
import Text.Pandoc.Error
import Text.Pandoc.Options (ReaderOptions(..))
import Control.Monad.Except (throwError)
readCSV :: PandocMonad m
=> ReaderOptions -- ^ Reader options
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
-> m Pandoc
readCSV opts s = do
let columns = readerColumns opts
case parse pCSV "input" (crFilter s) of
Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) hdrs rows
where capt = mempty
numcols = length r
hdrs = map (B.plain . B.text) r
rows = map (map (B.plain . B.text)) rs
maximum' [] = 0
maximum' xs = maximum xs
aligns = replicate numcols AlignDefault
widths = replicate numcols 0
Right [] -> return $ B.doc mempty
Left e -> throwError $ PandocParsecError s e
{- from RFC 4180
The ABNF grammar [2] appears as follows:
file = [header CRLF] record *(CRLF record) [CRLF]
header = name *(COMMA name)
record = field *(COMMA field)
name = field
field = (escaped / non-escaped)
escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
non-escaped = *TEXTDATA
COMMA = %x2C
CR = %x0D ;as per section 6.1 of RFC 2234 [2]
DQUOTE = %x22 ;as per section 6.1 of RFC 2234 [2]
LF = %x0A ;as per section 6.1 of RFC 2234 [2]
CRLF = CR LF ;as per section 6.1 of RFC 2234 [2]
TEXTDATA = %x20-21 / %x23-2B / %x2D-7E
-}
pCSV :: Parser [[Text]]
pCSV = many pRecord
pRecord :: Parser [Text]
pRecord = do
x <- pField
xs <- many $ pComma >> pField
() <$ newline <|> eof
return (x:xs)
pField :: Parser Text
pField = pEscaped <|> pUnescaped
pComma :: Parser Char
pComma = char ','
pUnescaped :: Parser Text
pUnescaped = T.strip . T.pack <$> many1 (noneOf "\n\r\",")
pEscaped :: Parser Text
pEscaped = do
char '"'
t <- T.pack <$> many (pDoubledQuote <|> noneOf "\"")
char '"'
return t
pDoubledQuote :: Parser Char
pDoubledQuote = try $ char '"' >> char '"'

17
test/command/csv.md Normal file
View file

@ -0,0 +1,17 @@
```
% pandoc -f csv -t native
Fruit,Price,Quantity
Apple,25 cents,33
"""Navel"" Orange","35 cents",22
^D
[Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
[[Plain [Str "Fruit"]]
,[Plain [Str "Price"]]
,[Plain [Str "Quantity"]]]
[[[Plain [Str "Apple"]]
,[Plain [Str "25",Space,Str "cents"]]
,[Plain [Str "33"]]]
,[[Plain [Str "\"Navel\"",Space,Str "Orange"]]
,[Plain [Str "35",Space,Str "cents"]]
,[Plain [Str "22"]]]]]
```