Add Text.Pandoc.Readers.CSV (readCSV).
This adds csv as an input format. The CSV table is converted into a pandoc simple table. Closes #6100.
This commit is contained in:
parent
cef30e7384
commit
f9514ccb9e
6 changed files with 134 additions and 2 deletions
|
@ -227,6 +227,7 @@ header when requesting a document from a URL:
|
||||||
::: {#input-formats}
|
::: {#input-formats}
|
||||||
- `commonmark` ([CommonMark] Markdown)
|
- `commonmark` ([CommonMark] Markdown)
|
||||||
- `creole` ([Creole 1.0])
|
- `creole` ([Creole 1.0])
|
||||||
|
- `csv` ([CSV] table)
|
||||||
- `docbook` ([DocBook])
|
- `docbook` ([DocBook])
|
||||||
- `docx` ([Word docx])
|
- `docx` ([Word docx])
|
||||||
- `dokuwiki` ([DokuWiki markup])
|
- `dokuwiki` ([DokuWiki markup])
|
||||||
|
@ -467,6 +468,7 @@ header when requesting a document from a URL:
|
||||||
[TikiWiki markup]: https://doc.tiki.org/Wiki-Syntax-Text#The_Markup_Language_Wiki-Syntax
|
[TikiWiki markup]: https://doc.tiki.org/Wiki-Syntax-Text#The_Markup_Language_Wiki-Syntax
|
||||||
[Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html
|
[Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html
|
||||||
[Creole 1.0]: http://www.wikicreole.org/wiki/Creole1.0
|
[Creole 1.0]: http://www.wikicreole.org/wiki/Creole1.0
|
||||||
|
[CSV]: https://tools.ietf.org/html/rfc4180
|
||||||
[roff man]: https://man.cx/groff_man(7)
|
[roff man]: https://man.cx/groff_man(7)
|
||||||
[roff ms]: https://man.cx/groff_ms(7)
|
[roff ms]: https://man.cx/groff_ms(7)
|
||||||
[Haskell]: https://www.haskell.org
|
[Haskell]: https://www.haskell.org
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
name: pandoc
|
name: pandoc
|
||||||
version: 2.9.1.1
|
version: 2.9.2
|
||||||
cabal-version: 2.0
|
cabal-version: 2.0
|
||||||
build-type: Simple
|
build-type: Simple
|
||||||
license: GPL-2
|
license: GPL-2
|
||||||
|
@ -21,7 +21,7 @@ description: Pandoc is a Haskell library for converting from one markup
|
||||||
TikiWiki markup, Jira markup, Creole 1.0, Haddock markup,
|
TikiWiki markup, Jira markup, Creole 1.0, Haddock markup,
|
||||||
OPML, Emacs Org-Mode, Emacs Muse, txt2tags, ipynb (Jupyter
|
OPML, Emacs Org-Mode, Emacs Muse, txt2tags, ipynb (Jupyter
|
||||||
notebooks), Vimwiki, Word Docx, ODT, EPUB, FictionBook2,
|
notebooks), Vimwiki, Word Docx, ODT, EPUB, FictionBook2,
|
||||||
roff man, and Textile, and it can write Markdown,
|
roff man, Textile, and CSV, and it can write Markdown,
|
||||||
reStructuredText, XHTML, HTML 5, LaTeX, ConTeXt, DocBook,
|
reStructuredText, XHTML, HTML 5, LaTeX, ConTeXt, DocBook,
|
||||||
JATS, OPML, TEI, OpenDocument, ODT, Word docx,
|
JATS, OPML, TEI, OpenDocument, ODT, Word docx,
|
||||||
PowerPoint pptx, RTF, MediaWiki, DokuWiki, XWiki,
|
PowerPoint pptx, RTF, MediaWiki, DokuWiki, XWiki,
|
||||||
|
@ -503,6 +503,7 @@ library
|
||||||
Text.Pandoc.Readers.FB2,
|
Text.Pandoc.Readers.FB2,
|
||||||
Text.Pandoc.Readers.DokuWiki,
|
Text.Pandoc.Readers.DokuWiki,
|
||||||
Text.Pandoc.Readers.Ipynb,
|
Text.Pandoc.Readers.Ipynb,
|
||||||
|
Text.Pandoc.Readers.CSV,
|
||||||
Text.Pandoc.Writers,
|
Text.Pandoc.Writers,
|
||||||
Text.Pandoc.Writers.Native,
|
Text.Pandoc.Writers.Native,
|
||||||
Text.Pandoc.Writers.Docbook,
|
Text.Pandoc.Writers.Docbook,
|
||||||
|
|
|
@ -75,5 +75,6 @@ formatFromFilePath x =
|
||||||
".wiki" -> Just "mediawiki"
|
".wiki" -> Just "mediawiki"
|
||||||
".xhtml" -> Just "html"
|
".xhtml" -> Just "html"
|
||||||
".ipynb" -> Just "ipynb"
|
".ipynb" -> Just "ipynb"
|
||||||
|
".csv" -> Just "csv"
|
||||||
['.',y] | y `elem` ['1'..'9'] -> Just "man"
|
['.',y] | y `elem` ['1'..'9'] -> Just "man"
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|
|
@ -51,6 +51,7 @@ module Text.Pandoc.Readers
|
||||||
, readMuse
|
, readMuse
|
||||||
, readFB2
|
, readFB2
|
||||||
, readIpynb
|
, readIpynb
|
||||||
|
, readCSV
|
||||||
-- * Miscellaneous
|
-- * Miscellaneous
|
||||||
, getReader
|
, getReader
|
||||||
, getDefaultExtensions
|
, getDefaultExtensions
|
||||||
|
@ -95,6 +96,7 @@ import Text.Pandoc.Readers.TWiki
|
||||||
import Text.Pandoc.Readers.Txt2Tags
|
import Text.Pandoc.Readers.Txt2Tags
|
||||||
import Text.Pandoc.Readers.Vimwiki
|
import Text.Pandoc.Readers.Vimwiki
|
||||||
import Text.Pandoc.Readers.Man
|
import Text.Pandoc.Readers.Man
|
||||||
|
import Text.Pandoc.Readers.CSV
|
||||||
import qualified Text.Pandoc.UTF8 as UTF8
|
import qualified Text.Pandoc.UTF8 as UTF8
|
||||||
import Text.Parsec.Error
|
import Text.Parsec.Error
|
||||||
|
|
||||||
|
@ -136,6 +138,7 @@ readers = [ ("native" , TextReader readNative)
|
||||||
,("man" , TextReader readMan)
|
,("man" , TextReader readMan)
|
||||||
,("fb2" , TextReader readFB2)
|
,("fb2" , TextReader readFB2)
|
||||||
,("ipynb" , TextReader readIpynb)
|
,("ipynb" , TextReader readIpynb)
|
||||||
|
,("csv" , TextReader readCSV)
|
||||||
]
|
]
|
||||||
|
|
||||||
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
|
-- | Retrieve reader, extensions based on formatSpec (format+extensions).
|
||||||
|
|
108
src/Text/Pandoc/Readers/CSV.hs
Normal file
108
src/Text/Pandoc/Readers/CSV.hs
Normal file
|
@ -0,0 +1,108 @@
|
||||||
|
{-# LANGUAGE NoImplicitPrelude #-}
|
||||||
|
{-# LANGUAGE FlexibleContexts #-}
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
{-# LANGUAGE ScopedTypeVariables #-}
|
||||||
|
{-# LANGUAGE ViewPatterns #-}
|
||||||
|
{- |
|
||||||
|
Module : Text.Pandoc.Readers.RST
|
||||||
|
Copyright : Copyright (C) 2006-2019 John MacFarlane
|
||||||
|
License : GNU GPL, version 2 or above
|
||||||
|
|
||||||
|
Maintainer : John MacFarlane <jgm@berkeley.edu>
|
||||||
|
Stability : alpha
|
||||||
|
Portability : portable
|
||||||
|
|
||||||
|
Conversion from CSV to a 'Pandoc' table.
|
||||||
|
-}
|
||||||
|
module Text.Pandoc.Readers.CSV ( readCSV ) where
|
||||||
|
import Prelude
|
||||||
|
import Data.Text (Text)
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import Text.Pandoc.Definition
|
||||||
|
import qualified Text.Pandoc.Builder as B
|
||||||
|
import Text.Parsec
|
||||||
|
import Text.Parsec.Text (Parser)
|
||||||
|
import Text.Pandoc.Class (PandocMonad)
|
||||||
|
import Text.Pandoc.Shared (crFilter)
|
||||||
|
import Text.Pandoc.Error
|
||||||
|
import Text.Pandoc.Options (ReaderOptions(..))
|
||||||
|
import Control.Monad.Except (throwError)
|
||||||
|
|
||||||
|
readCSV :: PandocMonad m
|
||||||
|
=> ReaderOptions -- ^ Reader options
|
||||||
|
-> Text -- ^ Text to parse (assuming @'\n'@ line endings)
|
||||||
|
-> m Pandoc
|
||||||
|
readCSV opts s = do
|
||||||
|
let columns = readerColumns opts
|
||||||
|
case parse pCSV "input" (crFilter s) of
|
||||||
|
Right (r:rs) -> return $ B.doc $ B.table capt (zip aligns widths) hdrs rows
|
||||||
|
where capt = mempty
|
||||||
|
numcols = length r
|
||||||
|
hdrs = map (B.plain . B.text) r
|
||||||
|
rows = map (map (B.plain . B.text)) rs
|
||||||
|
maximum' [] = 0
|
||||||
|
maximum' xs = maximum xs
|
||||||
|
aligns = replicate numcols AlignDefault
|
||||||
|
widths = replicate numcols 0
|
||||||
|
Right [] -> return $ B.doc mempty
|
||||||
|
Left e -> throwError $ PandocParsecError s e
|
||||||
|
|
||||||
|
{- from RFC 4180
|
||||||
|
|
||||||
|
The ABNF grammar [2] appears as follows:
|
||||||
|
|
||||||
|
file = [header CRLF] record *(CRLF record) [CRLF]
|
||||||
|
|
||||||
|
header = name *(COMMA name)
|
||||||
|
|
||||||
|
record = field *(COMMA field)
|
||||||
|
|
||||||
|
name = field
|
||||||
|
|
||||||
|
field = (escaped / non-escaped)
|
||||||
|
|
||||||
|
escaped = DQUOTE *(TEXTDATA / COMMA / CR / LF / 2DQUOTE) DQUOTE
|
||||||
|
|
||||||
|
non-escaped = *TEXTDATA
|
||||||
|
|
||||||
|
COMMA = %x2C
|
||||||
|
|
||||||
|
CR = %x0D ;as per section 6.1 of RFC 2234 [2]
|
||||||
|
|
||||||
|
DQUOTE = %x22 ;as per section 6.1 of RFC 2234 [2]
|
||||||
|
|
||||||
|
LF = %x0A ;as per section 6.1 of RFC 2234 [2]
|
||||||
|
|
||||||
|
CRLF = CR LF ;as per section 6.1 of RFC 2234 [2]
|
||||||
|
|
||||||
|
TEXTDATA = %x20-21 / %x23-2B / %x2D-7E
|
||||||
|
-}
|
||||||
|
|
||||||
|
pCSV :: Parser [[Text]]
|
||||||
|
pCSV = many pRecord
|
||||||
|
|
||||||
|
pRecord :: Parser [Text]
|
||||||
|
pRecord = do
|
||||||
|
x <- pField
|
||||||
|
xs <- many $ pComma >> pField
|
||||||
|
() <$ newline <|> eof
|
||||||
|
return (x:xs)
|
||||||
|
|
||||||
|
pField :: Parser Text
|
||||||
|
pField = pEscaped <|> pUnescaped
|
||||||
|
|
||||||
|
pComma :: Parser Char
|
||||||
|
pComma = char ','
|
||||||
|
|
||||||
|
pUnescaped :: Parser Text
|
||||||
|
pUnescaped = T.strip . T.pack <$> many1 (noneOf "\n\r\",")
|
||||||
|
|
||||||
|
pEscaped :: Parser Text
|
||||||
|
pEscaped = do
|
||||||
|
char '"'
|
||||||
|
t <- T.pack <$> many (pDoubledQuote <|> noneOf "\"")
|
||||||
|
char '"'
|
||||||
|
return t
|
||||||
|
|
||||||
|
pDoubledQuote :: Parser Char
|
||||||
|
pDoubledQuote = try $ char '"' >> char '"'
|
17
test/command/csv.md
Normal file
17
test/command/csv.md
Normal file
|
@ -0,0 +1,17 @@
|
||||||
|
```
|
||||||
|
% pandoc -f csv -t native
|
||||||
|
Fruit,Price,Quantity
|
||||||
|
Apple,25 cents,33
|
||||||
|
"""Navel"" Orange","35 cents",22
|
||||||
|
^D
|
||||||
|
[Table [] [AlignDefault,AlignDefault,AlignDefault] [0.0,0.0,0.0]
|
||||||
|
[[Plain [Str "Fruit"]]
|
||||||
|
,[Plain [Str "Price"]]
|
||||||
|
,[Plain [Str "Quantity"]]]
|
||||||
|
[[[Plain [Str "Apple"]]
|
||||||
|
,[Plain [Str "25",Space,Str "cents"]]
|
||||||
|
,[Plain [Str "33"]]]
|
||||||
|
,[[Plain [Str "\"Navel\"",Space,Str "Orange"]]
|
||||||
|
,[Plain [Str "35",Space,Str "cents"]]
|
||||||
|
,[Plain [Str "22"]]]]]
|
||||||
|
```
|
Loading…
Reference in a new issue