Add tsv (tab separated values) as an input format.
We us ethe simple spec at <https://www.iana.org/assignments/media-types/text/tab-separated-values>. API change: Text.Pandoc.Readers.CSV now exports `readTSV`. Internal change: In Text.Pandoc.CSV, CSVOptions has changed so that csvQuote takes a Maybe value. Closes #7974.
This commit is contained in:
parent
b1990b0657
commit
16f0316fba
7 changed files with 55 additions and 20 deletions
|
@ -232,6 +232,7 @@ header when requesting a document from a URL:
|
||||||
- `creole` ([Creole 1.0])
|
- `creole` ([Creole 1.0])
|
||||||
- `csljson` ([CSL JSON] bibliography)
|
- `csljson` ([CSL JSON] bibliography)
|
||||||
- `csv` ([CSV] table)
|
- `csv` ([CSV] table)
|
||||||
|
- `tsv` ([TSV] table)
|
||||||
- `docbook` ([DocBook])
|
- `docbook` ([DocBook])
|
||||||
- `docx` ([Word docx])
|
- `docx` ([Word docx])
|
||||||
- `dokuwiki` ([DokuWiki markup])
|
- `dokuwiki` ([DokuWiki markup])
|
||||||
|
@ -484,6 +485,7 @@ header when requesting a document from a URL:
|
||||||
[Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html
|
[Haddock markup]: https://www.haskell.org/haddock/doc/html/ch03s08.html
|
||||||
[Creole 1.0]: http://www.wikicreole.org/wiki/Creole1.0
|
[Creole 1.0]: http://www.wikicreole.org/wiki/Creole1.0
|
||||||
[CSV]: https://tools.ietf.org/html/rfc4180
|
[CSV]: https://tools.ietf.org/html/rfc4180
|
||||||
|
[TSV]: https://www.iana.org/assignments/media-types/text/tab-separated-values
|
||||||
[roff man]: https://man.cx/groff_man(7)
|
[roff man]: https://man.cx/groff_man(7)
|
||||||
[roff ms]: https://man.cx/groff_ms(7)
|
[roff ms]: https://man.cx/groff_ms(7)
|
||||||
[Haskell]: https://www.haskell.org
|
[Haskell]: https://www.haskell.org
|
||||||
|
|
|
@ -258,7 +258,8 @@ convertWithOpts opts = do
|
||||||
|
|
||||||
let convertTabs = tabFilter (if optPreserveTabs opts ||
|
let convertTabs = tabFilter (if optPreserveTabs opts ||
|
||||||
readerNameBase == "t2t" ||
|
readerNameBase == "t2t" ||
|
||||||
readerNameBase == "man"
|
readerNameBase == "man" ||
|
||||||
|
readerNameBase == "tsv"
|
||||||
then 0
|
then 0
|
||||||
else optTabStop opts)
|
else optTabStop opts)
|
||||||
|
|
||||||
|
|
|
@ -86,6 +86,7 @@ formatFromFilePath x =
|
||||||
".xhtml" -> Just "html"
|
".xhtml" -> Just "html"
|
||||||
".ipynb" -> Just "ipynb"
|
".ipynb" -> Just "ipynb"
|
||||||
".csv" -> Just "csv"
|
".csv" -> Just "csv"
|
||||||
|
".tsv" -> Just "tsv"
|
||||||
".bib" -> Just "biblatex"
|
".bib" -> Just "biblatex"
|
||||||
['.',y] | y `elem` ['1'..'9'] -> Just "man"
|
['.',y] | y `elem` ['1'..'9'] -> Just "man"
|
||||||
_ -> Nothing
|
_ -> Nothing
|
||||||
|
|
|
@ -16,7 +16,7 @@ module Text.Pandoc.CSV (
|
||||||
ParseError
|
ParseError
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Control.Monad (unless, void)
|
import Control.Monad (unless, void, mzero)
|
||||||
import Data.Text (Text)
|
import Data.Text (Text)
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Text.Parsec
|
import Text.Parsec
|
||||||
|
@ -24,7 +24,7 @@ import Text.Parsec.Text (Parser)
|
||||||
|
|
||||||
data CSVOptions = CSVOptions{
|
data CSVOptions = CSVOptions{
|
||||||
csvDelim :: Char
|
csvDelim :: Char
|
||||||
, csvQuote :: Char
|
, csvQuote :: Maybe Char
|
||||||
, csvKeepSpace :: Bool -- treat whitespace following delim as significant
|
, csvKeepSpace :: Bool -- treat whitespace following delim as significant
|
||||||
, csvEscape :: Maybe Char -- default is to double up quote
|
, csvEscape :: Maybe Char -- default is to double up quote
|
||||||
} deriving (Read, Show)
|
} deriving (Read, Show)
|
||||||
|
@ -32,7 +32,7 @@ data CSVOptions = CSVOptions{
|
||||||
defaultCSVOptions :: CSVOptions
|
defaultCSVOptions :: CSVOptions
|
||||||
defaultCSVOptions = CSVOptions{
|
defaultCSVOptions = CSVOptions{
|
||||||
csvDelim = ','
|
csvDelim = ','
|
||||||
, csvQuote = '"'
|
, csvQuote = Just '"'
|
||||||
, csvKeepSpace = False
|
, csvKeepSpace = False
|
||||||
, csvEscape = Nothing }
|
, csvEscape = Nothing }
|
||||||
|
|
||||||
|
@ -53,18 +53,24 @@ pCSVCell :: CSVOptions -> Parser Text
|
||||||
pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
|
pCSVCell opts = pCSVQuotedCell opts <|> pCSVUnquotedCell opts
|
||||||
|
|
||||||
pCSVQuotedCell :: CSVOptions -> Parser Text
|
pCSVQuotedCell :: CSVOptions -> Parser Text
|
||||||
pCSVQuotedCell opts = do
|
pCSVQuotedCell opts =
|
||||||
char (csvQuote opts)
|
case csvQuote opts of
|
||||||
res <- many (satisfy (\c -> c /= csvQuote opts &&
|
Nothing -> mzero
|
||||||
Just c /= csvEscape opts) <|> escaped opts)
|
Just quotechar -> do
|
||||||
char (csvQuote opts)
|
char quotechar
|
||||||
return $ T.pack res
|
res <- many (satisfy (\c -> c /= quotechar &&
|
||||||
|
Just c /= csvEscape opts) <|> escaped opts)
|
||||||
|
char quotechar
|
||||||
|
return $ T.pack res
|
||||||
|
|
||||||
escaped :: CSVOptions -> Parser Char
|
escaped :: CSVOptions -> Parser Char
|
||||||
escaped opts = try $
|
escaped opts =
|
||||||
case csvEscape opts of
|
case csvEscape opts of
|
||||||
Nothing -> char (csvQuote opts) >> char (csvQuote opts)
|
Nothing ->
|
||||||
Just c -> char c >> noneOf "\r\n"
|
case csvQuote opts of
|
||||||
|
Nothing -> mzero
|
||||||
|
Just q -> try $ char q >> char q
|
||||||
|
Just c -> try $ char c >> noneOf "\r\n"
|
||||||
|
|
||||||
pCSVUnquotedCell :: CSVOptions -> Parser Text
|
pCSVUnquotedCell :: CSVOptions -> Parser Text
|
||||||
pCSVUnquotedCell opts = T.pack <$>
|
pCSVUnquotedCell opts = T.pack <$>
|
||||||
|
|
|
@ -52,6 +52,7 @@ module Text.Pandoc.Readers
|
||||||
, readFB2
|
, readFB2
|
||||||
, readIpynb
|
, readIpynb
|
||||||
, readCSV
|
, readCSV
|
||||||
|
, readTSV
|
||||||
, readCslJson
|
, readCslJson
|
||||||
, readBibTeX
|
, readBibTeX
|
||||||
, readBibLaTeX
|
, readBibLaTeX
|
||||||
|
@ -152,6 +153,7 @@ readers = [("native" , TextReader readNative)
|
||||||
,("fb2" , TextReader readFB2)
|
,("fb2" , TextReader readFB2)
|
||||||
,("ipynb" , TextReader readIpynb)
|
,("ipynb" , TextReader readIpynb)
|
||||||
,("csv" , TextReader readCSV)
|
,("csv" , TextReader readCSV)
|
||||||
|
,("tsv" , TextReader readTSV)
|
||||||
,("csljson" , TextReader readCslJson)
|
,("csljson" , TextReader readCslJson)
|
||||||
,("bibtex" , TextReader readBibTeX)
|
,("bibtex" , TextReader readBibTeX)
|
||||||
,("biblatex" , TextReader readBibLaTeX)
|
,("biblatex" , TextReader readBibLaTeX)
|
||||||
|
|
|
@ -10,11 +10,14 @@
|
||||||
Stability : alpha
|
Stability : alpha
|
||||||
Portability : portable
|
Portability : portable
|
||||||
|
|
||||||
Conversion from CSV to a 'Pandoc' table.
|
Conversion from CSV or TSV to a 'Pandoc' table.
|
||||||
-}
|
-}
|
||||||
module Text.Pandoc.Readers.CSV ( readCSV ) where
|
module Text.Pandoc.Readers.CSV (
|
||||||
|
readCSV,
|
||||||
|
readTSV
|
||||||
|
) where
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions)
|
import Text.Pandoc.CSV (parseCSV, defaultCSVOptions, CSVOptions(..))
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import qualified Text.Pandoc.Builder as B
|
import qualified Text.Pandoc.Builder as B
|
||||||
import Text.Pandoc.Class (PandocMonad)
|
import Text.Pandoc.Class (PandocMonad)
|
||||||
|
@ -22,14 +25,34 @@ import Text.Pandoc.Error
|
||||||
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
import Text.Pandoc.Sources (ToSources(..), sourcesToText)
|
||||||
import Text.Pandoc.Options (ReaderOptions)
|
import Text.Pandoc.Options (ReaderOptions)
|
||||||
import Control.Monad.Except (throwError)
|
import Control.Monad.Except (throwError)
|
||||||
|
import Data.Text (Text)
|
||||||
|
|
||||||
readCSV :: (PandocMonad m, ToSources a)
|
readCSV :: (PandocMonad m, ToSources a)
|
||||||
=> ReaderOptions -- ^ Reader options
|
=> ReaderOptions -- ^ Reader options
|
||||||
-> a
|
-> a
|
||||||
-> m Pandoc
|
-> m Pandoc
|
||||||
readCSV _opts s = do
|
readCSV _opts s = do
|
||||||
let txt = sourcesToText $ toSources s
|
readCSVWith defaultCSVOptions $ sourcesToText $ toSources s
|
||||||
case parseCSV defaultCSVOptions txt of
|
|
||||||
|
readTSV :: (PandocMonad m, ToSources a)
|
||||||
|
=> ReaderOptions -- ^ Reader options
|
||||||
|
-> a
|
||||||
|
-> m Pandoc
|
||||||
|
readTSV _opts s = do
|
||||||
|
readCSVWith tsvOpts $ sourcesToText $ toSources s
|
||||||
|
where
|
||||||
|
tsvOpts = CSVOptions{
|
||||||
|
csvDelim = '\t',
|
||||||
|
csvQuote = Nothing,
|
||||||
|
csvKeepSpace = False,
|
||||||
|
csvEscape = Nothing }
|
||||||
|
|
||||||
|
readCSVWith :: PandocMonad m
|
||||||
|
=> CSVOptions
|
||||||
|
-> Text
|
||||||
|
-> m Pandoc
|
||||||
|
readCSVWith csvopts txt = do
|
||||||
|
case parseCSV csvopts txt of
|
||||||
Right (r:rs) -> return $ B.doc $ B.table capt
|
Right (r:rs) -> return $ B.doc $ B.table capt
|
||||||
(zip aligns widths)
|
(zip aligns widths)
|
||||||
(TableHead nullAttr hdrs)
|
(TableHead nullAttr hdrs)
|
||||||
|
|
|
@ -845,8 +845,8 @@ csvTableDirective top fields rawcsv = do
|
||||||
_ -> ','
|
_ -> ','
|
||||||
, csvQuote = case trim <$> lookup "quote" fields of
|
, csvQuote = case trim <$> lookup "quote" fields of
|
||||||
Just (T.unpack -> [c])
|
Just (T.unpack -> [c])
|
||||||
-> c
|
-> Just c
|
||||||
_ -> '"'
|
_ -> Just '"'
|
||||||
, csvEscape = case trim <$> lookup "escape" fields of
|
, csvEscape = case trim <$> lookup "escape" fields of
|
||||||
Just (T.unpack -> [c])
|
Just (T.unpack -> [c])
|
||||||
-> Just c
|
-> Just c
|
||||||
|
|
Loading…
Reference in a new issue