a820c1bd1c
Attributes can't be followed by a space. So, _(class)emph_ but _(noclass) emph_ Closes #2984.
669 lines
23 KiB
Haskell
669 lines
23 KiB
Haskell
{-
|
|
Copyright (C) 2010-2015 Paul Rivier <paul*rivier#demotera*com> | tr '*#' '.@'
|
|
and John MacFarlane
|
|
|
|
This program is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 2 of the License, or
|
|
(at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
-}
|
|
|
|
{- |
|
|
Module : Text.Pandoc.Readers.Textile
|
|
Copyright : Copyright (C) 2010-2015 Paul Rivier and John MacFarlane
|
|
License : GNU GPL, version 2 or above
|
|
|
|
Maintainer : Paul Rivier <paul*rivier#demotera*com>
|
|
Stability : alpha
|
|
Portability : portable
|
|
|
|
Conversion from Textile to 'Pandoc' document, based on the spec
|
|
available at http://redcloth.org/textile.
|
|
|
|
Implemented and parsed:
|
|
- Paragraphs
|
|
- Code blocks
|
|
- Lists
|
|
- blockquote
|
|
- Inlines : strong, emph, cite, code, deleted, superscript,
|
|
subscript, links
|
|
- footnotes
|
|
- HTML-specific and CSS-specific attributes on headers
|
|
|
|
Left to be implemented:
|
|
- dimension sign
|
|
- all caps
|
|
- continued blocks (ex bq..)
|
|
|
|
TODO : refactor common patterns across readers :
|
|
- more ...
|
|
|
|
-}
|
|
|
|
|
|
module Text.Pandoc.Readers.Textile ( readTextile) where
|
|
import Text.Pandoc.CSS
|
|
import Text.Pandoc.Definition
|
|
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines)
|
|
import qualified Text.Pandoc.Builder as B
|
|
import Text.Pandoc.Options
|
|
import Text.Pandoc.Parsing
|
|
import Text.Pandoc.Readers.HTML ( htmlTag, isBlockTag, isInlineTag )
|
|
import Text.Pandoc.Shared (trim)
|
|
import Text.Pandoc.Readers.LaTeX ( rawLaTeXInline, rawLaTeXBlock )
|
|
import Text.HTML.TagSoup (parseTags, innerText, fromAttrib, Tag(..))
|
|
import Text.HTML.TagSoup.Match
|
|
import Data.List ( intercalate )
|
|
import Data.Char ( digitToInt, isUpper)
|
|
import Control.Monad ( guard, liftM, when )
|
|
import Text.Pandoc.Compat.Monoid ((<>))
|
|
import Text.Printf
|
|
import Debug.Trace (trace)
|
|
import Text.Pandoc.Error
|
|
|
|
-- | Parse a Textile text and return a Pandoc document.
|
|
readTextile :: ReaderOptions -- ^ Reader options
|
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
|
-> Either PandocError Pandoc
|
|
readTextile opts s =
|
|
(readWith parseTextile) def{ stateOptions = opts } (s ++ "\n\n")
|
|
|
|
|
|
-- | Generate a Pandoc ADT from a textile document
|
|
parseTextile :: Parser [Char] ParserState Pandoc
|
|
parseTextile = do
|
|
-- textile allows raw HTML and does smart punctuation by default,
|
|
-- but we do not enable smart punctuation unless it is explicitly
|
|
-- asked for, for better conversion to other light markup formats
|
|
oldOpts <- stateOptions `fmap` getState
|
|
updateState $ \state -> state{ stateOptions =
|
|
oldOpts{ readerParseRaw = True
|
|
, readerOldDashes = True
|
|
} }
|
|
many blankline
|
|
startPos <- getPosition
|
|
-- go through once just to get list of reference keys and notes
|
|
-- docMinusKeys is the raw document with blanks where the keys/notes were...
|
|
let firstPassParser = noteBlock <|> lineClump
|
|
manyTill firstPassParser eof >>= setInput . concat
|
|
setPosition startPos
|
|
st' <- getState
|
|
let reversedNotes = stateNotes st'
|
|
updateState $ \s -> s { stateNotes = reverse reversedNotes }
|
|
-- now parse it for real...
|
|
blocks <- parseBlocks
|
|
return $ Pandoc nullMeta (B.toList blocks) -- FIXME
|
|
|
|
noteMarker :: Parser [Char] ParserState [Char]
|
|
noteMarker = skipMany spaceChar >> string "fn" >> manyTill digit (char '.')
|
|
|
|
noteBlock :: Parser [Char] ParserState [Char]
|
|
noteBlock = try $ do
|
|
startPos <- getPosition
|
|
ref <- noteMarker
|
|
optional blankline
|
|
contents <- liftM unlines $ many1Till anyLine (blanklines <|> noteBlock)
|
|
endPos <- getPosition
|
|
let newnote = (ref, contents ++ "\n")
|
|
st <- getState
|
|
let oldnotes = stateNotes st
|
|
updateState $ \s -> s { stateNotes = newnote : oldnotes }
|
|
-- return blanks so line count isn't affected
|
|
return $ replicate (sourceLine endPos - sourceLine startPos) '\n'
|
|
|
|
-- | Parse document blocks
|
|
parseBlocks :: Parser [Char] ParserState Blocks
|
|
parseBlocks = mconcat <$> manyTill block eof
|
|
|
|
-- | Block parsers list tried in definition order
|
|
blockParsers :: [Parser [Char] ParserState Blocks]
|
|
blockParsers = [ codeBlock
|
|
, header
|
|
, blockQuote
|
|
, hrule
|
|
, commentBlock
|
|
, anyList
|
|
, rawHtmlBlock
|
|
, rawLaTeXBlock'
|
|
, maybeExplicitBlock "table" table
|
|
, maybeExplicitBlock "p" para
|
|
, mempty <$ blanklines
|
|
]
|
|
|
|
-- | Any block in the order of definition of blockParsers
|
|
block :: Parser [Char] ParserState Blocks
|
|
block = do
|
|
res <- choice blockParsers <?> "block"
|
|
pos <- getPosition
|
|
tr <- getOption readerTrace
|
|
when tr $
|
|
trace (printf "line %d: %s" (sourceLine pos)
|
|
(take 60 $ show $ B.toList res)) (return ())
|
|
return res
|
|
|
|
commentBlock :: Parser [Char] ParserState Blocks
|
|
commentBlock = try $ do
|
|
string "###."
|
|
manyTill anyLine blanklines
|
|
return mempty
|
|
|
|
codeBlock :: Parser [Char] ParserState Blocks
|
|
codeBlock = codeBlockBc <|> codeBlockPre
|
|
|
|
codeBlockBc :: Parser [Char] ParserState Blocks
|
|
codeBlockBc = try $ do
|
|
string "bc. "
|
|
contents <- manyTill anyLine blanklines
|
|
return $ B.codeBlock (unlines contents)
|
|
|
|
-- | Code Blocks in Textile are between <pre> and </pre>
|
|
codeBlockPre :: Parser [Char] ParserState Blocks
|
|
codeBlockPre = try $ do
|
|
(t@(TagOpen _ attrs),_) <- htmlTag (tagOpen (=="pre") (const True))
|
|
result' <- (innerText . parseTags) `fmap` -- remove internal tags
|
|
manyTill anyChar (htmlTag (tagClose (=="pre")))
|
|
optional blanklines
|
|
-- drop leading newline if any
|
|
let result'' = case result' of
|
|
'\n':xs -> xs
|
|
_ -> result'
|
|
-- drop trailing newline if any
|
|
let result''' = case reverse result'' of
|
|
'\n':_ -> init result''
|
|
_ -> result''
|
|
let classes = words $ fromAttrib "class" t
|
|
let ident = fromAttrib "id" t
|
|
let kvs = [(k,v) | (k,v) <- attrs, k /= "id" && k /= "class"]
|
|
return $ B.codeBlockWith (ident,classes,kvs) result'''
|
|
|
|
-- | Header of the form "hN. content" with N in 1..6
|
|
header :: Parser [Char] ParserState Blocks
|
|
header = try $ do
|
|
char 'h'
|
|
level <- digitToInt <$> oneOf "123456"
|
|
attr <- attributes
|
|
char '.'
|
|
lookAhead whitespace
|
|
name <- trimInlines . mconcat <$> many inline
|
|
attr' <- registerHeader attr name
|
|
return $ B.headerWith attr' level name
|
|
|
|
-- | Blockquote of the form "bq. content"
|
|
blockQuote :: Parser [Char] ParserState Blocks
|
|
blockQuote = try $ do
|
|
string "bq" >> attributes >> char '.' >> whitespace
|
|
B.blockQuote <$> para
|
|
|
|
-- Horizontal rule
|
|
|
|
hrule :: Parser [Char] st Blocks
|
|
hrule = try $ do
|
|
skipSpaces
|
|
start <- oneOf "-*"
|
|
count 2 (skipSpaces >> char start)
|
|
skipMany (spaceChar <|> char start)
|
|
newline
|
|
optional blanklines
|
|
return B.horizontalRule
|
|
|
|
-- Lists handling
|
|
|
|
-- | Can be a bullet list or an ordered list. This implementation is
|
|
-- strict in the nesting, sublist must start at exactly "parent depth
|
|
-- plus one"
|
|
anyList :: Parser [Char] ParserState Blocks
|
|
anyList = try $ anyListAtDepth 1 <* blanklines
|
|
|
|
-- | This allow one type of list to be nested into an other type,
|
|
-- provided correct nesting
|
|
anyListAtDepth :: Int -> Parser [Char] ParserState Blocks
|
|
anyListAtDepth depth = choice [ bulletListAtDepth depth,
|
|
orderedListAtDepth depth,
|
|
definitionList ]
|
|
|
|
-- | Bullet List of given depth, depth being the number of leading '*'
|
|
bulletListAtDepth :: Int -> Parser [Char] ParserState Blocks
|
|
bulletListAtDepth depth = try $ B.bulletList <$> many1 (bulletListItemAtDepth depth)
|
|
|
|
-- | Bullet List Item of given depth, depth being the number of
|
|
-- leading '*'
|
|
bulletListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
|
|
bulletListItemAtDepth = genericListItemAtDepth '*'
|
|
|
|
-- | Ordered List of given depth, depth being the number of
|
|
-- leading '#'
|
|
orderedListAtDepth :: Int -> Parser [Char] ParserState Blocks
|
|
orderedListAtDepth depth = try $ do
|
|
items <- many1 (orderedListItemAtDepth depth)
|
|
return $ B.orderedList items
|
|
|
|
-- | Ordered List Item of given depth, depth being the number of
|
|
-- leading '#'
|
|
orderedListItemAtDepth :: Int -> Parser [Char] ParserState Blocks
|
|
orderedListItemAtDepth = genericListItemAtDepth '#'
|
|
|
|
-- | Common implementation of list items
|
|
genericListItemAtDepth :: Char -> Int -> Parser [Char] ParserState Blocks
|
|
genericListItemAtDepth c depth = try $ do
|
|
count depth (char c) >> attributes >> whitespace
|
|
p <- mconcat <$> many listInline
|
|
newline
|
|
sublist <- option mempty (anyListAtDepth (depth + 1))
|
|
return $ (B.plain p) <> sublist
|
|
|
|
-- | A definition list is a set of consecutive definition items
|
|
definitionList :: Parser [Char] ParserState Blocks
|
|
definitionList = try $ B.definitionList <$> many1 definitionListItem
|
|
|
|
-- | List start character.
|
|
listStart :: Parser [Char] ParserState ()
|
|
listStart = genericListStart '*'
|
|
<|> () <$ genericListStart '#'
|
|
<|> () <$ definitionListStart
|
|
|
|
genericListStart :: Char -> Parser [Char] st ()
|
|
genericListStart c = () <$ try (many1 (char c) >> whitespace)
|
|
|
|
definitionListStart :: Parser [Char] ParserState Inlines
|
|
definitionListStart = try $ do
|
|
char '-'
|
|
whitespace
|
|
trimInlines . mconcat <$>
|
|
many1Till inline (try (string ":=")) <* optional whitespace
|
|
|
|
listInline :: Parser [Char] ParserState Inlines
|
|
listInline = try (notFollowedBy newline >> inline)
|
|
<|> try (endline <* notFollowedBy listStart)
|
|
|
|
-- | A definition list item in textile begins with '- ', followed by
|
|
-- the term defined, then spaces and ":=". The definition follows, on
|
|
-- the same single line, or spaned on multiple line, after a line
|
|
-- break.
|
|
definitionListItem :: Parser [Char] ParserState (Inlines, [Blocks])
|
|
definitionListItem = try $ do
|
|
term <- definitionListStart
|
|
def' <- multilineDef <|> inlineDef
|
|
return (term, def')
|
|
where inlineDef :: Parser [Char] ParserState [Blocks]
|
|
inlineDef = liftM (\d -> [B.plain d])
|
|
$ optional whitespace >> (trimInlines . mconcat <$> many listInline) <* newline
|
|
multilineDef :: Parser [Char] ParserState [Blocks]
|
|
multilineDef = try $ do
|
|
optional whitespace >> newline
|
|
s <- many1Till anyChar (try (string "=:" >> newline))
|
|
-- this ++ "\n\n" does not look very good
|
|
ds <- parseFromString parseBlocks (s ++ "\n\n")
|
|
return [ds]
|
|
|
|
-- raw content
|
|
|
|
-- | A raw Html Block, optionally followed by blanklines
|
|
rawHtmlBlock :: Parser [Char] ParserState Blocks
|
|
rawHtmlBlock = try $ do
|
|
skipMany spaceChar
|
|
(_,b) <- htmlTag isBlockTag
|
|
optional blanklines
|
|
return $ B.rawBlock "html" b
|
|
|
|
-- | Raw block of LaTeX content
|
|
rawLaTeXBlock' :: Parser [Char] ParserState Blocks
|
|
rawLaTeXBlock' = do
|
|
guardEnabled Ext_raw_tex
|
|
B.rawBlock "latex" <$> (rawLaTeXBlock <* spaces)
|
|
|
|
|
|
-- | In textile, paragraphs are separated by blank lines.
|
|
para :: Parser [Char] ParserState Blocks
|
|
para = B.para . trimInlines . mconcat <$> many1 inline
|
|
|
|
-- Tables
|
|
|
|
-- | A table cell spans until a pipe |
|
|
tableCell :: Bool -> Parser [Char] ParserState Blocks
|
|
tableCell headerCell = try $ do
|
|
char '|'
|
|
when headerCell $ () <$ string "_."
|
|
notFollowedBy blankline
|
|
raw <- trim <$>
|
|
many (noneOf "|\n" <|> try (char '\n' <* notFollowedBy blankline))
|
|
content <- mconcat <$> parseFromString (many inline) raw
|
|
return $ B.plain content
|
|
|
|
-- | A table row is made of many table cells
|
|
tableRow :: Parser [Char] ParserState [Blocks]
|
|
tableRow = many1 (tableCell False) <* char '|' <* newline
|
|
|
|
tableHeader :: Parser [Char] ParserState [Blocks]
|
|
tableHeader = many1 (tableCell True) <* char '|' <* newline
|
|
|
|
-- | A table with an optional header. Current implementation can
|
|
-- handle tables with and without header, but will parse cells
|
|
-- alignment attributes as content.
|
|
table :: Parser [Char] ParserState Blocks
|
|
table = try $ do
|
|
headers <- option mempty $ tableHeader
|
|
rows <- many1 tableRow
|
|
blanklines
|
|
let nbOfCols = max (length headers) (length $ head rows)
|
|
return $ B.table mempty
|
|
(zip (replicate nbOfCols AlignDefault) (replicate nbOfCols 0.0))
|
|
headers
|
|
rows
|
|
|
|
|
|
-- | Blocks like 'p' and 'table' do not need explicit block tag.
|
|
-- However, they can be used to set HTML/CSS attributes when needed.
|
|
maybeExplicitBlock :: String -- ^ block tag name
|
|
-> Parser [Char] ParserState Blocks -- ^ implicit block
|
|
-> Parser [Char] ParserState Blocks
|
|
maybeExplicitBlock name blk = try $ do
|
|
optional $ try $ string name >> attributes >> char '.' >>
|
|
optional whitespace >> optional endline
|
|
blk
|
|
|
|
|
|
|
|
----------
|
|
-- Inlines
|
|
----------
|
|
|
|
|
|
-- | Any inline element
|
|
inline :: Parser [Char] ParserState Inlines
|
|
inline = do
|
|
choice inlineParsers <?> "inline"
|
|
|
|
-- | Inline parsers tried in order
|
|
inlineParsers :: [Parser [Char] ParserState Inlines]
|
|
inlineParsers = [ str
|
|
, whitespace
|
|
, endline
|
|
, code
|
|
, escapedInline
|
|
, inlineMarkup
|
|
, groupedInlineMarkup
|
|
, rawHtmlInline
|
|
, rawLaTeXInline'
|
|
, note
|
|
, link
|
|
, image
|
|
, mark
|
|
, (B.str . (:[])) <$> characterReference
|
|
, smartPunctuation inline
|
|
, symbol
|
|
]
|
|
|
|
-- | Inline markups
|
|
inlineMarkup :: Parser [Char] ParserState Inlines
|
|
inlineMarkup = choice [ simpleInline (string "??") (B.cite [])
|
|
, simpleInline (string "**") B.strong
|
|
, simpleInline (string "__") B.emph
|
|
, simpleInline (char '*') B.strong
|
|
, simpleInline (char '_') B.emph
|
|
, simpleInline (char '+') B.emph -- approximates underline
|
|
, simpleInline (char '-' <* notFollowedBy (char '-')) B.strikeout
|
|
, simpleInline (char '^') B.superscript
|
|
, simpleInline (char '~') B.subscript
|
|
, simpleInline (char '%') id
|
|
]
|
|
|
|
-- | Trademark, registered, copyright
|
|
mark :: Parser [Char] st Inlines
|
|
mark = try $ char '(' >> (try tm <|> try reg <|> copy)
|
|
|
|
reg :: Parser [Char] st Inlines
|
|
reg = do
|
|
oneOf "Rr"
|
|
char ')'
|
|
return $ B.str "\174"
|
|
|
|
tm :: Parser [Char] st Inlines
|
|
tm = do
|
|
oneOf "Tt"
|
|
oneOf "Mm"
|
|
char ')'
|
|
return $ B.str "\8482"
|
|
|
|
copy :: Parser [Char] st Inlines
|
|
copy = do
|
|
oneOf "Cc"
|
|
char ')'
|
|
return $ B.str "\169"
|
|
|
|
note :: Parser [Char] ParserState Inlines
|
|
note = try $ do
|
|
ref <- (char '[' *> many1 digit <* char ']')
|
|
notes <- stateNotes <$> getState
|
|
case lookup ref notes of
|
|
Nothing -> fail "note not found"
|
|
Just raw -> B.note <$> parseFromString parseBlocks raw
|
|
|
|
-- | Special chars
|
|
markupChars :: [Char]
|
|
markupChars = "\\*#_@~-+^|%=[]&"
|
|
|
|
-- | Break strings on following chars. Space tab and newline break for
|
|
-- inlines breaking. Open paren breaks for mark. Quote, dash and dot
|
|
-- break for smart punctuation. Punctuation breaks for regular
|
|
-- punctuation. Double quote breaks for named links. > and < break
|
|
-- for inline html.
|
|
stringBreakers :: [Char]
|
|
stringBreakers = " \t\n\r.,\"'?!;:<>«»„“”‚‘’()[]"
|
|
|
|
wordBoundaries :: [Char]
|
|
wordBoundaries = markupChars ++ stringBreakers
|
|
|
|
-- | Parse a hyphened sequence of words
|
|
hyphenedWords :: Parser [Char] ParserState String
|
|
hyphenedWords = do
|
|
x <- wordChunk
|
|
xs <- many (try $ char '-' >> wordChunk)
|
|
return $ intercalate "-" (x:xs)
|
|
|
|
wordChunk :: Parser [Char] ParserState String
|
|
wordChunk = try $ do
|
|
hd <- noneOf wordBoundaries
|
|
tl <- many ( (noneOf wordBoundaries) <|>
|
|
try (notFollowedBy' note *> oneOf markupChars
|
|
<* lookAhead (noneOf wordBoundaries) ) )
|
|
return $ hd:tl
|
|
|
|
-- | Any string
|
|
str :: Parser [Char] ParserState Inlines
|
|
str = do
|
|
baseStr <- hyphenedWords
|
|
-- RedCloth compliance : if parsed word is uppercase and immediatly
|
|
-- followed by parens, parens content is unconditionally word acronym
|
|
fullStr <- option baseStr $ try $ do
|
|
guard $ all isUpper baseStr
|
|
acro <- enclosed (char '(') (char ')') anyChar'
|
|
return $ concat [baseStr, " (", acro, ")"]
|
|
updateLastStrPos
|
|
return $ B.str fullStr
|
|
|
|
-- | Some number of space chars
|
|
whitespace :: Parser [Char] st Inlines
|
|
whitespace = many1 spaceChar >> return B.space <?> "whitespace"
|
|
|
|
-- | In Textile, an isolated endline character is a line break
|
|
endline :: Parser [Char] ParserState Inlines
|
|
endline = try $ do
|
|
newline
|
|
notFollowedBy blankline
|
|
notFollowedBy listStart
|
|
notFollowedBy rawHtmlBlock
|
|
return B.linebreak
|
|
|
|
rawHtmlInline :: Parser [Char] ParserState Inlines
|
|
rawHtmlInline = B.rawInline "html" . snd <$> htmlTag isInlineTag
|
|
|
|
-- | Raw LaTeX Inline
|
|
rawLaTeXInline' :: Parser [Char] ParserState Inlines
|
|
rawLaTeXInline' = try $ do
|
|
guardEnabled Ext_raw_tex
|
|
B.singleton <$> rawLaTeXInline
|
|
|
|
-- | Textile standard link syntax is "label":target. But we
|
|
-- can also have ["label":target].
|
|
link :: Parser [Char] ParserState Inlines
|
|
link = try $ do
|
|
bracketed <- (True <$ char '[') <|> return False
|
|
char '"' *> notFollowedBy (oneOf " \t\n\r")
|
|
attr <- attributes
|
|
name <- trimInlines . mconcat <$>
|
|
withQuoteContext InDoubleQuote (many1Till inline (char '"'))
|
|
char ':'
|
|
let stop = if bracketed
|
|
then char ']'
|
|
else lookAhead $ space <|>
|
|
try (oneOf "!.,;:" *> (space <|> newline))
|
|
url <- manyTill nonspaceChar stop
|
|
let name' = if B.toList name == [Str "$"] then B.str url else name
|
|
return $ if attr == nullAttr
|
|
then B.link url "" name'
|
|
else B.spanWith attr $ B.link url "" name'
|
|
|
|
-- | image embedding
|
|
image :: Parser [Char] ParserState Inlines
|
|
image = try $ do
|
|
char '!' >> notFollowedBy space
|
|
(ident, cls, kvs) <- attributes
|
|
let attr = case lookup "style" kvs of
|
|
Just stls -> (ident, cls, pickStylesToKVs ["width", "height"] stls)
|
|
Nothing -> (ident, cls, kvs)
|
|
src <- manyTill anyChar' (lookAhead $ oneOf "!(")
|
|
alt <- option "" (try $ (char '(' >> manyTill anyChar' (char ')')))
|
|
char '!'
|
|
return $ B.imageWith attr src alt (B.str alt)
|
|
|
|
escapedInline :: Parser [Char] ParserState Inlines
|
|
escapedInline = escapedEqs <|> escapedTag
|
|
|
|
escapedEqs :: Parser [Char] ParserState Inlines
|
|
escapedEqs = B.str <$>
|
|
(try $ string "==" *> manyTill anyChar' (try $ string "=="))
|
|
|
|
-- | literal text escaped btw <notextile> tags
|
|
escapedTag :: Parser [Char] ParserState Inlines
|
|
escapedTag = B.str <$>
|
|
(try $ string "<notextile>" *>
|
|
manyTill anyChar' (try $ string "</notextile>"))
|
|
|
|
-- | Any special symbol defined in wordBoundaries
|
|
symbol :: Parser [Char] ParserState Inlines
|
|
symbol = B.str . singleton <$> (notFollowedBy newline *>
|
|
notFollowedBy rawHtmlBlock *>
|
|
oneOf wordBoundaries)
|
|
|
|
-- | Inline code
|
|
code :: Parser [Char] ParserState Inlines
|
|
code = code1 <|> code2
|
|
|
|
-- any character except a newline before a blank line
|
|
anyChar' :: Parser [Char] ParserState Char
|
|
anyChar' =
|
|
satisfy (/='\n') <|> (try $ char '\n' <* notFollowedBy blankline)
|
|
|
|
code1 :: Parser [Char] ParserState Inlines
|
|
code1 = B.code <$> surrounded (char '@') anyChar'
|
|
|
|
code2 :: Parser [Char] ParserState Inlines
|
|
code2 = do
|
|
htmlTag (tagOpen (=="tt") null)
|
|
B.code <$> manyTill anyChar' (try $ htmlTag $ tagClose (=="tt"))
|
|
|
|
-- | Html / CSS attributes
|
|
attributes :: Parser [Char] ParserState Attr
|
|
attributes = (foldl (flip ($)) ("",[],[])) <$>
|
|
try (do special <- option id specialAttribute
|
|
attrs <- many attribute
|
|
return (special : attrs))
|
|
|
|
specialAttribute :: Parser [Char] ParserState (Attr -> Attr)
|
|
specialAttribute = do
|
|
alignStr <- ("center" <$ char '=') <|>
|
|
("justify" <$ try (string "<>")) <|>
|
|
("right" <$ char '>') <|>
|
|
("left" <$ char '<')
|
|
notFollowedBy spaceChar
|
|
return $ addStyle ("text-align:" ++ alignStr)
|
|
|
|
attribute :: Parser [Char] ParserState (Attr -> Attr)
|
|
attribute = try $
|
|
(classIdAttr <|> styleAttr <|> langAttr) <* notFollowedBy spaceChar
|
|
|
|
classIdAttr :: Parser [Char] ParserState (Attr -> Attr)
|
|
classIdAttr = try $ do -- (class class #id)
|
|
char '('
|
|
ws <- words `fmap` manyTill anyChar' (char ')')
|
|
case reverse ws of
|
|
[] -> return $ \(_,_,keyvals) -> ("",[],keyvals)
|
|
(('#':ident'):classes') -> return $ \(_,_,keyvals) ->
|
|
(ident',classes',keyvals)
|
|
classes' -> return $ \(_,_,keyvals) ->
|
|
("",classes',keyvals)
|
|
|
|
styleAttr :: Parser [Char] ParserState (Attr -> Attr)
|
|
styleAttr = do
|
|
style <- try $ enclosed (char '{') (char '}') anyChar'
|
|
return $ addStyle style
|
|
|
|
addStyle :: String -> Attr -> Attr
|
|
addStyle style (id',classes,keyvals) =
|
|
(id',classes,keyvals')
|
|
where keyvals' = ("style", style') : [(k,v) | (k,v) <- keyvals, k /= "style"]
|
|
style' = style ++ ";" ++ concat [v | ("style",v) <- keyvals]
|
|
|
|
langAttr :: Parser [Char] ParserState (Attr -> Attr)
|
|
langAttr = do
|
|
lang <- try $ enclosed (char '[') (char ']') alphaNum
|
|
return $ \(id',classes,keyvals) -> (id',classes,("lang",lang):keyvals)
|
|
|
|
-- | Parses material surrounded by a parser.
|
|
surrounded :: Parser [Char] st t -- ^ surrounding parser
|
|
-> Parser [Char] st a -- ^ content parser (to be used repeatedly)
|
|
-> Parser [Char] st [a]
|
|
surrounded border =
|
|
enclosed (border *> notFollowedBy (oneOf " \t\n\r")) (try border)
|
|
|
|
simpleInline :: Parser [Char] ParserState t -- ^ surrounding parser
|
|
-> (Inlines -> Inlines) -- ^ Inline constructor
|
|
-> Parser [Char] ParserState Inlines -- ^ content parser (to be used repeatedly)
|
|
simpleInline border construct = try $ do
|
|
st <- getState
|
|
pos <- getPosition
|
|
let afterString = stateLastStrPos st == Just pos
|
|
guard $ not afterString
|
|
border *> notFollowedBy (oneOf " \t\n\r")
|
|
attr <- attributes
|
|
body <- trimInlines . mconcat <$>
|
|
withQuoteContext InSingleQuote
|
|
(manyTill (notFollowedBy newline >> inline)
|
|
(try border <* notFollowedBy alphaNum))
|
|
return $ construct $
|
|
if attr == nullAttr
|
|
then body
|
|
else B.spanWith attr body
|
|
|
|
groupedInlineMarkup :: Parser [Char] ParserState Inlines
|
|
groupedInlineMarkup = try $ do
|
|
char '['
|
|
sp1 <- option mempty $ B.space <$ whitespace
|
|
result <- withQuoteContext InSingleQuote inlineMarkup
|
|
sp2 <- option mempty $ B.space <$ whitespace
|
|
char ']'
|
|
return $ sp1 <> result <> sp2
|
|
|
|
-- | Create a singleton list
|
|
singleton :: a -> [a]
|
|
singleton x = [x]
|
|
|