Merge pull request #1226 from tarleb/org-emphasis-reader
Org reader: Precise rules for the recognition of markup
This commit is contained in:
commit
54e33a132b
2 changed files with 284 additions and 123 deletions
|
@ -32,11 +32,12 @@ import qualified Text.Pandoc.Builder as B
|
||||||
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
|
import Text.Pandoc.Builder (Inlines, Blocks, trimInlines, (<>), HasMeta(..))
|
||||||
import Text.Pandoc.Definition
|
import Text.Pandoc.Definition
|
||||||
import Text.Pandoc.Options
|
import Text.Pandoc.Options
|
||||||
import Text.Pandoc.Parsing hiding (orderedListMarker, updateLastStrPos)
|
import qualified Text.Pandoc.Parsing as P
|
||||||
|
import Text.Pandoc.Parsing hiding (newline, orderedListMarker, updateLastStrPos)
|
||||||
import Text.Pandoc.Shared (compactify')
|
import Text.Pandoc.Shared (compactify')
|
||||||
|
|
||||||
import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
|
import Control.Applicative (pure, (<$>), (<$), (<*>), (<*), (*>), (<**>))
|
||||||
import Control.Monad (guard, mzero)
|
import Control.Monad (guard, when)
|
||||||
import Data.Char (toLower)
|
import Data.Char (toLower)
|
||||||
import Data.Default
|
import Data.Default
|
||||||
import Data.List (foldl', isPrefixOf, isSuffixOf)
|
import Data.List (foldl', isPrefixOf, isSuffixOf)
|
||||||
|
@ -47,49 +48,100 @@ import Data.Monoid (mconcat, mempty, mappend)
|
||||||
readOrg :: ReaderOptions -- ^ Reader options
|
readOrg :: ReaderOptions -- ^ Reader options
|
||||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||||
-> Pandoc
|
-> Pandoc
|
||||||
readOrg opts s = (readWith parseOrg) def{ orgOptions = opts } (s ++ "\n\n")
|
readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||||
|
|
||||||
type OrgParser = Parser [Char] OrgParserState
|
type OrgParser = Parser [Char] OrgParserState
|
||||||
|
|
||||||
|
parseOrg:: OrgParser Pandoc
|
||||||
|
parseOrg = do
|
||||||
|
blocks' <- B.toList <$> parseBlocks
|
||||||
|
st <- getState
|
||||||
|
let meta = orgStateMeta st
|
||||||
|
return $ Pandoc meta $ filter (/= Null) blocks'
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Parser State for Org
|
||||||
|
--
|
||||||
|
|
||||||
-- | Org-mode parser state
|
-- | Org-mode parser state
|
||||||
data OrgParserState = OrgParserState
|
data OrgParserState = OrgParserState
|
||||||
{ orgOptions :: ReaderOptions
|
{ orgStateOptions :: ReaderOptions
|
||||||
, orgInlineCharStack :: [Char]
|
, orgStateEmphasisCharStack :: [Char]
|
||||||
, orgLastStrPos :: Maybe SourcePos
|
, orgStateEmphasisNewlines :: Maybe Int
|
||||||
, orgMeta :: Meta
|
, orgStateLastForbiddenCharPos :: Maybe SourcePos
|
||||||
|
, orgStateLastPreCharPos :: Maybe SourcePos
|
||||||
|
, orgStateLastStrPos :: Maybe SourcePos
|
||||||
|
, orgStateMeta :: Meta
|
||||||
} deriving (Show)
|
} deriving (Show)
|
||||||
|
|
||||||
instance HasReaderOptions OrgParserState where
|
instance HasReaderOptions OrgParserState where
|
||||||
extractReaderOptions = orgOptions
|
extractReaderOptions = orgStateOptions
|
||||||
|
|
||||||
instance HasMeta OrgParserState where
|
instance HasMeta OrgParserState where
|
||||||
setMeta field val st =
|
setMeta field val st =
|
||||||
st{ orgMeta = setMeta field val $ orgMeta st }
|
st{ orgStateMeta = setMeta field val $ orgStateMeta st }
|
||||||
deleteMeta field st =
|
deleteMeta field st =
|
||||||
st{ orgMeta = deleteMeta field $ orgMeta st }
|
st{ orgStateMeta = deleteMeta field $ orgStateMeta st }
|
||||||
|
|
||||||
instance Default OrgParserState where
|
instance Default OrgParserState where
|
||||||
def = defaultOrgParserState
|
def = defaultOrgParserState
|
||||||
|
|
||||||
defaultOrgParserState :: OrgParserState
|
defaultOrgParserState :: OrgParserState
|
||||||
defaultOrgParserState = OrgParserState
|
defaultOrgParserState = OrgParserState
|
||||||
{ orgOptions = def
|
{ orgStateOptions = def
|
||||||
, orgInlineCharStack = []
|
, orgStateEmphasisCharStack = []
|
||||||
, orgLastStrPos = Nothing
|
, orgStateEmphasisNewlines = Nothing
|
||||||
, orgMeta = nullMeta
|
, orgStateLastForbiddenCharPos = Nothing
|
||||||
|
, orgStateLastPreCharPos = Nothing
|
||||||
|
, orgStateLastStrPos = Nothing
|
||||||
|
, orgStateMeta = nullMeta
|
||||||
}
|
}
|
||||||
|
|
||||||
updateLastStrPos :: OrgParser ()
|
updateLastStrPos :: OrgParser ()
|
||||||
updateLastStrPos = getPosition >>= \p ->
|
updateLastStrPos = getPosition >>= \p ->
|
||||||
updateState $ \s -> s{ orgLastStrPos = Just p }
|
updateState $ \s -> s{ orgStateLastStrPos = Just p }
|
||||||
|
|
||||||
|
updateLastForbiddenCharPos :: OrgParser ()
|
||||||
|
updateLastForbiddenCharPos = getPosition >>= \p ->
|
||||||
|
updateState $ \s -> s{ orgStateLastForbiddenCharPos = Just p}
|
||||||
|
|
||||||
parseOrg:: OrgParser Pandoc
|
updateLastPreCharPos :: OrgParser ()
|
||||||
parseOrg = do
|
updateLastPreCharPos = getPosition >>= \p ->
|
||||||
blocks' <- B.toList <$> parseBlocks
|
updateState $ \s -> s{ orgStateLastPreCharPos = Just p}
|
||||||
|
|
||||||
|
pushToInlineCharStack :: Char -> OrgParser ()
|
||||||
|
pushToInlineCharStack c = updateState $ \st ->
|
||||||
|
st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) }
|
||||||
|
|
||||||
|
popInlineCharStack :: OrgParser ()
|
||||||
|
popInlineCharStack = updateState $ \st ->
|
||||||
|
st { orgStateEmphasisCharStack = drop 1 . orgStateEmphasisCharStack $ st }
|
||||||
|
|
||||||
|
surroundingEmphasisChar :: OrgParser [Char]
|
||||||
|
surroundingEmphasisChar = take 1 . drop 1 . orgStateEmphasisCharStack <$> getState
|
||||||
|
|
||||||
|
startEmphasisNewlinesCounting :: Int -> OrgParser ()
|
||||||
|
startEmphasisNewlinesCounting maxNewlines = updateState $ \s ->
|
||||||
|
s { orgStateEmphasisNewlines = Just maxNewlines }
|
||||||
|
|
||||||
|
decEmphasisNewlinesCount :: OrgParser ()
|
||||||
|
decEmphasisNewlinesCount = updateState $ \s ->
|
||||||
|
s{ orgStateEmphasisNewlines = (\n -> n - 1) <$> orgStateEmphasisNewlines s }
|
||||||
|
|
||||||
|
newlinesCountWithinLimits :: OrgParser Bool
|
||||||
|
newlinesCountWithinLimits = do
|
||||||
st <- getState
|
st <- getState
|
||||||
let meta = orgMeta st
|
return $ ((< 0) <$> orgStateEmphasisNewlines st) /= Just True
|
||||||
return $ Pandoc meta $ filter (/= Null) blocks'
|
|
||||||
|
resetEmphasisNewlines :: OrgParser ()
|
||||||
|
resetEmphasisNewlines = updateState $ \s ->
|
||||||
|
s{ orgStateEmphasisNewlines = Nothing }
|
||||||
|
|
||||||
|
newline :: OrgParser Char
|
||||||
|
newline =
|
||||||
|
P.newline
|
||||||
|
<* updateLastPreCharPos
|
||||||
|
<* updateLastForbiddenCharPos
|
||||||
|
|
||||||
--
|
--
|
||||||
-- parsing blocks
|
-- parsing blocks
|
||||||
|
@ -218,7 +270,7 @@ commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
|
||||||
declarationLine :: OrgParser Blocks
|
declarationLine :: OrgParser Blocks
|
||||||
declarationLine = try $ do
|
declarationLine = try $ do
|
||||||
meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
|
meta' <- B.setMeta <$> metaKey <*> metaValue <*> pure nullMeta
|
||||||
updateState $ \st -> st { orgMeta = orgMeta st <> meta' }
|
updateState $ \st -> st { orgStateMeta = orgStateMeta st <> meta' }
|
||||||
return mempty
|
return mempty
|
||||||
|
|
||||||
metaValue :: OrgParser MetaValue
|
metaValue :: OrgParser MetaValue
|
||||||
|
@ -449,22 +501,24 @@ anyLineNewline = (++ "\n") <$> anyLine
|
||||||
--
|
--
|
||||||
|
|
||||||
inline :: OrgParser Inlines
|
inline :: OrgParser Inlines
|
||||||
inline = choice inlineParsers <?> "inline"
|
inline =
|
||||||
where inlineParsers = [ whitespace
|
choice [ whitespace
|
||||||
, link
|
, link
|
||||||
, str
|
, str
|
||||||
, endline
|
, endline
|
||||||
, emph
|
, emph
|
||||||
, strong
|
, strong
|
||||||
, strikeout
|
, strikeout
|
||||||
, underline
|
, underline
|
||||||
, code
|
, code
|
||||||
, math
|
, math
|
||||||
, verbatim
|
, verbatim
|
||||||
, subscript
|
, subscript
|
||||||
, superscript
|
, superscript
|
||||||
, symbol
|
, symbol
|
||||||
]
|
] <* (guard =<< newlinesCountWithinLimits)
|
||||||
|
<?> "inline"
|
||||||
|
|
||||||
|
|
||||||
-- treat these as potentially non-text when parsing inline:
|
-- treat these as potentially non-text when parsing inline:
|
||||||
specialChars :: [Char]
|
specialChars :: [Char]
|
||||||
|
@ -472,7 +526,10 @@ specialChars = "\"$'()*+-./:<=>[\\]^_{|}~"
|
||||||
|
|
||||||
|
|
||||||
whitespace :: OrgParser Inlines
|
whitespace :: OrgParser Inlines
|
||||||
whitespace = B.space <$ skipMany1 spaceChar <?> "whitespace"
|
whitespace = B.space <$ skipMany1 spaceChar
|
||||||
|
<* updateLastPreCharPos
|
||||||
|
<* updateLastForbiddenCharPos
|
||||||
|
<?> "whitespace"
|
||||||
|
|
||||||
str :: OrgParser Inlines
|
str :: OrgParser Inlines
|
||||||
str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
str = B.str <$> many1 (noneOf $ specialChars ++ "\n\r ")
|
||||||
|
@ -492,6 +549,9 @@ endline = try $ do
|
||||||
notFollowedBy' commentLineStart
|
notFollowedBy' commentLineStart
|
||||||
notFollowedBy' bulletListStart
|
notFollowedBy' bulletListStart
|
||||||
notFollowedBy' orderedListStart
|
notFollowedBy' orderedListStart
|
||||||
|
decEmphasisNewlinesCount
|
||||||
|
guard =<< newlinesCountWithinLimits
|
||||||
|
updateLastPreCharPos
|
||||||
return B.space
|
return B.space
|
||||||
|
|
||||||
link :: OrgParser Inlines
|
link :: OrgParser Inlines
|
||||||
|
@ -500,42 +560,54 @@ link = explicitOrImageLink <|> selflinkOrImage <?> "link"
|
||||||
explicitOrImageLink :: OrgParser Inlines
|
explicitOrImageLink :: OrgParser Inlines
|
||||||
explicitOrImageLink = try $ do
|
explicitOrImageLink = try $ do
|
||||||
char '['
|
char '['
|
||||||
src <- enclosedRaw (char '[') (char ']')
|
src <- linkTarget
|
||||||
title <- enclosedRaw (char '[') (char ']')
|
title <- enclosedRaw (char '[') (char ']')
|
||||||
title' <- parseFromString (mconcat <$> many inline) title
|
title' <- parseFromString (mconcat <$> many inline) title
|
||||||
char ']'
|
char ']'
|
||||||
return $ if (isImage src) && (isImage title)
|
return $ if (isImageFilename src) && (isImageFilename title)
|
||||||
then B.link src "" (B.image title "" "")
|
then B.link src "" (B.image title "" "")
|
||||||
else B.link src "" title'
|
else B.link src "" title'
|
||||||
|
|
||||||
selflinkOrImage :: OrgParser Inlines
|
selflinkOrImage :: OrgParser Inlines
|
||||||
selflinkOrImage = try $ do
|
selflinkOrImage = try $ do
|
||||||
src <- enclosedRaw (string "[[") (string "]]")
|
src <- (char '[') *> linkTarget <* char ']'
|
||||||
return $ if isImage src
|
return $ if isImageFilename src
|
||||||
then B.image src "" ""
|
then B.image src "" ""
|
||||||
else B.link src "" (B.str src)
|
else B.link src "" (B.str src)
|
||||||
|
|
||||||
|
linkTarget :: OrgParser String
|
||||||
|
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r]")
|
||||||
|
|
||||||
|
isImageFilename :: String -> Bool
|
||||||
|
isImageFilename filename =
|
||||||
|
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
||||||
|
any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
||||||
|
':' `notElem` filename
|
||||||
|
where
|
||||||
|
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
||||||
|
protocols = [ "file", "http", "https" ]
|
||||||
|
|
||||||
emph :: OrgParser Inlines
|
emph :: OrgParser Inlines
|
||||||
emph = B.emph <$> inlinesEnclosedBy '/'
|
emph = B.emph <$> emphasisBetween '/'
|
||||||
|
|
||||||
strong :: OrgParser Inlines
|
strong :: OrgParser Inlines
|
||||||
strong = B.strong <$> inlinesEnclosedBy '*'
|
strong = B.strong <$> emphasisBetween '*'
|
||||||
|
|
||||||
strikeout :: OrgParser Inlines
|
strikeout :: OrgParser Inlines
|
||||||
strikeout = B.strikeout <$> inlinesEnclosedBy '+'
|
strikeout = B.strikeout <$> emphasisBetween '+'
|
||||||
|
|
||||||
-- There is no underline, so we use strong instead.
|
-- There is no underline, so we use strong instead.
|
||||||
underline :: OrgParser Inlines
|
underline :: OrgParser Inlines
|
||||||
underline = B.strong <$> inlinesEnclosedBy '_'
|
underline = B.strong <$> emphasisBetween '_'
|
||||||
|
|
||||||
code :: OrgParser Inlines
|
code :: OrgParser Inlines
|
||||||
code = B.code <$> rawEnclosedBy '='
|
code = B.code <$> verbatimBetween '='
|
||||||
|
|
||||||
math :: OrgParser Inlines
|
|
||||||
math = B.math <$> rawEnclosedBy '$'
|
|
||||||
|
|
||||||
verbatim :: OrgParser Inlines
|
verbatim :: OrgParser Inlines
|
||||||
verbatim = B.rawInline "" <$> rawEnclosedBy '~'
|
verbatim = B.rawInline "" <$> verbatimBetween '~'
|
||||||
|
|
||||||
|
math :: OrgParser Inlines
|
||||||
|
math = B.math <$> mathStringBetween '$'
|
||||||
|
|
||||||
subscript :: OrgParser Inlines
|
subscript :: OrgParser Inlines
|
||||||
subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces)
|
subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces)
|
||||||
|
@ -550,7 +622,72 @@ maybeGroupedByBraces = try $
|
||||||
]
|
]
|
||||||
|
|
||||||
symbol :: OrgParser Inlines
|
symbol :: OrgParser Inlines
|
||||||
symbol = B.str . (: "") <$> oneOf specialChars
|
symbol = B.str . (: "") <$> (oneOf specialChars >>= updatePositions)
|
||||||
|
where updatePositions c
|
||||||
|
| c `elem` emphasisPreChars = c <$ updateLastPreCharPos
|
||||||
|
| c `elem` emphasisForbiddenBorderChars = c <$ updateLastForbiddenCharPos
|
||||||
|
| otherwise = return c
|
||||||
|
|
||||||
|
emphasisBetween :: Char
|
||||||
|
-> OrgParser Inlines
|
||||||
|
emphasisBetween c = try $ do
|
||||||
|
startEmphasisNewlinesCounting emphasisAllowedNewlines
|
||||||
|
res <- enclosedInlines (emphasisStart c) (emphasisEnd c)
|
||||||
|
isTopLevelEmphasis <- null . orgStateEmphasisCharStack <$> getState
|
||||||
|
when isTopLevelEmphasis
|
||||||
|
resetEmphasisNewlines
|
||||||
|
return res
|
||||||
|
|
||||||
|
verbatimBetween :: Char
|
||||||
|
-> OrgParser String
|
||||||
|
verbatimBetween c = try $
|
||||||
|
emphasisStart c *>
|
||||||
|
many1TillNOrLessNewlines 1 (noneOf "\n\r") (emphasisEnd c)
|
||||||
|
|
||||||
|
-- | Parses a raw string delimited by @c@ using Org's math rules
|
||||||
|
mathStringBetween :: Char
|
||||||
|
-> OrgParser String
|
||||||
|
mathStringBetween c = try $ do
|
||||||
|
mathStart c
|
||||||
|
body <- many1TillNOrLessNewlines mathAllowedNewlines
|
||||||
|
(noneOf (c:"\n\r"))
|
||||||
|
(lookAhead $ mathEnd c)
|
||||||
|
final <- mathEnd c
|
||||||
|
return $ body ++ [final]
|
||||||
|
|
||||||
|
-- | Parses the start (opening character) of emphasis
|
||||||
|
emphasisStart :: Char -> OrgParser Char
|
||||||
|
emphasisStart c = try $ do
|
||||||
|
guard =<< afterEmphasisPreChar
|
||||||
|
guard =<< notAfterString
|
||||||
|
char c
|
||||||
|
lookAhead (noneOf emphasisForbiddenBorderChars)
|
||||||
|
pushToInlineCharStack c
|
||||||
|
return c
|
||||||
|
|
||||||
|
-- | Parses the closing character of emphasis
|
||||||
|
emphasisEnd :: Char -> OrgParser Char
|
||||||
|
emphasisEnd c = try $ do
|
||||||
|
guard =<< notAfterForbiddenBorderChar
|
||||||
|
char c
|
||||||
|
eof <|> lookAhead (surroundingEmphasisChar >>= \x ->
|
||||||
|
oneOf (x ++ emphasisPostChars))
|
||||||
|
*> return ()
|
||||||
|
updateLastStrPos
|
||||||
|
popInlineCharStack
|
||||||
|
return c
|
||||||
|
|
||||||
|
mathStart :: Char -> OrgParser Char
|
||||||
|
mathStart c = try $ do
|
||||||
|
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
|
||||||
|
|
||||||
|
mathEnd :: Char -> OrgParser Char
|
||||||
|
mathEnd c = try $ do
|
||||||
|
res <- noneOf (c:mathForbiddenBorderChars)
|
||||||
|
char c
|
||||||
|
eof <|> (lookAhead $ oneOf mathPostChars *> pure ())
|
||||||
|
return res
|
||||||
|
|
||||||
|
|
||||||
enclosedInlines :: OrgParser a
|
enclosedInlines :: OrgParser a
|
||||||
-> OrgParser b
|
-> OrgParser b
|
||||||
|
@ -558,16 +695,6 @@ enclosedInlines :: OrgParser a
|
||||||
enclosedInlines start end = try $
|
enclosedInlines start end = try $
|
||||||
trimInlines . mconcat <$> enclosed start end inline
|
trimInlines . mconcat <$> enclosed start end inline
|
||||||
|
|
||||||
-- FIXME: This is a hack
|
|
||||||
inlinesEnclosedBy :: Char
|
|
||||||
-> OrgParser Inlines
|
|
||||||
inlinesEnclosedBy c = try $ do
|
|
||||||
updateState $ \st -> st { orgInlineCharStack = c:(orgInlineCharStack st) }
|
|
||||||
res <- enclosedInlines (atStart (char c) <* endsOnThisOrNextLine c)
|
|
||||||
(atEnd $ char c)
|
|
||||||
updateState $ \st -> st { orgInlineCharStack = drop 1 . orgInlineCharStack $ st }
|
|
||||||
return res
|
|
||||||
|
|
||||||
enclosedRaw :: OrgParser a
|
enclosedRaw :: OrgParser a
|
||||||
-> OrgParser b
|
-> OrgParser b
|
||||||
-> OrgParser String
|
-> OrgParser String
|
||||||
|
@ -577,63 +704,76 @@ enclosedRaw start end = try $
|
||||||
spanningTwoLines = try $
|
spanningTwoLines = try $
|
||||||
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
|
anyLine >>= \f -> mappend (f <> " ") <$> onSingleLine
|
||||||
|
|
||||||
rawEnclosedBy :: Char
|
-- | Like many1Till, but parses at most @n+1@ lines. @p@ must not consume
|
||||||
-> OrgParser String
|
-- newlines.
|
||||||
rawEnclosedBy c = enclosedRaw (atStart $ char c) (atEnd $ char c)
|
many1TillNOrLessNewlines :: Int
|
||||||
|
-> OrgParser Char
|
||||||
-- succeeds only if we're not right after a str (ie. in middle of word)
|
-> OrgParser a
|
||||||
atStart :: OrgParser a -> OrgParser a
|
-> OrgParser String
|
||||||
atStart p = do
|
many1TillNOrLessNewlines n p end = try $
|
||||||
guard =<< not <$> isRightAfterString
|
nMoreLines (Just n) mempty >>= oneOrMore
|
||||||
p
|
|
||||||
|
|
||||||
-- | succeeds only if we're at the end of a word
|
|
||||||
atEnd :: OrgParser a -> OrgParser a
|
|
||||||
atEnd p = try $ do
|
|
||||||
p <* lookingAtEndOfWord
|
|
||||||
where lookingAtEndOfWord =
|
|
||||||
eof <|> const (return ()) =<< lookAhead . oneOf =<< postWordChars
|
|
||||||
|
|
||||||
isRightAfterString :: OrgParser Bool
|
|
||||||
isRightAfterString = do
|
|
||||||
pos <- getPosition
|
|
||||||
st <- getState
|
|
||||||
-- the position `Nothing` isn't after a String, either, hence the double
|
|
||||||
-- negation
|
|
||||||
return $ not $ orgLastStrPos st /= Just pos
|
|
||||||
|
|
||||||
postWordChars :: OrgParser [Char]
|
|
||||||
postWordChars = do
|
|
||||||
st <- getState
|
|
||||||
return $ "\t\n\r !\"'),-.:?}" ++ (take 1 . drop 1 . orgInlineCharStack $ st)
|
|
||||||
|
|
||||||
-- FIXME: These functions are hacks and should be replaced
|
|
||||||
endsOnThisOrNextLine :: Char
|
|
||||||
-> OrgParser ()
|
|
||||||
endsOnThisOrNextLine c = do
|
|
||||||
inp <- getInput
|
|
||||||
let doOtherwise = \rest -> endsOnThisLine rest c (const mzero)
|
|
||||||
endsOnThisLine inp c doOtherwise
|
|
||||||
|
|
||||||
endsOnThisLine :: [Char]
|
|
||||||
-> Char
|
|
||||||
-> ([Char] -> OrgParser ())
|
|
||||||
-> OrgParser ()
|
|
||||||
endsOnThisLine input c doOnOtherLines = do
|
|
||||||
postWordChars' <- postWordChars
|
|
||||||
case break (`elem` c:"\n") input of
|
|
||||||
(_,'\n':rest) -> doOnOtherLines rest
|
|
||||||
(_,_:[]) -> return ()
|
|
||||||
(_,_:rest@(n:_)) -> if n `elem` postWordChars'
|
|
||||||
then return ()
|
|
||||||
else endsOnThisLine rest c doOnOtherLines
|
|
||||||
_ -> mzero
|
|
||||||
|
|
||||||
isImage :: String -> Bool
|
|
||||||
isImage filename =
|
|
||||||
any (\x -> ('.':x) `isSuffixOf` filename) imageExtensions &&
|
|
||||||
any (\x -> (x++":") `isPrefixOf` filename) protocols ||
|
|
||||||
':' `notElem` filename
|
|
||||||
where
|
where
|
||||||
imageExtensions = [ "jpeg" , "jpg" , "png" , "gif" , "svg" ]
|
nMoreLines Nothing cs = return cs
|
||||||
protocols = [ "file", "http", "https" ]
|
nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
|
||||||
|
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
||||||
|
>>= uncurry nMoreLines
|
||||||
|
final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine)
|
||||||
|
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline)
|
||||||
|
finalLine = try $ manyTill p end
|
||||||
|
minus1 k = k - 1
|
||||||
|
oneOrMore cs = guard (not $ null cs) *> return cs
|
||||||
|
|
||||||
|
-- Org allows customization of the way it reads emphasis. We use the defaults
|
||||||
|
-- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
|
||||||
|
-- for details).
|
||||||
|
|
||||||
|
-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
|
||||||
|
emphasisPreChars :: [Char]
|
||||||
|
emphasisPreChars = "\t \"'({"
|
||||||
|
|
||||||
|
-- | Chars allowed at after emphasis
|
||||||
|
emphasisPostChars :: [Char]
|
||||||
|
emphasisPostChars = "\t\n !\"'),-.:;?\\}"
|
||||||
|
|
||||||
|
-- | Chars not allowed at the (inner) border of emphasis
|
||||||
|
emphasisForbiddenBorderChars :: [Char]
|
||||||
|
emphasisForbiddenBorderChars = "\t\n\r \"',"
|
||||||
|
|
||||||
|
-- | The maximum number of newlines within
|
||||||
|
emphasisAllowedNewlines :: Int
|
||||||
|
emphasisAllowedNewlines = 1
|
||||||
|
|
||||||
|
-- LaTeX-style math: see `org-latex-regexps` for details
|
||||||
|
|
||||||
|
-- | Chars allowed after an inline ($...$) math statement
|
||||||
|
mathPostChars :: [Char]
|
||||||
|
mathPostChars = "\t\n \"',-.:;?"
|
||||||
|
|
||||||
|
-- | Chars not allowed at the (inner) border of math
|
||||||
|
mathForbiddenBorderChars :: [Char]
|
||||||
|
mathForbiddenBorderChars = "\t\n\r ,;.$"
|
||||||
|
|
||||||
|
-- | Maximum number of newlines in an inline math statement
|
||||||
|
mathAllowedNewlines :: Int
|
||||||
|
mathAllowedNewlines = 2
|
||||||
|
|
||||||
|
-- | Whether we are right behind a char allowed before emphasis
|
||||||
|
afterEmphasisPreChar :: OrgParser Bool
|
||||||
|
afterEmphasisPreChar = do
|
||||||
|
pos <- getPosition
|
||||||
|
lastPrePos <- orgStateLastPreCharPos <$> getState
|
||||||
|
return $ lastPrePos == Nothing || lastPrePos == Just pos
|
||||||
|
|
||||||
|
-- | Whether we are right after the end of a string
|
||||||
|
notAfterString :: OrgParser Bool
|
||||||
|
notAfterString = do
|
||||||
|
pos <- getPosition
|
||||||
|
lastStrPos <- orgStateLastStrPos <$> getState
|
||||||
|
return $ lastStrPos /= Just pos
|
||||||
|
|
||||||
|
-- | Whether the parser is right after a forbidden border char
|
||||||
|
notAfterForbiddenBorderChar :: OrgParser Bool
|
||||||
|
notAfterForbiddenBorderChar = do
|
||||||
|
pos <- getPosition
|
||||||
|
lastFBCPos <- orgStateLastForbiddenCharPos <$> getState
|
||||||
|
return $ lastFBCPos /= Just pos
|
||||||
|
|
|
@ -86,16 +86,37 @@ tests =
|
||||||
unlines [ "this+that+ +so+on"
|
unlines [ "this+that+ +so+on"
|
||||||
, "seven*eight* nine*"
|
, "seven*eight* nine*"
|
||||||
, "+not+funny+"
|
, "+not+funny+"
|
||||||
|
, "this == self"
|
||||||
] =?>
|
] =?>
|
||||||
para (spcSep [ "this+that+", "+so+on"
|
para (spcSep [ "this+that+", "+so+on"
|
||||||
, "seven*eight*", "nine*"
|
, "seven*eight*", "nine*"
|
||||||
, strikeout "not+funny"
|
, strikeout "not+funny"
|
||||||
|
, "this" <> space <> "==" <> space <> "self"
|
||||||
])
|
])
|
||||||
|
|
||||||
|
, "Adherence to Org's rules for markup borders" =:
|
||||||
|
"/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
|
||||||
|
para (spcSep [ emph $ "t/&" <> space <> "a"
|
||||||
|
, "/"
|
||||||
|
, "./r/"
|
||||||
|
, "(" <> (strong "l") <> ")"
|
||||||
|
, (emph "e") <> "!"
|
||||||
|
, (emph "b") <> "."
|
||||||
|
])
|
||||||
|
|
||||||
|
, "Inline math must stay within three lines" =:
|
||||||
|
unlines [ "$a", "b", "c$", "$d", "e", "f", "g$" ] =?>
|
||||||
|
para ((math "a\nb\nc") <> space <>
|
||||||
|
spcSep [ "$d", "e", "f", "g$" ])
|
||||||
|
|
||||||
, "Markup may not span more than two lines" =:
|
, "Markup may not span more than two lines" =:
|
||||||
unlines [ "/this *is", "not*", "emph/" ] =?>
|
unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?>
|
||||||
para (spcSep [ "/this"
|
para (spcSep [ "/this"
|
||||||
, (strong ("is" <> space <> "not"))
|
, (strong (spcSep
|
||||||
|
[ "is"
|
||||||
|
, (strikeout ("totally" <> space <> "nice"))
|
||||||
|
, "not"
|
||||||
|
]))
|
||||||
, "emph/" ])
|
, "emph/" ])
|
||||||
|
|
||||||
, "Image" =:
|
, "Image" =:
|
||||||
|
|
Loading…
Reference in a new issue