Merge pull request #1229 from tarleb/org-math-improved
Org reader: Support more inline/display math variants
This commit is contained in:
commit
3e147199b8
2 changed files with 80 additions and 28 deletions
|
@ -44,11 +44,14 @@ import Data.List (foldl', isPrefixOf, isSuffixOf)
|
||||||
import Data.Maybe (listToMaybe, fromMaybe)
|
import Data.Maybe (listToMaybe, fromMaybe)
|
||||||
import Data.Monoid (mconcat, mempty, mappend)
|
import Data.Monoid (mconcat, mempty, mappend)
|
||||||
|
|
||||||
|
-- Ignore HLint warnings to use String instead of [Char]
|
||||||
|
{-# ANN module ("HLint: ignore Use String" :: String) #-}
|
||||||
|
|
||||||
-- | Parse org-mode string and return a Pandoc document.
|
-- | Parse org-mode string and return a Pandoc document.
|
||||||
readOrg :: ReaderOptions -- ^ Reader options
|
readOrg :: ReaderOptions -- ^ Reader options
|
||||||
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
-> String -- ^ String to parse (assuming @'\n'@ line endings)
|
||||||
-> Pandoc
|
-> Pandoc
|
||||||
readOrg opts s = (readWith parseOrg) def{ orgStateOptions = opts } (s ++ "\n\n")
|
readOrg opts s = readWith parseOrg def{ orgStateOptions = opts } (s ++ "\n\n")
|
||||||
|
|
||||||
type OrgParser = Parser [Char] OrgParserState
|
type OrgParser = Parser [Char] OrgParserState
|
||||||
|
|
||||||
|
@ -111,7 +114,7 @@ updateLastPreCharPos = getPosition >>= \p ->
|
||||||
|
|
||||||
pushToInlineCharStack :: Char -> OrgParser ()
|
pushToInlineCharStack :: Char -> OrgParser ()
|
||||||
pushToInlineCharStack c = updateState $ \st ->
|
pushToInlineCharStack c = updateState $ \st ->
|
||||||
st { orgStateEmphasisCharStack = c:(orgStateEmphasisCharStack st) }
|
st { orgStateEmphasisCharStack = c:orgStateEmphasisCharStack st }
|
||||||
|
|
||||||
popInlineCharStack :: OrgParser ()
|
popInlineCharStack :: OrgParser ()
|
||||||
popInlineCharStack = updateState $ \st ->
|
popInlineCharStack = updateState $ \st ->
|
||||||
|
@ -176,7 +179,7 @@ orgBlock = try $ do
|
||||||
"comment" -> return mempty
|
"comment" -> return mempty
|
||||||
"src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr
|
"src" -> return $ B.codeBlockWith ("", classArgs, []) blockStr
|
||||||
_ -> B.divWith ("", [blockType], [])
|
_ -> B.divWith ("", [blockType], [])
|
||||||
<$> (parseFromString parseBlocks blockStr)
|
<$> parseFromString parseBlocks blockStr
|
||||||
|
|
||||||
blockHeader :: OrgParser (Int, String, [String])
|
blockHeader :: OrgParser (Int, String, [String])
|
||||||
blockHeader = (,,) <$> blockIndent
|
blockHeader = (,,) <$> blockIndent
|
||||||
|
@ -199,7 +202,7 @@ rawBlockContent indent blockType =
|
||||||
indentWith :: Int -> OrgParser String
|
indentWith :: Int -> OrgParser String
|
||||||
indentWith num = do
|
indentWith num = do
|
||||||
tabStop <- getOption readerTabStop
|
tabStop <- getOption readerTabStop
|
||||||
if (num < tabStop)
|
if num < tabStop
|
||||||
then count num (char ' ')
|
then count num (char ' ')
|
||||||
else choice [ try (count num (char ' '))
|
else choice [ try (count num (char ' '))
|
||||||
, try (char '\t' >> count (num - tabStop) (char ' ')) ]
|
, try (char '\t' >> count (num - tabStop) (char ' ')) ]
|
||||||
|
@ -242,7 +245,7 @@ drawerStart = try $
|
||||||
<|> stringAnyCase "LOGBOOK"
|
<|> stringAnyCase "LOGBOOK"
|
||||||
|
|
||||||
drawerLine :: OrgParser String
|
drawerLine :: OrgParser String
|
||||||
drawerLine = try $ anyLine
|
drawerLine = try anyLine
|
||||||
|
|
||||||
drawerEnd :: OrgParser String
|
drawerEnd :: OrgParser String
|
||||||
drawerEnd = try $
|
drawerEnd = try $
|
||||||
|
@ -276,7 +279,7 @@ declarationLine = try $ do
|
||||||
metaValue :: OrgParser MetaValue
|
metaValue :: OrgParser MetaValue
|
||||||
metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
|
metaValue = MetaInlines . B.toList . trimInlines <$> restOfLine
|
||||||
|
|
||||||
metaKey :: OrgParser [Char]
|
metaKey :: OrgParser String
|
||||||
metaKey = map toLower <$> many1 (noneOf ": \n\r")
|
metaKey = map toLower <$> many1 (noneOf ": \n\r")
|
||||||
<* char ':'
|
<* char ':'
|
||||||
<* skipSpaces
|
<* skipSpaces
|
||||||
|
@ -350,7 +353,7 @@ tableAlignRow = try $
|
||||||
|
|
||||||
tableAlignCell :: OrgParser Alignment
|
tableAlignCell :: OrgParser Alignment
|
||||||
tableAlignCell =
|
tableAlignCell =
|
||||||
choice [ try $ emptyCell *> return (AlignDefault)
|
choice [ try $ emptyCell *> return AlignDefault
|
||||||
, try $ skipSpaces
|
, try $ skipSpaces
|
||||||
*> char '<'
|
*> char '<'
|
||||||
*> tableAlignFromChar
|
*> tableAlignFromChar
|
||||||
|
@ -381,8 +384,8 @@ normalizeTable (OrgTable cols aligns heads lns) =
|
||||||
let aligns' = fillColumns aligns AlignDefault
|
let aligns' = fillColumns aligns AlignDefault
|
||||||
heads' = if heads == mempty
|
heads' = if heads == mempty
|
||||||
then mempty
|
then mempty
|
||||||
else fillColumns heads (B.plain mempty)
|
else fillColumns heads (B.plain mempty)
|
||||||
lns' = map (flip fillColumns (B.plain mempty)) lns
|
lns' = map (`fillColumns` B.plain mempty) lns
|
||||||
fillColumns base padding = take cols $ base ++ repeat padding
|
fillColumns base padding = take cols $ base ++ repeat padding
|
||||||
in OrgTable cols aligns' heads' lns'
|
in OrgTable cols aligns' heads' lns'
|
||||||
|
|
||||||
|
@ -512,6 +515,7 @@ inline =
|
||||||
, underline
|
, underline
|
||||||
, code
|
, code
|
||||||
, math
|
, math
|
||||||
|
, displayMath
|
||||||
, verbatim
|
, verbatim
|
||||||
, subscript
|
, subscript
|
||||||
, superscript
|
, superscript
|
||||||
|
@ -564,13 +568,14 @@ explicitOrImageLink = try $ do
|
||||||
title <- enclosedRaw (char '[') (char ']')
|
title <- enclosedRaw (char '[') (char ']')
|
||||||
title' <- parseFromString (mconcat <$> many inline) title
|
title' <- parseFromString (mconcat <$> many inline) title
|
||||||
char ']'
|
char ']'
|
||||||
return $ if (isImageFilename src) && (isImageFilename title)
|
return . B.link src ""
|
||||||
then B.link src "" (B.image title "" "")
|
$ if isImageFilename src && isImageFilename title
|
||||||
else B.link src "" title'
|
then B.image title "" ""
|
||||||
|
else title'
|
||||||
|
|
||||||
selflinkOrImage :: OrgParser Inlines
|
selflinkOrImage :: OrgParser Inlines
|
||||||
selflinkOrImage = try $ do
|
selflinkOrImage = try $ do
|
||||||
src <- (char '[') *> linkTarget <* char ']'
|
src <- char '[' *> linkTarget <* char ']'
|
||||||
return $ if isImageFilename src
|
return $ if isImageFilename src
|
||||||
then B.image src "" ""
|
then B.image src "" ""
|
||||||
else B.link src "" (B.str src)
|
else B.link src "" (B.str src)
|
||||||
|
@ -607,13 +612,21 @@ verbatim :: OrgParser Inlines
|
||||||
verbatim = B.rawInline "" <$> verbatimBetween '~'
|
verbatim = B.rawInline "" <$> verbatimBetween '~'
|
||||||
|
|
||||||
math :: OrgParser Inlines
|
math :: OrgParser Inlines
|
||||||
math = B.math <$> mathStringBetween '$'
|
math = B.math <$> choice [ math1CharBetween '$'
|
||||||
|
, mathStringBetween '$'
|
||||||
|
, rawMathBetween "\\(" "\\)"
|
||||||
|
]
|
||||||
|
|
||||||
|
displayMath :: OrgParser Inlines
|
||||||
|
displayMath = B.displayMath <$> choice [ rawMathBetween "\\[" "\\]"
|
||||||
|
, rawMathBetween "$$" "$$"
|
||||||
|
]
|
||||||
|
|
||||||
subscript :: OrgParser Inlines
|
subscript :: OrgParser Inlines
|
||||||
subscript = B.subscript <$> (try $ char '_' *> maybeGroupedByBraces)
|
subscript = B.subscript <$> try (char '_' *> maybeGroupedByBraces)
|
||||||
|
|
||||||
superscript :: OrgParser Inlines
|
superscript :: OrgParser Inlines
|
||||||
superscript = B.superscript <$> (try $ char '^' *> maybeGroupedByBraces)
|
superscript = B.superscript <$> try (char '^' *> maybeGroupedByBraces)
|
||||||
|
|
||||||
maybeGroupedByBraces :: OrgParser Inlines
|
maybeGroupedByBraces :: OrgParser Inlines
|
||||||
maybeGroupedByBraces = try $
|
maybeGroupedByBraces = try $
|
||||||
|
@ -655,6 +668,21 @@ mathStringBetween c = try $ do
|
||||||
final <- mathEnd c
|
final <- mathEnd c
|
||||||
return $ body ++ [final]
|
return $ body ++ [final]
|
||||||
|
|
||||||
|
-- | Parse a single character between @c@ using math rules
|
||||||
|
math1CharBetween :: Char
|
||||||
|
-> OrgParser String
|
||||||
|
math1CharBetween c = try $ do
|
||||||
|
char c
|
||||||
|
res <- noneOf $ c:mathForbiddenBorderChars
|
||||||
|
char c
|
||||||
|
eof <|> lookAhead (oneOf mathPostChars) *> return ()
|
||||||
|
return [res]
|
||||||
|
|
||||||
|
rawMathBetween :: String
|
||||||
|
-> String
|
||||||
|
-> OrgParser String
|
||||||
|
rawMathBetween s e = try $ string s *> manyTill anyChar (try $ string e)
|
||||||
|
|
||||||
-- | Parses the start (opening character) of emphasis
|
-- | Parses the start (opening character) of emphasis
|
||||||
emphasisStart :: Char -> OrgParser Char
|
emphasisStart :: Char -> OrgParser Char
|
||||||
emphasisStart c = try $ do
|
emphasisStart c = try $ do
|
||||||
|
@ -678,14 +706,14 @@ emphasisEnd c = try $ do
|
||||||
return c
|
return c
|
||||||
|
|
||||||
mathStart :: Char -> OrgParser Char
|
mathStart :: Char -> OrgParser Char
|
||||||
mathStart c = try $ do
|
mathStart c = try $
|
||||||
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
|
char c <* notFollowedBy' (oneOf (c:mathForbiddenBorderChars))
|
||||||
|
|
||||||
mathEnd :: Char -> OrgParser Char
|
mathEnd :: Char -> OrgParser Char
|
||||||
mathEnd c = try $ do
|
mathEnd c = try $ do
|
||||||
res <- noneOf (c:mathForbiddenBorderChars)
|
res <- noneOf (c:mathForbiddenBorderChars)
|
||||||
char c
|
char c
|
||||||
eof <|> (lookAhead $ oneOf mathPostChars *> pure ())
|
eof <|> lookAhead (oneOf mathPostChars *> pure ())
|
||||||
return res
|
return res
|
||||||
|
|
||||||
|
|
||||||
|
@ -717,8 +745,8 @@ many1TillNOrLessNewlines n p end = try $
|
||||||
nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
|
nMoreLines (Just 0) cs = try $ (cs ++) <$> finalLine
|
||||||
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
nMoreLines k cs = try $ (final k cs <|> rest k cs)
|
||||||
>>= uncurry nMoreLines
|
>>= uncurry nMoreLines
|
||||||
final _ cs = (\x -> (Nothing, cs ++ x)) <$> (try finalLine)
|
final _ cs = (\x -> (Nothing, cs ++ x)) <$> try finalLine
|
||||||
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> (try $ manyTill p P.newline)
|
rest m cs = (\x -> (minus1 <$> m, cs ++ x ++ "\n")) <$> try (manyTill p P.newline)
|
||||||
finalLine = try $ manyTill p end
|
finalLine = try $ manyTill p end
|
||||||
minus1 k = k - 1
|
minus1 k = k - 1
|
||||||
oneOrMore cs = guard (not $ null cs) *> return cs
|
oneOrMore cs = guard (not $ null cs) *> return cs
|
||||||
|
@ -747,7 +775,7 @@ emphasisAllowedNewlines = 1
|
||||||
|
|
||||||
-- | Chars allowed after an inline ($...$) math statement
|
-- | Chars allowed after an inline ($...$) math statement
|
||||||
mathPostChars :: [Char]
|
mathPostChars :: [Char]
|
||||||
mathPostChars = "\t\n \"',-.:;?"
|
mathPostChars = "\t\n \"'),-.:;?"
|
||||||
|
|
||||||
-- | Chars not allowed at the (inner) border of math
|
-- | Chars not allowed at the (inner) border of math
|
||||||
mathForbiddenBorderChars :: [Char]
|
mathForbiddenBorderChars :: [Char]
|
||||||
|
@ -762,7 +790,7 @@ afterEmphasisPreChar :: OrgParser Bool
|
||||||
afterEmphasisPreChar = do
|
afterEmphasisPreChar = do
|
||||||
pos <- getPosition
|
pos <- getPosition
|
||||||
lastPrePos <- orgStateLastPreCharPos <$> getState
|
lastPrePos <- orgStateLastPreCharPos <$> getState
|
||||||
return $ lastPrePos == Nothing || lastPrePos == Just pos
|
return . fromMaybe True $ (== pos) <$> lastPrePos
|
||||||
|
|
||||||
-- | Whether we are right after the end of a string
|
-- | Whether we are right after the end of a string
|
||||||
notAfterString :: OrgParser Bool
|
notAfterString :: OrgParser Bool
|
||||||
|
|
|
@ -54,14 +54,26 @@ tests =
|
||||||
"=Robot.rock()=" =?>
|
"=Robot.rock()=" =?>
|
||||||
para (code "Robot.rock()")
|
para (code "Robot.rock()")
|
||||||
|
|
||||||
, "Math" =:
|
|
||||||
"$E=mc^2$" =?>
|
|
||||||
para (math "E=mc^2")
|
|
||||||
|
|
||||||
, "Verbatim" =:
|
, "Verbatim" =:
|
||||||
"~word for word~" =?>
|
"~word for word~" =?>
|
||||||
para (rawInline "" "word for word")
|
para (rawInline "" "word for word")
|
||||||
|
|
||||||
|
, "Math $..$" =:
|
||||||
|
"$E=mc^2$" =?>
|
||||||
|
para (math "E=mc^2")
|
||||||
|
|
||||||
|
, "Math $$..$$" =:
|
||||||
|
"$$E=mc^2$$" =?>
|
||||||
|
para (displayMath "E=mc^2")
|
||||||
|
|
||||||
|
, "Math \\[..\\]" =:
|
||||||
|
"\\[E=ℎν\\]" =?>
|
||||||
|
para (displayMath "E=ℎν")
|
||||||
|
|
||||||
|
, "Math \\(..\\)" =:
|
||||||
|
"\\(σ_x σ_p ≥ \\frac{ℏ}{2}\\)" =?>
|
||||||
|
para (math "σ_x σ_p ≥ \\frac{ℏ}{2}")
|
||||||
|
|
||||||
, "Symbol" =:
|
, "Symbol" =:
|
||||||
"A * symbol" =?>
|
"A * symbol" =?>
|
||||||
para (str "A" <> space <> str "*" <> space <> "symbol")
|
para (str "A" <> space <> str "*" <> space <> "symbol")
|
||||||
|
@ -86,14 +98,19 @@ tests =
|
||||||
unlines [ "this+that+ +so+on"
|
unlines [ "this+that+ +so+on"
|
||||||
, "seven*eight* nine*"
|
, "seven*eight* nine*"
|
||||||
, "+not+funny+"
|
, "+not+funny+"
|
||||||
, "this == self"
|
|
||||||
] =?>
|
] =?>
|
||||||
para (spcSep [ "this+that+", "+so+on"
|
para (spcSep [ "this+that+", "+so+on"
|
||||||
, "seven*eight*", "nine*"
|
, "seven*eight*", "nine*"
|
||||||
, strikeout "not+funny"
|
, strikeout "not+funny"
|
||||||
, "this" <> space <> "==" <> space <> "self"
|
|
||||||
])
|
])
|
||||||
|
|
||||||
|
, "No empty markup" =:
|
||||||
|
-- FIXME: __ is erroneously parsed as subscript "_"
|
||||||
|
-- "// ** __ ++ == ~~ $$" =?>
|
||||||
|
-- para (spcSep [ "//", "**", "__", "++", "==", "~~", "$$" ])
|
||||||
|
"// ** ++ == ~~ $$" =?>
|
||||||
|
para (spcSep [ "//", "**", "++", "==", "~~", "$$" ])
|
||||||
|
|
||||||
, "Adherence to Org's rules for markup borders" =:
|
, "Adherence to Org's rules for markup borders" =:
|
||||||
"/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
|
"/t/& a/ / ./r/ (*l*) /e/! /b/." =?>
|
||||||
para (spcSep [ emph $ "t/&" <> space <> "a"
|
para (spcSep [ emph $ "t/&" <> space <> "a"
|
||||||
|
@ -109,6 +126,13 @@ tests =
|
||||||
para ((math "a\nb\nc") <> space <>
|
para ((math "a\nb\nc") <> space <>
|
||||||
spcSep [ "$d", "e", "f", "g$" ])
|
spcSep [ "$d", "e", "f", "g$" ])
|
||||||
|
|
||||||
|
, "Single-character math" =:
|
||||||
|
"$a$ $b$! $c$?" =?>
|
||||||
|
para (spcSep [ math "a"
|
||||||
|
, "$b$!"
|
||||||
|
, (math "c") <> "?"
|
||||||
|
])
|
||||||
|
|
||||||
, "Markup may not span more than two lines" =:
|
, "Markup may not span more than two lines" =:
|
||||||
unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?>
|
unlines [ "/this *is +totally", "nice+ not*", "emph/" ] =?>
|
||||||
para (spcSep [ "/this"
|
para (spcSep [ "/this"
|
||||||
|
|
Loading…
Reference in a new issue