Org reader: Add support for custom link types

Org allows users to define their own custom link types.  E.g., in a
document with a lot of links to Wikipedia articles, one can define a
custom wikipedia link-type via

    #+LINK: wp https://en.wikipedia.org/wiki/

This allows to write [[wp:Org_mode][Org-mode]] instead of the
equivallent [[https://en.wikipedia.org/wiki/Org_mode][Org-mode]].
This commit is contained in:
Albert Krewinkel 2014-04-30 11:16:01 +02:00
parent a22b3a218d
commit 8726eebcd3
2 changed files with 86 additions and 8 deletions

View file

@ -43,7 +43,7 @@ import Text.Pandoc.Shared (compactify', compactify'DL)
import Control.Applicative ( Applicative, pure
, (<$>), (<$), (<*>), (<*), (*>), (<**>) )
import Control.Monad (foldM, guard, liftM, liftM2, when)
import Control.Monad (foldM, guard, liftM, liftM2, mzero, when)
import Control.Monad.Reader (Reader, runReader, ask, asks)
import Data.Char (isAlphaNum, toLower)
import Data.Default
@ -51,6 +51,7 @@ import Data.List (intersperse, isPrefixOf, isSuffixOf)
import qualified Data.Map as M
import Data.Maybe (listToMaybe, fromMaybe, isJust)
import Data.Monoid (Monoid, mconcat, mempty, mappend)
import Network.HTTP (urlEncode)
-- | Parse org-mode string and return a Pandoc document.
readOrg :: ReaderOptions -- ^ Reader options
@ -76,6 +77,8 @@ type OrgNoteTable = [OrgNoteRecord]
type OrgBlockAttributes = M.Map String String
type OrgLinkFormatters = M.Map String (String -> String)
-- | Org-mode parser state
data OrgParserState = OrgParserState
{ orgStateOptions :: ReaderOptions
@ -86,6 +89,7 @@ data OrgParserState = OrgParserState
, orgStateLastForbiddenCharPos :: Maybe SourcePos
, orgStateLastPreCharPos :: Maybe SourcePos
, orgStateLastStrPos :: Maybe SourcePos
, orgStateLinkFormatters :: OrgLinkFormatters
, orgStateMeta :: Meta
, orgStateMeta' :: F Meta
, orgStateNotes' :: OrgNoteTable
@ -113,6 +117,7 @@ defaultOrgParserState = OrgParserState
, orgStateLastForbiddenCharPos = Nothing
, orgStateLastPreCharPos = Nothing
, orgStateLastStrPos = Nothing
, orgStateLinkFormatters = M.empty
, orgStateMeta = nullMeta
, orgStateMeta' = return nullMeta
, orgStateNotes' = []
@ -175,6 +180,13 @@ resetEmphasisNewlines :: OrgParser ()
resetEmphasisNewlines = updateState $ \s ->
s{ orgStateEmphasisNewlines = Nothing }
addLinkFormat :: String
-> (String -> String)
-> OrgParser ()
addLinkFormat key formatter = updateState $ \s ->
let fs = orgStateLinkFormatters s
in s{ orgStateLinkFormatters = M.insert key formatter fs }
addToNotesTable :: OrgNoteRecord -> OrgParser ()
addToNotesTable note = do
oldnotes <- orgStateNotes' <$> getState
@ -423,7 +435,8 @@ specialLine :: OrgParser (F Blocks)
specialLine = fmap return . try $ metaLine <|> commentLine
metaLine :: OrgParser Blocks
metaLine = try $ metaLineStart *> declarationLine
metaLine = try $ mempty
<$ (metaLineStart *> (optionLine <|> declarationLine))
commentLine :: OrgParser Blocks
commentLine = try $ commentLineStart *> anyLine *> pure mempty
@ -436,14 +449,14 @@ metaLineStart = try $ mappend <$> many spaceChar <*> string "#+"
commentLineStart :: OrgParser String
commentLineStart = try $ mappend <$> many spaceChar <*> string "# "
declarationLine :: OrgParser Blocks
declarationLine :: OrgParser ()
declarationLine = try $ do
key <- metaKey
inlinesF <- metaInlines
updateState $ \st ->
let meta' = B.setMeta <$> pure key <*> inlinesF <*> pure nullMeta
in st { orgStateMeta' = orgStateMeta' st <> meta' }
return mempty
return ()
metaInlines :: OrgParser (F MetaValue)
metaInlines = fmap (MetaInlines . B.toList) <$> inlinesTillNewline
@ -453,6 +466,35 @@ metaKey = map toLower <$> many1 (noneOf ": \n\r")
<* char ':'
<* skipSpaces
optionLine :: OrgParser ()
optionLine = try $ do
key <- metaKey
case key of
"link" -> parseLinkFormat >>= uncurry addLinkFormat
_ -> mzero
parseLinkFormat :: OrgParser ((String, String -> String))
parseLinkFormat = try $ do
linkType <- (:) <$> letter <*> many (alphaNum <|> oneOf "-_") <* skipSpaces
linkSubst <- parseFormat
return (linkType, linkSubst)
-- | An ad-hoc, single-argument-only implementation of a printf-style format
-- parser.
parseFormat :: OrgParser (String -> String)
parseFormat = try $ do
replacePlain <|> replaceUrl <|> justAppend
where
-- inefficient, but who cares
replacePlain = try $ (\x -> concat . flip intersperse x)
<$> sequence [tillSpecifier 's', rest]
replaceUrl = try $ (\x -> concat . flip intersperse x . urlEncode)
<$> sequence [tillSpecifier 'h', rest]
justAppend = try $ (++) <$> rest
rest = manyTill anyChar (eof <|> () <$ oneOf "\n\r")
tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))
--
-- Headers
--
@ -850,13 +892,15 @@ linkOrImage = explicitOrImageLink
explicitOrImageLink :: OrgParser (F Inlines)
explicitOrImageLink = try $ do
char '['
src <- linkTarget
srcF <- applyCustomLinkFormat =<< linkTarget
title <- enclosedRaw (char '[') (char ']')
title' <- parseFromString (mconcat <$> many inline) title
char ']'
return $ if isImageFilename src && isImageFilename title
then pure $ B.link src "" $ B.image title mempty mempty
else linkToInlinesF src =<< title'
return $ do
src <- srcF
if isImageFilename src && isImageFilename title
then pure $ B.link src "" $ B.image title mempty mempty
else linkToInlinesF src =<< title'
selflinkOrImage :: OrgParser (F Inlines)
selflinkOrImage = try $ do
@ -881,6 +925,14 @@ selfTarget = try $ char '[' *> linkTarget <* char ']'
linkTarget :: OrgParser String
linkTarget = enclosed (char '[') (char ']') (noneOf "\n\r[]")
applyCustomLinkFormat :: String -> OrgParser (F String)
applyCustomLinkFormat link = do
let (linkType, rest) = break (== ':') link
return $ do
formatter <- M.lookup linkType <$> asksF orgStateLinkFormatters
return $ maybe link ($ drop 1 rest) formatter
linkToInlinesF :: String -> Inlines -> F Inlines
linkToInlinesF s@('#':_) = pure . B.link s ""
linkToInlinesF s

View file

@ -304,6 +304,32 @@ tests =
] =?>
(para (spanWith ("link-here", [], []) mempty <> "Target.") <>
para (emph ("See" <> space <> "here!")))
, "Link abbreviation" =:
unlines [ "#+LINK: wp https://en.wikipedia.org/wiki/%s"
, "[[wp:Org_mode][Wikipedia on Org-mode]]"
] =?>
(para (link "https://en.wikipedia.org/wiki/Org_mode" ""
("Wikipedia" <> space <> "on" <> space <> "Org-mode")))
, "Link abbreviation, defined after first use" =:
unlines [ "[[zl:non-sense][Non-sense articles]]"
, "#+LINK: zl http://zeitlens.com/tags/%s.html"
] =?>
(para (link "http://zeitlens.com/tags/non-sense.html" ""
("Non-sense" <> space <> "articles")))
, "Link abbreviation, URL encoded arguments" =:
unlines [ "#+link: expl http://example.com/%h/foo"
, "[[expl:Hello, World!][Moin!]]"
] =?>
(para (link "http://example.com/Hello%2C%20World%21/foo" "" "Moin!"))
, "Link abbreviation, append arguments" =:
unlines [ "#+link: expl http://example.com/"
, "[[expl:foo][bar]]"
] =?>
(para (link "http://example.com/foo" "" "bar"))
]
, testGroup "Basic Blocks" $