Org reader: allow changing emphasis syntax

The characters allowed before and after emphasis can be configured via `#+pandoc-emphasis-pre` and `#+pandoc-emphasis-post`, respectively. This allows to change which strings are recognized as emphasized text on a per-document or even per-paragraph basis. The allowed characters must be given as (Haskell) string. #+pandoc-emphasis-pre: "-\t ('\"{" #+pandoc-emphasis-post: "-\t\n .,:!?;'\")}[" If the argument cannot be read as a string, the default value is restored. Closes: #4378
2018-02-21 08:53:29 +01:00 · 2018-02-21 08:53:29 +01:00 · 00d20ccd09
commit 00d20ccd09
parent 84db7e492a
4 changed files with 58 additions and 12 deletions
--- a/src/Text/Pandoc/Readers/Org/Inlines.hs
+++ b/src/Text/Pandoc/Readers/Org/Inlines.hs
@ -603,6 +603,8 @@ updatePositions :: PandocMonad m
                => Char
                -> OrgParser m Char
 updatePositions c = do
+  st <- getState
+  let emphasisPreChars = orgStateEmphasisPreChars st
  when (c `elem` emphasisPreChars) updateLastPreCharPos
  when (c `elem` emphasisForbiddenBorderChars) updateLastForbiddenCharPos
  return c
@ -681,8 +683,10 @@ emphasisEnd c = try $ do
  updateLastStrPos
  popInlineCharStack
  return c
- where acceptablePostChars =
-           surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)
+ where
+  acceptablePostChars = do
+    emphasisPostChars <- orgStateEmphasisPostChars <$> getState
+    surroundingEmphasisChar >>= \x -> oneOf (x ++ emphasisPostChars)

 mathStart :: PandocMonad m => Char -> OrgParser m Char
 mathStart c = try $
@ -734,14 +738,6 @@ many1TillNOrLessNewlines n p end = try $
 -- here (see, e.g., the Emacs Lisp variable `org-emphasis-regexp-components`
 -- for details).

-- | Chars allowed to occur before emphasis (spaces and newlines are ok, too)
-emphasisPreChars :: [Char]
-emphasisPreChars = "-\t ('\"{"
-
-- | Chars allowed at after emphasis
-emphasisPostChars :: [Char]
-emphasisPostChars = "-\t\n .,:!?;'\")}["
-
 -- | Chars not allowed at the (inner) border of emphasis
 emphasisForbiddenBorderChars :: [Char]
 emphasisForbiddenBorderChars = "\t\n\r "
--- a/src/Text/Pandoc/Readers/Org/Meta.hs
+++ b/src/Text/Pandoc/Readers/Org/Meta.hs
@ -43,6 +43,7 @@ import Text.Pandoc.Builder (Blocks, Inlines)
 import qualified Text.Pandoc.Builder as B
 import Text.Pandoc.Class (PandocMonad)
 import Text.Pandoc.Definition
+import Text.Pandoc.Shared (safeRead)

 import Control.Monad (mzero, void, when)
 import Data.Char (toLower)
@ -154,6 +155,8 @@ optionLine = try $ do
    "seq_todo" -> todoSequence >>= updateState . registerTodoSequence
    "typ_todo" -> todoSequence >>= updateState . registerTodoSequence
    "macro"    -> macroDefinition >>= updateState . registerMacro
+    "pandoc-emphasis-pre" -> emphChars >>= updateState . setEmphasisPreChar
+    "pandoc-emphasis-post" -> emphChars >>= updateState . setEmphasisPostChar
    _          -> mzero

 addLinkFormat :: Monad m => String
@ -184,6 +187,25 @@ parseFormat = try $ replacePlain <|> replaceUrl <|> justAppend
   rest            = manyTill anyChar         (eof <|> () <$ oneOf "\n\r")
   tillSpecifier c = manyTill (noneOf "\n\r") (try $ string ('%':c:""))

+setEmphasisPreChar :: Maybe [Char] -> OrgParserState -> OrgParserState
+setEmphasisPreChar csMb st =
+  let preChars = case csMb of
+                   Nothing -> orgStateEmphasisPreChars defaultOrgParserState
+                   Just cs -> cs
+  in st { orgStateEmphasisPreChars = preChars }
+
+setEmphasisPostChar :: Maybe [Char] -> OrgParserState -> OrgParserState
+setEmphasisPostChar csMb st =
+  let postChars = case csMb of
+                   Nothing -> orgStateEmphasisPostChars defaultOrgParserState
+                   Just cs -> cs
+  in st { orgStateEmphasisPostChars = postChars }
+
+emphChars :: Monad m => OrgParser m (Maybe [Char])
+emphChars = do
+  skipSpaces
+  safeRead <$> anyLine
+
 inlinesTillNewline :: PandocMonad m => OrgParser m (F Inlines)
 inlinesTillNewline = do
  updateLastPreCharPos
--- a/src/Text/Pandoc/Readers/Org/ParserState.hs
+++ b/src/Text/Pandoc/Readers/Org/ParserState.hs
@ -29,6 +29,7 @@ Define the Org-mode parser state.
 -}
 module Text.Pandoc.Readers.Org.ParserState
  ( OrgParserState (..)
+  , defaultOrgParserState
  , OrgParserLocal (..)
  , OrgNoteRecord
  , HasReaderOptions (..)
@ -104,6 +105,11 @@ type TodoSequence = [TodoMarker]
 data OrgParserState = OrgParserState
  { orgStateAnchorIds            :: [String]
  , orgStateEmphasisCharStack    :: [Char]
+  , orgStateEmphasisPreChars     :: [Char] -- ^ Chars allowed to occur before
+                                           -- emphasis; spaces and newlines are
+                                           -- always ok in addition to what is
+                                           -- specified here.
+  , orgStateEmphasisPostChars    :: [Char] -- ^ Chars allowed at after emphasis
  , orgStateEmphasisNewlines     :: Maybe Int
  , orgStateExportSettings       :: ExportSettings
  , orgStateHeaderMap            :: M.Map Inlines String
@ -124,7 +130,9 @@ data OrgParserState = OrgParserState
  , orgMacros                    :: M.Map Text Macro
  }

-data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
+data OrgParserLocal = OrgParserLocal
+  { orgLocalQuoteContext :: QuoteContext
+  }

 instance Default OrgParserLocal where
  def = OrgParserLocal NoQuote
@ -168,6 +176,8 @@ instance Default OrgParserState where
 defaultOrgParserState :: OrgParserState
 defaultOrgParserState = OrgParserState
  { orgStateAnchorIds = []
+  , orgStateEmphasisPreChars = "-\t ('\"{"
+  , orgStateEmphasisPostChars  = "-\t\n .,:!?;'\")}["
  , orgStateEmphasisCharStack = []
  , orgStateEmphasisNewlines = Nothing
  , orgStateExportSettings = def
--- a/test/Tests/Readers/Org/Meta.hs
+++ b/test/Tests/Readers/Org/Meta.hs
@ -1,7 +1,7 @@
 {-# LANGUAGE OverloadedStrings #-}
 module Tests.Readers.Org.Meta (tests) where

-import Test.Tasty (TestTree)
+import Test.Tasty (TestTree, testGroup)
 import Tests.Helpers ((=?>))
 import Tests.Readers.Org.Shared ((=:), spcSep)
 import Text.Pandoc
@ -170,4 +170,22 @@ tests =
                , "[[expl:foo][bar]]"
                ] =?>
      para (link "http://example.com/foo" "" "bar")
+
+  , testGroup "emphasis config"
+    [ "Changing pre and post chars for emphasis" =:
+        T.unlines [ "#+pandoc-emphasis-pre: \"[)\""
+                  , "#+pandoc-emphasis-post: \"]\\n\""
+                  , "([/emph/])*foo*"
+                  ] =?>
+        para ("([" <> emph "emph" <> "])" <> strong "foo")
+
+    , "setting an invalid value restores the default" =:
+        T.unlines [ "#+pandoc-emphasis-pre: \"[\""
+                  , "#+pandoc-emphasis-post: \"]\""
+                  , "#+pandoc-emphasis-pre:"
+                  , "#+pandoc-emphasis-post:"
+                  , "[/noemph/]"
+                  ] =?>
+        para ("[/noemph/]")
+    ]
  ]