HTML reader: allow enabling raw_tex extension.

This now allows raw LaTeX environments, `\ref`, and `\eqref` to
be parsed (which is helpful for translation HTML documents using
MathJaX).

Closes #1126.
This commit is contained in:
John MacFarlane 2018-08-24 18:04:00 -07:00
parent 4dddfbc435
commit a2c4261b32
3 changed files with 59 additions and 4 deletions

View file

@ -1917,7 +1917,8 @@ This extension can be enabled/disabled for the following formats
(in addition to `markdown`):
input formats
: `latex`, `org`, `textile`
: `latex`, `org`, `textile`, `html` (environments, `\ref`, and
`\eqref` only)
output formats
: `textile`, `commonmark`

View file

@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Class (PandocMonad (..))
import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps)
import Text.Pandoc.Definition
import Text.Pandoc.Readers.LaTeX (rawLaTeXInline)
import Text.Pandoc.Readers.LaTeX.Types (Macro)
import Text.Pandoc.Error
import Text.Pandoc.Logging
import Text.Pandoc.Options (
Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs,
Ext_native_spans, Ext_raw_html, Ext_line_blocks),
Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex),
ReaderOptions (readerExtensions, readerStripComments),
extensionEnabled)
import Text.Pandoc.Parsing hiding ((<|>))
@ -102,7 +104,8 @@ readHtml opts inp = do
(m:_) -> messageString m
result <- flip runReaderT def $
runParserT parseDoc
(HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
(HTMLState def{ stateOptions = opts }
[] Nothing Set.empty M.empty [] M.empty)
"source" tags
case result of
Right doc -> return doc
@ -124,7 +127,8 @@ data HTMLState =
baseHref :: Maybe URI,
identifiers :: Set.Set String,
headerMap :: M.Map Inlines String,
logMessages :: [LogMessage]
logMessages :: [LogMessage],
macros :: M.Map Text Macro
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@ -907,9 +911,25 @@ pTagContents =
<|> pStr
<|> pSpace
<|> smartPunctuation pTagContents
<|> pRawTeX
<|> pSymbol
<|> pBad
pRawTeX :: PandocMonad m => InlinesParser m Inlines
pRawTeX = do
lookAhead $ try $ do
char '\\'
choice $ map (try . string) ["begin", "eqref", "ref"]
guardEnabled Ext_raw_tex
inp <- getInput
st <- getState
res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp)
case res of
Left _ -> mzero
Right (contents, raw) -> do
_ <- count (length raw) anyChar
return $ B.rawInline "tex" contents
pStr :: PandocMonad m => InlinesParser m Inlines
pStr = do
result <- many1 $ satisfy $ \c ->
@ -923,6 +943,7 @@ isSpecial '\'' = True
isSpecial '.' = True
isSpecial '-' = True
isSpecial '$' = True
isSpecial '\\' = True
isSpecial '\8216' = True
isSpecial '\8217' = True
isSpecial '\8220' = True
@ -1249,6 +1270,10 @@ isSpace _ = False
-- Instances
instance HasMacros HTMLState where
extractMacros = macros
updateMacros f st = st{ macros = f $ macros st }
instance HasIdentifierList HTMLState where
extractIdentifierList = identifiers
updateIdentifierList f s = s{ identifiers = f (identifiers s) }

29
test/command/1126.md Normal file
View file

@ -0,0 +1,29 @@
```
% pandoc -f html -t latex
\begin{eqnarray}
A&=&B,\\
C&=&D
\end{eqnarray}
^D
\textbackslash{}begin\{eqnarray\}
A\&=\&B,\textbackslash{}\textbackslash{} C\&=\&D
\textbackslash{}end\{eqnarray\}
```
```
% pandoc -f html+raw_tex -t latex
<p>See \eqref{myeq}.</p>
\begin{eqnarray}
A&=&B,\\
C&amp;=&amp;D
\\label{myeq}
\end{eqnarray}
^D
See \eqref{myeq}.
\begin{eqnarray}
A&=&B,\\
C&=&D
\\label{myeq}
\end{eqnarray}
```