From a2c4261b32d3ed3fd6562a5260d25a82d9f75e9b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Fri, 24 Aug 2018 18:04:00 -0700 Subject: [PATCH] HTML reader: allow enabling `raw_tex` extension. This now allows raw LaTeX environments, `\ref`, and `\eqref` to be parsed (which is helpful for translation HTML documents using MathJaX). Closes #1126. --- MANUAL.txt | 3 ++- src/Text/Pandoc/Readers/HTML.hs | 31 ++++++++++++++++++++++++++++--- test/command/1126.md | 29 +++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) create mode 100644 test/command/1126.md diff --git a/MANUAL.txt b/MANUAL.txt index 7ba9e188e..4c68bfa86 100644 --- a/MANUAL.txt +++ b/MANUAL.txt @@ -1917,7 +1917,8 @@ This extension can be enabled/disabled for the following formats (in addition to `markdown`): input formats -: `latex`, `org`, `textile` +: `latex`, `org`, `textile`, `html` (environments, `\ref`, and + `\eqref` only) output formats : `textile`, `commonmark` diff --git a/src/Text/Pandoc/Readers/HTML.hs b/src/Text/Pandoc/Readers/HTML.hs index 697ab89e9..ea62bfcbf 100644 --- a/src/Text/Pandoc/Readers/HTML.hs +++ b/src/Text/Pandoc/Readers/HTML.hs @@ -68,11 +68,13 @@ import qualified Text.Pandoc.Builder as B import Text.Pandoc.Class (PandocMonad (..)) import Text.Pandoc.CSS (foldOrElse, pickStyleAttrProps) import Text.Pandoc.Definition +import Text.Pandoc.Readers.LaTeX (rawLaTeXInline) +import Text.Pandoc.Readers.LaTeX.Types (Macro) import Text.Pandoc.Error import Text.Pandoc.Logging import Text.Pandoc.Options ( Extension (Ext_epub_html_exts, Ext_empty_paragraphs, Ext_native_divs, - Ext_native_spans, Ext_raw_html, Ext_line_blocks), + Ext_native_spans, Ext_raw_html, Ext_line_blocks, Ext_raw_tex), ReaderOptions (readerExtensions, readerStripComments), extensionEnabled) import Text.Pandoc.Parsing hiding ((<|>)) @@ -102,7 +104,8 @@ readHtml opts inp = do (m:_) -> messageString m result <- flip runReaderT def $ runParserT parseDoc - (HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty []) + (HTMLState def{ stateOptions = opts } + [] Nothing Set.empty M.empty [] M.empty) "source" tags case result of Right doc -> return doc @@ -124,7 +127,8 @@ data HTMLState = baseHref :: Maybe URI, identifiers :: Set.Set String, headerMap :: M.Map Inlines String, - logMessages :: [LogMessage] + logMessages :: [LogMessage], + macros :: M.Map Text Macro } data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext @@ -907,9 +911,25 @@ pTagContents = <|> pStr <|> pSpace <|> smartPunctuation pTagContents + <|> pRawTeX <|> pSymbol <|> pBad +pRawTeX :: PandocMonad m => InlinesParser m Inlines +pRawTeX = do + lookAhead $ try $ do + char '\\' + choice $ map (try . string) ["begin", "eqref", "ref"] + guardEnabled Ext_raw_tex + inp <- getInput + st <- getState + res <- lift $ runParserT (withRaw rawLaTeXInline) st "chunk" (T.unpack inp) + case res of + Left _ -> mzero + Right (contents, raw) -> do + _ <- count (length raw) anyChar + return $ B.rawInline "tex" contents + pStr :: PandocMonad m => InlinesParser m Inlines pStr = do result <- many1 $ satisfy $ \c -> @@ -923,6 +943,7 @@ isSpecial '\'' = True isSpecial '.' = True isSpecial '-' = True isSpecial '$' = True +isSpecial '\\' = True isSpecial '\8216' = True isSpecial '\8217' = True isSpecial '\8220' = True @@ -1249,6 +1270,10 @@ isSpace _ = False -- Instances +instance HasMacros HTMLState where + extractMacros = macros + updateMacros f st = st{ macros = f $ macros st } + instance HasIdentifierList HTMLState where extractIdentifierList = identifiers updateIdentifierList f s = s{ identifiers = f (identifiers s) } diff --git a/test/command/1126.md b/test/command/1126.md new file mode 100644 index 000000000..014a8ae2d --- /dev/null +++ b/test/command/1126.md @@ -0,0 +1,29 @@ +``` +% pandoc -f html -t latex +\begin{eqnarray} +A&=&B,\\ +C&=&D +\end{eqnarray} +^D +\textbackslash{}begin\{eqnarray\} +A\&=\&B,\textbackslash{}\textbackslash{} C\&=\&D +\textbackslash{}end\{eqnarray\} +``` + +``` +% pandoc -f html+raw_tex -t latex +

See \eqref{myeq}.

+\begin{eqnarray} +A&=&B,\\ +C&=&D +\\label{myeq} +\end{eqnarray} +^D +See \eqref{myeq}. + +\begin{eqnarray} +A&=&B,\\ +C&=&D +\\label{myeq} +\end{eqnarray} +```