Issue warning for duplicate header identifiers.

As noted in the previous commit, an autogenerated identifier
may still coincide with an explicit identifier that is given
for a header later in the document, or with an identifier on
a div, span, link, or image. This commit adds a warning
in this case, so users can supply an explicit identifier.

* Added `DuplicateIdentifier` to LogMessage.
* Modified HTML, Org, MediaWiki readers so their custom
  state type is an instance of HasLogMessages.  This is necessary
  for `registerHeader` to issue warnings.

See #1745.
This commit is contained in:
John MacFarlane 2017-03-12 22:03:10 +01:00
parent c8b906256d
commit 2f8f8f0da6
6 changed files with 46 additions and 8 deletions

View file

@ -62,6 +62,7 @@ data LogMessage =
| CouldNotParseYamlMetadata String SourcePos
| DuplicateLinkReference String SourcePos
| DuplicateNoteReference String SourcePos
| DuplicateIdentifier String SourcePos
| ReferenceNotFound String SourcePos
| CircularReference String SourcePos
| ParsingUnescaped String SourcePos
@ -106,6 +107,11 @@ instance ToJSON LogMessage where
"source" .= Text.pack (sourceName pos),
"line" .= toJSON (sourceLine pos),
"column" .= toJSON (sourceColumn pos)]
DuplicateIdentifier s pos ->
["contents" .= Text.pack s,
"source" .= Text.pack (sourceName pos),
"line" .= toJSON (sourceLine pos),
"column" .= toJSON (sourceColumn pos)]
ReferenceNotFound s pos ->
["contents" .= Text.pack s,
"source" .= Text.pack (sourceName pos),
@ -184,6 +190,8 @@ showLogMessage msg =
"Duplicate link reference '" ++ s ++ "' at " ++ showPos pos
DuplicateNoteReference s pos ->
"Duplicate note reference '" ++ s ++ "' at " ++ showPos pos
DuplicateIdentifier s pos ->
"Duplicate identifier '" ++ s ++ "' at " ++ showPos pos
ReferenceNotFound s pos ->
"Reference not found for '" ++ s ++ "' at " ++ showPos pos
CircularReference s pos ->
@ -233,6 +241,7 @@ messageVerbosity msg =
CouldNotParseYamlMetadata{} -> WARNING
DuplicateLinkReference{} -> WARNING
DuplicateNoteReference{} -> WARNING
DuplicateIdentifier{} -> WARNING
ReferenceNotFound{} -> WARNING
CircularReference{} -> WARNING
CouldNotLoadIncludeFile{} -> WARNING
@ -249,4 +258,4 @@ messageVerbosity msg =
CouldNotParseCSS{} -> WARNING
Fetching{} -> INFO
NoTitleElement{} -> WARNING
NoLangSpecified -> INFO
NoLangSpecified -> INFO

View file

@ -1112,8 +1112,11 @@ type SubstTable = M.Map Key Inlines
-- with its associated identifier. If the identifier is null
-- and the auto_identifers extension is set, generate a new
-- unique identifier, and update the list of identifiers
-- in state.
registerHeader :: (Stream s m a, HasReaderOptions st, HasHeaderMap st, HasIdentifierList st)
-- in state. Issue a warning if an explicit identifier
-- is encountered that duplicates an earlier identifier
-- (explict or automatically generated).
registerHeader :: (Stream s m a, HasReaderOptions st,
HasHeaderMap st, HasLogMessages st, HasIdentifierList st)
=> Attr -> Inlines -> ParserT s st m Attr
registerHeader (ident,classes,kvs) header' = do
ids <- extractIdentifierList <$> getState
@ -1131,6 +1134,9 @@ registerHeader (ident,classes,kvs) header' = do
return (id'',classes,kvs)
else do
unless (null ident) $ do
when (ident `Set.member` ids) $ do
pos <- getPosition
logMessage $ DuplicateIdentifier ident pos
updateState $ updateIdentifierList $ Set.insert ident
updateState $ updateHeaderMap $ insert' header' ident
return (ident,classes,kvs)

View file

@ -83,14 +83,15 @@ readHtml opts inp = do
blocks <- (fixPlains False) . mconcat <$> manyTill block eof
meta <- stateMeta . parserState <$> getState
bs' <- replaceNotes (B.toList blocks)
reportLogMessages
return $ Pandoc meta bs'
getError (errorMessages -> ms) = case ms of
[] -> ""
(m:_) -> messageString m
result <- flip runReaderT def $
runParserT parseDoc
(HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty)
"source" tags
runParserT parseDoc
(HTMLState def{ stateOptions = opts } [] Nothing Set.empty M.empty [])
"source" tags
case result of
Right doc -> return doc
Left err -> throwError $ PandocParseError $ getError err
@ -110,7 +111,8 @@ data HTMLState =
noteTable :: [(String, Blocks)],
baseHref :: Maybe URI,
identifiers :: Set.Set String,
headerMap :: M.Map Inlines String
headerMap :: M.Map Inlines String,
logMessages :: [LogMessage]
}
data HTMLLocal = HTMLLocal { quoteContext :: QuoteContext
@ -376,7 +378,7 @@ ignore raw = do
-- raw can be null for tags like <!DOCTYPE>; see paRawTag
-- in this case we don't want a warning:
unless (null raw) $
report $ SkippedContent raw pos
logMessage $ SkippedContent raw pos
return mempty
pHtmlBlock :: PandocMonad m => String -> TagParser m String
@ -1092,6 +1094,10 @@ instance HasHeaderMap HTMLState where
extractHeaderMap = headerMap
updateHeaderMap f s = s{ headerMap = f (headerMap s) }
instance HasLogMessages HTMLState where
addLogMessage m s = s{ logMessages = m : logMessages s }
getLogMessages = reverse . logMessages
-- This signature should be more general
-- MonadReader HTMLLocal m => HasQuoteContext st m
instance PandocMonad m => HasQuoteContext HTMLState (ReaderT HTMLLocal m) where

View file

@ -73,6 +73,7 @@ readMediaWiki opts s = do
, mwCategoryLinks = []
, mwHeaderMap = M.empty
, mwIdentifierList = Set.empty
, mwLogMessages = []
}
(s ++ "\n")
case parsed of
@ -85,6 +86,7 @@ data MWState = MWState { mwOptions :: ReaderOptions
, mwCategoryLinks :: [Inlines]
, mwHeaderMap :: M.Map Inlines String
, mwIdentifierList :: Set.Set String
, mwLogMessages :: [LogMessage]
}
type MWParser m = ParserT [Char] MWState m
@ -100,6 +102,10 @@ instance HasIdentifierList MWState where
extractIdentifierList = mwIdentifierList
updateIdentifierList f st = st{ mwIdentifierList = f $ mwIdentifierList st }
instance HasLogMessages MWState where
addLogMessage m s = s{ mwLogMessages = m : mwLogMessages s }
getLogMessages = reverse . mwLogMessages
--
-- auxiliary functions
--
@ -187,6 +193,7 @@ parseMediaWiki = do
let categories = if null categoryLinks
then mempty
else B.para $ mconcat $ intersperse B.space categoryLinks
reportLogMessages
return $ B.doc $ bs <> categories
--

View file

@ -35,6 +35,7 @@ import Text.Pandoc.Class (PandocMonad)
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Options
import Text.Pandoc.Parsing (reportLogMessages)
import Control.Monad.Except (throwError)
import Control.Monad.Reader (runReaderT)
@ -59,4 +60,5 @@ parseOrg :: PandocMonad m => OrgParser m Pandoc
parseOrg = do
blocks' <- blockList
meta' <- meta
reportLogMessages
return $ Pandoc meta' blocks'

View file

@ -60,7 +60,9 @@ import qualified Data.Set as Set
import Text.Pandoc.Builder (Blocks, Inlines, trimInlines)
import Text.Pandoc.Definition (Meta (..), nullMeta)
import Text.Pandoc.Options (ReaderOptions (..))
import Text.Pandoc.Logging
import Text.Pandoc.Parsing (HasHeaderMap (..), HasIdentifierList (..),
HasLogMessages (..),
HasLastStrPosition (..), HasQuoteContext (..),
HasReaderOptions (..), ParserContext (..),
QuoteContext (..), SourcePos)
@ -104,6 +106,7 @@ data OrgParserState = OrgParserState
, orgStateOptions :: ReaderOptions
, orgStateParserContext :: ParserContext
, orgStateTodoSequences :: [TodoSequence]
, orgLogMessages :: [LogMessage]
}
data OrgParserLocal = OrgParserLocal { orgLocalQuoteContext :: QuoteContext }
@ -130,6 +133,10 @@ instance HasHeaderMap OrgParserState where
extractHeaderMap = orgStateHeaderMap
updateHeaderMap f s = s{ orgStateHeaderMap = f (orgStateHeaderMap s) }
instance HasLogMessages OrgParserState where
addLogMessage msg st = st{ orgLogMessages = msg : orgLogMessages st }
getLogMessages st = reverse $ orgLogMessages st
instance Default OrgParserState where
def = defaultOrgParserState
@ -150,6 +157,7 @@ defaultOrgParserState = OrgParserState
, orgStateOptions = def
, orgStateParserContext = NullState
, orgStateTodoSequences = []
, orgLogMessages = []
}
optionsToParserState :: ReaderOptions -> OrgParserState