From 23ab5ffcb0b708e4f07cafd7a877926ce0dc532b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Thu, 10 Jan 2013 20:22:18 -0800 Subject: [PATCH] Implemented Ext_header_attributes. This allows explicit attributes to be put on headers, using a syntax like that for code blocks: {#id .class .class k=v k=v} --- README | 27 +++++++++++++++++++++--- src/Text/Pandoc/Options.hs | 2 ++ src/Text/Pandoc/Readers/Markdown.hs | 32 ++++++++++++++++++----------- 3 files changed, 46 insertions(+), 15 deletions(-) diff --git a/README b/README index e88ce0a57..2b0da269b 100644 --- a/README +++ b/README @@ -872,10 +872,31 @@ wrapping). Consider, for example: ### Header identifiers in HTML, LaTeX, and ConTeXt ### -**Extension: `header_identifiers`** +**Extension: `header_attributes`** -Each header element in pandoc's HTML and ConTeXt output is given a -unique identifier. This identifier is based on the text of the header. +Headers can be assigned attributes using this syntax at the end +of the line containing the header text: + + {#identifier .class .class key=value key=value} + +Although this syntax allows assignment of classes and key/value attributes, +only identifiers currently have any effect in the writers (and only in some +writers: HTML, LaTeX, ConTeXt, Textile, AsciiDoc). Thus, for example, +the following headers will all be assigned the identifier `foo`: + + # My header {#foo} + + ## My header ## {#foo} + + My other header {#foo} + --------------- + +(This syntax is compatible with [PHP Markdown Extra].) + +**Extension: `auto_identifiers`** + +A header without an explicitly specified identifier will be +automatically assigned a unique identifier based on the header text. To derive the identifier from the header text, - Remove all formatting, links, etc. 
diff --git a/src/Text/Pandoc/Options.hs b/src/Text/Pandoc/Options.hs index e64e7afda..b62187bfe 100644 --- a/src/Text/Pandoc/Options.hs +++ b/src/Text/Pandoc/Options.hs @@ -91,6 +91,7 @@ data Extension = | Ext_literate_haskell -- ^ Enable literate Haskell conventions | Ext_abbreviations -- ^ PHP markdown extra abbreviation definitions | Ext_auto_identifiers -- ^ Automatic identifiers for headers + | Ext_header_attributes -- ^ Explicit header attributes {#id .class k=v} | Ext_implicit_header_references -- ^ Implicit reference links for headers deriving (Show, Read, Enum, Eq, Ord, Bounded) @@ -129,6 +130,7 @@ pandocExtensions = Set.fromList , Ext_superscript , Ext_subscript , Ext_auto_identifiers + , Ext_header_attributes , Ext_implicit_header_references ] diff --git a/src/Text/Pandoc/Readers/Markdown.hs b/src/Text/Pandoc/Readers/Markdown.hs index 5ff196571..37f12c2e0 100644 --- a/src/Text/Pandoc/Readers/Markdown.hs +++ b/src/Text/Pandoc/Readers/Markdown.hs @@ -373,40 +373,48 @@ header :: MarkdownParser (F Blocks) header = setextHeader <|> atxHeader "header" -- returns unique identifier -addToHeaderList :: F Inlines -> MarkdownParser String -addToHeaderList text = do +addToHeaderList :: Attr -> F Inlines -> MarkdownParser Attr +addToHeaderList (ident,classes,kvs) text = do let headerList = B.toList $ runF text defaultParserState updateState $ \st -> st{ stateHeaders = headerList : stateHeaders st } (do guardEnabled Ext_auto_identifiers ids <- stateIdentifiers `fmap` getState - let id' = uniqueIdent headerList ids + let id' = if null ident + then uniqueIdent headerList ids + else ident updateState $ \st -> st{ stateIdentifiers = id' : ids } - return id') <|> return "" + return (id',classes,kvs)) <|> return ("",classes,kvs) atxHeader :: MarkdownParser (F Blocks) atxHeader = try $ do level <- many1 (char '#') >>= return . length notFollowedBy (char '.' <|> char ')') -- this would be a list skipSpaces - text <- trimInlinesF . 
mconcat <$> manyTill inline atxClosing - id' <- addToHeaderList text - return $ B.headerWith (id',[],[]) level <$> text + text <- trimInlinesF . mconcat <$> many (notFollowedBy atxClosing >> inline) + attr <- atxClosing + attr' <- addToHeaderList attr text + return $ B.headerWith attr' level <$> text -atxClosing :: Parser [Char] st String -atxClosing = try $ skipMany (char '#') >> blanklines +atxClosing :: Parser [Char] st Attr +atxClosing = + try $ skipMany (char '#') >> skipSpaces >> option nullAttr attributes <* blanklines + +setextHeaderEnd :: MarkdownParser Attr +setextHeaderEnd = try $ option nullAttr attributes <* blankline setextHeader :: MarkdownParser (F Blocks) setextHeader = try $ do -- This lookahead prevents us from wasting time parsing Inlines -- unless necessary -- it gives a significant performance boost. lookAhead $ anyLine >> many1 (oneOf setextHChars) >> blankline - text <- trimInlinesF . mconcat <$> many1Till inline newline + text <- trimInlinesF . mconcat <$> many1 (notFollowedBy setextHeaderEnd >> inline) + attr <- setextHeaderEnd underlineChar <- oneOf setextHChars many (char underlineChar) blanklines let level = (fromMaybe 0 $ findIndex (== underlineChar) setextHChars) + 1 - id' <- addToHeaderList text - return $ B.headerWith (id',[],[]) level <$> text + attr' <- addToHeaderList attr text + return $ B.headerWith attr' level <$> text -- -- hrule block