HTML writer: wrap sections in divs. Resolves Issue #70.

+ hierarchicalize has been rationalized; it builds a hierarchical
  representation of the document from the headers, and simultaneously
  gives each section a unique identifier based on the heading title.
+ Identifiers are now attached to the divs rather than
  to the headers themselves.
+ Table of content backlinks go to the beginning of the table, rather
  than to the section reference that was clicked.  This seems better.
+ Code for constructing identifiers has been moved to Text.Pandoc.Shared
  from the HTML writer, since it is now consumed only by
  hierarchicalize.
+ In --strict mode, pandoc just prints bare headings, as before
  (unless --toc has been specified).
+ In s5 output, it does not wrap sections in divs, as that seems to
  confuse the s5 javascript.
+ Test suite updated accordingly.

git-svn-id: https://pandoc.googlecode.com/svn/trunk@1562 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2009-04-25 00:29:58 +00:00
parent 2ab7640df6
commit df5244fd48
10 changed files with 1212 additions and 1151 deletions

View file

@ -1,4 +1,4 @@
{-# LANGUAGE CPP #-}
{-# LANGUAGE CPP, DeriveDataTypeable #-}
{-
Copyright (C) 2006-8 John MacFarlane <jgm@berkeley.edu>
@ -112,13 +112,16 @@ import Text.ParserCombinators.Parsec
import Text.PrettyPrint.HughesPJ ( Doc, fsep, ($$), (<>), empty, isEmpty, text, nest )
import qualified Text.PrettyPrint.HughesPJ as PP
import Text.Pandoc.CharacterReferences ( characterReference )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper )
import Data.Char ( toLower, toUpper, ord, isLower, isUpper, isAlpha,
isPunctuation )
import Data.List ( find, isPrefixOf, intercalate )
import Control.Monad ( join )
import Network.URI ( parseURI, URI (..), isAllowedInURI )
import System.Directory
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import System.IO.UTF8
import Data.Generics
import qualified Control.Monad.State as S
import Control.Monad (join)
--
-- List processing
@ -878,22 +881,74 @@ endsWithPlain blocks =
-- | Data structure for defining hierarchical Pandoc documents
data Element = Blk Block
| Sec [Inline] [Element] deriving (Eq, Read, Show)
| Sec Int String [Inline] [Element]
-- lvl ident label contents
deriving (Eq, Read, Show, Typeable, Data)
-- | Returns @True@ on Header block with at least the specified level
headerAtLeast :: Int -> Block -> Bool
headerAtLeast level (Header x _) = x <= level
headerAtLeast _ _ = False
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
inlineListToIdentifier' :: [Inline] -> [Char]
inlineListToIdentifier' [] = ""
inlineListToIdentifier' (x:xs) =
xAsText ++ inlineListToIdentifier' xs
where xAsText = case x of
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
intercalate "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier' lst
Strikeout lst -> inlineListToIdentifier' lst
Superscript lst -> inlineListToIdentifier' lst
SmallCaps lst -> inlineListToIdentifier' lst
Subscript lst -> inlineListToIdentifier' lst
Strong lst -> inlineListToIdentifier' lst
Quoted _ lst -> inlineListToIdentifier' lst
Cite _ lst -> inlineListToIdentifier' lst
Code s -> s
Space -> "-"
EmDash -> "-"
EnDash -> "-"
Apostrophe -> ""
Ellipses -> ""
LineBreak -> "-"
Math _ _ -> ""
TeX _ -> ""
HtmlInline _ -> ""
Link lst _ -> inlineListToIdentifier' lst
Image lst _ -> inlineListToIdentifier' lst
Note _ -> ""
-- | Convert list of Pandoc blocks into (hierarchical) list of Elements
hierarchicalize :: [Block] -> [Element]
hierarchicalize [] = []
hierarchicalize (block:rest) =
case block of
(Header level title) ->
let (thisSection, rest') = break (headerAtLeast level) rest
in (Sec title (hierarchicalize thisSection)):(hierarchicalize rest')
x -> (Blk x):(hierarchicalize rest)
hierarchicalize blocks = S.evalState (hierarchicalizeWithIds blocks) []
hierarchicalizeWithIds :: [Block] -> S.State [String] [Element]
hierarchicalizeWithIds [] = return []
hierarchicalizeWithIds ((Header level title'):xs) = do
usedIdents <- S.get
let ident = uniqueIdent title' usedIdents
S.modify (ident :)
let (sectionContents, rest) = break (headerLtEq level) xs
sectionContents' <- hierarchicalizeWithIds sectionContents
rest' <- hierarchicalizeWithIds rest
return $ Sec level ident title' sectionContents' : rest'
hierarchicalizeWithIds (x:rest) = do
rest' <- hierarchicalizeWithIds rest
return $ (Blk x) : rest'
headerLtEq :: Int -> Block -> Bool
headerLtEq level (Header l _) = l <= level
headerLtEq _ _ = False
uniqueIdent :: [Inline] -> [String] -> String
uniqueIdent title' usedIdents =
let baseIdent = inlineListToIdentifier title'
numIdent n = baseIdent ++ "-" ++ show n
in if baseIdent `elem` usedIdents
then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of
Just x -> numIdent x
Nothing -> baseIdent -- if we have more than 60,000, allow repeats
else baseIdent
-- | True if block is a Header block.
isHeaderBlock :: Block -> Bool

View file

@ -80,7 +80,7 @@ writeDocbook opts (Pandoc (Meta title authors date) blocks) =
-- | Convert an Element to Docbook.
elementToDocbook :: WriterOptions -> Element -> Doc
elementToDocbook opts (Blk block) = blockToDocbook opts block
elementToDocbook opts (Sec title elements) =
elementToDocbook opts (Sec _ _ title elements) =
-- Docbook doesn't allow sections with no content, so insert some if needed
let elements' = if null elements
then [Blk (Para [])]

View file

@ -36,22 +36,21 @@ import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss )
import Numeric ( showHex )
import Data.Char ( ord, toLower, isAlpha )
import Data.Char ( ord, toLower )
import Data.List ( isPrefixOf, intercalate )
import Data.Maybe ( catMaybes )
import qualified Data.Set as S
import Control.Monad.State
import Text.XHtml.Transitional hiding ( stringToHtml )
data WriterState = WriterState
{ stNotes :: [Html] -- ^ List of notes
, stIds :: [String] -- ^ List of header identifiers
, stMath :: Bool -- ^ Math is used in document
, stCSS :: S.Set String -- ^ CSS to include in header
} deriving Show
defaultWriterState :: WriterState
defaultWriterState = WriterState {stNotes= [], stIds = [],
stMath = False, stCSS = S.empty}
defaultWriterState = WriterState {stNotes= [], stMath = False, stCSS = S.empty}
-- Helpers to render HTML with the appropriate function.
@ -107,15 +106,13 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
not (writerS5 opts)
then h1 ! [theclass "title"] $ topTitle
else noHtml
headerBlocks = filter isHeaderBlock blocks
ids = uniqueIdentifiers $
map (\(Header _ lst) -> lst) headerBlocks
sects = hierarchicalize blocks
toc = if writerTableOfContents opts
then tableOfContents opts headerBlocks ids
then evalState (tableOfContents opts sects) defaultWriterState
else noHtml
(blocks', newstate) =
runState (blockListToHtml opts blocks)
(defaultWriterState {stIds = ids})
(blocks', newstate) = runState
(mapM (elementToHtml opts) sects >>= return . toHtmlFromList)
defaultWriterState
cssLines = stCSS newstate
css = if S.null cssLines
then noHtml
@ -146,35 +143,36 @@ writeHtml opts (Pandoc (Meta tit authors date) blocks) =
then head' +++ body thebody
else thebody
-- | Construct table of contents from list of header blocks and identifiers.
-- Assumes there are as many identifiers as header blocks.
tableOfContents :: WriterOptions -> [Block] -> [String] -> Html
tableOfContents _ [] _ = noHtml
tableOfContents opts headers ids =
-- | Construct table of contents from list of elements.
tableOfContents :: WriterOptions -> [Element] -> State WriterState Html
tableOfContents _ [] = return noHtml
tableOfContents opts sects = do
let opts' = opts { writerIgnoreNotes = True }
contentsTree = hierarchicalize headers
contents = evalState (mapM (elementToListItem opts') contentsTree)
(defaultWriterState {stIds = ids})
in thediv ! [identifier "toc"] $ unordList contents
contents <- mapM (elementToListItem opts') sects
return $ thediv ! [identifier "TOC"] $ unordList $ catMaybes contents
-- | Converts an Element to a list item for a table of contents,
-- retrieving the appropriate identifier from state.
elementToListItem :: WriterOptions -> Element -> State WriterState Html
elementToListItem _ (Blk _) = return noHtml
elementToListItem opts (Sec headerText subsecs) = do
st <- get
let ids = stIds st
let (id', rest) = if null ids
then ("", [])
else (head ids, tail ids)
put $ st {stIds = rest}
elementToListItem :: WriterOptions -> Element -> State WriterState (Maybe Html)
elementToListItem _ (Blk _) = return Nothing
elementToListItem opts (Sec _ id' headerText subsecs) = do
txt <- inlineListToHtml opts headerText
subHeads <- mapM (elementToListItem opts) subsecs
subHeads <- mapM (elementToListItem opts) subsecs >>= return . catMaybes
let subList = if null subHeads
then noHtml
else unordList subHeads
return $ (anchor ! [href ("#" ++ id'), identifier ("TOC-" ++ id')] $ txt) +++
subList
else unordList subHeads
return $ Just $ (anchor ! [href ("#" ++ id')] $ txt) +++ subList
-- | Convert an Element to Html.
elementToHtml :: WriterOptions -> Element -> State WriterState Html
elementToHtml opts (Blk block) = blockToHtml opts block
elementToHtml opts (Sec level id' title' elements) = do
innerContents <- mapM (elementToHtml opts) elements
header' <- blockToHtml opts (Header level title')
return $ if writerS5 opts || (writerStrictMarkdown opts && not (writerTableOfContents opts))
-- S5 gets confused by the extra divs around sections
then toHtmlFromList (header' : innerContents)
else thediv ! [identifier id'] << (header' : innerContents)
-- | Convert list of Note blocks to a footnote <div>.
-- Assumes notes are sorted.
@ -236,15 +234,6 @@ obfuscateChar char =
obfuscateString :: String -> String
obfuscateString = concatMap obfuscateChar . decodeCharacterReferences
-- | True if character is a punctuation character (unicode).
isPunctuation :: Char -> Bool
isPunctuation c =
let c' = ord c
in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F ||
c' >= 0xE000 && c' <= 0xE0FF
then True
else False
-- | Add CSS for document header.
addToCSS :: String -> State WriterState ()
addToCSS item = do
@ -252,50 +241,6 @@ addToCSS item = do
let current = stCSS st
put $ st {stCSS = S.insert item current}
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
inlineListToIdentifier' :: [Inline] -> [Char]
inlineListToIdentifier' [] = ""
inlineListToIdentifier' (x:xs) =
xAsText ++ inlineListToIdentifier' xs
where xAsText = case x of
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
intercalate "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier' lst
Strikeout lst -> inlineListToIdentifier' lst
Superscript lst -> inlineListToIdentifier' lst
SmallCaps lst -> inlineListToIdentifier' lst
Subscript lst -> inlineListToIdentifier' lst
Strong lst -> inlineListToIdentifier' lst
Quoted _ lst -> inlineListToIdentifier' lst
Cite _ lst -> inlineListToIdentifier' lst
Code s -> s
Space -> "-"
EmDash -> "-"
EnDash -> "-"
Apostrophe -> ""
Ellipses -> ""
LineBreak -> "-"
Math _ _ -> ""
TeX _ -> ""
HtmlInline _ -> ""
Link lst _ -> inlineListToIdentifier' lst
Image lst _ -> inlineListToIdentifier' lst
Note _ -> ""
-- | Return unique identifiers for list of inline lists.
uniqueIdentifiers :: [[Inline]] -> [String]
uniqueIdentifiers ls =
let addIdentifier (nonuniqueIds, uniqueIds) l =
let new = inlineListToIdentifier l
matches = length $ filter (== new) nonuniqueIds
new' = (if null new then "section" else new) ++
if matches > 0 then ("-" ++ show matches) else ""
in (new:nonuniqueIds, new':uniqueIds)
in reverse $ snd $ foldl addIdentifier ([],[]) ls
-- | Convert Pandoc block element to HTML.
blockToHtml :: WriterOptions -> Block -> State WriterState Html
blockToHtml _ Null = return $ noHtml
@ -335,26 +280,17 @@ blockToHtml opts (BlockQuote blocks) =
else blockListToHtml opts blocks >>= (return . blockquote)
blockToHtml opts (Header level lst) = do
contents <- inlineListToHtml opts lst
st <- get
let ids = stIds st
let (id', rest) = if null ids
then ("", [])
else (head ids, tail ids)
put $ st {stIds = rest}
let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts)
then []
else [identifier id']
let contents' = if writerTableOfContents opts
then anchor ! [href ("#TOC-" ++ id')] $ contents
then anchor ! [href "#TOC"] $ contents
else contents
return $ case level of
1 -> h1 contents' ! attribs
2 -> h2 contents' ! attribs
3 -> h3 contents' ! attribs
4 -> h4 contents' ! attribs
5 -> h5 contents' ! attribs
6 -> h6 contents' ! attribs
_ -> paragraph contents' ! attribs
1 -> h1 contents'
2 -> h2 contents'
3 -> h3 contents'
4 -> h4 contents'
5 -> h5 contents'
6 -> h6 contents'
_ -> paragraph contents'
blockToHtml opts (BulletList lst) = do
contents <- mapM (blockListToHtml opts) lst
let attribs = if writerIncremental opts
@ -492,7 +428,7 @@ inlineToHtml opts inline =
return $ primHtml $ "<EQ>" ++ str ++ "</EQ>"
PlainMath ->
inlineListToHtml opts (readTeXMath str) >>=
return . (thespan ! [theclass "math"]))
return . (thespan ! [theclass "math"]) )
(TeX str) -> case writerHTMLMathMethod opts of
LaTeXMathML _ -> do modify (\st -> st {stMath = True})
return $ primHtml str

View file

@ -138,7 +138,7 @@ tableOfContents opts headers =
-- | Converts an Element to a list item for a table of contents,
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
elementToListItem (Sec headerText subsecs) = [Plain headerText] ++
elementToListItem (Sec _ _ headerText subsecs) = [Plain headerText] ++
if null subsecs
then []
else [BulletList $ map elementToListItem subsecs]

View file

@ -59,7 +59,7 @@ tableOfContents headers =
elementToListItem :: Element -> [Block]
elementToListItem (Blk _) = []
elementToListItem (Sec sectext subsecs) = [Plain sectext] ++
elementToListItem (Sec _ _ sectext subsecs) = [Plain sectext] ++
if null subsecs
then []
else [BulletList (map elementToListItem subsecs)]

View file

@ -276,24 +276,24 @@ window.onload=startup;window.onresize=function(){setTimeout('fontScale()',50);}
<div id="currentSlide"></div>
<div id="header"></div>
<div id="footer">
<h1 id="july-15-2006"
<h1
>July 15, 2006</h1
><h2 id="my-s5-document"
><h2
>My S5 Document</h2
></div>
</div>
<div class="presentation">
<div class="slide">
<h1 id="my-s5-document-1"
<h1
>My S5 Document</h1
><h3 id="sam-smith-jen-jones"
><h3
>Sam Smith, Jen Jones</h3
><h4 id="july-15-2006-1"
><h4
>July 15, 2006</h4
></div>
<div class="slide">
<h1 id="first-slide"
<h1
>First slide</h1
><ul
><li
@ -303,7 +303,7 @@ window.onload=startup;window.onresize=function(){setTimeout('fontScale()',50);}
></ul
></div>
<div class="slide">
<h1 id="math"
<h1
>Math</h1
><ul
><li

View file

@ -484,24 +484,24 @@ window.onload=startup;window.onresize=function(){setTimeout('fontScale()',50);}
<div id="currentSlide"></div>
<div id="header"></div>
<div id="footer">
<h1 id="july-15-2006"
<h1
>July 15, 2006</h1
><h2 id="my-s5-document"
><h2
>My S5 Document</h2
></div>
</div>
<div class="presentation">
<div class="slide">
<h1 id="my-s5-document-1"
<h1
>My S5 Document</h1
><h3 id="sam-smith-jen-jones"
><h3
>Sam Smith, Jen Jones</h3
><h4 id="july-15-2006-1"
><h4
>July 15, 2006</h4
></div>
<div class="slide">
<h1 id="first-slide"
<h1
>First slide</h1
><ul class="incremental"
><li
@ -511,7 +511,7 @@ window.onload=startup;window.onresize=function(){setTimeout('fontScale()',50);}
></ul
></div>
<div class="slide">
<h1 id="math"
<h1
>Math</h1
><ul class="incremental"
><li

View file

@ -1,41 +1,45 @@
<h1 id="first-slide"
>First slide</h1
><ul
><li
>first bullet</li
<div id="first-slide"
><h1
>First slide</h1
><ul
><li
>second bullet</li
></ul
><h1 id="math"
>Math</h1
><ul
><li
><span class="math"
>\frac{<em
>d</em
>}{<em
>dx</em
>}<em
>f</em
>(<em
>x</em
>)=\lim<sub
><em
>h</em
>&#8201;&#8594;&#8201;0</sub
>first bullet</li
><li
>second bullet</li
></ul
></div
><div id="math"
><h1
>Math</h1
><ul
><li
><span class="math"
>\frac{<em
>f</em
>(<em
>x</em
>+<em
>h</em
>)-<em
>f</em
>(<em
>x</em
>)}{<em
>h</em
>}</span
></li
></ul
>d</em
>}{<em
>dx</em
>}<em
>f</em
>(<em
>x</em
>)=\lim<sub
><em
>h</em
>&#8201;&#8594;&#8201;0</sub
>\frac{<em
>f</em
>(<em
>x</em
>+<em
>h</em
>)-<em
>f</em
>(<em
>x</em
>)}{<em
>h</em
>}</span
></li
></ul
></div
>

View file

@ -15,46 +15,50 @@ STUFF INSERTED
>STUFF INSERTED
<h1 class="title"
>My S5 Document</h1
><h1 id="first-slide"
>First slide</h1
><ul
><li
>first bullet</li
><div id="first-slide"
><h1
>First slide</h1
><ul
><li
>second bullet</li
></ul
><h1 id="math"
>Math</h1
><ul
><li
><span class="math"
>\frac{<em
>d</em
>}{<em
>dx</em
>}<em
>f</em
>(<em
>x</em
>)=\lim<sub
><em
>h</em
>&#8201;&#8594;&#8201;0</sub
>first bullet</li
><li
>second bullet</li
></ul
></div
><div id="math"
><h1
>Math</h1
><ul
><li
><span class="math"
>\frac{<em
>f</em
>(<em
>x</em
>+<em
>h</em
>)-<em
>f</em
>(<em
>x</em
>)}{<em
>h</em
>}</span
></li
></ul
>d</em
>}{<em
>dx</em
>}<em
>f</em
>(<em
>x</em
>)=\lim<sub
><em
>h</em
>&#8201;&#8594;&#8201;0</sub
>\frac{<em
>f</em
>(<em
>x</em
>+<em
>h</em
>)-<em
>f</em
>(<em
>x</em
>)}{<em
>h</em
>}</span
></li
></ul
></div
>STUFF INSERTED
</body
></html

File diff suppressed because it is too large Load diff