From 48f2cc5600bd26c60ffa1d5531ba2d9aeead129d Mon Sep 17 00:00:00 2001 From: fiddlosopher <fiddlosopher@788f1e2b-df1e-0410-8736-df70ead52e1b> Date: Fri, 21 Dec 2007 19:25:54 +0000 Subject: [PATCH] Modified rules for HTML header identifiers to ensure legal identifiers. + Modified inlineListToIdentifier and uniqueIdentifiers in HTML writer to ensure that identifiers begin with an alphabetic character. + The new rules are described in README. + Resolves Issue #33. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1150 788f1e2b-df1e-0410-8736-df70ead52e1b --- README | 17 ++++++++++++----- Text/Pandoc/Writers/HTML.hs | 29 ++++++++++++++++------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/README b/README index a8fd9e673..cd6b30cda 100644 --- a/README +++ b/README @@ -790,12 +790,19 @@ derive the identifier from the header text, - Remove all punctuation, except dashes and hyphens. - Replace all spaces, dashes, newlines, and hyphens with hyphens. - Convert all alphabetic characters to lowercase. + - Remove everything up to the first letter (identifiers may + not begin with a number or punctuation mark). + - If nothing is left after this, use the identifier `section`. -Thus, for example, a heading 'Header identifiers in HTML' will get -the identifier `header-identifiers-in-html`, a heading -'*Dogs*?--in *my* house?' will get the identifier `dogs--in-my-house`, -and a heading '[HTML], [S5], or [RTF]?' will get the identifier -`html-s5-or-rtf`. +Thus, for example, + + Header Identifier + ------------------------------------- --------------------------- + Header identifiers in HTML `header-identifiers-in-html` + *Dogs*?--in *my* house? `dogs--in-my-house` + [HTML], [S5], or [RTF]? `html-s5-or-rtf` + 3. Applications `applications` + 33 `section` These rules should, in most cases, allow one to determine the identifier from the header text. 
The exception is when several headers have the diff --git a/Text/Pandoc/Writers/HTML.hs b/Text/Pandoc/Writers/HTML.hs index 660bf652e..0061420d0 100644 --- a/Text/Pandoc/Writers/HTML.hs +++ b/Text/Pandoc/Writers/HTML.hs @@ -35,7 +35,7 @@ import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Regex ( mkRegex, matchRegex ) import Numeric ( showHex ) -import Data.Char ( ord, toLower ) +import Data.Char ( ord, toLower, isAlpha ) import Data.List ( isPrefixOf, intersperse ) import qualified Data.Set as S import Control.Monad.State @@ -215,18 +215,20 @@ addToCSS item = do -- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: [Inline] -> String -inlineListToIdentifier [] = "" -inlineListToIdentifier (x:xs) = - xAsText ++ inlineListToIdentifier xs +inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' + +inlineListToIdentifier' [] = "" +inlineListToIdentifier' (x:xs) = + xAsText ++ inlineListToIdentifier' xs where xAsText = case x of Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ concat $ intersperse "-" $ words $ map toLower s - Emph lst -> inlineListToIdentifier lst - Strikeout lst -> inlineListToIdentifier lst - Superscript lst -> inlineListToIdentifier lst - Subscript lst -> inlineListToIdentifier lst - Strong lst -> inlineListToIdentifier lst - Quoted _ lst -> inlineListToIdentifier lst + Emph lst -> inlineListToIdentifier' lst + Strikeout lst -> inlineListToIdentifier' lst + Superscript lst -> inlineListToIdentifier' lst + Subscript lst -> inlineListToIdentifier' lst + Strong lst -> inlineListToIdentifier' lst + Quoted _ lst -> inlineListToIdentifier' lst Code s -> s Space -> "-" EmDash -> "-" @@ -237,8 +239,8 @@ inlineListToIdentifier (x:xs) = Math _ -> "" TeX _ -> "" HtmlInline _ -> "" - Link lst _ -> inlineListToIdentifier lst - Image lst _ -> inlineListToIdentifier lst + Link lst _ -> inlineListToIdentifier' lst + Image lst _ -> inlineListToIdentifier' lst Note _ -> "" -- | Return 
unique identifiers for list of inline lists. @@ -247,7 +249,8 @@ uniqueIdentifiers ls = let addIdentifier (nonuniqueIds, uniqueIds) l = let new = inlineListToIdentifier l matches = length $ filter (== new) nonuniqueIds - new' = new ++ if matches > 0 then ("-" ++ show matches) else "" + new' = (if null new then "section" else new) ++ + if matches > 0 then ("-" ++ show matches) else "" in (new:nonuniqueIds, new':uniqueIds) in reverse $ snd $ foldl addIdentifier ([],[]) ls