Modified rules for HTML header identifiers to ensure legal identifiers.

+ Modified inlineListToIdentifier and uniqueIdentifiers in HTML writer to ensure that identifiers begin with an alphabetic character. + The new rules are described in README. + Resolves Issue #33. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1150 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-12-21 19:25:54 +00:00 · 2007-12-21 19:25:54 +00:00 · 48f2cc5600
commit 48f2cc5600
parent 0681d1d3e7
2 changed files with 28 additions and 18 deletions
--- a/17
+++ b/17
@@ -790,12 +790,19 @@ derive the identifier from the header text,
  - Remove all punctuation, except dashes and hyphens.
  - Replace all spaces, dashes, newlines, and hyphens with hyphens.
  - Convert all alphabetic characters to lowercase.
+  - Remove everything up to the first letter (identifiers may
+    not begin with a number or punctuation mark).
+  - If nothing is left after this, use the identifier `section`.

-Thus, for example, a heading 'Header identifiers in HTML' will get
-the identifier `header-identifiers-in-html`, a heading
-'*Dogs*?--in *my* house?' will get the identifier `dogs--in-my-house`,
-and a heading '[HTML], [S5], or [RTF]?' will get the identifier
-`html-s5-or-rtf`.
+Thus, for example,
+
+  Header                                  Identifier
+  -------------------------------------   ---------------------------
+  Header identifiers in HTML              `header-identifiers-in-html`
+  *Dogs*?--in *my* house?                 `dogs--in-my-house`
+  [HTML], [S5], or [RTF]?                 `html-s5-or-rtf`
+  3. Applications                         `applications`
+  33                                      `section`

 These rules should, in most cases, allow one to determine the identifier
 from the header text. The exception is when several headers have the
--- a/Text/Pandoc/Writers/HTML.hs
+++ b/Text/Pandoc/Writers/HTML.hs
@@ -35,7 +35,7 @@ import Text.Pandoc.Shared
 import Text.Pandoc.Readers.TeXMath
 import Text.Regex ( mkRegex, matchRegex )
 import Numeric ( showHex )
-import Data.Char ( ord, toLower )
+import Data.Char ( ord, toLower, isAlpha )
 import Data.List ( isPrefixOf, intersperse )
 import qualified Data.Set as S
 import Control.Monad.State
@@ -215,18 +215,20 @@ addToCSS item = do

 -- | Convert Pandoc inline list to plain text identifier.
 inlineListToIdentifier :: [Inline] -> String
-inlineListToIdentifier [] = ""
-inlineListToIdentifier (x:xs) = 
-  xAsText ++ inlineListToIdentifier xs
+inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
+
+inlineListToIdentifier' [] = ""
+inlineListToIdentifier' (x:xs) = 
+  xAsText ++ inlineListToIdentifier' xs
  where xAsText = case x of
          Str s          -> filter (\c -> c == '-' || not (isPunctuation c)) $
                            concat $ intersperse "-" $ words $ map toLower s
-          Emph lst       -> inlineListToIdentifier lst
-          Strikeout lst  -> inlineListToIdentifier lst
-          Superscript lst -> inlineListToIdentifier lst
-          Subscript lst  -> inlineListToIdentifier lst
-          Strong lst     -> inlineListToIdentifier lst
-          Quoted _ lst   -> inlineListToIdentifier lst
+          Emph lst       -> inlineListToIdentifier' lst
+          Strikeout lst  -> inlineListToIdentifier' lst
+          Superscript lst -> inlineListToIdentifier' lst
+          Subscript lst  -> inlineListToIdentifier' lst
+          Strong lst     -> inlineListToIdentifier' lst
+          Quoted _ lst   -> inlineListToIdentifier' lst
          Code s         -> s
          Space          -> "-"
          EmDash         -> "-"
@@ -237,8 +239,8 @@ inlineListToIdentifier (x:xs) =
          Math _         -> ""
          TeX _          -> ""
          HtmlInline _   -> ""
-          Link lst _     -> inlineListToIdentifier lst
-          Image lst _    -> inlineListToIdentifier lst
+          Link lst _     -> inlineListToIdentifier' lst
+          Image lst _    -> inlineListToIdentifier' lst
          Note _         -> ""

 -- | Return unique identifiers for list of inline lists.
@@ -247,7 +249,8 @@ uniqueIdentifiers ls =
  let addIdentifier (nonuniqueIds, uniqueIds) l =
        let new = inlineListToIdentifier l
            matches = length $ filter (== new) nonuniqueIds
-            new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
+            new' = (if null new then "section" else new) ++ 
+                   if matches > 0 then ("-" ++ show matches) else ""
        in  (new:nonuniqueIds, new':uniqueIds)
  in  reverse $ snd $ foldl addIdentifier ([],[]) ls