Modified rules for HTML header identifiers to ensure legal identifiers.

+ Modified inlineListToIdentifier and uniqueIdentifiers in HTML writer
  to ensure that identifiers begin with an alphabetic character.
+ The new rules are described in README.
+ Resolves Issue #33.


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1150 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2007-12-21 19:25:54 +00:00
parent 0681d1d3e7
commit 48f2cc5600
2 changed files with 28 additions and 18 deletions

17
README
View file

@ -790,12 +790,19 @@ derive the identifier from the header text,
- Remove all punctuation, except dashes and hyphens.
- Replace all spaces, dashes, newlines, and hyphens with hyphens.
- Convert all alphabetic characters to lowercase.
- Remove everything up to the first letter (identifiers may
not begin with a number or punctuation mark).
- If nothing is left after this, use the identifier `section`.
Thus, for example, a heading 'Header identifiers in HTML' will get
the identifier `header-identifiers-in-html`, a heading
'*Dogs*?--in *my* house?' will get the identifier `dogs--in-my-house`,
and a heading '[HTML], [S5], or [RTF]?' will get the identifier
`html-s5-or-rtf`.
Thus, for example,
Header                                  Identifier
-------------------------------------   ---------------------------
Header identifiers in HTML              `header-identifiers-in-html`
*Dogs*?--in *my* house?                 `dogs--in-my-house`
[HTML], [S5], or [RTF]?                 `html-s5-or-rtf`
3. Applications                         `applications`
33                                      `section`
These rules should, in most cases, allow one to determine the identifier
from the header text. The exception is when several headers have the

View file

@ -35,7 +35,7 @@ import Text.Pandoc.Shared
import Text.Pandoc.Readers.TeXMath
import Text.Regex ( mkRegex, matchRegex )
import Numeric ( showHex )
import Data.Char ( ord, toLower )
import Data.Char ( ord, toLower, isAlpha )
import Data.List ( isPrefixOf, intersperse )
import qualified Data.Set as S
import Control.Monad.State
@ -215,18 +215,20 @@ addToCSS item = do
-- | Convert Pandoc inline list to plain text identifier.
inlineListToIdentifier :: [Inline] -> String
inlineListToIdentifier [] = ""
inlineListToIdentifier (x:xs) =
xAsText ++ inlineListToIdentifier xs
inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
inlineListToIdentifier' [] = ""
inlineListToIdentifier' (x:xs) =
xAsText ++ inlineListToIdentifier' xs
where xAsText = case x of
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
concat $ intersperse "-" $ words $ map toLower s
Emph lst -> inlineListToIdentifier lst
Strikeout lst -> inlineListToIdentifier lst
Superscript lst -> inlineListToIdentifier lst
Subscript lst -> inlineListToIdentifier lst
Strong lst -> inlineListToIdentifier lst
Quoted _ lst -> inlineListToIdentifier lst
Emph lst -> inlineListToIdentifier' lst
Strikeout lst -> inlineListToIdentifier' lst
Superscript lst -> inlineListToIdentifier' lst
Subscript lst -> inlineListToIdentifier' lst
Strong lst -> inlineListToIdentifier' lst
Quoted _ lst -> inlineListToIdentifier' lst
Code s -> s
Space -> "-"
EmDash -> "-"
@ -237,8 +239,8 @@ inlineListToIdentifier (x:xs) =
Math _ -> ""
TeX _ -> ""
HtmlInline _ -> ""
Link lst _ -> inlineListToIdentifier lst
Image lst _ -> inlineListToIdentifier lst
Link lst _ -> inlineListToIdentifier' lst
Image lst _ -> inlineListToIdentifier' lst
Note _ -> ""
-- | Return unique identifiers for list of inline lists.
@ -247,7 +249,8 @@ uniqueIdentifiers ls =
let addIdentifier (nonuniqueIds, uniqueIds) l =
let new = inlineListToIdentifier l
matches = length $ filter (== new) nonuniqueIds
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
new' = (if null new then "section" else new) ++
if matches > 0 then ("-" ++ show matches) else ""
in (new:nonuniqueIds, new':uniqueIds)
in reverse $ snd $ foldl addIdentifier ([],[]) ls