Modified rules for HTML header identifiers to ensure legal identifiers.
+ Modified inlineListToIdentifier and uniqueIdentifiers in HTML writer to ensure that identifiers begin with an alphabetic character. + The new rules are described in README. + Resolves Issue #33. git-svn-id: https://pandoc.googlecode.com/svn/trunk@1150 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
parent
0681d1d3e7
commit
48f2cc5600
2 changed files with 28 additions and 18 deletions
17
README
17
README
|
@ -790,12 +790,19 @@ derive the identifier from the header text,
|
|||
- Remove all punctuation, except dashes and hyphens.
|
||||
- Replace all spaces, dashes, newlines, and hyphens with hyphens.
|
||||
- Convert all alphabetic characters to lowercase.
|
||||
- Remove everything up to the first letter (identifiers may
|
||||
not begin with a number or punctuation mark).
|
||||
- If nothing is left after this, use the identifier `section`.
|
||||
|
||||
Thus, for example, a heading 'Header identifiers in HTML' will get
|
||||
the identifier `header-identifiers-in-html`, a heading
|
||||
'*Dogs*?--in *my* house?' will get the identifier `dogs--in-my-house`,
|
||||
and a heading '[HTML], [S5], or [RTF]?' will get the identifier
|
||||
`html-s5-or-rtf`.
|
||||
Thus, for example,
|
||||
|
||||
Header Identifier
|
||||
------------------------------------- ---------------------------
|
||||
Header identifiers in HTML `header-identifiers-in-html`
|
||||
*Dogs*?--in *my* house? `dogs--in-my-house`
|
||||
[HTML], [S5], or [RTF]? `html-s5-or-rtf`
|
||||
3. Applications `applications`
|
||||
33 `section`
|
||||
|
||||
These rules should, in most cases, allow one to determine the identifier
|
||||
from the header text. The exception is when several headers have the
|
||||
|
|
|
@ -35,7 +35,7 @@ import Text.Pandoc.Shared
|
|||
import Text.Pandoc.Readers.TeXMath
|
||||
import Text.Regex ( mkRegex, matchRegex )
|
||||
import Numeric ( showHex )
|
||||
import Data.Char ( ord, toLower )
|
||||
import Data.Char ( ord, toLower, isAlpha )
|
||||
import Data.List ( isPrefixOf, intersperse )
|
||||
import qualified Data.Set as S
|
||||
import Control.Monad.State
|
||||
|
@ -215,18 +215,20 @@ addToCSS item = do
|
|||
|
||||
-- | Convert Pandoc inline list to plain text identifier, dropping any
-- leading characters that are not alphabetic (the result may be empty).
|
||||
inlineListToIdentifier :: [Inline] -> String
|
||||
inlineListToIdentifier [] = ""
|
||||
inlineListToIdentifier (x:xs) =
|
||||
xAsText ++ inlineListToIdentifier xs
|
||||
inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier'
|
||||
|
||||
inlineListToIdentifier' [] = ""
|
||||
inlineListToIdentifier' (x:xs) =
|
||||
xAsText ++ inlineListToIdentifier' xs
|
||||
where xAsText = case x of
|
||||
Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $
|
||||
concat $ intersperse "-" $ words $ map toLower s
|
||||
Emph lst -> inlineListToIdentifier lst
|
||||
Strikeout lst -> inlineListToIdentifier lst
|
||||
Superscript lst -> inlineListToIdentifier lst
|
||||
Subscript lst -> inlineListToIdentifier lst
|
||||
Strong lst -> inlineListToIdentifier lst
|
||||
Quoted _ lst -> inlineListToIdentifier lst
|
||||
Emph lst -> inlineListToIdentifier' lst
|
||||
Strikeout lst -> inlineListToIdentifier' lst
|
||||
Superscript lst -> inlineListToIdentifier' lst
|
||||
Subscript lst -> inlineListToIdentifier' lst
|
||||
Strong lst -> inlineListToIdentifier' lst
|
||||
Quoted _ lst -> inlineListToIdentifier' lst
|
||||
Code s -> s
|
||||
Space -> "-"
|
||||
EmDash -> "-"
|
||||
|
@ -237,8 +239,8 @@ inlineListToIdentifier (x:xs) =
|
|||
Math _ -> ""
|
||||
TeX _ -> ""
|
||||
HtmlInline _ -> ""
|
||||
Link lst _ -> inlineListToIdentifier lst
|
||||
Image lst _ -> inlineListToIdentifier lst
|
||||
Link lst _ -> inlineListToIdentifier' lst
|
||||
Image lst _ -> inlineListToIdentifier' lst
|
||||
Note _ -> ""
|
||||
|
||||
-- | Return unique identifiers for list of inline lists.
|
||||
|
@ -247,7 +249,8 @@ uniqueIdentifiers ls =
|
|||
let addIdentifier (nonuniqueIds, uniqueIds) l =
|
||||
let new = inlineListToIdentifier l
|
||||
matches = length $ filter (== new) nonuniqueIds
|
||||
new' = new ++ if matches > 0 then ("-" ++ show matches) else ""
|
||||
new' = (if null new then "section" else new) ++
|
||||
if matches > 0 then ("-" ++ show matches) else ""
|
||||
in (new:nonuniqueIds, new':uniqueIds)
|
||||
in reverse $ snd $ foldl addIdentifier ([],[]) ls
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue