Rewrote 'extractTagType' in HTML reader so that it doesn't use

regexes. git-svn-id: https://pandoc.googlecode.com/svn/trunk@507 788f1e2b-df1e-0410-8736-df70ead52e1b
2007-01-24 17:43:39 +00:00 · 2007-01-24 17:43:39 +00:00 · 0646eef976
commit 0646eef976
parent 96919a6ac5
1 changed files with 7 additions and 5 deletions
--- a/src/Text/Pandoc/Readers/HTML.hs
+++ b/src/Text/Pandoc/Readers/HTML.hs
@ -40,7 +40,6 @@ module Text.Pandoc.Readers.HTML (
                                 htmlBlockElement 
                                ) where

-import Text.Regex ( matchRegex, mkRegex )
 import Text.ParserCombinators.Parsec
 import Text.ParserCombinators.Pandoc
 import Text.Pandoc.Definition
@ -84,10 +83,13 @@ inlinesTilEnd tag = try (do
  return inlines)

 -- | Extract type from a tag:  e.g. 'br' from '<br>'
-extractTagType tag = 
-    case (matchRegex (mkRegex  "<[[:space:]]*/?([A-Za-z0-9]+)") tag) of
-          Just [match]   -> (map toLower match)
-          Nothing        -> ""
+extractTagType :: String -> String
+extractTagType ('<':rest) =
+  -- Keep only the tag name: skip leading whitespace and the '/' of a closing
+  -- tag, then stop at the first non-alphanumeric character (attributes, '>').
+  let tagNameChar c = c `elem` (['a'..'z'] ++ ['A'..'Z'] ++ ['0'..'9'])
+  in  map toLower $ takeWhile tagNameChar $ dropWhile (`elem` "/ \t\r\n") rest
+extractTagType _ = ""

 -- | Parse any HTML tag (closing or opening) and return text of tag
 anyHtmlTag = try (do