Merge pull request #2432 from hftf/hyphens

Docx Reader: Parse soft and non-breaking hyphen elements
This commit is contained in:
John MacFarlane 2015-10-04 21:56:29 -07:00
commit 869e800bbb
5 changed files with 13 additions and 1 deletion

View file

@ -206,11 +206,15 @@ runElemToInlines :: RunElem -> Inlines
-- Convert one run element into pandoc inlines: literal text goes through
-- 'text', a line break becomes 'linebreak', and a tab becomes 'space'.
runElemToInlines (TextRun s) = text s runElemToInlines (TextRun s) = text s
runElemToInlines (LnBrk) = linebreak runElemToInlines (LnBrk) = linebreak
runElemToInlines (Tab) = space runElemToInlines (Tab) = space
-- The hyphen constructors carry no payload, so each is emitted as the
-- matching Unicode character: U+00AD (soft hyphen) and U+2011
-- (non-breaking hyphen).
runElemToInlines (SoftHyphen) = text "\xad"
runElemToInlines (NoBreakHyphen) = text "\x2011"
-- | Render a single run element as a plain 'String'.
--
-- Text is returned unchanged; a line break becomes a one-character newline
-- string and a tab a one-character tab string. Soft and non-breaking
-- hyphens become the one-character strings U+00AD and U+2011 respectively.
runElemToString :: RunElem -> String runElemToString :: RunElem -> String
runElemToString (TextRun s) = s runElemToString (TextRun s) = s
runElemToString (LnBrk) = ['\n'] runElemToString (LnBrk) = ['\n']
runElemToString (Tab) = ['\t'] runElemToString (Tab) = ['\t']
runElemToString (SoftHyphen) = ['\xad']
runElemToString (NoBreakHyphen) = ['\x2011']
-- | Flatten a 'Run' into a plain 'String' by concatenating the string form
-- of each of its elements; the first field of 'Run' is ignored.
-- NOTE(review): only the 'Run' constructor case is visible in this hunk;
-- any handling of other 'Run' constructors lies outside the lines shown.
runToString :: Run -> String runToString :: Run -> String
runToString (Run _ runElems) = concatMap runElemToString runElems runToString (Run _ runElems) = concatMap runElemToString runElems

View file

@ -208,7 +208,7 @@ data Run = Run RunStyle [RunElem]
| InlineDrawing FilePath B.ByteString | InlineDrawing FilePath B.ByteString
deriving Show deriving Show
-- | One piece of content inside a run: literal text ('TextRun'), a line
-- break ('LnBrk'), a tab ('Tab'), a soft hyphen ('SoftHyphen') or a
-- non-breaking hyphen ('NoBreakHyphen'). The last two constructors appear
-- only in the newer version of the declaration.
data RunElem = TextRun String | LnBrk | Tab data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
deriving Show deriving Show
data VertAlign = BaseLn | SupScrpt | SubScrpt data VertAlign = BaseLn | SupScrpt | SubScrpt
@ -877,6 +877,8 @@ elemToRunElem ns element
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
| isElem ns "w" "br" element = return LnBrk | isElem ns "w" "br" element = return LnBrk
| isElem ns "w" "tab" element = return Tab | isElem ns "w" "tab" element = return Tab
| isElem ns "w" "softHyphen" element = return SoftHyphen
| isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
| isElem ns "w" "sym" element = return (getSymChar ns element) | isElem ns "w" "sym" element = return (getSymChar ns element)
| otherwise = throwError WrongElem | otherwise = throwError WrongElem
where where

View file

@ -130,6 +130,10 @@ tests = [ testGroup "inlines"
"literal tabs" "literal tabs"
"docx/tabs.docx" "docx/tabs.docx"
"docx/tabs.native" "docx/tabs.native"
, testCompare
"special punctuation"
"docx/special_punctuation.docx"
"docx/special_punctuation.native"
, testCompare , testCompare
"normalizing inlines" "normalizing inlines"
"docx/normalize.docx" "docx/normalize.docx"

Binary file not shown.

View file

@ -0,0 +1,2 @@
[Para [Str "Soft",Space,Str "hyphen:",Space,Str "[\173]"]
,Para [Str "Non-breaking",Space,Str "hyphen:",Space,Str "[\8209]"]]