Merge pull request #2432 from hftf/hyphens
Docx Reader: Parse soft and non-breaking hyphen elements
This commit is contained in:
commit
869e800bbb
5 changed files with 13 additions and 1 deletions
|
@ -206,11 +206,15 @@ runElemToInlines :: RunElem -> Inlines
|
||||||
runElemToInlines (TextRun s) = text s
|
runElemToInlines (TextRun s) = text s
|
||||||
runElemToInlines (LnBrk) = linebreak
|
runElemToInlines (LnBrk) = linebreak
|
||||||
runElemToInlines (Tab) = space
|
runElemToInlines (Tab) = space
|
||||||
|
runElemToInlines (SoftHyphen) = text "\xad"
|
||||||
|
runElemToInlines (NoBreakHyphen) = text "\x2011"
|
||||||
|
|
||||||
runElemToString :: RunElem -> String
|
runElemToString :: RunElem -> String
|
||||||
runElemToString (TextRun s) = s
|
runElemToString (TextRun s) = s
|
||||||
runElemToString (LnBrk) = ['\n']
|
runElemToString (LnBrk) = ['\n']
|
||||||
runElemToString (Tab) = ['\t']
|
runElemToString (Tab) = ['\t']
|
||||||
|
runElemToString (SoftHyphen) = ['\xad']
|
||||||
|
runElemToString (NoBreakHyphen) = ['\x2011']
|
||||||
|
|
||||||
runToString :: Run -> String
|
runToString :: Run -> String
|
||||||
runToString (Run _ runElems) = concatMap runElemToString runElems
|
runToString (Run _ runElems) = concatMap runElemToString runElems
|
||||||
|
|
|
@ -208,7 +208,7 @@ data Run = Run RunStyle [RunElem]
|
||||||
| InlineDrawing FilePath B.ByteString
|
| InlineDrawing FilePath B.ByteString
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
data RunElem = TextRun String | LnBrk | Tab
|
data RunElem = TextRun String | LnBrk | Tab | SoftHyphen | NoBreakHyphen
|
||||||
deriving Show
|
deriving Show
|
||||||
|
|
||||||
data VertAlign = BaseLn | SupScrpt | SubScrpt
|
data VertAlign = BaseLn | SupScrpt | SubScrpt
|
||||||
|
@ -877,6 +877,8 @@ elemToRunElem ns element
|
||||||
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
|
map (\x -> fromMaybe x . getUnicode f . lowerFromPrivate $ x) str
|
||||||
| isElem ns "w" "br" element = return LnBrk
|
| isElem ns "w" "br" element = return LnBrk
|
||||||
| isElem ns "w" "tab" element = return Tab
|
| isElem ns "w" "tab" element = return Tab
|
||||||
|
| isElem ns "w" "softHyphen" element = return SoftHyphen
|
||||||
|
| isElem ns "w" "noBreakHyphen" element = return NoBreakHyphen
|
||||||
| isElem ns "w" "sym" element = return (getSymChar ns element)
|
| isElem ns "w" "sym" element = return (getSymChar ns element)
|
||||||
| otherwise = throwError WrongElem
|
| otherwise = throwError WrongElem
|
||||||
where
|
where
|
||||||
|
|
|
@ -130,6 +130,10 @@ tests = [ testGroup "inlines"
|
||||||
"literal tabs"
|
"literal tabs"
|
||||||
"docx/tabs.docx"
|
"docx/tabs.docx"
|
||||||
"docx/tabs.native"
|
"docx/tabs.native"
|
||||||
|
, testCompare
|
||||||
|
"special punctuation"
|
||||||
|
"docx/special_punctuation.docx"
|
||||||
|
"docx/special_punctuation.native"
|
||||||
, testCompare
|
, testCompare
|
||||||
"normalizing inlines"
|
"normalizing inlines"
|
||||||
"docx/normalize.docx"
|
"docx/normalize.docx"
|
||||||
|
|
BIN
tests/docx/special_punctuation.docx
Normal file
BIN
tests/docx/special_punctuation.docx
Normal file
Binary file not shown.
2
tests/docx/special_punctuation.native
Normal file
2
tests/docx/special_punctuation.native
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
[Para [Str "Soft",Space,Str "hyphen:",Space,Str "[\173]"]
|
||||||
|
,Para [Str "Non-breaking",Space,Str "hyphen:",Space,Str "[\8209]"]]
|
Loading…
Reference in a new issue