Docx parser: implement PAGEREF fields
These fields, often used in tables of contents, can act as hyperlinks.
This commit is contained in:
parent
193f6bfeba
commit
6acc82c5d2
8 changed files with 41 additions and 7 deletions
|
@ -448,6 +448,7 @@ parPartToInlines' (PlainOMath exps) =
|
||||||
parPartToInlines' (Field info children) =
|
parPartToInlines' (Field info children) =
|
||||||
case info of
|
case info of
|
||||||
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
|
HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children
|
||||||
|
PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children
|
||||||
_ -> smushInlines <$> mapM parPartToInlines' children
|
_ -> smushInlines <$> mapM parPartToInlines' children
|
||||||
parPartToInlines' NullParPart = return mempty
|
parPartToInlines' NullParPart = return mempty
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,11 @@ import Text.Parsec
|
||||||
import Text.Parsec.Text (Parser)
|
import Text.Parsec.Text (Parser)
|
||||||
|
|
||||||
type URL = T.Text
|
type URL = T.Text
|
||||||
|
type Anchor = T.Text
|
||||||
|
|
||||||
data FieldInfo = HyperlinkField URL
|
data FieldInfo = HyperlinkField URL
|
||||||
|
-- The boolean indicates whether the field is a hyperlink.
|
||||||
|
| PagerefField Anchor Bool
|
||||||
| UnknownField
|
| UnknownField
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
|
@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo
|
||||||
fieldInfo =
|
fieldInfo =
|
||||||
try (HyperlinkField <$> hyperlink)
|
try (HyperlinkField <$> hyperlink)
|
||||||
<|>
|
<|>
|
||||||
|
try ((uncurry PagerefField) <$> pageref)
|
||||||
|
<|>
|
||||||
return UnknownField
|
return UnknownField
|
||||||
|
|
||||||
escapedQuote :: Parser T.Text
|
escapedQuote :: Parser T.Text
|
||||||
|
@ -72,3 +77,23 @@ hyperlink = do
|
||||||
("\\l", s) : _ -> farg <> "#" <> s
|
("\\l", s) : _ -> farg <> "#" <> s
|
||||||
_ -> farg
|
_ -> farg
|
||||||
return url
|
return url
|
||||||
|
|
||||||
|
-- | Parse one @\\h@ switch of a PAGEREF field (see §17.16.5.45).
--
-- The result pairs the literal switch text with whatever 'fieldArgument'
-- parses after any intervening whitespace.
-- NOTE(review): 'fieldArgument' is defined elsewhere in this module; what
-- it accepts when nothing follows the switch should be confirmed there.
pagerefSwitch :: Parser (T.Text, T.Text)
pagerefSwitch = do
  switchName <- T.pack <$> string "\\h"
  switchArg <- spaces *> fieldArgument
  pure (switchName, switchArg)
|
||||||
|
|
||||||
|
-- | Parse the instruction text of a PAGEREF field.
--
-- Skips leading whitespace, expects the literal keyword @PAGEREF@, and then
-- reads one field argument, which is returned as the anchor. The 'Bool' is
-- 'True' when a @\\h@ switch follows the argument, i.e. when the field is
-- meant to be a hyperlink.
pageref :: Parser (Anchor, Bool)
pageref = do
  spaces
  _ <- string "PAGEREF"
  spaces
  farg <- fieldArgument
  -- 'string' consumes input as soon as its first character matches, so
  -- remaining input that starts with a backslash but is not @\\h@ (for
  -- example another switch such as @\\p@) would make a bare
  -- 'many pagerefSwitch' fail after consuming input and reject the whole
  -- field. Wrapping each attempt in 'try' lets the switch list end cleanly
  -- there instead.
  switches <- spaces *> many (try pagerefSwitch)
  -- Look at every parsed switch, not just the first one.
  let isLink = any ((== "\\h") . fst) switches
  return (farg, isLink)
|
||||||
|
|
|
@ -151,6 +151,10 @@ tests = [ testGroup "document"
|
||||||
"nested fields with <w:instrText> tag"
|
"nested fields with <w:instrText> tag"
|
||||||
"docx/nested_instrText.docx"
|
"docx/nested_instrText.docx"
|
||||||
"docx/nested_instrText.native"
|
"docx/nested_instrText.native"
|
||||||
|
, testCompare
|
||||||
|
"pageref hyperlinks in <w:instrText> tag"
|
||||||
|
"docx/pageref.docx"
|
||||||
|
"docx/pageref.native"
|
||||||
, testCompare
|
, testCompare
|
||||||
"inline image"
|
"inline image"
|
||||||
"docx/image.docx"
|
"docx/image.docx"
|
||||||
|
|
|
@ -39,9 +39,9 @@
|
||||||
[])
|
[])
|
||||||
,Para [Str "CONTENTS"]
|
,Para [Str "CONTENTS"]
|
||||||
,Para [Strong [Str "Section",Space,Str "Page"]]
|
,Para [Strong [Str "Section",Space,Str "Page"]]
|
||||||
,Para [Str "FIGURES",Space,Str "iv"]
|
,Para [Str "FIGURES",Space,Link ("",[],[]) [Str "iv"] ("#figures","")]
|
||||||
,Para [Str "TABLES",Space,Str "v"]
|
,Para [Str "TABLES",Space,Link ("",[],[]) [Str "v"] ("#tables","")]
|
||||||
,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Str "2"]
|
,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Link ("",[],[]) [Str "2"] ("#introduction","")]
|
||||||
,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"]
|
,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"]
|
||||||
,Para [Strong [Str "Figure",Space,Str "Page"]]
|
,Para [Strong [Str "Figure",Space,Str "Page"]]
|
||||||
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
|
,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]]
|
||||||
|
|
Binary file not shown.
|
@ -1,8 +1,8 @@
|
||||||
[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOC-Heading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"]
|
[Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOC-Heading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"]
|
||||||
,Para [Link ("",[],[]) [Str "Short",Space,Str "instructions",Space,Str "1"] ("#short-instructions","")]
|
,Para [Link ("",[],[]) [Str "Short",Space,Str "instructions",Space,Link ("",[],[]) [Str "1"] ("#short-instructions","")] ("#short-instructions","")]
|
||||||
,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Str "1"] ("#some-instructions","")]
|
,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Link ("",[],[]) [Str "1"] ("#some-instructions","")] ("#some-instructions","")]
|
||||||
,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")]
|
,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Link ("",[],[]) [Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")]
|
||||||
,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "closing",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-closing","")]
|
,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "closing",Space,Link ("",[],[]) [Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-closing","")] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-closing","")]
|
||||||
,Header 1 ("short-instructions",[],[]) [Str "Short",Space,Str "instructions"]
|
,Header 1 ("short-instructions",[],[]) [Str "Short",Space,Str "instructions"]
|
||||||
,Para [Link ("",[],[]) [Str "Open",Space,Str "remote",Space,Str "folder"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")]
|
,Para [Link ("",[],[]) [Str "Open",Space,Str "remote",Space,Str "folder"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")]
|
||||||
,Para [Str "Do",Space,Str "staff"]
|
,Para [Str "Do",Space,Str "staff"]
|
||||||
|
|
BIN
test/docx/pageref.docx
Normal file
BIN
test/docx/pageref.docx
Normal file
Binary file not shown.
4
test/docx/pageref.native
Normal file
4
test/docx/pageref.native
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
[Para [Str "Title",Space,Link ("",[],[]) [Str "2"] ("#title","")]
|
||||||
|
,Para [Str "Title2",Space,Link ("",[],[]) [Str "2"] ("#title2","")]
|
||||||
|
,Header 1 ("title", [],[]) [Str "Title"]
|
||||||
|
,Header 1 ("title2",[],[]) [Str "Title2"]]
|
Loading…
Add table
Reference in a new issue