diff --git a/src/Text/Pandoc/Readers/Docx.hs b/src/Text/Pandoc/Readers/Docx.hs index 462e3c679..5c8f20c18 100644 --- a/src/Text/Pandoc/Readers/Docx.hs +++ b/src/Text/Pandoc/Readers/Docx.hs @@ -448,6 +448,7 @@ parPartToInlines' (PlainOMath exps) = parPartToInlines' (Field info children) = case info of HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url children + PagerefField fieldAnchor True -> parPartToInlines' $ InternalHyperLink fieldAnchor children _ -> smushInlines <$> mapM parPartToInlines' children parPartToInlines' NullParPart = return mempty diff --git a/src/Text/Pandoc/Readers/Docx/Fields.hs b/src/Text/Pandoc/Readers/Docx/Fields.hs index 442bc3466..5f090b6be 100644 --- a/src/Text/Pandoc/Readers/Docx/Fields.hs +++ b/src/Text/Pandoc/Readers/Docx/Fields.hs @@ -21,8 +21,11 @@ import Text.Parsec import Text.Parsec.Text (Parser) type URL = T.Text +type Anchor = T.Text data FieldInfo = HyperlinkField URL + -- The boolean indicates whether the field is a hyperlink. + | PagerefField Anchor Bool | UnknownField deriving (Show) @@ -33,6 +36,8 @@ fieldInfo :: Parser FieldInfo fieldInfo = try (HyperlinkField <$> hyperlink) <|> + try ((uncurry PagerefField) <$> pageref) + <|> return UnknownField escapedQuote :: Parser T.Text @@ -72,3 +77,23 @@ hyperlink = do ("\\l", s) : _ -> farg <> "#" <> s _ -> farg return url + +-- See §17.16.5.45 +pagerefSwitch :: Parser (T.Text, T.Text) +pagerefSwitch = do + sw <- string "\\h" + spaces + farg <- fieldArgument + return (T.pack sw, farg) + +pageref :: Parser (Anchor, Bool) +pageref = do + many space + string "PAGEREF" + spaces + farg <- fieldArgument + switches <- spaces *> many pagerefSwitch + let isLink = case switches of + ("\\h", _) : _ -> True + _ -> False + return (farg, isLink) diff --git a/test/Tests/Readers/Docx.hs b/test/Tests/Readers/Docx.hs index af6023836..ea4094c82 100644 --- a/test/Tests/Readers/Docx.hs +++ b/test/Tests/Readers/Docx.hs @@ -151,6 +151,10 @@ tests = [ testGroup "document" "nested fields 
with tag" "docx/nested_instrText.docx" "docx/nested_instrText.native" + , testCompare + "pageref hyperlinks in tag" + "docx/pageref.docx" + "docx/pageref.native" , testCompare "inline image" "docx/image.docx" diff --git a/test/docx/0_level_headers.native b/test/docx/0_level_headers.native index ed589b029..7080063f9 100644 --- a/test/docx/0_level_headers.native +++ b/test/docx/0_level_headers.native @@ -39,9 +39,9 @@ []) ,Para [Str "CONTENTS"] ,Para [Strong [Str "Section",Space,Str "Page"]] -,Para [Str "FIGURES",Space,Str "iv"] -,Para [Str "TABLES",Space,Str "v"] -,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Str "2"] +,Para [Str "FIGURES",Space,Link ("",[],[]) [Str "iv"] ("#figures","")] +,Para [Str "TABLES",Space,Link ("",[],[]) [Str "v"] ("#tables","")] +,Para [Str "SECTION",Space,Str "1",Space,Str "Introduction",Space,Link ("",[],[]) [Str "2"] ("#introduction","")] ,Header 1 ("figures",["Heading-0"],[]) [Str "FIGURES"] ,Para [Strong [Str "Figure",Space,Str "Page"]] ,Para [Strong [Str "No",Space,Str "table",Space,Str "of",Space,Str "figures",Space,Str "entries",Space,Str "found."]] diff --git a/test/docx/golden/nested_anchors_in_header.docx b/test/docx/golden/nested_anchors_in_header.docx index 88dd21abd..9d89070d9 100644 Binary files a/test/docx/golden/nested_anchors_in_header.docx and b/test/docx/golden/nested_anchors_in_header.docx differ diff --git a/test/docx/nested_anchors_in_header.native b/test/docx/nested_anchors_in_header.native index 314b31663..9cc256d28 100644 --- a/test/docx/nested_anchors_in_header.native +++ b/test/docx/nested_anchors_in_header.native @@ -1,8 +1,8 @@ [Header 1 ("\1086\1075\1083\1072\1074\1083\1077\1085\1080\1077",["TOC-Heading"],[]) [Str "\1054\1075\1083\1072\1074\1083\1077\1085\1080\1077"] -,Para [Link ("",[],[]) [Str "Short",Space,Str "instructions",Space,Str "1"] ("#short-instructions","")] -,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Str "1"] ("#some-instructions","")] -,Para [Link 
("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] -,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "closing",Space,Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-closing","")] +,Para [Link ("",[],[]) [Str "Short",Space,Str "instructions",Space,Link ("",[],[]) [Str "1"] ("#short-instructions","")] ("#short-instructions","")] +,Para [Link ("",[],[]) [Str "Some",Space,Str "instructions",Space,Link ("",[],[]) [Str "1"] ("#some-instructions","")] ("#some-instructions","")] +,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "opening",Space,Link ("",[],[]) [Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] +,Para [Link ("",[],[]) [Str "Remote",Space,Str "folder",Space,Str "or",Space,Str "longlonglonglonglong",Space,Str "file",Space,Str "with",Space,Str "manymanymanymany",Space,Str "letters",Space,Str "inside",Space,Str "closing",Space,Link ("",[],[]) [Str "2"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-closing","")] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-closing","")] ,Header 1 ("short-instructions",[],[]) [Str "Short",Space,Str "instructions"] ,Para [Link ("",[],[]) [Str "Open",Space,Str 
"remote",Space,Str "folder"] ("#remote-folder-or-longlonglonglonglong-file-with-manymanymanymany-letters-inside-opening","")] ,Para [Str "Do",Space,Str "staff"] diff --git a/test/docx/pageref.docx b/test/docx/pageref.docx new file mode 100644 index 000000000..0a9159cab Binary files /dev/null and b/test/docx/pageref.docx differ diff --git a/test/docx/pageref.native b/test/docx/pageref.native new file mode 100644 index 000000000..6c683de67 --- /dev/null +++ b/test/docx/pageref.native @@ -0,0 +1,4 @@ +[Para [Str "Title",Space,Link ("",[],[]) [Str "2"] ("#title","")] +,Para [Str "Title2",Space,Link ("",[],[]) [Str "2"] ("#title2","")] +,Header 1 ("title", [],[]) [Str "Title"] +,Header 1 ("title2",[],[]) [Str "Title2"]]