Docx reader: fix handling of empty fields

Some fields have only an instrText and no content. Pandoc didn't
understand these, which caused other fields to be misinterpreted because
it seemed like a field was still open when it wasn't.
This commit is contained in:
Milan Bracke 2021-06-24 09:27:28 +02:00 committed by John MacFarlane
parent 6acc82c5d2
commit 465c28d28e
4 changed files with 15 additions and 0 deletions

View file

@ -844,6 +844,10 @@ elemToParPart ns element
FldCharFieldInfo info : ancestors | fldCharType == "separate" -> do
modify $ \st -> st {stateFldCharState = FldCharContent info [] : ancestors}
return NullParPart
-- Some fields have no content. Since Pandoc doesn't understand any of those fields, we can just close them.
FldCharFieldInfo _ : ancestors | fldCharType == "end" -> do
modify $ \st -> st {stateFldCharState = ancestors}
return NullParPart
[FldCharContent info children] | fldCharType == "end" -> do
modify $ \st -> st {stateFldCharState = []}
return $ Field info $ reverse children

View file

@ -151,6 +151,10 @@ tests = [ testGroup "document"
"nested fields with <w:instrText> tag"
"docx/nested_instrText.docx"
"docx/nested_instrText.native"
, testCompare
"empty fields with <w:instrText> tag"
"docx/empty_field.docx"
"docx/empty_field.native"
, testCompare
"pageref hyperlinks in <w:instrText> tag"
"docx/pageref.docx"

BIN
test/docx/empty_field.docx Normal file

Binary file not shown.

View file

@ -0,0 +1,7 @@
[Para
[Str "\24076\26395\28145\20837\20102\35299\30340\35835\32773\21487\20197\21435\30475David",Space,Str "French",Space,Str "Belding\21644Kevin",Space,Str "J.",Space,Str "Mitchell\30340"
,Link ("",[],[]) [Str "Foundations",Space,Str "of",Space,Str "Analysis,",Space,Str "2nd",Space,Str "Edition"] ("https://books.google.com/books?id=sp_Zcb9ot90C&lpg=PR4&hl=zh-CN&pg=PA19#v=onepage&q&f=true",""),Str ",\21487\20174\&19\39029\30475\36215\65292\25110D.C.",Space,Str "Goldrei\30340",Space
,Link ("",[],[]) [Str "Classic",Space,Str "Set",Space,Str "Theory:",Space,Str "For",Space,Str "Guided",Space,Str "Independent",Space,Str "Study"] ("https://books.google.ae/books?id=dlc0DwAAQBAJ&lpg=PT29&hl=zh-CN&pg=PT26#v=onepage&q&f=true","")
,Str "\65292\20174\31532\20108\31456\30475\36215\65292\38405\35835\26102\35201\27880\24847\26412\25991\19982\36825\20123\20070\25152\19981\21516\30340\26159\24182\27809\26377\25226\23454\25968\30475\20316\26159\26377\29702\25968\38598\30340\20998\21106\12290"]
,Para [Str "Index:"]
,Para [Str "French,",Space,Str "1"]]