Merge pull request #2458 from mb21/lang-inlines

LaTeX and ConTeXt writers: support lang attribute on divs and spans
2015-10-19 23:02:08 -07:00 · 2015-10-19 23:02:08 -07:00 · b49ab06e96
commit b49ab06e96
parent 5062e03a40 fa2b26ddcb
8 changed files with 430 additions and 38 deletions
--- a/17
+++ b/17
@ -1047,12 +1047,21 @@ Language variables
    format stored in the additional variables `babel-lang`,
    `polyglossia-lang` (LaTeX) and `context-lang` (ConTeXt).

+    Native pandoc `span`s and `div`s with the lang attribute
+    (value in BCP 47) can be used to switch the language in
+    that range.
+
 `otherlangs`
 :   a list of other languages used in the document
    in the YAML metadata, according to [BCP 47]. For example:
    `otherlangs: [en-GB, fr]`.
-    Currently only used by `xelatex` through the generated
-    `polyglossia-otherlangs` variable.
+    This is automatically generated from the `lang` attributes
+    in all `span`s and `div`s but can be overriden.
+    Currently only used by LaTeX through the generated
+    `babel-otherlangs` and `polyglossia-otherlangs` variables.
+    The LaTeX writer outputs polyglossia commands in the text but
+    the `babel-newcommands` variable contains mappings for them
+    to the corresponding babel.

 `dir`
 :   the base direction of the document, either `rtl` (right-to-left)
@ -1065,10 +1074,6 @@ Language variables
    (e.g. the browser, when generating HTML) supports the
    [Unicode Bidirectional Algorithm].

-    LaTeX and ConTeXt assume by default that all text is left-to-right.
-    Setting `dir: ltr` enables bidirectional text handling in a document
-    whose base direction is left-to-right but contains some right-to-left script.
-
    When using LaTeX for bidirectional documents, only the `xelatex` engine
    is fully supported (use `--latex-engine=xelatex`).

--- a/src/Text/Pandoc/Writers/ConTeXt.hs
+++ b/src/Text/Pandoc/Writers/ConTeXt.hs
@ -157,17 +157,21 @@ blockToConTeXt (CodeBlock _ str) =
 blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
 blockToConTeXt (RawBlock _ _ ) = return empty
 blockToConTeXt (Div (ident,_,kvs) bs) = do
-  contents <- blockListToConTeXt bs
-  let contents' = if null ident
-                     then contents
-                     else ("\\reference" <> brackets (text $ toLabel ident) <>
-                            braces empty <> "%") $$ contents
-  let align dir = blankline <> "\\startalignment[" <> dir <> "]"
-                    $$ contents' $$ "\\stopalignment" <> blankline
-  return $ case lookup "dir" kvs of
-             Just "rtl" -> align "righttoleft"
-             Just "ltr" -> align "lefttoright"
-             _          -> contents'
+  let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
+  let wrapRef txt = if null ident
+                       then txt
+                       else ("\\reference" <> brackets (text $ toLabel ident) <>
+                              braces empty <> "%") $$ txt
+      wrapDir = case lookup "dir" kvs of
+                  Just "rtl" -> align "righttoleft"
+                  Just "ltr" -> align "lefttoright"
+                  _          -> id
+      wrapLang txt = case lookup "lang" kvs of
+                       Just lng -> "\\start\\language["
+                                     <> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
+                       Nothing  -> txt
+      wrapBlank txt = blankline <> txt <> blankline
+  fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
 blockToConTeXt (BulletList lst) = do
  contents <- mapM listItemToConTeXt lst
  return $ ("\\startitemize" <> if isTightList lst
@ -346,11 +350,15 @@ inlineToConTeXt (Note contents) = do
              else text "\\startbuffer " <> nest 2 contents' <>
                   text "\\stopbuffer\\footnote{\\getbuffer}"
 inlineToConTeXt (Span (_,_,kvs) ils) = do
-  contents <- inlineListToConTeXt ils
-  return $ case lookup "dir" kvs of
-             Just "rtl" -> braces $ "\\righttoleft " <> contents
-             Just "ltr" -> braces $ "\\lefttoright " <> contents
-             _          -> contents
+  let wrapDir txt = case lookup "dir" kvs of
+                      Just "rtl" -> braces $ "\\righttoleft " <> txt
+                      Just "ltr" -> braces $ "\\lefttoright " <> txt
+                      _          -> txt
+      wrapLang txt = case lookup "lang" kvs of
+                       Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
+                                      <> "]" <> txt <> "\\stop "
+                       Nothing -> txt
+  fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils

 -- | Craft the section header, inserting the secton reference, if supplied.
 sectionHeader :: Attr
@ -377,6 +385,9 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
                       then char '\\' <> chapter <> braces contents
                       else contents <> blankline

+fromBcp47' :: String -> String
+fromBcp47' = fromBcp47 . splitBy (=='-')
+
 -- Takes a list of the constituents of a BCP 47 language code
 -- and irons out ConTeXt's exceptions
 -- https://tools.ietf.org/html/bcp47#section-2.1
--- a/src/Text/Pandoc/Writers/LaTeX.hs
+++ b/src/Text/Pandoc/Writers/LaTeX.hs
@ -39,7 +39,7 @@ import Text.Pandoc.Templates
 import Text.Printf ( printf )
 import Network.URI ( isURI, unEscapeString )
 import Data.Aeson (object, (.=))
-import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse )
+import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy )
 import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
 import Data.Maybe ( fromMaybe )
 import qualified Data.Text as T
@ -145,6 +145,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
  st <- get
  titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
  authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
+  let docLangs = nub $ query (extract "lang") blocks
  let context  =  defField "toc" (writerTableOfContents options) $
                  defField "toc-depth" (show (writerTOCDepth options -
                                              if stBook st
@ -179,18 +180,48 @@ pandocToLaTeX options (Pandoc meta blocks) = do
                         Biblatex -> defField "biblio-title" biblioTitle .
                                     defField "biblatex" True
                         _        -> id) $
+                  -- set lang to something so polyglossia/babel is included
+                  defField "lang" (if null docLangs then ""::String else "en") $
+                  defField "otherlangs" docLangs $
+                  defField "dir" (if (null $ query (extract "dir") blocks)
+                                     then ""::String
+                                     else "ltr") $
                  metadata
  let toPolyObj lang = object [ "name"    .= T.pack name
                              , "options" .= T.pack opts ]
        where
          (name, opts) = toPolyglossia lang
  let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
+      otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
  let context' =
          defField "babel-lang" (toBabel lang)
+        $ defField "babel-otherlangs" (map toBabel otherlangs)
+        $ defField "babel-newcommands" (concatMap (\(poly, babel) ->
+            -- \textspanish and \textgalician are already used by babel
+            -- save them as \oritext... and let babel use that
+            if poly `elem` ["spanish", "galician"]
+               then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++
+                    "\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++
+                      "{\\renewcommand{\\text" ++ poly ++ "}{\\oritext"
+                      ++ poly ++ "}}\n" ++
+                    "\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++
+                      "{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
+                      ++ poly ++ "}{##2}}}\n"
+               else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
+                      ++ babel ++ "}{#2}}\n" ++
+                    "\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{"
+                      ++ babel ++ "}}{\\end{otherlanguage}}\n"
+            )
+            -- eliminate duplicates that have same polyglossia name
+            $ nubBy (\a b -> fst a == fst b)
+            -- find polyglossia and babel names of languages used in the document
+            $ map (\l ->
+              let lng = splitBy (=='-') l
+              in  (fst $ toPolyglossia lng, toBabel lng)
+              )
+            docLangs )
        $ defField "polyglossia-lang" (toPolyObj lang)
-        $ defField "polyglossia-otherlangs"
-            (maybe [] (map $ toPolyObj . splitBy (=='-')) $
-            getField "otherlangs" context)
+        $ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
        $ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
                                      Just "rtl" -> True
                                      _          -> False)
@ -340,15 +371,24 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
                      then empty
                      else "\\hyperdef{}" <> braces (text ref) <>
                           braces ("\\label" <> braces (text ref))
-  contents' <- blockListToLaTeX bs
-  let align dir = inCmd "begin" dir $$ contents' $$ inCmd "end" dir
-  let contents = case lookup "dir" kvs of
-                   Just "rtl" -> align "RTL"
-                   Just "ltr" -> align "LTR"
-                   _          -> contents'
-  if beamer && "notes" `elem` classes  -- speaker notes
-     then return $ "\\note" <> braces contents
-     else return (linkAnchor $$ contents)
+  let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
+  let wrapDir = case lookup "dir" kvs of
+                  Just "rtl" -> align "RTL"
+                  Just "ltr" -> align "LTR"
+                  _          -> id
+      wrapLang txt = case lookup "lang" kvs of
+                       Just lng -> let (l, o) = toPolyglossiaEnv lng
+                                       ops = if null o
+                                                then ""
+                                                else brackets $ text o
+                                   in  inCmd "begin" (text l) <> ops
+                                       $$ blankline <> txt <> blankline
+                                       $$ inCmd "end" (text l)
+                       Nothing  -> txt
+      wrapNotes txt = if beamer && "notes" `elem` classes
+                          then "\\note" <> braces txt -- speaker notes
+                          else linkAnchor $$ txt
+  fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
 blockToLaTeX (Plain lst) =
  inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
 -- title beginning with fig: indicates that the image is a figure
@ -759,9 +799,12 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do
     (if noSmallCaps then inCmd "textnormal" else id) .
     (if rtl then inCmd "RL" else id) .
     (if ltr then inCmd "LR" else id) .
-     (if not (noEmph || noStrong || noSmallCaps || rtl || ltr)
-         then braces
-         else id)) `fmap` inlineListToLaTeX ils
+     (case lookup "lang" kvs of
+        Just lng -> let (l, o) = toPolyglossiaEnv lng
+                        ops = if null o then "" else brackets (text o)
+                    in  \c -> char '\\' <> "text" <> text l <> ops <> braces c
+        Nothing  -> id)
+    ) `fmap` inlineListToLaTeX ils
 inlineToLaTeX (Emph lst) =
  inlineListToLaTeX lst >>= return . inCmd "emph"
 inlineToLaTeX (Strong lst) =
@ -1002,6 +1045,30 @@ getListingsLanguage :: [String] -> Maybe String
 getListingsLanguage [] = Nothing
 getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs

+-- Extract a key from divs and spans
+extract :: String -> Block -> [String]
+extract key (Div attr _)     = lookKey key attr
+extract key (Plain ils)      = concatMap (extractInline key) ils
+extract key (Para ils)       = concatMap (extractInline key) ils
+extract key (Header _ _ ils) = concatMap (extractInline key) ils
+extract _ _                  = []
+
+-- Extract a key from spans
+extractInline :: String -> Inline -> [String]
+extractInline key (Span attr _) = lookKey key attr
+extractInline _ _               = []
+
+-- Look up a key in an attribute and give a list of its values
+lookKey :: String -> Attr -> [String]
+lookKey key (_,_,kvs) =  maybe [] words $ lookup key kvs
+
+-- In environments \Arabic instead of \arabic is used
+toPolyglossiaEnv :: String -> (String, String)
+toPolyglossiaEnv l =
+  case toPolyglossia $ (splitBy (=='-')) l of
+    ("arabic", o) -> ("Arabic", o)
+    x             -> x
+
 -- Takes a list of the constituents of a BCP 47 language code and
 -- converts it to a Polyglossia (language, options) tuple
 -- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf
--- a/tests/Tests/Old.hs
+++ b/tests/Tests/Old.hs
@ -165,6 +165,12 @@ tests = [ testGroup "markdown"
          [ "opendocument" , "context" , "texinfo", "icml"
          , "man" , "plain" , "rtf", "org", "asciidoc"
          ]
+        , testGroup "writers-lang-and-dir"
+          [ test "latex" ["-f", "native", "-t", "latex", "-s"]
+            "writers-lang-and-dir.native" "writers-lang-and-dir.latex"
+          , test "context" ["-f", "native", "-t", "context", "-s"]
+            "writers-lang-and-dir.native" "writers-lang-and-dir.context"
+          ]
        ]

 -- makes sure file is fully closed after reading
--- a/tests/writer.context
+++ b/tests/writer.context
@ -545,11 +545,13 @@ Blank line after term, indented marker, alternate markers:
 Simple block on one line:

 foo
+
 And nested without indentation:

 foo

 bar
+
 Interpreted markdown in a table:

 This is {\em emphasized}
@ -575,6 +577,7 @@ As should this:
 Now, nested:

 foo
+
 This should just be an HTML comment:

 Multiline:
--- a/tests/writers-lang-and-dir.context
+++ b/tests/writers-lang-and-dir.context
@ -0,0 +1,111 @@
+\startmode[*mkii]
+  \enableregime[utf-8]  
+  \setupcolors[state=start]
+\stopmode
+
+% Enable hyperlinks
+\setupinteraction[state=start, color=middleblue]
+
+\setuppapersize [letter][letter]
+\setuplayout    [width=middle,  backspace=1.5in, cutspace=1.5in,
+                 height=middle, topspace=0.75in, bottomspace=0.75in]
+
+\setuppagenumbering[location={footer,center}]
+
+\setupbodyfont[11pt]
+
+\setupwhitespace[medium]
+
+\setuphead[chapter]      [style=\tfd]
+\setuphead[section]      [style=\tfc]
+\setuphead[subsection]   [style=\tfb]
+\setuphead[subsubsection][style=\bf]
+
+\setuphead[chapter, section, subsection, subsubsection][number=no]
+
+\definedescription
+  [description]
+  [headstyle=bold, style=normal, location=hanging, width=broad, margin=1cm, alternative=hanging]
+
+\setupitemize[autointro]    % prevent orphan list intro
+\setupitemize[indentnext=no]
+
+\setupfloat[figure][default={here,nonumber}]
+\setupfloat[table][default={here,nonumber}]
+
+\setupthinrules[width=15em] % width of horizontal rules
+
+\setupdelimitedtext
+  [blockquote]
+  [before={\blank[medium]},
+   after={\blank[medium]},
+   indentnext=no,
+  ]
+
+
+\starttext
+
+\section[empty-divs-and-spans]{Empty Divs and Spans}
+
+Some text and
+
+div contents
+
+and more text.
+
+Next paragraph with a span and a word-thatincludesaspanright?
+
+\section[directionality]{Directionality}
+
+Some text and
+
+\startalignment[righttoleft]
+rtl div contents
+
+\stopalignment
+
+and more text.
+
+\startalignment[lefttoright]
+and a ltr div. with a {\righttoleft rtl span}.
+
+\stopalignment
+
+Next paragraph with a {\righttoleft rtl span} and a
+word-that-includesa{\lefttoright ltrspan}right?
+
+\section[languages]{Languages}
+
+Some text and
+
+\start\language[de]
+German div contents
+
+\stop
+
+and more text.
+
+Next paragraph with a \start\language[en-gb]British span\stop  and a
+word-that-includesa\start\language[de-ch]Swiss German span\stop right?
+
+Some \start\language[es]Spanish text\stop .
+
+\section[combined]{Combined}
+
+Some text and
+
+\start\language[fr]
+\startalignment[righttoleft]
+French rtl div contents
+
+\stopalignment
+\stop
+
+and more text.
+
+Next paragraph with a \start\language[en-gb]{\lefttoright British ltr
+span}\stop  and a
+word-that-includesa\start\language[de-ch]{\lefttoright Swiss German ltr
+span}\stop right?
+
+\stoptext
--- a/tests/writers-lang-and-dir.latex
+++ b/tests/writers-lang-and-dir.latex
@ -0,0 +1,166 @@
+\documentclass[english,]{article}
+\usepackage{lmodern}
+\usepackage{amssymb,amsmath}
+\usepackage{ifxetex,ifluatex}
+\usepackage{fixltx2e} % provides \textsubscript
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \usepackage[T1]{fontenc}
+  \usepackage[utf8]{inputenc}
+\else % if luatex or xelatex
+  \ifxetex
+    \usepackage{mathspec}
+  \else
+    \usepackage{fontspec}
+  \fi
+  \defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
+  \newcommand{\euro}{€}
+\fi
+% use upquote if available, for straight quotes in verbatim environments
+\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
+% use microtype if available
+\IfFileExists{microtype.sty}{%
+\usepackage{microtype}
+\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
+}{}
+\makeatletter
+\@ifpackageloaded{hyperref}{}{%
+\ifxetex
+  \usepackage[setpagesize=false, % page size defined by xetex
+              unicode=false, % unicode breaks when used with xetex
+              xetex]{hyperref}
+\else
+  \usepackage[unicode=true]{hyperref}
+\fi
+}
+\@ifpackageloaded{color}{
+    \PassOptionsToPackage{usenames,dvipsnames}{color}
+}{%
+    \usepackage[usenames,dvipsnames]{color}
+}
+\makeatother
+\hypersetup{breaklinks=true,
+            bookmarks=true,
+            pdfauthor={},
+            pdftitle={},
+            colorlinks=true,
+            citecolor=blue,
+            urlcolor=blue,
+            linkcolor=magenta,
+            pdfborder={0 0 0}
+            }
+\urlstyle{same}  % don't use monospace font for urls
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \usepackage[shorthands=off,ngerman,british,ngerman,spanish,french,main=english]{babel}
+  \newcommand{\textgerman}[2][]{\foreignlanguage{ngerman}{#2}}
+  \newenvironment{german}[1]{\begin{otherlanguage}{ngerman}}{\end{otherlanguage}}
+  \newcommand{\textenglish}[2][]{\foreignlanguage{british}{#2}}
+  \newenvironment{english}[1]{\begin{otherlanguage}{british}}{\end{otherlanguage}}
+  \let\oritextspanish\textspanish
+  \AddBabelHook{spanish}{beforeextras}{\renewcommand{\textspanish}{\oritextspanish}}
+  \AddBabelHook{spanish}{afterextras}{\renewcommand{\textspanish}[2][]{\foreignlanguage{spanish}{##2}}}
+  \newcommand{\textfrench}[2][]{\foreignlanguage{french}{#2}}
+  \newenvironment{french}[1]{\begin{otherlanguage}{french}}{\end{otherlanguage}}
+\else
+  \usepackage{polyglossia}
+  \setmainlanguage[]{english}
+  \setotherlanguage[]{german}
+  \setotherlanguage[variant=british]{english}
+  \setotherlanguage[variant=swiss]{german}
+  \setotherlanguage[]{spanish}
+  \setotherlanguage[]{french}
+\fi
+\setlength{\parindent}{0pt}
+\setlength{\parskip}{6pt plus 2pt minus 1pt}
+\setlength{\emergencystretch}{3em}  % prevent overfull lines
+\providecommand{\tightlist}{%
+  \setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
+\setcounter{secnumdepth}{0}
+\ifxetex
+  % load bidi as late as possible as it modifies e.g. graphicx
+    \usepackage{bidi}
+  \fi
+\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
+  \TeXXeTstate=1
+  \newcommand{\RL}[1]{\beginR #1\endR}
+  \newcommand{\LR}[1]{\beginL #1\endL}
+  \newenvironment{RTL}{\beginR}{\endR}
+  \newenvironment{LTR}{\beginL}{\endL}
+\fi
+
+\date{}
+
+% Redefines (sub)paragraphs to behave more like sections
+\ifx\paragraph\undefined\else
+\let\oldparagraph\paragraph
+\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
+\fi
+\ifx\subparagraph\undefined\else
+\let\oldsubparagraph\subparagraph
+\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
+\fi
+
+\begin{document}
+
+\section{Empty Divs and Spans}\label{empty-divs-and-spans}
+
+Some text and
+
+div contents
+
+and more text.
+
+Next paragraph with a span and a word-thatincludesaspanright?
+
+\section{Directionality}\label{directionality}
+
+Some text and
+
+\begin{RTL}
+rtl div contents
+\end{RTL}
+
+and more text.
+
+\begin{LTR}
+and a ltr div. with a \RL{rtl span}.
+\end{LTR}
+
+Next paragraph with a \RL{rtl span} and a
+word-that-includesa\LR{ltrspan}right?
+
+\section{Languages}\label{languages}
+
+Some text and
+
+\begin{german}
+
+German div contents
+
+\end{german}
+
+and more text.
+
+Next paragraph with a \textenglish[variant=british]{British span} and a
+word-that-includesa\textgerman[variant=swiss]{Swiss German span}right?
+
+Some \textspanish{Spanish text}.
+
+\section{Combined}\label{combined}
+
+Some text and
+
+\begin{RTL}
+\begin{french}
+
+French rtl div contents
+
+\end{french}
+\end{RTL}
+
+and more text.
+
+Next paragraph with a \LR{\textenglish[variant=british]{British ltr
+span}} and a word-that-includesa\LR{\textgerman[variant=swiss]{Swiss
+German ltr span}}right?
+
+\end{document}
--- a/tests/writers-lang-and-dir.native
+++ b/tests/writers-lang-and-dir.native
@ -0,0 +1,23 @@
+Pandoc (Meta {unMeta = fromList []})
+[Header 1 ("empty-divs-and-spans",[],[]) [Str "Empty",Space,Str "Divs",Space,Str "and",Space,Str "Spans"]
+,Plain [Str "Some",Space,Str "text",Space,Str "and"]
+,Div ("",[],[]) [Para [Str "div",Space,Str "contents"]]
+,Para [Str "and",Space,Str "more",Space,Str "text."]
+,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[]) [Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-thatincludesa",Span ("",[],[]) [Str "span"],Str "right?"]
+,Header 1 ("directionality",[],[]) [Str "Directionality"]
+,Plain [Str "Some",Space,Str "text",Space,Str "and"]
+,Div ("",[],[("dir","rtl")]) [Para [Str "rtl",Space,Str "div",Space,Str "contents"]]
+,Para [Str "and",Space,Str "more",Space,Str "text."]
+,Div ("",[],[("dir","ltr")]) [Para [Str "and",Space,Str "a",Space,Str "ltr",Space,Str "div.",Space,Str "with",Space,Str "a",Space,Span ("",[],[("dir","rtl")]) [Str "rtl",Space,Str "span"],Str "."]]
+,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[("dir","rtl")]) [Str "rtl",Space,Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-that-includesa",Span ("",[],[("dir","ltr")]) [Str "ltrspan"],Str "right?"]
+,Header 1 ("languages",[],[]) [Str "Languages"]
+,Plain [Str "Some",Space,Str "text",Space,Str "and"]
+,Div ("",[],[("lang","de")]) [Para [Str "German",Space,Str "div",Space,Str "contents"]]
+,Para [Str "and",Space,Str "more",Space,Str "text."]
+,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[("lang","en-GB")]) [Str "British",Space,Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-that-includesa",Span ("",[],[("lang","de-CH")]) [Str "Swiss",Space,Str "German",Space,Str "span"],Str "right?"]
+,Para [Str "Some",Space,Span ("",[],[("lang","es")]) [Str "Spanish",Space,Str "text"],Str "."]
+,Header 1 ("combined",[],[]) [Str "Combined"]
+,Plain [Str "Some",Space,Str "text",Space,Str "and"]
+,Div ("",[],[("lang","fr"),("dir","rtl")]) [Para [Str "French",Space,Str "rtl",Space,Str "div",Space,Str "contents"]]
+,Para [Str "and",Space,Str "more",Space,Str "text."]
+,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[("lang","en-GB"),("dir","ltr")]) [Str "British",Space,Str "ltr",Space,Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-that-includesa",Span ("",[],[("lang","de-CH"),("dir","ltr")]) [Str "Swiss",Space,Str "German",Space,Str "ltr",Space,Str "span"],Str "right?"]]