Merge pull request #2458 from mb21/lang-inlines

LaTeX and ConTeXt writers: support lang attribute on divs and spans
This commit is contained in:
John MacFarlane 2015-10-19 23:02:08 -07:00
commit b49ab06e96
8 changed files with 430 additions and 38 deletions

17
README
View file

@ -1047,12 +1047,21 @@ Language variables
format stored in the additional variables `babel-lang`,
`polyglossia-lang` (LaTeX) and `context-lang` (ConTeXt).
Native pandoc `span`s and `div`s with the lang attribute
(value in BCP 47) can be used to switch the language in
that range.
`otherlangs`
: a list of other languages used in the document
in the YAML metadata, according to [BCP 47]. For example:
`otherlangs: [en-GB, fr]`.
Currently only used by `xelatex` through the generated
`polyglossia-otherlangs` variable.
This is automatically generated from the `lang` attributes
in all `span`s and `div`s but can be overriden.
Currently only used by LaTeX through the generated
`babel-otherlangs` and `polyglossia-otherlangs` variables.
The LaTeX writer outputs polyglossia commands in the text but
the `babel-newcommands` variable contains mappings for them
to the corresponding babel.
`dir`
: the base direction of the document, either `rtl` (right-to-left)
@ -1065,10 +1074,6 @@ Language variables
(e.g. the browser, when generating HTML) supports the
[Unicode Bidirectional Algorithm].
LaTeX and ConTeXt assume by default that all text is left-to-right.
Setting `dir: ltr` enables bidirectional text handling in a document
whose base direction is left-to-right but contains some right-to-left script.
When using LaTeX for bidirectional documents, only the `xelatex` engine
is fully supported (use `--latex-engine=xelatex`).

View file

@ -157,17 +157,21 @@ blockToConTeXt (CodeBlock _ str) =
blockToConTeXt (RawBlock "context" str) = return $ text str <> blankline
blockToConTeXt (RawBlock _ _ ) = return empty
blockToConTeXt (Div (ident,_,kvs) bs) = do
contents <- blockListToConTeXt bs
let contents' = if null ident
then contents
else ("\\reference" <> brackets (text $ toLabel ident) <>
braces empty <> "%") $$ contents
let align dir = blankline <> "\\startalignment[" <> dir <> "]"
$$ contents' $$ "\\stopalignment" <> blankline
return $ case lookup "dir" kvs of
Just "rtl" -> align "righttoleft"
Just "ltr" -> align "lefttoright"
_ -> contents'
let align dir txt = "\\startalignment[" <> dir <> "]" $$ txt $$ "\\stopalignment"
let wrapRef txt = if null ident
then txt
else ("\\reference" <> brackets (text $ toLabel ident) <>
braces empty <> "%") $$ txt
wrapDir = case lookup "dir" kvs of
Just "rtl" -> align "righttoleft"
Just "ltr" -> align "lefttoright"
_ -> id
wrapLang txt = case lookup "lang" kvs of
Just lng -> "\\start\\language["
<> text (fromBcp47' lng) <> "]" $$ txt $$ "\\stop"
Nothing -> txt
wrapBlank txt = blankline <> txt <> blankline
fmap (wrapBlank . wrapLang . wrapDir . wrapRef) $ blockListToConTeXt bs
blockToConTeXt (BulletList lst) = do
contents <- mapM listItemToConTeXt lst
return $ ("\\startitemize" <> if isTightList lst
@ -346,11 +350,15 @@ inlineToConTeXt (Note contents) = do
else text "\\startbuffer " <> nest 2 contents' <>
text "\\stopbuffer\\footnote{\\getbuffer}"
inlineToConTeXt (Span (_,_,kvs) ils) = do
contents <- inlineListToConTeXt ils
return $ case lookup "dir" kvs of
Just "rtl" -> braces $ "\\righttoleft " <> contents
Just "ltr" -> braces $ "\\lefttoright " <> contents
_ -> contents
let wrapDir txt = case lookup "dir" kvs of
Just "rtl" -> braces $ "\\righttoleft " <> txt
Just "ltr" -> braces $ "\\lefttoright " <> txt
_ -> txt
wrapLang txt = case lookup "lang" kvs of
Just lng -> "\\start\\language[" <> text (fromBcp47' lng)
<> "]" <> txt <> "\\stop "
Nothing -> txt
fmap (wrapLang . wrapDir) $ inlineListToConTeXt ils
-- | Craft the section header, inserting the secton reference, if supplied.
sectionHeader :: Attr
@ -377,6 +385,9 @@ sectionHeader (ident,classes,_) hdrLevel lst = do
then char '\\' <> chapter <> braces contents
else contents <> blankline
fromBcp47' :: String -> String
fromBcp47' = fromBcp47 . splitBy (=='-')
-- Takes a list of the constituents of a BCP 47 language code
-- and irons out ConTeXt's exceptions
-- https://tools.ietf.org/html/bcp47#section-2.1

View file

@ -39,7 +39,7 @@ import Text.Pandoc.Templates
import Text.Printf ( printf )
import Network.URI ( isURI, unEscapeString )
import Data.Aeson (object, (.=))
import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse )
import Data.List ( (\\), isInfixOf, stripPrefix, intercalate, intersperse, nub, nubBy )
import Data.Char ( toLower, isPunctuation, isAscii, isLetter, isDigit, ord )
import Data.Maybe ( fromMaybe )
import qualified Data.Text as T
@ -145,6 +145,7 @@ pandocToLaTeX options (Pandoc meta blocks) = do
st <- get
titleMeta <- stringToLaTeX TextString $ stringify $ docTitle meta
authorsMeta <- mapM (stringToLaTeX TextString . stringify) $ docAuthors meta
let docLangs = nub $ query (extract "lang") blocks
let context = defField "toc" (writerTableOfContents options) $
defField "toc-depth" (show (writerTOCDepth options -
if stBook st
@ -179,18 +180,48 @@ pandocToLaTeX options (Pandoc meta blocks) = do
Biblatex -> defField "biblio-title" biblioTitle .
defField "biblatex" True
_ -> id) $
-- set lang to something so polyglossia/babel is included
defField "lang" (if null docLangs then ""::String else "en") $
defField "otherlangs" docLangs $
defField "dir" (if (null $ query (extract "dir") blocks)
then ""::String
else "ltr") $
metadata
let toPolyObj lang = object [ "name" .= T.pack name
, "options" .= T.pack opts ]
where
(name, opts) = toPolyglossia lang
let lang = maybe [] (splitBy (=='-')) $ getField "lang" context
otherlangs = maybe [] (map $ splitBy (=='-')) $ getField "otherlangs" context
let context' =
defField "babel-lang" (toBabel lang)
$ defField "babel-otherlangs" (map toBabel otherlangs)
$ defField "babel-newcommands" (concatMap (\(poly, babel) ->
-- \textspanish and \textgalician are already used by babel
-- save them as \oritext... and let babel use that
if poly `elem` ["spanish", "galician"]
then "\\let\\oritext" ++ poly ++ "\\text" ++ poly ++ "\n" ++
"\\AddBabelHook{" ++ poly ++ "}{beforeextras}" ++
"{\\renewcommand{\\text" ++ poly ++ "}{\\oritext"
++ poly ++ "}}\n" ++
"\\AddBabelHook{" ++ poly ++ "}{afterextras}" ++
"{\\renewcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
++ poly ++ "}{##2}}}\n"
else "\\newcommand{\\text" ++ poly ++ "}[2][]{\\foreignlanguage{"
++ babel ++ "}{#2}}\n" ++
"\\newenvironment{" ++ poly ++ "}[1]{\\begin{otherlanguage}{"
++ babel ++ "}}{\\end{otherlanguage}}\n"
)
-- eliminate duplicates that have same polyglossia name
$ nubBy (\a b -> fst a == fst b)
-- find polyglossia and babel names of languages used in the document
$ map (\l ->
let lng = splitBy (=='-') l
in (fst $ toPolyglossia lng, toBabel lng)
)
docLangs )
$ defField "polyglossia-lang" (toPolyObj lang)
$ defField "polyglossia-otherlangs"
(maybe [] (map $ toPolyObj . splitBy (=='-')) $
getField "otherlangs" context)
$ defField "polyglossia-otherlangs" (map toPolyObj otherlangs)
$ defField "latex-dir-rtl" (case (getField "dir" context)::Maybe String of
Just "rtl" -> True
_ -> False)
@ -340,15 +371,24 @@ blockToLaTeX (Div (identifier,classes,kvs) bs) = do
then empty
else "\\hyperdef{}" <> braces (text ref) <>
braces ("\\label" <> braces (text ref))
contents' <- blockListToLaTeX bs
let align dir = inCmd "begin" dir $$ contents' $$ inCmd "end" dir
let contents = case lookup "dir" kvs of
Just "rtl" -> align "RTL"
Just "ltr" -> align "LTR"
_ -> contents'
if beamer && "notes" `elem` classes -- speaker notes
then return $ "\\note" <> braces contents
else return (linkAnchor $$ contents)
let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir
let wrapDir = case lookup "dir" kvs of
Just "rtl" -> align "RTL"
Just "ltr" -> align "LTR"
_ -> id
wrapLang txt = case lookup "lang" kvs of
Just lng -> let (l, o) = toPolyglossiaEnv lng
ops = if null o
then ""
else brackets $ text o
in inCmd "begin" (text l) <> ops
$$ blankline <> txt <> blankline
$$ inCmd "end" (text l)
Nothing -> txt
wrapNotes txt = if beamer && "notes" `elem` classes
then "\\note" <> braces txt -- speaker notes
else linkAnchor $$ txt
fmap (wrapDir . wrapLang . wrapNotes) $ blockListToLaTeX bs
blockToLaTeX (Plain lst) =
inlineListToLaTeX $ dropWhile isLineBreakOrSpace lst
-- title beginning with fig: indicates that the image is a figure
@ -759,9 +799,12 @@ inlineToLaTeX (Span (id',classes,kvs) ils) = do
(if noSmallCaps then inCmd "textnormal" else id) .
(if rtl then inCmd "RL" else id) .
(if ltr then inCmd "LR" else id) .
(if not (noEmph || noStrong || noSmallCaps || rtl || ltr)
then braces
else id)) `fmap` inlineListToLaTeX ils
(case lookup "lang" kvs of
Just lng -> let (l, o) = toPolyglossiaEnv lng
ops = if null o then "" else brackets (text o)
in \c -> char '\\' <> "text" <> text l <> ops <> braces c
Nothing -> id)
) `fmap` inlineListToLaTeX ils
inlineToLaTeX (Emph lst) =
inlineListToLaTeX lst >>= return . inCmd "emph"
inlineToLaTeX (Strong lst) =
@ -1002,6 +1045,30 @@ getListingsLanguage :: [String] -> Maybe String
getListingsLanguage [] = Nothing
getListingsLanguage (x:xs) = toListingsLanguage x <|> getListingsLanguage xs
-- Extract a key from divs and spans
extract :: String -> Block -> [String]
extract key (Div attr _) = lookKey key attr
extract key (Plain ils) = concatMap (extractInline key) ils
extract key (Para ils) = concatMap (extractInline key) ils
extract key (Header _ _ ils) = concatMap (extractInline key) ils
extract _ _ = []
-- Extract a key from spans
extractInline :: String -> Inline -> [String]
extractInline key (Span attr _) = lookKey key attr
extractInline _ _ = []
-- Look up a key in an attribute and give a list of its values
lookKey :: String -> Attr -> [String]
lookKey key (_,_,kvs) = maybe [] words $ lookup key kvs
-- In environments \Arabic instead of \arabic is used
toPolyglossiaEnv :: String -> (String, String)
toPolyglossiaEnv l =
case toPolyglossia $ (splitBy (=='-')) l of
("arabic", o) -> ("Arabic", o)
x -> x
-- Takes a list of the constituents of a BCP 47 language code and
-- converts it to a Polyglossia (language, options) tuple
-- http://mirrors.concertpass.com/tex-archive/macros/latex/contrib/polyglossia/polyglossia.pdf

View file

@ -165,6 +165,12 @@ tests = [ testGroup "markdown"
[ "opendocument" , "context" , "texinfo", "icml"
, "man" , "plain" , "rtf", "org", "asciidoc"
]
, testGroup "writers-lang-and-dir"
[ test "latex" ["-f", "native", "-t", "latex", "-s"]
"writers-lang-and-dir.native" "writers-lang-and-dir.latex"
, test "context" ["-f", "native", "-t", "context", "-s"]
"writers-lang-and-dir.native" "writers-lang-and-dir.context"
]
]
-- makes sure file is fully closed after reading

View file

@ -545,11 +545,13 @@ Blank line after term, indented marker, alternate markers:
Simple block on one line:
foo
And nested without indentation:
foo
bar
Interpreted markdown in a table:
This is {\em emphasized}
@ -575,6 +577,7 @@ As should this:
Now, nested:
foo
This should just be an HTML comment:
Multiline:

View file

@ -0,0 +1,111 @@
\startmode[*mkii]
\enableregime[utf-8]
\setupcolors[state=start]
\stopmode
% Enable hyperlinks
\setupinteraction[state=start, color=middleblue]
\setuppapersize [letter][letter]
\setuplayout [width=middle, backspace=1.5in, cutspace=1.5in,
height=middle, topspace=0.75in, bottomspace=0.75in]
\setuppagenumbering[location={footer,center}]
\setupbodyfont[11pt]
\setupwhitespace[medium]
\setuphead[chapter] [style=\tfd]
\setuphead[section] [style=\tfc]
\setuphead[subsection] [style=\tfb]
\setuphead[subsubsection][style=\bf]
\setuphead[chapter, section, subsection, subsubsection][number=no]
\definedescription
[description]
[headstyle=bold, style=normal, location=hanging, width=broad, margin=1cm, alternative=hanging]
\setupitemize[autointro] % prevent orphan list intro
\setupitemize[indentnext=no]
\setupfloat[figure][default={here,nonumber}]
\setupfloat[table][default={here,nonumber}]
\setupthinrules[width=15em] % width of horizontal rules
\setupdelimitedtext
[blockquote]
[before={\blank[medium]},
after={\blank[medium]},
indentnext=no,
]
\starttext
\section[empty-divs-and-spans]{Empty Divs and Spans}
Some text and
div contents
and more text.
Next paragraph with a span and a word-thatincludesaspanright?
\section[directionality]{Directionality}
Some text and
\startalignment[righttoleft]
rtl div contents
\stopalignment
and more text.
\startalignment[lefttoright]
and a ltr div. with a {\righttoleft rtl span}.
\stopalignment
Next paragraph with a {\righttoleft rtl span} and a
word-that-includesa{\lefttoright ltrspan}right?
\section[languages]{Languages}
Some text and
\start\language[de]
German div contents
\stop
and more text.
Next paragraph with a \start\language[en-gb]British span\stop and a
word-that-includesa\start\language[de-ch]Swiss German span\stop right?
Some \start\language[es]Spanish text\stop .
\section[combined]{Combined}
Some text and
\start\language[fr]
\startalignment[righttoleft]
French rtl div contents
\stopalignment
\stop
and more text.
Next paragraph with a \start\language[en-gb]{\lefttoright British ltr
span}\stop and a
word-that-includesa\start\language[de-ch]{\lefttoright Swiss German ltr
span}\stop right?
\stoptext

View file

@ -0,0 +1,166 @@
\documentclass[english,]{article}
\usepackage{lmodern}
\usepackage{amssymb,amsmath}
\usepackage{ifxetex,ifluatex}
\usepackage{fixltx2e} % provides \textsubscript
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\else % if luatex or xelatex
\ifxetex
\usepackage{mathspec}
\else
\usepackage{fontspec}
\fi
\defaultfontfeatures{Mapping=tex-text,Scale=MatchLowercase}
\newcommand{\euro}{€}
\fi
% use upquote if available, for straight quotes in verbatim environments
\IfFileExists{upquote.sty}{\usepackage{upquote}}{}
% use microtype if available
\IfFileExists{microtype.sty}{%
\usepackage{microtype}
\UseMicrotypeSet[protrusion]{basicmath} % disable protrusion for tt fonts
}{}
\makeatletter
\@ifpackageloaded{hyperref}{}{%
\ifxetex
\usepackage[setpagesize=false, % page size defined by xetex
unicode=false, % unicode breaks when used with xetex
xetex]{hyperref}
\else
\usepackage[unicode=true]{hyperref}
\fi
}
\@ifpackageloaded{color}{
\PassOptionsToPackage{usenames,dvipsnames}{color}
}{%
\usepackage[usenames,dvipsnames]{color}
}
\makeatother
\hypersetup{breaklinks=true,
bookmarks=true,
pdfauthor={},
pdftitle={},
colorlinks=true,
citecolor=blue,
urlcolor=blue,
linkcolor=magenta,
pdfborder={0 0 0}
}
\urlstyle{same} % don't use monospace font for urls
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\usepackage[shorthands=off,ngerman,british,ngerman,spanish,french,main=english]{babel}
\newcommand{\textgerman}[2][]{\foreignlanguage{ngerman}{#2}}
\newenvironment{german}[1]{\begin{otherlanguage}{ngerman}}{\end{otherlanguage}}
\newcommand{\textenglish}[2][]{\foreignlanguage{british}{#2}}
\newenvironment{english}[1]{\begin{otherlanguage}{british}}{\end{otherlanguage}}
\let\oritextspanish\textspanish
\AddBabelHook{spanish}{beforeextras}{\renewcommand{\textspanish}{\oritextspanish}}
\AddBabelHook{spanish}{afterextras}{\renewcommand{\textspanish}[2][]{\foreignlanguage{spanish}{##2}}}
\newcommand{\textfrench}[2][]{\foreignlanguage{french}{#2}}
\newenvironment{french}[1]{\begin{otherlanguage}{french}}{\end{otherlanguage}}
\else
\usepackage{polyglossia}
\setmainlanguage[]{english}
\setotherlanguage[]{german}
\setotherlanguage[variant=british]{english}
\setotherlanguage[variant=swiss]{german}
\setotherlanguage[]{spanish}
\setotherlanguage[]{french}
\fi
\setlength{\parindent}{0pt}
\setlength{\parskip}{6pt plus 2pt minus 1pt}
\setlength{\emergencystretch}{3em} % prevent overfull lines
\providecommand{\tightlist}{%
\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}
\setcounter{secnumdepth}{0}
\ifxetex
% load bidi as late as possible as it modifies e.g. graphicx
\usepackage{bidi}
\fi
\ifnum 0\ifxetex 1\fi\ifluatex 1\fi=0 % if pdftex
\TeXXeTstate=1
\newcommand{\RL}[1]{\beginR #1\endR}
\newcommand{\LR}[1]{\beginL #1\endL}
\newenvironment{RTL}{\beginR}{\endR}
\newenvironment{LTR}{\beginL}{\endL}
\fi
\date{}
% Redefines (sub)paragraphs to behave more like sections
\ifx\paragraph\undefined\else
\let\oldparagraph\paragraph
\renewcommand{\paragraph}[1]{\oldparagraph{#1}\mbox{}}
\fi
\ifx\subparagraph\undefined\else
\let\oldsubparagraph\subparagraph
\renewcommand{\subparagraph}[1]{\oldsubparagraph{#1}\mbox{}}
\fi
\begin{document}
\section{Empty Divs and Spans}\label{empty-divs-and-spans}
Some text and
div contents
and more text.
Next paragraph with a span and a word-thatincludesaspanright?
\section{Directionality}\label{directionality}
Some text and
\begin{RTL}
rtl div contents
\end{RTL}
and more text.
\begin{LTR}
and a ltr div. with a \RL{rtl span}.
\end{LTR}
Next paragraph with a \RL{rtl span} and a
word-that-includesa\LR{ltrspan}right?
\section{Languages}\label{languages}
Some text and
\begin{german}
German div contents
\end{german}
and more text.
Next paragraph with a \textenglish[variant=british]{British span} and a
word-that-includesa\textgerman[variant=swiss]{Swiss German span}right?
Some \textspanish{Spanish text}.
\section{Combined}\label{combined}
Some text and
\begin{RTL}
\begin{french}
French rtl div contents
\end{french}
\end{RTL}
and more text.
Next paragraph with a \LR{\textenglish[variant=british]{British ltr
span}} and a word-that-includesa\LR{\textgerman[variant=swiss]{Swiss
German ltr span}}right?
\end{document}

View file

@ -0,0 +1,23 @@
Pandoc (Meta {unMeta = fromList []})
[Header 1 ("empty-divs-and-spans",[],[]) [Str "Empty",Space,Str "Divs",Space,Str "and",Space,Str "Spans"]
,Plain [Str "Some",Space,Str "text",Space,Str "and"]
,Div ("",[],[]) [Para [Str "div",Space,Str "contents"]]
,Para [Str "and",Space,Str "more",Space,Str "text."]
,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[]) [Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-thatincludesa",Span ("",[],[]) [Str "span"],Str "right?"]
,Header 1 ("directionality",[],[]) [Str "Directionality"]
,Plain [Str "Some",Space,Str "text",Space,Str "and"]
,Div ("",[],[("dir","rtl")]) [Para [Str "rtl",Space,Str "div",Space,Str "contents"]]
,Para [Str "and",Space,Str "more",Space,Str "text."]
,Div ("",[],[("dir","ltr")]) [Para [Str "and",Space,Str "a",Space,Str "ltr",Space,Str "div.",Space,Str "with",Space,Str "a",Space,Span ("",[],[("dir","rtl")]) [Str "rtl",Space,Str "span"],Str "."]]
,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[("dir","rtl")]) [Str "rtl",Space,Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-that-includesa",Span ("",[],[("dir","ltr")]) [Str "ltrspan"],Str "right?"]
,Header 1 ("languages",[],[]) [Str "Languages"]
,Plain [Str "Some",Space,Str "text",Space,Str "and"]
,Div ("",[],[("lang","de")]) [Para [Str "German",Space,Str "div",Space,Str "contents"]]
,Para [Str "and",Space,Str "more",Space,Str "text."]
,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[("lang","en-GB")]) [Str "British",Space,Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-that-includesa",Span ("",[],[("lang","de-CH")]) [Str "Swiss",Space,Str "German",Space,Str "span"],Str "right?"]
,Para [Str "Some",Space,Span ("",[],[("lang","es")]) [Str "Spanish",Space,Str "text"],Str "."]
,Header 1 ("combined",[],[]) [Str "Combined"]
,Plain [Str "Some",Space,Str "text",Space,Str "and"]
,Div ("",[],[("lang","fr"),("dir","rtl")]) [Para [Str "French",Space,Str "rtl",Space,Str "div",Space,Str "contents"]]
,Para [Str "and",Space,Str "more",Space,Str "text."]
,Para [Str "Next",Space,Str "paragraph",Space,Str "with",Space,Str "a",Space,Span ("",[],[("lang","en-GB"),("dir","ltr")]) [Str "British",Space,Str "ltr",Space,Str "span"],Space,Str "and",Space,Str "a",Space,Str "word-that-includesa",Span ("",[],[("lang","de-CH"),("dir","ltr")]) [Str "Swiss",Space,Str "German",Space,Str "ltr",Space,Str "span"],Str "right?"]]