From 81fe8ebf369279a899ba4995a4cd1f8a883eeeb9 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 2 Sep 2020 10:10:55 -0700 Subject: [PATCH] LaTeX reader: Factored out siunitx stuff into separate module. --- pandoc.cabal | 1 + src/Text/Pandoc/Readers/LaTeX.hs | 226 +-------------------- src/Text/Pandoc/Readers/LaTeX/SIunitx.hs | 241 +++++++++++++++++++++++ 3 files changed, 245 insertions(+), 223 deletions(-) create mode 100644 src/Text/Pandoc/Readers/LaTeX/SIunitx.hs diff --git a/pandoc.cabal b/pandoc.cabal index 047590d18..76e93d2e6 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -589,6 +589,7 @@ library Text.Pandoc.Readers.Docx.Fields, Text.Pandoc.Readers.LaTeX.Parsing, Text.Pandoc.Readers.LaTeX.Lang, + Text.Pandoc.Readers.LaTeX.SIunitx, Text.Pandoc.Readers.Odt.Base, Text.Pandoc.Readers.Odt.Namespaces, Text.Pandoc.Readers.Odt.StyleReader, diff --git a/src/Text/Pandoc/Readers/LaTeX.hs b/src/Text/Pandoc/Readers/LaTeX.hs index 85e30f538..c3e770578 100644 --- a/src/Text/Pandoc/Readers/LaTeX.hs +++ b/src/Text/Pandoc/Readers/LaTeX.hs @@ -57,6 +57,7 @@ import Text.Pandoc.Readers.LaTeX.Types (ExpansionPoint (..), Macro (..), import Text.Pandoc.Readers.LaTeX.Parsing import Text.Pandoc.Readers.LaTeX.Lang (polyglossiaLangToBCP47, babelLangToBCP47) +import Text.Pandoc.Readers.LaTeX.SIunitx import Text.Pandoc.Shared import qualified Text.Pandoc.Translations as Translations import Text.Pandoc.Walk @@ -244,227 +245,6 @@ doxspace = startsWithLetter _ = False --- converts e.g. \SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €" -dosiunitx :: PandocMonad m => LP m Inlines -dosiunitx = do - skipopts - value <- tok - valueprefix <- option "" $ bracketed tok - unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok - let emptyOr160 "" = "" - emptyOr160 _ = "\160" - return . mconcat $ [valueprefix, - emptyOr160 valueprefix, - value, - emptyOr160 unit, - unit] - --- converts e.g. 
\SIrange{100}{200}{\ms} to "100 ms--200 ms" -doSIrange :: PandocMonad m => LP m Inlines -doSIrange = do - skipopts - startvalue <- tok - startvalueprefix <- option "" $ bracketed tok - stopvalue <- tok - stopvalueprefix <- option "" $ bracketed tok - unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok - let emptyOr160 "" = "" - emptyOr160 _ = "\160" - return . mconcat $ [startvalueprefix, - emptyOr160 startvalueprefix, - startvalue, - emptyOr160 unit, - unit, - "\8211", -- An en-dash - stopvalueprefix, - emptyOr160 stopvalueprefix, - stopvalue, - emptyOr160 unit, - unit] - -siUnit :: PandocMonad m => LP m Inlines -siUnit = do - Tok _ (CtrlSeq name) _ <- anyControlSeq - if name == "square" - then do - unit <- grouped (mconcat <$> many1 siUnit) <|> siUnit <|> tok - return . mconcat $ [unit, "\178"] - else - case M.lookup name siUnitMap of - Just il -> return il - Nothing -> mzero - -siUnitMap :: M.Map Text Inlines -siUnitMap = M.fromList - [ ("fg", str "fg") - , ("pg", str "pg") - , ("ng", str "ng") - , ("ug", str "μg") - , ("mg", str "mg") - , ("g", str "g") - , ("kg", str "kg") - , ("amu", str "u") - , ("pm", str "pm") - , ("nm", str "nm") - , ("um", str "μm") - , ("mm", str "mm") - , ("cm", str "cm") - , ("dm", str "dm") - , ("m", str "m") - , ("km", str "km") - , ("as", str "as") - , ("fs", str "fs") - , ("ps", str "ps") - , ("ns", str "ns") - , ("us", str "μs") - , ("ms", str "ms") - , ("s", str "s") - , ("fmol", str "fmol") - , ("pmol", str "pmol") - , ("nmol", str "nmol") - , ("umol", str "μmol") - , ("mmol", str "mmol") - , ("mol", str "mol") - , ("kmol", str "kmol") - , ("pA", str "pA") - , ("nA", str "nA") - , ("uA", str "μA") - , ("mA", str "mA") - , ("A", str "A") - , ("kA", str "kA") - , ("ul", str "μl") - , ("ml", str "ml") - , ("l", str "l") - , ("hl", str "hl") - , ("uL", str "μL") - , ("mL", str "mL") - , ("L", str "L") - , ("hL", str "hL") - , ("mHz", str "mHz") - , ("Hz", str "Hz") - , ("kHz", str "kHz") - , ("MHz", str "MHz") - , ("GHz", 
str "GHz") - , ("THz", str "THz") - , ("mN", str "mN") - , ("N", str "N") - , ("kN", str "kN") - , ("MN", str "MN") - , ("Pa", str "Pa") - , ("kPa", str "kPa") - , ("MPa", str "MPa") - , ("GPa", str "GPa") - , ("mohm", str "mΩ") - , ("kohm", str "kΩ") - , ("Mohm", str "MΩ") - , ("pV", str "pV") - , ("nV", str "nV") - , ("uV", str "μV") - , ("mV", str "mV") - , ("V", str "V") - , ("kV", str "kV") - , ("W", str "W") - , ("uW", str "μW") - , ("mW", str "mW") - , ("kW", str "kW") - , ("MW", str "MW") - , ("GW", str "GW") - , ("J", str "J") - , ("uJ", str "μJ") - , ("mJ", str "mJ") - , ("kJ", str "kJ") - , ("eV", str "eV") - , ("meV", str "meV") - , ("keV", str "keV") - , ("MeV", str "MeV") - , ("GeV", str "GeV") - , ("TeV", str "TeV") - , ("kWh", str "kWh") - , ("F", str "F") - , ("fF", str "fF") - , ("pF", str "pF") - , ("K", str "K") - , ("dB", str "dB") - , ("angstrom", str "Å") - , ("arcmin", str "′") - , ("arcminute", str "′") - , ("arcsecond", str "″") - , ("astronomicalunit", str "ua") - , ("atomicmassunit", str "u") - , ("atto", str "a") - , ("bar", str "bar") - , ("barn", str "b") - , ("becquerel", str "Bq") - , ("bel", str "B") - , ("candela", str "cd") - , ("celsius", str "°C") - , ("centi", str "c") - , ("coulomb", str "C") - , ("dalton", str "Da") - , ("day", str "d") - , ("deca", str "d") - , ("deci", str "d") - , ("decibel", str "db") - , ("degreeCelsius",str "°C") - , ("degree", str "°") - , ("deka", str "d") - , ("electronvolt", str "eV") - , ("exa", str "E") - , ("farad", str "F") - , ("femto", str "f") - , ("giga", str "G") - , ("gram", str "g") - , ("hectare", str "ha") - , ("hecto", str "h") - , ("henry", str "H") - , ("hertz", str "Hz") - , ("hour", str "h") - , ("joule", str "J") - , ("katal", str "kat") - , ("kelvin", str "K") - , ("kilo", str "k") - , ("kilogram", str "kg") - , ("knot", str "kn") - , ("liter", str "L") - , ("litre", str "l") - , ("lumen", str "lm") - , ("lux", str "lx") - , ("mega", str "M") - , ("meter", str "m") - , ("metre", 
str "m") - , ("micro", str "μ") - , ("milli", str "m") - , ("minute", str "min") - , ("mmHg", str "mmHg") - , ("mole", str "mol") - , ("nano", str "n") - , ("nauticalmile", str "M") - , ("neper", str "Np") - , ("newton", str "N") - , ("ohm", str "Ω") - , ("Pa", str "Pa") - , ("pascal", str "Pa") - , ("percent", str "%") - , ("per", str "/") - , ("peta", str "P") - , ("pico", str "p") - , ("radian", str "rad") - , ("second", str "s") - , ("siemens", str "S") - , ("sievert", str "Sv") - , ("steradian", str "sr") - , ("tera", str "T") - , ("tesla", str "T") - , ("tonne", str "t") - , ("volt", str "V") - , ("watt", str "W") - , ("weber", str "Wb") - , ("yocto", str "y") - , ("yotta", str "Y") - , ("zepto", str "z") - , ("zetta", str "Z") - ] - lit :: Text -> LP m Inlines lit = pure . str @@ -1097,8 +877,8 @@ inlineCommands = M.union inlineLanguageCommands $ M.fromList , ("acfp", doAcronymPlural "full") , ("acsp", doAcronymPlural "abbrv") -- siuntix - , ("SI", dosiunitx) - , ("SIrange", doSIrange) + , ("SI", dosiunitx tok) + , ("SIrange", doSIrange tok) -- hyphenat , ("bshyp", lit "\\\173") , ("fshyp", lit "/\173") diff --git a/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs new file mode 100644 index 000000000..2befe80e4 --- /dev/null +++ b/src/Text/Pandoc/Readers/LaTeX/SIunitx.hs @@ -0,0 +1,241 @@ +{-# LANGUAGE OverloadedStrings #-} +module Text.Pandoc.Readers.LaTeX.SIunitx + ( dosiunitx + , doSIrange + ) +where +import Text.Pandoc.Builder +import Text.Pandoc.Readers.LaTeX.Parsing +import Text.Pandoc.Readers.LaTeX.Types +import Text.Pandoc.Class +import Control.Monad (mzero) +import Text.Pandoc.Parsing hiding (blankline, many, mathDisplay, mathInline, + optional, space, spaces, withRaw, (<|>)) +import Control.Applicative ((<|>)) +import qualified Data.Map as M +import Data.Text (Text) + +-- converts e.g. 
\SI{1}[\$]{} to "$ 1" or \SI{1}{\euro} to "1 €"
+-- (a non-breaking space separates prefix, value and unit).  @tok@ is the
+-- parser for one inline argument and is supplied by the caller; the LaTeX
+-- reader passes its own @tok@.
+dosiunitx :: PandocMonad m => LP m Inlines -> LP m Inlines
+dosiunitx tok = do
+  skipopts
+  value <- tok
+  valueprefix <- option "" $ bracketed tok
+  unit <- rawUnit tok
+  return $ quantity valueprefix value unit
+
+-- | Render one quantity: prefix, value and unit joined by non-breaking
+-- spaces, which are left out next to an empty prefix or an empty unit.
+quantity :: Inlines -> Inlines -> Inlines -> Inlines
+quantity prefix value unit =
+  mconcat [prefix, emptyOr160 prefix, value, emptyOr160 unit, unit]
+
+-- | Parse the arguments of @\\SIrange@: converts e.g.
+-- @\\SIrange{100}{200}{\\ms}@ to "100 ms–200 ms", each quantity rendered
+-- as in 'dosiunitx' and the two joined by an en dash.
+doSIrange :: PandocMonad m => LP m Inlines -> LP m Inlines
+doSIrange tok = do
+  skipopts
+  startvalue <- tok
+  startvalueprefix <- option "" $ bracketed tok
+  stopvalue <- tok
+  stopvalueprefix <- option "" $ bracketed tok
+  unit <- rawUnit tok
+  return $ quantity startvalueprefix startvalue unit
+        <> "\8211" -- en dash (U+2013)
+        <> quantity stopvalueprefix stopvalue unit
+
+-- | A unit: a braced run of known unit macros, a single known unit macro,
+-- or, when neither matches, whatever @tok@ parses.
+rawUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
+rawUnit tok = grouped (mconcat <$> many1 (siUnit tok)) <|> siUnit tok <|> tok
+
+-- | A non-breaking space (U+00A0), or nothing when the argument is empty.
+emptyOr160 :: Inlines -> Inlines
+emptyOr160 x = if x == mempty then x else str "\160"
+
+-- | One unit macro looked up in 'siUnitMap'; @\\square@ appends a
+-- superscript two to the unit that follows it.  The 'try' un-consumes an
+-- unknown macro on failure, so 'rawUnit' can still fall back to @tok@.
+siUnit :: PandocMonad m => LP m Inlines -> LP m Inlines
+siUnit tok = try $ do
+  Tok _ (CtrlSeq name) _ <- anyControlSeq
+  if name == "square"
+     then (<> "\178") <$> rawUnit tok
+     else maybe mzero return (M.lookup name siUnitMap)
+
+-- | Output text for every unit and prefix macro that 'siUnit' recognises.
+siUnitMap :: M.Map Text Inlines
+siUnitMap = M.fromList
+  [ ("fg", str "fg")
+  , ("pg", str "pg")
+  , ("ng", str "ng")
+  , ("ug", str "μg")
+  , ("mg", str "mg")
+  , ("g", str "g")
+  , ("kg", str "kg")
+  , ("amu", str "u")
+  , ("pm", str "pm")
+  , ("nm", str "nm")
+  , ("um", str "μm")
+  , ("mm", str "mm")
+  , ("cm", str "cm")
+  , ("dm", str "dm")
+  , ("m", str "m")
+  , ("km", str "km")
+  , ("as", str "as")
+  , ("fs", str "fs")
+  , ("ps", str "ps")
+  , ("ns", str "ns")
+  , ("us", str "μs")
+  , ("ms", str "ms")
+  , ("s", str "s")
+  , ("fmol", str "fmol")
+  , ("pmol", str "pmol")
+  , ("nmol", str "nmol")
+  , ("umol", str "μmol")
+  , ("mmol", str "mmol")
+  , ("mol", str "mol")
+  , ("kmol", str "kmol")
+  , ("pA", str "pA")
+  , ("nA", str "nA")
+  , ("uA", str "μA")
+  , ("mA", str "mA")
+  , ("A", str "A")
+  , ("kA", str "kA")
+  , ("ul", str "μl")
+  , ("ml", str "ml")
+  , ("l", str "l")
+  , ("hl", str "hl")
+  , ("uL", str "μL")
+  , ("mL", str "mL")
+  , ("L", str "L")
+  , ("hL", str "hL")
+  , ("mHz", str "mHz")
+  , ("Hz", str "Hz")
+  , ("kHz", str "kHz")
+  , ("MHz", str "MHz")
+  , ("GHz", str "GHz")
+  , ("THz", str "THz")
+  , ("mN", str "mN")
+  , ("N", str "N")
+  , ("kN", str "kN")
+  , ("MN", str "MN")
+  , ("Pa", str "Pa")
+  , ("kPa", str "kPa")
+  , ("MPa", str "MPa")
+  , ("GPa", str "GPa")
+  , ("mohm", str "mΩ")
+  , ("kohm", str "kΩ")
+  , ("Mohm", str "MΩ")
+  , ("pV", str "pV")
+  , ("nV", str "nV")
+  , ("uV", str "μV")
+  , ("mV", str "mV")
+  , ("V", str "V")
+  , ("kV", str "kV")
+  , ("W", str "W")
+  , ("uW", str "μW")
+  , ("mW", str "mW")
+  , ("kW", str "kW")
+  , ("MW", str "MW")
+  , ("GW", str "GW")
+  , ("J", str "J")
+  , ("uJ", str "μJ")
+  , ("mJ", str "mJ")
+  , ("kJ", str "kJ")
+  , ("eV", str "eV")
+  , ("meV", str "meV")
+  , ("keV", str "keV")
+  , ("MeV", str "MeV")
+  , ("GeV", str "GeV")
+  , ("TeV", str "TeV")
+  , ("kWh", str "kWh")
+  , ("F", str "F")
+  , ("fF", str "fF")
+  , ("pF", str "pF")
+  , ("K", str "K")
+  , ("dB", str "dB")
+  , ("angstrom", str "Å")
+  , ("arcmin", str "′")
+  , ("arcminute", str "′")
+  , ("arcsecond", str "″")
+  , ("astronomicalunit", str "ua")
+  , ("atomicmassunit", str "u")
+  , ("atto", str "a")
+  , ("bar", str "bar")
+  , ("barn", str "b")
+  , ("becquerel", str "Bq")
+  , ("bel", str "B")
+  , ("candela", str "cd")
+  , ("celsius", str "°C")
+  , ("centi", str "c")
+  , ("coulomb", str "C")
+  , ("dalton", str "Da")
+  , ("day", str "d")
+  , ("deca", str "da") -- SI prefix symbol is "da"; "d" is deci
+  , ("deci", str "d")
+  , ("decibel", str "dB") -- capital B, consistent with the "dB" entry
+  , ("degreeCelsius", str "°C")
+  , ("degree", str "°")
+  , ("deka", str "da") -- alternative spelling of deca
+  , ("electronvolt", str "eV")
+  , ("exa", str "E")
+  , ("farad", str "F")
+  , ("femto", str "f")
+  , ("giga", str "G")
+  , ("gram", str "g")
+  , ("hectare", str "ha")
+  , ("hecto", str "h")
+  , ("henry", str "H")
+  , ("hertz", str "Hz")
+  , ("hour", str "h")
+  , ("joule", str "J")
+  , ("katal", str "kat")
+  , ("kelvin", str "K")
+  , ("kilo", str "k")
+  , ("kilogram", str "kg")
+  , ("knot", str "kn")
+  , ("liter", str "L")
+  , ("litre", str "l")
+  , ("lumen", str "lm")
+  , ("lux", str "lx")
+  , ("mega", str "M")
+  , ("meter", str "m")
+  , ("metre", str "m")
+  , ("micro", str "μ")
+  , ("milli", str "m")
+  , ("minute", str "min")
+  , ("mmHg", str "mmHg")
+  , ("mole", str "mol")
+  , ("nano", str "n")
+  , ("nauticalmile", str "M")
+  , ("neper", str "Np")
+  , ("newton", str "N")
+  , ("ohm", str "Ω")
+  , ("pascal", str "Pa")
+  , ("percent", str "%")
+  , ("per", str "/")
+  , ("peta", str "P")
+  , ("pico", str "p")
+  , ("radian", str "rad")
+  , ("second", str "s")
+  , ("siemens", str "S")
+  , ("sievert", str "Sv")
+  , ("steradian", str "sr")
+  , ("tera", str "T")
+  , ("tesla", str "T")
+  , ("tonne", str "t")
+  , ("volt", str "V")
+  , ("watt", str "W")
+  , ("weber", str "Wb")
+  , ("yocto", str "y")
+  , ("yotta", str "Y")
+  , ("zepto", str "z")
+  , ("zetta", str "Z")
+  ]
+
+