diff --git a/pandoc.cabal b/pandoc.cabal index 3001756a4..1924e568d 100644 --- a/pandoc.cabal +++ b/pandoc.cabal @@ -210,7 +210,9 @@ Extra-Source-Files: tests/epub/*.native tests/txt2tags.t2t tests/twiki-reader.twiki - + tests/odt/odt/*.odt + tests/odt/markdown/*.md + tests/odt/native/*.native Source-repository head type: git location: git://github.com/jgm/pandoc.git @@ -327,6 +329,7 @@ Library Text.Pandoc.Readers.TWiki, Text.Pandoc.Readers.Txt2Tags, Text.Pandoc.Readers.Docx, + Text.Pandoc.Readers.Odt, Text.Pandoc.Readers.EPUB, Text.Pandoc.Writers.Native, Text.Pandoc.Writers.Docbook, @@ -364,7 +367,18 @@ Library Text.Pandoc.Readers.Docx.Parse, Text.Pandoc.Readers.Docx.Fonts, Text.Pandoc.Readers.Docx.Util, - Text.Pandoc.Readers.Docx.StyleMap + Text.Pandoc.Readers.Docx.StyleMap, + Text.Pandoc.Readers.Odt.Base, + Text.Pandoc.Readers.Odt.Namespaces, + Text.Pandoc.Readers.Odt.StyleReader, + Text.Pandoc.Readers.Odt.ContentReader, + Text.Pandoc.Readers.Odt.Generic.Fallible, + Text.Pandoc.Readers.Odt.Generic.SetMap, + Text.Pandoc.Readers.Odt.Generic.Utils, + Text.Pandoc.Readers.Odt.Generic.Namespaces, + Text.Pandoc.Readers.Odt.Generic.XMLConverter, + Text.Pandoc.Readers.Odt.Arrows.State, + Text.Pandoc.Readers.Odt.Arrows.Utils, Text.Pandoc.Writers.Shared, Text.Pandoc.Asciify, Text.Pandoc.MIME, @@ -459,6 +473,7 @@ Test-Suite test-pandoc Tests.Readers.Org Tests.Readers.RST Tests.Readers.Docx + Tests.Readers.Odt Tests.Readers.Txt2Tags Tests.Readers.EPUB Tests.Writers.Native diff --git a/pandoc.hs b/pandoc.hs index 5d201ce36..fb9b9abbf 100644 --- a/pandoc.hs +++ b/pandoc.hs @@ -954,7 +954,7 @@ defaultReaderName fallback (x:xs) = ".docx" -> "docx" ".t2t" -> "t2t" ".epub" -> "epub" - ".odt" -> "odt" -- so we get an "unknown reader" error + ".odt" -> "odt" ".pdf" -> "pdf" -- so we get an "unknown reader" error ".doc" -> "doc" -- so we get an "unknown reader" error _ -> defaultReaderName fallback xs @@ -1180,8 +1180,6 @@ main = do Right r -> return r Left e -> err 7 e' where e' = case readerName' of - "odt" -> e ++ - "\nPandoc can convert to ODT, but not from ODT.\nTry using LibreOffice to export as HTML, and convert that with pandoc." "pdf" -> e ++ "\nPandoc can convert to PDF, but not from PDF." "doc" -> e ++ diff --git a/src/Text/Pandoc.hs b/src/Text/Pandoc.hs index 63da88c2e..a4d963221 100644 --- a/src/Text/Pandoc.hs +++ b/src/Text/Pandoc.hs @@ -67,6 +67,7 @@ module Text.Pandoc , Reader (..) , mkStringReader , readDocx + , readOdt , readMarkdown , readCommonMark , readMediaWiki @@ -85,7 +86,7 @@ module Text.Pandoc , readTxt2TagsNoMacros , readEPUB -- * Writers: converting /from/ Pandoc format - , Writer (..) + , Writer (..) , writeNative , writeJSON , writeMarkdown @@ -141,6 +142,7 @@ import Text.Pandoc.Readers.Native import Text.Pandoc.Readers.Haddock import Text.Pandoc.Readers.TWiki import Text.Pandoc.Readers.Docx +import Text.Pandoc.Readers.Odt import Text.Pandoc.Readers.Txt2Tags import Text.Pandoc.Readers.EPUB import Text.Pandoc.Writers.Native @@ -247,6 +249,7 @@ readers = [ ("native" , StringReader $ \_ s -> return $ readNative s) ,("haddock" , mkStringReader readHaddock) ,("twiki" , mkStringReader readTWiki) ,("docx" , mkBSReader readDocx) + ,("odt" , mkBSReader readOdt) ,("t2t" , mkStringReader readTxt2TagsNoMacros) ,("epub" , mkBSReader readEPUB) ] diff --git a/src/Text/Pandoc/Readers/Odt.hs b/src/Text/Pandoc/Readers/Odt.hs new file mode 100644 index 000000000..1c8ec51bc --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt.hs @@ -0,0 +1,86 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Reader.Odt + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Entry point to the odt reader. +-} + +module Text.Pandoc.Readers.Odt ( readOdt ) where + +import Codec.Archive.Zip +import qualified Text.XML.Light as XML + +import qualified Data.ByteString.Lazy as B +import Data.Monoid ( mempty ) + +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Text.Pandoc.Options +import Text.Pandoc.MediaBag +import qualified Text.Pandoc.UTF8 as UTF8 + +import Text.Pandoc.Readers.Odt.ContentReader +import Text.Pandoc.Readers.Odt.StyleReader + +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Generic.Fallible + +-- +readOdt :: ReaderOptions + -> B.ByteString + -> Either PandocError (Pandoc, MediaBag) +readOdt _ bytes = case bytesToOdt bytes of + Right pandoc -> Right (pandoc , mempty) + Left err -> Left err + +-- +bytesToOdt :: B.ByteString -> Either PandocError Pandoc +bytesToOdt bytes = archiveToOdt $ toArchive bytes + +-- +archiveToOdt :: Archive -> Either PandocError Pandoc +archiveToOdt archive + | Just contentEntry <- findEntryByPath "content.xml" archive + , Just stylesEntry <- findEntryByPath "styles.xml" archive + , Just contentElem <- entryToXmlElem contentEntry + , Just stylesElem <- entryToXmlElem stylesEntry + , Right styles <- chooseMax (readStylesAt stylesElem ) + (readStylesAt contentElem) + , startState <- readerState styles + , Right pandoc <- runConverter' read_body + startState + contentElem + = Right pandoc + + | otherwise + -- Not very detailed, but I don't think more information would be helpful + = Left $ ParseFailure "Couldn't parse odt file." + +-- +entryToXmlElem :: Entry -> Maybe XML.Element +entryToXmlElem = XML.parseXMLDoc . UTF8.toStringLazy . fromEntry diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/State.hs b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs new file mode 100644 index 000000000..310ca028e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Arrows/State.hs @@ -0,0 +1,253 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE FlexibleInstances #-} +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Arrows.State + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +An arrow that transports a state. It is in essence a more powerful version of +the standard state monad. As it is such a simple extension, there are +other version out there that do exactly the same. +The implementation is duplicated, though, to add some useful features. +Most of these might be implemented without access to innards, but it's much +faster and easier to implement this way. +-} + +module Text.Pandoc.Readers.Odt.Arrows.State where + +import Prelude hiding ( foldr, foldl ) + +import qualified Control.Category as Cat +import Control.Arrow +import Control.Monad + +import Data.Monoid +import Data.Foldable + +import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible + + +newtype ArrowState state a b = ArrowState + { runArrowState :: (state, a) -> (state, b) } + +-- | Constructor +withState :: (state -> a -> (state, b)) -> ArrowState state a b +withState = ArrowState . uncurry + +-- | Constructor +withState' :: ((state, a) -> (state, b)) -> ArrowState state a b +withState' = ArrowState + +-- | Constructor +modifyState :: (state -> state ) -> ArrowState state a a +modifyState = ArrowState . first + +-- | Constructor +ignoringState :: ( a -> b ) -> ArrowState state a b +ignoringState = ArrowState . second + +-- | Constructor +fromState :: (state -> (state, b)) -> ArrowState state a b +fromState = ArrowState . (.fst) + +-- | Constructor +extractFromState :: (state -> b ) -> ArrowState state x b +extractFromState f = ArrowState $ \(state,_) -> (state, f state) + +-- | Constructor +withUnchangedState :: (state -> a -> b ) -> ArrowState state a b +withUnchangedState f = ArrowState $ \(state,a) -> (state, f state a) + +-- | Constructor +tryModifyState :: (state -> Either f state) + -> ArrowState state a (Either f a) +tryModifyState f = ArrowState $ \(state,a) + -> (state,).Left ||| (,Right a) $ f state + +instance Cat.Category (ArrowState s) where + id = ArrowState id + arrow2 . arrow1 = ArrowState $ (runArrowState arrow2).(runArrowState arrow1) + +instance Arrow (ArrowState state) where + arr = ignoringState + first a = ArrowState $ \(s,(aF,aS)) + -> second (,aS) $ runArrowState a (s,aF) + second a = ArrowState $ \(s,(aF,aS)) + -> second (aF,) $ runArrowState a (s,aS) + +instance ArrowChoice (ArrowState state) where + left a = ArrowState $ \(s,e) -> case e of + Left l -> second Left $ runArrowState a (s,l) + Right r -> (s, Right r) + right a = ArrowState $ \(s,e) -> case e of + Left l -> (s, Left l) + Right r -> second Right $ runArrowState a (s,r) + +instance ArrowLoop (ArrowState state) where + loop a = ArrowState $ \(s, x) + -> let (s', (x', _d)) = runArrowState a (s, (x, _d)) + in (s', x') + +instance ArrowApply (ArrowState state) where + app = ArrowState $ \(s, (f,b)) -> runArrowState f (s,b) + + +-- | Embedding of a state arrow in a state arrow with a different state type. +switchState :: (s -> s') -> (s' -> s) -> ArrowState s' x y -> ArrowState s x y +switchState there back a = ArrowState $ first there + >>> runArrowState a + >>> first back + +-- | Lift a state arrow to modify the state of an arrow +-- with a different state type. +liftToState :: (s -> s') -> ArrowState s' s s -> ArrowState s x x +liftToState unlift a = modifyState $ unlift &&& id + >>> runArrowState a + >>> snd + +-- | Switches the type of the state temporarily. +-- Drops the intermediate result state, behaving like the identity arrow, +-- save for side effects in the state. +withSubState :: ArrowState s x s2 -> ArrowState s2 s s -> ArrowState s x x +withSubState unlift a = keepingTheValue (withSubState unlift a) >>^ fst + +-- | Switches the type of the state temporarily. +-- Returns the resulting sub-state. +withSubState' :: ArrowState s x s' -> ArrowState s' s s -> ArrowState s x s' +withSubState' unlift a = ArrowState $ runArrowState unlift + >>> switch + >>> runArrowState a + >>> switch + where switch (x,y) = (y,x) + +-- | Switches the type of the state temporarily. +-- Drops the intermediate result state, behaving like a fallible +-- identity arrow, save for side effects in the state. +withSubStateF :: ArrowState s x (Either f s') + -> ArrowState s' s (Either f s ) + -> ArrowState s x (Either f x ) +withSubStateF unlift a = keepingTheValue (withSubStateF' unlift a) + >>^ spreadChoice + >>^ fmap fst + +-- | Switches the type of the state temporarily. +-- Returns the resulting sub-state. +withSubStateF' :: ArrowState s x (Either f s') + -> ArrowState s' s (Either f s ) + -> ArrowState s x (Either f s') +withSubStateF' unlift a = ArrowState go + where go p@(s,_) = tryRunning unlift + ( tryRunning a (second Right) ) + p + where tryRunning a' b v = case runArrowState a' v of + (_ , Left f) -> (s, Left f) + (x , Right y) -> b (y,x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results +-- in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +foldS :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m +foldS a = ArrowState $ \(s,f) -> foldr a' (s,mempty) f + where a' x (s',m) = second (m <>) $ runArrowState a (s',x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results +-- in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +foldSL :: (Foldable f, Monoid m) => ArrowState s x m -> ArrowState s (f x) m +foldSL a = ArrowState $ \(s,f) -> foldl a' (s,mempty) f + where a' (s',m) x = second (m <>) $ runArrowState a (s',x) + +-- | Fold a fallible state arrow through something 'Foldable'. Collect the +-- results in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +-- If the iteration fails, the state will be reset to the initial one. +foldS' :: (Foldable f, Monoid m) + => ArrowState s x (Either e m) + -> ArrowState s (f x) (Either e m) +foldS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mempty) f + where a' s x (s',Right m) = case runArrowState a (s',x) of + (s'',Right m') -> (s'', Right (m <> m')) + (_ ,Left e ) -> (s , Left e) + a' _ _ e = e + +-- | Fold a fallible state arrow through something 'Foldable'. Collect the +-- results in a 'Monoid'. +-- Intermediate form of a fold between one with "only" a 'Monoid' +-- and one with any function. +-- If the iteration fails, the state will be reset to the initial one. +foldSL' :: (Foldable f, Monoid m) + => ArrowState s x (Either e m) + -> ArrowState s (f x) (Either e m) +foldSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mempty) f + where a' s (s',Right m) x = case runArrowState a (s',x) of + (s'',Right m') -> (s'', Right (m <> m')) + (_ ,Left e ) -> (s , Left e) + a' _ e _ = e + +-- | Fold a state arrow through something 'Foldable'. Collect the results in a +-- 'MonadPlus'. +iterateS :: (Foldable f, MonadPlus m) + => ArrowState s x y + -> ArrowState s (f x) (m y) +iterateS a = ArrowState $ \(s,f) -> foldr a' (s,mzero) f + where a' x (s',m) = second ((mplus m).return) $ runArrowState a (s',x) + +-- | Fold a state arrow through something 'Foldable'. Collect the results in a +-- 'MonadPlus'. +iterateSL :: (Foldable f, MonadPlus m) + => ArrowState s x y + -> ArrowState s (f x) (m y) +iterateSL a = ArrowState $ \(s,f) -> foldl a' (s,mzero) f + where a' (s',m) x = second ((mplus m).return) $ runArrowState a (s',x) + + +-- | Fold a fallible state arrow through something 'Foldable'. +-- Collect the results in a 'MonadPlus'. +-- If the iteration fails, the state will be reset to the initial one. +iterateS' :: (Foldable f, MonadPlus m) + => ArrowState s x (Either e y ) + -> ArrowState s (f x) (Either e (m y)) +iterateS' a = ArrowState $ \(s,f) -> foldr (a' s) (s,Right mzero) f + where a' s x (s',Right m) = case runArrowState a (s',x) of + (s'',Right m') -> (s'',Right $ mplus m $ return m') + (_ ,Left e ) -> (s ,Left e ) + a' _ _ e = e + +-- | Fold a fallible state arrow through something 'Foldable'. +-- Collect the results in a 'MonadPlus'. +-- If the iteration fails, the state will be reset to the initial one. +iterateSL' :: (Foldable f, MonadPlus m) + => ArrowState s x (Either e y ) + -> ArrowState s (f x) (Either e (m y)) +iterateSL' a = ArrowState $ \(s,f) -> foldl (a' s) (s,Right mzero) f + where a' s (s',Right m) x = case runArrowState a (s',x) of + (s'',Right m') -> (s'',Right $ mplus m $ return m') + (_ ,Left e ) -> (s ,Left e ) + a' _ e _ = e diff --git a/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs new file mode 100644 index 000000000..9710973b3 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Arrows/Utils.hs @@ -0,0 +1,497 @@ +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Arrows.Utils + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Utility functions for Arrows (Kleisli monads). + +Some general notes on notation: + +* "^" is meant to stand for a pure function that is lifted into an arrow +based on its usage for that purpose in "Control.Arrow". +* "?" is meant to stand for the usage of a 'FallibleArrow' or a pure function +with an equivalent return value. +* "_" stands for the dropping of a value. +-} + +-- We export everything +module Text.Pandoc.Readers.Odt.Arrows.Utils where + +import Control.Arrow +import Control.Monad ( join, MonadPlus(..) ) + +import Data.Monoid +import qualified Data.Foldable as F + +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.Utils + + +and2 :: (Arrow a) => a b c -> a b c' -> a b (c,c') +and2 = (&&&) + +and3 :: (Arrow a) + => a b c0->a b c1->a b c2 + -> a b (c0,c1,c2 ) +and4 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3 + -> a b (c0,c1,c2,c3 ) +and5 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4 + -> a b (c0,c1,c2,c3,c4 ) +and6 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5 + -> a b (c0,c1,c2,c3,c4,c5 ) +and7 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6 + -> a b (c0,c1,c2,c3,c4,c5,c6 ) +and8 :: (Arrow a) + => a b c0->a b c1->a b c2->a b c3->a b c4->a b c5->a b c6->a b c7 + -> a b (c0,c1,c2,c3,c4,c5,c6,c7) + +and3 a b c = (and2 a b ) &&& c + >>^ \((z,y ) , x) -> (z,y,x ) +and4 a b c d = (and3 a b c ) &&& d + >>^ \((z,y,x ) , w) -> (z,y,x,w ) +and5 a b c d e = (and4 a b c d ) &&& e + >>^ \((z,y,x,w ) , v) -> (z,y,x,w,v ) +and6 a b c d e f = (and5 a b c d e ) &&& f + >>^ \((z,y,x,w,v ) , u) -> (z,y,x,w,v,u ) +and7 a b c d e f g = (and6 a b c d e f ) &&& g + >>^ \((z,y,x,w,v,u ) , t) -> (z,y,x,w,v,u,t ) +and8 a b c d e f g h = (and7 a b c d e f g) &&& h + >>^ \((z,y,x,w,v,u,t) , s) -> (z,y,x,w,v,u,t,s) + +liftA2 :: (Arrow a) => (x -> y -> z) -> a b x -> a b y -> a b z +liftA2 f a b = a &&& b >>^ uncurry f + +liftA3 :: (Arrow a) => (z->y->x -> r) + -> a b z->a b y->a b x + -> a b r +liftA4 :: (Arrow a) => (z->y->x->w -> r) + -> a b z->a b y->a b x->a b w + -> a b r +liftA5 :: (Arrow a) => (z->y->x->w->v -> r) + -> a b z->a b y->a b x->a b w->a b v + -> a b r +liftA6 :: (Arrow a) => (z->y->x->w->v->u -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u + -> a b r +liftA7 :: (Arrow a) => (z->y->x->w->v->u->t -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u->a b t + -> a b r +liftA8 :: (Arrow a) => (z->y->x->w->v->u->t->s -> r) + -> a b z->a b y->a b x->a b w->a b v->a b u->a b t->a b s + -> a b r + +liftA3 fun a b c = and3 a b c >>^ uncurry3 fun +liftA4 fun a b c d = and4 a b c d >>^ uncurry4 fun +liftA5 fun a b c d e = and5 a b c d e >>^ uncurry5 fun +liftA6 fun a b c d e f = and6 a b c d e f >>^ uncurry6 fun +liftA7 fun a b c d e f g = and7 a b c d e f g >>^ uncurry7 fun +liftA8 fun a b c d e f g h = and8 a b c d e f g h >>^ uncurry8 fun + +liftA :: (Arrow a) => (y -> z) -> a b y -> a b z +liftA fun a = a >>^ fun + + +-- | Duplicate a value to subsequently feed it into different arrows. +-- Can almost always be replaced with '(&&&)', 'keepingTheValue', +-- or even '(|||)'. +-- Aequivalent to +-- > returnA &&& returnA +duplicate :: (Arrow a) => a b (b,b) +duplicate = arr $ join (,) + +-- | Lifts the combination of two values into an arrow. +joinOn :: (Arrow a) => (x -> y -> z) -> a (x,y) z +joinOn = arr.uncurry + +-- | Applies a function to the uncurried result-pair of an arrow-application. +-- (The §-symbol was chosen to evoke an association with pairs through the +-- shared first character) +(>>§) :: (Arrow a) => a x (b,c) -> (b -> c -> d) -> a x d +a >>§ f = a >>^ uncurry f + +-- | '(>>§)' with its arguments flipped +(§<<) :: (Arrow a) => (b -> c -> d) -> a x (b,c) -> a x d +(§<<) = flip (>>§) + +-- | Precomposition with an uncurried function +(§>>) :: (Arrow a) => (b -> c -> d) -> a d r -> a (b,c) r +f §>> a = uncurry f ^>> a + +-- | Precomposition with an uncurried function (right to left variant) +(<<§) :: (Arrow a) => a d r -> (b -> c -> d) -> a (b,c) r +(<<§) = flip (§>>) + +infixr 2 >>§, §<<, §>>, <<§ + + +-- | Duplicate a value and apply an arrow to the second instance. +-- Aequivalent to +-- > \a -> duplicate >>> second a +-- or +-- > \a -> returnA &&& a +keepingTheValue :: (Arrow a) => a b c -> a b (b,c) +keepingTheValue a = returnA &&& a + +-- | Duplicate a value and apply an arrow to the first instance. +-- Aequivalent to +-- > \a -> duplicate >>> first a +-- or +-- > \a -> a &&& returnA +keepingTheValue' :: (Arrow a) => a b c -> a b (c,b) +keepingTheValue' a = a &&& returnA + +-- | 'bind' from the "Maybe"-Monad lifted into an 'ArrowChoice'. +-- Actually, it's the more complex '(>=>)', because 'bind' alone does not +-- combine as nicely in arrow form. +-- The current implementation is not the most efficient one, because it can +-- not return directly if a 'Nothing' is encountered. That in turn follows +-- from the type system, as 'Nothing' has an "invisible" type parameter that +-- can not be dropped early. +-- +-- Also, there probably is a way to generalize this to other monads +-- or applicatives, but I'm leaving that as an exercise to the reader. +-- I have a feeling there is a new Arrow-typeclass to be found that is less +-- restrictive than 'ArrowApply'. If it is already out there, +-- I have not seen it yet. ('ArrowPlus' for example is not general enough.) +(>>>=) :: (ArrowChoice a) => a x (Maybe b) -> a b (Maybe c) -> a x (Maybe c) +a1 >>>= a2 = a1 >>> maybeToChoice >>> right a2 >>> choiceToMaybe >>^ join + +infixr 2 >>>= + +-- | 'mplus' Lifted into an arrow. No 'ArrowPlus' required. +-- (But still different from a true bind) +(>++<) :: (Arrow a, MonadPlus m) => a x (m b) -> a x (m b) -> a x (m b) +(>++<) = liftA2 mplus + +-- | Left-compose with a pure function +leftLift :: (ArrowChoice a) => (l -> l') -> a (Either l r) (Either l' r) +leftLift = left.arr + +-- | Right-compose with a pure function +rightLift :: (ArrowChoice a) => (r -> r') -> a (Either l r) (Either l r') +rightLift = right.arr + + +( ^+++ ) :: (ArrowChoice a) => (b -> c) -> a b' c' -> a (Either b b') (Either c c') +( +++^ ) :: (ArrowChoice a) => a b c -> (b' -> c') -> a (Either b b') (Either c c') +( ^+++^ ) :: (ArrowChoice a) => (b -> c) -> (b' -> c') -> a (Either b b') (Either c c') + +l ^+++ r = leftLift l >>> right r +l +++^ r = left l >>> rightLift r +l ^+++^ r = leftLift l >>> rightLift r + +infixr 2 ^+++, +++^, ^+++^ + +( ^||| ) :: (ArrowChoice a) => (b -> d) -> a c d -> a (Either b c) d +( |||^ ) :: (ArrowChoice a) => a b d -> (c -> d) -> a (Either b c) d +( ^|||^ ) :: (ArrowChoice a) => (b -> d) -> (c -> d) -> a (Either b c) d + +l ^||| r = arr l ||| r +l |||^ r = l ||| arr r +l ^|||^ r = arr l ||| arr r + +infixr 2 ^||| , |||^, ^|||^ + +( ^&&& ) :: (Arrow a) => (b -> c) -> a b c' -> a b (c,c') +( &&&^ ) :: (Arrow a) => a b c -> (b -> c') -> a b (c,c') +( ^&&&^ ) :: (Arrow a) => (b -> c) -> (b -> c') -> a b (c,c') + +l ^&&& r = arr l &&& r +l &&&^ r = l &&& arr r +l ^&&&^ r = arr l &&& arr r + +infixr 3 ^&&&, &&&^, ^&&&^ + +( ^*** ) :: (Arrow a) => (b -> c) -> a b' c' -> a (b,b') (c,c') +( ***^ ) :: (Arrow a) => a b c -> (b' -> c') -> a (b,b') (c,c') +( ^***^ ) :: (Arrow a) => (b -> c) -> (b' -> c') -> a (b,b') (c,c') + +l ^*** r = arr l *** r +l ***^ r = l *** arr r +l ^***^ r = arr l *** arr r + +infixr 3 ^***, ***^, ^***^ + +-- | A version of +-- +-- >>> \p -> arr (\x -> if p x the Right x else Left x) +-- +-- but with p being an arrow +choose :: (ArrowChoice a) => a b Bool -> a b (Either b b) +choose checkValue = keepingTheValue checkValue >>^ select + where select (x,True ) = Right x + select (x,False ) = Left x + +-- | Converts @Right a@ into @Just a@ and @Left _@ into @Nothing@. +choiceToMaybe :: (ArrowChoice a) => a (Either l r) (Maybe r) +choiceToMaybe = arr eitherToMaybe + +-- | Converts @Nothing@ into @Left ()@ and @Just a@ into @Right a@. +maybeToChoice :: (ArrowChoice a) => a (Maybe b) (Fallible b) +maybeToChoice = arr maybeToEither + +-- | Lifts a constant value into an arrow +returnV :: (Arrow a) => c -> a x c +returnV = arr.const + +-- | 'returnA' dropping everything +returnA_ :: (Arrow a) => a _b () +returnA_ = returnV () + +-- | Wrapper for an arrow that can be evaluated im parallel. All +-- Arrows can be evaluated in parallel, as long as they return a +-- monoid. +newtype ParallelArrow a b c = CoEval { evalParallelArrow :: a b c } + deriving (Eq, Ord, Show) + +instance (Arrow a, Monoid m) => Monoid (ParallelArrow a b m) where + mempty = CoEval $ returnV mempty + (CoEval a) `mappend` (CoEval ~b) = CoEval $ a &&& b >>§ mappend + +-- | Evaluates a collection of arrows in a parallel fashion. +-- +-- This is in essence a fold of '(&&&)' over the collection, +-- so the actual execution order and parallelity depends on the +-- implementation of '(&&&)' in the arrow in question. +-- The default implementation of '(&&&)' for example keeps the +-- order as given in the collection. +-- +-- This function can be seen as a generalization of +-- 'Control.Applicative.sequenceA' to arrows or as an alternative to +-- a fold with 'Control.Applicative.WrappedArrow', which +-- substitutes the monoid with function application. +-- +coEval :: (Arrow a, F.Foldable f, Monoid m) => f (a b m) -> a b m +coEval = evalParallelArrow . (F.foldMap CoEval) + +-- | Defines Left as failure, Right as success +type FallibleArrow a input failure success = a input (Either failure success) + +type ReFallibleArrow a failure success success' + = FallibleArrow a (Either failure success) failure success' + +-- | Wrapper for fallible arrows. Fallible arrows are all arrows that return +-- an Either value where left is a faliure and right is a success value. +newtype AlternativeArrow a input failure success + = TryArrow { evalAlternativeArrow :: FallibleArrow a input failure success } + + +instance (ArrowChoice a, Monoid failure) + => Monoid (AlternativeArrow a input failure success) where + mempty = TryArrow $ returnV $ Left mempty + (TryArrow a) `mappend` (TryArrow b) + = TryArrow $ a &&& b + >>^ \(a',~b') + -> ( (\a'' -> left (mappend a'') b') ||| Right ) + a' + +-- | Evaluates a collection of fallible arrows, trying each one in succession. +-- Left values are interpreted as failures, right values as successes. +-- +-- The evaluation is stopped once an arrow succeeds. +-- Up to that point, all failures are collected in the failure-monoid. +-- Note that '()' is a monoid, and thus can serve as a failure-collector if +-- you are uninterested in the exact failures. +-- +-- This is in essence a fold of '(&&&)' over the collection, enhanced with a +-- little bit of repackaging, so the actual execution order depends on the +-- implementation of '(&&&)' in the arrow in question. +-- The default implementation of '(&&&)' for example keeps the +-- order as given in the collection. +-- +tryArrows :: (ArrowChoice a, F.Foldable f, Monoid failure) + => f (FallibleArrow a b failure success) + -> FallibleArrow a b failure success +tryArrows = evalAlternativeArrow . (F.foldMap TryArrow) + +-- +liftSuccess :: (ArrowChoice a) + => (success -> success') + -> ReFallibleArrow a failure success success' +liftSuccess = rightLift + +-- +liftAsSuccess :: (ArrowChoice a) + => a x success + -> FallibleArrow a x failure success +liftAsSuccess a = a >>^ Right + +-- +asFallibleArrow :: (ArrowChoice a) + => a x success + -> FallibleArrow a x failure success +asFallibleArrow a = a >>^ Right + +-- | Raises an error into a 'ReFallibleArrow' if the arrow is already in +-- "error mode" +liftError :: (ArrowChoice a, Monoid failure) + => failure + -> ReFallibleArrow a failure success success +liftError e = leftLift (e <>) + +-- | Raises an error into a 'FallibleArrow', droping both the arrow input +-- and any previously stored error value. +_raiseA :: (ArrowChoice a) + => failure + -> FallibleArrow a x failure success +_raiseA e = returnV (Left e) + +-- | Raises an empty error into a 'FallibleArrow', droping both the arrow input +-- and any previously stored error value. +_raiseAEmpty :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success +_raiseAEmpty = _raiseA mempty + +-- | Raises an error into a 'ReFallibleArrow', possibly appending the new error +-- to an existing one +raiseA :: (ArrowChoice a, Monoid failure) + => failure + -> ReFallibleArrow a failure success success +raiseA e = arr $ Left.(either (<> e) (const e)) + +-- | Raises an empty error into a 'ReFallibleArrow'. If there already is an +-- error, nothing changes. +-- (Note that this function is only aequivalent to @raiseA mempty@ iff the +-- failure monoid follows the monoid laws.) +raiseAEmpty :: (ArrowChoice a, Monoid failure) + => ReFallibleArrow a failure success success +raiseAEmpty = arr (fromRight (const mempty) >>> Left) + + +-- | Execute the second arrow if the first succeeds +(>>?) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> FallibleArrow a success failure success' + -> FallibleArrow a x failure success' +a >>? b = a >>> Left ^||| b + +-- | Execute the lifted second arrow if the first succeeds +(>>?^) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> (success -> success') + -> FallibleArrow a x failure success' +a >>?^ f = a >>^ Left ^|||^ Right . f + +-- | Execute the lifted second arrow if the first succeeds +(>>?^?) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> (success -> Either failure success') + -> FallibleArrow a x failure success' +a >>?^? b = a >>> Left ^|||^ b + +-- | Execute the second arrow if the lifted first arrow succeeds +(^>>?) :: (ArrowChoice a, Monoid failure) + => (x -> Either failure success) + -> FallibleArrow a success failure success' + -> FallibleArrow a x failure success' +a ^>>? b = a ^>> Left ^||| b + +-- | Execute the lifted second arrow if the lifted first arrow succeeds +(^>>?^) :: (ArrowChoice a, Monoid failure) + => (x -> Either failure success) + -> (success -> success') + -> FallibleArrow a x failure success' +a ^>>?^ f = arr $ a >>> right f + +-- | Execute the lifted second arrow if the lifted first arrow succeeds +(^>>?^?) :: (ArrowChoice a, Monoid failure) + => (x -> Either failure success) + -> (success -> Either failure success') + -> FallibleArrow a x failure success' +a ^>>?^? f = a ^>> Left ^|||^ f + +-- | Execute the second, non-fallible arrow if the first arrow succeeds +(>>?!) :: (ArrowChoice a, Monoid failure) + => FallibleArrow a x failure success + -> a success success' + -> FallibleArrow a x failure success' +a >>?! f = a >>> right f + +--- +(>>?§) :: (ArrowChoice a, Monoid f) + => FallibleArrow a x f (b,b') + -> (b -> b' -> c) + -> FallibleArrow a x f c +a >>?§ f = a >>?^ (uncurry f) + +--- +(^>>?§) :: (ArrowChoice a, Monoid f) + => (x -> Either f (b,b')) + -> (b -> b' -> c) + -> FallibleArrow a x f c +a ^>>?§ f = arr a >>?^ (uncurry f) + +--- +(>>?§?) :: (ArrowChoice a, Monoid f) + => FallibleArrow a x f (b,b') + -> (b -> b' -> (Either f c)) + -> FallibleArrow a x f c +a >>?§? f = a >>?^? (uncurry f) + +infixr 1 >>?, >>?^, >>?^? +infixr 1 ^>>?, ^>>?^, ^>>?^?, >>?! +infixr 1 >>?§, ^>>?§, >>?§? + +-- | Keep values that are Right, replace Left values by a constant. +ifFailedUse :: (ArrowChoice a) => v -> a (Either f v) v +ifFailedUse v = arr $ either (const v) id + +-- | '(&&)' lifted into an arrow +(<&&>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool +(<&&>) = liftA2 (&&) + +-- | '(||)' lifted into an arrow +(<||>) :: (Arrow a) => a x Bool -> a x Bool -> a x Bool +(<||>) = liftA2 (||) + +-- | An equivalent of '(&&)' in a fallible arrow +(>&&<) :: (ArrowChoice a, Monoid f) => FallibleArrow a x f s + -> FallibleArrow a x f s' + -> FallibleArrow a x f (s,s') +(>&&<) = liftA2 chooseMin + +-- | An equivalent of '(||)' in some forms of fallible arrows +(>||<) :: (ArrowChoice a, Monoid f, Monoid s) => FallibleArrow a x f s + -> FallibleArrow a x f s + -> FallibleArrow a x f s +(>||<) = liftA2 chooseMax + +-- | An arrow version of a short-circuit (<|>) +ifFailedDo :: (ArrowChoice a) + => FallibleArrow a x f y + -> FallibleArrow a x f y + -> FallibleArrow a x f y +ifFailedDo a b = keepingTheValue a >>> repackage ^>> (b |||^ Right) + where repackage (x , Left _) = Left x + repackage (_ , Right y) = Right y + +infixr 4 <&&>, <||>, >&&<, >||< +infixr 1 `ifFailedDo` + + diff --git a/src/Text/Pandoc/Readers/Odt/Base.hs b/src/Text/Pandoc/Readers/Odt/Base.hs new file mode 100644 index 000000000..1f095bade --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Base.hs @@ -0,0 +1,43 @@ +{-# LANGUAGE PatternGuards #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Base + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Core types of the odt reader. +-} + +module Text.Pandoc.Readers.Odt.Base where + +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Namespaces + +type OdtConverterState s = XMLConverterState Namespace s + +type XMLReader s a b = FallibleXMLConverter Namespace s a b + +type XMLReaderSafe s a b = XMLConverter Namespace s a b + diff --git a/src/Text/Pandoc/Readers/Odt/ContentReader.hs b/src/Text/Pandoc/Readers/Odt/ContentReader.hs new file mode 100644 index 000000000..9bb585b8e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/ContentReader.hs @@ -0,0 +1,790 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE RecordWildCards #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.ContentReader + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +The core of the odt reader that converts odt features into Pandoc types. +-} + +module Text.Pandoc.Readers.Odt.ContentReader +( readerState +, read_body +) where + +import Control.Arrow +import Control.Applicative hiding ( liftA, liftA2, liftA3 ) + +import qualified Data.Map as M +import Data.List ( find ) +import Data.Monoid +import Data.Maybe + +import qualified Text.XML.Light as XML + +import Text.Pandoc.Definition +import Text.Pandoc.Builder +import Text.Pandoc.Shared + +import Text.Pandoc.Readers.Odt.Base +import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.Odt.StyleReader + +import Text.Pandoc.Readers.Odt.Arrows.Utils +import Text.Pandoc.Readers.Odt.Generic.XMLConverter +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.Utils + + +-------------------------------------------------------------------------------- +-- State +-------------------------------------------------------------------------------- + +type Anchor = String + +data ReaderState + = ReaderState { -- | A collection of styles read somewhere else. + -- It is only queried here, not modified. + styleSet :: Styles + -- | A stack of the styles of parent elements. + -- Used to look up inherited style properties. + , styleTrace :: [Style] + -- | Keeps track of the current depth in nested lists + , currentListLevel :: ListLevel + -- | Lists may provide their own style, but they don't have + -- to. If they do not, the style of a parent list may be used + -- or even a default list style from the paragraph style. + -- This value keeps track of the closest list style there + -- currently is. + , currentListStyle :: Maybe ListStyle + -- | A map from internal anchor names to "pretty" ones. + -- The mapping is a purely cosmetic one. + , bookmarkAnchors :: M.Map Anchor Anchor + +-- , sequences +-- , trackedChangeIDs + } + deriving ( Show ) + +readerState :: Styles -> ReaderState +readerState styles = ReaderState styles [] 0 Nothing M.empty + +-- +pushStyle' :: Style -> ReaderState -> ReaderState +pushStyle' style state = state { styleTrace = style : styleTrace state } + +-- +popStyle' :: ReaderState -> ReaderState +popStyle' state = case styleTrace state of + _:trace -> state { styleTrace = trace } + _ -> state + +-- +modifyListLevel :: (ListLevel -> ListLevel) -> (ReaderState -> ReaderState) +modifyListLevel f state = state { currentListLevel = f (currentListLevel state) } + +-- +shiftListLevel :: ListLevel -> (ReaderState -> ReaderState) +shiftListLevel diff = modifyListLevel (+ diff) + +-- +swapCurrentListStyle :: Maybe ListStyle -> ReaderState + -> (ReaderState, Maybe ListStyle) +swapCurrentListStyle mListStyle state = ( state { currentListStyle = mListStyle } + , currentListStyle state + ) + +-- +lookupPrettyAnchor :: Anchor -> ReaderState -> Maybe Anchor +lookupPrettyAnchor anchor ReaderState{..} = M.lookup anchor bookmarkAnchors + +-- +putPrettyAnchor :: Anchor -> Anchor -> ReaderState -> ReaderState +putPrettyAnchor ugly pretty state@ReaderState{..} + = state { bookmarkAnchors = M.insert ugly pretty bookmarkAnchors } + +-- +usedAnchors :: ReaderState -> [Anchor] +usedAnchors ReaderState{..} = M.elems bookmarkAnchors + +-------------------------------------------------------------------------------- +-- Reader type and associated tools +-------------------------------------------------------------------------------- + +type OdtReader a b = XMLReader ReaderState a b + +type OdtReaderSafe a b = XMLReaderSafe ReaderState a b + +-- | Extract something from the styles +fromStyles :: (a -> Styles -> b) -> OdtReaderSafe a b +fromStyles f = keepingTheValue + (getExtraState >>^ styleSet) + >>§ f + +-- +getStyleByName :: OdtReader StyleName Style +getStyleByName = fromStyles lookupStyle >>^ maybeToChoice + +-- +findStyleFamily :: OdtReader Style StyleFamily +findStyleFamily = fromStyles getStyleFamily >>^ maybeToChoice + +-- +lookupListStyle :: OdtReader StyleName ListStyle +lookupListStyle = fromStyles lookupListStyleByName >>^ maybeToChoice + +-- +switchCurrentListStyle :: OdtReaderSafe (Maybe ListStyle) (Maybe ListStyle) +switchCurrentListStyle = keepingTheValue getExtraState + >>§ swapCurrentListStyle + >>> first setExtraState + >>^ snd + +-- +pushStyle :: OdtReaderSafe Style Style +pushStyle = keepingTheValue ( + ( keepingTheValue getExtraState + >>§ pushStyle' + ) + >>> setExtraState + ) + >>^ fst + +-- +popStyle :: OdtReaderSafe x x +popStyle = keepingTheValue ( + getExtraState + >>> arr popStyle' + >>> setExtraState + ) + >>^ fst + +-- +getCurrentListLevel :: OdtReaderSafe _x ListLevel +getCurrentListLevel = getExtraState >>^ currentListLevel + + +type AnchorPrefix = String + +-- | An adaptation of 'uniqueIdent' from "Text.Pandoc.Shared" that generates a +-- unique identifier but without assuming that the id should be for a header. +-- Second argument is a list of already used identifiers. +uniqueIdentFrom :: AnchorPrefix -> [Anchor] -> Anchor +uniqueIdentFrom baseIdent usedIdents = + let numIdent n = baseIdent ++ "-" ++ show n + in if baseIdent `elem` usedIdents + then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of + Just x -> numIdent x + Nothing -> baseIdent -- if we have more than 60,000, allow repeats + else baseIdent + +-- | First argument: basis for a new "pretty" anchor if none exists yet +-- Second argument: a key ("ugly" anchor) +-- Returns: saved "pretty" anchor or created new one +getPrettyAnchor :: OdtReaderSafe (AnchorPrefix, Anchor) Anchor +getPrettyAnchor = proc (baseIdent, uglyAnchor) -> do + state <- getExtraState -< () + case lookupPrettyAnchor uglyAnchor state of + Just prettyAnchor -> returnA -< prettyAnchor + Nothing -> do + let newPretty = uniqueIdentFrom baseIdent (usedAnchors state) + modifyExtraState (putPrettyAnchor uglyAnchor newPretty) -<< newPretty + +-- | Input: basis for a new header anchor +-- Ouput: saved new anchor +getHeaderAnchor :: OdtReaderSafe Inlines Anchor +getHeaderAnchor = proc title -> do + state <- getExtraState -< () + let anchor = uniqueIdent (toList title) (usedAnchors state) + modifyExtraState (putPrettyAnchor anchor anchor) -<< anchor + + +-------------------------------------------------------------------------------- +-- Working with styles +-------------------------------------------------------------------------------- + +-- +readStyleByName :: OdtReader _x Style +readStyleByName = findAttr NsText "style-name" >>? getStyleByName + +-- +isStyleToTrace :: OdtReader Style Bool +isStyleToTrace = findStyleFamily >>?^ (==FaText) + +-- +withNewStyle :: OdtReaderSafe x Inlines -> OdtReaderSafe x Inlines +withNewStyle a = proc x -> do + fStyle <- readStyleByName -< () + case fStyle of + Right style -> do + mFamily <- arr styleFamily -< style + fTextProps <- arr ( maybeToChoice + . textProperties + . styleProperties + ) -< style + case fTextProps of + Right textProps -> do + state <- getExtraState -< () + let triple = (state, textProps, mFamily) + modifier <- arr modifierFromStyleDiff -< triple + fShouldTrace <- isStyleToTrace -< style + case fShouldTrace of + Right shouldTrace -> do + if shouldTrace + then do + pushStyle -< style + inlines <- a -< x + popStyle -< () + arr modifier -<< inlines + else + -- In case anything goes wrong + a -< x + Left _ -> a -< x + Left _ -> a -< x + Left _ -> a -< x + + +type PropertyTriple = (ReaderState, TextProperties, Maybe StyleFamily) +type InlineModifier = Inlines -> Inlines + +-- | Given data about the local style changes, calculates how to modify +-- an instance of 'Inlines' +modifierFromStyleDiff :: PropertyTriple -> InlineModifier +modifierFromStyleDiff propertyTriple = + composition $ + (getVPosModifier propertyTriple) + : map (first ($ propertyTriple) >>> ifThen_else ignore) + [ (hasEmphChanged , emph ) + , (hasChanged isStrong , strong ) + , (hasChanged strikethrough , strikeout ) + ] + where + ifThen_else else' (if',then') = if if' then then' else else' + + ignore = id :: InlineModifier + + getVPosModifier :: PropertyTriple -> InlineModifier + getVPosModifier triple@(_,textProps,_) = + let getVPos = Just . verticalPosition + in case lookupPreviousValueM getVPos triple of + Nothing -> ignore + Just oldVPos -> getVPosModifier' (oldVPos,verticalPosition textProps) + + getVPosModifier' (oldVPos , newVPos ) | oldVPos == newVPos = ignore + getVPosModifier' ( _ , VPosSub ) = subscript + getVPosModifier' ( _ , VPosSuper ) = superscript + getVPosModifier' ( _ , _ ) = ignore + + hasEmphChanged :: PropertyTriple -> Bool + hasEmphChanged = swing any [ hasChanged isEmphasised + , hasChangedM pitch + , hasChanged underline + ] + + hasChanged property triple@(_, property -> newProperty, _) = + maybe True (/=newProperty) (lookupPreviousValue property triple) + + hasChangedM property triple@(_, textProps,_) = + fromMaybe False $ (/=) <$> property textProps <*> lookupPreviousValueM property triple + + lookupPreviousValue f = lookupPreviousStyleValue ((fmap f).textProperties) + + lookupPreviousValueM f = lookupPreviousStyleValue ((f =<<).textProperties) + + lookupPreviousStyleValue f (ReaderState{..},_,mFamily) + = ( findBy f $ extendedStylePropertyChain styleTrace styleSet ) + <|> ( f =<< fmap (lookupDefaultStyle' styleSet) mFamily ) + + +type ParaModifier = Blocks -> Blocks + +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ :: Int +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ :: Int +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ = 5 +_MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ = 5 + +-- | Returns either 'id' or 'blockQuote' depending on the current indentation +getParaModifier :: Style -> ParaModifier +getParaModifier Style{..} | Just props <- paraProperties styleProperties + , isBlockQuote (indentation props) + (margin_left props) + = blockQuote + | otherwise + = id + where + isBlockQuote mIndent mMargin + | LengthValueMM indent <- mIndent + , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + = True + | LengthValueMM margin <- mMargin + , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + = True + | LengthValueMM indent <- mIndent + , LengthValueMM margin <- mMargin + = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_MM_ + + | PercentValue indent <- mIndent + , indent > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + = True + | PercentValue margin <- mMargin + , margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + = True + | PercentValue indent <- mIndent + , PercentValue margin <- mMargin + = indent + margin > _MINIMUM_INDENTATION_FOR_BLOCKQUOTES_IN_PERCENT_ + + | otherwise + = False + +-- +constructPara :: OdtReaderSafe Blocks Blocks -> OdtReaderSafe Blocks Blocks +constructPara reader = proc blocks -> do + fStyle <- readStyleByName -< blocks + case fStyle of + Left _ -> reader -< blocks + Right style -> do + let modifier = getParaModifier style + blocks' <- reader -< blocks + arr modifier -<< blocks' + + + +type ListConstructor = [Blocks] -> Blocks + +getListConstructor :: ListLevelStyle -> ListConstructor +getListConstructor ListLevelStyle{..} = + case listLevelType of + LltBullet -> bulletList + LltImage -> bulletList + LltNumbered -> let listNumberStyle = toListNumberStyle listItemFormat + listNumberDelim = toListNumberDelim listItemPrefix + listItemSuffix + in orderedListWith (1, listNumberStyle, listNumberDelim) + where + toListNumberStyle LinfNone = DefaultStyle + toListNumberStyle LinfNumber = Decimal + toListNumberStyle LinfRomanLC = LowerRoman + toListNumberStyle LinfRomanUC = UpperRoman + toListNumberStyle LinfAlphaLC = LowerAlpha + toListNumberStyle LinfAlphaUC = UpperAlpha + toListNumberStyle (LinfString _) = Example + + toListNumberDelim Nothing (Just ".") = Period + toListNumberDelim (Just "" ) (Just ".") = Period + toListNumberDelim Nothing (Just ")") = OneParen + toListNumberDelim (Just "" ) (Just ")") = OneParen + toListNumberDelim (Just "(") (Just ")") = TwoParens + toListNumberDelim _ _ = DefaultDelim + + +-- | Determines which style to use for a list, which level to use of that +-- style, and which type of list to create as a result of this information. +-- Then prepares the state for eventual child lists and constructs the list from +-- the results. +-- Two main cases are handled: The list may provide its own style or it may +-- rely on a parent list's style. I the former case the current style in the +-- state must be switched before and after the call to the child converter +-- while in the latter the child converter can be called directly. +-- If anything goes wrong, a default ordered-list-constructor is used. +constructList :: OdtReaderSafe x [Blocks] -> OdtReaderSafe x Blocks +constructList reader = proc x -> do + modifyExtraState (shiftListLevel 1) -< () + listLevel <- getCurrentListLevel -< () + fStyleName <- findAttr NsText "style-name" -< () + case fStyleName of + Right styleName -> do + fListStyle <- lookupListStyle -< styleName + case fListStyle of + Right listStyle -> do + fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) + case fLLS of + Just listLevelStyle -> do + oldListStyle <- switchCurrentListStyle -< Just listStyle + blocks <- constructListWith listLevelStyle -<< x + switchCurrentListStyle -< oldListStyle + returnA -< blocks + Nothing -> constructOrderedList -< x + Left _ -> constructOrderedList -< x + Left _ -> do + state <- getExtraState -< () + mListStyle <- arr currentListStyle -< state + case mListStyle of + Just listStyle -> do + fLLS <- arr (uncurry getListLevelStyle) -< (listLevel,listStyle) + case fLLS of + Just listLevelStyle -> constructListWith listLevelStyle -<< x + Nothing -> constructOrderedList -< x + Nothing -> constructOrderedList -< x + where + constructOrderedList = + reader + >>> modifyExtraState (shiftListLevel (-1)) + >>^ orderedList + constructListWith listLevelStyle = + reader + >>> getListConstructor listLevelStyle + ^>> modifyExtraState (shiftListLevel (-1)) + +-------------------------------------------------------------------------------- +-- Readers +-------------------------------------------------------------------------------- + +type ElementMatcher result = (Namespace, ElementName, OdtReader result result) + +type InlineMatcher = ElementMatcher Inlines + +type BlockMatcher = ElementMatcher Blocks + + +-- +matchingElement :: (Monoid e) + => Namespace -> ElementName + -> OdtReaderSafe e e + -> ElementMatcher e +matchingElement ns name reader = (ns, name, asResultAccumulator reader) + where + asResultAccumulator :: (ArrowChoice a, Monoid m) => a m m -> a m (Fallible m) + asResultAccumulator a = liftAsSuccess $ keepingTheValue a >>§ (<>) + +-- +matchChildContent' :: (Monoid result) + => [ElementMatcher result] + -> OdtReaderSafe _x result +matchChildContent' ls = returnV mempty >>> matchContent' ls + +-- +matchChildContent :: (Monoid result) + => [ElementMatcher result] + -> OdtReaderSafe (result, XML.Content) result + -> OdtReaderSafe _x result +matchChildContent ls fallback = returnV mempty >>> matchContent ls fallback + + +-------------------------------------------- +-- Matchers +-------------------------------------------- + +---------------------- +-- Basics +---------------------- + +-- +-- | Open Document allows several consecutive spaces if they are marked up +read_plain_text :: OdtReaderSafe (Inlines, XML.Content) Inlines +read_plain_text = fst ^&&& read_plain_text' >>§ recover + where + -- fallible version + read_plain_text' :: OdtReader (Inlines, XML.Content) Inlines + read_plain_text' = ( second ( arr extractText ) + >>^ spreadChoice >>?! second text + ) + >>?§ (<>) + -- + extractText :: XML.Content -> Fallible String + extractText (XML.Text cData) = succeedWith (XML.cdData cData) + extractText _ = failEmpty + + +-- specifically. I honor that, although the current implementation of '(<>)' +-- for 'Inlines' in "Text.Pandoc.Builder" will collaps them agein. +-- The rational is to be prepared for future modifications. +read_spaces :: InlineMatcher +read_spaces = matchingElement NsText "s" ( + readAttrWithDefault NsText "c" 1 -- how many spaces? + >>^ fromList.(`replicate` Space) + ) +-- +read_line_break :: InlineMatcher +read_line_break = matchingElement NsText "line-break" + $ returnV linebreak + +-- +read_span :: InlineMatcher +read_span = matchingElement NsText "span" + $ withNewStyle + $ matchChildContent [ read_span + , read_spaces + , read_line_break + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + +-- +read_paragraph :: BlockMatcher +read_paragraph = matchingElement NsText "p" + $ constructPara + $ liftA para + $ withNewStyle + $ matchChildContent [ read_span + , read_spaces + , read_line_break + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + + +---------------------- +-- Headers +---------------------- + +-- +read_header :: BlockMatcher +read_header = matchingElement NsText "h" + $ proc blocks -> do + level <- ( readAttrWithDefault NsText "outline-level" 1 + ) -< blocks + children <- ( matchChildContent [ read_span + , read_spaces + , read_line_break + , read_link + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text + ) -< blocks + anchor <- getHeaderAnchor -< children + let idAttr = (anchor, [], []) -- no classes, no key-value pairs + arr (uncurry3 headerWith) -< (idAttr, level, children) + +---------------------- +-- Lists +---------------------- + +-- +read_list :: BlockMatcher +read_list = matchingElement NsText "list" +-- $ withIncreasedListLevel + $ constructList +-- $ liftA bulletList + $ matchChildContent' [ read_list_item + ] +-- +read_list_item :: ElementMatcher [Blocks] +read_list_item = matchingElement NsText "list-item" + $ liftA (compactify'.(:[])) + ( matchChildContent' [ read_paragraph + , read_header + , read_list + ] + ) + + +---------------------- +-- Links +---------------------- + +read_link :: InlineMatcher +read_link = matchingElement NsText "a" + $ liftA3 link + ( findAttrWithDefault NsXLink "href" "" ) + ( findAttrWithDefault NsOffice "title" "" ) + ( matchChildContent [ read_span + , read_note + , read_citation + , read_bookmark + , read_bookmark_start + , read_reference_start + , read_bookmark_ref + , read_reference_ref + ] read_plain_text ) + + +------------------------- +-- Footnotes +------------------------- + +read_note :: InlineMatcher +read_note = matchingElement NsText "note" + $ liftA note + $ matchChildContent' [ read_note_body ] + +read_note_body :: BlockMatcher +read_note_body = matchingElement NsText "note-body" + $ matchChildContent' [ read_paragraph ] + +------------------------- +-- Citations +------------------------- + +read_citation :: InlineMatcher +read_citation = matchingElement NsText "bibliography-mark" + $ liftA2 cite + ( liftA2 makeCitation + ( findAttrWithDefault NsText "identifier" "" ) + ( readAttrWithDefault NsText "number" 0 ) + ) + ( matchChildContent [] read_plain_text ) + where + makeCitation :: String -> Int -> [Citation] + makeCitation citeId num = [Citation citeId [] [] NormalCitation num 0] + + +---------------------- +-- Tables +---------------------- + +-- +read_table :: BlockMatcher +read_table = matchingElement NsTable "table" + $ liftA (simpleTable []) + $ matchChildContent' [ read_table_row + ] + +-- +read_table_row :: ElementMatcher [[Blocks]] +read_table_row = matchingElement NsTable "table-row" + $ liftA (:[]) + $ matchChildContent' [ read_table_cell + ] + +-- +read_table_cell :: ElementMatcher [Blocks] +read_table_cell = matchingElement NsTable "table-cell" + $ liftA (compactify'.(:[])) + $ matchChildContent' [ read_paragraph + ] + +---------------------- +-- Internal links +---------------------- + +_ANCHOR_PREFIX_ :: String +_ANCHOR_PREFIX_ = "anchor" + +-- +readAnchorAttr :: OdtReader _x Anchor +readAnchorAttr = findAttr NsText "name" + +-- | Beware: may fail +findAnchorName :: OdtReader AnchorPrefix Anchor +findAnchorName = ( keepingTheValue readAnchorAttr + >>^ spreadChoice + ) >>?! getPrettyAnchor + + +-- +maybeAddAnchorFrom :: OdtReader Inlines AnchorPrefix + -> OdtReaderSafe Inlines Inlines +maybeAddAnchorFrom anchorReader = + keepingTheValue (anchorReader >>? findAnchorName >>?^ toAnchorElem) + >>> + proc (inlines, fAnchorElem) -> do + case fAnchorElem of + Right anchorElem -> + arr (anchorElem <>) -<< inlines + Left _ -> returnA -< inlines + where + toAnchorElem :: Anchor -> Inlines + toAnchorElem anchorID = spanWith (anchorID, [], []) mempty + -- no classes, no key-value pairs + +-- +read_bookmark :: InlineMatcher +read_bookmark = matchingElement NsText "bookmark" + $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) + +-- +read_bookmark_start :: InlineMatcher +read_bookmark_start = matchingElement NsText "bookmark-start" + $ maybeAddAnchorFrom (liftAsSuccess $ returnV _ANCHOR_PREFIX_) + +-- +read_reference_start :: InlineMatcher +read_reference_start = matchingElement NsText "reference-mark-start" + $ maybeAddAnchorFrom readAnchorAttr + +-- | Beware: may fail +findAnchorRef :: OdtReader _x Anchor +findAnchorRef = ( findAttr NsText "ref-name" + >>?^ (_ANCHOR_PREFIX_,) + ) >>?! getPrettyAnchor + + +-- +maybeInAnchorRef :: OdtReaderSafe Inlines Inlines +maybeInAnchorRef = proc inlines -> do + fRef <- findAnchorRef -< () + case fRef of + Right anchor -> + arr (toAnchorRef anchor) -<< inlines + Left _ -> returnA -< inlines + where + toAnchorRef :: Anchor -> Inlines -> Inlines + toAnchorRef anchor = link ('#':anchor) "" -- no title + +-- +read_bookmark_ref :: InlineMatcher +read_bookmark_ref = matchingElement NsText "bookmark-ref" + $ maybeInAnchorRef + <<< matchChildContent [] read_plain_text + +-- +read_reference_ref :: InlineMatcher +read_reference_ref = matchingElement NsText "reference-ref" + $ maybeInAnchorRef + <<< matchChildContent [] read_plain_text + + +---------------------- +-- Entry point +---------------------- + +--read_plain_content :: OdtReaderSafe _x Inlines +--read_plain_content = strContent >>^ text + +read_text :: OdtReaderSafe _x Pandoc +read_text = matchChildContent' [ read_header + , read_paragraph + , read_list + , read_table + ] + >>^ doc + +read_body :: OdtReader _x Pandoc +read_body = executeIn NsOffice "body" + $ executeIn NsOffice "text" + $ liftAsSuccess read_text + diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs new file mode 100644 index 000000000..5922164c9 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/Fallible.hs @@ -0,0 +1,260 @@ +{-# LANGUAGE GeneralizedNewtypeDeriving #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.Fallible + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Data types and utilities representing failure. Most of it is based on the +"Either" type in its usual configuration (left represents failure). + +In most cases, the failure type is implied or required to be a "Monoid". + +The choice of "Either" instead of a custom type makes it easier to write +compatible instances of "ArrowChoice". +-} + +-- We export everything +module Text.Pandoc.Readers.Odt.Generic.Fallible where + +import Control.Applicative +import Control.Monad + +import qualified Data.Foldable as F +import Data.Monoid + +-- | Default for now. Will probably become a class at some point. +type Failure = () + +type Fallible a = Either Failure a + + +-- | False -> Left (), True -> Right () +boolToEither :: Bool -> Fallible () +boolToEither False = Left () +boolToEither True = Right () + +-- | False -> Left (), True -> Right () +boolToChoice :: Bool -> Fallible () +boolToChoice False = Left () +boolToChoice True = Right () + +-- +maybeToEither :: Maybe a -> Fallible a +maybeToEither (Just a) = Right a +maybeToEither Nothing = Left () + +-- +eitherToMaybe :: Either _l a -> Maybe a +eitherToMaybe (Left _) = Nothing +eitherToMaybe (Right a) = Just a + +-- | > untagEither === either id id +untagEither :: Either a a -> a +untagEither (Left a) = a +untagEither (Right a) = a + +-- | > fromLeft f === either f id +fromLeft :: (a -> b) -> Either a b -> b +fromLeft f (Left a) = f a +fromLeft _ (Right b) = b + +-- | > fromRight f === either id f +fromRight :: (a -> b) -> Either b a -> b +fromRight _ (Left b) = b +fromRight f (Right a) = f a + +-- | > recover a === fromLeft (const a) === either (const a) id +recover :: a -> Either _f a -> a +recover a (Left _) = a +recover _ (Right a) = a + +-- | I would love to use 'fail'. Alas, 'Monad.fail'... +failWith :: failure -> Either failure _x +failWith f = Left f + +-- +failEmpty :: (Monoid failure) => Either failure _x +failEmpty = failWith mempty + +-- +succeedWith :: a -> Either _x a +succeedWith = Right + +-- +collapseEither :: Either failure (Either failure x) + -> Either failure x +collapseEither (Left f ) = Left f +collapseEither (Right (Left f)) = Left f +collapseEither (Right (Right x)) = Right x + +-- | If either of the values represents an error, the result is a +-- (possibly combined) error. If both values represent a success, +-- both are returned. +chooseMin :: (Monoid a) => Either a b -> Either a b' -> Either a (b,b') +chooseMin = chooseMinWith (,) + +-- | If either of the values represents an error, the result is a +-- (possibly combined) error. If both values represent a success, +-- a combination is returned. +chooseMinWith :: (Monoid a) => (b -> b' -> c) + -> Either a b + -> Either a b' + -> Either a c +chooseMinWith (><) (Right a) (Right b) = Right $ a >< b +chooseMinWith _ (Left a) (Left b) = Left $ a <> b +chooseMinWith _ (Left a) _ = Left a +chooseMinWith _ _ (Left b) = Left b + +-- | If either of the values represents a non-error, the result is a +-- (possibly combined) non-error. If both values represent an error, an error +-- is returned. +chooseMax :: (Monoid a, Monoid b) => Either a b -> Either a b -> Either a b +chooseMax = chooseMaxWith (<>) + +-- | If either of the values represents a non-error, the result is a +-- (possibly combined) non-error. If both values represent an error, an error +-- is returned. +chooseMaxWith :: (Monoid a) => (b -> b -> b) + -> Either a b + -> Either a b + -> Either a b +chooseMaxWith (><) (Right a) (Right b) = Right $ a >< b +chooseMaxWith _ (Left a) (Left b) = Left $ a <> b +chooseMaxWith _ (Right a) _ = Right a +chooseMaxWith _ _ (Right b) = Right b + + +-- | Class of containers that can escalate contained 'Either's. +-- The word "Vector" is meant in the sense of a disease transmitter. +class ChoiceVector v where + spreadChoice :: v (Either f a) -> Either f (v a) + +-- Let's do a few examples first + +instance ChoiceVector Maybe where + spreadChoice (Just (Left f)) = Left f + spreadChoice (Just (Right x)) = Right (Just x) + spreadChoice Nothing = Right Nothing + +instance ChoiceVector (Either l) where + spreadChoice (Right (Left f)) = Left f + spreadChoice (Right (Right x)) = Right (Right x) + spreadChoice (Left x ) = Right (Left x) + +instance ChoiceVector ((,) a) where + spreadChoice (_, Left f) = Left f + spreadChoice (x, Right y) = Right (x,y) + -- Wasn't there a newtype somewhere with the elements flipped? + +-- +-- More instances later, first some discussion. +-- +-- I'll have to freshen up on type system details to see how (or if) to do +-- something like +-- +-- > instance (ChoiceVector a, ChoiceVector b) => ChoiceVector (a b) where +-- > : +-- +-- But maybe it would be even better to use something like +-- +-- > class ChoiceVector v v' f | v -> v' f where +-- > spreadChoice :: v -> Either f v' +-- +-- That way, more places in @v@ could spread the cheer, e.g.: +-- +-- As before: +-- -- ( a , Either f b) (a , b) f +-- > instance ChoiceVector ((,) a (Either f b)) ((,) a b) f where +-- > spreadChoice (_, Left f) = Left f +-- > spreadChoice (a, Right b) = Right (a,b) +-- +-- But also: +-- -- ( Either f a , b) (a , b) f +-- > instance ChoiceVector ((,) (Either f a) b) ((,) a b) f where +-- > spreadChoice (Right a,b) = Right (a,b) +-- > spreadChoice (Left f,_) = Left f +-- +-- And maybe even: +-- -- ( Either f a , Either f b) (a , b) f +-- > instance ChoiceVector ((,) (Either f a) (Either f b)) ((,) a b) f where +-- > spreadChoice (Right a , Right b) = Right (a,b) +-- > spreadChoice (Left f , _ ) = Left f +-- > spreadChoice ( _ , Left f) = Left f +-- +-- Of course that would lead to a lot of overlapping instances... +-- But I can't think of a different way. A selector function might help, +-- but not even a "Data.Traversable" is powerful enough for that. +-- But maybe someone has already solved all this with a lens library. +-- +-- Well, it's an interesting academic question. But for practical purposes, +-- I have more than enough right now. + +instance ChoiceVector ((,,) a b) where + spreadChoice (_,_, Left f) = Left f + spreadChoice (a,b, Right x) = Right (a,b,x) + +instance ChoiceVector ((,,,) a b c) where + spreadChoice (_,_,_, Left f) = Left f + spreadChoice (a,b,c, Right x) = Right (a,b,c,x) + +instance ChoiceVector ((,,,,) a b c d) where + spreadChoice (_,_,_,_, Left f) = Left f + spreadChoice (a,b,c,d, Right x) = Right (a,b,c,d,x) + +instance ChoiceVector (Const a) where + spreadChoice (Const c) = Right (Const c) -- need to repackage because of implicit types + +-- | Fails on the first error +instance ChoiceVector [] where + spreadChoice = sequence -- using the monad instance of Either. + -- Could be generalized to "Data.Traversable" - but why play + -- with UndecidableInstances unless this is really needed. + +-- | Wrapper for a list. While the normal list instance of 'ChoiceVector' +-- fails whenever it can, this type will never fail. +newtype SuccessList a = SuccessList { collectNonFailing :: [a] } + deriving ( Eq, Ord, Show ) + +instance ChoiceVector SuccessList where + spreadChoice = Right . SuccessList . (foldr unTagRight []) . collectNonFailing + where unTagRight (Right x) = (x:) + unTagRight _ = id + +-- | Like 'catMaybes', but for 'Either'. +collectRights :: [Either _l r] -> [r] +collectRights = collectNonFailing . untag . spreadChoice . SuccessList + where untag = fromLeft (error "Unexpected Left") + +-- | A version of 'collectRights' generalized to other containers. The +-- container must be both "reducible" and "buildable". Most general containers +-- should fullfill these requirements, but there is no single typeclass +-- (that I know of) for that. +-- Therefore, they are split between 'Foldable' and 'MonadPlus'. +-- (Note that 'Data.Traversable.Traversable' alone would not be enough, either.) +collectRightsF :: (F.Foldable c, MonadPlus c) => c (Either _l r) -> c r +collectRightsF = F.foldr unTagRight mzero + where unTagRight (Right x) = mplus $ return x + unTagRight _ = id diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs new file mode 100644 index 000000000..82ae3e20e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/Namespaces.hs @@ -0,0 +1,62 @@ +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.Namespaces + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +A class containing a set of namespace identifiers. Used to convert between +typesafe Haskell namespace identifiers and unsafe "real world" namespaces. +-} + +module Text.Pandoc.Readers.Odt.Generic.Namespaces where + +import qualified Data.Map as M + +-- +type NameSpaceIRI = String + +-- +type NameSpaceIRIs nsID = M.Map nsID NameSpaceIRI + +-- +class (Eq nsID, Ord nsID) => NameSpaceID nsID where + + -- | Given a IRI, possibly update the map and return the id of the namespace. + -- May fail if the namespace is unknown and the application does not + -- allow unknown namespaces. + getNamespaceID :: NameSpaceIRI + -> NameSpaceIRIs nsID + -> Maybe (NameSpaceIRIs nsID, nsID) + -- | Given a namespace id, lookup its IRI. May be overriden for performance. + getIRI :: nsID + -> NameSpaceIRIs nsID + -> Maybe NameSpaceIRI + -- | The root element of an XML document has a namespace, too, and the + -- "XML.Light-parser" is eager to remove the corresponding namespace + -- attribute. + -- As a result, at least this root namespace must be provided. + getInitialIRImap :: NameSpaceIRIs nsID + + getIRI = M.lookup + getInitialIRImap = M.empty diff --git a/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs new file mode 100644 index 000000000..afd7d616c --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/SetMap.hs @@ -0,0 +1,48 @@ +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.SetMap + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +A map of values to sets of values. +-} + +module Text.Pandoc.Readers.Odt.Generic.SetMap where + +import qualified Data.Map as M +import qualified Data.Set as S + +type SetMap k v = M.Map k (S.Set v) + +empty :: SetMap k v +empty = M.empty + +fromList :: (Ord k, Ord v) => [(k,v)] -> SetMap k v +fromList = foldr (uncurry insert) empty + +insert :: (Ord k, Ord v) => k -> v -> SetMap k v -> SetMap k v +insert key value setMap = M.insertWith S.union key (S.singleton value) setMap + +union3 :: (Ord k) => SetMap k v -> SetMap k v -> SetMap k v -> SetMap k v +union3 sm1 sm2 sm3 = sm1 `M.union` sm2 `M.union` sm3 diff --git a/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs new file mode 100644 index 000000000..6c10ed61d --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/Utils.hs @@ -0,0 +1,171 @@ +{-# LANGUAGE TypeOperators #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Reader.Odt.Generic.Utils + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +General utility functions for the odt reader. +-} + +module Text.Pandoc.Readers.Odt.Generic.Utils +( uncurry3 +, uncurry4 +, uncurry5 +, uncurry6 +, uncurry7 +, uncurry8 +, swap +, reverseComposition +, bool +, tryToRead +, Lookupable(..) +, readLookupables +, readLookupable +, readPercent +, findBy +, swing +, composition +) where + +import Control.Category ( Category, (>>>), (<<<) ) +import qualified Control.Category as Cat ( id ) +import Control.Monad ( msum ) + +import qualified Data.Foldable as F ( Foldable, foldr ) +import Data.Maybe + + +-- | Aequivalent to +-- > foldr (.) id +-- where '(.)' are 'id' are the ones from "Control.Category" +-- and 'foldr' is the one from "Data.Foldable". +-- The noun-form was chosen to be consistend with 'sum', 'product' etc +-- based on the discussion at +-- +-- (that I was not part of) +composition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a +composition = F.foldr (<<<) Cat.id + +-- | Aequivalent to +-- > foldr (flip (.)) id +-- where '(.)' are 'id' are the ones from "Control.Category" +-- and 'foldr' is the one from "Data.Foldable". +-- A reversed version of 'composition'. +reverseComposition :: (Category cat, F.Foldable f) => f (cat a a) -> cat a a +reverseComposition = F.foldr (>>>) Cat.id + +-- | 'Either' has 'either', 'Maybe' has 'maybe'. 'Bool' should have 'bool'. +-- Note that the first value is selected if the boolean value is 'False'. +-- That makes 'bool' consistent with the other two. Also, 'bool' now takes its +-- arguments in the exact opposite order compared to the normal if construct. +bool :: a -> a -> Bool -> a +bool x _ False = x +bool _ x True = x + +-- | This function often makes it possible to switch values with the functions +-- that are applied to them. +-- +-- Examples: +-- > swing map :: [a -> b] -> a -> [b] +-- > swing any :: [a -> Bool] -> a -> Bool +-- > swing foldr :: b -> a -> [a -> b -> b] -> b +-- > swing scanr :: c -> a -> [a -> c -> c] -> c +-- > swing zipWith :: [a -> b -> c] -> a -> [b] -> [c] +-- > swing find :: [a -> Bool] -> a -> Maybe (a -> Bool) +-- +-- Stolen from +swing :: (((a -> b) -> b) -> c -> d) -> c -> a -> d +swing = flip.(.flip id) +-- swing f c a = f ($ a) c + + +-- | Alternative to 'read'/'reads'. The former of these throws errors +-- (nobody wants that) while the latter returns "to much" for simple purposes. +-- This function instead applies 'reads' and returns the first match (if any) +-- in a 'Maybe'. +tryToRead :: (Read r) => String -> Maybe r +tryToRead = reads >>> listToMaybe >>> fmap fst + +-- | A version of 'reads' that requires a '%' sign after the number +readPercent :: ReadS Int +readPercent s = [ (i,s') | (i , r ) <- reads s + , ("%" , s') <- lex r + ] + +-- | Data that can be looked up. +-- This is mostly a utility to read data with kind *. +class Lookupable a where + lookupTable :: [(String, a)] + +-- | The idea is to use this function as if there was a declaration like +-- +-- > instance (Lookupable a) => (Read a) where +-- > readsPrec _ = readLookupables +-- . +-- But including this code in this form would need UndecideableInstances. +-- That is a bad idea. Luckily 'readLookupable' (without the s at the end) +-- can be used directly in almost any case. +readLookupables :: (Lookupable a) => String -> [(a,String)] +readLookupables s = [ (a,rest) | (word,rest) <- lex s, + let result = lookup word lookupTable, + isJust result, + let Just a = result + ] + +-- | Very similar to a simple 'lookup' in the 'lookupTable', but with a lexer. +readLookupable :: (Lookupable a) => String -> Maybe a +readLookupable s = msum + $ map ((`lookup` lookupTable).fst) + $ lex s + +uncurry3 :: (a->b->c -> z) -> (a,b,c ) -> z +uncurry4 :: (a->b->c->d -> z) -> (a,b,c,d ) -> z +uncurry5 :: (a->b->c->d->e -> z) -> (a,b,c,d,e ) -> z +uncurry6 :: (a->b->c->d->e->f -> z) -> (a,b,c,d,e,f ) -> z +uncurry7 :: (a->b->c->d->e->f->g -> z) -> (a,b,c,d,e,f,g ) -> z +uncurry8 :: (a->b->c->d->e->f->g->h -> z) -> (a,b,c,d,e,f,g,h) -> z + +uncurry3 fun (a,b,c ) = fun a b c +uncurry4 fun (a,b,c,d ) = fun a b c d +uncurry5 fun (a,b,c,d,e ) = fun a b c d e +uncurry6 fun (a,b,c,d,e,f ) = fun a b c d e f +uncurry7 fun (a,b,c,d,e,f,g ) = fun a b c d e f g +uncurry8 fun (a,b,c,d,e,f,g,h) = fun a b c d e f g h + +swap :: (a,b) -> (b,a) +swap (a,b) = (b,a) + +-- | A version of "Data.List.find" that uses a converter to a Maybe instance. +-- The returned value is the first which the converter returns in a 'Just' +-- wrapper. +findBy :: (a -> Maybe b) -> [a] -> Maybe b +findBy _ [] = Nothing +findBy f ((f -> Just x):_ ) = Just x +findBy f ( _:xs) = findBy f xs + diff --git a/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs new file mode 100644 index 000000000..ec7e0ea5e --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Generic/XMLConverter.hs @@ -0,0 +1,1064 @@ +{-# LANGUAGE Arrows #-} +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE GADTs #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE RecordWildCards #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.Generic.XMLConverter + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +A generalized XML parser based on stateful arrows. +It might be sufficient to define this reader as a comonad, but there is +not a lot of use in trying. +-} + +module Text.Pandoc.Readers.Odt.Generic.XMLConverter +( ElementName +, XMLConverterState +, XMLConverter +, FallibleXMLConverter +, swapPosition +, runConverter +, runConverter'' +, runConverter' +, runConverterF' +, runConverterF +, getCurrentElement +, getExtraState +, setExtraState +, modifyExtraState +, convertingExtraState +, producingExtraState +, lookupNSiri +, lookupNSprefix +, readNSattributes +, elemName +, elemNameIs +, strContent +, elContent +, currentElem +, currentElemIs +, expectElement +, elChildren +, findChildren +, filterChildren +, filterChildrenName +, findChild' +, findChild +, filterChild' +, filterChild +, filterChildName' +, filterChildName +, isSet +, isSet' +, isSetWithDefault +, hasAttrValueOf' +, failIfNotAttrValueOf +, isThatTheAttrValue +, searchAttrIn +, searchAttrWith +, searchAttr +, lookupAttr +, lookupAttr' +, lookupAttrWithDefault +, lookupDefaultingAttr +, findAttr' +, findAttr +, findAttrWithDefault +, readAttr +, readAttr' +, readAttrWithDefault +, getAttr +-- , (>/<) +-- , (?>/<) +, executeIn +, collectEvery +, withEveryL +, withEvery +, tryAll +, tryAll' +, IdXMLConverter +, MaybeEConverter +, ElementMatchConverter +, MaybeCConverter +, ContentMatchConverter +, makeMatcherE +, makeMatcherC +, prepareMatchersE +, prepareMatchersC +, matchChildren +, matchContent'' +, matchContent' +, matchContent +) where + +import Control.Applicative hiding ( liftA, liftA2 ) +import Control.Monad ( MonadPlus ) +import Control.Arrow + +import qualified Data.Map as M +import qualified Data.Foldable as F +import Data.Default +import Data.Monoid ( Monoid ) +import Data.Maybe + +import qualified Text.XML.Light as XML + +import Text.Pandoc.Readers.Odt.Arrows.State +import Text.Pandoc.Readers.Odt.Arrows.Utils + +import Text.Pandoc.Readers.Odt.Generic.Namespaces +import Text.Pandoc.Readers.Odt.Generic.Utils +import Text.Pandoc.Readers.Odt.Generic.Fallible + +-------------------------------------------------------------------------------- +-- Basis types for readability +-------------------------------------------------------------------------------- + +-- +type ElementName = String +type AttributeName = String +type AttributeValue = String + +-- +type NameSpacePrefix = String + +-- +type NameSpacePrefixes nsID = M.Map nsID NameSpacePrefix + +-------------------------------------------------------------------------------- +-- Main converter state +-------------------------------------------------------------------------------- + +-- GADT so some of the NameSpaceID restrictions can be deduced +data XMLConverterState nsID extraState where + XMLConverterState :: NameSpaceID nsID + => { -- | A stack of parent elements. The top element is the current one. + -- Arguably, a real Zipper would be better. But that is an + -- optimization that can be made at a later time, e.g. when + -- replacing Text.XML.Light. + parentElements :: [XML.Element] + -- | A map from internal namespace IDs to the namespace prefixes + -- used in XML elements + , namespacePrefixes :: NameSpacePrefixes nsID + -- | A map from internal namespace IDs to namespace IRIs + -- (Only necessary for matching namespace IDs and prefixes) + , namespaceIRIs :: NameSpaceIRIs nsID + -- | A place to put "something else". This feature is used heavily + -- to keep the main code cleaner. More specifically, the main reader + -- is divided into different stages. Each stage lifts something up + -- here, which the next stage can then use. This could of course be + -- generalized to a state-tree or used for the namespace IRIs. The + -- border between states and values is an imaginary one, after all. + -- But the separation as it is seems to be enough for now. + , moreState :: extraState + } + -> XMLConverterState nsID extraState + +-- +createStartState :: (NameSpaceID nsID) + => XML.Element + -> extraState + -> XMLConverterState nsID extraState +createStartState element extraState = + XMLConverterState + { parentElements = [element] + , namespacePrefixes = M.empty + , namespaceIRIs = getInitialIRImap + , moreState = extraState + } + +-- | Functor over extra state +instance Functor (XMLConverterState nsID) where + fmap f ( XMLConverterState parents prefixes iRIs extraState ) + = XMLConverterState parents prefixes iRIs (f extraState) + +-- +replaceExtraState :: extraState + -> XMLConverterState nsID _x + -> XMLConverterState nsID extraState +replaceExtraState x s + = fmap (const x) s + +-- +currentElement :: XMLConverterState nsID extraState + -> XML.Element +currentElement state = head (parentElements state) + +-- | Replace the current position by another, modifying the extra state +-- in the process +swapPosition :: (extraState -> extraState') + -> [XML.Element] + -> XMLConverterState nsID extraState + -> XMLConverterState nsID extraState' +swapPosition f stack state + = state { parentElements = stack + , moreState = f (moreState state) + } + +-- | Replace the current position by another, modifying the extra state +-- in the process +swapStack' :: XMLConverterState nsID extraState + -> [XML.Element] + -> ( XMLConverterState nsID extraState , [XML.Element] ) +swapStack' state stack + = ( state { parentElements = stack } + , parentElements state + ) + +-- +pushElement :: XML.Element + -> XMLConverterState nsID extraState + -> XMLConverterState nsID extraState +pushElement e state = state { parentElements = e:(parentElements state) } + +-- | Pop the top element from the call stack, unless it is the last one. +popElement :: XMLConverterState nsID extraState + -> Maybe (XMLConverterState nsID extraState) +popElement state + | _:es@(_:_) <- parentElements state = Just $ state { parentElements = es } + | otherwise = Nothing + +-------------------------------------------------------------------------------- +-- Main type +-------------------------------------------------------------------------------- + +-- It might be a good idea to pack the converters in a GADT +-- Downside: data instead of type +-- Upside: 'Failure' could be made a parameter as well. + +-- +type XMLConverter nsID extraState input output + = ArrowState (XMLConverterState nsID extraState ) input output + +type FallibleXMLConverter nsID extraState input output + = XMLConverter nsID extraState input (Fallible output) + +-- +runConverter :: XMLConverter nsID extraState input output + -> XMLConverterState nsID extraState + -> input + -> output +runConverter converter state input = snd $ runArrowState converter (state,input) + +-- +runConverter'' :: (NameSpaceID nsID) + => XMLConverter nsID extraState (Fallible ()) output + -> extraState + -> XML.Element + -> output +runConverter'' converter extraState element = runConverter (readNSattributes >>> converter) (createStartState element extraState) () + +runConverter' :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState () success + -> extraState + -> XML.Element + -> Fallible success +runConverter' converter extraState element = runConverter (readNSattributes >>? converter) (createStartState element extraState) () + +-- +runConverterF' :: FallibleXMLConverter nsID extraState x y + -> XMLConverterState nsID extraState + -> Fallible x -> Fallible y +runConverterF' a s e = runConverter (returnV e >>? a) s e + +-- +runConverterF :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState XML.Element x + -> extraState + -> Fallible XML.Element -> Fallible x +runConverterF a s = either failWith + (\e -> runConverter a (createStartState e s) e) + +-- +getCurrentElement :: XMLConverter nsID extraState x XML.Element +getCurrentElement = extractFromState currentElement + +-- +getExtraState :: XMLConverter nsID extraState x extraState +getExtraState = extractFromState moreState + +-- +setExtraState :: XMLConverter nsID extraState extraState extraState +setExtraState = withState $ \state extra + -> (replaceExtraState extra state , extra) + + +-- | Lifts a function to the extra state. +modifyExtraState :: (extraState -> extraState) + -> XMLConverter nsID extraState x x +modifyExtraState = modifyState.fmap + + +-- | First sets the extra state to the new value. Then modifies the original +-- extra state with a converter that uses the new state. Finally, the +-- intermediate state is dropped and the extra state is lifted into the +-- state as it was at the beginning of the function. +-- As a result, exactly the extra state and nothing else is changed. +-- The resulting converter even behaves like an identity converter on the +-- value level. +-- +-- (The -ing form is meant to be mnemonic in a sequence of arrows as in +-- convertingExtraState () converter >>> doOtherStuff) +-- +convertingExtraState :: extraState' + -> FallibleXMLConverter nsID extraState' extraState extraState + -> FallibleXMLConverter nsID extraState x x +convertingExtraState v a = withSubStateF setVAsExtraState modifyWithA + where + setVAsExtraState = liftAsSuccess $ extractFromState id >>^ replaceExtraState v + modifyWithA = keepingTheValue (moreState ^>> a) + >>^ spreadChoice >>?§ flip replaceExtraState + +-- | First sets the extra state to the new value. Then produces a new +-- extra state with a converter that uses the new state. Finally, the +-- intermediate state is dropped and the extra state is lifted into the +-- state as it was at the beginning of the function. +-- As a result, exactly the extra state and nothing else is changed. +-- The resulting converter even behaves like an identity converter on the +-- value level. +-- +-- Aequivalent to +-- +-- > \v x a -> convertingExtraState v (returnV x >>> a) +-- +-- (The -ing form is meant to be mnemonic in a sequence of arrows as in +-- producingExtraState () () producer >>> doOtherStuff) +-- +producingExtraState :: extraState' + -> a + -> FallibleXMLConverter nsID extraState' a extraState + -> FallibleXMLConverter nsID extraState x x +producingExtraState v x a = convertingExtraState v (returnV x >>> a) + + +-------------------------------------------------------------------------------- +-- Work in namespaces +-------------------------------------------------------------------------------- + +-- | Arrow version of 'getIRI' +lookupNSiri :: (NameSpaceID nsID) + => nsID + -> XMLConverter nsID extraState x (Maybe NameSpaceIRI) +lookupNSiri nsID = extractFromState + $ \state -> getIRI nsID $ namespaceIRIs state + +-- +lookupNSprefix :: (NameSpaceID nsID) + => nsID + -> XMLConverter nsID extraState x (Maybe NameSpacePrefix) +lookupNSprefix nsID = extractFromState + $ \state -> M.lookup nsID $ namespacePrefixes state + +-- | Extracts namespace attributes from the current element and tries to +-- update the current mapping accordingly +readNSattributes :: (NameSpaceID nsID) + => FallibleXMLConverter nsID extraState x () +readNSattributes = fromState $ \state -> maybe (state, failEmpty ) + ( , succeedWith ()) + (extractNSAttrs state ) + where + extractNSAttrs :: (NameSpaceID nsID) + => XMLConverterState nsID extraState + -> Maybe (XMLConverterState nsID extraState) + extractNSAttrs startState + = foldl (\state d -> state >>= addNS d) + (Just startState) + nsAttribs + where nsAttribs = mapMaybe readNSattr (XML.elAttribs element) + element = currentElement startState + readNSattr (XML.Attr (XML.QName name _ (Just "xmlns")) iri) + = Just (name, iri) + readNSattr _ = Nothing + addNS (prefix, iri) state = fmap updateState + $ getNamespaceID iri + $ namespaceIRIs state + where updateState (iris,nsID) + = state { namespaceIRIs = iris + , namespacePrefixes = M.insert nsID prefix + $ namespacePrefixes state + } + +-------------------------------------------------------------------------------- +-- Common namespace accessors +-------------------------------------------------------------------------------- + +-- | Given a namespace id and an element name, creates a 'XML.QName' for +-- internal use +elemName :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x XML.QName +elemName nsID name = lookupNSiri nsID + &&& lookupNSprefix nsID + >>§ XML.QName name + +-- | Checks if a given element matches both a specified namespace id +-- and a specified element name +elemNameIs :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState XML.Element Bool +elemNameIs nsID name = keepingTheValue (lookupNSiri nsID) >>§ hasThatName + where hasThatName e iri = let elName = XML.elName e + in XML.qName elName == name + && XML.qURI elName == iri + +-------------------------------------------------------------------------------- +-- General content +-------------------------------------------------------------------------------- + +-- +strContent :: XMLConverter nsID extraState x String +strContent = getCurrentElement + >>^ XML.strContent + +-- +elContent :: XMLConverter nsID extraState x [XML.Content] +elContent = getCurrentElement + >>^ XML.elContent + +-------------------------------------------------------------------------------- +-- Current element +-------------------------------------------------------------------------------- + +-- +currentElem :: XMLConverter nsID extraState x (XML.QName) +currentElem = getCurrentElement + >>^ XML.elName + +currentElemIs :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x Bool +currentElemIs nsID name = getCurrentElement + >>> elemNameIs nsID name + + + +{- +currentElemIs'' nsID name = ( (getCurrentElement >>^ XML.elName >>> + (XML.qName >>^ (&&).(== name) ) + ^&&&^ + (XML.qIRI >>^ (==) ) + ) >>§ (.) + ) &&& lookupNSiri nsID >>§ ($) +-} + +-- +expectElement :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState x () +expectElement nsID name = currentElemIs nsID name + >>^ boolToChoice + +-------------------------------------------------------------------------------- +-- Chilren +-------------------------------------------------------------------------------- + +-- +elChildren :: XMLConverter nsID extraState x [XML.Element] +elChildren = getCurrentElement + >>^ XML.elChildren + +-- +findChildren :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState x [XML.Element] +findChildren nsID name = elemName nsID name + &&& getCurrentElement + >>§ XML.findChildren + +-- +filterChildren :: (XML.Element -> Bool) + -> XMLConverter nsID extraState x [XML.Element] +filterChildren p = getCurrentElement + >>^ XML.filterChildren p + +-- +filterChildrenName :: (XML.QName -> Bool) + -> XMLConverter nsID extraState x [XML.Element] +filterChildrenName p = getCurrentElement + >>^ XML.filterChildrenName p + +-- +findChild' :: (NameSpaceID nsID) + => nsID + -> ElementName + -> XMLConverter nsID extraState x (Maybe XML.Element) +findChild' nsID name = elemName nsID name + &&& getCurrentElement + >>§ XML.findChild + +-- +findChild :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState x XML.Element +findChild nsID name = findChild' nsID name + >>> maybeToChoice + +-- +filterChild' :: (XML.Element -> Bool) + -> XMLConverter nsID extraState x (Maybe XML.Element) +filterChild' p = getCurrentElement + >>^ XML.filterChild p + +-- +filterChild :: (XML.Element -> Bool) + -> FallibleXMLConverter nsID extraState x XML.Element +filterChild p = filterChild' p + >>> maybeToChoice + +-- +filterChildName' :: (XML.QName -> Bool) + -> XMLConverter nsID extraState x (Maybe XML.Element) +filterChildName' p = getCurrentElement + >>^ XML.filterChildName p + +-- +filterChildName :: (XML.QName -> Bool) + -> FallibleXMLConverter nsID extraState x XML.Element +filterChildName p = filterChildName' p + >>> maybeToChoice + + +-------------------------------------------------------------------------------- +-- Attributes +-------------------------------------------------------------------------------- + +-- +isSet :: (NameSpaceID nsID) + => nsID -> AttributeName + -> (Either Failure Bool) + -> FallibleXMLConverter nsID extraState x Bool +isSet nsID attrName deflt + = findAttr' nsID attrName + >>^ maybe deflt stringToBool + +-- +isSet' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe Bool) +isSet' nsID attrName = findAttr' nsID attrName + >>^ (>>= stringToBool') + +isSetWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> Bool + -> XMLConverter nsID extraState x Bool +isSetWithDefault nsID attrName def' + = isSet' nsID attrName + >>^ fromMaybe def' + +-- +hasAttrValueOf' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> AttributeValue + -> XMLConverter nsID extraState x Bool +hasAttrValueOf' nsID attrName attrValue + = findAttr nsID attrName + >>> ( const False ^|||^ (==attrValue)) + +-- +failIfNotAttrValueOf :: (NameSpaceID nsID) + => nsID -> AttributeName + -> AttributeValue + -> FallibleXMLConverter nsID extraState x () +failIfNotAttrValueOf nsID attrName attrValue + = hasAttrValueOf' nsID attrName attrValue + >>^ boolToChoice + +-- | Is the value that is currently transported in the arrow the value of +-- the specified attribute? +isThatTheAttrValue :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState AttributeValue Bool +isThatTheAttrValue nsID attrName + = keepingTheValue + (findAttr nsID attrName) + >>§ right.(==) + +-- | Lookup value in a dictionary, fail if no attribute found or value +-- not in dictionary +searchAttrIn :: (NameSpaceID nsID) + => nsID -> AttributeName + -> [(AttributeValue,a)] + -> FallibleXMLConverter nsID extraState x a +searchAttrIn nsID attrName dict + = findAttr nsID attrName + >>?^? maybeToChoice.(`lookup` dict ) + + +-- | Lookup value in a dictionary. Fail if no attribute found. If value not in +-- dictionary, return default value +searchAttrWith :: (NameSpaceID nsID) + => nsID -> AttributeName + -> a + -> [(AttributeValue,a)] + -> FallibleXMLConverter nsID extraState x a +searchAttrWith nsID attrName defV dict + = findAttr nsID attrName + >>?^ (fromMaybe defV).(`lookup` dict ) + +-- | Lookup value in a dictionary. If attribute or value not found, +-- return default value +searchAttr :: (NameSpaceID nsID) + => nsID -> AttributeName + -> a + -> [(AttributeValue,a)] + -> XMLConverter nsID extraState x a +searchAttr nsID attrName defV dict + = searchAttrIn nsID attrName dict + >>> const defV ^|||^ id + +-- | Read a 'Lookupable' attribute. Fail if no match. +lookupAttr :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x a +lookupAttr nsID attrName = lookupAttr' nsID attrName + >>^ maybeToChoice + + +-- | Read a 'Lookupable' attribute. Return the result as a 'Maybe'. +lookupAttr' :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe a) +lookupAttr' nsID attrName + = findAttr' nsID attrName + >>^ (>>= readLookupable) + +-- | Read a 'Lookupable' attribute with explicit default +lookupAttrWithDefault :: (NameSpaceID nsID, Lookupable a) + => nsID -> AttributeName + -> a + -> XMLConverter nsID extraState x a +lookupAttrWithDefault nsID attrName deflt + = lookupAttr' nsID attrName + >>^ fromMaybe deflt + +-- | Read a 'Lookupable' attribute with implicit default +lookupDefaultingAttr :: (NameSpaceID nsID, Lookupable a, Default a) + => nsID -> AttributeName + -> XMLConverter nsID extraState x a +lookupDefaultingAttr nsID attrName + = lookupAttrWithDefault nsID attrName def + +-- | Return value as a (Maybe String) +findAttr' :: (NameSpaceID nsID) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe AttributeValue) +findAttr' nsID attrName = elemName nsID attrName + &&& getCurrentElement + >>§ XML.findAttr + +-- | Return value as string or fail +findAttr :: (NameSpaceID nsID) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x AttributeValue +findAttr nsID attrName = findAttr' nsID attrName + >>> maybeToChoice + +-- | Return value as string or return provided default value +findAttrWithDefault :: (NameSpaceID nsID) + => nsID -> AttributeName + -> AttributeValue + -> XMLConverter nsID extraState x AttributeValue +findAttrWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ fromMaybe deflt + +-- | Read and return value or fail +readAttr :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> FallibleXMLConverter nsID extraState x attrValue +readAttr nsID attrName = readAttr' nsID attrName + >>> maybeToChoice + +-- | Read and return value or return Nothing +readAttr' :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> XMLConverter nsID extraState x (Maybe attrValue) +readAttr' nsID attrName = findAttr' nsID attrName + >>^ (>>= tryToRead) + +-- | Read and return value or return provided default value +readAttrWithDefault :: (NameSpaceID nsID, Read attrValue) + => nsID -> AttributeName + -> attrValue + -> XMLConverter nsID extraState x attrValue +readAttrWithDefault nsID attrName deflt + = findAttr' nsID attrName + >>^ (>>= tryToRead) + >>^ fromMaybe deflt + +-- | Read and return value or return default value from 'Default' instance +getAttr :: (NameSpaceID nsID, Read attrValue, Default attrValue) + => nsID -> AttributeName + -> XMLConverter nsID extraState x attrValue +getAttr nsID attrName = readAttrWithDefault nsID attrName def + +-------------------------------------------------------------------------------- +-- Movements +-------------------------------------------------------------------------------- + +-- +jumpThere :: XMLConverter nsID extraState XML.Element XML.Element +jumpThere = withState (\state element + -> ( pushElement element state , element ) + ) + +-- +swapStack :: XMLConverter nsID extraState [XML.Element] [XML.Element] +swapStack = withState swapStack' + +-- +jumpBack :: FallibleXMLConverter nsID extraState _x _x +jumpBack = tryModifyState (popElement >>> maybeToChoice) + +-- | Support function for "procedural" converters: jump to an element, execute +-- a converter, jump back. +-- This version is safer than 'executeThere', because it does not rely on the +-- internal stack. As a result, the converter can not move around in arbitrary +-- ways. The downside is of course that some of the environment is not +-- accessible to the converter. +switchingTheStack :: XMLConverter nsID moreState a b + -> XMLConverter nsID moreState (a, XML.Element) b +switchingTheStack a = second ( (:[]) ^>> swapStack ) + >>> first a + >>> second swapStack + >>^ fst + +-- | Support function for "procedural" converters: jumps to an element, executes +-- a converter, jumps back. +-- Make sure that the converter is well-behaved; that is it should +-- return to the exact position it started from in /every possible path/ of +-- execution, even if it "fails". If it does not, you may encounter +-- strange bugs. If you are not sure about the behaviour or want to use +-- shortcuts, you can often use 'switchingTheStack' instead. +executeThere :: FallibleXMLConverter nsID moreState a b + -> FallibleXMLConverter nsID moreState (a, XML.Element) b +executeThere a = second jumpThere + >>> fst + ^>> a + >>> jumpBack -- >>? jumpBack would not ensure the jump. + >>^ collapseEither + +-- | Do something in a sub-element, tnen come back +executeIn :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState f s + -> FallibleXMLConverter nsID extraState f s +executeIn nsID name a = keepingTheValue + (findChild nsID name) + >>> ignoringState liftFailure + >>? switchingTheStack a + where liftFailure (_, (Left f)) = Left f + liftFailure (x, (Right e)) = Right (x, e) + +-------------------------------------------------------------------------------- +-- Iterating over children +-------------------------------------------------------------------------------- + +-- Helper converter to prepare different types of iterations. +-- It lifts the children (of a certain type) of the current element +-- into the value level and pairs each one with the current input value. +prepareIteration :: (NameSpaceID nsID) + => nsID -> ElementName + -> XMLConverter nsID extraState b [(b, XML.Element)] +prepareIteration nsID name = keepingTheValue + (findChildren nsID name) + >>§ distributeValue + +-- | Applies a converter to every child element of a specific type. +-- Collects results in a 'Monoid'. +-- Fails completely if any conversion fails. +collectEvery :: (NameSpaceID nsID, Monoid m) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a m + -> FallibleXMLConverter nsID extraState a m +collectEvery nsID name a = prepareIteration nsID name + >>> foldS' (switchingTheStack a) + +-- +withEveryL :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a b + -> FallibleXMLConverter nsID extraState a [b] +withEveryL = withEvery + +-- | Applies a converter to every child element of a specific type. +-- Collects results in a 'MonadPlus'. +-- Fails completely if any conversion fails. +withEvery :: (NameSpaceID nsID, MonadPlus m) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a b + -> FallibleXMLConverter nsID extraState a (m b) +withEvery nsID name a = prepareIteration nsID name + >>> iterateS' (switchingTheStack a) + +-- | Applies a converter to every child element of a specific type. +-- Collects all successful results in a list. +tryAll :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState b a + -> XMLConverter nsID extraState b [a] +tryAll nsID name a = prepareIteration nsID name + >>> iterateS (switchingTheStack a) + >>^ collectRights + +-- | Applies a converter to every child element of a specific type. +-- Collects all successful results. +tryAll' :: (NameSpaceID nsID, F.Foldable c, MonadPlus c) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState b a + -> XMLConverter nsID extraState b (c a) +tryAll' nsID name a = prepareIteration nsID name + >>> iterateS (switchingTheStack a) + >>^ collectRightsF + +-------------------------------------------------------------------------------- +-- Matching children +-------------------------------------------------------------------------------- + +type IdXMLConverter nsID moreState x + = XMLConverter nsID moreState x x + +type MaybeEConverter nsID moreState x + = Maybe (IdXMLConverter nsID moreState (x, XML.Element)) + +-- Chainable converter that helps deciding which converter to actually use. +type ElementMatchConverter nsID extraState x + = IdXMLConverter nsID + extraState + (MaybeEConverter nsID extraState x, XML.Element) + +type MaybeCConverter nsID moreState x + = Maybe (IdXMLConverter nsID moreState (x, XML.Content)) + +-- Chainable converter that helps deciding which converter to actually use. +type ContentMatchConverter nsID extraState x + = IdXMLConverter nsID + extraState + (MaybeCConverter nsID extraState x, XML.Content) + +-- Helper function: The @c@ is actually a converter that is to be selected by +-- matching XML elements to the first two parameters. +-- The fold used to match elements however is very simple, so to use it, +-- this function wraps the converter in another converter that unifies +-- the accumulator. Think of a lot of converters with the resulting type +-- chained together. The accumulator not only transports the element +-- unchanged to the next matcher, it also does the actual selecting by +-- combining the intermediate results with '(<|>)'. +makeMatcherE :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a a + -> ElementMatchConverter nsID extraState a +makeMatcherE nsID name c = ( second ( + elemNameIs nsID name + >>^ bool Nothing (Just tryC) + ) + >>§ (<|>) + ) &&&^ snd + where tryC = (fst ^&&& executeThere c >>§ recover) &&&^ snd + +-- Helper function: The @c@ is actually a converter that is to be selected by +-- matching XML content to the first two parameters. +-- The fold used to match elements however is very simple, so to use it, +-- this function wraps the converter in another converter that unifies +-- the accumulator. Think of a lot of converters with the resulting type +-- chained together. The accumulator not only transports the element +-- unchanged to the next matcher, it also does the actual selecting by +-- combining the intermediate results with '(<|>)'. +makeMatcherC :: (NameSpaceID nsID) + => nsID -> ElementName + -> FallibleXMLConverter nsID extraState a a + -> ContentMatchConverter nsID extraState a +makeMatcherC nsID name c = ( second ( contentToElem + >>> returnV Nothing + ||| ( elemNameIs nsID name + >>^ bool Nothing (Just cWithJump) + ) + ) + >>§ (<|>) + ) &&&^ snd + where cWithJump = ( fst + ^&&& ( second contentToElem + >>> spreadChoice + ^>>? executeThere c + ) + >>§ recover) + &&&^ snd + contentToElem :: FallibleXMLConverter nsID extraState XML.Content XML.Element + contentToElem = arr $ \e -> case e of + XML.Elem e' -> succeedWith e' + _ -> failEmpty + +-- Creates and chains a bunch of matchers +prepareMatchersE :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] + -> ElementMatchConverter nsID extraState x +--prepareMatchersE = foldSs . (map $ uncurry3 makeMatcherE) +prepareMatchersE = reverseComposition . (map $ uncurry3 makeMatcherE) + +-- Creates and chains a bunch of matchers +prepareMatchersC :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState x x)] + -> ContentMatchConverter nsID extraState x +--prepareMatchersC = foldSs . (map $ uncurry3 makeMatcherC) +prepareMatchersC = reverseComposition . (map $ uncurry3 makeMatcherC) + +-- | Takes a list of element-data - converter groups and +-- * Finds all children of the current element +-- * Matches each group to each child in order (at most one group per child) +-- * Filters non-matched children +-- * Chains all found converters in child-order +-- * Applies the chain to the input element +matchChildren :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchChildren lookups = let matcher = prepareMatchersE lookups + in keepingTheValue ( + elChildren + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ catMaybes.map (\(m,e) -> fmap (swallowElem e) m) + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the element and drop the element + -- in the return value + swallowElem element converter = (,element) ^>> converter >>^ fst + +-- +matchContent'' :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchContent'' lookups = let matcher = prepareMatchersC lookups + in keepingTheValue ( + elContent + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ catMaybes.map (\(m,c) -> fmap (swallowContent c) m) + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the content and drop the content + -- in the return value + swallowContent content converter = (,content) ^>> converter >>^ fst + + +-- | Takes a list of element-data - converter groups and +-- * Finds all content of the current element +-- * Matches each group to each piece of content in order +-- (at most one group per piece of content) +-- * Filters non-matched content +-- * Chains all found converters in content-order +-- * Applies the chain to the input element +matchContent' :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState a a +matchContent' lookups = matchContent lookups (arr fst) + +-- | Takes a list of element-data - converter groups and +-- * Finds all content of the current element +-- * Matches each group to each piece of content in order +-- (at most one group per piece of content) +-- * Adds a default converter for all non-matched content +-- * Chains all found converters in content-order +-- * Applies the chain to the input element +matchContent :: (NameSpaceID nsID) + => [(nsID, ElementName, FallibleXMLConverter nsID extraState a a)] + -> XMLConverter nsID extraState (a,XML.Content) a + -> XMLConverter nsID extraState a a +matchContent lookups fallback + = let matcher = prepareMatchersC lookups + in keepingTheValue ( + elContent + >>> map (Nothing,) + ^>> iterateSL matcher + >>^ map swallowOrFallback + -- >>> foldSs + >>> reverseComposition + ) + >>> swap + ^>> app + where + -- let the converter swallow the content and drop the content + -- in the return value + swallowOrFallback (Just converter,content) = (,content) ^>> converter >>^ fst + swallowOrFallback (Nothing ,content) = (,content) ^>> fallback + +-------------------------------------------------------------------------------- +-- Internals +-------------------------------------------------------------------------------- + +stringToBool :: (Monoid failure) => String -> Either failure Bool +stringToBool val -- stringToBool' val >>> maybeToChoice + | val `elem` trueValues = succeedWith True + | val `elem` falseValues = succeedWith False + | otherwise = failEmpty + where trueValues = ["true" ,"on" ,"1"] + falseValues = ["false","off","0"] + +stringToBool' :: String -> Maybe Bool +stringToBool' val | val `elem` trueValues = Just True + | val `elem` falseValues = Just False + | otherwise = Nothing + where trueValues = ["true" ,"on" ,"1"] + falseValues = ["false","off","0"] + + +distributeValue :: a -> [b] -> [(a,b)] +distributeValue = map.(,) + +-------------------------------------------------------------------------------- + +{- +NOTES +It might be a good idea to refactor the namespace stuff. +E.g.: if a namespace constructor took a string as a parameter, things like +> a ?>/< (NsText,"body") +would be nicer. +Together with a rename and some trickery, something like +> |< NsText "body" >< NsText "p" ?> a | +might even be possible. + +Some day, XML.Light should be replaced by something better. +While doing that, it might be useful to replace String as the type of element +names with something else, too. (Of course with OverloadedStrings). +While doing that, maybe the types can be created in a way that something like +> NsText:"body" +could be used. Overloading (:) does not sounds like the best idea, but if the +element name type was a list, this might be possible. +Of course that would be a bit hackish, so the "right" way would probably be +something like +> InNS NsText "body" +but isn't that a bit boring? ;) +-} diff --git a/src/Text/Pandoc/Readers/Odt/Namespaces.hs b/src/Text/Pandoc/Readers/Odt/Namespaces.hs new file mode 100644 index 000000000..e28056814 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/Namespaces.hs @@ -0,0 +1,110 @@ +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Reader.Odt.Namespaces + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Namespaces used in odt files. +-} + +module Text.Pandoc.Readers.Odt.Namespaces ( Namespace (..) + ) where + +import Data.List ( isPrefixOf ) +import Data.Maybe ( fromMaybe, listToMaybe ) +import qualified Data.Map as M ( empty, insert ) + +import Text.Pandoc.Readers.Odt.Generic.Namespaces + + +instance NameSpaceID Namespace where + + getInitialIRImap = nsIDmap + + getNamespaceID "" m = Just(m, NsXML) + getNamespaceID iri m = asPair $ fromMaybe (NsOther iri) (findID iri) + where asPair nsID = Just (M.insert nsID iri m, nsID) + + +findID :: NameSpaceIRI -> Maybe Namespace +findID iri = listToMaybe [nsID | (iri',~nsID) <- nsIDs, iri' `isPrefixOf` iri] + +nsIDmap :: NameSpaceIRIs Namespace +nsIDmap = foldr (uncurry $ flip M.insert) M.empty nsIDs + +data Namespace = -- Open Document core + NsOffice | NsStyle | NsText | NsTable | NsForm + | NsDraw | Ns3D | NsAnim | NsChart | NsConfig + | NsDB | NsMeta | NsNumber | NsScript | NsManifest + | NsPresentation + -- Metadata + | NsODF + -- Compatible elements + | NsXSL_FO | NsSVG | NsSmil + -- External standards + | NsMathML | NsXForms | NsXLink | NsXHtml | NsGRDDL + | NsDublinCore + -- Metadata manifest + | NsPKG + -- Others + | NsOpenFormula + -- Core XML (basically only for the 'id'-attribute) + | NsXML + -- Fallback + | NsOther String + deriving ( Eq, Ord, Show ) + +-- | Not the actual iri's, but large prefixes of them - this way there are +-- less versioning problems and the like. +nsIDs :: [(String,Namespace)] +nsIDs = [ + ("urn:oasis:names:tc:opendocument:xmlns:animation" , NsAnim ), + ("urn:oasis:names:tc:opendocument:xmlns:chart" , NsChart ), + ("urn:oasis:names:tc:opendocument:xmlns:config" , NsConfig ), + ("urn:oasis:names:tc:opendocument:xmlns:database" , NsDB ), + ("urn:oasis:names:tc:opendocument:xmlns:dr3d" , Ns3D ), + ("urn:oasis:names:tc:opendocument:xmlns:drawing" , NsDraw ), + ("urn:oasis:names:tc:opendocument:xmlns:form" , NsForm ), + ("urn:oasis:names:tc:opendocument:xmlns:manifest" , NsManifest ), + ("urn:oasis:names:tc:opendocument:xmlns:meta" , NsMeta ), + ("urn:oasis:names:tc:opendocument:xmlns:datastyle" , NsNumber ), + ("urn:oasis:names:tc:opendocument:xmlns:of" , NsOpenFormula ), + ("urn:oasis:names:tc:opendocument:xmlns:office:1.0" , NsOffice ), + ("urn:oasis:names:tc:opendocument:xmlns:presentation" , NsPresentation ), + ("urn:oasis:names:tc:opendocument:xmlns:script" , NsScript ), + ("urn:oasis:names:tc:opendocument:xmlns:style" , NsStyle ), + ("urn:oasis:names:tc:opendocument:xmlns:table" , NsTable ), + ("urn:oasis:names:tc:opendocument:xmlns:text" , NsText ), + ("urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible", NsXSL_FO ), + ("urn:oasis:names:tc:opendocument:xmlns:smil-compatible" , NsSmil ), + ("urn:oasis:names:tc:opendocument:xmlns:svg-compatible" , NsSVG ), + ("http://docs.oasis-open.org/ns/office/1.2/meta/odf" , NsODF ), + ("http://docs.oasis-open.org/ns/office/1.2/meta/pkg" , NsPKG ), + ("http://purl.org/dc/elements" , NsDublinCore ), + ("http://www.w3.org/2003/g/data-view" , NsGRDDL ), + ("http://www.w3.org/1998/Math/MathML" , NsMathML ), + ("http://www.w3.org/1999/xhtml" , NsXHtml ), + ("http://www.w3.org/2002/xforms" , NsXForms ), + ("http://www.w3.org/1999/xlink" , NsXLink ) + ] \ No newline at end of file diff --git a/src/Text/Pandoc/Readers/Odt/StyleReader.hs b/src/Text/Pandoc/Readers/Odt/StyleReader.hs new file mode 100644 index 000000000..1cf87cc59 --- /dev/null +++ b/src/Text/Pandoc/Readers/Odt/StyleReader.hs @@ -0,0 +1,737 @@ +{-# LANGUAGE TupleSections #-} +{-# LANGUAGE PatternGuards #-} +{-# LANGUAGE ViewPatterns #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE Arrows #-} + +{- +Copyright (C) 2015 Martin Linnemann + +This program is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +-} + +{- | + Module : Text.Pandoc.Readers.Odt.StyleReader + Copyright : Copyright (C) 2015 Martin Linnemann + License : GNU GPL, version 2 or above + + Maintainer : Martin Linnemann + Stability : alpha + Portability : portable + +Reader for the style information in an odt document. +-} + +module Text.Pandoc.Readers.Odt.StyleReader +( Style (..) +, StyleName +, StyleFamily (..) +, Styles (..) +, StyleProperties (..) +, TextProperties (..) +, ParaProperties (..) +, VerticalTextPosition (..) +, ListItemNumberFormat (..) +, ListLevel +, ListStyle (..) +, ListLevelStyle (..) +, ListLevelType (..) +, LengthOrPercent (..) +, lookupStyle +, getTextProperty +, getTextProperty' +, getParaProperty +, getListStyle +, getListLevelStyle +, getStyleFamily +, lookupDefaultStyle +, lookupDefaultStyle' +, lookupListStyleByName +, getPropertyChain +, textPropertyChain +, stylePropertyChain +, stylePropertyChain' +, getStylePropertyChain +, extendedStylePropertyChain +, extendedStylePropertyChain' +, liftStyles +, readStylesAt +) where + +import Control.Arrow +import Control.Applicative hiding ( liftA, liftA2, liftA3 ) + +import qualified Data.Foldable as F +import qualified Data.Map as M +import qualified Data.Set as S +import Data.List ( unfoldr ) +import Data.Default +import Data.Monoid +import Data.Maybe + +import qualified Text.XML.Light as XML + +import Text.Pandoc.Readers.Odt.Arrows.State +import Text.Pandoc.Readers.Odt.Arrows.Utils + +import Text.Pandoc.Readers.Odt.Generic.Utils +import qualified Text.Pandoc.Readers.Odt.Generic.SetMap as SM +import Text.Pandoc.Readers.Odt.Generic.Fallible +import Text.Pandoc.Readers.Odt.Generic.XMLConverter + +import Text.Pandoc.Readers.Odt.Namespaces +import Text.Pandoc.Readers.Odt.Base + + +readStylesAt :: XML.Element -> Fallible Styles +readStylesAt e = runConverter' readAllStyles mempty e + +-------------------------------------------------------------------------------- +-- Reader for font declarations and font pitches +-------------------------------------------------------------------------------- + +-- Pandoc has no support for different font pitches. Yet knowing them can be +-- very helpful in cases where Pandoc has more semantics than OpenDocument. +-- In these cases, the pitch can help deciding as what to define a block of +-- text. So let's start with a type for font pitches: + +data FontPitch = PitchVariable | PitchFixed + deriving ( Eq, Show ) + +instance Lookupable FontPitch where + lookupTable = [ ("variable" , PitchVariable) + , ("fixed" , PitchFixed ) + ] + +instance Default FontPitch where + def = PitchVariable + +-- The font pitch can be specifed in a style directly. Normally, however, +-- it is defined in the font. That is also the specs' recommendation. +-- +-- Thus, we want + +type FontFaceName = String + +type FontPitches = M.Map FontFaceName FontPitch + +-- To get there, the fonts have to be read and the pitches extracted. +-- But the resulting map are only needed at one later place, so it should not be +-- transported on the value level, especially as we already use a state arrow. +-- So instead, the resulting map is lifted into the state of the reader. +-- (An alternative might be ImplicitParams, but again, we already have a state.) +-- +-- So the main style readers will have the types +type StyleReader a b = XMLReader FontPitches a b +-- and +type StyleReaderSafe a b = XMLReaderSafe FontPitches a b +-- respectively. +-- +-- But before we can work with these, we need to define the reader that reads +-- the fonts: + +-- | A reader for font pitches +fontPitchReader :: XMLReader _s _x FontPitches +fontPitchReader = executeIn NsOffice "font-face-decls" ( + ( withEveryL NsStyle "font-face" $ liftAsSuccess ( + findAttr' NsStyle "name" + &&& + lookupDefaultingAttr NsStyle "font-pitch" + ) + ) + >>?^ ( M.fromList . (foldl accumLegalPitches []) ) + ) + where accumLegalPitches ls (Nothing,_) = ls + accumLegalPitches ls (Just n,p) = (n,p):ls + + +-- | A wrapper around the font pitch reader that lifts the result into the +-- state. +readFontPitches :: StyleReader x x +readFontPitches = producingExtraState () () fontPitchReader + + +-- | Looking up a pitch in the state of the arrow. +-- +-- The function does the following: +-- * Look for the font pitch in an attribute. +-- * If that fails, look for the font name, look up the font in the state +-- and use the pitch from there. +-- * Return the result in a Maybe +-- +findPitch :: XMLReaderSafe FontPitches _x (Maybe FontPitch) +findPitch = ( lookupAttr NsStyle "font-pitch" + `ifFailedDo` findAttr NsStyle "font-name" + >>? ( keepingTheValue getExtraState + >>§ M.lookup + >>^ maybeToChoice + ) + ) + >>> choiceToMaybe + +-------------------------------------------------------------------------------- +-- Definitions of main data +-------------------------------------------------------------------------------- + +type StyleName = String + +-- | There are two types of styles: named styles with a style family and an +-- optional style parent, and default styles for each style family, +-- defining default style properties +data Styles = Styles + { stylesByName :: M.Map StyleName Style + , listStylesByName :: M.Map StyleName ListStyle + , defaultStyleMap :: M.Map StyleFamily StyleProperties + } + deriving ( Show ) + +-- Styles from a monoid under union +instance Monoid Styles where + mempty = Styles M.empty M.empty M.empty + mappend (Styles sBn1 dSm1 lsBn1) + (Styles sBn2 dSm2 lsBn2) + = Styles (M.union sBn1 sBn2) + (M.union dSm1 dSm2) + (M.union lsBn1 lsBn2) + +-- Not all families from the specifications are implemented, only those we need. +-- But there are none that are not mentioned here. +data StyleFamily = FaText | FaParagraph +-- | FaTable | FaTableCell | FaTableColumn | FaTableRow +-- | FaGraphic | FaDrawing | FaChart +-- | FaPresentation +-- | FaRuby + deriving ( Eq, Ord, Show ) + +instance Lookupable StyleFamily where + lookupTable = [ ( "text" , FaText ) + , ( "paragraph" , FaParagraph ) +-- , ( "table" , FaTable ) +-- , ( "table-cell" , FaTableCell ) +-- , ( "table-column" , FaTableColumn ) +-- , ( "table-row" , FaTableRow ) +-- , ( "graphic" , FaGraphic ) +-- , ( "drawing-page" , FaDrawing ) +-- , ( "chart" , FaChart ) +-- , ( "presentation" , FaPresentation ) +-- , ( "ruby" , FaRuby ) + ] + +-- | A named style +data Style = Style { styleFamily :: Maybe StyleFamily + , styleParentName :: Maybe StyleName + , listStyle :: Maybe StyleName + , styleProperties :: StyleProperties + } + deriving ( Eq, Show ) + +data StyleProperties = SProps { textProperties :: Maybe TextProperties + , paraProperties :: Maybe ParaProperties +-- , tableColProperties :: Maybe TColProperties +-- , tableRowProperties :: Maybe TRowProperties +-- , tableCellProperties :: Maybe TCellProperties +-- , tableProperties :: Maybe TableProperties +-- , graphicProperties :: Maybe GraphProperties + } + deriving ( Eq, Show ) + +instance Default StyleProperties where + def = SProps { textProperties = Just def + , paraProperties = Just def + } + +data TextProperties = PropT { isEmphasised :: Bool + , isStrong :: Bool + , pitch :: Maybe FontPitch + , verticalPosition :: VerticalTextPosition + , underline :: Maybe UnderlineMode + , strikethrough :: Maybe UnderlineMode + } + deriving ( Eq, Show ) + +instance Default TextProperties where + def = PropT { isEmphasised = False + , isStrong = False + , pitch = Just def + , verticalPosition = def + , underline = Nothing + , strikethrough = Nothing + } + +data ParaProperties = PropP { paraNumbering :: ParaNumbering + , indentation :: LengthOrPercent + , margin_left :: LengthOrPercent + } + deriving ( Eq, Show ) + +instance Default ParaProperties where + def = PropP { paraNumbering = NumberingNone + , indentation = def + , margin_left = def + } + +---- +-- All the little data types that make up the properties +---- + +data VerticalTextPosition = VPosNormal | VPosSuper | VPosSub + deriving ( Eq, Show ) + +instance Default VerticalTextPosition where + def = VPosNormal + +instance Read VerticalTextPosition where + readsPrec _ s = [ (VPosSub , s') | ("sub" , s') <- lexS ] + ++ [ (VPosSuper , s') | ("super" , s') <- lexS ] + ++ [ (signumToVPos n , s') | ( n , s') <- readPercent s ] + where + lexS = lex s + signumToVPos n | n < 0 = VPosSub + | n > 0 = VPosSuper + | otherwise = VPosNormal + +data UnderlineMode = UnderlineModeNormal | UnderlineModeSkipWhitespace + deriving ( Eq, Show ) + +instance Lookupable UnderlineMode where + lookupTable = [ ( "continuous" , UnderlineModeNormal ) + , ( "skip-white-space" , UnderlineModeSkipWhitespace ) + ] + + +data ParaNumbering = NumberingNone | NumberingKeep | NumberingRestart Int + deriving ( Eq, Show ) + +data LengthOrPercent = LengthValueMM Int | PercentValue Int + deriving ( Eq, Show ) + +instance Default LengthOrPercent where + def = LengthValueMM 0 + +instance Read LengthOrPercent where + readsPrec _ s = + [ (PercentValue percent , s' ) | (percent , s' ) <- readPercent s] + ++ [ (LengthValueMM lengthMM , s'') | (length' , s' ) <- reads s + , (unit , s'') <- reads s' + , let lengthMM = estimateInMillimeter + length' unit + ] + +data XslUnit = XslUnitMM | XslUnitCM + | XslUnitInch + | XslUnitPoints | XslUnitPica + | XslUnitPixel + | XslUnitEM + +instance Show XslUnit where + show XslUnitMM = "mm" + show XslUnitCM = "cm" + show XslUnitInch = "in" + show XslUnitPoints = "pt" + show XslUnitPica = "pc" + show XslUnitPixel = "px" + show XslUnitEM = "em" + +instance Read XslUnit where + readsPrec _ "mm" = [(XslUnitMM , "")] + readsPrec _ "cm" = [(XslUnitCM , "")] + readsPrec _ "in" = [(XslUnitInch , "")] + readsPrec _ "pt" = [(XslUnitPoints , "")] + readsPrec _ "pc" = [(XslUnitPica , "")] + readsPrec _ "px" = [(XslUnitPixel , "")] + readsPrec _ "em" = [(XslUnitEM , "")] + readsPrec _ _ = [] + +-- | Rough conversion of measures into millimeters. +-- Pixels and em's are actually implemetation dependant/relative measures, +-- so I could not really easily calculate anything exact here even if I wanted. +-- But I do not care about exactness right now, as I only use measures +-- to determine if a paragraph is "indented" or not. +estimateInMillimeter :: Int -> XslUnit -> Int +estimateInMillimeter n XslUnitMM = n +estimateInMillimeter n XslUnitCM = n * 10 +estimateInMillimeter n XslUnitInch = n * 25 -- * 25.4 +estimateInMillimeter n XslUnitPoints = n `div` 3 -- * 1/72 * 25.4 +estimateInMillimeter n XslUnitPica = n * 4 -- * 12 * 1/72 * 25.4 +estimateInMillimeter n XslUnitPixel = n `div`3 -- * 1/72 * 25.4 +estimateInMillimeter n XslUnitEM = n * 7 -- * 16 * 1/72 * 25.4 + + +---- +-- List styles +---- + +type ListLevel = Int + +newtype ListStyle = ListStyle { levelStyles :: M.Map ListLevel ListLevelStyle + } + deriving ( Eq, Show ) + +-- +getListLevelStyle :: ListLevel -> ListStyle -> Maybe ListLevelStyle +getListLevelStyle level ListStyle{..} = + let (lower , exactHit , _) = M.splitLookup level levelStyles + in exactHit <|> fmap fst (M.maxView lower) + -- findBy (`M.lookup` levelStyles) [level, (level-1) .. 1] + -- ^ simpler, but in general less efficient + +data ListLevelStyle = ListLevelStyle { listLevelType :: ListLevelType + , listItemPrefix :: Maybe String + , listItemSuffix :: Maybe String + , listItemFormat :: ListItemNumberFormat + } + deriving ( Eq, Ord ) + +instance Show ListLevelStyle where + show ListLevelStyle{..} = "" + where maybeToString = fromMaybe "" + +data ListLevelType = LltBullet | LltImage | LltNumbered + deriving ( Eq, Ord, Show ) + +data ListItemNumberFormat = LinfNone + | LinfNumber + | LinfRomanLC | LinfRomanUC + | LinfAlphaLC | LinfAlphaUC + | LinfString String + deriving ( Eq, Ord ) + +instance Show ListItemNumberFormat where + show LinfNone = "" + show LinfNumber = "1" + show LinfRomanLC = "i" + show LinfRomanUC = "I" + show LinfAlphaLC = "a" + show LinfAlphaUC = "A" + show (LinfString s) = s + +instance Default ListItemNumberFormat where + def = LinfNone + +instance Read ListItemNumberFormat where + readsPrec _ "" = [(LinfNone , "")] + readsPrec _ "1" = [(LinfNumber , "")] + readsPrec _ "i" = [(LinfRomanLC , "")] + readsPrec _ "I" = [(LinfRomanUC , "")] + readsPrec _ "a" = [(LinfAlphaLC , "")] + readsPrec _ "A" = [(LinfAlphaUC , "")] + readsPrec _ s = [(LinfString s , "")] + +-------------------------------------------------------------------------------- +-- Readers +-- +-- ...it seems like a whole lot of this should be automatically deriveable +-- or at least moveable into a class. Most of this is data concealed in +-- code. +-------------------------------------------------------------------------------- + +-- +readAllStyles :: StyleReader _x Styles +readAllStyles = ( readFontPitches + >>?! ( readAutomaticStyles + &&& readStyles )) + >>?§? chooseMax + -- all top elements are always on the same hierarchy level + +-- +readStyles :: StyleReader _x Styles +readStyles = executeIn NsOffice "styles" $ liftAsSuccess + $ liftA3 Styles + ( tryAll NsStyle "style" readStyle >>^ M.fromList ) + ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) + ( tryAll NsStyle "default-style" readDefaultStyle >>^ M.fromList ) + +-- +readAutomaticStyles :: StyleReader _x Styles +readAutomaticStyles = executeIn NsOffice "automatic-styles" $ liftAsSuccess + $ liftA3 Styles + ( tryAll NsStyle "style" readStyle >>^ M.fromList ) + ( tryAll NsText "list-style" readListStyle >>^ M.fromList ) + ( returnV M.empty ) + +-- +readDefaultStyle :: StyleReader _x (StyleFamily, StyleProperties) +readDefaultStyle = lookupAttr NsStyle "family" + >>?! keepingTheValue readStyleProperties + +-- +readStyle :: StyleReader _x (StyleName,Style) +readStyle = findAttr NsStyle "name" + >>?! keepingTheValue + ( liftA4 Style + ( lookupAttr' NsStyle "family" ) + ( findAttr' NsStyle "parent-style-name" ) + ( findAttr' NsStyle "list-style-name" ) + readStyleProperties + ) + +-- +readStyleProperties :: StyleReaderSafe _x StyleProperties +readStyleProperties = liftA2 SProps + ( readTextProperties >>> choiceToMaybe ) + ( readParaProperties >>> choiceToMaybe ) + +-- +readTextProperties :: StyleReader _x TextProperties +readTextProperties = + executeIn NsStyle "text-properties" $ liftAsSuccess + ( liftA6 PropT + ( searchAttr NsXSL_FO "font-style" False isFontEmphasised ) + ( searchAttr NsXSL_FO "font-weight" False isFontBold ) + ( findPitch ) + ( getAttr NsStyle "text-position" ) + ( readUnderlineMode ) + ( readStrikeThroughMode ) + ) + where isFontEmphasised = [("normal",False),("italic",True),("oblique",True)] + isFontBold = ("normal",False):("bold",True) + :(map ((,True).show) ([100,200..900]::[Int])) + +readUnderlineMode :: StyleReaderSafe _x (Maybe UnderlineMode) +readUnderlineMode = readLineMode "text-underline-mode" + "text-underline-style" + +readStrikeThroughMode :: StyleReaderSafe _x (Maybe UnderlineMode) +readStrikeThroughMode = readLineMode "text-line-through-mode" + "text-line-through-style" + +readLineMode :: String -> String -> StyleReaderSafe _x (Maybe UnderlineMode) +readLineMode modeAttr styleAttr = proc x -> do + isUL <- searchAttr NsStyle styleAttr False isLinePresent -< x + mode <- lookupAttr' NsStyle modeAttr -< x + if isUL + then case mode of + Just m -> returnA -< Just m + Nothing -> returnA -< Just UnderlineModeNormal + else returnA -< Nothing + where + isLinePresent = [("none",False)] ++ map (,True) + [ "dash" , "dot-dash" , "dot-dot-dash" , "dotted" + , "long-dash" , "solid" , "wave" + ] + +-- +readParaProperties :: StyleReader _x ParaProperties +readParaProperties = + executeIn NsStyle "paragraph-properties" $ liftAsSuccess + ( liftA3 PropP + ( liftA2 readNumbering + ( isSet' NsText "number-lines" ) + ( readAttr' NsText "line-number" ) + ) + ( liftA2 readIndentation + ( isSetWithDefault NsStyle "auto-text-indent" False ) + ( getAttr NsXSL_FO "text-indent" ) + ) + ( getAttr NsXSL_FO "margin-left" ) + ) + where readNumbering (Just True) (Just n) = NumberingRestart n + readNumbering (Just True) _ = NumberingKeep + readNumbering _ _ = NumberingNone + + readIndentation False indent = indent + readIndentation True _ = def + +---- +-- List styles +---- + +-- +readListStyle :: StyleReader _x (StyleName, ListStyle) +readListStyle = + findAttr NsStyle "name" + >>?! keepingTheValue + ( liftA ListStyle + $ ( liftA3 SM.union3 + ( readListLevelStyles NsText "list-level-style-number" LltNumbered ) + ( readListLevelStyles NsText "list-level-style-bullet" LltBullet ) + ( readListLevelStyles NsText "list-level-style-image" LltImage ) + ) >>^ M.mapMaybe chooseMostSpecificListLevelStyle + ) +-- +readListLevelStyles :: Namespace -> ElementName + -> ListLevelType + -> StyleReaderSafe _x (SM.SetMap Int ListLevelStyle) +readListLevelStyles namespace elementName levelType = + ( tryAll namespace elementName (readListLevelStyle levelType) + >>^ SM.fromList + ) + +-- +readListLevelStyle :: ListLevelType -> StyleReader _x (Int, ListLevelStyle) +readListLevelStyle levelType = readAttr NsText "level" + >>?! keepingTheValue + ( liftA4 toListLevelStyle + ( returnV levelType ) + ( findAttr' NsStyle "num-prefix" ) + ( findAttr' NsStyle "num-suffix" ) + ( getAttr NsStyle "num-format" ) + ) + where + toListLevelStyle _ p s LinfNone = ListLevelStyle LltBullet p s LinfNone + toListLevelStyle _ p s f@(LinfString _) = ListLevelStyle LltBullet p s f + toListLevelStyle t p s f = ListLevelStyle t p s f + +-- +chooseMostSpecificListLevelStyle :: S.Set ListLevelStyle -> Maybe ListLevelStyle +chooseMostSpecificListLevelStyle ls | ls == mempty = Nothing + | otherwise = Just ( F.foldr1 select ls ) + where + select ( ListLevelStyle t1 p1 s1 f1 ) + ( ListLevelStyle t2 p2 s2 f2 ) + = ListLevelStyle (select' t1 t2) (p1 <|> p2) (s1 <|> s2) (selectLinf f1 f2) + select' LltNumbered _ = LltNumbered + select' _ LltNumbered = LltNumbered + select' _ _ = LltBullet + selectLinf LinfNone f2 = f2 + selectLinf f1 LinfNone = f1 + selectLinf (LinfString _) f2 = f2 + selectLinf f1 (LinfString _) = f1 + selectLinf f1 _ = f1 + + +-------------------------------------------------------------------------------- +-- Tools to access style data +-------------------------------------------------------------------------------- + +-- +lookupStyle :: StyleName -> Styles -> Maybe Style +lookupStyle name Styles{..} = M.lookup name stylesByName + +-- +lookupDefaultStyle :: StyleFamily -> Styles -> StyleProperties +lookupDefaultStyle family Styles{..} = fromMaybe def + (M.lookup family defaultStyleMap) + +-- +lookupDefaultStyle' :: Styles -> StyleFamily -> StyleProperties +lookupDefaultStyle' Styles{..} family = fromMaybe def + (M.lookup family defaultStyleMap) + +-- +getListStyle :: Style -> Styles -> Maybe ListStyle +getListStyle Style{..} styles = listStyle >>= (`lookupListStyleByName` styles) + +-- +lookupListStyleByName :: StyleName -> Styles -> Maybe ListStyle +lookupListStyleByName name Styles{..} = M.lookup name listStylesByName + + +-- | Returns a chain of parent of the current style. The direct parent will +-- be the first element of the list, followed by its parent and so on. +-- The current style is not in the list. +parents :: Style -> Styles -> [Style] +parents style styles = unfoldr findNextParent style -- Ha! + where findNextParent Style{..} + = fmap duplicate $ (`lookupStyle` styles) =<< styleParentName + +-- | Looks up the style family of the current style. Normally, every style +-- should have one. But if not, all parents are searched. +getStyleFamily :: Style -> Styles -> Maybe StyleFamily +getStyleFamily style@Style{..} styles + = styleFamily + <|> (F.asum $ map (`getStyleFamily` styles) $ parents style styles) + +-- | Each 'Style' has certain 'StyleProperties'. But sometimes not all property +-- values are specified. Instead, a value might be inherited from a +-- parent style. This function makes this chain of inheritance +-- concrete and easily accessible by encapsulating the necessary lookups. +-- The resulting list contains the direct properties of the style as the first +-- element, the ones of the direct parent element as the next one, and so on. +-- +-- Note: There should also be default properties for each style family. These +-- are @not@ contained in this list because properties inherited from +-- parent elements take precedence over default styles. +-- +-- This function is primarily meant to be used through convenience wrappers. +-- +stylePropertyChain :: Style -> Styles -> [StyleProperties] +stylePropertyChain style styles + = map styleProperties (style : parents style styles) + +-- +extendedStylePropertyChain :: [Style] -> Styles -> [StyleProperties] +extendedStylePropertyChain [] _ = [] +extendedStylePropertyChain [style] styles = (stylePropertyChain style styles) + ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) +extendedStylePropertyChain (style:trace) styles = (stylePropertyChain style styles) + ++ (extendedStylePropertyChain trace styles) +-- Optimizable with Data.Sequence + +-- +extendedStylePropertyChain' :: [Style] -> Styles -> Maybe [StyleProperties] +extendedStylePropertyChain' [] _ = Nothing +extendedStylePropertyChain' [style] styles = Just ( + (stylePropertyChain style styles) + ++ (maybeToList (fmap (lookupDefaultStyle' styles) (getStyleFamily style styles))) + ) +extendedStylePropertyChain' (style:trace) styles = fmap ((stylePropertyChain style styles) ++) + (extendedStylePropertyChain' trace styles) + +-- +stylePropertyChain' :: Styles -> Style -> [StyleProperties] +stylePropertyChain' = flip stylePropertyChain + +-- +getStylePropertyChain :: StyleName -> Styles -> [StyleProperties] +getStylePropertyChain name styles = maybe [] + (`stylePropertyChain` styles) + (lookupStyle name styles) + +-- +getPropertyChain :: (StyleProperties -> Maybe a) -> Style -> Styles -> [a] +getPropertyChain extract style styles = catMaybes + $ map extract + $ stylePropertyChain style styles + +-- +textPropertyChain :: Style -> Styles -> [TextProperties] +textPropertyChain = getPropertyChain textProperties + +-- +paraPropertyChain :: Style -> Styles -> [ParaProperties] +paraPropertyChain = getPropertyChain paraProperties + +-- +getTextProperty :: (TextProperties -> a) -> Style -> Styles -> Maybe a +getTextProperty extract style styles = fmap extract + $ listToMaybe + $ textPropertyChain style styles + +-- +getTextProperty' :: (TextProperties -> Maybe a) -> Style -> Styles -> Maybe a +getTextProperty' extract style styles = F.asum + $ map extract + $ textPropertyChain style styles + +-- +getParaProperty :: (ParaProperties -> a) -> Style -> Styles -> Maybe a +getParaProperty extract style styles = fmap extract + $ listToMaybe + $ paraPropertyChain style styles + +-- | Lifts the reader into another readers' state. +liftStyles :: (OdtConverterState s -> OdtConverterState Styles) + -> (OdtConverterState Styles -> OdtConverterState s ) + -> XMLReader s x x +liftStyles extract inject = switchState extract inject + $ convertingExtraState M.empty readAllStyles + diff --git a/src/Text/Pandoc/Shared.hs b/src/Text/Pandoc/Shared.hs index edf2619c0..c09c2f2a0 100644 --- a/src/Text/Pandoc/Shared.hs +++ b/src/Text/Pandoc/Shared.hs @@ -683,16 +683,16 @@ headerLtEq _ _ = False -- | Generate a unique identifier from a list of inlines. -- Second argument is a list of already used identifiers. uniqueIdent :: [Inline] -> [String] -> String -uniqueIdent title' usedIdents = - let baseIdent = case inlineListToIdentifier title' of +uniqueIdent title' usedIdents + = let baseIdent = case inlineListToIdentifier title' of "" -> "section" x -> x - numIdent n = baseIdent ++ "-" ++ show n - in if baseIdent `elem` usedIdents - then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of + numIdent n = baseIdent ++ "-" ++ show n + in if baseIdent `elem` usedIdents + then case find (\x -> numIdent x `notElem` usedIdents) ([1..60000] :: [Int]) of Just x -> numIdent x Nothing -> baseIdent -- if we have more than 60,000, allow repeats - else baseIdent + else baseIdent -- | True if block is a Header block. isHeaderBlock :: Block -> Bool diff --git a/tests/Tests/Readers/Odt.hs b/tests/Tests/Readers/Odt.hs new file mode 100644 index 000000000..cf30b8398 --- /dev/null +++ b/tests/Tests/Readers/Odt.hs @@ -0,0 +1,165 @@ +module Tests.Readers.Odt (tests) where + +import Control.Monad ( liftM ) +import Text.Pandoc.Options +import Text.Pandoc.Readers.Native +import Text.Pandoc.Readers.Markdown +import Text.Pandoc.Definition +import Text.Pandoc.Error +import Tests.Helpers +import Test.Framework +--import Test.HUnit (assertBool) +--import Test.Framework.Providers.HUnit +import qualified Data.ByteString.Lazy as B +import Text.Pandoc.Readers.Odt +import Text.Pandoc.Writers.Native (writeNative) +import qualified Data.Map as M +--import Text.Pandoc.MediaBag (MediaBag, lookupMedia, mediaDirectory) +--import Codec.Archive.Zip + + +tests :: [Test] +tests = testsComparingToMarkdown ++ testsComparingToNative + +testsComparingToMarkdown :: [Test] +testsComparingToMarkdown = map nameToTest namesOfTestsComparingToMarkdown + where nameToTest name = createTest + compareOdtToMarkdown + name + (toOdtPath name) + (toMarkdownPath name) + toOdtPath name = "odt/odt/" ++ name ++ ".odt" + toMarkdownPath name = "odt/markdown/" ++ name ++ ".md" + +testsComparingToNative :: [Test] +testsComparingToNative = map nameToTest namesOfTestsComparingToNative + where nameToTest name = createTest + compareOdtToNative + name + (toOdtPath name) + (toNativePath name) + toOdtPath name = "odt/odt/" ++ name ++ ".odt" + toNativePath name = "odt/native/" ++ name ++ ".native" + + +newtype NoNormPandoc = NoNormPandoc {unNoNorm :: Pandoc} + deriving ( Show ) + +instance ToString NoNormPandoc where + toString d = writeNative def{ writerStandalone = s } $ toPandoc d + where s = case d of + NoNormPandoc (Pandoc (Meta m) _) + | M.null m -> False + | otherwise -> True + +instance ToPandoc NoNormPandoc where + toPandoc = unNoNorm + +getNoNormVia :: (a -> Pandoc) -> String -> Either PandocError a -> NoNormPandoc +getNoNormVia _ readerName (Left _) = error (readerName ++ " reader failed") +getNoNormVia f _ (Right a) = NoNormPandoc (f a) + +type TestCreator = ReaderOptions + -> FilePath -> FilePath + -> IO (NoNormPandoc, NoNormPandoc) + +compareOdtToNative :: TestCreator +compareOdtToNative opts odtPath nativePath = do + nativeFile <- Prelude.readFile nativePath + odtFile <- B.readFile odtPath + let native = getNoNormVia id "native" $ readNative nativeFile + let odt = getNoNormVia fst "odt" $ readOdt opts odtFile + return (odt,native) + +compareOdtToMarkdown :: TestCreator +compareOdtToMarkdown opts odtPath markdownPath = do + markdownFile <- Prelude.readFile markdownPath + odtFile <- B.readFile odtPath + let markdown = getNoNormVia id "markdown" $ readMarkdown opts markdownFile + let odt = getNoNormVia fst "odt" $ readOdt opts odtFile + return (odt,markdown) + + +createTest :: TestCreator + -> TestName + -> FilePath -> FilePath + -> Test +createTest creator name path1 path2 = + buildTest $ liftM (test id name) (creator def path1 path2) + +{- +-- + +getMedia :: FilePath -> FilePath -> IO (Maybe B.ByteString) +getMedia archivePath mediaPath = do + zf <- B.readFile archivePath >>= return . toArchive + return $ findEntryByPath ("Pictures/" ++ mediaPath) zf >>= (Just . fromEntry) + +compareMediaPathIO :: FilePath -> MediaBag -> FilePath -> IO Bool +compareMediaPathIO mediaPath mediaBag odtPath = do + odtMedia <- getMedia odtPath mediaPath + let mbBS = case lookupMedia mediaPath mediaBag of + Just (_, bs) -> bs + Nothing -> error ("couldn't find " ++ + mediaPath ++ + " in media bag") + odtBS = case odtMedia of + Just bs -> bs + Nothing -> error ("couldn't find " ++ + mediaPath ++ + " in media bag") + return $ mbBS == odtBS + +compareMediaBagIO :: FilePath -> IO Bool +compareMediaBagIO odtFile = do + df <- B.readFile odtFile + let (_, mb) = readOdt def df + bools <- mapM + (\(fp, _, _) -> compareMediaPathIO fp mb odtFile) + (mediaDirectory mb) + return $ and bools + +testMediaBagIO :: String -> FilePath -> IO Test +testMediaBagIO name odtFile = do + outcome <- compareMediaBagIO odtFile + return $ testCase name (assertBool + ("Media didn't match media bag in file " ++ odtFile) + outcome) + +testMediaBag :: String -> FilePath -> Test +testMediaBag name odtFile = buildTest $ testMediaBagIO name odtFile +-} +-- + + + +namesOfTestsComparingToMarkdown :: [ String ] +namesOfTestsComparingToMarkdown = [ "bold" +-- , "citation" + , "endnote" + , "externalLink" + , "footnote" + , "headers" +-- , "horizontalRule" +-- , "image" + , "italic" +-- , "listBlocks" + , "paragraph" + , "strikeout" +-- , "trackedChanges" + , "underlined" + ] + +namesOfTestsComparingToNative :: [ String ] +namesOfTestsComparingToNative = [ "blockquote" + , "orderedListMixed" + , "orderedListRoman" + , "orderedListSimple" + , "referenceToChapter" + , "referenceToListItem" + , "referenceToText" + , "simpleTable" +-- , "table" + , "unicode" + , "unorderedList" + ] \ No newline at end of file diff --git a/tests/odt/markdown/bold.md b/tests/odt/markdown/bold.md new file mode 100644 index 000000000..fa4eb0431 --- /dev/null +++ b/tests/odt/markdown/bold.md @@ -0,0 +1 @@ +Here comes **bold** text \ No newline at end of file diff --git a/tests/odt/markdown/citation.md b/tests/odt/markdown/citation.md new file mode 100644 index 000000000..adcc9f0ff --- /dev/null +++ b/tests/odt/markdown/citation.md @@ -0,0 +1 @@ +Some text[@Ex] with a citation. \ No newline at end of file diff --git a/tests/odt/markdown/endnote.md b/tests/odt/markdown/endnote.md new file mode 100644 index 000000000..679af3fdc --- /dev/null +++ b/tests/odt/markdown/endnote.md @@ -0,0 +1,3 @@ +Some text[^1] with an endnote. + +[^1]: Endnote text \ No newline at end of file diff --git a/tests/odt/markdown/externalLink.md b/tests/odt/markdown/externalLink.md new file mode 100644 index 000000000..14f48d0f5 --- /dev/null +++ b/tests/odt/markdown/externalLink.md @@ -0,0 +1 @@ +Here comes an [external link](http://example.com/) to example.com. \ No newline at end of file diff --git a/tests/odt/markdown/footnote.md b/tests/odt/markdown/footnote.md new file mode 100644 index 000000000..973ae2d3a --- /dev/null +++ b/tests/odt/markdown/footnote.md @@ -0,0 +1,3 @@ +Some text[^1] with a footnote. + +[^1]: Footnote text \ No newline at end of file diff --git a/tests/odt/markdown/headers.md b/tests/odt/markdown/headers.md new file mode 100644 index 000000000..ea5d4452c --- /dev/null +++ b/tests/odt/markdown/headers.md @@ -0,0 +1,9 @@ +# A header (Lv 1) + +A paragraph + +## Another header (Lv 2) + +Another paragraph + +# Back to Level 1 \ No newline at end of file diff --git a/tests/odt/markdown/horizontalRule.md b/tests/odt/markdown/horizontalRule.md new file mode 100644 index 000000000..73b314ff7 --- /dev/null +++ b/tests/odt/markdown/horizontalRule.md @@ -0,0 +1 @@ +--- \ No newline at end of file diff --git a/tests/odt/markdown/image.md b/tests/odt/markdown/image.md new file mode 100644 index 000000000..3862d709e --- /dev/null +++ b/tests/odt/markdown/image.md @@ -0,0 +1 @@ +![](10000000000000FA000000FAD6A15225.jpg) \ No newline at end of file diff --git a/tests/odt/markdown/imageIndex.md b/tests/odt/markdown/imageIndex.md new file mode 100644 index 000000000..6719ab8a8 --- /dev/null +++ b/tests/odt/markdown/imageIndex.md @@ -0,0 +1,6 @@ +# Abbildungsverzeichnis + +Abbildung 1: Image caption + +![Abbildung 1: Image caption](10000000000000FA000000FAD6A15225.jpg) + diff --git a/tests/odt/markdown/imageWithCaption.md b/tests/odt/markdown/imageWithCaption.md new file mode 100644 index 000000000..0046ae141 --- /dev/null +++ b/tests/odt/markdown/imageWithCaption.md @@ -0,0 +1 @@ +![Abbildung 1: Image caption](10000000000000FA000000FAD6A15225.jpg) \ No newline at end of file diff --git a/tests/odt/markdown/italic.md b/tests/odt/markdown/italic.md new file mode 100644 index 000000000..b4d2f3d40 --- /dev/null +++ b/tests/odt/markdown/italic.md @@ -0,0 +1 @@ +Here comes *italic* text \ No newline at end of file diff --git a/tests/odt/markdown/listBlocks.md b/tests/odt/markdown/listBlocks.md new file mode 100644 index 000000000..22c77bb2b --- /dev/null +++ b/tests/odt/markdown/listBlocks.md @@ -0,0 +1,6 @@ + + +Indented text in a list. + + +This is a numbered block.It contains several paragraphs of text.Like this.Next item. \ No newline at end of file diff --git a/tests/odt/markdown/paragraph.md b/tests/odt/markdown/paragraph.md new file mode 100644 index 000000000..0a822e322 --- /dev/null +++ b/tests/odt/markdown/paragraph.md @@ -0,0 +1,5 @@ +This is a paragraph. + +This is another paragraph. + +This is a third one. \ No newline at end of file diff --git a/tests/odt/markdown/strikeout.md b/tests/odt/markdown/strikeout.md new file mode 100644 index 000000000..6ae4571dd --- /dev/null +++ b/tests/odt/markdown/strikeout.md @@ -0,0 +1 @@ +Here comes text that was ~~striken out~~. \ No newline at end of file diff --git a/tests/odt/markdown/trackedChanges.md b/tests/odt/markdown/trackedChanges.md new file mode 100644 index 000000000..f0bd478a3 --- /dev/null +++ b/tests/odt/markdown/trackedChanges.md @@ -0,0 +1 @@ +Some text with and inserted text. \ No newline at end of file diff --git a/tests/odt/markdown/underlined.md b/tests/odt/markdown/underlined.md new file mode 100644 index 000000000..05fb92379 --- /dev/null +++ b/tests/odt/markdown/underlined.md @@ -0,0 +1 @@ +Here comes *underlined* text \ No newline at end of file diff --git a/tests/odt/native/blockquote.native b/tests/odt/native/blockquote.native new file mode 100644 index 000000000..8c9409dde --- /dev/null +++ b/tests/odt/native/blockquote.native @@ -0,0 +1 @@ +[Para [Str "Normal"],BlockQuote [Para [Str "Indented",Space,Str "(1cm)"]]] \ No newline at end of file diff --git a/tests/odt/native/orderedListMixed.native b/tests/odt/native/orderedListMixed.native new file mode 100644 index 000000000..a50f5e2ad --- /dev/null +++ b/tests/odt/native/orderedListMixed.native @@ -0,0 +1 @@ +Pandoc (Meta {unMeta = fromList []}) [OrderedList (1,Decimal,Period) [[Plain [Str "A",Space,Str "list",Space,Str "item"]],[Plain [Str "A",Space,Str "second"]],[Para [Str "A",Space,Str "third"],OrderedList (1,Decimal,Period) [[Para [Str "New",Space,Str "level!"],OrderedList (1,LowerAlpha,OneParen) [[Plain [Str "And",Space,Str "another!"]],[Plain [Str "It's",Space,Str "great",Space,Str "up",Space,Str "here!"]]]],[Plain [Str "Oh",Space,Str "noes"]],[Plain [Str "We",Space,Str "fell!"]]]],[Plain [Str "Maybe",Space,Str "someone"]],[Plain [Str "Pushed",Space,Str "us?"]]]] \ No newline at end of file diff --git a/tests/odt/native/orderedListRoman.native b/tests/odt/native/orderedListRoman.native new file mode 100644 index 000000000..f186e0735 --- /dev/null +++ b/tests/odt/native/orderedListRoman.native @@ -0,0 +1 @@ +Pandoc (Meta {unMeta = fromList []}) [OrderedList (1,UpperRoman,Period) [[Plain[Str "A",Space,Str "list",Space,Str "item"]],[Plain [Str "A",Space,Str "second"]],[Para [Str "A",Space,Str "third"],OrderedList (1,UpperRoman,Period) [[Para [Str "New",Space,Str "level!"],OrderedList (1,UpperRoman,Period) [[Plain [Str "And",Space,Str "another!"]],[Plain [Str "It's",Space,Str "great",Space,Str "up",Space,Str "here!"]]]],[Plain [Str "Oh",Space,Str "noes"]],[Plain [Str "We",Space,Str "fell!"]]]],[Plain [Str "Maybe",Space,Str "someone"]],[Plain [Str "Pushed",Space,Str "us?"]]]] \ No newline at end of file diff --git a/tests/odt/native/orderedListSimple.native b/tests/odt/native/orderedListSimple.native new file mode 100644 index 000000000..90a51856a --- /dev/null +++ b/tests/odt/native/orderedListSimple.native @@ -0,0 +1 @@ +Pandoc (Meta {unMeta = fromList []}) [OrderedList (1,Decimal,Period) [[Plain [Str "A",Space,Str "list",Space,Str "item"]],[Plain [Str "A",Space,Str "second"]],[Para [Str "A",Space,Str "third"],OrderedList (1,Decimal,Period) [[Para [Str "New",Space,Str "level!"],OrderedList (1,Decimal,Period) [[Plain [Str "And",Space,Str "another!"]],[Plain [Str "It's",Space,Str "great",Space,Str "up",Space,Str "here!"]]]],[Plain [Str "Oh",Space,Str "noes"]],[Plain [Str "We",Space,Str "fell!"]]]],[Plain [Str "Maybe",Space,Str "someone"]],[Plain [Str "Pushed",Space,Str "us?"]]]] \ No newline at end of file diff --git a/tests/odt/native/referenceToChapter.native b/tests/odt/native/referenceToChapter.native new file mode 100644 index 000000000..fc6c6cf5c --- /dev/null +++ b/tests/odt/native/referenceToChapter.native @@ -0,0 +1 @@ +[Header 1 ("a-chapter",[],[]) [Span ("anchor",[],[]) [],Str "A",Space,Str "chapter"],Para [Str "Some",Space,Str "text."],Header 1 ("another-chapter",[],[]) [Str "Another",Space,Str "chapter"],Para [Str "A",Space,Str "reference",Space,Str "to",Space,Str "."],Para [Str "A",Space,Str "reference",Space,Str "to",Space,Link [Str "A",Space,Str "chapter"] ("#anchor",""),Str "."]] \ No newline at end of file diff --git a/tests/odt/native/referenceToListItem.native b/tests/odt/native/referenceToListItem.native new file mode 100644 index 000000000..d009f8d23 --- /dev/null +++ b/tests/odt/native/referenceToListItem.native @@ -0,0 +1 @@ +[OrderedList (1,Decimal,Period) [[Plain [Span ("anchor",[],[]) [],Str "A",Space,Str "list",Space,Str "item"]],[Plain [Str "Another",Space,Str "list",Space,Str "item"]]],Para [Str "A",Space,Str "reference",Space,Str "to",Space,Str "list",Space,Str "item",Space,Link [Str "1."] ("#anchor",""),Str "."],Para [],Para []] \ No newline at end of file diff --git a/tests/odt/native/referenceToText.native b/tests/odt/native/referenceToText.native new file mode 100644 index 000000000..45f7ac44c --- /dev/null +++ b/tests/odt/native/referenceToText.native @@ -0,0 +1 @@ +[Para [Span ("an anchor",[],[]) [],Str "Some",Space,Str "text."],Para [Str "A",Space,Str "reference",Space,Str "to",Space,Link [Str "Some",Space,Str "text"] ("#an anchor",""),Str "."]] \ No newline at end of file diff --git a/tests/odt/native/simpleTable.native b/tests/odt/native/simpleTable.native new file mode 100644 index 000000000..f94026bd5 --- /dev/null +++ b/tests/odt/native/simpleTable.native @@ -0,0 +1 @@ +[Table [] [] [] [] [[[Plain [Str "Content"]],[Plain [Str "More",Space,Str "content"]]]],Para []] \ No newline at end of file diff --git a/tests/odt/native/unicode.native b/tests/odt/native/unicode.native new file mode 100644 index 000000000..b6ac9760c --- /dev/null +++ b/tests/odt/native/unicode.native @@ -0,0 +1 @@ +[Para [Str "\8220\8221\8217\231\1256\169\188\1074\1073\1060\1064\246\201\181"]] \ No newline at end of file diff --git a/tests/odt/native/unorderedList.native b/tests/odt/native/unorderedList.native new file mode 100644 index 000000000..a8c083d13 --- /dev/null +++ b/tests/odt/native/unorderedList.native @@ -0,0 +1 @@ +[BulletList [[Plain [Str "A",Space,Str "list",Space,Str "item"]],[Plain [Str "A",Space,Str "second"]],[Para [Str "A",Space,Str "third"],BulletList [[Para [Str "New",Space,Str "level!"],BulletList [[Plain [Str "And",Space,Str "another!"]],[Plain [Str "It's",Space,Str "great",Space,Str "up",Space,Str "here!"]]]],[Plain [Str "Oh",Space,Str "noes"]],[Plain [Str "We",Space,Str "fell!"]]]],[Plain [Str "Maybe",Space,Str "someone"]],[Plain [Str "Pushed",Space,Str "us?"]]]] \ No newline at end of file diff --git a/tests/odt/odt/blockquote.odt b/tests/odt/odt/blockquote.odt new file mode 100644 index 000000000..0114e308f Binary files /dev/null and b/tests/odt/odt/blockquote.odt differ diff --git a/tests/odt/odt/bold.odt b/tests/odt/odt/bold.odt new file mode 100644 index 000000000..49285722e Binary files /dev/null and b/tests/odt/odt/bold.odt differ diff --git a/tests/odt/odt/citation.odt b/tests/odt/odt/citation.odt new file mode 100644 index 000000000..b6dbe649e Binary files /dev/null and b/tests/odt/odt/citation.odt differ diff --git a/tests/odt/odt/endnote.odt b/tests/odt/odt/endnote.odt new file mode 100644 index 000000000..c1aba45da Binary files /dev/null and b/tests/odt/odt/endnote.odt differ diff --git a/tests/odt/odt/expression.odt b/tests/odt/odt/expression.odt new file mode 100644 index 000000000..1085d7008 Binary files /dev/null and b/tests/odt/odt/expression.odt differ diff --git a/tests/odt/odt/expressionUnevaluated.odt b/tests/odt/odt/expressionUnevaluated.odt new file mode 100644 index 000000000..64df660b6 Binary files /dev/null and b/tests/odt/odt/expressionUnevaluated.odt differ diff --git a/tests/odt/odt/externalLink.odt b/tests/odt/odt/externalLink.odt new file mode 100644 index 000000000..1d8f55489 Binary files /dev/null and b/tests/odt/odt/externalLink.odt differ diff --git a/tests/odt/odt/footnote.odt b/tests/odt/odt/footnote.odt new file mode 100644 index 000000000..74915c33c Binary files /dev/null and b/tests/odt/odt/footnote.odt differ diff --git a/tests/odt/odt/formula.odt b/tests/odt/odt/formula.odt new file mode 100644 index 000000000..5cf5f3451 Binary files /dev/null and b/tests/odt/odt/formula.odt differ diff --git a/tests/odt/odt/headers.odt b/tests/odt/odt/headers.odt new file mode 100644 index 000000000..9212e9fb1 Binary files /dev/null and b/tests/odt/odt/headers.odt differ diff --git a/tests/odt/odt/hiddenTextByStyle.odt b/tests/odt/odt/hiddenTextByStyle.odt new file mode 100644 index 000000000..79c40ca98 Binary files /dev/null and b/tests/odt/odt/hiddenTextByStyle.odt differ diff --git a/tests/odt/odt/hiddenTextByVariable.odt b/tests/odt/odt/hiddenTextByVariable.odt new file mode 100644 index 000000000..ec793d466 Binary files /dev/null and b/tests/odt/odt/hiddenTextByVariable.odt differ diff --git a/tests/odt/odt/horizontalRule.odt b/tests/odt/odt/horizontalRule.odt new file mode 100644 index 000000000..df09386bc Binary files /dev/null and b/tests/odt/odt/horizontalRule.odt differ diff --git a/tests/odt/odt/image.odt b/tests/odt/odt/image.odt new file mode 100644 index 000000000..c2fd1e407 Binary files /dev/null and b/tests/odt/odt/image.odt differ diff --git a/tests/odt/odt/imageIndex.odt b/tests/odt/odt/imageIndex.odt new file mode 100644 index 000000000..220a49047 Binary files /dev/null and b/tests/odt/odt/imageIndex.odt differ diff --git a/tests/odt/odt/imageWithCaption.odt b/tests/odt/odt/imageWithCaption.odt new file mode 100644 index 000000000..99b5b7af1 Binary files /dev/null and b/tests/odt/odt/imageWithCaption.odt differ diff --git a/tests/odt/odt/italic.odt b/tests/odt/odt/italic.odt new file mode 100644 index 000000000..d05cfeade Binary files /dev/null and b/tests/odt/odt/italic.odt differ diff --git a/tests/odt/odt/listBlocks.odt b/tests/odt/odt/listBlocks.odt new file mode 100644 index 000000000..5855e9920 Binary files /dev/null and b/tests/odt/odt/listBlocks.odt differ diff --git a/tests/odt/odt/orderedListMixed.odt b/tests/odt/odt/orderedListMixed.odt new file mode 100644 index 000000000..1aa50ff06 Binary files /dev/null and b/tests/odt/odt/orderedListMixed.odt differ diff --git a/tests/odt/odt/orderedListRoman.odt b/tests/odt/odt/orderedListRoman.odt new file mode 100644 index 000000000..7266e89bc Binary files /dev/null and b/tests/odt/odt/orderedListRoman.odt differ diff --git a/tests/odt/odt/orderedListSimple.odt b/tests/odt/odt/orderedListSimple.odt new file mode 100644 index 000000000..4fe543794 Binary files /dev/null and b/tests/odt/odt/orderedListSimple.odt differ diff --git a/tests/odt/odt/paragraph.odt b/tests/odt/odt/paragraph.odt new file mode 100644 index 000000000..b635b2164 Binary files /dev/null and b/tests/odt/odt/paragraph.odt differ diff --git a/tests/odt/odt/referenceAllInOne.odt b/tests/odt/odt/referenceAllInOne.odt new file mode 100644 index 000000000..e9f38e359 Binary files /dev/null and b/tests/odt/odt/referenceAllInOne.odt differ diff --git a/tests/odt/odt/referenceToChapter.odt b/tests/odt/odt/referenceToChapter.odt new file mode 100644 index 000000000..d4be67b0b Binary files /dev/null and b/tests/odt/odt/referenceToChapter.odt differ diff --git a/tests/odt/odt/referenceToListItem.odt b/tests/odt/odt/referenceToListItem.odt new file mode 100644 index 000000000..be3aed245 Binary files /dev/null and b/tests/odt/odt/referenceToListItem.odt differ diff --git a/tests/odt/odt/referenceToText.odt b/tests/odt/odt/referenceToText.odt new file mode 100644 index 000000000..281055aa0 Binary files /dev/null and b/tests/odt/odt/referenceToText.odt differ diff --git a/tests/odt/odt/simpleTable.odt b/tests/odt/odt/simpleTable.odt new file mode 100644 index 000000000..a00622918 Binary files /dev/null and b/tests/odt/odt/simpleTable.odt differ diff --git a/tests/odt/odt/strikeout.odt b/tests/odt/odt/strikeout.odt new file mode 100644 index 000000000..3a3f1543a Binary files /dev/null and b/tests/odt/odt/strikeout.odt differ diff --git a/tests/odt/odt/table.odt b/tests/odt/odt/table.odt new file mode 100644 index 000000000..7a2b1cfae Binary files /dev/null and b/tests/odt/odt/table.odt differ diff --git a/tests/odt/odt/tableWithCaption.odt b/tests/odt/odt/tableWithCaption.odt new file mode 100644 index 000000000..d44654460 Binary files /dev/null and b/tests/odt/odt/tableWithCaption.odt differ diff --git a/tests/odt/odt/trackedChanges.odt b/tests/odt/odt/trackedChanges.odt new file mode 100644 index 000000000..5ac493ed7 Binary files /dev/null and b/tests/odt/odt/trackedChanges.odt differ diff --git a/tests/odt/odt/underlined.odt b/tests/odt/odt/underlined.odt new file mode 100644 index 000000000..d645717b8 Binary files /dev/null and b/tests/odt/odt/underlined.odt differ diff --git a/tests/odt/odt/unicode.odt b/tests/odt/odt/unicode.odt new file mode 100644 index 000000000..07e200425 Binary files /dev/null and b/tests/odt/odt/unicode.odt differ diff --git a/tests/odt/odt/unorderedList.odt b/tests/odt/odt/unorderedList.odt new file mode 100644 index 000000000..50a950024 Binary files /dev/null and b/tests/odt/odt/unorderedList.odt differ diff --git a/tests/odt/odt/variable.odt b/tests/odt/odt/variable.odt new file mode 100644 index 000000000..73ff5f648 Binary files /dev/null and b/tests/odt/odt/variable.odt differ diff --git a/tests/test-pandoc.hs b/tests/test-pandoc.hs index 9bc26416f..f7c2f0c1f 100644 --- a/tests/test-pandoc.hs +++ b/tests/test-pandoc.hs @@ -11,6 +11,7 @@ import qualified Tests.Readers.Org import qualified Tests.Readers.HTML import qualified Tests.Readers.RST import qualified Tests.Readers.Docx +import qualified Tests.Readers.Odt import qualified Tests.Readers.Txt2Tags import qualified Tests.Readers.EPUB import qualified Tests.Writers.ConTeXt @@ -51,6 +52,7 @@ tests = [ testGroup "Old" Tests.Old.tests , testGroup "Org" Tests.Readers.Org.tests , testGroup "RST" Tests.Readers.RST.tests , testGroup "Docx" Tests.Readers.Docx.tests + , testGroup "Odt" Tests.Readers.Odt.tests , testGroup "Txt2Tags" Tests.Readers.Txt2Tags.tests , testGroup "EPUB" Tests.Readers.EPUB.tests ]