Use utf8-string's System.IO.UTF8 to replace Text.Pandoc.UTF8.

+ removed Text/Pandoc/UTF8.hs
+ removed UTF8.hs notice from debian/copyright
+ adjusted main.hs, Text/Pandoc.hs, and Text/Pandoc/ODT.hs to use
  System.IO.UTF8 instead of Text.Pandoc.UTF8
+ Added dependency on utf8-string to pandoc.cabal


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1347 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2008-07-31 23:16:31 +00:00
parent 774dc0776d
commit ebcebccc32
6 changed files with 18 additions and 101 deletions

22
Main.hs
View file

@ -30,7 +30,6 @@ writers.
-} -}
module Main where module Main where
import Text.Pandoc import Text.Pandoc
import Text.Pandoc.UTF8
import Text.Pandoc.ODT import Text.Pandoc.ODT
import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) ) import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
import Text.Pandoc.Highlighting ( languages ) import Text.Pandoc.Highlighting ( languages )
@ -38,7 +37,9 @@ import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) ) import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath ( takeExtension, takeDirectory ) import System.FilePath ( takeExtension, takeDirectory )
import System.Console.GetOpt import System.Console.GetOpt
import System.IO import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import System.IO ( stdout, stderr )
import System.IO.UTF8
import Data.Maybe ( fromMaybe ) import Data.Maybe ( fromMaybe )
import Data.Char ( toLower ) import Data.Char ( toLower )
import Control.Monad ( (>>=) ) import Control.Monad ( (>>=) )
@ -275,7 +276,7 @@ options =
(\arg opt -> do (\arg opt -> do
let old = optIncludeInHeader opt let old = optIncludeInHeader opt
text <- readFile arg text <- readFile arg
return opt { optIncludeInHeader = old ++ fromUTF8 text, return opt { optIncludeInHeader = old ++ text,
optStandalone = True }) optStandalone = True })
"FILENAME") "FILENAME")
"" -- "File to include at end of header (implies -s)" "" -- "File to include at end of header (implies -s)"
@ -285,7 +286,7 @@ options =
(\arg opt -> do (\arg opt -> do
let old = optIncludeBeforeBody opt let old = optIncludeBeforeBody opt
text <- readFile arg text <- readFile arg
return opt { optIncludeBeforeBody = old ++ fromUTF8 text }) return opt { optIncludeBeforeBody = old ++ text })
"FILENAME") "FILENAME")
"" -- "File to include before document body" "" -- "File to include before document body"
@ -294,7 +295,7 @@ options =
(\arg opt -> do (\arg opt -> do
let old = optIncludeAfterBody opt let old = optIncludeAfterBody opt
text <- readFile arg text <- readFile arg
return opt { optIncludeAfterBody = old ++ fromUTF8 text }) return opt { optIncludeAfterBody = old ++ text })
"FILENAME") "FILENAME")
"" -- "File to include after document body" "" -- "File to include after document body"
@ -302,7 +303,7 @@ options =
(ReqArg (ReqArg
(\arg opt -> do (\arg opt -> do
text <- readFile arg text <- readFile arg
return opt { optCustomHeader = fromUTF8 text, return opt { optCustomHeader = text,
optStandalone = True }) optStandalone = True })
"FILENAME") "FILENAME")
"" -- "File to use for custom header (implies -s)" "" -- "File to use for custom header (implies -s)"
@ -555,10 +556,11 @@ main = do
then putStrLn then putStrLn
else writeFile outputFile . (++ "\n") else writeFile outputFile . (++ "\n")
(readSources sources) >>= writeOutput . toUTF8 . (readSources sources) >>= writeOutput .
(writer writerOptions) . writer writerOptions .
(reader startParserState) . tabFilter tabStop . reader startParserState .
fromUTF8 . (joinWithSep "\n") tabFilter tabStop .
joinWithSep "\n"
where where
readSources [] = mapM readSource ["-"] readSources [] = mapM readSource ["-"]

View file

@ -36,13 +36,14 @@ inline links:
> module Main where > module Main where
> import Text.Pandoc > import Text.Pandoc
> > import qualified System.IO.UTF8 as U
>
> markdownToRST :: String -> String > markdownToRST :: String -> String
> markdownToRST = toUTF8 . > markdownToRST =
> (writeRST defaultWriterOptions {writerReferenceLinks = True}) . > (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
> (readMarkdown defaultParserState) . fromUTF8 > readMarkdown defaultParserState
> >
> main = interact markdownToRST > main = U.getContents >>= U.putStrLn . markdownToRST
-} -}
@ -84,8 +85,6 @@ module Text.Pandoc
, defaultWriterOptions , defaultWriterOptions
-- * Default headers for various output formats -- * Default headers for various output formats
, module Text.Pandoc.DefaultHeaders , module Text.Pandoc.DefaultHeaders
-- * Functions for converting to and from UTF-8
, module Text.Pandoc.UTF8
-- * Version -- * Version
, pandocVersion , pandocVersion
) where ) where
@ -108,7 +107,6 @@ import Text.Pandoc.Writers.Man
import Text.Pandoc.Writers.RTF import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.DefaultHeaders import Text.Pandoc.DefaultHeaders
import Text.Pandoc.UTF8
import Text.Pandoc.Shared import Text.Pandoc.Shared
-- | Version number of pandoc library. -- | Version number of pandoc library.

View file

@ -92,7 +92,6 @@ handlePictures tempODT sourceDirRelative xml = do
Just x -> x Just x -> x
cursor' <- scanPictures tempODT sourceDirRelative cursor cursor' <- scanPictures tempODT sourceDirRelative cursor
let modified = parsed { elContent = toForest $ root cursor' } let modified = parsed { elContent = toForest $ root cursor' }
putStrLn $ showTopElement modified
return $ showTopElement modified return $ showTopElement modified
scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor

View file

@ -1,45 +0,0 @@
-- | Functions for converting Unicode strings to UTF-8 and vice versa.
--
-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
-- (c) 2003, OGI School of Science & Engineering, Oregon Health and
-- Science University.
--
-- Modified by Martin Norbaeck
-- to pass illegal UTF-8 sequences through unchanged.
module Text.Pandoc.UTF8 (
fromUTF8,
toUTF8
) where
-- From the Char module supplied with HBC.
-- | Take a UTF-8 string and decode it into a Unicode string.
fromUTF8 :: String -> String
fromUTF8 "" = ""
fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
'\x80' <= c' && c' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
'\x80' <= c' && c' <= '\xbf' &&
'\x80' <= c'' && c'' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
fromUTF8 (c:cs) = c : fromUTF8 cs
-- | Take a Unicode string and encode it as a UTF-8 string.
toUTF8 :: String -> String
toUTF8 "" = ""
toUTF8 (c:cs) =
if c > '\x0000' && c < '\x0080' then
c : toUTF8 cs
else if c < toEnum 0x0800 then
let i = fromEnum c
in toEnum (0xc0 + i `div` 0x40) :
toEnum (0x80 + i `mod` 0x40) :
toUTF8 cs
else
let i = fromEnum c
in toEnum (0xe0 + i `div` 0x1000) :
toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
toEnum (0x80 + i `mod` 0x40) :
toUTF8 cs

36
debian/copyright vendored
View file

@ -37,42 +37,6 @@ Copyright (C) 2008 John MacFarlane and Peter Wang
Released under the GPL. Released under the GPL.
----------------------------------------------------------------------
UTF8.hs
Copyright (c) 2003, OGI School of Science & Engineering, Oregon Health &
Science University, All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
- Neither the name of OGI or OHSU nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This version of UTF8.hs was Modified by Martin Norbäck, to pass illegal
utf-8 sequences through unchanged.
---------------------------------------------------------------------- ----------------------------------------------------------------------
ASCIIMathML.js ASCIIMathML.js
Copyright 2005, Peter Jipsen, Chapman University Copyright 2005, Peter Jipsen, Chapman University

View file

@ -80,14 +80,13 @@ Library
Build-depends: highlighting-kate Build-depends: highlighting-kate
cpp-options: -DHIGHLIGHTING cpp-options: -DHIGHLIGHTING
Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml, Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml,
bytestring, binary bytestring, binary, utf8-string
Hs-Source-Dirs: . Hs-Source-Dirs: .
Exposed-Modules: Text.Pandoc, Exposed-Modules: Text.Pandoc,
Text.Pandoc.Blocks, Text.Pandoc.Blocks,
Text.Pandoc.Definition, Text.Pandoc.Definition,
Text.Pandoc.CharacterReferences, Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared, Text.Pandoc.Shared,
Text.Pandoc.UTF8,
Text.Pandoc.ODT, Text.Pandoc.ODT,
Text.Pandoc.ASCIIMathML, Text.Pandoc.ASCIIMathML,
Text.Pandoc.DefaultHeaders, Text.Pandoc.DefaultHeaders,