Use utf8-string's System.IO.UTF8 to replace Text.Pandoc.UTF8.

+ removed Text/Pandoc/UTF8.hs
+ removed UTF8.hs notice from debian/copyright
+ adjusted main.hs, Text/Pandoc.hs, and Text/Pandoc/ODT.hs to use
  System.IO.UTF8 instead of Text.Pandoc.UTF8
+ Added dependency on utf8-string to pandoc.cabal


git-svn-id: https://pandoc.googlecode.com/svn/trunk@1347 788f1e2b-df1e-0410-8736-df70ead52e1b
This commit is contained in:
fiddlosopher 2008-07-31 23:16:31 +00:00
parent 774dc0776d
commit ebcebccc32
6 changed files with 18 additions and 101 deletions

22
Main.hs
View file

@ -30,7 +30,6 @@ writers.
-}
module Main where
import Text.Pandoc
import Text.Pandoc.UTF8
import Text.Pandoc.ODT
import Text.Pandoc.Shared ( joinWithSep, HTMLMathMethod (..) )
import Text.Pandoc.Highlighting ( languages )
@ -38,7 +37,9 @@ import System.Environment ( getArgs, getProgName, getEnvironment )
import System.Exit ( exitWith, ExitCode (..) )
import System.FilePath ( takeExtension, takeDirectory )
import System.Console.GetOpt
import System.IO
import Prelude hiding ( putStrLn, writeFile, readFile, getContents )
import System.IO ( stdout, stderr )
import System.IO.UTF8
import Data.Maybe ( fromMaybe )
import Data.Char ( toLower )
import Control.Monad ( (>>=) )
@ -275,7 +276,7 @@ options =
(\arg opt -> do
let old = optIncludeInHeader opt
text <- readFile arg
return opt { optIncludeInHeader = old ++ fromUTF8 text,
return opt { optIncludeInHeader = old ++ text,
optStandalone = True })
"FILENAME")
"" -- "File to include at end of header (implies -s)"
@ -285,7 +286,7 @@ options =
(\arg opt -> do
let old = optIncludeBeforeBody opt
text <- readFile arg
return opt { optIncludeBeforeBody = old ++ fromUTF8 text })
return opt { optIncludeBeforeBody = old ++ text })
"FILENAME")
"" -- "File to include before document body"
@ -294,7 +295,7 @@ options =
(\arg opt -> do
let old = optIncludeAfterBody opt
text <- readFile arg
return opt { optIncludeAfterBody = old ++ fromUTF8 text })
return opt { optIncludeAfterBody = old ++ text })
"FILENAME")
"" -- "File to include after document body"
@ -302,7 +303,7 @@ options =
(ReqArg
(\arg opt -> do
text <- readFile arg
return opt { optCustomHeader = fromUTF8 text,
return opt { optCustomHeader = text,
optStandalone = True })
"FILENAME")
"" -- "File to use for custom header (implies -s)"
@ -555,10 +556,11 @@ main = do
then putStrLn
else writeFile outputFile . (++ "\n")
(readSources sources) >>= writeOutput . toUTF8 .
(writer writerOptions) .
(reader startParserState) . tabFilter tabStop .
fromUTF8 . (joinWithSep "\n")
(readSources sources) >>= writeOutput .
writer writerOptions .
reader startParserState .
tabFilter tabStop .
joinWithSep "\n"
where
readSources [] = mapM readSource ["-"]

View file

@ -36,13 +36,14 @@ inline links:
> module Main where
> import Text.Pandoc
>
> import qualified System.IO.UTF8 as U
>
> markdownToRST :: String -> String
> markdownToRST = toUTF8 .
> markdownToRST =
> (writeRST defaultWriterOptions {writerReferenceLinks = True}) .
> (readMarkdown defaultParserState) . fromUTF8
> readMarkdown defaultParserState
>
> main = interact markdownToRST
> main = U.getContents >>= U.putStrLn . markdownToRST
-}
@ -84,8 +85,6 @@ module Text.Pandoc
, defaultWriterOptions
-- * Default headers for various output formats
, module Text.Pandoc.DefaultHeaders
-- * Functions for converting to and from UTF-8
, module Text.Pandoc.UTF8
-- * Version
, pandocVersion
) where
@ -108,7 +107,6 @@ import Text.Pandoc.Writers.Man
import Text.Pandoc.Writers.RTF
import Text.Pandoc.Writers.MediaWiki
import Text.Pandoc.DefaultHeaders
import Text.Pandoc.UTF8
import Text.Pandoc.Shared
-- | Version number of pandoc library.

View file

@ -92,7 +92,6 @@ handlePictures tempODT sourceDirRelative xml = do
Just x -> x
cursor' <- scanPictures tempODT sourceDirRelative cursor
let modified = parsed { elContent = toForest $ root cursor' }
putStrLn $ showTopElement modified
return $ showTopElement modified
scanPictures :: FilePath -> FilePath -> Cursor -> IO Cursor

View file

@ -1,45 +0,0 @@
-- | Functions for converting Unicode strings to UTF-8 and vice versa.
--
-- Taken from <http://www.cse.ogi.edu/~hallgren/Talks/LHiH/base/lib/UTF8.hs>.
-- (c) 2003, OGI School of Science & Engineering, Oregon Health and
-- Science University.
--
-- Modified by Martin Norbaeck
-- to pass illegal UTF-8 sequences through unchanged.
module Text.Pandoc.UTF8 (
fromUTF8,
toUTF8
) where
-- From the Char module supplied with HBC.
-- | Take a UTF-8 string and decode it into a Unicode string.
fromUTF8 :: String -> String
fromUTF8 "" = ""
fromUTF8 ('\xef':'\xbb':'\xbf':cs) = fromUTF8 cs -- skip BOM (byte order marker)
fromUTF8 (c:c':cs) | '\xc0' <= c && c <= '\xdf' &&
'\x80' <= c' && c' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x20) * 0x40 + fromEnum c' `mod` 0x40) : fromUTF8 cs
fromUTF8 (c:c':c'':cs) | '\xe0' <= c && c <= '\xef' &&
'\x80' <= c' && c' <= '\xbf' &&
'\x80' <= c'' && c'' <= '\xbf' =
toEnum ((fromEnum c `mod` 0x10 * 0x1000) + (fromEnum c' `mod` 0x40) * 0x40 + fromEnum c'' `mod` 0x40) : fromUTF8 cs
fromUTF8 (c:cs) = c : fromUTF8 cs
-- | Take a Unicode string and encode it as a UTF-8 string.
toUTF8 :: String -> String
toUTF8 "" = ""
toUTF8 (c:cs) =
if c > '\x0000' && c < '\x0080' then
c : toUTF8 cs
else if c < toEnum 0x0800 then
let i = fromEnum c
in toEnum (0xc0 + i `div` 0x40) :
toEnum (0x80 + i `mod` 0x40) :
toUTF8 cs
else
let i = fromEnum c
in toEnum (0xe0 + i `div` 0x1000) :
toEnum (0x80 + (i `mod` 0x1000) `div` 0x40) :
toEnum (0x80 + i `mod` 0x40) :
toUTF8 cs

36
debian/copyright vendored
View file

@ -37,42 +37,6 @@ Copyright (C) 2008 John MacFarlane and Peter Wang
Released under the GPL.
----------------------------------------------------------------------
UTF8.hs
Copyright (c) 2003, OGI School of Science & Engineering, Oregon Health &
Science University, All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
- Neither the name of OGI or OHSU nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
This version of UTF8.hs was Modified by Martin Norbäck, to pass illegal
utf-8 sequences through unchanged.
----------------------------------------------------------------------
ASCIIMathML.js
Copyright 2005, Peter Jipsen, Chapman University

View file

@ -80,14 +80,13 @@ Library
Build-depends: highlighting-kate
cpp-options: -DHIGHLIGHTING
Build-Depends: parsec < 3, xhtml, mtl, network, filepath, process, directory, xml,
bytestring, binary
bytestring, binary, utf8-string
Hs-Source-Dirs: .
Exposed-Modules: Text.Pandoc,
Text.Pandoc.Blocks,
Text.Pandoc.Definition,
Text.Pandoc.CharacterReferences,
Text.Pandoc.Shared,
Text.Pandoc.UTF8,
Text.Pandoc.ODT,
Text.Pandoc.ASCIIMathML,
Text.Pandoc.DefaultHeaders,