Compare commits

..

No commits in common. "master" and "0.6.7" have entirely different histories.

18 changed files with 63 additions and 15397 deletions

22
.gitignore vendored
View File

@ -1,22 +0,0 @@
### Haskell ###
dist
dist-*
cabal-dev
*.o
*.hi
*.chi
*.chs.h
*.dyn_o
*.dyn_hi
.hpc
.hsenv
.cabal-sandbox/
cabal.sandbox.config
*.prof
*.aux
*.hp
*.eventlog
.stack-work/
cabal.project.local
.HTF/

View File

@ -1,4 +1,4 @@
{-# LANGUAGE MagicHash,BangPatterns #-}
{-# LANGUAGE MagicHash #-}
module Data.Array.Static where
import Data.Static
@ -11,5 +11,5 @@ bounds :: Ix i => StaticArray i e -> (i,i)
bounds (StaticArray s e _) = (s,e)
(!) :: (StaticElement e,Ix i) => StaticArray i e -> i -> e
(!) (StaticArray s e addr) i = let !(I# ri) = index (s,e) i
(!) (StaticArray s e addr) i = let (I# ri) = index (s,e) i
in extract addr ri

View File

@ -76,7 +76,6 @@ import Data.Encoding.MacOSRoman
import Data.Encoding.JISX0201
import Data.Encoding.JISX0208
import Data.Encoding.ISO2022JP
import Data.Encoding.ShiftJIS
import Data.Encoding.CP437
import Data.Encoding.CP737
import Data.Encoding.CP775
@ -93,7 +92,6 @@ import Data.Encoding.CP865
import Data.Encoding.CP866
import Data.Encoding.CP869
import Data.Encoding.CP874
import Data.Encoding.CP932
import Data.Char
import Text.Regex
@ -329,9 +327,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
"jis_x_0208" -> Just $ DynEncoding JISX0208
-- ISO 2022-JP
"iso_2022_jp" -> Just $ DynEncoding ISO2022JP
-- Shift JIS
"shift_jis" -> Just $ DynEncoding ShiftJIS
"sjis" -> Just $ DynEncoding ShiftJIS
-- MSDOS codepages
"cp437" -> Just $ DynEncoding CP437
"cp737" -> Just $ DynEncoding CP737
@ -349,7 +344,6 @@ encodingFromStringExplicit codeName = case (normalizeEncoding codeName) of
"cp866" -> Just $ DynEncoding CP866
"cp869" -> Just $ DynEncoding CP869
"cp874" -> Just $ DynEncoding CP874
"cp932" -> Just $ DynEncoding CP932
-- defaults to nothing
_ -> Nothing
where

View File

@ -11,7 +11,6 @@ import Data.Word
import Data.Foldable (toList)
import Control.Throws
import Control.Exception.Extensible
import Control.Applicative
import Control.Monad.State
import Control.Monad.Identity
import Control.Monad.Reader
@ -81,13 +80,6 @@ instance ByteSink PutM where
newtype PutME a = PutME (Either EncodingException (PutM (),a))
instance Functor PutME where
fmap = liftM
instance Applicative PutME where
pure = return
(<*>) = ap
instance Monad PutME where
return x = PutME $ Right (return (),x)
(PutME x) >>= g = PutME $ do
@ -122,13 +114,6 @@ instance (Monad m,Throws EncodingException m) => ByteSink (StateT (Seq Char) m)
newtype StrictSink a = StrictS (Ptr Word8 -> Int -> Int -> IO (a,Ptr Word8,Int,Int))
instance Functor StrictSink where
fmap = liftM
instance Applicative StrictSink where
pure = return
(<*>) = ap
instance Monad StrictSink where
return x = StrictS $ \cstr pos max -> return (x,cstr,pos,max)
(StrictS f) >>= g = StrictS (\cstr pos max -> do
@ -155,13 +140,6 @@ instance ByteSink StrictSink where
newtype StrictSinkE a = StrictSinkE (StrictSink (Either EncodingException a))
instance Functor StrictSinkE where
fmap = liftM
instance Applicative StrictSinkE where
pure = return
(<*>) = ap
instance Monad StrictSinkE where
return = StrictSinkE . return . Right
(StrictSinkE s) >>= g = StrictSinkE $ do
@ -189,13 +167,6 @@ createStrict sink = createStrictWithLen sink 32
newtype StrictSinkExplicit a = StrictSinkExplicit (StrictSink (Either EncodingException a))
instance Functor StrictSinkExplicit where
fmap = liftM
instance Applicative StrictSinkExplicit where
pure = return
(<*>) = ap
instance Monad StrictSinkExplicit where
return = (StrictSinkExplicit).return.Right
(StrictSinkExplicit sink) >>= f

View File

@ -6,9 +6,7 @@ import Data.Encoding.Exception
import Data.Bits
import Data.Binary.Get
import Data.Char
import Data.Maybe
import Data.Word
import Control.Applicative as A
import Control.Monad.State
import Control.Monad.Identity
import Control.Monad.Reader
@ -21,9 +19,7 @@ import System.IO
class (Monad m,Throws DecodingException m) => ByteSource m where
sourceEmpty :: m Bool
fetchWord8 :: m Word8
-- 'fetchAhead act' should return the same thing 'act' does, but should
-- only consume input if 'act' returns a 'Just' value
fetchAhead :: m (Maybe a) -> m (Maybe a)
fetchAhead :: m a -> m a
fetchWord16be :: m Word16
fetchWord16be = do
w1 <- fetchWord8
@ -99,20 +95,7 @@ instance Throws DecodingException Get where
instance ByteSource Get where
sourceEmpty = isEmpty
fetchWord8 = getWord8
#if MIN_VERSION_binary(0,6,0)
fetchAhead act = (do
res <- act
case res of
Nothing -> A.empty
Just a -> return res
) <|> return Nothing
#else
fetchAhead act = do
res <- lookAhead act
case res of
Nothing -> return Nothing
Just a -> act
#endif
fetchAhead = lookAhead
fetchWord16be = getWord16be
fetchWord16le = getWord16le
fetchWord32be = getWord32be
@ -120,22 +103,20 @@ instance ByteSource Get where
fetchWord64be = getWord64be
fetchWord64le = getWord64le
fetchAheadState act = do
chs <- get
res <- act
when (isNothing res) (put chs)
return res
instance ByteSource (StateT [Char] Identity) where
sourceEmpty = gets null
fetchWord8 = do
chs <- get
case chs of
[] -> throwException UnexpectedEnd
[] -> throw UnexpectedEnd
c:cs -> do
put cs
return (fromIntegral $ ord c)
fetchAhead = fetchAheadState
fetchAhead act = do
chs <- get
res <- act
put chs
return res
#if MIN_VERSION_base(4,3,0)
#else
@ -154,21 +135,33 @@ instance ByteSource (StateT [Char] (Either DecodingException)) where
c:cs -> do
put cs
return (fromIntegral $ ord c)
fetchAhead = fetchAheadState
fetchAhead act = do
chs <- get
res <- act
put chs
return res
instance (Monad m,Throws DecodingException m) => ByteSource (StateT BS.ByteString m) where
sourceEmpty = gets BS.null
fetchWord8 = StateT (\str -> case BS.uncons str of
Nothing -> throwException UnexpectedEnd
Nothing -> throw UnexpectedEnd
Just (c,cs) -> return (c,cs))
fetchAhead = fetchAheadState
fetchAhead act = do
str <- get
res <- act
put str
return res
instance ByteSource (StateT LBS.ByteString (Either DecodingException)) where
sourceEmpty = gets LBS.null
fetchWord8 = StateT (\str -> case LBS.uncons str of
Nothing -> Left UnexpectedEnd
Just ns -> Right ns)
fetchAhead = fetchAheadState
fetchAhead act = do
chs <- get
res <- act
put chs
return res
instance ByteSource (ReaderT Handle IO) where
sourceEmpty = do
@ -183,5 +176,5 @@ instance ByteSource (ReaderT Handle IO) where
h <- ask
pos <- liftIO $ hGetPosn h
res <- act
when (isNothing res) (liftIO $ hSetPosn pos)
liftIO $ hSetPosn pos
return res

File diff suppressed because it is too large Load Diff

View File

@ -11,7 +11,7 @@ import Control.Monad.Identity
data EncodingException
= HasNoRepresentation Char -- ^ Thrown if a specific character
-- is not representable in an encoding.
deriving (Eq,Ord,Show,Read,Typeable)
deriving (Eq,Show,Typeable)
instance Exception EncodingException
@ -25,6 +25,6 @@ data DecodingException
| OutOfRange -- ^ the decoded value was out of the unicode range
| IllegalRepresentation [Word8] -- ^ The character sequence encodes a
-- character, but is illegal.
deriving (Eq,Ord,Show,Read,Typeable)
deriving (Eq,Show,Typeable)
instance Exception DecodingException

View File

@ -26,10 +26,11 @@ instance Encoding ISO2022JP where
encodeable _ c = encodeable ASCII c || encodeable JISX0201 c || encodeable JISX0208 c
instance ISO2022 ISO2022JP where
readEscape _ = fetchAhead $ do
w <- fetchWord8
readEscape _ = do
w <- fetchAhead fetchWord8
if w == 27
then (do
fetchWord8
w2 <- fetchWord8
w3 <- fetchWord8
case w2 of

View File

@ -61,38 +61,6 @@
# * Change mapping of 0xBD from U+2126 to its canonical
# decomposition, U+03A9.
0x00 0x0000 # NULL
0x01 0x0001 # START OF HEADING
0x02 0x0002 # START OF TEXT
0x03 0x0003 # END OF TEXT
0x04 0x0004 # END OF TRANSMISSION
0x05 0x0005 # ENQUIRY
0x06 0x0006 # ACKNOWLEDGE
0x07 0x0007 # BELL
0x08 0x0008 # BACKSPACE
0x09 0x0009 # HORIZONTAL TABULATION
0x0A 0x000A # LINE FEED
0x0B 0x000B # VERTICAL TABULATION
0x0C 0x000C # FORM FEED
0x0D 0x000D # CARRIAGE RETURN
0x0E 0x000E # SHIFT OUT
0x0F 0x000F # SHIFT IN
0x10 0x0010 # DATA LINK ESCAPE
0x11 0x0011 # DEVICE CONTROL ONE
0x12 0x0012 # DEVICE CONTROL TWO
0x13 0x0013 # DEVICE CONTROL THREE
0x14 0x0014 # DEVICE CONTROL FOUR
0x15 0x0015 # NEGATIVE ACKNOWLEDGE
0x16 0x0016 # SYNCHRONOUS IDLE
0x17 0x0017 # END OF TRANSMISSION BLOCK
0x18 0x0018 # CANCEL
0x19 0x0019 # END OF MEDIUM
0x1A 0x001A # SUBSTITUTE
0x1B 0x001B # ESCAPE
0x1C 0x001C # FILE SEPARATOR
0x1D 0x001D # GROUP SEPARATOR
0x1E 0x001E # RECORD SEPARATOR
0x1F 0x001F # UNIT SEPARATOR
0x20 0x0020 # SPACE
0x21 0x0021 # EXCLAMATION MARK
0x22 0x0022 # QUOTATION MARK
@ -188,7 +156,7 @@
0x7C 0x007C # VERTICAL LINE
0x7D 0x007D # RIGHT CURLY BRACKET
0x7E 0x007E # TILDE
0x7F 0x007F # DELETE
#
0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS
0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE
0x82 0x00C7 # LATIN CAPITAL LETTER C WITH CEDILLA

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
{-# LANGUAGE MagicHash,FlexibleInstances,BangPatterns,CPP #-}
{-# LANGUAGE MagicHash,FlexibleInstances,BangPatterns #-}
module Data.Static where
import GHC.Exts
@ -36,11 +36,7 @@ instance StaticElement Char where
instance StaticElement (Maybe Char) where
extract addr i = let !v = indexWord32OffAddr# addr i
#if __GLASGOW_HASKELL__ >= 708
in if isTrue# (eqWord# v (int2Word# 4294967295#)) -- -1 in Word32
#else
in if eqWord# v (int2Word# 4294967295#) -- -1 in Word32
#endif
then Nothing
else (if (I# (word2Int# v)) > 0x10FFFF
then error (show (I# (word2Int# v))++" is not a valid char ("++show (I# i)++")")

View File

@ -1,36 +1,3 @@
Changes from 0.8 to 0.8.2
-------------------------
* Deprecated support for very old GHCs
* Updated cabal file to differentiate between build dependencies and setup dependencies
* Add upper and lower bounds to build dependencies
* Stack compatibility
Changes from 0.8 to 0.8.1
-------------------------
* Added the ShiftJIS and CP932 encodings
Changes from 0.7.0.2 to 0.8
---------------------------
* GHC-7.10/AMP compatibility
Changes from 0.7.0.1 to 0.7.0.2
-------------------------------
* Flesh out the MacOSRoman encoding, which was missing 33 code points
Changes from 0.7 to 0.7.0.1
---------------------------
* GHC-7.8 compatibility
Changes from 0.6.7 to 0.7
-------------------------
* the type of ByteSource's fetchAhead method changed to accomodate updates to the binary package
Changes from 0.6.5 to 0.6.7
---------------------------

View File

@ -1,14 +1,14 @@
On each release:
* update CHANGELOG
* update NEWS
* bump the version number in the .cabal file (including in the "this" repository spec)
* cabal upload a release tarball
* darcs tag with the version number
To build a release tarball:
cabal configure
./dist/setup/setup sdist
cabal configure && ./dist/setup/setup sdist
tar xf dist/encoding-version.tar.gz
rm -r encoding-version/dist
tar --format=ustar -czf dist/encoding-version.tar.gz encoding-version
tar cf dist/encoding-version.tar encoding-version
rm -r encoding-version
gzip -f dist/encoding-version.tar
cabal upload dist/encoding-version.tar

View File

@ -1,18 +1,18 @@
Name: encoding
Version: 0.8.2
Version: 0.6.7
Author: Henning Günther
Maintainer: daniel@wagner-home.com
License: BSD3
License-File: LICENSE
Synopsis: A library for various character encodings
Description:
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunately, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Haskell has excellect handling of unicode, the Char type covers all unicode chars. Unfortunatly, there's no possibility to read or write something to the outer world in an encoding other than ascii due to the lack of support for encodings. This library should help with that.
Category: Codec
Homepage: http://code.haskell.org/encoding/
Cabal-Version: >=1.8
Cabal-Version: >=1.6
Build-Type: Custom
Extra-Source-Files:
CHANGELOG
NEWS
Data/Encoding/Preprocessor/Mapping.hs
Data/Encoding/Preprocessor/XMLMapping.hs
Data/Encoding/Preprocessor/XMLMappingBuilder.hs
@ -22,36 +22,32 @@ Extra-Source-Files:
system_encoding.h
system_encoding.c
Flag splitBase
description: Choose the new smaller, split-up base package.
Flag newGHC
description: Use ghc version > 6.10
Flag systemEncoding
description: Provide the getSystemEncoding action to query the locale.
Source-Repository head
Type: git
Location: http://github.com/dmwit/encoding
Type: darcs
Location: http://community.haskell.org/encoding
Source-Repository this
Type: git
Location: http://github.com/dmwit/encoding
Tag: 0.8.2
Custom-Setup
Setup-Depends: base >=3 && <5,
Cabal >=1.24 && <1.25,
containers,
filepath,
ghc-prim,
HaXml >=1.22 && <1.26
Type: darcs
Location: http://community.haskell.org/encoding
Tag: 0.6.7
Library
Build-Depends: array >=0.4 && <0.6,
base >=4 && <5,
binary >=0.7 && <0.10,
bytestring >=0.9 && <0.11,
containers >=0.4 && <0.6,
extensible-exceptions >=0.1 && <0.2,
ghc-prim >=0.3 && <0.6,
mtl >=2.0 && <2.3,
regex-compat >=0.71 && <0.95
Build-Depends: binary, extensible-exceptions, HaXml >= 1.22 && < 1.24
if flag(splitBase)
Build-Depends: bytestring, base >= 3 && < 5, mtl, containers, array, regex-compat
if flag(newGHC)
Build-Depends: ghc-prim, ghc >= 6.10
else
Build-Depends: ghc < 6.10
else
Build-Depends: base < 3
Extensions: CPP
@ -99,7 +95,6 @@ Library
Data.Encoding.JISX0212
Data.Encoding.ISO2022
Data.Encoding.ISO2022JP
Data.Encoding.ShiftJIS
Data.Encoding.CP437
Data.Encoding.CP737
Data.Encoding.CP775
@ -116,7 +111,6 @@ Library
Data.Encoding.CP866
Data.Encoding.CP869
Data.Encoding.CP874
Data.Encoding.CP932
System.IO.Encoding
Other-Modules:
Data.Encoding.Base
@ -124,8 +118,6 @@ Library
Data.Map.Static
Data.Static
Data.CharMap
if impl(ghc >= 7.10)
GHC-Options: -fno-warn-tabs
if flag(systemEncoding)
Includes:
system_encoding.h
@ -134,16 +126,3 @@ Library
C-Sources:
system_encoding.c
CPP-Options: -DSYSTEM_ENCODING
test-suite encoding-test
type: exitcode-stdio-1.0
hs-source-dirs: tests
main-is: Main.hs
other-modules: Test.Tester
, Test.Tests
build-depends: base
, bytestring
, encoding
, HUnit
, QuickCheck
ghc-options: -threaded -rtsopts -with-rtsopts=-N

View File

@ -1,66 +0,0 @@
# This file was automatically generated by 'stack init'
#
# Some commonly used options have been documented as comments in this file.
# For advanced use and comprehensive documentation of the format, please see:
# http://docs.haskellstack.org/en/stable/yaml_configuration/
# Resolver to choose a 'specific' stackage snapshot or a compiler version.
# A snapshot resolver dictates the compiler version and the set of packages
# to be used for project dependencies. For example:
#
# resolver: lts-3.5
# resolver: nightly-2015-09-21
# resolver: ghc-7.10.2
# resolver: ghcjs-0.1.0_ghc-7.10.2
# resolver:
# name: custom-snapshot
# location: "./custom-snapshot.yaml"
resolver: lts-8.22
# User packages to be built.
# Various formats can be used as shown in the example below.
#
# packages:
# - some-directory
# - https://example.com/foo/bar/baz-0.0.2.tar.gz
# - location:
# git: https://github.com/commercialhaskell/stack.git
# commit: e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# - location: https://github.com/commercialhaskell/stack/commit/e7b331f14bcffb8367cd58fbfc8b40ec7642100a
# extra-dep: true
# subdirs:
# - auto-update
# - wai
#
# A package marked 'extra-dep: true' will only be built if demanded by a
# non-dependency (i.e. a user package), and its test suites and benchmarks
# will not be run. This is useful for tweaking upstream packages.
packages:
- '.'
# Dependency packages to be pulled from upstream that are not in the resolver
# (e.g., acme-missiles-0.3)
extra-deps: []
# Override default flag values for local packages and extra-deps
flags: {}
# Extra package databases containing global packages
extra-package-dbs: []
# Control whether we use the GHC we find on the path
# system-ghc: true
#
# Require a specific version of stack, using version ranges
# require-stack-version: -any # Default
# require-stack-version: ">=1.4"
#
# Override the architecture used by stack, especially useful on Windows
# arch: i386
# arch: x86_64
#
# Extra directories used by stack for building
# extra-include-dirs: [/path/to/dir]
# extra-lib-dirs: [/path/to/dir]
#
# Allow a newer minor version of GHC than the snapshot specifies
# compiler-check: newer-minor

View File

@ -1,18 +0,0 @@
import Control.Monad
import Test.HUnit
import Test.Tests
hunitTests =
[ ("utf8Tests", utf8Tests)
, ("utf16Tests", utf16Tests)
, ("punycodeTests", punycodeTests)
, ("isoTests", isoTests)
, ("jisTests", jisTests)
, ("gb18030Tests", gb18030Tests)
]
main = do
identityTests
forM_ hunitTests $ \(name, test) -> do
putStrLn $ "running " ++ name
runTestTT test >>= print

View File

@ -1,66 +0,0 @@
{-# LANGUAGE NoMonomorphismRestriction #-}
import Control.Applicative
import Control.Monad
import Data.ByteString (ByteString, pack)
import Data.Encoding
import Data.Word
import System.Random
-- for example:
import Data.Encoding.ISO2022JP
--main = generate ISO2022JP
main = test ISO2022JP
-- BEWARE! These things are _very_ memory-hungry if you don't compile with optimizations.
-- end example
randomRepeat f max = randomRIO (0, max) >>= flip replicateM f
randomString = randomRepeat randomIO
randomGoodString = randomRepeat . randomGoodChar
randomGoodChar f = let
good = filter f [minBound..maxBound]
n = length good
in do
i <- randomRIO (0, n-1)
return (good !! i)
generate enc = do
let filename = show enc ++ ".regression"
randomGood = randomGoodString (encodeable enc)
shortMixedEnc <- replicateM 300 ( randomString 10)
shortGoodEnc <- replicateM 30 ( randomGood 10)
longMixedEnc <- replicateM 300 ( randomString 1000)
longGoodEnc <- replicateM 3000 ( randomGood 1000)
shortDec <- replicateM 300 (pack <$> randomString 10)
longDec <- replicateM 3000 (pack <$> randomString 1000)
writeFile filename (show
[ (s, encodeStrictByteStringExplicit enc s)
| ss <- [shortMixedEnc, shortGoodEnc, longMixedEnc, longGoodEnc]
, s <- ss
] ++ "\n")
appendFile filename (show
[ (bs, decodeStrictByteStringExplicit enc bs)
| bss <- [shortDec, longDec]
, bs <- bss
] ++ "\n")
complain action input expected actual = when (expected /= actual) . putStrLn . concat $
[ "when "
, pad action
, show input
, "\n"
, pad "expected"
, show expected
, "\n"
, pad "but got"
, show actual
]
where
size = maximum . map length $ [action, "expected", "but got"]
pad s = s ++ replicate (size - length s) ' ' ++ ": "
test enc = do
[encoded_, decoded_] <- lines <$> readFile (show enc ++ ".regression")
let encoded = read encoded_
decoded = read decoded_
forM_ encoded $ \(s , correctEncoding) -> complain "encoding" s correctEncoding (encodeStrictByteStringExplicit enc s)
forM_ decoded $ \(bs, correctDecoding) -> complain "decoding" bs correctDecoding (decodeStrictByteStringExplicit enc bs)

View File

@ -53,6 +53,9 @@ charGen = let
threeByte = choose (0x010000,0x10FFFF) >>= return.chr
in frequency [(40,ascii),(30,oneByte),(20,twoByte),(10,threeByte)]
instance Arbitrary Word8 where
arbitrary = choose (0x00,0xFF::Int) >>= return.fromIntegral
quickCheckEncoding :: Encoding enc => enc -> IO ()
quickCheckEncoding e = do
quickCheck (encodingIdentity e)